45f7a77171
Build & Deploy / build-and-deploy (push) Successful in 46s
- OCR: ny read_all_bibs() returnerer alle unike startnumre (≥2 sifre) per bilde
- Ingest: oppretter én passering per bib (ikke bare beste), ingen bib → needs_review
- image_tagger.py: skriv/les bib-metadata som JSON i EXIF UserComment (piexif)
- Ingest + resolve: tagger bildefilen med bibs automatisk og ved manuell bekreftelse
- API: POST /api/passages/{id}/reanalyze — re-kjør OCR på eksisterende bilde
- API: POST /api/passages/{id}/resolve oppdaterer nå EXIF med bekreftet bib
- races: ny kolonne bib_filter_enabled (med automatisk migrering) + per-løp toggle
- ReviewPage: Re-analyser-knapp og klikk-for-zoom med scroll/drag
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
272 lines
8.7 KiB
Python
272 lines
8.7 KiB
Python
"""
|
|
Bildehåndtering:
|
|
- Overvåk depot/-katalogen for nye bilder
|
|
- Valider EXIF
|
|
- Kjør OCR
|
|
- Flytt til processed/ med unikt filnavn
|
|
- Logg passering til DB
|
|
|
|
Kan kjøres som egen prosess (python ingest.py) eller importeres av API.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import shutil
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import aiosqlite
|
|
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
|
|
from watchdog.observers import Observer
|
|
|
|
from exif_parser import ExifError, parse_image
|
|
from image_tagger import write_bib_tags
|
|
from ocr import read_all_bibs
|
|
from passage_log import log_passage
|
|
from profile_db import get_or_create_athlete, init_db
|
|
|
|
logger = logging.getLogger(__name__)

# Directory that is scanned at startup and watched for incoming images.
DEPOT_DIR = Path("/depot")
# Root directory under which processed images are filed by year and month.
PROCESSED_DIR = Path("/processed")
# Images whose EXIF fails validation are moved here.
REJECTED_DIR = DEPOT_DIR / "rejected"
# SQLite database file opened when logging passages for depot images.
DB_PATH = "/data/timing.db"

# Confidence threshold for automatic logging: OCR reads below this value are
# stored with needs_review=True instead of being accepted automatically.
MIN_AUTO_CONFIDENCE = 0.75

# File suffixes (compared lower-cased) that are treated as images.
VALID_SUFFIXES = {".jpg", ".jpeg", ".png"}
|
|
|
|
|
|
def _destination_path(source: Path, timestamp) -> Path:
    """
    Build the destination path for a processed image.

    Layout: PROCESSED_DIR/<year>/<month>/<uuid>_<original file name>, where
    year and month come from ``timestamp.strftime``. The year/month directory
    is created if it does not exist; the file itself is not moved here.
    """
    month_dir = PROCESSED_DIR / timestamp.strftime("%Y") / timestamp.strftime("%m")
    month_dir.mkdir(parents=True, exist_ok=True)
    # A random hex prefix keeps files with identical original names apart.
    return month_dir / f"{uuid.uuid4().hex}_{source.name}"
|
|
|
|
|
|
def _rejected_path(source: Path) -> Path:
    """Return a target inside REJECTED_DIR that does not clobber an earlier reject."""
    REJECTED_DIR.mkdir(parents=True, exist_ok=True)
    candidate = REJECTED_DIR / source.name
    if candidate.exists():
        # shutil.move may silently overwrite an existing file (os.rename
        # semantics). Keep the plain name when it is free, otherwise prefix a
        # random hex id so both rejected files survive.
        candidate = REJECTED_DIR / f"{uuid.uuid4().hex}_{source.name}"
    return candidate


async def process_image(path: Path) -> None:
    """
    Process one depot image: validate EXIF, run OCR, move the file, log passages.

    Steps, in order:
      1. Files whose suffix is not in VALID_SUFFIXES are ignored.
      2. If ``parse_image`` raises ExifError the file is moved to REJECTED_DIR
         (without overwriting a previously rejected file of the same name)
         and nothing is logged.
      3. ``read_all_bibs`` is run on the image, then the file is moved to its
         processed location (see ``_destination_path``).
      4. One passage is logged per bib found. If no bib was found, a single
         passage flagged ``needs_review`` is logged instead.
      5. All bibs with digits are written into the moved file via
         ``write_bib_tags``.

    Note that the file is moved before the database is written, so an error
    while logging leaves the image in the processed tree without a passage.
    """
    if path.suffix.lower() not in VALID_SUFFIXES:
        logger.debug("Ignorerer ikke-bilde: %s", path)
        return

    logger.info("Behandler: %s", path.name)

    # --- EXIF validation ---
    try:
        meta = parse_image(path)
    except ExifError as e:
        logger.warning("Ugyldig EXIF i %s: %s — avviser", path.name, e)
        shutil.move(str(path), str(_rejected_path(path)))
        return

    # --- OCR ---
    bibs = read_all_bibs(path)
    logger.debug("OCR: %d startnumre funnet", len(bibs))

    # --- Move to processed/ ---
    dest = _destination_path(path, meta.timestamp_utc)
    shutil.move(str(path), str(dest))
    logger.info("Flyttet til: %s", dest)

    station = meta.station or "unknown"
    # Fields that are identical for every passage logged from this image.
    shared = dict(
        station=station,
        timestamp_utc=meta.timestamp_utc,
        gps_lat=meta.gps_lat,
        gps_lon=meta.gps_lon,
        gps_alt=meta.gps_alt,
        source_image=str(dest),
    )

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        await init_db(db)

        if not bibs:
            # No bib found: queue the image for manual review.
            await log_passage(
                db,
                profile_id=None,
                bib_number=None,
                confidence=0.0,
                proximity_score=0.0,
                id_method="bib_ocr_uncertain",
                needs_review=True,
                review_note="number_unreadable",
                **shared,
            )
            logger.info("Passering logget (ingen bib): station=%s", meta.station)
        else:
            for bib in bibs:
                confidence = bib.confidence
                needs_review = confidence < MIN_AUTO_CONFIDENCE

                # An athlete profile is only looked up or created for reads
                # that are accepted automatically.
                profile_id = None
                if bib.digits and not needs_review:
                    profile_id = await get_or_create_athlete(db, bib.digits)

                await log_passage(
                    db,
                    profile_id=profile_id,
                    bib_number=bib.digits,
                    confidence=confidence,
                    proximity_score=bib.proximity_score,
                    id_method="bib_ocr_uncertain" if needs_review else "bib_ocr",
                    needs_review=needs_review,
                    review_note="low_confidence" if needs_review else None,
                    **shared,
                )
                logger.info(
                    "Passering logget: bib=%s station=%s needs_review=%s",
                    bib.digits, meta.station, needs_review,
                )

    # Write every bib that was found into the image file's metadata.
    found_bibs = [bib.digits for bib in bibs if bib.digits]
    if found_bibs:
        write_bib_tags(dest, found_bibs, station=station)
|
|
|
|
|
|
async def process_image_with_override(
    path: Path,
    *,
    race_id: Optional[str],
    station_name: str,
    gps_lat: Optional[float],
    gps_lon: Optional[float],
    gps_alt: Optional[float],
    db,
) -> None:
    """
    Process an image with a manually supplied station and GPS position (web upload).

    The EXIF timestamp is used when ``parse_image`` succeeds; if it raises
    ExifError the current UTC time is used instead. The file is moved to its
    processed location, one passage is logged per bib found (or a single
    ``needs_review`` passage when none was found), and the bibs are written
    into the moved file. A missing or zero latitude/longitude is stored as 0.0.
    """
    from datetime import datetime, timezone
    from profile_db import get_or_create_athlete as _get_or_create

    logger.info("Web-opplasting: %s → stasjon=%s", path.name, station_name)

    # Try EXIF for the timestamp, fall back to "now".
    try:
        timestamp = parse_image(path).timestamp_utc
    except ExifError:
        timestamp = datetime.now(timezone.utc)

    bibs = read_all_bibs(path)
    dest = _destination_path(path, timestamp)
    shutil.move(str(path), str(dest))

    # Fields that are identical for every passage logged from this image.
    shared = dict(
        race_id=race_id,
        station=station_name,
        timestamp_utc=timestamp,
        gps_lat=gps_lat or 0.0,
        gps_lon=gps_lon or 0.0,
        gps_alt=gps_alt,
        source_image=str(dest),
    )

    if not bibs:
        await log_passage(
            db,
            profile_id=None,
            bib_number=None,
            confidence=0.0,
            proximity_score=0.0,
            id_method="bib_ocr_uncertain",
            needs_review=True,
            review_note="number_unreadable",
            **shared,
        )
        logger.info("Passering logget (ingen bib): station=%s", station_name)
        # Nothing to tag when no bib was read.
        return

    for reading in bibs:
        score = reading.confidence
        uncertain = score < MIN_AUTO_CONFIDENCE

        if uncertain:
            method, note = "bib_ocr_uncertain", "low_confidence"
        else:
            method, note = "bib_ocr", None

        # Only confident reads with digits are linked to an athlete profile.
        athlete_id = None
        if reading.digits and not uncertain:
            athlete_id = await _get_or_create(db, reading.digits)

        await log_passage(
            db,
            profile_id=athlete_id,
            bib_number=reading.digits,
            confidence=score,
            proximity_score=reading.proximity_score,
            id_method=method,
            needs_review=uncertain,
            review_note=note,
            **shared,
        )
        logger.info("Passering logget: bib=%s station=%s", reading.digits, station_name)

    # Write every bib that was found into the image file's metadata.
    found_bibs = [reading.digits for reading in bibs if reading.digits]
    if found_bibs:
        write_bib_tags(dest, found_bibs, station=station_name, race_id=race_id)
|
|
|
|
|
|
async def process_existing() -> None:
    """
    Process images that are already in DEPOT_DIR at startup.

    Files are handled in sorted path order; only regular files with a suffix
    in VALID_SUFFIXES are considered. A failure on one image is logged and the
    scan continues, so a single bad file cannot abort the backlog run (and,
    when called from ``main``, cannot prevent the watcher from starting).
    Task cancellation is not swallowed: asyncio.CancelledError does not derive
    from Exception on Python 3.8+.
    """
    for path in sorted(DEPOT_DIR.glob("*")):
        if not (path.is_file() and path.suffix.lower() in VALID_SUFFIXES):
            continue
        try:
            await process_image(path)
        except Exception:
            logger.exception("Behandling av %s feilet — hopper over", path.name)
|
|
|
|
|
|
class DepotHandler(FileSystemEventHandler):
    """
    Watchdog handler that schedules ``process_image`` for newly created files.

    Watchdog invokes the handler from its observer thread, so the coroutine is
    handed to the asyncio loop passed to the constructor via
    ``asyncio.run_coroutine_threadsafe``.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop):
        super().__init__()
        self._loop = loop

    def on_created(self, event: FileCreatedEvent):
        """Schedule processing of a newly created file; directories are ignored."""
        if event.is_directory:
            return
        path = Path(event.src_path)
        future = asyncio.run_coroutine_threadsafe(process_image(path), self._loop)
        # Without a callback the future is dropped and any exception raised by
        # process_image would never be retrieved or logged.
        future.add_done_callback(
            lambda done, name=path.name: self._report_failure(done, name)
        )

    @staticmethod
    def _report_failure(future, name: str) -> None:
        """Log the exception, if any, that ended processing of file ``name``."""
        if future.cancelled():
            # exception() would raise CancelledError on a cancelled future.
            return
        error = future.exception()
        if error is not None:
            logger.error("Behandling av %s feilet", name, exc_info=error)
|
|
|
|
|
|
async def watch_depot() -> None:
    """
    Start file-system watching of DEPOT_DIR (non-recursive) and run until cancelled.

    New-file events are forwarded to a DepotHandler bound to the running loop.
    The observer is stopped and joined when the coroutine exits.
    """
    depot_handler = DepotHandler(asyncio.get_running_loop())
    watcher = Observer()
    watcher.schedule(depot_handler, str(DEPOT_DIR), recursive=False)
    watcher.start()
    logger.info("Overvåker depot: %s", DEPOT_DIR)

    try:
        # Keep the coroutine alive; the observer does its work in its own thread.
        while True:
            await asyncio.sleep(1)
    finally:
        watcher.stop()
        watcher.join()
|
|
|
|
|
|
async def main() -> None:
    """Configure logging, process the existing depot backlog, then watch for new files."""
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )
    await process_existing()
    await watch_depot()


if __name__ == "__main__":
    # Run as a standalone process.
    asyncio.run(main())
|