Files
steinhelge 45f7a77171
Build & Deploy / build-and-deploy (push) Successful in 46s
Støtte for flere bibs per bilde, EXIF-metadata og zoom i gjennomgang
- OCR: ny read_all_bibs() returnerer alle unike startnumre (≥2 sifre) per bilde
- Ingest: oppretter én passering per bib (ikke bare beste), ingen bib → needs_review
- image_tagger.py: skriv/les bib-metadata som JSON i EXIF UserComment (piexif)
- Ingest + resolve: tagger bildefilen med bibs automatisk og ved manuell bekreftelse
- API: POST /api/passages/{id}/reanalyze — re-kjør OCR på eksisterende bilde
- API: POST /api/passages/{id}/resolve oppdaterer nå EXIF med bekreftet bib
- races: ny kolonne bib_filter_enabled (med automatisk migrering) + per-løp toggle
- ReviewPage: Re-analyser-knapp og klikk-for-zoom med scroll/drag

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 09:01:51 +01:00

272 lines
8.7 KiB
Python

"""
Bildehåndtering:
- Overvåk depot/-katalogen for nye bilder
- Valider EXIF
- Kjør OCR
- Flytt til processed/ med unikt filnavn
- Logg passering til DB
Kan kjøres som egen prosess (python ingest.py) eller importeres av API.
"""
import asyncio
import logging
import shutil
import uuid
from pathlib import Path
from typing import Optional
import aiosqlite
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers import Observer
from exif_parser import ExifError, parse_image
from image_tagger import write_bib_tags
from ocr import read_all_bibs
from passage_log import log_passage
from profile_db import get_or_create_athlete, init_db
logger = logging.getLogger(__name__)

# Incoming directory: scanned at startup and watched for newly created images.
DEPOT_DIR = Path("/depot")
# Root of the archive; processed images are moved into <year>/<month>/ below it.
PROCESSED_DIR = Path("/processed")
# Images whose EXIF data fails to parse are moved here instead of being logged.
REJECTED_DIR = DEPOT_DIR / "rejected"
# Path handed to aiosqlite.connect() when ingest opens its own connection.
DB_PATH = "/data/timing.db"
# Confidence threshold for automatic logging: OCR hits below this value are
# stored with needs_review=True and are not linked to an athlete profile.
MIN_AUTO_CONFIDENCE = 0.75
# Lower-cased file suffixes that are treated as images; anything else is ignored.
VALID_SUFFIXES = {".jpg", ".jpeg", ".png"}
def _destination_path(source: Path, timestamp) -> Path:
    """
    Build the archive path: processed/<year>/<month>/<uuid>_<original file name>.

    The year and month come from ``timestamp.strftime`` (the callers in this
    module pass a datetime). The target directory is created if it is missing;
    the file itself is not touched here.
    """
    folder = PROCESSED_DIR / timestamp.strftime("%Y") / timestamp.strftime("%m")
    folder.mkdir(parents=True, exist_ok=True)
    # A random hex prefix keeps equally named source files from colliding.
    return folder / f"{uuid.uuid4().hex}_{source.name}"
async def process_image(path: Path) -> None:
    """
    Process one image: validate EXIF, run OCR, move the file, log passages.

    Steps:
      1. Files whose suffix is not in VALID_SUFFIXES are ignored.
      2. If EXIF parsing raises ExifError the file is moved to REJECTED_DIR
         and nothing is logged.
      3. OCR is run on the image, which is then moved below PROCESSED_DIR.
      4. One passage is logged per detected bib. Hits below
         MIN_AUTO_CONFIDENCE are flagged for review and not linked to an
         athlete. When no bib is found a single review passage is logged.
      5. All detected bib numbers are written into the moved file's metadata.

    Args:
        path: Location of the image inside the depot.
    """
    if path.suffix.lower() not in VALID_SUFFIXES:
        logger.debug("Ignorerer ikke-bilde: %s", path)
        return

    logger.info("Behandler: %s", path.name)

    # --- EXIF validation ---
    try:
        meta = parse_image(path)
    except ExifError as e:
        logger.warning("Ugyldig EXIF i %s: %s — avviser", path.name, e)
        REJECTED_DIR.mkdir(parents=True, exist_ok=True)
        rejected = REJECTED_DIR / path.name
        if rejected.exists():
            # shutil.move may silently replace an existing file; keep the
            # earlier rejected image by giving this one a unique prefix.
            rejected = REJECTED_DIR / f"{uuid.uuid4().hex}_{path.name}"
        shutil.move(str(path), str(rejected))
        return

    # --- OCR ---
    bibs = read_all_bibs(path)
    logger.debug("OCR: %d startnumre funnet", len(bibs))

    # --- Move to processed/ ---
    dest = _destination_path(path, meta.timestamp_utc)
    shutil.move(str(path), str(dest))
    logger.info("Flyttet til: %s", dest)

    # The station fallback is the same for every passage from this image.
    station = meta.station or "unknown"

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        await init_db(db)

        if not bibs:
            # No bib found: queue the image for manual review.
            await log_passage(
                db,
                profile_id=None,
                bib_number=None,
                station=station,
                timestamp_utc=meta.timestamp_utc,
                gps_lat=meta.gps_lat,
                gps_lon=meta.gps_lon,
                gps_alt=meta.gps_alt,
                confidence=0.0,
                proximity_score=0.0,
                id_method="bib_ocr_uncertain",
                source_image=str(dest),
                needs_review=True,
                review_note="number_unreadable",
            )
            logger.info("Passering logget (ingen bib): station=%s", meta.station)
        else:
            for ocr in bibs:
                confidence = ocr.confidence
                needs_review = confidence < MIN_AUTO_CONFIDENCE
                id_method = "bib_ocr" if not needs_review else "bib_ocr_uncertain"
                review_note = "low_confidence" if needs_review else None

                # Only confident reads are tied to an athlete profile.
                profile_id = None
                if ocr.digits and not needs_review:
                    profile_id = await get_or_create_athlete(db, ocr.digits)

                await log_passage(
                    db,
                    profile_id=profile_id,
                    bib_number=ocr.digits,
                    station=station,
                    timestamp_utc=meta.timestamp_utc,
                    gps_lat=meta.gps_lat,
                    gps_lon=meta.gps_lon,
                    gps_alt=meta.gps_alt,
                    confidence=confidence,
                    proximity_score=ocr.proximity_score,
                    id_method=id_method,
                    source_image=str(dest),
                    needs_review=needs_review,
                    review_note=review_note,
                )
                logger.info(
                    "Passering logget: bib=%s station=%s needs_review=%s",
                    ocr.digits, meta.station, needs_review,
                )

    # Write every detected bib into the image file's metadata.
    found_bibs = [ocr.digits for ocr in bibs if ocr.digits]
    if found_bibs:
        write_bib_tags(dest, found_bibs, station=station)
async def process_image_with_override(
    path: Path,
    *,
    race_id: Optional[str],
    station_name: str,
    gps_lat: Optional[float],
    gps_lon: Optional[float],
    gps_alt: Optional[float],
    db,
) -> None:
    """
    Process an image with manually supplied station and GPS (web upload).

    The EXIF timestamp is used when it can be parsed; otherwise the current
    UTC time is used. The image is OCR-scanned, moved below PROCESSED_DIR,
    and one passage is logged per detected bib (or a single review passage
    when none is found). Detected bibs are finally written into the file.

    Args:
        path: Uploaded image file.
        race_id: Race identifier, passed to log_passage and write_bib_tags.
        station_name: Station stored on every passage.
        gps_lat: Latitude; a missing (or zero) value is stored as 0.0.
        gps_lon: Longitude; a missing (or zero) value is stored as 0.0.
        gps_alt: Altitude, passed through unchanged.
        db: Open database handle passed on to log_passage.
    """
    from datetime import datetime, timezone
    from profile_db import get_or_create_athlete as _get_or_create

    logger.info("Web-opplasting: %s → stasjon=%s", path.name, station_name)

    # Prefer the EXIF timestamp; fall back to "now" if EXIF is unusable.
    try:
        timestamp = parse_image(path).timestamp_utc
    except ExifError:
        timestamp = datetime.now(timezone.utc)

    detections = read_all_bibs(path)
    dest = _destination_path(path, timestamp)
    shutil.move(str(path), str(dest))

    # Fields that are identical for every passage from this image.
    shared = {
        "race_id": race_id,
        "station": station_name,
        "timestamp_utc": timestamp,
        "gps_lat": gps_lat or 0.0,
        "gps_lon": gps_lon or 0.0,
        "gps_alt": gps_alt,
        "source_image": str(dest),
    }

    if not detections:
        # Nothing readable: log one passage that needs manual review.
        await log_passage(
            db,
            profile_id=None,
            bib_number=None,
            confidence=0.0,
            proximity_score=0.0,
            id_method="bib_ocr_uncertain",
            needs_review=True,
            review_note="number_unreadable",
            **shared,
        )
        logger.info("Passering logget (ingen bib): station=%s", station_name)
        return

    for hit in detections:
        score = hit.confidence
        uncertain = score < MIN_AUTO_CONFIDENCE

        # Only confident reads are tied to an athlete profile.
        athlete_id = None
        if hit.digits and not uncertain:
            athlete_id = await _get_or_create(db, hit.digits)

        await log_passage(
            db,
            profile_id=athlete_id,
            bib_number=hit.digits,
            confidence=score,
            proximity_score=hit.proximity_score,
            id_method="bib_ocr_uncertain" if uncertain else "bib_ocr",
            needs_review=uncertain,
            review_note="low_confidence" if uncertain else None,
            **shared,
        )
        logger.info("Passering logget: bib=%s station=%s", hit.digits, station_name)

    # Write every detected bib into the image file's metadata.
    tagged = [hit.digits for hit in detections if hit.digits]
    if tagged:
        write_bib_tags(dest, tagged, station=station_name, race_id=race_id)
async def process_existing() -> None:
    """
    Process images that are already in the depot at startup.

    Files directly inside DEPOT_DIR with a suffix in VALID_SUFFIXES are
    handled one at a time in sorted order. A failure on one image is logged
    with its traceback and the scan continues, so a single bad file cannot
    abort startup and keep the watcher from ever running.
    """
    for path in sorted(DEPOT_DIR.glob("*")):
        if not (path.is_file() and path.suffix.lower() in VALID_SUFFIXES):
            continue
        try:
            await process_image(path)
        except Exception:
            # Per-file boundary: report the failure and move on to the next.
            logger.exception("Behandling av %s feilet — hopper over", path)
class DepotHandler(FileSystemEventHandler):
    """
    Watchdog handler that schedules process_image for newly created files.

    Events are handed over to the given asyncio loop with
    asyncio.run_coroutine_threadsafe. Failures in the scheduled coroutine
    are logged rather than left unretrieved on the returned future.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop):
        """Store the event loop on which process_image will be scheduled."""
        super().__init__()
        self._loop = loop

    def on_created(self, event: FileCreatedEvent):
        """Schedule processing for a created file; directories are ignored."""
        if event.is_directory:
            return
        path = Path(event.src_path)
        future = asyncio.run_coroutine_threadsafe(process_image(path), self._loop)
        # Without a callback an exception would sit unnoticed on the future.
        future.add_done_callback(lambda done: self._log_failure(path, done))

    @staticmethod
    def _log_failure(path: Path, future) -> None:
        """Log cancellation or the exception of a finished processing future."""
        if future.cancelled():
            logger.warning("Behandling av %s ble avbrutt", path)
            return
        error = future.exception()
        if error is not None:
            logger.error("Behandling av %s feilet", path, exc_info=error)
async def watch_depot() -> None:
    """
    Watch DEPOT_DIR (non-recursively) for new files until cancelled.

    A DepotHandler bound to the running event loop is registered with a
    watchdog Observer. The coroutine then sleeps in one-second steps; when
    it is cancelled or fails, the observer is stopped and joined.
    """
    watcher = Observer()
    watcher.schedule(
        DepotHandler(asyncio.get_running_loop()),
        str(DEPOT_DIR),
        recursive=False,
    )
    watcher.start()
    logger.info("Overvåker depot: %s", DEPOT_DIR)

    try:
        # Keep this coroutine alive while the observer delivers events.
        while True:
            await asyncio.sleep(1)
    finally:
        watcher.stop()
        watcher.join()
async def main() -> None:
    """
    Entry point for running ingest as a standalone process.

    Configures INFO-level logging, processes images already in the depot,
    then watches the depot for new files.
    """
    log_format = "%(asctime)s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    await process_existing()
    await watch_depot()


# Run the ingest loop only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())