Files
timing/backend/ingest.py
T
steinhelge 018f84efd8
Build & Deploy / build-and-deploy (push) Successful in 45s
Auto-create athlete in startlist for unrecognized bib numbers
When a bib number is detected (via OCR or manual entry during review)
but not found in the start list, it is now automatically added with
the placeholder name "Ukjent #<nr>" instead of being left without a
profile_id (which would exclude it from results).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 17:51:03 +01:00

230 lines
6.7 KiB
Python

"""
Bildehåndtering:
- Overvåk depot/-katalogen for nye bilder
- Valider EXIF
- Kjør OCR
- Flytt til processed/ med unikt filnavn
- Logg passering til DB
Kan kjøres som egen prosess (python ingest.py) eller importeres av API.
"""
import asyncio
import logging
import shutil
import uuid
from pathlib import Path
from typing import Optional
import aiosqlite
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers import Observer
from exif_parser import ExifError, parse_image
from ocr import read_bib
from passage_log import log_passage
from profile_db import get_or_create_athlete, init_db
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Directory that is scanned at startup and watched for incoming images.
DEPOT_DIR = Path("/depot")
# Root directory that successfully parsed images are moved into.
PROCESSED_DIR = Path("/processed")
# Images that fail EXIF validation are moved here (a sub-directory of the depot).
REJECTED_DIR = DEPOT_DIR / "rejected"
# Path of the SQLite database opened through aiosqlite.
DB_PATH = "/data/timing.db"
# Confidence threshold for automatic logging; OCR results below it are flagged for review.
MIN_AUTO_CONFIDENCE = 0.75
# Lower-case file suffixes that are treated as images.
VALID_SUFFIXES = {".jpg", ".jpeg", ".png"}
def _destination_path(source: Path, timestamp) -> Path:
    """
    Build the destination path: processed/<year>/<month>/<uuid>_<original file name>.

    The year/month directory is created when it does not exist yet. The file
    itself is not touched; the caller performs the move.
    """
    year_part = timestamp.strftime("%Y")
    month_part = timestamp.strftime("%m")
    # A random hex prefix keeps names unique even when cameras reuse file names.
    file_name = f"{uuid.uuid4().hex}_{source.name}"

    folder = PROCESSED_DIR.joinpath(year_part, month_part)
    folder.mkdir(parents=True, exist_ok=True)
    return folder / file_name
async def process_image(path: Path) -> None:
    """
    Process a single image: validate EXIF, run OCR, move the file, log the passage.

    Files whose suffix is not in VALID_SUFFIXES are ignored. Images for which
    parse_image raises ExifError are moved to REJECTED_DIR and nothing is
    logged. Otherwise the image is moved below PROCESSED_DIR and a passage is
    written to the database. When OCR finds no digits, or its confidence is
    below MIN_AUTO_CONFIDENCE, the passage is flagged for review and no
    athlete profile is linked to it.
    """
    if path.suffix.lower() not in VALID_SUFFIXES:
        logger.debug("Ignorerer ikke-bilde: %s", path)
        return

    logger.info("Behandler: %s", path.name)

    # --- EXIF validation ---
    try:
        meta = parse_image(path)
    except ExifError as e:
        logger.warning("Ugyldig EXIF i %s: %s — avviser", path.name, e)
        REJECTED_DIR.mkdir(parents=True, exist_ok=True)
        rejected = REJECTED_DIR / path.name
        if rejected.exists():
            # Moving onto an existing file may overwrite it; keep the earlier
            # rejected image and disambiguate the same way processed/ does.
            rejected = REJECTED_DIR / f"{uuid.uuid4().hex}_{path.name}"
        shutil.move(str(path), str(rejected))
        return

    # --- OCR ---
    ocr = read_bib(path)
    logger.debug("OCR: digits=%s conf=%.2f", ocr.digits, ocr.confidence)

    # --- Move to processed/ ---
    dest = _destination_path(path, meta.timestamp_utc)
    shutil.move(str(path), str(dest))
    logger.info("Flyttet til: %s", dest)

    # --- Decide confidence and review flag ---
    confidence = ocr.confidence
    needs_review = False
    review_note = None
    id_method = "bib_ocr"
    if ocr.digits is None or confidence < MIN_AUTO_CONFIDENCE:
        needs_review = True
        review_note = "number_unreadable" if ocr.digits is None else "low_confidence"
        id_method = "bib_ocr_uncertain"

    # --- Link against the profile DB ---
    profile_id = None
    bib_number = ocr.digits
    # Computed once so the stored value and the log line cannot disagree.
    station = meta.station or "unknown"

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        await init_db(db)
        # Only confidently read bib numbers are tied to an athlete profile.
        if bib_number and not needs_review:
            profile_id = await get_or_create_athlete(db, bib_number)
        await log_passage(
            db,
            profile_id=profile_id,
            bib_number=bib_number,
            station=station,
            timestamp_utc=meta.timestamp_utc,
            gps_lat=meta.gps_lat,
            gps_lon=meta.gps_lon,
            gps_alt=meta.gps_alt,
            confidence=confidence,
            proximity_score=ocr.proximity_score,
            id_method=id_method,
            source_image=str(dest),
            needs_review=needs_review,
            review_note=review_note,
        )

    logger.info(
        "Passering logget: bib=%s station=%s needs_review=%s",
        bib_number, station, needs_review,
    )
async def process_image_with_override(
    path: Path,
    *,
    race_id: Optional[str],
    station_name: str,
    gps_lat: Optional[float],
    gps_lon: Optional[float],
    gps_alt: Optional[float],
    db,
) -> None:
    """
    Process an image with a manually supplied station and GPS position (web upload).

    The EXIF timestamp is used when parse_image succeeds; otherwise the
    current UTC time is used. The passage is written through the database
    handle passed in by the caller.
    """
    from datetime import datetime, timezone
    from profile_db import get_or_create_athlete as _get_or_create

    logger.info("Web-opplasting: %s → stasjon=%s", path.name, station_name)

    # Try EXIF for the timestamp, fall back to "now".
    try:
        timestamp = parse_image(path).timestamp_utc
    except ExifError:
        timestamp = datetime.now(timezone.utc)

    ocr = read_bib(path)

    dest = _destination_path(path, timestamp)
    shutil.move(str(path), str(dest))

    confidence = ocr.confidence
    if ocr.digits is None or confidence < MIN_AUTO_CONFIDENCE:
        # Unreadable or uncertain reading: flag for manual review.
        needs_review = True
        id_method = "bib_ocr_uncertain"
        if ocr.digits is None:
            review_note = "number_unreadable"
        else:
            review_note = "low_confidence"
    else:
        needs_review = False
        id_method = "bib_ocr"
        review_note = None

    # Only confidently read bib numbers are tied to an athlete profile.
    profile_id = None
    if ocr.digits and not needs_review:
        profile_id = await _get_or_create(db, ocr.digits)

    passage_fields = dict(
        race_id=race_id,
        profile_id=profile_id,
        bib_number=ocr.digits,
        station=station_name,
        timestamp_utc=timestamp,
        # Missing latitude/longitude are stored as 0.0.
        gps_lat=gps_lat or 0.0,
        gps_lon=gps_lon or 0.0,
        gps_alt=gps_alt,
        confidence=confidence,
        proximity_score=ocr.proximity_score,
        id_method=id_method,
        source_image=str(dest),
        needs_review=needs_review,
        review_note=review_note,
    )
    await log_passage(db, **passage_fields)

    logger.info("Passering logget: bib=%s station=%s", ocr.digits, station_name)
async def process_existing() -> None:
    """
    Process images that are already present in the depot directory at startup.

    Entries are handled in sorted order; only regular files with a suffix in
    VALID_SUFFIXES are passed to process_image. A failure on one image is
    logged and the remaining images are still processed, so a single bad file
    cannot keep the service from starting.
    """
    for path in sorted(DEPOT_DIR.glob("*")):
        if not (path.is_file() and path.suffix.lower() in VALID_SUFFIXES):
            continue
        try:
            await process_image(path)
        except Exception:
            # Boundary handler: record the traceback and move on to the next
            # file. Task cancellation is not an Exception and still propagates.
            logger.exception("Behandling av %s feilet — hopper over", path.name)
class DepotHandler(FileSystemEventHandler):
    """
    Watchdog handler that schedules process_image for newly created files.

    The coroutine is submitted to the given event loop with
    asyncio.run_coroutine_threadsafe, i.e. the handler is expected to be
    called from a thread other than the one running the loop.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop):
        super().__init__()
        self._loop = loop

    def on_created(self, event: FileCreatedEvent):
        """Schedule processing of the created file; directories are ignored."""
        if event.is_directory:
            return
        path = Path(event.src_path)
        future = asyncio.run_coroutine_threadsafe(process_image(path), self._loop)
        # Without a callback the future is dropped and any exception raised
        # by process_image would never be reported anywhere.
        future.add_done_callback(lambda done: self._log_failure(path, done))

    @staticmethod
    def _log_failure(path: Path, future) -> None:
        """Log the exception, if any, that ended processing of *path*."""
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            logger.error("Behandling av %s feilet", path.name, exc_info=error)
async def watch_depot() -> None:
    """
    Start file-system monitoring of the depot directory.

    The coroutine never returns normally: it idles in one-second sleeps and
    only leaves the loop through an exception (including task cancellation),
    at which point the observer is stopped and joined.
    """
    event_handler = DepotHandler(asyncio.get_running_loop())

    fs_observer = Observer()
    # Only the depot directory itself is watched, not its sub-directories.
    fs_observer.schedule(event_handler, str(DEPOT_DIR), recursive=False)
    fs_observer.start()
    logger.info("Overvåker depot: %s", DEPOT_DIR)

    try:
        # Keep the coroutine alive while the observer delivers events.
        while True:
            await asyncio.sleep(1)
    finally:
        fs_observer.stop()
        fs_observer.join()
async def main() -> None:
    """Configure logging, process the images already in the depot, then watch for new ones."""
    log_format = "%(asctime)s %(levelname)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    await process_existing()
    await watch_depot()


# Run the ingest loop when the module is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())