Files
timing/backend/ingest.py
T
steinhelge 5393e85a74
Build & Deploy / build-and-deploy (push) Successful in 2m18s
Add race and station management
- races table: name, date, description, is_active
- stations table: ordered checkpoints with GPS per race
- New /api/races and /api/races/{id}/stations endpoints
- Upload now requires race + station selection; uses station GPS
  so images without GPS EXIF are accepted
- passages filtered by active race throughout
- RacePage: create races, manage stations (add/edit/delete checkpoints)
- Navbar shows active race name
- Start and finish stations created automatically per race

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 09:44:45 +01:00

236 lines
6.8 KiB
Python

"""
Bildehåndtering:
- Overvåk depot/-katalogen for nye bilder
- Valider EXIF
- Kjør OCR
- Flytt til processed/ med unikt filnavn
- Logg passering til DB
Kan kjøres som egen prosess (python ingest.py) eller importeres av API.
"""
import asyncio
import logging
import shutil
import uuid
from pathlib import Path
from typing import Optional
import aiosqlite
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers import Observer
from exif_parser import ExifError, parse_image
from ocr import read_bib
from passage_log import log_passage
from profile_db import get_athlete_by_bib, init_db
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Directory that is scanned at startup and watched for newly created images.
DEPOT_DIR = Path("/depot")
# Root under which processed images are stored (<year>/<month>/<uuid>_<name>).
PROCESSED_DIR = Path("/processed")
# Images whose EXIF data fails validation are moved here.
REJECTED_DIR = DEPOT_DIR / "rejected"
# SQLite database file opened by process_image.
DB_PATH = "/data/timing.db"
# OCR confidence threshold: results below this value are flagged for manual
# review instead of being matched to an athlete automatically.
MIN_AUTO_CONFIDENCE = 0.75
# File suffixes (compared lower-cased) that are treated as images.
VALID_SUFFIXES = {".jpg", ".jpeg", ".png"}
def _destination_path(source: Path, timestamp) -> Path:
    """
    Build the storage path for a processed image.

    The layout is processed/<year>/<month>/<uuid>_<original filename>. Year
    and month are taken from ``timestamp`` via ``strftime``; the random UUID
    prefix keeps identically named images apart. The parent directory is
    created as a side effect when it does not exist yet.
    """
    target_dir = PROCESSED_DIR / timestamp.strftime("%Y") / timestamp.strftime("%m")
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir / f"{uuid.uuid4().hex}_{source.name}"
async def process_image(path: Path) -> None:
    """
    Process one depot image: validate EXIF, run OCR, move the file, log a passage.

    Files whose suffix is not in VALID_SUFFIXES are ignored. When parse_image
    raises ExifError the file is moved to REJECTED_DIR and nothing is logged.
    Otherwise the file is moved under PROCESSED_DIR and a passage is written
    to the database at DB_PATH. The passage is flagged for manual review, and
    no athlete lookup is attempted, when OCR found no digits or its confidence
    is below MIN_AUTO_CONFIDENCE.
    """
    if path.suffix.lower() not in VALID_SUFFIXES:
        logger.debug("Ignorerer ikke-bilde: %s", path)
        return

    logger.info("Behandler: %s", path.name)

    # --- EXIF validation ---
    try:
        meta = parse_image(path)
    except ExifError as e:
        logger.warning("Ugyldig EXIF i %s: %s — avviser", path.name, e)
        REJECTED_DIR.mkdir(parents=True, exist_ok=True)
        rejected = REJECTED_DIR / path.name
        if rejected.exists():
            # shutil.move may silently overwrite an existing file, which would
            # destroy an earlier rejected image with the same name. Keep the
            # plain name when it is free and add a unique prefix otherwise.
            rejected = REJECTED_DIR / f"{uuid.uuid4().hex}_{path.name}"
        shutil.move(str(path), str(rejected))
        return

    # --- OCR ---
    ocr = read_bib(path)
    logger.debug("OCR: digits=%s conf=%.2f", ocr.digits, ocr.confidence)

    # --- Move to processed/ ---
    # NOTE(review): the file leaves the depot before the passage is stored; if
    # the database write below fails the image is not picked up again —
    # confirm this ordering is intended.
    dest = _destination_path(path, meta.timestamp_utc)
    shutil.move(str(path), str(dest))
    logger.info("Flyttet til: %s", dest)

    # --- Decide confidence and review flag ---
    confidence = ocr.confidence
    needs_review = False
    review_note = None
    id_method = "bib_ocr"

    if ocr.digits is None or confidence < MIN_AUTO_CONFIDENCE:
        needs_review = True
        review_note = "number_unreadable" if ocr.digits is None else "low_confidence"
        id_method = "bib_ocr_uncertain"

    # --- Link against the profile DB ---
    profile_id = None
    bib_number = ocr.digits

    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        await init_db(db)

        # Only trust the bib number for an athlete lookup when the OCR result
        # did not need review.
        if bib_number and not needs_review:
            athlete = await get_athlete_by_bib(db, bib_number)
            if athlete:
                profile_id = athlete["profile_id"]
            else:
                logger.debug("Ukjent startnummer: %s", bib_number)

        await log_passage(
            db,
            profile_id=profile_id,
            bib_number=bib_number,
            station=meta.station or "unknown",
            timestamp_utc=meta.timestamp_utc,
            gps_lat=meta.gps_lat,
            gps_lon=meta.gps_lon,
            gps_alt=meta.gps_alt,
            confidence=confidence,
            proximity_score=ocr.proximity_score,
            id_method=id_method,
            source_image=str(dest),
            needs_review=needs_review,
            review_note=review_note,
        )

    logger.info(
        "Passering logget: bib=%s station=%s needs_review=%s",
        bib_number, meta.station, needs_review,
    )
async def process_image_with_override(
    path: Path,
    *,
    race_id: Optional[str],
    station_name: str,
    gps_lat: Optional[float],
    gps_lon: Optional[float],
    gps_alt: Optional[float],
    db,
) -> None:
    """
    Process an image whose station and GPS are supplied by the caller (web upload).

    The timestamp comes from parse_image when it succeeds; on ExifError the
    current UTC time is used instead. The image is OCR-read, moved under
    PROCESSED_DIR and logged as a passage on the given ``db`` connection.
    A missing (or zero) latitude/longitude is stored as 0.0.

    NOTE(review): every ExifError triggers the fallback to "now"; if
    parse_image also raises for images that lack GPS data, a valid EXIF
    capture time would be discarded — confirm against exif_parser.
    """
    from datetime import datetime, timezone
    from profile_db import get_athlete_by_bib

    logger.info("Web-opplasting: %s → stasjon=%s", path.name, station_name)

    # Prefer the EXIF timestamp, fall back to the current time.
    try:
        timestamp = parse_image(path).timestamp_utc
    except ExifError:
        timestamp = datetime.now(timezone.utc)

    ocr = read_bib(path)
    dest = _destination_path(path, timestamp)
    shutil.move(str(path), str(dest))

    digits = ocr.digits
    confidence = ocr.confidence

    # Classify the OCR result: unreadable, uncertain, or trusted.
    if digits is None:
        needs_review = True
        review_note = "number_unreadable"
    elif confidence < MIN_AUTO_CONFIDENCE:
        needs_review = True
        review_note = "low_confidence"
    else:
        needs_review = False
        review_note = None
    id_method = "bib_ocr_uncertain" if needs_review else "bib_ocr"

    # Only a trusted bib number is matched against the athlete profiles.
    profile_id = None
    if digits and not needs_review:
        athlete = await get_athlete_by_bib(db, digits)
        if athlete:
            profile_id = athlete["profile_id"]

    await log_passage(
        db,
        race_id=race_id,
        profile_id=profile_id,
        bib_number=digits,
        station=station_name,
        timestamp_utc=timestamp,
        gps_lat=gps_lat or 0.0,
        gps_lon=gps_lon or 0.0,
        gps_alt=gps_alt,
        confidence=confidence,
        proximity_score=ocr.proximity_score,
        id_method=id_method,
        source_image=str(dest),
        needs_review=needs_review,
        review_note=review_note,
    )
    logger.info("Passering logget: bib=%s station=%s", digits, station_name)
async def process_existing() -> None:
    """Process the images that are already present in the depot at startup.

    Entries directly inside DEPOT_DIR are handled one at a time in sorted
    order; anything that is not a regular file with a suffix from
    VALID_SUFFIXES is skipped.
    """
    backlog = sorted(DEPOT_DIR.glob("*"))
    for candidate in backlog:
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in VALID_SUFFIXES:
            continue
        await process_image(candidate)
class DepotHandler(FileSystemEventHandler):
    """
    Watchdog handler that schedules process_image for newly created files.

    The coroutine is submitted to the given event loop with
    asyncio.run_coroutine_threadsafe, the thread-safe way to hand work to a
    loop from outside its own thread.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop):
        """Remember the event loop that should run the processing coroutines."""
        super().__init__()
        self._loop = loop

    def on_created(self, event: FileCreatedEvent):
        """Schedule processing for a created file; directories are ignored."""
        if event.is_directory:
            return

        path = Path(event.src_path)
        future = asyncio.run_coroutine_threadsafe(process_image(path), self._loop)

        def _report(done) -> None:
            # Without this callback nobody ever inspects the future, so an
            # exception raised by process_image would vanish without a trace.
            if done.cancelled():
                return
            error = done.exception()
            if error is not None:
                logger.error("Behandling av %s feilet", path, exc_info=error)

        future.add_done_callback(_report)
async def watch_depot() -> None:
    """Watch DEPOT_DIR (non-recursively) for new files until interrupted.

    A watchdog observer is started with a DepotHandler bound to the running
    event loop. The coroutine then idles in one-second sleeps; when it is
    left through an exception or cancellation the observer is stopped and
    joined.
    """
    watcher = Observer()
    watcher.schedule(
        DepotHandler(asyncio.get_running_loop()),
        str(DEPOT_DIR),
        recursive=False,
    )
    watcher.start()
    logger.info("Overvåker depot: %s", DEPOT_DIR)

    try:
        # Keep the coroutine alive; the actual work happens in the handler.
        while True:
            await asyncio.sleep(1)
    finally:
        watcher.stop()
        watcher.join()
async def main() -> None:
    """
    Entry point for running the ingest as a standalone process.

    Configures logging, starts the depot watcher and then works through the
    images that were already in the depot. The watcher is started first so
    that images arriving while the backlog is being processed still produce
    an event; previously they were missed until the next restart.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )

    watcher = asyncio.create_task(watch_depot())
    # Yield once so watch_depot runs up to its first await, i.e. until the
    # observer has been started, before the backlog is listed.
    await asyncio.sleep(0)

    # A file created between observer start and the directory listing may be
    # seen by both paths; whichever runs second finds the file already moved.
    await process_existing()
    await watcher


if __name__ == "__main__":
    asyncio.run(main())