diff --git a/backend/ingest.py b/backend/ingest.py
index 3be0875..2f44f49 100644
--- a/backend/ingest.py
+++ b/backend/ingest.py
@@ -113,6 +113,7 @@ async def process_image(path: Path) -> None:
         gps_lon=meta.gps_lon,
         gps_alt=meta.gps_alt,
         confidence=confidence,
+        proximity_score=ocr.proximity_score,
         id_method=id_method,
         source_image=str(dest),
         needs_review=needs_review,
diff --git a/backend/ocr.py b/backend/ocr.py
index 333a855..a338270 100644
--- a/backend/ocr.py
+++ b/backend/ocr.py
@@ -29,6 +29,7 @@ class OcrResult:
     digits: Optional[str]           # Gjenkjente sifre, f.eks. "42", None hvis ingen
     confidence: float               # 0.0–1.0
     partial: bool                   # True hvis nummeret trolig er delvis skjult
+    proximity_score: float = 0.0    # Area of the detected bib box (px²) — larger = closer to the camera
     raw_texts: list[str] = field(default_factory=list)  # Alle OCR-treff for debug
 
 
@@ -56,28 +57,37 @@ def _preprocess(image_path: Path) -> np.ndarray:
     return enhanced
 
 
-def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool]:
+def _bbox_area(bbox) -> float:
+    """Return the axis-aligned area of an EasyOCR box [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]."""
+    xs = [p[0] for p in bbox]
+    ys = [p[1] for p in bbox]
+    # EasyOCR coordinates may be numpy scalars; convert to a plain float so the
+    # value can be bound as an SQLite parameter.
+    return float((max(xs) - min(xs)) * (max(ys) - min(ys)))
+
+
+def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool, float]:
     """
-    Finn beste siffersekvens blant OCR-treff.
-    Returnerer (sifre, konfidens, partial).
+    Find the best digit sequence among the OCR hits.
+    Returns (digits, confidence, partial, proximity_score).
+    proximity_score = bounding-box area in px² (larger = closer to the camera).
     """
     candidates = []
-    for (_, text, conf) in texts:
-        # Behold kun sifre
+    for (bbox, text, conf) in texts:
         digits = re.sub(r"[^0-9]", "", text)
         if digits:
-            candidates.append((digits, float(conf)))
+            candidates.append((digits, float(conf), _bbox_area(bbox)))
 
     if not candidates:
-        return None, 0.0, False
+        return None, 0.0, False, 0.0
 
     # Velg kandidat med høyest konfidens
-    best_digits, best_conf = max(candidates, key=lambda x: x[1])
+    best_digits, best_conf, best_area = max(candidates, key=lambda x: x[1])
 
-    # Heuristikk: 1–2 sifre kan tyde på delvis synlig nummer
+    # Heuristic: a single digit suggests the number is partially hidden
     partial = len(best_digits) < 2
 
-    return best_digits, best_conf, partial
+    return best_digits, best_conf, partial, best_area
 
 
 def read_bib(image_path: Path) -> OcrResult:
@@ -91,12 +101,13 @@ def read_bib(image_path: Path) -> OcrResult:
         results = reader.readtext(processed, detail=1, paragraph=False)
         raw_texts = [text for (_, text, _) in results]
 
-        digits, confidence, partial = _extract_bib_number(results)
+        digits, confidence, partial, proximity_score = _extract_bib_number(results)
 
         return OcrResult(
             digits=digits,
             confidence=confidence,
             partial=partial,
+            proximity_score=proximity_score,
             raw_texts=raw_texts,
         )
     except Exception as e:
diff --git a/backend/passage_log.py b/backend/passage_log.py
index 57fc83f..86c8de8 100644
--- a/backend/passage_log.py
+++ b/backend/passage_log.py
@@ -3,11 +3,59 @@
 Skriv og query passeringslogg i SQLite.
 """
+import logging
 import uuid
-from datetime import datetime
+from datetime import datetime, timedelta
+from pathlib import Path
 from typing import Optional
 
 import aiosqlite
 
+logger = logging.getLogger(__name__)
+
+# Deduplication window (seconds): images of the same athlete at the same
+# station taken within this window are treated as one passage.
+DEDUP_WINDOW_SECONDS = 2
+
+
+async def _find_duplicate(
+    db: aiosqlite.Connection,
+    bib_number: str,
+    station: str,
+    timestamp_utc: datetime,
+) -> Optional[dict]:
+    """
+    Find an existing passage with the same bib and station within DEDUP_WINDOW_SECONDS.
+
+    Returns a dict with passage_id, proximity_score and source_image for the
+    match with the highest proximity_score, or None when there is no match.
+    """
+    window = timedelta(seconds=DEDUP_WINDOW_SECONDS)
+    window_start = (timestamp_utc - window).isoformat()
+    window_end = (timestamp_utc + window).isoformat()
+
+    async with db.execute(
+        """
+        SELECT passage_id, proximity_score, source_image
+        FROM passages
+        WHERE bib_number = ? AND station = ?
+          AND timestamp_utc BETWEEN ? AND ?
+        ORDER BY proximity_score DESC
+        LIMIT 1
+        """,
+        (bib_number, station, window_start, window_end),
+    ) as cur:
+        row = await cur.fetchone()
+    if row is None:
+        return None
+    # Unpack positionally so this works with plain tuples as well as
+    # aiosqlite.Row; dict(row) would fail unless a Row factory is configured.
+    passage_id, existing_score, image_path = row
+    return {
+        "passage_id": passage_id,
+        "proximity_score": existing_score,
+        "source_image": image_path,
+    }
+
 
 async def log_passage(
     db: aiosqlite.Connection,
@@ -20,21 +68,65 @@ async def log_passage(
     gps_lon: float,
     gps_alt: Optional[float],
     confidence: float,
     id_method: str,
     source_image: str,
     needs_review: bool = False,
     review_note: Optional[str] = None,
+    proximity_score: float = 0.0,
 ) -> str:
-    """Logg én passering. Returnerer passage_id."""
+    """
+    Log one passage, de-duplicating bursts of images of the same athlete.
+
+    If a passage for the same bib number at the same station has already been
+    logged within DEDUP_WINDOW_SECONDS, only the image closest to the camera
+    (highest proximity_score) is kept, since it gives the most accurate
+    timestamp.
+
+    Returns the passage_id (either new or existing).
+    """
+    if bib_number:
+        duplicate = await _find_duplicate(db, bib_number, station, timestamp_utc)
+        if duplicate is not None:
+            existing_id = duplicate["passage_id"]
+            existing_image = duplicate["source_image"]
+            # Re-ingesting the same file must never delete the stored image.
+            same_file = existing_image == source_image
+            if proximity_score > duplicate["proximity_score"]:
+                # New image is closer to the camera: update timestamp and image path.
+                await db.execute(
+                    """
+                    UPDATE passages
+                    SET timestamp_utc = ?, proximity_score = ?,
+                        source_image = ?, confidence = ?, id_method = ?
+                    WHERE passage_id = ?
+                    """,
+                    (
+                        timestamp_utc.isoformat(),
+                        proximity_score,
+                        source_image,
+                        confidence,
+                        id_method,
+                        existing_id,
+                    ),
+                )
+                await db.commit()
+                # Remove the old, worse image only after the row points at the new one.
+                if not same_file:
+                    _delete_image_file(existing_image)
+            elif not same_file:
+                # Existing image is closer to the camera: discard the new image.
+                _delete_image_file(source_image)
+            return existing_id
+
     passage_id = str(uuid.uuid4())
     await db.execute(
         """
         INSERT INTO passages (
             passage_id, profile_id, bib_number, station, timestamp_utc,
             gps_lat, gps_lon, gps_alt,
-            confidence, id_method, source_image,
+            confidence, proximity_score, id_method, source_image,
             needs_review, review_note
-        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         """,
         (
             passage_id,
@@ -46,6 +138,7 @@ async def log_passage(
             gps_lon,
             gps_alt,
             confidence,
+            proximity_score,
             id_method,
             source_image,
             int(needs_review),
@@ -56,6 +149,16 @@ async def log_passage(
     return passage_id
 
 
+def _delete_image_file(path: str) -> None:
+    """Delete an image file, best effort; log a warning instead of raising."""
+    if not path:
+        return
+    try:
+        Path(path).unlink(missing_ok=True)
+    except (OSError, ValueError) as e:
+        logger.warning("Kunne ikke slette duplikatbilde %s: %s", path, e)
+
+
 async def get_passages(
     db: aiosqlite.Connection,
     profile_id: Optional[str] = None,
diff --git a/backend/profile_db.py b/backend/profile_db.py
index 2aa4d8f..ba4bdc1 100644
--- a/backend/profile_db.py
+++ b/backend/profile_db.py
@@ -24,20 +24,21 @@ async def init_db(db: aiosqlite.Connection) -> None:
         );
 
         CREATE TABLE IF NOT EXISTS passages (
-            passage_id    TEXT PRIMARY KEY,
-            profile_id    TEXT REFERENCES athletes(profile_id),
-            bib_number    TEXT,
-            station       TEXT NOT NULL,
-            timestamp_utc TEXT NOT NULL,
-            gps_lat       REAL,
-            gps_lon       REAL,
-            gps_alt       REAL,
-            confidence    REAL,
-            id_method     TEXT,
-            source_image  TEXT,
-            needs_review  INTEGER NOT NULL DEFAULT 0,
-            review_note   TEXT,
-            created_at    TEXT NOT NULL DEFAULT (datetime('now'))
+            passage_id      TEXT PRIMARY KEY,
+            profile_id      TEXT REFERENCES athletes(profile_id),
+            bib_number      TEXT,
+            station         TEXT NOT NULL,
+            timestamp_utc   TEXT NOT NULL,
+            gps_lat         REAL,
+            gps_lon         REAL,
+            gps_alt         REAL,
+            confidence      REAL,
+            proximity_score REAL NOT NULL DEFAULT 0,
+            id_method       TEXT,
+            source_image    TEXT,
+            needs_review    INTEGER NOT NULL DEFAULT 0,
+            review_note     TEXT,
+            created_at      TEXT NOT NULL DEFAULT (datetime('now'))
         );
 
         CREATE INDEX IF NOT EXISTS idx_passages_profile ON passages(profile_id);