Add deduplication: keep image closest to camera within 2s window
Build & Deploy / build-and-deploy (push) Has been cancelled

Within a burst sequence from the same station, the image where the athlete
is physically closest to the camera gives the most accurate passage timestamp.
Proximity is measured by bib bounding box area (larger = closer).

When a duplicate is detected:
- New image closer: update timestamp + image path, delete old image
- Existing image closer: discard new image

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-20 15:22:57 +01:00
parent b191b08579
commit 24645dfd11
4 changed files with 120 additions and 28 deletions
+1
View File
@@ -113,6 +113,7 @@ async def process_image(path: Path) -> None:
gps_lon=meta.gps_lon,
gps_alt=meta.gps_alt,
confidence=confidence,
proximity_score=ocr.proximity_score,
id_method=id_method,
source_image=str(dest),
needs_review=needs_review,
+18 -10
View File
@@ -29,6 +29,7 @@ class OcrResult:
digits: Optional[str] # Gjenkjente sifre, f.eks. "42", None hvis ingen
confidence: float # 0.0–1.0
partial: bool # True hvis nummeret trolig er delvis skjult
proximity_score: float = 0.0 # Areal av detektert bib-boks (px²) — større = nærmere kamera
raw_texts: list[str] = field(default_factory=list) # Alle OCR-treff for debug
@@ -56,28 +57,34 @@ def _preprocess(image_path: Path) -> np.ndarray:
return enhanced
def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool]:
def _bbox_area(bbox) -> float:
    """Return the area of the axis-aligned rectangle enclosing an OCR bounding box.

    Args:
        bbox: Sequence of corner points in the EasyOCR layout
            ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``.

    Returns:
        Width times height of the enclosing rectangle, always as a built-in
        ``float``. An empty ``bbox`` yields ``0.0``.
    """
    # A box without corner points has no extent; max()/min() would raise on it.
    if len(bbox) == 0:
        return 0.0

    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]

    # Cast each side to float before multiplying so the result honours the
    # declared return type regardless of the coordinate type.
    # NOTE(review): coordinates may be NumPy scalars depending on the OCR
    # backend; a plain float is safe to store and serialise either way.
    width = float(max(xs) - min(xs))
    height = float(max(ys) - min(ys))
    return width * height
def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool, float]:
    """Pick the most confident digit sequence among the OCR hits.

    Args:
        texts: OCR hits, each unpacked as ``(bbox, text, confidence)``.

    Returns:
        ``(digits, confidence, partial, proximity_score)`` where
        ``proximity_score`` is the bounding-box area of the chosen hit
        (larger = closer to the camera). When no hit contains a digit the
        result is ``(None, 0.0, False, 0.0)``.
    """
    # Strip everything but digits from each hit; hits without digits are dropped.
    scored = [
        (digits, float(conf), _bbox_area(bbox))
        for bbox, text, conf in texts
        if (digits := re.sub(r"[^0-9]", "", text))
    ]
    if not scored:
        return None, 0.0, False, 0.0

    # The hit with the highest confidence wins (first one on ties).
    number, confidence, area = max(scored, key=lambda hit: hit[1])

    # Heuristic: a very short number may be a partially hidden bib.
    # NOTE(review): the original comment spoke of "1–2 digits", but the check
    # only flags single-digit results; confirm which one is intended.
    is_partial = len(number) < 2
    return number, confidence, is_partial, area
def read_bib(image_path: Path) -> OcrResult:
@@ -91,12 +98,13 @@ def read_bib(image_path: Path) -> OcrResult:
results = reader.readtext(processed, detail=1, paragraph=False)
raw_texts = [text for (_, text, _) in results]
digits, confidence, partial = _extract_bib_number(results)
digits, confidence, partial, proximity_score = _extract_bib_number(results)
return OcrResult(
digits=digits,
confidence=confidence,
partial=partial,
proximity_score=proximity_score,
raw_texts=raw_texts,
)
except Exception as e:
+86 -4
View File
@@ -3,11 +3,43 @@ Skriv og query passeringslogg i SQLite.
"""
import uuid
from datetime import datetime
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
import aiosqlite
# Deduplication time window: images of the same athlete at the same station
# whose timestamps lie within this many seconds of each other (before or
# after) are treated as the same passage.
DEDUP_WINDOW_SECONDS = 2
async def _find_duplicate(
    db: aiosqlite.Connection,
    bib_number: str,
    station: str,
    timestamp_utc: datetime,
) -> Optional[dict]:
    """Find an already logged passage that duplicates the given one.

    A duplicate is a row in ``passages`` with the same bib number and station
    whose timestamp lies within ``DEDUP_WINDOW_SECONDS`` before or after
    ``timestamp_utc``. If several rows match, the one with the highest
    ``proximity_score`` is returned.

    Args:
        db: Open database connection.
        bib_number: Bib number of the athlete.
        station: Station identifier.
        timestamp_utc: Timestamp of the new image.

    Returns:
        A dict with the keys ``passage_id``, ``proximity_score`` and
        ``source_image``, or ``None`` when no duplicate exists.
    """
    columns = ("passage_id", "proximity_score", "source_image")

    # Timestamps are compared as ISO-8601 strings.
    # NOTE(review): this relies on stored timestamps having been written with
    # the same isoformat() layout and UTC offset as the bounds built here.
    window = timedelta(seconds=DEDUP_WINDOW_SECONDS)
    window_start = (timestamp_utc - window).isoformat()
    window_end = (timestamp_utc + window).isoformat()

    async with db.execute(
        """
        SELECT passage_id, proximity_score, source_image
        FROM passages
        WHERE bib_number = ? AND station = ?
          AND timestamp_utc BETWEEN ? AND ?
        ORDER BY proximity_score DESC
        LIMIT 1
        """,
        (bib_number, station, window_start, window_end),
    ) as cur:
        row = await cur.fetchone()

    if row is None:
        return None

    # Pair values with the SELECT column order instead of calling dict(row):
    # this works for plain tuple rows as well as for mapping-style Row objects,
    # so the function does not depend on the connection's row_factory.
    return dict(zip(columns, row))
async def log_passage(
db: aiosqlite.Connection,
@@ -20,21 +52,61 @@ async def log_passage(
gps_lon: float,
gps_alt: Optional[float],
confidence: float,
proximity_score: float = 0.0,
id_method: str,
source_image: str,
needs_review: bool = False,
review_note: Optional[str] = None,
) -> str:
"""Logg én passering. Returnerer passage_id."""
"""
Logg én passering med deduplisering.
Hvis et bilde av samme utøver ved samme stasjon allerede er logget innen
DEDUP_WINDOW_SECONDS, beholder vi bildet nærmest kamera (høyest proximity_score)
ettersom det gir det mest nøyaktige tidsstempelet.
Returnerer passage_id (enten ny eller eksisterende).
"""
if bib_number:
duplicate = await _find_duplicate(db, bib_number, station, timestamp_utc)
if duplicate:
if proximity_score > duplicate["proximity_score"]:
# Nytt bilde er nærmere kamera — oppdater tidsstempel og bildesti
old_image = duplicate["source_image"]
await db.execute(
"""
UPDATE passages
SET timestamp_utc = ?, proximity_score = ?,
source_image = ?, confidence = ?, id_method = ?
WHERE passage_id = ?
""",
(
timestamp_utc.isoformat(),
proximity_score,
source_image,
confidence,
id_method,
duplicate["passage_id"],
),
)
await db.commit()
# Slett det gamle, dårligere bildet
_delete_image_file(old_image)
return duplicate["passage_id"]
else:
# Eksisterende bilde er nærmere kamera — forkast nytt bilde
_delete_image_file(source_image)
return duplicate["passage_id"]
passage_id = str(uuid.uuid4())
await db.execute(
"""
INSERT INTO passages (
passage_id, profile_id, bib_number, station,
timestamp_utc, gps_lat, gps_lon, gps_alt,
confidence, id_method, source_image,
confidence, proximity_score, id_method, source_image,
needs_review, review_note
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
passage_id,
@@ -46,6 +118,7 @@ async def log_passage(
gps_lon,
gps_alt,
confidence,
proximity_score,
id_method,
source_image,
int(needs_review),
@@ -56,6 +129,15 @@ async def log_passage(
return passage_id
def _delete_image_file(path: str) -> None:
    """Delete an image file quietly; log a warning if deletion fails.

    A missing file is not an error. An empty or ``None`` path is ignored.

    Args:
        path: File system path of the image to remove.
    """
    import logging

    # Nothing to delete. Without this guard Path("") would point at the
    # current directory and the unlink attempt would only produce a
    # misleading warning.
    if not path:
        return

    try:
        Path(path).unlink(missing_ok=True)
    except Exception as e:
        # Deliberately broad: removing a superseded image is best-effort
        # cleanup and must never abort the logging of a passage.
        logging.getLogger(__name__).warning("Kunne ikke slette duplikatbilde %s: %s", path, e)
async def get_passages(
db: aiosqlite.Connection,
profile_id: Optional[str] = None,
+1
View File
@@ -33,6 +33,7 @@ async def init_db(db: aiosqlite.Connection) -> None:
gps_lon REAL,
gps_alt REAL,
confidence REAL,
proximity_score REAL NOT NULL DEFAULT 0,
id_method TEXT,
source_image TEXT,
needs_review INTEGER NOT NULL DEFAULT 0,