Add deduplication: keep image closest to camera within 2s window
Build & Deploy / build-and-deploy (push) Has been cancelled

Within a burst sequence from the same station, the image where the athlete
is physically closest to the camera gives the most accurate passage timestamp.
Proximity is measured by bib bounding box area (larger = closer).

When a duplicate is detected:
- New image closer: update timestamp + image path, delete old image
- Existing image closer: discard new image

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-20 15:22:57 +01:00
parent b191b08579
commit 24645dfd11
4 changed files with 120 additions and 28 deletions
+18 -10
View File
@@ -29,6 +29,7 @@ class OcrResult:
digits: Optional[str] # Gjenkjente sifre, f.eks. "42", None hvis ingen
confidence: float # 0.0-1.0
partial: bool # True hvis nummeret trolig er delvis skjult
proximity_score: float = 0.0 # Areal av detektert bib-boks (px²) — større = nærmere kamera
raw_texts: list[str] = field(default_factory=list) # Alle OCR-treff for debug
@@ -56,28 +57,34 @@ def _preprocess(image_path: Path) -> np.ndarray:
return enhanced
def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool]:
def _bbox_area(bbox) -> float:
"""Beregn areal av EasyOCR bounding box [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]."""
xs = [p[0] for p in bbox]
ys = [p[1] for p in bbox]
return (max(xs) - min(xs)) * (max(ys) - min(ys))
def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool, float]:
"""
Finn beste siffersekvens blant OCR-treff.
Returnerer (sifre, konfidens, partial).
Returnerer (sifre, konfidens, partial, proximity_score).
proximity_score = areal av bounding box i piksler² (større = nærmere kamera).
"""
candidates = []
for (_, text, conf) in texts:
# Behold kun sifre
for (bbox, text, conf) in texts:
digits = re.sub(r"[^0-9]", "", text)
if digits:
candidates.append((digits, float(conf)))
candidates.append((digits, float(conf), _bbox_area(bbox)))
if not candidates:
return None, 0.0, False
return None, 0.0, False, 0.0
# Velg kandidat med høyest konfidens
best_digits, best_conf = max(candidates, key=lambda x: x[1])
best_digits, best_conf, best_area = max(candidates, key=lambda x: x[1])
# Heuristikk: 12 sifre kan tyde på delvis synlig nummer
partial = len(best_digits) < 2
return best_digits, best_conf, partial
return best_digits, best_conf, partial, best_area
def read_bib(image_path: Path) -> OcrResult:
@@ -91,12 +98,13 @@ def read_bib(image_path: Path) -> OcrResult:
results = reader.readtext(processed, detail=1, paragraph=False)
raw_texts = [text for (_, text, _) in results]
digits, confidence, partial = _extract_bib_number(results)
digits, confidence, partial, proximity_score = _extract_bib_number(results)
return OcrResult(
digits=digits,
confidence=confidence,
partial=partial,
proximity_score=proximity_score,
raw_texts=raw_texts,
)
except Exception as e: