Add deduplication: keep image closest to camera within 2s window
Build & Deploy / build-and-deploy (push) Has been cancelled
Build & Deploy / build-and-deploy (push) Has been cancelled
Within a burst sequence from the same station, the image in which the athlete is physically closest to the camera gives the most accurate passage timestamp. Proximity is measured by the bib bounding-box area (larger = closer).

When a duplicate is detected:
- New image closer: update timestamp + image path, delete old image
- Existing image closer: discard new image

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -113,6 +113,7 @@ async def process_image(path: Path) -> None:
|
|||||||
gps_lon=meta.gps_lon,
|
gps_lon=meta.gps_lon,
|
||||||
gps_alt=meta.gps_alt,
|
gps_alt=meta.gps_alt,
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
|
proximity_score=ocr.proximity_score,
|
||||||
id_method=id_method,
|
id_method=id_method,
|
||||||
source_image=str(dest),
|
source_image=str(dest),
|
||||||
needs_review=needs_review,
|
needs_review=needs_review,
|
||||||
|
|||||||
+18
-10
@@ -29,6 +29,7 @@ class OcrResult:
|
|||||||
digits: Optional[str] # Gjenkjente sifre, f.eks. "42", None hvis ingen
|
digits: Optional[str] # Gjenkjente sifre, f.eks. "42", None hvis ingen
|
||||||
confidence: float # 0.0–1.0
|
confidence: float # 0.0–1.0
|
||||||
partial: bool # True hvis nummeret trolig er delvis skjult
|
partial: bool # True hvis nummeret trolig er delvis skjult
|
||||||
|
proximity_score: float = 0.0 # Areal av detektert bib-boks (px²) — større = nærmere kamera
|
||||||
raw_texts: list[str] = field(default_factory=list) # Alle OCR-treff for debug
|
raw_texts: list[str] = field(default_factory=list) # Alle OCR-treff for debug
|
||||||
|
|
||||||
|
|
||||||
@@ -56,28 +57,34 @@ def _preprocess(image_path: Path) -> np.ndarray:
|
|||||||
return enhanced
|
return enhanced
|
||||||
|
|
||||||
|
|
||||||
def _bbox_area(bbox) -> float:
    """Return the area (px²) of the axis-aligned extent of an OCR bounding box.

    Args:
        bbox: Corner points ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``.

    Returns:
        ``(max x - min x) * (max y - min y)`` as a plain ``float``; ``0.0``
        for an empty box.
    """
    # len() rather than truthiness so array-like boxes are handled as well.
    if len(bbox) == 0:
        return 0.0
    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    # Coerce to a built-in float: the coordinates may be non-builtin numeric
    # scalars (presumably NumPy, depending on the OCR backend — TODO confirm),
    # and the result is passed on as the proximity score.
    return float((max(xs) - min(xs)) * (max(ys) - min(ys)))


def _extract_bib_number(texts: list[tuple]) -> tuple[Optional[str], float, bool, float]:
    """Pick the best digit sequence among the OCR hits.

    Args:
        texts: OCR hits as ``(bbox, text, confidence)`` tuples.

    Returns:
        ``(digits, confidence, partial, proximity_score)`` where
        ``proximity_score`` is the bounding-box area in px² of the chosen hit
        (larger = closer to the camera). ``(None, 0.0, False, 0.0)`` when no
        hit contains a digit.
    """
    candidates = []
    for (bbox, text, conf) in texts:
        digits = re.sub(r"[^0-9]", "", text)
        if digits:
            candidates.append((digits, float(conf), _bbox_area(bbox)))

    if not candidates:
        return None, 0.0, False, 0.0

    # Choose the candidate with the highest confidence (first one wins ties).
    best_digits, best_conf, best_area = max(candidates, key=lambda x: x[1])

    # A single digit is flagged as a possibly partially visible number.
    partial = len(best_digits) < 2

    return best_digits, best_conf, partial, best_area
|
||||||
|
|
||||||
|
|
||||||
def read_bib(image_path: Path) -> OcrResult:
|
def read_bib(image_path: Path) -> OcrResult:
|
||||||
@@ -91,12 +98,13 @@ def read_bib(image_path: Path) -> OcrResult:
|
|||||||
results = reader.readtext(processed, detail=1, paragraph=False)
|
results = reader.readtext(processed, detail=1, paragraph=False)
|
||||||
|
|
||||||
raw_texts = [text for (_, text, _) in results]
|
raw_texts = [text for (_, text, _) in results]
|
||||||
digits, confidence, partial = _extract_bib_number(results)
|
digits, confidence, partial, proximity_score = _extract_bib_number(results)
|
||||||
|
|
||||||
return OcrResult(
|
return OcrResult(
|
||||||
digits=digits,
|
digits=digits,
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
partial=partial,
|
partial=partial,
|
||||||
|
proximity_score=proximity_score,
|
||||||
raw_texts=raw_texts,
|
raw_texts=raw_texts,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
+86
-4
@@ -3,11 +3,43 @@ Skriv og query passeringslogg i SQLite.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import aiosqlite
|
import aiosqlite
|
||||||
|
|
||||||
|
# Deduplication window: images of the same athlete (bib) taken at the same
# station within this many seconds of each other count as one passage.
DEDUP_WINDOW_SECONDS = 2


async def _find_duplicate(
    db: aiosqlite.Connection,
    bib_number: str,
    station: str,
    timestamp_utc: datetime,
) -> Optional[dict]:
    """Find an already logged passage that the new image would duplicate.

    Looks for a row with the same bib number and station whose timestamp lies
    within ``DEDUP_WINDOW_SECONDS`` on either side of ``timestamp_utc``. If
    several rows match, the one with the highest ``proximity_score`` is used.

    Args:
        db: Open database connection.
        bib_number: Bib number of the athlete.
        station: Station identifier.
        timestamp_utc: Timestamp of the new image.

    Returns:
        A dict with ``passage_id``, ``proximity_score`` and ``source_image``,
        or ``None`` when no row matches.
    """
    window = timedelta(seconds=DEDUP_WINDOW_SECONDS)
    # The window bounds are compared as text.
    # NOTE(review): assumes stored timestamps are isoformat() strings with the
    # same timezone form as timestamp_utc — confirm against the writers.
    window_start = (timestamp_utc - window).isoformat()
    window_end = (timestamp_utc + window).isoformat()

    async with db.execute(
        """
        SELECT passage_id, proximity_score, source_image
        FROM passages
        WHERE bib_number = ? AND station = ?
          AND timestamp_utc BETWEEN ? AND ?
        ORDER BY proximity_score DESC
        LIMIT 1
        """,
        (bib_number, station, window_start, window_end),
    ) as cur:
        row = await cur.fetchone()

    if row is None:
        return None
    # Mapping-like rows (a Row-style row_factory) convert directly; plain
    # tuple rows are paired with the column names selected above.
    if hasattr(row, "keys"):
        return dict(row)
    return dict(zip(("passage_id", "proximity_score", "source_image"), row))
|
||||||
|
|
||||||
|
|
||||||
async def log_passage(
|
async def log_passage(
|
||||||
db: aiosqlite.Connection,
|
db: aiosqlite.Connection,
|
||||||
@@ -20,21 +52,61 @@ async def log_passage(
|
|||||||
gps_lon: float,
|
gps_lon: float,
|
||||||
gps_alt: Optional[float],
|
gps_alt: Optional[float],
|
||||||
confidence: float,
|
confidence: float,
|
||||||
|
proximity_score: float = 0.0,
|
||||||
id_method: str,
|
id_method: str,
|
||||||
source_image: str,
|
source_image: str,
|
||||||
needs_review: bool = False,
|
needs_review: bool = False,
|
||||||
review_note: Optional[str] = None,
|
review_note: Optional[str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Logg én passering. Returnerer passage_id."""
|
"""
|
||||||
|
Logg én passering med deduplisering.
|
||||||
|
|
||||||
|
Hvis et bilde av samme utøver ved samme stasjon allerede er logget innen
|
||||||
|
DEDUP_WINDOW_SECONDS, beholder vi bildet nærmest kamera (høyest proximity_score)
|
||||||
|
ettersom det gir det mest nøyaktige tidsstempelet.
|
||||||
|
|
||||||
|
Returnerer passage_id (enten ny eller eksisterende).
|
||||||
|
"""
|
||||||
|
if bib_number:
|
||||||
|
duplicate = await _find_duplicate(db, bib_number, station, timestamp_utc)
|
||||||
|
if duplicate:
|
||||||
|
if proximity_score > duplicate["proximity_score"]:
|
||||||
|
# Nytt bilde er nærmere kamera — oppdater tidsstempel og bildesti
|
||||||
|
old_image = duplicate["source_image"]
|
||||||
|
await db.execute(
|
||||||
|
"""
|
||||||
|
UPDATE passages
|
||||||
|
SET timestamp_utc = ?, proximity_score = ?,
|
||||||
|
source_image = ?, confidence = ?, id_method = ?
|
||||||
|
WHERE passage_id = ?
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
timestamp_utc.isoformat(),
|
||||||
|
proximity_score,
|
||||||
|
source_image,
|
||||||
|
confidence,
|
||||||
|
id_method,
|
||||||
|
duplicate["passage_id"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
await db.commit()
|
||||||
|
# Slett det gamle, dårligere bildet
|
||||||
|
_delete_image_file(old_image)
|
||||||
|
return duplicate["passage_id"]
|
||||||
|
else:
|
||||||
|
# Eksisterende bilde er nærmere kamera — forkast nytt bilde
|
||||||
|
_delete_image_file(source_image)
|
||||||
|
return duplicate["passage_id"]
|
||||||
|
|
||||||
passage_id = str(uuid.uuid4())
|
passage_id = str(uuid.uuid4())
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO passages (
|
INSERT INTO passages (
|
||||||
passage_id, profile_id, bib_number, station,
|
passage_id, profile_id, bib_number, station,
|
||||||
timestamp_utc, gps_lat, gps_lon, gps_alt,
|
timestamp_utc, gps_lat, gps_lon, gps_alt,
|
||||||
confidence, id_method, source_image,
|
confidence, proximity_score, id_method, source_image,
|
||||||
needs_review, review_note
|
needs_review, review_note
|
||||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
""",
|
""",
|
||||||
(
|
(
|
||||||
passage_id,
|
passage_id,
|
||||||
@@ -46,6 +118,7 @@ async def log_passage(
|
|||||||
gps_lon,
|
gps_lon,
|
||||||
gps_alt,
|
gps_alt,
|
||||||
confidence,
|
confidence,
|
||||||
|
proximity_score,
|
||||||
id_method,
|
id_method,
|
||||||
source_image,
|
source_image,
|
||||||
int(needs_review),
|
int(needs_review),
|
||||||
@@ -56,6 +129,15 @@ async def log_passage(
|
|||||||
return passage_id
|
return passage_id
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_image_file(path: str) -> None:
    """Delete an image file quietly, logging a warning on failure.

    A file that is already missing is not an error, and an empty or ``None``
    path is ignored. Any other failure is logged and never raised.

    Args:
        path: Path of the image file to remove.
    """
    import logging

    # Nothing to delete. Without this guard an empty string would resolve to
    # the current directory and None would fail inside Path(), each producing
    # a misleading warning.
    if not path:
        return

    try:
        Path(path).unlink(missing_ok=True)
    except Exception as e:
        # Deliberately broad: a failed cleanup is reported but not propagated.
        logging.getLogger(__name__).warning("Kunne ikke slette duplikatbilde %s: %s", path, e)
|
||||||
|
|
||||||
|
|
||||||
async def get_passages(
|
async def get_passages(
|
||||||
db: aiosqlite.Connection,
|
db: aiosqlite.Connection,
|
||||||
profile_id: Optional[str] = None,
|
profile_id: Optional[str] = None,
|
||||||
|
|||||||
+15
-14
@@ -24,20 +24,21 @@ async def init_db(db: aiosqlite.Connection) -> None:
|
|||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS passages (
|
CREATE TABLE IF NOT EXISTS passages (
|
||||||
passage_id TEXT PRIMARY KEY,
|
passage_id TEXT PRIMARY KEY,
|
||||||
profile_id TEXT REFERENCES athletes(profile_id),
|
profile_id TEXT REFERENCES athletes(profile_id),
|
||||||
bib_number TEXT,
|
bib_number TEXT,
|
||||||
station TEXT NOT NULL,
|
station TEXT NOT NULL,
|
||||||
timestamp_utc TEXT NOT NULL,
|
timestamp_utc TEXT NOT NULL,
|
||||||
gps_lat REAL,
|
gps_lat REAL,
|
||||||
gps_lon REAL,
|
gps_lon REAL,
|
||||||
gps_alt REAL,
|
gps_alt REAL,
|
||||||
confidence REAL,
|
confidence REAL,
|
||||||
id_method TEXT,
|
proximity_score REAL NOT NULL DEFAULT 0,
|
||||||
source_image TEXT,
|
id_method TEXT,
|
||||||
needs_review INTEGER NOT NULL DEFAULT 0,
|
source_image TEXT,
|
||||||
review_note TEXT,
|
needs_review INTEGER NOT NULL DEFAULT 0,
|
||||||
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
review_note TEXT,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_passages_profile ON passages(profile_id);
|
CREATE INDEX IF NOT EXISTS idx_passages_profile ON passages(profile_id);
|
||||||
|
|||||||
Reference in New Issue
Block a user