Merge pull request 'fix(precedents): נרמול case_number עמיד-להתנגשות — מדלג ומתעד, לא קורס (#145)' (#266) from worktree-backfill-citations-run into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m30s
G12 Leak-Guard / leak-guard (push) Successful in 5s
Lint — undefined names / undefined-names (push) Successful in 12s

This commit was merged in pull request #266.
This commit is contained in:
2026-06-15 04:17:38 +00:00
3 changed files with 38 additions and 4 deletions

View File

@@ -67,7 +67,7 @@ async def main() -> None:
rows = await _empty_citation_rows(args.limit)
print(f"רשומות עם citation_formatted ריק: {len(rows)}\n")
-n_pass1 = n_pass2 = n_abstain = 0
+n_pass1 = n_pass2 = n_abstain = n_errors = 0
for r in rows:
cid = r["id"]
# Pass 1 — deterministic from the stored row (no LLM).
@@ -92,7 +92,13 @@ async def main() -> None:
print(f" ? [llm?] {r['case_number']} — would run extractor (dry-run)")
continue
-res = await precedent_metadata_extractor.extract_and_apply(cid)
+# One bad row must never abort the batch — log and move on.
+try:
+res = await precedent_metadata_extractor.extract_and_apply(cid)
+except Exception as e: # noqa: BLE001 — best-effort backfill, reported per-row
+n_errors += 1
+print(f" ✗ [error] {r['case_number']}: {type(e).__name__}: {e}")
+continue
record2 = await db.get_case_law(cid)
new_cit = (record2.get("citation_formatted") or "").strip()
if new_cit:
@@ -109,7 +115,7 @@ async def main() -> None:
print(
f"\nסיכום: דטרמיניסטי={n_pass1} · LLM={n_pass2} · "
-f"נמנע (חסר רכיב)={n_abstain}"
+f"נמנע (חסר רכיב)={n_abstain} · שגיאות={n_errors}"
+ ("" if args.apply else " (dry-run — לא נכתב)")
)