Merge pull request 'fix(precedents): נרמול case_number עמיד-להתנגשות — מדלג ומתעד, לא קורס (#145)' (#266) from worktree-backfill-citations-run into main
This commit was merged in pull request #266.
This commit is contained in:
@@ -4112,6 +4112,20 @@ async def get_case_law_by_citation(case_number: str) -> dict | None:
|
|||||||
return _row_to_case_law(row) if row else None
|
return _row_to_case_law(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
|
async def case_number_collides(case_number: str, exclude_id: UUID) -> bool:
    """Report whether a different non-internal row already holds ``case_number``.

    Assigning ``case_number`` to a non-internal row is rejected by the partial
    unique index ``uq_case_law_external_number`` (on ``case_number`` WHERE
    source_kind <> 'internal_committee') when another such row owns the docket.
    Callers can probe with this function first and skip the identity
    normalization (leaving the duplicate to be deduplicated later) rather than
    letting the unique violation abort the whole operation.

    Args:
        case_number: The docket value the caller intends to write.
        exclude_id: Id of the row being updated; it is left out of the lookup
            so a row never collides with itself.

    Returns:
        True when at least one other row with ``source_kind`` different from
        'internal_committee' already has this ``case_number``; False otherwise.
    """
    # Same predicate as the partial unique index, minus the row being updated.
    query = (
        "SELECT 1 FROM case_law WHERE case_number = $1 AND id <> $2 "
        "AND source_kind <> 'internal_committee' LIMIT 1"
    )
    pool = await get_pool()
    match = await pool.fetchval(query, case_number, exclude_id)
    # The truthiness of the fetched value is the answer (no hit -> falsy).
    return bool(match)
|
||||||
|
|
||||||
|
|
||||||
async def create_external_case_law(
|
async def create_external_case_law(
|
||||||
case_number: str,
|
case_number: str,
|
||||||
case_name: str,
|
case_name: str,
|
||||||
|
|||||||
@@ -364,7 +364,21 @@ async def apply_to_record(
|
|||||||
and cn_clean != cur_cn
|
and cn_clean != cur_cn
|
||||||
and (overwrite_case_number or citation_shaped)
|
and (overwrite_case_number or citation_shaped)
|
||||||
):
|
):
|
||||||
fields_to_update["case_number"] = cn_clean
|
# Skip (don't crash) when the clean docket already belongs to ANOTHER
|
||||||
|
# non-internal row — a duplicate to dedupe later, not this run's concern.
|
||||||
|
# Writing it would hit uq_case_law_external_number and abort the whole merge
|
||||||
|
# (including the citation). No-silent-swallow: log the skip.
|
||||||
|
if (
|
||||||
|
record.get("source_kind") != "internal_committee"
|
||||||
|
and await db.case_number_collides(cn_clean, case_law_id)
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
"metadata_extractor: case_number normalization %r→%r skipped — docket "
|
||||||
|
"already owned by another non-internal row (likely duplicate)",
|
||||||
|
cur_cn, cn_clean,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
fields_to_update["case_number"] = cn_clean
|
||||||
|
|
||||||
# parties — store the extracted "עורר נ' משיב" line (the re-derivable basis for
|
# parties — store the extracted "עורר נ' משיב" line (the re-derivable basis for
|
||||||
# the deterministic citation). Only fill when empty; chair edits are preserved.
|
# the deterministic citation). Only fill when empty; chair edits are preserved.
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ async def main() -> None:
|
|||||||
rows = await _empty_citation_rows(args.limit)
|
rows = await _empty_citation_rows(args.limit)
|
||||||
print(f"רשומות עם citation_formatted ריק: {len(rows)}\n")
|
print(f"רשומות עם citation_formatted ריק: {len(rows)}\n")
|
||||||
|
|
||||||
n_pass1 = n_pass2 = n_abstain = 0
|
n_pass1 = n_pass2 = n_abstain = n_errors = 0
|
||||||
for r in rows:
|
for r in rows:
|
||||||
cid = r["id"]
|
cid = r["id"]
|
||||||
# Pass 1 — deterministic from the stored row (no LLM).
|
# Pass 1 — deterministic from the stored row (no LLM).
|
||||||
@@ -92,7 +92,13 @@ async def main() -> None:
|
|||||||
print(f" ? [llm?] {r['case_number']} — would run extractor (dry-run)")
|
print(f" ? [llm?] {r['case_number']} — would run extractor (dry-run)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
res = await precedent_metadata_extractor.extract_and_apply(cid)
|
# One bad row must never abort the batch — log and move on.
|
||||||
|
try:
|
||||||
|
res = await precedent_metadata_extractor.extract_and_apply(cid)
|
||||||
|
except Exception as e: # noqa: BLE001 — best-effort backfill, reported per-row
|
||||||
|
n_errors += 1
|
||||||
|
print(f" ✗ [error] {r['case_number']}: {type(e).__name__}: {e}")
|
||||||
|
continue
|
||||||
record2 = await db.get_case_law(cid)
|
record2 = await db.get_case_law(cid)
|
||||||
new_cit = (record2.get("citation_formatted") or "").strip()
|
new_cit = (record2.get("citation_formatted") or "").strip()
|
||||||
if new_cit:
|
if new_cit:
|
||||||
@@ -109,7 +115,7 @@ async def main() -> None:
|
|||||||
|
|
||||||
print(
|
print(
|
||||||
f"\nסיכום: דטרמיניסטי={n_pass1} · LLM={n_pass2} · "
|
f"\nסיכום: דטרמיניסטי={n_pass1} · LLM={n_pass2} · "
|
||||||
f"נמנע (חסר רכיב)={n_abstain}"
|
f"נמנע (חסר רכיב)={n_abstain} · שגיאות={n_errors}"
|
||||||
+ ("" if args.apply else " (dry-run — לא נכתב)")
|
+ ("" if args.apply else " (dry-run — לא נכתב)")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user