Merge pull request 'fix(precedents): נרמול case_number עמיד-להתנגשות — מדלג ומתעד, לא קורס (#145)' (#266) from worktree-backfill-citations-run into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m30s
G12 Leak-Guard / leak-guard (push) Successful in 5s
Lint — undefined names / undefined-names (push) Successful in 12s

This commit was merged in pull request #266.
This commit is contained in:
2026-06-15 04:17:38 +00:00
3 changed files with 38 additions and 4 deletions

View File

@@ -4112,6 +4112,20 @@ async def get_case_law_by_citation(case_number: str) -> dict | None:
return _row_to_case_law(row) if row else None
async def case_number_collides(case_number: str, exclude_id: UUID) -> bool:
    """Report whether a different non-internal row already holds ``case_number``.

    The partial unique index ``uq_case_law_external_number`` covers
    ``case_number`` for rows WHERE source_kind <> 'internal_committee'.
    Assigning a docket that another such row owns would violate it, so a
    caller can check here first and SKIP the identity normalization (leaving
    the duplicate to be deduped later) instead of having the whole operation
    fail on the unique violation.

    Args:
        case_number: Docket value the caller intends to assign.
        exclude_id: Id of the row being updated; it is left out of the lookup.

    Returns:
        True when some other row whose source_kind is not
        'internal_committee' has this exact case_number, otherwise False.
    """
    query = (
        "SELECT 1 FROM case_law WHERE case_number = $1 AND id <> $2 "
        "AND source_kind <> 'internal_committee' LIMIT 1"
    )
    db_pool = await get_pool()
    hit = await db_pool.fetchval(query, case_number, exclude_id)
    # A truthy value (the selected 1) means a conflicting row was found.
    return bool(hit)
async def create_external_case_law(
case_number: str,
case_name: str,

View File

@@ -364,7 +364,21 @@ async def apply_to_record(
and cn_clean != cur_cn
and (overwrite_case_number or citation_shaped)
):
fields_to_update["case_number"] = cn_clean
# Skip (don't crash) when the clean docket already belongs to ANOTHER
# non-internal row — a duplicate to dedupe later, not this run's concern.
# Writing it would hit uq_case_law_external_number and abort the whole merge
# (including the citation). No-silent-swallow: log the skip.
if (
record.get("source_kind") != "internal_committee"
and await db.case_number_collides(cn_clean, case_law_id)
):
logger.warning(
"metadata_extractor: case_number normalization %r%r skipped — docket "
"already owned by another non-internal row (likely duplicate)",
cur_cn, cn_clean,
)
else:
fields_to_update["case_number"] = cn_clean
# parties — store the extracted "עורר נ' משיב" line (the re-derivable basis for
# the deterministic citation). Only fill when empty; chair edits are preserved.