Merge pull request 'fix(precedents): normalize citation→docket case_number + enforce source_type↔precedent_level' (#256) from worktree-precedent-casenum-sourcetype into main
This commit was merged in pull request #256.
This commit is contained in:
@@ -15,6 +15,7 @@ in ``apply_to_record``.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from datetime import date as date_type
|
from datetime import date as date_type
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
@@ -220,6 +221,31 @@ async def extract_metadata(case_law_id: UUID | str) -> dict:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# Israeli court docket: digits with slash/dash separators, no spaces, no letters
|
||||||
|
# (e.g. "1132-09-24", "4768/22", "35758-09-25"). Used to (a) detect a
|
||||||
|
# citation-shaped case_number that must be normalized and (b) guard against ever
|
||||||
|
# writing a non-docket string into the identity field.
|
||||||
|
_DOCKET_RE = re.compile(r"\d{1,6}(?:[-/]\d{1,4}){1,2}")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_clean_docket(s: str) -> bool:
|
||||||
|
return bool(_DOCKET_RE.fullmatch((s or "").strip()))
|
||||||
|
|
||||||
|
|
||||||
|
def _source_type_for_level(level: str) -> str:
|
||||||
|
"""Derive source_type from precedent_level — the library section is driven by
|
||||||
|
source_type, so the two MUST agree (an LLM slip pairing
|
||||||
|
precedent_level='ועדת_ערר_מחוזית' with source_type='court_ruling' files a
|
||||||
|
committee decision under "court rulings"). Empty when the level is
|
||||||
|
indeterminate (don't force a guess)."""
|
||||||
|
level = (level or "").strip()
|
||||||
|
if level.startswith("ועדת_ערר"):
|
||||||
|
return "appeals_committee"
|
||||||
|
if level in ("עליון", "מנהלי"):
|
||||||
|
return "court_ruling"
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
async def apply_to_record(
|
async def apply_to_record(
|
||||||
case_law_id: UUID | str,
|
case_law_id: UUID | str,
|
||||||
suggested: dict,
|
suggested: dict,
|
||||||
@@ -327,10 +353,23 @@ async def apply_to_record(
|
|||||||
if pt and (record.get("source_kind") == "internal_committee"):
|
if pt and (record.get("source_kind") == "internal_committee"):
|
||||||
fields_to_update["proceeding_type"] = pt
|
fields_to_update["proceeding_type"] = pt
|
||||||
|
|
||||||
if overwrite_case_number:
|
# case_number normalization. The precedent upload / missing-precedent flow
|
||||||
cn = (suggested.get("case_number_clean") or "").strip()
|
# stores the FULL citation string into case_number (precedent_library:
|
||||||
if cn:
|
# case_number=citation). Replace it with the clean docket when the LLM gives
|
||||||
fields_to_update["case_number"] = cn
|
# one AND either (a) caller forces it (overwrite_case_number — migrations) or
|
||||||
|
# (b) the stored value is clearly citation-shaped (has a space / is long — a
|
||||||
|
# real docket never is). Guard: only write a value that IS a clean docket, so
|
||||||
|
# a bad LLM output can never corrupt the identity field.
|
||||||
|
cn_clean = (suggested.get("case_number_clean") or "").strip()
|
||||||
|
cur_cn = cur_case_number
|
||||||
|
citation_shaped = bool(cur_cn) and (" " in cur_cn or len(cur_cn) > 20)
|
||||||
|
if (
|
||||||
|
cn_clean
|
||||||
|
and _is_clean_docket(cn_clean)
|
||||||
|
and cn_clean != cur_cn
|
||||||
|
and (overwrite_case_number or citation_shaped)
|
||||||
|
):
|
||||||
|
fields_to_update["case_number"] = cn_clean
|
||||||
|
|
||||||
# citation_formatted — full citation per Israeli citation rules. Only
|
# citation_formatted — full citation per Israeli citation rules. Only
|
||||||
# fill if empty; user edits in /precedents/[id] are preserved.
|
# fill if empty; user edits in /precedents/[id] are preserved.
|
||||||
@@ -355,6 +394,26 @@ async def apply_to_record(
|
|||||||
if s:
|
if s:
|
||||||
fields_to_update["district"] = s
|
fields_to_update["district"] = s
|
||||||
|
|
||||||
|
# Enforce source_type ↔ precedent_level consistency in CODE (the LLM prompt
|
||||||
|
# asks for it, but a slip would file a ועדת-ערר decision under "court
|
||||||
|
# rulings"). Derive from the EFFECTIVE level (this run's update or the stored
|
||||||
|
# value) and override an inconsistent source_type — even one already on the
|
||||||
|
# record, since the library section depends on it.
|
||||||
|
eff_level = (
|
||||||
|
fields_to_update.get("precedent_level")
|
||||||
|
or record.get("precedent_level")
|
||||||
|
or ""
|
||||||
|
).strip()
|
||||||
|
derived_st = _source_type_for_level(eff_level)
|
||||||
|
if derived_st:
|
||||||
|
eff_st = (
|
||||||
|
fields_to_update.get("source_type")
|
||||||
|
or record.get("source_type")
|
||||||
|
or ""
|
||||||
|
).strip()
|
||||||
|
if eff_st != derived_st:
|
||||||
|
fields_to_update["source_type"] = derived_st
|
||||||
|
|
||||||
if not fields_to_update:
|
if not fields_to_update:
|
||||||
return {"updated": False, "fields": []}
|
return {"updated": False, "fields": []}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user