FU-2b: internal case_number reconciliation tooling (GAP-07/08) #15

Merged
chaim merged 6 commits from fix/fu2b-identifier-reconciliation into main 2026-05-31 08:59:14 +00:00
2 changed files with 37 additions and 4 deletions
Showing only changes of commit e46868feda - Show all commits

View File

@@ -48,3 +48,15 @@ def test_consistency_flag_when_bare_absent_from_citation():
assert fu2b._consistency_flag("403-17", "ערר (...) 403/17 אהרון ברק") == "OK"
assert fu2b._consistency_flag("403-17", "ערר (...) 1975/24 מישהו אחר") == "MISMATCH"
assert fu2b._consistency_flag("403-17", "") == "NO_CITATION"
def test_proc_mismatch_detects_prefix_vs_type_conflict():
    """Check _proc_mismatch against conflicting, agreeing and prefix-less inputs.

    The migration strips the prefix from case_number, so a row whose prefix
    disagrees with its proceeding_type would lose that signal unless flagged.
    """
    expectations = [
        # prefix contradicts proceeding_type -> must be flagged
        ('בל"מ 1010-01-25', "ערר", True),
        ('בל"מ (...) 1028/20 חלוואני', "ערר", True),
        # prefix and proceeding_type agree -> no flag
        ('ערר 1024/24 נילי', "ערר", False),
        ('בל"מ 1010-01-25', 'בל"מ', False),
        # bare number without any prefix -> nothing to contradict
        ("8047/23", 'בל"מ', False),
    ]
    for case_number, proceeding_type, expected in expectations:
        assert fu2b._proc_mismatch(case_number, proceeding_type) is expected

View File

@@ -73,6 +73,24 @@ def _consistency_flag(bare: str | None, citation_formatted: str) -> str:
return "OK" if bare in cf else "MISMATCH"
def _proc_mismatch(case_number: str, proceeding_type: str) -> bool:
"""True if the case_number's leading proceeding prefix disagrees with proceeding_type.
The migration strips the prefix from case_number, so a בל"מ prefix paired with
proceeding_type='ערר' (or vice-versa) would SILENTLY LOSE the proceeding signal.
Such rows must be flagged for chair adjudication, never auto-applied.
"""
cn = (case_number or "").lstrip().lstrip("") # drop RTL/LTR marks
pt = (proceeding_type or "").strip()
starts_balam = cn.startswith('בל"מ') or cn.startswith("בל”מ")
starts_arar = cn.startswith("ערר")
if starts_balam and pt and pt != 'בל"מ':
return True
if starts_arar and pt and pt != "ערר":
return True
return False
async def _build_reconciliation() -> list[dict]:
from legal_mcp.services import db
pool = await db.get_pool()
@@ -93,6 +111,7 @@ async def _build_reconciliation() -> list[dict]:
"citation_formatted": r["cf"],
"extract_flag": flag,
"consistency": cons,
"proc_flag": "PROC_MISMATCH" if _proc_mismatch(r["case_number"], r["proceeding_type"] or "") else "",
"will_change": "yes" if changes else "no",
})
from collections import Counter
@@ -111,13 +130,14 @@ def _write_table(rows: list[dict], ts: str) -> tuple[Path, Path]:
csv_path = AUDIT_DIR / f"fu2b-reconciliation-{ts}.csv"
md_path = AUDIT_DIR / f"fu2b-reconciliation-{ts}.md"
cols = ["id", "current_case_number", "proposed_bare", "proceeding_type",
"citation_formatted", "extract_flag", "consistency", "dup_check", "will_change"]
"citation_formatted", "extract_flag", "consistency", "proc_flag", "dup_check", "will_change"]
with csv_path.open("w", newline="", encoding="utf-8") as f:
w = csv.DictWriter(f, fieldnames=cols)
w.writeheader()
w.writerows(rows)
changing = [r for r in rows if r["will_change"] == "yes"]
flagged = [r for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH" or r["dup_check"]]
flagged = [r for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH"
or r["dup_check"] or r["proc_flag"]]
with md_path.open("w", encoding="utf-8") as f:
f.write(f"# FU-2b — טבלת-תיאום מזהים (internal_committee) — {ts}\n\n")
f.write(f"- סה\"כ רשומות: {len(rows)}\n- ישתנו: {len(changing)}\n- מסומנות לסקירה: {len(flagged)}\n\n")
@@ -126,7 +146,7 @@ def _write_table(rows: list[dict], ts: str) -> tuple[Path, Path]:
for r in flagged:
fl = " ".join(x for x in [r["extract_flag"] if r["extract_flag"] != "OK" else "",
r["consistency"] if r["consistency"] == "MISMATCH" else "",
r["dup_check"]] if x)
r["proc_flag"], r["dup_check"]] if x)
f.write(f"| {r['current_case_number'][:50]} | {r['proposed_bare']} | {r['proceeding_type']} | {fl} |\n")
f.write("\n## כל השינויים המוצעים\n\n")
f.write("| current_case_number | → proposed_bare | proc |\n|---|---|---|\n")
@@ -174,7 +194,8 @@ async def main() -> int:
rows = await _build_reconciliation()
csv_path, md_path = _write_table(rows, ts)
changing = sum(1 for r in rows if r["will_change"] == "yes")
flagged = sum(1 for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH" or r["dup_check"])
flagged = sum(1 for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH"
or r["dup_check"] or r["proc_flag"])
print(f"DRY-RUN: {len(rows)} rows | will_change={changing} | flagged={flagged}")
print(f" table: {md_path}")
print(f" csv: {csv_path}")