feat(fu2b): flag PROC_MISMATCH (case_number prefix vs proceeding_type) for chair

Dry-run surfaced 2 rows with בל"מ prefix but proceeding_type=ערר. Since the
migration strips the prefix, a wrong proceeding_type would silently lose the
בל"מ signal — must be chair-adjudicated, not auto-applied. Chair table now
flags 4 rows: 2 DUP_CHECK (8047-23) + 2 PROC_MISMATCH.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-31 08:57:42 +00:00
parent ab8d17fdd8
commit e46868feda
2 changed files with 37 additions and 4 deletions

View File

@@ -48,3 +48,15 @@ def test_consistency_flag_when_bare_absent_from_citation():
assert fu2b._consistency_flag("403-17", "ערר (...) 403/17 אהרון ברק") == "OK" assert fu2b._consistency_flag("403-17", "ערר (...) 403/17 אהרון ברק") == "OK"
assert fu2b._consistency_flag("403-17", "ערר (...) 1975/24 מישהו אחר") == "MISMATCH" assert fu2b._consistency_flag("403-17", "ערר (...) 1975/24 מישהו אחר") == "MISMATCH"
assert fu2b._consistency_flag("403-17", "") == "NO_CITATION" assert fu2b._consistency_flag("403-17", "") == "NO_CITATION"
def test_proc_mismatch_detects_prefix_vs_type_conflict():
    """Check _proc_mismatch against conflicting, agreeing and prefix-less inputs.

    The migration strips the prefix from case_number, so a proceeding_type
    that disagrees with the prefix would lose the signal and must be flagged.
    """
    scenarios = [
        # case_number prefix disagrees with proceeding_type -> must flag
        ('בל"מ 1010-01-25', "ערר", True),
        ('בל"מ (...) 1028/20 חלוואני', "ערר", True),
        # prefix and proceeding_type agree -> no flag
        ('ערר 1024/24 נילי', "ערר", False),
        ('בל"מ 1010-01-25', 'בל"מ', False),
        # bare number with no prefix -> nothing to contradict
        ("8047/23", 'בל"מ', False),
    ]
    for raw_case_number, declared_type, expected in scenarios:
        assert fu2b._proc_mismatch(raw_case_number, declared_type) is expected

View File

@@ -73,6 +73,24 @@ def _consistency_flag(bare: str | None, citation_formatted: str) -> str:
return "OK" if bare in cf else "MISMATCH" return "OK" if bare in cf else "MISMATCH"
def _proc_mismatch(case_number: str, proceeding_type: str) -> bool:
"""True if the case_number's leading proceeding prefix disagrees with proceeding_type.
The migration strips the prefix from case_number, so a בל"מ prefix paired with
proceeding_type='ערר' (or vice-versa) would SILENTLY LOSE the proceeding signal.
Such rows must be flagged for chair adjudication, never auto-applied.
"""
cn = (case_number or "").lstrip().lstrip("") # drop RTL/LTR marks
pt = (proceeding_type or "").strip()
starts_balam = cn.startswith('בל"מ') or cn.startswith("בל”מ")
starts_arar = cn.startswith("ערר")
if starts_balam and pt and pt != 'בל"מ':
return True
if starts_arar and pt and pt != "ערר":
return True
return False
async def _build_reconciliation() -> list[dict]: async def _build_reconciliation() -> list[dict]:
from legal_mcp.services import db from legal_mcp.services import db
pool = await db.get_pool() pool = await db.get_pool()
@@ -93,6 +111,7 @@ async def _build_reconciliation() -> list[dict]:
"citation_formatted": r["cf"], "citation_formatted": r["cf"],
"extract_flag": flag, "extract_flag": flag,
"consistency": cons, "consistency": cons,
"proc_flag": "PROC_MISMATCH" if _proc_mismatch(r["case_number"], r["proceeding_type"] or "") else "",
"will_change": "yes" if changes else "no", "will_change": "yes" if changes else "no",
}) })
from collections import Counter from collections import Counter
@@ -111,13 +130,14 @@ def _write_table(rows: list[dict], ts: str) -> tuple[Path, Path]:
csv_path = AUDIT_DIR / f"fu2b-reconciliation-{ts}.csv" csv_path = AUDIT_DIR / f"fu2b-reconciliation-{ts}.csv"
md_path = AUDIT_DIR / f"fu2b-reconciliation-{ts}.md" md_path = AUDIT_DIR / f"fu2b-reconciliation-{ts}.md"
cols = ["id", "current_case_number", "proposed_bare", "proceeding_type", cols = ["id", "current_case_number", "proposed_bare", "proceeding_type",
"citation_formatted", "extract_flag", "consistency", "dup_check", "will_change"] "citation_formatted", "extract_flag", "consistency", "proc_flag", "dup_check", "will_change"]
with csv_path.open("w", newline="", encoding="utf-8") as f: with csv_path.open("w", newline="", encoding="utf-8") as f:
w = csv.DictWriter(f, fieldnames=cols) w = csv.DictWriter(f, fieldnames=cols)
w.writeheader() w.writeheader()
w.writerows(rows) w.writerows(rows)
changing = [r for r in rows if r["will_change"] == "yes"] changing = [r for r in rows if r["will_change"] == "yes"]
flagged = [r for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH" or r["dup_check"]] flagged = [r for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH"
or r["dup_check"] or r["proc_flag"]]
with md_path.open("w", encoding="utf-8") as f: with md_path.open("w", encoding="utf-8") as f:
f.write(f"# FU-2b — טבלת-תיאום מזהים (internal_committee) — {ts}\n\n") f.write(f"# FU-2b — טבלת-תיאום מזהים (internal_committee) — {ts}\n\n")
f.write(f"- סה\"כ רשומות: {len(rows)}\n- ישתנו: {len(changing)}\n- מסומנות לסקירה: {len(flagged)}\n\n") f.write(f"- סה\"כ רשומות: {len(rows)}\n- ישתנו: {len(changing)}\n- מסומנות לסקירה: {len(flagged)}\n\n")
@@ -126,7 +146,7 @@ def _write_table(rows: list[dict], ts: str) -> tuple[Path, Path]:
for r in flagged: for r in flagged:
fl = " ".join(x for x in [r["extract_flag"] if r["extract_flag"] != "OK" else "", fl = " ".join(x for x in [r["extract_flag"] if r["extract_flag"] != "OK" else "",
r["consistency"] if r["consistency"] == "MISMATCH" else "", r["consistency"] if r["consistency"] == "MISMATCH" else "",
r["dup_check"]] if x) r["proc_flag"], r["dup_check"]] if x)
f.write(f"| {r['current_case_number'][:50]} | {r['proposed_bare']} | {r['proceeding_type']} | {fl} |\n") f.write(f"| {r['current_case_number'][:50]} | {r['proposed_bare']} | {r['proceeding_type']} | {fl} |\n")
f.write("\n## כל השינויים המוצעים\n\n") f.write("\n## כל השינויים המוצעים\n\n")
f.write("| current_case_number | → proposed_bare | proc |\n|---|---|---|\n") f.write("| current_case_number | → proposed_bare | proc |\n|---|---|---|\n")
@@ -174,7 +194,8 @@ async def main() -> int:
rows = await _build_reconciliation() rows = await _build_reconciliation()
csv_path, md_path = _write_table(rows, ts) csv_path, md_path = _write_table(rows, ts)
changing = sum(1 for r in rows if r["will_change"] == "yes") changing = sum(1 for r in rows if r["will_change"] == "yes")
flagged = sum(1 for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH" or r["dup_check"]) flagged = sum(1 for r in rows if r["extract_flag"] != "OK" or r["consistency"] == "MISMATCH"
or r["dup_check"] or r["proc_flag"])
print(f"DRY-RUN: {len(rows)} rows | will_change={changing} | flagged={flagged}") print(f"DRY-RUN: {len(rows)} rows | will_change={changing} | flagged={flagged}")
print(f" table: {md_path}") print(f" table: {md_path}")
print(f" csv: {csv_path}") print(f" csv: {csv_path}")