Dry-run surfaced 2 rows with בל"מ prefix but proceeding_type=ערר. Since the migration strips the prefix, a wrong proceeding_type would silently lose the בל"מ signal — must be chair-adjudicated, not auto-applied. Chair table now flags 4 rows: 2 DUP_CHECK (8047-23) + 2 PROC_MISMATCH. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
63 lines
2.8 KiB
Python
63 lines
2.8 KiB
Python
"""FU-2b: deterministic bare-number extraction (offline)."""
|
||
from __future__ import annotations
|
||
|
||
import importlib.util
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
# The migration script sits under scripts/ and is not part of a package, so it
# cannot be imported by name; load it directly from its file path instead.
# parents[2] is the directory two levels above this file's own directory.
_SCRIPT = Path(__file__).resolve().parents[2].joinpath(
    "scripts",
    "fu2b_reconcile_internal_case_numbers.py",
)
_spec = importlib.util.spec_from_file_location("fu2b_reconcile", _SCRIPT)
fu2b = importlib.util.module_from_spec(_spec)
# Run the script's top-level code so its helpers become attributes of `fu2b`.
_spec.loader.exec_module(fu2b)
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("raw", "expected_bare"),
    [
        ("ערר (ועדות ערר - תכנון ובנייה ירושלים) 403/17 אהרון ברק נ'", "403-17"),
        ("ערר (...) 8136-10-24 שחר שות'", "8136-10-24"),  # month preserved
        ("בל\"מ (...) 1028/20 חלוואני ריאד", "1028-20"),
        ("8047/23", "8047-23"),  # already-bare-ish
        ("ערר 81002-01-21", "81002-01-21"),
    ],
)
def test_extract_bare_single_token(raw, expected_bare):
    """A string holding exactly one case-number token yields that number and "OK"."""
    extracted, status = fu2b._extract_bare(raw)
    assert extracted == expected_bare
    assert status == "OK"
|
||
|
||
|
||
def test_extract_bare_no_number():
    """Text with no case-number token gives no bare number and the NO_NUMBER flag."""
    extracted, status = fu2b._extract_bare("ערר אדלר נ' הוועדה")
    # Separate asserts so a failure names the half that broke.
    assert extracted is None
    assert status == "NO_NUMBER"
|
||
|
||
|
||
def test_extract_bare_multiple_numbers_flagged():
    """Two case-number-shaped tokens are ambiguous: nothing is picked, MULTI_NUMBER is set."""
    # The extractor must refuse to choose between the two candidates.
    extracted, status = fu2b._extract_bare("ערר 403/17 ו-1024/24 מאוחדים")
    assert extracted is None
    assert status == "MULTI_NUMBER"
|
||
|
||
|
||
def test_extract_bare_preserves_month_not_padding():
    """A month part is kept verbatim, and a two-part number does not gain one."""
    # Maps raw text to expected bare number; the two-part entry must stay two-part.
    expected_by_raw = {
        "ערר 8126/24 פלוני": "8126-24",
        "ערר 8126-03-25 פלוני": "8126-03-25",
    }
    for raw_text, expected in expected_by_raw.items():
        assert fu2b._extract_bare(raw_text)[0] == expected
|
||
|
||
|
||
def test_consistency_flag_when_bare_absent_from_citation():
    """The proposed bare number has to show up in citation_formatted, else MISMATCH."""
    proposed = "403-17"
    citation_cases = [
        ("ערר (...) 403/17 אהרון ברק", "OK"),
        ("ערר (...) 1975/24 מישהו אחר", "MISMATCH"),
        ("", "NO_CITATION"),  # empty citation gets its own flag
    ]
    for citation, expected_flag in citation_cases:
        assert fu2b._consistency_flag(proposed, citation) == expected_flag
|
||
|
||
|
||
def test_proc_mismatch_detects_prefix_vs_type_conflict():
    """A case_number prefix that contradicts proceeding_type must be flagged.

    The migration strips the prefix, so a wrong proceeding_type would
    otherwise lose the signal the prefix carried.
    """
    # Prefix disagrees with proceeding_type -> flagged.
    conflicting = [
        ('בל"מ 1010-01-25', "ערר"),
        ('בל"מ (...) 1028/20 חלוואני', "ערר"),
    ]
    for case_number, proceeding_type in conflicting:
        assert fu2b._proc_mismatch(case_number, proceeding_type) is True

    # Prefix agrees with proceeding_type, or there is no prefix at all (a bare
    # number has nothing to contradict) -> not flagged.
    non_conflicting = [
        ('ערר 1024/24 נילי', "ערר"),
        ('בל"מ 1010-01-25', 'בל"מ'),
        ("8047/23", 'בל"מ'),
    ]
    for case_number, proceeding_type in non_conflicting:
        assert fu2b._proc_mismatch(case_number, proceeding_type) is False
|