test(fu2b): failing tests for bare-number extraction (FU-2b)

This commit is contained in:
2026-05-31 08:52:48 +00:00
parent c2de69272d
commit a41fcedc28

View File

@@ -0,0 +1,50 @@
"""FU-2b: deterministic bare-number extraction (offline)."""
from __future__ import annotations
import importlib.util
from pathlib import Path
import pytest
# Load the migration script as a module (it lives in scripts/, not a package),
# so its private helpers can be called directly from the tests below.
# parents[2] is the directory two levels above the one containing this test
# file; the script is expected under its "scripts" subdirectory.
# NOTE(review): presumably that directory is the repository root — confirm.
_SCRIPT = Path(__file__).resolve().parents[2] / "scripts" / "fu2b_reconcile_internal_case_numbers.py"
# Build an import spec for the file under the module name "fu2b_reconcile".
_spec = importlib.util.spec_from_file_location("fu2b_reconcile", _SCRIPT)
# Create an empty module object from the spec; nothing has been executed yet.
fu2b = importlib.util.module_from_spec(_spec)
# Execute the script's top-level code inside that module object. This happens
# when this test module is imported, i.e. at collection time.
# NOTE(review): the module is not registered in sys.modules, and neither
# `_spec` nor `_spec.loader` is checked for None before use.
_spec.loader.exec_module(fu2b)
@pytest.mark.parametrize(
    ("raw", "expected_bare"),
    [
        # Slash-separated number embedded in a full citation string.
        ("ערר (‏ועדות ערר - תכנון ובנייה ירושלים‏) 403/17 אהרון ברק נ'", "403-17"),
        # Three-part number: the month segment is preserved.
        ("ערר (...) 8136-10-24 שחר שות'", "8136-10-24"),
        ("בל\"מ (...) 1028/20 חלוואני ריאד", "1028-20"),
        # Input that is already (almost) in bare form.
        ("8047/23", "8047-23"),
        ("ערר 81002-01-21", "81002-01-21"),
    ],
)
def test_extract_bare_single_token(raw, expected_bare):
    """Input with one case-number token yields the hyphenated bare number and OK."""
    extracted, status = fu2b._extract_bare(raw)
    assert extracted == expected_bare
    assert status == "OK"
def test_extract_bare_no_number():
    """Text containing no number yields no bare value and the NO_NUMBER flag."""
    extracted, status = fu2b._extract_bare("ערר אדלר נ' הוועדה")
    assert extracted is None
    assert status == "NO_NUMBER"
def test_extract_bare_multiple_numbers_flagged():
    """Two case-number-shaped tokens are ambiguous: nothing is picked, MULTI_NUMBER is set."""
    # The extractor must NOT auto-pick either of the two candidates.
    ambiguous_text = "ערר 403/17 ו-1024/24 מאוחדים"
    extracted, status = fu2b._extract_bare(ambiguous_text)
    assert extracted is None
    assert status == "MULTI_NUMBER"
def test_extract_bare_preserves_month_not_padding():
    """The month segment is kept exactly; a 2-part number gains no invented month."""
    cases = (
        # 2-part input stays 2-part.
        ("ערר 8126/24 פלוני", "8126-24"),
        # 3-part input keeps its month segment unchanged.
        ("ערר 8126-03-25 פלוני", "8126-03-25"),
    )
    for text, expected in cases:
        assert fu2b._extract_bare(text)[0] == expected
def test_consistency_flag_when_bare_absent_from_citation():
    """The proposed bare number must appear in citation_formatted, else MISMATCH."""
    proposed = "403-17"
    expectations = [
        # Citation carries the same number (slash form).
        ("ערר (...) 403/17 אהרון ברק", "OK"),
        # Citation carries a different number.
        ("ערר (...) 1975/24 מישהו אחר", "MISMATCH"),
        # Empty citation.
        ("", "NO_CITATION"),
    ]
    for citation, expected_flag in expectations:
        assert fu2b._consistency_flag(proposed, citation) == expected_flag