"""FU-2b: deterministic bare-number extraction (offline).""" from __future__ import annotations import importlib.util from pathlib import Path import pytest # Load the migration script as a module (it lives in scripts/, not a package). _SCRIPT = Path(__file__).resolve().parents[2] / "scripts" / "fu2b_reconcile_internal_case_numbers.py" _spec = importlib.util.spec_from_file_location("fu2b_reconcile", _SCRIPT) fu2b = importlib.util.module_from_spec(_spec) _spec.loader.exec_module(fu2b) @pytest.mark.parametrize("raw,expected_bare", [ ("ערר ‏(‏ועדות ערר - תכנון ובנייה ירושלים‏)‏ 403/17 אהרון ברק נ'", "403-17"), ("ערר (...) 8136-10-24 שחר שות'", "8136-10-24"), # month preserved ("בל\"מ (...) 1028/20 חלוואני ריאד", "1028-20"), ("8047/23", "8047-23"), # already-bare-ish ("ערר 81002-01-21", "81002-01-21"), ]) def test_extract_bare_single_token(raw, expected_bare): bare, flag = fu2b._extract_bare(raw) assert bare == expected_bare assert flag == "OK" def test_extract_bare_no_number(): bare, flag = fu2b._extract_bare("ערר אדלר נ' הוועדה") assert bare is None and flag == "NO_NUMBER" def test_extract_bare_multiple_numbers_flagged(): # Two case-number-shaped tokens → ambiguous, must NOT auto-pick. bare, flag = fu2b._extract_bare("ערר 403/17 ו-1024/24 מאוחדים") assert bare is None and flag == "MULTI_NUMBER" def test_extract_bare_preserves_month_not_padding(): # Month kept exactly; 2-part stays 2-part (no invented month). assert fu2b._extract_bare("ערר 8126/24 פלוני")[0] == "8126-24" assert fu2b._extract_bare("ערר 8126-03-25 פלוני")[0] == "8126-03-25" def test_consistency_flag_when_bare_absent_from_citation(): # proposed bare must appear in citation_formatted, else MISMATCH. assert fu2b._consistency_flag("403-17", "ערר (...) 403/17 אהרון ברק") == "OK" assert fu2b._consistency_flag("403-17", "ערר (...) 1975/24 מישהו אחר") == "MISMATCH" assert fu2b._consistency_flag("403-17", "") == "NO_CITATION"