51 lines
2.1 KiB
Python
51 lines
2.1 KiB
Python
"""FU-2b: deterministic bare-number extraction (offline)."""
|
||
from __future__ import annotations
|
||
|
||
import importlib.util
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
# Load the migration script as a module (it lives in scripts/, not a package).
# The scripts/ directory is resolved relative to parents[2] of this file's
# resolved path.
_SCRIPT = Path(__file__).resolve().parents[2] / "scripts" / "fu2b_reconcile_internal_case_numbers.py"
_spec = importlib.util.spec_from_file_location("fu2b_reconcile", _SCRIPT)
# spec_from_file_location() may return None, and a spec's loader may be None;
# fail with an explicit ImportError instead of an AttributeError on None.
if _spec is None or _spec.loader is None:
    raise ImportError(f"cannot build an import spec for {_SCRIPT}")
fu2b = importlib.util.module_from_spec(_spec)
# Execute the script's top-level code so its functions become attributes of fu2b.
_spec.loader.exec_module(fu2b)
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("raw", "expected_bare"),
    [
        ("ערר (ועדות ערר - תכנון ובנייה ירושלים) 403/17 אהרון ברק נ'", "403-17"),
        ("ערר (...) 8136-10-24 שחר שות'", "8136-10-24"),  # month preserved
        ("בל\"מ (...) 1028/20 חלוואני ריאד", "1028-20"),
        ("8047/23", "8047-23"),  # already-bare-ish
        ("ערר 81002-01-21", "81002-01-21"),
    ],
)
def test_extract_bare_single_token(raw, expected_bare):
    """A raw title with exactly one case number yields that number and an OK flag.

    Each case pairs a raw input string with the dash-separated number that
    ``_extract_bare`` is expected to return for it.
    """
    number, status = fu2b._extract_bare(raw)
    # Compare both parts at once; either one differing fails the test.
    assert (number, status) == (expected_bare, "OK")
|
||
|
||
|
||
def test_extract_bare_no_number():
    """Input without any case number returns ``None`` with the NO_NUMBER flag."""
    number, status = fu2b._extract_bare("ערר אדלר נ' הוועדה")
    assert number is None
    assert status == "NO_NUMBER"
|
||
|
||
|
||
def test_extract_bare_multiple_numbers_flagged():
    """Two case-number-shaped tokens are ambiguous: nothing is auto-picked.

    The expected result is ``None`` together with the MULTI_NUMBER flag.
    """
    number, status = fu2b._extract_bare("ערר 403/17 ו-1024/24 מאוחדים")
    assert number is None
    assert status == "MULTI_NUMBER"
|
||
|
||
|
||
def test_extract_bare_preserves_month_not_padding():
    """The month part is kept exactly; a 2-part number stays 2-part (no invented month)."""
    cases = (
        ("ערר 8126/24 פלוני", "8126-24"),
        ("ערר 8126-03-25 פלוני", "8126-03-25"),
    )
    for raw, wanted in cases:
        # Only the first element of the result (the bare number) is checked here.
        assert fu2b._extract_bare(raw)[0] == wanted
|
||
|
||
|
||
def test_consistency_flag_when_bare_absent_from_citation():
    """The proposed bare number must appear in citation_formatted, else MISMATCH.

    An empty citation is reported as NO_CITATION.
    """
    proposed = "403-17"
    expectations = (
        ("ערר (...) 403/17 אהרון ברק", "OK"),
        ("ערר (...) 1975/24 מישהו אחר", "MISMATCH"),
        ("", "NO_CITATION"),
    )
    for citation, wanted_flag in expectations:
        assert fu2b._consistency_flag(proposed, citation) == wanted_flag
|