From a41fcedc286587ada73c1eacdc1d423b634d5d0e Mon Sep 17 00:00:00 2001
From: Chaim
Date: Sun, 31 May 2026 08:52:48 +0000
Subject: [PATCH] test(fu2b): failing tests for bare-number extraction (FU-2b)

---
Reviewer notes (free-form area after the three-dash line; not part of the
commit message or of the diff):

* Adds one new pytest module, mcp-server/tests/test_fu2b_reconcile.py.
* The module loads scripts/fu2b_reconcile_internal_case_numbers.py by file
  path (parents[2] of the test file, i.e. two levels above tests/) through
  importlib, because the script is not importable as a package.
* It pins two private helpers of that script:
    _extract_bare(raw) -> (bare, flag)
        flag is "OK", "NO_NUMBER" or "MULTI_NUMBER"; bare is None for the
        latter two. "/" separators become "-", and a 3-part number keeps its
        middle part while a 2-part number stays 2-part.
    _consistency_flag(bare, citation) -> "OK" | "MISMATCH" | "NO_CITATION"
* Per the subject line these tests are expected to fail at this commit.
* NOTE(review): line breaks and the 4-space indentation inside the hunk were
  reconstructed from a whitespace-collapsed copy, so the blob id on the
  "index" line may not match byte-for-byte - confirm against the original
  commit before relying on it.

 mcp-server/tests/test_fu2b_reconcile.py | 50 +++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 mcp-server/tests/test_fu2b_reconcile.py

diff --git a/mcp-server/tests/test_fu2b_reconcile.py b/mcp-server/tests/test_fu2b_reconcile.py
new file mode 100644
index 0000000..39c53f5
--- /dev/null
+++ b/mcp-server/tests/test_fu2b_reconcile.py
@@ -0,0 +1,50 @@
+"""FU-2b: deterministic bare-number extraction (offline)."""
+from __future__ import annotations
+
+import importlib.util
+from pathlib import Path
+
+import pytest
+
+# Load the migration script as a module (it lives in scripts/, not a package).
+_SCRIPT = Path(__file__).resolve().parents[2] / "scripts" / "fu2b_reconcile_internal_case_numbers.py"
+_spec = importlib.util.spec_from_file_location("fu2b_reconcile", _SCRIPT)
+fu2b = importlib.util.module_from_spec(_spec)
+_spec.loader.exec_module(fu2b)
+
+
+@pytest.mark.parametrize("raw,expected_bare", [
+    ("ערר ‏(‏ועדות ערר - תכנון ובנייה ירושלים‏)‏ 403/17 אהרון ברק נ'", "403-17"),
+    ("ערר (...) 8136-10-24 שחר שות'", "8136-10-24"),  # month preserved
+    ("בל\"מ (...) 1028/20 חלוואני ריאד", "1028-20"),
+    ("8047/23", "8047-23"),  # already-bare-ish
+    ("ערר 81002-01-21", "81002-01-21"),
+])
+def test_extract_bare_single_token(raw, expected_bare):
+    bare, flag = fu2b._extract_bare(raw)
+    assert bare == expected_bare
+    assert flag == "OK"
+
+
+def test_extract_bare_no_number():
+    bare, flag = fu2b._extract_bare("ערר אדלר נ' הוועדה")
+    assert bare is None and flag == "NO_NUMBER"
+
+
+def test_extract_bare_multiple_numbers_flagged():
+    # Two case-number-shaped tokens → ambiguous, must NOT auto-pick.
+    bare, flag = fu2b._extract_bare("ערר 403/17 ו-1024/24 מאוחדים")
+    assert bare is None and flag == "MULTI_NUMBER"
+
+
+def test_extract_bare_preserves_month_not_padding():
+    # Month kept exactly; 2-part stays 2-part (no invented month).
+    assert fu2b._extract_bare("ערר 8126/24 פלוני")[0] == "8126-24"
+    assert fu2b._extract_bare("ערר 8126-03-25 פלוני")[0] == "8126-03-25"
+
+
+def test_consistency_flag_when_bare_absent_from_citation():
+    # proposed bare must appear in citation_formatted, else MISMATCH.
+    assert fu2b._consistency_flag("403-17", "ערר (...) 403/17 אהרון ברק") == "OK"
+    assert fu2b._consistency_flag("403-17", "ערר (...) 1975/24 מישהו אחר") == "MISMATCH"
+    assert fu2b._consistency_flag("403-17", "") == "NO_CITATION"