"""Unit tests for db._canonical_case_number — #137 / INV-ID2 / X1 §1. The write-time canonicalizer must extract the case-number TOKEN only and drop any trailing display text (party names) that a mis-passed full citation glued onto the number. A clean number is returned unchanged; no month is invented. """ from __future__ import annotations from legal_mcp.services.db import _canonical_case_number as canon def test_clean_numbers_unchanged(): assert canon("8137-24") == "8137-24" assert canon("85074-09-24") == "85074-09-24" assert canon("8126-03-25") == "8126-03-25" # Legacy two-part number — month is NOT invented (X1 §1). assert canon("8126-25") == "8126-25" def test_prefix_stripped(): assert canon("ערר 8137/24") == "8137-24" assert canon('בל"מ 85074-09-24') == "85074-09-24" def test_trailing_party_names_dropped(): # The #137 symptom: ingest left "85074-04-25) רפאל לוי …" in the identifier. assert canon("85074-04-25) רפאל לוי ואח' נ' הוועדה המקומית - חולון") == "85074-04-25" assert canon("8137-24 פלוני נ' אלמוני") == "8137-24" def test_empty_and_no_digit(): assert canon("") == "" assert canon("ללא מספר") == ""