שתי מערכות מקבילות לאותו מושג ("פסיקה מצוטטת שטקסטה לא נקלט"): טבלת
missing_precedents (תור-רכישה ידני של היו"ר) מול case_law source_kind='cited_only'
(stubs מגרף-הציטוטים/X11). חפיפה≈0 → 31 ה-stubs לא הופיעו ב-/missing-precedents.
הכרעה (G2): missing_precedents = SoT-לתור-יחיד; cited_only = מקור-גילוי נגזר (כמו
יומונים מזינים radar). גוזרים רשומת missing_precedents 'open' לכל stub.
תיקון:
- court_citation.citation_dedup_key — מפתח-dedup **designator-aware**
(`{designator}|{docket}`). **מתקן פגם בתוכנית-הניתוח:** dedup על מספר-בלבד היה
ממזג בטעות אותו docket בערכאות שונות (בג"ץ 389/87 ≠ ע"א 389/87; 18 כאלה בקיים).
- סכמה V40: missing_precedents מקבל citation_norm (מפתח-dedup) + discovery_source
(manual|cited_only|digest|writer) + index. **בלי UNIQUE** — הקורפוס מחזיק
לגיטימית אותו docket בערכאות שונות; ייחודיות נאכפת designator-aware בנתיב-היצירה.
- create_missing_precedent: מחשב citation_norm בכתיבה (G1), מקבל discovery_source
+ linked_case_law_id. find_missing_precedent_by_citation: dedup דרך citation_norm
(fallback ל-citation גולמי כשאין מספר).
- scripts/derive_missing_from_cited_only.py: backfill citation_norm ל-291 +
גזירת 31 (dry-run: 31 ייווצרו, 0 deduped). linked_case_law_id=stub, status=open
→ promote-in-place בהעלאת-טקסט דרך ON CONFLICT הקיים. אידמפוטנטי.
תלוי-הקשר: #140 (הגדרת cited_only). מתואם עם #136 (digest→MP — אותו citation_norm
+ create path). תיקון-נתון יורץ אחרי הפריסה.
בדיקות: test_dedup_key_is_designator_aware (בג"ץ≠ע"א, ערר≠בל"מ, גרסאות-format
מתמזגות). כל 356 עוברות. guards נקיים.
Invariants: G2 (SoT-לתור יחיד, cited_only נגזר), G1 (citation_norm מנורמל בכתיבה),
G3 (idempotent upsert), G10 (שער-העלאה ידני נשמר), G12.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
136 lines
5.3 KiB
Python
"""Unit tests for the X13 court-citation classifier."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from legal_mcp.services.court_citation import (
|
|
case_number_from_citation,
|
|
citation_dedup_key,
|
|
classify,
|
|
normalize_case_number,
|
|
)
|
|
|
|
|
|
def test_dedup_key_is_designator_aware():
    """#143 — the dedup key must take the court designator into account.

    One docket number cited under two different courts has to yield two
    distinct keys; keying on the bare number alone would wrongly merge
    separate precedents.
    """
    high_court_key = citation_dedup_key('בג"ץ 389/87')
    civil_appeal_key = citation_dedup_key('ע"א 389/87')
    assert (
        high_court_key and civil_appeal_key and high_court_key != civil_appeal_key
    ), (high_court_key, civil_appeal_key)

    # Gershayim / separator variants of the SAME court + docket must still
    # collapse onto a single key.
    slash_variant = citation_dedup_key('עע"מ 9057/09')
    dash_variant = citation_dedup_key("עע״מ 9057-09")
    assert slash_variant == dash_variant

    # The committee proceeding type is part of the key (ערר vs. בל"מ).
    appeal_key = citation_dedup_key("ערר 1192/18")
    other_proceeding_key = citation_dedup_key('בל"מ 1192/18')
    assert appeal_key != other_proceeding_key
|
|
|
|
|
|
def test_dedup_key_empty_without_number():
    """Text that carries no docket number produces the empty dedup key."""
    for numberless in ("", "פסק דין בלי מספר"):
        assert citation_dedup_key(numberless) == ""
|
|
|
|
|
|
def test_admin_filed_format_the_example():
    """The plan's worked example: a filed-format number under an admin prefix.

    The citation must classify as ``admin``, keep its raw and normalized
    number as ``46111-12-22``, and split into (file, month, year).
    """
    citation = 'עת"מ 46111-12-22 יכין-אפק בע"מ נ\' הוועדה המחוזית'
    result = classify(citation)

    assert result.tier == "admin"
    # Either quoting style of the prefix is acceptable.
    accepted_prefixes = ('עת"מ', "עת״מ")
    assert result.court_prefix in accepted_prefixes
    assert result.case_number_raw == "46111-12-22"
    assert result.case_number_norm == "46111-12-22"

    filed_triple = (result.file_number, result.month, result.year)
    assert filed_triple == ("46111", "12", "22")
    assert result.fetchable is True
|
|
|
|
|
|
def test_bare_filed_number_defaults_admin():
    """A filed-format number with no court prefix is classified as admin."""
    result = classify("46111-12-22")
    assert result.tier == "admin"

    filed_triple = (result.file_number, result.month, result.year)
    assert filed_triple == ("46111", "12", "22")
|
|
|
|
|
|
def test_supreme_prefixes():
    """Each listed court prefix classifies as the supreme tier and is fetchable."""
    # (citation, expected tier) pairs — every entry expects "supreme".
    cases = (
        ('עע"מ 1234/22', "supreme"),
        ('בג"ץ 5678/21', "supreme"),
        ('ע"א 999/20', "supreme"),
        ('רע"א 4/19', "supreme"),
        ('בר"מ 8126/24', "supreme"),
    )
    for cit, expected_tier in cases:
        c = classify(cit)
        assert c.tier == expected_tier, f"{cit} -> {c.tier}"
        assert c.fetchable is True
|
|
|
|
|
|
def test_appeals_committee_is_skip():
    """Appeals-committee citations (ערר / בל"מ) are never auto-fetched.

    They need Nevo, so the classifier must mark them ``skip`` and
    non-fetchable — INV-CF6.
    """
    committee_citations = (
        'ערר 1110/20',
        'בל"מ 8048/24',
        "ערר 1015-01-24 ירושלים שקופה",
    )
    for cit in committee_citations:
        c = classify(cit)
        assert c.tier == "skip", f"{cit} -> {c.tier}"
        assert c.fetchable is False
|
|
|
|
|
|
def test_skip_wins_over_court_match():
    """A citation mentioning 'ערר' stays ``skip`` even with court-like digits."""
    sentence = "ראה החלטתי בערר 1041/24 ובהמשך"
    assert classify(sentence).tier == "skip"
|
|
|
|
|
|
def test_admin_amn_prefix():
    """The עמ"נ prefix classifies as admin and exposes the filed triple."""
    result = classify('עמ"נ 12345-06-23')
    assert result.tier == "admin"

    filed_triple = (result.file_number, result.month, result.year)
    assert filed_triple == ("12345", "06", "23")
|
|
|
|
|
|
def test_two_group_serial_has_no_filed_triple():
    """A two-group Supreme serial (1234/22) normalizes to ``1234-22`` but
    carries no file number, i.e. no (file, month, year) triple."""
    serial = classify('עע"מ 1234/22')
    normalized = serial.case_number_norm
    assert normalized == "1234-22"
    assert serial.file_number is None
|
|
|
|
|
|
def test_implausible_month_not_parsed_as_filed():
    """A three-group number with an implausible month is not a filed triple.

    ``1234-22-05`` has 22 in the month position. The classifier may report
    the tier as either ``unknown`` or ``admin``; when it reports ``admin`` it
    must not expose a parsed month.
    """
    number = "1234-22-05"
    # Classify once and reuse the result for both checks, instead of
    # classifying the same input twice.
    c = classify(number)
    # Failure messages mirror the "citation -> tier" style used by the other
    # tier tests so a failing run shows what the classifier returned.
    assert c.tier in ("unknown", "admin"), f"{number} -> {c.tier}"
    if c.tier == "admin":
        assert c.month is None, f"{number} -> month={c.month!r}"
|
|
|
|
|
|
def test_empty_and_garbage():
    """Empty input and citation-free text both classify as ``unknown``."""
    for not_a_citation in ("", "שלום עולם בלי ציטוט"):
        assert classify(not_a_citation).tier == "unknown"
|
|
|
|
|
|
def test_normalize_case_number():
    """Slash-separated numbers normalize to dash-separated ones, with or
    without a leading court prefix."""
    expectations = (
        ('עת"מ 46111/12/22', "46111-12-22"),
        ("1110/20", "1110-20"),
    )
    for raw, normalized in expectations:
        assert normalize_case_number(raw) == normalized
|
|
|
|
|
|
def test_case_number_from_citation_strips_party_names():
    """#137 — a full appeals-committee citation yields ONLY the number.

    The display string with party names must never leak into the result
    (INV-ID2). This is the exact precedent 1bf0bae0 whose citation planted
    the number followed by party names into case_number.
    """
    full_citation = 'ערר (ת"א 85074-04-25) רפאל לוי ואח\' נ\' הוועדה המקומית - חולון'
    extracted = case_number_from_citation(full_citation)
    assert extracted == "85074-04-25"
|
|
|
|
|
|
def test_case_number_from_citation_various_forms():
    """The number is extracted from several citation shapes.

    Covered: parenthesized, bare, prefixed filed-format, and a slash serial
    that comes back dash-normalized.
    """
    expectations = (
        ('ערר (ת"א 1198-12-25) זאטוס', "1198-12-25"),
        ("85074-04-25", "85074-04-25"),
        ('בל"מ 85074-09-24', "85074-09-24"),
        ("ערר 8137/24", "8137-24"),
    )
    for citation, number in expectations:
        assert case_number_from_citation(citation) == number
|
|
|
|
|
|
def test_case_number_from_citation_empty_when_unparseable():
    """No number in the input → ``''``.

    The caller is then forced to ask for a manual number and never falls
    back to the raw citation (the #137 fallback that caused the bug).
    """
    for unparseable in ("", "פסק דין בלי מספר"):
        assert case_number_from_citation(unparseable) == ""
|
|
|
|
|
|
def test_supreme_with_net_format_triple():
    """A Supreme prefix with a net-format number exposes the filed triple.

    The triple lets the orchestrator route the citation to Tier-1 (the net
    system serves Supreme too). A serial-format Supreme citation has no
    triple and stays Tier-0-only.
    """
    net_format = classify('בר"מ 72182-06-25 הימנותא נ\' הוועדה המקומית')
    assert net_format.tier == "supreme"
    filed_triple = (net_format.file_number, net_format.month, net_format.year)
    assert filed_triple == ("72182", "06", "25")

    # Serial-format Supreme: still supreme, but no file number to route on.
    serial_format = classify('עע"מ 5886/24')
    assert serial_format.tier == "supreme"
    assert serial_format.file_number is None
|