תת-מערכת אחזור-פסיקה אוטומטי: כשיומון מצביע על פס"ד בית-משפט, מסווגים את הערכאה, מורידים מהמקור הציבורי המתאים, וקולטים דרך צינור-הקליטה הקנוני. - spec-first: docs/spec/X13-court-fetch.md (INV-CF1..CF7) + אינדקס - מסווג court_citation.py (supreme/admin/skip) + 10 בדיקות (עת"מ 46111-12-22 → admin) - Tier 0: court_fetch_supreme.py — supremedecisions API (reverse-engineered), httpx + browser-headers (אומת 200) + politeness - תור court_fetch_jobs (SCHEMA_V30) + DB helpers + court_fetch_orchestrator.py - Tier 1 scaffold: legal-court-fetch-service (aiohttp+Bearer, מראת legal-chat-service) + camofox_client (Camoufox open-source) + recaptcha_audio (Whisper מקומי) + pm2 - Tier 2 fallback חינני: manual + missing_precedent (INV-CF2/CF3 — אין drop שקט) - כלי-MCP court_verdict_fetch / court_fetch_status; SCRIPTS.md Invariants: מקיים G2 (מסלול-קליטה יחיד, INV-CF1) · G3/G1 (idempotent+נרמול, INV-CF5) · G4/§6 (אין בליעה שקטה, INV-CF2) · G10 (שער-אנושי, INV-CF3) · G5 (source_type, INV-CF6) · G9 (provenance+audit, INV-CF7). מקורות INV-CF4: RFC 9309 · Google crawler · OWASP OAT. Follow-ups (טרם אומתו חי): live Tier-0 validation · התקנת camofox-browser+whisper · כיול selectors Tier-1 · COURT_FETCH_SHARED_SECRET (Infisical+Coolify) · טריגר מ-digest try_autolink (worktree-digests-radar). V30 עלול להתנגש עם digests-radar. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
81 lines
2.7 KiB
Python
81 lines
2.7 KiB
Python
"""Unit tests for the X13 court-citation classifier."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from legal_mcp.services.court_citation import classify, normalize_case_number
|
|
|
|
|
|
def test_admin_filed_format_the_example():
    """Pin the plan's reference citation: an admin petition numbered 46111-12-22.

    Checks the tier, the recognised court prefix (either quote-mark spelling),
    the raw and normalized case number, the (file, month, year) split, and
    that the result is flagged as fetchable.
    """
    citation = 'עת"מ 46111-12-22 יכין-אפק בע"מ נ\' הוועדה המחוזית'
    # The prefix may come back with an ASCII double quote or a Hebrew gershayim.
    accepted_prefixes = ('עת"מ', "עת״מ")

    parsed = classify(citation)

    assert parsed.tier == "admin"
    assert parsed.court_prefix in accepted_prefixes
    assert parsed.case_number_raw == "46111-12-22"
    assert parsed.case_number_norm == "46111-12-22"
    filed_triple = (parsed.file_number, parsed.month, parsed.year)
    assert filed_triple == ("46111", "12", "22")
    assert parsed.fetchable is True
|
|
|
|
|
|
def test_bare_filed_number_defaults_admin():
    """A filed-format number with no court prefix is classified as admin."""
    parsed = classify("46111-12-22")

    assert parsed.tier == "admin"
    filed_triple = (parsed.file_number, parsed.month, parsed.year)
    assert filed_triple == ("46111", "12", "22")
|
|
|
|
|
|
def test_supreme_prefixes():
    """Each listed court prefix classifies as the supreme tier and is fetchable."""
    # (citation, expected tier) pairs; every prefix here maps to "supreme".
    cases = (
        ('עע"מ 1234/22', "supreme"),
        ('בג"ץ 5678/21', "supreme"),
        ('ע"א 999/20', "supreme"),
        ('רע"א 4/19', "supreme"),
        ('בר"מ 8126/24', "supreme"),
    )
    for citation, expected_tier in cases:
        result = classify(citation)
        assert result.tier == expected_tier, f"{citation} -> {result.tier}"
        assert result.fetchable is True
|
|
|
|
|
|
def test_appeals_committee_is_skip():
    """Appeals-committee citations must never be auto-fetched (needs Nevo) — INV-CF6.

    Covers both slash-style and filed-style numbering after the prefix.
    """
    appeals_citations = (
        'ערר 1110/20',
        'בל"מ 8048/24',
        "ערר 1015-01-24 ירושלים שקופה",
    )
    for citation in appeals_citations:
        result = classify(citation)
        assert result.tier == "skip", f"{citation} -> {result.tier}"
        assert result.fetchable is False
|
|
|
|
|
|
def test_skip_wins_over_court_match():
    """An appeals-committee citation that also contains court-like digits stays skip."""
    sentence = "ראה החלטתי בערר 1041/24 ובהמשך"
    result = classify(sentence)
    assert result.tier == "skip"
|
|
|
|
|
|
def test_admin_amn_prefix():
    """A second admin-tier prefix with a filed-format number parses into its triple."""
    parsed = classify('עמ"נ 12345-06-23')

    assert parsed.tier == "admin"
    filed_triple = (parsed.file_number, parsed.month, parsed.year)
    assert filed_triple == ("12345", "06", "23")
|
|
|
|
|
|
def test_two_group_serial_has_no_filed_triple():
    """A two-group serial (1234/22) normalizes to 1234-22 and has no file number."""
    serial = classify('עע"מ 1234/22')

    assert serial.case_number_norm == "1234-22"
    assert serial.file_number is None
|
|
|
|
|
|
def test_implausible_month_not_parsed_as_filed():
    """A middle group of 22 is not a valid month, so no filed month is reported.

    The classifier may return either the "unknown" or the "admin" tier for
    this input; when it returns "admin", the month field must be left unset.
    """
    # 1234-22-05 has month=22 → not a valid filed triple.
    # Classify once so both checks inspect the same result object.
    c = classify("1234-22-05")

    assert c.tier in ("unknown", "admin"), f"1234-22-05 -> {c.tier}"
    if c.tier == "admin":
        assert c.month is None
|
|
|
|
|
|
def test_empty_and_garbage():
    """Empty input and text without any citation both classify as unknown."""
    for text in ("", "שלום עולם בלי ציטוט"):
        assert classify(text).tier == "unknown"
|
|
|
|
|
|
def test_normalize_case_number():
    """normalize_case_number drops a leading court prefix and turns slashes into dashes."""
    expectations = (
        ('עת"מ 46111/12/22', "46111-12-22"),
        ("1110/20", "1110-20"),
    )
    for raw, normalized in expectations:
        assert normalize_case_number(raw) == normalized
|