From 69b34f1c3ff4d517ad9ff4bbb6ba4bef05022eaa Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 7 Jun 2026 20:45:20 +0000 Subject: [PATCH] =?UTF-8?q?fix(X13):=20route=20by=20=D7=A0=D7=98-format=20?= =?UTF-8?q?availability;=20robust=20fetch=20error=20handling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live drain surfaced three issues: 1. Tier-0 needed `h2` (httpx http2) — added to the court-fetch extra. 2. Supreme cases that carry a נט-format number (e.g. בר"מ 72182-06-25) were routed to the unvalidated Tier-0 and failed, even though נט המשפט serves Supreme cases too. classify() now parses the file-month-year triple for Supreme prefixes; the orchestrator routes by triple-availability: נט-format present → Tier-1 (validated, all courts) serial-only Supreme (עע"מ 5886/24) → Tier-0 neither → clear "no public route" failure Validated live: בר"מ 72182-06-25 fetched via Tier-1 (5-page PDF). 3. A non-`RuntimeError` fetch exception (the h2 import error) left jobs stuck in 'running'. The fetch block now catches any Exception → _record_failure (INV-CF2/CF3), so a job always reaches a terminal state. + test_supreme_with_net_format_triple. Suite 11/11. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/pyproject.toml | 1 + .../src/legal_mcp/services/court_citation.py | 12 ++++++-- .../services/court_fetch_orchestrator.py | 30 ++++++++++++++----- mcp-server/tests/test_court_citation.py | 11 +++++++ 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/mcp-server/pyproject.toml b/mcp-server/pyproject.toml index afb9cee..89ba7e1 100644 --- a/mcp-server/pyproject.toml +++ b/mcp-server/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ court-fetch = [ "camoufox>=0.4.11", "faster-whisper>=1.0.0", + "h2>=4.0.0", # Tier-0 supremedecisions uses httpx http2 ] [build-system] diff --git a/mcp-server/src/legal_mcp/services/court_citation.py b/mcp-server/src/legal_mcp/services/court_citation.py index c85495f..db14027 100644 --- a/mcp-server/src/legal_mcp/services/court_citation.py +++ b/mcp-server/src/legal_mcp/services/court_citation.py @@ -157,15 +157,23 @@ def classify(citation: str) -> CourtCitation: case_number_norm=normalize_case_number(raw), ) - # 2. Supreme Court prefix → Tier 0. + # 2. Supreme Court prefix → Tier 0. Still parse a נט-format triple when the + # number carries one (e.g. בר"מ 72182-06-25): נט המשפט serves Supreme + # cases too, so a triple lets the orchestrator route to the validated + # Tier-1 flow instead of the serial-only Tier-0. m = _SUPREME_RX.search(text) if m: raw = m.group(2) + norm = normalize_case_number(raw) + filed = _split_filed(norm) return CourtCitation( tier="supreme", court_prefix=m.group(1), case_number_raw=raw, - case_number_norm=normalize_case_number(raw), + case_number_norm=norm, + file_number=filed[0] if filed else None, + month=filed[1] if filed else None, + year=filed[2] if filed else None, ) # 3. District / admin prefix → Tier 1. diff --git a/mcp-server/src/legal_mcp/services/court_fetch_orchestrator.py b/mcp-server/src/legal_mcp/services/court_fetch_orchestrator.py index 2cc8750..f824c93 100644 --- a/mcp-server/src/legal_mcp/services/court_fetch_orchestrator.py +++ b/mcp-server/src/legal_mcp/services/court_fetch_orchestrator.py @@ -170,14 +170,15 @@ async def fetch_and_ingest( await db.court_fetch_job_update(job_id, status="running", bump_attempts=True) # ── fetch ── + # Route by what the number lets us do, not just the court prefix: נט המשפט + # (Tier 1) serves ALL courts — Supreme included — as long as the citation + # carries a נט-format triple (file-month-year). Validated live on both + # district (עת"מ 43830-12-24) and Supreme (בר"מ 72182-06-25). Only a serial- + # only Supreme number (e.g. עע"מ 5886/24, no month) can't be looked up that + # way → fall through to Tier 0 (supremedecisions). + has_net_format = bool(cit.file_number and cit.month and cit.year) try: - if cit.tier == "supreme": - fetched = await fetch_supreme_verdict( - citation=citation, case_number_norm=cit.case_number_norm - ) - content, filename = fetched.content, fetched.filename - source_url, court = fetched.source_url, fetched.court - else: # admin → Tier 1 + if has_net_format: res = await _fetch_tier1_admin(cit) if not res.get("ok"): raise RuntimeError(res.get("reason") or "אחזור נכשל") @@ -186,7 +187,20 @@ async def fetch_and_ingest( filename = res.get("filename") or f"{cit.case_number_norm}.pdf" source_url = res.get("source_url", "") court = res.get("court") or cit.court_prefix - except (_Tier1Unavailable, SupremeFetchError, RuntimeError) as e: + elif cit.tier == "supreme": + fetched = await fetch_supreme_verdict( + citation=citation, case_number_norm=cit.case_number_norm + ) + content, filename = fetched.content, fetched.filename + source_url, court = fetched.source_url, fetched.court + else: + raise RuntimeError( + f"מספר-תיק {cit.case_number_norm} אינו בפורמט נט-המשפט ואינו עליון — " + "אין מסלול-אחזור ציבורי" + ) + except Exception as e: # noqa: BLE001 — any fetch error is recorded, never + # left hanging in 'running' (INV-CF2). _record_failure escalates to + # 'manual' after MAX_AUTONOMOUS_ATTEMPTS (INV-CF3). return await _record_failure(job_id, cit, citation, str(e)) # ── ingest into the canonical pipeline (INV-CF1) ── diff --git a/mcp-server/tests/test_court_citation.py b/mcp-server/tests/test_court_citation.py index 3521aa6..989d0e6 100644 --- a/mcp-server/tests/test_court_citation.py +++ b/mcp-server/tests/test_court_citation.py @@ -78,3 +78,14 @@ def test_empty_and_garbage(): def test_normalize_case_number(): assert normalize_case_number('עת"מ 46111/12/22') == "46111-12-22" assert normalize_case_number("1110/20") == "1110-20" + + +def test_supreme_with_net_format_triple(): + """A Supreme prefix carrying a נט-format number exposes the triple so the + orchestrator can route it to Tier-1 (נט המשפט serves Supreme too).""" + c = classify('בר"מ 72182-06-25 הימנותא נ\' הוועדה המקומית') + assert c.tier == "supreme" + assert (c.file_number, c.month, c.year) == ("72182", "06", "25") + # serial-format Supreme has no triple → stays Tier-0-only + s = classify('עע"מ 5886/24') + assert s.tier == "supreme" and s.file_number is None -- 2.49.1