fix(X13): ניתוב לפי פורמט-נט; טיפול-שגיאות חסין באחזור #124

Merged
chaim merged 1 commit from worktree-court-fetch-routing into main 2026-06-07 20:46:02 +00:00
4 changed files with 44 additions and 10 deletions

View File

@@ -32,6 +32,7 @@ dependencies = [
court-fetch = [
"camoufox>=0.4.11",
"faster-whisper>=1.0.0",
"h2>=4.0.0", # Tier-0 supremedecisions uses httpx http2
]
[build-system]

View File

@@ -157,15 +157,23 @@ def classify(citation: str) -> CourtCitation:
case_number_norm=normalize_case_number(raw),
)
# 2. Supreme Court prefix → Tier 0.
# 2. Supreme Court prefix → Tier 0. Still parse a נט-format triple when the
# number carries one (e.g. בר"מ 72182-06-25): נט המשפט serves Supreme
# cases too, so a triple lets the orchestrator route to the validated
# Tier-1 flow instead of the serial-only Tier-0.
m = _SUPREME_RX.search(text)
if m:
raw = m.group(2)
norm = normalize_case_number(raw)
filed = _split_filed(norm)
return CourtCitation(
tier="supreme",
court_prefix=m.group(1),
case_number_raw=raw,
case_number_norm=normalize_case_number(raw),
case_number_norm=norm,
file_number=filed[0] if filed else None,
month=filed[1] if filed else None,
year=filed[2] if filed else None,
)
# 3. District / admin prefix → Tier 1.

View File

@@ -170,14 +170,15 @@ async def fetch_and_ingest(
await db.court_fetch_job_update(job_id, status="running", bump_attempts=True)
# ── fetch ──
# Route by what the number lets us do, not just the court prefix: נט המשפט
# (Tier 1) serves ALL courts — Supreme included — as long as the citation
# carries a נט-format triple (file-month-year). Validated live on both
# district (עת"מ 43830-12-24) and Supreme (בר"מ 72182-06-25). Only a serial-
# only Supreme number (e.g. עע"מ 5886/24, no month) can't be looked up that
# way → fall through to Tier 0 (supremedecisions).
has_net_format = bool(cit.file_number and cit.month and cit.year)
try:
if cit.tier == "supreme":
fetched = await fetch_supreme_verdict(
citation=citation, case_number_norm=cit.case_number_norm
)
content, filename = fetched.content, fetched.filename
source_url, court = fetched.source_url, fetched.court
else: # admin → Tier 1
if has_net_format:
res = await _fetch_tier1_admin(cit)
if not res.get("ok"):
raise RuntimeError(res.get("reason") or "אחזור נכשל")
@@ -186,7 +187,20 @@ async def fetch_and_ingest(
filename = res.get("filename") or f"{cit.case_number_norm}.pdf"
source_url = res.get("source_url", "")
court = res.get("court") or cit.court_prefix
except (_Tier1Unavailable, SupremeFetchError, RuntimeError) as e:
elif cit.tier == "supreme":
fetched = await fetch_supreme_verdict(
citation=citation, case_number_norm=cit.case_number_norm
)
content, filename = fetched.content, fetched.filename
source_url, court = fetched.source_url, fetched.court
else:
raise RuntimeError(
f"מספר-תיק {cit.case_number_norm} אינו בפורמט נט-המשפט ואינו עליון — "
"אין מסלול-אחזור ציבורי"
)
except Exception as e: # noqa: BLE001 — any fetch error is recorded, never
# left hanging in 'running' (INV-CF2). _record_failure escalates to
# 'manual' after MAX_AUTONOMOUS_ATTEMPTS (INV-CF3).
return await _record_failure(job_id, cit, citation, str(e))
# ── ingest into the canonical pipeline (INV-CF1) ──

View File

@@ -78,3 +78,14 @@ def test_empty_and_garbage():
def test_normalize_case_number():
    """Slash-separated case numbers normalize to the dash-separated form.

    Covers a three-part number preceded by a court prefix and a bare
    two-part number.
    """
    expected_by_input = {
        'עת"מ 46111/12/22': "46111-12-22",
        "1110/20": "1110-20",
    }
    for raw, expected in expected_by_input.items():
        assert normalize_case_number(raw) == expected
def test_supreme_with_net_format_triple():
    """Supreme citations expose the file-month-year triple when they carry one.

    A Supreme prefix followed by a file-month-year number keeps tier
    "supreme" while also populating file_number/month/year, so a caller can
    route on the triple. A serial-style Supreme number yields no file_number.
    """
    with_triple = classify('בר"מ 72182-06-25 הימנותא נ\' הוועדה המקומית')
    assert with_triple.tier == "supreme"
    triple = (with_triple.file_number, with_triple.month, with_triple.year)
    assert triple == ("72182", "06", "25")

    # Serial-format Supreme number: no triple, so it stays Tier-0-only.
    serial_only = classify('עע"מ 5886/24')
    assert serial_only.tier == "supreme"
    assert serial_only.file_number is None