Merge pull request 'fix(X13): ניתוב לפי פורמט-נט; טיפול-שגיאות חסין באחזור' (#124) from worktree-court-fetch-routing into main
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m43s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m43s
This commit was merged in pull request #124.
This commit is contained in:
@@ -32,6 +32,7 @@ dependencies = [
|
|||||||
court-fetch = [
|
court-fetch = [
|
||||||
"camoufox>=0.4.11",
|
"camoufox>=0.4.11",
|
||||||
"faster-whisper>=1.0.0",
|
"faster-whisper>=1.0.0",
|
||||||
|
"h2>=4.0.0", # Tier-0 supremedecisions uses httpx http2
|
||||||
]
|
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|||||||
@@ -157,15 +157,23 @@ def classify(citation: str) -> CourtCitation:
|
|||||||
case_number_norm=normalize_case_number(raw),
|
case_number_norm=normalize_case_number(raw),
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2. Supreme Court prefix → Tier 0.
|
# 2. Supreme Court prefix → Tier 0. Still parse a נט-format triple when the
|
||||||
|
# number carries one (e.g. בר"מ 72182-06-25): נט המשפט serves Supreme
|
||||||
|
# cases too, so a triple lets the orchestrator route to the validated
|
||||||
|
# Tier-1 flow instead of the serial-only Tier-0.
|
||||||
m = _SUPREME_RX.search(text)
|
m = _SUPREME_RX.search(text)
|
||||||
if m:
|
if m:
|
||||||
raw = m.group(2)
|
raw = m.group(2)
|
||||||
|
norm = normalize_case_number(raw)
|
||||||
|
filed = _split_filed(norm)
|
||||||
return CourtCitation(
|
return CourtCitation(
|
||||||
tier="supreme",
|
tier="supreme",
|
||||||
court_prefix=m.group(1),
|
court_prefix=m.group(1),
|
||||||
case_number_raw=raw,
|
case_number_raw=raw,
|
||||||
case_number_norm=normalize_case_number(raw),
|
case_number_norm=norm,
|
||||||
|
file_number=filed[0] if filed else None,
|
||||||
|
month=filed[1] if filed else None,
|
||||||
|
year=filed[2] if filed else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. District / admin prefix → Tier 1.
|
# 3. District / admin prefix → Tier 1.
|
||||||
|
|||||||
@@ -170,14 +170,15 @@ async def fetch_and_ingest(
|
|||||||
await db.court_fetch_job_update(job_id, status="running", bump_attempts=True)
|
await db.court_fetch_job_update(job_id, status="running", bump_attempts=True)
|
||||||
|
|
||||||
# ── fetch ──
|
# ── fetch ──
|
||||||
|
# Route by what the number lets us do, not just the court prefix: נט המשפט
|
||||||
|
# (Tier 1) serves ALL courts — Supreme included — as long as the citation
|
||||||
|
# carries a נט-format triple (file-month-year). Validated live on both
|
||||||
|
# district (עת"מ 43830-12-24) and Supreme (בר"מ 72182-06-25). Only a serial-
|
||||||
|
# only Supreme number (e.g. עע"מ 5886/24, no month) can't be looked up that
|
||||||
|
# way → fall through to Tier 0 (supremedecisions).
|
||||||
|
has_net_format = bool(cit.file_number and cit.month and cit.year)
|
||||||
try:
|
try:
|
||||||
if cit.tier == "supreme":
|
if has_net_format:
|
||||||
fetched = await fetch_supreme_verdict(
|
|
||||||
citation=citation, case_number_norm=cit.case_number_norm
|
|
||||||
)
|
|
||||||
content, filename = fetched.content, fetched.filename
|
|
||||||
source_url, court = fetched.source_url, fetched.court
|
|
||||||
else: # admin → Tier 1
|
|
||||||
res = await _fetch_tier1_admin(cit)
|
res = await _fetch_tier1_admin(cit)
|
||||||
if not res.get("ok"):
|
if not res.get("ok"):
|
||||||
raise RuntimeError(res.get("reason") or "אחזור נכשל")
|
raise RuntimeError(res.get("reason") or "אחזור נכשל")
|
||||||
@@ -186,7 +187,20 @@ async def fetch_and_ingest(
|
|||||||
filename = res.get("filename") or f"{cit.case_number_norm}.pdf"
|
filename = res.get("filename") or f"{cit.case_number_norm}.pdf"
|
||||||
source_url = res.get("source_url", "")
|
source_url = res.get("source_url", "")
|
||||||
court = res.get("court") or cit.court_prefix
|
court = res.get("court") or cit.court_prefix
|
||||||
except (_Tier1Unavailable, SupremeFetchError, RuntimeError) as e:
|
elif cit.tier == "supreme":
|
||||||
|
fetched = await fetch_supreme_verdict(
|
||||||
|
citation=citation, case_number_norm=cit.case_number_norm
|
||||||
|
)
|
||||||
|
content, filename = fetched.content, fetched.filename
|
||||||
|
source_url, court = fetched.source_url, fetched.court
|
||||||
|
else:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"מספר-תיק {cit.case_number_norm} אינו בפורמט נט-המשפט ואינו עליון — "
|
||||||
|
"אין מסלול-אחזור ציבורי"
|
||||||
|
)
|
||||||
|
except Exception as e: # noqa: BLE001 — any fetch error is recorded, never
|
||||||
|
# left hanging in 'running' (INV-CF2). _record_failure escalates to
|
||||||
|
# 'manual' after MAX_AUTONOMOUS_ATTEMPTS (INV-CF3).
|
||||||
return await _record_failure(job_id, cit, citation, str(e))
|
return await _record_failure(job_id, cit, citation, str(e))
|
||||||
|
|
||||||
# ── ingest into the canonical pipeline (INV-CF1) ──
|
# ── ingest into the canonical pipeline (INV-CF1) ──
|
||||||
|
|||||||
@@ -78,3 +78,14 @@ def test_empty_and_garbage():
|
|||||||
def test_normalize_case_number():
|
def test_normalize_case_number():
|
||||||
assert normalize_case_number('עת"מ 46111/12/22') == "46111-12-22"
|
assert normalize_case_number('עת"מ 46111/12/22') == "46111-12-22"
|
||||||
assert normalize_case_number("1110/20") == "1110-20"
|
assert normalize_case_number("1110/20") == "1110-20"
|
||||||
|
|
||||||
|
|
||||||
|
def test_supreme_with_net_format_triple():
    """Check that a Supreme-prefixed citation exposes a net-format triple.

    When the case number is written as file-month-year, ``classify`` is
    expected to keep the ``"supreme"`` tier while also filling in
    ``file_number``, ``month`` and ``year``, so the orchestrator can route
    the citation to the Tier-1 flow. A serial-only Supreme number carries no
    such triple and must leave ``file_number`` unset.
    """
    # Supreme prefix with a file-month-year number: the triple is populated.
    net_style = classify('בר"מ 72182-06-25 הימנותא נ\' הוועדה המקומית')
    assert net_style.tier == "supreme"
    triple = (net_style.file_number, net_style.month, net_style.year)
    assert triple == ("72182", "06", "25")

    # Serial-format Supreme number: no triple, so it stays Tier-0-only.
    serial_style = classify('עע"מ 5886/24')
    assert serial_style.tier == "supreme"
    assert serial_style.file_number is None
|
||||||
|
|||||||
Reference in New Issue
Block a user