feat(digests): יומון-לא-מקושר → "פסיקה חסרה" — סוף לבליעה-שקטה (#136)
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 4s
Lint — undefined names / undefined-names (pull_request) Successful in 11s

צינור-היומונים (X12) קישר אוטומטית רק לפסיקה שכבר בקורפוס; ציטוט שלא נמצא נבלע
בשקט אלא אם היה פס"ד בית-משפט בר-אחזור. כך 369 עררים + 21 לא-מסווגים שהוזכרו
ביומונים מעולם לא הופיעו כפער.

תיקון (G2 — מסווג יחיד + נתיב-MP יחיד; INV-DIG3/CF2 — אין בליעה-שקטה):
- digest_library: ה-gap branch (try_autolink ללא-התאמה) קורא כעת
  _handle_unlinked_citation — ניתוב דרך court_citation.classify: supreme/admin →
  court_fetch_job (כקודם; האורקסטרטור פותח MP משלו בכשל), skip(ערר/בל"מ)/unknown →
  missing_precedent (discovery_source='digest', provenance=מס'-יומון+digest_id),
  deduped designator-aware דרך citation_norm (#143).
- court_fetch_orchestrator._open_gap: הוקשח ל-dedup אמיתי (find לפני create) +
  discovery_source='court_fetch' — התגובה הבטיחה "deduped" אך create לא דידאפ.
- scripts/backfill_digest_missing_precedents.py: מריץ try_autolink על 461 הקיימים
  (dry-run: 71 fetchable + 390 gap). אידמפוטנטי. יורץ אחרי הפריסה.

תלוי-הקשר #143 (citation_norm + נתיב-יצירה). השפעת-UI: דף "פסיקה חסרה" יגדל
מ-207 ל-~597 פתוחים (אושר ע"י חיים).

בדיקות: test_digest_unlinked_citation (ערר→MP, פס"ד→fetch, dedup, unknown→MP).
כל 360 עוברות. guards נקיים.

Invariants: G2 (מסווג+נתיב-MP יחיד), INV-DIG3/INV-CF2 (פער גלוי, לא נבלע),
INV-DIG1 (יומון מצביע, לא מצוטט), G1 (dedup מנורמל), G12.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 09:06:12 +00:00
parent ea8712ecff
commit 9fd506ff2b
5 changed files with 204 additions and 21 deletions

View File

@@ -314,10 +314,14 @@ async def _record_failure(
async def _open_gap(citation: str, *, reason: str) -> None:
"""Open a missing_precedent gap so the chair sees it (INV-CF2/CF3).
Best-effort + de-duplicated by the missing_precedents layer; a failure
here is logged, never raised (it must not mask the original outcome).
Best-effort + de-duplicated (designator-aware via citation_norm, #143); a
failure here is logged, never raised (it must not mask the original outcome).
"""
try:
await db.create_missing_precedent(citation=citation, notes=reason)
if await db.find_missing_precedent_by_citation(citation):
return
await db.create_missing_precedent(
citation=citation, notes=reason, discovery_source="court_fetch",
)
except Exception:
logger.warning("could not open missing_precedent for %s", citation)

View File

@@ -83,38 +83,60 @@ async def try_autolink(digest_id: UUID | str, underlying_citation: str) -> str |
logger.warning("digest try_autolink lookup failed for %r: %s", citation, e)
return None
if not match:
# Gap (INV-DIG3): the underlying ruling isn't in the corpus. If it's a
# court verdict (not ועדת-ערר), enqueue an X13 auto-fetch job so the gap
# is actionable instead of silently dropped (INV-CF2). Never raises.
await _enqueue_court_fetch(digest_id, citation)
# Gap (INV-DIG3): the underlying ruling isn't in the corpus. Surface it —
# never drop silently (INV-CF2). Court verdicts (supreme/admin) get an X13
# auto-fetch job; ועדת-ערר / unknown — which נט-המשפט can't serve — get a
# missing_precedent the chair sees on /missing-precedents (#136). Never
# raises.
await _handle_unlinked_citation(digest_id, citation)
return None
await db.link_digest_to_case_law(digest_id, match["id"])
return str(match["id"])
async def _enqueue_court_fetch(digest_id: UUID | str, citation: str) -> None:
"""Queue an X13 court-verdict fetch for an unlinked digest citation.
async def _handle_unlinked_citation(digest_id: UUID | str, citation: str) -> None:
"""Surface an unlinked digest citation — auto-fetch if possible, else record
a missing_precedent. Closes the silent-drop gap (#136, INV-DIG3/CF2).
Court rulings (supreme/admin) → a ``court_fetch_jobs`` row drained later by
``court_fetch_drain``. ועדת-ערר (skip) is left alone — it needs Nevo and is
surfaced through the normal missing-precedent path, not auto-fetch.
Routing via the ONE canonical classifier (``court_citation.classify``):
* supreme/admin → ``court_fetch_jobs`` (drained by X13; on fetch failure the
orchestrator opens its own missing_precedent, so no double-record here).
* skip (ערר/בל"מ) / unknown → ``missing_precedents`` (needs Nevo / manual;
נט-המשפט can't serve it). Deduped designator-aware via citation_norm
(#143) so re-runs and overlaps don't pile up.
"""
try:
from legal_mcp.services import court_citation
cit = court_citation.classify(citation)
if cit.tier not in ("supreme", "admin"):
if cit.tier in ("supreme", "admin"):
await db.court_fetch_job_upsert(
case_number_norm=cit.case_number_norm,
citation_raw=citation,
tier=cit.tier,
court=cit.court_prefix,
digest_id=UUID(str(digest_id)),
)
logger.info("digest %s: enqueued court-fetch for %r (tier=%s)",
digest_id, citation, cit.tier)
return
await db.court_fetch_job_upsert(
case_number_norm=cit.case_number_norm,
citation_raw=citation,
tier=cit.tier,
court=cit.court_prefix,
digest_id=UUID(str(digest_id)),
# Non-fetchable (ערר/בל"מ/unknown) — open a missing_precedent gap so it's
# visible and actionable instead of vanishing. Dedup first (#143).
if await db.find_missing_precedent_by_citation(citation):
return
digest = await db.get_digest(digest_id)
yomon = (digest or {}).get("yomon_number") or ""
note = (f"זוהה דרך יומון מס' {yomon} (digest_id={digest_id})" if yomon
else f"זוהה דרך יומון (digest_id={digest_id})")
await db.create_missing_precedent(
citation=citation,
discovery_source="digest",
notes=note,
)
logger.info("digest %s: enqueued court-fetch for %r (tier=%s)",
logger.info("digest %s: opened missing_precedent for %r (tier=%s)",
digest_id, citation, cit.tier)
except Exception as e: # never break digest ingest
logger.warning("digest court-fetch enqueue failed for %r: %s", citation, e)
logger.warning("digest unlinked-citation handling failed for %r: %s",
citation, e)
# ── Container-safe creation (web upload) — no LLM, no embedding ──────

View File

@@ -0,0 +1,88 @@
"""Regression test for #136 — an unlinked digest citation must never be dropped
silently.
``_handle_unlinked_citation`` routes via the canonical classifier:
* supreme/admin → a court-fetch job (no missing_precedent here — the X13
orchestrator opens its own on failure),
* skip (ערר/בל"מ) / unknown → a deduped missing_precedent (discovery_source
'digest'), which previously vanished.
Runs OFFLINE — monkeypatches the db calls and records what each routing did.
"""
from __future__ import annotations
import asyncio
import pytest
from legal_mcp.services import digest_library as dl
from legal_mcp.services import db
class _Spy:
    """Recording stand-in for the ``db`` calls made while routing a citation.

    After ``install`` the patched ``db`` functions no longer touch a database:
    they append their keyword arguments to the lists below (or return canned
    values), so a test can inspect which route was taken.
    """

    def __init__(self):
        # Keyword arguments of each ``court_fetch_job_upsert`` call, in order.
        self.court_fetch = []
        # Keyword arguments of each ``create_missing_precedent`` call, in order.
        self.created_mp = []
        # Value handed back by the patched ``find_missing_precedent_by_citation``;
        # ``None`` means "no gap recorded yet".
        self.find_mp_returns = None

    def install(self, monkeypatch):
        """Patch the four ``db`` coroutines with recording/canned stubs.

        ``monkeypatch`` is pytest's fixture, so every patch is undone when the
        test finishes.
        """

        async def record_fetch_job(**kw):
            self.court_fetch.append(kw)

        async def lookup_existing_gap(citation, case_id=None):
            # Arguments are ignored on purpose: the test decides the answer.
            return self.find_mp_returns

        async def record_new_gap(**kw):
            self.created_mp.append(kw)
            return {"id": "mp"}

        async def load_digest(_id):
            # Every digest looks the same: only the yomon number is provided.
            return {"yomon_number": "5167"}

        stubs = (
            ("court_fetch_job_upsert", record_fetch_job),
            ("find_missing_precedent_by_citation", lookup_existing_gap),
            ("create_missing_precedent", record_new_gap),
            ("get_digest", load_digest),
        )
        for attr_name, stub in stubs:
            monkeypatch.setattr(db, attr_name, stub)
def _run(coro):
    """Drive ``coro`` to completion on a private event loop and return its result.

    A fresh loop is created for every call and is always closed afterwards,
    including when the awaited coroutine raises.
    """
    private_loop = asyncio.new_event_loop()
    try:
        outcome = private_loop.run_until_complete(coro)
    finally:
        private_loop.close()
    return outcome
@pytest.fixture
def spy(monkeypatch):
    """Provide a ``_Spy`` whose stubs are already patched onto ``db``."""
    recorder = _Spy()
    recorder.install(monkeypatch)
    return recorder
# Fixed digest id (UUID-formatted string) passed as the digest argument to
# ``_handle_unlinked_citation`` in every test below.
_DID = "11111111-1111-1111-1111-111111111111"
def test_committee_citation_opens_missing_precedent(spy):
    """An ערר citation yields one digest-sourced missing_precedent and no fetch job."""
    citation = "ערר 1198-12-25 זאטוס"
    _run(dl._handle_unlinked_citation(_DID, citation))

    # An ערר citation is never queued for auto-fetch.
    assert spy.court_fetch == []

    # Exactly one gap was created; show the recorded calls on failure.
    assert len(spy.created_mp) == 1, spy.created_mp
    recorded = spy.created_mp[0]
    assert recorded["discovery_source"] == "digest"
    # Provenance: the note must mention the digest (יומון) it came from.
    assert "יומון" in (recorded["notes"] or "")
def test_court_verdict_enqueues_fetch_not_mp(spy):
    """A court-verdict citation is queued for fetching and opens no gap here."""
    citation = 'עע"מ 3975/22 פלוני'
    _run(dl._handle_unlinked_citation(_DID, citation))

    # One fetch job was enqueued; show the recorded calls on failure.
    assert len(spy.court_fetch) == 1, spy.court_fetch
    # Fetchable citations leave missing_precedent creation to the orchestrator.
    assert spy.created_mp == []
def test_dedup_skips_existing_gap(spy):
    """An already-recorded gap must not be duplicated.

    The lookup stub reports an existing missing_precedent, so the handler has
    to stop before creating another one.
    """
    spy.find_mp_returns = {"id": "existing"}  # gap already recorded
    _run(dl._handle_unlinked_citation(_DID, "ערר 1192/18"))

    # Guard against a false pass: had the citation been routed to court-fetch,
    # ``created_mp`` would be empty without the dedup branch ever running.
    assert spy.court_fetch == []
    assert spy.created_mp == []  # no duplicate
def test_unknown_citation_opens_missing_precedent(spy):
    """A citation with no recognisable court still surfaces as a gap."""
    citation = "משהו בלי ערכאה ברורה"
    _run(dl._handle_unlinked_citation(_DID, citation))

    # An unknown tier is not fetchable, so nothing may be queued for fetching,
    # and the citation must be recorded as a gap rather than dropped.
    assert spy.court_fetch == []
    assert len(spy.created_mp) == 1