From 36f21c815eff2a985097df99016b5213bff42873 Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 4 May 2026 05:13:10 +0000 Subject: [PATCH] fix(precedents): distinguish silent extraction failure from "no halachot" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Observed 2026-05-03: a `precedent_process_pending(halacha)` run that chained two precedents (1110/20 → 317/10) succeeded for the first (9 halachot, 129 chunks) and produced status=`no_halachot` for the second despite it being a 47KB Supreme Court ruling with rich legal analysis. A manual single-precedent re-run on 317/10 immediately extracted 53 halachot. Diagnosis: every chunk's claude_session call in the back-to-back run silently failed (likely Anthropic rate-limit storm after the 1110/20 token burn), and the empty list was reported as "Claude looked and found nothing" — same code path as a real 0-halacha ruling. The user couldn't tell the difference. Three changes: 1. Surface chunk-level failures (halacha_extractor.py) `_extract_chunk` now returns `(halachot, succeeded)` so the caller can count how many chunks crashed. `extract()` uses this to distinguish: - `no_halachot` — chunks ran cleanly, Claude found nothing - `extraction_failed` — ≥50% of chunks crashed AND zero halachot came back (rate limit, subprocess crash, etc.) When `extraction_failed`, DB status is left as 'processing' so the request stays in the queue for the caller to retry — instead of the old behaviour where it got marked 'completed' and silently dropped from the queue. 2. Inter-precedent cooldown (precedent_library.py) `process_pending_extractions` now sleeps 30s between precedents. Anthropic rate-limits per-org, and back-to-back large rulings (~4M tokens for 1110/20, immediately followed by another 2-3M) was the empirical trigger. 30s gives the per-minute counter time to drain. 3. Auto-retry on extraction_failed (precedent_library.py) When a precedent comes back as `extraction_failed`, retry once after a 60s cooldown before giving up. Rate-limit storms are transient — the manual re-run of 317/10 minutes later succeeded with 53 halachot and zero chunk failures, confirming a single retry is sufficient. Only retries `extraction_failed`; never `no_halachot` (Claude looked and there genuinely is no holding). The DB status now ends up as 'failed' only after retries are exhausted, matching the UI's terminal-failure chip. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../legal_mcp/services/halacha_extractor.py | 64 ++++++++++++++++--- .../legal_mcp/services/precedent_library.py | 64 +++++++++++++++++-- 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/halacha_extractor.py b/mcp-server/src/legal_mcp/services/halacha_extractor.py index d3267f4..67c5523 100644 --- a/mcp-server/src/legal_mcp/services/halacha_extractor.py +++ b/mcp-server/src/legal_mcp/services/halacha_extractor.py @@ -36,6 +36,14 @@ logger = logging.getLogger(__name__) CHUNK_CONCURRENCY = 3 CHUNK_RETRY_ATTEMPTS = 1 +# If at least this fraction of chunks crash and the precedent yields zero +# halachot, treat the run as `extraction_failed` rather than `no_halachot`. +# Picked at 0.5 so a precedent that genuinely has no holdings (e.g. a remand +# ruling that just sends the case back) isn't misflagged just because a few +# chunks timed out, while a real rate-limit storm — which kills nearly every +# call — is correctly distinguished and re-tried by the caller. +EXTRACTION_FAILURE_THRESHOLD = 0.5 + # Sections from which to extract. facts/intro/appellant_claims/respondent_claims # never contain holdings, only positions, so we skip them. EXTRACTABLE_SECTIONS = ("legal_analysis", "ruling", "conclusion") @@ -267,12 +275,18 @@ async def _extract_chunk( chunk_total: int, context: str, is_binding: bool, -) -> list[dict]: +) -> tuple[list[dict], bool]: """Run the halacha extractor on one chunk with retry. - The prompt branches on ``is_binding`` so that non-binding sources - (other appeals committees, district courts) yield application / - persuasive entries rather than a forced 0-result strict halacha pass. + Returns ``(halachot, succeeded)`` so the caller can distinguish "Claude + said there are no halachot here" (`(_, True)`) from "every attempt + crashed/timed out" (`(_, False)`). Without this distinction a precedent + that hit a rate-limit storm looks identical to one that genuinely has no + halachot — and gets silently marked `no_halachot`. + + The prompt branches on ``is_binding`` so non-binding sources (other + appeals committees, district courts) yield application/persuasive + entries rather than a forced 0-result strict halacha pass. """ base_prompt = ( HALACHA_EXTRACTION_PROMPT_BINDING if is_binding @@ -299,7 +313,7 @@ async def _extract_chunk( ) continue if isinstance(result, list): - return result + return result, True logger.warning( "halacha_extractor chunk %d/%d attempt %d returned non-list (%s)", chunk_index + 1, chunk_total, attempt + 1, type(result).__name__, @@ -308,7 +322,7 @@ async def _extract_chunk( "halacha_extractor chunk %d/%d failed after %d attempts: %s", chunk_index + 1, chunk_total, CHUNK_RETRY_ATTEMPTS + 1, last_err, ) - return [] + return [], False async def extract(case_law_id: UUID | str) -> dict: @@ -359,7 +373,7 @@ async def extract(case_law_id: UUID | str) -> dict: sem = asyncio.Semaphore(CHUNK_CONCURRENCY) - async def _bounded(idx: int, chunk_row: dict) -> list[dict]: + async def _bounded(idx: int, chunk_row: dict) -> tuple[list[dict], bool]: async with sem: return await _extract_chunk( chunk_row["content"], chunk_row["section_type"], @@ -370,12 +384,44 @@ async def extract(case_law_id: UUID | str) -> dict: *[_bounded(i, c) for i, c in enumerate(chunks)] ) raw_halachot: list[dict] = [] - for items in chunk_results: + failed_chunks = 0 + for items, ok in chunk_results: raw_halachot.extend(items) + if not ok: + failed_chunks += 1 + + # If most chunks failed (rate limit storm, claude_session crash, etc.) + # do NOT touch the DB status — leave it 'processing' so the caller can + # retry without the request falling out of the queue. The caller + # (`process_pending_extractions`) is responsible for either retrying or + # finalising the status as 'failed' after retries are exhausted. This + # is the bug that produced 317/10's silent `no_halachot` after a + # 129-chunk neighbour saturated the API. + failure_rate = failed_chunks / len(chunks) if chunks else 0 + if failure_rate >= EXTRACTION_FAILURE_THRESHOLD and not raw_halachot: + logger.error( + "halacha_extractor: case_law=%s extraction_failed — " + "%d/%d chunks failed (rate=%.0f%%), no halachot retrieved. " + "DB status left as 'processing' for caller-level retry.", + case_law_id, failed_chunks, len(chunks), failure_rate * 100, + ) + return { + "status": "extraction_failed", + "extracted": 0, + "stored": 0, + "failed_chunks": failed_chunks, + "total_chunks": len(chunks), + } if not raw_halachot: await db.set_case_law_halacha_status(case_law_id, "completed") - return {"status": "no_halachot", "extracted": 0, "stored": 0} + return { + "status": "no_halachot", + "extracted": 0, + "stored": 0, + "failed_chunks": failed_chunks, + "total_chunks": len(chunks), + } # Validate against the full text of the precedent for the quote check. full_text = record.get("full_text") or "" diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py index 81c6bae..de4a1c2 100644 --- a/mcp-server/src/legal_mcp/services/precedent_library.py +++ b/mcp-server/src/legal_mcp/services/precedent_library.py @@ -270,6 +270,19 @@ async def reextract_halachot( return result +# Wait this many seconds between precedents in a multi-precedent run. +# Anthropic rate-limits across the org, so back-to-back extractions of large +# rulings (e.g. 129 chunks for one, then 79 for another) can spill the second +# precedent into a 429 storm. Observed 2026-05-03: 1110/20 succeeded with 9 +# halachot, 317/10 immediately after returned silent no_halachot. +INTER_PRECEDENT_COOLDOWN_SEC = 30 + +# How many times to retry a precedent that came back as 'extraction_failed' +# (i.e. >50% chunks crashed). Each retry uses a longer cooldown. +PRECEDENT_RETRY_ATTEMPTS = 1 +PRECEDENT_RETRY_COOLDOWN_SEC = 60 + + async def process_pending_extractions(kind: str = "metadata", limit: int = 20) -> dict: """Drain the extraction queue (UI-button-stamped requests). @@ -279,6 +292,14 @@ async def process_pending_extractions(kind: str = "metadata", limit: int = 20) - tool — picks each stamped row up, runs the extractor, and clears the timestamp. + Sequencing: precedents are processed serially (never in parallel) and + each is followed by a short cooldown so the Anthropic rate-limit + counter has time to drain before the next big precedent starts. If + halacha extraction comes back as ``extraction_failed`` we retry the + same precedent once with a longer cooldown — matching the empirical + pattern where the second precedent in a back-to-back run gets + rate-limited but recovers after a brief pause. + Args: kind: 'metadata' or 'halacha'. limit: max rows to process this run. @@ -292,15 +313,46 @@ async def process_pending_extractions(kind: str = "metadata", limit: int = 20) - if not pending: return {"status": "no_pending", "kind": kind, "processed": 0, "results": []} + async def _run_once(cid: UUID) -> dict: + if kind == "metadata": + return await precedent_metadata_extractor.extract_and_apply(cid) + return await halacha_extractor.extract(cid) + results: list[dict] = [] processed = 0 - for row in pending: + for idx, row in enumerate(pending): + if idx > 0: + await asyncio.sleep(INTER_PRECEDENT_COOLDOWN_SEC) cid = UUID(str(row["id"])) + attempts = 0 + result: dict = {} try: - if kind == "metadata": - result = await precedent_metadata_extractor.extract_and_apply(cid) - else: - result = await halacha_extractor.extract(cid) + result = await _run_once(cid) + # Retry only on systematic extraction failure (rate-limit storm). + # Don't retry on 'no_halachot' — that means Claude looked and + # genuinely found nothing. + while ( + result.get("status") == "extraction_failed" + and attempts < PRECEDENT_RETRY_ATTEMPTS + ): + attempts += 1 + logger.warning( + "process_pending_extractions: %s returned extraction_failed " + "(%d/%d chunks crashed), retry %d/%d after %ds cooldown", + cid, + result.get("failed_chunks", 0), + result.get("total_chunks", 0), + attempts, PRECEDENT_RETRY_ATTEMPTS, + PRECEDENT_RETRY_COOLDOWN_SEC, + ) + await asyncio.sleep(PRECEDENT_RETRY_COOLDOWN_SEC) + result = await _run_once(cid) + + # Finalise: success or terminal failure both clear the request + # so the queue moves on. (Use 'failed' DB state for terminal + # extraction_failed so the UI shows the warning chip.) + if kind == "halacha" and result.get("status") == "extraction_failed": + await db.set_case_law_halacha_status(cid, "failed") await db.clear_extraction_request(cid, kind=kind) processed += 1 results.append({ @@ -309,6 +361,7 @@ async def process_pending_extractions(kind: str = "metadata", limit: int = 20) - "status": result.get("status", "unknown"), "fields": result.get("fields", []), "stored": result.get("stored", 0), + "retry_attempts": attempts, }) except Exception as e: logger.exception("process_pending_extractions failed for %s: %s", cid, e) @@ -317,6 +370,7 @@ async def process_pending_extractions(kind: str = "metadata", limit: int = 20) - "case_number": row.get("case_number", ""), "status": "failed", "error": str(e), + "retry_attempts": attempts, }) # Don't clear the request — it stays for the next run.