fix(halacha): rate-limit refusal ≠ empty answer — לא checkpoint chunk בכשל (#144)

תיקון-ליבה (b): כש-claude CLI מחזיר exit=0 עם הודעת-מגבלה/שגיאה כ-result, query זיהה אותה כהצלחה → _extract_chunk קיבל []/non-list וסימן chunk כ-done-ריק; resume דילג עליו לתמיד → תת-חילוץ קבוע (3→1→0). עכשיו is_error/_looks_like_limit_notice הופכים אותה לכשל-חולף → retry → raise → chunk נשאר un-checkpointed → resume משחזר (כך force-delete כבר לא הרסני-לצמיתות). + churn-detect במתזמר (Δdone<0 / Δhal<-2 → אזהרה+churn_ok ב-JSON). + scripts/reconcile_under_extracted_halacha.py — שחזור completed-עם-0-הלכות-ו≥3 מקטעי-נימוק (dry-run הראה 15 מועמדים); נתיב-הזמנה קנוני (G2), שמרני (לא remand). הערה: אטומיות-מלאה (staging_run_id) נדחתה — PR #257 מיתן את ה-trigger, ו-(b)+resume מונעים אובדן-קבוע (force-delete מתאושש דרך resume). בדיקות: test_claude_session_limit_notice. כל 354 עוברות. guards נקיים. Invariants: G1, INV-G3/X16 (checkpoint=הושלם-באמת), INV-G4 (churn לא-שקט), G12. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 04:21:15 +00:00
parent 9e45e5a46d
commit 6cc100f9f8
5 changed files with 190 additions and 4 deletions
--- a/mcp-server/src/legal_mcp/services/claude_session.py
+++ b/mcp-server/src/legal_mcp/services/claude_session.py
@@ -60,6 +60,31 @@ _SESSION_MARKER_EXACT = frozenset({"AI_AGENT", "CLAUDE_EFFORT"})
 MAX_RETRIES = 3
 RETRY_BACKOFF_BASE = 5  # seconds; sleep = base * attempt_number

+# Phrases the CLI emits as the "result" of an exit-0 run that actually hit a
+# usage/rate limit (a refusal NOTICE, not a real answer). Matched only against a
+# result that is NOT structured output (doesn't start with [ or {), so a genuine
+# JSON extraction containing these words as content is never mis-flagged.
+_LIMIT_NOTICE_MARKERS = (
+    "usage limit",
+    "rate limit",
+    "limit reached",
+    "limit will reset",
+    "try again later",
+)
+
+
+def _looks_like_limit_notice(data: dict) -> bool:
+    """True if an exit-0 ``result`` is really a usage/rate-limit refusal."""
+    result = data.get("result")
+    if not isinstance(result, str):
+        return False
+    stripped = result.lstrip()
+    # Structured output (JSON array/object) is a real answer, never a notice.
+    if stripped.startswith(("[", "{")):
+        return False
+    low = result.lower()
+    return any(m in low for m in _LIMIT_NOTICE_MARKERS)
+

 def _clean_subprocess_env() -> dict[str, str]:
    """Copy the current env minus Claude Code session markers.
@@ -187,11 +212,27 @@ async def query(
                try:
                    data = json.loads(stdout)
                    if isinstance(data, dict) and "result" in data:
-                        return data["result"]
-                    return stdout
+                        # A usage/rate-limit hit can exit 0 with a refusal NOTICE
+                        # as the "result" (is_error / error subtype). Returning it
+                        # as success makes callers treat a throttled run as a real
+                        # empty answer — e.g. the halacha extractor then checkpoints
+                        # the chunk as done-with-0-halachot and a resume skips it
+                        # forever (#138/#144 silent under-extraction). Treat it as a
+                        # transient failure → retry, and raise if it persists so the
+                        # chunk stays un-checkpointed for a real resume.
+                        if data.get("is_error") or _looks_like_limit_notice(data):
+                            last_err = (
+                                f"error result (subtype={data.get('subtype')}): "
+                                f"{str(data.get('result',''))[:200]}"
+                            )
+                        else:
+                            return data["result"]
+                    else:
+                        return stdout
                except json.JSONDecodeError:
                    return stdout
-            last_err = "empty response"
+            else:
+                last_err = "empty response"

        # Transient failure — retry with linear backoff unless this was the last try.
        if attempt < MAX_RETRIES:
--- a/mcp-server/tests/test_claude_session_limit_notice.py
+++ b/mcp-server/tests/test_claude_session_limit_notice.py
@@ -0,0 +1,40 @@
+"""Regression test for #144 — a usage/rate-limit refusal returned with exit 0
+must NOT be treated as a real (empty) answer.
+
+``_looks_like_limit_notice`` recognises a prose refusal in the CLI's exit-0
+``result`` so ``query`` raises instead of returning it — keeping the halacha
+extractor from checkpointing the chunk as done-with-0-halachot (silent
+under-extraction). A genuine JSON result (array/object) is never mis-flagged.
+"""
+
+from __future__ import annotations
+
+from legal_mcp.services import claude_session as cs
+
+
+def test_detects_usage_limit_prose():
+    assert cs._looks_like_limit_notice(
+        {"result": "Claude usage limit reached. Try again later."})
+    assert cs._looks_like_limit_notice(
+        {"result": "You've hit the rate limit for this model."})
+
+
+def test_json_array_result_is_not_a_notice():
+    # A real halacha extraction — must pass through as a valid answer.
+    assert not cs._looks_like_limit_notice(
+        {"result": '[{"rule_statement": "the usage limit of a permit ..."}]'})
+
+
+def test_json_object_result_is_not_a_notice():
+    assert not cs._looks_like_limit_notice(
+        {"result": '{"summary": "rate limit doctrine"}'})
+
+
+def test_plain_answer_without_markers_is_not_a_notice():
+    assert not cs._looks_like_limit_notice({"result": "אין הלכות בקטע זה."})
+
+
+def test_missing_or_nonstring_result():
+    assert not cs._looks_like_limit_notice({})
+    assert not cs._looks_like_limit_notice({"result": None})
+    assert not cs._looks_like_limit_notice({"result": 42})