fix(halacha): rate-limit refusal ≠ empty answer — לא checkpoint chunk בכשל (#144)
תיקון-ליבה (b): כש-claude CLI מחזיר exit=0 עם הודעת-מגבלה/שגיאה כ-result, query זיהה אותה כהצלחה → _extract_chunk קיבל []/non-list וסימן chunk כ-done-ריק; resume דילג עליו לתמיד → תת-חילוץ קבוע (3→1→0). עכשיו is_error/_looks_like_limit_notice הופכים אותה לכשל-חולף → retry → raise → chunk נשאר un-checkpointed → resume משחזר (כך force-delete כבר לא הרסני-לצמיתות). + churn-detect במתזמר (Δdone<0 / Δhal<-2 → אזהרה+churn_ok ב-JSON). + scripts/reconcile_under_extracted_halacha.py — שחזור completed-עם-0-הלכות-ו≥3 מקטעי-נימוק (dry-run הראה 15 מועמדים); נתיב-הזמנה קנוני (G2), שמרני (לא remand). הערה: אטומיות-מלאה (staging_run_id) נדחתה — PR #257 מיתן את ה-trigger, ו-(b)+resume מונעים אובדן-קבוע (force-delete מתאושש דרך resume). בדיקות: test_claude_session_limit_notice. כל 354 עוברות. guards נקיים. Invariants: G1, INV-G3/X16 (checkpoint=הושלם-באמת), INV-G4 (churn לא-שקט), G12. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -60,6 +60,31 @@ _SESSION_MARKER_EXACT = frozenset({"AI_AGENT", "CLAUDE_EFFORT"})
|
||||
MAX_RETRIES = 3
|
||||
RETRY_BACKOFF_BASE = 5 # seconds; sleep = base * attempt_number
|
||||
|
||||
# Phrases the CLI emits as the "result" of an exit-0 run that actually hit a
|
||||
# usage/rate limit (a refusal NOTICE, not a real answer). Matched only against a
|
||||
# result that is NOT structured output (doesn't start with [ or {), so a genuine
|
||||
# JSON extraction containing these words as content is never mis-flagged.
|
||||
_LIMIT_NOTICE_MARKERS = (
|
||||
"usage limit",
|
||||
"rate limit",
|
||||
"limit reached",
|
||||
"limit will reset",
|
||||
"try again later",
|
||||
)
|
||||
|
||||
|
||||
def _looks_like_limit_notice(data: dict) -> bool:
|
||||
"""True if an exit-0 ``result`` is really a usage/rate-limit refusal."""
|
||||
result = data.get("result")
|
||||
if not isinstance(result, str):
|
||||
return False
|
||||
stripped = result.lstrip()
|
||||
# Structured output (JSON array/object) is a real answer, never a notice.
|
||||
if stripped.startswith(("[", "{")):
|
||||
return False
|
||||
low = result.lower()
|
||||
return any(m in low for m in _LIMIT_NOTICE_MARKERS)
|
||||
|
||||
|
||||
def _clean_subprocess_env() -> dict[str, str]:
|
||||
"""Copy the current env minus Claude Code session markers.
|
||||
@@ -187,11 +212,27 @@ async def query(
|
||||
try:
|
||||
data = json.loads(stdout)
|
||||
if isinstance(data, dict) and "result" in data:
|
||||
return data["result"]
|
||||
return stdout
|
||||
# A usage/rate-limit hit can exit 0 with a refusal NOTICE
|
||||
# as the "result" (is_error / error subtype). Returning it
|
||||
# as success makes callers treat a throttled run as a real
|
||||
# empty answer — e.g. the halacha extractor then checkpoints
|
||||
# the chunk as done-with-0-halachot and a resume skips it
|
||||
# forever (#138/#144 silent under-extraction). Treat it as a
|
||||
# transient failure → retry, and raise if it persists so the
|
||||
# chunk stays un-checkpointed for a real resume.
|
||||
if data.get("is_error") or _looks_like_limit_notice(data):
|
||||
last_err = (
|
||||
f"error result (subtype={data.get('subtype')}): "
|
||||
f"{str(data.get('result',''))[:200]}"
|
||||
)
|
||||
else:
|
||||
return data["result"]
|
||||
else:
|
||||
return stdout
|
||||
except json.JSONDecodeError:
|
||||
return stdout
|
||||
last_err = "empty response"
|
||||
else:
|
||||
last_err = "empty response"
|
||||
|
||||
# Transient failure — retry with linear backoff unless this was the last try.
|
||||
if attempt < MAX_RETRIES:
|
||||
|
||||
40
mcp-server/tests/test_claude_session_limit_notice.py
Normal file
40
mcp-server/tests/test_claude_session_limit_notice.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Regression test for #144 — a usage/rate-limit refusal returned with exit 0
|
||||
must NOT be treated as a real (empty) answer.
|
||||
|
||||
``_looks_like_limit_notice`` recognises a prose refusal in the CLI's exit-0
|
||||
``result`` so ``query`` raises instead of returning it — keeping the halacha
|
||||
extractor from checkpointing the chunk as done-with-0-halachot (silent
|
||||
under-extraction). A genuine JSON result (array/object) is never mis-flagged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from legal_mcp.services import claude_session as cs
|
||||
|
||||
|
||||
def test_detects_usage_limit_prose():
|
||||
assert cs._looks_like_limit_notice(
|
||||
{"result": "Claude usage limit reached. Try again later."})
|
||||
assert cs._looks_like_limit_notice(
|
||||
{"result": "You've hit the rate limit for this model."})
|
||||
|
||||
|
||||
def test_json_array_result_is_not_a_notice():
|
||||
# A real halacha extraction — must pass through as a valid answer.
|
||||
assert not cs._looks_like_limit_notice(
|
||||
{"result": '[{"rule_statement": "the usage limit of a permit ..."}]'})
|
||||
|
||||
|
||||
def test_json_object_result_is_not_a_notice():
|
||||
assert not cs._looks_like_limit_notice(
|
||||
{"result": '{"summary": "rate limit doctrine"}'})
|
||||
|
||||
|
||||
def test_plain_answer_without_markers_is_not_a_notice():
|
||||
assert not cs._looks_like_limit_notice({"result": "אין הלכות בקטע זה."})
|
||||
|
||||
|
||||
def test_missing_or_nonstring_result():
|
||||
assert not cs._looks_like_limit_notice({})
|
||||
assert not cs._looks_like_limit_notice({"result": None})
|
||||
assert not cs._looks_like_limit_notice({"result": 42})
|
||||
Reference in New Issue
Block a user