fix: prevent write_interim_draft context overflow (465K → ≤300K chars)

Two bugs caused all 5 interim blocks to fail with "Claude CLI failed
(exit 1): unknown error":

1. source_context was embedded BOTH inside the prompt template (via
   {source_context}) AND prepended again in write_block — doubling every
   block's context size (232K chars × 2 = 465K chars).

2. _build_source_context loaded all 9 case documents for every block
   regardless of relevance.

Fixes:
- Remove the duplicate source_context prepend in write_block; the
  template already contains it via {source_context}
- Add per-block document filtering (_BLOCK_DOC_TYPES): block-he/zayin →
  empty, block-chet → protocol only, block-tet → appraisals only
- Add 400K char guard before calling claude -p with a descriptive error
  (vs opaque "exit 1: unknown error")
- Add prompt-size warning and size info in claude_session error messages

Result: block-he 0 chars, block-zayin 0 chars, block-vav ~172K,
block-chet ~45K, block-tet ~300K (all under 400K limit)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-10 10:49:47 +00:00
parent 10a63fb9e0
commit a9cd8aeb12
2 changed files with 43 additions and 11 deletions

View File

@@ -360,13 +360,9 @@ async def write_block(
post_hearing_context=post_hearing_context, post_hearing_context=post_hearing_context,
) )
# Restructure: sources first, then instructions # source_context is already embedded inside formatted_prompt via {source_context} in the
prompt = ( # template. Do NOT prepend it again — doing so doubles the prompt size (was 465K chars).
f"## חומרי מקור (מסמכים מלאים — צטט מהם מילה במילה כשאפשר):\n\n" prompt = formatted_prompt
f"{source_context}\n\n"
f"---\n\n"
f"{formatted_prompt}"
)
if instructions: if instructions:
prompt += f"\n\n## הנחיות נוספות:\n{instructions}" prompt += f"\n\n## הנחיות נוספות:\n{instructions}"
@@ -377,6 +373,19 @@ async def write_block(
if not dir_doc.get("approved"): if not dir_doc.get("approved"):
raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.") raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")
# Guard against context overflow before calling claude -p.
# Sonnet: 200K context → ~800K chars max; Opus: 200K context → same.
# In practice the CLI has crashed on prompts above ~400K chars, so use
# that as a conservative ceiling (well below the token limit).
_MAX_PROMPT_CHARS = 400_000
if len(prompt) > _MAX_PROMPT_CHARS:
raise RuntimeError(
f"Prompt too large for {block_id}: {len(prompt):,} chars "
f"(limit {_MAX_PROMPT_CHARS:,}). "
f"source_context: {len(source_context):,} chars. "
f"Reduce documents or call extract_appraiser_facts first."
)
# Call Claude via Claude Code session (no API) # Call Claude via Claude Code session (no API)
model_key = block_cfg["model"] model_key = block_cfg["model"]
timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT
@@ -414,16 +423,35 @@ def _build_case_context(case: dict, decision: dict | None) -> str:
- תוצאה: {outcome_heb}""" - תוצאה: {outcome_heb}"""
# Which doc_types are relevant per block.
# None → skip source docs entirely (block uses other context, e.g. claims_context)
# [] → include all doc types (default for unspecified blocks)
# [..] → include only the listed doc_type values
_BLOCK_DOC_TYPES: dict[str, list[str] | None] = {
"block-he": None, # only case_context needed; no full docs
"block-vav": ["appeal", "protocol"], # כתב ערר + פרוטוקול ועדה
"block-zayin": None, # claims_context is sufficient
"block-chet": ["protocol"], # פרוטוקול + השלמות טיעון
"block-tet": ["appraisal"], # שומות בלבד
# block-yod, block-yod-alef, block-he etc. default → all docs
}
async def _build_source_context(case_id: UUID, block_id: str) -> str: async def _build_source_context(case_id: UUID, block_id: str) -> str:
"""Get full document texts for the block. """Get document texts for the block, filtered by relevance.
Per Anthropic best practices: send full source documents, not truncated excerpts. Per Anthropic best practices: send full source documents, not truncated excerpts.
Place documents at the TOP of the prompt (before instructions) for 30% better recall. Per-block filtering prevents context overflow on large cases (9+ docs).
For grounding: instruct Claude to cite word-for-word from these documents.
""" """
allowed = _BLOCK_DOC_TYPES.get(block_id, []) # [] sentinel = not in map → all docs
if allowed is None:
return "" # this block doesn't need raw source docs
docs = await db.list_documents(case_id) docs = await db.list_documents(case_id)
context_parts = [] context_parts = []
for doc in docs: for doc in docs:
if allowed and doc["doc_type"] not in allowed:
continue
text = await db.get_document_text(UUID(doc["id"])) text = await db.get_document_text(UUID(doc["id"]))
if text: if text:
context_parts.append(f"--- מסמך: {doc['title']} ({doc['doc_type']}) ---\n{text}") context_parts.append(f"--- מסמך: {doc['title']} ({doc['doc_type']}) ---\n{text}")

View File

@@ -72,6 +72,9 @@ async def query(
""" """
full_prompt = f"{system}\n\n{prompt}" if system else prompt full_prompt = f"{system}\n\n{prompt}" if system else prompt
if len(full_prompt) > 150_000:
logger.warning("Large prompt: %d chars — may hit context limits", len(full_prompt))
cmd = [ cmd = [
"claude", "-p", "claude", "-p",
"--output-format", "json", "--output-format", "json",
@@ -110,7 +113,8 @@ async def query(
if proc.returncode != 0: if proc.returncode != 0:
stderr = stderr_b.decode("utf-8", errors="replace").strip()[:500] or "unknown error" stderr = stderr_b.decode("utf-8", errors="replace").strip()[:500] or "unknown error"
raise RuntimeError(f"Claude CLI failed (exit {proc.returncode}): {stderr}") size_info = f"; prompt_len={len(full_prompt):,} chars" if len(full_prompt) > 100_000 else ""
raise RuntimeError(f"Claude CLI failed (exit {proc.returncode}): {stderr}{size_info}")
stdout = stdout_b.decode("utf-8", errors="replace").strip() stdout = stdout_b.decode("utf-8", errors="replace").strip()
if not stdout: if not stdout: