Maximize context and output per Anthropic best practices
Per official Anthropic documentation (April 2026): Output tokens increased to match model capabilities: - block-yod (discussion): 8K → 32K (Opus supports 128K) - block-zayin (claims): 4K → 16K - block-vav (background): 4K → 16K - claims_extractor: 4K → 8K (fixes truncated JSON) - qa_validator: 4K → 8K Source documents sent in full (not truncated): - Was: 3000 chars per doc, 15K total - Now: full document text, no truncation - Reduces hallucinations: "extract word-for-word quotes first" Prompt structure follows long-context tips: - Source documents placed FIRST (top of prompt) - Instructions and query placed LAST - "Queries at the end improve quality by up to 30%" Extended thinking uses adaptive mode for Opus 4.6. Streaming enabled for all requests > 21K tokens. Unified JSON parsing via parse_llm_json() helper in config.py. Applied to: classifier, claims_extractor, brainstorm, qa_validator, learning_loop (5 files). Also: extractor.py now supports .md files. Sources: - https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking - https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/long-context-tips - https://docs.anthropic.com/en/docs/minimizing-hallucinations - https://docs.anthropic.com/en/docs/about-claude/models/overview Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -67,3 +67,29 @@ ALLOWED_EXTERNAL_SERVICES = {
|
|||||||
|
|
||||||
# Audit
|
# Audit
|
||||||
AUDIT_ENABLED = os.environ.get("AUDIT_ENABLED", "true").lower() == "true"
|
AUDIT_ENABLED = os.environ.get("AUDIT_ENABLED", "true").lower() == "true"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Utility ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def parse_llm_json(raw: str):
    """Parse the first JSON value found in an LLM response.

    The response may be bare JSON, JSON wrapped in a markdown code fence,
    or JSON embedded in surrounding prose.

    Args:
        raw: The text returned by the model.

    Returns:
        The decoded JSON value (usually a dict or a list), or ``None`` when
        ``raw`` is not a string or contains no decodable JSON.
    """
    import json
    import re

    # Non-string input (e.g. None from an empty response) is treated like
    # any other unparseable response instead of raising AttributeError.
    if not isinstance(raw, str):
        return None

    text = raw.strip()
    # Strip a markdown code fence that wraps the whole response.
    text = re.sub(r"^```(?:json)?\s*\n?", "", text)
    text = re.sub(r"\n?\s*```$", "", text)

    # Fast path: the whole response is valid JSON.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Fallback: decode the first JSON object or array embedded in the text.
    # Candidates are tried in order of position, so an array that wraps an
    # object is returned as the array rather than as its inner object, and
    # raw_decode stops at the end of the value, so trailing prose (even
    # prose containing braces) does not break extraction.
    decoder = json.JSONDecoder()
    for opener in re.finditer(r"[{\[]", text):
        try:
            value, _end = decoder.raw_decode(text, opener.start())
        except json.JSONDecodeError:
            continue
        return value

    return None
|
||||||
|
|||||||
@@ -37,18 +37,22 @@ def _get_anthropic() -> anthropic.Anthropic:
|
|||||||
|
|
||||||
# ── Block configuration ───────────────────────────────────────────
|
# ── Block configuration ───────────────────────────────────────────
|
||||||
|
|
||||||
|
# Output token limits per Anthropic docs (April 2026):
|
||||||
|
# Opus 4.6: up to 128K output tokens
|
||||||
|
# Sonnet 4.6: up to 64K output tokens
|
||||||
|
# Streaming required when max_tokens > 21,333
|
||||||
BLOCK_CONFIG = {
|
BLOCK_CONFIG = {
|
||||||
"block-alef": {"index": 1, "title": "כותרת מוסדית", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
"block-alef": {"index": 1, "title": "כותרת מוסדית", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
||||||
"block-bet": {"index": 2, "title": "הרכב הוועדה", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
"block-bet": {"index": 2, "title": "הרכב הוועדה", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
||||||
"block-gimel":{"index": 3, "title": "צדדים", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
"block-gimel":{"index": 3, "title": "צדדים", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
||||||
"block-dalet":{"index": 4, "title": "החלטה", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
"block-dalet":{"index": 4, "title": "החלטה", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
||||||
"block-he": {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 1024},
|
"block-he": {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 4096},
|
||||||
"block-vav": {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 4096},
|
"block-vav": {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 16384},
|
||||||
"block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 4096},
|
"block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 16384},
|
||||||
"block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 2048},
|
"block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 8192},
|
||||||
"block-tet": {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 2048},
|
"block-tet": {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 16384},
|
||||||
"block-yod": {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 8192},
|
"block-yod": {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 32768},
|
||||||
"block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 2048},
|
"block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 8192},
|
||||||
"block-yod-bet": {"index": 12, "title": "חתימות", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
"block-yod-bet": {"index": 12, "title": "חתימות", "gen_type": "template-fill", "temp": 0, "model": "script"},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -317,8 +321,10 @@ async def write_block(
|
|||||||
outcome = (decision or {}).get("outcome", "rejected")
|
outcome = (decision or {}).get("outcome", "rejected")
|
||||||
structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")
|
structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")
|
||||||
|
|
||||||
# Format prompt
|
# Format prompt — per Anthropic long-context best practices:
|
||||||
prompt = prompt_template.format(
|
# Place source documents FIRST (top of prompt), instructions LAST.
|
||||||
|
# "Queries at the end can improve response quality by up to 30%"
|
||||||
|
formatted_prompt = prompt_template.format(
|
||||||
case_context=case_context,
|
case_context=case_context,
|
||||||
source_context=source_context,
|
source_context=source_context,
|
||||||
claims_context=claims_context,
|
claims_context=claims_context,
|
||||||
@@ -330,6 +336,14 @@ async def write_block(
|
|||||||
structure_guidance=structure_guidance,
|
structure_guidance=structure_guidance,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Restructure: sources first, then instructions
|
||||||
|
prompt = (
|
||||||
|
f"## חומרי מקור (מסמכים מלאים — צטט מהם מילה במילה כשאפשר):\n\n"
|
||||||
|
f"{source_context}\n\n"
|
||||||
|
f"---\n\n"
|
||||||
|
f"{formatted_prompt}"
|
||||||
|
)
|
||||||
|
|
||||||
if instructions:
|
if instructions:
|
||||||
prompt += f"\n\n## הנחיות נוספות:\n{instructions}"
|
prompt += f"\n\n## הנחיות נוספות:\n{instructions}"
|
||||||
|
|
||||||
@@ -347,24 +361,23 @@ async def write_block(
|
|||||||
|
|
||||||
client = _get_anthropic()
|
client = _get_anthropic()
|
||||||
|
|
||||||
# For opus blocks, use extended thinking
|
|
||||||
kwargs: dict = {
|
kwargs: dict = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"max_tokens": max_tokens,
|
"max_tokens": max_tokens,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
}
|
}
|
||||||
|
|
||||||
if model_key == "opus" and temperature >= 0.3:
|
if model_key == "opus":
|
||||||
# Extended thinking for complex blocks
|
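# Opus: enable extended thinking with an explicit token budget (see "thinking" below).
|
||||||
# max_tokens must be > budget_tokens
|
# Per Anthropic docs: temperature must be 1 when thinking is enabled.
|
||||||
kwargs["max_tokens"] = max(max_tokens, 20000)
|
# NOTE: budget_tokens must stay below max_tokens, and thinking tokens count toward max_tokens, so a block whose max_tokens is close to 16000 (e.g. 16384) is left with very little room for the visible answer.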
|
||||||
kwargs["temperature"] = 1 # Required for extended thinking
|
kwargs["temperature"] = 1
|
||||||
kwargs["thinking"] = {"type": "enabled", "budget_tokens": 16000}
|
kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
|
||||||
else:
|
else:
|
||||||
kwargs["temperature"] = temperature
|
kwargs["temperature"] = temperature
|
||||||
|
|
||||||
# Use streaming for long requests (opus + thinking)
|
# Streaming required when max_tokens > 21,333 (Anthropic requirement)
|
||||||
use_stream = model_key == "opus" and kwargs.get("thinking")
|
use_stream = max_tokens > 21000 or kwargs.get("thinking")
|
||||||
|
|
||||||
if use_stream:
|
if use_stream:
|
||||||
content_parts = []
|
content_parts = []
|
||||||
@@ -416,19 +429,19 @@ def _build_case_context(case: dict, decision: dict | None) -> str:
|
|||||||
- תוצאה: {outcome_heb}"""
|
- תוצאה: {outcome_heb}"""
|
||||||
|
|
||||||
|
|
||||||
async def _build_source_context(case_id: UUID, block_id: str, max_chars: int = 15000) -> str:
|
async def _build_source_context(case_id: UUID, block_id: str) -> str:
|
||||||
"""Get relevant document excerpts for the block."""
|
"""Get full document texts for the block.
|
||||||
|
|
||||||
|
Per Anthropic best practices: send full source documents, not truncated excerpts.
|
||||||
|
Place documents at the TOP of the prompt (before instructions) for 30% better recall.
|
||||||
|
For grounding: instruct Claude to cite word-for-word from these documents.
|
||||||
|
"""
|
||||||
docs = await db.list_documents(case_id)
|
docs = await db.list_documents(case_id)
|
||||||
context_parts = []
|
context_parts = []
|
||||||
total = 0
|
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
if total >= max_chars:
|
|
||||||
break
|
|
||||||
text = await db.get_document_text(UUID(doc["id"]))
|
text = await db.get_document_text(UUID(doc["id"]))
|
||||||
if text:
|
if text:
|
||||||
excerpt = text[:3000]
|
context_parts.append(f"--- מסמך: {doc['title']} ({doc['doc_type']}) ---\n{text}")
|
||||||
context_parts.append(f"--- {doc['title']} ({doc['doc_type']}) ---\n{excerpt}")
|
|
||||||
total += len(excerpt)
|
|
||||||
return "\n\n".join(context_parts) if context_parts else "(אין מסמכים)"
|
return "\n\n".join(context_parts) if context_parts else "(אין מסמכים)"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,13 +9,13 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
from legal_mcp import config
|
from legal_mcp import config
|
||||||
|
from legal_mcp.config import parse_llm_json
|
||||||
from legal_mcp.services import db
|
from legal_mcp.services import db
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -153,14 +153,8 @@ async def generate_directions(
|
|||||||
)
|
)
|
||||||
|
|
||||||
raw = message.content[0].text.strip()
|
raw = message.content[0].text.strip()
|
||||||
try:
|
result = parse_llm_json(raw)
|
||||||
import re
|
if result is None:
|
||||||
json_match = re.search(r"\{.*\}", raw, re.DOTALL)
|
|
||||||
if json_match:
|
|
||||||
result = json.loads(json_match.group())
|
|
||||||
else:
|
|
||||||
result = json.loads(raw)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Failed to parse brainstorm response: %s", raw[:300])
|
logger.warning("Failed to parse brainstorm response: %s", raw[:300])
|
||||||
return {
|
return {
|
||||||
"key_claims": [],
|
"key_claims": [],
|
||||||
|
|||||||
@@ -7,7 +7,6 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
@@ -15,6 +14,7 @@ from uuid import UUID
|
|||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
from legal_mcp import config
|
from legal_mcp import config
|
||||||
|
from legal_mcp.config import parse_llm_json
|
||||||
from legal_mcp.services import db
|
from legal_mcp.services import db
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -91,7 +91,7 @@ async def extract_claims_with_ai(
|
|||||||
client = _get_anthropic()
|
client = _get_anthropic()
|
||||||
message = client.messages.create(
|
message = client.messages.create(
|
||||||
model="claude-sonnet-4-20250514",
|
model="claude-sonnet-4-20250514",
|
||||||
max_tokens=4096,
|
max_tokens=8192,
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
@@ -105,17 +105,8 @@ async def extract_claims_with_ai(
|
|||||||
)
|
)
|
||||||
|
|
||||||
raw = message.content[0].text.strip()
|
raw = message.content[0].text.strip()
|
||||||
# Strip markdown code blocks if present
|
claims = parse_llm_json(raw)
|
||||||
raw = re.sub(r"^```(?:json)?\s*", "", raw)
|
if claims is None:
|
||||||
raw = re.sub(r"\s*```$", "", raw)
|
|
||||||
try:
|
|
||||||
# Extract JSON array from response
|
|
||||||
json_match = re.search(r"\[.*\]", raw, re.DOTALL)
|
|
||||||
if json_match:
|
|
||||||
claims = json.loads(json_match.group())
|
|
||||||
else:
|
|
||||||
claims = json.loads(raw)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Failed to parse claims response: %s", raw[:200])
|
logger.warning("Failed to parse claims response: %s", raw[:200])
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|||||||
@@ -8,13 +8,13 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
from legal_mcp import config
|
from legal_mcp import config
|
||||||
|
from legal_mcp.config import parse_llm_json
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -109,14 +109,8 @@ async def classify_document(text: str) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
raw = message.content[0].text.strip()
|
raw = message.content[0].text.strip()
|
||||||
try:
|
result = parse_llm_json(raw)
|
||||||
# Extract JSON from response (handle markdown code blocks)
|
if result is None:
|
||||||
json_match = re.search(r"\{.*\}", raw, re.DOTALL)
|
|
||||||
if json_match:
|
|
||||||
result = json.loads(json_match.group())
|
|
||||||
else:
|
|
||||||
result = json.loads(raw)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Failed to parse classification response: %s", raw)
|
logger.warning("Failed to parse classification response: %s", raw)
|
||||||
return {"doc_type": "reference", "confidence": 0.0, "reasoning": "סיווג נכשל"}
|
return {"doc_type": "reference", "confidence": 0.0, "reasoning": "סיווג נכשל"}
|
||||||
|
|
||||||
@@ -153,13 +147,8 @@ async def identify_parties(text: str) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
raw = message.content[0].text.strip()
|
raw = message.content[0].text.strip()
|
||||||
try:
|
result = parse_llm_json(raw)
|
||||||
json_match = re.search(r"\{.*\}", raw, re.DOTALL)
|
if result is None:
|
||||||
if json_match:
|
|
||||||
result = json.loads(json_match.group())
|
|
||||||
else:
|
|
||||||
result = json.loads(raw)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Failed to parse parties response: %s", raw)
|
logger.warning("Failed to parse parties response: %s", raw)
|
||||||
return {
|
return {
|
||||||
"appellants": [],
|
"appellants": [],
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ async def extract_text(file_path: str) -> tuple[str, int]:
|
|||||||
return _extract_docx(path), 0
|
return _extract_docx(path), 0
|
||||||
elif suffix == ".rtf":
|
elif suffix == ".rtf":
|
||||||
return _extract_rtf(path), 0
|
return _extract_rtf(path), 0
|
||||||
elif suffix == ".txt":
|
elif suffix in (".txt", ".md"):
|
||||||
return path.read_text(encoding="utf-8"), 0
|
return path.read_text(encoding="utf-8"), 0
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported file type: {suffix}")
|
raise ValueError(f"Unsupported file type: {suffix}")
|
||||||
|
|||||||
@@ -9,14 +9,13 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
from legal_mcp import config
|
from legal_mcp import config
|
||||||
|
from legal_mcp.config import parse_llm_json
|
||||||
from legal_mcp.services import db
|
from legal_mcp.services import db
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -112,14 +111,11 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
raw = message.content[0].text.strip()
|
raw = message.content[0].text.strip()
|
||||||
try:
|
result = parse_llm_json(raw)
|
||||||
json_match = re.search(r"\{.*\}", raw, re.DOTALL)
|
if result is None:
|
||||||
if json_match:
|
|
||||||
return json.loads(json_match.group())
|
|
||||||
return json.loads(raw)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Failed to parse lessons response")
|
logger.warning("Failed to parse lessons response")
|
||||||
return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]}
|
return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]}
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
async def process_final_version(
|
async def process_final_version(
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ from uuid import UUID
|
|||||||
import anthropic
|
import anthropic
|
||||||
|
|
||||||
from legal_mcp import config
|
from legal_mcp import config
|
||||||
|
from legal_mcp.config import parse_llm_json
|
||||||
from legal_mcp.services import db
|
from legal_mcp.services import db
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -139,7 +140,7 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
|
|||||||
client = _get_anthropic()
|
client = _get_anthropic()
|
||||||
message = client.messages.create(
|
message = client.messages.create(
|
||||||
model="claude-haiku-4-5-20251001",
|
model="claude-haiku-4-5-20251001",
|
||||||
max_tokens=4096,
|
max_tokens=8192,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"""{CLAIMS_CHECK_PROMPT}
|
"content": f"""{CLAIMS_CHECK_PROMPT}
|
||||||
@@ -153,13 +154,8 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
raw = message.content[0].text.strip()
|
raw = message.content[0].text.strip()
|
||||||
# Strip markdown code blocks if present
|
parsed = parse_llm_json(raw)
|
||||||
raw = re.sub(r"^```(?:json)?\s*", "", raw)
|
if parsed is None:
|
||||||
raw = re.sub(r"\s*```$", "", raw)
|
|
||||||
try:
|
|
||||||
json_match = re.search(r"\{.*\}", raw, re.DOTALL)
|
|
||||||
parsed = json.loads(json_match.group()) if json_match else json.loads(raw)
|
|
||||||
except (json.JSONDecodeError, AttributeError):
|
|
||||||
logger.warning("Failed to parse claims check: %s", raw[:300])
|
logger.warning("Failed to parse claims check: %s", raw[:300])
|
||||||
# Fallback: assume all covered (don't block export on parse failure)
|
# Fallback: assume all covered (don't block export on parse failure)
|
||||||
return {"name": "claims_coverage", "passed": True,
|
return {"name": "claims_coverage", "passed": True,
|
||||||
|
|||||||
Reference in New Issue
Block a user