diff --git a/mcp-server/src/legal_mcp/config.py b/mcp-server/src/legal_mcp/config.py index d5cd976..3b922de 100644 --- a/mcp-server/src/legal_mcp/config.py +++ b/mcp-server/src/legal_mcp/config.py @@ -67,3 +67,29 @@ ALLOWED_EXTERNAL_SERVICES = { # Audit AUDIT_ENABLED = os.environ.get("AUDIT_ENABLED", "true").lower() == "true" + + +# ── Utility ─────────────────────────────────────────────────────── + +def parse_llm_json(raw: str): + """Parse JSON from LLM response, stripping markdown code blocks and extra text.""" + import json + import re + raw = raw.strip() + # Strip markdown code blocks + raw = re.sub(r"^```(?:json)?\s*\n?", "", raw) + raw = re.sub(r"\n?\s*```$", "", raw) + # Try direct parse first + try: + return json.loads(raw) + except json.JSONDecodeError: + pass + # Try to find JSON object or array + for pattern in [r"\{.*\}", r"\[.*\]"]: + match = re.search(pattern, raw, re.DOTALL) + if match: + try: + return json.loads(match.group()) + except json.JSONDecodeError: + continue + return None diff --git a/mcp-server/src/legal_mcp/services/block_writer.py b/mcp-server/src/legal_mcp/services/block_writer.py index 6b49604..ce54b0c 100644 --- a/mcp-server/src/legal_mcp/services/block_writer.py +++ b/mcp-server/src/legal_mcp/services/block_writer.py @@ -37,18 +37,22 @@ def _get_anthropic() -> anthropic.Anthropic: # ── Block configuration ─────────────────────────────────────────── +# Output token limits per Anthropic docs (April 2026): +# Opus 4.6: up to 128K output tokens +# Sonnet 4.6: up to 64K output tokens +# Streaming required when max_tokens > 21,333 BLOCK_CONFIG = { "block-alef": {"index": 1, "title": "כותרת מוסדית", "gen_type": "template-fill", "temp": 0, "model": "script"}, "block-bet": {"index": 2, "title": "הרכב הוועדה", "gen_type": "template-fill", "temp": 0, "model": "script"}, "block-gimel":{"index": 3, "title": "צדדים", "gen_type": "template-fill", "temp": 0, "model": "script"}, "block-dalet":{"index": 4, "title": "החלטה", "gen_type": "template-fill", "temp": 0, "model": "script"}, - "block-he": {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 1024}, - "block-vav": {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 4096}, - "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 4096}, - "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 2048}, - "block-tet": {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 2048}, - "block-yod": {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 8192}, - "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 2048}, + "block-he": {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 4096}, + "block-vav": {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 16384}, + "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 16384}, + "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 8192}, + "block-tet": {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 16384}, + "block-yod": {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 32768}, + "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 8192}, "block-yod-bet": {"index": 12, "title": "חתימות", "gen_type": "template-fill", "temp": 0, "model": "script"}, } @@ -317,8 +321,10 @@ async def write_block( outcome = (decision or {}).get("outcome", "rejected") structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "") - # Format prompt - prompt = prompt_template.format( + # Format prompt — per Anthropic long-context best practices: + # Place source documents FIRST (top of prompt), instructions LAST. + # "Queries at the end can improve response quality by up to 30%" + formatted_prompt = prompt_template.format( case_context=case_context, source_context=source_context, claims_context=claims_context, @@ -330,6 +336,14 @@ async def write_block( structure_guidance=structure_guidance, ) + # Restructure: sources first, then instructions + prompt = ( + f"## חומרי מקור (מסמכים מלאים — צטט מהם מילה במילה כשאפשר):\n\n" + f"{source_context}\n\n" + f"---\n\n" + f"{formatted_prompt}" + ) + if instructions: prompt += f"\n\n## הנחיות נוספות:\n{instructions}" @@ -347,24 +361,23 @@ async def write_block( client = _get_anthropic() - # For opus blocks, use extended thinking kwargs: dict = { "model": model, "max_tokens": max_tokens, "messages": [{"role": "user", "content": prompt}], } - if model_key == "opus" and temperature >= 0.3: - # Extended thinking for complex blocks - # max_tokens must be > budget_tokens - kwargs["max_tokens"] = max(max_tokens, 20000) - kwargs["temperature"] = 1 # Required for extended thinking - kwargs["thinking"] = {"type": "enabled", "budget_tokens": 16000} + if model_key == "opus": + # Opus 4.6: use adaptive thinking — Claude decides when and how much to think. + # Per Anthropic docs: temperature must be 1 when thinking is enabled. + # budget_tokens not needed with adaptive thinking. + kwargs["temperature"] = 1 + kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)} else: kwargs["temperature"] = temperature - # Use streaming for long requests (opus + thinking) - use_stream = model_key == "opus" and kwargs.get("thinking") + # Streaming required when max_tokens > 21,333 (Anthropic requirement) + use_stream = max_tokens > 21000 or kwargs.get("thinking") if use_stream: content_parts = [] @@ -416,19 +429,19 @@ def _build_case_context(case: dict, decision: dict | None) -> str: - תוצאה: {outcome_heb}""" -async def _build_source_context(case_id: UUID, block_id: str, max_chars: int = 15000) -> str: - """Get relevant document excerpts for the block.""" +async def _build_source_context(case_id: UUID, block_id: str) -> str: + """Get full document texts for the block. + + Per Anthropic best practices: send full source documents, not truncated excerpts. + Place documents at the TOP of the prompt (before instructions) for 30% better recall. + For grounding: instruct Claude to cite word-for-word from these documents. + """ docs = await db.list_documents(case_id) context_parts = [] - total = 0 for doc in docs: - if total >= max_chars: - break text = await db.get_document_text(UUID(doc["id"])) if text: - excerpt = text[:3000] - context_parts.append(f"--- {doc['title']} ({doc['doc_type']}) ---\n{excerpt}") - total += len(excerpt) + context_parts.append(f"--- מסמך: {doc['title']} ({doc['doc_type']}) ---\n{text}") return "\n\n".join(context_parts) if context_parts else "(אין מסמכים)" diff --git a/mcp-server/src/legal_mcp/services/brainstorm.py b/mcp-server/src/legal_mcp/services/brainstorm.py index ba8f7fc..686178e 100644 --- a/mcp-server/src/legal_mcp/services/brainstorm.py +++ b/mcp-server/src/legal_mcp/services/brainstorm.py @@ -9,13 +9,13 @@ from __future__ import annotations -import json import logging from uuid import UUID import anthropic from legal_mcp import config +from legal_mcp.config import parse_llm_json from legal_mcp.services import db logger = logging.getLogger(__name__) @@ -153,14 +153,8 @@ async def generate_directions( ) raw = message.content[0].text.strip() - try: - import re - json_match = re.search(r"\{.*\}", raw, re.DOTALL) - if json_match: - result = json.loads(json_match.group()) - else: - result = json.loads(raw) - except json.JSONDecodeError: + result = parse_llm_json(raw) + if result is None: logger.warning("Failed to parse brainstorm response: %s", raw[:300]) return { "key_claims": [], diff --git a/mcp-server/src/legal_mcp/services/claims_extractor.py b/mcp-server/src/legal_mcp/services/claims_extractor.py index b4984c7..fc42e20 100644 --- a/mcp-server/src/legal_mcp/services/claims_extractor.py +++ b/mcp-server/src/legal_mcp/services/claims_extractor.py @@ -7,7 +7,6 @@ from __future__ import annotations -import json import logging import re from uuid import UUID @@ -15,6 +14,7 @@ from uuid import UUID import anthropic from legal_mcp import config +from legal_mcp.config import parse_llm_json from legal_mcp.services import db logger = logging.getLogger(__name__) @@ -91,7 +91,7 @@ async def extract_claims_with_ai( client = _get_anthropic() message = client.messages.create( model="claude-sonnet-4-20250514", - max_tokens=4096, + max_tokens=8192, messages=[ { "role": "user", @@ -105,17 +105,8 @@ async def extract_claims_with_ai( ) raw = message.content[0].text.strip() - # Strip markdown code blocks if present - raw = re.sub(r"^```(?:json)?\s*", "", raw) - raw = re.sub(r"\s*```$", "", raw) - try: - # Extract JSON array from response - json_match = re.search(r"\[.*\]", raw, re.DOTALL) - if json_match: - claims = json.loads(json_match.group()) - else: - claims = json.loads(raw) - except json.JSONDecodeError: + claims = parse_llm_json(raw) + if claims is None: logger.warning("Failed to parse claims response: %s", raw[:200]) return [] diff --git a/mcp-server/src/legal_mcp/services/classifier.py b/mcp-server/src/legal_mcp/services/classifier.py index 7790eef..ba5a779 100644 --- a/mcp-server/src/legal_mcp/services/classifier.py +++ b/mcp-server/src/legal_mcp/services/classifier.py @@ -8,13 +8,13 @@ from __future__ import annotations -import json import logging import re import anthropic from legal_mcp import config +from legal_mcp.config import parse_llm_json logger = logging.getLogger(__name__) @@ -109,14 +109,8 @@ async def classify_document(text: str) -> dict: ) raw = message.content[0].text.strip() - try: - # Extract JSON from response (handle markdown code blocks) - json_match = re.search(r"\{.*\}", raw, re.DOTALL) - if json_match: - result = json.loads(json_match.group()) - else: - result = json.loads(raw) - except json.JSONDecodeError: + result = parse_llm_json(raw) + if result is None: logger.warning("Failed to parse classification response: %s", raw) return {"doc_type": "reference", "confidence": 0.0, "reasoning": "סיווג נכשל"} @@ -153,13 +147,8 @@ async def identify_parties(text: str) -> dict: ) raw = message.content[0].text.strip() - try: - json_match = re.search(r"\{.*\}", raw, re.DOTALL) - if json_match: - result = json.loads(json_match.group()) - else: - result = json.loads(raw) - except json.JSONDecodeError: + result = parse_llm_json(raw) + if result is None: logger.warning("Failed to parse parties response: %s", raw) return { "appellants": [], diff --git a/mcp-server/src/legal_mcp/services/extractor.py b/mcp-server/src/legal_mcp/services/extractor.py index 7874e9c..09df0f3 100644 --- a/mcp-server/src/legal_mcp/services/extractor.py +++ b/mcp-server/src/legal_mcp/services/extractor.py @@ -45,7 +45,7 @@ async def extract_text(file_path: str) -> tuple[str, int]: return _extract_docx(path), 0 elif suffix == ".rtf": return _extract_rtf(path), 0 - elif suffix == ".txt": + elif suffix in (".txt", ".md"): return path.read_text(encoding="utf-8"), 0 else: raise ValueError(f"Unsupported file type: {suffix}") diff --git a/mcp-server/src/legal_mcp/services/learning_loop.py b/mcp-server/src/legal_mcp/services/learning_loop.py index da4a76f..c55a46e 100644 --- a/mcp-server/src/legal_mcp/services/learning_loop.py +++ b/mcp-server/src/legal_mcp/services/learning_loop.py @@ -9,14 +9,13 @@ from __future__ import annotations -import json import logging -import re from uuid import UUID import anthropic from legal_mcp import config +from legal_mcp.config import parse_llm_json from legal_mcp.services import db logger = logging.getLogger(__name__) @@ -112,14 +111,11 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict: ) raw = message.content[0].text.strip() - try: - json_match = re.search(r"\{.*\}", raw, re.DOTALL) - if json_match: - return json.loads(json_match.group()) - return json.loads(raw) - except json.JSONDecodeError: + result = parse_llm_json(raw) + if result is None: logger.warning("Failed to parse lessons response") return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]} + return result async def process_final_version( diff --git a/mcp-server/src/legal_mcp/services/qa_validator.py b/mcp-server/src/legal_mcp/services/qa_validator.py index d155139..df90d0e 100644 --- a/mcp-server/src/legal_mcp/services/qa_validator.py +++ b/mcp-server/src/legal_mcp/services/qa_validator.py @@ -21,6 +21,7 @@ from uuid import UUID import anthropic from legal_mcp import config +from legal_mcp.config import parse_llm_json from legal_mcp.services import db logger = logging.getLogger(__name__) @@ -139,7 +140,7 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict: client = _get_anthropic() message = client.messages.create( model="claude-haiku-4-5-20251001", - max_tokens=4096, + max_tokens=8192, messages=[{ "role": "user", "content": f"""{CLAIMS_CHECK_PROMPT} @@ -153,13 +154,8 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict: ) raw = message.content[0].text.strip() - # Strip markdown code blocks if present - raw = re.sub(r"^```(?:json)?\s*", "", raw) - raw = re.sub(r"\s*```$", "", raw) - try: - json_match = re.search(r"\{.*\}", raw, re.DOTALL) - parsed = json.loads(json_match.group()) if json_match else json.loads(raw) - except (json.JSONDecodeError, AttributeError): + parsed = parse_llm_json(raw) + if parsed is None: logger.warning("Failed to parse claims check: %s", raw[:300]) # Fallback: assume all covered (don't block export on parse failure) return {"name": "claims_coverage", "passed": True,