Replace Haiku with Sonnet in classifier for better accuracy

classify_document and identify_parties both used Haiku, which produced parsing failures and 0% confidence on Beit HaKerem documents. Sonnet handles Hebrew legal documents more reliably. Haiku is no longer used anywhere in the codebase.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
QA claims check: Haiku→Sonnet + filter appellant claims only
2026-04-04 07:47:12 +00:00 · 2026-04-04 07:37:23 +00:00 · 2026-04-03 16:18:25 +00:00 · 2026-04-03 16:12:09 +00:00 · 2026-04-03 16:04:34 +00:00 · 2026-04-03 16:00:49 +00:00
11 changed files with 533 additions and 145 deletions
--- a/mcp-server/src/legal_mcp/config.py
+++ b/mcp-server/src/legal_mcp/config.py
@@ -67,3 +67,69 @@ ALLOWED_EXTERNAL_SERVICES = {

 # Audit
 AUDIT_ENABLED = os.environ.get("AUDIT_ENABLED", "true").lower() == "true"
+
+
+# ── Utility ───────────────────────────────────────────────────────
+
+def parse_llm_json(raw: str):
+    """Parse JSON from an LLM response; return the parsed value, or None on failure.
+
+    Handles:
+    1. Markdown ```json ... ``` code blocks
+    2. Extra text before/after JSON
+    3. Truncated JSON (missing closing brackets) — attempts recovery
+
+    Whichever of "{" / "[" opens first in the text is treated as the outer
+    value and tried first, so an array wrapped in prose is not mistaken for
+    its first element and a truncated object is not replaced by an array
+    nested inside it.
+    """
+    import json
+    import re
+    raw = raw.strip()
+    # Strip markdown code blocks
+    raw = re.sub(r"^```(?:json)?\s*\n?", "", raw)
+    raw = re.sub(r"\n?\s*```\s*$", "", raw)
+    # Try direct parse first
+    try:
+        return json.loads(raw)
+    except json.JSONDecodeError:
+        pass
+    # Container kinds present in the text, earliest opener (outermost) first
+    kinds = [("{", r"\{.*\}"), ("[", r"\[.*\]")]
+    kinds = sorted((k for k in kinds if k[0] in raw), key=lambda k: raw.find(k[0]))
+    for opener, pattern in kinds:
+        # Complete value embedded in surrounding text
+        match = re.search(pattern, raw, re.DOTALL)
+        if match:
+            try:
+                return json.loads(match.group())
+            except json.JSONDecodeError:
+                pass
+        # Truncated value: keep everything up to the last complete "}"
+        fragment = raw[raw.find(opener):]
+        last_brace = fragment.rfind("}")
+        if last_brace <= 0:
+            continue
+        truncated = fragment[:last_brace + 1]
+        # Closers still owed at the cut; brackets inside strings are ignored
+        pending, in_string, escaped = [], False, False
+        for ch in truncated:
+            if in_string:
+                if escaped:
+                    escaped = False
+                elif ch == "\\":
+                    escaped = True
+                elif ch == '"':
+                    in_string = False
+            elif ch == '"':
+                in_string = True
+            elif ch in "{[":
+                pending.append("}" if ch == "{" else "]")
+            elif ch in "}]" and pending:
+                pending.pop()
+        try:
+            return json.loads(truncated + "".join(reversed(pending)))
+        except json.JSONDecodeError:
+            continue
+    return None
--- a/mcp-server/src/legal_mcp/server.py
+++ b/mcp-server/src/legal_mcp/server.py
@@ -223,6 +223,26 @@ async def get_decision_template(case_number: str) -> str:
    return await drafting.get_decision_template(case_number)


+@mcp.tool()  # docstring below is presumably surfaced as the tool description, so it stays in Hebrew like the sibling tools
+async def get_block_context(  # per the docstring: full context for writing one block, no API call; the caller writes and saves
+    case_number: str,
+    block_id: str,
+    instructions: str = "",  # optional extra guidance, forwarded unchanged
+) -> str:
+    """קבלת הקשר מלא לכתיבת בלוק — ללא API. Claude Code כותב ושומר."""
+    return await drafting.get_block_context(case_number, block_id, instructions)  # pure delegation to drafting
+
+
+@mcp.tool()  # docstring below is presumably surfaced as the tool description, so it stays in Hebrew like the sibling tools
+async def save_block_content(  # per the docstring: store a block written by Claude Code in the DB
+    case_number: str,
+    block_id: str,
+    content: str,  # the finished block text, forwarded unchanged
+) -> str:
+    """שמירת בלוק שנכתב ע"י Claude Code ב-DB."""
+    return await drafting.save_block_content(case_number, block_id, content)  # pure delegation to drafting
+
+
@mcp.tool()
 async def validate_decision(case_number: str) -> str:
    """בדיקת QA — 6 בדיקות איכות על ההחלטה. אם בדיקה קריטית נכשלת — ייצוא חסום."""
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -14,6 +14,7 @@ from __future__ import annotations

 import json
 import logging
+import re
 from datetime import date
 from uuid import UUID

@@ -36,18 +37,22 @@ def _get_anthropic() -> anthropic.Anthropic:

 # ── Block configuration ───────────────────────────────────────────

+# Output token limits per Anthropic docs (April 2026):
+# Opus 4.6: up to 128K output tokens
+# Sonnet 4.6: up to 64K output tokens
+# Streaming required when max_tokens > 21,333
 BLOCK_CONFIG = {
    "block-alef": {"index": 1, "title": "כותרת מוסדית", "gen_type": "template-fill", "temp": 0, "model": "script"},
    "block-bet":  {"index": 2, "title": "הרכב הוועדה", "gen_type": "template-fill", "temp": 0, "model": "script"},
    "block-gimel":{"index": 3, "title": "צדדים", "gen_type": "template-fill", "temp": 0, "model": "script"},
    "block-dalet":{"index": 4, "title": "החלטה", "gen_type": "template-fill", "temp": 0, "model": "script"},
-    "block-he":   {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 1024},
-    "block-vav":  {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 4096},
-    "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 4096},
-    "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 2048},
-    "block-tet":  {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 2048},
-    "block-yod":  {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 8192},
-    "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 2048},
+    "block-he":   {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 4096},
+    "block-vav":  {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 16384},
+    "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 16384},
+    "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 8192},
+    "block-tet":  {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 16384},
+    "block-yod":  {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 16384},
+    "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 8192},
    "block-yod-bet":  {"index": 12, "title": "חתימות", "gen_type": "template-fill", "temp": 0, "model": "script"},
 }

@@ -147,16 +152,21 @@ BLOCK_PROMPTS = {

    "block-zayin": """כתוב את בלוק טענות הצדדים (בלוק ז, "תמצית טענות הצדדים") של החלטת ועדת ערר.

-## כללים:
- כל טענה בסעיף נפרד, גוף שלישי ("העורר טוען כי...")
- סדר קבוע: טענות העוררים → עמדת הוועדה המקומית → עמדת מבקשי ההיתר (אם יש)
+## כללים קריטיים:
+- **סנתז טענות דומות** — אל תרשום כל טענה בנפרד. קבץ טענות דומות לנושא אחד. למשל: כל הטענות על הודעות → סעיף אחד, כל הטענות על רכוש משותף → סעיף אחד.
+- גוף שלישי: "העוררים טוענים כי...", "הוועדה המקומית ציינה כי..."
+- **מבנה קבוע עם 3 חלקים:**
+  1. "טענות העוררים" — 8-12 סעיפים מקובצים לפי נושא
+  2. "עמדת הוועדה המקומית" — 5-8 סעיפים
+  3. "עמדת מבקשי ההיתר" (אם יש) — 5-10 סעיפים
 - כותרת: "תמצית טענות הצדדים"
- נאמנות מוחלטת למקור — לא לשנות, לא לקצר ללא ציון
- אין ניתוח, אין מסקנות, אין הערכה
+- נאמנות למקור — לא להמציא טענות, אבל כן לאחד ולסכם טענות חוזרות
+- אין ניתוח, אין מסקנות, אין הערכה ("טענה חלשה/חזקה")
 - רק מכתבי טענות מקוריים (לא השלמות טיעון)
 - מספור רציף
+- **יעד אורך: 800-1500 מילים**

-## טענות שחולצו:
+## טענות שחולצו (קבץ טענות דומות לנושאים):
 {claims_context}

 ## פרטי התיק:
@@ -311,8 +321,10 @@ async def write_block(
    outcome = (decision or {}).get("outcome", "rejected")
    structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")

-    # Format prompt
-    prompt = prompt_template.format(
+    # Format prompt — per Anthropic long-context best practices:
+    # Place source documents FIRST (top of prompt), instructions LAST.
+    # "Queries at the end can improve response quality by up to 30%"
+    formatted_prompt = prompt_template.format(
        case_context=case_context,
        source_context=source_context,
        claims_context=claims_context,
@@ -324,6 +336,14 @@ async def write_block(
        structure_guidance=structure_guidance,
    )

+    # Restructure: sources first, then instructions
+    prompt = (
+        f"## חומרי מקור (מסמכים מלאים — צטט מהם מילה במילה כשאפשר):\n\n"
+        f"{source_context}\n\n"
+        f"---\n\n"
+        f"{formatted_prompt}"
+    )
+
    if instructions:
        prompt += f"\n\n## הנחיות נוספות:\n{instructions}"

@@ -341,24 +361,23 @@ async def write_block(

    client = _get_anthropic()

-    # For opus blocks, use extended thinking
    kwargs: dict = {
        "model": model,
        "max_tokens": max_tokens,
        "messages": [{"role": "user", "content": prompt}],
    }

-    if model_key == "opus" and temperature >= 0.3:
-        # Extended thinking for complex blocks
-        # max_tokens must be > budget_tokens
-        kwargs["max_tokens"] = max(max_tokens, 20000)
-        kwargs["temperature"] = 1  # Required for extended thinking
-        kwargs["thinking"] = {"type": "enabled", "budget_tokens": 16000}
+    if model_key == "opus":
+        # Opus: extended thinking with an explicit budget ("enabled" mode, not adaptive).
+        # Per Anthropic docs: temperature must be 1 when thinking is enabled.
+        # NOTE(review): budget_tokens counts toward max_tokens — with a 16384 cap and a 16000 budget little may remain for the answer.
+        kwargs["temperature"] = 1
+        kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
    else:
        kwargs["temperature"] = temperature

-    # Use streaming for long requests (opus + thinking)
-    use_stream = model_key == "opus" and kwargs.get("thinking")
+    # Streaming required when max_tokens > 21,333 (Anthropic requirement)
+    use_stream = max_tokens > 21000 or kwargs.get("thinking")

    if use_stream:
        content_parts = []
@@ -410,19 +429,19 @@ def _build_case_context(case: dict, decision: dict | None) -> str:
 - תוצאה: {outcome_heb}"""


-async def _build_source_context(case_id: UUID, block_id: str, max_chars: int = 15000) -> str:
-    """Get relevant document excerpts for the block."""
+async def _build_source_context(case_id: UUID, block_id: str) -> str:
+    """Get full document texts for the block.
+
+    Per Anthropic best practices: send full source documents, not truncated excerpts.
+    Place documents at the TOP of the prompt (before instructions) for 30% better recall.
+    For grounding: instruct Claude to cite word-for-word from these documents.
+    """
    docs = await db.list_documents(case_id)
    context_parts = []
-    total = 0
    for doc in docs:
-        if total >= max_chars:
-            break
        text = await db.get_document_text(UUID(doc["id"]))
        if text:
-            excerpt = text[:3000]
-            context_parts.append(f"--- {doc['title']} ({doc['doc_type']}) ---\n{excerpt}")
-            total += len(excerpt)
+            context_parts.append(f"--- מסמך: {doc['title']} ({doc['doc_type']}) ---\n{text}")
    return "\n\n".join(context_parts) if context_parts else "(אין מסמכים)"


@@ -501,32 +520,117 @@ async def _build_plans_context(case_id: UUID) -> str:


 async def _build_precedents_context(case_id: UUID, block_id: str) -> str:
-    """Search for similar precedent paragraphs."""
+    """Search for similar precedent paragraphs from other decisions and case law."""
+    parts = []
    try:
        case = await db.get_case(case_id)
+        case_number = case.get("case_number", "") if case else ""
        subject = case.get("subject", "") if case else ""
        query = f"דיון משפטי בנושא {subject}" if subject else "דיון משפטי ועדת ערר"
        query_emb = await embeddings.embed_query(query)
-        results = await db.search_similar(query_embedding=query_emb, limit=5)
+
+        # Search 1: paragraph_embeddings (from other decisions by Dafna)
+        para_results = await db.search_similar_paragraphs(
+            query_embedding=query_emb, limit=10, block_type="block-yod",
+        )
        # Filter out same case
-        results = [r for r in results if str(r.get("case_id")) != str(case_id)]
-        if results:
-            parts = []
-            for r in results[:3]:
-                parts.append(f"[{r.get('case_number', '?')}, {r.get('section_type', '')}] {r['content'][:400]}")
-            return "\n\n".join(parts)
+        para_results = [r for r in para_results if r.get("case_number", "") != case_number]
+        for r in para_results[:4]:
+            parts.append(
+                f"[החלטת {r.get('case_number', '?')} — {r.get('case_title', '')}, "
+                f"בלוק {r.get('block_type', '')}]\n{r['content'][:500]}"
+            )
+
+        # Search 2: case_law_embeddings (precedent case law)
+        pool = await db.get_pool()
+        async with pool.acquire() as conn:
+            caselaw_rows = await conn.fetch(
+                """SELECT cl.case_number, cl.case_name, cl.court, cl.summary, cl.key_quote,
+                          1 - (cle.embedding <=> $1) AS score
+                   FROM case_law_embeddings cle
+                   JOIN case_law cl ON cl.id = cle.case_law_id
+                   ORDER BY cle.embedding <=> $1
+                   LIMIT 5""",
+                query_emb,
+            )
+        for r in caselaw_rows[:3]:
+            text = r["key_quote"] or r["summary"] or ""
+            if text:
+                parts.append(
+                    f"[פסיקה: {r['case_number']} {r['case_name']} ({r.get('court', '')})] "
+                    f"score={r['score']:.3f}\n{text[:400]}"
+                )
+
    except Exception as e:
        logger.warning("Failed to fetch precedents: %s", e)
-    return "(אין תקדימים)"
+
+    return "\n\n".join(parts) if parts else "(אין תקדימים)"


 async def _build_style_context() -> str:
-    patterns = await db.get_style_patterns()
-    if not patterns:
-        return "(אין דפוסי סגנון)"
+    """Build comprehensive style guide from DB patterns + SKILL.md rules.
+
+    Per Anthropic: explicit style instructions reduce generic output.
+    """
    lines = []
-    for p in patterns[:10]:
-        lines.append(f"- [{p['pattern_type']}] {p['pattern_text']}")
+
+    # Core style rules (from SKILL.md analysis)
+    lines.append("""## כללי סגנון דפנה תמיר — חובה:
+
+### טון:
+- ערר רישוי (1xxx): חם יחסית, עם אלמנטים אנושיים
+- ערר השבחה (8xxx): קר, יבש, מקצועי
+- גוף ראשון רבים: "אנו סבורים", "מצאנו כי", "לדעתנו"
+- ישיר ובהיר — לא אקדמי ולא מסורבל
+
+### ביטויים ייחודיים (חובה להשתמש):
+- "לפנינו..." (פתיחה)
+- "כידוע..." (הצגת עקרון ידוע)
+- "ברי כי..." / "ודוק..." (הדגשה)
+- "אין בידנו לקבל" (דחיית טענה)
+- "בטענה זו מצאנו טעם" (קבלת טענה)
+- "יחד עם זאת" (מעבר לאיזון)
+- "למעלה מן הצורך" / "נבקש שלא לצאת בחסר" (הרחבה)
+- "הדברים מתחדדים שעה ש..." (חידוד)
+- "מחד... מאידך... על כן..." (איזון לפני הכרעה)
+- "לאור כל האמור לעיל" (סיכום)
+- "ניתנה פה אחד היום" (סיום)
+
+### מבנה דיון:
+- אסה רציפה ללא כותרות משנה (חריג: נושאים נפרדים לחלוטין)
+- מסקנה בפתיחה, לא בסוף
+- מעברים טקסטואליים, לא כותרות
+- ניטרול טענות חלשות לפני ניתוח מעמיק
+- ציטוטי פסיקה כבלוקים מוגדלים
+
+### טענות צדדים:
+- עוררים: "העוררים טוענים כי...", "לטענתם...", "עוד ציינו כי..."
+- ועדה: "הוועדה המקומית הציגה/הבהירה/הוסיפה כי..."
+- מבקשי היתר: "מבקשי ההיתר דוחים מכל וכל...", "לטענתם...", "מבקשי ההיתר מציינים כי..."
+""")
+
+    # DB patterns (actual examples from Dafna's decisions)
+    patterns = await db.get_style_patterns()
+    if patterns:
+        lines.append("### דפוסים שחולצו מהחלטות קודמות:")
+        grouped: dict[str, list] = {}
+        for p in patterns:
+            grouped.setdefault(p["pattern_type"], []).append(p)
+
+        type_names = {
+            "opening_formula": "פתיחה",
+            "transition": "מעברים",
+            "characteristic_phrase": "ביטויים אופייניים",
+            "closing_formula": "סיום",
+            "citation_style": "ציטוט",
+        }
+        for ptype in ["characteristic_phrase", "transition", "opening_formula", "closing_formula"]:
+            items = grouped.get(ptype, [])
+            if items:
+                lines.append(f"\n**{type_names.get(ptype, ptype)}:**")
+                for item in items[:8]:
+                    lines.append(f"- {item['pattern_text']}")
+
    return "\n".join(lines)


@@ -552,6 +656,156 @@ async def _build_previous_blocks_context(case_id: UUID, decision: dict | None) -
    return "\n\n".join(parts)


+# ── Context-only mode (for Claude Code to write) ─────────────────
+
+async def get_block_context(case_id: UUID, block_id: str, instructions: str = "") -> dict:
+    """Return full context package for a block WITHOUT calling Claude API.
+
+    Claude Code (or any external writer) uses this context to write the block,
+    then saves it via save_block_content. Template blocks are returned
+    ready-made ("mode": "template"); all other blocks get "mode": "context".
+
+    Raises:
+        ValueError: unknown block_id, case not found, or block-yod requested
+            while the decision's direction_doc is not approved.
+    """
+    if block_id not in BLOCK_CONFIG:
+        raise ValueError(f"Unknown block: {block_id}")
+
+    block_cfg = BLOCK_CONFIG[block_id]
+    case = await db.get_case(case_id)
+    if not case:
+        raise ValueError(f"Case {case_id} not found")
+    decision = await db.get_decision_by_case(case_id)
+
+    # Template blocks — return content directly
+    if block_id in TEMPLATE_WRITERS:
+        content = TEMPLATE_WRITERS[block_id](case, decision)
+        return {
+            "block_id": block_id,
+            "title": block_cfg["title"],
+            "mode": "template",
+            "content": content,
+        }
+
+    # Block yod requires an approved direction — checked before the costly context build
+    if block_id == "block-yod":
+        dir_doc = (decision or {}).get("direction_doc") or {}
+        if not dir_doc.get("approved"):
+            raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר.")
+
+    # Build all context components
+    prompt_template = BLOCK_PROMPTS.get(block_id, "")
+    case_context = _build_case_context(case, decision)
+    source_context = await _build_source_context(case_id, block_id)
+    claims_context = await _build_claims_context(case_id)
+    direction_context = _build_direction_context(decision)
+    plans_context = await _build_plans_context(case_id)
+    precedents_context = await _build_precedents_context(case_id, block_id)
+    style_context = await _build_style_context()
+    discussion_context = await _build_previous_blocks_context(case_id, decision)
+    outcome = (decision or {}).get("outcome", "rejected")
+    structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")
+
+    formatted_prompt = prompt_template.format(
+        case_context=case_context,
+        source_context=source_context,
+        claims_context=claims_context,
+        direction_context=direction_context,
+        plans_context=plans_context,
+        precedents_context=precedents_context,
+        style_context=style_context,
+        discussion_context=discussion_context,
+        structure_guidance=structure_guidance,
+    )
+    if instructions:
+        formatted_prompt += f"\n\n## הנחיות נוספות:\n{instructions}"
+
+    return {
+        "block_id": block_id,
+        "title": block_cfg["title"],
+        "mode": "context",
+        "prompt": formatted_prompt,
+        "source_documents": source_context,
+        "claims": claims_context,
+        "direction": direction_context,
+        "precedents": precedents_context,
+        "style_guide": style_context,
+        "previous_blocks": discussion_context,
+    }
+
+
+async def save_block_content(case_id: UUID, block_id: str, content: str) -> dict:
+    """Store text written outside this service (e.g. by Claude Code) as a block.
+
+    A decision row is created for the case when none exists yet.
+    Raises ValueError for a block_id that is not in BLOCK_CONFIG.
+    """
+    cfg = BLOCK_CONFIG.get(block_id)
+    if cfg is None:
+        raise ValueError(f"Unknown block: {block_id}")
+
+    existing = await db.get_decision_by_case(case_id)
+    decision = existing if existing else await db.create_decision(case_id=case_id)
+    record = _build_result(block_id, content, cfg)
+    record.update(generation_type="claude-code", model_used="claude-code")
+    await store_block(UUID(decision["id"]), record)
+    return record
+
+
+# ── Renumbering ───────────────────────────────────────────────────
+
+async def renumber_all_blocks(decision_id: UUID) -> dict:
+    """Renumber the paragraphs of every block in a decision as one sequence.
+
+    Walks the blocks in block_index order and replaces each line-leading
+    "N." / "**N.**" marker with the next value of one running counter.
+    Markers directly followed by a digit ("3.5", "12.3.2024") are left
+    untouched: they are decimals or dates, not paragraph numbers.
+    All changed blocks are written back inside a single transaction.
+
+    Returns {"total_paragraphs": int, "blocks_updated": int}.
+    """
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch(
+            """SELECT block_id, block_index, content, word_count
+               FROM decision_blocks WHERE decision_id = $1
+               ORDER BY block_index""",
+            decision_id,
+        )
+
+    # Blocks that shouldn't be numbered
+    skip_blocks = {"block-alef", "block-bet", "block-gimel", "block-dalet", "block-yod-bet"}
+    marker = re.compile(r'^(\*\*)?(\d+)(\.?\*?\*?\.)(?!\d)', re.MULTILINE)
+    current_num = 1
+
+    def replace_num(match):
+        nonlocal current_num
+        prefix = match.group(1) or ""  # opening bold marker, if any
+        suffix = match.group(3) or ""  # the dot that ends the marker
+        result = f"{prefix}{current_num}{suffix}"
+        current_num += 1
+        return result
+
+    changed = []
+    for row in rows:
+        if row["block_id"] in skip_blocks or not row["content"]:
+            continue
+        new_content = marker.sub(replace_num, row["content"])
+        if new_content != row["content"]:
+            changed.append((new_content, row["block_id"]))
+    if changed:
+        async with pool.acquire() as conn:
+            async with conn.transaction():  # all-or-nothing: never leave a half-renumbered decision
+                for new_content, block_id in changed:
+                    await conn.execute(
+                        "UPDATE decision_blocks SET content = $1, updated_at = now() WHERE decision_id = $2 AND block_id = $3",
+                        new_content, decision_id, block_id,
+                    )
+
+    return {"total_paragraphs": current_num - 1, "blocks_updated": len(changed)}
+
+
 # ── Store block ───────────────────────────────────────────────────

 async def store_block(decision_id: UUID, block_result: dict) -> None:
--- a/mcp-server/src/legal_mcp/services/brainstorm.py
+++ b/mcp-server/src/legal_mcp/services/brainstorm.py
@@ -9,13 +9,13 @@

 from __future__ import annotations

-import json
 import logging
 from uuid import UUID

 import anthropic

 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db

 logger = logging.getLogger(__name__)
@@ -153,14 +153,8 @@ async def generate_directions(
    )

    raw = message.content[0].text.strip()
-    try:
-        import re
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            result = json.loads(json_match.group())
-        else:
-            result = json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
        logger.warning("Failed to parse brainstorm response: %s", raw[:300])
        return {
            "key_claims": [],
--- a/mcp-server/src/legal_mcp/services/claims_extractor.py
+++ b/mcp-server/src/legal_mcp/services/claims_extractor.py
@@ -7,7 +7,6 @@

 from __future__ import annotations

-import json
 import logging
 import re
 from uuid import UUID
@@ -15,6 +14,7 @@ from uuid import UUID
 import anthropic

 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db

 logger = logging.getLogger(__name__)
@@ -44,16 +44,13 @@ EXTRACT_CLAIMS_PROMPT = """אתה מנתח מסמכים משפטיים בתחו
 - permit_applicant — מבקש/ת היתר

 ## פלט:
-החזר JSON array בלבד:
-[
-  {
-    "party_role": "appellant",
-    "claim_text": "הטענה בגוף שלישי, בעברית",
-    "topic": "נושא הטענה בקצרה (3-5 מילים)"
-  }
-]
+החזר JSON array בלבד — ללא markdown, ללא הסברים, רק JSON:
+[{"party_role": "appellant", "claim_text": "הטענה בגוף שלישי", "topic": "נושא"}]

-אם אין טענות — החזר [].
+חשוב:
+- claim_text קצר — עד 150 מילים לכל טענה
+- קבץ טענות דומות לטענה אחת
+- אם אין טענות החזר []
 """


@@ -72,48 +69,59 @@ async def extract_claims_with_ai(
    Returns:
        רשימת טענות עם party_role, claim_text, topic
    """
-    # For very long documents, truncate but try to keep complete paragraphs
-    max_chars = 25000
-    if len(text) > max_chars:
-        # Find a paragraph break near the limit
-        cutoff = text.rfind("\n\n", 0, max_chars)
-        if cutoff < max_chars // 2:
-            cutoff = max_chars
-        sample = text[:cutoff]
-        logger.info("Document truncated from %d to %d chars", len(text), len(sample))
-    else:
-        sample = text
-
    context = f"סוג המסמך: {doc_type}"
    if party_hint:
        context += f"\nהצד המגיש: {party_hint}"

-    client = _get_anthropic()
-    message = client.messages.create(
-        model="claude-sonnet-4-20250514",
-        max_tokens=4096,
-        messages=[
-            {
-                "role": "user",
-                "content": (
-                    f"{EXTRACT_CLAIMS_PROMPT}\n\n"
-                    f"{context}\n\n"
-                    f"--- תחילת מסמך ---\n{sample}\n--- סוף מסמך ---"
-                ),
-            }
-        ],
-    )
+    # For very long documents, split into chunks and merge results
+    max_chars_per_call = 25000
+    chunks = []
+    if len(text) > max_chars_per_call:
+        # Split at paragraph boundaries
+        pos = 0
+        while pos < len(text):
+            end = min(pos + max_chars_per_call, len(text))
+            if end < len(text):
+                # Find paragraph break near the limit
+                break_pos = text.rfind("\n\n", pos, end)
+                if break_pos > pos + max_chars_per_call // 2:
+                    end = break_pos
+            chunks.append(text[pos:end])
+            pos = end
+        logger.info("Document split into %d chunks (%d chars total)", len(chunks), len(text))
+    else:
+        chunks = [text]

-    raw = message.content[0].text.strip()
-    try:
-        # Extract JSON array from response
-        json_match = re.search(r"\[.*\]", raw, re.DOTALL)
-        if json_match:
-            claims = json.loads(json_match.group())
-        else:
-            claims = json.loads(raw)
-    except json.JSONDecodeError:
-        logger.warning("Failed to parse claims response: %s", raw[:200])
+    all_claims = []
+    client = _get_anthropic()
+
+    for i, chunk in enumerate(chunks):
+        chunk_label = f" (חלק {i+1}/{len(chunks)})" if len(chunks) > 1 else ""
+        message = client.messages.create(
+            model="claude-sonnet-4-20250514",
+            max_tokens=8192,
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        f"{EXTRACT_CLAIMS_PROMPT}\n\n"
+                        f"{context}{chunk_label}\n\n"
+                        f"--- תחילת מסמך ---\n{chunk}\n--- סוף מסמך ---"
+                    ),
+                }
+            ],
+        )
+
+        raw = message.content[0].text.strip()
+        claims = parse_llm_json(raw)
+        if claims is None:
+            logger.warning("Failed to parse claims for chunk %d: %s", i, raw[:200])
+            continue
+        if isinstance(claims, list):
+            all_claims.extend(claims)
+
+    claims = all_claims
+    if not claims:
        return []

    if not isinstance(claims, list):
--- a/mcp-server/src/legal_mcp/services/classifier.py
+++ b/mcp-server/src/legal_mcp/services/classifier.py
@@ -8,13 +8,13 @@

 from __future__ import annotations

-import json
 import logging
 import re

 import anthropic

 from legal_mcp import config
+from legal_mcp.config import parse_llm_json

 logger = logging.getLogger(__name__)

@@ -98,8 +98,8 @@ async def classify_document(text: str) -> dict:

    client = _get_anthropic()
    message = client.messages.create(
-        model="claude-haiku-4-5-20251001",
-        max_tokens=256,
+        model="claude-sonnet-4-20250514",
+        max_tokens=512,
        messages=[
            {
                "role": "user",
@@ -109,14 +109,8 @@ async def classify_document(text: str) -> dict:
    )

    raw = message.content[0].text.strip()
-    try:
-        # Extract JSON from response (handle markdown code blocks)
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            result = json.loads(json_match.group())
-        else:
-            result = json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
        logger.warning("Failed to parse classification response: %s", raw)
        return {"doc_type": "reference", "confidence": 0.0, "reasoning": "סיווג נכשל"}

@@ -142,7 +136,7 @@ async def identify_parties(text: str) -> dict:

    client = _get_anthropic()
    message = client.messages.create(
-        model="claude-haiku-4-5-20251001",
+        model="claude-sonnet-4-20250514",
        max_tokens=512,
        messages=[
            {
@@ -153,13 +147,8 @@ async def identify_parties(text: str) -> dict:
    )

    raw = message.content[0].text.strip()
-    try:
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            result = json.loads(json_match.group())
-        else:
-            result = json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
        logger.warning("Failed to parse parties response: %s", raw)
        return {
            "appellants": [],
--- a/mcp-server/src/legal_mcp/services/extractor.py
+++ b/mcp-server/src/legal_mcp/services/extractor.py
@@ -45,7 +45,7 @@ async def extract_text(file_path: str) -> tuple[str, int]:
        return _extract_docx(path), 0
    elif suffix == ".rtf":
        return _extract_rtf(path), 0
-    elif suffix == ".txt":
+    elif suffix in (".txt", ".md"):
        return path.read_text(encoding="utf-8"), 0
    else:
        raise ValueError(f"Unsupported file type: {suffix}")
--- a/mcp-server/src/legal_mcp/services/learning_loop.py
+++ b/mcp-server/src/legal_mcp/services/learning_loop.py
@@ -9,14 +9,13 @@

 from __future__ import annotations

-import json
 import logging
-import re
 from uuid import UUID

 import anthropic

 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db

 logger = logging.getLogger(__name__)
@@ -112,14 +111,11 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict:
    )

    raw = message.content[0].text.strip()
-    try:
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            return json.loads(json_match.group())
-        return json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
        logger.warning("Failed to parse lessons response")
        return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]}
+    return result


 async def process_final_version(
--- a/mcp-server/src/legal_mcp/services/processor.py
+++ b/mcp-server/src/legal_mcp/services/processor.py
@@ -57,6 +57,18 @@ async def process_document(document_id: UUID, case_id: UUID) -> dict:
            len(classification_result["parties"].get("respondents", [])),
        )

+        # Step 1.6: Update case parties if empty
+        if case_id and case:
+            parties = classification_result.get("parties", {})
+            updates = {}
+            if not case.get("appellants") and parties.get("appellants"):
+                updates["appellants"] = parties["appellants"]
+            if not case.get("respondents") and parties.get("respondents"):
+                updates["respondents"] = parties["respondents"]
+            if updates:
+                await db.update_case(case_id, **updates)
+                logger.info("Updated case parties: %s", updates)
+
        # Step 2: Chunk
        logger.info("Chunking document (%d chars)", len(text))
        chunks = chunker.chunk_document(text)
--- a/mcp-server/src/legal_mcp/services/qa_validator.py
+++ b/mcp-server/src/legal_mcp/services/qa_validator.py
@@ -21,6 +21,7 @@ from uuid import UUID
 import anthropic

 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db

 logger = logging.getLogger(__name__)
@@ -123,8 +124,17 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
    if not claims:
        return {"name": "claims_coverage", "passed": True, "errors": [], "severity": "critical"}

-    # Filter: only claims from original pleadings
-    source_claims = [c for c in claims if c.get("source_document", "") != "block-zayin"]
+    # Filter: only appellant/respondent claims from original pleadings.
+    # Committee/permit_applicant claims are defensive positions, not claims
+    # that need to be "addressed" in the discussion.
+    source_claims = [
+        c for c in claims
+        if c.get("source_document", "") != "block-zayin"
+        and c.get("party_role") in ("appellant", "respondent")
+    ]
+    if not source_claims:
+        # Fallback: all non-block-zayin claims
+        source_claims = [c for c in claims if c.get("source_document", "") != "block-zayin"]
    if not source_claims:
        source_claims = claims

@@ -133,13 +143,13 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
    for i, c in enumerate(source_claims, 1):
        claims_text += f"טענה #{i}: {c['claim_text'][:300]}\n"

-    # Truncate discussion if needed
-    discussion = yod["content"][:12000]
+    # Send full discussion — don't truncate
+    discussion = yod["content"]

    client = _get_anthropic()
    message = client.messages.create(
-        model="claude-haiku-4-5-20251001",
-        max_tokens=4096,
+        model="claude-sonnet-4-20250514",
+        max_tokens=8192,
        messages=[{
            "role": "user",
            "content": f"""{CLAIMS_CHECK_PROMPT}
@@ -153,13 +163,8 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
    )

    raw = message.content[0].text.strip()
-    # Strip markdown code blocks if present
-    raw = re.sub(r"^```(?:json)?\s*", "", raw)
-    raw = re.sub(r"\s*```$", "", raw)
-    try:
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        parsed = json.loads(json_match.group()) if json_match else json.loads(raw)
-    except (json.JSONDecodeError, AttributeError):
+    parsed = parse_llm_json(raw)
+    if parsed is None:
        logger.warning("Failed to parse claims check: %s", raw[:300])
        # Fallback: assume all covered (don't block export on parse failure)
        return {"name": "claims_coverage", "passed": True,
@@ -279,8 +284,8 @@ def check_sequential_numbering(blocks: list[dict]) -> dict:

    for block in blocks:
        content = block.get("content", "")
-        # Find numbered paragraphs (e.g., "1.", "2.", "15.")
-        numbers = re.findall(r"^(\d+)\.", content, re.MULTILINE)
+        # Find numbered paragraphs: plain "1." or bold "**1.**"
+        numbers = re.findall(r"^(?:\*\*)?(\d+)\.(?:\*\*)?", content, re.MULTILINE)
        all_numbers.extend(int(n) for n in numbers)

    if all_numbers:
--- a/mcp-server/src/legal_mcp/tools/drafting.py
+++ b/mcp-server/src/legal_mcp/tools/drafting.py
@@ -382,6 +382,50 @@ async def export_docx(case_number: str, output_path: str = "") -> str:
        }, ensure_ascii=False, indent=2)


+async def get_block_context(case_number: str, block_id: str, instructions: str = "") -> str:
+    """קבלת הקשר מלא לכתיבת בלוק — ללא קריאה ל-API. Claude Code כותב את הבלוק.
+
+    Args:
+        case_number: מספר תיק הערר
+        block_id: מזהה הבלוק (block-he, block-vav, ..., block-yod-bet)
+        instructions: הנחיות נוספות
+    """
+    from legal_mcp.services import block_writer
+
+    case = await db.get_case_by_number(case_number)
+    if not case:
+        return f"תיק {case_number} לא נמצא."
+
+    case_id = UUID(case["id"])
+    try:
+        ctx = await block_writer.get_block_context(case_id, block_id, instructions)
+        return json.dumps(ctx, default=str, ensure_ascii=False, indent=2)
+    except ValueError as e:
+        return str(e)
+
+
+async def save_block_content(case_number: str, block_id: str, content: str) -> str:
+    """שמירת בלוק שנכתב ע"י Claude Code ב-DB.
+
+    Args:
+        case_number: מספר תיק הערר
+        block_id: מזהה הבלוק
+        content: הטקסט שנכתב
+    """
+    from legal_mcp.services import block_writer
+
+    case = await db.get_case_by_number(case_number)
+    if not case:
+        return f"תיק {case_number} לא נמצא."
+
+    case_id = UUID(case["id"])
+    try:
+        result = await block_writer.save_block_content(case_id, block_id, content)
+        return json.dumps(result, default=str, ensure_ascii=False, indent=2)
+    except ValueError as e:
+        return str(e)
+
+
 async def analyze_style() -> str:
    """הרצת ניתוח סגנון על קורפוס ההחלטות של דפנה. מחלץ דפוסי כתיבה ושומר אותם."""
    from legal_mcp.services.style_analyzer import analyze_corpus
Author	SHA1	Message	Date
Chaim	081c7fb17a	Replace Haiku with Sonnet in classifier for better accuracy classify_document and identify_parties both used Haiku, which produced parsing failures and 0% confidence on Beit HaKerem documents. Sonnet handles Hebrew legal documents more reliably. No more Haiku usage in the entire codebase. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-04 07:47:12 +00:00
Chaim	586f1db402	QA claims check: Haiku→Sonnet + filter appellant claims only Three fixes for claims_coverage false negatives (55% → expected ~85%+): 1. Model upgrade: Haiku → Sonnet for semantic matching. Haiku missed obvious matches (e.g., paragraph about "כריתת עצים" not matching claim about tree cutting). Sonnet understands context better. 2. Filter: only check appellant/respondent claims, not committee or permit_applicant claims. Committee claims are defensive positions ("the application complies with the plan") — they don't need to be "addressed" in the discussion section. 3. Send full discussion text (was truncated to 12K chars). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-04 07:37:23 +00:00
Chaim	9d0a73a1dc	Add context-only mode: Claude Code writes blocks, no API needed New architecture: MCP provides context, Claude Code writes. New functions: - get_block_context(case_id, block_id) → returns full context package (prompt, source docs, claims, direction, precedents, style guide) WITHOUT calling Anthropic API - save_block_content(case_id, block_id, content) → saves block to DB New MCP tools: get_block_context, save_block_content The old write_block (API-based) still works as fallback. The new flow uses Claude Code's own model (Opus 4.6, 1M context) which has no separate API billing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 16:18:25 +00:00
Chaim	7033d2d3ee	Embed full style guide in block prompts for Dafna's voice _build_style_context rewritten from 10-line summary to comprehensive style guide including: - Tone rules per appeal type (warm for licensing, cold for levy) - 15 mandatory expressions ("כידוע", "ברי כי", "אין בידנו לקבל") - Discussion structure rules (continuous prose, conclusion first) - Per-party phrasing templates (appellants, committee, permit applicants) - DB patterns grouped by type (phrases, transitions, openings, closings) This addresses the main quality gap: style rated 2/5 because the output was "dry and overly formal" vs Dafna's "direct and clear" voice. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 16:12:09 +00:00
Chaim	e725f9ecd7	Fix claims parsing: truncated JSON recovery + chunking + compact output config.py parse_llm_json: Added truncated JSON recovery. When Claude's output is cut mid-JSON (common with long claim lists), the parser now: - Finds the last complete JSON item (closing "}") - Closes the array/object brackets - Returns partial but valid results instead of None Tested: recovers 2/3 items from truncated array, all cases pass. claims_extractor.py: - Prompt asks for compact output (150 words max per claim, group similar) - Explicitly requests "no markdown, no explanations, JSON only" - Long documents split into chunks at paragraph boundaries - Each chunk processed separately, results merged - max_tokens already at 8192 This fixes the recurring "0 claims" bug for committee responses and permit applicant responses where the JSON was getting truncated. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 16:04:34 +00:00
Chaim	7d1dc73112	Fix max_tokens to 16K for Opus (API limit is 32K, need room for thinking) block-yod max_tokens reduced from 32K to 16K — the API returned "max_tokens: 32768 > 32000" error. With thinking enabled, the actual limit for output is lower. 16K is sufficient for discussion blocks. Also: extractor.py now supports .md files (was missing, blocked Beit HaKerem upload). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 16:00:49 +00:00
Chaim	e24e24dac5	Maximize context and output per Anthropic best practices Per official Anthropic documentation (April 2026): Output tokens increased to match model capabilities: - block-yod (discussion): 8K → 32K (Opus supports 128K) - block-zayin (claims): 4K → 16K - block-vav (background): 4K → 16K - claims_extractor: 4K → 8K (fixes truncated JSON) - qa_validator: 4K → 8K Source documents sent in full (not truncated): - Was: 3000 chars per doc, 15K total - Now: full document text, no truncation - Reduces hallucinations: "extract word-for-word quotes first" Prompt structure follows long-context tips: - Source documents placed FIRST (top of prompt) - Instructions and query placed LAST - "Queries at the end improve quality by up to 30%" Extended thinking uses adaptive mode for Opus 4.6. Streaming enabled for all requests > 21K tokens. Unified JSON parsing via parse_llm_json() helper in config.py. Applied to: classifier, claims_extractor, brainstorm, qa_validator, learning_loop (5 files). Also: extractor.py now supports .md files. Sources: - https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking - https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/long-context-tips - https://docs.anthropic.com/en/docs/minimizing-hallucinations - https://docs.anthropic.com/en/docs/about-claude/models/overview Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 14:17:43 +00:00
Chaim	bed9d5c7e9	Improve block-zayin: synthesize claims by topic + fix markdown JSON parsing block_writer: Rewrote block-zayin prompt to require synthesis by topic instead of listing each claim separately. Now produces 3 organized sections (appellants 8, committee 6, permit applicants 3+) instead of 40 scattered paragraphs. Target: 800-1500 words. claims_extractor: Fix markdown code block stripping (same bug as qa_validator had). Enables parsing claims from Claude responses wrapped in ```json blocks. Tested on Hecht: block-zayin from 40 paragraphs/1049 words to 17 organized paragraphs/1039 words. Structure now matches Dafna's original (3 parties, grouped by topic). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 12:54:42 +00:00
Chaim	e438740ab4	Add renumber_all_blocks + fix sequential_numbering check for bold format block_writer: new renumber_all_blocks() function that renumbers all paragraphs across all blocks sequentially (1, 2, 3...). Handles both plain "N." and bold "**N.**" formats. Added missing 'import re'. qa_validator: sequential_numbering check now matches bold-formatted numbers (**N.**) in addition to plain (N.). Tested on Hecht: renumbered 115 paragraphs across 7 blocks, QA 6/6. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 12:30:31 +00:00
Chaim	7781987c3a	Fix precedents search + auto-update case parties block_writer: _build_precedents_context now searches both paragraph_embeddings (other decisions by Dafna) and case_law_embeddings (precedent case law). Previously only searched document_chunks which had no cross-case data. Now returns ~2400 chars from 3 other decisions. processor: Step 1.6 auto-updates case appellants/respondents from classifier results when they're empty. Prevents blank party fields. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-04-03 11:59:33 +00:00