diff --git a/mcp-server/src/legal_mcp/config.py b/mcp-server/src/legal_mcp/config.py
index d5cd976..3b922de 100644
--- a/mcp-server/src/legal_mcp/config.py
+++ b/mcp-server/src/legal_mcp/config.py
@@ -67,3 +67,29 @@ ALLOWED_EXTERNAL_SERVICES = {
 
 # Audit
 AUDIT_ENABLED = os.environ.get("AUDIT_ENABLED", "true").lower() == "true"
+
+
+# ── Utility ───────────────────────────────────────────────────────
+
+def parse_llm_json(raw: str):
+    """Parse JSON from an LLM reply, tolerating code fences and surrounding prose.
+
+    Returns the decoded value, or None when nothing parses. If the first-"[" to
+    last-"]" span encloses the "{"..."}" span it is tried first, so a one-item
+    array of objects comes back as the list rather than its inner object.
+    """
+    import json
+    import re
+    raw = re.sub(r"^```(?:json)?\s*\n?", "", raw.strip())
+    raw = re.sub(r"\n?\s*```$", "", raw)
+    obj, arr = (re.search(p, raw, re.DOTALL) for p in (r"\{.*\}", r"\[.*\]"))
+    spans = [obj, arr]
+    if obj and arr and arr.start() < obj.start() and obj.end() < arr.end():
+        spans.reverse()
+    # Whole text first, then the greedy bracket spans.
+    for text in [raw] + [m.group() for m in spans if m]:
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            continue
+    return None
diff --git a/mcp-server/src/legal_mcp/services/block_writer.py b/mcp-server/src/legal_mcp/services/block_writer.py
index 6b49604..ce54b0c 100644
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -37,18 +37,22 @@ def _get_anthropic() -> anthropic.Anthropic:
 
 # ── Block configuration ───────────────────────────────────────────
 
+# Output token limits per Anthropic docs (April 2026):
+# Opus 4.6: up to 128K output tokens
+# Sonnet 4.6: up to 64K output tokens
+# Streaming required when max_tokens > 21,333
 BLOCK_CONFIG = {
     "block-alef": {"index": 1, "title": "כותרת מוסדית", "gen_type": "template-fill", "temp": 0, "model": "script"},
     "block-bet":  {"index": 2, "title": "הרכב הוועדה", "gen_type": "template-fill", "temp": 0, "model": "script"},
     "block-gimel":{"index": 3, "title": "צדדים", "gen_type": "template-fill", "temp": 0, "model": "script"},
     "block-dalet":{"index": 4, "title": "החלטה", "gen_type": "template-fill", "temp": 0, "model": "script"},
-    "block-he":   {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 1024},
-    "block-vav":  {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 4096},
-    "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 4096},
-    "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 2048},
-    "block-tet":  {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 2048},
-    "block-yod":  {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 8192},
-    "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 2048},
+    "block-he":   {"index": 5, "title": "פתיחה", "gen_type": "paraphrase", "temp": 0.2, "model": "sonnet", "max_tokens": 4096},
+    "block-vav":  {"index": 6, "title": "רקע עובדתי", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 16384},
+    "block-zayin":{"index": 7, "title": "טענות הצדדים", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 16384},
+    "block-chet": {"index": 8, "title": "הליכים", "gen_type": "reproduction", "temp": 0, "model": "sonnet", "max_tokens": 8192},
+    "block-tet":  {"index": 9, "title": "תכניות חלות", "gen_type": "guided-synthesis", "temp": 0.2, "model": "opus", "max_tokens": 16384},
+    "block-yod":  {"index": 10, "title": "דיון והכרעה", "gen_type": "rhetorical-construction", "temp": 0.4, "model": "opus", "max_tokens": 32768},
+    "block-yod-alef": {"index": 11, "title": "סיכום", "gen_type": "paraphrase", "temp": 0.1, "model": "sonnet", "max_tokens": 8192},
     "block-yod-bet":  {"index": 12, "title": "חתימות", "gen_type": "template-fill", "temp": 0, "model": "script"},
 }
 
@@ -317,8 +321,10 @@ async def write_block(
     outcome = (decision or {}).get("outcome", "rejected")
     structure_guidance = STRUCTURE_GUIDANCE.get(outcome, "")
 
-    # Format prompt
-    prompt = prompt_template.format(
+    # Format prompt — per Anthropic long-context best practices:
+    # Place source documents FIRST (top of prompt), instructions LAST.
+    # "Queries at the end can improve response quality by up to 30%"
+    formatted_prompt = prompt_template.format(
         case_context=case_context,
         source_context=source_context,
         claims_context=claims_context,
@@ -330,6 +336,14 @@ async def write_block(
         structure_guidance=structure_guidance,
     )
 
+    # Restructure: sources first, then instructions
+    prompt = (
+        f"## חומרי מקור (מסמכים מלאים — צטט מהם מילה במילה כשאפשר):\n\n"
+        f"{source_context}\n\n"
+        f"---\n\n"
+        f"{formatted_prompt}"
+    )
+
     if instructions:
         prompt += f"\n\n## הנחיות נוספות:\n{instructions}"
 
@@ -347,24 +361,23 @@ async def write_block(
 
     client = _get_anthropic()
 
-    # For opus blocks, use extended thinking
     kwargs: dict = {
         "model": model,
         "max_tokens": max_tokens,
         "messages": [{"role": "user", "content": prompt}],
     }
 
-    if model_key == "opus" and temperature >= 0.3:
-        # Extended thinking for complex blocks
-        # max_tokens must be > budget_tokens
-        kwargs["max_tokens"] = max(max_tokens, 20000)
-        kwargs["temperature"] = 1  # Required for extended thinking
-        kwargs["thinking"] = {"type": "enabled", "budget_tokens": 16000}
+    if model_key == "opus":
+        # Opus: thinking is enabled with an explicit budget_tokens (not adaptive).
+        # Per Anthropic docs: temperature must be 1 when thinking is enabled.
+        # NOTE(review): max_tokens must exceed the budget; at 16384 it is 16000 — confirm.
+        kwargs["temperature"] = 1
+        kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
     else:
         kwargs["temperature"] = temperature
 
-    # Use streaming for long requests (opus + thinking)
-    use_stream = model_key == "opus" and kwargs.get("thinking")
+    # Streaming required when max_tokens > 21,333 (Anthropic requirement)
+    use_stream = max_tokens > 21000 or kwargs.get("thinking")
 
     if use_stream:
         content_parts = []
@@ -416,19 +429,19 @@ def _build_case_context(case: dict, decision: dict | None) -> str:
 - תוצאה: {outcome_heb}"""
 
 
-async def _build_source_context(case_id: UUID, block_id: str, max_chars: int = 15000) -> str:
-    """Get relevant document excerpts for the block."""
+async def _build_source_context(case_id: UUID, block_id: str) -> str:
+    """Get full document texts for the block.
+
+    Per Anthropic best practices: send full source documents, not truncated excerpts.
+    Place documents at the TOP of the prompt (before instructions) for 30% better recall.
+    For grounding: instruct Claude to cite word-for-word from these documents.
+    """
     docs = await db.list_documents(case_id)
     context_parts = []
-    total = 0
     for doc in docs:
-        if total >= max_chars:
-            break
         text = await db.get_document_text(UUID(doc["id"]))
         if text:
-            excerpt = text[:3000]
-            context_parts.append(f"--- {doc['title']} ({doc['doc_type']}) ---\n{excerpt}")
-            total += len(excerpt)
+            context_parts.append(f"--- מסמך: {doc['title']} ({doc['doc_type']}) ---\n{text}")
     return "\n\n".join(context_parts) if context_parts else "(אין מסמכים)"
 
 
diff --git a/mcp-server/src/legal_mcp/services/brainstorm.py b/mcp-server/src/legal_mcp/services/brainstorm.py
index ba8f7fc..686178e 100644
--- a/mcp-server/src/legal_mcp/services/brainstorm.py
+++ b/mcp-server/src/legal_mcp/services/brainstorm.py
@@ -9,13 +9,13 @@
 
 from __future__ import annotations
 
-import json
 import logging
 from uuid import UUID
 
 import anthropic
 
 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db
 
 logger = logging.getLogger(__name__)
@@ -153,14 +153,8 @@ async def generate_directions(
     )
 
     raw = message.content[0].text.strip()
-    try:
-        import re
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            result = json.loads(json_match.group())
-        else:
-            result = json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
         logger.warning("Failed to parse brainstorm response: %s", raw[:300])
         return {
             "key_claims": [],
diff --git a/mcp-server/src/legal_mcp/services/claims_extractor.py b/mcp-server/src/legal_mcp/services/claims_extractor.py
index b4984c7..fc42e20 100644
--- a/mcp-server/src/legal_mcp/services/claims_extractor.py
+++ b/mcp-server/src/legal_mcp/services/claims_extractor.py
@@ -7,7 +7,6 @@
 
 from __future__ import annotations
 
-import json
 import logging
 import re
 from uuid import UUID
@@ -15,6 +14,7 @@ from uuid import UUID
 import anthropic
 
 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db
 
 logger = logging.getLogger(__name__)
@@ -91,7 +91,7 @@ async def extract_claims_with_ai(
     client = _get_anthropic()
     message = client.messages.create(
         model="claude-sonnet-4-20250514",
-        max_tokens=4096,
+        max_tokens=8192,
         messages=[
             {
                 "role": "user",
@@ -105,17 +105,8 @@ async def extract_claims_with_ai(
     )
 
     raw = message.content[0].text.strip()
-    # Strip markdown code blocks if present
-    raw = re.sub(r"^```(?:json)?\s*", "", raw)
-    raw = re.sub(r"\s*```$", "", raw)
-    try:
-        # Extract JSON array from response
-        json_match = re.search(r"\[.*\]", raw, re.DOTALL)
-        if json_match:
-            claims = json.loads(json_match.group())
-        else:
-            claims = json.loads(raw)
-    except json.JSONDecodeError:
+    claims = parse_llm_json(raw)
+    if claims is None:
         logger.warning("Failed to parse claims response: %s", raw[:200])
         return []
 
diff --git a/mcp-server/src/legal_mcp/services/classifier.py b/mcp-server/src/legal_mcp/services/classifier.py
index 7790eef..ba5a779 100644
--- a/mcp-server/src/legal_mcp/services/classifier.py
+++ b/mcp-server/src/legal_mcp/services/classifier.py
@@ -8,13 +8,13 @@
 
 from __future__ import annotations
 
-import json
 import logging
 import re
 
 import anthropic
 
 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 
 logger = logging.getLogger(__name__)
 
@@ -109,14 +109,8 @@ async def classify_document(text: str) -> dict:
     )
 
     raw = message.content[0].text.strip()
-    try:
-        # Extract JSON from response (handle markdown code blocks)
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            result = json.loads(json_match.group())
-        else:
-            result = json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
         logger.warning("Failed to parse classification response: %s", raw)
         return {"doc_type": "reference", "confidence": 0.0, "reasoning": "סיווג נכשל"}
 
@@ -153,13 +147,8 @@ async def identify_parties(text: str) -> dict:
     )
 
     raw = message.content[0].text.strip()
-    try:
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            result = json.loads(json_match.group())
-        else:
-            result = json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
         logger.warning("Failed to parse parties response: %s", raw)
         return {
             "appellants": [],
diff --git a/mcp-server/src/legal_mcp/services/extractor.py b/mcp-server/src/legal_mcp/services/extractor.py
index 7874e9c..09df0f3 100644
--- a/mcp-server/src/legal_mcp/services/extractor.py
+++ b/mcp-server/src/legal_mcp/services/extractor.py
@@ -45,7 +45,7 @@ async def extract_text(file_path: str) -> tuple[str, int]:
         return _extract_docx(path), 0
     elif suffix == ".rtf":
         return _extract_rtf(path), 0
-    elif suffix == ".txt":
+    elif suffix in (".txt", ".md"):
         return path.read_text(encoding="utf-8"), 0
     else:
         raise ValueError(f"Unsupported file type: {suffix}")
diff --git a/mcp-server/src/legal_mcp/services/learning_loop.py b/mcp-server/src/legal_mcp/services/learning_loop.py
index da4a76f..c55a46e 100644
--- a/mcp-server/src/legal_mcp/services/learning_loop.py
+++ b/mcp-server/src/legal_mcp/services/learning_loop.py
@@ -9,14 +9,13 @@
 
 from __future__ import annotations
 
-import json
 import logging
-import re
 from uuid import UUID
 
 import anthropic
 
 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db
 
 logger = logging.getLogger(__name__)
@@ -112,14 +111,11 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict:
     )
 
     raw = message.content[0].text.strip()
-    try:
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        if json_match:
-            return json.loads(json_match.group())
-        return json.loads(raw)
-    except json.JSONDecodeError:
+    result = parse_llm_json(raw)
+    if result is None:
         logger.warning("Failed to parse lessons response")
         return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]}
+    return result
 
 
 async def process_final_version(
diff --git a/mcp-server/src/legal_mcp/services/qa_validator.py b/mcp-server/src/legal_mcp/services/qa_validator.py
index d155139..df90d0e 100644
--- a/mcp-server/src/legal_mcp/services/qa_validator.py
+++ b/mcp-server/src/legal_mcp/services/qa_validator.py
@@ -21,6 +21,7 @@ from uuid import UUID
 import anthropic
 
 from legal_mcp import config
+from legal_mcp.config import parse_llm_json
 from legal_mcp.services import db
 
 logger = logging.getLogger(__name__)
@@ -139,7 +140,7 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
     client = _get_anthropic()
     message = client.messages.create(
         model="claude-haiku-4-5-20251001",
-        max_tokens=4096,
+        max_tokens=8192,
         messages=[{
             "role": "user",
             "content": f"""{CLAIMS_CHECK_PROMPT}
@@ -153,13 +154,8 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
     )
 
     raw = message.content[0].text.strip()
-    # Strip markdown code blocks if present
-    raw = re.sub(r"^```(?:json)?\s*", "", raw)
-    raw = re.sub(r"\s*```$", "", raw)
-    try:
-        json_match = re.search(r"\{.*\}", raw, re.DOTALL)
-        parsed = json.loads(json_match.group()) if json_match else json.loads(raw)
-    except (json.JSONDecodeError, AttributeError):
+    parsed = parse_llm_json(raw)
+    if parsed is None:
         logger.warning("Failed to parse claims check: %s", raw[:300])
         # Fallback: assume all covered (don't block export on parse failure)
         return {"name": "claims_coverage", "passed": True,