Replace Anthropic API calls with Claude Code session (claude -p) in all services except extractor.py

New module claude_session.py provides query() and query_json(), which run prompts via the `claude -p` CLI. This uses the claude.ai session, so there is zero API cost.

Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: brainstorm_directions
- block_writer.py: write_block (was streaming + thinking, now a simple call)
- qa_validator.py: claims_coverage check
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: extract_lessons

Only extractor.py still uses the Anthropic API (for PDF OCR with Vision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 14:14:08 +00:00
parent e5dc037088
commit bacb330a2a
7 changed files with 115 additions and 192 deletions
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -18,22 +18,11 @@ import re
 from datetime import date
 from uuid import UUID

-import anthropic
-
 from legal_mcp import config
-from legal_mcp.services import db, embeddings
+from legal_mcp.services import db, embeddings, claude_session

 logger = logging.getLogger(__name__)

-_anthropic_client: anthropic.Anthropic | None = None
-
-
-def _get_anthropic() -> anthropic.Anthropic:
-    global _anthropic_client
-    if _anthropic_client is None:
-        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    return _anthropic_client
-

 # ── Block configuration ───────────────────────────────────────────

@@ -353,49 +342,10 @@ async def write_block(
        if not dir_doc.get("approved"):
            raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")

-    # Call Claude
+    # Call Claude via Claude Code session (no API)
    model_key = block_cfg["model"]
-    model = MODEL_MAP.get(model_key, MODEL_MAP["sonnet"])
-    temperature = block_cfg["temp"]
-    max_tokens = block_cfg.get("max_tokens", 4096)
-
-    client = _get_anthropic()
-
-    kwargs: dict = {
-        "model": model,
-        "max_tokens": max_tokens,
-        "messages": [{"role": "user", "content": prompt}],
-    }
-
-    if model_key == "opus":
-        # Opus 4.6: use adaptive thinking — Claude decides when and how much to think.
-        # Per Anthropic docs: temperature must be 1 when thinking is enabled.
-        # budget_tokens not needed with adaptive thinking.
-        kwargs["temperature"] = 1
-        kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
-    else:
-        kwargs["temperature"] = temperature
-
-    # Streaming required when max_tokens > 21,333 (Anthropic requirement)
-    use_stream = max_tokens > 21000 or kwargs.get("thinking")
-
-    if use_stream:
-        content_parts = []
-        with client.messages.stream(**kwargs) as stream:
-            for event in stream:
-                pass  # consume stream
-            response = stream.get_final_message()
-        for block in response.content:
-            if block.type == "text":
-                content_parts.append(block.text)
-        content = "\n".join(content_parts)
-    else:
-        message = client.messages.create(**kwargs)
-        content = ""
-        for block in message.content:
-            if block.type == "text":
-                content = block.text
-                break
+    timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT
+    content = claude_session.query(prompt, timeout=timeout)

    return _build_result(block_id, content, block_cfg)