Replace Anthropic API calls with Claude Code session (claude -p) in 6 services

New module claude_session.py provides query() and query_json(), which run prompts through the `claude -p` CLI. This uses the claude.ai session, so there is zero API cost.

Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: generate_directions
- block_writer.py: write_block (was streaming + thinking, now a single plain call)
- qa_validator.py: check_claims_coverage
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: analyze_changes

Only extractor.py still uses the Anthropic API (for PDF OCR with Vision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 14:14:08 +00:00
parent e5dc037088
commit bacb330a2a
7 changed files with 115 additions and 192 deletions
--- a/mcp-server/src/legal_mcp/services/block_writer.py
+++ b/mcp-server/src/legal_mcp/services/block_writer.py
@@ -18,22 +18,11 @@ import re
 from datetime import date
 from uuid import UUID

-import anthropic
-
 from legal_mcp import config
-from legal_mcp.services import db, embeddings
+from legal_mcp.services import db, embeddings, claude_session

 logger = logging.getLogger(__name__)

-_anthropic_client: anthropic.Anthropic | None = None
-
-
-def _get_anthropic() -> anthropic.Anthropic:
-    global _anthropic_client
-    if _anthropic_client is None:
-        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    return _anthropic_client
-

 # ── Block configuration ───────────────────────────────────────────

@@ -353,49 +342,10 @@ async def write_block(
        if not dir_doc.get("approved"):
            raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")

-    # Call Claude
+    # Call Claude via Claude Code session (no API)
    model_key = block_cfg["model"]
-    model = MODEL_MAP.get(model_key, MODEL_MAP["sonnet"])
-    temperature = block_cfg["temp"]
-    max_tokens = block_cfg.get("max_tokens", 4096)
-
-    client = _get_anthropic()
-
-    kwargs: dict = {
-        "model": model,
-        "max_tokens": max_tokens,
-        "messages": [{"role": "user", "content": prompt}],
-    }
-
-    if model_key == "opus":
-        # Opus 4.6: use adaptive thinking — Claude decides when and how much to think.
-        # Per Anthropic docs: temperature must be 1 when thinking is enabled.
-        # budget_tokens not needed with adaptive thinking.
-        kwargs["temperature"] = 1
-        kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
-    else:
-        kwargs["temperature"] = temperature
-
-    # Streaming required when max_tokens > 21,333 (Anthropic requirement)
-    use_stream = max_tokens > 21000 or kwargs.get("thinking")
-
-    if use_stream:
-        content_parts = []
-        with client.messages.stream(**kwargs) as stream:
-            for event in stream:
-                pass  # consume stream
-            response = stream.get_final_message()
-        for block in response.content:
-            if block.type == "text":
-                content_parts.append(block.text)
-        content = "\n".join(content_parts)
-    else:
-        message = client.messages.create(**kwargs)
-        content = ""
-        for block in message.content:
-            if block.type == "text":
-                content = block.text
-                break
+    timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT
+    content = claude_session.query(prompt, timeout=timeout)

    return _build_result(block_id, content, block_cfg)

--- a/mcp-server/src/legal_mcp/services/brainstorm.py
+++ b/mcp-server/src/legal_mcp/services/brainstorm.py
@@ -12,23 +12,12 @@ from __future__ import annotations
 import logging
 from uuid import UUID

-import anthropic
-
 from legal_mcp import config
 from legal_mcp.config import parse_llm_json
-from legal_mcp.services import db
+from legal_mcp.services import db, claude_session

 logger = logging.getLogger(__name__)

-_anthropic_client: anthropic.Anthropic | None = None
-
-
-def _get_anthropic() -> anthropic.Anthropic:
-    global _anthropic_client
-    if _anthropic_client is None:
-        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    return _anthropic_client
-

 BRAINSTORM_PROMPT = """אתה יועץ משפטי מומחה בתכנון ובניה. תפקידך לסייע בגיבוש כיוון להחלטת ועדת ערר.

@@ -145,15 +134,7 @@ async def generate_directions(
 {doc_context or '(אין מסמכים בתיק)'}
 """

-    client = _get_anthropic()
-    message = client.messages.create(
-        model="claude-sonnet-4-20250514",
-        max_tokens=4096,
-        messages=[{"role": "user", "content": user_content}],
-    )
-
-    raw = message.content[0].text.strip()
-    result = parse_llm_json(raw)
+    result = claude_session.query_json(user_content, timeout=120)
    if result is None:
        logger.warning("Failed to parse brainstorm response: %s", raw[:300])
        return {
--- a/mcp-server/src/legal_mcp/services/claims_extractor.py
+++ b/mcp-server/src/legal_mcp/services/claims_extractor.py
@@ -1,7 +1,7 @@
-"""חילוץ טענות מכתבי טענות (ערר, תשובה) באמצעות Claude API.
+"""חילוץ טענות מכתבי טענות (ערר, תשובה) באמצעות Claude Code session.

 שתי גישות:
-1. extract_claims_with_ai — חילוץ עם Claude (לכתבי טענות קלט)
+1. extract_claims_with_ai — חילוץ עם Claude Code headless (לכתבי טענות קלט)
 2. extract_claims_from_block — חילוץ regex (מבלוק ז של החלטות סופיות)
 """

@@ -11,23 +11,12 @@ import logging
 import re
 from uuid import UUID

-import anthropic
-
 from legal_mcp import config
 from legal_mcp.config import parse_llm_json
-from legal_mcp.services import db
+from legal_mcp.services import db, claude_session

 logger = logging.getLogger(__name__)

-_anthropic_client: anthropic.Anthropic | None = None
-
-
-def _get_anthropic() -> anthropic.Anthropic:
-    global _anthropic_client
-    if _anthropic_client is None:
-        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    return _anthropic_client
-

 EXTRACT_CLAIMS_PROMPT = """אתה מנתח מסמכים משפטיים בתחום תכנון ובניה. תפקידך לחלץ טענות מכתב טענות.

@@ -93,27 +82,15 @@ async def extract_claims_with_ai(
        chunks = [text]

    all_claims = []
-    client = _get_anthropic()

    for i, chunk in enumerate(chunks):
        chunk_label = f" (חלק {i+1}/{len(chunks)})" if len(chunks) > 1 else ""
-        message = client.messages.create(
-            model="claude-sonnet-4-20250514",
-            max_tokens=8192,
-            messages=[
-                {
-                    "role": "user",
-                    "content": (
-                        f"{EXTRACT_CLAIMS_PROMPT}\n\n"
-                        f"{context}{chunk_label}\n\n"
-                        f"--- תחילת מסמך ---\n{chunk}\n--- סוף מסמך ---"
-                    ),
-                }
-            ],
+        prompt = (
+            f"{EXTRACT_CLAIMS_PROMPT}\n\n"
+            f"{context}{chunk_label}\n\n"
+            f"--- תחילת מסמך ---\n{chunk}\n--- סוף מסמך ---"
        )
-
-        raw = message.content[0].text.strip()
-        claims = parse_llm_json(raw)
+        claims = claude_session.query_json(prompt, timeout=120)
        if claims is None:
            logger.warning("Failed to parse claims for chunk %d: %s", i, raw[:200])
            continue
--- a/mcp-server/src/legal_mcp/services/claude_session.py
+++ b/mcp-server/src/legal_mcp/services/claude_session.py
@@ -0,0 +1,77 @@
+"""Claude Code session bridge — runs prompts via `claude -p` instead of API.
+
+All LLM calls in the project should use this module instead of calling
+the Anthropic API directly. This uses the local Claude Code CLI which
+runs on the user's claude.ai session — zero API cost.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import subprocess
+from pathlib import Path
+
+from legal_mcp.config import parse_llm_json
+
+logger = logging.getLogger(__name__)
+
+# Default timeout for claude -p calls (seconds)
+DEFAULT_TIMEOUT = 120
+LONG_TIMEOUT = 300  # For complex tasks like block writing
+
+
+def query(prompt: str, timeout: int = DEFAULT_TIMEOUT, max_turns: int = 1) -> str:
+    """Send a prompt to Claude Code headless and return the text response.
+
+    Args:
+        prompt: The prompt to send.
+        timeout: Max seconds to wait.
+        max_turns: Max conversation turns (1 = single response).
+
+    Returns:
+        The text response from Claude.
+
+    Raises:
+        RuntimeError: If claude CLI is not available or fails.
+    """
+    cmd = [
+        "claude", "-p", prompt,
+        "--output-format", "json",
+        "--max-turns", str(max_turns),
+    ]
+
+    try:
+        result = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=timeout,
+        )
+    except FileNotFoundError:
+        raise RuntimeError("Claude CLI not found. Install Claude Code or add 'claude' to PATH.")
+    except subprocess.TimeoutExpired:
+        raise RuntimeError(f"Claude CLI timed out after {timeout}s")
+
+    if result.returncode != 0:
+        stderr = result.stderr.strip()[:500] if result.stderr else "unknown error"
+        raise RuntimeError(f"Claude CLI failed (exit {result.returncode}): {stderr}")
+
+    stdout = result.stdout.strip()
+    if not stdout:
+        raise RuntimeError("Claude CLI returned empty response")
+
+    # claude -p --output-format json returns {"type":"result","result":"..."}
+    try:
+        data = json.loads(stdout)
+        if isinstance(data, dict) and "result" in data:
+            return data["result"]
+        return stdout
+    except json.JSONDecodeError:
+        return stdout
+
+
+def query_json(prompt: str, timeout: int = DEFAULT_TIMEOUT) -> dict | list | None:
+    """Send a prompt and parse the response as JSON.
+
+    Uses parse_llm_json for robust parsing (handles markdown wrapping, truncation).
+    """
+    raw = query(prompt, timeout=timeout)
+    return parse_llm_json(raw)
--- a/mcp-server/src/legal_mcp/services/learning_loop.py
+++ b/mcp-server/src/legal_mcp/services/learning_loop.py
@@ -12,23 +12,12 @@ from __future__ import annotations
 import logging
 from uuid import UUID

-import anthropic
-
 from legal_mcp import config
 from legal_mcp.config import parse_llm_json
-from legal_mcp.services import db
+from legal_mcp.services import db, claude_session

 logger = logging.getLogger(__name__)

-_anthropic_client: anthropic.Anthropic | None = None
-
-
-def _get_anthropic() -> anthropic.Anthropic:
-    global _anthropic_client
-    if _anthropic_client is None:
-        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    return _anthropic_client
-

 def compute_diff_stats(draft_text: str, final_text: str) -> dict:
    """חישוב סטטיסטיקות השוואה בין טיוטה לסופית."""
@@ -93,25 +82,15 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict:
    draft_sample = draft_text[:max_chars]
    final_sample = final_text[:max_chars]

-    client = _get_anthropic()
-    message = client.messages.create(
-        model="claude-sonnet-4-20250514",
-        max_tokens=4096,
-        messages=[{
-            "role": "user",
-            "content": f"""{LESSONS_PROMPT}
+    prompt = f"""{LESSONS_PROMPT}

 --- טיוטה ---
 {draft_sample}

 --- גרסה סופית ---
 {final_sample}
-""",
-        }],
-    )
-
-    raw = message.content[0].text.strip()
-    result = parse_llm_json(raw)
+"""
+    result = claude_session.query_json(prompt, timeout=120)
    if result is None:
        logger.warning("Failed to parse lessons response")
        return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]}
--- a/mcp-server/src/legal_mcp/services/qa_validator.py
+++ b/mcp-server/src/legal_mcp/services/qa_validator.py
@@ -18,11 +18,9 @@ import logging
 import re
 from uuid import UUID

-import anthropic
-
 from legal_mcp import config
 from legal_mcp.config import parse_llm_json
-from legal_mcp.services import db
+from legal_mcp.services import db, claude_session

 logger = logging.getLogger(__name__)

@@ -89,14 +87,6 @@ def check_neutral_background(blocks: list[dict]) -> dict:
    }


-_anthropic_client: anthropic.Anthropic | None = None
-
-
-def _get_anthropic() -> anthropic.Anthropic:
-    global _anthropic_client
-    if _anthropic_client is None:
-        _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    return _anthropic_client


 CLAIMS_CHECK_PROMPT = """אתה בודק איכות החלטות משפטיות. קיבלת רשימת טענות שהועלו בכתבי הטענות, ואת בלוק הדיון של ההחלטה.
@@ -146,24 +136,15 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
    # Send full discussion — don't truncate
    discussion = yod["content"]

-    client = _get_anthropic()
-    message = client.messages.create(
-        model="claude-sonnet-4-20250514",
-        max_tokens=8192,
-        messages=[{
-            "role": "user",
-            "content": f"""{CLAIMS_CHECK_PROMPT}
+    prompt = f"""{CLAIMS_CHECK_PROMPT}

 ## טענות ({len(source_claims)}):
 {claims_text}

 ## בלוק הדיון:
-{discussion}""",
-        }],
-    )
+{discussion}"""

-    raw = message.content[0].text.strip()
-    parsed = parse_llm_json(raw)
+    parsed = claude_session.query_json(prompt, timeout=120)
    if parsed is None:
        logger.warning("Failed to parse claims check: %s", raw[:300])
        # Fallback: assume all covered (don't block export on parse failure)
--- a/mcp-server/src/legal_mcp/services/style_analyzer.py
+++ b/mcp-server/src/legal_mcp/services/style_analyzer.py
@@ -6,10 +6,8 @@ import json
 import logging
 import re

-import anthropic
-
 from legal_mcp import config
-from legal_mcp.services import db
+from legal_mcp.services import db, claude_session

 logger = logging.getLogger(__name__)

@@ -150,24 +148,16 @@ async def _analyze_single_pass(rows) -> dict:
        decisions_text += f"\n\n--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
        decisions_text += row["full_text"]

-    client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
-    message = client.messages.create(
-        model="claude-opus-4-6",
-        max_tokens=16384,
-        messages=[
-            {
-                "role": "user",
-                "content": ANALYSIS_PROMPT.format(decisions=decisions_text),
-            }
-        ],
+    raw = claude_session.query(
+        ANALYSIS_PROMPT.format(decisions=decisions_text),
+        timeout=claude_session.LONG_TIMEOUT,
    )

-    return await _parse_and_store_patterns(message.content[0].text, len(rows))
+    return await _parse_and_store_patterns(raw, len(rows))


 async def _analyze_multi_pass(rows) -> dict:
    """Analyze each decision individually, then synthesize patterns."""
-    client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
    all_patterns = []

    # Pass 1: Analyze each decision individually
@@ -175,18 +165,12 @@ async def _analyze_multi_pass(rows) -> dict:
        decision_text = f"--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
        decision_text += row["full_text"]

-        message = client.messages.create(
-            model="claude-opus-4-6",
-            max_tokens=8192,
-            messages=[
-                {
-                    "role": "user",
-                    "content": SINGLE_DECISION_PROMPT.format(decision=decision_text),
-                }
-            ],
+        raw = claude_session.query(
+            SINGLE_DECISION_PROMPT.format(decision=decision_text),
+            timeout=claude_session.LONG_TIMEOUT,
        )

-        patterns = _extract_json(message.content[0].text)
+        patterns = _extract_json(raw)
        if patterns:
            all_patterns.extend(patterns)

@@ -194,21 +178,15 @@ async def _analyze_multi_pass(rows) -> dict:
        return {"error": "לא הצלחתי לחלץ דפוסים מההחלטות"}

    # Pass 2: Synthesize across all decisions
-    message = client.messages.create(
-        model="claude-opus-4-6",
-        max_tokens=16384,
-        messages=[
-            {
-                "role": "user",
-                "content": SYNTHESIS_PROMPT.format(
-                    num_decisions=len(rows),
-                    patterns=json.dumps(all_patterns, ensure_ascii=False, indent=2),
-                ),
-            }
-        ],
+    raw = claude_session.query(
+        SYNTHESIS_PROMPT.format(
+            num_decisions=len(rows),
+            patterns=json.dumps(all_patterns, ensure_ascii=False, indent=2),
+        ),
+        timeout=claude_session.LONG_TIMEOUT,
    )

-    return await _parse_and_store_patterns(message.content[0].text, len(rows))
+    return await _parse_and_store_patterns(raw, len(rows))


 def _extract_json(response_text: str) -> list | None: