Replace all Anthropic API calls with Claude Code session (claude -p)

New module claude_session.py provides query() and query_json() that run
prompts via `claude -p` CLI — uses the claude.ai session, zero API cost.

Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: brainstorm_directions
- block_writer.py: write_block (was streaming+thinking, now simple)
- qa_validator.py: claims_coverage check
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: extract_lessons

Only extractor.py still uses Anthropic API (for PDF OCR with Vision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 14:14:08 +00:00
parent e5dc037088
commit bacb330a2a
7 changed files with 115 additions and 192 deletions
--- a/mcp-server/src/legal_mcp/services/claude_session.py
+++ b/mcp-server/src/legal_mcp/services/claude_session.py
@@ -0,0 +1,77 @@
+"""Claude Code session bridge — runs prompts via `claude -p` instead of API.
+
+All LLM calls in the project should use this module instead of calling
+the Anthropic API directly. This uses the local Claude Code CLI which
+runs on the user's claude.ai session — zero API cost.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import subprocess
+from pathlib import Path
+
+from legal_mcp.config import parse_llm_json
+
+# Module-level logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# Timeouts (in seconds) for `claude -p` calls.
+# DEFAULT_TIMEOUT is the default `timeout` of query() and query_json().
+DEFAULT_TIMEOUT = 120
+# Longer limit that callers can pass explicitly as `timeout=`.
+LONG_TIMEOUT = 300  # For complex tasks like block writing
+
+
+def query(prompt: str, timeout: int = DEFAULT_TIMEOUT, max_turns: int = 1) -> str:
+    """Send a prompt to Claude Code headless and return the text response.
+
+    Runs ``claude -p <prompt> --output-format json --max-turns <n>`` as a
+    subprocess and unwraps the ``result`` field of the JSON envelope it
+    prints on stdout.
+
+    Args:
+        prompt: The prompt to send.
+        timeout: Max seconds to wait.
+        max_turns: Max conversation turns (1 = single response).
+
+    Returns:
+        The text response from Claude. If stdout is not a JSON object
+        envelope, the stripped stdout is returned unchanged.
+
+    Raises:
+        RuntimeError: If the claude CLI is not available, cannot be
+            started, times out, exits non-zero, prints nothing, or reports
+            an error / no result in its JSON envelope.
+    """
+    cmd = [
+        "claude", "-p", prompt,
+        "--output-format", "json",
+        "--max-turns", str(max_turns),
+    ]
+
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            # Decode explicitly as UTF-8 instead of the locale encoding so
+            # non-ASCII responses survive on hosts with a legacy locale.
+            encoding="utf-8",
+            errors="replace",
+            # Never let the child inherit our stdin: when this code runs
+            # inside a stdio-based server the CLI could otherwise read from
+            # (or block on) the parent's input stream.
+            stdin=subprocess.DEVNULL,
+            timeout=timeout,
+        )
+    except FileNotFoundError as exc:
+        raise RuntimeError(
+            "Claude CLI not found. Install Claude Code or add 'claude' to PATH."
+        ) from exc
+    except subprocess.TimeoutExpired as exc:
+        raise RuntimeError(f"Claude CLI timed out after {timeout}s") from exc
+    except OSError as exc:
+        # E.g. permission denied, or a prompt too large for the OS argv limit.
+        raise RuntimeError(f"Claude CLI could not be started: {exc}") from exc
+
+    if result.returncode != 0:
+        # Prefer stderr, but fall back to stdout so the message is not empty
+        # when the CLI reports its failure on stdout.
+        detail = (
+            (result.stderr or "").strip()
+            or (result.stdout or "").strip()
+            or "unknown error"
+        )[:500]
+        raise RuntimeError(f"Claude CLI failed (exit {result.returncode}): {detail}")
+
+    stdout = (result.stdout or "").strip()
+    if not stdout:
+        raise RuntimeError("Claude CLI returned empty response")
+
+    # claude -p --output-format json returns {"type":"result","result":"..."}
+    try:
+        data = json.loads(stdout)
+    except json.JSONDecodeError:
+        return stdout
+    if not isinstance(data, dict):
+        return stdout
+
+    # NOTE(review): `is_error` / `subtype` are envelope fields described in
+    # the Claude Code headless-mode docs — confirm against the installed CLI.
+    if data.get("is_error"):
+        detail = str(data.get("result") or data.get("subtype") or "unknown error")[:500]
+        raise RuntimeError(f"Claude CLI reported an error: {detail}")
+    if "result" in data:
+        return data["result"]
+    if data.get("type") == "result":
+        # A result envelope without text (e.g. the turn limit was hit) must
+        # not be handed back as if it were the model's answer.
+        subtype = data.get("subtype", "unknown")
+        raise RuntimeError(f"Claude CLI returned no result (subtype: {subtype})")
+    return stdout
+
+
+def query_json(prompt: str, timeout: int = DEFAULT_TIMEOUT) -> dict | list | None:
+    """Run a prompt through query() and decode the reply as JSON.
+
+    The raw response text is passed to parse_llm_json (imported from
+    legal_mcp.config), which is relied on for robust parsing (markdown
+    wrapping, truncation).
+
+    Args:
+        prompt: The prompt to send.
+        timeout: Max seconds to wait, forwarded to query().
+
+    Returns:
+        Whatever parse_llm_json produces for the response text.
+
+    Raises:
+        RuntimeError: Propagated from query() when the CLI call fails.
+    """
+    return parse_llm_json(query(prompt, timeout=timeout))