Replace all Anthropic API calls with Claude Code session (claude -p)

New module claude_session.py provides query() and query_json(), which
run prompts via the `claude -p` CLI — this uses the claude.ai session, so there is zero API cost.

Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: brainstorm_directions
- block_writer.py: write_block (was streaming+thinking, now simple)
- qa_validator.py: claims_coverage check
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: extract_lessons

Only extractor.py still uses the Anthropic API (for PDF OCR with Vision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-04 14:14:08 +00:00
parent e5dc037088
commit bacb330a2a
7 changed files with 115 additions and 192 deletions

View File

@@ -0,0 +1,77 @@
"""Claude Code session bridge — runs prompts via `claude -p` instead of API.
All LLM calls in the project should use this module instead of calling
the Anthropic API directly. This uses the local Claude Code CLI which
runs on the user's claude.ai session — zero API cost.
"""
from __future__ import annotations
import json
import logging
import subprocess
from pathlib import Path
from legal_mcp.config import parse_llm_json
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Default timeout for claude -p calls (seconds); this is the default value of
# the `timeout` parameter of both query() and query_json().
DEFAULT_TIMEOUT = 120
# Longer timeout (seconds) for complex tasks like block writing. Not used as a
# default in this module — presumably callers pass it explicitly as `timeout`.
LONG_TIMEOUT = 300 # For complex tasks like block writing
def query(prompt: str, timeout: int = DEFAULT_TIMEOUT, max_turns: int = 1) -> str:
    """Send a prompt to Claude Code headless and return the text response.

    Runs ``claude -p <prompt> --output-format json --max-turns <n>`` as a
    subprocess and unwraps the ``result`` field of the JSON envelope the CLI
    prints on stdout.

    Args:
        prompt: The prompt to send.
        timeout: Max seconds to wait for the CLI process.
        max_turns: Max conversation turns (1 = single response).

    Returns:
        The text response from Claude. If stdout is not a JSON object with a
        string ``result`` field, the stripped stdout is returned unchanged.

    Raises:
        RuntimeError: If the claude CLI is not found, times out, exits with a
            non-zero status, prints nothing, or reports an error inside its
            JSON envelope.
    """
    cmd = [
        "claude", "-p", prompt,
        "--output-format", "json",
        "--max-turns", str(max_turns),
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout,
        )
    except FileNotFoundError as err:
        raise RuntimeError(
            "Claude CLI not found. Install Claude Code or add 'claude' to PATH."
        ) from err
    except subprocess.TimeoutExpired as err:
        raise RuntimeError(f"Claude CLI timed out after {timeout}s") from err

    if result.returncode != 0:
        # Fall back to "unknown error" when stderr is missing *or* blank.
        stderr = (result.stderr or "").strip()[:500] or "unknown error"
        raise RuntimeError(f"Claude CLI failed (exit {result.returncode}): {stderr}")

    stdout = result.stdout.strip()
    if not stdout:
        raise RuntimeError("Claude CLI returned empty response")

    # claude -p --output-format json returns {"type":"result","result":"..."}
    try:
        data = json.loads(stdout)
    except json.JSONDecodeError:
        # Not JSON at all: hand back whatever the CLI printed.
        return stdout
    if not isinstance(data, dict):
        return stdout

    # The envelope can signal failure even when the process exits with 0
    # (e.g. the turn limit was hit). Surface that as an error instead of
    # returning the error text or the raw envelope as if it were the answer.
    subtype = data.get("subtype")
    failed = bool(data.get("is_error")) or (
        isinstance(subtype, str) and subtype.startswith("error")
    )
    if failed:
        detail = str(data.get("result") or subtype or "unknown error")[:500]
        raise RuntimeError(f"Claude CLI reported an error: {detail}")

    answer = data.get("result")
    if isinstance(answer, str):
        return answer
    # Missing or non-string "result": keep the str return contract.
    return stdout
def query_json(prompt: str, timeout: int = DEFAULT_TIMEOUT) -> dict | list | None:
    """Run *prompt* through ``query`` and decode the reply as JSON.

    The raw text reply is handed to ``parse_llm_json``, which is relied on
    for tolerant parsing (markdown-wrapped or truncated output).

    Args:
        prompt: The prompt to send.
        timeout: Max seconds to wait; forwarded to ``query``.

    Returns:
        Whatever ``parse_llm_json`` produces for the reply text.
    """
    return parse_llm_json(query(prompt, timeout=timeout))