Replace all Anthropic API calls with Claude Code session (claude -p)

New module claude_session.py provides query() and query_json() that
run prompts via `claude -p` CLI — uses the claude.ai session, zero API cost.

Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: brainstorm_directions
- block_writer.py: write_block (was streaming+thinking, now simple)
- qa_validator.py: claims_coverage check
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: extract_lessons

The only exception is extractor.py, which still uses the Anthropic API (for PDF OCR with Vision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-04 14:14:08 +00:00
parent e5dc037088
commit bacb330a2a
7 changed files with 115 additions and 192 deletions

View File

@@ -18,22 +18,11 @@ import re
from datetime import date
from uuid import UUID
import anthropic
from legal_mcp import config
from legal_mcp.services import db, embeddings
from legal_mcp.services import db, embeddings, claude_session
logger = logging.getLogger(__name__)
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
    """Return the module-wide Anthropic client, constructing it on first use.

    The client is created with ``config.ANTHROPIC_API_KEY`` and stored in the
    module-level ``_anthropic_client`` so that later calls reuse the same
    instance instead of building a new one.
    """
    global _anthropic_client
    # Fast path: a client was already created by an earlier call.
    if _anthropic_client is not None:
        return _anthropic_client
    _anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
    return _anthropic_client
# ── Block configuration ───────────────────────────────────────────
@@ -353,49 +342,10 @@ async def write_block(
if not dir_doc.get("approved"):
raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")
# Call Claude
# Call Claude via Claude Code session (no API)
model_key = block_cfg["model"]
model = MODEL_MAP.get(model_key, MODEL_MAP["sonnet"])
temperature = block_cfg["temp"]
max_tokens = block_cfg.get("max_tokens", 4096)
client = _get_anthropic()
kwargs: dict = {
"model": model,
"max_tokens": max_tokens,
"messages": [{"role": "user", "content": prompt}],
}
if model_key == "opus":
# Opus: enable extended thinking with an explicit token budget
# (budget_tokens is the larger of 16000 and half of max_tokens).
# Per Anthropic docs: temperature must be 1 when thinking is enabled.
kwargs["temperature"] = 1
kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
else:
kwargs["temperature"] = temperature
# Stream when max_tokens > 21000 or thinking is enabled (Anthropic requires streaming above 21,333 max_tokens)
use_stream = max_tokens > 21000 or kwargs.get("thinking")
if use_stream:
content_parts = []
with client.messages.stream(**kwargs) as stream:
for event in stream:
pass # consume stream
response = stream.get_final_message()
for block in response.content:
if block.type == "text":
content_parts.append(block.text)
content = "\n".join(content_parts)
else:
message = client.messages.create(**kwargs)
content = ""
for block in message.content:
if block.type == "text":
content = block.text
break
timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT
content = claude_session.query(prompt, timeout=timeout)
return _build_result(block_id, content, block_cfg)