Replace all Anthropic API calls with Claude Code session (claude -p)
The new module claude_session.py provides query() and query_json(), which run prompts via the `claude -p` CLI. This uses the claude.ai session, so there is zero API cost.
Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: brainstorm_directions
- block_writer.py: write_block (was streaming + thinking, now a simple call)
- qa_validator.py: claims_coverage check
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: extract_lessons
Only extractor.py still uses the Anthropic API (for PDF OCR with Vision).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,22 +18,11 @@ import re
|
||||
from datetime import date
|
||||
from uuid import UUID
|
||||
|
||||
import anthropic
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import db, embeddings
|
||||
from legal_mcp.services import db, embeddings, claude_session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_anthropic_client: anthropic.Anthropic | None = None
|
||||
|
||||
|
||||
def _get_anthropic() -> anthropic.Anthropic:
|
||||
global _anthropic_client
|
||||
if _anthropic_client is None:
|
||||
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
|
||||
return _anthropic_client
|
||||
|
||||
|
||||
# ── Block configuration ───────────────────────────────────────────
|
||||
|
||||
@@ -353,49 +342,10 @@ async def write_block(
|
||||
if not dir_doc.get("approved"):
|
||||
raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")
|
||||
|
||||
# Call Claude
|
||||
# Call Claude via Claude Code session (no API)
|
||||
model_key = block_cfg["model"]
|
||||
model = MODEL_MAP.get(model_key, MODEL_MAP["sonnet"])
|
||||
temperature = block_cfg["temp"]
|
||||
max_tokens = block_cfg.get("max_tokens", 4096)
|
||||
|
||||
client = _get_anthropic()
|
||||
|
||||
kwargs: dict = {
|
||||
"model": model,
|
||||
"max_tokens": max_tokens,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
}
|
||||
|
||||
if model_key == "opus":
|
||||
# Opus 4.6: enable extended thinking with an explicit token budget.
|
||||
# Per Anthropic docs: temperature must be 1 when thinking is enabled.
|
||||
# budget_tokens is at least 16000, or half of max_tokens when that is larger.
|
||||
kwargs["temperature"] = 1
|
||||
kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
|
||||
else:
|
||||
kwargs["temperature"] = temperature
|
||||
|
||||
# Streaming is required when max_tokens > 21,333 (Anthropic requirement); we stream once max_tokens exceeds 21000, and whenever thinking is enabled.
|
||||
use_stream = max_tokens > 21000 or kwargs.get("thinking")
|
||||
|
||||
if use_stream:
|
||||
content_parts = []
|
||||
with client.messages.stream(**kwargs) as stream:
|
||||
for event in stream:
|
||||
pass # consume stream
|
||||
response = stream.get_final_message()
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
content_parts.append(block.text)
|
||||
content = "\n".join(content_parts)
|
||||
else:
|
||||
message = client.messages.create(**kwargs)
|
||||
content = ""
|
||||
for block in message.content:
|
||||
if block.type == "text":
|
||||
content = block.text
|
||||
break
|
||||
timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT
|
||||
content = claude_session.query(prompt, timeout=timeout)
|
||||
|
||||
return _build_result(block_id, content, block_cfg)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user