Replace all Anthropic API calls with Claude Code session (claude -p)

New module claude_session.py provides query() and query_json() that
run prompts via `claude -p` CLI — uses the claude.ai session, zero API cost.

Converted 6 services:
- claims_extractor.py: extract_claims_with_ai
- brainstorm.py: brainstorm_directions
- block_writer.py: write_block (was streaming+thinking, now simple)
- qa_validator.py: claims_coverage check
- style_analyzer.py: 3 API calls (single pass, multi pass, synthesis)
- learning_loop.py: extract_lessons

Only extractor.py still uses Anthropic API (for PDF OCR with Vision).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-04 14:14:08 +00:00
parent e5dc037088
commit bacb330a2a
7 changed files with 115 additions and 192 deletions

View File

@@ -18,22 +18,11 @@ import re
from datetime import date
from uuid import UUID
import anthropic
from legal_mcp import config
from legal_mcp.services import db, embeddings
from legal_mcp.services import db, embeddings, claude_session
logger = logging.getLogger(__name__)
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
return _anthropic_client
# ── Block configuration ───────────────────────────────────────────
@@ -353,49 +342,10 @@ async def write_block(
if not dir_doc.get("approved"):
raise ValueError("לא ניתן לכתוב בלוק דיון ללא כיוון מאושר. הפעל brainstorm → approve_direction קודם.")
# Call Claude
# Call Claude via Claude Code session (no API)
model_key = block_cfg["model"]
model = MODEL_MAP.get(model_key, MODEL_MAP["sonnet"])
temperature = block_cfg["temp"]
max_tokens = block_cfg.get("max_tokens", 4096)
client = _get_anthropic()
kwargs: dict = {
"model": model,
"max_tokens": max_tokens,
"messages": [{"role": "user", "content": prompt}],
}
if model_key == "opus":
# Opus 4.6: use adaptive thinking — Claude decides when and how much to think.
# Per Anthropic docs: temperature must be 1 when thinking is enabled.
# budget_tokens not needed with adaptive thinking.
kwargs["temperature"] = 1
kwargs["thinking"] = {"type": "enabled", "budget_tokens": max(16000, max_tokens // 2)}
else:
kwargs["temperature"] = temperature
# Streaming required when max_tokens > 21,333 (Anthropic requirement)
use_stream = max_tokens > 21000 or kwargs.get("thinking")
if use_stream:
content_parts = []
with client.messages.stream(**kwargs) as stream:
for event in stream:
pass # consume stream
response = stream.get_final_message()
for block in response.content:
if block.type == "text":
content_parts.append(block.text)
content = "\n".join(content_parts)
else:
message = client.messages.create(**kwargs)
content = ""
for block in message.content:
if block.type == "text":
content = block.text
break
timeout = claude_session.LONG_TIMEOUT if model_key == "opus" else claude_session.DEFAULT_TIMEOUT
content = claude_session.query(prompt, timeout=timeout)
return _build_result(block_id, content, block_cfg)

View File

@@ -12,23 +12,12 @@ from __future__ import annotations
import logging
from uuid import UUID
import anthropic
from legal_mcp import config
from legal_mcp.config import parse_llm_json
from legal_mcp.services import db
from legal_mcp.services import db, claude_session
logger = logging.getLogger(__name__)
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
return _anthropic_client
BRAINSTORM_PROMPT = """אתה יועץ משפטי מומחה בתכנון ובניה. תפקידך לסייע בגיבוש כיוון להחלטת ועדת ערר.
@@ -145,15 +134,7 @@ async def generate_directions(
{doc_context or '(אין מסמכים בתיק)'}
"""
client = _get_anthropic()
message = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=4096,
messages=[{"role": "user", "content": user_content}],
)
raw = message.content[0].text.strip()
result = parse_llm_json(raw)
result = claude_session.query_json(user_content, timeout=120)
if result is None:
logger.warning("Failed to parse brainstorm response: %s", raw[:300])
return {

View File

@@ -1,7 +1,7 @@
"""חילוץ טענות מכתבי טענות (ערר, תשובה) באמצעות Claude API.
"""חילוץ טענות מכתבי טענות (ערר, תשובה) באמצעות Claude Code session.
שתי גישות:
1. extract_claims_with_ai — חילוץ עם Claude (לכתבי טענות קלט)
1. extract_claims_with_ai — חילוץ עם Claude Code headless (לכתבי טענות קלט)
2. extract_claims_from_block — חילוץ regex (מבלוק ז של החלטות סופיות)
"""
@@ -11,23 +11,12 @@ import logging
import re
from uuid import UUID
import anthropic
from legal_mcp import config
from legal_mcp.config import parse_llm_json
from legal_mcp.services import db
from legal_mcp.services import db, claude_session
logger = logging.getLogger(__name__)
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
return _anthropic_client
EXTRACT_CLAIMS_PROMPT = """אתה מנתח מסמכים משפטיים בתחום תכנון ובניה. תפקידך לחלץ טענות מכתב טענות.
@@ -93,27 +82,15 @@ async def extract_claims_with_ai(
chunks = [text]
all_claims = []
client = _get_anthropic()
for i, chunk in enumerate(chunks):
chunk_label = f" (חלק {i+1}/{len(chunks)})" if len(chunks) > 1 else ""
message = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=8192,
messages=[
{
"role": "user",
"content": (
f"{EXTRACT_CLAIMS_PROMPT}\n\n"
f"{context}{chunk_label}\n\n"
f"--- תחילת מסמך ---\n{chunk}\n--- סוף מסמך ---"
),
}
],
prompt = (
f"{EXTRACT_CLAIMS_PROMPT}\n\n"
f"{context}{chunk_label}\n\n"
f"--- תחילת מסמך ---\n{chunk}\n--- סוף מסמך ---"
)
raw = message.content[0].text.strip()
claims = parse_llm_json(raw)
claims = claude_session.query_json(prompt, timeout=120)
if claims is None:
logger.warning("Failed to parse claims for chunk %d: %s", i, raw[:200])
continue

View File

@@ -0,0 +1,77 @@
"""Claude Code session bridge — runs prompts via `claude -p` instead of API.
All LLM calls in the project should use this module instead of calling
the Anthropic API directly. This uses the local Claude Code CLI which
runs on the user's claude.ai session — zero API cost.
"""
from __future__ import annotations
import json
import logging
import subprocess
from pathlib import Path
from legal_mcp.config import parse_llm_json
logger = logging.getLogger(__name__)
# Default timeout for claude -p calls (seconds)
DEFAULT_TIMEOUT = 120
LONG_TIMEOUT = 300  # For complex tasks like block writing


def query(prompt: str, timeout: int = DEFAULT_TIMEOUT, max_turns: int = 1) -> str:
    """Send a prompt to Claude Code headless (`claude -p`) and return the reply text.

    The prompt is written to the CLI's stdin instead of being passed as a
    command-line argument. Command-line arguments are size-limited by the OS
    (a large concatenated document would fail to launch), a prompt starting
    with "-" would be parsed as an option, and argv is visible to other
    processes on the machine.

    Args:
        prompt: The prompt to send.
        timeout: Max seconds to wait.
        max_turns: Max conversation turns (1 = single response).

    Returns:
        The text response from Claude. If stdout is not the expected JSON
        envelope, the stripped stdout is returned unchanged.

    Raises:
        RuntimeError: If the claude CLI is not available, cannot be started,
            times out, exits non-zero, prints nothing, or reports an error
            in its JSON envelope.
    """
    cmd = [
        "claude", "-p",
        "--output-format", "json",
        "--max-turns", str(max_turns),
    ]

    try:
        result = subprocess.run(
            cmd,
            input=prompt,
            capture_output=True,
            text=True,
            # Pin the codec: prompts and replies contain Hebrew, and the
            # locale default is not guaranteed to be UTF-8.
            encoding="utf-8",
            timeout=timeout,
        )
    except FileNotFoundError as exc:
        raise RuntimeError(
            "Claude CLI not found. Install Claude Code or add 'claude' to PATH."
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(f"Claude CLI timed out after {timeout}s") from exc
    except OSError as exc:
        # Any other launch failure (permissions, exec errors) is reported
        # through the single exception type this function documents.
        raise RuntimeError(f"Claude CLI could not be started: {exc}") from exc

    if result.returncode != 0:
        stderr = result.stderr.strip()[:500] if result.stderr else "unknown error"
        raise RuntimeError(f"Claude CLI failed (exit {result.returncode}): {stderr}")

    stdout = result.stdout.strip()
    if not stdout:
        raise RuntimeError("Claude CLI returned empty response")

    # claude -p --output-format json returns {"type":"result","result":"..."}
    try:
        data = json.loads(stdout)
    except json.JSONDecodeError:
        return stdout
    if not isinstance(data, dict):
        return stdout

    # An error envelope must not be handed back as if it were the model's
    # answer; callers would store it as generated content.
    # NOTE(review): the "error_*" subtype naming follows the Claude Code
    # headless output documentation — confirm against the installed version.
    subtype = str(data.get("subtype", ""))
    if data.get("is_error") or subtype.startswith("error"):
        detail = data.get("result") or subtype or "unknown error"
        raise RuntimeError(f"Claude CLI reported an error: {str(detail)[:500]}")

    if "result" in data:
        return data["result"]
    return stdout
def query_json(prompt: str, timeout: int = DEFAULT_TIMEOUT) -> dict | list | None:
    """Send a prompt and parse the response as JSON.

    Uses parse_llm_json for robust parsing (handles markdown wrapping, truncation).

    Args:
        prompt: The prompt to send.
        timeout: Max seconds to wait for the CLI.

    Returns:
        The parsed JSON value, or None when the reply could not be parsed.

    Raises:
        RuntimeError: Propagated from query() when the CLI call itself fails.
    """
    raw = query(prompt, timeout=timeout)
    parsed = parse_llm_json(raw)
    if parsed is None:
        # Callers only receive None, so this is the one place where the
        # unparseable reply can still be recorded for debugging.
        logger.warning(
            "Claude response could not be parsed as JSON: %s", str(raw)[:300]
        )
    return parsed

View File

@@ -12,23 +12,12 @@ from __future__ import annotations
import logging
from uuid import UUID
import anthropic
from legal_mcp import config
from legal_mcp.config import parse_llm_json
from legal_mcp.services import db
from legal_mcp.services import db, claude_session
logger = logging.getLogger(__name__)
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
return _anthropic_client
def compute_diff_stats(draft_text: str, final_text: str) -> dict:
"""חישוב סטטיסטיקות השוואה בין טיוטה לסופית."""
@@ -93,25 +82,15 @@ async def analyze_changes(draft_text: str, final_text: str) -> dict:
draft_sample = draft_text[:max_chars]
final_sample = final_text[:max_chars]
client = _get_anthropic()
message = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=4096,
messages=[{
"role": "user",
"content": f"""{LESSONS_PROMPT}
prompt = f"""{LESSONS_PROMPT}
--- טיוטה ---
{draft_sample}
--- גרסה סופית ---
{final_sample}
""",
}],
)
raw = message.content[0].text.strip()
result = parse_llm_json(raw)
"""
result = claude_session.query_json(prompt, timeout=120)
if result is None:
logger.warning("Failed to parse lessons response")
return {"changes": [], "new_expressions": [], "overall_assessment": raw[:200]}

View File

@@ -18,11 +18,9 @@ import logging
import re
from uuid import UUID
import anthropic
from legal_mcp import config
from legal_mcp.config import parse_llm_json
from legal_mcp.services import db
from legal_mcp.services import db, claude_session
logger = logging.getLogger(__name__)
@@ -89,14 +87,6 @@ def check_neutral_background(blocks: list[dict]) -> dict:
}
_anthropic_client: anthropic.Anthropic | None = None
def _get_anthropic() -> anthropic.Anthropic:
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
return _anthropic_client
CLAIMS_CHECK_PROMPT = """אתה בודק איכות החלטות משפטיות. קיבלת רשימת טענות שהועלו בכתבי הטענות, ואת בלוק הדיון של ההחלטה.
@@ -146,24 +136,15 @@ async def check_claims_coverage(blocks: list[dict], claims: list[dict]) -> dict:
# Send full discussion — don't truncate
discussion = yod["content"]
client = _get_anthropic()
message = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=8192,
messages=[{
"role": "user",
"content": f"""{CLAIMS_CHECK_PROMPT}
prompt = f"""{CLAIMS_CHECK_PROMPT}
## טענות ({len(source_claims)}):
{claims_text}
## בלוק הדיון:
{discussion}""",
}],
)
{discussion}"""
raw = message.content[0].text.strip()
parsed = parse_llm_json(raw)
parsed = claude_session.query_json(prompt, timeout=120)
if parsed is None:
logger.warning("Failed to parse claims check: %s", raw[:300])
# Fallback: assume all covered (don't block export on parse failure)

View File

@@ -6,10 +6,8 @@ import json
import logging
import re
import anthropic
from legal_mcp import config
from legal_mcp.services import db
from legal_mcp.services import db, claude_session
logger = logging.getLogger(__name__)
@@ -150,24 +148,16 @@ async def _analyze_single_pass(rows) -> dict:
decisions_text += f"\n\n--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
decisions_text += row["full_text"]
client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
message = client.messages.create(
model="claude-opus-4-6",
max_tokens=16384,
messages=[
{
"role": "user",
"content": ANALYSIS_PROMPT.format(decisions=decisions_text),
}
],
raw = claude_session.query(
ANALYSIS_PROMPT.format(decisions=decisions_text),
timeout=claude_session.LONG_TIMEOUT,
)
return await _parse_and_store_patterns(message.content[0].text, len(rows))
return await _parse_and_store_patterns(raw, len(rows))
async def _analyze_multi_pass(rows) -> dict:
"""Analyze each decision individually, then synthesize patterns."""
client = anthropic.Anthropic(api_key=config.ANTHROPIC_API_KEY)
all_patterns = []
# Pass 1: Analyze each decision individually
@@ -175,18 +165,12 @@ async def _analyze_multi_pass(rows) -> dict:
decision_text = f"--- החלטה {row['decision_number'] or 'ללא מספר'} ---\n"
decision_text += row["full_text"]
message = client.messages.create(
model="claude-opus-4-6",
max_tokens=8192,
messages=[
{
"role": "user",
"content": SINGLE_DECISION_PROMPT.format(decision=decision_text),
}
],
raw = claude_session.query(
SINGLE_DECISION_PROMPT.format(decision=decision_text),
timeout=claude_session.LONG_TIMEOUT,
)
patterns = _extract_json(message.content[0].text)
patterns = _extract_json(raw)
if patterns:
all_patterns.extend(patterns)
@@ -194,21 +178,15 @@ async def _analyze_multi_pass(rows) -> dict:
return {"error": "לא הצלחתי לחלץ דפוסים מההחלטות"}
# Pass 2: Synthesize across all decisions
message = client.messages.create(
model="claude-opus-4-6",
max_tokens=16384,
messages=[
{
"role": "user",
"content": SYNTHESIS_PROMPT.format(
num_decisions=len(rows),
patterns=json.dumps(all_patterns, ensure_ascii=False, indent=2),
),
}
],
raw = claude_session.query(
SYNTHESIS_PROMPT.format(
num_decisions=len(rows),
patterns=json.dumps(all_patterns, ensure_ascii=False, indent=2),
),
timeout=claude_session.LONG_TIMEOUT,
)
return await _parse_and_store_patterns(message.content[0].text, len(rows))
return await _parse_and_store_patterns(raw, len(rows))
def _extract_json(response_text: str) -> list | None: