diff --git a/mcp-server/pyproject.toml b/mcp-server/pyproject.toml index f60d38a..2829d57 100644 --- a/mcp-server/pyproject.toml +++ b/mcp-server/pyproject.toml @@ -8,7 +8,6 @@ dependencies = [ "asyncpg>=0.29.0", "pgvector>=0.3.0", "voyageai>=0.3.0", - "anthropic>=0.45.0", "python-dotenv>=1.0.0", "pydantic>=2.0.0", "pymupdf>=1.25.0", diff --git a/mcp-server/src/legal_mcp/services/claude_session.py b/mcp-server/src/legal_mcp/services/claude_session.py index 757b76c..2a892d1 100644 --- a/mcp-server/src/legal_mcp/services/claude_session.py +++ b/mcp-server/src/legal_mcp/services/claude_session.py @@ -1,26 +1,27 @@ -"""Claude Code session bridge — runs prompts via `claude -p` or Anthropic SDK. +"""Claude Code session bridge — runs prompts via the local `claude` CLI. -History: originally shelled out to `claude -p` exclusively (zero direct API -cost via the developer's claude.ai session). That works locally but fails -in the legal-ai Docker container, which does not ship the CLI. To keep the -same call sites working in production, the module now tries the CLI first -and falls back to the Anthropic SDK using ``ANTHROPIC_API_KEY`` when the -CLI binary is absent. +All LLM calls in legal-ai go through this module. We shell out to the local +Claude Code CLI which uses the developer's claude.ai session — zero direct +API cost. -Both paths share a single shape: ``query()`` returns text, ``query_json()`` -parses that text as JSON. Callers don't need to know which path executed. +**Architectural rule (do not violate):** this module only works when invoked +from the local MCP server (the Python process at +`/home/chaim/legal-ai/mcp-server/`, launched per `~/.claude.json`). It will +**not** work when called from the legal-ai Docker container — that container +has no `claude` CLI and no claude.ai session. 
Any code path under `web/` +(FastAPI) that calls this module — directly or via an extractor like +`halacha_extractor`, `claims_extractor`, `precedent_metadata_extractor`, +`block_writer`, `qa_validator`, `learning_loop`, `local_classifier`, +`appraiser_facts_extractor`, `brainstorm`, `style_analyzer` — is wrong. +LLM-dependent operations must be exposed as MCP tools and triggered from +agents (or the chair via Claude Code), where this module runs locally with +CLI access. Async history: originally synchronous (``subprocess.run``) with a 120 s -timeout. That broke for large legal documents — sync subprocess stalled -the asyncio loop, and 120 s was far too short for cold-cache Hebrew prompts +timeout. That broke for large legal documents — sync subprocess stalled the +asyncio loop, and 120 s was far too short for cold-cache Hebrew prompts (case 8174-24 hit three timeouts in a row). Fixed by going async with a 30-minute ceiling. - -Caching contract (SDK path): pass long, repeated instruction text via -``system=...``. The SDK path adds ``cache_control: ephemeral`` so back-to-back -chunk calls reuse the cached prefix at ~10% of read cost. The CLI path doesn't -expose API-level caching; with ``system`` set, we just prepend it to the -prompt — same observable behavior, no caching benefit. """ from __future__ import annotations @@ -28,63 +29,46 @@ from __future__ import annotations import asyncio import json import logging -import os -import shutil from legal_mcp.config import parse_llm_json logger = logging.getLogger(__name__) -# Default ceiling for any single LLM call, in seconds. +# Default ceiling for any single ``claude -p`` invocation, in seconds. # 30 min covers any single-document call we make in practice (chunking # handles the rest); the bound exists only to prevent runaway zombies. DEFAULT_TIMEOUT = 1800 LONG_TIMEOUT = 3600 # opus block writing on full case context -# Anthropic SDK fallback config — used when `claude` CLI is not on PATH. 
-# Default to Sonnet 4.6: strong balance of Hebrew legal-text quality and -# cost for the per-chunk extraction workload. Override via env if needed. -DEFAULT_SDK_MODEL = os.environ.get("CLAUDE_SDK_MODEL", "claude-sonnet-4-6") -DEFAULT_SDK_MAX_TOKENS = int(os.environ.get("CLAUDE_SDK_MAX_TOKENS", "8192")) -_anthropic_client = None - - -def _has_cli() -> bool: - return shutil.which("claude") is not None - - -def _get_anthropic_client(): - """Lazy-init the AsyncAnthropic client. Raises with a clear message if - the package or API key is missing — better than letting the SDK 401 in - the middle of a multi-chunk extraction. - """ - global _anthropic_client - if _anthropic_client is not None: - return _anthropic_client - try: - import anthropic - except ImportError as e: - raise RuntimeError( - "The 'anthropic' package is required when the Claude CLI is " - "unavailable. Add it to mcp-server/pyproject.toml." - ) from e - if not os.environ.get("ANTHROPIC_API_KEY"): - raise RuntimeError( - "ANTHROPIC_API_KEY is not set; cannot fall back to Anthropic SDK." - ) - _anthropic_client = anthropic.AsyncAnthropic() - return _anthropic_client - - -async def _query_cli( - prompt: str, system: str | None, timeout: int, max_turns: int, +async def query( + prompt: str, + timeout: int = DEFAULT_TIMEOUT, + max_turns: int = 1, + *, + system: str | None = None, ) -> str: - """Run the prompt via the local `claude` CLI subprocess. + """Send a prompt to Claude Code headless and return the text response. - Uses the developer's claude.ai session — zero direct API cost. With - ``system`` set, we just prepend it to the prompt; the CLI doesn't - expose API-level caching anyway. + Passes the prompt via stdin (not argv) to avoid the OS ARG_MAX limit — + prompts can be 500K+ chars when analyzing a full style corpus. + + Args: + prompt: The prompt to send. + timeout: Max seconds before the subprocess is killed. + max_turns: Max conversation turns (1 = single response). 
+ system: Optional repeated-instruction text. Prepended to ``prompt`` + for the CLI; we don't pass it as a separate arg because the + CLI doesn't expose API-level caching. The parameter exists so + extractors can structure their calls cleanly today, and to make + a future SDK-backed path drop-in. + + Returns: + The text response from Claude. + + Raises: + RuntimeError: if the CLI is unavailable (e.g., called from the + container — see module docstring), or fails, or times out. """ full_prompt = f"{system}\n\n{prompt}" if system else prompt @@ -94,12 +78,21 @@ async def _query_cli( "--max-turns", str(max_turns), ] - proc = await asyncio.create_subprocess_exec( - *cmd, - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) + try: + proc = await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + except FileNotFoundError: + raise RuntimeError( + "Claude CLI not found. This module only works when invoked " + "from the local MCP server — see the architectural rule in " + "the module docstring. If this error came from a FastAPI " + "endpoint in the container, refactor the call into an MCP " + "tool that the chair triggers from Claude Code." + ) try: stdout_b, stderr_b = await asyncio.wait_for( @@ -133,95 +126,6 @@ async def _query_cli( return stdout -async def _query_sdk(prompt: str, system: str | None, timeout: int) -> str: - """Run the prompt via the Anthropic SDK with 5-min ephemeral caching - on the system message. Streams the response to dodge HTTP read timeouts - on long Hebrew JSON outputs. - """ - import anthropic - - client = _get_anthropic_client() - - kwargs: dict = { - "model": DEFAULT_SDK_MODEL, - "max_tokens": DEFAULT_SDK_MAX_TOKENS, - "messages": [{"role": "user", "content": prompt}], - } - if system: - # cache_control: ephemeral → 5-min TTL. 
The same system text repeats - # across every chunk in an extraction run, so we get - # cache_read_input_tokens on every call after the first. - kwargs["system"] = [{ - "type": "text", - "text": system, - "cache_control": {"type": "ephemeral"}, - }] - - try: - async with client.messages.stream(**kwargs) as stream: - message = await asyncio.wait_for( - stream.get_final_message(), - timeout=timeout, - ) - except asyncio.TimeoutError: - raise RuntimeError(f"Anthropic SDK call timed out after {timeout}s") - except anthropic.APIError as e: - raise RuntimeError(f"Anthropic SDK call failed: {e}") from e - - text_parts: list[str] = [] - for block in message.content: - if getattr(block, "type", None) == "text": - text_parts.append(block.text) - out = "".join(text_parts).strip() - if not out: - raise RuntimeError("Anthropic SDK returned no text content") - - usage = getattr(message, "usage", None) - if usage is not None: - logger.debug( - "claude_session SDK usage: input=%s cache_read=%s cache_write=%s output=%s", - getattr(usage, "input_tokens", None), - getattr(usage, "cache_read_input_tokens", None), - getattr(usage, "cache_creation_input_tokens", None), - getattr(usage, "output_tokens", None), - ) - return out - - -async def query( - prompt: str, - timeout: int = DEFAULT_TIMEOUT, - max_turns: int = 1, - *, - system: str | None = None, -) -> str: - """Send a prompt to Claude and return the text response. - - Tries the Claude CLI first (zero API cost, uses claude.ai session). - Falls back to the Anthropic SDK with ANTHROPIC_API_KEY when the CLI is - absent — this is the production-Docker path. - - Pass repeating instruction text via ``system=`` so the SDK path can - cache it (5-min ephemeral). Pass only the per-chunk content via - ``prompt``. The CLI path concatenates them; the SDK path keeps them - separate so caching works. - - Args: - prompt: The user-facing prompt text. - timeout: Max seconds before the call is aborted. 
- max_turns: CLI-only — max conversation turns (1 = single response). - system: Optional system message. With the SDK path, gets cached - with 5-min ephemeral TTL when set. - - Raises: - RuntimeError: if both paths fail or time out. The message includes - which path raised so the caller can distinguish CLI from SDK. - """ - if _has_cli(): - return await _query_cli(prompt, system, timeout, max_turns) - return await _query_sdk(prompt, system, timeout) - - async def query_json( prompt: str, timeout: int = DEFAULT_TIMEOUT, diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py index f2921f7..8400952 100644 --- a/mcp-server/src/legal_mcp/services/precedent_library.py +++ b/mcp-server/src/legal_mcp/services/precedent_library.py @@ -22,14 +22,13 @@ from typing import Awaitable, Callable from uuid import UUID, uuid4 from legal_mcp import config -from legal_mcp.services import ( - chunker, - db, - embeddings, - extractor, - halacha_extractor, - precedent_metadata_extractor, -) +from legal_mcp.services import chunker, db, embeddings, extractor + +# Note: halacha_extractor and precedent_metadata_extractor are NOT imported +# at module load. They are imported lazily inside the dedicated re-extract +# entry points so that `ingest_precedent` (called from the FastAPI container, +# where `claude` CLI is unavailable) cannot accidentally pull them in. See +# the architectural rule in services/claude_session.py. logger = logging.getLogger(__name__) @@ -189,36 +188,30 @@ async def ingest_precedent( ] stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts) + # Pipeline split: the container does the non-LLM half (extract + + # chunk + embed + store). LLM-driven extraction (metadata, halachot) + # runs separately via the MCP tools `precedent_extract_metadata` / + # `precedent_extract_halachot` from local Claude Code, where + # `claude` CLI is available. 
Mark statuses so the chair can see + # what's pending in the UI. await db.set_case_law_extraction_status(case_law_id, "completed") + await db.set_case_law_halacha_status(case_law_id, "pending") - await progress("extracting_metadata", 65, "מחלץ מטא-דאטה (תקציר, תגיות)") - try: - metadata_result = await precedent_metadata_extractor.extract_and_apply( - case_law_id, - ) - except Exception as e: - logger.warning("metadata extraction failed (non-fatal): %s", e) - metadata_result = {"status": "failed", "fields": []} - - await progress("extracting_halachot", 80, "מחלץ הלכות / יישומים") - halacha_result = await halacha_extractor.extract(case_law_id) - - msg = ( - f"הוכנס לספרייה: {stored_chunks} chunks, " - f"{halacha_result.get('stored', 0)} פריטים ממתינים לאישור" + await progress( + "completed", + 100, + f"הוכנס לספרייה: {stored_chunks} chunks. " + f"חילוץ הלכות ומטא-דאטה — להפעיל מ-Claude Code " + f"(precedent_extract_halachot / precedent_extract_metadata).", ) - if metadata_result.get("fields"): - msg += f"; מולאו אוטומטית: {', '.join(metadata_result['fields'])}" - await progress("completed", 100, msg) return { "status": "completed", "case_law_id": str(case_law_id), "chunks": stored_chunks, - "halachot": halacha_result.get("stored", 0), - "halachot_extracted_raw": halacha_result.get("extracted", 0), - "halachot_verified": halacha_result.get("verified", 0), - "metadata_filled": metadata_result.get("fields", []), + "halachot": 0, + "halachot_pending": True, + "metadata_filled": [], "pages": page_count, } @@ -233,7 +226,15 @@ async def reextract_halachot( case_law_id: UUID | str, progress: ProgressCb | None = None, ) -> dict: - """Re-run the halacha extractor on an existing precedent. Idempotent.""" + """Re-run the halacha extractor on an existing precedent. Idempotent. + + **MCP-tool-only path.** This function calls into ``halacha_extractor``, + which calls ``claude_session`` — the local CLI is required. 
Invoking + this from the FastAPI container will raise ``Claude CLI not found``. + See the architectural rule in ``services/claude_session.py``. + """ + from legal_mcp.services import halacha_extractor + progress = progress or _noop_progress if isinstance(case_law_id, str): case_law_id = UUID(case_law_id) @@ -261,7 +262,11 @@ async def reextract_metadata( Only fills empty fields (subject_tags, summary, headnote, key_quote, appeal_subtype, and case_name when it equals the citation). User values are preserved. + + **MCP-tool-only path** — same constraint as :func:`reextract_halachot`. """ + from legal_mcp.services import precedent_metadata_extractor + progress = progress or _noop_progress if isinstance(case_law_id, str): case_law_id = UUID(case_law_id) diff --git a/web-ui/src/components/precedents/library-list-panel.tsx b/web-ui/src/components/precedents/library-list-panel.tsx index 6b17140..cfe6251 100644 --- a/web-ui/src/components/precedents/library-list-panel.tsx +++ b/web-ui/src/components/precedents/library-list-panel.tsx @@ -1,7 +1,7 @@ "use client"; import { useState } from "react"; -import { Trash2, Plus, RefreshCw, Pencil } from "lucide-react"; +import { Trash2, Plus, Pencil } from "lucide-react"; import { toast } from "sonner"; import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow, @@ -16,7 +16,6 @@ import { import { usePrecedents, useDeletePrecedent, - useReExtractHalachot, type Precedent, type PracticeArea, } from "@/lib/api/precedent-library"; @@ -63,7 +62,6 @@ function PrecedentRow({ onEdit: (id: string) => void; }) { const del = useDeletePrecedent(); - const reExtract = useReExtractHalachot(); const onDelete = async () => { if (!window.confirm(`למחוק את ${p.case_number}? cascade ימחק את ה-chunks וההלכות.`)) return; @@ -75,15 +73,6 @@ function PrecedentRow({ } }; - const onReExtract = async () => { - try { - await reExtract.mutateAsync(p.id); - toast.success("חילוץ הלכות החל"); - } catch (e) { - toast.error(e instanceof Error ? 
e.message : "שגיאה"); - } - }; - return ( @@ -119,15 +108,6 @@ function PrecedentRow({ > - - {isMetaRunning && (metadataProgress as { step?: string } | null)?.step && ( -
- {(metadataProgress as { step?: string }).step} -
- )} -
diff --git a/web-ui/src/lib/api/precedent-library.ts b/web-ui/src/lib/api/precedent-library.ts index b307195..61240b8 100644 --- a/web-ui/src/lib/api/precedent-library.ts +++ b/web-ui/src/lib/api/precedent-library.ts @@ -336,34 +336,12 @@ export function useUpdatePrecedent() { }); } -export function useReExtractHalachot() { - const qc = useQueryClient(); - return useMutation({ - mutationFn: (id: string) => - apiRequest<{ task_id: string }>( - `/api/precedent-library/${encodeURIComponent(id)}/extract-halachot`, - { method: "POST" }, - ), - onSuccess: (_, id) => { - qc.invalidateQueries({ queryKey: libraryKeys.detail(id) }); - }, - }); -} - -export function useReExtractMetadata() { - const qc = useQueryClient(); - return useMutation({ - mutationFn: (id: string) => - apiRequest<{ task_id: string }>( - `/api/precedent-library/${encodeURIComponent(id)}/extract-metadata`, - { method: "POST" }, - ), - onSuccess: (_, id) => { - qc.invalidateQueries({ queryKey: libraryKeys.detail(id) }); - qc.invalidateQueries({ queryKey: libraryKeys.all }); - }, - }); -} +// Halacha + metadata extraction are not exposed as HTTP mutations because +// they call the local `claude` CLI through the MCP server — see the rule +// in mcp-server/src/legal_mcp/services/claude_session.py. The chair +// triggers them from Claude Code via: +// mcp__legal-ai__precedent_extract_halachot +// mcp__legal-ai__precedent_extract_metadata export function useHalachotPending(limit = 200) { return useQuery({ diff --git a/web/app.py b/web/app.py index c343f06..6f28484 100644 --- a/web/app.py +++ b/web/app.py @@ -3748,66 +3748,12 @@ async def precedent_library_delete(case_law_id: str): return {"deleted": True, "case_law_id": case_law_id} -@app.post("/api/precedent-library/{case_law_id}/extract-halachot") -async def precedent_library_reextract(case_law_id: str): - """Re-run halacha extraction in background. 
Returns a task_id for SSE.""" - try: - cid = UUID(case_law_id) - except ValueError: - raise HTTPException(400, "case_law_id לא תקין") - record = await db.get_case_law(cid) - if not record: - raise HTTPException(404, "פסיקה לא נמצאה") - - task_id = str(uuid4()) - label = record.get("case_number") or case_law_id - await _progress.set(task_id, { - "status": "queued", "filename": label, "stage": "queued", "percent": 0, - }) - publish = _make_progress_publisher(task_id, label) - - async def _run(): - try: - await plib_service.reextract_halachot(cid, progress=publish) - except Exception as e: - logger.exception("re-extract halachot failed") - await _progress.set(task_id, { - "status": "failed", "error": str(e), "filename": label, - }) - - asyncio.create_task(_run()) - return {"task_id": task_id} - - -@app.post("/api/precedent-library/{case_law_id}/extract-metadata") -async def precedent_library_extract_metadata(case_law_id: str): - """Re-run metadata extraction in background. Fills empty fields only.""" - try: - cid = UUID(case_law_id) - except ValueError: - raise HTTPException(400, "case_law_id לא תקין") - record = await db.get_case_law(cid) - if not record: - raise HTTPException(404, "פסיקה לא נמצאה") - - task_id = str(uuid4()) - label = record.get("case_number") or case_law_id - await _progress.set(task_id, { - "status": "queued", "filename": label, "stage": "queued", "percent": 0, - }) - publish = _make_progress_publisher(task_id, label) - - async def _run(): - try: - await plib_service.reextract_metadata(cid, progress=publish) - except Exception as e: - logger.exception("re-extract metadata failed") - await _progress.set(task_id, { - "status": "failed", "error": str(e), "filename": label, - }) - - asyncio.create_task(_run()) - return {"task_id": task_id} +# Halacha and metadata extraction are LLM-driven and rely on the local +# `claude` CLI via mcp-server/services/claude_session.py — they CANNOT run +# from this container (no CLI, no claude.ai session). 
They are exposed as +# MCP tools (`precedent_extract_halachot`, `precedent_extract_metadata`) +# and triggered from local Claude Code, not via HTTP. See +# mcp-server/src/legal_mcp/services/claude_session.py for the architectural rule. @app.get("/api/halachot")