diff --git a/mcp-server/pyproject.toml b/mcp-server/pyproject.toml
index f60d38a..2829d57 100644
--- a/mcp-server/pyproject.toml
+++ b/mcp-server/pyproject.toml
@@ -8,7 +8,6 @@ dependencies = [
     "asyncpg>=0.29.0",
     "pgvector>=0.3.0",
     "voyageai>=0.3.0",
-    "anthropic>=0.45.0",
     "python-dotenv>=1.0.0",
     "pydantic>=2.0.0",
     "pymupdf>=1.25.0",
diff --git a/mcp-server/src/legal_mcp/services/claude_session.py b/mcp-server/src/legal_mcp/services/claude_session.py
index 757b76c..2a892d1 100644
--- a/mcp-server/src/legal_mcp/services/claude_session.py
+++ b/mcp-server/src/legal_mcp/services/claude_session.py
@@ -1,26 +1,27 @@
-"""Claude Code session bridge — runs prompts via `claude -p` or Anthropic SDK.
+"""Claude Code session bridge — runs prompts via the local `claude` CLI.
 
-History: originally shelled out to `claude -p` exclusively (zero direct API
-cost via the developer's claude.ai session). That works locally but fails
-in the legal-ai Docker container, which does not ship the CLI. To keep the
-same call sites working in production, the module now tries the CLI first
-and falls back to the Anthropic SDK using ``ANTHROPIC_API_KEY`` when the
-CLI binary is absent.
+All LLM calls in legal-ai go through this module. We shell out to the local
+Claude Code CLI, which uses the developer's claude.ai session — zero direct
+API cost.
 
-Both paths share a single shape: ``query()`` returns text, ``query_json()``
-parses that text as JSON. Callers don't need to know which path executed.
+**Architectural rule (do not violate):** this module only works when invoked
+from the local MCP server (the Python process at
+`/home/chaim/legal-ai/mcp-server/`, launched per `~/.claude.json`). It will
+**not** work when called from the legal-ai Docker container — that container
+has no `claude` CLI and no claude.ai session. Any code path under `web/`
+(FastAPI) that calls this module — directly or via an extractor like
+`halacha_extractor`, `claims_extractor`, `precedent_metadata_extractor`,
+`block_writer`, `qa_validator`, `learning_loop`, `local_classifier`,
+`appraiser_facts_extractor`, `brainstorm`, `style_analyzer` — is wrong.
+LLM-dependent operations must be exposed as MCP tools and triggered from
+agents (or the chair via Claude Code), where this module runs locally with
+CLI access.
 
 Async history: originally synchronous (``subprocess.run``) with a 120 s
-timeout. That broke for large legal documents — sync subprocess stalled
-the asyncio loop, and 120 s was far too short for cold-cache Hebrew prompts
+timeout. That broke for large legal documents — sync subprocess stalled the
+asyncio loop, and 120 s was far too short for cold-cache Hebrew prompts
 (case 8174-24 hit three timeouts in a row). Fixed by going async with a
 30-minute ceiling.
-
-Caching contract (SDK path): pass long, repeated instruction text via
-``system=...``. The SDK path adds ``cache_control: ephemeral`` so back-to-back
-chunk calls reuse the cached prefix at ~10% of read cost. The CLI path doesn't
-expose API-level caching; with ``system`` set, we just prepend it to the
-prompt — same observable behavior, no caching benefit.
 """
 
 from __future__ import annotations
@@ -28,63 +29,46 @@ from __future__ import annotations
 import asyncio
 import json
 import logging
-import os
-import shutil
 
 from legal_mcp.config import parse_llm_json
 
 logger = logging.getLogger(__name__)
 
-# Default ceiling for any single LLM call, in seconds.
+# Default ceiling for any single ``claude -p`` invocation, in seconds.
 # 30 min covers any single-document call we make in practice (chunking
 # handles the rest); the bound exists only to prevent runaway zombies.
 DEFAULT_TIMEOUT = 1800
 LONG_TIMEOUT = 3600  # opus block writing on full case context
 
-# Anthropic SDK fallback config — used when `claude` CLI is not on PATH.
-# Default to Sonnet 4.6: strong balance of Hebrew legal-text quality and
-# cost for the per-chunk extraction workload. Override via env if needed.
-DEFAULT_SDK_MODEL = os.environ.get("CLAUDE_SDK_MODEL", "claude-sonnet-4-6")
-DEFAULT_SDK_MAX_TOKENS = int(os.environ.get("CLAUDE_SDK_MAX_TOKENS", "8192"))
 
-_anthropic_client = None
-
-
-def _has_cli() -> bool:
-    return shutil.which("claude") is not None
-
-
-def _get_anthropic_client():
-    """Lazy-init the AsyncAnthropic client. Raises with a clear message if
-    the package or API key is missing — better than letting the SDK 401 in
-    the middle of a multi-chunk extraction.
-    """
-    global _anthropic_client
-    if _anthropic_client is not None:
-        return _anthropic_client
-    try:
-        import anthropic
-    except ImportError as e:
-        raise RuntimeError(
-            "The 'anthropic' package is required when the Claude CLI is "
-            "unavailable. Add it to mcp-server/pyproject.toml."
-        ) from e
-    if not os.environ.get("ANTHROPIC_API_KEY"):
-        raise RuntimeError(
-            "ANTHROPIC_API_KEY is not set; cannot fall back to Anthropic SDK."
-        )
-    _anthropic_client = anthropic.AsyncAnthropic()
-    return _anthropic_client
-
-
-async def _query_cli(
-    prompt: str, system: str | None, timeout: int, max_turns: int,
+async def query(
+    prompt: str,
+    timeout: int = DEFAULT_TIMEOUT,
+    max_turns: int = 1,
+    *,
+    system: str | None = None,
 ) -> str:
-    """Run the prompt via the local `claude` CLI subprocess.
+    """Send a prompt to Claude Code headless and return the text response.
 
-    Uses the developer's claude.ai session — zero direct API cost. With
-    ``system`` set, we just prepend it to the prompt; the CLI doesn't
-    expose API-level caching anyway.
+    Passes the prompt via stdin (not argv) to avoid the OS ARG_MAX limit —
+    prompts can be 500K+ chars when analyzing a full style corpus.
+
+    Args:
+        prompt: The prompt to send.
+        timeout: Max seconds before the subprocess is killed.
+        max_turns: Max conversation turns (1 = single response).
+        system: Optional repeated-instruction text. Prepended to ``prompt``
+            for the CLI; we don't pass it as a separate arg because the
+            CLI doesn't expose API-level caching. The parameter exists so
+            extractors can structure their calls cleanly today, and to make
+            a future SDK-backed path drop-in.
+
+    Returns:
+        The text response from Claude.
+
+    Raises:
+        RuntimeError: if the CLI is unavailable (e.g., called from the
+            container — see module docstring), or fails, or times out.
     """
     full_prompt = f"{system}\n\n{prompt}" if system else prompt
 
@@ -94,12 +78,21 @@ async def _query_cli(
         "--max-turns", str(max_turns),
     ]
 
-    proc = await asyncio.create_subprocess_exec(
-        *cmd,
-        stdin=asyncio.subprocess.PIPE,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    except FileNotFoundError:
+        raise RuntimeError(
+            "Claude CLI not found. This module only works when invoked "
+            "from the local MCP server — see the architectural rule in "
+            "the module docstring. If this error came from a FastAPI "
+            "endpoint in the container, refactor the call into an MCP "
+            "tool that the chair triggers from Claude Code."
+        )
 
     try:
         stdout_b, stderr_b = await asyncio.wait_for(
@@ -133,95 +126,6 @@ async def _query_cli(
         return stdout
 
 
-async def _query_sdk(prompt: str, system: str | None, timeout: int) -> str:
-    """Run the prompt via the Anthropic SDK with 5-min ephemeral caching
-    on the system message. Streams the response to dodge HTTP read timeouts
-    on long Hebrew JSON outputs.
-    """
-    import anthropic
-
-    client = _get_anthropic_client()
-
-    kwargs: dict = {
-        "model": DEFAULT_SDK_MODEL,
-        "max_tokens": DEFAULT_SDK_MAX_TOKENS,
-        "messages": [{"role": "user", "content": prompt}],
-    }
-    if system:
-        # cache_control: ephemeral → 5-min TTL. The same system text repeats
-        # across every chunk in an extraction run, so we get
-        # cache_read_input_tokens on every call after the first.
-        kwargs["system"] = [{
-            "type": "text",
-            "text": system,
-            "cache_control": {"type": "ephemeral"},
-        }]
-
-    try:
-        async with client.messages.stream(**kwargs) as stream:
-            message = await asyncio.wait_for(
-                stream.get_final_message(),
-                timeout=timeout,
-            )
-    except asyncio.TimeoutError:
-        raise RuntimeError(f"Anthropic SDK call timed out after {timeout}s")
-    except anthropic.APIError as e:
-        raise RuntimeError(f"Anthropic SDK call failed: {e}") from e
-
-    text_parts: list[str] = []
-    for block in message.content:
-        if getattr(block, "type", None) == "text":
-            text_parts.append(block.text)
-    out = "".join(text_parts).strip()
-    if not out:
-        raise RuntimeError("Anthropic SDK returned no text content")
-
-    usage = getattr(message, "usage", None)
-    if usage is not None:
-        logger.debug(
-            "claude_session SDK usage: input=%s cache_read=%s cache_write=%s output=%s",
-            getattr(usage, "input_tokens", None),
-            getattr(usage, "cache_read_input_tokens", None),
-            getattr(usage, "cache_creation_input_tokens", None),
-            getattr(usage, "output_tokens", None),
-        )
-    return out
-
-
-async def query(
-    prompt: str,
-    timeout: int = DEFAULT_TIMEOUT,
-    max_turns: int = 1,
-    *,
-    system: str | None = None,
-) -> str:
-    """Send a prompt to Claude and return the text response.
-
-    Tries the Claude CLI first (zero API cost, uses claude.ai session).
-    Falls back to the Anthropic SDK with ANTHROPIC_API_KEY when the CLI is
-    absent — this is the production-Docker path.
-
-    Pass repeating instruction text via ``system=`` so the SDK path can
-    cache it (5-min ephemeral). Pass only the per-chunk content via
-    ``prompt``. The CLI path concatenates them; the SDK path keeps them
-    separate so caching works.
-
-    Args:
-        prompt: The user-facing prompt text.
-        timeout: Max seconds before the call is aborted.
-        max_turns: CLI-only — max conversation turns (1 = single response).
-        system: Optional system message. With the SDK path, gets cached
-            with 5-min ephemeral TTL when set.
-
-    Raises:
-        RuntimeError: if both paths fail or time out. The message includes
-            which path raised so the caller can distinguish CLI from SDK.
-    """
-    if _has_cli():
-        return await _query_cli(prompt, system, timeout, max_turns)
-    return await _query_sdk(prompt, system, timeout)
-
-
 async def query_json(
     prompt: str,
     timeout: int = DEFAULT_TIMEOUT,
diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py
index f2921f7..8400952 100644
--- a/mcp-server/src/legal_mcp/services/precedent_library.py
+++ b/mcp-server/src/legal_mcp/services/precedent_library.py
@@ -22,14 +22,13 @@ from typing import Awaitable, Callable
 from uuid import UUID, uuid4
 
 from legal_mcp import config
-from legal_mcp.services import (
-    chunker,
-    db,
-    embeddings,
-    extractor,
-    halacha_extractor,
-    precedent_metadata_extractor,
-)
+from legal_mcp.services import chunker, db, embeddings, extractor
+
+# Note: halacha_extractor and precedent_metadata_extractor are NOT imported
+# at module load. They are imported lazily inside the dedicated re-extract
+# entry points so that `ingest_precedent` (called from the FastAPI container,
+# where `claude` CLI is unavailable) cannot accidentally pull them in. See
+# the architectural rule in services/claude_session.py.
 
 logger = logging.getLogger(__name__)
 
@@ -189,36 +188,30 @@ async def ingest_precedent(
         ]
         stored_chunks = await db.store_precedent_chunks(case_law_id, chunk_dicts)
 
+        # Pipeline split: the container does the non-LLM half (extract +
+        # chunk + embed + store). LLM-driven extraction (metadata, halachot)
+        # runs separately via the MCP tools `precedent_extract_metadata` /
+        # `precedent_extract_halachot` from local Claude Code, where
+        # `claude` CLI is available. Mark statuses so the chair can see
+        # what's pending in the UI.
         await db.set_case_law_extraction_status(case_law_id, "completed")
+        await db.set_case_law_halacha_status(case_law_id, "pending")
 
-        await progress("extracting_metadata", 65, "מחלץ מטא-דאטה (תקציר, תגיות)")
-        try:
-            metadata_result = await precedent_metadata_extractor.extract_and_apply(
-                case_law_id,
-            )
-        except Exception as e:
-            logger.warning("metadata extraction failed (non-fatal): %s", e)
-            metadata_result = {"status": "failed", "fields": []}
-
-        await progress("extracting_halachot", 80, "מחלץ הלכות / יישומים")
-        halacha_result = await halacha_extractor.extract(case_law_id)
-
-        msg = (
-            f"הוכנס לספרייה: {stored_chunks} chunks, "
-            f"{halacha_result.get('stored', 0)} פריטים ממתינים לאישור"
+        await progress(
+            "completed",
+            100,
+            f"הוכנס לספרייה: {stored_chunks} chunks. "
+            f"חילוץ הלכות ומטא-דאטה — להפעיל מ-Claude Code "
+            f"(precedent_extract_halachot / precedent_extract_metadata).",
         )
-        if metadata_result.get("fields"):
-            msg += f"; מולאו אוטומטית: {', '.join(metadata_result['fields'])}"
-        await progress("completed", 100, msg)
 
         return {
             "status": "completed",
             "case_law_id": str(case_law_id),
             "chunks": stored_chunks,
-            "halachot": halacha_result.get("stored", 0),
-            "halachot_extracted_raw": halacha_result.get("extracted", 0),
-            "halachot_verified": halacha_result.get("verified", 0),
-            "metadata_filled": metadata_result.get("fields", []),
+            "halachot": 0,
+            "halachot_pending": True,
+            "metadata_filled": [],
             "pages": page_count,
         }
 
@@ -233,7 +226,15 @@ async def reextract_halachot(
     case_law_id: UUID | str,
     progress: ProgressCb | None = None,
 ) -> dict:
-    """Re-run the halacha extractor on an existing precedent. Idempotent."""
+    """Re-run the halacha extractor on an existing precedent. Idempotent.
+
+    **MCP-tool-only path.** This function calls into ``halacha_extractor``,
+    which calls ``claude_session`` — the local CLI is required. Invoking
+    this from the FastAPI container raises ``RuntimeError`` ("Claude CLI
+    not found"). See the architectural rule in ``services/claude_session.py``.
+    """
+    from legal_mcp.services import halacha_extractor
+
     progress = progress or _noop_progress
     if isinstance(case_law_id, str):
         case_law_id = UUID(case_law_id)
@@ -261,7 +262,11 @@ async def reextract_metadata(
     Only fills empty fields (subject_tags, summary, headnote, key_quote,
     appeal_subtype, and case_name when it equals the citation). User
     values are preserved.
+
+    **MCP-tool-only path** — same constraint as :func:`reextract_halachot`.
     """
+    from legal_mcp.services import precedent_metadata_extractor
+
     progress = progress or _noop_progress
     if isinstance(case_law_id, str):
         case_law_id = UUID(case_law_id)
diff --git a/web-ui/src/components/precedents/library-list-panel.tsx b/web-ui/src/components/precedents/library-list-panel.tsx
index 6b17140..cfe6251 100644
--- a/web-ui/src/components/precedents/library-list-panel.tsx
+++ b/web-ui/src/components/precedents/library-list-panel.tsx
@@ -1,7 +1,7 @@
 "use client";
 
 import { useState } from "react";
-import { Trash2, Plus, RefreshCw, Pencil } from "lucide-react";
+import { Trash2, Plus, Pencil } from "lucide-react";
 import { toast } from "sonner";
 import {
   Table, TableBody, TableCell, TableHead, TableHeader, TableRow,
@@ -16,7 +16,6 @@ import {
 import {
   usePrecedents,
   useDeletePrecedent,
-  useReExtractHalachot,
   type Precedent,
   type PracticeArea,
 } from "@/lib/api/precedent-library";
@@ -63,7 +62,6 @@ function PrecedentRow({
   onEdit: (id: string) => void;
 }) {
   const del = useDeletePrecedent();
-  const reExtract = useReExtractHalachot();
 
   const onDelete = async () => {
     if (!window.confirm(`למחוק את ${p.case_number}? cascade ימחק את ה-chunks וההלכות.`)) return;
@@ -75,15 +73,6 @@ function PrecedentRow({
     }
   };
 
-  const onReExtract = async () => {
-    try {
-      await reExtract.mutateAsync(p.id);
-      toast.success("חילוץ הלכות החל");
-    } catch (e) {
-      toast.error(e instanceof Error ? e.message : "שגיאה");
-    }
-  };
-
   return (
     <TableRow className="border-rule hover:bg-gold-wash/30">
       <TableCell className="font-semibold text-navy" dir="ltr">
@@ -119,15 +108,6 @@ function PrecedentRow({
           >
             <Pencil className="w-4 h-4" />
           </Button>
-          <Button
-            variant="ghost" size="sm" onClick={onReExtract}
-            disabled={reExtract.isPending}
-            aria-label="חלץ הלכות מחדש"
-            title="חלץ הלכות מחדש"
-            className="text-ink-muted hover:text-navy"
-          >
-            <RefreshCw className="w-4 h-4" />
-          </Button>
           <Button
             variant="ghost" size="sm" onClick={onDelete}
             disabled={del.isPending}
diff --git a/web-ui/src/components/precedents/precedent-edit-sheet.tsx b/web-ui/src/components/precedents/precedent-edit-sheet.tsx
index ed1290e..ed2f9af 100644
--- a/web-ui/src/components/precedents/precedent-edit-sheet.tsx
+++ b/web-ui/src/components/precedents/precedent-edit-sheet.tsx
@@ -1,7 +1,7 @@
 "use client";
 
 import { useEffect, useState } from "react";
-import { Save, Sparkles, Loader2 } from "lucide-react";
+import { Save } from "lucide-react";
 import { toast } from "sonner";
 import {
   Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription,
@@ -17,11 +17,9 @@ import {
 import {
   usePrecedent,
   useUpdatePrecedent,
-  useReExtractMetadata,
   type PracticeArea,
   type SourceType,
 } from "@/lib/api/precedent-library";
-import { useProgress } from "@/lib/api/documents";
 import {
   PRACTICE_AREAS, PRECEDENT_LEVELS, SOURCE_TYPES,
 } from "./practice-area";
@@ -61,11 +59,8 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) {
   const open = caseLawId !== null;
   const { data: record, isPending } = usePrecedent(caseLawId);
   const update = useUpdatePrecedent();
-  const reextractMeta = useReExtractMetadata();
 
   const [form, setForm] = useState<FormState>(EMPTY);
-  const [metadataTaskId, setMetadataTaskId] = useState<string | null>(null);
-  const metadataProgress = useProgress(metadataTaskId);
 
   // Hydrate form when the record loads.
   useEffect(() => {
@@ -88,17 +83,6 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) {
     });
   }, [record]);
 
-  // Auto-close metadata progress on completion + refresh form
-  useEffect(() => {
-    if (metadataProgress?.status === "completed") {
-      toast.success("חילוץ מטא-דאטה הסתיים — השדות עודכנו");
-      setMetadataTaskId(null);
-    } else if (metadataProgress?.status === "failed") {
-      toast.error(`חילוץ מטא-דאטה נכשל: ${metadataProgress.error || ""}`);
-      setMetadataTaskId(null);
-    }
-  }, [metadataProgress]);
-
   const onSubmit = async (e: React.FormEvent) => {
     e.preventDefault();
     if (!caseLawId) return;
@@ -128,21 +112,6 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) {
     }
   };
 
-  const onTriggerMetadata = async () => {
-    if (!caseLawId) return;
-    try {
-      const res = await reextractMeta.mutateAsync(caseLawId);
-      setMetadataTaskId(res.task_id);
-      toast.message("מחלץ מטא-דאטה ברקע…");
-    } catch (err) {
-      toast.error(err instanceof Error ? err.message : "שגיאה");
-    }
-  };
-
-  const isMetaRunning = metadataTaskId !== null
-    && metadataProgress?.status !== "completed"
-    && metadataProgress?.status !== "failed";
-
   return (
     <Sheet open={open} onOpenChange={(o) => { if (!o) onOpenChange(false); }}>
       <SheetContent side="left" className="w-full sm:max-w-2xl overflow-y-auto" dir="rtl">
@@ -150,7 +119,9 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) {
           <SheetTitle className="text-navy">עריכת פרטי פסיקה</SheetTitle>
           <SheetDescription className="text-ink-muted">
             כל השדות ניתנים לעריכה חוץ ממראה המקום (מזהה ייחודי).
-            כפתור &quot;חלץ מטא-דאטה אוטומטית&quot; מנתח את הטקסט וממלא רק שדות ריקים.
+            לחילוץ מטא-דאטה אוטומטי או הלכות — להפעיל מ-Claude Code את
+            ה-MCP tools <code>precedent_extract_metadata</code> /{" "}
+            <code>precedent_extract_halachot</code>.
           </SheetDescription>
         </SheetHeader>
 
@@ -160,34 +131,13 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) {
           </div>
         ) : (
           <form onSubmit={onSubmit} className="px-6 pb-6 space-y-4 mt-4">
-            <div className="rounded-lg border border-rule bg-rule-soft/40 p-3 flex items-start gap-3">
-              <div className="flex-1">
-                <div className="text-[0.78rem] text-ink-muted">מראה מקום (לא ניתן לעריכה)</div>
-                <div className="text-navy font-mono text-sm break-all" dir="ltr">
-                  {record.case_number}
-                </div>
+            <div className="rounded-lg border border-rule bg-rule-soft/40 p-3">
+              <div className="text-[0.78rem] text-ink-muted">מראה מקום (לא ניתן לעריכה)</div>
+              <div className="text-navy font-mono text-sm break-all" dir="ltr">
+                {record.case_number}
               </div>
-              <Button
-                type="button" size="sm" variant="outline"
-                onClick={onTriggerMetadata}
-                disabled={isMetaRunning || reextractMeta.isPending}
-                className="shrink-0"
-              >
-                {isMetaRunning ? (
-                  <Loader2 className="w-3.5 h-3.5 me-1 animate-spin" />
-                ) : (
-                  <Sparkles className="w-3.5 h-3.5 me-1" />
-                )}
-                חלץ מטא-דאטה אוטומטית
-              </Button>
             </div>
 
-            {isMetaRunning && (metadataProgress as { step?: string } | null)?.step && (
-              <div className="text-[0.78rem] text-ink-muted">
-                {(metadataProgress as { step?: string }).step}
-              </div>
-            )}
-
             <div className="grid grid-cols-2 gap-3">
               <div className="space-y-1">
                 <Label htmlFor="case-name">שם קצר</Label>
diff --git a/web-ui/src/lib/api/precedent-library.ts b/web-ui/src/lib/api/precedent-library.ts
index b307195..61240b8 100644
--- a/web-ui/src/lib/api/precedent-library.ts
+++ b/web-ui/src/lib/api/precedent-library.ts
@@ -336,34 +336,12 @@ export function useUpdatePrecedent() {
   });
 }
 
-export function useReExtractHalachot() {
-  const qc = useQueryClient();
-  return useMutation({
-    mutationFn: (id: string) =>
-      apiRequest<{ task_id: string }>(
-        `/api/precedent-library/${encodeURIComponent(id)}/extract-halachot`,
-        { method: "POST" },
-      ),
-    onSuccess: (_, id) => {
-      qc.invalidateQueries({ queryKey: libraryKeys.detail(id) });
-    },
-  });
-}
-
-export function useReExtractMetadata() {
-  const qc = useQueryClient();
-  return useMutation({
-    mutationFn: (id: string) =>
-      apiRequest<{ task_id: string }>(
-        `/api/precedent-library/${encodeURIComponent(id)}/extract-metadata`,
-        { method: "POST" },
-      ),
-    onSuccess: (_, id) => {
-      qc.invalidateQueries({ queryKey: libraryKeys.detail(id) });
-      qc.invalidateQueries({ queryKey: libraryKeys.all });
-    },
-  });
-}
+// Halacha + metadata extraction are not exposed as HTTP mutations because
+// they call the local `claude` CLI through the MCP server — see the rule
+// in mcp-server/src/legal_mcp/services/claude_session.py. The chair
+// triggers them from Claude Code via:
+//   mcp__legal-ai__precedent_extract_halachot <case_law_id>
+//   mcp__legal-ai__precedent_extract_metadata <case_law_id>
 
 export function useHalachotPending(limit = 200) {
   return useQuery({
diff --git a/web/app.py b/web/app.py
index c343f06..6f28484 100644
--- a/web/app.py
+++ b/web/app.py
@@ -3748,66 +3748,12 @@ async def precedent_library_delete(case_law_id: str):
     return {"deleted": True, "case_law_id": case_law_id}
 
 
-@app.post("/api/precedent-library/{case_law_id}/extract-halachot")
-async def precedent_library_reextract(case_law_id: str):
-    """Re-run halacha extraction in background. Returns a task_id for SSE."""
-    try:
-        cid = UUID(case_law_id)
-    except ValueError:
-        raise HTTPException(400, "case_law_id לא תקין")
-    record = await db.get_case_law(cid)
-    if not record:
-        raise HTTPException(404, "פסיקה לא נמצאה")
-
-    task_id = str(uuid4())
-    label = record.get("case_number") or case_law_id
-    await _progress.set(task_id, {
-        "status": "queued", "filename": label, "stage": "queued", "percent": 0,
-    })
-    publish = _make_progress_publisher(task_id, label)
-
-    async def _run():
-        try:
-            await plib_service.reextract_halachot(cid, progress=publish)
-        except Exception as e:
-            logger.exception("re-extract halachot failed")
-            await _progress.set(task_id, {
-                "status": "failed", "error": str(e), "filename": label,
-            })
-
-    asyncio.create_task(_run())
-    return {"task_id": task_id}
-
-
-@app.post("/api/precedent-library/{case_law_id}/extract-metadata")
-async def precedent_library_extract_metadata(case_law_id: str):
-    """Re-run metadata extraction in background. Fills empty fields only."""
-    try:
-        cid = UUID(case_law_id)
-    except ValueError:
-        raise HTTPException(400, "case_law_id לא תקין")
-    record = await db.get_case_law(cid)
-    if not record:
-        raise HTTPException(404, "פסיקה לא נמצאה")
-
-    task_id = str(uuid4())
-    label = record.get("case_number") or case_law_id
-    await _progress.set(task_id, {
-        "status": "queued", "filename": label, "stage": "queued", "percent": 0,
-    })
-    publish = _make_progress_publisher(task_id, label)
-
-    async def _run():
-        try:
-            await plib_service.reextract_metadata(cid, progress=publish)
-        except Exception as e:
-            logger.exception("re-extract metadata failed")
-            await _progress.set(task_id, {
-                "status": "failed", "error": str(e), "filename": label,
-            })
-
-    asyncio.create_task(_run())
-    return {"task_id": task_id}
+# Halacha and metadata extraction are LLM-driven and rely on the local
+# `claude` CLI via mcp-server/src/legal_mcp/services/claude_session.py —
+# they CANNOT run from this container (no CLI, no claude.ai session). They
+# are exposed as MCP tools (`precedent_extract_halachot`,
+# `precedent_extract_metadata`) and triggered from local Claude Code, not
+# via HTTP. See that module's docstring for the architectural rule.
 
 
 @app.get("/api/halachot")