fix(mcp): eliminate "No such tool available" race at agent wakeup

When Paperclip wakes the CEO and the model issues an mcp__legal-ai__* call within ~10s of session init, Claude Code sometimes returns "No such tool available" because the legal-ai MCP server hasn't finished bringing up its tool catalog yet. Observed twice today on CMPA precedent-extraction wakeups (sessions 9989fbaf and a9c61801); the agent fell back to bash + .venv/bin/python and finished the work, but the race needed fixing on the server side. Three changes that close the window: 1. Lazy schema init (services/db.py + server.py): `init_schema()` was awaited inside the FastMCP lifespan, blocking the `initialize`/`tools/list` handshake until ~10 CREATE TABLE IF NOT EXISTS statements ran. Under contention (two CEOs waking at once for different companies) this stretched further. Now the lifespan returns immediately and `get_pool()` runs the schema migrations exactly once on first DB access, guarded by an asyncio.Lock. tools/list is answered in milliseconds regardless of DB state. 2. Lazy heavy imports: - services/embeddings.py: voyageai (~450ms) loaded only inside _get_client() - services/extractor.py: google.cloud.vision (~550ms) loaded only inside _get_vision_client() and _ocr_with_google_vision() These two were being imported at module top from legal_mcp.tools.documents -> services.processor -> services.{extractor,embeddings}, so the FastMCP server couldn't even start responding until both finished. Cold start dropped from 2.7s to 1.17s end-to-end (init + tools/list response). 3. Agent-side warmup + retry guidance (.claude/agents/legal-ceo.md): Even with a fast server, the model can still race on the very first call. The precedent-extraction section now tells the CEO to call workflow_status as a warmup probe and to retry after a short sleep if it sees "No such tool available", before falling back to the python bypass.
Also expanded the precedent-tool whitelists on the sub-agents that delegate halacha/library work (commits 4a9a6b7 + 7ee90dc added the tools to the MCP server, but only the CEO got them in its allowed list). Added to: legal-researcher (full extraction set), legal-analyst (library_get/list + halacha review), legal-writer (library lookups + halacha_review), and legal-qa (library_get + halacha_review); the CEO's own list also gains the two tools it was still missing (halacha_review, halachot_pending). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 20:23:14 +00:00
parent d12cdb1fad
commit d4496b96f1
9 changed files with 93 additions and 31 deletions
--- a/mcp-server/src/legal_mcp/server.py
+++ b/mcp-server/src/legal_mcp/server.py
@@ -23,12 +23,17 @@ logger = logging.getLogger("legal_mcp")

@asynccontextmanager
 async def lifespan(server: FastMCP) -> AsyncIterator[None]:
-    """Initialize DB schema on startup, close pool on shutdown."""
-    from legal_mcp.services.db import close_pool, init_schema
+    """Server startup is now non-blocking.

-    logger.info("Initializing database schema...")
-    await init_schema()
-    logger.info("Ezer Mishpati MCP server ready")
+    Schema init was moved out of the lifespan to fix a race where Claude Code
+    would call a tool before `tools/list` had been answered — manifesting as
+    "No such tool available". Lifespan now returns immediately so the MCP
+    handshake completes in milliseconds; the schema is initialized lazily on
+    the first DB access via services/db.get_pool().
+    """
+    from legal_mcp.services.db import close_pool
+
+    logger.info("Ezer Mishpati MCP server ready (schema init deferred)")
    try:
        yield
    finally:
--- a/mcp-server/src/legal_mcp/services/db.py
+++ b/mcp-server/src/legal_mcp/services/db.py
@@ -2,6 +2,7 @@

 from __future__ import annotations

+import asyncio
 import json
 import logging
 from datetime import date
@@ -15,23 +16,42 @@ from legal_mcp import config
 logger = logging.getLogger(__name__)

 _pool: asyncpg.Pool | None = None
+_schema_ready: bool = False
+_init_lock: asyncio.Lock = asyncio.Lock()


 async def get_pool() -> asyncpg.Pool:
-    global _pool
-    if _pool is None:
-        # First, ensure pgvector extension exists (before registering type codec)
-        conn = await asyncpg.connect(config.POSTGRES_URL)
-        await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
-        await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
-        await conn.close()
+    """Return the connection pool, creating it (and running schema init) lazily.
+
+    The MCP server's `lifespan` no longer blocks on schema init — it's done
+    here on first DB access. This keeps the `initialize`/`tools/list` MCP
+    handshake immediate so Claude Code never sees a stale "No such tool".
+    """
+    global _pool, _schema_ready
+    if _pool is not None and _schema_ready:
+        return _pool
+
+    async with _init_lock:
+        if _pool is None:
+            # First, ensure pgvector extension exists (before registering type codec)
+            conn = await asyncpg.connect(config.POSTGRES_URL)
+            try:
+                await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
+                await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
+            finally:
+                await conn.close()
+
+            _pool = await asyncpg.create_pool(
+                config.POSTGRES_URL,
+                min_size=2,
+                max_size=10,
+                init=_init_connection,
+            )
+
+        if not _schema_ready:
+            await _run_schema_migrations(_pool)
+            _schema_ready = True

-        _pool = await asyncpg.create_pool(
-            config.POSTGRES_URL,
-            min_size=2,
-            max_size=10,
-            init=_init_connection,
-        )
    return _pool


@@ -671,8 +691,7 @@ CREATE INDEX IF NOT EXISTS idx_prec_img_emb_case_law
 """


-async def init_schema() -> None:
-    pool = await get_pool()
+async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
    async with pool.acquire() as conn:
        await conn.execute(SCHEMA_SQL)
        await conn.execute(MIGRATIONS_SQL)
@@ -687,6 +706,11 @@ async def init_schema() -> None:
    logger.info("Database schema initialized (v1-v9)")


+async def init_schema() -> None:
+    """Backward-compatible wrapper. Schema init now runs lazily inside get_pool()."""
+    await get_pool()
+
+
 # ── Case CRUD ───────────────────────────────────────────────────────

 async def create_case(
--- a/mcp-server/src/legal_mcp/services/embeddings.py
+++ b/mcp-server/src/legal_mcp/services/embeddings.py
@@ -5,16 +5,18 @@ from __future__ import annotations
 import logging
 from typing import TYPE_CHECKING

-import voyageai
-
 from legal_mcp import config

 if TYPE_CHECKING:
+    import voyageai
    from PIL import Image as PILImage

 logger = logging.getLogger(__name__)

-_client: voyageai.Client | None = None
+# voyageai is imported lazily inside _get_client to keep MCP server startup
+# fast — loading voyageai eagerly costs ~450ms and Claude Code's first tool
+# call can hit a "No such tool available" race if the server isn't ready yet.
+_client: "voyageai.Client | None" = None

 # Per-call cap for multimodal_embed. POC ran 89 pages (~312K tokens)
 # in a single call comfortably; 50 leaves safe headroom for densely-
@@ -22,9 +24,10 @@ _client: voyageai.Client | None = None
 _MULTIMODAL_BATCH_SIZE = 50


-def _get_client() -> voyageai.Client:
+def _get_client() -> "voyageai.Client":
    global _client
    if _client is None:
+        import voyageai
        _client = voyageai.Client(api_key=config.VOYAGE_API_KEY)
    return _client

--- a/mcp-server/src/legal_mcp/services/extractor.py
+++ b/mcp-server/src/legal_mcp/services/extractor.py
@@ -15,25 +15,29 @@ import re
 import subprocess
 import tempfile
 from pathlib import Path
+from typing import TYPE_CHECKING

 import fitz  # PyMuPDF
 from PIL import Image
 from docx import Document as DocxDocument
-from google.cloud import vision
 from striprtf.striprtf import rtf_to_text

 from legal_mcp import config

+if TYPE_CHECKING:
+    from google.cloud import vision
+
 logger = logging.getLogger(__name__)

-# ── Google Cloud Vision client ───────────────────────────────────
+# ── Google Cloud Vision client (imported lazily — saves ~550ms at MCP startup) ──

-_vision_client: vision.ImageAnnotatorClient | None = None
+_vision_client: "vision.ImageAnnotatorClient | None" = None


-def _get_vision_client() -> vision.ImageAnnotatorClient:
+def _get_vision_client() -> "vision.ImageAnnotatorClient":
    global _vision_client
    if _vision_client is None:
+        from google.cloud import vision
        _vision_client = vision.ImageAnnotatorClient(
            client_options={"api_key": config.GOOGLE_CLOUD_VISION_API_KEY}
        )
@@ -223,6 +227,7 @@ def page_at_offset(offset: int, page_offsets: list[int]) -> int:

 def _ocr_with_google_vision(image_bytes: bytes, page_num: int) -> str:
    """OCR a single page image using Google Cloud Vision API."""
+    from google.cloud import vision  # lazy: keeps MCP startup fast
    client = _get_vision_client()
    image = vision.Image(content=image_bytes)