From d4496b96f1811200b514b63faf90d45a2929256b Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 3 May 2026 20:23:14 +0000 Subject: [PATCH] fix(mcp): eliminate "No such tool available" race at agent wakeup When Paperclip wakes the CEO and the model issues an mcp__legal-ai__* call within ~10s of session init, Claude Code sometimes returns "No such tool available" because the legal-ai MCP server hasn't finished bringing up its tool catalog yet. Observed twice today on CMPA precedent-extraction wakeups (sessions 9989fbaf and a9c61801); the agent fell back to bash + .venv/bin/python and finished the work, but the race needed fixing on the server side. Three changes that close the window: 1. Lazy schema init (services/db.py + server.py) `init_schema()` was awaited inside the FastMCP lifespan, blocking the `initialize`/`tools/list` handshake until ~10 CREATE TABLE IF NOT EXISTS statements ran. Under contention (two CEOs waking at once for different companies) this blocking window stretched even further. Now the lifespan returns immediately and `get_pool()` runs the schema migrations exactly once on first DB access, guarded by an asyncio.Lock. tools/list is answered in milliseconds regardless of DB state. 2. Lazy heavy imports - services/embeddings.py: voyageai (~450ms) loaded only inside _get_client() - services/extractor.py: google.cloud.vision (~550ms) loaded only inside _get_vision_client() and _ocr_with_google_vision() These two were being imported at module top from legal_mcp.tools.documents -> services.processor -> services.{extractor,embeddings}, so the FastMCP server couldn't even start responding until both finished. Cold start dropped from 2.7s to 1.17s end-to-end (init + tools/list response). 3. Agent-side warmup + retry guidance (.claude/agents/legal-ceo.md) Even with a fast server, the model can still race on the very first call. 
The precedent-extraction section now tells the CEO to call workflow_status as a warmup probe and to retry after a short sleep if it sees "No such tool available", before falling back to the python bypass. Also expanded the precedent-tool whitelists on the sub-agents that delegate halacha/library work (commits 4a9a6b7 + 7ee90dc added the tools to the MCP server but only the CEO got them in its allowed list). Added to: legal-researcher (full extraction set), legal-analyst (library_get/list + halacha review), legal-writer (library lookups + halacha_review), legal-qa (library_get + halacha_review), and the two that the CEO was already missing (halacha_review, halachot_pending). Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/agents/legal-analyst.md | 4 ++ .claude/agents/legal-ceo.md | 15 ++++-- .claude/agents/legal-qa.md | 2 + .claude/agents/legal-researcher.md | 7 +++ .claude/agents/legal-writer.md | 3 ++ mcp-server/src/legal_mcp/server.py | 15 ++++-- mcp-server/src/legal_mcp/services/db.py | 54 +++++++++++++------ .../src/legal_mcp/services/embeddings.py | 11 ++-- .../src/legal_mcp/services/extractor.py | 13 +++-- 9 files changed, 93 insertions(+), 31 deletions(-) diff --git a/.claude/agents/legal-analyst.md b/.claude/agents/legal-analyst.md index 22231ae..90df935 100644 --- a/.claude/agents/legal-analyst.md +++ b/.claude/agents/legal-analyst.md @@ -19,6 +19,10 @@ tools: - mcp__legal-ai__search_case_documents - mcp__legal-ai__search_decisions - mcp__legal-ai__search_precedent_library + - mcp__legal-ai__precedent_library_get + - mcp__legal-ai__precedent_library_list + - mcp__legal-ai__halacha_review + - mcp__legal-ai__halachot_pending - mcp__legal-ai__find_similar_cases - mcp__legal-ai__workflow_status - mcp__legal-ai__processing_status diff --git a/.claude/agents/legal-ceo.md b/.claude/agents/legal-ceo.md index 6ca95a6..4c5d796 100644 --- a/.claude/agents/legal-ceo.md +++ b/.claude/agents/legal-ceo.md @@ -34,6 +34,8 @@ tools: - 
mcp__legal-ai__precedent_extract_metadata - mcp__legal-ai__precedent_library_get - mcp__legal-ai__precedent_library_list + - mcp__legal-ai__halacha_review + - mcp__legal-ai__halachot_pending --- # עוזר משפטי — מנהל תהליך כתיבת החלטות @@ -164,16 +166,23 @@ Paperclip חוסם אוטומטית כל issue ב-`in_progress` שאין לו ru מופעל כשפסק דין חדש מועלה לספרייה. ה-issue נמצא בפרויקט "ספריית פסיקה — תור חילוץ" ומשויך אליך. +**⚠️ MCP startup race — חובה לקרוא לפני הקריאה הראשונה!** +ה-MCP server של legal-ai לוקח ~3-10 שניות לעלות בעת wakeup חדש (Python imports). אם הקריאה הראשונה ל-`mcp__legal-ai__*` תחזיר `"No such tool available"` — זה race, **לא bug אמיתי**. הפעולה הנכונה: +1. הרץ `Bash sleep 5` — תן ל-MCP server להתייצב. +2. נסה שוב את אותו כלי MCP. +3. אם עדיין נכשל אחרי 2 retries — fallback ל-Python ישיר (`Bash` עם `.venv/bin/python -c "from legal_mcp.tools.precedent_library import ..."`). + **מה לעשות:** 1. קרא את ה-description של ה-issue — מצוין שם `case_law_id` וה-citation. -2. הרץ פעמיים: +2. **warmup**: קרא קודם `mcp__legal-ai__workflow_status(case_number="warmup")` (כלי קל שמאלץ MCP להתחבר). אם נכשל ב-"No such tool available" → `Bash sleep 5` ואז retry. רק אחרי שזה עובד, המשך: +3. הרץ פעמיים: ``` mcp__legal-ai__precedent_process_pending(kind="metadata") mcp__legal-ai__precedent_process_pending(kind="halacha") ``` הכלי מעבד את **כל** הפסיקות שבתור — אם תוקיע אחת והגיעו עוד בינתיים, גם הן יעובדו. -3. כשמסתיים: כתוב comment קצר ב-issue (`mcp__legal-ai__precedent_process_pending` מחזיר את התוצאה — סכם בעברית: כמה הלכות חולצו, אילו שדות מטא-דאטה הושלמו, ו-status לכל פסיקה). -4. סמן את ה-issue כ-`done`. +4. כשמסתיים: כתוב comment קצר ב-issue (`mcp__legal-ai__precedent_process_pending` מחזיר את התוצאה — סכם בעברית: כמה הלכות חולצו, אילו שדות מטא-דאטה הושלמו, ו-status לכל פסיקה). +5. סמן את ה-issue כ-`done`. **אל**: אל תיצור issues של ביצוע בתיקי ערר, אל תיכנס לתהליך כתיבת החלטה — זו רק עבודת תחזוקה של ספריית הפסיקה. 
diff --git a/.claude/agents/legal-qa.md b/.claude/agents/legal-qa.md index 5cd8fda..809ff71 100644 --- a/.claude/agents/legal-qa.md +++ b/.claude/agents/legal-qa.md @@ -15,6 +15,8 @@ tools: - mcp__legal-ai__workflow_status - mcp__legal-ai__search_case_documents - mcp__legal-ai__search_precedent_library + - mcp__legal-ai__precedent_library_get + - mcp__legal-ai__halacha_review --- # בודק איכות — סוכן QA להחלטות ועדת ערר diff --git a/.claude/agents/legal-researcher.md b/.claude/agents/legal-researcher.md index 6d6933e..c35ec05 100644 --- a/.claude/agents/legal-researcher.md +++ b/.claude/agents/legal-researcher.md @@ -20,6 +20,13 @@ tools: - mcp__legal-ai__precedent_list - mcp__legal-ai__precedent_search_library - mcp__legal-ai__search_precedent_library + - mcp__legal-ai__precedent_library_get + - mcp__legal-ai__precedent_library_list + - mcp__legal-ai__precedent_extract_halachot + - mcp__legal-ai__precedent_extract_metadata + - mcp__legal-ai__precedent_process_pending + - mcp__legal-ai__halacha_review + - mcp__legal-ai__halachot_pending - mcp__legal-ai__workflow_status --- diff --git a/.claude/agents/legal-writer.md b/.claude/agents/legal-writer.md index 07741c0..2bec767 100644 --- a/.claude/agents/legal-writer.md +++ b/.claude/agents/legal-writer.md @@ -20,6 +20,9 @@ tools: - mcp__legal-ai__write_block - mcp__legal-ai__search_decisions - mcp__legal-ai__search_precedent_library + - mcp__legal-ai__precedent_library_get + - mcp__legal-ai__precedent_library_list + - mcp__legal-ai__halacha_review - mcp__legal-ai__search_case_documents - mcp__legal-ai__get_style_guide - mcp__legal-ai__workflow_status diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index fd197e4..998bcc4 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -23,12 +23,17 @@ logger = logging.getLogger("legal_mcp") @asynccontextmanager async def lifespan(server: FastMCP) -> AsyncIterator[None]: - """Initialize DB schema on startup, 
close pool on shutdown.""" - from legal_mcp.services.db import close_pool, init_schema + """Server startup is now non-blocking. - logger.info("Initializing database schema...") - await init_schema() - logger.info("Ezer Mishpati MCP server ready") + Schema init was moved out of the lifespan to fix a race where Claude Code + would call a tool before `tools/list` had been answered — manifesting as + "No such tool available". Lifespan now returns immediately so the MCP + handshake completes in milliseconds; the schema is initialized lazily on + the first DB access via services/db.get_pool(). + """ + from legal_mcp.services.db import close_pool + + logger.info("Ezer Mishpati MCP server ready (schema init deferred)") try: yield finally: diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index e9e85fd..4d8acde 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json import logging from datetime import date @@ -15,23 +16,42 @@ from legal_mcp import config logger = logging.getLogger(__name__) _pool: asyncpg.Pool | None = None +_schema_ready: bool = False +_init_lock: asyncio.Lock = asyncio.Lock() async def get_pool() -> asyncpg.Pool: - global _pool - if _pool is None: - # First, ensure pgvector extension exists (before registering type codec) - conn = await asyncpg.connect(config.POSTGRES_URL) - await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') - await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"') - await conn.close() + """Return the connection pool, creating it (and running schema init) lazily. + + The MCP server's `lifespan` no longer blocks on schema init — it's done + here on first DB access. This keeps the `initialize`/`tools/list` MCP + handshake immediate so Claude Code never sees a stale "No such tool". 
+ """ + global _pool, _schema_ready + if _pool is not None and _schema_ready: + return _pool + + async with _init_lock: + if _pool is None: + # First, ensure pgvector extension exists (before registering type codec) + conn = await asyncpg.connect(config.POSTGRES_URL) + try: + await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') + await conn.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"') + finally: + await conn.close() + + _pool = await asyncpg.create_pool( + config.POSTGRES_URL, + min_size=2, + max_size=10, + init=_init_connection, + ) + + if not _schema_ready: + await _run_schema_migrations(_pool) + _schema_ready = True - _pool = await asyncpg.create_pool( - config.POSTGRES_URL, - min_size=2, - max_size=10, - init=_init_connection, - ) return _pool @@ -671,8 +691,7 @@ CREATE INDEX IF NOT EXISTS idx_prec_img_emb_case_law """ -async def init_schema() -> None: - pool = await get_pool() +async def _run_schema_migrations(pool: asyncpg.Pool) -> None: async with pool.acquire() as conn: await conn.execute(SCHEMA_SQL) await conn.execute(MIGRATIONS_SQL) @@ -687,6 +706,11 @@ async def init_schema() -> None: logger.info("Database schema initialized (v1-v9)") +async def init_schema() -> None: + """Backward-compatible wrapper. 
Schema init now runs lazily inside get_pool().""" + await get_pool() + + # ── Case CRUD ─────────────────────────────────────────────────────── async def create_case( diff --git a/mcp-server/src/legal_mcp/services/embeddings.py b/mcp-server/src/legal_mcp/services/embeddings.py index 4a6aa1c..10b4b23 100644 --- a/mcp-server/src/legal_mcp/services/embeddings.py +++ b/mcp-server/src/legal_mcp/services/embeddings.py @@ -5,16 +5,18 @@ from __future__ import annotations import logging from typing import TYPE_CHECKING -import voyageai - from legal_mcp import config if TYPE_CHECKING: + import voyageai from PIL import Image as PILImage logger = logging.getLogger(__name__) -_client: voyageai.Client | None = None +# voyageai is imported lazily inside _get_client to keep MCP server startup +# fast — loading voyageai eagerly costs ~450ms and Claude Code's first tool +# call can hit a "No such tool available" race if the server isn't ready yet. +_client: "voyageai.Client | None" = None # Per-call cap for multimodal_embed. 
POC ran 89 pages (~312K tokens) # in a single call comfortably; 50 leaves safe headroom for densely- @@ -22,9 +24,10 @@ _client: voyageai.Client | None = None _MULTIMODAL_BATCH_SIZE = 50 -def _get_client() -> voyageai.Client: +def _get_client() -> "voyageai.Client": global _client if _client is None: + import voyageai _client = voyageai.Client(api_key=config.VOYAGE_API_KEY) return _client diff --git a/mcp-server/src/legal_mcp/services/extractor.py b/mcp-server/src/legal_mcp/services/extractor.py index 4309537..18d36e0 100644 --- a/mcp-server/src/legal_mcp/services/extractor.py +++ b/mcp-server/src/legal_mcp/services/extractor.py @@ -15,25 +15,29 @@ import re import subprocess import tempfile from pathlib import Path +from typing import TYPE_CHECKING import fitz # PyMuPDF from PIL import Image from docx import Document as DocxDocument -from google.cloud import vision from striprtf.striprtf import rtf_to_text from legal_mcp import config +if TYPE_CHECKING: + from google.cloud import vision + logger = logging.getLogger(__name__) -# ── Google Cloud Vision client ─────────────────────────────────── +# ── Google Cloud Vision client (imported lazily — saves ~550ms at MCP startup) ── -_vision_client: vision.ImageAnnotatorClient | None = None +_vision_client: "vision.ImageAnnotatorClient | None" = None -def _get_vision_client() -> vision.ImageAnnotatorClient: +def _get_vision_client() -> "vision.ImageAnnotatorClient": global _vision_client if _vision_client is None: + from google.cloud import vision _vision_client = vision.ImageAnnotatorClient( client_options={"api_key": config.GOOGLE_CLOUD_VISION_API_KEY} ) @@ -223,6 +227,7 @@ def page_at_offset(offset: int, page_offsets: list[int]) -> int: def _ocr_with_google_vision(image_bytes: bytes, page_num: int) -> str: """OCR a single page image using Google Cloud Vision API.""" + from google.cloud import vision # lazy: keeps MCP startup fast client = _get_vision_client() image = vision.Image(content=image_bytes)