From 4a9a6b7970b64f115ac604d88929112463ec6a0f Mon Sep 17 00:00:00 2001 From: Chaim Date: Sun, 3 May 2026 12:32:25 +0000 Subject: [PATCH] feat(precedents): UI button queues extraction for local MCP worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chair wanted a one-click "extract metadata" button on the edit sheet. The constraint stays the same — claude_session needs the local CLI which the container doesn't have, so the button can't run the extractor itself. Compromise: button stamps a queue marker; the local MCP server drains the queue on demand. DB (V8): two nullable timestamps on case_law, metadata_extraction_requested_at and halacha_extraction_requested_at, with partial indexes for cheap "find pending" scans. API: POST /api/precedent-library/{id}/request-metadata → stamp the row POST /api/precedent-library/{id}/request-halachot → same for halacha GET /api/precedent-library/queue/pending?kind=... → read-only view UI: Sparkles button in the edit sheet header. Click → toast tells the chair what to run from Claude Code. The button never triggers the extractor directly from the container. MCP tool: precedent_process_pending(kind, limit) — runs from Claude Code with the local CLI, picks up everything stamped, calls the extractor for each, clears the timestamp on success. Failures keep the timestamp so the next invocation retries them. Architectural rule (claude_session local-only) is preserved end-to-end and called out in the new endpoint comment + tool docstring. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- mcp-server/src/legal_mcp/server.py | 8 +- mcp-server/src/legal_mcp/services/db.py | 99 ++++++++++++++++++- .../legal_mcp/services/precedent_library.py | 59 +++++++++++ .../src/legal_mcp/tools/precedent_library.py | 19 +++- .../precedents/precedent-edit-sheet.tsx | 42 ++++++-- web-ui/src/lib/api/precedent-library.ts | 41 ++++++-- web/app.py | 46 ++++++++- 7 files changed, 293 insertions(+), 21 deletions(-) diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index aaaf101..fd197e4 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -212,10 +212,16 @@ async def precedent_extract_halachot(case_law_id: str) -> str: @mcp.tool() async def precedent_extract_metadata(case_law_id: str) -> str: - """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. ממלא רק שדות ריקים.""" + """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype, date, level, court, source_type) מהטקסט. ממלא רק שדות ריקים.""" return await plib.precedent_extract_metadata(case_law_id) +@mcp.tool() +async def precedent_process_pending(kind: str = "metadata", limit: int = 20) -> str: + """ריקון תור בקשות חילוץ שנשלחו מ-UI. kind: 'metadata' או 'halacha'. 
מריץ extractor מקומית עם CLI על כל פריט בתור, ומנקה את הסימון אחרי הצלחה.""" + return await plib.precedent_process_pending(kind, limit) + + @mcp.tool() async def search_precedent_library( query: str, diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 9457fde..26e66e8 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -603,6 +603,26 @@ CREATE INDEX IF NOT EXISTS idx_halachot_vec """ +# ── V8: Extraction request queue ───────────────────────────────── +# Web UI buttons ("Sparkles" = request metadata extraction; "Refresh" = +# request halacha extraction) run inside the FastAPI container, which has +# no `claude` CLI. They can't run the LLM extractor directly. Instead they +# stamp a request timestamp here, and the chair (or me) runs the MCP tool +# `precedent_process_pending` from local Claude Code, where the +# CLI is available, to drain the queue. See claude_session.py for the rule. 
+ +SCHEMA_V8_SQL = """ +ALTER TABLE case_law ADD COLUMN IF NOT EXISTS metadata_extraction_requested_at TIMESTAMPTZ; +ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_requested_at TIMESTAMPTZ; +CREATE INDEX IF NOT EXISTS idx_case_law_metadata_requested + ON case_law(metadata_extraction_requested_at) + WHERE metadata_extraction_requested_at IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_case_law_halacha_requested + ON case_law(halacha_extraction_requested_at) + WHERE halacha_extraction_requested_at IS NOT NULL; +""" + + async def init_schema() -> None: pool = await get_pool() async with pool.acquire() as conn: @@ -614,7 +634,8 @@ async def init_schema() -> None: await conn.execute(SCHEMA_V5_SQL) await conn.execute(SCHEMA_V6_SQL) await conn.execute(SCHEMA_V7_SQL) - logger.info("Database schema initialized (v1-v7)") + await conn.execute(SCHEMA_V8_SQL) + logger.info("Database schema initialized (v1-v8)") # ── Case CRUD ─────────────────────────────────────────────────────── @@ -2191,3 +2212,79 @@ async def precedent_library_stats() -> dict: "halachot_pending": int(halachot_pending or 0), "halachot_approved": int(halachot_approved or 0), } + + +# ── V8: extraction request queue helpers ───────────────────────── + + +async def request_metadata_extraction(case_law_id: UUID) -> bool: + """Stamp ``metadata_extraction_requested_at`` for the local MCP worker + to pick up. 
Returns False if the row is missing.""" + pool = await get_pool() + result = await pool.execute( + "UPDATE case_law SET metadata_extraction_requested_at = now() " + "WHERE id = $1 AND source_kind = 'external_upload'", + case_law_id, + ) + return result == "UPDATE 1" + + +async def request_halacha_extraction(case_law_id: UUID) -> bool: + """Same but for halacha extraction.""" + pool = await get_pool() + result = await pool.execute( + "UPDATE case_law SET halacha_extraction_requested_at = now() " + "WHERE id = $1 AND source_kind = 'external_upload'", + case_law_id, + ) + return result == "UPDATE 1" + + +async def list_pending_extraction_requests( + kind: str = "metadata", # 'metadata' | 'halacha' + limit: int = 20, +) -> list[dict]: + """Return rows requesting extraction, oldest request first. + + The MCP worker drains the queue in order: process → clear timestamp. + """ + col = ( + "metadata_extraction_requested_at" + if kind == "metadata" + else "halacha_extraction_requested_at" + ) + pool = await get_pool() + rows = await pool.fetch( + f"""SELECT id, case_number, case_name, court, date, + practice_area, is_binding, {col} AS requested_at + FROM case_law + WHERE {col} IS NOT NULL + AND source_kind = 'external_upload' + ORDER BY {col} ASC + LIMIT $1""", + limit, + ) + out = [] + for r in rows: + d = dict(r) + if d.get("date") is not None: + d["date"] = d["date"].isoformat() + if d.get("requested_at") is not None: + d["requested_at"] = d["requested_at"].isoformat() + out.append(d) + return out + + +async def clear_extraction_request( + case_law_id: UUID, kind: str = "metadata", +) -> None: + col = ( + "metadata_extraction_requested_at" + if kind == "metadata" + else "halacha_extraction_requested_at" + ) + pool = await get_pool() + await pool.execute( + f"UPDATE case_law SET {col} = NULL WHERE id = $1", + case_law_id, + ) diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py index 8400952..1a1776b 
100644 --- a/mcp-server/src/legal_mcp/services/precedent_library.py +++ b/mcp-server/src/legal_mcp/services/precedent_library.py @@ -253,6 +253,65 @@ async def reextract_halachot( return result +async def process_pending_extractions(kind: str = "metadata", limit: int = 20) -> dict: + """Drain the extraction queue (UI-button-stamped requests). + + The button in the web UI cannot run claude_session itself (it lives in + the container, no CLI). It just stamps ``metadata_extraction_requested_at`` + on the row. This function — called from local Claude Code via the MCP + tool — picks each stamped row up, runs the extractor, and clears the + timestamp. + + Args: + kind: 'metadata' or 'halacha'. + limit: max rows to process this run. + """ + from legal_mcp.services import halacha_extractor, precedent_metadata_extractor + + if kind not in {"metadata", "halacha"}: + raise ValueError("kind must be 'metadata' or 'halacha'") + + pending = await db.list_pending_extraction_requests(kind=kind, limit=limit) + if not pending: + return {"status": "no_pending", "kind": kind, "processed": 0, "results": []} + + results: list[dict] = [] + processed = 0 + for row in pending: + cid = UUID(str(row["id"])) + try: + if kind == "metadata": + result = await precedent_metadata_extractor.extract_and_apply(cid) + else: + result = await halacha_extractor.extract(cid) + await db.clear_extraction_request(cid, kind=kind) + processed += 1 + results.append({ + "case_law_id": str(cid), + "case_number": row.get("case_number", ""), + "status": result.get("status", "unknown"), + "fields": result.get("fields", []), + "stored": result.get("stored", 0), + }) + except Exception as e: + logger.exception("process_pending_extractions failed for %s: %s", cid, e) + results.append({ + "case_law_id": str(cid), + "case_number": row.get("case_number", ""), + "status": "failed", + "error": str(e), + }) + # Don't clear the request — it stays for the next run. 
+ + return { + "status": "completed", + "kind": kind, + "processed": processed, + "total_pending": len(pending), + "results": results, + } + + async def reextract_metadata( case_law_id: UUID | str, progress: ProgressCb | None = None, diff --git a/mcp-server/src/legal_mcp/tools/precedent_library.py b/mcp-server/src/legal_mcp/tools/precedent_library.py index e704ef5..2b01b97 100644 --- a/mcp-server/src/legal_mcp/tools/precedent_library.py +++ b/mcp-server/src/legal_mcp/tools/precedent_library.py @@ -140,7 +140,7 @@ async def precedent_extract_halachot(case_law_id: str) -> str: async def precedent_extract_metadata(case_law_id: str) -> str: - """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. ממלא רק שדות ריקים — לא דורס מה שכבר הוזן.""" + """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype, date, level, court, source_type) מהטקסט. ממלא רק שדות ריקים — לא דורס מה שכבר הוזן.""" try: cid = UUID(case_law_id) except ValueError: @@ -152,6 +152,23 @@ async def precedent_extract_metadata(case_law_id: str) -> str: return _ok(result) +async def precedent_process_pending(kind: str = "metadata", limit: int = 20) -> str: + """ריקון תור בקשות חילוץ שנערמו ע"י כפתורי ה-UI. kind: 'metadata' או 'halacha'. + + הכפתור ב-UI מסמן ב-DB שהפסיקה מבקשת חילוץ. כלי זה (שרץ מקומית עם CLI) + סורק את התור ומריץ את ה-extractor לכל פריט. אחרי הצלחה הסימון מתנקה. 
+ """ + if kind not in {"metadata", "halacha"}: + return _err("kind חייב להיות 'metadata' או 'halacha'") + try: + result = await precedent_library.process_pending_extractions( + kind=kind, limit=limit, + ) + except Exception as e: + return _err(str(e)) + return _ok(result) + + async def search_precedent_library( query: str, practice_area: str = "", diff --git a/web-ui/src/components/precedents/precedent-edit-sheet.tsx b/web-ui/src/components/precedents/precedent-edit-sheet.tsx index ed2f9af..eda34c6 100644 --- a/web-ui/src/components/precedents/precedent-edit-sheet.tsx +++ b/web-ui/src/components/precedents/precedent-edit-sheet.tsx @@ -1,7 +1,7 @@ "use client"; import { useEffect, useState } from "react"; -import { Save } from "lucide-react"; +import { Save, Sparkles } from "lucide-react"; import { toast } from "sonner"; import { Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription, @@ -17,6 +17,7 @@ import { import { usePrecedent, useUpdatePrecedent, + useRequestMetadataExtraction, type PracticeArea, type SourceType, } from "@/lib/api/precedent-library"; @@ -59,6 +60,7 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { const open = caseLawId !== null; const { data: record, isPending } = usePrecedent(caseLawId); const update = useUpdatePrecedent(); + const requestMetadata = useRequestMetadataExtraction(); const [form, setForm] = useState(EMPTY); @@ -112,6 +114,18 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { } }; + const onRequestMetadata = async () => { + if (!caseLawId) return; + try { + await requestMetadata.mutateAsync(caseLawId); + toast.success( + "סומן לחילוץ מטא-דאטה. הריצי מ-Claude Code: precedent_process_pending", + ); + } catch (err) { + toast.error(err instanceof Error ? 
err.message : "שגיאה"); + } + }; + return ( { if (!o) onOpenChange(false); }}> @@ -119,9 +133,9 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { עריכת פרטי פסיקה כל השדות ניתנים לעריכה חוץ ממראה המקום (מזהה ייחודי). - לחילוץ מטא-דאטה אוטומטי או הלכות — להפעיל מ-Claude Code את - ה-MCP tools precedent_extract_metadata /{" "} - precedent_extract_halachot. + כפתור "חלץ מטא-דאטה" שולח בקשה לתור מקומי שאני מרוקן + מ-Claude Code (ה-LLM רץ מקומית עם claude session, + לא ב-API). @@ -131,11 +145,23 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { ) : (
-
-
מראה מקום (לא ניתן לעריכה)
-
- {record.case_number} +
+
+
מראה מקום (לא ניתן לעריכה)
+
+ {record.case_number} +
+
diff --git a/web-ui/src/lib/api/precedent-library.ts b/web-ui/src/lib/api/precedent-library.ts index 61240b8..437820f 100644 --- a/web-ui/src/lib/api/precedent-library.ts +++ b/web-ui/src/lib/api/precedent-library.ts @@ -336,12 +336,41 @@ export function useUpdatePrecedent() { }); } -// Halacha + metadata extraction are not exposed as HTTP mutations because -// they call the local `claude` CLI through the MCP server — see the rule -// in mcp-server/src/legal_mcp/services/claude_session.py. The chair -// triggers them from Claude Code via: -// mcp__legal-ai__precedent_extract_halachot -// mcp__legal-ai__precedent_extract_metadata +/* Extraction can't run inside the container (no `claude` CLI). The + * "request" endpoints below stamp a queue marker in case_law; the chair + * (or me) drains the queue from Claude Code by invoking the MCP tool + * `precedent_process_pending`, which runs the actual extractor locally. + * See the rule in mcp-server/src/legal_mcp/services/claude_session.py. */ + +export function useRequestMetadataExtraction() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (id: string) => + apiRequest<{ queued: boolean }>( + `/api/precedent-library/${encodeURIComponent(id)}/request-metadata`, + { method: "POST" }, + ), + onSuccess: (_, id) => { + qc.invalidateQueries({ queryKey: libraryKeys.detail(id) }); + qc.invalidateQueries({ queryKey: libraryKeys.all }); + }, + }); +} + +export function useRequestHalachotExtraction() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (id: string) => + apiRequest<{ queued: boolean }>( + `/api/precedent-library/${encodeURIComponent(id)}/request-halachot`, + { method: "POST" }, + ), + onSuccess: (_, id) => { + qc.invalidateQueries({ queryKey: libraryKeys.detail(id) }); + qc.invalidateQueries({ queryKey: libraryKeys.all }); + }, + }); +} export function useHalachotPending(limit = 200) { return useQuery({ diff --git a/web/app.py b/web/app.py index 6f28484..43b66a8 100644 --- 
a/web/app.py +++ b/web/app.py @@ -3750,10 +3750,48 @@ async def precedent_library_delete(case_law_id: str): # Halacha and metadata extraction are LLM-driven and rely on the local # `claude` CLI via mcp-server/services/claude_session.py — they CANNOT run -# from this container (no CLI, no claude.ai session). They are exposed as -# MCP tools (`precedent_extract_halachot`, `precedent_extract_metadata`) -# and triggered from local Claude Code, not via HTTP. See -# services/claude_session.py for the architectural rule. +# from this container (no CLI, no claude.ai session). The endpoints below +# DON'T run extraction; they only stamp a request in the queue. The +# corresponding MCP tool, `precedent_process_pending` (called with +# kind='metadata' or kind='halacha'), invoked from local Claude Code, +# drains the queue. + + +@app.post("/api/precedent-library/{case_law_id}/request-metadata") +async def precedent_request_metadata(case_law_id: str): + """Stamp the case_law row as needing metadata extraction. 
The local + MCP worker (`precedent_process_pending_metadata`) will pick it up.""" + try: + cid = UUID(case_law_id) + except ValueError: + raise HTTPException(400, "case_law_id לא תקין") + ok = await db.request_metadata_extraction(cid) + if not ok: + raise HTTPException(404, "פסיקה לא נמצאה (או לא מסוג external_upload)") + return {"queued": True, "case_law_id": case_law_id, "kind": "metadata"} + + +@app.post("/api/precedent-library/{case_law_id}/request-halachot") +async def precedent_request_halachot(case_law_id: str): + """Same, for halacha re-extraction.""" + try: + cid = UUID(case_law_id) + except ValueError: + raise HTTPException(400, "case_law_id לא תקין") + ok = await db.request_halacha_extraction(cid) + if not ok: + raise HTTPException(404, "פסיקה לא נמצאה (או לא מסוג external_upload)") + return {"queued": True, "case_law_id": case_law_id, "kind": "halacha"} + + +@app.get("/api/precedent-library/queue/pending") +async def precedent_queue_pending(kind: str = "metadata", limit: int = 20): + """Read-only view of the queue. The MCP worker reads this too, but the + UI calls it to show 'X ממתינות לעיבוד מקומי' badges.""" + if kind not in {"metadata", "halacha"}: + raise HTTPException(400, "kind חייב להיות metadata או halacha") + items = await db.list_pending_extraction_requests(kind=kind, limit=limit) + return {"items": items, "count": len(items)} @app.get("/api/halachot")