diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index aaaf101..fd197e4 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -212,10 +212,16 @@ async def precedent_extract_halachot(case_law_id: str) -> str: @mcp.tool() async def precedent_extract_metadata(case_law_id: str) -> str: - """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. ממלא רק שדות ריקים.""" + """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype, date, level, court, source_type) מהטקסט. ממלא רק שדות ריקים.""" return await plib.precedent_extract_metadata(case_law_id) +@mcp.tool() +async def precedent_process_pending(kind: str = "metadata", limit: int = 20) -> str: + """ריקון תור בקשות חילוץ שנשלחו מ-UI. kind: 'metadata' או 'halacha'. מריץ extractor מקומית עם CLI על כל פריט בתור, ומנקה את הסימון אחרי הצלחה.""" + return await plib.precedent_process_pending(kind, limit) + + @mcp.tool() async def search_precedent_library( query: str, diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 9457fde..26e66e8 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -603,6 +603,26 @@ CREATE INDEX IF NOT EXISTS idx_halachot_vec """ +# ── V8: Extraction request queue ───────────────────────────────── +# Web UI buttons ("Sparkles" = request metadata extraction; "Refresh" = +# request halacha extraction) run inside the FastAPI container, which has +# no `claude` CLI. They can't run the LLM extractor directly. Instead they +# stamp a request timestamp here, and the chair (or me) runs the MCP tool +# `precedent_process_pending` from local Claude Code, where the +# CLI is available, to drain the queue. See claude_session.py for the rule. 
+ +SCHEMA_V8_SQL = """ +ALTER TABLE case_law ADD COLUMN IF NOT EXISTS metadata_extraction_requested_at TIMESTAMPTZ; +ALTER TABLE case_law ADD COLUMN IF NOT EXISTS halacha_extraction_requested_at TIMESTAMPTZ; +CREATE INDEX IF NOT EXISTS idx_case_law_metadata_requested + ON case_law(metadata_extraction_requested_at) + WHERE metadata_extraction_requested_at IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_case_law_halacha_requested + ON case_law(halacha_extraction_requested_at) + WHERE halacha_extraction_requested_at IS NOT NULL; +""" + + async def init_schema() -> None: pool = await get_pool() async with pool.acquire() as conn: @@ -614,7 +634,8 @@ async def init_schema() -> None: await conn.execute(SCHEMA_V5_SQL) await conn.execute(SCHEMA_V6_SQL) await conn.execute(SCHEMA_V7_SQL) - logger.info("Database schema initialized (v1-v7)") + await conn.execute(SCHEMA_V8_SQL) + logger.info("Database schema initialized (v1-v8)") # ── Case CRUD ─────────────────────────────────────────────────────── @@ -2191,3 +2212,79 @@ async def precedent_library_stats() -> dict: "halachot_pending": int(halachot_pending or 0), "halachot_approved": int(halachot_approved or 0), } + + +# ── V8: extraction request queue helpers ───────────────────────── + + +async def request_metadata_extraction(case_law_id: UUID) -> bool: + """Stamp ``metadata_extraction_requested_at`` for the local MCP worker + to pick up. 
Returns False if the row is missing or is not an ``external_upload``.""" + pool = await get_pool() + result = await pool.execute( + "UPDATE case_law SET metadata_extraction_requested_at = now() " + "WHERE id = $1 AND source_kind = 'external_upload'", + case_law_id, + ) + return result == "UPDATE 1" + + +async def request_halacha_extraction(case_law_id: UUID) -> bool: + """Same but for halacha extraction.""" + pool = await get_pool() + result = await pool.execute( + "UPDATE case_law SET halacha_extraction_requested_at = now() " + "WHERE id = $1 AND source_kind = 'external_upload'", + case_law_id, + ) + return result == "UPDATE 1" + + +async def list_pending_extraction_requests( + kind: str = "metadata", # 'metadata' | 'halacha' + limit: int = 20, +) -> list[dict]: + """Return rows requesting extraction, oldest request first. + + The MCP worker drains the queue in order: process → clear timestamp. + """ + col = ( + "metadata_extraction_requested_at" + if kind == "metadata" + else "halacha_extraction_requested_at" + ) + pool = await get_pool() + rows = await pool.fetch( + f"""SELECT id, case_number, case_name, court, date, + practice_area, is_binding, {col} AS requested_at + FROM case_law + WHERE {col} IS NOT NULL + AND source_kind = 'external_upload' + ORDER BY {col} ASC + LIMIT $1""", + limit, + ) + out = [] + for r in rows: + d = dict(r) + if d.get("date") is not None: + d["date"] = d["date"].isoformat() + if d.get("requested_at") is not None: + d["requested_at"] = d["requested_at"].isoformat() + out.append(d) + return out + + +async def clear_extraction_request( + case_law_id: UUID, kind: str = "metadata", +) -> None: + col = ( + "metadata_extraction_requested_at" + if kind == "metadata" + else "halacha_extraction_requested_at" + ) + pool = await get_pool() + await pool.execute( + f"UPDATE case_law SET {col} = NULL WHERE id = $1", + case_law_id, + ) diff --git a/mcp-server/src/legal_mcp/services/precedent_library.py b/mcp-server/src/legal_mcp/services/precedent_library.py index 8400952..1a1776b 
100644 --- a/mcp-server/src/legal_mcp/services/precedent_library.py +++ b/mcp-server/src/legal_mcp/services/precedent_library.py @@ -253,6 +253,65 @@ async def reextract_halachot( return result +async def process_pending_extractions(kind: str = "metadata", limit: int = 20) -> dict: + """Drain the extraction queue (UI-button-stamped requests). + + The button in the web UI cannot run claude_session itself (it lives in + the container, no CLI). It just stamps ``metadata_extraction_requested_at`` + on the row. This function — called from local Claude Code via the MCP + tool — picks each stamped row up, runs the extractor, and clears the + timestamp. + + Args: + kind: 'metadata' or 'halacha'. + limit: max rows to process this run. + """ + from legal_mcp.services import halacha_extractor, precedent_metadata_extractor + + if kind not in {"metadata", "halacha"}: + raise ValueError("kind must be 'metadata' or 'halacha'") + + pending = await db.list_pending_extraction_requests(kind=kind, limit=limit) + if not pending: + return {"status": "no_pending", "kind": kind, "processed": 0, "results": []} + + results: list[dict] = [] + processed = 0 + for row in pending: + cid = UUID(str(row["id"])) + try: + if kind == "metadata": + result = await precedent_metadata_extractor.extract_and_apply(cid) + else: + result = await halacha_extractor.extract(cid) + await db.clear_extraction_request(cid, kind=kind) + processed += 1 + results.append({ + "case_law_id": str(cid), + "case_number": row.get("case_number", ""), + "status": result.get("status", "unknown"), + "fields": result.get("fields", []), + "stored": result.get("stored", 0), + }) + except Exception as e: + logger.exception("process_pending_extractions failed for %s: %s", cid, e) + results.append({ + "case_law_id": str(cid), + "case_number": row.get("case_number", ""), + "status": "failed", + "error": str(e), + }) + # Don't clear the request — it stays for the next run. 
+ + return { + "status": "completed", + "kind": kind, + "processed": processed, + "total_pending": len(pending), + "results": results, + } + + async def reextract_metadata( case_law_id: UUID | str, progress: ProgressCb | None = None, diff --git a/mcp-server/src/legal_mcp/tools/precedent_library.py b/mcp-server/src/legal_mcp/tools/precedent_library.py index e704ef5..2b01b97 100644 --- a/mcp-server/src/legal_mcp/tools/precedent_library.py +++ b/mcp-server/src/legal_mcp/tools/precedent_library.py @@ -140,7 +140,7 @@ async def precedent_extract_halachot(case_law_id: str) -> str: async def precedent_extract_metadata(case_law_id: str) -> str: - """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype) מהטקסט. ממלא רק שדות ריקים — לא דורס מה שכבר הוזן.""" + """חילוץ מטא-דאטה (case_name קצר, summary, headnote, key_quote, subject_tags, appeal_subtype, date, level, court, source_type) מהטקסט. ממלא רק שדות ריקים — לא דורס מה שכבר הוזן.""" try: cid = UUID(case_law_id) except ValueError: @@ -152,6 +152,23 @@ async def precedent_extract_metadata(case_law_id: str) -> str: return _ok(result) +async def precedent_process_pending(kind: str = "metadata", limit: int = 20) -> str: + """ריקון תור בקשות חילוץ שנערמו ע"י כפתורי ה-UI. kind: 'metadata' או 'halacha'. + + הכפתור ב-UI מסמן ב-DB שהפסיקה מבקשת חילוץ. כלי זה (שרץ מקומית עם CLI) + סורק את התור ומריץ את ה-extractor לכל פריט. אחרי הצלחה הסימון מתנקה. 
+ """ + if kind not in {"metadata", "halacha"}: + return _err("kind חייב להיות 'metadata' או 'halacha'") + try: + result = await precedent_library.process_pending_extractions( + kind=kind, limit=limit, + ) + except Exception as e: + return _err(str(e)) + return _ok(result) + + async def search_precedent_library( query: str, practice_area: str = "", diff --git a/web-ui/src/components/precedents/precedent-edit-sheet.tsx b/web-ui/src/components/precedents/precedent-edit-sheet.tsx index ed2f9af..eda34c6 100644 --- a/web-ui/src/components/precedents/precedent-edit-sheet.tsx +++ b/web-ui/src/components/precedents/precedent-edit-sheet.tsx @@ -1,7 +1,7 @@ "use client"; import { useEffect, useState } from "react"; -import { Save } from "lucide-react"; +import { Save, Sparkles } from "lucide-react"; import { toast } from "sonner"; import { Sheet, SheetContent, SheetHeader, SheetTitle, SheetDescription, @@ -17,6 +17,7 @@ import { import { usePrecedent, useUpdatePrecedent, + useRequestMetadataExtraction, type PracticeArea, type SourceType, } from "@/lib/api/precedent-library"; @@ -59,6 +60,7 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { const open = caseLawId !== null; const { data: record, isPending } = usePrecedent(caseLawId); const update = useUpdatePrecedent(); + const requestMetadata = useRequestMetadataExtraction(); const [form, setForm] = useState(EMPTY); @@ -112,6 +114,18 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { } }; + const onRequestMetadata = async () => { + if (!caseLawId) return; + try { + await requestMetadata.mutateAsync(caseLawId); + toast.success( + "סומן לחילוץ מטא-דאטה. הריצי מ-Claude Code: precedent_process_pending", + ); + } catch (err) { + toast.error(err instanceof Error ? 
err.message : "שגיאה"); + } + }; + return ( { if (!o) onOpenChange(false); }}> @@ -119,9 +133,9 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { עריכת פרטי פסיקה כל השדות ניתנים לעריכה חוץ ממראה המקום (מזהה ייחודי). - לחילוץ מטא-דאטה אוטומטי או הלכות — להפעיל מ-Claude Code את - ה-MCP tools precedent_extract_metadata /{" "} - precedent_extract_halachot. + כפתור "חלץ מטא-דאטה" שולח בקשה לתור מקומי שאני מרוקן + מ-Claude Code (ה-LLM רץ מקומית עם claude session, + לא ב-API). @@ -131,11 +145,23 @@ export function PrecedentEditSheet({ caseLawId, onOpenChange }: Props) { ) : (
-
-
מראה מקום (לא ניתן לעריכה)
-
- {record.case_number} +
+
+
מראה מקום (לא ניתן לעריכה)
+
+ {record.case_number} +
+
diff --git a/web-ui/src/lib/api/precedent-library.ts b/web-ui/src/lib/api/precedent-library.ts index 61240b8..437820f 100644 --- a/web-ui/src/lib/api/precedent-library.ts +++ b/web-ui/src/lib/api/precedent-library.ts @@ -336,12 +336,41 @@ export function useUpdatePrecedent() { }); } -// Halacha + metadata extraction are not exposed as HTTP mutations because -// they call the local `claude` CLI through the MCP server — see the rule -// in mcp-server/src/legal_mcp/services/claude_session.py. The chair -// triggers them from Claude Code via: -// mcp__legal-ai__precedent_extract_halachot -// mcp__legal-ai__precedent_extract_metadata +/* Extraction can't run inside the container (no `claude` CLI). The + * "request" endpoints below stamp a queue marker in case_law; the chair + * (or me) drains the queue from Claude Code by invoking the MCP tool + * `precedent_process_pending`, which runs the actual extractor locally. + * See the rule in mcp-server/src/legal_mcp/services/claude_session.py. */ + +export function useRequestMetadataExtraction() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (id: string) => + apiRequest<{ queued: boolean }>( + `/api/precedent-library/${encodeURIComponent(id)}/request-metadata`, + { method: "POST" }, + ), + onSuccess: (_, id) => { + qc.invalidateQueries({ queryKey: libraryKeys.detail(id) }); + qc.invalidateQueries({ queryKey: libraryKeys.all }); + }, + }); +} + +export function useRequestHalachotExtraction() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: (id: string) => + apiRequest<{ queued: boolean }>( + `/api/precedent-library/${encodeURIComponent(id)}/request-halachot`, + { method: "POST" }, + ), + onSuccess: (_, id) => { + qc.invalidateQueries({ queryKey: libraryKeys.detail(id) }); + qc.invalidateQueries({ queryKey: libraryKeys.all }); + }, + }); +} export function useHalachotPending(limit = 200) { return useQuery({ diff --git a/web/app.py b/web/app.py index 6f28484..43b66a8 100644 --- 
a/web/app.py +++ b/web/app.py @@ -3750,10 +3750,48 @@ async def precedent_library_delete(case_law_id: str): # Halacha and metadata extraction are LLM-driven and rely on the local # `claude` CLI via mcp-server/services/claude_session.py — they CANNOT run -# from this container (no CLI, no claude.ai session). They are exposed as -# MCP tools (`precedent_extract_halachot`, `precedent_extract_metadata`) -# and triggered from local Claude Code, not via HTTP. See -# services/claude_session.py for the architectural rule. +# from this container (no CLI, no claude.ai session). The endpoints below +# DON'T run extraction; they only stamp a request in the queue. The +# corresponding MCP tool (`precedent_process_pending`, called with +# kind='metadata' or kind='halacha'), invoked from local Claude Code, +# drains the queue. + + +@app.post("/api/precedent-library/{case_law_id}/request-metadata") +async def precedent_request_metadata(case_law_id: str): + """Stamp the case_law row as needing metadata extraction. 
The local + MCP worker (`precedent_process_pending`, kind='metadata') will pick it up.""" + try: + cid = UUID(case_law_id) + except ValueError: + raise HTTPException(400, "case_law_id לא תקין") + ok = await db.request_metadata_extraction(cid) + if not ok: + raise HTTPException(404, "פסיקה לא נמצאה (או לא מסוג external_upload)") + return {"queued": True, "case_law_id": case_law_id, "kind": "metadata"} + + +@app.post("/api/precedent-library/{case_law_id}/request-halachot") +async def precedent_request_halachot(case_law_id: str): + """Same, for halacha re-extraction.""" + try: + cid = UUID(case_law_id) + except ValueError: + raise HTTPException(400, "case_law_id לא תקין") + ok = await db.request_halacha_extraction(cid) + if not ok: + raise HTTPException(404, "פסיקה לא נמצאה (או לא מסוג external_upload)") + return {"queued": True, "case_law_id": case_law_id, "kind": "halacha"} + + +@app.get("/api/precedent-library/queue/pending") +async def precedent_queue_pending(kind: str = "metadata", limit: int = 20): + """Read-only view of the queue. The MCP worker reads this too, but the + UI calls it to show 'X ממתינות לעיבוד מקומי' badges.""" + if kind not in {"metadata", "halacha"}: + raise HTTPException(400, "kind חייב להיות metadata או halacha") + items = await db.list_pending_extraction_requests(kind=kind, limit=limit) + return {"items": items, "count": len(items)} @app.get("/api/halachot")