"""MCP tool wrappers for the style_corpus metadata-enrichment flow. The actual extractor lives in ``legal_mcp.services.style_metadata_extractor``; this module just exposes it as MCP tools that the chair (or a future automation) can call from Claude Code. Why these tools matter: the upload pipeline (`/api/training/upload` → `_process_proofread_training`) inserts a style_corpus row with ``summary=''``, ``outcome=''``, ``key_principles=[]`` because LLM extraction can't run from the FastAPI container (no claude CLI there). This module fills that gap — call it from the host, where ``claude`` CLI is available, and the row gets enriched. """ from __future__ import annotations from uuid import UUID from legal_mcp.services import db, style_metadata_extractor from legal_mcp.tools.envelope import err as _err, ok as _ok # GAP-48: SSoT envelope async def extract_decision_metadata(corpus_id: str, overwrite: bool = False) -> str: """חילוץ מטא-דאטה (summary, outcome, key_principles, appeal_subtype) להחלטה בקורפוס הסגנון. ברירת מחדל ``overwrite=False`` ממלא רק שדות ריקים. הזן ``overwrite=true`` כדי לרענן ערכים שכבר נכתבו. """ try: cid = UUID(corpus_id) except ValueError: return _err("corpus_id לא תקין") try: result = await style_metadata_extractor.extract_and_apply(cid, overwrite=overwrite) except Exception as e: return _err(str(e)) return _ok(result) async def list_corpus_pending_enrichment(limit: int = 50) -> str: """רשימת רשומות style_corpus שחסר להן summary/outcome/key_principles — מועמדות להעשרה.""" pool = await db.get_pool() async with pool.acquire() as conn: rows = await conn.fetch( """ SELECT id, decision_number, decision_date, length(full_text) AS chars, coalesce(summary, '') = '' AS missing_summary, coalesce(outcome, '') = '' AS missing_outcome, coalesce(jsonb_array_length(key_principles), 0) = 0 AS missing_principles FROM style_corpus WHERE coalesce(summary, '') = '' OR coalesce(outcome, '') = '' OR coalesce(jsonb_array_length(key_principles), 0) = 0 ORDER BY decision_date NULLS LAST LIMIT $1 """, limit, ) items = [ { "corpus_id": str(r["id"]), "decision_number": r["decision_number"] or "", "decision_date": str(r["decision_date"]) if r["decision_date"] else "", "chars": r["chars"], "missing": [ f for f, v in ( ("summary", r["missing_summary"]), ("outcome", r["missing_outcome"]), ("key_principles", r["missing_principles"]), ) if v ], } for r in rows ] return _ok({"count": len(items), "items": items})