fix(appraiser-facts): route extraction through analyst wakeup (was silent 0)

The "חלץ עובדות שמאיות" UI button hit POST /api/cases/{n}/extract-appraiser-facts which called appraiser_facts_extractor inline — that shells out to the local `claude` CLI, which is absent in the Coolify container, so every doc errored, the per-doc try/except swallowed it, and the response was "completed, 0 facts". Refactored the endpoint to wake the legal-analyst of the correct company via Paperclip (same pattern as wake_curator_for_final), and surface extraction_failed instead of "completed" when every doc errored.
2026-05-26 11:02:55 +00:00
parent 7ad995aade
commit 3a05e30c8d
5 changed files with 221 additions and 13 deletions
--- a/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py
+++ b/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py
@@ -250,8 +250,19 @@ async def extract_appraiser_facts(case_id: UUID) -> dict:

    conflicts = await db.detect_appraiser_conflicts(case_id)

+    # Don't swallow extractor failures: if every appraisal errored and no
+    # facts were extracted, surface that as a distinct status instead of
+    # the misleading "completed, 0 facts" we used to return — the caller
+    # (and the UI) need to know that nothing actually ran.
+    all_errored = (
+        total_facts == 0
+        and by_doc
+        and all(d.get("status") == "error" for d in by_doc)
+    )
+    status = "extraction_failed" if all_errored else "completed"
+
    return {
-        "status": "completed",
+        "status": status,
        "appraisal_count": len(appraisals),
        "total_facts": total_facts,
        "conflicts": conflicts,
--- a/web-ui/src/components/cases/document-type-editor.tsx
+++ b/web-ui/src/components/cases/document-type-editor.tsx
@@ -269,6 +269,26 @@ function PostSaveView({
        </div>
      )}

+      {extractResult?.status === "queued" && (
+        <div className="rounded-md border border-info/30 bg-info-bg px-2.5 py-2 text-[0.72rem] text-ink space-y-0.5">
+          <p>
+            <strong>נשלח לאנליטיקאי.</strong> ה-issue נפתח ב-Paperclip והחילוץ
+            ירוץ ברקע. תראה comment בעברית עם התוצאה כשהוא יסיים — לרוב כמה
+            דקות.
+          </p>
+        </div>
+      )}
+
+      {extractResult?.status === "skipped" && (
+        <div className="rounded-md border border-warn/40 bg-warn-bg px-2.5 py-2 text-[0.72rem] text-ink space-y-0.5">
+          <p>
+            <strong>לא ניתן להפעיל אוטומטית</strong> ({extractResult.reason}).
+            הפעל ידנית מ-Claude Code:
+            <code className="ms-1 select-all">mcp__legal-ai__extract_appraiser_facts</code>
+          </p>
+        </div>
+      )}
+
      {extractResult?.status === "no_appraisals" && (
        <p className="text-[0.72rem] text-ink-muted">
          אין בתיק מסמכים מתויגים כ-שומה.
@@ -320,8 +340,8 @@ function PostSaveView({

      {pending && (
        <p className="text-[0.68rem] text-ink-muted leading-tight">
-          החילוץ יכול להימשך כמה דקות — שומות ארוכות עוברות ניתוח פסקה אחר
-          פסקה ע"י המודל.
+          שולח לאנליטיקאי דרך Paperclip — לוקח שנייה. החילוץ עצמו ירוץ אצל
+          האנליטיקאי וייתן comment כשיסיים.
        </p>
      )}
    </div>
--- a/web-ui/src/lib/api/documents.ts
+++ b/web-ui/src/lib/api/documents.ts
@@ -160,6 +160,23 @@ export type ExtractAppraiserFactsResponse =
      appraisal_count: number;
      missing: { document_id: string; title: string; current_side: string }[];
      message: string;
+    }
+  | {
+      // The chair clicked the button; backend created a child Paperclip
+      // issue assigned to the legal-analyst, which will run the MCP tool
+      // on the host (where the Claude CLI lives) and post results back.
+      status: "queued";
+      sub_issue_id: string;
+      analyst_id: string;
+      main_issue_id: string;
+    }
+  | {
+      // No analyst route was available (no API key / no analyst configured /
+      // no Paperclip issue linked to the case). Non-fatal — the chair can
+      // still trigger extraction manually from Claude Code.
+      status: "skipped";
+      reason: "no_api_key" | "no_analyst" | "no_issue" | string;
+      company_id?: string;
    };

 async function extractAppraiserFacts(
--- a/web/app.py
+++ b/web/app.py
@@ -61,6 +61,7 @@ from web.paperclip_client import (
    reject_interaction as pc_reject_interaction,
    respond_to_interaction as pc_respond_to_interaction,
    restore_project as pc_restore_project,
+    wake_analyst_for_appraiser_facts as pc_wake_analyst_for_appraiser_facts,
    wake_ceo_agent as pc_wake_ceo,
    wake_curator_for_final as pc_wake_curator_for_final,
    wake_for_precedent_extraction as pc_wake_for_precedent_extraction,
@@ -3977,28 +3978,74 @@ async def api_patch_document(case_number: str, doc_id: str, req: DocumentPatchRe

@app.post("/api/cases/{case_number}/extract-appraiser-facts")
 async def api_extract_appraiser_facts(case_number: str):
-    """Run structured extraction of plans + permits from every appraisal
-    document in the case, and detect conflicts between appraisers.
+    """Queue appraiser-fact extraction by waking the legal-analyst agent.

-    Blocks if any appraisal document is missing metadata.appraiser_side —
-    the chair must tag every appraisal (committee / appellant / deciding)
-    before extraction can identify the deciding appraiser's governing view.
+    The extraction itself calls `claude_session.query_json()`, which shells
+    out to the local `claude` CLI — present on the agent host, **absent in
+    this FastAPI container**. So we cannot run the extractor inline here.

-    Returns the extractor's summary dict as-is. Shape:
-        {"status": "completed"|"sides_missing"|"no_appraisals", ...}
+    Instead we delegate: create a child Paperclip issue under the case's
+    main issue, assigned to the analyst of the correct company, and trigger
+    a wakeup with `mutation: assignment` (the child issue's description names
+    the task). The analyst runs the MCP tool locally and posts results as a comment.
+
+    Pre-check: short-circuits with `sides_missing` if any appraisal is
+    untagged, so the chair gets immediate feedback without spinning up an
+    agent for nothing. The check uses `_validate_sides_tagged` against the
+    documents already in the DB — no LLM call, safe to run in-container.
+
+    Response shape:
+        {"status": "queued", "sub_issue_id", "analyst_id", "main_issue_id"}
+      or {"status": "sides_missing", "missing": [...], "message": "..."}
+      or {"status": "no_appraisals", ...}
+      or {"status": "skipped", "reason": "no_api_key"|"no_analyst"|"no_issue"}
    """
    from legal_mcp.services import appraiser_facts_extractor
+    from legal_mcp.services import db as mcp_db

    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")

+    # Pre-validate without touching Claude — surface sides_missing directly
+    # so the UI can show the list of untagged appraisals immediately.
+    docs = await mcp_db.list_documents(UUID(case["id"]))
+    appraisals = [d for d in docs if d.get("doc_type") == "appraisal"]
+    if not appraisals:
+        return {
+            "status": "no_appraisals",
+            "appraisal_count": 0,
+            "total_facts": 0,
+            "conflicts": [],
+        }
+    missing = appraiser_facts_extractor._validate_sides_tagged(appraisals)
+    if missing:
+        return {
+            "status": "sides_missing",
+            "appraisal_count": len(appraisals),
+            "missing": missing,
+            "message": (
+                "חסר תיוג appraiser_side במסמכי שומה. תייג כל שומה דרך ה-UI "
+                "(ועדה / עורר / מכריע) והרץ שוב."
+            ),
+        }
+
+    # Route to the analyst of the correct company by case-number prefix
+    prefix = case_number[:1]
+    company_id = (
+        PAPERCLIP_COMPANIES["licensing"] if prefix == "1"
+        else PAPERCLIP_COMPANIES["betterment"] if prefix in ("8", "9")
+        else ""
+    )
+
    try:
-        result = await appraiser_facts_extractor.extract_appraiser_facts(
-            UUID(case["id"])
+        result = await pc_wake_analyst_for_appraiser_facts(
+            case_number, company_id=company_id,
        )
    except Exception as e:
-        raise HTTPException(500, f"חילוץ נכשל: {e}")
+        logger.exception("analyst wakeup failed for %s", case_number)
+        raise HTTPException(500, f"לא ניתן לשלוח לאנליטיקאי: {e}")
+
    return result


--- a/web/paperclip_client.py
+++ b/web/paperclip_client.py
@@ -53,6 +53,15 @@ CURATOR_AGENTS = {
    COMPANIES["betterment"]: "d6f7c55d-570a-46b8-8d72-1286d07da0d8",  # CMPA curator
 }

+# Legal Analyst (מנתח משפטי) agent per company — woken from the chair UI
+# when the chair finishes tagging appraisals and asks for fact extraction.
+# The analyst runs `mcp__legal-ai__extract_appraiser_facts` locally (where
+# the Claude CLI is present), since the FastAPI container cannot.
+ANALYST_AGENTS = {
+    COMPANIES["licensing"]: "c26e9439-a88a-49dc-9e67-2262c95db65c",   # CMP analyst
+    COMPANIES["betterment"]: "f70fd353-6cde-46b3-8d6c-cfad12100b1b",  # CMPA analyst
+}
+
 # Fallback mapping — used only when DB lookup returns no results.
 # בל"מ (extension_request_*) variants route to the same company as their
 # parent domain — בל"מ ברישוי → CMP, בל"מ בהיטל השבחה → CMPA, וכו'.
@@ -1016,3 +1025,107 @@ async def wake_curator_for_final(
        "curator_id": curator_id,
        "main_issue_id": main_issue_id,
    }
+
+
+async def wake_analyst_for_appraiser_facts(
+    case_number: str,
+    company_id: str,
+) -> dict:
+    """Wake the legal-analyst to extract appraiser facts for this case.
+
+    Triggered by the chair clicking "חלץ עובדות שמאיות עכשיו" in the UI.
+    The FastAPI container cannot run `extract_appraiser_facts` directly —
+    the extractor calls `claude_session.query_json()`, which only works
+    where the local `claude` CLI is present (the MCP server / agent runner
+    on the host). So instead of running it inline, we create a child issue
+    under the case's main Paperclip issue, assign it to the analyst of the
+    correct company, and trigger a generic `mutation: assignment` wakeup for that issue.
+    The analyst's HEARTBEAT picks up the issue, runs the MCP tool locally,
+    and reports back via a comment.
+
+    Returns a dict shaped for the FastAPI endpoint to serialize as-is:
+        {"status": "queued", "sub_issue_id", "analyst_id", "main_issue_id"}
+      or {"status": "skipped", "reason": "..."} for non-fatal early outs.
+    """
+    if not PAPERCLIP_BOARD_API_KEY:
+        logger.warning(
+            "PAPERCLIP_BOARD_API_KEY not set — cannot queue analyst wakeup for %s",
+            case_number,
+        )
+        return {"status": "skipped", "reason": "no_api_key"}
+
+    analyst_id = ANALYST_AGENTS.get(company_id)
+    if not analyst_id:
+        logger.info("No analyst configured for company %s — skipping", company_id)
+        return {"status": "skipped", "reason": "no_analyst", "company_id": company_id}
+
+    issues = await get_case_issues(case_number)
+    if not issues:
+        logger.warning(
+            "No Paperclip issues found for case %s — cannot queue analyst", case_number,
+        )
+        return {"status": "skipped", "reason": "no_issue"}
+
+    main_issue = next((i for i in issues if i.get("status") == "in_progress"), None) or issues[0]
+    main_issue_id = main_issue["id"]
+
+    description = (
+        f"חיים תייג שומות בתיק {case_number} וביקש חילוץ עובדות שמאיות.\n\n"
+        f"הרץ `mcp__legal-ai__extract_appraiser_facts(case_number=\"{case_number}\")` "
+        f"וכתוב comment בעברית עם תוצאת החילוץ — מספר תכניות, מספר היתרים, "
+        f"וסתירות (אם יש) בין שמאים. אם המסמכים חסרי תיוג `appraiser_side`, "
+        f"דווח ב-comment על השומות החסרות וסגור את ה-issue כ-blocked."
+    )
+    child_resp = await pc_request(
+        "POST",
+        f"/api/issues/{main_issue_id}/children",
+        json={
+            "title": f"[ערר {case_number}] חילוץ עובדות שמאיות",
+            "description": description,
+            "status": "in_progress",
+            "priority": "normal",
+            "assigneeAgentId": analyst_id,
+        },
+        raise_on_error=True,
+    )
+    sub_issue = child_resp.json()
+    sub_issue_id = sub_issue["id"]
+
+    # Tag plugin_state so the case page surfaces this sub-issue too
+    try:
+        conn = await asyncpg.connect(PAPERCLIP_DB_URL)
+        try:
+            await _link_case_to_issue(conn, sub_issue_id, case_number)
+        finally:
+            await conn.close()
+    except Exception as e:
+        logger.warning("plugin_state link failed for sub_issue=%s: %s", sub_issue_id, e)
+
+    wake_resp = await pc_request(
+        "POST",
+        f"/api/agents/{analyst_id}/wakeup",
+        json={
+            "source": "on_demand",
+            "triggerDetail": "manual",
+            "reason": f"extract_appraiser_facts_{case_number}",
+            # Use "assignment" — the same mutation `wake_curator_for_final`
+            # sends. The HEARTBEAT recognises it; the task-specific intent
+            # is conveyed by the child-issue's description, not the payload.
+            "payload": {
+                "issueId": sub_issue_id,
+                "mutation": "assignment",
+                "caseNumber": case_number,
+            },
+        },
+        raise_on_error=True,
+    )
+    logger.info(
+        "Analyst wakeup for case %s: sub_issue=%s analyst=%s wake=%s",
+        case_number, sub_issue_id, analyst_id, wake_resp.status_code,
+    )
+    return {
+        "status": "queued",
+        "sub_issue_id": sub_issue_id,
+        "analyst_id": analyst_id,
+        "main_issue_id": main_issue_id,
+    }