fix(appraiser-facts): route extraction through analyst wakeup (was silent 0)

The "חלץ עובדות שמאיות" UI button hits POST /api/cases/{n}/extract-appraiser-facts, which used to call appraiser_facts_extractor inline. The extractor shells out to the local `claude` CLI, which is absent in the Coolify container, so every doc errored, the per-doc try/except swallowed the error, and the response was "completed, 0 facts". The endpoint now wakes the legal-analyst of the correct company via Paperclip (same pattern as wake_curator_for_final), and the extractor returns extraction_failed instead of "completed" when no facts were extracted and every doc errored.
2026-05-26 11:02:55 +00:00
parent 7ad995aade
commit 3a05e30c8d
5 changed files with 221 additions and 13 deletions
--- a/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py
+++ b/mcp-server/src/legal_mcp/services/appraiser_facts_extractor.py
@@ -250,8 +250,19 @@ async def extract_appraiser_facts(case_id: UUID) -> dict:
    conflicts = await db.detect_appraiser_conflicts(case_id)
    # Don't swallow extractor failures: if every appraisal errored and no
    # facts were extracted, surface that as a distinct status instead of
    # the misleading "completed, 0 facts" we used to return — the caller
    # (and the UI) need to know that nothing actually ran.
    all_errored = (
        total_facts == 0
        and by_doc
        and all(d.get("status") == "error" for d in by_doc)
    )
    status = "extraction_failed" if all_errored else "completed"
    return {
-        "status": "completed",
+        "status": status,
        "appraisal_count": len(appraisals),
        "total_facts": total_facts,
        "conflicts": conflicts,
--- a/web-ui/src/components/cases/document-type-editor.tsx
+++ b/web-ui/src/components/cases/document-type-editor.tsx
@@ -269,6 +269,26 @@ function PostSaveView({
        </div>
      )}
      {extractResult?.status === "queued" && (
        <div className="rounded-md border border-info/30 bg-info-bg px-2.5 py-2 text-[0.72rem] text-ink space-y-0.5">
          <p>
            <strong>נשלח לאנליטיקאי.</strong> ה-issue נפתח ב-Paperclip והחילוץ
            ירוץ ברקע. תראה comment בעברית עם התוצאה כשהוא יסיים — לרוב כמה
            דקות.
          </p>
        </div>
      )}
      {extractResult?.status === "skipped" && (
        <div className="rounded-md border border-warn/40 bg-warn-bg px-2.5 py-2 text-[0.72rem] text-ink space-y-0.5">
          <p>
            <strong>לא ניתן להפעיל אוטומטית</strong> ({extractResult.reason}).
            הפעל ידנית מ-Claude Code:
            <code className="ms-1 select-all">mcp__legal-ai__extract_appraiser_facts</code>
          </p>
        </div>
      )}
      {extractResult?.status === "no_appraisals" && (
        <p className="text-[0.72rem] text-ink-muted">
          אין בתיק מסמכים מתויגים כ-שומה.
@@ -320,8 +340,8 @@ function PostSaveView({
      {pending && (
        <p className="text-[0.68rem] text-ink-muted leading-tight">
-          החילוץ יכול להימשך כמה דקות — שומות ארוכות עוברות ניתוח פסקה אחר
+          שולח לאנליטיקאי דרך Paperclip — לוקח שנייה. החילוץ עצמו ירוץ אצל
-          פסקה ע"י המודל.
+          האנליטיקאי וייתן comment כשיסיים.
        </p>
      )}
    </div>
--- a/web-ui/src/lib/api/documents.ts
+++ b/web-ui/src/lib/api/documents.ts
@@ -160,6 +160,23 @@ export type ExtractAppraiserFactsResponse =
      appraisal_count: number;
      missing: { document_id: string; title: string; current_side: string }[];
      message: string;
    }
  | {
      // The chair clicked the button; backend created a child Paperclip
      // issue assigned to the legal-analyst, which will run the MCP tool
      // on the host (where the Claude CLI lives) and post results back.
      status: "queued";
      sub_issue_id: string;
      analyst_id: string;
      main_issue_id: string;
    }
  | {
      // No analyst route was available (no API key / no analyst configured /
      // no Paperclip issue linked to the case). Non-fatal — the chair can
      // still trigger extraction manually from Claude Code.
      status: "skipped";
      reason: "no_api_key" | "no_analyst" | "no_issue" | string;
      company_id?: string;
    };
 async function extractAppraiserFacts(
--- a/web/app.py
+++ b/web/app.py
@@ -61,6 +61,7 @@ from web.paperclip_client import (
    reject_interaction as pc_reject_interaction,
    respond_to_interaction as pc_respond_to_interaction,
    restore_project as pc_restore_project,
    wake_analyst_for_appraiser_facts as pc_wake_analyst_for_appraiser_facts,
    wake_ceo_agent as pc_wake_ceo,
    wake_curator_for_final as pc_wake_curator_for_final,
    wake_for_precedent_extraction as pc_wake_for_precedent_extraction,
@@ -3977,28 +3978,74 @@ async def api_patch_document(case_number: str, doc_id: str, req: DocumentPatchRe
@app.post("/api/cases/{case_number}/extract-appraiser-facts")
 async def api_extract_appraiser_facts(case_number: str):
-    """Run structured extraction of plans + permits from every appraisal
+    """Queue appraiser-fact extraction by waking the legal-analyst agent.
    document in the case, and detect conflicts between appraisers.
-    Blocks if any appraisal document is missing metadata.appraiser_side —
+    The extraction itself calls `claude_session.query_json()`, which shells
-    the chair must tag every appraisal (committee / appellant / deciding)
+    out to the local `claude` CLI — present on the agent host, **absent in
-    before extraction can identify the deciding appraiser's governing view.
+    this FastAPI container**. So we cannot run the extractor inline here.
-    Returns the extractor's summary dict as-is. Shape:
+    Instead we delegate: create a child Paperclip issue under the case's
-        {"status": "completed"|"sides_missing"|"no_appraisals", ...}
+    main issue, assigned to the analyst of the correct company, and trigger
    a wakeup with `mutation: assignment` (the extraction intent is carried
    by the child issue's description, not by the payload). The analyst runs the
    MCP tool locally and posts results as a comment.
    Pre-check: short-circuits with `sides_missing` if any appraisal is
    untagged, so the chair gets immediate feedback without spinning up an
    agent for nothing. The check uses `_validate_sides_tagged` against the
    documents already in the DB — no LLM call, safe to run in-container.
    Response shape:
        {"status": "queued", "sub_issue_id", "analyst_id", "main_issue_id"}
      or {"status": "sides_missing", "missing": [...], "message": "..."}
      or {"status": "no_appraisals", ...}
      or {"status": "skipped", "reason": "no_api_key"|"no_analyst"|"no_issue"}
    """
    from legal_mcp.services import appraiser_facts_extractor
    from legal_mcp.services import db as mcp_db
    case = await db.get_case_by_number(case_number)
    if not case:
        raise HTTPException(404, f"תיק {case_number} לא נמצא")
    # Pre-validate without touching Claude — surface sides_missing directly
    # so the UI can show the list of untagged appraisals immediately.
    docs = await mcp_db.list_documents(UUID(case["id"]))
    appraisals = [d for d in docs if d.get("doc_type") == "appraisal"]
    if not appraisals:
        return {
            "status": "no_appraisals",
            "appraisal_count": 0,
            "total_facts": 0,
            "conflicts": [],
        }
    missing = appraiser_facts_extractor._validate_sides_tagged(appraisals)
    if missing:
        return {
            "status": "sides_missing",
            "appraisal_count": len(appraisals),
            "missing": missing,
            "message": (
                "חסר תיוג appraiser_side במסמכי שומה. תייג כל שומה דרך ה-UI "
                "(ועדה / עורר / מכריע) והרץ שוב."
            ),
        }
    # Route to the analyst of the correct company by case-number prefix
    prefix = case_number[:1]
    company_id = (
        PAPERCLIP_COMPANIES["licensing"] if prefix == "1"
        else PAPERCLIP_COMPANIES["betterment"] if prefix in ("8", "9")
        else ""
    )
    try:
-        result = await appraiser_facts_extractor.extract_appraiser_facts(
+        result = await pc_wake_analyst_for_appraiser_facts(
-            UUID(case["id"])
+            case_number, company_id=company_id,
        )
    except Exception as e:
-        raise HTTPException(500, f"חילוץ נכשל: {e}")
+        logger.exception("analyst wakeup failed for %s", case_number)
        raise HTTPException(500, f"לא ניתן לשלוח לאנליטיקאי: {e}")
    return result
--- a/web/paperclip_client.py
+++ b/web/paperclip_client.py
@@ -53,6 +53,15 @@ CURATOR_AGENTS = {
    COMPANIES["betterment"]: "d6f7c55d-570a-46b8-8d72-1286d07da0d8",  # CMPA curator
 }
 # Legal Analyst (מנתח משפטי) agent per company — woken from the chair UI
 # when the chair finishes tagging appraisals and asks for fact extraction.
 # The analyst runs `mcp__legal-ai__extract_appraiser_facts` locally (where
 # the Claude CLI is present), since the FastAPI container cannot.
 # Keyed by company id; each value is the agent id that is set as the child
 # issue's `assigneeAgentId` and targeted by the wakeup request in
 # `wake_analyst_for_appraiser_facts`. A company with no entry here makes
 # that function return {"status": "skipped", "reason": "no_analyst"}.
 ANALYST_AGENTS = {
    COMPANIES["licensing"]: "c26e9439-a88a-49dc-9e67-2262c95db65c",   # CMP analyst
    COMPANIES["betterment"]: "f70fd353-6cde-46b3-8d6c-cfad12100b1b",  # CMPA analyst
 }
 # Fallback mapping — used only when DB lookup returns no results.
 # בל"מ (extension_request_*) variants route to the same company as their
 # parent domain — בל"מ ברישוי → CMP, בל"מ בהיטל השבחה → CMPA, וכו'.
@@ -1016,3 +1025,107 @@ async def wake_curator_for_final(
        "curator_id": curator_id,
        "main_issue_id": main_issue_id,
    }
 async def wake_analyst_for_appraiser_facts(
    case_number: str,
    company_id: str,
 ) -> dict:
    """Wake the legal-analyst to extract appraiser facts for this case.

    Triggered by the chair clicking "חלץ עובדות שמאיות עכשיו" in the UI.

    The FastAPI container cannot run `extract_appraiser_facts` directly —
    the extractor calls `claude_session.query_json()`, which only works
    where the local `claude` CLI is present (the MCP server / agent runner
    on the host). So instead of running it inline, we create a child issue
    under the case's main Paperclip issue, assign it to the analyst of the
    correct company, and trigger a wakeup. The wakeup payload uses
    `mutation: assignment` (the same mutation `wake_curator_for_final`
    sends); the extraction intent itself is conveyed by the child issue's
    description. The analyst is expected to run the MCP tool locally and
    report back via a comment.

    Args:
        case_number: Case number used to look up the case's Paperclip
            issues and embedded in the child issue's title/description.
        company_id: Company id used as the key into `ANALYST_AGENTS`.

    Returns:
        A dict shaped for the FastAPI endpoint to serialize as-is:
            {"status": "queued", "sub_issue_id", "analyst_id", "main_issue_id"}
          or {"status": "skipped", "reason": "no_api_key"|"no_analyst"|"no_issue"}
          for non-fatal early outs (`no_analyst` also carries `company_id`).

    Raises:
        Whatever `pc_request(..., raise_on_error=True)` raises when creating
        the child issue or sending the wakeup fails — presumably an HTTP
        error; confirm against `pc_request`. A failure to link the child
        issue in plugin_state is logged and does not abort the wakeup.
    """
    if not PAPERCLIP_BOARD_API_KEY:
        logger.warning(
            "PAPERCLIP_BOARD_API_KEY not set — cannot queue analyst wakeup for %s",
            case_number,
        )
        return {"status": "skipped", "reason": "no_api_key"}

    analyst_id = ANALYST_AGENTS.get(company_id)
    if not analyst_id:
        logger.info("No analyst configured for company %s — skipping", company_id)
        return {"status": "skipped", "reason": "no_analyst", "company_id": company_id}

    issues = await get_case_issues(case_number)
    if not issues:
        logger.warning(
            "No Paperclip issues found for case %s — cannot queue analyst", case_number,
        )
        return {"status": "skipped", "reason": "no_issue"}

    # Prefer the issue that is currently in progress; otherwise fall back to
    # the first issue returned for the case.
    main_issue = next((i for i in issues if i.get("status") == "in_progress"), None) or issues[0]
    main_issue_id = main_issue["id"]

    description = (
        f"חיים תייג שומות בתיק {case_number} וביקש חילוץ עובדות שמאיות.\n\n"
        f"הרץ `mcp__legal-ai__extract_appraiser_facts(case_number=\"{case_number}\")` "
        "וכתוב comment בעברית עם תוצאת החילוץ — מספר תכניות, מספר היתרים, "
        "וסתירות (אם יש) בין שמאים. אם המסמכים חסרי תיוג `appraiser_side`, "
        "דווח ב-comment על השומות החסרות וסגור את ה-issue כ-blocked."
    )

    child_resp = await pc_request(
        "POST",
        f"/api/issues/{main_issue_id}/children",
        json={
            "title": f"[ערר {case_number}] חילוץ עובדות שמאיות",
            "description": description,
            "status": "in_progress",
            "priority": "normal",
            "assigneeAgentId": analyst_id,
        },
        raise_on_error=True,
    )
    sub_issue = child_resp.json()
    sub_issue_id = sub_issue["id"]

    # Tag plugin_state so the case page surfaces this sub-issue too.
    # Best-effort: a failure here must not prevent the wakeup.
    try:
        conn = await asyncpg.connect(PAPERCLIP_DB_URL)
        try:
            await _link_case_to_issue(conn, sub_issue_id, case_number)
        finally:
            await conn.close()
    except Exception as e:
        logger.warning("plugin_state link failed for sub_issue=%s: %s", sub_issue_id, e)

    try:
        wake_resp = await pc_request(
            "POST",
            f"/api/agents/{analyst_id}/wakeup",
            json={
                "source": "on_demand",
                "triggerDetail": "manual",
                "reason": f"extract_appraiser_facts_{case_number}",
                # Use "assignment" — the same mutation `wake_curator_for_final`
                # sends. The HEARTBEAT recognises it; the task-specific intent
                # is conveyed by the child-issue's description, not the payload.
                "payload": {
                    "issueId": sub_issue_id,
                    "mutation": "assignment",
                    "caseNumber": case_number,
                },
            },
            raise_on_error=True,
        )
    except Exception:
        # The child issue already exists at this point. Record its id before
        # propagating so the orphaned issue can be found (a retry from the UI
        # will create another one).
        logger.exception(
            "Analyst wakeup failed for case %s after creating sub_issue=%s (analyst=%s)",
            case_number, sub_issue_id, analyst_id,
        )
        raise

    logger.info(
        "Analyst wakeup for case %s: sub_issue=%s analyst=%s wake=%s",
        case_number, sub_issue_id, analyst_id, wake_resp.status_code,
    )
    return {
        "status": "queued",
        "sub_issue_id": sub_issue_id,
        "analyst_id": analyst_id,
        "main_issue_id": main_issue_id,
    }