Management UI: corpus delete, process panel, activity feed, diagnostics

- DELETE /api/training/corpus/{id} + delete button on training page, with confirmation dialog and recompute hint
- /api/system/tasks + floating process panel (bottom-left) showing active background tasks with live 3s polling
- /api/system/recent-activity derives a feed from cases, style_corpus, and the last style_patterns run; sidebar on home page renders it with relative timestamps
- /api/system/diagnostics + /#/diagnostics page showing DB health, row counts per table, active tasks, stuck documents (>10 min), failed extractions
- Cosmetic: signature phrase headline now prefers clean phrases over bracket-heavy templates for display

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 12:04:13 +00:00
parent fcb2e1a325
commit 3e0221ccec
3 changed files with 689 additions and 23 deletions
--- a/web/app.py
+++ b/web/app.py
@@ -604,9 +604,21 @@ async def _compute_signature_phrases(conn) -> dict:
    total_decisions = await conn.fetchval("SELECT count(*) FROM style_corpus")

    if items:
-        top = items[0]
-        # Clean up for display: strip placeholder brackets and split alternatives
-        display = re.sub(r"\[[^\]]*\]", "", top["text"]).replace("  ", " ").strip()
+        # Pick the first item that's a relatively clean phrase, not a template
+        # (templates with many placeholders make bad display text)
+        top = None
+        for item in items[:5]:
+            text = item["text"]
+            placeholder_count = len(re.findall(r"\[[^\]]*\]", text))
+            if placeholder_count <= 1:
+                top = item
+                break
+        if top is None:
+            top = items[0]
+
+        # Clean up for display
+        display = re.sub(r"\[[^\]]*\]", "", top["text"])
+        display = re.sub(r"\s+", " ", display).strip(" .,:;\"'")
        display = display.split(" / ")[0].split(" או ")[0].strip(" .,:;\"'")
        if len(display) > 60:
            display = display[:57] + "..."
@@ -758,6 +770,19 @@ async def training_style_report():
    }


+@app.delete("/api/training/corpus/{corpus_id}")
+async def training_corpus_delete(corpus_id: str):
+    """Delete one decision from the style corpus, addressed by its UUID.
+
+    Responds with HTTP 400 when ``corpus_id`` is not a valid UUID string and
+    with HTTP 404 when ``db.delete_from_style_corpus`` reports that nothing
+    was deleted (the helper's ``reason`` is used as the detail, falling back
+    to "not found"). On success the helper's result dict is returned as-is.
+    """
+    try:
+        parsed_id = UUID(corpus_id)
+    except ValueError:
+        raise HTTPException(400, "invalid corpus_id")
+
+    outcome = await db.delete_from_style_corpus(parsed_id)
+    if outcome.get("deleted"):
+        return outcome
+    raise HTTPException(404, outcome.get("reason", "not found"))
+
+
@app.get("/api/training/corpus")
 async def training_corpus_list():
    """List all decisions currently in the style corpus."""
@@ -786,6 +811,25 @@ async def training_corpus_list():
    ]


+@app.get("/api/system/tasks")
+async def system_tasks():
+    """Report the background tasks in ``_progress`` that have not finished.
+
+    A task counts as active unless its ``status`` is "completed" or "failed";
+    entries without a status are reported as "unknown". Returns a dict with
+    the task summaries under ``active`` and their number under ``count``.
+    """
+    active = [
+        {
+            "task_id": task_id,
+            "status": state,
+            "step": info.get("step", ""),
+            "filename": info.get("filename", ""),
+            "error": info.get("error", ""),
+        }
+        # Walk a snapshot of the dict rather than the live view.
+        for task_id, info in list(_progress.items())
+        if (state := info.get("status", "unknown")) not in ("completed", "failed")
+    ]
+    return {"active": active, "count": len(active)}
+
+
@app.get("/api/progress/{task_id}")
 async def progress_stream(task_id: str):
    """SSE stream of processing progress."""
@@ -971,6 +1015,134 @@ async def api_processing_status():
    return json.loads(result)


+def _document_row_to_dict(row) -> dict:
+    """Convert one diagnostics ``documents`` row into a JSON-friendly dict."""
+    created_at = row["created_at"]
+    return {
+        "id": str(row["id"]),
+        "title": row["title"] or "",
+        "status": row["extraction_status"],
+        "case_number": row["case_number"] or "",
+        "created_at": created_at.isoformat() if created_at else None,
+    }
+
+
+@app.get("/api/system/diagnostics")
+async def system_diagnostics():
+    """Return a system health snapshot: DB probe, row counts, problem documents, tasks.
+
+    The response contains:
+
+    * ``db_ok`` -- whether a trivial ``SELECT 1`` succeeded.
+    * ``tables`` -- row count per table, or ``None`` where the count failed.
+    * ``failed_documents`` -- up to 20 most recent documents whose
+      ``extraction_status`` is 'failed' or 'error'.
+    * ``stuck_documents`` -- up to 20 most recent documents still 'pending' or
+      'processing' more than 10 minutes after creation.
+    * ``active_tasks`` -- entries of the in-memory ``_progress`` dict whose
+      status is neither "completed" nor "failed".
+
+    When the health probe fails, the remaining queries are skipped and the
+    response carries ``db_ok: false`` with ``None`` counts and empty document
+    lists, instead of failing on the next query.
+    """
+    import logging
+
+    log = logging.getLogger(__name__)
+    table_names = (
+        "cases",
+        "documents",
+        "document_chunks",
+        "style_corpus",
+        "style_patterns",
+    )
+    tables = dict.fromkeys(table_names)
+    failed_docs = []
+    stuck_docs = []
+
+    pool = await db.get_pool()
+    async with pool.acquire() as conn:
+        try:
+            await conn.fetchval("SELECT 1")
+        except Exception:
+            # Best-effort probe: record the failure instead of raising.
+            log.exception("diagnostics: database health probe failed")
+            db_ok = False
+        else:
+            db_ok = True
+
+        if db_ok:
+            for t in table_names:
+                try:
+                    # Table names come from the fixed tuple above, never from
+                    # user input, so interpolating them into SQL is safe.
+                    tables[t] = await conn.fetchval(f"SELECT count(*) FROM {t}")
+                except Exception:
+                    log.warning("diagnostics: could not count rows in %s", t, exc_info=True)
+                    tables[t] = None
+
+            # Documents that failed extraction or are stuck
+            failed_docs = await conn.fetch(
+                "SELECT d.id, d.title, d.extraction_status, d.created_at, "
+                "       c.case_number "
+                "FROM documents d LEFT JOIN cases c ON d.case_id = c.id "
+                "WHERE d.extraction_status IN ('failed', 'error') "
+                "ORDER BY d.created_at DESC LIMIT 20"
+            )
+            stuck_docs = await conn.fetch(
+                "SELECT d.id, d.title, d.extraction_status, d.created_at, "
+                "       c.case_number "
+                "FROM documents d LEFT JOIN cases c ON d.case_id = c.id "
+                "WHERE d.extraction_status IN ('pending', 'processing') "
+                "  AND d.created_at < now() - interval '10 minutes' "
+                "ORDER BY d.created_at DESC LIMIT 20"
+            )
+
+    # Iterate over a snapshot of _progress, as system_tasks does.
+    active_tasks = [
+        {"task_id": tid, "filename": d.get("filename", ""),
+         "status": d.get("status", ""), "step": d.get("step", "")}
+        for tid, d in list(_progress.items())
+        if d.get("status") not in ("completed", "failed")
+    ]
+
+    return {
+        "db_ok": db_ok,
+        "tables": tables,
+        "failed_documents": [_document_row_to_dict(r) for r in failed_docs],
+        "stuck_documents": [_document_row_to_dict(r) for r in stuck_docs],
+        "active_tasks": active_tasks,
+    }
+
+
+@app.get("/api/system/recent-activity")
+async def system_recent_activity(limit: int = 8):
+    """Derive a feed of recent events from cases + style_corpus + style_patterns.
+
+    Each event has: type, label, detail, timestamp, target.
+
+    ``limit`` caps both the rows read from each source table and the number
+    of events returned. It is clamped to the range 0..100: a negative value
+    would otherwise reach SQL ``LIMIT`` and make the query fail, and an
+    unbounded value would let one request pull arbitrarily many rows.
+
+    Returns ``{"events": [...]}`` sorted newest first.
+    """
+    limit = max(0, min(limit, 100))
+    if limit == 0:
+        # Nothing can be returned, so skip the database round-trips.
+        return {"events": []}
+
+    pool = await db.get_pool()
+    events: list[dict] = []
+
+    async with pool.acquire() as conn:
+        # Recent cases
+        cases = await conn.fetch(
+            "SELECT case_number, title, created_at FROM cases "
+            "ORDER BY created_at DESC LIMIT $1", limit
+        )
+        for c in cases:
+            events.append({
+                "type": "case_created",
+                "label": f"תיק חדש: ערר {c['case_number']}",
+                "detail": c["title"] or "",
+                "timestamp": c["created_at"].isoformat() if c["created_at"] else None,
+                "target": f"/#/case/{c['case_number']}",
+            })
+
+        # Recent corpus additions
+        corpus = await conn.fetch(
+            "SELECT decision_number, created_at FROM style_corpus "
+            "ORDER BY created_at DESC LIMIT $1", limit
+        )
+        for r in corpus:
+            events.append({
+                "type": "corpus_added",
+                "label": f"החלטה נוספה לקורפוס: {r['decision_number'] or 'ללא מספר'}",
+                "detail": "",
+                "timestamp": r["created_at"].isoformat() if r["created_at"] else None,
+                "target": "/#/training",
+            })
+
+        # Last style analysis run (if any)
+        last_pattern = await conn.fetchrow(
+            "SELECT created_at FROM style_patterns "
+            "ORDER BY created_at DESC LIMIT 1"
+        )
+        if last_pattern and last_pattern["created_at"]:
+            count = await conn.fetchval("SELECT count(*) FROM style_patterns")
+            events.append({
+                "type": "analysis_run",
+                "label": f"ניתוח סגנון — {count} דפוסים חולצו",
+                "detail": "",
+                "timestamp": last_pattern["created_at"].isoformat(),
+                "target": "/#/style-report",
+            })
+
+    # Sort by ISO timestamp string, newest first; events without a timestamp
+    # sort as "" and therefore land last. Then keep the top N.
+    events.sort(key=lambda e: e["timestamp"] or "", reverse=True)
+    return {"events": events[:limit]}
+
+
 # ── Workflow API — outcome, direction, claims, QA, learning ──────