Management UI: corpus delete, process panel, activity feed, diagnostics

- DELETE /api/training/corpus/{id} + delete button on training page,
  with confirmation dialog and recompute hint
- /api/system/tasks + floating process panel (bottom-left) showing
  active background tasks with live 3s polling
- /api/system/recent-activity derives a feed from cases, style_corpus,
  and last style_patterns run; sidebar on home page renders with
  relative timestamps
- /api/system/diagnostics + /#/diagnostics page showing DB health,
  row counts per table, active tasks, stuck documents (>10 min),
  failed extractions
- Cosmetic: signature phrase headline now prefers clean phrases over
  bracket-heavy templates for display

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-11 12:04:13 +00:00
parent fcb2e1a325
commit 3e0221ccec
3 changed files with 689 additions and 23 deletions

View File

@@ -604,9 +604,21 @@ async def _compute_signature_phrases(conn) -> dict:
total_decisions = await conn.fetchval("SELECT count(*) FROM style_corpus")
if items:
top = items[0]
# Clean up for display: strip placeholder brackets and split alternatives
display = re.sub(r"\[[^\]]*\]", "", top["text"]).replace(" ", " ").strip()
# Pick the first item that's a relatively clean phrase, not a template
# (templates with many placeholders make bad display text)
top = None
for item in items[:5]:
text = item["text"]
placeholder_count = len(re.findall(r"\[[^\]]*\]", text))
if placeholder_count <= 1:
top = item
break
if top is None:
top = items[0]
# Clean up for display
display = re.sub(r"\[[^\]]*\]", "", top["text"])
display = re.sub(r"\s+", " ", display).strip(" .,:;\"'")
display = display.split(" / ")[0].split(" או ")[0].strip(" .,:;\"'")
if len(display) > 60:
display = display[:57] + "..."
@@ -758,6 +770,19 @@ async def training_style_report():
}
@app.delete("/api/training/corpus/{corpus_id}")
async def training_corpus_delete(corpus_id: str):
    """Delete one decision from the style corpus.

    Args:
        corpus_id: String form of the corpus entry's UUID (path parameter).

    Returns:
        The dict returned by ``db.delete_from_style_corpus`` when its
        ``deleted`` entry is truthy.

    Raises:
        HTTPException: 400 when ``corpus_id`` cannot be parsed as a UUID;
            404 when the delete result is not flagged as deleted (the
            detail is the result's ``reason``, falling back to "not found").
    """
    try:
        parsed_id = UUID(corpus_id)
    except ValueError:
        raise HTTPException(400, "invalid corpus_id")

    outcome = await db.delete_from_style_corpus(parsed_id)
    if outcome.get("deleted"):
        return outcome
    raise HTTPException(404, outcome.get("reason", "not found"))
@app.get("/api/training/corpus")
async def training_corpus_list():
"""List all decisions currently in the style corpus."""
@@ -786,6 +811,25 @@ async def training_corpus_list():
]
@app.get("/api/system/tasks")
async def system_tasks():
    """Report background tasks that have not reached a terminal state.

    Reads the in-memory ``_progress`` dict and leaves out every entry whose
    status is "completed" or "failed"; an entry with no status is reported
    as "unknown".

    Returns:
        ``{"active": [...], "count": N}`` where each item carries
        ``task_id``, ``status``, ``step``, ``filename`` and ``error``.
    """
    terminal_states = ("completed", "failed")
    # Work on a snapshot of the items rather than the live dict view.
    snapshot = list(_progress.items())
    active = [
        {
            "task_id": task_id,
            "status": info.get("status", "unknown"),
            "step": info.get("step", ""),
            "filename": info.get("filename", ""),
            "error": info.get("error", ""),
        }
        for task_id, info in snapshot
        if info.get("status", "unknown") not in terminal_states
    ]
    return {"active": active, "count": len(active)}
@app.get("/api/progress/{task_id}")
async def progress_stream(task_id: str):
"""SSE stream of processing progress."""
@@ -971,6 +1015,134 @@ async def api_processing_status():
return json.loads(result)
def _diagnostics_doc_row(row) -> dict:
    """Serialize one documents-joined-with-cases row for the diagnostics payload."""
    created_at = row["created_at"]
    return {
        "id": str(row["id"]),
        "title": row["title"] or "",
        "status": row["extraction_status"],
        "case_number": row["case_number"] or "",
        "created_at": created_at.isoformat() if created_at else None,
    }


async def _diagnostics_fetch(conn, query: str) -> list:
    """Run a diagnostics query, returning an empty list if it raises."""
    try:
        return await conn.fetch(query)
    except Exception:
        # Best-effort, like the per-table counts: a failing query must not
        # turn the health snapshot itself into an error response.
        return []


@app.get("/api/system/diagnostics")
async def system_diagnostics():
    """Return a system health snapshot.

    Every database read after acquiring the connection is best-effort, so
    the endpoint can still answer (with ``db_ok`` false, ``None`` counts
    and empty document lists) when queries fail.

    Returns:
        A dict with:
        - ``db_ok``: whether ``SELECT 1`` succeeded.
        - ``tables``: row count per table, or ``None`` where the count
          query failed.
        - ``failed_documents``: up to 20 documents whose extraction status
          is 'failed' or 'error', newest first.
        - ``stuck_documents``: up to 20 documents still 'pending' or
          'processing' that were created more than 10 minutes ago,
          newest first.
        - ``active_tasks``: entries of the in-memory ``_progress`` dict
          whose status is neither "completed" nor "failed".
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        try:
            await conn.fetchval("SELECT 1")
        except Exception:
            # Deliberate: a failed probe is reported through db_ok.
            db_ok = False
        else:
            db_ok = True

        tables = {}
        # Table names come from this fixed tuple, never from the request,
        # so interpolating them into the statement is safe.
        for t in ("cases", "documents", "document_chunks", "style_corpus", "style_patterns"):
            try:
                tables[t] = await conn.fetchval(f"SELECT count(*) FROM {t}")
            except Exception:
                tables[t] = None

        # Documents whose extraction failed
        failed_docs = await _diagnostics_fetch(
            conn,
            "SELECT d.id, d.title, d.extraction_status, d.created_at, "
            " c.case_number "
            "FROM documents d LEFT JOIN cases c ON d.case_id = c.id "
            "WHERE d.extraction_status IN ('failed', 'error') "
            "ORDER BY d.created_at DESC LIMIT 20",
        )
        # Documents that have been waiting for more than 10 minutes
        stuck_docs = await _diagnostics_fetch(
            conn,
            "SELECT d.id, d.title, d.extraction_status, d.created_at, "
            " c.case_number "
            "FROM documents d LEFT JOIN cases c ON d.case_id = c.id "
            "WHERE d.extraction_status IN ('pending', 'processing') "
            " AND d.created_at < now() - interval '10 minutes' "
            "ORDER BY d.created_at DESC LIMIT 20",
        )

    # Iterate a snapshot of the items so an entry added or removed while
    # the list is being built cannot break the iteration.
    active_tasks = [
        {
            "task_id": tid,
            "filename": d.get("filename", ""),
            "status": d.get("status", ""),
            "step": d.get("step", ""),
        }
        for tid, d in list(_progress.items())
        if d.get("status") not in ("completed", "failed")
    ]

    return {
        "db_ok": db_ok,
        "tables": tables,
        "failed_documents": [_diagnostics_doc_row(r) for r in failed_docs],
        "stuck_documents": [_diagnostics_doc_row(r) for r in stuck_docs],
        "active_tasks": active_tasks,
    }
@app.get("/api/system/recent-activity")
async def system_recent_activity(limit: int = 8):
    """Derive a feed of recent events from cases, style_corpus and style_patterns.

    Args:
        limit: Maximum number of events to return. Values below 0 are
            treated as 0 and values above 100 are treated as 100.

    Returns:
        ``{"events": [...]}`` sorted newest first. Each event has ``type``
        ("case_created", "corpus_added" or "analysis_run"), ``label``,
        ``detail``, ``timestamp`` (ISO 8601 string or ``None``) and
        ``target`` (a front-end route).
    """
    # The value is caller-supplied and goes straight into SQL LIMIT and a
    # list slice: a negative number would be an invalid LIMIT and would make
    # the slice drop items from the end instead of capping the list, and an
    # unbounded one would let a single request pull whole tables.
    max_events = 100
    limit = max(0, min(limit, max_events))

    pool = await db.get_pool()
    events: list[dict] = []
    async with pool.acquire() as conn:
        # Recent cases
        cases = await conn.fetch(
            "SELECT case_number, title, created_at FROM cases "
            "ORDER BY created_at DESC LIMIT $1", limit
        )
        for c in cases:
            events.append({
                "type": "case_created",
                "label": f"תיק חדש: ערר {c['case_number']}",
                "detail": c["title"] or "",
                "timestamp": c["created_at"].isoformat() if c["created_at"] else None,
                "target": f"/#/case/{c['case_number']}",
            })

        # Recent corpus additions
        corpus = await conn.fetch(
            "SELECT decision_number, created_at FROM style_corpus "
            "ORDER BY created_at DESC LIMIT $1", limit
        )
        for r in corpus:
            events.append({
                "type": "corpus_added",
                "label": f"החלטה נוספה לקורפוס: {r['decision_number'] or 'ללא מספר'}",
                "detail": "",
                "timestamp": r["created_at"].isoformat() if r["created_at"] else None,
                "target": "/#/training",
            })

        # Last style analysis run (if any): one event stamped with the
        # newest style_patterns row and labelled with the total row count.
        last_pattern = await conn.fetchrow(
            "SELECT created_at FROM style_patterns "
            "ORDER BY created_at DESC LIMIT 1"
        )
        if last_pattern and last_pattern["created_at"]:
            count = await conn.fetchval("SELECT count(*) FROM style_patterns")
            events.append({
                "type": "analysis_run",
                "label": f"ניתוח סגנון — {count} דפוסים חולצו",
                "detail": "",
                "timestamp": last_pattern["created_at"].isoformat(),
                "target": "/#/style-report",
            })

    # Merge the sources: newest first (events without a timestamp sort
    # last), then keep only the requested number.
    events.sort(key=lambda e: e["timestamp"] or "", reverse=True)
    return {"events": events[:limit]}
# ── Workflow API — outcome, direction, claims, QA, learning ──────