Backend for the halacha approval-queue triage (#84). The keyboard UI, batch actions and defer/reject (#84.4–6) already shipped; this adds the gating, prioritization and metrics the queue was missing. db.list_halachot — two opt-in triage controls: * exclude_low_quality (#84.1): drop items carrying ANY quality_flag (application / quote_unverified / truncated / non_decision / thin / nli_unsupported / near_duplicate) — they belong in a 'needs extraction fix' bucket, not the chair's approve queue. * order_by_priority (#84.3): active-learning order — negatively-treated first, then most-uncertain (lowest confidence), then oldest — instead of FIFO, so the highest-value decisions surface first. halachot_pending (MCP) — now gated + prioritized BY DEFAULT; include_low_quality=true reveals the needs-fix bucket. The agent review path benefits immediately. GET /api/halachot — same two params, default OFF (non-breaking; the UI opts in). metrics.halacha_backlog (#84.7) — splits pending into clean vs flagged, adds deferred, reviewed_total, approve_ratio, and a pending_by_flag breakdown, so the backlog distinguishes real review work from extraction noise. Deferred (documented): #84.2 near-duplicate cluster cards and wiring the UI fetch to the new params require frontend work + an api:types regen AFTER this deploys (the new query params aren't in prod's OpenAPI until then) — a clean follow-up. The backend fully supports both now. Verified against the live DB (read-only): - pending 177 → gated-clean 110, 0 flagged items leak into the clean queue. - priority order surfaces the lowest-confidence items first (0.55, 0.55, ...). - backlog: pending_clean=110 / pending_flagged=67 / approve_ratio=0.916, pending_by_flag={nli_unsupported:59, quote_unverified:3, thin:3, truncated:2}. - pytest tests/test_halacha_quality.py — 52 passed (no regression). 
Invariants: G1 (gate at source — SQL filter, not post-hoc); G2 (no parallel path — same list_halachot); §6 (flagged items routed to a bucket, never dropped). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
227 lines
8.9 KiB
Python
227 lines
8.9 KiB
Python
"""מדדי הצלחה (KPIs) לתהליך כתיבת החלטות.
|
|
|
|
מדדים:
|
|
1. אחוז שינוי — השוואת טיוטה לגרסה סופית (יעד: <10%)
|
|
2. אפס הזיות — ספירת הפניות לא מבוססות
|
|
3. מענה לכל טענה — כיסוי טענות בדיון
|
|
4. משקלות בטווח — עמידה ביחסי הזהב
|
|
5. רקע ניטרלי — ללא מילות שיפוט
|
|
6. זמן עיבוד — מקליטה עד טיוטה
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from datetime import datetime
|
|
from uuid import UUID
|
|
|
|
from legal_mcp.services import db
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def get_case_metrics(case_id: UUID) -> dict:
    """Compute the metrics for a single case.

    Args:
        case_id: Identifier of the case to report on.

    Returns:
        A dict with ``case_number``, ``title``, ``status``, ``draft_words``,
        ``change_percent``, ``qa``, ``qa_passed``, ``total_claims``,
        ``total_documents`` and ``processing_hours``.
        ``qa_critical_failures`` is present only when QA rows exist for the
        case.

    Raises:
        ValueError: If ``db.get_case`` returns nothing for ``case_id``.
    """
    case = await db.get_case(case_id)
    if not case:
        raise ValueError(f"Case {case_id} not found")

    decision = await db.get_decision_by_case(case_id)
    pool = await db.get_pool()

    metrics = {
        "case_number": case["case_number"],
        "title": case.get("title", ""),
        "status": case.get("status", ""),
    }

    # 1. Change percentage (only once a final version exists)
    if decision and decision.get("status") == "final":
        async with pool.acquire() as conn:
            # Draft size = sum of the word counts of the decision's blocks.
            draft_words = await conn.fetchval(
                "SELECT SUM(word_count) FROM decision_blocks WHERE decision_id = $1",
                UUID(decision["id"]),
            )
        # SUM over zero rows yields NULL -> report 0 instead of None.
        metrics["draft_words"] = draft_words or 0
        # Change percent is stored during the learning loop, not computed here.
        metrics["change_percent"] = None  # populated from learning_loop results
    else:
        metrics["draft_words"] = 0
        metrics["change_percent"] = None

    # 2. QA results
    async with pool.acquire() as conn:
        qa_rows = await conn.fetch(
            "SELECT check_name, passed, severity, errors FROM qa_results WHERE case_id = $1",
            case_id,
        )

    if qa_rows:
        qa_results = {}
        for row in qa_rows:
            # `errors` is decoded only when it arrives as a JSON string.
            errors = json.loads(row["errors"]) if isinstance(row["errors"], str) else row["errors"]
            qa_results[row["check_name"]] = {
                "passed": row["passed"],
                "severity": row["severity"],
                "error_count": len(errors) if errors else 0,
            }
        metrics["qa"] = qa_results
        metrics["qa_passed"] = all(r["passed"] for r in qa_results.values())
        metrics["qa_critical_failures"] = sum(
            1 for r in qa_results.values()
            if not r["passed"] and r["severity"] == "critical"
        )
    else:
        metrics["qa"] = None
        metrics["qa_passed"] = None

    # 3. Claims coverage
    claims = await db.get_claims(case_id)
    metrics["total_claims"] = len(claims)

    # 4. Documents
    docs = await db.list_documents(case_id)
    metrics["total_documents"] = len(docs)

    # 5. Processing time: earliest document timestamp -> decision timestamp
    processing_hours = None
    if docs and decision:
        # default=None: documents may exist without any `created_at`; a bare
        # min() over the empty filtered sequence would raise ValueError.
        first_doc_time = min(
            (d["created_at"] for d in docs if d.get("created_at")),
            default=None,
        )
        decision_time = decision.get("created_at")
        if first_doc_time and decision_time:
            delta = decision_time - first_doc_time
            processing_hours = round(delta.total_seconds() / 3600, 1)
    metrics["processing_hours"] = processing_hours

    return metrics
|
|
|
|
|
|
async def halacha_backlog(conn) -> dict:
    """Summarize the halacha approval queue (GAP-14 / INV-QA1 / G10).

    Halachot enter as `pending_review` and stay invisible to search until the
    chair approves them; without a visible count a missing approval stays
    hidden (10/19 was discovered by accident). An already-open connection is
    accepted so the result can be folded into an existing snapshot
    (get_dashboard, /api/system/diagnostics).

    Args:
        conn: Open connection exposing awaitable ``fetch`` and ``fetchval``.

    Returns:
        Per-status counts, the clean/flagged split of the pending bucket, the
        reviewed total and approve ratio, the per-flag breakdown of pending
        items, and the ISO timestamp of the oldest pending item (or None).
    """
    status_rows = await conn.fetch(
        "SELECT review_status, COUNT(*) AS n FROM halachot GROUP BY review_status"
    )
    by_status = {}
    for row in status_rows:
        by_status[row["review_status"]] = row["n"]

    oldest_pending = await conn.fetchval(
        "SELECT MIN(created_at) FROM halachot WHERE review_status = 'pending_review'"
    )

    # #84.7 — split the pending bucket into genuine candidates (no quality
    # flags) versus flagged 'needs extraction fix' items, plus a per-flag
    # breakdown, so real review work is distinguishable from extraction noise.
    clean_sql = (
        "SELECT COUNT(*) FROM halachot WHERE review_status = 'pending_review' "
        "AND COALESCE(array_length(quality_flags, 1), 0) = 0"
    )
    clean_count = await conn.fetchval(clean_sql)

    flag_sql = (
        "SELECT flag, COUNT(*) AS n FROM ("
        " SELECT unnest(quality_flags) AS flag FROM halachot "
        " WHERE review_status = 'pending_review'"
        ") t GROUP BY flag ORDER BY n DESC"
    )
    per_flag = {}
    for row in await conn.fetch(flag_sql):
        per_flag[row["flag"]] = row["n"]

    pending = by_status.get("pending_review", 0)
    approved = by_status.get("approved", 0)
    rejected = by_status.get("rejected", 0)
    published = by_status.get("published", 0)
    reviewed = approved + rejected + published

    # Ratio is undefined until at least one item has been reviewed.
    if reviewed:
        approve_ratio = round(approved / reviewed, 3)
    else:
        approve_ratio = None

    if oldest_pending:
        oldest_iso = oldest_pending.isoformat()
    else:
        oldest_iso = None

    return {
        "pending_review": pending,
        "pending_clean": clean_count,  # real review candidates (#84.1)
        "pending_flagged": pending - clean_count,  # needs-fix bucket
        "approved": approved,
        "rejected": rejected,
        "deferred": by_status.get("deferred", 0),
        "published": published,
        "total": sum(by_status.values()),
        "reviewed_total": reviewed,
        "approve_ratio": approve_ratio,
        "pending_by_flag": per_flag,
        "oldest_pending_at": oldest_iso,
    }
|
|
|
|
|
|
async def get_dashboard() -> dict:
    """Build the overall dashboard: a metrics summary across all cases.

    Returns:
        A dict with ``summary`` (row counts), ``cases_by_status``,
        ``halacha_backlog``, ``qa`` (validated / passed / pass rate) and
        ``avg_decision_words``.
    """
    # Plain counts, listed in the order they are queried.
    count_queries = (
        ("total_cases", "SELECT COUNT(*) FROM cases"),
        ("total_documents", "SELECT COUNT(*) FROM documents"),
        ("total_claims", "SELECT COUNT(*) FROM claims"),
        ("total_chunks", "SELECT COUNT(*) FROM document_chunks"),
        ("total_decisions", "SELECT COUNT(*) FROM decisions"),
        ("style_corpus", "SELECT COUNT(*) FROM style_corpus"),
        ("style_patterns", "SELECT COUNT(*) FROM style_patterns"),
        ("case_law_entries", "SELECT COUNT(*) FROM case_law"),
        (
            "non_searchable_case_law",
            "SELECT COUNT(*) FROM case_law WHERE NOT searchable",
        ),
        (
            "cases_with_stale_blocks",
            "SELECT COUNT(*) FROM cases WHERE blocks_stale",
        ),
        (
            "stale_embedding_case_law",
            "SELECT COUNT(*) FROM case_law "
            "WHERE coalesce(full_text,'') <> '' AND content_hash IS DISTINCT FROM indexed_hash",
        ),
    )

    pool = await db.get_pool()

    async with pool.acquire() as conn:
        # Case counts by status
        status_rows = await conn.fetch(
            "SELECT status, COUNT(*) as cnt FROM cases GROUP BY status ORDER BY cnt DESC"
        )
        cases_by_status = {}
        for row in status_rows:
            cases_by_status[row["status"]] = row["cnt"]

        # Total counts
        totals = {}
        for key, sql in count_queries:
            totals[key] = await conn.fetchval(sql)

        # QA summary: a case counts as passed when it has no failed check of
        # critical severity.
        qa_total = await conn.fetchval("SELECT COUNT(DISTINCT case_id) FROM qa_results")
        qa_passed = await conn.fetchval(
            """SELECT COUNT(DISTINCT case_id) FROM qa_results
               WHERE case_id NOT IN (
                   SELECT case_id FROM qa_results WHERE passed = false AND severity = 'critical'
               )"""
        )

        # Final decisions
        final_count = await conn.fetchval(
            "SELECT COUNT(*) FROM decisions WHERE status = 'final'"
        )

        # Average words per decision
        avg_words = await conn.fetchval(
            "SELECT AVG(total_words) FROM decisions WHERE total_words > 0"
        )

        # Halacha review backlog (GAP-14 / INV-QA1 / G10)
        backlog = await halacha_backlog(conn)

    # Pass rate is undefined when no case has QA results.
    if qa_total:
        pass_rate = round(qa_passed / qa_total * 100, 1)
    else:
        pass_rate = None

    if avg_words:
        avg_decision_words = round(avg_words)
    else:
        avg_decision_words = None

    summary = {
        "total_cases": totals["total_cases"],
        "total_documents": totals["total_documents"],
        "total_claims": totals["total_claims"],
        "total_chunks": totals["total_chunks"],
        "total_decisions": totals["total_decisions"],
        "final_decisions": final_count,
        "style_corpus": totals["style_corpus"],
        "style_patterns": totals["style_patterns"],
        "case_law_entries": totals["case_law_entries"],
        "non_searchable_case_law": totals["non_searchable_case_law"],
        "cases_with_stale_blocks": totals["cases_with_stale_blocks"],
        "stale_embedding_case_law": totals["stale_embedding_case_law"],
    }

    return {
        "summary": summary,
        "cases_by_status": cases_by_status,
        "halacha_backlog": backlog,
        "qa": {
            "cases_validated": qa_total,
            "cases_passed": qa_passed,
            "pass_rate": pass_rate,
        },
        "avg_decision_words": avg_decision_words,
    }
|