feat(halacha): #84.7 — מדדי-תור (throughput + איכות) #193

Merged
chaim merged 1 commit from worktree-halacha-queue-metrics into main 2026-06-11 16:42:42 +00:00
2 changed files with 77 additions and 0 deletions
Showing only changes of commit a4b4ebbbb1 - Show all commits

View File

@@ -103,12 +103,25 @@ async def get_case_metrics(case_id: UUID) -> dict:
return metrics return metrics
def _median(values: list[float]) -> float | None:
"""Median of a numeric list (None if empty). Pure — unit-tested."""
s = sorted(v for v in values if v is not None)
if not s:
return None
mid = len(s) // 2
return s[mid] if len(s) % 2 else (s[mid - 1] + s[mid]) / 2
async def halacha_backlog(conn) -> dict: async def halacha_backlog(conn) -> dict:
"""תור אישור-ההלכות (GAP-14 / INV-QA1 / G10) — נראות ה-backlog האנושי. """תור אישור-ההלכות (GAP-14 / INV-QA1 / G10) — נראות ה-backlog האנושי.
הלכות נכנסות כ-`pending_review` ובלתי-נראות לחיפוש עד אישור היו"ר; בלי ספירה הלכות נכנסות כ-`pending_review` ובלתי-נראות לחיפוש עד אישור היו"ר; בלי ספירה
גלויה, אישור-חסר נשאר סמוי (10/19 התגלה במקרה). מקבל connection פתוח כדי גלויה, אישור-חסר נשאר סמוי (10/19 התגלה במקרה). מקבל connection פתוח כדי
שאפשר יהיה לשלב בסנאפ-שוט קיים (get_dashboard, /api/system/diagnostics). שאפשר יהיה לשלב בסנאפ-שוט קיים (get_dashboard, /api/system/diagnostics).
כולל גם מדדי-תור (#84.7): throughput (24ש'/7ימים), יחסי approve/reject/defer,
זמן-חציוני-לפריט (פער בין החלטות עוקבות בתוך session של 30 דק'), ופילוח
מי-החליט (panel/auto/chair) — כדי לראות גם מהירות וגם איכות, לא רק backlog.
""" """
rows = await conn.fetch( rows = await conn.fetch(
"SELECT review_status, COUNT(*) AS n FROM halachot GROUP BY review_status" "SELECT review_status, COUNT(*) AS n FROM halachot GROUP BY review_status"
@@ -132,6 +145,38 @@ async def halacha_backlog(conn) -> dict:
) )
pending_total = counts.get("pending_review", 0) pending_total = counts.get("pending_review", 0)
reviewed = counts.get("approved", 0) + counts.get("rejected", 0) + counts.get("published", 0) reviewed = counts.get("approved", 0) + counts.get("rejected", 0) + counts.get("published", 0)
# ── #84.7 queue throughput + quality ──────────────────────────────────────
# throughput windows (decisions = anything with a reviewed_at stamp)
tp = await conn.fetchrow(
"SELECT COUNT(*) FILTER (WHERE reviewed_at >= now() - interval '24 hours') AS d24, "
" COUNT(*) FILTER (WHERE reviewed_at >= now() - interval '7 days') AS d7 "
"FROM halachot WHERE reviewed_at IS NOT NULL"
)
# who decided — panel (tri-model), auto (confidence gate), chair (human), other
who_rows = await conn.fetch(
"SELECT CASE "
" WHEN reviewer LIKE 'panel:%' THEN 'panel' "
" WHEN reviewer LIKE 'auto-approved%' THEN 'auto' "
" WHEN reviewer LIKE 'chair%' THEN 'chair' "
" ELSE 'other' END AS who, COUNT(*) AS n "
"FROM halachot WHERE reviewed_at IS NOT NULL GROUP BY 1"
)
by_reviewer = {r["who"]: r["n"] for r in who_rows}
# time-per-item proxy: median seconds between consecutive HAND-PACED
# decisions — gaps in [1s, 30min]. Excludes 0-second gaps (batch operations
# like panel/auto stamp many rows with the same reviewed_at) and >30-min gaps
# (between sessions), so the number reflects interactive review pacing, not
# machine throughput. None when the queue is entirely batch-decided.
gap_rows = await conn.fetch(
"SELECT EXTRACT(EPOCH FROM (reviewed_at - prev)) AS gap FROM ("
" SELECT reviewed_at, LAG(reviewed_at) OVER (ORDER BY reviewed_at) AS prev "
" FROM halachot WHERE reviewed_at IS NOT NULL"
") t WHERE prev IS NOT NULL "
"AND reviewed_at - prev BETWEEN interval '1 second' AND interval '30 minutes'"
)
median_secs = _median([float(r["gap"]) for r in gap_rows if r["gap"] is not None])
return { return {
"pending_review": pending_total, "pending_review": pending_total,
"pending_clean": pending_clean, # real review candidates (#84.1) "pending_clean": pending_clean, # real review candidates (#84.1)
@@ -143,8 +188,16 @@ async def halacha_backlog(conn) -> dict:
"total": sum(counts.values()), "total": sum(counts.values()),
"reviewed_total": reviewed, "reviewed_total": reviewed,
"approve_ratio": round(counts.get("approved", 0) / reviewed, 3) if reviewed else None, "approve_ratio": round(counts.get("approved", 0) / reviewed, 3) if reviewed else None,
"reject_ratio": round(counts.get("rejected", 0) / reviewed, 3) if reviewed else None,
"defer_ratio": (round(counts.get("deferred", 0) / (reviewed + counts.get("deferred", 0)), 3)
if (reviewed + counts.get("deferred", 0)) else None),
"pending_by_flag": {r["flag"]: r["n"] for r in flag_rows}, "pending_by_flag": {r["flag"]: r["n"] for r in flag_rows},
"oldest_pending_at": oldest.isoformat() if oldest else None, "oldest_pending_at": oldest.isoformat() if oldest else None,
# #84.7 throughput + quality
"throughput_24h": tp["d24"] if tp else 0,
"throughput_7d": tp["d7"] if tp else 0,
"median_seconds_per_decision": round(median_secs, 1) if median_secs is not None else None,
"by_reviewer": by_reviewer,
} }

View File

@@ -0,0 +1,24 @@
"""Test for #84.7 — _median helper used by the queue-metrics time-per-item proxy."""
from __future__ import annotations
import pytest
from legal_mcp.services import metrics
def test_median_empty_is_none():
    """An empty list and an all-``None`` list both have no median."""
    for no_data in ([], [None, None]):
        assert metrics._median(no_data) is None
def test_median_odd():
    """With an odd count the median is the middle value after sorting."""
    unsorted_values = [3.0, 1.0, 2.0]
    assert metrics._median(unsorted_values) == 2.0
def test_median_even_averages_middle():
    """With an even count the median is the mean of the two middle values."""
    result = metrics._median([4.0, 1.0, 3.0, 2.0])
    assert result == pytest.approx(2.5)
def test_median_ignores_none():
    """``None`` entries are dropped before the median is computed."""
    with_gaps = [None, 5.0, None, 1.0, 3.0]
    assert metrics._median(with_gaps) == 3.0