From a4b4ebbbb117a2b9280744ff84102c86d652adc0 Mon Sep 17 00:00:00 2001 From: Chaim Date: Thu, 11 Jun 2026 16:42:22 +0000 Subject: [PATCH] =?UTF-8?q?feat(halacha):=20#84.7=20=E2=80=94=20queue=20th?= =?UTF-8?q?roughput=20+=20quality=20metrics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit הרחבת metrics.halacha_backlog (G2 — אותה פונקציה, אין מסלול-מטריקות מקביל; כבר מוגשת דרך /api/system/diagnostics) במדדי-תור שחסרו: - throughput_24h / throughput_7d — קצב-ההחלטות (reviewed_at בחלון). - approve/reject/defer ratios (קודם רק approve). - median_seconds_per_decision — זמן-חציוני-לפריט, מחושב רק על פערים [1ש',30דק'] כדי לבטא קצב-אנושי אינטראקטיבי (פער-0 של batch panel/auto מוחרג, וגם פערים >30דק' בין sessions). 41.4s בייצור; None כשהתור כולו batch. - by_reviewer — פילוח panel/auto/chair/other (מי החליט). spot-check post-hoc כבר מכוסה ע"י halacha_panel_audit.py (re-judge של מאושרי-פאנל). _median חולץ כ-helper טהור ובדיק. invariants: G2 (הרחבת מטריקה קיימת) · INV-QA1/G10 (נראות שער-האנוש — גם מהירות וגם איכות). tests: 4 offline (_median) + אומת חי על ה-DB (476 pending, throughput 115/956, median 41.4s). Co-Authored-By: Claude Opus 4.8 --- mcp-server/src/legal_mcp/services/metrics.py | 53 ++++++++++++++++++++ mcp-server/tests/test_metrics_median.py | 24 +++++++++ 2 files changed, 77 insertions(+) create mode 100644 mcp-server/tests/test_metrics_median.py diff --git a/mcp-server/src/legal_mcp/services/metrics.py b/mcp-server/src/legal_mcp/services/metrics.py index ab6acec..3a16b83 100644 --- a/mcp-server/src/legal_mcp/services/metrics.py +++ b/mcp-server/src/legal_mcp/services/metrics.py @@ -103,12 +103,25 @@ async def get_case_metrics(case_id: UUID) -> dict: return metrics +def _median(values: list[float]) -> float | None: + """Median of a numeric list (None if empty). 
Pure — unit-tested.""" + s = sorted(v for v in values if v is not None) + if not s: + return None + mid = len(s) // 2 + return s[mid] if len(s) % 2 else (s[mid - 1] + s[mid]) / 2 + + async def halacha_backlog(conn) -> dict: """תור אישור-ההלכות (GAP-14 / INV-QA1 / G10) — נראות ה-backlog האנושי. הלכות נכנסות כ-`pending_review` ובלתי-נראות לחיפוש עד אישור היו"ר; בלי ספירה גלויה, אישור-חסר נשאר סמוי (10/19 התגלה במקרה). מקבל connection פתוח כדי שאפשר יהיה לשלב בסנאפ-שוט קיים (get_dashboard, /api/system/diagnostics). + + כולל גם מדדי-תור (#84.7): throughput (24ש'/7ימים), יחסי approve/reject/defer, + זמן-חציוני-לפריט (פער בין החלטות עוקבות בתוך session של 30 דק'), ופילוח + מי-החליט (panel/auto/chair) — כדי לראות גם מהירות וגם איכות, לא רק backlog. """ rows = await conn.fetch( "SELECT review_status, COUNT(*) AS n FROM halachot GROUP BY review_status" @@ -132,6 +145,38 @@ async def halacha_backlog(conn) -> dict: ) pending_total = counts.get("pending_review", 0) reviewed = counts.get("approved", 0) + counts.get("rejected", 0) + counts.get("published", 0) + + # ── #84.7 queue throughput + quality ────────────────────────────────────── + # throughput windows (decisions = anything with a reviewed_at stamp) + tp = await conn.fetchrow( + "SELECT COUNT(*) FILTER (WHERE reviewed_at >= now() - interval '24 hours') AS d24, " + " COUNT(*) FILTER (WHERE reviewed_at >= now() - interval '7 days') AS d7 " + "FROM halachot WHERE reviewed_at IS NOT NULL" + ) + # who decided — panel (tri-model), auto (confidence gate), chair (human), other + who_rows = await conn.fetch( + "SELECT CASE " + " WHEN reviewer LIKE 'panel:%' THEN 'panel' " + " WHEN reviewer LIKE 'auto-approved%' THEN 'auto' " + " WHEN reviewer LIKE 'chair%' THEN 'chair' " + " ELSE 'other' END AS who, COUNT(*) AS n " + "FROM halachot WHERE reviewed_at IS NOT NULL GROUP BY 1" + ) + by_reviewer = {r["who"]: r["n"] for r in who_rows} + # time-per-item proxy: median seconds between consecutive HAND-PACED + # decisions — gaps in [1s, 
30min]. Excludes 0-second gaps (batch operations + # like panel/auto stamp many rows with the same reviewed_at) and >30-min gaps + # (between sessions), so the number reflects interactive review pacing, not + # machine throughput. None when the queue is entirely batch-decided. + gap_rows = await conn.fetch( + "SELECT EXTRACT(EPOCH FROM (reviewed_at - prev)) AS gap FROM (" + " SELECT reviewed_at, LAG(reviewed_at) OVER (ORDER BY reviewed_at) AS prev " + " FROM halachot WHERE reviewed_at IS NOT NULL" + ") t WHERE prev IS NOT NULL " + "AND reviewed_at - prev BETWEEN interval '1 second' AND interval '30 minutes'" + ) + median_secs = _median([float(r["gap"]) for r in gap_rows if r["gap"] is not None]) + return { "pending_review": pending_total, "pending_clean": pending_clean, # real review candidates (#84.1) @@ -143,8 +188,16 @@ async def halacha_backlog(conn) -> dict: "total": sum(counts.values()), "reviewed_total": reviewed, "approve_ratio": round(counts.get("approved", 0) / reviewed, 3) if reviewed else None, + "reject_ratio": round(counts.get("rejected", 0) / reviewed, 3) if reviewed else None, + "defer_ratio": (round(counts.get("deferred", 0) / (reviewed + counts.get("deferred", 0)), 3) + if (reviewed + counts.get("deferred", 0)) else None), "pending_by_flag": {r["flag"]: r["n"] for r in flag_rows}, "oldest_pending_at": oldest.isoformat() if oldest else None, + # #84.7 throughput + quality + "throughput_24h": tp["d24"] if tp else 0, + "throughput_7d": tp["d7"] if tp else 0, + "median_seconds_per_decision": round(median_secs, 1) if median_secs is not None else None, + "by_reviewer": by_reviewer, } diff --git a/mcp-server/tests/test_metrics_median.py b/mcp-server/tests/test_metrics_median.py new file mode 100644 index 0000000..123444e --- /dev/null +++ b/mcp-server/tests/test_metrics_median.py @@ -0,0 +1,24 @@ +"""Test for #84.7 — _median helper used by the queue-metrics time-per-item proxy.""" + +from __future__ import annotations + +import pytest + +from 
def test_median_empty_is_none():
    """No usable values (empty input, or only ``None``) means no median."""
    for no_values in ([], [None, None]):
        assert metrics._median(no_values) is None


def test_median_odd():
    """An odd-length input yields its middle element after sorting."""
    unsorted_odd = [3.0, 1.0, 2.0]
    assert metrics._median(unsorted_odd) == 2.0


def test_median_even_averages_middle():
    """An even-length input yields the mean of the two middle elements."""
    unsorted_even = [4.0, 1.0, 3.0, 2.0]
    assert metrics._median(unsorted_even) == pytest.approx(2.5)


def test_median_ignores_none():
    """``None`` entries are dropped before the median is taken."""
    with_gaps = [None, 5.0, None, 1.0, 3.0]
    assert metrics._median(with_gaps) == 3.0