From d98ef14f41c2833fd8e91522fcdbb82bc66240a7 Mon Sep 17 00:00:00 2001 From: Chaim Date: Fri, 12 Jun 2026 06:47:58 +0000 Subject: [PATCH] =?UTF-8?q?feat(learning):=20FU-3=20=E2=80=94=20uncertaint?= =?UTF-8?q?y-sampling=20=D7=A9=D7=9C=20=D7=AA=D7=95=D7=A8-=D7=94=D7=90?= =?UTF-8?q?=D7=99=D7=A9=D7=95=D7=A8=20=D7=9C=D7=A4=D7=99=20=D7=9E=D7=97?= =?UTF-8?q?=D7=9C=D7=95=D7=A7=D7=AA-=D7=94=D7=A4=D7=90=D7=A0=D7=9C=20(#133?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit תור-אישור-ההלכות הקיים (order_by_priority, #84.3) מקדם עכשיו את ההלכות שהפאנל התלבט עליהן: split קודם, אחר-כך incomplete — התוויות בעלות-הערך הגבוה ביותר ללולאת-הלמידה (הכרעת-היו"ר מפרקת אי-ודאות אמיתית ומזינה את זיקוק-ה-rubric ב-FU-4). uncertainty-sampling על סיגנל-המחלוקת האמיתי של הפאנל, לא רק confidence-החילוץ. - list_halachot: LEFT JOIN לאחרון-הסבבים (DISTINCT ON latest round_ts מ- halacha_panel_rounds) + מפתח-מיון ראשי CASE verdict split→0/incomplete→1/ else→2, לפני מפתחות #84.3 (corroboration→confidence→age). סבבים פה-אחד ופריטים-ללא-סבב נשארים בזנב עם הסדר הקיים. - panel_verdict נחשף בכל שורה (UI יכול לתייג "פיצול" + ביקורת-סדר). - שימוש חוזר בדגל order_by_priority הקיים ובטאב הקיים — בלי מסלול/דגל מקביל (G2). ה-UI כבר מבקש order_by_priority=true → אפס שינוי-UI, אין צורך בשער-עיצוב. - test_halacha_priority_panel_order.py: 3 בדיקות offline (SQL-capture) — מפתח-מחלוקת ראשי בעדיפות, FIFO ללא דליפת-CASE, panel_verdict נבחר. Invariants: INV-G10 (capture-only, לא משנה review_status) · G1/G2 · INV-IA (אותו שער/טאב). רגרסיה: 76 בדיקות עברו. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/src/legal_mcp/services/db.py | 27 ++++-- .../test_halacha_priority_panel_order.py | 85 +++++++++++++++++++ 2 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 mcp-server/tests/test_halacha_priority_panel_order.py diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 6926775..5755df4 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -4474,9 +4474,12 @@ async def list_halachot( truncated_quote / quote_unverified / non_decision / thin_restatement / nli_unsupported / near_duplicate). These belong in a 'needs extraction fix' bucket, not the chair's approve queue (#84.1). - order_by_priority — replace FIFO with an active-learning order (#84.3): - negatively-treated first, then most-uncertain (lowest confidence), then - oldest — so the chair sees the highest-value decisions first. + order_by_priority — replace FIFO with an active-learning order (#84.3, #133/FU-3): + panel-disagreement first (the panel SPLIT, then ran INCOMPLETE — the + labels of highest learning value: the chair's call resolves a genuine + ambiguity and feeds rubric distillation, FU-4), then negatively-treated, + then most-uncertain (lowest confidence), then oldest. Uncertainty-sampling + on the panel's real disagreement signal, not just extraction confidence. cluster — annotate each row with ``cluster_id`` + ``cluster_size`` (#84.2): same-precedent halachot within HALACHA_CLUSTER_COSINE form one group so the UI can collapse near-identical principles into a single review card. 
@@ -4501,9 +4504,15 @@ async def list_halachot( # a clean item has an empty/NULL quality_flags array conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0") where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else "" + # #133/FU-3: rank the panel's latest verdict so splits/incompletes — the + # highest-value active-learning labels — float to the top of the queue. + # 'split' (genuine 1-1 disagreement) before 'incomplete' (a judge failed, + # less informative); unanimous rounds and not-yet-judged items share the + # tail and keep the #84.3 ordering among themselves. order_sql = ( - "ORDER BY corroboration_negative DESC, h.confidence ASC NULLS LAST, " - "h.created_at ASC" + "ORDER BY (CASE pr.verdict WHEN 'split' THEN 0 WHEN 'incomplete' THEN 1 " + "ELSE 2 END) ASC, corroboration_negative DESC, " + "h.confidence ASC NULLS LAST, h.created_at ASC" if order_by_priority else "ORDER BY h.case_law_id, h.halacha_index" ) @@ -4518,7 +4527,8 @@ async def list_halachot( cl.case_number, cl.case_name, cl.court, cl.date AS decision_date, cl.precedent_level, COALESCE(cor.corroboration_count, 0)::int AS corroboration_count, - COALESCE(cor.corroboration_negative, false) AS corroboration_negative + COALESCE(cor.corroboration_negative, false) AS corroboration_negative, + pr.verdict AS panel_verdict FROM halachot h LEFT JOIN case_law cl ON cl.id = h.case_law_id LEFT JOIN ( @@ -4533,6 +4543,11 @@ async def list_halachot( FROM halacha_citation_corroboration GROUP BY halacha_id ) cor ON cor.halacha_id = h.id + LEFT JOIN ( + SELECT DISTINCT ON (halacha_id) halacha_id, verdict + FROM halacha_panel_rounds + ORDER BY halacha_id, round_ts DESC + ) pr ON pr.halacha_id = h.id {where_sql} {order_sql} LIMIT ${idx} OFFSET ${idx + 1} diff --git a/mcp-server/tests/test_halacha_priority_panel_order.py b/mcp-server/tests/test_halacha_priority_panel_order.py new file mode 100644 index 0000000..d613b1b --- /dev/null +++ b/mcp-server/tests/test_halacha_priority_panel_order.py @@ 
-0,0 +1,85 @@
+"""Tests for #133 / FU-3 — active uncertainty-sampling of the chair review queue.
+
+When the chair queue is requested with order_by_priority, the items the 3-judge
+panel SPLIT on (and then INCOMPLETE rounds) must float to the top — those are
+the highest-value active-learning labels (the chair's call resolves a genuine
+ambiguity and feeds rubric distillation, FU-4). This reuses the existing
+order_by_priority flag (no parallel path, G2).
+
+Runs fully OFFLINE: monkeypatches db.get_pool with a fake pool that captures the
+SQL passed to fetch, and asserts the ORDER BY / JOIN shape — no Postgres.
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from legal_mcp.services import db
+
+
+class _FakePool:
+    """Captures SQL passed to ``fetch``; returns no rows."""
+
+    def __init__(self) -> None:
+        self.queries: list[str] = []
+
+    async def fetch(self, sql: str, *args):  # noqa: ANN002, ANN201
+        self.queries.append(sql)
+        return []
+
+
+@pytest.fixture()
+def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool:
+    pool = _FakePool()
+
+    async def _get_pool() -> _FakePool:
+        return pool
+
+    monkeypatch.setattr(db, "get_pool", _get_pool)
+    return pool
+
+
+def _list_sql(pool: _FakePool) -> str:
+    return next(q for q in pool.queries if "FROM halachot h" in q)
+
+
+def test_priority_order_ranks_panel_split_first(fake_pool: _FakePool) -> None:
+    asyncio.run(
+        db.list_halachot(review_status="pending_review", order_by_priority=True)
+    )
+    sql = _list_sql(fake_pool)
+    # latest-verdict join is present …
+    assert "FROM halacha_panel_rounds" in sql
+    assert "DISTINCT ON (halacha_id)" in sql
+    # … and the ORDER BY ranks split before incomplete before everything else,
+    # AHEAD of the #84.3 corroboration/confidence/age keys.
+    order = sql[sql.rindex("ORDER BY"):]  # last ORDER BY = outer query, not the pr subquery
+    assert "WHEN 'split' THEN 0" in order
+    assert "WHEN 'incomplete' THEN 1" in order
+    rank_pos = order.index("CASE pr.verdict")
+    corr_pos = order.index("corroboration_negative")
+    conf_pos = order.index("h.confidence")
+    assert rank_pos < corr_pos < conf_pos, (
+        "panel-disagreement rank must be the PRIMARY sort key, before the "
+        "existing #84.3 corroboration/confidence ordering"
+    )
+
+
+def test_fifo_order_has_no_panel_rank(fake_pool: _FakePool) -> None:
+    """Without order_by_priority the queue stays in deterministic FIFO order —
+    the panel-rank CASE must not leak into the default ordering."""
+    asyncio.run(db.list_halachot(review_status="pending_review"))
+    sql = _list_sql(fake_pool)
+    order = sql[sql.rindex("ORDER BY"):]  # last ORDER BY = outer query, not the pr subquery
+    assert "CASE pr.verdict" not in order
+    assert "h.case_law_id, h.halacha_index" in order
+
+
+def test_panel_verdict_selected(fake_pool: _FakePool) -> None:
+    """panel_verdict is surfaced on each row so the UI can badge *why* an item
+    is at the top of the queue (and so the order is auditable)."""
+    asyncio.run(db.list_halachot(order_by_priority=True))
+    sql = _list_sql(fake_pool)
+    assert "pr.verdict AS panel_verdict" in sql