Merge pull request 'feat(learning): FU-3 — uncertainty-sampling של תור-האישור לפי מחלוקת-הפאנל (#133)' (#222) from worktree-halacha-active-learning-fu3 into main
This commit was merged in pull request #222.
This commit is contained in:
@@ -4474,9 +4474,12 @@ async def list_halachot(
|
|||||||
truncated_quote / quote_unverified / non_decision / thin_restatement /
|
truncated_quote / quote_unverified / non_decision / thin_restatement /
|
||||||
nli_unsupported / near_duplicate). These belong in a 'needs extraction
|
nli_unsupported / near_duplicate). These belong in a 'needs extraction
|
||||||
fix' bucket, not the chair's approve queue (#84.1).
|
fix' bucket, not the chair's approve queue (#84.1).
|
||||||
order_by_priority — replace FIFO with an active-learning order (#84.3):
|
order_by_priority — replace FIFO with an active-learning order (#84.3, #133/FU-3):
|
||||||
negatively-treated first, then most-uncertain (lowest confidence), then
|
panel-disagreement first (the panel SPLIT, then ran INCOMPLETE — the
|
||||||
oldest — so the chair sees the highest-value decisions first.
|
labels of highest learning value: the chair's call resolves a genuine
|
||||||
|
ambiguity and feeds rubric distillation, FU-4), then negatively-treated,
|
||||||
|
then most-uncertain (lowest confidence), then oldest. Uncertainty-sampling
|
||||||
|
on the panel's real disagreement signal, not just extraction confidence.
|
||||||
cluster — annotate each row with ``cluster_id`` + ``cluster_size`` (#84.2):
|
cluster — annotate each row with ``cluster_id`` + ``cluster_size`` (#84.2):
|
||||||
same-precedent halachot within HALACHA_CLUSTER_COSINE form one group so
|
same-precedent halachot within HALACHA_CLUSTER_COSINE form one group so
|
||||||
the UI can collapse near-identical principles into a single review card.
|
the UI can collapse near-identical principles into a single review card.
|
||||||
@@ -4501,9 +4504,15 @@ async def list_halachot(
|
|||||||
# a clean item has an empty/NULL quality_flags array
|
# a clean item has an empty/NULL quality_flags array
|
||||||
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
|
conditions.append("COALESCE(array_length(h.quality_flags, 1), 0) = 0")
|
||||||
where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
|
where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
|
||||||
|
# #133/FU-3: rank the panel's latest verdict so splits/incompletes — the
|
||||||
|
# highest-value active-learning labels — float to the top of the queue.
|
||||||
|
# 'split' (genuine 1-1 disagreement) before 'incomplete' (a judge failed,
|
||||||
|
# less informative); unanimous rounds and not-yet-judged items share the
|
||||||
|
# tail and keep the #84.3 ordering among themselves.
|
||||||
order_sql = (
|
order_sql = (
|
||||||
"ORDER BY corroboration_negative DESC, h.confidence ASC NULLS LAST, "
|
"ORDER BY (CASE pr.verdict WHEN 'split' THEN 0 WHEN 'incomplete' THEN 1 "
|
||||||
"h.created_at ASC"
|
"ELSE 2 END) ASC, corroboration_negative DESC, "
|
||||||
|
"h.confidence ASC NULLS LAST, h.created_at ASC"
|
||||||
if order_by_priority
|
if order_by_priority
|
||||||
else "ORDER BY h.case_law_id, h.halacha_index"
|
else "ORDER BY h.case_law_id, h.halacha_index"
|
||||||
)
|
)
|
||||||
@@ -4518,7 +4527,8 @@ async def list_halachot(
|
|||||||
cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
|
cl.case_number, cl.case_name, cl.court, cl.date AS decision_date,
|
||||||
cl.precedent_level,
|
cl.precedent_level,
|
||||||
COALESCE(cor.corroboration_count, 0)::int AS corroboration_count,
|
COALESCE(cor.corroboration_count, 0)::int AS corroboration_count,
|
||||||
COALESCE(cor.corroboration_negative, false) AS corroboration_negative
|
COALESCE(cor.corroboration_negative, false) AS corroboration_negative,
|
||||||
|
pr.verdict AS panel_verdict
|
||||||
FROM halachot h
|
FROM halachot h
|
||||||
LEFT JOIN case_law cl ON cl.id = h.case_law_id
|
LEFT JOIN case_law cl ON cl.id = h.case_law_id
|
||||||
LEFT JOIN (
|
LEFT JOIN (
|
||||||
@@ -4533,6 +4543,11 @@ async def list_halachot(
|
|||||||
FROM halacha_citation_corroboration
|
FROM halacha_citation_corroboration
|
||||||
GROUP BY halacha_id
|
GROUP BY halacha_id
|
||||||
) cor ON cor.halacha_id = h.id
|
) cor ON cor.halacha_id = h.id
|
||||||
|
LEFT JOIN (
|
||||||
|
SELECT DISTINCT ON (halacha_id) halacha_id, verdict
|
||||||
|
FROM halacha_panel_rounds
|
||||||
|
ORDER BY halacha_id, round_ts DESC
|
||||||
|
) pr ON pr.halacha_id = h.id
|
||||||
{where_sql}
|
{where_sql}
|
||||||
{order_sql}
|
{order_sql}
|
||||||
LIMIT ${idx} OFFSET ${idx + 1}
|
LIMIT ${idx} OFFSET ${idx + 1}
|
||||||
|
|||||||
85
mcp-server/tests/test_halacha_priority_panel_order.py
Normal file
85
mcp-server/tests/test_halacha_priority_panel_order.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""Tests for #133 / FU-3 — active uncertainty-sampling of the chair review queue.
|
||||||
|
|
||||||
|
When the chair queue is requested with order_by_priority, the items the 3-judge
|
||||||
|
panel SPLIT on (and then INCOMPLETE rounds) must float to the top — those are
|
||||||
|
the highest-value active-learning labels (the chair's call resolves a genuine
|
||||||
|
ambiguity and feeds rubric distillation, FU-4). This reuses the existing
|
||||||
|
order_by_priority flag (no parallel path, G2).
|
||||||
|
|
||||||
|
Runs fully OFFLINE: monkeypatches db.get_pool with a fake pool that captures the
|
||||||
|
SQL passed to fetch, and asserts the ORDER BY / JOIN shape — no Postgres.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from legal_mcp.services import db
|
||||||
|
|
||||||
|
|
||||||
|
class _FakePool:
|
||||||
|
"""Captures SQL passed to ``fetch``; returns no rows."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.queries: list[str] = []
|
||||||
|
|
||||||
|
async def fetch(self, sql: str, *args): # noqa: ANN002, ANN201
|
||||||
|
self.queries.append(sql)
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def fake_pool(monkeypatch: pytest.MonkeyPatch) -> _FakePool:
    """Patch ``db.get_pool`` to yield one shared ``_FakePool`` and return it.

    The replacement is an ``async`` function, so code that awaits
    ``db.get_pool()`` receives the same recorder instance the test inspects.
    ``monkeypatch`` restores the real attribute when the test finishes.
    """
    recorder = _FakePool()

    async def _stub_get_pool() -> _FakePool:
        return recorder

    monkeypatch.setattr(db, "get_pool", _stub_get_pool)
    return recorder
|
||||||
|
|
||||||
|
|
||||||
|
def _list_sql(pool: _FakePool) -> str:
|
||||||
|
return next(q for q in pool.queries if "FROM halachot h" in q)
|
||||||
|
|
||||||
|
|
||||||
|
def test_priority_order_ranks_panel_split_first(fake_pool: _FakePool) -> None:
    """With ``order_by_priority`` the panel-verdict rank is the primary sort key.

    Checks the SQL text only: the latest-verdict join must be present, and in
    the outer ORDER BY the ``CASE pr.verdict`` rank ('split' -> 0,
    'incomplete' -> 1) must come before the corroboration and confidence keys.
    """
    asyncio.run(
        db.list_halachot(review_status="pending_review", order_by_priority=True)
    )
    sql = _list_sql(fake_pool)
    # latest-verdict join is present …
    assert "FROM halacha_panel_rounds" in sql
    assert "DISTINCT ON (halacha_id)" in sql
    # … and the ORDER BY ranks split before incomplete before everything else,
    # AHEAD of the #84.3 corroboration/confidence/age keys.
    # Anchor on the LAST "ORDER BY": the latest-verdict subquery carries its
    # own ORDER BY earlier in the statement, so the first match would start
    # the slice inside that subquery rather than at the outer ordering clause.
    order = sql[sql.rindex("ORDER BY"):]
    assert "WHEN 'split' THEN 0" in order
    assert "WHEN 'incomplete' THEN 1" in order
    rank_pos = order.index("CASE pr.verdict")
    corr_pos = order.index("corroboration_negative")
    conf_pos = order.index("h.confidence")
    assert rank_pos < corr_pos < conf_pos, (
        "panel-disagreement rank must be the PRIMARY sort key, before the "
        "existing #84.3 corroboration/confidence ordering"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_fifo_order_has_no_panel_rank(fake_pool: _FakePool) -> None:
    """Without order_by_priority the queue stays in deterministic FIFO order —
    the panel-rank CASE must not leak into the default ordering."""
    asyncio.run(db.list_halachot(review_status="pending_review"))
    sql = _list_sql(fake_pool)
    # Anchor on the LAST "ORDER BY": the latest-verdict subquery has its own
    # ORDER BY earlier in the statement, and the assertions below are about
    # the outer ordering clause only.
    order = sql[sql.rindex("ORDER BY"):]
    assert "CASE pr.verdict" not in order
    assert "h.case_law_id, h.halacha_index" in order
|
||||||
|
|
||||||
|
|
||||||
|
def test_panel_verdict_selected(fake_pool: _FakePool) -> None:
    """The list query must project the latest panel verdict as ``panel_verdict``.

    Exposing the column lets the UI badge *why* an item sits at the top of the
    queue and keeps the ordering auditable.
    """
    listing = db.list_halachot(order_by_priority=True)
    asyncio.run(listing)
    captured = _list_sql(fake_pool)
    assert "pr.verdict AS panel_verdict" in captured
|
||||||
Reference in New Issue
Block a user