Merge pull request 'feat(learning): FU-2 — לכידת seed אקטיב-לרנינג בשער-היו"ר הקיים (#133)' (#216) from worktree-halacha-active-learning-fu2 into main
This commit was merged in pull request #216.
This commit is contained in:
@@ -4635,6 +4635,10 @@ async def update_halacha(
|
|||||||
reviewed_at, created_at, updated_at
|
reviewed_at, created_at, updated_at
|
||||||
"""
|
"""
|
||||||
row = await pool.fetchrow(sql, *params)
|
row = await pool.fetchrow(sql, *params)
|
||||||
|
# FU-2 (#133): a firm chair decision on a panel-adjudicated halacha mints an
|
||||||
|
# active-learning seed. Capture-only — never blocks the gate (errors logged).
|
||||||
|
if row and review_status is not None:
|
||||||
|
await seed_goldset_from_chair(halacha_id, review_status, reviewer)
|
||||||
return dict(row) if row else None
|
return dict(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
@@ -4669,6 +4673,12 @@ async def update_halachot_batch(
|
|||||||
WHERE id = ANY($1::uuid[])""",
|
WHERE id = ANY($1::uuid[])""",
|
||||||
ids, review_status, *( [reviewer] if stamp else [] ),
|
ids, review_status, *( [reviewer] if stamp else [] ),
|
||||||
)
|
)
|
||||||
|
# FU-2 (#133): mint an active-learning seed for each panel-adjudicated
|
||||||
|
# halacha in the group. seed_goldset_from_chair is idempotent, self-guards
|
||||||
|
# on a prior panel round, and never raises.
|
||||||
|
if stamp:
|
||||||
|
for hid in ids:
|
||||||
|
await seed_goldset_from_chair(hid, review_status, reviewer)
|
||||||
try:
|
try:
|
||||||
return int(result.split()[-1])
|
return int(result.split()[-1])
|
||||||
except (ValueError, IndexError):
|
except (ValueError, IndexError):
|
||||||
@@ -5089,6 +5099,83 @@ async def insert_panel_round(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# The machine reviewers that DON'T represent a human ground-truth decision.
|
||||||
|
# A seed must never be minted from these (echo-chamber guard, #133). They use
|
||||||
|
# raw SQL today and never reach update_halacha, so this is defense-in-depth.
|
||||||
|
_MACHINE_REVIEWER_PREFIXES = ("panel:", "corroborated")
|
||||||
|
|
||||||
|
# Chair decisions that are a firm keep/drop judgment → map to the coarse
|
||||||
|
# is_holding gold-set axis ("is this a real, keepable rule?", the axis
|
||||||
|
# halacha_panel_calibrate.py measures against). 'deferred'/'pending_review'
|
||||||
|
# are a snooze, not a judgment → no seed.
|
||||||
|
_CHAIR_SEED_LABEL = {"approved": True, "published": True, "rejected": False}
|
||||||
|
|
||||||
|
|
||||||
|
def _chair_seed_label(review_status: str, reviewer: str = "") -> bool | None:
|
||||||
|
"""Pure gate for the FU-2 seed: the is_holding label a chair decision should
|
||||||
|
mint, or None when NO seed is allowed — either a non-firm status
|
||||||
|
(deferred/pending_review) or a machine reviewer (echo-chamber guard). Kept
|
||||||
|
pure (no DB) so the guard is unit-testable offline."""
|
||||||
|
is_holding = _CHAIR_SEED_LABEL.get(review_status)
|
||||||
|
if is_holding is None:
|
||||||
|
return None
|
||||||
|
rev = (reviewer or "").strip().lower()
|
||||||
|
if any(rev.startswith(p) for p in _MACHINE_REVIEWER_PREFIXES):
|
||||||
|
return None
|
||||||
|
return is_holding
|
||||||
|
|
||||||
|
|
||||||
|
async def seed_goldset_from_chair(
    halacha_id: UUID, review_status: str, reviewer: str = "",
) -> bool:
    """Active-learning seed at the existing chair gate (#133 / FU-2).

    When the chair makes a firm keep/drop decision on a halacha the panel
    ALREADY adjudicated, capture it as a chair-tagged gold-set label. This is
    the ONLY signal the active-learning loop is allowed to learn from — human
    ground-truth, never the panel's own votes (learning from the votes is an
    echo-chamber: agreement rises, accuracy doesn't, the panel drifts from the
    chair). The (panel votes ⋈ chair decision) pair feeds rubric distillation
    (FU-4) and measurement (FU-5).

    Capture-only and idempotent (UPSERT on the chair-live batch); it never
    touches `halachot` (the chair gate on /precedents stays the single source
    of truth, INV-G10) and never raises into the chair gate: every failure,
    including one raised while evaluating the gate itself, is logged as a
    warning and reported as False.

    Fires only when ALL hold:
      - review_status is a firm decision (approved/published → keep=True,
        rejected → drop=False); 'deferred'/'pending_review' are skipped.
      - the reviewer is human (machine reviewers excluded — defense-in-depth).
      - a prior halacha_panel_rounds row exists: the seed's value is the
        panel-vs-chair pair, so a halacha the panel never judged is skipped.

    Args:
        halacha_id: Id of the halacha the chair decided on.
        review_status: The status the chair set.
        reviewer: Free-text reviewer string; may be empty.

    Returns:
        True iff a seed row was written/updated.
    """
    try:
        # The gate is evaluated inside the try so that an unexpected argument
        # (e.g. an unhashable status or a non-string reviewer) cannot raise
        # into the chair gate either.
        is_holding = _chair_seed_label(review_status, reviewer)
        if is_holding is None:
            return False
        pool = await get_pool()
        # Only seed halachot the panel previously adjudicated (FU-1 rounds).
        had_round = await pool.fetchval(
            "SELECT EXISTS(SELECT 1 FROM halacha_panel_rounds WHERE halacha_id = $1)",
            halacha_id,
        )
        if not had_round:
            return False
        await pool.execute(
            "INSERT INTO halacha_goldset (halacha_id, batch, is_holding, tagged_by, tagged_at) "
            "VALUES ($1, 'chair-live', $2, 'chair', now()) "
            "ON CONFLICT (halacha_id, batch) DO UPDATE "
            "SET is_holding = EXCLUDED.is_holding, tagged_by = 'chair', tagged_at = now()",
            halacha_id, is_holding,
        )
        return True
    except Exception as e:  # never let a learning seed break the chair gate
        # exc_info keeps the traceback in the log; the message is unchanged.
        logger.warning(
            "FU-2 gold-set seed failed for halacha %s: %s", halacha_id, e,
            exc_info=True,
        )
        return False
|
||||||
|
|
||||||
|
|
||||||
async def goldset_tag(
|
async def goldset_tag(
|
||||||
goldset_id: UUID, *, is_holding: bool | None = None,
|
goldset_id: UUID, *, is_holding: bool | None = None,
|
||||||
correct_type: str | None = None, quote_complete: bool | None = None,
|
correct_type: str | None = None, quote_complete: bool | None = None,
|
||||||
|
|||||||
66
mcp-server/tests/test_chair_seed_gate.py
Normal file
66
mcp-server/tests/test_chair_seed_gate.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
"""Tests for #133 / FU-2 — the chair-decision active-learning seed gate.
|
||||||
|
|
||||||
|
Covers the PURE gate function db._chair_seed_label, which decides whether (and
|
||||||
|
with what is_holding label) a chair decision on a halacha should mint a gold-set
|
||||||
|
seed. The DB write (seed_goldset_from_chair) and the prior-panel-round filter
|
||||||
|
need a live Postgres and are exercised via the integration smoke test in the
|
||||||
|
task's testStrategy; here we lock down the policy offline.
|
||||||
|
|
||||||
|
The critical invariant under test: a seed is NEVER minted from a machine
|
||||||
|
reviewer (echo-chamber guard, #133) — only firm human keep/drop decisions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from legal_mcp.services import db
|
||||||
|
|
||||||
|
|
||||||
|
# ── firm decisions map to the coarse is_holding axis ──────────────────────────
|
||||||
|
|
||||||
|
def test_approved_is_keep():
    """An 'approved' decision by a human reviewer yields the keep label."""
    label = db._chair_seed_label("approved", "דפנה")
    assert label is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_published_is_keep():
    """A 'published' decision by a human reviewer yields the keep label."""
    label = db._chair_seed_label("published", "דפנה")
    assert label is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_rejected_is_drop():
    """A 'rejected' decision by a human reviewer yields the drop label."""
    label = db._chair_seed_label("rejected", "דפנה")
    assert label is False
|
||||||
|
|
||||||
|
|
||||||
|
# ── non-firm statuses mint no seed (a snooze is not a judgment) ────────────────
|
||||||
|
|
||||||
|
def test_deferred_no_seed():
    """'deferred' is not a firm decision, so the gate returns None."""
    label = db._chair_seed_label("deferred", "דפנה")
    assert label is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_pending_review_no_seed():
    """'pending_review' is not a firm decision, so the gate returns None."""
    label = db._chair_seed_label("pending_review", "דפנה")
    assert label is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_status_no_seed():
    """A status the gate does not recognise mints no seed.

    Covers both the empty string and a non-empty value that is not one of the
    firm chair decisions.
    """
    assert db._chair_seed_label("", "דפנה") is None
    assert db._chair_seed_label("no-such-status", "דפנה") is None
|
||||||
|
|
||||||
|
|
||||||
|
# ── echo-chamber guard: machine reviewers never seed ──────────────────────────
|
||||||
|
|
||||||
|
def test_panel_reviewer_blocked():
    """The 3-judge panel must never label its own ground-truth (echo-chamber)."""
    panel_reviewer = "panel:opus+deepseek+gemini 2/3-keep"
    assert db._chair_seed_label("approved", panel_reviewer) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_corroboration_reviewer_blocked():
    """A reviewer string starting with 'corroborated' never mints a seed."""
    corroboration_reviewer = "corroborated (4 judicial citations ≥ 2)"
    assert db._chair_seed_label("approved", corroboration_reviewer) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_panel_reviewer_blocked_case_insensitive():
    """The machine-reviewer prefix match ignores letter case."""
    label = db._chair_seed_label("rejected", "PANEL:opus")
    assert label is None
|
||||||
|
|
||||||
|
|
||||||
|
# ── empty reviewer is still a human gate (UI sends no reviewer string) ─────────
|
||||||
|
|
||||||
|
def test_empty_reviewer_is_human_gate():
    """An empty reviewer string still counts as the chair gate.

    The /precedents UI patches review_status with no reviewer string; the
    panel/corroboration paths use raw SQL, not update_halacha.
    """
    label = db._chair_seed_label("approved", "")
    assert label is True
|
||||||
Reference in New Issue
Block a user