From 614c06ab606a70301a9f4bbd420350bd50355fe0 Mon Sep 17 00:00:00 2001 From: Chaim Date: Fri, 12 Jun 2026 04:37:12 +0000 Subject: [PATCH] =?UTF-8?q?feat(learning):=20FU-2=20=E2=80=94=20=D7=9C?= =?UTF-8?q?=D7=9B=D7=99=D7=93=D7=AA=20seed=20=D7=90=D7=A7=D7=98=D7=99?= =?UTF-8?q?=D7=91-=D7=9C=D7=A8=D7=A0=D7=99=D7=A0=D7=92=20=D7=91=D7=A9?= =?UTF-8?q?=D7=A2=D7=A8-=D7=94=D7=99=D7=95"=D7=A8=20=D7=94=D7=A7=D7=99?= =?UTF-8?q?=D7=99=D7=9D=20(#133)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit כל הכרעת keep/drop חדה של היו"ר על הלכה שהפאנל כבר שפט (יש לה שורה ב-halacha_panel_rounds) פולטת seed gold-set מתויג-יו"ר — הסיגנל היחיד שמותר ללולאת הלמידה ללמוד ממנו. לימוד מהצבעות-הפאנל-עצמן = echo-chamber ואסור; לכן הזרע נטבע אך-ורק מהכרעה אנושית. - db.seed_goldset_from_chair(): capture-only, idempotent (UPSERT על batch='chair-live', tagged_by='chair'), לעולם לא נוגע ב-halachot ולא זורק שגיאה לתוך השער (INV-G10). ממפה approved/published→keep, rejected→drop; deferred/pending_review = נודניק, בלי seed. - db._chair_seed_label(): שער טהור (בלי DB) → guard echo-chamber unit-testable; מסנן reviewer מכונה (panel:* / corroborated*). - מחובר ב-db layer (update_halacha + update_halachot_batch) כך שכל מסלולי-השער מתכנסים (G1 נרמול-במקור, G2 בלי מסלול מקביל). הפאנל משתמש ב-SQL גולמי ולא ב-update_halacha → אין echo-chamber מבני. - מצריך שורת-פאנל קודמת: ערך-הזרע הוא זוג (הצבעות-פאנל ⋈ הכרעת-יו"ר) שמזין זיקוק-rubric (FU-4) ומדידה (FU-5). - test_chair_seed_gate.py: 10 בדיקות offline על מדיניות-השער + guard. Invariants: INV-G10 (שער-אישור יחיד, capture-only) · INV-LRN1 (propose-only — אין auto-commit) · G1/G2 · anti-echo-chamber (#133). אין UI/שער חדש (INV-IA). תצוגת-הצבעות-הפאנל ב-HalachaReviewPanel (אופציונלי) נדחית — מצריכה שער-עיצוב Claude Design. Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/src/legal_mcp/services/db.py | 87 ++++++++++++++++++++++++ mcp-server/tests/test_chair_seed_gate.py | 66 ++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 mcp-server/tests/test_chair_seed_gate.py diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 1f3bdb2..b26800a 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -4635,6 +4635,10 @@ async def update_halacha( reviewed_at, created_at, updated_at """ row = await pool.fetchrow(sql, *params) + # FU-2 (#133): a firm chair decision on a panel-adjudicated halacha mints an + # active-learning seed. Capture-only — never blocks the gate (errors logged). + if row and review_status is not None: + await seed_goldset_from_chair(halacha_id, review_status, reviewer) return dict(row) if row else None @@ -4669,6 +4673,12 @@ async def update_halachot_batch( WHERE id = ANY($1::uuid[])""", ids, review_status, *( [reviewer] if stamp else [] ), ) + # FU-2 (#133): mint an active-learning seed for each panel-adjudicated + # halacha in the group. seed_goldset_from_chair is idempotent, self-guards + # on a prior panel round, and never raises. + if stamp: + for hid in ids: + await seed_goldset_from_chair(hid, review_status, reviewer) try: return int(result.split()[-1]) except (ValueError, IndexError): @@ -5089,6 +5099,83 @@ async def insert_panel_round( ) +# The machine reviewers that DON'T represent a human ground-truth decision. +# A seed must never be minted from these (echo-chamber guard, #133). They use +# raw SQL today and never reach update_halacha, so this is defense-in-depth. +_MACHINE_REVIEWER_PREFIXES = ("panel:", "corroborated") + +# Chair decisions that are a firm keep/drop judgment → map to the coarse +# is_holding gold-set axis ("is this a real, keepable rule?", the axis +# halacha_panel_calibrate.py measures against). 'deferred'/'pending_review' +# are a snooze, not a judgment → no seed. +_CHAIR_SEED_LABEL = {"approved": True, "published": True, "rejected": False} + + +def _chair_seed_label(review_status: str, reviewer: str = "") -> bool | None: + """Pure gate for the FU-2 seed: the is_holding label a chair decision should + mint, or None when NO seed is allowed — either a non-firm status + (deferred/pending_review) or a machine reviewer (echo-chamber guard). Kept + pure (no DB) so the guard is unit-testable offline.""" + is_holding = _CHAIR_SEED_LABEL.get(review_status) + if is_holding is None: + return None + rev = (reviewer or "").strip().lower() + if any(rev.startswith(p) for p in _MACHINE_REVIEWER_PREFIXES): + return None + return is_holding + + +async def seed_goldset_from_chair( + halacha_id: UUID, review_status: str, reviewer: str = "", +) -> bool: + """Active-learning seed at the existing chair gate (#133 / FU-2). + + When the chair makes a firm keep/drop decision on a halacha the panel + ALREADY adjudicated, capture it as a chair-tagged gold-set label. This is + the ONLY signal the active-learning loop is allowed to learn from — human + ground-truth, never the panel's own votes (learning from the votes is an + echo-chamber: agreement rises, accuracy doesn't, the panel drifts from the + chair). The (panel votes ⋈ chair decision) pair feeds rubric distillation + (FU-4) and measurement (FU-5). + + Capture-only and idempotent (UPSERT on the chair-live batch); it never + touches `halachot` (the chair gate on /precedents stays the single source + of truth, INV-G10) and never raises into the chair gate. + + Fires only when ALL hold: + - review_status is a firm decision (approved/published → keep=True, + rejected → drop=False); 'deferred'/'pending_review' are skipped. + - the reviewer is human (machine reviewers excluded — defense-in-depth). + - a prior halacha_panel_rounds row exists: the seed's value is the + panel-vs-chair pair, so a halacha the panel never judged is skipped. + + Returns True iff a seed row was written/updated. + """ + is_holding = _chair_seed_label(review_status, reviewer) + if is_holding is None: + return False + try: + pool = await get_pool() + # Only seed halachot the panel previously adjudicated (FU-1 rounds). + had_round = await pool.fetchval( + "SELECT EXISTS(SELECT 1 FROM halacha_panel_rounds WHERE halacha_id = $1)", + halacha_id, + ) + if not had_round: + return False + await pool.execute( + "INSERT INTO halacha_goldset (halacha_id, batch, is_holding, tagged_by, tagged_at) " + "VALUES ($1, 'chair-live', $2, 'chair', now()) " + "ON CONFLICT (halacha_id, batch) DO UPDATE " + "SET is_holding = EXCLUDED.is_holding, tagged_by = 'chair', tagged_at = now()", + halacha_id, is_holding, + ) + return True + except Exception as e: # never let a learning seed break the chair gate + logger.warning("FU-2 gold-set seed failed for halacha %s: %s", halacha_id, e) + return False + + async def goldset_tag( goldset_id: UUID, *, is_holding: bool | None = None, correct_type: str | None = None, quote_complete: bool | None = None, diff --git a/mcp-server/tests/test_chair_seed_gate.py b/mcp-server/tests/test_chair_seed_gate.py new file mode 100644 index 0000000..f15db75 --- /dev/null +++ b/mcp-server/tests/test_chair_seed_gate.py @@ -0,0 +1,66 @@ +"""Tests for #133 / FU-2 — the chair-decision active-learning seed gate. + +Covers the PURE gate function db._chair_seed_label, which decides whether (and +with what is_holding label) a chair decision on a halacha should mint a gold-set +seed. The DB write (seed_goldset_from_chair) and the prior-panel-round filter +need a live Postgres and are exercised via the integration smoke test in the +task's testStrategy; here we lock down the policy offline. + +The critical invariant under test: a seed is NEVER minted from a machine +reviewer (echo-chamber guard, #133) — only firm human keep/drop decisions. +""" + +from __future__ import annotations + +from legal_mcp.services import db + + +# ── firm decisions map to the coarse is_holding axis ────────────────────────── + +def test_approved_is_keep(): + assert db._chair_seed_label("approved", "דפנה") is True + + +def test_published_is_keep(): + assert db._chair_seed_label("published", "דפנה") is True + + +def test_rejected_is_drop(): + assert db._chair_seed_label("rejected", "דפנה") is False + + +# ── non-firm statuses mint no seed (a snooze is not a judgment) ──────────────── + +def test_deferred_no_seed(): + assert db._chair_seed_label("deferred", "דפנה") is None + + +def test_pending_review_no_seed(): + assert db._chair_seed_label("pending_review", "דפנה") is None + + +def test_unknown_status_no_seed(): + assert db._chair_seed_label("", "דפנה") is None + + +# ── echo-chamber guard: machine reviewers never seed ────────────────────────── + +def test_panel_reviewer_blocked(): + """The 3-judge panel must never label its own ground-truth (echo-chamber).""" + assert db._chair_seed_label("approved", "panel:opus+deepseek+gemini 2/3-keep") is None + + +def test_corroboration_reviewer_blocked(): + assert db._chair_seed_label("approved", "corroborated (4 judicial citations ≥ 2)") is None + + +def test_panel_reviewer_blocked_case_insensitive(): + assert db._chair_seed_label("rejected", "PANEL:opus") is None + + +# ── empty reviewer is still a human gate (UI sends no reviewer string) ───────── + +def test_empty_reviewer_is_human_gate(): + """The /precedents UI patches review_status with no reviewer string; that is + still the chair gate (the panel/corroboration use raw SQL, not update_halacha).""" + assert db._chair_seed_label("approved", "") is True -- 2.49.1