feat(learning): FU-1 — לכידת סבבי-פאנל להלכות (#133)
All checks were successful
G12 Leak-Guard / leak-guard (pull_request) Successful in 7s

לולאת ה-active-learning זקוקה לסיגנל ללמוד ממנו, אבל הפאנל
(halacha_panel_approve.py) זרק עד כה את הצבעות-3-השופטים ואת
ההנמקות — שרד רק review_status הסופי על halachot. בלי
ההצבעות+הנימוקים אין דרך לזקק rubric משופר.

FU-1:
- טבלה חדשה halacha_panel_rounds (SCHEMA_V35) — שורה לכל
  (הלכה, סבב): הצבעה+נימוק לכל לינאז' (claude/deepseek/gemini),
  ה-verdict, ומה הריצה עשתה (applied_action), apply_mode.
  במתכונת עמודות-הפאנל של halacha_goldset.
- db.insert_panel_round() — helper כתיבה (capture-only).
- halacha_panel_approve.py: שומר את התשובות הגולמיות (במקום
  לזרוק את הנימוק), מוסיף reason ל-NLI_SYSTEM, וכותב סבב לכל
  פריט בשני המצבים (dry-run ו-`--apply`). `--no-capture` לדילוג.

capture-only: לעולם לא נוגע ב-halachot — שער-היו"ר ב-/precedents
נשאר מקור-האמת היחיד (INV-G10). ה-seed ללמידה נוצר בהצלבה מול
הכרעת-היו"ר המאוחרת על אותה הלכה (FU-2).

Invariants: מקיים INV-G10 (capture-only, שער-יו"ר יחיד),
INV-LRN1/3 (לכידה-מבנית; propose-only — אין auto-commit),
G1 (לכידה-במקור), G2 (יכולת חדשה, לא מסלול-מקביל),
G12 (לא נוגע ב-Paperclip port). חלק מ-#133.

smoke (dry-run --limit 8): 6 nli captured, errors=0, נימוקים
מלאים מ-3 השופטים.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-12 04:22:48 +00:00
parent b4e79aa8fa
commit 0a7869175e
3 changed files with 168 additions and 61 deletions

View File

@@ -1455,6 +1455,39 @@ CREATE INDEX IF NOT EXISTS idx_decision_lessons_review
ON decision_lessons(review_status);
"""
# Schema v35 DDL: creates the halacha_panel_rounds capture table plus two
# lookup indexes (by halacha_id and by round_ts). Every statement uses
# IF NOT EXISTS, so executing this script again is a no-op once the objects
# exist. The "--" lines inside the literal are SQL comments sent to the server
# as part of the script; they are runtime text, not Python comments.
SCHEMA_V35_SQL = """
-- halacha_panel_rounds (#133 / FU-1): captures EVERY 3-judge panel adjudication
-- so the active-learning loop has something to learn from. Until now the panel
-- (halacha_panel_approve.py) threw the per-judge votes and rationales away — only
-- the final review_status survived on `halachot`. Without the votes+reasons there
-- is no signal to mine ("panel said X, chair said Y") and no way to distil a better
-- decision rubric. One row per (halacha, round): the three lineages' vote+reason,
-- the derived verdict, and what the run did about it. This is a CAPTURE/audit table,
-- NOT a decision — it never changes a halacha's review_status (the chair gate on
-- /precedents stays the single source of truth, INV-G10). The learning seed is
-- formed later by joining this against the chair's decision on `halachot`
-- (reviewed_at > round_ts, reviewer='דפנה'). Modeled on halacha_goldset's panel
-- columns. question = which axis was judged ('keep' for clean bucket, 'entailed'
-- for nli). apply_mode=false means a dry-run produced the row (still kept — every
-- analysis is a learning datapoint); true means --apply acted on it.
CREATE TABLE IF NOT EXISTS halacha_panel_rounds (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
halacha_id UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE,
round_ts TIMESTAMPTZ NOT NULL, -- one stamp shared by a whole run
question TEXT NOT NULL, -- 'keep' | 'entailed'
bucket TEXT NOT NULL DEFAULT '', -- clean | nli | defect | other
claude_vote BOOLEAN, claude_reason TEXT NOT NULL DEFAULT '',
deepseek_vote BOOLEAN, deepseek_reason TEXT NOT NULL DEFAULT '',
gemini_vote BOOLEAN, gemini_reason TEXT NOT NULL DEFAULT '',
verdict TEXT NOT NULL DEFAULT '', -- unanimous_yes|unanimous_no|split|incomplete
applied_action TEXT NOT NULL DEFAULT '', -- approved|rejected|nli_cleared|chair|'' (dry-run)
apply_mode BOOLEAN NOT NULL DEFAULT false,
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_panel_rounds_halacha ON halacha_panel_rounds(halacha_id);
CREATE INDEX IF NOT EXISTS idx_panel_rounds_ts ON halacha_panel_rounds(round_ts);
"""
async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
async with pool.acquire() as conn:
@@ -1493,7 +1526,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
await conn.execute(SCHEMA_V32_SQL)
await conn.execute(SCHEMA_V33_SQL)
await conn.execute(SCHEMA_V34_SQL)
logger.info("Database schema initialized (v1-v33)")
await conn.execute(SCHEMA_V35_SQL)
logger.info("Database schema initialized (v1-v35)")
async def init_schema() -> None:
@@ -5015,6 +5049,42 @@ async def goldset_set_panel_label(
)
async def insert_panel_round(
    halacha_id: UUID, *, round_ts: datetime, question: str, bucket: str,
    claude: dict | None, deepseek: dict | None, gemini: dict | None,
    vote_key: str, verdict: str, applied_action: str = "", apply_mode: bool = False,
) -> None:
    """Persist one 3-judge panel adjudication of one halacha (#133 / FU-1).

    Capture-only: the only statement issued is a single INSERT into
    ``halacha_panel_rounds``; ``halachot`` is never written here (the chair
    gate stays the single source of truth, INV-G10).

    Args:
        halacha_id: The halacha that was adjudicated.
        round_ts: Timestamp stored in ``round_ts``; callers share one value
            across a whole run so the round can be reconstructed.
        question: Axis that was judged ('keep' or 'entailed').
        bucket: Bucket label stored as-is.
        claude, deepseek, gemini: Each judge's raw reply dict
            (``{"<vote_key>": ..., "reason": ...}``), or ``None`` when that
            judge failed.
        vote_key: Key under which the vote lives in each reply dict
            ('keep' for the clean bucket, 'entailed' for nli).
        verdict: Derived panel verdict, stored as-is.
        applied_action: What the run did about the verdict ('' by default).
        apply_mode: Stored as-is in ``apply_mode``; defaults to ``False``.

    Vote normalisation: a ``bool`` is stored unchanged; a missing judge, a
    missing key, or an explicit ``None`` (JSON null) is stored as SQL NULL;
    any other value is stringified and counts as ``True`` only when it matches
    one of the accepted truthy tokens. Reasons are stringified and truncated
    to 500 characters; a missing or empty reason is stored as ''.
    """
    truthy_tokens = ("true", "1", "yes", "כן")

    def _vote(reply):
        # A non-dict reply means the judge produced nothing usable.
        if not isinstance(reply, dict):
            return None
        raw = reply.get(vote_key)
        # A missing key and an explicit null both mean "no vote". Without this
        # guard str(None) == "none" would be recorded as a False vote.
        if raw is None:
            return None
        if isinstance(raw, bool):
            return raw
        return str(raw).strip().lower() in truthy_tokens

    def _reason(reply):
        if not isinstance(reply, dict):
            return ""
        # Cap the rationale so a runaway model reply cannot bloat the row.
        return str(reply.get("reason") or "")[:500]

    pool = await get_pool()
    await pool.execute(
        "INSERT INTO halacha_panel_rounds (halacha_id, round_ts, question, bucket, "
        "claude_vote, claude_reason, deepseek_vote, deepseek_reason, "
        "gemini_vote, gemini_reason, verdict, applied_action, apply_mode) "
        "VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)",
        halacha_id, round_ts, question, bucket,
        _vote(claude), _reason(claude), _vote(deepseek), _reason(deepseek),
        _vote(gemini), _reason(gemini), verdict, applied_action, apply_mode,
    )
async def goldset_tag(
goldset_id: UUID, *, is_holding: bool | None = None,
correct_type: str | None = None, quote_complete: bool | None = None,