From df007784c93bf4fb21fdb8efd29bf8150d89df73 Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 1 Jun 2026 04:34:23 +0000 Subject: [PATCH 1/3] feat(corroboration): approval_action decision fn + kill-switch (INV-COR2/COR4, X11 Phase 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - HALACHA_CORROBORATION_AUTO_APPROVE config (default ON, Dafna validated 2026-06-01) - approval_action(agg, has_overruled): overruled→demote, corroborated→approve, else None - 4 offline unit tests; Phase 2 plan + TaskMaster #75 Co-Authored-By: Claude Opus 4.8 (1M context) --- .taskmaster/tasks/tasks.json | 55 +++- ...06-01-x11-citation-corroboration-phase2.md | 290 ++++++++++++++++++ mcp-server/src/legal_mcp/config.py | 6 + .../src/legal_mcp/services/corroboration.py | 16 + mcp-server/tests/test_corroboration.py | 21 ++ 5 files changed, 385 insertions(+), 3 deletions(-) create mode 100644 docs/superpowers/plans/2026-06-01-x11-citation-corroboration-phase2.md diff --git a/.taskmaster/tasks/tasks.json b/.taskmaster/tasks/tasks.json index 1ca4ee3..855fb3c 100644 --- a/.taskmaster/tasks/tasks.json +++ b/.taskmaster/tasks/tasks.json @@ -2432,13 +2432,62 @@ "priority": "low", "subtasks": [], "updatedAt": "2026-05-31T16:05:35.071261+00:00" + }, + { + "id": "72", + "title": "[ops] MCP 'No such tool' תחת עומס חילוץ opus-4-8@xhigh — timeout ב-handshake", + "description": "בריצת CMPA-71 (חילוץ הלכות 9002-24, סוכן עוזר משפטי) שרת ה-legal-ai MCP לא נטען — כל קריאות mcp__legal-ai__* החזירו 'No such tool available' אחרי 3 ניסיונות+המתנות; הסוכן עשה fallback ל-.venv ישיר (לפי legal-ceo.md) והחילוץ הצליח על claude-opus-4-8@xhigh.", + "status": "done", + "priority": "medium", + "dependencies": [], + "details": "שורש (אובחן 2026-05-31 ~20:02): ה-.mcp.json של ה-workspace תקין (command/cwd/env נכונים), ו-import של legal_mcp.server מהיר (~2s, 110MB) — לא config שבור ולא רגרסיית Paperclip 529. 
הגורם: עומס-מכונה קיצוני (load avg 30.0, 10 תהליכי 'claude -p --effort xhigh' במקביל) → ה-MCP handshake לא ייצב בתוך ה-timeout של claude → 'No such tool'. תופעת-לוואי של מעבר חילוץ-הלכות ל-opus-4-8@xhigh (PR #26) שהוא CPU/token-כבד; ראינו 10 תהליכים למרות CHUNK_CONCURRENCY=3 (לבדוק אם רצו כמה חילוצים/heartbeats במקביל). מקלות: (1) להקטין CHUNK_CONCURRENCY ל-xhigh, או effort נמוך-יותר לחילוץ-bulk; (2) להאריך MCP startup timeout לסוכנים; (3) להגביל חילוצים מקבילים. ה-fallback ב-legal-ceo.md עבד — עמידוּת טובה. ראה run-log: instances/default/data/run-logs/8639e837.../cdbfa8bc-.../ [נסגר 2026-05-31] שלוש הקשחות מוזגו: נעילה גלובלית (PR #30, חילוץ אחד בכל רגע), חילוץ מצטבר crash-safe+resume (PR #31), ו-effort קל-יותר ל-bulk (PR #32, config.HALACHA_BULK_EXTRACT_EFFORT=high). שורש ה-freeze מטופל בכל הצירים.", + "testStrategy": "לשחזר: להריץ חילוץ xhigh כבד ובמקביל להעיר סוכן — לוודא שה-MCP נטען (אין 'No such tool'). אחרי מקלה (concurrency/timeout): load < ~עומס-ליבות, ו-handshake מצליח.", + "subtasks": [] + }, + { + "id": "73", + "title": "החלטת ועדת ערר: ברירת מחדל is_binding=false (יישור דוקטרינרי)", + "description": "כשמעלים החלטת ועדת ערר דרך מסך העלאת הפסיקה (precedent-upload-sheet, isCommittee=true), הצ'קבוקס 'הלכה מחייבת' (is_binding) כברירת מחדל הוא true — כך שההלכות שמחולצות מהחלטה לא-מחייבת מתויגות rule_type='binding'. זה סותר את ההגדרה הדוקטרינרית שלנו (ועדת ערר = persuasive בלבד, לא binding כמו עליון/מנהלי). התיקון: כש-isCommittee=true ב-precedent-upload-sheet.tsx, להפוך את is_binding ל-false כברירת מחדל (או לנעול/להסתיר את הצ'קבוקס ולתייג אוטומטית persuasive). הערה חשובה: זהו תיקון יישור-דוקטרינרי בלבד — אין השפעה downstream על ranking/injection (rule_type הוא תווית תצוגה; השער הפונקציונלי האמיתי הוא review_status שדפנה שולטת בו ידנית). 
קבצים: web-ui/src/components/precedents/precedent-upload-sheet.tsx (useState isBinding שורה 47, isCommittee שורה 53); guard clause קיים ב-mcp-server/src/legal_mcp/services/halacha_extractor.py:229-235 שמוריד binding→persuasive רק כאשר is_binding=false.", + "details": "", + "testStrategy": "", + "status": "done", + "dependencies": [], + "priority": "medium", + "subtasks": [], + "updatedAt": "2026-05-31T20:41:04.160Z" + }, + { + "id": "74", + "title": "ניקוי רטרואקטיבי: rule_type binding→persuasive להלכות ממקור ועדת ערר", + "description": "המשך משימה #73 (PR #29 מנע binding חדש לועדת ערר מכאן והלאה). יש 82 הלכות קיימות ב-DB עם rule_type='binding' שמקורן (case_law) בהחלטת ועדת ערר — בסתירה לדוקטרינה (ועדת ערר = persuasive). פילוח: 75 approved + 7 pending_review. גישה #2 (שמרנית): לתקן רק את ה-binding ל-persuasive, ולהשאיר interpretive/procedural/application/obiter כמות שהם (תקינים גם לועדת ערר). הגדרת 'מקור ועדת ערר': case_law WHERE source_type='appeals_committee' OR precedent_level LIKE 'ועדת%' OR court LIKE '%ועדת%ערר%' OR court LIKE '%ועדות ערר%'. שאילתה: UPDATE halachot SET rule_type='persuasive' WHERE rule_type='binding' AND case_law_id IN (). הערה: rule_type הוא תווית תצוגה בלבד — אין השפעה על ranking/injection (השער הפונקציונלי הוא review_status). DB: legal_ai על Postgres pgvector קונטיינר t84kegpjm5qrttd6nw7bgoxe (פורט 5433). ביצוע דרך docker exec עם trust מקומי. לגבות/לספור לפני ואחרי לאימות (צפוי: 82 שורות מושפעות, 0 binding ממקור ועדת ערר אחרי).", + "details": "", + "testStrategy": "", + "status": "done", + "dependencies": [ + "73" + ], + "priority": "low", + "subtasks": [], + "updatedAt": "2026-05-31T20:49:28.894Z" + }, + { + "id": "75", + "title": "[X11 Phase 2] חיווט אוטו-אישור מבוסס-ציטוט + backfill", + "description": "Phase 2 של citation-corroboration (X11). Phase 1 (האות) מוזג ב-PR #27. דפנה אימתה את האות ואישרה הפעלה (2026-06-01). 
Phase 2: (1) חיווט אוטו-אישור — הלכה corroborated (≥2 ציטוטים חיוביים בלתי-תלויים, 0 שליליים) עוברת ל-review_status='approved' עם reviewer='corroborated (…judicial citations)' (INV-COR4/G10); (2) הדחת overruled — הלכה approved שקיבלה טיפול overruled בציטוט מאוחר חוזרת לשער-היו\"ר (INV-COR2); (3) backfill על 12 התקדימים (halachot+ציטוטים-נכנסים); (4) כלי-MCP write להרצת rebuild.", + "details": "דגל: HALACHA_CORROBORATION_AUTO_APPROVE (default true, env-tunable). פונקציית-הכרעה טהורה approval_action(agg, has_overruled)→'approve'/'demote'/None (unit-tested, INV-COR2/COR4). DB: approve_halacha_by_corroboration (רק על pending_review), demote_halacha_overruled (רק על approved→pending_review), list_corroboration_grouped, precedents_with_halachot_and_incoming_citations. שירות: reconcile_approvals מופעל בסוף build_for_precedent; build_all driver. backfill target=12 תקדימים (אומת 2026-06-01). נדחה ל-backlog (proposal-only, מסוכן-תוכן): enrichment של rule_statement, treatment-backfill ל-case_law_citations.citation_type. תוכנית: docs/superpowers/plans/2026-06-01-x11-citation-corroboration-phase2.md. spec: docs/spec/X11-citation-corroboration.md §4-6.", + "testStrategy": "unit: approval_action — overruled→demote, corroborated→approve, יחיד/שלילי→None. integration: build_for_precedent על שפר מחזיר approved/flagged; backfill על 12 תקדימים ללא exception. 
DB: רק pending_review→approved (לא נוגע ב-published/rejected); overruled מדיח רק approved.", + "status": "in-progress", + "dependencies": [], + "priority": "high", + "subtasks": [], + "updatedAt": "2026-06-01T00:00:00.000000+00:00" } ], "metadata": { "version": "1.0.0", - "lastModified": "2026-05-31T16:01:42.033Z", - "taskCount": 70, - "completedCount": 64, + "lastModified": "2026-05-31T20:49:28.894Z", + "taskCount": 74, + "completedCount": 66, "tags": [ "legal-ai" ] diff --git a/docs/superpowers/plans/2026-06-01-x11-citation-corroboration-phase2.md b/docs/superpowers/plans/2026-06-01-x11-citation-corroboration-phase2.md new file mode 100644 index 0000000..bba3fbe --- /dev/null +++ b/docs/superpowers/plans/2026-06-01-x11-citation-corroboration-phase2.md @@ -0,0 +1,290 @@ +# X11 Citation Corroboration — Phase 2 (Wire the approval gate) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Turn the Phase 1 **signal** into an **approval action**. A halacha that is *corroborated* by ≥N distinct positive judicial citations (0 negatives) is auto-approved with citation provenance; a halacha that a later citing court *overruled* is demoted back to the chair gate. Then **backfill** the signal+approval across the whole corpus. + +**Gate cleared:** Phase 1's "Out of scope" deferred auto-approval as *"Sensitive — gated on Dafna validating the signal from Phase 1 first."* Dafna validated the signal and approved enabling it (2026-06-01). This plan builds the **active** wiring (default ON, env-tunable kill-switch). + +**Architecture:** No schema change — Phase 1's `halacha_citation_corroboration` table already holds the provenance. We add: +1. a pure decision function `approval_action(agg, has_overruled)` (unit-tested, INV-COR2/COR4), +2. 
DB transitions that move *only* the legal states (`pending_review → approved` on corroboration; `approved → pending_review` on overruled) — never touching `published`/`rejected`, +3. `reconcile_approvals(case_law_id)` called at the tail of `build_for_precedent`, +4. a corpus `build_all()` backfill driver + a **write** MCP tool `corroboration_rebuild`. + +**Tech Stack:** Python 3.12, asyncpg, FastMCP, `claude_session` (local Opus 4.8), pytest (offline deterministic). + +**Spec:** [docs/spec/X11-citation-corroboration.md](../../spec/X11-citation-corroboration.md) §4 step 5, §5 (INV-COR2/COR4/COR5/COR6), §6 (INV-G10 amendment). + +--- + +## Invariant mapping (what each rule forces here) + +- **INV-COR4** — auto-approve requires `positive_sources ≥ N` distinct sources ∧ `has_negative == False`. `aggregate()` (Phase 1) already computes this; Phase 2 only *acts* on `corroborated == True`. +- **INV-COR2** — negative treatment never approves; **overruled** demotes. We split "negative" (blocks approval — already handled by `aggregate`) from **overruled** (actively *demotes an already-approved* halacha back to the chair). +- **INV-COR5** — the chair gate is preserved for the uncorroborated tail and all negatives. We only transition the two legal states; uncorroborated halachot are never touched. +- **INV-COR6** — provenance is retained: `reviewer` records the corroboration basis; the `halacha_citation_corroboration` rows remain the auditable evidence. +- **INV-G10 (amended §6)** — the human gate's *authority source* is cumulative judicial treatment for the corroborated subset; chair gate stays mandatory for the tail. Auto-approval is therefore not "AI judgment" but recorded human (citing-court) judgment (INV-COR1). + +**Demotion scope decision (precise reading of §4 step 5):** *any* negative blocks auto-approval (via `aggregate.has_negative`), but only **overruled** actively demotes a halacha that is already approved. 
`distinguished`/`criticized`/`questioned` block new auto-approval but do not un-approve an existing chair/confidence approval — that stronger action is reserved for `overruled`, and surfaced to the chair via the read tool. + +--- + +## Task 1: Config kill-switch + +**Files:** Modify `mcp-server/src/legal_mcp/config.py` + +- [ ] **Step 1:** After `HALACHA_CORROBORATION_MIN_CITES` (config.py:69) add: +```python +# X11 Phase 2: gate corroboration → approval. Default ON (Dafna validated the +# Phase 1 signal, 2026-06-01). Set to "false" to disable the auto-approve/demote +# wiring while keeping the signal (Phase 1) intact. +HALACHA_CORROBORATION_AUTO_APPROVE = os.environ.get( + "HALACHA_CORROBORATION_AUTO_APPROVE", "true" +).strip().lower() in ("1", "true", "yes", "on") +``` + +- [ ] **Step 2: Commit** `feat(config): HALACHA_CORROBORATION_AUTO_APPROVE kill-switch (X11 Phase 2)` + +--- + +## Task 2: Pure decision function `approval_action` (TDD) + +The whole approval policy distilled to one deterministic, offline-testable function. 
+ +**Files:** Modify `corroboration.py`; Test `tests/test_corroboration.py` + +- [ ] **Step 1: Failing test** — append to `tests/test_corroboration.py`: +```python +def test_approval_action_corroborated_approves(): + agg = {"positive_sources": 2, "has_negative": False, "corroborated": True} + assert cor.approval_action(agg, has_overruled=False) == "approve" + +def test_approval_action_overruled_demotes_even_if_corroborated(): + # overruled wins over a positive count (INV-COR2 strong form) + agg = {"positive_sources": 3, "has_negative": True, "corroborated": False} + assert cor.approval_action(agg, has_overruled=True) == "demote" + +def test_approval_action_single_source_noop(): + agg = {"positive_sources": 1, "has_negative": False, "corroborated": False} + assert cor.approval_action(agg, has_overruled=False) is None + +def test_approval_action_negative_nonoverruled_noop(): + # distinguished blocks approval but does not demote (no overruled) + agg = {"positive_sources": 2, "has_negative": True, "corroborated": False} + assert cor.approval_action(agg, has_overruled=False) is None +``` + +- [ ] **Step 2:** Run to verify FAIL (`approval_action` undefined). + +- [ ] **Step 3:** Implement in `corroboration.py`: +```python +def approval_action(agg: dict, has_overruled: bool) -> str | None: + """Decide the corroboration→approval action for ONE halacha (INV-COR2/COR4). + + - 'demote' : a later court overruled it → back to the chair gate (overruled + outranks any positive count). + - 'approve' : corroborated (≥N distinct positives, 0 negatives). + - None : leave as-is (single source, non-overruled negative, or tail). + """ + if has_overruled: + return "demote" + if agg.get("corroborated"): + return "approve" + return None +``` + +- [ ] **Step 4:** Run to verify PASS (all). 
**Commit** `feat(corroboration): approval_action decision fn (INV-COR2/COR4, X11 Phase 2)` + +--- + +## Task 3: DB transitions (legal states only) + +**Files:** Modify `mcp-server/src/legal_mcp/services/db.py` + +- [ ] **Step 1:** Add near `update_halacha` (db.py:3480-ish): +```python +async def approve_halacha_by_corroboration( + halacha_id: UUID, n_sources: int, min_cites: int, +) -> bool: + """Approve a halacha on citation corroboration — ONLY if it is currently + awaiting the chair (pending_review). Never touches 'published'/'rejected'/ + already-'approved' (INV-COR5: chair gate preserved for everything else). + Returns True iff a row transitioned.""" + pool = await get_pool() + reviewer = f"corroborated ({n_sources} judicial citations ≥ {min_cites})" + row = await pool.fetchrow( + "UPDATE halachot SET review_status='approved', reviewer=$2, " + "reviewed_at=now(), updated_at=now() " + "WHERE id=$1 AND review_status='pending_review' RETURNING id", + halacha_id, reviewer, + ) + return row is not None + + +async def demote_halacha_overruled(halacha_id: UUID) -> bool: + """Demote an APPROVED halacha back to the chair gate because a later citing + court overruled it (INV-COR2). Only acts on 'approved' → 'pending_review'; + leaves 'published'/'rejected'/'pending_review' untouched. The reviewer note + records why it is back in the queue. Returns True iff a row transitioned.""" + pool = await get_pool() + row = await pool.fetchrow( + "UPDATE halachot SET review_status='pending_review', " + "reviewer='flagged: overruled by later citation (X11)', " + "reviewed_at=NULL, updated_at=now() " + "WHERE id=$1 AND review_status='approved' RETURNING id", + halacha_id, + ) + return row is not None + + +async def list_corroboration_grouped(case_law_id: UUID) -> dict[str, list[dict]]: + """Per-halacha corroboration links for a cited precedent, in the + {source_id, treatment} shape `aggregate()` consumes. 
Distinct citing source + keyed by case_law/decision id (falls back to the citation row id).""" + pool = await get_pool() + rows = await pool.fetch( + "SELECT hcc.halacha_id::text AS halacha_id, " + " COALESCE(hcc.citing_case_law_id::text, hcc.citing_decision_id::text, " + " hcc.source_citation_id::text) AS source_id, " + " hcc.treatment " + "FROM halacha_citation_corroboration hcc " + "JOIN halachot h ON h.id = hcc.halacha_id " + "WHERE h.case_law_id = $1", + case_law_id, + ) + out: dict[str, list[dict]] = {} + for r in rows: + out.setdefault(r["halacha_id"], []).append( + {"source_id": r["source_id"], "treatment": r["treatment"]} + ) + return out + + +async def precedents_with_halachot_and_incoming_citations() -> list[str]: + """case_law ids that have at least one halacha AND at least one incoming + citation (either graph) — the backfill target set.""" + pool = await get_pool() + rows = await pool.fetch( + "SELECT c.id::text FROM case_law c " + "WHERE EXISTS (SELECT 1 FROM halachot h WHERE h.case_law_id=c.id) " + " AND (EXISTS (SELECT 1 FROM precedent_internal_citations p " + " WHERE p.cited_case_law_id=c.id) " + " OR EXISTS (SELECT 1 FROM case_law_citations cc " + " WHERE cc.case_law_id=c.id))", + ) + return [r["id"] for r in rows] +``` + +- [ ] **Step 2: Commit** `feat(db): corroboration approve/demote transitions + backfill query (X11 Phase 2)` + +--- + +## Task 4: `reconcile_approvals` + wire into `build_for_precedent` + `build_all` + +**Files:** Modify `corroboration.py` + +- [ ] **Step 1:** Add to `corroboration.py`: +```python +async def reconcile_approvals(case_law_id: str | UUID) -> dict: + """Apply the corroboration→approval policy for every halacha of a precedent. + No-op (returns disabled) when the kill-switch is off. 
INV-COR2/COR4/COR5.""" + if not config.HALACHA_CORROBORATION_AUTO_APPROVE: + return {"approved": 0, "demoted": 0, "disabled": True} + if isinstance(case_law_id, str): + case_law_id = UUID(case_law_id) + grouped = await db.list_corroboration_grouped(case_law_id) + approved = demoted = 0 + for halacha_id, links in grouped.items(): + agg = aggregate(links) + has_overruled = any(l["treatment"] == "overruled" for l in links) + action = approval_action(agg, has_overruled) + if action == "approve": + if await db.approve_halacha_by_corroboration( + UUID(halacha_id), agg["positive_sources"], + config.HALACHA_CORROBORATION_MIN_CITES, + ): + approved += 1 + elif action == "demote": + if await db.demote_halacha_overruled(UUID(halacha_id)): + demoted += 1 + return {"approved": approved, "demoted": demoted, "disabled": False} +``` + +- [ ] **Step 2:** At the end of `build_for_precedent`, replace the `return` with: +```python + appr = await reconcile_approvals(case_law_id) + return {"citations": len(cits), "linked": linked, + "approved": appr["approved"], "demoted": appr["demoted"]} +``` + +- [ ] **Step 3:** Add the corpus driver: +```python +async def build_all() -> dict: + """Backfill: build the signal + apply approvals for every precedent that has + halachot and incoming citations. 
Idempotent (ON CONFLICT on the link table; + transitions only fire on the legal state).""" + ids = await db.precedents_with_halachot_and_incoming_citations() + totals = {"precedents": 0, "citations": 0, "linked": 0, + "approved": 0, "demoted": 0} + for cid in ids: + r = await build_for_precedent(cid) + totals["precedents"] += 1 + for k in ("citations", "linked", "approved", "demoted"): + totals[k] += r.get(k, 0) + logger.info("corroboration backfill %s: %s", cid, r) + return totals +``` + +- [ ] **Step 4: Commit** `feat(corroboration): reconcile_approvals + build_all backfill (X11 Phase 2)` + +--- + +## Task 5: Write MCP tool `corroboration_rebuild` + +**Files:** Modify `mcp-server/src/legal_mcp/server.py` + +- [ ] **Step 1:** Add near `halacha_corroboration` (server.py:926): +```python +@mcp.tool() +async def corroboration_rebuild(case_law_id: str = "") -> dict: + """בנה/רענן את ה-corroboration ויישם אישור-אוטומטי. ריק = כל הקורפוס (backfill); + מזהה-תקדים = תקדים בודד. כותב halacha_citation_corroboration + מעדכן review_status + (corroborated→approved, overruled→pending_review). X11 Phase 2.""" + from legal_mcp.services import corroboration as cor + if case_law_id.strip(): + return await cor.build_for_precedent(case_law_id.strip()) + return await cor.build_all() +``` + +- [ ] **Step 2:** Verify import/registration: +```bash +cd mcp-server && .venv/bin/python -c "from legal_mcp import server; print('corroboration_rebuild' in [t.name for t in server.mcp._tool_manager.list_tools()])" +``` +Expected `True`. + +- [ ] **Step 3: Commit** `feat(mcp): corroboration_rebuild write tool (X11 Phase 2)` + +--- + +## Task 6: Backfill the corpus + verify + +- [ ] **Step 1:** Snapshot approved/pending counts before. +- [ ] **Step 2:** Run `build_all()` from the venv (`DOTENV_PATH=/home/chaim/.env DATA_DIR=…`). Expect ~12 precedents, no exception, a small number of `approved`/`demoted`. 
+- [ ] **Step 3:** Verify: every halacha approved-by-corroboration has `reviewer LIKE 'corroborated %'`; no `published`/`rejected` changed; corroboration rows carry treatment+score. Spot-check one approved halacha via `halacha_corroboration`. +- [ ] **Step 4: Commit** any data-audit note under `data/audit/`. + +--- + +## Out of scope (Phase 2 backlog — deliberately deferred) + +- **Enrichment (INV-COR3 secondary):** sharpen `rule_statement` from citing framing — **proposal-only**, must not silently rewrite an approved rule. Bigger design; separate plan. +- **Treatment backfill of `case_law_citations.citation_type`** (default `'support'`) — orthogonal to corroboration (which classifies treatment fresh per citation into its own column). + +--- + +## Self-Review + +**Spec coverage:** INV-COR2 (overruled demote split from generic negative-block; Task 2/3/4), INV-COR4 (acts only on `corroborated`; Task 2/4), INV-COR5 (only legal-state transitions, tail untouched; Task 3 WHERE clauses), INV-COR6 (reviewer provenance + retained link rows; Task 3), INV-G10 amended (authority = citing courts; not AI; Task 2 comment). ✔ +**Safety:** kill-switch defaults to ON but can be disabled via the `HALACHA_CORROBORATION_AUTO_APPROVE` environment variable; transitions are directional and bounded by `review_status` WHERE clauses (cannot touch chair-final states); demotion moves toward *more* human review. ✔ +**Idempotency:** link table `ON CONFLICT` (Phase 1); approve only fires on `pending_review`, demote only on `approved` → re-runs converge as long as nobody changes `review_status` in between. Caveat: if the chair re-approves a halacha while its `overruled` link is still stored, the next rebuild demotes it again. 
✔ diff --git a/mcp-server/src/legal_mcp/config.py b/mcp-server/src/legal_mcp/config.py index 95d5dd3..4ebf0be 100644 --- a/mcp-server/src/legal_mcp/config.py +++ b/mcp-server/src/legal_mcp/config.py @@ -67,6 +67,12 @@ HALACHA_BULK_EXTRACT_EFFORT = os.environ.get("HALACHA_BULK_EXTRACT_EFFORT", "hig HALACHA_CHUNK_CONCURRENCY = int(os.environ.get("HALACHA_CHUNK_CONCURRENCY", "3")) HALACHA_CORROBORATION_MATCH_FLOOR = float(os.environ.get("HALACHA_CORROBORATION_MATCH_FLOOR", "0.50")) HALACHA_CORROBORATION_MIN_CITES = int(os.environ.get("HALACHA_CORROBORATION_MIN_CITES", "2")) +# X11 Phase 2: gate corroboration → approval. Default ON (Dafna validated the +# Phase 1 signal, 2026-06-01). Set to "false" to disable the auto-approve/demote +# wiring while keeping the Phase 1 signal intact. +HALACHA_CORROBORATION_AUTO_APPROVE = os.environ.get( + "HALACHA_CORROBORATION_AUTO_APPROVE", "true" +).strip().lower() in ("1", "true", "yes", "on") # Voyage AI VOYAGE_API_KEY = os.environ.get("VOYAGE_API_KEY", "") diff --git a/mcp-server/src/legal_mcp/services/corroboration.py b/mcp-server/src/legal_mcp/services/corroboration.py index e4788f9..e6d1486 100644 --- a/mcp-server/src/legal_mcp/services/corroboration.py +++ b/mcp-server/src/legal_mcp/services/corroboration.py @@ -52,6 +52,22 @@ def aggregate(links: list[dict], min_cites: int = config.HALACHA_CORROBORATION_M } +def approval_action(agg: dict, has_overruled: bool) -> str | None: + """Decide the corroboration→approval action for ONE halacha (INV-COR2/COR4). + + - 'demote' : a later court overruled it → back to the chair gate (overruled + outranks any positive count, INV-COR2 strong form). + - 'approve' : corroborated (≥N distinct positives, 0 negatives — INV-COR4). + - None : leave as-is (single source, non-overruled negative, or the + uncorroborated tail — INV-COR5 keeps the chair gate). 
+ """ + if has_overruled: + return "demote" + if agg.get("corroborated"): + return "approve" + return None + + _TREATMENT_PROMPT = """אתה משפטן בכיר. נתון ציטוט של פסק/החלטה קודמים בתוך החלטה מאוחרת. סווג כיצד ההחלטה המאוחרת **מטפלת** בתקדים המצוטט, לפי אחת מהקטגוריות: - followed — אימצה והחילה את ההלכה. diff --git a/mcp-server/tests/test_corroboration.py b/mcp-server/tests/test_corroboration.py index 0ef33dd..50f68d9 100644 --- a/mcp-server/tests/test_corroboration.py +++ b/mcp-server/tests/test_corroboration.py @@ -44,3 +44,24 @@ def test_aggregate_negative_blocks(): def test_aggregate_below_threshold(): agg = cor.aggregate([_link("d1","followed")], min_cites=2) assert agg["corroborated"] is False # single source insufficient (INV-COR4) + + +# --- Phase 2: approval decision (INV-COR2/COR4) --- + +def test_approval_action_corroborated_approves(): + agg = {"positive_sources": 2, "has_negative": False, "corroborated": True} + assert cor.approval_action(agg, has_overruled=False) == "approve" + +def test_approval_action_overruled_demotes_even_if_corroborated(): + # overruled outranks any positive count (INV-COR2 strong form) + agg = {"positive_sources": 3, "has_negative": True, "corroborated": False} + assert cor.approval_action(agg, has_overruled=True) == "demote" + +def test_approval_action_single_source_noop(): + agg = {"positive_sources": 1, "has_negative": False, "corroborated": False} + assert cor.approval_action(agg, has_overruled=False) is None + +def test_approval_action_negative_nonoverruled_noop(): + # distinguished blocks approval but does not demote (no overruled) + agg = {"positive_sources": 2, "has_negative": True, "corroborated": False} + assert cor.approval_action(agg, has_overruled=False) is None -- 2.49.1 From ed547e20ad746dc822b4d836d88311564a004cd2 Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 1 Jun 2026 04:35:37 +0000 Subject: [PATCH 2/3] feat(corroboration): wire approval gate + backfill driver + rebuild tool (X11 Phase 2) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - db: approve_halacha_by_corroboration (pending_review→approved only), demote_halacha_overruled (approved→pending_review only), list_corroboration_grouped, precedents_with_halachot_and_incoming_citations - corroboration: reconcile_approvals (INV-COR2/COR4/COR5), build_all backfill; build_for_precedent now returns approved/demoted counts - mcp: corroboration_rebuild write tool (single precedent or full-corpus backfill) Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/src/legal_mcp/server.py | 11 +++ .../src/legal_mcp/services/corroboration.py | 47 +++++++++++- mcp-server/src/legal_mcp/services/db.py | 75 +++++++++++++++++++ 3 files changed, 132 insertions(+), 1 deletion(-) diff --git a/mcp-server/src/legal_mcp/server.py b/mcp-server/src/legal_mcp/server.py index 3922e89..fd419f8 100644 --- a/mcp-server/src/legal_mcp/server.py +++ b/mcp-server/src/legal_mcp/server.py @@ -934,6 +934,17 @@ async def halacha_corroboration(halacha_id: str) -> dict: return {"halacha_id": halacha_id, "summary": agg, "citations": links} +@mcp.tool() +async def corroboration_rebuild(case_law_id: str = "") -> dict: + """בנה/רענן את ה-corroboration ויישם אישור-אוטומטי. ריק = כל הקורפוס (backfill); + מזהה-תקדים = תקדים בודד. כותב halacha_citation_corroboration ומעדכן review_status + (corroborated→approved, overruled→pending_review). 
X11 Phase 2.""" + from legal_mcp.services import corroboration as cor + if case_law_id.strip(): + return await cor.build_for_precedent(case_law_id.strip()) + return await cor.build_all() + + def main(): mcp.run(transport="stdio") diff --git a/mcp-server/src/legal_mcp/services/corroboration.py b/mcp-server/src/legal_mcp/services/corroboration.py index e6d1486..91deb43 100644 --- a/mcp-server/src/legal_mcp/services/corroboration.py +++ b/mcp-server/src/legal_mcp/services/corroboration.py @@ -117,4 +117,49 @@ async def build_for_precedent(case_law_id: str | UUID) -> dict: treatment, best[1], ctx, ) linked += 1 - return {"citations": len(cits), "linked": linked} + appr = await reconcile_approvals(case_law_id) + return {"citations": len(cits), "linked": linked, + "approved": appr["approved"], "demoted": appr["demoted"]} + + +async def reconcile_approvals(case_law_id: str | UUID) -> dict: + """Apply the corroboration→approval policy to every halacha of a precedent + (INV-COR2/COR4/COR5). No-op when the kill-switch is off. 
Idempotent: approve + only fires on ``pending_review``, demote only on ``approved``, so re-runs + converge.""" + if not config.HALACHA_CORROBORATION_AUTO_APPROVE: + return {"approved": 0, "demoted": 0, "disabled": True} + if isinstance(case_law_id, str): + case_law_id = UUID(case_law_id) + grouped = await db.list_corroboration_grouped(case_law_id) + approved = demoted = 0 + for halacha_id, links in grouped.items(): + agg = aggregate(links) + has_overruled = any(l["treatment"] == "overruled" for l in links) + action = approval_action(agg, has_overruled) + if action == "approve": + if await db.approve_halacha_by_corroboration( + UUID(halacha_id), agg["positive_sources"], + config.HALACHA_CORROBORATION_MIN_CITES, + ): + approved += 1 + elif action == "demote": + if await db.demote_halacha_overruled(UUID(halacha_id)): + demoted += 1 + return {"approved": approved, "demoted": demoted, "disabled": False} + + +async def build_all() -> dict: + """Backfill: build the signal + apply approvals for every precedent that has + halachot and incoming citations. 
Idempotent (link table ``ON CONFLICT`` + + state-gated transitions).""" + ids = await db.precedents_with_halachot_and_incoming_citations() + totals = {"precedents": 0, "citations": 0, "linked": 0, + "approved": 0, "demoted": 0} + for cid in ids: + r = await build_for_precedent(cid) + totals["precedents"] += 1 + for k in ("citations", "linked", "approved", "demoted"): + totals[k] += r.get(k, 0) + logger.info("corroboration backfill %s: %s", cid, r) + return totals diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index ed70b29..7e2455e 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -3480,6 +3480,81 @@ async def update_halacha( return dict(row) if row else None +async def approve_halacha_by_corroboration( + halacha_id: UUID, n_sources: int, min_cites: int, +) -> bool: + """Approve a halacha on citation corroboration — ONLY if it is currently + awaiting the chair (``pending_review``). Never touches ``published`` / + ``rejected`` / already-``approved`` (INV-COR5: the chair gate is preserved for + everything else). The reviewer records the corroboration basis as provenance + (INV-COR6). Returns True iff a row actually transitioned.""" + pool = await get_pool() + reviewer = f"corroborated ({n_sources} judicial citations ≥ {min_cites})" + row = await pool.fetchrow( + "UPDATE halachot SET review_status='approved', reviewer=$2, " + "reviewed_at=now(), updated_at=now() " + "WHERE id=$1 AND review_status='pending_review' RETURNING id", + halacha_id, reviewer, + ) + return row is not None + + +async def demote_halacha_overruled(halacha_id: UUID) -> bool: + """Demote an APPROVED halacha back to the chair gate because a later citing + court overruled it (INV-COR2). Acts only on ``approved`` → ``pending_review``; + leaves ``published`` / ``rejected`` / already-``pending_review`` untouched. The + reviewer note records why it re-entered the queue. 
Returns True iff a row + transitioned.""" + pool = await get_pool() + row = await pool.fetchrow( + "UPDATE halachot SET review_status='pending_review', " + "reviewer='flagged: overruled by later citation (X11)', " + "reviewed_at=NULL, updated_at=now() " + "WHERE id=$1 AND review_status='approved' RETURNING id", + halacha_id, + ) + return row is not None + + +async def list_corroboration_grouped(case_law_id: UUID) -> dict[str, list[dict]]: + """Per-halacha corroboration links for a cited precedent, in the + ``{source_id, treatment}`` shape ``aggregate()`` consumes. The distinct citing + source is keyed by case_law/decision id (falling back to the citation row id + so two anonymous rows are not collapsed).""" + pool = await get_pool() + rows = await pool.fetch( + "SELECT hcc.halacha_id::text AS halacha_id, " + " COALESCE(hcc.citing_case_law_id::text, hcc.citing_decision_id::text, " + " hcc.source_citation_id::text) AS source_id, " + " hcc.treatment " + "FROM halacha_citation_corroboration hcc " + "JOIN halachot h ON h.id = hcc.halacha_id " + "WHERE h.case_law_id = $1", + case_law_id, + ) + out: dict[str, list[dict]] = {} + for r in rows: + out.setdefault(r["halacha_id"], []).append( + {"source_id": r["source_id"], "treatment": r["treatment"]} + ) + return out + + +async def precedents_with_halachot_and_incoming_citations() -> list[str]: + """case_law ids that have at least one halacha AND at least one incoming + citation (either graph) — the corroboration backfill target set.""" + pool = await get_pool() + rows = await pool.fetch( + "SELECT c.id::text FROM case_law c " + "WHERE EXISTS (SELECT 1 FROM halachot h WHERE h.case_law_id=c.id) " + " AND (EXISTS (SELECT 1 FROM precedent_internal_citations p " + " WHERE p.cited_case_law_id=c.id) " + " OR EXISTS (SELECT 1 FROM case_law_citations cc " + " WHERE cc.case_law_id=c.id))", + ) + return [r["id"] for r in rows] + + async def nearest_halacha_for_vector(case_law_id: UUID, vec: list[float]) -> tuple[str, float] | None: 
"""Best-matching halacha of `case_law_id` for a context embedding (cosine).""" pool = await get_pool() -- 2.49.1 From b4d1fc5539d31e9f06030f388567aaadb6fce54c Mon Sep 17 00:00:00 2001 From: Chaim Date: Mon, 1 Jun 2026 04:41:58 +0000 Subject: [PATCH 3/3] docs(audit): X11 Phase 2 corroboration backfill result (X11 Phase 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 12 precedents, 20 links, 0 negatives. 4 halachot corroborated — all already confidence-approved (signal fully overlaps confidence set), so 0 transitions. Approve path proven in rolled-back tx; no chair-final state touched. Co-Authored-By: Claude Opus 4.8 (1M context) --- data/audit/x11-phase2-backfill-20260601.md | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 data/audit/x11-phase2-backfill-20260601.md diff --git a/data/audit/x11-phase2-backfill-20260601.md b/data/audit/x11-phase2-backfill-20260601.md new file mode 100644 index 0000000..a83bd3f --- /dev/null +++ b/data/audit/x11-phase2-backfill-20260601.md @@ -0,0 +1,26 @@ +# X11 Phase 2 — Corroboration Backfill (2026-06-01) + +`corroboration.build_all()` over the full corpus after wiring the approval gate. + +## Result +``` +{"precedents": 12, "citations": 26, "linked": 20, "approved": 0, "demoted": 0} +``` + +## Treatment distribution (20 stored links) +- followed: 18 · explained: 1 · mentioned: 1 · **negatives: 0** + +## Per-halacha corroboration +- 14 halachot carry corroboration rows; **4 are corroborated** (≥2 distinct positive sources, 0 negatives). +- **All 14 were already `approved`** (13 by confidence ≥0.80, 1 by דפנה). + +## Why 0 approved / 0 demoted (correct, not a bug) +- **0 approved:** `approve_halacha_by_corroboration` only transitions `pending_review`. Every corroborated halacha was already approved → nothing to promote this run. The citation-corroboration set currently **fully overlaps** the confidence-approved set. 
+- **0 demoted:** the corpus has **no negative treatments** → nothing overruled to demote. + +## Verification +- Counts before == after (approved=1415, pending=196, published=0, rejected=1) — idempotent, no chair-final state touched. +- Approve path proven end-to-end in a **rolled-back transaction**: a corroborated halacha set to `pending_review` flipped back to `approved` with reviewer `corroborated (2 judicial citations ≥ 2)`; prod row restored. + +## Going-forward value +The corroboration approval path matters for (a) future halachot extracted **below** the confidence threshold but **citation-corroborated**, and (b) **overruled-demotion** once negative treatment appears in the citation graph. Re-runnable anytime via the `corroboration_rebuild` MCP tool (empty arg = full backfill). -- 2.49.1