From b7b44f445364b3f0e6ea55bada1109bb20ca794f Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 6 Jun 2026 21:29:46 +0000 Subject: [PATCH] feat(halacha): equivalent-halacha (parallel-authority) links across precedents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-precedent recurrence of a principle is real but is NOT citation corroboration (X11) — the 5 candidate pairs have ZERO citations between their precedents. Recording them in halacha_citation_corroboration would fabricate citation data and inflate corroboration_count. This adds a proper, separate halacha-level link for parallel authority. Schema (V28): equivalent_halachot — symmetric (halacha_a < halacha_b, CHECK + UNIQUE), non-citation, cross-precedent-only. ON DELETE CASCADE. db.py: - link_equivalent_halachot (idempotent; rejects same-id and SAME-precedent pairs — parallel authority is cross-precedent by definition), unlink, and list_equivalent_for_halacha. - list_halachot gains include_equivalents → _annotate_equivalents attaches an `equivalents` list (both directions) per row. API: include_equivalents on GET /api/halachot; GET/POST/DELETE /api/halachot/{id}/equivalents for the chair to view/link/unlink manually. scripts/halacha_batch_reconcile.py: --link records found cross-precedent pairs as equivalent_halachot (non-destructive, idempotent). web-ui: Halacha.equivalents type; the clean review queue fetches include_equivalents; the review card shows a gold "עיקרון מקביל ב-N" badge + an expandable list (case + rule + similarity) labeled "אסמכתה מקבילה — לא ציטוט". Populated the 5 reviewed pairs (chair decision: keep all + link as parallel authority). Verified: 5 rows; the 1023-20 hub annotates 3 of its halachot with equivalents; tsc --noEmit exits 0. Invariants: G1 (model recurrence at source in its own table, not by abusing the citator); G2 (no parallel path — extends list_halachot); citator integrity preserved (corroboration stays citation-only). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- mcp-server/src/legal_mcp/services/db.py | 137 +++++++++++++++++- scripts/SCRIPTS.md | 2 +- scripts/halacha_batch_reconcile.py | 19 ++- .../precedents/halacha-review-panel.tsx | 40 +++++ web-ui/src/lib/api/precedent-library.ts | 11 +- web/app.py | 46 +++++- 6 files changed, 249 insertions(+), 6 deletions(-) diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 664139f..4ca0fd0 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -1232,6 +1232,30 @@ CREATE INDEX IF NOT EXISTS idx_style_exemplars_section ON style_exemplars(sectio CREATE INDEX IF NOT EXISTS idx_style_exemplars_decision ON style_exemplars(decision_number, source); """ +SCHEMA_V28_SQL = """ +-- equivalent_halachot (#84.2 follow-up): halacha-level PARALLEL-AUTHORITY links. +-- Distinct from halacha_citation_corroboration (X11): that records an actual +-- citation of a halacha by a later decision; this records that two halachot of +-- DIFFERENT precedents state the same legal principle INDEPENDENTLY (no citation +-- between them). Symmetric and non-directional — stored with halacha_a < halacha_b +-- so each pair is unique and self-links are impossible. Never merges/deletes the +-- halachot; it only relates them so the chair sees a principle recurs across +-- committees (a real-but-non-citation signal the citator must not fabricate). 
+CREATE TABLE IF NOT EXISTS equivalent_halachot ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + halacha_a UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE, + halacha_b UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE, + cosine NUMERIC(4,3) DEFAULT 0, + note TEXT DEFAULT '', + created_by TEXT DEFAULT '', + created_at TIMESTAMPTZ DEFAULT now(), + CHECK (halacha_a < halacha_b), + UNIQUE (halacha_a, halacha_b) +); +CREATE INDEX IF NOT EXISTS idx_equiv_halacha_a ON equivalent_halachot(halacha_a); +CREATE INDEX IF NOT EXISTS idx_equiv_halacha_b ON equivalent_halachot(halacha_b); +""" + async def _run_schema_migrations(pool: asyncpg.Pool) -> None: async with pool.acquire() as conn: @@ -1263,7 +1287,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None: await conn.execute(SCHEMA_V25_SQL) await conn.execute(SCHEMA_V26_SQL) await conn.execute(SCHEMA_V27_SQL) - logger.info("Database schema initialized (v1-v27)") + await conn.execute(SCHEMA_V28_SQL) + logger.info("Database schema initialized (v1-v28)") async def init_schema() -> None: @@ -3795,6 +3820,7 @@ async def list_halachot( exclude_low_quality: bool = False, order_by_priority: bool = False, cluster: bool = False, + include_equivalents: bool = False, ) -> list[dict]: """List halachot with optional triage controls (#84). @@ -3874,6 +3900,8 @@ async def list_halachot( out.append(d) if cluster and out: await _annotate_clusters(pool, out) + if include_equivalents and out: + await _annotate_equivalents(pool, out) return out @@ -4135,6 +4163,113 @@ async def store_corroboration( ) +# ── Parallel-authority (equivalent halachot) — #84.2 follow-up ─────────────── +# +# A NON-citation, symmetric link between halachot of different precedents that +# state the same principle. Kept entirely separate from the citation corroboration +# above so the citator's counts never include non-citation recurrences. 
def _equiv_order(a: UUID, b: UUID) -> tuple[UUID, UUID]:
    """Return the pair as ``(lower, higher)`` so a link is symmetric and unique.

    The table enforces ``CHECK (halacha_a < halacha_b)`` and PostgreSQL compares
    uuid values byte-wise. Comparing ``UUID`` objects (integer comparison)
    matches that ordering for every input; comparing ``str()`` forms only
    matches for lowercase canonical strings, so an id passed as an uppercase or
    mixed-case string could be ordered the wrong way and fail the CHECK.

    Args:
        a: one halacha id (``UUID``, or anything whose ``str()`` is a UUID).
        b: the other halacha id.

    Returns:
        Both ids as ``UUID`` objects, smaller first. When the ids are equal the
        tuple holds the same value twice.

    Raises:
        ValueError: if an argument is not a valid UUID.
    """
    ua = a if isinstance(a, UUID) else UUID(str(a))
    ub = b if isinstance(b, UUID) else UUID(str(b))
    return (ua, ub) if ua < ub else (ub, ua)


def _equiv_entry(halacha_id: str, case_number, rule_statement, cosine) -> dict:
    """Build one item of an ``equivalents`` list (the *other* halacha of a link)."""
    return {
        "halacha_id": halacha_id,
        "case_number": case_number,
        "rule_statement": rule_statement,
        # cosine is a nullable NUMERIC column; expose it as float or None.
        "cosine": float(cosine) if cosine is not None else None,
    }


async def link_equivalent_halachot(
    a: UUID, b: UUID, *, cosine: float = 0.0, note: str = "", created_by: str = "",
) -> bool:
    """Record that two halachot of different precedents state the same principle.

    Idempotent: re-linking an existing pair keeps the higher cosine and only
    replaces the note when a non-empty one is supplied.

    Args:
        a: id of one halacha.
        b: id of the other halacha (argument order does not matter).
        cosine: similarity score, rounded to 3 decimals before storing.
        note: free-text note stored with the link.
        created_by: who created the link (written on first insert only).

    Returns:
        True when the link was written. False, with nothing written, when
        ``a`` and ``b`` are the same halacha, when both belong to the SAME
        precedent (parallel authority is cross-precedent by definition), or
        when the precedents cannot be compared because a halacha id does not
        exist or has no ``case_law_id``.

    Raises:
        ValueError: if ``a`` or ``b`` is not a valid UUID.
    """
    # Normalise first so a UUID and its string form are recognised as equal.
    lo, hi = _equiv_order(a, b)
    if lo == hi:
        return False
    pool = await get_pool()
    same = await pool.fetchval(
        "SELECT (SELECT case_law_id FROM halachot WHERE id=$1) "
        "     = (SELECT case_law_id FROM halachot WHERE id=$2)", lo, hi,
    )
    # SQL NULL (None) means at least one side has no case_law_id to compare,
    # typically because the halacha id does not exist. Treating None as
    # "different precedents" would let the INSERT run into a foreign-key error.
    if same is None or same:
        return False
    await pool.execute(
        "INSERT INTO equivalent_halachot (halacha_a, halacha_b, cosine, note, created_by) "
        "VALUES ($1,$2,$3,$4,$5) ON CONFLICT (halacha_a, halacha_b) DO UPDATE SET "
        "cosine=GREATEST(equivalent_halachot.cosine, EXCLUDED.cosine), "
        "note=COALESCE(NULLIF(EXCLUDED.note,''), equivalent_halachot.note)",
        lo, hi, round(float(cosine), 3), note, created_by,
    )
    return True


async def unlink_equivalent_halachot(a: UUID, b: UUID) -> bool:
    """Remove the parallel-authority link between two halachot, in either order.

    Returns:
        True if a link row was deleted, False if no such link existed.
    """
    pool = await get_pool()
    lo, hi = _equiv_order(a, b)
    res = await pool.execute(
        "DELETE FROM equivalent_halachot WHERE halacha_a=$1 AND halacha_b=$2", lo, hi,
    )
    # The command tag looks like "DELETE <n>"; the pair is UNIQUE, so n is 0 or 1.
    return res.rsplit(" ", 1)[-1] != "0"


async def list_equivalent_for_halacha(halacha_id: UUID) -> list[dict]:
    """The other halachot linked as parallel authority to this one (both sides).

    Returns:
        One dict per linked halacha with ``halacha_id``, ``rule_statement``,
        ``case_number``, ``case_name`` and ``cosine`` (float or None), ordered
        by cosine descending with missing scores last.
    """
    pool = await get_pool()
    rows = await pool.fetch(
        "SELECT e.cosine, h.id::text AS halacha_id, h.rule_statement, "
        "       cl.case_number, cl.case_name "
        "FROM equivalent_halachot e "
        # Join to whichever side of the link is NOT the requested halacha.
        "JOIN halachot h ON h.id = CASE WHEN e.halacha_a=$1 THEN e.halacha_b ELSE e.halacha_a END "
        "JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE e.halacha_a=$1 OR e.halacha_b=$1 "
        "ORDER BY e.cosine DESC NULLS LAST", halacha_id,
    )
    return [
        {
            "halacha_id": r["halacha_id"],
            "rule_statement": r["rule_statement"],
            "case_number": r["case_number"],
            "case_name": r["case_name"],
            "cosine": float(r["cosine"]) if r["cosine"] is not None else None,
        }
        for r in rows
    ]


async def _annotate_equivalents(pool, out: list[dict]) -> None:
    """Attach an ``equivalents`` list to each row (#84.2), mutating ``out`` in place.

    Both directions are added, so when both halachot of a pair are on the same
    page each one lists the other. Rows without links get an empty list. Each
    list is ordered by cosine descending (missing scores last), matching
    ``list_equivalent_for_halacha``.

    Args:
        pool: object exposing an awaitable ``fetch(query, *args)``.
        out: halacha rows; each must carry an ``id`` key.
    """
    ids = [d["id"] for d in out]
    rows = await pool.fetch(
        "SELECT e.halacha_a, e.halacha_b, e.cosine, "
        "       ha.rule_statement AS a_rule, cla.case_number AS a_case, "
        "       hb.rule_statement AS b_rule, clb.case_number AS b_case "
        "FROM equivalent_halachot e "
        "JOIN halachot ha ON ha.id = e.halacha_a "
        "JOIN case_law cla ON cla.id = ha.case_law_id "
        "JOIN halachot hb ON hb.id = e.halacha_b "
        "JOIN case_law clb ON clb.id = hb.case_law_id "
        "WHERE e.halacha_a = ANY($1::uuid[]) OR e.halacha_b = ANY($1::uuid[]) "
        "ORDER BY e.cosine DESC NULLS LAST",
        ids,
    )
    on_page = {str(i) for i in ids}
    by_src: dict[str, list[dict]] = {}
    for r in rows:
        a, b = str(r["halacha_a"]), str(r["halacha_b"])
        # A link is reported from the point of view of each side on this page.
        if a in on_page:
            by_src.setdefault(a, []).append(
                _equiv_entry(b, r["b_case"], r["b_rule"], r["cosine"]))
        if b in on_page:
            by_src.setdefault(b, []).append(
                _equiv_entry(a, r["a_case"], r["a_rule"], r["cosine"]))
    for d in out:
        d["equivalents"] = by_src.get(str(d["id"]), [])
4f9d73d..1b4c16f 100644 --- a/scripts/SCRIPTS.md +++ b/scripts/SCRIPTS.md @@ -39,7 +39,7 @@ | `backfill_nevo_preamble.py` | python | **#86.2** — מיגרציית-נתונים: חיתוך preamble/רציו של נבו שדלף לפסיקה שהוטמעה לפני תיקון #86.1. מאתר כל `case_law` ש-`strip_nevo_preamble(full_text)` עדיין מקצר (דליפה היסטורית), ומבצע: (1) לכידת ה-מיני-רציו ל-`case_law.nevo_ratio` (gold-set ל-#86.3); (2) שכתוב `full_text` החתוך + חישוב-מחדש של `content_hash`; (3) `reindex_case_law` (re-chunk+embed, ללא re-OCR/LLM); (4) **סימון (לא מחיקה)** הלכות ש-`supporting_quote` שלהן בתוך ה-preamble שהוסר → `pending_review` + quality_flag `nevo_preamble_leak`. **שומר-בטיחות:** שורות עם keep%<`--min-keep` (ברירת-מחדל 60) מוחרגות מ-`--apply` כחשד over-strip (אלא אם `--include-suspicious`). **dry-run כברירת-מחדל**; `--apply` כותב backup JSON + manifest CSV ל-`data/audit/` תחילה. idempotent. רץ עם venv של mcp-server. **chair-gated** (לאמת manifest לפני apply) | מיגרציית-נתונים — dry-run בוצע (19 פסקים, 27 הלכות מזוהמות); apply ממתין לאישור | | `nevo_ratio_benchmark.py` | python | **#86.3** — מדידת איכות חילוץ-הלכות מול ה-מיני-רציו של נבו (gold-set מקצועי חינמי). לכל פסק עם `nevo_ratio` (או נגזר מ-`full_text` אם טרם בוצע backfill): LLM-judge מקומי (`claude_session`, אפס עלות) ממפה סמנטית את הלכות-המערכת מול הלכות-נבו ומפיק **recall** (כיסוי הלכות-נבו), **precision** (אחוז הלכותינו הממופות), **granularity** (יחס פירוק — איתות over-extraction ל-#81.5). `--case ` / `--all [--limit N]` / `--model` / `--out`. כותב CSV ל-`data/audit/`. רץ עם venv של mcp-server (דורש Claude CLI מקומי). אומת על בג"ץ 1764/05: recall 0.875, precision 1.0, granularity 1.75x | ידני — מדידת-איכות (CI/ad-hoc) | | `halacha_goldset.py` | python | **#81.7** — הארנס gold-set לאיכות חילוץ-הלכות. `export --n N` מייצא מדגם מרובד (לפי precedent×rule_type) ל-CSV עם עמודות-תיוג ריקות (`is_holding`/`correct_type`/`quote_complete`) לתיוג ידני (חיים/דפנה). 
`score --in ` קורא את ה-CSV המתויג ומודד כל ולידטור (`compute_quality_flags`/`is_fact_dependent`/`is_quote_truncated`/`is_thin_restatement`) מול אמת-המידה האנושית: P/R/F1 + confusion. בסיס ל-#81.8 (כיול סף האישור). מייבא את אותם ולידטורים שה-extractor מריץ. רץ עם venv של mcp-server | ידני — export→תיוג→score | -| `halacha_batch_reconcile.py` | python | **#82.7** — dedup חוצה-פסקים offline (שמרני, **dry-run בלבד**). dedup-on-insert משווה רק תוך-פסק; כאן סף מחמיר (cosine ≥0.95, `--cosine`) ולא-הרסני: מאתר זוגות הלכות near-duplicate בין פסקים שונים (pgvector `<=>` exact) עם איתות לקסיקלי (Jaccard/Levenshtein) ומדווח ל-CSV ב-`data/audit/` לסקירת היו"ר. לא מדלג/ממזג/מוחק. `--include-pending`. רץ עם venv של mcp-server. אומת: 819 הלכות → 5 זוגות מועמדים | ידני — דוח-סקירה | +| `halacha_batch_reconcile.py` | python | **#82.7** — dedup חוצה-פסקים offline (שמרני, **dry-run כברירת-מחדל**). dedup-on-insert משווה רק תוך-פסק; כאן סף מחמיר (cosine ≥0.95, `--cosine`) ולא-הרסני: מאתר זוגות הלכות near-duplicate בין פסקים שונים (pgvector `<=>` exact) עם איתות לקסיקלי (Jaccard/Levenshtein) ומדווח ל-CSV ב-`data/audit/` לסקירת היו"ר. לא מדלג/ממזג/מוחק. `--include-pending`. **`--link`** רושם את הזוגות שנמצאו כ-`equivalent_halachot` (parallel authority, #84.2 — קישור-מקביל ברמת-הלכה, **לא** ציטוט; idempotent, לא-הרסני). רץ עם venv של mcp-server. אומת: 800 הלכות → 5 זוגות (קושרו). | ידני — דוח-סקירה / `--link` לקישור | | `calibrate_halacha_dedup.py` | python | **#82.1** — כיול ספי ה-dedup הלקסיקלי (#82.3) מול gold-set הניקוי. קורא `halacha-cleanup-manifest-*.csv` (זוגות duplicate↔survivor מתויגי-אדם), טוען טקסט-survivor מה-DB, ו-sweep של (jaccard_min × levenshtein_min) עם P/R/F1, מסמן את נקודת-העבודה המוגדרת. אימת ש-(0.55, 0.70) → **precision 1.0** (אפס false-merge), recall 0.30 — מתאים לאיתות-משני שחוסם auto-approve. `--manifest `. 
רץ עם venv של mcp-server | חד-פעמי — כיול (בוצע 2026-06-06) | | `audit_corpus_integrity.py` | python | בדיקה תקופתית של עקביות הקורפוס — 3 בדיקות SQL read-only על `case_law` ו-`cases`: (A) `external_upload` עם prefix פנימי `ערר`/`בל"מ`; (B) `internal_committee` חסר `chair_name`/`district`; (C) `cases.practice_area` מחוץ ל-{`rishuy_uvniya`, `betterment_levy`, `compensation_197`, `''`}. כותב log מצטבר ל-`data/logs/corpus_integrity_audit.log` ובמצב הפרות שולח wakeup ל-CEO ב-Paperclip (best-effort, רק אם `PAPERCLIP_API_URL`+`PAPERCLIP_API_KEY` מוגדרים). דגל: `--no-notify`. Idempotent, יוצא 0. **Cron יומי 07:00**: `0 7 * * * /home/chaim/legal-ai/mcp-server/.venv/bin/python /home/chaim/legal-ai/scripts/audit_corpus_integrity.py` | `0 7 * * *` (cron) | | `backfill_legal_arguments.py` | python | Backfill `legal_arguments` לתיקים עם `claims` קיימים (TaskMaster #36). מקבץ פרופוזיציות גולמיות לטיעונים משפטיים מובחנים (~6-12 לכל צד) דרך `argument_aggregator.aggregate_claims_to_arguments` (Claude CLI). תומך `--dry-run`/`--apply`/`--force`/`--case ...`. **חייב לרוץ מהמכונה המקומית** (לא קונטיינר) — `claude_session` דורש Claude CLI | ידני per-case (`python scripts/backfill_legal_arguments.py --apply --case 1017-03-26`) | diff --git a/scripts/halacha_batch_reconcile.py b/scripts/halacha_batch_reconcile.py index 7a82cf5..31313f2 100644 --- a/scripts/halacha_batch_reconcile.py +++ b/scripts/halacha_batch_reconcile.py @@ -91,7 +91,22 @@ async def main(args: argparse.Namespace) -> int: w = csv.DictWriter(f, fieldnames=list(pairs[0].keys())) w.writeheader() w.writerows(pairs) - print(f"\nreport: {out} (review-only — nothing changed)", flush=True) + print(f"\nreport: {out}", flush=True) + + if args.link and pairs: + # #84.2 — record each pair as parallel authority (equivalent_halachot). + # Non-destructive: links only, never merges/deletes. Idempotent. 
+ linked = 0 + for p in pairs: + if await db.link_equivalent_halachot( + p["id_a"], p["id_b"], cosine=p["cosine"], + note="cross-precedent parallel authority (halacha_batch_reconcile)", + created_by="batch_reconcile", + ): + linked += 1 + print(f"linked {linked}/{len(pairs)} pairs as equivalent_halachot", flush=True) + elif pairs: + print("(review-only — pass --link to record them as equivalent_halachot)", flush=True) return 0 @@ -102,5 +117,7 @@ if __name__ == "__main__": help="min cosine for a cross-precedent candidate (default 0.95)") ap.add_argument("--include-pending", action="store_true", help="also scan pending_review halachot (default: approved/published only)") + ap.add_argument("--link", action="store_true", + help="record found pairs as equivalent_halachot (parallel authority, #84.2)") args = ap.parse_args() sys.exit(asyncio.run(main(args))) diff --git a/web-ui/src/components/precedents/halacha-review-panel.tsx b/web-ui/src/components/precedents/halacha-review-panel.tsx index 51bef3a..7b9df21 100644 --- a/web-ui/src/components/precedents/halacha-review-panel.tsx +++ b/web-ui/src/components/precedents/halacha-review-panel.tsx @@ -68,7 +68,9 @@ function HalachaCard({ onSave: (patch: Partial) => Promise; }) { const variants = h.variants ?? []; + const equivalents = h.equivalents ?? []; const [showVariants, setShowVariants] = useState(false); + const [showEquiv, setShowEquiv] = useState(false); const [editing, setEditing] = useState(false); const [draft, setDraft] = useState({ rule_statement: h.rule_statement, @@ -122,6 +124,12 @@ function HalachaCard({ +{variants.length} וריאנטים )} + {equivalents.length > 0 && ( + + עיקרון מקביל ב-{equivalents.length} + + )} @@ -220,6 +228,38 @@ function HalachaCard({ )} + {equivalents.length > 0 && ( +
+ + {showEquiv && ( +
    + {equivalents.map((e) => ( +
  • + {cleanCitation(e.case_number)} + {" — "}{e.rule_statement} + {e.cosine != null && ( + + (דמיון {e.cosine.toFixed(2)}) + + )} +
  • + ))} +
+ )} +
+ )} +
{editing ? ( <> diff --git a/web-ui/src/lib/api/precedent-library.ts b/web-ui/src/lib/api/precedent-library.ts index 4956d28..95ba14f 100644 --- a/web-ui/src/lib/api/precedent-library.ts +++ b/web-ui/src/lib/api/precedent-library.ts @@ -97,6 +97,15 @@ export type Halacha = { * UI collapses them into one review card. cluster_size === 1 → singleton. */ cluster_id?: string; cluster_size?: number; + /* #84.2 parallel authority (present only when fetched with include_equivalents): + * the SAME principle stated independently in OTHER precedents — recurrence, not + * citation (distinct from corroboration_count). */ + equivalents?: { + halacha_id: string; + case_number: string; + rule_statement: string; + cosine: number | null; + }[]; }; export type RelatedCase = { @@ -584,7 +593,7 @@ export function useHalachotPending( const qs = needsFix ? `review_status=pending_review&exclude_low_quality=false&limit=${limit}` : `review_status=pending_review&exclude_low_quality=true` - + `&order_by_priority=true&cluster=true&limit=${limit}`; + + `&order_by_priority=true&cluster=true&include_equivalents=true&limit=${limit}`; return useQuery({ queryKey: [...libraryKeys.halachotPending(), needsFix ? "needsfix" : "clean"], queryFn: async ({ signal }) => { diff --git a/web/app.py b/web/app.py index c31fa31..4959917 100644 --- a/web/app.py +++ b/web/app.py @@ -6034,10 +6034,12 @@ async def halachot_list( exclude_low_quality: bool = False, order_by_priority: bool = False, cluster: bool = False, + include_equivalents: bool = False, ): """List halachot. ``exclude_low_quality`` hides flagged items (#84.1), - ``order_by_priority`` switches to the active-learning order (#84.3), and - ``cluster`` annotates near-duplicate groups for one-card review (#84.2). All + ``order_by_priority`` switches to the active-learning order (#84.3), + ``cluster`` annotates near-duplicate groups for one-card review (#84.2), and + ``include_equivalents`` attaches cross-precedent parallel-authority links. 
# Request body for POST /api/halachot/{halacha_id}/equivalents:
#   other_id — id of the halacha to link to the one named in the path
#   note     — optional free-text note stored with the link
class EquivalentLinkRequest(BaseModel):
    other_id: str
    note: str = ""


def _equiv_uuid_or_400(raw: str, detail: str) -> UUID:
    """Parse ``raw`` as a UUID, answering HTTP 400 with ``detail`` when it is malformed."""
    try:
        return UUID(raw)
    except ValueError:
        raise HTTPException(400, detail)


@app.get("/api/halachot/{halacha_id}/equivalents")
async def halacha_equivalents_list(halacha_id: str):
    """Return the cross-precedent parallel-authority links of one halacha (#84.2)."""
    parsed = _equiv_uuid_or_400(halacha_id, "halacha_id לא תקין")
    items = await db.list_equivalent_for_halacha(parsed)
    return {"items": items}


@app.post("/api/halachot/{halacha_id}/equivalents")
async def halacha_equivalents_link(halacha_id: str, req: EquivalentLinkRequest):
    """Let the chair mark two halachot as the same principle across precedents (#84.2)."""
    bad_id = "מזהה הלכה לא תקין"
    source_id = _equiv_uuid_or_400(halacha_id, bad_id)
    target_id = _equiv_uuid_or_400(req.other_id, bad_id)
    linked = await db.link_equivalent_halachot(
        source_id, target_id, note=req.note, created_by="chair",
    )
    if linked:
        return {"ok": True}
    # The db layer refuses the link and reports it by returning a falsy value.
    raise HTTPException(
        400, "לא ניתן לקשר — אותה הלכה או שתי הלכות מאותו פסק (קישור-מקביל הוא חוצה-פסקים)")


@app.delete("/api/halachot/{halacha_id}/equivalents/{other_id}")
async def halacha_equivalents_unlink(halacha_id: str, other_id: str):
    """Remove a parallel-authority link; ``ok`` carries the db layer's result."""
    bad_id = "מזהה הלכה לא תקין"
    source_id = _equiv_uuid_or_400(halacha_id, bad_id)
    target_id = _equiv_uuid_or_400(other_id, bad_id)
    removed = await db.unlink_equivalent_halachot(source_id, target_id)
    return {"ok": removed}