feat(halacha): equivalent-halacha (parallel-authority) links across precedents #100

Merged
chaim merged 1 commits from worktree-equivalent-halachot into main 2026-06-06 21:30:22 +00:00
6 changed files with 249 additions and 6 deletions
Showing only changes of commit b7b44f4453 - Show all commits

View File

@@ -1232,6 +1232,30 @@ CREATE INDEX IF NOT EXISTS idx_style_exemplars_section ON style_exemplars(sectio
CREATE INDEX IF NOT EXISTS idx_style_exemplars_decision ON style_exemplars(decision_number, source);
"""
# Schema migration v28: creates the equivalent_halachot table (symmetric,
# cross-precedent "parallel authority" links between halachot) together with one
# lookup index per side of the pair. Every statement uses IF NOT EXISTS;
# _run_schema_migrations executes this script right after SCHEMA_V27_SQL.
# NOTE(review): UNIQUE (halacha_a, halacha_b) is normally backed by an index that
# already leads with halacha_a, so idx_equiv_halacha_a looks redundant — confirm
# before relying on it or dropping it.
SCHEMA_V28_SQL = """
-- equivalent_halachot (#84.2 follow-up): halacha-level PARALLEL-AUTHORITY links.
-- Distinct from halacha_citation_corroboration (X11): that records an actual
-- citation of a halacha by a later decision; this records that two halachot of
-- DIFFERENT precedents state the same legal principle INDEPENDENTLY (no citation
-- between them). Symmetric and non-directional — stored with halacha_a < halacha_b
-- so each pair is unique and self-links are impossible. Never merges/deletes the
-- halachot; it only relates them so the chair sees a principle recurs across
-- committees (a real-but-non-citation signal the citator must not fabricate).
CREATE TABLE IF NOT EXISTS equivalent_halachot (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
halacha_a UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE,
halacha_b UUID NOT NULL REFERENCES halachot(id) ON DELETE CASCADE,
cosine NUMERIC(4,3) DEFAULT 0,
note TEXT DEFAULT '',
created_by TEXT DEFAULT '',
created_at TIMESTAMPTZ DEFAULT now(),
CHECK (halacha_a < halacha_b),
UNIQUE (halacha_a, halacha_b)
);
CREATE INDEX IF NOT EXISTS idx_equiv_halacha_a ON equivalent_halachot(halacha_a);
CREATE INDEX IF NOT EXISTS idx_equiv_halacha_b ON equivalent_halachot(halacha_b);
"""
async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
async with pool.acquire() as conn:
@@ -1263,7 +1287,8 @@ async def _run_schema_migrations(pool: asyncpg.Pool) -> None:
await conn.execute(SCHEMA_V25_SQL)
await conn.execute(SCHEMA_V26_SQL)
await conn.execute(SCHEMA_V27_SQL)
logger.info("Database schema initialized (v1-v27)")
await conn.execute(SCHEMA_V28_SQL)
logger.info("Database schema initialized (v1-v28)")
async def init_schema() -> None:
@@ -3795,6 +3820,7 @@ async def list_halachot(
exclude_low_quality: bool = False,
order_by_priority: bool = False,
cluster: bool = False,
include_equivalents: bool = False,
) -> list[dict]:
"""List halachot with optional triage controls (#84).
@@ -3874,6 +3900,8 @@ async def list_halachot(
out.append(d)
if cluster and out:
await _annotate_clusters(pool, out)
if include_equivalents and out:
await _annotate_equivalents(pool, out)
return out
@@ -4135,6 +4163,113 @@ async def store_corroboration(
)
# ── Parallel-authority (equivalent halachot) — #84.2 follow-up ───────────────
#
# A NON-citation, symmetric link between halachot of different precedents that
# state the same principle. Kept entirely separate from the citation corroboration
# above so the citator's counts never include non-citation recurrences.
def _equiv_order(a: UUID, b: UUID) -> tuple[UUID, UUID]:
"""Canonical ordering (halacha_a < halacha_b) so the pair is symmetric+unique."""
return (a, b) if str(a) < str(b) else (b, a)
async def link_equivalent_halachot(
    a: UUID, b: UUID, *, cosine: float = 0.0, note: str = "", created_by: str = "",
) -> bool:
    """Record that two halachot of different precedents state the same principle.

    The link is symmetric (the pair is stored in canonical order) and the write
    is idempotent: re-linking an existing pair keeps the higher cosine and
    replaces the note only when a non-empty one is supplied. ``created_by`` is
    written on first insert only — the conflict branch does not touch it.

    Args:
        a: Id of one halacha.
        b: Id of the other halacha.
        cosine: Similarity score for the pair; rounded to 3 decimals on write.
        note: Optional free-text note stored on the link.
        created_by: Label of whoever/whatever created the link.

    Returns:
        True when the link row was inserted or refreshed. False — with nothing
        written — when ``a == b``, when both halachot belong to the same
        precedent (parallel authority is cross-precedent by definition;
        within-precedent sameness is the dedup/cluster concern), or when the
        precedent comparison is inconclusive because at least one id does not
        resolve to a halacha with a case_law_id.
    """
    if a == b:
        return False
    pool = await get_pool()
    same_precedent = await pool.fetchval(
        "SELECT (SELECT case_law_id FROM halachot WHERE id=$1) "
        " = (SELECT case_law_id FROM halachot WHERE id=$2)", a, b,
    )
    # The comparison is NULL (None) when either sub-select yields no value —
    # i.e. an unknown halacha id. Inserting in that case would only fail on the
    # foreign key, so report "not linkable" instead of raising.
    # NOTE(review): a halacha row whose case_law_id is NULL is also refused
    # here — confirm that is the desired behaviour if the column is nullable.
    if same_precedent is None or same_precedent:
        return False
    lo, hi = _equiv_order(a, b)
    await pool.execute(
        "INSERT INTO equivalent_halachot (halacha_a, halacha_b, cosine, note, created_by) "
        "VALUES ($1,$2,$3,$4,$5) ON CONFLICT (halacha_a, halacha_b) DO UPDATE SET "
        "cosine=GREATEST(equivalent_halachot.cosine, EXCLUDED.cosine), "
        "note=COALESCE(NULLIF(EXCLUDED.note,''), equivalent_halachot.note)",
        lo, hi, round(float(cosine), 3), note, created_by,
    )
    return True
async def unlink_equivalent_halachot(a: UUID, b: UUID) -> bool:
    """Remove the parallel-authority link between two halachot.

    The arguments may be given in either order; they are put into the stored
    canonical order before the DELETE is issued.

    Returns:
        True when the command status reports exactly one deleted row,
        False otherwise (e.g. the pair was never linked).
    """
    pool = await get_pool()
    first, second = _equiv_order(a, b)
    status = await pool.execute(
        "DELETE FROM equivalent_halachot WHERE halacha_a=$1 AND halacha_b=$2",
        first,
        second,
    )
    # The status string ends with the affected-row count, e.g. "DELETE 1".
    return status.endswith(" 1")
async def list_equivalent_for_halacha(halacha_id: UUID) -> list[dict]:
    """Fetch the halachot linked as parallel authority to ``halacha_id``.

    Links are stored once per pair, so the given id may sit on either side of
    a row; the query joins whichever side is *not* the given id. Rows come
    back ordered by cosine, highest first.

    Returns:
        One dict per linked halacha with the keys ``halacha_id``,
        ``rule_statement``, ``case_number``, ``case_name`` and ``cosine``
        (a float, or None when the stored value is NULL).
    """
    query = (
        "SELECT e.cosine, h.id::text AS halacha_id, h.rule_statement, "
        " cl.case_number, cl.case_name "
        "FROM equivalent_halachot e "
        "JOIN halachot h ON h.id = CASE WHEN e.halacha_a=$1 THEN e.halacha_b ELSE e.halacha_a END "
        "JOIN case_law cl ON cl.id = h.case_law_id "
        "WHERE e.halacha_a=$1 OR e.halacha_b=$1 "
        "ORDER BY e.cosine DESC"
    )
    pool = await get_pool()
    records = await pool.fetch(query, halacha_id)

    linked: list[dict] = []
    for rec in records:
        raw_cosine = rec["cosine"]
        linked.append(
            {
                "halacha_id": rec["halacha_id"],
                "rule_statement": rec["rule_statement"],
                "case_number": rec["case_number"],
                "case_name": rec["case_name"],
                # NUMERIC arrives as a non-float value; expose a plain float.
                "cosine": None if raw_cosine is None else float(raw_cosine),
            }
        )
    return linked
async def _annotate_equivalents(pool, out: list[dict]) -> None:
"""Attach an `equivalents` list to each row (#84.2) — parallel-authority links.
Adds both directions, so when both halachot of a pair are on the same page
each one lists the other."""
ids = [d["id"] for d in out]
rows = await pool.fetch(
"SELECT e.halacha_a, e.halacha_b, e.cosine, "
" ha.rule_statement AS a_rule, cla.case_number AS a_case, "
" hb.rule_statement AS b_rule, clb.case_number AS b_case "
"FROM equivalent_halachot e "
"JOIN halachot ha ON ha.id = e.halacha_a "
"JOIN case_law cla ON cla.id = ha.case_law_id "
"JOIN halachot hb ON hb.id = e.halacha_b "
"JOIN case_law clb ON clb.id = hb.case_law_id "
"WHERE e.halacha_a = ANY($1::uuid[]) OR e.halacha_b = ANY($1::uuid[])",
ids,
)
idset = {str(i) for i in ids}
by_src: dict[str, list[dict]] = {}
for r in rows:
a, b = str(r["halacha_a"]), str(r["halacha_b"])
cos = float(r["cosine"]) if r["cosine"] is not None else None
if a in idset:
by_src.setdefault(a, []).append({
"halacha_id": b, "case_number": r["b_case"],
"rule_statement": r["b_rule"], "cosine": cos})
if b in idset:
by_src.setdefault(b, []).append({
"halacha_id": a, "case_number": r["a_case"],
"rule_statement": r["a_rule"], "cosine": cos})
for d in out:
d["equivalents"] = by_src.get(str(d["id"]), [])
async def list_corroboration_for_halacha(halacha_id: UUID) -> list[dict]:
"""Return all corroboration rows for one halacha, ordered by match_score DESC."""
pool = await get_pool()

View File

@@ -39,7 +39,7 @@
| `backfill_nevo_preamble.py` | python | **#86.2** — מיגרציית-נתונים: חיתוך preamble/רציו של נבו שדלף לפסיקה שהוטמעה לפני תיקון #86.1. מאתר כל `case_law` ש-`strip_nevo_preamble(full_text)` עדיין מקצר (דליפה היסטורית), ומבצע: (1) לכידת ה-מיני-רציו ל-`case_law.nevo_ratio` (gold-set ל-#86.3); (2) שכתוב `full_text` החתוך + חישוב-מחדש של `content_hash`; (3) `reindex_case_law` (re-chunk+embed, ללא re-OCR/LLM); (4) **סימון (לא מחיקה)** הלכות ש-`supporting_quote` שלהן בתוך ה-preamble שהוסר → `pending_review` + quality_flag `nevo_preamble_leak`. **שומר-בטיחות:** שורות עם keep%<`--min-keep` (ברירת-מחדל 60) מוחרגות מ-`--apply` כחשד over-strip (אלא אם `--include-suspicious`). **dry-run כברירת-מחדל**; `--apply` כותב backup JSON + manifest CSV ל-`data/audit/` תחילה. idempotent. רץ עם venv של mcp-server. **chair-gated** (לאמת manifest לפני apply) | מיגרציית-נתונים — dry-run בוצע (19 פסקים, 27 הלכות מזוהמות); apply ממתין לאישור |
| `nevo_ratio_benchmark.py` | python | **#86.3** — מדידת איכות חילוץ-הלכות מול ה-מיני-רציו של נבו (gold-set מקצועי חינמי). לכל פסק עם `nevo_ratio` (או נגזר מ-`full_text` אם טרם בוצע backfill): LLM-judge מקומי (`claude_session`, אפס עלות) ממפה סמנטית את הלכות-המערכת מול הלכות-נבו ומפיק **recall** (כיסוי הלכות-נבו), **precision** (אחוז הלכותינו הממופות), **granularity** (יחס פירוק — איתות over-extraction ל-#81.5). `--case <num>` / `--all [--limit N]` / `--model` / `--out`. כותב CSV ל-`data/audit/`. רץ עם venv של mcp-server (דורש Claude CLI מקומי). אומת על בג"ץ 1764/05: recall 0.875, precision 1.0, granularity 1.75x | ידני — מדידת-איכות (CI/ad-hoc) |
| `halacha_goldset.py` | python | **#81.7** — הארנס gold-set לאיכות חילוץ-הלכות. `export --n N` מייצא מדגם מרובד (לפי precedent×rule_type) ל-CSV עם עמודות-תיוג ריקות (`is_holding`/`correct_type`/`quote_complete`) לתיוג ידני (חיים/דפנה). `score --in <csv>` קורא את ה-CSV המתויג ומודד כל ולידטור (`compute_quality_flags`/`is_fact_dependent`/`is_quote_truncated`/`is_thin_restatement`) מול אמת-המידה האנושית: P/R/F1 + confusion. בסיס ל-#81.8 (כיול סף האישור). מייבא את אותם ולידטורים שה-extractor מריץ. רץ עם venv של mcp-server | ידני — export→תיוג→score |
| `halacha_batch_reconcile.py` | python | **#82.7** — dedup חוצה-פסקים offline (שמרני, **dry-run בלבד**). dedup-on-insert משווה רק תוך-פסק; כאן סף מחמיר (cosine ≥0.95, `--cosine`) ולא-הרסני: מאתר זוגות הלכות near-duplicate בין פסקים שונים (pgvector `<=>` exact) עם איתות לקסיקלי (Jaccard/Levenshtein) ומדווח ל-CSV ב-`data/audit/` לסקירת היו"ר. לא מדלג/ממזג/מוחק. `--include-pending`. רץ עם venv של mcp-server. אומת: 819 הלכות → 5 זוגות מועמדים | ידני — דוח-סקירה |
| `halacha_batch_reconcile.py` | python | **#82.7** — dedup חוצה-פסקים offline (שמרני, **dry-run בלבד**). dedup-on-insert משווה רק תוך-פסק; כאן סף מחמיר (cosine ≥0.95, `--cosine`) ולא-הרסני: מאתר זוגות הלכות near-duplicate בין פסקים שונים (pgvector `<=>` exact) עם איתות לקסיקלי (Jaccard/Levenshtein) ומדווח ל-CSV ב-`data/audit/` לסקירת היו"ר. לא מדלג/ממזג/מוחק. `--include-pending`. **`--link`** רושם את הזוגות שנמצאו כ-`equivalent_halachot` (parallel authority, #84.2 — קישור-מקביל ברמת-הלכה, **לא** ציטוט; idempotent, לא-הרסני). רץ עם venv של mcp-server. אומת: 800 הלכות → 5 זוגות (קושרו). | ידני — דוח-סקירה / `--link` לקישור |
| `calibrate_halacha_dedup.py` | python | **#82.1** — כיול ספי ה-dedup הלקסיקלי (#82.3) מול gold-set הניקוי. קורא `halacha-cleanup-manifest-*.csv` (זוגות duplicate↔survivor מתויגי-אדם), טוען טקסט-survivor מה-DB, ו-sweep של (jaccard_min × levenshtein_min) עם P/R/F1, מסמן את נקודת-העבודה המוגדרת. אימת ש-(0.55, 0.70) → **precision 1.0** (אפס false-merge), recall 0.30 — מתאים לאיתות-משני שחוסם auto-approve. `--manifest <path>`. רץ עם venv של mcp-server | חד-פעמי — כיול (בוצע 2026-06-06) |
| `audit_corpus_integrity.py` | python | בדיקה תקופתית של עקביות הקורפוס — 3 בדיקות SQL read-only על `case_law` ו-`cases`: (A) `external_upload` עם prefix פנימי `ערר`/`בל"מ`; (B) `internal_committee` חסר `chair_name`/`district`; (C) `cases.practice_area` מחוץ ל-{`rishuy_uvniya`, `betterment_levy`, `compensation_197`, `''`}. כותב log מצטבר ל-`data/logs/corpus_integrity_audit.log` ובמצב הפרות שולח wakeup ל-CEO ב-Paperclip (best-effort, רק אם `PAPERCLIP_API_URL`+`PAPERCLIP_API_KEY` מוגדרים). דגל: `--no-notify`. Idempotent, יוצא 0. **Cron יומי 07:00**: `0 7 * * * /home/chaim/legal-ai/mcp-server/.venv/bin/python /home/chaim/legal-ai/scripts/audit_corpus_integrity.py` | `0 7 * * *` (cron) |
| `backfill_legal_arguments.py` | python | Backfill `legal_arguments` לתיקים עם `claims` קיימים (TaskMaster #36). מקבץ פרופוזיציות גולמיות לטיעונים משפטיים מובחנים (~6-12 לכל צד) דרך `argument_aggregator.aggregate_claims_to_arguments` (Claude CLI). תומך `--dry-run`/`--apply`/`--force`/`--case <num>...`. **חייב לרוץ מהמכונה המקומית** (לא קונטיינר) — `claude_session` דורש Claude CLI | ידני per-case (`python scripts/backfill_legal_arguments.py --apply --case 1017-03-26`) |

View File

@@ -91,7 +91,22 @@ async def main(args: argparse.Namespace) -> int:
w = csv.DictWriter(f, fieldnames=list(pairs[0].keys()))
w.writeheader()
w.writerows(pairs)
print(f"\nreport: {out} (review-only — nothing changed)", flush=True)
print(f"\nreport: {out}", flush=True)
if args.link and pairs:
# #84.2 — record each pair as parallel authority (equivalent_halachot).
# Non-destructive: links only, never merges/deletes. Idempotent.
linked = 0
for p in pairs:
if await db.link_equivalent_halachot(
p["id_a"], p["id_b"], cosine=p["cosine"],
note="cross-precedent parallel authority (halacha_batch_reconcile)",
created_by="batch_reconcile",
):
linked += 1
print(f"linked {linked}/{len(pairs)} pairs as equivalent_halachot", flush=True)
elif pairs:
print("(review-only — pass --link to record them as equivalent_halachot)", flush=True)
return 0
@@ -102,5 +117,7 @@ if __name__ == "__main__":
help="min cosine for a cross-precedent candidate (default 0.95)")
ap.add_argument("--include-pending", action="store_true",
help="also scan pending_review halachot (default: approved/published only)")
ap.add_argument("--link", action="store_true",
help="record found pairs as equivalent_halachot (parallel authority, #84.2)")
args = ap.parse_args()
sys.exit(asyncio.run(main(args)))

View File

@@ -68,7 +68,9 @@ function HalachaCard({
onSave: (patch: Partial<EditState>) => Promise<void>;
}) {
const variants = h.variants ?? [];
const equivalents = h.equivalents ?? [];
const [showVariants, setShowVariants] = useState(false);
const [showEquiv, setShowEquiv] = useState(false);
const [editing, setEditing] = useState(false);
const [draft, setDraft] = useState<EditState>({
rule_statement: h.rule_statement,
@@ -122,6 +124,12 @@ function HalachaCard({
+{variants.length} וריאנטים
</Badge>
)}
{equivalents.length > 0 && (
<Badge variant="outline"
className="text-[0.65rem] bg-gold-wash text-gold-deep border-gold/40">
עיקרון מקביל ב-{equivalents.length}
</Badge>
)}
<CorroborationBadge halacha={h} />
</span>
</div>
@@ -220,6 +228,38 @@ function HalachaCard({
</div>
)}
{equivalents.length > 0 && (
<div className="rounded-md border border-gold/30 bg-gold-wash/40">
<button
type="button"
onClick={() => setShowEquiv((v) => !v)}
className="w-full flex items-center gap-2 px-3 py-2 text-[0.72rem] text-gold-deep hover:bg-gold-wash/70 transition-colors"
aria-expanded={showEquiv}
>
{showEquiv ? <ChevronDown className="w-3.5 h-3.5" /> : <ChevronLeft className="w-3.5 h-3.5" />}
<span className="font-medium">
עיקרון מקביל ב-{equivalents.length} החלטות אחרות (אסמכתה מקבילה)
</span>
<span className="me-auto text-ink-muted">לא ציטוט הישנות עצמאית</span>
</button>
{showEquiv && (
<ul className="px-4 pb-3 pt-1 space-y-2">
{equivalents.map((e) => (
<li key={e.halacha_id} className="text-[0.78rem] text-ink-soft leading-relaxed border-r-2 border-gold/30 pr-3" dir="rtl">
<span className="font-semibold text-navy">{cleanCitation(e.case_number)}</span>
{" — "}{e.rule_statement}
{e.cosine != null && (
<span className="text-[0.65rem] text-ink-muted tabular-nums ms-2">
(דמיון {e.cosine.toFixed(2)})
</span>
)}
</li>
))}
</ul>
)}
</div>
)}
<div className="flex items-center gap-2 justify-end pt-1 border-t border-rule-soft">
{editing ? (
<>

View File

@@ -97,6 +97,15 @@ export type Halacha = {
* UI collapses them into one review card. cluster_size === 1 → singleton. */
cluster_id?: string;
cluster_size?: number;
/* #84.2 parallel authority (present only when fetched with include_equivalents):
* the SAME principle stated independently in OTHER precedents — recurrence, not
* citation (distinct from corroboration_count). */
equivalents?: {
halacha_id: string;
case_number: string;
rule_statement: string;
cosine: number | null;
}[];
};
export type RelatedCase = {
@@ -584,7 +593,7 @@ export function useHalachotPending(
const qs = needsFix
? `review_status=pending_review&exclude_low_quality=false&limit=${limit}`
: `review_status=pending_review&exclude_low_quality=true`
+ `&order_by_priority=true&cluster=true&limit=${limit}`;
+ `&order_by_priority=true&cluster=true&include_equivalents=true&limit=${limit}`;
return useQuery({
queryKey: [...libraryKeys.halachotPending(), needsFix ? "needsfix" : "clean"],
queryFn: async ({ signal }) => {

View File

@@ -6034,10 +6034,12 @@ async def halachot_list(
exclude_low_quality: bool = False,
order_by_priority: bool = False,
cluster: bool = False,
include_equivalents: bool = False,
):
"""List halachot. ``exclude_low_quality`` hides flagged items (#84.1),
``order_by_priority`` switches to the active-learning order (#84.3), and
``cluster`` annotates near-duplicate groups for one-card review (#84.2). All
``order_by_priority`` switches to the active-learning order (#84.3),
``cluster`` annotates near-duplicate groups for one-card review (#84.2), and
``include_equivalents`` attaches cross-precedent parallel-authority links. All
default off so existing callers are unaffected; the review queue opts in."""
cid: UUID | None = None
if case_law_id:
@@ -6053,10 +6055,50 @@ async def halachot_list(
exclude_low_quality=exclude_low_quality,
order_by_priority=order_by_priority,
cluster=cluster,
include_equivalents=include_equivalents,
)
return {"items": rows, "count": len(rows)}
# Request body for POST /api/halachot/{halacha_id}/equivalents.
class EquivalentLinkRequest(BaseModel):
    # Id of the other halacha to link; the endpoint parses it as a UUID.
    other_id: str
    # Optional free-text note passed through to the stored link.
    note: str = ""
@app.get("/api/halachot/{halacha_id}/equivalents")
async def halacha_equivalents_list(halacha_id: str):
    """Return the cross-precedent parallel-authority links of one halacha (#84.2).

    Responds with ``{"items": [...]}``; a malformed id is rejected with HTTP 400.
    """
    try:
        parsed_id = UUID(halacha_id)
    except ValueError:
        raise HTTPException(400, "halacha_id לא תקין")
    links = await db.list_equivalent_for_halacha(parsed_id)
    return {"items": links}
@app.post("/api/halachot/{halacha_id}/equivalents")
async def halacha_equivalents_link(halacha_id: str, req: EquivalentLinkRequest):
    """Let the chair mark two halachot as the same principle across precedents (#84.2).

    Both ids must parse as UUIDs (HTTP 400 otherwise). The link is recorded
    with ``created_by="chair"``; when the DB layer declines to link the pair
    the request is rejected with HTTP 400 as well.
    """
    try:
        this_id = UUID(halacha_id)
        other_id = UUID(req.other_id)
    except ValueError:
        raise HTTPException(400, "מזהה הלכה לא תקין")
    linked = await db.link_equivalent_halachot(
        this_id, other_id, note=req.note, created_by="chair",
    )
    if linked:
        return {"ok": True}
    raise HTTPException(
        400, "לא ניתן לקשר — אותה הלכה או שתי הלכות מאותו פסק (קישור-מקביל הוא חוצה-פסקים)")
@app.delete("/api/halachot/{halacha_id}/equivalents/{other_id}")
async def halacha_equivalents_unlink(halacha_id: str, other_id: str):
    """Remove the parallel-authority link between two halachot.

    Responds with ``{"ok": <bool>}`` where the flag is whatever the DB layer
    reports for the delete; malformed ids are rejected with HTTP 400.
    """
    try:
        this_id = UUID(halacha_id)
        that_id = UUID(other_id)
    except ValueError:
        raise HTTPException(400, "מזהה הלכה לא תקין")
    removed = await db.unlink_equivalent_halachot(this_id, that_id)
    return {"ok": removed}
@app.patch("/api/halachot/{halacha_id}")
async def halacha_update(halacha_id: str, req: HalachaUpdateRequest):
"""Approve / reject / edit a halacha. Used by the chair review queue."""