Files
legal-ai/scripts/final_halacha_pipeline.py
Chaim 2f43960353 feat(learning): מטא-דאטה מלא להחלטות-פנימיות בקליטה + חילוץ-הלכות אוטומטי
סוגר את הפער שעלה על בל"מ 8126: החלטה שנכנסה לספריית-הפסיקה הופיעה ללא
מטא-דאטה (summary/citation/date ריקים, proceeding_type שגוי) כי מחלץ-ה-Gemini
מיועד לפסיקה חיצונית ומחזיר no_metadata לפנימיות, והחילוץ-הלכות נשאר pending.

web/app.py — `_enroll_final_in_library` עכשיו ממלא **דטרמיניסטית** מהתיק (בלי LLM):
- proceeding_type (מהתיק — בל"מ/ערר, גם idempotency key נכון מהקליטה הראשונה),
  decision_date (fallback ל-hearing_date), subject_tags, summary (=subject).
- `citation_formatted` נבנה דטרמיניסטית (`_build_internal_citation`):
  'ועדת ערר ... בל"מ <num> <עורר> נ' <משיב> (יו"ר עו"ד <chair>)'.

scripts/final_halacha_pipeline.py — שלב [0] חדש: `precedent_extract_halachot`
על ההחלטה עצמה (idempotent — מדלג כש-completed/dry-run), כך שהלכות-ההחלטה
לא נשארות pending.

אומת: py_compile ✓ · ה-pipeline רץ dry-run נקי (4 שלבים). 8126 כבר תוקן ידנית;
מכאן זה אוטומטי לכל החלטה. Invariants: INV-LRN4/X11 · G1 (נרמול-במקור) ·
DM7 · feedback_silent_swallow.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 15:19:14 +00:00

121 lines
5.5 KiB
Python

#!/usr/bin/env python3
"""One-shot LOCAL pipeline for the 'run-halacha' button (halacha validation).
The /api/cases/{case}/final/run-halacha endpoint wakes the Hermes curator, which
runs THIS single deterministic command (the 3-judge panel uses local DeepSeek+Gemini
keys + the local claude CLI, so it can't run inside the container).
Steps:
[0] precedent_extract_halachot → extract the halachot the DECISION ITSELF states
(its own case_law row), so they aren't left pending. Idempotent.
[1] extract_internal_citations(chair) → links the citation graph for the chair's
decisions (idempotent; ON CONFLICT DO NOTHING).
[2] corroboration_rebuild → builds the citation-treatment signal and applies the
corroborated→approved / overruled→pending policy (X11 Phase 2).
[3] halacha_panel_approve --apply → 3 judges (Opus+DeepSeek+Gemini); agreement
auto-approves/rejects (reversible, CSV-backed); splits/defects → chair (INV-G10).
NB: per-precedent halacha extraction for newly-cited precedents is NOT automated here
(it needs each cited precedent to be in the library with a known case_law_id) — the
chair drives that from /precedents when a missing precedent is added.
Local-only. Idempotent. The panel pass over the full pending queue can take minutes.
cd ~/legal-ai/mcp-server
.venv/bin/python ../scripts/final_halacha_pipeline.py --case 8126-03-25
"""
from __future__ import annotations
import argparse
import asyncio
import json
import sys
from argparse import Namespace
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
from legal_mcp.services import corroboration, db # noqa: E402
from legal_mcp.tools.citations import extract_internal_citations # noqa: E402
from legal_mcp.tools.precedent_library import precedent_extract_halachot # noqa: E402
async def _decision_law_row(case_number: str) -> dict | None:
    """Fetch the case's own decision row from ``case_law``, if one exists.

    Only rows whose ``source_kind`` is ``'internal_committee'`` are considered;
    when several match, the most recently created one wins.

    Args:
        case_number: the case number to look up.

    Returns:
        A dict holding ``id`` and ``halacha_extraction_status``, or ``None``
        when no matching row is found.
    """
    query = (
        "SELECT id, halacha_extraction_status FROM case_law WHERE case_number = $1 "
        "AND source_kind = 'internal_committee' ORDER BY created_at DESC LIMIT 1"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        record = await conn.fetchrow(query, case_number)
    if not record:
        return None
    return dict(record)
async def main(args: argparse.Namespace) -> int:
    """Run the halacha validation pipeline for a single case.

    Steps, in order:
      [0] extract the halachot stated by the decision itself (its own
          ``case_law`` row). Skipped when the row is missing, when its status
          is already ``completed``, or on dry-run. Failures are reported and
          treated as non-fatal.
      [1] ``extract_internal_citations`` for the case's chair. The call itself
          is not guarded, so an exception raised by it propagates; only the
          parsing of its result is best-effort.
      [2] ``corroboration.build_all()``. Skipped on dry-run; failures are
          reported and treated as non-fatal.
      [3] ``halacha_panel_approve`` with ``apply`` turned off on dry-run.

    Args:
        args: parsed CLI namespace; reads ``case``, ``dry_run`` and ``limit``.

    Returns:
        ``1`` when the case is not found; otherwise the value returned by the
        panel step (``0`` when that value is falsy). The caller passes it to
        ``SystemExit``, so it is the process exit status.
    """
    case_number = args.case
    case = await db.get_case_by_number(case_number)
    if not case:
        print(f"✗ תיק {case_number} לא נמצא")
        return 1
    # Fall back to the default chair when the case has no chair_name set.
    chair = case.get("chair_name") or "דפנה תמיר"

    # [0] extract the halachot the decision ITSELF states (its own row in case_law) —
    # so they are not left pending. Skip when already completed or on dry-run.
    row = await _decision_law_row(case_number)
    if not row:
        print(f"[0/4] ההחלטה {case_number} אינה ב-case_law עדיין — דילוג על חילוץ-הלכות")
    elif row.get("halacha_extraction_status") == "completed":
        print("[0/4] חילוץ-הלכות מההחלטה — דולג (כבר completed)")
    elif args.dry_run:
        print("[0/4] חילוץ-הלכות מההחלטה — מדולג (dry-run)")
    else:
        print(f"[0/4] precedent_extract_halachot (החלטה {case_number})…", flush=True)
        try:
            raw0 = await precedent_extract_halachot(str(row["id"]))
            # ``or {}`` also covers an explicit ``"data": null`` envelope, which
            # would otherwise raise here and be misreported as a failed extraction.
            d0 = json.loads(raw0).get("data") or {}
            print(f" ✓ status={d0.get('status')} stored={d0.get('stored', d0.get('extracted'))}")
        except Exception as e:
            # Deliberately best-effort: the remaining steps still run.
            print(f" ⚠ halacha extraction failed (non-fatal): {e}")

    # [1] citation graph
    print(f"[1/4] extract_internal_citations (chair={chair})…", flush=True)
    raw = await extract_internal_citations(chair_name=chair, limit=0)
    try:
        d = json.loads(raw).get("data", {})
        print(f" ✓ extracted {d.get('extracted')} · linked {d.get('linked')} "
              f"· new {d.get('new')}")
    except Exception:
        # Result was not the expected JSON envelope; show a truncated raw value.
        print(f" (citations returned: {str(raw)[:160]})")

    # [2] corroboration signal + policy (whole corpus backfill) — skipped on dry-run
    if args.dry_run:
        print("[2/4] corroboration_rebuild — מדולג (dry-run)")
    else:
        print("[2/4] corroboration_rebuild (backfill)…", flush=True)
        try:
            cr = await corroboration.build_all()
            print(f"{cr}")
        except Exception as e:
            print(f" ⚠ corroboration failed (non-fatal): {e}")

    # [3] three-judge halacha panel
    apply = not args.dry_run
    print(f"[3/4] halacha_panel_approve {'--apply' if apply else '(dry-run)'} "
          f"(Opus+DeepSeek+Gemini)…", flush=True)
    # Imported lazily; resolved through the scripts directory placed on sys.path
    # at module import time.
    import halacha_panel_approve as hpa
    rc = await hpa.main(Namespace(limit=args.limit, concurrency=6, apply=apply))

    exit_code = rc or 0
    suffix = " (dry-run)" if args.dry_run else ""
    if exit_code:
        # A non-zero panel result becomes the process exit status, so do not
        # announce success for it.
        # NOTE(review): assumes hpa.main returns an int exit code or None — confirm.
        print(f"\n✗ pipeline-אימות-הלכות נכשל בשלב הפאנל (rc={exit_code}){suffix}")
    else:
        print("\n✓ pipeline-אימות-הלכות הושלם" + suffix)
    return exit_code
if __name__ == "__main__":
    # CLI entry point. The module docstring doubles as the --help description,
    # so the raw formatter is used to keep its line breaks intact.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--case",
        required=True,
        help="case_number, e.g. 8126-03-25",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=0,
        help="cap pending halachot judged (0 = full queue)",
    )
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        help="citations only; skip corroboration writes; panel in dry-run",
    )
    cli_args = parser.parse_args()
    # main() returns the exit status; hand it to the interpreter.
    exit_status = asyncio.run(main(cli_args))
    raise SystemExit(exit_status)