Files
legal-ai/scripts/final_learning_pipeline.py
Chaim 0f0656ecca feat(learning): חיווט אוטונומי לכפתורי מסלול-הסופי — סקריפט-תזמור אחד לכל שלב
הכפתורים "הרץ למידת-קול"/"הרץ אימות-הלכות" מעירים את הרמס, ובמקום שהסוכן
(DeepSeek) ירכיב כמה קריאות-כלי (שביר), הוא מריץ עכשיו פקודה דטרמיניסטית אחת.

חדש:
- scripts/final_learning_pipeline.py — (1) ingest_final_version עם נתיב-הסופי
  (מדלג אם הזוג כבר analyzed; --force לחידוש), (2) רישום לקורפוס-הסגנון
  (idempotent — סוגר את הפער שפאנל-הסגנון דרש corpus_id), (3) style_lesson_panel
  --apply. --dry-run להרצה בטוחה.
- scripts/final_halacha_pipeline.py — extract_internal_citations →
  corroboration.build_all → halacha_panel_approve --apply. --dry-run / --limit.

briefs הרמס (web/paperclip_client._curator_task_brief) פושטו לפקודה-אחת לכל
task — חסין מול הרצת-סוכן. תוקנו שני הפערים שזוהו: ingest דרש file_path,
ופאנל-הסגנון דרש style_corpus.

נלווה: תיקון help מיושן של halacha_panel_approve (--apply מחווט). SCRIPTS.md.

אומת: שני ה-pipelines רצו dry-run על בל"מ 8126-03-25 (skip-ingest, קורפוס,
פאנלים) בהצלחה. Invariants: INV-LRN1/LRN5/G10 (הפיך, שער-יו"ר ידני נשמר),
INV-DM7. G2 — תזמור של יכולות קיימות, לא מסלול-מקביל.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 10:21:39 +00:00

141 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""One-shot LOCAL pipeline for the 'run-learning' button (voice learning).
The container can't run the LLM steps (claude/DeepSeek/Gemini keys are local), so
the /api/cases/{case}/final/run-learning endpoint wakes the Hermes curator, which
runs THIS single deterministic command. Collapsing the flow into one script (rather
than asking the agent to assemble several tool calls) makes the autonomous path
reliable.
Steps:
[1] ingest_final_version(case, file_path) → Opus distils draft↔final into
draft_final_pairs.analysis (status→analyzed). INV-LRN5 separates style↔substance.
[2] enroll the final into style_corpus (idempotent) so lessons have a corpus_id.
[3] style_lesson_panel --apply → DeepSeek+Gemini vote per style lesson; 2/2-keep →
decision_lesson (source=panel:deepseek+gemini); split → chair (INV-G10).
The fold into SKILL.md / legal-decision-lessons.md stays a manual chair gate.
Local-only. Idempotent — safe to re-run.
cd ~/legal-ai/mcp-server
.venv/bin/python ../scripts/final_learning_pipeline.py --case 8126-03-25
"""
from __future__ import annotations
import argparse
import asyncio
import json
import sys
from argparse import Namespace
from pathlib import Path
# scripts/ is not a package — make style_lesson_panel importable.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from legal_mcp import config # noqa: E402
from legal_mcp.services import db # noqa: E402
from legal_mcp.tools.documents import document_upload_training # noqa: E402
from legal_mcp.tools.workflow import ingest_final_version # noqa: E402
def _resolve_final_path(case_number: str) -> str | None:
    """Locate the final-decision .docx inside the case's ``exports`` folder.

    The canonical name (``סופי-<case_number>.docx``, the file saved by
    /final/upload) wins when it exists. Otherwise the lowest-sorting
    ``סופי-*.docx`` in the same folder is used as a graceful fallback.

    Returns:
        The chosen path as a string, or ``None`` when no candidate exists.
    """
    exports = config.find_case_dir(case_number) / "exports"
    preferred = exports / f"סופי-{case_number}.docx"
    if preferred.exists():
        return str(preferred)
    # Fallback: smallest path in sort order among the final-prefixed documents.
    fallback = min(exports.glob("סופי-*.docx"), default=None)
    return None if fallback is None else str(fallback)
async def _has_style_corpus(decision_number: str) -> bool:
    """Report whether style_corpus already holds a row for this decision number.

    Used to keep the enrollment step idempotent: a hit means the final has
    already been registered and must not be uploaded again.
    """
    query = "SELECT 1 FROM style_corpus WHERE decision_number = $1 LIMIT 1"
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        hit = await conn.fetchrow(query, decision_number)
    return bool(hit)
async def _latest_pair_status(case_id) -> str | None:
    """Fetch the ``status`` of the newest draft_final_pairs row for a case.

    "Newest" is decided by ``created_at`` descending. The ``None`` in the
    return annotation presumably covers a case with no pairs yet — confirm
    against the DB driver's ``fetchval`` contract.
    """
    query = (
        "SELECT status FROM draft_final_pairs WHERE case_id = $1 "
        "ORDER BY created_at DESC LIMIT 1"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        newest_status = await conn.fetchval(query, case_id)
    return newest_status
async def main(args: argparse.Namespace) -> int:
    """Run the three-step voice-learning pipeline for a single case.

    Steps:
        [1] ``ingest_final_version`` on the resolved final file. Skipped when the
            latest draft_final_pairs row is already ``analyzed`` and ``--force``
            was not given.
        [2] Enroll the final into style_corpus, unless a row for this case
            number already exists.
        [3] Run ``style_lesson_panel.main``; ``--dry-run`` only switches this
            step to non-applying mode — steps 1 and 2 still execute.

    Args:
        args: Parsed CLI namespace providing ``case`` (str), ``dry_run`` (bool)
            and ``force`` (bool).

    Returns:
        Process exit code. ``1`` when the case or its final file is missing, or
        when step 1 or step 2 returns an error envelope; otherwise whatever the
        style panel returned (``0`` when that value is falsy).
    """
    case_number = args.case
    case = await db.get_case_by_number(case_number)
    if not case:
        print(f"✗ תיק {case_number} לא נמצא")
        return 1
    final_path = _resolve_final_path(case_number)
    if not final_path:
        print(f"✗ לא נמצא קובץ סופי ל-{case_number} (העלה דרך 'העלאת החלטה סופית של היו\"ר')")
        return 1
    print(f"final: {final_path}\n")

    # [1] distillation (Opus) — skip if already analyzed (idempotent; --force to redo)
    status = await _latest_pair_status(case["id"])
    if status == "analyzed" and not args.force:
        print("[1/3] ingest_final_version — דולג (הזוג כבר analyzed; --force לחידוש)")
    else:
        print("[1/3] ingest_final_version — דיסטילציית טיוטה↔סופי…", flush=True)
        raw = await ingest_final_version(case_number, file_path=final_path)
        try:
            env = json.loads(raw)
            if env.get("status") == "error":
                print(f"{env.get('message')}")
                return 1
            d = env.get("data", {})
            ds = d.get("diff_stats", {})
            print(f" ✓ change {ds.get('change_percent')}% · lessons {d.get('lessons_count')} "
                  f"· new_expr {d.get('new_expressions')}")
        except (ValueError, TypeError, AttributeError):
            # Unparseable or unexpectedly-shaped reply: show a preview and carry on.
            # str() keeps the preview itself from raising on a non-string payload.
            print(f" (ingest returned: {str(raw)[:200]})")

    # [2] enroll into style_corpus (idempotent) — lessons need a corpus_id
    print("[2/3] רישום לקורפוס-הסגנון (idempotent)…", flush=True)
    if await _has_style_corpus(case_number):
        print(" ✓ כבר רשום בקורפוס-הסגנון")
    else:
        r = await document_upload_training(
            final_path,
            decision_number=case_number,
            title=f"החלטה סופית — {case.get('proceeding_type', '')} {case_number}".strip(),
            practice_area=case.get("practice_area") or "appeals_committee",
            appeal_subtype=case.get("appeal_subtype") or "",
        )
        try:
            upload_env = json.loads(r)
            # Mirror step 1: an error envelope must not be reported as a success,
            # and the panel is pointless without the corpus row it depends on.
            # NOTE(review): assumes the upload tool uses the same status/message
            # envelope as ingest_final_version — confirm.
            if upload_env.get("status") == "error":
                print(f" ✗ {upload_env.get('message')}")
                return 1
            print(f" ✓ corpus_id {upload_env.get('data', {}).get('corpus_id')}")
        except (ValueError, TypeError, AttributeError):
            print(f" (training upload returned: {str(r)[:160]})")

    # [3] two-judge style panel (DeepSeek + Gemini)
    apply = not args.dry_run
    print(f"[3/3] פאנל-סגנון דו-סוכני (DeepSeek+Gemini) {'--apply' if apply else '(dry-run)'}",
          flush=True)
    # Imported late: it resolves only after the module-level sys.path tweak.
    import style_lesson_panel as slp
    rc = await slp.main(Namespace(
        case=case_number, pair_id=None, apply=apply, limit=0, concurrency=4,
    ))
    if rc:
        # Do not claim completion when the panel reported a failure code.
        print(f"\n✗ pipeline-למידה נכשל — פאנל-הסגנון החזיר קוד {rc}")
        return rc
    print("\n✓ pipeline-למידה הושלם" + (" (dry-run)" if args.dry_run else ""))
    return 0
if __name__ == "__main__":
    # CLI entry point: parse the flags, run the async pipeline, and exit with
    # the code it returns.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--case", required=True, help="case_number, e.g. 8126-03-25")
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        help="run the chain but the style panel in dry-run (no decision_lesson writes)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="re-run ingest_final_version even if the pair is already analyzed",
    )
    cli_args = parser.parse_args()
    raise SystemExit(asyncio.run(main(cli_args)))