feat(learning): חיווט אוטונומי לכפתורי מסלול-הסופי — סקריפט-תזמור אחד לכל שלב
הכפתורים "הרץ למידת-קול"/"הרץ אימות-הלכות" מעירים את הרמס, ובמקום שהסוכן (DeepSeek) ירכיב כמה קריאות-כלי (שביר), הוא מריץ עכשיו פקודה דטרמיניסטית אחת. חדש: - scripts/final_learning_pipeline.py — (1) ingest_final_version עם נתיב-הסופי (מדלג אם הזוג כבר analyzed; --force לחידוש), (2) רישום לקורפוס-הסגנון (idempotent — סוגר את הפער שפאנל-הסגנון דרש corpus_id), (3) style_lesson_panel --apply. --dry-run להרצה בטוחה. - scripts/final_halacha_pipeline.py — extract_internal_citations → corroboration.build_all → halacha_panel_approve --apply. --dry-run / --limit. briefs הרמס (web/paperclip_client._curator_task_brief) פושטו לפקודה-אחת לכל task — חסין מול הרצת-סוכן. תוקנו שני הפערים שזוהו: ingest דרש file_path, ופאנל-הסגנון דרש style_corpus. נלווה: תיקון help מיושן של halacha_panel_approve (--apply מחווט). SCRIPTS.md. אומת: שני ה-pipelines רצו dry-run על בל"מ 8126-03-25 (skip-ingest, קורפוס, פאנלים) בהצלחה. Invariants: INV-LRN1/LRN5/G10 (הפיך, שער-יו"ר ידני נשמר), INV-DM7. G2 — תזמור של יכולות קיימות, לא מסלול-מקביל. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
140
scripts/final_learning_pipeline.py
Normal file
140
scripts/final_learning_pipeline.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""One-shot LOCAL pipeline for the 'run-learning' button (voice learning).
|
||||
|
||||
The container can't run the LLM steps (claude/DeepSeek/Gemini keys are local), so
|
||||
the /api/cases/{case}/final/run-learning endpoint wakes the Hermes curator, which
|
||||
runs THIS single deterministic command. Collapsing the flow into one script (rather
|
||||
than asking the agent to assemble several tool calls) makes the autonomous path
|
||||
reliable.
|
||||
|
||||
Steps:
|
||||
[1] ingest_final_version(case, file_path) → Opus distils draft↔final into
|
||||
draft_final_pairs.analysis (status→analyzed). INV-LRN5 separates style↔substance.
|
||||
[2] enroll the final into style_corpus (idempotent) so lessons have a corpus_id.
|
||||
[3] style_lesson_panel --apply → DeepSeek+Gemini vote per style lesson; 2/2-keep →
|
||||
decision_lesson (source=panel:deepseek+gemini); split → chair (INV-G10).
|
||||
|
||||
The fold into SKILL.md / legal-decision-lessons.md stays a manual chair gate.
|
||||
Local-only. Idempotent — safe to re-run.
|
||||
|
||||
cd ~/legal-ai/mcp-server
|
||||
.venv/bin/python ../scripts/final_learning_pipeline.py --case 8126-03-25
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from argparse import Namespace
|
||||
from pathlib import Path
|
||||
|
||||
# scripts/ is not a package — make style_lesson_panel importable.
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
|
||||
from legal_mcp import config # noqa: E402
|
||||
from legal_mcp.services import db # noqa: E402
|
||||
from legal_mcp.tools.documents import document_upload_training # noqa: E402
|
||||
from legal_mcp.tools.workflow import ingest_final_version # noqa: E402
|
||||
|
||||
|
||||
def _resolve_final_path(case_number: str) -> str | None:
    """Locate the final-decision .docx inside the case's ``exports`` folder.

    The preferred file is ``סופי-<case_number>.docx``. When that file does not
    exist, the first ``סופי-*.docx`` in sorted order is used instead.

    Returns:
        The chosen path as a string, or ``None`` when no candidate exists.
    """
    exports = config.find_case_dir(case_number) / "exports"
    chosen = exports / f"סופי-{case_number}.docx"
    if not chosen.exists():
        # Graceful fallback: any final-prefixed docx, lowest name first.
        fallbacks = sorted(exports.glob("סופי-*.docx"))
        if not fallbacks:
            return None
        chosen = fallbacks[0]
    return str(chosen)
|
||||
|
||||
|
||||
async def _has_style_corpus(decision_number: str) -> bool:
    """Report whether ``style_corpus`` already has a row for *decision_number*.

    Used as the idempotency check before enrolling a final decision.
    """
    query = "SELECT 1 FROM style_corpus WHERE decision_number = $1 LIMIT 1"
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        hit = await conn.fetchrow(query, decision_number)
    return bool(hit)
|
||||
|
||||
|
||||
async def _latest_pair_status(case_id) -> str | None:
    """Return the ``status`` of the newest ``draft_final_pairs`` row for a case.

    "Newest" means highest ``created_at``. The result is whatever ``fetchval``
    yields for the query (presumably ``None`` when the case has no pair yet).
    """
    query = (
        "SELECT status FROM draft_final_pairs WHERE case_id = $1 "
        "ORDER BY created_at DESC LIMIT 1"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        latest = await conn.fetchval(query, case_id)
    return latest
|
||||
|
||||
|
||||
async def main(args: argparse.Namespace) -> int:
    """Run the three-step voice-learning pipeline for a single case.

    Steps:
        [1] ``ingest_final_version`` on the resolved final file. Skipped when
            the latest draft/final pair is already ``analyzed`` unless
            ``args.force`` is set.
        [2] Enroll the final into ``style_corpus`` via
            ``document_upload_training`` when no row exists for the case number.
        [3] ``style_lesson_panel.main`` with ``apply = not args.dry_run``.

    Note that ``--dry-run`` only affects step [3]; steps [1] and [2] still run.

    Args:
        args: Parsed CLI namespace; reads ``case``, ``force`` and ``dry_run``.

    Returns:
        ``1`` when the case or its final file is missing, or when ingest
        reports an error envelope; otherwise the style panel's return code
        (``0`` when the panel returns a falsy value).
    """
    # Failures we expect when a tool's reply is not the JSON envelope we parse:
    # invalid JSON (ValueError), non-str payload (TypeError), non-dict (AttributeError).
    envelope_errors = (ValueError, TypeError, AttributeError)

    case_number = args.case
    case = await db.get_case_by_number(case_number)
    if not case:
        print(f"✗ תיק {case_number} לא נמצא")
        return 1

    final_path = _resolve_final_path(case_number)
    if not final_path:
        print(f"✗ לא נמצא קובץ סופי ל-{case_number} (העלה דרך 'העלאת החלטה סופית של היו\"ר')")
        return 1
    print(f"final: {final_path}\n")

    # [1] distillation (Opus) — skip if already analyzed (idempotent; --force to redo)
    status = await _latest_pair_status(case["id"])
    if status == "analyzed" and not args.force:
        print("[1/3] ingest_final_version — דולג (הזוג כבר analyzed; --force לחידוש)")
    else:
        print("[1/3] ingest_final_version — דיסטילציית טיוטה↔סופי…", flush=True)
        raw = await ingest_final_version(case_number, file_path=final_path)
        try:
            env = json.loads(raw)
            if env.get("status") == "error":
                print(f"  ✗ {env.get('message')}")
                return 1
            d = env.get("data", {})
            ds = d.get("diff_stats", {})
            print(f"  ✓ change {ds.get('change_percent')}% · lessons {d.get('lessons_count')} "
                  f"· new_expr {d.get('new_expressions')}")
        except envelope_errors:
            # Best-effort reporting: an unparseable reply is shown, not fatal.
            print(f"  (ingest returned: {str(raw)[:200]})")

    # [2] enroll into style_corpus (idempotent) — lessons need a corpus_id
    print("[2/3] רישום לקורפוס-הסגנון (idempotent)…", flush=True)
    if await _has_style_corpus(case_number):
        print("  ✓ כבר רשום בקורפוס-הסגנון")
    else:
        r = await document_upload_training(
            final_path,
            decision_number=case_number,
            # `or ''` (not a .get default) so a NULL column never renders as "None".
            title=f"החלטה סופית — {case.get('proceeding_type') or ''} {case_number}".strip(),
            practice_area=case.get("practice_area") or "appeals_committee",
            appeal_subtype=case.get("appeal_subtype") or "",
        )
        try:
            upload_env = json.loads(r)
            if upload_env.get("status") == "error":
                # Report the failure honestly instead of "✓ corpus_id None".
                # NOTE(review): flow continues to the panel as before; confirm
                # whether a failed enrollment should abort with a non-zero code.
                print(f"  ✗ {upload_env.get('message')}")
            else:
                print(f"  ✓ corpus_id {upload_env.get('data', {}).get('corpus_id')}")
        except envelope_errors:
            print(f"  (training upload returned: {str(r)[:160]})")

    # [3] two-judge style panel (DeepSeek + Gemini)
    apply = not args.dry_run
    print(f"[3/3] פאנל-סגנון דו-סוכני (DeepSeek+Gemini) {'--apply' if apply else '(dry-run)'}…",
          flush=True)
    # Imported here because scripts/ is only put on sys.path at module load time.
    import style_lesson_panel as slp
    rc = await slp.main(Namespace(
        case=case_number, pair_id=None, apply=apply, limit=0, concurrency=4,
    ))
    if rc:
        # Do not print the success banner when the panel signalled a failure.
        print(f"\n✗ פאנל-הסגנון הסתיים עם קוד-יציאה {rc}")
        return rc
    print("\n✓ pipeline-למידה הושלם" + (" (dry-run)" if args.dry_run else ""))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: parse the flags, run the async pipeline, and exit with
    # the code it returns.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--case", required=True, help="case_number, e.g. 8126-03-25")
    parser.add_argument(
        "--dry-run",
        dest="dry_run",
        action="store_true",
        help="run the chain but the style panel in dry-run (no decision_lesson writes)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="re-run ingest_final_version even if the pair is already analyzed",
    )
    cli_args = parser.parse_args()
    exit_code = asyncio.run(main(cli_args))
    raise SystemExit(exit_code)
|
||||
Reference in New Issue
Block a user