Merge pull request 'feat(pipeline): עמידות (LangGraph) ל-final_halacha (P0, X16/INV-DUR1, #114)' (#178) from worktree-langgraph-durable-pipeline into main
This commit was merged in pull request #178.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,6 +6,7 @@ data/backups/
|
||||
data/precedent-library/
|
||||
data/.auto-sync.log
|
||||
data/*.db
|
||||
# X16 durable-pipeline SQLite checkpoints (runtime artifact)
data/checkpoints/
|
||||
*.bak-pre-*
|
||||
mcp-server/.venv/
|
||||
__pycache__/
|
||||
|
||||
@@ -35,6 +35,16 @@ court-fetch = [
|
||||
"faster-whisper>=1.0.0",
|
||||
"h2>=4.0.0", # Tier-0 supremedecisions uses httpx http2
|
||||
]
|
||||
# Durable execution for the local one-shot pipelines (X16 / INV-DUR1) —
|
||||
# final_halacha_pipeline / final_learning_pipeline gain crash/OOM resume via
|
||||
# scripts/_pipeline_runtime.py. HOST-ONLY (the pipelines run locally, not in the
|
||||
# container): install on the host venv with `pip install -e ".[durable]"`. The
|
||||
# runtime degrades gracefully to linear execution when these are absent, so the
|
||||
# run-halacha / run-learning buttons keep working until then.
|
||||
durable = [
|
||||
"langgraph>=1.0,<2.0",
|
||||
"langgraph-checkpoint-sqlite>=3.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68.0"]
|
||||
|
||||
80
mcp-server/tests/test_pipeline_runtime.py
Normal file
80
mcp-server/tests/test_pipeline_runtime.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Tests for the durable pipeline runtime (scripts/_pipeline_runtime.py / X16).
|
||||
|
||||
The LINEAR fallback is tested unconditionally. The DURABLE (LangGraph) path —
|
||||
crash-then-resume and --fresh — is tested only where ``langgraph`` is installed
|
||||
(``importorskip``), so the suite still passes in a venv without it (the runtime
|
||||
itself degrades gracefully there too).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Load scripts/_pipeline_runtime.py (scripts/ is not a package).
|
||||
_RT = Path(__file__).resolve().parents[2] / "scripts" / "_pipeline_runtime.py"
|
||||
_spec = importlib.util.spec_from_file_location("_pipeline_runtime", _RT)
|
||||
rt = importlib.util.module_from_spec(_spec)
|
||||
sys.modules["_pipeline_runtime"] = rt
|
||||
_spec.loader.exec_module(rt) # type: ignore[union-attr]
|
||||
|
||||
|
||||
def _counting_steps(fail_step2_once: bool):
    """Build four fake pipeline steps (``s1``..``s4``) and their run counter.

    Every step bumps its own entry in the returned counter each time its body
    actually completes and contributes ``{<name>: "ok"}`` to the results.
    When ``fail_step2_once`` is truthy, the first invocation of ``s2`` raises
    ``RuntimeError`` *before* counting; later invocations succeed.

    Returns a ``(steps, runs)`` pair: the list of ``rt.Step`` objects and the
    shared ``{name: times_run}`` dict.
    """
    names = ("s1", "s2", "s3", "s4")
    run_counts = dict.fromkeys(names, 0)
    crashed = False  # flips to True after the single simulated failure

    def build(name, should_crash=False):
        async def body(results: dict) -> dict:
            nonlocal crashed
            if should_crash and not crashed:
                crashed = True
                raise RuntimeError(f"{name} simulated crash")
            run_counts[name] += 1
            return {name: "ok"}

        return rt.Step(name, body)

    pipeline = [
        build(name, should_crash=(name == "s2" and fail_step2_once))
        for name in names
    ]
    return pipeline, run_counts
|
||||
|
||||
|
||||
def test_linear_fallback_runs_all_steps() -> None:
    """The non-checkpointed fallback runs each step once and merges their outputs."""
    steps, runs = _counting_steps(fail_step2_once=False)
    merged = asyncio.run(rt._run_linear(steps))
    expected = {name: "ok" for name in ("s1", "s2", "s3", "s4")}
    assert merged == expected
    # Every counter must read exactly 1 — no step skipped, none repeated.
    assert set(runs.values()) == {1}
|
||||
|
||||
|
||||
def test_resume_skips_completed_steps(tmp_path: Path) -> None:
    """A crashed run resumes at the failed step without re-running finished ones.

    Skipped unless BOTH ``langgraph`` and its SQLite checkpointer are
    importable: ``run_pipeline`` falls back to linear, non-checkpointed
    execution when either import fails, and in that mode the second run would
    re-execute ``s1`` and this test would fail for the wrong reason.
    """
    pytest.importorskip("langgraph")
    pytest.importorskip("langgraph.checkpoint.sqlite.aio")
    db = tmp_path / "rt.sqlite"
    steps, runs = _counting_steps(fail_step2_once=True)
    tid = "halacha:RESUME-TEST"

    # Run 1: s2 crashes — s1 ran and is checkpointed.
    with pytest.raises(RuntimeError):
        asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
    assert runs == {"s1": 1, "s2": 0, "s3": 0, "s4": 0}

    # Run 2: resume — s1 is NOT re-run; s2/s3/s4 complete.
    out = asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
    assert out == {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
    assert runs["s1"] == 1, "completed step s1 must NOT re-run on resume"
    assert runs["s2"] == 1 and runs["s3"] == 1 and runs["s4"] == 1
|
||||
|
||||
|
||||
def test_fresh_reruns_all_after_completion(tmp_path: Path) -> None:
    """``fresh=True`` on an already-completed thread re-executes every step.

    Skipped unless BOTH ``langgraph`` and its SQLite checkpointer are
    importable; otherwise ``run_pipeline`` silently takes the linear fallback
    and the durable ``fresh`` path would not be exercised at all.
    """
    pytest.importorskip("langgraph")
    pytest.importorskip("langgraph.checkpoint.sqlite.aio")
    db = tmp_path / "rt2.sqlite"
    steps, runs = _counting_steps(fail_step2_once=False)
    tid = "halacha:FRESH-TEST"

    asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
    assert all(runs[s] == 1 for s in runs)

    # fresh=True clears the completed checkpoint and runs everything again.
    asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db, fresh=True))
    assert all(runs[s] == 2 for s in runs), "fresh run must re-execute every step"
|
||||
@@ -53,7 +53,8 @@
|
||||
| `halacha_panel_approve.py` | python | **פאנל-אישור הלכות (Trust-or-Escalate, dry-run).** 3 שופטים בלתי-תלויי-לינאז' (Opus/claude_session · DeepSeek · Gemini-2.5-flash) מצביעים על ה**ציר-הגס האמין** (92% חוצה-מודלים): נקיות→"הלכה לשמירה?"; nli_unsupported→"הציטוט תומך בכלל?" (שיפוט-מחדש); פגומות→re-extraction. רק ורדיקט מוסכם פועל אוטומטית, **פיצול מסלים ליו"ר** (INV-G10). `--apply` **מחווט** (clean: רוב 2/3; nli: פה-אחד-entailed מנקה flag) — הפיך, מגבה ל-`data/audit/` קודם. מפתחות: DeepSeek מ-`~/.hermes/...`, Gemini מ-`~/.env`. **חובה מקומי**. dry-run 2026-06-07: 197→103 אוטו (פה-אחד) / ~15 (רוב). | ידני / שלב-אימות-הלכות במסלול-הסופי |
|
||||
| `style_lesson_panel.py` | python | **פאנל-סגנון דו-סוכני (למידה כפולה).** על-גבי דיסטילציית-ה-Opus (draft↔final ב-`draft_final_pairs.analysis`), שני שופטים בלתי-תלויים — DeepSeek + Gemini-2.5-flash — מצביעים לכל לקח על השאלה הגסה "האם זו הנחיית-סגנון מופשטת ובת-הכללה (INV-LRN5 — קול ולא מהות)?". הסכמה 2/2-keep → נכתב כ-`decision_lesson` (`source=panel:deepseek+gemini`); 2/2-drop → לא נכתב; פיצול/substance → מוסלם ליו"ר. `--apply` הפיך, מגבה ל-`data/audit/`. הטמעה ל-SKILL.md/lessons.md נשארת שער-יו"ר ידני (INV-G10). מפתחות כמו פאנל-ההלכות. **חובה מקומי**. `--case <num>` / `--pair-id <uuid>`. | שלב-למידה במסלול-הסופי |
|
||||
| `final_learning_pipeline.py` | python | **תזמור שלב-הלמידה (פקודה אחת).** מופעל ע"י הרמס כשלוחצים "הרץ למידת-קול" במסלול-הסופי. דטרמיניסטי: (1) `ingest_final_version` עם נתיב-הסופי, (2) רישום לקורפוס-הסגנון (idempotent), (3) `style_lesson_panel --apply`. מקפל את הזרימה לפקודה אחת כדי שהסוכן לא ירכיב כמה קריאות (חסין). idempotent. **חובה מקומי**. `--case <num>`. | אוטו (כפתור run-learning) / ידני |
|
||||
| `final_halacha_pipeline.py` | python | **תזמור שלב-אימות-ההלכות (פקודה אחת).** מופעל ע"י הרמס כשלוחצים "הרץ אימות-הלכות". דטרמיניסטי: (1) `extract_internal_citations(chair)`, (2) `corroboration.build_all()`, (3) `halacha_panel_approve --apply`. **חובה מקומי**. `--case <num>` / `--limit N` (תקרת תור). | אוטו (כפתור run-halacha) / ידני |
|
||||
| `final_halacha_pipeline.py` | python | **תזמור שלב-אימות-ההלכות (פקודה אחת).** מופעל ע"י הרמס כשלוחצים "הרץ אימות-הלכות". דטרמיניסטי: (0) `precedent_extract_halachot` (החלטה), (1) `extract_internal_citations(chair)`, (2) `corroboration.build_all()`, (3) `halacha_panel_approve --apply`. **עמיד (X16/INV-DUR1):** 4 הצעדים רצים דרך `_pipeline_runtime.py` עם checkpoint לכל תיק — קריסה בפאנל [3] ממשיכה מ-[3]. ברירת-מחדל auto-resume; `--fresh` ריצה נקייה. **חובה מקומי**. `--case <num>` / `--limit N` / `--fresh`. | אוטו (כפתור run-halacha) / ידני |
|
||||
| `_pipeline_runtime.py` | python | **runtime עמידות משותף (X16 / INV-DUR1)** ל-`final_halacha_pipeline` ו-`final_learning_pipeline` (מימוש אחד, G2). עוטף רשימת-צעדים async ב-LangGraph `StateGraph` ליניארי עם `AsyncSqliteSaver` (checkpoint לכל צעד; resume מדלג על צעדים שהושלמו). **degradation חיננית:** ללא langgraph (`pip install -e ".[durable]"`) — ריצה ליניארית כמו קודם (הכפתור לא נשבר). `Step(name, run)` + `run_pipeline(steps, thread_id, checkpoint_db, fresh)`. נבדק: `mcp-server/tests/test_pipeline_runtime.py`. | מיובא ע"י סקריפטי-המסלול-הסופי |
|
||||
| `curator_apply_pipeline_branch.py` | python | **מקור-אמת לחיווט-הכפתורים של הרמס.** prompt-ה-curator חי רק ב-Paperclip DB (`agents.adapter_config.promptTemplate`). הסקריפט מקדים branch כך שיקיצה עם reason `final_learning_*`/`final_halacha_*` מריצה את ה-pipeline המתאים (HOME/DOTENV/DATA_DIR מוחלטים → DeepSeek+Gemini keys + DATA_DIR נפתרים נכון) ועוצרת, אחרת §A/§B כרגיל. idempotent (מסיר branch קודם). מחיל על שני הסוכנים (CMP+CMPA). `--verify`. **להריץ אחרי reset/יצירה-מחדש של סוכן-curator.** | אחרי reset prompt של curator |
|
||||
| `halacha_panel_audit.py` | python | **רשת-ביטחון לפאנל** (selective-prediction monitoring) — דוגם הלכות שאושרו ע"י הפאנל (`reviewer LIKE 'panel:%'`), מריץ עליהן **שוב** את הצבעת-ה-KEEP של 3 השופטים, ומציף כל מקרה שכעת נוטה DROP (false-keep פוטנציאלי). report-only כברירת-מחדל; `--flag` מחזיר את ה-flips ל-`pending_review` לסקירת-יו"ר. `--sample N`/`--seed`. בסיס 2026-06-07: 0/15. מיועד להרצה תקופתית (שבועי). מייבא שופטים מ-`halacha_panel_approve`. **חובה מקומי**. | תקופתי (שבועי) — ניטור |
|
||||
| `halacha_panel_calibrate.py` | python | **כיול מדיניות-ההצבעה של הפאנל** (Trust-or-Escalate, ICLR 2025). מריץ את שאלת-ה-KEEP של `halacha_panel_approve` על מדגם-הזהב ומודד מול `is_holding` (הציר-הגס) precision+coverage לכל מדיניות (unanimous/majority) + ספירת false-keep/false-drop. נותן את **אחוז-הטעות בפועל** לבחירת סף-סיכון α. מייבא שופטים מ-`halacha_panel_approve` (מקור-אמת יחיד). read-only, **חובה מקומי**. | ידני — לפני חיווט `--apply` |
|
||||
|
||||
130
scripts/_pipeline_runtime.py
Normal file
130
scripts/_pipeline_runtime.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Durable execution runtime for the local one-shot pipelines (INV-DUR1 / X16).
|
||||
|
||||
Wraps an ordered list of named async steps in a LangGraph linear ``StateGraph``
|
||||
with a SQLite checkpointer, so a crash / OOM / kill resumes from the last
|
||||
COMPLETED step instead of re-running the whole pipeline (idempotency makes a
|
||||
re-run *safe*; durability makes it *not pay twice*).
|
||||
|
||||
Shared by ``final_halacha_pipeline.py`` and ``final_learning_pipeline.py`` — one
|
||||
implementation, not one-per-script (G2).
|
||||
|
||||
Graceful degradation: if ``langgraph`` is not installed (e.g. the shared venv
|
||||
hasn't been updated yet), the steps run LINEARLY — exactly as before — with a
|
||||
warning. The production button (run-halacha / run-learning, driven by Hermes)
|
||||
never breaks waiting on the dependency; it simply gains durable resume once
|
||||
``langgraph`` + ``langgraph-checkpoint-sqlite`` are present.
|
||||
|
||||
Scope (X16 §1): LangGraph is used ONLY as the internal engine of these local
|
||||
scripts — never as an agent-platform orchestrator (that would create a parallel
|
||||
path to Paperclip, breaking G2/G12). HITL stays with the chair gates / Paperclip.
|
||||
|
||||
A "step" is ``Step(name, run)`` where ``run`` is an async callable taking the
|
||||
accumulated results dict and returning a dict to merge into it (typically
|
||||
``{<something>: <summary>}``). The step's real side-effects (DB writes, the LLM
|
||||
panel) happen inside ``run``; LangGraph checkpoints *that the node finished* so a
|
||||
resume skips it.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Any, Awaitable, Callable, TypedDict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Shape of a step body: an async callable that receives the results accumulated
# so far and returns a dict to merge into them.
StepFn = Callable[[dict], Awaitable[dict]]


@dataclass(frozen=True)
class Step:
    """One named unit of pipeline work.

    ``name`` identifies the step (it is used as the graph node name by
    ``run_pipeline``); ``run`` is the async body that does the work. Instances
    are immutable.
    """

    name: str
    run: StepFn
|
||||
|
||||
|
||||
def _merge(a: dict, b: dict) -> dict:
    """Return a new dict holding ``a``'s entries overlaid with ``b``'s.

    Neither argument is mutated; on a key collision the value from ``b`` wins.
    """
    combined = dict(a)
    combined.update(b)
    return combined
|
||||
|
||||
|
||||
async def _run_linear(steps: list[Step]) -> dict:
    """Execute ``steps`` sequentially with no checkpointing (pre-X16 behaviour).

    Each step body receives the dict accumulated so far; whatever truthy dict
    it returns is folded into that accumulator. A falsy return (``None`` or an
    empty dict) contributes nothing. An exception raised by a step propagates
    to the caller and stops the run.
    """
    accumulated: dict[str, Any] = {}
    for current in steps:
        produced = await current.run(accumulated)
        if not produced:
            continue
        accumulated.update(produced)
    return accumulated
|
||||
|
||||
|
||||
async def run_pipeline(
    steps: list[Step],
    *,
    thread_id: str,
    checkpoint_db: str | Path,
    resume: bool = True,
    fresh: bool = False,
) -> dict:
    """Run ``steps`` in order with durable checkpointing keyed by ``thread_id``.

    * A brand-new ``thread_id`` (or ``fresh=True``) runs from the first step.
    * An INCOMPLETE thread (a prior run crashed mid-way) is RESUMED — completed
      steps are skipped, execution continues from the failed step. This only
      happens when ``resume`` is true (the default) and ``fresh`` is false.
    * A COMPLETED thread re-run (idempotent re-extraction) starts fresh — the
      stale checkpoint is cleared first so step-accumulators don't double-count.

    Args:
        steps: Ordered steps to execute. An empty list returns ``{}``
            immediately without touching the checkpoint store.
        thread_id: Checkpoint key; runs sharing it share resume state.
        checkpoint_db: Path of the SQLite checkpoint file. Its parent
            directory is created if missing.
        resume: When false, an interrupted thread is NOT resumed; its
            checkpoint is cleared and the run starts from the first step.
        fresh: Force a clean run from the first step, discarding any prior
            checkpoint for ``thread_id``.

    Returns:
        The accumulated results: the merge of every dict returned by the
        steps' ``run`` callables (later steps override earlier keys).

    If ``langgraph`` / ``langgraph-checkpoint-sqlite`` cannot be imported, a
    warning is logged and the steps run linearly without checkpointing.
    Exceptions raised by a step propagate to the caller.
    """
    if not steps:
        # Nothing to execute or checkpoint: same empty result the linear
        # runner yields, without creating a checkpoint file or graph.
        return {}

    try:
        from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
        from langgraph.graph import END, START, StateGraph
    except Exception as e:  # noqa: BLE001 — any import failure → safe linear path
        logger.warning(
            "langgraph unavailable (%s) — running %d steps LINEARLY without "
            "durable checkpointing (X16/INV-DUR1 inactive; install langgraph + "
            "langgraph-checkpoint-sqlite to enable resume).",
            e, len(steps),
        )
        return await _run_linear(steps)

    class State(TypedDict):
        # Each node returns a partial ``results`` dict; ``_merge`` folds it
        # into the accumulated one.
        results: Annotated[dict, _merge]

    def _make_node(step: Step):
        # Factory so each node closes over its own ``step`` rather than the
        # loop variable.
        async def _node(state: State) -> dict:
            out = await step.run(state.get("results", {}))
            return {"results": out or {}}

        return _node

    # Linear chain: START -> step[0] -> ... -> step[-1] -> END.
    graph = StateGraph(State)
    prev = START
    for step in steps:
        graph.add_node(step.name, _make_node(step))
        graph.add_edge(prev, step.name)
        prev = step.name
    graph.add_edge(prev, END)

    checkpoint_db = Path(checkpoint_db)
    checkpoint_db.parent.mkdir(parents=True, exist_ok=True)
    config = {"configurable": {"thread_id": thread_id}}

    async with AsyncSqliteSaver.from_conn_string(str(checkpoint_db)) as saver:
        app = graph.compile(checkpointer=saver)
        snapshot = await app.aget_state(config)
        ran = (snapshot.values or {}).get("results", {}) if snapshot else {}
        # Interrupted = some results were recorded AND nodes are still pending.
        incomplete = bool(ran) and tuple(snapshot.next or ()) != ()

        if resume and not fresh and incomplete:
            logger.info(
                "pipeline %s — resuming from %s (%d step(s) already done: %s)",
                thread_id, snapshot.next, len(ran), ", ".join(ran),
            )
            # ``None`` input continues from the stored checkpoint.
            final = await app.ainvoke(None, config)
        else:
            if snapshot and (snapshot.values or {}):
                # stale/completed checkpoint — clear so this is a true fresh run.
                await saver.adelete_thread(thread_id)
                if fresh and ran:
                    logger.info("pipeline %s — --fresh: cleared prior checkpoint", thread_id)
            final = await app.ainvoke({"results": {}}, config)

    return (final or {}).get("results", {})
|
||||
@@ -21,8 +21,16 @@ chair drives that from /precedents when a missing precedent is added.
|
||||
|
||||
Local-only. Idempotent. The panel pass over the full pending queue can take minutes.
|
||||
|
||||
Durable (X16 / INV-DUR1): the 4 steps run through scripts/_pipeline_runtime.py
|
||||
with a SQLite checkpoint per case (data/checkpoints/halacha.sqlite). A crash/OOM
|
||||
in the long panel [3] RESUMES from [3] on the next run instead of re-paying
|
||||
[0]–[2]. Default = auto-resume an interrupted run; ``--fresh`` forces a clean run
|
||||
from [0]. Requires the host extra ``pip install -e ".[durable]"`` (mcp-server);
|
||||
without it the steps run linearly (same as before) — the button never breaks.
|
||||
|
||||
cd ~/legal-ai/mcp-server
|
||||
.venv/bin/python ../scripts/final_halacha_pipeline.py --case 8126-03-25
|
||||
.venv/bin/python ../scripts/final_halacha_pipeline.py --case 8126-03-25 --fresh
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -35,6 +43,8 @@ from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
|
||||
import _pipeline_runtime # noqa: E402 — durable runtime (X16); scripts/ on sys.path
|
||||
from legal_mcp import config # noqa: E402
|
||||
from legal_mcp.services import corroboration, db # noqa: E402
|
||||
from legal_mcp.tools.citations import extract_internal_citations # noqa: E402
|
||||
from legal_mcp.tools.precedent_library import precedent_extract_halachot # noqa: E402
|
||||
@@ -59,54 +69,89 @@ async def main(args: argparse.Namespace) -> int:
|
||||
print(f"✗ תיק {case_number} לא נמצא")
|
||||
return 1
|
||||
chair = case.get("chair_name") or "דפנה תמיר"
|
||||
|
||||
# [0] extract the halachot the decision ITSELF states (its own row in case_law) —
|
||||
# so they are not left pending. Idempotent: skip when already completed or on dry-run.
|
||||
row = await _decision_law_row(case_number)
|
||||
if not row:
|
||||
print(f"[0/4] ההחלטה {case_number} אינה ב-case_law עדיין — דילוג על חילוץ-הלכות")
|
||||
elif row.get("halacha_extraction_status") == "completed":
|
||||
print(f"[0/4] חילוץ-הלכות מההחלטה — דולג (כבר completed)")
|
||||
elif args.dry_run:
|
||||
print(f"[0/4] חילוץ-הלכות מההחלטה — מדולג (dry-run)")
|
||||
else:
|
||||
|
||||
# The 4 steps as durable nodes (X16 / INV-DUR1): each is checkpointed the
|
||||
# moment it finishes, so a crash/OOM in the long panel [3] resumes from [3]
|
||||
# instead of re-paying [0]–[2]. Steps [0] and [2] stay non-fatal (record the
|
||||
# error and continue); [1]/[3] may raise → the graph halts and the next run
|
||||
# resumes there. All steps are idempotent, so a fresh re-run is also safe.
|
||||
|
||||
async def step_extract(results: dict) -> dict:
|
||||
# [0] extract the halachot the decision ITSELF states (its own case_law row).
|
||||
if not row:
|
||||
print(f"[0/4] ההחלטה {case_number} אינה ב-case_law עדיין — דילוג על חילוץ-הלכות")
|
||||
return {"extract": "skipped:not-enrolled"}
|
||||
if row.get("halacha_extraction_status") == "completed":
|
||||
print("[0/4] חילוץ-הלכות מההחלטה — דולג (כבר completed)")
|
||||
return {"extract": "skipped:completed"}
|
||||
if args.dry_run:
|
||||
print("[0/4] חילוץ-הלכות מההחלטה — מדולג (dry-run)")
|
||||
return {"extract": "skipped:dry-run"}
|
||||
print(f"[0/4] precedent_extract_halachot (החלטה {case_number})…", flush=True)
|
||||
try:
|
||||
raw0 = await precedent_extract_halachot(str(row["id"]))
|
||||
d0 = json.loads(raw0).get("data", {})
|
||||
print(f" ✓ status={d0.get('status')} stored={d0.get('stored', d0.get('extracted'))}")
|
||||
except Exception as e:
|
||||
return {"extract": d0.get("status", "done")}
|
||||
except Exception as e: # non-fatal — record and continue
|
||||
print(f" ⚠ halacha extraction failed (non-fatal): {e}")
|
||||
return {"extract": f"error:{e}"}
|
||||
|
||||
# [1] citation graph
|
||||
print(f"[1/4] extract_internal_citations (chair={chair})…", flush=True)
|
||||
raw = await extract_internal_citations(chair_name=chair, limit=0)
|
||||
try:
|
||||
d = json.loads(raw).get("data", {})
|
||||
print(f" ✓ extracted {d.get('extracted')} · linked {d.get('linked')} "
|
||||
f"· new {d.get('new')}")
|
||||
except Exception:
|
||||
print(f" (citations returned: {str(raw)[:160]})")
|
||||
async def step_citations(results: dict) -> dict:
|
||||
# [1] citation graph
|
||||
print(f"[1/4] extract_internal_citations (chair={chair})…", flush=True)
|
||||
raw = await extract_internal_citations(chair_name=chair, limit=0)
|
||||
try:
|
||||
d = json.loads(raw).get("data", {})
|
||||
print(f" ✓ extracted {d.get('extracted')} · linked {d.get('linked')} "
|
||||
f"· new {d.get('new')}")
|
||||
return {"citations": "done"}
|
||||
except Exception:
|
||||
print(f" (citations returned: {str(raw)[:160]})")
|
||||
return {"citations": "unparsed"}
|
||||
|
||||
# [2] corroboration signal + policy (whole corpus backfill) — skipped on dry-run
|
||||
if args.dry_run:
|
||||
print("[2/4] corroboration_rebuild — מדולג (dry-run)")
|
||||
else:
|
||||
async def step_corroboration(results: dict) -> dict:
|
||||
# [2] corroboration signal + policy (whole corpus backfill) — skip on dry-run.
|
||||
if args.dry_run:
|
||||
print("[2/4] corroboration_rebuild — מדולג (dry-run)")
|
||||
return {"corroboration": "skipped:dry-run"}
|
||||
print("[2/4] corroboration_rebuild (backfill)…", flush=True)
|
||||
try:
|
||||
cr = await corroboration.build_all()
|
||||
print(f" ✓ {cr}")
|
||||
except Exception as e:
|
||||
return {"corroboration": "done"}
|
||||
except Exception as e: # non-fatal
|
||||
print(f" ⚠ corroboration failed (non-fatal): {e}")
|
||||
return {"corroboration": f"error:{e}"}
|
||||
|
||||
# [3] three-judge halacha panel
|
||||
apply = not args.dry_run
|
||||
print(f"[3/4] halacha_panel_approve {'--apply' if apply else '(dry-run)'} "
|
||||
f"(Opus+DeepSeek+Gemini)…", flush=True)
|
||||
import halacha_panel_approve as hpa
|
||||
rc = await hpa.main(Namespace(limit=args.limit, concurrency=6, apply=apply))
|
||||
async def step_panel(results: dict) -> dict:
|
||||
# [3] three-judge halacha panel (the long step durability protects).
|
||||
apply = not args.dry_run
|
||||
print(f"[3/4] halacha_panel_approve {'--apply' if apply else '(dry-run)'} "
|
||||
f"(Opus+DeepSeek+Gemini)…", flush=True)
|
||||
import halacha_panel_approve as hpa
|
||||
rc = await hpa.main(Namespace(limit=args.limit, concurrency=6, apply=apply))
|
||||
return {"panel_rc": rc or 0}
|
||||
|
||||
steps = [
|
||||
_pipeline_runtime.Step("extract_decision_halachot", step_extract),
|
||||
_pipeline_runtime.Step("citations", step_citations),
|
||||
_pipeline_runtime.Step("corroboration", step_corroboration),
|
||||
_pipeline_runtime.Step("panel", step_panel),
|
||||
]
|
||||
checkpoint_db = config.DATA_DIR / "checkpoints" / "halacha.sqlite"
|
||||
# Stable thread per case → an interrupted real run resumes; dry-runs are
|
||||
# previews (own thread, always fresh — never resume a stale preview).
|
||||
thread_id = f"halacha:{case_number}" + (":dryrun" if args.dry_run else "")
|
||||
results = await _pipeline_runtime.run_pipeline(
|
||||
steps,
|
||||
thread_id=thread_id,
|
||||
checkpoint_db=checkpoint_db,
|
||||
fresh=bool(args.fresh) or args.dry_run,
|
||||
)
|
||||
print("\n✓ pipeline-אימות-הלכות הושלם" + (" (dry-run)" if args.dry_run else ""))
|
||||
return rc or 0
|
||||
return int(results.get("panel_rc", 0) or 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -117,4 +162,7 @@ if __name__ == "__main__":
|
||||
help="cap pending halachot judged (0 = full queue)")
|
||||
ap.add_argument("--dry-run", dest="dry_run", action="store_true",
|
||||
help="citations only; skip corroboration writes; panel in dry-run")
|
||||
ap.add_argument("--fresh", action="store_true",
|
||||
help="ignore any incomplete checkpoint and run from step [0] "
|
||||
"(default: auto-resume an interrupted run; X16/INV-DUR1)")
|
||||
raise SystemExit(asyncio.run(main(ap.parse_args())))
|
||||
|
||||
Reference in New Issue
Block a user