scripts/_pipeline_runtime.py — runtime עמידות משותף: עוטף רשימת-צעדים async ב-LangGraph StateGraph ליניארי עם AsyncSqliteSaver (checkpoint לכל צעד). קריסה/OOM ממשיכה מהצעד שנכשל במקום להריץ הכל מחדש. degradation חיננית: ללא langgraph → ריצה ליניארית כמו קודם (הכפתור לא נשבר). מימוש אחד לשני הפייפליינים (G2). final_halacha_pipeline.py — 4 הצעדים ([0]extract [1]citations [2]corroboration [3]panel) רצים דרך ה-runtime. CLI זהה + --fresh (ברירת-מחדל auto-resume). thread יציב לכל תיק; dry-run = preview נפרד (תמיד fresh). קריסה בפאנל [3] → resume מ-[3] (steps 0-2 שמורים). pyproject: extra "durable" (langgraph + langgraph-checkpoint-sqlite) — host-only, optional. data/checkpoints/ ב-.gitignore. גבול (X16 §1): LangGraph רק כמנוע-פנימי של הסקריפט — לא orchestrator (לא מסלול מקביל ל-Paperclip; G2/G12). #108 (atomic extract) קדם לזה כתנאי. אימות: test_pipeline_runtime.py — עם langgraph (venv-זמני): 3 passed (resume מדלג צעדים שהושלמו · fresh מריץ-מחדש · linear). בלי langgraph (venv משותף): 1 passed + 2 skipped (degradation). final_halacha מתקמפל ומיובא נקי בשני המצבים. הרצה end-to-end על הפייפליין החי (DB+LLM) — לאחר `pip install -e ".[durable]"` בעץ הראשי. Invariants: INV-DUR1 (עמידות), G2 (runtime יחיד), G3 (idempotency מחוזק). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
81 lines
3.1 KiB
Python
81 lines
3.1 KiB
Python
"""Tests for the durable pipeline runtime (scripts/_pipeline_runtime.py / X16).
|
|
|
|
The LINEAR fallback is tested unconditionally. The DURABLE (LangGraph) path —
|
|
crash-then-resume and --fresh — is tested only where ``langgraph`` is installed
|
|
(``importorskip``), so the suite still passes in a venv without it (the runtime
|
|
itself degrades gracefully there too).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import importlib.util
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Load scripts/_pipeline_runtime.py by file path: scripts/ is not a package,
# so a regular ``import`` statement cannot reach it.
_RT = Path(__file__).resolve().parents[2] / "scripts" / "_pipeline_runtime.py"
_spec = importlib.util.spec_from_file_location("_pipeline_runtime", _RT)
# spec_from_file_location may return None, and a spec may carry no loader;
# fail with a readable message instead of an AttributeError on None.
if _spec is None or _spec.loader is None:
    raise ImportError(f"cannot build an import spec for {_RT}")
rt = importlib.util.module_from_spec(_spec)
# Register the module under its name *before* executing it, so that any
# lookup of "_pipeline_runtime" in sys.modules during its own import succeeds.
sys.modules["_pipeline_runtime"] = rt
_spec.loader.exec_module(rt)
|
|
|
|
|
|
def _counting_steps(fail_step2_once: bool):
    """Build four steps ``s1``..``s4`` together with a per-step run counter.

    Each step increments its own counter when its body completes and returns
    ``{<name>: "ok"}``. When ``fail_step2_once`` is true, the first invocation
    of ``s2`` raises ``RuntimeError`` *before* counting itself; every later
    invocation succeeds.

    Returns:
        ``(steps, runs)`` — the list of ``rt.Step`` objects and the dict
        mapping step name to the number of completed executions.
    """
    runs = dict.fromkeys(("s1", "s2", "s3", "s4"), 0)
    # Shared across all steps so the simulated crash fires a single time only.
    state = {"s2_failed": False}

    def make_step(name: str, fail: bool = False) -> rt.Step:
        async def run(results: dict) -> dict:
            crash_now = fail and not state["s2_failed"]
            if crash_now:
                # Remember the crash so the retry of this step goes through.
                state["s2_failed"] = True
                raise RuntimeError(f"{name} simulated crash")
            runs[name] += 1
            return {name: "ok"}

        return rt.Step(name, run)

    # Only s2 is ever allowed to fail; the other steps always succeed.
    steps = [
        make_step("s1"),
        make_step("s2", fail_step2_once),
        make_step("s3"),
        make_step("s4"),
    ]
    return steps, runs
|
|
|
|
|
|
def test_linear_fallback_runs_all_steps() -> None:
    """The linear fallback executes every step exactly once, merging outputs."""
    steps, runs = _counting_steps(fail_step2_once=False)

    merged = asyncio.run(rt._run_linear(steps))

    expected = {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
    assert merged == expected
    assert all(count == 1 for count in runs.values())
|
|
|
|
|
|
def test_resume_skips_completed_steps(tmp_path: Path) -> None:
    """After a crash in s2, a second run on the same thread resumes at s2.

    Run 1 raises from s2 once s1 has completed. Run 2 reuses the same
    ``thread_id`` and checkpoint database and must finish s2..s4 without
    executing s1 again.

    Skipped unless both ``langgraph`` and its SQLite checkpointer can be
    imported.
    """
    pytest.importorskip("langgraph")
    # The SQLite checkpointer ships in a separate distribution
    # (langgraph-checkpoint-sqlite). With only ``langgraph`` installed the
    # durable path is presumably unavailable, so a resume could not be
    # observed; skip rather than report a misleading failure.
    pytest.importorskip("langgraph.checkpoint.sqlite.aio")

    db = tmp_path / "rt.sqlite"
    steps, runs = _counting_steps(fail_step2_once=True)
    tid = "halacha:RESUME-TEST"

    # Run 1: s2 crashes — s1 ran and is checkpointed.
    with pytest.raises(RuntimeError):
        asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
    assert runs == {"s1": 1, "s2": 0, "s3": 0, "s4": 0}

    # Run 2: resume — s1 is NOT re-run; s2/s3/s4 complete.
    out = asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
    assert out == {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
    assert runs["s1"] == 1, "completed step s1 must NOT re-run on resume"
    assert runs["s2"] == 1 and runs["s3"] == 1 and runs["s4"] == 1
|
|
|
|
|
|
def test_fresh_reruns_all_after_completion(tmp_path: Path) -> None:
    """``fresh=True`` re-executes every step of an already completed thread.

    The pipeline is first run to completion, then run again with the same
    ``thread_id`` and checkpoint database but ``fresh=True``; every step
    counter must then read 2.

    Skipped unless both ``langgraph`` and its SQLite checkpointer can be
    imported.
    """
    pytest.importorskip("langgraph")
    # The SQLite checkpointer ships in a separate distribution
    # (langgraph-checkpoint-sqlite); without it the durable path is presumably
    # unavailable, so skip instead of failing.
    pytest.importorskip("langgraph.checkpoint.sqlite.aio")

    db = tmp_path / "rt2.sqlite"
    steps, runs = _counting_steps(fail_step2_once=False)
    tid = "halacha:FRESH-TEST"

    asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
    assert all(runs[s] == 1 for s in runs)

    # fresh=True clears the completed checkpoint and runs everything again.
    asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db, fresh=True))
    assert all(runs[s] == 2 for s in runs), "fresh run must re-execute every step"
|