"""Tests for the durable pipeline runtime (scripts/_pipeline_runtime.py / X16).

The LINEAR fallback is tested unconditionally. The DURABLE (LangGraph) path —
crash-then-resume and --fresh — is tested only where ``langgraph`` is installed
(``importorskip``), so the suite still passes in a venv without it (the runtime
itself degrades gracefully there too).
"""

from __future__ import annotations

import asyncio
import importlib.util
import sys
from pathlib import Path

import pytest

# scripts/ is not a package, so scripts/_pipeline_runtime.py is loaded by path.
_RT = Path(__file__).resolve().parents[2].joinpath("scripts", "_pipeline_runtime.py")
_spec = importlib.util.spec_from_file_location("_pipeline_runtime", _RT)
rt = importlib.util.module_from_spec(_spec)
# Registered under its module name *before* execution, so anything that looks
# the module up in sys.modules while it is being executed can find it.
sys.modules["_pipeline_runtime"] = rt
_spec.loader.exec_module(rt)  # type: ignore[union-attr]


def _counting_steps(fail_step2_once: bool):
    """Build four steps (s1..s4) together with a per-step run counter.

    Each step's coroutine increments its own counter and returns
    ``{name: "ok"}``. When ``fail_step2_once`` is true, the first call of s2
    raises ``RuntimeError`` *without* being counted; later calls succeed.

    Returns:
        ``(steps, runs)`` — the list of ``rt.Step`` objects and the dict
        mapping step name to the number of times it actually completed.
    """
    step_names = ("s1", "s2", "s3", "s4")
    runs = dict.fromkeys(step_names, 0)
    # Shared across calls of the returned steps: flips to True once the
    # simulated crash has fired, so it fires at most once.
    crash_flag = {"s2_failed": False}

    def mk(name: str, fail: bool = False) -> rt.Step:
        async def run(results: dict) -> dict:
            if fail and not crash_flag["s2_failed"]:
                # Mark the crash as spent before raising so a retry succeeds.
                crash_flag["s2_failed"] = True
                raise RuntimeError(f"{name} simulated crash")
            runs[name] += 1
            return {name: "ok"}

        return rt.Step(name, run)

    # Only s2 is ever armed to crash; every other step is built with fail=False.
    steps = [mk(name, name == "s2" and fail_step2_once) for name in step_names]
    return steps, runs


def test_linear_fallback_runs_all_steps() -> None:
    """The linear runner completes every step exactly once."""
    pipeline, counts = _counting_steps(fail_step2_once=False)

    result = asyncio.run(rt._run_linear(pipeline))

    assert result == {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
    assert all(count == 1 for count in counts.values())


def test_resume_skips_completed_steps(tmp_path: Path) -> None:
    """After a crash in s2, a second run on the same thread does not redo s1."""
    pytest.importorskip("langgraph")
    checkpoint_path = tmp_path / "rt.sqlite"
    pipeline, counts = _counting_steps(fail_step2_once=True)
    thread = "halacha:RESUME-TEST"

    def attempt() -> dict:
        # Same steps, thread id and checkpoint file on every attempt.
        return asyncio.run(
            rt.run_pipeline(pipeline, thread_id=thread, checkpoint_db=checkpoint_path)
        )

    # Run 1: s2 crashes — s1 ran and is checkpointed.
    with pytest.raises(RuntimeError):
        attempt()
    assert counts == {"s1": 1, "s2": 0, "s3": 0, "s4": 0}

    # Run 2: resume — s1 is NOT re-run; s2/s3/s4 complete.
    result = attempt()
    assert result == {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
    assert counts["s1"] == 1, "completed step s1 must NOT re-run on resume"
    assert [counts[step] for step in ("s2", "s3", "s4")] == [1, 1, 1]


def test_fresh_reruns_all_after_completion(tmp_path: Path) -> None:
    """A ``fresh=True`` run on a finished thread executes every step again."""
    pytest.importorskip("langgraph")
    checkpoint_path = tmp_path / "rt2.sqlite"
    pipeline, counts = _counting_steps(fail_step2_once=False)
    thread = "halacha:FRESH-TEST"

    def launch(**extra: bool) -> dict:
        # Extra keyword arguments (here only ``fresh``) go straight to run_pipeline.
        return asyncio.run(
            rt.run_pipeline(
                pipeline, thread_id=thread, checkpoint_db=checkpoint_path, **extra
            )
        )

    launch()
    assert all(count == 1 for count in counts.values())

    # fresh=True clears the completed checkpoint and runs everything again.
    launch(fresh=True)
    assert all(count == 2 for count in counts.values()), "fresh run must re-execute every step"