Merge pull request 'feat(pipeline): עמידות (LangGraph) ל-final_halacha (P0, X16/INV-DUR1, #114)' (#178) from worktree-langgraph-durable-pipeline into main

2026-06-10 09:53:07 +00:00
parent 61d235175f e7d8b24d7c
commit f5650196b7
6 changed files with 303 additions and 33 deletions
--- a/mcp-server/pyproject.toml
+++ b/mcp-server/pyproject.toml
@@ -35,6 +35,16 @@ court-fetch = [
    "faster-whisper>=1.0.0",
    "h2>=4.0.0",          # Tier-0 supremedecisions uses httpx http2
 ]
+# Durable execution for the local one-shot pipelines (X16 / INV-DUR1) —
+# final_halacha_pipeline / final_learning_pipeline gain crash/OOM resume via
+# scripts/_pipeline_runtime.py. HOST-ONLY (the pipelines run locally, not in the
+# container): install on the host venv with `pip install -e ".[durable]"`. The
+# runtime degrades gracefully to linear execution when these are absent, so the
+# run-halacha / run-learning buttons keep working until then.
+durable = [
+    "langgraph>=1.0,<2.0",
+    "langgraph-checkpoint-sqlite>=3.0",
+]

 [build-system]
 requires = ["setuptools>=68.0"]
--- a/mcp-server/tests/test_pipeline_runtime.py
+++ b/mcp-server/tests/test_pipeline_runtime.py
@@ -0,0 +1,80 @@
+"""Tests for the durable pipeline runtime (scripts/_pipeline_runtime.py / X16).
+
+The LINEAR fallback is tested unconditionally. The DURABLE (LangGraph) path —
+crash-then-resume and --fresh — is tested only where ``langgraph`` is installed
+(``importorskip``), so the suite still passes in a venv without it (the runtime
+itself degrades gracefully there too).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import importlib.util
+import sys
+from pathlib import Path
+
+import pytest
+
+# Load scripts/_pipeline_runtime.py (scripts/ is not a package).
+_RT = Path(__file__).resolve().parents[2] / "scripts" / "_pipeline_runtime.py"
+_spec = importlib.util.spec_from_file_location("_pipeline_runtime", _RT)
+rt = importlib.util.module_from_spec(_spec)
+sys.modules["_pipeline_runtime"] = rt
+_spec.loader.exec_module(rt)  # type: ignore[union-attr]
+
+
+def _counting_steps(fail_step2_once: bool):
+    """4 steps; each records how many times it actually ran. s2 can fail once."""
+    runs = {"s1": 0, "s2": 0, "s3": 0, "s4": 0}
+    state = {"s2_failed": False}
+
+    def mk(name: str, fail: bool = False) -> rt.Step:
+        async def run(results: dict) -> dict:
+            if fail and not state["s2_failed"]:
+                state["s2_failed"] = True
+                raise RuntimeError(f"{name} simulated crash")
+            runs[name] += 1
+            return {name: "ok"}
+        return rt.Step(name, run)
+
+    steps = [mk("s1"), mk("s2", fail_step2_once), mk("s3"), mk("s4")]
+    return steps, runs
+
+
+def test_linear_fallback_runs_all_steps() -> None:
+    steps, runs = _counting_steps(fail_step2_once=False)
+    out = asyncio.run(rt._run_linear(steps))
+    assert out == {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
+    assert all(runs[s] == 1 for s in runs)
+
+
+def test_resume_skips_completed_steps(tmp_path: Path) -> None:
+    pytest.importorskip("langgraph")
+    db = tmp_path / "rt.sqlite"
+    steps, runs = _counting_steps(fail_step2_once=True)
+    tid = "halacha:RESUME-TEST"
+
+    # Run 1: s2 crashes — s1 ran and is checkpointed.
+    with pytest.raises(RuntimeError):
+        asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
+    assert runs == {"s1": 1, "s2": 0, "s3": 0, "s4": 0}
+
+    # Run 2: resume — s1 is NOT re-run; s2/s3/s4 complete.
+    out = asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
+    assert out == {"s1": "ok", "s2": "ok", "s3": "ok", "s4": "ok"}
+    assert runs["s1"] == 1, "completed step s1 must NOT re-run on resume"
+    assert runs["s2"] == 1 and runs["s3"] == 1 and runs["s4"] == 1
+
+
+def test_fresh_reruns_all_after_completion(tmp_path: Path) -> None:
+    pytest.importorskip("langgraph")
+    db = tmp_path / "rt2.sqlite"
+    steps, runs = _counting_steps(fail_step2_once=False)
+    tid = "halacha:FRESH-TEST"
+
+    asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db))
+    assert all(runs[s] == 1 for s in runs)
+
+    # fresh=True clears the completed checkpoint and runs everything again.
+    asyncio.run(rt.run_pipeline(steps, thread_id=tid, checkpoint_db=db, fresh=True))
+    assert all(runs[s] == 2 for s in runs), "fresh run must re-execute every step"