From 084b31cd9b93f199cca8d6617eadbcaf4be3fcb7 Mon Sep 17 00:00:00 2001 From: Chaim Date: Sat, 30 May 2026 17:58:50 +0000 Subject: [PATCH] fix(qa): enforce critical-QA gate on export + fix neutral_background critical-but-passed (GAP-15/16, INV-QA3/EX3) Co-Authored-By: Claude Opus 4.8 --- mcp-server/src/legal_mcp/services/db.py | 31 ++++ .../src/legal_mcp/services/qa_validator.py | 2 +- mcp-server/src/legal_mcp/tools/drafting.py | 20 +++ mcp-server/tests/test_export_qa_gate.py | 151 ++++++++++++++++++ 4 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 mcp-server/tests/test_export_qa_gate.py diff --git a/mcp-server/src/legal_mcp/services/db.py b/mcp-server/src/legal_mcp/services/db.py index 540ff1a..ac9ac70 100644 --- a/mcp-server/src/legal_mcp/services/db.py +++ b/mcp-server/src/legal_mcp/services/db.py @@ -1509,6 +1509,37 @@ async def get_decision_by_case(case_id: UUID) -> dict | None: return d +async def get_critical_qa_failures(case_id: UUID) -> list[dict]: + """Return critical-severity failures from the case's latest QA run. + + ``qa_results`` is cleared+rewritten per ``validate_decision`` run, so the + current rows for a ``case_id`` ARE the latest run. Returns rows where + ``severity='critical' AND passed=false``. Callers distinguish "no QA run + yet" (no rows at all) via ``qa_run_exists`` below. + """ + pool = await get_pool() + async with pool.acquire() as conn: + rows = await conn.fetch( + """SELECT check_name, severity, passed, errors + FROM qa_results + WHERE case_id = $1 AND severity = 'critical' AND passed = false + ORDER BY check_name""", + case_id, + ) + return [dict(r) for r in rows] + + +async def qa_run_exists(case_id: UUID) -> bool: + """True if a QA run has ever been recorded for this case (any rows).""" + pool = await get_pool() + async with pool.acquire() as conn: + n = await conn.fetchval( + "SELECT count(*) FROM qa_results WHERE case_id = $1", + case_id, + ) + return bool(n) + + async def update_decision(decision_id: UUID, **fields) -> None: if not fields: return diff --git a/mcp-server/src/legal_mcp/services/qa_validator.py b/mcp-server/src/legal_mcp/services/qa_validator.py index f585208..8b90040 100644 --- a/mcp-server/src/legal_mcp/services/qa_validator.py +++ b/mcp-server/src/legal_mcp/services/qa_validator.py @@ -67,7 +67,7 @@ def check_neutral_background(blocks: list[dict]) -> dict: """בדיקת ניטרליות בלוק הרקע (ו).""" vav = next((b for b in blocks if b["block_id"] == "block-vav"), None) if not vav or not vav.get("content"): - return {"name": "neutral_background", "passed": True, "errors": [], "severity": "critical"} + return {"name": "neutral_background", "passed": True, "errors": [], "severity": "warning"} errors = [] lines = vav["content"].split("\n") diff --git a/mcp-server/src/legal_mcp/tools/drafting.py b/mcp-server/src/legal_mcp/tools/drafting.py index 06b2007..81dd7ec 100644 --- a/mcp-server/src/legal_mcp/tools/drafting.py +++ b/mcp-server/src/legal_mcp/tools/drafting.py @@ -399,6 +399,26 @@ async def export_docx(case_number: str, output_path: str = "") -> str: case_id = UUID(case["id"]) + # INV-EX3 / INV-QA3: a decision cannot be exported while critical QA gates + # fail (or before QA has been run at all). Gate on the STORED qa_results — + # cheap SELECT, no LLM re-run. + if not await db.qa_run_exists(case_id): + return json.dumps({ + "status": "error", + "message": "ייצוא נחסם: בקרת איכות (QA) טרם רצה על התיק. " + "הרץ validate_decision לפני ייצוא.", + }, ensure_ascii=False, indent=2) + + critical = await db.get_critical_qa_failures(case_id) + if critical: + gate_names = ", ".join(r["check_name"] for r in critical) + return json.dumps({ + "status": "error", + "message": f"ייצוא נחסם: שערי QA קריטיים נכשלו ({gate_names}). " + f"תקן את הליקויים והרץ validate_decision מחדש לפני ייצוא.", + "failed_gates": [r["check_name"] for r in critical], + }, ensure_ascii=False, indent=2) + try: path = await docx_exporter.export_decision(case_id, output_path or None) # Register this export as the new source of truth diff --git a/mcp-server/tests/test_export_qa_gate.py b/mcp-server/tests/test_export_qa_gate.py new file mode 100644 index 0000000..2a078d2 --- /dev/null +++ b/mcp-server/tests/test_export_qa_gate.py @@ -0,0 +1,151 @@ +"""Regression tests for FU-6. + +GAP-16 (INV-QA consistency): ``check_neutral_background`` must NOT return a +``severity='critical'`` result while ``passed=True``. The empty/missing +block-ו fallback now reports ``severity='warning'`` (consistent with passed). + +GAP-15 (INV-EX3 / INV-QA3): ``export_docx`` must refuse to export while +critical QA gates fail OR before any QA run exists. It gates on the STORED +``qa_results`` (cheap SELECT via ``db.get_critical_qa_failures`` / +``db.qa_run_exists``) — it does NOT re-run the LLM validator. + +All tests run fully OFFLINE — the pool / db helpers / exporter / git are +monkeypatched. No live Postgres needed. +""" + +from __future__ import annotations + +import asyncio +import json + +import pytest + +from legal_mcp.services import db +from legal_mcp.services import qa_validator +from legal_mcp.tools import drafting + + +# ── GAP-16 ──────────────────────────────────────────────────────── + +def test_neutral_background_empty_block_is_warning_not_critical() -> None: + """Empty/missing block-ו → passed=True, so severity must be 'warning'.""" + res = qa_validator.check_neutral_background([]) # no block-vav present + assert res["passed"] is True + assert res["severity"] == "warning", ( + "a passed result must not carry severity='critical' (GAP-16)" + ) + + +def test_neutral_background_dirty_block_still_critical_path_untouched() -> None: + """A block-ו with judgment words still fails — fix didn't soften real checks.""" + bad_word = qa_validator.VALUE_WORDS[0] + res = qa_validator.check_neutral_background( + [{"block_id": "block-vav", "content": f"הרקע: {bad_word} מאוד"}] + ) + assert res["passed"] is False + assert res["errors"], "judgment-word violation should be reported" + + +# ── GAP-15 ──────────────────────────────────────────────────────── + +@pytest.fixture() +def patched_export(monkeypatch: pytest.MonkeyPatch) -> dict: + """Monkeypatch case lookup, exporter, draft-path setter, and git so that + ``export_docx`` is isolated to the QA-gate decision. Returns a dict of + call-tracking flags. + """ + calls = {"exported": False, "set_draft": False, "committed": False} + + async def _get_case_by_number(case_number: str) -> dict: + return {"id": "00000000-0000-0000-0000-000000000001"} + + async def _export_decision(case_id, output_path=None) -> str: + calls["exported"] = True + return "/tmp/decision.docx" + + async def _set_active_draft_path(case_id, path) -> None: + calls["set_draft"] = True + + def _commit_and_push(case_dir, msg) -> None: + calls["committed"] = True + + # find_case_dir is called only on the success path; make it a no-op dir + class _FakeDir: + def exists(self) -> bool: + return False + + monkeypatch.setattr(db, "get_case_by_number", _get_case_by_number) + monkeypatch.setattr(drafting.config, "find_case_dir", lambda cn: _FakeDir()) + monkeypatch.setattr(drafting.git_sync, "commit_and_push", _commit_and_push) + # docx_exporter / set_active_draft_path are looked up dynamically; patch both + import legal_mcp.services.docx_exporter as docx_exporter + monkeypatch.setattr(docx_exporter, "export_decision", _export_decision) + monkeypatch.setattr(db, "set_active_draft_path", _set_active_draft_path) + return calls + + +def _run(coro): + return asyncio.run(coro) + + +def test_export_blocked_when_no_qa_run( + patched_export: dict, monkeypatch: pytest.MonkeyPatch +) -> None: + async def _qa_run_exists(case_id) -> bool: + return False + + async def _get_critical(case_id) -> list: + return [] + + monkeypatch.setattr(db, "qa_run_exists", _qa_run_exists) + monkeypatch.setattr(db, "get_critical_qa_failures", _get_critical) + + out = json.loads(_run(drafting.export_docx("8001-24"))) + assert out["status"] == "error" + assert "QA" in out["message"] or "validate_decision" in out["message"] + assert patched_export["exported"] is False, "must not call the exporter" + assert patched_export["committed"] is False, "must not git-commit" + + +def test_export_blocked_when_critical_failures( + patched_export: dict, monkeypatch: pytest.MonkeyPatch +) -> None: + async def _qa_run_exists(case_id) -> bool: + return True + + async def _get_critical(case_id) -> list: + return [ + {"check_name": "claims_coverage", "severity": "critical", + "passed": False, "errors": []}, + {"check_name": "structural_integrity", "severity": "critical", + "passed": False, "errors": []}, + ] + + monkeypatch.setattr(db, "qa_run_exists", _qa_run_exists) + monkeypatch.setattr(db, "get_critical_qa_failures", _get_critical) + + out = json.loads(_run(drafting.export_docx("8001-24"))) + assert out["status"] == "error" + assert out["failed_gates"] == ["claims_coverage", "structural_integrity"] + assert "claims_coverage" in out["message"] + assert patched_export["exported"] is False, "must not call the exporter" + assert patched_export["committed"] is False, "must not git-commit" + + +def test_export_proceeds_when_clean( + patched_export: dict, monkeypatch: pytest.MonkeyPatch +) -> None: + async def _qa_run_exists(case_id) -> bool: + return True + + async def _get_critical(case_id) -> list: + return [] + + monkeypatch.setattr(db, "qa_run_exists", _qa_run_exists) + monkeypatch.setattr(db, "get_critical_qa_failures", _get_critical) + + out = json.loads(_run(drafting.export_docx("8001-24"))) + assert out["status"] == "completed", out + assert out["path"] == "/tmp/decision.docx" + assert patched_export["exported"] is True, "clean QA must allow export" + assert patched_export["set_draft"] is True, "active_draft_path must be set"