merge: FU-6 — code-enforced QA gates (GAP-15/16)
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m38s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m38s
export_docx hard-blocks on critical QA failures (gates on stored qa_results, no LLM re-run); neutral_background severity consistency fix; export HTTP endpoint returns 409 on block (UI shows error, not false success). Verified offline (test_export_qa_gate.py 5/5). Closes PR #9.
This commit is contained in:
@@ -1509,6 +1509,37 @@ async def get_decision_by_case(case_id: UUID) -> dict | None:
|
|||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
async def get_critical_qa_failures(case_id: UUID) -> list[dict]:
|
||||||
|
"""Return critical-severity failures from the case's latest QA run.
|
||||||
|
|
||||||
|
``qa_results`` is cleared+rewritten per ``validate_decision`` run, so the
|
||||||
|
current rows for a ``case_id`` ARE the latest run. Returns rows where
|
||||||
|
``severity='critical' AND passed=false``. Callers distinguish "no QA run
|
||||||
|
yet" (no rows at all) via ``qa_run_exists`` below.
|
||||||
|
"""
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
rows = await conn.fetch(
|
||||||
|
"""SELECT check_name, severity, passed, errors
|
||||||
|
FROM qa_results
|
||||||
|
WHERE case_id = $1 AND severity = 'critical' AND passed = false
|
||||||
|
ORDER BY check_name""",
|
||||||
|
case_id,
|
||||||
|
)
|
||||||
|
return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
async def qa_run_exists(case_id: UUID) -> bool:
|
||||||
|
"""True if a QA run has ever been recorded for this case (any rows)."""
|
||||||
|
pool = await get_pool()
|
||||||
|
async with pool.acquire() as conn:
|
||||||
|
n = await conn.fetchval(
|
||||||
|
"SELECT count(*) FROM qa_results WHERE case_id = $1",
|
||||||
|
case_id,
|
||||||
|
)
|
||||||
|
return bool(n)
|
||||||
|
|
||||||
|
|
||||||
async def update_decision(decision_id: UUID, **fields) -> None:
|
async def update_decision(decision_id: UUID, **fields) -> None:
|
||||||
if not fields:
|
if not fields:
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ def check_neutral_background(blocks: list[dict]) -> dict:
|
|||||||
"""בדיקת ניטרליות בלוק הרקע (ו)."""
|
"""בדיקת ניטרליות בלוק הרקע (ו)."""
|
||||||
vav = next((b for b in blocks if b["block_id"] == "block-vav"), None)
|
vav = next((b for b in blocks if b["block_id"] == "block-vav"), None)
|
||||||
if not vav or not vav.get("content"):
|
if not vav or not vav.get("content"):
|
||||||
return {"name": "neutral_background", "passed": True, "errors": [], "severity": "critical"}
|
return {"name": "neutral_background", "passed": True, "errors": [], "severity": "warning"}
|
||||||
|
|
||||||
errors = []
|
errors = []
|
||||||
lines = vav["content"].split("\n")
|
lines = vav["content"].split("\n")
|
||||||
|
|||||||
@@ -399,6 +399,26 @@ async def export_docx(case_number: str, output_path: str = "") -> str:
|
|||||||
|
|
||||||
case_id = UUID(case["id"])
|
case_id = UUID(case["id"])
|
||||||
|
|
||||||
|
# INV-EX3 / INV-QA3: a decision cannot be exported while critical QA gates
|
||||||
|
# fail (or before QA has been run at all). Gate on the STORED qa_results —
|
||||||
|
# cheap SELECT, no LLM re-run.
|
||||||
|
if not await db.qa_run_exists(case_id):
|
||||||
|
return json.dumps({
|
||||||
|
"status": "error",
|
||||||
|
"message": "ייצוא נחסם: בקרת איכות (QA) טרם רצה על התיק. "
|
||||||
|
"הרץ validate_decision לפני ייצוא.",
|
||||||
|
}, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
critical = await db.get_critical_qa_failures(case_id)
|
||||||
|
if critical:
|
||||||
|
gate_names = ", ".join(r["check_name"] for r in critical)
|
||||||
|
return json.dumps({
|
||||||
|
"status": "error",
|
||||||
|
"message": f"ייצוא נחסם: שערי QA קריטיים נכשלו ({gate_names}). "
|
||||||
|
f"תקן את הליקויים והרץ validate_decision מחדש לפני ייצוא.",
|
||||||
|
"failed_gates": [r["check_name"] for r in critical],
|
||||||
|
}, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
path = await docx_exporter.export_decision(case_id, output_path or None)
|
path = await docx_exporter.export_decision(case_id, output_path or None)
|
||||||
# Register this export as the new source of truth
|
# Register this export as the new source of truth
|
||||||
|
|||||||
151
mcp-server/tests/test_export_qa_gate.py
Normal file
151
mcp-server/tests/test_export_qa_gate.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
"""Regression tests for FU-6.
|
||||||
|
|
||||||
|
GAP-16 (INV-QA consistency): ``check_neutral_background`` must NOT return a
|
||||||
|
``severity='critical'`` result while ``passed=True``. The empty/missing
|
||||||
|
block-ו fallback now reports ``severity='warning'`` (consistent with passed).
|
||||||
|
|
||||||
|
GAP-15 (INV-EX3 / INV-QA3): ``export_docx`` must refuse to export while
|
||||||
|
critical QA gates fail OR before any QA run exists. It gates on the STORED
|
||||||
|
``qa_results`` (cheap SELECT via ``db.get_critical_qa_failures`` /
|
||||||
|
``db.qa_run_exists``) — it does NOT re-run the LLM validator.
|
||||||
|
|
||||||
|
All tests run fully OFFLINE — the pool / db helpers / exporter / git are
|
||||||
|
monkeypatched. No live Postgres needed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from legal_mcp.services import db
|
||||||
|
from legal_mcp.services import qa_validator
|
||||||
|
from legal_mcp.tools import drafting
|
||||||
|
|
||||||
|
|
||||||
|
# ── GAP-16 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_neutral_background_empty_block_is_warning_not_critical() -> None:
|
||||||
|
"""Empty/missing block-ו → passed=True, so severity must be 'warning'."""
|
||||||
|
res = qa_validator.check_neutral_background([]) # no block-vav present
|
||||||
|
assert res["passed"] is True
|
||||||
|
assert res["severity"] == "warning", (
|
||||||
|
"a passed result must not carry severity='critical' (GAP-16)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_neutral_background_dirty_block_still_critical_path_untouched() -> None:
|
||||||
|
"""A block-ו with judgment words still fails — fix didn't soften real checks."""
|
||||||
|
bad_word = qa_validator.VALUE_WORDS[0]
|
||||||
|
res = qa_validator.check_neutral_background(
|
||||||
|
[{"block_id": "block-vav", "content": f"הרקע: {bad_word} מאוד"}]
|
||||||
|
)
|
||||||
|
assert res["passed"] is False
|
||||||
|
assert res["errors"], "judgment-word violation should be reported"
|
||||||
|
|
||||||
|
|
||||||
|
# ── GAP-15 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def patched_export(monkeypatch: pytest.MonkeyPatch) -> dict:
|
||||||
|
"""Monkeypatch case lookup, exporter, draft-path setter, and git so that
|
||||||
|
``export_docx`` is isolated to the QA-gate decision. Returns a dict of
|
||||||
|
call-tracking flags.
|
||||||
|
"""
|
||||||
|
calls = {"exported": False, "set_draft": False, "committed": False}
|
||||||
|
|
||||||
|
async def _get_case_by_number(case_number: str) -> dict:
|
||||||
|
return {"id": "00000000-0000-0000-0000-000000000001"}
|
||||||
|
|
||||||
|
async def _export_decision(case_id, output_path=None) -> str:
|
||||||
|
calls["exported"] = True
|
||||||
|
return "/tmp/decision.docx"
|
||||||
|
|
||||||
|
async def _set_active_draft_path(case_id, path) -> None:
|
||||||
|
calls["set_draft"] = True
|
||||||
|
|
||||||
|
def _commit_and_push(case_dir, msg) -> None:
|
||||||
|
calls["committed"] = True
|
||||||
|
|
||||||
|
# find_case_dir is called only on the success path; make it a no-op dir
|
||||||
|
class _FakeDir:
|
||||||
|
def exists(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
monkeypatch.setattr(db, "get_case_by_number", _get_case_by_number)
|
||||||
|
monkeypatch.setattr(drafting.config, "find_case_dir", lambda cn: _FakeDir())
|
||||||
|
monkeypatch.setattr(drafting.git_sync, "commit_and_push", _commit_and_push)
|
||||||
|
# docx_exporter / set_active_draft_path are looked up dynamically; patch both
|
||||||
|
import legal_mcp.services.docx_exporter as docx_exporter
|
||||||
|
monkeypatch.setattr(docx_exporter, "export_decision", _export_decision)
|
||||||
|
monkeypatch.setattr(db, "set_active_draft_path", _set_active_draft_path)
|
||||||
|
return calls
|
||||||
|
|
||||||
|
|
||||||
|
def _run(coro):
|
||||||
|
return asyncio.run(coro)
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_blocked_when_no_qa_run(
|
||||||
|
patched_export: dict, monkeypatch: pytest.MonkeyPatch
|
||||||
|
) -> None:
|
||||||
|
async def _qa_run_exists(case_id) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _get_critical(case_id) -> list:
|
||||||
|
return []
|
||||||
|
|
||||||
|
monkeypatch.setattr(db, "qa_run_exists", _qa_run_exists)
|
||||||
|
monkeypatch.setattr(db, "get_critical_qa_failures", _get_critical)
|
||||||
|
|
||||||
|
out = json.loads(_run(drafting.export_docx("8001-24")))
|
||||||
|
assert out["status"] == "error"
|
||||||
|
assert "QA" in out["message"] or "validate_decision" in out["message"]
|
||||||
|
assert patched_export["exported"] is False, "must not call the exporter"
|
||||||
|
assert patched_export["committed"] is False, "must not git-commit"
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_blocked_when_critical_failures(
|
||||||
|
patched_export: dict, monkeypatch: pytest.MonkeyPatch
|
||||||
|
) -> None:
|
||||||
|
async def _qa_run_exists(case_id) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
async def _get_critical(case_id) -> list:
|
||||||
|
return [
|
||||||
|
{"check_name": "claims_coverage", "severity": "critical",
|
||||||
|
"passed": False, "errors": []},
|
||||||
|
{"check_name": "structural_integrity", "severity": "critical",
|
||||||
|
"passed": False, "errors": []},
|
||||||
|
]
|
||||||
|
|
||||||
|
monkeypatch.setattr(db, "qa_run_exists", _qa_run_exists)
|
||||||
|
monkeypatch.setattr(db, "get_critical_qa_failures", _get_critical)
|
||||||
|
|
||||||
|
out = json.loads(_run(drafting.export_docx("8001-24")))
|
||||||
|
assert out["status"] == "error"
|
||||||
|
assert out["failed_gates"] == ["claims_coverage", "structural_integrity"]
|
||||||
|
assert "claims_coverage" in out["message"]
|
||||||
|
assert patched_export["exported"] is False, "must not call the exporter"
|
||||||
|
assert patched_export["committed"] is False, "must not git-commit"
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_proceeds_when_clean(
|
||||||
|
patched_export: dict, monkeypatch: pytest.MonkeyPatch
|
||||||
|
) -> None:
|
||||||
|
async def _qa_run_exists(case_id) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
async def _get_critical(case_id) -> list:
|
||||||
|
return []
|
||||||
|
|
||||||
|
monkeypatch.setattr(db, "qa_run_exists", _qa_run_exists)
|
||||||
|
monkeypatch.setattr(db, "get_critical_qa_failures", _get_critical)
|
||||||
|
|
||||||
|
out = json.loads(_run(drafting.export_docx("8001-24")))
|
||||||
|
assert out["status"] == "completed", out
|
||||||
|
assert out["path"] == "/tmp/decision.docx"
|
||||||
|
assert patched_export["exported"] is True, "clean QA must allow export"
|
||||||
|
assert patched_export["set_draft"] is True, "active_draft_path must be set"
|
||||||
23
web/app.py
23
web/app.py
@@ -3170,10 +3170,25 @@ async def api_export_docx(case_number: str, background_tasks: BackgroundTasks):
|
|||||||
(markdown body + download link) to the linked issue.
|
(markdown body + download link) to the linked issue.
|
||||||
"""
|
"""
|
||||||
result = await drafting_tools.export_docx(case_number)
|
result = await drafting_tools.export_docx(case_number)
|
||||||
try:
|
if isinstance(result, dict):
|
||||||
data = json.loads(result)
|
data = result
|
||||||
except json.JSONDecodeError:
|
else:
|
||||||
raise HTTPException(500, result)
|
try:
|
||||||
|
data = json.loads(result)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
# export_docx can also return a plain (non-JSON) string, e.g.
|
||||||
|
# "תיק ... לא נמצא." — surface it as a 500 with the raw text.
|
||||||
|
raise HTTPException(500, str(result))
|
||||||
|
|
||||||
|
# FU-6: a QA gate (or another error) can block the export. export_docx
|
||||||
|
# signals this with status == "error". Returning the existing 200 here
|
||||||
|
# would let the UI show a false "exported successfully" toast, so we map
|
||||||
|
# a block to 409 Conflict carrying the Hebrew message + failed_gates.
|
||||||
|
if isinstance(data, dict) and data.get("status") == "error":
|
||||||
|
detail = {"message": data.get("message", "ייצוא נחסם.")}
|
||||||
|
if data.get("failed_gates"):
|
||||||
|
detail["failed_gates"] = data["failed_gates"]
|
||||||
|
raise HTTPException(409, detail)
|
||||||
|
|
||||||
# Notify the Paperclip plugin to attach the final-decision document.
|
# Notify the Paperclip plugin to attach the final-decision document.
|
||||||
docx_filename = (
|
docx_filename = (
|
||||||
|
|||||||
Reference in New Issue
Block a user