Files
legal-ai/mcp-server/tests/test_export_qa_gate.py
Chaim 29af008271 feat(mcp): FU-14 GAP-48 פרוסה 3 — envelope למשפחת drafting (סגירת GAP-48)
הפרוסה האחרונה של GAP-48 (INV-TOOL1). 18 כלי drafting הומרו ל-{status,data,message}
דרך tools/envelope.py — כולל מסלול הפקת-ההחלטה הקריטי.

עיקרון לכלים עם כשל משמעותי (export_docx/revise_draft/apply_user_edit): err()
ברמת-המעטפת — כך שהסוכן והמשתמש רואים את הכשל; failed_gates רוכב ב-data.
שאר הכלים: ok(data=payload) להצלחה, err להיעדר-תיק/קלט-שגוי/חריגה.

6 צרכני-app.py חוּוטו (get_decision_template, apply_user_edit ×2, revise_draft,
list_bookmarks, export_docx) עם envelope_unwrap + בדיקת status=="error"→4xx,
לשמירת חוזה-ה-API (X6) ללא-שינוי. test_export_qa_gate עודכן לחוזה החדש.

בדיקות: 182/182 עוברים (כולל שערי-QA של הייצוא).

GAP-48 סגור: כל ~12 משפחות-הכלים אחידות. נותר ב-FU-14: GAP-49/50 (שובר), GAP-54.

Invariants: משלים INV-TOOL1 + G2. מתועד ב-X9 (נסגר) + gap-audit פרוסה 7.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 17:51:56 +00:00

154 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Regression tests for FU-6.
GAP-16 (INV-QA consistency): ``check_neutral_background`` must NOT return a
``severity='critical'`` result while ``passed=True``. The empty/missing
block-ו fallback now reports ``severity='warning'`` (consistent with passed).
GAP-15 (INV-EX3 / INV-QA3): ``export_docx`` must refuse to export while
critical QA gates fail OR before any QA run exists. It gates on the STORED
``qa_results`` (cheap SELECT via ``db.get_critical_qa_failures`` /
``db.qa_run_exists``) — it does NOT re-run the LLM validator.
All tests run fully OFFLINE — the pool / db helpers / exporter / git are
monkeypatched. No live Postgres needed.
"""
from __future__ import annotations
import asyncio
import json
import pytest
from legal_mcp.services import db
from legal_mcp.services import qa_validator
from legal_mcp.tools import drafting
# ── GAP-16 ────────────────────────────────────────────────────────
def test_neutral_background_empty_block_is_warning_not_critical() -> None:
    """GAP-16: a passing neutral-background check must be reported as a warning.

    The check is fed an empty block list (so no block-vav is present) and is
    expected to come back with ``passed=True`` together with
    ``severity='warning'``.
    """
    # Empty input: there is no block-vav for the check to inspect.
    outcome = qa_validator.check_neutral_background([])

    assert outcome["passed"] is True
    expected_severity = "warning"
    assert outcome["severity"] == expected_severity, (
        "a passed result must not carry severity='critical' (GAP-16)"
    )
def test_neutral_background_dirty_block_still_critical_path_untouched() -> None:
    """A block-vav containing a judgment word must still fail the check.

    Guards against the GAP-16 fix softening the real (non-empty) code path:
    the result must have ``passed=False`` and a non-empty ``errors`` entry.
    """
    # Use the first configured value word so the test tracks the validator's list.
    judgment_word = qa_validator.VALUE_WORDS[0]
    dirty_block = {
        "block_id": "block-vav",
        "content": f"הרקע: {judgment_word} מאוד",
    }

    outcome = qa_validator.check_neutral_background([dirty_block])

    assert outcome["passed"] is False
    assert outcome["errors"], "judgment-word violation should be reported"
# ── GAP-15 ────────────────────────────────────────────────────────
@pytest.fixture()
def patched_export(monkeypatch: pytest.MonkeyPatch) -> dict:
    """Isolate ``export_docx`` so only its QA-gate decision is exercised.

    The case lookup, case-directory finder, git commit helper, DOCX exporter
    and active-draft-path setter are all replaced with local stubs.

    Returns:
        A dict of boolean flags (``exported``, ``set_draft``, ``committed``),
        all initially ``False``; each stub flips its flag to ``True`` when it
        is invoked.
    """
    tracker = {"exported": False, "set_draft": False, "committed": False}

    class _MissingDir:
        """Stand-in case directory whose ``exists()`` always reports False."""

        def exists(self) -> bool:
            return False

    async def _get_case_by_number(case_number: str) -> dict:
        # Any case number resolves to the same fixed fake case record.
        return {"id": "00000000-0000-0000-0000-000000000001"}

    def _find_case_dir(cn):
        # Original note: find_case_dir is only reached on the success path,
        # so a directory that "does not exist" is enough here.
        return _MissingDir()

    def _commit_and_push(case_dir, msg) -> None:
        tracker["committed"] = True

    async def _export_decision(case_id, output_path=None) -> str:
        tracker["exported"] = True
        return "/tmp/decision.docx"

    async def _set_active_draft_path(case_id, path) -> None:
        tracker["set_draft"] = True

    monkeypatch.setattr(db, "get_case_by_number", _get_case_by_number)
    monkeypatch.setattr(drafting.config, "find_case_dir", _find_case_dir)
    monkeypatch.setattr(drafting.git_sync, "commit_and_push", _commit_and_push)

    # Original note: the exporter and the draft-path setter are looked up
    # dynamically, so they are patched on their home modules.
    import legal_mcp.services.docx_exporter as docx_exporter

    monkeypatch.setattr(docx_exporter, "export_decision", _export_decision)
    monkeypatch.setattr(db, "set_active_draft_path", _set_active_draft_path)

    return tracker
def _run(coro):
return asyncio.run(coro)
def test_export_blocked_when_no_qa_run(
    patched_export: dict, monkeypatch: pytest.MonkeyPatch
) -> None:
    """GAP-15: with no stored QA run, ``export_docx`` must return an error.

    The error message has to mention either "QA" or "validate_decision", and
    neither the exporter stub nor the git-commit stub may have been called.
    """

    async def _no_qa_run(case_id) -> bool:
        return False

    async def _no_critical_failures(case_id) -> list:
        return []

    monkeypatch.setattr(db, "qa_run_exists", _no_qa_run)
    monkeypatch.setattr(db, "get_critical_qa_failures", _no_critical_failures)

    raw_response = _run(drafting.export_docx("8001-24"))
    envelope = json.loads(raw_response)

    assert envelope["status"] == "error"
    message = envelope["message"]
    assert any(hint in message for hint in ("QA", "validate_decision"))
    assert patched_export["exported"] is False, "must not call the exporter"
    assert patched_export["committed"] is False, "must not git-commit"
def test_export_blocked_when_critical_failures(
    patched_export: dict, monkeypatch: pytest.MonkeyPatch
) -> None:
    """GAP-15: stored critical QA failures must block ``export_docx``.

    Under the GAP-48 ``{status, data, message}`` envelope the failure is
    reported as ``status == "error"``, the failing check names are carried
    in ``data["failed_gates"]``, and the message names a failing gate.
    """
    failing_checks = ("claims_coverage", "structural_integrity")

    async def _qa_has_run(case_id) -> bool:
        return True

    async def _critical_failures(case_id) -> list:
        # Fresh rows on every call, one per failing critical check.
        return [
            {
                "check_name": check,
                "severity": "critical",
                "passed": False,
                "errors": [],
            }
            for check in failing_checks
        ]

    monkeypatch.setattr(db, "qa_run_exists", _qa_has_run)
    monkeypatch.setattr(db, "get_critical_qa_failures", _critical_failures)

    raw_response = _run(drafting.export_docx("8001-24"))
    envelope = json.loads(raw_response)

    # GAP-48: {status,data,message} envelope; failed_gates rides in data.
    assert envelope["status"] == "error"
    assert envelope["data"]["failed_gates"] == [
        "claims_coverage",
        "structural_integrity",
    ]
    assert "claims_coverage" in envelope["message"]
    assert patched_export["exported"] is False, "must not call the exporter"
    assert patched_export["committed"] is False, "must not git-commit"
def test_export_proceeds_when_clean(
    patched_export: dict, monkeypatch: pytest.MonkeyPatch
) -> None:
    """GAP-15: a QA run with no critical failures lets ``export_docx`` proceed.

    Success is reported as envelope ``status == "ok"`` with the exported path
    in ``data["path"]``; both the exporter stub and the active-draft-path
    stub must have been called.
    """

    async def _qa_has_run(case_id) -> bool:
        return True

    async def _no_critical_failures(case_id) -> list:
        return []

    monkeypatch.setattr(db, "qa_run_exists", _qa_has_run)
    monkeypatch.setattr(db, "get_critical_qa_failures", _no_critical_failures)

    raw_response = _run(drafting.export_docx("8001-24"))
    envelope = json.loads(raw_response)

    # GAP-48: success is envelope status "ok"; payload (path) rides in data.
    assert envelope["status"] == "ok", envelope
    assert envelope["data"]["path"] == "/tmp/decision.docx"
    assert patched_export["exported"] is True, "clean QA must allow export"
    assert patched_export["set_draft"] is True, "active_draft_path must be set"