Files
legal-ai/mcp-server/tests/test_track_changes_e2e.py
Chaim 726498126d
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
Add Track Changes architecture for draft revisions (CMP + CMPA)
Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.

New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word

Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
  detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 5 new/updated endpoints: upload (auto-registers active draft),
  /exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
  /active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
  detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
  revision handling), legal-writer (revision mode)

Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files

Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-16 18:49:30 +00:00

238 lines
9.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser.
הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות
שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks
מובנים) ועל מסמכים רגילים שעברו retrofit.
"""
from __future__ import annotations
import zipfile
from datetime import datetime, timezone
from pathlib import Path
import pytest
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from lxml import etree
from legal_mcp.services import docx_retrofit, docx_reviser
from legal_mcp.services.docx_exporter import (
_BOOKMARK_ID_START,
_wrap_block_with_bookmarks,
)
from legal_mcp.services.docx_reviser import (
NSMAP,
Revision,
_w,
apply_tracked_revisions,
list_bookmarks,
)
# ── Helpers ────────────────────────────────────────────────────────
def _make_exporter_style_docx(path: Path) -> None:
    """Write a DOCX to *path* whose blocks are wrapped via the exporter helper.

    Five named blocks are emitted. Each block's text is split on newlines and
    every non-blank line becomes its own paragraph; the paragraphs of a block
    are produced inside a callback handed to ``_wrap_block_with_bookmarks``.
    No font or direction formatting is applied here.
    """
    document = Document()
    # Single-element list shared across calls; presumably the exporter helper
    # advances it to hand out bookmark ids -- confirm against docx_exporter.
    next_bookmark_id = [_BOOKMARK_ID_START]
    block_specs = (
        ("block-alef", "בפני: דפנה תמיר, יו\"ר ועדת הערר"),
        ("block-bet", "ערר מספר 1033-25"),
        ("block-heh", "רקע\nהנכס מצוי ברחוב הר בשן"),
        ("block-yod", "דיון והכרעה\nלאחר שבחנו את טענות הצדדים"),
        ("block-yod-bet", "ההחלטה\nהערר מתקבל בחלקו"),
    )

    def _paragraph_writer(text):
        # Factory so each callback captures its own block text rather than
        # the loop variable.
        def write(c=text):
            for raw_line in c.split("\n"):
                stripped = raw_line.strip()
                if stripped:
                    document.add_paragraph(stripped)

        return write

    for block_name, block_text in block_specs:
        _wrap_block_with_bookmarks(
            document, block_name, _paragraph_writer(block_text), next_bookmark_id
        )

    document.save(str(path))
def _make_user_edited_docx(path: Path) -> None:
    """Write a DOCX to *path* made only of plain paragraphs.

    The paragraphs alternate between short section titles and a line of body
    text. This function adds no bookmarks itself, which is what lets the
    tests treat the file as a stand-in for a document edited by hand in Word.
    """
    paragraphs = (
        "בפני: דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים",
        "ערר מספר 9999-25",
        "רקע",
        "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים",
        "תמצית טענות הצדדים",
        "העוררים טוענים שהבנייה חורגת מהתכנית",
        "תגובת המשיבה",
        "הוועדה המקומית טוענת שהבקשה תואמת",
        "ההליכים בפני ועדת הערר",
        "קיימנו דיון בנוכחות הצדדים",
        "דיון והכרעה",
        "לאחר שבחנו את טענות הצדדים בחון מעמיק",
        "סוף דבר",
        "הערר נדחה",
    )
    document = Document()
    for paragraph_text in paragraphs:
        document.add_paragraph(paragraph_text)
    document.save(str(path))
# ── Exporter-style (built-in bookmarks) ──────────────────────────
def test_exporter_output_works_with_reviser(tmp_path: Path) -> None:
    """A document built through the exporter helper can be revised directly.

    Checks that the five block bookmarks are listed, that one ``insert_after``
    revision anchored on ``block-yod`` is reported as applied, and that the
    inserted text appears in ``word/document.xml`` inside a ``<w:ins>``.
    """
    exported = tmp_path / "exported.docx"
    _make_exporter_style_docx(exported)

    # All 5 bookmarks should be present directly from the "export".
    expected_names = {
        "block-alef",
        "block-bet",
        "block-heh",
        "block-yod",
        "block-yod-bet",
    }
    found_names = set(list_bookmarks(exported))
    assert expected_names <= found_names

    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוספת מערכת: פסק הלכה חדש",
        style="body",
    )
    outcome = apply_tracked_revisions(exported, revised, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    document_text = "".join(root.itertext())
    assert "תוספת מערכת" in document_text

    # The revision is tracked: its text must sit inside a <w:ins> element.
    tracked_texts = ["".join(node.itertext()) for node in root.findall(".//w:ins", NSMAP)]
    assert any("תוספת מערכת" in text for text in tracked_texts)
# ── User-edited DOCX (no bookmarks) — needs retrofit first ──────
def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None:
    """A bookmark-free document becomes revisable after an in-place retrofit.

    Verifies that the file starts with no listed bookmarks, that the retrofit
    reports ``block-yod``, ``block-heh`` and ``block-zayin`` as added, and
    that a subsequent ``insert_after`` revision on ``block-yod`` lands inside
    a ``<w:ins>`` element of the output document.
    """
    user_file = tmp_path / "user_edit.docx"
    _make_user_edited_docx(user_file)

    # Initially no named bookmarks.
    assert list_bookmarks(user_file) == []

    # Retrofit the file in place (no backup copy requested).
    retrofit_report = docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    added = set(retrofit_report["bookmarks_added"])

    # block-yod is the anchor for the common "insert ruling" task; block-heh
    # and block-zayin correspond to the background and claims-summary headings.
    for required in ("block-yod", "block-heh", "block-zayin"):
        assert required in added

    # Now apply a revision on the retrofitted file.
    out = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...",
        style="body",
    )
    revise_report = apply_tracked_revisions(user_file, out, [revision])
    assert revise_report.applied == 1

    # Verify the output has the insertion inside <w:ins>.
    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    tracked_texts = [
        "".join(node.itertext()) for node in root.iterfind(".//w:ins", NSMAP)
    ]
    assert any("פסק הלכה שהוסף" in text for text in tracked_texts)
def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None:
    """Retrofitting must leave the sequence of paragraph texts unchanged."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)

    def _paragraph_texts() -> list:
        # Re-open the file each time so we read what is actually on disk.
        return [paragraph.text for paragraph in Document(str(user_file)).paragraphs]

    texts_before = _paragraph_texts()
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    texts_after = _paragraph_texts()

    # Only bookmark markers are expected to be added, so texts must match.
    assert texts_before == texts_after
def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None:
    """Retrofitting twice adds nothing the second time and revising still works.

    The second retrofit must report an empty ``bookmarks_added`` list and
    must list every bookmark the first run added under ``existing_bookmarks``.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)

    first_pass = docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    second_pass = docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    # Second retrofit should add no new bookmarks ...
    assert second_pass["bookmarks_added"] == []
    # ... and should see everything the first one added as already existing.
    already_present = set(second_pass["existing_bookmarks"])
    assert already_present >= set(first_pass["bookmarks_added"])

    # Then revise works normally.
    out = tmp_path / "revised.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-yod", content="x"
    )
    outcome = apply_tracked_revisions(user_file, out, [revision])
    assert outcome.applied == 1
def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None:
    """Verify multiple tracked changes each get independent ins ids so
    user can Accept/Reject each one separately in Word.

    Three revisions are applied to a retrofitted document. The test checks
    that all three are reported as applied, that every ``<w:ins>`` element in
    the output carries a ``w:id`` attribute, and that at least three distinct
    id values are present.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    out = tmp_path / "revised.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-heh", content="תוספת 1"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="תוספת 2"),
        Revision(id="r3", type="insert_before",
                 anchor_bookmark="block-yod-alef", content="תוספת 3"),
    ]
    result = apply_tracked_revisions(user_file, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    raw_ids = [el.get(_w("id")) for el in tree.iterfind(".//w:ins", NSMAP)]
    # A missing attribute comes back as None; without this check None would
    # be counted as one of the "unique ids" below and could mask a revision
    # that was emitted without an id.
    assert all(rev_id is not None for rev_id in raw_ids)
    ins_ids = set(raw_ids)
    assert len(ins_ids) >= 3  # at least one unique id per revision
def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None:
    """The run holding a tracked insertion must carry RTL and David font.

    Locates the first ``<w:r>`` inside any ``<w:ins>`` whose text contains the
    inserted string and asserts that its ``<w:rPr>`` exists, contains a
    ``<w:rtl>`` element, and has ``<w:rFonts>`` with ``w:ascii == "David"``.
    The test fails explicitly if no such run is found.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-yod", content="עברית RTL"
    )
    apply_tracked_revisions(user_file, out, [revision])

    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    def _holds_inserted_text(run) -> bool:
        # True when any <w:t> under this run contains the inserted string.
        return any(
            "עברית RTL" in (text_el.text or "")
            for text_el in run.findall(".//w:t", NSMAP)
        )

    # First matching run, searching tracked insertions in document order.
    inserted_run = next(
        (
            run
            for ins in root.iterfind(".//w:ins", NSMAP)
            for run in ins.findall(".//w:r", NSMAP)
            if _holds_inserted_text(run)
        ),
        None,
    )
    if inserted_run is None:
        pytest.fail("tracked insertion with 'עברית RTL' not found")

    run_props = inserted_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None
    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"