All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.
New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word
Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 4 new/updated endpoints: upload (auto-registers active draft),
/exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
/active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
revision handling), legal-writer (revision mode)
Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files
Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
343 lines
13 KiB
Python
343 lines
13 KiB
Python
"""בדיקות docx_reviser — Track Changes XML surgery.
|
||
|
||
הבדיקות יוצרות DOCX בסיסי עם bookmarks, מפעילות revisions, ובודקות:
|
||
1. שה-XML שנוצר תקף ונטען חזרה כ-Document
|
||
2. שה-<w:ins> / <w:del> קיימים בפורמט הנכון
|
||
3. שה-bookmarks נשמרים אחרי עריכה
|
||
4. שגופן David ו-RTL נשמרים
|
||
5. שכשלונות מטופלים אלגנטית (bookmark חסר → failed, לא crash)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import zipfile
|
||
from datetime import datetime, timezone
|
||
from io import BytesIO
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
from docx import Document
|
||
from docx.oxml import OxmlElement
|
||
from docx.oxml.ns import qn
|
||
from lxml import etree
|
||
|
||
from legal_mcp.services import docx_reviser
|
||
from legal_mcp.services.docx_reviser import (
|
||
NSMAP,
|
||
Revision,
|
||
_w,
|
||
apply_tracked_revisions,
|
||
list_bookmarks,
|
||
)
|
||
|
||
|
||
# ── Test fixtures ──────────────────────────────────────────────────
|
||
|
||
|
||
def _insert_bookmark(paragraph, name: str, bm_id: int) -> None:
    """Wrap the content of *paragraph* in a named Word bookmark.

    A ``<w:bookmarkStart>`` is placed ahead of the paragraph's content and a
    matching ``<w:bookmarkEnd>`` is appended as the last child. Both carry
    ``bm_id`` as their ``w:id`` so that they pair up.

    Args:
        paragraph: Paragraph object whose underlying ``<w:p>`` element
            (``paragraph._p``) is edited in place.
        name: Value written to the ``w:name`` attribute of the start marker.
        bm_id: Numeric id shared by the start and end markers.
    """
    p_elem = paragraph._p

    start = OxmlElement("w:bookmarkStart")
    start.set(qn("w:id"), str(bm_id))
    start.set(qn("w:name"), name)

    # <w:pPr>, when present, has to remain the first child of <w:p>, so the
    # start marker goes right after it rather than unconditionally at index 0.
    p_pr = p_elem.find(qn("w:pPr"))
    if p_pr is not None:
        p_pr.addnext(start)
    else:
        p_elem.insert(0, start)

    end = OxmlElement("w:bookmarkEnd")
    end.set(qn("w:id"), str(bm_id))
    p_elem.append(end)
|
||
|
||
|
||
def _make_sample_docx(path: Path) -> None:
    """Write a three-paragraph DOCX to *path*, one bookmark per paragraph.

    The bookmarks are named ``block-alef``, ``block-yod`` and
    ``block-yod-bet`` and get the ids 1, 2 and 3. Each paragraph holds a
    single run whose font name is set to "David".
    """
    bookmark_names = ("block-alef", "block-yod", "block-yod-bet")
    document = Document()
    for bm_id, name in enumerate(bookmark_names, start=1):
        paragraph = document.add_paragraph()
        paragraph.add_run(f"תוכן פסקה של {name}").font.name = "David"
        _insert_bookmark(paragraph, name, bm_id)
    document.save(str(path))
|
||
|
||
|
||
@pytest.fixture
def sample_docx(tmp_path: Path) -> Path:
    """Generate the sample DOCX as ``source.docx`` under *tmp_path* and return its path."""
    target = tmp_path / "source.docx"
    _make_sample_docx(target)
    return target
|
||
|
||
|
||
# ── list_bookmarks ────────────────────────────────────────────────
|
||
|
||
|
||
def test_list_bookmarks_returns_all_named(sample_docx: Path) -> None:
    """All three fixture bookmarks are reported (order is not checked)."""
    expected = {"block-alef", "block-yod", "block-yod-bet"}
    found = set(list_bookmarks(sample_docx))
    assert found == expected
|
||
|
||
|
||
def test_list_bookmarks_excludes_internal(tmp_path: Path) -> None:
    """Bookmarks whose name starts with '_' (like _GoBack) are filtered out."""
    docx_path = tmp_path / "internal.docx"
    document = Document()
    # (paragraph text, bookmark name) pairs; ids are assigned 1, 2 in order.
    specs = (("visible", "block-real"), ("hidden", "_GoBack"))
    for bm_id, (text, bm_name) in enumerate(specs, start=1):
        _insert_bookmark(document.add_paragraph(text), bm_name, bm_id)
    document.save(str(docx_path))

    assert list_bookmarks(docx_path) == ["block-real"]
|
||
|
||
|
||
# ── apply_tracked_revisions: insert_after ─────────────────────────
|
||
|
||
|
||
def test_insert_after_adds_tracked_paragraph(sample_docx: Path, tmp_path: Path) -> None:
    """An insert_after revision yields a <w:ins> with the new text and keeps the anchor text."""
    inserted_text = "פסקה חדשה שהמערכת מוסיפה."
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content=inserted_text,
    )
    stamp = datetime(2026, 4, 16, 14, 0, tzinfo=timezone.utc)

    result = apply_tracked_revisions(
        sample_docx, out, [revision], author="מערכת AI", date=stamp,
    )

    assert result.applied == 1
    assert result.failed == 0
    assert out.exists()

    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    # document.xml must contain at least one <w:ins>.
    assert len(root.findall(".//w:ins", NSMAP)) >= 1

    body_text = "".join(root.itertext())
    assert inserted_text in body_text  # the new content is there
    assert "תוכן פסקה של block-yod" in body_text  # original content preserved
|
||
|
||
|
||
def _find_ins_with_runs(tree: etree._Element) -> etree._Element | None:
    """Return the first ``<w:ins>`` under *tree* that has a ``<w:r>`` descendant.

    ``<w:ins>`` elements without any run (the pilcrow-marker kind) are
    skipped. Returns ``None`` when no run-bearing ``<w:ins>`` exists.
    """
    with_runs = (
        ins
        for ins in tree.iterfind(".//w:ins", NSMAP)
        if ins.find(".//w:r", NSMAP) is not None
    )
    return next(with_runs, None)
|
||
|
||
|
||
def test_insert_after_ins_has_author_and_date(sample_docx: Path, tmp_path: Path) -> None:
    """The run-bearing <w:ins> carries the given author and a date ending in 'Z'."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-alef",
        content="test",
    )
    apply_tracked_revisions(sample_docx, out, [revision], author="דפנה")

    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    ins = _find_ins_with_runs(root)
    assert ins is not None
    assert ins.get(_w("author")) == "דפנה"

    stamp = ins.get(_w("date"))
    assert stamp is not None
    assert stamp.endswith("Z")  # ISO 8601 UTC
|
||
|
||
|
||
def test_insert_after_uses_rtl_and_david(sample_docx: Path, tmp_path: Path) -> None:
    """The inserted run has <w:rtl> and an rFonts whose ascii font is David."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-alef",
        content="מוסף",
    )
    apply_tracked_revisions(sample_docx, out, [revision])

    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    ins = _find_ins_with_runs(root)
    assert ins is not None

    first_run = ins.find(".//w:r", NSMAP)
    assert first_run is not None

    run_props = first_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None

    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"
|
||
|
||
|
||
# ── apply_tracked_revisions: insert_before ────────────────────────
|
||
|
||
|
||
def test_insert_before_places_above_anchor(sample_docx: Path, tmp_path: Path) -> None:
    """An insert_before revision puts the new paragraph ahead of the anchor paragraph."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_before",
        anchor_bookmark="block-yod",
        content="לפני י.",
    )
    result = apply_tracked_revisions(sample_docx, out, [revision])
    assert result.applied == 1

    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    paragraph_texts = ["".join(p.itertext()) for p in root.findall(".//w:p", NSMAP)]

    def first_index(needle: str) -> int:
        # Index of the first paragraph whose text contains *needle*.
        return next(i for i, text in enumerate(paragraph_texts) if needle in text)

    # Order check: the new paragraph's text must come before "block-yod".
    idx_new = first_index("לפני י.")
    idx_yod = first_index("תוכן פסקה של block-yod")
    assert idx_new < idx_yod
|
||
|
||
|
||
# ── apply_tracked_revisions: delete ───────────────────────────────
|
||
|
||
|
||
def test_delete_wraps_runs_in_w_del(sample_docx: Path, tmp_path: Path) -> None:
    """A delete revision moves the anchor's text, and only it, into <w:del>.

    Inside ``<w:del>`` the text elements must have become ``<w:delText>``.
    """
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="delete", anchor_bookmark="block-yod", content="")
    result = apply_tracked_revisions(sample_docx, out, [rev])
    assert result.applied == 1

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    dels = tree.findall(".//w:del", NSMAP)
    assert len(dels) >= 1

    # Gather <w:delText> from every <w:del> instead of only the first one, so
    # the check does not depend on which <w:del> comes first in document order.
    deleted_text = "".join(
        t.text or ""
        for d in dels
        for t in d.findall(".//w:delText", NSMAP)
    )
    assert "block-yod" in deleted_text
    # "block-yod" is a prefix of "block-yod-bet"; make sure the neighbouring
    # paragraph was not the one that got deleted.
    assert "block-yod-bet" not in deleted_text
|
||
|
||
|
||
# ── apply_tracked_revisions: replace ─────────────────────────────
|
||
|
||
|
||
def test_replace_creates_both_ins_and_del(sample_docx: Path, tmp_path: Path) -> None:
    """A replace revision leaves at least one <w:ins> and one <w:del> in the body."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="replace",
        anchor_bookmark="block-yod",
        content="תוכן חדש לחלוטין",
    )
    result = apply_tracked_revisions(sample_docx, out, [revision])
    assert result.applied == 1

    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    insertions = root.findall(".//w:ins", NSMAP)
    deletions = root.findall(".//w:del", NSMAP)
    assert len(insertions) >= 1
    assert len(deletions) >= 1
|
||
|
||
|
||
# ── Failure modes ─────────────────────────────────────────────────
|
||
|
||
|
||
def test_missing_bookmark_returns_failed_not_crash(
    sample_docx: Path,
    tmp_path: Path,
) -> None:
    """An unknown anchor bookmark is reported as a failed revision, not raised."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="does-not-exist",
        content="x",
    )
    result = apply_tracked_revisions(sample_docx, out, [revision])

    assert result.applied == 0
    assert result.failed == 1

    outcome = result.results[0]
    assert outcome.status == "failed"
    assert "not found" in (outcome.error or "")

    # The output file is still written even though nothing was applied.
    assert out.exists()
|
||
|
||
|
||
def test_empty_revisions_list_produces_copy(sample_docx: Path, tmp_path: Path) -> None:
    """With no revisions the output exists, counts are zero and bookmarks remain."""
    out = tmp_path / "out.docx"
    result = apply_tracked_revisions(sample_docx, out, [])

    assert result.applied == 0
    assert result.failed == 0
    assert out.exists()

    # The bookmarks should still be there.
    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(out)) == expected
|
||
|
||
|
||
# ── Track revisions flag in settings ──────────────────────────────
|
||
|
||
|
||
def test_track_revisions_flag_is_enabled(sample_docx: Path, tmp_path: Path) -> None:
    """After applying a revision, settings.xml has <w:trackRevisions> switched on."""
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="insert_after",
                   anchor_bookmark="block-alef", content="x")
    apply_tracked_revisions(sample_docx, out, [rev])

    with zipfile.ZipFile(out, "r") as zf:
        settings_xml = zf.read("word/settings.xml")

    settings_tree = etree.fromstring(settings_xml)
    tr = settings_tree.find(_w("trackRevisions"))
    assert tr is not None

    # <w:trackRevisions> is an on/off element: without w:val it means "on",
    # but an explicit false/0/off value would disable tracking, so presence
    # alone is not enough to call the flag enabled.
    val = tr.get(_w("val"))
    assert val is None or val.lower() not in {"false", "0", "off"}
|
||
|
||
|
||
# ── Multiple revisions with unique IDs ────────────────────────────
|
||
|
||
|
||
def test_multiple_revisions_get_unique_ids(sample_docx: Path, tmp_path: Path) -> None:
    """Three revisions applied together produce <w:ins>/<w:del> with distinct w:id values."""
    out = tmp_path / "out.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-alef", content="ראשון"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="שני"),
        Revision(id="r3", type="delete", anchor_bookmark="block-yod-bet"),
    ]
    result = apply_tracked_revisions(sample_docx, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    # Collect every non-empty w:id found on <w:ins> and <w:del> elements.
    all_ids = [
        el.get(_w("id"))
        for xpath in (".//w:ins", ".//w:del")
        for el in tree.iterfind(xpath, NSMAP)
        if el.get(_w("id"))
    ]
    # Without this guard the uniqueness check below would pass vacuously
    # when no revision element carries an id at all.
    assert all_ids, "no w:id found on any <w:ins>/<w:del>"
    assert len(all_ids) == len(set(all_ids)), f"duplicate IDs: {all_ids}"
|
||
|
||
|
||
# ── DOCX remains openable as Document ─────────────────────────────
|
||
|
||
|
||
def test_output_docx_is_openable_by_python_docx(
    sample_docx: Path,
    tmp_path: Path,
) -> None:
    """The revised file opens with ``Document`` and contains old and new text."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוכן חדש",
    )
    apply_tracked_revisions(sample_docx, out, [revision])

    # Opening must not raise: the output has to be a loadable DOCX.
    reopened = Document(str(out))

    # The original text is still reachable through the paragraph API.
    paragraph_text = "\n".join(p.text for p in reopened.paragraphs)
    assert "block-yod" in paragraph_text

    # The inserted (tracked) text is looked up in the raw XML via itertext.
    with zipfile.ZipFile(out, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    assert "תוכן חדש" in "".join(root.itertext())
|
||
|
||
|
||
# ── Bookmarks preserved through revisions ─────────────────────────
|
||
|
||
|
||
def test_bookmarks_preserved_after_insert(sample_docx: Path, tmp_path: Path) -> None:
    """All three bookmarks are still listed after an insert_after revision."""
    out = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="x",
    )
    apply_tracked_revisions(sample_docx, out, [revision])

    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(out)) == expected
|
||
|
||
|
||
# ── Idempotency of loading/saving without changes ────────────────
|
||
|
||
|
||
def test_save_without_revisions_preserves_content(
    sample_docx: Path,
    tmp_path: Path,
) -> None:
    """Applying an empty revision list leaves the paragraph texts unchanged."""
    out = tmp_path / "out.docx"
    apply_tracked_revisions(sample_docx, out, [])

    def paragraph_texts(docx_path: Path) -> list[str]:
        # Text of every paragraph, in document order.
        return [p.text for p in Document(str(docx_path)).paragraphs]

    assert paragraph_texts(sample_docx) == paragraph_texts(out)
|