Add Track Changes architecture for draft revisions (CMP + CMPA)
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.
New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word
Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 5 new/updated endpoints: upload (auto-registers active draft),
/exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
/active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
revision handling), legal-writer (revision mode)
Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files
Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
0
mcp-server/tests/__init__.py
Normal file
0
mcp-server/tests/__init__.py
Normal file
103
mcp-server/tests/test_docx_exporter_bookmarks.py
Normal file
103
mcp-server/tests/test_docx_exporter_bookmarks.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""בדיקות ל-bookmark helpers ב-docx_exporter.
|
||||
|
||||
הבדיקות מתרכזות ב-helper functions בלבד (לא בכל ה-export flow שדורש DB).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services.docx_exporter import (
|
||||
_BOOKMARK_ID_START,
|
||||
_insert_bookmark_end,
|
||||
_insert_bookmark_start,
|
||||
_wrap_block_with_bookmarks,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks
|
||||
|
||||
|
||||
def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None:
    """A start/end pair placed on one paragraph is reported after saving."""
    bookmark_id = 10001
    document = Document()
    paragraph = document.add_paragraph("תוכן בלוק י")
    _insert_bookmark_start(paragraph, "block-yod", bookmark_id)
    _insert_bookmark_end(paragraph, bookmark_id)

    target = tmp_path / "out.docx"
    document.save(str(target))

    # list_bookmarks is given the saved file, so the check runs on the XML
    # that was actually written.
    found = list_bookmarks(target)
    assert found == ["block-yod"]
|
||||
|
||||
|
||||
def test_wrap_block_with_bookmarks_wraps_multiple_paragraphs(tmp_path: Path) -> None:
    """The bookmark opens in the block's first paragraph and closes in its last."""
    document = Document()
    document.add_paragraph("ראשון — לפני")  # unrelated paragraph ahead of the block

    counter = [_BOOKMARK_ID_START]

    def write_block() -> None:
        for text in ("בלוק — פסקה 1", "בלוק — פסקה 2", "בלוק — פסקה 3"):
            document.add_paragraph(text)

    _wrap_block_with_bookmarks(document, "block-yod", write_block, counter)
    document.add_paragraph("אחרי — אחרון")  # unrelated paragraph after the block

    target = tmp_path / "out.docx"
    document.save(str(target))

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    paragraphs = root.findall(".//w:p", NSMAP)

    def last_index_containing(tag):
        """Return the index of the last paragraph with a descendant *tag*, or None."""
        hits = [
            index
            for index, para in enumerate(paragraphs)
            if para.find(f".//{tag}", NSMAP) is not None
        ]
        return hits[-1] if hits else None

    start_idx = last_index_containing("w:bookmarkStart")
    end_idx = last_index_containing("w:bookmarkEnd")
    assert start_idx is not None
    assert end_idx is not None

    # The marker paragraphs must be the first and the last one the writer added.
    start_text = "".join(paragraphs[start_idx].itertext())
    end_text = "".join(paragraphs[end_idx].itertext())
    assert "פסקה 1" in start_text
    assert "פסקה 3" in end_text
|
||||
|
||||
|
||||
def test_wrap_block_skipped_when_writer_adds_nothing(tmp_path: Path) -> None:
    """A writer that adds no paragraphs must not leave a bookmark behind."""
    document = Document()
    counter = [_BOOKMARK_ID_START]

    def write_nothing() -> None:
        return None

    _wrap_block_with_bookmarks(document, "block-empty", write_nothing, counter)

    target = tmp_path / "out.docx"
    document.save(str(target))
    assert list_bookmarks(target) == []
|
||||
|
||||
|
||||
def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None:
    """Three blocks wrapped with one shared counter get three distinct bookmark ids."""
    document = Document()
    counter = [_BOOKMARK_ID_START]
    block_names = ("block-alef", "block-bet", "block-gimel")

    def make_writer(block_name):
        # Bind the name per block so each writer emits its own text.
        def write():
            return document.add_paragraph(f"תוכן של {block_name}")

        return write

    for block_name in block_names:
        _wrap_block_with_bookmarks(document, block_name, make_writer(block_name), counter)

    target = tmp_path / "out.docx"
    document.save(str(target))

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    ids = []
    for start in root.iterfind(".//w:bookmarkStart", NSMAP):
        ids.append(start.get(_w("id")))
    assert len(ids) == 3
    assert len(set(ids)) == 3

    assert set(list_bookmarks(target)) == {"block-alef", "block-bet", "block-gimel"}
|
||||
141
mcp-server/tests/test_docx_retrofit.py
Normal file
141
mcp-server/tests/test_docx_retrofit.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""בדיקות docx_retrofit — הזרקת bookmarks רטרואקטיבית."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
|
||||
from legal_mcp.services.docx_retrofit import (
|
||||
BLOCK_ORDER,
|
||||
retrofit_bookmarks,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import list_bookmarks
|
||||
|
||||
|
||||
def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Write a DOCX with two paragraphs per marker.

    The first paragraph of each pair starts with ``"<marker>. "``; the second
    is plain body text that only mentions the marker.
    """
    document = Document()
    for marker in markers:
        pair = (
            f"{marker}. תוכן הבלוק שמתחיל ב-{marker}",
            f"עוד פסקה בבלוק {marker}",
        )
        for text in pair:
            document.add_paragraph(text)
    document.save(str(path))
|
||||
|
||||
|
||||
def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """With all twelve markers present, every BLOCK_ORDER bookmark is added."""
    all_markers = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"]
    src = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(src, all_markers)

    outcome = retrofit_bookmarks(src, backup=False)
    assert len(outcome["bookmarks_added"]) == 12
    assert outcome["missing_blocks"] == []

    expected_names = set()
    for block_name, _ in BLOCK_ORDER:
        expected_names.add(block_name)
    assert set(list_bookmarks(src)) == expected_names
|
||||
|
||||
|
||||
def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """Blocks absent from the document are listed under ``missing_blocks``."""
    src = tmp_path / "src.docx"
    present_markers = ["א", "ב", "ג", "ד"]  # only the first four blocks exist
    _make_docx_with_hebrew_blocks(src, present_markers)

    outcome = retrofit_bookmarks(src, backup=False)

    expected_added = ["block-alef", "block-bet", "block-gimel", "block-dalet"]
    assert outcome["bookmarks_added"] == expected_added

    missing = outcome["missing_blocks"]
    assert "block-heh" in missing
    assert "block-yod-bet" in missing
|
||||
|
||||
|
||||
def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """The two-letter markers must not be mistaken for the single-letter one."""
    src = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(src, ["ט", "י", "יא", "יב"])

    outcome = retrofit_bookmarks(src, backup=False)

    expected = {"block-tet", "block-yod", "block-yod-alef", "block-yod-bet"}
    assert set(outcome["bookmarks_added"]) == expected
|
||||
|
||||
|
||||
def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """A second retrofit pass adds nothing and reports the bookmarks as existing."""
    src = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(src, ["א", "ב"])
    both = {"block-alef", "block-bet"}

    first_pass = retrofit_bookmarks(src, backup=False)
    assert first_pass["bookmarks_added"] == ["block-alef", "block-bet"]

    second_pass = retrofit_bookmarks(src, backup=False)
    assert second_pass["bookmarks_added"] == []  # nothing left to add
    assert set(second_pass["existing_bookmarks"]) == both

    # The file itself still carries just the two names.
    assert set(list_bookmarks(src)) == both
|
||||
|
||||
|
||||
def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """With default arguments a ``.pre-retrofit.docx`` sibling file is created."""
    src = tmp_path / "file.docx"
    _make_docx_with_hebrew_blocks(src, ["א", "ב"])

    retrofit_bookmarks(src)  # relies on the default backup behaviour

    expected_backup = src.with_suffix(".pre-retrofit.docx")
    assert expected_backup.exists()
|
||||
|
||||
|
||||
def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """Writing to ``output_path`` leaves the source file without bookmarks."""
    src, out = tmp_path / "src.docx", tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(src, ["א", "ב"])

    retrofit_bookmarks(src, output_path=out)

    # NOTE(review): the test name implies no backup file is written in this
    # mode, but that is not asserted here — confirm against the implementation.
    source_names = list_bookmarks(src)
    output_names = set(list_bookmarks(out))
    assert source_names == []
    assert output_names == {"block-alef", "block-bet"}
|
||||
|
||||
|
||||
def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """A marker letter inside body text, not at paragraph start, is not a block."""
    src = tmp_path / "src.docx"
    document = Document()
    for text in (
        "א. תחילת הבלוק",
        "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
        "ב. בלוק שני",
    ):
        document.add_paragraph(text)
    document.save(str(src))

    added = retrofit_bookmarks(src, backup=False)["bookmarks_added"]

    assert "block-alef" in added
    assert "block-bet" in added
    # The stray letter in the middle paragraph must not produce block-yod.
    assert "block-yod" not in added
|
||||
|
||||
|
||||
def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """An earlier marker that shows up after a later one is reported missing.

    The document lists 'ב' first, then 'א', then 'ג'. The expected outcome is
    that 'ב' and 'ג' are bookmarked while 'א' ends up in ``missing_blocks``.
    """
    src = tmp_path / "src.docx"
    document = Document()
    for text in (
        "ב. מופיע ראשון",
        "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
        "ג. בלוק גימל",
    ):
        document.add_paragraph(text)
    document.save(str(src))

    outcome = retrofit_bookmarks(src, backup=False)
    added = outcome["bookmarks_added"]

    assert "block-bet" in added
    assert "block-gimel" in added
    # 'א' comes after 'ב' in the file, so it must not be claimed.
    assert "block-alef" in outcome["missing_blocks"]
|
||||
|
||||
|
||||
def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """A document with no paragraphs yields no bookmarks and twelve missing blocks."""
    src = tmp_path / "empty.docx"
    Document().save(str(src))

    outcome = retrofit_bookmarks(src, backup=False)

    assert outcome["bookmarks_added"] == []
    assert len(outcome["missing_blocks"]) == 12
|
||||
342
mcp-server/tests/test_docx_reviser.py
Normal file
342
mcp-server/tests/test_docx_reviser.py
Normal file
@@ -0,0 +1,342 @@
|
||||
"""בדיקות docx_reviser — Track Changes XML surgery.
|
||||
|
||||
הבדיקות יוצרות DOCX בסיסי עם bookmarks, מפעילות revisions, ובודקות:
|
||||
1. שה-XML שנוצר תקף ונטען חזרה כ-Document
|
||||
2. שה-<w:ins> / <w:del> קיימים בפורמט הנכון
|
||||
3. שה-bookmarks נשמרים אחרי עריכה
|
||||
4. שגופן David ו-RTL נשמרים
|
||||
5. שכשלונות מטופלים אלגנטית (bookmark חסר → failed, לא crash)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from docx import Document
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services import docx_reviser
|
||||
from legal_mcp.services.docx_reviser import (
|
||||
NSMAP,
|
||||
Revision,
|
||||
_w,
|
||||
apply_tracked_revisions,
|
||||
list_bookmarks,
|
||||
)
|
||||
|
||||
|
||||
# ── Test fixtures ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _insert_bookmark(paragraph, name: str, bm_id: int) -> None:
    """Wrap the content of *paragraph* in a named bookmark.

    A ``<w:bookmarkStart>`` carrying *name* and *bm_id* is placed ahead of the
    paragraph's content and a matching ``<w:bookmarkEnd>`` is appended as the
    last child.

    Args:
        paragraph: python-docx paragraph whose ``_p`` element is modified.
        name: Value written to the ``w:name`` attribute.
        bm_id: Numeric id shared by the start and end markers.
    """
    p_elem = paragraph._p

    start = OxmlElement("w:bookmarkStart")
    start.set(qn("w:id"), str(bm_id))
    start.set(qn("w:name"), name)

    # <w:pPr>, when present, has to stay the first child of <w:p>; inserting
    # at index 0 unconditionally would push the bookmark in front of it.
    p_pr = p_elem.find(qn("w:pPr"))
    if p_pr is not None:
        p_pr.addnext(start)
    else:
        p_elem.insert(0, start)

    end = OxmlElement("w:bookmarkEnd")
    end.set(qn("w:id"), str(bm_id))
    p_elem.append(end)
|
||||
|
||||
|
||||
def _make_sample_docx(path: Path) -> None:
    """Write a three-paragraph DOCX, one bookmark per paragraph, ids 1 to 3."""
    document = Document()
    names = ("block-alef", "block-yod", "block-yod-bet")
    for bookmark_id, name in enumerate(names, start=1):
        paragraph = document.add_paragraph()
        text_run = paragraph.add_run(f"תוכן פסקה של {name}")
        text_run.font.name = "David"
        _insert_bookmark(paragraph, name, bookmark_id)
    document.save(str(path))
|
||||
|
||||
|
||||
@pytest.fixture
def sample_docx(tmp_path: Path) -> Path:
    """Path to a freshly written sample DOCX inside the test's temp directory."""
    target = tmp_path / "source.docx"
    _make_sample_docx(target)
    return target
|
||||
|
||||
|
||||
# ── list_bookmarks ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_list_bookmarks_returns_all_named(sample_docx: Path) -> None:
    """All three bookmarks of the sample document are reported."""
    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(sample_docx)) == expected
|
||||
|
||||
|
||||
def test_list_bookmarks_excludes_internal(tmp_path: Path) -> None:
    """A bookmark named ``_GoBack`` is left out of the listing."""
    target = tmp_path / "internal.docx"
    document = Document()
    entries = (("visible", "block-real", 1), ("hidden", "_GoBack", 2))
    for text, bookmark_name, bookmark_id in entries:
        _insert_bookmark(document.add_paragraph(text), bookmark_name, bookmark_id)
    document.save(str(target))

    assert list_bookmarks(target) == ["block-real"]
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: insert_after ─────────────────────────
|
||||
|
||||
|
||||
def test_insert_after_adds_tracked_paragraph(sample_docx: Path, tmp_path: Path) -> None:
    """insert_after adds the new text as a tracked insertion and keeps the anchor text."""
    out = tmp_path / "out.docx"
    new_text = "פסקה חדשה שהמערכת מוסיפה."
    rev = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content=new_text,
    )
    result = apply_tracked_revisions(
        sample_docx, out, [rev],
        author="מערכת AI",
        date=datetime(2026, 4, 16, 14, 0, tzinfo=timezone.utc),
    )
    assert result.applied == 1
    assert result.failed == 0
    assert out.exists()

    # Verify <w:ins> present in document.xml
    with zipfile.ZipFile(out, "r") as zf:
        doc_xml = zf.read("word/document.xml")
    tree = etree.fromstring(doc_xml)
    ins_elements = tree.findall(".//w:ins", NSMAP)
    assert len(ins_elements) >= 1
    # The new text has to sit inside a <w:ins>; finding it anywhere in the
    # document would also pass for an untracked insertion.
    assert any(new_text in "".join(el.itertext()) for el in ins_elements)
    # Verify the content is there
    all_text = "".join(tree.itertext())
    assert new_text in all_text
    # Verify original content preserved
    assert "תוכן פסקה של block-yod" in all_text
|
||||
|
||||
|
||||
def _find_ins_with_runs(tree: etree._Element) -> etree._Element | None:
    """Return the first ``<w:ins>`` that has a ``<w:r>`` descendant, else ``None``.

    This skips ``<w:ins>`` elements that hold no runs.
    """
    candidates = (
        ins
        for ins in tree.iterfind(".//w:ins", NSMAP)
        if ins.find(".//w:r", NSMAP) is not None
    )
    return next(candidates, None)
|
||||
|
||||
|
||||
def test_insert_after_ins_has_author_and_date(sample_docx: Path, tmp_path: Path) -> None:
    """The run-bearing ``<w:ins>`` carries the given author and a 'Z'-suffixed date."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-alef",
        content="test",
    )
    apply_tracked_revisions(sample_docx, target, [revision], author="דפנה")

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    inserted = _find_ins_with_runs(root)
    assert inserted is not None
    assert inserted.get(_w("author")) == "דפנה"

    stamp = inserted.get(_w("date"))
    assert stamp is not None
    assert stamp.endswith("Z")  # UTC designator
|
||||
|
||||
|
||||
def test_insert_after_uses_rtl_and_david(sample_docx: Path, tmp_path: Path) -> None:
    """The inserted run has ``<w:rtl>`` and an ASCII font of David in its properties."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-alef",
        content="מוסף",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    inserted = _find_ins_with_runs(root)
    assert inserted is not None

    first_run = inserted.find(".//w:r", NSMAP)
    assert first_run is not None

    run_props = first_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None

    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: insert_before ────────────────────────
|
||||
|
||||
|
||||
def test_insert_before_places_above_anchor(sample_docx: Path, tmp_path: Path) -> None:
    """insert_before puts the new paragraph ahead of the anchor's paragraph."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_before",
        anchor_bookmark="block-yod",
        content="לפני י.",
    )
    outcome = apply_tracked_revisions(sample_docx, target, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    paragraph_texts = []
    for para in root.findall(".//w:p", NSMAP):
        paragraph_texts.append("".join(para.itertext()))

    # Compare the positions of the first paragraph holding each text.
    idx_new = next(i for i, text in enumerate(paragraph_texts) if "לפני י." in text)
    idx_yod = next(
        i for i, text in enumerate(paragraph_texts) if "תוכן פסקה של block-yod" in text
    )
    assert idx_new < idx_yod
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: delete ───────────────────────────────
|
||||
|
||||
|
||||
def test_delete_wraps_runs_in_w_del(sample_docx: Path, tmp_path: Path) -> None:
    """A delete revision wraps the anchor's text in ``<w:del>`` as ``<w:delText>``."""
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="delete", anchor_bookmark="block-yod", content="")
    result = apply_tracked_revisions(sample_docx, out, [rev])
    assert result.applied == 1

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))
    dels = tree.findall(".//w:del", NSMAP)
    assert len(dels) >= 1

    # Inside w:del, text elements must become w:delText — so no plain w:t
    # may be left under any <w:del>.
    assert all(d.find(".//w:t", NSMAP) is None for d in dels)

    # Look in every <w:del>, not only the first in document order, so the
    # check does not depend on where the deleted text's wrapper happens to sit.
    del_texts = [t for d in dels for t in d.findall(".//w:delText", NSMAP)]
    assert any("block-yod" in (t.text or "") for t in del_texts)
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: replace ─────────────────────────────
|
||||
|
||||
|
||||
def test_replace_creates_both_ins_and_del(sample_docx: Path, tmp_path: Path) -> None:
    """A replace revision leaves at least one ``<w:ins>`` and one ``<w:del>``."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="replace",
        anchor_bookmark="block-yod",
        content="תוכן חדש לחלוטין",
    )
    outcome = apply_tracked_revisions(sample_docx, target, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    insertions = root.findall(".//w:ins", NSMAP)
    deletions = root.findall(".//w:del", NSMAP)
    assert len(insertions) >= 1
    assert len(deletions) >= 1
|
||||
|
||||
|
||||
# ── Failure modes ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_missing_bookmark_returns_failed_not_crash(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """An unknown anchor is reported as a failed revision and output is still written."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="does-not-exist",
        content="x",
    )

    outcome = apply_tracked_revisions(sample_docx, target, [revision])

    assert outcome.applied == 0
    assert outcome.failed == 1

    first = outcome.results[0]
    assert first.status == "failed"
    assert "not found" in (first.error or "")

    # The output file exists even though no revision was applied.
    assert target.exists()
|
||||
|
||||
|
||||
def test_empty_revisions_list_produces_copy(sample_docx: Path, tmp_path: Path) -> None:
    """With no revisions the output is written and keeps all three bookmarks."""
    target = tmp_path / "out.docx"

    outcome = apply_tracked_revisions(sample_docx, target, [])

    assert (outcome.applied, outcome.failed) == (0, 0)
    assert target.exists()
    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(target)) == expected
|
||||
|
||||
|
||||
# ── Track revisions flag in settings ──────────────────────────────
|
||||
|
||||
|
||||
def test_track_revisions_flag_is_enabled(sample_docx: Path, tmp_path: Path) -> None:
    """After a revision, ``word/settings.xml`` has ``<w:trackRevisions>`` switched on."""
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="insert_after",
                   anchor_bookmark="block-alef", content="x")
    apply_tracked_revisions(sample_docx, out, [rev])

    with zipfile.ZipFile(out, "r") as zf:
        settings_xml = zf.read("word/settings.xml")
    settings_tree = etree.fromstring(settings_xml)
    tr = settings_tree.find(_w("trackRevisions"))
    assert tr is not None

    # The element is an on/off toggle: a missing w:val means "on", but an
    # explicit false value would switch tracking off while the element exists.
    val = tr.get(_w("val"))
    assert val is None or val.lower() not in {"0", "false", "off"}
|
||||
|
||||
|
||||
# ── Multiple revisions with unique IDs ────────────────────────────
|
||||
|
||||
|
||||
def test_multiple_revisions_get_unique_ids(sample_docx: Path, tmp_path: Path) -> None:
    """Every ``<w:ins>`` / ``<w:del>`` produced by a batch has a distinct ``w:id``."""
    out = tmp_path / "out.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-alef", content="ראשון"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="שני"),
        Revision(id="r3", type="delete", anchor_bookmark="block-yod-bet"),
    ]
    result = apply_tracked_revisions(sample_docx, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    all_ids: list[str] = []
    for xpath in (".//w:ins", ".//w:del"):
        for el in tree.iterfind(xpath, NSMAP):
            wid = el.get(_w("id"))
            if wid:
                all_ids.append(wid)

    # Without this guard the uniqueness check below passes on an empty list,
    # i.e. when no revision element carries an id at all.
    assert all_ids, "no revision ids found"
    assert len(all_ids) == len(set(all_ids)), f"duplicate IDs: {all_ids}"
|
||||
|
||||
|
||||
# ── DOCX remains openable as Document ─────────────────────────────
|
||||
|
||||
|
||||
def test_output_docx_is_openable_by_python_docx(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """The revised file loads in python-docx and holds both old and new text."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוכן חדש",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    # Loading must not raise; the anchor text is read through python-docx.
    reopened = Document(str(target))
    paragraph_text = "\n".join(para.text for para in reopened.paragraphs)
    assert "block-yod" in paragraph_text

    # The inserted text is looked up in the raw XML instead.
    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    assert "תוכן חדש" in "".join(root.itertext())
|
||||
|
||||
|
||||
# ── Bookmarks preserved through revisions ─────────────────────────
|
||||
|
||||
|
||||
def test_bookmarks_preserved_after_insert(sample_docx: Path, tmp_path: Path) -> None:
    """An insert_after revision leaves the set of bookmark names unchanged."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="x",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(target)) == expected
|
||||
|
||||
|
||||
# ── Idempotency of loading/saving without changes ────────────────
|
||||
|
||||
|
||||
def test_save_without_revisions_preserves_content(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """With an empty revision list the paragraph texts are the same before and after."""
    target = tmp_path / "out.docx"
    apply_tracked_revisions(sample_docx, target, [])

    def paragraph_texts(docx_path):
        return [para.text for para in Document(str(docx_path)).paragraphs]

    original_texts = paragraph_texts(sample_docx)
    rewritten_texts = paragraph_texts(target)
    assert original_texts == rewritten_texts
|
||||
237
mcp-server/tests/test_track_changes_e2e.py
Normal file
237
mcp-server/tests/test_track_changes_e2e.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser.
|
||||
|
||||
הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות
|
||||
שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks
|
||||
מובנים) ועל מסמכים רגילים שעברו retrofit.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from docx import Document
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services import docx_retrofit, docx_reviser
|
||||
from legal_mcp.services.docx_exporter import (
|
||||
_BOOKMARK_ID_START,
|
||||
_wrap_block_with_bookmarks,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import (
|
||||
NSMAP,
|
||||
Revision,
|
||||
_w,
|
||||
apply_tracked_revisions,
|
||||
list_bookmarks,
|
||||
)
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _make_exporter_style_docx(path: Path) -> None:
    """Write a DOCX whose five blocks are each wrapped by ``_wrap_block_with_bookmarks``.

    Each block's content is split on newlines; every non-blank line becomes
    one stripped paragraph inside that block's bookmark.
    """
    document = Document()
    counter = [_BOOKMARK_ID_START]

    block_contents = {
        "block-alef": "בפני: דפנה תמיר, יו\"ר ועדת הערר",
        "block-bet": "ערר מספר 1033-25",
        "block-heh": "רקע\nהנכס מצוי ברחוב הר בשן",
        "block-yod": "דיון והכרעה\nלאחר שבחנו את טענות הצדדים",
        "block-yod-bet": "ההחלטה\nהערר מתקבל בחלקו",
    }

    def make_writer(content):
        # Bind this block's content so the writer is independent of the loop.
        def write():
            stripped_lines = (line.strip() for line in content.split("\n"))
            for line in stripped_lines:
                if line:
                    document.add_paragraph(line)

        return write

    for block_name, content in block_contents.items():
        _wrap_block_with_bookmarks(document, block_name, make_writer(content), counter)

    document.save(str(path))
|
||||
|
||||
|
||||
def _make_user_edited_docx(path: Path) -> None:
    """Write a bookmark-free DOCX of plain paragraphs.

    The paragraphs are section titles followed by body text, standing in for
    a file that a user edited by hand.
    """
    paragraph_texts = (
        "בפני: דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים",
        "ערר מספר 9999-25",
        "רקע",
        "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים",
        "תמצית טענות הצדדים",
        "העוררים טוענים שהבנייה חורגת מהתכנית",
        "תגובת המשיבה",
        "הוועדה המקומית טוענת שהבקשה תואמת",
        "ההליכים בפני ועדת הערר",
        "קיימנו דיון בנוכחות הצדדים",
        "דיון והכרעה",
        "לאחר שבחנו את טענות הצדדים בחון מעמיק",
        "סוף דבר",
        "הערר נדחה",
    )
    document = Document()
    for text in paragraph_texts:
        document.add_paragraph(text)
    document.save(str(path))
|
||||
|
||||
|
||||
# ── Exporter-style (built-in bookmarks) ──────────────────────────
|
||||
|
||||
|
||||
def test_exporter_output_works_with_reviser(tmp_path: Path) -> None:
    """A file with exporter-style bookmarks accepts a tracked insert_after revision."""
    src = tmp_path / "exported.docx"
    _make_exporter_style_docx(src)

    # The five wrapped blocks must be listed without any retrofit step.
    required = {"block-alef", "block-bet", "block-heh", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(src)) >= required

    target = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוספת מערכת: פסק הלכה חדש",
        style="body",
    )
    outcome = apply_tracked_revisions(src, target, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    assert "תוספת מערכת" in "".join(root.itertext())

    # The added text must be found inside a <w:ins>, i.e. be tracked.
    tracked_texts = ("".join(ins.itertext()) for ins in root.findall(".//w:ins", NSMAP))
    assert any("תוספת מערכת" in text for text in tracked_texts)
|
||||
|
||||
|
||||
# ── User-edited DOCX (no bookmarks) — needs retrofit first ──────
|
||||
|
||||
|
||||
def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None:
    """A bookmark-free file can be retrofitted and then revised at ``block-yod``."""
    user_file = tmp_path / "user_edit.docx"
    _make_user_edited_docx(user_file)

    # The starting file has no named bookmarks.
    assert list_bookmarks(user_file) == []

    # The fixture has no letter markers, so detection has to come from its titles.
    outcome = docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    added = set(outcome["bookmarks_added"])
    for required in ("block-yod", "block-heh", "block-zayin"):
        assert required in added

    # Apply one revision against the retrofitted file.
    target = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...",
        style="body",
    )
    revise_outcome = apply_tracked_revisions(user_file, target, [revision])
    assert revise_outcome.applied == 1

    # The inserted text must be inside a <w:ins>.
    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    tracked_texts = []
    for ins in root.iterfind(".//w:ins", NSMAP):
        tracked_texts.append("".join(ins.itertext()))
    assert any("פסק הלכה שהוסף" in text for text in tracked_texts)
|
||||
|
||||
|
||||
def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None:
    """Retrofitting in place leaves every paragraph's text unchanged."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)

    def read_texts():
        return [para.text for para in Document(str(user_file)).paragraphs]

    texts_before = read_texts()
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    texts_after = read_texts()

    # Paragraph text must be the same before and after the retrofit.
    assert texts_before == texts_after
|
||||
|
||||
|
||||
def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None:
    """A repeated retrofit adds nothing, and a revision still applies afterwards."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)

    first_pass = docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    second_pass = docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    # The second pass finds everything the first pass added already in place.
    assert second_pass["bookmarks_added"] == []
    assert set(second_pass["existing_bookmarks"]) >= set(first_pass["bookmarks_added"])

    target = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="x",
    )
    outcome = apply_tracked_revisions(user_file, target, [revision])
    assert outcome.applied == 1
|
||||
|
||||
|
||||
def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None:
    """Three insert revisions yield at least three distinct ``<w:ins>`` ids."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    plan = (
        ("r1", "insert_after", "block-heh", "תוספת 1"),
        ("r2", "insert_after", "block-yod", "תוספת 2"),
        ("r3", "insert_before", "block-yod-alef", "תוספת 3"),
    )
    revisions = [
        Revision(id=rev_id, type=rev_type, anchor_bookmark=anchor, content=text)
        for rev_id, rev_type, anchor, text in plan
    ]

    target = tmp_path / "revised.docx"
    outcome = apply_tracked_revisions(user_file, target, revisions)
    assert outcome.applied == 3

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    distinct_ids = set()
    for ins in root.iterfind(".//w:ins", NSMAP):
        distinct_ids.add(ins.get(_w("id")))
    assert len(distinct_ids) >= 3  # no fewer distinct ids than revisions
|
||||
|
||||
|
||||
def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None:
    """The tracked run holding the new text has ``<w:rtl>`` and the David font."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="עברית RTL",
    )
    apply_tracked_revisions(user_file, target, [revision])

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    # Walk every run under every <w:ins> and stop at the first one that
    # carries the inserted text.
    tracked_runs = (
        run
        for ins in root.iterfind(".//w:ins", NSMAP)
        for run in ins.findall(".//w:r", NSMAP)
    )
    for run in tracked_runs:
        holds_text = any(
            "עברית RTL" in (node.text or "") for node in run.findall(".//w:t", NSMAP)
        )
        if not holds_text:
            continue

        run_props = run.find(_w("rPr"))
        assert run_props is not None
        assert run_props.find(_w("rtl")) is not None

        fonts = run_props.find(_w("rFonts"))
        assert fonts is not None
        assert fonts.get(_w("ascii")) == "David"
        return

    pytest.fail("tracked insertion with 'עברית RTL' not found")
|
||||
Reference in New Issue
Block a user