"""בדיקות docx_reviser — Track Changes XML surgery. הבדיקות יוצרות DOCX בסיסי עם bookmarks, מפעילות revisions, ובודקות: 1. שה-XML שנוצר תקף ונטען חזרה כ-Document 2. שה- / קיימים בפורמט הנכון 3. שה-bookmarks נשמרים אחרי עריכה 4. שגופן David ו-RTL נשמרים 5. שכשלונות מטופלים אלגנטית (bookmark חסר → failed, לא crash) """ from __future__ import annotations import zipfile from datetime import datetime, timezone from io import BytesIO from pathlib import Path import pytest from docx import Document from docx.oxml import OxmlElement from docx.oxml.ns import qn from lxml import etree from legal_mcp.services import docx_reviser from legal_mcp.services.docx_reviser import ( NSMAP, Revision, _w, apply_tracked_revisions, list_bookmarks, ) # ── Test fixtures ────────────────────────────────────────────────── def _insert_bookmark(paragraph, name: str, bm_id: int) -> None: """Insert a at the start of a paragraph and a at the end.""" p_elem = paragraph._p start = OxmlElement("w:bookmarkStart") start.set(qn("w:id"), str(bm_id)) start.set(qn("w:name"), name) p_elem.insert(0, start) end = OxmlElement("w:bookmarkEnd") end.set(qn("w:id"), str(bm_id)) p_elem.append(end) def _make_sample_docx(path: Path) -> None: """Create a simple DOCX with 3 paragraphs, each with a bookmark.""" doc = Document() for idx, name in enumerate(("block-alef", "block-yod", "block-yod-bet")): p = doc.add_paragraph() run = p.add_run(f"תוכן פסקה של {name}") run.font.name = "David" _insert_bookmark(p, name, idx + 1) doc.save(str(path)) @pytest.fixture def sample_docx(tmp_path: Path) -> Path: path = tmp_path / "source.docx" _make_sample_docx(path) return path # ── list_bookmarks ──────────────────────────────────────────────── def test_list_bookmarks_returns_all_named(sample_docx: Path) -> None: names = list_bookmarks(sample_docx) assert set(names) == {"block-alef", "block-yod", "block-yod-bet"} def test_list_bookmarks_excludes_internal(tmp_path: Path) -> None: """Bookmarks starting with '_' (like _GoBack) should be filtered out.""" path = tmp_path / "internal.docx" doc = Document() p1 = doc.add_paragraph("visible") _insert_bookmark(p1, "block-real", 1) p2 = doc.add_paragraph("hidden") _insert_bookmark(p2, "_GoBack", 2) doc.save(str(path)) names = list_bookmarks(path) assert names == ["block-real"] # ── apply_tracked_revisions: insert_after ───────────────────────── def test_insert_after_adds_tracked_paragraph(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision( id="r1", type="insert_after", anchor_bookmark="block-yod", content="פסקה חדשה שהמערכת מוסיפה.", ) result = apply_tracked_revisions( sample_docx, out, [rev], author="מערכת AI", date=datetime(2026, 4, 16, 14, 0, tzinfo=timezone.utc), ) assert result.applied == 1 assert result.failed == 0 assert out.exists() # Verify present in document.xml with zipfile.ZipFile(out, "r") as zf: doc_xml = zf.read("word/document.xml") tree = etree.fromstring(doc_xml) ins_elements = tree.findall(".//w:ins", NSMAP) assert len(ins_elements) >= 1 # Verify the content is there all_text = "".join(tree.itertext()) assert "פסקה חדשה שהמערכת מוסיפה." in all_text # Verify original content preserved assert "תוכן פסקה של block-yod" in all_text def _find_ins_with_runs(tree: etree._Element) -> etree._Element | None: """Pick the that actually wraps runs (not the pilcrow-marker one).""" for ins in tree.iterfind(".//w:ins", NSMAP): if ins.find(".//w:r", NSMAP) is not None: return ins return None def test_insert_after_ins_has_author_and_date(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_after", anchor_bookmark="block-alef", content="test") apply_tracked_revisions(sample_docx, out, [rev], author="דפנה") with zipfile.ZipFile(out, "r") as zf: doc_xml = zf.read("word/document.xml") tree = etree.fromstring(doc_xml) ins = _find_ins_with_runs(tree) assert ins is not None assert ins.get(_w("author")) == "דפנה" date_str = ins.get(_w("date")) assert date_str is not None assert date_str.endswith("Z") # ISO 8601 UTC def test_insert_after_uses_rtl_and_david(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_after", anchor_bookmark="block-alef", content="מוסף") apply_tracked_revisions(sample_docx, out, [rev]) with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) ins = _find_ins_with_runs(tree) assert ins is not None run = ins.find(".//w:r", NSMAP) assert run is not None rPr = run.find(_w("rPr")) assert rPr is not None assert rPr.find(_w("rtl")) is not None rFonts = rPr.find(_w("rFonts")) assert rFonts is not None assert rFonts.get(_w("ascii")) == "David" # ── apply_tracked_revisions: insert_before ──────────────────────── def test_insert_before_places_above_anchor(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_before", anchor_bookmark="block-yod", content="לפני י.") result = apply_tracked_revisions(sample_docx, out, [rev]) assert result.applied == 1 # Order check: new paragraph's text must appear before "block-yod" with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) paragraphs = tree.findall(".//w:p", NSMAP) texts = ["".join(p.itertext()) for p in paragraphs] idx_new = next(i for i, t in enumerate(texts) if "לפני י." in t) idx_yod = next(i for i, t in enumerate(texts) if "תוכן פסקה של block-yod" in t) assert idx_new < idx_yod # ── apply_tracked_revisions: delete ─────────────────────────────── def test_delete_wraps_runs_in_w_del(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="delete", anchor_bookmark="block-yod", content="") result = apply_tracked_revisions(sample_docx, out, [rev]) assert result.applied == 1 with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) dels = tree.findall(".//w:del", NSMAP) assert len(dels) >= 1 # Inside w:del, text elements must become w:delText del_texts = dels[0].findall(".//w:delText", NSMAP) assert any("block-yod" in (t.text or "") for t in del_texts) # ── apply_tracked_revisions: replace ───────────────────────────── def test_replace_creates_both_ins_and_del(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="replace", anchor_bookmark="block-yod", content="תוכן חדש לחלוטין") result = apply_tracked_revisions(sample_docx, out, [rev]) assert result.applied == 1 with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) assert len(tree.findall(".//w:ins", NSMAP)) >= 1 assert len(tree.findall(".//w:del", NSMAP)) >= 1 # ── Failure modes ───────────────────────────────────────────────── def test_missing_bookmark_returns_failed_not_crash( sample_docx: Path, tmp_path: Path, ) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_after", anchor_bookmark="does-not-exist", content="x") result = apply_tracked_revisions(sample_docx, out, [rev]) assert result.applied == 0 assert result.failed == 1 assert result.results[0].status == "failed" assert "not found" in (result.results[0].error or "") # Output file still produced (unchanged copy) assert out.exists() def test_empty_revisions_list_produces_copy(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" result = apply_tracked_revisions(sample_docx, out, []) assert result.applied == 0 assert result.failed == 0 assert out.exists() # bookmarks should still be there assert set(list_bookmarks(out)) == {"block-alef", "block-yod", "block-yod-bet"} # ── Track revisions flag in settings ────────────────────────────── def test_track_revisions_flag_is_enabled(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_after", anchor_bookmark="block-alef", content="x") apply_tracked_revisions(sample_docx, out, [rev]) with zipfile.ZipFile(out, "r") as zf: settings_xml = zf.read("word/settings.xml") settings_tree = etree.fromstring(settings_xml) tr = settings_tree.find(_w("trackRevisions")) assert tr is not None # ── Multiple revisions with unique IDs ──────────────────────────── def test_multiple_revisions_get_unique_ids(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" revs = [ Revision(id="r1", type="insert_after", anchor_bookmark="block-alef", content="ראשון"), Revision(id="r2", type="insert_after", anchor_bookmark="block-yod", content="שני"), Revision(id="r3", type="delete", anchor_bookmark="block-yod-bet"), ] result = apply_tracked_revisions(sample_docx, out, revs) assert result.applied == 3 with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) all_ids: list[str] = [] for xpath in (".//w:ins", ".//w:del"): for el in tree.iterfind(xpath, NSMAP): wid = el.get(_w("id")) if wid: all_ids.append(wid) assert len(all_ids) == len(set(all_ids)), f"duplicate IDs: {all_ids}" # ── DOCX remains openable as Document ───────────────────────────── def test_output_docx_is_openable_by_python_docx( sample_docx: Path, tmp_path: Path, ) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", content="תוכן חדש") apply_tracked_revisions(sample_docx, out, [rev]) # Must be openable as a valid DOCX by python-docx (no exceptions) doc = Document(str(out)) # Original text is still accessible via python-docx all_text = "\n".join(p.text for p in doc.paragraphs) assert "block-yod" in all_text # Inserted (tracked) text is present in the raw XML via itertext with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) raw_text = "".join(tree.itertext()) assert "תוכן חדש" in raw_text # ── Bookmarks preserved through revisions ───────────────────────── def test_bookmarks_preserved_after_insert(sample_docx: Path, tmp_path: Path) -> None: out = tmp_path / "out.docx" rev = Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", content="x") apply_tracked_revisions(sample_docx, out, [rev]) names = list_bookmarks(out) assert set(names) == {"block-alef", "block-yod", "block-yod-bet"} # ── Idempotency of loading/saving without changes ──────────────── def test_save_without_revisions_preserves_content( sample_docx: Path, tmp_path: Path, ) -> None: out = tmp_path / "out.docx" apply_tracked_revisions(sample_docx, out, []) doc_orig = Document(str(sample_docx)) doc_new = Document(str(out)) orig_text = [p.text for p in doc_orig.paragraphs] new_text = [p.text for p in doc_new.paragraphs] assert orig_text == new_text