"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser. הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks מובנים) ועל מסמכים רגילים שעברו retrofit. """ from __future__ import annotations import zipfile from datetime import datetime, timezone from pathlib import Path import pytest from docx import Document from docx.oxml import OxmlElement from docx.oxml.ns import qn from lxml import etree from legal_mcp.services import docx_retrofit, docx_reviser from legal_mcp.services.docx_exporter import ( _BOOKMARK_ID_START, _wrap_block_with_bookmarks, ) from legal_mcp.services.docx_reviser import ( NSMAP, Revision, _w, apply_tracked_revisions, list_bookmarks, ) # ── Helpers ──────────────────────────────────────────────────────── def _make_exporter_style_docx(path: Path) -> None: """Simulate what docx_exporter produces: paragraphs wrapped in bookmarks for each of the 12 blocks, with David font and RTL.""" doc = Document() bm_counter = [_BOOKMARK_ID_START] blocks = [ ("block-alef", "בפני: דפנה תמיר, יו\"ר ועדת הערר"), ("block-bet", "ערר מספר 1033-25"), ("block-heh", "רקע\nהנכס מצוי ברחוב הר בשן"), ("block-yod", "דיון והכרעה\nלאחר שבחנו את טענות הצדדים"), ("block-yod-bet", "ההחלטה\nהערר מתקבל בחלקו"), ] for name, content in blocks: def writer(c=content): for line in c.split("\n"): if line.strip(): doc.add_paragraph(line.strip()) _wrap_block_with_bookmarks(doc, name, writer, bm_counter) doc.save(str(path)) def _make_user_edited_docx(path: Path) -> None: """Simulate what a user produces by editing in Word: no bookmarks, heading-style paragraphs in Daphna style.""" doc = Document() for text in [ "בפני: דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים", "ערר מספר 9999-25", "רקע", "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים", "תמצית טענות הצדדים", "העוררים טוענים שהבנייה חורגת מהתכנית", "תגובת המשיבה", "הוועדה המקומית טוענת שהבקשה תואמת", "ההליכים בפני ועדת הערר", "קיימנו דיון בנוכחות הצדדים", "דיון והכרעה", "לאחר שבחנו את טענות הצדדים בחון מעמיק", "סוף דבר", "הערר נדחה", ]: doc.add_paragraph(text) doc.save(str(path)) # ── Exporter-style (built-in bookmarks) ────────────────────────── def test_exporter_output_works_with_reviser(tmp_path: Path) -> None: src = tmp_path / "exported.docx" _make_exporter_style_docx(src) # All 5 bookmarks should be present directly from "export" bookmarks = list_bookmarks(src) assert set(bookmarks) >= {"block-alef", "block-bet", "block-heh", "block-yod", "block-yod-bet"} out = tmp_path / "revised.docx" revs = [ Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", content="תוספת מערכת: פסק הלכה חדש", style="body"), ] result = apply_tracked_revisions(src, out, revs) assert result.applied == 1 with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) raw_text = "".join(tree.itertext()) assert "תוספת מערכת" in raw_text # The revision is tracked (inside ) ins_list = tree.findall(".//w:ins", NSMAP) assert any("תוספת מערכת" in "".join(el.itertext()) for el in ins_list) # ── User-edited DOCX (no bookmarks) — needs retrofit first ────── def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None: user_file = tmp_path / "user_edit.docx" _make_user_edited_docx(user_file) # Initially no named bookmarks assert list_bookmarks(user_file) == [] # Retrofit — should detect blocks via heading heuristic result = docx_retrofit.retrofit_bookmarks(user_file, backup=False) added = set(result["bookmarks_added"]) # Must include at least block-yod (for common "insert pasak halacha" task) assert "block-yod" in added # Plus block-heh (רקע) and block-zayin (תמצית טענות) assert "block-heh" in added assert "block-zayin" in added # Now apply a revision on the retrofitted file out = tmp_path / "revised.docx" revs = [Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...", style="body")] rr = apply_tracked_revisions(user_file, out, revs) assert rr.applied == 1 # Verify output has the insertion inside with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) ins_texts = ["".join(el.itertext()) for el in tree.iterfind(".//w:ins", NSMAP)] assert any("פסק הלכה שהוסף" in t for t in ins_texts) def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None: user_file = tmp_path / "user.docx" _make_user_edited_docx(user_file) before_doc = Document(str(user_file)) before_texts = [p.text for p in before_doc.paragraphs] docx_retrofit.retrofit_bookmarks(user_file, backup=False) after_doc = Document(str(user_file)) after_texts = [p.text for p in after_doc.paragraphs] # Paragraph texts should be identical (we only added bookmark markers) assert before_texts == after_texts def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None: """Running retrofit twice + revising should still produce valid output.""" user_file = tmp_path / "user.docx" _make_user_edited_docx(user_file) # First retrofit r1 = docx_retrofit.retrofit_bookmarks(user_file, backup=False) # Second retrofit — should add no new bookmarks r2 = docx_retrofit.retrofit_bookmarks(user_file, backup=False) assert r2["bookmarks_added"] == [] assert set(r2["existing_bookmarks"]) >= set(r1["bookmarks_added"]) # Then revise works normally out = tmp_path / "revised.docx" revs = [Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", content="x")] result = apply_tracked_revisions(user_file, out, revs) assert result.applied == 1 def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None: """Verify multiple tracked changes each get independent ins ids so user can Accept/Reject each one separately in Word.""" user_file = tmp_path / "user.docx" _make_user_edited_docx(user_file) docx_retrofit.retrofit_bookmarks(user_file, backup=False) out = tmp_path / "revised.docx" revs = [ Revision(id="r1", type="insert_after", anchor_bookmark="block-heh", content="תוספת 1"), Revision(id="r2", type="insert_after", anchor_bookmark="block-yod", content="תוספת 2"), Revision(id="r3", type="insert_before", anchor_bookmark="block-yod-alef", content="תוספת 3"), ] result = apply_tracked_revisions(user_file, out, revs) assert result.applied == 3 with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) ins_ids = {el.get(_w("id")) for el in tree.iterfind(".//w:ins", NSMAP)} assert len(ins_ids) >= 3 # at least one unique id per revision def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None: """Inserted paragraph must have bidi + rtl + David font so it renders correctly in Word alongside the user's content.""" user_file = tmp_path / "user.docx" _make_user_edited_docx(user_file) docx_retrofit.retrofit_bookmarks(user_file, backup=False) out = tmp_path / "out.docx" revs = [Revision(id="r1", type="insert_after", anchor_bookmark="block-yod", content="עברית RTL")] apply_tracked_revisions(user_file, out, revs) with zipfile.ZipFile(out, "r") as zf: tree = etree.fromstring(zf.read("word/document.xml")) # Find the ins that holds runs for ins in tree.iterfind(".//w:ins", NSMAP): runs = ins.findall(".//w:r", NSMAP) for r in runs: text_els = r.findall(".//w:t", NSMAP) if any("עברית RTL" in (t.text or "") for t in text_els): rPr = r.find(_w("rPr")) assert rPr is not None assert rPr.find(_w("rtl")) is not None rFonts = rPr.find(_w("rFonts")) assert rFonts is not None assert rFonts.get(_w("ascii")) == "David" return pytest.fail("tracked insertion with 'עברית RTL' not found")