# legal-ai/mcp-server/tests/test_track_changes_e2e.py

"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser.

הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות
שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks
מובנים) ועל מסמכים רגילים שעברו retrofit.
"""

from __future__ import annotations

import zipfile
from datetime import datetime, timezone
from pathlib import Path

import pytest
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from lxml import etree

from legal_mcp.services import docx_retrofit, docx_reviser
from legal_mcp.services.docx_exporter import (
    _BOOKMARK_ID_START,
    _wrap_block_with_bookmarks,
)
from legal_mcp.services.docx_reviser import (
    NSMAP,
    Revision,
    _w,
    apply_tracked_revisions,
    list_bookmarks,
)


# ── Helpers ────────────────────────────────────────────────────────


def _make_exporter_style_docx(path: Path) -> None:
    """Build a fixture DOCX whose blocks go through the exporter's wrapper.

    Five named blocks are written. For each one, every non-blank line of the
    block text is stripped and added as its own paragraph from inside a
    writer callback handed to ``_wrap_block_with_bookmarks``. The resulting
    document is saved to *path*.
    """
    document = Document()
    # One-element list passed to the wrapper on every call; presumably the
    # wrapper advances it to hand out bookmark ids -- confirm in docx_exporter.
    next_bookmark_id = [_BOOKMARK_ID_START]

    block_specs = (
        ("block-alef", "בפני: דפנה תמיר, יו\"ר ועדת הערר"),
        ("block-bet", "ערר מספר 1033-25"),
        ("block-heh", "רקע\nהנכס מצוי ברחוב הר בשן"),
        ("block-yod", "דיון והכרעה\nלאחר שבחנו את טענות הצדדים"),
        ("block-yod-bet", "ההחלטה\nהערר מתקבל בחלקו"),
    )

    def add_lines(text: str) -> None:
        # One paragraph per non-blank line, surrounding whitespace removed.
        for stripped in (raw.strip() for raw in text.split("\n")):
            if stripped:
                document.add_paragraph(stripped)

    for bookmark_name, block_text in block_specs:
        # Bind the current text as a default so each callback keeps its own.
        _wrap_block_with_bookmarks(
            document,
            bookmark_name,
            lambda text=block_text: add_lines(text),
            next_bookmark_id,
        )

    document.save(str(path))


def _make_user_edited_docx(path: Path) -> None:
    """Build a fixture DOCX resembling a document typed by hand in Word.

    Only plain paragraphs are added (Hebrew heading lines and body lines);
    this helper creates no bookmarks. The document is saved to *path*.
    """
    paragraph_texts = (
        "בפני: דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים",
        "ערר מספר 9999-25",
        "רקע",
        "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים",
        "תמצית טענות הצדדים",
        "העוררים טוענים שהבנייה חורגת מהתכנית",
        "תגובת המשיבה",
        "הוועדה המקומית טוענת שהבקשה תואמת",
        "ההליכים בפני ועדת הערר",
        "קיימנו דיון בנוכחות הצדדים",
        "דיון והכרעה",
        "לאחר שבחנו את טענות הצדדים בחון מעמיק",
        "סוף דבר",
        "הערר נדחה",
    )

    document = Document()
    for paragraph_text in paragraph_texts:
        document.add_paragraph(paragraph_text)
    document.save(str(path))


# ── Exporter-style (built-in bookmarks) ──────────────────────────


def test_exporter_output_works_with_reviser(tmp_path: Path) -> None:
    """A tracked insertion can be anchored on exporter-written bookmarks."""
    exported = tmp_path / "exported.docx"
    _make_exporter_style_docx(exported)

    # Every block name written by the fixture must be listed as a bookmark.
    expected_names = {
        "block-alef",
        "block-bet",
        "block-heh",
        "block-yod",
        "block-yod-bet",
    }
    assert expected_names <= set(list_bookmarks(exported))

    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוספת מערכת: פסק הלכה חדש",
        style="body",
    )
    outcome = apply_tracked_revisions(exported, revised, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(revised, "r") as archive:
        document_xml = archive.read("word/document.xml")
    root = etree.fromstring(document_xml)

    marker = "תוספת מערכת"
    assert marker in "".join(root.itertext())
    # The text must sit inside a <w:ins> element, i.e. be a tracked change.
    tracked_texts = [
        "".join(ins.itertext()) for ins in root.findall(".//w:ins", NSMAP)
    ]
    assert any(marker in text for text in tracked_texts)


# ── User-edited DOCX (no bookmarks) — needs retrofit first ──────


def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None:
    """Retrofit adds bookmarks to a plain DOCX, then a revision anchors on one."""
    user_file = tmp_path / "user_edit.docx"
    _make_user_edited_docx(user_file)

    # The hand-made fixture starts out without any listed bookmarks.
    assert list_bookmarks(user_file) == []

    retrofit_report = docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    added_names = set(retrofit_report["bookmarks_added"])
    # block-yod is the anchor used below; block-heh and block-zayin are
    # expected for the background and summary-of-claims headings.
    for required in ("block-yod", "block-heh", "block-zayin"):
        assert required in added_names

    # Revise the file that was retrofitted in place.
    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...",
        style="body",
    )
    outcome = apply_tracked_revisions(user_file, revised, [revision])
    assert outcome.applied == 1

    # The inserted text must appear inside a <w:ins> element.
    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    tracked_texts = [
        "".join(ins.itertext()) for ins in root.iterfind(".//w:ins", NSMAP)
    ]
    assert any("פסק הלכה שהוסף" in text for text in tracked_texts)


def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None:
    """Retrofitting must leave the text of every paragraph unchanged."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)

    def read_paragraph_texts() -> list:
        # Re-open the file each time so the on-disk state is what is compared.
        return [paragraph.text for paragraph in Document(str(user_file)).paragraphs]

    texts_before = read_paragraph_texts()
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    texts_after = read_paragraph_texts()

    # Same paragraphs, same order, same text.
    assert texts_before == texts_after


def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None:
    """A second retrofit adds nothing, and revising afterwards still works."""
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)

    first_pass = docx_retrofit.retrofit_bookmarks(user_file, backup=False)
    second_pass = docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    # Nothing new on the second run; everything the first run added is now
    # reported as already existing.
    assert second_pass["bookmarks_added"] == []
    previously_added = set(first_pass["bookmarks_added"])
    assert previously_added <= set(second_pass["existing_bookmarks"])

    # A revision against the twice-retrofitted file is applied as usual.
    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="x",
    )
    outcome = apply_tracked_revisions(user_file, revised, [revision])
    assert outcome.applied == 1


def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None:
    """Verify that several tracked changes carry distinct ``<w:ins>`` ids.

    Three revisions are applied to a retrofitted user document. The output
    must contain at least three different id values across its ``<w:ins>``
    elements, so that each change can be accepted or rejected on its own
    in Word.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    out = tmp_path / "revised.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-heh", content="תוספת 1"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="תוספת 2"),
        Revision(id="r3", type="insert_before",
                 anchor_bookmark="block-yod-alef", content="תוספת 3"),
    ]
    result = apply_tracked_revisions(user_file, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    # ``Element.get`` returns None when the attribute is absent. Drop those so
    # a missing id cannot be counted as one of the "unique" ids.
    raw_ids = (el.get(_w("id")) for el in tree.iterfind(".//w:ins", NSMAP))
    ins_ids = {ins_id for ins_id in raw_ids if ins_id is not None}
    assert len(ins_ids) >= 3  # at least one unique id per revision


def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None:
    """The tracked run carrying the inserted text has RTL and David font set.

    Checks the run properties of the first run inside a ``<w:ins>`` whose
    text contains the inserted content: a ``w:rtl`` element must be present
    and ``w:rFonts`` must name "David" as its ascii font.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    revised = tmp_path / "out.docx"
    needle = "עברית RTL"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content=needle,
    )
    apply_tracked_revisions(user_file, revised, [revision])

    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    def holds_needle(run) -> bool:
        # True when any <w:t> under the run contains the inserted text.
        return any(
            needle in (text_el.text or "")
            for text_el in run.findall(".//w:t", NSMAP)
        )

    # First matching run, scanning <w:ins> elements in document order.
    tracked_run = next(
        (
            run
            for ins in root.iterfind(".//w:ins", NSMAP)
            for run in ins.findall(".//w:r", NSMAP)
            if holds_needle(run)
        ),
        None,
    )
    if tracked_run is None:
        pytest.fail("tracked insertion with 'עברית RTL' not found")

    run_props = tracked_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None
    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"