Add Track Changes architecture for draft revisions (CMP + CMPA)
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s

Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.

New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word

Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
  detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 5 new/updated endpoints: upload (auto-registers active draft),
  /exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
  /active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
  detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
  revision handling), legal-writer (revision mode)

Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files

Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 18:49:30 +00:00
parent 28daff58be
commit 726498126d
20 changed files with 2419 additions and 23 deletions

View File

View File

@@ -0,0 +1,103 @@
"""בדיקות ל-bookmark helpers ב-docx_exporter.
הבדיקות מתרכזות ב-helper functions בלבד (לא בכל ה-export flow שדורש DB).
"""
from __future__ import annotations
import zipfile
from pathlib import Path
from docx import Document
from lxml import etree
from legal_mcp.services.docx_exporter import (
_BOOKMARK_ID_START,
_insert_bookmark_end,
_insert_bookmark_start,
_wrap_block_with_bookmarks,
)
from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks
def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None:
    """A start/end pair written by the two helpers is reported by list_bookmarks."""
    bookmark_id = 10001
    document = Document()
    paragraph = document.add_paragraph("תוכן בלוק י")
    _insert_bookmark_start(paragraph, "block-yod", bookmark_id)
    _insert_bookmark_end(paragraph, bookmark_id)

    target = tmp_path / "out.docx"
    document.save(str(target))

    # Read the saved file back through the reviser's bookmark listing.
    found = list_bookmarks(target)
    assert found == ["block-yod"]
def test_wrap_block_with_bookmarks_wraps_multiple_paragraphs(tmp_path: Path) -> None:
    """The bookmark opens in the writer's first paragraph and closes in its last."""
    document = Document()
    document.add_paragraph("ראשון — לפני")  # unrelated paragraph before the block
    id_counter = [_BOOKMARK_ID_START]

    def writer() -> None:
        # Three paragraphs that together form the wrapped block.
        for block_line in ("בלוק — פסקה 1", "בלוק — פסקה 2", "בלוק — פסקה 3"):
            document.add_paragraph(block_line)

    _wrap_block_with_bookmarks(document, "block-yod", writer, id_counter)
    document.add_paragraph("אחרי — אחרון")  # unrelated paragraph after the block

    target = tmp_path / "out.docx"
    document.save(str(target))

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    all_paragraphs = root.findall(".//w:p", NSMAP)

    # Collect the paragraph positions holding each marker; as in a plain
    # scan that keeps overwriting, the last hit is the one that counts.
    start_hits = [
        pos for pos, para in enumerate(all_paragraphs)
        if para.find(".//w:bookmarkStart", NSMAP) is not None
    ]
    end_hits = [
        pos for pos, para in enumerate(all_paragraphs)
        if para.find(".//w:bookmarkEnd", NSMAP) is not None
    ]
    start_idx = start_hits[-1] if start_hits else None
    end_idx = end_hits[-1] if end_hits else None
    assert start_idx is not None
    assert end_idx is not None

    opening_text = "".join(all_paragraphs[start_idx].itertext())
    closing_text = "".join(all_paragraphs[end_idx].itertext())
    assert "פסקה 1" in opening_text
    assert "פסקה 3" in closing_text
def test_wrap_block_skipped_when_writer_adds_nothing(tmp_path: Path) -> None:
    """A writer that adds no paragraphs must leave the document without bookmarks."""
    document = Document()
    id_counter = [_BOOKMARK_ID_START]

    def write_nothing() -> None:
        return None

    _wrap_block_with_bookmarks(document, "block-empty", write_nothing, id_counter)

    target = tmp_path / "out.docx"
    document.save(str(target))

    remaining = list_bookmarks(target)
    assert remaining == []
def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None:
    """Three blocks wrapped with one shared counter get three distinct w:id values."""
    document = Document()
    id_counter = [_BOOKMARK_ID_START]
    block_names = ("block-alef", "block-bet", "block-gimel")

    for block_name in block_names:
        # Bind the current name as a default so each writer keeps its own value.
        def write_block(n=block_name):
            return document.add_paragraph(f"תוכן של {n}")

        _wrap_block_with_bookmarks(document, block_name, write_block, id_counter)

    target = tmp_path / "out.docx"
    document.save(str(target))

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    start_ids = []
    for start_el in root.iterfind(".//w:bookmarkStart", NSMAP):
        start_ids.append(start_el.get(_w("id")))
    assert len(start_ids) == 3
    assert len(set(start_ids)) == 3

    assert set(list_bookmarks(target)) == {"block-alef", "block-bet", "block-gimel"}

View File

@@ -0,0 +1,141 @@
"""בדיקות docx_retrofit — הזרקת bookmarks רטרואקטיבית."""
from __future__ import annotations
from pathlib import Path
from docx import Document
from legal_mcp.services.docx_retrofit import (
BLOCK_ORDER,
retrofit_bookmarks,
)
from legal_mcp.services.docx_reviser import list_bookmarks
def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Write a DOCX with two paragraphs per marker.

    For each marker the first paragraph begins with "<marker>. " and the
    second is a follow-up paragraph that mentions the marker mid-sentence.
    """
    document = Document()
    for block_marker in markers:
        opening = f"{block_marker}. תוכן הבלוק שמתחיל ב-{block_marker}"
        follow_up = f"עוד פסקה בבלוק {block_marker}"
        document.add_paragraph(opening)
        document.add_paragraph(follow_up)
    document.save(str(path))
def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """All twelve Hebrew markers (א through יב) are found and bookmarked."""
    all_markers = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"]
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, all_markers)

    report = retrofit_bookmarks(docx_path, backup=False)

    assert len(report["bookmarks_added"]) == 12
    assert report["missing_blocks"] == []

    # Every name declared in BLOCK_ORDER must now be present in the file.
    present = set(list_bookmarks(docx_path))
    declared = {block_name for block_name, _marker in BLOCK_ORDER}
    assert present == declared
def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """With only the first four markers present, later blocks are reported missing."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב", "ג", "ד"])

    report = retrofit_bookmarks(docx_path, backup=False)

    expected_added = ["block-alef", "block-bet", "block-gimel", "block-dalet"]
    assert report["bookmarks_added"] == expected_added

    missing = report["missing_blocks"]
    assert "block-heh" in missing
    assert "block-yod-bet" in missing
def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """The markers י, יא and יב share a prefix yet must map to three separate blocks."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["ט", "י", "יא", "יב"])

    report = retrofit_bookmarks(docx_path, backup=False)

    added = set(report["bookmarks_added"])
    assert added == {"block-tet", "block-yod", "block-yod-alef", "block-yod-bet"}
def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """A second retrofit pass adds nothing and reports the earlier bookmarks as existing."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב"])
    both_blocks = {"block-alef", "block-bet"}

    first_pass = retrofit_bookmarks(docx_path, backup=False)
    assert first_pass["bookmarks_added"] == ["block-alef", "block-bet"]

    second_pass = retrofit_bookmarks(docx_path, backup=False)
    assert second_pass["bookmarks_added"] == []  # no new bookmarks
    assert set(second_pass["existing_bookmarks"]) == both_blocks

    # The file itself still carries exactly the two bookmarks.
    assert set(list_bookmarks(docx_path)) == both_blocks
def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """Called with default arguments, retrofit is expected to leave a
    ``.pre-retrofit.docx`` sibling next to the source file."""
    docx_path = tmp_path / "file.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב"])

    retrofit_bookmarks(docx_path)  # no backup argument: rely on the default

    expected_backup = docx_path.with_suffix(".pre-retrofit.docx")
    assert expected_backup.exists()
def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """Writing to a separate output_path leaves the source file without bookmarks."""
    source_path = tmp_path / "src.docx"
    result_path = tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(source_path, ["א", "ב"])

    retrofit_bookmarks(source_path, output_path=result_path)

    # The input file was not modified...
    assert list_bookmarks(source_path) == []
    # ...while the output file received both bookmarks.
    assert set(list_bookmarks(result_path)) == {"block-alef", "block-bet"}
def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """The letter 'י' appearing mid-sentence must not be taken for a block marker."""
    docx_path = tmp_path / "src.docx"
    document = Document()
    for paragraph_text in (
        "א. תחילת הבלוק",
        "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
        "ב. בלוק שני",
    ):
        document.add_paragraph(paragraph_text)
    document.save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)
    added = report["bookmarks_added"]

    assert "block-alef" in added
    assert "block-bet" in added
    # The stray letter in the middle paragraph must not produce block-yod.
    assert "block-yod" not in added
def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """A marker that shows up after a later-ordered one is reported as missing.

    The document opens with 'ב', then 'א', then 'ג'. The test expects bet and
    gimel to be bookmarked and alef to land in ``missing_blocks``.
    """
    docx_path = tmp_path / "src.docx"
    document = Document()
    for paragraph_text in (
        "ב. מופיע ראשון",
        "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
        "ג. בלוק גימל",
    ):
        document.add_paragraph(paragraph_text)
    document.save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)

    assert "block-bet" in report["bookmarks_added"]
    assert "block-gimel" in report["bookmarks_added"]
    # 'א' came after 'ב' in the document, so it is expected to be skipped.
    assert "block-alef" in report["missing_blocks"]
def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """A document with no added paragraphs yields no bookmarks and 12 missing blocks."""
    docx_path = tmp_path / "empty.docx"
    Document().save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)

    assert report["bookmarks_added"] == []
    assert len(report["missing_blocks"]) == 12

View File

@@ -0,0 +1,342 @@
"""בדיקות docx_reviser — Track Changes XML surgery.
הבדיקות יוצרות DOCX בסיסי עם bookmarks, מפעילות revisions, ובודקות:
1. שה-XML שנוצר תקף ונטען חזרה כ-Document
2. שה-<w:ins> / <w:del> קיימים בפורמט הנכון
3. שה-bookmarks נשמרים אחרי עריכה
4. שגופן David ו-RTL נשמרים
5. שכשלונות מטופלים אלגנטית (bookmark חסר → failed, לא crash)
"""
from __future__ import annotations
import zipfile
from datetime import datetime, timezone
from io import BytesIO
from pathlib import Path
import pytest
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from lxml import etree
from legal_mcp.services import docx_reviser
from legal_mcp.services.docx_reviser import (
NSMAP,
Revision,
_w,
apply_tracked_revisions,
list_bookmarks,
)
# ── Test fixtures ──────────────────────────────────────────────────
def _insert_bookmark(paragraph, name: str, bm_id: int) -> None:
    """Wrap a paragraph's content in a bookmark.

    A ``<w:bookmarkStart>`` carrying ``name`` becomes the first child of the
    paragraph element and a ``<w:bookmarkEnd>`` with the same id becomes the last.
    """
    id_text = str(bm_id)

    opening = OxmlElement("w:bookmarkStart")
    opening.set(qn("w:id"), id_text)
    opening.set(qn("w:name"), name)

    closing = OxmlElement("w:bookmarkEnd")
    closing.set(qn("w:id"), id_text)

    paragraph_xml = paragraph._p
    paragraph_xml.insert(0, opening)
    paragraph_xml.append(closing)
def _make_sample_docx(path: Path) -> None:
    """Write a three-paragraph DOCX; each paragraph has one David-font run
    and its own bookmark (ids 1, 2, 3)."""
    bookmark_names = ("block-alef", "block-yod", "block-yod-bet")
    document = Document()
    for bookmark_id, bookmark_name in enumerate(bookmark_names, start=1):
        paragraph = document.add_paragraph()
        text_run = paragraph.add_run(f"תוכן פסקה של {bookmark_name}")
        text_run.font.name = "David"
        _insert_bookmark(paragraph, bookmark_name, bookmark_id)
    document.save(str(path))
@pytest.fixture
def sample_docx(tmp_path: Path) -> Path:
    """Provide the path of a freshly written sample DOCX inside ``tmp_path``."""
    docx_path = tmp_path / "source.docx"
    _make_sample_docx(docx_path)
    return docx_path
# ── list_bookmarks ────────────────────────────────────────────────
def test_list_bookmarks_returns_all_named(sample_docx: Path) -> None:
    """All three bookmarks of the sample document are listed."""
    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(sample_docx)) == expected
def test_list_bookmarks_excludes_internal(tmp_path: Path) -> None:
    """An underscore-prefixed bookmark such as ``_GoBack`` is left out of the listing."""
    docx_path = tmp_path / "internal.docx"
    document = Document()
    # One ordinary bookmark and one underscore-prefixed bookmark.
    for bookmark_id, (text, bookmark_name) in enumerate(
        (("visible", "block-real"), ("hidden", "_GoBack")), start=1
    ):
        _insert_bookmark(document.add_paragraph(text), bookmark_name, bookmark_id)
    document.save(str(docx_path))

    listed = list_bookmarks(docx_path)
    assert listed == ["block-real"]
# ── apply_tracked_revisions: insert_after ─────────────────────────
def test_insert_after_adds_tracked_paragraph(sample_docx: Path, tmp_path: Path) -> None:
    """An insert_after revision adds its text inside <w:ins> and keeps the anchor text."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="פסקה חדשה שהמערכת מוסיפה.",
    )
    stamp = datetime(2026, 4, 16, 14, 0, tzinfo=timezone.utc)

    outcome = apply_tracked_revisions(
        sample_docx, target, [revision], author="מערכת AI", date=stamp,
    )

    assert outcome.applied == 1
    assert outcome.failed == 0
    assert target.exists()

    with zipfile.ZipFile(target, "r") as archive:
        raw_xml = archive.read("word/document.xml")
    root = etree.fromstring(raw_xml)

    # At least one tracked-insertion element must exist.
    assert len(root.findall(".//w:ins", NSMAP)) >= 1

    document_text = "".join(root.itertext())
    # The new content is in the document...
    assert "פסקה חדשה שהמערכת מוסיפה." in document_text
    # ...and the anchor paragraph's text is still there.
    assert "תוכן פסקה של block-yod" in document_text
def _find_ins_with_runs(tree: etree._Element) -> etree._Element | None:
    """Return the first <w:ins> in document order that has a <w:r> descendant.

    Insertion elements without any run are skipped; ``None`` is returned when
    no <w:ins> contains a run.
    """
    candidates = (
        ins_el
        for ins_el in tree.iterfind(".//w:ins", NSMAP)
        if ins_el.find(".//w:r", NSMAP) is not None
    )
    return next(candidates, None)
def test_insert_after_ins_has_author_and_date(sample_docx: Path, tmp_path: Path) -> None:
    """The run-bearing <w:ins> carries the given author and a date ending in 'Z'."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-alef", content="test",
    )
    apply_tracked_revisions(sample_docx, target, [revision], author="דפנה")

    with zipfile.ZipFile(target, "r") as archive:
        raw_xml = archive.read("word/document.xml")
    insertion = _find_ins_with_runs(etree.fromstring(raw_xml))
    assert insertion is not None

    assert insertion.get(_w("author")) == "דפנה"
    stamp = insertion.get(_w("date"))
    assert stamp is not None
    assert stamp.endswith("Z")  # trailing 'Z' marks a UTC timestamp
def test_insert_after_uses_rtl_and_david(sample_docx: Path, tmp_path: Path) -> None:
    """The inserted run has run properties with <w:rtl> and an ascii font of David."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-alef", content="מוסף",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    insertion = _find_ins_with_runs(root)
    assert insertion is not None
    first_run = insertion.find(".//w:r", NSMAP)
    assert first_run is not None

    run_props = first_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None

    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"
# ── apply_tracked_revisions: insert_before ────────────────────────
def test_insert_before_places_above_anchor(sample_docx: Path, tmp_path: Path) -> None:
    """An insert_before revision lands in a paragraph preceding the anchor paragraph.

    Both paragraphs are looked up with a ``None`` default so that a missing
    paragraph fails as a readable assertion rather than a bare StopIteration.
    """
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="insert_before",
                   anchor_bookmark="block-yod", content="לפני י.")
    result = apply_tracked_revisions(sample_docx, out, [rev])
    assert result.applied == 1

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))
    paragraphs = tree.findall(".//w:p", NSMAP)
    texts = ["".join(p.itertext()) for p in paragraphs]

    # Order check: the new paragraph's text must appear before "block-yod".
    idx_new = next((i for i, t in enumerate(texts) if "לפני י." in t), None)
    idx_yod = next(
        (i for i, t in enumerate(texts) if "תוכן פסקה של block-yod" in t), None,
    )
    assert idx_new is not None, "inserted paragraph not found in output"
    assert idx_yod is not None, "anchor paragraph not found in output"
    assert idx_new < idx_yod
# ── apply_tracked_revisions: delete ───────────────────────────────
def test_delete_wraps_runs_in_w_del(sample_docx: Path, tmp_path: Path) -> None:
    """Deleting a bookmarked block yields <w:del> markup holding its text as <w:delText>.

    The deleted text is searched for across every <w:del> element, not only
    the first one in document order, so the check does not depend on where a
    text-less deletion element (should the reviser emit one) happens to sit.
    """
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="delete", anchor_bookmark="block-yod", content="")
    result = apply_tracked_revisions(sample_docx, out, [rev])
    assert result.applied == 1

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))
    dels = tree.findall(".//w:del", NSMAP)
    assert len(dels) >= 1

    # Inside w:del, text elements must become w:delText.
    del_texts = [
        t for del_el in dels for t in del_el.findall(".//w:delText", NSMAP)
    ]
    assert any("block-yod" in (t.text or "") for t in del_texts)
# ── apply_tracked_revisions: replace ─────────────────────────────
def test_replace_creates_both_ins_and_del(sample_docx: Path, tmp_path: Path) -> None:
    """A replace revision leaves at least one <w:ins> and one <w:del> in the output."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1",
        type="replace",
        anchor_bookmark="block-yod",
        content="תוכן חדש לחלוטין",
    )

    outcome = apply_tracked_revisions(sample_docx, target, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    insertions = root.findall(".//w:ins", NSMAP)
    deletions = root.findall(".//w:del", NSMAP)
    assert len(insertions) >= 1
    assert len(deletions) >= 1
# ── Failure modes ─────────────────────────────────────────────────
def test_missing_bookmark_returns_failed_not_crash(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """An unknown anchor bookmark is reported as a failed revision, not raised."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="does-not-exist", content="x",
    )

    outcome = apply_tracked_revisions(sample_docx, target, [revision])

    assert outcome.applied == 0
    assert outcome.failed == 1
    first_result = outcome.results[0]
    assert first_result.status == "failed"
    assert "not found" in (first_result.error or "")
    # An output file is still written even though nothing was applied.
    assert target.exists()
def test_empty_revisions_list_produces_copy(sample_docx: Path, tmp_path: Path) -> None:
    """With no revisions, an output file is written and all bookmarks survive."""
    target = tmp_path / "out.docx"

    outcome = apply_tracked_revisions(sample_docx, target, [])

    assert outcome.applied == 0
    assert outcome.failed == 0
    assert target.exists()
    # The three sample bookmarks are still listed in the output.
    surviving = set(list_bookmarks(target))
    assert surviving == {"block-alef", "block-yod", "block-yod-bet"}
# ── Track revisions flag in settings ──────────────────────────────
def test_track_revisions_flag_is_enabled(sample_docx: Path, tmp_path: Path) -> None:
    """After a revision, word/settings.xml has a <w:trackRevisions> child on its root."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-alef", content="x",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    with zipfile.ZipFile(target, "r") as archive:
        settings_root = etree.fromstring(archive.read("word/settings.xml"))

    flag = settings_root.find(_w("trackRevisions"))
    assert flag is not None
# ── Multiple revisions with unique IDs ────────────────────────────
def test_multiple_revisions_get_unique_ids(sample_docx: Path, tmp_path: Path) -> None:
    """No w:id value is shared between the <w:ins> and <w:del> elements of one run."""
    target = tmp_path / "out.docx"
    revisions = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-alef", content="ראשון"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="שני"),
        Revision(id="r3", type="delete", anchor_bookmark="block-yod-bet"),
    ]

    outcome = apply_tracked_revisions(sample_docx, target, revisions)
    assert outcome.applied == 3

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    # Gather the non-empty ids of insertions first, then deletions.
    all_ids: list[str] = [
        revision_el.get(_w("id"))
        for xpath in (".//w:ins", ".//w:del")
        for revision_el in root.iterfind(xpath, NSMAP)
        if revision_el.get(_w("id"))
    ]
    assert len(all_ids) == len(set(all_ids)), f"duplicate IDs: {all_ids}"
# ── DOCX remains openable as Document ─────────────────────────────
def test_output_docx_is_openable_by_python_docx(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """The revised file loads with python-docx and holds both old and new text."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-yod", content="תוכן חדש",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    # Loading must not raise.
    reopened = Document(str(target))

    # The anchor paragraph's text is readable through the python-docx API.
    paragraph_text = "\n".join(para.text for para in reopened.paragraphs)
    assert "block-yod" in paragraph_text

    # The tracked insertion is checked against the raw XML text instead.
    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    xml_text = "".join(root.itertext())
    assert "תוכן חדש" in xml_text
# ── Bookmarks preserved through revisions ─────────────────────────
def test_bookmarks_preserved_after_insert(sample_docx: Path, tmp_path: Path) -> None:
    """Applying an insert_after revision does not drop any of the three bookmarks."""
    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-yod", content="x",
    )
    apply_tracked_revisions(sample_docx, target, [revision])

    surviving = set(list_bookmarks(target))
    assert surviving == {"block-alef", "block-yod", "block-yod-bet"}
# ── Idempotency of loading/saving without changes ────────────────
def test_save_without_revisions_preserves_content(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """With an empty revision list the output has the same paragraph texts as the input."""
    target = tmp_path / "out.docx"
    apply_tracked_revisions(sample_docx, target, [])

    def paragraph_texts(docx_path: Path) -> list[str]:
        return [para.text for para in Document(str(docx_path)).paragraphs]

    source_texts = paragraph_texts(sample_docx)
    output_texts = paragraph_texts(target)
    assert source_texts == output_texts

View File

@@ -0,0 +1,237 @@
"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser.
הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות
שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks
מובנים) ועל מסמכים רגילים שעברו retrofit.
"""
from __future__ import annotations
import zipfile
from datetime import datetime, timezone
from pathlib import Path
import pytest
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from lxml import etree
from legal_mcp.services import docx_retrofit, docx_reviser
from legal_mcp.services.docx_exporter import (
_BOOKMARK_ID_START,
_wrap_block_with_bookmarks,
)
from legal_mcp.services.docx_reviser import (
NSMAP,
Revision,
_w,
apply_tracked_revisions,
list_bookmarks,
)
# ── Helpers ────────────────────────────────────────────────────────
def _make_exporter_style_docx(path: Path) -> None:
    """Write a DOCX whose blocks are wrapped by ``_wrap_block_with_bookmarks``.

    Five blocks are written (alef, bet, heh, yod, yod-bet). Each block's
    content is split on newlines and every non-blank line becomes its own
    paragraph, stripped of surrounding whitespace.
    """
    document = Document()
    id_counter = [_BOOKMARK_ID_START]
    block_contents = [
        ("block-alef", "בפני: דפנה תמיר, יו\"ר ועדת הערר"),
        ("block-bet", "ערר מספר 1033-25"),
        ("block-heh", "רקע\nהנכס מצוי ברחוב הר בשן"),
        ("block-yod", "דיון והכרעה\nלאחר שבחנו את טענות הצדדים"),
        ("block-yod-bet", "ההחלטה\nהערר מתקבל בחלקו"),
    ]

    def make_writer(block_text):
        # Build a zero-argument writer bound to this block's text.
        def write_block():
            for stripped in (raw.strip() for raw in block_text.split("\n")):
                if stripped:
                    document.add_paragraph(stripped)
        return write_block

    for block_name, block_text in block_contents:
        _wrap_block_with_bookmarks(
            document, block_name, make_writer(block_text), id_counter,
        )
    document.save(str(path))
def _make_user_edited_docx(path: Path) -> None:
    """Write a plain DOCX with no bookmarks: fourteen paragraphs that
    alternate between short heading-like lines and body sentences."""
    paragraph_texts = (
        "בפני: דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים",
        "ערר מספר 9999-25",
        "רקע",
        "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים",
        "תמצית טענות הצדדים",
        "העוררים טוענים שהבנייה חורגת מהתכנית",
        "תגובת המשיבה",
        "הוועדה המקומית טוענת שהבקשה תואמת",
        "ההליכים בפני ועדת הערר",
        "קיימנו דיון בנוכחות הצדדים",
        "דיון והכרעה",
        "לאחר שבחנו את טענות הצדדים בחון מעמיק",
        "סוף דבר",
        "הערר נדחה",
    )
    document = Document()
    for paragraph_text in paragraph_texts:
        document.add_paragraph(paragraph_text)
    document.save(str(path))
# ── Exporter-style (built-in bookmarks) ──────────────────────────
def test_exporter_output_works_with_reviser(tmp_path: Path) -> None:
    """A document bookmarked at export time can be revised without a retrofit step."""
    exported = tmp_path / "exported.docx"
    _make_exporter_style_docx(exported)

    # The five wrapped blocks must already be listed as bookmarks.
    required = {"block-alef", "block-bet", "block-heh", "block-yod", "block-yod-bet"}
    assert required <= set(list_bookmarks(exported))

    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוספת מערכת: פסק הלכה חדש",
        style="body",
    )
    outcome = apply_tracked_revisions(exported, revised, [revision])
    assert outcome.applied == 1

    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    assert "תוספת מערכת" in "".join(root.itertext())

    # The added text must sit inside a tracked insertion element.
    insertion_texts = ["".join(ins_el.itertext()) for ins_el in root.findall(".//w:ins", NSMAP)]
    assert any("תוספת מערכת" in text for text in insertion_texts)
# ── User-edited DOCX (no bookmarks) — needs retrofit first ──────
def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None:
    """A bookmark-free user document can be retrofitted and then revised."""
    user_docx = tmp_path / "user_edit.docx"
    _make_user_edited_docx(user_docx)

    # Before the retrofit there are no named bookmarks at all.
    assert list_bookmarks(user_docx) == []

    report = docx_retrofit.retrofit_bookmarks(user_docx, backup=False)
    added_names = set(report["bookmarks_added"])
    # block-yod is the anchor used by the revision below; block-heh (רקע)
    # and block-zayin (תמצית טענות) are expected from the headings as well.
    for expected_name in ("block-yod", "block-heh", "block-zayin"):
        assert expected_name in added_names

    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...",
        style="body",
    )
    outcome = apply_tracked_revisions(user_docx, revised, [revision])
    assert outcome.applied == 1

    # The inserted text must be found inside some <w:ins> element.
    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    insertion_texts = [
        "".join(ins_el.itertext()) for ins_el in root.iterfind(".//w:ins", NSMAP)
    ]
    assert any("פסק הלכה שהוסף" in text for text in insertion_texts)
def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None:
    """Retrofitting leaves the list of paragraph texts exactly as it was."""
    user_docx = tmp_path / "user.docx"
    _make_user_edited_docx(user_docx)

    def read_texts() -> list[str]:
        return [para.text for para in Document(str(user_docx)).paragraphs]

    texts_before = read_texts()
    docx_retrofit.retrofit_bookmarks(user_docx, backup=False)
    texts_after = read_texts()

    # Paragraph texts are compared before and after; they must match.
    assert texts_before == texts_after
def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None:
    """Retrofitting twice adds nothing the second time and revising still succeeds."""
    user_docx = tmp_path / "user.docx"
    _make_user_edited_docx(user_docx)

    first_pass = docx_retrofit.retrofit_bookmarks(user_docx, backup=False)
    second_pass = docx_retrofit.retrofit_bookmarks(user_docx, backup=False)

    # The repeat run adds nothing and sees everything the first run added.
    assert second_pass["bookmarks_added"] == []
    assert set(first_pass["bookmarks_added"]) <= set(second_pass["existing_bookmarks"])

    revised = tmp_path / "revised.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-yod", content="x",
    )
    outcome = apply_tracked_revisions(user_docx, revised, [revision])
    assert outcome.applied == 1
def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None:
    """Three revisions must yield at least three distinct <w:ins> ids.

    Distinct ids are what the test relies on for the changes to be separately
    acceptable or rejectable. Elements lacking a ``w:id`` attribute are left
    out of the count so that a missing id cannot stand in for a unique one.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    out = tmp_path / "revised.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-heh", content="תוספת 1"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="תוספת 2"),
        Revision(id="r3", type="insert_before",
                 anchor_bookmark="block-yod-alef", content="תוספת 3"),
    ]
    result = apply_tracked_revisions(user_file, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    ins_ids = set()
    for el in tree.iterfind(".//w:ins", NSMAP):
        wid = el.get(_w("id"))
        if wid is not None:  # skip insertions that carry no id at all
            ins_ids.add(wid)
    assert len(ins_ids) >= 3  # at least one unique id per revision
def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None:
    """The run holding the inserted text has <w:rtl> and an ascii font of David.

    The test fails explicitly when no run inside any <w:ins> contains the
    inserted text.
    """
    user_docx = tmp_path / "user.docx"
    _make_user_edited_docx(user_docx)
    docx_retrofit.retrofit_bookmarks(user_docx, backup=False)

    target = tmp_path / "out.docx"
    revision = Revision(
        id="r1", type="insert_after", anchor_bookmark="block-yod", content="עברית RTL",
    )
    apply_tracked_revisions(user_docx, target, [revision])

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    # First run, in document order, inside any <w:ins> whose text matches.
    inserted_run = next(
        (
            run_el
            for ins_el in root.iterfind(".//w:ins", NSMAP)
            for run_el in ins_el.findall(".//w:r", NSMAP)
            if any(
                "עברית RTL" in (text_el.text or "")
                for text_el in run_el.findall(".//w:t", NSMAP)
            )
        ),
        None,
    )
    if inserted_run is None:
        pytest.fail("tracked insertion with 'עברית RTL' not found")

    run_props = inserted_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None
    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"