All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.
New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word
Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 5 new/updated endpoints: upload (auto-registers active draft),
/exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
/active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
revision handling), legal-writer (revision mode)
Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files
Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
142 lines
5.2 KiB
Python
142 lines
5.2 KiB
Python
"""בדיקות docx_retrofit — הזרקת bookmarks רטרואקטיבית."""
|
||
|
||
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
|
||
from docx import Document
|
||
|
||
from legal_mcp.services.docx_retrofit import (
|
||
BLOCK_ORDER,
|
||
retrofit_bookmarks,
|
||
)
|
||
from legal_mcp.services.docx_reviser import list_bookmarks
|
||
|
||
|
||
def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Write a fixture DOCX holding two paragraphs for every marker.

    For each entry in ``markers`` the document receives an opening paragraph
    that begins with ``"<marker>. "`` and then a second paragraph that only
    mentions the marker at its end. The document is saved to ``path``.
    """
    document = Document()
    for letter in markers:
        opening = f"{letter}. תוכן הבלוק שמתחיל ב-{letter}"
        follow_up = f"עוד פסקה בבלוק {letter}"
        for text in (opening, follow_up):
            document.add_paragraph(text)
    document.save(str(path))
|
||
|
||
|
||
def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """A document carrying all twelve markers gets every BLOCK_ORDER bookmark."""
    all_markers = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"]
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, all_markers)

    report = retrofit_bookmarks(docx_path, backup=False)

    assert len(report["bookmarks_added"]) == 12
    assert report["missing_blocks"] == []

    # Each BLOCK_ORDER entry is unpacked as a pair; its first item is the
    # bookmark name that is compared against what the file now contains.
    wanted = {bookmark for bookmark, _ in BLOCK_ORDER}
    found = set(list_bookmarks(docx_path))
    assert found == wanted
|
||
|
||
|
||
def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """Blocks absent from the document are listed under ``missing_blocks``."""
    docx_path = tmp_path / "src.docx"
    present = ["א", "ב", "ג", "ד"]  # only four blocks exist in the fixture
    _make_docx_with_hebrew_blocks(docx_path, present)

    report = retrofit_bookmarks(docx_path, backup=False)

    expected_added = ["block-alef", "block-bet", "block-gimel", "block-dalet"]
    assert report["bookmarks_added"] == expected_added
    for absent in ("block-heh", "block-yod-bet"):
        assert absent in report["missing_blocks"]
|
||
|
||
|
||
def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """The yod, yod-alef and yod-bet markers must each yield their own bookmark.

    The two-letter markers share their first letter with the one-letter yod
    marker, so the report is expected to contain all three (plus tet) rather
    than folding them into a single block.
    """
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["ט", "י", "יא", "יב"])

    report = retrofit_bookmarks(docx_path, backup=False)

    added = set(report["bookmarks_added"])
    assert added == {"block-tet", "block-yod", "block-yod-alef", "block-yod-bet"}
|
||
|
||
|
||
def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """Running retrofit twice doesn't duplicate bookmarks.

    The first pass is expected to add both bookmarks. The second pass is
    expected to add nothing and to report both names under
    ``existing_bookmarks``. Finally the file itself must list each bookmark
    name exactly once.
    """
    src = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(src, ["א", "ב"])

    first = retrofit_bookmarks(src, backup=False)
    assert first["bookmarks_added"] == ["block-alef", "block-bet"]

    second = retrofit_bookmarks(src, backup=False)
    assert second["bookmarks_added"] == []  # nothing new
    assert set(second["existing_bookmarks"]) == {"block-alef", "block-bet"}

    # Compare the sorted list rather than a set: a set would collapse repeated
    # names and let a duplicated bookmark slip through unnoticed.
    assert sorted(list_bookmarks(src)) == ["block-alef", "block-bet"]
|
||
|
||
|
||
def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """Retrofit called without ``backup`` leaves a ``.pre-retrofit.docx`` sibling."""
    docx_path = tmp_path / "file.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב"])

    # No ``backup`` argument is passed, so this exercises the default behaviour.
    retrofit_bookmarks(docx_path)

    expected_backup = tmp_path / "file.pre-retrofit.docx"
    assert expected_backup.exists()
|
||
|
||
|
||
def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """With ``output_path`` set, bookmarks land in the output and not the source."""
    source = tmp_path / "src.docx"
    target = tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(source, ["א", "ב"])

    retrofit_bookmarks(source, output_path=target)

    source_names = list_bookmarks(source)
    target_names = set(list_bookmarks(target))
    assert source_names == []  # the source file was left untouched
    assert target_names == {"block-alef", "block-bet"}
|
||
|
||
|
||
def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """A marker letter that appears mid-paragraph must not produce a bookmark."""
    docx_path = tmp_path / "src.docx"
    paragraphs = (
        "א. תחילת הבלוק",
        "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
        "ב. בלוק שני",
    )
    document = Document()
    for text in paragraphs:
        document.add_paragraph(text)
    document.save(str(docx_path))

    added = retrofit_bookmarks(docx_path, backup=False)["bookmarks_added"]

    assert "block-alef" in added
    assert "block-bet" in added
    # The yod letter only occurs inside the body text of the middle paragraph.
    assert "block-yod" not in added
|
||
|
||
|
||
def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """Markers that show up out of sequence are not claimed retroactively.

    The fixture opens with the second marker, then the first, then the third.
    The expected report has bookmarks for the second and third blocks, while
    the first block is listed as missing.
    """
    docx_path = tmp_path / "src.docx"
    document = Document()
    for text in (
        "ב. מופיע ראשון",
        "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
        "ג. בלוק גימל",
    ):
        document.add_paragraph(text)
    document.save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)

    added = report["bookmarks_added"]
    assert "block-bet" in added
    assert "block-gimel" in added
    # The alef paragraph comes after the bet paragraph, so it is expected to be
    # reported as missing instead of bookmarked.
    assert "block-alef" in report["missing_blocks"]
|
||
|
||
|
||
def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """A freshly created document with no added content reports twelve missing blocks."""
    docx_path = tmp_path / "empty.docx"
    Document().save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)

    assert report["bookmarks_added"] == []
    assert len(report["missing_blocks"]) == 12
|