Files
legal-ai/mcp-server/tests/test_docx_retrofit.py
Chaim 726498126d
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
Add Track Changes architecture for draft revisions (CMP + CMPA)
Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.

New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word

Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
  detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 5 new/updated endpoints: upload (auto-registers active draft),
  /exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
  /active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
  detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
  revision handling), legal-writer (revision mode)

Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files

Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-16 18:49:30 +00:00

142 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""בדיקות docx_retrofit — הזרקת bookmarks רטרואקטיבית."""
from __future__ import annotations
from pathlib import Path
from docx import Document
from legal_mcp.services.docx_retrofit import (
BLOCK_ORDER,
retrofit_bookmarks,
)
from legal_mcp.services.docx_reviser import list_bookmarks
def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Write a fixture DOCX containing two paragraphs per Hebrew marker.

    For each entry in *markers* the document receives a paragraph that opens
    with "<marker>. " and then a second body paragraph that mentions the
    marker but does not start with it. The result is saved to *path*.
    """
    paragraph_texts: list[str] = []
    for letter in markers:
        # Heading-like line first, then one line of body text for the block.
        paragraph_texts.append(f"{letter}. תוכן הבלוק שמתחיל ב-{letter}")
        paragraph_texts.append(f"עוד פסקה בבלוק {letter}")

    document = Document()
    for text in paragraph_texts:
        document.add_paragraph(text)
    document.save(str(path))
def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """A document with all twelve markers gets twelve bookmarks, none missing."""
    all_markers = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"]
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, all_markers)

    outcome = retrofit_bookmarks(docx_path, backup=False)

    assert len(outcome["bookmarks_added"]) == 12
    assert outcome["missing_blocks"] == []

    # The names stored in the file must match the names declared in BLOCK_ORDER.
    found_names = set(list_bookmarks(docx_path))
    declared_names = {block_name for block_name, _ in BLOCK_ORDER}
    assert found_names == declared_names
def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """Blocks that are absent from the document show up in ``missing_blocks``."""
    docx_path = tmp_path / "src.docx"
    present_markers = ["א", "ב", "ג", "ד"]  # only the first four blocks exist
    _make_docx_with_hebrew_blocks(docx_path, present_markers)

    outcome = retrofit_bookmarks(docx_path, backup=False)

    expected_added = ["block-alef", "block-bet", "block-gimel", "block-dalet"]
    assert outcome["bookmarks_added"] == expected_added

    # Spot-check one block right after the present ones and the very last one.
    missing = outcome["missing_blocks"]
    assert "block-heh" in missing
    assert "block-yod-bet" in missing
def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """The markers י, יא and יב share a prefix yet must map to three distinct blocks."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["ט", "י", "יא", "יב"])

    added = retrofit_bookmarks(docx_path, backup=False)["bookmarks_added"]

    expected = {"block-tet", "block-yod", "block-yod-alef", "block-yod-bet"}
    assert set(added) == expected
def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """Running retrofit twice on the same file doesn't duplicate bookmarks.

    The first pass adds both bookmarks; the second pass must add nothing,
    report both as already existing, and leave exactly one bookmark per block
    in the saved document.
    """
    src = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(src, ["א", "ב"])

    first = retrofit_bookmarks(src, backup=False)
    assert first["bookmarks_added"] == ["block-alef", "block-bet"]

    second = retrofit_bookmarks(src, backup=False)
    assert second["bookmarks_added"] == []  # nothing new
    assert set(second["existing_bookmarks"]) == {"block-alef", "block-bet"}

    # Compare as a sorted list rather than a set: a set would collapse a
    # duplicated bookmark name and let the exact regression this test guards
    # against slip through unnoticed.
    assert sorted(list_bookmarks(src)) == ["block-alef", "block-bet"]
def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """Calling retrofit without a ``backup`` argument leaves a ``.pre-retrofit.docx`` copy."""
    original = tmp_path / "file.docx"
    expected_backup = original.with_suffix(".pre-retrofit.docx")
    _make_docx_with_hebrew_blocks(original, ["א", "ב"])

    # No backup argument is passed, so the function's default applies.
    retrofit_bookmarks(original)

    assert expected_backup.exists()
def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """Writing to a separate ``output_path`` leaves the source file without bookmarks."""
    source_path = tmp_path / "src.docx"
    target_path = tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(source_path, ["א", "ב"])

    retrofit_bookmarks(source_path, output_path=target_path)

    # The input must be untouched; only the output carries the new bookmarks.
    assert list_bookmarks(source_path) == []
    target_names = set(list_bookmarks(target_path))
    assert target_names == {"block-alef", "block-bet"}
def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """A lone 'י' inside body text (not at paragraph start) is not a block marker."""
    docx_path = tmp_path / "src.docx"
    paragraph_texts = (
        "א. תחילת הבלוק",
        # Body line that merely contains the letter yod somewhere inside it.
        "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
        "ב. בלוק שני",
    )
    document = Document()
    for text in paragraph_texts:
        document.add_paragraph(text)
    document.save(str(docx_path))

    added = retrofit_bookmarks(docx_path, backup=False)["bookmarks_added"]

    assert "block-alef" in added
    assert "block-bet" in added
    # The mid-sentence yod must not have produced a bookmark.
    assert "block-yod" not in added
def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """An earlier marker that shows up after a later one is reported as missing.

    The document lists 'ב' before 'א'. The expected outcome is that 'ב' and 'ג'
    are bookmarked while 'א' ends up in ``missing_blocks``, i.e. detection only
    moves forward through the block order and never goes back.
    """
    docx_path = tmp_path / "src.docx"
    paragraph_texts = (
        "ב. מופיע ראשון",
        "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
        "ג. בלוק גימל",
    )
    document = Document()
    for text in paragraph_texts:
        document.add_paragraph(text)
    document.save(str(docx_path))

    outcome = retrofit_bookmarks(docx_path, backup=False)

    added = outcome["bookmarks_added"]
    assert "block-bet" in added
    assert "block-gimel" in added
    # 'א' came after 'ב' in the file, so it must be treated as not found.
    assert "block-alef" in outcome["missing_blocks"]
def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """A document with no paragraphs yields no bookmarks and twelve missing blocks."""
    docx_path = tmp_path / "empty.docx"
    Document().save(str(docx_path))

    outcome = retrofit_bookmarks(docx_path, backup=False)

    assert outcome["bookmarks_added"] == []
    missing_count = len(outcome["missing_blocks"])
    assert missing_count == 12