Add Track Changes architecture for draft revisions (CMP + CMPA)
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m29s
Fixes critical bug in 1033-25: user-uploaded עריכה-*.docx files were
orphaned on disk while exports kept rebuilding from stale DB blocks.
New architecture:
- User-uploaded DOCX becomes the source of truth (cases.active_draft_path)
- System edits via XML surgery with real Word <w:ins>/<w:del> revisions
- User can Accept/Reject each change from within Word
Components:
- docx_reviser.py: XML surgery for Track Changes (15 tests)
- docx_retrofit.py: retroactive bookmark injection with Hebrew marker
detection + heading heuristic (9 tests)
- docx_exporter.py: emits bookmarks around each of the 12 blocks
- 3 new MCP tools: apply_user_edit, list_bookmarks, revise_draft
- 5 new/updated endpoints: upload (auto-registers active draft),
/exports/revise, /exports/bookmarks, /exports/{filename}/retrofit,
/active-draft
- DB migration: cases.active_draft_path column
- UI: correct banner using real v-numbers, "מקור האמת" badge,
detailed upload toast with bookmarks_added/missing_blocks
- agents: legal-exporter (3 export modes), legal-ceo (stage G for
revision handling), legal-writer (revision mode)
Multi-tenancy:
- Works for both CMP (1xxx cases) and CMPA (8xxx/9xxx cases)
- New revise-draft skill added to both companies
- deploy-track-changes.sh syncs skills CMP ↔ CMPA
- retrofit_case.py: one-off retrofit of existing files
Tests: 34 passing (15 reviser + 9 retrofit + 4 exporter bookmarks + 6 e2e)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -321,6 +321,25 @@ async def export_docx(case_number: str, output_path: str = "") -> str:
|
||||
return await drafting.export_docx(case_number, output_path)
|
||||
|
||||
|
||||
# MCP tool: register a user-uploaded edit file (עריכה-v*.docx) as the case's
# new source of truth; per the docstring, bookmarks are injected if missing.
# The docstring is kept verbatim in Hebrew: presumably the MCP framework
# publishes it as the tool description at runtime — TODO confirm.
@mcp.tool()
async def apply_user_edit(case_number: str, edit_filename: str) -> str:
    """רישום עריכה שהעלה המשתמש (עריכה-v*.docx) כמקור האמת החדש — מזריק bookmarks אם חסר."""
    # Pure delegation: all logic lives in the drafting service.
    outcome = await drafting.apply_user_edit(case_number, edit_filename)
    return outcome
|
||||
|
||||
|
||||
# MCP tool: list the bookmarks present in the case's active draft (the anchors
# that revisions attach to). The Hebrew docstring is kept verbatim: presumably
# it is served as the tool description at runtime — TODO confirm.
@mcp.tool()
async def list_bookmarks(case_number: str) -> str:
    """רשימת bookmarks הקיימים ב-active_draft של התיק (אנקורים ל-revisions)."""
    # Pure delegation: all logic lives in the drafting service.
    listing = await drafting.list_bookmarks(case_number)
    return listing
|
||||
|
||||
|
||||
# MCP tool: apply Track Changes revisions to the case's active draft; per the
# docstring a new טיוטה-v{N+1}.docx is produced. The Hebrew docstring and the
# Hebrew default author are runtime text and are kept verbatim.
@mcp.tool()
async def revise_draft(
    case_number: str,
    revisions_json: str,
    author: str = "מערכת AI",
) -> str:
    """החלת revisions (Track Changes) על ה-active_draft, יוצר טיוטה-v{N+1}.docx חדשה."""
    # Pure delegation: the revisions JSON is passed through unparsed.
    outcome = await drafting.revise_draft(case_number, revisions_json, author)
    return outcome
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def analyze_style(appeal_subtype: str = "") -> str:
|
||||
"""ניתוח סגנון על קורפוס ההחלטות של דפנה. מחלץ ושומר דפוסי כתיבה. סוג ערר: building_permit / betterment_levy / compensation_197 (ריק = הכל)."""
|
||||
|
||||
@@ -161,6 +161,11 @@ ALTER TABLE decisions ADD COLUMN IF NOT EXISTS outcome_reasoning TEXT DEFAULT ''
|
||||
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_type TEXT DEFAULT '';
|
||||
ALTER TABLE cases ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT 'appeals_committee';
|
||||
ALTER TABLE cases ADD COLUMN IF NOT EXISTS appeal_subtype TEXT DEFAULT '';
|
||||
-- active_draft_path = path to the DOCX that is the current source of truth
|
||||
-- for this case's decision text. Set to the latest טיוטה-v*.docx after export,
|
||||
-- or the latest עריכה-v*.docx after user upload. Used by revise_draft to know
|
||||
-- what file to base Track Changes revisions on.
|
||||
ALTER TABLE cases ADD COLUMN IF NOT EXISTS active_draft_path TEXT;
|
||||
|
||||
-- הרחבת style_corpus עם practice_area / appeal_subtype
|
||||
ALTER TABLE style_corpus ADD COLUMN IF NOT EXISTS practice_area TEXT DEFAULT 'appeals_committee';
|
||||
@@ -520,6 +525,25 @@ async def get_case(case_id: UUID) -> dict | None:
|
||||
return _row_to_case(row)
|
||||
|
||||
|
||||
async def set_active_draft_path(case_id: UUID, path: str | None) -> None:
    """Store the path of the DOCX that is the case's source of truth.

    Writes ``path`` (which may be ``None``) into ``cases.active_draft_path``
    for the row whose id is ``case_id`` and bumps ``updated_at`` in the same
    statement.
    """
    statement = (
        "UPDATE cases SET active_draft_path = $1, updated_at = now() WHERE id = $2"
    )
    db_pool = await get_pool()
    async with db_pool.acquire() as connection:
        await connection.execute(statement, path, case_id)
|
||||
|
||||
|
||||
async def get_active_draft_path(case_id: UUID) -> str | None:
    """Return ``cases.active_draft_path`` for a case.

    Returns ``None`` when no row matches ``case_id``; otherwise returns the
    stored column value as-is (which may itself be NULL/``None``).
    """
    db_pool = await get_pool()
    async with db_pool.acquire() as connection:
        record = await connection.fetchrow(
            "SELECT active_draft_path FROM cases WHERE id = $1", case_id,
        )
        if not record:
            return None
        return record["active_draft_path"]
|
||||
|
||||
|
||||
async def get_case_by_number(case_number: str) -> dict | None:
|
||||
pool = await get_pool()
|
||||
async with pool.acquire() as conn:
|
||||
|
||||
@@ -58,6 +58,57 @@ def _set_rtl_section(section) -> None:
|
||||
sectPr.append(bidi)
|
||||
|
||||
|
||||
# ── Bookmark helpers ──────────────────────────────────────────────
|
||||
|
||||
# Keep a per-document bookmark id counter. Bookmarks must have unique ids
|
||||
# across the whole document; we start from a high value to avoid collisions
|
||||
# with whatever Word's default template already assigned.
|
||||
_BOOKMARK_ID_START = 10000
|
||||
|
||||
|
||||
def _insert_bookmark_start(paragraph, name: str, bm_id: int) -> None:
    """Insert a <w:bookmarkStart> at the beginning of a paragraph's content.

    Args:
        paragraph: python-docx paragraph whose underlying ``_p`` element
            receives the bookmark.
        name: bookmark name (``w:name``).
        bm_id: bookmark id (``w:id``); the matching <w:bookmarkEnd> must
            carry the same id.

    The element is placed directly after <w:pPr> when the paragraph has one:
    the WordprocessingML schema requires <w:pPr> to be the first child of
    <w:p>, so inserting at index 0 unconditionally would emit an
    out-of-order document.
    """
    el = OxmlElement("w:bookmarkStart")
    el.set(qn("w:id"), str(bm_id))
    el.set(qn("w:name"), name)

    p = paragraph._p
    pPr = p.find(qn("w:pPr"))
    if pPr is None:
        p.insert(0, el)
    else:
        # Keep the paragraph properties first; the bookmark follows them.
        pPr.addnext(el)
|
||||
|
||||
|
||||
def _insert_bookmark_end(paragraph, bm_id: int) -> None:
    """Append a <w:bookmarkEnd> as the last child of a paragraph.

    ``bm_id`` must equal the ``w:id`` of the <w:bookmarkStart> being closed.
    """
    closing = OxmlElement("w:bookmarkEnd")
    closing.set(qn("w:id"), str(bm_id))
    paragraph._p.append(closing)
|
||||
|
||||
|
||||
def _wrap_block_with_bookmarks(doc, block_name: str,
                               write_block_fn, bm_counter: list[int]) -> None:
    """Run ``write_block_fn`` and bookmark the paragraphs it added.

    The number of top-level body paragraphs is measured before and after the
    callback runs. If it grew, a bookmark named ``block_name`` is opened on
    the first new paragraph and closed on the last one.

    ``bm_counter`` is a one-element list used as a mutable counter so the
    caller keeps id state across blocks; it is incremented only when a
    bookmark is actually written.
    """
    paragraph_tag = qn("w:p")
    body = doc.element.body

    def _count_top_level_paragraphs() -> int:
        return sum(1 for child in body if child.tag == paragraph_tag)

    first_idx = _count_top_level_paragraphs()
    write_block_fn()
    end_idx = _count_top_level_paragraphs()

    if end_idx == first_idx:
        # The block wrote no paragraphs, so there is nothing to wrap.
        return

    # Resolve both paragraphs before touching the counter, so a failed lookup
    # does not consume an id.
    opening_paragraph = doc.paragraphs[first_idx]
    closing_paragraph = doc.paragraphs[end_idx - 1]

    bm_counter[0] += 1
    bookmark_id = bm_counter[0]
    _insert_bookmark_start(opening_paragraph, block_name, bookmark_id)
    _insert_bookmark_end(closing_paragraph, bookmark_id)
|
||||
|
||||
|
||||
def _add_paragraph(doc, text: str, style: str = "Normal",
|
||||
bold: bool = False, font_size=None,
|
||||
alignment=None, space_after: Pt | None = None) -> None:
|
||||
@@ -160,14 +211,22 @@ async def export_decision(case_id: UUID, output_path: str | None = None) -> str:
|
||||
section.right_margin = PAGE_MARGIN
|
||||
_set_rtl_section(section)
|
||||
|
||||
# Write blocks
|
||||
# Write blocks with bookmarks wrapping each block (anchors for revisions)
|
||||
bm_counter = [_BOOKMARK_ID_START]
|
||||
for block in blocks:
|
||||
block_id = block["block_id"]
|
||||
content = block["content"] or ""
|
||||
if not content.strip():
|
||||
continue
|
||||
|
||||
_write_block_to_docx(doc, block_id, block["title"], content)
|
||||
_wrap_block_with_bookmarks(
|
||||
doc,
|
||||
f"block-{block_id}",
|
||||
lambda b=block, bid=block_id, c=content: _write_block_to_docx(
|
||||
doc, bid, b["title"], c,
|
||||
),
|
||||
bm_counter,
|
||||
)
|
||||
|
||||
# Determine output path — versioned under cases/{case_number}/exports/
|
||||
if not output_path:
|
||||
|
||||
290
mcp-server/src/legal_mcp/services/docx_retrofit.py
Normal file
290
mcp-server/src/legal_mcp/services/docx_retrofit.py
Normal file
@@ -0,0 +1,290 @@
|
||||
"""הזרקת bookmarks רטרואקטיבית ל-DOCX שלא נוצרו ע"י ה-exporter.
|
||||
|
||||
כאשר משתמש מעלה `עריכה-v*.docx` שנערך ב-Word מחוץ למערכת, אין בו את ה-
|
||||
bookmarks שאנו מצפים להם (block-alef ... block-yod-bet). השירות כאן
|
||||
מזהה את תחילת כל בלוק לפי סימני הפתיחה העבריים (א., ב., ... יב.) ב-
|
||||
הפסקאות הראשונות שלו, ומזריק bookmarkStart/bookmarkEnd בהתאם.
|
||||
|
||||
נעשה בצורה defensive — אם לא מצליחים לזהות בלוק, הוא פשוט לא יקבל
|
||||
bookmark (`missing_blocks` בתוצאה). השרת אמור להתריע למשתמש.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services.docx_reviser import (
|
||||
NSMAP,
|
||||
_load_docx_xml,
|
||||
_save_docx_xml,
|
||||
_w,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Block identification ──────────────────────────────────────────
|
||||
|
||||
# The 12 blocks in order, with their Hebrew letter marker
|
||||
BLOCK_ORDER = [
|
||||
("block-alef", "א"),
|
||||
("block-bet", "ב"),
|
||||
("block-gimel", "ג"),
|
||||
("block-dalet", "ד"),
|
||||
("block-heh", "ה"),
|
||||
("block-vav", "ו"),
|
||||
("block-zayin", "ז"),
|
||||
("block-chet", "ח"),
|
||||
("block-tet", "ט"),
|
||||
("block-yod", "י"),
|
||||
("block-yod-alef", "יא"),
|
||||
("block-yod-bet", "יב"),
|
||||
]
|
||||
|
||||
# Regex matching a paragraph that begins with a Hebrew block marker
|
||||
# followed by '.', ')', ' ', or end-of-string. The marker must be followed
|
||||
# either by whitespace/punctuation or end of text to avoid matching longer
|
||||
# words that happen to start with these letters.
|
||||
_BLOCK_MARKERS_BY_LETTER: dict[str, str] = {letter: name for name, letter in BLOCK_ORDER}
|
||||
|
||||
# Longer markers (יא, יב) first so regex matches them before falling back to 'י'
|
||||
_MARKER_ALTERNATION = "|".join(
|
||||
re.escape(letter)
|
||||
for letter in sorted(_BLOCK_MARKERS_BY_LETTER, key=len, reverse=True)
|
||||
)
|
||||
_BLOCK_MARKER_RE = re.compile(
|
||||
rf"^\s*({_MARKER_ALTERNATION})\s*[\.\)\-]\s*"
|
||||
)
|
||||
|
||||
# Secondary heuristic: Hebrew section headings that reliably mark the
# start of each block in the Daphna Tamir style (used when the markers
# "א.", "ב." etc. are missing — common in user-edited Word files).
#
# Key observations from the 12-block schema:
#   block-alef:     "בפני: דפנה תמיר" or decision number page
#   block-bet:      "ערר מספר" line
#   block-gimel:    appellants vs respondents (parties)
#   block-dalet:    bold "החלטה" centered
#   block-heh:      "רקע" / "רקע עובדתי" / "פתח דבר"
#   block-vav:      "תכניות חלות" / "ההליך שבפנינו" / "ההליכים בפני"
#   block-zayin:    "תמצית טענות" / "טענות הצדדים"
#   block-chet:     "תגובת המשיבה" / "עמדת הוועדה"
#   block-tet:      "ההליכים בפני ועדת הערר" / "הדיון בפנינו"
#   block-yod:      "דיון והכרעה" / "דיון"
#   block-yod-alef: "סוף דבר" / "סיכום"
#   block-yod-bet:  "ההחלטה" (signature / closing block)
#
# Each entry is (block name, regex sources). The patterns are compiled below
# and applied with `search`, so a pattern without a leading '^' (as in
# block-alef) can match anywhere in the paragraph text. Entries are listed in
# block order; the detector tries them in this order and takes the first hit.
_BLOCK_HEADING_PATTERNS: list[tuple[str, list[str]]] = [
    ("block-alef", [r"בפני[:\s]", r"ועדת הערר"]),
    ("block-bet", [r"^ערר\s+מספר", r"^ערר\s+\d"]),
    ("block-gimel", [r"^נגד\s*$", r"^—\s*נגד\s*—"]),
    ("block-dalet", [r"^החלטה\s*$"]),
    ("block-heh", [r"^רקע\s*$", r"^רקע\s+עובדתי", r"^פתח\s+דבר"]),
    ("block-vav", [r"^תכניות\s+חלות", r"^ההליכים?\s+שבפנינו", r"^ההליכים?\s+בפני\s+הוועדה\s+המקומית"]),
    ("block-zayin", [r"^תמצית\s+טענות", r"^טענות\s+הצדדים", r"^טענות\s+העוררי"]),
    ("block-chet", [r"^תגובת\s+המשיב", r"^עמדת\s+הוועדה\s+המקומית", r"^תשובת"]),
    ("block-tet", [r"^ההליכים?\s+בפני\s+ועדת\s+הערר", r"^הדיון\s+בפנינו"]),
    ("block-yod", [r"^דיון\s+והכרעה", r"^דיון\s*$", r"^ההכרעה"]),
    ("block-yod-alef", [r"^סוף\s+דבר", r"^סיכום\s*$"]),
    ("block-yod-bet", [r"^ההחלטה\s*$", r"^על\s+כן[,\.]?"]),
]

# Same table with every pattern pre-compiled once at import time, so the
# per-paragraph scan does not recompile regexes.
_COMPILED_HEADING_PATTERNS: list[tuple[str, list[re.Pattern[str]]]] = [
    (name, [re.compile(p) for p in patterns])
    for name, patterns in _BLOCK_HEADING_PATTERNS
]
|
||||
|
||||
|
||||
def _paragraph_text(p: etree._Element) -> str:
    """Return the live text of a paragraph, joining all w:t nodes.

    Only <w:t> descendants contribute. Text held in other elements, such as
    <w:delText> (tracked deletions) or <w:instrText> (field codes), is left
    out, so a heading that was deleted under Track Changes no longer counts
    as a block start. Leading and trailing whitespace is stripped.
    """
    return "".join(t.text or "" for t in p.iter(_w("t"))).strip()
|
||||
|
||||
|
||||
def _detect_block_starts(
    paragraphs: list[etree._Element],
) -> dict[str, int]:
    """Map each detected block name to the index of its first paragraph.

    Single forward pass over ``paragraphs``. For every non-empty paragraph:

    1. Try the letter-marker regex (א., ב., ...).
    2. If that yields nothing, try the heading-keyword patterns, considering
       only blocks that are still eligible.

    Blocks must appear in ``BLOCK_ORDER`` order: once a block is assigned,
    that block and every earlier one are no longer eligible, so an
    out-of-order hit is ignored. A paragraph whose marker matched an
    ineligible block is skipped without trying the heading patterns.
    """
    rank = {name: pos for pos, (name, _) in enumerate(BLOCK_ORDER)}
    starts: dict[str, int] = {}
    next_rank = 0  # lowest BLOCK_ORDER position still eligible

    for para_idx, para in enumerate(paragraphs):
        text = _paragraph_text(para)
        if not text:
            continue

        candidate: str | None = None

        # Primary signal: an explicit Hebrew letter marker.
        marker = _BLOCK_MARKER_RE.match(text)
        if marker is not None:
            candidate = _BLOCK_MARKERS_BY_LETTER.get(marker.group(1))

        # Secondary signal: heading keywords, limited to eligible blocks.
        if candidate is None:
            candidate = next(
                (
                    name
                    for name, patterns in _COMPILED_HEADING_PATTERNS
                    if name not in starts
                    and rank[name] >= next_rank
                    and any(rx.search(text) for rx in patterns)
                ),
                None,
            )

        if candidate is None or candidate in starts:
            continue
        if next_rank >= len(rank):
            # Every block has been placed already.
            continue
        if rank[candidate] >= next_rank:
            starts[candidate] = para_idx
            next_rank = rank[candidate] + 1
    return starts
|
||||
|
||||
|
||||
def _insert_bookmark_around_range(
    body: etree._Element,
    paragraphs: list[etree._Element],
    start_idx: int,
    end_idx: int,
    name: str,
    bm_id: int,
) -> None:
    """Bookmark the paragraph range ``start_idx..end_idx`` (inclusive).

    A <w:bookmarkStart> is placed at the beginning of the content of
    ``paragraphs[start_idx]`` and a <w:bookmarkEnd> is appended to
    ``paragraphs[end_idx]``; both carry ``bm_id``.

    The start element goes directly after <w:pPr> when the paragraph has
    one: the WordprocessingML schema requires <w:pPr> to be the first child
    of <w:p>, and paragraphs saved by Word normally carry one.

    ``body`` is accepted for interface compatibility and is not used.
    """
    start_el = etree.Element(_w("bookmarkStart"))
    start_el.set(_w("id"), str(bm_id))
    start_el.set(_w("name"), name)

    end_el = etree.Element(_w("bookmarkEnd"))
    end_el.set(_w("id"), str(bm_id))

    start_p = paragraphs[start_idx]
    end_p = paragraphs[end_idx]

    pPr = start_p.find(_w("pPr"))
    if pPr is None:
        start_p.insert(0, start_el)
    else:
        # Keep the paragraph properties first; the bookmark follows them.
        pPr.addnext(start_el)
    end_p.append(end_el)
|
||||
|
||||
|
||||
def _next_bookmark_id(doc_tree: etree._Element) -> int:
    """Return an id one above the highest existing <w:bookmarkStart> id.

    The result is never below 10000. Missing or non-integer ``w:id`` values
    are ignored.
    """
    highest = 9999
    for bookmark in doc_tree.iterfind(".//w:bookmarkStart", NSMAP):
        raw_id = bookmark.get(_w("id"))
        if not raw_id:
            continue
        try:
            highest = max(highest, int(raw_id))
        except ValueError:
            # Not an integer id; it cannot clash with the ids we generate.
            continue
    return highest + 1
|
||||
|
||||
|
||||
# ── Public API ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def retrofit_bookmarks(
    docx_path: str | Path,
    *,
    output_path: str | Path | None = None,
    backup: bool = True,
) -> dict:
    """Inject block-* bookmarks into an existing DOCX via heuristic detection.

    Args:
        docx_path: path to DOCX file (modified in place unless output_path set).
        output_path: if given, write to this path instead of overwriting.
        backup: if True and writing in place, copy the original next to it
            first, with the final suffix replaced by ``.pre-retrofit.docx``
            (``draft.docx`` -> ``draft.pre-retrofit.docx``).

    Returns:
        {
          'bookmarks_added': ['block-alef', ...],
          'missing_blocks': ['block-dalet', ...],
          'existing_bookmarks': [...]   # bookmarks already on the doc
        }

    Raises:
        FileNotFoundError: ``docx_path`` does not exist.
        ValueError: the document has no <w:body>.

    When writing in place and there is nothing to add, the file is left
    untouched and no backup is made. This keeps a repeated retrofit from
    replacing the backup of the original with an already-retrofitted copy.
    When ``output_path`` names a different file it is always written.
    """
    docx_path = Path(docx_path)
    if not docx_path.exists():
        raise FileNotFoundError(str(docx_path))

    output_path = docx_path if output_path is None else Path(output_path)
    in_place = output_path.resolve() == docx_path.resolve()

    members, doc_tree, settings_tree = _load_docx_xml(docx_path)

    # Bookmarks already present: reported back, and never duplicated.
    existing_names: list[str] = []
    for el in doc_tree.iterfind(".//w:bookmarkStart", NSMAP):
        name = el.get(_w("name"))
        if name:
            existing_names.append(name)
    already_present = set(existing_names)

    # Collect *top-level* body paragraphs only (no descent into tables etc.
    # for now — MVP); retrofitting only cares about the main flow.
    body = doc_tree.find(f".//{_w('body')}")
    if body is None:
        raise ValueError("document has no <w:body>")
    paragraphs = [p for p in body if p.tag == _w("p")]

    if not paragraphs:
        return {
            "bookmarks_added": [],
            "missing_blocks": [n for n, _ in BLOCK_ORDER],
            "existing_bookmarks": existing_names,
        }

    block_starts = _detect_block_starts(paragraphs)

    # A block runs up to the paragraph before the next detected block, or to
    # the last paragraph when it is the last block found.
    ordered_found = sorted(block_starts.items(), key=lambda kv: kv[1])
    last_idx = len(paragraphs) - 1
    to_add: list[tuple[str, int, int]] = []
    for pos, (name, start_idx) in enumerate(ordered_found):
        if name in already_present:
            continue
        if pos + 1 < len(ordered_found):
            end_idx = ordered_found[pos + 1][1] - 1
        else:
            end_idx = last_idx
        to_add.append((name, start_idx, max(start_idx, end_idx)))

    if to_add or not in_place:
        if backup and in_place:
            backup_path = docx_path.with_suffix(".pre-retrofit.docx")
            shutil.copy2(str(docx_path), str(backup_path))

        next_id = _next_bookmark_id(doc_tree)
        for name, start_idx, end_idx in to_add:
            _insert_bookmark_around_range(
                body, paragraphs, start_idx, end_idx, name, next_id,
            )
            next_id += 1

        _save_docx_xml(members, doc_tree, settings_tree, output_path)

    added = [name for name, _, _ in to_add]
    missing = [
        n for n, _ in BLOCK_ORDER
        if n not in block_starts and n not in already_present
    ]
    logger.info("retrofit %s: added=%s missing=%s",
                docx_path.name, added, missing)
    return {
        "bookmarks_added": added,
        "missing_blocks": missing,
        "existing_bookmarks": existing_names,
    }
|
||||
514
mcp-server/src/legal_mcp/services/docx_reviser.py
Normal file
514
mcp-server/src/legal_mcp/services/docx_reviser.py
Normal file
@@ -0,0 +1,514 @@
|
||||
"""עריכת DOCX עם Track Changes אמיתיים של Word.
|
||||
|
||||
השירות מיועד לקבל DOCX קיים (עם bookmarks שזיהו אנקורים) ולהחיל עליו
|
||||
עריכות מסומנות כ-w:ins / w:del, שבאים לידי ביטוי ב-Word כ-Track Changes
|
||||
שהמשתמש יכול Accept/Reject.
|
||||
|
||||
אסטרטגיית אנקורים: bookmarks בשמות כגון 'block-yod', 'block-yod-para-3'
|
||||
שמוכנסים בזמן הייצוא הראשוני (docx_exporter.py) או רטרואקטיבית
|
||||
(docx_retrofit.py).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import zipfile
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
from lxml import etree
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── XML namespaces ─────────────────────────────────────────────────
|
||||
|
||||
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||
NSMAP = {"w": W_NS}
|
||||
|
||||
|
||||
def _w(tag: str) -> str:
|
||||
"""Build a fully qualified tag name in the w: namespace."""
|
||||
return f"{{{W_NS}}}{tag}"
|
||||
|
||||
|
||||
# ── Data models ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
# The operations a Revision may request, each relative to its anchor bookmark.
RevisionType = Literal["insert_after", "insert_before", "replace", "delete"]
# Formatting presets for inserted paragraphs. In _build_paragraph, "heading"
# is bold 14pt, "bold" is bold, "quote" is italic 11pt, and "body" is plain.
StyleType = Literal["body", "quote", "heading", "bold"]
|
||||
|
||||
|
||||
@dataclass
class Revision:
    """A single tracked change to apply to the DOCX."""

    # Caller-chosen identifier; echoed back on the matching RevisionResult.
    id: str
    # Which operation to perform (see RevisionType).
    type: RevisionType
    # Name of the bookmark the change is anchored to.
    anchor_bookmark: str
    # Text of the paragraph to insert.
    content: str = ""
    # Formatting preset applied to the inserted paragraph.
    style: StyleType = "body"
    # NOTE(review): presumably a free-text rationale; not read by the helpers
    # visible in this part of the file.
    reason: str = ""
    # NOTE(review): not read by the insert/delete helpers visible in this part
    # of the file — confirm where it is used.
    anchor_position: Literal["start", "end"] = "end"
|
||||
|
||||
|
||||
@dataclass
class RevisionResult:
    """Result of applying a single revision."""

    # Copied from Revision.id.
    id: str
    # "applied" on success, "failed" otherwise.
    status: Literal["applied", "failed"]
    # Human-readable failure reason; None on success.
    error: str | None = None
    # Revision id written to the document. For deletions this carries the
    # w:del id despite the field name.
    ins_id: int | None = None
|
||||
|
||||
|
||||
@dataclass
class RevisionBatchResult:
    """Aggregate result of applying a revision batch."""

    # Count of revisions with status "applied" (maintained by the caller).
    applied: int = 0
    # Count of revisions with status "failed" (maintained by the caller).
    failed: int = 0
    # Per-revision outcomes; default_factory gives each batch its own list.
    results: list[RevisionResult] = field(default_factory=list)
    # NOTE(review): presumably the path of the written DOCX — confirm in the
    # batch-apply function.
    output_path: str = ""
|
||||
|
||||
|
||||
# ── XML helpers ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _load_docx_xml(docx_path: Path) -> tuple[dict[str, bytes], etree._Element, etree._Element]:
    """Load a DOCX as a dict of zip members + parsed document/settings trees.

    Returns:
        ``(members, document_tree, settings_tree)``. ``members`` maps every
        zip entry name to its raw bytes. ``settings_tree`` is parsed from
        ``word/settings.xml`` when that part exists and is non-empty;
        otherwise it is a fresh, empty <w:settings> element.

    Raises:
        ValueError: the archive has no ``word/document.xml``.

    DOCX files reaching this function may be user uploads, so the XML is
    parsed with entity resolution and network access disabled. That keeps a
    crafted DOCTYPE from pulling local files or remote resources into the
    tree.
    """
    with zipfile.ZipFile(docx_path, "r") as zf:
        members: dict[str, bytes] = {name: zf.read(name) for name in zf.namelist()}

    if "word/document.xml" not in members:
        raise ValueError(f"{docx_path}: missing word/document.xml")

    parser = etree.XMLParser(resolve_entities=False, no_network=True)

    document_tree = etree.fromstring(members["word/document.xml"], parser)
    settings_bytes = members.get("word/settings.xml")
    if settings_bytes:
        settings_tree = etree.fromstring(settings_bytes, parser)
    else:
        settings_tree = etree.Element(_w("settings"), nsmap=NSMAP)

    return members, document_tree, settings_tree
|
||||
|
||||
|
||||
def _save_docx_xml(
    members: dict[str, bytes],
    document_tree: etree._Element,
    settings_tree: etree._Element,
    output_path: Path,
) -> None:
    """Write a DOCX to ``output_path`` using the given document/settings trees.

    All entries of ``members`` are written unchanged except
    ``word/document.xml`` and ``word/settings.xml``, which are replaced by
    serializations of the two trees (``word/settings.xml`` is added if it was
    absent). The caller's ``members`` dict is not modified. Parent directories
    are created as needed. The archive is assembled in memory and written in
    a single call.
    """
    def _serialize(tree: etree._Element) -> bytes:
        return etree.tostring(
            tree, xml_declaration=True, encoding="UTF-8", standalone=True
        )

    # Work on a copy so the caller's dict stays untouched.
    package = {
        **members,
        "word/document.xml": _serialize(document_tree),
        "word/settings.xml": _serialize(settings_tree),
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    archive = BytesIO()
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as zf:
        for member_name, payload in package.items():
            zf.writestr(member_name, payload)
    output_path.write_bytes(archive.getvalue())
|
||||
|
||||
|
||||
def _ensure_track_revisions(settings_tree: etree._Element) -> None:
    """Add ``<w:trackRevisions w:val="true"/>`` to the settings if absent.

    Existing <w:ins>/<w:del> nodes are shown as tracked changes regardless of
    this flag; the flag asks Word to keep recording the user's further edits
    as revisions. A <w:trackRevisions> element that is already present is
    left exactly as it is.
    """
    if settings_tree.find(_w("trackRevisions")) is not None:
        return
    flag = etree.SubElement(settings_tree, _w("trackRevisions"))
    flag.set(_w("val"), "true")
|
||||
|
||||
|
||||
def _next_revision_id(document_tree: etree._Element) -> int:
    """Return one more than the highest ``w:id`` already used in the document.

    Scans w:ins, w:del, w:bookmarkStart, w:bookmarkEnd, w:commentRangeStart
    and w:comment elements. Missing or non-integer ids are ignored; the
    result is 1 when no usable id exists.
    """
    id_bearing_paths = (
        ".//w:ins", ".//w:del", ".//w:bookmarkStart", ".//w:bookmarkEnd",
        ".//w:commentRangeStart", ".//w:comment",
    )
    highest = 0
    for path in id_bearing_paths:
        for node in document_tree.iterfind(path, NSMAP):
            raw_id = node.get(_w("id"))
            if not raw_id:
                continue
            try:
                numeric_id = int(raw_id)
            except ValueError:
                continue
            if numeric_id > highest:
                highest = numeric_id
    return highest + 1
|
||||
|
||||
|
||||
def _find_bookmark(
    document_tree: etree._Element, name: str
) -> tuple[etree._Element | None, etree._Element | None]:
    """Locate the start/end elements of the bookmark called ``name``.

    Returns ``(start, end)``. Both are ``None`` when no <w:bookmarkStart>
    carries that name. ``end`` alone is ``None`` when no <w:bookmarkEnd>
    shares the start's ``w:id``. The first match in document order wins.
    """
    start = next(
        (
            el for el in document_tree.iterfind(".//w:bookmarkStart", NSMAP)
            if el.get(_w("name")) == name
        ),
        None,
    )
    if start is None:
        return None, None

    bm_id = start.get(_w("id"))
    end = next(
        (
            el for el in document_tree.iterfind(".//w:bookmarkEnd", NSMAP)
            if el.get(_w("id")) == bm_id
        ),
        None,
    )
    return start, end
|
||||
|
||||
|
||||
def _find_enclosing_paragraph(element: etree._Element) -> etree._Element | None:
    """Return the nearest <w:p> at or above ``element``.

    ``element`` itself is returned if it is a paragraph. ``None`` is returned
    when the root is reached without meeting a <w:p>.
    """
    paragraph_tag = _w("p")
    node = element
    while node is not None and node.tag != paragraph_tag:
        node = node.getparent()
    return node
|
||||
|
||||
|
||||
# ── Paragraph builders ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def _build_run(text: str, *, bold: bool = False, italic: bool = False,
               font: str = "David", size_half_pt: int | None = None) -> etree._Element:
    """Build a <w:r> holding ``text`` with RTL formatting.

    The run properties are written in this order: rFonts (ascii/hAnsi/cs set
    to ``font``, hint "cs"), then sz/szCs when ``size_half_pt`` is given,
    then b/bCs when ``bold``, then i/iCs when ``italic``, then rtl. The text
    node is marked ``xml:space="preserve"`` so surrounding whitespace is
    kept.
    """
    run = etree.Element(_w("r"))
    props = etree.SubElement(run, _w("rPr"))

    fonts = etree.SubElement(props, _w("rFonts"))
    for slot in ("ascii", "hAnsi", "cs"):
        fonts.set(_w(slot), font)
    fonts.set(_w("hint"), "cs")

    if size_half_pt is not None:
        # Size applies to the plain and the complex-script variants alike.
        for size_tag in ("sz", "szCs"):
            etree.SubElement(props, _w(size_tag)).set(_w("val"), str(size_half_pt))

    toggles: list[str] = []
    if bold:
        toggles += ["b", "bCs"]
    if italic:
        toggles += ["i", "iCs"]
    toggles.append("rtl")
    for toggle in toggles:
        etree.SubElement(props, _w(toggle))

    text_el = etree.SubElement(run, _w("t"))
    text_el.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")
    text_el.text = text
    return run
|
||||
|
||||
|
||||
def _build_paragraph(text: str, *, style: StyleType = "body") -> etree._Element:
    """Build a <w:p> with RTL paragraph properties and a single run of ``text``.

    Style mapping: "heading" is bold at 28 half-points (14pt), "bold" is bold,
    "quote" is italic at 22 half-points (11pt), and "body" is plain. The font
    is whatever ``_build_run`` defaults to.
    """
    para = etree.Element(_w("p"))
    props = etree.SubElement(para, _w("pPr"))

    etree.SubElement(props, _w("bidi")).set(_w("val"), "1")
    # NOTE(review): with w:bidi set, Word may interpret w:jc left/right
    # relative to the RTL direction — confirm "right" renders as intended.
    etree.SubElement(props, _w("jc")).set(_w("val"), "right")

    # Properties of the paragraph mark itself.
    mark_props = etree.SubElement(props, _w("rPr"))
    etree.SubElement(mark_props, _w("rtl"))

    half_points_by_style = {"heading": 28, "quote": 22}
    para.append(
        _build_run(
            text,
            bold=style in ("heading", "bold"),
            italic=style == "quote",
            size_half_pt=half_points_by_style.get(style),
        )
    )
    return para
|
||||
|
||||
|
||||
def _wrap_in_ins(elements: list[etree._Element], *, ins_id: int,
                 author: str, date_iso: str) -> etree._Element:
    """Return a new <w:ins> containing the given *run-level* elements.

    The wrapper carries ``w:id``, ``w:author`` and ``w:date``. The elements
    are appended in the order given, which moves them out of any parent they
    currently have.
    """
    wrapper = etree.Element(_w("ins"))
    for attr, value in (("id", str(ins_id)), ("author", author), ("date", date_iso)):
        wrapper.set(_w(attr), value)
    wrapper.extend(elements)
    return wrapper
|
||||
|
||||
|
||||
def _make_tracked_paragraph_insert(
    text: str, *, style: StyleType, ins_id: int, author: str, date_iso: str,
    mark_id: int | None = None,
) -> etree._Element:
    """Build a whole tracked-inserted paragraph.

    DOCX convention for a fully-inserted paragraph:
      1. All <w:r> runs are wrapped in a single <w:ins> (id ``ins_id``).
      2. The paragraph's pPr/rPr gets an <w:ins> marker for the paragraph
         mark itself (pilcrow), with id ``mark_id``.

    Args:
        text: paragraph text.
        style: formatting preset passed to ``_build_paragraph``.
        ins_id: revision id for the run wrapper.
        author: revision author recorded on both <w:ins> elements.
        date_iso: revision timestamp recorded on both <w:ins> elements.
        mark_id: revision id for the paragraph-mark marker; defaults to
            ``ins_id`` when omitted.

    The paragraph-mark marker is inserted as the *first* child of the
    paragraph-mark rPr. The schema lists tracked-change markers ahead of the
    ordinary run properties there, so appending it after <w:rtl> would emit
    an out-of-order element.
    """
    if mark_id is None:
        mark_id = ins_id
    p = _build_paragraph(text, style=style)
    pPr = p.find(_w("pPr"))
    assert pPr is not None  # _build_paragraph always creates a pPr
    rPr = pPr.find(_w("rPr"))
    if rPr is None:
        rPr = etree.SubElement(pPr, _w("rPr"))

    ins_mark = etree.Element(_w("ins"))
    ins_mark.set(_w("id"), str(mark_id))
    ins_mark.set(_w("author"), author)
    ins_mark.set(_w("date"), date_iso)
    rPr.insert(0, ins_mark)

    runs = [child for child in list(p) if child.tag == _w("r")]
    if runs:
        for r in runs:
            p.remove(r)
        ins = _wrap_in_ins(runs, ins_id=ins_id, author=author, date_iso=date_iso)
        p.append(ins)
    return p
|
||||
|
||||
|
||||
def _mark_runs_as_deleted(paragraph: etree._Element, *, del_id: int,
                          author: str, date_iso: str) -> None:
    """Convert all direct <w:r> children of a paragraph to tracked deletions.

    Every direct-child run is moved into one <w:del> wrapper carrying
    ``del_id``, ``author`` and ``date_iso``. Inside each run, <w:t> is
    renamed to <w:delText>, as required within a <w:del>. Paragraphs without
    direct-child runs are left unchanged.

    The wrapper takes the position of the first run instead of being appended
    at the end of the paragraph. The deleted text therefore stays ahead of
    any trailing non-run siblings, in particular a closing <w:bookmarkEnd>,
    and remains inside the bookmark range.
    """
    runs = [child for child in paragraph if child.tag == _w("r")]
    if not runs:
        return

    wrapper = etree.Element(_w("del"))
    wrapper.set(_w("id"), str(del_id))
    wrapper.set(_w("author"), author)
    wrapper.set(_w("date"), date_iso)

    # Reserve the first run's slot before any run is moved.
    paragraph.insert(paragraph.index(runs[0]), wrapper)

    for r in runs:
        for t in r.findall(_w("t")):
            t.tag = _w("delText")
        # Appending re-parents the run, removing it from the paragraph.
        wrapper.append(r)
|
||||
|
||||
|
||||
# ── Revision application ───────────────────────────────────────────
|
||||
|
||||
|
||||
def _apply_insert(
    document_tree: etree._Element,
    revision: Revision,
    *,
    ins_id: int,
    author: str,
    date_iso: str,
) -> RevisionResult:
    """Apply insert_after / insert_before relative to a bookmark.

    ``insert_before`` puts the new tracked paragraph immediately before the
    paragraph holding the bookmark's start. Any other type is treated as
    ``insert_after``: the new paragraph goes immediately after the paragraph
    holding the bookmark's end, or its start when no end element exists.

    Two revision ids are consumed: ``ins_id`` for the run wrapper and
    ``ins_id + 1`` for the paragraph mark. The result reports ``ins_id``.
    A "failed" result is returned when the bookmark, its enclosing paragraph
    or that paragraph's parent cannot be found.
    """
    def _failed(message: str) -> RevisionResult:
        return RevisionResult(id=revision.id, status="failed", error=message)

    start, end = _find_bookmark(document_tree, revision.anchor_bookmark)
    if start is None:
        return _failed(f"bookmark '{revision.anchor_bookmark}' not found")

    before = revision.type == "insert_before"
    if before:
        anchor = start
    elif end is not None:
        anchor = end
    else:
        anchor = start

    anchor_paragraph = _find_enclosing_paragraph(anchor)
    if anchor_paragraph is None:
        return _failed("anchor has no enclosing paragraph")

    tracked_paragraph = _make_tracked_paragraph_insert(
        revision.content, style=revision.style,
        ins_id=ins_id, mark_id=ins_id + 1,
        author=author, date_iso=date_iso,
    )

    container = anchor_paragraph.getparent()
    if container is None:
        return _failed("enclosing paragraph has no parent")

    position = list(container).index(anchor_paragraph)
    container.insert(position if before else position + 1, tracked_paragraph)

    return RevisionResult(id=revision.id, status="applied", ins_id=ins_id)
|
||||
|
||||
|
||||
def _apply_delete(
    document_tree: etree._Element,
    revision: Revision,
    *,
    del_id: int,
    author: str,
    date_iso: str,
) -> RevisionResult:
    """Mark the paragraph that holds a bookmark's start as a tracked deletion.

    Only the paragraph enclosing the bookmark start is processed; the
    bookmark end is looked up but not used.

    Args:
        document_tree: parsed document XML to mutate in place.
        revision: the revision; only ``id`` and ``anchor_bookmark`` are read.
        del_id: revision ID passed to the deletion marker.
        author: author recorded on the tracked change.
        date_iso: timestamp string recorded on the tracked change.

    Returns:
        A ``RevisionResult``; on success its ``ins_id`` field carries
        ``del_id``.
    """
    bm_start, _unused_end = _find_bookmark(
        document_tree, revision.anchor_bookmark
    )
    if bm_start is None:
        return RevisionResult(
            id=revision.id,
            status="failed",
            error=f"bookmark '{revision.anchor_bookmark}' not found",
        )

    target_p = _find_enclosing_paragraph(bm_start)
    if target_p is None:
        return RevisionResult(
            id=revision.id,
            status="failed",
            error="anchor has no enclosing paragraph",
        )

    _mark_runs_as_deleted(
        target_p, del_id=del_id, author=author, date_iso=date_iso
    )
    return RevisionResult(id=revision.id, status="applied", ins_id=del_id)
|
||||
|
||||
|
||||
def _apply_replace(
    document_tree: etree._Element,
    revision: Revision,
    *,
    ins_id: int,
    del_id: int,
    author: str,
    date_iso: str,
) -> RevisionResult:
    """Replace a bookmarked paragraph: tracked insert after it, then tracked delete.

    The replacement paragraph is inserted directly after the paragraph that
    encloses the bookmark start, and that original paragraph is then marked
    as deleted.

    Args:
        document_tree: parsed document XML to mutate in place.
        revision: the revision; ``anchor_bookmark``, ``content`` and
            ``style`` are read.
        ins_id: revision ID for the inserted run wrapper; ``ins_id + 1`` is
            used for the paragraph mark.
        del_id: revision ID passed to the deletion marker.
        author: author recorded on both tracked changes.
        date_iso: timestamp string recorded on both tracked changes.

    Returns:
        A ``RevisionResult``; on success its ``ins_id`` field carries
        ``ins_id``.
    """
    def _failed(message: str) -> RevisionResult:
        return RevisionResult(id=revision.id, status="failed", error=message)

    bm_start, _unused_end = _find_bookmark(
        document_tree, revision.anchor_bookmark
    )
    if bm_start is None:
        return _failed(f"bookmark '{revision.anchor_bookmark}' not found")

    old_p = _find_enclosing_paragraph(bm_start)
    if old_p is None:
        return _failed("anchor has no enclosing paragraph")

    container = old_p.getparent()
    if container is None:
        return _failed("enclosing paragraph has no parent")

    replacement_p = _make_tracked_paragraph_insert(
        revision.content,
        style=revision.style,
        ins_id=ins_id,
        mark_id=ins_id + 1,
        author=author,
        date_iso=date_iso,
    )
    # Insert first, then mark the old paragraph deleted, so the new text
    # sits immediately below the struck-through original.
    container.insert(list(container).index(old_p) + 1, replacement_p)
    _mark_runs_as_deleted(
        old_p, del_id=del_id, author=author, date_iso=date_iso
    )

    return RevisionResult(id=revision.id, status="applied", ins_id=ins_id)
|
||||
|
||||
|
||||
# ── Public API ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def apply_tracked_revisions(
    source_path: str | Path,
    output_path: str | Path,
    revisions: list[Revision],
    *,
    author: str = "מערכת AI",
    date: datetime | None = None,
) -> RevisionBatchResult:
    """Apply a batch of tracked revisions to a DOCX, producing a new DOCX.

    The document is loaded from ``source_path``, edited in memory and saved
    to ``output_path`` with <w:ins> / <w:del> markers that Word renders as
    Track Changes (Accept/Reject).

    Args:
        source_path: existing DOCX (e.g. עריכה-v1.docx) — retains user edits.
        output_path: where to write the revised DOCX (e.g. טיוטה-v6.docx).
        revisions: list of Revision objects. Anchors are bookmark names.
        author: displayed as the revision author in Word.
        date: revision timestamp (defaults to now, UTC). Timezone-aware
            values are converted to UTC before formatting; naive values are
            formatted as given.

    Returns:
        RevisionBatchResult with per-revision status, the applied/failed
        counters and the output path.
    """
    source_path = Path(source_path)
    output_path = Path(output_path)

    if date is None:
        date = datetime.now(timezone.utc)
    elif date.tzinfo is not None:
        # The stamp below ends with a literal "Z", so the digits must be UTC.
        date = date.astimezone(timezone.utc)
    date_iso = date.strftime("%Y-%m-%dT%H:%M:%SZ")

    members, doc_tree, settings_tree = _load_docx_xml(source_path)
    _ensure_track_revisions(settings_tree)

    next_id = _next_revision_id(doc_tree)

    batch = RevisionBatchResult()
    for rev in revisions:
        try:
            # IDs are reserved *before* each handler runs: a handler that
            # raises after partly mutating the tree must not let the next
            # revision reuse IDs already written to the document.
            if rev.type in ("insert_after", "insert_before"):
                # insert consumes 2 IDs: run-wrapper + paragraph-mark
                ins_id, next_id = next_id, next_id + 2
                result = _apply_insert(doc_tree, rev, ins_id=ins_id,
                                       author=author, date_iso=date_iso)
            elif rev.type == "delete":
                del_id, next_id = next_id, next_id + 1
                result = _apply_delete(doc_tree, rev, del_id=del_id,
                                       author=author, date_iso=date_iso)
            elif rev.type == "replace":
                # replace consumes 3 IDs: ins-run, ins-mark, del
                ins_id, next_id = next_id, next_id + 3
                result = _apply_replace(doc_tree, rev,
                                        ins_id=ins_id, del_id=ins_id + 2,
                                        author=author, date_iso=date_iso)
            else:
                result = RevisionResult(id=rev.id, status="failed",
                                        error=f"unknown type: {rev.type}")
        except Exception as e:  # pragma: no cover - defensive
            # One bad revision must not abort the batch; record and continue.
            logger.exception("revision %s failed", rev.id)
            result = RevisionResult(id=rev.id, status="failed", error=str(e))

        batch.results.append(result)
        if result.status == "applied":
            batch.applied += 1
        else:
            batch.failed += 1

    _save_docx_xml(members, doc_tree, settings_tree, output_path)
    batch.output_path = str(output_path)
    logger.info("applied %d revisions (failed %d) → %s",
                batch.applied, batch.failed, output_path)
    return batch
|
||||
|
||||
|
||||
def list_bookmarks(docx_path: str | Path) -> list[str]:
    """List the bookmark names found in a DOCX, in document order.

    Bookmarks without a name and those whose name starts with ``_`` are
    left out.
    """
    _, doc_tree, _ = _load_docx_xml(Path(docx_path))
    candidates = (
        start.get(_w("name"))
        for start in doc_tree.iterfind(".//w:bookmarkStart", NSMAP)
    )
    return [
        label for label in candidates
        if label and not label.startswith("_")
    ]
|
||||
|
||||
|
||||
def copy_with_revisions(
    source_path: str | Path, output_path: str | Path,
) -> None:
    """Copy the source file to the output path without changing it.

    Intended for the case where the revisions list is empty. The copy is
    made with ``shutil.copy2``.
    """
    origin = str(source_path)
    destination = str(output_path)
    shutil.copy2(origin, destination)
|
||||
@@ -384,6 +384,9 @@ async def validate_decision(case_number: str) -> str:
|
||||
async def export_docx(case_number: str, output_path: str = "") -> str:
|
||||
"""ייצוא החלטה לקובץ DOCX מעוצב — גופן David, RTL, כותרות, מספור סעיפים.
|
||||
|
||||
הקובץ נוצר עם bookmarks ב-12 הבלוקים (אנקורים ל-revisions עתידיים),
|
||||
ומסומן כ-active_draft_path של התיק.
|
||||
|
||||
Args:
|
||||
case_number: מספר תיק הערר
|
||||
output_path: נתיב לשמירה (אופציונלי — ברירת מחדל: תיקיית התיק)
|
||||
@@ -398,9 +401,12 @@ async def export_docx(case_number: str, output_path: str = "") -> str:
|
||||
|
||||
try:
|
||||
path = await docx_exporter.export_decision(case_id, output_path or None)
|
||||
# Register this export as the new source of truth
|
||||
await db.set_active_draft_path(case_id, path)
|
||||
return json.dumps({
|
||||
"status": "completed",
|
||||
"path": path,
|
||||
"active_draft_path": path,
|
||||
"message": f"DOCX נוצר: {path}",
|
||||
}, ensure_ascii=False, indent=2)
|
||||
except ValueError as e:
|
||||
@@ -410,6 +416,163 @@ async def export_docx(case_number: str, output_path: str = "") -> str:
|
||||
}, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def apply_user_edit(case_number: str, edit_filename: str) -> str:
    """Register a user-uploaded edit as the case's new source of truth.

    Steps:
    1. Resolve the file: a bare name is looked up in the case's ``exports``
       directory; a value containing ``/`` is used as a path as given.
    2. Inject bookmarks retroactively via ``docx_retrofit.retrofit_bookmarks``.
    3. Store the path with ``db.set_active_draft_path``.

    Args:
        case_number: appeal case number.
        edit_filename: file name (e.g. "עריכה-v1.docx") or a full path.

    Returns:
        JSON string. On success ``status`` is ``"completed"`` and the
        retrofit's ``bookmarks_added`` / ``missing_blocks`` /
        ``existing_bookmarks`` lists are included; otherwise ``status`` is
        ``"error"`` with a ``message``.
    """
    from legal_mcp.services import docx_retrofit

    case = await db.get_case_by_number(case_number)
    if not case:
        return json.dumps({"status": "error",
                           "message": f"תיק {case_number} לא נמצא."},
                          ensure_ascii=False, indent=2)

    case_id = UUID(case["id"])
    export_dir = config.find_case_dir(case_number) / "exports"
    edit_path = export_dir / edit_filename if "/" not in edit_filename else Path(edit_filename)
    # is_file() rather than exists(): an empty name resolves to the exports
    # directory itself, which exists but is not a document to retrofit.
    if not edit_path.is_file():
        return json.dumps({"status": "error",
                           "message": f"קובץ לא נמצא: {edit_path}"},
                          ensure_ascii=False, indent=2)

    try:
        retrofit_result = docx_retrofit.retrofit_bookmarks(edit_path)
        await db.set_active_draft_path(case_id, str(edit_path))
        return json.dumps({
            "status": "completed",
            "active_draft_path": str(edit_path),
            "bookmarks_added": retrofit_result.get("bookmarks_added", []),
            "missing_blocks": retrofit_result.get("missing_blocks", []),
            "existing_bookmarks": retrofit_result.get("existing_bookmarks", []),
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        # Tool boundary: report any failure in the JSON envelope.
        return json.dumps({"status": "error", "message": str(e)},
                          ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
async def list_bookmarks(case_number: str) -> str:
    """List the bookmarks present in the case's active draft.

    Lets agents check which anchors are available before sending revisions.

    Returns:
        JSON string whose ``status`` is ``"completed"`` (with
        ``active_draft_path`` and ``bookmarks``), ``"no_active_draft"`` or
        ``"error"``.
    """
    from legal_mcp.services import docx_reviser

    def _reply(payload: dict) -> str:
        return json.dumps(payload, ensure_ascii=False, indent=2)

    case = await db.get_case_by_number(case_number)
    if not case:
        return _reply({"status": "error",
                       "message": f"תיק {case_number} לא נמצא."})

    active_path = await db.get_active_draft_path(UUID(case["id"]))
    draft_available = bool(active_path) and Path(active_path).exists()
    if not draft_available:
        return _reply({"status": "no_active_draft",
                       "message": "לא נמצא active_draft. הרץ ייצוא או העלה עריכה."})

    try:
        bookmark_names = docx_reviser.list_bookmarks(active_path)
        return _reply({
            "status": "completed",
            "active_draft_path": active_path,
            "bookmarks": bookmark_names,
        })
    except Exception as e:
        return _reply({"status": "error", "message": str(e)})
|
||||
|
||||
|
||||
async def revise_draft(case_number: str, revisions_json: str,
                       author: str = "מערכת AI") -> str:
    """Apply revisions as Track Changes on top of the case's active draft.

    Writes a new ``טיוטה-v{N}.docx`` in the case's ``exports`` directory,
    where N is one past the highest existing version number, and points the
    case's active draft path at it.

    Args:
        case_number: appeal case number.
        revisions_json: JSON string holding an array of objects:
            [{"id": "r1", "type": "insert_after"|"insert_before"|"replace"|"delete",
              "anchor_bookmark": "block-yod", "content": "...", "style": "body"|"heading"|"quote",
              "reason": "..."}, ...]
            ``type`` and ``anchor_bookmark`` are required on every item.
        author: author string shown in Track Changes.

    Returns:
        JSON string. On success ``status`` is ``"completed"`` with the
        output path, version, applied/failed counts and per-revision
        results; otherwise ``status`` is ``"error"`` with a ``message``.
    """
    from legal_mcp.services import docx_reviser

    def _error(message: str) -> str:
        return json.dumps({"status": "error", "message": message},
                          ensure_ascii=False, indent=2)

    case = await db.get_case_by_number(case_number)
    if not case:
        return _error(f"תיק {case_number} לא נמצא.")

    case_id = UUID(case["id"])
    active_path = await db.get_active_draft_path(case_id)
    if not active_path or not Path(active_path).exists():
        return _error("אין active_draft. הרץ ייצוא או apply_user_edit קודם.")

    try:
        raw = json.loads(revisions_json) if isinstance(revisions_json, str) else revisions_json
    except json.JSONDecodeError as e:
        return _error(f"JSON לא תקף: {e}")

    # Validate the payload shape here so malformed input comes back as a
    # JSON error instead of an exception escaping the tool.
    if not isinstance(raw, list):
        return _error("revisions_json חייב להיות מערך JSON של אובייקטים.")

    revisions = []
    for position, item in enumerate(raw):
        if not isinstance(item, dict):
            return _error(f"revision במיקום {position} אינו אובייקט JSON.")
        missing = [key for key in ("type", "anchor_bookmark") if key not in item]
        if missing:
            return _error(
                f"revision במיקום {position}: חסרים שדות חובה: {', '.join(missing)}"
            )
        revisions.append(docx_reviser.Revision(
            id=item.get("id", ""),
            type=item["type"],
            anchor_bookmark=item["anchor_bookmark"],
            content=item.get("content", ""),
            style=item.get("style", "body"),
            reason=item.get("reason", ""),
            anchor_position=item.get("anchor_position", "end"),
        ))

    # Determine output path — next טיוטה-v{N}.docx
    export_dir = config.find_case_dir(case_number) / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)
    next_ver = 1
    for p in export_dir.glob("טיוטה-v*.docx"):
        try:
            ver = int(p.stem.split("-v")[1])
        except (IndexError, ValueError):
            # Not a plain numbered draft; ignore it.
            continue
        next_ver = max(next_ver, ver + 1)
    output_path = export_dir / f"טיוטה-v{next_ver}.docx"

    try:
        result = docx_reviser.apply_tracked_revisions(
            active_path, output_path, revisions, author=author,
        )
        await db.set_active_draft_path(case_id, str(output_path))
        return json.dumps({
            "status": "completed",
            "output_path": str(output_path),
            "version": next_ver,
            "applied": result.applied,
            "failed": result.failed,
            "active_draft_path": str(output_path),
            "results": [
                {"id": r.id, "status": r.status, "error": r.error}
                for r in result.results
            ],
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        # Tool boundary: report any failure in the JSON envelope.
        return _error(str(e))
|
||||
|
||||
|
||||
async def get_block_context(case_number: str, block_id: str, instructions: str = "") -> str:
|
||||
"""קבלת הקשר מלא לכתיבת בלוק — ללא קריאה ל-API. Claude Code כותב את הבלוק.
|
||||
|
||||
|
||||
0
mcp-server/tests/__init__.py
Normal file
0
mcp-server/tests/__init__.py
Normal file
103
mcp-server/tests/test_docx_exporter_bookmarks.py
Normal file
103
mcp-server/tests/test_docx_exporter_bookmarks.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""בדיקות ל-bookmark helpers ב-docx_exporter.
|
||||
|
||||
הבדיקות מתרכזות ב-helper functions בלבד (לא בכל ה-export flow שדורש DB).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services.docx_exporter import (
|
||||
_BOOKMARK_ID_START,
|
||||
_insert_bookmark_end,
|
||||
_insert_bookmark_start,
|
||||
_wrap_block_with_bookmarks,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks
|
||||
|
||||
|
||||
def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None:
    """A start/end bookmark pair on one paragraph is listed after saving."""
    document = Document()
    paragraph = document.add_paragraph("תוכן בלוק י")
    bookmark_id = 10001
    _insert_bookmark_start(paragraph, "block-yod", bookmark_id)
    _insert_bookmark_end(paragraph, bookmark_id)

    saved = tmp_path / "out.docx"
    document.save(str(saved))

    # list_bookmarks reads the saved file, so this checks the XML on disk.
    assert list_bookmarks(saved) == ["block-yod"]
|
||||
|
||||
|
||||
def test_wrap_block_with_bookmarks_wraps_multiple_paragraphs(tmp_path: Path) -> None:
    """The bookmark spans from the block's first paragraph to its last."""
    doc = Document()
    doc.add_paragraph("ראשון — לפני")  # noise before

    bm_counter = [_BOOKMARK_ID_START]

    def writer() -> None:
        for text in ("בלוק — פסקה 1", "בלוק — פסקה 2", "בלוק — פסקה 3"):
            doc.add_paragraph(text)

    _wrap_block_with_bookmarks(doc, "block-yod", writer, bm_counter)
    doc.add_paragraph("אחרי — אחרון")  # noise after

    saved = tmp_path / "out.docx"
    doc.save(str(saved))

    with zipfile.ZipFile(saved, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    paragraphs = tree.findall(".//w:p", NSMAP)

    def _last_paragraph_with(tag_path: str) -> int | None:
        # Index of the last paragraph containing the given element, if any.
        hits = [
            i for i, para in enumerate(paragraphs)
            if para.find(tag_path, NSMAP) is not None
        ]
        return hits[-1] if hits else None

    start_idx = _last_paragraph_with(".//w:bookmarkStart")
    end_idx = _last_paragraph_with(".//w:bookmarkEnd")
    assert start_idx is not None
    assert end_idx is not None

    # Start must sit in the first block paragraph, end in the third.
    assert "פסקה 1" in "".join(paragraphs[start_idx].itertext())
    assert "פסקה 3" in "".join(paragraphs[end_idx].itertext())
|
||||
|
||||
|
||||
def test_wrap_block_skipped_when_writer_adds_nothing(tmp_path: Path) -> None:
    """A writer that adds no paragraphs leaves the document without bookmarks."""
    document = Document()
    counter = [_BOOKMARK_ID_START]

    def write_nothing() -> None:
        return None

    _wrap_block_with_bookmarks(document, "block-empty", write_nothing, counter)

    saved = tmp_path / "out.docx"
    document.save(str(saved))
    assert list_bookmarks(saved) == []
|
||||
|
||||
|
||||
def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None:
    """Three wrapped blocks produce three bookmarks with distinct IDs."""
    doc = Document()
    bm_counter = [_BOOKMARK_ID_START]
    block_names = ("block-alef", "block-bet", "block-gimel")

    def _writer_for(label: str):
        # Bind the label per block so each writer adds its own paragraph.
        def _write():
            return doc.add_paragraph(f"תוכן של {label}")
        return _write

    for block_name in block_names:
        _wrap_block_with_bookmarks(
            doc, block_name, _writer_for(block_name), bm_counter,
        )

    saved = tmp_path / "out.docx"
    doc.save(str(saved))

    with zipfile.ZipFile(saved, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    ids = [
        start.get(_w("id"))
        for start in tree.iterfind(".//w:bookmarkStart", NSMAP)
    ]
    assert len(ids) == 3
    assert len(set(ids)) == 3

    assert set(list_bookmarks(saved)) == {"block-alef", "block-bet", "block-gimel"}
|
||||
141
mcp-server/tests/test_docx_retrofit.py
Normal file
141
mcp-server/tests/test_docx_retrofit.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""בדיקות docx_retrofit — הזרקת bookmarks רטרואקטיבית."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
|
||||
from legal_mcp.services.docx_retrofit import (
|
||||
BLOCK_ORDER,
|
||||
retrofit_bookmarks,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import list_bookmarks
|
||||
|
||||
|
||||
def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Write a DOCX with two paragraphs per marker.

    The first paragraph of each pair starts with the marker followed by a
    period; the second is a plain follow-up paragraph.
    """
    document = Document()
    for marker in markers:
        pair = (
            f"{marker}. תוכן הבלוק שמתחיל ב-{marker}",
            f"עוד פסקה בבלוק {marker}",
        )
        for text in pair:
            document.add_paragraph(text)
    document.save(str(path))
|
||||
|
||||
|
||||
def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """All twelve markers are detected and bookmarked."""
    docx_file = tmp_path / "src.docx"
    all_markers = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"]
    _make_docx_with_hebrew_blocks(docx_file, all_markers)

    outcome = retrofit_bookmarks(docx_file, backup=False)
    assert len(outcome["bookmarks_added"]) == 12
    assert outcome["missing_blocks"] == []

    wanted = {block_name for block_name, _ in BLOCK_ORDER}
    assert set(list_bookmarks(docx_file)) == wanted
|
||||
|
||||
|
||||
def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """With only four blocks present, the rest are reported as missing."""
    docx_file = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_file, ["א", "ב", "ג", "ד"])

    outcome = retrofit_bookmarks(docx_file, backup=False)

    expected_added = ["block-alef", "block-bet", "block-gimel", "block-dalet"]
    assert outcome["bookmarks_added"] == expected_added

    missing = outcome["missing_blocks"]
    assert "block-heh" in missing
    assert "block-yod-bet" in missing
|
||||
|
||||
|
||||
def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """The markers י, יא and יב each map to their own block."""
    docx_file = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_file, ["ט", "י", "יא", "יב"])

    outcome = retrofit_bookmarks(docx_file, backup=False)

    added = set(outcome["bookmarks_added"])
    assert added == {
        "block-tet", "block-yod", "block-yod-alef", "block-yod-bet",
    }
|
||||
|
||||
|
||||
def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """A second retrofit pass adds nothing and reports the existing bookmarks."""
    docx_file = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_file, ["א", "ב"])
    both = {"block-alef", "block-bet"}

    first_pass = retrofit_bookmarks(docx_file, backup=False)
    assert first_pass["bookmarks_added"] == ["block-alef", "block-bet"]

    second_pass = retrofit_bookmarks(docx_file, backup=False)
    assert second_pass["bookmarks_added"] == []  # nothing new
    assert set(second_pass["existing_bookmarks"]) == both

    # The file itself still lists exactly the two names.
    assert set(list_bookmarks(docx_file)) == both
|
||||
|
||||
|
||||
def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """Calling retrofit without ``backup=False`` leaves a ``.pre-retrofit.docx`` file."""
    docx_file = tmp_path / "file.docx"
    _make_docx_with_hebrew_blocks(docx_file, ["א", "ב"])

    retrofit_bookmarks(docx_file)  # backup left at its default

    expected_backup = docx_file.with_suffix(".pre-retrofit.docx")
    assert expected_backup.exists()
|
||||
|
||||
|
||||
def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """With an explicit output path the source keeps no bookmarks."""
    source_file = tmp_path / "src.docx"
    target_file = tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(source_file, ["א", "ב"])

    retrofit_bookmarks(source_file, output_path=target_file)

    # Source is untouched; only the output carries the bookmarks.
    assert list_bookmarks(source_file) == []
    assert set(list_bookmarks(target_file)) == {"block-alef", "block-bet"}
|
||||
|
||||
|
||||
def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """A marker letter inside body text, not at the start, is not a block."""
    docx_file = tmp_path / "src.docx"
    document = Document()
    for text in (
        "א. תחילת הבלוק",
        "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
        "ב. בלוק שני",
    ):
        document.add_paragraph(text)
    document.save(str(docx_file))

    added = retrofit_bookmarks(docx_file, backup=False)["bookmarks_added"]

    assert "block-alef" in added
    assert "block-bet" in added
    # The stray letter in the middle paragraph must not produce block-yod.
    assert "block-yod" not in added
|
||||
|
||||
|
||||
def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """An earlier marker that appears after a later one is reported missing.

    The document opens with 'ב', then 'א', then 'ג'. The expectation is
    that 'ב' and 'ג' are bookmarked while 'א' is listed as missing.
    """
    docx_file = tmp_path / "src.docx"
    document = Document()
    for text in (
        "ב. מופיע ראשון",
        "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
        "ג. בלוק גימל",
    ):
        document.add_paragraph(text)
    document.save(str(docx_file))

    outcome = retrofit_bookmarks(docx_file, backup=False)
    added = outcome["bookmarks_added"]

    assert "block-bet" in added
    assert "block-gimel" in added
    # 'א' came after 'ב' in the document, so it is not claimed.
    assert "block-alef" in outcome["missing_blocks"]
|
||||
|
||||
|
||||
def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """An empty document gets no bookmarks and all twelve blocks are missing."""
    docx_file = tmp_path / "empty.docx"
    Document().save(str(docx_file))

    outcome = retrofit_bookmarks(docx_file, backup=False)

    assert outcome["bookmarks_added"] == []
    assert len(outcome["missing_blocks"]) == 12
|
||||
342
mcp-server/tests/test_docx_reviser.py
Normal file
342
mcp-server/tests/test_docx_reviser.py
Normal file
@@ -0,0 +1,342 @@
|
||||
"""בדיקות docx_reviser — Track Changes XML surgery.
|
||||
|
||||
הבדיקות יוצרות DOCX בסיסי עם bookmarks, מפעילות revisions, ובודקות:
|
||||
1. שה-XML שנוצר תקף ונטען חזרה כ-Document
|
||||
2. שה-<w:ins> / <w:del> קיימים בפורמט הנכון
|
||||
3. שה-bookmarks נשמרים אחרי עריכה
|
||||
4. שגופן David ו-RTL נשמרים
|
||||
5. שכשלונות מטופלים אלגנטית (bookmark חסר → failed, לא crash)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from docx import Document
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services import docx_reviser
|
||||
from legal_mcp.services.docx_reviser import (
|
||||
NSMAP,
|
||||
Revision,
|
||||
_w,
|
||||
apply_tracked_revisions,
|
||||
list_bookmarks,
|
||||
)
|
||||
|
||||
|
||||
# ── Test fixtures ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _insert_bookmark(paragraph, name: str, bm_id: int) -> None:
    """Wrap a paragraph's content in a bookmark.

    A <w:bookmarkStart> becomes the paragraph element's first child and a
    <w:bookmarkEnd> with the same id becomes its last child.
    """
    element = paragraph._p
    id_text = str(bm_id)

    opening = OxmlElement("w:bookmarkStart")
    opening.set(qn("w:id"), id_text)
    opening.set(qn("w:name"), name)
    element.insert(0, opening)

    closing = OxmlElement("w:bookmarkEnd")
    closing.set(qn("w:id"), id_text)
    element.append(closing)
|
||||
|
||||
|
||||
def _make_sample_docx(path: Path) -> None:
    """Write a DOCX with three bookmarked paragraphs.

    Each paragraph holds one run in the David font and carries a bookmark
    named after its block, with ids 1, 2 and 3.
    """
    document = Document()
    block_names = ("block-alef", "block-yod", "block-yod-bet")
    for bookmark_id, block_name in enumerate(block_names, start=1):
        paragraph = document.add_paragraph()
        text_run = paragraph.add_run(f"תוכן פסקה של {block_name}")
        text_run.font.name = "David"
        _insert_bookmark(paragraph, block_name, bookmark_id)
    document.save(str(path))
|
||||
|
||||
|
||||
@pytest.fixture
def sample_docx(tmp_path: Path) -> Path:
    """Path to a freshly written sample DOCX inside the test's temp directory."""
    target = tmp_path / "source.docx"
    _make_sample_docx(target)
    return target
|
||||
|
||||
|
||||
# ── list_bookmarks ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_list_bookmarks_returns_all_named(sample_docx: Path) -> None:
    """All three bookmarks of the sample document are listed."""
    expected = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(sample_docx)) == expected
|
||||
|
||||
|
||||
def test_list_bookmarks_excludes_internal(tmp_path: Path) -> None:
    """Names that begin with an underscore, such as _GoBack, are not listed."""
    docx_file = tmp_path / "internal.docx"
    document = Document()
    entries = (("visible", "block-real", 1), ("hidden", "_GoBack", 2))
    for text, bookmark_name, bookmark_id in entries:
        _insert_bookmark(document.add_paragraph(text), bookmark_name, bookmark_id)
    document.save(str(docx_file))

    assert list_bookmarks(docx_file) == ["block-real"]
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: insert_after ─────────────────────────
|
||||
|
||||
|
||||
def test_insert_after_adds_tracked_paragraph(sample_docx: Path, tmp_path: Path) -> None:
    """insert_after adds <w:ins> content and keeps the original text."""
    target = tmp_path / "out.docx"
    new_text = "פסקה חדשה שהמערכת מוסיפה."
    revision = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content=new_text,
    )
    stamp = datetime(2026, 4, 16, 14, 0, tzinfo=timezone.utc)

    batch = apply_tracked_revisions(
        sample_docx, target, [revision],
        author="מערכת AI",
        date=stamp,
    )
    assert batch.applied == 1
    assert batch.failed == 0
    assert target.exists()

    with zipfile.ZipFile(target, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    # At least one insertion marker must be present.
    assert len(tree.findall(".//w:ins", NSMAP)) >= 1

    document_text = "".join(tree.itertext())
    assert "פסקה חדשה שהמערכת מוסיפה." in document_text
    # The anchored paragraph's own text must survive.
    assert "תוכן פסקה של block-yod" in document_text
|
||||
|
||||
|
||||
def _find_ins_with_runs(tree: etree._Element) -> etree._Element | None:
    """Return the first <w:ins> that contains a <w:r>, or None if there is none."""
    candidates = (
        ins for ins in tree.iterfind(".//w:ins", NSMAP)
        if ins.find(".//w:r", NSMAP) is not None
    )
    return next(candidates, None)
|
||||
|
||||
|
||||
def test_insert_after_ins_has_author_and_date(sample_docx: Path, tmp_path: Path) -> None:
    """The run-wrapping <w:ins> carries the given author and a 'Z'-suffixed date."""
    target = tmp_path / "out.docx"
    revision = Revision(id="r1", type="insert_after",
                        anchor_bookmark="block-alef", content="test")
    apply_tracked_revisions(sample_docx, target, [revision], author="דפנה")

    with zipfile.ZipFile(target, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    inserted = _find_ins_with_runs(tree)
    assert inserted is not None
    assert inserted.get(_w("author")) == "דפנה"

    stamp = inserted.get(_w("date"))
    assert stamp is not None
    assert stamp.endswith("Z")  # ISO 8601 UTC
|
||||
|
||||
|
||||
def test_insert_after_uses_rtl_and_david(sample_docx: Path, tmp_path: Path) -> None:
    """The inserted run has an rtl flag and David as its ascii font."""
    target = tmp_path / "out.docx"
    revision = Revision(id="r1", type="insert_after",
                        anchor_bookmark="block-alef", content="מוסף")
    apply_tracked_revisions(sample_docx, target, [revision])

    with zipfile.ZipFile(target, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    inserted = _find_ins_with_runs(tree)
    assert inserted is not None

    first_run = inserted.find(".//w:r", NSMAP)
    assert first_run is not None

    run_props = first_run.find(_w("rPr"))
    assert run_props is not None
    assert run_props.find(_w("rtl")) is not None

    fonts = run_props.find(_w("rFonts"))
    assert fonts is not None
    assert fonts.get(_w("ascii")) == "David"
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: insert_before ────────────────────────
|
||||
|
||||
|
||||
def test_insert_before_places_above_anchor(sample_docx: Path, tmp_path: Path) -> None:
    """insert_before puts the new paragraph ahead of the anchored one."""
    target = tmp_path / "out.docx"
    revision = Revision(id="r1", type="insert_before",
                        anchor_bookmark="block-yod", content="לפני י.")
    batch = apply_tracked_revisions(sample_docx, target, [revision])
    assert batch.applied == 1

    with zipfile.ZipFile(target, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    paragraph_texts = [
        "".join(para.itertext()) for para in tree.findall(".//w:p", NSMAP)
    ]

    def _first_position(fragment: str) -> int:
        # Index of the first paragraph whose text contains the fragment.
        return next(
            pos for pos, text in enumerate(paragraph_texts) if fragment in text
        )

    assert _first_position("לפני י.") < _first_position("תוכן פסקה של block-yod")
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: delete ───────────────────────────────
|
||||
|
||||
|
||||
def test_delete_wraps_runs_in_w_del(sample_docx: Path, tmp_path: Path) -> None:
    """delete produces a <w:del> whose text lives in <w:delText>."""
    target = tmp_path / "out.docx"
    revision = Revision(id="r1", type="delete",
                        anchor_bookmark="block-yod", content="")
    batch = apply_tracked_revisions(sample_docx, target, [revision])
    assert batch.applied == 1

    with zipfile.ZipFile(target, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    deletions = tree.findall(".//w:del", NSMAP)
    assert len(deletions) >= 1

    # The first deletion's delText nodes must hold the removed paragraph text.
    removed_chunks = [
        node.text or ""
        for node in deletions[0].findall(".//w:delText", NSMAP)
    ]
    assert any("block-yod" in chunk for chunk in removed_chunks)
|
||||
|
||||
|
||||
# ── apply_tracked_revisions: replace ─────────────────────────────
|
||||
|
||||
|
||||
def test_replace_creates_both_ins_and_del(sample_docx: Path, tmp_path: Path) -> None:
    """replace leaves at least one <w:ins> and one <w:del> in the document."""
    target = tmp_path / "out.docx"
    revision = Revision(id="r1", type="replace",
                        anchor_bookmark="block-yod", content="תוכן חדש לחלוטין")
    batch = apply_tracked_revisions(sample_docx, target, [revision])
    assert batch.applied == 1

    with zipfile.ZipFile(target, "r") as package:
        tree = etree.fromstring(package.read("word/document.xml"))

    for marker_path in (".//w:ins", ".//w:del"):
        assert len(tree.findall(marker_path, NSMAP)) >= 1
|
||||
|
||||
|
||||
# ── Failure modes ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_missing_bookmark_returns_failed_not_crash(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """An unknown anchor yields a failed result and the output is still written."""
    target = tmp_path / "out.docx"
    revision = Revision(id="r1", type="insert_after",
                        anchor_bookmark="does-not-exist", content="x")

    batch = apply_tracked_revisions(sample_docx, target, [revision])

    assert batch.applied == 0
    assert batch.failed == 1

    only_result = batch.results[0]
    assert only_result.status == "failed"
    assert "not found" in (only_result.error or "")

    # The output file is produced even though nothing was applied.
    assert target.exists()
|
||||
|
||||
|
||||
def test_empty_revisions_list_produces_copy(sample_docx: Path, tmp_path: Path) -> None:
    """With no revisions, an output file is written and bookmarks survive."""
    target = tmp_path / "out.docx"
    outcome = apply_tracked_revisions(sample_docx, target, [])

    assert outcome.applied == 0
    assert outcome.failed == 0
    assert target.exists()

    # The bookmark set of the output must match the expected three names.
    expected_names = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(target)) == expected_names
|
||||
|
||||
|
||||
# ── Track revisions flag in settings ──────────────────────────────
|
||||
|
||||
|
||||
def test_track_revisions_flag_is_enabled(sample_docx: Path, tmp_path: Path) -> None:
    """After applying a revision, ``word/settings.xml`` must enable tracking.

    The ``<w:trackRevisions>`` element must be present as a direct child of
    the settings root and must not carry an explicit "off" value. A bare
    element (no ``w:val``) counts as enabled.
    """
    out = tmp_path / "out.docx"
    rev = Revision(id="r1", type="insert_after",
                   anchor_bookmark="block-alef", content="x")
    apply_tracked_revisions(sample_docx, out, [rev])

    with zipfile.ZipFile(out, "r") as zf:
        settings_xml = zf.read("word/settings.xml")
    settings_tree = etree.fromstring(settings_xml)

    tr = settings_tree.find(_w("trackRevisions"))
    assert tr is not None
    # Presence alone is not enough: an on/off element with w:val set to an
    # "off" literal would disable tracking while still being present.
    val = (tr.get(_w("val")) or "").strip().lower()
    assert val not in {"0", "false", "off"}, (
        f"trackRevisions is present but disabled (w:val={val!r})"
    )
|
||||
|
||||
|
||||
# ── Multiple revisions with unique IDs ────────────────────────────
|
||||
|
||||
|
||||
def test_multiple_revisions_get_unique_ids(sample_docx: Path, tmp_path: Path) -> None:
    """Every ``w:id`` on ``<w:ins>`` / ``<w:del>`` elements must be unique.

    Three revisions (two insertions, one deletion) are applied. The ids
    collected from the resulting tracked-change elements must be non-empty
    and free of duplicates.
    """
    out = tmp_path / "out.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-alef", content="ראשון"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="שני"),
        Revision(id="r3", type="delete", anchor_bookmark="block-yod-bet"),
    ]
    result = apply_tracked_revisions(sample_docx, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    collected = [
        el.get(_w("id"))
        for xpath in (".//w:ins", ".//w:del")
        for el in tree.iterfind(xpath, NSMAP)
    ]
    all_ids: list[str] = [wid for wid in collected if wid]

    # Without this guard the uniqueness check below passes vacuously when
    # no tracked-change element carries an id at all.
    assert all_ids, "no w:id found on any w:ins / w:del element"
    assert len(all_ids) == len(set(all_ids)), f"duplicate IDs: {all_ids}"
|
||||
|
||||
|
||||
# ── DOCX remains openable as Document ─────────────────────────────
|
||||
|
||||
|
||||
def test_output_docx_is_openable_by_python_docx(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """The revised file must load via python-docx and contain the new text."""
    target = tmp_path / "out.docx"
    insertion = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוכן חדש",
    )
    apply_tracked_revisions(sample_docx, target, [insertion])

    # Loading must not raise; the pre-existing text stays reachable through
    # the python-docx paragraph API.
    loaded = Document(str(target))
    paragraph_texts = [paragraph.text for paragraph in loaded.paragraphs]
    assert "block-yod" in "\n".join(paragraph_texts)

    # The tracked insertion is looked up in the raw XML text content.
    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    flattened = "".join(root.itertext())
    assert "תוכן חדש" in flattened
|
||||
|
||||
|
||||
# ── Bookmarks preserved through revisions ─────────────────────────
|
||||
|
||||
|
||||
def test_bookmarks_preserved_after_insert(sample_docx: Path, tmp_path: Path) -> None:
    """An ``insert_after`` revision must not add or drop any bookmark name."""
    target = tmp_path / "out.docx"
    insertion = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="x",
    )
    apply_tracked_revisions(sample_docx, target, [insertion])

    expected_names = {"block-alef", "block-yod", "block-yod-bet"}
    assert set(list_bookmarks(target)) == expected_names
|
||||
|
||||
|
||||
# ── Idempotency of loading/saving without changes ────────────────
|
||||
|
||||
|
||||
def test_save_without_revisions_preserves_content(
    sample_docx: Path, tmp_path: Path,
) -> None:
    """A run with an empty revision list must keep paragraph texts identical."""
    target = tmp_path / "out.docx"
    apply_tracked_revisions(sample_docx, target, [])

    source_doc = Document(str(sample_docx))
    result_doc = Document(str(target))

    # Compare paragraph by paragraph, in document order.
    source_texts = [paragraph.text for paragraph in source_doc.paragraphs]
    result_texts = [paragraph.text for paragraph in result_doc.paragraphs]
    assert source_texts == result_texts
|
||||
237
mcp-server/tests/test_track_changes_e2e.py
Normal file
237
mcp-server/tests/test_track_changes_e2e.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""בדיקות end-to-end לזרימה המלאה: exporter → retrofit → reviser.
|
||||
|
||||
הבדיקות האלה מחברות את כל השכבות של ארכיטקטורת Track Changes ומוודאות
|
||||
שהזרימה עובדת על מסמכים שנוצרו על-ידי ה-exporter עצמו (בלוקים עם bookmarks
|
||||
מובנים) ועל מסמכים רגילים שעברו retrofit.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from docx import Document
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from lxml import etree
|
||||
|
||||
from legal_mcp.services import docx_retrofit, docx_reviser
|
||||
from legal_mcp.services.docx_exporter import (
|
||||
_BOOKMARK_ID_START,
|
||||
_wrap_block_with_bookmarks,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import (
|
||||
NSMAP,
|
||||
Revision,
|
||||
_w,
|
||||
apply_tracked_revisions,
|
||||
list_bookmarks,
|
||||
)
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _make_exporter_style_docx(path: Path) -> None:
    """Write a DOCX whose blocks are wrapped via ``_wrap_block_with_bookmarks``.

    Five named blocks are emitted. Each block's content is split on newlines
    and every non-blank line becomes one stripped paragraph. Bookmark ids are
    drawn from a shared counter seeded with ``_BOOKMARK_ID_START``.
    """
    document = Document()
    id_counter = [_BOOKMARK_ID_START]

    named_blocks = (
        ("block-alef", "בפני: דפנה תמיר, יו\"ר ועדת הערר"),
        ("block-bet", "ערר מספר 1033-25"),
        ("block-heh", "רקע\nהנכס מצוי ברחוב הר בשן"),
        ("block-yod", "דיון והכרעה\nלאחר שבחנו את טענות הצדדים"),
        ("block-yod-bet", "ההחלטה\nהערר מתקבל בחלקו"),
    )

    for bookmark_name, block_text in named_blocks:
        # Bind the current text as a default so each writer keeps its own
        # content rather than the loop variable's final value.
        def writer(c=block_text):
            stripped_lines = (raw.strip() for raw in c.split("\n"))
            for text in stripped_lines:
                if text:
                    document.add_paragraph(text)

        _wrap_block_with_bookmarks(document, bookmark_name, writer, id_counter)

    document.save(str(path))
|
||||
|
||||
|
||||
def _make_user_edited_docx(path: Path) -> None:
    """Write a DOCX made only of plain paragraphs, standing in for a user edit.

    Short heading-like lines alternate with body lines. Nothing but
    ``add_paragraph`` is called, so no bookmark markup is created here.
    """
    paragraph_texts = (
        "בפני: דפנה תמיר, יו\"ר ועדת הערר מחוז ירושלים",
        "ערר מספר 9999-25",
        "רקע",
        "הנכס מצוי ברחוב שמואל הנגיד 10, ירושלים",
        "תמצית טענות הצדדים",
        "העוררים טוענים שהבנייה חורגת מהתכנית",
        "תגובת המשיבה",
        "הוועדה המקומית טוענת שהבקשה תואמת",
        "ההליכים בפני ועדת הערר",
        "קיימנו דיון בנוכחות הצדדים",
        "דיון והכרעה",
        "לאחר שבחנו את טענות הצדדים בחון מעמיק",
        "סוף דבר",
        "הערר נדחה",
    )

    document = Document()
    for paragraph_text in paragraph_texts:
        document.add_paragraph(paragraph_text)
    document.save(str(path))
|
||||
|
||||
|
||||
# ── Exporter-style (built-in bookmarks) ──────────────────────────
|
||||
|
||||
|
||||
def test_exporter_output_works_with_reviser(tmp_path: Path) -> None:
    """A bookmark-wrapped document accepts a tracked insertion directly."""
    exported = tmp_path / "exported.docx"
    _make_exporter_style_docx(exported)

    # The five block bookmarks must already be present before any revision.
    required = {
        "block-alef", "block-bet", "block-heh", "block-yod", "block-yod-bet",
    }
    assert set(list_bookmarks(exported)) >= required

    revised = tmp_path / "revised.docx"
    insertion = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="תוספת מערכת: פסק הלכה חדש",
        style="body",
    )
    outcome = apply_tracked_revisions(exported, revised, [insertion])
    assert outcome.applied == 1

    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    marker = "תוספת מערכת"
    assert marker in "".join(root.itertext())

    # The new text must sit inside a <w:ins> wrapper, i.e. be a tracked change.
    tracked_texts = [
        "".join(node.itertext()) for node in root.findall(".//w:ins", NSMAP)
    ]
    assert any(marker in text for text in tracked_texts)
|
||||
|
||||
|
||||
# ── User-edited DOCX (no bookmarks) — needs retrofit first ──────
|
||||
|
||||
|
||||
def test_retrofit_then_revise_on_user_edit(tmp_path: Path) -> None:
    """A bookmark-less document can be retrofitted and then revised."""
    edited = tmp_path / "user_edit.docx"
    _make_user_edited_docx(edited)

    # The freshly written file exposes no named bookmarks.
    assert list_bookmarks(edited) == []

    # Retrofit in place (no backup copy requested).
    report = docx_retrofit.retrofit_bookmarks(edited, backup=False)
    added_names = set(report["bookmarks_added"])
    # block-yod is the anchor used by the revision below; block-heh and
    # block-zayin are expected for the "רקע" and "תמצית טענות" headings.
    assert "block-yod" in added_names
    assert "block-heh" in added_names
    assert "block-zayin" in added_names

    # Apply one tracked insertion against the retrofitted file.
    revised = tmp_path / "revised.docx"
    insertion = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="פסק הלכה שהוסף: בבג\"ץ 1/23 נקבע כי...",
        style="body",
    )
    outcome = apply_tracked_revisions(edited, revised, [insertion])
    assert outcome.applied == 1

    # The inserted text must be found inside a <w:ins> element.
    with zipfile.ZipFile(revised, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))
    tracked_texts = [
        "".join(node.itertext()) for node in root.iterfind(".//w:ins", NSMAP)
    ]
    assert any("פסק הלכה שהוסף" in text for text in tracked_texts)
|
||||
|
||||
|
||||
def test_retrofit_preserves_original_paragraphs(tmp_path: Path) -> None:
    """Retrofitting bookmarks must leave every paragraph's text unchanged."""
    edited = tmp_path / "user.docx"
    _make_user_edited_docx(edited)

    texts_before = [
        paragraph.text for paragraph in Document(str(edited)).paragraphs
    ]

    docx_retrofit.retrofit_bookmarks(edited, backup=False)

    texts_after = [
        paragraph.text for paragraph in Document(str(edited)).paragraphs
    ]
    # Same texts in the same order before and after the retrofit.
    assert texts_before == texts_after
|
||||
|
||||
|
||||
def test_idempotent_retrofit_and_revise(tmp_path: Path) -> None:
    """A second retrofit adds nothing, and revising afterwards still works."""
    edited = tmp_path / "user.docx"
    _make_user_edited_docx(edited)

    first_pass = docx_retrofit.retrofit_bookmarks(edited, backup=False)
    second_pass = docx_retrofit.retrofit_bookmarks(edited, backup=False)

    # The second pass must report no additions and must see everything the
    # first pass added as already existing.
    assert second_pass["bookmarks_added"] == []
    already_present = set(second_pass["existing_bookmarks"])
    assert already_present >= set(first_pass["bookmarks_added"])

    # A revision against the twice-retrofitted file is applied as usual.
    revised = tmp_path / "revised.docx"
    insertion = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="x",
    )
    outcome = apply_tracked_revisions(edited, revised, [insertion])
    assert outcome.applied == 1
|
||||
|
||||
|
||||
def test_multiple_revisions_all_tracked_independently(tmp_path: Path) -> None:
    """Three insertions must produce at least three distinct ``<w:ins>`` ids.

    Distinct ids are what the test relies on to show each change is tracked
    separately. Elements without a ``w:id`` attribute are ignored so that a
    missing id cannot count as one of the distinct values.
    """
    user_file = tmp_path / "user.docx"
    _make_user_edited_docx(user_file)
    docx_retrofit.retrofit_bookmarks(user_file, backup=False)

    out = tmp_path / "revised.docx"
    revs = [
        Revision(id="r1", type="insert_after",
                 anchor_bookmark="block-heh", content="תוספת 1"),
        Revision(id="r2", type="insert_after",
                 anchor_bookmark="block-yod", content="תוספת 2"),
        Revision(id="r3", type="insert_before",
                 anchor_bookmark="block-yod-alef", content="תוספת 3"),
    ]
    result = apply_tracked_revisions(user_file, out, revs)
    assert result.applied == 3

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))

    # el.get() returns None for an element lacking w:id; drop those so the
    # threshold below counts real ids only.
    ins_ids = {
        wid
        for wid in (el.get(_w("id")) for el in tree.iterfind(".//w:ins", NSMAP))
        if wid
    }
    assert len(ins_ids) >= 3  # at least one unique id per revision
|
||||
|
||||
|
||||
def test_rtl_preserved_in_tracked_insertion(tmp_path: Path) -> None:
    """The run carrying the inserted text must be marked RTL with David font.

    The first run inside a ``<w:ins>`` whose ``<w:t>`` contains the inserted
    text is located. Its ``<w:rPr>`` must hold a ``<w:rtl>`` child and a
    ``<w:rFonts>`` whose ``w:ascii`` attribute equals ``David``.
    """
    edited = tmp_path / "user.docx"
    _make_user_edited_docx(edited)
    docx_retrofit.retrofit_bookmarks(edited, backup=False)

    target = tmp_path / "out.docx"
    insertion = Revision(
        id="r1",
        type="insert_after",
        anchor_bookmark="block-yod",
        content="עברית RTL",
    )
    apply_tracked_revisions(edited, target, [insertion])

    with zipfile.ZipFile(target, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    # Walk every run nested under a tracked insertion until the one holding
    # the inserted text is found.
    for tracked in root.iterfind(".//w:ins", NSMAP):
        for run in tracked.findall(".//w:r", NSMAP):
            run_texts = [
                node.text or "" for node in run.findall(".//w:t", NSMAP)
            ]
            if not any("עברית RTL" in text for text in run_texts):
                continue

            run_props = run.find(_w("rPr"))
            assert run_props is not None
            assert run_props.find(_w("rtl")) is not None
            fonts = run_props.find(_w("rFonts"))
            assert fonts is not None
            assert fonts.get(_w("ascii")) == "David"
            return

    pytest.fail("tracked insertion with 'עברית RTL' not found")
|
||||
Reference in New Issue
Block a user