Files
legal-ai/mcp-server/src/legal_mcp/services/docx_exporter.py
Chaim eab0ca906c feat(interim): include block-he opening in pre-ruling interim drafts
block-he (פתיחה ניטרלית) was previously emitted only in final decisions.
For interim drafts shown to the chair before ruling, including a neutral
opening helps the chair confirm framing before approving downstream blocks.
Skipped if empty, so legacy cases without block-he are unaffected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 17:25:54 +00:00

535 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Export an appeals-committee decision to a styled DOCX.

Requirements: David font, full RTL, headings, continuous paragraph numbering.
"""
from __future__ import annotations
import logging
import re
from datetime import date
from pathlib import Path
from uuid import UUID
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from legal_mcp import config
from legal_mcp.services import db
logger = logging.getLogger(__name__)
# Path to the converted decision template. Carries David font, RTL, margins,
# and styles (Title / Heading 1-2 / Normal / Quote / List Paragraph).
# Populated once by `scripts/convert_decision_template.py` from `.dotx`.
# `parents[4]` climbs from this file's directory (parents[0]) four more
# levels; the template is then looked up under `skills/docx/` there.
# Existence is not checked here — export_decision() raises if it is missing.
TEMPLATE_PATH = (
    Path(__file__).resolve().parents[4]
    / "skills" / "docx" / "decision_template.docx"
)
# ── RTL helpers ───────────────────────────────────────────────────
# Three layers of RTL are required (per skills/docx/SKILL.md):
# 1. Section: <w:bidi/> in sectPr (inherited from template)
# 2. Paragraph: <w:bidi/> directly in pPr — paragraph direction
# 3. Run: <w:rtl/> in rPr — tells Word to use cs (complex-script) font
# Without explicit font on run, Hebrew can render in the ascii slot
# (Times New Roman) — so we also force David on all four font slots.
HEBREW_FONT = "David"
def _mark_run_rtl(run) -> None:
    """Tag a run as right-to-left and pin its typeface to ``HEBREW_FONT``.

    If the run properties have no ``<w:rFonts>`` yet, one is created with the
    ascii, hAnsi, cs and eastAsia slots all set to ``HEBREW_FONT`` and placed
    first in ``<w:rPr>``. An existing ``<w:rFonts>`` is left untouched.
    ``<w:rtl/>`` is appended unless the run already carries one.
    """
    run_props = run._r.get_or_add_rPr()

    if run_props.find(qn("w:rFonts")) is None:
        font_el = OxmlElement("w:rFonts")
        for slot in ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia"):
            font_el.set(qn(slot), HEBREW_FONT)
        run_props.insert(0, font_el)

    if run_props.find(qn("w:rtl")) is None:
        run_props.append(OxmlElement("w:rtl"))
def _mark_paragraph_rtl(paragraph) -> None:
    """Make a paragraph right-to-left at paragraph level and on its mark.

    Adds ``<w:bidi/>`` as a direct child of ``<w:pPr>`` (right after
    ``<w:pStyle>`` when there is one, otherwise as the first child) and makes
    sure the paragraph-mark ``<w:rPr>`` contains ``<w:rtl/>`` so the trailing
    ¶ glyph follows the same direction. Elements already present are reused.
    """
    para_props = paragraph._p.get_or_add_pPr()

    # Paragraph direction: <w:bidi/> directly inside pPr.
    if para_props.find(qn("w:bidi")) is None:
        bidi_el = OxmlElement("w:bidi")
        style_ref = para_props.find(qn("w:pStyle"))
        if style_ref is None:
            para_props.insert(0, bidi_el)
        else:
            style_ref.addnext(bidi_el)

    # Paragraph-mark run properties: create on demand, then flag as RTL.
    mark_props = para_props.find(qn("w:rPr"))
    if mark_props is None:
        mark_props = OxmlElement("w:rPr")
        para_props.append(mark_props)
    if mark_props.find(qn("w:rtl")) is None:
        mark_props.append(OxmlElement("w:rtl"))
def _set_paragraph_jc(paragraph, value: str) -> None:
    """Force ``<w:jc w:val="..."/>`` on a paragraph, overriding style-inherited jc.

    Needed because Heading 3 in the template ships with jc=center, while body
    headings should be justified like Normal (jc=both).

    Any existing direct ``<w:jc>`` is removed first. The replacement is
    inserted at the position the WordprocessingML schema assigns to ``jc``
    inside ``<w:pPr>`` rather than appended, so it never ends up after the
    paragraph-mark ``<w:rPr>`` that ``_mark_paragraph_rtl`` adds.

    Args:
        paragraph: python-docx paragraph whose ``_p`` element is edited.
        value: Value written to the ``w:val`` attribute (e.g. ``"both"``).
    """
    pPr = paragraph._p.get_or_add_pPr()
    existing = pPr.find(qn("w:jc"))
    if existing is not None:
        pPr.remove(existing)

    jc = OxmlElement("w:jc")
    jc.set(qn("w:val"), value)

    # Children that the CT_PPr sequence places after <w:jc>.
    later_tags = {
        qn(tag)
        for tag in (
            "w:textDirection",
            "w:textAlignment",
            "w:textboxTightWrap",
            "w:outlineLvl",
            "w:divId",
            "w:cnfStyle",
            "w:rPr",
            "w:sectPr",
            "w:pPrChange",
        )
    }
    for child in pPr:
        if child.tag in later_tags:
            # First later sibling found: jc belongs immediately before it.
            child.addprevious(jc)
            return
    # No later sibling present, so the end of pPr is the right place.
    pPr.append(jc)
def _suppress_paragraph_numbering(paragraph) -> None:
    """Kill any style-inherited auto-numbering on this paragraph.

    Heading styles linked to outline lists can auto-inject א./ב./ג. markers
    in some Word versions even when the style we read doesn't show numPr.
    Setting numId=0 explicitly removes the paragraph from any list.

    Any existing direct ``<w:numPr>`` is replaced. The new element is placed
    where the WordprocessingML schema expects ``numPr`` inside ``<w:pPr>``
    (after pStyle / keepNext / keepLines / pageBreakBefore / framePr /
    widowControl, before everything else) instead of being appended after
    elements such as ``<w:bidi>``, ``<w:jc>`` or ``<w:rPr>``.

    Args:
        paragraph: python-docx paragraph whose ``_p`` element is edited.
    """
    pPr = paragraph._p.get_or_add_pPr()
    existing = pPr.find(qn("w:numPr"))
    if existing is not None:
        pPr.remove(existing)

    numPr = OxmlElement("w:numPr")
    ilvl = OxmlElement("w:ilvl")
    ilvl.set(qn("w:val"), "0")
    numId = OxmlElement("w:numId")
    numId.set(qn("w:val"), "0")
    numPr.append(ilvl)
    numPr.append(numId)

    # Children that the CT_PPr sequence places before <w:numPr>.
    earlier_tags = {
        qn(tag)
        for tag in (
            "w:pStyle",
            "w:keepNext",
            "w:keepLines",
            "w:pageBreakBefore",
            "w:framePr",
            "w:widowControl",
        )
    }
    for child in pPr:
        if child.tag not in earlier_tags:
            # First child that must follow numPr: insert just before it.
            child.addprevious(numPr)
            break
    else:
        # Only earlier siblings (or none) exist, so appending is in order.
        pPr.append(numPr)
def _clear_body(doc) -> None:
    """Drop every top-level paragraph from the document body.

    The template ships with sample paragraphs we don't want. Only direct
    ``<w:p>`` children of the body are removed; other body children,
    including the section properties (page size, margins, bidi), stay.
    """
    body_el = doc.element.body
    # findall() hands back a list, so removing while looping is safe.
    for para_el in body_el.findall(qn("w:p")):
        body_el.remove(para_el)
# ── Bookmark helpers ──────────────────────────────────────────────
# Keep a per-document bookmark id counter. Bookmarks must have unique ids
# across the whole document; we start from a high value to avoid collisions
# with whatever Word's default template already assigned.
_BOOKMARK_ID_START = 10000
def _insert_bookmark_start(paragraph, name: str, bm_id: int) -> None:
    """Insert a ``<w:bookmarkStart>`` at the beginning of a paragraph's content.

    ``<w:pPr>`` has to remain the first child of ``<w:p>``, so when the
    paragraph has properties the bookmark goes immediately after them;
    otherwise it becomes the first child.

    Args:
        paragraph: python-docx paragraph that receives the bookmark start.
        name: Bookmark name written to ``w:name``.
        bm_id: Bookmark id written to ``w:id``; the matching
            ``<w:bookmarkEnd>`` must use the same id.
    """
    el = OxmlElement("w:bookmarkStart")
    el.set(qn("w:id"), str(bm_id))
    el.set(qn("w:name"), name)

    p_el = paragraph._p
    pPr = p_el.find(qn("w:pPr"))
    if pPr is not None:
        # Keep paragraph properties first; the bookmark opens the content.
        pPr.addnext(el)
    else:
        p_el.insert(0, el)
def _insert_bookmark_end(paragraph, bm_id: int) -> None:
    """Append a ``<w:bookmarkEnd>`` carrying ``bm_id`` as the paragraph's last child."""
    end_marker = OxmlElement("w:bookmarkEnd")
    end_marker.set(qn("w:id"), str(bm_id))
    paragraph._p.append(end_marker)
def _wrap_block_with_bookmarks(doc, block_name: str,
                               write_block_fn, bm_counter: list[int]) -> None:
    """Run ``write_block_fn`` and bracket whatever it wrote with a bookmark.

    The number of top-level body paragraphs is measured before and after the
    callback. If it did not change, nothing is bookmarked and the counter is
    left alone. Otherwise ``bm_counter[0]`` is incremented, and the new value
    is used as the id of a ``bookmarkStart`` on the first new paragraph and
    a ``bookmarkEnd`` on the last one.

    Args:
        doc: python-docx document being written.
        block_name: Name given to the bookmark.
        write_block_fn: Zero-argument callable that appends the block.
        bm_counter: One-element list holding the last bookmark id used;
            mutated in place so the caller keeps state across blocks.
    """
    body_el = doc.element.body
    para_tag = qn("w:p")

    def _count_paragraphs() -> int:
        return sum(1 for child in body_el if child.tag == para_tag)

    start_idx = _count_paragraphs()
    write_block_fn()
    end_idx = _count_paragraphs()
    if end_idx == start_idx:
        # The block emitted no paragraphs, so there is nothing to anchor.
        return

    paragraphs = doc.paragraphs
    first_new, last_new = paragraphs[start_idx], paragraphs[end_idx - 1]

    bm_counter[0] += 1
    bookmark_id = bm_counter[0]
    _insert_bookmark_start(first_new, block_name, bookmark_id)
    _insert_bookmark_end(last_new, bookmark_id)
# ── Content cleanup ──────────────────────────────────────────────
# Em-dash (—, U+2014) and en-dash (, U+2013) — per chair's no-dash policy,
# strip from body text. Surrounding spaces collapse.
_DASH_RE = re.compile(r"\s*[—–]\s*")
_MULTI_SPACE_RE = re.compile(r" {2,}")
def _strip_dashes(text: str) -> str:
"""Remove em/en-dashes and collapse surrounding whitespace."""
text = _DASH_RE.sub(" ", text)
return _MULTI_SPACE_RE.sub(" ", text).strip()
# Numbered paragraph: "1. content", "23. content" — auto-numbered via
# List Paragraph style so order reflects emission, not literal prefix.
# Group 1 captures the digits, group 2 the text after the dot and whitespace.
# re.DOTALL lets group 2 span newlines if a multi-line string is ever passed;
# _write_block_to_docx only feeds it single lines (content is split on "\n").
_NUM_PREFIX_RE = re.compile(r"^(\d+)\.\s+(.*)$", re.DOTALL)
# Markdown inline bold — `**...**` (no newline or asterisk inside the markers)
_INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*")


def _add_runs_with_inline_bold(paragraph, text: str, *, bold_all: bool = False) -> None:
    """Append ``text`` to ``paragraph`` as runs, honouring ``**...**`` markup.

    Text between ``**`` markers becomes a bold run with the markers dropped,
    so `**טענה חשובה**` renders bold instead of showing literal asterisks.
    The text around the markers becomes plain runs, or bold ones when
    ``bold_all`` is true (used for headings that still carry inline-bold
    markup). Every run is passed through ``_mark_run_rtl``. Empty text adds
    no runs.
    """
    def _emit(segment: str, *, force_bold: bool) -> None:
        run = paragraph.add_run(segment)
        if force_bold:
            run.bold = True
        _mark_run_rtl(run)

    cursor = 0
    for match in _INLINE_BOLD_RE.finditer(text):
        if match.start() > cursor:
            _emit(text[cursor:match.start()], force_bold=bold_all)
        _emit(match.group(1), force_bold=True)
        cursor = match.end()

    if cursor < len(text):
        _emit(text[cursor:], force_bold=bold_all)
def _add_styled_paragraph(doc, text: str, style: str = "Normal",
                          bold: bool = False,
                          alignment=None):
    """Append a paragraph that uses one of the template's named styles.

    Font, size and spacing come from the style definition in the template;
    this function picks the style by name, marks the paragraph RTL, and
    optionally overrides the alignment. ``**...**`` markdown in ``text`` is
    rendered as inline bold runs.

    Args:
        doc: python-docx document to append to.
        text: Paragraph text; when empty, no runs are added.
        style: Name of the paragraph style to apply.
        bold: When true, every run is bold.
        alignment: Optional alignment assigned to the paragraph.

    Returns:
        The new paragraph, so callers can apply further overrides.
    """
    new_para = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(new_para)
    if alignment is not None:
        new_para.alignment = alignment
    if not text:
        return new_para
    _add_runs_with_inline_bold(new_para, text, bold_all=bold)
    return new_para
def _add_centered_paragraph(doc, text: str, *, bold: bool = True,
                            style: str = "Normal") -> None:
    """Append a centre-aligned paragraph (bold unless ``bold=False``)."""
    _add_styled_paragraph(
        doc,
        text,
        style,
        bold,
        WD_ALIGN_PARAGRAPH.CENTER,
    )
def _add_heading(doc, text: str, *, style: str) -> None:
    """Append a heading paragraph with the template's defaults overridden.

    The paragraph is marked RTL, its justification is forced to ``both``
    (overriding a centred or left-aligned style), and auto-numbering is
    suppressed so style-linked outline lists don't inject א./ב./ג. — the
    chair manages markers manually in the content. Inline ``**...**`` markup
    in ``text`` is rendered as bold runs.
    """
    heading = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(heading)
    _set_paragraph_jc(heading, "both")
    _suppress_paragraph_numbering(heading)
    if not text:
        return
    _add_runs_with_inline_bold(heading, text)
def _add_blockquote(doc, text: str) -> None:
    """Append ``text`` as a paragraph in the template's Quote style."""
    _add_styled_paragraph(doc, text, "Quote")
def _add_image_placeholder(doc, description: str) -> None:
    """Append a centred Normal paragraph showing ``[description]``."""
    placeholder = "[{}]".format(description)
    _add_styled_paragraph(
        doc,
        placeholder,
        style="Normal",
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
    )
def _add_spacer(doc) -> None:
    """Append an empty RTL Normal paragraph that acts as vertical spacing."""
    _mark_paragraph_rtl(doc.add_paragraph(style="Normal"))
# ── Main export ───────────────────────────────────────────────────
# Order in which blocks are emitted for each export mode.
# 'final' = standard 12-block decision in canonical order (block_index).
# 'interim' = pre-ruling draft requested by the chair before ratio decidendi
# is set: header blocks (א-ד) and the neutral opening (ה) when non-empty, then
# רקע → תכניות+היתרים → טענות → הליכים, omitting ruling (י), summary (יא),
# and signatures (יב).
_INTERIM_BLOCK_ORDER = [
"block-alef", # institutional header (skipped if empty — first page optional)
"block-bet", # panel (skipped if empty)
"block-gimel", # parties (skipped if empty)
"block-dalet", # "החלטה" title (skipped if empty)
"block-he", # פתיחה ניטרלית (skipped if empty — opt-in for pre-ruling drafts)
"block-vav", # רקע עובדתי
"block-tet", # תכניות + היתרים (extended)
"block-zayin", # טענות הצדדים
"block-chet", # הליכים (incl. post-hearing)
]
def _draft_filename_prefix(mode: str) -> str:
return "טיוטת-ביניים" if mode == "interim" else "טיוטה"
async def export_decision(
    case_id: UUID,
    output_path: str | None = None,
    mode: str = "final",
) -> str:
    """Export a case's decision to a DOCX file built from the template.

    Args:
        case_id: Identifier of the case whose decision is exported.
        output_path: Destination path. When omitted or empty, the file goes
            to ``exports/`` under the case directory as
            ``{prefix}-v{N}.docx``, where N is one above the highest version
            already present (starting at 1).
        mode: ``'final'`` (default) writes every stored block ordered by
            ``block_index``. ``'interim'`` writes only the blocks listed in
            ``_INTERIM_BLOCK_ORDER``, in that order.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: Unknown ``mode``; case or decision not found; the
            decision has no blocks; or, in interim mode, none of its blocks
            is listed in ``_INTERIM_BLOCK_ORDER``.
        FileNotFoundError: The DOCX template is missing.
    """
    if mode not in ("final", "interim"):
        raise ValueError(f"Unknown export mode: {mode}")

    case = await db.get_case(case_id)
    if not case:
        raise ValueError(f"Case {case_id} not found")

    decision = await db.get_decision_by_case(case_id)
    if not decision:
        raise ValueError(f"No decision for case {case_id}")

    # Fetch the decision's blocks in canonical order.
    blocks_query = (
        "SELECT block_id, block_index, title, content, word_count\n"
        "FROM decision_blocks\n"
        "WHERE decision_id = $1\n"
        "ORDER BY block_index"
    )
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(blocks_query, UUID(decision["id"]))
    if not rows:
        raise ValueError("No blocks in decision")

    if mode == "interim":
        by_id = {row["block_id"]: row for row in rows}
        ordered_blocks = []
        for wanted in _INTERIM_BLOCK_ORDER:
            if wanted in by_id:
                ordered_blocks.append(by_id[wanted])
        if not ordered_blocks:
            raise ValueError(
                "אין בלוקים מתאימים לטיוטת ביניים. הרץ write_interim_draft קודם."
            )
    else:
        ordered_blocks = list(rows)

    if not TEMPLATE_PATH.exists():
        raise FileNotFoundError(
            f"Template not found at {TEMPLATE_PATH}. "
            "Run scripts/convert_decision_template.py first."
        )

    doc = Document(str(TEMPLATE_PATH))
    _clear_body(doc)

    # Each non-empty block is wrapped in a bookmark (anchor for revisions).
    bm_counter = [_BOOKMARK_ID_START]
    for block in ordered_blocks:
        content = block["content"] or ""
        if not content.strip():
            continue
        block_id = block["block_id"]

        def _emit_block(bid=block_id, heading=block["title"], body=content) -> None:
            _write_block_to_docx(doc, bid, heading, body)

        # NOTE(review): ids in _INTERIM_BLOCK_ORDER already start with
        # "block-", so this name comes out as "block-block-…" — confirm
        # that consumers of the bookmarks expect the doubled prefix.
        _wrap_block_with_bookmarks(doc, f"block-{block_id}", _emit_block, bm_counter)

    # Default location: versioned file under cases/{case_number}/exports/.
    if not output_path:
        export_dir = config.find_case_dir(case["case_number"]) / "exports"
        export_dir.mkdir(parents=True, exist_ok=True)
        prefix = _draft_filename_prefix(mode)
        next_ver = 1
        for existing in export_dir.glob(f"{prefix}-v*.docx"):
            try:
                next_ver = max(next_ver, int(existing.stem.split("-v")[1]) + 1)
            except (IndexError, ValueError):
                # File name without a parseable version number — ignore it.
                continue
        output_path = str(export_dir / f"{prefix}-v{next_ver}.docx")

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    logger.info("DOCX exported (mode=%s): %s", mode, output_path)
    return output_path
def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
    """Render one decision block into ``doc`` using the template styles.

    Header and signature blocks (alef, bet, gimel, dalet, yod-bet) have fixed
    layouts. Every other block is treated as body content: each non-empty
    line is dash-stripped and classified as a markdown heading, standalone
    bold sub-heading, known section heading, quote, image placeholder,
    numbered paragraph, or plain paragraph.

    Args:
        doc: python-docx document to append to.
        block_id: Block identifier such as ``"block-alef"``.
        title: Block title; not used by this function.
        content: Raw block text, one paragraph per line.
    """
    lines = content.split("\n")

    # ── Header blocks (א-ד) and signatures (יב) ──
    if block_id == "block-alef":
        for raw in lines:
            text = raw.strip()
            if text:
                _add_styled_paragraph(doc, text, style="Heading 1",
                                      alignment=WD_ALIGN_PARAGRAPH.CENTER)
        return

    if block_id in ("block-bet", "block-yod-bet"):
        _add_spacer(doc)
        for raw in lines:
            text = raw.strip()
            if text:
                _add_centered_paragraph(doc, text, bold=False)
        return

    if block_id == "block-gimel":
        _add_spacer(doc)
        for raw in lines:
            text = raw.strip()
            if not text:
                continue
            is_versus = text == "נגד"
            _add_centered_paragraph(
                doc,
                "— נגד —" if is_versus else text,
                bold=is_versus,
            )
        return

    if block_id == "block-dalet":
        _add_spacer(doc)
        # Avoid style=Title: its rFonts use theme fonts (majorHAnsi / majorBidi)
        # and 28pt size — renders Hebrew oversized and in the wrong face.
        # Heading 1 carries David and proper RTL; bold + center gives the
        # same visual weight.
        title_para = _add_styled_paragraph(
            doc,
            "החלטה",
            style="Heading 1",
            bold=True,
            alignment=WD_ALIGN_PARAGRAPH.CENTER,
        )
        _suppress_paragraph_numbering(title_para)
        _add_spacer(doc)
        return

    # ── Content blocks (ה-יא): classify line by line ──
    for raw in lines:
        text = _strip_dashes(raw.strip())
        if not text:
            continue

        # Markdown "#".."######" → Heading 1-3 (levels above 3 clamp to 3).
        md_heading = re.match(r"^(#{1,6})\s+(.*)$", text)
        if md_heading:
            depth = len(md_heading.group(1))
            _add_heading(doc, md_heading.group(2).strip(),
                         style=f"Heading {min(depth, 3)}")
            continue

        # A line that is nothing but `**...**` is a sub-heading (Heading 3).
        stand_bold = re.match(r"^\*\*([^\n*]+?)\*\*$", text)
        if stand_bold:
            _add_heading(doc, stand_bold.group(1).strip(), style="Heading 3")
            continue

        if _is_section_heading(text):
            _add_heading(doc, text, style="Heading 2")
            continue

        # Lines opening with a quote mark or ">" go to the Quote style,
        # with the wrapping markers trimmed.
        if text.startswith(('"', "״", ">")):
            quoted = text.lstrip(">").strip()
            quoted = quoted.strip('"').strip("״").strip('"')
            _add_blockquote(doc, quoted)
            continue

        if "📷" in text or (text.startswith("[") and "תמונה" in text):
            _add_image_placeholder(doc, text.strip("[]📷 "))
            continue

        # Numbered body paragraph ("1. text") → List Paragraph with auto-num.
        # The literal prefix is dropped; Word renders "1. 2. 3. ..." via numId.
        numbered = _NUM_PREFIX_RE.match(text)
        if numbered:
            _add_styled_paragraph(doc, numbered.group(2).strip(),
                                  style="List Paragraph")
            continue

        _add_styled_paragraph(doc, text, style="Normal")
_SECTION_HEADING_PATTERNS = [
re.compile(p) for p in (
# Block-level titles
r"^פתח\s+דבר",
r"^רקע\s+עובדתי",
r"^תמצית\s+טענות",
r"^טענות\s+הצדדים",
r"^טענות\s+העוררי",
r"^טענות\s+המשיב",
r"^עמדת\s+הוועדה",
r"^עמדת\s+מבקשי",
r"^ההליכים\s+בפני",
r"^הליכים\s+בפני",
r"^דיון\s+והכרעה",
r"^סוף\s+דבר",
r"^סיכום",
# Subsection titles produced by legal-writer inside block-vav/block-tet
r"^המצב\s+התכנוני",
r"^הליכי\s+הרישוי",
r"^שומת\s+ההשבחה",
r"^הליך\s+השומה",
r"^הגשת\s+הערר",
r"^תכניות\s+מתאר",
r"^תכניות\s+מפורטות",
r"^תכניות\s+חלות",
r"^תכניות\s+החלות",
r"^מדיניות\s+מהנדס",
r"^היתרי\s+בני",
r"^היתר\s+בני",
)
]
def _is_section_heading(text: str) -> bool:
"""Detect legal-decision section headings — mapped to Heading 2 style."""
return any(p.search(text) for p in _SECTION_HEADING_PATTERNS)