block-he (פתיחה ניטרלית) was previously emitted only in final decisions. For interim drafts shown to the chair before ruling, including a neutral opening helps the chair confirm framing before approving downstream blocks. Skipped if empty, so legacy cases without block-he are unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
535 lines
19 KiB
Python
535 lines
19 KiB
Python
"""ייצוא החלטת ועדת ערר ל-DOCX מעוצב.
|
||
|
||
דרישות: גופן David, RTL מלא, כותרות, מספור סעיפים רציף.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import re
|
||
from datetime import date
|
||
from pathlib import Path
|
||
from uuid import UUID
|
||
|
||
from docx import Document
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.oxml import OxmlElement
|
||
from docx.oxml.ns import qn
|
||
|
||
from legal_mcp import config
|
||
from legal_mcp.services import db
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# Path to the converted decision template. Carries David font, RTL, margins,
# and styles (Title / Heading 1-2 / Normal / Quote / List Paragraph).
# Populated once by `scripts/convert_decision_template.py` from `.dotx`.
# The location is resolved relative to this source file: four directory
# levels above the file's own directory, then skills/docx/.
TEMPLATE_PATH = (
    Path(__file__).resolve().parents[4]
    / "skills" / "docx" / "decision_template.docx"
)
|
||
|
||
|
||
# ── RTL helpers ───────────────────────────────────────────────────
|
||
# Three layers of RTL are required (per skills/docx/SKILL.md):
|
||
# 1. Section: <w:bidi/> in sectPr (inherited from template)
|
||
# 2. Paragraph: <w:bidi/> directly in pPr — paragraph direction
|
||
# 3. Run: <w:rtl/> in rPr — tells Word to use cs (complex-script) font
|
||
# Without explicit font on run, Hebrew can render in the ascii slot
|
||
# (Times New Roman) — so we also force David on all four font slots.
|
||
|
||
HEBREW_FONT = "David"


def _mark_run_rtl(run) -> None:
    """Give *run* explicit Hebrew font slots and the ``<w:rtl/>`` flag.

    A ``<w:rFonts>`` element naming ``HEBREW_FONT`` for the ascii, hAnsi,
    cs and eastAsia slots is placed first in the run properties, unless the
    run already carries an ``rFonts`` element (which is then left alone).
    ``<w:rtl/>`` is appended afterwards if it is not present yet.
    """
    run_props = run._r.get_or_add_rPr()

    if run_props.find(qn("w:rFonts")) is None:
        font_el = OxmlElement("w:rFonts")
        # Same font in every slot so no script falls back to another face.
        for slot in ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia"):
            font_el.set(qn(slot), HEBREW_FONT)
        run_props.insert(0, font_el)

    if run_props.find(qn("w:rtl")) is None:
        run_props.append(OxmlElement("w:rtl"))
|
||
|
||
|
||
def _mark_paragraph_rtl(paragraph) -> None:
    """Mark *paragraph* as right-to-left at paragraph level.

    Two things are ensured on the paragraph properties:

    * a ``<w:bidi/>`` element, placed right after ``<w:pStyle>`` when the
      paragraph has one and as the first child otherwise;
    * a paragraph-mark ``<w:rPr>`` containing ``<w:rtl/>`` so the trailing
      paragraph glyph follows the same direction.

    Elements that already exist are reused, never duplicated.
    """
    para_props = paragraph._p.get_or_add_pPr()

    # Paragraph direction.
    if para_props.find(qn("w:bidi")) is None:
        bidi_el = OxmlElement("w:bidi")
        style_el = para_props.find(qn("w:pStyle"))
        if style_el is None:
            para_props.insert(0, bidi_el)
        else:
            style_el.addnext(bidi_el)

    # Paragraph-mark run properties.
    mark_props = para_props.find(qn("w:rPr"))
    if mark_props is None:
        mark_props = OxmlElement("w:rPr")
        para_props.append(mark_props)
    if mark_props.find(qn("w:rtl")) is None:
        mark_props.append(OxmlElement("w:rtl"))
|
||
|
||
|
||
def _set_paragraph_jc(paragraph, value: str) -> None:
    """Force ``<w:jc w:val="..."/>`` on a paragraph, overriding style-inherited jc.

    Needed because Heading 3 in the template ships with jc=center, while
    body headings should use jc=both like Normal.

    Any existing ``<w:jc>`` is removed first. The new element is inserted
    at its schema position: before the first child that must follow ``jc``
    in ``<w:pPr>`` (for example the paragraph-mark ``<w:rPr>``). It is only
    appended when no such child exists. Appending unconditionally would put
    ``jc`` after an existing ``rPr``, which is out of schema order.

    Args:
        paragraph: python-docx paragraph whose properties are modified.
        value: Value for the ``w:val`` attribute (e.g. ``"both"``).
    """
    pPr = paragraph._p.get_or_add_pPr()

    existing = pPr.find(qn("w:jc"))
    if existing is not None:
        pPr.remove(existing)

    jc = OxmlElement("w:jc")
    jc.set(qn("w:val"), value)

    # Children that come after <w:jc> in the pPr element sequence.
    successor_tags = {
        qn(tag)
        for tag in (
            "w:textDirection",
            "w:textAlignment",
            "w:textboxTightWrap",
            "w:outlineLvl",
            "w:divId",
            "w:cnfStyle",
            "w:rPr",
            "w:sectPr",
            "w:pPrChange",
        )
    }
    anchor = next((child for child in pPr if child.tag in successor_tags), None)
    if anchor is None:
        pPr.append(jc)
    else:
        anchor.addprevious(jc)
|
||
|
||
|
||
def _suppress_paragraph_numbering(paragraph) -> None:
    """Kill any style-inherited auto-numbering on this paragraph.

    Heading styles linked to outline lists can auto-inject א./ב./ג. markers
    in some Word versions even when the style we read doesn't show numPr.
    Setting numId=0 explicitly removes the paragraph from any list.

    Any existing ``<w:numPr>`` is replaced. The new element is inserted at
    its schema position: directly after the last of the children that may
    precede it in ``<w:pPr>`` (pStyle, keepNext, keepLines, pageBreakBefore,
    framePr, widowControl), or first when none is present. Appending would
    place it after bidi / jc / rPr, which is out of schema order.

    Args:
        paragraph: python-docx paragraph whose properties are modified.
    """
    pPr = paragraph._p.get_or_add_pPr()

    existing = pPr.find(qn("w:numPr"))
    if existing is not None:
        pPr.remove(existing)

    numPr = OxmlElement("w:numPr")
    ilvl = OxmlElement("w:ilvl")
    ilvl.set(qn("w:val"), "0")
    numId = OxmlElement("w:numId")
    numId.set(qn("w:val"), "0")
    numPr.append(ilvl)
    numPr.append(numId)

    # Children allowed before <w:numPr> in the pPr element sequence.
    predecessor_tags = {
        qn(tag)
        for tag in (
            "w:pStyle",
            "w:keepNext",
            "w:keepLines",
            "w:pageBreakBefore",
            "w:framePr",
            "w:widowControl",
        )
    }
    insert_at = 0
    for position, child in enumerate(pPr):
        if child.tag in predecessor_tags:
            insert_at = position + 1
    pPr.insert(insert_at, numPr)
|
||
|
||
|
||
def _clear_body(doc) -> None:
    """Drop every paragraph that is a direct child of the document body.

    Used to discard the sample paragraphs the template ships with. Only
    ``<w:p>`` children are removed; other body children, including the
    section properties (page size, margins, bidi), are left in place.
    """
    body_el = doc.element.body
    paragraph_tag = qn("w:p")
    # Snapshot first: removing while iterating the live element skips nodes.
    doomed = [child for child in body_el if child.tag == paragraph_tag]
    for child in doomed:
        body_el.remove(child)
|
||
|
||
|
||
# ── Bookmark helpers ──────────────────────────────────────────────
|
||
|
||
# Keep a per-document bookmark id counter. Bookmarks must have unique ids
|
||
# across the whole document; we start from a high value to avoid collisions
|
||
# with whatever Word's default template already assigned.
|
||
_BOOKMARK_ID_START = 10000


def _insert_bookmark_start(paragraph, name: str, bm_id: int) -> None:
    """Insert a ``<w:bookmarkStart>`` at the beginning of a paragraph's content.

    The marker is placed immediately after ``<w:pPr>`` when the paragraph
    has one, because the WordprocessingML schema requires ``pPr`` to be the
    first child of ``<w:p>``. Without a ``pPr`` it becomes the first child.

    Args:
        paragraph: python-docx paragraph that receives the marker.
        name: Bookmark name, written to ``w:name``.
        bm_id: Bookmark id, written to ``w:id``; the matching
            ``<w:bookmarkEnd>`` must use the same value.
    """
    el = OxmlElement("w:bookmarkStart")
    el.set(qn("w:id"), str(bm_id))
    el.set(qn("w:name"), name)

    p_el = paragraph._p
    pPr = p_el.find(qn("w:pPr"))
    if pPr is None:
        p_el.insert(0, el)
    else:
        # Keep pPr first; the bookmark opens right before the first run.
        pPr.addnext(el)
|
||
|
||
|
||
def _insert_bookmark_end(paragraph, bm_id: int) -> None:
    """Append a ``<w:bookmarkEnd>`` with id *bm_id* as the paragraph's last child."""
    end_marker = OxmlElement("w:bookmarkEnd")
    end_marker.set(qn("w:id"), str(bm_id))
    paragraph._p.append(end_marker)
|
||
|
||
|
||
def _wrap_block_with_bookmarks(doc, block_name: str,
                               write_block_fn, bm_counter: list[int]) -> None:
    """Run *write_block_fn* and bracket the paragraphs it added with a bookmark.

    The number of body-level paragraphs is measured before and after the
    call. If it did not change, nothing is bookmarked. Otherwise a
    ``bookmarkStart`` named *block_name* is put on the first new paragraph
    and the matching ``bookmarkEnd`` on the last one.

    Args:
        doc: python-docx document being built.
        block_name: Name given to the bookmark.
        write_block_fn: Zero-argument callable that appends the block.
        bm_counter: One-element list holding the last bookmark id used; it
            is incremented in place so the caller keeps state across blocks.
    """
    body_el = doc.element.body
    paragraph_tag = qn("w:p")

    def _paragraph_total() -> int:
        return sum(1 for child in body_el if child.tag == paragraph_tag)

    count_before = _paragraph_total()
    write_block_fn()
    count_after = _paragraph_total()

    if count_after == count_before:
        # The block emitted no paragraphs, so there is nothing to wrap.
        return

    body_paragraphs = doc.paragraphs
    opening = body_paragraphs[count_before]
    closing = body_paragraphs[count_after - 1]

    bm_counter[0] += 1
    new_id = bm_counter[0]
    _insert_bookmark_start(opening, block_name, new_id)
    _insert_bookmark_end(closing, new_id)
|
||
|
||
|
||
# ── Content cleanup ──────────────────────────────────────────────
|
||
|
||
# Em-dash (—, U+2014) and en-dash (–, U+2013) — per chair's no-dash policy,
|
||
# strip from body text. Surrounding spaces collapse.
|
||
_DASH_RE = re.compile(r"\s*[—–]\s*")
|
||
_MULTI_SPACE_RE = re.compile(r" {2,}")
|
||
|
||
|
||
def _strip_dashes(text: str) -> str:
|
||
"""Remove em/en-dashes and collapse surrounding whitespace."""
|
||
text = _DASH_RE.sub(" ", text)
|
||
return _MULTI_SPACE_RE.sub(" ", text).strip()
|
||
|
||
|
||
# Numbered paragraph: "1. content", "23. content" — auto-numbered via
|
||
# List Paragraph style so order reflects emission, not literal prefix.
|
||
_NUM_PREFIX_RE = re.compile(r"^(\d+)\.\s+(.*)$", re.DOTALL)
|
||
|
||
|
||
# Markdown inline bold — `**...**`
|
||
_INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*")
|
||
|
||
|
||
def _add_runs_with_inline_bold(paragraph, text: str, *, bold_all: bool = False) -> None:
|
||
"""Split text on `**...**` markers, alternating plain and bold runs.
|
||
|
||
Keeps `**טענה חשובה**` rendering as bold instead of leaving literal
|
||
asterisks. When bold_all is True, every run is bold (used for headings
|
||
that still carry inline-bold markup).
|
||
"""
|
||
pos = 0
|
||
for m in _INLINE_BOLD_RE.finditer(text):
|
||
if m.start() > pos:
|
||
plain = paragraph.add_run(text[pos:m.start()])
|
||
if bold_all:
|
||
plain.bold = True
|
||
_mark_run_rtl(plain)
|
||
run_bold = paragraph.add_run(m.group(1))
|
||
run_bold.bold = True
|
||
_mark_run_rtl(run_bold)
|
||
pos = m.end()
|
||
if pos < len(text):
|
||
tail = paragraph.add_run(text[pos:])
|
||
if bold_all:
|
||
tail.bold = True
|
||
_mark_run_rtl(tail)
|
||
|
||
|
||
def _add_styled_paragraph(doc, text: str, style: str = "Normal",
                          bold: bool = False,
                          alignment=None):
    """Append a paragraph in the named template style and return it.

    The paragraph is marked right-to-left, given *alignment* when one is
    supplied, and filled with *text* (``**...**`` markdown is rendered as
    inline bold). With ``bold=True`` every run is bold. An empty *text*
    yields a paragraph without runs.

    Returns:
        The new paragraph, so callers can apply further overrides.
    """
    new_para = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(new_para)

    if alignment is not None:
        new_para.alignment = alignment

    if not text:
        return new_para

    _add_runs_with_inline_bold(new_para, text, bold_all=bold)
    return new_para
|
||
|
||
|
||
def _add_centered_paragraph(doc, text: str, *, bold: bool = True,
                            style: str = "Normal") -> None:
    """Append a centre-aligned paragraph; its runs are bold unless ``bold=False``."""
    _add_styled_paragraph(
        doc,
        text,
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
        bold=bold,
        style=style,
    )
|
||
|
||
|
||
def _add_heading(doc, text: str, *, style: str) -> None:
    """Append a heading paragraph in *style* with two overrides applied.

    Alignment is forced to jc=both, replacing whatever the style defines,
    and auto-numbering is suppressed so style-linked outline lists do not
    inject א./ב./ג. markers; any markers are expected to be part of *text*.
    """
    heading_para = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(heading_para)
    _set_paragraph_jc(heading_para, "both")
    _suppress_paragraph_numbering(heading_para)
    if not text:
        return
    _add_runs_with_inline_bold(heading_para, text)
|
||
|
||
|
||
def _add_blockquote(doc, text: str) -> None:
    """Append *text* as a paragraph in the template's ``Quote`` style."""
    quote_style = "Quote"
    _add_styled_paragraph(doc, text, style=quote_style)
|
||
|
||
|
||
def _add_image_placeholder(doc, description: str) -> None:
    """Append a centred ``Normal`` paragraph showing *description* in square brackets."""
    placeholder_text = f"[{description}]"
    _add_styled_paragraph(
        doc,
        placeholder_text,
        style="Normal",
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
    )
|
||
|
||
|
||
def _add_spacer(doc) -> None:
    """Append an empty RTL-marked ``Normal`` paragraph that acts as vertical spacing."""
    blank = doc.add_paragraph(style="Normal")
    _mark_paragraph_rtl(blank)
|
||
|
||
|
||
# ── Main export ───────────────────────────────────────────────────
|
||
|
||
# Order in which blocks are emitted for each export mode.
|
||
# 'final' = standard 12-block decision in canonical order (block_index).
|
||
# 'interim' = pre-ruling draft requested by the chair before ratio decidendi
|
||
# is set: רקע → תכניות+היתרים → טענות → הליכים, omitting ruling (י),
# summary (יא), and signatures (יב). The header blocks (א-ד) and the
# neutral opening (ה) come first and are skipped when empty.
|
||
_INTERIM_BLOCK_ORDER = [
|
||
"block-alef", # institutional header (skipped if empty — first page optional)
|
||
"block-bet", # panel (skipped if empty)
|
||
"block-gimel", # parties (skipped if empty)
|
||
"block-dalet", # "החלטה" title (skipped if empty)
|
||
"block-he", # פתיחה ניטרלית (skipped if empty — opt-in for pre-ruling drafts)
|
||
"block-vav", # רקע עובדתי
|
||
"block-tet", # תכניות + היתרים (extended)
|
||
"block-zayin", # טענות הצדדים
|
||
"block-chet", # הליכים (incl. post-hearing)
|
||
]
|
||
|
||
|
||
def _draft_filename_prefix(mode: str) -> str:
|
||
return "טיוטת-ביניים" if mode == "interim" else "טיוטה"
|
||
|
||
|
||
async def export_decision(
    case_id: UUID,
    output_path: str | None = None,
    mode: str = "final",
) -> str:
    """Export a case's decision to a styled DOCX file.

    Args:
        case_id: Identifier of the case whose decision is exported.
        output_path: Where to save the file. When omitted, the file goes to
            the case's ``exports`` directory under the next free version
            number (``<prefix>-v<N>.docx``).
        mode: ``'final'`` (default) emits every stored block in
            ``block_index`` order. ``'interim'`` emits only the blocks
            named in ``_INTERIM_BLOCK_ORDER``, in that order.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: Unknown *mode*, case not found, no decision for the
            case, decision without blocks, or interim mode with no
            matching blocks.
        FileNotFoundError: The DOCX template is missing.
    """
    if mode != "final" and mode != "interim":
        raise ValueError(f"Unknown export mode: {mode}")

    case = await db.get_case(case_id)
    if not case:
        raise ValueError(f"Case {case_id} not found")

    decision = await db.get_decision_by_case(case_id)
    if not decision:
        raise ValueError(f"No decision for case {case_id}")

    # Load the decision's blocks in canonical order.
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT block_id, block_index, title, content, word_count
               FROM decision_blocks
               WHERE decision_id = $1
               ORDER BY block_index""",
            UUID(decision["id"]),
        )

    if not rows:
        raise ValueError("No blocks in decision")

    by_id = {}
    for row in rows:
        by_id[row["block_id"]] = row

    if mode == "interim":
        ordered_blocks = []
        for wanted_id in _INTERIM_BLOCK_ORDER:
            if wanted_id in by_id:
                ordered_blocks.append(by_id[wanted_id])
        if not ordered_blocks:
            raise ValueError(
                "אין בלוקים מתאימים לטיוטת ביניים. הרץ write_interim_draft קודם."
            )
    else:
        ordered_blocks = list(rows)

    if not TEMPLATE_PATH.exists():
        raise FileNotFoundError(
            f"Template not found at {TEMPLATE_PATH}. "
            "Run scripts/convert_decision_template.py first."
        )

    doc = Document(str(TEMPLATE_PATH))
    _clear_body(doc)

    # Each non-empty block is wrapped in a bookmark (anchor for revisions).
    bm_counter = [_BOOKMARK_ID_START]
    for block in ordered_blocks:
        block_id = block["block_id"]
        content = block["content"] or ""
        if not content.strip():
            continue

        def _emit_block(bid=block_id, blk=block, body=content) -> None:
            _write_block_to_docx(doc, bid, blk["title"], body)

        # NOTE(review): the block ids used elsewhere in this file already
        # start with "block-", so this name comes out as "block-block-...".
        # Confirm that whatever reads the bookmarks expects that.
        _wrap_block_with_bookmarks(
            doc,
            f"block-{block_id}",
            _emit_block,
            bm_counter,
        )

    # Default destination: versioned file under the case's exports directory.
    if not output_path:
        export_dir = config.find_case_dir(case["case_number"]) / "exports"
        export_dir.mkdir(parents=True, exist_ok=True)
        prefix = _draft_filename_prefix(mode)

        seen_versions = [0]
        for candidate in export_dir.glob(f"{prefix}-v*.docx"):
            try:
                seen_versions.append(int(candidate.stem.split("-v")[1]))
            except (IndexError, ValueError):
                # Not a versioned export name; ignore it.
                continue
        next_ver = max(seen_versions) + 1
        output_path = str(export_dir / f"{prefix}-v{next_ver}.docx")

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    logger.info("DOCX exported (mode=%s): %s", mode, output_path)
    return output_path
|
||
|
||
|
||
def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
    """Render one decision block into *doc* using the template styles.

    Header-type blocks (``block-alef``, ``-bet``, ``-gimel``, ``-dalet``)
    and the ``block-yod-bet`` block have fixed layouts. Every other block
    is parsed line by line: dashes are stripped and each non-empty line is
    classified, first match wins, as a markdown heading, a standalone bold
    sub-heading, a known section heading, a quotation, an image
    placeholder, a numbered paragraph or a normal paragraph.

    Args:
        doc: python-docx document to append to.
        block_id: Identifier selecting the layout.
        title: Block title; not used by the current implementation.
        content: Raw block text, one paragraph per line.
    """
    def _nonblank_lines() -> list:
        return [ln.strip() for ln in content.split("\n") if ln.strip()]

    # ── Fixed-layout blocks ──
    if block_id == "block-alef":
        for entry in _nonblank_lines():
            _add_styled_paragraph(doc, entry, style="Heading 1",
                                  alignment=WD_ALIGN_PARAGRAPH.CENTER)
        return

    if block_id in ("block-bet", "block-yod-bet"):
        _add_spacer(doc)
        for entry in _nonblank_lines():
            _add_centered_paragraph(doc, entry, bold=False)
        return

    if block_id == "block-gimel":
        _add_spacer(doc)
        for entry in _nonblank_lines():
            if entry == "נגד":
                _add_centered_paragraph(doc, "— נגד —", bold=True)
            else:
                _add_centered_paragraph(doc, entry, bold=False)
        return

    if block_id == "block-dalet":
        _add_spacer(doc)
        # Heading 1 is used instead of Title: Title's rFonts point at theme
        # fonts (majorHAnsi / majorBidi) at 28pt, which renders Hebrew
        # oversized and in the wrong face. Heading 1 carries David and
        # proper RTL; bold + centred gives the same visual weight.
        title_para = _add_styled_paragraph(
            doc, "החלטה", style="Heading 1",
            alignment=WD_ALIGN_PARAGRAPH.CENTER, bold=True,
        )
        _suppress_paragraph_numbering(title_para)
        _add_spacer(doc)
        return

    # ── Free-form content blocks ──
    for raw_line in content.split("\n"):
        line = _strip_dashes(raw_line.strip())
        if not line:
            continue

        # Markdown "#", "##", ... headings; levels above 3 map to Heading 3.
        hash_heading = re.match(r"^(#{1,6})\s+(.*)$", line)
        if hash_heading is not None:
            depth = len(hash_heading.group(1))
            _add_heading(doc, hash_heading.group(2).strip(),
                         style=f"Heading {min(depth, 3)}")
            continue

        # A line that is nothing but `**...**` is a sub-heading.
        bold_only = re.match(r"^\*\*([^\n*]+?)\*\*$", line)
        if bold_only is not None:
            _add_heading(doc, bold_only.group(1).strip(), style="Heading 3")
            continue

        if _is_section_heading(line):
            _add_heading(doc, line, style="Heading 2")
            continue

        if line.startswith(('"', "״", ">")):
            quoted = line.lstrip(">").strip()
            quoted = quoted.strip('"').strip("״").strip('"')
            _add_blockquote(doc, quoted)
            continue

        looks_like_image = "📷" in line or (
            line.startswith("[") and "תמונה" in line
        )
        if looks_like_image:
            _add_image_placeholder(doc, line.strip("[]📷 "))
            continue

        # "1. text": the literal number is dropped and the paragraph takes
        # the List Paragraph style.
        numbered = _NUM_PREFIX_RE.match(line)
        if numbered:
            _add_styled_paragraph(doc, numbered.group(2).strip(),
                                  style="List Paragraph")
            continue

        _add_styled_paragraph(doc, line, style="Normal")
|
||
|
||
|
||
_SECTION_HEADING_PATTERNS = [
|
||
re.compile(p) for p in (
|
||
# Block-level titles
|
||
r"^פתח\s+דבר",
|
||
r"^רקע\s+עובדתי",
|
||
r"^תמצית\s+טענות",
|
||
r"^טענות\s+הצדדים",
|
||
r"^טענות\s+העוררי",
|
||
r"^טענות\s+המשיב",
|
||
r"^עמדת\s+הוועדה",
|
||
r"^עמדת\s+מבקשי",
|
||
r"^ההליכים\s+בפני",
|
||
r"^הליכים\s+בפני",
|
||
r"^דיון\s+והכרעה",
|
||
r"^סוף\s+דבר",
|
||
r"^סיכום",
|
||
# Subsection titles produced by legal-writer inside block-vav/block-tet
|
||
r"^המצב\s+התכנוני",
|
||
r"^הליכי\s+הרישוי",
|
||
r"^שומת\s+ההשבחה",
|
||
r"^הליך\s+השומה",
|
||
r"^הגשת\s+הערר",
|
||
r"^תכניות\s+מתאר",
|
||
r"^תכניות\s+מפורטות",
|
||
r"^תכניות\s+חלות",
|
||
r"^תכניות\s+החלות",
|
||
r"^מדיניות\s+מהנדס",
|
||
r"^היתרי\s+בני",
|
||
r"^היתר\s+בני",
|
||
)
|
||
]
|
||
|
||
|
||
def _is_section_heading(text: str) -> bool:
|
||
"""Detect legal-decision section headings — mapped to Heading 2 style."""
|
||
return any(p.search(text) for p in _SECTION_HEADING_PATTERNS)
|