"""ייצוא החלטת ועדת ערר ל-DOCX מעוצב. דרישות: גופן David, RTL מלא, כותרות, מספור סעיפים רציף. """ from __future__ import annotations import logging import re from datetime import date from pathlib import Path from uuid import UUID from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml import OxmlElement from docx.oxml.ns import qn from legal_mcp import config from legal_mcp.services import db logger = logging.getLogger(__name__) # Path to the converted decision template. Carries David font, RTL, margins, # and styles (Title / Heading 1-2 / Normal / Quote / List Paragraph). # Populated once by `scripts/convert_decision_template.py` from `.dotx`. TEMPLATE_PATH = ( Path(__file__).resolve().parents[4] / "skills" / "docx" / "decision_template.docx" ) # ── RTL helpers ─────────────────────────────────────────────────── # Three layers of RTL are required (per skills/docx/SKILL.md): # 1. Section: in sectPr (inherited from template) # 2. Paragraph: directly in pPr — paragraph direction # 3. Run: in rPr — tells Word to use cs (complex-script) font # Without explicit font on run, Hebrew can render in the ascii slot # (Times New Roman) — so we also force David on all four font slots. HEBREW_FONT = "David" def _mark_run_rtl(run) -> None: """Force David font on all four slots, then add .""" rPr = run._r.get_or_add_rPr() if rPr.find(qn("w:rFonts")) is None: fonts = OxmlElement("w:rFonts") fonts.set(qn("w:ascii"), HEBREW_FONT) fonts.set(qn("w:hAnsi"), HEBREW_FONT) fonts.set(qn("w:cs"), HEBREW_FONT) fonts.set(qn("w:eastAsia"), HEBREW_FONT) rPr.insert(0, fonts) if rPr.find(qn("w:rtl")) is None: rPr.append(OxmlElement("w:rtl")) def _mark_paragraph_rtl(paragraph) -> None: """Add directly to pPr (paragraph direction) and to the paragraph-mark rPr (affects trailing ¶ glyph).""" pPr = paragraph._p.get_or_add_pPr() # (2) directly in pPr — paragraph direction if pPr.find(qn("w:bidi")) is None: bidi = OxmlElement("w:bidi") pstyle = pPr.find(qn("w:pStyle")) if pstyle is not None: pstyle.addnext(bidi) else: pPr.insert(0, bidi) # paragraph-mark rPr gets so ¶ inherits RTL too rPr = pPr.find(qn("w:rPr")) if rPr is None: rPr = OxmlElement("w:rPr") pPr.append(rPr) if rPr.find(qn("w:rtl")) is None: rPr.append(OxmlElement("w:rtl")) def _set_paragraph_jc(paragraph, value: str) -> None: """Force on a paragraph, overriding style-inherited jc. Needed because Heading 3 in the template ships with jc=center — we want body headings justified right (jc=both) like Normal. """ pPr = paragraph._p.get_or_add_pPr() existing = pPr.find(qn("w:jc")) if existing is not None: pPr.remove(existing) jc = OxmlElement("w:jc") jc.set(qn("w:val"), value) pPr.append(jc) def _suppress_paragraph_numbering(paragraph) -> None: """Kill any style-inherited auto-numbering on this paragraph. Heading styles linked to outline lists can auto-inject א./ב./ג. markers in some Word versions even when the style we read doesn't show numPr. Setting numId=0 explicitly removes the paragraph from any list. """ pPr = paragraph._p.get_or_add_pPr() existing = pPr.find(qn("w:numPr")) if existing is not None: pPr.remove(existing) numPr = OxmlElement("w:numPr") ilvl = OxmlElement("w:ilvl") ilvl.set(qn("w:val"), "0") numId = OxmlElement("w:numId") numId.set(qn("w:val"), "0") numPr.append(ilvl) numPr.append(numId) pPr.append(numPr) def _clear_body(doc) -> None: """Remove all paragraphs in the document body while keeping sectPr. The template ships with sample paragraphs we don't want. Section properties (page size, margins, bidi) stay intact. """ body = doc.element.body for p in list(body.findall(qn("w:p"))): body.remove(p) # ── Bookmark helpers ────────────────────────────────────────────── # Keep a per-document bookmark id counter. Bookmarks must have unique ids # across the whole document; we start from a high value to avoid collisions # with whatever Word's default template already assigned. _BOOKMARK_ID_START = 10000 def _insert_bookmark_start(paragraph, name: str, bm_id: int) -> None: """Insert a at the beginning of a paragraph.""" el = OxmlElement("w:bookmarkStart") el.set(qn("w:id"), str(bm_id)) el.set(qn("w:name"), name) paragraph._p.insert(0, el) def _insert_bookmark_end(paragraph, bm_id: int) -> None: """Insert a at the end of a paragraph.""" el = OxmlElement("w:bookmarkEnd") el.set(qn("w:id"), str(bm_id)) paragraph._p.append(el) def _wrap_block_with_bookmarks(doc, block_name: str, write_block_fn, bm_counter: list[int]) -> None: """Write a block with bookmarkStart before and bookmarkEnd after. Uses a mutable counter (list of one int) so the caller keeps state across multiple blocks. """ # Record paragraph count before writing body = doc.element.body before_count = len([c for c in body if c.tag == qn("w:p")]) write_block_fn() after_count = len([c for c in body if c.tag == qn("w:p")]) if after_count == before_count: # Block produced no paragraphs — nothing to wrap return # Use python-docx's paragraph indexing first_new = doc.paragraphs[before_count] last_new = doc.paragraphs[after_count - 1] bm_counter[0] += 1 bm_id = bm_counter[0] _insert_bookmark_start(first_new, block_name, bm_id) _insert_bookmark_end(last_new, bm_id) # ── Content cleanup ────────────────────────────────────────────── # Em-dash (—, U+2014) and en-dash (–, U+2013) — per chair's no-dash policy, # strip from body text. Surrounding spaces collapse. _DASH_RE = re.compile(r"\s*[—–]\s*") _MULTI_SPACE_RE = re.compile(r" {2,}") def _strip_dashes(text: str) -> str: """Remove em/en-dashes and collapse surrounding whitespace.""" text = _DASH_RE.sub(" ", text) return _MULTI_SPACE_RE.sub(" ", text).strip() # Numbered paragraph: "1. content", "23. content" — auto-numbered via # List Paragraph style so order reflects emission, not literal prefix. _NUM_PREFIX_RE = re.compile(r"^(\d+)\.\s+(.*)$", re.DOTALL) # Markdown inline bold — `**...**` _INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*") def _add_runs_with_inline_bold(paragraph, text: str, *, bold_all: bool = False) -> None: """Split text on `**...**` markers, alternating plain and bold runs. Keeps `**טענה חשובה**` rendering as bold instead of leaving literal asterisks. When bold_all is True, every run is bold (used for headings that still carry inline-bold markup). """ pos = 0 for m in _INLINE_BOLD_RE.finditer(text): if m.start() > pos: plain = paragraph.add_run(text[pos:m.start()]) if bold_all: plain.bold = True _mark_run_rtl(plain) run_bold = paragraph.add_run(m.group(1)) run_bold.bold = True _mark_run_rtl(run_bold) pos = m.end() if pos < len(text): tail = paragraph.add_run(text[pos:]) if bold_all: tail.bold = True _mark_run_rtl(tail) def _add_styled_paragraph(doc, text: str, style: str = "Normal", bold: bool = False, alignment=None): """Add a paragraph using a template style. Font, size, RTL direction and spacing all come from the style definition in the template — we only pick the style by name. Renders `**...**` markdown as inline bold runs. Returns the paragraph so callers can apply further overrides. """ para = doc.add_paragraph(style=style) _mark_paragraph_rtl(para) if alignment is not None: para.alignment = alignment if text: _add_runs_with_inline_bold(para, text, bold_all=bold) return para def _add_centered_paragraph(doc, text: str, *, bold: bool = True, style: str = "Normal") -> None: _add_styled_paragraph(doc, text, style=style, bold=bold, alignment=WD_ALIGN_PARAGRAPH.CENTER) def _add_heading(doc, text: str, *, style: str) -> None: """Heading with overrides: jc=both (overrides style-center / style-left) and suppressed auto-numbering (so style-linked outline lists don't inject א./ב./ג. — chair manages markers manually in content).""" para = doc.add_paragraph(style=style) _mark_paragraph_rtl(para) _set_paragraph_jc(para, "both") _suppress_paragraph_numbering(para) if text: _add_runs_with_inline_bold(para, text) def _add_blockquote(doc, text: str) -> None: """Indented quote using the template's Quote style.""" _add_styled_paragraph(doc, text, style="Quote") def _add_image_placeholder(doc, description: str) -> None: _add_styled_paragraph(doc, f"[{description}]", style="Normal", alignment=WD_ALIGN_PARAGRAPH.CENTER) def _add_spacer(doc) -> None: """Add an empty paragraph as a visual spacer.""" para = doc.add_paragraph(style="Normal") _mark_paragraph_rtl(para) # ── Main export ─────────────────────────────────────────────────── # Order in which blocks are emitted for each export mode. # 'final' = standard 12-block decision in canonical order (block_index). # 'interim' = pre-ruling draft requested by the chair before ratio decidendi # is set: רקע → תכניות+היתרים → טענות → הליכים, omitting opening (ה), # ruling (י), summary (יא), and signatures (יב). _INTERIM_BLOCK_ORDER = [ "block-alef", # institutional header (skipped if empty — first page optional) "block-bet", # panel (skipped if empty) "block-gimel", # parties (skipped if empty) "block-dalet", # "החלטה" title (skipped if empty) "block-vav", # רקע עובדתי "block-tet", # תכניות + היתרים (extended) "block-zayin", # טענות הצדדים "block-chet", # הליכים (incl. post-hearing) ] def _draft_filename_prefix(mode: str) -> str: return "טיוטת-ביניים" if mode == "interim" else "טיוטה" async def export_decision( case_id: UUID, output_path: str | None = None, mode: str = "final", ) -> str: """ייצוא החלטה ל-DOCX. Args: case_id: מזהה התיק output_path: נתיב לשמירה (אופציונלי) mode: 'final' (ברירת מחדל) או 'interim' (טיוטת ביניים — ללא דיון/סיכום/חתימות, סדר חדש: רקע → תכניות+היתרים → טענות → הליכים) Returns: נתיב הקובץ שנוצר """ if mode not in ("final", "interim"): raise ValueError(f"Unknown export mode: {mode}") case = await db.get_case(case_id) if not case: raise ValueError(f"Case {case_id} not found") decision = await db.get_decision_by_case(case_id) if not decision: raise ValueError(f"No decision for case {case_id}") # Get blocks pool = await db.get_pool() async with pool.acquire() as conn: rows = await conn.fetch( """SELECT block_id, block_index, title, content, word_count FROM decision_blocks WHERE decision_id = $1 ORDER BY block_index""", UUID(decision["id"]), ) if not rows: raise ValueError("No blocks in decision") by_id = {r["block_id"]: r for r in rows} if mode == "interim": ordered_blocks = [by_id[bid] for bid in _INTERIM_BLOCK_ORDER if bid in by_id] if not ordered_blocks: raise ValueError( "אין בלוקים מתאימים לטיוטת ביניים. הרץ write_interim_draft קודם." ) else: ordered_blocks = list(rows) if not TEMPLATE_PATH.exists(): raise FileNotFoundError( f"Template not found at {TEMPLATE_PATH}. " "Run scripts/convert_decision_template.py first." ) doc = Document(str(TEMPLATE_PATH)) _clear_body(doc) # Write blocks with bookmarks wrapping each block (anchors for revisions) bm_counter = [_BOOKMARK_ID_START] for block in ordered_blocks: block_id = block["block_id"] content = block["content"] or "" if not content.strip(): continue _wrap_block_with_bookmarks( doc, f"block-{block_id}", lambda b=block, bid=block_id, c=content: _write_block_to_docx( doc, bid, b["title"], c, ), bm_counter, ) # Determine output path — versioned under cases/{case_number}/exports/ if not output_path: export_dir = config.find_case_dir(case["case_number"]) / "exports" export_dir.mkdir(parents=True, exist_ok=True) prefix = _draft_filename_prefix(mode) existing = sorted(export_dir.glob(f"{prefix}-v*.docx")) next_ver = 1 for p in existing: try: ver = int(p.stem.split("-v")[1]) next_ver = max(next_ver, ver + 1) except (IndexError, ValueError): pass output_path = str(export_dir / f"{prefix}-v{next_ver}.docx") Path(output_path).parent.mkdir(parents=True, exist_ok=True) doc.save(output_path) logger.info("DOCX exported (mode=%s): %s", mode, output_path) return output_path def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None: """Write a single block to the DOCX document using template styles.""" # Header blocks (א-ד) if block_id == "block-alef": for line in content.split("\n"): if line.strip(): _add_styled_paragraph(doc, line.strip(), style="Heading 1", alignment=WD_ALIGN_PARAGRAPH.CENTER) return if block_id == "block-bet": _add_spacer(doc) for line in content.split("\n"): if line.strip(): _add_centered_paragraph(doc, line.strip(), bold=False) return if block_id == "block-gimel": _add_spacer(doc) for line in content.split("\n"): stripped = line.strip() if not stripped: continue if stripped == "נגד": _add_centered_paragraph(doc, "— נגד —", bold=True) else: _add_centered_paragraph(doc, stripped, bold=False) return if block_id == "block-dalet": _add_spacer(doc) # Avoid style=Title: its rFonts use theme fonts (majorHAnsi / majorBidi) # and 28pt size — renders Hebrew oversized and in the wrong face. # Heading 1 carries David and proper RTL, bold + center gives the # same visual weight. para = _add_styled_paragraph(doc, "החלטה", style="Heading 1", alignment=WD_ALIGN_PARAGRAPH.CENTER, bold=True) _suppress_paragraph_numbering(para) _add_spacer(doc) return if block_id == "block-yod-bet": _add_spacer(doc) for line in content.split("\n"): if line.strip(): _add_centered_paragraph(doc, line.strip(), bold=False) return # Content blocks (ה-יא) — parse paragraphs for para_text in content.split("\n"): stripped = _strip_dashes(para_text.strip()) if not stripped: continue # Markdown H1/H2/H3 → template heading styles md_heading = re.match(r"^(#{1,6})\s+(.*)$", stripped) if md_heading: level = len(md_heading.group(1)) heading_text = md_heading.group(2).strip() style = "Heading 1" if level == 1 else f"Heading {min(level, 3)}" _add_heading(doc, heading_text, style=style) continue # Standalone `**...**` line — treat as a sub-heading (Heading 3) stand_bold = re.match(r"^\*\*([^\n*]+?)\*\*$", stripped) if stand_bold: _add_heading(doc, stand_bold.group(1).strip(), style="Heading 3") continue if _is_section_heading(stripped): _add_heading(doc, stripped, style="Heading 2") continue if stripped.startswith('"') or stripped.startswith("״") or stripped.startswith(">"): clean = stripped.lstrip(">").strip().strip('"').strip("״").strip('"') _add_blockquote(doc, clean) continue if "📷" in stripped or (stripped.startswith("[") and "תמונה" in stripped): _add_image_placeholder(doc, stripped.strip("[]📷 ")) continue # Numbered body paragraph ("1. text") → List Paragraph with auto-num. # The literal prefix is dropped; Word renders "1. 2. 3. ..." via numId. num_match = _NUM_PREFIX_RE.match(stripped) if num_match: body_text = num_match.group(2).strip() _add_styled_paragraph(doc, body_text, style="List Paragraph") continue _add_styled_paragraph(doc, stripped, style="Normal") _SECTION_HEADING_PATTERNS = [ re.compile(p) for p in ( # Block-level titles r"^פתח\s+דבר", r"^רקע\s+עובדתי", r"^תמצית\s+טענות", r"^טענות\s+הצדדים", r"^טענות\s+העוררי", r"^טענות\s+המשיב", r"^עמדת\s+הוועדה", r"^עמדת\s+מבקשי", r"^ההליכים\s+בפני", r"^הליכים\s+בפני", r"^דיון\s+והכרעה", r"^סוף\s+דבר", r"^סיכום", # Subsection titles produced by legal-writer inside block-vav/block-tet r"^המצב\s+התכנוני", r"^הליכי\s+הרישוי", r"^שומת\s+ההשבחה", r"^הליך\s+השומה", r"^הגשת\s+הערר", r"^תכניות\s+מתאר", r"^תכניות\s+מפורטות", r"^תכניות\s+חלות", r"^תכניות\s+החלות", r"^מדיניות\s+מהנדס", r"^היתרי\s+בני", r"^היתר\s+בני", ) ] def _is_section_heading(text: str) -> bool: """Detect legal-decision section headings — mapped to Heading 2 style.""" return any(p.search(text) for p in _SECTION_HEADING_PATTERNS)