"""ייצוא החלטת ועדת ערר ל-DOCX מעוצב. דרישות: גופן David, RTL מלא, כותרות, מספור סעיפים רציף. """ from __future__ import annotations import logging import re from datetime import date from pathlib import Path from uuid import UUID from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.oxml import OxmlElement from docx.oxml.ns import qn from legal_mcp import config from legal_mcp.services import db logger = logging.getLogger(__name__) # Path to the converted decision template. Carries David font, RTL, margins, # and styles (Title / Heading 1-2 / Normal / Quote / List Paragraph). # Populated once by `scripts/convert_decision_template.py` from `.dotx`. TEMPLATE_PATH = ( Path(__file__).resolve().parents[4] / "skills" / "docx" / "decision_template.docx" ) # ── RTL helpers ─────────────────────────────────────────────────── # Three layers of RTL are required (per skills/docx/SKILL.md): # 1. Section: in sectPr (inherited from template) # 2. Paragraph: directly in pPr — paragraph direction # 3. Run: in rPr — tells Word to use cs (complex-script) font # Without explicit font on run, Hebrew can render in the ascii slot # (Times New Roman) — so we also force David on all four font slots. 
HEBREW_FONT = "David"

# Child order of <w:pPr> as required by the CT_PPr xsd:sequence (ECMA-376).
# Word expects paragraph properties in this order; blindly appending a child
# (e.g. <w:jc> after <w:rPr>) yields schema-invalid XML, so every helper below
# inserts through `_insert_ppr_child`.
_PPR_CHILD_ORDER = (
    "w:pStyle", "w:keepNext", "w:keepLines", "w:pageBreakBefore", "w:framePr",
    "w:widowControl", "w:numPr", "w:suppressLineNumbers", "w:pBdr", "w:shd",
    "w:tabs", "w:suppressAutoHyphens", "w:kinsoku", "w:wordWrap",
    "w:overflowPunct", "w:topLinePunct", "w:autoSpaceDE", "w:autoSpaceDN",
    "w:bidi", "w:adjustRightInd", "w:snapToGrid", "w:spacing", "w:ind",
    "w:contextualSpacing", "w:mirrorIndents", "w:suppressOverlap", "w:jc",
    "w:textDirection", "w:textAlignment", "w:textboxTightWrap",
    "w:outlineLvl", "w:divId", "w:cnfStyle", "w:rPr", "w:sectPr",
    "w:pPrChange",
)


def _insert_ppr_child(pPr, child, tag: str) -> None:
    """Insert `child` into `pPr` at the position the schema sequence requires.

    Args:
        pPr: the <w:pPr> element to modify.
        child: the new element (not yet attached anywhere).
        tag: prefixed tag name of `child` (e.g. "w:jc"); must be listed in
            `_PPR_CHILD_ORDER`.
    """
    position = _PPR_CHILD_ORDER.index(tag)
    later_tags = {qn(name) for name in _PPR_CHILD_ORDER[position + 1:]}
    for sibling in pPr:
        if sibling.tag in later_tags:
            # First existing sibling that must come after us: go right before it.
            sibling.addprevious(child)
            return
    pPr.append(child)


def _replace_ppr_child(pPr, child, tag: str) -> None:
    """Drop any existing `tag` child of `pPr`, then insert `child` in order."""
    existing = pPr.find(qn(tag))
    if existing is not None:
        pPr.remove(existing)
    _insert_ppr_child(pPr, child, tag)


def _mark_run_rtl(run) -> None:
    """Force David on all four font slots, then add <w:rtl/> to the run.

    Existing <w:rFonts> / <w:rtl> elements are left untouched, so calling this
    twice on the same run is harmless.
    """
    rPr = run._r.get_or_add_rPr()
    if rPr.find(qn("w:rFonts")) is None:
        fonts = OxmlElement("w:rFonts")
        for slot in ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia"):
            fonts.set(qn(slot), HEBREW_FONT)
        rPr.insert(0, fonts)
    if rPr.find(qn("w:rtl")) is None:
        rPr.append(OxmlElement("w:rtl"))


def _mark_paragraph_rtl(paragraph) -> None:
    """Add <w:bidi/> to pPr and <w:rtl/> to the paragraph-mark rPr.

    <w:bidi/> sets the paragraph direction; the <w:rtl/> inside pPr/rPr makes
    the trailing paragraph-mark glyph inherit RTL as well.
    """
    pPr = paragraph._p.get_or_add_pPr()

    # (2) <w:bidi/> directly in pPr — paragraph direction.
    if pPr.find(qn("w:bidi")) is None:
        _insert_ppr_child(pPr, OxmlElement("w:bidi"), "w:bidi")

    # Paragraph-mark rPr gets <w:rtl/> so the mark inherits RTL too.
    mark_rpr = pPr.find(qn("w:rPr"))
    if mark_rpr is None:
        mark_rpr = OxmlElement("w:rPr")
        _insert_ppr_child(pPr, mark_rpr, "w:rPr")
    if mark_rpr.find(qn("w:rtl")) is None:
        mark_rpr.append(OxmlElement("w:rtl"))


def _set_paragraph_jc(paragraph, value: str) -> None:
    """Force <w:jc w:val=value> on a paragraph, overriding style-inherited jc.

    Needed because Heading 3 in the template ships with jc=center — we want
    body headings justified (jc=both) like Normal.
    """
    jc = OxmlElement("w:jc")
    jc.set(qn("w:val"), value)
    _replace_ppr_child(paragraph._p.get_or_add_pPr(), jc, "w:jc")


def _set_paragraph_num_id(paragraph, num_id: int) -> None:
    """Replace the paragraph's <w:numPr> with one pointing at `num_id`, level 0."""
    numPr = OxmlElement("w:numPr")
    ilvl = OxmlElement("w:ilvl")
    ilvl.set(qn("w:val"), "0")
    numId = OxmlElement("w:numId")
    numId.set(qn("w:val"), str(num_id))
    numPr.append(ilvl)
    numPr.append(numId)
    _replace_ppr_child(paragraph._p.get_or_add_pPr(), numPr, "w:numPr")


def _suppress_paragraph_numbering(paragraph) -> None:
    """Kill any style-inherited auto-numbering on this paragraph.

    Heading styles linked to outline lists can auto-inject א./ב./ג. markers in
    some Word versions even when the style we read doesn't show numPr.
    Setting numId=0 explicitly removes the paragraph from any list.
    """
    _set_paragraph_num_id(paragraph, 0)


def _ensure_decision_numbering(doc) -> int:
    """T9 — define a single continuous decimal list (RTL) and return its numId.

    Decisions are ALWAYS sequentially numbered (1. 2. 3. ...). The template
    ships no numbering definition, so previously the body paragraphs were
    stripped of their manual "N." prefix and styled "List Paragraph" — which
    carries NO numPr, yielding UNNUMBERED output. Here we inject one decimal
    abstractNum + num into the numbering part once per document; body
    paragraphs then reference it (real Word auto-numbering → renumbers
    automatically, copy-pastes cleanly).

    The resulting numId is cached on the document object as
    `_decision_num_id`, so repeated calls return the same list.
    """
    cached = getattr(doc, "_decision_num_id", None)
    if cached is not None:
        return cached

    numbering = doc.part.numbering_part.element  # <w:numbering>

    def _next_id(tag: str, attr: str) -> int:
        """Return max(existing numeric ids) + 1 for `tag`, or 1 if none exist."""
        highest = 0
        found = False
        for el in numbering.findall(qn(tag)):
            raw = el.get(qn(attr))
            if raw and raw.isdigit():
                highest = max(highest, int(raw)) if found else int(raw)
                found = True
        return highest + 1 if found else 1

    abstract_id = _next_id("w:abstractNum", "w:abstractNumId")
    num_id = _next_id("w:num", "w:numId")

    # <w:abstractNum>: one single-level decimal list, "%1." label.
    abstract = OxmlElement("w:abstractNum")
    abstract.set(qn("w:abstractNumId"), str(abstract_id))
    mlt = OxmlElement("w:multiLevelType")
    mlt.set(qn("w:val"), "singleLevel")
    abstract.append(mlt)

    lvl = OxmlElement("w:lvl")
    lvl.set(qn("w:ilvl"), "0")
    for tag, val in (
        ("w:start", "1"),
        ("w:numFmt", "decimal"),
        ("w:lvlText", "%1."),
        ("w:lvlJc", "right"),
    ):
        el = OxmlElement(tag)
        el.set(qn("w:val"), val)
        lvl.append(el)
    lvl_ppr = OxmlElement("w:pPr")
    ind = OxmlElement("w:ind")
    ind.set(qn("w:start"), "720")
    ind.set(qn("w:hanging"), "360")
    lvl_ppr.append(ind)
    lvl.append(lvl_ppr)
    abstract.append(lvl)

    # <w:num>: the concrete list instance paragraphs refer to.
    num = OxmlElement("w:num")
    num.set(qn("w:numId"), str(num_id))
    anum_ref = OxmlElement("w:abstractNumId")
    anum_ref.set(qn("w:val"), str(abstract_id))
    num.append(anum_ref)

    # <w:numbering> children are ordered: numPicBullet*, abstractNum*, num*,
    # numIdMacAtCleanup?. Place both new elements accordingly.
    abstracts = numbering.findall(qn("w:abstractNum"))
    pic_bullets = numbering.findall(qn("w:numPicBullet"))
    if abstracts:
        abstracts[-1].addnext(abstract)
    elif pic_bullets:
        pic_bullets[-1].addnext(abstract)
    else:
        numbering.insert(0, abstract)

    cleanup = numbering.find(qn("w:numIdMacAtCleanup"))
    if cleanup is not None:
        cleanup.addprevious(num)
    else:
        numbering.append(num)

    doc._decision_num_id = num_id
    return num_id


def _apply_list_numbering(paragraph, num_id: int) -> None:
    """Attach paragraph to the continuous decision list (real auto-numbering)."""
    _set_paragraph_num_id(paragraph, num_id)


def _clear_body(doc) -> None:
    """Remove all paragraphs in the document body while keeping sectPr.

    The template ships with sample paragraphs we don't want. Only direct
    <w:p> children of the body are removed; section properties (page size,
    margins, bidi) stay intact.
    """
    body = doc.element.body
    for p in list(body.findall(qn("w:p"))):
        body.remove(p)


# ── Bookmark helpers ──────────────────────────────────────────────
# Keep a per-document bookmark id counter. Bookmarks must have unique ids
# across the whole document; we start from a high value to avoid collisions
# with whatever Word's default template already assigned.
_BOOKMARK_ID_START = 10000


def _insert_bookmark_start(paragraph, name: str, bm_id: int) -> None:
    """Insert a <w:bookmarkStart> at the beginning of a paragraph's content.

    <w:pPr> must remain the first child of <w:p>, so when the paragraph has
    properties the bookmark goes immediately after them rather than at
    index 0.
    """
    el = OxmlElement("w:bookmarkStart")
    el.set(qn("w:id"), str(bm_id))
    el.set(qn("w:name"), name)
    p = paragraph._p
    pPr = p.find(qn("w:pPr"))
    if pPr is not None:
        pPr.addnext(el)
    else:
        p.insert(0, el)


def _insert_bookmark_end(paragraph, bm_id: int) -> None:
    """Insert a <w:bookmarkEnd> at the end of a paragraph."""
    el = OxmlElement("w:bookmarkEnd")
    el.set(qn("w:id"), str(bm_id))
    paragraph._p.append(el)


def _wrap_block_with_bookmarks(doc, block_name: str, write_block_fn,
                               bm_counter: list[int]) -> None:
    """Write a block with bookmarkStart before and bookmarkEnd after.

    Args:
        doc: the python-docx document being built.
        block_name: bookmark name to assign to the block.
        write_block_fn: zero-argument callable that appends the block's
            paragraphs to `doc`.
        bm_counter: mutable counter (list of one int) so the caller keeps
            bookmark-id state across multiple blocks; incremented only when
            a bookmark is actually written.
    """
    body = doc.element.body
    p_tag = qn("w:p")

    # Count body-level paragraphs before and after to locate the new ones.
    before_count = sum(1 for child in body if child.tag == p_tag)
    write_block_fn()
    after_count = sum(1 for child in body if child.tag == p_tag)

    if after_count == before_count:
        # Block produced no paragraphs — nothing to wrap.
        return

    # doc.paragraphs lists the same body-level paragraphs; build it once.
    paragraphs = doc.paragraphs
    first_new = paragraphs[before_count]
    last_new = paragraphs[after_count - 1]

    bm_counter[0] += 1
    bm_id = bm_counter[0]
    _insert_bookmark_start(first_new, block_name, bm_id)
    _insert_bookmark_end(last_new, bm_id)


# ── Content cleanup ──────────────────────────────────────────────
# Em-dash (—, U+2014) and en-dash (–, U+2013) — per chair's no-dash policy,
# strip from body text. Surrounding spaces collapse.
_DASH_RE = re.compile(r"\s*[—–]\s*")
_MULTI_SPACE_RE = re.compile(r" {2,}")


def _strip_dashes(text: str) -> str:
    """Replace em/en-dashes (with surrounding whitespace) by a single space.

    Runs of two or more spaces are then collapsed and the result is stripped.
    """
    without_dashes = _DASH_RE.sub(" ", text)
    return _MULTI_SPACE_RE.sub(" ", without_dashes).strip()


# Numbered paragraph: "1. content", "23. content" — auto-numbered via
# List Paragraph style so order reflects emission, not literal prefix.
_NUM_PREFIX_RE = re.compile(r"^(\d+)\.\s+(.*)$", re.DOTALL)

# Markdown inline bold — `**...**`
_INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*")

# Markdown heading line: 1-6 '#' characters, whitespace, then the title.
_MD_HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")

# A line consisting solely of one `**...**` span.
_STANDALONE_BOLD_RE = re.compile(r"^\*\*([^\n*]+?)\*\*$")


def _add_rtl_run(paragraph, text: str, *, bold: bool) -> None:
    """Append one RTL-marked run, optionally bold.

    Runs carry <w:rtl/>, which makes Word apply complex-script formatting;
    bold for complex-script text is controlled by <w:bCs>, not <w:b>. Both
    are set so the run is bold regardless of how the text is classified.
    """
    run = paragraph.add_run(text)
    if bold:
        run.bold = True
        run.font.cs_bold = True
    _mark_run_rtl(run)


def _add_runs_with_inline_bold(paragraph, text: str, *, bold_all: bool = False) -> None:
    """Split text on `**...**` markers, alternating plain and bold runs.

    Keeps `**טענה חשובה**` rendering as bold instead of leaving literal
    asterisks. When bold_all is True, every run is bold (used for headings
    that still carry inline-bold markup).
    """
    cursor = 0
    for match in _INLINE_BOLD_RE.finditer(text):
        if match.start() > cursor:
            _add_rtl_run(paragraph, text[cursor:match.start()], bold=bold_all)
        _add_rtl_run(paragraph, match.group(1), bold=True)
        cursor = match.end()
    if cursor < len(text):
        _add_rtl_run(paragraph, text[cursor:], bold=bold_all)


def _add_styled_paragraph(doc, text: str, style: str = "Normal",
                          bold: bool = False, alignment=None):
    """Add a paragraph using a template style.

    Font, size, RTL direction and spacing all come from the style definition
    in the template — we only pick the style by name. Renders `**...**`
    markdown as inline bold runs. Returns the paragraph so callers can apply
    further overrides.
    """
    para = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(para)
    if alignment is not None:
        para.alignment = alignment
    if text:
        _add_runs_with_inline_bold(para, text, bold_all=bold)
    return para


def _add_centered_paragraph(doc, text: str, *, bold: bool = True,
                            style: str = "Normal") -> None:
    """Add a centered paragraph (bold by default) in the given style."""
    _add_styled_paragraph(
        doc, text, style=style, bold=bold,
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
    )


def _add_heading(doc, text: str, *, style: str) -> None:
    """Add a heading paragraph with justification and numbering overrides.

    Forces jc=both (overrides style-center / style-left) and suppresses
    auto-numbering, so style-linked outline lists don't inject א./ב./ג. —
    the chair manages markers manually in content.
    """
    para = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(para)
    _set_paragraph_jc(para, "both")
    _suppress_paragraph_numbering(para)
    if text:
        _add_runs_with_inline_bold(para, text)


def _add_blockquote(doc, text: str) -> None:
    """Indented quote using the template's Quote style."""
    _add_styled_paragraph(doc, text, style="Quote")


def _add_image_placeholder(doc, description: str) -> None:
    """Add a centered "[description]" line standing in for an image."""
    _add_styled_paragraph(
        doc, f"[{description}]", style="Normal",
        alignment=WD_ALIGN_PARAGRAPH.CENTER,
    )


def _add_spacer(doc) -> None:
    """Add an empty paragraph as a visual spacer."""
    para = doc.add_paragraph(style="Normal")
    _mark_paragraph_rtl(para)


# ── Main export ───────────────────────────────────────────────────

# Order in which blocks are emitted for each export mode.
# 'final'   = standard 12-block decision in canonical order (block_index).
# 'interim' = pre-ruling draft requested by the chair before ratio decidendi
#             is set: background → plans+permits → claims → proceedings,
#             omitting ruling (י), summary (יא), and signatures (יב). The
#             opening (ה) is included only when it has content.
_INTERIM_BLOCK_ORDER = [
    "block-alef",   # institutional header (skipped if empty — first page optional)
    "block-bet",    # panel (skipped if empty)
    "block-gimel",  # parties (skipped if empty)
    "block-dalet",  # "החלטה" title (skipped if empty)
    "block-he",     # neutral opening (skipped if empty — opt-in for pre-ruling drafts)
    "block-vav",    # factual background
    "block-tet",    # plans + permits (extended)
    "block-zayin",  # parties' claims
    "block-chet",   # proceedings (incl. post-hearing)
]


def _draft_filename_prefix(mode: str) -> str:
    """Return the export filename prefix for `mode` ('interim' or anything else)."""
    return "טיוטת-ביניים" if mode == "interim" else "טיוטה"


def _next_versioned_export_path(export_dir: Path, prefix: str) -> str:
    """Return `<export_dir>/<prefix>-v<N>.docx` with N one above the highest existing.

    Existing files whose version suffix is not an integer are ignored; with
    no parseable files the version is 1.
    """
    next_ver = 1
    for existing in sorted(export_dir.glob(f"{prefix}-v*.docx")):
        try:
            ver = int(existing.stem.split("-v")[1])
        except (IndexError, ValueError):
            continue
        next_ver = max(next_ver, ver + 1)
    return str(export_dir / f"{prefix}-v{next_ver}.docx")


async def export_decision(
    case_id: UUID,
    output_path: str | None = None,
    mode: str = "final",
) -> str:
    """Export a decision to DOCX.

    Args:
        case_id: identifier of the case.
        output_path: where to save the file (optional). When omitted, a
            versioned path under the case directory's `exports/` folder is
            chosen.
        mode: 'final' (default) or 'interim' (interim draft — blocks are
            emitted in `_INTERIM_BLOCK_ORDER`, which omits the ruling,
            summary and signature blocks).

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: unknown mode, missing case or decision, no blocks, or no
            blocks suitable for an interim draft.
        FileNotFoundError: the DOCX template is missing.
    """
    if mode not in ("final", "interim"):
        raise ValueError(f"Unknown export mode: {mode}")

    case = await db.get_case(case_id)
    if not case:
        raise ValueError(f"Case {case_id} not found")

    decision = await db.get_decision_by_case(case_id)
    if not decision:
        raise ValueError(f"No decision for case {case_id}")

    # Get blocks
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """SELECT block_id, block_index, title, content, word_count
               FROM decision_blocks WHERE decision_id = $1
               ORDER BY block_index""",
            UUID(decision["id"]),
        )

    if not rows:
        raise ValueError("No blocks in decision")

    if mode == "interim":
        by_id = {row["block_id"]: row for row in rows}
        ordered_blocks = [
            by_id[bid] for bid in _INTERIM_BLOCK_ORDER if bid in by_id
        ]
        if not ordered_blocks:
            raise ValueError(
                "אין בלוקים מתאימים לטיוטת ביניים. הרץ write_interim_draft קודם."
            )
    else:
        ordered_blocks = list(rows)

    if not TEMPLATE_PATH.exists():
        raise FileNotFoundError(
            f"Template not found at {TEMPLATE_PATH}. "
            "Run scripts/convert_decision_template.py first."
        )
    doc = Document(str(TEMPLATE_PATH))
    _clear_body(doc)

    # Write blocks with bookmarks wrapping each block (anchors for revisions).
    bm_counter = [_BOOKMARK_ID_START]
    for block in ordered_blocks:
        block_id = block["block_id"]
        content = block["content"] or ""
        if not content.strip():
            continue
        # NOTE(review): the block ids handled in this file already start with
        # "block-", so the bookmark name comes out as "block-block-…" —
        # confirm with whatever consumes these anchors before changing it.
        _wrap_block_with_bookmarks(
            doc,
            f"block-{block_id}",
            lambda b=block, bid=block_id, c=content: _write_block_to_docx(
                doc, bid, b["title"], c,
            ),
            bm_counter,
        )

    # Determine output path — versioned under cases/{case_number}/exports/
    if not output_path:
        export_dir = config.find_case_dir(case["case_number"]) / "exports"
        export_dir.mkdir(parents=True, exist_ok=True)
        output_path = _next_versioned_export_path(
            export_dir, _draft_filename_prefix(mode)
        )

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    logger.info("DOCX exported (mode=%s): %s", mode, output_path)
    return output_path


def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
    """Write a single block to the DOCX document using template styles.

    Header and signature blocks get fixed layouts; every other block is
    parsed line by line into headings, quotes, image placeholders, numbered
    paragraphs and plain paragraphs. `title` is accepted for interface
    compatibility and is not used here.
    """
    lines = content.split("\n")

    # Header blocks (א-ד)
    if block_id == "block-alef":
        for line in lines:
            if line.strip():
                _add_styled_paragraph(
                    doc, line.strip(), style="Heading 1",
                    alignment=WD_ALIGN_PARAGRAPH.CENTER,
                )
        return

    # Panel (block-bet) and signatures (block-yod-bet) share one layout:
    # a spacer followed by centered, non-bold lines.
    if block_id in ("block-bet", "block-yod-bet"):
        _add_spacer(doc)
        for line in lines:
            if line.strip():
                _add_centered_paragraph(doc, line.strip(), bold=False)
        return

    if block_id == "block-gimel":
        _add_spacer(doc)
        for line in lines:
            stripped = line.strip()
            if not stripped:
                continue
            if stripped == "נגד":
                _add_centered_paragraph(doc, "— נגד —", bold=True)
            else:
                _add_centered_paragraph(doc, stripped, bold=False)
        return

    if block_id == "block-dalet":
        _add_spacer(doc)
        # Avoid style=Title: its rFonts use theme fonts (majorHAnsi / majorBidi)
        # and 28pt size — renders Hebrew oversized and in the wrong face.
        # Heading 1 carries David and proper RTL, bold + center gives the
        # same visual weight.
        para = _add_styled_paragraph(
            doc, "החלטה", style="Heading 1",
            alignment=WD_ALIGN_PARAGRAPH.CENTER, bold=True,
        )
        _suppress_paragraph_numbering(para)
        _add_spacer(doc)
        return

    # Content blocks (ה-יא) — parse paragraphs
    for para_text in lines:
        stripped = _strip_dashes(para_text.strip())
        if not stripped:
            continue

        # Markdown headings → template heading styles (levels above 3 map to
        # Heading 3).
        md_heading = _MD_HEADING_RE.match(stripped)
        if md_heading:
            level = len(md_heading.group(1))
            _add_heading(
                doc, md_heading.group(2).strip(),
                style=f"Heading {min(level, 3)}",
            )
            continue

        # Standalone `**...**` line — treat as a sub-heading (Heading 3)
        stand_bold = _STANDALONE_BOLD_RE.match(stripped)
        if stand_bold:
            _add_heading(doc, stand_bold.group(1).strip(), style="Heading 3")
            continue

        if _is_section_heading(stripped):
            _add_heading(doc, stripped, style="Heading 2")
            continue

        if stripped.startswith(('"', "״", ">")):
            clean = stripped.lstrip(">").strip().strip('"').strip("״").strip('"')
            _add_blockquote(doc, clean)
            continue

        if "📷" in stripped or (stripped.startswith("[") and "תמונה" in stripped):
            _add_image_placeholder(doc, stripped.strip("[]📷 "))
            continue

        # Numbered body paragraph ("1. text") → real Word auto-numbering (T9).
        # The literal prefix is dropped and a numPr referencing the document's
        # continuous decimal list is attached, so Word renders "1. 2. 3. ..."
        # itself (renumbers on edit, copy-pastes without stray digits).
        num_match = _NUM_PREFIX_RE.match(stripped)
        if num_match:
            body_text = num_match.group(2).strip()
            para = _add_styled_paragraph(doc, body_text, style="List Paragraph")
            _apply_list_numbering(para, _ensure_decision_numbering(doc))
            continue

        _add_styled_paragraph(doc, stripped, style="Normal")


_SECTION_HEADING_PATTERNS = [
    re.compile(p) for p in (
        # Block-level titles
        r"^פתח\s+דבר",
        r"^רקע\s+עובדתי",
        r"^תמצית\s+טענות",
        r"^טענות\s+הצדדים",
        r"^טענות\s+העוררי",
        r"^טענות\s+המשיב",
        r"^עמדת\s+הוועדה",
        r"^עמדת\s+מבקשי",
        r"^ההליכים\s+בפני",
        r"^הליכים\s+בפני",
        r"^דיון\s+והכרעה",
        r"^סוף\s+דבר",
        r"^סיכום",
        # Subsection titles produced by legal-writer inside block-vav/block-tet
        r"^המצב\s+התכנוני",
        r"^הליכי\s+הרישוי",
        r"^שומת\s+ההשבחה",
        r"^הליך\s+השומה",
        r"^הגשת\s+הערר",
        r"^תכניות\s+מתאר",
        r"^תכניות\s+מפורטות",
        r"^תכניות\s+חלות",
        r"^תכניות\s+החלות",
        r"^מדיניות\s+מהנדס",
        r"^היתרי\s+בני",
        r"^היתר\s+בני",
    )
]


def _is_section_heading(text: str) -> bool:
    """Detect legal-decision section headings — mapped to Heading 2 style.

    Returns True when `text` starts with any of the known section-title
    prefixes in `_SECTION_HEADING_PATTERNS`.
    """
    return any(pattern.search(text) for pattern in _SECTION_HEADING_PATTERNS)