"""Export the legal analysis (analysis-and-research.md + precedents) to a DOCX file that uses דפנה's decision template styles. The template lives at `skills/docx/decision_template.docx` (converted once from `טיוטת החלטה.dotx` via `scripts/convert_decision_template.py`). We open it, wipe the sample body paragraphs, and write new content by applying style names only — never by hand-setting font/size/RTL/margins, because the template's styles.xml already carries those. Style mapping: "Title" → the document title (case number, date) "Heading 2" → top-level section headers (טענות סף / סוגיות להכרעה / מסקנות) "Normal" + bold → subsection headers (individual claim/issue) "Normal" → field label (bold run) + value "Quote" → precedent quote text "Normal" (italic) → precedent citation Output: data/cases/{case_number}/exports/ניתוח-משפטי-v{N}.docx """ from __future__ import annotations import re from pathlib import Path from typing import Any from uuid import UUID from docx import Document from docx.document import Document as DocumentT from docx.oxml.ns import qn from docx.oxml import OxmlElement from docx.text.paragraph import Paragraph from docx.text.run import Run from legal_mcp import config from legal_mcp.services import db, research_md def _mark_run_rtl(run: Run) -> None: """Mark a run as complex-script (Hebrew/Arabic) so Word uses the `cs` font slot from the style (David) rather than `ascii` (Times New Roman). Without this, runs we add programmatically render Hebrew in the ascii font — even though the paragraph style has ``. """ rPr = run._r.get_or_add_rPr() if rPr.find(qn("w:rtl")) is None: rPr.append(OxmlElement("w:rtl")) def _mark_paragraph_rtl(paragraph: Paragraph) -> None: """Add `` inside the paragraph's rPr so the paragraph mark itself is treated as RTL. The paragraph style already sets bidi direction, but empty paragraphs and trailing marks need this flag. 
""" pPr = paragraph._p.get_or_add_pPr() rPr = pPr.find(qn("w:rPr")) if rPr is None: rPr = OxmlElement("w:rPr") pPr.append(rPr) if rPr.find(qn("w:rtl")) is None: rPr.append(OxmlElement("w:rtl")) # Path to the converted template. Static — populated by # scripts/convert_decision_template.py. TEMPLATE_PATH = ( Path(__file__).resolve().parents[4] / "skills" / "docx" / "decision_template.docx" ) CHAIR_POSITION_LABEL = "עמדת ועדת הערר" CHAIR_POSITION_PLACEHOLDER = "[טרם מולאה עמדת ועדת הערר]" NUMBERED_LINE_RE = re.compile(r"^\s*(\d+)[.)]\s+(.+)$") BULLET_LINE_RE = re.compile(r"^\s*[\-\u2022\*\u25CF\u25E6]\s+(.+)$") # (א) (ב) (ג) ... — Hebrew-letter enumeration used by the authors. # We keep the marker inside the text (the author wrote it), but render the # paragraph as "List Paragraph" without the numPr so the visual indentation # matches the template's list style without adding a double "1." prefix. HEB_LETTER_LINE_RE = re.compile(r"^\s*\([א-ת]\)\s+") # A standalone **LABEL:** line (the whole trimmed line is wrapped in ** **) STANDALONE_LABEL_RE = re.compile(r"^\s*\*\*([^\n*]+?):\*\*\s*$") # A short standalone "XYZ:" line (no ** **) — acts as a sub-heading for the # paragraphs that follow. Limit to short phrases to avoid eating real # sentences that happen to end with a colon. PLAIN_LABEL_RE = re.compile(r"^\s*([^\n:]{2,40}):\s*$") # "**LABEL:** value" inline — bold label followed by prose on the same line. INLINE_LABEL_RE = re.compile(r"^\s*\*\*([^\n*]+?):\*\*\s+(.+)$") def _classify_line(line: str) -> tuple[str, str]: """Return (kind, clean_text) where kind ∈ {numbered, bullet, heb_letter, label_heading, inline_label, plain}. 
clean_text conventions: - numbered/bullet — marker stripped - heb_letter — marker kept (author supplied it) - label_heading — surrounding ** and trailing : stripped - inline_label — "LABEL\x00VALUE" (NUL-separated; _emit splits it) """ m = STANDALONE_LABEL_RE.match(line) if m: return "label_heading", m.group(1).strip() m = INLINE_LABEL_RE.match(line) if m: return "inline_label", f"{m.group(1).strip()}\x00{m.group(2).strip()}" m = NUMBERED_LINE_RE.match(line) if m: return "numbered", m.group(2).strip() m = BULLET_LINE_RE.match(line) if m: inner = m.group(1).strip() # A bullet whose only content is **LABEL:** is a heading, not a list item. # E.g. "- **נקודות פתוחות:**" m2 = STANDALONE_LABEL_RE.match(inner) if m2: return "label_heading", m2.group(1).strip() # A bullet of the form "- **LABEL:** value" → inline label. m3 = INLINE_LABEL_RE.match(inner) if m3: return "inline_label", f"{m3.group(1).strip()}\x00{m3.group(2).strip()}" return "bullet", inner if HEB_LETTER_LINE_RE.match(line): return "heb_letter", line.strip() m = PLAIN_LABEL_RE.match(line) if m: return "label_heading", m.group(1).strip() return "plain", line.strip() def _strip_numpr(paragraph: Paragraph) -> None: """Remove any from the paragraph's pPr. Used when we want the visual styling of `List Paragraph` (indent, font) without Word's auto-decimal "1." prefix — e.g. for Hebrew- letter enumeration where the author wrote (א) (ב) (ג) manually. """ pPr = paragraph._p.get_or_add_pPr() for numPr in pPr.findall(qn("w:numPr")): pPr.remove(numPr) # Characters that the code should never emit (user instruction: "no dashes"). # Applied only to code-generated text, not to user content from the md file. 
_CODE_DASH_RE = re.compile(r"[\u2013\u2014]")

# Markdown inline bold: `**...**`
_INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*")


def _no_dash(text: str) -> str:
    """Return `text` with every en dash (U+2013) and em dash (U+2014) removed.

    The characters are deleted, not replaced, so surrounding whitespace is
    left exactly as it was.
    """
    return _CODE_DASH_RE.sub("", text)


def _add_runs_with_inline_bold(paragraph: Paragraph, text: str) -> None:
    """Append `text` to `paragraph`, rendering `**...**` spans as bold runs.

    The text is first passed through `_no_dash`, then split on the inline-bold
    markers into alternating plain and bold runs. Every run is flagged RTL.
    This keeps a `**bold phrase**` rendering as bold instead of leaving the
    literal asterisks in the output.

    Note that dash stripping applies to whatever text is routed through this
    helper, including lines that come from the md file.

    Args:
        paragraph: Target paragraph; runs are appended to it.
        text: Source text, possibly containing `**...**` markers.
    """
    text = _no_dash(text)
    pos = 0
    for m in _INLINE_BOLD_RE.finditer(text):
        if m.start() > pos:
            # Plain text between the previous match and this bold span.
            plain = paragraph.add_run(text[pos : m.start()])
            _mark_run_rtl(plain)
        bold = paragraph.add_run(m.group(1))
        bold.bold = True
        _mark_run_rtl(bold)
        pos = m.end()
    if pos < len(text):
        tail = paragraph.add_run(text[pos:])
        _mark_run_rtl(tail)


def _clear_body(doc: DocumentT) -> None:
    """Remove every top-level `w:p` paragraph from the document body.

    The template ships with example paragraphs that we don't want in the
    output. Only direct `w:p` children of the body are removed: the section
    properties (`w:sectPr`) stay in place so page setup remains intact, and
    any other body-level elements are left untouched as well.
    """
    body = doc.element.body
    # Snapshot the list first: we mutate the tree while walking it.
    for p in list(body.findall(qn("w:p"))):
        body.remove(p)


def _add_paragraph(doc: DocumentT, text: str, style: str) -> Paragraph:
    """Append an RTL-flagged paragraph in `style` and return it.

    A non-empty `text` is added via `_add_runs_with_inline_bold`; an empty
    `text` yields a paragraph with no runs.
    """
    p = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(p)
    if text:
        _add_runs_with_inline_bold(p, text)
    return p


def _add_label_value(
    doc: DocumentT, label: str, value: str, *, value_italic: bool = False
) -> Paragraph:
    """Add a "Normal" paragraph with a bold "label: " run followed by a value.

    Args:
        doc: Target document.
        label: Label text; dashes are stripped and ": " is appended.
        value: Value text. Empty means only the label is written.
        value_italic: When true, the value is written as one italic run with
            no inline-bold handling (used for placeholder text).

    Returns:
        The paragraph that was added.
    """
    p = doc.add_paragraph(style="Normal")
    _mark_paragraph_rtl(p)
    run_label = p.add_run(f"{_no_dash(label)}: ")
    run_label.bold = True
    _mark_run_rtl(run_label)
    if value:
        if value_italic:
            # Placeholder text: italic, no inline-bold handling.
            run_value = p.add_run(_no_dash(value))
            run_value.italic = True
            _mark_run_rtl(run_value)
        else:
            _add_runs_with_inline_bold(p, value)
    return p


def _add_multiline_value(doc: DocumentT, label: str, value: str) -> None:
    """Render one field (label + value).

    - No non-blank lines: the bold label alone.
    - Exactly one non-blank line: inline, bold label followed by the text
      (a Heading 2 for a one-liner would look inflated).
    - Several lines: the label as its own "Heading 2" paragraph, then each
      line routed through `_emit_content_line`.

    Args:
        doc: Target document.
        label: Field label.
        value: Field content, possibly spanning several lines.
    """
    lines = [ln for ln in value.splitlines() if ln.strip()]
    if not lines:
        _add_label_value(doc, label, "")
        return

    if len(lines) == 1:
        kind, text = _classify_line(lines[0])
        if kind == "inline_label":
            # _classify_line joins label and value with a NUL for this kind.
            # Rebuild the markdown form so the NUL never reaches the document
            # and the inner label still renders bold.
            sub_label, sub_value = text.split("\x00", 1)
            text = f"**{sub_label}:** {sub_value}"
        # Single line: inline with the label regardless of kind.
        _add_label_value(doc, label, text)
        return

    _add_paragraph(doc, label, "Heading 2")
    for line in lines:
        _emit_content_line(doc, line)


def _emit_content_line(doc: DocumentT, line: str) -> None:
    """Render a single content line using the matching template style.

    Mapping by the kind returned from `_classify_line`:
        label_heading  -> "Heading 2"
        inline_label   -> bold label + value in one "Normal" paragraph
        numbered       -> "List Paragraph"
        heb_letter     -> "List Paragraph" with any direct numPr removed
                          (the author supplied the marker)
        bullet / plain -> "Normal" (bullet marker already stripped)
    """
    kind, text = _classify_line(line)
    if kind == "label_heading":
        _add_paragraph(doc, text, "Heading 2")
        return
    if kind == "inline_label":
        label, value = text.split("\x00", 1)
        _add_label_value(doc, label, value)
        return

    if kind == "numbered":
        para = doc.add_paragraph(style="List Paragraph")
    elif kind == "heb_letter":
        para = doc.add_paragraph(style="List Paragraph")
        _strip_numpr(para)
    else:
        para = doc.add_paragraph(style="Normal")
    _mark_paragraph_rtl(para)
    _add_runs_with_inline_bold(para, text)


def _format_subsection_title(item: dict[str, Any], kind_label: str) -> str:
    """Return the subsection header text for a claim/issue.

    Yields "{kind_label} {number}: {title}" when both number and title are
    present, the bare title when only the title is present, and
    "{kind_label} {number}" (stripped) otherwise. A missing or `None` title
    is treated as empty.
    """
    number = item.get("number") or ""
    title = (item.get("title") or "").strip()
    if number and title:
        return f"{kind_label} {number}: {title}"
    if title:
        return title
    return f"{kind_label} {number}".strip()


def _add_precedent(doc: DocumentT, prec: dict[str, Any]) -> None:
    """Render one precedent: quote in "Quote" style, citation in italics.

    Either part is skipped when missing or blank.
    """
    quote = (prec.get("quote") or "").strip()
    citation = (prec.get("citation") or "").strip()
    if quote:
        _add_paragraph(doc, quote, "Quote")
    if citation:
        cite_p = doc.add_paragraph(style="Normal")
        _mark_paragraph_rtl(cite_p)
        cite_run = cite_p.add_run(_no_dash(citation))
        cite_run.italic = True
        _mark_run_rtl(cite_run)


def _write_subsection(
    doc: DocumentT,
    item: dict[str, Any],
    precedents_for_item: list[dict[str, Any]],
    kind_label: str,
) -> None:
    """Write one claim/issue: header, fields, chair position, precedents.

    Args:
        doc: Target document.
        item: Parsed subsection; the keys read here are "number", "title",
            "fields" (each with "label" and "content") and "chair_position".
        precedents_for_item: Precedents attached to this subsection.
        kind_label: Prefix used in the header (e.g. the word for "issue").
    """
    # Subsection header: bolded Normal paragraph, not a Heading, so it
    # visually sits under the section's Heading 2.
    header_text = _format_subsection_title(item, kind_label)
    p = doc.add_paragraph(style="Normal")
    _mark_paragraph_rtl(p)
    run = p.add_run(_no_dash(header_text))
    run.bold = True
    _mark_run_rtl(run)

    # Regular fields (party positions, legal questions, etc.).
    for field in item.get("fields") or []:
        label = (field.get("label") or "").strip()
        content = (field.get("content") or "").strip()
        if not label:
            continue
        _add_multiline_value(doc, label, content)

    # Chair position: always rendered; a placeholder is used when empty.
    chair_position = (item.get("chair_position") or "").strip()
    if chair_position:
        _add_multiline_value(doc, CHAIR_POSITION_LABEL, chair_position)
    else:
        _add_label_value(
            doc,
            CHAIR_POSITION_LABEL,
            CHAIR_POSITION_PLACEHOLDER,
            value_italic=True,
        )

    # Precedents attached to this subsection.
    if precedents_for_item:
        p = doc.add_paragraph(style="Normal")
        _mark_paragraph_rtl(p)
        run = p.add_run("פסיקה רלוונטית:")
        run.bold = True
        _mark_run_rtl(run)
        for prec in precedents_for_item:
            _add_precedent(doc, prec)


def _add_background_section(doc: DocumentT, title: str, body: str | None) -> None:
    """Render a background "Heading 2" section from a prose body.

    Nothing is written when `body` is missing or blank. Non-blank lines are
    routed through `_emit_content_line` so bullets, `**labels:**` and
    Hebrew-letter enumerations all get the template styles.
    """
    if not body or not body.strip():
        return
    _add_paragraph(doc, title, "Heading 2")
    for raw in body.splitlines():
        if not raw.strip():
            continue
        _emit_content_line(doc, raw)


def _group_precedents(
    precedents: list[dict[str, Any]],
) -> tuple[list[dict], dict[str, list[dict]]]:
    """Split the flat precedent list into case-level and per-section groups.

    A precedent whose "section_id" is missing or `None` is case-level; all
    others are grouped under their "section_id", preserving input order.

    Returns:
        `(case_level_precedents, {section_id: [precedents]})`.
    """
    case_level: list[dict] = []
    by_section: dict[str, list[dict]] = {}
    for p in precedents:
        sid = p.get("section_id")
        if sid is None:
            case_level.append(p)
        else:
            by_section.setdefault(sid, []).append(p)
    return case_level, by_section


def _next_version(export_dir: Path) -> int:
    """Return the next version number for ניתוח-משפטי-v{N}.docx.

    Scans `export_dir` for existing exports and returns the highest parsed
    version plus one, or 1 when none is found. Files whose version part is
    not an integer are ignored.
    """
    next_ver = 1
    for p in export_dir.glob("ניתוח-משפטי-v*.docx"):
        try:
            ver = int(p.stem.split("-v")[1])
        except (IndexError, ValueError):
            continue
        next_ver = max(next_ver, ver + 1)
    return next_ver


async def build_analysis_docx(case_number: str) -> Path:
    """Build the legal-analysis DOCX for a case and save a versioned copy.

    The document is built from the template styles and written to the
    "exports" folder under the case directory.

    Args:
        case_number: Case identifier used to locate the case directory and
            the DB record.

    Returns:
        Path of the saved DOCX file.

    Raises:
        FileNotFoundError: If the template or the analysis file is missing.
    """
    if not TEMPLATE_PATH.exists():
        raise FileNotFoundError(
            f"Template not found at {TEMPLATE_PATH}. "
            "Run: python scripts/convert_decision_template.py"
        )

    case_dir = config.find_case_dir(case_number)
    analysis_path = case_dir / "documents" / "research" / "analysis-and-research.md"
    if not analysis_path.exists():
        raise FileNotFoundError(
            f"Analysis file not found for case {case_number}"
        )

    parsed = research_md.parse(analysis_path)

    # Resolve case_id so we can fetch precedents. Missing case: proceed
    # without precedents rather than failing the export.
    case_level_precedents: list[dict] = []
    precedents_by_section: dict[str, list[dict]] = {}
    case = await db.get_case_by_number(case_number)
    if case:
        case_id = case["id"]
        if not isinstance(case_id, UUID):
            # Accept either a UUID instance or its string form.
            case_id = UUID(str(case_id))
        precedents = await db.list_case_precedents(case_id)
        case_level_precedents, precedents_by_section = _group_precedents(precedents)

    doc = Document(str(TEMPLATE_PATH))
    _clear_body(doc)

    # Document title, then the date (if any) in its own paragraph.
    header = parsed.get("header") or {}
    date = (header.get("date") or "").strip()
    title_text = f"ניתוח משפטי וכתיבת עמדה בערר {case_number}"
    _add_paragraph(doc, title_text, "Heading 1")
    if date:
        p_date = doc.add_paragraph(style="Normal")
        _mark_paragraph_rtl(p_date)
        run_date = p_date.add_run(f"תאריך: {date}")
        _mark_run_rtl(run_date)

    # Background sections: printed first so the reader gets context before
    # any claims/precedents. These come only in the exported DOCX, not in
    # the web UI (the UI renders them elsewhere).
    _add_background_section(doc, "רקע לניתוח", parsed.get("represented_party"))
    _add_background_section(doc, "רקע דיוני", parsed.get("procedural_background"))
    _add_background_section(doc, "עובדות מוסכמות", parsed.get("agreed_facts"))
    _add_background_section(
        doc, "עובדות שנויות במחלוקת", parsed.get("disputed_facts")
    )

    # Case-level precedents appear at the top (they cut across claims/issues).
    if case_level_precedents:
        _add_paragraph(doc, "פסיקה כללית", "Heading 2")
        for prec in case_level_precedents:
            _add_precedent(doc, prec)

    # Threshold claims.
    threshold_claims = parsed.get("threshold_claims") or []
    if threshold_claims:
        _add_paragraph(doc, "טענות סף", "Heading 2")
        for tc in threshold_claims:
            _write_subsection(
                doc, tc, precedents_by_section.get(tc.get("id"), []), "טענת סף"
            )

    # Issues.
    issues = parsed.get("issues") or []
    if issues:
        _add_paragraph(doc, "סוגיות להכרעה", "Heading 2")
        for iss in issues:
            _write_subsection(
                doc, iss, precedents_by_section.get(iss.get("id"), []), "סוגיה"
            )

    # Conclusions.
    conclusions = (parsed.get("conclusions") or "").strip()
    if conclusions:
        _add_paragraph(doc, "מסקנות", "Heading 2")
        for raw in conclusions.splitlines():
            if not raw.strip():
                continue
            _emit_content_line(doc, raw)

    # Save a new versioned file; existing exports are never overwritten.
    export_dir = case_dir / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)
    version = _next_version(export_dir)
    out_path = export_dir / f"ניתוח-משפטי-v{version}.docx"
    doc.save(str(out_path))
    return out_path