DOCX exporter: 3-layer RTL + David font on all slots
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m30s
All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m30s
Hebrew was rendering LTR, or in the Times New Roman fallback font, in some Word contexts.
Root cause: incomplete RTL marking and missing font hints at the run level.

Three layers of RTL are required (per skills/docx/SKILL.md):
1. Section: <w:bidi/> in sectPr (now inherited from the template).
2. Paragraph: <w:bidi/> directly in pPr (paragraph direction).
3. Run: <w:rtl/> in rPr — tells Word to use the cs (complex-script) font.

Without an explicit font on the run, Hebrew renders in the ascii slot (Times New Roman). Force David on all four slots (ascii / hAnsi / cs / eastAsia) so every shaping path picks the correct font.

Changes:
- TEMPLATE_PATH now points to skills/docx/decision_template.docx (carries David, RTL, margins, styles); replaces the hard-coded constants.
- _mark_run_rtl: writes rFonts on all four slots, then appends <w:rtl/>.
- _mark_paragraph_rtl: places <w:bidi/> directly in pPr (not nested in rPr — that was the bug), and adds <w:rtl/> to the paragraph-mark rPr.
- _set_paragraph_jc: forces an explicit jc, overriding the style-inherited value.

Tests:
- test_mark_paragraph_rtl_adds_bidi_directly_in_pPr — guards against the regression where bidi was nested inside rPr.
- test_mark_run_rtl_forces_david_on_all_font_slots — ensures all four font slots are set, not just cs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -15,47 +15,112 @@ from docx import Document
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from docx.shared import Cm, Pt, RGBColor
|
||||
|
||||
from legal_mcp import config
|
||||
from legal_mcp.services import db
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Constants ─────────────────────────────────────────────────────
|
||||
|
||||
FONT_NAME = "David"
|
||||
FONT_SIZE_BODY = Pt(12)
|
||||
FONT_SIZE_TITLE = Pt(16)
|
||||
FONT_SIZE_HEADING = Pt(14)
|
||||
LINE_SPACING = 1.5
|
||||
PAGE_MARGIN = Cm(2.5)
|
||||
# Path to the converted decision template. Carries David font, RTL, margins,
|
||||
# and styles (Title / Heading 1-2 / Normal / Quote / List Paragraph).
|
||||
# Populated once by `scripts/convert_decision_template.py` from `.dotx`.
|
||||
TEMPLATE_PATH = (
|
||||
Path(__file__).resolve().parents[4]
|
||||
/ "skills" / "docx" / "decision_template.docx"
|
||||
)
|
||||
|
||||
|
||||
# ── RTL helpers ───────────────────────────────────────────────────
|
||||
# Three layers of RTL are required (per skills/docx/SKILL.md):
|
||||
# 1. Section: <w:bidi/> in sectPr (inherited from template)
|
||||
# 2. Paragraph: <w:bidi/> directly in pPr — paragraph direction
|
||||
# 3. Run: <w:rtl/> in rPr — tells Word to use cs (complex-script) font
|
||||
# Without explicit font on run, Hebrew can render in the ascii slot
|
||||
# (Times New Roman) — so we also force David on all four font slots.
|
||||
|
||||
def _set_rtl_paragraph(paragraph) -> None:
    """Append ``<w:bidi w:val="1"/>`` to the paragraph's ``pPr``.

    The element is appended unconditionally at the end of ``pPr``; calling
    this twice on the same paragraph adds a second ``<w:bidi>`` child.
    """
    paragraph_props = paragraph._element.get_or_add_pPr()
    bidi_flag = OxmlElement("w:bidi")
    bidi_flag.set(qn("w:val"), "1")
    paragraph_props.append(bidi_flag)
|
||||
HEBREW_FONT = "David"
|
||||
|
||||
|
||||
def _set_rtl_run(run) -> None:
    """Append ``<w:rtl w:val="1"/>`` to the run's ``rPr``.

    The element is appended unconditionally at the end of ``rPr``.
    """
    run_props = run._element.get_or_add_rPr()
    rtl_flag = OxmlElement("w:rtl")
    rtl_flag.set(qn("w:val"), "1")
    run_props.append(rtl_flag)
|
||||
def _mark_run_rtl(run) -> None:
    """Force ``HEBREW_FONT`` on all four font slots, then add ``<w:rtl/>``.

    The four slots (``ascii`` / ``hAnsi`` / ``cs`` / ``eastAsia``) are written
    on the run's ``<w:rFonts>`` whether that element already exists or has to
    be created. A pre-existing ``rFonts`` that carries only some slots is
    therefore completed instead of being left as-is.

    ``<w:rtl/>`` is added only if the run does not already have one, so the
    function can be called repeatedly on the same run.
    """
    rPr = run._r.get_or_add_rPr()

    fonts = rPr.find(qn("w:rFonts"))
    if fonts is None:
        fonts = OxmlElement("w:rFonts")
        # In the rPr child sequence only rStyle precedes rFonts, so keep a
        # character-style reference (if any) in front of the new element.
        r_style = rPr.find(qn("w:rStyle"))
        if r_style is not None:
            r_style.addnext(fonts)
        else:
            rPr.insert(0, fonts)

    # Theme-font attributes take precedence over the explicit slot attributes,
    # so drop them; otherwise the explicit font below would be ignored.
    for theme_attr in ("w:asciiTheme", "w:hAnsiTheme", "w:cstheme", "w:eastAsiaTheme"):
        fonts.attrib.pop(qn(theme_attr), None)

    for slot in ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia"):
        fonts.set(qn(slot), HEBREW_FONT)

    if rPr.find(qn("w:rtl")) is None:
        rPr.append(OxmlElement("w:rtl"))
|
||||
|
||||
|
||||
def _set_rtl_section(section) -> None:
    """Append ``<w:bidi w:val="1"/>`` to the section's ``sectPr``.

    The element is appended unconditionally at the end of ``sectPr``.
    """
    bidi_flag = OxmlElement("w:bidi")
    bidi_flag.set(qn("w:val"), "1")
    section._sectPr.append(bidi_flag)
|
||||
def _mark_paragraph_rtl(paragraph) -> None:
    """Mark a paragraph as right-to-left.

    Adds ``<w:bidi/>`` as a direct child of ``pPr`` (paragraph direction) and
    ``<w:rtl/>`` to the paragraph-mark ``rPr`` (affects the trailing ¶ glyph).
    Each element is added only when missing, so repeated calls are harmless.

    ``pPr`` children form an ordered sequence in WordprocessingML, so new
    elements are inserted in front of the first existing child that must
    follow them, and appended only when no such child exists.
    """
    pPr = paragraph._p.get_or_add_pPr()

    def first_existing(tags):
        """Return the first child of pPr whose tag is one of *tags*, or None."""
        wanted = {qn(tag) for tag in tags}
        return next((child for child in pPr if child.tag in wanted), None)

    # (2) <w:bidi/> directly in pPr — paragraph direction
    if pPr.find(qn("w:bidi")) is None:
        bidi = OxmlElement("w:bidi")
        following = first_existing((
            "w:adjustRightInd", "w:snapToGrid", "w:spacing", "w:ind",
            "w:contextualSpacing", "w:mirrorIndents", "w:suppressOverlap",
            "w:jc", "w:textDirection", "w:textAlignment",
            "w:textboxTightWrap", "w:outlineLvl", "w:divId", "w:cnfStyle",
            "w:rPr", "w:sectPr", "w:pPrChange",
        ))
        if following is not None:
            following.addprevious(bidi)
        else:
            pPr.append(bidi)

    # paragraph-mark rPr gets <w:rtl/> so ¶ inherits RTL too
    rPr = pPr.find(qn("w:rPr"))
    if rPr is None:
        rPr = OxmlElement("w:rPr")
        # Only sectPr and pPrChange may come after the paragraph-mark rPr.
        following = first_existing(("w:sectPr", "w:pPrChange"))
        if following is not None:
            following.addprevious(rPr)
        else:
            pPr.append(rPr)
    if rPr.find(qn("w:rtl")) is None:
        rPr.append(OxmlElement("w:rtl"))
|
||||
|
||||
|
||||
def _set_paragraph_jc(paragraph, value: str) -> None:
    """Force ``<w:jc w:val="..."/>`` on a paragraph, overriding style-inherited jc.

    Needed because Heading 3 in the template ships with jc=center — we want
    body headings justified (jc=both) like Normal.

    Any existing ``<w:jc>`` on the paragraph is removed first. The new element
    is inserted in front of the first ``pPr`` child that must follow ``jc``
    (notably the paragraph-mark ``rPr``), and appended only when there is
    none, so the ``pPr`` child order stays valid.

    Args:
        paragraph: python-docx paragraph whose ``pPr`` is modified in place.
        value: value written to ``w:val`` (e.g. ``"both"``, ``"center"``).
    """
    pPr = paragraph._p.get_or_add_pPr()

    for stale in pPr.findall(qn("w:jc")):
        pPr.remove(stale)

    jc = OxmlElement("w:jc")
    jc.set(qn("w:val"), value)

    must_follow = {
        qn(tag)
        for tag in (
            "w:textDirection", "w:textAlignment", "w:textboxTightWrap",
            "w:outlineLvl", "w:divId", "w:cnfStyle",
            "w:rPr", "w:sectPr", "w:pPrChange",
        )
    }
    following = next((child for child in pPr if child.tag in must_follow), None)
    if following is not None:
        following.addprevious(jc)
    else:
        pPr.append(jc)
|
||||
|
||||
|
||||
def _suppress_paragraph_numbering(paragraph) -> None:
    """Kill any style-inherited auto-numbering on this paragraph.

    Heading styles linked to outline lists can auto-inject א./ב./ג. markers
    in some Word versions even when the style we read doesn't show numPr.
    Setting numId=0 explicitly removes the paragraph from any list.

    Any existing ``<w:numPr>`` is replaced. The new element is inserted right
    after the last ``pPr`` child that is allowed to precede it (``pStyle``,
    ``keepNext``, ...), i.e. ahead of ``bidi`` / ``jc`` / ``rPr``, so the
    ``pPr`` child order stays valid.
    """
    pPr = paragraph._p.get_or_add_pPr()

    for stale in pPr.findall(qn("w:numPr")):
        pPr.remove(stale)

    numPr = OxmlElement("w:numPr")
    ilvl = OxmlElement("w:ilvl")
    ilvl.set(qn("w:val"), "0")
    numId = OxmlElement("w:numId")
    numId.set(qn("w:val"), "0")
    numPr.append(ilvl)
    numPr.append(numId)

    may_precede = {
        qn(tag)
        for tag in (
            "w:pStyle", "w:keepNext", "w:keepLines",
            "w:pageBreakBefore", "w:framePr", "w:widowControl",
        )
    }
    # Index just past the last child that may precede numPr (0 if none).
    insert_at = 0
    for position, child in enumerate(pPr):
        if child.tag in may_precede:
            insert_at = position + 1
    pPr.insert(insert_at, numPr)
|
||||
|
||||
|
||||
def _clear_body(doc) -> None:
    """Drop every paragraph that is a direct child of the document body.

    The template ships with sample paragraphs we don't want. Only ``<w:p>``
    children of the body are removed; the body's ``sectPr`` (page size,
    margins, bidi) and any other non-paragraph children are left in place.
    """
    body = doc.element.body
    # findall() returns a fresh list, so removing while looping is safe.
    sample_paragraphs = body.findall(qn("w:p"))
    for paragraph_el in sample_paragraphs:
        body.remove(paragraph_el)
|
||||
|
||||
|
||||
# ── Bookmark helpers ──────────────────────────────────────────────
|
||||
@@ -109,61 +174,109 @@ def _wrap_block_with_bookmarks(doc, block_name: str,
|
||||
_insert_bookmark_end(last_new, bm_id)
|
||||
|
||||
|
||||
def _add_paragraph(doc, text: str, style: str = "Normal",
|
||||
bold: bool = False, font_size=None,
|
||||
alignment=None, space_after: Pt | None = None) -> None:
|
||||
"""Add an RTL paragraph with David font."""
|
||||
para = doc.add_paragraph()
|
||||
_set_rtl_paragraph(para)
|
||||
# ── Content cleanup ──────────────────────────────────────────────
|
||||
|
||||
if alignment:
|
||||
# Em-dash (—, U+2014) and en-dash (–, U+2013) — per chair's no-dash policy,
|
||||
# strip from body text. Surrounding spaces collapse.
|
||||
_DASH_RE = re.compile(r"\s*[—–]\s*")
|
||||
_MULTI_SPACE_RE = re.compile(r" {2,}")
|
||||
|
||||
|
||||
def _strip_dashes(text: str) -> str:
|
||||
"""Remove em/en-dashes and collapse surrounding whitespace."""
|
||||
text = _DASH_RE.sub(" ", text)
|
||||
return _MULTI_SPACE_RE.sub(" ", text).strip()
|
||||
|
||||
|
||||
# Numbered paragraph: "1. content", "23. content" — auto-numbered via
|
||||
# List Paragraph style so order reflects emission, not literal prefix.
|
||||
_NUM_PREFIX_RE = re.compile(r"^(\d+)\.\s+(.*)$", re.DOTALL)
|
||||
|
||||
|
||||
# Markdown inline bold — `**...**`
|
||||
_INLINE_BOLD_RE = re.compile(r"\*\*([^\n*]+?)\*\*")
|
||||
|
||||
|
||||
def _add_runs_with_inline_bold(paragraph, text: str, *, bold_all: bool = False) -> None:
    """Append *text* to *paragraph* as runs, honouring `**...**` bold markers.

    Text outside the markers becomes plain runs and text inside them becomes
    bold runs, so `**טענה חשובה**` renders bold instead of showing literal
    asterisks. With ``bold_all=True`` the plain segments are bold as well
    (used for headings that still carry inline-bold markup). Every run created
    here is passed through ``_mark_run_rtl``.
    """

    def emit(segment: str, make_bold: bool) -> None:
        # One run per segment; bold is only ever switched on, never forced off.
        run = paragraph.add_run(segment)
        if make_bold:
            run.bold = True
        _mark_run_rtl(run)

    cursor = 0
    for match in _INLINE_BOLD_RE.finditer(text):
        start, end = match.span()
        if start > cursor:
            emit(text[cursor:start], bold_all)
        emit(match.group(1), True)
        cursor = end

    # Whatever follows the last marker (or the whole text if none matched).
    if cursor < len(text):
        emit(text[cursor:], bold_all)
|
||||
|
||||
|
||||
def _add_styled_paragraph(doc, text: str, style: str = "Normal",
|
||||
bold: bool = False,
|
||||
alignment=None):
|
||||
"""Add a paragraph using a template style.
|
||||
|
||||
Font, size, RTL direction and spacing all come from the style
|
||||
definition in the template — we only pick the style by name.
|
||||
Renders `**...**` markdown as inline bold runs.
|
||||
|
||||
Returns the paragraph so callers can apply further overrides.
|
||||
"""
|
||||
para = doc.add_paragraph(style=style)
|
||||
_mark_paragraph_rtl(para)
|
||||
|
||||
if alignment is not None:
|
||||
para.alignment = alignment
|
||||
else:
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||||
|
||||
run = para.add_run(text)
|
||||
run.font.name = FONT_NAME
|
||||
run.font.size = font_size or FONT_SIZE_BODY
|
||||
run.bold = bold
|
||||
_set_rtl_run(run)
|
||||
if text:
|
||||
_add_runs_with_inline_bold(para, text, bold_all=bold)
|
||||
|
||||
# Line spacing
|
||||
pf = para.paragraph_format
|
||||
pf.line_spacing = LINE_SPACING
|
||||
if space_after is not None:
|
||||
pf.space_after = space_after
|
||||
return para
|
||||
|
||||
|
||||
def _add_centered_paragraph(doc, text: str, bold: bool = True,
|
||||
font_size=None) -> None:
|
||||
"""Add centered RTL paragraph."""
|
||||
_add_paragraph(doc, text, bold=bold, font_size=font_size,
|
||||
alignment=WD_ALIGN_PARAGRAPH.CENTER)
|
||||
def _add_centered_paragraph(doc, text: str, *, bold: bool = True,
                            style: str = "Normal") -> None:
    """Add a centre-aligned paragraph via ``_add_styled_paragraph``.

    ``style`` and ``bold`` are forwarded unchanged; the alignment is always
    ``WD_ALIGN_PARAGRAPH.CENTER``. The created paragraph is not returned.
    """
    centered = WD_ALIGN_PARAGRAPH.CENTER
    _add_styled_paragraph(doc, text, style=style, bold=bold, alignment=centered)
|
||||
|
||||
|
||||
def _add_heading(doc, text: str, *, style: str) -> None:
    """Add a heading paragraph in *style* with the exporter's overrides.

    After creating the paragraph this applies, in order: RTL marking, an
    explicit jc=both (overriding a style-level center / left alignment), and
    suppressed auto-numbering (so style-linked outline lists don't inject
    א./ב./ג. — the chair manages markers manually in the content). Non-empty
    *text* is then added as runs with `**...**` rendered as inline bold.
    """
    heading = doc.add_paragraph(style=style)
    _mark_paragraph_rtl(heading)
    _set_paragraph_jc(heading, "both")
    _suppress_paragraph_numbering(heading)
    if not text:
        return
    _add_runs_with_inline_bold(heading, text)
|
||||
|
||||
|
||||
def _add_blockquote(doc, text: str) -> None:
|
||||
"""Add indented blockquote paragraph."""
|
||||
para = doc.add_paragraph()
|
||||
_set_rtl_paragraph(para)
|
||||
para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||||
|
||||
run = para.add_run(text)
|
||||
run.font.name = FONT_NAME
|
||||
run.font.size = Pt(11)
|
||||
run.italic = True
|
||||
_set_rtl_run(run)
|
||||
|
||||
pf = para.paragraph_format
|
||||
pf.left_indent = Cm(1.5)
|
||||
pf.right_indent = Cm(1.5)
|
||||
pf.line_spacing = LINE_SPACING
|
||||
"""Indented quote using the template's Quote style."""
|
||||
_add_styled_paragraph(doc, text, style="Quote")
|
||||
|
||||
|
||||
def _add_image_placeholder(doc, description: str) -> None:
|
||||
"""Add image placeholder box."""
|
||||
_add_paragraph(doc, f"[{description}]",
|
||||
alignment=WD_ALIGN_PARAGRAPH.CENTER,
|
||||
font_size=Pt(10))
|
||||
_add_styled_paragraph(doc, f"[{description}]", style="Normal",
|
||||
alignment=WD_ALIGN_PARAGRAPH.CENTER)
|
||||
|
||||
|
||||
def _add_spacer(doc) -> None:
    """Insert an empty, RTL-marked "Normal" paragraph as a visual spacer."""
    _mark_paragraph_rtl(doc.add_paragraph(style="Normal"))
|
||||
|
||||
|
||||
# ── Main export ───────────────────────────────────────────────────
|
||||
@@ -241,16 +354,14 @@ async def export_decision(
|
||||
else:
|
||||
ordered_blocks = list(rows)
|
||||
|
||||
# Create document
|
||||
doc = Document()
|
||||
if not TEMPLATE_PATH.exists():
|
||||
raise FileNotFoundError(
|
||||
f"Template not found at {TEMPLATE_PATH}. "
|
||||
"Run scripts/convert_decision_template.py first."
|
||||
)
|
||||
|
||||
# Set page margins
|
||||
for section in doc.sections:
|
||||
section.top_margin = PAGE_MARGIN
|
||||
section.bottom_margin = PAGE_MARGIN
|
||||
section.left_margin = PAGE_MARGIN
|
||||
section.right_margin = PAGE_MARGIN
|
||||
_set_rtl_section(section)
|
||||
doc = Document(str(TEMPLATE_PATH))
|
||||
_clear_body(doc)
|
||||
|
||||
# Write blocks with bookmarks wrapping each block (anchors for revisions)
|
||||
bm_counter = [_BOOKMARK_ID_START]
|
||||
@@ -291,93 +402,132 @@ async def export_decision(
|
||||
|
||||
|
||||
def _write_block_to_docx(doc, block_id: str, title: str, content: str) -> None:
|
||||
"""Write a single block to the DOCX document."""
|
||||
"""Write a single block to the DOCX document using template styles."""
|
||||
# Header blocks (א-ד)
|
||||
if block_id == "block-alef":
|
||||
for line in content.split("\n"):
|
||||
if line.strip():
|
||||
_add_centered_paragraph(doc, line.strip(), bold=True, font_size=FONT_SIZE_HEADING)
|
||||
_add_styled_paragraph(doc, line.strip(), style="Heading 1",
|
||||
alignment=WD_ALIGN_PARAGRAPH.CENTER)
|
||||
return
|
||||
|
||||
if block_id == "block-bet":
|
||||
_add_paragraph(doc, "", space_after=Pt(6)) # spacer
|
||||
_add_spacer(doc)
|
||||
for line in content.split("\n"):
|
||||
if line.strip():
|
||||
_add_centered_paragraph(doc, line.strip(), bold=False, font_size=FONT_SIZE_BODY)
|
||||
_add_centered_paragraph(doc, line.strip(), bold=False)
|
||||
return
|
||||
|
||||
if block_id == "block-gimel":
|
||||
_add_paragraph(doc, "", space_after=Pt(6))
|
||||
lines = content.split("\n")
|
||||
for line in lines:
|
||||
_add_spacer(doc)
|
||||
for line in content.split("\n"):
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
if stripped == "נגד":
|
||||
_add_centered_paragraph(doc, "— נגד —", bold=True, font_size=FONT_SIZE_BODY)
|
||||
_add_centered_paragraph(doc, "— נגד —", bold=True)
|
||||
else:
|
||||
_add_centered_paragraph(doc, stripped, bold=False, font_size=FONT_SIZE_BODY)
|
||||
_add_centered_paragraph(doc, stripped, bold=False)
|
||||
return
|
||||
|
||||
if block_id == "block-dalet":
|
||||
_add_paragraph(doc, "", space_after=Pt(12)) # spacer
|
||||
_add_centered_paragraph(doc, "החלטה", bold=True, font_size=FONT_SIZE_TITLE)
|
||||
_add_paragraph(doc, "", space_after=Pt(12))
|
||||
_add_spacer(doc)
|
||||
# Avoid style=Title: its rFonts use theme fonts (majorHAnsi / majorBidi)
|
||||
# and 28pt size — renders Hebrew oversized and in the wrong face.
|
||||
# Heading 1 carries David and proper RTL, bold + center gives the
|
||||
# same visual weight.
|
||||
para = _add_styled_paragraph(doc, "החלטה", style="Heading 1",
|
||||
alignment=WD_ALIGN_PARAGRAPH.CENTER,
|
||||
bold=True)
|
||||
_suppress_paragraph_numbering(para)
|
||||
_add_spacer(doc)
|
||||
return
|
||||
|
||||
if block_id == "block-yod-bet":
|
||||
_add_paragraph(doc, "", space_after=Pt(24)) # spacer
|
||||
_add_spacer(doc)
|
||||
for line in content.split("\n"):
|
||||
if line.strip():
|
||||
_add_centered_paragraph(doc, line.strip(), bold=False, font_size=FONT_SIZE_BODY)
|
||||
_add_centered_paragraph(doc, line.strip(), bold=False)
|
||||
return
|
||||
|
||||
# Content blocks (ה-יא) — parse paragraphs
|
||||
paragraphs = content.split("\n")
|
||||
for para_text in paragraphs:
|
||||
stripped = para_text.strip()
|
||||
for para_text in content.split("\n"):
|
||||
stripped = _strip_dashes(para_text.strip())
|
||||
if not stripped:
|
||||
continue
|
||||
|
||||
# Section headings (e.g., "תמצית טענות הצדדים", "טענות העוררים")
|
||||
if _is_section_heading(stripped):
|
||||
_add_paragraph(doc, stripped, bold=True, font_size=FONT_SIZE_HEADING,
|
||||
space_after=Pt(6))
|
||||
# Markdown H1/H2/H3 → template heading styles
|
||||
md_heading = re.match(r"^(#{1,6})\s+(.*)$", stripped)
|
||||
if md_heading:
|
||||
level = len(md_heading.group(1))
|
||||
heading_text = md_heading.group(2).strip()
|
||||
style = "Heading 1" if level == 1 else f"Heading {min(level, 3)}"
|
||||
_add_heading(doc, heading_text, style=style)
|
||||
continue
|
||||
|
||||
# Standalone `**...**` line — treat as a sub-heading (Heading 3)
|
||||
stand_bold = re.match(r"^\*\*([^\n*]+?)\*\*$", stripped)
|
||||
if stand_bold:
|
||||
_add_heading(doc, stand_bold.group(1).strip(), style="Heading 3")
|
||||
continue
|
||||
|
||||
if _is_section_heading(stripped):
|
||||
_add_heading(doc, stripped, style="Heading 2")
|
||||
continue
|
||||
|
||||
# Blockquotes (indented quotes from protocols/rulings)
|
||||
if stripped.startswith('"') or stripped.startswith("״") or stripped.startswith(">"):
|
||||
clean = stripped.lstrip(">").strip().strip('"').strip("״").strip('"')
|
||||
_add_blockquote(doc, clean)
|
||||
continue
|
||||
|
||||
# Image placeholders
|
||||
if "📷" in stripped or stripped.startswith("[") and "תמונה" in stripped:
|
||||
if "📷" in stripped or (stripped.startswith("[") and "תמונה" in stripped):
|
||||
_add_image_placeholder(doc, stripped.strip("[]📷 "))
|
||||
continue
|
||||
|
||||
# Regular numbered paragraph or plain text
|
||||
_add_paragraph(doc, stripped)
|
||||
# Numbered body paragraph ("1. text") → List Paragraph with auto-num.
|
||||
# The literal prefix is dropped; Word renders "1. 2. 3. ..." via numId.
|
||||
num_match = _NUM_PREFIX_RE.match(stripped)
|
||||
if num_match:
|
||||
body_text = num_match.group(2).strip()
|
||||
_add_styled_paragraph(doc, body_text, style="List Paragraph")
|
||||
continue
|
||||
|
||||
_add_styled_paragraph(doc, stripped, style="Normal")
|
||||
|
||||
|
||||
def _is_section_heading(text: str) -> bool:
|
||||
"""Detect section headings in decision text."""
|
||||
heading_patterns = [
|
||||
_SECTION_HEADING_PATTERNS = [
|
||||
re.compile(p) for p in (
|
||||
# Block-level titles
|
||||
r"^פתח\s+דבר",
|
||||
r"^רקע\s+עובדתי",
|
||||
r"^תמצית\s+טענות",
|
||||
r"^טענות\s+הצדדים",
|
||||
r"^טענות\s+העוררי",
|
||||
r"^טענות\s+המשיב",
|
||||
r"^עמדת\s+הוועדה",
|
||||
r"^עמדת\s+מבקשי",
|
||||
r"^ההליכים\s+בפני",
|
||||
r"^הליכים\s+בפני",
|
||||
r"^דיון\s+והכרעה",
|
||||
r"^סוף\s+דבר",
|
||||
r"^סיכום",
|
||||
r"^פתח\s+דבר",
|
||||
# Subsection titles produced by legal-writer inside block-vav/block-tet
|
||||
r"^המצב\s+התכנוני",
|
||||
r"^הליכי\s+הרישוי",
|
||||
r"^שומת\s+ההשבחה",
|
||||
r"^הליך\s+השומה",
|
||||
r"^הגשת\s+הערר",
|
||||
r"^תכניות\s+מתאר",
|
||||
r"^תכניות\s+מפורטות",
|
||||
r"^תכניות\s+חלות",
|
||||
]
|
||||
for pattern in heading_patterns:
|
||||
if re.search(pattern, text):
|
||||
return True
|
||||
# Short bold-like lines (under 60 chars, not numbered)
|
||||
if len(text) < 60 and not re.match(r"^\d+\.", text):
|
||||
return False
|
||||
return False
|
||||
r"^תכניות\s+החלות",
|
||||
r"^מדיניות\s+מהנדס",
|
||||
r"^היתרי\s+בני",
|
||||
r"^היתר\s+בני",
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
def _is_section_heading(text: str) -> bool:
|
||||
"""Detect legal-decision section headings — mapped to Heading 2 style."""
|
||||
return any(p.search(text) for p in _SECTION_HEADING_PATTERNS)
|
||||
|
||||
@@ -13,12 +13,20 @@ from lxml import etree
|
||||
|
||||
from legal_mcp.services.docx_exporter import (
|
||||
_BOOKMARK_ID_START,
|
||||
HEBREW_FONT,
|
||||
_add_styled_paragraph,
|
||||
_insert_bookmark_end,
|
||||
_insert_bookmark_start,
|
||||
_mark_paragraph_rtl,
|
||||
_mark_run_rtl,
|
||||
_strip_dashes,
|
||||
_wrap_block_with_bookmarks,
|
||||
_write_block_to_docx,
|
||||
)
|
||||
from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks
|
||||
|
||||
from docx.oxml.ns import qn
|
||||
|
||||
|
||||
def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None:
|
||||
doc = Document()
|
||||
@@ -101,3 +109,119 @@ def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None:
|
||||
|
||||
names = list_bookmarks(out)
|
||||
assert set(names) == {"block-alef", "block-bet", "block-gimel"}
|
||||
|
||||
|
||||
# ── RTL / David-font invariants ───────────────────────────────────
|
||||
# These guard against regressions where Hebrew renders LTR or in the wrong
|
||||
# font slot (Times New Roman instead of David). See plan file for context.
|
||||
|
||||
|
||||
def test_mark_paragraph_rtl_adds_bidi_directly_in_pPr() -> None:
    """``_mark_paragraph_rtl`` puts bidi under pPr and rtl under the mark rPr."""
    document = Document()
    paragraph = document.add_paragraph("טקסט בעברית")

    _mark_paragraph_rtl(paragraph)

    props = paragraph._p.find(qn("w:pPr"))
    assert props is not None
    # find() with a bare tag matches direct children only, so this fails if
    # <w:bidi/> is nested inside <w:rPr> instead of sitting directly in pPr.
    assert props.find(qn("w:bidi")) is not None
    # The paragraph-mark rPr must still carry <w:rtl/>.
    mark_props = props.find(qn("w:rPr"))
    assert mark_props is not None and mark_props.find(qn("w:rtl")) is not None
|
||||
|
||||
|
||||
def test_mark_run_rtl_forces_david_on_all_font_slots() -> None:
    """``_mark_run_rtl`` sets every rFonts slot to HEBREW_FONT and adds rtl."""
    document = Document()
    hebrew_run = document.add_paragraph().add_run("טקסט")

    _mark_run_rtl(hebrew_run)

    run_props = hebrew_run._r.find(qn("w:rPr"))
    assert run_props is not None
    fonts = run_props.find(qn("w:rFonts"))
    assert fonts is not None
    for slot in ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia"):
        assert fonts.get(qn(slot)) == HEBREW_FONT, f"{slot} not {HEBREW_FONT}"
    assert run_props.find(qn("w:rtl")) is not None
|
||||
|
||||
|
||||
def test_styled_paragraph_applies_bidi_and_david() -> None:
    """End-to-end: _add_styled_paragraph yields pPr/bidi and rFonts cs=HEBREW_FONT."""
    document = Document()
    _add_styled_paragraph(document, "פסקה עברית", style="Normal")

    last_paragraph = document.paragraphs[-1]
    paragraph_props = last_paragraph._p.find(qn("w:pPr"))
    assert paragraph_props.find(qn("w:bidi")) is not None

    first_run = last_paragraph.runs[0]
    fonts = first_run._r.find(qn("w:rPr")).find(qn("w:rFonts"))
    assert fonts.get(qn("w:cs")) == HEBREW_FONT
|
||||
|
||||
|
||||
def test_block_dalet_does_not_use_title_style() -> None:
    """block-dalet must avoid the Title style (theme fonts, 28pt) for Hebrew."""
    document = Document()
    _write_block_to_docx(document, "block-dalet", title="", content="")

    styles_used = {paragraph.style.name for paragraph in document.paragraphs}
    assert "Title" not in styles_used, (
        f"block-dalet should not produce a Title-styled paragraph, got {styles_used}"
    )
    # The 'החלטה' text must still appear in some paragraph.
    assert any("החלטה" in paragraph.text for paragraph in document.paragraphs)
|
||||
|
||||
|
||||
# ── Heading overrides, numbered-list, dash strip ──────────────────
|
||||
|
||||
|
||||
def test_strip_dashes_removes_em_and_en_dashes() -> None:
    """Em/en-dashes vanish, dash-free text is untouched, spaces collapse."""
    expectations = (
        ("תכנית 1454198 — אושרה ביום", "תכנית 1454198 אושרה ביום"),
        ("א – ב", "א ב"),
        ("no dash", "no dash"),
        # Collapsed whitespace
        ("רקע — עובדתי", "רקע עובדתי"),
    )
    for raw, cleaned in expectations:
        assert _strip_dashes(raw) == cleaned
|
||||
|
||||
|
||||
def test_heading2_gets_justified_and_no_numbering() -> None:
    """A section heading becomes Heading 2 with jc=both and numId=0."""
    document = Document()
    _write_block_to_docx(document, "block-vav", title="", content="דיון והכרעה")

    heading = next(
        paragraph for paragraph in document.paragraphs
        if paragraph.style.name == "Heading 2"
    )
    props = heading._p.find(qn("w:pPr"))

    alignment = props.find(qn("w:jc"))
    assert alignment is not None and alignment.get(qn("w:val")) == "both"

    numbering = props.find(qn("w:numPr"))
    assert numbering is not None
    num_id = numbering.find(qn("w:numId"))
    assert num_id is not None and num_id.get(qn("w:val")) == "0"
|
||||
|
||||
|
||||
def test_heading3_gets_justified_not_centered() -> None:
    """A standalone bold line becomes Heading 3 with an explicit jc=both."""
    document = Document()
    _write_block_to_docx(document, "block-vav", title="", content="**המצב התכנוני**")

    heading = next(
        paragraph for paragraph in document.paragraphs
        if paragraph.style.name == "Heading 3"
    )
    alignment = heading._p.find(qn("w:pPr")).find(qn("w:jc"))
    assert alignment is not None and alignment.get(qn("w:val")) == "both"
|
||||
|
||||
|
||||
def test_numbered_paragraph_uses_list_paragraph_and_strips_prefix() -> None:
    """'1. text' lines become List Paragraph entries without the literal prefix."""
    document = Document()
    numbered_content = "1. עניינו של ערר זה.\n2. שכונת נווה יעקב."
    _write_block_to_docx(document, "block-vav", title="", content=numbered_content)

    list_items = [
        paragraph for paragraph in document.paragraphs
        if paragraph.style.name == "List Paragraph"
    ]
    assert len(list_items) == 2
    first_item, second_item = list_items
    assert first_item.text.startswith("עניינו")
    assert not first_item.text.startswith("1.")
    assert second_item.text.startswith("שכונת")
|
||||
|
||||
|
||||
def test_body_content_has_no_em_dashes() -> None:
    """Body content containing an em-dash is written out without it."""
    document = Document()
    content_with_dash = "3. תכנית 5924 — קובעת את שטחי הבנייה."
    _write_block_to_docx(document, "block-vav", title="", content=content_with_dash)

    rendered = "\n".join(paragraph.text for paragraph in document.paragraphs)
    assert "—" not in rendered
|
||||
|
||||
Reference in New Issue
Block a user