All checks were successful
Build & Deploy / build-and-deploy (push) Successful in 1m30s
Hebrew was rendering LTR or in Times New Roman fallback in some Word contexts. Root cause: incomplete RTL marking and missing font hints on the run level. Three layers of RTL are required (per skills/docx/SKILL.md): 1. Section: <w:bidi/> in sectPr (now inherited from template) 2. Paragraph: <w:bidi/> directly in pPr (paragraph direction) 3. Run: <w:rtl/> in rPr — tells Word to use cs (complex-script) font Without an explicit font on the run, Hebrew renders in the ascii slot (Times New Roman). Force David on all four slots (ascii / hAnsi / cs / eastAsia) so every shaping path picks the correct font. Changes: - TEMPLATE_PATH now points to skills/docx/decision_template.docx (carries David, RTL, margins, styles); replaces hard-coded constants. - _mark_run_rtl: writes rFonts on all four slots, then appends <w:rtl/>. - _mark_paragraph_rtl: places <w:bidi/> directly in pPr (not nested in rPr — that was the bug), and adds <w:rtl/> to the paragraph-mark rPr. - _set_paragraph_jc: forces explicit jc, overriding style-inherited. Tests: - test_mark_paragraph_rtl_adds_bidi_directly_in_pPr — guards against the regression where bidi was nested inside rPr. - test_mark_run_rtl_forces_david_on_all_font_slots — ensures all four font slots are set, not just cs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
228 lines
8.1 KiB
Python
228 lines
8.1 KiB
Python
"""בדיקות ל-bookmark helpers ב-docx_exporter.
|
||
|
||
הבדיקות מתרכזות ב-helper functions בלבד (לא בכל ה-export flow שדורש DB).
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import zipfile
|
||
from pathlib import Path
|
||
|
||
from docx import Document
|
||
from lxml import etree
|
||
|
||
from legal_mcp.services.docx_exporter import (
|
||
_BOOKMARK_ID_START,
|
||
HEBREW_FONT,
|
||
_add_styled_paragraph,
|
||
_insert_bookmark_end,
|
||
_insert_bookmark_start,
|
||
_mark_paragraph_rtl,
|
||
_mark_run_rtl,
|
||
_strip_dashes,
|
||
_wrap_block_with_bookmarks,
|
||
_write_block_to_docx,
|
||
)
|
||
from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks
|
||
|
||
from docx.oxml.ns import qn
|
||
|
||
|
||
def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None:
    """Bookmark start/end helpers produce a bookmark that can be read back.

    A start and an end marker with the same id are inserted on one paragraph,
    the document is saved, and the bookmark name is read back from the file.
    """
    bookmark_id = 10001
    document = Document()
    paragraph = document.add_paragraph("תוכן בלוק י")

    _insert_bookmark_start(paragraph, "block-yod", bookmark_id)
    _insert_bookmark_end(paragraph, bookmark_id)

    saved_file = tmp_path / "out.docx"
    document.save(str(saved_file))

    # Read the bookmark names back from the saved file.
    found = list_bookmarks(saved_file)
    assert found == ["block-yod"]
|
||
|
||
|
||
def test_wrap_block_with_bookmarks_wraps_multiple_paragraphs(tmp_path: Path) -> None:
    """A multi-paragraph block is bracketed by one bookmark start/end pair.

    The document gets one unrelated paragraph before and one after the block;
    the start marker must sit in the block's first paragraph and the end marker
    in its last paragraph.
    """
    document = Document()
    document.add_paragraph("ראשון — לפני")  # noise before

    def writer() -> None:
        # The block under test: three consecutive paragraphs.
        for block_text in ("בלוק — פסקה 1", "בלוק — פסקה 2", "בלוק — פסקה 3"):
            document.add_paragraph(block_text)

    id_counter = [_BOOKMARK_ID_START]
    _wrap_block_with_bookmarks(document, "block-yod", writer, id_counter)
    document.add_paragraph("אחרי — אחרון")  # noise after

    saved_file = tmp_path / "out.docx"
    document.save(str(saved_file))

    # Inspect the raw document XML inside the saved .docx archive.
    with zipfile.ZipFile(saved_file, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    all_paragraphs = root.findall(".//w:p", NSMAP)

    # Index of the (last) paragraph containing each marker, or None if absent.
    with_start = [
        idx
        for idx, para in enumerate(all_paragraphs)
        if para.find(".//w:bookmarkStart", NSMAP) is not None
    ]
    with_end = [
        idx
        for idx, para in enumerate(all_paragraphs)
        if para.find(".//w:bookmarkEnd", NSMAP) is not None
    ]
    start_idx = with_start[-1] if with_start else None
    end_idx = with_end[-1] if with_end else None

    assert start_idx is not None
    assert end_idx is not None

    # The start marker belongs to the first block paragraph, the end marker
    # to the last one.
    first_wrapped_text = "".join(all_paragraphs[start_idx].itertext())
    last_wrapped_text = "".join(all_paragraphs[end_idx].itertext())
    assert "פסקה 1" in first_wrapped_text
    assert "פסקה 3" in last_wrapped_text
|
||
|
||
|
||
def test_wrap_block_skipped_when_writer_adds_nothing(tmp_path: Path) -> None:
    """A writer that adds no paragraphs leaves the saved file without bookmarks."""
    document = Document()
    id_counter = [_BOOKMARK_ID_START]

    def empty_writer() -> None:
        # Intentionally writes nothing to the document.
        return None

    _wrap_block_with_bookmarks(document, "block-empty", empty_writer, id_counter)

    saved_file = tmp_path / "out.docx"
    document.save(str(saved_file))

    assert list_bookmarks(saved_file) == []
|
||
|
||
|
||
def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None:
    """Three blocks wrapped with one shared counter get three distinct ids."""
    document = Document()
    id_counter = [_BOOKMARK_ID_START]
    block_names = ("block-alef", "block-bet", "block-gimel")

    def make_writer(block_name):
        # Bind the name per block so each writer emits its own text.
        def write_one():
            return document.add_paragraph(f"תוכן של {block_name}")

        return write_one

    for block_name in block_names:
        _wrap_block_with_bookmarks(
            document,
            block_name,
            make_writer(block_name),
            id_counter,
        )

    saved_file = tmp_path / "out.docx"
    document.save(str(saved_file))

    with zipfile.ZipFile(saved_file, "r") as archive:
        root = etree.fromstring(archive.read("word/document.xml"))

    # One id per bookmarkStart element; all of them must differ.
    start_ids = []
    for start_el in root.iterfind(".//w:bookmarkStart", NSMAP):
        start_ids.append(start_el.get(_w("id")))
    assert len(start_ids) == 3
    assert len(set(start_ids)) == 3

    found_names = list_bookmarks(saved_file)
    assert set(found_names) == {"block-alef", "block-bet", "block-gimel"}
|
||
|
||
|
||
# ── RTL / David-font invariants ───────────────────────────────────
|
||
# These guard against regressions where Hebrew renders LTR or in the wrong
|
||
# font slot (Times New Roman instead of David). See plan file for context.
|
||
|
||
|
||
def test_mark_paragraph_rtl_adds_bidi_directly_in_pPr() -> None:
    """``_mark_paragraph_rtl`` sets ``<w:bidi/>`` on pPr and ``<w:rtl/>`` on its rPr."""
    document = Document()
    paragraph = document.add_paragraph("טקסט בעברית")

    _mark_paragraph_rtl(paragraph)

    paragraph_props = paragraph._p.find(qn("w:pPr"))
    assert paragraph_props is not None

    # <w:bidi/> has to be a direct child of pPr (paragraph direction),
    # not nested inside <w:rPr>.
    bidi = paragraph_props.find(qn("w:bidi"))
    assert bidi is not None

    # The paragraph-mark rPr is still expected to carry <w:rtl/>.
    mark_props = paragraph_props.find(qn("w:rPr"))
    assert mark_props is not None
    assert mark_props.find(qn("w:rtl")) is not None
|
||
|
||
|
||
def test_mark_run_rtl_forces_david_on_all_font_slots() -> None:
    """``_mark_run_rtl`` sets HEBREW_FONT on every rFonts slot and adds ``<w:rtl/>``."""
    font_slots = ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia")

    document = Document()
    run = document.add_paragraph().add_run("טקסט")
    _mark_run_rtl(run)

    run_props = run._r.find(qn("w:rPr"))
    assert run_props is not None

    r_fonts = run_props.find(qn("w:rFonts"))
    assert r_fonts is not None

    # Each of the four slots must name the Hebrew font, not only the cs slot.
    for slot in font_slots:
        slot_value = r_fonts.get(qn(slot))
        assert slot_value == HEBREW_FONT, f"{slot} not {HEBREW_FONT}"

    assert run_props.find(qn("w:rtl")) is not None
|
||
|
||
|
||
def test_styled_paragraph_applies_bidi_and_david() -> None:
    """End-to-end: _add_styled_paragraph produces pPr/bidi + rFonts/cs=HEBREW_FONT.

    Each intermediate XML lookup is asserted separately so that a missing
    element fails with a readable assertion rather than an AttributeError or
    IndexError on a chained call.
    """
    doc = Document()
    _add_styled_paragraph(doc, "פסקה עברית", style="Normal")
    p = doc.paragraphs[-1]

    # Paragraph level: <w:bidi/> directly under <w:pPr>.
    pPr = p._p.find(qn("w:pPr"))
    assert pPr is not None, "paragraph has no <w:pPr>"
    assert pPr.find(qn("w:bidi")) is not None

    # Run level: the first run's rFonts must set the cs slot to the Hebrew font.
    assert p.runs, "paragraph has no runs"
    rPr = p.runs[0]._r.find(qn("w:rPr"))
    assert rPr is not None, "first run has no <w:rPr>"
    fonts = rPr.find(qn("w:rFonts"))
    assert fonts is not None, "first run has no <w:rFonts>"
    assert fonts.get(qn("w:cs")) == HEBREW_FONT
|
||
|
||
|
||
def test_block_dalet_does_not_use_title_style() -> None:
    """block-dalet must not emit a Title-styled paragraph.

    Per the original note, the Title style uses theme fonts and 28pt, which is
    to be avoided for Hebrew. The decision heading text must still be written.
    """
    document = Document()
    _write_block_to_docx(document, "block-dalet", title="", content="")

    styles_used = set()
    for paragraph in document.paragraphs:
        styles_used.add(paragraph.style.name)
    assert "Title" not in styles_used, (
        f"block-dalet should not produce a Title-styled paragraph, got {styles_used}"
    )

    # The 'החלטה' text must still appear in at least one paragraph.
    heading_found = False
    for paragraph in document.paragraphs:
        if "החלטה" in paragraph.text:
            heading_found = True
            break
    assert heading_found
|
||
|
||
|
||
# ── Heading overrides, numbered-list, dash strip ──────────────────
|
||
|
||
|
||
def test_strip_dashes_removes_em_and_en_dashes() -> None:
    """``_strip_dashes`` drops em/en dashes and leaves dash-free text unchanged."""
    cases = (
        # em dash
        ("תכנית 1454198 — אושרה ביום", "תכנית 1454198 אושרה ביום"),
        # en dash
        ("א – ב", "א ב"),
        # nothing to strip
        ("no dash", "no dash"),
        # collapsed whitespace
        ("רקע — עובדתי", "רקע עובדתי"),
    )
    for raw, expected in cases:
        assert _strip_dashes(raw) == expected
|
||
|
||
|
||
def test_heading2_gets_justified_and_no_numbering() -> None:
    """Section heading → Heading 2 with jc=both and numId=0.

    Lookups that may come back empty are asserted explicitly, so a missing
    heading or a missing <w:pPr> shows up as a readable assertion failure
    instead of a StopIteration or AttributeError.
    """
    doc = Document()
    _write_block_to_docx(doc, "block-vav", title="", content="דיון והכרעה")

    # Default of None avoids a bare StopIteration when no heading was written.
    heading = next((p for p in doc.paragraphs if p.style.name == "Heading 2"), None)
    assert heading is not None, "no Heading 2 paragraph was produced"

    pPr = heading._p.find(qn("w:pPr"))
    assert pPr is not None, "Heading 2 paragraph has no <w:pPr>"

    # Explicit justification on the paragraph itself.
    jc = pPr.find(qn("w:jc"))
    assert jc is not None and jc.get(qn("w:val")) == "both"

    # Numbering is present but points at numId 0.
    numPr = pPr.find(qn("w:numPr"))
    assert numPr is not None
    numId = numPr.find(qn("w:numId"))
    assert numId is not None and numId.get(qn("w:val")) == "0"
|
||
|
||
|
||
def test_heading3_gets_justified_not_centered() -> None:
    """A bold-only line becomes Heading 3 with an explicit jc=both.

    Per the original note, Heading 3 in the template has jc=center, so the
    exporter is expected to override it on the paragraph. Lookups that may
    come back empty are asserted explicitly so failures are readable.
    """
    doc = Document()
    _write_block_to_docx(doc, "block-vav", title="", content="**המצב התכנוני**")

    # Default of None avoids a bare StopIteration when no heading was written.
    heading = next((p for p in doc.paragraphs if p.style.name == "Heading 3"), None)
    assert heading is not None, "no Heading 3 paragraph was produced"

    pPr = heading._p.find(qn("w:pPr"))
    assert pPr is not None, "Heading 3 paragraph has no <w:pPr>"

    jc = pPr.find(qn("w:jc"))
    assert jc is not None and jc.get(qn("w:val")) == "both"
|
||
|
||
|
||
def test_numbered_paragraph_uses_list_paragraph_and_strips_prefix() -> None:
    """'1. text' lines become List Paragraph entries without the literal number."""
    numbered_content = "1. עניינו של ערר זה.\n2. שכונת נווה יעקב."

    document = Document()
    _write_block_to_docx(document, "block-vav", title="", content=numbered_content)

    list_items = []
    for paragraph in document.paragraphs:
        if paragraph.style.name == "List Paragraph":
            list_items.append(paragraph)
    assert len(list_items) == 2

    first_text = list_items[0].text
    second_text = list_items[1].text
    # The text starts with the sentence itself; the literal "1. " is gone.
    assert first_text.startswith("עניינו")
    assert not first_text.startswith("1.")
    assert second_text.startswith("שכונת")
|
||
|
||
|
||
def test_body_content_has_no_em_dashes() -> None:
    """Content containing an em dash is written to the document without it."""
    content_with_dash = "3. תכנית 5924 — קובעת את שטחי הבנייה."

    document = Document()
    _write_block_to_docx(document, "block-vav", title="", content=content_with_dash)

    # Check every paragraph's text; none may still contain the em dash.
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    assert all("—" not in text for text in paragraph_texts)
|