"""Tests for the bookmark helpers in docx_exporter.

The tests focus on the helper functions only (not the whole export flow,
which requires a DB).
"""

from __future__ import annotations

import zipfile
from pathlib import Path

from docx import Document
from docx.oxml.ns import qn
from lxml import etree

from legal_mcp.services.docx_exporter import (
    _BOOKMARK_ID_START,
    HEBREW_FONT,
    _add_styled_paragraph,
    _insert_bookmark_end,
    _insert_bookmark_start,
    _mark_paragraph_rtl,
    _mark_run_rtl,
    _strip_dashes,
    _wrap_block_with_bookmarks,
    _write_block_to_docx,
)
from legal_mcp.services.docx_reviser import NSMAP, _w, list_bookmarks


def test_insert_bookmark_helpers_create_valid_xml(tmp_path: Path) -> None:
    """A start/end pair inserted on one paragraph is listed after saving."""
    doc = Document()
    p = doc.add_paragraph("תוכן בלוק י")
    _insert_bookmark_start(p, "block-yod", 10001)
    _insert_bookmark_end(p, 10001)

    out = tmp_path / "out.docx"
    doc.save(str(out))

    # Verify via list_bookmarks (it reads the same XML back from the file).
    assert list_bookmarks(out) == ["block-yod"]


def test_wrap_block_with_bookmarks_wraps_multiple_paragraphs(tmp_path: Path) -> None:
    """The bookmark starts in the writer's first paragraph and ends in its last."""
    doc = Document()
    doc.add_paragraph("ראשון — לפני")  # noise before
    bm_counter = [_BOOKMARK_ID_START]

    def writer() -> None:
        doc.add_paragraph("בלוק — פסקה 1")
        doc.add_paragraph("בלוק — פסקה 2")
        doc.add_paragraph("בלוק — פסקה 3")

    _wrap_block_with_bookmarks(doc, "block-yod", writer, bm_counter)
    doc.add_paragraph("אחרי — אחרון")  # noise after

    out = tmp_path / "out.docx"
    doc.save(str(out))

    # The bookmark should wrap exactly the 3 middle paragraphs.
    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))
    paragraphs = tree.findall(".//w:p", NSMAP)

    # Find the paragraph index holding bookmarkStart and bookmarkEnd.
    start_idx = end_idx = None
    for i, p in enumerate(paragraphs):
        if p.find(".//w:bookmarkStart", NSMAP) is not None:
            start_idx = i
        if p.find(".//w:bookmarkEnd", NSMAP) is not None:
            end_idx = i
    assert start_idx is not None, "no paragraph contains a w:bookmarkStart"
    assert end_idx is not None, "no paragraph contains a w:bookmarkEnd"

    # The paragraph containing the start must be the first new one ("פסקה 1"),
    # and the one containing the end must be the last new one ("פסקה 3").
    start_text = "".join(paragraphs[start_idx].itertext())
    end_text = "".join(paragraphs[end_idx].itertext())
    assert "פסקה 1" in start_text
    assert "פסקה 3" in end_text


def test_wrap_block_skipped_when_writer_adds_nothing(tmp_path: Path) -> None:
    """A writer that adds no paragraphs leaves the document without bookmarks."""
    doc = Document()
    bm_counter = [_BOOKMARK_ID_START]
    _wrap_block_with_bookmarks(doc, "block-empty", lambda: None, bm_counter)

    out = tmp_path / "out.docx"
    doc.save(str(out))

    assert list_bookmarks(out) == []


def test_multiple_blocks_get_unique_bookmark_ids(tmp_path: Path) -> None:
    """Three wrapped blocks sharing one counter get three distinct w:id values."""
    doc = Document()
    bm_counter = [_BOOKMARK_ID_START]
    for name in ("block-alef", "block-bet", "block-gimel"):
        _wrap_block_with_bookmarks(
            doc,
            name,
            # Bind the loop variable as a default so each writer sees its own name.
            lambda n=name: doc.add_paragraph(f"תוכן של {n}"),
            bm_counter,
        )

    out = tmp_path / "out.docx"
    doc.save(str(out))

    with zipfile.ZipFile(out, "r") as zf:
        tree = etree.fromstring(zf.read("word/document.xml"))
    ids = [el.get(_w("id")) for el in tree.iterfind(".//w:bookmarkStart", NSMAP)]
    assert len(ids) == 3
    assert len(set(ids)) == 3

    names = list_bookmarks(out)
    assert set(names) == {"block-alef", "block-bet", "block-gimel"}


# ── RTL / David-font invariants ───────────────────────────────────
# These guard against regressions where Hebrew renders LTR or in the wrong
# font slot (Times New Roman instead of David). See plan file for context.


def test_mark_paragraph_rtl_adds_bidi_directly_in_pPr() -> None:
    """_mark_paragraph_rtl puts w:bidi directly under pPr and w:rtl in its rPr."""
    doc = Document()
    p = doc.add_paragraph("טקסט בעברית")
    _mark_paragraph_rtl(p)

    pPr = p._p.find(qn("w:pPr"))
    assert pPr is not None
    # w:bidi must be a direct child of pPr (paragraph direction), not nested
    # deeper inside it; find() with a plain tag only matches direct children.
    assert pPr.find(qn("w:bidi")) is not None
    # The paragraph-mark rPr must still carry w:rtl.
    rPr = pPr.find(qn("w:rPr"))
    assert rPr is not None and rPr.find(qn("w:rtl")) is not None


def test_mark_run_rtl_forces_david_on_all_font_slots() -> None:
    """_mark_run_rtl sets every rFonts slot to HEBREW_FONT and adds w:rtl."""
    doc = Document()
    p = doc.add_paragraph()
    run = p.add_run("טקסט")
    _mark_run_rtl(run)

    rPr = run._r.find(qn("w:rPr"))
    assert rPr is not None
    fonts = rPr.find(qn("w:rFonts"))
    assert fonts is not None
    for slot in ("w:ascii", "w:hAnsi", "w:cs", "w:eastAsia"):
        assert fonts.get(qn(slot)) == HEBREW_FONT, f"{slot} not {HEBREW_FONT}"
    assert rPr.find(qn("w:rtl")) is not None


def test_styled_paragraph_applies_bidi_and_david() -> None:
    """End-to-end: _add_styled_paragraph produces pPr/bidi + rFonts/cs=David."""
    doc = Document()
    _add_styled_paragraph(doc, "פסקה עברית", style="Normal")
    p = doc.paragraphs[-1]

    # Check each level explicitly so a missing element fails as an assertion
    # instead of an AttributeError on None.
    pPr = p._p.find(qn("w:pPr"))
    assert pPr is not None, "styled paragraph has no w:pPr"
    assert pPr.find(qn("w:bidi")) is not None

    assert p.runs, "styled paragraph has no runs"
    run = p.runs[0]
    rPr = run._r.find(qn("w:rPr"))
    assert rPr is not None, "first run has no w:rPr"
    fonts = rPr.find(qn("w:rFonts"))
    assert fonts is not None, "first run has no w:rFonts"
    assert fonts.get(qn("w:cs")) == HEBREW_FONT


def test_block_dalet_does_not_use_title_style() -> None:
    """Title style uses theme fonts and 28pt — avoid for Hebrew."""
    doc = Document()
    _write_block_to_docx(doc, "block-dalet", title="", content="")

    styles_used = {p.style.name for p in doc.paragraphs}
    assert "Title" not in styles_used, (
        f"block-dalet should not produce a Title-styled paragraph, got {styles_used}"
    )
    # The 'החלטה' text must still appear somewhere.
    texts = [p.text for p in doc.paragraphs]
    assert any("החלטה" in t for t in texts)


# ── Heading overrides, numbered-list, dash strip ──────────────────


def test_strip_dashes_removes_em_and_en_dashes() -> None:
    """_strip_dashes drops em/en dashes and leaves dash-free text untouched."""
    assert _strip_dashes("תכנית 1454198 — אושרה ביום") == "תכנית 1454198 אושרה ביום"
    assert _strip_dashes("א – ב") == "א ב"
    assert _strip_dashes("no dash") == "no dash"
    # Whitespace left around the removed dash collapses to a single space.
    assert _strip_dashes("רקע — עובדתי") == "רקע עובדתי"


def test_heading2_gets_justified_and_no_numbering() -> None:
    """Section heading → Heading 2 with jc=both and numId=0."""
    doc = Document()
    _write_block_to_docx(doc, "block-vav", title="", content="דיון והכרעה")

    # Use a default so a missing heading fails as an assertion, not StopIteration.
    heading = next((p for p in doc.paragraphs if p.style.name == "Heading 2"), None)
    assert heading is not None, "no Heading 2 paragraph was produced"
    pPr = heading._p.find(qn("w:pPr"))
    assert pPr is not None, "Heading 2 paragraph has no w:pPr"

    jc = pPr.find(qn("w:jc"))
    assert jc is not None and jc.get(qn("w:val")) == "both"

    numPr = pPr.find(qn("w:numPr"))
    assert numPr is not None
    numId = numPr.find(qn("w:numId"))
    assert numId is not None and numId.get(qn("w:val")) == "0"


def test_heading3_gets_justified_not_centered() -> None:
    """Heading 3 in template has jc=center — override to jc=both."""
    doc = Document()
    _write_block_to_docx(doc, "block-vav", title="", content="**המצב התכנוני**")

    # Use a default so a missing heading fails as an assertion, not StopIteration.
    heading = next((p for p in doc.paragraphs if p.style.name == "Heading 3"), None)
    assert heading is not None, "no Heading 3 paragraph was produced"
    pPr = heading._p.find(qn("w:pPr"))
    assert pPr is not None, "Heading 3 paragraph has no w:pPr"

    jc = pPr.find(qn("w:jc"))
    assert jc is not None and jc.get(qn("w:val")) == "both"


def test_numbered_paragraph_uses_list_paragraph_and_strips_prefix() -> None:
    """'1. text' → List Paragraph style, literal '1. ' removed."""
    doc = Document()
    _write_block_to_docx(
        doc,
        "block-vav",
        title="",
        content="1. עניינו של ערר זה.\n2. שכונת נווה יעקב.",
    )

    lp = [p for p in doc.paragraphs if p.style.name == "List Paragraph"]
    assert len(lp) == 2
    assert lp[0].text.startswith("עניינו")
    assert not lp[0].text.startswith("1.")
    assert lp[1].text.startswith("שכונת")


def test_body_content_has_no_em_dashes() -> None:
    """Content with em-dashes is rendered without them."""
    doc = Document()
    _write_block_to_docx(
        doc,
        "block-vav",
        title="",
        content="3. תכנית 5924 — קובעת את שטחי הבנייה.",
    )

    texts = "\n".join(p.text for p in doc.paragraphs)
    assert "—" not in texts