"""Tests for docx_retrofit — retroactive injection of bookmarks."""

from __future__ import annotations

from pathlib import Path

from docx import Document

from legal_mcp.services.docx_retrofit import (
    BLOCK_ORDER,
    retrofit_bookmarks,
)
from legal_mcp.services.docx_reviser import list_bookmarks


def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Save a DOCX at *path* holding two paragraphs per Hebrew block marker.

    For every marker the first paragraph opens with ``"<marker>. "`` and the
    second is a follow-up paragraph that mentions the marker only at its end.
    """
    document = Document()
    for letter in markers:
        opening_text = f"{letter}. תוכן הבלוק שמתחיל ב-{letter}"
        follow_up_text = f"עוד פסקה בבלוק {letter}"
        document.add_paragraph(opening_text)
        document.add_paragraph(follow_up_text)
    document.save(str(path))


def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """A document with all twelve markers gets twelve bookmarks, none missing."""
    all_markers = ["א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב"]
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, all_markers)

    report = retrofit_bookmarks(docx_path, backup=False)

    assert len(report["bookmarks_added"]) == 12
    assert report["missing_blocks"] == []

    # The names stored in the file must be exactly those listed in BLOCK_ORDER.
    found_names = set(list_bookmarks(docx_path))
    expected_names = {bookmark_name for bookmark_name, _ in BLOCK_ORDER}
    assert found_names == expected_names


def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """Blocks absent from the document are listed under ``missing_blocks``."""
    docx_path = tmp_path / "src.docx"
    # Only the first four markers are written to the document.
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב", "ג", "ד"])

    report = retrofit_bookmarks(docx_path, backup=False)

    expected_added = ["block-alef", "block-bet", "block-gimel", "block-dalet"]
    assert report["bookmarks_added"] == expected_added

    missing = report["missing_blocks"]
    for absent_name in ("block-heh", "block-yod-bet"):
        assert absent_name in missing


def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """The markers י, יא and יב are each detected as their own block."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["ט", "י", "יא", "יב"])

    report = retrofit_bookmarks(docx_path, backup=False)

    expected_added = {
        "block-tet",
        "block-yod",
        "block-yod-alef",
        "block-yod-bet",
    }
    assert set(report["bookmarks_added"]) == expected_added


def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """A second retrofit run adds nothing and reports the bookmarks as existing."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב"])

    first_run = retrofit_bookmarks(docx_path, backup=False)
    assert first_run["bookmarks_added"] == ["block-alef", "block-bet"]

    second_run = retrofit_bookmarks(docx_path, backup=False)
    # Nothing new on the second pass; both names are reported as pre-existing.
    assert second_run["bookmarks_added"] == []
    assert set(second_run["existing_bookmarks"]) == {"block-alef", "block-bet"}

    # The saved document still exposes just these two names.
    # NOTE(review): comparing as sets would not reveal a duplicated name.
    names_in_file = set(list_bookmarks(docx_path))
    assert names_in_file == {"block-alef", "block-bet"}


def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """With default arguments a ``.pre-retrofit.docx`` sibling file appears."""
    docx_path = tmp_path / "file.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב"])

    # No ``backup`` argument is passed, so the default applies.
    retrofit_bookmarks(docx_path)

    backup_path = docx_path.with_suffix(".pre-retrofit.docx")
    assert backup_path.exists()


def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """Writing to ``output_path`` leaves the source file without bookmarks.

    NOTE(review): the test name mentions "no backup", but the absence of a
    backup file is not asserted here.
    """
    source_path = tmp_path / "src.docx"
    target_path = tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(source_path, ["א", "ב"])

    retrofit_bookmarks(source_path, output_path=target_path)

    # The source stays untouched ...
    assert list_bookmarks(source_path) == []
    # ... while the output file carries the bookmarks.
    assert set(list_bookmarks(target_path)) == {"block-alef", "block-bet"}


def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """A lone 'י' inside body text (not at paragraph start) is not a block."""
    docx_path = tmp_path / "src.docx"
    paragraph_texts = (
        "א. תחילת הבלוק",
        "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
        "ב. בלוק שני",
    )
    document = Document()
    for text in paragraph_texts:
        document.add_paragraph(text)
    document.save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)
    added = report["bookmarks_added"]

    assert "block-alef" in added
    assert "block-bet" in added
    # The mid-sentence 'י' must not produce a bookmark.
    assert "block-yod" not in added


def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """A marker that shows up after a later-ordered one is reported as missing.

    The document lists 'ב' before 'א'. The expectation is that the scan only
    moves forward through BLOCK_ORDER, so 'א' is never claimed once 'ב' has
    been seen.
    """
    docx_path = tmp_path / "src.docx"
    paragraph_texts = (
        "ב. מופיע ראשון",
        "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
        "ג. בלוק גימל",
    )
    document = Document()
    for text in paragraph_texts:
        document.add_paragraph(text)
    document.save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)
    added = report["bookmarks_added"]

    assert "block-bet" in added
    assert "block-gimel" in added
    # 'א' came after 'ב' in the document, so it ends up among the missing blocks.
    assert "block-alef" in report["missing_blocks"]


def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """A document with no paragraphs adds nothing and misses all twelve blocks."""
    docx_path = tmp_path / "empty.docx"
    empty_document = Document()
    empty_document.save(str(docx_path))

    report = retrofit_bookmarks(docx_path, backup=False)

    assert report["bookmarks_added"] == []
    assert len(report["missing_blocks"]) == 12