"""Tests for docx_retrofit — retroactive bookmark injection."""

from __future__ import annotations

from pathlib import Path

from docx import Document

from legal_mcp.services.docx_retrofit import (
    BLOCK_ORDER,
    retrofit_bookmarks,
)
from legal_mcp.services.docx_reviser import list_bookmarks

# Hebrew block markers used by the tests that exercise a "complete" document.
_ALL_STANDARD_MARKERS = (
    "א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "יא", "יב",
)

# Bookmark names of the four cover blocks checked by the fallback test.
_COVER_BLOCKS = ("block-alef", "block-bet", "block-gimel", "block-dalet")


def _save_docx_with_paragraphs(path: Path, paragraphs: list[str]) -> None:
    """Write a DOCX at *path* holding one paragraph per entry, in order."""
    document = Document()
    for text in paragraphs:
        document.add_paragraph(text)
    document.save(str(path))


def _make_docx_with_hebrew_blocks(path: Path, markers: list[str]) -> None:
    """Create a DOCX with two paragraphs per marker.

    The first paragraph of each pair opens with the Hebrew marker followed by
    a period; the second is a plain follow-up paragraph for the same block.
    """
    paragraphs: list[str] = []
    for marker in markers:
        paragraphs.append(f"{marker}. תוכן הבלוק שמתחיל ב-{marker}")
        paragraphs.append(f"עוד פסקה בבלוק {marker}")
    _save_docx_with_paragraphs(path, paragraphs)


def test_retrofit_detects_all_standard_blocks(tmp_path: Path) -> None:
    """All twelve markers present: twelve bookmarks added, nothing missing."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, list(_ALL_STANDARD_MARKERS))

    report = retrofit_bookmarks(docx_path, backup=False)

    assert len(report["bookmarks_added"]) == 12
    assert report["missing_blocks"] == []
    # The bookmarks written to the file must be exactly the BLOCK_ORDER names.
    bookmark_names = set(list_bookmarks(docx_path))
    assert bookmark_names == {name for name, _ in BLOCK_ORDER}


def test_retrofit_reports_missing_blocks(tmp_path: Path) -> None:
    """Blocks without a marker in the document are listed as missing."""
    docx_path = tmp_path / "src.docx"
    # Only 4 blocks present
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב", "ג", "ד"])

    report = retrofit_bookmarks(docx_path, backup=False)

    assert report["bookmarks_added"] == [
        "block-alef",
        "block-bet",
        "block-gimel",
        "block-dalet",
    ]
    missing = report["missing_blocks"]
    assert "block-heh" in missing
    assert "block-yod-bet" in missing


def test_retrofit_distinguishes_yod_from_yod_alef_yod_bet(tmp_path: Path) -> None:
    """י, יא, יב must all be distinguished — longer markers win."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["ט", "י", "יא", "יב"])

    report = retrofit_bookmarks(docx_path, backup=False)

    # The four content blocks must all be detected; cover blocks added via fallback.
    content_blocks = {"block-tet", "block-yod", "block-yod-alef", "block-yod-bet"}
    assert content_blocks <= set(report["bookmarks_added"])


def test_retrofit_skips_existing_bookmarks(tmp_path: Path) -> None:
    """Running retrofit twice doesn't duplicate bookmarks."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ב"])

    first_run = retrofit_bookmarks(docx_path, backup=False)
    # alef/bet from markers; gimel/dalet from cover-block fallback
    added_first = set(first_run["bookmarks_added"])
    assert {"block-alef", "block-bet"} <= added_first

    second_run = retrofit_bookmarks(docx_path, backup=False)
    assert second_run["bookmarks_added"] == []  # nothing new
    # All previously added bookmarks now exist on the document
    assert added_first <= set(second_run["existing_bookmarks"])


def test_retrofit_creates_backup(tmp_path: Path) -> None:
    """With default arguments a '.pre-retrofit.docx' sibling file is created."""
    docx_path = tmp_path / "file.docx"
    _make_docx_with_hebrew_blocks(docx_path, ["א", "ب"[:0] + "ב"])

    retrofit_bookmarks(docx_path)  # backup=True (default)

    backup_path = docx_path.with_suffix(".pre-retrofit.docx")
    assert backup_path.exists()


def test_retrofit_to_different_output_path_no_backup(tmp_path: Path) -> None:
    """Writing to output_path leaves the source file without bookmarks."""
    source_path = tmp_path / "src.docx"
    output_path = tmp_path / "out.docx"
    _make_docx_with_hebrew_blocks(source_path, ["א", "ב"])

    retrofit_bookmarks(source_path, output_path=output_path)

    # source untouched
    assert list_bookmarks(source_path) == []
    # output has bookmarks (alef+bet from markers; gimel+dalet via fallback)
    output_bookmarks = set(list_bookmarks(output_path))
    assert {"block-alef", "block-bet"} <= output_bookmarks


def test_retrofit_ignores_marker_in_middle_of_text(tmp_path: Path) -> None:
    """A lone 'י' inside body text (not at start) should not be detected as block."""
    docx_path = tmp_path / "src.docx"
    _save_docx_with_paragraphs(
        docx_path,
        [
            "א. תחילת הבלוק",
            "טקסט עם האות י לא בתחילת שורה, זה לא בלוק.",
            "ב. בלוק שני",
        ],
    )

    report = retrofit_bookmarks(docx_path, backup=False)

    added = report["bookmarks_added"]
    assert "block-alef" in added
    assert "block-bet" in added
    # 'block-yod' should NOT be detected
    assert "block-yod" not in added


def test_retrofit_out_of_order_markers_picks_forward_only(tmp_path: Path) -> None:
    """If a later-ordered marker appears first, earlier ones are treated as missing.

    Scanner advances forward through BLOCK_ORDER — it won't go back to claim
    an earlier marker after already seeing a later one. block-alef will be
    surfaced via the cover-block fallback rather than from the actual marker.
    """
    docx_path = tmp_path / "src.docx"
    _save_docx_with_paragraphs(
        docx_path,
        [
            "ב. מופיע ראשון",
            "א. מופיע אחרי — יידחה כי 'א' לפני 'ב'",
            "ג. בלוק גימל",
        ],
    )

    report = retrofit_bookmarks(docx_path, backup=False)

    added = report["bookmarks_added"]
    assert "block-bet" in added
    assert "block-gimel" in added
    # 'א' marker was skipped by forward-scan, so it appears as a structural
    # fallback (no real content), not from real detection.
    assert "block-alef" in report["structural_fallback"]


def test_retrofit_empty_document_reports_all_missing(tmp_path: Path) -> None:
    """A document with no paragraphs adds nothing and reports 12 missing blocks."""
    docx_path = tmp_path / "empty.docx"
    _save_docx_with_paragraphs(docx_path, [])

    report = retrofit_bookmarks(docx_path, backup=False)

    assert report["bookmarks_added"] == []
    assert len(report["missing_blocks"]) == 12


def test_retrofit_al_ken_midblock_does_not_capture_yod_bet(tmp_path: Path) -> None:
    """'על כן, במקום בו...' in the middle of block-yod must not be captured as yod-bet."""
    docx_path = tmp_path / "src.docx"
    _save_docx_with_paragraphs(
        docx_path,
        [
            "פתח דבר",
            "רקע עובדתי קצר.",
            "דיון והכרעה",
            "על כן, במקום בו קיים פתרון חניה אין מקום להתערב.",
            "סוף דבר",
            "פסק דין סופי.",
        ],
    )

    report = retrofit_bookmarks(docx_path, backup=False)

    added = report["bookmarks_added"]
    assert "block-yod-alef" in added
    assert "block-yod-bet" not in added


def test_retrofit_al_ken_operative_captures_yod_bet(tmp_path: Path) -> None:
    """'על כן, אנו מחליטים' is genuinely operative — it must be captured as yod-bet."""
    docx_path = tmp_path / "src.docx"
    _save_docx_with_paragraphs(
        docx_path,
        [
            "דיון והכרעה",
            "נימוקים מפורטים.",
            "סוף דבר",
            "על כן, אנו מחליטים לקבל את הערר.",
        ],
    )

    report = retrofit_bookmarks(docx_path, backup=False)

    added = report["bookmarks_added"]
    assert "block-yod-alef" in added
    assert "block-yod-bet" in added


def test_retrofit_vav_al_hamekarkein_pattern(tmp_path: Path) -> None:
    """'על המקרקעין חלות התכניות' — block-vav pattern from corpus 1130."""
    docx_path = tmp_path / "src.docx"
    _save_docx_with_paragraphs(
        docx_path,
        [
            "פתח דבר",
            "המקרקעין מצויים בכתובת...",
            "על המקרקעין חלות התכניות הבאות",
            "פירוט תכניות.",
            "תמצית טענות הצדדים",
        ],
    )

    report = retrofit_bookmarks(docx_path, backup=False)

    assert "block-vav" in report["bookmarks_added"]


def test_retrofit_cover_blocks_structural_fallback(tmp_path: Path) -> None:
    """If alef-dalet are not in the file — get empty bookmarks at the start (structural_fallback)."""
    docx_path = tmp_path / "src.docx"
    _save_docx_with_paragraphs(
        docx_path,
        [
            "פתח דבר",
            "תוכן.",
            "דיון והכרעה",
            "הכרעה.",
        ],
    )

    report = retrofit_bookmarks(docx_path, backup=False)

    # Each cover block is reported as added (via fallback), never as missing.
    for cover_block in _COVER_BLOCKS:
        assert cover_block in report["bookmarks_added"]
        assert cover_block not in report["missing_blocks"]
    assert set(report["structural_fallback"]) == {
        "block-alef",
        "block-bet",
        "block-gimel",
        "block-dalet",
    }


def test_retrofit_no_double_fallback_when_cover_present(tmp_path: Path) -> None:
    """If block-alef genuinely exists in the file — do not emit a structural fallback."""
    docx_path = tmp_path / "src.docx"
    _make_docx_with_hebrew_blocks(docx_path, list(_ALL_STANDARD_MARKERS))

    report = retrofit_bookmarks(docx_path, backup=False)

    assert report["structural_fallback"] == []