"""Convert דפנה's decision .dotx template to a loadable .docx file. python-docx cannot open .dotx files directly (content type is `...template.main+xml` rather than `...document.main+xml`). This script produces a sibling .docx by rewriting [Content_Types].xml and dropping the `word/glossary/` part (which is template-specific and can interfere with plain Document() loading). The output preserves every style definition, numbering, fonts, and section properties — the only things we want from the template. Run once (or whenever the source .dotx changes): python scripts/convert_decision_template.py Input: data/training/טיוטת החלטה.dotx Output: skills/docx/decision_template.docx """ from __future__ import annotations import re import sys import zipfile from pathlib import Path REPO_ROOT = Path(__file__).resolve().parent.parent SRC = REPO_ROOT / "data" / "training" / "טיוטת החלטה.dotx" DST = REPO_ROOT / "skills" / "docx" / "decision_template.docx" TEMPLATE_CONTENT_TYPE = ( "application/vnd.openxmlformats-officedocument." "wordprocessingml.template.main+xml" ) DOCUMENT_CONTENT_TYPE = ( "application/vnd.openxmlformats-officedocument." "wordprocessingml.document.main+xml" ) def convert(src: Path, dst: Path) -> None: if not src.exists(): raise FileNotFoundError(f"Template not found: {src}") dst.parent.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(src, "r") as zin: names = zin.namelist() with zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout: for name in names: # Drop glossary part — template-only, confuses Document() if name.startswith("word/glossary/"): continue data = zin.read(name) if name == "[Content_Types].xml": text = data.decode("utf-8") text = text.replace( TEMPLATE_CONTENT_TYPE, DOCUMENT_CONTENT_TYPE ) # Drop every that points at /word/glossary/... text = re.sub( r']*?/>', "", text, ) data = text.encode("utf-8") elif name == "word/_rels/document.xml.rels": # Strip the glossaryDocument relationship — the target # part is being removed, so the ref would dangle. text = data.decode("utf-8") text = re.sub( r']*?glossaryDocument[^>]*?/>', "", text, ) data = text.encode("utf-8") zout.writestr(name, data) def verify(dst: Path) -> None: """Load with python-docx and print a few style names to confirm it works.""" from docx import Document doc = Document(str(dst)) key_styles = {"Normal", "Heading 2", "Quote", "List Paragraph", "Title"} found = {s.name for s in doc.styles if s.name in key_styles} missing = key_styles - found if missing: print(f"WARN: missing styles: {missing}", file=sys.stderr) else: print(f"OK — all key styles present: {sorted(found)}") def main() -> None: print(f"Source: {SRC}") print(f"Dest: {DST}") convert(SRC, DST) print(f"Wrote {DST.stat().st_size:,} bytes") verify(DST) if __name__ == "__main__": main()