Pre-existing agent updates + analysis DOCX export

Updates accumulated from prior sessions: - HEARTBEAT: company-based filtering (CMP/CMPA) rules - legal-qa, legal-researcher: routine updates - analysis_docx_exporter: new service for analysis DOCX export - compose page: "הורד כ-DOCX" button for analysis - decision_template.docx: template for exporter Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-16 18:49:10 +00:00
parent 3da4d73498
commit 28daff58be
7 changed files with 665 additions and 3 deletions
--- a/scripts/convert_decision_template.py
+++ b/scripts/convert_decision_template.py
@@ -0,0 +1,102 @@
+"""Convert דפנה's decision .dotx template to a loadable .docx file.
+
+python-docx cannot open .dotx files directly (content type is
+`...template.main+xml` rather than `...document.main+xml`). This script
+produces a sibling .docx by rewriting [Content_Types].xml and dropping
+the `word/glossary/` part (which is template-specific and can interfere
+with plain Document() loading).
+
+The output preserves every style definition, numbering, fonts, and
+section properties — the only things we want from the template.
+
+Run once (or whenever the source .dotx changes):
+
+    python scripts/convert_decision_template.py
+
+Input:  data/training/טיוטת החלטה.dotx
+Output: skills/docx/decision_template.docx
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+import zipfile
+from pathlib import Path
+
+# This script sits one directory below the repository root.
+_SCRIPT_DIR = Path(__file__).resolve().parent
+REPO_ROOT = _SCRIPT_DIR.parent
+
+# Source template (.dotx) and the converted document (.docx) written from it.
+SRC = REPO_ROOT.joinpath("data", "training", "טיוטת החלטה.dotx")
+DST = REPO_ROOT.joinpath("skills", "docx", "decision_template.docx")
+
+# Main-part content types as they appear in [Content_Types].xml: the value
+# a template carries, and the value a regular document carries.
+_WORDML = "application/vnd.openxmlformats-officedocument.wordprocessingml"
+TEMPLATE_CONTENT_TYPE = _WORDML + ".template.main+xml"
+DOCUMENT_CONTENT_TYPE = _WORDML + ".document.main+xml"
+
+
+def convert(src: Path, dst: Path) -> None:
+    """Write a python-docx-loadable .docx copy of the .dotx template *src*.
+
+    Every archive member is copied to *dst* except those under
+    ``word/glossary/``. Two members are rewritten on the way:
+
+    * ``[Content_Types].xml`` — the template main content type becomes the
+      document one, and ``<Override>`` entries for glossary parts are
+      removed.
+    * ``word/_rels/document.xml.rels`` — the ``glossaryDocument``
+      relationship is removed so it does not dangle.
+
+    The archive is assembled in a sibling ``<name>.tmp`` file and renamed
+    over *dst* only once complete, so a run that fails part-way leaves a
+    previously converted *dst* untouched instead of truncated.
+
+    Args:
+        src: Path to the source .dotx template.
+        dst: Path of the .docx to write; missing parent directories are
+            created.
+
+    Raises:
+        FileNotFoundError: If *src* does not exist.
+    """
+    if not src.exists():
+        raise FileNotFoundError(f"Template not found: {src}")
+    dst.parent.mkdir(parents=True, exist_ok=True)
+
+    tmp = dst.with_name(dst.name + ".tmp")
+    try:
+        with zipfile.ZipFile(src, "r") as zin:
+            with zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zout:
+                for name in zin.namelist():
+                    # Drop glossary part — template-only, confuses Document()
+                    if name.startswith("word/glossary/"):
+                        continue
+                    data = zin.read(name)
+                    if name == "[Content_Types].xml":
+                        text = data.decode("utf-8")
+                        text = text.replace(
+                            TEMPLATE_CONTENT_TYPE, DOCUMENT_CONTENT_TYPE
+                        )
+                        # Drop every <Override> that points at
+                        # /word/glossary/..., wherever PartName sits among
+                        # the element's attributes.
+                        text = re.sub(
+                            r'<Override\s[^>]*?\b'
+                            r'PartName="/word/glossary/[^"]*"[^>]*?/>',
+                            "",
+                            text,
+                        )
+                        data = text.encode("utf-8")
+                    elif name == "word/_rels/document.xml.rels":
+                        # Strip the glossaryDocument relationship — the
+                        # target part is being removed, so the ref would
+                        # dangle.
+                        text = data.decode("utf-8")
+                        text = re.sub(
+                            r'<Relationship\s+[^>]*?glossaryDocument[^>]*?/>',
+                            "",
+                            text,
+                        )
+                        data = text.encode("utf-8")
+                    zout.writestr(name, data)
+        # Both archives are closed at this point; publish the finished file.
+        tmp.replace(dst)
+    finally:
+        # No-op after a successful replace; removes the partial file on error.
+        tmp.unlink(missing_ok=True)
+
+
+def verify(dst: Path) -> None:
+    """Smoke-test the converted file by opening it with python-docx.
+
+    Looks for a fixed handful of style names in *dst*. When all of them are
+    present a confirmation goes to stdout; otherwise a warning naming the
+    absent ones goes to stderr. Missing styles do not raise.
+    """
+    # Imported locally: python-docx is only needed for this check.
+    from docx import Document
+
+    expected = {"Normal", "Heading 2", "Quote", "List Paragraph", "Title"}
+    document = Document(str(dst))
+    present = expected.intersection(style.name for style in document.styles)
+    absent = expected.difference(present)
+    if not absent:
+        print(f"OK — all key styles present: {sorted(present)}")
+        return
+    print(f"WARN: missing styles: {absent}", file=sys.stderr)
+
+
+def main() -> None:
+    """Convert the template at SRC into DST, then check the result loads.
+
+    Prints both paths up front, then the size of the written file, and
+    finally whatever verify() reports.
+    """
+    print(f"Source: {SRC}")
+    print(f"Dest:   {DST}")
+    convert(SRC, DST)
+    written = DST.stat().st_size
+    print(f"Wrote {written:,} bytes")
+    verify(DST)
+
+
+# Run the conversion only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()