Pre-existing agent updates + analysis DOCX export

Updates accumulated from prior sessions:
- HEARTBEAT: company-based filtering (CMP/CMPA) rules
- legal-qa, legal-researcher: routine updates
- analysis_docx_exporter: new service for analysis DOCX export
- compose page: "הורד כ-DOCX" button for analysis
- decision_template.docx: template for exporter

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 18:49:10 +00:00
parent 3da4d73498
commit 28daff58be
7 changed files with 665 additions and 3 deletions

View File

@@ -0,0 +1,102 @@
"""Convert דפנה's decision .dotx template to a loadable .docx file.
python-docx cannot open .dotx files directly (content type is
`...template.main+xml` rather than `...document.main+xml`). This script
produces a sibling .docx by rewriting [Content_Types].xml and dropping
the `word/glossary/` part (which is template-specific and can interfere
with plain Document() loading).
The output preserves every style definition, numbering, fonts, and
section properties — the only things we want from the template.
Run once (or whenever the source .dotx changes):
python scripts/convert_decision_template.py
Input: data/training/טיוטת החלטה.dotx
Output: skills/docx/decision_template.docx
"""
from __future__ import annotations
import re
import sys
import zipfile
from pathlib import Path
# Repository root: this script is run as scripts/<name>.py, so the root is
# the directory one level above the script's own folder.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Input: the original Word template (.dotx) kept with the training data.
SRC = REPO_ROOT.joinpath("data", "training", "טיוטת החלטה.dotx")

# Output: the converted .docx that python-docx can load.
DST = REPO_ROOT.joinpath("skills", "docx", "decision_template.docx")
# Content type that marks the main part as a Word *template* (.dotx).
TEMPLATE_CONTENT_TYPE = (
    "application/vnd.openxmlformats-officedocument."
    "wordprocessingml.template.main+xml"
)
# Content type of the main part of a regular Word document (.docx).
DOCUMENT_CONTENT_TYPE = (
    "application/vnd.openxmlformats-officedocument."
    "wordprocessingml.document.main+xml"
)

# Archive members under this prefix form the template-only glossary part.
_GLOSSARY_PREFIX = "word/glossary/"

# One self-closing <Override> whose PartName lies under /word/glossary/.
# PartName may appear anywhere among the attributes; `[^>]` keeps the match
# inside a single element.
_GLOSSARY_OVERRIDE_RE = re.compile(
    r'<Override\b[^>]*?\sPartName\s*=\s*"/word/glossary/[^"]*"[^>]*?/>'
)

# One self-closing <Relationship> that mentions glossaryDocument.
_GLOSSARY_RELATIONSHIP_RE = re.compile(
    r'<Relationship\s+[^>]*?glossaryDocument[^>]*?/>'
)


def _rewrite_content_types(xml: str) -> str:
    """Switch the template content type to document and drop glossary overrides."""
    xml = xml.replace(TEMPLATE_CONTENT_TYPE, DOCUMENT_CONTENT_TYPE)
    return _GLOSSARY_OVERRIDE_RE.sub("", xml)


def _strip_glossary_relationship(xml: str) -> str:
    """Remove the glossaryDocument relationship, whose target part is dropped."""
    return _GLOSSARY_RELATIONSHIP_RE.sub("", xml)


def convert(src: Path, dst: Path) -> None:
    """Write a .docx copy of the .dotx archive at *src* to *dst*.

    Every archive member is copied unchanged except:

    * members under ``word/glossary/`` are omitted;
    * ``[Content_Types].xml`` has the template content type replaced by the
      document content type and its glossary ``<Override>`` entries removed;
    * ``word/_rels/document.xml.rels`` loses its glossaryDocument
      relationship, which would otherwise point at a removed part.

    Args:
        src: Path of the source template archive.
        dst: Path of the archive to create; missing parent directories are
            created.

    Raises:
        FileNotFoundError: If *src* does not exist.
    """
    if not src.exists():
        raise FileNotFoundError(f"Template not found: {src}")

    # Read and transform everything before touching the destination, so a
    # failure while reading or decoding the source never leaves a truncated
    # output file behind.
    entries: list[tuple[zipfile.ZipInfo, bytes]] = []
    with zipfile.ZipFile(src, "r") as zin:
        for info in zin.infolist():
            name = info.filename
            # Drop glossary part — template-only, confuses Document()
            if name.startswith(_GLOSSARY_PREFIX):
                continue
            data = zin.read(info)
            if name == "[Content_Types].xml":
                data = _rewrite_content_types(data.decode("utf-8")).encode("utf-8")
            elif name == "word/_rels/document.xml.rels":
                data = _strip_glossary_relationship(
                    data.decode("utf-8")
                ).encode("utf-8")
            entries.append((info, data))

    dst.parent.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(dst, "w", zipfile.ZIP_DEFLATED) as zout:
        for info, data in entries:
            # Reusing the source ZipInfo keeps each member's timestamp and
            # attributes, so re-running on an unchanged template does not
            # stamp entries with the current time.
            zout.writestr(info, data)
def verify(dst: Path) -> None:
    """Open *dst* with python-docx and report whether the key styles exist.

    Prints an OK line listing the styles when all of them are found;
    otherwise prints a warning naming the missing ones to stderr.
    """
    # Imported here rather than at module level, so importing this module
    # does not itself require python-docx.
    from docx import Document

    expected = {"Normal", "Heading 2", "Quote", "List Paragraph", "Title"}
    document = Document(str(dst))

    present = set()
    for style in document.styles:
        if style.name in expected:
            present.add(style.name)

    absent = expected - present
    if not absent:
        print(f"OK — all key styles present: {sorted(present)}")
        return
    print(f"WARN: missing styles: {absent}", file=sys.stderr)
def main() -> None:
    """Convert the configured template, report its size, then verify it."""
    print(f"Source: {SRC}")
    print(f"Dest: {DST}")
    convert(SRC, DST)
    written = DST.stat().st_size
    print(f"Wrote {written:,} bytes")
    verify(DST)


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()