Pre-existing agent updates + analysis DOCX export
Updates accumulated from prior sessions:
- HEARTBEAT: company-based filtering (CMP/CMPA) rules
- legal-qa, legal-researcher: routine updates
- analysis_docx_exporter: new service for analysis DOCX export
- compose page: "הורד כ-DOCX" ("Download as DOCX") button for analysis
- decision_template.docx: template for exporter

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
102
scripts/convert_decision_template.py
Normal file
102
scripts/convert_decision_template.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""Convert דפנה's decision .dotx template to a loadable .docx file.
|
||||
|
||||
python-docx cannot open .dotx files directly (content type is
|
||||
`...template.main+xml` rather than `...document.main+xml`). This script
|
||||
produces a sibling .docx by rewriting [Content_Types].xml and dropping
|
||||
the `word/glossary/` part (which is template-specific and can interfere
|
||||
with plain Document() loading).
|
||||
|
||||
The output preserves every style definition, numbering, fonts, and
|
||||
section properties — the only things we want from the template.
|
||||
|
||||
Run once (or whenever the source .dotx changes):
|
||||
|
||||
python scripts/convert_decision_template.py
|
||||
|
||||
Input: data/training/טיוטת החלטה.dotx
|
||||
Output: skills/docx/decision_template.docx
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
# The script lives in <repo>/scripts/, so the repository root is two levels
# above this file.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Source template (.dotx) and the converted package (.docx) written from it.
SRC = REPO_ROOT.joinpath("data", "training", "טיוטת החלטה.dotx")
DST = REPO_ROOT.joinpath("skills", "docx", "decision_template.docx")

# Main-part content types as they appear in [Content_Types].xml. The two
# values differ only in the "template" / "document" segment.
_WORDPROCESSINGML_PREFIX = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml."
)
TEMPLATE_CONTENT_TYPE = _WORDPROCESSINGML_PREFIX + "template.main+xml"
DOCUMENT_CONTENT_TYPE = _WORDPROCESSINGML_PREFIX + "document.main+xml"
|
||||
|
||||
|
||||
def convert(src: Path, dst: Path) -> None:
    """Rewrite the .dotx package at *src* as a plain .docx package at *dst*.

    Every ZIP member is copied except those under ``word/glossary/``.
    Two members are edited on the way through:

    * ``[Content_Types].xml`` — the template main content type is replaced
      with the document one, and self-closing ``<Override>`` entries whose
      ``PartName`` points under ``/word/glossary/`` are removed.
    * ``word/_rels/document.xml.rels`` — the ``glossaryDocument``
      relationship is removed, since its target part is dropped.

    The package is assembled in a temporary sibling of *dst* and moved into
    place only after it has been written completely. A failed run therefore
    never leaves a truncated file at *dst*, and *src* and *dst* may be the
    same path.

    Args:
        src: Path of the .dotx template to read.
        dst: Path of the .docx file to write; parent directories are
            created if missing.

    Raises:
        FileNotFoundError: If *src* does not exist.
    """
    if not src.exists():
        raise FileNotFoundError(f"Template not found: {src}")
    dst.parent.mkdir(parents=True, exist_ok=True)

    # Write next to the destination so the final move stays on one filesystem.
    tmp = dst.with_name(dst.name + ".tmp")
    try:
        with zipfile.ZipFile(src, "r") as zin:
            with zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zout:
                for name in zin.namelist():
                    # Drop glossary part — template-only, confuses Document()
                    if name.startswith("word/glossary/"):
                        continue
                    data = zin.read(name)
                    if name == "[Content_Types].xml":
                        text = data.decode("utf-8")
                        text = text.replace(
                            TEMPLATE_CONTENT_TYPE, DOCUMENT_CONTENT_TYPE
                        )
                        # Drop every <Override> that points at
                        # /word/glossary/...
                        text = re.sub(
                            r'<Override\s+PartName="/word/glossary/[^"]*"[^>]*?/>',
                            "",
                            text,
                        )
                        data = text.encode("utf-8")
                    elif name == "word/_rels/document.xml.rels":
                        # Strip the glossaryDocument relationship — the
                        # target part is being removed, so the ref would
                        # dangle.
                        text = data.decode("utf-8")
                        text = re.sub(
                            r'<Relationship\s+[^>]*?glossaryDocument[^>]*?/>',
                            "",
                            text,
                        )
                        data = text.encode("utf-8")
                    zout.writestr(name, data)
        # Both archives are closed here, so replacing dst is safe even when
        # it is the same file as src.
        tmp.replace(dst)
    except BaseException:
        # Never leave a half-written package behind.
        if tmp.exists():
            tmp.unlink()
        raise
|
||||
|
||||
|
||||
def verify(dst: Path) -> None:
    """Open *dst* with python-docx and report whether the key styles exist.

    Prints an OK line listing the styles when all of them are present;
    otherwise prints a warning naming the missing ones to stderr.
    """
    # Imported here so python-docx is only required when verification runs.
    from docx import Document

    key_styles = {"Normal", "Heading 2", "Quote", "List Paragraph", "Title"}
    document = Document(str(dst))
    available = {style.name for style in document.styles}
    found = key_styles & available
    missing = key_styles - found

    if not missing:
        print(f"OK — all key styles present: {sorted(found)}")
        return
    print(f"WARN: missing styles: {missing}", file=sys.stderr)
|
||||
|
||||
|
||||
def main() -> None:
    """Convert SRC to DST, report the output size, then verify the result."""
    print(f"Source: {SRC}")
    print(f"Dest: {DST}")

    convert(SRC, DST)

    written = DST.stat().st_size
    print(f"Wrote {written:,} bytes")

    verify(DST)
|
||||
|
||||
|
||||
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user