"""Small DOCX writer built with the Python standard library.
The writer intentionally supports only the structures this platform needs:
headings, paragraphs, bullet lists, simple tables, and page breaks. That keeps
the reporting pipeline portable inside operational mirrors where optional DOCX
libraries may not be installed.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Sequence
from zipfile import ZIP_DEFLATED, ZipFile
import html
# Namespace URI of the WordprocessingML main schema.
# NOTE(review): presumably what the ``w:`` prefix on the generated tags is bound to - confirm.
WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
# Namespace URI of the Open Packaging Conventions relationships schema.
REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
# Namespace URI of the office-document relationships schema.
DOC_REL_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
# Code points that XML 1.0 does not allow in a document: C0 controls other
# than TAB/LF/CR, lone surrogates, and U+FFFE/U+FFFF. Mapping them to None
# makes ``str.translate`` delete them.
_XML_INVALID_CODEPOINTS = dict.fromkeys(
    [*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20), *range(0xD800, 0xE000), 0xFFFE, 0xFFFF]
)


def esc(value: object) -> str:
    """Return ``str(value)`` made safe for XML text and attribute values.

    Characters that XML 1.0 forbids are removed first, because a single
    stray control character makes the whole part unparseable. The remaining
    text is passed through ``html.escape`` with ``quote=True`` so that
    ``&``, ``<``, ``>`` and both quote characters are escaped.

    Args:
        value: Any object; it is converted with ``str()``.

    Returns:
        The escaped string.
    """
    cleaned = str(value).translate(_XML_INVALID_CODEPOINTS)
    return html.escape(cleaned, quote=True)
def tag(name: str, content: str = "", attrs: dict[str, str] | None = None) -> str:
    """Render an XML element with an explicit opening and closing tag.

    Args:
        name: Qualified element name, e.g. ``"w:p"``.
        content: Already-serialized inner XML; it is inserted verbatim and
            is NOT escaped here.
        attrs: Optional attribute mapping. Values are escaped with ``esc``;
            keys are written as given. ``None`` or an empty mapping renders
            no attributes.

    Returns:
        ``<name attrs>content</name>`` as a string.
    """
    attr_text = ""
    if attrs:
        attr_text = " " + " ".join(f'{key}="{esc(value)}"' for key, value in attrs.items())
    # The closing tag needs the "</" prefix; without it the output is not XML.
    return f"<{name}{attr_text}>{content}</{name}>"
def empty_tag(name: str, attrs: dict[str, str] | None = None) -> str:
    """Render a self-closing XML element.

    Args:
        name: Qualified element name, e.g. ``"w:br"``.
        attrs: Optional attribute mapping; values are escaped with ``esc``.
            ``None`` or an empty mapping renders a bare ``<name/>``.

    Returns:
        The serialized self-closing element.
    """
    if not attrs:
        return f"<{name}/>"
    rendered = [f'{attr_name}="{esc(attr_value)}"' for attr_name, attr_value in attrs.items()]
    return "<" + name + " " + " ".join(rendered) + "/>"
def text_run(text: str, bold: bool = False, italic: bool = False) -> str:
    """Render *text* as a ``w:r`` run containing a single ``w:t`` element.

    Args:
        text: Run text; it is escaped with ``esc`` before being embedded.
        bold: When true, ``<w:b/>`` is added to the run properties.
        italic: When true, ``<w:i/>`` is added to the run properties.

    Returns:
        The serialized run. ``w:rPr`` is emitted only when at least one
        formatting flag is set.
    """
    flags = []
    if bold:
        flags.append(empty_tag("w:b"))
    if italic:
        flags.append(empty_tag("w:i"))
    run_props = tag("w:rPr", "".join(flags)) if flags else ""

    # A leading or trailing space is marked with xml:space="preserve".
    has_edge_space = text.startswith(" ") or text.endswith(" ")
    text_attrs = {"xml:space": "preserve"} if has_edge_space else None
    return tag("w:r", run_props + tag("w:t", esc(text), text_attrs))
def paragraph_xml(text: str = "", style: str | None = None, bullet: bool = False) -> str:
    """Render a ``w:p`` paragraph holding one unformatted run.

    Args:
        text: Paragraph text (escaped by ``text_run``).
        style: Optional paragraph style id written to ``w:pStyle``.
        bullet: When true, the paragraph is attached to list ``numId`` 1 at
            indent level 0.

    Returns:
        The serialized paragraph. ``w:pPr`` is emitted only when a style or
        bullet setting is present.
    """
    settings = []
    if style:
        settings.append(tag("w:pStyle", "", {"w:val": style}))
    if bullet:
        indent_level = tag("w:ilvl", "", {"w:val": "0"})
        list_ref = tag("w:numId", "", {"w:val": "1"})
        settings.append(tag("w:numPr", indent_level + list_ref))
    para_props = tag("w:pPr", "".join(settings)) if settings else ""
    return tag("w:p", para_props + text_run(text))
def heading_xml(text: str, level: int) -> str:
    """Render a heading paragraph styled ``Heading1`` .. ``Heading4``.

    Args:
        text: Heading text.
        level: Requested heading level; it is converted with ``int()`` and
            clamped to the range 1-4.

    Returns:
        The serialized paragraph produced by ``paragraph_xml``.
    """
    clamped = int(level)
    if clamped < 1:
        clamped = 1
    elif clamped > 4:
        clamped = 4
    return paragraph_xml(text, style="Heading" + str(clamped))
def page_break_xml() -> str:
    """Render a paragraph whose only run is a page-type ``w:br`` break."""
    break_xml = empty_tag("w:br", {"w:type": "page"})
    run_xml = tag("w:r", break_xml)
    return tag("w:p", run_xml)
def cell_xml(value: str) -> str:
    """Render a ``w:tc`` table cell containing *value* as one paragraph.

    Every cell is given the same fixed width: ``w:w="2400"`` with
    ``w:type="dxa"``.
    """
    width_xml = tag("w:tcW", "", {"w:w": "2400", "w:type": "dxa"})
    cell_props = tag("w:tcPr", width_xml)
    return tag("w:tc", cell_props + paragraph_xml(value))
def row_xml(values: Sequence[str]) -> str:
    """Render a ``w:tr`` table row with one cell per item in *values*."""
    cells = [cell_xml(item) for item in values]
    return tag("w:tr", "".join(cells))
def table_xml(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> str:
    """Render a bordered ``w:tbl`` table.

    Args:
        headers: Cell values of the first row; passed to ``row_xml`` as-is.
        rows: Body rows; every cell is converted with ``str()``.

    Returns:
        The serialized table: table properties with six identical single-line
        borders (outer edges plus inside horizontal/vertical), then the
        header row, then the body rows.
    """
    line_attrs = {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}
    edges = ("w:top", "w:left", "w:bottom", "w:right", "w:insideH", "w:insideV")
    borders = "".join(empty_tag(edge, line_attrs) for edge in edges)
    table_props = tag("w:tblPr", tag("w:tblBorders", borders))

    rendered_rows = [row_xml(headers)]
    for row in rows:
        rendered_rows.append(row_xml([str(cell) for cell in row]))
    return tag("w:tbl", table_props + "".join(rendered_rows))
@dataclass(slots=True)
class DocxElement:
kind: str
text: str = ""
level: int = 1
headers: tuple[str, ...] = ()
rows: tuple[tuple[str, ...], ...] = ()
@dataclass
class DocxDocument:
    """Minimal document model that can be written to a .docx file.

    Content is queued in ``elements`` through the builder methods and is
    only serialized when ``write`` (or one of the part renderers) is called.
    The title is always rendered as the first level-1 heading of the body.
    """

    title: str
    subject: str = "Tudo Para IA Mais Humana"
    creator: str = "mais_humana"
    elements: list[DocxElement] = field(default_factory=list)

    def heading(self, text: str, level: int = 1) -> None:
        """Queue a heading; the level is clamped when rendered."""
        self.elements.append(DocxElement(kind="heading", text=text, level=level))

    def paragraph(self, text: str = "") -> None:
        """Queue a plain paragraph (an empty string gives a blank paragraph)."""
        self.elements.append(DocxElement(kind="paragraph", text=text))

    def bullet(self, text: str) -> None:
        """Queue a bullet-list item."""
        self.elements.append(DocxElement(kind="bullet", text=text))

    def table(self, headers: Sequence[str], rows: Sequence[Sequence[object]]) -> None:
        """Queue a table; every body cell is converted with ``str()``."""
        normalized_rows = tuple(tuple(str(value) for value in row) for row in rows)
        self.elements.append(DocxElement(kind="table", headers=tuple(headers), rows=normalized_rows))

    def page_break(self) -> None:
        """Queue a page break."""
        self.elements.append(DocxElement(kind="page_break"))

    def extend_paragraphs(self, lines: Iterable[str]) -> None:
        """Queue one element per line using a simple heuristic.

        Blank lines become empty paragraphs, lines shorter than 80
        characters that end with ``:`` become level-2 headings (without the
        colon), and everything else becomes a paragraph.
        """
        for line in lines:
            stripped = str(line).strip()
            if not stripped:
                self.paragraph("")
            elif stripped.endswith(":") and len(stripped) < 80:
                self.heading(stripped[:-1], 2)
            else:
                self.paragraph(stripped)

    def document_body(self) -> str:
        """Return the complete ``word/document.xml`` part.

        The part consists of the XML declaration, a ``w:document`` root
        bound to ``WORD_NS``, and a ``w:body`` holding the title heading,
        every queued element in order, and the section properties.
        Elements with an unrecognized ``kind`` are skipped.
        """
        body = [heading_xml(self.title, 1)]
        for element in self.elements:
            if element.kind == "heading":
                body.append(heading_xml(element.text, element.level))
            elif element.kind == "paragraph":
                body.append(paragraph_xml(element.text))
            elif element.kind == "bullet":
                body.append(paragraph_xml(element.text, bullet=True))
            elif element.kind == "table":
                body.append(table_xml(element.headers, element.rows))
            elif element.kind == "page_break":
                body.append(page_break_xml())
        # 11906 x 16838 twips is an A4 page; margins are 1440 top/bottom and 1080 left/right.
        section_props = tag(
            "w:sectPr",
            empty_tag("w:pgSz", {"w:w": "11906", "w:h": "16838"})
            + empty_tag("w:pgMar", {"w:top": "1440", "w:right": "1080", "w:bottom": "1440", "w:left": "1080"}),
        )
        # Without the root element and its namespace binding the ``w:``
        # prefixes are undeclared and the part is not well-formed XML.
        return (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            f'<w:document xmlns:w="{WORD_NS}">'
            + tag("w:body", "".join(body) + section_props)
            + "</w:document>"
        )

    def core_properties(self) -> str:
        """Return the ``docProps/core.xml`` part (title, subject, creator)."""
        return (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            "<cp:coreProperties"
            ' xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"'
            ' xmlns:dc="http://purl.org/dc/elements/1.1/">'
            f"<dc:title>{esc(self.title)}</dc:title>"
            f"<dc:subject>{esc(self.subject)}</dc:subject>"
            f"<dc:creator>{esc(self.creator)}</dc:creator>"
            "<cp:lastModifiedBy>mais_humana</cp:lastModifiedBy>"
            "</cp:coreProperties>"
        )

    def styles(self) -> tuple[str, str]:
        """Return ``(styles_xml, numbering_xml)`` for the two Word parts.

        The numbering part defines the single bullet list (``numId`` 1) that
        bullet paragraphs refer to.
        """
        styles = [
            style_xml("Normal", "paragraph", "Normal", "Calibri", 22),
            style_xml("Heading1", "paragraph", "heading 1", "Aptos Display", 32, bold=True),
            style_xml("Heading2", "paragraph", "heading 2", "Aptos Display", 26, bold=True),
            style_xml("Heading3", "paragraph", "heading 3", "Aptos Display", 23, bold=True),
            style_xml("Heading4", "paragraph", "heading 4", "Aptos Display", 21, bold=True),
        ]
        numbering = (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            f'<w:numbering xmlns:w="{WORD_NS}">'
            '<w:abstractNum w:abstractNumId="0">'
            '<w:lvl w:ilvl="0">'
            '<w:start w:val="1"/>'
            '<w:numFmt w:val="bullet"/>'
            '<w:lvlText w:val="\u2022"/>'
            '<w:lvlJc w:val="left"/>'
            '<w:pPr><w:ind w:left="720" w:hanging="360"/></w:pPr>'
            "</w:lvl>"
            "</w:abstractNum>"
            '<w:num w:numId="1"><w:abstractNumId w:val="0"/></w:num>'
            "</w:numbering>"
        )
        return (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            f'<w:styles xmlns:w="{WORD_NS}">' + "".join(styles) + "</w:styles>"
        ), numbering

    def write(self, path: Path | str) -> Path:
        """Serialize the document to *path* as a .docx (ZIP) package.

        Args:
            path: Destination file; missing parent directories are created.
                A plain string is accepted as well as a ``Path``.

        Returns:
            The destination as a ``Path``.
        """
        target = Path(path)
        styles, numbering = self.styles()
        # Render every part before touching the file system so a rendering
        # error cannot leave a truncated archive behind.
        parts = (
            ("[Content_Types].xml", content_types_xml()),
            ("_rels/.rels", package_relationships_xml()),
            ("docProps/core.xml", self.core_properties()),
            ("word/document.xml", self.document_body()),
            ("word/styles.xml", styles),
            ("word/numbering.xml", numbering),
            ("word/_rels/document.xml.rels", document_relationships_xml()),
        )
        target.parent.mkdir(parents=True, exist_ok=True)
        with ZipFile(target, "w", compression=ZIP_DEFLATED) as archive:
            for part_name, payload in parts:
                archive.writestr(part_name, payload)
        return target
def style_xml(style_id: str, style_type: str, name: str, font: str, size: int, bold: bool = False) -> str:
    """Render one ``w:style`` definition.

    Args:
        style_id: Value of the ``w:styleId`` attribute.
        style_type: Value of the ``w:type`` attribute, e.g. ``"paragraph"``.
        name: Style name written to ``w:name``.
        font: Font name used for both ``w:ascii`` and ``w:hAnsi``.
        size: Value written to ``w:sz`` (converted with ``str()``).
        bold: When true, ``<w:b/>`` is appended to the run properties.

    Returns:
        The serialized style element.
    """
    run_parts = [
        tag("w:rFonts", "", {"w:ascii": font, "w:hAnsi": font}),
        tag("w:sz", "", {"w:val": str(size)}),
    ]
    if bold:
        run_parts.append(empty_tag("w:b"))
    name_xml = tag("w:name", "", {"w:val": name})
    run_props_xml = tag("w:rPr", "".join(run_parts))
    return tag("w:style", name_xml + run_props_xml, {"w:type": style_type, "w:styleId": style_id})
def content_types_xml() -> str:
    """Return the ``[Content_Types].xml`` manifest of the package.

    It declares default content types for the ``rels`` and ``xml``
    extensions and an override for each of the four XML parts the writer
    emits: the main document, styles, numbering, and core properties.
    """
    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
        '<Default Extension="xml" ContentType="application/xml"/>'
        '<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
        '<Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>'
        '<Override PartName="/word/numbering.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>'
        '<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>'
        "</Types>"
    )
def package_relationships_xml() -> str:
    """Return the package-level ``_rels/.rels`` part.

    It points to the main document (``word/document.xml``) and to the core
    properties (``docProps/core.xml``).
    """
    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        f'<Relationships xmlns="{REL_NS}">'
        f'<Relationship Id="rId1" Type="{DOC_REL_NS}/officeDocument" Target="word/document.xml"/>'
        '<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>'
        "</Relationships>"
    )
def document_relationships_xml() -> str:
    """Return the ``word/_rels/document.xml.rels`` part.

    It links the main document to its styles and numbering parts; targets
    are relative to the ``word/`` folder.
    """
    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        f'<Relationships xmlns="{REL_NS}">'
        f'<Relationship Id="rId1" Type="{DOC_REL_NS}/styles" Target="styles.xml"/>'
        f'<Relationship Id="rId2" Type="{DOC_REL_NS}/numbering" Target="numbering.xml"/>'
        "</Relationships>"
    )
def write_lines_docx(path: Path, title: str, lines: Sequence[str]) -> Path:
    """Build a document from plain text lines and write it to *path*.

    Each line is stripped and classified:

    * blank -> empty paragraph
    * starts with ``"- "`` -> bullet item (marker removed)
    * shorter than 90 characters, no trailing ``.`` and no ``:`` -> level-2 heading
    * anything else -> paragraph

    Args:
        path: Destination .docx file.
        title: Document title (also rendered as the first heading).
        lines: Input lines; each item is converted with ``str()``.

    Returns:
        The path returned by ``DocxDocument.write``.
    """
    doc = DocxDocument(title=title)
    for line in lines:
        clean = str(line).strip()
        if not clean:
            doc.paragraph("")
        # The bullet test must come before the heading heuristic: a short
        # "- item" line also matches the heading rule and would otherwise
        # never be rendered as a bullet.
        elif clean.startswith("- "):
            doc.bullet(clean[2:])
        elif len(clean) < 90 and not clean.endswith(".") and ":" not in clean:
            doc.heading(clean, 2)
        else:
            doc.paragraph(clean)
    return doc.write(path)