"""Small DOCX writer built with the Python standard library. The writer intentionally supports only the structures this platform needs: headings, paragraphs, bullet lists, simple tables, and page breaks. That keeps the reporting pipeline portable inside operational mirrors where optional DOCX libraries may not be installed. """ from __future__ import annotations from dataclasses import dataclass, field from pathlib import Path from typing import Iterable, Sequence from zipfile import ZIP_DEFLATED, ZipFile import html WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships" DOC_REL_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" def esc(value: object) -> str: return html.escape(str(value), quote=True) def tag(name: str, content: str = "", attrs: dict[str, str] | None = None) -> str: attr_text = "" if attrs: attr_text = " " + " ".join(f'{key}="{esc(value)}"' for key, value in attrs.items()) return f"<{name}{attr_text}>{content}" def empty_tag(name: str, attrs: dict[str, str] | None = None) -> str: attr_text = "" if attrs: attr_text = " " + " ".join(f'{key}="{esc(value)}"' for key, value in attrs.items()) return f"<{name}{attr_text}/>" def text_run(text: str, bold: bool = False, italic: bool = False) -> str: props = "" if bold or italic: inner = "" if bold: inner += empty_tag("w:b") if italic: inner += empty_tag("w:i") props = tag("w:rPr", inner) preserve = {"xml:space": "preserve"} if text.startswith(" ") or text.endswith(" ") else None return tag("w:r", props + tag("w:t", esc(text), preserve)) def paragraph_xml(text: str = "", style: str | None = None, bullet: bool = False) -> str: props = "" if style: props += tag("w:pStyle", "", {"w:val": style}) if bullet: props += tag("w:numPr", tag("w:ilvl", "", {"w:val": "0"}) + tag("w:numId", "", {"w:val": "1"})) prop_xml = tag("w:pPr", props) if props else "" return tag("w:p", prop_xml + text_run(text)) def heading_xml(text: str, level: int) -> str: level = max(1, min(4, int(level))) return paragraph_xml(text, style=f"Heading{level}") def page_break_xml() -> str: return tag("w:p", tag("w:r", empty_tag("w:br", {"w:type": "page"}))) def cell_xml(value: str) -> str: props = tag("w:tcPr", tag("w:tcW", "", {"w:w": "2400", "w:type": "dxa"})) return tag("w:tc", props + paragraph_xml(value)) def row_xml(values: Sequence[str]) -> str: return tag("w:tr", "".join(cell_xml(value) for value in values)) def table_xml(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> str: border = empty_tag("w:top", {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}) border += empty_tag("w:left", {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}) border += empty_tag("w:bottom", {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}) border += empty_tag("w:right", {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}) border += empty_tag("w:insideH", {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}) border += empty_tag("w:insideV", {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}) props = tag("w:tblPr", tag("w:tblBorders", border)) body = row_xml(headers) + "".join(row_xml([str(cell) for cell in row]) for row in rows) return tag("w:tbl", props + body) @dataclass(slots=True) class DocxElement: kind: str text: str = "" level: int = 1 headers: tuple[str, ...] = () rows: tuple[tuple[str, ...], ...] = () @dataclass class DocxDocument: """Minimal document model that can be written to a .docx file.""" title: str subject: str = "Tudo Para IA Mais Humana" creator: str = "mais_humana" elements: list[DocxElement] = field(default_factory=list) def heading(self, text: str, level: int = 1) -> None: self.elements.append(DocxElement(kind="heading", text=text, level=level)) def paragraph(self, text: str = "") -> None: self.elements.append(DocxElement(kind="paragraph", text=text)) def bullet(self, text: str) -> None: self.elements.append(DocxElement(kind="bullet", text=text)) def table(self, headers: Sequence[str], rows: Sequence[Sequence[object]]) -> None: normalized_rows = tuple(tuple(str(value) for value in row) for row in rows) self.elements.append(DocxElement(kind="table", headers=tuple(headers), rows=normalized_rows)) def page_break(self) -> None: self.elements.append(DocxElement(kind="page_break")) def extend_paragraphs(self, lines: Iterable[str]) -> None: for line in lines: stripped = str(line).strip() if not stripped: self.paragraph("") elif stripped.endswith(":") and len(stripped) < 80: self.heading(stripped[:-1], 2) else: self.paragraph(stripped) def document_body(self) -> str: body = [heading_xml(self.title, 1)] for element in self.elements: if element.kind == "heading": body.append(heading_xml(element.text, element.level)) elif element.kind == "paragraph": body.append(paragraph_xml(element.text)) elif element.kind == "bullet": body.append(paragraph_xml(element.text, bullet=True)) elif element.kind == "table": body.append(table_xml(element.headers, element.rows)) elif element.kind == "page_break": body.append(page_break_xml()) section_props = tag( "w:sectPr", empty_tag("w:pgSz", {"w:w": "11906", "w:h": "16838"}) + empty_tag("w:pgMar", {"w:top": "1440", "w:right": "1080", "w:bottom": "1440", "w:left": "1080"}), ) return ( '' f'' + tag("w:body", "".join(body) + section_props) + "" ) def core_properties(self) -> str: return ( '' '' f"{esc(self.title)}" f"{esc(self.subject)}" f"{esc(self.creator)}" "mais_humana" "" ) def styles(self) -> str: styles = [ style_xml("Normal", "paragraph", "Normal", "Calibri", 22), style_xml("Heading1", "paragraph", "heading 1", "Aptos Display", 32, bold=True), style_xml("Heading2", "paragraph", "heading 2", "Aptos Display", 26, bold=True), style_xml("Heading3", "paragraph", "heading 3", "Aptos Display", 23, bold=True), style_xml("Heading4", "paragraph", "heading 4", "Aptos Display", 21, bold=True), ] numbering = ( '' '' '' '' "" ) return ( '' f'' + "".join(styles) + "" ), numbering def write(self, path: Path) -> Path: path.parent.mkdir(parents=True, exist_ok=True) styles, numbering = self.styles() with ZipFile(path, "w", compression=ZIP_DEFLATED) as archive: archive.writestr("[Content_Types].xml", content_types_xml()) archive.writestr("_rels/.rels", package_relationships_xml()) archive.writestr("docProps/core.xml", self.core_properties()) archive.writestr("word/document.xml", self.document_body()) archive.writestr("word/styles.xml", styles) archive.writestr("word/numbering.xml", numbering) archive.writestr("word/_rels/document.xml.rels", document_relationships_xml()) return path def style_xml(style_id: str, style_type: str, name: str, font: str, size: int, bold: bool = False) -> str: run_props = tag("w:rFonts", "", {"w:ascii": font, "w:hAnsi": font}) run_props += tag("w:sz", "", {"w:val": str(size)}) if bold: run_props += empty_tag("w:b") return tag( "w:style", tag("w:name", "", {"w:val": name}) + tag("w:rPr", run_props), {"w:type": style_type, "w:styleId": style_id}, ) def content_types_xml() -> str: return ( '' '' '' '' '' '' '' '' "" ) def package_relationships_xml() -> str: return ( '' f'' f'' '' "" ) def document_relationships_xml() -> str: return ( '' f'' f'' f'' "" ) def write_lines_docx(path: Path, title: str, lines: Sequence[str]) -> Path: doc = DocxDocument(title=title) for line in lines: clean = str(line).strip() if not clean: doc.paragraph("") elif len(clean) < 90 and not clean.endswith(".") and ":" not in clean: doc.heading(clean, 2) elif clean.startswith("- "): doc.bullet(clean[2:]) else: doc.paragraph(clean) return doc.write(path)