feat: fundar plataforma mais humana
This commit is contained in:
270
src/mais_humana/docx_writer.py
Normal file
270
src/mais_humana/docx_writer.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""Small DOCX writer built with the Python standard library.
|
||||
|
||||
The writer intentionally supports only the structures this platform needs:
|
||||
headings, paragraphs, bullet lists, simple tables, and page breaks. That keeps
|
||||
the reporting pipeline portable inside operational mirrors where optional DOCX
|
||||
libraries may not be installed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Sequence
|
||||
from zipfile import ZIP_DEFLATED, ZipFile
|
||||
import html
|
||||
|
||||
|
||||
# Base URL shared by every OOXML namespace this module emits.
_OOXML_SCHEMAS = "http://schemas.openxmlformats.org"

# Namespace bound to the ``w:`` prefix in the document, styles and numbering parts.
WORD_NS = f"{_OOXML_SCHEMAS}/wordprocessingml/2006/main"
# Default namespace of the <Relationships> root in ``.rels`` parts.
REL_NS = f"{_OOXML_SCHEMAS}/package/2006/relationships"
# Prefix for relationship *type* URIs (``/officeDocument``, ``/styles``, ``/numbering``).
DOC_REL_NS = f"{_OOXML_SCHEMAS}/officeDocument/2006/relationships"
|
||||
|
||||
|
||||
# Code points that XML 1.0 forbids even when escaped: C0 controls other than
# TAB/LF/CR, UTF-16 surrogates, and the non-characters U+FFFE/U+FFFF.
# Mapping them to None makes ``str.translate`` delete them.
_XML_ILLEGAL_CHARS = dict.fromkeys(
    [code for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)]
    + list(range(0xD800, 0xE000))
    + [0xFFFE, 0xFFFF]
)


def esc(value: object) -> str:
    """Return ``str(value)`` made safe for XML text and attribute values.

    Characters that cannot appear in an XML 1.0 document are removed first,
    because a single stray control character (for example a NUL copied from
    a log line) would otherwise make the generated part unparseable. The
    remaining text has ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by
    entity references.

    Args:
        value: Any object; it is converted with ``str()`` before escaping.

    Returns:
        The sanitized, entity-escaped string.
    """
    cleaned = str(value).translate(_XML_ILLEGAL_CHARS)
    return html.escape(cleaned, quote=True)
|
||||
|
||||
|
||||
def tag(name: str, content: str = "", attrs: dict[str, str] | None = None) -> str:
|
||||
attr_text = ""
|
||||
if attrs:
|
||||
attr_text = " " + " ".join(f'{key}="{esc(value)}"' for key, value in attrs.items())
|
||||
return f"<{name}{attr_text}>{content}</{name}>"
|
||||
|
||||
|
||||
def empty_tag(name: str, attrs: dict[str, str] | None = None) -> str:
|
||||
attr_text = ""
|
||||
if attrs:
|
||||
attr_text = " " + " ".join(f'{key}="{esc(value)}"' for key, value in attrs.items())
|
||||
return f"<{name}{attr_text}/>"
|
||||
|
||||
|
||||
def text_run(text: str, bold: bool = False, italic: bool = False) -> str:
    """Build a ``w:r`` run containing *text*, optionally bold and/or italic.

    Args:
        text: Literal text for the run; it is XML-escaped here.
        bold: Emit ``<w:b/>`` inside the run properties.
        italic: Emit ``<w:i/>`` inside the run properties.

    Returns:
        The serialized ``w:r`` element.
    """
    props = ""
    if bold or italic:
        inner = ""
        if bold:
            inner += empty_tag("w:b")
        if italic:
            inner += empty_tag("w:i")
        props = tag("w:rPr", inner)
    # Without xml:space="preserve" a consumer may drop leading/trailing
    # whitespace in w:t. Check for ANY such whitespace (tabs and newlines
    # too), not only the plain space character.
    preserve = {"xml:space": "preserve"} if text != text.strip() else None
    return tag("w:r", props + tag("w:t", esc(text), preserve))
|
||||
|
||||
|
||||
def paragraph_xml(text: str = "", style: str | None = None, bullet: bool = False) -> str:
    """Build a ``w:p`` paragraph holding a single plain run.

    Args:
        text: Paragraph text (escaped by ``text_run``).
        style: Optional paragraph style id, emitted as ``w:pStyle``.
        bullet: When true, attach the paragraph to list ``numId`` 1 at level 0.

    Returns:
        The serialized ``w:p`` element; ``w:pPr`` is omitted when neither
        *style* nor *bullet* is set.
    """
    settings: list[str] = []
    if style:
        settings.append(tag("w:pStyle", "", {"w:val": style}))
    if bullet:
        list_level = tag("w:ilvl", "", {"w:val": "0"})
        list_id = tag("w:numId", "", {"w:val": "1"})
        settings.append(tag("w:numPr", list_level + list_id))

    content = text_run(text)
    if settings:
        content = tag("w:pPr", "".join(settings)) + content
    return tag("w:p", content)
|
||||
|
||||
|
||||
def heading_xml(text: str, level: int) -> str:
    """Build a heading paragraph styled ``Heading1`` .. ``Heading4``.

    Args:
        text: Heading text.
        level: Requested level; converted with ``int()`` and clamped to 1-4.

    Returns:
        The serialized ``w:p`` element carrying the heading style.
    """
    requested = int(level)
    if requested < 1:
        clamped = 1
    elif requested > 4:
        clamped = 4
    else:
        clamped = requested
    return paragraph_xml(text, style="Heading" + str(clamped))
|
||||
|
||||
|
||||
def page_break_xml() -> str:
    """Build a paragraph whose only run is a page break (``w:br w:type="page"``)."""
    break_marker = empty_tag("w:br", {"w:type": "page"})
    run = tag("w:r", break_marker)
    return tag("w:p", run)
|
||||
|
||||
|
||||
def cell_xml(value: str) -> str:
    """Build a ``w:tc`` table cell with a fixed width of 2400 (``dxa``) and one paragraph.

    Args:
        value: Cell text, rendered through ``paragraph_xml``.

    Returns:
        The serialized ``w:tc`` element.
    """
    width = tag("w:tcW", "", {"w:w": "2400", "w:type": "dxa"})
    cell_props = tag("w:tcPr", width)
    return tag("w:tc", cell_props + paragraph_xml(value))
|
||||
|
||||
|
||||
def row_xml(values: Sequence[str]) -> str:
    """Build a ``w:tr`` table row with one cell per entry of *values*."""
    cells = [cell_xml(value) for value in values]
    return tag("w:tr", "".join(cells))
|
||||
|
||||
|
||||
def table_xml(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> str:
    """Build a bordered ``w:tbl`` with a header row followed by data rows.

    Args:
        headers: Texts for the first row.
        rows: Data rows; every cell is converted with ``str()``.

    Returns:
        The serialized ``w:tbl`` element (table properties, column grid,
        then the rows).
    """
    # Every edge shares one thin grey single-line border definition.
    border_attrs = {"w:val": "single", "w:sz": "4", "w:space": "0", "w:color": "B8C0CC"}
    edges = ("top", "left", "bottom", "right", "insideH", "insideV")
    borders = "".join(empty_tag(f"w:{edge}", border_attrs) for edge in edges)
    props = tag("w:tblPr", tag("w:tblBorders", borders))

    data_rows = [[str(cell) for cell in row] for row in rows]

    # The WordprocessingML schema requires a w:tblGrid between w:tblPr and the
    # rows. Emit one grid column per column of the widest row, using the same
    # 2400 width that cell_xml assigns to each cell.
    column_count = max([len(headers), *(len(row) for row in data_rows)])
    grid = tag(
        "w:tblGrid",
        "".join(empty_tag("w:gridCol", {"w:w": "2400"}) for _ in range(column_count)),
    )

    body = row_xml(headers) + "".join(row_xml(row) for row in data_rows)
    return tag("w:tbl", props + grid + body)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class DocxElement:
|
||||
kind: str
|
||||
text: str = ""
|
||||
level: int = 1
|
||||
headers: tuple[str, ...] = ()
|
||||
rows: tuple[tuple[str, ...], ...] = ()
|
||||
|
||||
|
||||
@dataclass
class DocxDocument:
    """Minimal document model that can be written to a .docx file.

    Content is queued through ``heading``/``paragraph``/``bullet``/``table``/
    ``page_break`` and serialized by ``write``. The document title is always
    rendered as the first level-1 heading of the body.
    """

    # Document title; also stored in the core properties.
    title: str
    # Stored in the core properties as dc:subject.
    subject: str = "Tudo Para IA Mais Humana"
    # Stored in the core properties as dc:creator.
    creator: str = "mais_humana"
    # Body items in output order.
    elements: list[DocxElement] = field(default_factory=list)

    def heading(self, text: str, level: int = 1) -> None:
        """Queue a heading (the level is clamped by ``heading_xml`` when rendered)."""
        self.elements.append(DocxElement(kind="heading", text=text, level=level))

    def paragraph(self, text: str = "") -> None:
        """Queue a plain paragraph; an empty string yields an empty paragraph."""
        self.elements.append(DocxElement(kind="paragraph", text=text))

    def bullet(self, text: str) -> None:
        """Queue a bullet-list paragraph."""
        self.elements.append(DocxElement(kind="bullet", text=text))

    def table(self, headers: Sequence[str], rows: Sequence[Sequence[object]]) -> None:
        """Queue a table.

        Headers and cells are both converted with ``str()`` so that numbers
        or other objects can be passed without failing later in rendering.
        """
        normalized_headers = tuple(str(header) for header in headers)
        normalized_rows = tuple(tuple(str(value) for value in row) for row in rows)
        self.elements.append(
            DocxElement(kind="table", headers=normalized_headers, rows=normalized_rows)
        )

    def page_break(self) -> None:
        """Queue a page break."""
        self.elements.append(DocxElement(kind="page_break"))

    def extend_paragraphs(self, lines: Iterable[str]) -> None:
        """Queue one element per line using a simple text heuristic.

        Each line is stripped. Blank lines become empty paragraphs, lines
        shorter than 80 characters that end with ``:`` become level-2
        headings (without the colon), and everything else becomes a paragraph.
        """
        for line in lines:
            stripped = str(line).strip()
            if not stripped:
                self.paragraph("")
            elif stripped.endswith(":") and len(stripped) < 80:
                self.heading(stripped[:-1], 2)
            else:
                self.paragraph(stripped)

    def document_body(self) -> str:
        """Serialize the title and all queued elements as ``word/document.xml``.

        Elements whose ``kind`` is not recognized are skipped.
        """
        body = [heading_xml(self.title, 1)]
        for element in self.elements:
            if element.kind == "heading":
                body.append(heading_xml(element.text, element.level))
            elif element.kind == "paragraph":
                body.append(paragraph_xml(element.text))
            elif element.kind == "bullet":
                body.append(paragraph_xml(element.text, bullet=True))
            elif element.kind == "table":
                body.append(table_xml(element.headers, element.rows))
            elif element.kind == "page_break":
                body.append(page_break_xml())
        # Section properties close the body: page size and margins.
        section_props = tag(
            "w:sectPr",
            empty_tag("w:pgSz", {"w:w": "11906", "w:h": "16838"})
            + empty_tag(
                "w:pgMar",
                {"w:top": "1440", "w:right": "1080", "w:bottom": "1440", "w:left": "1080"},
            ),
        )
        return (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            f'<w:document xmlns:w="{WORD_NS}">'
            + tag("w:body", "".join(body) + section_props)
            + "</w:document>"
        )

    def core_properties(self) -> str:
        """Serialize title, subject and creator as ``docProps/core.xml``."""
        return (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            '<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" '
            'xmlns:dc="http://purl.org/dc/elements/1.1/" '
            'xmlns:dcterms="http://purl.org/dc/terms/" '
            'xmlns:dcmitype="http://purl.org/dc/dcmitype/" '
            'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">'
            f"<dc:title>{esc(self.title)}</dc:title>"
            f"<dc:subject>{esc(self.subject)}</dc:subject>"
            f"<dc:creator>{esc(self.creator)}</dc:creator>"
            "<cp:lastModifiedBy>mais_humana</cp:lastModifiedBy>"
            "</cp:coreProperties>"
        )

    def styles(self) -> tuple[str, str]:
        """Return ``(styles_xml, numbering_xml)`` for the package.

        The first item is the content of ``word/styles.xml`` (Normal plus
        Heading1-4); the second is ``word/numbering.xml``, defining the single
        bullet list (``numId`` 1) that bullet paragraphs reference.
        """
        styles = [
            style_xml("Normal", "paragraph", "Normal", "Calibri", 22),
            style_xml("Heading1", "paragraph", "heading 1", "Aptos Display", 32, bold=True),
            style_xml("Heading2", "paragraph", "heading 2", "Aptos Display", 26, bold=True),
            style_xml("Heading3", "paragraph", "heading 3", "Aptos Display", 23, bold=True),
            style_xml("Heading4", "paragraph", "heading 4", "Aptos Display", 21, bold=True),
        ]
        numbering = (
            f'<w:numbering xmlns:w="{WORD_NS}">'
            '<w:abstractNum w:abstractNumId="1">'
            '<w:lvl w:ilvl="0"><w:start w:val="1"/><w:numFmt w:val="bullet"/>'
            '<w:lvlText w:val="*"/><w:lvlJc w:val="left"/></w:lvl>'
            '</w:abstractNum><w:num w:numId="1"><w:abstractNumId w:val="1"/></w:num></w:numbering>'
        )
        styles_part = (
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
            f'<w:styles xmlns:w="{WORD_NS}">' + "".join(styles) + "</w:styles>"
        )
        return styles_part, numbering

    def write(self, path: Path) -> Path:
        """Write the document as a .docx (ZIP) package and return its path.

        Args:
            path: Destination file; a ``str`` is accepted as well. Missing
                parent directories are created.

        Returns:
            The destination as a ``Path``.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        styles, numbering = self.styles()
        with ZipFile(path, "w", compression=ZIP_DEFLATED) as archive:
            archive.writestr("[Content_Types].xml", content_types_xml())
            archive.writestr("_rels/.rels", package_relationships_xml())
            archive.writestr("docProps/core.xml", self.core_properties())
            archive.writestr("word/document.xml", self.document_body())
            archive.writestr("word/styles.xml", styles)
            archive.writestr("word/numbering.xml", numbering)
            archive.writestr("word/_rels/document.xml.rels", document_relationships_xml())
        return path
|
||||
|
||||
|
||||
def style_xml(style_id: str, style_type: str, name: str, font: str, size: int, bold: bool = False) -> str:
    """Build one ``w:style`` definition.

    Args:
        style_id: Value of ``w:styleId`` (what ``w:pStyle`` refers to).
        style_type: Value of ``w:type``.
        name: Value of the ``w:name`` child.
        font: Font name used for both ``w:ascii`` and ``w:hAnsi``.
        size: Value written to ``w:sz``.
        bold: Append ``<w:b/>`` to the run properties.

    Returns:
        The serialized ``w:style`` element.
    """
    run_parts = [
        tag("w:rFonts", "", {"w:ascii": font, "w:hAnsi": font}),
        tag("w:sz", "", {"w:val": str(size)}),
    ]
    if bold:
        run_parts.append(empty_tag("w:b"))

    children = tag("w:name", "", {"w:val": name}) + tag("w:rPr", "".join(run_parts))
    return tag("w:style", children, {"w:type": style_type, "w:styleId": style_id})
|
||||
|
||||
|
||||
def content_types_xml() -> str:
    """Return the ``[Content_Types].xml`` part for the package.

    Declares default types for ``.rels`` and ``.xml`` and explicit overrides
    for the document, styles, numbering and core-properties parts.
    """
    package = "application/vnd.openxmlformats-package"
    wordml = "application/vnd.openxmlformats-officedocument.wordprocessingml"
    defaults = (
        ("rels", f"{package}.relationships+xml"),
        ("xml", "application/xml"),
    )
    overrides = (
        ("/word/document.xml", f"{wordml}.document.main+xml"),
        ("/word/styles.xml", f"{wordml}.styles+xml"),
        ("/word/numbering.xml", f"{wordml}.numbering+xml"),
        ("/docProps/core.xml", f"{package}.core-properties+xml"),
    )

    pieces = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">',
    ]
    for extension, content_type in defaults:
        pieces.append(f'<Default Extension="{extension}" ContentType="{content_type}"/>')
    for part_name, content_type in overrides:
        pieces.append(f'<Override PartName="{part_name}" ContentType="{content_type}"/>')
    pieces.append("</Types>")
    return "".join(pieces)
|
||||
|
||||
|
||||
def package_relationships_xml() -> str:
    """Return the package-level ``_rels/.rels`` part.

    It links the package to the main document (``rId1``) and to the core
    properties (``rId2``).
    """
    core_props_type = (
        "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
    )
    entries = (
        ("rId1", f"{DOC_REL_NS}/officeDocument", "word/document.xml"),
        ("rId2", core_props_type, "docProps/core.xml"),
    )
    relationships = "".join(
        f'<Relationship Id="{rel_id}" Type="{rel_type}" Target="{target}"/>'
        for rel_id, rel_type, target in entries
    )
    prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    return f'{prolog}<Relationships xmlns="{REL_NS}">{relationships}</Relationships>'
|
||||
|
||||
|
||||
def document_relationships_xml() -> str:
    """Return ``word/_rels/document.xml.rels``.

    It links the main document to its styles (``rId1``) and numbering
    (``rId2``) parts.
    """
    entries = (
        ("rId1", "styles", "styles.xml"),
        ("rId2", "numbering", "numbering.xml"),
    )
    relationships = "".join(
        f'<Relationship Id="{rel_id}" Type="{DOC_REL_NS}/{kind}" Target="{target}"/>'
        for rel_id, kind, target in entries
    )
    prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    return f'{prolog}<Relationships xmlns="{REL_NS}">{relationships}</Relationships>'
|
||||
|
||||
|
||||
def write_lines_docx(path: Path, title: str, lines: Sequence[str]) -> Path:
    """Write *lines* to a .docx file, classifying each line heuristically.

    Each line is stripped and then mapped as follows:

    * blank                      -> empty paragraph
    * starts with ``"- "``       -> bullet (marker removed)
    * shorter than 90 characters, not ending in ``.`` and without ``:``
                                 -> level-2 heading
    * anything else              -> paragraph

    Args:
        path: Destination file, passed to ``DocxDocument.write``.
        title: Document title.
        lines: Source lines; each is converted with ``str()``.

    Returns:
        The path returned by ``DocxDocument.write``.
    """
    doc = DocxDocument(title=title)
    for line in lines:
        clean = str(line).strip()
        if not clean:
            doc.paragraph("")
        # Bullets must be recognized before the heading heuristic: a typical
        # bullet is short with no period or colon, so it would otherwise be
        # classified as a heading and never reach this branch.
        elif clean.startswith("- "):
            doc.bullet(clean[2:])
        elif len(clean) < 90 and not clean.endswith(".") and ":" not in clean:
            doc.heading(clean, 2)
        else:
            doc.paragraph(clean)
    return doc.write(path)
|
||||
Reference in New Issue
Block a user