feat: fundar plataforma mais humana
This commit is contained in:
127
src/mais_humana/redaction.py
Normal file
127
src/mais_humana/redaction.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Secret and sensitive-text checks for generated human artifacts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Sequence
|
||||
|
||||
from .models import as_plain_data
|
||||
|
||||
|
||||
# (pattern_id, compiled_regex) pairs used by scan_text_for_secrets to flag
# lines that look like leaked credentials. Every matching pattern on a line
# produces its own finding, so ordering carries no precedence.
SECRET_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
    (
        "generic_token_assignment",
        # A secret-ish identifier being assigned a long opaque value.
        re.compile(r"(?i)\b(token|secret|password|api[_-]?key)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{16,}"),
    ),
    (
        "bearer_token",
        re.compile(r"(?i)\bbearer\s+[A-Za-z0-9_\-\.]{20,}"),
    ),
    (
        "cloudflare_token_assignment",
        re.compile(r"(?i)\b(cloudflare[_-]?(api[_-]?)?token|cf[_-]?token)\b\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{24,}"),
    ),
    ("private_key", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
    (
        "connection_string",
        # BUG FIX: the original used [^\\s] inside a raw string, which matches
        # "anything except a backslash or the letter 's'" rather than
        # non-whitespace. That truncated samples at the first 's' and missed
        # URIs whose credentials start with 's' (e.g. redis://secret@host).
        # \S is the correct non-whitespace class.
        re.compile(r"(?i)\b(postgres|mysql|mongodb|redis)://\S+"),
    ),
)
|
||||
|
||||
# Substrings that mark a line as intentionally credential-shaped (opaque
# references or redaction guidance) rather than an actual leaked secret.
# Matching is case-insensitive: is_allowlisted lowercases both the line and
# each term before the substring test.
ALLOWLIST_TERMS = {
    "credentialRef",
    "secretRef",
    "tokenRef",
    "redaction",
    "sem segredo",
    "nao vazar",
}
|
||||
|
||||
|
||||
@dataclass(slots=True)
class RedactionFinding:
    """One suspected secret detected in a scanned text artifact."""

    path: str  # path of the scanned file (string form passed to scan_text_for_secrets)
    pattern_id: str  # id of the SECRET_PATTERNS entry that matched
    line: int  # 1-based line number where the match occurred
    sample: str  # matched text, truncated to at most 90 chars by the scanner
    severity: str  # "critical" or "warning", assigned in scan_text_for_secrets
    recommendation: str  # fixed remediation hint (Portuguese text)

    def to_dict(self) -> dict[str, object]:
        """Return a plain-data mapping of this finding via as_plain_data."""
        return as_plain_data(self)
|
||||
|
||||
|
||||
@dataclass(slots=True)
class RedactionReport:
    """Aggregate result of scanning a directory tree for apparent secrets."""

    scanned_files: int  # number of text files visited (including unreadable ones)
    findings: tuple[RedactionFinding, ...]  # all matches across all files
    passed: bool  # True iff no findings were produced

    def to_dict(self) -> dict[str, object]:
        """Return a plain-data mapping of this report via as_plain_data."""
        return as_plain_data(self)
|
||||
|
||||
|
||||
def is_allowlisted(line: str) -> bool:
    """Return True when *line* contains any allowlisted term, ignoring case."""
    haystack = line.lower()
    for term in ALLOWLIST_TERMS:
        if term.lower() in haystack:
            return True
    return False
|
||||
|
||||
|
||||
def scan_text_for_secrets(path: str, text: str) -> tuple[RedactionFinding, ...]:
    """Scan *text* line by line and return a finding per matching secret pattern.

    Lines containing an allowlisted term are skipped entirely. Matched samples
    longer than 90 characters are cut to 87 plus an ellipsis so reports never
    reproduce a full credential.
    """
    critical_ids = {"private_key", "connection_string"}
    results: list[RedactionFinding] = []
    for lineno, raw in enumerate(text.splitlines(), start=1):
        if is_allowlisted(raw):
            continue
        for pattern_id, regex in SECRET_PATTERNS:
            hit = regex.search(raw)
            if hit is None:
                continue
            snippet = hit.group(0)
            if len(snippet) > 90:
                snippet = snippet[:87] + "..."
            results.append(
                RedactionFinding(
                    path=path,
                    pattern_id=pattern_id,
                    line=lineno,
                    sample=snippet,
                    severity="critical" if pattern_id in critical_ids else "warning",
                    recommendation=(
                        "Substituir valor sensivel por referencia opaca e "
                        "registrar apenas credentialRef/secretRef."
                    ),
                )
            )
    return tuple(results)
|
||||
|
||||
|
||||
def iter_text_files(root: Path, suffixes: Sequence[str] = (".md", ".json", ".csv", ".html", ".txt")) -> Iterable[Path]:
    """Yield files under *root* whose suffix is in *suffixes* (case-insensitive).

    A missing *root* yields nothing. Any path containing a VCS/build component
    (.git, .test-tmp, __pycache__, node_modules, dist, build) is skipped.
    """
    skip_parts = {".git", ".test-tmp", "__pycache__", "node_modules", "dist", "build"}
    if not root.exists():
        return
    for candidate in root.rglob("*"):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in suffixes:
            continue
        # Exclude anything nested under a generated/VCS directory.
        if skip_parts.isdisjoint(candidate.parts):
            yield candidate
|
||||
|
||||
|
||||
def scan_generated_artifacts(root: Path) -> RedactionReport:
    """Scan every text artifact under *root* and aggregate secret findings.

    Files that cannot be read still count as scanned but contribute no
    findings; the report passes only when no pattern matched anywhere.
    """
    all_findings: list[RedactionFinding] = []
    scanned = 0
    for artifact in iter_text_files(root):
        scanned += 1
        try:
            content = artifact.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Unreadable file: counted above, but nothing to scan.
            continue
        all_findings.extend(scan_text_for_secrets(str(artifact), content))
    return RedactionReport(
        scanned_files=scanned,
        findings=tuple(all_findings),
        passed=not all_findings,
    )
|
||||
|
||||
|
||||
def redaction_markdown(report: RedactionReport) -> str:
    """Render *report* as a small Markdown summary, always newline-terminated."""
    out: list[str] = [
        "# Redaction Check Mais Humana",
        "",
        f"- arquivos varridos: `{report.scanned_files}`",
        f"- passou: `{report.passed}`",
        f"- achados: `{len(report.findings)}`",
        "",
    ]
    if not report.findings:
        out.append("Nenhum segredo aparente encontrado nos artefatos textuais gerados.")
    else:
        out.append("## Achados")
        out.append("")
        out.extend(
            f"- `{item.severity}` {item.path}:{item.line} "
            f"({item.pattern_id}) - {item.recommendation}"
            for item in report.findings
        )
    return "\n".join(out).strip() + "\n"
|
||||
Reference in New Issue
Block a user