"""Searchable evidence index for generated human reports.""" from __future__ import annotations from dataclasses import dataclass from typing import Iterable, Sequence from .models import Evidence, EvidenceKind, PlatformScan, as_plain_data @dataclass(slots=True) class EvidenceRecord: evidence_id: str platform_id: str kind: str reference: str summary: str confidence: float tags: tuple[str, ...] def to_dict(self) -> dict[str, object]: return as_plain_data(self) @dataclass(slots=True) class EvidenceQuery: platform_id: str | None = None kind: str | None = None min_confidence: float = 0.0 tag: str | None = None text: str | None = None def normalize_kind(kind: EvidenceKind | str) -> str: return kind.value if isinstance(kind, EvidenceKind) else str(kind) def record_from_evidence(platform_id: str, evidence: Evidence, index: int) -> EvidenceRecord: return EvidenceRecord( evidence_id=f"{platform_id}-{index:04d}-{normalize_kind(evidence.kind)}", platform_id=platform_id, kind=normalize_kind(evidence.kind), reference=evidence.reference, summary=evidence.summary, confidence=evidence.confidence, tags=tuple(evidence.tags), ) def build_evidence_index(scans: Sequence[PlatformScan]) -> tuple[EvidenceRecord, ...]: records: list[EvidenceRecord] = [] for scan in scans: for index, evidence in enumerate(scan.evidence, start=1): records.append(record_from_evidence(scan.platform.platform_id, evidence, index)) records.sort(key=lambda item: (item.platform_id, item.kind, -item.confidence, item.reference)) return tuple(records) def query_evidence(records: Sequence[EvidenceRecord], query: EvidenceQuery) -> tuple[EvidenceRecord, ...]: output: list[EvidenceRecord] = [] text = query.text.lower() if query.text else None for record in records: if query.platform_id and record.platform_id != query.platform_id: continue if query.kind and record.kind != query.kind: continue if record.confidence < query.min_confidence: continue if query.tag and query.tag not in record.tags: continue if text and text not in f"{record.summary} {record.reference} {' '.join(record.tags)}".lower(): continue output.append(record) return tuple(output) def evidence_counts_by_platform(records: Sequence[EvidenceRecord]) -> dict[str, int]: counts: dict[str, int] = {} for record in records: counts[record.platform_id] = counts.get(record.platform_id, 0) + 1 return dict(sorted(counts.items())) def evidence_counts_by_kind(records: Sequence[EvidenceRecord]) -> dict[str, int]: counts: dict[str, int] = {} for record in records: counts[record.kind] = counts.get(record.kind, 0) + 1 return dict(sorted(counts.items(), key=lambda item: (-item[1], item[0]))) def strongest_evidence(records: Sequence[EvidenceRecord], limit: int = 25) -> tuple[EvidenceRecord, ...]: ordered = sorted(records, key=lambda item: (-item.confidence, item.platform_id, item.kind)) return tuple(ordered[:limit]) def weakest_evidence(records: Sequence[EvidenceRecord], limit: int = 25) -> tuple[EvidenceRecord, ...]: ordered = sorted(records, key=lambda item: (item.confidence, item.platform_id, item.kind)) return tuple(ordered[:limit]) def evidence_markdown(records: Sequence[EvidenceRecord]) -> str: lines = ["# Indice de evidencias humanas", ""] lines.append(f"Total de evidencias: `{len(records)}`") lines.append("") lines.append("## Por plataforma") lines.append("") for platform_id, count in evidence_counts_by_platform(records).items(): lines.append(f"- {platform_id}: {count}") lines.append("") lines.append("## Por tipo") lines.append("") for kind, count in evidence_counts_by_kind(records).items(): lines.append(f"- {kind}: {count}") lines.append("") lines.append("## Evidencias fortes") lines.append("") for record in strongest_evidence(records, limit=30): lines.append(f"- `{record.platform_id}` `{record.kind}` {record.confidence:.2f}: {record.reference} - {record.summary}") lines.append("") lines.append("## Evidencias fracas para revisar") lines.append("") for record in weakest_evidence(records, limit=20): lines.append(f"- `{record.platform_id}` `{record.kind}` {record.confidence:.2f}: {record.reference} - {record.summary}") return "\n".join(lines).strip() + "\n" def evidence_records_for_platform(records: Sequence[EvidenceRecord], platform_id: str) -> tuple[EvidenceRecord, ...]: return query_evidence(records, EvidenceQuery(platform_id=platform_id)) def evidence_records_for_human_surface(records: Sequence[EvidenceRecord]) -> tuple[EvidenceRecord, ...]: kinds = {"ui_surface", "mcp_tool", "observability", "business_rule", "security"} return tuple(record for record in records if record.kind in kinds) def evidence_gap_summary(records: Sequence[EvidenceRecord], expected_platforms: Iterable[str]) -> tuple[str, ...]: counts = evidence_counts_by_platform(records) gaps: list[str] = [] for platform_id in expected_platforms: count = counts.get(platform_id, 0) if count == 0: gaps.append(f"{platform_id}: nenhuma evidencia") elif count < 5: gaps.append(f"{platform_id}: poucas evidencias ({count})") return tuple(gaps)