147 lines
5.4 KiB
Python
147 lines
5.4 KiB
Python
"""Searchable evidence index for generated human reports."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Iterable, Sequence
|
|
|
|
from .models import Evidence, EvidenceKind, PlatformScan, as_plain_data
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class EvidenceRecord:
|
|
evidence_id: str
|
|
platform_id: str
|
|
kind: str
|
|
reference: str
|
|
summary: str
|
|
confidence: float
|
|
tags: tuple[str, ...]
|
|
|
|
def to_dict(self) -> dict[str, object]:
|
|
return as_plain_data(self)
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class EvidenceQuery:
|
|
platform_id: str | None = None
|
|
kind: str | None = None
|
|
min_confidence: float = 0.0
|
|
tag: str | None = None
|
|
text: str | None = None
|
|
|
|
|
|
def normalize_kind(kind: EvidenceKind | str) -> str:
    """Return the ``value`` of an ``EvidenceKind``; pass anything else through ``str()``."""
    if not isinstance(kind, EvidenceKind):
        return str(kind)
    return kind.value
|
|
|
|
|
|
def record_from_evidence(platform_id: str, evidence: Evidence, index: int) -> EvidenceRecord:
    """Build the index record for one piece of evidence.

    The record id has the form ``<platform_id>-<index>-<kind>``, where the
    index is zero-padded to at least four digits and the kind is the
    normalised kind string. Tags are copied into a tuple.
    """
    kind_name = normalize_kind(evidence.kind)
    record_id = f"{platform_id}-{index:04d}-{kind_name}"
    return EvidenceRecord(
        evidence_id=record_id,
        platform_id=platform_id,
        kind=kind_name,
        reference=evidence.reference,
        summary=evidence.summary,
        confidence=evidence.confidence,
        tags=tuple(evidence.tags),
    )
|
|
|
|
|
|
def build_evidence_index(scans: Sequence[PlatformScan]) -> tuple[EvidenceRecord, ...]:
    """Flatten the evidence of every scan into one sorted tuple of records.

    Evidence is numbered from 1 within each scan. The result is ordered by
    platform id, then kind, then descending confidence, then reference.
    """
    collected = [
        record_from_evidence(scan.platform.platform_id, item, position)
        for scan in scans
        for position, item in enumerate(scan.evidence, start=1)
    ]

    def ordering(entry: EvidenceRecord) -> tuple:
        # Negated confidence puts the most confident record first within a kind.
        return (entry.platform_id, entry.kind, -entry.confidence, entry.reference)

    return tuple(sorted(collected, key=ordering))
|
|
|
|
|
|
def query_evidence(records: Sequence[EvidenceRecord], query: EvidenceQuery) -> tuple[EvidenceRecord, ...]:
    """Return the records that pass every filter set on *query*, in input order.

    Platform, kind, tag and text filters that are falsy (``None`` or empty)
    are skipped. Platform and kind must match exactly, the tag must be one
    of the record's tags, and confidence must be at least
    ``query.min_confidence``. The text filter is a case-insensitive substring
    search over the summary, the reference and the space-joined tags.
    """
    needle = query.text.lower() if query.text else None

    def matches(candidate: EvidenceRecord) -> bool:
        if query.platform_id and candidate.platform_id != query.platform_id:
            return False
        if query.kind and candidate.kind != query.kind:
            return False
        if candidate.confidence < query.min_confidence:
            return False
        if query.tag and query.tag not in candidate.tags:
            return False
        if not needle:
            return True
        # Only build the searchable text when a text filter is actually set.
        haystack = f"{candidate.summary} {candidate.reference} {' '.join(candidate.tags)}".lower()
        return needle in haystack

    return tuple(candidate for candidate in records if matches(candidate))
|
|
|
|
|
|
def evidence_counts_by_platform(records: Sequence[EvidenceRecord]) -> dict[str, int]:
    """Count records per platform id; keys come out in ascending order."""
    tally: dict[str, int] = {}
    for entry in records:
        platform = entry.platform_id
        tally.setdefault(platform, 0)
        tally[platform] += 1
    return {platform: tally[platform] for platform in sorted(tally)}
|
|
|
|
|
|
def evidence_counts_by_kind(records: Sequence[EvidenceRecord]) -> dict[str, int]:
    """Count records per kind, most frequent first; equal counts sort by ascending kind."""
    tally: dict[str, int] = {}
    for entry in records:
        tally[entry.kind] = 1 + tally.get(entry.kind, 0)
    ranked = sorted(tally, key=lambda kind: (-tally[kind], kind))
    return {kind: tally[kind] for kind in ranked}
|
|
|
|
|
|
def strongest_evidence(records: Sequence[EvidenceRecord], limit: int = 25) -> tuple[EvidenceRecord, ...]:
    """Return up to *limit* records with the highest confidence.

    Records are ordered by descending confidence, then platform id, then
    kind; records equal on all three keep their input order.

    Args:
        records: Records to rank; the input is not modified.
        limit: Maximum number of records to return. Zero or a negative
            value yields an empty tuple.

    Returns:
        The top-ranked records as a tuple.
    """
    # A negative limit handed straight to a slice would cut records off the
    # end of the ranking instead of capping it, so treat it like zero.
    # ``None`` is left to plain slicing, which reads it as "no cap".
    if limit is not None and limit <= 0:
        return ()
    ranked = sorted(records, key=lambda item: (-item.confidence, item.platform_id, item.kind))
    return tuple(ranked[:limit])
|
|
|
|
|
|
def weakest_evidence(records: Sequence[EvidenceRecord], limit: int = 25) -> tuple[EvidenceRecord, ...]:
    """Return up to *limit* records with the lowest confidence.

    Records are ordered by ascending confidence, then platform id, then
    kind; records equal on all three keep their input order.

    Args:
        records: Records to rank; the input is not modified.
        limit: Maximum number of records to return. Zero or a negative
            value yields an empty tuple.

    Returns:
        The lowest-ranked records as a tuple.
    """
    # A negative limit handed straight to a slice would cut records off the
    # end of the ranking instead of capping it, so treat it like zero.
    # ``None`` is left to plain slicing, which reads it as "no cap".
    if limit is not None and limit <= 0:
        return ()
    ranked = sorted(records, key=lambda item: (item.confidence, item.platform_id, item.kind))
    return tuple(ranked[:limit])
|
|
|
|
|
|
def evidence_markdown(records: Sequence[EvidenceRecord]) -> str:
    """Render the evidence index as a Markdown report.

    The report holds the total record count, the counts per platform and
    per kind, the 30 strongest records and the 20 weakest records. Leading
    and trailing whitespace is stripped and a single newline is appended.
    """

    def bullet(entry: EvidenceRecord) -> str:
        # One list item; confidence is shown with two decimals.
        return f"- `{entry.platform_id}` `{entry.kind}` {entry.confidence:.2f}: {entry.reference} - {entry.summary}"

    parts = [
        "# Indice de evidencias humanas",
        "",
        f"Total de evidencias: `{len(records)}`",
        "",
        "## Por plataforma",
        "",
    ]
    parts.extend(
        f"- {platform_id}: {count}"
        for platform_id, count in evidence_counts_by_platform(records).items()
    )

    parts += ["", "## Por tipo", ""]
    parts.extend(
        f"- {kind}: {count}"
        for kind, count in evidence_counts_by_kind(records).items()
    )

    parts += ["", "## Evidencias fortes", ""]
    parts.extend(bullet(entry) for entry in strongest_evidence(records, limit=30))

    parts += ["", "## Evidencias fracas para revisar", ""]
    parts.extend(bullet(entry) for entry in weakest_evidence(records, limit=20))

    return "\n".join(parts).strip() + "\n"
|
|
|
|
|
|
def evidence_records_for_platform(records: Sequence[EvidenceRecord], platform_id: str) -> tuple[EvidenceRecord, ...]:
    """Run ``query_evidence`` with only the platform filter set to *platform_id*."""
    platform_filter = EvidenceQuery(platform_id=platform_id)
    return query_evidence(records, platform_filter)
|
|
|
|
|
|
def evidence_records_for_human_surface(records: Sequence[EvidenceRecord]) -> tuple[EvidenceRecord, ...]:
    """Keep, in input order, the records whose kind is in the fixed set below."""
    wanted_kinds = {"ui_surface", "mcp_tool", "observability", "business_rule", "security"}
    return tuple(filter(lambda entry: entry.kind in wanted_kinds, records))
|
|
|
|
|
|
def evidence_gap_summary(
    records: Sequence[EvidenceRecord],
    expected_platforms: Iterable[str],
    *,
    min_count: int = 5,
) -> tuple[str, ...]:
    """List the expected platforms that have no evidence or too little of it.

    Args:
        records: Evidence records, counted per platform id.
        expected_platforms: Platform ids to check; messages follow this order.
        min_count: Fewest records a platform needs to stay out of the
            summary. Defaults to 5, the threshold that used to be fixed.

    Returns:
        One message per expected platform that has zero records or fewer
        than *min_count* records. Platforms with zero records are always
        reported, whatever *min_count* is.
    """
    counts = evidence_counts_by_platform(records)
    gaps: list[str] = []
    for platform_id in expected_platforms:
        count = counts.get(platform_id, 0)
        if count == 0:
            gaps.append(f"{platform_id}: nenhuma evidencia")
        elif count < min_count:
            gaps.append(f"{platform_id}: poucas evidencias ({count})")
    return tuple(gaps)
|
|
|