tudo-para-ia-mais-humana-pl…/src/mais_humana/blocker_catalog.py

"""Signal extraction rules for the Mais Humana operational dossier.

This module turns local repository evidence into normalized operational signals.
It does not decide whether a project is good or bad by a single keyword.  The
goal is to preserve useful nuance:

* a Docs catalog-only decision can be a formal exception or a blocker;
* a BYOK credential reference is a capability, while a missing tenant smoke is
  a blocker;
* Cloudflare plugin denial is expected and must not become a platform blocker;
* wrangler, HTTP evidence, readiness, sameSource, and panelReady are real
  operational signals;
* repository, Git, tests, OpenAPI, and security redaction remain separate gates.
"""

from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Sequence

from .models import EvidenceKind, NeedCategory, PlatformHumanReport, PlatformScan, Recommendation, merge_unique, slugify
from .operational_models import (
    EvidenceRole,
    GateDomain,
    HumanReadinessStage,
    OperationalSignal,
    SignalKind,
    SignalSeverity,
    SourceConfidence,
    SourceReference,
    source_refs_from_evidence,
    source_refs_from_strings,
    stable_digest,
)


@dataclass(slots=True)
class SignalRule:
    """A lightweight matching rule for evidence summaries, warning text, and paths."""

    rule_id: str
    title: str
    kind: SignalKind
    domain: GateDomain
    severity: SignalSeverity
    stage: HumanReadinessStage
    categories: tuple[NeedCategory, ...]
    patterns: tuple[str, ...]
    positive_summary: str
    next_action: str
    tags: tuple[str, ...] = ()

    def matches(self, text: str) -> bool:
        lowered = text.lower()
        return any(re.search(pattern, lowered, re.I) for pattern in self.patterns)


def rule(
    rule_id: str,
    title: str,
    kind: SignalKind,
    domain: GateDomain,
    severity: SignalSeverity,
    stage: HumanReadinessStage,
    categories: Iterable[NeedCategory],
    patterns: Iterable[str],
    summary: str,
    next_action: str,
    tags: Iterable[str] = (),
) -> SignalRule:
    """Create a ``SignalRule``, freezing every iterable argument into a tuple.

    ``summary`` is stored in the rule's ``positive_summary`` field; every
    other argument maps onto the field of the same name.
    """
    # Materialise the iterables once, in declaration order.
    frozen_categories = tuple(categories)
    frozen_patterns = tuple(patterns)
    frozen_tags = tuple(tags)
    return SignalRule(
        rule_id=rule_id,
        title=title,
        kind=kind,
        domain=domain,
        severity=severity,
        stage=stage,
        categories=frozen_categories,
        patterns=frozen_patterns,
        positive_summary=summary,
        next_action=next_action,
        tags=frozen_tags,
    )


# Rules that recognise positive evidence.  Every entry below is declared with
# SignalKind.CAPABILITY and SignalSeverity.INFO.  build_operational_signals()
# runs this table through scan_rules() against the flattened evidence text.
# NOTE(review): a few patterns are very broad (for example r"user" and
# r"\bready\b"); confirm that the resulting match rate is intended.
CAPABILITY_RULES: tuple[SignalRule, ...] = (
    rule(
        "readiness-surface",
        "Readiness operacional detectada",
        SignalKind.CAPABILITY,
        GateDomain.OBSERVABILITY,
        SignalSeverity.INFO,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.OBSERVABILITY, NeedCategory.OPERATIONS),
        (r"\breadiness\b", r"\bready\b", r"prontid"),
        "O repositorio possui indicios de readiness ou prontidao operacional.",
        "manter readiness como evidencia regressiva",
        ("readiness",),
    ),
    rule(
        "health-surface",
        "Health check detectado",
        SignalKind.CAPABILITY,
        GateDomain.RUNTIME,
        SignalSeverity.INFO,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.OPERATIONS, NeedCategory.OBSERVABILITY),
        (r"\bhealth\b", r"/health\b"),
        "O repositorio expoe ou documenta health check.",
        "validar health em smoke local ou publicado",
        ("health",),
    ),
    rule(
        "openapi-contract",
        "Contrato OpenAPI detectado",
        SignalKind.CAPABILITY,
        GateDomain.CONTRACT,
        SignalSeverity.INFO,
        HumanReadinessStage.HUMAN_EXPLAINABLE,
        (NeedCategory.DOCUMENTATION, NeedCategory.INTEGRATION, NeedCategory.GOVERNANCE),
        (r"openapi", r"swagger"),
        "O repositorio possui contrato OpenAPI, documento ou rota relacionada.",
        "manter contrato sincronizado com rotas reais",
        ("openapi",),
    ),
    rule(
        "panel-ready-signal",
        "panelReady detectado",
        SignalKind.CAPABILITY,
        GateDomain.PANEL,
        SignalSeverity.INFO,
        HumanReadinessStage.PANEL_READY,
        (NeedCategory.EXPERIENCE, NeedCategory.GOVERNANCE),
        (r"panelready", r"panel ready", r"painel.*pront"),
        "Ha indicio de contrato de tela pronto para painel humano.",
        "validar se panelReady usa a mesma fonte do GPT",
        ("panelReady",),
    ),
    rule(
        "same-source-signal",
        "sameSource detectado",
        SignalKind.CAPABILITY,
        GateDomain.PANEL,
        SignalSeverity.INFO,
        HumanReadinessStage.PANEL_READY,
        (NeedCategory.EXPERIENCE, NeedCategory.GOVERNANCE, NeedCategory.OBSERVABILITY),
        (r"samesource", r"same source", r"mesma fonte", r"sourcehash", r"recordsHash"),
        "Ha indicio de mesma fonte entre GPT, painel e evidencia.",
        "manter hash de fonte e registros em regressao",
        ("sameSource",),
    ),
    rule(
        "credential-ref-signal",
        "credentialRef detectado",
        SignalKind.CAPABILITY,
        GateDomain.SECURITY,
        SignalSeverity.INFO,
        HumanReadinessStage.CONTROLLED_READY,
        (NeedCategory.SECURITY, NeedCategory.INTEGRATION),
        (r"credentialref", r"credential ref", r"credential:"),
        "A plataforma usa referencia de credencial em vez de expor segredo bruto.",
        "validar nao vazamento em relatorios, logs e respostas",
        ("credentialRef", "redaction"),
    ),
    rule(
        "byok-signal",
        "BYOK detectado",
        SignalKind.CAPABILITY,
        GateDomain.INTEGRATION,
        SignalSeverity.INFO,
        HumanReadinessStage.CONTROLLED_READY,
        (NeedCategory.INTEGRATION, NeedCategory.SECURITY, NeedCategory.COMMERCIAL),
        (r"\bbyok\b", r"bring your own key", r"credencial.*cliente"),
        "A jornada BYOK aparece como superficie tecnica ou comercial.",
        "validar cadeia organizacao, usuario, entitlement, credentialRef, smoke e consumo",
        ("BYOK",),
    ),
    rule(
        "audit-trace-signal",
        "Trace e auditoria detectados",
        SignalKind.CAPABILITY,
        GateDomain.OBSERVABILITY,
        SignalSeverity.INFO,
        HumanReadinessStage.HUMAN_EXPLAINABLE,
        (NeedCategory.OBSERVABILITY, NeedCategory.GOVERNANCE),
        (r"\baudit\b", r"\btrace\b", r"auditid", r"traceid", r"auditoria"),
        "A plataforma registra ou expõe trace/audit para leitura operacional.",
        "garantir que trace/audit nao contenham segredo bruto",
        ("audit", "trace"),
    ),
    rule(
        "business-entitlement-signal",
        "Entitlement ou regra comercial detectada",
        SignalKind.CAPABILITY,
        GateDomain.BUSINESS,
        SignalSeverity.INFO,
        HumanReadinessStage.HUMAN_EXPLAINABLE,
        (NeedCategory.COMMERCIAL, NeedCategory.FINANCE),
        (r"entitlement", r"checkout", r"invoice", r"billing", r"franquia", r"cobranca"),
        "Ha evidencia de regra comercial, cobranca, consumo ou entitlement.",
        "sincronizar Business como fonte unica de plano, franquia e bloqueio",
        ("business", "entitlement"),
    ),
    rule(
        "identity-rbac-signal",
        "Identity/RBAC detectado",
        SignalKind.CAPABILITY,
        GateDomain.IDENTITY,
        SignalSeverity.INFO,
        HumanReadinessStage.HUMAN_EXPLAINABLE,
        (NeedCategory.SECURITY, NeedCategory.ADMINISTRATION, NeedCategory.GOVERNANCE),
        (r"\brbac\b", r"identity", r"organizacao", r"organization", r"user", r"usuario", r"tenant"),
        "Ha evidencia de identidade, papel, organizacao, tenant ou permissao.",
        "amarrar actor, organizationId, role e escopo nos contratos humanos",
        ("identity", "rbac"),
    ),
    rule(
        "wrangler-operational-signal",
        "Wrangler operacional detectado",
        SignalKind.CAPABILITY,
        GateDomain.CLOUD,
        SignalSeverity.INFO,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.OPERATIONS, NeedCategory.INTEGRATION),
        (r"\bwrangler\b", r"workers\.dev", r"cloudflare worker"),
        "Ha evidencia de operacao Cloudflare por wrangler/Worker.",
        "usar wrangler para deploy, logs, rotas, secrets e health checks",
        ("wrangler", "cloudflare"),
    ),
)


# Rules that recognise problems.  Despite the name, the table mixes several
# kinds: BLOCKER, GAP, RISK and one EXCEPTION (the Cloudflare plugin denial,
# declared with INFO severity so it is recorded without counting as a blocker).
# The table is consumed by scan_rules() on the flattened evidence text and,
# entry by entry, by warning_signals() and known_blocker_signals().
# NOTE(review): some patterns are very broad (r"not found", r"unsupported",
# r"diverg"); since scan_rules() matches against the whole evidence text,
# confirm these do not raise high-severity signals from unrelated wording.
BLOCKER_RULES: tuple[SignalRule, ...] = (
    rule(
        "repo-missing",
        "Repositorio real ausente",
        SignalKind.BLOCKER,
        GateDomain.REPOSITORY,
        SignalSeverity.CRITICAL,
        HumanReadinessStage.NOT_FOUND,
        (NeedCategory.GOVERNANCE, NeedCategory.OPERATIONS),
        (r"repositorio real nao encontrado", r"repo.*ausente", r"not found"),
        "Sem repositorio real local nao ha base material para validar a plataforma.",
        "criar ou clonar repositorio real sem numero da pasta gerencial",
        ("repository",),
    ),
    rule(
        "git-missing",
        "Git local ausente ou inacessivel",
        SignalKind.BLOCKER,
        GateDomain.REPOSITORY,
        SignalSeverity.HIGH,
        HumanReadinessStage.LOCAL_ONLY,
        (NeedCategory.GOVERNANCE, NeedCategory.OPERATIONS),
        (r"sem \.git", r"git.*ausente", r"git.*inacess", r"permission denied.*\.git", r"index\.lock"),
        "Sem Git operacional a rodada nao consegue registrar commit, hash e sincronizacao.",
        "resolver ACL de .git, configurar origin e repetir commit/push",
        ("git", "sync"),
    ),
    rule(
        "tests-missing",
        "Testes nao encontrados",
        SignalKind.GAP,
        GateDomain.TESTS,
        SignalSeverity.MEDIUM,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.OPERATIONS, NeedCategory.OBSERVABILITY),
        (r"testes nao encontrados", r"no tests", r"sem teste"),
        "A varredura nao encontrou suite ou smoke detectavel.",
        "criar teste canonico de health/readiness/contrato humano",
        ("tests",),
    ),
    rule(
        "openapi-missing",
        "Contrato OpenAPI nao encontrado",
        SignalKind.GAP,
        GateDomain.CONTRACT,
        SignalSeverity.MEDIUM,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.DOCUMENTATION, NeedCategory.INTEGRATION),
        (r"openapi nao encontrado", r"openapi.*missing", r"sem openapi"),
        "Sem contrato OpenAPI ou equivalente, a integracao fica menos auditavel.",
        "publicar OpenAPI minima ou declarar contrato alternativo versionado",
        ("openapi", "contract"),
    ),
    rule(
        "docs-catalog-only",
        "Docs catalogOnly exige decisao formal",
        SignalKind.BLOCKER,
        GateDomain.DOCS,
        SignalSeverity.HIGH,
        HumanReadinessStage.CATALOG_ONLY,
        (NeedCategory.DOCUMENTATION, NeedCategory.GOVERNANCE),
        (r"catalogonly", r"catalog_only", r"catalog-only", r"docs.*catalog"),
        "Docs aparece como catalogOnly; isso precisa ser excecao formal ou leitura minima responseReady.",
        "promover leitura canonica minima de Docs ou registrar excecao deliberada",
        ("docs", "catalogOnly"),
    ),
    rule(
        "intelligence-unsupported",
        "Intelligence sem promocao operacional completa",
        SignalKind.BLOCKER,
        GateDomain.GOVERNANCE,
        SignalSeverity.HIGH,
        HumanReadinessStage.CATALOG_ONLY,
        (NeedCategory.STRATEGY, NeedCategory.OBSERVABILITY),
        (r"unsupported", r"catalogonly-local-ready", r"intelligence.*planned", r"public endpoint.*missing"),
        "Intelligence aparece local/catalogada, mas ainda depende de endpoint, storage ou registro operacional.",
        "manter como catalogOnly planejada ate publicar smoke HTTP e registrar no MCP central",
        ("intelligence", "unsupported"),
    ),
    rule(
        "credential-live-pending",
        "Credencial live ou BYOK pendente",
        SignalKind.BLOCKER,
        GateDomain.INTEGRATION,
        SignalSeverity.HIGH,
        HumanReadinessStage.CONTROLLED_READY,
        (NeedCategory.INTEGRATION, NeedCategory.SECURITY, NeedCategory.COMMERCIAL),
        (r"token.*missing", r"credential.*not.*ready", r"needs_token", r"live.*credential", r"credencial live", r"byok.*pend"),
        "A integracao depende de credencial live, token ou credentialRef por tenant.",
        "criar sessao BYOK, gerar credentialRef, executar smoke readonly e provar nao vazamento",
        ("credential", "BYOK"),
    ),
    rule(
        "panel-source-divergence",
        "Painel e GPT podem divergir",
        SignalKind.RISK,
        GateDomain.PANEL,
        SignalSeverity.HIGH,
        HumanReadinessStage.PANEL_READY,
        (NeedCategory.EXPERIENCE, NeedCategory.GOVERNANCE),
        (r"samesource.*false", r"same source.*false", r"diverg", r"source.*mismatch"),
        "Ha indicio de divergencia entre fonte do painel e fonte explicada pelo GPT.",
        "reconciliar sourceEndpoint, sourceToolId, sourcePayloadHash e sourceRecordsHash",
        ("sameSource", "panelReady"),
    ),
    rule(
        "plugin-cloudflare-expected-denial",
        "Negativa do plugin Cloudflare nao e blocker operacional",
        SignalKind.EXCEPTION,
        GateDomain.CLOUD,
        SignalSeverity.INFO,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.OPERATIONS, NeedCategory.INTEGRATION),
        (r"plugin.*cloudflare.*denied", r"cloudflare-plugin-auth-denied", r"user rejected mcp tool call"),
        "Falha ou negativa do plugin Cloudflare e esperada e nao deve bloquear a OS.",
        "registrar tentativa do plugin e seguir trabalho operacional por wrangler quando aplicavel",
        ("cloudflare-plugin", "expected"),
    ),
    rule(
        "cloudflare-binding-local-blocker",
        "Bindings Cloudflare ausentes no ambiente local",
        SignalKind.RISK,
        GateDomain.CLOUD,
        SignalSeverity.MEDIUM,
        HumanReadinessStage.TECHNICAL_READY,
        (NeedCategory.OPERATIONS, NeedCategory.INTEGRATION),
        (r"cloudflare-bindings", r"binding.*missing", r"bindings.*ausent", r"d1.*missing", r"kv.*missing", r"r2.*missing"),
        "O runtime local indica bindings ausentes; isso limita prova live, mas nao invalida evidencia local.",
        "validar bindings com wrangler e registrar ambiente alvo do smoke",
        ("cloudflare", "bindings"),
    ),
)


# Extra rules that only apply to one platform.  Keys are platform ids;
# build_operational_signals() looks up report.platform.platform_id here and
# falls back to no extra rules when the id is absent.  Every rule in this
# mapping is declared with SignalKind.DECISION.
PLATFORM_SPECIFIC_SIGNALS: dict[str, tuple[SignalRule, ...]] = {
    "docs": (
        rule(
            "docs-canonical-read",
            "Leitura canonica de Docs precisa ficar explicita",
            SignalKind.DECISION,
            GateDomain.DOCS,
            SignalSeverity.HIGH,
            HumanReadinessStage.CATALOG_ONLY,
            (NeedCategory.DOCUMENTATION, NeedCategory.GOVERNANCE),
            (r"docs", r"document", r"contrato", r"canon"),
            "Docs precisa decidir entre leitura responseReady minima e excecao catalogOnly formal.",
            "criar gate Docs: responseReady minimo ou excecao documentada sem bloquear ready global",
            ("docs", "decision"),
        ),
    ),
    "integracoes": (
        rule(
            "integracoes-byok-chain",
            "Jornada BYOK ponta a ponta deve ser provada",
            SignalKind.DECISION,
            GateDomain.INTEGRATION,
            SignalSeverity.HIGH,
            HumanReadinessStage.CONTROLLED_READY,
            (NeedCategory.INTEGRATION, NeedCategory.SECURITY, NeedCategory.COMMERCIAL),
            (r"byok", r"credentialref", r"cloudflare", r"gitlab", r"stripe", r"whatsapp"),
            "Integracoes tem base BYOK, mas precisa provar usuario, organizacao, entitlement, credentialRef e smoke.",
            "executar fluxo encadeado BYOK com nao vazamento e consumo auditavel",
            ("BYOK", "integracoes"),
        ),
    ),
    "business": (
        rule(
            "business-blocker-isolation",
            "Business deve isolar blockers por produto",
            SignalKind.DECISION,
            GateDomain.BUSINESS,
            SignalSeverity.MEDIUM,
            HumanReadinessStage.HUMAN_EXPLAINABLE,
            (NeedCategory.COMMERCIAL, NeedCategory.FINANCE, NeedCategory.GOVERNANCE),
            (r"blocker", r"panelready", r"entitlement", r"readycontrolled", r"commercial"),
            "Business aparece como fonte de readiness comercial e precisa impedir contaminacao global indevida.",
            "validar blocker por productId, stage e impacto comercial isolado",
            ("business", "blocker-policy"),
        ),
    ),
    "compliance": (
        rule(
            "compliance-admin-view",
            "Compliance deve manter admin view same-source",
            SignalKind.DECISION,
            GateDomain.COMPLIANCE,
            SignalSeverity.MEDIUM,
            HumanReadinessStage.PANEL_READY,
            (NeedCategory.LEGAL, NeedCategory.SECURITY, NeedCategory.GOVERNANCE),
            (r"compliance\.admin_view\.readiness", r"sameSource", r"panelReady", r"retention", r"policy"),
            "Compliance possui admin view e deve manter mesma fonte, redaction, retention e evidencia.",
            "validar regressao de panelReady, source hash, retention e dados redigidos",
            ("compliance", "admin-view"),
        ),
    ),
    "intelligence": (
        rule(
            "intelligence-promotion-gates",
            "Intelligence precisa de gates de promocao",
            SignalKind.DECISION,
            GateDomain.GOVERNANCE,
            SignalSeverity.HIGH,
            HumanReadinessStage.CATALOG_ONLY,
            (NeedCategory.STRATEGY, NeedCategory.OBSERVABILITY, NeedCategory.GOVERNANCE),
            (r"runtimeMinimum", r"responseReadyControlled", r"catalogOnly-local-ready", r"public smoke"),
            "Intelligence ja descreve gates, mas precisa evidencia publica para sair de catalogOnly local.",
            "executar smoke publico health/profile/readiness/openapi/admin e publicar evidencia",
            ("intelligence", "promotion"),
        ),
    ),
}


def evidence_text(report: PlatformHumanReport) -> str:
    """Flatten a report into one newline-separated string for rule matching.

    The text contains, in order: platform id, title and mission; the README
    excerpt; all warnings joined by spaces; the summaries and then the paths
    of the first 240 evidence items; and "title reason" for the first 12
    recommendations.
    """
    platform = report.platform
    scan = report.scan
    chunks = [
        platform.platform_id,
        platform.title,
        platform.mission,
        scan.readme_excerpt,
        " ".join(scan.warnings),
    ]
    sampled = scan.evidence[:240]
    chunks += [item.summary for item in sampled]
    chunks += [item.path for item in sampled]
    chunks += [" ".join((rec.title, rec.reason)) for rec in report.recommendations[:12]]
    return "\n".join(chunks)


def refs_for_rule(report: PlatformHumanReport, rule_item: SignalRule, limit: int = 6) -> tuple[SourceReference, ...]:
    """Collect up to *limit* source references supporting *rule_item*.

    Sources are tried in order and each later one is consulted only while
    nothing has been found yet: matching evidence items, matching scan
    warnings, declared known blockers, and finally the README excerpt.
    """
    scan = report.scan
    hits = [
        item
        for item in scan.evidence
        if rule_item.matches(f"{item.path} {item.summary} {' '.join(item.tags)}")
    ]
    found = list(source_refs_from_evidence(hits, limit=limit))
    if not found:
        found.extend(
            SourceReference(path=scan.repo_path, summary=warning, confidence=SourceConfidence.DERIVED, role=EvidenceRole.PRIMARY)
            for warning in (scan.warnings or ())
            if rule_item.matches(warning)
        )
    if not found:
        # NOTE(review): for a BLOCKER-kind rule every declared blocker is
        # attached, even when its text does not match the rule - confirm intent.
        found.extend(
            SourceReference(path=scan.repo_path, summary=blocker, confidence=SourceConfidence.DECLARED, role=EvidenceRole.PRIMARY)
            for blocker in (report.platform.known_blockers or ())
            if rule_item.matches(blocker) or rule_item.kind == SignalKind.BLOCKER
        )
    if not found and rule_item.matches(scan.readme_excerpt):
        found.append(
            SourceReference(path=f"{scan.repo_path}/README.md", summary="README contem sinal relacionado.", confidence=SourceConfidence.DERIVED)
        )
    return tuple(found[:limit])


def signal_from_rule(report: PlatformHumanReport, rule_item: SignalRule, refs: Sequence[SourceReference] | None = None) -> OperationalSignal:
    """Turn a matched rule into an ``OperationalSignal`` for the report's platform.

    The signal id is ``<platform>.<rule>.<digest>``, where the digest is
    computed from the platform id, the rule id and the ``reference`` of each
    supplied source.
    """
    sources = tuple(refs) if refs else ()
    platform_id = report.platform.platform_id
    digest = stable_digest(
        {
            "platform": platform_id,
            "rule": rule_item.rule_id,
            "refs": [source.reference for source in sources],
        },
        length=8,
    )
    return OperationalSignal(
        signal_id=f"{platform_id}.{rule_item.rule_id}.{digest}",
        platform_id=report.platform.platform_id,
        kind=rule_item.kind,
        domain=rule_item.domain,
        title=rule_item.title,
        summary=rule_item.positive_summary,
        severity=rule_item.severity,
        stage=rule_item.stage,
        categories=rule_item.categories,
        sources=sources,
        tags=rule_item.tags,
        next_action=rule_item.next_action,
    )


def scan_rules(report: PlatformHumanReport, rules: Sequence[SignalRule]) -> tuple[OperationalSignal, ...]:
    """Emit one signal for every rule that matches the report's evidence text.

    Rules are evaluated in the given order against the flattened text from
    ``evidence_text``; supporting references come from ``refs_for_rule``.
    """
    haystack = evidence_text(report)
    return tuple(
        signal_from_rule(report, candidate, refs_for_rule(report, candidate))
        for candidate in rules
        if candidate.matches(haystack)
    )


def repository_signals(report: PlatformHumanReport) -> tuple[OperationalSignal, ...]:
    """Describe the local repository and Git state as signals.

    Exactly one repository signal is produced (capability when the scan says
    the repo exists, critical blocker otherwise).  A Git signal follows when
    Git is present, or when the repo exists but Git was not detected; a
    missing repo yields no Git signal.
    """
    scan = report.scan
    platform_id = scan.platform.platform_id
    repo_path = scan.repo_path

    if scan.exists:
        repo_signal = OperationalSignal(
            signal_id=f"{platform_id}.repo.exists",
            platform_id=platform_id,
            kind=SignalKind.CAPABILITY,
            domain=GateDomain.REPOSITORY,
            title="Repositorio local encontrado",
            summary="O espelho local existe e pode ser analisado.",
            severity=SignalSeverity.INFO,
            stage=HumanReadinessStage.LOCAL_ONLY,
            sources=(SourceReference(path=repo_path, summary="Repositorio existe no filesystem.", confidence=SourceConfidence.DIRECT),),
            tags=("repository",),
            next_action="manter repositorio sincronizado e com hash rastreavel",
        )
    else:
        repo_signal = OperationalSignal(
            signal_id=f"{platform_id}.repo.missing",
            platform_id=platform_id,
            kind=SignalKind.BLOCKER,
            domain=GateDomain.REPOSITORY,
            title="Repositorio local ausente",
            summary="Nao existe espelho local para leitura ou validacao.",
            severity=SignalSeverity.CRITICAL,
            stage=HumanReadinessStage.NOT_FOUND,
            sources=(SourceReference(path=repo_path, summary="Caminho nao encontrado.", confidence=SourceConfidence.MISSING, role=EvidenceRole.ABSENT),),
            tags=("repository", "missing"),
            next_action="criar ou clonar o repositorio real",
        )
    collected: list[OperationalSignal] = [repo_signal]

    if scan.git_present:
        # Missing metadata is replaced by placeholder text so three references are always built.
        git_facts = (
            scan.branch or "branch desconhecida",
            scan.head or "head desconhecido",
            scan.remote_origin or "origin nao configurado",
        )
        collected.append(
            OperationalSignal(
                signal_id=f"{platform_id}.git.present",
                platform_id=platform_id,
                kind=SignalKind.CAPABILITY,
                domain=GateDomain.REPOSITORY,
                title="Git local detectado",
                summary="Branch, HEAD ou metadados Git foram detectados no repositorio.",
                severity=SignalSeverity.INFO,
                stage=HumanReadinessStage.TECHNICAL_READY,
                sources=source_refs_from_strings(git_facts, "Metadado Git detectado."),
                tags=("git",),
                next_action="validar git status e sincronizacao no fechamento",
            )
        )
    elif scan.exists:
        collected.append(
            OperationalSignal(
                signal_id=f"{platform_id}.git.missing",
                platform_id=platform_id,
                kind=SignalKind.BLOCKER,
                domain=GateDomain.REPOSITORY,
                title="Git local nao detectado",
                summary="Repositorio existe, mas .git nao foi detectado pela varredura.",
                severity=SignalSeverity.HIGH,
                stage=HumanReadinessStage.LOCAL_ONLY,
                sources=(SourceReference(path=repo_path, summary="Repositorio sem .git detectavel.", confidence=SourceConfidence.MISSING),),
                tags=("git", "sync"),
                next_action="inicializar Git ou corrigir permissao local de .git",
            )
        )
    return tuple(collected)


def warning_signals(report: PlatformHumanReport) -> tuple[OperationalSignal, ...]:
    """Convert scan warnings into signals.

    A warning that matches one or more ``BLOCKER_RULES`` yields one signal per
    matching rule.  A warning matching none becomes a generic medium-severity
    GAP signal whose summary is the warning text itself.
    """
    collected: list[OperationalSignal] = []
    for warning in report.scan.warnings:
        hits = [candidate for candidate in BLOCKER_RULES if candidate.matches(warning)]
        if hits:
            collected.extend(
                signal_from_rule(
                    report,
                    candidate,
                    (SourceReference(path=report.scan.repo_path, summary=warning, confidence=SourceConfidence.DERIVED, role=EvidenceRole.PRIMARY),),
                )
                for candidate in hits
            )
            continue
        # Unclassified warning: keep it visible with an id derived from its text.
        platform_id = report.platform.platform_id
        generic_id = f"{platform_id}.warning.{slugify(warning)}.{stable_digest(warning, 6)}"
        collected.append(
            OperationalSignal(
                signal_id=generic_id,
                platform_id=report.platform.platform_id,
                kind=SignalKind.GAP,
                domain=GateDomain.GOVERNANCE,
                title="Warning de varredura",
                summary=warning,
                severity=SignalSeverity.MEDIUM,
                stage=HumanReadinessStage.TECHNICAL_READY,
                sources=(SourceReference(path=report.scan.repo_path, summary=warning, confidence=SourceConfidence.DERIVED),),
                tags=("warning",),
                next_action="classificar warning e registrar evidencia de resolucao ou excecao",
            )
        )
    return tuple(collected)


def known_blocker_signals(report: PlatformHumanReport) -> tuple[OperationalSignal, ...]:
    """Convert the platform's declared known blockers into signals.

    Each blocker yields one signal per matching entry of ``BLOCKER_RULES``.
    When no entry matches, a one-off "known-blocker" rule is built from the
    blocker text so the blocker is still reported.
    """
    collected: list[OperationalSignal] = []
    for blocker in report.platform.known_blockers:
        applicable = [candidate for candidate in BLOCKER_RULES if candidate.matches(blocker)] or [
            rule(
                "known-blocker",
                "Bloqueio conhecido catalogado",
                SignalKind.BLOCKER,
                GateDomain.GOVERNANCE,
                SignalSeverity.HIGH,
                HumanReadinessStage.BLOCKED,
                report.platform.primary_categories,
                # Escaped so the blocker text is treated literally, not as a regex.
                (re.escape(blocker.lower()),),
                "Bloqueio conhecido precisa ser resolvido, isolado ou formalizado.",
                "resolver, isolar ou formalizar o bloqueio conhecido",
                ("known-blocker",),
            )
        ]
        collected.extend(
            signal_from_rule(
                report,
                candidate,
                (SourceReference(path=report.scan.repo_path, summary=blocker, confidence=SourceConfidence.DECLARED, role=EvidenceRole.PRIMARY),),
            )
            for candidate in applicable
        )
    return tuple(collected)


def score_signals(report: PlatformHumanReport) -> tuple[OperationalSignal, ...]:
    """Translate the report's average score into exactly one signal.

    A score of 90 or more gives an informational capability signal.  Lower
    scores give a GAP signal whose severity and stage depend on the band
    (>= 70, >= 50, below 50) and whose sources list the four lowest-scoring
    cells as ``profile_id:score``.
    """
    score = report.average_score
    platform_id = report.platform.platform_id
    if score >= 90:
        high = OperationalSignal(
            signal_id=f"{platform_id}.score.high",
            platform_id=platform_id,
            kind=SignalKind.CAPABILITY,
            domain=GateDomain.GOVERNANCE,
            title="Score humano alto",
            summary=f"Score medio humano {score}; a plataforma tem boa cobertura por perfil.",
            severity=SignalSeverity.INFO,
            stage=HumanReadinessStage.HUMAN_EXPLAINABLE,
            tags=("score",),
            next_action="trocar score isolado por gates com blockers formais e evidencia viva",
        )
        return (high,)

    # Bands are checked from the highest floor down; anything below them is HIGH/PLANNED.
    bands = (
        (70, SignalSeverity.LOW, HumanReadinessStage.HUMAN_EXPLAINABLE),
        (50, SignalSeverity.MEDIUM, HumanReadinessStage.TECHNICAL_READY),
    )
    severity, stage = SignalSeverity.HIGH, HumanReadinessStage.PLANNED
    for floor, band_severity, band_stage in bands:
        if score >= floor:
            severity, stage = band_severity, band_stage
            break

    weakest = sorted(report.cells, key=lambda item: item.score)[:4]
    evidence = tuple(
        SourceReference(path=report.scan.repo_path, summary=f"{cell.profile_id}:{cell.score}", confidence=SourceConfidence.DERIVED)
        for cell in weakest
    )
    attention = OperationalSignal(
        signal_id=f"{platform_id}.score.attention",
        platform_id=platform_id,
        kind=SignalKind.GAP,
        domain=GateDomain.GOVERNANCE,
        title="Score humano exige melhoria",
        summary=f"Score medio humano {score}; perfis mais frageis precisam de OS direcionada.",
        severity=severity,
        stage=stage,
        sources=evidence,
        tags=("score", "matrix"),
        next_action="priorizar perfis de menor score em tela, relatorio ou comando humano",
    )
    return (attention,)


def recommendation_signals(report: PlatformHumanReport, recommendations: Sequence[Recommendation]) -> tuple[OperationalSignal, ...]:
    """Convert recommendations addressed to this report's platform into signals.

    Recommendations for other platforms are ignored.  Priority >= 85 yields a
    HIGH-severity BLOCKER; lower priorities yield a DECISION with MEDIUM
    severity from 65 upward and LOW below that.
    """
    collected: list[OperationalSignal] = []
    for rec in recommendations:
        if rec.platform_id != report.platform.platform_id:
            continue
        if rec.priority >= 85:
            severity = SignalSeverity.HIGH
        elif rec.priority >= 65:
            severity = SignalSeverity.MEDIUM
        else:
            severity = SignalSeverity.LOW
        if rec.priority >= 85:
            kind = SignalKind.BLOCKER
        else:
            kind = SignalKind.DECISION
        domain = domain_from_categories(rec.categories)
        # Fall back to the repository path when the recommendation names no paths.
        paths = rec.affected_paths or (report.scan.repo_path,)
        refs = source_refs_from_strings(paths, rec.reason, confidence=SourceConfidence.DECLARED)
        signal = OperationalSignal(
            signal_id=f"{rec.recommendation_id}.{stable_digest(rec.reason, 6)}",
            platform_id=report.platform.platform_id,
            kind=kind,
            domain=domain,
            title=rec.title,
            summary=rec.reason,
            severity=severity,
            stage=HumanReadinessStage.HUMAN_EXPLAINABLE,
            categories=rec.categories,
            sources=refs,
            tags=("recommendation", rec.suggested_order_type.value),
            next_action=rec.expected_impact,
        )
        collected.append(signal)
    return tuple(collected)


def domain_from_categories(categories: Sequence[NeedCategory]) -> GateDomain:
    """Map need categories to a gate domain.

    The first category in *categories* that has a mapping decides the result;
    the order of the input matters, not the order of the mapping table.  When
    nothing maps, ``GateDomain.GOVERNANCE`` is returned.
    """
    domain_by_category = {
        NeedCategory.SECURITY: GateDomain.SECURITY,
        NeedCategory.INTEGRATION: GateDomain.INTEGRATION,
        NeedCategory.COMMERCIAL: GateDomain.BUSINESS,
        NeedCategory.FINANCE: GateDomain.BUSINESS,
        NeedCategory.LEGAL: GateDomain.COMPLIANCE,
        NeedCategory.DOCUMENTATION: GateDomain.DOCS,
        NeedCategory.EXPERIENCE: GateDomain.PANEL,
        NeedCategory.OBSERVABILITY: GateDomain.OBSERVABILITY,
        NeedCategory.GOVERNANCE: GateDomain.GOVERNANCE,
        NeedCategory.OPERATIONS: GateDomain.RUNTIME,
    }
    mapped = (domain_by_category[category] for category in categories if category in domain_by_category)
    return next(mapped, GateDomain.GOVERNANCE)


def dedupe_signals(signals: Iterable[OperationalSignal]) -> tuple[OperationalSignal, ...]:
    """Drop duplicate signals and return the survivors in a stable order.

    Two signals are duplicates when platform id, lower-cased title and domain
    value all coincide; the first one encountered is kept.  The result is
    sorted by platform id, then descending severity, then domain value, then
    title.

    NOTE(review): signals that share a fixed title and domain (for example
    the generic "Warning de varredura" entries) collapse into a single
    survivor here even when their summaries differ - confirm this is intended.
    """
    first_seen: dict[tuple[str, str, str], OperationalSignal] = {}
    for signal in signals:
        identity = (signal.platform_id, signal.title.lower(), signal.domain.value)
        first_seen.setdefault(identity, signal)
    ordered = sorted(
        first_seen.values(),
        key=lambda item: (item.platform_id, -severity_to_sort(item.severity), item.domain.value, item.title),
    )
    return tuple(ordered)


def severity_to_sort(severity: SignalSeverity) -> int:
    """Return a numeric rank for *severity*: INFO is 1 up to CRITICAL at 5.

    Any value outside the five known severities ranks as 0.
    """
    ascending = (
        SignalSeverity.INFO,
        SignalSeverity.LOW,
        SignalSeverity.MEDIUM,
        SignalSeverity.HIGH,
        SignalSeverity.CRITICAL,
    )
    rank_by_severity = {level: rank for rank, level in enumerate(ascending, start=1)}
    return rank_by_severity.get(severity, 0)


def build_operational_signals(report: PlatformHumanReport, recommendations: Sequence[Recommendation] = ()) -> tuple[OperationalSignal, ...]:
    """Run every signal producer for *report* and return the deduplicated result.

    Producers run in a fixed order: repository state, capability rules,
    blocker rules, platform-specific rules, warnings, known blockers, score
    and finally recommendations.  ``dedupe_signals`` keeps the first of any
    duplicates, so this order decides which duplicate survives.
    """
    platform_rules = PLATFORM_SPECIFIC_SIGNALS.get(report.platform.platform_id, ())
    collected: list[OperationalSignal] = [
        *repository_signals(report),
        *scan_rules(report, CAPABILITY_RULES),
        *scan_rules(report, BLOCKER_RULES),
        *scan_rules(report, platform_rules),
        *warning_signals(report),
        *known_blocker_signals(report),
        *score_signals(report),
        *recommendation_signals(report, recommendations),
    ]
    return dedupe_signals(collected)


def summarize_blockers(signals: Sequence[OperationalSignal], limit: int = 12) -> tuple[str, ...]:
    """Render the most severe blocking signals as "platform: title - next action" lines.

    Blocking signals are ordered by descending severity, then platform id,
    then title.  The first *limit* of them are formatted and passed through
    ``merge_unique``.
    """
    ranked = sorted(
        (signal for signal in signals if signal.is_blocking),
        key=lambda item: (-severity_to_sort(item.severity), item.platform_id, item.title),
    )
    top = ranked[:limit]
    return merge_unique(f"{signal.platform_id}: {signal.title} - {signal.next_action}" for signal in top)


def summarize_capabilities(signals: Sequence[OperationalSignal], limit: int = 12) -> tuple[str, ...]:
    """Render capability signals as "platform: title" lines.

    Capability signals are ordered by platform id, domain value and title.
    The first *limit* of them are formatted and passed through
    ``merge_unique``.
    """
    ranked = sorted(
        (signal for signal in signals if signal.kind == SignalKind.CAPABILITY),
        key=lambda item: (item.platform_id, item.domain.value, item.title),
    )
    top = ranked[:limit]
    return merge_unique(f"{signal.platform_id}: {signal.title}" for signal in top)


def scan_repository_for_order_text(repo_path: Path, patterns: Sequence[str], max_files: int = 80) -> tuple[SourceReference, ...]:
    """Search text files for specific order-related patterns.

    The function is intentionally small and safe: it skips known build/vendor
    directories, never descends into symlinked directories (which could form
    cycles or point outside the repository), reads only bounded text files,
    and returns references rather than raw content.

    Args:
        repo_path: Root directory to walk.
        patterns: Regular expressions, matched case-insensitively per line.
        max_files: Maximum number of references (one per matching file).

    Returns:
        One reference per matching file, pointing at the first matching line
        with a path relative to ``repo_path``.  A single "missing" reference
        is returned when ``repo_path`` does not exist, and an empty tuple when
        there are no patterns or ``max_files`` is not positive.

    Raises:
        re.error: If a pattern is not a valid regular expression.
    """

    skip = {".git", "node_modules", "dist", "build", "coverage", "__pycache__", ".wrangler", "vendor"}
    extensions = {".md", ".ts", ".tsx", ".js", ".mjs", ".cjs", ".py", ".json", ".toml", ".yml", ".yaml"}
    if not repo_path.exists():
        return (
            SourceReference(path=str(repo_path), summary="Repositorio nao encontrado para busca de texto.", confidence=SourceConfidence.MISSING, role=EvidenceRole.ABSENT),
        )
    # Compile once instead of looking each pattern up again for every line.
    compiled = [re.compile(pattern, re.I) for pattern in patterns]
    if not compiled or max_files <= 0:
        # Nothing can match, so avoid walking and reading the whole tree.
        return ()
    refs: list[SourceReference] = []
    stack = [repo_path]
    while stack and len(refs) < max_files:
        current = stack.pop()
        try:
            entries = sorted(current.iterdir(), key=lambda item: item.name.lower())
        except OSError:
            continue
        for entry in entries:
            try:
                if entry.is_dir():
                    # Symlinked directories are skipped: they may loop back
                    # into the tree or lead outside the repository.
                    if entry.name not in skip and not entry.is_symlink():
                        stack.append(entry)
                    continue
                if entry.suffix.lower() not in extensions:
                    continue
                if entry.stat().st_size > 240_000:
                    continue
                lines = entry.read_text(encoding="utf-8", errors="ignore").splitlines()
            except OSError:
                # Unreadable entries are ignored; the scan is best-effort.
                continue
            # Lines are lower-cased before matching, as callers have always seen.
            first_hit = next(
                (
                    index
                    for index, line in enumerate(lines, start=1)
                    if any(regex.search(line.lower()) for regex in compiled)
                ),
                None,
            )
            if first_hit is None:
                continue
            refs.append(
                SourceReference(
                    path=str(entry.relative_to(repo_path)).replace("\\", "/"),
                    line=first_hit,
                    summary="Trecho local referencia tema da ordem sem expor conteudo bruto.",
                    confidence=SourceConfidence.DIRECT,
                    role=EvidenceRole.SUPPORTING,
                )
            )
            if len(refs) >= max_files:
                break
    return tuple(refs)