phase 14-15: docker build validation and helm deployment

2026-04-11 11:59:45 -07:00
parent 7394d241c9
commit ce10afa034
179 changed files with 32559 additions and 576 deletions
@@ -0,0 +1,241 @@
+"""Suppression logic for low-quality data or low confidence.
+
+Evaluates the quality of the underlying data feeding a trend summary
+and suppresses automated trade eligibility when data quality is poor.
+Suppressed recommendations are marked as informational only.
+
+This layer runs *before* the eligibility engine and acts as a pre-filter
+on data quality. The eligibility engine handles signal-level thresholds
+(confidence, strength, contradiction); this module handles data-level
+quality concerns (stale evidence, low extraction quality, poor source
+diversity, insufficient valid documents).
+
+Requirements: 7.4
+"""
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+
+from services.shared.schemas import TrendSummary
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+
+class SuppressionReason(str, Enum):
+    """Why a recommendation was suppressed due to data quality."""
+
+    # Mixing in str makes every member compare equal to its string value.
+
+    # Average extraction confidence is below the configured minimum, or the
+    # overall data quality score is below its threshold (evaluate_suppression
+    # reports both conditions under this single reason).
+    LOW_DATA_CONFIDENCE = "low_data_confidence"
+    # The newest evidence is older than the staleness limit, or documents
+    # exist but no evidence timestamp is available.
+    STALE_EVIDENCE = "stale_evidence"
+    # Fewer distinct source types than required (only checked when the
+    # context reports at least one document).
+    LOW_SOURCE_DIVERSITY = "low_source_diversity"
+    # failed_documents / total_documents exceeds the tolerated rate.
+    HIGH_EXTRACTION_FAILURE_RATE = "high_extraction_failure_rate"
+    # Fewer valid documents than the configured minimum.
+    INSUFFICIENT_VALID_DOCUMENTS = "insufficient_valid_documents"
+
+
+@dataclass(frozen=True)
+class SuppressionConfig:
+    """Tunable thresholds for data quality suppression.
+
+    These thresholds focus on the quality of the *input data* rather
+    than the trend signal itself (which is handled by EligibilityConfig).
+    """
+
+    # The dataclass is frozen, so instances cannot be modified after
+    # construction; build a new instance to use different thresholds.
+
+    # Minimum average extraction confidence across evidence documents.
+    # Strictly below this, the underlying data is too unreliable for
+    # trade decisions.
+    min_avg_extraction_confidence: float = 0.40
+
+    # Maximum age (hours) of the most recent evidence document.
+    # If the freshest evidence is strictly older than this, the trend is
+    # stale. The same value is the horizon over which the freshness
+    # component of the data quality score decays to zero.
+    max_evidence_staleness_hours: float = 168.0  # 7 days
+
+    # Minimum number of distinct source types (e.g. news, filings, market)
+    # represented in the evidence. Low diversity means the signal may be
+    # driven by a single unreliable source class.
+    # With the default of 1, a context that has documents but an empty
+    # source_types set is flagged for low source diversity.
+    min_source_types: int = 1
+
+    # Maximum tolerable extraction failure rate (0-1).
+    # If strictly more than this fraction of documents failed extraction,
+    # the data pipeline is unreliable for this ticker.
+    max_extraction_failure_rate: float = 0.50
+
+    # Minimum number of valid (non-failed) documents that contributed
+    # to the trend. Strictly below this, there isn't enough data to act on.
+    min_valid_documents: int = 2
+
+    # Overall data quality confidence threshold.
+    # The computed data quality score must be at least this value for the
+    # recommendation to be eligible for automated trading; a score exactly
+    # equal to the threshold is not suppressed.
+    min_data_quality_score: float = 0.30
+
+
+# Shared instance carrying the default thresholds. It is used as the default
+# value of evaluate_suppression's ``config`` parameter; sharing one instance
+# is safe because SuppressionConfig is a frozen dataclass.
+DEFAULT_SUPPRESSION_CONFIG = SuppressionConfig()
+
+
+@dataclass
+class DataQualityContext:
+    """Quality metrics about the data underlying a trend summary.
+
+    Populated by querying document and extraction metadata for the
+    ticker and window. When not available from the database, callers
+    can construct this from the trend summary itself.
+    """
+
+    # Number of documents considered. Used as the denominator of both the
+    # extraction failure rate and the valid-document ratio; when it is 0
+    # those checks are skipped and the coverage component scores 0.
+    total_documents: int = 0
+    # Number of documents counted as valid; compared against
+    # SuppressionConfig.min_valid_documents.
+    valid_documents: int = 0
+    # Number of documents whose extraction failed.
+    failed_documents: int = 0
+    # Average extraction confidence across evidence documents; compared
+    # against SuppressionConfig.min_avg_extraction_confidence.
+    avg_extraction_confidence: float = 0.0
+    # Timestamp of the most recent evidence. A naive datetime is interpreted
+    # as UTC by the staleness and freshness calculations. None means unknown.
+    newest_evidence_at: datetime | None = None
+    # Distinct source type labels seen in the evidence; only the number of
+    # entries is checked. default_factory gives each instance its own set.
+    source_types: set[str] = field(default_factory=set)
+
+
+@dataclass
+class SuppressionResult:
+    """Output of the suppression evaluation."""
+
+    # True when at least one suppression reason was recorded.
+    suppressed: bool
+    # Every reason that fired, in the order the checks ran. default_factory
+    # gives each instance its own list.
+    reasons: list[SuppressionReason] = field(default_factory=list)
+    # Overall data quality score; evaluate_suppression fills it with a value
+    # in [0, 1] where higher means better quality.
+    data_quality_score: float = 0.0
+    # The quality context the decision was based on, if one was attached.
+    context: DataQualityContext | None = None
+
+
+def build_quality_context_from_summary(
+    summary: TrendSummary,
+) -> DataQualityContext:
+    """Derive a best-effort DataQualityContext from a TrendSummary alone.
+
+    Intended as a fallback for callers that have no document-level
+    quality metrics. The summary's own fields stand in as proxies:
+
+    - every supporting or opposing evidence item counts as one valid
+      document, and no document is counted as failed;
+    - ``summary.confidence`` stands in for the average extraction
+      confidence;
+    - ``summary.generated_at`` stands in for the newest evidence timestamp;
+    - the set of source types is left empty.
+
+    Args:
+        summary: The trend summary to derive proxy metrics from.
+
+    Returns:
+        A DataQualityContext populated with the proxy values above.
+    """
+    # NOTE(review): because source_types is empty, evaluate_suppression with
+    # the default min_source_types of 1 reports LOW_SOURCE_DIVERSITY whenever
+    # the summary lists any evidence -- confirm this is intended.
+    supporting_count = len(summary.top_supporting_evidence)
+    opposing_count = len(summary.top_opposing_evidence)
+    evidence_count = supporting_count + opposing_count
+
+    proxy_fields = {
+        "total_documents": evidence_count,
+        "valid_documents": evidence_count,
+        "failed_documents": 0,
+        "avg_extraction_confidence": summary.confidence,
+        "newest_evidence_at": summary.generated_at,
+        "source_types": set(),
+    }
+    return DataQualityContext(**proxy_fields)
+
+
+def _compute_data_quality_score(
+    ctx: DataQualityContext,
+    config: SuppressionConfig,
+    reference_time: datetime,
+) -> float:
+    """Compute an overall data quality score from the context.
+
+    The score is a weighted sum of three components, each clamped to
+    [0, 1] so that no component can contribute more than its weight:
+
+    - Extraction confidence (40% weight): ``avg_extraction_confidence``
+      scaled so that 0.8 or higher earns the full weight.
+    - Evidence freshness (30% weight): decays linearly from 1 (evidence at
+      or after ``reference_time``) to 0 (evidence at least
+      ``config.max_evidence_staleness_hours`` old); 0 when there is no
+      evidence timestamp.
+    - Document coverage (30% weight): the valid/total document ratio
+      multiplied by a volume factor that saturates at 10 valid documents;
+      0 when there are no documents.
+
+    Args:
+        ctx: Quality metrics for the evidence behind a trend.
+        config: Thresholds; only ``max_evidence_staleness_hours`` is read.
+        reference_time: The "now" used to compute evidence age. A naive
+            value is interpreted as UTC, as naive evidence timestamps are.
+
+    Returns:
+        The score rounded to 4 decimal places, in [0, 1]; higher is better.
+    """
+
+    def _clamp01(value: float) -> float:
+        # Keep a component inside [0, 1] so it cannot exceed its weight.
+        return max(0.0, min(1.0, value))
+
+    # Extraction confidence component
+    conf_component = _clamp01(ctx.avg_extraction_confidence / 0.8)
+
+    # Freshness component
+    newest = ctx.newest_evidence_at
+    if newest is None:
+        freshness_component = 0.0
+    else:
+        # Naive and aware datetimes cannot be subtracted from each other,
+        # so bring both operands to aware UTC first.
+        if newest.tzinfo is None:
+            newest = newest.replace(tzinfo=timezone.utc)
+        if reference_time.tzinfo is None:
+            reference_time = reference_time.replace(tzinfo=timezone.utc)
+        age_hours = (reference_time - newest).total_seconds() / 3600.0
+        max_hours = config.max_evidence_staleness_hours
+        if max_hours > 0:
+            # Clamping the upper bound matters for evidence timestamped
+            # after reference_time: its negative age would otherwise push
+            # the component above 1.
+            freshness_component = _clamp01(1.0 - (age_hours / max_hours))
+        else:
+            # A zero (or negative) staleness window cannot be divided by;
+            # only evidence that is not older than reference_time is fresh.
+            freshness_component = 1.0 if age_hours <= 0 else 0.0
+
+    # Document coverage component
+    if ctx.total_documents > 0:
+        valid_ratio = ctx.valid_documents / ctx.total_documents
+        count_factor = min(ctx.valid_documents / 10.0, 1.0)
+        coverage_component = _clamp01(valid_ratio * count_factor)
+    else:
+        coverage_component = 0.0
+
+    score = (0.4 * conf_component) + (0.3 * freshness_component) + (0.3 * coverage_component)
+    return round(_clamp01(score), 4)
+
+
+def evaluate_suppression(
+    summary: TrendSummary,
+    quality_ctx: DataQualityContext | None = None,
+    config: SuppressionConfig = DEFAULT_SUPPRESSION_CONFIG,
+    reference_time: datetime | None = None,
+) -> SuppressionResult:
+    """Evaluate whether a recommendation should be suppressed due to data quality.
+
+    Runs every data quality check (extraction confidence, evidence
+    staleness, source diversity, extraction failure rate, valid document
+    count, overall quality score) and collects a reason for each one that
+    fails. The recommendation is suppressed when at least one reason was
+    collected; a suppression is logged at INFO level.
+
+    Args:
+        summary: The trend summary to evaluate.
+        quality_ctx: Data quality context. If None, a minimal context is
+            built from the trend summary itself.
+        config: Suppression thresholds.
+        reference_time: Reference time for staleness checks. Defaults to
+            the current UTC time. A naive value is interpreted as UTC, the
+            same way naive evidence timestamps are.
+
+    Returns:
+        SuppressionResult with the suppression decision, the reasons in
+        the order the checks ran, the data quality score and the context
+        that was evaluated.
+    """
+    if reference_time is None:
+        reference_time = datetime.now(timezone.utc)
+    elif reference_time.tzinfo is None:
+        # Evidence timestamps are normalised to aware UTC below; a naive
+        # reference time would make the subtraction raise TypeError.
+        reference_time = reference_time.replace(tzinfo=timezone.utc)
+
+    # NOTE(review): the fallback context has an empty source_types set, so
+    # with the default thresholds it always yields at least one reason
+    # (LOW_SOURCE_DIVERSITY when evidence exists, INSUFFICIENT_VALID_DOCUMENTS
+    # otherwise) -- confirm this is the intended behaviour.
+    if quality_ctx is not None:
+        ctx = quality_ctx
+    else:
+        ctx = build_quality_context_from_summary(summary)
+    reasons: list[SuppressionReason] = []
+
+    # Check average extraction confidence
+    if ctx.avg_extraction_confidence < config.min_avg_extraction_confidence:
+        reasons.append(SuppressionReason.LOW_DATA_CONFIDENCE)
+
+    # Check evidence staleness
+    if ctx.newest_evidence_at is not None:
+        newest = ctx.newest_evidence_at
+        if newest.tzinfo is None:
+            newest = newest.replace(tzinfo=timezone.utc)
+        age_hours = (reference_time - newest).total_seconds() / 3600.0
+        if age_hours > config.max_evidence_staleness_hours:
+            reasons.append(SuppressionReason.STALE_EVIDENCE)
+    elif ctx.total_documents > 0:
+        # Have documents but no timestamp — treat as stale
+        reasons.append(SuppressionReason.STALE_EVIDENCE)
+
+    # Check source diversity (skipped when there are no documents at all;
+    # that case is reported by the valid-document check instead)
+    if len(ctx.source_types) < config.min_source_types and ctx.total_documents > 0:
+        reasons.append(SuppressionReason.LOW_SOURCE_DIVERSITY)
+
+    # Check extraction failure rate
+    if ctx.total_documents > 0:
+        failure_rate = ctx.failed_documents / ctx.total_documents
+        if failure_rate > config.max_extraction_failure_rate:
+            reasons.append(SuppressionReason.HIGH_EXTRACTION_FAILURE_RATE)
+
+    # Check minimum valid documents
+    if ctx.valid_documents < config.min_valid_documents:
+        reasons.append(SuppressionReason.INSUFFICIENT_VALID_DOCUMENTS)
+
+    # Compute overall data quality score
+    quality_score = _compute_data_quality_score(ctx, config, reference_time)
+
+    # A low overall score is reported as LOW_DATA_CONFIDENCE as well; the
+    # membership test keeps that reason from being listed twice.
+    if quality_score < config.min_data_quality_score and SuppressionReason.LOW_DATA_CONFIDENCE not in reasons:
+        reasons.append(SuppressionReason.LOW_DATA_CONFIDENCE)
+
+    suppressed = len(reasons) > 0
+
+    if suppressed:
+        logger.info(
+            "Recommendation suppressed for %s/%s: reasons=%s quality_score=%.3f",
+            summary.entity_id, summary.window.value,
+            [r.value for r in reasons], quality_score,
+        )
+
+    return SuppressionResult(
+        suppressed=suppressed,
+        reasons=reasons,
+        data_quality_score=quality_score,
+        context=ctx,
+    )