phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,241 @@
|
||||
"""Suppression logic for low-quality data or low confidence.
|
||||
|
||||
Evaluates the quality of the underlying data feeding a trend summary
and suppresses automated trade eligibility when data quality is poor.
Suppressed recommendations are marked as informational only.

This layer runs *before* the eligibility engine and acts as a pre-filter
on data quality. The eligibility engine handles signal-level thresholds
(confidence, strength, contradiction); this module handles data-level
quality concerns (stale evidence, low extraction quality, poor source
diversity, insufficient valid documents).
|
||||
|
||||
Requirements: 7.4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
|
||||
from services.shared.schemas import TrendSummary
|
||||
|
||||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SuppressionReason(str, Enum):
    """Data-quality grounds on which a recommendation can be suppressed.

    Members are also plain ``str`` values, so they compare equal to their
    string form.
    """

    # Extraction confidence, or the overall quality score, is too low.
    LOW_DATA_CONFIDENCE = "low_data_confidence"

    # The freshest evidence is too old, or carries no timestamp at all.
    STALE_EVIDENCE = "stale_evidence"

    # Too few distinct source types are represented in the evidence.
    LOW_SOURCE_DIVERSITY = "low_source_diversity"

    # Too large a share of the documents failed extraction.
    HIGH_EXTRACTION_FAILURE_RATE = "high_extraction_failure_rate"

    # Not enough successfully extracted documents to act on.
    INSUFFICIENT_VALID_DOCUMENTS = "insufficient_valid_documents"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SuppressionConfig:
    """Immutable set of thresholds used for data-quality suppression.

    Every threshold here describes the quality of the *input data* behind
    a trend, not the trend signal itself (signal-level thresholds belong
    to EligibilityConfig).
    """

    # Floor for the mean extraction confidence over the evidence
    # documents; anything lower is considered too unreliable to trade on.
    min_avg_extraction_confidence: float = 0.40

    # Ceiling, in hours, on the age of the most recent evidence document.
    # Evidence whose freshest item is older than this is treated as stale.
    max_evidence_staleness_hours: float = 168.0  # 7 days

    # Fewest distinct source types (e.g. news, filings, market) that must
    # appear in the evidence; with less diversity a single unreliable
    # source class could be driving the signal.
    min_source_types: int = 1

    # Largest acceptable extraction failure rate, as a fraction in 0-1.
    # A higher failure share marks the pipeline as unreliable for the ticker.
    max_extraction_failure_rate: float = 0.50

    # Fewest valid (non-failed) documents that must have contributed to
    # the trend; below this there is not enough data to act on.
    min_valid_documents: int = 2

    # Threshold on the computed overall data quality score; a score below
    # it suppresses the recommendation from automated trading.
    min_data_quality_score: float = 0.30
|
||||
|
||||
|
||||
# Shared instance carrying the default thresholds; it is the default value of
# the ``config`` parameter of evaluate_suppression. SuppressionConfig is a
# frozen dataclass, so this shared instance cannot be mutated by callers.
DEFAULT_SUPPRESSION_CONFIG = SuppressionConfig()
|
||||
|
||||
|
||||
@dataclass
class DataQualityContext:
    """Quality measurements for the data behind a trend summary.

    Normally filled in from document and extraction metadata for the
    ticker and window. If that metadata cannot be read from the database,
    a caller may derive an approximate context from the trend summary
    itself.
    """

    # Document counts: everything considered, those that extracted
    # successfully, and those whose extraction failed.
    total_documents: int = 0
    valid_documents: int = 0
    failed_documents: int = 0

    # Mean extraction confidence across the evidence documents.
    avg_extraction_confidence: float = 0.0

    # Timestamp of the most recent evidence document, or None if unknown.
    newest_evidence_at: datetime | None = None

    # Distinct source types seen in the evidence; a fresh set per instance.
    source_types: set[str] = field(default_factory=set)
|
||||
|
||||
|
||||
@dataclass
class SuppressionResult:
    """Outcome of a suppression evaluation."""

    # True when at least one data-quality check failed.
    suppressed: bool

    # The reasons recorded by the evaluation; a fresh list per instance.
    reasons: list[SuppressionReason] = field(default_factory=list)

    # Overall data quality score computed for the evaluated context.
    data_quality_score: float = 0.0

    # The quality context the decision was based on, if one was recorded.
    context: DataQualityContext | None = None
|
||||
|
||||
|
||||
def build_quality_context_from_summary(
    summary: TrendSummary,
) -> DataQualityContext:
    """Derive an approximate DataQualityContext from a TrendSummary.

    Fallback for when real document-level quality metrics are not
    available; the summary's own fields stand in as proxies:

    - the supporting plus opposing evidence counts become both the total
      and the valid document count (no failures are assumed),
    - the summary's confidence stands in for extraction confidence,
    - the summary's generation time stands in for the newest evidence time.

    Args:
        summary: The trend summary to derive the context from.

    Returns:
        A DataQualityContext with an empty set of source types.
    """
    supporting_count = len(summary.top_supporting_evidence)
    opposing_count = len(summary.top_opposing_evidence)
    evidence_count = supporting_count + opposing_count

    # NOTE(review): source types are left empty here. evaluate_suppression
    # flags LOW_SOURCE_DIVERSITY whenever there are documents and fewer
    # source types than ``min_source_types`` (1 by default), so a fallback
    # context with any evidence is always suppressed under the default
    # config — confirm this is intended.
    return DataQualityContext(
        total_documents=evidence_count,
        valid_documents=evidence_count,
        failed_documents=0,
        avg_extraction_confidence=summary.confidence,
        newest_evidence_at=summary.generated_at,
        source_types=set(),
    )
|
||||
|
||||
|
||||
def _compute_data_quality_score(
    ctx: DataQualityContext,
    config: SuppressionConfig,
    reference_time: datetime,
) -> float:
    """Compute an overall data quality score from the context.

    The score is a weighted sum of three components, each limited to
    [0, 1] before weighting so that no single component can outweigh its
    share:

    - Extraction confidence (40% weight): ``avg_extraction_confidence``
      scaled so that 0.8 or above earns full credit.
    - Evidence freshness (30% weight): falls linearly from 1 for evidence
      dated at ``reference_time`` to 0 at
      ``config.max_evidence_staleness_hours``; 0 when no timestamp exists.
    - Document coverage (30% weight): the valid/total document ratio
      multiplied by a volume factor that saturates at 10 valid documents;
      0 when there are no documents.

    Args:
        ctx: Quality metrics for the data under evaluation.
        config: Thresholds; only ``max_evidence_staleness_hours`` is read.
        reference_time: The "now" against which evidence age is measured.
            A naive value is interpreted as UTC, as are naive evidence
            timestamps.

    Returns:
        The score in [0, 1], rounded to 4 decimal places; higher is
        better quality.
    """
    # Extraction confidence component, bounded so out-of-range inputs
    # cannot push it outside [0, 1].
    conf_component = max(0.0, min(ctx.avg_extraction_confidence / 0.8, 1.0))

    # Freshness component
    freshness_component = 0.0
    if ctx.newest_evidence_at is not None:
        newest = ctx.newest_evidence_at
        if newest.tzinfo is None:
            newest = newest.replace(tzinfo=timezone.utc)
        # Normalise the reference the same way; mixing naive and aware
        # datetimes in a subtraction raises TypeError.
        if reference_time.tzinfo is None:
            reference_time = reference_time.replace(tzinfo=timezone.utc)

        age_hours = (reference_time - newest).total_seconds() / 3600.0
        max_hours = config.max_evidence_staleness_hours
        if max_hours > 0:
            # Upper bound matters for future-dated evidence (negative
            # age), which would otherwise score above 1.
            freshness_component = min(1.0, max(0.0, 1.0 - (age_hours / max_hours)))
        else:
            # A non-positive window cannot be divided by; evidence counts
            # as fresh only if it is within the window (not older than it).
            freshness_component = 1.0 if age_hours <= max_hours else 0.0

    # Document coverage component
    coverage_component = 0.0
    if ctx.total_documents > 0:
        valid_ratio = ctx.valid_documents / ctx.total_documents
        count_factor = min(ctx.valid_documents / 10.0, 1.0)
        coverage_component = max(0.0, min(valid_ratio * count_factor, 1.0))

    score = (0.4 * conf_component) + (0.3 * freshness_component) + (0.3 * coverage_component)
    return round(max(0.0, min(1.0, score)), 4)
|
||||
|
||||
|
||||
def evaluate_suppression(
    summary: TrendSummary,
    quality_ctx: DataQualityContext | None = None,
    config: SuppressionConfig = DEFAULT_SUPPRESSION_CONFIG,
    reference_time: datetime | None = None,
) -> SuppressionResult:
    """Evaluate whether a recommendation should be suppressed due to data quality.

    Checks several data quality dimensions in turn (extraction confidence,
    evidence staleness, source diversity, extraction failure rate, valid
    document count, and the overall quality score) and records a reason
    for each one that fails. The recommendation is suppressed when at
    least one reason is recorded.

    Args:
        summary: The trend summary to evaluate.
        quality_ctx: Data quality context. If None, a minimal context is
            built from the trend summary itself.
        config: Suppression thresholds.
        reference_time: Reference time for staleness checks. Defaults to
            the current UTC time. A naive value is interpreted as UTC,
            matching how naive evidence timestamps are handled.

    Returns:
        SuppressionResult with the suppression decision, the reasons, the
        computed data quality score, and the context that was evaluated.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
    elif reference_time.tzinfo is None:
        # Evidence timestamps are normalised to aware UTC below; a naive
        # reference would make the subtraction raise TypeError.
        reference_time = reference_time.replace(tzinfo=timezone.utc)

    if quality_ctx is None:
        ctx = build_quality_context_from_summary(summary)
    else:
        ctx = quality_ctx
    reasons: list[SuppressionReason] = []

    # Check average extraction confidence
    if ctx.avg_extraction_confidence < config.min_avg_extraction_confidence:
        reasons.append(SuppressionReason.LOW_DATA_CONFIDENCE)

    # Check evidence staleness
    if ctx.newest_evidence_at is not None:
        newest = ctx.newest_evidence_at
        if newest.tzinfo is None:
            newest = newest.replace(tzinfo=timezone.utc)
        age_hours = (reference_time - newest).total_seconds() / 3600.0
        if age_hours > config.max_evidence_staleness_hours:
            reasons.append(SuppressionReason.STALE_EVIDENCE)
    elif ctx.total_documents > 0:
        # Have documents but no timestamp — treat as stale
        reasons.append(SuppressionReason.STALE_EVIDENCE)

    # Check source diversity (only meaningful when there are documents)
    if ctx.total_documents > 0 and len(ctx.source_types) < config.min_source_types:
        reasons.append(SuppressionReason.LOW_SOURCE_DIVERSITY)

    # Check extraction failure rate
    if ctx.total_documents > 0:
        failure_rate = ctx.failed_documents / ctx.total_documents
        if failure_rate > config.max_extraction_failure_rate:
            reasons.append(SuppressionReason.HIGH_EXTRACTION_FAILURE_RATE)

    # Check minimum valid documents
    if ctx.valid_documents < config.min_valid_documents:
        reasons.append(SuppressionReason.INSUFFICIENT_VALID_DOCUMENTS)

    # Compute overall data quality score
    quality_score = _compute_data_quality_score(ctx, config, reference_time)

    # A low overall score is reported under the general low-confidence
    # reason, added at most once.
    if (
        quality_score < config.min_data_quality_score
        and SuppressionReason.LOW_DATA_CONFIDENCE not in reasons
    ):
        reasons.append(SuppressionReason.LOW_DATA_CONFIDENCE)

    suppressed = len(reasons) > 0

    if suppressed:
        logger.info(
            "Recommendation suppressed for %s/%s: reasons=%s quality_score=%.3f",
            summary.entity_id, summary.window.value,
            [r.value for r in reasons], quality_score,
        )

    return SuppressionResult(
        suppressed=suppressed,
        reasons=reasons,
        data_quality_score=quality_score,
        context=ctx,
    )
|
||||
Reference in New Issue
Block a user