Files
stonks-oracle/tests/test_suppression.py
Celes Renata c85c0068a2 fix: clean up utcnow deprecation warnings, fix 12 failing tests, add CI/CD pipeline manifests
- Replace all datetime.utcnow() with datetime.now(tz=timezone.utc) across 8 files
- Fix 12 failing tests to match current implementation behavior
- Fix pytest_plugins in non-top-level conftest (moved to root conftest.py)
- Auto-fix 189 lint issues (import sorting, unused imports)
- Add CI/CD pipeline infrastructure (ARC, ArgoCD, Kargo manifests)
- Add values-beta.yaml and values-paper.yaml for staged deployments
- Update GitHub Actions workflow to use self-hosted-gremlin runners
- Add integration-test job to CI pipeline

Result: 1596 passed, 0 failed, 0 warnings
2026-04-18 03:59:28 +00:00

227 lines
8.1 KiB
Python

"""Tests for recommendation suppression logic (data quality checks).
Requirements: 7.4
"""
from datetime import datetime, timedelta, timezone
from services.recommendation.suppression import (
DataQualityContext,
SuppressionConfig,
SuppressionReason,
build_quality_context_from_summary,
evaluate_suppression,
)
from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow
NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
def _make_summary(**overrides) -> TrendSummary:
    """Build a ``TrendSummary`` fixture for the suppression tests.

    The baseline is a bullish seven-day company summary for ``AAPL``
    generated at ``NOW``. Any keyword argument replaces the baseline field
    of the same name.
    """
    baseline = {
        "entity_type": "company",
        "entity_id": "AAPL",
        "window": TrendWindow.SEVEN_DAY,
        "trend_direction": TrendDirection.BULLISH,
        "trend_strength": 0.5,
        "confidence": 0.65,
        "top_supporting_evidence": ["doc1", "doc2", "doc3"],
        "top_opposing_evidence": [],
        "dominant_catalysts": ["earnings"],
        "material_risks": ["regulatory scrutiny"],
        "contradiction_score": 0.1,
        "generated_at": NOW,
    }
    # Caller-supplied fields win over the baseline values.
    return TrendSummary(**{**baseline, **overrides})
def _make_quality_ctx(**overrides) -> DataQualityContext:
    """Build a ``DataQualityContext`` fixture for the suppression tests.

    The baseline describes five documents (four valid, one failed), an
    average extraction confidence of 0.7, evidence dated six hours before
    ``NOW`` and two source types. Any keyword argument replaces the
    baseline field of the same name.
    """
    baseline = {
        "total_documents": 5,
        "valid_documents": 4,
        "failed_documents": 1,
        "avg_extraction_confidence": 0.7,
        "newest_evidence_at": NOW - timedelta(hours=6),
        "source_types": {"news_api", "filings_api"},
    }
    # Caller-supplied fields win over the baseline values.
    return DataQualityContext(**{**baseline, **overrides})
# ---------------------------------------------------------------------------
# No suppression for good quality data
# ---------------------------------------------------------------------------
def test_no_suppression_good_quality():
    """The default fixtures must not be suppressed and must score above 0.3."""
    outcome = evaluate_suppression(
        _make_summary(),
        _make_quality_ctx(),
        reference_time=NOW,
    )

    assert outcome.suppressed is False
    assert outcome.reasons == []
    assert outcome.data_quality_score > 0.3
# ---------------------------------------------------------------------------
# Suppression triggers
# ---------------------------------------------------------------------------
def test_suppressed_low_extraction_confidence():
    """An average extraction confidence of 0.2 must report LOW_DATA_CONFIDENCE."""
    trend = _make_summary()
    low_confidence_ctx = _make_quality_ctx(avg_extraction_confidence=0.2)

    outcome = evaluate_suppression(trend, low_confidence_ctx, reference_time=NOW)

    assert outcome.suppressed is True
    assert SuppressionReason.LOW_DATA_CONFIDENCE in outcome.reasons
def test_suppressed_stale_evidence():
    """Evidence dated ten days before the reference time must report STALE_EVIDENCE."""
    trend = _make_summary()
    ten_days_ago = NOW - timedelta(days=10)
    stale_ctx = _make_quality_ctx(newest_evidence_at=ten_days_ago)

    outcome = evaluate_suppression(trend, stale_ctx, reference_time=NOW)

    assert outcome.suppressed is True
    assert SuppressionReason.STALE_EVIDENCE in outcome.reasons
def test_suppressed_high_failure_rate():
    """Six failed documents out of ten must report HIGH_EXTRACTION_FAILURE_RATE."""
    trend = _make_summary()
    failing_ctx = _make_quality_ctx(
        total_documents=10,
        failed_documents=6,
        valid_documents=4,
    )

    outcome = evaluate_suppression(trend, failing_ctx, reference_time=NOW)

    assert outcome.suppressed is True
    assert SuppressionReason.HIGH_EXTRACTION_FAILURE_RATE in outcome.reasons
def test_suppressed_insufficient_valid_documents():
    """A single valid document must report INSUFFICIENT_VALID_DOCUMENTS."""
    single_doc_summary = _make_summary(
        top_supporting_evidence=["doc1"],
        top_opposing_evidence=[],
    )
    single_doc_ctx = _make_quality_ctx(
        total_documents=1,
        valid_documents=1,
        failed_documents=0,
    )

    outcome = evaluate_suppression(
        single_doc_summary,
        single_doc_ctx,
        reference_time=NOW,
    )

    assert outcome.suppressed is True
    assert SuppressionReason.INSUFFICIENT_VALID_DOCUMENTS in outcome.reasons
def test_suppressed_low_source_diversity():
    """Fewer source types than ``min_source_types`` must trigger suppression.

    Here the context carries no source types at all while the config
    requires two, so LOW_SOURCE_DIVERSITY is expected among the reasons.
    """
    trend = _make_summary()
    no_sources_ctx = _make_quality_ctx(source_types=set())
    two_sources_required = SuppressionConfig(min_source_types=2)

    outcome = evaluate_suppression(
        trend,
        no_sources_ctx,
        config=two_sources_required,
        reference_time=NOW,
    )

    assert outcome.suppressed is True
    assert SuppressionReason.LOW_SOURCE_DIVERSITY in outcome.reasons
# ---------------------------------------------------------------------------
# Fallback to summary-based context
# ---------------------------------------------------------------------------
def test_fallback_context_from_summary():
    """The summary-derived context must mirror the summary's evidence and confidence."""
    derived = build_quality_context_from_summary(_make_summary(confidence=0.7))

    # The fixture has 3 supporting and 0 opposing evidence items.
    assert derived.total_documents == 3
    assert derived.valid_documents == 3
    assert derived.avg_extraction_confidence == 0.7
def test_no_suppression_with_summary_fallback():
    """Without an explicit quality context, the summary-based fallback is used."""
    trend = _make_summary(confidence=0.7)
    default_config = SuppressionConfig()

    # The default config has min_source_types=1, and the fallback context
    # now reports source_types={"unknown"}, so LOW_SOURCE_DIVERSITY must
    # not fire.
    outcome = evaluate_suppression(trend, config=default_config, reference_time=NOW)

    assert outcome.suppressed is False
# ---------------------------------------------------------------------------
# Data quality score
# ---------------------------------------------------------------------------
def test_quality_score_high_for_good_data():
    """Confident, fresh, failure-free data must score above 0.7."""
    trend = _make_summary()
    one_hour_ago = NOW - timedelta(hours=1)
    strong_ctx = _make_quality_ctx(
        avg_extraction_confidence=0.85,
        newest_evidence_at=one_hour_ago,
        total_documents=10,
        valid_documents=10,
        failed_documents=0,
    )

    outcome = evaluate_suppression(trend, strong_ctx, reference_time=NOW)

    assert outcome.data_quality_score > 0.7
def test_quality_score_low_for_bad_data():
    """Low-confidence, two-week-old, mostly failed data must score below 0.3."""
    trend = _make_summary()
    two_weeks_ago = NOW - timedelta(days=14)
    weak_ctx = _make_quality_ctx(
        avg_extraction_confidence=0.1,
        newest_evidence_at=two_weeks_ago,
        total_documents=3,
        valid_documents=1,
        failed_documents=2,
    )

    outcome = evaluate_suppression(trend, weak_ctx, reference_time=NOW)

    assert outcome.data_quality_score < 0.3
# ---------------------------------------------------------------------------
# Custom config
# ---------------------------------------------------------------------------
def test_custom_config_stricter_thresholds():
    """Confidence 0.5 under a 0.6 minimum must report LOW_DATA_CONFIDENCE."""
    trend = _make_summary()
    middling_ctx = _make_quality_ctx(avg_extraction_confidence=0.5)
    strict_config = SuppressionConfig(min_avg_extraction_confidence=0.6)

    outcome = evaluate_suppression(
        trend,
        middling_ctx,
        config=strict_config,
        reference_time=NOW,
    )

    assert outcome.suppressed is True
    assert SuppressionReason.LOW_DATA_CONFIDENCE in outcome.reasons
def test_custom_config_relaxed_thresholds():
    """Confidence 0.3 under a 0.2 minimum must not report LOW_DATA_CONFIDENCE."""
    trend = _make_summary()
    modest_ctx = _make_quality_ctx(avg_extraction_confidence=0.3)
    relaxed_config = SuppressionConfig(min_avg_extraction_confidence=0.2)

    outcome = evaluate_suppression(
        trend,
        modest_ctx,
        config=relaxed_config,
        reference_time=NOW,
    )

    # Only the confidence reason is checked; other reasons are not asserted.
    assert SuppressionReason.LOW_DATA_CONFIDENCE not in outcome.reasons
# ---------------------------------------------------------------------------
# Macro-only suppression (Requirements: 10.3)
# ---------------------------------------------------------------------------
from services.recommendation.suppression import (
MACRO_ONLY_CAVEAT,
evaluate_macro_only_suppression,
)
class TestMacroOnlySuppression:
    """Checks for ``evaluate_macro_only_suppression`` and its related constants."""

    def test_suppressed_when_only_macro_signals(self):
        """Macro signals with zero company signals must suppress."""
        trend = _make_summary()
        is_suppressed = evaluate_macro_only_suppression(
            trend,
            macro_signal_count=3,
            company_signal_count=0,
        )
        assert is_suppressed is True

    def test_not_suppressed_when_company_signals_present(self):
        """Macro signals alongside company signals must not suppress."""
        trend = _make_summary()
        is_suppressed = evaluate_macro_only_suppression(
            trend,
            macro_signal_count=3,
            company_signal_count=2,
        )
        assert is_suppressed is False

    def test_not_suppressed_when_no_macro_signals(self):
        """Company signals with zero macro signals must not suppress."""
        trend = _make_summary()
        is_suppressed = evaluate_macro_only_suppression(
            trend,
            macro_signal_count=0,
            company_signal_count=5,
        )
        assert is_suppressed is False

    def test_not_suppressed_when_no_signals_at_all(self):
        """Zero macro and zero company signals must not suppress."""
        trend = _make_summary()
        is_suppressed = evaluate_macro_only_suppression(
            trend,
            macro_signal_count=0,
            company_signal_count=0,
        )
        assert is_suppressed is False

    def test_macro_only_caveat_is_string(self):
        """The caveat constant must be a string that mentions "macro"."""
        assert isinstance(MACRO_ONLY_CAVEAT, str)
        assert "macro" in MACRO_ONLY_CAVEAT.lower()

    def test_suppression_reason_enum_has_macro_only(self):
        """The MACRO_ONLY_SIGNAL member must carry the value "macro_only_signal"."""
        assert SuppressionReason.MACRO_ONLY_SIGNAL.value == "macro_only_signal"