"""Tests for recommendation suppression logic (data quality checks). Requirements: 7.4 """ from datetime import datetime, timedelta, timezone from services.recommendation.suppression import ( DataQualityContext, SuppressionConfig, SuppressionReason, build_quality_context_from_summary, evaluate_suppression, ) from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc) def _make_summary(**overrides) -> TrendSummary: defaults = dict( entity_type="company", entity_id="AAPL", window=TrendWindow.SEVEN_DAY, trend_direction=TrendDirection.BULLISH, trend_strength=0.5, confidence=0.65, top_supporting_evidence=["doc1", "doc2", "doc3"], top_opposing_evidence=[], dominant_catalysts=["earnings"], material_risks=["regulatory scrutiny"], contradiction_score=0.1, generated_at=NOW, ) defaults.update(overrides) return TrendSummary(**defaults) def _make_quality_ctx(**overrides) -> DataQualityContext: defaults = dict( total_documents=5, valid_documents=4, failed_documents=1, avg_extraction_confidence=0.7, newest_evidence_at=NOW - timedelta(hours=6), source_types={"news_api", "filings_api"}, ) defaults.update(overrides) return DataQualityContext(**defaults) # --------------------------------------------------------------------------- # No suppression for good quality data # --------------------------------------------------------------------------- def test_no_suppression_good_quality(): summary = _make_summary() ctx = _make_quality_ctx() result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.suppressed is False assert result.reasons == [] assert result.data_quality_score > 0.3 # --------------------------------------------------------------------------- # Suppression triggers # --------------------------------------------------------------------------- def test_suppressed_low_extraction_confidence(): summary = _make_summary() ctx = _make_quality_ctx(avg_extraction_confidence=0.2) result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.suppressed is True assert SuppressionReason.LOW_DATA_CONFIDENCE in result.reasons def test_suppressed_stale_evidence(): summary = _make_summary() ctx = _make_quality_ctx(newest_evidence_at=NOW - timedelta(days=10)) result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.suppressed is True assert SuppressionReason.STALE_EVIDENCE in result.reasons def test_suppressed_high_failure_rate(): summary = _make_summary() ctx = _make_quality_ctx(total_documents=10, failed_documents=6, valid_documents=4) result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.suppressed is True assert SuppressionReason.HIGH_EXTRACTION_FAILURE_RATE in result.reasons def test_suppressed_insufficient_valid_documents(): summary = _make_summary( top_supporting_evidence=["doc1"], top_opposing_evidence=[], ) ctx = _make_quality_ctx(total_documents=1, valid_documents=1, failed_documents=0) result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.suppressed is True assert SuppressionReason.INSUFFICIENT_VALID_DOCUMENTS in result.reasons def test_suppressed_low_source_diversity(): """When min_source_types > available source types, suppression fires.""" summary = _make_summary() ctx = _make_quality_ctx(source_types=set()) config = SuppressionConfig(min_source_types=2) result = evaluate_suppression(summary, ctx, config=config, reference_time=NOW) assert result.suppressed is True assert SuppressionReason.LOW_SOURCE_DIVERSITY in result.reasons # --------------------------------------------------------------------------- # Fallback to summary-based context # --------------------------------------------------------------------------- def test_fallback_context_from_summary(): summary = _make_summary(confidence=0.7) ctx = build_quality_context_from_summary(summary) assert ctx.total_documents == 3 # 3 supporting + 0 opposing assert ctx.valid_documents == 3 assert ctx.avg_extraction_confidence == 0.7 def test_no_suppression_with_summary_fallback(): """When no quality context is provided, summary-based fallback is used.""" summary = _make_summary(confidence=0.7) # Default config has min_source_types=1, but fallback has empty source_types. # With min_source_types=1 and empty source_types, LOW_SOURCE_DIVERSITY fires # only when total_documents > 0. But default min_source_types is 1 and # len(set()) = 0 < 1, so it would fire. Let's use a config that relaxes this. config = SuppressionConfig(min_source_types=0) result = evaluate_suppression(summary, config=config, reference_time=NOW) assert result.suppressed is False # --------------------------------------------------------------------------- # Data quality score # --------------------------------------------------------------------------- def test_quality_score_high_for_good_data(): summary = _make_summary() ctx = _make_quality_ctx( avg_extraction_confidence=0.85, newest_evidence_at=NOW - timedelta(hours=1), total_documents=10, valid_documents=10, failed_documents=0, ) result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.data_quality_score > 0.7 def test_quality_score_low_for_bad_data(): summary = _make_summary() ctx = _make_quality_ctx( avg_extraction_confidence=0.1, newest_evidence_at=NOW - timedelta(days=14), total_documents=3, valid_documents=1, failed_documents=2, ) result = evaluate_suppression(summary, ctx, reference_time=NOW) assert result.data_quality_score < 0.3 # --------------------------------------------------------------------------- # Custom config # --------------------------------------------------------------------------- def test_custom_config_stricter_thresholds(): summary = _make_summary() ctx = _make_quality_ctx(avg_extraction_confidence=0.5) strict = SuppressionConfig(min_avg_extraction_confidence=0.6) result = evaluate_suppression(summary, ctx, config=strict, reference_time=NOW) assert result.suppressed is True assert SuppressionReason.LOW_DATA_CONFIDENCE in result.reasons def test_custom_config_relaxed_thresholds(): summary = _make_summary() ctx = _make_quality_ctx(avg_extraction_confidence=0.3) relaxed = SuppressionConfig(min_avg_extraction_confidence=0.2) result = evaluate_suppression(summary, ctx, config=relaxed, reference_time=NOW) assert SuppressionReason.LOW_DATA_CONFIDENCE not in result.reasons