feat: signal math upgrade — probabilistic, regime-aware scoring pipeline

Implement full probabilistic signal processing pipeline gated behind the probabilistic_scoring_enabled feature flag in risk_configs:

- Bayesian log-likelihood accumulator with Beta posterior and entropy
- Regime detector (trend-following, panic, mean-reversion, uncertainty)
- Source accuracy tracker with per-source historical prediction accuracy
- Sigmoid confidence gate replacing binary gate
- Information gain surprise weighting for rare events
- Adaptive recency decay with event-specific half-lives
- Regime multiplier replacing market context multiplier
- Weighted disagreement entropy for contradiction detection
- Multiplicative macro exposure with conditional integration
- Graph-distance attenuated competitive signal propagation
- Exponentially weighted momentum with volatility scaling
- Expected value recommendation gate

All changes are backward-compatible: flag=false preserves exact current behavior. New outputs are stored in existing JSONB columns (no schema changes except the source_accuracy table via migration 034).

Tests: 26 property-based tests (14 correctness properties), 99 unit tests, 1789 total tests passing with zero regressions.
2026-04-29 11:41:48 +00:00
parent 8c3c1aab43
commit 4e010bc048
24 changed files with 6058 additions and 60 deletions
@@ -0,0 +1,535 @@
+"""Unit tests for signal scoring upgrades and pipeline-wide behaviors.
+
+Tests information gain, adaptive decay, macro exposure, macro integration,
+graph distance, momentum, EV gate, feature flag, and regime multiplier behaviors.
+
+Requirements: 3.1, 3.2, 3.4, 5.5, 5.6, 10.3, 10.4, 11.3, 11.4, 12.3, 13.3, 14.3, 14.4, 16.4, 16.5
+"""
+from __future__ import annotations
+
+import math
+from datetime import datetime, timezone
+
+import pytest
+
+from services.aggregation.interpolation import (
+    _compute_multiplicative_exposure,
+    integrate_macro_signals,
+)
+from services.aggregation.projection import (
+    compute_ew_momentum,
+    compute_trend_momentum,
+)
+from services.aggregation.scoring import (
+    DEFAULT_BASE_RATE,
+    ScoringConfig,
+    SignalWeight,
+    WeightedSignal,
+    compute_adaptive_half_life,
+    compute_info_gain,
+    compute_regime_multiplier,
+    compute_signal_weight,
+)
+from services.aggregation.signal_propagation import (
+    compute_graph_distance_attenuation,
+)
+from services.recommendation.eligibility import (
+    compute_expected_value,
+    evaluate_eligibility,
+)
+from services.shared.schemas import (
+    RecommendationMode,
+    TrendDirection,
+    TrendSummary,
+    TrendWindow,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_trend_summary(**overrides) -> TrendSummary:
+    """Create a minimal TrendSummary for testing."""
+    defaults = {
+        "entity_id": "test-company",
+        "ticker": "TEST",
+        "window": TrendWindow.SEVEN_DAY,
+        "trend_direction": TrendDirection.BULLISH,
+        "trend_strength": 0.5,
+        "confidence": 0.6,
+        "contradiction_score": 0.1,
+        "signal_count": 5,
+        "unique_source_count": 3,
+        "weighted_sentiment_avg": 0.4,
+        "top_supporting_evidence": ["doc-1", "doc-2"],
+        "top_opposing_evidence": ["doc-3"],
+        "material_risks": [],
+    }
+    defaults.update(overrides)
+    return TrendSummary(**defaults)
+
+
+def _make_signal(
+    sentiment: float,
+    combined_weight: float = 1.0,
+    impact: float = 1.0,
+) -> WeightedSignal:
+    """Create a minimal WeightedSignal for testing."""
+    weight = SignalWeight(
+        recency=1.0,
+        credibility=1.0,
+        novelty_bonus=0.0,
+        confidence_gate=1.0,
+        market_ctx_multiplier=1.0,
+        combined=combined_weight,
+    )
+    return WeightedSignal(
+        document_id="test-doc",
+        weight=weight,
+        sentiment_value=sentiment,
+        impact_score=impact,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Information gain clamp (Req 3.4)
+# ---------------------------------------------------------------------------
+
+
+class TestInfoGainClamp:
+    """Test info gain clamp: very rare event → factor ≤ 3.0."""
+
+    def test_very_rare_event_clamped(self):
+        """An event with extremely low base rate is clamped to 3.0."""
+        # base_rate = 0.001 → -log₂(0.001) ≈ 9.97 → r = 1 + 0.3*9.97 ≈ 3.99
+        # Should be clamped to 3.0
+        result = compute_info_gain("unknown_type", lambda_param=0.3, max_gain=3.0, default_base_rate=0.001)
+        assert result <= 3.0
+
+    def test_m_and_a_high_gain(self):
+        """M&A (base_rate=0.03) produces a high gain that stays within the 3.0 cap."""
+        result = compute_info_gain("m_and_a")
+        assert result > 1.0
+        assert result <= 3.0
+
+    def test_earnings_low_gain(self):
+        """Earnings (base_rate=0.25) produces modest gain."""
+        result = compute_info_gain("earnings")
+        assert result >= 1.0
+        assert result < 2.0
+
+    def test_none_event_type_returns_one(self):
+        """None event type returns neutral factor 1.0."""
+        assert compute_info_gain(None) == 1.0
+
+
+# ---------------------------------------------------------------------------
+# Default base rate (Req 3.2)
+# ---------------------------------------------------------------------------
+
+
+class TestDefaultBaseRate:
+    """Test default base rate: unknown event type → 0.1."""
+
+    def test_unknown_event_uses_default(self):
+        """Unknown event type uses DEFAULT_BASE_RATE = 0.1."""
+        result = compute_info_gain("completely_unknown_event")
+        expected = 1.0 + 0.3 * (-math.log2(DEFAULT_BASE_RATE))
+        assert result == pytest.approx(min(expected, 3.0), abs=0.01)
+
+    def test_default_base_rate_value(self):
+        """DEFAULT_BASE_RATE is 0.1."""
+        assert DEFAULT_BASE_RATE == 0.1
+
+
+# ---------------------------------------------------------------------------
+# Adaptive decay edge cases (Req 5.5, 5.6)
+# ---------------------------------------------------------------------------
+
+
+class TestAdaptiveDecayEdgeCases:
+    """Test adaptive decay: all zeros → τ_base, all max → 6×τ_base."""
+
+    def test_all_zeros_gives_base(self):
+        """All β factors zero → τ_i = τ_base (Req 5.6)."""
+        config = ScoringConfig(probabilistic=True)
+        result = compute_adaptive_half_life(
+            base_half_life=72.0,
+            impact_score=0.0,
+            info_gain_factor=1.0,  # r=1 → β_surprise=0
+            market_multiplier=1.0,  # M=1 → β_market=0
+            config=config,
+        )
+        assert result == pytest.approx(72.0)
+
+    def test_all_max_gives_six_times_base(self):
+        """All β factors at max → τ_i ≈ 6×τ_base (Req 5.5).
+
+        β_impact = 1.0 * 1.0 = 1.0
+        β_surprise = ((3.0 - 1.0) / 2.0) * 1.0 = 1.0
+        β_market = ((1.45 - 1.0) / 0.45) * 0.5 = 0.5
+        τ = 72 * (1+1) * (1+1) * (1+0.5) = 72 * 2 * 2 * 1.5 = 432 = 6 * 72
+        """
+        config = ScoringConfig(
+            probabilistic=True,
+            adaptive_decay_impact_scale=1.0,
+            adaptive_decay_surprise_scale=1.0,
+            adaptive_decay_market_scale=0.5,
+        )
+        result = compute_adaptive_half_life(
+            base_half_life=72.0,
+            impact_score=1.0,
+            info_gain_factor=3.0,
+            market_multiplier=1.45,
+            config=config,
+        )
+        assert result == pytest.approx(72.0 * 6.0, rel=0.01)
+
+    def test_adaptive_never_below_base(self):
+        """Adaptive half-life is always >= base (Property 5)."""
+        config = ScoringConfig(probabilistic=True)
+        result = compute_adaptive_half_life(
+            base_half_life=72.0,
+            impact_score=0.5,
+            info_gain_factor=2.0,
+            market_multiplier=1.2,
+            config=config,
+        )
+        assert result >= 72.0
+
+
+# ---------------------------------------------------------------------------
+# Zero overlap → zero macro impact (Req 10.3)
+# ---------------------------------------------------------------------------
+
+
+class TestZeroOverlapMacro:
+    """Test zero overlap → zero macro impact."""
+
+    def test_all_zero_overlaps(self):
+        """All overlaps zero → exposure = 0.0."""
+        exposure = _compute_multiplicative_exposure(0.0, 0.0, 0.0, 0.0)
+        assert exposure == 0.0
+
+
+# ---------------------------------------------------------------------------
+# Max overlap → ≈severity×0.689 (Req 10.4)
+# ---------------------------------------------------------------------------
+
+
+class TestMaxOverlapMacro:
+    """Test max overlap → ≈severity×0.689."""
+
+    def test_all_max_overlaps(self):
+        """All overlaps 1.0 → exposure ≈ 0.689.
+
+        1 - (1-0.35)*(1-0.25)*(1-0.25)*(1-0.15) = 1 - 0.65*0.75*0.75*0.85 ≈ 0.689
+        """
+        exposure = _compute_multiplicative_exposure(1.0, 1.0, 1.0, 1.0)
+        expected = 1.0 - (0.65 * 0.75 * 0.75 * 0.85)
+        assert exposure == pytest.approx(expected, abs=0.001)
+        assert exposure > 0.5  # significantly above zero
+
+
+# ---------------------------------------------------------------------------
+# Macro fallback behaviors (Req 11.3, 11.4)
+# ---------------------------------------------------------------------------
+
+
+class TestMacroFallbackBehaviors:
+    """Test macro fallback: only macro → additive, only company → no modifier."""
+
+    def test_only_macro_additive_fallback(self):
+        """Only macro signals → additive merge (Req 11.3)."""
+        macro_signals = [_make_signal(sentiment=-1.0)]
+        merged, modifier = integrate_macro_signals(
+            company_signals=[],
+            macro_signals=macro_signals,
+            company_direction="neutral",
+            macro_impacts=[],
+            probabilistic=True,
+        )
+        # Macro-only: returns macro signals, modifier = 1.0
+        assert len(merged) == 1
+        assert modifier == 1.0
+
+    def test_only_company_no_modifier(self):
+        """Only company signals → modifier = 1.0 (Req 11.4)."""
+        company_signals = [_make_signal(sentiment=1.0)]
+        merged, modifier = integrate_macro_signals(
+            company_signals=company_signals,
+            macro_signals=[],
+            company_direction="bullish",
+            macro_impacts=[],
+            probabilistic=True,
+        )
+        assert len(merged) == 1
+        assert modifier == 1.0
+
+    def test_heuristic_mode_additive_merge(self):
+        """Heuristic mode: simple concatenation of all signals."""
+        company = [_make_signal(sentiment=1.0)]
+        macro = [_make_signal(sentiment=-1.0)]
+        merged, modifier = integrate_macro_signals(
+            company_signals=company,
+            macro_signals=macro,
+            company_direction="bullish",
+            macro_impacts=[],
+            probabilistic=False,
+        )
+        assert len(merged) == 2
+        assert modifier == 1.0
+
+
+# ---------------------------------------------------------------------------
+# Graph distance cutoff (Req 12.3)
+# ---------------------------------------------------------------------------
+
+
+class TestGraphDistanceCutoff:
+    """Test graph distance cutoff: d>3 → no propagation."""
+
+    def test_distance_4_no_propagation(self):
+        """Distance 4 → transfer strength = 0.0."""
+        result = compute_graph_distance_attenuation(
+            source_strength=1.0, correlation=1.0, distance=4,
+        )
+        assert result == 0.0
+
+    def test_distance_3_propagates(self):
+        """Distance 3 → still propagates (e^(-3) ≈ 0.05)."""
+        result = compute_graph_distance_attenuation(
+            source_strength=1.0, correlation=1.0, distance=3,
+        )
+        assert result > 0.0
+        assert result == pytest.approx(math.exp(-3), abs=0.001)
+
+    def test_distance_1_strongest(self):
+        """Distance 1 → strongest propagation."""
+        d1 = compute_graph_distance_attenuation(1.0, 1.0, 1)
+        d2 = compute_graph_distance_attenuation(1.0, 1.0, 2)
+        d3 = compute_graph_distance_attenuation(1.0, 1.0, 3)
+        assert d1 > d2 > d3 > 0.0
+
+    def test_distance_0_no_propagation(self):
+        """Distance 0 → no propagation (self-loop)."""
+        result = compute_graph_distance_attenuation(1.0, 1.0, 0)
+        assert result == 0.0
+
+
+# ---------------------------------------------------------------------------
+# Momentum fallback (Req 13.3)
+# ---------------------------------------------------------------------------
+
+
+class TestMomentumFallback:
+    """Test momentum fallback: <2 cycles → EW momentum 0.0; no previous data → heuristic."""
+
+    def test_empty_changes_returns_zero(self):
+        """Empty list → 0.0 (fallback)."""
+        assert compute_ew_momentum([]) == 0.0
+
+    def test_single_change_returns_zero(self):
+        """Single change → 0.0 (fewer than 2 cycles)."""
+        assert compute_ew_momentum([0.5]) == 0.0
+
+    def test_two_changes_computes(self):
+        """Two changes → computes EW momentum."""
+        result = compute_ew_momentum([0.3, 0.2])
+        assert result != 0.0
+
+    def test_heuristic_fallback_for_trend_momentum(self):
+        """compute_trend_momentum with no previous data uses heuristic."""
+        result = compute_trend_momentum(
+            current_strength=0.6,
+            current_direction="bullish",
+            previous_strength=None,
+            previous_direction=None,
+        )
+        # Heuristic: dir_sign * strength * 0.5 = 1.0 * 0.6 * 0.5 = 0.3
+        assert result == pytest.approx(0.3, abs=0.01)
+
+
+# ---------------------------------------------------------------------------
+# EV threshold behavior (Req 14.3, 14.4)
+# ---------------------------------------------------------------------------
+
+
+class TestEVThresholdBehavior:
+    """Test EV threshold: EV>0.005→proceed, EV≤0.005→informational."""
+
+    def test_positive_ev_proceeds(self):
+        """EV > 0.005 → recommendation proceeds normally."""
+        summary = _make_trend_summary(
+            trend_direction=TrendDirection.BULLISH,
+            trend_strength=0.5,
+            confidence=0.7,
+        )
+        result = evaluate_eligibility(
+            summary,
+            probabilistic=True,
+            p_bull=0.8,
+            sigma_20=0.02,
+        )
+        # With p_bull=0.8, strength=0.5, sigma_20=0.02, horizon=7d:
+        # R_up = 0.5 * 0.02 * sqrt(7) ≈ 0.0265
+        # R_down = 0.5 * 0.02 * sqrt(7) ≈ 0.0265
+        # EV = 0.8 * 0.0265 - 0.2 * 0.0265 ≈ 0.0159
+        assert result.ev_value is not None
+        assert result.ev_value > 0.005
+        assert result.pipeline_mode == "probabilistic"
+
+    def test_low_ev_forces_informational(self):
+        """EV ≤ 0.005 → forced to informational mode (Req 14.4)."""
+        summary = _make_trend_summary(
+            trend_direction=TrendDirection.BULLISH,
+            trend_strength=0.5,
+            confidence=0.7,
+        )
+        # p_bull near 0.5 → EV near 0
+        result = evaluate_eligibility(
+            summary,
+            probabilistic=True,
+            p_bull=0.5,
+            sigma_20=0.001,  # very low vol → tiny EV
+        )
+        assert result.ev_value is not None
+        assert result.ev_value <= 0.005
+        assert result.mode == RecommendationMode.INFORMATIONAL
+
+    def test_ev_computation_values(self):
+        """Verify EV computation formula directly."""
+        ev = compute_expected_value(
+            p_bull=0.7,
+            strength=0.5,
+            sigma_20=0.02,
+            horizon_days=7.0,
+        )
+        # R_up = 0.5 * 0.02 * sqrt(7) ≈ 0.02646
+        # R_down = 0.5 * 0.02 * sqrt(7) ≈ 0.02646
+        # EV = 0.7 * 0.02646 - 0.3 * 0.02646 ≈ 0.01058
+        assert ev > 0.005
+        assert ev == pytest.approx(0.7 * 0.5 * 0.02 * math.sqrt(7) - 0.3 * 0.5 * 0.02 * math.sqrt(7), abs=0.001)
+
+
+# ---------------------------------------------------------------------------
+# Feature flag behaviors (Req 16.4, 16.5)
+# ---------------------------------------------------------------------------
+
+
+class TestFeatureFlagBehaviors:
+    """Test flag=false→heuristic, flag=true→probabilistic."""
+
+    def test_heuristic_mode_binary_gate(self):
+        """flag=false → uses binary confidence gate."""
+        config = ScoringConfig(probabilistic=False)
+        now = datetime.now(timezone.utc)
+
+        # Below confidence floor → gate = 0
+        result = compute_signal_weight(
+            published_at=now,
+            reference_time=now,
+            window="7d",
+            source_credibility=0.8,
+            extraction_confidence=0.1,  # below floor of 0.2
+            config=config,
+        )
+        assert result.confidence_gate == 0.0
+        assert result.combined == 0.0
+        assert result.sigmoid_gate is None
+
+    def test_probabilistic_mode_sigmoid_gate(self):
+        """flag=true → uses sigmoid confidence gate."""
+        config = ScoringConfig(probabilistic=True)
+        now = datetime.now(timezone.utc)
+
+        result = compute_signal_weight(
+            published_at=now,
+            reference_time=now,
+            window="7d",
+            source_credibility=0.8,
+            extraction_confidence=0.5,
+            config=config,
+        )
+        assert result.sigmoid_gate is not None
+        assert result.sigmoid_gate == pytest.approx(0.5, abs=0.01)
+        assert result.combined > 0.0
+
+    def test_heuristic_mode_no_info_gain(self):
+        """flag=false → info_gain_factor stays at default 1.0."""
+        config = ScoringConfig(probabilistic=False)
+        now = datetime.now(timezone.utc)
+
+        result = compute_signal_weight(
+            published_at=now,
+            reference_time=now,
+            window="7d",
+            source_credibility=0.8,
+            extraction_confidence=0.8,
+            event_type="m_and_a",
+            config=config,
+        )
+        assert result.info_gain_factor == 1.0
+
+    def test_probabilistic_mode_has_info_gain(self):
+        """flag=true → info_gain_factor computed from event type."""
+        config = ScoringConfig(probabilistic=True)
+        now = datetime.now(timezone.utc)
+
+        result = compute_signal_weight(
+            published_at=now,
+            reference_time=now,
+            window="7d",
+            source_credibility=0.8,
+            extraction_confidence=0.8,
+            event_type="m_and_a",
+            config=config,
+        )
+        assert result.info_gain_factor > 1.0
+
+    def test_heuristic_eligibility_skips_ev(self):
+        """flag=false → EV gate is skipped entirely."""
+        summary = _make_trend_summary()
+        result = evaluate_eligibility(summary, probabilistic=False)
+        assert result.ev_value is None
+        assert result.pipeline_mode == "heuristic"
+
+    def test_probabilistic_eligibility_computes_ev(self):
+        """flag=true → EV is computed."""
+        summary = _make_trend_summary()
+        result = evaluate_eligibility(
+            summary, probabilistic=True, p_bull=0.7, sigma_20=0.02,
+        )
+        assert result.ev_value is not None
+        assert result.pipeline_mode == "probabilistic"
+
+
+# ---------------------------------------------------------------------------
+# Regime multiplier edge cases
+# ---------------------------------------------------------------------------
+
+
+class TestRegimeMultiplierEdgeCases:
+    """Test regime multiplier with edge case inputs."""
+
+    def test_no_returns_gives_one(self):
+        """No returns → M_regime = 1.0."""
+        assert compute_regime_multiplier(None, None) == 1.0
+
+    def test_single_return_gives_one(self):
+        """Single return → M_regime = 1.0 (need at least 2)."""
+        assert compute_regime_multiplier([0.01], None) == 1.0
+
+    def test_constant_returns_gives_one(self):
+        """Constant returns (σ=0) → z_r=0 → M_regime = 1.0."""
+        returns = [0.01] * 20
+        result = compute_regime_multiplier(returns, None)
+        assert result == pytest.approx(1.0)
+
+    def test_clamped_to_max(self):
+        """Extreme z-scores → clamped to 2.5."""
+        # Create returns with extreme outlier
+        returns = [0.001] * 19 + [10.0]
+        result = compute_regime_multiplier(returns, None)
+        assert result <= 2.5