feat: signal math upgrade — probabilistic, regime-aware scoring pipeline

Implement the full probabilistic signal processing pipeline, gated behind the probabilistic_scoring_enabled feature flag in risk_configs:

- Bayesian log-likelihood accumulator with Beta posterior and entropy
- Regime detector (trend-following, panic, mean-reversion, uncertainty)
- Source accuracy tracker with per-source historical prediction accuracy
- Sigmoid confidence gate replacing binary gate
- Information gain surprise weighting for rare events
- Adaptive recency decay with event-specific half-lives
- Regime multiplier replacing market context multiplier
- Weighted disagreement entropy for contradiction detection
- Multiplicative macro exposure with conditional integration
- Graph-distance attenuated competitive signal propagation
- Exponentially weighted momentum with volatility scaling
- Expected value recommendation gate

All changes are backward-compatible: flag=false preserves exact current behavior. New outputs are stored in existing JSONB columns (no schema changes except the source_accuracy table via migration 034).

Tests: 26 property-based tests (14 correctness properties), 99 unit tests, 1789 total tests passing with zero regressions.
2026-04-29 11:41:48 +00:00
parent 8c3c1aab43
commit 4e010bc048
24 changed files with 6058 additions and 60 deletions
@@ -0,0 +1,127 @@
+"""Bayesian accumulator for probabilistic sentiment aggregation.
+
+Accumulates weighted signals into a Bayesian posterior using
+log-likelihood accumulation, Beta distribution parameters, and
+Shannon entropy for mixed-signal detection.
+
+Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 9.1, 9.7
+"""
+from __future__ import annotations
+
+import math
+from dataclasses import dataclass
+
+from services.aggregation.scoring import WeightedSignal
+
+
+@dataclass(frozen=True)
+class BayesianPosterior:
+    """Immutable snapshot of the posterior produced by signal accumulation."""
+
+    p_bull: float  # σ(L_t), bullish probability [0, 1]
+    alpha: float  # Beta α = 1 + W_bull (≥ 1.0 assuming non-negative weights)
+    beta: float  # Beta β = 1 + W_bear (≥ 1.0 assuming non-negative weights)
+    log_likelihood: float  # Raw log-likelihood accumulation L_t = Σ(w_i · s_i)
+    bayesian_confidence: float  # 1 - 4αβ/(α+β)², clamped to [0, 1]
+    entropy: float  # Shannon entropy H of p_bull in bits, [0, 1]
+    signal_count: int  # Number of signals accumulated (skipped ones excluded)
+
+
+# Uninformative Beta(1, 1) prior: returned when there is no usable evidence.
+PRIOR = BayesianPosterior(
+    p_bull=0.5,
+    alpha=1.0,
+    beta=1.0,
+    log_likelihood=0.0,
+    bayesian_confidence=0.0,
+    entropy=1.0,
+    signal_count=0,
+)
+
+
+def compute_entropy(p_bull: float) -> float:
+    """Return the binary Shannon entropy of ``p_bull`` in bits.
+
+    H = -p·log₂(p) - (1-p)·log₂(1-p); 1.0 at p=0.5, and 0.0 for p≤0 or p≥1.
+    """
+    if p_bull <= 0.0 or p_bull >= 1.0:
+        return 0.0  # degenerate or out-of-range input; never evaluates log₂(0)
+    p_bear = 1.0 - p_bull
+    bull_term, bear_term = p_bull * math.log2(p_bull), p_bear * math.log2(p_bear)
+    return -(bull_term + bear_term)
+
+
+def compute_bayesian_posterior(
+    signals: list[WeightedSignal],
+) -> BayesianPosterior:
+    """Accumulate weighted signals into a Bayesian posterior.
+
+    With w_i = ``sig.weight.combined`` and s_i = ``sig.sentiment_value``:
+    - Log-likelihood: L_t = Σ(w_i · s_i)
+    - Bullish probability: P_bull = σ(L_t)
+    - Beta posterior: α = 1 + W_bull, β = 1 + W_bear (weights of s_i > 0 / < 0)
+    - Bayesian confidence: C = 1 - 4αβ/(α+β)², clamped to [0, 1]
+    - Shannon entropy: H = -p·log₂(p) - (1-p)·log₂(1-p)
+
+    Signals with a NaN or ±inf weight or sentiment are skipped; one such
+    value would otherwise turn L_t and every derived field into NaN.
+    Returns PRIOR when ``signals`` is empty or every signal is skipped.
+    """
+    if not signals:
+        return PRIOR
+
+    log_likelihood = 0.0
+    w_bull = 0.0
+    w_bear = 0.0
+    count = 0
+
+    for sig in signals:
+        combined = sig.weight.combined
+        sentiment = sig.sentiment_value
+        # Skip non-finite inputs: inf · 0 and inf - inf would both yield NaN
+        if not (math.isfinite(combined) and math.isfinite(sentiment)):
+            continue
+
+        log_likelihood += combined * sentiment
+
+        if sentiment > 0.0:
+            w_bull += combined
+        elif sentiment < 0.0:
+            w_bear += combined
+
+        count += 1
+
+    if count == 0:
+        return PRIOR
+
+    # P_bull via sigmoid: σ(L_t) = 1 / (1 + exp(-L_t))
+    # Saturate for very large |L_t| so math.exp cannot raise OverflowError
+    if log_likelihood > 500.0:
+        p_bull = 1.0
+    elif log_likelihood < -500.0:
+        p_bull = 0.0
+    else:
+        p_bull = 1.0 / (1.0 + math.exp(-log_likelihood))
+
+    # Beta posterior parameters (zero-sentiment signals move neither one)
+    alpha = 1.0 + w_bull
+    beta_param = 1.0 + w_bear
+
+    # Bayesian confidence: C = 1 - 4αβ/(α+β)² (0 when α = β)
+    ab_sum = alpha + beta_param
+    bayesian_confidence = 1.0 - (4.0 * alpha * beta_param) / (ab_sum * ab_sum)
+    # Clamp to [0, 1] to guard against floating-point rounding
+    bayesian_confidence = max(0.0, min(1.0, bayesian_confidence))
+
+    # Shannon entropy of the bullish probability
+    entropy = compute_entropy(p_bull)
+
+    return BayesianPosterior(
+        p_bull=p_bull,
+        alpha=alpha,
+        beta=beta_param,
+        log_likelihood=log_likelihood,
+        bayesian_confidence=bayesian_confidence,
+        entropy=entropy,
+        signal_count=count,
+    )