# stonks-oracle/services/aggregation/bayesian.py

"""Bayesian accumulator for probabilistic sentiment aggregation.

Accumulates weighted signals into a Bayesian posterior using
log-likelihood accumulation, Beta distribution parameters, and
Shannon entropy for mixed-signal detection.

Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 9.1, 9.7
"""
from __future__ import annotations

import math
from dataclasses import dataclass

from services.aggregation.scoring import WeightedSignal


@dataclass(frozen=True)
class BayesianPosterior:
    """Immutable snapshot of the posterior after accumulating signals.

    Attributes:
        p_bull: Bullish probability σ(L_t), in [0, 1].
        alpha: Beta distribution α parameter (≥ 1.0).
        beta: Beta distribution β parameter (≥ 1.0).
        log_likelihood: Raw log-likelihood accumulation L_t.
        bayesian_confidence: 1 - 4αβ/(α+β)², in [0, 1].
        entropy: Shannon entropy H, in [0, 1].
        signal_count: Number of signals processed.
    """

    p_bull: float
    alpha: float
    beta: float
    log_likelihood: float
    bayesian_confidence: float
    entropy: float
    signal_count: int


# Posterior used when no evidence is available: zero signals, zero
# log-likelihood, an even bull/bear split with α = β = 1, no confidence
# and maximal entropy.
PRIOR = BayesianPosterior(
    signal_count=0,
    log_likelihood=0.0,
    p_bull=0.5,
    alpha=1.0,
    beta=1.0,
    bayesian_confidence=0.0,
    entropy=1.0,
)


def compute_entropy(p_bull: float) -> float:
    """Return the binary Shannon entropy (in bits) of a bullish probability.

    H = -p·log₂(p) - (1-p)·log₂(1-p)

    The result lies in [0, 1]: it peaks at 1.0 for p = 0.5 and falls to
    0.0 as p approaches either extreme. Inputs at or outside the open
    interval (0, 1) are treated as fully certain and yield 0.0.
    """
    is_degenerate = p_bull <= 0.0 or p_bull >= 1.0
    if is_degenerate:
        return 0.0

    # Subtract each outcome's p·log₂(p) term in turn: bullish, then bearish.
    bits = 0.0
    for prob in (p_bull, 1.0 - p_bull):
        bits -= prob * math.log2(prob)
    return bits


def compute_bayesian_posterior(
    signals: list[WeightedSignal],
) -> BayesianPosterior:
    """Accumulate weighted signals into a Bayesian posterior.

    Computes:
    - Log-likelihood: L_t = Σ(w_i · s_i)
    - Bullish probability: P_bull = σ(L_t)
    - Beta posterior: α = 1 + W_bull, β = 1 + W_bear
    - Bayesian confidence: C = 1 - 4αβ/(α+β)²
    - Shannon entropy: H = -p·log₂(p) - (1-p)·log₂(1-p)

    Args:
        signals: Signals to fold in. Each one contributes
            ``weight.combined`` as w_i and ``sentiment_value`` as s_i.

    Returns:
        The accumulated posterior, or PRIOR when ``signals`` is empty or
        every signal was skipped.

    Signals whose weight or sentiment is NaN or infinite are skipped and
    do not count toward ``signal_count``. Signals with a sentiment of
    exactly zero are counted but add to neither W_bull nor W_bear.
    """
    if not signals:
        return PRIOR

    log_likelihood = 0.0
    w_bull = 0.0
    w_bear = 0.0
    count = 0

    for sig in signals:
        combined = sig.weight.combined
        sentiment = sig.sentiment_value

        # Skip non-finite inputs. NaN poisons every sum it touches, and
        # ±inf turns into NaN as soon as it meets a zero sentiment
        # (inf · 0) or an opposite-signed infinity (inf - inf), which
        # would leak NaN into p_bull, entropy and the confidence ratio.
        if not (math.isfinite(combined) and math.isfinite(sentiment)):
            continue

        log_likelihood += combined * sentiment

        if sentiment > 0.0:
            w_bull += combined
        elif sentiment < 0.0:
            w_bear += combined

        count += 1

    if count == 0:
        return PRIOR

    # P_bull via sigmoid: σ(L_t) = 1 / (1 + exp(-L_t))
    # Guard against overflow in exp for very large |L_t|
    if log_likelihood > 500.0:
        p_bull = 1.0
    elif log_likelihood < -500.0:
        p_bull = 0.0
    else:
        p_bull = 1.0 / (1.0 + math.exp(-log_likelihood))

    # Beta posterior parameters
    alpha = 1.0 + w_bull
    beta_param = 1.0 + w_bear

    # Bayesian confidence: C = 1 - 4αβ/(α+β)²
    ab_sum = alpha + beta_param
    bayesian_confidence = 1.0 - (4.0 * alpha * beta_param) / (ab_sum * ab_sum)
    # Clamp to [0, 1] to guard against floating-point rounding
    bayesian_confidence = max(0.0, min(1.0, bayesian_confidence))

    # Shannon entropy
    entropy = compute_entropy(p_bull)

    return BayesianPosterior(
        p_bull=p_bull,
        alpha=alpha,
        beta=beta_param,
        log_likelihood=log_likelihood,
        bayesian_confidence=bayesian_confidence,
        entropy=entropy,
        signal_count=count,
    )