# stonks-oracle/services/signal_engine/probabilistic.py

"""Probabilistic Pipeline (Pipeline B) — Bayesian inference and verdict.

Computes a posterior probability via regime-based priors, likelihood ratio
accumulation with correlation penalty, entropy gating, and expected value
calculation.  Produces a BUY / WATCH / SKIP verdict.

The pipeline reuses the existing ``classify_regime`` infrastructure from
``services.aggregation.regime`` for regime classification and wraps the
Bayesian math with signal-cluster correlation penalties from
``services.signal_engine.correlation``.

Requirements: 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9,
              14.1, 14.2, 14.3, 14.4, 14.5
"""

from __future__ import annotations

import logging
import math

from services.aggregation.regime import MarketRegime, RegimeClassification
from services.signal_engine.config import ProbabilisticConfig
from services.signal_engine.correlation import (
    apply_correlation_penalty,
    classify_signal,
)
from services.signal_engine.models import (
    ConfluenceSignal,
    LikelihoodRatio,
    NormalizedInput,
    ProbabilisticResult,
    SignalDirection,
    Verdict,
)

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Default hit rate used when no historical hit rate is available.
# NOTE(review): ``_compute_likelihood_ratios`` currently assigns this value as
# the hit rate ``h`` of every signal; no per-signal hit rate is read there.
_DEFAULT_HIT_RATE: float = 0.6


# ---------------------------------------------------------------------------
# Regime → prior mapping
# ---------------------------------------------------------------------------


def _regime_to_prior(
    regime: RegimeClassification,
    config: ProbabilisticConfig,
) -> float:
    """Select the prior probability that corresponds to a market regime.

    Mapping (Req 14.2):
    - TREND_FOLLOWING with ``trend_indicator > 0`` → ``config.regime_prior_bull``
    - TREND_FOLLOWING otherwise (zero or negative indicator)
      → ``config.regime_prior_bear``
    - PANIC → ``config.regime_prior_bear``
    - MEAN_REVERSION, UNCERTAINTY and any other value
      → ``config.regime_prior_range``

    Args:
        regime: Classification whose ``regime`` and ``trend_indicator``
            attributes drive the choice.
        config: Source of the three configurable prior values.

    Returns:
        The prior taken from ``config`` for the given regime.
    """
    current = regime.regime

    if current == MarketRegime.TREND_FOLLOWING:
        # Only a strictly positive indicator counts as an up-trend.
        trending_up = regime.trend_indicator > 0
        return config.regime_prior_bull if trending_up else config.regime_prior_bear

    if current == MarketRegime.PANIC:
        return config.regime_prior_bear

    # MEAN_REVERSION, UNCERTAINTY or anything unrecognised → range prior
    return config.regime_prior_range


# ---------------------------------------------------------------------------
# Likelihood ratio computation
# ---------------------------------------------------------------------------


def _compute_likelihood_ratios(
    confluence_signals: list[ConfluenceSignal],
) -> list[LikelihoodRatio]:
    """Compute raw likelihood ratios for each confluence signal.

    For each signal:
    - h = hit rate (use confidence as proxy, default 0.6)
    - s = signal strength (confluence_score)
    - P(sig|up) = h * s + (1 - h) * (1 - s) * 0.5
    - P(sig|down) = 1 - P(sig|up)
    - LR = P(sig|up) / P(sig|down)

    Direction-aware: bearish signals invert the LR (use 1/LR) so that
    bearish evidence reduces P_up.

    Requirements: 6.2
    """
    ratios: list[LikelihoodRatio] = []

    for sig in confluence_signals:
        h = _DEFAULT_HIT_RATE
        s = sig.confluence_score

        # Clamp inputs to valid ranges to avoid numerical issues
        h = max(0.01, min(h, 0.99))
        s = max(0.01, min(s, 0.99))

        p_sig_up = h * s + (1.0 - h) * (1.0 - s) * 0.5
        p_sig_down = 1.0 - p_sig_up

        # Guard against division by zero / near-zero
        if p_sig_down < 1e-10:
            p_sig_down = 1e-10

        lr = p_sig_up / p_sig_down

        # Bearish signals: invert the LR so it reduces P_up
        if sig.direction == SignalDirection.BEARISH:
            lr = 1.0 / lr if lr > 1e-10 else 1e10

        log_lr = math.log(lr) if lr > 0 else 0.0

        cluster = classify_signal(sig.signal_type)

        ratios.append(
            LikelihoodRatio(
                signal_type=sig.signal_type,
                cluster=cluster.value,
                lr=lr,
                log_lr=log_lr,
                penalized_log_lr=log_lr,  # will be updated by penalty
                hit_rate=h,
                strength=s,
            )
        )

    return ratios


# ---------------------------------------------------------------------------
# Log-odds / sigmoid helpers
# ---------------------------------------------------------------------------


def _logit(p: float) -> float:
    """Compute logit(p) = log(p / (1 - p)).

    Clamps p to (1e-10, 1 - 1e-10) to avoid infinities.
    """
    p = max(1e-10, min(p, 1.0 - 1e-10))
    return math.log(p / (1.0 - p))


def _sigmoid(x: float) -> float:
    """Compute sigmoid(x) = 1 / (1 + exp(-x)).

    Clamps the exponent to avoid overflow.
    """
    if x > 500:
        return 1.0
    if x < -500:
        return 0.0
    return 1.0 / (1.0 + math.exp(-x))


# ---------------------------------------------------------------------------
# Shannon entropy
# ---------------------------------------------------------------------------


def _shannon_entropy(p: float) -> float:
    """Compute Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p).

    Returns 0.0 at the boundaries (p = 0 or p = 1).
    Result is in [0, 1] for binary entropy.
    """
    if p <= 0.0 or p >= 1.0:
        return 0.0
    return -(p * math.log2(p) + (1.0 - p) * math.log2(1.0 - p))


# ---------------------------------------------------------------------------
# EV_R computation
# ---------------------------------------------------------------------------


def _compute_ev_r(
    p_up: float,
    confluence_signals: list[ConfluenceSignal],
) -> float:
    """Compute expected value per unit risk.

    EV_R = P_up · E[win_R] - (1 - P_up) · 1.0

    E[win_R] is estimated as the average confluence_score × 2.0
    (heuristic for expected win in R-units).  Falls back to 1.0 if
    no signals are available.
    """
    if confluence_signals:
        avg_score = sum(s.confluence_score for s in confluence_signals) / len(
            confluence_signals
        )
        e_win_r = avg_score * 2.0
    else:
        e_win_r = 1.0

    return p_up * e_win_r - (1.0 - p_up) * 1.0


# ---------------------------------------------------------------------------
# Verdict logic
# ---------------------------------------------------------------------------


def _determine_verdict(
    p_up: float,
    entropy: float,
    ev_r: float,
    normalized: NormalizedInput,
    config: ProbabilisticConfig,
) -> tuple[Verdict, list[str]]:
    """Turn the pipeline metrics into a BUY / WATCH / SKIP verdict.

    Checks run in this order, and the first match wins:

    1. Entropy gate: ``entropy > config.entropy_skip`` → SKIP.
    2. BUY: all five conditions hold (P_up, entropy, EV_R, macro bias,
       valuation).  A missing ``valuation_score`` is treated as 0.0.
    3. WATCH: the looser P_up and entropy thresholds hold.
    4. Otherwise → SKIP.

    Args:
        p_up: Posterior probability of an upward move.
        entropy: Entropy value compared against the config thresholds.
        ev_r: Expected value per unit of risk.
        normalized: Supplies ``macro_bias`` and ``valuation_score``.
        config: Supplies every threshold used below.

    Returns:
        The verdict and a single-entry list of reasoning strings that
        explains it.

    Requirements: 6.6, 6.7, 6.8
    """
    notes: list[str] = []

    valuation = normalized.valuation_score
    if valuation is None:
        valuation = 0.0

    # --- Entropy gating (Req 6.4): too uncertain to act on at all ---
    if entropy > config.entropy_skip:
        notes.append(
            f"SKIP: entropy={entropy:.4f} > {config.entropy_skip} (high_entropy)"
        )
        return Verdict.SKIP, notes

    # --- BUY conditions (Req 6.6): every check is evaluated up front ---
    macro_bias = normalized.macro_bias
    buy_checks = [
        ("p_up", p_up >= config.buy_p_up),
        ("entropy", entropy <= config.buy_entropy_max),
        ("ev_r", ev_r >= config.buy_ev_r_min),
        ("macro_bias", macro_bias > config.macro_bias_threshold),
        ("valuation", valuation >= config.buy_valuation_min),
    ]
    unmet = [label for label, passed in buy_checks if not passed]

    if not unmet:
        notes.append(
            "BUY: all conditions met — "
            f"P_up={p_up:.4f} (>= {config.buy_p_up}), "
            f"entropy={entropy:.4f} (<= {config.buy_entropy_max}), "
            f"EV_R={ev_r:.4f} (>= {config.buy_ev_r_min}), "
            f"macro_bias={macro_bias:.2f} (> {config.macro_bias_threshold}), "
            f"valuation={valuation:.2f} (>= {config.buy_valuation_min})"
        )
        return Verdict.BUY, notes

    # --- WATCH conditions (Req 6.7): looser P_up / entropy thresholds ---
    watch_p_ok = p_up >= config.watch_p_up
    watch_entropy_ok = entropy <= config.watch_entropy_max

    if watch_p_ok and watch_entropy_ok:
        failed_text = ", ".join(unmet)
        notes.append(
            f"WATCH: P_up={p_up:.4f} (>= {config.watch_p_up}), "
            f"entropy={entropy:.4f} (<= {config.watch_entropy_max}) "
            f"but BUY conditions not fully met — failed: {failed_text}"
        )
        return Verdict.WATCH, notes

    # --- SKIP (Req 6.8): nothing above matched ---
    notes.append(
        f"SKIP: P_up={p_up:.4f}, entropy={entropy:.4f}, EV_R={ev_r:.4f} "
        "— does not meet WATCH or BUY thresholds"
    )
    return Verdict.SKIP, notes


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def run_probabilistic_pipeline(
    normalized: NormalizedInput,
    confluence_signals: list[ConfluenceSignal],
    regime: RegimeClassification,
    config: ProbabilisticConfig,
) -> ProbabilisticResult:
    """Execute the Bayesian probabilistic pipeline for one evaluation tick.

    Steps:
    1. Pick the regime-based prior via ``_regime_to_prior``
    2. Compute one likelihood ratio per signal
    3. Apply the correlation penalty via ``apply_correlation_penalty()``
    4. Accumulate in log-odds space:
       logit(P_post) = logit(P_prior) + Σ penalized log(LR_i)
    5. Compute the Shannon entropy of the posterior
    6. Compute EV_R = P_up · E[win_R] - (1 - P_up) · 1.0
    7. Derive the BUY / WATCH / SKIP verdict (entropy gating happens here)

    Each step appends a human-readable line to the result's ``reasoning``
    list, and a summary is logged at INFO level.

    Args:
        normalized: The unified input structure for this evaluation tick.
        confluence_signals: Signals that passed multi-timeframe confluence
            filtering.
        regime: The current market regime classification.
        config: Probabilistic pipeline thresholds.

    Returns:
        A :class:`ProbabilisticResult` with verdict, posterior, entropy,
        EV_R, likelihood ratios, and reasoning.  ``p_up`` and ``posterior``
        carry the same value.

    Requirements: 6.1–6.9, 14.1–14.5
    """
    regime_name = regime.regime.value
    trace: list[str] = []

    # 1. Regime-based prior (Req 6.1, 14.2)
    prior = _regime_to_prior(regime, config)
    trace.append(
        f"Regime={regime_name}, trend_indicator={regime.trend_indicator:.1f} "
        f"→ prior={prior:.2f}"
    )

    # 2 + 3. Likelihood ratios (Req 6.2), then correlation penalty (Req 7.1–7.4)
    penalized_lrs = apply_correlation_penalty(
        _compute_likelihood_ratios(confluence_signals)
    )

    # 4. Accumulate via log-odds (Req 6.3, 14.3)
    logit_prior = _logit(prior)
    log_terms = [entry.penalized_log_lr for entry in penalized_lrs]
    evidence = sum(log_terms)
    logit_posterior = logit_prior + evidence
    p_up = _sigmoid(logit_posterior)
    trace.append(
        f"logit(prior)={logit_prior:.4f} + Σ penalized_log_lr={evidence:.4f} "
        f"= logit(posterior)={logit_posterior:.4f} → P_up={p_up:.4f}"
    )

    # 5. Shannon entropy (Req 6.4)
    entropy = _shannon_entropy(p_up)
    trace.append(f"Shannon entropy H={entropy:.4f}")

    # 6. EV_R (Req 6.5)
    ev_r = _compute_ev_r(p_up, confluence_signals)
    trace.append(f"EV_R={ev_r:.4f}")

    # 7. Verdict (Req 6.6, 6.7, 6.8)
    verdict, verdict_notes = _determine_verdict(
        p_up, entropy, ev_r, normalized, config
    )
    trace.extend(verdict_notes)

    logger.info(
        "Probabilistic pipeline [%s]: verdict=%s P_up=%.4f "
        "entropy=%.4f EV_R=%.4f prior=%.2f regime=%s signals=%d",
        normalized.ticker,
        verdict.value,
        p_up,
        entropy,
        ev_r,
        prior,
        regime_name,
        len(confluence_signals),
    )

    return ProbabilisticResult(
        verdict=verdict,
        p_up=p_up,
        entropy=entropy,
        ev_r=ev_r,
        prior=prior,
        posterior=p_up,
        likelihood_ratios=penalized_lrs,
        regime=regime_name,
        reasoning=trace,
    )