feat: implement dual-pipeline signal engine service

New service at services/signal_engine/ implementing concurrent heuristic (deterministic scoring) and probabilistic (Bayesian inference) pipelines that evaluate technical signals across 6 timeframes (M30-M) and produce independent BUY/WATCH/SKIP verdicts per ticker per evaluation tick.

Components:
- Input Normalizer: multi-source data assembly with sentinel fallbacks
- Signal Library: Fibonacci, MA Stack, RSI, Cup & Handle, Elliott Wave
- Multi-Timeframe Confluence Engine: weighted scoring with D/W/M anchors
- Hard Filter Engine: macro_bias, valuation, earnings proximity gating
- Heuristic Pipeline: S_total scoring with confidence-gated verdicts
- Probabilistic Pipeline: Bayesian log-odds with regime priors, entropy gating, EV_R calculation, and signal correlation penalty
- Exit Engine: stop-loss, targets, trailing ATR-based stops
- Delta Analyzer: pipeline agreement tracking with rolling Redis metrics
- Output Formatter: SignalOutput contract + Recommendation schema mapping
- Worker orchestrator: concurrent pipelines with failure isolation
- Main entry point: queue polling with fail-safe config loading

Infrastructure:
- Migration 039: signal_engine_outputs table with 3 indexes
- Helm chart: signalEngine service entry (processing tier)
- Redis key: QUEUE_SIGNAL_ENGINE constant

Tests: 390 tests (unit + property-based) covering all components

Config: dual_pipeline_enabled=false by default (safe rollout)
2026-05-02 07:32:26 +00:00
parent 7e2343ec2c
commit f468e30af0
61 changed files with 14107 additions and 184 deletions
@@ -0,0 +1,380 @@
+"""Probabilistic Pipeline (Pipeline B) — Bayesian inference and verdict.
+
+Computes a posterior probability via regime-based priors, likelihood ratio
+accumulation with correlation penalty, entropy gating, and expected value
+calculation.  Produces a BUY / WATCH / SKIP verdict.
+
+The pipeline reuses the existing ``classify_regime`` infrastructure from
+``services.aggregation.regime`` for regime classification and wraps the
+Bayesian math with signal-cluster correlation penalties from
+``services.signal_engine.correlation``.
+
+Requirements: 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9,
+              14.1, 14.2, 14.3, 14.4, 14.5
+"""
+
+from __future__ import annotations
+
+import logging
+import math
+
+from services.aggregation.regime import MarketRegime, RegimeClassification
+from services.signal_engine.config import ProbabilisticConfig
+from services.signal_engine.correlation import (
+    apply_correlation_penalty,
+    classify_signal,
+)
+from services.signal_engine.models import (
+    ConfluenceSignal,
+    LikelihoodRatio,
+    NormalizedInput,
+    ProbabilisticResult,
+    SignalDirection,
+    Verdict,
+)
+
+# Module-level logger; run_probabilistic_pipeline emits one INFO summary per run.
+logger = logging.getLogger(__name__)
+
+# Default hit rate used when no historical hit rate is available.
+# _compute_likelihood_ratios applies this value as h for every signal.
+_DEFAULT_HIT_RATE: float = 0.6
+
+
+# ---------------------------------------------------------------------------
+# Regime → prior mapping
+# ---------------------------------------------------------------------------
+
+
+def _regime_to_prior(
+    regime: RegimeClassification,
+    config: ProbabilisticConfig,
+) -> float:
+    """Map a regime classification to a prior probability.
+
+    Mapping (Req 14.2):
+    - TREND_FOLLOWING with positive trend_indicator → bull prior (0.58)
+    - TREND_FOLLOWING with negative trend_indicator → bear prior (0.42)
+    - MEAN_REVERSION → range prior (0.50)
+    - PANIC → bear prior (0.42)
+    - UNCERTAINTY → range prior (0.50)
+    """
+    if regime.regime == MarketRegime.TREND_FOLLOWING:
+        if regime.trend_indicator > 0:
+            return config.regime_prior_bull
+        return config.regime_prior_bear
+    if regime.regime == MarketRegime.MEAN_REVERSION:
+        return config.regime_prior_range
+    if regime.regime == MarketRegime.PANIC:
+        return config.regime_prior_bear
+    # UNCERTAINTY or any unknown → range prior
+    return config.regime_prior_range
+
+
+# ---------------------------------------------------------------------------
+# Likelihood ratio computation
+# ---------------------------------------------------------------------------
+
+
+def _compute_likelihood_ratios(
+    confluence_signals: list[ConfluenceSignal],
+) -> list[LikelihoodRatio]:
+    """Compute raw likelihood ratios for each confluence signal.
+
+    For each signal:
+    - h = hit rate (use confidence as proxy, default 0.6)
+    - s = signal strength (confluence_score)
+    - P(sig|up) = h * s + (1 - h) * (1 - s) * 0.5
+    - P(sig|down) = 1 - P(sig|up)
+    - LR = P(sig|up) / P(sig|down)
+
+    Direction-aware: bearish signals invert the LR (use 1/LR) so that
+    bearish evidence reduces P_up.
+
+    Requirements: 6.2
+    """
+    ratios: list[LikelihoodRatio] = []
+
+    for sig in confluence_signals:
+        h = _DEFAULT_HIT_RATE
+        s = sig.confluence_score
+
+        # Clamp inputs to valid ranges to avoid numerical issues
+        h = max(0.01, min(h, 0.99))
+        s = max(0.01, min(s, 0.99))
+
+        p_sig_up = h * s + (1.0 - h) * (1.0 - s) * 0.5
+        p_sig_down = 1.0 - p_sig_up
+
+        # Guard against division by zero / near-zero
+        if p_sig_down < 1e-10:
+            p_sig_down = 1e-10
+
+        lr = p_sig_up / p_sig_down
+
+        # Bearish signals: invert the LR so it reduces P_up
+        if sig.direction == SignalDirection.BEARISH:
+            lr = 1.0 / lr if lr > 1e-10 else 1e10
+
+        log_lr = math.log(lr) if lr > 0 else 0.0
+
+        cluster = classify_signal(sig.signal_type)
+
+        ratios.append(
+            LikelihoodRatio(
+                signal_type=sig.signal_type,
+                cluster=cluster.value,
+                lr=lr,
+                log_lr=log_lr,
+                penalized_log_lr=log_lr,  # will be updated by penalty
+                hit_rate=h,
+                strength=s,
+            )
+        )
+
+    return ratios
+
+
+# ---------------------------------------------------------------------------
+# Log-odds / sigmoid helpers
+# ---------------------------------------------------------------------------
+
+
+def _logit(p: float) -> float:
+    """Compute logit(p) = log(p / (1 - p)).
+
+    Clamps p to (1e-10, 1 - 1e-10) to avoid infinities.
+    """
+    p = max(1e-10, min(p, 1.0 - 1e-10))
+    return math.log(p / (1.0 - p))
+
+
+def _sigmoid(x: float) -> float:
+    """Compute sigmoid(x) = 1 / (1 + exp(-x)).
+
+    Clamps the exponent to avoid overflow.
+    """
+    if x > 500:
+        return 1.0
+    if x < -500:
+        return 0.0
+    return 1.0 / (1.0 + math.exp(-x))
+
+
+# ---------------------------------------------------------------------------
+# Shannon entropy
+# ---------------------------------------------------------------------------
+
+
+def _shannon_entropy(p: float) -> float:
+    """Compute Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p).
+
+    Returns 0.0 at the boundaries (p = 0 or p = 1).
+    Result is in [0, 1] for binary entropy.
+    """
+    if p <= 0.0 or p >= 1.0:
+        return 0.0
+    return -(p * math.log2(p) + (1.0 - p) * math.log2(1.0 - p))
+
+
+# ---------------------------------------------------------------------------
+# EV_R computation
+# ---------------------------------------------------------------------------
+
+
+def _compute_ev_r(
+    p_up: float,
+    confluence_signals: list[ConfluenceSignal],
+) -> float:
+    """Compute expected value per unit risk.
+
+    EV_R = P_up · E[win_R] - (1 - P_up) · 1.0
+
+    E[win_R] is estimated as the average confluence_score × 2.0
+    (heuristic for expected win in R-units).  Falls back to 1.0 if
+    no signals are available.
+    """
+    if confluence_signals:
+        avg_score = sum(s.confluence_score for s in confluence_signals) / len(
+            confluence_signals
+        )
+        e_win_r = avg_score * 2.0
+    else:
+        e_win_r = 1.0
+
+    return p_up * e_win_r - (1.0 - p_up) * 1.0
+
+
+# ---------------------------------------------------------------------------
+# Verdict logic
+# ---------------------------------------------------------------------------
+
+
+def _determine_verdict(
+    p_up: float,
+    entropy: float,
+    ev_r: float,
+    normalized: NormalizedInput,
+    config: ProbabilisticConfig,
+) -> tuple[Verdict, list[str]]:
+    """Apply threshold logic to determine BUY / WATCH / SKIP verdict.
+
+    Returns the verdict and a list of reasoning strings.
+
+    Requirements: 6.6, 6.7, 6.8
+    """
+    reasoning: list[str] = []
+
+    valuation_score = (
+        normalized.valuation_score if normalized.valuation_score is not None else 0.0
+    )
+
+    # --- Entropy gating (Req 6.4) ---
+    if entropy > config.entropy_skip:
+        reasoning.append(
+            f"SKIP: entropy={entropy:.4f} > {config.entropy_skip} (high_entropy)"
+        )
+        return Verdict.SKIP, reasoning
+
+    # --- Check BUY conditions (Req 6.6) ---
+    buy_conditions = {
+        "p_up": p_up >= config.buy_p_up,
+        "entropy": entropy <= config.buy_entropy_max,
+        "ev_r": ev_r >= config.buy_ev_r_min,
+        "macro_bias": normalized.macro_bias > config.macro_bias_threshold,
+        "valuation": valuation_score >= config.buy_valuation_min,
+    }
+
+    all_buy_met = all(buy_conditions.values())
+
+    if all_buy_met:
+        reasoning.append(
+            f"BUY: all conditions met — P_up={p_up:.4f} "
+            f"(>= {config.buy_p_up}), entropy={entropy:.4f} "
+            f"(<= {config.buy_entropy_max}), EV_R={ev_r:.4f} "
+            f"(>= {config.buy_ev_r_min}), macro_bias={normalized.macro_bias:.2f} "
+            f"(> {config.macro_bias_threshold}), valuation={valuation_score:.2f} "
+            f"(>= {config.buy_valuation_min})"
+        )
+        return Verdict.BUY, reasoning
+
+    # --- Check WATCH conditions (Req 6.7) ---
+    watch_conditions = {
+        "p_up": p_up >= config.watch_p_up,
+        "entropy": entropy <= config.watch_entropy_max,
+    }
+
+    if all(watch_conditions.values()):
+        failed_buy = [k for k, v in buy_conditions.items() if not v]
+        reasoning.append(
+            f"WATCH: P_up={p_up:.4f} (>= {config.watch_p_up}), "
+            f"entropy={entropy:.4f} (<= {config.watch_entropy_max}) "
+            f"but BUY conditions not fully met — failed: {', '.join(failed_buy)}"
+        )
+        return Verdict.WATCH, reasoning
+
+    # --- SKIP (Req 6.8) ---
+    reasoning.append(
+        f"SKIP: P_up={p_up:.4f}, entropy={entropy:.4f}, EV_R={ev_r:.4f} "
+        f"— does not meet WATCH or BUY thresholds"
+    )
+    return Verdict.SKIP, reasoning
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def run_probabilistic_pipeline(
+    normalized: NormalizedInput,
+    confluence_signals: list[ConfluenceSignal],
+    regime: RegimeClassification,
+    config: ProbabilisticConfig,
+) -> ProbabilisticResult:
+    """Run the Bayesian probabilistic pipeline.
+
+    Steps:
+    1. Initialize regime-based prior (bull=0.58, range=0.50, bear=0.42)
+    2. Compute likelihood ratios per signal
+    3. Apply correlation penalty via ``apply_correlation_penalty()``
+    4. Accumulate via log-odds: logit(P_post) = logit(P_prior) + Σ log(LR_i)
+    5. Compute Shannon entropy and apply entropy gating
+    6. Compute EV_R = P_up · E[win_R] - (1 - P_up) · 1.0
+    7. Produce BUY / WATCH / SKIP verdict
+
+    Args:
+        normalized: The unified input structure for this evaluation tick.
+        confluence_signals: Signals that passed multi-timeframe confluence
+            filtering.
+        regime: The current market regime classification.
+        config: Probabilistic pipeline thresholds.
+
+    Returns:
+        A :class:`ProbabilisticResult` with verdict, posterior, entropy,
+        EV_R, likelihood ratios, and reasoning.
+
+    Requirements: 6.1–6.9, 14.1–14.5
+    """
+    reasoning: list[str] = []
+
+    # 1. Regime-based prior (Req 6.1, 14.2)
+    prior = _regime_to_prior(regime, config)
+    reasoning.append(
+        f"Regime={regime.regime.value}, trend_indicator={regime.trend_indicator:.1f} "
+        f"→ prior={prior:.2f}"
+    )
+
+    # 2. Compute likelihood ratios (Req 6.2)
+    raw_lrs = _compute_likelihood_ratios(confluence_signals)
+
+    # 3. Apply correlation penalty (Req 7.1–7.4)
+    penalized_lrs = apply_correlation_penalty(raw_lrs)
+
+    # 4. Accumulate via log-odds (Req 6.3, 14.3)
+    logit_prior = _logit(prior)
+    sum_penalized_log_lr = sum(lr.penalized_log_lr for lr in penalized_lrs)
+    logit_posterior = logit_prior + sum_penalized_log_lr
+    p_up = _sigmoid(logit_posterior)
+
+    reasoning.append(
+        f"logit(prior)={logit_prior:.4f} + Σ penalized_log_lr={sum_penalized_log_lr:.4f} "
+        f"= logit(posterior)={logit_posterior:.4f} → P_up={p_up:.4f}"
+    )
+
+    # 5. Shannon entropy (Req 6.4)
+    entropy = _shannon_entropy(p_up)
+    reasoning.append(f"Shannon entropy H={entropy:.4f}")
+
+    # 6. EV_R (Req 6.5)
+    ev_r = _compute_ev_r(p_up, confluence_signals)
+    reasoning.append(f"EV_R={ev_r:.4f}")
+
+    # 7. Verdict (Req 6.6, 6.7, 6.8)
+    verdict, verdict_reasoning = _determine_verdict(
+        p_up, entropy, ev_r, normalized, config
+    )
+    reasoning.extend(verdict_reasoning)
+
+    logger.info(
+        "Probabilistic pipeline [%s]: verdict=%s P_up=%.4f "
+        "entropy=%.4f EV_R=%.4f prior=%.2f regime=%s signals=%d",
+        normalized.ticker,
+        verdict.value,
+        p_up,
+        entropy,
+        ev_r,
+        prior,
+        regime.regime.value,
+        len(confluence_signals),
+    )
+
+    return ProbabilisticResult(
+        verdict=verdict,
+        p_up=p_up,
+        entropy=entropy,
+        ev_r=ev_r,
+        prior=prior,
+        posterior=p_up,
+        likelihood_ratios=penalized_lrs,
+        regime=regime.regime.value,
+        reasoning=reasoning,
+    )