feat: implement dual-pipeline signal engine service

New service at services/signal_engine/ implementing concurrent heuristic (deterministic scoring) and probabilistic (Bayesian inference) pipelines that evaluate technical signals across 6 timeframes (M30-M) and produce independent BUY/WATCH/SKIP verdicts per ticker per evaluation tick.

Components:
- Input Normalizer: multi-source data assembly with sentinel fallbacks
- Signal Library: Fibonacci, MA Stack, RSI, Cup & Handle, Elliott Wave
- Multi-Timeframe Confluence Engine: weighted scoring with D/W/M anchors
- Hard Filter Engine: macro_bias, valuation, earnings proximity gating
- Heuristic Pipeline: S_total scoring with confidence-gated verdicts
- Probabilistic Pipeline: Bayesian log-odds with regime priors, entropy gating, EV_R calculation, and signal correlation penalty
- Exit Engine: stop-loss, targets, trailing ATR-based stops
- Delta Analyzer: pipeline agreement tracking with rolling Redis metrics
- Output Formatter: SignalOutput contract + Recommendation schema mapping
- Worker orchestrator: concurrent pipelines with failure isolation
- Main entry point: queue polling with fail-safe config loading

Infrastructure:
- Migration 039: signal_engine_outputs table with 3 indexes
- Helm chart: signalEngine service entry (processing tier)
- Redis key: QUEUE_SIGNAL_ENGINE constant

Tests: 390 tests (unit + property-based) covering all components

Config: dual_pipeline_enabled=false by default (safe rollout)
2026-05-02 07:32:26 +00:00
parent 7e2343ec2c
commit f468e30af0
61 changed files with 14107 additions and 184 deletions
@@ -0,0 +1,355 @@
+"""Signal engine configuration loaded from risk_configs + environment.
+
+Defines ``SignalEngineConfig`` (the top-level dataclass) and four derived
+sub-configs — ``HardFilterConfig``, ``HeuristicConfig``,
+``ProbabilisticConfig``, ``ExitConfig`` — that expose relevant subsets for
+cleaner function signatures.
+
+``load_config()`` reads from the ``risk_configs`` table's JSONB ``config``
+column and falls back to safe defaults on any error.  Environment variables
+with the ``SIGNAL_ENGINE_`` prefix override database values.
+
+Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from dataclasses import dataclass, field
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Sub-configs — thin wrappers over relevant subsets of SignalEngineConfig
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class HardFilterConfig:
+    """Thresholds for the pre-pipeline hard filter engine.
+
+    Instances are built by ``SignalEngineConfig.hard_filter_config`` from the
+    parent's ``hard_filter_*`` fields; the defaults below equal the parent's
+    defaults for those fields.
+    """
+
+    # Copied from SignalEngineConfig.hard_filter_valuation_min.
+    valuation_min: float = 0.3
+    # Copied from SignalEngineConfig.hard_filter_earnings_days.
+    earnings_days: int = 5
+    # Copied from SignalEngineConfig.hard_filter_macro_bias_skip.
+    macro_bias_skip: float = -1.0
+
+
+@dataclass
+class HeuristicConfig:
+    """Thresholds for the heuristic (deterministic) pipeline verdict.
+
+    Instances are built by ``SignalEngineConfig.heuristic_config``.  The first
+    four fields come from the parent's ``heuristic_*`` fields.  The two gate
+    thresholds have no dedicated parent field: ``macro_bias_threshold`` is
+    always passed as ``0.0`` and ``earnings_days_threshold`` reuses
+    ``hard_filter_earnings_days``.
+    """
+
+    # Copied from SignalEngineConfig.heuristic_buy_confidence.
+    buy_confidence: float = 0.70
+    # Copied from SignalEngineConfig.heuristic_buy_s_total.
+    buy_s_total: float = 1.2
+    # Copied from SignalEngineConfig.heuristic_buy_valuation_min.
+    buy_valuation_min: float = 0.5
+    # Copied from SignalEngineConfig.heuristic_watch_confidence.
+    watch_confidence: float = 0.55
+    macro_bias_threshold: float = 0.0  # macro_bias must be > this for BUY
+    earnings_days_threshold: int = 5  # earnings_proximity must be > this for BUY
+
+
+@dataclass
+class ProbabilisticConfig:
+    """Thresholds for the probabilistic (Bayesian) pipeline verdict.
+
+    Instances are built by ``SignalEngineConfig.probabilistic_config``.  The
+    verdict thresholds come from the parent's ``prob_*`` fields and the regime
+    priors from its ``regime_prior_*`` fields.  The two gate thresholds have no
+    dedicated parent field: ``macro_bias_threshold`` is always passed as
+    ``0.0`` and ``earnings_days_threshold`` reuses
+    ``hard_filter_earnings_days``.
+    """
+
+    # Verdict thresholds (parent fields carry a ``prob_`` prefix).
+    buy_p_up: float = 0.60
+    buy_entropy_max: float = 0.90
+    buy_ev_r_min: float = 1.5
+    buy_valuation_min: float = 0.5
+    watch_p_up: float = 0.55
+    # watch_entropy_max and entropy_skip share the same default value.
+    watch_entropy_max: float = 0.95
+    entropy_skip: float = 0.95
+
+    # Regime priors
+    regime_prior_bull: float = 0.58
+    regime_prior_range: float = 0.50
+    regime_prior_bear: float = 0.42
+
+    # Fundamental gates (same semantics as heuristic)
+    macro_bias_threshold: float = 0.0
+    earnings_days_threshold: int = 5
+
+
+@dataclass
+class ExitConfig:
+    """Configuration for the exit engine.
+
+    Instances are built by ``SignalEngineConfig.exit_config``.
+    """
+
+    # Copied from SignalEngineConfig.trailing_stop_atr_multiplier.
+    trailing_stop_atr_multiplier: float = 2.0
+
+
+# ---------------------------------------------------------------------------
+# Top-level config
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class SignalEngineConfig:
+    """Top-level signal engine settings.
+
+    Every field has a default, so ``SignalEngineConfig()`` is usable without
+    any database rows or environment variables.  The ``*_config`` properties
+    build a new sub-config object from the current field values on every
+    access, so later mutations of this object are reflected in subsequent
+    reads.
+    """
+
+    # Pipeline switches
+    dual_pipeline_enabled: bool = False
+    heuristic_pipeline_enabled: bool = True
+    probabilistic_pipeline_enabled: bool = True
+    shadow_mode: bool = False
+
+    # Per-timeframe weights; the factory gives each instance its own dict
+    timeframe_weights: dict[str, float] = field(
+        default_factory=lambda: dict(
+            M30=0.03,
+            H1=0.07,
+            H4=0.15,
+            D=0.30,
+            W=0.30,
+            M=0.15,
+        )
+    )
+
+    # Hard filter thresholds
+    hard_filter_valuation_min: float = 0.3
+    hard_filter_earnings_days: int = 5
+    hard_filter_macro_bias_skip: float = -1.0
+
+    # Heuristic verdict thresholds
+    heuristic_buy_confidence: float = 0.70
+    heuristic_buy_s_total: float = 1.2
+    heuristic_buy_valuation_min: float = 0.5
+    heuristic_watch_confidence: float = 0.55
+
+    # Probabilistic verdict thresholds
+    prob_buy_p_up: float = 0.60
+    prob_buy_entropy_max: float = 0.90
+    prob_buy_ev_r_min: float = 1.5
+    prob_buy_valuation_min: float = 0.5
+    prob_watch_p_up: float = 0.55
+    prob_watch_entropy_max: float = 0.95
+    prob_entropy_skip: float = 0.95
+
+    # Regime priors
+    regime_prior_bull: float = 0.58
+    regime_prior_range: float = 0.50
+    regime_prior_bear: float = 0.42
+
+    # Exit engine
+    trailing_stop_atr_multiplier: float = 2.0
+
+    # Polling
+    polling_interval_seconds: int = 30
+
+    # -- Derived sub-configs ------------------------------------------------
+
+    @property
+    def hard_filter_config(self) -> HardFilterConfig:
+        """Return the ``hard_filter_*`` fields as a ``HardFilterConfig``."""
+        valuation_floor = self.hard_filter_valuation_min
+        earnings_window = self.hard_filter_earnings_days
+        macro_skip = self.hard_filter_macro_bias_skip
+        return HardFilterConfig(
+            valuation_min=valuation_floor,
+            earnings_days=earnings_window,
+            macro_bias_skip=macro_skip,
+        )
+
+    @property
+    def heuristic_config(self) -> HeuristicConfig:
+        """Return the ``heuristic_*`` fields as a ``HeuristicConfig``.
+
+        ``macro_bias_threshold`` is fixed at ``0.0`` and
+        ``earnings_days_threshold`` reuses ``hard_filter_earnings_days``.
+        """
+        prefixed = (
+            "buy_confidence",
+            "buy_s_total",
+            "buy_valuation_min",
+            "watch_confidence",
+        )
+        thresholds = {name: getattr(self, f"heuristic_{name}") for name in prefixed}
+        return HeuristicConfig(
+            **thresholds,
+            macro_bias_threshold=0.0,
+            earnings_days_threshold=self.hard_filter_earnings_days,
+        )
+
+    @property
+    def probabilistic_config(self) -> ProbabilisticConfig:
+        """Return the ``prob_*`` and regime-prior fields as a ``ProbabilisticConfig``.
+
+        ``macro_bias_threshold`` is fixed at ``0.0`` and
+        ``earnings_days_threshold`` reuses ``hard_filter_earnings_days``.
+        """
+        prefixed = (
+            "buy_p_up",
+            "buy_entropy_max",
+            "buy_ev_r_min",
+            "buy_valuation_min",
+            "watch_p_up",
+            "watch_entropy_max",
+            "entropy_skip",
+        )
+        verbatim = ("regime_prior_bull", "regime_prior_range", "regime_prior_bear")
+        kwargs: dict[str, Any] = {
+            name: getattr(self, f"prob_{name}") for name in prefixed
+        }
+        kwargs.update((name, getattr(self, name)) for name in verbatim)
+        kwargs["macro_bias_threshold"] = 0.0
+        kwargs["earnings_days_threshold"] = self.hard_filter_earnings_days
+        return ProbabilisticConfig(**kwargs)
+
+    @property
+    def exit_config(self) -> ExitConfig:
+        """Return the exit-engine fields as an ``ExitConfig``."""
+        multiplier = self.trailing_stop_atr_multiplier
+        return ExitConfig(trailing_stop_atr_multiplier=multiplier)
+
+
+# ---------------------------------------------------------------------------
+# Config loading helpers
+# ---------------------------------------------------------------------------
+
+# SQL to fetch all signal_engine_* keys from the most recently updated active
+# risk_configs row.  The inner query selects that single row FIRST; only then
+# is its JSONB ``config`` column expanded into top-level key/value pairs.
+# (Applying LIMIT 1 after the expansion would keep one arbitrary pair rather
+# than every pair of the newest row.)
+# NOTE: ``_`` is a single-character wildcard in LIKE, so the filter is
+# slightly broader than a literal prefix match.
+_CONFIG_QUERY = """
+SELECT kv.key, kv.value
+FROM (
+    SELECT config
+    FROM risk_configs
+    WHERE active = TRUE
+    ORDER BY updated_at DESC
+    LIMIT 1
+) AS latest
+CROSS JOIN LATERAL jsonb_each_text(latest.config) AS kv(key, value)
+WHERE kv.key LIKE 'signal_engine_%'
+"""
+
+# Mapping from SignalEngineConfig field name → the Python type its raw string
+# value is coerced to by ``_parse_value``.  The field name is the risk_configs
+# JSON key with the ``signal_engine_`` prefix stripped, or the environment
+# variable name with ``SIGNAL_ENGINE_`` stripped and lower-cased.  Names that
+# are absent from this table are ignored by both loaders.
+_FIELD_TYPES: dict[str, type] = {
+    "dual_pipeline_enabled": bool,
+    "heuristic_pipeline_enabled": bool,
+    "probabilistic_pipeline_enabled": bool,
+    "shadow_mode": bool,
+    "timeframe_weights": dict,
+    "hard_filter_valuation_min": float,
+    "hard_filter_earnings_days": int,
+    "hard_filter_macro_bias_skip": float,
+    "heuristic_buy_confidence": float,
+    "heuristic_buy_s_total": float,
+    "heuristic_buy_valuation_min": float,
+    "heuristic_watch_confidence": float,
+    "prob_buy_p_up": float,
+    "prob_buy_entropy_max": float,
+    "prob_buy_ev_r_min": float,
+    "prob_buy_valuation_min": float,
+    "prob_watch_p_up": float,
+    "prob_watch_entropy_max": float,
+    "prob_entropy_skip": float,
+    "regime_prior_bull": float,
+    "regime_prior_range": float,
+    "regime_prior_bear": float,
+    "trailing_stop_atr_multiplier": float,
+    "polling_interval_seconds": int,
+}
+
+
+def _parse_value(raw: str, target_type: type) -> Any:
+    """Coerce a raw string value from the DB/env into *target_type*.
+
+    Args:
+        raw: Text as read from the database or the environment.
+        target_type: One of ``bool``, ``dict``, ``int`` or ``float``; any
+            other type returns *raw* unchanged.
+
+    Returns:
+        The parsed value.
+
+    Raises:
+        TypeError: If *raw* is not a string (e.g. ``None`` from a JSON null).
+        ValueError: If *raw* is not a recognised boolean, is not valid
+            int/float text, or is JSON that is not an object when a dict is
+            requested (``json.JSONDecodeError`` is a ``ValueError`` subclass).
+
+    Booleans accept ``true``/``1``/``yes`` and ``false``/``0``/``no``
+    (case-insensitive, surrounding whitespace ignored).  Anything else is
+    rejected rather than silently read as ``False``, so that a typo cannot
+    flip a flag; callers keep the previous value on ``ValueError``.
+    """
+    if not isinstance(raw, str):
+        # Fail with an exception type the callers already handle instead of
+        # an AttributeError from ``raw.lower()``.
+        raise TypeError(f"expected str, got {type(raw).__name__}")
+
+    text = raw.strip()
+    if target_type is bool:
+        lowered = text.lower()
+        if lowered in ("true", "1", "yes"):
+            return True
+        if lowered in ("false", "0", "no"):
+            return False
+        raise ValueError(f"not a boolean: {raw!r}")
+    if target_type is dict:
+        parsed = json.loads(text)
+        if not isinstance(parsed, dict):
+            # Valid JSON, but a list/number/string cannot serve as a mapping.
+            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
+        return parsed
+    if target_type is int:
+        return int(text)
+    if target_type is float:
+        return float(text)
+    return raw
+
+
+def _apply_db_rows(
+    config: SignalEngineConfig,
+    rows: list[tuple[str, str | None]],
+) -> None:
+    """Mutate *config* in-place from ``(key, value)`` DB rows.
+
+    Keys are expected to be prefixed ``signal_engine_`` — the prefix is
+    stripped before matching against dataclass fields.  Keys that do not map
+    to a known field are skipped with a debug log.
+
+    A row whose value is ``None`` (``jsonb_each_text`` returns SQL NULL for a
+    JSON ``null``) or cannot be parsed is skipped with a warning and the
+    field keeps its current value; one bad row never prevents later rows from
+    being applied.
+    """
+    for key, value in rows:
+        field_name = key.removeprefix("signal_engine_")
+        target_type = _FIELD_TYPES.get(field_name)
+        if target_type is None:
+            logger.debug("Ignoring unknown signal_engine config key: %s", key)
+            continue
+
+        parsed: Any = None
+        usable = value is not None
+        if usable:
+            try:
+                parsed = _parse_value(value, target_type)
+            except (ValueError, TypeError, json.JSONDecodeError):
+                usable = False
+
+        if not usable:
+            logger.warning(
+                "Invalid value for signal_engine config key %s: %r — keeping default",
+                key,
+                value,
+            )
+            continue
+        setattr(config, field_name, parsed)
+
+
+def _apply_env_overrides(config: SignalEngineConfig) -> None:
+    """Apply ``SIGNAL_ENGINE_*`` environment variables onto *config*.
+
+    The part of the variable name after the ``SIGNAL_ENGINE_`` prefix is
+    lower-cased and looked up as a field name, e.g.
+    ``SIGNAL_ENGINE_DUAL_PIPELINE_ENABLED=true`` sets
+    ``dual_pipeline_enabled``.  Variables that do not map to a known field
+    are ignored; values that fail to parse are logged and leave the field
+    untouched.
+    """
+    marker = "SIGNAL_ENGINE_"
+    for name, text in os.environ.items():
+        if name.startswith(marker):
+            attr = name.removeprefix(marker).lower()
+            wanted = _FIELD_TYPES.get(attr)
+            if wanted is not None:
+                try:
+                    setattr(config, attr, _parse_value(text, wanted))
+                except (ValueError, TypeError, json.JSONDecodeError):
+                    logger.warning(
+                        "Invalid env override %s=%r — keeping previous value",
+                        name,
+                        text,
+                    )
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+async def load_config(pool: Any) -> SignalEngineConfig:
+    """Load signal engine configuration from the database and environment.
+
+    1. Start with safe defaults (``SignalEngineConfig()``).
+    2. Query ``risk_configs`` for keys prefixed ``signal_engine_`` and apply
+       matching values over the defaults.
+    3. On any error in step 2, discard whatever was applied so far and fall
+       back to pristine defaults (``dual_pipeline_enabled=False``).
+    4. Apply environment variable overrides (``SIGNAL_ENGINE_*``).  These are
+       applied even after a DB failure, so an operator can still configure
+       the service explicitly.
+
+    Args:
+        pool: An ``asyncpg.Pool`` (typed as ``Any`` to avoid a hard import
+            dependency at module level).  Only ``pool.fetch`` is used.
+
+    Returns:
+        The resolved configuration.  This function does not raise on DB
+        errors; they are logged and defaults are used instead.
+
+    Requirements: 13.1, 13.6, 13.7
+    """
+    config = SignalEngineConfig()
+
+    # Step 2 — read from risk_configs
+    try:
+        rows = await pool.fetch(_CONFIG_QUERY)
+        if rows:
+            _apply_db_rows(config, [(r["key"], r["value"]) for r in rows])
+    except Exception:
+        logger.warning(
+            "Failed to load signal engine config from risk_configs — "
+            "defaulting to disabled (fail-safe)",
+            exc_info=True,
+        )
+        # Step 3 — fail-safe: a failure part-way through may have left some
+        # DB values applied, so rebuild from scratch rather than only
+        # resetting the master switch.
+        config = SignalEngineConfig()
+        config.dual_pipeline_enabled = False
+
+    # Step 4 — environment overrides (always applied, even after DB failure)
+    _apply_env_overrides(config)
+
+    logger.info(
+        "Signal engine config loaded: dual_pipeline_enabled=%s, "
+        "heuristic=%s, probabilistic=%s, shadow_mode=%s, "
+        "polling_interval=%ds",
+        config.dual_pipeline_enabled,
+        config.heuristic_pipeline_enabled,
+        config.probabilistic_pipeline_enabled,
+        config.shadow_mode,
+        config.polling_interval_seconds,
+    )
+
+    return config