# stonks-oracle/services/trading/model_quality_gate.py

"""Quality gate for live trading eligibility.

Evaluates aggregate model metrics against configurable thresholds and
determines whether the system meets minimum quality standards for live
trading.  When any threshold is not met, the gate forces all
recommendations to paper mode (fail-safe).

Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7
"""

from __future__ import annotations

import json
import logging
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone

import asyncpg

logger = logging.getLogger("trading_engine.quality_gate")


# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------


@dataclass
class QualityGateConfig:
    """Limits a metric snapshot must satisfy before live trading is allowed.

    Every field has a default, so ``QualityGateConfig()`` yields the baseline
    configuration.  ``min_*`` fields are lower bounds and ``max_*`` fields are
    upper bounds.
    """

    # Fewest predictions the snapshot must cover.
    min_prediction_count: int = 100
    # Lowest acceptable information coefficient.
    min_ic: float = 0.03
    # Lowest acceptable win rate.
    min_win_rate: float = 0.53
    # Highest acceptable calibration error (the snapshot's ``calibration_error``).
    max_ece: float = 0.15
    # Lowest acceptable average excess return versus SPY.
    min_excess_return_vs_spy: float = 0.0
    # A snapshot older than this many hours is treated as stale.
    max_snapshot_age_hours: int = 24


@dataclass
class GateThresholdResult:
    """Outcome of comparing one snapshot metric with its configured limit."""

    # Identifier of the threshold that was checked (e.g. ``"min_ic"``).
    name: str
    # The configured limit the metric was compared against.
    threshold: float
    # The metric value that was used in the comparison.
    actual: float
    # True when the metric satisfied the limit.
    passed: bool


@dataclass
class QualityGateResult:
    """Complete outcome of one quality-gate evaluation."""

    # Overall verdict: True only when the gate allows live trading.
    passed: bool
    # Moment at which the evaluation was performed.
    evaluated_at: datetime
    # Per-threshold outcomes; left empty when the gate fails before any
    # threshold is evaluated (no snapshot, stale snapshot).
    threshold_results: list[GateThresholdResult] = field(default_factory=list)
    # Human-readable explanation of the verdict.
    reason: str = ""
    # Identifier of the snapshot that was evaluated, if there was one.
    snapshot_id: str | None = None
    # The thresholds that were in force for this evaluation.
    config: QualityGateConfig = field(default_factory=QualityGateConfig)


# ---------------------------------------------------------------------------
# Threshold evaluation helpers
# ---------------------------------------------------------------------------


def _evaluate_thresholds(
    snapshot: dict,
    config: QualityGateConfig,
) -> list[GateThresholdResult]:
    """Evaluate each threshold against snapshot metric values.

    A metric that is absent from *snapshot* (or stored as ``None``) always
    fails its check.  Previously a missing metric was replaced by a
    placeholder and then compared, so e.g. a missing
    ``avg_excess_return_vs_spy`` (placeholder ``0.0``) satisfied the default
    ``min_excess_return_vs_spy`` of ``0.0`` and the gate could pass without
    the data ever having been measured.

    Args:
        snapshot: Mapping of metric name to value for one metric snapshot.
        config: Thresholds to evaluate against.

    Returns:
        One ``GateThresholdResult`` per threshold, in the fixed order
        prediction count, IC, win rate, ECE, excess return vs SPY.
    """

    def _check(name, metric_key, threshold, *, placeholder, is_floor):
        """Build the result for one metric; a missing metric never passes."""
        raw = snapshot.get(metric_key)
        if raw is None:
            # Keep a finite placeholder as the reported value so the result
            # stays JSON-serializable, but force the check to fail.
            return GateThresholdResult(
                name=name,
                threshold=threshold,
                actual=placeholder,
                passed=False,
            )
        actual = float(raw)
        # Floors require actual >= threshold, ceilings require actual <= it.
        # A NaN metric compares False either way and therefore fails.
        passed = actual >= threshold if is_floor else actual <= threshold
        return GateThresholdResult(
            name=name,
            threshold=threshold,
            actual=actual,
            passed=passed,
        )

    return [
        _check(
            "min_prediction_count",
            "prediction_count",
            float(config.min_prediction_count),
            placeholder=0.0,
            is_floor=True,
        ),
        _check(
            "min_ic",
            "information_coefficient",
            config.min_ic,
            placeholder=0.0,
            is_floor=True,
        ),
        _check(
            "min_win_rate",
            "win_rate",
            config.min_win_rate,
            placeholder=0.0,
            is_floor=True,
        ),
        # max_ece is the only ceiling; 1.0 is the worst-case placeholder.
        _check(
            "max_ece",
            "calibration_error",
            config.max_ece,
            placeholder=1.0,
            is_floor=False,
        ),
        _check(
            "min_excess_return_vs_spy",
            "avg_excess_return_vs_spy",
            config.min_excess_return_vs_spy,
            placeholder=0.0,
            is_floor=True,
        ),
    ]


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


async def evaluate_quality_gate(
    pool: asyncpg.Pool,
    config: QualityGateConfig | None = None,
) -> QualityGateResult:
    """Evaluate model quality gate from latest metric snapshot.

    Reads the most recent ``model_metric_snapshots`` row for the 30d lookback
    and 7d horizon (the primary evaluation window).

    The gate fails (paper-only, fail-safe) without evaluating any threshold
    when:

    * the snapshot query raises or returns no row,
    * the snapshot has no usable ``generated_at`` timestamp, or
    * the snapshot is older than ``config.max_snapshot_age_hours``.

    Every outcome is passed to ``_store_gate_result``, which upserts it into
    ``risk_configs`` under the ``'model_quality_gate'`` name.

    Args:
        pool: Connection pool used for the snapshot query and result storage.
        config: Thresholds to apply.  When ``None`` they are loaded via
            ``load_gate_config_from_db``.

    Returns:
        The evaluation result, including per-threshold details when the
        thresholds were evaluated.
    """
    if config is None:
        config = await load_gate_config_from_db(pool)

    now = datetime.now(tz=timezone.utc)

    async def _paper_only(reason: str, snapshot_id: str | None) -> QualityGateResult:
        """Build, log and store a failing result with no threshold details."""
        result = QualityGateResult(
            passed=False,
            evaluated_at=now,
            threshold_results=[],
            reason=reason,
            snapshot_id=snapshot_id,
            config=config,
        )
        logger.warning("Quality gate: %s", result.reason)
        await _store_gate_result(pool, result)
        return result

    # Fetch the most recent metric snapshot for 30d lookback / 7d horizon
    try:
        row = await pool.fetchrow(
            """SELECT id, generated_at, prediction_count, win_rate,
                      directional_accuracy, information_coefficient,
                      rank_information_coefficient, avg_return,
                      avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                      calibration_error, brier_score,
                      buy_win_rate, sell_win_rate, hold_win_rate
               FROM model_metric_snapshots
               WHERE lookback_window = '30d' AND horizon = '7d'
               ORDER BY generated_at DESC
               LIMIT 1""",
        )
    except Exception:
        # A failed query is treated exactly like a missing snapshot.
        logger.exception("Failed to query model_metric_snapshots")
        row = None

    # Fail-safe: no snapshot exists
    if row is None:
        return await _paper_only(
            "no model metric snapshot available — defaulting to paper-only",
            None,
        )

    snapshot = dict(row)
    snapshot_id = str(snapshot["id"])
    generated_at = snapshot["generated_at"]

    # Fail-safe: the age cannot be computed without a real timestamp.
    if not isinstance(generated_at, datetime):
        return await _paper_only(
            "snapshot has no usable generated_at timestamp — defaulting to paper-only",
            snapshot_id,
        )

    # Subtracting a naive datetime from the aware ``now`` raises TypeError,
    # which would crash the gate instead of failing safe.
    # NOTE(review): naive values are assumed to be UTC — confirm against the
    # column type and how generated_at is written.
    if generated_at.utcoffset() is None:
        generated_at = generated_at.replace(tzinfo=timezone.utc)

    # Fail-safe: stale snapshot
    age_hours = (now - generated_at).total_seconds() / 3600.0
    if age_hours > config.max_snapshot_age_hours:
        return await _paper_only(
            f"most recent snapshot is {age_hours:.1f}h old "
            f"(max {config.max_snapshot_age_hours}h) — defaulting to paper-only",
            snapshot_id,
        )

    # Evaluate thresholds
    threshold_results = _evaluate_thresholds(snapshot, config)
    failed = [r for r in threshold_results if not r.passed]

    if failed:
        failed_names = ", ".join(
            f"{r.name}(actual={r.actual:.4f}, threshold={r.threshold:.4f})"
            for r in failed
        )
        reason = f"failed: {failed_names}"
        passed = False
    else:
        reason = "all thresholds met"
        passed = True

    result = QualityGateResult(
        passed=passed,
        evaluated_at=now,
        threshold_results=threshold_results,
        reason=reason,
        snapshot_id=snapshot_id,
        config=config,
    )

    # Log details
    for tr in threshold_results:
        logger.info(
            "Quality gate threshold %s: actual=%.4f threshold=%.4f %s",
            tr.name,
            tr.actual,
            tr.threshold,
            "PASS" if tr.passed else "FAIL",
        )
    logger.info("Quality gate result: %s — %s", "PASS" if passed else "FAIL", reason)

    await _store_gate_result(pool, result)
    return result


async def load_gate_config_from_db(
    pool: asyncpg.Pool,
) -> QualityGateConfig:
    """Load gate thresholds from risk_configs, with defaults.

    Looks for a ``risk_configs`` row with ``name = 'model_quality_gate_config'``.
    If found, merges stored thresholds over the defaults.  The default config
    is returned when the query fails, no row exists, or the stored value is
    not a JSON object.  A stored threshold that cannot be converted to its
    numeric type (e.g. ``null`` or a non-numeric string) is replaced by the
    default for that field and a warning is logged; the remaining stored
    thresholds are still applied.

    Args:
        pool: Connection pool used to read ``risk_configs``.

    Returns:
        The effective gate configuration.  This function does not raise for
        bad stored data.
    """
    defaults = QualityGateConfig()
    # NOTE(review): the upsert elsewhere in this file targets rows
    # ``WHERE active = TRUE``; this lookup does not filter on ``active`` —
    # confirm whether inactive config rows can exist.
    try:
        row = await pool.fetchrow(
            "SELECT config FROM risk_configs WHERE name = 'model_quality_gate_config'",
        )
    except Exception:
        logger.warning(
            "Failed to load gate config from risk_configs — using defaults",
            exc_info=True,
        )
        return defaults

    if row is None:
        return defaults

    try:
        raw = row["config"]
        cfg = raw if isinstance(raw, dict) else json.loads(raw)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    # Valid JSON that is not an object (list, null, number, ...) has no
    # thresholds to merge and would otherwise fail on ``.get``.
    if not isinstance(cfg, dict):
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    def _coerce(key, caster):
        """Return the stored value for *key* converted by *caster*, else the default."""
        fallback = getattr(defaults, key)
        value = cfg.get(key, fallback)
        try:
            return caster(value)
        except (TypeError, ValueError, OverflowError):
            logger.warning(
                "Invalid value %r for gate config key %s — using default %r",
                value,
                key,
                fallback,
            )
            return fallback

    return QualityGateConfig(
        min_prediction_count=_coerce("min_prediction_count", int),
        min_ic=_coerce("min_ic", float),
        min_win_rate=_coerce("min_win_rate", float),
        max_ece=_coerce("max_ece", float),
        min_excess_return_vs_spy=_coerce("min_excess_return_vs_spy", float),
        max_snapshot_age_hours=_coerce("max_snapshot_age_hours", int),
    )


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _json_safe(value):
    """Return *value* with every non-finite float replaced by ``None``.

    Recurses through dicts, lists and tuples; all other values are returned
    unchanged.
    """
    import math

    if isinstance(value, float) and not math.isfinite(value):
        return None
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    return value


def _gate_result_to_json(result: QualityGateResult) -> str:
    """Serialize a QualityGateResult to JSON for storage in risk_configs.

    Non-finite floats (NaN, +/-Infinity) are written as ``null``.  By default
    ``json.dumps`` emits them as the bare tokens ``NaN`` / ``Infinity``, which
    are not valid JSON, so a snapshot metric that is NaN would produce a
    payload that strict JSON consumers reject.

    Args:
        result: The gate evaluation to serialize.

    Returns:
        A strictly valid JSON document with the keys ``passed``,
        ``evaluated_at`` (ISO 8601), ``reason``, ``snapshot_id``, ``config``
        and ``threshold_results``.
    """
    payload = {
        "passed": result.passed,
        "evaluated_at": result.evaluated_at.isoformat(),
        "reason": result.reason,
        "snapshot_id": result.snapshot_id,
        "config": asdict(result.config),
        "threshold_results": [asdict(tr) for tr in result.threshold_results],
    }
    # allow_nan=False guarantees the output is strict JSON; the payload has
    # already been scrubbed, so this only guards against regressions.
    return json.dumps(_json_safe(payload), default=str, allow_nan=False)


async def _store_gate_result(pool: asyncpg.Pool, result: QualityGateResult) -> None:
    """Persist a gate evaluation in ``risk_configs`` as ``'model_quality_gate'``.

    The write is best-effort: a database error is logged with its traceback
    and swallowed, so a storage failure never propagates to the caller.
    Serialization happens before the guarded section, so an error raised
    while building the JSON payload does propagate.
    """
    serialized = _gate_result_to_json(result)
    # Insert the row, or overwrite config/updated_at on a name conflict.
    upsert_sql = """INSERT INTO risk_configs (name, config, updated_at)
               VALUES ('model_quality_gate', $1::jsonb, NOW())
               ON CONFLICT (name) WHERE active = TRUE
               DO UPDATE SET config = $1::jsonb, updated_at = NOW()"""
    try:
        await pool.execute(upsert_sql, serialized)
    except Exception:
        logger.exception("Failed to store quality gate result in risk_configs")