"""Quality gate for live trading eligibility.

Evaluates aggregate model metrics against configurable thresholds and
determines whether the system meets minimum quality standards for live
trading. When any threshold is not met, the gate forces all recommendations
to paper mode (fail-safe).

Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7
"""

from __future__ import annotations

import json
import logging
import math
from dataclasses import asdict, dataclass, field, fields, replace
from datetime import datetime, timezone
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # asyncpg is referenced only in annotations, which are lazy strings under
    # ``from __future__ import annotations``; pools are always passed in by
    # the caller, so the driver is not needed at import time.
    import asyncpg

logger = logging.getLogger("trading_engine.quality_gate")


# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------


@dataclass
class QualityGateConfig:
    """Configurable thresholds for live trading eligibility."""

    # Minimum value of the snapshot's ``prediction_count``.
    min_prediction_count: int = 100
    # Minimum value of the snapshot's ``information_coefficient``.
    min_ic: float = 0.03
    # Minimum value of the snapshot's ``win_rate``.
    min_win_rate: float = 0.53
    # Maximum value of the snapshot's ``calibration_error``.
    max_ece: float = 0.15
    # Minimum value of the snapshot's ``avg_excess_return_vs_spy``.
    min_excess_return_vs_spy: float = 0.0
    # Snapshots older than this many hours are considered stale.
    max_snapshot_age_hours: int = 24


@dataclass
class GateThresholdResult:
    """Result for a single threshold check."""

    name: str
    threshold: float
    actual: float
    passed: bool


@dataclass
class QualityGateResult:
    """Full gate evaluation result."""

    passed: bool
    evaluated_at: datetime
    threshold_results: list[GateThresholdResult] = field(default_factory=list)
    reason: str = ""
    snapshot_id: str | None = None
    config: QualityGateConfig = field(default_factory=QualityGateConfig)


# ---------------------------------------------------------------------------
# Threshold evaluation helpers
# ---------------------------------------------------------------------------

# One entry per check:
#   (QualityGateConfig attribute / result name, snapshot key,
#    placeholder reported as ``actual`` when the metric is unusable,
#    True when larger metric values are better)
_THRESHOLD_SPECS: tuple[tuple[str, str, float, bool], ...] = (
    ("min_prediction_count", "prediction_count", 0.0, True),
    ("min_ic", "information_coefficient", 0.0, True),
    ("min_win_rate", "win_rate", 0.0, True),
    ("max_ece", "calibration_error", 1.0, False),
    ("min_excess_return_vs_spy", "avg_excess_return_vs_spy", 0.0, True),
)


def _read_metric(snapshot: dict, key: str, fallback: float) -> tuple[float, bool]:
    """Return ``(value, usable)`` for one snapshot metric.

    A metric is unusable when the key is absent, the value is ``None``, it
    cannot be converted to ``float``, or it is NaN/infinite. In that case
    ``fallback`` is returned as the value so the result stays JSON-safe.
    """
    raw = snapshot.get(key)
    if raw is None:
        return fallback, False
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return fallback, False
    if not math.isfinite(value):
        return fallback, False
    return value, True


def _evaluate_thresholds(
    snapshot: dict,
    config: QualityGateConfig,
) -> list[GateThresholdResult]:
    """Evaluate each threshold against snapshot metric values.

    Args:
        snapshot: Mapping of metric name to value for one metric snapshot.
        config: Thresholds to compare against.

    Returns:
        One ``GateThresholdResult`` per check, in a fixed order. A metric
        that is missing or unusable always yields ``passed=False`` — it must
        never satisfy a threshold by way of its placeholder value (e.g. a
        missing excess return against the default threshold of ``0.0``).
    """
    results: list[GateThresholdResult] = []
    for name, metric_key, fallback, higher_is_better in _THRESHOLD_SPECS:
        threshold = float(getattr(config, name))
        actual, usable = _read_metric(snapshot, metric_key, fallback)
        if not usable:
            logger.warning(
                "Quality gate metric %s is missing or invalid in snapshot; "
                "treating %s as failed",
                metric_key,
                name,
            )
            passed = False
        elif higher_is_better:
            passed = actual >= threshold
        else:
            passed = actual <= threshold
        results.append(
            GateThresholdResult(
                name=name,
                threshold=threshold,
                actual=actual,
                passed=passed,
            )
        )
    return results


def _snapshot_age_hours(generated_at: object, now: datetime) -> float | None:
    """Return the snapshot age in hours, or ``None`` if it cannot be computed.

    ``now`` must be timezone-aware. A naive ``generated_at`` is interpreted
    as UTC so that subtracting it from ``now`` does not raise ``TypeError``.
    """
    if not isinstance(generated_at, datetime):
        return None
    if generated_at.utcoffset() is None:
        # NOTE(review): assumes naive timestamps are stored in UTC — confirm
        # against the model_metric_snapshots schema.
        generated_at = generated_at.replace(tzinfo=timezone.utc)
    return (now - generated_at).total_seconds() / 3600.0


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


async def _fail_safe_result(
    pool: asyncpg.Pool,
    *,
    now: datetime,
    config: QualityGateConfig,
    reason: str,
    snapshot_id: str | None,
) -> QualityGateResult:
    """Build, log and store a failing (paper-only) gate result."""
    result = QualityGateResult(
        passed=False,
        evaluated_at=now,
        threshold_results=[],
        reason=reason,
        snapshot_id=snapshot_id,
        config=config,
    )
    logger.warning("Quality gate: %s", result.reason)
    await _store_gate_result(pool, result)
    return result


async def evaluate_quality_gate(
    pool: asyncpg.Pool,
    config: QualityGateConfig | None = None,
) -> QualityGateResult:
    """Evaluate model quality gate from latest metric snapshot.

    Reads the most recent ``model_metric_snapshots`` row for the 30d lookback
    and 7d horizon. The gate fails (paper-only) when no snapshot can be read,
    when the snapshot has no usable ``generated_at`` timestamp, when it is
    older than ``config.max_snapshot_age_hours``, or when any threshold check
    fails. Every outcome is stored in ``risk_configs`` under the
    ``'model_quality_gate'`` name.

    Args:
        pool: Connection pool used for all queries.
        config: Thresholds to apply; loaded via ``load_gate_config_from_db``
            when omitted.

    Returns:
        The evaluation result, including per-threshold details when the
        thresholds were evaluated.
    """
    if config is None:
        config = await load_gate_config_from_db(pool)

    now = datetime.now(tz=timezone.utc)

    # Fetch the most recent metric snapshot for 30d lookback / 7d horizon.
    try:
        row = await pool.fetchrow(
            """SELECT id, generated_at, prediction_count, win_rate,
                      directional_accuracy, information_coefficient,
                      rank_information_coefficient, avg_return,
                      avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                      calibration_error, brier_score,
                      buy_win_rate, sell_win_rate, hold_win_rate
               FROM model_metric_snapshots
               WHERE lookback_window = '30d' AND horizon = '7d'
               ORDER BY generated_at DESC
               LIMIT 1""",
        )
    except Exception:
        # Best-effort boundary: a query failure is handled like "no snapshot".
        logger.exception("Failed to query model_metric_snapshots")
        row = None

    # Fail-safe: no snapshot exists.
    if row is None:
        return await _fail_safe_result(
            pool,
            now=now,
            config=config,
            reason="no model metric snapshot available — defaulting to paper-only",
            snapshot_id=None,
        )

    snapshot = dict(row)
    snapshot_id = str(snapshot["id"])

    # Fail-safe: snapshot age cannot be determined.
    age_hours = _snapshot_age_hours(snapshot.get("generated_at"), now)
    if age_hours is None:
        return await _fail_safe_result(
            pool,
            now=now,
            config=config,
            reason=(
                "most recent snapshot has no usable generated_at timestamp "
                "— defaulting to paper-only"
            ),
            snapshot_id=snapshot_id,
        )

    # Fail-safe: stale snapshot.
    if age_hours > config.max_snapshot_age_hours:
        return await _fail_safe_result(
            pool,
            now=now,
            config=config,
            reason=(
                f"most recent snapshot is {age_hours:.1f}h old "
                f"(max {config.max_snapshot_age_hours}h) — defaulting to paper-only"
            ),
            snapshot_id=snapshot_id,
        )

    # Evaluate thresholds.
    threshold_results = _evaluate_thresholds(snapshot, config)
    failed = [tr for tr in threshold_results if not tr.passed]
    passed = not failed
    if failed:
        failed_names = ", ".join(
            f"{tr.name}(actual={tr.actual:.4f}, threshold={tr.threshold:.4f})"
            for tr in failed
        )
        reason = f"failed: {failed_names}"
    else:
        reason = "all thresholds met"

    result = QualityGateResult(
        passed=passed,
        evaluated_at=now,
        threshold_results=threshold_results,
        reason=reason,
        snapshot_id=snapshot_id,
        config=config,
    )

    # Log details.
    for tr in threshold_results:
        logger.info(
            "Quality gate threshold %s: actual=%.4f threshold=%.4f %s",
            tr.name,
            tr.actual,
            tr.threshold,
            "PASS" if tr.passed else "FAIL",
        )
    logger.info("Quality gate result: %s — %s", "PASS" if passed else "FAIL", reason)

    await _store_gate_result(pool, result)
    return result


def _merge_config_overrides(
    cfg: dict,
    defaults: QualityGateConfig,
) -> QualityGateConfig:
    """Return ``defaults`` with every valid value from ``cfg`` applied.

    Each stored value is coerced to the type of the corresponding default
    (``int`` or ``float``). A value that cannot be coerced, or that is not
    finite, is logged and ignored so that one bad field does not crash the
    gate or discard the remaining overrides.
    """
    overrides: dict[str, int | float] = {}
    for spec in fields(QualityGateConfig):
        if spec.name not in cfg:
            continue
        caster = type(getattr(defaults, spec.name))
        try:
            value = caster(cfg[spec.name])
        except (TypeError, ValueError, OverflowError):
            value = None
        if value is None or not math.isfinite(value):
            logger.warning(
                "Invalid value for gate config key %s in risk_configs — using default",
                spec.name,
            )
            continue
        overrides[spec.name] = value
    return replace(defaults, **overrides)


async def load_gate_config_from_db(
    pool: asyncpg.Pool,
) -> QualityGateConfig:
    """Load gate thresholds from risk_configs, with defaults.

    Looks for a ``risk_configs`` row with ``name = 'model_quality_gate_config'``.
    If found, merges stored thresholds over the defaults. If the query fails,
    no row exists, or the stored value is not a JSON object, returns the
    default config. Individual fields with invalid values keep their default.

    Args:
        pool: Connection pool used for the lookup.

    Returns:
        The effective gate configuration.
    """
    defaults = QualityGateConfig()

    try:
        row = await pool.fetchrow(
            "SELECT config FROM risk_configs WHERE name = 'model_quality_gate_config'",
        )
    except Exception:
        logger.warning(
            "Failed to load gate config from risk_configs — using defaults",
            exc_info=True,
        )
        return defaults

    if row is None:
        return defaults

    try:
        raw = row["config"]
        # The driver may hand back an already-decoded dict or a JSON string.
        cfg = raw if isinstance(raw, dict) else json.loads(raw)
    except (ValueError, TypeError):
        # json.JSONDecodeError is a ValueError subclass.
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    if not isinstance(cfg, dict):
        # Valid JSON, but not an object (e.g. a list or a bare number).
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    return _merge_config_overrides(cfg, defaults)


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _gate_result_to_json(result: QualityGateResult) -> str:
    """Serialize a QualityGateResult to JSON for storage in risk_configs."""
    payload = {
        "passed": result.passed,
        "evaluated_at": result.evaluated_at.isoformat(),
        "reason": result.reason,
        "snapshot_id": result.snapshot_id,
        "config": asdict(result.config),
        "threshold_results": [asdict(tr) for tr in result.threshold_results],
    }
    # default=str stringifies any value json cannot encode natively.
    return json.dumps(payload, default=str)


async def _store_gate_result(pool: asyncpg.Pool, result: QualityGateResult) -> None:
    """Upsert gate evaluation result into risk_configs.

    Storage is best-effort: a failure is logged and swallowed so that it
    never prevents the caller from receiving the evaluation result.
    """
    payload = _gate_result_to_json(result)
    try:
        # NOTE(review): the ON CONFLICT target presumes a partial unique
        # index on risk_configs(name) WHERE active = TRUE — confirm against
        # the schema.
        await pool.execute(
            """INSERT INTO risk_configs (name, config, updated_at)
               VALUES ('model_quality_gate', $1::jsonb, NOW())
               ON CONFLICT (name) WHERE active = TRUE
               DO UPDATE SET config = $1::jsonb, updated_at = NOW()""",
            payload,
        )
    except Exception:
        logger.exception("Failed to store quality gate result in risk_configs")