7fcc8a6c07
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views - Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores - Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d) - Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison - Attribution engine: per-source, per-catalyst, per-layer performance - Calibration engine: Bayesian shrinkage source reliability - Quality gate for live trading eligibility with configurable thresholds - 7 new /api/validation/* endpoints - Upgraded OpsModel dashboard with validation tab - Enhanced recommendation display with calibration context - Backtest replay validation mode - 86 Python tests (unit + property-based), 179 frontend tests passing
330 lines
10 KiB
Python
330 lines
10 KiB
Python
"""Quality gate for live trading eligibility.

Evaluates aggregate model metrics against configurable thresholds and
determines whether the system meets minimum quality standards for live
trading. When any threshold is not met, the gate forces all
recommendations to paper mode (fail-safe).

Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime, timezone
|
|
|
|
import asyncpg
|
|
|
|
logger = logging.getLogger("trading_engine.quality_gate")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data classes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@dataclass
class QualityGateConfig:
    """Threshold settings that decide whether live trading is permitted.

    Each ``min_*`` value is a floor and each ``max_*`` value is a ceiling
    applied to the corresponding field of the latest metric snapshot.
    """

    # Floor for the snapshot's ``prediction_count``.
    min_prediction_count: int = 100
    # Floor for the snapshot's ``information_coefficient``.
    min_ic: float = 0.03
    # Floor for the snapshot's ``win_rate``.
    min_win_rate: float = 0.53
    # Ceiling for the snapshot's ``calibration_error`` (ECE).
    max_ece: float = 0.15
    # Floor for the snapshot's ``avg_excess_return_vs_spy``.
    min_excess_return_vs_spy: float = 0.0
    # Snapshots older than this many hours are treated as stale.
    max_snapshot_age_hours: int = 24
|
|
|
|
|
|
@dataclass
class GateThresholdResult:
    """Outcome of comparing one snapshot metric with its configured limit."""

    # Identifier of the threshold that was checked (e.g. ``"min_ic"``).
    name: str
    # Configured limit the metric was compared against.
    threshold: float
    # Metric value that was used in the comparison.
    actual: float
    # True when the metric satisfied the limit.
    passed: bool
|
|
|
|
|
|
@dataclass
class QualityGateResult:
    """Complete outcome of one quality-gate evaluation."""

    # Overall verdict: True only when the gate was satisfied.
    passed: bool
    # Timestamp at which the evaluation was performed.
    evaluated_at: datetime
    # Per-threshold details; empty when no thresholds were evaluated.
    threshold_results: list[GateThresholdResult] = field(default_factory=list)
    # Human-readable explanation of the verdict.
    reason: str = ""
    # Identifier of the metric snapshot that was evaluated, if any.
    snapshot_id: str | None = None
    # Threshold configuration that was in effect for this evaluation.
    config: QualityGateConfig = field(default_factory=QualityGateConfig)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Threshold evaluation helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _evaluate_thresholds(
    snapshot: dict,
    config: QualityGateConfig,
) -> list[GateThresholdResult]:
    """Evaluate each threshold against snapshot metric values.

    Args:
        snapshot: Mapping of metric column name to value for one metric
            snapshot. Missing keys and ``None`` values are both treated as
            "metric unavailable".
        config: Thresholds to compare against.

    Returns:
        One ``GateThresholdResult`` per threshold, in this fixed order:
        ``min_prediction_count``, ``min_ic``, ``min_win_rate``, ``max_ece``,
        ``min_excess_return_vs_spy``.

    A metric that is unavailable always fails its check. A placeholder
    (``0.0`` for floors, ``1.0`` for the ECE ceiling) is still reported as
    ``actual`` so the result stays JSON-serialisable, but the placeholder is
    never compared with the threshold. Otherwise a missing excess return
    would pass a ``0.0`` floor, and a missing IC would pass any
    non-positive floor.
    """
    results: list[GateThresholdResult] = []

    # Prediction count: a missing or zero count is reported as 0.
    actual_count = snapshot.get("prediction_count") or 0
    results.append(
        GateThresholdResult(
            name="min_prediction_count",
            threshold=float(config.min_prediction_count),
            actual=float(actual_count),
            passed=actual_count >= config.min_prediction_count,
        )
    )

    # (result name, snapshot key, threshold, placeholder if missing, is_floor)
    checks = (
        ("min_ic", "information_coefficient", config.min_ic, 0.0, True),
        ("min_win_rate", "win_rate", config.min_win_rate, 0.0, True),
        ("max_ece", "calibration_error", config.max_ece, 1.0, False),
        (
            "min_excess_return_vs_spy",
            "avg_excess_return_vs_spy",
            config.min_excess_return_vs_spy,
            0.0,
            True,
        ),
    )

    for name, key, threshold, placeholder, is_floor in checks:
        raw = snapshot.get(key)
        if raw is None:
            # Fail closed: an unavailable metric can never satisfy the gate.
            actual = placeholder
            passed = False
        else:
            actual = float(raw)
            # NaN compares False either way, so it also fails the check.
            passed = actual >= threshold if is_floor else actual <= threshold
        results.append(
            GateThresholdResult(
                name=name,
                threshold=threshold,
                actual=actual,
                passed=passed,
            )
        )

    return results
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def _fail_closed_gate_result(
    pool: asyncpg.Pool,
    config: QualityGateConfig,
    evaluated_at: datetime,
    reason: str,
    snapshot_id: str | None,
) -> QualityGateResult:
    """Build, log and store a failing result that carries no threshold details."""
    result = QualityGateResult(
        passed=False,
        evaluated_at=evaluated_at,
        threshold_results=[],
        reason=reason,
        snapshot_id=snapshot_id,
        config=config,
    )
    logger.warning("Quality gate: %s", result.reason)
    await _store_gate_result(pool, result)
    return result


async def evaluate_quality_gate(
    pool: asyncpg.Pool,
    config: QualityGateConfig | None = None,
) -> QualityGateResult:
    """Evaluate model quality gate from latest metric snapshot.

    Reads the most recent ``model_metric_snapshots`` row for the 30d
    lookback and 7d horizon and compares it with the configured thresholds.

    The gate fails closed (``passed=False``, no threshold details) when:

    * the snapshot query raises or returns no row,
    * the snapshot has no usable ``generated_at`` timestamp, or
    * the snapshot is older than ``config.max_snapshot_age_hours``.

    Every outcome is stored in ``risk_configs`` under the
    ``'model_quality_gate'`` name via ``_store_gate_result``.

    Args:
        pool: Connection pool used for both the snapshot query and the
            result upsert.
        config: Thresholds to apply. When ``None`` they are loaded with
            ``load_gate_config_from_db``.

    Returns:
        The evaluation result, including per-threshold details when the
        thresholds were actually evaluated.
    """
    if config is None:
        config = await load_gate_config_from_db(pool)

    now = datetime.now(tz=timezone.utc)

    # Fetch the most recent metric snapshot for 30d lookback / 7d horizon.
    try:
        row = await pool.fetchrow(
            """SELECT id, generated_at, prediction_count, win_rate,
                      directional_accuracy, information_coefficient,
                      rank_information_coefficient, avg_return,
                      avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                      calibration_error, brier_score,
                      buy_win_rate, sell_win_rate, hold_win_rate
               FROM model_metric_snapshots
               WHERE lookback_window = '30d' AND horizon = '7d'
               ORDER BY generated_at DESC
               LIMIT 1""",
        )
    except Exception:
        # A failed query is treated the same as "no snapshot".
        logger.exception("Failed to query model_metric_snapshots")
        row = None

    # Fail-safe: no snapshot exists.
    if row is None:
        return await _fail_closed_gate_result(
            pool,
            config,
            now,
            reason="no model metric snapshot available — defaulting to paper-only",
            snapshot_id=None,
        )

    snapshot = dict(row)
    snapshot_id = str(snapshot["id"])
    generated_at = snapshot["generated_at"]

    # Fail-safe: a NULL timestamp sorts first under ORDER BY ... DESC, so it
    # can be the row returned here. Its age cannot be computed.
    if not isinstance(generated_at, datetime):
        return await _fail_closed_gate_result(
            pool,
            config,
            now,
            reason=(
                "most recent snapshot has no usable generated_at timestamp "
                "— defaulting to paper-only"
            ),
            snapshot_id=snapshot_id,
        )

    # Subtracting a naive datetime from the aware ``now`` raises TypeError.
    # NOTE(review): naive values are assumed to be UTC — confirm against the
    # column type declared for model_metric_snapshots.generated_at.
    if generated_at.tzinfo is None:
        generated_at = generated_at.replace(tzinfo=timezone.utc)

    # Fail-safe: stale snapshot.
    age_hours = (now - generated_at).total_seconds() / 3600.0
    if age_hours > config.max_snapshot_age_hours:
        return await _fail_closed_gate_result(
            pool,
            config,
            now,
            reason=(
                f"most recent snapshot is {age_hours:.1f}h old "
                f"(max {config.max_snapshot_age_hours}h) — defaulting to paper-only"
            ),
            snapshot_id=snapshot_id,
        )

    # Evaluate thresholds; any single failure fails the whole gate.
    threshold_results = _evaluate_thresholds(snapshot, config)
    failed = [r for r in threshold_results if not r.passed]

    if failed:
        failed_names = ", ".join(
            f"{r.name}(actual={r.actual:.4f}, threshold={r.threshold:.4f})"
            for r in failed
        )
        reason = f"failed: {failed_names}"
        passed = False
    else:
        reason = "all thresholds met"
        passed = True

    result = QualityGateResult(
        passed=passed,
        evaluated_at=now,
        threshold_results=threshold_results,
        reason=reason,
        snapshot_id=snapshot_id,
        config=config,
    )

    # Log details.
    for tr in threshold_results:
        logger.info(
            "Quality gate threshold %s: actual=%.4f threshold=%.4f %s",
            tr.name,
            tr.actual,
            tr.threshold,
            "PASS" if tr.passed else "FAIL",
        )
    logger.info("Quality gate result: %s — %s", "PASS" if passed else "FAIL", reason)

    await _store_gate_result(pool, result)
    return result
|
|
|
|
|
|
async def load_gate_config_from_db(
    pool: asyncpg.Pool,
) -> QualityGateConfig:
    """Load gate thresholds from risk_configs, with defaults.

    Looks for a ``risk_configs`` row with ``name = 'model_quality_gate_config'``
    and merges its stored thresholds over the defaults.

    The defaults are returned unchanged when the query fails, no row exists,
    the stored value is not valid JSON, or the JSON is not an object. When
    the JSON is an object but one field is missing, null or not convertible
    to the expected numeric type, only that field falls back to its default
    and a warning is logged.

    Args:
        pool: Connection pool used to read ``risk_configs``.

    Returns:
        The effective threshold configuration. This function does not raise
        for malformed stored configuration.
    """
    defaults = QualityGateConfig()
    try:
        row = await pool.fetchrow(
            "SELECT config FROM risk_configs WHERE name = 'model_quality_gate_config'",
        )
    except Exception:
        logger.warning(
            "Failed to load gate config from risk_configs — using defaults",
            exc_info=True,
        )
        return defaults

    if row is None:
        return defaults

    try:
        raw = row["config"]
        cfg = raw if isinstance(raw, dict) else json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    # Valid JSON that is not an object (list, string, number, null) has no
    # thresholds to merge; ``cfg.get`` would raise AttributeError on it.
    if not isinstance(cfg, dict):
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    def _coerce(key: str, caster: type) -> int | float:
        """Return ``caster(cfg[key])``, or the default if absent or invalid."""
        default = getattr(defaults, key)
        value = cfg.get(key, default)
        try:
            return caster(value)
        except (TypeError, ValueError, OverflowError):
            # e.g. null, "abc", a nested object, or int(inf)
            logger.warning(
                "Invalid value %r for gate config key %s — using default %r",
                value,
                key,
                default,
            )
            return default

    return QualityGateConfig(
        min_prediction_count=_coerce("min_prediction_count", int),
        min_ic=_coerce("min_ic", float),
        min_win_rate=_coerce("min_win_rate", float),
        max_ece=_coerce("max_ece", float),
        min_excess_return_vs_spy=_coerce("min_excess_return_vs_spy", float),
        max_snapshot_age_hours=_coerce("max_snapshot_age_hours", int),
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Internal helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _gate_result_to_json(result: QualityGateResult) -> str:
|
|
"""Serialize a QualityGateResult to JSON for storage in risk_configs."""
|
|
payload = {
|
|
"passed": result.passed,
|
|
"evaluated_at": result.evaluated_at.isoformat(),
|
|
"reason": result.reason,
|
|
"snapshot_id": result.snapshot_id,
|
|
"config": asdict(result.config),
|
|
"threshold_results": [asdict(tr) for tr in result.threshold_results],
|
|
}
|
|
return json.dumps(payload, default=str)
|
|
|
|
|
|
async def _store_gate_result(pool: asyncpg.Pool, result: QualityGateResult) -> None:
    """Write the gate evaluation into ``risk_configs`` as an upsert.

    The result is serialised with ``_gate_result_to_json`` and stored under
    the ``'model_quality_gate'`` name. Storage is best effort: a database
    error is logged with its traceback and not re-raised.
    """
    upsert_sql = """INSERT INTO risk_configs (name, config, updated_at)
               VALUES ('model_quality_gate', $1::jsonb, NOW())
               ON CONFLICT (name) WHERE active = TRUE
               DO UPDATE SET config = $1::jsonb, updated_at = NOW()"""
    serialized = _gate_result_to_json(result)
    try:
        await pool.execute(upsert_sql, serialized)
    except Exception:
        logger.exception("Failed to store quality gate result in risk_configs")
|