feat: model validation, calibration, and signal quality layer
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
This commit is contained in:
@@ -43,6 +43,11 @@ from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
||||
from services.shared.redis_keys import PIPELINE_ENABLED_KEY, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
||||
from services.validation.attribution import (
|
||||
compute_catalyst_attribution,
|
||||
compute_layer_attribution,
|
||||
compute_source_attribution,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("query_api")
|
||||
|
||||
@@ -3769,3 +3774,336 @@ async def get_variant_performance_history(
|
||||
agent_id, variant_id, hours,
|
||||
)
|
||||
return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model Validation Dashboard (Requirements 12.1, 12.2, 12.3, 12.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_VALID_LOOKBACKS = {"7d", "30d", "90d", "all"}
|
||||
_VALID_HORIZONS = {"1h", "6h", "1d", "7d", "30d"}
|
||||
|
||||
|
||||
@app.get("/api/validation/summary")
async def get_validation_summary(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Return the newest metric snapshot plus the stored quality gate config.

    Fetches the most recently generated ``model_metric_snapshots`` row for
    the requested lookback/horizon pair and the ``model_quality_gate`` row
    of ``risk_configs``. Each part of the response is ``None`` when the
    corresponding row does not exist.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not one of
            the accepted values.

    Requirement 12.1
    """
    # Validate lookback first, then horizon, so the first bad parameter wins.
    for label, value, allowed in (
        ("lookback", lookback, _VALID_LOOKBACKS),
        ("horizon", horizon, _VALID_HORIZONS),
    ):
        if value not in allowed:
            raise HTTPException(400, f"Invalid {label}: {value}. Must be one of {sorted(allowed)}")

    # Newest snapshot for this lookback/horizon combination.
    latest = await pool.fetchrow(
        """SELECT id, generated_at, lookback_window, horizon,
                  prediction_count, win_rate, directional_accuracy,
                  information_coefficient, rank_information_coefficient,
                  avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                  calibration_error, brier_score,
                  buy_win_rate, sell_win_rate, hold_win_rate,
                  metadata
           FROM model_metric_snapshots
           WHERE lookback_window = $1 AND horizon = $2
           ORDER BY generated_at DESC
           LIMIT 1""",
        lookback, horizon,
    )

    snapshot = _row_to_dict(latest) if latest else None
    if snapshot is not None:
        # The metadata column is run through the file's JSONB parser.
        snapshot["metadata"] = _parse_jsonb(snapshot.get("metadata"))

    # Quality gate configuration stored in risk_configs.
    gate = await pool.fetchrow(
        "SELECT config, updated_at FROM risk_configs WHERE name = 'model_quality_gate'",
    )
    gate_status = _parse_jsonb(gate["config"]) if gate else None

    return {"snapshot": snapshot, "gate_status": gate_status}
|
||||
|
||||
|
||||
# Confidence ranges of the calibration table as (low, high) pairs. Every range
# is half-open [low, high) except the one ending at 1.00, which is closed.
_CALIBRATION_BUCKET_BOUNDS = (
    (0.50, 0.60),
    (0.60, 0.70),
    (0.70, 0.80),
    (0.80, 0.90),
    (0.90, 1.00),
)


def _build_calibration_buckets(rows, bounds=_CALIBRATION_BUCKET_BOUNDS):
    """Group prediction rows into confidence buckets and summarise each one.

    Args:
        rows: Iterable of mappings with ``confidence`` and
            ``direction_correct`` entries.
        bounds: Sequence of ``(low, high)`` confidence ranges.

    Returns:
        One dict per range, in ``bounds`` order, with ``bucket_low``,
        ``bucket_high``, ``avg_confidence``, ``observed_win_rate``,
        ``prediction_count`` and ``miscalibrated``. Empty buckets report
        zeros and ``miscalibrated=False``.
    """
    grouped = [[] for _ in bounds]

    # Single pass over the rows: convert confidence once and drop the row
    # into the first (and only) range that contains it.
    for row in rows:
        conf = float(row["confidence"])
        for slot, (low, high) in enumerate(bounds):
            below_top = conf <= high if high == 1.00 else conf < high
            if low <= conf and below_top:
                grouped[slot].append((conf, row["direction_correct"]))
                break
        # NOTE(review): confidences outside every range (e.g. below 0.50)
        # are left out of the table entirely — confirm that is intended.

    table = []
    for (low, high), members in zip(bounds, grouped):
        count = len(members)
        if count == 0:
            table.append({
                "bucket_low": low,
                "bucket_high": high,
                "avg_confidence": 0.0,
                "observed_win_rate": 0.0,
                "prediction_count": 0,
                "miscalibrated": False,
            })
            continue

        avg_conf = sum(conf for conf, _ in members) / count
        # NOTE(review): only an explicit True counts as a win, so rows whose
        # direction_correct is NULL still count in the denominator — verify
        # against what v_prediction_performance can return.
        wins = sum(1 for _, correct in members if correct is True)
        win_rate = wins / count

        table.append({
            "bucket_low": low,
            "bucket_high": high,
            "avg_confidence": round(avg_conf, 4),
            "observed_win_rate": round(win_rate, 4),
            "prediction_count": count,
            # Flag buckets whose stated confidence is off by more than 0.15.
            "miscalibrated": abs(avg_conf - win_rate) > 0.15,
        })
    return table


@app.get("/api/validation/calibration")
async def get_validation_calibration(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Calibration table with confidence buckets.

    Reads ``confidence`` and ``direction_correct`` from
    ``v_prediction_performance`` for the given horizon (restricted to the
    lookback window unless ``lookback`` is ``"all"``) and reports, per
    confidence bucket, the average confidence, observed win rate, row count
    and a miscalibration flag.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not one of
            the accepted values.

    Requirement 12.2
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    # $1 is always the horizon; $2 (days) is only bound when a window applies.
    params: list[Any] = [horizon]
    lookback_condition = ""
    if lookback != "all":
        lookback_condition = "AND generated_at >= NOW() - make_interval(days => $2)"
        params.append({"7d": 7, "30d": 30, "90d": 90}[lookback])

    rows = await pool.fetch(
        f"""SELECT confidence, direction_correct
            FROM v_prediction_performance
            WHERE horizon = $1
              {lookback_condition}
              AND confidence IS NOT NULL""",
        *params,
    )

    return {
        "buckets": _build_calibration_buckets(rows),
        "lookback": lookback,
        "horizon": horizon,
    }
|
||||
|
||||
|
||||
@app.get("/api/validation/ic-by-horizon")
async def get_validation_ic_by_horizon(
    lookback: str = Query(default="30d"),
):
    """IC and Rank IC per prediction horizon.

    For the given lookback window, selects the most recently generated
    ``model_metric_snapshots`` row of each horizon present in the table and
    returns its IC, Rank IC, prediction count and generation time. Entries
    are ordered 1h, 6h, 1d, 7d, 30d; any other horizon value sorts last.

    Raises:
        HTTPException: 400 when ``lookback`` is not an accepted value.

    Requirement 12.3
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")

    def _optional_float(value):
        # Keep NULL metrics as None instead of failing the float conversion.
        return None if value is None else float(value)

    records = await pool.fetch(
        """SELECT DISTINCT ON (horizon)
                  horizon,
                  information_coefficient,
                  rank_information_coefficient,
                  prediction_count,
                  generated_at
           FROM model_metric_snapshots
           WHERE lookback_window = $1
           ORDER BY horizon, generated_at DESC""",
        lookback,
    )

    per_horizon = [
        {
            "horizon": rec["horizon"],
            "information_coefficient": _optional_float(rec["information_coefficient"]),
            "rank_information_coefficient": _optional_float(rec["rank_information_coefficient"]),
            "prediction_count": rec["prediction_count"],
            "generated_at": rec["generated_at"].isoformat() if rec["generated_at"] else None,
        }
        for rec in records
    ]

    # The SQL orders horizons as text; re-rank them shortest to longest.
    canonical_rank = {"1h": 0, "6h": 1, "1d": 2, "7d": 3, "30d": 4}
    per_horizon.sort(key=lambda entry: canonical_rank.get(entry["horizon"], 99))

    return {"horizons": per_horizon, "lookback": lookback}
|
||||
|
||||
|
||||
@app.get("/api/validation/gate-status")
async def get_validation_gate_status():
    """Quality gate evaluation detail.

    Returns the parsed ``config`` of the ``risk_configs`` row whose ``name``
    is ``'model_quality_gate'`` together with its ``updated_at`` timestamp
    in ISO format. When no such row exists, ``gate_status`` is ``None`` and
    an explanatory ``message`` is returned instead.

    Requirement 12.7
    """
    record = await pool.fetchrow(
        "SELECT config, updated_at FROM risk_configs WHERE name = 'model_quality_gate'",
    )

    if record:
        gate_data = _parse_jsonb(record["config"])
        stamp = record.get("updated_at")
        return {
            "gate_status": gate_data,
            "updated_at": stamp.isoformat() if stamp else None,
        }

    # No stored gate row: tell the caller rather than returning an empty body.
    return {
        "gate_status": None,
        "message": "No gate evaluation found. Model metrics may not have been computed yet.",
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Attribution Endpoints (Requirements 12.4, 12.5, 12.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_LOOKBACK_TO_DAYS: dict[str, int] = {
|
||||
"7d": 7,
|
||||
"30d": 30,
|
||||
"90d": 90,
|
||||
"all": 3650,
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/sources")
async def get_validation_attribution_sources(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Per-source performance metrics.

    Delegates to ``compute_source_attribution`` for the given lookback
    window (converted to days) and prediction horizon, and serialises each
    result with ``asdict``. The metric fields are whatever the result
    dataclass in ``services.validation.attribution`` defines.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not an
            accepted value; 500 when the attribution computation fails.

    Requirement 12.4
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    lookback_days = _LOOKBACK_TO_DAYS[lookback]

    try:
        results = await compute_source_attribution(pool, lookback_days=lookback_days, horizon=horizon)
    except Exception as exc:
        # Log the full traceback server-side; the client only gets a generic 500.
        logger.exception("Failed to compute source attribution")
        # Chain explicitly so the original failure stays attached as __cause__.
        raise HTTPException(500, "Failed to compute source attribution") from exc

    return {
        "sources": [asdict(r) for r in results],
        "lookback": lookback,
        "horizon": horizon,
    }
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/catalysts")
async def get_validation_attribution_catalysts(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Per-catalyst-type performance metrics.

    Delegates to ``compute_catalyst_attribution`` for the given lookback
    window (converted to days) and prediction horizon, and serialises each
    result with ``asdict``. The metric fields are whatever the result
    dataclass in ``services.validation.attribution`` defines.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not an
            accepted value; 500 when the attribution computation fails.

    Requirement 12.5
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    lookback_days = _LOOKBACK_TO_DAYS[lookback]

    try:
        results = await compute_catalyst_attribution(pool, lookback_days=lookback_days, horizon=horizon)
    except Exception as exc:
        # Log the full traceback server-side; the client only gets a generic 500.
        logger.exception("Failed to compute catalyst attribution")
        # Chain explicitly so the original failure stays attached as __cause__.
        raise HTTPException(500, "Failed to compute catalyst attribution") from exc

    return {
        "catalysts": [asdict(r) for r in results],
        "lookback": lookback,
        "horizon": horizon,
    }
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/layers")
async def get_validation_attribution_layers(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Per-signal-layer (company, macro, competitive) performance metrics.

    Delegates to ``compute_layer_attribution`` for the given lookback
    window (converted to days) and prediction horizon, and serialises each
    result with ``asdict``. The metric fields are whatever the result
    dataclass in ``services.validation.attribution`` defines.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not an
            accepted value; 500 when the attribution computation fails.

    Requirement 12.6
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    lookback_days = _LOOKBACK_TO_DAYS[lookback]

    try:
        results = await compute_layer_attribution(pool, lookback_days=lookback_days, horizon=horizon)
    except Exception as exc:
        # Log the full traceback server-side; the client only gets a generic 500.
        logger.exception("Failed to compute layer attribution")
        # Chain explicitly so the original failure stays attached as __cause__.
        raise HTTPException(500, "Failed to compute layer attribution") from exc

    return {
        "layers": [asdict(r) for r in results],
        "lookback": lookback,
        "horizon": horizon,
    }
|
||||
|
||||
Reference in New Issue
Block a user