Files
stonks-oracle/services/validation/calibration.py
T
Celes Renata 7fcc8a6c07
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
feat: model validation, calibration, and signal quality layer
- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
2026-05-01 03:04:58 +00:00

136 lines
4.2 KiB
Python

"""Calibration Engine — Bayesian shrinkage source reliability and weight adjustment.
Computes source reliability scores using Bayesian shrinkage from historical
prediction outcomes, and adjusts evidence weights based on source performance.
Updates the existing source_accuracy table with reliability scores.
Requirements: 8.1, 8.2, 8.3, 8.4, 8.5
"""
from __future__ import annotations

import logging
import math

import asyncpg
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Pure functions — testable without a database
# ---------------------------------------------------------------------------
def compute_source_reliability(
    observed_win_rate: float,
    sample_count: int,
    prior_strength: int = 30,
) -> float:
    """Return the Bayesian-shrinkage reliability score for a source.

    The observed win rate is pulled toward the neutral prior mean of 0.5 in
    proportion to how little evidence backs it::

        reliability = 0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)

    Args:
        observed_win_rate: Fraction of directionally correct predictions,
            expected in [0.0, 1.0].
        sample_count: Number of evaluated predictions (``n``).
        prior_strength: Pseudo-sample count given to the 0.5 prior; larger
            values shrink harder. Negative values are treated as 0
            (no shrinkage).

    Returns:
        Reliability in [0.0, 1.0]. Returns 0.5 (the prior mean) when
        ``sample_count`` is non-positive or ``observed_win_rate`` is NaN,
        and approaches ``observed_win_rate`` as ``sample_count`` grows.
    """
    # NaN would otherwise slip through the clamp below as 1.0, because
    # min(1.0, nan) keeps its first argument; fall back to the prior instead.
    if sample_count <= 0 or math.isnan(observed_win_rate):
        return 0.5

    # A negative prior could zero the denominator or push the shrinkage
    # factor outside [0, 1]; treat it as "no prior".
    prior = max(prior_strength, 0)
    shrinkage = sample_count / (sample_count + prior)
    reliability = 0.5 + shrinkage * (observed_win_rate - 0.5)

    # Clamp for safety: out-of-range win rates must not leak out of [0, 1].
    return max(0.0, min(1.0, reliability))
def compute_adjusted_evidence_weight(
    base_weight: float,
    reliability: float,
) -> float:
    """Scale an evidence weight by its source's reliability.

    The multiplier is ``0.5 + reliability``, so a neutral reliability of 0.5
    leaves ``base_weight`` unchanged. The product is then bounded to the
    interval [0.1, 2.0].
    """
    multiplier = 0.5 + reliability
    scaled = base_weight * multiplier

    # Upper bound first, then lower bound.
    capped = scaled if scaled < 2.0 else 2.0
    if capped > 0.1:
        return capped
    return 0.1
# ---------------------------------------------------------------------------
# SQL queries
# ---------------------------------------------------------------------------
# Per-source outcome tally read from the v_source_performance view.
# Rows whose direction_correct is NULL are filtered out, so sample_count only
# counts rows with a recorded directional result, and win_count counts the
# subset of those where direction_correct is TRUE. Yields one row per source.
_SOURCE_PERFORMANCE_SQL = """
SELECT
source,
COUNT(*) AS sample_count,
COUNT(*) FILTER (WHERE direction_correct = TRUE) AS win_count
FROM v_source_performance
WHERE direction_correct IS NOT NULL
GROUP BY source
"""
# Upsert a single row into source_accuracy, keyed by source_id.
# Positional parameters: $1 = source_id, $2 = accuracy_ratio, $3 = sample_count.
# On a source_id conflict the existing row's accuracy_ratio and sample_count
# are overwritten with the new values; last_updated is set to NOW() on both
# the insert and the update path.
_UPSERT_SOURCE_ACCURACY_SQL = """
INSERT INTO source_accuracy (source_id, accuracy_ratio, sample_count, last_updated)
VALUES ($1, $2, $3, NOW())
ON CONFLICT (source_id)
DO UPDATE SET
accuracy_ratio = EXCLUDED.accuracy_ratio,
sample_count = EXCLUDED.sample_count,
last_updated = NOW()
"""
# ---------------------------------------------------------------------------
# Database-backed function
# ---------------------------------------------------------------------------
async def update_source_reliabilities(
    pool: asyncpg.Pool,
) -> int:
    """Refresh stored source reliability scores from the latest outcomes.

    Fetches the per-source sample and win counts via
    ``_SOURCE_PERFORMANCE_SQL``, turns each into a shrunken reliability with
    ``compute_source_reliability``, and upserts the result through
    ``_UPSERT_SOURCE_ACCURACY_SQL`` (the reliability is written as
    ``accuracy_ratio``).

    Failures are logged rather than raised: a failed fetch returns 0, and a
    failed upsert skips that source while the remaining ones are processed.

    Returns:
        The number of sources whose upsert succeeded.
    """
    try:
        perf_rows = await pool.fetch(_SOURCE_PERFORMANCE_SQL)
    except Exception:
        logger.exception("Failed to query source performance for reliability update")
        return 0

    if not perf_rows:
        logger.info("No source performance data available for reliability update")
        return 0

    n_written = 0
    for record in perf_rows:
        source_id = record["source"]
        total = record["sample_count"]
        wins = record["win_count"]

        # With no samples there is no observed rate; use the neutral 0.5.
        if total > 0:
            win_rate = wins / total
        else:
            win_rate = 0.5
        score = compute_source_reliability(win_rate, total)

        try:
            await pool.execute(_UPSERT_SOURCE_ACCURACY_SQL, source_id, score, total)
        except Exception:
            # Best effort: one bad row must not stop the remaining sources.
            logger.exception(
                "Failed to upsert source reliability for source=%s", source_id
            )
        else:
            n_written += 1

    logger.info("Updated source reliabilities for %d sources", n_written)
    return n_written