feat: model validation, calibration, and signal quality layer

- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
2026-05-01 03:04:58 +00:00
parent 5d2ffd9163
commit 7fcc8a6c07
23 changed files with 7554 additions and 9 deletions
@@ -48,6 +48,7 @@ from services.shared.schemas import (
    TrendSummary,
    TrendWindow,
 )
+from services.validation.prediction_snapshot import create_prediction_snapshot

 logger = logging.getLogger(__name__)

@@ -741,6 +742,92 @@ def _map_time_horizon_prefix(window: str) -> str:
    return mapping.get(window, "window_")


+# ---------------------------------------------------------------------------
+# Fetch evidence signals and docs for prediction snapshot (Requirement 1.1)
+# ---------------------------------------------------------------------------
+
+# One row per impact record for the documents whose IDs are bound as $1
+# (cast to uuid[]).  Joins document_impact_records -> document_intelligence
+# -> documents and keeps only rows whose intelligence record has
+# validation_status = 'valid'.
+# Note: the "source" and "source_type" output columns are both populated
+# from d.source_type.
+_EVIDENCE_SIGNALS_QUERY = """
+SELECT
+    dir.document_id::text AS document_id,
+    di.id::text AS signal_id,
+    dir.ticker,
+    d.source_type AS source,
+    d.source_type,
+    dir.catalyst_type,
+    dir.sentiment,
+    dir.impact_score AS impact,
+    di.confidence AS extraction_confidence,
+    di.source_credibility AS weight
+FROM document_impact_records dir
+JOIN document_intelligence di ON di.id = dir.intelligence_id
+JOIN documents d ON d.id = di.document_id
+WHERE dir.document_id = ANY($1::uuid[])
+  AND di.validation_status = 'valid'
+"""
+
+# Title and URL for the documents whose IDs are bound as $1 (cast to uuid[]).
+# NULL titles/URLs are returned as empty strings via COALESCE.
+_EVIDENCE_DOCS_QUERY = """
+SELECT
+    d.id::text AS document_id,
+    COALESCE(d.title, '') AS title,
+    COALESCE(d.url, '') AS url
+FROM documents d
+WHERE d.id = ANY($1::uuid[])
+"""
+
+
+async def _fetch_evidence_for_snapshot(
+    pool: asyncpg.Pool,
+    document_ids: list[str],
+) -> tuple[list[dict], list[dict]]:
+    """Fetch evidence signals and document metadata for prediction snapshot.
+
+    Filters out non-UUID document IDs (e.g. synthetic pattern IDs) since
+    they cannot be looked up in the documents table.
+
+    Returns (evidence_signals, evidence_docs).
+    """
+    # Filter to valid UUIDs only
+    valid_ids: list[str] = []
+    for doc_id in document_ids:
+        try:
+            _uuid.UUID(doc_id)
+            valid_ids.append(doc_id)
+        except (ValueError, AttributeError):
+            continue
+
+    if not valid_ids:
+        return [], []
+
+    signal_rows = await pool.fetch(_EVIDENCE_SIGNALS_QUERY, valid_ids)
+    evidence_signals = [
+        {
+            "document_id": row["document_id"],
+            "signal_id": row["signal_id"],
+            "ticker": row["ticker"] or "",
+            "source": row["source"] or "",
+            "source_type": row["source_type"] or "",
+            "catalyst_type": row["catalyst_type"] or "",
+            "sentiment": row["sentiment"] or "",
+            "impact": float(row["impact"] or 0.0),
+            "extraction_confidence": float(row["extraction_confidence"] or 0.0),
+            "weight": float(row["weight"] or 0.0),
+        }
+        for row in signal_rows
+    ]
+
+    doc_rows = await pool.fetch(_EVIDENCE_DOCS_QUERY, valid_ids)
+    evidence_docs = [
+        {
+            "document_id": row["document_id"],
+            "title": row["title"],
+            "url": row["url"],
+        }
+        for row in doc_rows
+    ]
+
+    return evidence_signals, evidence_docs
+
+
 async def generate_recommendation(
    pool: asyncpg.Pool,
    ticker: str,
@@ -847,6 +934,22 @@ async def generate_recommendation(
        eligibility_result=result,
    )

+    # 7b. Capture prediction snapshot for model validation (Requirements 1.1, 1.6)
+    try:
+        all_doc_ids = list(summary.top_supporting_evidence) + list(summary.top_opposing_evidence)
+        evidence_signals, evidence_docs = await _fetch_evidence_for_snapshot(
+            pool, all_doc_ids,
+        )
+        await create_prediction_snapshot(
+            pool, rec, summary, evidence_signals, evidence_docs,
+        )
+    except Exception:
+        logger.warning(
+            "Failed to create prediction snapshot for %s/%s — recommendation "
+            "persisted but snapshot creation failed",
+            ticker, rec_id, exc_info=True,
+        )
+
    # 8. Publish prediction facts to analytical tables (Requirement 9.4)
    if minio_client is not None:
        try: