feat: model validation, calibration, and signal quality layer

- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views - Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores - Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d) - Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison - Attribution engine: per-source, per-catalyst, per-layer performance - Calibration engine: Bayesian shrinkage source reliability - Quality gate for live trading eligibility with configurable thresholds - 7 new /api/validation/* endpoints - Upgraded OpsModel dashboard with validation tab - Enhanced recommendation display with calibration context - Backtest replay validation mode - 86 Python tests (unit + property-based), 179 frontend tests passing
2026-05-01 03:04:58 +00:00
parent 5d2ffd9163
commit 7fcc8a6c07
23 changed files with 7554 additions and 9 deletions
@@ -0,0 +1,591 @@
+"""Attribution Engine — per-source, per-catalyst, and per-layer performance.
+
+Joins signal evidence links with prediction outcomes to compute attribution
+metrics that identify which sources, catalyst types, and signal layers
+contribute most to accurate predictions.
+
+Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
+"""
+from __future__ import annotations
+
+import logging
+import math
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+
+import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class SourceAttribution:
+    """Performance metrics for a single source."""
+
+    source: str
+    source_type: str
+    prediction_count: int
+    avg_weight: float
+    avg_contribution_score: float
+    win_rate: float
+    avg_future_return: float
+    avg_excess_return_vs_spy: float
+    information_coefficient: float | None
+    duplicate_rate: float
+
+
+@dataclass
+class CatalystAttribution:
+    """Performance metrics for a single catalyst type."""
+
+    catalyst_type: str
+    prediction_count: int
+    win_rate: float
+    avg_future_return: float
+    avg_excess_return_vs_spy: float
+    information_coefficient: float | None
+
+
+@dataclass
+class LayerAttribution:
+    """Performance metrics for a signal layer."""
+
+    layer: str  # company, macro, competitive
+    avg_contribution_pct: float
+    dominant_win_rate: float  # win rate when this layer > 30% contribution
+    dominant_ic: float | None  # IC when this layer > 30% contribution
+
+
+# ---------------------------------------------------------------------------
+# Pure computation helpers
+# ---------------------------------------------------------------------------
+
+
+def _pearson_correlation(xs: list[float], ys: list[float]) -> float | None:
+    """Compute Pearson correlation coefficient between two lists.
+
+    Returns None if the lists have fewer than 2 elements or if either
+    has zero variance. Guards against NaN/infinity.
+    """
+    n = len(xs)
+    if n < 2:
+        return None
+
+    mean_x = sum(xs) / n
+    mean_y = sum(ys) / n
+
+    cov = 0.0
+    var_x = 0.0
+    var_y = 0.0
+
+    for x, y in zip(xs, ys):
+        dx = x - mean_x
+        dy = y - mean_y
+        cov += dx * dy
+        var_x += dx * dx
+        var_y += dy * dy
+
+    if var_x == 0.0 or var_y == 0.0:
+        return None
+
+    r = cov / math.sqrt(var_x * var_y)
+
+    if math.isnan(r) or math.isinf(r):
+        return None
+
+    return max(-1.0, min(1.0, r))
+
+
+def _compute_ic(
+    contribution_scores: list[float],
+    future_returns: list[float],
+) -> float | None:
+    """Compute IC (Pearson correlation) between contribution scores and returns.
+
+    Returns None when fewer than 30 data points.
+    """
+    if len(contribution_scores) < 30 or len(future_returns) < 30:
+        return None
+
+    n = min(len(contribution_scores), len(future_returns))
+    return _pearson_correlation(contribution_scores[:n], future_returns[:n])
+
+
+# ---------------------------------------------------------------------------
+# SQL queries — source attribution via v_source_performance
+# ---------------------------------------------------------------------------
+
+_SOURCE_ATTRIBUTION_SQL = """
+SELECT
+    source,
+    source_type,
+    weight,
+    contribution_score,
+    is_duplicate,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+  AND generated_at >= $2
+"""
+
+_SOURCE_ATTRIBUTION_ALL_SQL = """
+SELECT
+    source,
+    source_type,
+    weight,
+    contribution_score,
+    is_duplicate,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+"""
+
+# ---------------------------------------------------------------------------
+# SQL queries — catalyst attribution via v_source_performance
+# ---------------------------------------------------------------------------
+
+_CATALYST_ATTRIBUTION_SQL = """
+SELECT
+    catalyst_type,
+    weight,
+    contribution_score,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+  AND generated_at >= $2
+"""
+
+_CATALYST_ATTRIBUTION_ALL_SQL = """
+SELECT
+    catalyst_type,
+    weight,
+    contribution_score,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+"""
+
+# ---------------------------------------------------------------------------
+# SQL queries — layer attribution via prediction_snapshots + outcomes
+# ---------------------------------------------------------------------------
+
+_LAYER_ATTRIBUTION_SQL = """
+SELECT
+    ps.score_company,
+    ps.score_macro,
+    ps.score_competitive,
+    po.direction_correct,
+    po.future_return
+FROM prediction_snapshots ps
+JOIN prediction_outcomes po ON po.prediction_id = ps.id
+WHERE po.horizon = $1
+  AND ps.generated_at >= $2
+"""
+
+_LAYER_ATTRIBUTION_ALL_SQL = """
+SELECT
+    ps.score_company,
+    ps.score_macro,
+    ps.score_competitive,
+    po.direction_correct,
+    po.future_return
+FROM prediction_snapshots ps
+JOIN prediction_outcomes po ON po.prediction_id = ps.id
+WHERE po.horizon = $1
+"""
+
+
+# ---------------------------------------------------------------------------
+# Source attribution (Requirements 7.1, 7.2, 7.7)
+# ---------------------------------------------------------------------------
+
+
+async def compute_source_attribution(
+    pool: asyncpg.Pool,
+    lookback_days: int = 30,
+    horizon: str = "7d",
+) -> list[SourceAttribution]:
+    """Compute per-source performance metrics.
+
+    Queries v_source_performance, groups by source, and computes:
+    prediction count, avg weight, avg contribution score, win rate,
+    avg future return, avg excess return vs SPY, IC, and duplicate rate.
+
+    Returns a list of SourceAttribution sorted by prediction count descending.
+    """
+    now = datetime.now().astimezone()
+    cutoff = now - timedelta(days=lookback_days)
+
+    try:
+        rows = await pool.fetch(_SOURCE_ATTRIBUTION_SQL, horizon, cutoff)
+    except Exception:
+        logger.exception(
+            "Failed to query source attribution for horizon=%s lookback=%dd",
+            horizon,
+            lookback_days,
+        )
+        return []
+
+    if not rows:
+        return []
+
+    # Group rows by source
+    source_groups: dict[str, list[dict]] = {}
+    for row in rows:
+        r = dict(row)
+        key = r.get("source") or "unknown"
+        source_groups.setdefault(key, []).append(r)
+
+    results: list[SourceAttribution] = []
+
+    for source, group in source_groups.items():
+        count = len(group)
+
+        # Source type — take the most common one
+        source_type = group[0].get("source_type") or "unknown"
+
+        # Avg weight
+        weights = [r["weight"] for r in group if r.get("weight") is not None]
+        avg_weight = sum(weights) / len(weights) if weights else 0.0
+
+        # Avg contribution score
+        contrib_scores = [
+            r["contribution_score"]
+            for r in group
+            if r.get("contribution_score") is not None
+        ]
+        avg_contribution_score = (
+            sum(contrib_scores) / len(contrib_scores) if contrib_scores else 0.0
+        )
+
+        # Win rate
+        direction_rows = [r for r in group if r.get("direction_correct") is not None]
+        win_count = sum(1 for r in direction_rows if r["direction_correct"] is True)
+        win_rate = win_count / len(direction_rows) if direction_rows else 0.0
+
+        # Avg future return
+        returns = [
+            r["future_return"] for r in group if r.get("future_return") is not None
+        ]
+        avg_future_return = sum(returns) / len(returns) if returns else 0.0
+
+        # Avg excess return vs SPY
+        excess_returns = [
+            r["excess_return_vs_spy"]
+            for r in group
+            if r.get("excess_return_vs_spy") is not None
+        ]
+        avg_excess_return_vs_spy = (
+            sum(excess_returns) / len(excess_returns) if excess_returns else 0.0
+        )
+
+        # IC: correlation between contribution scores and future returns
+        ic_scores = [
+            r["contribution_score"]
+            for r in group
+            if r.get("contribution_score") is not None
+            and r.get("future_return") is not None
+        ]
+        ic_returns = [
+            r["future_return"]
+            for r in group
+            if r.get("contribution_score") is not None
+            and r.get("future_return") is not None
+        ]
+        ic = _compute_ic(ic_scores, ic_returns)
+
+        # Duplicate rate: is_duplicate=true / total
+        dup_count = sum(1 for r in group if r.get("is_duplicate") is True)
+        duplicate_rate = dup_count / count
+
+        results.append(
+            SourceAttribution(
+                source=source,
+                source_type=source_type,
+                prediction_count=count,
+                avg_weight=avg_weight,
+                avg_contribution_score=avg_contribution_score,
+                win_rate=win_rate,
+                avg_future_return=avg_future_return,
+                avg_excess_return_vs_spy=avg_excess_return_vs_spy,
+                information_coefficient=ic,
+                duplicate_rate=duplicate_rate,
+            )
+        )
+
+    # Sort by prediction count descending
+    results.sort(key=lambda a: a.prediction_count, reverse=True)
+
+    logger.info(
+        "Computed source attribution for %d sources (horizon=%s, lookback=%dd)",
+        len(results),
+        horizon,
+        lookback_days,
+    )
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Catalyst attribution (Requirements 7.3, 7.4)
+# ---------------------------------------------------------------------------
+
+
+async def compute_catalyst_attribution(
+    pool: asyncpg.Pool,
+    lookback_days: int = 30,
+    horizon: str = "7d",
+) -> list[CatalystAttribution]:
+    """Compute per-catalyst-type performance metrics.
+
+    Queries v_source_performance, groups by catalyst_type, and computes:
+    prediction count, win rate, avg future return, avg excess return vs SPY,
+    and IC.
+
+    Returns a list of CatalystAttribution sorted by prediction count descending.
+    """
+    now = datetime.now().astimezone()
+    cutoff = now - timedelta(days=lookback_days)
+
+    try:
+        rows = await pool.fetch(_CATALYST_ATTRIBUTION_SQL, horizon, cutoff)
+    except Exception:
+        logger.exception(
+            "Failed to query catalyst attribution for horizon=%s lookback=%dd",
+            horizon,
+            lookback_days,
+        )
+        return []
+
+    if not rows:
+        return []
+
+    # Group rows by catalyst_type
+    catalyst_groups: dict[str, list[dict]] = {}
+    for row in rows:
+        r = dict(row)
+        key = r.get("catalyst_type") or "unknown"
+        catalyst_groups.setdefault(key, []).append(r)
+
+    results: list[CatalystAttribution] = []
+
+    for catalyst_type, group in catalyst_groups.items():
+        count = len(group)
+
+        # Win rate
+        direction_rows = [r for r in group if r.get("direction_correct") is not None]
+        win_count = sum(1 for r in direction_rows if r["direction_correct"] is True)
+        win_rate = win_count / len(direction_rows) if direction_rows else 0.0
+
+        # Avg future return
+        returns = [
+            r["future_return"] for r in group if r.get("future_return") is not None
+        ]
+        avg_future_return = sum(returns) / len(returns) if returns else 0.0
+
+        # Avg excess return vs SPY
+        excess_returns = [
+            r["excess_return_vs_spy"]
+            for r in group
+            if r.get("excess_return_vs_spy") is not None
+        ]
+        avg_excess_return_vs_spy = (
+            sum(excess_returns) / len(excess_returns) if excess_returns else 0.0
+        )
+
+        # IC: correlation between contribution scores and future returns
+        ic_scores = [
+            r["contribution_score"]
+            for r in group
+            if r.get("contribution_score") is not None
+            and r.get("future_return") is not None
+        ]
+        ic_returns = [
+            r["future_return"]
+            for r in group
+            if r.get("contribution_score") is not None
+            and r.get("future_return") is not None
+        ]
+        ic = _compute_ic(ic_scores, ic_returns)
+
+        results.append(
+            CatalystAttribution(
+                catalyst_type=catalyst_type,
+                prediction_count=count,
+                win_rate=win_rate,
+                avg_future_return=avg_future_return,
+                avg_excess_return_vs_spy=avg_excess_return_vs_spy,
+                information_coefficient=ic,
+            )
+        )
+
+    # Sort by prediction count descending
+    results.sort(key=lambda a: a.prediction_count, reverse=True)
+
+    logger.info(
+        "Computed catalyst attribution for %d catalyst types "
+        "(horizon=%s, lookback=%dd)",
+        len(results),
+        horizon,
+        lookback_days,
+    )
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Layer attribution (Requirements 7.5, 7.6)
+# ---------------------------------------------------------------------------
+
+
+async def compute_layer_attribution(
+    pool: asyncpg.Pool,
+    lookback_days: int = 30,
+    horizon: str = "7d",
+) -> list[LayerAttribution]:
+    """Compute per-layer (company, macro, competitive) performance metrics.
+
+    Queries prediction_snapshots joined with prediction_outcomes to get
+    score_company, score_macro, score_competitive alongside outcomes.
+
+    For each layer computes:
+    - avg_contribution_pct: average of layer_score / total_score across all
+      predictions (where total_score > 0)
+    - dominant_win_rate: win rate for predictions where the layer contributes
+      more than 30% of the total score
+    - dominant_ic: IC (Pearson correlation between layer score and future
+      return) for predictions where the layer contributes > 30%
+
+    Returns a list of 3 LayerAttribution objects (company, macro, competitive).
+    """
+    now = datetime.now().astimezone()
+    cutoff = now - timedelta(days=lookback_days)
+
+    try:
+        rows = await pool.fetch(_LAYER_ATTRIBUTION_SQL, horizon, cutoff)
+    except Exception:
+        logger.exception(
+            "Failed to query layer attribution for horizon=%s lookback=%dd",
+            horizon,
+            lookback_days,
+        )
+        return []
+
+    if not rows:
+        return [
+            LayerAttribution(
+                layer="company",
+                avg_contribution_pct=0.0,
+                dominant_win_rate=0.0,
+                dominant_ic=None,
+            ),
+            LayerAttribution(
+                layer="macro",
+                avg_contribution_pct=0.0,
+                dominant_win_rate=0.0,
+                dominant_ic=None,
+            ),
+            LayerAttribution(
+                layer="competitive",
+                avg_contribution_pct=0.0,
+                dominant_win_rate=0.0,
+                dominant_ic=None,
+            ),
+        ]
+
+    row_dicts = [dict(r) for r in rows]
+
+    layers = [
+        ("company", "score_company"),
+        ("macro", "score_macro"),
+        ("competitive", "score_competitive"),
+    ]
+
+    results: list[LayerAttribution] = []
+
+    for layer_name, score_field in layers:
+        # --- Average contribution percentage ---
+        contribution_pcts: list[float] = []
+        for r in row_dicts:
+            total = (
+                (r.get("score_company") or 0.0)
+                + (r.get("score_macro") or 0.0)
+                + (r.get("score_competitive") or 0.0)
+            )
+            if total > 0.0:
+                layer_score = r.get(score_field) or 0.0
+                contribution_pcts.append(layer_score / total)
+
+        avg_contribution_pct = (
+            sum(contribution_pcts) / len(contribution_pcts)
+            if contribution_pcts
+            else 0.0
+        )
+
+        # --- Dominant predictions: layer > 30% of total score ---
+        dominant_rows: list[dict] = []
+        for r in row_dicts:
+            total = (
+                (r.get("score_company") or 0.0)
+                + (r.get("score_macro") or 0.0)
+                + (r.get("score_competitive") or 0.0)
+            )
+            if total > 0.0:
+                layer_score = r.get(score_field) or 0.0
+                if layer_score / total > 0.30:
+                    dominant_rows.append(r)
+
+        # Dominant win rate
+        dominant_direction_rows = [
+            r for r in dominant_rows if r.get("direction_correct") is not None
+        ]
+        dominant_win_count = sum(
+            1 for r in dominant_direction_rows if r["direction_correct"] is True
+        )
+        dominant_win_rate = (
+            dominant_win_count / len(dominant_direction_rows)
+            if dominant_direction_rows
+            else 0.0
+        )
+
+        # Dominant IC: correlation between layer score and future return
+        dom_scores = [
+            r.get(score_field) or 0.0
+            for r in dominant_rows
+            if r.get("future_return") is not None
+        ]
+        dom_returns = [
+            r["future_return"]
+            for r in dominant_rows
+            if r.get("future_return") is not None
+        ]
+        dominant_ic = _compute_ic(dom_scores, dom_returns)
+
+        results.append(
+            LayerAttribution(
+                layer=layer_name,
+                avg_contribution_pct=avg_contribution_pct,
+                dominant_win_rate=dominant_win_rate,
+                dominant_ic=dominant_ic,
+            )
+        )
+
+    logger.info(
+        "Computed layer attribution for 3 layers (horizon=%s, lookback=%dd)",
+        horizon,
+        lookback_days,
+    )
+
+    return results