feat: model validation, calibration, and signal quality layer

- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views - Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores - Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d) - Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison - Attribution engine: per-source, per-catalyst, per-layer performance - Calibration engine: Bayesian shrinkage source reliability - Quality gate for live trading eligibility with configurable thresholds - 7 new /api/validation/* endpoints - Upgraded OpsModel dashboard with validation tab - Enhanced recommendation display with calibration context - Backtest replay validation mode - 86 Python tests (unit + property-based), 179 frontend tests passing
2026-05-01 03:04:58 +00:00
parent 5d2ffd9163
commit 7fcc8a6c07
23 changed files with 7554 additions and 9 deletions
@@ -0,0 +1 @@
+
@@ -0,0 +1,591 @@
+"""Attribution Engine — per-source, per-catalyst, and per-layer performance.
+
+Joins signal evidence links with prediction outcomes to compute attribution
+metrics that identify which sources, catalyst types, and signal layers
+contribute most to accurate predictions.
+
+Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
+"""
+from __future__ import annotations
+
+import logging
+import math
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+
+import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class SourceAttribution:
+    """Performance metrics for a single source.
+
+    compute_source_attribution builds one instance per distinct source value.
+    Averages and rates skip NULL values and fall back to 0.0 when no usable
+    value exists.
+    """
+
+    # Source identifier; rows without a source are grouped as "unknown".
+    source: str
+    # Source type label reported for this source ("unknown" when missing).
+    source_type: str
+    # Number of fetched rows attributed to this source.
+    prediction_count: int
+    # Mean of the non-NULL weight values.
+    avg_weight: float
+    # Mean of the non-NULL contribution_score values.
+    avg_contribution_score: float
+    # Share of rows with direction_correct = True among rows where it is set.
+    win_rate: float
+    # Mean of the non-NULL future_return values.
+    avg_future_return: float
+    # Mean of the non-NULL excess_return_vs_spy values.
+    avg_excess_return_vs_spy: float
+    # Pearson correlation of contribution_score vs. future_return; None when
+    # fewer than 30 paired rows exist or the correlation is undefined.
+    information_coefficient: float | None
+    # Rows flagged is_duplicate divided by all rows of the source.
+    duplicate_rate: float
+
+
+@dataclass
+class CatalystAttribution:
+    """Performance metrics for a single catalyst type.
+
+    compute_catalyst_attribution builds one instance per distinct
+    catalyst_type value. Averages and rates skip NULL values and fall back
+    to 0.0 when no usable value exists.
+    """
+
+    # Catalyst label; rows without one are grouped as "unknown".
+    catalyst_type: str
+    # Number of fetched rows carrying this catalyst type.
+    prediction_count: int
+    # Share of rows with direction_correct = True among rows where it is set.
+    win_rate: float
+    # Mean of the non-NULL future_return values.
+    avg_future_return: float
+    # Mean of the non-NULL excess_return_vs_spy values.
+    avg_excess_return_vs_spy: float
+    # Pearson correlation of contribution_score vs. future_return; None when
+    # fewer than 30 paired rows exist or the correlation is undefined.
+    information_coefficient: float | None
+
+
+@dataclass
+class LayerAttribution:
+    """Performance metrics for a signal layer.
+
+    compute_layer_attribution builds one instance per layer. Only predictions
+    whose three layer scores sum to more than zero take part in the figures.
+    """
+
+    layer: str  # company, macro, competitive
+    # Mean of layer_score / total_score. Despite the name this is a fraction
+    # (not multiplied by 100); 0.0 when no prediction qualifies.
+    avg_contribution_pct: float
+    dominant_win_rate: float  # win rate when this layer > 30% contribution
+    # Correlation of the layer score with future_return over the dominant
+    # predictions; None when fewer than 30 of them have a future_return.
+    dominant_ic: float | None  # IC when this layer > 30% contribution
+
+
+# ---------------------------------------------------------------------------
+# Pure computation helpers
+# ---------------------------------------------------------------------------
+
+
+def _pearson_correlation(xs: list[float], ys: list[float]) -> float | None:
+    """Compute the Pearson correlation coefficient between two samples.
+
+    Only the first ``min(len(xs), len(ys))`` pairs are used, so both means
+    are taken over exactly the values that enter the covariance.
+
+    Returns:
+        The coefficient clamped to [-1.0, 1.0], or None when fewer than two
+        pairs are available, when either sample has zero variance, or when
+        the result is not a finite number.
+    """
+    n = min(len(xs), len(ys))
+    if n < 2:
+        return None
+
+    # Trim to the common length so the means match the zipped pairs below.
+    xs = xs[:n]
+    ys = ys[:n]
+
+    mean_x = sum(xs) / n
+    mean_y = sum(ys) / n
+
+    cov = 0.0
+    var_x = 0.0
+    var_y = 0.0
+
+    for x, y in zip(xs, ys):
+        dx = x - mean_x
+        dy = y - mean_y
+        cov += dx * dy
+        var_x += dx * dx
+        var_y += dy * dy
+
+    if var_x == 0.0 or var_y == 0.0:
+        return None
+
+    # Take the square roots separately: the product var_x * var_y can
+    # underflow to 0.0 (ZeroDivisionError) or overflow to inf (r collapses
+    # to 0.0) even though both variances are ordinary finite numbers.
+    denom = math.sqrt(var_x) * math.sqrt(var_y)
+    if denom == 0.0:
+        return None
+
+    r = cov / denom
+
+    if math.isnan(r) or math.isinf(r):
+        return None
+
+    # Rounding can push |r| marginally above 1.0.
+    return max(-1.0, min(1.0, r))
+
+
+def _compute_ic(
+    contribution_scores: list[float],
+    future_returns: list[float],
+) -> float | None:
+    """Return the IC of contribution scores against realized returns.
+
+    The IC is the Pearson correlation of the two series, truncated to their
+    common length. None is returned when either series holds fewer than 30
+    values or when the correlation itself is undefined.
+    """
+    paired = min(len(contribution_scores), len(future_returns))
+    if paired < 30:
+        return None
+
+    score_sample = contribution_scores[:paired]
+    return_sample = future_returns[:paired]
+    return _pearson_correlation(score_sample, return_sample)
+
+
+# ---------------------------------------------------------------------------
+# SQL queries — source attribution via v_source_performance
+# ---------------------------------------------------------------------------
+
+# Row-level columns that compute_source_attribution aggregates in Python.
+# Parameters: $1 = horizon label, $2 = earliest generated_at to include.
+_SOURCE_ATTRIBUTION_SQL = """
+SELECT
+    source,
+    source_type,
+    weight,
+    contribution_score,
+    is_duplicate,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+  AND generated_at >= $2
+"""
+
+# Same projection as _SOURCE_ATTRIBUTION_SQL without the generated_at cutoff
+# (only $1 = horizon label). Not referenced by the functions in this module.
+_SOURCE_ATTRIBUTION_ALL_SQL = """
+SELECT
+    source,
+    source_type,
+    weight,
+    contribution_score,
+    is_duplicate,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+"""
+
+# ---------------------------------------------------------------------------
+# SQL queries — catalyst attribution via v_source_performance
+# ---------------------------------------------------------------------------
+
+# Row-level columns fetched for compute_catalyst_attribution. The weight
+# column is selected but not read by that function.
+# Parameters: $1 = horizon label, $2 = earliest generated_at to include.
+_CATALYST_ATTRIBUTION_SQL = """
+SELECT
+    catalyst_type,
+    weight,
+    contribution_score,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+  AND generated_at >= $2
+"""
+
+# Same projection as _CATALYST_ATTRIBUTION_SQL without the generated_at cutoff
+# (only $1 = horizon label). Not referenced by the functions in this module.
+_CATALYST_ATTRIBUTION_ALL_SQL = """
+SELECT
+    catalyst_type,
+    weight,
+    contribution_score,
+    direction_correct,
+    future_return,
+    excess_return_vs_spy
+FROM v_source_performance
+WHERE horizon = $1
+"""
+
+# ---------------------------------------------------------------------------
+# SQL queries — layer attribution via prediction_snapshots + outcomes
+# ---------------------------------------------------------------------------
+
+# Per-prediction layer scores joined to their outcomes
+# (prediction_outcomes.prediction_id = prediction_snapshots.id).
+# Parameters: $1 = outcome horizon label, $2 = earliest snapshot
+# generated_at to include.
+_LAYER_ATTRIBUTION_SQL = """
+SELECT
+    ps.score_company,
+    ps.score_macro,
+    ps.score_competitive,
+    po.direction_correct,
+    po.future_return
+FROM prediction_snapshots ps
+JOIN prediction_outcomes po ON po.prediction_id = ps.id
+WHERE po.horizon = $1
+  AND ps.generated_at >= $2
+"""
+
+# Same join as _LAYER_ATTRIBUTION_SQL without the generated_at cutoff
+# (only $1 = horizon label). Not referenced by the functions in this module.
+_LAYER_ATTRIBUTION_ALL_SQL = """
+SELECT
+    ps.score_company,
+    ps.score_macro,
+    ps.score_competitive,
+    po.direction_correct,
+    po.future_return
+FROM prediction_snapshots ps
+JOIN prediction_outcomes po ON po.prediction_id = ps.id
+WHERE po.horizon = $1
+"""
+
+
+# ---------------------------------------------------------------------------
+# Source attribution (Requirements 7.1, 7.2, 7.7)
+# ---------------------------------------------------------------------------
+
+
+async def compute_source_attribution(
+    pool: asyncpg.Pool,
+    lookback_days: int = 30,
+    horizon: str = "7d",
+) -> list[SourceAttribution]:
+    """Compute per-source performance metrics.
+
+    Fetches rows from v_source_performance for ``horizon`` whose generated_at
+    falls within the last ``lookback_days`` days, groups them by source
+    (rows without a source are grouped as "unknown"), and aggregates each
+    group. NULL values are skipped per metric; a metric with no usable
+    values is reported as 0.0.
+
+    Args:
+        pool: Connection pool used to run the query.
+        lookback_days: Size of the trailing window, in days.
+        horizon: Outcome horizon label to filter on.
+
+    Returns:
+        One SourceAttribution per source, sorted by prediction count
+        descending. An empty list when the query fails (the error is logged)
+        or matches no rows.
+    """
+    cutoff = datetime.now().astimezone() - timedelta(days=lookback_days)
+
+    try:
+        rows = await pool.fetch(_SOURCE_ATTRIBUTION_SQL, horizon, cutoff)
+    except Exception:
+        # Deliberate best effort: log with traceback and report no data.
+        logger.exception(
+            "Failed to query source attribution for horizon=%s lookback=%dd",
+            horizon,
+            lookback_days,
+        )
+        return []
+
+    if not rows:
+        return []
+
+    def _present(group: list[dict], column: str) -> list:
+        """Return the non-NULL values of ``column`` in row order."""
+        return [r[column] for r in group if r.get(column) is not None]
+
+    def _mean(values: list) -> float:
+        """Return the arithmetic mean, or 0.0 for an empty list."""
+        return sum(values) / len(values) if values else 0.0
+
+    # Group rows by source
+    source_groups: dict[str, list[dict]] = {}
+    for row in rows:
+        r = dict(row)
+        source_groups.setdefault(r.get("source") or "unknown", []).append(r)
+
+    results: list[SourceAttribution] = []
+
+    for source, group in source_groups.items():
+        count = len(group)
+
+        # Source type: the most common label in the group. max() keeps the
+        # first-seen label on ties because dicts preserve insertion order.
+        type_counts: dict[str, int] = {}
+        for r in group:
+            label = r.get("source_type") or "unknown"
+            type_counts[label] = type_counts.get(label, 0) + 1
+        source_type = max(type_counts, key=type_counts.get)
+
+        # Win rate over rows that have an evaluated direction
+        verdicts = _present(group, "direction_correct")
+        win_count = sum(1 for verdict in verdicts if verdict is True)
+        win_rate = win_count / len(verdicts) if verdicts else 0.0
+
+        # IC needs both values on the same row, so pair before splitting
+        pairs = [
+            (r["contribution_score"], r["future_return"])
+            for r in group
+            if r.get("contribution_score") is not None
+            and r.get("future_return") is not None
+        ]
+        ic = _compute_ic(
+            [score for score, _ in pairs],
+            [ret for _, ret in pairs],
+        )
+
+        # Duplicate rate: is_duplicate=true / total
+        dup_count = sum(1 for r in group if r.get("is_duplicate") is True)
+
+        results.append(
+            SourceAttribution(
+                source=source,
+                source_type=source_type,
+                prediction_count=count,
+                avg_weight=_mean(_present(group, "weight")),
+                avg_contribution_score=_mean(
+                    _present(group, "contribution_score")
+                ),
+                win_rate=win_rate,
+                avg_future_return=_mean(_present(group, "future_return")),
+                avg_excess_return_vs_spy=_mean(
+                    _present(group, "excess_return_vs_spy")
+                ),
+                information_coefficient=ic,
+                duplicate_rate=dup_count / count,
+            )
+        )
+
+    # Sort by prediction count descending
+    results.sort(key=lambda a: a.prediction_count, reverse=True)
+
+    logger.info(
+        "Computed source attribution for %d sources (horizon=%s, lookback=%dd)",
+        len(results),
+        horizon,
+        lookback_days,
+    )
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Catalyst attribution (Requirements 7.3, 7.4)
+# ---------------------------------------------------------------------------
+
+
+async def compute_catalyst_attribution(
+    pool: asyncpg.Pool,
+    lookback_days: int = 30,
+    horizon: str = "7d",
+) -> list[CatalystAttribution]:
+    """Aggregate outcome metrics per catalyst type.
+
+    Reads v_source_performance rows for ``horizon`` generated within the last
+    ``lookback_days`` days and buckets them by catalyst_type (missing values
+    become "unknown"). Each bucket yields its row count, win rate, mean
+    future return, mean excess return vs SPY, and IC.
+
+    Returns the buckets ordered by row count, largest first. A failed query
+    is logged and produces an empty list, as does an empty result set.
+    """
+    window_start = datetime.now().astimezone() - timedelta(days=lookback_days)
+
+    try:
+        fetched = await pool.fetch(
+            _CATALYST_ATTRIBUTION_SQL, horizon, window_start
+        )
+    except Exception:
+        logger.exception(
+            "Failed to query catalyst attribution for horizon=%s lookback=%dd",
+            horizon,
+            lookback_days,
+        )
+        return []
+
+    if not fetched:
+        return []
+
+    # Bucket the rows by catalyst label, keeping first-seen order.
+    by_catalyst: dict[str, list[dict]] = {}
+    for raw in fetched:
+        record = dict(raw)
+        label = record.get("catalyst_type") or "unknown"
+        if label not in by_catalyst:
+            by_catalyst[label] = []
+        by_catalyst[label].append(record)
+
+    attributions: list[CatalystAttribution] = []
+
+    for label, members in by_catalyst.items():
+        # Only rows with an evaluated direction count toward the win rate.
+        verdicts = [
+            m["direction_correct"]
+            for m in members
+            if m.get("direction_correct") is not None
+        ]
+        hits = sum(1 for verdict in verdicts if verdict is True)
+
+        realized = [
+            m["future_return"]
+            for m in members
+            if m.get("future_return") is not None
+        ]
+        excess = [
+            m["excess_return_vs_spy"]
+            for m in members
+            if m.get("excess_return_vs_spy") is not None
+        ]
+
+        # The IC pairs score and return from the same row.
+        paired = [
+            (m["contribution_score"], m["future_return"])
+            for m in members
+            if m.get("contribution_score") is not None
+            and m.get("future_return") is not None
+        ]
+
+        attributions.append(
+            CatalystAttribution(
+                catalyst_type=label,
+                prediction_count=len(members),
+                win_rate=hits / len(verdicts) if verdicts else 0.0,
+                avg_future_return=(
+                    sum(realized) / len(realized) if realized else 0.0
+                ),
+                avg_excess_return_vs_spy=(
+                    sum(excess) / len(excess) if excess else 0.0
+                ),
+                information_coefficient=_compute_ic(
+                    [score for score, _ in paired],
+                    [ret for _, ret in paired],
+                ),
+            )
+        )
+
+    # Largest buckets first; the stable sort keeps first-seen order on ties.
+    attributions = sorted(
+        attributions, key=lambda item: item.prediction_count, reverse=True
+    )
+
+    logger.info(
+        "Computed catalyst attribution for %d catalyst types "
+        "(horizon=%s, lookback=%dd)",
+        len(attributions),
+        horizon,
+        lookback_days,
+    )
+
+    return attributions
+
+
+# ---------------------------------------------------------------------------
+# Layer attribution (Requirements 7.5, 7.6)
+# ---------------------------------------------------------------------------
+
+
+async def compute_layer_attribution(
+    pool: asyncpg.Pool,
+    lookback_days: int = 30,
+    horizon: str = "7d",
+) -> list[LayerAttribution]:
+    """Measure how the company, macro and competitive layers perform.
+
+    Reads prediction_snapshots joined with prediction_outcomes for
+    ``horizon`` over the last ``lookback_days`` days. Only predictions whose
+    three layer scores add up to more than zero are considered. Per layer:
+
+    - avg_contribution_pct: mean of layer_score / total_score
+    - dominant_win_rate: win rate over predictions where the layer supplies
+      more than 30% of the total score
+    - dominant_ic: IC of the layer score against future return over those
+      same predictions
+
+    Returns three LayerAttribution objects (company, macro, competitive);
+    all-zero entries when the query matches nothing. A failed query is
+    logged and yields an empty list.
+    """
+    window_start = datetime.now().astimezone() - timedelta(days=lookback_days)
+
+    try:
+        fetched = await pool.fetch(_LAYER_ATTRIBUTION_SQL, horizon, window_start)
+    except Exception:
+        logger.exception(
+            "Failed to query layer attribution for horizon=%s lookback=%dd",
+            horizon,
+            lookback_days,
+        )
+        return []
+
+    layer_fields = (
+        ("company", "score_company"),
+        ("macro", "score_macro"),
+        ("competitive", "score_competitive"),
+    )
+
+    if not fetched:
+        return [
+            LayerAttribution(
+                layer=name,
+                avg_contribution_pct=0.0,
+                dominant_win_rate=0.0,
+                dominant_ic=None,
+            )
+            for name, _ in layer_fields
+        ]
+
+    # The total score is layer-independent, so work it out once per record
+    # and keep only the records that can be split into shares.
+    scored: list[tuple[dict, float]] = []
+    for raw in fetched:
+        record = dict(raw)
+        total = (
+            (record.get("score_company") or 0.0)
+            + (record.get("score_macro") or 0.0)
+            + (record.get("score_competitive") or 0.0)
+        )
+        if total > 0.0:
+            scored.append((record, total))
+
+    attributions: list[LayerAttribution] = []
+
+    for name, column in layer_fields:
+        shares = [
+            ((record.get(column) or 0.0) / total, record)
+            for record, total in scored
+        ]
+
+        if shares:
+            mean_share = sum(share for share, _ in shares) / len(shares)
+        else:
+            mean_share = 0.0
+
+        # Predictions in which this layer carries more than 30% of the score.
+        dominant = [record for share, record in shares if share > 0.30]
+
+        judged = [
+            record
+            for record in dominant
+            if record.get("direction_correct") is not None
+        ]
+        wins = sum(1 for record in judged if record["direction_correct"] is True)
+        dominant_win_rate = wins / len(judged) if judged else 0.0
+
+        # IC inputs: layer score and return taken from the same record.
+        with_return = [
+            record
+            for record in dominant
+            if record.get("future_return") is not None
+        ]
+        dominant_ic = _compute_ic(
+            [record.get(column) or 0.0 for record in with_return],
+            [record["future_return"] for record in with_return],
+        )
+
+        attributions.append(
+            LayerAttribution(
+                layer=name,
+                avg_contribution_pct=mean_share,
+                dominant_win_rate=dominant_win_rate,
+                dominant_ic=dominant_ic,
+            )
+        )
+
+    logger.info(
+        "Computed layer attribution for 3 layers (horizon=%s, lookback=%dd)",
+        horizon,
+        lookback_days,
+    )
+
+    return attributions
@@ -0,0 +1,135 @@
+"""Calibration Engine — Bayesian shrinkage source reliability and weight adjustment.
+
+Computes source reliability scores using Bayesian shrinkage from historical
+prediction outcomes, and adjusts evidence weights based on source performance.
+Updates the existing source_accuracy table with reliability scores.
+
+Requirements: 8.1, 8.2, 8.3, 8.4, 8.5
+"""
+from __future__ import annotations
+
+import logging
+
+import asyncpg
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Pure functions — testable without a database
+# ---------------------------------------------------------------------------
+
+
+def compute_source_reliability(
+    observed_win_rate: float,
+    sample_count: int,
+    prior_strength: int = 30,
+) -> float:
+    """Shrink an observed win rate toward the 0.5 prior.
+
+    reliability = 0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)
+
+    With no samples (n <= 0) the prior mean 0.5 is returned. The more
+    samples there are, the closer the result gets to observed_win_rate.
+    The result is clamped to [0.0, 1.0].
+    """
+    prior_mean = 0.5
+    if sample_count <= 0:
+        return prior_mean
+
+    # Weight given to the observed data relative to the prior.
+    data_weight = sample_count / (sample_count + prior_strength)
+    shrunk = prior_mean + data_weight * (observed_win_rate - prior_mean)
+
+    # Safety clamp; a win rate inside [0, 1] already lands in range.
+    capped = min(1.0, shrunk)
+    return max(0.0, capped)
+
+
+def compute_adjusted_evidence_weight(
+    base_weight: float,
+    reliability: float,
+) -> float:
+    """Scale base_weight by (0.5 + reliability) and clamp to [0.1, 2.0]."""
+    multiplier = 0.5 + reliability
+    upper_bounded = min(2.0, base_weight * multiplier)
+    return max(0.1, upper_bounded)
+
+
+# ---------------------------------------------------------------------------
+# SQL queries
+# ---------------------------------------------------------------------------
+
+# Query v_source_performance to get per-source win rates and sample counts.
+# Groups by source, counting total predictions and directional wins.
+# Rows without an evaluated direction are excluded. The query takes no
+# parameters, so it is not restricted to a horizon or a time window.
+_SOURCE_PERFORMANCE_SQL = """
+SELECT
+    source,
+    COUNT(*) AS sample_count,
+    COUNT(*) FILTER (WHERE direction_correct = TRUE) AS win_count
+FROM v_source_performance
+WHERE direction_correct IS NOT NULL
+GROUP BY source
+"""
+
+# Upsert into source_accuracy: update accuracy_ratio and sample_count
+# for existing sources, insert new ones.
+# Parameters: $1 = source_id, $2 = accuracy_ratio, $3 = sample_count.
+# update_source_reliabilities passes the shrunk reliability as $2, not the
+# raw win ratio.
+_UPSERT_SOURCE_ACCURACY_SQL = """
+INSERT INTO source_accuracy (source_id, accuracy_ratio, sample_count, last_updated)
+VALUES ($1, $2, $3, NOW())
+ON CONFLICT (source_id)
+DO UPDATE SET
+    accuracy_ratio = EXCLUDED.accuracy_ratio,
+    sample_count = EXCLUDED.sample_count,
+    last_updated = NOW()
+"""
+
+
+# ---------------------------------------------------------------------------
+# Database-backed function
+# ---------------------------------------------------------------------------
+
+
+async def update_source_reliabilities(
+    pool: asyncpg.Pool,
+) -> int:
+    """Refresh the stored reliability score of every source.
+
+    Steps:
+    1. Read per-source sample and win counts from v_source_performance
+    2. Turn each win rate into a Bayesian-shrinkage reliability
+    3. Upsert the reliability into source_accuracy as accuracy_ratio
+
+    A failing upsert is logged and skipped so the remaining sources are
+    still processed. Returns how many sources were written; 0 when the
+    initial query fails or returns nothing.
+    """
+    try:
+        performance_rows = await pool.fetch(_SOURCE_PERFORMANCE_SQL)
+    except Exception:
+        logger.exception("Failed to query source performance for reliability update")
+        return 0
+
+    if not performance_rows:
+        logger.info("No source performance data available for reliability update")
+        return 0
+
+    written = 0
+
+    for entry in performance_rows:
+        source = entry["source"]
+        samples = entry["sample_count"]
+        wins = entry["win_count"]
+
+        if samples > 0:
+            win_rate = wins / samples
+        else:
+            win_rate = 0.5
+        reliability = compute_source_reliability(win_rate, samples)
+
+        try:
+            await pool.execute(
+                _UPSERT_SOURCE_ACCURACY_SQL,
+                source,
+                reliability,
+                samples,
+            )
+        except Exception:
+            logger.exception(
+                "Failed to upsert source reliability for source=%s", source
+            )
+        else:
+            written += 1
+
+    logger.info("Updated source reliabilities for %d sources", written)
+    return written
@@ -0,0 +1,637 @@
+"""Metrics Engine — computes calibration, IC, Brier, and benchmark metrics.
+
+Aggregates model quality metrics across configurable lookback windows and
+prediction horizons. Stores periodic snapshots for time-series analysis
+of model performance trends.
+
+Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 6.1, 6.2, 6.3, 6.4, 6.5,
+              9.1, 9.2, 9.3, 9.4, 10.1, 10.2, 10.3, 10.4, 10.5
+"""
+from __future__ import annotations
+
+import json
+import logging
+import math
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+
+import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# (low, high) confidence bounds used by compute_calibration_error. Each
+# bucket is [low, high) except the last, which also includes 1.00.
+# Confidences below 0.50 fall into no bucket.
+CONFIDENCE_BUCKETS: list[tuple[float, float]] = [
+    (0.50, 0.60),
+    (0.60, 0.70),
+    (0.70, 0.80),
+    (0.80, 0.90),
+    (0.90, 1.00),
+]
+
+# Labels of the supported lookback windows.
+LOOKBACK_WINDOWS: list[str] = ["7d", "30d", "90d", "all"]
+
+# Duration per lookback label. "all" maps to None — presumably meaning
+# "no cutoff"; confirm against the code that consumes this mapping.
+LOOKBACK_DURATIONS: dict[str, timedelta | None] = {
+    "7d": timedelta(days=7),
+    "30d": timedelta(days=30),
+    "90d": timedelta(days=90),
+    "all": None,
+}
+
+# Horizon labels for which prediction outcomes are evaluated.
+EVALUATION_HORIZONS: list[str] = ["1h", "6h", "1d", "7d", "30d"]
+
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class CalibrationBucket:
+    """Calibration metrics for a single confidence bucket.
+
+    Produced by compute_calibration_error, one per entry of
+    CONFIDENCE_BUCKETS. An empty bucket is reported with zeroed figures and
+    miscalibrated=False.
+    """
+
+    # Lower and upper confidence bounds of the bucket.
+    bucket_low: float
+    bucket_high: float
+    # Mean confidence of the predictions that fell into the bucket.
+    avg_confidence: float
+    # Share of those predictions whose outcome was truthy.
+    observed_win_rate: float
+    # Number of predictions in the bucket.
+    prediction_count: int
+    miscalibrated: bool  # |avg_confidence - win_rate| > 0.15
+
+
+@dataclass
+class ModelMetricSnapshot:
+    """Aggregate model quality metrics for a lookback/horizon combination.
+
+    The field names match the columns listed in _INSERT_METRIC_SNAPSHOT_SQL
+    for the model_metric_snapshots table.
+    """
+
+    # Snapshot identifier; the insert statement casts it with ::uuid.
+    id: str
+    generated_at: datetime
+    # Lookback label — presumably one of LOOKBACK_WINDOWS; confirm at the
+    # construction site.
+    lookback_window: str
+    # Horizon label — presumably one of EVALUATION_HORIZONS; confirm at the
+    # construction site.
+    horizon: str
+    prediction_count: int
+    win_rate: float
+    directional_accuracy: float
+    # Optional because the IC helpers in this module return None for
+    # fewer than 30 data points.
+    information_coefficient: float | None
+    rank_information_coefficient: float | None
+    avg_return: float
+    avg_excess_return_vs_spy: float
+    avg_excess_return_vs_sector: float
+    calibration_error: float  # ECE
+    brier_score: float
+    # Win rates split by recommended action (buy / sell / hold).
+    buy_win_rate: float
+    sell_win_rate: float
+    hold_win_rate: float
+    # Free-form extra data; every instance gets its own empty dict by default.
+    metadata: dict = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# Pure computation functions
+# ---------------------------------------------------------------------------
+
+
+def compute_calibration_error(
+    confidences: list[float],
+    outcomes: list[bool],
+) -> tuple[float, list[CalibrationBucket]]:
+    """Compute ECE and calibration buckets.
+
+    ECE = Σ (n_b / N) * |avg_conf_b - win_rate_b|
+
+    Predictions are paired positionally; when the inputs differ in length
+    only the common prefix is used, and N is the number of such pairs (the
+    same truncation rule the IC helpers apply). Pairs are grouped into the
+    CONFIDENCE_BUCKETS ranges; a confidence outside every range still
+    counts toward N but adds nothing to the sum. Buckets where
+    |avg_confidence - observed_win_rate| > 0.15 are flagged miscalibrated.
+
+    Returns:
+        (ece, buckets) with one CalibrationBucket per configured range,
+        empty ranges included with zeroed figures. (0.0, []) when either
+        input is empty.
+    """
+    if not confidences or not outcomes:
+        return 0.0, []
+
+    pairs = list(zip(confidences, outcomes))
+    # Normalize by the pairs actually evaluated, not by len(confidences),
+    # so a longer confidence list cannot dilute the bucket weights.
+    n = len(pairs)
+    buckets: list[CalibrationBucket] = []
+    ece = 0.0
+
+    for low, high in CONFIDENCE_BUCKETS:
+        # Last bucket is inclusive on the right: [0.90, 1.00]
+        closed_right = high == 1.00
+        members = [
+            (conf, outcome)
+            for conf, outcome in pairs
+            if low <= conf and (conf <= high if closed_right else conf < high)
+        ]
+
+        count = len(members)
+        if count == 0:
+            # Empty bucket — exclude from ECE, still record it
+            buckets.append(
+                CalibrationBucket(
+                    bucket_low=low,
+                    bucket_high=high,
+                    avg_confidence=0.0,
+                    observed_win_rate=0.0,
+                    prediction_count=0,
+                    miscalibrated=False,
+                )
+            )
+            continue
+
+        avg_conf = sum(conf for conf, _ in members) / count
+        win_rate = sum(1.0 for _, outcome in members if outcome) / count
+        diff = abs(avg_conf - win_rate)
+
+        buckets.append(
+            CalibrationBucket(
+                bucket_low=low,
+                bucket_high=high,
+                avg_confidence=avg_conf,
+                observed_win_rate=win_rate,
+                prediction_count=count,
+                miscalibrated=diff > 0.15,
+            )
+        )
+
+        ece += (count / n) * diff
+
+    return ece, buckets
+
+
+def compute_brier_score(
+    p_bulls: list[float],
+    outcomes: list[bool],
+) -> float:
+    """Brier score = mean((p_bull - outcome)^2).
+
+    Each outcome counts as 1.0 when truthy and 0.0 otherwise. Values are
+    paired positionally; when the inputs differ in length only the common
+    prefix is scored, and the mean is taken over those pairs.
+
+    Returns:
+        The mean squared error, in [0.0, 1.0] for probabilities in
+        [0.0, 1.0]. 0.0 when either input is empty.
+    """
+    if not p_bulls or not outcomes:
+        return 0.0
+
+    # Divide by the number of pairs actually scored rather than
+    # len(p_bulls); otherwise a shorter outcomes list deflates the score.
+    n = min(len(p_bulls), len(outcomes))
+    total = 0.0
+    for p, o in zip(p_bulls, outcomes):
+        actual = 1.0 if o else 0.0
+        total += (p - actual) ** 2
+
+    return total / n
+
+
+def _pearson_correlation(xs: list[float], ys: list[float]) -> float | None:
+    """Compute the Pearson correlation coefficient between two samples.
+
+    Only the first ``min(len(xs), len(ys))`` pairs are used, so both means
+    are taken over exactly the values that enter the covariance.
+
+    Returns:
+        The coefficient clamped to [-1.0, 1.0], or None when fewer than two
+        pairs are available, when either sample has zero variance, or when
+        the result is not a finite number.
+    """
+    n = min(len(xs), len(ys))
+    if n < 2:
+        return None
+
+    # Trim to the common length so the means match the zipped pairs below.
+    xs = xs[:n]
+    ys = ys[:n]
+
+    mean_x = sum(xs) / n
+    mean_y = sum(ys) / n
+
+    cov = 0.0
+    var_x = 0.0
+    var_y = 0.0
+
+    for x, y in zip(xs, ys):
+        dx = x - mean_x
+        dy = y - mean_y
+        cov += dx * dy
+        var_x += dx * dx
+        var_y += dy * dy
+
+    if var_x == 0.0 or var_y == 0.0:
+        return None
+
+    # Take the square roots separately: the product var_x * var_y can
+    # underflow to 0.0 (ZeroDivisionError) or overflow to inf (r collapses
+    # to 0.0) even though both variances are ordinary finite numbers.
+    denom = math.sqrt(var_x) * math.sqrt(var_y)
+    if denom == 0.0:
+        return None
+
+    r = cov / denom
+
+    # Guard against non-finite results (NaN or infinite inputs)
+    if math.isnan(r) or math.isinf(r):
+        return None
+
+    # Clamp to [-1.0, 1.0]; rounding can push |r| marginally above 1.0
+    return max(-1.0, min(1.0, r))
+
+
+def _rank_data(values: list[float]) -> list[float]:
+    """Return 1-based ranks of ``values``; tied values share their mean rank."""
+    order = sorted(range(len(values)), key=lambda pos: values[pos])
+    size = len(order)
+
+    ranks = [0.0] * size
+    start = 0
+    while start < size:
+        # Extend [start, stop) over every entry equal to the first of the run.
+        stop = start + 1
+        while stop < size and values[order[stop]] == values[order[start]]:
+            stop += 1
+
+        # Mean of the 1-based ranks start+1 .. stop.
+        shared_rank = (start + stop + 1) / 2.0
+        for original_pos in order[start:stop]:
+            ranks[original_pos] = shared_rank
+
+        start = stop
+
+    return ranks
+
+
+def compute_information_coefficient(
+    scores: list[float],
+    returns: list[float],
+) -> float | None:
+    """Return the IC: Pearson correlation of scores against future returns.
+
+    Both series are truncated to their common length. The result lies in
+    [-1.0, 1.0]; None is returned when either series has fewer than 30
+    values or the correlation is undefined.
+    """
+    paired = min(len(scores), len(returns))
+    if paired < 30:
+        return None
+
+    score_sample = scores[:paired]
+    return_sample = returns[:paired]
+    return _pearson_correlation(score_sample, return_sample)
+
+
+def compute_rank_information_coefficient(
+    scores: list[float],
+    returns: list[float],
+) -> float | None:
+    """Return the rank IC: Spearman correlation of scores against returns.
+
+    Both series are truncated to their common length, converted to ranks,
+    and correlated with Pearson's formula. The result lies in [-1.0, 1.0];
+    None is returned when either series has fewer than 30 values or the
+    correlation is undefined.
+    """
+    paired = min(len(scores), len(returns))
+    if paired < 30:
+        return None
+
+    score_ranks = _rank_data(scores[:paired])
+    return_ranks = _rank_data(returns[:paired])
+    return _pearson_correlation(score_ranks, return_ranks)
+
+
+def compute_contribution_scores(
+    weights: list[float],
+) -> list[float]:
+    """Normalize document weights into contribution scores.
+
+    Each score is weight_i / sum(weights). When the weights sum to zero,
+    every document receives an equal share of 1 / len(weights). An empty
+    input gives an empty list.
+    """
+    if not weights:
+        return []
+
+    denominator = sum(weights)
+    if denominator != 0.0:
+        return [weight / denominator for weight in weights]
+
+    # Nothing to apportion by weight: split evenly.
+    size = len(weights)
+    return [1.0 / size] * size
+
+
+def compute_hit_rate_improvement(win_rate: float) -> float:
+    """Hit rate improvement over random 50/50 baseline.
+
+    Defined as (system_win_rate - 0.5) / 0.5.
+    """
+    return (win_rate - 0.5) / 0.5
+
+
+# ---------------------------------------------------------------------------
+# SQL queries for v_prediction_performance view
+# ---------------------------------------------------------------------------
+
+# All-time query: every row of v_prediction_performance for one horizon.
+# Parameters: $1 = horizon label (e.g. "1d").
+_PERFORMANCE_DATA_SQL = """
+SELECT
+    ticker,
+    direction,
+    action,
+    confidence,
+    strength,
+    p_bull,
+    score_company,
+    score_macro,
+    score_competitive,
+    future_return,
+    excess_return_vs_spy,
+    excess_return_vs_sector,
+    direction_correct,
+    profitable,
+    horizon,
+    generated_at
+FROM v_prediction_performance
+WHERE horizon = $1
+"""
+
+# Same projection as _PERFORMANCE_DATA_SQL, restricted to predictions
+# generated at or after a cutoff.
+# Parameters: $1 = horizon label, $2 = earliest generated_at to include.
+_PERFORMANCE_DATA_WITH_LOOKBACK_SQL = """
+SELECT
+    ticker,
+    direction,
+    action,
+    confidence,
+    strength,
+    p_bull,
+    score_company,
+    score_macro,
+    score_competitive,
+    future_return,
+    excess_return_vs_spy,
+    excess_return_vs_sector,
+    direction_correct,
+    profitable,
+    horizon,
+    generated_at
+FROM v_prediction_performance
+WHERE horizon = $1
+  AND generated_at >= $2
+"""
+
+# Persists one computed metric snapshot. The 18 positional parameters follow
+# the column list order; $1 is cast to uuid and $18 (metadata) must be a
+# JSON-encoded string because it is cast to jsonb.
+_INSERT_METRIC_SNAPSHOT_SQL = """
+INSERT INTO model_metric_snapshots (
+    id, generated_at, lookback_window, horizon,
+    prediction_count, win_rate, directional_accuracy,
+    information_coefficient, rank_information_coefficient,
+    avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector,
+    calibration_error, brier_score,
+    buy_win_rate, sell_win_rate, hold_win_rate,
+    metadata
+) VALUES (
+    $1::uuid, $2, $3, $4,
+    $5, $6, $7,
+    $8, $9,
+    $10, $11, $12,
+    $13, $14,
+    $15, $16, $17,
+    $18::jsonb
+)
+"""
+
+
+# ---------------------------------------------------------------------------
+# Metric computation from raw rows
+# ---------------------------------------------------------------------------
+
+
+def _compute_metrics_from_rows(
+    rows: list[dict],
+    lookback_window: str,
+    horizon: str,
+) -> ModelMetricSnapshot:
+    """Compute all metrics from a list of prediction performance rows.
+
+    Returns a ModelMetricSnapshot with all computed metrics.
+    """
+    now = datetime.now().astimezone()
+    snapshot_id = str(uuid.uuid4())
+
+    prediction_count = len(rows)
+
+    if prediction_count == 0:
+        return ModelMetricSnapshot(
+            id=snapshot_id,
+            generated_at=now,
+            lookback_window=lookback_window,
+            horizon=horizon,
+            prediction_count=0,
+            win_rate=0.0,
+            directional_accuracy=0.0,
+            information_coefficient=None,
+            rank_information_coefficient=None,
+            avg_return=0.0,
+            avg_excess_return_vs_spy=0.0,
+            avg_excess_return_vs_sector=0.0,
+            calibration_error=0.0,
+            brier_score=0.0,
+            buy_win_rate=0.0,
+            sell_win_rate=0.0,
+            hold_win_rate=0.0,
+            metadata={},
+        )
+
+    # --- Win rate and directional accuracy ---
+    direction_correct_count = sum(
+        1 for r in rows if r.get("direction_correct") is True
+    )
+    win_rate = direction_correct_count / prediction_count
+    directional_accuracy = win_rate  # Same metric, different name
+
+    # --- Per-action win rates ---
+    buy_rows = [r for r in rows if (r.get("action") or "").lower() == "buy"]
+    sell_rows = [r for r in rows if (r.get("action") or "").lower() == "sell"]
+    hold_rows = [r for r in rows if (r.get("action") or "").lower() == "hold"]
+
+    buy_win_rate = (
+        sum(1 for r in buy_rows if r.get("direction_correct") is True) / len(buy_rows)
+        if buy_rows
+        else 0.0
+    )
+    sell_win_rate = (
+        sum(1 for r in sell_rows if r.get("direction_correct") is True)
+        / len(sell_rows)
+        if sell_rows
+        else 0.0
+    )
+    hold_win_rate = (
+        sum(1 for r in hold_rows if r.get("direction_correct") is True)
+        / len(hold_rows)
+        if hold_rows
+        else 0.0
+    )
+
+    # --- Average return ---
+    returns_list = [
+        r["future_return"] for r in rows if r.get("future_return") is not None
+    ]
+    avg_return = sum(returns_list) / len(returns_list) if returns_list else 0.0
+
+    # --- Average excess return vs SPY (Requirement 9.1) ---
+    excess_spy_list = [
+        r["excess_return_vs_spy"]
+        for r in rows
+        if r.get("excess_return_vs_spy") is not None
+    ]
+    avg_excess_return_vs_spy = (
+        sum(excess_spy_list) / len(excess_spy_list) if excess_spy_list else 0.0
+    )
+
+    # --- Average excess return vs sector ETF (Requirement 9.2) ---
+    excess_sector_list = [
+        r["excess_return_vs_sector"]
+        for r in rows
+        if r.get("excess_return_vs_sector") is not None
+    ]
+    avg_excess_return_vs_sector = (
+        sum(excess_sector_list) / len(excess_sector_list)
+        if excess_sector_list
+        else 0.0
+    )
+
+    # --- Calibration error (ECE) (Requirements 5.1, 5.2, 5.3, 5.5) ---
+    confidences = [
+        r["confidence"] for r in rows if r.get("confidence") is not None
+    ]
+    outcomes = [
+        r.get("direction_correct") is True
+        for r in rows
+        if r.get("confidence") is not None
+    ]
+    ece, _buckets = compute_calibration_error(confidences, outcomes)
+
+    # --- Brier score (Requirement 5.4) ---
+    p_bulls = [r["p_bull"] for r in rows if r.get("p_bull") is not None]
+    brier_outcomes = [
+        r.get("direction_correct") is True
+        for r in rows
+        if r.get("p_bull") is not None
+    ]
+    brier = compute_brier_score(p_bulls, brier_outcomes)
+
+    # --- Information Coefficient (Requirements 6.1, 6.5) ---
+    ic_scores = [
+        r["strength"] for r in rows if r.get("strength") is not None
+        and r.get("future_return") is not None
+    ]
+    ic_returns = [
+        r["future_return"] for r in rows if r.get("strength") is not None
+        and r.get("future_return") is not None
+    ]
+    ic = compute_information_coefficient(ic_scores, ic_returns)
+
+    # --- Rank Information Coefficient (Requirements 6.2, 6.5) ---
+    rank_ic = compute_rank_information_coefficient(ic_scores, ic_returns)
+
+    # --- Hit rate improvement (Requirement 9.4) ---
+    hit_rate_improvement = compute_hit_rate_improvement(win_rate)
+
+    # --- Metadata (Requirement 10.5) ---
+    metadata: dict = {
+        "hit_rate_improvement": hit_rate_improvement,
+        "buy_count": len(buy_rows),
+        "sell_count": len(sell_rows),
+        "hold_count": len(hold_rows),
+        "returns_count": len(returns_list),
+        "excess_spy_count": len(excess_spy_list),
+        "excess_sector_count": len(excess_sector_list),
+    }
+
+    return ModelMetricSnapshot(
+        id=snapshot_id,
+        generated_at=now,
+        lookback_window=lookback_window,
+        horizon=horizon,
+        prediction_count=prediction_count,
+        win_rate=win_rate,
+        directional_accuracy=directional_accuracy,
+        information_coefficient=ic,
+        rank_information_coefficient=rank_ic,
+        avg_return=avg_return,
+        avg_excess_return_vs_spy=avg_excess_return_vs_spy,
+        avg_excess_return_vs_sector=avg_excess_return_vs_sector,
+        calibration_error=ece,
+        brier_score=brier,
+        buy_win_rate=buy_win_rate,
+        sell_win_rate=sell_win_rate,
+        hold_win_rate=hold_win_rate,
+        metadata=metadata,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main entry point (Requirements 10.1, 10.2, 10.3, 10.4, 10.5)
+# ---------------------------------------------------------------------------
+
+
+async def compute_and_store_metric_snapshots(
+    pool: asyncpg.Pool,
+) -> list[ModelMetricSnapshot]:
+    """Compute metric snapshots for all lookback/horizon combinations.
+
+    Lookback windows: 7d, 30d, 90d, all-time.
+    Horizons: 1h, 6h, 1d, 7d, 30d.
+
+    For each of the 4 lookbacks × 5 horizons = 20 combinations, queries the
+    v_prediction_performance view, computes all metrics, and persists the
+    result to model_metric_snapshots.
+
+    Returns the list of computed snapshots.
+    """
+    snapshots: list[ModelMetricSnapshot] = []
+    now = datetime.now().astimezone()
+
+    for lookback in LOOKBACK_WINDOWS:
+        duration = LOOKBACK_DURATIONS[lookback]
+
+        for horizon in EVALUATION_HORIZONS:
+            try:
+                # Query performance data
+                if duration is not None:
+                    cutoff = now - duration
+                    rows = await pool.fetch(
+                        _PERFORMANCE_DATA_WITH_LOOKBACK_SQL,
+                        horizon,
+                        cutoff,
+                    )
+                else:
+                    rows = await pool.fetch(
+                        _PERFORMANCE_DATA_SQL,
+                        horizon,
+                    )
+
+                # Convert asyncpg Records to dicts
+                row_dicts = [dict(r) for r in rows]
+
+                # Compute metrics
+                snapshot = _compute_metrics_from_rows(
+                    row_dicts, lookback, horizon
+                )
+
+                # Persist
+                await pool.execute(
+                    _INSERT_METRIC_SNAPSHOT_SQL,
+                    snapshot.id,
+                    snapshot.generated_at,
+                    snapshot.lookback_window,
+                    snapshot.horizon,
+                    snapshot.prediction_count,
+                    snapshot.win_rate,
+                    snapshot.directional_accuracy,
+                    snapshot.information_coefficient,
+                    snapshot.rank_information_coefficient,
+                    snapshot.avg_return,
+                    snapshot.avg_excess_return_vs_spy,
+                    snapshot.avg_excess_return_vs_sector,
+                    snapshot.calibration_error,
+                    snapshot.brier_score,
+                    snapshot.buy_win_rate,
+                    snapshot.sell_win_rate,
+                    snapshot.hold_win_rate,
+                    json.dumps(snapshot.metadata),
+                )
+
+                snapshots.append(snapshot)
+
+            except Exception:
+                logger.exception(
+                    "Failed to compute metrics for lookback=%s horizon=%s",
+                    lookback,
+                    horizon,
+                )
+                continue
+
+    logger.info(
+        "Computed %d metric snapshots across %d lookback/horizon combinations",
+        len(snapshots),
+        len(LOOKBACK_WINDOWS) * len(EVALUATION_HORIZONS),
+    )
+
+    return snapshots
@@ -0,0 +1,414 @@
+"""Outcome Evaluator — matches predictions with realized market outcomes.
+
+Runs periodically to evaluate prediction snapshots whose horizon has elapsed.
+For each snapshot, fetches future prices at the horizon endpoint and computes
+returns, excess returns, directional accuracy, and profitability across all
+five evaluation horizons (1h, 6h, 1d, 7d, 30d).
+
+Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 4.10
+"""
+from __future__ import annotations
+
+import json
+import logging
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+
+import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# Maps each evaluation horizon label to the time that must elapse after a
+# snapshot's generated_at before its outcome at that horizon is measured.
+HORIZON_DURATIONS: dict[str, timedelta] = {
+    "1h": timedelta(hours=1),
+    "6h": timedelta(hours=6),
+    "1d": timedelta(days=1),
+    "7d": timedelta(days=7),
+    "30d": timedelta(days=30),
+}
+
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class PredictionOutcome:
+    """Realized outcome for a prediction at a specific horizon.
+
+    Benchmark fields (SPY / sector ETF) are None when the corresponding
+    benchmark price was unavailable at prediction time or at the horizon.
+    """
+
+    id: str  # UUID
+    prediction_id: str  # id of the evaluated prediction snapshot
+    evaluated_at: datetime  # when the evaluation ran, not the horizon endpoint
+    horizon: str  # 1h, 6h, 1d, 7d, 30d
+    future_price: float  # ticker close at or before generated_at + horizon
+    future_return: float  # simple return of future_price vs. price at prediction
+    spy_future_price: float | None
+    spy_return: float | None
+    sector_etf_future_price: float | None
+    sector_etf_return: float | None
+    excess_return_vs_spy: float | None  # future_return - spy_return
+    excess_return_vs_sector: float | None  # future_return - sector_etf_return
+    direction_correct: bool  # bullish & return > 0, or bearish & return < 0
+    profitable: bool  # buy & return > 0, or sell & return < 0
+    metadata: dict = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# SQL statements
+# ---------------------------------------------------------------------------
+
+# Find matured predictions: snapshots where generated_at + horizon_duration <= NOW()
+# and no outcome has been recorded yet for that (prediction_id, horizon) pair.
+# We evaluate ALL 5 horizons for each snapshot, not just the snapshot's own horizon.
+# Parameters: $1 = horizon duration (cast to interval), $2 = horizon label.
+_MATURED_PREDICTIONS_SQL = """
+SELECT
+    ps.id,
+    ps.generated_at,
+    ps.ticker,
+    ps.horizon AS snapshot_horizon,
+    ps.direction,
+    ps.action,
+    ps.price_at_prediction,
+    ps.spy_price_at_prediction,
+    ps.sector_etf_price_at_prediction
+FROM prediction_snapshots ps
+WHERE ps.generated_at + $1::interval <= NOW()
+  AND NOT EXISTS (
+      SELECT 1 FROM prediction_outcomes po
+      WHERE po.prediction_id = ps.id AND po.horizon = $2
+  )
+"""
+
+# Fetch the close price for a ticker at or before a specific time.
+# Uses the closest bar before or at the target time.
+# Parameters: $1 = ticker, $2 = target time (upper bound on captured_at).
+# NOTE(review): there is no lower bound on captured_at, so when bar data is
+# stale this can return a bar captured *before* the prediction was generated
+# and have it recorded as the "future" price — confirm whether a lower bound
+# (e.g. captured_at > generated_at) is needed.
+_CLOSE_AT_TIME_SQL = """
+SELECT (data->>'c')::float AS close
+FROM market_snapshots
+WHERE ticker = $1
+  AND snapshot_type = 'bar'
+  AND data->>'c' IS NOT NULL
+  AND captured_at <= $2
+ORDER BY captured_at DESC
+LIMIT 1
+"""
+
+# Persists one prediction outcome. The 15 positional parameters follow the
+# column list order; $1 and $2 are cast to uuid and $15 (metadata) must be a
+# JSON-encoded string because it is cast to jsonb.
+_INSERT_OUTCOME_SQL = """
+INSERT INTO prediction_outcomes (
+    id, prediction_id, evaluated_at, horizon,
+    future_price, future_return,
+    spy_future_price, spy_return,
+    sector_etf_future_price, sector_etf_return,
+    excess_return_vs_spy, excess_return_vs_sector,
+    direction_correct, profitable,
+    metadata
+) VALUES (
+    $1::uuid, $2::uuid, $3, $4,
+    $5, $6,
+    $7, $8,
+    $9, $10,
+    $11, $12,
+    $13, $14,
+    $15::jsonb
+)
+"""
+
+
+# ---------------------------------------------------------------------------
+# Price fetching at a specific time
+# ---------------------------------------------------------------------------
+
+
+async def _fetch_close_at_time(
+    pool: asyncpg.Pool,
+    ticker: str,
+    target_time: datetime,
+) -> float | None:
+    """Fetch the close price for a ticker at or before a specific time.
+
+    Returns None if no market data is available before the target time.
+    """
+    row = await pool.fetchrow(_CLOSE_AT_TIME_SQL, ticker, target_time)
+    if row is None:
+        return None
+    return row["close"]
+
+
+# ---------------------------------------------------------------------------
+# Sector ETF lookup (reuse pattern from prediction_snapshot)
+# ---------------------------------------------------------------------------
+
+# Maps a company's sector name (as stored in companies.sector) to the ticker
+# of the sector ETF used as its benchmark. Sectors not listed here have no
+# sector benchmark.
+_SECTOR_ETF_MAP: dict[str, str] = {
+    "Technology": "XLK",
+    "Consumer Cyclical": "XLY",
+    "Financial Services": "XLF",
+    "Healthcare": "XLV",
+    "Energy": "XLE",
+    "Communication Services": "XLC",
+    "Industrials": "XLI",
+    "Consumer Defensive": "XLP",
+    "Real Estate": "XLRE",
+    "Utilities": "XLU",
+}
+
+# Looks up the sector of an active company. Parameters: $1 = ticker.
+_COMPANY_SECTOR_SQL = """
+SELECT sector FROM companies WHERE ticker = $1 AND active = TRUE LIMIT 1
+"""
+
+
+async def _fetch_sector_etf_ticker(pool: asyncpg.Pool, ticker: str) -> str | None:
+    """Look up the sector ETF ticker for a company ticker."""
+    row = await pool.fetchrow(_COMPANY_SECTOR_SQL, ticker)
+    if row is None or row["sector"] is None:
+        return None
+    return _SECTOR_ETF_MAP.get(row["sector"])
+
+
+# ---------------------------------------------------------------------------
+# Return computation helpers
+# ---------------------------------------------------------------------------
+
+
+def _compute_return(current_price: float, future_price: float) -> float:
+    """Compute simple return: (future - current) / current."""
+    if current_price == 0.0:
+        return 0.0
+    return (future_price - current_price) / current_price
+
+
+def _is_direction_correct(direction: str, future_return: float) -> bool:
+    """Determine if the predicted direction matches the realized return.
+
+    bullish + positive return = True
+    bearish + negative return = True
+    All other combinations = False
+    """
+    direction_lower = direction.lower()
+    if direction_lower == "bullish" and future_return > 0.0:
+        return True
+    if direction_lower == "bearish" and future_return < 0.0:
+        return True
+    return False
+
+
+def _is_profitable(action: str, future_return: float) -> bool:
+    """Determine if the predicted action would have been profitable.
+
+    buy + positive return = True
+    sell + negative return = True
+    All other combinations = False
+    """
+    action_lower = action.lower()
+    if action_lower == "buy" and future_return > 0.0:
+        return True
+    if action_lower == "sell" and future_return < 0.0:
+        return True
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Single prediction evaluation (Requirements 4.2–4.7)
+# ---------------------------------------------------------------------------
+
+
+async def evaluate_single_prediction(
+    pool: asyncpg.Pool,
+    snapshot: dict,
+    horizon: str,
+) -> PredictionOutcome | None:
+    """Evaluate a single prediction at a specific horizon.
+
+    Fetches the future price at generated_at + horizon_duration for the ticker,
+    SPY, and sector ETF. Computes returns, excess returns, direction correctness,
+    and profitability.
+
+    Returns None if the ticker's future price is unavailable (Requirement 4.10).
+    """
+    duration = HORIZON_DURATIONS[horizon]
+    target_time = snapshot["generated_at"] + duration
+    ticker = snapshot["ticker"]
+
+    # Fetch future price for the ticker — required (skip if unavailable)
+    future_price = await _fetch_close_at_time(pool, ticker, target_time)
+    if future_price is None:
+        logger.debug(
+            "Future price unavailable for %s at horizon %s (target %s), skipping",
+            ticker,
+            horizon,
+            target_time,
+        )
+        return None
+
+    price_at_prediction = snapshot["price_at_prediction"]
+    if price_at_prediction is None or price_at_prediction == 0.0:
+        logger.warning(
+            "Price at prediction is NULL or zero for snapshot %s, skipping horizon %s",
+            snapshot["id"],
+            horizon,
+        )
+        return None
+
+    # Compute ticker future return (Requirement 4.2)
+    future_return = _compute_return(price_at_prediction, future_price)
+
+    # Fetch SPY future price and compute SPY return (Requirement 4.3)
+    spy_future_price: float | None = None
+    spy_return: float | None = None
+    spy_price_at_prediction = snapshot["spy_price_at_prediction"]
+
+    if spy_price_at_prediction is not None and spy_price_at_prediction != 0.0:
+        spy_future_price = await _fetch_close_at_time(pool, "SPY", target_time)
+        if spy_future_price is not None:
+            spy_return = _compute_return(spy_price_at_prediction, spy_future_price)
+
+    # Fetch sector ETF future price and compute sector return (Requirement 4.4)
+    sector_etf_future_price: float | None = None
+    sector_etf_return: float | None = None
+    sector_etf_price_at_prediction = snapshot["sector_etf_price_at_prediction"]
+
+    if (
+        sector_etf_price_at_prediction is not None
+        and sector_etf_price_at_prediction != 0.0
+    ):
+        sector_etf_ticker = await _fetch_sector_etf_ticker(pool, ticker)
+        if sector_etf_ticker is not None:
+            sector_etf_future_price = await _fetch_close_at_time(
+                pool, sector_etf_ticker, target_time
+            )
+            if sector_etf_future_price is not None:
+                sector_etf_return = _compute_return(
+                    sector_etf_price_at_prediction, sector_etf_future_price
+                )
+
+    # Compute excess returns (Requirement 4.5)
+    excess_return_vs_spy: float | None = None
+    if future_return is not None and spy_return is not None:
+        excess_return_vs_spy = future_return - spy_return
+
+    excess_return_vs_sector: float | None = None
+    if future_return is not None and sector_etf_return is not None:
+        excess_return_vs_sector = future_return - sector_etf_return
+
+    # Determine direction correctness (Requirement 4.6)
+    direction_correct = _is_direction_correct(snapshot["direction"], future_return)
+
+    # Determine profitability (Requirement 4.7)
+    profitable = _is_profitable(snapshot["action"], future_return)
+
+    now = datetime.now().astimezone()
+
+    return PredictionOutcome(
+        id=str(uuid.uuid4()),
+        prediction_id=str(snapshot["id"]),
+        evaluated_at=now,
+        horizon=horizon,
+        future_price=future_price,
+        future_return=future_return,
+        spy_future_price=spy_future_price,
+        spy_return=spy_return,
+        sector_etf_future_price=sector_etf_future_price,
+        sector_etf_return=sector_etf_return,
+        excess_return_vs_spy=excess_return_vs_spy,
+        excess_return_vs_sector=excess_return_vs_sector,
+        direction_correct=direction_correct,
+        profitable=profitable,
+        metadata={
+            "ticker": ticker,
+            "horizon": horizon,
+            "price_at_prediction": price_at_prediction,
+            "future_price": future_price,
+        },
+    )
+
+
+# ---------------------------------------------------------------------------
+# Store outcome (Requirement 4.9)
+# ---------------------------------------------------------------------------
+
+
+async def _store_outcome(
+    conn: asyncpg.Connection,
+    outcome: PredictionOutcome,
+) -> None:
+    """Persist a single prediction outcome to the database."""
+    await conn.execute(
+        _INSERT_OUTCOME_SQL,
+        outcome.id,
+        outcome.prediction_id,
+        outcome.evaluated_at,
+        outcome.horizon,
+        outcome.future_price,
+        outcome.future_return,
+        outcome.spy_future_price,
+        outcome.spy_return,
+        outcome.sector_etf_future_price,
+        outcome.sector_etf_return,
+        outcome.excess_return_vs_spy,
+        outcome.excess_return_vs_sector,
+        outcome.direction_correct,
+        outcome.profitable,
+        json.dumps(outcome.metadata),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Main entry point (Requirements 4.1, 4.8, 4.9, 4.10)
+# ---------------------------------------------------------------------------
+
+
+async def evaluate_matured_predictions(
+    pool: asyncpg.Pool,
+) -> int:
+    """Evaluate all matured prediction snapshots across all horizons.
+
+    For each of the 5 horizons (1h, 6h, 1d, 7d, 30d), finds prediction
+    snapshots where generated_at + horizon_duration <= NOW() and no outcome
+    has been recorded for that (prediction_id, horizon) pair.
+
+    For each matured snapshot-horizon pair, fetches future prices and computes
+    returns. Skips horizons where the future price is unavailable — those will
+    be retried on the next run (Requirement 4.10).
+
+    Returns the total count of outcomes recorded.
+    """
+    total_recorded = 0
+
+    for horizon, duration in HORIZON_DURATIONS.items():
+        # Find snapshots matured for this horizon
+        rows = await pool.fetch(_MATURED_PREDICTIONS_SQL, duration, horizon)
+
+        if not rows:
+            continue
+
+        logger.info(
+            "Found %d matured predictions for horizon %s", len(rows), horizon
+        )
+
+        for row in rows:
+            snapshot = dict(row)
+            try:
+                outcome = await evaluate_single_prediction(pool, snapshot, horizon)
+                if outcome is None:
+                    # Future price unavailable — skip, retry next run
+                    continue
+
+                async with pool.acquire() as conn:
+                    async with conn.transaction():
+                        await _store_outcome(conn, outcome)
+
+                total_recorded += 1
+
+            except Exception:
+                logger.exception(
+                    "Failed to evaluate snapshot %s at horizon %s",
+                    snapshot["id"],
+                    horizon,
+                )
+                continue
+
+    logger.info("Outcome evaluation complete: %d outcomes recorded", total_recorded)
+    return total_recorded
@@ -0,0 +1,540 @@
+"""Prediction Snapshot Writer — captures immutable prediction state at generation time.
+
+Creates frozen records of every recommendation with prices, evidence links,
+duplicate detection, and contribution scores so that predictions can be
+evaluated against future outcomes without hindsight bias.
+
+Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.1, 3.2, 3.3, 3.4
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import urllib.parse
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime
+
+import asyncpg
+
+from services.shared.schemas import Recommendation, TrendSummary
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# Maps a company's sector name (as stored in companies.sector) to the ticker
+# of the sector ETF used as its benchmark. Sectors not listed here have no
+# sector benchmark.
+SECTOR_ETF_MAP: dict[str, str] = {
+    "Technology": "XLK",
+    "Consumer Cyclical": "XLY",
+    "Financial Services": "XLF",
+    "Healthcare": "XLV",
+    "Energy": "XLE",
+    "Communication Services": "XLC",
+    "Industrials": "XLI",
+    "Consumer Defensive": "XLP",
+    "Real Estate": "XLRE",
+    "Utilities": "XLU",
+}
+
+# Horizon labels at which predictions are evaluated.
+EVALUATION_HORIZONS: list[str] = ["1h", "6h", "1d", "7d", "30d"]
+
+# Upper bound applied to a single evidence document's weight
+# (see SignalEvidenceLink.weight).
+MAX_SINGLE_DOCUMENT_WEIGHT: float = 1.0
+
+
+# ---------------------------------------------------------------------------
+# Dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class PredictionSnapshot:
+    """Immutable snapshot of a prediction at generation time.
+
+    Price fields are optional: they are None when no price was captured for
+    the ticker / SPY / sector ETF at generation time.
+    """
+
+    id: str  # UUID
+    generated_at: datetime
+    ticker: str
+    window: str
+    horizon: str
+    direction: str  # bullish/bearish/mixed/neutral
+    action: str  # buy/sell/hold/watch
+    mode: str  # informational/paper_eligible/live_eligible
+    strength: float
+    confidence: float
+    contradiction: float
+    p_bull: float | None
+    p_bear: float | None
+    # Layer scores — presumably the weight fractions produced by
+    # _compute_layer_scores; confirm against the snapshot writer.
+    score_company: float
+    score_macro: float
+    score_competitive: float
+    evidence_count: int
+    unique_source_count: int
+    duplicate_evidence_count: int
+    price_at_prediction: float | None
+    spy_price_at_prediction: float | None
+    sector_etf_price_at_prediction: float | None
+    metadata: dict = field(default_factory=dict)
+
+
+@dataclass
+class SignalEvidenceLink:
+    """Link between a prediction and a contributing evidence document.
+
+    One instance describes a single evidence document's role in a single
+    prediction, including its weight and normalized contribution.
+    """
+
+    id: str  # UUID
+    prediction_id: str  # id of the PredictionSnapshot this evidence supports
+    document_id: str
+    signal_id: str
+    ticker: str
+    source: str
+    source_type: str
+    catalyst_type: str
+    sentiment: str
+    impact: float
+    extraction_confidence: float
+    weight: float  # clamped to MAX_SINGLE_DOCUMENT_WEIGHT
+    is_duplicate: bool
+    # Presumably the digest from compute_canonical_evidence_key — confirm in
+    # the code that builds these links.
+    canonical_evidence_key: str
+    contribution_score: float  # weight / total_weight, sums to 1.0
+    metadata: dict = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# Canonical evidence key computation (Requirements 2.3, 17.4)
+# ---------------------------------------------------------------------------
+
+
+def compute_canonical_evidence_key(title: str, url: str) -> str:
+    """SHA256 of normalized(title) + normalized(url).
+
+    Normalization:
+    - Title: lowercase, strip leading/trailing whitespace
+    - URL: lowercase, strip query parameters (keep scheme, netloc, path)
+    """
+    normalized_title = title.strip().lower()
+
+    parsed = urllib.parse.urlparse(url.lower())
+    normalized_url = urllib.parse.urlunparse(
+        (parsed.scheme, parsed.netloc, parsed.path, "", "", "")
+    )
+
+    combined = normalized_title + normalized_url
+    return hashlib.sha256(combined.encode("utf-8")).hexdigest()
+
+
+# ---------------------------------------------------------------------------
+# Contribution score computation (Requirements 2.5, 17.7)
+# ---------------------------------------------------------------------------
+
+
+def compute_contribution_scores(weights: list[float]) -> list[float]:
+    """Compute contribution scores: each score = weight_i / sum(weights).
+
+    All scores are in [0.0, 1.0] and sum to 1.0 (within floating-point tolerance).
+    Returns an empty list for empty input.
+    """
+    if not weights:
+        return []
+
+    total = sum(weights)
+    if total == 0.0:
+        # All weights are zero — distribute equally
+        n = len(weights)
+        return [1.0 / n] * n
+
+    return [w / total for w in weights]
+
+
+# ---------------------------------------------------------------------------
+# Price fetching (Requirements 1.2, 1.3, 1.4, 1.5)
+# ---------------------------------------------------------------------------
+
+# Most recent bar close for a ticker, read from the JSON 'c' field of
+# market_snapshots.data. Parameters: $1 = ticker.
+_LATEST_CLOSE_SQL = """
+SELECT (data->>'c')::float AS close
+FROM market_snapshots
+WHERE ticker = $1 AND snapshot_type = 'bar' AND data->>'c' IS NOT NULL
+ORDER BY captured_at DESC
+LIMIT 1
+"""
+
+
+async def fetch_latest_close_price(
+    pool: asyncpg.Pool,
+    ticker: str,
+) -> float | None:
+    """Fetch most recent close price from market_snapshots for a ticker.
+
+    Returns None if no market data is available for the ticker.
+    """
+    row = await pool.fetchrow(_LATEST_CLOSE_SQL, ticker)
+    if row is None:
+        return None
+    return row["close"]
+
+
+# ---------------------------------------------------------------------------
+# Sector ETF lookup
+# ---------------------------------------------------------------------------
+
+# Looks up the sector of an active company. Parameters: $1 = ticker.
+_COMPANY_SECTOR_SQL = """
+SELECT sector FROM companies WHERE ticker = $1 AND active = TRUE LIMIT 1
+"""
+
+
+async def _fetch_sector_etf_ticker(pool: asyncpg.Pool, ticker: str) -> str | None:
+    """Look up the sector ETF ticker for a company ticker."""
+    row = await pool.fetchrow(_COMPANY_SECTOR_SQL, ticker)
+    if row is None or row["sector"] is None:
+        return None
+    return SECTOR_ETF_MAP.get(row["sector"])
+
+
+# ---------------------------------------------------------------------------
+# Layer score computation
+# ---------------------------------------------------------------------------
+
+
+def _compute_layer_scores(
+    evidence_signals: list[dict],
+) -> tuple[float, float, float]:
+    """Split total evidence weight across the company, macro and competitive layers.
+
+    Each signal is assigned to exactly one layer, checked in this order:
+    - macro: ``source_type`` contains 'macro', or ``catalyst_type`` equals 'macro'
+    - competitive: ``source_type`` contains 'competitive' or 'pattern'
+    - company: everything else
+
+    Matching is case-insensitive. A missing *or* explicitly ``None`` value for
+    ``weight``, ``source_type`` or ``catalyst_type`` is treated as 0.0 / "" so
+    that rows carrying NULLs do not raise.
+
+    Args:
+        evidence_signals: Signal dicts; only the ``weight``, ``source_type``
+            and ``catalyst_type`` keys are read.
+
+    Returns:
+        ``(score_company, score_macro, score_competitive)``, each rounded to
+        6 decimals. The three values are fractions of the total weight (so
+        they sum to 1.0 up to rounding), or ``(0.0, 0.0, 0.0)`` when the total
+        weight is zero (including an empty input).
+    """
+    company_weight = 0.0
+    macro_weight = 0.0
+    competitive_weight = 0.0
+
+    for sig in evidence_signals:
+        # ``dict.get(key, default)`` only applies the default when the key is
+        # absent; ``or`` also covers keys that are present but set to None.
+        w = sig.get("weight") or 0.0
+        source_type = (sig.get("source_type") or "").lower()
+        catalyst_type = (sig.get("catalyst_type") or "").lower()
+
+        if "macro" in source_type or catalyst_type == "macro":
+            macro_weight += w
+        elif "competitive" in source_type or "pattern" in source_type:
+            competitive_weight += w
+        else:
+            company_weight += w
+
+    total = company_weight + macro_weight + competitive_weight
+    if total == 0.0:
+        # Nothing to normalise by; avoid a ZeroDivisionError.
+        return (0.0, 0.0, 0.0)
+
+    return (
+        round(company_weight / total, 6),
+        round(macro_weight / total, 6),
+        round(competitive_weight / total, 6),
+    )
+
+
+# ---------------------------------------------------------------------------
+# SQL statements
+# ---------------------------------------------------------------------------
+
+# Insert one prediction_snapshots row. 23 positional parameters, in column
+# order; $1 is cast to uuid and $23 to jsonb.
+#
+# ``window`` is a reserved key word in PostgreSQL, so it must be written as
+# a quoted identifier ("window") to be accepted as a column name here.
+# NOTE(review): assumes the column created by the migration is named
+# lowercase ``window`` - confirm against migration 035.
+_INSERT_SNAPSHOT_SQL = """
+INSERT INTO prediction_snapshots (
+    id, generated_at, ticker, "window", horizon, direction, action, mode,
+    strength, confidence, contradiction, p_bull, p_bear,
+    score_company, score_macro, score_competitive,
+    evidence_count, unique_source_count, duplicate_evidence_count,
+    price_at_prediction, spy_price_at_prediction, sector_etf_price_at_prediction,
+    metadata
+) VALUES (
+    $1::uuid, $2, $3, $4, $5, $6, $7, $8,
+    $9, $10, $11, $12, $13,
+    $14, $15, $16,
+    $17, $18, $19,
+    $20, $21, $22,
+    $23::jsonb
+)
+"""
+
+# Insert one signal_evidence_links row. 16 positional parameters, in column
+# order; $1 (id) and $2 (prediction_id) are cast to uuid and $16 (metadata)
+# to jsonb.
+_INSERT_EVIDENCE_LINK_SQL = """
+INSERT INTO signal_evidence_links (
+    id, prediction_id, document_id, signal_id, ticker,
+    source, source_type, catalyst_type, sentiment,
+    impact, extraction_confidence, weight,
+    is_duplicate, canonical_evidence_key, contribution_score,
+    metadata
+) VALUES (
+    $1::uuid, $2::uuid, $3, $4, $5,
+    $6, $7, $8, $9,
+    $10, $11, $12,
+    $13, $14, $15,
+    $16::jsonb
+)
+"""
+
+
+# ---------------------------------------------------------------------------
+# Main entry point (Requirements 1.1–1.7, 2.1–2.6, 3.1–3.4)
+# ---------------------------------------------------------------------------
+
+
+async def create_prediction_snapshot(
+    pool: asyncpg.Pool,
+    recommendation: Recommendation,
+    trend_summary: TrendSummary,
+    evidence_signals: list[dict],
+    evidence_docs: list[dict],
+) -> PredictionSnapshot:
+    """Create and persist a prediction snapshot with evidence links.
+
+    Steps:
+    1. Fetch the latest stored prices (ticker, SPY, sector ETF) from
+       market_snapshots; missing prices are logged and stored as NULL
+    2. Compute canonical evidence keys, flag duplicates, clamp each weight to
+       MAX_SINGLE_DOCUMENT_WEIGHT and assign contribution scores
+       (one vote per canonical key)
+    3. Compute deduplication metrics and layer scores
+    4. Build the snapshot (with market-context metadata) and its evidence links
+    5. Persist snapshot and evidence links in a single transaction
+
+    Args:
+        pool: asyncpg connection pool.
+        recommendation: The generated Recommendation object.
+        trend_summary: The TrendSummary used to generate the recommendation.
+        evidence_signals: List of dicts with signal fields (source, source_type,
+            catalyst_type, sentiment, impact, extraction_confidence, weight,
+            document_id, signal_id, ticker).
+        evidence_docs: List of dicts with document metadata (title, url, document_id).
+
+    Returns:
+        The persisted PredictionSnapshot.
+    """
+    ticker = recommendation.ticker
+
+    # 1. Fetch prices — handle NULL gracefully (Requirement 1.5)
+    # NOTE(review): these are the *latest* stored closes, not the closes as of
+    # recommendation.generated_at — confirm that is intended for replayed /
+    # backfilled recommendations.
+    (
+        ticker_price,
+        spy_price,
+        sector_etf_ticker,
+        sector_etf_price,
+    ) = await _fetch_prediction_prices(pool, ticker)
+
+    # 2. Canonical keys, duplicate flags, clamped weights, contribution scores
+    #    (Requirements 3.3, 3.4)
+    processed_links = _process_evidence_links(ticker, evidence_signals, evidence_docs)
+
+    # 3. Deduplication quality metrics (Requirements 3.1, 3.2)
+    unique_links = [link for link in processed_links if not link["is_duplicate"]]
+    unique_source_count = len({link["source"] for link in unique_links})
+    duplicate_evidence_count = len(processed_links) - len(unique_links)
+
+    # NOTE(review): layer scores are derived from the raw evidence_signals,
+    # i.e. before weight clamping and including duplicates, unlike the
+    # contribution scores above — confirm this asymmetry is intended.
+    score_company, score_macro, score_competitive = _compute_layer_scores(
+        evidence_signals
+    )
+
+    # 4. Build the snapshot and its evidence link objects (Requirement 1.7)
+    metadata = _build_snapshot_metadata(trend_summary, sector_etf_ticker)
+
+    p_bull = trend_summary.p_bull
+    snapshot_id = str(uuid.uuid4())
+    snapshot = PredictionSnapshot(
+        id=snapshot_id,
+        generated_at=recommendation.generated_at,
+        ticker=ticker,
+        window=trend_summary.window.value,
+        horizon=recommendation.time_horizon,
+        direction=trend_summary.trend_direction.value,
+        action=recommendation.action.value,
+        mode=recommendation.mode.value,
+        strength=trend_summary.trend_strength,
+        confidence=recommendation.confidence,
+        contradiction=trend_summary.contradiction_score,
+        p_bull=p_bull,
+        p_bear=(1.0 - p_bull) if p_bull is not None else None,
+        score_company=score_company,
+        score_macro=score_macro,
+        score_competitive=score_competitive,
+        evidence_count=len(processed_links),
+        unique_source_count=unique_source_count,
+        duplicate_evidence_count=duplicate_evidence_count,
+        price_at_prediction=ticker_price,
+        spy_price_at_prediction=spy_price,
+        sector_etf_price_at_prediction=sector_etf_price,
+        metadata=metadata,
+    )
+
+    evidence_link_objects: list[SignalEvidenceLink] = [
+        SignalEvidenceLink(
+            id=link["id"],
+            prediction_id=snapshot_id,
+            document_id=link["document_id"],
+            signal_id=link["signal_id"],
+            ticker=link["ticker"],
+            source=link["source"],
+            source_type=link["source_type"],
+            catalyst_type=link["catalyst_type"],
+            sentiment=link["sentiment"],
+            impact=link["impact"],
+            extraction_confidence=link["extraction_confidence"],
+            weight=link["weight"],
+            is_duplicate=link["is_duplicate"],
+            canonical_evidence_key=link["canonical_evidence_key"],
+            contribution_score=link["contribution_score"],
+        )
+        for link in processed_links
+    ]
+
+    # 5. Persist in a transaction (Requirements 1.6, 2.6)
+    await _persist_snapshot_with_links(pool, snapshot, evidence_link_objects)
+
+    logger.info(
+        "Created prediction snapshot %s for %s: %d evidence links "
+        "(%d unique sources, %d duplicates), prices: ticker=%s spy=%s sector_etf=%s",
+        snapshot_id,
+        ticker,
+        len(evidence_link_objects),
+        unique_source_count,
+        duplicate_evidence_count,
+        ticker_price,
+        spy_price,
+        sector_etf_price,
+    )
+
+    return snapshot
+
+
+async def _fetch_prediction_prices(
+    pool: asyncpg.Pool,
+    ticker: str,
+) -> tuple[float | None, float | None, str | None, float | None]:
+    """Fetch the ticker, SPY and sector-ETF closes recorded with a snapshot.
+
+    Every price that cannot be found is logged as a warning and returned as
+    None rather than raising.
+
+    Returns:
+        ``(ticker_price, spy_price, sector_etf_ticker, sector_etf_price)``.
+    """
+    ticker_price = await fetch_latest_close_price(pool, ticker)
+    if ticker_price is None:
+        logger.warning("No market price available for %s at snapshot time", ticker)
+
+    spy_price = await fetch_latest_close_price(pool, "SPY")
+    if spy_price is None:
+        logger.warning("No SPY price available at snapshot time")
+
+    sector_etf_ticker = await _fetch_sector_etf_ticker(pool, ticker)
+    sector_etf_price: float | None = None
+    if sector_etf_ticker is None:
+        logger.warning("No sector ETF mapping found for ticker %s", ticker)
+    else:
+        sector_etf_price = await fetch_latest_close_price(pool, sector_etf_ticker)
+        if sector_etf_price is None:
+            logger.warning(
+                "No sector ETF price available for %s (%s) at snapshot time",
+                sector_etf_ticker,
+                ticker,
+            )
+
+    return ticker_price, spy_price, sector_etf_ticker, sector_etf_price
+
+
+def _process_evidence_links(
+    ticker: str,
+    evidence_signals: list[dict],
+    evidence_docs: list[dict],
+) -> list[dict]:
+    """Turn raw signals into link dicts with dedup flags and contribution scores.
+
+    For each signal, in input order: look up its document by ``document_id``,
+    compute the canonical evidence key from the document title and URL, flag
+    it as a duplicate when that key was already seen, and cap its weight at
+    MAX_SINGLE_DOCUMENT_WEIGHT. Contribution scores are then computed over the
+    non-duplicate weights only; duplicates receive 0.0.
+
+    Args:
+        ticker: Fallback ticker for signals that carry none.
+        evidence_signals: Signal dicts (see create_prediction_snapshot).
+        evidence_docs: Document metadata dicts (title, url, document_id).
+
+    Returns:
+        One dict per input signal holding the SignalEvidenceLink fields.
+    """
+    # When several docs share a document_id the last one wins.
+    doc_lookup: dict[str, dict] = {
+        doc.get("document_id", ""): doc for doc in evidence_docs
+    }
+
+    processed_links: list[dict] = []
+    seen_canonical_keys: set[str] = set()
+
+    for sig in evidence_signals:
+        doc_id = sig.get("document_id", "")
+        doc_meta = doc_lookup.get(doc_id, {})
+
+        # NOTE(review): signals whose document is absent from evidence_docs
+        # all pass ("", "") here; if the helper is deterministic they share
+        # one key and every one after the first is flagged as a duplicate —
+        # confirm that is intended.
+        canonical_key = compute_canonical_evidence_key(
+            doc_meta.get("title", ""),
+            doc_meta.get("url", ""),
+        )
+
+        # First occurrence of a key is the original; later ones are duplicates.
+        is_duplicate = canonical_key in seen_canonical_keys
+        seen_canonical_keys.add(canonical_key)
+
+        # Clamp weight to MAX_SINGLE_DOCUMENT_WEIGHT (Requirement 3.3).
+        # A key that is present but None is treated like a missing key so
+        # min() never compares None with a float.
+        raw_weight = sig.get("weight")
+        if raw_weight is None:
+            raw_weight = 0.0
+        clamped_weight = min(raw_weight, MAX_SINGLE_DOCUMENT_WEIGHT)
+
+        processed_links.append({
+            "id": str(uuid.uuid4()),
+            "document_id": doc_id,
+            "signal_id": sig.get("signal_id", ""),
+            "ticker": sig.get("ticker", ticker),
+            "source": sig.get("source", ""),
+            "source_type": sig.get("source_type", ""),
+            "catalyst_type": sig.get("catalyst_type", ""),
+            "sentiment": sig.get("sentiment", ""),
+            "impact": sig.get("impact", 0.0),
+            "extraction_confidence": sig.get("extraction_confidence", 0.0),
+            "weight": clamped_weight,
+            "is_duplicate": is_duplicate,
+            "canonical_evidence_key": canonical_key,
+        })
+
+    # One vote per canonical key (Requirement 3.4): only non-duplicates enter
+    # the weight pool handed to compute_contribution_scores.
+    non_dup_scores = compute_contribution_scores(
+        [link["weight"] for link in processed_links if not link["is_duplicate"]]
+    )
+
+    score_idx = 0
+    for link in processed_links:
+        if link["is_duplicate"]:
+            link["contribution_score"] = 0.0
+        else:
+            link["contribution_score"] = non_dup_scores[score_idx]
+            score_idx += 1
+
+    return processed_links
+
+
+def _build_snapshot_metadata(
+    trend_summary: TrendSummary,
+    sector_etf_ticker: str | None,
+) -> dict:
+    """Collect market context and sector ETF ticker into the snapshot metadata dict."""
+    metadata: dict = {}
+
+    market_context = trend_summary.market_context
+    if market_context is not None:
+        metadata["market_context"] = {
+            "ticker": market_context.ticker,
+            "price_change_pct": market_context.price_change_pct,
+            "avg_volume": market_context.avg_volume,
+            "volume_change_pct": market_context.volume_change_pct,
+            "volatility": market_context.volatility,
+            "latest_close": market_context.latest_close,
+            "bars_available": market_context.bars_available,
+        }
+
+    if sector_etf_ticker is not None:
+        metadata["sector_etf_ticker"] = sector_etf_ticker
+
+    return metadata
+
+
+async def _persist_snapshot_with_links(
+    pool: asyncpg.Pool,
+    snapshot: PredictionSnapshot,
+    evidence_links: list[SignalEvidenceLink],
+) -> None:
+    """Insert the snapshot row and all of its evidence links in one transaction."""
+    snapshot_args = (
+        snapshot.id,
+        snapshot.generated_at,
+        snapshot.ticker,
+        snapshot.window,
+        snapshot.horizon,
+        snapshot.direction,
+        snapshot.action,
+        snapshot.mode,
+        snapshot.strength,
+        snapshot.confidence,
+        snapshot.contradiction,
+        snapshot.p_bull,
+        snapshot.p_bear,
+        snapshot.score_company,
+        snapshot.score_macro,
+        snapshot.score_competitive,
+        snapshot.evidence_count,
+        snapshot.unique_source_count,
+        snapshot.duplicate_evidence_count,
+        snapshot.price_at_prediction,
+        snapshot.spy_price_at_prediction,
+        snapshot.sector_etf_price_at_prediction,
+        json.dumps(snapshot.metadata),
+    )
+    link_rows = [
+        (
+            link.id,
+            link.prediction_id,
+            link.document_id,
+            link.signal_id,
+            link.ticker,
+            link.source,
+            link.source_type,
+            link.catalyst_type,
+            link.sentiment,
+            link.impact,
+            link.extraction_confidence,
+            link.weight,
+            link.is_duplicate,
+            link.canonical_evidence_key,
+            link.contribution_score,
+            json.dumps(link.metadata),
+        )
+        for link in evidence_links
+    ]
+
+    async with pool.acquire() as conn:
+        async with conn.transaction():
+            await conn.execute(_INSERT_SNAPSHOT_SQL, *snapshot_args)
+            # One batched call instead of one round trip per evidence link;
+            # skipped entirely when there is nothing to insert.
+            if link_rows:
+                await conn.executemany(_INSERT_EVIDENCE_LINK_SQL, link_rows)