Files
stonks-oracle/services/validation/attribution.py
T
Celes Renata 7fcc8a6c07
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
feat: model validation, calibration, and signal quality layer
- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
2026-05-01 03:04:58 +00:00

592 lines
17 KiB
Python

"""Attribution Engine — per-source, per-catalyst, and per-layer performance.
Joins signal evidence links with prediction outcomes to compute attribution
metrics that identify which sources, catalyst types, and signal layers
contribute most to accurate predictions.
Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
"""
from __future__ import annotations
import logging
import math
from dataclasses import dataclass
from datetime import datetime, timedelta
import asyncpg
# Module-level logger, named after this module via __name__; used by the
# attribution functions below for query failures and completion summaries.
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------
@dataclass
class SourceAttribution:
    """Aggregated outcome statistics for one evidence source.

    One instance is built per distinct ``source`` value by
    ``compute_source_attribution``.
    """

    source: str  # grouping key ("unknown" when the row's source is empty)
    source_type: str  # category label reported for the source
    prediction_count: int  # number of queried rows attributed to this source
    avg_weight: float  # mean of the non-null weights, 0.0 if there are none
    avg_contribution_score: float  # mean of the non-null contribution scores
    win_rate: float  # correct-direction share among rows that have a verdict
    avg_future_return: float  # mean of the non-null future returns
    avg_excess_return_vs_spy: float  # mean of the non-null excess returns vs SPY
    information_coefficient: float | None  # None when it cannot be computed
    duplicate_rate: float  # rows flagged is_duplicate divided by all rows
@dataclass
class CatalystAttribution:
    """Aggregated outcome statistics for one catalyst type.

    One instance is built per distinct ``catalyst_type`` value by
    ``compute_catalyst_attribution``.
    """

    catalyst_type: str  # grouping key ("unknown" when the row's value is empty)
    prediction_count: int  # number of queried rows carrying this catalyst type
    win_rate: float  # correct-direction share among rows that have a verdict
    avg_future_return: float  # mean of the non-null future returns
    avg_excess_return_vs_spy: float  # mean of the non-null excess returns vs SPY
    information_coefficient: float | None  # None when it cannot be computed
@dataclass
class LayerAttribution:
    """Aggregated outcome statistics for one signal layer.

    A layer counts as "dominant" for a prediction when it supplies more than
    30% of that prediction's combined layer score.
    """

    layer: str  # one of: company, macro, competitive
    avg_contribution_pct: float  # mean share of the combined score from this layer
    dominant_win_rate: float  # win rate over predictions where the layer is dominant
    dominant_ic: float | None  # IC over those same predictions; None if unavailable
# ---------------------------------------------------------------------------
# Pure computation helpers
# ---------------------------------------------------------------------------
def _pearson_correlation(xs: list[float], ys: list[float]) -> float | None:
"""Compute Pearson correlation coefficient between two lists.
Returns None if the lists have fewer than 2 elements or if either
has zero variance. Guards against NaN/infinity.
"""
n = len(xs)
if n < 2:
return None
mean_x = sum(xs) / n
mean_y = sum(ys) / n
cov = 0.0
var_x = 0.0
var_y = 0.0
for x, y in zip(xs, ys):
dx = x - mean_x
dy = y - mean_y
cov += dx * dy
var_x += dx * dx
var_y += dy * dy
if var_x == 0.0 or var_y == 0.0:
return None
r = cov / math.sqrt(var_x * var_y)
if math.isnan(r) or math.isinf(r):
return None
return max(-1.0, min(1.0, r))
def _compute_ic(
contribution_scores: list[float],
future_returns: list[float],
) -> float | None:
"""Compute IC (Pearson correlation) between contribution scores and returns.
Returns None when fewer than 30 data points.
"""
if len(contribution_scores) < 30 or len(future_returns) < 30:
return None
n = min(len(contribution_scores), len(future_returns))
return _pearson_correlation(contribution_scores[:n], future_returns[:n])
# ---------------------------------------------------------------------------
# SQL queries — source attribution via v_source_performance
# ---------------------------------------------------------------------------
# Per-row inputs for source attribution, read from the v_source_performance
# view. compute_source_attribution binds $1 to the horizon and $2 to the
# lookback cutoff compared against generated_at.
_SOURCE_ATTRIBUTION_SQL = """
SELECT
source,
source_type,
weight,
contribution_score,
is_duplicate,
direction_correct,
future_return,
excess_return_vs_spy
FROM v_source_performance
WHERE horizon = $1
AND generated_at >= $2
"""
# Same projection without the generated_at filter (only $1 = horizon).
# NOTE(review): no function visible in this file references this constant —
# confirm whether an all-time mode is still planned or it can be removed.
_SOURCE_ATTRIBUTION_ALL_SQL = """
SELECT
source,
source_type,
weight,
contribution_score,
is_duplicate,
direction_correct,
future_return,
excess_return_vs_spy
FROM v_source_performance
WHERE horizon = $1
"""
# ---------------------------------------------------------------------------
# SQL queries — catalyst attribution via v_source_performance
# ---------------------------------------------------------------------------
# Per-row inputs for catalyst attribution, read from the v_source_performance
# view. compute_catalyst_attribution binds $1 to the horizon and $2 to the
# lookback cutoff compared against generated_at.
_CATALYST_ATTRIBUTION_SQL = """
SELECT
catalyst_type,
weight,
contribution_score,
direction_correct,
future_return,
excess_return_vs_spy
FROM v_source_performance
WHERE horizon = $1
AND generated_at >= $2
"""
# Same projection without the generated_at filter (only $1 = horizon).
# NOTE(review): no function visible in this file references this constant —
# confirm whether an all-time mode is still planned or it can be removed.
_CATALYST_ATTRIBUTION_ALL_SQL = """
SELECT
catalyst_type,
weight,
contribution_score,
direction_correct,
future_return,
excess_return_vs_spy
FROM v_source_performance
WHERE horizon = $1
"""
# ---------------------------------------------------------------------------
# SQL queries — layer attribution via prediction_snapshots + outcomes
# ---------------------------------------------------------------------------
# Per-prediction layer scores joined to their outcomes (prediction_snapshots
# x prediction_outcomes on prediction_id). compute_layer_attribution binds
# $1 to the outcome horizon and $2 to the lookback cutoff compared against
# the snapshot's generated_at.
_LAYER_ATTRIBUTION_SQL = """
SELECT
ps.score_company,
ps.score_macro,
ps.score_competitive,
po.direction_correct,
po.future_return
FROM prediction_snapshots ps
JOIN prediction_outcomes po ON po.prediction_id = ps.id
WHERE po.horizon = $1
AND ps.generated_at >= $2
"""
# Same projection without the generated_at filter (only $1 = horizon).
# NOTE(review): no function visible in this file references this constant —
# confirm whether an all-time mode is still planned or it can be removed.
_LAYER_ATTRIBUTION_ALL_SQL = """
SELECT
ps.score_company,
ps.score_macro,
ps.score_competitive,
po.direction_correct,
po.future_return
FROM prediction_snapshots ps
JOIN prediction_outcomes po ON po.prediction_id = ps.id
WHERE po.horizon = $1
"""
# ---------------------------------------------------------------------------
# Source attribution (Requirements 7.1, 7.2, 7.7)
# ---------------------------------------------------------------------------
def _mean_or_zero(values: list) -> float:
    """Return the arithmetic mean of *values*, or 0.0 for an empty list."""
    return sum(values) / len(values) if values else 0.0


def _non_null(rows: list[dict], column: str) -> list:
    """Collect the non-NULL values of *column* from *rows*, in row order."""
    return [r[column] for r in rows if r.get(column) is not None]


async def compute_source_attribution(
    pool: asyncpg.Pool,
    lookback_days: int = 30,
    horizon: str = "7d",
) -> list[SourceAttribution]:
    """Compute per-source performance metrics.

    Fetches rows from ``v_source_performance`` for the given horizon whose
    ``generated_at`` falls within the last ``lookback_days`` days, groups
    them by ``source`` (empty values are bucketed as ``"unknown"``) and
    derives, per group: prediction count, average weight, average
    contribution score, win rate, average future return, average excess
    return vs SPY, information coefficient and duplicate rate.

    Args:
        pool: Connection pool used to run the query.
        lookback_days: Size of the look-back window, in days.
        horizon: Outcome horizon to filter on.

    Returns:
        ``SourceAttribution`` records sorted by prediction count, descending.
        An empty list is returned when the query fails (the error is logged,
        not raised) or when no rows match.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from collections import Counter

    now = datetime.now().astimezone()
    cutoff = now - timedelta(days=lookback_days)

    try:
        rows = await pool.fetch(_SOURCE_ATTRIBUTION_SQL, horizon, cutoff)
    except Exception:
        # Best-effort: attribution is a reporting path, so a failed query
        # degrades to "no data" instead of propagating.
        logger.exception(
            "Failed to query source attribution for horizon=%s lookback=%dd",
            horizon,
            lookback_days,
        )
        return []

    if not rows:
        return []

    # Group rows by source.
    source_groups: dict[str, list[dict]] = {}
    for row in rows:
        r = dict(row)
        key = r.get("source") or "unknown"
        source_groups.setdefault(key, []).append(r)

    results: list[SourceAttribution] = []
    for source, group in source_groups.items():
        count = len(group)

        # Source type: the most common non-empty label in the group. Ties
        # resolve to the label seen first, so a group with a single label
        # behaves exactly as taking the first row's value would.
        type_counts = Counter(
            r["source_type"] for r in group if r.get("source_type")
        )
        source_type = type_counts.most_common(1)[0][0] if type_counts else "unknown"

        avg_weight = _mean_or_zero(_non_null(group, "weight"))
        avg_contribution_score = _mean_or_zero(
            _non_null(group, "contribution_score")
        )

        # Win rate over rows that actually carry a direction verdict.
        verdicts = _non_null(group, "direction_correct")
        win_count = sum(1 for v in verdicts if v is True)
        win_rate = win_count / len(verdicts) if verdicts else 0.0

        avg_future_return = _mean_or_zero(_non_null(group, "future_return"))
        avg_excess_return_vs_spy = _mean_or_zero(
            _non_null(group, "excess_return_vs_spy")
        )

        # IC: correlate contribution score with future return, using only
        # rows where both values are present so the series stay aligned.
        paired = [
            (r["contribution_score"], r["future_return"])
            for r in group
            if r.get("contribution_score") is not None
            and r.get("future_return") is not None
        ]
        ic = _compute_ic(
            [score for score, _ in paired],
            [ret for _, ret in paired],
        )

        # Duplicate rate: is_duplicate=true / total rows in the group.
        dup_count = sum(1 for r in group if r.get("is_duplicate") is True)
        duplicate_rate = dup_count / count

        results.append(
            SourceAttribution(
                source=source,
                source_type=source_type,
                prediction_count=count,
                avg_weight=avg_weight,
                avg_contribution_score=avg_contribution_score,
                win_rate=win_rate,
                avg_future_return=avg_future_return,
                avg_excess_return_vs_spy=avg_excess_return_vs_spy,
                information_coefficient=ic,
                duplicate_rate=duplicate_rate,
            )
        )

    # Sort by prediction count descending.
    results.sort(key=lambda a: a.prediction_count, reverse=True)

    logger.info(
        "Computed source attribution for %d sources (horizon=%s, lookback=%dd)",
        len(results),
        horizon,
        lookback_days,
    )
    return results
# ---------------------------------------------------------------------------
# Catalyst attribution (Requirements 7.3, 7.4)
# ---------------------------------------------------------------------------
async def compute_catalyst_attribution(
    pool: asyncpg.Pool,
    lookback_days: int = 30,
    horizon: str = "7d",
) -> list[CatalystAttribution]:
    """Compute per-catalyst-type performance metrics.

    Fetches rows from ``v_source_performance`` for the given horizon whose
    ``generated_at`` falls within the last ``lookback_days`` days, buckets
    them by ``catalyst_type`` (empty values become ``"unknown"``) and
    derives, per bucket: prediction count, win rate, average future return,
    average excess return vs SPY and information coefficient.

    Returns the ``CatalystAttribution`` records ordered by prediction count,
    largest first. A failed query is logged and yields an empty list, as
    does an empty result set.
    """
    window_start = datetime.now().astimezone() - timedelta(days=lookback_days)

    try:
        records = await pool.fetch(_CATALYST_ATTRIBUTION_SQL, horizon, window_start)
    except Exception:
        logger.exception(
            "Failed to query catalyst attribution for horizon=%s lookback=%dd",
            horizon,
            lookback_days,
        )
        return []

    if not records:
        return []

    # Bucket the rows by catalyst type, keeping first-seen bucket order.
    by_catalyst: dict[str, list[dict]] = {}
    for record in records:
        entry = dict(record)
        label = entry.get("catalyst_type") or "unknown"
        if label not in by_catalyst:
            by_catalyst[label] = []
        by_catalyst[label].append(entry)

    attributions: list[CatalystAttribution] = []
    for label, members in by_catalyst.items():
        # Win rate only considers rows that carry a direction verdict.
        verdicts = [
            e["direction_correct"]
            for e in members
            if e.get("direction_correct") is not None
        ]
        if verdicts:
            hit_rate = sum(1 for v in verdicts if v is True) / len(verdicts)
        else:
            hit_rate = 0.0

        realised = [
            e["future_return"] for e in members if e.get("future_return") is not None
        ]
        mean_return = sum(realised) / len(realised) if realised else 0.0

        excess = [
            e["excess_return_vs_spy"]
            for e in members
            if e.get("excess_return_vs_spy") is not None
        ]
        mean_excess = sum(excess) / len(excess) if excess else 0.0

        # IC inputs: keep only rows where both the score and the return are
        # present so the two series remain positionally aligned.
        paired = [
            (e["contribution_score"], e["future_return"])
            for e in members
            if e.get("contribution_score") is not None
            and e.get("future_return") is not None
        ]
        coefficient = _compute_ic(
            [score for score, _ in paired],
            [ret for _, ret in paired],
        )

        attributions.append(
            CatalystAttribution(
                catalyst_type=label,
                prediction_count=len(members),
                win_rate=hit_rate,
                avg_future_return=mean_return,
                avg_excess_return_vs_spy=mean_excess,
                information_coefficient=coefficient,
            )
        )

    # Largest buckets first; ties keep their first-seen order.
    attributions.sort(key=lambda item: item.prediction_count, reverse=True)

    logger.info(
        "Computed catalyst attribution for %d catalyst types "
        "(horizon=%s, lookback=%dd)",
        len(attributions),
        horizon,
        lookback_days,
    )
    return attributions
# ---------------------------------------------------------------------------
# Layer attribution (Requirements 7.5, 7.6)
# ---------------------------------------------------------------------------
async def compute_layer_attribution(
    pool: asyncpg.Pool,
    lookback_days: int = 30,
    horizon: str = "7d",
) -> list[LayerAttribution]:
    """Compute per-layer (company, macro, competitive) performance metrics.

    Reads ``score_company``, ``score_macro`` and ``score_competitive`` from
    prediction snapshots joined to their outcomes for the given horizon and
    look-back window. A prediction's total score is the sum of the three
    layer scores (NULL counts as 0); predictions whose total is not positive
    are ignored for all share-based metrics.

    For each layer the result carries:
      - avg_contribution_pct: mean of layer_score / total_score
      - dominant_win_rate: win rate over predictions where the layer's share
        of the total exceeds 30%
      - dominant_ic: IC between the layer score and the future return over
        those same predictions

    Returns three ``LayerAttribution`` records in the order company, macro,
    competitive; all-zero records when the query matches nothing. A failed
    query is logged and yields an empty list instead.
    """
    layer_columns = (
        ("company", "score_company"),
        ("macro", "score_macro"),
        ("competitive", "score_competitive"),
    )

    window_start = datetime.now().astimezone() - timedelta(days=lookback_days)

    try:
        records = await pool.fetch(_LAYER_ATTRIBUTION_SQL, horizon, window_start)
    except Exception:
        logger.exception(
            "Failed to query layer attribution for horizon=%s lookback=%dd",
            horizon,
            lookback_days,
        )
        return []

    if not records:
        # No data: still report every layer, with neutral values.
        return [
            LayerAttribution(
                layer=name,
                avg_contribution_pct=0.0,
                dominant_win_rate=0.0,
                dominant_ic=None,
            )
            for name, _ in layer_columns
        ]

    # Pair every snapshot with its combined score once; only snapshots with
    # a positive total can be expressed as shares.
    scored: list[tuple[dict, float]] = []
    for record in records:
        snapshot = dict(record)
        combined = (
            (snapshot.get("score_company") or 0.0)
            + (snapshot.get("score_macro") or 0.0)
            + (snapshot.get("score_competitive") or 0.0)
        )
        if combined > 0.0:
            scored.append((snapshot, combined))

    attributions: list[LayerAttribution] = []
    for name, column in layer_columns:
        # Share of the combined score supplied by this layer, per snapshot.
        shares = [
            ((snapshot.get(column) or 0.0) / combined, snapshot)
            for snapshot, combined in scored
        ]

        if shares:
            mean_share = sum(share for share, _ in shares) / len(shares)
        else:
            mean_share = 0.0

        # Snapshots where this layer supplies more than 30% of the total.
        dominant = [snapshot for share, snapshot in shares if share > 0.30]

        verdicts = [
            s["direction_correct"]
            for s in dominant
            if s.get("direction_correct") is not None
        ]
        if verdicts:
            hit_rate = sum(1 for v in verdicts if v is True) / len(verdicts)
        else:
            hit_rate = 0.0

        # IC inputs: layer score vs. realised return, where a return exists.
        with_return = [s for s in dominant if s.get("future_return") is not None]
        coefficient = _compute_ic(
            [s.get(column) or 0.0 for s in with_return],
            [s["future_return"] for s in with_return],
        )

        attributions.append(
            LayerAttribution(
                layer=name,
                avg_contribution_pct=mean_share,
                dominant_win_rate=hit_rate,
                dominant_ic=coefficient,
            )
        )

    logger.info(
        "Computed layer attribution for 3 layers (horizon=%s, lookback=%dd)",
        horizon,
        lookback_days,
    )
    return attributions