feat: add 15 validation saved queries for SQL Explorer (migration 036)

Prediction overview, confidence distribution, evidence dedup quality, source/catalyst breakdown, win rate by ticker/horizon, model quality timeline, quality gate status, high-duplicate predictions, excess vs SPY.
2026-05-01 19:15:54 +00:00
parent affb65d7f4
commit cc21fd9e8f
2 changed files with 53 additions and 1 deletions
@@ -81,7 +81,7 @@ When a full reset is needed:
 ## Database Migrations
 - Located in `infra/migrations/001_*.sql` through `030_*.sql`
 - Applied automatically by `runmefirst.sh` in sorted order
- Next migration number: **036**
+- Next migration number: **037**
 - Key migrations:
  - 016: Global news interpolation (global_events, macro_impact_records, exposure_profiles, trend_projections)
  - 017: Competitive intelligence (competitor_relationships, competitive_signal_records)
@@ -0,0 +1,52 @@
 -- Seed saved queries for model validation, calibration, and signal quality analysis.
 -- Reads the prediction_snapshots, prediction_outcomes, signal_evidence_links,
 -- model_metric_snapshots and risk_configs tables and the
 -- v_prediction_performance / v_source_performance views.
 --
 -- Idempotent: a row whose name already exists has its sql_text and description
 -- overwritten by the ON CONFLICT clause at the end of the statement.
 -- NOTE(review): ON CONFLICT (name) needs a unique index/constraint on
 -- saved_queries.name, presumably created by an earlier migration -- confirm.
 --
 -- Win-rate / profit-rate columns use CASE WHEN <flag> THEN 1.0 ELSE 0.0 END, so a
 -- NULL direction_correct / profitable flag is counted as a miss, not excluded.
 -- Single quotes inside each sql_text literal are escaped by doubling ('').
 INSERT INTO saved_queries (name, description, sql_text) VALUES
 ('Prediction Snapshot Overview', 'Recent prediction snapshots with confidence and evidence counts',
 'SELECT ticker, direction, action, mode, round(confidence::numeric, 3) AS confidence, round(strength::numeric, 3) AS strength, evidence_count, unique_source_count, duplicate_evidence_count, round(price_at_prediction::numeric, 2) AS price, generated_at FROM prediction_snapshots ORDER BY generated_at DESC LIMIT 50'),
 ('Predictions by Ticker', 'Prediction count and avg confidence per ticker',
 'SELECT ticker, count(*) AS predictions, round(avg(confidence)::numeric, 3) AS avg_confidence, round(avg(strength)::numeric, 3) AS avg_strength, count(*) FILTER (WHERE action = ''buy'') AS buys, count(*) FILTER (WHERE action = ''sell'') AS sells, count(*) FILTER (WHERE action = ''hold'') AS holds, count(*) FILTER (WHERE action = ''watch'') AS watches FROM prediction_snapshots GROUP BY ticker ORDER BY predictions DESC'),
 -- Buckets are explicit on both ends: values below 0.50 and NULL confidence get
 -- their own labels instead of being folded into the [0.50, 0.60) bucket.
 ('Prediction Confidence Distribution', 'Predictions grouped by confidence bucket',
 'SELECT CASE WHEN confidence >= 0.90 THEN ''[0.90, 1.00]'' WHEN confidence >= 0.80 THEN ''[0.80, 0.90)'' WHEN confidence >= 0.70 THEN ''[0.70, 0.80)'' WHEN confidence >= 0.60 THEN ''[0.60, 0.70)'' WHEN confidence >= 0.50 THEN ''[0.50, 0.60)'' WHEN confidence IS NULL THEN ''unknown'' ELSE ''< 0.50'' END AS bucket, count(*) AS count, round(avg(confidence)::numeric, 3) AS avg_conf, count(*) FILTER (WHERE action = ''buy'') AS buys, count(*) FILTER (WHERE action = ''sell'') AS sells FROM prediction_snapshots GROUP BY 1 ORDER BY bucket'),
 ('Evidence Deduplication Quality', 'Duplicate evidence rate per ticker — high rates suggest source overlap',
 'SELECT ticker, count(*) AS total_links, count(*) FILTER (WHERE is_duplicate) AS duplicates, round(count(*) FILTER (WHERE is_duplicate)::numeric / NULLIF(count(*), 0) * 100, 1) AS dupe_pct, count(DISTINCT source_type) AS source_types FROM signal_evidence_links GROUP BY ticker ORDER BY dupe_pct DESC'),
 ('Evidence Source Breakdown', 'Evidence links by source type with duplicate rates',
 'SELECT source_type, count(*) AS total, count(*) FILTER (WHERE is_duplicate) AS duplicates, round(count(*) FILTER (WHERE is_duplicate)::numeric / NULLIF(count(*), 0) * 100, 1) AS dupe_pct, round(avg(weight)::numeric, 3) AS avg_weight, round(avg(contribution_score)::numeric, 4) AS avg_contribution FROM signal_evidence_links GROUP BY source_type ORDER BY total DESC'),
 ('Evidence by Catalyst Type', 'Evidence links grouped by catalyst type',
 'SELECT catalyst_type, count(*) AS total, round(avg(impact)::numeric, 3) AS avg_impact, round(avg(extraction_confidence)::numeric, 3) AS avg_extraction_conf, count(DISTINCT ticker) AS tickers FROM signal_evidence_links WHERE catalyst_type IS NOT NULL GROUP BY catalyst_type ORDER BY total DESC'),
 ('Prediction Performance', 'Prediction outcomes with returns and accuracy (uses v_prediction_performance view)',
 'SELECT ticker, direction, action, round(confidence::numeric, 3) AS confidence, round(future_return::numeric, 4) AS future_return, round(excess_return_vs_spy::numeric, 4) AS excess_vs_spy, direction_correct, profitable, horizon, generated_at FROM v_prediction_performance ORDER BY generated_at DESC LIMIT 50'),
 -- HAVING count(*) >= 5 drops tickers with fewer than five outcomes.
 ('Win Rate by Ticker', 'Directional accuracy and profitability per ticker',
 'SELECT ticker, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(CASE WHEN profitable THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS profit_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy FROM v_prediction_performance GROUP BY ticker HAVING count(*) >= 5 ORDER BY win_rate_pct DESC'),
 ('Win Rate by Horizon', 'Directional accuracy across prediction horizons',
 'SELECT horizon, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy FROM v_prediction_performance GROUP BY horizon ORDER BY outcomes DESC'),
 -- HAVING count(*) >= 10 drops (source, source_type) pairs with fewer than ten outcomes.
 ('Source Performance', 'Per-source win rate and returns (uses v_source_performance view)',
 'SELECT source, source_type, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy, round(avg(CASE WHEN is_duplicate THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS dupe_pct FROM v_source_performance GROUP BY source, source_type HAVING count(*) >= 10 ORDER BY win_rate_pct DESC'),
 ('Catalyst Performance', 'Win rate by catalyst type',
 'SELECT catalyst_type, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return FROM v_source_performance WHERE catalyst_type IS NOT NULL GROUP BY catalyst_type HAVING count(*) >= 5 ORDER BY win_rate_pct DESC'),
 -- Filters to lookback_window = '30d' and horizon = '7d'; returns the 30 newest snapshots.
 ('Model Quality Timeline', 'Model metric snapshots over time for the 30d/7d window',
 'SELECT generated_at, prediction_count, round(win_rate::numeric, 3) AS win_rate, round(information_coefficient::numeric, 4) AS ic, round(rank_information_coefficient::numeric, 4) AS rank_ic, round(calibration_error::numeric, 4) AS ece, round(brier_score::numeric, 4) AS brier, round(avg_excess_return_vs_spy::numeric, 4) AS excess_spy FROM model_metric_snapshots WHERE lookback_window = ''30d'' AND horizon = ''7d'' ORDER BY generated_at DESC LIMIT 30'),
 ('Quality Gate Status', 'Latest quality gate evaluation from risk_configs',
 'SELECT key, value, updated_at FROM risk_configs WHERE key = ''model_quality_gate'' ORDER BY updated_at DESC LIMIT 1'),
 -- NULLIF guards the division even though evidence_count > 0 is also filtered:
 -- SQL does not promise the order in which the AND-ed predicates are evaluated.
 ('High Duplicate Predictions', 'Predictions where duplicate evidence exceeds 50% — potential inflation risk',
 'SELECT ticker, direction, action, round(confidence::numeric, 3) AS confidence, evidence_count, duplicate_evidence_count, round(duplicate_evidence_count::numeric / NULLIF(evidence_count, 0) * 100, 1) AS dupe_pct, generated_at FROM prediction_snapshots WHERE evidence_count > 0 AND duplicate_evidence_count::float / NULLIF(evidence_count, 0) > 0.5 ORDER BY dupe_pct DESC LIMIT 30'),
 -- NULLS LAST: avg() is NULL for an action whose outcomes all lack excess_return_vs_spy,
 -- and PostgreSQL would otherwise sort those rows first under DESC.
 ('Prediction vs SPY', 'Average excess return vs SPY by action type',
 'SELECT ps.action, count(*) AS outcomes, round(avg(po.future_return)::numeric, 4) AS avg_return, round(avg(po.excess_return_vs_spy)::numeric, 4) AS avg_excess_spy, round(avg(CASE WHEN po.direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct FROM prediction_snapshots ps JOIN prediction_outcomes po ON po.prediction_id = ps.id GROUP BY ps.action ORDER BY avg_excess_spy DESC NULLS LAST')
 ON CONFLICT (name) DO UPDATE SET sql_text = EXCLUDED.sql_text, description = EXCLUDED.description;