feat: model validation, calibration, and signal quality layer

- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
2026-05-01 03:04:58 +00:00
parent 5d2ffd9163
commit 7fcc8a6c07
23 changed files with 7554 additions and 9 deletions
@@ -885,3 +885,169 @@ export function useToggleMacro() {
    onSuccess: () => qc.invalidateQueries({ queryKey: ['macro-status'] }),
  });
 }
+
+// ---------------------------------------------------------------------------
+// Validation: Model Quality & Calibration (Requirements 12.1, 12.2, 12.3, 12.7)
+// ---------------------------------------------------------------------------
+
+export interface ModelMetricSnapshot { // Aggregated model-quality metrics returned inside the validation summary.
+  id: string;
+  generated_at: string; // presumably a timestamp of when the snapshot was computed — TODO confirm format
+  lookback_window: string; // presumably mirrors the `lookback` query value (e.g. '30d') — verify against the API
+  horizon: string; // presumably mirrors the `horizon` query value (e.g. '7d') — verify against the API
+  prediction_count: number;
+  win_rate: number | null; // treated as a fraction by the dashboard (multiplied by 100 for display)
+  directional_accuracy: number | null; // treated as a fraction by the dashboard
+  information_coefficient: number | null; // shown as "IC" with 4 decimals
+  rank_information_coefficient: number | null; // shown as "Rank IC" with 4 decimals
+  avg_return: number | null;
+  avg_excess_return_vs_spy: number | null; // treated as a fraction by the dashboard ("Excess vs SPY")
+  avg_excess_return_vs_sector: number | null;
+  calibration_error: number | null; // shown under the "ECE" label
+  brier_score: number | null;
+  buy_win_rate: number | null;
+  sell_win_rate: number | null;
+  hold_win_rate: number | null;
+  metadata: Record<string, unknown> | null; // free-form; shape is not visible in this file
+}
+
+export interface ValidationSummary { // Response shape of GET /api/validation/summary.
+  snapshot: ModelMetricSnapshot | null; // null makes the dashboard show its "No validation data available yet" card
+  gate_status: Record<string, unknown> | null; // untyped gate payload
+}
+
+export interface CalibrationBucket { // One confidence bucket of the calibration table.
+  bucket_low: number; // lower bound as a fraction; consumers treat it as inclusive
+  bucket_high: number; // upper bound as a fraction; consumers treat it as exclusive
+  avg_confidence: number; // fraction, displayed as a whole-number percentage
+  observed_win_rate: number; // fraction, displayed as a whole-number percentage
+  prediction_count: number;
+  miscalibrated: boolean; // the UI additionally flags rows where |avg_confidence - observed_win_rate| > 0.15
+}
+
+export interface ValidationCalibration { // Response shape of GET /api/validation/calibration.
+  buckets: CalibrationBucket[]; // an empty list renders as "No calibration data available"
+  lookback: string;
+  horizon: string;
+}
+
+export interface ICByHorizonEntry { // IC metrics for a single horizon (one row of the IC table).
+  horizon: string;
+  information_coefficient: number | null; // null renders as '—'
+  rank_information_coefficient: number | null; // null renders as '—'
+  prediction_count: number;
+  generated_at: string | null; // presumably a timestamp string — TODO confirm format
+}
+
+export interface ValidationICByHorizon { // Response shape of GET /api/validation/ic-by-horizon.
+  horizons: ICByHorizonEntry[]; // an empty list renders as "No IC data available"
+  lookback: string;
+}
+
+export interface ValidationGateStatus { // Response shape of GET /api/validation/gate-status.
+  gate_status: Record<string, unknown> | null; // untyped; the dashboard reads `passed`, `reason` and `threshold_results` from it
+  updated_at?: string | null;
+  message?: string;
+}
+
+/**
+ * Query hook for `/api/validation/summary`.
+ * Empty-string arguments are left out of the query string; both arguments are part of the query key.
+ */
+export function useValidationSummary(lookback = '30d', horizon = '7d') {
+  const params = new URLSearchParams();
+  if (lookback) {
+    params.set('lookback', lookback);
+  }
+  if (horizon) {
+    params.set('horizon', horizon);
+  }
+  const query = params.toString();
+  const path = query ? `/api/validation/summary?${query}` : '/api/validation/summary';
+  return useGet<ValidationSummary>(['validation-summary', lookback, horizon], 'query', path);
+}
+
+/**
+ * Query hook for `/api/validation/calibration`.
+ * Empty-string arguments are left out of the query string; both arguments are part of the query key.
+ */
+export function useValidationCalibration(lookback = '30d', horizon = '7d') {
+  const params = new URLSearchParams();
+  if (lookback) {
+    params.set('lookback', lookback);
+  }
+  if (horizon) {
+    params.set('horizon', horizon);
+  }
+  const query = params.toString();
+  const path = query ? `/api/validation/calibration?${query}` : '/api/validation/calibration';
+  return useGet<ValidationCalibration>(['validation-calibration', lookback, horizon], 'query', path);
+}
+
+/**
+ * Query hook for `/api/validation/ic-by-horizon`.
+ * An empty-string lookback is left out of the query string; the lookback is part of the query key.
+ */
+export function useValidationICByHorizon(lookback = '30d') {
+  const params = new URLSearchParams();
+  if (lookback) {
+    params.set('lookback', lookback);
+  }
+  const query = params.toString();
+  const path = query ? `/api/validation/ic-by-horizon?${query}` : '/api/validation/ic-by-horizon';
+  return useGet<ValidationICByHorizon>(['validation-ic-by-horizon', lookback], 'query', path);
+}
+
+/** Query hook for `/api/validation/gate-status`; takes no parameters. */
+export function useValidationGateStatus() {
+  const path = '/api/validation/gate-status';
+  return useGet<ValidationGateStatus>(['validation-gate-status'], 'query', path);
+}
+
+// ---------------------------------------------------------------------------
+// Validation: Attribution — Sources, Catalysts, Layers (Requirements 12.4, 12.5, 12.6)
+// ---------------------------------------------------------------------------
+
+export interface SourceAttribution { // Per-source performance row of the "Source Performance" table.
+  source: string;
+  source_type: string;
+  prediction_count: number;
+  avg_weight: number;
+  avg_contribution_score: number;
+  win_rate: number; // treated as a fraction by the dashboard (multiplied by 100 for display)
+  avg_future_return: number; // treated as a fraction by the dashboard ("Avg Return")
+  avg_excess_return_vs_spy: number;
+  information_coefficient: number | null; // null renders as '—'
+  duplicate_rate: number; // treated as a fraction by the dashboard ("Duplicate Rate")
+}
+
+export interface SourceAttributionResponse { // Response shape of GET /api/validation/attribution/sources.
+  sources: SourceAttribution[]; // an empty list renders as "No source attribution data available"
+  lookback: string;
+  horizon: string;
+}
+
+export interface CatalystAttribution { // Per-catalyst-type row of the "Catalyst Truth Table".
+  catalyst_type: string;
+  prediction_count: number;
+  win_rate: number; // treated as a fraction by the dashboard (multiplied by 100 for display)
+  avg_future_return: number; // treated as a fraction by the dashboard ("Avg Return")
+  avg_excess_return_vs_spy: number;
+  information_coefficient: number | null; // null renders as '—'
+}
+
+export interface CatalystAttributionResponse { // Response shape of GET /api/validation/attribution/catalysts.
+  catalysts: CatalystAttribution[]; // an empty list renders as "No catalyst attribution data available"
+  lookback: string;
+  horizon: string;
+}
+
+export interface LayerAttribution { // Per-layer row of the "Layer Attribution" table.
+  layer: string; // displayed capitalized
+  avg_contribution_pct: number; // NOTE(review): the dashboard multiplies this by 100, i.e. treats it as a fraction despite the `_pct` suffix — confirm against the API
+  dominant_win_rate: number; // treated as a fraction by the dashboard
+  dominant_ic: number | null; // null renders as '—'
+}
+
+export interface LayerAttributionResponse { // Response shape of GET /api/validation/attribution/layers.
+  layers: LayerAttribution[]; // an empty list renders as "No layer attribution data available"
+  lookback: string;
+  horizon: string;
+}
+
+/**
+ * Query hook for `/api/validation/attribution/sources`.
+ * Empty-string arguments are left out of the query string; both arguments are part of the query key.
+ */
+export function useValidationAttributionSources(lookback = '30d', horizon = '7d') {
+  const params = new URLSearchParams();
+  if (lookback) {
+    params.set('lookback', lookback);
+  }
+  if (horizon) {
+    params.set('horizon', horizon);
+  }
+  const query = params.toString();
+  const path = query
+    ? `/api/validation/attribution/sources?${query}`
+    : '/api/validation/attribution/sources';
+  return useGet<SourceAttributionResponse>(['validation-attribution-sources', lookback, horizon], 'query', path);
+}
+
+/**
+ * Query hook for `/api/validation/attribution/catalysts`.
+ * Empty-string arguments are left out of the query string; both arguments are part of the query key.
+ */
+export function useValidationAttributionCatalysts(lookback = '30d', horizon = '7d') {
+  const params = new URLSearchParams();
+  if (lookback) {
+    params.set('lookback', lookback);
+  }
+  if (horizon) {
+    params.set('horizon', horizon);
+  }
+  const query = params.toString();
+  const path = query
+    ? `/api/validation/attribution/catalysts?${query}`
+    : '/api/validation/attribution/catalysts';
+  return useGet<CatalystAttributionResponse>(['validation-attribution-catalysts', lookback, horizon], 'query', path);
+}
+
+/**
+ * Query hook for `/api/validation/attribution/layers`.
+ * Empty-string arguments are left out of the query string; both arguments are part of the query key.
+ */
+export function useValidationAttributionLayers(lookback = '30d', horizon = '7d') {
+  const params = new URLSearchParams();
+  if (lookback) {
+    params.set('lookback', lookback);
+  }
+  if (horizon) {
+    params.set('horizon', horizon);
+  }
+  const query = params.toString();
+  const path = query
+    ? `/api/validation/attribution/layers?${query}`
+    : '/api/validation/attribution/layers';
+  return useGet<LayerAttributionResponse>(['validation-attribution-layers', lookback, horizon], 'query', path);
+}
@@ -1,9 +1,89 @@
 import { useState } from 'react';
-import { useModelPerformance, useModelFailures } from '../api/hooks';
+import {
+  useModelPerformance,
+  useModelFailures,
+  useValidationSummary,
+  useValidationCalibration,
+  useValidationICByHorizon,
+  useValidationGateStatus,
+  useValidationAttributionSources,
+  useValidationAttributionCatalysts,
+  useValidationAttributionLayers,
+} from '../api/hooks';
+import type {
+  ValidationSummary,
+  ValidationCalibration,
+  CalibrationBucket,
+  ValidationICByHorizon,
+  ICByHorizonEntry,
+  ValidationGateStatus,
+  SourceAttributionResponse,
+  CatalystAttributionResponse,
+  LayerAttributionResponse,
+  SourceAttribution,
+  CatalystAttribution,
+  LayerAttribution,
+} from '../api/hooks';
 import { LoadingSpinner, DateRangeSelector, StatusBadge, Card } from '../components/ui';
+import { AlertTriangle, ShieldCheck, ShieldX } from 'lucide-react';
+
+type Tab = 'extraction' | 'validation';

 export function OpsModelPage() {
  const [hours, setHours] = useState(24);
+  const [activeTab, setActiveTab] = useState<Tab>('extraction');
+
+  return (
+    <div className="space-y-6">
+      <div className="flex items-center justify-between">
+        <h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
+        {activeTab === 'extraction' && (
+          <DateRangeSelector value={hours} onChange={setHours} />
+        )}
+      </div>
+
+      {/* Tab bar */}
+      <div className="flex border-b border-surface-700" role="tablist" aria-label="Model performance tabs">
+        <button
+          role="tab"
+          aria-selected={activeTab === 'extraction'}
+          onClick={() => setActiveTab('extraction')}
+          className={`px-4 py-2 text-sm font-medium transition-colors ${
+            activeTab === 'extraction'
+              ? 'border-b-2 border-brand-500 text-brand-400'
+              : 'text-gray-400 hover:text-gray-200'
+          }`}
+        >
+          Extraction Performance
+        </button>
+        <button
+          role="tab"
+          aria-selected={activeTab === 'validation'}
+          onClick={() => setActiveTab('validation')}
+          className={`px-4 py-2 text-sm font-medium transition-colors ${
+            activeTab === 'validation'
+              ? 'border-b-2 border-brand-500 text-brand-400'
+              : 'text-gray-400 hover:text-gray-200'
+          }`}
+        >
+          Model Validation
+        </button>
+      </div>
+
+      {activeTab === 'extraction' ? (
+        <ExtractionTab hours={hours} />
+      ) : (
+        <ValidationTab />
+      )}
+    </div>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Extraction Performance Tab (existing content)                       */
+/* ------------------------------------------------------------------ */
+
+function ExtractionTab({ hours }: { hours: number }) {
  const { data: perf, isLoading } = useModelPerformance(hours);
  const { data: failures } = useModelFailures(hours);

@@ -13,11 +93,6 @@ export function OpsModelPage() {

  return (
    <div className="space-y-6">
-      <div className="flex items-center justify-between">
-        <h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
-        <DateRangeSelector value={hours} onChange={setHours} />
-      </div>
-
      {/* Key metrics */}
      <div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
        <StatCard label="Total Extractions" value={String(p.total_extractions ?? '—')} />
@@ -63,6 +138,482 @@ export function OpsModelPage() {
  );
 }

+/* ------------------------------------------------------------------ */
+/* Model Validation Tab (new)                                          */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Model Validation tab. Runs all seven validation queries with their default
+ * arguments and hands each query's data / loading / error state to its own
+ * section component.
+ */
+function ValidationTab() {
+  // Hooks are called unconditionally and in a fixed order on every render.
+  const summaryQuery = useValidationSummary();
+  const calibrationQuery = useValidationCalibration();
+  const icQuery = useValidationICByHorizon();
+  const gateQuery = useValidationGateStatus();
+  const sourcesQuery = useValidationAttributionSources();
+  const catalystsQuery = useValidationAttributionCatalysts();
+  const layersQuery = useValidationAttributionLayers();
+
+  return (
+    <div className="space-y-6">
+      {/* Gate Status */}
+      <GateStatusSection
+        data={gateQuery.data}
+        isLoading={gateQuery.isLoading}
+        error={gateQuery.error}
+      />
+
+      {/* Summary Cards */}
+      <SummaryCardsSection
+        data={summaryQuery.data}
+        isLoading={summaryQuery.isLoading}
+        error={summaryQuery.error}
+      />
+
+      {/* Calibration Table */}
+      <CalibrationTableSection
+        data={calibrationQuery.data}
+        isLoading={calibrationQuery.isLoading}
+        error={calibrationQuery.error}
+      />
+
+      {/* IC by Horizon Table */}
+      <ICByHorizonSection
+        data={icQuery.data}
+        isLoading={icQuery.isLoading}
+        error={icQuery.error}
+      />
+
+      {/* Source Attribution Table */}
+      <SourceAttributionSection
+        data={sourcesQuery.data}
+        isLoading={sourcesQuery.isLoading}
+        error={sourcesQuery.error}
+      />
+
+      {/* Catalyst Attribution Table */}
+      <CatalystAttributionSection
+        data={catalystsQuery.data}
+        isLoading={catalystsQuery.isLoading}
+        error={catalystsQuery.error}
+      />
+
+      {/* Layer Attribution Table */}
+      <LayerAttributionSection
+        data={layersQuery.data}
+        isLoading={layersQuery.isLoading}
+        error={layersQuery.error}
+      />
+    </div>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Gate Status Section                                                 */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Live-trading gate card. Shows a spinner while loading, an error card on
+ * failure, an "unknown" card when there is no gate payload, and otherwise the
+ * PASS/FAIL verdict with an optional per-threshold table.
+ */
+function GateStatusSection({ data, isLoading, error }: {
+  data: ValidationGateStatus | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) {
+    return <LoadingSpinner />;
+  }
+  if (error) {
+    return <ErrorCard message="Failed to load gate status" />;
+  }
+
+  const gate = data?.gate_status as Record<string, unknown> | null;
+  if (!gate) {
+    return (
+      <Card className="flex items-center gap-3">
+        <ShieldX size={20} className="text-yellow-400" />
+        <div>
+          <div className="text-sm font-medium text-yellow-400">Gate Status Unknown</div>
+          <div className="text-xs text-gray-500">No gate evaluation data available</div>
+        </div>
+      </Card>
+    );
+  }
+
+  // The payload is untyped, so each field is read through an explicit cast.
+  const isPassing = gate.passed as boolean | undefined;
+  const explanation = gate.reason as string | undefined;
+  const thresholdRows = gate.threshold_results as Array<Record<string, unknown>> | undefined;
+
+  // A missing `passed` flag is falsy and therefore renders as FAIL.
+  const verdictColor = isPassing ? 'text-green-400' : 'text-red-400';
+  const VerdictIcon = isPassing ? ShieldCheck : ShieldX;
+
+  return (
+    <Card>
+      <div className="mb-3 flex items-center gap-3">
+        <VerdictIcon size={20} className={verdictColor} />
+        <div>
+          <div className={`text-sm font-medium ${verdictColor}`}>
+            Live Trading Gate: {isPassing ? 'PASS' : 'FAIL'}
+          </div>
+          {explanation && <div className="text-xs text-gray-500">{explanation}</div>}
+        </div>
+      </div>
+
+      {thresholdRows && thresholdRows.length > 0 && (
+        <div className="overflow-x-auto">
+          <table className="w-full text-left text-xs">
+            <thead>
+              <tr className="border-b border-surface-700 text-gray-500">
+                <th className="pb-2 pr-4 font-medium">Threshold</th>
+                <th className="pb-2 pr-4 font-medium">Required</th>
+                <th className="pb-2 pr-4 font-medium">Actual</th>
+                <th className="pb-2 font-medium">Status</th>
+              </tr>
+            </thead>
+            <tbody>
+              {thresholdRows.map((row, index) => {
+                const badge = row.passed ? 'success' : 'failed';
+                return (
+                  <tr key={index} className="border-b border-surface-800">
+                    <td className="py-1.5 pr-4 text-gray-300">{String(row.name ?? '')}</td>
+                    <td className="py-1.5 pr-4 font-mono text-gray-400">{fmtThreshold(row.threshold)}</td>
+                    <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtThreshold(row.actual)}</td>
+                    <td className="py-1.5">
+                      <StatusBadge status={badge} />
+                    </td>
+                  </tr>
+                );
+              })}
+            </tbody>
+          </table>
+        </div>
+      )}
+    </Card>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Summary Cards Section                                               */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Grid of headline validation metrics taken from the summary snapshot.
+ * Shows a spinner while loading, an error card on failure, and an explanatory
+ * card when no snapshot exists yet.
+ *
+ * Missing (null) metrics render as '—' in the neutral colour; warning and
+ * error colours are reserved for values that are actually present.
+ */
+function SummaryCardsSection({ data, isLoading, error }: {
+  data: ValidationSummary | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) return <LoadingSpinner />;
+  if (error) return <ErrorCard message="Failed to load validation summary" />;
+
+  const snap = data?.snapshot;
+  if (!snap) {
+    return (
+      <Card>
+        <p className="text-sm text-gray-500">No validation data available yet. Metrics will appear once predictions have been evaluated.</p>
+      </Card>
+    );
+  }
+
+  const neutral = 'text-gray-100';
+  const { brier_score: brier, calibration_error: ece, avg_excess_return_vs_spy: excessVsSpy } = snap;
+
+  // Brier score: green below 0.25, neutral otherwise (including when missing).
+  const brierColor = brier != null && brier < 0.25 ? 'text-green-400' : neutral;
+
+  // ECE: green below 0.15, yellow at or above it, neutral when missing.
+  let eceColor = neutral;
+  if (ece != null) {
+    eceColor = ece < 0.15 ? 'text-green-400' : 'text-yellow-400';
+  }
+
+  // Excess return: green when positive, red when zero or negative, neutral when missing.
+  let excessColor = neutral;
+  if (excessVsSpy != null) {
+    excessColor = excessVsSpy > 0 ? 'text-green-400' : 'text-red-400';
+  }
+
+  return (
+    <div className="grid grid-cols-2 gap-3 sm:grid-cols-3 lg:grid-cols-5">
+      <StatCard label="Predictions" value={String(snap.prediction_count ?? '—')} />
+      <StatCard
+        label="Win Rate"
+        value={fmtPct(snap.win_rate)}
+        color={colorForRate(snap.win_rate, 0.53)}
+      />
+      <StatCard
+        label="Directional Accuracy"
+        value={fmtPct(snap.directional_accuracy)}
+        color={colorForRate(snap.directional_accuracy, 0.53)}
+      />
+      <StatCard
+        label="IC"
+        value={fmtIC(snap.information_coefficient)}
+        color={colorForIC(snap.information_coefficient)}
+      />
+      <StatCard
+        label="Rank IC"
+        value={fmtIC(snap.rank_information_coefficient)}
+        color={colorForIC(snap.rank_information_coefficient)}
+      />
+      <StatCard
+        label="Brier Score"
+        value={brier != null ? brier.toFixed(4) : '—'}
+        color={brierColor}
+      />
+      <StatCard
+        label="ECE"
+        value={ece != null ? ece.toFixed(4) : '—'}
+        color={eceColor}
+      />
+      <StatCard
+        label="Excess vs SPY"
+        value={fmtPct(excessVsSpy)}
+        color={excessColor}
+      />
+    </div>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Calibration Table Section                                           */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Calibration table: one row per confidence bucket. Shows a spinner while
+ * loading, an error card on failure, and a placeholder card when the bucket
+ * list is missing or empty.
+ */
+function CalibrationTableSection({ data, isLoading, error }: {
+  data: ValidationCalibration | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) {
+    return <LoadingSpinner />;
+  }
+  if (error) {
+    return <ErrorCard message="Failed to load calibration data" />;
+  }
+
+  const rows = data?.buckets;
+  if (!rows || rows.length === 0) {
+    return (
+      <Card>
+        <h2 className="mb-2 text-sm font-medium text-gray-400">Calibration</h2>
+        <p className="text-sm text-gray-500">No calibration data available</p>
+      </Card>
+    );
+  }
+
+  // Rows are keyed by position, matching the order the API returned them in.
+  const body = rows.map((entry: CalibrationBucket, index: number) => (
+    <CalibrationRow key={index} bucket={entry} />
+  ));
+
+  return (
+    <Card>
+      <h2 className="mb-3 text-sm font-medium text-gray-400">Calibration by Confidence Bucket</h2>
+      <div className="overflow-x-auto">
+        <table className="w-full text-left text-xs">
+          <thead>
+            <tr className="border-b border-surface-700 text-gray-500">
+              <th className="pb-2 pr-4 font-medium">Bucket</th>
+              <th className="pb-2 pr-4 font-medium">Avg Confidence</th>
+              <th className="pb-2 pr-4 font-medium">Observed Win Rate</th>
+              <th className="pb-2 pr-4 font-medium">Count</th>
+              <th className="pb-2 font-medium">Status</th>
+            </tr>
+          </thead>
+          <tbody>{body}</tbody>
+        </table>
+      </div>
+    </Card>
+  );
+}
+
+/**
+ * One calibration table row. A bucket is highlighted as miscalibrated when the
+ * API flags it or when average confidence and observed win rate differ by
+ * more than 0.15.
+ */
+function CalibrationRow({ bucket }: { bucket: CalibrationBucket }) {
+  const {
+    bucket_low: low,
+    bucket_high: high,
+    avg_confidence: confidence,
+    observed_win_rate: observed,
+    prediction_count: count,
+  } = bucket;
+  const gap = Math.abs(confidence - observed);
+  const flagged = bucket.miscalibrated || gap > 0.15;
+
+  const rowClass = `border-b border-surface-800 ${flagged ? 'bg-amber-900/20' : ''}`;
+  const status = flagged ? (
+    <span className="inline-flex items-center gap-1 text-amber-400">
+      <AlertTriangle size={14} />
+      <span>Miscalibrated</span>
+    </span>
+  ) : (
+    <span className="text-green-400">OK</span>
+  );
+
+  return (
+    <tr className={rowClass}>
+      <td className="py-1.5 pr-4 font-mono text-gray-300">
+        [{fmtPctShort(low)}, {fmtPctShort(high)})
+      </td>
+      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(confidence)}</td>
+      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(observed)}</td>
+      <td className="py-1.5 pr-4 font-mono text-gray-400">{count}</td>
+      <td className="py-1.5">{status}</td>
+    </tr>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* IC by Horizon Section                                               */
+/* ------------------------------------------------------------------ */
+
+/**
+ * Information-coefficient table: one row per horizon with IC, Rank IC and
+ * prediction count. Shows a spinner while loading, an error card on failure,
+ * and a placeholder card when the horizon list is missing or empty.
+ */
+function ICByHorizonSection({ data, isLoading, error }: {
+  data: ValidationICByHorizon | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) {
+    return <LoadingSpinner />;
+  }
+  if (error) {
+    return <ErrorCard message="Failed to load IC by horizon data" />;
+  }
+
+  const rows = data?.horizons;
+  if (!rows || rows.length === 0) {
+    return (
+      <Card>
+        <h2 className="mb-2 text-sm font-medium text-gray-400">IC by Horizon</h2>
+        <p className="text-sm text-gray-500">No IC data available</p>
+      </Card>
+    );
+  }
+
+  return (
+    <Card>
+      <h2 className="mb-3 text-sm font-medium text-gray-400">Information Coefficient by Horizon</h2>
+      <div className="overflow-x-auto">
+        <table className="w-full text-left text-xs">
+          <thead>
+            <tr className="border-b border-surface-700 text-gray-500">
+              <th className="pb-2 pr-4 font-medium">Horizon</th>
+              <th className="pb-2 pr-4 font-medium">IC</th>
+              <th className="pb-2 pr-4 font-medium">Rank IC</th>
+              <th className="pb-2 font-medium">Predictions</th>
+            </tr>
+          </thead>
+          <tbody>
+            {rows.map((entry: ICByHorizonEntry, index: number) => {
+              const ic = entry.information_coefficient;
+              const rankIc = entry.rank_information_coefficient;
+              return (
+                <tr key={index} className="border-b border-surface-800">
+                  <td className="py-1.5 pr-4 font-mono text-gray-300">{entry.horizon}</td>
+                  <td className={`py-1.5 pr-4 font-mono ${colorForIC(ic)}`}>{fmtIC(ic)}</td>
+                  <td className={`py-1.5 pr-4 font-mono ${colorForIC(rankIc)}`}>{fmtIC(rankIc)}</td>
+                  <td className="py-1.5 font-mono text-gray-400">{entry.prediction_count}</td>
+                </tr>
+              );
+            })}
+          </tbody>
+        </table>
+      </div>
+    </Card>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Source Attribution Section                                           */
+/* ------------------------------------------------------------------ */
+
+/**
+ * "Source Performance" table: one row per source with win rate, IC, average
+ * future return and duplicate rate. Shows a spinner while loading, an error
+ * card on failure, and a placeholder card when the source list is missing or
+ * empty.
+ */
+function SourceAttributionSection({ data, isLoading, error }: {
+  data: SourceAttributionResponse | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) {
+    return <LoadingSpinner />;
+  }
+  if (error) {
+    return <ErrorCard message="Failed to load source attribution data" />;
+  }
+
+  const rows = data?.sources;
+  if (!rows || rows.length === 0) {
+    return (
+      <Card>
+        <h2 className="mb-2 text-sm font-medium text-gray-400">Source Performance</h2>
+        <p className="text-sm text-gray-500">No source attribution data available</p>
+      </Card>
+    );
+  }
+
+  return (
+    <Card>
+      <h2 className="mb-3 text-sm font-medium text-gray-400">Source Performance</h2>
+      <div className="overflow-x-auto">
+        <table className="w-full text-left text-xs">
+          <thead>
+            <tr className="border-b border-surface-700 text-gray-500">
+              <th className="pb-2 pr-4 font-medium">Source</th>
+              <th className="pb-2 pr-4 font-medium">Win Rate</th>
+              <th className="pb-2 pr-4 font-medium">IC</th>
+              <th className="pb-2 pr-4 font-medium">Avg Return</th>
+              <th className="pb-2 font-medium">Duplicate Rate</th>
+            </tr>
+          </thead>
+          <tbody>
+            {rows.map((entry: SourceAttribution, index: number) => {
+              const winRate = entry.win_rate;
+              const ic = entry.information_coefficient;
+              return (
+                <tr key={index} className="border-b border-surface-800">
+                  <td className="py-1.5 pr-4 text-gray-300">{entry.source}</td>
+                  <td className={`py-1.5 pr-4 font-mono ${colorForRate(winRate, 0.53)}`}>{fmtPct(winRate)}</td>
+                  <td className={`py-1.5 pr-4 font-mono ${colorForIC(ic)}`}>{fmtIC(ic)}</td>
+                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_future_return)}</td>
+                  <td className="py-1.5 font-mono text-gray-300">{fmtPct(entry.duplicate_rate)}</td>
+                </tr>
+              );
+            })}
+          </tbody>
+        </table>
+      </div>
+    </Card>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Catalyst Attribution Section                                        */
+/* ------------------------------------------------------------------ */
+
+/**
+ * "Catalyst Truth Table": one row per catalyst type with win rate, average
+ * future return and IC. Shows a spinner while loading, an error card on
+ * failure, and a placeholder card when the catalyst list is missing or empty.
+ */
+function CatalystAttributionSection({ data, isLoading, error }: {
+  data: CatalystAttributionResponse | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) {
+    return <LoadingSpinner />;
+  }
+  if (error) {
+    return <ErrorCard message="Failed to load catalyst attribution data" />;
+  }
+
+  const rows = data?.catalysts;
+  if (!rows || rows.length === 0) {
+    return (
+      <Card>
+        <h2 className="mb-2 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
+        <p className="text-sm text-gray-500">No catalyst attribution data available</p>
+      </Card>
+    );
+  }
+
+  return (
+    <Card>
+      <h2 className="mb-3 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
+      <div className="overflow-x-auto">
+        <table className="w-full text-left text-xs">
+          <thead>
+            <tr className="border-b border-surface-700 text-gray-500">
+              <th className="pb-2 pr-4 font-medium">Catalyst Type</th>
+              <th className="pb-2 pr-4 font-medium">Win Rate</th>
+              <th className="pb-2 pr-4 font-medium">Avg Return</th>
+              <th className="pb-2 font-medium">IC</th>
+            </tr>
+          </thead>
+          <tbody>
+            {rows.map((entry: CatalystAttribution, index: number) => {
+              const winRate = entry.win_rate;
+              const ic = entry.information_coefficient;
+              return (
+                <tr key={index} className="border-b border-surface-800">
+                  <td className="py-1.5 pr-4 text-gray-300">{entry.catalyst_type}</td>
+                  <td className={`py-1.5 pr-4 font-mono ${colorForRate(winRate, 0.53)}`}>{fmtPct(winRate)}</td>
+                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_future_return)}</td>
+                  <td className={`py-1.5 font-mono ${colorForIC(ic)}`}>{fmtIC(ic)}</td>
+                </tr>
+              );
+            })}
+          </tbody>
+        </table>
+      </div>
+    </Card>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Layer Attribution Section                                           */
+/* ------------------------------------------------------------------ */
+
+/**
+ * "Layer Attribution" table: one row per layer with contribution share,
+ * dominant win rate and IC. Shows a spinner while loading, an error card on
+ * failure, and a placeholder card when the layer list is missing or empty.
+ */
+function LayerAttributionSection({ data, isLoading, error }: {
+  data: LayerAttributionResponse | undefined;
+  isLoading: boolean;
+  error: Error | null;
+}) {
+  if (isLoading) {
+    return <LoadingSpinner />;
+  }
+  if (error) {
+    return <ErrorCard message="Failed to load layer attribution data" />;
+  }
+
+  const rows = data?.layers;
+  if (!rows || rows.length === 0) {
+    return (
+      <Card>
+        <h2 className="mb-2 text-sm font-medium text-gray-400">Layer Attribution</h2>
+        <p className="text-sm text-gray-500">No layer attribution data available</p>
+      </Card>
+    );
+  }
+
+  return (
+    <Card>
+      <h2 className="mb-3 text-sm font-medium text-gray-400">Layer Attribution</h2>
+      <div className="overflow-x-auto">
+        <table className="w-full text-left text-xs">
+          <thead>
+            <tr className="border-b border-surface-700 text-gray-500">
+              <th className="pb-2 pr-4 font-medium">Layer</th>
+              <th className="pb-2 pr-4 font-medium">Contribution %</th>
+              <th className="pb-2 pr-4 font-medium">Dominant Win Rate</th>
+              <th className="pb-2 font-medium">IC</th>
+            </tr>
+          </thead>
+          <tbody>
+            {rows.map((entry: LayerAttribution, index: number) => {
+              const winRate = entry.dominant_win_rate;
+              const ic = entry.dominant_ic;
+              return (
+                <tr key={index} className="border-b border-surface-800">
+                  <td className="py-1.5 pr-4 text-gray-300 capitalize">{entry.layer}</td>
+                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_contribution_pct)}</td>
+                  <td className={`py-1.5 pr-4 font-mono ${colorForRate(winRate, 0.53)}`}>{fmtPct(winRate)}</td>
+                  <td className={`py-1.5 font-mono ${colorForIC(ic)}`}>{fmtIC(ic)}</td>
+                </tr>
+              );
+            })}
+          </tbody>
+        </table>
+      </div>
+    </Card>
+  );
+}
+
+/* ------------------------------------------------------------------ */
+/* Shared helpers                                                      */
+/* ------------------------------------------------------------------ */
+
 function StatCard({ label, value, color = 'text-gray-100' }: { label: string; value: string; color?: string }) {
  return (
    <Card className="text-center">
@@ -71,3 +622,53 @@ function StatCard({ label, value, color = 'text-gray-100' }: { label: string; va
    </Card>
  );
 }
+
+/** Red-tinted card that shows a short error message for a section that failed to load. */
+function ErrorCard({ message }: { message: string }) {
+  const text = <p className="text-sm text-red-400">{message}</p>;
+  return <Card className="border-red-700/50 bg-red-900/20">{text}</Card>;
+}
+
+/** Render a fraction as a percentage with one decimal (0.5 → "50.0%"); null/undefined → '—'. */
+function fmtPct(v: number | null | undefined): string {
+  return v == null ? '—' : `${(v * 100).toFixed(1)}%`;
+}
+
+/** Render a fraction as a whole-number percentage (0.5 → "50%"), used for bucket display; null/undefined → '—'. */
+function fmtPctShort(v: number | null | undefined): string {
+  return v == null ? '—' : `${(v * 100).toFixed(0)}%`;
+}
+
+/** Render an IC value with four decimals; null/undefined → '—'. */
+function fmtIC(v: number | null | undefined): string {
+  return v == null ? '—' : v.toFixed(4);
+}
+
+/**
+ * Format a value from the untyped gate payload for display.
+ *
+ * - null/undefined → '—'
+ * - integers → printed as-is; other numbers → four decimals
+ * - objects and arrays → JSON text (plain `String()` would print "[object Object]")
+ * - anything else → `String(v)`
+ */
+function fmtThreshold(v: unknown): string {
+  if (v == null) return '—';
+  if (typeof v === 'number') {
+    return Number.isInteger(v) ? String(v) : v.toFixed(4);
+  }
+  if (typeof v === 'object') {
+    // JSON.stringify returns undefined when a custom toJSON yields undefined; fall back to String().
+    return JSON.stringify(v) ?? String(v);
+  }
+  return String(v);
+}
+
+/** Text colour for a win rate / accuracy: green at or above `threshold`, red below it, neutral when null/undefined. */
+function colorForRate(v: number | null | undefined, threshold: number): string {
+  if (v == null) {
+    return 'text-gray-100';
+  }
+  if (v >= threshold) {
+    return 'text-green-400';
+  }
+  return 'text-red-400';
+}
+
+/**
+ * Text colour for an IC value: green at or above 0.03, yellow for positive
+ * values below 0.03, red for zero or negative values, gray when null/undefined.
+ */
+function colorForIC(v: number | null | undefined): string {
+  if (v == null) return 'text-gray-400';
+  if (v >= 0.03) return 'text-green-400';
+  return v > 0 ? 'text-yellow-400' : 'text-red-400';
+}
@@ -1,13 +1,92 @@
+/**
+ * Recommendation detail page with validation context.
+ *
+ * Shows original confidence alongside calibrated confidence (historical win rate),
+ * evidence quality indicators, source reliability, and live eligibility status.
+ *
+ * Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
+ */
 import { useParams, Link } from '@tanstack/react-router';
-import { useRecommendation } from '../api/hooks';
+import { AlertTriangle, ShieldCheck, ShieldX, Info } from 'lucide-react';
+import {
+  useRecommendation,
+  useValidationCalibration,
+  useValidationGateStatus,
+  useValidationAttributionSources,
+} from '../api/hooks';
 import { StatusBadge, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';

 export function RecommendationDetailPage() {
  const { id } = useParams({ from: '/recommendations/$id' });
  const { data: rec, isLoading } = useRecommendation(id);
+  const { data: calibration } = useValidationCalibration();
+  const { data: gateData } = useValidationGateStatus();
+  const { data: sourcesData } = useValidationAttributionSources();

  if (isLoading || !rec) return <LoadingSpinner />;

+  // --- Calibration: find the bucket matching this recommendation's confidence ---
+  const matchingBucket = calibration?.buckets?.find(
+    (b) => rec.confidence >= b.bucket_low && rec.confidence < b.bucket_high,
+  );
+  // Handle edge case: confidence of exactly 1.0 falls in the last bucket [0.90, 1.00]
+  const calibratedBucket =
+    matchingBucket ??
+    (rec.confidence >= 1.0
+      ? calibration?.buckets?.find((b) => b.bucket_high >= 1.0)
+      : undefined);
+
+  const historicalWinRate = calibratedBucket?.observed_win_rate;
+
+  // --- Evidence counts ---
+  const totalEvidenceCount = rec.evidence.length;
+  // Compute duplicate evidence: group by normalized title, count extras
+  const titleCounts = new Map<string, number>();
+  for (const ev of rec.evidence) {
+    const key = (ev.title ?? '').toLowerCase().trim();
+    titleCounts.set(key, (titleCounts.get(key) ?? 0) + 1);
+  }
+  let duplicateEvidenceCount = 0;
+  for (const count of titleCounts.values()) {
+    if (count > 1) duplicateEvidenceCount += count - 1;
+  }
+  const uniqueEvidenceCount = totalEvidenceCount - duplicateEvidenceCount;
+  const duplicateRatio = totalEvidenceCount > 0 ? duplicateEvidenceCount / totalEvidenceCount : 0;
+  const hasDuplicateWarning = duplicateRatio > 0.2;
+
+  // --- Source reliability: find primary contributing sources ---
+  const evidenceSources = new Map<string, number>();
+  for (const ev of rec.evidence) {
+    const src = ev.source_type ?? ev.publisher ?? 'unknown';
+    evidenceSources.set(src, (evidenceSources.get(src) ?? 0) + ev.weight);
+  }
+  // Sort by total weight descending to find primary source
+  const sortedSources = [...evidenceSources.entries()].sort((a, b) => b[1] - a[1]);
+  const primarySourceType = sortedSources[0]?.[0];
+
+  // Look up source reliability from attribution data
+  const primarySourceAttribution = sourcesData?.sources?.find(
+    (s) => s.source_type === primarySourceType || s.source === primarySourceType,
+  );
+  // Source reliability is approximated from win_rate via Bayesian shrinkage
+  // The attribution data has win_rate which is the observed metric
+  const primarySourceWinRate = primarySourceAttribution?.win_rate;
+  // Bayesian shrinkage: reliability = 0.5 + (n/(n+30)) * (win_rate - 0.5)
+  const primarySourceCount = primarySourceAttribution?.prediction_count ?? 0;
+  const primarySourceReliability =
+    primarySourceWinRate != null
+      ? 0.5 + (primarySourceCount / (primarySourceCount + 30)) * (primarySourceWinRate - 0.5)
+      : undefined;
+  const hasLowReliabilityWarning =
+    primarySourceReliability != null && primarySourceReliability < 0.4;
+
+  // --- Gate status ---
+  const gateStatus = gateData?.gate_status as {
+    passed?: boolean;
+    reason?: string;
+    threshold_results?: Array<{ name: string; threshold: number; actual: number; passed: boolean }>;
+  } | null;
+
  return (
    <div className="space-y-6">
      <div className="flex items-center gap-3">
@@ -28,6 +107,137 @@ export function RecommendationDetailPage() {
        </dl>
      </Card>

+      {/* Validation Context Card — Requirements 13.1–13.7 */}
+      <Card>
+        <h2 className="mb-3 text-sm font-medium text-gray-400">Validation Context</h2>
+        <dl className="grid grid-cols-2 gap-x-8 gap-y-3 text-sm sm:grid-cols-3">
+          {/* 13.1: Original confidence alongside calibrated confidence */}
+          <div>
+            <dt className="text-gray-500">Original Confidence</dt>
+            <dd className="text-gray-200">{(rec.confidence * 100).toFixed(1)}%</dd>
+          </div>
+          <div>
+            <dt className="text-gray-500">Calibrated Confidence</dt>
+            <dd className="text-gray-200">
+              {historicalWinRate != null
+                ? `${(historicalWinRate * 100).toFixed(1)}%`
+                : 'N/A'}
+            </dd>
+          </div>
+
+          {/* 13.2: Historical win rate for similar confidence levels */}
+          <div>
+            <dt className="text-gray-500">Historical Win Rate</dt>
+            <dd className="text-gray-200">
+              {historicalWinRate != null ? (
+                <span>
+                  {(historicalWinRate * 100).toFixed(1)}%
+                  {calibratedBucket && (
+                    <span className="ml-1 text-xs text-gray-500">
+                      ({calibratedBucket.prediction_count} predictions)
+                    </span>
+                  )}
+                </span>
+              ) : (
+                'N/A'
+              )}
+            </dd>
+          </div>
+
+          {/* 13.3: Evidence count, unique evidence count, duplicate evidence count */}
+          <div>
+            <dt className="text-gray-500">Evidence Count</dt>
+            <dd className="text-gray-200">{totalEvidenceCount}</dd>
+          </div>
+          <div>
+            <dt className="text-gray-500">Unique Evidence</dt>
+            <dd className="text-gray-200">{uniqueEvidenceCount}</dd>
+          </div>
+          <div>
+            <dt className="flex items-center gap-1 text-gray-500">
+              Duplicate Evidence
+              {/* 13.6: Warning badge when duplicate evidence count > 20% of total */}
+              {hasDuplicateWarning && (
+                <span
+                  className="inline-flex items-center gap-0.5 rounded-full border border-yellow-700/50 bg-yellow-900/40 px-1.5 py-0.5 text-[10px] font-medium text-yellow-400"
+                  title="Duplicate evidence exceeds 20% of total — potential evidence inflation"
+                >
+                  <AlertTriangle size={10} />
+                  &gt;20%
+                </span>
+              )}
+            </dt>
+            <dd className="text-gray-200">
+              {duplicateEvidenceCount}
+              {totalEvidenceCount > 0 && (
+                <span className="ml-1 text-xs text-gray-500">
+                  ({(duplicateRatio * 100).toFixed(0)}%)
+                </span>
+              )}
+            </dd>
+          </div>
+
+          {/* 13.4: Source reliability indicator */}
+          <div>
+            <dt className="flex items-center gap-1 text-gray-500">
+              Primary Source Reliability
+              {/* 13.7: Warning badge when primary source reliability < 0.4 */}
+              {hasLowReliabilityWarning && (
+                <span
+                  className="inline-flex items-center gap-0.5 rounded-full border border-red-700/50 bg-red-900/40 px-1.5 py-0.5 text-[10px] font-medium text-red-400"
+                  title="Primary source reliability is below 0.4 — low or unknown reliability"
+                >
+                  <AlertTriangle size={10} />
+                  Low
+                </span>
+              )}
+            </dt>
+            <dd className="text-gray-200">
+              {primarySourceReliability != null ? (
+                <span>
+                  {primarySourceReliability.toFixed(3)}
+                  {primarySourceType && (
+                    <span className="ml-1 text-xs text-gray-500">({primarySourceType})</span>
+                  )}
+                </span>
+              ) : (
+                'N/A'
+              )}
+            </dd>
+          </div>
+
+          {/* 13.5: Live eligibility status with reason */}
+          <div className="col-span-2">
+            <dt className="text-gray-500">Live Eligibility</dt>
+            <dd>
+              {gateStatus != null ? (
+                <div className="flex items-center gap-2">
+                  {gateStatus.passed ? (
+                    <span className="inline-flex items-center gap-1 text-green-400">
+                      <ShieldCheck size={14} />
+                      Gate Passed
+                    </span>
+                  ) : (
+                    <span className="inline-flex items-center gap-1 text-red-400">
+                      <ShieldX size={14} />
+                      Gate Failed
+                    </span>
+                  )}
+                  {gateStatus.reason && (
+                    <span className="text-xs text-gray-500">{gateStatus.reason}</span>
+                  )}
+                </div>
+              ) : (
+                <span className="inline-flex items-center gap-1 text-gray-500">
+                  <Info size={14} />
+                  N/A — no gate evaluation available
+                </span>
+              )}
+            </dd>
+          </div>
+        </dl>
+      </Card>
+
      {rec.thesis && (
        <Card>
          <h2 className="mb-2 text-sm font-medium text-gray-400">Thesis</h2>
@@ -73,6 +73,97 @@ export const mockVariantPerfHistory = [
  { hour: '2026-04-10T11:00:00Z', invocations: 12, successes: 11, avg_duration_ms: 1300, avg_confidence: 0.82 },
 ];

+// Validation: Model Quality & Calibration mock data
+/**
+ * Mock payload served by the `GET /api/validation/summary` handler.
+ *
+ * `snapshot` uses the same field names as the `ModelMetricSnapshot` interface.
+ * `gate_status` here reports `passed: true` with every threshold met, whereas
+ * `mockValidationGateStatus` reports a failing gate.
+ */
+export const mockValidationSummary = {
+  snapshot: {
+    id: 'ms-1',
+    generated_at: '2026-04-11T12:00:00Z',
+    lookback_window: '30d',
+    horizon: '7d',
+    prediction_count: 150,
+    win_rate: 0.58,
+    directional_accuracy: 0.56,
+    information_coefficient: 0.045,
+    rank_information_coefficient: 0.038,
+    avg_return: 0.012,
+    avg_excess_return_vs_spy: 0.003,
+    avg_excess_return_vs_sector: 0.002,
+    calibration_error: 0.08,
+    brier_score: 0.21,
+    buy_win_rate: 0.61,
+    sell_win_rate: 0.54,
+    hold_win_rate: 0.50,
+    metadata: {},
+  },
+  gate_status: {
+    passed: true,
+    reason: 'all thresholds met',
+    threshold_results: [
+      { name: 'min_prediction_count', threshold: 100, actual: 150, passed: true },
+      { name: 'min_ic', threshold: 0.03, actual: 0.045, passed: true },
+      { name: 'min_win_rate', threshold: 0.53, actual: 0.58, passed: true },
+    ],
+  },
+};
+
+/**
+ * Mock payload served by the `GET /api/validation/calibration` handler.
+ *
+ * Five confidence buckets covering 0.50–1.00; the 0.70–0.80 and 0.90–1.00
+ * buckets are flagged `miscalibrated: true`. The calibration-table test expects
+ * at least one "Miscalibrated" label — presumably rendered from this flag
+ * (the component is not in this file; confirm before changing the flags).
+ */
+export const mockValidationCalibration = {
+  buckets: [
+    { bucket_low: 0.50, bucket_high: 0.60, avg_confidence: 0.55, observed_win_rate: 0.52, prediction_count: 30, miscalibrated: false },
+    { bucket_low: 0.60, bucket_high: 0.70, avg_confidence: 0.65, observed_win_rate: 0.58, prediction_count: 40, miscalibrated: false },
+    { bucket_low: 0.70, bucket_high: 0.80, avg_confidence: 0.75, observed_win_rate: 0.55, prediction_count: 35, miscalibrated: true },
+    { bucket_low: 0.80, bucket_high: 0.90, avg_confidence: 0.85, observed_win_rate: 0.70, prediction_count: 25, miscalibrated: false },
+    { bucket_low: 0.90, bucket_high: 1.00, avg_confidence: 0.95, observed_win_rate: 0.72, prediction_count: 20, miscalibrated: true },
+  ],
+  lookback: '30d',
+  horizon: '7d',
+};
+
+/**
+ * Mock payload served by the `GET /api/validation/gate-status` handler.
+ *
+ * Deliberately a failing gate (`passed: false`, `min_ic` below its threshold):
+ * the gate-status test asserts the "Live Trading Gate: FAIL" text.
+ */
+export const mockValidationGateStatus = {
+  gate_status: {
+    passed: false,
+    reason: 'failed: min_ic below threshold',
+    threshold_results: [
+      { name: 'min_prediction_count', threshold: 100, actual: 150, passed: true },
+      { name: 'min_ic', threshold: 0.03, actual: 0.02, passed: false },
+      { name: 'min_win_rate', threshold: 0.53, actual: 0.58, passed: true },
+    ],
+  },
+};
+
+/**
+ * Mock payload served by the `GET /api/validation/ic-by-horizon` handler.
+ * Contains IC / rank-IC rows for two horizons only: `1h` and `7d`.
+ */
+export const mockValidationICByHorizon = {
+  horizons: [
+    { horizon: '1h', information_coefficient: 0.02, rank_information_coefficient: 0.015, prediction_count: 120, generated_at: '2026-04-11T12:00:00Z' },
+    { horizon: '7d', information_coefficient: 0.045, rank_information_coefficient: 0.038, prediction_count: 100, generated_at: '2026-04-11T12:00:00Z' },
+  ],
+  lookback: '30d',
+};
+
+/**
+ * Mock payload served by the `GET /api/validation/attribution/sources` handler.
+ * Contains a single source row (`source: 'Reuters'`, `source_type: 'news_api'`).
+ */
+export const mockValidationAttributionSources = {
+  sources: [
+    { source: 'Reuters', source_type: 'news_api', prediction_count: 50, avg_weight: 0.6, avg_contribution_score: 0.3, win_rate: 0.62, avg_future_return: 0.015, avg_excess_return_vs_spy: 0.005, information_coefficient: 0.05, duplicate_rate: 0.1 },
+  ],
+  lookback: '30d',
+  horizon: '7d',
+};
+
+/**
+ * Mock payload served by the `GET /api/validation/attribution/catalysts` handler.
+ * Contains a single catalyst row (`catalyst_type: 'earnings'`).
+ */
+export const mockValidationAttributionCatalysts = {
+  catalysts: [
+    { catalyst_type: 'earnings', prediction_count: 40, win_rate: 0.65, avg_future_return: 0.02, avg_excess_return_vs_spy: 0.008, information_coefficient: 0.06 },
+  ],
+  lookback: '30d',
+  horizon: '7d',
+};
+
+/**
+ * Mock payload served by the `GET /api/validation/attribution/layers` handler.
+ * Three layers (`company`, `macro`, `competitive`); the `competitive` row has
+ * `dominant_ic: null`, so consumers see a missing-IC case.
+ */
+export const mockValidationAttributionLayers = {
+  layers: [
+    { layer: 'company', avg_contribution_pct: 0.55, dominant_win_rate: 0.60, dominant_ic: 0.04 },
+    { layer: 'macro', avg_contribution_pct: 0.30, dominant_win_rate: 0.52, dominant_ic: 0.02 },
+    { layer: 'competitive', avg_contribution_pct: 0.15, dominant_win_rate: 0.48, dominant_ic: null },
+  ],
+  lookback: '30d',
+  horizon: '7d',
+};
+
 export const handlers = [
  // Query API (proxied at /api/)
  http.get('/api/companies', () => HttpResponse.json(mockCompanies)),
@@ -242,4 +333,13 @@ export const handlers = [
    const body = await request.json() as Record<string, unknown>;
    return HttpResponse.json({ enabled: body.enabled, previous_enabled: true, toggled_by: 'operator' });
  }),
+
+  // Validation: Model Quality & Calibration endpoints
+  http.get('/api/validation/summary', () => HttpResponse.json(mockValidationSummary)),
+  http.get('/api/validation/calibration', () => HttpResponse.json(mockValidationCalibration)),
+  http.get('/api/validation/gate-status', () => HttpResponse.json(mockValidationGateStatus)),
+  http.get('/api/validation/ic-by-horizon', () => HttpResponse.json(mockValidationICByHorizon)),
+  http.get('/api/validation/attribution/sources', () => HttpResponse.json(mockValidationAttributionSources)),
+  http.get('/api/validation/attribution/catalysts', () => HttpResponse.json(mockValidationAttributionCatalysts)),
+  http.get('/api/validation/attribution/layers', () => HttpResponse.json(mockValidationAttributionLayers)),
 ];
@@ -169,6 +169,55 @@ describe('Global Events page', () => {
  });
 });

+describe('OpsModel validation tab', () => {
+  /** Per-test timeout in milliseconds. */
+  const TEST_TIMEOUT_MS = 10000;
+
+  /** Render /ops/model and wait until the 'Model Performance' text is on screen. */
+  const renderOpsModel = async () => {
+    renderRoute('/ops/model');
+    await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
+  };
+
+  /** Click the 'Model Validation' tab button. */
+  const openValidationTab = () => userEvent.click(screen.getByText('Model Validation'));
+
+  it('renders Model Validation tab with summary cards', async () => {
+    await renderOpsModel();
+
+    // Both tab buttons are present before switching tabs.
+    for (const tabLabel of ['Extraction Performance', 'Model Validation']) {
+      expect(screen.getByText(tabLabel)).toBeInTheDocument();
+    }
+
+    await openValidationTab();
+
+    // Metric labels unique to the validation summary cards.
+    await waitFor(() => {
+      for (const metricLabel of ['Brier Score', 'ECE', 'Directional Accuracy', 'Excess vs SPY']) {
+        expect(screen.getByText(metricLabel)).toBeInTheDocument();
+      }
+    });
+  }, TEST_TIMEOUT_MS);
+
+  it('renders calibration table with miscalibration warning', async () => {
+    await renderOpsModel();
+    await openValidationTab();
+
+    await waitFor(() => {
+      expect(screen.getByText('Calibration by Confidence Bucket')).toBeInTheDocument();
+    });
+
+    // At least one bucket must carry the warning text.
+    expect(screen.getAllByText('Miscalibrated').length).toBeGreaterThanOrEqual(1);
+  }, TEST_TIMEOUT_MS);
+
+  it('renders gate status pass/fail indicator', async () => {
+    await renderOpsModel();
+    await openValidationTab();
+
+    // The gate-status endpoint returns passed: false
+    await waitFor(() => {
+      expect(screen.getByText(/Live Trading Gate: FAIL/)).toBeInTheDocument();
+    });
+  }, TEST_TIMEOUT_MS);
+});
+
 describe('Agents page', () => {
  it('renders agent list in sidebar', async () => {
    renderRoute('/agents');