feat: model validation, calibration, and signal quality layer
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled

- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
This commit is contained in:
Celes Renata
2026-05-01 03:04:58 +00:00
parent 5d2ffd9163
commit 7fcc8a6c07
23 changed files with 7554 additions and 9 deletions
+166
View File
@@ -885,3 +885,169 @@ export function useToggleMacro() {
onSuccess: () => qc.invalidateQueries({ queryKey: ['macro-status'] }),
});
}
// ---------------------------------------------------------------------------
// Validation: Model Quality & Calibration (Requirements 12.1, 12.2, 12.3, 12.7)
// ---------------------------------------------------------------------------
/**
 * One model-metric snapshot, as carried in `ValidationSummary.snapshot`.
 *
 * Every metric except `prediction_count` is nullable, so consumers must check
 * for `null` before formatting or comparing a value.
 * NOTE(review): units and value ranges (fractions vs. percentages) are not
 * visible from this declaration — confirm against the API response.
 */
export interface ModelMetricSnapshot {
id: string;
generated_at: string;
lookback_window: string;
horizon: string;
prediction_count: number;
win_rate: number | null;
directional_accuracy: number | null;
information_coefficient: number | null;
rank_information_coefficient: number | null;
avg_return: number | null;
avg_excess_return_vs_spy: number | null;
avg_excess_return_vs_sector: number | null;
calibration_error: number | null;
brier_score: number | null;
buy_win_rate: number | null;
sell_win_rate: number | null;
hold_win_rate: number | null;
// Free-form bag; its keys are not described anywhere in this file.
metadata: Record<string, unknown> | null;
}
/**
 * Response type requested by `useValidationSummary` from `/api/validation/summary`.
 * Both fields may be `null`; `gate_status` is an untyped record, so its fields
 * must be narrowed before use.
 */
export interface ValidationSummary {
snapshot: ModelMetricSnapshot | null;
gate_status: Record<string, unknown> | null;
}
/**
 * One entry of `ValidationCalibration.buckets`.
 * NOTE(review): `bucket_low` / `bucket_high` are presumably confidence bounds
 * expressed as fractions in [0, 1] — confirm against the API.
 */
export interface CalibrationBucket {
bucket_low: number;
bucket_high: number;
avg_confidence: number;
observed_win_rate: number;
prediction_count: number;
// Flag supplied with the bucket; how it is computed is not visible here.
miscalibrated: boolean;
}
/**
 * Response type requested by `useValidationCalibration` from
 * `/api/validation/calibration`: the bucket list plus `lookback` and `horizon`
 * strings (presumably echoing the request parameters — confirm).
 */
export interface ValidationCalibration {
buckets: CalibrationBucket[];
lookback: string;
horizon: string;
}
/**
 * One entry of `ValidationICByHorizon.horizons`.
 * Both coefficients and `generated_at` are nullable and must be checked before use.
 */
export interface ICByHorizonEntry {
horizon: string;
information_coefficient: number | null;
rank_information_coefficient: number | null;
prediction_count: number;
generated_at: string | null;
}
/**
 * Response type requested by `useValidationICByHorizon` from
 * `/api/validation/ic-by-horizon`.
 */
export interface ValidationICByHorizon {
horizons: ICByHorizonEntry[];
lookback: string;
}
/**
 * Response type requested by `useValidationGateStatus` from
 * `/api/validation/gate-status`.
 * `gate_status` is an untyped record (or `null`), so its fields must be
 * narrowed at runtime; `updated_at` and `message` may be absent.
 */
export interface ValidationGateStatus {
gate_status: Record<string, unknown> | null;
updated_at?: string | null;
message?: string;
}
/**
 * Requests `/api/validation/summary` through `useGet`.
 *
 * @param lookback - sent as the `lookback` query parameter; omitted when empty.
 * @param horizon - sent as the `horizon` query parameter; omitted when empty.
 * @returns whatever `useGet<ValidationSummary>` returns, keyed by
 *   `['validation-summary', lookback, horizon]`.
 */
export function useValidationSummary(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  if (lookback) params.set('lookback', lookback);
  if (horizon) params.set('horizon', horizon);

  // Only append "?" when at least one parameter was actually set.
  const query = params.toString();
  const base = '/api/validation/summary';
  const path = query ? `${base}?${query}` : base;

  return useGet<ValidationSummary>(['validation-summary', lookback, horizon], 'query', path);
}
/**
 * Requests `/api/validation/calibration` through `useGet`.
 *
 * @param lookback - sent as the `lookback` query parameter; omitted when empty.
 * @param horizon - sent as the `horizon` query parameter; omitted when empty.
 * @returns whatever `useGet<ValidationCalibration>` returns, keyed by
 *   `['validation-calibration', lookback, horizon]`.
 */
export function useValidationCalibration(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  if (lookback) params.set('lookback', lookback);
  if (horizon) params.set('horizon', horizon);

  // Only append "?" when at least one parameter was actually set.
  const query = params.toString();
  const base = '/api/validation/calibration';
  const path = query ? `${base}?${query}` : base;

  return useGet<ValidationCalibration>(['validation-calibration', lookback, horizon], 'query', path);
}
/**
 * Requests `/api/validation/ic-by-horizon` through `useGet`.
 *
 * @param lookback - sent as the `lookback` query parameter; omitted when empty.
 * @returns whatever `useGet<ValidationICByHorizon>` returns, keyed by
 *   `['validation-ic-by-horizon', lookback]`.
 */
export function useValidationICByHorizon(lookback = '30d') {
  const params = new URLSearchParams();
  if (lookback) params.set('lookback', lookback);

  // Only append "?" when the parameter was actually set.
  const query = params.toString();
  const base = '/api/validation/ic-by-horizon';
  const path = query ? `${base}?${query}` : base;

  return useGet<ValidationICByHorizon>(['validation-ic-by-horizon', lookback], 'query', path);
}
/**
 * Requests `/api/validation/gate-status` through `useGet`; takes no parameters
 * and uses the fixed key `['validation-gate-status']`.
 */
export function useValidationGateStatus() {
  const path = '/api/validation/gate-status';
  return useGet<ValidationGateStatus>(['validation-gate-status'], 'query', path);
}
// ---------------------------------------------------------------------------
// Validation: Attribution — Sources, Catalysts, Layers (Requirements 12.4, 12.5, 12.6)
// ---------------------------------------------------------------------------
/**
 * One entry of `SourceAttributionResponse.sources`.
 * Only `information_coefficient` is nullable; all other metrics are declared
 * as plain numbers.
 * NOTE(review): rates and returns are presumably fractions rather than
 * percentages — confirm against the API.
 */
export interface SourceAttribution {
source: string;
source_type: string;
prediction_count: number;
avg_weight: number;
avg_contribution_score: number;
win_rate: number;
avg_future_return: number;
avg_excess_return_vs_spy: number;
information_coefficient: number | null;
duplicate_rate: number;
}
/**
 * Response type requested by `useValidationAttributionSources` from
 * `/api/validation/attribution/sources`.
 */
export interface SourceAttributionResponse {
sources: SourceAttribution[];
lookback: string;
horizon: string;
}
/**
 * One entry of `CatalystAttributionResponse.catalysts`.
 * Only `information_coefficient` is nullable.
 */
export interface CatalystAttribution {
catalyst_type: string;
prediction_count: number;
win_rate: number;
avg_future_return: number;
avg_excess_return_vs_spy: number;
information_coefficient: number | null;
}
/**
 * Response type requested by `useValidationAttributionCatalysts` from
 * `/api/validation/attribution/catalysts`.
 */
export interface CatalystAttributionResponse {
catalysts: CatalystAttribution[];
lookback: string;
horizon: string;
}
/**
 * One entry of `LayerAttributionResponse.layers`.
 * NOTE(review): despite the `_pct` suffix, it is not visible here whether
 * `avg_contribution_pct` is a fraction (0–1) or a percentage (0–100) — confirm
 * before formatting it.
 */
export interface LayerAttribution {
layer: string;
avg_contribution_pct: number;
dominant_win_rate: number;
dominant_ic: number | null;
}
/**
 * Response type requested by `useValidationAttributionLayers` from
 * `/api/validation/attribution/layers`.
 */
export interface LayerAttributionResponse {
layers: LayerAttribution[];
lookback: string;
horizon: string;
}
/**
 * Requests `/api/validation/attribution/sources` through `useGet`.
 *
 * @param lookback - sent as the `lookback` query parameter; omitted when empty.
 * @param horizon - sent as the `horizon` query parameter; omitted when empty.
 * @returns whatever `useGet<SourceAttributionResponse>` returns, keyed by
 *   `['validation-attribution-sources', lookback, horizon]`.
 */
export function useValidationAttributionSources(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  if (lookback) params.set('lookback', lookback);
  if (horizon) params.set('horizon', horizon);

  // Only append "?" when at least one parameter was actually set.
  const query = params.toString();
  const base = '/api/validation/attribution/sources';
  const path = query ? `${base}?${query}` : base;

  return useGet<SourceAttributionResponse>(['validation-attribution-sources', lookback, horizon], 'query', path);
}
/**
 * Requests `/api/validation/attribution/catalysts` through `useGet`.
 *
 * @param lookback - sent as the `lookback` query parameter; omitted when empty.
 * @param horizon - sent as the `horizon` query parameter; omitted when empty.
 * @returns whatever `useGet<CatalystAttributionResponse>` returns, keyed by
 *   `['validation-attribution-catalysts', lookback, horizon]`.
 */
export function useValidationAttributionCatalysts(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  if (lookback) params.set('lookback', lookback);
  if (horizon) params.set('horizon', horizon);

  // Only append "?" when at least one parameter was actually set.
  const query = params.toString();
  const base = '/api/validation/attribution/catalysts';
  const path = query ? `${base}?${query}` : base;

  return useGet<CatalystAttributionResponse>(['validation-attribution-catalysts', lookback, horizon], 'query', path);
}
/**
 * Requests `/api/validation/attribution/layers` through `useGet`.
 *
 * @param lookback - sent as the `lookback` query parameter; omitted when empty.
 * @param horizon - sent as the `horizon` query parameter; omitted when empty.
 * @returns whatever `useGet<LayerAttributionResponse>` returns, keyed by
 *   `['validation-attribution-layers', lookback, horizon]`.
 */
export function useValidationAttributionLayers(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  if (lookback) params.set('lookback', lookback);
  if (horizon) params.set('horizon', horizon);

  // Only append "?" when at least one parameter was actually set.
  const query = params.toString();
  const base = '/api/validation/attribution/layers';
  const path = query ? `${base}?${query}` : base;

  return useGet<LayerAttributionResponse>(['validation-attribution-layers', lookback, horizon], 'query', path);
}
+607 -6
View File
@@ -1,9 +1,89 @@
import { useState } from 'react';
import { useModelPerformance, useModelFailures } from '../api/hooks';
import {
useModelPerformance,
useModelFailures,
useValidationSummary,
useValidationCalibration,
useValidationICByHorizon,
useValidationGateStatus,
useValidationAttributionSources,
useValidationAttributionCatalysts,
useValidationAttributionLayers,
} from '../api/hooks';
import type {
ValidationSummary,
ValidationCalibration,
CalibrationBucket,
ValidationICByHorizon,
ICByHorizonEntry,
ValidationGateStatus,
SourceAttributionResponse,
CatalystAttributionResponse,
LayerAttributionResponse,
SourceAttribution,
CatalystAttribution,
LayerAttribution,
} from '../api/hooks';
import { LoadingSpinner, DateRangeSelector, StatusBadge, Card } from '../components/ui';
import { AlertTriangle, ShieldCheck, ShieldX } from 'lucide-react';
/** Identifier of the tab currently shown on the model performance page. */
type Tab = 'extraction' | 'validation';
/**
 * Page shell for "Model Performance": heading, a two-tab bar, and the content
 * of whichever tab is active.
 *
 * The hours range selector is rendered only while the extraction tab is
 * active and its value is handed to `ExtractionTab`; `ValidationTab` takes no
 * props.
 */
export function OpsModelPage() {
  const [hours, setHours] = useState(24);
  const [activeTab, setActiveTab] = useState<Tab>('extraction');

  const onExtraction = activeTab === 'extraction';
  const onValidation = activeTab === 'validation';

  // Shared button styling; the selected tab gets the brand underline.
  const tabClass = (selected: boolean): string =>
    `px-4 py-2 text-sm font-medium transition-colors ${
      selected
        ? 'border-b-2 border-brand-500 text-brand-400'
        : 'text-gray-400 hover:text-gray-200'
    }`;

  return (
    <div className="space-y-6">
      <div className="flex items-center justify-between">
        <h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
        {onExtraction && <DateRangeSelector value={hours} onChange={setHours} />}
      </div>

      {/* Tab bar */}
      <div className="flex border-b border-surface-700" role="tablist" aria-label="Model performance tabs">
        <button
          role="tab"
          aria-selected={onExtraction}
          onClick={() => setActiveTab('extraction')}
          className={tabClass(onExtraction)}
        >
          Extraction Performance
        </button>
        <button
          role="tab"
          aria-selected={onValidation}
          onClick={() => setActiveTab('validation')}
          className={tabClass(onValidation)}
        >
          Model Validation
        </button>
      </div>

      {onExtraction ? <ExtractionTab hours={hours} /> : <ValidationTab />}
    </div>
  );
}
/* ------------------------------------------------------------------ */
/* Extraction Performance Tab (existing content) */
/* ------------------------------------------------------------------ */
function ExtractionTab({ hours }: { hours: number }) {
const { data: perf, isLoading } = useModelPerformance(hours);
const { data: failures } = useModelFailures(hours);
@@ -13,11 +93,6 @@ export function OpsModelPage() {
return (
<div className="space-y-6">
<div className="flex items-center justify-between">
<h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
<DateRangeSelector value={hours} onChange={setHours} />
</div>
{/* Key metrics */}
<div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
<StatCard label="Total Extractions" value={String(p.total_extractions ?? '—')} />
@@ -63,6 +138,482 @@ export function OpsModelPage() {
);
}
/* ------------------------------------------------------------------ */
/* Model Validation Tab (new) */
/* ------------------------------------------------------------------ */
/**
 * Content of the "Model Validation" tab.
 *
 * Calls all seven validation hooks with their default arguments and forwards
 * each result's `data` / `isLoading` / `error` to the matching section, so
 * every section renders its own loading and error state.
 */
function ValidationTab() {
  // Hook call order is kept stable: summary, calibration, IC, gate, sources, catalysts, layers.
  const summary = useValidationSummary();
  const calibration = useValidationCalibration();
  const ic = useValidationICByHorizon();
  const gate = useValidationGateStatus();
  const sources = useValidationAttributionSources();
  const catalysts = useValidationAttributionCatalysts();
  const layers = useValidationAttributionLayers();

  return (
    <div className="space-y-6">
      {/* Gate Status */}
      <GateStatusSection data={gate.data} isLoading={gate.isLoading} error={gate.error} />

      {/* Summary Cards */}
      <SummaryCardsSection data={summary.data} isLoading={summary.isLoading} error={summary.error} />

      {/* Calibration Table */}
      <CalibrationTableSection data={calibration.data} isLoading={calibration.isLoading} error={calibration.error} />

      {/* IC by Horizon Table */}
      <ICByHorizonSection data={ic.data} isLoading={ic.isLoading} error={ic.error} />

      {/* Source Attribution Table */}
      <SourceAttributionSection data={sources.data} isLoading={sources.isLoading} error={sources.error} />

      {/* Catalyst Attribution Table */}
      <CatalystAttributionSection data={catalysts.data} isLoading={catalysts.isLoading} error={catalysts.error} />

      {/* Layer Attribution Table */}
      <LayerAttributionSection data={layers.data} isLoading={layers.isLoading} error={layers.error} />
    </div>
  );
}
/* ------------------------------------------------------------------ */
/* Gate Status Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the live-trading gate verdict and, when present, the per-threshold
 * results table.
 *
 * `gate_status` is an untyped `Record<string, unknown>`, so every field is
 * narrowed at runtime instead of being asserted with `as`:
 * - the gate shows PASS only when `passed` is exactly `true` (fails closed);
 * - `reason` is shown only when it is a string;
 * - `threshold_results` is used only when it is an array, and entries that
 *   are not objects are dropped, so a malformed payload cannot crash rendering.
 *
 * @param data - gate-status response, or `undefined` while none is available.
 * @param isLoading - when true, only a spinner is rendered.
 * @param error - when non-null, only an error card is rendered.
 */
function GateStatusSection({ data, isLoading, error }: {
  data: ValidationGateStatus | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load gate status" />;

  const gate = data?.gate_status;
  if (!gate) {
    return (
      <Card className="flex items-center gap-3">
        <ShieldX size={20} className="text-yellow-400" />
        <div>
          <div className="text-sm font-medium text-yellow-400">Gate Status Unknown</div>
          <div className="text-xs text-gray-500">No gate evaluation data available</div>
        </div>
      </Card>
    );
  }

  // Fail closed: anything other than a literal `true` is treated as not passed.
  const passed = gate.passed === true;
  const reason = typeof gate.reason === 'string' ? gate.reason : undefined;
  const rawThresholds: unknown = gate.threshold_results;
  const thresholds: Array<Record<string, unknown>> = Array.isArray(rawThresholds)
    ? rawThresholds.filter(
        (t): t is Record<string, unknown> => typeof t === 'object' && t !== null,
      )
    : [];

  return (
    <Card>
      <div className="mb-3 flex items-center gap-3">
        {passed ? (
          <ShieldCheck size={20} className="text-green-400" />
        ) : (
          <ShieldX size={20} className="text-red-400" />
        )}
        <div>
          <div className={`text-sm font-medium ${passed ? 'text-green-400' : 'text-red-400'}`}>
            Live Trading Gate: {passed ? 'PASS' : 'FAIL'}
          </div>
          {reason && <div className="text-xs text-gray-500">{reason}</div>}
        </div>
      </div>
      {thresholds.length > 0 && (
        <div className="overflow-x-auto">
          <table className="w-full text-left text-xs">
            <thead>
              <tr className="border-b border-surface-700 text-gray-500">
                <th className="pb-2 pr-4 font-medium">Threshold</th>
                <th className="pb-2 pr-4 font-medium">Required</th>
                <th className="pb-2 pr-4 font-medium">Actual</th>
                <th className="pb-2 font-medium">Status</th>
              </tr>
            </thead>
            <tbody>
              {thresholds.map((t, i) => (
                <tr key={i} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300">{String(t.name ?? '')}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-400">{fmtThreshold(t.threshold)}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtThreshold(t.actual)}</td>
                  <td className="py-1.5">
                    <StatusBadge status={t.passed === true ? 'success' : 'failed'} />
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      )}
    </Card>
  );
}
/* ------------------------------------------------------------------ */
/* Summary Cards Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the grid of headline validation metrics from the summary snapshot.
 *
 * A metric that is `null` is shown as "—" in the neutral card colour. This
 * applies to the ECE and "Excess vs SPY" cards too, which previously showed a
 * missing value in the warning (yellow) and failure (red) colours.
 *
 * @param data - validation summary response, or `undefined` while none is available.
 * @param isLoading - when true, only a spinner is rendered.
 * @param error - when non-null, only an error card is rendered.
 */
function SummaryCardsSection({ data, isLoading, error }: {
  data: ValidationSummary | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load validation summary" />;

  const snap = data?.snapshot;
  if (!snap) {
    return (
      <Card>
        <p className="text-sm text-gray-500">No validation data available yet. Metrics will appear once predictions have been evaluated.</p>
      </Card>
    );
  }

  const neutral = 'text-gray-100';
  // Picks `good` or `bad` by predicate, but stays neutral when the metric is missing.
  const tone = (
    value: number | null,
    isGood: (n: number) => boolean,
    good: string,
    bad: string,
  ): string => {
    if (value == null) return neutral;
    return isGood(value) ? good : bad;
  };

  return (
    <div className="grid grid-cols-2 gap-3 sm:grid-cols-3 lg:grid-cols-5">
      <StatCard label="Predictions" value={String(snap.prediction_count ?? '—')} />
      <StatCard
        label="Win Rate"
        value={fmtPct(snap.win_rate)}
        color={colorForRate(snap.win_rate, 0.53)}
      />
      <StatCard
        label="Directional Accuracy"
        value={fmtPct(snap.directional_accuracy)}
        color={colorForRate(snap.directional_accuracy, 0.53)}
      />
      <StatCard
        label="IC"
        value={fmtIC(snap.information_coefficient)}
        color={colorForIC(snap.information_coefficient)}
      />
      <StatCard
        label="Rank IC"
        value={fmtIC(snap.rank_information_coefficient)}
        color={colorForIC(snap.rank_information_coefficient)}
      />
      <StatCard
        label="Brier Score"
        value={snap.brier_score != null ? snap.brier_score.toFixed(4) : '—'}
        color={tone(snap.brier_score, (n) => n < 0.25, 'text-green-400', neutral)}
      />
      <StatCard
        label="ECE"
        value={snap.calibration_error != null ? snap.calibration_error.toFixed(4) : '—'}
        color={tone(snap.calibration_error, (n) => n < 0.15, 'text-green-400', 'text-yellow-400')}
      />
      <StatCard
        label="Excess vs SPY"
        value={fmtPct(snap.avg_excess_return_vs_spy)}
        color={tone(snap.avg_excess_return_vs_spy, (n) => n > 0, 'text-green-400', 'text-red-400')}
      />
    </div>
  );
}
/* ------------------------------------------------------------------ */
/* Calibration Table Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the calibration table, one `CalibrationRow` per bucket.
 * Shows a spinner while loading, an error card on failure, and a placeholder
 * card when the response has no buckets.
 */
function CalibrationTableSection({ data, isLoading, error }: {
  data: ValidationCalibration | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load calibration data" />;

  const bucketList = data?.buckets;
  if (!bucketList?.length) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Calibration</h2>
        <p className="text-sm text-gray-500">No calibration data available</p>
      </Card>
    );
  }

  const rows = bucketList.map((entry: CalibrationBucket, idx: number) => (
    <CalibrationRow key={idx} bucket={entry} />
  ));

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Calibration by Confidence Bucket</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Bucket</th>
              <th className="pb-2 pr-4 font-medium">Avg Confidence</th>
              <th className="pb-2 pr-4 font-medium">Observed Win Rate</th>
              <th className="pb-2 pr-4 font-medium">Count</th>
              <th className="pb-2 font-medium">Status</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
/**
 * One table row for a calibration bucket.
 * The row is flagged when the bucket's own `miscalibrated` flag is set or when
 * average confidence and observed win rate differ by more than 0.15; flagged
 * rows get an amber background and a "Miscalibrated" label instead of "OK".
 */
function CalibrationRow({ bucket }: { bucket: CalibrationBucket }) {
  const gap = Math.abs(bucket.avg_confidence - bucket.observed_win_rate);
  const flagged = bucket.miscalibrated || gap > 0.15;

  const statusCell = flagged ? (
    <span className="inline-flex items-center gap-1 text-amber-400">
      <AlertTriangle size={14} />
      <span>Miscalibrated</span>
    </span>
  ) : (
    <span className="text-green-400">OK</span>
  );

  return (
    <tr className={`border-b border-surface-800 ${flagged ? 'bg-amber-900/20' : ''}`}>
      <td className="py-1.5 pr-4 font-mono text-gray-300">
        [{fmtPctShort(bucket.bucket_low)}, {fmtPctShort(bucket.bucket_high)})
      </td>
      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(bucket.avg_confidence)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(bucket.observed_win_rate)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-400">{bucket.prediction_count}</td>
      <td className="py-1.5">{statusCell}</td>
    </tr>
  );
}
/* ------------------------------------------------------------------ */
/* IC by Horizon Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the IC / Rank IC table with one row per horizon entry.
 * Shows a spinner while loading, an error card on failure, and a placeholder
 * card when the response has no horizon entries.
 */
function ICByHorizonSection({ data, isLoading, error }: {
  data: ValidationICByHorizon | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load IC by horizon data" />;

  const entries = data?.horizons;
  if (!entries?.length) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">IC by Horizon</h2>
        <p className="text-sm text-gray-500">No IC data available</p>
      </Card>
    );
  }

  const rows = entries.map((entry: ICByHorizonEntry, idx: number) => {
    const ic = entry.information_coefficient;
    const rankIc = entry.rank_information_coefficient;
    return (
      <tr key={idx} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 font-mono text-gray-300">{entry.horizon}</td>
        <td className={`py-1.5 pr-4 font-mono ${colorForIC(ic)}`}>{fmtIC(ic)}</td>
        <td className={`py-1.5 pr-4 font-mono ${colorForIC(rankIc)}`}>{fmtIC(rankIc)}</td>
        <td className="py-1.5 font-mono text-gray-400">{entry.prediction_count}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Information Coefficient by Horizon</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Horizon</th>
              <th className="pb-2 pr-4 font-medium">IC</th>
              <th className="pb-2 pr-4 font-medium">Rank IC</th>
              <th className="pb-2 font-medium">Predictions</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
/* ------------------------------------------------------------------ */
/* Source Attribution Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the per-source performance table (win rate, IC, average return,
 * duplicate rate).
 * Shows a spinner while loading, an error card on failure, and a placeholder
 * card when the response has no sources.
 */
function SourceAttributionSection({ data, isLoading, error }: {
  data: SourceAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load source attribution data" />;

  const entries = data?.sources;
  if (!entries?.length) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Source Performance</h2>
        <p className="text-sm text-gray-500">No source attribution data available</p>
      </Card>
    );
  }

  const rows = entries.map((entry: SourceAttribution, idx: number) => {
    const winRateTone = colorForRate(entry.win_rate, 0.53);
    const icTone = colorForIC(entry.information_coefficient);
    return (
      <tr key={idx} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300">{entry.source}</td>
        <td className={`py-1.5 pr-4 font-mono ${winRateTone}`}>{fmtPct(entry.win_rate)}</td>
        <td className={`py-1.5 pr-4 font-mono ${icTone}`}>{fmtIC(entry.information_coefficient)}</td>
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_future_return)}</td>
        <td className="py-1.5 font-mono text-gray-300">{fmtPct(entry.duplicate_rate)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Source Performance</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Source</th>
              <th className="pb-2 pr-4 font-medium">Win Rate</th>
              <th className="pb-2 pr-4 font-medium">IC</th>
              <th className="pb-2 pr-4 font-medium">Avg Return</th>
              <th className="pb-2 font-medium">Duplicate Rate</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
/* ------------------------------------------------------------------ */
/* Catalyst Attribution Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the "Catalyst Truth Table": win rate, average return and IC per
 * catalyst type.
 * Shows a spinner while loading, an error card on failure, and a placeholder
 * card when the response has no catalysts.
 */
function CatalystAttributionSection({ data, isLoading, error }: {
  data: CatalystAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load catalyst attribution data" />;

  const entries = data?.catalysts;
  if (!entries?.length) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
        <p className="text-sm text-gray-500">No catalyst attribution data available</p>
      </Card>
    );
  }

  const rows = entries.map((entry: CatalystAttribution, idx: number) => {
    const winRateTone = colorForRate(entry.win_rate, 0.53);
    const icTone = colorForIC(entry.information_coefficient);
    return (
      <tr key={idx} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300">{entry.catalyst_type}</td>
        <td className={`py-1.5 pr-4 font-mono ${winRateTone}`}>{fmtPct(entry.win_rate)}</td>
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_future_return)}</td>
        <td className={`py-1.5 font-mono ${icTone}`}>{fmtIC(entry.information_coefficient)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Catalyst Type</th>
              <th className="pb-2 pr-4 font-medium">Win Rate</th>
              <th className="pb-2 pr-4 font-medium">Avg Return</th>
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
/* ------------------------------------------------------------------ */
/* Layer Attribution Section */
/* ------------------------------------------------------------------ */
/**
 * Renders the per-layer attribution table (contribution, dominant win rate,
 * dominant IC).
 * Shows a spinner while loading, an error card on failure, and a placeholder
 * card when the response has no layers.
 */
function LayerAttributionSection({ data, isLoading, error }: {
  data: LayerAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load layer attribution data" />;

  const entries = data?.layers;
  if (!entries?.length) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Layer Attribution</h2>
        <p className="text-sm text-gray-500">No layer attribution data available</p>
      </Card>
    );
  }

  const rows = entries.map((entry: LayerAttribution, idx: number) => {
    const winRateTone = colorForRate(entry.dominant_win_rate, 0.53);
    const icTone = colorForIC(entry.dominant_ic);
    return (
      <tr key={idx} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300 capitalize">{entry.layer}</td>
        {/* NOTE(review): fmtPct multiplies by 100 — confirm avg_contribution_pct is a fraction. */}
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_contribution_pct)}</td>
        <td className={`py-1.5 pr-4 font-mono ${winRateTone}`}>{fmtPct(entry.dominant_win_rate)}</td>
        <td className={`py-1.5 font-mono ${icTone}`}>{fmtIC(entry.dominant_ic)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Layer Attribution</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Layer</th>
              <th className="pb-2 pr-4 font-medium">Contribution %</th>
              <th className="pb-2 pr-4 font-medium">Dominant Win Rate</th>
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
/* ------------------------------------------------------------------ */
/* Shared helpers */
/* ------------------------------------------------------------------ */
function StatCard({ label, value, color = 'text-gray-100' }: { label: string; value: string; color?: string }) {
return (
<Card className="text-center">
@@ -71,3 +622,53 @@ function StatCard({ label, value, color = 'text-gray-100' }: { label: string; va
</Card>
);
}
/** Red-tinted card that displays a single error message line. */
function ErrorCard(props: { message: string }) {
  const { message } = props;
  const text = <p className="text-sm text-red-400">{message}</p>;
  return <Card className="border-red-700/50 bg-red-900/20">{text}</Card>;
}
/**
 * Renders a fraction as a percentage with one decimal place (0.5 → "50.0%").
 * `null` and `undefined` render as "—".
 */
function fmtPct(v: number | null | undefined): string {
  return v == null ? '—' : (v * 100).toFixed(1) + '%';
}
/**
 * Renders a fraction as a whole-number percentage (0.9 → "90%"), used for
 * bucket display. `null` and `undefined` render as "—".
 */
function fmtPctShort(v: number | null | undefined): string {
  return v == null ? '—' : (v * 100).toFixed(0) + '%';
}
/**
 * Renders an information-coefficient value with four decimal places.
 * `null` and `undefined` render as "—".
 */
function fmtIC(v: number | null | undefined): string {
  return v == null ? '—' : v.toFixed(4);
}
/**
 * Renders an untyped threshold value for display.
 * `null`/`undefined` → "—"; integers are printed as-is; other numbers get four
 * decimal places; any non-number value is passed through `String()`.
 */
function fmtThreshold(v: unknown): string {
  if (v === null || v === undefined) return '—';
  if (typeof v !== 'number') return String(v);
  return Number.isInteger(v) ? String(v) : v.toFixed(4);
}
/**
 * Text colour class for a win rate / accuracy value: green when the value is
 * at or above `threshold`, red when below, neutral light gray when missing.
 */
function colorForRate(v: number | null | undefined, threshold: number): string {
  if (v == null) return 'text-gray-100';
  if (v >= threshold) return 'text-green-400';
  return 'text-red-400';
}
/**
 * Text colour class for an information-coefficient value:
 * gray when missing, green at 0.03 or above, yellow when positive but below
 * 0.03, and red for zero or negative values.
 */
function colorForIC(v: number | null | undefined): string {
  if (v == null) return 'text-gray-400';
  if (v <= 0) return 'text-red-400';
  return v >= 0.03 ? 'text-green-400' : 'text-yellow-400';
}
+211 -1
View File
@@ -1,13 +1,92 @@
/**
* Recommendation detail page with validation context.
*
* Shows original confidence alongside calibrated confidence (historical win rate),
* evidence quality indicators, source reliability, and live eligibility status.
*
* Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
*/
import { useParams, Link } from '@tanstack/react-router';
import { useRecommendation } from '../api/hooks';
import { AlertTriangle, ShieldCheck, ShieldX, Info } from 'lucide-react';
import {
useRecommendation,
useValidationCalibration,
useValidationGateStatus,
useValidationAttributionSources,
} from '../api/hooks';
import { StatusBadge, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';
export function RecommendationDetailPage() {
const { id } = useParams({ from: '/recommendations/$id' });
const { data: rec, isLoading } = useRecommendation(id);
const { data: calibration } = useValidationCalibration();
const { data: gateData } = useValidationGateStatus();
const { data: sourcesData } = useValidationAttributionSources();
if (isLoading || !rec) return <LoadingSpinner />;
// --- Calibration: find the bucket matching this recommendation's confidence ---
const matchingBucket = calibration?.buckets?.find(
(b) => rec.confidence >= b.bucket_low && rec.confidence < b.bucket_high,
);
// Handle edge case: confidence of exactly 1.0 falls in the last bucket [0.90, 1.00]
const calibratedBucket =
matchingBucket ??
(rec.confidence >= 1.0
? calibration?.buckets?.find((b) => b.bucket_high >= 1.0)
: undefined);
const historicalWinRate = calibratedBucket?.observed_win_rate;
// --- Evidence counts ---
const totalEvidenceCount = rec.evidence.length;
// Compute duplicate evidence: group by normalized title, count extras
const titleCounts = new Map<string, number>();
for (const ev of rec.evidence) {
const key = (ev.title ?? '').toLowerCase().trim();
titleCounts.set(key, (titleCounts.get(key) ?? 0) + 1);
}
let duplicateEvidenceCount = 0;
for (const count of titleCounts.values()) {
if (count > 1) duplicateEvidenceCount += count - 1;
}
const uniqueEvidenceCount = totalEvidenceCount - duplicateEvidenceCount;
const duplicateRatio = totalEvidenceCount > 0 ? duplicateEvidenceCount / totalEvidenceCount : 0;
const hasDuplicateWarning = duplicateRatio > 0.2;
// --- Source reliability: find primary contributing sources ---
const evidenceSources = new Map<string, number>();
for (const ev of rec.evidence) {
const src = ev.source_type ?? ev.publisher ?? 'unknown';
evidenceSources.set(src, (evidenceSources.get(src) ?? 0) + ev.weight);
}
// Sort by total weight descending to find primary source
const sortedSources = [...evidenceSources.entries()].sort((a, b) => b[1] - a[1]);
const primarySourceType = sortedSources[0]?.[0];
// Look up source reliability from attribution data
const primarySourceAttribution = sourcesData?.sources?.find(
(s) => s.source_type === primarySourceType || s.source === primarySourceType,
);
// Source reliability is approximated from win_rate via Bayesian shrinkage
// The attribution data has win_rate which is the observed metric
const primarySourceWinRate = primarySourceAttribution?.win_rate;
// Bayesian shrinkage: reliability = 0.5 + (n/(n+30)) * (win_rate - 0.5)
const primarySourceCount = primarySourceAttribution?.prediction_count ?? 0;
const primarySourceReliability =
primarySourceWinRate != null
? 0.5 + (primarySourceCount / (primarySourceCount + 30)) * (primarySourceWinRate - 0.5)
: undefined;
const hasLowReliabilityWarning =
primarySourceReliability != null && primarySourceReliability < 0.4;
// --- Gate status ---
const gateStatus = gateData?.gate_status as {
passed?: boolean;
reason?: string;
threshold_results?: Array<{ name: string; threshold: number; actual: number; passed: boolean }>;
} | null;
return (
<div className="space-y-6">
<div className="flex items-center gap-3">
@@ -28,6 +107,137 @@ export function RecommendationDetailPage() {
</dl>
</Card>
{/* Validation Context Card — Requirements 13.1–13.7 */}
<Card>
<h2 className="mb-3 text-sm font-medium text-gray-400">Validation Context</h2>
<dl className="grid grid-cols-2 gap-x-8 gap-y-3 text-sm sm:grid-cols-3">
{/* 13.1: Original confidence alongside calibrated confidence */}
<div>
<dt className="text-gray-500">Original Confidence</dt>
<dd className="text-gray-200">{(rec.confidence * 100).toFixed(1)}%</dd>
</div>
<div>
<dt className="text-gray-500">Calibrated Confidence</dt>
<dd className="text-gray-200">
{historicalWinRate != null
? `${(historicalWinRate * 100).toFixed(1)}%`
: 'N/A'}
</dd>
</div>
{/* 13.2: Historical win rate for similar confidence levels */}
<div>
<dt className="text-gray-500">Historical Win Rate</dt>
<dd className="text-gray-200">
{historicalWinRate != null ? (
<span>
{(historicalWinRate * 100).toFixed(1)}%
{calibratedBucket && (
<span className="ml-1 text-xs text-gray-500">
({calibratedBucket.prediction_count} predictions)
</span>
)}
</span>
) : (
'N/A'
)}
</dd>
</div>
{/* 13.3: Evidence count, unique evidence count, duplicate evidence count */}
<div>
<dt className="text-gray-500">Evidence Count</dt>
<dd className="text-gray-200">{totalEvidenceCount}</dd>
</div>
<div>
<dt className="text-gray-500">Unique Evidence</dt>
<dd className="text-gray-200">{uniqueEvidenceCount}</dd>
</div>
<div>
<dt className="flex items-center gap-1 text-gray-500">
Duplicate Evidence
{/* 13.6: Warning badge when duplicate evidence count > 20% of total */}
{hasDuplicateWarning && (
<span
className="inline-flex items-center gap-0.5 rounded-full border border-yellow-700/50 bg-yellow-900/40 px-1.5 py-0.5 text-[10px] font-medium text-yellow-400"
title="Duplicate evidence exceeds 20% of total — potential evidence inflation"
>
<AlertTriangle size={10} />
&gt;20%
</span>
)}
</dt>
<dd className="text-gray-200">
{duplicateEvidenceCount}
{totalEvidenceCount > 0 && (
<span className="ml-1 text-xs text-gray-500">
({(duplicateRatio * 100).toFixed(0)}%)
</span>
)}
</dd>
</div>
{/* 13.4: Source reliability indicator */}
<div>
<dt className="flex items-center gap-1 text-gray-500">
Primary Source Reliability
{/* 13.7: Warning badge when primary source reliability < 0.4 */}
{hasLowReliabilityWarning && (
<span
className="inline-flex items-center gap-0.5 rounded-full border border-red-700/50 bg-red-900/40 px-1.5 py-0.5 text-[10px] font-medium text-red-400"
title="Primary source reliability is below 0.4 — low or unknown reliability"
>
<AlertTriangle size={10} />
Low
</span>
)}
</dt>
<dd className="text-gray-200">
{primarySourceReliability != null ? (
<span>
{primarySourceReliability.toFixed(3)}
{primarySourceType && (
<span className="ml-1 text-xs text-gray-500">({primarySourceType})</span>
)}
</span>
) : (
'N/A'
)}
</dd>
</div>
{/* 13.5: Live eligibility status with reason */}
<div className="col-span-2">
<dt className="text-gray-500">Live Eligibility</dt>
<dd>
{gateStatus != null ? (
<div className="flex items-center gap-2">
{gateStatus.passed ? (
<span className="inline-flex items-center gap-1 text-green-400">
<ShieldCheck size={14} />
Gate Passed
</span>
) : (
<span className="inline-flex items-center gap-1 text-red-400">
<ShieldX size={14} />
Gate Failed
</span>
)}
{gateStatus.reason && (
<span className="text-xs text-gray-500">{gateStatus.reason}</span>
)}
</div>
) : (
<span className="inline-flex items-center gap-1 text-gray-500">
<Info size={14} />
N/A no gate evaluation available
</span>
)}
</dd>
</div>
</dl>
</Card>
{rec.thesis && (
<Card>
<h2 className="mb-2 text-sm font-medium text-gray-400">Thesis</h2>
+100
View File
@@ -73,6 +73,97 @@ export const mockVariantPerfHistory = [
{ hour: '2026-04-10T11:00:00Z', invocations: 12, successes: 11, avg_duration_ms: 1300, avg_confidence: 0.82 },
];
// Validation: Model Quality & Calibration mock data
/**
 * Canned response body served by the `GET /api/validation/summary` mock handler.
 *
 * Pairs a single metrics snapshot with a gate evaluation in which every
 * listed threshold check has `passed: true`.
 */
export const mockValidationSummary = {
  snapshot: {
    // Identity and evaluation window
    id: 'ms-1',
    generated_at: '2026-04-11T12:00:00Z',
    lookback_window: '30d',
    horizon: '7d',
    prediction_count: 150,

    // Accuracy and information-coefficient figures
    win_rate: 0.58,
    directional_accuracy: 0.56,
    information_coefficient: 0.045,
    rank_information_coefficient: 0.038,

    // Return figures
    avg_return: 0.012,
    avg_excess_return_vs_spy: 0.003,
    avg_excess_return_vs_sector: 0.002,

    // Calibration figures
    calibration_error: 0.08,
    brier_score: 0.21,

    // Win rate split by action
    buy_win_rate: 0.61,
    sell_win_rate: 0.54,
    hold_win_rate: 0.5,

    metadata: {},
  },
  gate_status: {
    passed: true,
    reason: 'all thresholds met',
    threshold_results: [
      {
        name: 'min_prediction_count',
        threshold: 100,
        actual: 150,
        passed: true,
      },
      {
        name: 'min_ic',
        threshold: 0.03,
        actual: 0.045,
        passed: true,
      },
      {
        name: 'min_win_rate',
        threshold: 0.53,
        actual: 0.58,
        passed: true,
      },
    ],
  },
};
/**
 * Canned response body served by the `GET /api/validation/calibration` mock handler.
 *
 * Holds five buckets spanning 0.5 to 1.0 in steps of 0.1; the 0.7-0.8 and
 * 0.9-1.0 buckets carry `miscalibrated: true`.
 */
export const mockValidationCalibration = {
  buckets: [
    {
      bucket_low: 0.5,
      bucket_high: 0.6,
      avg_confidence: 0.55,
      observed_win_rate: 0.52,
      prediction_count: 30,
      miscalibrated: false,
    },
    {
      bucket_low: 0.6,
      bucket_high: 0.7,
      avg_confidence: 0.65,
      observed_win_rate: 0.58,
      prediction_count: 40,
      miscalibrated: false,
    },
    {
      bucket_low: 0.7,
      bucket_high: 0.8,
      avg_confidence: 0.75,
      observed_win_rate: 0.55,
      prediction_count: 35,
      miscalibrated: true,
    },
    {
      bucket_low: 0.8,
      bucket_high: 0.9,
      avg_confidence: 0.85,
      observed_win_rate: 0.7,
      prediction_count: 25,
      miscalibrated: false,
    },
    {
      bucket_low: 0.9,
      bucket_high: 1.0,
      avg_confidence: 0.95,
      observed_win_rate: 0.72,
      prediction_count: 20,
      miscalibrated: true,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
/**
 * Canned response body served by the `GET /api/validation/gate-status` mock handler.
 *
 * Represents a failed gate: the `min_ic` check is the only threshold result
 * with `passed: false`, and the overall `passed` flag is `false`.
 */
export const mockValidationGateStatus = {
  gate_status: {
    passed: false,
    reason: 'failed: min_ic below threshold',
    threshold_results: [
      {
        name: 'min_prediction_count',
        threshold: 100,
        actual: 150,
        passed: true,
      },
      {
        // The one failing check: actual is below threshold.
        name: 'min_ic',
        threshold: 0.03,
        actual: 0.02,
        passed: false,
      },
      {
        name: 'min_win_rate',
        threshold: 0.53,
        actual: 0.58,
        passed: true,
      },
    ],
  },
};
/**
 * Canned response body served by the `GET /api/validation/ic-by-horizon` mock handler.
 *
 * Contains one entry each for the `1h` and `7d` horizons.
 */
export const mockValidationICByHorizon = {
  horizons: [
    {
      horizon: '1h',
      information_coefficient: 0.02,
      rank_information_coefficient: 0.015,
      prediction_count: 120,
      generated_at: '2026-04-11T12:00:00Z',
    },
    {
      horizon: '7d',
      information_coefficient: 0.045,
      rank_information_coefficient: 0.038,
      prediction_count: 100,
      generated_at: '2026-04-11T12:00:00Z',
    },
  ],
  lookback: '30d',
};
/**
 * Canned response body served by the `GET /api/validation/attribution/sources`
 * mock handler. Contains a single source row (`Reuters` / `news_api`).
 */
export const mockValidationAttributionSources = {
  sources: [
    {
      source: 'Reuters',
      source_type: 'news_api',
      prediction_count: 50,
      avg_weight: 0.6,
      avg_contribution_score: 0.3,
      win_rate: 0.62,
      avg_future_return: 0.015,
      avg_excess_return_vs_spy: 0.005,
      information_coefficient: 0.05,
      duplicate_rate: 0.1,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
/**
 * Canned response body served by the `GET /api/validation/attribution/catalysts`
 * mock handler. Contains a single catalyst row (`earnings`).
 */
export const mockValidationAttributionCatalysts = {
  catalysts: [
    {
      catalyst_type: 'earnings',
      prediction_count: 40,
      win_rate: 0.65,
      avg_future_return: 0.02,
      avg_excess_return_vs_spy: 0.008,
      information_coefficient: 0.06,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
/**
 * Canned response body served by the `GET /api/validation/attribution/layers`
 * mock handler.
 *
 * Lists three layers (`company`, `macro`, `competitive`); the `competitive`
 * row has `dominant_ic: null`.
 */
export const mockValidationAttributionLayers = {
  layers: [
    {
      layer: 'company',
      avg_contribution_pct: 0.55,
      dominant_win_rate: 0.6,
      dominant_ic: 0.04,
    },
    {
      layer: 'macro',
      avg_contribution_pct: 0.3,
      dominant_win_rate: 0.52,
      dominant_ic: 0.02,
    },
    {
      layer: 'competitive',
      avg_contribution_pct: 0.15,
      dominant_win_rate: 0.48,
      dominant_ic: null,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
export const handlers = [
// Query API (proxied at /api/)
http.get('/api/companies', () => HttpResponse.json(mockCompanies)),
@@ -242,4 +333,13 @@ export const handlers = [
const body = await request.json() as Record<string, unknown>;
return HttpResponse.json({ enabled: body.enabled, previous_enabled: true, toggled_by: 'operator' });
}),
// Validation: Model Quality & Calibration endpoints
http.get('/api/validation/summary', () => HttpResponse.json(mockValidationSummary)),
http.get('/api/validation/calibration', () => HttpResponse.json(mockValidationCalibration)),
http.get('/api/validation/gate-status', () => HttpResponse.json(mockValidationGateStatus)),
http.get('/api/validation/ic-by-horizon', () => HttpResponse.json(mockValidationICByHorizon)),
http.get('/api/validation/attribution/sources', () => HttpResponse.json(mockValidationAttributionSources)),
http.get('/api/validation/attribution/catalysts', () => HttpResponse.json(mockValidationAttributionCatalysts)),
http.get('/api/validation/attribution/layers', () => HttpResponse.json(mockValidationAttributionLayers)),
];
+49
View File
@@ -169,6 +169,55 @@ describe('Global Events page', () => {
});
});
describe('OpsModel validation tab', () => {
  // Per-test timeout in milliseconds, passed as the third argument of each `it`.
  const timeoutMs = 10000;

  /** Renders the `/ops/model` route and waits for the 'Model Performance' text. */
  const openModelPage = async () => {
    renderRoute('/ops/model');
    await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
  };

  /** Clicks the element labelled 'Model Validation' to switch tabs. */
  const selectValidationTab = async () => {
    await userEvent.click(screen.getByText('Model Validation'));
  };

  it('renders Model Validation tab with summary cards', async () => {
    await openModelPage();

    // Both tab buttons must be on screen before any interaction.
    for (const tabLabel of ['Extraction Performance', 'Model Validation']) {
      expect(screen.getByText(tabLabel)).toBeInTheDocument();
    }

    await selectValidationTab();

    // Metric labels that only the validation summary cards render.
    const summaryLabels = ['Brier Score', 'ECE', 'Directional Accuracy', 'Excess vs SPY'];
    await waitFor(() => {
      summaryLabels.forEach((label) => {
        expect(screen.getByText(label)).toBeInTheDocument();
      });
    });
  }, timeoutMs);

  it('renders calibration table with miscalibration warning', async () => {
    await openModelPage();
    await selectValidationTab();

    await waitFor(() => {
      expect(screen.getByText('Calibration by Confidence Bucket')).toBeInTheDocument();
    });

    // At least one bucket row should carry the warning text.
    expect(screen.getAllByText('Miscalibrated').length).toBeGreaterThanOrEqual(1);
  }, timeoutMs);

  it('renders gate status pass/fail indicator', async () => {
    await openModelPage();
    await selectValidationTab();

    // The mocked gate-status endpoint responds with passed: false.
    await waitFor(() => {
      expect(screen.getByText(/Live Trading Gate: FAIL/)).toBeInTheDocument();
    });
  }, timeoutMs);
});
describe('Agents page', () => {
it('renders agent list in sidebar', async () => {
renderRoute('/agents');