feat: model validation, calibration, and signal quality layer
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build-1 unknown status
ci/woodpecker/push/build-3 unknown status
ci/woodpecker/push/build-2 unknown status
ci/woodpecker/push/finalize unknown status
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
- Migration 035: prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots tables + SQL views
- Prediction snapshot writer with canonical evidence keys, duplicate detection, contribution scores
- Outcome evaluator across 5 horizons (1h, 6h, 1d, 7d, 30d)
- Metrics engine: ECE, Brier score, IC, Rank IC, benchmark comparison
- Attribution engine: per-source, per-catalyst, per-layer performance
- Calibration engine: Bayesian shrinkage source reliability
- Quality gate for live trading eligibility with configurable thresholds
- 7 new /api/validation/* endpoints
- Upgraded OpsModel dashboard with validation tab
- Enhanced recommendation display with calibration context
- Backtest replay validation mode
- 86 Python tests (unit + property-based), 179 frontend tests passing
This commit is contained in:
@@ -885,3 +885,169 @@ export function useToggleMacro() {
|
||||
onSuccess: () => qc.invalidateQueries({ queryKey: ['macro-status'] }),
|
||||
});
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation: Model Quality & Calibration (Requirements 12.1, 12.2, 12.3, 12.7)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Aggregated model-quality metrics for one lookback window / horizon pair,
 * carried in the `snapshot` member of the validation summary payload.
 *
 * Every metric except `prediction_count` is typed nullable, so consumers must
 * handle `null` before formatting.
 * NOTE(review): rate-like fields are presumably fractions in [0, 1] rather
 * than percentages — confirm against the API.
 */
export interface ModelMetricSnapshot {
  // --- identity / scope ---
  id: string;
  generated_at: string;
  lookback_window: string;
  horizon: string;
  prediction_count: number;

  // --- hit rates ---
  win_rate: number | null;
  directional_accuracy: number | null;
  buy_win_rate: number | null;
  sell_win_rate: number | null;
  hold_win_rate: number | null;

  // --- information coefficients ---
  information_coefficient: number | null;
  rank_information_coefficient: number | null;

  // --- returns ---
  avg_return: number | null;
  avg_excess_return_vs_spy: number | null;
  avg_excess_return_vs_sector: number | null;

  // --- calibration ---
  calibration_error: number | null;
  brier_score: number | null;

  /** Free-form extra data; shape is not specified by this type. */
  metadata: Record<string, unknown> | null;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationSummary` for `/api/validation/summary`.
 * Both members may be `null`.
 */
export interface ValidationSummary {
  /** Metric snapshot for the requested lookback/horizon; may be `null`. */
  snapshot: ModelMetricSnapshot | null;
  /** Untyped gate-evaluation object; narrow its fields before use. */
  gate_status: Record<string, unknown> | null;
}
|
||||
|
||||
/**
 * One confidence bucket of the calibration table.
 *
 * The validation page renders `bucket_low` / `bucket_high` as the half-open
 * interval `[low, high)` and compares `avg_confidence` with
 * `observed_win_rate` to highlight miscalibrated rows.
 */
export interface CalibrationBucket {
  /** Lower bound of the bucket. */
  bucket_low: number;
  /** Upper bound of the bucket. */
  bucket_high: number;
  /** Average confidence reported for the bucket. */
  avg_confidence: number;
  /** Observed win rate reported for the bucket. */
  observed_win_rate: number;
  /** Number of predictions in the bucket. */
  prediction_count: number;
  /** Miscalibration flag supplied with the payload. */
  miscalibrated: boolean;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationCalibration` for
 * `/api/validation/calibration`.
 */
export interface ValidationCalibration {
  /** Calibration rows, one per confidence bucket. */
  buckets: CalibrationBucket[];
  /** Lookback value reported with the payload. */
  lookback: string;
  /** Horizon value reported with the payload. */
  horizon: string;
}
|
||||
|
||||
/**
 * Information-coefficient figures for a single horizon; one row of the
 * "IC by horizon" payload.
 */
export interface ICByHorizonEntry {
  /** Horizon label for this row. */
  horizon: string;
  /** IC for the horizon; `null` when absent. */
  information_coefficient: number | null;
  /** Rank IC for the horizon; `null` when absent. */
  rank_information_coefficient: number | null;
  /** Number of predictions behind the figures. */
  prediction_count: number;
  /** Generation timestamp as a string; `null` when absent. */
  generated_at: string | null;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationICByHorizon` for
 * `/api/validation/ic-by-horizon`.
 */
export interface ValidationICByHorizon {
  /** One entry per horizon. */
  horizons: ICByHorizonEntry[];
  /** Lookback value reported with the payload. */
  lookback: string;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationGateStatus` for
 * `/api/validation/gate-status`.
 */
export interface ValidationGateStatus {
  /** Untyped gate-evaluation object, or `null`; narrow its fields before use. */
  gate_status: Record<string, unknown> | null;
  /** Optional last-update timestamp string. */
  updated_at?: string | null;
  /** Optional message accompanying the payload. */
  message?: string;
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/summary`.
 *
 * @param lookback Sent as the `lookback` query parameter; omitted when empty.
 * @param horizon Sent as the `horizon` query parameter; omitted when empty.
 * @returns Whatever `useGet` returns for the `ValidationSummary` payload.
 */
export function useValidationSummary(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  for (const [name, value] of [['lookback', lookback], ['horizon', horizon]] as const) {
    if (value) params.set(name, value);
  }

  // Only append "?" when at least one parameter was set.
  const query = params.toString();
  const path = query ? `/api/validation/summary?${query}` : '/api/validation/summary';

  return useGet<ValidationSummary>(['validation-summary', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/calibration`.
 *
 * @param lookback Sent as the `lookback` query parameter; omitted when empty.
 * @param horizon Sent as the `horizon` query parameter; omitted when empty.
 * @returns Whatever `useGet` returns for the `ValidationCalibration` payload.
 */
export function useValidationCalibration(lookback = '30d', horizon = '7d') {
  const params = new URLSearchParams();
  for (const [name, value] of [['lookback', lookback], ['horizon', horizon]] as const) {
    if (value) params.set(name, value);
  }

  // Only append "?" when at least one parameter was set.
  const query = params.toString();
  const path = query ? `/api/validation/calibration?${query}` : '/api/validation/calibration';

  return useGet<ValidationCalibration>(['validation-calibration', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/ic-by-horizon`.
 *
 * @param lookback Sent as the `lookback` query parameter; omitted when empty.
 * @returns Whatever `useGet` returns for the `ValidationICByHorizon` payload.
 */
export function useValidationICByHorizon(lookback = '30d') {
  const params = new URLSearchParams();
  if (lookback) {
    params.set('lookback', lookback);
  }

  // Only append "?" when the parameter was set.
  const query = params.toString();
  const path = query ? `/api/validation/ic-by-horizon?${query}` : '/api/validation/ic-by-horizon';

  return useGet<ValidationICByHorizon>(['validation-ic-by-horizon', lookback], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/gate-status`; takes no parameters.
 *
 * @returns Whatever `useGet` returns for the `ValidationGateStatus` payload.
 */
export function useValidationGateStatus() {
  const path = '/api/validation/gate-status';
  return useGet<ValidationGateStatus>(['validation-gate-status'], 'query', path);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation: Attribution — Sources, Catalysts, Layers (Requirements 12.4, 12.5, 12.6)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Per-source performance row of the source-attribution payload.
 * NOTE(review): rate/return fields are presumably fractions, not percentages —
 * confirm against the API.
 */
export interface SourceAttribution {
  // --- identity ---
  source: string;
  source_type: string;

  // --- volume / weighting ---
  prediction_count: number;
  avg_weight: number;
  avg_contribution_score: number;
  duplicate_rate: number;

  // --- outcome metrics ---
  win_rate: number;
  avg_future_return: number;
  avg_excess_return_vs_spy: number;
  /** IC for this source; `null` when absent. */
  information_coefficient: number | null;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationAttributionSources` for
 * `/api/validation/attribution/sources`.
 */
export interface SourceAttributionResponse {
  /** One row per source. */
  sources: SourceAttribution[];
  /** Lookback value reported with the payload. */
  lookback: string;
  /** Horizon value reported with the payload. */
  horizon: string;
}
|
||||
|
||||
/**
 * Per-catalyst-type performance row of the catalyst-attribution payload.
 * NOTE(review): rate/return fields are presumably fractions, not percentages —
 * confirm against the API.
 */
export interface CatalystAttribution {
  /** Catalyst type label for this row. */
  catalyst_type: string;
  /** Number of predictions behind the row. */
  prediction_count: number;

  // --- outcome metrics ---
  win_rate: number;
  avg_future_return: number;
  avg_excess_return_vs_spy: number;
  /** IC for this catalyst type; `null` when absent. */
  information_coefficient: number | null;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationAttributionCatalysts` for
 * `/api/validation/attribution/catalysts`.
 */
export interface CatalystAttributionResponse {
  /** One row per catalyst type. */
  catalysts: CatalystAttribution[];
  /** Lookback value reported with the payload. */
  lookback: string;
  /** Horizon value reported with the payload. */
  horizon: string;
}
|
||||
|
||||
/**
 * Per-layer row of the layer-attribution payload.
 * NOTE(review): "dominant" presumably refers to predictions where this layer
 * contributed most — confirm against the backend definition.
 */
export interface LayerAttribution {
  /** Layer name for this row. */
  layer: string;
  /** Average contribution figure for the layer. */
  avg_contribution_pct: number;
  /** Win rate associated with the layer. */
  dominant_win_rate: number;
  /** IC associated with the layer; `null` when absent. */
  dominant_ic: number | null;
}
|
||||
|
||||
/**
 * Payload type used by `useValidationAttributionLayers` for
 * `/api/validation/attribution/layers`.
 */
export interface LayerAttributionResponse {
  /** One row per layer. */
  layers: LayerAttribution[];
  /** Lookback value reported with the payload. */
  lookback: string;
  /** Horizon value reported with the payload. */
  horizon: string;
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/sources`.
 *
 * @param lookback Sent as the `lookback` query parameter; omitted when empty.
 * @param horizon Sent as the `horizon` query parameter; omitted when empty.
 * @returns Whatever `useGet` returns for the `SourceAttributionResponse` payload.
 */
export function useValidationAttributionSources(lookback = '30d', horizon = '7d') {
  const endpoint = '/api/validation/attribution/sources';

  const params = new URLSearchParams();
  if (lookback) {
    params.set('lookback', lookback);
  }
  if (horizon) {
    params.set('horizon', horizon);
  }

  // Only append "?" when at least one parameter was set.
  const query = params.toString();
  const path = query ? `${endpoint}?${query}` : endpoint;

  return useGet<SourceAttributionResponse>(['validation-attribution-sources', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/catalysts`.
 *
 * @param lookback Sent as the `lookback` query parameter; omitted when empty.
 * @param horizon Sent as the `horizon` query parameter; omitted when empty.
 * @returns Whatever `useGet` returns for the `CatalystAttributionResponse` payload.
 */
export function useValidationAttributionCatalysts(lookback = '30d', horizon = '7d') {
  const endpoint = '/api/validation/attribution/catalysts';

  const params = new URLSearchParams();
  if (lookback) {
    params.set('lookback', lookback);
  }
  if (horizon) {
    params.set('horizon', horizon);
  }

  // Only append "?" when at least one parameter was set.
  const query = params.toString();
  const path = query ? `${endpoint}?${query}` : endpoint;

  return useGet<CatalystAttributionResponse>(['validation-attribution-catalysts', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/layers`.
 *
 * @param lookback Sent as the `lookback` query parameter; omitted when empty.
 * @param horizon Sent as the `horizon` query parameter; omitted when empty.
 * @returns Whatever `useGet` returns for the `LayerAttributionResponse` payload.
 */
export function useValidationAttributionLayers(lookback = '30d', horizon = '7d') {
  const endpoint = '/api/validation/attribution/layers';

  const params = new URLSearchParams();
  if (lookback) {
    params.set('lookback', lookback);
  }
  if (horizon) {
    params.set('horizon', horizon);
  }

  // Only append "?" when at least one parameter was set.
  const query = params.toString();
  const path = query ? `${endpoint}?${query}` : endpoint;

  return useGet<LayerAttributionResponse>(['validation-attribution-layers', lookback, horizon], 'query', path);
}
|
||||
|
||||
@@ -1,9 +1,89 @@
|
||||
import { useState } from 'react';
|
||||
import { useModelPerformance, useModelFailures } from '../api/hooks';
|
||||
import {
|
||||
useModelPerformance,
|
||||
useModelFailures,
|
||||
useValidationSummary,
|
||||
useValidationCalibration,
|
||||
useValidationICByHorizon,
|
||||
useValidationGateStatus,
|
||||
useValidationAttributionSources,
|
||||
useValidationAttributionCatalysts,
|
||||
useValidationAttributionLayers,
|
||||
} from '../api/hooks';
|
||||
import type {
|
||||
ValidationSummary,
|
||||
ValidationCalibration,
|
||||
CalibrationBucket,
|
||||
ValidationICByHorizon,
|
||||
ICByHorizonEntry,
|
||||
ValidationGateStatus,
|
||||
SourceAttributionResponse,
|
||||
CatalystAttributionResponse,
|
||||
LayerAttributionResponse,
|
||||
SourceAttribution,
|
||||
CatalystAttribution,
|
||||
LayerAttribution,
|
||||
} from '../api/hooks';
|
||||
import { LoadingSpinner, DateRangeSelector, StatusBadge, Card } from '../components/ui';
|
||||
import { AlertTriangle, ShieldCheck, ShieldX } from 'lucide-react';
|
||||
|
||||
type Tab = 'extraction' | 'validation';
|
||||
|
||||
/**
 * Model performance page with two tabs: extraction performance and model
 * validation. The date-range selector is only shown on the extraction tab,
 * which is also the initially selected tab.
 */
export function OpsModelPage() {
  const [hours, setHours] = useState(24);
  const [activeTab, setActiveTab] = useState<Tab>('extraction');

  // Tab buttons are generated from this list, in display order.
  const tabs: Array<{ id: Tab; label: string }> = [
    { id: 'extraction', label: 'Extraction Performance' },
    { id: 'validation', label: 'Model Validation' },
  ];
  const onExtractionTab = activeTab === 'extraction';

  return (
    <div className="space-y-6">
      <div className="flex items-center justify-between">
        <h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
        {onExtractionTab && <DateRangeSelector value={hours} onChange={setHours} />}
      </div>

      {/* Tab bar */}
      <div className="flex border-b border-surface-700" role="tablist" aria-label="Model performance tabs">
        {tabs.map(({ id, label }) => {
          const selected = activeTab === id;
          const stateClasses = selected
            ? 'border-b-2 border-brand-500 text-brand-400'
            : 'text-gray-400 hover:text-gray-200';
          return (
            <button
              key={id}
              role="tab"
              aria-selected={selected}
              onClick={() => setActiveTab(id)}
              className={`px-4 py-2 text-sm font-medium transition-colors ${stateClasses}`}
            >
              {label}
            </button>
          );
        })}
      </div>

      {onExtractionTab ? <ExtractionTab hours={hours} /> : <ValidationTab />}
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Extraction Performance Tab (existing content) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
function ExtractionTab({ hours }: { hours: number }) {
|
||||
const { data: perf, isLoading } = useModelPerformance(hours);
|
||||
const { data: failures } = useModelFailures(hours);
|
||||
|
||||
@@ -13,11 +93,6 @@ export function OpsModelPage() {
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
|
||||
<DateRangeSelector value={hours} onChange={setHours} />
|
||||
</div>
|
||||
|
||||
{/* Key metrics */}
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
|
||||
<StatCard label="Total Extractions" value={String(p.total_extractions ?? '—')} />
|
||||
@@ -63,6 +138,482 @@ export function OpsModelPage() {
|
||||
);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Model Validation Tab (new) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Model Validation tab: runs the seven validation queries with their default
 * arguments and hands each result to its own section, so every section shows
 * its own loading / error / empty state.
 */
function ValidationTab() {
  // Hook call order is kept stable: summary, calibration, IC, gate, sources, catalysts, layers.
  const summaryQuery = useValidationSummary();
  const calibrationQuery = useValidationCalibration();
  const icQuery = useValidationICByHorizon();
  const gateQuery = useValidationGateStatus();
  const sourcesQuery = useValidationAttributionSources();
  const catalystsQuery = useValidationAttributionCatalysts();
  const layersQuery = useValidationAttributionLayers();

  return (
    <div className="space-y-6">
      {/* Gate Status */}
      <GateStatusSection data={gateQuery.data} isLoading={gateQuery.isLoading} error={gateQuery.error} />

      {/* Summary Cards */}
      <SummaryCardsSection data={summaryQuery.data} isLoading={summaryQuery.isLoading} error={summaryQuery.error} />

      {/* Calibration Table */}
      <CalibrationTableSection
        data={calibrationQuery.data}
        isLoading={calibrationQuery.isLoading}
        error={calibrationQuery.error}
      />

      {/* IC by Horizon Table */}
      <ICByHorizonSection data={icQuery.data} isLoading={icQuery.isLoading} error={icQuery.error} />

      {/* Source Attribution Table */}
      <SourceAttributionSection data={sourcesQuery.data} isLoading={sourcesQuery.isLoading} error={sourcesQuery.error} />

      {/* Catalyst Attribution Table */}
      <CatalystAttributionSection
        data={catalystsQuery.data}
        isLoading={catalystsQuery.isLoading}
        error={catalystsQuery.error}
      />

      {/* Layer Attribution Table */}
      <LayerAttributionSection data={layersQuery.data} isLoading={layersQuery.isLoading} error={layersQuery.error} />
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Gate Status Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Live-trading gate card.
 *
 * Renders a spinner while loading, an error card on failure, an "unknown"
 * card when no gate object is present, and otherwise the PASS/FAIL banner
 * with an optional per-threshold table.
 *
 * `gate_status` is typed `Record<string, unknown>`, so its fields are narrowed
 * at runtime instead of being asserted: a non-string `reason` or a
 * null/non-object threshold entry is ignored rather than crashing the render.
 */
function GateStatusSection({ data, isLoading, error }: {
  data: ValidationGateStatus | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load gate status" />;

  const gate = data?.gate_status ?? null;
  if (!gate) {
    return (
      <Card className="flex items-center gap-3">
        <ShieldX size={20} className="text-yellow-400" />
        <div>
          <div className="text-sm font-medium text-yellow-400">Gate Status Unknown</div>
          <div className="text-xs text-gray-500">No gate evaluation data available</div>
        </div>
      </Card>
    );
  }

  // A missing `passed` is treated as a failure, as before.
  const passed = Boolean(gate.passed);
  // Only a string can be rendered safely as the reason line.
  const reason = typeof gate.reason === 'string' ? gate.reason : undefined;
  // Keep only object entries so property access below cannot throw.
  const rawThresholds: unknown = gate.threshold_results;
  const thresholds: Array<Record<string, unknown>> = Array.isArray(rawThresholds)
    ? rawThresholds.filter(
        (entry): entry is Record<string, unknown> => typeof entry === 'object' && entry !== null,
      )
    : [];

  return (
    <Card>
      <div className="mb-3 flex items-center gap-3">
        {passed ? (
          <ShieldCheck size={20} className="text-green-400" />
        ) : (
          <ShieldX size={20} className="text-red-400" />
        )}
        <div>
          <div className={`text-sm font-medium ${passed ? 'text-green-400' : 'text-red-400'}`}>
            Live Trading Gate: {passed ? 'PASS' : 'FAIL'}
          </div>
          {reason && <div className="text-xs text-gray-500">{reason}</div>}
        </div>
      </div>

      {thresholds.length > 0 && (
        <div className="overflow-x-auto">
          <table className="w-full text-left text-xs">
            <thead>
              <tr className="border-b border-surface-700 text-gray-500">
                <th className="pb-2 pr-4 font-medium">Threshold</th>
                <th className="pb-2 pr-4 font-medium">Required</th>
                <th className="pb-2 pr-4 font-medium">Actual</th>
                <th className="pb-2 font-medium">Status</th>
              </tr>
            </thead>
            <tbody>
              {thresholds.map((t, i) => (
                <tr key={i} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300">{String(t.name ?? '')}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-400">{fmtThreshold(t.threshold)}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtThreshold(t.actual)}</td>
                  <td className="py-1.5">
                    <StatusBadge status={t.passed ? 'success' : 'failed'} />
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      )}
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Summary Cards Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Grid of headline validation metrics taken from `data.snapshot`.
 *
 * Renders a spinner while loading, an error card on failure and an
 * explanatory card when there is no snapshot.
 *
 * A metric that is `null` is always shown in the neutral colour. Previously a
 * missing ECE was tinted yellow and a missing "Excess vs SPY" red, which read
 * as a warning/failure even though no value was available.
 */
function SummaryCardsSection({ data, isLoading, error }: {
  data: ValidationSummary | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load validation summary" />;

  const snap = data?.snapshot;
  if (!snap) {
    return (
      <Card>
        <p className="text-sm text-gray-500">No validation data available yet. Metrics will appear once predictions have been evaluated.</p>
      </Card>
    );
  }

  // Neutral colour for "no data" and for values without a good/bad reading.
  const neutral = 'text-gray-100';

  const brier = snap.brier_score;
  const brierColor = brier != null && brier < 0.25 ? 'text-green-400' : neutral;

  const ece = snap.calibration_error;
  let eceColor = neutral;
  if (ece != null) {
    eceColor = ece < 0.15 ? 'text-green-400' : 'text-yellow-400';
  }

  const excess = snap.avg_excess_return_vs_spy;
  let excessColor = neutral;
  if (excess != null) {
    // Zero excess return keeps the original "not positive" (red) colouring.
    excessColor = excess > 0 ? 'text-green-400' : 'text-red-400';
  }

  return (
    <div className="grid grid-cols-2 gap-3 sm:grid-cols-3 lg:grid-cols-5">
      <StatCard label="Predictions" value={String(snap.prediction_count ?? '—')} />
      <StatCard
        label="Win Rate"
        value={fmtPct(snap.win_rate)}
        color={colorForRate(snap.win_rate, 0.53)}
      />
      <StatCard
        label="Directional Accuracy"
        value={fmtPct(snap.directional_accuracy)}
        color={colorForRate(snap.directional_accuracy, 0.53)}
      />
      <StatCard
        label="IC"
        value={fmtIC(snap.information_coefficient)}
        color={colorForIC(snap.information_coefficient)}
      />
      <StatCard
        label="Rank IC"
        value={fmtIC(snap.rank_information_coefficient)}
        color={colorForIC(snap.rank_information_coefficient)}
      />
      <StatCard
        label="Brier Score"
        value={brier != null ? brier.toFixed(4) : '—'}
        color={brierColor}
      />
      <StatCard
        label="ECE"
        value={ece != null ? ece.toFixed(4) : '—'}
        color={eceColor}
      />
      <StatCard
        label="Excess vs SPY"
        value={fmtPct(excess)}
        color={excessColor}
      />
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Calibration Table Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Calibration table: one `CalibrationRow` per confidence bucket.
 * Renders a spinner while loading, an error card on failure and a placeholder
 * card when there are no buckets.
 */
function CalibrationTableSection(props: {
  data: ValidationCalibration | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (props.isLoading) return <LoadingSpinner />;
  if (props.error) return <ErrorCard message="Failed to load calibration data" />;

  const rows: CalibrationBucket[] = props.data?.buckets ?? [];
  if (rows.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Calibration</h2>
        <p className="text-sm text-gray-500">No calibration data available</p>
      </Card>
    );
  }

  // Column headings in display order; the last column has no right padding.
  const headings = ['Bucket', 'Avg Confidence', 'Observed Win Rate', 'Count', 'Status'];
  const lastColumn = headings.length - 1;

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Calibration by Confidence Bucket</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {headings.map((heading, col) => (
                <th key={heading} className={col < lastColumn ? 'pb-2 pr-4 font-medium' : 'pb-2 font-medium'}>
                  {heading}
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {rows.map((row, index) => (
              <CalibrationRow key={index} bucket={row} />
            ))}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/**
 * One calibration table row.
 *
 * A row is flagged when the payload marks it `miscalibrated` or when average
 * confidence and observed win rate differ by more than 0.15; flagged rows get
 * an amber background and a warning label.
 */
function CalibrationRow({ bucket }: { bucket: CalibrationBucket }) {
  const { bucket_low, bucket_high, avg_confidence, observed_win_rate, prediction_count } = bucket;

  const gap = Math.abs(avg_confidence - observed_win_rate);
  const flagged = bucket.miscalibrated || gap > 0.15;

  let statusCell = <span className="text-green-400">OK</span>;
  if (flagged) {
    statusCell = (
      <span className="inline-flex items-center gap-1 text-amber-400">
        <AlertTriangle size={14} />
        <span>Miscalibrated</span>
      </span>
    );
  }

  return (
    <tr className={`border-b border-surface-800 ${flagged ? 'bg-amber-900/20' : ''}`}>
      <td className="py-1.5 pr-4 font-mono text-gray-300">
        [{fmtPctShort(bucket_low)}, {fmtPctShort(bucket_high)})
      </td>
      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(avg_confidence)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(observed_win_rate)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-400">{prediction_count}</td>
      <td className="py-1.5">{statusCell}</td>
    </tr>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* IC by Horizon Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Table of IC and rank IC per horizon.
 * Renders a spinner while loading, an error card on failure and a placeholder
 * card when there are no horizon entries.
 */
function ICByHorizonSection(props: {
  data: ValidationICByHorizon | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (props.isLoading) return <LoadingSpinner />;
  if (props.error) return <ErrorCard message="Failed to load IC by horizon data" />;

  const entries: ICByHorizonEntry[] = props.data?.horizons ?? [];
  if (entries.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">IC by Horizon</h2>
        <p className="text-sm text-gray-500">No IC data available</p>
      </Card>
    );
  }

  const bodyRows = entries.map((entry, index) => {
    const ic = entry.information_coefficient;
    const rankIc = entry.rank_information_coefficient;
    return (
      <tr key={index} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 font-mono text-gray-300">{entry.horizon}</td>
        <td className={`py-1.5 pr-4 font-mono ${colorForIC(ic)}`}>{fmtIC(ic)}</td>
        <td className={`py-1.5 pr-4 font-mono ${colorForIC(rankIc)}`}>{fmtIC(rankIc)}</td>
        <td className="py-1.5 font-mono text-gray-400">{entry.prediction_count}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Information Coefficient by Horizon</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Horizon</th>
              <th className="pb-2 pr-4 font-medium">IC</th>
              <th className="pb-2 pr-4 font-medium">Rank IC</th>
              <th className="pb-2 font-medium">Predictions</th>
            </tr>
          </thead>
          <tbody>{bodyRows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Source Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-source performance table (win rate, IC, average return, duplicate rate).
 * Renders a spinner while loading, an error card on failure and a placeholder
 * card when there are no source rows.
 */
function SourceAttributionSection(props: {
  data: SourceAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (props.isLoading) return <LoadingSpinner />;
  if (props.error) return <ErrorCard message="Failed to load source attribution data" />;

  const rows: SourceAttribution[] = props.data?.sources ?? [];
  if (rows.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Source Performance</h2>
        <p className="text-sm text-gray-500">No source attribution data available</p>
      </Card>
    );
  }

  // Column headings in display order; the last column has no right padding.
  const headings = ['Source', 'Win Rate', 'IC', 'Avg Return', 'Duplicate Rate'];
  const lastColumn = headings.length - 1;

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Source Performance</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {headings.map((heading, col) => (
                <th key={heading} className={col < lastColumn ? 'pb-2 pr-4 font-medium' : 'pb-2 font-medium'}>
                  {heading}
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {rows.map((row, index) => {
              const winRateClass = colorForRate(row.win_rate, 0.53);
              const icClass = colorForIC(row.information_coefficient);
              return (
                <tr key={index} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300">{row.source}</td>
                  <td className={`py-1.5 pr-4 font-mono ${winRateClass}`}>{fmtPct(row.win_rate)}</td>
                  <td className={`py-1.5 pr-4 font-mono ${icClass}`}>{fmtIC(row.information_coefficient)}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(row.avg_future_return)}</td>
                  <td className="py-1.5 font-mono text-gray-300">{fmtPct(row.duplicate_rate)}</td>
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Catalyst Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * "Catalyst Truth Table": win rate, average return and IC per catalyst type.
 * Renders a spinner while loading, an error card on failure and a placeholder
 * card when there are no catalyst rows.
 */
function CatalystAttributionSection(props: {
  data: CatalystAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (props.isLoading) return <LoadingSpinner />;
  if (props.error) return <ErrorCard message="Failed to load catalyst attribution data" />;

  const rows: CatalystAttribution[] = props.data?.catalysts ?? [];
  if (rows.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
        <p className="text-sm text-gray-500">No catalyst attribution data available</p>
      </Card>
    );
  }

  const bodyRows = rows.map((row, index) => {
    const winRateClass = colorForRate(row.win_rate, 0.53);
    const icClass = colorForIC(row.information_coefficient);
    return (
      <tr key={index} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300">{row.catalyst_type}</td>
        <td className={`py-1.5 pr-4 font-mono ${winRateClass}`}>{fmtPct(row.win_rate)}</td>
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(row.avg_future_return)}</td>
        <td className={`py-1.5 font-mono ${icClass}`}>{fmtIC(row.information_coefficient)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Catalyst Type</th>
              <th className="pb-2 pr-4 font-medium">Win Rate</th>
              <th className="pb-2 pr-4 font-medium">Avg Return</th>
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>{bodyRows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Layer Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Card showing one table row per attribution layer.
 *
 * Render order of states:
 *  1. `isLoading` -> spinner
 *  2. `error`     -> error card with a fixed message
 *  3. no layers   -> card with an empty-state sentence
 *  4. otherwise   -> table with layer name, contribution %, dominant win rate and IC
 *
 * The win-rate cell is colored against a 0.53 threshold via `colorForRate`;
 * the IC cell is colored via `colorForIC`.
 */
function LayerAttributionSection({ data, isLoading, error }: {
  data: LayerAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) {
    return <LoadingSpinner />;
  }
  if (error) {
    return <ErrorCard message="Failed to load layer attribution data" />;
  }

  // Treat a missing response / missing list the same as an empty list.
  const rows = data?.layers ?? [];

  if (rows.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Layer Attribution</h2>
        <p className="text-sm text-gray-500">No layer attribution data available</p>
      </Card>
    );
  }

  // Build the body rows up front so the table markup below stays flat.
  const bodyRows = rows.map((row: LayerAttribution, index: number) => {
    const winRateClass = `py-1.5 pr-4 font-mono ${colorForRate(row.dominant_win_rate, 0.53)}`;
    const icClass = `py-1.5 font-mono ${colorForIC(row.dominant_ic)}`;
    return (
      <tr key={index} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300 capitalize">{row.layer}</td>
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(row.avg_contribution_pct)}</td>
        <td className={winRateClass}>{fmtPct(row.dominant_win_rate)}</td>
        <td className={icClass}>{fmtIC(row.dominant_ic)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Layer Attribution</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Layer</th>
              <th className="pb-2 pr-4 font-medium">Contribution %</th>
              <th className="pb-2 pr-4 font-medium">Dominant Win Rate</th>
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>{bodyRows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Shared helpers */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
function StatCard({ label, value, color = 'text-gray-100' }: { label: string; value: string; color?: string }) {
|
||||
return (
|
||||
<Card className="text-center">
|
||||
@@ -71,3 +622,53 @@ function StatCard({ label, value, color = 'text-gray-100' }: { label: string; va
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/** Red-tinted card that displays a single error message line. */
function ErrorCard({ message }: { message: string }) {
  const text = <p className="text-sm text-red-400">{message}</p>;
  return <Card className="border-red-700/50 bg-red-900/20">{text}</Card>;
}
|
||||
|
||||
/**
 * Render a fraction as a percentage string with one decimal place.
 *
 * @param v fraction to format (e.g. `0.5` -> `"50.0%"`)
 * @returns the formatted percentage, or `'—'` when `v` is null/undefined
 */
function fmtPct(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return '—';
  }
  const scaled = v * 100;
  return scaled.toFixed(1) + '%';
}
|
||||
|
||||
/**
 * Render a fraction as a whole-number percentage string (no decimals),
 * used for compact bucket labels.
 *
 * @param v fraction to format (e.g. `0.5` -> `"50%"`)
 * @returns the formatted percentage, or `'—'` when `v` is null/undefined
 */
function fmtPctShort(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return '—';
  }
  const scaled = v * 100;
  return scaled.toFixed(0) + '%';
}
|
||||
|
||||
/**
 * Render an information-coefficient value with four decimal places.
 *
 * @param v IC value to format
 * @returns the fixed-point string, or `'—'` when `v` is null/undefined
 */
function fmtIC(v: number | null | undefined): string {
  const missing = v === null || v === undefined;
  return missing ? '—' : v.toFixed(4);
}
|
||||
|
||||
/**
 * Format a threshold value of unknown shape for display.
 *
 * - null / undefined -> `'—'`
 * - integer number   -> plain string (e.g. `100`)
 * - other number     -> four decimal places (e.g. `0.0300`)
 * - object / array   -> JSON text, so structured thresholds stay readable
 *                       instead of collapsing to `[object Object]`
 * - anything else    -> `String(v)`
 *
 * @param v raw threshold value as received from the API
 * @returns a human-readable string
 */
function fmtThreshold(v: unknown): string {
  if (v == null) return '—';

  if (typeof v === 'number') {
    return Number.isInteger(v) ? String(v) : v.toFixed(4);
  }

  if (typeof v === 'object') {
    // JSON.stringify can throw (circular references, BigInt members) or yield
    // undefined (custom toJSON); fall back to plain string coercion then.
    try {
      const encoded: string | undefined = JSON.stringify(v);
      if (encoded !== undefined) return encoded;
    } catch {
      // fall through to String(v)
    }
  }

  return String(v);
}
|
||||
|
||||
/**
 * Pick a text color class for a win rate / accuracy value.
 *
 * @param v rate to classify
 * @param threshold minimum rate (inclusive) that counts as good
 * @returns neutral gray when `v` is null/undefined, green when
 *          `v >= threshold`, red otherwise
 */
function colorForRate(v: number | null | undefined, threshold: number): string {
  if (v === null || v === undefined) {
    return 'text-gray-100';
  }
  if (v >= threshold) {
    return 'text-green-400';
  }
  return 'text-red-400';
}
|
||||
|
||||
/**
 * Pick a text color class for an information-coefficient value.
 *
 * Tiers:
 * - null / undefined -> gray
 * - `v >= 0.03`      -> green
 * - `0 < v < 0.03`   -> yellow
 * - `v <= 0`         -> red
 *
 * @param v IC value to classify
 * @returns the Tailwind text color class for the tier
 */
function colorForIC(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return 'text-gray-400';
  }
  const meetsTarget = v >= 0.03;
  const isPositive = v > 0;
  return meetsTarget ? 'text-green-400' : isPositive ? 'text-yellow-400' : 'text-red-400';
}
|
||||
|
||||
@@ -1,13 +1,92 @@
|
||||
/**
|
||||
* Recommendation detail page with validation context.
|
||||
*
|
||||
* Shows original confidence alongside calibrated confidence (historical win rate),
|
||||
* evidence quality indicators, source reliability, and live eligibility status.
|
||||
*
|
||||
* Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
|
||||
*/
|
||||
import { useParams, Link } from '@tanstack/react-router';
|
||||
import { useRecommendation } from '../api/hooks';
|
||||
import { AlertTriangle, ShieldCheck, ShieldX, Info } from 'lucide-react';
|
||||
import {
|
||||
useRecommendation,
|
||||
useValidationCalibration,
|
||||
useValidationGateStatus,
|
||||
useValidationAttributionSources,
|
||||
} from '../api/hooks';
|
||||
import { StatusBadge, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';
|
||||
|
||||
export function RecommendationDetailPage() {
|
||||
const { id } = useParams({ from: '/recommendations/$id' });
|
||||
const { data: rec, isLoading } = useRecommendation(id);
|
||||
const { data: calibration } = useValidationCalibration();
|
||||
const { data: gateData } = useValidationGateStatus();
|
||||
const { data: sourcesData } = useValidationAttributionSources();
|
||||
|
||||
if (isLoading || !rec) return <LoadingSpinner />;
|
||||
|
||||
// --- Calibration: find the bucket matching this recommendation's confidence ---
|
||||
const matchingBucket = calibration?.buckets?.find(
|
||||
(b) => rec.confidence >= b.bucket_low && rec.confidence < b.bucket_high,
|
||||
);
|
||||
// Handle edge case: confidence of exactly 1.0 falls in the last bucket [0.90, 1.00]
|
||||
const calibratedBucket =
|
||||
matchingBucket ??
|
||||
(rec.confidence >= 1.0
|
||||
? calibration?.buckets?.find((b) => b.bucket_high >= 1.0)
|
||||
: undefined);
|
||||
|
||||
const historicalWinRate = calibratedBucket?.observed_win_rate;
|
||||
|
||||
// --- Evidence counts ---
|
||||
const totalEvidenceCount = rec.evidence.length;
|
||||
// Compute duplicate evidence: group by normalized title, count extras
|
||||
const titleCounts = new Map<string, number>();
|
||||
for (const ev of rec.evidence) {
|
||||
const key = (ev.title ?? '').toLowerCase().trim();
|
||||
titleCounts.set(key, (titleCounts.get(key) ?? 0) + 1);
|
||||
}
|
||||
let duplicateEvidenceCount = 0;
|
||||
for (const count of titleCounts.values()) {
|
||||
if (count > 1) duplicateEvidenceCount += count - 1;
|
||||
}
|
||||
const uniqueEvidenceCount = totalEvidenceCount - duplicateEvidenceCount;
|
||||
const duplicateRatio = totalEvidenceCount > 0 ? duplicateEvidenceCount / totalEvidenceCount : 0;
|
||||
const hasDuplicateWarning = duplicateRatio > 0.2;
|
||||
|
||||
// --- Source reliability: find primary contributing sources ---
|
||||
const evidenceSources = new Map<string, number>();
|
||||
for (const ev of rec.evidence) {
|
||||
const src = ev.source_type ?? ev.publisher ?? 'unknown';
|
||||
evidenceSources.set(src, (evidenceSources.get(src) ?? 0) + ev.weight);
|
||||
}
|
||||
// Sort by total weight descending to find primary source
|
||||
const sortedSources = [...evidenceSources.entries()].sort((a, b) => b[1] - a[1]);
|
||||
const primarySourceType = sortedSources[0]?.[0];
|
||||
|
||||
// Look up source reliability from attribution data
|
||||
const primarySourceAttribution = sourcesData?.sources?.find(
|
||||
(s) => s.source_type === primarySourceType || s.source === primarySourceType,
|
||||
);
|
||||
// Source reliability is approximated from win_rate via Bayesian shrinkage
|
||||
// The attribution data has win_rate which is the observed metric
|
||||
const primarySourceWinRate = primarySourceAttribution?.win_rate;
|
||||
// Bayesian shrinkage: reliability = 0.5 + (n/(n+30)) * (win_rate - 0.5)
|
||||
const primarySourceCount = primarySourceAttribution?.prediction_count ?? 0;
|
||||
const primarySourceReliability =
|
||||
primarySourceWinRate != null
|
||||
? 0.5 + (primarySourceCount / (primarySourceCount + 30)) * (primarySourceWinRate - 0.5)
|
||||
: undefined;
|
||||
const hasLowReliabilityWarning =
|
||||
primarySourceReliability != null && primarySourceReliability < 0.4;
|
||||
|
||||
// --- Gate status ---
|
||||
const gateStatus = gateData?.gate_status as {
|
||||
passed?: boolean;
|
||||
reason?: string;
|
||||
threshold_results?: Array<{ name: string; threshold: number; actual: number; passed: boolean }>;
|
||||
} | null;
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -28,6 +107,137 @@ export function RecommendationDetailPage() {
|
||||
</dl>
|
||||
</Card>
|
||||
|
||||
{/* Validation Context Card — Requirements 13.1–13.7 */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">Validation Context</h2>
|
||||
<dl className="grid grid-cols-2 gap-x-8 gap-y-3 text-sm sm:grid-cols-3">
|
||||
{/* 13.1: Original confidence alongside calibrated confidence */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Original Confidence</dt>
|
||||
<dd className="text-gray-200">{(rec.confidence * 100).toFixed(1)}%</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Calibrated Confidence</dt>
|
||||
<dd className="text-gray-200">
|
||||
{historicalWinRate != null
|
||||
? `${(historicalWinRate * 100).toFixed(1)}%`
|
||||
: 'N/A'}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.2: Historical win rate for similar confidence levels */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Historical Win Rate</dt>
|
||||
<dd className="text-gray-200">
|
||||
{historicalWinRate != null ? (
|
||||
<span>
|
||||
{(historicalWinRate * 100).toFixed(1)}%
|
||||
{calibratedBucket && (
|
||||
<span className="ml-1 text-xs text-gray-500">
|
||||
({calibratedBucket.prediction_count} predictions)
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
'N/A'
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.3: Evidence count, unique evidence count, duplicate evidence count */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Evidence Count</dt>
|
||||
<dd className="text-gray-200">{totalEvidenceCount}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Unique Evidence</dt>
|
||||
<dd className="text-gray-200">{uniqueEvidenceCount}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="flex items-center gap-1 text-gray-500">
|
||||
Duplicate Evidence
|
||||
{/* 13.6: Warning badge when duplicate evidence count > 20% of total */}
|
||||
{hasDuplicateWarning && (
|
||||
<span
|
||||
className="inline-flex items-center gap-0.5 rounded-full border border-yellow-700/50 bg-yellow-900/40 px-1.5 py-0.5 text-[10px] font-medium text-yellow-400"
|
||||
title="Duplicate evidence exceeds 20% of total — potential evidence inflation"
|
||||
>
|
||||
<AlertTriangle size={10} />
|
||||
>20%
|
||||
</span>
|
||||
)}
|
||||
</dt>
|
||||
<dd className="text-gray-200">
|
||||
{duplicateEvidenceCount}
|
||||
{totalEvidenceCount > 0 && (
|
||||
<span className="ml-1 text-xs text-gray-500">
|
||||
({(duplicateRatio * 100).toFixed(0)}%)
|
||||
</span>
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.4: Source reliability indicator */}
|
||||
<div>
|
||||
<dt className="flex items-center gap-1 text-gray-500">
|
||||
Primary Source Reliability
|
||||
{/* 13.7: Warning badge when primary source reliability < 0.4 */}
|
||||
{hasLowReliabilityWarning && (
|
||||
<span
|
||||
className="inline-flex items-center gap-0.5 rounded-full border border-red-700/50 bg-red-900/40 px-1.5 py-0.5 text-[10px] font-medium text-red-400"
|
||||
title="Primary source reliability is below 0.4 — low or unknown reliability"
|
||||
>
|
||||
<AlertTriangle size={10} />
|
||||
Low
|
||||
</span>
|
||||
)}
|
||||
</dt>
|
||||
<dd className="text-gray-200">
|
||||
{primarySourceReliability != null ? (
|
||||
<span>
|
||||
{primarySourceReliability.toFixed(3)}
|
||||
{primarySourceType && (
|
||||
<span className="ml-1 text-xs text-gray-500">({primarySourceType})</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
'N/A'
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.5: Live eligibility status with reason */}
|
||||
<div className="col-span-2">
|
||||
<dt className="text-gray-500">Live Eligibility</dt>
|
||||
<dd>
|
||||
{gateStatus != null ? (
|
||||
<div className="flex items-center gap-2">
|
||||
{gateStatus.passed ? (
|
||||
<span className="inline-flex items-center gap-1 text-green-400">
|
||||
<ShieldCheck size={14} />
|
||||
Gate Passed
|
||||
</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center gap-1 text-red-400">
|
||||
<ShieldX size={14} />
|
||||
Gate Failed
|
||||
</span>
|
||||
)}
|
||||
{gateStatus.reason && (
|
||||
<span className="text-xs text-gray-500">{gateStatus.reason}</span>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<span className="inline-flex items-center gap-1 text-gray-500">
|
||||
<Info size={14} />
|
||||
N/A — no gate evaluation available
|
||||
</span>
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
</dl>
|
||||
</Card>
|
||||
|
||||
{rec.thesis && (
|
||||
<Card>
|
||||
<h2 className="mb-2 text-sm font-medium text-gray-400">Thesis</h2>
|
||||
|
||||
@@ -73,6 +73,97 @@ export const mockVariantPerfHistory = [
|
||||
{ hour: '2026-04-10T11:00:00Z', invocations: 12, successes: 11, avg_duration_ms: 1300, avg_confidence: 0.82 },
|
||||
];
|
||||
|
||||
// Validation: Model Quality & Calibration mock data
|
||||
/**
 * Mock payload returned by the `/api/validation/summary` handler:
 * one metrics snapshot plus a passing gate evaluation.
 */
export const mockValidationSummary = {
  snapshot: {
    // Identity / window
    id: 'ms-1',
    generated_at: '2026-04-11T12:00:00Z',
    lookback_window: '30d',
    horizon: '7d',
    prediction_count: 150,

    // Headline quality metrics
    win_rate: 0.58,
    directional_accuracy: 0.56,
    information_coefficient: 0.045,
    rank_information_coefficient: 0.038,

    // Returns
    avg_return: 0.012,
    avg_excess_return_vs_spy: 0.003,
    avg_excess_return_vs_sector: 0.002,

    // Calibration
    calibration_error: 0.08,
    brier_score: 0.21,

    // Win rate per action
    buy_win_rate: 0.61,
    sell_win_rate: 0.54,
    hold_win_rate: 0.5,

    metadata: {},
  },
  gate_status: {
    passed: true,
    reason: 'all thresholds met',
    threshold_results: [
      {
        name: 'min_prediction_count',
        threshold: 100,
        actual: 150,
        passed: true,
      },
      {
        name: 'min_ic',
        threshold: 0.03,
        actual: 0.045,
        passed: true,
      },
      {
        name: 'min_win_rate',
        threshold: 0.53,
        actual: 0.58,
        passed: true,
      },
    ],
  },
};
|
||||
|
||||
/**
 * Mock payload returned by the `/api/validation/calibration` handler:
 * five confidence buckets from 0.5 to 1.0, two of them flagged as miscalibrated.
 */
export const mockValidationCalibration = {
  buckets: [
    {
      bucket_low: 0.5,
      bucket_high: 0.6,
      avg_confidence: 0.55,
      observed_win_rate: 0.52,
      prediction_count: 30,
      miscalibrated: false,
    },
    {
      bucket_low: 0.6,
      bucket_high: 0.7,
      avg_confidence: 0.65,
      observed_win_rate: 0.58,
      prediction_count: 40,
      miscalibrated: false,
    },
    {
      bucket_low: 0.7,
      bucket_high: 0.8,
      avg_confidence: 0.75,
      observed_win_rate: 0.55,
      prediction_count: 35,
      miscalibrated: true,
    },
    {
      bucket_low: 0.8,
      bucket_high: 0.9,
      avg_confidence: 0.85,
      observed_win_rate: 0.7,
      prediction_count: 25,
      miscalibrated: false,
    },
    {
      bucket_low: 0.9,
      bucket_high: 1.0,
      avg_confidence: 0.95,
      observed_win_rate: 0.72,
      prediction_count: 20,
      miscalibrated: true,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
|
||||
|
||||
/**
 * Mock payload returned by the `/api/validation/gate-status` handler:
 * a failing gate where only the `min_ic` threshold is not met.
 */
export const mockValidationGateStatus = {
  gate_status: {
    passed: false,
    reason: 'failed: min_ic below threshold',
    threshold_results: [
      {
        name: 'min_prediction_count',
        threshold: 100,
        actual: 150,
        passed: true,
      },
      {
        name: 'min_ic',
        threshold: 0.03,
        actual: 0.02,
        passed: false,
      },
      {
        name: 'min_win_rate',
        threshold: 0.53,
        actual: 0.58,
        passed: true,
      },
    ],
  },
};
|
||||
|
||||
/**
 * Mock payload returned by the `/api/validation/ic-by-horizon` handler:
 * IC and rank-IC figures for the `1h` and `7d` horizons.
 */
export const mockValidationICByHorizon = {
  horizons: [
    {
      horizon: '1h',
      information_coefficient: 0.02,
      rank_information_coefficient: 0.015,
      prediction_count: 120,
      generated_at: '2026-04-11T12:00:00Z',
    },
    {
      horizon: '7d',
      information_coefficient: 0.045,
      rank_information_coefficient: 0.038,
      prediction_count: 100,
      generated_at: '2026-04-11T12:00:00Z',
    },
  ],
  lookback: '30d',
};
|
||||
|
||||
/**
 * Mock payload returned by the `/api/validation/attribution/sources` handler:
 * a single source row (`Reuters`, type `news_api`).
 */
export const mockValidationAttributionSources = {
  sources: [
    {
      source: 'Reuters',
      source_type: 'news_api',
      prediction_count: 50,
      avg_weight: 0.6,
      avg_contribution_score: 0.3,
      win_rate: 0.62,
      avg_future_return: 0.015,
      avg_excess_return_vs_spy: 0.005,
      information_coefficient: 0.05,
      duplicate_rate: 0.1,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
|
||||
|
||||
/**
 * Mock payload returned by the `/api/validation/attribution/catalysts` handler:
 * a single catalyst row of type `earnings`.
 */
export const mockValidationAttributionCatalysts = {
  catalysts: [
    {
      catalyst_type: 'earnings',
      prediction_count: 40,
      win_rate: 0.65,
      avg_future_return: 0.02,
      avg_excess_return_vs_spy: 0.008,
      information_coefficient: 0.06,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
|
||||
|
||||
/**
 * Mock payload returned by the `/api/validation/attribution/layers` handler:
 * three layers (`company`, `macro`, `competitive`); the last one has a null
 * `dominant_ic`.
 */
export const mockValidationAttributionLayers = {
  layers: [
    {
      layer: 'company',
      avg_contribution_pct: 0.55,
      dominant_win_rate: 0.6,
      dominant_ic: 0.04,
    },
    {
      layer: 'macro',
      avg_contribution_pct: 0.3,
      dominant_win_rate: 0.52,
      dominant_ic: 0.02,
    },
    {
      layer: 'competitive',
      avg_contribution_pct: 0.15,
      dominant_win_rate: 0.48,
      dominant_ic: null,
    },
  ],
  lookback: '30d',
  horizon: '7d',
};
|
||||
|
||||
export const handlers = [
|
||||
// Query API (proxied at /api/)
|
||||
http.get('/api/companies', () => HttpResponse.json(mockCompanies)),
|
||||
@@ -242,4 +333,13 @@ export const handlers = [
|
||||
const body = await request.json() as Record<string, unknown>;
|
||||
return HttpResponse.json({ enabled: body.enabled, previous_enabled: true, toggled_by: 'operator' });
|
||||
}),
|
||||
|
||||
// Validation: Model Quality & Calibration endpoints
|
||||
http.get('/api/validation/summary', () => HttpResponse.json(mockValidationSummary)),
|
||||
http.get('/api/validation/calibration', () => HttpResponse.json(mockValidationCalibration)),
|
||||
http.get('/api/validation/gate-status', () => HttpResponse.json(mockValidationGateStatus)),
|
||||
http.get('/api/validation/ic-by-horizon', () => HttpResponse.json(mockValidationICByHorizon)),
|
||||
http.get('/api/validation/attribution/sources', () => HttpResponse.json(mockValidationAttributionSources)),
|
||||
http.get('/api/validation/attribution/catalysts', () => HttpResponse.json(mockValidationAttributionCatalysts)),
|
||||
http.get('/api/validation/attribution/layers', () => HttpResponse.json(mockValidationAttributionLayers)),
|
||||
];
|
||||
|
||||
@@ -169,6 +169,55 @@ describe('Global Events page', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('OpsModel validation tab', () => {
  /** Render `/ops/model` and wait for the page heading to appear. */
  const openModelPage = async () => {
    renderRoute('/ops/model');
    await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
  };

  /** Click the "Model Validation" tab button. */
  const selectValidationTab = async () => {
    await userEvent.click(screen.getByText('Model Validation'));
  };

  it('renders Model Validation tab with summary cards', async () => {
    await openModelPage();

    // Both tab buttons must be present before switching.
    for (const tabLabel of ['Extraction Performance', 'Model Validation']) {
      expect(screen.getByText(tabLabel)).toBeInTheDocument();
    }

    await selectValidationTab();

    // Metric labels that only the validation summary renders.
    await waitFor(() => {
      expect(screen.getByText('Brier Score')).toBeInTheDocument();
      expect(screen.getByText('ECE')).toBeInTheDocument();
      expect(screen.getByText('Directional Accuracy')).toBeInTheDocument();
      expect(screen.getByText('Excess vs SPY')).toBeInTheDocument();
    });
  }, 10000);

  it('renders calibration table with miscalibration warning', async () => {
    await openModelPage();
    await selectValidationTab();

    await waitFor(() => {
      expect(screen.getByText('Calibration by Confidence Bucket')).toBeInTheDocument();
    });

    // At least one bucket must carry the miscalibration label.
    const flagged = screen.getAllByText('Miscalibrated');
    expect(flagged.length).toBeGreaterThanOrEqual(1);
  }, 10000);

  it('renders gate status pass/fail indicator', async () => {
    await openModelPage();
    await selectValidationTab();

    // The mocked gate-status endpoint reports passed: false.
    await waitFor(() => {
      expect(screen.getByText(/Live Trading Gate: FAIL/)).toBeInTheDocument();
    });
  }, 10000);
});
|
||||
|
||||
describe('Agents page', () => {
|
||||
it('renders agent list in sidebar', async () => {
|
||||
renderRoute('/agents');
|
||||
|
||||
Reference in New Issue
Block a user