Compare commits
45 Commits
v0.1.0
..
2f2a7665e7
| Author | SHA1 | Date | |
|---|---|---|---|
| 2f2a7665e7 | |||
| 7eaff3ae58 | |||
| f99fc7fdc1 | |||
| 23834a9333 | |||
| 485dd12024 | |||
| fe8debeb17 | |||
| 5ed9f001b1 | |||
| 18f857e1a3 | |||
| 61bcb5aa57 | |||
| f468e30af0 | |||
| 7e2343ec2c | |||
| 3a8c6f6c80 | |||
| 139fbd6342 | |||
| 63b62c5e9f | |||
| 3acfdfd11f | |||
| ab212db8be | |||
| 7102358f51 | |||
| bc077bfcc8 | |||
| 376fcb4bb4 | |||
| cc21fd9e8f | |||
| affb65d7f4 | |||
| 32535540fe | |||
| 751cce0509 | |||
| 0732894414 | |||
| d9110d03a6 | |||
| 76f6bd5677 | |||
| 32d290bea7 | |||
| 7fcc8a6c07 | |||
| 5d2ffd9163 | |||
| 6169efdc89 | |||
| c42f2223d8 | |||
| 1f08820f11 | |||
| 2f2ea65fb4 | |||
| 9a60ce127b | |||
| 414f476620 | |||
| a5f2bcde55 | |||
| 34ffdad00c | |||
| 601b85764b | |||
| 51b6f3d34a | |||
| bddaf44ffc | |||
| 5209cc522e | |||
| fa18b1a7c2 | |||
| 4b254adad2 | |||
| af2e554edd | |||
| facce5dbb5 |
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Minimal MCP server for OpenAI chat completions.
|
||||
* Accepts ANY model string (gpt-5.2, gpt-5.4, etc.) — no hardcoded enum.
|
||||
* Communicates over stdio using JSON-RPC (MCP protocol).
|
||||
*/
|
||||
|
||||
import { createInterface } from "readline";
|
||||
|
||||
// API key used for the Authorization header of every OpenAI request.
// Fail fast at startup when it is missing; the message goes to stderr because
// stdout carries the JSON-RPC responses.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
  process.stderr.write("ERROR: OPENAI_API_KEY environment variable is required\n");
  process.exit(1);
}
|
||||
|
||||
/** Server identity returned as `serverInfo` in the `initialize` response. */
const SERVER_INFO = {
  name: "openai-chat",
  version: "1.0.0",
};
|
||||
|
||||
/**
 * Tool catalogue returned by `tools/list`.
 *
 * `model` is a free-form string (no enum) so any model name can be passed.
 * `messages` is the only required argument; `minItems: 1` mirrors the
 * handler, which rejects an empty array.
 */
const TOOLS = [
  {
    name: "openai_chat",
    description:
      "Send messages to OpenAI chat completions API. Supports all OpenAI models including GPT-5.x series.",
    inputSchema: {
      type: "object",
      properties: {
        model: {
          type: "string",
          description:
            "OpenAI model name (e.g. gpt-5.2, gpt-5.4, gpt-4o, etc.)",
          default: "gpt-5.2",
        },
        messages: {
          type: "array",
          description: "Array of chat messages",
          minItems: 1,
          items: {
            type: "object",
            properties: {
              role: {
                type: "string",
                enum: ["system", "user", "assistant"],
              },
              content: { type: "string" },
            },
            required: ["role", "content"],
          },
        },
      },
      required: ["messages"],
    },
  },
];
|
||||
|
||||
/**
 * Send one chat-completions request to the OpenAI API and return the reply text.
 *
 * @param model    Model name, forwarded to the API unchanged.
 * @param messages Chat messages, forwarded to the API unchanged.
 * @returns The `message.content` of the first choice, or "(no response)" when
 *          the response has no such field.
 * @throws Error when the API answers with a non-OK status; the message carries
 *         the status code and the raw response body.
 */
async function callOpenAI(model, messages) {
  const resp = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${OPENAI_API_KEY}`,
    },
    body: JSON.stringify({ model, messages }),
  });

  if (!resp.ok) {
    // Surface the API's own error body so the caller can see why it failed.
    const errText = await resp.text();
    throw new Error(`OpenAI API error ${resp.status}: ${errText}`);
  }

  const data = await resp.json();
  return data.choices?.[0]?.message?.content ?? "(no response)";
}
|
||||
|
||||
/**
 * Serialize a successful JSON-RPC 2.0 response.
 *
 * @param id     Request id to echo back.
 * @param result Result payload.
 * @returns The response as a JSON string (no trailing newline).
 */
function jsonRpcResponse(id, result) {
  return JSON.stringify({ jsonrpc: "2.0", id, result });
}
|
||||
|
||||
/**
 * Serialize a JSON-RPC 2.0 error response.
 *
 * @param id      Request id to echo back; when undefined it is sent as null so
 *                the required `id` member is never dropped by JSON.stringify.
 * @param code    JSON-RPC error code (e.g. -32601, -32602).
 * @param message Human-readable error description.
 * @returns The error response as a JSON string (no trailing newline).
 */
function jsonRpcError(id, code, message) {
  return JSON.stringify({ jsonrpc: "2.0", id: id ?? null, error: { code, message } });
}
|
||||
|
||||
/**
 * Dispatch one decoded JSON-RPC message to the matching MCP method handler.
 *
 * Handled methods: `initialize`, `tools/list`, `tools/call` (tool
 * `openai_chat` only), `ping`, and any `notifications/*` method.
 *
 * @param req Parsed JSON-RPC message.
 * @returns A serialized JSON-RPC response, or null when no reply is to be sent.
 *          Failures of the OpenAI call are reported as a tool result with
 *          `isError: true`, not as a JSON-RPC error.
 */
async function handleRequest(req) {
  // Payloads such as `null`, `42` or `[]` are valid JSON but not request objects.
  if (req === null || typeof req !== "object" || Array.isArray(req)) {
    return jsonRpcError(null, -32600, "Invalid Request");
  }

  const { id, method, params } = req;

  switch (method) {
    case "initialize":
      return jsonRpcResponse(id, {
        protocolVersion: "2024-11-05",
        capabilities: { tools: {} },
        serverInfo: SERVER_INFO,
      });

    case "notifications/initialized":
      return null; // no response needed for notifications

    case "tools/list":
      return jsonRpcResponse(id, { tools: TOOLS });

    case "tools/call": {
      const toolName = params?.name;
      if (toolName !== "openai_chat") {
        return jsonRpcError(id, -32602, `Unknown tool: ${toolName}`);
      }

      const args = params?.arguments ?? {};
      const model = args.model || "gpt-5.2";
      const messages = args.messages;

      // Reject missing, empty, and non-array values (a bare string also has
      // a truthy `.length`, so checking length alone is not enough).
      if (!Array.isArray(messages) || messages.length === 0) {
        return jsonRpcError(id, -32602, "messages array is required");
      }

      try {
        const content = await callOpenAI(model, messages);
        return jsonRpcResponse(id, {
          content: [{ type: "text", text: content }],
        });
      } catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        return jsonRpcResponse(id, {
          content: [{ type: "text", text: `Error: ${detail}` }],
          isError: true,
        });
      }
    }

    case "ping":
      return jsonRpcResponse(id, {});

    default:
      // Never answer a notification: either the method is in the
      // notifications/ namespace or the message carries no id.
      if (id === undefined) return null;
      if (typeof method === "string" && method.startsWith("notifications/")) return null;
      return jsonRpcError(id, -32601, `Method not found: ${method}`);
  }
}
|
||||
|
||||
// stdio transport: each stdin line is parsed as one JSON-RPC message and each
// response is written to stdout as one line. Diagnostics go to stderr.
const rl = createInterface({ input: process.stdin });

rl.on("line", async (line) => {
  // Blank lines carry no message; skipping them avoids spurious parse errors.
  if (!line.trim()) return;

  let req;
  try {
    req = JSON.parse(line);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    process.stderr.write(`Parse error: ${detail}\n`);
    return;
  }

  try {
    const resp = await handleRequest(req);
    if (resp) {
      process.stdout.write(resp + "\n");
    }
  } catch (err) {
    // Handler failures are not parse errors: log them separately and, when
    // the request carried an id, answer so the caller is not left waiting.
    const detail = err instanceof Error ? err.message : String(err);
    process.stderr.write(`Request error: ${detail}\n`);
    const id = req?.id;
    if (id !== undefined && id !== null) {
      process.stdout.write(jsonRpcError(id, -32603, `Internal error: ${detail}`) + "\n");
    }
  }
});
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "d76705a8-fb91-4fce-b59e-c4b3b0dbbd83", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,723 @@
|
||||
# Design Document — Dual-Pipeline Signal Engine
|
||||
|
||||
## Overview
|
||||
|
||||
The dual-pipeline signal engine is a new service at `services/signal_engine/` that runs as an independent Kubernetes deployment alongside the existing aggregation → recommendation pipeline. It implements a concurrent dual-pipeline architecture where both a heuristic (deterministic scoring) and probabilistic (Bayesian inference) pipeline evaluate the same normalized inputs per ticker per evaluation tick, producing independent BUY/WATCH/SKIP verdicts. A delta analyzer compares the two verdicts, and an output formatter assembles a structured `SignalOutput` contract published to the existing `trading_decisions` Redis queue.
|
||||
|
||||
The engine introduces several new components — Input Normalizer, Signal Library (Fibonacci, MA Stack, RSI, Cup & Handle, Elliott Wave), Multi-Timeframe Engine, Hard Filter Engine, Exit Engine, Delta Analyzer, and Output Formatter — while reusing existing infrastructure: `compute_signal_weight`, `compute_bayesian_posterior`, `classify_regime`, `WeightedSignal`, `BayesianPosterior`, and `RegimeClassification` from `services/aggregation/`.
|
||||
|
||||
The service is toggled via `dual_pipeline_enabled` in the `risk_configs` table (default: false, fail-safe). When disabled, the existing pipeline operates unchanged. When enabled, the signal engine runs alongside the existing pipeline with support for shadow mode (dual-pipeline output persisted but not forwarded to trading).
|
||||
|
||||
### Design Rationale
|
||||
|
||||
- **Separate service, not inline extension**: The signal engine has a fundamentally different evaluation cadence (multi-timeframe technical signals) and data flow (OHLCV bars, not document intelligence). Embedding it in the aggregation worker would couple two distinct concerns.
|
||||
- **Reuse existing math**: The Bayesian posterior, regime classification, and signal weighting functions are battle-tested. The probabilistic pipeline wraps them with regime-based priors and likelihood ratio accumulation rather than reimplementing.
|
||||
- **Concurrent pipelines via asyncio.gather**: Both pipelines share the same `NormalizedInput` reference and run concurrently. If one fails, the other completes normally with the failed pipeline producing a SKIP verdict.
|
||||
- **Signal clustering for correlation penalty**: The Bayesian pipeline groups signals into four clusters (momentum, structure, volatility, fundamentals) and applies exponential decay within each cluster to prevent likelihood ratio stacking inflation from correlated signals.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Flow
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[Evaluation Tick<br/>Redis queue: signal_engine] --> B[Input Normalizer]
|
||||
B --> C[Hard Filter Engine]
|
||||
C -->|filtered out| D[SKIP verdict for both pipelines]
|
||||
C -->|passed| E[Signal Library]
|
||||
E --> F[Multi-Timeframe Engine]
|
||||
F --> G{asyncio.gather}
|
||||
G --> H[Heuristic Pipeline]
|
||||
G --> I[Probabilistic Pipeline]
|
||||
H --> J[Delta Analyzer]
|
||||
I --> J
|
||||
J --> K[Output Formatter]
|
||||
K --> L[SignalOutput]
|
||||
L --> M[Redis: trading_decisions queue]
|
||||
L --> N[PostgreSQL: signal_engine_outputs]
|
||||
|
||||
subgraph Exit Path
|
||||
B --> O[Exit Engine]
|
||||
O --> K
|
||||
end
|
||||
```
|
||||
|
||||
### Trigger Mechanism
|
||||
|
||||
The signal engine polls a new Redis queue `stonks:queue:signal_engine`. Evaluation ticks are enqueued by the scheduler service after aggregation completes for a ticker. The queue message contains `{"ticker": "AAPL", "triggered_at": "2024-01-15T10:00:00Z"}`.
|
||||
|
||||
### Integration Points
|
||||
|
||||
| Component | Integration | Direction |
|
||||
|---|---|---|
|
||||
| Scheduler | Enqueues ticks to `signal_engine` queue | Scheduler → Signal Engine |
|
||||
| Market data tables | OHLCV bars, closing prices, returns | Signal Engine reads |
|
||||
| `macro_impact_records` | Macro bias computation | Signal Engine reads |
|
||||
| `trend_windows` | Fundamental/valuation context | Signal Engine reads |
|
||||
| `risk_configs` | Feature flags, thresholds | Signal Engine reads |
|
||||
| `classify_regime()` | Regime classification for priors | Signal Engine calls |
|
||||
| `compute_signal_weight()` | Heuristic signal weighting | Signal Engine calls |
|
||||
| `compute_bayesian_posterior()` | Bayesian accumulation | Signal Engine calls |
|
||||
| Redis `trading_decisions` | SignalOutput publication | Signal Engine → Trading Engine |
|
||||
| `signal_engine_outputs` table | Persistence for audit | Signal Engine writes |
|
||||
| Redis rolling agreement | Delta analyzer metrics | Signal Engine writes |
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### Module Structure
|
||||
|
||||
```
|
||||
services/signal_engine/
|
||||
├── __init__.py
|
||||
├── main.py # Entry point: asyncio event loop, queue polling
|
||||
├── worker.py # Top-level orchestrator per evaluation tick
|
||||
├── config.py # SignalEngineConfig, loaded from risk_configs + env
|
||||
├── models.py # All Pydantic models (NormalizedInput, SignalResult, etc.)
|
||||
├── normalizer.py # Input Normalizer — fetches and assembles NormalizedInput
|
||||
├── signals/
|
||||
│ ├── __init__.py
|
||||
│ ├── base.py # SignalEvaluator protocol, SignalResult model
|
||||
│ ├── fibonacci.py # Fibonacci retracement evaluator
|
||||
│ ├── ma_stack.py # Moving average stack evaluator
|
||||
│ ├── rsi.py # RSI evaluator
|
||||
│ ├── cup_handle.py # Cup & Handle pattern detector
|
||||
│ └── elliott_wave.py # Elliott Wave detector
|
||||
├── confluence.py # Multi-Timeframe Confluence Engine
|
||||
├── hard_filter.py # Hard Filter Engine
|
||||
├── heuristic.py # Heuristic Pipeline (Pipeline A)
|
||||
├── probabilistic.py # Probabilistic Pipeline (Pipeline B)
|
||||
├── correlation.py # Signal cluster classification + correlation penalty
|
||||
├── exit_engine.py # Exit Engine — position-level exit management
|
||||
├── delta.py # Delta Analyzer
|
||||
├── formatter.py # Output Formatter
|
||||
└── persistence.py # Database persistence for signal_engine_outputs
|
||||
```
|
||||
|
||||
### Key Function Signatures
|
||||
|
||||
#### `main.py` — Entry Point
|
||||
|
||||
```python
|
||||
async def main() -> None:
|
||||
"""Start the signal engine worker loop.
|
||||
|
||||
Connects to PostgreSQL and Redis, loads config from risk_configs,
|
||||
and polls the signal_engine queue indefinitely.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `worker.py` — Orchestrator
|
||||
|
||||
```python
|
||||
async def evaluate_tick(
|
||||
pool: asyncpg.Pool,
|
||||
redis: redis.asyncio.Redis,
|
||||
ticker: str,
|
||||
config: SignalEngineConfig,
|
||||
) -> SignalOutput | None:
|
||||
"""Run a full evaluation tick for a single ticker.
|
||||
|
||||
1. Normalize inputs
|
||||
2. Evaluate exit conditions for open positions
|
||||
3. Run hard filters
|
||||
4. Evaluate signals across timeframes
|
||||
5. Run both pipelines concurrently
|
||||
6. Compute delta analysis
|
||||
7. Format and publish output
|
||||
|
||||
Returns None if the ticker is hard-filtered or both pipelines fail.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `normalizer.py` — Input Normalizer
|
||||
|
||||
```python
|
||||
async def normalize_input(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
config: SignalEngineConfig,
|
||||
) -> NormalizedInput:
|
||||
"""Fetch and assemble all data needed for a single evaluation tick.
|
||||
|
||||
Sources:
|
||||
- OHLCV bars from market_data_bars (M30, H1, H4, D, W, M)
|
||||
- Fundamental metrics from trend_windows + companies
|
||||
- Macro context from macro_impact_records + global_events
|
||||
- Open position state from the trading engine's portfolio
|
||||
|
||||
Missing data sources produce sentinel values (None/empty list)
|
||||
with a logged warning.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `signals/base.py` — Signal Evaluator Protocol
|
||||
|
||||
```python
|
||||
from typing import Protocol
|
||||
|
||||
class SignalEvaluator(Protocol):
|
||||
"""Protocol for all signal evaluators in the Signal Library."""
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
bars: list[OHLCVBar],
|
||||
timeframe: str,
|
||||
) -> SignalResult | None:
|
||||
"""Evaluate a signal on a single timeframe's bar data.
|
||||
|
||||
Returns None when insufficient data is available.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### `confluence.py` — Multi-Timeframe Engine
|
||||
|
||||
```python
|
||||
def compute_confluence(
|
||||
signal_results: dict[str, dict[str, SignalResult]],
|
||||
weights: dict[str, float],
|
||||
) -> list[ConfluenceSignal]:
|
||||
"""Compute weighted confluence scores across timeframes.
|
||||
|
||||
Args:
|
||||
signal_results: {signal_type: {timeframe: SignalResult}}
|
||||
weights: {timeframe: weight} e.g. {"M30": 0.03, "D": 0.30, ...}
|
||||
|
||||
Returns:
|
||||
List of ConfluenceSignal objects that pass the minimum
|
||||
confluence threshold (≥2 timeframes, ≥1 of D/W/M).
|
||||
"""
|
||||
```
|
||||
|
||||
#### `hard_filter.py` — Hard Filter Engine
|
||||
|
||||
```python
|
||||
def evaluate_hard_filters(
|
||||
normalized: NormalizedInput,
|
||||
config: HardFilterConfig,
|
||||
) -> HardFilterResult:
|
||||
"""Evaluate pre-pipeline hard filters.
|
||||
|
||||
Checks:
|
||||
- macro_bias == -1.0 → SKIP
|
||||
- valuation_score < threshold → SKIP
|
||||
- earnings_proximity_days <= threshold → SKIP
|
||||
|
||||
Returns HardFilterResult with filtered=True/False and all triggered reasons.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `heuristic.py` — Heuristic Pipeline
|
||||
|
||||
```python
|
||||
def run_heuristic_pipeline(
|
||||
normalized: NormalizedInput,
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
config: HeuristicConfig,
|
||||
) -> HeuristicResult:
|
||||
"""Run the deterministic heuristic pipeline.
|
||||
|
||||
Computes S_total = S_company + S_macro + S_competitive using
|
||||
existing compute_signal_weight() and weighted sentiment averaging.
|
||||
Produces BUY/WATCH/SKIP verdict based on confidence and score thresholds.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `probabilistic.py` — Probabilistic Pipeline
|
||||
|
||||
```python
|
||||
def run_probabilistic_pipeline(
|
||||
normalized: NormalizedInput,
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
regime: RegimeClassification,
|
||||
config: ProbabilisticConfig,
|
||||
) -> ProbabilisticResult:
|
||||
"""Run the Bayesian probabilistic pipeline.
|
||||
|
||||
1. Initialize regime-based prior (bull=0.58, range=0.50, bear=0.42)
|
||||
2. Compute likelihood ratios per signal with correlation penalty
|
||||
3. Accumulate via log-odds: logit(P_post) = logit(P_prior) + Σ log(LR_i)
|
||||
4. Apply entropy gating
|
||||
5. Compute EV_R = P_up · E[win_R] - (1 - P_up) · 1.0
|
||||
6. Produce BUY/WATCH/SKIP verdict
|
||||
"""
|
||||
```
|
||||
|
||||
#### `correlation.py` — Signal Correlation Penalty
|
||||
|
||||
```python
|
||||
class SignalCluster(str, Enum):
|
||||
MOMENTUM = "momentum" # MA stack, RSI
|
||||
STRUCTURE = "structure" # Fibonacci, Elliott Wave
|
||||
VOLATILITY = "volatility" # ATR-based, Bollinger-derived
|
||||
FUNDAMENTALS = "fundamentals" # valuation, earnings, macro
|
||||
|
||||
def classify_signal(signal_type: str) -> SignalCluster:
|
||||
"""Map a signal type to its correlation cluster."""
|
||||
|
||||
def apply_correlation_penalty(
|
||||
likelihood_ratios: list[LikelihoodRatio],
|
||||
) -> list[LikelihoodRatio]:
|
||||
"""Apply within-cluster decay penalty to correlated signals.
|
||||
|
||||
Within each cluster, signals are ranked by LR magnitude.
|
||||
The strongest contributes at full weight; subsequent signals
|
||||
contribute at 0.5^(n-1) decay, where n is the signal's 1-based rank within its cluster.
|
||||
|
||||
Cross-cluster signals are independent (no penalty).
|
||||
"""
|
||||
```
|
||||
|
||||
#### `exit_engine.py` — Exit Engine
|
||||
|
||||
```python
|
||||
def evaluate_exits(
|
||||
positions: list[OpenPositionState],
|
||||
current_prices: dict[str, float],
|
||||
config: ExitConfig,
|
||||
) -> list[ExitSignal]:
|
||||
"""Evaluate exit conditions for all open positions.
|
||||
|
||||
Checks: stop_loss hit, target_1 hit (EXIT_HALF), target_2 hit (EXIT_FULL),
|
||||
trailing stop hit (EXIT_FULL for remaining).
|
||||
|
||||
Trailing stop activates after EXIT_HALF and ratchets upward only.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `delta.py` — Delta Analyzer
|
||||
|
||||
```python
|
||||
async def analyze_delta(
|
||||
heuristic: HeuristicResult,
|
||||
probabilistic: ProbabilisticResult,
|
||||
redis: redis.asyncio.Redis,
|
||||
ticker: str,
|
||||
) -> DeltaResult:
|
||||
"""Compare pipeline verdicts and track agreement metrics.
|
||||
|
||||
Computes agreement flag, confidence delta, disagreement reasons.
|
||||
Updates rolling 100-evaluation agreement rate in Redis.
|
||||
Logs warning when agreement rate drops below 0.50.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `formatter.py` — Output Formatter
|
||||
|
||||
```python
|
||||
def format_output(
|
||||
ticker: str,
|
||||
price: float,
|
||||
heuristic: HeuristicResult,
|
||||
probabilistic: ProbabilisticResult,
|
||||
delta: DeltaResult,
|
||||
exit_signals: list[ExitSignal],
|
||||
config: SignalEngineConfig,
|
||||
) -> SignalOutput:
|
||||
"""Assemble the structured SignalOutput contract.
|
||||
|
||||
Populates trade_plan based on verdict combination:
|
||||
- Both BUY → dual_confirmed, full position sizing
|
||||
- Probabilistic-only BUY → probabilistic_only, 50% position sizing
|
||||
- Heuristic-only BUY → standard position sizing
|
||||
- No BUY → no trade_plan (WATCH/SKIP persisted for analysis)
|
||||
"""
|
||||
|
||||
def signal_output_to_recommendation(output: SignalOutput) -> Recommendation:
|
||||
"""Map a SignalOutput to the existing Recommendation schema.
|
||||
|
||||
Enables the trading engine to consume dual-pipeline outputs
|
||||
without modification to its core evaluate_recommendation logic.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `persistence.py` — Database Persistence
|
||||
|
||||
```python
|
||||
async def persist_signal_output(
|
||||
pool: asyncpg.Pool,
|
||||
output: SignalOutput,
|
||||
) -> None:
|
||||
"""Persist a SignalOutput to the signal_engine_outputs table.
|
||||
|
||||
Logs and continues on database errors (persistence failure
|
||||
does not block signal emission to the trading queue).
|
||||
"""
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
All new data models are Pydantic `BaseModel` subclasses defined in `services/signal_engine/models.py`. Existing models (`WeightedSignal`, `BayesianPosterior`, `RegimeClassification`, `TrendSummary`, `Recommendation`, `PositionSizing`) are imported from `services/aggregation/` and `services/shared/schemas.py`.
|
||||
|
||||
### OHLCVBar
|
||||
|
||||
```python
|
||||
class OHLCVBar(BaseModel):
|
||||
"""Single OHLCV bar for a timeframe."""
|
||||
timestamp: datetime
|
||||
open: float
|
||||
high: float
|
||||
low: float
|
||||
close: float
|
||||
volume: float
|
||||
```
|
||||
|
||||
### NormalizedInput
|
||||
|
||||
```python
|
||||
class NormalizedInput(BaseModel):
|
||||
"""Unified input structure consumed by both pipelines."""
|
||||
ticker: str
|
||||
evaluated_at: datetime
|
||||
|
||||
# Multi-timeframe OHLCV bars
|
||||
bars: dict[str, list[OHLCVBar]] # {"M30": [...], "H1": [...], ...}
|
||||
|
||||
# Fundamental metrics
|
||||
valuation_score: float | None = None # [0.0, 1.0]
|
||||
earnings_proximity_days: int | None = None
|
||||
|
||||
# Macro context
|
||||
macro_bias: float = 0.0 # [-1.0, 1.0]
|
||||
|
||||
# Open position state (for exit engine)
|
||||
open_positions: list[OpenPositionState] = Field(default_factory=list)
|
||||
|
||||
# Market data for regime classification
|
||||
closing_prices: list[float] = Field(default_factory=list)
|
||||
returns: list[float] = Field(default_factory=list)
|
||||
|
||||
# Current price (latest close from shortest available timeframe)
|
||||
current_price: float | None = None
|
||||
```
|
||||
|
||||
### OpenPositionState
|
||||
|
||||
```python
|
||||
class OpenPositionState(BaseModel):
|
||||
"""Snapshot of an open position for exit evaluation."""
|
||||
position_id: str
|
||||
ticker: str
|
||||
entry_price: float
|
||||
current_price: float
|
||||
stop_loss: float
|
||||
target_1: float
|
||||
target_2: float
|
||||
trailing_stop: float | None = None
|
||||
partial_exit_done: bool = False
|
||||
atr: float | None = None
|
||||
```
|
||||
|
||||
### SignalResult
|
||||
|
||||
```python
|
||||
class SignalDirection(str, Enum):
|
||||
BULLISH = "bullish"
|
||||
BEARISH = "bearish"
|
||||
NEUTRAL = "neutral"
|
||||
|
||||
class SignalResult(BaseModel):
|
||||
"""Output from a single signal evaluator on a single timeframe."""
|
||||
signal_type: str # e.g. "fibonacci", "ma_stack", "rsi"
|
||||
timeframe: str # e.g. "D", "H4"
|
||||
strength: float = Field(ge=0.0, le=1.0)
|
||||
direction: SignalDirection
|
||||
confidence: float = Field(ge=0.0, le=1.0)
|
||||
metadata: dict = Field(default_factory=dict) # signal-specific details
|
||||
```
|
||||
|
||||
### ConfluenceSignal
|
||||
|
||||
```python
|
||||
class ConfluenceSignal(BaseModel):
|
||||
"""A signal that passed multi-timeframe confluence filtering."""
|
||||
signal_type: str
|
||||
direction: SignalDirection
|
||||
confluence_score: float # weighted sum across timeframes
|
||||
active_timeframes: list[str] # which timeframes triggered
|
||||
per_timeframe: dict[str, float] # {timeframe: strength}
|
||||
```
|
||||
|
||||
### Verdict
|
||||
|
||||
```python
|
||||
class Verdict(str, Enum):
|
||||
BUY = "BUY"
|
||||
WATCH = "WATCH"
|
||||
SKIP = "SKIP"
|
||||
```
|
||||
|
||||
### HeuristicResult
|
||||
|
||||
```python
|
||||
class HeuristicResult(BaseModel):
|
||||
"""Output from the heuristic (deterministic) pipeline."""
|
||||
verdict: Verdict
|
||||
confidence: float = Field(ge=0.0, le=1.0)
|
||||
s_total: float
|
||||
s_company: float
|
||||
s_macro: float
|
||||
s_competitive: float
|
||||
signal_weights: list[dict] = Field(default_factory=list)
|
||||
reasoning: list[str] = Field(default_factory=list)
|
||||
```
|
||||
|
||||
### LikelihoodRatio
|
||||
|
||||
```python
|
||||
class LikelihoodRatio(BaseModel):
|
||||
"""A single signal's likelihood ratio for Bayesian updating."""
|
||||
signal_type: str
|
||||
cluster: str # SignalCluster value
|
||||
lr: float # P(sig|up) / P(sig|down)
|
||||
log_lr: float # log(lr)
|
||||
penalized_log_lr: float # after correlation penalty
|
||||
hit_rate: float
|
||||
strength: float
|
||||
```
|
||||
|
||||
### ProbabilisticResult
|
||||
|
||||
```python
|
||||
class ProbabilisticResult(BaseModel):
|
||||
"""Output from the probabilistic (Bayesian) pipeline."""
|
||||
verdict: Verdict
|
||||
p_up: float = Field(ge=0.0, le=1.0)
|
||||
entropy: float = Field(ge=0.0, le=1.0)
|
||||
ev_r: float
|
||||
prior: float
|
||||
posterior: float
|
||||
likelihood_ratios: list[LikelihoodRatio] = Field(default_factory=list)
|
||||
regime: str
|
||||
reasoning: list[str] = Field(default_factory=list)
|
||||
```
|
||||
|
||||
### DeltaResult
|
||||
|
||||
```python
|
||||
class DeltaResult(BaseModel):
|
||||
"""Output from the delta analyzer comparing both pipelines."""
|
||||
agreement: bool
|
||||
confidence_delta: float
|
||||
heuristic_verdict: str
|
||||
probabilistic_verdict: str
|
||||
disagreement_reasons: list[str] = Field(default_factory=list)
|
||||
rolling_agreement_rate: float | None = None
|
||||
```
|
||||
|
||||
### ExitSignal
|
||||
|
||||
```python
|
||||
class ExitType(str, Enum):
|
||||
EXIT_HALF = "EXIT_HALF"
|
||||
EXIT_FULL = "EXIT_FULL"
|
||||
|
||||
class ExitSignal(BaseModel):
|
||||
"""An exit signal for an open position."""
|
||||
position_id: str
|
||||
ticker: str
|
||||
exit_type: ExitType
|
||||
reason: str # "stop_hit", "target_1_hit", "target_2_hit", "trailing_stop_hit"
|
||||
price: float
|
||||
```
|
||||
|
||||
### TradePlan
|
||||
|
||||
```python
|
||||
class TradePlan(BaseModel):
|
||||
"""Optional trade plan attached to a BUY signal."""
|
||||
entry_price: float
|
||||
stop_loss: float
|
||||
target_1: float
|
||||
target_2: float
|
||||
position_size_pct: float = Field(ge=0.0, le=1.0)
|
||||
max_loss_pct: float = Field(ge=0.0, le=1.0)
|
||||
dual_confirmed: bool = False
|
||||
probabilistic_only: bool = False
|
||||
```
|
||||
|
||||
### SignalOutput
|
||||
|
||||
```python
|
||||
class SignalOutput(BaseModel):
|
||||
"""The structured output contract consumed by the trading engine and audit systems."""
|
||||
output_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
ticker: str
|
||||
timestamp: datetime
|
||||
price: float
|
||||
|
||||
# Heuristic pipeline results
|
||||
heuristic_verdict: str
|
||||
heuristic_confidence: float
|
||||
heuristic_s_total: float
|
||||
|
||||
# Probabilistic pipeline results
|
||||
probabilistic_verdict: str
|
||||
probabilistic_p_up: float
|
||||
probabilistic_entropy: float
|
||||
probabilistic_ev_r: float
|
||||
|
||||
# Delta analysis
|
||||
delta_agreement: bool
|
||||
delta_confidence_delta: float
|
||||
delta_reasons: list[str] = Field(default_factory=list)
|
||||
|
||||
# Optional trade plan (populated when at least one pipeline says BUY)
|
||||
trade_plan: TradePlan | None = None
|
||||
|
||||
# Exit signals for open positions
|
||||
exit_signals: list[ExitSignal] = Field(default_factory=list)
|
||||
|
||||
# Full pipeline results for audit (stored as JSONB)
|
||||
heuristic_detail: dict = Field(default_factory=dict)
|
||||
probabilistic_detail: dict = Field(default_factory=dict)
|
||||
|
||||
# Pipeline mode metadata
|
||||
pipeline_mode: str = "dual_pipeline"
|
||||
shadow_mode: bool = False
|
||||
```
|
||||
|
||||
### SignalEngineConfig
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SignalEngineConfig:
|
||||
"""Configuration loaded from risk_configs + environment."""
|
||||
dual_pipeline_enabled: bool = False
|
||||
heuristic_pipeline_enabled: bool = True
|
||||
probabilistic_pipeline_enabled: bool = True
|
||||
shadow_mode: bool = False
|
||||
|
||||
# Timeframe weights
|
||||
timeframe_weights: dict[str, float] = field(default_factory=lambda: {
|
||||
"M30": 0.03, "H1": 0.07, "H4": 0.15,
|
||||
"D": 0.30, "W": 0.30, "M": 0.15,
|
||||
})
|
||||
|
||||
# Hard filter thresholds
|
||||
hard_filter_valuation_min: float = 0.3
|
||||
hard_filter_earnings_days: int = 5
|
||||
hard_filter_macro_bias_skip: float = -1.0
|
||||
|
||||
# Heuristic verdict thresholds
|
||||
heuristic_buy_confidence: float = 0.70
|
||||
heuristic_buy_s_total: float = 1.2
|
||||
heuristic_buy_valuation_min: float = 0.5
|
||||
heuristic_watch_confidence: float = 0.55
|
||||
|
||||
# Probabilistic verdict thresholds
|
||||
prob_buy_p_up: float = 0.60
|
||||
prob_buy_entropy_max: float = 0.90
|
||||
prob_buy_ev_r_min: float = 1.5
|
||||
prob_buy_valuation_min: float = 0.5
|
||||
prob_watch_p_up: float = 0.55
|
||||
prob_watch_entropy_max: float = 0.95
|
||||
prob_entropy_skip: float = 0.95
|
||||
|
||||
# Regime priors
|
||||
regime_prior_bull: float = 0.58
|
||||
regime_prior_range: float = 0.50
|
||||
regime_prior_bear: float = 0.42
|
||||
|
||||
# Exit engine
|
||||
trailing_stop_atr_multiplier: float = 2.0
|
||||
|
||||
# Polling
|
||||
polling_interval_seconds: int = 30
|
||||
```
|
||||
|
||||
### HardFilterConfig / HeuristicConfig / ProbabilisticConfig / ExitConfig
|
||||
|
||||
These are derived from `SignalEngineConfig` fields for cleaner function signatures — simple `@dataclass` wrappers over the relevant subset of config values.
|
||||
|
||||
---
|
||||
|
||||
### Database Migration (039)
|
||||
|
||||
```sql
|
||||
-- Migration 039: Signal Engine Outputs
|
||||
-- Creates the signal_engine_outputs table for persisting dual-pipeline evaluations.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS signal_engine_outputs (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
ticker TEXT NOT NULL,
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
price NUMERIC NOT NULL,
|
||||
|
||||
-- Heuristic pipeline
|
||||
heuristic_verdict TEXT NOT NULL,
|
||||
heuristic_confidence NUMERIC NOT NULL,
|
||||
heuristic_s_total NUMERIC NOT NULL,
|
||||
|
||||
-- Probabilistic pipeline
|
||||
probabilistic_verdict TEXT NOT NULL,
|
||||
probabilistic_p_up NUMERIC NOT NULL,
|
||||
probabilistic_entropy NUMERIC NOT NULL,
|
||||
probabilistic_ev_r NUMERIC NOT NULL,
|
||||
|
||||
-- Delta analysis
|
||||
delta_agreement BOOLEAN NOT NULL,
|
||||
delta_confidence_delta NUMERIC NOT NULL,
|
||||
delta_reasons JSONB NOT NULL DEFAULT '[]'::jsonb,
|
||||
|
||||
-- Trade plan (null when no BUY verdict)
|
||||
trade_plan JSONB,
|
||||
|
||||
-- Full output for audit
|
||||
full_output JSONB NOT NULL,
|
||||
|
||||
-- Exit signals
|
||||
exit_signals JSONB NOT NULL DEFAULT '[]'::jsonb,
|
||||
|
||||
-- Metadata
|
||||
pipeline_mode TEXT NOT NULL DEFAULT 'dual_pipeline',
|
||||
shadow_mode BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Index for per-ticker time-range queries
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_ticker_time
|
||||
ON signal_engine_outputs (ticker, evaluated_at);
|
||||
|
||||
-- Index for global time-range queries
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_evaluated
|
||||
ON signal_engine_outputs (evaluated_at);
|
||||
|
||||
-- Index for filtering by verdict
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_verdicts
|
||||
ON signal_engine_outputs (heuristic_verdict, probabilistic_verdict);
|
||||
```
|
||||
|
||||
### Helm / Deployment Configuration
|
||||
|
||||
Add to `values.yaml` under `services:`:
|
||||
|
||||
```yaml
|
||||
signalEngine:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: signal-engine
|
||||
command: "python -m services.signal_engine.main"
|
||||
tier: processing
|
||||
secrets: [stonks-core-secrets, stonks-market-secrets]
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 128Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
```
|
||||
|
||||
Add to `redis_keys.py`:
|
||||
|
||||
```python
|
||||
QUEUE_SIGNAL_ENGINE = "signal_engine"
|
||||
```
|
||||
|
||||
The service uses the existing `stonks-config` ConfigMap and `stonks-core-secrets` for database/Redis credentials. No new ingress or network policy is needed — the signal engine is a queue-polling worker with no HTTP interface.
|
||||
|
||||
---
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
# Requirements Document — Dual-Pipeline Signal Engine
|
||||
|
||||
## Introduction
|
||||
|
||||
The Stonks Oracle platform currently operates a single aggregation pipeline that can run in either heuristic or probabilistic mode (toggled via `probabilistic_scoring_enabled`). This feature replaces the single-pipeline toggle with a dual-pipeline architecture where both pipelines run concurrently per evaluation tick, produce independent verdicts (BUY/WATCH/SKIP), and emit a structured output contract for downstream consumers (trading engine, delta analysis, dashboards).
|
||||
|
||||
The dual-pipeline engine introduces:
|
||||
- **Pipeline A (Heuristic)**: Deterministic scoring using the existing `S_total = S_company + S_macro + S_competitive` formula with signal weighting, producing a confidence-gated verdict.
|
||||
- **Pipeline B (Probabilistic)**: Bayesian inference using the existing `bayesian.py` infrastructure with regime-based priors, likelihood ratios, entropy gating, and expected value calculation.
|
||||
- **Hard Filter Engine**: Pre-pipeline filters that short-circuit both pipelines before evaluation.
|
||||
- **Multi-Timeframe Engine**: Signal evaluation across M30, H1, H4, D, W, M timeframes with weighted confluence scoring.
|
||||
- **Exit Engine**: Position-level exit management (stop hit, targets, trailing ATR-based).
|
||||
- **Delta Analyzer**: Compares heuristic vs probabilistic verdicts to generate training signals for future model tuning.
|
||||
- **Output Formatter**: Structured `SignalOutput` contract consumed by the trading engine and delta analysis.
|
||||
|
||||
The design must address the signal independence assumption in the Bayesian pipeline — correlated signals (MA+RSI, Fib+Elliott) require correlation penalty or signal clustering into categories (momentum, structure, volatility, fundamentals) to prevent likelihood ratio stacking inflation.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Signal_Engine**: The top-level orchestrator in `services/signal_engine/` that coordinates input normalization, hard filters, both pipelines, delta analysis, and output formatting per evaluation tick.
|
||||
- **Heuristic_Pipeline**: Pipeline A — deterministic scoring that computes `S_total = S_company + S_macro + S_competitive` with signal weighting and produces a confidence-gated BUY/WATCH/SKIP verdict.
|
||||
- **Probabilistic_Pipeline**: Pipeline B — Bayesian inference pipeline that computes posterior probability via log-likelihood accumulation with regime-based priors, entropy gating, and expected value calculation.
|
||||
- **Input_Normalizer**: The component that ingests multi-timeframe OHLCV data, fundamentals, macro context, and open positions into a unified `NormalizedInput` structure consumed by both pipelines.
|
||||
- **Signal_Library**: The collection of technical signal evaluators (Fibonacci retracement, MA stack, RSI, Cup & Handle, Elliott Wave) that produce scored signals per timeframe.
|
||||
- **Multi_Timeframe_Engine**: The component that evaluates signals across six timeframes (M30, H1, H4, D, W, M) and computes weighted confluence scores.
|
||||
- **Hard_Filter_Engine**: The pre-pipeline filter stage that evaluates macro bias, valuation score, and earnings proximity to short-circuit evaluation before either pipeline runs.
|
||||
- **Exit_Engine**: The position management component that evaluates stop hits, take-profit targets, and trailing ATR-based stops for open positions.
|
||||
- **Delta_Analyzer**: The component that compares heuristic and probabilistic verdicts, tracks agreement rates, measures confidence deltas, and records disagreement reasons as training signals.
|
||||
- **Output_Formatter**: The component that assembles the structured `SignalOutput` contract from both pipeline results, delta analysis, and optional trade plan.
|
||||
- **SignalOutput**: The structured output contract containing ticker, timestamp, price, heuristic verdict/confidence/S_total, probabilistic verdict/P_up/entropy/EV_R, delta analysis, and optional trade plan.
|
||||
- **Verdict**: A pipeline decision of BUY, WATCH, or SKIP with associated confidence and reasoning.
|
||||
- **Confluence**: The condition where a signal triggers across multiple timeframes; requires activation on at least 2 timeframes including at least one of D, W, or M.
|
||||
- **Entropy_Gate**: Shannon entropy threshold used in the probabilistic pipeline to detect high-uncertainty states and force SKIP verdicts.
|
||||
- **EV_R**: Expected value per unit of risk, computed as `P_up · E[win_R] - (1 - P_up) · 1.0`, used as a quality gate in the probabilistic pipeline.
|
||||
- **Signal_Cluster**: A grouping of correlated signals (momentum, structure, volatility, fundamentals) used to prevent likelihood ratio stacking inflation in the Bayesian pipeline.
|
||||
- **Likelihood_Ratio**: The ratio `LR = P(sig|up) / P(sig|down)` used in Bayesian updating, where `P(sig|up) = h·s + (1-h)·(1-s)·0.5`, `h` is the signal's historical hit rate, and `s` is the signal strength.
|
||||
- **Regime_Prior**: The initial probability assigned based on market regime classification: bull=0.58, range=0.50, bear=0.42.
|
||||
- **OHLCV**: Open, High, Low, Close, Volume — standard market data bar format.
|
||||
- **ATR**: Average True Range — a volatility measure used for trailing stop calculations.
|
||||
- **Fibonacci_Retracement**: A technical analysis tool computing price levels as `L(r) = SH - r·(SH - SL)` where SH is swing high, SL is swing low, and r is a retracement ratio (0.236, 0.382, 0.5, 0.618, 0.786).
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Input Normalization
|
||||
|
||||
**User Story:** As a signal engine operator, I want all market data, fundamentals, macro context, and open positions normalized into a single input structure, so that both pipelines consume identical inputs per evaluation tick.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN an evaluation tick is triggered for a ticker, THE Input_Normalizer SHALL construct a `NormalizedInput` containing multi-timeframe OHLCV bars (M30, H1, H4, D, W, M), fundamental metrics (valuation_score, earnings_proximity_days), macro context (macro_bias as float in [-1.0, 1.0]), and open position state (entry_price, current_price, stop_loss, targets).
|
||||
2. THE Input_Normalizer SHALL source OHLCV data from the existing market data tables, fundamental metrics from the existing company and trend data, and macro context from the existing `macro_impact_records` and `global_events` tables.
|
||||
3. IF any required data source is unavailable or returns an error, THEN THE Input_Normalizer SHALL populate the corresponding field with a sentinel value (`None` for optional fields, empty list for OHLCV bars) and log a warning identifying the missing source.
|
||||
4. THE Input_Normalizer SHALL validate that all OHLCV bars have monotonically increasing timestamps within each timeframe series.
|
||||
5. THE Input_Normalizer SHALL produce identical `NormalizedInput` instances for both pipelines within the same evaluation tick (shared reference, no independent fetches).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 2: Signal Library — Technical Signal Evaluation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want a library of technical signal evaluators that produce scored signals per timeframe, so that both pipelines can consume standardized signal assessments.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Library SHALL implement Fibonacci retracement signal evaluation using the formula `L(r) = SH - r·(SH - SL)` for retracement ratios [0.236, 0.382, 0.5, 0.618, 0.786], where SH is the swing high and SL is the swing low within the evaluation window.
|
||||
2. THE Signal_Library SHALL implement moving average stack evaluation that detects bullish alignment (MA_10 > MA_20 > MA_50 > MA_200) and bearish alignment (MA_10 < MA_20 < MA_50 < MA_200), producing a signal strength proportional to the degree of alignment.
|
||||
3. THE Signal_Library SHALL implement RSI evaluation using the standard 14-period RSI formula, producing overbought signals (RSI > 70) and oversold signals (RSI < 30) with strength scaled by distance from the threshold.
|
||||
4. THE Signal_Library SHALL implement Cup & Handle pattern detection that identifies the cup formation (U-shaped price recovery) and handle (small consolidation), producing a signal with confidence proportional to pattern completeness.
|
||||
5. THE Signal_Library SHALL implement Elliott Wave detection that identifies impulse waves (5-wave structure) and corrective waves (3-wave structure), producing a signal with the current wave position and projected direction.
|
||||
6. WHEN a signal evaluator receives insufficient data for its calculation (fewer bars than the required lookback period), THE Signal_Library SHALL return a null signal with a reason code indicating insufficient data rather than producing a partial evaluation.
|
||||
7. FOR ALL signal evaluators, THE Signal_Library SHALL produce output conforming to a common `SignalResult` structure containing: signal_type, timeframe, strength (float in [0.0, 1.0]), direction (bullish/bearish/neutral), confidence (float in [0.0, 1.0]), and metadata specific to the signal type.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 3: Multi-Timeframe Confluence Engine
|
||||
|
||||
**User Story:** As a quantitative analyst, I want signals evaluated across multiple timeframes with weighted confluence scoring, so that the engine prioritizes signals confirmed across longer timeframes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Multi_Timeframe_Engine SHALL evaluate each signal type across six timeframes with the following weights: M30=0.03, H1=0.07, H4=0.15, D=0.30, W=0.30, M=0.15.
|
||||
2. THE Multi_Timeframe_Engine SHALL compute a weighted confluence score as `C_confluence = Σ(w_tf · s_tf)` where `w_tf` is the timeframe weight and `s_tf` is the signal strength on that timeframe (0.0 if the signal did not trigger).
|
||||
3. WHEN a signal triggers on fewer than 2 timeframes, THE Multi_Timeframe_Engine SHALL discard the signal from further pipeline processing (minimum confluence threshold).
|
||||
4. WHEN a signal triggers on 2 or more timeframes but none of D, W, or M are included, THE Multi_Timeframe_Engine SHALL discard the signal from further pipeline processing (higher-timeframe anchor requirement).
|
||||
5. THE Multi_Timeframe_Engine SHALL pass the confluence-filtered signals and their weighted scores to both the Heuristic_Pipeline and Probabilistic_Pipeline.
|
||||
6. FOR ALL signal sets, THE Multi_Timeframe_Engine SHALL produce a confluence score that never decreases when a signal triggers on an additional timeframe, and that strictly increases when that timeframe's weight and signal strength are both greater than zero (monotonicity with respect to timeframe activation count and weight).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 4: Hard Filter Engine — Pre-Pipeline Gating
|
||||
|
||||
**User Story:** As a risk manager, I want hard filters that short-circuit both pipelines before evaluation, so that clearly unfavorable conditions produce immediate SKIP verdicts without wasting computation.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN the macro_bias value from the NormalizedInput equals -1.0, THE Hard_Filter_Engine SHALL produce an immediate SKIP verdict for both pipelines with reason "macro_bias_negative".
|
||||
2. WHEN the valuation_score from the NormalizedInput is below 0.3, THE Hard_Filter_Engine SHALL produce an immediate SKIP verdict for both pipelines with reason "valuation_below_threshold".
|
||||
3. WHEN the earnings_proximity_days from the NormalizedInput is 5 or fewer, THE Hard_Filter_Engine SHALL produce an immediate SKIP verdict for both pipelines with reason "earnings_block".
|
||||
4. WHEN multiple hard filters trigger simultaneously, THE Hard_Filter_Engine SHALL record all triggered filter reasons in the SKIP verdict (not just the first).
|
||||
5. WHEN no hard filters trigger, THE Hard_Filter_Engine SHALL pass the NormalizedInput through to both pipelines without modification.
|
||||
6. THE Hard_Filter_Engine SHALL execute before either pipeline begins evaluation, and both pipelines SHALL receive the same filter decision.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 5: Heuristic Pipeline — Deterministic Scoring and Verdict
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the heuristic pipeline to produce a deterministic BUY/WATCH/SKIP verdict based on composite scoring of company, macro, and competitive signals, so that the system maintains a transparent, auditable scoring path.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Heuristic_Pipeline SHALL compute a total score `S_total = S_company + S_macro + S_competitive` using the existing three-layer signal aggregation with the current `WeightedSignal` abstraction.
|
||||
2. THE Heuristic_Pipeline SHALL compute signal weights using the formula `W_signal = gate · recency · credibility · (1 + novelty) · market_context` consistent with the existing `compute_signal_weight` function in `scoring.py`.
|
||||
3. THE Heuristic_Pipeline SHALL compute a confidence value from the existing trend confidence formula incorporating source count, extraction confidence, signal agreement, and contradiction penalty.
|
||||
4. THE Heuristic_Pipeline SHALL produce a BUY verdict WHEN confidence >= 0.70 AND S_total >= 1.2 AND valuation_score >= 0.5 AND macro_bias > 0 AND earnings_proximity_days > 5.
|
||||
5. THE Heuristic_Pipeline SHALL produce a WATCH verdict WHEN confidence >= 0.55 AND the BUY conditions are not fully met.
|
||||
6. THE Heuristic_Pipeline SHALL produce a SKIP verdict WHEN confidence < 0.55.
|
||||
7. THE Heuristic_Pipeline SHALL emit a `HeuristicResult` containing: verdict (BUY/WATCH/SKIP), confidence (float), S_total (float), S_company (float), S_macro (float), S_competitive (float), signal_weights (list), and reasoning (list of strings explaining the verdict).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 6: Probabilistic Pipeline — Bayesian Inference and Verdict
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the probabilistic pipeline to produce a Bayesian BUY/WATCH/SKIP verdict using regime-based priors, likelihood ratios, entropy gating, and expected value calculation, so that the system captures uncertainty structure and risk-adjusted expected outcomes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Probabilistic_Pipeline SHALL initialize the prior probability based on the current market regime classification: bull regime → P_prior = 0.58, range regime → P_prior = 0.50, bear regime → P_prior = 0.42.
|
||||
2. THE Probabilistic_Pipeline SHALL compute likelihood ratios for each signal using `P(sig|up) = h·s + (1-h)·(1-s)·0.5` and `LR = P(sig|up) / P(sig|down)`, where h is the signal's historical hit rate and s is the signal strength.
|
||||
3. THE Probabilistic_Pipeline SHALL update the posterior using log-odds accumulation: `logit(P_post) = logit(P_prior) + Σ log(LR_i)`, converting back to probability via the sigmoid function.
|
||||
4. THE Probabilistic_Pipeline SHALL compute Shannon entropy `H = -P_up·log₂(P_up) - (1-P_up)·log₂(1-P_up)` and apply entropy gating: WHEN H > 0.95, THE Probabilistic_Pipeline SHALL force a SKIP verdict with reason "high_entropy".
|
||||
5. THE Probabilistic_Pipeline SHALL compute expected value per unit risk as `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0` where `E[win_R]` is the expected win in risk units derived from signal strength and historical reward-risk ratios.
|
||||
6. THE Probabilistic_Pipeline SHALL produce a BUY verdict WHEN P_up >= 0.60 AND entropy <= 0.90 AND EV_R >= 1.5 AND macro_bias > 0 AND valuation_score >= 0.5.
|
||||
7. THE Probabilistic_Pipeline SHALL produce a WATCH verdict WHEN P_up >= 0.55 AND entropy <= 0.95 AND the BUY conditions are not fully met.
|
||||
8. THE Probabilistic_Pipeline SHALL produce a SKIP verdict in all other cases.
|
||||
9. THE Probabilistic_Pipeline SHALL emit a `ProbabilisticResult` containing: verdict (BUY/WATCH/SKIP), P_up (float), entropy (float), EV_R (float), prior (float), posterior (float), likelihood_ratios (list), regime (string), and reasoning (list of strings).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 7: Signal Correlation Penalty — Preventing LR Stacking Inflation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want correlated signals grouped into clusters with a correlation penalty applied to prevent likelihood ratio stacking inflation, so that the Bayesian pipeline does not overstate confidence from redundant signals.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Probabilistic_Pipeline SHALL classify each signal into one of four clusters: momentum (MA stack, RSI), structure (Fibonacci retracement, Elliott Wave), volatility (ATR-based signals, Bollinger-derived), and fundamentals (valuation, earnings, macro).
|
||||
2. WHEN multiple signals within the same cluster produce likelihood ratios in the same direction, THE Probabilistic_Pipeline SHALL apply a within-cluster penalty: only the strongest LR in the cluster contributes at full weight, and subsequent LRs in the same cluster contribute at a decay factor of 0.5^(n-1) where n is the signal's rank within the cluster by LR magnitude.
|
||||
3. THE Probabilistic_Pipeline SHALL apply no penalty across different clusters (signals from different clusters are treated as independent).
|
||||
4. WHEN a cluster contains only one signal, THE Probabilistic_Pipeline SHALL apply no penalty to that signal.
|
||||
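5. FOR ALL signal sets in which every likelihood ratio is bullish (LR ≥ 1), THE Probabilistic_Pipeline SHALL produce a posterior probability that is less than or equal to the posterior computed without the correlation penalty; symmetrically, FOR ALL signal sets in which every likelihood ratio is bearish (LR ≤ 1), the penalized posterior SHALL be greater than or equal to the unpenalized posterior (the penalty only pulls the posterior back toward the prior — it reduces confidence, never inflates it).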
|
||||
|
||||
---
|
||||
|
||||
### Requirement 8: Exit Engine — Position Management
|
||||
|
||||
**User Story:** As a trader, I want the signal engine to evaluate exit conditions for open positions, so that stop hits, take-profit targets, and trailing stops are managed as part of the signal evaluation cycle.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN the current price of an open position hits or crosses below the stop_loss level, THE Exit_Engine SHALL emit an EXIT_FULL signal for that position with reason "stop_hit".
|
||||
2. WHEN the current price of an open position hits or crosses above the first take-profit target (target_1), THE Exit_Engine SHALL emit an EXIT_HALF signal for that position with reason "target_1_hit".
|
||||
3. WHEN the current price of an open position hits or crosses above the second take-profit target (target_2), THE Exit_Engine SHALL emit an EXIT_FULL signal for that position with reason "target_2_hit".
|
||||
4. WHEN a partial exit has been executed (EXIT_HALF), THE Exit_Engine SHALL activate a trailing stop at `current_price - ATR · trailing_multiplier` and update the trailing stop upward as the price advances (the trailing stop moves up but does not move down).
|
||||
5. WHEN the trailing stop is active and the current price hits or crosses below the trailing stop level, THE Exit_Engine SHALL emit an EXIT_FULL signal for the remaining position with reason "trailing_stop_hit".
|
||||
6. THE Exit_Engine SHALL evaluate exit conditions before the signal pipelines run for new entry signals, so that exit signals take priority over new entry signals for the same ticker.
|
||||
7. THE Exit_Engine SHALL emit exit signals as part of the `SignalOutput` contract with the position identifier, exit type (EXIT_HALF/EXIT_FULL), and reason.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 9: Delta Analyzer — Pipeline Agreement Tracking
|
||||
|
||||
**User Story:** As a model developer, I want the delta analyzer to compare heuristic and probabilistic verdicts and record disagreement details, so that I can generate training signals for future model tuning.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN both pipelines produce verdicts for the same ticker and tick, THE Delta_Analyzer SHALL compute an agreement flag (true if both verdicts are identical, false otherwise).
|
||||
2. THE Delta_Analyzer SHALL compute a confidence delta as `|heuristic_confidence - probabilistic_P_up|` representing the magnitude of disagreement between the two pipelines.
|
||||
3. WHEN the pipelines disagree on verdict, THE Delta_Analyzer SHALL record the disagreement reason by identifying which conditions differed (e.g., "heuristic_confidence_below_threshold", "probabilistic_entropy_too_high", "EV_R_below_threshold").
|
||||
4. THE Delta_Analyzer SHALL track a rolling agreement rate over the last 100 evaluations per ticker, stored in Redis for dashboard consumption.
|
||||
5. THE Delta_Analyzer SHALL emit a `DeltaResult` containing: agreement (bool), confidence_delta (float), heuristic_verdict (string), probabilistic_verdict (string), disagreement_reasons (list of strings), and rolling_agreement_rate (float).
|
||||
6. WHEN the rolling agreement rate drops below 0.50 for a ticker, THE Delta_Analyzer SHALL log a warning indicating persistent pipeline disagreement for operator review.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 10: Output Formatter — Structured SignalOutput Contract
|
||||
|
||||
**User Story:** As a downstream system consumer, I want the signal engine to emit a structured `SignalOutput` contract, so that the trading engine, delta analysis dashboard, and audit systems can consume a consistent output format.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Output_Formatter SHALL produce a `SignalOutput` containing: ticker (string), timestamp (datetime), price (float), heuristic section (verdict, confidence, S_total), probabilistic section (verdict, P_up, entropy, EV_R), delta section (agreement, confidence_delta, disagreement_reasons), and optional trade_plan section.
|
||||
2. WHEN the heuristic pipeline produces a BUY verdict, THE Output_Formatter SHALL populate the trade_plan section with entry_price, stop_loss, target_1, target_2, and position_size derived from the heuristic confidence and existing position sizing logic.
|
||||
3. WHEN the probabilistic pipeline produces a BUY verdict but the heuristic pipeline does not, THE Output_Formatter SHALL populate the trade_plan section with a "probabilistic_only" flag and reduced position sizing (50% of standard).
|
||||
4. WHEN both pipelines produce a BUY verdict, THE Output_Formatter SHALL populate the trade_plan section with full position sizing and a "dual_confirmed" flag.
|
||||
5. THE Output_Formatter SHALL serialize the `SignalOutput` as a Pydantic model with JSON serialization support for Redis queue publishing and database persistence.
|
||||
6. FOR ALL valid pipeline results, THE Output_Formatter SHALL produce a `SignalOutput` that round-trips through JSON serialization and deserialization without data loss (parse(format(output)) produces an equivalent object).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 11: Dual Pipeline Orchestration
|
||||
|
||||
**User Story:** As a signal engine operator, I want both pipelines to run concurrently per evaluation tick sharing the same inputs, so that the system produces independent verdicts without redundant data fetching.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN an evaluation tick is triggered, THE Signal_Engine SHALL execute the Input_Normalizer once, then pass the resulting `NormalizedInput` to the Hard_Filter_Engine, then (if not filtered) execute both the Heuristic_Pipeline and Probabilistic_Pipeline concurrently using `asyncio.gather`.
|
||||
2. THE Signal_Engine SHALL enforce that both pipelines receive identical `NormalizedInput` references (no independent data fetches that could produce different snapshots).
|
||||
3. WHEN either pipeline raises an exception during evaluation, THE Signal_Engine SHALL catch the exception, log the error with full traceback, and produce a SKIP verdict for the failed pipeline with reason "pipeline_error" while allowing the other pipeline to complete normally.
|
||||
4. THE Signal_Engine SHALL measure and log the wall-clock execution time of each pipeline per tick for performance monitoring.
|
||||
5. THE Signal_Engine SHALL publish the assembled `SignalOutput` to the existing Redis queue (`stonks:queue:trading_decisions`) for consumption by the trading engine.
|
||||
6. THE Signal_Engine SHALL persist each `SignalOutput` to a database table for historical analysis and audit.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 12: Integration with Existing Trading Engine
|
||||
|
||||
**User Story:** As a platform operator, I want the dual-pipeline signal engine to integrate with the existing trading engine, so that the trading engine can consume `SignalOutput` verdicts and make execution decisions.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Engine SHALL publish `SignalOutput` to the existing `stonks:queue:trading_decisions` Redis queue in a format compatible with the existing `TradingEngine.evaluate_recommendation` interface.
|
||||
2. THE Signal_Engine SHALL map the `SignalOutput` trade_plan to the existing `Recommendation` schema fields (action, confidence, position_sizing) so that the trading engine can process dual-pipeline outputs without modification to its core evaluation logic.
|
||||
3. WHEN the `SignalOutput` has a "dual_confirmed" flag, THE Signal_Engine SHALL set the recommendation confidence to the maximum of heuristic_confidence and probabilistic_P_up.
|
||||
4. WHEN the `SignalOutput` has a "probabilistic_only" flag, THE Signal_Engine SHALL set the recommendation confidence to `probabilistic_P_up · 0.8` (20% confidence haircut for single-pipeline confirmation).
|
||||
5. WHEN neither pipeline produces a BUY verdict, THE Signal_Engine SHALL not publish a trading recommendation to the queue (WATCH and SKIP verdicts are persisted for analysis but not forwarded to the trading engine).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 13: Configuration and Feature Flags
|
||||
|
||||
**User Story:** As a platform operator, I want the dual-pipeline engine configurable via the existing `risk_configs` table and environment variables, so that I can tune thresholds, enable/disable individual pipelines, and adjust timeframe weights without code changes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Engine SHALL support a `dual_pipeline_enabled` feature flag in `risk_configs` that toggles the entire dual-pipeline engine on or off, defaulting to false for safe rollout.
|
||||
2. THE Signal_Engine SHALL support independent enable/disable flags for each pipeline: `heuristic_pipeline_enabled` and `probabilistic_pipeline_enabled`, both defaulting to true when the dual-pipeline engine is enabled.
|
||||
3. THE Signal_Engine SHALL support configurable timeframe weights via a `timeframe_weights` JSON object in `risk_configs`, defaulting to `{"M30": 0.03, "H1": 0.07, "H4": 0.15, "D": 0.30, "W": 0.30, "M": 0.15}`.
|
||||
4. THE Signal_Engine SHALL support configurable hard filter thresholds: `hard_filter_valuation_min` (default 0.3), `hard_filter_earnings_days` (default 5), and `hard_filter_macro_bias_skip` (default -1.0).
|
||||
5. THE Signal_Engine SHALL support configurable verdict thresholds for both pipelines via `risk_configs` JSON, including heuristic confidence thresholds (BUY: 0.70, WATCH: 0.55) and probabilistic thresholds (P_up: 0.60, entropy: 0.90, EV_R: 1.5).
|
||||
6. IF the `dual_pipeline_enabled` flag fails to read from the database, THEN THE Signal_Engine SHALL default to disabled (fail-safe behavior) and log a warning.
|
||||
7. THE Signal_Engine SHALL log the active configuration at startup and on each configuration change for auditability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 14: Regime-Based Prior Engine
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the probabilistic pipeline's prior probability to adapt based on the current market regime, so that the Bayesian inference starts from a regime-appropriate baseline rather than a fixed 0.50.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Probabilistic_Pipeline SHALL use the existing `classify_regime` function from `services/aggregation/regime.py` to determine the current market regime for each ticker.
|
||||
2. THE Probabilistic_Pipeline SHALL map regime classifications to prior probabilities: trend_following with positive trend_indicator → 0.58 (bull), trend_following with negative trend_indicator → 0.42 (bear), mean_reversion → 0.50 (range), panic → 0.42 (bear), uncertainty → 0.50 (range).
|
||||
3. THE Probabilistic_Pipeline SHALL convert the regime prior to log-odds before accumulating likelihood ratios: `logit(P_prior) = log(P_prior / (1 - P_prior))`.
|
||||
4. WHEN market data is insufficient for regime classification (fewer than 100 days of price history), THE Probabilistic_Pipeline SHALL use the uncertainty prior of 0.50.
|
||||
5. THE Probabilistic_Pipeline SHALL record the regime classification and prior probability in the `ProbabilisticResult` for auditability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 15: Database Schema for Signal Engine Output
|
||||
|
||||
**User Story:** As a platform operator, I want signal engine outputs persisted to a dedicated database table, so that historical evaluations are available for analysis, backtesting, and audit.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Engine SHALL persist each `SignalOutput` to a `signal_engine_outputs` table with columns for: id (UUID primary key), ticker (text), evaluated_at (timestamptz), price (numeric), heuristic_verdict (text), heuristic_confidence (numeric), heuristic_s_total (numeric), probabilistic_verdict (text), probabilistic_p_up (numeric), probabilistic_entropy (numeric), probabilistic_ev_r (numeric), delta_agreement (boolean), delta_confidence_delta (numeric), delta_reasons (JSONB), trade_plan (JSONB), full_output (JSONB), exit_signals (JSONB), pipeline_mode (text), shadow_mode (boolean), created_at (timestamptz).
|
||||
2. THE Signal_Engine SHALL create an index on `(ticker, evaluated_at)` for efficient time-range queries per ticker.
|
||||
3. THE Signal_Engine SHALL create an index on `evaluated_at` for efficient global time-range queries.
|
||||
4. WHEN persisting fails due to a database error, THE Signal_Engine SHALL log the error and continue processing (persistence failure does not block signal emission to the trading queue).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 16: Backward Compatibility and Migration Path
|
||||
|
||||
**User Story:** As a platform operator, I want the dual-pipeline engine to coexist with the existing single-pipeline aggregation, so that the rollout is incremental and reversible.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN `dual_pipeline_enabled` is false, THE Signal_Engine SHALL not run, and the existing aggregation pipeline SHALL continue to operate unchanged.
|
||||
2. WHEN `dual_pipeline_enabled` is true, THE Signal_Engine SHALL run alongside the existing aggregation pipeline, with the trading engine consuming `SignalOutput` from the dual-pipeline engine instead of `Recommendation` from the existing recommendation worker.
|
||||
3. THE Signal_Engine SHALL reuse the existing `WeightedSignal`, `BayesianPosterior`, `RegimeClassification`, and `TrendSummary` data structures from `services/aggregation/` rather than duplicating them.
|
||||
4. THE Signal_Engine SHALL reuse the existing `compute_signal_weight`, `compute_bayesian_posterior`, and `classify_regime` functions rather than reimplementing the underlying math.
|
||||
5. THE Signal_Engine SHALL add the new `signal_engine_outputs` table via a new database migration without modifying existing tables.
|
||||
6. THE Signal_Engine SHALL support running in "shadow mode" where both the existing pipeline and the dual-pipeline engine run, but only the existing pipeline's output is forwarded to the trading engine (dual-pipeline output is persisted for comparison only).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 17: Property-Based Testing for Dual-Pipeline Correctness
|
||||
|
||||
**User Story:** As a developer, I want comprehensive property-based tests validating the mathematical correctness and structural invariants of the dual-pipeline engine, so that edge cases and numerical stability issues are caught before deployment.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE test suite SHALL include property-based tests for the Fibonacci retracement formula verifying that `L(r) = SH - r·(SH - SL)` produces values in [SL, SH] for all r in [0, 1] and all SH > SL > 0.
|
||||
2. THE test suite SHALL include property-based tests for the Bayesian log-odds update verifying that `logit(P_post) = logit(P_prior) + Σ log(LR_i)` round-trips correctly: converting P_prior to logit, adding log-LRs, and converting back via sigmoid produces a valid probability in (0, 1).
|
||||
3. THE test suite SHALL include property-based tests for the entropy gate verifying that Shannon entropy is maximized at P_up = 0.5 and equals 0.0 at P_up = 0.0 or P_up = 1.0, and is symmetric around 0.5.
|
||||
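4. THE test suite SHALL include property-based tests for the signal correlation penalty verifying that, for any set of correlated signals whose likelihood ratios all point in the same direction, the penalized posterior is never further from the prior than the unpenalized posterior (less than or equal to it for all-bullish sets, greater than or equal to it for all-bearish sets).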
|
||||
5. THE test suite SHALL include property-based tests for the multi-timeframe confluence score verifying monotonicity: activating a signal on an additional timeframe with non-zero weight always increases or maintains the confluence score.
|
||||
6. THE test suite SHALL include property-based tests for the `SignalOutput` contract verifying round-trip serialization: `SignalOutput.model_validate_json(output.model_dump_json())` produces an equivalent object for all valid outputs.
|
||||
7. THE test suite SHALL include property-based tests for the hard filter engine verifying that macro_bias = -1.0 always produces SKIP, valuation_score < 0.3 always produces SKIP, and earnings_proximity_days <= 5 always produces SKIP, regardless of all other input values.
|
||||
8. THE test suite SHALL include property-based tests for the EV_R calculation verifying that `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0` is monotonically increasing with P_up for fixed E[win_R] > 0.
|
||||
@@ -0,0 +1,345 @@
|
||||
# Implementation Plan: Dual-Pipeline Signal Engine
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the dual-pipeline signal engine as a new service at `services/signal_engine/` that runs as an independent Kubernetes deployment. On every evaluation tick, for each ticker, the engine runs a heuristic (deterministic scoring) pipeline and a probabilistic (Bayesian inference) pipeline concurrently, and each pipeline produces its own independent BUY/WATCH/SKIP verdict. Implementation proceeds incrementally: infrastructure first, then core models, signal library, pipelines, orchestration, integration, and deployment.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] 1. Project scaffolding, configuration, and data models
|
||||
- [x] 1.1 Create service directory structure and `__init__.py` files
|
||||
- Create `services/signal_engine/` with all subdirectories per the design module structure
|
||||
- Create `services/signal_engine/__init__.py`, `services/signal_engine/signals/__init__.py`
|
||||
- _Requirements: 11.1, 13.1_
|
||||
|
||||
- [x] 1.2 Implement `models.py` — all Pydantic data models
|
||||
- Define `OHLCVBar`, `NormalizedInput`, `OpenPositionState`, `SignalResult`, `SignalDirection`
|
||||
- Define `ConfluenceSignal`, `Verdict`, `HeuristicResult`, `LikelihoodRatio`, `ProbabilisticResult`
|
||||
- Define `DeltaResult`, `ExitSignal`, `ExitType`, `TradePlan`, `SignalOutput`
|
||||
- All models must use Pydantic `BaseModel` with proper field constraints (`ge`, `le`)
|
||||
- _Requirements: 1.1, 2.7, 5.7, 6.9, 9.5, 10.1, 10.5_
|
||||
|
||||
- [x] 1.3 Implement `config.py` — `SignalEngineConfig` and sub-configs
|
||||
- Define `SignalEngineConfig` dataclass with all fields from the design
|
||||
- Define `HardFilterConfig`, `HeuristicConfig`, `ProbabilisticConfig`, `ExitConfig` as derived sub-configs
|
||||
- Implement `load_config()` that reads from `risk_configs` table + environment variables
|
||||
- Default `dual_pipeline_enabled` to `False` (fail-safe)
|
||||
- _Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7_
|
||||
|
||||
- [x] 1.4 Add `QUEUE_SIGNAL_ENGINE` to `services/shared/redis_keys.py`
|
||||
- Add `QUEUE_SIGNAL_ENGINE = "signal_engine"` constant
|
||||
- _Requirements: 11.1_
|
||||
|
||||
- [x] 1.5 Write property test for `SignalOutput` round-trip serialization
|
||||
- **Requirement 17.6: SignalOutput round-trip serialization**
|
||||
- Generate arbitrary valid `SignalOutput` instances with Hypothesis
|
||||
- Verify `SignalOutput.model_validate_json(output.model_dump_json())` produces equivalent object
|
||||
- File: `tests/test_pbt_signal_engine_models.py`
|
||||
- _Requirements: 10.5, 17.6_
|
||||
|
||||
- [x] 2. Input Normalizer and Hard Filter Engine
|
||||
- [x] 2.1 Implement `normalizer.py` — Input Normalizer
|
||||
- Implement `normalize_input(pool, ticker, config) -> NormalizedInput`
|
||||
- Fetch OHLCV bars from `market_data_bars` for M30, H1, H4, D, W, M timeframes
|
||||
- Fetch fundamental metrics (valuation_score, earnings_proximity_days) from company/trend data
|
||||
- Fetch macro context (macro_bias) from `macro_impact_records` and `global_events`
|
||||
- Fetch open position state from trading engine portfolio tables
|
||||
- Populate sentinel values (`None`, empty list) for unavailable data with logged warnings
|
||||
- Validate monotonically increasing timestamps within each timeframe series
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5_
|
||||
|
||||
- [x] 2.2 Implement `hard_filter.py` — Hard Filter Engine
|
||||
- Implement `evaluate_hard_filters(normalized, config) -> HardFilterResult`
|
||||
- Check `macro_bias == -1.0` → SKIP with reason "macro_bias_negative"
|
||||
- Check `valuation_score < 0.3` → SKIP with reason "valuation_below_threshold"
|
||||
- Check `earnings_proximity_days <= 5` → SKIP with reason "earnings_block"
|
||||
- Record all triggered filter reasons (not just first)
|
||||
- Return `HardFilterResult` with `filtered: bool` and `reasons: list[str]`
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6_
|
||||
|
||||
- [x] 2.3 Write property tests for hard filter engine
|
||||
- **Requirement 17.7: Hard filter determinism**
|
||||
- Generate arbitrary `NormalizedInput` with `macro_bias = -1.0` → always SKIP
|
||||
- Generate arbitrary `NormalizedInput` with `valuation_score < 0.3` → always SKIP
|
||||
- Generate arbitrary `NormalizedInput` with `earnings_proximity_days <= 5` → always SKIP
|
||||
- Verify these hold regardless of all other input values
|
||||
- File: `tests/test_pbt_signal_engine_hard_filter.py`
|
||||
- _Requirements: 4.1, 4.2, 4.3, 17.7_
|
||||
|
||||
- [x] 3. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 4. Signal Library — Technical Signal Evaluators
|
||||
- [x] 4.1 Implement `signals/base.py` — SignalEvaluator protocol
|
||||
- Define `SignalEvaluator` protocol with `evaluate(bars, timeframe) -> SignalResult | None`
|
||||
- Define common helper functions for swing high/low detection, lookback validation
|
||||
- _Requirements: 2.6, 2.7_
|
||||
|
||||
- [x] 4.2 Implement `signals/fibonacci.py` — Fibonacci retracement evaluator
|
||||
- Implement `L(r) = SH - r·(SH - SL)` for ratios [0.236, 0.382, 0.5, 0.618, 0.786]
|
||||
- Detect swing high and swing low within the evaluation window
|
||||
- Produce signal strength based on proximity of current price to retracement levels
|
||||
- Return `None` with reason code when insufficient data
|
||||
- _Requirements: 2.1, 2.6, 2.7_
|
||||
|
||||
- [x] 4.3 Write property test for Fibonacci retracement formula
|
||||
- **Requirement 17.1: Fibonacci retracement bounds**
|
||||
- For all `r` in [0, 1] and all `SH > SL > 0`, verify `L(r)` is in [SL, SH]
|
||||
- File: `tests/test_pbt_signal_engine_fibonacci.py`
|
||||
- _Requirements: 2.1, 17.1_
|
||||
|
||||
- [x] 4.4 Implement `signals/ma_stack.py` — Moving average stack evaluator
|
||||
- Detect bullish alignment (MA_10 > MA_20 > MA_50 > MA_200)
|
||||
- Detect bearish alignment (MA_10 < MA_20 < MA_50 < MA_200)
|
||||
- Produce signal strength proportional to degree of alignment
|
||||
- Return `None` when insufficient bars for MA_200 calculation
|
||||
- _Requirements: 2.2, 2.6, 2.7_
|
||||
|
||||
- [x] 4.5 Implement `signals/rsi.py` — RSI evaluator
|
||||
- Implement standard 14-period RSI formula
|
||||
- Produce overbought signals (RSI > 70) and oversold signals (RSI < 30)
|
||||
- Scale strength by distance from threshold
|
||||
- Return `None` when fewer than 14 bars available
|
||||
- _Requirements: 2.3, 2.6, 2.7_
|
||||
|
||||
- [x] 4.6 Implement `signals/cup_handle.py` — Cup & Handle pattern detector
|
||||
- Identify cup formation (U-shaped price recovery) and handle (small consolidation)
|
||||
- Produce signal with confidence proportional to pattern completeness
|
||||
- Return `None` when insufficient data or no pattern detected
|
||||
- _Requirements: 2.4, 2.6, 2.7_
|
||||
|
||||
- [x] 4.7 Implement `signals/elliott_wave.py` — Elliott Wave detector
|
||||
- Identify impulse waves (5-wave structure) and corrective waves (3-wave structure)
|
||||
- Produce signal with current wave position and projected direction
|
||||
- Return `None` when insufficient data or ambiguous wave count
|
||||
- _Requirements: 2.5, 2.6, 2.7_
|
||||
|
||||
- [x] 5. Multi-Timeframe Confluence Engine
|
||||
- [x] 5.1 Implement `confluence.py` — Multi-Timeframe Engine
|
||||
- Implement `compute_confluence(signal_results, weights) -> list[ConfluenceSignal]`
|
||||
- Compute weighted confluence score: `C_confluence = Σ(w_tf · s_tf)`
|
||||
- Apply minimum confluence threshold: discard signals triggering on < 2 timeframes
|
||||
- Apply higher-timeframe anchor: discard signals without at least one of D, W, or M
|
||||
- Return `ConfluenceSignal` objects with active timeframes and per-timeframe strengths
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6_
|
||||
|
||||
- [x] 5.2 Write property test for confluence score monotonicity
|
||||
- **Requirement 17.5: Confluence score monotonicity**
|
||||
- Verify that activating a signal on an additional timeframe with non-zero weight always increases or maintains the confluence score
|
||||
- File: `tests/test_pbt_signal_engine_confluence.py`
|
||||
- _Requirements: 3.6, 17.5_
|
||||
|
||||
- [x] 6. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 7. Heuristic Pipeline (Pipeline A)
|
||||
- [x] 7.1 Implement `heuristic.py` — Heuristic Pipeline
|
||||
- Implement `run_heuristic_pipeline(normalized, confluence_signals, config) -> HeuristicResult`
|
||||
- Compute `S_total = S_company + S_macro + S_competitive` using existing `compute_signal_weight()`
|
||||
- Compute confidence from source count, extraction confidence, signal agreement, contradiction penalty
|
||||
- BUY verdict: confidence >= 0.70 AND S_total >= 1.2 AND valuation_score >= 0.5 AND macro_bias > 0 AND earnings_proximity_days > 5
|
||||
- WATCH verdict: confidence >= 0.55 AND BUY conditions not fully met
|
||||
- SKIP verdict: confidence < 0.55
|
||||
- Emit `HeuristicResult` with all required fields and reasoning
|
||||
- _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7_
|
||||
|
||||
- [x] 7.2 Write unit tests for heuristic pipeline verdict logic
|
||||
- Test BUY threshold conditions
|
||||
- Test WATCH threshold conditions
|
||||
- Test SKIP conditions
|
||||
- Test edge cases at threshold boundaries
|
||||
- File: `tests/test_signal_engine_heuristic.py`
|
||||
- _Requirements: 5.4, 5.5, 5.6_
|
||||
|
||||
- [x] 8. Probabilistic Pipeline (Pipeline B) and Correlation Penalty
|
||||
- [x] 8.1 Implement `correlation.py` — Signal cluster classification and penalty
|
||||
- Define `SignalCluster` enum: MOMENTUM, STRUCTURE, VOLATILITY, FUNDAMENTALS
|
||||
- Implement `classify_signal(signal_type) -> SignalCluster`
|
||||
- Implement `apply_correlation_penalty(likelihood_ratios) -> list[LikelihoodRatio]`
|
||||
- Within-cluster decay: rank the cluster's signals by LR strength; the strongest keeps full weight and the n-th strongest (n ≥ 2) is weighted by 0.5^(n-1)
|
||||
- No penalty across different clusters
|
||||
- Single-signal clusters receive no penalty
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.4_
|
||||
|
||||
- [x] 8.2 Implement `probabilistic.py` — Probabilistic Pipeline
|
||||
- Implement `run_probabilistic_pipeline(normalized, confluence_signals, regime, config) -> ProbabilisticResult`
|
||||
- Initialize regime-based prior: bull=0.58, range=0.50, bear=0.42
|
||||
- Compute likelihood ratios: `P(sig|up) = h·s + (1-h)·(1-s)·0.5`, `LR = P(sig|up) / P(sig|down)`
|
||||
- Apply correlation penalty via `apply_correlation_penalty()`
|
||||
- Accumulate via log-odds: `logit(P_post) = logit(P_prior) + Σ log(LR_i)`
|
||||
- Compute the binary Shannon entropy of P_up in bits (log base 2, so the maximum is 1.0 at P_up = 0.5) and apply entropy gating (H > 0.95 → SKIP)
|
||||
- Compute `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0`
|
||||
- BUY: P_up >= 0.60 AND entropy <= 0.90 AND EV_R >= 1.5 AND macro_bias > 0 AND valuation_score >= 0.5
|
||||
- WATCH: P_up >= 0.55 AND entropy <= 0.95 AND BUY conditions not fully met
|
||||
- SKIP: all other cases
|
||||
- Use existing `classify_regime()` from `services/aggregation/regime.py`
|
||||
- _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 14.1, 14.2, 14.3, 14.4, 14.5_
|
||||
|
||||
- [x] 8.3 Write property test for Bayesian log-odds round-trip
|
||||
- **Requirement 17.2: Bayesian log-odds update correctness**
|
||||
- Verify `logit(P_post) = logit(P_prior) + Σ log(LR_i)` round-trips correctly
|
||||
- Converting P_prior to logit, adding log-LRs, converting back via sigmoid produces valid probability in (0, 1)
|
||||
- File: `tests/test_pbt_signal_engine_bayesian.py`
|
||||
- _Requirements: 6.3, 17.2_
|
||||
|
||||
- [x] 8.4 Write property test for entropy gate
|
||||
- **Requirement 17.3: Entropy gate properties**
|
||||
- Verify Shannon entropy is maximized at P_up = 0.5
|
||||
- Verify entropy equals 0.0 at P_up = 0.0 or P_up = 1.0
|
||||
- Verify entropy is symmetric around 0.5
|
||||
- File: `tests/test_pbt_signal_engine_bayesian.py`
|
||||
- _Requirements: 6.4, 17.3_
|
||||
|
||||
- [x] 8.5 Write property test for signal correlation penalty
|
||||
- **Requirement 17.4: Correlation penalty reduces confidence**
|
||||
- Verify penalized posterior is always <= unpenalized posterior for any signal set with correlated signals
|
||||
- File: `tests/test_pbt_signal_engine_correlation.py`
|
||||
- _Requirements: 7.5, 17.4_
|
||||
|
||||
- [x] 8.6 Write property test for EV_R monotonicity
|
||||
- **Requirement 17.8: EV_R monotonically increasing with P_up**
|
||||
- Verify `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0` is monotonically increasing with P_up for fixed E[win_R] > 0
|
||||
- File: `tests/test_pbt_signal_engine_bayesian.py`
|
||||
- _Requirements: 6.5, 17.8_
|
||||
|
||||
- [x] 9. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 10. Exit Engine
|
||||
- [x] 10.1 Implement `exit_engine.py` — Exit Engine
|
||||
- Implement `evaluate_exits(positions, current_prices, config) -> list[ExitSignal]`
|
||||
- Check stop_loss hit → EXIT_FULL with reason "stop_hit"
|
||||
- Check target_1 hit → EXIT_HALF with reason "target_1_hit"
|
||||
- Check target_2 hit → EXIT_FULL with reason "target_2_hit"
|
||||
- Trailing stop: activate after EXIT_HALF at `current_price - ATR · trailing_multiplier`
|
||||
- Trailing stop ratchets upward only (never moves down)
|
||||
- Trailing stop hit → EXIT_FULL with reason "trailing_stop_hit"
|
||||
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7_
|
||||
|
||||
- [x] 10.2 Write unit tests for exit engine
|
||||
- Test stop_loss trigger
|
||||
- Test target_1 partial exit
|
||||
- Test target_2 full exit
|
||||
- Test trailing stop activation and ratchet behavior
|
||||
- File: `tests/test_signal_engine_exit.py`
|
||||
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5_
|
||||
|
||||
- [x] 11. Delta Analyzer and Output Formatter
|
||||
- [x] 11.1 Implement `delta.py` — Delta Analyzer
|
||||
- Implement `analyze_delta(heuristic, probabilistic, redis, ticker) -> DeltaResult`
|
||||
- Compute agreement flag (both verdicts identical)
|
||||
- Compute confidence delta: `|heuristic_confidence - probabilistic_P_up|`
|
||||
- Record disagreement reasons when verdicts differ
|
||||
- Track rolling 100-evaluation agreement rate in Redis
|
||||
- Log warning when agreement rate drops below 0.50
|
||||
- _Requirements: 9.1, 9.2, 9.3, 9.4, 9.5, 9.6_
|
||||
|
||||
- [x] 11.2 Implement `formatter.py` — Output Formatter
|
||||
- Implement `format_output(ticker, price, heuristic, probabilistic, delta, exit_signals, config) -> SignalOutput`
|
||||
- Both BUY → `dual_confirmed`, full position sizing
|
||||
- Probabilistic-only BUY → `probabilistic_only`, 50% position sizing
|
||||
- Heuristic-only BUY → standard position sizing
|
||||
- No BUY → no trade_plan (WATCH/SKIP persisted for analysis)
|
||||
- Implement `signal_output_to_recommendation(output) -> Recommendation`
|
||||
- Map `SignalOutput` to existing `Recommendation` schema for trading engine compatibility
|
||||
- Dual confirmed: confidence = max(heuristic_confidence, probabilistic_P_up)
|
||||
- Probabilistic only: confidence = probabilistic_P_up · 0.8 (20% haircut)
|
||||
- _Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 12.1, 12.2, 12.3, 12.4, 12.5_
|
||||
|
||||
- [x] 11.3 Write unit tests for output formatter
|
||||
- Test dual_confirmed trade plan generation
|
||||
- Test probabilistic_only trade plan with 50% sizing
|
||||
- Test heuristic-only trade plan
|
||||
- Test no-BUY case (no trade_plan)
|
||||
- Test `signal_output_to_recommendation` mapping
|
||||
- File: `tests/test_signal_engine_formatter.py`
|
||||
- _Requirements: 10.2, 10.3, 10.4, 12.3, 12.4_
|
||||
|
||||
- [x] 12. Orchestrator, Persistence, and Main Entry Point
|
||||
- [x] 12.1 Implement `persistence.py` — Database persistence
|
||||
- Implement `persist_signal_output(pool, output) -> None`
|
||||
- Insert into `signal_engine_outputs` table
|
||||
- Log and continue on database errors (non-blocking)
|
||||
- _Requirements: 15.1, 15.4_
|
||||
|
||||
- [x] 12.2 Implement `worker.py` — Top-level orchestrator
|
||||
- Implement `evaluate_tick(pool, redis, ticker, config) -> SignalOutput | None`
|
||||
- Step 1: Normalize inputs (single fetch, shared reference)
|
||||
- Step 2: Evaluate exit conditions for open positions
|
||||
- Step 3: Run hard filters (short-circuit if filtered)
|
||||
- Step 4: Evaluate signals across timeframes via Signal Library
|
||||
- Step 5: Compute confluence
|
||||
- Step 6: Classify regime via existing `classify_regime()`
|
||||
- Step 7: Run both pipelines concurrently via `asyncio.gather` with exception handling
|
||||
- Step 8: Compute delta analysis
|
||||
- Step 9: Format output
|
||||
- Step 10: Persist to database and publish to Redis queue
|
||||
- Catch pipeline exceptions → SKIP verdict for failed pipeline, other continues
|
||||
- Measure and log wall-clock execution time per pipeline
|
||||
- _Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6_
|
||||
|
||||
- [x] 12.3 Implement `main.py` — Entry point with asyncio event loop
|
||||
- Connect to PostgreSQL (asyncpg pool) and Redis (redis.asyncio)
|
||||
- Load config from `risk_configs` table
|
||||
- Log active configuration at startup
|
||||
- Poll `stonks:queue:signal_engine` queue indefinitely
|
||||
- Check `dual_pipeline_enabled` flag; if disabled, sleep and retry
|
||||
- On config read failure, default to disabled (fail-safe)
|
||||
- Support shadow mode (persist but don't forward to trading queue)
|
||||
- _Requirements: 13.1, 13.6, 13.7, 16.1, 16.6_
|
||||
|
||||
- [x] 12.4 Write integration tests for worker orchestration
|
||||
- Test full tick evaluation with mocked DB/Redis
|
||||
- Test pipeline failure isolation (one fails, other completes)
|
||||
- Test hard filter short-circuit
|
||||
- Test shadow mode behavior
|
||||
- File: `tests/test_signal_engine_worker.py`
|
||||
- _Requirements: 11.3, 16.6_
|
||||
|
||||
- [x] 13. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 14. Database migration and infrastructure
|
||||
- [x] 14.1 Create database migration `infra/migrations/039_signal_engine_outputs.sql`
|
||||
- Create `signal_engine_outputs` table per the design schema
|
||||
- Create index on `(ticker, evaluated_at)` for per-ticker time-range queries
|
||||
- Create index on `evaluated_at` for global time-range queries
|
||||
- Create index on `(heuristic_verdict, probabilistic_verdict)` for verdict filtering
|
||||
- _Requirements: 15.1, 15.2, 15.3_
|
||||
|
||||
- [x] 14.2 Add signal engine service to Helm chart
|
||||
- Add `signalEngine` entry to `infra/helm/stonks-oracle/values.yaml`
|
||||
- Configure: replicas=1, command=`python -m services.signal_engine.main`, tier=processing
|
||||
- Set resource requests/limits per design (100m/128Mi → 500m/256Mi)
|
||||
- Reference existing secrets: `stonks-core-secrets`, `stonks-market-secrets`
|
||||
- _Requirements: 11.1, 13.1_
|
||||
|
||||
- [x] 15. Trading engine integration and backward compatibility
|
||||
- [x] 15.1 Wire signal engine output to trading engine queue
|
||||
- Publish `SignalOutput` (mapped to `Recommendation`) to `stonks:queue:trading_decisions`
|
||||
- Only publish when at least one pipeline produces BUY verdict
|
||||
- WATCH/SKIP verdicts persisted for analysis but not forwarded
|
||||
- Ensure trading engine can consume without modification via `signal_output_to_recommendation()`
|
||||
- _Requirements: 12.1, 12.2, 12.5, 16.2_
|
||||
|
||||
- [x] 15.2 Ensure backward compatibility with existing pipeline
|
||||
- Verify `dual_pipeline_enabled=false` means signal engine does not run
|
||||
- Verify existing aggregation pipeline operates unchanged when flag is off
|
||||
- Reuse existing `WeightedSignal`, `BayesianPosterior`, `RegimeClassification` (import, don't duplicate)
|
||||
- Reuse existing `compute_signal_weight`, `compute_bayesian_posterior`, `classify_regime` functions
|
||||
- No modifications to existing tables (new migration only adds new table)
|
||||
- _Requirements: 16.1, 16.2, 16.3, 16.4, 16.5_
|
||||
|
||||
- [x] 16. Final checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation between major phases
|
||||
- Property-based tests use Hypothesis with `@settings(max_examples=100)` per project conventions
|
||||
- PBT test files are prefixed `test_pbt_*` per project conventions
|
||||
- The service reuses existing math functions from `services/aggregation/` — no reimplementation
|
||||
- All configuration is loaded from `risk_configs` table with fail-safe defaults
|
||||
- Shadow mode allows running alongside existing pipeline without affecting trading decisions
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "b595d834-7e72-4fab-87a9-65c92115a069", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,975 @@
|
||||
# Design Document — Model Validation, Calibration, and Signal Quality
|
||||
|
||||
## Overview
|
||||
|
||||
This design adds a closed-loop model validation layer to Stonks Oracle. The system currently generates trend summaries and trading recommendations with confidence scores, but has no mechanism to evaluate whether those predictions are accurate, whether confidence scores are well-calibrated, which sources contribute to correct predictions, or whether the system outperforms simple benchmarks.
|
||||
|
||||
The validation layer introduces six new modules — five under `services/validation/` plus a quality gate in `services/trading/` — along with seven new API endpoints under `/api/validation/`, a database migration (035) with four new tables and two SQL views, and an upgraded OpsModel dashboard page. The architecture follows the existing patterns: pure computation modules with asyncpg for persistence, FastAPI endpoints in `services/api/app.py`, and React/TanStack Query hooks on the frontend.
|
||||
|
||||
### Design Rationale
|
||||
|
||||
A prediction engine without outcome tracking is flying blind. The validation layer closes the feedback loop by:
|
||||
|
||||
1. **Capturing immutable snapshots** at prediction time — preventing hindsight bias in evaluation
|
||||
2. **Evaluating outcomes** across multiple horizons (1h, 6h, 1d, 7d, 30d) — matching the system's multi-window trend architecture
|
||||
3. **Computing calibration metrics** (ECE, Brier score) — measuring whether confidence scores mean what they claim
|
||||
4. **Tracking information coefficients** (IC, Rank IC) — measuring linear and ordinal predictive power
|
||||
5. **Attributing performance** to sources, catalysts, and signal layers — identifying the most valuable information channels
|
||||
6. **Recalibrating confidence** via Bayesian shrinkage — learning from the system's own track record
|
||||
7. **Gating live trading** on minimum quality thresholds — preventing real capital risk on a poorly performing model
|
||||
|
||||
The design reuses existing infrastructure (asyncpg, FastAPI, TanStack Query, Recharts) and integrates with the existing `source_accuracy` table from the signal-math-upgrade spec.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Data Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph "Prediction Capture (Real-time)"
|
||||
A[Recommendation Engine] -->|generates| B[Prediction_Snapshot_Writer]
|
||||
B --> C[prediction_snapshots table]
|
||||
B --> D[signal_evidence_links table]
|
||||
B -->|computes| E[canonical_evidence_key<br/>duplicate detection<br/>contribution scores]
|
||||
end
|
||||
|
||||
subgraph "Outcome Evaluation (Periodic)"
|
||||
F[Outcome_Evaluator<br/>scheduled job] -->|reads matured snapshots| C
|
||||
F -->|fetches future prices| G[market_snapshots table]
|
||||
F -->|computes returns| H[prediction_outcomes table]
|
||||
F -->|evaluates 5 horizons| H
|
||||
end
|
||||
|
||||
subgraph "Metrics Computation (Periodic)"
|
||||
I[Metrics_Engine] -->|reads| H
|
||||
I -->|reads| C
|
||||
I -->|reads| D
|
||||
I -->|computes| J[model_metric_snapshots table]
|
||||
I -->|computes| K[Calibration: ECE, Brier]
|
||||
I -->|computes| L[IC, Rank IC by horizon]
|
||||
I -->|computes| M[Benchmark: excess returns]
|
||||
end
|
||||
|
||||
subgraph "Attribution (Periodic)"
|
||||
N[Attribution_Engine] -->|joins| D
|
||||
N -->|joins| H
|
||||
N -->|computes| O[Per-source metrics]
|
||||
N -->|computes| P[Per-catalyst metrics]
|
||||
N -->|computes| Q[Per-layer metrics]
|
||||
end
|
||||
|
||||
subgraph "Calibration (Periodic)"
|
||||
R[Calibration_Engine] -->|reads| H
|
||||
R -->|reads| D
|
||||
R -->|computes Bayesian shrinkage| S[source_accuracy table<br/>reliability scores]
|
||||
end
|
||||
|
||||
subgraph "Safety Gate (Per-cycle)"
|
||||
T[Quality_Gate] -->|reads latest| J
|
||||
T -->|evaluates thresholds| U{Pass?}
|
||||
U -->|yes| V[Live trading allowed]
|
||||
U -->|no| W[Force paper mode]
|
||||
T -->|stores result| X[risk_configs table<br/>model_quality_gate key]
|
||||
end
|
||||
|
||||
subgraph "Dashboard (Frontend)"
|
||||
Y[Dashboard_API<br/>7 endpoints] -->|reads| J
|
||||
Y -->|reads| C
|
||||
Y -->|reads| H
|
||||
Y -->|reads| D
|
||||
Z[OpsModel.tsx<br/>upgraded page] -->|fetches| Y
|
||||
end
|
||||
|
||||
subgraph "Backtest Integration"
|
||||
AA[BacktestReplay] -->|validation mode| B
|
||||
AA -->|validation mode| F
|
||||
AA -->|triggers| I
|
||||
end
|
||||
```
|
||||
|
||||
### Scheduling Strategy
|
||||
|
||||
The validation components run on different cadences:
|
||||
|
||||
| Component | Trigger | Cadence |
|
||||
|-----------|---------|---------|
|
||||
| Prediction_Snapshot_Writer | Synchronous — called by recommendation engine | Every recommendation |
|
||||
| Outcome_Evaluator | Scheduled job | Every 1 hour |
|
||||
| Metrics_Engine | After Outcome_Evaluator completes | Every 1 hour |
|
||||
| Attribution_Engine | Called by Metrics_Engine | Every 1 hour |
|
||||
| Calibration_Engine | After Metrics_Engine completes | Every 6 hours |
|
||||
| Quality_Gate | Start of each aggregation cycle | Every aggregation cycle |
|
||||
|
||||
### Sector ETF Mapping
|
||||
|
||||
The system needs a mapping from company sectors to sector ETFs for benchmark comparison. This is stored as a configuration constant:
|
||||
|
||||
```python
|
||||
SECTOR_ETF_MAP: dict[str, str] = {
|
||||
"Technology": "XLK",
|
||||
"Consumer Cyclical": "XLY",
|
||||
"Financial Services": "XLF",
|
||||
"Healthcare": "XLV",
|
||||
"Energy": "XLE",
|
||||
"Communication Services": "XLC",
|
||||
"Industrials": "XLI",
|
||||
"Consumer Defensive": "XLP",
|
||||
"Real Estate": "XLRE",
|
||||
"Utilities": "XLU",
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### New Modules
|
||||
|
||||
| Module | File | Responsibility |
|
||||
|--------|------|----------------|
|
||||
| Prediction Snapshot Writer | `services/validation/prediction_snapshot.py` | Captures immutable prediction state at generation time |
|
||||
| Outcome Evaluator | `services/validation/outcome_evaluator.py` | Matches predictions with realized market outcomes |
|
||||
| Metrics Engine | `services/validation/metrics.py` | Computes calibration, IC, Brier, benchmark metrics |
|
||||
| Attribution Engine | `services/validation/attribution.py` | Per-source, per-catalyst, per-layer performance |
|
||||
| Calibration Engine | `services/validation/calibration.py` | Bayesian shrinkage source reliability, weight adjustment |
|
||||
| Quality Gate | `services/trading/model_quality_gate.py` | Safety gate for live trading eligibility |
|
||||
|
||||
### Modified Modules
|
||||
|
||||
| Module | File | Changes |
|
||||
|--------|------|---------|
|
||||
| Query API | `services/api/app.py` | 7 new `/api/validation/*` endpoints |
|
||||
| Aggregation Worker | `services/aggregation/worker.py` | Call Quality_Gate at cycle start |
|
||||
| Recommendation Engine | `services/recommendation/eligibility.py` | Call Prediction_Snapshot_Writer after recommendation |
|
||||
| Backtest Replay | `services/trading/backtest_replay.py` | Validation mode support |
|
||||
| Frontend Hooks | `frontend/src/api/hooks.ts` | 7 new validation hooks |
|
||||
| OpsModel Page | `frontend/src/pages/OpsModel.tsx` | Full dashboard upgrade |
|
||||
| AppLayout | `frontend/src/components/AppLayout.tsx` | Nav item update (if needed) |
|
||||
|
||||
### Component Interface Details
|
||||
|
||||
#### 1. Prediction Snapshot Writer (`services/validation/prediction_snapshot.py`)
|
||||
|
||||
```python
|
||||
SECTOR_ETF_MAP: dict[str, str] = {
|
||||
"Technology": "XLK",
|
||||
"Consumer Cyclical": "XLY",
|
||||
"Financial Services": "XLF",
|
||||
"Healthcare": "XLV",
|
||||
"Energy": "XLE",
|
||||
"Communication Services": "XLC",
|
||||
"Industrials": "XLI",
|
||||
"Consumer Defensive": "XLP",
|
||||
"Real Estate": "XLRE",
|
||||
"Utilities": "XLU",
|
||||
}
|
||||
|
||||
EVALUATION_HORIZONS: list[str] = ["1h", "6h", "1d", "7d", "30d"]
|
||||
|
||||
MAX_SINGLE_DOCUMENT_WEIGHT: float = 1.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class PredictionSnapshot:
|
||||
"""Immutable snapshot of a prediction at generation time."""
|
||||
id: str # UUID
|
||||
generated_at: datetime
|
||||
ticker: str
|
||||
window: str
|
||||
horizon: str
|
||||
direction: str # bullish/bearish/mixed/neutral
|
||||
action: str # buy/sell/hold/watch
|
||||
mode: str # informational/paper_eligible/live_eligible
|
||||
strength: float
|
||||
confidence: float
|
||||
contradiction: float
|
||||
p_bull: float | None
|
||||
p_bear: float | None
|
||||
score_company: float
|
||||
score_macro: float
|
||||
score_competitive: float
|
||||
evidence_count: int
|
||||
unique_source_count: int
|
||||
duplicate_evidence_count: int
|
||||
price_at_prediction: float | None
|
||||
spy_price_at_prediction: float | None
|
||||
sector_etf_price_at_prediction: float | None
|
||||
metadata: dict
|
||||
|
||||
|
||||
@dataclass
|
||||
class SignalEvidenceLink:
|
||||
"""Link between a prediction and a contributing evidence document."""
|
||||
id: str # UUID
|
||||
prediction_id: str
|
||||
document_id: str
|
||||
signal_id: str
|
||||
ticker: str
|
||||
source: str
|
||||
source_type: str
|
||||
catalyst_type: str
|
||||
sentiment: str
|
||||
impact: float
|
||||
extraction_confidence: float
|
||||
weight: float # clamped to MAX_SINGLE_DOCUMENT_WEIGHT
|
||||
is_duplicate: bool
|
||||
canonical_evidence_key: str
|
||||
contribution_score: float # weight / total_weight, sums to 1.0
|
||||
metadata: dict
|
||||
|
||||
|
||||
def compute_canonical_evidence_key(title: str, url: str) -> str:
|
||||
"""SHA256 of normalized(title) + normalized(url).
|
||||
|
||||
Normalization: lowercase, strip whitespace for title;
|
||||
lowercase, strip query params for URL.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def create_prediction_snapshot(
|
||||
pool: asyncpg.Pool,
|
||||
recommendation: Recommendation,
|
||||
trend_summary: TrendSummary,
|
||||
evidence_signals: list[WeightedSignal],
|
||||
evidence_docs: list[dict], # document metadata from recommendation_evidence
|
||||
) -> PredictionSnapshot:
|
||||
"""Create and persist a prediction snapshot with evidence links.
|
||||
|
||||
1. Fetches current prices (ticker, SPY, sector ETF) from market_snapshots
|
||||
2. Computes canonical evidence keys and duplicate detection
|
||||
3. Clamps individual document weights to MAX_SINGLE_DOCUMENT_WEIGHT
|
||||
4. Computes contribution scores (one-vote-per-canonical-key dedup)
|
||||
5. Persists snapshot and evidence links in a transaction
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def fetch_latest_close_price(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
) -> float | None:
|
||||
"""Fetch most recent close price from market_snapshots for a ticker."""
|
||||
...
|
||||
```
|
||||
|
||||
#### 2. Outcome Evaluator (`services/validation/outcome_evaluator.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class PredictionOutcome:
|
||||
"""Realized outcome for a prediction at a specific horizon."""
|
||||
id: str # UUID
|
||||
prediction_id: str
|
||||
evaluated_at: datetime
|
||||
horizon: str # 1h, 6h, 1d, 7d, 30d
|
||||
future_price: float
|
||||
future_return: float
|
||||
spy_future_price: float | None
|
||||
spy_return: float | None
|
||||
sector_etf_future_price: float | None
|
||||
sector_etf_return: float | None
|
||||
excess_return_vs_spy: float | None
|
||||
excess_return_vs_sector: float | None
|
||||
direction_correct: bool
|
||||
profitable: bool
|
||||
metadata: dict
|
||||
|
||||
|
||||
HORIZON_DURATIONS: dict[str, timedelta] = {
|
||||
"1h": timedelta(hours=1),
|
||||
"6h": timedelta(hours=6),
|
||||
"1d": timedelta(days=1),
|
||||
"7d": timedelta(days=7),
|
||||
"30d": timedelta(days=30),
|
||||
}
|
||||
|
||||
|
||||
async def evaluate_matured_predictions(
|
||||
pool: asyncpg.Pool,
|
||||
) -> int:
|
||||
"""Evaluate all matured prediction snapshots.
|
||||
|
||||
Finds snapshots where horizon has elapsed and outcome not yet recorded.
|
||||
For each, fetches future prices and computes returns.
|
||||
Skips horizons where future price is unavailable (retries next run).
|
||||
|
||||
Returns count of outcomes recorded.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def evaluate_single_prediction(
|
||||
pool: asyncpg.Pool,
|
||||
snapshot: PredictionSnapshot,
|
||||
horizon: str,
|
||||
) -> PredictionOutcome | None:
|
||||
"""Evaluate a single prediction at a specific horizon.
|
||||
|
||||
Returns None if future price is unavailable.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 3. Metrics Engine (`services/validation/metrics.py`)
|
||||
|
||||
```python
|
||||
CONFIDENCE_BUCKETS: list[tuple[float, float]] = [
|
||||
(0.50, 0.60),
|
||||
(0.60, 0.70),
|
||||
(0.70, 0.80),
|
||||
(0.80, 0.90),
|
||||
(0.90, 1.00),
|
||||
]
|
||||
|
||||
LOOKBACK_WINDOWS: list[str] = ["7d", "30d", "90d", "all"]
|
||||
|
||||
|
||||
@dataclass
|
||||
class CalibrationBucket:
|
||||
"""Calibration metrics for a single confidence bucket."""
|
||||
bucket_low: float
|
||||
bucket_high: float
|
||||
avg_confidence: float
|
||||
observed_win_rate: float
|
||||
prediction_count: int
|
||||
miscalibrated: bool # |avg_confidence - win_rate| > 0.15
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelMetricSnapshot:
|
||||
"""Aggregate model quality metrics for a lookback/horizon combination."""
|
||||
id: str
|
||||
generated_at: datetime
|
||||
lookback_window: str
|
||||
horizon: str
|
||||
prediction_count: int
|
||||
win_rate: float
|
||||
directional_accuracy: float
|
||||
information_coefficient: float | None
|
||||
rank_information_coefficient: float | None
|
||||
avg_return: float
|
||||
avg_excess_return_vs_spy: float
|
||||
avg_excess_return_vs_sector: float
|
||||
calibration_error: float # ECE
|
||||
brier_score: float
|
||||
buy_win_rate: float
|
||||
sell_win_rate: float
|
||||
hold_win_rate: float
|
||||
metadata: dict
|
||||
|
||||
|
||||
def compute_calibration_error(
|
||||
confidences: list[float],
|
||||
outcomes: list[bool],
|
||||
) -> tuple[float, list[CalibrationBucket]]:
|
||||
"""Compute ECE and calibration buckets.
|
||||
|
||||
ECE = Σ (n_b / N) * |avg_conf_b - win_rate_b|
|
||||
|
||||
Returns (ece, buckets).
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_brier_score(
|
||||
p_bulls: list[float],
|
||||
outcomes: list[bool],
|
||||
) -> float:
|
||||
"""Brier score = mean((p_bull - outcome)^2).
|
||||
|
||||
outcome is 1.0 when the price actually moved up (the bullish event that p_bull forecasts), 0.0 otherwise.
|
||||
Returns value in [0.0, 1.0].
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_information_coefficient(
|
||||
scores: list[float],
|
||||
returns: list[float],
|
||||
) -> float | None:
|
||||
"""Pearson correlation between prediction scores and future returns.
|
||||
|
||||
Returns None when fewer than 30 data points.
|
||||
Returns value in [-1.0, 1.0].
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_rank_information_coefficient(
|
||||
scores: list[float],
|
||||
returns: list[float],
|
||||
) -> float | None:
|
||||
"""Spearman rank correlation between prediction scores and future returns.
|
||||
|
||||
Returns None when fewer than 30 data points.
|
||||
Returns value in [-1.0, 1.0].
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_contribution_scores(
|
||||
weights: list[float],
|
||||
) -> list[float]:
|
||||
"""Compute contribution scores from document weights.
|
||||
|
||||
Each score = weight_i / sum(weights). Sums to 1.0.
|
||||
Each score in [0.0, 1.0].
|
||||
Returns empty list for empty input.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def compute_and_store_metric_snapshots(
|
||||
pool: asyncpg.Pool,
|
||||
) -> list[ModelMetricSnapshot]:
|
||||
"""Compute metric snapshots for all lookback/horizon combinations.
|
||||
|
||||
Lookback windows: 7d, 30d, 90d, all-time.
|
||||
Horizons: 1h, 6h, 1d, 7d, 30d.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 4. Attribution Engine (`services/validation/attribution.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SourceAttribution:
|
||||
"""Performance metrics for a single source."""
|
||||
source: str
|
||||
source_type: str
|
||||
prediction_count: int
|
||||
avg_weight: float
|
||||
avg_contribution_score: float
|
||||
win_rate: float
|
||||
avg_future_return: float
|
||||
avg_excess_return_vs_spy: float
|
||||
information_coefficient: float | None
|
||||
duplicate_rate: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class CatalystAttribution:
|
||||
"""Performance metrics for a single catalyst type."""
|
||||
catalyst_type: str
|
||||
prediction_count: int
|
||||
win_rate: float
|
||||
avg_future_return: float
|
||||
avg_excess_return_vs_spy: float
|
||||
information_coefficient: float | None
|
||||
|
||||
|
||||
@dataclass
|
||||
class LayerAttribution:
|
||||
"""Performance metrics for a signal layer."""
|
||||
layer: str # company, macro, competitive
|
||||
avg_contribution_pct: float
|
||||
dominant_win_rate: float # win rate when this layer > 30% contribution
|
||||
dominant_ic: float | None # IC when this layer > 30% contribution
|
||||
|
||||
|
||||
async def compute_source_attribution(
|
||||
pool: asyncpg.Pool,
|
||||
lookback_days: int = 30,
|
||||
horizon: str = "7d",
|
||||
) -> list[SourceAttribution]:
|
||||
...
|
||||
|
||||
|
||||
async def compute_catalyst_attribution(
|
||||
pool: asyncpg.Pool,
|
||||
lookback_days: int = 30,
|
||||
horizon: str = "7d",
|
||||
) -> list[CatalystAttribution]:
|
||||
...
|
||||
|
||||
|
||||
async def compute_layer_attribution(
|
||||
pool: asyncpg.Pool,
|
||||
lookback_days: int = 30,
|
||||
horizon: str = "7d",
|
||||
) -> list[LayerAttribution]:
|
||||
...
|
||||
```
|
||||
|
||||
#### 5. Calibration Engine (`services/validation/calibration.py`)
|
||||
|
||||
```python
|
||||
def compute_source_reliability(
|
||||
observed_win_rate: float,
|
||||
sample_count: int,
|
||||
prior_strength: int = 30,
|
||||
) -> float:
|
||||
"""Bayesian shrinkage source reliability.
|
||||
|
||||
reliability = 0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)
|
||||
|
||||
Returns value in [0.0, 1.0].
|
||||
When n=0, returns 0.5 (prior mean).
|
||||
As n→∞, approaches observed_win_rate.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_adjusted_evidence_weight(
|
||||
base_weight: float,
|
||||
reliability: float,
|
||||
) -> float:
|
||||
"""Adjusted weight = base_weight * (0.5 + reliability), clamped to [0.1, 2.0]."""
|
||||
...
|
||||
|
||||
|
||||
async def update_source_reliabilities(
|
||||
pool: asyncpg.Pool,
|
||||
) -> int:
|
||||
"""Recompute and store source reliability scores from latest outcomes.
|
||||
|
||||
Uses the existing source_accuracy table, updating accuracy_ratio
|
||||
with the Bayesian shrinkage formula.
|
||||
|
||||
Returns count of sources updated.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 6. Quality Gate (`services/trading/model_quality_gate.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class QualityGateConfig:
|
||||
"""Configurable thresholds for live trading eligibility."""
|
||||
min_prediction_count: int = 100
|
||||
min_ic: float = 0.03
|
||||
min_win_rate: float = 0.53
|
||||
max_ece: float = 0.15
|
||||
min_excess_return_vs_spy: float = 0.0
|
||||
max_snapshot_age_hours: int = 24
|
||||
|
||||
|
||||
@dataclass
|
||||
class GateThresholdResult:
|
||||
"""Result for a single threshold check."""
|
||||
name: str
|
||||
threshold: float
|
||||
actual: float
|
||||
passed: bool
|
||||
|
||||
|
||||
@dataclass
|
||||
class QualityGateResult:
|
||||
"""Full gate evaluation result."""
|
||||
passed: bool
|
||||
evaluated_at: datetime
|
||||
threshold_results: list[GateThresholdResult]
|
||||
reason: str # "all thresholds met" or "failed: ..."
|
||||
snapshot_id: str | None
|
||||
config: QualityGateConfig
|
||||
|
||||
|
||||
async def evaluate_quality_gate(
|
||||
pool: asyncpg.Pool,
|
||||
config: QualityGateConfig | None = None,
|
||||
) -> QualityGateResult:
|
||||
"""Evaluate model quality gate from latest metric snapshot.
|
||||
|
||||
Reads the most recent model_metric_snapshot for the 30d lookback
|
||||
and 7d horizon (the primary evaluation window).
|
||||
|
||||
If no snapshot exists or snapshot is stale (>24h), defaults to
|
||||
paper-only mode (fail-safe).
|
||||
|
||||
Stores result in risk_configs under 'model_quality_gate' key.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def load_gate_config_from_db(
|
||||
pool: asyncpg.Pool,
|
||||
) -> QualityGateConfig:
|
||||
"""Load gate thresholds from risk_configs, with defaults."""
|
||||
...
|
||||
```
|
||||
|
||||
#### 7. Dashboard API Endpoints
|
||||
|
||||
Seven new endpoints added to `services/api/app.py`:
|
||||
|
||||
| Endpoint | Method | Returns |
|
||||
|----------|--------|---------|
|
||||
| `/api/validation/summary` | GET | Latest model metric snapshot + gate status |
|
||||
| `/api/validation/calibration` | GET | Calibration table with buckets |
|
||||
| `/api/validation/ic-by-horizon` | GET | IC and Rank IC per horizon |
|
||||
| `/api/validation/attribution/sources` | GET | Per-source performance |
|
||||
| `/api/validation/attribution/catalysts` | GET | Per-catalyst performance |
|
||||
| `/api/validation/attribution/layers` | GET | Per-layer performance |
|
||||
| `/api/validation/gate-status` | GET | Quality gate evaluation detail |
|
||||
|
||||
All endpoints accept optional `lookback` (default "30d") and `horizon` (default "7d") query parameters.
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
### Database Schema (Migration 035)
|
||||
|
||||
#### prediction_snapshots
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS prediction_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
ticker VARCHAR(20) NOT NULL,
|
||||
window VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
direction VARCHAR(20) NOT NULL,
|
||||
action VARCHAR(20) NOT NULL,
|
||||
mode VARCHAR(30) NOT NULL,
|
||||
strength FLOAT NOT NULL,
|
||||
confidence FLOAT NOT NULL,
|
||||
contradiction FLOAT NOT NULL DEFAULT 0.0,
|
||||
p_bull FLOAT,
|
||||
p_bear FLOAT,
|
||||
score_company FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_macro FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_competitive FLOAT NOT NULL DEFAULT 0.0,
|
||||
evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
unique_source_count INTEGER NOT NULL DEFAULT 0,
|
||||
duplicate_evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
price_at_prediction FLOAT,
|
||||
spy_price_at_prediction FLOAT,
|
||||
sector_etf_price_at_prediction FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_ticker ON prediction_snapshots(ticker);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_generated ON prediction_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_horizon ON prediction_snapshots(horizon);
|
||||
```
|
||||
|
||||
#### prediction_outcomes
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS prediction_outcomes (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
future_price FLOAT,
|
||||
future_return FLOAT,
|
||||
spy_future_price FLOAT,
|
||||
spy_return FLOAT,
|
||||
sector_etf_future_price FLOAT,
|
||||
sector_etf_return FLOAT,
|
||||
excess_return_vs_spy FLOAT,
|
||||
excess_return_vs_sector FLOAT,
|
||||
direction_correct BOOLEAN,
|
||||
profitable BOOLEAN,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_prediction ON prediction_outcomes(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_horizon ON prediction_outcomes(horizon);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_evaluated ON prediction_outcomes(evaluated_at);
|
||||
```
|
||||
|
||||
#### signal_evidence_links
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS signal_evidence_links (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
document_id VARCHAR(200),
|
||||
signal_id VARCHAR(200),
|
||||
ticker VARCHAR(20),
|
||||
source VARCHAR(200),
|
||||
source_type VARCHAR(50),
|
||||
catalyst_type VARCHAR(50),
|
||||
sentiment VARCHAR(20),
|
||||
impact FLOAT,
|
||||
extraction_confidence FLOAT,
|
||||
weight FLOAT,
|
||||
is_duplicate BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
canonical_evidence_key VARCHAR(64),
|
||||
contribution_score FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_prediction ON signal_evidence_links(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_document ON signal_evidence_links(document_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_ticker ON signal_evidence_links(ticker);
|
||||
```
|
||||
|
||||
#### model_metric_snapshots
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS model_metric_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
lookback_window VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
prediction_count INTEGER NOT NULL DEFAULT 0,
|
||||
win_rate FLOAT,
|
||||
directional_accuracy FLOAT,
|
||||
information_coefficient FLOAT,
|
||||
rank_information_coefficient FLOAT,
|
||||
avg_return FLOAT,
|
||||
avg_excess_return_vs_spy FLOAT,
|
||||
avg_excess_return_vs_sector FLOAT,
|
||||
calibration_error FLOAT,
|
||||
brier_score FLOAT,
|
||||
buy_win_rate FLOAT,
|
||||
sell_win_rate FLOAT,
|
||||
hold_win_rate FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_generated ON model_metric_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_lookback ON model_metric_snapshots(lookback_window);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_horizon ON model_metric_snapshots(horizon);
|
||||
```
|
||||
|
||||
#### SQL Explorer Views
|
||||
|
||||
```sql
|
||||
CREATE OR REPLACE VIEW v_prediction_performance AS
|
||||
SELECT
|
||||
ps.ticker,
|
||||
ps.direction,
|
||||
ps.action,
|
||||
ps.confidence,
|
||||
ps.strength,
|
||||
ps.contradiction,
|
||||
ps.p_bull,
|
||||
ps.score_company,
|
||||
ps.score_macro,
|
||||
ps.score_competitive,
|
||||
ps.evidence_count,
|
||||
ps.unique_source_count,
|
||||
ps.duplicate_evidence_count,
|
||||
ps.price_at_prediction,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.excess_return_vs_sector,
|
||||
po.direction_correct,
|
||||
po.profitable,
|
||||
po.horizon,
|
||||
ps.generated_at,
|
||||
po.evaluated_at
|
||||
FROM prediction_snapshots ps
|
||||
JOIN prediction_outcomes po ON po.prediction_id = ps.id;
|
||||
|
||||
CREATE OR REPLACE VIEW v_source_performance AS
|
||||
SELECT
|
||||
sel.source,
|
||||
sel.source_type,
|
||||
sel.catalyst_type,
|
||||
sel.sentiment,
|
||||
sel.weight,
|
||||
sel.contribution_score,
|
||||
sel.is_duplicate,
|
||||
po.direction_correct,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.horizon,
|
||||
ps.generated_at
|
||||
FROM signal_evidence_links sel
|
||||
JOIN prediction_snapshots ps ON ps.id = sel.prediction_id
|
||||
JOIN prediction_outcomes po ON po.prediction_id = sel.prediction_id;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Correctness Properties
|
||||
|
||||
*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.*
|
||||
|
||||
The following properties were derived from the acceptance criteria through systematic prework analysis. Each property is universally quantified and maps to specific requirements. After reflection, 7 unique properties remain — one for each PBT requirement in Requirement 17. Redundant properties from Requirements 2, 5, 6, 8, and 11 were consolidated with their corresponding Requirement 17 counterparts.
|
||||
|
||||
### Property 1: Calibration Error Range and Round-Trip
|
||||
|
||||
*For any* valid distribution of predictions across confidence buckets (where each prediction has a confidence in [0.5, 1.0] and a boolean outcome), the Expected Calibration Error (ECE) SHALL be in [0.0, 1.0]. Furthermore, when every bucket's observed win rate exactly matches its average confidence, ECE SHALL be 0.0.
|
||||
|
||||
**Validates: Requirements 5.1, 5.3, 17.1**
|
||||
|
||||
### Property 2: Brier Score Range and Perfect Prediction
|
||||
|
||||
*For any* list of (p_bull, outcome) pairs where p_bull ∈ [0.0, 1.0] and outcome ∈ {0.0, 1.0}, the Brier score SHALL be in [0.0, 1.0]. Furthermore, when all predictions have p_bull = 1.0 and outcome = 1.0 (or p_bull = 0.0 and outcome = 0.0), the Brier score SHALL be 0.0.
|
||||
|
||||
**Validates: Requirements 5.4, 17.2**
|
||||
|
||||
### Property 3: Information Coefficient Range and Perfect Correlation
|
||||
|
||||
*For any* list of (score, return) pairs with at least 30 elements where scores and returns are finite floats, the Information Coefficient (Pearson correlation) SHALL be in [-1.0, 1.0]. Furthermore, when scores and returns are perfectly positively linearly correlated (returns = a * scores + b, a > 0, and the scores are not all identical, since correlation is undefined for zero-variance input), IC SHALL be 1.0 (within floating-point tolerance).
|
||||
|
||||
**Validates: Requirements 6.1, 6.2, 17.3**
|
||||
|
||||
### Property 4: Canonical Evidence Key Determinism and Normalization Idempotence
|
||||
|
||||
*For any* (title, url) string pair, computing the canonical evidence key SHALL be deterministic — the same inputs always produce the same key. Furthermore, normalizing an already-normalized input (lowercased, trimmed title; lowercased, query-stripped URL) and computing the key SHALL produce the same key as the original computation (idempotence).
|
||||
|
||||
**Validates: Requirements 2.3, 17.4**
|
||||
|
||||
### Property 5: Source Reliability Bayesian Shrinkage Bounds and Convergence
|
||||
|
||||
*For any* observed_win_rate ∈ [0.0, 1.0] and sample_count ≥ 0, the source reliability computed via Bayesian shrinkage SHALL be in [0.0, 1.0]. When sample_count = 0, reliability SHALL be exactly 0.5. As sample_count increases toward infinity, reliability SHALL approach the observed_win_rate monotonically.
|
||||
|
||||
**Validates: Requirements 8.1, 8.2, 17.5**
|
||||
|
||||
### Property 6: Quality Gate Determinism and Threshold Monotonicity
|
||||
|
||||
*For any* set of model metric values and quality gate configuration, the gate evaluation result SHALL be deterministic — the same inputs always produce the same pass/fail result. Furthermore, for any configuration where the gate passes, relaxing any single threshold (decreasing a `min_*` value or increasing a `max_*` value, which makes it easier to satisfy) SHALL NOT cause the gate to fail (monotonicity).
|
||||
|
||||
**Validates: Requirements 11.1, 17.6**
|
||||
|
||||
### Property 7: Contribution Score Sum-to-One and Range
|
||||
|
||||
*For any* non-empty list of positive document weights, the computed contribution scores SHALL each be in [0.0, 1.0] and SHALL sum to 1.0 (within floating-point tolerance of 1e-9). For an empty weight list, the result SHALL be an empty list.
|
||||
|
||||
**Validates: Requirements 2.5, 17.7**
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Price Data Unavailability
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Ticker price unavailable at snapshot time | Store NULL for `price_at_prediction`, log warning, continue |
|
||||
| SPY price unavailable at snapshot time | Store NULL for `spy_price_at_prediction`, log warning, continue |
|
||||
| Sector ETF price unavailable at snapshot time | Store NULL for `sector_etf_price_at_prediction`, log warning, continue |
|
||||
| Sector not found in SECTOR_ETF_MAP | Store NULL for sector ETF price, log warning |
|
||||
| Future price unavailable at evaluation time | Skip that horizon, retry on next Outcome_Evaluator run |
|
||||
| SPY/sector ETF future price unavailable | Store NULL for excess returns, still compute ticker return |
|
||||
|
||||
### Metrics Computation Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Zero predictions in a confidence bucket | Exclude bucket from ECE computation |
|
||||
| Fewer than 30 predictions for IC/Rank IC | Return NULL instead of unreliable correlation |
|
||||
| All predictions in same confidence bucket | ECE = |avg_confidence - win_rate| for that single bucket |
|
||||
| Division by zero in contribution scores (total weight = 0) | Return equal contribution scores (1/n) |
|
||||
| Single evidence document | Contribution score = 1.0 |
|
||||
| NaN/infinity in metric computation | Guard with `math.isnan`/`math.isinf` checks, return 0.0 or NULL |
|
||||
|
||||
### Quality Gate Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| No model_metric_snapshots exist | Default to paper-only mode (fail-safe) |
|
||||
| Most recent snapshot older than 24 hours | Default to paper-only mode (fail-safe) |
|
||||
| risk_configs table unreachable | Default to paper-only mode, log warning |
|
||||
| Invalid threshold values in risk_configs | Use default thresholds, log warning |
|
||||
| Gate evaluation fails mid-computation | Default to paper-only mode, log error |
|
||||
|
||||
### Database Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| prediction_snapshots insert fails | Log error, do not block recommendation generation |
|
||||
| signal_evidence_links insert fails | Log error, snapshot still created (partial data) |
|
||||
| prediction_outcomes insert fails | Log error, retry on next Outcome_Evaluator run |
|
||||
| model_metric_snapshots insert fails | Log error, stale metrics used until next successful computation |
|
||||
| source_accuracy update fails | Log error, continue with stale reliability data |
|
||||
|
||||
### Canonical Evidence Key Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Empty title | Use empty string in hash computation |
|
||||
| Empty URL | Use empty string in hash computation |
|
||||
| URL with no query parameters | Use URL as-is after lowercasing |
|
||||
| Non-ASCII characters in title/URL | Encode as UTF-8 before hashing |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Dual Testing Approach
|
||||
|
||||
The model validation feature requires both property-based tests (for mathematical correctness of metric computations) and example-based unit tests (for specific behaviors, integration points, and edge cases). Property-based testing is appropriate here because the feature contains several pure mathematical functions (ECE, Brier score, IC, Bayesian shrinkage, contribution scores) with clear input/output behavior and universal properties.
|
||||
|
||||
### Property-Based Testing
|
||||
|
||||
**Library:** Hypothesis (already in use — `.hypothesis/` directory exists, project convention established)
|
||||
|
||||
**Configuration:**
|
||||
- Minimum 100 iterations per property: `@settings(max_examples=100)`
|
||||
- File naming: `tests/test_pbt_model_validation.py`
|
||||
- Tag format: `# Feature: model-validation-calibration, Property N: <title>`
|
||||
|
||||
**Property tests to implement (one test per correctness property):**
|
||||
|
||||
| Property | Test Function | Key Generators |
|
||||
|----------|---------------|----------------|
|
||||
| 1: ECE range and round-trip | `test_calibration_error_range_and_roundtrip` | `st.lists(st.tuples(st.floats(0.5, 1.0), st.booleans()))` |
|
||||
| 2: Brier score range and perfect | `test_brier_score_range_and_perfect` | `st.lists(st.tuples(st.floats(0.0, 1.0), st.sampled_from([0.0, 1.0])))` |
|
||||
| 3: IC range and perfect correlation | `test_information_coefficient_range_and_perfect` | `st.lists(st.floats(-10, 10), min_size=30)` with linear transform |
|
||||
| 4: Canonical key determinism and idempotence | `test_canonical_key_determinism_and_idempotence` | `st.text()` pairs for title and URL |
|
||||
| 5: Source reliability bounds and convergence | `test_source_reliability_bounds_and_convergence` | `st.floats(0.0, 1.0)` for win_rate, `st.integers(0, 10000)` for n |
|
||||
| 6: Quality gate determinism and monotonicity | `test_quality_gate_determinism_and_monotonicity` | Custom strategy for `QualityGateConfig` and metric values |
|
||||
| 7: Contribution score sum-to-one | `test_contribution_score_sum_to_one` | `st.lists(st.floats(0.01, 100.0), min_size=1)` |
|
||||
|
||||
### Example-Based Unit Tests
|
||||
|
||||
**File:** `tests/test_model_validation_unit.py`
|
||||
|
||||
| Test Area | Examples |
|
||||
|-----------|----------|
|
||||
| Canonical evidence key | Known title/URL → expected SHA256, empty inputs, unicode |
|
||||
| Duplicate detection | 3 docs with 2 sharing a key → 1 marked duplicate |
|
||||
| Contribution scores | [0.5, 0.3, 0.2] → [0.5, 0.3, 0.2], single doc → [1.0] |
|
||||
| ECE specific values | Perfect calibration → 0.0, all overconfident → positive ECE |
|
||||
| Brier score specific values | All correct at p=1.0 → 0.0, all wrong at p=1.0 → 1.0 |
|
||||
| IC specific values | Perfect correlation → 1.0, anti-correlation → -1.0, < 30 → None |
|
||||
| Source reliability | n=0 → 0.5, n=1000 with wr=0.8 → ≈0.8, n=30 with wr=0.7 → 0.6 |
|
||||
| Adjusted evidence weight | reliability=0.5 → base*1.0, clamping to [0.1, 2.0] |
|
||||
| Quality gate | All thresholds met → pass, one failed → fail with reason |
|
||||
| Quality gate fail-safe | No snapshots → paper-only, stale snapshot → paper-only |
|
||||
| Direction correct logic | bullish+positive → true, bullish+negative → false |
|
||||
| Profitable logic | buy+positive → true, sell+negative → true |
|
||||
| Future return computation | price 100→110 → 0.10, price 100→90 → -0.10 |
|
||||
| Excess return | ticker 10%, SPY 5% → excess 5% |
|
||||
| Weight clamping | weight 1.5 → clamped to 1.0 |
|
||||
|
||||
### Frontend Tests
|
||||
|
||||
**File:** `frontend/src/test/pages.test.tsx` (extend existing)
|
||||
|
||||
| Test Area | Strategy |
|
||||
|-----------|----------|
|
||||
| OpsModel page renders validation tabs | MSW mock for `/api/validation/summary` |
|
||||
| Calibration table renders buckets | MSW mock for `/api/validation/calibration` |
|
||||
| Gate status indicator | MSW mock for `/api/validation/gate-status` |
|
||||
| Miscalibration warning badge | Mock data with miscalibrated bucket |
|
||||
|
||||
### Integration Tests
|
||||
|
||||
**File:** `tests/test_model_validation_integration.py`
|
||||
|
||||
| Test Area | Strategy |
|
||||
|-----------|----------|
|
||||
| Snapshot creation with mock DB | asyncpg mock, verify INSERT queries |
|
||||
| Outcome evaluation with mock prices | asyncpg mock, verify return computation |
|
||||
| Metrics computation end-to-end | In-memory data, verify all metrics computed |
|
||||
| API endpoint responses | FastAPI TestClient with mock pool |
|
||||
|
||||
### Test File Structure
|
||||
|
||||
```
|
||||
tests/
|
||||
├── test_pbt_model_validation.py # 7 property-based tests
|
||||
├── test_model_validation_unit.py # Example-based unit tests
|
||||
└── test_model_validation_integration.py # Integration tests (optional)
|
||||
|
||||
frontend/src/test/
|
||||
└── pages.test.tsx # Extended with validation page tests
|
||||
```
|
||||
@@ -0,0 +1,286 @@
|
||||
# Requirements Document — Model Validation, Calibration, and Signal Quality
|
||||
|
||||
## Introduction
|
||||
|
||||
The Stonks Oracle platform generates trend summaries and trading recommendations from a three-layer signal aggregation engine. While the pipeline produces directional predictions with confidence scores, there is no systematic mechanism to evaluate whether those predictions are accurate, whether confidence scores are well-calibrated, which sources and signal types contribute to correct predictions, or whether the system outperforms simple benchmarks. The platform also lacks safety gates that prevent live trading when model quality is insufficient.
|
||||
|
||||
This feature adds a complete model validation layer: prediction outcome tracking, calibration analysis, information coefficient metrics, signal and source attribution, evidence deduplication quality tracking, confidence recalibration, benchmark comparison, an upgraded Model Performance dashboard, and safety gates for live trading eligibility. The goal is to transform Stonks Oracle from a signal dashboard with paper trading into a statistically validated prediction engine with closed-loop feedback.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Prediction_Snapshot_Writer**: A new service component in `services/validation/prediction_snapshot.py` that captures the full state of every recommendation and trend prediction at generation time, including prices, evidence links, and duplicate counts.
|
||||
- **Outcome_Evaluator**: A new service component in `services/validation/outcome_evaluator.py` that runs periodically to compute realized future returns and directional accuracy for matured prediction snapshots across multiple horizons.
|
||||
- **Metrics_Engine**: A new service component in `services/validation/metrics.py` that computes aggregate model quality metrics including calibration error, information coefficient, Brier score, and win rates over configurable lookback windows.
|
||||
- **Attribution_Engine**: A new service component in `services/validation/attribution.py` that computes per-source, per-catalyst-type, and per-signal-layer performance metrics by joining evidence links with prediction outcomes.
|
||||
- **Calibration_Engine**: A new service component in `services/validation/calibration.py` that computes source reliability scores using Bayesian shrinkage and adjusts evidence weights based on historical source performance.
|
||||
- **Quality_Gate**: A new service component in `services/trading/model_quality_gate.py` that evaluates aggregate model metrics against configurable thresholds and determines whether the system meets minimum quality standards for live trading.
|
||||
- **Information_Coefficient**: The Pearson correlation between predicted scores and realized future returns, measuring the linear predictive power of the model. Abbreviated as IC.
|
||||
- **Rank_Information_Coefficient**: The Spearman rank correlation between predicted scores and realized future returns, measuring ordinal predictive power. Abbreviated as Rank IC.
|
||||
- **Calibration_Error**: The Expected Calibration Error (ECE), computed as the weighted average of the absolute difference between predicted confidence and observed win rate across confidence buckets.
|
||||
- **Brier_Score**: The mean squared error between the predicted bullish probability and the binary actual outcome (1 if price went up, 0 otherwise), measuring probabilistic forecast accuracy.
|
||||
- **Canonical_Evidence_Key**: A normalized identifier for a piece of evidence, computed as SHA256 of the normalized title concatenated with the normalized URL, used to detect duplicate evidence across different ingestion paths.
|
||||
- **Excess_Return**: The return of a prediction minus the return of a benchmark (SPY for broad market, sector ETF for sector-relative) over the same horizon, measuring alpha generation.
|
||||
- **Prediction_Snapshot**: A frozen record of a prediction at generation time, capturing all inputs (prices, scores, evidence) needed to evaluate the prediction against future outcomes without hindsight bias.
|
||||
- **Model_Metric_Snapshot**: A periodic aggregate of model quality metrics over a lookback window and horizon, stored for time-series analysis of model performance trends.
|
||||
- **Source_Reliability**: A Bayesian-shrunk estimate of a source's historical win rate, computed as `0.5 + (n/(n+30)) * (observed_win_rate - 0.5)`, which regresses toward 0.5 for sources with few observations.
|
||||
- **Dashboard_API**: The set of API endpoints under `/api/validation/` that serve model quality metrics, calibration tables, attribution data, and gate status to the frontend.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Prediction Snapshot Capture
|
||||
|
||||
**User Story:** As a quantitative analyst, I want every recommendation and trend prediction captured as an immutable snapshot at generation time, so that I can evaluate predictions against future outcomes without hindsight bias.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a recommendation is generated by the Recommendation_Engine, THE Prediction_Snapshot_Writer SHALL create a prediction_snapshots record containing the ticker, generation timestamp, trend window, prediction horizon, direction, action, mode, strength, confidence, contradiction score, bullish probability, bearish probability, company score, macro score, competitive score, evidence count, unique source count, duplicate evidence count, price at prediction time, SPY price at prediction time, and sector ETF price at prediction time.
|
||||
2. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL record the current market price for the predicted ticker by querying the most recent close price from the market_snapshots table.
|
||||
3. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL record the current SPY price by querying the most recent close price for ticker SPY from the market_snapshots table.
|
||||
4. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL record the current sector ETF price by looking up the sector for the predicted ticker and querying the most recent close price for the corresponding sector ETF from the market_snapshots table.
|
||||
5. IF the market price, SPY price, or sector ETF price is unavailable at snapshot time, THEN THE Prediction_Snapshot_Writer SHALL store NULL for the unavailable price fields and log a warning, rather than failing the snapshot creation.
|
||||
6. THE Prediction_Snapshot_Writer SHALL store prediction snapshots in a new `prediction_snapshots` database table with a UUID primary key and indexed columns for ticker, generated_at, and horizon.
|
||||
7. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL store a JSONB metadata field containing any additional context from the trend summary market_context and recommendation risk_checks fields.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 2: Signal Evidence Link Tracking
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to know which specific evidence documents contributed to each prediction, so that I can attribute prediction success or failure to individual sources and signal types.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL create signal_evidence_links records for each document that contributed to the prediction, linking the prediction_id to the document_id and signal_id.
|
||||
2. THE signal_evidence_links record SHALL capture the source identifier, source type, catalyst type, sentiment, impact score, extraction confidence, weight assigned during aggregation, duplicate status, canonical evidence key, and contribution score for each contributing document.
|
||||
3. WHEN recording evidence links, THE Prediction_Snapshot_Writer SHALL compute the canonical_evidence_key as the SHA256 hash of the concatenation of the normalized (lowercased, whitespace-trimmed) document title and the normalized (lowercased, query-parameters-stripped) document URL.
|
||||
4. WHEN recording evidence links, THE Prediction_Snapshot_Writer SHALL mark a link as `is_duplicate = true` when an earlier-recorded link for the same prediction and ticker shares the same canonical_evidence_key, so that the first occurrence of each canonical_evidence_key remains non-duplicate.
|
||||
5. THE Prediction_Snapshot_Writer SHALL compute the contribution_score for each evidence link as the ratio of that document's effective weight to the total effective weight across all documents for the prediction.
|
||||
6. THE signal_evidence_links table SHALL have a foreign key constraint from prediction_id to prediction_snapshots(id) and indexes on prediction_id, document_id, and ticker.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 3: Evidence Deduplication Quality Tracking
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the system to track evidence deduplication quality per prediction, so that I can identify when predictions are inflated by counting the same information multiple times from different sources.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN creating a prediction snapshot, THE Prediction_Snapshot_Writer SHALL compute the unique_source_count as the number of distinct source identifiers across all non-duplicate evidence links for that prediction.
|
||||
2. WHEN creating a prediction snapshot, THE Prediction_Snapshot_Writer SHALL compute the duplicate_evidence_count as the number of evidence links marked as `is_duplicate = true` for that prediction.
|
||||
3. THE Prediction_Snapshot_Writer SHALL enforce a maximum single-document weight cap of 1.0, clamping any individual document's effective weight to prevent a single piece of evidence from dominating the prediction.
|
||||
4. WHEN computing contribution scores, THE Prediction_Snapshot_Writer SHALL count each canonical evidence key at most once per ticker per window, applying the one-vote-per-canonical-document deduplication rule.
|
||||
5. THE Metrics_Engine SHALL compute a duplicate_rate metric as the ratio of duplicate_evidence_count to total evidence_count across predictions in the lookback window.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 4: Prediction Outcome Evaluation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want realized market outcomes automatically matched to historical predictions, so that I can measure whether the system's directional calls and confidence scores correspond to actual price movements.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Outcome_Evaluator SHALL run on a periodic schedule, evaluating prediction snapshots whose horizon has elapsed and whose outcome has not yet been recorded.
|
||||
2. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute the future_return as `(future_price - price_at_prediction) / price_at_prediction` using the closing price at the horizon endpoint.
|
||||
3. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute the SPY return over the same horizon as `(spy_future_price - spy_price_at_prediction) / spy_price_at_prediction`.
|
||||
4. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute the sector ETF return over the same horizon as `(sector_etf_future_price - sector_etf_price_at_prediction) / sector_etf_price_at_prediction`.
|
||||
5. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute excess_return_vs_spy as `future_return - spy_return` and excess_return_vs_sector as `future_return - sector_etf_return`.
|
||||
6. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL determine direction_correct as true when the prediction direction is bullish and future_return is positive, or when the prediction direction is bearish and future_return is negative.
|
||||
7. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL determine profitable as true when the prediction action is buy and future_return is positive, or when the prediction action is sell and future_return is negative.
|
||||
8. THE Outcome_Evaluator SHALL evaluate each prediction across all applicable horizons: 1 hour, 6 hours, 1 day, 7 days, and 30 days.
|
||||
9. THE Outcome_Evaluator SHALL store evaluation results in a new `prediction_outcomes` table with a foreign key to prediction_snapshots and indexed columns for prediction_id, horizon, and evaluated_at.
|
||||
10. IF the future price is unavailable at the horizon endpoint (market data gap), THEN THE Outcome_Evaluator SHALL skip that horizon evaluation and retry on the next run.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 5: Calibration Analysis
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to measure how well the system's confidence scores predict actual win rates, so that I can identify overconfident or underconfident predictions and recalibrate the model.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL compute calibration metrics by grouping evaluated predictions into confidence buckets: [0.50, 0.60), [0.60, 0.70), [0.70, 0.80), [0.80, 0.90), [0.90, 1.00].
|
||||
2. FOR EACH confidence bucket, THE Metrics_Engine SHALL compute the average confidence, the observed win rate (fraction of direction_correct outcomes), and the prediction count.
|
||||
3. THE Metrics_Engine SHALL compute the Expected Calibration Error (ECE) as the weighted average of `|avg_confidence - observed_win_rate|` across all buckets, weighted by the fraction of predictions in each bucket.
|
||||
4. THE Metrics_Engine SHALL compute the Brier Score as `mean((p_bull - actual_outcome)^2)` across all evaluated predictions, where actual_outcome is 1.0 when the price went up over the horizon (future_return > 0) and 0.0 otherwise.
|
||||
5. THE Metrics_Engine SHALL flag calibration buckets where `|avg_confidence - observed_win_rate| > 0.15` as miscalibrated for dashboard highlighting.
|
||||
6. THE Metrics_Engine SHALL compute calibration metrics separately for each prediction horizon (1h, 6h, 1d, 7d, 30d).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 6: Information Coefficient Metrics
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to measure the correlation between the system's prediction scores and realized returns, so that I can assess whether higher-scored predictions actually produce higher returns.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL compute the Information Coefficient (IC) as the Pearson correlation between prediction scores and future returns across all evaluated predictions in the lookback window.
|
||||
2. THE Metrics_Engine SHALL compute the Rank Information Coefficient (Rank IC) as the Spearman rank correlation between prediction scores and future returns across all evaluated predictions in the lookback window.
|
||||
3. THE Metrics_Engine SHALL compute IC and Rank IC separately for each prediction horizon (1h, 6h, 1d, 7d, 30d).
|
||||
4. THE Metrics_Engine SHALL compute return statistics by confidence decile, grouping predictions into 10 equal-sized bins by confidence and computing the average future return and average excess return for each decile.
|
||||
5. WHEN fewer than 30 evaluated predictions exist for a given horizon, THE Metrics_Engine SHALL report IC and Rank IC as NULL rather than computing unreliable correlations from small samples.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 7: Source and Signal Attribution
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to know which sources, source types, and catalyst types contribute to accurate predictions, so that I can identify the most valuable information channels and deprioritize unreliable ones.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Attribution_Engine SHALL compute per-source performance metrics by joining signal_evidence_links with prediction_outcomes, grouping by source identifier.
|
||||
2. FOR EACH source, THE Attribution_Engine SHALL compute: prediction count, average weight, average contribution score, win rate, average future return, average excess return vs SPY, and information coefficient.
|
||||
3. THE Attribution_Engine SHALL compute the same performance metrics grouped by source_type (e.g., news_api, filings_api, web_scrape, market_api).
|
||||
4. THE Attribution_Engine SHALL compute the same performance metrics grouped by catalyst_type (e.g., earnings, product, legal, macro, m_and_a).
|
||||
5. THE Attribution_Engine SHALL compute layer attribution metrics for the three signal layers (company, macro, competitive) by using the score_company, score_macro, and score_competitive fields from prediction snapshots.
|
||||
6. FOR EACH layer, THE Attribution_Engine SHALL compute the average contribution percentage, the win rate when that layer is the dominant contributor, and the IC of predictions where that layer contributes more than 30% of the total score.
|
||||
7. THE Attribution_Engine SHALL compute a per-source duplicate_rate as the fraction of evidence links from that source marked as is_duplicate.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 8: Confidence Recalibration via Source Reliability
|
||||
|
||||
**User Story:** As a quantitative analyst, I want source credibility weights adjusted based on historical prediction accuracy using Bayesian shrinkage, so that the system learns from its own track record and improves over time.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Calibration_Engine SHALL compute source reliability using Bayesian shrinkage: `reliability = 0.5 + (n / (n + 30)) * (observed_win_rate - 0.5)`, where n is the number of evaluated predictions involving that source and observed_win_rate is the fraction of correct directional calls.
|
||||
2. WHEN a source has zero evaluated predictions, THE Calibration_Engine SHALL assign a reliability of 0.5 (the prior mean).
|
||||
3. THE Calibration_Engine SHALL compute an adjusted evidence weight for each source as `adjusted_weight = base_weight * (0.5 + reliability)`, clamped to the range [0.1, 2.0].
|
||||
4. THE Calibration_Engine SHALL update source reliability scores after each outcome evaluation cycle, using the latest prediction outcomes.
|
||||
5. THE Calibration_Engine SHALL store source reliability scores in the existing `source_accuracy` table, extending it with a reliability column or using the existing accuracy_ratio field with the Bayesian shrinkage formula.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 9: Benchmark Comparison
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the system's prediction performance compared against simple benchmarks, so that I can determine whether the model adds value beyond naive strategies.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL compute the average excess return of all buy predictions versus a buy-and-hold SPY strategy over the same horizons.
|
||||
2. THE Metrics_Engine SHALL compute the average excess return of all buy predictions versus a buy-and-hold sector ETF strategy over the same horizons.
|
||||
3. THE Metrics_Engine SHALL compute the win rate of the system's directional predictions compared to a random 50/50 baseline, reporting the statistical significance using a binomial test when the prediction count exceeds 100.
|
||||
4. THE Metrics_Engine SHALL compute the hit rate improvement, defined as `(system_win_rate - 0.5) / 0.5`, representing the percentage improvement over random guessing.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 10: Model Metric Snapshots
|
||||
|
||||
**User Story:** As a quantitative analyst, I want aggregate model metrics stored as time-series snapshots, so that I can track whether model quality is improving or degrading over time.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL periodically compute and store model_metric_snapshots containing all aggregate metrics for each combination of lookback window and prediction horizon.
|
||||
2. EACH model_metric_snapshot SHALL contain: prediction count, win rate, directional accuracy, IC, Rank IC, average return, average excess return vs SPY, average excess return vs sector, calibration error (ECE), Brier score, and per-action win rates (buy, sell, hold).
|
||||
3. THE Metrics_Engine SHALL store model_metric_snapshots in a new `model_metric_snapshots` database table with a UUID primary key and indexed columns for generated_at, lookback_window, and horizon.
|
||||
4. THE Metrics_Engine SHALL compute snapshots for lookback windows of 7 days, 30 days, 90 days, and all-time.
|
||||
5. THE Metrics_Engine SHALL store a JSONB metadata field in each snapshot for extensibility, containing any additional computed metrics not captured in dedicated columns.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 11: Safety Gate for Live Trading
|
||||
|
||||
**User Story:** As a platform operator, I want live trading automatically disabled when model quality metrics fall below minimum thresholds, so that the system does not risk real capital on a poorly performing model.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Quality_Gate SHALL evaluate the following minimum thresholds for live trading eligibility: minimum prediction count of 100, minimum IC of 0.03, minimum win rate of 0.53, maximum ECE of 0.15, and minimum excess return vs SPY of 0.0.
|
||||
2. WHEN any threshold is not met, THE Quality_Gate SHALL force all recommendations to paper mode, overriding any live_eligible mode assignments.
|
||||
3. THE Quality_Gate SHALL evaluate gate status at the start of each aggregation cycle by reading the most recent model_metric_snapshot.
|
||||
4. THE Quality_Gate SHALL log the gate evaluation result including which thresholds passed and which failed, with their actual values.
|
||||
5. THE Quality_Gate SHALL store the gate evaluation result in the `risk_configs` table under a `model_quality_gate` key, making it available to the recommendation engine and dashboard.
|
||||
6. IF the model_metric_snapshots table is empty or the most recent snapshot is older than 24 hours, THEN THE Quality_Gate SHALL default to paper-only mode (fail-safe behavior).
|
||||
7. THE Quality_Gate SHALL support configurable thresholds via the `risk_configs` table, with the default values specified in acceptance criterion 1 used when no override is configured.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 12: Model Performance Dashboard Upgrade
|
||||
|
||||
**User Story:** As a platform operator, I want a comprehensive model performance dashboard showing prediction accuracy, calibration, attribution, and gate status, so that I can monitor model quality and make informed decisions about live trading.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Dashboard_API SHALL expose a `/api/validation/summary` endpoint returning the latest model metric snapshot with summary cards for: prediction count, win rate, directional accuracy, IC, Rank IC, Brier score, calibration error, average excess return vs SPY, average excess return vs sector, and live trading gate status.
|
||||
2. THE Dashboard_API SHALL expose a `/api/validation/calibration` endpoint returning the calibration table with confidence buckets, average confidence, observed win rate, prediction count, and miscalibration flag for each bucket.
|
||||
3. THE Dashboard_API SHALL expose a `/api/validation/ic-by-horizon` endpoint returning IC and Rank IC values for each prediction horizon.
|
||||
4. THE Dashboard_API SHALL expose a `/api/validation/attribution/sources` endpoint returning per-source performance metrics including win rate, IC, average return, and duplicate rate.
|
||||
5. THE Dashboard_API SHALL expose a `/api/validation/attribution/catalysts` endpoint returning per-catalyst-type performance metrics.
|
||||
6. THE Dashboard_API SHALL expose a `/api/validation/attribution/layers` endpoint returning per-signal-layer (company, macro, competitive) performance metrics.
|
||||
7. THE Dashboard_API SHALL expose a `/api/validation/gate-status` endpoint returning the current quality gate evaluation with pass/fail status for each threshold.
|
||||
8. THE frontend OpsModel page SHALL be upgraded to display the model validation summary cards, calibration table, IC-by-horizon table, source performance table, catalyst truth table, layer attribution table, and gate status indicator.
|
||||
9. THE frontend SHALL highlight miscalibrated confidence buckets where `|avg_confidence - observed_win_rate| > 0.15` with a visual warning indicator.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 13: Recommendation Display Enhancements
|
||||
|
||||
**User Story:** As a platform operator, I want each recommendation to display its validation context including calibrated confidence, historical win rate, and evidence quality indicators, so that I can assess the reliability of individual predictions.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN displaying a recommendation, THE frontend SHALL show the original confidence alongside the calibrated confidence (based on the historical win rate for that confidence bucket).
|
||||
2. WHEN displaying a recommendation, THE frontend SHALL show the historical win rate for predictions with similar confidence levels.
|
||||
3. WHEN displaying a recommendation, THE frontend SHALL show the evidence count, unique evidence count, and duplicate evidence count.
|
||||
4. WHEN displaying a recommendation, THE frontend SHALL show a source reliability indicator based on the Bayesian-shrunk reliability score of the primary contributing sources.
|
||||
5. WHEN displaying a recommendation, THE frontend SHALL show the live eligibility status with the reason (gate passed, or which threshold failed).
|
||||
6. WHEN the duplicate evidence count exceeds 20% of the total evidence count, THE frontend SHALL display a warning badge indicating potential evidence inflation.
|
||||
7. WHEN the primary contributing source has a reliability score below 0.4, THE frontend SHALL display a warning badge indicating unknown or low source reliability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 14: SQL Explorer Views
|
||||
|
||||
**User Story:** As a quantitative analyst, I want pre-built SQL views joining predictions with outcomes and evidence with performance, so that I can run ad-hoc analysis in the SQL Explorer without writing complex joins.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE database migration SHALL create a view `v_prediction_performance` that joins prediction_snapshots with prediction_outcomes on `prediction_snapshots.id = prediction_outcomes.prediction_id`, providing a single flat table with prediction inputs and realized outcomes.
|
||||
2. THE database migration SHALL create a view `v_source_performance` that joins signal_evidence_links with prediction_outcomes (via prediction_id), providing per-evidence-link outcome data for source attribution analysis.
|
||||
3. THE v_prediction_performance view SHALL include columns for ticker, direction, action, confidence, strength, price_at_prediction, future_return, excess_return_vs_spy, direction_correct, profitable, horizon, generated_at, and evaluated_at.
|
||||
4. THE v_source_performance view SHALL include columns for source, source_type, catalyst_type, sentiment, weight, contribution_score, is_duplicate, direction_correct, future_return, and excess_return_vs_spy.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 15: Backtest Replay Integration
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to replay historical data through the prediction snapshot and outcome evaluation pipeline, so that I can assess model quality on historical data without future data leakage.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Backtest_Replay service SHALL support a validation mode that generates prediction snapshots and evaluates outcomes using only data available at each historical point in time.
|
||||
2. WHEN running in validation mode, THE Backtest_Replay service SHALL process historical recommendations chronologically, creating prediction snapshots with the market prices that were available at each recommendation's generation time.
|
||||
3. WHEN running in validation mode, THE Backtest_Replay service SHALL evaluate prediction outcomes using market prices from the appropriate future horizon relative to each prediction's generation time.
|
||||
4. THE Backtest_Replay service SHALL prevent future data leakage by ensuring that no market data with a timestamp after the prediction generation time is used during snapshot creation.
|
||||
5. WHEN a backtest validation run completes, THE Backtest_Replay service SHALL trigger a model metrics computation over the backtest period, storing the results as model_metric_snapshots tagged with the backtest_id.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 16: Database Schema
|
||||
|
||||
**User Story:** As a developer, I want the new database tables created via a migration script following the existing migration conventions, so that the schema changes are applied consistently across all environments.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE database migration SHALL create the `prediction_snapshots` table with columns: id (UUID PK), generated_at (TIMESTAMPTZ), ticker (VARCHAR), window (VARCHAR), horizon (VARCHAR), direction (VARCHAR), action (VARCHAR), mode (VARCHAR), strength (FLOAT), confidence (FLOAT), contradiction (FLOAT), p_bull (FLOAT), p_bear (FLOAT), score_company (FLOAT), score_macro (FLOAT), score_competitive (FLOAT), evidence_count (INTEGER), unique_source_count (INTEGER), duplicate_evidence_count (INTEGER), price_at_prediction (FLOAT), spy_price_at_prediction (FLOAT), sector_etf_price_at_prediction (FLOAT), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
2. THE database migration SHALL create the `prediction_outcomes` table with columns: id (UUID PK), prediction_id (UUID FK to prediction_snapshots), evaluated_at (TIMESTAMPTZ), horizon (VARCHAR), future_price (FLOAT), future_return (FLOAT), spy_future_price (FLOAT), spy_return (FLOAT), sector_etf_future_price (FLOAT), sector_etf_return (FLOAT), excess_return_vs_spy (FLOAT), excess_return_vs_sector (FLOAT), direction_correct (BOOLEAN), profitable (BOOLEAN), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
3. THE database migration SHALL create the `signal_evidence_links` table with columns: id (UUID PK), prediction_id (UUID FK to prediction_snapshots), document_id (VARCHAR), signal_id (VARCHAR), ticker (VARCHAR), source (VARCHAR), source_type (VARCHAR), catalyst_type (VARCHAR), sentiment (VARCHAR), impact (FLOAT), extraction_confidence (FLOAT), weight (FLOAT), is_duplicate (BOOLEAN), canonical_evidence_key (VARCHAR), contribution_score (FLOAT), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
4. THE database migration SHALL create the `model_metric_snapshots` table with columns: id (UUID PK), generated_at (TIMESTAMPTZ), lookback_window (VARCHAR), horizon (VARCHAR), prediction_count (INTEGER), win_rate (FLOAT), directional_accuracy (FLOAT), information_coefficient (FLOAT), rank_information_coefficient (FLOAT), avg_return (FLOAT), avg_excess_return_vs_spy (FLOAT), avg_excess_return_vs_sector (FLOAT), calibration_error (FLOAT), brier_score (FLOAT), buy_win_rate (FLOAT), sell_win_rate (FLOAT), hold_win_rate (FLOAT), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
5. THE database migration SHALL create appropriate indexes on prediction_snapshots (ticker, generated_at, horizon), prediction_outcomes (prediction_id, horizon, evaluated_at), signal_evidence_links (prediction_id, document_id, ticker), and model_metric_snapshots (generated_at, lookback_window, horizon).
|
||||
6. THE database migration SHALL be numbered as `035_model_validation.sql`, following the existing migration numbering convention.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 17: Property-Based Testing for Validation Metrics
|
||||
|
||||
**User Story:** As a developer, I want property-based tests validating the mathematical correctness of all validation metric computations, so that edge cases and numerical stability issues are caught before deployment.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE test suite SHALL include a property-based test for calibration error verifying that ECE is in [0.0, 1.0] for all valid distributions of predictions across confidence buckets, and that ECE is 0.0 when every bucket's observed win rate exactly matches its average confidence (round-trip calibration property).
|
||||
2. THE test suite SHALL include a property-based test for Brier score verifying that the score is in [0.0, 1.0] for all valid probability-outcome pairs, and that the score is 0.0 when all predictions are perfectly correct with probability 1.0.
|
||||
3. THE test suite SHALL include a property-based test for information coefficient verifying that IC is in [-1.0, 1.0] for all valid score-return pairs, and that IC is 1.0 when scores and returns are perfectly positively correlated.
|
||||
4. THE test suite SHALL include a property-based test for the canonical evidence key verifying that the key is deterministic (same inputs always produce the same key) and that normalization is idempotent (normalizing an already-normalized input produces the same key).
|
||||
5. THE test suite SHALL include a property-based test for source reliability Bayesian shrinkage verifying that reliability is always in [0.0, 1.0], that reliability approaches 0.5 as sample count approaches 0, and that reliability approaches the observed win rate as sample count approaches infinity.
|
||||
6. THE test suite SHALL include a property-based test for the quality gate verifying that the gate result is deterministic for the same metric inputs, and that relaxing any single threshold (making it easier to pass) never causes a previously passing gate to fail (monotonicity property).
|
||||
7. THE test suite SHALL include a property-based test for contribution score computation verifying that all contribution scores for a single prediction sum to 1.0 (within floating-point tolerance) and that each individual score is in [0.0, 1.0].
|
||||
@@ -0,0 +1,260 @@
|
||||
# Implementation Plan: Model Validation, Calibration, and Signal Quality
|
||||
|
||||
## Overview
|
||||
|
||||
Add a closed-loop model validation layer to Stonks Oracle: prediction snapshot capture, outcome evaluation, calibration/IC metrics, source/catalyst/layer attribution, Bayesian source reliability, a quality gate for live trading, 7 new API endpoints, an upgraded OpsModel dashboard, and backtest replay integration. Implementation follows the four-phase priority order from the spec, with each phase building on the previous one.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] 1. Database migration 035 — schema foundation
|
||||
- [x] 1.1 Create `infra/migrations/035_model_validation.sql` with all tables, indexes, and views
|
||||
- Create `prediction_snapshots` table with all columns from design (id UUID PK, generated_at, ticker, window, horizon, direction, action, mode, strength, confidence, contradiction, p_bull, p_bear, score_company, score_macro, score_competitive, evidence_count, unique_source_count, duplicate_evidence_count, price_at_prediction, spy_price_at_prediction, sector_etf_price_at_prediction, metadata JSONB, created_at)
|
||||
- Create `prediction_outcomes` table with FK to prediction_snapshots (id UUID PK, prediction_id, evaluated_at, horizon, future_price, future_return, spy_future_price, spy_return, sector_etf_future_price, sector_etf_return, excess_return_vs_spy, excess_return_vs_sector, direction_correct, profitable, metadata JSONB, created_at)
|
||||
- Create `signal_evidence_links` table with FK to prediction_snapshots (id UUID PK, prediction_id, document_id, signal_id, ticker, source, source_type, catalyst_type, sentiment, impact, extraction_confidence, weight, is_duplicate, canonical_evidence_key, contribution_score, metadata JSONB, created_at)
|
||||
- Create `model_metric_snapshots` table (id UUID PK, generated_at, lookback_window, horizon, prediction_count, win_rate, directional_accuracy, information_coefficient, rank_information_coefficient, avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector, calibration_error, brier_score, buy_win_rate, sell_win_rate, hold_win_rate, metadata JSONB, created_at)
|
||||
- Create indexes on prediction_snapshots (ticker, generated_at, horizon), prediction_outcomes (prediction_id, horizon, evaluated_at), signal_evidence_links (prediction_id, document_id, ticker), model_metric_snapshots (generated_at, lookback_window, horizon)
|
||||
- Create `v_prediction_performance` view joining prediction_snapshots with prediction_outcomes
|
||||
- Create `v_source_performance` view joining signal_evidence_links with prediction_snapshots and prediction_outcomes
|
||||
- _Requirements: 16.1, 16.2, 16.3, 16.4, 16.5, 16.6, 14.1, 14.2, 14.3, 14.4_
|
||||
|
||||
- [x] 2. Phase 1 — Prediction capture, outcome evaluation, core metrics, and dashboard API
|
||||
- [x] 2.1 Implement Prediction Snapshot Writer (`services/validation/prediction_snapshot.py`)
|
||||
- Create `services/validation/__init__.py`
|
||||
- Define `SECTOR_ETF_MAP`, `EVALUATION_HORIZONS`, `MAX_SINGLE_DOCUMENT_WEIGHT` constants
|
||||
- Implement `PredictionSnapshot` and `SignalEvidenceLink` dataclasses
|
||||
- Implement `compute_canonical_evidence_key(title, url)` — SHA256 of normalized title + normalized URL (lowercase, strip whitespace for title; lowercase, strip query params for URL)
|
||||
- Implement `fetch_latest_close_price(pool, ticker)` — query most recent close from market_snapshots
|
||||
- Implement `create_prediction_snapshot(pool, recommendation, trend_summary, evidence_signals, evidence_docs)` — fetch prices (ticker, SPY, sector ETF), compute canonical keys, detect duplicates, clamp weights to MAX_SINGLE_DOCUMENT_WEIGHT, compute contribution scores (one-vote-per-canonical-key), persist snapshot + evidence links in a transaction
|
||||
- Implement `compute_contribution_scores(weights)` — each score = weight_i / sum(weights), sums to 1.0
|
||||
- Handle NULL prices gracefully (log warning, store NULL, don't fail)
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.1, 3.2, 3.3, 3.4_
|
||||
|
||||
- [x] 2.2 Write property test for canonical evidence key determinism and idempotence
|
||||
- **Property 4: Canonical Evidence Key Determinism and Normalization Idempotence**
|
||||
- Test that same (title, url) always produces same key
|
||||
- Test that normalizing already-normalized input produces same key
|
||||
- **Validates: Requirements 2.3, 17.4**
|
||||
|
||||
- [x] 2.3 Write property test for contribution score sum-to-one and range
|
||||
- **Property 7: Contribution Score Sum-to-One and Range**
|
||||
- Test that all scores in [0.0, 1.0] and sum to 1.0 (within 1e-9 tolerance)
|
||||
- Test that empty input returns empty list
|
||||
- **Validates: Requirements 2.5, 17.7**
|
||||
|
||||
- [x] 2.4 Implement Outcome Evaluator (`services/validation/outcome_evaluator.py`)
|
||||
- Define `PredictionOutcome` dataclass and `HORIZON_DURATIONS` mapping
|
||||
- Implement `evaluate_matured_predictions(pool)` — find snapshots where horizon elapsed and outcome not recorded, evaluate each
|
||||
- Implement `evaluate_single_prediction(pool, snapshot, horizon)` — fetch future price at horizon endpoint, compute future_return, SPY return, sector ETF return, excess returns, direction_correct, profitable; return None if future price unavailable
|
||||
- Evaluate across all 5 horizons: 1h, 6h, 1d, 7d, 30d
|
||||
- Skip horizons where future price is unavailable (retry next run)
|
||||
- Store results in prediction_outcomes table
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 4.10_
|
||||
|
||||
- [x] 2.5 Implement Metrics Engine (`services/validation/metrics.py`)
|
||||
- Define `CONFIDENCE_BUCKETS`, `LOOKBACK_WINDOWS` constants
|
||||
- Define `CalibrationBucket` and `ModelMetricSnapshot` dataclasses
|
||||
- Implement `compute_calibration_error(confidences, outcomes)` — group into 5 confidence buckets, compute ECE as weighted average of |avg_conf - win_rate|, flag miscalibrated buckets (|diff| > 0.15)
|
||||
- Implement `compute_brier_score(p_bulls, outcomes)` — mean((p_bull - outcome)^2)
|
||||
- Implement `compute_information_coefficient(scores, returns)` — Pearson correlation, return None when < 30 data points
|
||||
- Implement `compute_rank_information_coefficient(scores, returns)` — Spearman rank correlation, return None when < 30 data points
|
||||
- Implement `compute_contribution_scores(weights)` — weight_i / sum(weights), sums to 1.0
|
||||
- Implement benchmark metrics: average excess return vs SPY, vs sector ETF, hit rate improvement
|
||||
- Implement `compute_and_store_metric_snapshots(pool)` — compute for all lookback/horizon combinations (4 lookbacks × 5 horizons), persist to model_metric_snapshots
|
||||
- _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 6.1, 6.2, 6.3, 6.4, 6.5, 9.1, 9.2, 9.3, 9.4, 10.1, 10.2, 10.3, 10.4, 10.5_
|
||||
|
||||
- [x] 2.6 Write property test for ECE range and round-trip
|
||||
- **Property 1: Calibration Error Range and Round-Trip**
|
||||
- Test ECE in [0.0, 1.0] for all valid distributions
|
||||
- Test ECE = 0.0 when every bucket's win rate matches avg confidence
|
||||
- **Validates: Requirements 5.1, 5.3, 17.1**
|
||||
|
||||
- [x] 2.7 Write property test for Brier score range and perfect prediction
|
||||
- **Property 2: Brier Score Range and Perfect Prediction**
|
||||
- Test Brier in [0.0, 1.0] for all valid (p_bull, outcome) pairs
|
||||
- Test Brier = 0.0 when all predictions perfectly correct
|
||||
- **Validates: Requirements 5.4, 17.2**
|
||||
|
||||
- [x] 2.8 Write property test for IC range and perfect correlation
|
||||
- **Property 3: Information Coefficient Range and Perfect Correlation**
|
||||
- Test IC in [-1.0, 1.0] for all valid (score, return) pairs with ≥30 elements
|
||||
- Test IC = 1.0 for perfectly positively correlated data
|
||||
- **Validates: Requirements 6.1, 6.2, 17.3**
|
||||
|
||||
- [x] 2.9 Implement Dashboard API endpoints in `services/api/app.py`
|
||||
- Add `/api/validation/summary` GET — return latest model_metric_snapshot + gate status
|
||||
- Add `/api/validation/calibration` GET — return calibration table with buckets
|
||||
- Add `/api/validation/ic-by-horizon` GET — return IC and Rank IC per horizon
|
||||
- Add `/api/validation/gate-status` GET — return quality gate evaluation detail
|
||||
- All endpoints accept optional `lookback` (default "30d") and `horizon` (default "7d") query params
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.7_
|
||||
|
||||
- [x] 2.10 Add frontend validation API hooks in `frontend/src/api/hooks.ts`
|
||||
- Add `useValidationSummary(lookback?, horizon?)` hook for `/api/validation/summary`
|
||||
- Add `useValidationCalibration(lookback?, horizon?)` hook for `/api/validation/calibration`
|
||||
- Add `useValidationICByHorizon(lookback?)` hook for `/api/validation/ic-by-horizon`
|
||||
- Add `useValidationGateStatus()` hook for `/api/validation/gate-status`
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.7_
|
||||
|
||||
- [x] 2.11 Upgrade OpsModel page (`frontend/src/pages/OpsModel.tsx`) — Phase 1 dashboard
|
||||
- Add tabbed layout: existing "Extraction Performance" tab + new "Model Validation" tab
|
||||
- Add summary cards: prediction count, win rate, directional accuracy, IC, Rank IC, Brier score, ECE, avg excess return vs SPY, gate status
|
||||
- Add calibration table with confidence buckets, avg confidence, observed win rate, count, miscalibration flag
|
||||
- Highlight miscalibrated buckets (|avg_confidence - observed_win_rate| > 0.15) with warning indicator
|
||||
- Add IC-by-horizon table showing IC and Rank IC for each horizon
|
||||
- Add gate status indicator (pass/fail with threshold details)
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.7, 12.8, 12.9_
|
||||
|
||||
- [x] 3. Checkpoint — Phase 1 verification
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 4. Phase 2 — Attribution engine and source/catalyst truth tables
|
||||
- [x] 4.1 Implement Attribution Engine (`services/validation/attribution.py`)
|
||||
- Define `SourceAttribution`, `CatalystAttribution`, `LayerAttribution` dataclasses
|
||||
- Implement `compute_source_attribution(pool, lookback_days, horizon)` — join signal_evidence_links with prediction_outcomes, group by source; compute prediction count, avg weight, avg contribution score, win rate, avg future return, avg excess return vs SPY, IC, duplicate rate
|
||||
- Implement `compute_catalyst_attribution(pool, lookback_days, horizon)` — same metrics grouped by catalyst_type
|
||||
- Implement `compute_layer_attribution(pool, lookback_days, horizon)` — compute per-layer (company, macro, competitive) avg contribution %, dominant win rate (layer > 30% contribution), dominant IC
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7_
|
||||
|
||||
- [x] 4.2 Implement Calibration Engine (`services/validation/calibration.py`)
|
||||
- Implement `compute_source_reliability(observed_win_rate, sample_count, prior_strength=30)` — Bayesian shrinkage: `0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)` where `n = sample_count`; return 0.5 when n=0
|
||||
- Implement `compute_adjusted_evidence_weight(base_weight, reliability)` — `base_weight * (0.5 + reliability)`, clamped to [0.1, 2.0]
|
||||
- Implement `update_source_reliabilities(pool)` — recompute from latest outcomes, update source_accuracy table
|
||||
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5_
|
||||
|
||||
- [x] 4.3 Write property test for source reliability Bayesian shrinkage bounds and convergence
|
||||
- **Property 5: Source Reliability Bayesian Shrinkage Bounds and Convergence**
|
||||
- Test reliability in [0.0, 1.0] for all valid inputs
|
||||
- Test reliability = 0.5 when sample_count = 0
|
||||
- Test reliability approaches observed_win_rate as sample_count → ∞
|
||||
- **Validates: Requirements 8.1, 8.2, 17.5**
|
||||
|
||||
- [x] 4.4 Add attribution API endpoints in `services/api/app.py`
|
||||
- Add `/api/validation/attribution/sources` GET — return per-source performance metrics
|
||||
- Add `/api/validation/attribution/catalysts` GET — return per-catalyst performance metrics
|
||||
- Add `/api/validation/attribution/layers` GET — return per-layer performance metrics
|
||||
- All endpoints accept optional `lookback` (default "30d") and `horizon` (default "7d") query params
|
||||
- _Requirements: 12.4, 12.5, 12.6_
|
||||
|
||||
- [x] 4.5 Add frontend attribution hooks in `frontend/src/api/hooks.ts`
|
||||
- Add `useValidationAttributionSources(lookback?, horizon?)` hook
|
||||
- Add `useValidationAttributionCatalysts(lookback?, horizon?)` hook
|
||||
- Add `useValidationAttributionLayers(lookback?, horizon?)` hook
|
||||
- _Requirements: 12.4, 12.5, 12.6_
|
||||
|
||||
- [x] 4.6 Extend OpsModel page with attribution tables
|
||||
- Add source performance table (source, win rate, IC, avg return, duplicate rate)
|
||||
- Add catalyst truth table (catalyst type, win rate, avg return, IC)
|
||||
- Add layer attribution table (company/macro/competitive contribution %, dominant win rate, IC)
|
||||
- _Requirements: 12.4, 12.5, 12.6, 12.8_
|
||||
|
||||
- [x] 5. Checkpoint — Phase 2 verification
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 6. Phase 3 — Quality gate, recommendation enhancements, and pipeline wiring
|
||||
- [x] 6.1 Implement Quality Gate (`services/trading/model_quality_gate.py`)
|
||||
- Define `QualityGateConfig` dataclass with default thresholds (min_prediction_count=100, min_ic=0.03, min_win_rate=0.53, max_ece=0.15, min_excess_return_vs_spy=0.0, max_snapshot_age_hours=24)
|
||||
- Define `GateThresholdResult` and `QualityGateResult` dataclasses
|
||||
- Implement `evaluate_quality_gate(pool, config)` — read most recent model_metric_snapshot (30d lookback, 7d horizon), evaluate each threshold, store result in risk_configs under 'model_quality_gate' key
|
||||
- Implement `load_gate_config_from_db(pool)` — load thresholds from risk_configs with defaults
|
||||
- Default to paper-only mode when no snapshots exist or the latest snapshot is stale (older than `max_snapshot_age_hours`, default 24h)
|
||||
- Log gate evaluation result with threshold pass/fail details
|
||||
- _Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7_
|
||||
|
||||
- [x] 6.2 Write property test for quality gate determinism and threshold monotonicity
|
||||
- **Property 6: Quality Gate Determinism and Threshold Monotonicity**
|
||||
- Test same inputs always produce same pass/fail result
|
||||
- Test relaxing any threshold never causes a previously passing gate to fail
|
||||
- **Validates: Requirements 11.1, 17.6**
|
||||
|
||||
- [x] 6.3 Wire Quality Gate into aggregation cycle (`services/aggregation/worker.py`)
|
||||
- Call `evaluate_quality_gate` at the start of each aggregation cycle
|
||||
- When gate fails, force all recommendations to paper mode
|
||||
- Log gate status at cycle start
|
||||
- _Requirements: 11.2, 11.3_
|
||||
|
||||
- [x] 6.4 Wire Prediction Snapshot Writer into recommendation engine
|
||||
- After recommendation is generated in `services/recommendation/eligibility.py` or the calling code, call `create_prediction_snapshot` to capture the prediction state
|
||||
- Pass recommendation, trend_summary, evidence signals, and evidence docs
|
||||
- Handle snapshot creation failure gracefully (log error, don't block recommendation)
|
||||
- _Requirements: 1.1, 1.6_
|
||||
|
||||
- [x] 6.5 Enhance recommendation display on frontend
|
||||
- Update `frontend/src/pages/RecommendationDetail` (or relevant recommendation display component) to show:
|
||||
- Original confidence alongside calibrated confidence (historical win rate for that bucket)
|
||||
- Historical win rate for similar confidence levels
|
||||
- Evidence count, unique evidence count, duplicate evidence count
|
||||
- Source reliability indicator for primary contributing sources
|
||||
- Live eligibility status with reason (gate passed or which threshold failed)
|
||||
- Add warning badge when duplicate evidence count > 20% of total evidence count
|
||||
- Add warning badge when primary source reliability < 0.4
|
||||
- _Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7_
|
||||
|
||||
- [x] 7. Checkpoint — Phase 3 verification
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 8. Phase 4 — Backtest replay integration and unit tests
|
||||
- [x] 8.1 Add validation mode to BacktestReplay (`services/trading/backtest_replay.py`)
|
||||
- Add `validation_mode: bool = False` parameter to `BacktestReplay.run()`
|
||||
- When validation_mode=True, create prediction snapshots for each historical recommendation using only data available at that point in time
|
||||
- Evaluate prediction outcomes using market prices from the appropriate future horizon
|
||||
- Prevent future data leakage: no market data after prediction generation time used during snapshot creation
|
||||
- After backtest completes, trigger model metrics computation over the backtest period, tag snapshots with backtest_id
|
||||
- _Requirements: 15.1, 15.2, 15.3, 15.4, 15.5_
|
||||
|
||||
- [x] 8.2 Write unit tests for prediction snapshot writer (`tests/test_model_validation_unit.py`)
|
||||
- Test canonical evidence key: known title/URL → expected SHA256, empty inputs, unicode
|
||||
- Test duplicate detection: 3 docs with 2 sharing a key → 1 marked duplicate
|
||||
- Test contribution scores: [0.5, 0.3, 0.2] → [0.5, 0.3, 0.2], single doc → [1.0]
|
||||
- Test weight clamping: weight 1.5 → clamped to 1.0
|
||||
- _Requirements: 1.1, 2.3, 2.4, 2.5, 3.3_
|
||||
|
||||
- [x] 8.3 Write unit tests for outcome evaluator (`tests/test_model_validation_unit.py`)
|
||||
- Test future return computation: price 100→110 → 0.10, price 100→90 → -0.10
|
||||
- Test direction_correct logic: bullish+positive → true, bullish+negative → false
|
||||
- Test profitable logic: buy+positive → true, sell+negative → true
|
||||
- Test excess return: ticker 10%, SPY 5% → excess 5%
|
||||
- _Requirements: 4.2, 4.5, 4.6, 4.7_
|
||||
|
||||
- [x] 8.4 Write unit tests for metrics engine (`tests/test_model_validation_unit.py`)
|
||||
- Test ECE specific values: perfect calibration → 0.0, all overconfident → positive ECE
|
||||
- Test Brier score: all correct at p=1.0 → 0.0, all wrong at p=1.0 → 1.0
|
||||
- Test IC: perfect correlation → 1.0, anti-correlation → -1.0, < 30 → None
|
||||
- _Requirements: 5.3, 5.4, 6.1, 6.2, 6.5_
|
||||
|
||||
- [x] 8.5 Write unit tests for calibration engine (`tests/test_model_validation_unit.py`)
|
||||
- Test source reliability: n=0 → 0.5, n=1000 with wr=0.8 → ≈0.791 (within 0.01 of 0.8), n=30 with wr=0.7 → 0.6
|
||||
- Test adjusted evidence weight: reliability=0.5 → base*1.0, clamping to [0.1, 2.0]
|
||||
- _Requirements: 8.1, 8.2, 8.3_
|
||||
|
||||
- [x] 8.6 Write unit tests for quality gate (`tests/test_model_validation_unit.py`)
|
||||
- Test all thresholds met → pass
|
||||
- Test one threshold failed → fail with reason
|
||||
- Test fail-safe: no snapshots → paper-only, stale snapshot → paper-only
|
||||
- _Requirements: 11.1, 11.6_
|
||||
|
||||
- [x] 8.7 Write frontend tests for validation dashboard (`frontend/src/test/pages.test.tsx`)
|
||||
- Add MSW mock handlers for `/api/validation/summary`, `/api/validation/calibration`, `/api/validation/gate-status`
|
||||
- Test OpsModel page renders validation tab with summary cards
|
||||
- Test calibration table renders buckets with miscalibration warning
|
||||
- Test gate status indicator renders pass/fail
|
||||
- _Requirements: 12.8, 12.9_
|
||||
|
||||
- [x] 9. Final checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation after each phase
|
||||
- Property tests validate the 7 universal correctness properties from the design document
|
||||
- Unit tests validate specific examples, edge cases, and integration points
|
||||
- The design uses Python for backend and TypeScript for frontend — no language selection needed
|
||||
- Migration number is 035 (existing migrations go up to 034)
|
||||
- All new service modules go under `services/validation/` except the quality gate which goes in `services/trading/`
|
||||
- The 7 new API endpoints are added to the existing `services/api/app.py`
|
||||
- Frontend hooks follow existing patterns in `frontend/src/api/hooks.ts`
|
||||
- Phase 1 delivers the core feedback loop (capture → evaluate → measure → display)
|
||||
- Phase 2 adds attribution depth (which sources/catalysts/layers work best)
|
||||
- Phase 3 adds safety (quality gate) and UX (recommendation warnings)
|
||||
- Phase 4 adds historical analysis (backtest validation mode) and comprehensive tests
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "d76705a8-fb91-4fce-b59e-c4b3b0dbbd83", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,802 @@
|
||||
# Design Document — Trading Feedback Engine
|
||||
|
||||
## Overview
|
||||
|
||||
This design adds a periodic trading performance reporting system to Stonks Oracle. The system collects trading data (P&L, recommendations, positions, risk metrics, model quality), generates structured JSON reports with AI-powered summaries, validates report metrics against live data, and stores reports for retrieval via API.
|
||||
|
||||
The core challenge is fitting AI summarization within the 8k-token context window of the `qwen3.5:9b-fast` model on the local Ollama instance. The design addresses this with a chunking strategy that serializes report section data into ≤6,000-character chunks, summarizes each chunk independently, then merges chunk summaries into a final section summary. This hierarchical summarization approach keeps each LLM call well within the token budget while producing coherent narratives.
|
||||
|
||||
### Design Rationale
|
||||
|
||||
A trading system without periodic performance feedback forces the operator to manually query tables and compute metrics. The feedback engine closes this gap by:
|
||||
|
||||
1. **Automating data collection** — pulling from 7+ tables (trading_decisions, orders, positions, portfolio_snapshots, recommendations, prediction_outcomes, model_metric_snapshots) into a single structured report
|
||||
2. **AI-powered summarization** — using the existing agent infrastructure (ai_agents, AgentConfigResolver, llm_factory) to generate natural-language summaries that highlight trends and anomalies
|
||||
3. **Cross-validation** — comparing computed metrics against live validation data (prediction_outcomes, model_metric_snapshots) and flagging discrepancies >5%
|
||||
4. **Persistent storage** — storing reports as JSONB for historical comparison and trend analysis
|
||||
5. **Scheduled generation** — daily (after market close) and weekly (Saturday) reports via Redis queue jobs
|
||||
|
||||
The design reuses existing infrastructure: asyncpg for persistence, FastAPI for API endpoints, Redis queues for async job processing, the ai_agents/AgentConfigResolver/llm_factory stack for LLM access, and TanStack Query hooks on the frontend.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Data Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph "Scheduling (Trigger)"
|
||||
A[Scheduler Service] -->|after 16:30 ET daily| B[Redis Queue<br/>stonks:queue:report_generation]
|
||||
A -->|Saturday weekly| B
|
||||
C[Manual API Trigger] --> B
|
||||
end
|
||||
|
||||
subgraph "Report Generation (Async Worker)"
|
||||
B --> D[Report Generator<br/>services/reporting/generator.py]
|
||||
D -->|1. Collect| E[Data Collector<br/>services/reporting/collector.py]
|
||||
E -->|queries| F[(trading_decisions<br/>orders, positions<br/>portfolio_snapshots<br/>recommendations)]
|
||||
D -->|2. Build sections| G[Section Builder<br/>services/reporting/sections.py]
|
||||
G -->|P&L, accuracy,<br/>positions, risk,<br/>model quality| H[Report Sections]
|
||||
D -->|3. Validate| I[Report Validator<br/>services/reporting/validator.py]
|
||||
I -->|cross-check| J[(prediction_outcomes<br/>model_metric_snapshots)]
|
||||
D -->|4. Summarize| K[AI Summarizer<br/>services/reporting/summarizer.py]
|
||||
K -->|chunk & summarize| L[Report_Summarizer_Agent<br/>via AgentConfigResolver<br/>+ llm_factory]
|
||||
D -->|5. Store| M[(trading_reports table)]
|
||||
end
|
||||
|
||||
subgraph "API Layer"
|
||||
N[GET /api/reports] -->|paginated list| M
|
||||
O[GET /api/reports/:id] -->|full report| M
|
||||
end
|
||||
|
||||
subgraph "Frontend"
|
||||
P[useReports hook] --> N
|
||||
Q[useReport hook] --> O
|
||||
end
|
||||
```
|
||||
|
||||
### Scheduling Strategy
|
||||
|
||||
| Component | Trigger | Cadence |
|
||||
|-----------|---------|---------|
|
||||
| Daily Report | Scheduler after 16:30 ET | Every trading day |
|
||||
| Weekly Report | Scheduler on Saturday | Weekly (Mon–Fri coverage) |
|
||||
| Report Generator Worker | Redis queue consumer | On-demand from queue |
|
||||
| AI Summarizer | Called by generator | Per report section |
|
||||
|
||||
### Chunking Strategy
|
||||
|
||||
The `qwen3.5:9b-fast` model has an 8k-token context window. With the system prompt (~200 tokens) and response budget (~200 tokens), roughly 7,600 tokens remain for input. At ~4 chars/token for structured data, that's ~30,400 characters. The 6,000-character chunk limit provides a 5x safety margin to account for JSON overhead, prompt framing, and tokenization variance.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A[Section Data<br/>e.g. 15,000 chars] --> B{> 6,000 chars?}
|
||||
B -->|No| C[Single LLM call<br/>→ summary]
|
||||
B -->|Yes| D[Split into chunks<br/>≤ 6,000 chars each]
|
||||
D --> E[Chunk 1 → LLM → summary 1]
|
||||
D --> F[Chunk 2 → LLM → summary 2]
|
||||
D --> G[Chunk N → LLM → summary N]
|
||||
E --> H[Merge summaries<br/>→ final LLM call<br/>→ section summary]
|
||||
F --> H
|
||||
G --> H
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### New Modules
|
||||
|
||||
| Module | File | Responsibility |
|
||||
|--------|------|----------------|
|
||||
| Report Data Collector | `services/reporting/collector.py` | Queries trading data for a reporting period |
|
||||
| Report Section Builder | `services/reporting/sections.py` | Builds structured report sections from raw data |
|
||||
| Report Validator | `services/reporting/validator.py` | Cross-checks metrics against validation tables |
|
||||
| AI Summarizer | `services/reporting/summarizer.py` | Chunks data and generates AI summaries |
|
||||
| Report Generator | `services/reporting/generator.py` | Orchestrates the full report generation pipeline |
|
||||
| Report Models | `services/reporting/models.py` | Pydantic models for report structure and serialization |
|
||||
|
||||
### Modified Modules
|
||||
|
||||
| Module | File | Changes |
|
||||
|--------|------|---------|
|
||||
| Query API | `services/api/app.py` | 2 new `/api/reports` endpoints |
|
||||
| Redis Keys | `services/shared/redis_keys.py` | New `QUEUE_REPORT_GENERATION` constant |
|
||||
| Frontend Hooks | `frontend/src/api/hooks.ts` | 2 new report hooks |
|
||||
| DB Migration | `infra/migrations/038_trading_reports.sql` | New migration file: `trading_reports` table + agent seed |
|
||||
|
||||
### Component Interface Details
|
||||
|
||||
#### 1. Report Models (`services/reporting/models.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ReportType(str, Enum):
|
||||
DAILY = "daily"
|
||||
WEEKLY = "weekly"
|
||||
|
||||
|
||||
class ValidationStatus(str, Enum):
|
||||
PASSED = "passed"
|
||||
WARNINGS = "warnings"
|
||||
|
||||
|
||||
class ValidationWarning(BaseModel):
|
||||
field_name: str
|
||||
computed_value: float
|
||||
snapshot_value: float
|
||||
pct_difference: float
|
||||
|
||||
|
||||
class PLSection(BaseModel):
|
||||
realized_pnl: float
|
||||
unrealized_pnl: float
|
||||
daily_return: float
|
||||
cumulative_return: float
|
||||
win_count: int
|
||||
loss_count: int
|
||||
win_rate: float
|
||||
profit_factor: float
|
||||
sharpe_ratio: float
|
||||
summary: str = ""
|
||||
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RecommendationAccuracySection(BaseModel):
|
||||
total_evaluated: int
|
||||
act_count: int
|
||||
skip_count: int
|
||||
acted_win_rate: float
|
||||
avg_confidence_acted: float
|
||||
avg_confidence_skipped: float
|
||||
summary: str = ""
|
||||
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PositionDetail(BaseModel):
|
||||
ticker: str
|
||||
entry_price: float
|
||||
current_or_exit_price: float
|
||||
pnl: float
|
||||
pnl_pct: float
|
||||
hold_duration_hours: float
|
||||
status: str # "open" or "closed"
|
||||
|
||||
|
||||
class PositionPerformanceSection(BaseModel):
|
||||
positions: list[PositionDetail] = Field(default_factory=list)
|
||||
summary: str = ""
|
||||
|
||||
|
||||
class RiskMetricsSection(BaseModel):
|
||||
current_risk_tier: str
|
||||
portfolio_heat: float
|
||||
max_drawdown: float
|
||||
current_drawdown_pct: float
|
||||
reserve_pool_balance: float
|
||||
circuit_breaker_event_count: int
|
||||
summary: str = ""
|
||||
|
||||
|
||||
class ModelQualityWindow(BaseModel):
|
||||
lookback: str
|
||||
win_rate: float | None
|
||||
directional_accuracy: float | None
|
||||
information_coefficient: float | None
|
||||
calibration_error: float | None
|
||||
brier_score: float | None
|
||||
|
||||
|
||||
class ModelQualitySection(BaseModel):
|
||||
windows: list[ModelQualityWindow] = Field(default_factory=list)
|
||||
summary: str = ""
|
||||
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ReportData(BaseModel):
|
||||
"""Top-level report structure stored as JSONB."""
|
||||
pnl: PLSection
|
||||
recommendation_accuracy: RecommendationAccuracySection
|
||||
position_performance: PositionPerformanceSection
|
||||
risk_metrics: RiskMetricsSection
|
||||
model_quality: ModelQualitySection
|
||||
executive_summary: str = ""
|
||||
validation_status: ValidationStatus = ValidationStatus.PASSED
|
||||
generated_at: datetime
|
||||
period_start: date
|
||||
period_end: date
|
||||
report_type: ReportType
|
||||
```
|
||||
|
||||
#### 2. Report Data Collector (`services/reporting/collector.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime
|
||||
import asyncpg
|
||||
|
||||
|
||||
@dataclass
|
||||
class CollectedData:
|
||||
"""Raw data collected for a reporting period."""
|
||||
trading_decisions: list[dict]
|
||||
orders: list[dict]
|
||||
open_positions: list[dict]
|
||||
closed_positions: list[dict]
|
||||
portfolio_snapshot: dict | None
|
||||
previous_portfolio_snapshot: dict | None
|
||||
recommendations: list[dict]
|
||||
prediction_outcomes: list[dict]
|
||||
model_metric_snapshots: list[dict]
|
||||
circuit_breaker_events: list[dict]
|
||||
reserve_pool_balance: float
|
||||
|
||||
|
||||
async def collect_report_data(
|
||||
pool: asyncpg.Pool,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> CollectedData:
|
||||
"""Query all trading data for the reporting period.
|
||||
|
||||
Queries: trading_decisions, orders, positions, portfolio_snapshots,
|
||||
recommendations, prediction_outcomes, model_metric_snapshots,
|
||||
circuit_breaker_events, reserve_pool_ledger.
|
||||
|
||||
Returns CollectedData with all raw query results.
|
||||
If no trading_decisions exist, returns empty lists (zero-activity).
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 3. Report Section Builder (`services/reporting/sections.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from services.reporting.models import (
|
||||
PLSection, RecommendationAccuracySection,
|
||||
PositionPerformanceSection, PositionDetail,
|
||||
RiskMetricsSection, ModelQualitySection, ModelQualityWindow,
|
||||
)
|
||||
from services.reporting.collector import CollectedData
|
||||
|
||||
|
||||
def build_pnl_section(data: CollectedData) -> PLSection:
|
||||
"""Build P&L section from collected data.
|
||||
|
||||
Computes realized/unrealized P&L, daily return, cumulative return,
|
||||
win/loss counts, win rate, profit factor, and Sharpe ratio from
|
||||
portfolio_snapshot and closed positions.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_recommendation_accuracy_section(data: CollectedData) -> RecommendationAccuracySection:
|
||||
"""Build recommendation accuracy section.
|
||||
|
||||
Joins trading_decisions with prediction_outcomes to compute
|
||||
act/skip breakdown, win rate of acted recommendations, and
|
||||
average confidence of acted vs skipped.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_position_performance_section(data: CollectedData) -> PositionPerformanceSection:
|
||||
"""Build position performance section.
|
||||
|
||||
Lists each position (open and closed) with entry price,
|
||||
current/exit price, P&L, P&L%, and hold duration.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection:
|
||||
"""Build risk metrics section.
|
||||
|
||||
Extracts current risk tier, portfolio heat, max drawdown,
|
||||
current drawdown %, reserve pool balance, and circuit breaker
|
||||
event count from collected data.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_model_quality_section(data: CollectedData) -> ModelQualitySection:
|
||||
"""Build model quality section.
|
||||
|
||||
Extracts latest model_metric_snapshot values for 7d, 30d, 90d
|
||||
lookback windows.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 4. Report Validator (`services/reporting/validator.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
import asyncpg
|
||||
from services.reporting.models import (
|
||||
ReportData, ValidationStatus, ValidationWarning,
|
||||
)
|
||||
|
||||
|
||||
DISCREPANCY_THRESHOLD_PCT = 5.0
|
||||
|
||||
|
||||
def validate_recommendation_accuracy(
|
||||
section: "RecommendationAccuracySection",
|
||||
prediction_outcomes: list[dict],
|
||||
) -> list[ValidationWarning]:
|
||||
"""Cross-reference reported win rates with prediction_outcomes.
|
||||
|
||||
Compares computed win rate against direction_correct/profitable
|
||||
fields from prediction_outcomes for the same tickers and period.
|
||||
Returns warnings for discrepancies > 5%.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def validate_model_quality(
|
||||
section: "ModelQualitySection",
|
||||
metric_snapshots: list[dict],
|
||||
) -> list[ValidationWarning]:
|
||||
"""Compare reported model quality metrics against model_metric_snapshots.
|
||||
|
||||
Flags discrepancies > 5% between computed and snapshot values
|
||||
for win_rate, directional_accuracy, IC, ECE, and Brier score.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_validation_status(report: ReportData) -> ValidationStatus:
|
||||
"""Determine overall validation status.
|
||||
|
||||
Returns 'passed' if no warnings across all sections,
|
||||
'warnings' if any section has validation warnings.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 5. AI Summarizer (`services/reporting/summarizer.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
import asyncpg
|
||||
from services.shared.agent_config import AgentConfigResolver
|
||||
|
||||
|
||||
CHUNK_SIZE_LIMIT = 6000 # characters per chunk
|
||||
MAX_SUMMARY_WORDS = 200 # per section summary
|
||||
MAX_EXECUTIVE_SUMMARY_WORDS = 300
|
||||
|
||||
|
||||
def chunk_data(serialized: str, max_chars: int = CHUNK_SIZE_LIMIT) -> list[str]:
|
||||
"""Split serialized data into chunks of at most max_chars.
|
||||
|
||||
Prefers newline boundaries to avoid breaking JSON structures; a single line longer than max_chars is split mid-line so the size limit always holds.
|
||||
Each chunk is ≤ max_chars characters.
|
||||
Returns at least one chunk (even if empty input).
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def summarize_section(
|
||||
pool: asyncpg.Pool,
|
||||
resolver: AgentConfigResolver,
|
||||
section_name: str,
|
||||
section_data: str,
|
||||
) -> str:
|
||||
"""Generate AI summary for a report section.
|
||||
|
||||
1. Serialize section data to string
|
||||
2. Chunk if > CHUNK_SIZE_LIMIT
|
||||
3. Summarize each chunk via Report_Summarizer_Agent
|
||||
4. If multiple chunks, merge summaries with a final LLM call
|
||||
5. Log each invocation to agent_performance_log
|
||||
6. On failure after max_retries, fall back to deterministic summary
|
||||
|
||||
Uses AgentConfigResolver to resolve agent config by slug
|
||||
'report-summarizer', then llm_factory to build the LLM client.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_deterministic_summary(section_name: str, section_data: dict) -> str:
|
||||
"""Build a fallback deterministic summary from raw metrics.
|
||||
|
||||
Produces a template-based text summary when AI summarization fails.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def generate_executive_summary(
|
||||
pool: asyncpg.Pool,
|
||||
resolver: AgentConfigResolver,
|
||||
section_summaries: dict[str, str],
|
||||
) -> str:
|
||||
"""Generate executive summary from all section summaries.
|
||||
|
||||
Concatenates section summaries, chunks if needed, and produces
|
||||
a ≤300-word synthesis via the Report_Summarizer_Agent.
|
||||
Falls back to concatenated section summaries on failure.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 6. Report Generator (`services/reporting/generator.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from datetime import date
|
||||
import asyncpg
|
||||
from services.reporting.models import ReportData, ReportType
|
||||
|
||||
|
||||
async def generate_report(
|
||||
pool: asyncpg.Pool,
|
||||
report_type: ReportType,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> ReportData:
|
||||
"""Orchestrate full report generation.
|
||||
|
||||
1. Collect data via collector
|
||||
2. Build sections via section builder
|
||||
3. Validate sections via validator
|
||||
4. Generate AI summaries via summarizer
|
||||
5. Generate executive summary
|
||||
6. Assemble final ReportData
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def store_report(
|
||||
pool: asyncpg.Pool,
|
||||
report: ReportData,
|
||||
) -> str:
|
||||
"""Store report in trading_reports table.
|
||||
|
||||
Uses INSERT ... ON CONFLICT (report_type, period_start, period_end)
|
||||
DO UPDATE to handle regeneration of existing reports.
|
||||
|
||||
Returns the report UUID.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def process_report_job(
|
||||
pool: asyncpg.Pool,
|
||||
job: dict,
|
||||
) -> None:
|
||||
"""Process a report generation job from the Redis queue.
|
||||
|
||||
Deserializes job payload, calls generate_report + store_report.
|
||||
Retries a failed job with exponential backoff (up to 3 retries: 30s, 60s, 120s) before marking it as failed.
|
||||
Rejects duplicate jobs for the same report_type + period.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 7. API Endpoints (added to `services/api/app.py`)
|
||||
|
||||
| Endpoint | Method | Parameters | Returns |
|
||||
|----------|--------|------------|---------|
|
||||
| `GET /api/reports` | GET | `report_type`, `start_date`, `end_date`, `limit`, `offset` | Paginated list: id, report_type, period_start, period_end, validation_status, generated_at |
|
||||
| `GET /api/reports/{report_id}` | GET | — | Full report including report_data JSONB |
|
||||
|
||||
#### 8. Frontend Hooks (added to `frontend/src/api/hooks.ts`)
|
||||
|
||||
```typescript
|
||||
export interface ReportListItem {
|
||||
id: string;
|
||||
report_type: string;
|
||||
period_start: string;
|
||||
period_end: string;
|
||||
validation_status: string;
|
||||
generated_at: string;
|
||||
}
|
||||
|
||||
export interface ReportDetail extends ReportListItem {
|
||||
report_data: Record<string, unknown>;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
export function useReports(params?: {
|
||||
report_type?: string;
|
||||
start_date?: string;
|
||||
end_date?: string;
|
||||
limit?: number;
|
||||
offset?: number;
|
||||
}) {
|
||||
const qs = new URLSearchParams();
|
||||
if (params?.report_type) qs.set('report_type', params.report_type);
|
||||
if (params?.start_date) qs.set('start_date', params.start_date);
|
||||
if (params?.end_date) qs.set('end_date', params.end_date);
|
||||
if (params?.limit) qs.set('limit', String(params.limit));
|
||||
if (params?.offset) qs.set('offset', String(params.offset));
|
||||
const path = `/api/reports${qs.toString() ? '?' + qs : ''}`;
|
||||
return useGet<ReportListItem[]>(['reports', params], 'query', path);
|
||||
}
|
||||
|
||||
export function useReport(id: string | undefined) {
|
||||
return useGet<ReportDetail>(
|
||||
['report', id], 'query', `/api/reports/${id}`, !!id
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
### Database Schema (Migration 038)
|
||||
|
||||
#### trading_reports
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS trading_reports (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
report_type VARCHAR(20) NOT NULL,
|
||||
period_start DATE NOT NULL,
|
||||
period_end DATE NOT NULL,
|
||||
report_data JSONB NOT NULL,
|
||||
validation_status VARCHAR(20) NOT NULL DEFAULT 'passed',
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT uq_trading_reports_period UNIQUE (report_type, period_start, period_end),
|
||||
CONSTRAINT chk_report_type CHECK (report_type IN ('daily', 'weekly'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_type ON trading_reports(report_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_period ON trading_reports(period_start, period_end);
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_generated ON trading_reports(generated_at DESC);
|
||||
```
|
||||
|
||||
#### Report Summarizer Agent Seed
|
||||
|
||||
```sql
|
||||
INSERT INTO ai_agents (name, slug, purpose, model_provider, model_name, system_prompt, prompt_version, schema_version, temperature, max_tokens, timeout_seconds, max_retries, source)
|
||||
SELECT * FROM (VALUES
|
||||
(
|
||||
'Report Summarizer',
|
||||
'report-summarizer',
|
||||
'Generates concise natural-language summaries of trading performance report sections. Processes chunked data within the 8k-token context window.',
|
||||
'ollama',
|
||||
'qwen3.5:9b-fast',
|
||||
E'You are a concise financial performance analyst. You summarize trading performance data into clear, professional prose.\n\nSTRICT RULES:\n1. Do NOT fabricate any data not present in the input.\n2. Do NOT add opinions, predictions, or recommendations.\n3. Keep each summary under 200 words.\n4. Highlight notable trends, outliers, and changes from prior periods.\n5. Use precise numbers from the input data.\n6. Use a neutral, professional tone.\n7. Return ONLY the summary text. No JSON, no markdown, no commentary.',
|
||||
'report-summarizer-v1',
|
||||
'1.0.0',
|
||||
0.0,
|
||||
1024,
|
||||
60,
|
||||
2,
|
||||
'system'
|
||||
)
|
||||
) AS v(name, slug, purpose, model_provider, model_name, system_prompt, prompt_version, schema_version, temperature, max_tokens, timeout_seconds, max_retries, source)
|
||||
WHERE NOT EXISTS (SELECT 1 FROM ai_agents WHERE slug = 'report-summarizer');
|
||||
```
|
||||
|
||||
### Report JSONB Structure
|
||||
|
||||
The `report_data` column stores a JSON object matching the `ReportData` Pydantic model:
|
||||
|
||||
```json
|
||||
{
|
||||
"pnl": {
|
||||
"realized_pnl": 125.50,
|
||||
"unrealized_pnl": -30.20,
|
||||
"daily_return": 0.012,
|
||||
"cumulative_return": 0.085,
|
||||
"win_count": 8,
|
||||
"loss_count": 3,
|
||||
"win_rate": 0.727,
|
||||
"profit_factor": 2.15,
|
||||
"sharpe_ratio": 1.42,
|
||||
"summary": "AI-generated summary...",
|
||||
"validation_warnings": []
|
||||
},
|
||||
"recommendation_accuracy": {
|
||||
"total_evaluated": 15,
|
||||
"act_count": 8,
|
||||
"skip_count": 7,
|
||||
"acted_win_rate": 0.75,
|
||||
"avg_confidence_acted": 0.72,
|
||||
"avg_confidence_skipped": 0.48,
|
||||
"summary": "AI-generated summary...",
|
||||
"validation_warnings": []
|
||||
},
|
||||
"position_performance": {
|
||||
"positions": [
|
||||
{
|
||||
"ticker": "AAPL",
|
||||
"entry_price": 185.50,
|
||||
"current_or_exit_price": 192.30,
|
||||
"pnl": 68.00,
|
||||
"pnl_pct": 3.66,
|
||||
"hold_duration_hours": 72.5,
|
||||
"status": "open"
|
||||
}
|
||||
],
|
||||
"summary": "AI-generated summary..."
|
||||
},
|
||||
"risk_metrics": {
|
||||
"current_risk_tier": "moderate",
|
||||
"portfolio_heat": 0.12,
|
||||
"max_drawdown": 0.08,
|
||||
"current_drawdown_pct": 0.03,
|
||||
"reserve_pool_balance": 450.00,
|
||||
"circuit_breaker_event_count": 1,
|
||||
"summary": "AI-generated summary..."
|
||||
},
|
||||
"model_quality": {
|
||||
"windows": [
|
||||
{
|
||||
"lookback": "7d",
|
||||
"win_rate": 0.65,
|
||||
"directional_accuracy": 0.62,
|
||||
"information_coefficient": 0.08,
|
||||
"calibration_error": 0.12,
|
||||
"brier_score": 0.22
|
||||
}
|
||||
],
|
||||
"summary": "AI-generated summary...",
|
||||
"validation_warnings": []
|
||||
},
|
||||
"executive_summary": "AI-generated executive summary...",
|
||||
"validation_status": "passed",
|
||||
"generated_at": "2025-01-15T21:30:00Z",
|
||||
"period_start": "2025-01-15",
|
||||
"period_end": "2025-01-15",
|
||||
"report_type": "daily"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Correctness Properties
|
||||
|
||||
*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.*
|
||||
|
||||
The following properties were derived from the acceptance criteria through systematic prework analysis. After reflection, 5 unique properties remain. Report section structure checks (3.1–3.5) are subsumed by the round-trip property — if a ReportData object survives serialization and deserialization, its structure is correct by construction (Pydantic enforces required fields). Validation status computation (4.4) is subsumed by the discrepancy detection property. ISO 8601 datetime formatting (8.4) is verified as part of the round-trip property since Pydantic's JSON serialization uses ISO 8601 by default and the round-trip would fail if datetimes were mangled.
|
||||
|
||||
### Property 1: Chunking Round-Trip and Size Constraint
|
||||
|
||||
*For any* input string, splitting it into chunks with a maximum size limit SHALL produce chunks where (a) every chunk is ≤ the size limit in characters, (b) no chunk is empty (except when the input itself is empty, which produces exactly one empty chunk), and (c) concatenating all chunks in order reconstructs the original input string.
|
||||
|
||||
**Validates: Requirements 2.2**
|
||||
|
||||
### Property 2: Report Serialization Round-Trip
|
||||
|
||||
*For any* valid ReportData object (with valid P&L, recommendation accuracy, position performance, risk metrics, and model quality sections), serializing to JSON and then deserializing back SHALL produce a ReportData object equivalent to the original. All datetime fields in the serialized JSON SHALL be in ISO 8601 format.
|
||||
|
||||
**Validates: Requirements 8.1, 8.2, 8.3, 8.4**
|
||||
|
||||
### Property 3: Validation Discrepancy Detection Correctness
|
||||
|
||||
*For any* pair of computed metric value and snapshot metric value (both finite, non-negative floats), the validation function SHALL produce a warning if and only if the percentage difference exceeds 5%. When snapshot > 0, the percentage difference SHALL be computed as `|computed - snapshot| / snapshot * 100`. When snapshot is 0, any non-zero computed value SHALL be flagged (reported as a 100% difference), and a computed value of 0 SHALL produce no warning (0% difference).
|
||||
|
||||
**Validates: Requirements 4.1, 4.2, 4.3, 4.4**
|
||||
|
||||
### Property 4: Recommendation Accuracy Aggregation
|
||||
|
||||
*For any* non-empty list of trading decisions with associated prediction outcomes (each having a boolean `direction_correct`, boolean `profitable`, and float `excess_return_vs_spy`), the computed win rate SHALL equal the count of profitable outcomes divided by total outcomes, the directional accuracy SHALL equal the count of direction-correct outcomes divided by total outcomes, and the average excess return SHALL equal the arithmetic mean of all excess_return_vs_spy values. The win rate and directional accuracy SHALL each lie in [0.0, 1.0], and the average excess return SHALL be finite.
|
||||
|
||||
**Validates: Requirements 1.4**
|
||||
|
||||
### Property 5: Portfolio Period-Over-Period Delta Computation
|
||||
|
||||
*For any* two valid portfolio snapshots (current and previous) with non-negative portfolio_value, active_pool, reserve_pool, and finite cumulative_return, the period-over-period deltas SHALL equal (current - previous) for each field. When no previous snapshot exists, the deltas SHALL be zero.
|
||||
|
||||
**Validates: Requirements 1.3**
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Data Collection Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| No trading_decisions for period | Generate zero-activity report with note "No trading activity during this period" |
|
||||
| No portfolio_snapshot for period | Use most recent snapshot before period_start; if none exists, use zero values |
|
||||
| No prediction_outcomes for period | Skip recommendation accuracy validation; set validation_warnings noting missing data |
|
||||
| No model_metric_snapshots for period | Model quality section shows NULL values for all metrics |
|
||||
| Database connection failure during collection | Propagate error to job processor for retry |
|
||||
|
||||
### AI Summarization Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| LLM timeout (>60s) | Retry up to max_retries (from agent config, default 2) |
|
||||
| LLM returns empty response | Treat as failure, retry |
|
||||
| LLM returns response > 200 words | Truncate to 200 words at sentence boundary |
|
||||
| All LLM retries exhausted | Fall back to deterministic template summary |
|
||||
| AgentConfigResolver returns None (agent not found) | Log error, use deterministic summary for all sections |
|
||||
| Chunk merge LLM call fails | Use concatenation of chunk summaries (joined with newlines) |
|
||||
|
||||
### Validation Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Snapshot value is 0 and computed value is non-zero | Flag as warning with pct_difference = 100.0 |
|
||||
| Both snapshot and computed values are 0 | No warning (0% difference) |
|
||||
| Snapshot value is NULL | Skip validation for that metric, no warning |
|
||||
| Computed value is NaN or infinity | Replace with 0.0, log warning |
|
||||
| No prediction_outcomes to cross-reference | Skip recommendation accuracy validation entirely |
|
||||
|
||||
### Report Storage Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Unique constraint violation on insert | Use ON CONFLICT DO UPDATE to upsert |
|
||||
| JSONB serialization failure | Log error with report structure, propagate to job processor |
|
||||
| Report exceeds PostgreSQL JSONB size limit (~255 MB) | Extremely unlikely given report structure; log error if it occurs |
|
||||
|
||||
### Job Processing Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Job attempt fails | Retry up to 3 times with exponential backoff: 30s, 60s, 120s |
|
||||
| Job fails after 3 retries | Mark job as failed, log error with full context |
|
||||
| Duplicate job submitted for same period | Reject with log message, return without error |
|
||||
| Redis connection failure | Job stays in queue, picked up on reconnection |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Property-Based Tests (Hypothesis)
|
||||
|
||||
Property-based tests use the Hypothesis library with `@settings(max_examples=100)`. Test files are prefixed `test_pbt_*` per project convention.
|
||||
|
||||
| Property | Test File | What It Tests |
|
||||
|----------|-----------|---------------|
|
||||
| Property 1: Chunking Round-Trip | `tests/test_pbt_report_chunking.py` | `chunk_data()` preserves content and respects size limits |
|
||||
| Property 2: Report Serialization Round-Trip | `tests/test_pbt_report_serialization.py` | `ReportData.model_dump_json()` → `ReportData.model_validate_json()` round-trip |
|
||||
| Property 3: Validation Discrepancy Detection | `tests/test_pbt_report_validation.py` | Discrepancy detection correctly flags >5% differences |
|
||||
| Property 4: Recommendation Accuracy Aggregation | `tests/test_pbt_report_sections.py` | `build_recommendation_accuracy_section()` computes correct aggregates |
|
||||
| Property 5: Portfolio Delta Computation | `tests/test_pbt_report_sections.py` | `build_pnl_section()` computes correct period-over-period deltas |
|
||||
|
||||
Each property test is tagged with a comment referencing the design property:
|
||||
```python
|
||||
# Feature: trading-feedback-engine, Property 1: Chunking round-trip and size constraint
|
||||
```
|
||||
|
||||
### Unit Tests (pytest)
|
||||
|
||||
| Test File | Coverage |
|
||||
|-----------|----------|
|
||||
| `tests/test_report_sections.py` | Section builders with known inputs, edge cases (empty data, single position, zero-activity) |
|
||||
| `tests/test_report_validator.py` | Specific discrepancy scenarios, boundary cases (exactly 5%), NULL snapshot values |
|
||||
| `tests/test_report_summarizer.py` | Deterministic fallback summary, chunk splitting edge cases (empty input, single char) |
|
||||
| `tests/test_report_models.py` | Pydantic model validation, enum constraints, default values |
|
||||
| `tests/test_report_generator.py` | Orchestration with mocked dependencies, zero-activity report, upsert behavior |
|
||||
|
||||
### Integration Tests
|
||||
|
||||
| Test File | Coverage |
|
||||
|-----------|----------|
|
||||
| `tests/test_report_api.py` | API endpoints with seeded database, pagination, filtering by report_type and date range |
|
||||
| `tests/test_report_storage.py` | Store/retrieve round-trip against real asyncpg pool, upsert behavior, unique constraint |
|
||||
|
||||
### Frontend Tests (Vitest)
|
||||
|
||||
| Test File | Coverage |
|
||||
|-----------|----------|
|
||||
| `frontend/src/test/reports.test.ts` | useReports and useReport hooks with MSW mocks, loading/error states |
|
||||
|
||||
### Test Configuration
|
||||
|
||||
- Python PBT: Hypothesis with `@settings(max_examples=100)`, files prefixed `test_pbt_*`
|
||||
- Python unit/integration: pytest with pytest-asyncio for async code
|
||||
- Frontend: Vitest with MSW for deterministic API mocking
|
||||
- Lint: `ruff check services/` before all commits
|
||||
- CI: Woodpecker runs all tests automatically on push to Gitea
|
||||
@@ -0,0 +1,117 @@
|
||||
# Requirements Document
|
||||
|
||||
## Introduction
|
||||
|
||||
The Trading Feedback Engine generates periodic performance reports from the Stonks Oracle trading system. Reports cover trading P&L, recommendation accuracy, position performance, risk metrics, and model quality trends. An AI agent (registered in the `ai_agents` table) summarizes sections of the report by processing data in small chunks that fit within the 8k-token context window. Reports are validated against live data from the prediction outcomes and model metric snapshots tables, stored in the database for retrieval, and exposed via API endpoints.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Feedback_Engine**: The backend service that orchestrates report generation, data collection, AI summarization, and report storage.
|
||||
- **Report_Summarizer_Agent**: The AI agent registered in the `ai_agents` table that generates natural-language summaries for report sections. Uses the existing `AgentConfigResolver` and `llm_factory` infrastructure.
|
||||
- **Report**: A structured JSON document containing trading performance metrics, AI-generated summaries, and validation data for a specific period (daily or weekly).
|
||||
- **Report_Section**: A self-contained portion of a report (e.g., P&L summary, recommendation accuracy, position performance) that can be independently generated and summarized.
|
||||
- **Chunk**: A subset of data rows small enough to fit within the 8k-token context window when serialized, allowing the Report_Summarizer_Agent to process it in a single LLM call.
|
||||
- **Portfolio_Snapshot**: A daily record in the `portfolio_snapshots` table containing portfolio value, pool balances, returns, win/loss counts, Sharpe ratio, max drawdown, and risk tier.
|
||||
- **Prediction_Outcome**: A record in the `prediction_outcomes` table containing realized returns, direction correctness, and excess returns vs benchmarks for a prediction at a specific horizon.
|
||||
- **Model_Metric_Snapshot**: A record in the `model_metric_snapshots` table containing aggregate model quality metrics (win rate, IC, ECE, Brier score) for a lookback/horizon combination.
|
||||
- **Trading_Decision**: A record in the `trading_decisions` table capturing the act/skip decision, skip reason, position sizing, risk tier, circuit breaker status, and decision trace for a recommendation evaluation.
|
||||
- **Validation_Data**: Live data from `prediction_outcomes`, `model_metric_snapshots`, and `signal_evidence_links` used to cross-check report claims against actual measured performance.
|
||||
- **Query_API**: The existing FastAPI service (`services/api/app.py`) that serves HTTP endpoints for the dashboard and external consumers.
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Report Data Collection
|
||||
|
||||
**User Story:** As a trader, I want the feedback engine to collect all relevant trading data for a reporting period, so that reports reflect the complete picture of trading activity.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a report generation is triggered for a date range, THE Feedback_Engine SHALL query trading_decisions, orders, positions, portfolio_snapshots, recommendations, prediction_outcomes, and model_metric_snapshots for that period.
|
||||
2. WHEN collecting trading decision data, THE Feedback_Engine SHALL include the decision type, skip reason, ticker, computed position size, risk tier, circuit breaker status, and correlation check result for each Trading_Decision.
|
||||
3. WHEN collecting portfolio data, THE Feedback_Engine SHALL retrieve the most recent Portfolio_Snapshot within the reporting period and compute period-over-period changes in portfolio value, active pool, reserve pool, and cumulative return.
|
||||
4. WHEN collecting recommendation accuracy data, THE Feedback_Engine SHALL join recommendations with Prediction_Outcomes to compute win rate, directional accuracy, and average excess return vs SPY for the period.
|
||||
5. IF no trading_decisions exist for the requested period, THEN THE Feedback_Engine SHALL generate a report with zero-activity sections and a note indicating no trading occurred.
|
||||
|
||||
### Requirement 2: Chunked AI Summarization
|
||||
|
||||
**User Story:** As a trader, I want AI-generated summaries in my reports, so that I can quickly understand performance trends without reading raw numbers.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Report_Summarizer_Agent SHALL be registered in the `ai_agents` table with slug `report-summarizer`, model `qwen3.5:9b-fast`, and source `system`.
|
||||
2. WHEN generating a summary for a Report_Section, THE Feedback_Engine SHALL serialize the section data into Chunks of no more than 6,000 characters each to stay within the 8k-token context window.
|
||||
3. WHEN a Report_Section contains data that exceeds a single Chunk, THE Feedback_Engine SHALL split the data into multiple Chunks, summarize each Chunk independently, and then produce a final merged summary from the individual Chunk summaries.
|
||||
4. WHEN invoking the Report_Summarizer_Agent, THE Feedback_Engine SHALL use the existing `AgentConfigResolver` and `llm_factory` infrastructure to resolve model configuration and build the LLM client.
|
||||
5. WHEN invoking the Report_Summarizer_Agent, THE Feedback_Engine SHALL log each invocation to the `agent_performance_log` table with agent_id, success status, duration_ms, and token estimates.
|
||||
6. IF the Report_Summarizer_Agent fails after max_retries, THEN THE Feedback_Engine SHALL fall back to a deterministic text summary built from the raw metrics and continue report generation.
|
||||
|
||||
### Requirement 3: Report Structure and Content
|
||||
|
||||
**User Story:** As a trader, I want reports to cover P&L, recommendation accuracy, position performance, risk metrics, and model quality, so that I have a comprehensive view of system performance.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Report SHALL contain a P&L section with realized P&L, unrealized P&L, daily return, cumulative return, win count, loss count, win rate, profit factor, and Sharpe ratio for the reporting period.
|
||||
2. THE Report SHALL contain a recommendation accuracy section with total recommendations evaluated, act/skip breakdown, win rate of acted-upon recommendations, and average confidence of acted vs skipped recommendations.
|
||||
3. THE Report SHALL contain a position performance section listing each position held during the period with ticker, entry price, current or exit price, unrealized or realized P&L, P&L percentage, and hold duration.
|
||||
4. THE Report SHALL contain a risk metrics section with current risk tier, portfolio heat, max drawdown, current drawdown percentage, reserve pool balance, and a count of circuit breaker events during the period.
|
||||
5. THE Report SHALL contain a model quality section with the latest Model_Metric_Snapshot values for win rate, directional accuracy, information coefficient, calibration error (ECE), and Brier score across the 7d, 30d, and 90d lookback windows.
|
||||
6. THE Report SHALL contain an AI-generated executive summary that synthesizes the key findings from all sections into a concise narrative of no more than 300 words.
|
||||
|
||||
### Requirement 4: Report Validation Against Live Data
|
||||
|
||||
**User Story:** As a trader, I want report metrics to be cross-checked against live validation data, so that I can trust the accuracy of the reported numbers.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN generating the recommendation accuracy section, THE Feedback_Engine SHALL cross-reference reported win rates with the `direction_correct` and `profitable` fields from Prediction_Outcomes for the same tickers and period.
|
||||
2. WHEN generating the model quality section, THE Feedback_Engine SHALL compare the reported metrics against the most recent Model_Metric_Snapshot records and flag discrepancies greater than 5% between computed and snapshot values.
|
||||
3. WHEN a validation discrepancy is detected, THE Feedback_Engine SHALL include a `validation_warnings` array in the report section with the field name, computed value, snapshot value, and percentage difference.
|
||||
4. THE Report SHALL include a `validation_status` field set to `passed` when no discrepancies exceed 5%, or `warnings` when one or more discrepancies are detected.
|
||||
|
||||
### Requirement 5: Report Storage and Retrieval
|
||||
|
||||
**User Story:** As a trader, I want reports stored in the database and accessible via API, so that I can review historical performance at any time.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL store each generated Report as a row in a `trading_reports` table with columns for id (UUID), report_type (daily/weekly), period_start (DATE), period_end (DATE), report_data (JSONB), validation_status (VARCHAR), generated_at (TIMESTAMPTZ), and created_at (TIMESTAMPTZ).
|
||||
2. THE Feedback_Engine SHALL enforce a unique constraint on (report_type, period_start, period_end) to prevent duplicate reports for the same period.
|
||||
3. WHEN a report for an existing period is regenerated, THE Feedback_Engine SHALL update the existing row with the new report_data, validation_status, and generated_at timestamp.
|
||||
4. THE Query_API SHALL expose a `GET /api/reports` endpoint that returns a paginated list of reports with id, report_type, period_start, period_end, validation_status, and generated_at.
|
||||
5. THE Query_API SHALL expose a `GET /api/reports/{report_id}` endpoint that returns the full report including report_data JSONB.
|
||||
6. THE Query_API SHALL support filtering reports by report_type and date range via query parameters on the `GET /api/reports` endpoint.
|
||||
|
||||
### Requirement 6: Periodic Report Generation
|
||||
|
||||
**User Story:** As a trader, I want reports generated automatically on a daily and weekly schedule, so that I always have up-to-date performance feedback.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL generate a daily report after market close (after 16:30 ET) covering the current trading day.
|
||||
2. THE Feedback_Engine SHALL generate a weekly report on Saturday covering the Monday-through-Friday trading week.
|
||||
3. WHEN a scheduled report generation is triggered, THE Feedback_Engine SHALL enqueue a report generation job on a Redis queue for asynchronous processing.
|
||||
4. IF a report generation job fails, THEN THE Feedback_Engine SHALL retry the job up to 3 times with exponential backoff before marking the job as failed.
|
||||
5. WHILE a report generation job is in progress for a given period, THE Feedback_Engine SHALL reject duplicate job submissions for the same report_type and period.
|
||||
|
||||
### Requirement 7: Agent Registration and Editability
|
||||
|
||||
**User Story:** As a trader, I want the report summarizer agent registered in the ai_agents table, so that I can edit its prompts, model, and parameters through the existing agent management API.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL register the Report_Summarizer_Agent in the `ai_agents` table via a database migration with slug `report-summarizer`, source `system`, model_provider `ollama`, and model_name `qwen3.5:9b-fast`.
|
||||
2. THE Report_Summarizer_Agent system prompt SHALL instruct the model to produce concise financial performance summaries, avoid fabricating data not present in the input, and keep each section summary under 200 words (the executive summary limit of 300 words is defined in Requirement 3.6).
|
||||
3. THE Report_Summarizer_Agent SHALL support variant creation and activation through the existing agent variants system, allowing A/B testing of different summarization prompts.
|
||||
4. WHEN the Report_Summarizer_Agent configuration is updated via the agent management API, THE Feedback_Engine SHALL pick up the new configuration within 60 seconds via the `AgentConfigResolver` TTL cache.
|
||||
|
||||
### Requirement 8: Report Serialization Round-Trip
|
||||
|
||||
**User Story:** As a developer, I want report data to survive serialization and deserialization without data loss, so that stored reports are always faithful to the generated content.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL serialize Report objects to JSON for storage in the `report_data` JSONB column.
|
||||
2. THE Feedback_Engine SHALL deserialize stored JSON back into Report objects for API responses.
|
||||
3. FOR ALL valid Report objects, serializing to JSON then deserializing back SHALL produce an equivalent Report object (round-trip property).
|
||||
4. THE Feedback_Engine SHALL use ISO 8601 format for all datetime fields in serialized reports.
|
||||
@@ -0,0 +1,195 @@
|
||||
# Implementation Plan: Trading Feedback Engine
|
||||
|
||||
## Overview
|
||||
|
||||
Add a periodic trading performance reporting system to Stonks Oracle. The system collects trading data, generates structured JSON reports with AI-powered summaries, validates metrics against live data, and stores reports for retrieval via API. Implementation follows the four-phase approach from the design: foundation → validation & AI → generator & API → scheduling & tests.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] 1. Database migration 038 — trading_reports table and report-summarizer agent
|
||||
- [x] 1.1 Create `infra/migrations/038_trading_reports.sql`
|
||||
- Create `trading_reports` table with columns: id (UUID PK, gen_random_uuid()), report_type (VARCHAR(20) NOT NULL), period_start (DATE NOT NULL), period_end (DATE NOT NULL), report_data (JSONB NOT NULL), validation_status (VARCHAR(20) NOT NULL DEFAULT 'passed'), generated_at (TIMESTAMPTZ NOT NULL), created_at (TIMESTAMPTZ NOT NULL DEFAULT NOW())
|
||||
- Add UNIQUE constraint on (report_type, period_start, period_end)
|
||||
- Add CHECK constraint: report_type IN ('daily', 'weekly')
|
||||
- Create indexes: idx_trading_reports_type, idx_trading_reports_period, idx_trading_reports_generated
|
||||
- Seed Report_Summarizer_Agent into ai_agents table with slug 'report-summarizer', model_provider 'ollama', model_name 'qwen3.5:9b-fast', source 'system', temperature 0.0, max_tokens 1024, timeout_seconds 60, max_retries 2
|
||||
- Use WHERE NOT EXISTS guard on agent insert to be idempotent
|
||||
- _Requirements: 5.1, 5.2, 7.1, 7.2_
|
||||
|
||||
- [x] 1.2 Add `QUEUE_REPORT_GENERATION` constant to `services/shared/redis_keys.py`
|
||||
- Add `QUEUE_REPORT_GENERATION = "report_generation"` following existing queue naming convention
|
||||
- _Requirements: 6.3_
|
||||
|
||||
- [x] 2. Phase 1 — Report models, data collector, and section builders
|
||||
- [x] 2.1 Create report models (`services/reporting/models.py`)
|
||||
- Create `services/reporting/__init__.py`
|
||||
- Define enums: ReportType (daily, weekly), ValidationStatus (passed, warnings)
|
||||
- Define Pydantic models: ValidationWarning, PLSection, RecommendationAccuracySection, PositionDetail, PositionPerformanceSection, RiskMetricsSection, ModelQualityWindow, ModelQualitySection, ReportData
|
||||
- ReportData includes all sections, executive_summary, validation_status, generated_at, period_start, period_end, report_type
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 8.1, 8.2, 8.4_
|
||||
|
||||
- [x] 2.2 Implement data collector (`services/reporting/collector.py`)
|
||||
- Define CollectedData dataclass with fields: trading_decisions, orders, open_positions, closed_positions, portfolio_snapshot, previous_portfolio_snapshot, recommendations, prediction_outcomes, model_metric_snapshots, circuit_breaker_events, reserve_pool_balance
|
||||
- Implement `collect_report_data(pool, period_start, period_end)` → CollectedData
|
||||
- Query trading_decisions, orders, positions (open + closed), portfolio_snapshots (current + previous), recommendations, prediction_outcomes, model_metric_snapshots, circuit_breaker_events, reserve_pool_ledger for the period
|
||||
- Return empty lists for tables with no data (zero-activity case)
|
||||
- Use `_row_dict()` pattern for UUID conversion from asyncpg rows
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5_
|
||||
|
||||
- [x] 2.3 Implement section builders (`services/reporting/sections.py`)
|
||||
- Implement `build_pnl_section(data: CollectedData) -> PLSection` — compute realized/unrealized P&L, daily return, cumulative return, win/loss counts, win rate, profit factor, Sharpe ratio from portfolio_snapshot and closed positions
|
||||
- Implement `build_recommendation_accuracy_section(data: CollectedData) -> RecommendationAccuracySection` — join trading_decisions with prediction_outcomes, compute act/skip breakdown, win rate of acted, avg confidence acted vs skipped
|
||||
- Implement `build_position_performance_section(data: CollectedData) -> PositionPerformanceSection` — list each position with ticker, entry price, current/exit price, P&L, P&L%, hold duration
|
||||
- Implement `build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection` — extract risk tier, portfolio heat, max drawdown, current drawdown %, reserve pool balance, circuit breaker event count
|
||||
- Implement `build_model_quality_section(data: CollectedData) -> ModelQualitySection` — extract model_metric_snapshot values for 7d, 30d, 90d lookback windows
|
||||
- Handle zero-activity gracefully (zero values, empty lists)
|
||||
- _Requirements: 1.3, 1.4, 3.1, 3.2, 3.3, 3.4, 3.5_
|
||||
|
||||
- [x] 3. Checkpoint — Verify foundation modules
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/reporting/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"` to verify models and section builders
|
||||
|
||||
- [x] 4. Phase 2 — Report validator and AI summarizer
|
||||
- [x] 4.1 Implement report validator (`services/reporting/validator.py`)
|
||||
- Define `DISCREPANCY_THRESHOLD_PCT = 5.0`
|
||||
- Implement `validate_recommendation_accuracy(section, prediction_outcomes)` → list[ValidationWarning] — compare computed win rate against direction_correct/profitable from prediction_outcomes, flag >5% discrepancies
|
||||
- Implement `validate_model_quality(section, metric_snapshots)` → list[ValidationWarning] — compare reported metrics against model_metric_snapshots for win_rate, directional_accuracy, IC, ECE, Brier score, flag >5% discrepancies
|
||||
- Implement `compute_validation_status(report: ReportData)` → ValidationStatus — return 'passed' if no warnings, 'warnings' if any section has validation_warnings
|
||||
- Handle edge cases: snapshot=0 with computed≠0 → 100% difference; both=0 → no warning; snapshot=NULL → skip; computed=NaN → replace with 0.0
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4_
|
||||
|
||||
- [x] 4.2 Implement AI summarizer (`services/reporting/summarizer.py`)
|
||||
- Define constants: CHUNK_SIZE_LIMIT=6000, MAX_SUMMARY_WORDS=200, MAX_EXECUTIVE_SUMMARY_WORDS=300
|
||||
- Implement `chunk_data(serialized: str, max_chars: int)` → list[str] — split on newline boundaries, each chunk ≤ max_chars, at least one chunk returned
|
||||
- Implement `summarize_section(pool, resolver, section_name, section_data)` → str — serialize, chunk if needed, summarize each chunk via Report_Summarizer_Agent (resolved by slug 'report-summarizer'), merge if multiple chunks, log to agent_performance_log, fall back to deterministic on failure
|
||||
- Implement `build_deterministic_summary(section_name, section_data)` → str — template-based fallback summary from raw metrics
|
||||
- Implement `generate_executive_summary(pool, resolver, section_summaries)` → str — concatenate section summaries, chunk if needed, produce ≤300-word synthesis, fall back to concatenation on failure
|
||||
- Use AgentConfigResolver + llm_factory for LLM access
|
||||
- Log each invocation to agent_performance_log with agent_id, success, duration_ms, token estimates
|
||||
- _Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.6_
|
||||
|
||||
- [x] 5. Checkpoint — Verify validator and summarizer
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/reporting/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"` to verify validator and summarizer
|
||||
|
||||
- [x] 6. Phase 3 — Report generator orchestrator and API endpoints
|
||||
- [x] 6.1 Implement report generator (`services/reporting/generator.py`)
|
||||
- Implement `generate_report(pool, report_type, period_start, period_end)` → ReportData — orchestrate: collect data → build sections → validate → summarize → assemble ReportData
|
||||
- Implement `store_report(pool, report)` → str (UUID) — INSERT ... ON CONFLICT (report_type, period_start, period_end) DO UPDATE for upsert, return report id
|
||||
- Implement `process_report_job(pool, job: dict)` → None — deserialize job payload, call generate_report + store_report, handle retries with exponential backoff (30s, 60s, 120s up to 3 attempts), reject duplicate jobs for same report_type + period
|
||||
- _Requirements: 5.1, 5.2, 5.3, 6.3, 6.4, 6.5_
|
||||
|
||||
- [x] 6.2 Add API endpoints to `services/api/app.py`
|
||||
- Add `GET /api/reports` — paginated list with query params: report_type, start_date, end_date, limit (default 20), offset (default 0); returns id, report_type, period_start, period_end, validation_status, generated_at
|
||||
- Add `GET /api/reports/{report_id}` — full report including report_data JSONB
|
||||
- Use asyncpg pool from existing app state
|
||||
- Return 404 for non-existent report_id
|
||||
- _Requirements: 5.4, 5.5, 5.6_
|
||||
|
||||
- [x] 6.3 Add frontend hooks to `frontend/src/api/hooks.ts`
|
||||
- Add `ReportListItem` and `ReportDetail` TypeScript interfaces
|
||||
- Implement `useReports(params?)` hook — builds query string from report_type, start_date, end_date, limit, offset; uses `useGet` with 'query' base
|
||||
- Implement `useReport(id)` hook — fetches single report by id, enabled only when id is defined
|
||||
- _Requirements: 5.4, 5.5_
|
||||
|
||||
- [x] 7. Checkpoint — Verify generator and API
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"` to verify generator and API endpoints
|
||||
|
||||
- [x] 8. Phase 4 — Scheduling, property-based tests, unit tests, and frontend tests
|
||||
- [x] 8.1 Wire Redis queue integration and scheduler
|
||||
- Add report generation job consumer to the scheduler service that listens on `stonks:queue:report_generation`
|
||||
- Add daily report trigger (after 16:30 ET on trading days) and weekly report trigger (Saturday) to the scheduler
|
||||
- Job payload: `{"report_type": "daily"|"weekly", "period_start": "YYYY-MM-DD", "period_end": "YYYY-MM-DD"}`
|
||||
- _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5_
|
||||
|
||||
- [x] 8.2 Write property test: Chunking Round-Trip and Size Constraint
|
||||
- **Property 1: Chunking Round-Trip and Size Constraint**
|
||||
- File: `tests/test_pbt_report_chunking.py`
|
||||
- Use Hypothesis `@settings(max_examples=100)` with a single `@given(st.text(), st.integers(min_value=1, max_value=10000))` supplying the input text and `max_chars`
|
||||
- Assert: every chunk ≤ max_chars, no empty chunks (except empty input → one empty chunk), concatenation of chunks == original input
|
||||
- **Validates: Requirements 2.2**
|
||||
|
||||
- [x] 8.3 Write property test: Report Serialization Round-Trip
|
||||
- **Property 2: Report Serialization Round-Trip**
|
||||
- File: `tests/test_pbt_report_serialization.py`
|
||||
- Use Hypothesis with custom strategies for ReportData (valid PLSection, RecommendationAccuracySection, etc.)
|
||||
- Assert: `ReportData.model_validate_json(report.model_dump_json())` == original report
|
||||
- Assert: all datetime fields in serialized JSON are ISO 8601 format
|
||||
- **Validates: Requirements 8.1, 8.2, 8.3, 8.4**
|
||||
|
||||
- [x] 8.4 Write property test: Validation Discrepancy Detection Correctness
|
||||
- **Property 3: Validation Discrepancy Detection Correctness**
|
||||
- File: `tests/test_pbt_report_validation.py`
|
||||
- Use Hypothesis with `@given(st.floats(min_value=0, max_value=1e6), st.floats(min_value=0, max_value=1e6))`
|
||||
- Assert: warning iff |computed - snapshot| / snapshot * 100 > 5 (i.e. the percentage difference exceeds 5%) when snapshot > 0; flag any non-zero computed when snapshot == 0; no warning when both == 0
|
||||
- **Validates: Requirements 4.1, 4.2, 4.3, 4.4**
|
||||
|
||||
- [x] 8.5 Write property test: Recommendation Accuracy Aggregation
|
||||
- **Property 4: Recommendation Accuracy Aggregation**
|
||||
- File: `tests/test_pbt_report_sections.py`
|
||||
- Use Hypothesis with lists of trading decisions + prediction outcomes (direction_correct bool, profitable bool, excess_return_vs_spy float)
|
||||
- Assert: win_rate == count(profitable) / total, directional_accuracy == count(direction_correct) / total, avg excess return == mean(excess_return_vs_spy), all rates in [0.0, 1.0]
|
||||
- **Validates: Requirements 1.4**
|
||||
|
||||
- [x] 8.6 Write property test: Portfolio Period-Over-Period Delta Computation
|
||||
- **Property 5: Portfolio Period-Over-Period Delta Computation**
|
||||
- File: `tests/test_pbt_report_sections.py`
|
||||
- Use Hypothesis with two portfolio snapshots (non-negative portfolio_value, active_pool, reserve_pool, finite cumulative_return)
|
||||
- Assert: deltas == (current - previous) for each field; when no previous snapshot, deltas == 0
|
||||
- **Validates: Requirements 1.3**
|
||||
|
||||
- [x] 8.7 Write unit tests for section builders
|
||||
- File: `tests/test_report_sections.py`
|
||||
- Test each section builder with known inputs and expected outputs
|
||||
- Test edge cases: empty data (zero-activity), single position, no portfolio snapshot
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5_
|
||||
|
||||
- [x] 8.8 Write unit tests for report validator
|
||||
- File: `tests/test_report_validator.py`
|
||||
- Test specific discrepancy scenarios: exactly 5% (no warning), 5.1% (warning), snapshot=0 computed≠0, both=0, NULL snapshot
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4_
|
||||
|
||||
- [x] 8.9 Write unit tests for AI summarizer
|
||||
- File: `tests/test_report_summarizer.py`
|
||||
- Test deterministic fallback summary generation
|
||||
- Test chunk_data edge cases: empty input, single character, exactly at limit, one char over limit
|
||||
- _Requirements: 2.2, 2.6_
|
||||
|
||||
- [x] 8.10 Write unit tests for report generator
|
||||
- File: `tests/test_report_generator.py`
|
||||
- Test orchestration with mocked dependencies (collector, sections, validator, summarizer)
|
||||
- Test zero-activity report generation
|
||||
- Test upsert behavior (regeneration of existing report)
|
||||
- _Requirements: 5.1, 5.2, 5.3_
|
||||
|
||||
- [x] 8.11 Write API integration tests
|
||||
- File: `tests/test_report_api.py`
|
||||
- Test GET /api/reports with pagination, filtering by report_type and date range
|
||||
- Test GET /api/reports/{report_id} with valid and invalid IDs
|
||||
- _Requirements: 5.4, 5.5, 5.6_
|
||||
|
||||
- [x] 8.12 Write frontend hook tests
|
||||
- File: `frontend/src/test/reports.test.ts`
|
||||
- Test useReports and useReport hooks with MSW mocks
|
||||
- Test loading and error states
|
||||
- _Requirements: 5.4, 5.5_
|
||||
|
||||
- [x] 9. Final checkpoint — Full test suite and lint
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"`
|
||||
- Run frontend tests: `cd frontend && npx vitest --run`
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation after each phase
|
||||
- Property tests validate the 5 universal correctness properties from the design document
|
||||
- Unit tests validate specific examples and edge cases
|
||||
- The design document contains full interface signatures — use those as the implementation guide
|
||||
- Always run `.venv/bin/ruff check services/` before committing Python changes
|
||||
@@ -93,8 +93,35 @@ Ingestion jobs MUST include `source_id`, `source_type`, `ticker`, `company_id`,
|
||||
- The `competitor_relationships` table uses UUID company IDs — queries must join through `companies` to match by ticker
|
||||
- The dashboard Docker build uses TypeScript strict mode — unused imports that pass local diagnostics will fail in CI
|
||||
- Ingestion jobs require `source_id` from the `sources` table — don't just pass `ticker`
|
||||
- **Bash `!` in passwords/strings**: In interactive shells, Bash interprets `!` inside double quotes as history expansion. NEVER use double quotes around strings containing `!`. Use single quotes instead: `'St0nks0racl3!'`. For kubectl exec with psql, single-quote the whole SQL command and embed the literal single quotes with the `'"'"'` escaping trick: `kubectl exec ... -- psql -U postgres -c 'ALTER USER x WITH PASSWORD '"'"'password!'"'"';'`
|
||||
|
||||
## No Premature Simplification
|
||||
Do NOT "simplify" code on impulse. When the urge arises to simplify a section, STOP and do this instead:
|
||||
|
||||
1. **Evaluate the section**: Read the full function/module, not just the part that looks complex.
|
||||
2. **Map the dependencies**: Identify every caller, every consumer, every downstream component that depends on this code's behavior, return shape, or side effects.
|
||||
3. **Assess blast radius**: Would changing this function break other implementations? Check imports, tests, API contracts, database queries, and frontend expectations.
|
||||
4. **Respect intentional complexity**: If the code is complex because the domain is complex (financial math, multi-layer signal aggregation, Bayesian shrinkage), the complexity is load-bearing. Simplifying it will introduce bugs.
|
||||
5. **Only simplify when**: The complexity is accidental (dead code, redundant branches, copy-paste artifacts) AND you have confirmed no downstream dependencies break.
|
||||
|
||||
This codebase has interconnected layers (ingestion → extraction → aggregation → recommendation → trading → validation). A "simple" change to a scoring function can cascade through trend summaries, recommendations, snapshot capture, and outcome evaluation. Always trace the full path before refactoring.
|
||||
|
||||
## Documentation
|
||||
- Do NOT create large summary/success markdown files after each step
|
||||
- Keep notes short, concise, and organized under `docs/notes/`
|
||||
- If a note isn't useful for future reference, don't write it
|
||||
|
||||
## Documentation Maintenance on Feature Changes
|
||||
When implementing a feature or fix that introduces an impactful change, update the relevant documentation as part of the same commit or task. "Impactful" means any change that affects how someone installs, deploys, configures, operates, or understands the system. Specifically:
|
||||
|
||||
- **New database migrations**: Update `docs/architecture-data-pipeline.md` or `docs/api-reference.md` if new tables, views, or endpoints are added. Update `project-context.md` steering file with the new migration number.
|
||||
- **New API endpoints**: Update `docs/api-reference.md` with the endpoint path, method, parameters, and response shape.
|
||||
- **New services or service changes**: Update `docs/architecture-docker-compose.md` and `docs/docker-deployment.md` if a new service is added or an existing service's configuration changes.
|
||||
- **Helm chart changes**: Update `docs/helm-reference.md` if new values, services, or config options are added.
|
||||
- **New environment variables or secrets**: Update `docs/LOCAL_DEV_SETUP.md` and the project-context steering file.
|
||||
- **Install/deploy script changes**: Update `deploy-docker.sh`, `docs/docker-deployment.md`, or the relevant runme scripts if the deploy process changes.
|
||||
- **Frontend route or page additions**: Update `docs/api-reference.md` (if it covers UI routes) and ensure the nav item is documented.
|
||||
- **README.md**: Update the top-level `README.md` when a major new capability is added (new signal layer, new dashboard section, new trading feature).
|
||||
- **Steering files**: Update `.kiro/steering/project-context.md` when migration numbers advance, new services are added, or key conventions change.
|
||||
|
||||
The goal is that someone reading the docs can always understand the current state of the system without reading the source code. When in doubt, update the doc.
|
||||
|
||||
@@ -81,13 +81,14 @@ When a full reset is needed:
|
||||
## Database Migrations
|
||||
- Located in `infra/migrations/001_*.sql` through `030_*.sql`
|
||||
- Applied automatically by `runmefirst.sh` in sorted order
|
||||
- Next migration number: **031**
|
||||
- Next migration number: **038**
|
||||
- Key migrations:
|
||||
- 016: Global news interpolation (global_events, macro_impact_records, exposure_profiles, trend_projections)
|
||||
- 017: Competitive intelligence (competitor_relationships, competitive_signal_records)
|
||||
- 024: Trend history time-series table
|
||||
- 026: AI agents management (ai_agents, agent_performance_log)
|
||||
- 027: Agent variants (agent_variants table for A/B testing)
|
||||
- 035: Model validation (prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots, v_prediction_performance, v_source_performance)
|
||||
|
||||
## Key Conventions
|
||||
- All services use `services/shared/config.py` for configuration via env vars
|
||||
|
||||
+12
-3
@@ -63,8 +63,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -99,8 +102,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.ingestion.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.ingestion.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -135,8 +141,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.parser.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.parser.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
||||
+55
-4
@@ -28,8 +28,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.extractor.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.extractor.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -64,8 +67,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.aggregation.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.aggregation.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -100,8 +106,50 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.recommendation.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.recommendation.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
requests:
|
||||
memory: 1Gi
|
||||
cpu: 1000m
|
||||
limits:
|
||||
memory: 2Gi
|
||||
cpu: 4000m
|
||||
depends_on: []
|
||||
build-signal-engine:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
privileged: true
|
||||
settings:
|
||||
repo: registry.celestium.life/stonks-oracle/signal-engine
|
||||
registry: registry.celestium.life
|
||||
custom_dns: 192.168.42.1
|
||||
buildx_image: registry.celestium.life/dockerhub-cache/moby/buildkit:buildx-stable-1
|
||||
add_host: registry.celestium.life:10.1.1.12
|
||||
buildx_flags: --driver-opt network=host
|
||||
buildkitd_config: "[registry.\"docker.io\"]\n mirrors = [\"registry.celestium.life/v2/dockerhub-cache\"]\n[registry.\"ghcr.io\"]\n mirrors = [\"registry.celestium.life/v2/ghcr-cache\"]\n"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
logins:
|
||||
- registry: https://registry.celestium.life
|
||||
username:
|
||||
from_secret: harbor_username
|
||||
password:
|
||||
from_secret: harbor_password
|
||||
tags:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.signal_engine.main
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -136,8 +184,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.risk.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.risk.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
||||
+16
-4
@@ -28,8 +28,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.adapters.broker_adapter
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.adapters.broker_adapter
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -64,8 +67,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.lake_publisher.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.lake_publisher.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -100,8 +106,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.api.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.api.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -136,8 +145,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.trading.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.trading.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
||||
@@ -16,7 +16,9 @@ WORKDIR /app
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
ARG CACHE_BUST
|
||||
COPY services/ /app/services/
|
||||
COPY scripts/ /app/scripts/
|
||||
COPY tests/ /app/tests/
|
||||
COPY conftest.py /app/conftest.py
|
||||
|
||||
|
||||
+104
-13
@@ -1,6 +1,6 @@
|
||||
# AI Agent Building Guide
|
||||
|
||||
Stonks Oracle uses three AI agents powered by a local Ollama instance. Each agent has a dedicated purpose in the pipeline, a database-backed configuration, and support for A/B testing through variants. This guide covers how each agent works, how to configure them, how to create and test variants, and how to monitor performance.
|
||||
Stonks Oracle uses three AI agents powered by local LLM inference (Ollama or vLLM). Each agent has a dedicated purpose in the pipeline, a database-backed configuration, and support for A/B testing through variants. This guide covers how each agent works, how to configure them, how to create and test variants, and how to monitor performance.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
@@ -8,6 +8,7 @@ Stonks Oracle uses three AI agents powered by a local Ollama instance. Each agen
|
||||
- [Document Intelligence Extractor](#1-document-intelligence-extractor)
|
||||
- [Global Event Classifier](#2-global-event-classifier)
|
||||
- [Thesis Rewriter](#3-thesis-rewriter)
|
||||
- [LLM Provider Abstraction](#llm-provider-abstraction)
|
||||
- [Database Schema](#database-schema)
|
||||
- [ai_agents Table](#ai_agents-table)
|
||||
- [agent_variants Table](#agent_variants-table)
|
||||
@@ -30,9 +31,10 @@ Three agents are seeded into the `ai_agents` table on first migration (migration
|
||||
| **Slug** | `document-extractor` |
|
||||
| **Purpose** | Extracts structured intelligence (sentiment, catalysts, impact scores, key facts, risks) from company news, SEC filings, earnings transcripts, and press releases |
|
||||
| **Default Model** | `qwen3.5:9b-fast` (Ollama) |
|
||||
| **Supported Providers** | `ollama`, `vllm` |
|
||||
| **Prompt Version** | `document-intel-v2` |
|
||||
| **Schema Version** | `2.0.0` |
|
||||
| **Entry Point** | `services/extractor/main.py` → `services/extractor/client.py` |
|
||||
| **Entry Point** | `services/extractor/main.py` → `services/extractor/llm_factory.py` → `services/extractor/client.py` (Ollama) or `services/extractor/vllm_client.py` (vLLM) |
|
||||
|
||||
**Input Data:**
|
||||
- Normalized document text (fetched from MinIO or passed in the Redis job payload)
|
||||
@@ -40,7 +42,7 @@ Three agents are seeded into the `ai_agents` table on first migration (migration
|
||||
- List of tracked tickers for company identification
|
||||
- Document ID for traceability
|
||||
|
||||
**Output Schema** (`ExtractionResult`):
|
||||
**Output Schema** (`ExtractionResult` — defined in `services/extractor/schemas.py`):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -81,6 +83,7 @@ Use "other" for catalyst_type if unsure. Keep evidence_spans short
|
||||
- Includes tracked ticker list with rules for company identification
|
||||
- Includes the full JSON schema field descriptions
|
||||
- Truncates documents to 8,000 characters to limit inference time
|
||||
- When an active variant has `input_token_limit > 0`, truncation uses `input_token_limit * 4` characters instead
|
||||
|
||||
---
|
||||
|
||||
@@ -91,6 +94,7 @@ Use "other" for catalyst_type if unsure. Keep evidence_spans short
|
||||
| **Slug** | `event-classifier` |
|
||||
| **Purpose** | Classifies global/geopolitical news into structured macro events with impact type, severity, affected regions/sectors/commodities, and estimated duration |
|
||||
| **Default Model** | `qwen3.5:9b-fast` (Ollama) |
|
||||
| **Supported Providers** | `ollama`, `vllm` |
|
||||
| **Prompt Version** | `event-classification-v1` |
|
||||
| **Schema Version** | `1.0.0` |
|
||||
| **Entry Point** | `services/extractor/main.py` → `services/extractor/event_classifier.py` |
|
||||
@@ -99,7 +103,7 @@ Use "other" for catalyst_type if unsure. Keep evidence_spans short
|
||||
- Normalized text of a macro news article (from the `stonks:queue:macro_classification` Redis queue)
|
||||
- Document ID for traceability
|
||||
|
||||
**Output Schema** (`GlobalEvent`):
|
||||
**Output Schema** (`GlobalEvent` — defined in `services/extractor/event_classifier.py`):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -141,9 +145,11 @@ as empty arrays.
|
||||
```
|
||||
|
||||
**User Prompt Template** (built by `build_event_classification_prompt()` in `services/extractor/event_classifier.py`):
|
||||
- Includes anti-hallucination rules
|
||||
- Includes anti-hallucination rules (no fabrication, severity "critical" reserved for multi-country events)
|
||||
- Lists all valid enum values for each field
|
||||
- Truncates articles to 6,000 characters
|
||||
- When an active variant has `input_token_limit > 0`, truncation uses `input_token_limit * 4` characters instead
|
||||
- If a variant overrides the system prompt, the classifier appends JSON output instructions to it unless they are already present
|
||||
|
||||
---
|
||||
|
||||
@@ -154,6 +160,7 @@ as empty arrays.
|
||||
| **Slug** | `thesis-rewriter` |
|
||||
| **Purpose** | Rewrites deterministic trade thesis summaries into clear, professional analyst prose. Optional layer — the system falls back to the deterministic thesis if this fails |
|
||||
| **Default Model** | `qwen3.5:9b-fast` (Ollama) |
|
||||
| **Supported Providers** | `ollama`, `vllm` |
|
||||
| **Prompt Version** | `thesis-rewrite-v1` |
|
||||
| **Schema Version** | `1.0.0` |
|
||||
| **Entry Point** | `services/recommendation/main.py` → `services/recommendation/thesis_llm.py` |
|
||||
@@ -165,6 +172,7 @@ as empty arrays.
|
||||
**Output Schema:**
|
||||
- Plain text (not JSON). The model returns only the rewritten thesis as a string, under 150 words.
|
||||
- On failure or empty response, the original deterministic thesis is returned unchanged.
|
||||
- A `_strip_thinking_block()` post-processor removes `<think>` XML tags and "Thinking Process:" blocks that some models (e.g. Qwen3) emit before the actual response.
|
||||
|
||||
**System Prompt:**
|
||||
|
||||
@@ -182,11 +190,37 @@ STRICT RULES:
|
||||
5. Use a neutral, professional tone. Avoid hype or marketing language.
|
||||
6. Return ONLY the rewritten thesis text. No JSON, no markdown, no
|
||||
commentary.
|
||||
7. Do NOT show your thinking process. Do NOT include any reasoning
|
||||
steps. Output ONLY the final rewritten text.
|
||||
```
|
||||
|
||||
**User Prompt Template** (built by `build_thesis_rewrite_prompt()` in `services/recommendation/thesis_llm.py`):
|
||||
- Includes the deterministic thesis between delimiters
|
||||
- Includes trend context: ticker, window, direction, strength, confidence, contradiction score, top catalysts, top risks
|
||||
- Appends `/no_think` suffix to suppress reasoning mode on models that support it (e.g. Qwen3)
|
||||
- Ollama calls also set `"think": false` in the request payload
|
||||
|
||||
---
|
||||
|
||||
## LLM Provider Abstraction
|
||||
|
||||
All three agents support both **Ollama** and **vLLM** as inference providers. The provider is determined by the `model_provider` field in the agent config (or active variant).
|
||||
|
||||
**Module:** `services/extractor/llm_factory.py`
|
||||
|
||||
The `build_llm_client()` factory function routes to the correct client:
|
||||
|
||||
| `model_provider` value | Client class | API endpoint |
|
||||
|------------------------|-------------|--------------|
|
||||
| `ollama` (default), `""`, `None` | `OllamaClient` (`services/extractor/client.py`) | `{OLLAMA_BASE_URL}/api/chat` |
|
||||
| `vllm` | `VLLMClient` (`services/extractor/vllm_client.py`) | `{VLLM_BASE_URL}/v1/chat/completions` (OpenAI-compatible) |
|
||||
| Unknown value | `OllamaClient` (with warning log) | Falls back to Ollama |
|
||||
|
||||
Both clients implement the `LLMClient` protocol (`services/shared/llm_protocol.py`), providing `call_llm()` and `close()` methods.
|
||||
|
||||
**Provider switching at runtime:** When a variant changes the `model_provider`, the extractor worker detects this during its periodic config refresh (every 100 jobs) and creates a new client instance. The old client is closed gracefully. A safety guard prevents switching to Ollama if `OLLAMA_BASE_URL` is empty.
|
||||
|
||||
**vLLM health check:** At startup, if the resolved provider is `vllm`, the extractor runs a health check against the vLLM endpoint. If it fails, the worker falls back to Ollama automatically.
|
||||
|
||||
---
|
||||
|
||||
@@ -202,8 +236,8 @@ Defined in migration `026_ai_agents.sql`. Stores the base configuration for each
|
||||
| `name` | `VARCHAR(100)` | — | Human-readable name (unique) |
|
||||
| `slug` | `VARCHAR(100)` | — | URL-safe identifier (unique), used by `AgentConfigResolver` |
|
||||
| `purpose` | `TEXT` | `''` | Description of what the agent does |
|
||||
| `model_provider` | `VARCHAR(50)` | `'ollama'` | LLM provider |
|
||||
| `model_name` | `VARCHAR(200)` | `'qwen3.5:9b'` | Model identifier |
|
||||
| `model_provider` | `VARCHAR(50)` | `'ollama'` | LLM provider (`ollama` or `vllm`) |
|
||||
| `model_name` | `VARCHAR(200)` | `'qwen3.5:9b-fast'` | Model identifier |
|
||||
| `system_prompt` | `TEXT` | `''` | System prompt sent to the model |
|
||||
| `user_prompt_template` | `TEXT` | `''` | User prompt template (optional — code-defined templates take precedence) |
|
||||
| `prompt_version` | `VARCHAR(100)` | `''` | Version tag for prompt tracking |
|
||||
@@ -297,13 +331,20 @@ The `AgentConfigResolver` is the central mechanism for resolving runtime agent c
|
||||
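2. **COALESCE-based override**: The SQL query uses `COALESCE(variant_column, agent_column)` for the configuration fields shared by the agent and its variants. If an active variant exists and has a non-NULL value for a field, that value is used. Otherwise, the base agent's value is used. The `context_window`, `input_token_limit`, and `token_budget` fields are read from the variant only and fall back to `0`.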
|
||||
|
||||
```sql
|
||||
SELECT a.id AS agent_id,
|
||||
v.id AS variant_id,
|
||||
SELECT a.id AS agent_id,
|
||||
v.id AS variant_id,
|
||||
COALESCE(v.model_provider, a.model_provider) AS model_provider,
|
||||
COALESCE(v.model_name, a.model_name) AS model_name,
|
||||
COALESCE(v.system_prompt, a.system_prompt) AS system_prompt,
|
||||
COALESCE(v.user_prompt_template, a.user_prompt_template) AS user_prompt_template,
|
||||
-- ... all other fields ...
|
||||
COALESCE(v.prompt_version, a.prompt_version) AS prompt_version,
|
||||
COALESCE(v.temperature, a.temperature) AS temperature,
|
||||
COALESCE(v.max_tokens, a.max_tokens) AS max_tokens,
|
||||
COALESCE(v.context_window, 0) AS context_window,
|
||||
COALESCE(v.input_token_limit, 0) AS input_token_limit,
|
||||
COALESCE(v.token_budget, 0) AS token_budget,
|
||||
COALESCE(v.timeout_seconds, a.timeout_seconds) AS timeout_seconds,
|
||||
COALESCE(v.max_retries, a.max_retries) AS max_retries
|
||||
FROM ai_agents a
|
||||
LEFT JOIN agent_variants v
|
||||
ON v.agent_id = a.id AND v.is_active = TRUE
|
||||
@@ -361,7 +402,10 @@ resolver.invalidate() # Clear all entries
|
||||
|
||||
### Config Refresh in Workers
|
||||
|
||||
The extractor and recommendation workers periodically re-resolve their agent config (every 100 jobs for the extractor, every 50 jobs for the recommendation worker). If the resolved model changes, the worker creates a new `OllamaClient` instance with the updated configuration.
|
||||
The extractor and recommendation workers periodically re-resolve their agent config to pick up variant swaps and model changes:
|
||||
|
||||
- **Extractor worker** (`services/extractor/main.py`): Re-resolves both `document-extractor` and `event-classifier` configs every **100 jobs**. If the resolved model or provider changes, the worker creates a new LLM client instance via `build_llm_client()` and closes the old one. A safety guard prevents switching to Ollama if `OLLAMA_BASE_URL` is empty.
|
||||
- **Recommendation worker** (`services/recommendation/main.py`): Re-resolves the `thesis-rewriter` config every **50 jobs**. If the model changes, a new `OllamaConfig` is built.
|
||||
|
||||
---
|
||||
|
||||
@@ -373,7 +417,7 @@ Every agent invocation is logged to `agent_performance_log` with the `agent_id`
|
||||
|
||||
- **Document extractor**: Logged in `services/extractor/main.py` after each extraction. Records success/failure, duration, confidence, retry count, token estimates.
|
||||
- **Event classifier**: Logged in `services/extractor/event_classifier.py` after each classification. Same fields.
|
||||
- **Thesis rewriter**: Logged in `services/recommendation/thesis_llm.py` after each rewrite attempt. Confidence is always 0.0 (not applicable for rewrites).
|
||||
- **Thesis rewriter**: Logged in `services/recommendation/thesis_llm.py` after each rewrite attempt. Confidence is always 0.0 (not applicable for rewrites). `document_id` is always NULL.
|
||||
|
||||
### Querying for Variant Comparison
|
||||
|
||||
@@ -464,6 +508,8 @@ All agent endpoints are served by the Query API (`services/api/app.py`) under th
|
||||
}
|
||||
```
|
||||
|
||||
All fields except `name` have defaults. The `slug` is auto-generated from `name` if not provided. The `model_name` defaults to `llama3.1:8b` for user-created agents.
|
||||
|
||||
**Update Agent Request Body** (all fields optional):
|
||||
|
||||
```json
|
||||
@@ -509,6 +555,30 @@ All agent endpoints are served by the Query API (`services/api/app.py`) under th
|
||||
| `PUT` | `/api/agents/{agent_id}/variants/{variant_id}` | Partial update a variant |
|
||||
| `DELETE` | `/api/agents/{agent_id}/variants/{variant_id}` | Delete a variant (returns 400 if active) |
|
||||
|
||||
**Create Variant Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"variant_name": "Llama 3.1 8B Test",
|
||||
"variant_slug": "llama-3-1-8b-test",
|
||||
"description": "Testing llama3.1:8b as an alternative",
|
||||
"model_provider": "ollama",
|
||||
"model_name": "llama3.1:8b",
|
||||
"system_prompt": "",
|
||||
"user_prompt_template": "",
|
||||
"prompt_version": "",
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 32768,
|
||||
"context_window": 0,
|
||||
"input_token_limit": 0,
|
||||
"token_budget": 0,
|
||||
"timeout_seconds": 120,
|
||||
"max_retries": 2
|
||||
}
|
||||
```
|
||||
|
||||
Required fields: `variant_name`, `model_name`. The `variant_slug` is auto-generated from `variant_name` if not provided.
|
||||
|
||||
### Clone Endpoints
|
||||
|
||||
| Method | Path | Description |
|
||||
@@ -516,7 +586,7 @@ All agent endpoints are served by the Query API (`services/api/app.py`) under th
|
||||
| `POST` | `/api/agents/{agent_id}/clone` | Clone an agent's base config as a new variant |
|
||||
| `POST` | `/api/agents/{agent_id}/variants/{variant_id}/clone` | Clone an existing variant as a new variant |
|
||||
|
||||
Clone requests copy all configuration fields from the source, with optional overrides in the request body.
|
||||
Clone requests copy all configuration fields from the source, with optional overrides in the request body. The `variant_name` field is required. All other fields default to the source's values if not provided.
|
||||
|
||||
### Activate / Deactivate
|
||||
|
||||
@@ -525,6 +595,8 @@ Clone requests copy all configuration fields from the source, with optional over
|
||||
| `POST` | `/api/agents/{agent_id}/variants/{variant_id}/activate` | Set a variant as active (deactivates any other active variant in a single transaction) |
|
||||
| `POST` | `/api/agents/{agent_id}/variants/deactivate` | Deactivate the currently active variant (agent falls back to base config) |
|
||||
|
||||
The activate endpoint uses a database transaction to atomically deactivate the current variant and activate the new one, ensuring that at most one variant is active per agent at any time.
|
||||
|
||||
### Per-Variant Performance
|
||||
|
||||
| Method | Path | Description |
|
||||
@@ -532,6 +604,8 @@ Clone requests copy all configuration fields from the source, with optional over
|
||||
| `GET` | `/api/agents/{agent_id}/variants/{variant_id}/performance` | Aggregated metrics for a specific variant |
|
||||
| `GET` | `/api/agents/{agent_id}/variants/{variant_id}/performance/history` | Hourly time-series for a specific variant |
|
||||
|
||||
Both endpoints accept the same `hours` query parameter (default 24, max 720) and return the same response shape as the agent-level performance endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Step-by-Step: Creating and Activating a Variant
|
||||
@@ -616,3 +690,20 @@ curl -s -X PUT \
|
||||
```
|
||||
|
||||
Then re-activate and compare again.
|
||||
|
||||
### 7. Switch to vLLM Provider
|
||||
|
||||
To test a variant using vLLM instead of Ollama:
|
||||
|
||||
```bash
|
||||
curl -s -X POST https://stonks-api.celestium.life/api/agents/$AGENT_ID/clone \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"variant_name": "vLLM Qwen3 Test",
|
||||
"description": "Testing extraction with vLLM backend",
|
||||
"model_provider": "vllm",
|
||||
"model_name": "Qwen/Qwen3-8B"
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
The extractor worker will detect the provider change during its next config refresh and build a `VLLMClient` instead of an `OllamaClient`. Ensure the `VLLM_BASE_URL` environment variable is set in the extractor deployment.
|
||||
+185
-19
@@ -142,14 +142,35 @@ Trend projection for a specific trend window.
|
||||
### 1.5 Market Prices
|
||||
|
||||
#### `GET /api/market/prices/{ticker}`
|
||||
Historical close prices from `market_snapshots`.
|
||||
Historical OHLCV bars from `market_snapshots`, deduplicated by bar timestamp and ordered oldest-first. Also returns the 90-day high/low range.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `limit` | int | `30` | max `200` | Max bars returned |
|
||||
| `limit` | int | `200` | max `500` | Max bars returned |
|
||||
|
||||
- **Path params:** `ticker` (auto-uppercased)
|
||||
- **Response:** Array of OHLCV objects ordered oldest-first
|
||||
- **Response:** `{ bars: [{ ticker, close, open, high, low, volume, bar_timestamp, captured_at }], range_90d: { low, high } }`
|
||||
|
||||
#### `POST /api/market/backfill/{ticker}`
|
||||
Backfill daily OHLCV bars from Polygon for the last N days. Deduplicates by bar timestamp.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `days` | int | `90` | max `365` | Number of days to backfill |
|
||||
|
||||
- **Path params:** `ticker` (auto-uppercased)
|
||||
- **Response:** `{ ticker, inserted, total_bars, days }`
|
||||
- **Errors:** `503` — No market data API key configured
|
||||
|
||||
#### `POST /api/market/backfill-all`
|
||||
Backfill daily bars for all active companies from Polygon.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `days` | int | `90` | max `365` | Number of days to backfill |
|
||||
|
||||
- **Response:** `{ total_inserted, tickers, details[] }` — each detail has `{ ticker, inserted }` or `{ ticker, inserted: 0, error }`
|
||||
- **Errors:** `503` — No market data API key configured
|
||||
|
||||
### 1.6 Recommendations
|
||||
|
||||
@@ -224,8 +245,6 @@ Get audit events for any entity type and ID.
|
||||
|
||||
- **Path params:** `entity_type` (string), `entity_id` (string)
|
||||
- **Response:** Array of audit event objects
|
||||
- **Errors:** `404` — No audit events found
|
||||
|
||||
|
||||
### 1.10 Admin: Source Health
|
||||
|
||||
@@ -331,6 +350,8 @@ Approve or reject a pending operator approval request.
|
||||
#### `GET /api/admin/trading/lockouts`
|
||||
List active symbol lockouts (news-shock, cooldown, manual).
|
||||
|
||||
- **Response:** Array of lockout objects
|
||||
|
||||
#### `POST /api/admin/trading/lockouts`
|
||||
Create a manual symbol lockout.
|
||||
|
||||
@@ -353,7 +374,6 @@ Update operator approval settings.
|
||||
- **Body:** `{ auto_approve_paper?: bool, require_approval_for_live?: bool, approval_timeout_minutes?: int }`
|
||||
- **Response:** Updated approval settings
|
||||
|
||||
|
||||
### 1.13 Operational Dashboard
|
||||
|
||||
#### `GET /api/ops/ingestion/throughput`
|
||||
@@ -450,7 +470,7 @@ Trino catalog/schema/table/column metadata for the schema browser.
|
||||
#### `GET /api/analytics/pg-schema`
|
||||
PostgreSQL table/column metadata with primary keys, foreign keys, and row estimates.
|
||||
|
||||
- **Response:** `{ catalog: "postgresql", schema: "public", tables[] }`
|
||||
- **Response:** `{ catalog: "postgresql", schema: "public", tables[{ name, row_estimate, columns[{ name, type, nullable, primary_key?, references?, has_default? }] }] }`
|
||||
|
||||
#### `POST /api/analytics/pg-query`
|
||||
Run read-only SQL against PostgreSQL directly. Only SELECT statements allowed.
|
||||
@@ -462,17 +482,19 @@ Run read-only SQL against PostgreSQL directly. Only SELECT statements allowed.
|
||||
#### `GET /api/analytics/saved-queries`
|
||||
List all saved queries.
|
||||
|
||||
- **Response:** Array of `{ id, name, description, sql_text, created_by, created_at, updated_at }`
|
||||
|
||||
#### `POST /api/analytics/saved-queries` (201)
|
||||
Save a new query.
|
||||
|
||||
- **Body:** `{ name: string, description?: string, sql_text: string }`
|
||||
- **Response:** `{ id, name, description, sql_text, created_by, created_at }`
|
||||
|
||||
#### `DELETE /api/analytics/saved-queries/{query_id}`
|
||||
Delete a saved query.
|
||||
|
||||
- **Errors:** `404` — Query not found
|
||||
|
||||
|
||||
### 1.16 Macro Signal Layer
|
||||
|
||||
#### `GET /api/admin/macro/status`
|
||||
@@ -501,9 +523,13 @@ List recent global events with filtering.
|
||||
| `limit` | int | `50` | max `200` | Page size |
|
||||
| `offset` | int | `0` | — | Pagination offset |
|
||||
|
||||
- **Response:** Array of global event objects with `id`, `event_types`, `severity`, `affected_regions`, `affected_sectors`, `affected_commodities`, `summary`, `key_facts`, `estimated_duration`, `confidence`, `source_document_id`, `created_at`
|
||||
|
||||
#### `GET /api/macro/events/{event_id}`
|
||||
Event detail with affected companies and macro impact scores.
|
||||
|
||||
- **Path params:** `event_id` (UUID string)
|
||||
- **Response:** Global event object + `impacts[]` (each with `company_id`, `ticker`, `macro_impact_score`, `impact_direction`, `contributing_factors`, `confidence`, `legal_name`, `sector`)
|
||||
- **Errors:** `404` — Global event not found
|
||||
|
||||
#### `GET /api/macro/impacts/{ticker}`
|
||||
@@ -515,7 +541,8 @@ Macro impacts and exposure profile for a specific company.
|
||||
| `limit` | int | `50` | max `200` | Page size |
|
||||
| `offset` | int | `0` | — | Pagination offset |
|
||||
|
||||
- **Response:** `{ exposure_profile, impacts[] }`
|
||||
- **Path params:** `ticker` (auto-uppercased)
|
||||
- **Response:** `{ exposure_profile, impacts[] }` — each impact includes `event_summary`, `event_severity`, `event_types`, `affected_regions`
|
||||
|
||||
### 1.18 Competitive Signal Layer
|
||||
|
||||
@@ -540,6 +567,7 @@ Historical patterns for a company.
|
||||
| `catalyst_type` | string | — | Filter by catalyst type |
|
||||
| `time_horizon` | string | — | Filter by time horizon |
|
||||
|
||||
- **Path params:** `ticker` (string)
|
||||
- **Response:** `{ ticker, patterns[], count }`
|
||||
|
||||
#### `GET /api/patterns/{ticker}/competitors`
|
||||
@@ -555,6 +583,7 @@ Cross-company patterns showing how this company's catalysts affected competitors
|
||||
#### `GET /api/patterns/{ticker}/competitive-signals`
|
||||
Recent competitive signals targeting this company (limit 100).
|
||||
|
||||
- **Path params:** `ticker` (string)
|
||||
- **Response:** `{ ticker, competitive_signals[], count }`
|
||||
|
||||
#### `GET /api/patterns/{ticker}/decisions`
|
||||
@@ -564,9 +593,9 @@ Major corporate decision history with trend outcomes and pattern statistics.
|
||||
|-----------|------|---------|-------------|
|
||||
| `time_horizon` | string | — | Filter by time horizon |
|
||||
|
||||
- **Path params:** `ticker` (string)
|
||||
- **Response:** `{ ticker, decisions[], count }` — each decision includes `pattern_statistics[]`
|
||||
|
||||
|
||||
### 1.20 AI Agents
|
||||
|
||||
#### `GET /api/agents`
|
||||
@@ -576,9 +605,12 @@ List all AI agent configurations.
|
||||
|-----------|------|---------|-------------|
|
||||
| `active_only` | bool | `false` | Only show active agents |
|
||||
|
||||
- **Response:** Array of agent objects with `id`, `name`, `slug`, `purpose`, `model_provider`, `model_name`, `system_prompt`, `user_prompt_template`, `prompt_version`, `schema_version`, `temperature`, `max_tokens`, `timeout_seconds`, `max_retries`, `active`, `source`, `created_at`, `updated_at`
|
||||
|
||||
#### `GET /api/agents/{agent_id}`
|
||||
Get a single agent configuration.
|
||||
|
||||
- **Path params:** `agent_id` (UUID string)
|
||||
- **Errors:** `404` — Agent not found
|
||||
|
||||
#### `POST /api/agents` (201)
|
||||
@@ -603,9 +635,9 @@ Create a new user-defined agent.
|
||||
| `max_retries` | int | `2` | Max retry attempts |
|
||||
|
||||
#### `PUT /api/agents/{agent_id}`
|
||||
Update an agent configuration. Partial updates supported.
|
||||
Update an agent configuration. Partial updates supported — only provided fields are changed.
|
||||
|
||||
- **Body:** `AgentUpdateBody` — all fields optional (same fields as create)
|
||||
- **Body:** `AgentUpdateBody` — all fields optional (same fields as create plus `active`)
|
||||
- **Errors:** `400` — No fields to update; `404` — Agent not found
|
||||
|
||||
#### `DELETE /api/agents/{agent_id}`
|
||||
@@ -636,6 +668,8 @@ Hourly performance time-series for an agent.
|
||||
#### `GET /api/agents/{agent_id}/variants`
|
||||
List all variants for an agent, ordered by `created_at` ascending.
|
||||
|
||||
- **Response:** Array of variant objects with `id`, `agent_id`, `variant_name`, `variant_slug`, `description`, `model_provider`, `model_name`, `system_prompt`, `user_prompt_template`, `prompt_version`, `temperature`, `max_tokens`, `context_window`, `input_token_limit`, `token_budget`, `timeout_seconds`, `max_retries`, `is_active`, `created_at`, `updated_at`
|
||||
|
||||
#### `GET /api/agents/{agent_id}/variants/{variant_id}`
|
||||
Get a single variant.
|
||||
|
||||
@@ -680,13 +714,13 @@ Delete a variant. Cannot delete active variants.
|
||||
#### `POST /api/agents/{agent_id}/clone` (201)
|
||||
Clone an agent's configuration as a new variant with optional overrides.
|
||||
|
||||
- **Body:** `VariantCloneBody { variant_name, variant_slug?, ...optional overrides }`
|
||||
- **Body:** `VariantCloneBody { variant_name, variant_slug?, description?, model_provider?, model_name?, system_prompt?, user_prompt_template?, prompt_version?, temperature?, max_tokens?, context_window?, input_token_limit?, token_budget?, timeout_seconds?, max_retries? }`
|
||||
- **Errors:** `404` — Agent not found; `409` — Duplicate slug
|
||||
|
||||
#### `POST /api/agents/{agent_id}/variants/{variant_id}/clone` (201)
|
||||
Clone an existing variant as a new variant with optional overrides.
|
||||
|
||||
- **Body:** `VariantCloneBody`
|
||||
- **Body:** `VariantCloneBody` (same as above)
|
||||
- **Errors:** `404` — Source variant not found; `409` — Duplicate slug
|
||||
|
||||
#### `POST /api/agents/{agent_id}/variants/{variant_id}/activate`
|
||||
@@ -697,6 +731,8 @@ Set a variant as the active variant for its agent. Deactivates any currently act
|
||||
#### `POST /api/agents/{agent_id}/variants/deactivate`
|
||||
Deactivate the currently active variant. Agent falls back to base configuration.
|
||||
|
||||
- **Response:** `{ deactivated: true }`
|
||||
|
||||
#### `GET /api/agents/{agent_id}/variants/{variant_id}/performance`
|
||||
Aggregated performance metrics for a specific variant.
|
||||
|
||||
@@ -704,6 +740,8 @@ Aggregated performance metrics for a specific variant.
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `hours` | int | `24` | max `720` | Time window |
|
||||
|
||||
- **Response:** Same shape as agent performance (invocations, successes, failures, durations, confidence, tokens, success_rate)
|
||||
|
||||
#### `GET /api/agents/{agent_id}/variants/{variant_id}/performance/history`
|
||||
Hourly performance time-series for a specific variant.
|
||||
|
||||
@@ -711,6 +749,108 @@ Hourly performance time-series for a specific variant.
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `hours` | int | `24` | max `720` | Time window |
|
||||
|
||||
- **Response:** Array of `{ hour, invocations, successes, avg_duration_ms, avg_confidence }`
|
||||
|
||||
### 1.22 Model Validation
|
||||
|
||||
#### `GET /api/validation/summary`
|
||||
Latest model metric snapshot plus quality gate status.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ snapshot: { id, generated_at, lookback_window, horizon, prediction_count, win_rate, directional_accuracy, information_coefficient, rank_information_coefficient, avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector, calibration_error, brier_score, buy_win_rate, sell_win_rate, hold_win_rate, metadata }, gate_status }`
|
||||
- **Errors:** `400` — Invalid lookback or horizon value
|
||||
|
||||
#### `GET /api/validation/calibration`
|
||||
Calibration table with confidence buckets showing predicted vs observed win rates.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ buckets: [{ bucket_low, bucket_high, avg_confidence, observed_win_rate, prediction_count, miscalibrated }], lookback, horizon }`
|
||||
- Buckets: 0.50–0.60, 0.60–0.70, 0.70–0.80, 0.80–0.90, 0.90–1.00
|
||||
- `miscalibrated` is `true` when `|avg_confidence - observed_win_rate| > 0.15`
|
||||
- **Errors:** `400` — Invalid lookback or horizon value
|
||||
|
||||
#### `GET /api/validation/ic-by-horizon`
|
||||
Information Coefficient and Rank IC per prediction horizon.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
|
||||
- **Response:** `{ horizons: [{ horizon, information_coefficient, rank_information_coefficient, prediction_count, generated_at }], lookback }`
|
||||
- Horizons ordered: `1h`, `6h`, `1d`, `7d`, `30d`
|
||||
- **Errors:** `400` — Invalid lookback value
|
||||
|
||||
#### `GET /api/validation/gate-status`
|
||||
Quality gate evaluation detail from `risk_configs` where `name = 'model_quality_gate'`.
|
||||
|
||||
- **Response:** `{ gate_status, updated_at }` or `{ gate_status: null, message: "No gate evaluation found..." }`
|
||||
|
||||
### 1.23 Attribution
|
||||
|
||||
#### `GET /api/validation/attribution/sources`
|
||||
Per-source performance metrics: win rate, IC, average return, duplicate rate.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ sources[], lookback, horizon }`
|
||||
- **Errors:** `400` — Invalid lookback or horizon; `500` — Computation failed
|
||||
|
||||
#### `GET /api/validation/attribution/catalysts`
|
||||
Per-catalyst-type performance metrics: win rate, IC, average return.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ catalysts[], lookback, horizon }`
|
||||
- **Errors:** `400` — Invalid lookback or horizon; `500` — Computation failed
|
||||
|
||||
#### `GET /api/validation/attribution/layers`
|
||||
Per-signal-layer (company, macro, competitive) performance metrics.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ layers[], lookback, horizon }` — each layer has `avg_contribution_pct`, `dominant_win_rate`, `dominant_ic`
|
||||
- **Errors:** `400` — Invalid lookback or horizon; `500` — Computation failed
|
||||
|
||||
### 1.24 Trading Reports
|
||||
|
||||
#### `GET /api/reports`
|
||||
Paginated list of trading reports with optional filtering.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `report_type` | string | — | `daily` or `weekly` | Filter by report type |
|
||||
| `start_date` | string | — | ISO date (YYYY-MM-DD) | Filter `period_start >= this` |
|
||||
| `end_date` | string | — | ISO date (YYYY-MM-DD) | Filter `period_end <= this` |
|
||||
| `limit` | int | `20` | max `100` | Page size |
|
||||
| `offset` | int | `0` | min `0` | Pagination offset |
|
||||
|
||||
- **Response:** Array of `{ id, report_type, period_start, period_end, validation_status, generated_at }`
|
||||
- **Errors:** `400` — Invalid `report_type` or date format
|
||||
|
||||
#### `GET /api/reports/{report_id}`
|
||||
Fetch a single report including full `report_data` JSONB.
|
||||
|
||||
- **Path params:** `report_id` (UUID string)
|
||||
- **Response:** `{ id, report_type, period_start, period_end, report_data, validation_status, generated_at, created_at }`
|
||||
- **Errors:** `404` — Report not found
|
||||
|
||||
---
|
||||
|
||||
## 2. Symbol Registry API
|
||||
@@ -756,6 +896,7 @@ List tracked companies.
|
||||
#### `GET /companies/{company_id}`
|
||||
Get a single company.
|
||||
|
||||
- **Path params:** `company_id` (UUID string)
|
||||
- **Errors:** `404` — Company not found
|
||||
|
||||
#### `PUT /companies/{company_id}`
|
||||
@@ -783,14 +924,18 @@ List aliases for a company.
|
||||
Create a new watchlist.
|
||||
|
||||
- **Body:** `{ name: string, description?: string }`
|
||||
- **Response:** `{ id, name, description, active }`
|
||||
- **Errors:** `409` — Watchlist name already exists
|
||||
|
||||
#### `GET /watchlists`
|
||||
List all watchlists.
|
||||
|
||||
- **Response:** Array of `{ id, name, description, active }`
|
||||
|
||||
#### `POST /watchlists/{watchlist_id}/members/{company_id}` (201)
|
||||
Add a company to a watchlist.
|
||||
|
||||
- **Response:** `{ status: "added" }`
|
||||
- **Errors:** `409` — Already a member; `404` — Watchlist or company not found
|
||||
|
||||
#### `GET /watchlists/{watchlist_id}/members`
|
||||
@@ -814,11 +959,14 @@ Add a data source for a company.
|
||||
| `retention_days` | int | `365` | — | Data retention period |
|
||||
| `access_policy` | string | `"internal"` | `internal`, `public`, `restricted` | Access policy |
|
||||
|
||||
- **Response:** `{ id, source_type, source_name, credibility_score, active }`
|
||||
- **Errors:** `404` — Company not found; `422` — Invalid source_type or access_policy
|
||||
|
||||
#### `GET /companies/{company_id}/sources`
|
||||
List sources for a company.
|
||||
|
||||
- **Response:** Array of `{ id, source_type, source_name, config, credibility_score, retention_days, access_policy, active }`
|
||||
|
||||
### 2.6 Exposure Profiles
|
||||
|
||||
#### `GET /companies/{company_id}/exposure`
|
||||
@@ -848,6 +996,8 @@ Create or update an exposure profile. Archives the previous active version.
|
||||
#### `GET /companies/{company_id}/exposure/history`
|
||||
Get all exposure profile versions for a company, ordered by version descending.
|
||||
|
||||
- **Response:** Array of `ExposureProfileResponse`
|
||||
|
||||
### 2.7 Competitor Relationships
|
||||
|
||||
#### `POST /companies/{company_id}/competitors` (201)
|
||||
@@ -863,10 +1013,11 @@ Create a competitor relationship. Records an audit event.
|
||||
| `bidirectional` | bool | `true` | — | Bidirectional relationship |
|
||||
| `source` | string | `"manual"` | `manual`, `inferred` | Data source |
|
||||
|
||||
- **Response:** `CompetitorRelationship { id, company_a_id, company_b_id, relationship_type, strength, bidirectional, source, active, created_at, updated_at }`
|
||||
- **Errors:** `400` — Self-reference; `404` — Company not found; `409` — Relationship already exists
|
||||
|
||||
#### `GET /companies/{company_id}/competitors`
|
||||
List active competitor relationships, enriched with ticker and legal_name of the other company.
|
||||
List active competitor relationships, enriched with the `ticker` and `legal_name` of the other company. Results are ordered by strength, descending.
|
||||
|
||||
- **Errors:** `404` — Company not found
|
||||
|
||||
@@ -879,6 +1030,7 @@ Update a competitor relationship. Records an audit event with previous state.
|
||||
#### `DELETE /companies/{company_id}/competitors/{relationship_id}`
|
||||
Soft-delete a competitor relationship (sets `active=false`). Records an audit event.
|
||||
|
||||
- **Response:** `{ status: "deleted", id }`
|
||||
- **Errors:** `404` — Active relationship not found
|
||||
|
||||
### 2.8 Competitor Inference
|
||||
@@ -923,7 +1075,7 @@ Diagnostic endpoint showing engine internals for troubleshooting.
|
||||
#### `GET /api/trading/status`
|
||||
Return current engine state.
|
||||
|
||||
- **Response:** `{ enabled, paused, risk_tier, circuit_breaker_status, active_pool, reserve_pool, portfolio_heat, open_positions, last_decision_at }`
|
||||
- **Response:** `{ enabled, paused, risk_tier, circuit_breaker_status, active_pool, reserve_pool, portfolio_heat, open_positions, open_position_count, max_open_positions, absolute_position_cap, last_decision_at }`
|
||||
- **Errors:** `503` — Engine not initialised
|
||||
|
||||
#### `PUT /api/trading/config`
|
||||
@@ -960,7 +1112,13 @@ Resume the trading engine.
|
||||
#### `POST /api/trading/reset`
|
||||
Full paper trading reset: liquidate broker positions, cancel orders, clear trading state, reset capital.
|
||||
|
||||
- **Body:** `{ initial_capital?: float (default 0.0) }` — if 0, uses broker balance or defaults to 100,000
|
||||
- **Body:** `CapitalRequest`
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `initial_capital` | float | `0.0` | If 0, uses broker balance or defaults to 100,000 |
|
||||
| `reserve_pct` | float | `null` | Reserve pool share expressed as a fraction (0–1), not a 0–100 percentage. If null, uses engine config `reserve_siphon_pct` |
|
||||
|
||||
- **Response:** `{ reset: true, initial_capital, active_pool, reserve_pool, broker: { orders_cancelled, positions_closed, portfolio_value, cash, buying_power } }`
|
||||
- **Errors:** `503` — Engine not initialised; `500` — Database reset failed
|
||||
|
||||
@@ -977,6 +1135,8 @@ Return recent trading decisions from the database.
|
||||
| `limit` | int | `50` | max `200` | Page size |
|
||||
| `offset` | int | `0` | — | Pagination offset |
|
||||
|
||||
- **Response:** Array of `{ id, recommendation_id, decision, skip_reason, ticker, computed_position_size, computed_share_quantity, risk_tier_at_decision, portfolio_heat_at_decision, active_pool_at_decision, reserve_pool_at_decision, circuit_breaker_status, is_micro_trade, created_at }`
|
||||
|
||||
### 3.5 Performance Metrics
|
||||
|
||||
#### `GET /api/trading/metrics`
|
||||
@@ -992,6 +1152,8 @@ Return historical daily portfolio snapshots.
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `limit` | int | `30` | max `365` | Max snapshots |
|
||||
|
||||
- **Response:** Array of `{ id, snapshot_date, portfolio_value, active_pool, reserve_pool, daily_return, cumulative_return, unrealized_pnl, realized_pnl, win_count, loss_count, win_rate, sharpe_ratio, max_drawdown, current_drawdown_pct, portfolio_heat, risk_tier, created_at }`
|
||||
|
||||
### 3.6 Backtesting
|
||||
|
||||
#### `POST /api/trading/backtest`
|
||||
@@ -1012,6 +1174,7 @@ Launch a backtest run asynchronously.
|
||||
#### `GET /api/trading/backtest/{backtest_id}`
|
||||
Retrieve backtest results.
|
||||
|
||||
- **Path params:** `backtest_id` (UUID string)
|
||||
- **Response:** `{ id, start_date, end_date, initial_capital, risk_tier, config, total_return, sharpe_ratio, max_drawdown, win_rate, profit_factor, trade_count, equity_curve[], trades[], status, completed_at, created_at }`
|
||||
- Status values: `running`, `completed`, `not_found`, `pending`
|
||||
|
||||
@@ -1037,10 +1200,11 @@ Update notification preferences.
|
||||
|
||||
All fields optional.
|
||||
|
||||
- **Response:** `{ updated: { ...changed fields } }`
|
||||
- **Errors:** `503` — Engine not initialised
|
||||
|
||||
#### `GET /api/trading/notifications/history`
|
||||
Return recent notifications.
|
||||
Return recent notifications (placeholder — currently returns an empty array).
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
@@ -1116,6 +1280,8 @@ List pending approval requests.
|
||||
#### `GET /approvals/{approval_id}`
|
||||
Get a single approval request.
|
||||
|
||||
- **Path params:** `approval_id` (UUID string)
|
||||
- **Response:** Approval request object
|
||||
- **Errors:** `404` — Approval not found; `503` — Database not ready
|
||||
|
||||
#### `POST /approvals/{approval_id}/review`
|
||||
@@ -1138,4 +1304,4 @@ Approve or reject a pending approval request.
|
||||
Expire stale approvals that have passed their expiration time.
|
||||
|
||||
- **Response:** `{ expired: int, items: [] }`
|
||||
- **Errors:** `503` — Database not ready
|
||||
- **Errors:** `503` — Database not ready
|
||||
|
||||
@@ -18,13 +18,13 @@ flowchart TB
|
||||
end
|
||||
|
||||
%% ── Scheduler ─────────────────────────────────────────────────
|
||||
scheduler["<b>Scheduler</b><br/><i>services.scheduler.app</i><br/>Cadence polling, rate limiting,<br/>backoff & stale recovery"]
|
||||
scheduler["<b>Scheduler</b><br/><i>services.scheduler.app</i><br/>Cadence polling, rate limiting,<br/>backoff, stale recovery,<br/>periodic aggregation,<br/>report scheduling"]
|
||||
|
||||
sources -.->|"API polling<br/>on cadence"| scheduler
|
||||
|
||||
%% ── Ingestion Queue ───────────────────────────────────────────
|
||||
q_ingestion[["stonks:queue:ingestion"]]
|
||||
scheduler -->|"rpush job"| q_ingestion
|
||||
scheduler -->|"rpush job<br/>(company, macro,<br/>global market)"| q_ingestion
|
||||
|
||||
%% ── Ingestion Worker ──────────────────────────────────────────
|
||||
ingestion["<b>Ingestion</b><br/><i>services.ingestion.worker</i><br/>Adapter dispatch, dedupe,<br/>raw artifact upload"]
|
||||
@@ -42,7 +42,7 @@ flowchart TB
|
||||
|
||||
%% ── Parsing Queue ─────────────────────────────────────────────
|
||||
q_parsing[["stonks:queue:parsing"]]
|
||||
ingestion -->|"rpush<br/>(news, filings,<br/>web_scrape)"| q_parsing
|
||||
ingestion -->|"rpush<br/>(news, filings,<br/>web_scrape, macro)"| q_parsing
|
||||
|
||||
%% ── Parser Worker ─────────────────────────────────────────────
|
||||
parser["<b>Parser</b><br/><i>services.parser.worker</i><br/>HTML parsing, quality scoring,<br/>company mention detection"]
|
||||
@@ -50,7 +50,7 @@ flowchart TB
|
||||
q_parsing -->|"lpop"| parser
|
||||
|
||||
minio_norm[("MinIO<br/><i>Normalized Text</i><br/><i>Parser Output JSON</i>")]
|
||||
parser -->|"upload normalized text"| minio_norm
|
||||
parser -->|"upload normalized text<br/>+ structured output"| minio_norm
|
||||
parser -->|"update document status,<br/>insert mentions"| pg_docs
|
||||
```
|
||||
|
||||
@@ -70,18 +70,23 @@ flowchart TB
|
||||
parser -->|"rpush<br/>(standard docs)"| q_extraction
|
||||
parser -->|"rpush<br/>(macro_event docs)"| q_macro
|
||||
|
||||
%% ── Scheduler Recovery ────────────────────────────────────────
|
||||
scheduler_recovery(("Scheduler<br/><i>stale recovery &<br/>failed retry</i>"))
|
||||
scheduler_recovery -.->|"re-enqueue orphaned<br/>parsed docs"| q_extraction
|
||||
scheduler_recovery -.->|"re-enqueue orphaned<br/>macro docs"| q_macro
|
||||
|
||||
%% ── Extractor Worker ──────────────────────────────────────────
|
||||
subgraph extractor_svc ["Extractor Service"]
|
||||
direction TB
|
||||
ext_main["<b>Extractor</b><br/><i>services.extractor.main</i><br/>Alternates between queues<br/>(2 extraction : 1 macro)"]
|
||||
ext_main["<b>Extractor</b><br/><i>services.extractor.main</i><br/>Alternates between queues<br/>(2 extraction : 1 macro)<br/>Token budget enforcement"]
|
||||
end
|
||||
|
||||
q_extraction -->|"lpop"| ext_main
|
||||
q_macro -->|"lpop"| ext_main
|
||||
|
||||
%% ── Ollama LLM ───────────────────────────────────────────────
|
||||
ollama["<b>Ollama</b><br/><i>LLM Inference</i><br/>document-extractor agent<br/>event-classifier agent"]
|
||||
ext_main <-->|"HTTP /api/generate"| ollama
|
||||
ollama["<b>Ollama / vLLM</b><br/><i>LLM Inference</i><br/>document-extractor agent<br/>event-classifier agent"]
|
||||
ext_main <-->|"HTTP /api/generate<br/>(AgentConfigResolver<br/>selects model + variant)"| ollama
|
||||
|
||||
%% ── Signal Layer 1: Company ───────────────────────────────────
|
||||
subgraph layer1 ["Layer 1 — Company Signals"]
|
||||
@@ -95,7 +100,7 @@ flowchart TB
|
||||
subgraph layer2 ["Layer 2 — Macro Signals"]
|
||||
direction LR
|
||||
ge["global_events"]
|
||||
mir["macro_impact_records<br/><i>per-company interpolation</i>"]
|
||||
mir["macro_impact_records<br/><i>per-company interpolation<br/>via exposure profiles</i>"]
|
||||
ge --> mir
|
||||
end
|
||||
|
||||
@@ -106,6 +111,10 @@ flowchart TB
|
||||
q_agg[["stonks:queue:aggregation"]]
|
||||
ext_main -->|"rpush<br/>(per ticker)"| q_agg
|
||||
|
||||
%% ── Scheduler Periodic Aggregation ────────────────────────────
|
||||
scheduler_agg(("Scheduler<br/><i>periodic aggregation<br/>every ~15 min</i>"))
|
||||
scheduler_agg -.->|"rpush all<br/>active tickers"| q_agg
|
||||
|
||||
%% ── Aggregation Worker ────────────────────────────────────────
|
||||
aggregation["<b>Aggregation</b><br/><i>services.aggregation.main</i><br/>Trend windows, scoring,<br/>contradiction detection"]
|
||||
|
||||
@@ -133,6 +142,8 @@ flowchart TB
|
||||
|
||||
## Recommendation → Trading → Broker
|
||||
|
||||
The recommendation worker consumes from the recommendation queue. The trading engine does **not** consume from a queue — it polls the `recommendations` table in PostgreSQL on a configurable interval, evaluates each recommendation through its decision pipeline, and pushes "act" decisions to the broker queue.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
%% ── Recommendation Queue ──────────────────────────────────────
|
||||
@@ -144,19 +155,23 @@ flowchart TB
|
||||
|
||||
q_rec -->|"lpop"| recommendation
|
||||
|
||||
ollama_thesis["<b>Ollama</b><br/><i>thesis-rewriter agent</i><br/>(optional LLM rewrite)"]
|
||||
ollama_thesis["<b>Ollama / vLLM</b><br/><i>thesis-rewriter agent</i><br/>(AgentConfigResolver<br/>selects model + variant)"]
|
||||
recommendation <-->|"rewrite thesis<br/>(trading-eligible only)"| ollama_thesis
|
||||
|
||||
pg_recs[("PostgreSQL<br/><i>recommendations,<br/>recommendation_evidence,<br/>risk_evaluations</i>")]
|
||||
recommendation -->|"persist recommendation<br/>+ evidence + risk eval"| pg_recs
|
||||
|
||||
%% ── Lake Publication (inline) ─────────────────────────────────
|
||||
minio_rec_lake[("MinIO<br/><i>Lakehouse</i><br/>recommendation facts")]
|
||||
recommendation -->|"publish_recommendation_facts<br/>(Parquet)"| minio_rec_lake
|
||||
|
||||
%% ── Trading Engine ────────────────────────────────────────────
|
||||
subgraph trading_loop ["Trading Engine Decision Loop"]
|
||||
direction TB
|
||||
poll["Poll recommendations<br/><i>action IN (buy, sell)<br/>mode IN (paper, live)<br/>generated_at > last_poll</i>"]
|
||||
dedup_check["Redis dedup check<br/><i>stonks:dedupe:trading:*</i>"]
|
||||
evaluate["evaluate_recommendation<br/><i>Circuit breaker check<br/>Trading window check<br/>Confidence gate<br/>Sector exposure check<br/>Correlation check<br/>Earnings blackout</i>"]
|
||||
size["Position sizing<br/><i>Kelly criterion,<br/>risk tier limits</i>"]
|
||||
evaluate["evaluate_recommendation<br/><i>Circuit breaker check<br/>Trading window check<br/>Confidence gate<br/>Sector exposure check<br/>Correlation check<br/>Earnings blackout<br/>Max positions check</i>"]
|
||||
size["Position sizing<br/><i>Kelly criterion,<br/>risk tier limits,<br/>micro-trade support</i>"]
|
||||
decide{{"Decision"}}
|
||||
poll --> dedup_check --> evaluate --> size --> decide
|
||||
end
|
||||
@@ -170,22 +185,30 @@ flowchart TB
|
||||
|
||||
pg_decisions[("PostgreSQL<br/><i>trading_decisions</i>")]
|
||||
|
||||
%% ── Manual Override ───────────────────────────────────────────
|
||||
trading_api(("Trading API<br/><i>POST /override/order</i>"))
|
||||
trading_api -->|"rpush<br/>manual order"| q_broker
|
||||
|
||||
%% ── Broker Adapter ────────────────────────────────────────────
|
||||
broker["<b>Broker Adapter</b><br/><i>services.adapters.broker_service</i><br/>Risk evaluation, idempotency,<br/>order submission, fill tracking"]
|
||||
broker["<b>Broker Adapter</b><br/><i>services.adapters.broker_service</i><br/>Idempotency, risk evaluation,<br/>approval gate, order submission,<br/>fill tracking, position sync"]
|
||||
|
||||
q_broker -->|"lpop"| broker
|
||||
|
||||
%% ── Risk Engine ───────────────────────────────────────────────
|
||||
risk["<b>Risk Engine</b><br/><i>services.risk.app</i><br/>POST /evaluate<br/>Approval workflow"]
|
||||
broker <-->|"evaluate order"| risk
|
||||
risk["<b>Risk Engine</b><br/><i>services.risk.app</i><br/>evaluate_order()<br/>Position limits, sector exposure,<br/>daily loss caps, approval workflow"]
|
||||
broker -->|"evaluate order<br/>(inline call)"| risk
|
||||
|
||||
%% ── Alpaca ────────────────────────────────────────────────────
|
||||
alpaca["<b>Alpaca</b><br/><i>Paper Trading API</i><br/>Order submission,<br/>position sync"]
|
||||
broker <-->|"submit order /<br/>sync positions"| alpaca
|
||||
alpaca["<b>Alpaca</b><br/><i>Paper Trading API</i><br/>Order submission,<br/>position sync,<br/>account state"]
|
||||
broker <-->|"submit order /<br/>sync positions /<br/>sync order status"| alpaca
|
||||
|
||||
pg_orders[("PostgreSQL<br/><i>orders, order_events,<br/>positions,<br/>portfolio_snapshots</i>")]
|
||||
pg_orders[("PostgreSQL<br/><i>orders, order_events,<br/>positions,<br/>portfolio_snapshots,<br/>broker_accounts</i>")]
|
||||
broker -->|"persist order,<br/>events, positions"| pg_orders
|
||||
|
||||
%% ── Lake Publication (broker inline) ──────────────────────────
|
||||
minio_broker_lake[("MinIO<br/><i>Lakehouse</i><br/>order + fill + position facts")]
|
||||
broker -->|"publish_trade_order<br/>publish_trade_fill<br/>publish_positions_daily_batch<br/>(Parquet)"| minio_broker_lake
|
||||
|
||||
%% ── Notifications ─────────────────────────────────────────────
|
||||
subgraph notifications ["Notifications"]
|
||||
direction LR
|
||||
@@ -198,28 +221,32 @@ flowchart TB
|
||||
|
||||
## Analytical Branch — Lake Publisher
|
||||
|
||||
The lake publisher runs as a separate worker, consuming from its own queue and writing partitioned Parquet fact tables to MinIO for analytical queries.
|
||||
The lake publisher runs as a separate worker, consuming from its own queue and writing partitioned Parquet fact tables to MinIO for analytical queries. Some services (broker adapter, recommendation worker) also publish facts directly to MinIO inline, bypassing the queue.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
%% ── Lake Publish Queue ────────────────────────────────────────
|
||||
q_lake[["stonks:queue:lake_publish"]]
|
||||
|
||||
various(("Various Services<br/><i>ingestion, extractor,<br/>recommendation,<br/>broker adapter</i>"))
|
||||
various -->|"enqueue_lake_job"| q_lake
|
||||
various(("Upstream Services<br/><i>via enqueue_lake_job()</i>"))
|
||||
various -->|"rpush job<br/>(job_type + entity_id)"| q_lake
|
||||
|
||||
%% ── Lake Publisher Worker ─────────────────────────────────────
|
||||
lake["<b>Lake Publisher</b><br/><i>services.lake_publisher.jobs</i><br/>Transforms operational data<br/>into analytical facts"]
|
||||
lake["<b>Lake Publisher</b><br/><i>services.lake_publisher.jobs</i><br/>Transforms operational data<br/>into analytical facts<br/><i>15 job types supported</i>"]
|
||||
|
||||
q_lake -->|"lpop"| lake
|
||||
|
||||
pg_source[("PostgreSQL<br/><i>Operational Tables</i><br/>documents, extractions,<br/>orders, positions, events")]
|
||||
pg_source[("PostgreSQL<br/><i>Operational Tables</i><br/>documents, extractions,<br/>orders, positions, events,<br/>global_events, macro_impacts,<br/>competitive_signals")]
|
||||
lake -->|"query source data"| pg_source
|
||||
|
||||
%% ── MinIO Parquet ─────────────────────────────────────────────
|
||||
minio_lake[("MinIO<br/><i>Lakehouse Bucket</i><br/>Partitioned Parquet<br/>/year=/month=/day=")]
|
||||
lake -->|"write Parquet files"| minio_lake
|
||||
|
||||
%% ── Inline Publishers ─────────────────────────────────────────
|
||||
inline(("Inline Publishers<br/><i>broker adapter,<br/>recommendation worker</i>"))
|
||||
inline -->|"publish_* functions<br/>(direct Parquet write)"| minio_lake
|
||||
|
||||
%% ── Trino ─────────────────────────────────────────────────────
|
||||
trino["<b>Trino</b><br/><i>SQL Query Engine</i><br/>Hive connector → MinIO"]
|
||||
minio_lake -->|"read via<br/>Hive Metastore"| trino
|
||||
@@ -238,18 +265,40 @@ flowchart LR
|
||||
query_api --> dashboard
|
||||
```
|
||||
|
||||
## Report Generation
|
||||
|
||||
The scheduler manages report generation as a sub-loop: it enqueues daily and weekly report jobs to a dedicated queue and then consumes those jobs itself (inline) rather than handing them to a separate worker.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
scheduler["<b>Scheduler</b><br/><i>report schedule check</i><br/>daily @ 16:30 ET<br/>weekly @ Saturday"]
|
||||
|
||||
q_report[["stonks:queue:report_generation"]]
|
||||
scheduler -->|"rpush<br/>(daily/weekly)"| q_report
|
||||
|
||||
scheduler_consumer["<b>Scheduler</b><br/><i>report consumer loop</i><br/>pops up to 5 jobs/cycle"]
|
||||
q_report -->|"lpop"| scheduler_consumer
|
||||
|
||||
generator["<b>Report Generator</b><br/><i>services.reporting.generator</i>"]
|
||||
scheduler_consumer -->|"process_report_job()"| generator
|
||||
|
||||
pg_reports[("PostgreSQL<br/><i>trading_reports</i>")]
|
||||
generator -->|"persist report"| pg_reports
|
||||
```
|
||||
|
||||
## Complete Queue Topology
|
||||
|
||||
| Queue | Full Key | Producer(s) | Consumer |
|
||||
|-------|----------|-------------|----------|
|
||||
| Ingestion | `stonks:queue:ingestion` | Scheduler | Ingestion Worker |
|
||||
| Parsing | `stonks:queue:parsing` | Ingestion Worker | Parser Worker |
|
||||
| Extraction | `stonks:queue:extraction` | Parser (standard docs) | Extractor Worker |
|
||||
| Macro Classification | `stonks:queue:macro_classification` | Parser (macro_event docs), Scheduler | Extractor Worker |
|
||||
| Aggregation | `stonks:queue:aggregation` | Extractor Worker | Aggregation Worker |
|
||||
| Recommendation | `stonks:queue:recommendation` | Aggregation Worker | Recommendation Worker |
|
||||
| Broker Orders | `stonks:queue:broker_orders` | Trading Engine, Trading API (manual overrides) | Broker Adapter |
|
||||
| Lake Publish | `stonks:queue:lake_publish` | Various services | Lake Publisher |
|
||||
| Ingestion | `stonks:queue:ingestion` | Scheduler (company, macro, global market sources) | Ingestion Worker |
|
||||
| Parsing | `stonks:queue:parsing` | Ingestion Worker (news, filings, web_scrape, macro) | Parser Worker |
|
||||
| Extraction | `stonks:queue:extraction` | Parser (standard docs), Scheduler (stale recovery) | Extractor Worker |
|
||||
| Macro Classification | `stonks:queue:macro_classification` | Parser (macro_event docs), Scheduler (stale/failed recovery) | Extractor Worker |
|
||||
| Aggregation | `stonks:queue:aggregation` | Extractor Worker (per ticker), Scheduler (periodic, all tickers) | Aggregation Worker |
|
||||
| Recommendation | `stonks:queue:recommendation` | Aggregation Worker (ticker + window, 5 min dedup TTL) | Recommendation Worker |
|
||||
| Broker Orders | `stonks:queue:broker_orders` | Trading Engine (act decisions), Trading API (manual overrides) | Broker Adapter |
|
||||
| Lake Publish | `stonks:queue:lake_publish` | Various services (via `enqueue_lake_job()`) | Lake Publisher |
|
||||
| Report Generation | `stonks:queue:report_generation` | Scheduler (daily/weekly triggers) | Scheduler (inline consumer) |
|
||||
|
||||
Dead-letter queues follow the pattern `stonks:dlq:<queue_name>` and are populated when a job exhausts its retry budget.
|
||||
|
||||
@@ -257,18 +306,25 @@ Dead-letter queues follow the pattern `stonks:dlq:<queue_name>` and are populate
|
||||
|
||||
| Store | Role | Key Tables / Buckets |
|
||||
|-------|------|---------------------|
|
||||
| **PostgreSQL** | Structured operational data | `documents`, `document_intelligence`, `document_impact_records`, `global_events`, `macro_impact_records`, `competitive_signal_records`, `trend_windows`, `trend_history`, `trend_projections`, `recommendations`, `recommendation_evidence`, `risk_evaluations`, `orders`, `order_events`, `positions`, `portfolio_snapshots`, `trading_decisions` |
|
||||
| **Redis** | Queues, dedup markers, rate limits, circuit breaker state | `stonks:queue:*`, `stonks:dedupe:*`, `stonks:ratelimit:*`, `stonks:trading:circuit_breaker:*`, `stonks:dlq:*` |
|
||||
| **MinIO** | Object storage for raw artifacts, normalized text, and analytical Parquet files | Raw artifacts bucket, normalized text bucket, lakehouse bucket (partitioned Parquet) |
|
||||
| **PostgreSQL** | Structured operational data | `documents`, `document_intelligence`, `document_impact_records`, `document_company_mentions`, `global_events`, `macro_impact_records`, `exposure_profiles`, `competitive_signal_records`, `competitor_relationships`, `trend_windows`, `trend_history`, `trend_projections`, `recommendations`, `recommendation_evidence`, `risk_evaluations`, `orders`, `order_events`, `positions`, `portfolio_snapshots`, `trading_decisions`, `circuit_breaker_events`, `reserve_pool_ledger`, `risk_tier_history`, `broker_accounts`, `ingestion_runs`, `sources`, `companies`, `company_aliases`, `ai_agents`, `agent_variants`, `agent_performance_log`, `risk_configs`, `trading_reports` |
|
||||
| **Redis** | Queues, dedup markers, rate limits, circuit breaker state, pipeline toggle | `stonks:queue:*` (9 queues), `stonks:dedupe:*`, `stonks:dedupe:trading:*`, `stonks:ratelimit:*`, `stonks:trading:circuit_breaker:*`, `stonks:trading:notification_rate:*`, `stonks:order_idempotency:*`, `stonks:lock:*`, `stonks:cache:*`, `stonks:retry:*`, `stonks:rec_dedup:*`, `stonks:pipeline:enabled`, `stonks:dlq:*` |
|
||||
| **MinIO** | Object storage for raw artifacts, normalized text, and analytical Parquet files | Raw artifacts bucket, normalized text bucket, parser output bucket, lakehouse bucket (partitioned Parquet: documents, extractions, market bars/quotes, orders, fills, positions, PnL, global events, macro impacts, trend projections, competitive signals, competitor relationships, recommendations) |
|
||||
|
||||
## External Integration Points
|
||||
|
||||
| Integration | Service | Protocol | Purpose |
|
||||
|-------------|---------|----------|---------|
|
||||
| **Polygon.io** | Ingestion (via adapters) | HTTPS REST | News articles, market bars, grouped daily data |
|
||||
| **SEC EDGAR** | Ingestion (via FilingsDataAdapter) | HTTPS REST | 10-K, 10-Q filings |
|
||||
| **Ollama** | Extractor, Recommendation | HTTP `/api/generate` | LLM inference for document extraction, event classification, thesis rewriting |
|
||||
| **Alpaca** | Broker Adapter | HTTPS REST | Paper trading order submission, position sync, account state |
|
||||
| **Polygon.io** | Ingestion (via PolygonNewsAdapter, PolygonMarketAdapter) | HTTPS REST | News articles, market bars, grouped daily data, intraday bars |
|
||||
| **SEC EDGAR** | Ingestion (via SECEdgarAdapter) | HTTPS REST | 10-K, 10-Q filings |
|
||||
| **Macro News** | Ingestion (via MacroNewsAdapter) | HTTPS REST | Geopolitical and economic event articles |
|
||||
| **Ollama / vLLM** | Extractor, Recommendation | HTTP `/api/generate` | LLM inference for document extraction (document-extractor agent), event classification (event-classifier agent), thesis rewriting (thesis-rewriter agent). Model and variant selected via `AgentConfigResolver` with 60s TTL cache. |
|
||||
| **Alpaca** | Broker Adapter | HTTPS REST | Paper/live trading: order submission, position sync, account state, order status polling |
|
||||
| **AWS SNS** | Trading Engine (notifications) | boto3 SDK | SMS alerts for circuit breaker trips, order fills, stop-loss triggers |
|
||||
| **Gmail** | Trading Engine (notifications) | SMTP (port 587 STARTTLS) | Email alerts for trading events |
|
||||
| **Trino** | Query API, Superset | JDBC / HTTP | SQL queries over lakehouse Parquet files |
|
||||
| **Trino** | Query API, Superset | HTTP | SQL queries over lakehouse Parquet files via Hive Metastore |
|
||||
|
||||
## Pipeline Toggle
|
||||
|
||||
The pipeline can be paused globally via the Redis key `stonks:pipeline:enabled`. When set to `"0"`, all queue workers (ingestion, parser, extractor, aggregation, recommendation, broker adapter, lake publisher) enter a sleep loop and stop processing jobs. The scheduler also skips scheduling cycles when the toggle is off. The toggle can be set via the Query API's pipeline control endpoints.
|
||||
|
||||
Setting `PIPELINE_DEFAULT_OFF=true` on the scheduler initializes the toggle to OFF on first boot. This is useful for staged deployments where you want to verify infrastructure before enabling the pipeline.
|
||||
|
||||
@@ -53,7 +53,7 @@ graph TB
|
||||
subgraph trading_tier ["Trading Tier"]
|
||||
direction LR
|
||||
trading_engine["trading-engine<br/><i>docker/Dockerfile</i><br/><i>uvicorn services.trading.app</i><br/>host :8002 → :8000"]
|
||||
risk_engine["risk-engine<br/><i>docker/Dockerfile</i><br/><i>uvicorn services.risk.app</i><br/>host :8003 → :8000"]
|
||||
risk_engine["risk-engine<br/><i>docker/Dockerfile</i><br/><i>uvicorn services.risk.app</i><br/>host :8003 → :8000<br/><i>alias: risk</i>"]
|
||||
broker_adapter["broker-adapter<br/><i>docker/Dockerfile</i><br/><i>python -m services.adapters.broker_service</i><br/><i>no host port</i>"]
|
||||
end
|
||||
|
||||
@@ -320,3 +320,4 @@ All containers share the default Docker Compose network. Services reference each
|
||||
| `hive-metastore` | Hive Metastore container | trino (thrift://hive-metastore:9083) |
|
||||
| `trino` | Trino container | superset (trino:8080) |
|
||||
| `query-api` | Query API container | dashboard (nginx proxy upstream) |
|
||||
| `risk` | risk-engine container (network alias) | trading-engine (risk evaluation calls) |
|
||||
|
||||
@@ -11,7 +11,7 @@ graph TB
|
||||
%% ── External traffic ──────────────────────────────────────────
|
||||
internet((Internet))
|
||||
|
||||
subgraph traefik ["kube-system (Traefik Ingress Controller)"]
|
||||
subgraph traefik ["kube-system · Traefik Ingress Controller"]
|
||||
direction LR
|
||||
ing_dash["stonks.celestium.life"]
|
||||
ing_api["stonks-api.celestium.life"]
|
||||
@@ -28,47 +28,55 @@ graph TB
|
||||
direction TB
|
||||
|
||||
%% ── API Tier (ingress-facing) ─────────────────────────────
|
||||
subgraph api_tier ["API Tier"]
|
||||
subgraph api_tier ["API Tier · tier: api"]
|
||||
direction LR
|
||||
query_api["query-api<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
symbol_registry["symbol-registry<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
query_api["query-api<br/><i>Deployment · 1 replica</i><br/>:8000<br/><i>readiness: /docs</i>"]
|
||||
symbol_registry["symbol-registry<br/><i>Deployment · 1 replica</i><br/>:8000<br/><i>readiness: /docs · liveness: /docs</i>"]
|
||||
end
|
||||
|
||||
%% ── Frontend Tier ─────────────────────────────────────────
|
||||
subgraph frontend_tier ["Frontend Tier"]
|
||||
dashboard["dashboard<br/><i>Deployment (1 replica)</i><br/>:8080<br/><i>nginx-unprivileged</i>"]
|
||||
subgraph frontend_tier ["Frontend Tier · tier: frontend"]
|
||||
dashboard["dashboard<br/><i>Deployment · 1 replica</i><br/>:8080<br/><i>nginx-unprivileged</i><br/><i>readiness: / · liveness: /</i>"]
|
||||
end
|
||||
|
||||
%% ── Trading Tier ──────────────────────────────────────────
|
||||
subgraph trading_tier ["Trading Tier"]
|
||||
subgraph trading_tier ["Trading Tier · tier: trading"]
|
||||
direction LR
|
||||
trading_engine["trading-engine<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
risk_engine["risk-engine<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
broker_adapter["broker-adapter<br/><i>Deployment (1 replica)</i><br/><i>queue-driven worker</i>"]
|
||||
trading_engine["trading-engine<br/><i>Deployment · 1 replica</i><br/>:8000<br/><i>readiness: /ready · liveness: /health</i>"]
|
||||
risk_engine["risk-engine<br/><i>Deployment · 1 replica</i><br/>:8000"]
|
||||
broker_adapter["broker-adapter<br/><i>Deployment · 1 replica</i><br/><i>queue-driven worker · pipeline-gated</i>"]
|
||||
end
|
||||
|
||||
%% ── Orchestration Tier ────────────────────────────────────
|
||||
subgraph orchestration_tier ["Orchestration Tier"]
|
||||
scheduler["scheduler<br/><i>Deployment (1 replica)</i><br/><i>runs migrations + seed</i>"]
|
||||
subgraph orchestration_tier ["Orchestration Tier · tier: orchestration"]
|
||||
scheduler["scheduler<br/><i>Deployment · 1 replica · pipeline-gated</i><br/><i>init: migrations → seed → backfill</i>"]
|
||||
end
|
||||
|
||||
%% ── Ingestion Tier ────────────────────────────────────────
|
||||
subgraph ingestion_tier ["Ingestion Tier · tier: ingestion"]
|
||||
ingestion["ingestion<br/><i>Deployment · 1 replica · pipeline-gated</i><br/><i>queue-driven worker</i>"]
|
||||
end
|
||||
|
||||
%% ── Processing Tier (pipeline workers) ────────────────────
|
||||
subgraph processing_tier ["Processing Tier (pipeline workers)"]
|
||||
subgraph processing_tier ["Processing Tier · tier: processing"]
|
||||
direction LR
|
||||
ingestion["ingestion<br/><i>Deployment (2 replicas)</i>"]
|
||||
parser["parser<br/><i>Deployment (2 replicas)</i>"]
|
||||
extractor["extractor<br/><i>Deployment (1 replica)</i>"]
|
||||
aggregation["aggregation<br/><i>Deployment (4 replicas)</i>"]
|
||||
recommendation["recommendation<br/><i>Deployment (1 replica)</i>"]
|
||||
parser["parser<br/><i>Deployment · 2 replicas · pipeline-gated</i>"]
|
||||
extractor["extractor<br/><i>Deployment · 1 replica · pipeline-gated</i>"]
|
||||
aggregation["aggregation<br/><i>Deployment · 4 replicas · pipeline-gated</i>"]
|
||||
recommendation["recommendation<br/><i>Deployment · 1 replica · pipeline-gated</i>"]
|
||||
end
|
||||
|
||||
%% ── Analytics Tier ────────────────────────────────────────
|
||||
subgraph analytics_tier ["Analytics Tier"]
|
||||
subgraph analytics_tier ["Analytics Tier · tier: analytics"]
|
||||
direction LR
|
||||
lake_publisher["lake-publisher<br/><i>Deployment (1 replica)</i><br/><i>queue-driven worker</i>"]
|
||||
hive_metastore["hive-metastore<br/><i>Deployment (1 replica)</i><br/>:9083<br/><i>apache/hive:4.0.0</i>"]
|
||||
trino["trino<br/><i>Deployment (1 replica)</i><br/>:8080<br/><i>trinodb/trino:latest</i>"]
|
||||
superset["superset<br/><i>Deployment (1 replica)</i><br/>:8088<br/><i>custom image</i>"]
|
||||
lake_publisher["lake-publisher<br/><i>Deployment · 1 replica · pipeline-gated</i><br/><i>queue-driven worker</i>"]
|
||||
hive_metastore["hive-metastore<br/><i>Deployment · 1 replica</i><br/>:9083<br/><i>apache/hive:4.0.0</i><br/><i>PVC: hive-metastore-data</i>"]
|
||||
trino["trino<br/><i>Deployment · 1 replica</i><br/>:8080<br/><i>trinodb/trino:latest</i><br/><i>readiness: /v1/info</i>"]
|
||||
end
|
||||
|
||||
%% ── Superset (tier: dashboard in template) ────────────────
|
||||
subgraph superset_block ["Superset · tier: dashboard"]
|
||||
superset["superset<br/><i>Deployment · 1 replica</i><br/>:8088<br/><i>custom image</i><br/><i>PVC: superset-data</i><br/><i>readiness: /health</i>"]
|
||||
end
|
||||
|
||||
%% ── Helm Secrets ──────────────────────────────────────────
|
||||
@@ -99,7 +107,7 @@ graph TB
|
||||
end
|
||||
|
||||
subgraph ollama_ns ["ollama-service namespace"]
|
||||
ollama[("Ollama<br/>ollama:11434<br/><i>GPU: 4070 Ti Super</i>")]
|
||||
ollama[("Ollama<br/>ollama:11434<br/><i>GPU: 4070 Ti Super 16GB</i>")]
|
||||
end
|
||||
|
||||
%% ── Ingress Routes ────────────────────────────────────────────
|
||||
@@ -191,6 +199,7 @@ graph TB
|
||||
sec_broker -.-> broker_adapter
|
||||
|
||||
sec_market -.-> ingestion
|
||||
sec_market -.-> query_api
|
||||
|
||||
sec_gmail -.-> trading_engine
|
||||
|
||||
@@ -216,7 +225,9 @@ graph TB
|
||||
classDef tradingSvc fill:#e8a838,stroke:#b07d1a,color:#fff
|
||||
classDef processSvc fill:#9b59b6,stroke:#6c3483,color:#fff
|
||||
classDef orchSvc fill:#1abc9c,stroke:#148f77,color:#fff
|
||||
classDef ingestionSvc fill:#e67e22,stroke:#bf6516,color:#fff
|
||||
classDef analyticsSvc fill:#e74c3c,stroke:#a93226,color:#fff
|
||||
classDef supersetSvc fill:#c0392b,stroke:#96281b,color:#fff
|
||||
classDef extSvc fill:#95a5a6,stroke:#717d7e,color:#fff
|
||||
classDef secretSvc fill:#f5f5dc,stroke:#999,color:#333
|
||||
classDef configSvc fill:#dfe6e9,stroke:#999,color:#333
|
||||
@@ -225,8 +236,10 @@ graph TB
|
||||
class dashboard frontendSvc
|
||||
class trading_engine,risk_engine,broker_adapter tradingSvc
|
||||
class scheduler orchSvc
|
||||
class ingestion,parser,extractor,aggregation,recommendation processSvc
|
||||
class lake_publisher,hive_metastore,trino,superset analyticsSvc
|
||||
class ingestion ingestionSvc
|
||||
class parser,extractor,aggregation,recommendation processSvc
|
||||
class lake_publisher,hive_metastore,trino analyticsSvc
|
||||
class superset supersetSvc
|
||||
class postgres,redis,minio,ollama extSvc
|
||||
class sec_core,sec_broker,sec_market,sec_gmail,sec_dashboard secretSvc
|
||||
class configmap configSvc
|
||||
@@ -284,8 +297,8 @@ The following services have **no inbound network policy** — they are queue-dri
|
||||
|
||||
| Service | Tier | Behavior |
|
||||
|---------|------|----------|
|
||||
| scheduler | orchestration | Polls DB, enqueues to Redis |
|
||||
| ingestion | processing | Reads from `stonks:queue:ingestion`, writes to DB/MinIO/Redis |
|
||||
| scheduler | orchestration | Polls DB, enqueues to Redis. Runs migrations + seed + backfill as init containers |
|
||||
| ingestion | ingestion | Reads from `stonks:queue:ingestion`, writes to DB/MinIO/Redis. Egress to Polygon.io/News APIs |
|
||||
| parser | processing | Reads from `stonks:queue:parsing`, writes to DB/Redis |
|
||||
| extractor | processing | Reads from `stonks:queue:extraction`, calls Ollama, writes to DB/Redis |
|
||||
| aggregation | processing | Reads from `stonks:queue:aggregation`, writes to DB/Redis |
|
||||
@@ -294,22 +307,24 @@ The following services have **no inbound network policy** — they are queue-dri
|
||||
|
||||
## Service Tier Summary
|
||||
|
||||
| Tier | Services | Ingress? | Replicas | Notes |
|
||||
|------|----------|----------|----------|-------|
|
||||
| **api** | query-api, symbol-registry | Yes (Traefik) | 1 each | FastAPI, readiness probes on `/docs` |
|
||||
| **frontend** | dashboard | Yes (Traefik) | 1 | nginx-unprivileged on :8080, proxies to API services |
|
||||
| **trading** | trading-engine, risk-engine, broker-adapter | trading-engine: Yes; risk-engine: internal only; broker-adapter: denied | 1 each | trading-engine has egress to Alpaca + Gmail |
|
||||
| **orchestration** | scheduler | No | 1 | Runs DB migrations + seed as init containers |
|
||||
| **processing** | ingestion, parser, extractor, aggregation, recommendation | No | 2, 2, 1, 4, 1 | Pipeline-gated by `pipelineEnabled` toggle |
|
||||
| **analytics** | lake-publisher, trino, hive-metastore, superset | trino + superset: Yes; others: No | 1 each | lake-publisher is pipeline-gated |
|
||||
| Tier | Services | Ingress? | Replicas | Pipeline-Gated? | Notes |
|
||||
|------|----------|----------|----------|-----------------|-------|
|
||||
| **api** | query-api, symbol-registry | Yes (Traefik) | 1 each | No | FastAPI, readiness probes on `/docs` |
|
||||
| **frontend** | dashboard | Yes (Traefik) | 1 | No | nginx-unprivileged on :8080, proxies to API services |
|
||||
| **trading** | trading-engine, risk-engine, broker-adapter | trading-engine: Yes; risk-engine: internal only; broker-adapter: denied | 1 each | broker-adapter only | trading-engine has egress to Alpaca + Gmail |
|
||||
| **orchestration** | scheduler | No | 1 | Yes | Runs DB migrations + seed + backfill as init containers |
|
||||
| **ingestion** | ingestion | No | 1 | Yes | Fetches from external APIs (Polygon.io, news, filings) |
|
||||
| **processing** | parser, extractor, aggregation, recommendation | No | 2, 1, 4, 1 | Yes | Queue-driven pipeline workers |
|
||||
| **analytics** | lake-publisher, trino, hive-metastore | trino: Yes (Traefik); others: No | 1 each | lake-publisher only | trino + hive-metastore gated by `trino.enabled` / `hiveMetastore.enabled` |
|
||||
| **dashboard** (Superset) | superset | Yes (Traefik) | 1 | No | Gated by `superset.enabled`, custom image with trino + psycopg2 drivers |
|
||||
|
||||
## Secret Consumption Map
|
||||
|
||||
| Secret | Keys | Consumers |
|
||||
|--------|------|-----------|
|
||||
| `stonks-core-secrets` | POSTGRES_PASSWORD, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, REDIS_PASSWORD | All 13 app services + hive-metastore, trino, superset |
|
||||
| `stonks-core-secrets` | POSTGRES_PASSWORD, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, REDIS_PASSWORD | All 13 app services + hive-metastore (init), trino (init), superset |
|
||||
| `stonks-broker-secrets` | BROKER_API_KEY, BROKER_API_SECRET, BROKER_BASE_URL | ingestion, trading-engine, risk-engine, broker-adapter |
|
||||
| `stonks-market-secrets` | MARKET_DATA_API_KEY | ingestion |
|
||||
| `stonks-market-secrets` | MARKET_DATA_API_KEY | ingestion, query-api |
|
||||
| `stonks-gmail-secrets` | GMAIL_SENDER, GMAIL_RECIPIENT, GMAIL_APP_PASSWORD | trading-engine |
|
||||
| `stonks-dashboard-secrets` | SUPERSET_SECRET_KEY, SUPERSET_ADMIN_PASSWORD | superset |
|
||||
|
||||
@@ -336,10 +351,10 @@ These services run outside the `stonks-oracle` namespace and are referenced via
|
||||
|
||||
The analytics stack runs within the `stonks-oracle` namespace:
|
||||
|
||||
1. **Lake Publisher** writes Parquet fact tables to MinIO at `s3a://stonks-lakehouse/warehouse`
|
||||
2. **Hive Metastore** (Apache Hive 4.0.0) manages table metadata, backed by embedded Derby DB with a PVC for persistence. Connects to MinIO for S3A filesystem access.
|
||||
3. **Trino** queries the lakehouse via Hive Metastore (thrift://hive-metastore:9083). Exposes two catalogs: `lakehouse` (Hive connector) and `iceberg` (Iceberg connector). Both connect to MinIO for data access.
|
||||
4. **Superset** connects to Trino for lakehouse queries and to PostgreSQL for its metadata DB. Uses Redis for caching. Exposed externally via Traefik ingress.
|
||||
1. **Lake Publisher** writes Parquet fact tables to MinIO at `s3a://stonks-lakehouse/warehouse`. Pipeline-gated — scales to 0 when `pipelineEnabled: false`.
|
||||
2. **Hive Metastore** (Apache Hive 4.0.0) manages table metadata, backed by embedded Derby DB with a PVC (`hive-metastore-data`) for persistence. Connects to MinIO for S3A filesystem access. Gated by `hiveMetastore.enabled`.
|
||||
3. **Trino** queries the lakehouse via Hive Metastore (`thrift://hive-metastore:9083`). Exposes two catalogs: `lakehouse` (Hive connector) and `iceberg` (Iceberg connector). Both connect to MinIO for data access. Gated by `trino.enabled`. Readiness probe on `/v1/info`.
|
||||
4. **Superset** connects to Trino for lakehouse queries and to PostgreSQL for its metadata DB. Uses Redis for caching. Exposed externally via Traefik ingress. Gated by `superset.enabled`. Uses custom image (`registry.celestium.life/stonks-oracle/superset:latest`) with trino + psycopg2 drivers. PVC (`superset-data`) for persistence.
|
||||
|
||||
## Ingress Routes
|
||||
|
||||
@@ -353,3 +368,13 @@ All ingress resources use the `traefik` IngressClass with TLS certificates issue
|
||||
| `stonks-trading.celestium.life` | trading-engine | 8000 | `stonks-trading-tls` |
|
||||
| `stonks-dash.celestium.life` | superset | 8088 | `stonks-dash-tls` |
|
||||
| `stonks-trino.celestium.life` | trino | 8080 | `stonks-trino-tls` |
|
||||
|
||||
## Deployment Stages
|
||||
|
||||
The Helm chart supports multiple deployment stages via value override files:
|
||||
|
||||
| Stage | Override File | Namespace | Key Differences |
|
||||
|-------|--------------|-----------|-----------------|
|
||||
| **Production** | `values.yaml` (base) | `stonks-oracle` | Full analytics stack, all services |
|
||||
| **Paper** | `values-paper.yaml` | `stonks-oracle` | `BROKER_MODE=paper`, `DEPLOY_STAGE=paper`, separate DB (`stonks_paper`), Redis DB 2, paper-specific ingress hostnames |
|
||||
| **Beta** | `values-beta.yaml` | `stonks-oracle-beta` | `DEPLOY_STAGE=beta`, `LOG_LEVEL=DEBUG`, separate DB (`stonks_beta`), Redis DB 1, analytics stack disabled, beta-specific ingress hostnames |
|
||||
|
||||
+180
-29
@@ -5,6 +5,7 @@ This guide covers running the full Stonks Oracle platform locally using Docker C
|
||||
## Prerequisites
|
||||
|
||||
- Docker Engine 24+ and Docker Compose v2
|
||||
- NVIDIA GPU with drivers and NVIDIA Container Toolkit (for Ollama LLM inference)
|
||||
- At least 16 GB RAM (Ollama + Trino + all services)
|
||||
- API keys for Polygon.io and Alpaca (optional — platform runs in degraded mode without them)
|
||||
|
||||
@@ -14,20 +15,54 @@ This guide covers running the full Stonks Oracle platform locally using Docker C
|
||||
# 1. Clone the repository
|
||||
git clone <repo-url> && cd stonks-oracle
|
||||
|
||||
# 2. Configure API keys
|
||||
cp .env.example .env # or edit the existing .env
|
||||
# Fill in MARKET_DATA_API_KEY, BROKER_API_KEY, BROKER_API_SECRET
|
||||
# 2. Configure API keys (create .env in the repo root)
|
||||
cat > .env <<'EOF'
|
||||
MARKET_DATA_API_KEY=your_polygon_key
|
||||
BROKER_API_KEY=your_alpaca_key
|
||||
BROKER_API_SECRET=your_alpaca_secret
|
||||
BROKER_BASE_URL=https://paper-api.alpaca.markets
|
||||
EOF
|
||||
|
||||
# 3. Start everything
|
||||
docker compose up -d
|
||||
|
||||
# 4. Verify all services are healthy
|
||||
# 4. Pull an LLM model into Ollama
|
||||
docker compose exec ollama ollama pull qwen3.5:9b-fast
|
||||
|
||||
# 5. Seed the database
|
||||
docker compose exec scheduler python -m services.symbol_registry.seed
|
||||
|
||||
# 6. Verify all services are healthy
|
||||
docker compose ps
|
||||
|
||||
# 5. Access the dashboard
|
||||
# 7. Access the dashboard
|
||||
open http://localhost:3000
|
||||
```
|
||||
|
||||
### Automated Deployment
|
||||
|
||||
The `deploy-docker.sh` script automates the full deployment to a remote host via SSH, including prerequisite installation, repository sync, environment configuration, image builds, service startup, database seeding, and Ollama model pulling:
|
||||
|
||||
```bash
|
||||
# Deploy with defaults (GPU-accelerated Docker Ollama)
|
||||
bash deploy-docker.sh
|
||||
|
||||
# Specify a custom Ollama model
|
||||
bash deploy-docker.sh --ollama-model qwen3.6
|
||||
|
||||
# Deploy to a different host
|
||||
bash deploy-docker.sh --host user@myserver --dir /opt/stonks
|
||||
```
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--host` | `celes@192.168.42.254` | SSH target (`USER@HOST`) |
|
||||
| `--ollama-url` | (auto — Docker container) | Ollama API URL |
|
||||
| `--ollama-model` | `qwen3.5:9b-fast` | Ollama model to pull |
|
||||
| `--dir` | `~/stonks-oracle` | Remote install directory |
|
||||
|
||||
The script detects the target OS and package manager (apt, dnf, yum, pacman, zypper) and installs Docker, NVIDIA drivers, and the NVIDIA Container Toolkit as needed. It also handles WSL environments and firewall configuration.
|
||||
|
||||
---
|
||||
|
||||
## Service Inventory
|
||||
@@ -63,6 +98,8 @@ open http://localhost:3000
|
||||
| `query-api` | `docker/Dockerfile` | `uvicorn services.api.app:app --host 0.0.0.0 --port 8000` | `8004:8000` | postgres (healthy), redis (healthy), minio (healthy) |
|
||||
| `dashboard` | `frontend/Dockerfile` | nginx (built-in) | `3000:8080` | query-api (healthy) |
|
||||
|
||||
The `risk-engine` service has a Docker network alias of `risk` so the dashboard's nginx reverse proxy can resolve it as `http://risk:8000`.
|
||||
|
||||
### Port Summary
|
||||
|
||||
| Port | Service | Protocol |
|
||||
@@ -109,15 +146,27 @@ The `.env` file is loaded by `ingestion`, `broker-adapter`, and `trading-engine`
|
||||
|
||||
```dotenv
|
||||
# Stonks Oracle — Environment Variables
|
||||
# These are loaded by ingestion, broker-adapter, and trading-engine services.
|
||||
# Loaded by: ingestion, broker-adapter, trading-engine
|
||||
|
||||
# Polygon.io market data API key (required for live data ingestion)
|
||||
# ── Required for live data ingestion ──
|
||||
MARKET_DATA_API_KEY=
|
||||
|
||||
# Alpaca broker credentials (required for paper/live trading)
|
||||
# ── Required for paper/live trading ──
|
||||
BROKER_API_KEY=
|
||||
BROKER_API_SECRET=
|
||||
BROKER_BASE_URL=https://paper-api.alpaca.markets
|
||||
|
||||
# ── Trading engine settings (optional) ──
|
||||
TRADING_ENABLED=true
|
||||
TRADING_RISK_TIER=moderate
|
||||
TRADING_MAX_OPEN_POSITIONS=15
|
||||
|
||||
# ── LLM model (optional) ──
|
||||
OLLAMA_MODEL=qwen3.5:9b-fast
|
||||
|
||||
# ── Signal layers (optional) ──
|
||||
MACRO_ENABLED=true
|
||||
COMPETITIVE_ENABLED=true
|
||||
```
|
||||
|
||||
| Variable | Required | Default | Used By | Description |
|
||||
@@ -178,20 +227,24 @@ All application services support additional environment variables loaded via `se
|
||||
| `REDIS_DB` | `0` | Redis database number |
|
||||
| `REDIS_PASSWORD` | (none) | Redis password (not needed in Docker Compose) |
|
||||
| `MINIO_SECURE` | `false` | Use HTTPS for MinIO |
|
||||
| `OLLAMA_BASE_URL` | `http://ollama:11434` | Ollama LLM server URL |
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default LLM model for extraction |
|
||||
| `OLLAMA_TIMEOUT` | `120` | Ollama request timeout (seconds) |
|
||||
| `OLLAMA_MAX_RETRIES` | `2` | Max retries for Ollama requests |
|
||||
| `VLLM_BASE_URL` | (empty) | vLLM server URL (if using vLLM instead of Ollama) |
|
||||
| `VLLM_MODEL` | (empty) | vLLM model name (e.g. `AxionML/Qwen3.5-9B-NVFP4`) |
|
||||
| `OLLAMA_RETRY_BASE_DELAY` | `1.0` | Base delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_MAX_DELAY` | `10.0` | Maximum delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_BACKOFF_MULTIPLIER` | `2.0` | Backoff multiplier for retries |
|
||||
| `VLLM_BASE_URL` | `http://192.168.42.254:8000` | vLLM server URL (if using vLLM instead of Ollama) |
|
||||
| `VLLM_MODEL` | `RedHatAI/Qwen3.6-35B-A3B-NVFP4` | vLLM model name |
|
||||
| `VLLM_TIMEOUT` | `120` | vLLM request timeout (seconds) |
|
||||
| `VLLM_MAX_RETRIES` | `2` | Max retries for vLLM requests |
|
||||
| `VLLM_TEMPERATURE` | `0.7` | vLLM sampling temperature |
|
||||
| `VLLM_MAX_TOKENS` | `4096` | vLLM max output tokens |
|
||||
| `VLLM_API_KEY` | (empty) | vLLM API key (if required) |
|
||||
| `TRINO_HOST` | `localhost` | Trino hostname |
|
||||
| `TRINO_PORT` | `8080` | Trino port |
|
||||
| `TRINO_CATALOG` | `lakehouse` | Trino catalog name |
|
||||
| `TRINO_SCHEMA` | `stonks` | Trino schema name |
|
||||
| `TRINO_ICEBERG_CATALOG` | `iceberg` | Trino Iceberg catalog name |
|
||||
| `MARKET_DATA_BASE_URL` | `https://api.polygon.io` | Polygon.io base URL |
|
||||
| `MARKET_DATA_PROVIDER` | `polygon` | Market data provider |
|
||||
| `BROKER_MODE` | `paper` | Broker mode: `paper` or `live` |
|
||||
@@ -200,12 +253,62 @@ All application services support additional environment variables loaded via `se
|
||||
| `TRADING_RISK_TIER` | `moderate` | Risk tier: `conservative`, `moderate`, `aggressive` |
|
||||
| `TRADING_POLLING_INTERVAL_SECONDS` | `60` | Recommendation polling interval |
|
||||
| `TRADING_MAX_OPEN_POSITIONS` | `10` | Maximum concurrent open positions |
|
||||
| `TRADING_RESERVE_SIPHON_PCT` | `0.20` | Fraction of profits siphoned to the reserve pool (0.20 = 20%) |
|
||||
| `TRADING_STOP_LOSS_CHECK_INTERVAL_SECONDS` | `300` | Stop-loss check interval |
|
||||
| `TRADING_FAST_STOP_LOSS_INTERVAL_SECONDS` | `60` | Fast stop-loss check interval |
|
||||
| `TRADING_GRADUAL_ENTRY_TRANCHES` | `3` | Number of tranches for gradual entry |
|
||||
| `TRADING_GRADUAL_ENTRY_THRESHOLD_DOLLARS` | `30.0` | Dollar threshold for gradual entry |
|
||||
| `TRADING_ABSOLUTE_POSITION_CAP` | `50.0` | Maximum position size (dollars) |
|
||||
| `TRADING_ACTIVE_POOL_MINIMUM` | `100.0` | Minimum active pool balance |
|
||||
| `TRADING_EMERGENCY_DRAWDOWN_THRESHOLD_PCT` | `0.40` | Emergency drawdown threshold |
|
||||
| `TRADING_RESERVE_HIGH_WATER_PCT` | `0.30` | Reserve high-water mark percentage |
|
||||
| `TRADING_MICRO_TRADING_ENABLED` | `false` | Enable micro-trading mode |
|
||||
| `TRADING_MICRO_TRADING_INTERVAL_SECONDS` | `300` | Micro-trading polling interval |
|
||||
| `TRADING_MICRO_TRADING_ALLOCATION_CAP_PCT` | `0.03` | Micro-trading allocation cap |
|
||||
| `TRADING_MICRO_TRADING_MAX_DAILY` | `10` | Max micro-trades per day |
|
||||
| `TRADING_MICRO_TRADING_MAX_HOLD_MINUTES` | `120` | Max micro-trade hold time |
|
||||
| `TRADING_SNS_TOPIC_ARN` | (empty) | AWS SNS topic ARN for notifications |
|
||||
| `TRADING_SNS_PHONE_NUMBER` | (empty) | Phone number for SNS notifications |
|
||||
| `TRADING_GMAIL_SENDER` | (empty) | Gmail sender address for notifications |
|
||||
| `TRADING_GMAIL_RECIPIENT` | (empty) | Gmail recipient address for notifications |
|
||||
| `MACRO_ENABLED` | `true` | Enable macro signal layer |
|
||||
| `MACRO_SIGNAL_WEIGHT` | `0.3` | Relative weight of macro vs company signals |
|
||||
| `MACRO_CONFIDENCE_THRESHOLD` | `0.4` | Minimum confidence for macro event inclusion |
|
||||
| `MACRO_SHORT_TERM_STALENESS_HOURS` | `48` | Hours before short-term events get accelerated decay |
|
||||
| `PROJECTION_CONFIDENCE_THRESHOLD` | `0.3` | Minimum confidence for projections to influence recommendations |
|
||||
| `COMPETITIVE_ENABLED` | `true` | Enable competitive signal layer |
|
||||
| `COMPETITIVE_SIGNAL_WEIGHT` | `0.2` | Relative weight of competitive signals |
|
||||
| `COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD` | `0.3` | Minimum confidence for pattern inclusion |
|
||||
| `COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD` | `0.2` | Minimum strength for signal propagation |
|
||||
| `COMPETITIVE_ROUTINE_LOOKBACK_DAYS` | `180` | Lookback window for routine patterns |
|
||||
| `COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS` | `365` | Lookback window for major decisions |
|
||||
| `COMPETITIVE_MIN_PATTERN_SAMPLES` | `3` | Minimum samples for pattern matching |
|
||||
| `COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER` | `1.3` | Weight multiplier for major decision patterns |
|
||||
| `COMPETITIVE_STALENESS_WINDOW_DAYS` | `180` | Window for staleness decay on competitive signals |
|
||||
| `COMPETITIVE_STALENESS_RECENT_DAYS` | `90` | Days within which signals are considered recent |
|
||||
| `COMPETITIVE_STALENESS_DECAY_PENALTY` | `0.5` | Decay penalty for stale competitive signals |
|
||||
| `COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD` | `5` | Consecutive propagation failures before operator alert |
|
||||
| `ALERT_SOURCE_FAILURE_THRESHOLD` | `3` | Consecutive source failures before alert fires |
|
||||
| `ALERT_SOURCE_FAILURE_WINDOW_HOURS` | `6` | Lookback window for source failure alerting |
|
||||
| `ALERT_SCHEMA_FAILURE_RATE_THRESHOLD` | `0.3` | Extraction failure rate (30%) that triggers alert |
|
||||
| `ALERT_SCHEMA_FAILURE_WINDOW_HOURS` | `1` | Lookback window for schema failure spike |
|
||||
| `ALERT_LAKE_LAG_THRESHOLD_MINUTES` | `60` | Minutes since last lake publish before alert |
|
||||
| `ALERT_BROKER_ERROR_THRESHOLD` | `3` | Consecutive broker errors before alert |
|
||||
| `ALERT_BROKER_ERROR_WINDOW_HOURS` | `1` | Lookback window for broker error alerting |
|
||||
| `ALERT_CHECK_INTERVAL_SECONDS` | `120` | How often alerting rules are evaluated |
|
||||
| `RETENTION_RAW_MARKET_DAYS` | `90` | Retention period for raw market data (days) |
|
||||
| `RETENTION_RAW_NEWS_DAYS` | `180` | Retention period for raw news articles (days) |
|
||||
| `RETENTION_RAW_FILINGS_DAYS` | `365` | Retention period for raw SEC filings (days) |
|
||||
| `RETENTION_NORMALIZED_DAYS` | `180` | Retention period for normalized documents (days) |
|
||||
| `RETENTION_LLM_PROMPTS_DAYS` | `365` | Retention period for LLM prompt archives (days) |
|
||||
| `RETENTION_LLM_RESULTS_DAYS` | `365` | Retention period for LLM extraction results (days) |
|
||||
| `RETENTION_LAKEHOUSE_DAYS` | `730` | Retention period for lakehouse Parquet files (days) |
|
||||
| `RETENTION_AUDIT_DAYS` | `730` | Retention period for audit trail artifacts (days) |
|
||||
| `RETENTION_CLEANUP_INTERVAL_HOURS` | `24` | How often the retention cleanup worker runs |
|
||||
| `RETENTION_BATCH_SIZE` | `1000` | Number of objects processed per cleanup batch |
|
||||
| `LOG_LEVEL` | `INFO` | Logging level |
|
||||
| `JSON_LOGS` | `true` | Enable structured JSON logging |
|
||||
| `DEPLOY_STAGE` | (empty) | Deployment stage prefix for bucket names |
|
||||
| `TZ` | `America/Los_Angeles` | Display timezone for timestamps (set on all containers) |
|
||||
|
||||
See `services/shared/config.py` for the complete list of all supported environment variables with their defaults.
|
||||
|
||||
@@ -217,7 +320,7 @@ Stonks Oracle supports two LLM backends: **Ollama** (local, self-hosted) and **v
|
||||
|
||||
### Option A: Bundled Ollama (default)
|
||||
|
||||
The `docker-compose.yml` includes an Ollama container. On first start, pull a model:
|
||||
The `docker-compose.yml` includes an Ollama container with GPU passthrough via the NVIDIA Container Toolkit. On first start, pull a model:
|
||||
|
||||
```bash
|
||||
docker compose exec ollama ollama pull qwen3.5:9b-fast
|
||||
@@ -225,6 +328,8 @@ docker compose exec ollama ollama pull qwen3.5:9b-fast
|
||||
|
||||
No additional configuration needed — services connect to `http://ollama:11434` by default.
|
||||
|
||||
The Ollama container requests all available NVIDIA GPUs via the `deploy.resources.reservations.devices` configuration. If no GPU is available, Ollama falls back to CPU inference (significantly slower).
|
||||
|
||||
### Option B: External Ollama
|
||||
|
||||
If Ollama is already running on the host (e.g. with GPU access), create a `docker-compose.override.yml`:
|
||||
@@ -252,15 +357,15 @@ services:
|
||||
- "host.docker.internal:host-gateway"
|
||||
```
|
||||
|
||||
This disables the bundled Ollama container and routes services to the host's instance. Replace the port if your Ollama runs on a non-standard port.
|
||||
This disables the bundled Ollama container and routes services to the host's instance. Replace the port if your Ollama runs on a non-standard port. For a remote Ollama instance (not on localhost), replace `host.docker.internal` with the remote IP and remove the `extra_hosts` block.
|
||||
|
||||
### Option C: vLLM Server
|
||||
|
||||
For higher throughput or quantized models (e.g. `AxionML/Qwen3.5-9B-NVFP4`), point services at a vLLM server. Add to your `.env`:
|
||||
For higher throughput or quantized models (e.g. `RedHatAI/Qwen3.6-35B-A3B-NVFP4`), point services at a vLLM server. Add to your `.env`:
|
||||
|
||||
```dotenv
|
||||
VLLM_BASE_URL=http://192.168.42.254:8000
|
||||
VLLM_MODEL=AxionML/Qwen3.5-9B-NVFP4
|
||||
VLLM_MODEL=RedHatAI/Qwen3.6-35B-A3B-NVFP4
|
||||
VLLM_TIMEOUT=120
|
||||
VLLM_TEMPERATURE=0.7
|
||||
```
|
||||
@@ -268,7 +373,7 @@ VLLM_TEMPERATURE=0.7
|
||||
Then update the `ai_agents` table to use the vLLM provider:
|
||||
|
||||
```sql
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'AxionML/Qwen3.5-9B-NVFP4' WHERE active = true;
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'RedHatAI/Qwen3.6-35B-A3B-NVFP4' WHERE active = true;
|
||||
```
|
||||
|
||||
Or use the API:
|
||||
@@ -276,7 +381,7 @@ Or use the API:
|
||||
```bash
|
||||
curl -X PUT http://localhost:8004/api/admin/agents/document-extractor \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"model_provider": "vllm", "model_name": "AxionML/Qwen3.5-9B-NVFP4"}'
|
||||
-d '{"model_provider": "vllm", "model_name": "RedHatAI/Qwen3.6-35B-A3B-NVFP4"}'
|
||||
```
|
||||
|
||||
### Option D: Mixed (Ollama + vLLM)
|
||||
@@ -284,8 +389,8 @@ curl -X PUT http://localhost:8004/api/admin/agents/document-extractor \
|
||||
You can run different agents on different providers. For example, use vLLM for the high-volume extractor and Ollama for the thesis rewriter:
|
||||
|
||||
```sql
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'AxionML/Qwen3.5-9B-NVFP4' WHERE slug = 'document-extractor';
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'AxionML/Qwen3.5-9B-NVFP4' WHERE slug = 'event-classifier';
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'RedHatAI/Qwen3.6-35B-A3B-NVFP4' WHERE slug = 'document-extractor';
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'RedHatAI/Qwen3.6-35B-A3B-NVFP4' WHERE slug = 'event-classifier';
|
||||
UPDATE ai_agents SET model_provider = 'ollama', model_name = 'qwen3.5:9b-fast' WHERE slug = 'thesis-rewriter';
|
||||
```
|
||||
|
||||
@@ -293,19 +398,21 @@ Both `OLLAMA_BASE_URL` and `VLLM_BASE_URL` must be set in the environment for mi
|
||||
|
||||
### Automated Deployment
|
||||
|
||||
The `deploy-docker.sh` script handles LLM configuration automatically:
|
||||
The `deploy-docker.sh` script handles LLM configuration automatically. By default it uses the bundled Docker Ollama container with GPU passthrough (NVIDIA Container Toolkit):
|
||||
|
||||
```bash
|
||||
# Auto-detect host Ollama, use default model
|
||||
# Deploy with defaults (Docker Ollama, GPU-accelerated)
|
||||
bash deploy-docker.sh
|
||||
|
||||
# Specify a remote Ollama instance
|
||||
bash deploy-docker.sh --ollama-url http://10.1.1.12:2701 --ollama-model qwen3.6
|
||||
# Specify a custom model
|
||||
bash deploy-docker.sh --ollama-model qwen3.6
|
||||
|
||||
# Specify a different host
|
||||
# Specify a different host and directory
|
||||
bash deploy-docker.sh --host user@myserver --dir /opt/stonks
|
||||
```
|
||||
|
||||
If an external Ollama URL is provided via `--ollama-url`, the script creates a `docker-compose.override.yml` that disables the bundled container and routes services to the external instance.
|
||||
|
||||
---
|
||||
|
||||
## Volume Mounts and Data Persistence
|
||||
@@ -404,6 +511,9 @@ docker compose ps query-api
|
||||
|
||||
# Inspect health check details for a container
|
||||
docker inspect --format='{{json .State.Health}}' stonks-oracle-query-api-1 | python -m json.tool
|
||||
|
||||
# Wait for all services to be healthy
|
||||
docker compose up -d --wait
|
||||
```
|
||||
|
||||
---
|
||||
@@ -414,17 +524,19 @@ docker inspect --format='{{json .State.Health}}' stonks-oracle-query-api-1 | pyt
|
||||
|
||||
Used by all application services except the scheduler. Accepts a `SERVICE_CMD` build argument that determines which service the container runs.
|
||||
|
||||
**Base image**: `python:3.12-slim`
|
||||
**Base image**: `python:3.12-slim` (via Harbor proxy cache in CI)
|
||||
|
||||
**Build arguments**:
|
||||
|
||||
| Argument | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `SERVICE_CMD` | `python -m services.scheduler.app` | The command executed when the container starts |
|
||||
| `CACHE_BUST` | (none) | Optional cache-busting argument to force rebuild of source layers |
|
||||
|
||||
**What gets copied**:
|
||||
- `requirements.txt` → pip dependencies installed
|
||||
- `services/` → all service source code
|
||||
- `scripts/` → operational scripts
|
||||
- `tests/` → test files (available for in-container testing)
|
||||
- `conftest.py` → pytest configuration
|
||||
|
||||
@@ -462,7 +574,7 @@ A specialized variant of the generic Dockerfile used only by the `scheduler` ser
|
||||
|
||||
Extends the official Apache Superset image with additional database drivers.
|
||||
|
||||
**Base image**: `apache/superset:latest`
|
||||
**Base image**: `apache/superset:latest` (via Harbor proxy cache in CI)
|
||||
|
||||
**Additional packages**: `trino[sqlalchemy]`, `psycopg2-binary`, `redis`
|
||||
|
||||
@@ -481,7 +593,9 @@ Multi-stage build for the React dashboard.
|
||||
**Stage 2 — Serve** (base: `nginxinc/nginx-unprivileged:alpine`):
|
||||
- Serves the built static files on port 8080
|
||||
- Uses `frontend/nginx.conf` for SPA fallback and API reverse proxying
|
||||
- Proxies `/api/` → `query-api:8000`, `/registry/` → `symbol-registry:8000`, `/risk/` → `risk-engine:8000`, `/trading/` → `trading-engine:8000`
|
||||
- Proxies `/api/` → `query-api:8000`, `/registry/` → `symbol-registry:8000`, `/risk/` → `risk:8000`, `/trading/` → `trading-engine:8000`
|
||||
- SSE stream endpoint (`/api/ops/pipeline/stream`) has buffering disabled for real-time delivery
|
||||
- Static assets under `/assets/` are cached with 1-year expiry
|
||||
|
||||
### Building Custom Images
|
||||
|
||||
@@ -503,6 +617,9 @@ docker build -t my-dashboard \
|
||||
|
||||
# Rebuild all images
|
||||
docker compose build
|
||||
|
||||
# Rebuild without cache (force fresh build)
|
||||
docker compose build --no-cache
|
||||
```
|
||||
|
||||
---
|
||||
@@ -561,6 +678,9 @@ Services with `condition: service_healthy` wait until the dependency's health ch
|
||||
# Start all services in the background
|
||||
docker compose up -d
|
||||
|
||||
# Start all services and wait for health checks
|
||||
docker compose up -d --wait
|
||||
|
||||
# Start only infrastructure (useful for local development)
|
||||
docker compose up -d postgres redis minio minio-init ollama
|
||||
|
||||
@@ -639,6 +759,9 @@ docker compose exec query-api python -c "from services.shared.config import load
|
||||
|
||||
# Open a psql shell in the postgres container
|
||||
docker compose exec postgres psql -U stonks -d stonks
|
||||
|
||||
# Seed the database
|
||||
docker compose exec scheduler python -m services.symbol_registry.seed
|
||||
```
|
||||
|
||||
### Full Reset
|
||||
@@ -680,13 +803,16 @@ The dashboard container runs nginx with reverse proxy rules that route API reque
|
||||
| Path | Proxied To | Service |
|
||||
|------|-----------|---------|
|
||||
| `/api/` | `http://query-api:8000` | Query API |
|
||||
| `/api/ops/pipeline/stream` | `http://query-api:8000` (SSE, no buffering) | Query API (real-time pipeline stream) |
|
||||
| `/registry/` | `http://symbol-registry:8000/` | Symbol Registry API |
|
||||
| `/risk/` | `http://risk:8000/` | Risk Engine (via network alias) |
|
||||
| `/trading/` | `http://trading-engine:8000/` | Trading Engine API |
|
||||
|
||||
The `risk-engine` service has a network alias of `risk` in `docker-compose.yml` so the nginx upstream resolves correctly.
|
||||
|
||||
All other paths serve the React SPA with `try_files` fallback to `index.html`.
|
||||
All other paths serve the React SPA with `try_files` fallback to `index.html`. Static assets under `/assets/` are served with 1-year cache headers.
|
||||
|
||||
Security headers applied: `X-Frame-Options: SAMEORIGIN`, `X-Content-Type-Options: nosniff`, `Referrer-Policy: strict-origin-when-cross-origin`.
|
||||
|
||||
---
|
||||
|
||||
@@ -734,6 +860,19 @@ curl http://your-vllm-host:8000/v1/models
|
||||
|
||||
If Ollama is already running on the host, the bundled container will fail to bind port 11434. Use the external Ollama configuration described in the "LLM Provider Configuration" section above, or use `deploy-docker.sh` which handles this automatically.
|
||||
|
||||
### GPU not detected by Ollama container
|
||||
|
||||
Ensure the NVIDIA Container Toolkit is installed and Docker is configured:
|
||||
|
||||
```bash
|
||||
# Verify GPU passthrough works
|
||||
docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi
|
||||
|
||||
# If it fails, reconfigure Docker runtime
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
### Port conflicts
|
||||
|
||||
If a port is already in use, modify the host port mapping in `docker-compose.yml`:
|
||||
@@ -743,3 +882,15 @@ query-api:
|
||||
ports:
|
||||
- "9004:8000" # Changed from 8004 to 9004
|
||||
```
|
||||
|
||||
### Container runs out of memory
|
||||
|
||||
The full stack requires at least 16 GB RAM. If services are being OOM-killed:
|
||||
|
||||
```bash
|
||||
# Check which containers are using the most memory
|
||||
docker stats --no-stream
|
||||
|
||||
# Reduce memory usage by stopping non-essential services
|
||||
docker compose stop trino hive-metastore superset
|
||||
```
|
||||
+48
-31
@@ -94,7 +94,7 @@ Each key under `services` defines a Kubernetes Deployment. The deployments templ
|
||||
| `image` | string | yes | Image name appended to `image.registry`. Also used as the Deployment name and pod label (`app: <image>`). |
|
||||
| `command` | string | no | Shell command passed as `["sh", "-c", "<command>"]`. Omit for images with a built-in entrypoint (e.g., dashboard/nginx). |
|
||||
| `tier` | string | yes | Service tier label (`stonks-oracle/tier`). One of: `api`, `frontend`, `processing`, `trading`, `orchestration`, `analytics`, `ingestion`. |
|
||||
| `port` | int | no | Container port. When set, a Kubernetes Service is created mapping `port → port`. |
|
||||
| `port` | int | no | Container port. When set, a Kubernetes Service is created mapping `port -> port`. |
|
||||
| `pipeline` | bool | no | If `true`, replicas are set to 0 when `pipelineEnabled` is `false`. |
|
||||
| `secrets` | list(string) | no | List of Secret names to mount via `envFrom.secretRef`. |
|
||||
| `resources` | object | yes | Kubernetes resource requests and limits (`cpu`, `memory`). |
|
||||
@@ -118,9 +118,10 @@ Each key under `services` defines a Kubernetes Deployment. The deployments templ
|
||||
| `resources.limits` | cpu: 200m, memory: 128Mi |
|
||||
| `probes` | — |
|
||||
|
||||
The scheduler deployment has two init containers (not configurable via values):
|
||||
The scheduler deployment has three init containers (not configurable via values):
|
||||
1. **run-migrations** — applies all SQL files from `infra/migrations/*.sql` in sorted order.
|
||||
2. **seed-if-empty** — runs `python -m services.symbol_registry.seed` if the `companies` table is empty.
|
||||
3. **backfill-market-data** — runs `scripts/backfill_market_data.py` if available (skips gracefully if not).
|
||||
|
||||
#### symbolRegistry
|
||||
|
||||
@@ -141,7 +142,7 @@ The scheduler deployment has two init containers (not configurable via values):
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| `replicas` | `2` |
|
||||
| `replicas` | `1` |
|
||||
| `pipeline` | `true` |
|
||||
| `image` | `ingestion` |
|
||||
| `command` | `python -m services.ingestion.worker` |
|
||||
@@ -274,7 +275,7 @@ Single replica is recommended — the extractor is bottlenecked by the shared Ol
|
||||
| `command` | `uvicorn services.api.app:app --host 0.0.0.0 --port 8000` |
|
||||
| `tier` | `api` |
|
||||
| `port` | `8000` |
|
||||
| `secrets` | `stonks-core-secrets` |
|
||||
| `secrets` | `stonks-core-secrets`, `stonks-market-secrets` |
|
||||
| `resources.requests` | cpu: 100m, memory: 128Mi |
|
||||
| `resources.limits` | cpu: 500m, memory: 256Mi |
|
||||
| `probes.readiness` | path: `/docs`, port: 8000, initialDelay: 5s, period: 10s |
|
||||
@@ -323,7 +324,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `config.OLLAMA_BASE_URL` | string | `""` (empty) | Ollama API base URL. Set to the cluster-internal or external Ollama endpoint. |
|
||||
| `config.OLLAMA_BASE_URL` | string | `http://10.1.1.12:2701` | Ollama API base URL. Points to the external Ollama endpoint by default. |
|
||||
| `config.OLLAMA_MODEL` | string | `qwen3.5:9b-fast` | Default LLM model for extraction and classification agents. |
|
||||
| `config.OLLAMA_TIMEOUT` | string | `240` | Request timeout in seconds for Ollama API calls. |
|
||||
| `config.OLLAMA_MAX_RETRIES` | string | `2` | Maximum retry attempts for failed Ollama requests. |
|
||||
@@ -331,6 +332,17 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
| `config.OLLAMA_RETRY_MAX_DELAY` | string | `10.0` | Maximum delay cap in seconds for Ollama retry backoff. |
|
||||
| `config.OLLAMA_RETRY_BACKOFF_MULTIPLIER` | string | `2.0` | Multiplier for exponential backoff between Ollama retries. |
|
||||
|
||||
### vLLM
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `config.VLLM_BASE_URL` | string | `http://10.1.1.12:2701` | vLLM API base URL. Alternative LLM backend using OpenAI-compatible API. |
|
||||
| `config.VLLM_MODEL` | string | `qwen3.5:9b-fast` | vLLM model identifier. |
|
||||
| `config.VLLM_TIMEOUT` | string | `120` | Request timeout in seconds for vLLM API calls. |
|
||||
| `config.VLLM_MAX_RETRIES` | string | `2` | Maximum retry attempts for failed vLLM requests. |
|
||||
| `config.VLLM_TEMPERATURE` | string | `0.7` | Sampling temperature for vLLM generation (0.0-1.0). |
|
||||
| `config.VLLM_API_KEY` | string | `""` (empty) | API key for vLLM authentication. Leave empty if not required. |
|
||||
|
||||
### Analytics / Trino
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
@@ -347,7 +359,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|-----|------|---------|-------------|
|
||||
| `config.BROKER_MODE` | string | `paper` | Broker execution mode. `paper` for simulated trading, `live` for real orders. |
|
||||
| `config.BROKER_PROVIDER` | string | `""` (empty) | Broker provider name (e.g., `alpaca`). |
|
||||
| `config.MARKET_DATA_BASE_URL` | string | `""` (empty) | Market data API base URL (e.g., `https://api.polygon.io`). |
|
||||
| `config.MARKET_DATA_BASE_URL` | string | `https://api.polygon.io` | Market data API base URL. |
|
||||
| `config.MARKET_DATA_PROVIDER` | string | `polygon` | Market data provider identifier. |
|
||||
| `config.TRADING_ENABLED` | string | `true` | Master toggle for the trading engine. Set to `false` to disable order submission. |
|
||||
| `config.TRADING_RISK_TIER` | string | `moderate` | Default risk tier for position sizing. Options: `conservative`, `moderate`, `aggressive`. |
|
||||
@@ -384,7 +396,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|-----|------|---------|-------------|
|
||||
| `config.ALERT_SOURCE_FAILURE_THRESHOLD` | string | `3` | Number of consecutive source failures before firing an alert. |
|
||||
| `config.ALERT_SOURCE_FAILURE_WINDOW_HOURS` | string | `6` | Time window (hours) for evaluating source failure count. |
|
||||
| `config.ALERT_SCHEMA_FAILURE_RATE_THRESHOLD` | string | `0.3` | Schema validation failure rate (0.0–1.0) that triggers an alert. |
|
||||
| `config.ALERT_SCHEMA_FAILURE_RATE_THRESHOLD` | string | `0.3` | Schema validation failure rate (0.0-1.0) that triggers an alert. |
|
||||
| `config.ALERT_SCHEMA_FAILURE_WINDOW_HOURS` | string | `1` | Time window (hours) for evaluating schema failure rate. |
|
||||
| `config.ALERT_LAKE_LAG_THRESHOLD_MINUTES` | string | `60` | Minutes of lakehouse publish lag before alerting. |
|
||||
| `config.ALERT_BROKER_ERROR_THRESHOLD` | string | `3` | Number of broker errors before firing an alert. |
|
||||
@@ -395,7 +407,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|
||||
## `secrets` — Kubernetes Secrets
|
||||
|
||||
Secrets are rendered into five Kubernetes Secret objects. In the base `values.yaml`, all secret values default to empty strings. Inject real values at deploy time using `--set` flags or a values override file.
|
||||
Secrets are rendered into five Kubernetes Secret objects. Inject real values at deploy time using `--set` flags or a values override file. The base `values.yaml` contains placeholder values — override them for each environment.
|
||||
|
||||
### Secret Objects
|
||||
|
||||
@@ -403,32 +415,32 @@ Secrets are rendered into five Kubernetes Secret objects. In the base `values.ya
|
||||
|-------------|-----------|-------------|
|
||||
| `stonks-core-secrets` | `secrets.core` | All services |
|
||||
| `stonks-broker-secrets` | `secrets.broker` | ingestion, trading-engine, risk-engine, broker-adapter |
|
||||
| `stonks-market-secrets` | `secrets.market` | ingestion |
|
||||
| `stonks-market-secrets` | `secrets.market` | ingestion, query-api |
|
||||
| `stonks-gmail-secrets` | `secrets.gmail` | trading-engine |
|
||||
| `stonks-dashboard-secrets` | `secrets.dashboard` | superset |
|
||||
|
||||
### `secrets.core`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `POSTGRES_PASSWORD` | string | `""` | PostgreSQL password. |
|
||||
| `MINIO_ACCESS_KEY` | string | `""` | MinIO access key (AWS-style). |
|
||||
| `MINIO_SECRET_KEY` | string | `""` | MinIO secret key. |
|
||||
| `REDIS_PASSWORD` | string | `""` | Redis authentication password. |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `POSTGRES_PASSWORD` | string | PostgreSQL password. |
|
||||
| `MINIO_ACCESS_KEY` | string | MinIO access key (AWS-style). |
|
||||
| `MINIO_SECRET_KEY` | string | MinIO secret key. |
|
||||
| `REDIS_PASSWORD` | string | Redis authentication password. |
|
||||
|
||||
### `secrets.broker`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `BROKER_API_KEY` | string | `""` | Broker API key (e.g., Alpaca paper trading key). |
|
||||
| `BROKER_API_SECRET` | string | `""` | Broker API secret. |
|
||||
| `BROKER_BASE_URL` | string | `""` | Broker API base URL (e.g., `https://paper-api.alpaca.markets`). |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `BROKER_API_KEY` | string | Broker API key (e.g., Alpaca paper trading key). |
|
||||
| `BROKER_API_SECRET` | string | Broker API secret. |
|
||||
| `BROKER_BASE_URL` | string | Broker API base URL (e.g., `https://paper-api.alpaca.markets`). |
|
||||
|
||||
### `secrets.market`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `MARKET_DATA_API_KEY` | string | `""` | Market data provider API key (e.g., Polygon.io). |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `MARKET_DATA_API_KEY` | string | Market data provider API key (e.g., Polygon.io). |
|
||||
|
||||
### `secrets.gmail`
|
||||
|
||||
@@ -440,10 +452,10 @@ Secrets are rendered into five Kubernetes Secret objects. In the base `values.ya
|
||||
|
||||
### `secrets.dashboard`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `SUPERSET_SECRET_KEY` | string | `""` | Flask secret key for Superset session encryption. |
|
||||
| `SUPERSET_ADMIN_PASSWORD` | string | `""` | Superset admin user password. |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `SUPERSET_SECRET_KEY` | string | Flask secret key for Superset session encryption. |
|
||||
| `SUPERSET_ADMIN_PASSWORD` | string | Superset admin user password. |
|
||||
|
||||
### Injecting Secrets at Deploy Time
|
||||
|
||||
@@ -596,15 +608,20 @@ Key overrides:
|
||||
| `pipelineEnabled` | `true` | Services deployed (ArgoCD health checks), but pipeline defaults to OFF via `PIPELINE_DEFAULT_OFF`. |
|
||||
| `config.DEPLOY_STAGE` | `beta` | Isolates Redis keys (`stonks:beta:*`) and MinIO buckets (`beta-stonks-*`). |
|
||||
| `config.POSTGRES_DB` | `stonks_beta` | Separate database for beta data. |
|
||||
| `config.POSTGRES_USER` | `stonks_beta` | Separate database user for beta. |
|
||||
| `config.REDIS_DB` | `1` | Separate Redis DB index. |
|
||||
| `config.LOG_LEVEL` | `DEBUG` | Verbose logging for debugging. |
|
||||
| `config.TRADING_ENABLED` | `false` | Safety net — no order submission in beta. |
|
||||
| `config.PIPELINE_DEFAULT_OFF` | `true` | Scheduler won't enqueue jobs unless explicitly enabled. |
|
||||
| `config.TRADING_ENABLED` | `true` | Trading engine active but constrained by paper broker mode. |
|
||||
| `config.PIPELINE_DEFAULT_OFF` | `true` | Scheduler won't enqueue jobs unless explicitly enabled via the UI. |
|
||||
| `config.BROKER_MODE` | `paper` | Simulated order execution. |
|
||||
| `config.BROKER_PROVIDER` | `alpaca` | Alpaca paper trading API. |
|
||||
| `config.OLLAMA_MODEL` | `qwen3.6` | May use a different model version for testing. |
|
||||
| `trino.enabled` | `false` | Analytics stack disabled in beta. |
|
||||
| `hiveMetastore.enabled` | `false` | Analytics stack disabled in beta. |
|
||||
| `superset.enabled` | `false` | Analytics stack disabled in beta. |
|
||||
|
||||
Beta also configures vLLM settings (`VLLM_BASE_URL`, `VLLM_MODEL`, etc.) for testing alternative LLM backends.
|
||||
|
||||
Beta ingress hostnames:
|
||||
|
||||
| Service | Hostname |
|
||||
@@ -649,11 +666,11 @@ Paper ingress hostnames:
|
||||
|
||||
```
|
||||
values-beta.yaml values-paper.yaml values.yaml (base)
|
||||
Beta → Paper Trading → Production
|
||||
Beta -> Paper Trading -> Production
|
||||
Integration Simulated orders Live trading
|
||||
testing Real market data Real orders
|
||||
Pipeline OFF Pipeline ON Pipeline ON
|
||||
Trading OFF Trading ON Trading ON
|
||||
Trading ON Trading ON Trading ON
|
||||
Analytics OFF Analytics ON Analytics ON
|
||||
```
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ scrape_configs:
|
||||
scrape_interval: 15s
|
||||
scrape_timeout: 10s
|
||||
metrics_path: /metrics
|
||||
static_targets:
|
||||
static_configs:
|
||||
- targets:
|
||||
# Docker Compose
|
||||
- "query-api:8000"
|
||||
@@ -124,6 +124,7 @@ All metrics are defined in `services/shared/metrics.py`. Metric names use the `s
|
||||
| `stonks_orders_rejected_total` | Counter | `reason_category` | Orders rejected before broker submission |
|
||||
| `stonks_orders_filled_total` | Counter | `side` | Orders filled by broker |
|
||||
| `stonks_orders_duplicates_prevented_total` | Counter | `detected_via` | Duplicate orders prevented by idempotency checks |
|
||||
| `stonks_orders_clamped_total` | Counter | — | Orders auto-clamped to fit within position limits |
|
||||
| `stonks_risk_evaluations_total` | Counter | `result` | Risk evaluations performed |
|
||||
| `stonks_risk_check_failures_total` | Counter | `check_name` | Individual risk check failures |
|
||||
| `stonks_positions_synced_total` | Counter | — | Position sync operations completed |
|
||||
|
||||
+117
-10
@@ -41,6 +41,7 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
| `recommendation` | `stonks:queue:recommendation` | Aggregation | Recommendation |
|
||||
| `broker_orders` | `stonks:queue:broker_orders` | Trading Engine, Trading API | Broker Adapter |
|
||||
| `lake_publish` | `stonks:queue:lake_publish` | Various services | Lake Publisher |
|
||||
| `report_generation` | `stonks:queue:report_generation` | Scheduler | Scheduler (inline consumer) |
|
||||
|
||||
### Queue Message Schemas
|
||||
|
||||
@@ -131,11 +132,20 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
}
|
||||
```
|
||||
|
||||
**Report Generation Job** (`stonks:queue:report_generation`):
|
||||
```json
|
||||
{
|
||||
"report_type": "daily | weekly",
|
||||
"period_start": "2025-01-01",
|
||||
"period_end": "2025-01-01"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Scheduler
|
||||
|
||||
**Purpose**: Triggers ingestion cycles for tracked companies and sources on a configurable cadence. Polls the symbol registry for active companies and their configured sources, respects per-source polling intervals and backoff windows, coordinates rate limits across source types, and enqueues ingestion jobs for downstream workers. Also runs periodic maintenance: stale document recovery, failed extraction retries, and data retention cleanup.
|
||||
**Purpose**: Triggers ingestion cycles for tracked companies and sources on a configurable cadence. Polls the symbol registry for active companies and their configured sources, respects per-source polling intervals and backoff windows, coordinates rate limits across source types, and enqueues ingestion jobs for downstream workers. Also runs periodic maintenance: stale document recovery, failed extraction retries, data retention cleanup, periodic aggregation re-runs, and automated report generation (daily/weekly).
|
||||
|
||||
**Entry Point**: `services.scheduler.app`
|
||||
|
||||
@@ -176,12 +186,16 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
| `recommendations` | Write (delete) | Retention cleanup |
|
||||
| `order_events` | Write (delete) | Retention cleanup |
|
||||
| `model_performance_metrics` | Write (delete) | Retention cleanup |
|
||||
| `ingestion_runs` | Write (delete) | Retention cleanup |
|
||||
| `trading_reports` | Write | Report generation storage |
|
||||
|
||||
### Redis Queues
|
||||
|
||||
| Direction | Queue | Purpose |
|
||||
|---|---|---|
|
||||
| Publish | `stonks:queue:ingestion` | Enqueue ingestion jobs for due sources |
|
||||
| Publish | `stonks:queue:aggregation` | Periodic aggregation re-runs |
|
||||
| Publish/Consume | `stonks:queue:report_generation` | Enqueue and consume report generation jobs |
|
||||
| Read | `stonks:pipeline:enabled` | Pipeline toggle (skip cycle if `"0"`) |
|
||||
| Read/Write | `stonks:lock:scheduler_cycle` | Distributed lock for single-writer |
|
||||
| Read/Write | `stonks:ratelimit:*` | Per-source-type and global Polygon rate limits |
|
||||
@@ -195,6 +209,8 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
- **Stale document recovery**: Every ~5 minutes, re-enqueues documents stuck in `parsed` status for >240 minutes.
|
||||
- **Failed extraction retry**: Every ~10 minutes, re-enqueues `extraction_failed` documents older than 60 minutes.
|
||||
- **Data retention cleanup**: Every ~25 minutes, deletes old rows from 10 tables with configurable retention windows (14–90 days).
|
||||
- **Periodic aggregation**: Re-enqueues aggregation jobs for all active tickers to keep trend summaries fresh.
|
||||
- **Report generation**: Enqueues daily and weekly report jobs on schedule; consumes them inline via `process_report_job` with retry logic (3 attempts, exponential backoff 30s/60s/120s).
|
||||
|
||||
---
|
||||
|
||||
@@ -281,7 +297,7 @@ None — this service is purely HTTP-driven.
|
||||
### MinIO Buckets
|
||||
|
||||
- `stonks-raw-market` — Raw market data JSON
|
||||
- `stonks-raw-news` — Raw news article JSON
|
||||
- `stonks-raw-news` — Raw news article JSON (also used for macro news)
|
||||
- `stonks-raw-filings` — Raw SEC filing data
|
||||
- `stonks-normalized` — Normalized text (written by parser)
|
||||
|
||||
@@ -296,6 +312,13 @@ None — this service is purely HTTP-driven.
|
||||
| `broker` | `AlpacaBrokerAdapter` | Alpaca |
|
||||
| `macro_news` | `MacroNewsAdapter` | Polygon.io |
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
- Macro news jobs (`source_type=macro_news`) may lack a `company_id` — the worker handles this gracefully
|
||||
- Macro news documents are typed as `macro_event` so the parser routes them to the macro classification queue
|
||||
- Duplicate documents detected via content hash are linked to the current company (except for `macro_news`)
|
||||
- Tracks `last_published_at` per source to fetch only newer articles on subsequent runs
|
||||
|
||||
---
|
||||
|
||||
## 4. Parser
|
||||
@@ -349,7 +372,7 @@ None — this service is purely HTTP-driven.
|
||||
|
||||
## 5. Extractor
|
||||
|
||||
**Purpose**: Performs LLM-based intelligence extraction from documents using Ollama. Handles two pipelines: (1) standard document extraction producing `DocumentIntelligence` with per-company impact records, and (2) macro event classification producing `GlobalEventSchema` with company-level macro impact interpolation. Supports AI agent configuration with variant-based A/B testing.
|
||||
**Purpose**: Performs LLM-based intelligence extraction from documents using Ollama or a remote vLLM inference server. Handles two pipelines: (1) standard document extraction producing `DocumentIntelligence` with per-company impact records, and (2) macro event classification producing `GlobalEventSchema` with company-level macro impact interpolation. Supports AI agent configuration with variant-based A/B testing and provider routing (Ollama or vLLM).
|
||||
|
||||
**Entry Point**: `services.extractor.main`
|
||||
|
||||
@@ -363,9 +386,16 @@ None — this service is purely HTTP-driven.
|
||||
| `REDIS_*` | _(see shared)_ | Redis connection |
|
||||
| `MINIO_*` | _(see shared)_ | MinIO connection |
|
||||
| `OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama API endpoint |
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default LLM model |
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default Ollama model |
|
||||
| `OLLAMA_TIMEOUT` | `120` | Request timeout (seconds) |
|
||||
| `OLLAMA_MAX_RETRIES` | `2` | Max retry attempts |
|
||||
| `VLLM_BASE_URL` | `http://192.168.42.254:8000` | vLLM inference server endpoint |
|
||||
| `VLLM_MODEL` | `RedHatAI/Qwen3.6-35B-A3B-NVFP4` | Default vLLM model |
|
||||
| `VLLM_TIMEOUT` | `120` | vLLM request timeout (seconds) |
|
||||
| `VLLM_MAX_RETRIES` | `2` | vLLM max retry attempts |
|
||||
| `VLLM_MAX_TOKENS` | `4096` | vLLM max output tokens |
|
||||
| `VLLM_TEMPERATURE` | `0.7` | vLLM sampling temperature |
|
||||
| `VLLM_API_KEY` | _(empty)_ | Optional API key for authenticated vLLM deployments |
|
||||
| `MACRO_CONFIDENCE_THRESHOLD` | `0.4` | Minimum confidence for macro event inclusion |
|
||||
| `LOG_LEVEL` | `INFO` | Logging level |
|
||||
|
||||
@@ -395,6 +425,7 @@ None — this service is purely HTTP-driven.
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
- **LLM provider routing**: The `AgentConfigResolver` resolves agent configuration from the DB, including a `model_provider` field (`"ollama"` or `"vllm"`). The `build_llm_client` factory returns the appropriate client (`OllamaClient` or `VLLMClient`).
|
||||
- Alternates between macro and extraction queues (1 macro per 3 jobs) to prevent starvation
|
||||
- Resolves agent configuration from DB with 60-second TTL cache (`AgentConfigResolver`)
|
||||
- Supports separate models for document extraction and event classification
|
||||
@@ -565,7 +596,7 @@ None — this service is purely HTTP-driven.
|
||||
| `risk_tier_history` | Read/Write | Risk tier change audit trail |
|
||||
| `circuit_breaker_events` | Read/Write | Circuit breaker trigger/reset events |
|
||||
| `positions` | Read | Current open positions |
|
||||
| `position_stop_levels` | Read/Write | Stop-loss and take-profit levels |
|
||||
| `position_stop_levels` | Read/Write | Stop-loss and take-profit levels per position |
|
||||
| `orders` | Read | Order history for dedup |
|
||||
| `backtest_runs` | Read/Write | Backtest configuration and results |
|
||||
| `backtest_trades` | Read/Write | Individual trades within a backtest |
|
||||
@@ -652,7 +683,7 @@ None — called synchronously by the broker adapter and via HTTP.
|
||||
| `positions` | Write (upsert) | Sync positions from Alpaca |
|
||||
| `broker_accounts` | Write (upsert) | Register/update broker account |
|
||||
| `daily_risk_snapshots` | Read | Daily portfolio state for risk evaluation |
|
||||
| `risk_configs` | Read | Active risk configuration |
|
||||
| `risk_configs` | Read | Active risk configuration for order evaluation |
|
||||
| `approval_requests` | Write | Create approval requests for gated orders |
|
||||
| `audit_events` | Write | Full audit trail |
|
||||
|
||||
@@ -728,7 +759,7 @@ None — called synchronously by the broker adapter and via HTTP.
|
||||
|
||||
## 12. Query API
|
||||
|
||||
**Purpose**: Read-only FastAPI service for analytics, evidence drill-down, and admin controls. Serves the React dashboard and external integrations with endpoints for companies, documents, trends, recommendations, orders, positions, portfolio metrics, global events, macro impacts, competitive signals, trend projections, AI agents, dead-letter queues, pipeline control, SQL explorer, saved queries, audit trail, DevOps metrics, and Prometheus metrics.
|
||||
**Purpose**: Primarily read-only FastAPI service for analytics, evidence drill-down, and admin controls. Serves the React dashboard and external integrations with endpoints for companies, documents, trends, recommendations, orders, positions, portfolio metrics, global events, macro impacts, competitive signals, trend projections, AI agents, dead-letter queues, pipeline control, SQL explorer, saved queries, audit trail, DevOps metrics, Prometheus metrics, model validation, and trading reports.
|
||||
|
||||
**Entry Point**: `services.api.app` (FastAPI)
|
||||
|
||||
@@ -745,6 +776,7 @@ None — called synchronously by the broker adapter and via HTTP.
|
||||
| `TRINO_PORT` | `8080` | Trino port |
|
||||
| `TRINO_CATALOG` | `lakehouse` | Trino catalog |
|
||||
| `TRINO_SCHEMA` | `stonks` | Trino schema |
|
||||
| `TRINO_ICEBERG_CATALOG` | `iceberg` | Trino Iceberg catalog |
|
||||
| `LOG_LEVEL` | `INFO` | Logging level |
|
||||
|
||||
### Database Tables
|
||||
@@ -757,9 +789,9 @@ The Query API reads from nearly all tables in the database, including:
|
||||
| `sources` | Source configurations |
|
||||
| `documents`, `document_company_mentions` | Document timelines |
|
||||
| `document_intelligence`, `document_impact_records` | Intelligence extraction results |
|
||||
| `trend_windows`, `trend_history`, `trend_projections` | Trend summaries and projections |
|
||||
| `trend_windows`, `trend_history`, `trend_projections`, `trend_evidence` | Trend summaries and projections |
|
||||
| `recommendations`, `recommendation_evidence` | Recommendation history with evidence |
|
||||
| `risk_evaluations` | Risk evaluation results |
|
||||
| `risk_evaluations`, `risk_configs` | Risk evaluation results and configuration |
|
||||
| `orders`, `order_events` | Order history and lifecycle |
|
||||
| `positions`, `portfolio_snapshots` | Portfolio state |
|
||||
| `global_events`, `macro_impact_records` | Macro event data |
|
||||
@@ -768,6 +800,13 @@ The Query API reads from nearly all tables in the database, including:
|
||||
| `audit_events` | Audit trail |
|
||||
| `market_snapshots` | Market price data |
|
||||
| `watchlists`, `watchlist_members` | Watchlist data |
|
||||
| `ingestion_runs` | Ingestion throughput and source health |
|
||||
| `model_performance_metrics` | Model quality metrics |
|
||||
| `prediction_snapshots`, `prediction_outcomes` | Model validation and calibration |
|
||||
| `trading_decisions` | Trading decision history |
|
||||
| `trading_reports` | Generated daily/weekly reports |
|
||||
| `approval_requests` | Pending approval workflow |
|
||||
| `symbol_lockouts` | Active trading lockouts per symbol |
|
||||
|
||||
### Redis Queues
|
||||
|
||||
@@ -776,15 +815,22 @@ The Query API reads from nearly all tables in the database, including:
|
||||
| Read/Write | `stonks:pipeline:enabled` | Pipeline toggle control |
|
||||
| Read | `stonks:queue:*` | Queue depth monitoring for DLQ and DevOps metrics |
|
||||
| Read | `stonks:dlq:*` | Dead-letter queue inspection and replay |
|
||||
| Read | `stonks:ratelimit:*` | Rate limit status monitoring |
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
- Exposes `/metrics` endpoint for Prometheus scraping
|
||||
- Trace context propagation via `x-trace-id` header middleware
|
||||
- SQL explorer endpoint for ad-hoc Trino queries
|
||||
- SQL explorer endpoint for ad-hoc Trino queries (`/analytics/query`)
|
||||
- PostgreSQL schema explorer (`/pg/schema`, `/pg/query`)
|
||||
- Dead-letter queue management (list, inspect, replay)
|
||||
- Pipeline control (enable/disable via Redis toggle)
|
||||
- Saved queries with CRUD operations
|
||||
- Macro and competitive layer toggle endpoints
|
||||
- Model validation endpoints (summary, calibration, IC by horizon, gate status, attribution)
|
||||
- Trading report listing and retrieval
|
||||
- SSE pipeline health stream (`/pipeline/stream`)
|
||||
- Market price backfill endpoints
|
||||
|
||||
---
|
||||
|
||||
@@ -1042,6 +1088,67 @@ All services load configuration from environment variables via `services/shared/
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default model |
|
||||
| `OLLAMA_TIMEOUT` | `120` | Request timeout (seconds) |
|
||||
| `OLLAMA_MAX_RETRIES` | `2` | Max retry attempts |
|
||||
| `OLLAMA_RETRY_BASE_DELAY` | `1.0` | Base delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_MAX_DELAY` | `10.0` | Maximum delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_BACKOFF_MULTIPLIER` | `2.0` | Backoff multiplier |
|
||||
|
||||
### vLLM
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `VLLM_BASE_URL` | `http://192.168.42.254:8000` | vLLM inference server endpoint |
|
||||
| `VLLM_MODEL` | `RedHatAI/Qwen3.6-35B-A3B-NVFP4` | Default vLLM model |
|
||||
| `VLLM_TIMEOUT` | `120` | Request timeout (seconds) |
|
||||
| `VLLM_MAX_RETRIES` | `2` | Max retry attempts |
|
||||
| `VLLM_MAX_TOKENS` | `4096` | Max output tokens |
|
||||
| `VLLM_TEMPERATURE` | `0.7` | Sampling temperature |
|
||||
| `VLLM_API_KEY` | _(empty)_ | Optional API key for authenticated deployments |
|
||||
| `VLLM_RETRY_BASE_DELAY` | `1.0` | Base delay between retries (seconds) |
|
||||
| `VLLM_RETRY_MAX_DELAY` | `10.0` | Maximum delay between retries (seconds) |
|
||||
| `VLLM_RETRY_BACKOFF_MULTIPLIER` | `2.0` | Backoff multiplier |
|
||||
|
||||
### Trino
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `TRINO_HOST` | `localhost` | Trino host |
|
||||
| `TRINO_PORT` | `8080` | Trino port |
|
||||
| `TRINO_CATALOG` | `lakehouse` | Trino catalog |
|
||||
| `TRINO_SCHEMA` | `stonks` | Trino schema |
|
||||
| `TRINO_ICEBERG_CATALOG` | `iceberg` | Trino Iceberg catalog |
|
||||
|
||||
### Market Data
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `MARKET_DATA_API_KEY` | _(empty)_ | Polygon.io API key |
|
||||
| `MARKET_DATA_BASE_URL` | `https://api.polygon.io` | Polygon base URL |
|
||||
| `MARKET_DATA_PROVIDER` | `polygon` | Market data provider |
|
||||
|
||||
### Broker
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `BROKER_MODE` | `paper` | Trading mode (`paper` or `live`) |
|
||||
| `BROKER_PROVIDER` | `alpaca` | Broker provider |
|
||||
| `BROKER_API_KEY` | _(none)_ | Alpaca API key |
|
||||
| `BROKER_API_SECRET` | _(none)_ | Alpaca API secret |
|
||||
| `BROKER_BASE_URL` | _(none)_ | Alpaca base URL |
|
||||
|
||||
### Retention
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `RETENTION_RAW_MARKET_DAYS` | `90` | Raw market data retention (days) |
|
||||
| `RETENTION_RAW_NEWS_DAYS` | `180` | Raw news data retention (days) |
|
||||
| `RETENTION_RAW_FILINGS_DAYS` | `365` | Raw filings retention (days) |
|
||||
| `RETENTION_NORMALIZED_DAYS` | `180` | Normalized text retention (days) |
|
||||
| `RETENTION_LLM_PROMPTS_DAYS` | `365` | LLM prompt retention (days) |
|
||||
| `RETENTION_LLM_RESULTS_DAYS` | `365` | LLM result retention (days) |
|
||||
| `RETENTION_LAKEHOUSE_DAYS` | `730` | Lakehouse data retention (days) |
|
||||
| `RETENTION_AUDIT_DAYS` | `730` | Audit log retention (days) |
|
||||
| `RETENTION_CLEANUP_INTERVAL_HOURS` | `24` | Cleanup interval (hours) |
|
||||
| `RETENTION_BATCH_SIZE` | `1000` | Rows deleted per batch |
|
||||
|
||||
### Observability
|
||||
|
||||
|
||||
+227
-2
@@ -256,8 +256,13 @@ export interface MarketPrice {
|
||||
captured_at: string;
|
||||
}
|
||||
|
||||
export function useMarketPrices(ticker: string | undefined, limit = 30) {
|
||||
return useGet<MarketPrice[]>(
|
||||
/**
 * Payload type requested by `useMarketPrices`: the list of price bars plus a
 * `range_90d` low/high pair in which either bound may be null.
 */
export interface MarketPriceResponse {
|
||||
bars: MarketPrice[];
|
||||
range_90d: { low: number | null; high: number | null };
|
||||
}
|
||||
|
||||
export function useMarketPrices(ticker: string | undefined, limit = 200) {
|
||||
return useGet<MarketPriceResponse>(
|
||||
['market-prices', ticker, limit],
|
||||
'query',
|
||||
`/api/market/prices/${ticker}?limit=${limit}`,
|
||||
@@ -265,6 +270,18 @@ export function useMarketPrices(ticker: string | undefined, limit = 30) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Mutation hook that backfills 90 days of daily bars from Polygon for a single ticker.
 *
 * The mutation POSTs an empty body to `/api/market/backfill/{ticker}` on the
 * `query` service. On success it invalidates the `['market-prices', ticker]`
 * query key so price data for that ticker is refetched.
 */
export function useBackfillMarketPrices() {
  const queryClient = useQueryClient();

  function backfill(ticker: string) {
    return apiPost<{ ticker: string; inserted: number; total_bars: number }>(
      'query',
      `/api/market/backfill/${ticker}`,
      {},
    );
  }

  return useMutation({
    mutationFn: backfill,
    onSuccess(_result, ticker) {
      queryClient.invalidateQueries({ queryKey: ['market-prices', ticker] });
    },
  });
}
|
||||
|
||||
/**
 * Query hook for a single trend summary at `/api/trends/{id}` on the `query` service.
 *
 * @param id - trend identifier; may be undefined while the caller is still resolving it.
 * @returns the result of `useGet<TrendSummary>` keyed by `['trend', id]`.
 */
export function useTrend(id: string | undefined) {
  // NOTE(review): the fourth useGet argument is presumably an "enabled" flag — confirm against useGet.
  const hasId = Boolean(id);
  const endpoint = `/api/trends/${id}`;
  return useGet<TrendSummary>(['trend', id], 'query', endpoint, hasId);
}
|
||||
@@ -372,6 +389,7 @@ export interface Position {
|
||||
quantity: number;
|
||||
avg_entry_price: number;
|
||||
current_price: number | null;
|
||||
polygon_price: number | null;
|
||||
unrealized_pnl: number | null;
|
||||
realized_pnl: number | null;
|
||||
updated_at: string;
|
||||
@@ -867,3 +885,210 @@ export function useToggleMacro() {
|
||||
onSuccess: () => qc.invalidateQueries({ queryKey: ['macro-status'] }),
|
||||
});
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation: Model Quality & Calibration (Requirements 12.1, 12.2, 12.3, 12.7)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One model-metric snapshot, used as the `snapshot` member of `ValidationSummary`.
 * Apart from `prediction_count`, every metric field is nullable, and so is `metadata`.
 */
export interface ModelMetricSnapshot {
|
||||
id: string;
|
||||
generated_at: string;
|
||||
lookback_window: string;
|
||||
horizon: string;
|
||||
prediction_count: number;
|
||||
win_rate: number | null;
|
||||
directional_accuracy: number | null;
|
||||
information_coefficient: number | null;
|
||||
rank_information_coefficient: number | null;
|
||||
avg_return: number | null;
|
||||
avg_excess_return_vs_spy: number | null;
|
||||
avg_excess_return_vs_sector: number | null;
|
||||
calibration_error: number | null;
|
||||
brier_score: number | null;
|
||||
buy_win_rate: number | null;
|
||||
sell_win_rate: number | null;
|
||||
hold_win_rate: number | null;
|
||||
metadata: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationSummary`. Both members may be null;
 * `gate_status` is an untyped key/value record.
 */
export interface ValidationSummary {
|
||||
snapshot: ModelMetricSnapshot | null;
|
||||
gate_status: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
/**
 * One calibration bucket; element type of `ValidationCalibration.buckets`.
 * All fields are required and non-null.
 */
export interface CalibrationBucket {
|
||||
bucket_low: number;
|
||||
bucket_high: number;
|
||||
avg_confidence: number;
|
||||
observed_win_rate: number;
|
||||
prediction_count: number;
|
||||
miscalibrated: boolean;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationCalibration`: the calibration buckets
 * together with `lookback` and `horizon` strings.
 */
export interface ValidationCalibration {
|
||||
buckets: CalibrationBucket[];
|
||||
lookback: string;
|
||||
horizon: string;
|
||||
}
|
||||
|
||||
/**
 * Per-horizon information-coefficient entry; element type of
 * `ValidationICByHorizon.horizons`. Both coefficients and `generated_at` may be null.
 */
export interface ICByHorizonEntry {
|
||||
horizon: string;
|
||||
information_coefficient: number | null;
|
||||
rank_information_coefficient: number | null;
|
||||
prediction_count: number;
|
||||
generated_at: string | null;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationICByHorizon`: one entry per horizon
 * plus a `lookback` string.
 */
export interface ValidationICByHorizon {
|
||||
horizons: ICByHorizonEntry[];
|
||||
lookback: string;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationGateStatus`. `gate_status` may be null;
 * `updated_at` and `message` are optional and may be absent from the payload.
 */
export interface ValidationGateStatus {
|
||||
gate_status: Record<string, unknown> | null;
|
||||
updated_at?: string | null;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/summary` on the `query` service.
 *
 * @param lookback - sent as the `lookback` query parameter; left out when empty. Defaults to `'30d'`.
 * @param horizon - sent as the `horizon` query parameter; left out when empty. Defaults to `'7d'`.
 * @returns the result of `useGet<ValidationSummary>` keyed by
 *   `['validation-summary', lookback, horizon]`.
 */
export function useValidationSummary(lookback = '30d', horizon = '7d') {
  // Keep only the non-empty filters, preserving lookback-then-horizon order.
  const filters = Object.entries({ lookback, horizon }).filter(([, value]) => value);
  const encoded = new URLSearchParams(filters).toString();
  const endpoint = encoded ? `/api/validation/summary?${encoded}` : '/api/validation/summary';
  return useGet<ValidationSummary>(['validation-summary', lookback, horizon], 'query', endpoint);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/calibration` on the `query` service.
 *
 * @param lookback - sent as the `lookback` query parameter; left out when empty. Defaults to `'30d'`.
 * @param horizon - sent as the `horizon` query parameter; left out when empty. Defaults to `'7d'`.
 * @returns the result of `useGet<ValidationCalibration>` keyed by
 *   `['validation-calibration', lookback, horizon]`.
 */
export function useValidationCalibration(lookback = '30d', horizon = '7d') {
  // Keep only the non-empty filters, preserving lookback-then-horizon order.
  const filters = Object.entries({ lookback, horizon }).filter(([, value]) => value);
  const encoded = new URLSearchParams(filters).toString();
  const endpoint = encoded ? `/api/validation/calibration?${encoded}` : '/api/validation/calibration';
  return useGet<ValidationCalibration>(['validation-calibration', lookback, horizon], 'query', endpoint);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/ic-by-horizon` on the `query` service.
 *
 * @param lookback - sent as the `lookback` query parameter; left out when empty. Defaults to `'30d'`.
 * @returns the result of `useGet<ValidationICByHorizon>` keyed by
 *   `['validation-ic-by-horizon', lookback]`.
 */
export function useValidationICByHorizon(lookback = '30d') {
  const encoded = lookback ? new URLSearchParams({ lookback }).toString() : '';
  const endpoint = encoded
    ? `/api/validation/ic-by-horizon?${encoded}`
    : '/api/validation/ic-by-horizon';
  return useGet<ValidationICByHorizon>(['validation-ic-by-horizon', lookback], 'query', endpoint);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/gate-status` on the `query` service.
 * Takes no parameters; the query key is the constant `['validation-gate-status']`.
 */
export function useValidationGateStatus() {
  const endpoint = '/api/validation/gate-status';
  const queryKey = ['validation-gate-status'];
  return useGet<ValidationGateStatus>(queryKey, 'query', endpoint);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation: Attribution — Sources, Catalysts, Layers (Requirements 12.4, 12.5, 12.6)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Attribution row for one source; element type of `SourceAttributionResponse.sources`.
 * Only `information_coefficient` is nullable.
 */
export interface SourceAttribution {
|
||||
source: string;
|
||||
source_type: string;
|
||||
prediction_count: number;
|
||||
avg_weight: number;
|
||||
avg_contribution_score: number;
|
||||
win_rate: number;
|
||||
avg_future_return: number;
|
||||
avg_excess_return_vs_spy: number;
|
||||
information_coefficient: number | null;
|
||||
duplicate_rate: number;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationAttributionSources`: per-source rows
 * together with `lookback` and `horizon` strings.
 */
export interface SourceAttributionResponse {
|
||||
sources: SourceAttribution[];
|
||||
lookback: string;
|
||||
horizon: string;
|
||||
}
|
||||
|
||||
/**
 * Attribution row for one catalyst type; element type of
 * `CatalystAttributionResponse.catalysts`. Only `information_coefficient` is nullable.
 */
export interface CatalystAttribution {
|
||||
catalyst_type: string;
|
||||
prediction_count: number;
|
||||
win_rate: number;
|
||||
avg_future_return: number;
|
||||
avg_excess_return_vs_spy: number;
|
||||
information_coefficient: number | null;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationAttributionCatalysts`: per-catalyst rows
 * together with `lookback` and `horizon` strings.
 */
export interface CatalystAttributionResponse {
|
||||
catalysts: CatalystAttribution[];
|
||||
lookback: string;
|
||||
horizon: string;
|
||||
}
|
||||
|
||||
/**
 * Attribution row for one layer; element type of `LayerAttributionResponse.layers`.
 * Only `dominant_ic` is nullable.
 */
export interface LayerAttribution {
|
||||
layer: string;
|
||||
avg_contribution_pct: number;
|
||||
dominant_win_rate: number;
|
||||
dominant_ic: number | null;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useValidationAttributionLayers`: per-layer rows
 * together with `lookback` and `horizon` strings.
 */
export interface LayerAttributionResponse {
|
||||
layers: LayerAttribution[];
|
||||
lookback: string;
|
||||
horizon: string;
|
||||
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/sources` on the `query` service.
 *
 * @param lookback - sent as the `lookback` query parameter; left out when empty. Defaults to `'30d'`.
 * @param horizon - sent as the `horizon` query parameter; left out when empty. Defaults to `'7d'`.
 * @returns the result of `useGet<SourceAttributionResponse>` keyed by
 *   `['validation-attribution-sources', lookback, horizon]`.
 */
export function useValidationAttributionSources(lookback = '30d', horizon = '7d') {
  // Keep only the non-empty filters, preserving lookback-then-horizon order.
  const filters = Object.entries({ lookback, horizon }).filter(([, value]) => value);
  const encoded = new URLSearchParams(filters).toString();
  const endpoint = encoded
    ? `/api/validation/attribution/sources?${encoded}`
    : '/api/validation/attribution/sources';
  return useGet<SourceAttributionResponse>(
    ['validation-attribution-sources', lookback, horizon],
    'query',
    endpoint,
  );
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/catalysts` on the `query` service.
 *
 * @param lookback - sent as the `lookback` query parameter; left out when empty. Defaults to `'30d'`.
 * @param horizon - sent as the `horizon` query parameter; left out when empty. Defaults to `'7d'`.
 * @returns the result of `useGet<CatalystAttributionResponse>` keyed by
 *   `['validation-attribution-catalysts', lookback, horizon]`.
 */
export function useValidationAttributionCatalysts(lookback = '30d', horizon = '7d') {
  // Keep only the non-empty filters, preserving lookback-then-horizon order.
  const filters = Object.entries({ lookback, horizon }).filter(([, value]) => value);
  const encoded = new URLSearchParams(filters).toString();
  const endpoint = encoded
    ? `/api/validation/attribution/catalysts?${encoded}`
    : '/api/validation/attribution/catalysts';
  return useGet<CatalystAttributionResponse>(
    ['validation-attribution-catalysts', lookback, horizon],
    'query',
    endpoint,
  );
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/layers` on the `query` service.
 *
 * @param lookback - sent as the `lookback` query parameter; left out when empty. Defaults to `'30d'`.
 * @param horizon - sent as the `horizon` query parameter; left out when empty. Defaults to `'7d'`.
 * @returns the result of `useGet<LayerAttributionResponse>` keyed by
 *   `['validation-attribution-layers', lookback, horizon]`.
 */
export function useValidationAttributionLayers(lookback = '30d', horizon = '7d') {
  // Keep only the non-empty filters, preserving lookback-then-horizon order.
  const filters = Object.entries({ lookback, horizon }).filter(([, value]) => value);
  const encoded = new URLSearchParams(filters).toString();
  const endpoint = encoded
    ? `/api/validation/attribution/layers?${encoded}`
    : '/api/validation/attribution/layers';
  return useGet<LayerAttributionResponse>(
    ['validation-attribution-layers', lookback, horizon],
    'query',
    endpoint,
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trading Reports
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One row of the report list; element type of the array requested by `useReports`
 * and the base interface of `ReportDetail`. All fields are required strings.
 */
export interface ReportListItem {
|
||||
id: string;
|
||||
report_type: string;
|
||||
period_start: string;
|
||||
period_end: string;
|
||||
validation_status: string;
|
||||
generated_at: string;
|
||||
}
|
||||
|
||||
/**
 * Payload type requested by `useReport`: every `ReportListItem` field plus the
 * untyped `report_data` record and a `created_at` string.
 */
export interface ReportDetail extends ReportListItem {
|
||||
report_data: Record<string, unknown>;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
/**
 * Query hook listing trading reports from `/api/reports` on the `query` service.
 *
 * Each supplied filter is serialized into the query string; the whole `params`
 * object is part of the query key (`['reports', params]`).
 *
 * @param params - optional filters. String filters are left out when empty.
 *   `limit` and `offset` are left out only when null/undefined, so an explicit
 *   `0` is forwarded to the API rather than being silently dropped.
 * @returns the result of `useGet<ReportListItem[]>`.
 */
export function useReports(params?: {
  report_type?: string;
  start_date?: string;
  end_date?: string;
  limit?: number;
  offset?: number;
}) {
  const qs = new URLSearchParams();
  if (params?.report_type) qs.set('report_type', params.report_type);
  if (params?.start_date) qs.set('start_date', params.start_date);
  if (params?.end_date) qs.set('end_date', params.end_date);
  // Numeric filters use a nullish check: 0 is a value the caller asked for, not "unset".
  if (params?.limit != null) qs.set('limit', String(params.limit));
  if (params?.offset != null) qs.set('offset', String(params.offset));
  const query = qs.toString();
  const path = query ? `/api/reports?${query}` : '/api/reports';
  return useGet<ReportListItem[]>(['reports', params], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for a single trading report at `/api/reports/{id}` on the `query` service.
 *
 * @param id - report identifier; may be undefined while the caller is still resolving it.
 * @returns the result of `useGet<ReportDetail>` keyed by `['report', id]`.
 */
export function useReport(id: string | undefined) {
  // NOTE(review): the fourth useGet argument is presumably an "enabled" flag — confirm against useGet.
  const hasId = Boolean(id);
  const endpoint = `/api/reports/${id}`;
  return useGet<ReportDetail>(['report', id], 'query', endpoint, hasId);
}
|
||||
|
||||
@@ -316,9 +316,12 @@ export function useBacktestLaunch() {
|
||||
export function useResetPaperTrading() {
|
||||
const qc = useQueryClient();
|
||||
return useMutation({
|
||||
mutationFn: (initial_capital: number = 0) =>
|
||||
mutationFn: (params: { initial_capital?: number; reserve_pct?: number } = {}) =>
|
||||
apiPost<{ reset: boolean; initial_capital: number; active_pool: number; reserve_pool: number; broker: Record<string, number> }>(
|
||||
'trading', '/api/trading/reset', { initial_capital },
|
||||
'trading', '/api/trading/reset', {
|
||||
initial_capital: params.initial_capital ?? 0,
|
||||
reserve_pct: params.reserve_pct ?? undefined,
|
||||
},
|
||||
),
|
||||
onSuccess: () => {
|
||||
qc.invalidateQueries({ queryKey: ['trading-status'] });
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
Globe,
|
||||
BarChart3,
|
||||
Bot,
|
||||
ClipboardList,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface NavItem {
|
||||
@@ -40,6 +41,7 @@ const navItems: NavItem[] = [
|
||||
{ to: '/positions', label: 'Positions', icon: <Wallet size={18} />, group: 'Trading' },
|
||||
{ to: '/trading', label: 'Trading Controls', icon: <ShieldCheck size={18} />, group: 'Trading' },
|
||||
{ to: '/trading/engine', label: 'Trading Engine', icon: <BarChart3 size={18} />, group: 'Trading' },
|
||||
{ to: '/reports', label: 'Reports', icon: <ClipboardList size={18} />, group: 'Trading' },
|
||||
{ to: '/ops/pipeline', label: 'Pipeline', icon: <Activity size={18} />, group: 'Ops' },
|
||||
{ to: '/ops/ingestion', label: 'Ingestion', icon: <Download size={18} />, group: 'Ops' },
|
||||
{ to: '/ops/model', label: 'Model Perf', icon: <Cpu size={18} />, group: 'Ops' },
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useParams, useNavigate, Link } from '@tanstack/react-router';
|
||||
import { useState } from 'react';
|
||||
import { useState, useEffect } from 'react';
|
||||
import {
|
||||
useCompany,
|
||||
useCompanySources,
|
||||
@@ -22,7 +22,7 @@ import { DataTable, type Column } from '../components/DataTable';
|
||||
import type { Source, Alias, MacroImpactRecord, CompetitorRelationship, HistoricalPattern, CompetitiveSignal, CorporateDecision, TrendSummary, MarketPrice } from '../api/hooks';
|
||||
import {
|
||||
LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer,
|
||||
CartesianGrid, Legend,
|
||||
CartesianGrid, Legend, ReferenceLine,
|
||||
} from 'recharts';
|
||||
|
||||
const sourceCols: Column<Source>[] = [
|
||||
@@ -46,7 +46,9 @@ export function CompanyDetailPage() {
|
||||
const { data: trends } = useTrends({ ticker: company?.ticker, limit: 200 });
|
||||
const [selectedWindow, setSelectedWindow] = useState('7d');
|
||||
const { data: trendHistory } = useTrendHistory({ ticker: company?.ticker, window: selectedWindow, limit: 500 });
|
||||
const { data: marketPrices } = useMarketPrices(company?.ticker, 200);
|
||||
const { data: marketPriceData } = useMarketPrices(company?.ticker, 200);
|
||||
const marketPrices = marketPriceData?.bars ?? [];
|
||||
const range90d = marketPriceData?.range_90d ?? { low: null, high: null };
|
||||
const { data: positions } = usePositions(company?.ticker);
|
||||
const [tab, setTab] = useState<'trends' | 'sources' | 'aliases' | 'macro' | 'competitors' | 'patterns' | 'signals' | 'decisions'>('trends');
|
||||
|
||||
@@ -88,7 +90,7 @@ export function CompanyDetailPage() {
|
||||
{tab === 'trends' && (
|
||||
<div className="space-y-4">
|
||||
<PositionCard positions={positions ?? []} ticker={company.ticker} />
|
||||
<TrendHistoryChart trends={trendHistory ?? []} latestTrends={trends ?? []} ticker={company.ticker} marketPrices={marketPrices ?? []} selectedWindow={selectedWindow} onWindowChange={setSelectedWindow} />
|
||||
<TrendHistoryChart trends={trendHistory ?? []} latestTrends={trends ?? []} ticker={company.ticker} marketPrices={marketPrices} range90d={range90d} selectedWindow={selectedWindow} onWindowChange={setSelectedWindow} />
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -680,7 +682,7 @@ function PositionCard({ positions, ticker }: { positions: import('../api/hooks')
|
||||
);
|
||||
}
|
||||
|
||||
function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices, selectedWindow, onWindowChange }: { trends: TrendSummary[]; latestTrends: TrendSummary[]; ticker: string; marketPrices: MarketPrice[]; selectedWindow: string; onWindowChange: (w: string) => void }) {
|
||||
function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices, range90d, selectedWindow, onWindowChange }: { trends: TrendSummary[]; latestTrends: TrendSummary[]; ticker: string; marketPrices: MarketPrice[]; range90d: { low: number | null; high: number | null }; selectedWindow: string; onWindowChange: (w: string) => void }) {
|
||||
|
||||
// Determine the time range for the selected window to filter data
|
||||
const windowHours: Record<string, number> = {
|
||||
@@ -748,6 +750,34 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices, selecte
|
||||
|
||||
const hasPrice = chartData.some((pt) => pt.price != null);
|
||||
|
||||
// Compute market open/close vertical markers for intraday and 1d windows
|
||||
const showMarketMarkers = selectedWindow === 'intraday' || selectedWindow === '1d';
|
||||
const marketMarkers: { ts: number; label: string }[] = [];
|
||||
if (showMarketMarkers && chartData.length > 0) {
|
||||
const minTs = chartData[0].timestamp;
|
||||
const maxTs = chartData[chartData.length - 1].timestamp;
|
||||
// Walk each day in the range and compute 9:30 AM ET (open) and 4:00 PM ET (close)
|
||||
const dayMs = 86400_000;
|
||||
const startDay = new Date(minTs);
|
||||
startDay.setUTCHours(0, 0, 0, 0);
|
||||
for (let d = startDay.getTime(); d <= maxTs + dayMs; d += dayMs) {
|
||||
const date = new Date(d);
|
||||
const dow = date.getUTCDay();
|
||||
if (dow === 0 || dow === 6) continue; // skip weekends
|
||||
// ET offset: EDT = UTC-4, EST = UTC-5. Approximate with -4 (summer).
|
||||
// 9:30 AM ET = 13:30 UTC (EDT)
|
||||
const openTs = d + 13 * 3600_000 + 30 * 60_000;
|
||||
// 4:00 PM ET = 20:00 UTC (EDT)
|
||||
const closeTs = d + 20 * 3600_000;
|
||||
if (openTs >= minTs && openTs <= maxTs) {
|
||||
marketMarkers.push({ ts: openTs, label: 'Open' });
|
||||
}
|
||||
if (closeTs >= minTs && closeTs <= maxTs) {
|
||||
marketMarkers.push({ ts: closeTs, label: 'Close' });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Available windows from the data (check both history and latest)
|
||||
const allTrends = [...(trends ?? []), ...(latestTrends ?? [])];
|
||||
const availableWindows = [...new Set(allTrends.filter((t) => t.entity_id === ticker).map((t) => t.window))];
|
||||
@@ -759,6 +789,94 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices, selecte
|
||||
.sort((a, b) => new Date(b.generated_at).getTime() - new Date(a.generated_at).getTime());
|
||||
const latest = latestForWindow[0] ?? (filtered.length > 0 ? filtered[filtered.length - 1] : null);
|
||||
|
||||
const [fullscreen, setFullscreen] = useState(false);
|
||||
|
||||
// Close fullscreen on Escape key
|
||||
useEffect(() => {
|
||||
if (!fullscreen) return;
|
||||
const handler = (e: KeyboardEvent) => { if (e.key === 'Escape') setFullscreen(false); };
|
||||
window.addEventListener('keydown', handler);
|
||||
return () => window.removeEventListener('keydown', handler);
|
||||
}, [fullscreen]);
|
||||
|
||||
// Shared chart content — rendered at different sizes
|
||||
const chartContent = (height: number) => (
|
||||
<ResponsiveContainer width="100%" height={height}>
|
||||
<LineChart data={chartData} margin={{ top: 5, right: 20, bottom: 70, left: 0 }}>
|
||||
<CartesianGrid strokeDasharray="3 3" stroke="#334155" />
|
||||
{marketMarkers.map((m, i) => (
|
||||
<ReferenceLine
|
||||
key={`market-${i}`}
|
||||
yAxisId="left"
|
||||
x={m.ts}
|
||||
stroke={m.label === 'Open' ? '#22c55e' : '#ef4444'}
|
||||
strokeDasharray="4 4"
|
||||
strokeWidth={1}
|
||||
strokeOpacity={0.5}
|
||||
label={{ value: m.label, position: 'top', fill: m.label === 'Open' ? '#22c55e' : '#ef4444', fontSize: 9 }}
|
||||
/>
|
||||
))}
|
||||
<XAxis
|
||||
dataKey="timestamp"
|
||||
type="number"
|
||||
domain={['dataMin', 'dataMax']}
|
||||
scale="time"
|
||||
tick={<ChartXTick />}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickCount={8}
|
||||
/>
|
||||
<YAxis
|
||||
yAxisId="left"
|
||||
domain={[0, 100]}
|
||||
tick={{ fill: '#94a3b8', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `${v}%`}
|
||||
/>
|
||||
{hasPrice && (
|
||||
<YAxis
|
||||
yAxisId="right"
|
||||
orientation="right"
|
||||
tick={{ fill: '#e879f9', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `$${v}`}
|
||||
domain={[
|
||||
range90d.low != null ? Math.floor(range90d.low * 0.97) : 'dataMin - 2',
|
||||
range90d.high != null ? Math.ceil(range90d.high * 1.03) : 'dataMax + 2',
|
||||
]}
|
||||
/>
|
||||
)}
|
||||
<Tooltip content={TrendTooltip} />
|
||||
<Legend verticalAlign="bottom" wrapperStyle={{ color: '#94a3b8', fontSize: 12, paddingTop: 24 }} />
|
||||
{hasPrice && range90d.high != null && (
|
||||
<ReferenceLine
|
||||
yAxisId="right"
|
||||
y={range90d.high}
|
||||
stroke="#22c55e"
|
||||
strokeDasharray="6 3"
|
||||
strokeWidth={1.5}
|
||||
label={{ value: `90d High $${range90d.high.toFixed(2)}`, position: 'insideTopRight', fill: '#22c55e', fontSize: 10 }}
|
||||
/>
|
||||
)}
|
||||
{hasPrice && range90d.low != null && (
|
||||
<ReferenceLine
|
||||
yAxisId="right"
|
||||
y={range90d.low}
|
||||
stroke="#ef4444"
|
||||
strokeDasharray="6 3"
|
||||
strokeWidth={1.5}
|
||||
label={{ value: `90d Low $${range90d.low.toFixed(2)}`, position: 'insideBottomRight', fill: '#ef4444', fontSize: 10 }}
|
||||
/>
|
||||
)}
|
||||
<Line yAxisId="left" type="monotone" dataKey="strength" name="Trend Strength" stroke="#3b82f6" strokeWidth={2} dot={{ r: 3, fill: '#3b82f6' }} activeDot={{ r: 5 }} />
|
||||
<Line yAxisId="left" type="monotone" dataKey="confidence" name="Confidence" stroke="#10b981" strokeWidth={2} dot={{ r: 3, fill: '#10b981' }} activeDot={{ r: 5 }} />
|
||||
<Line yAxisId="left" type="monotone" dataKey="contradiction" name="Contradiction" stroke="#f59e0b" strokeWidth={1.5} strokeDasharray="5 5" dot={{ r: 2, fill: '#f59e0b' }} />
|
||||
{hasPrice && (
|
||||
<Line yAxisId="right" type="monotone" dataKey="price" name="Price" stroke="#e879f9" strokeWidth={2} dot={{ r: 3, fill: '#e879f9' }} activeDot={{ r: 5 }} connectNulls />
|
||||
)}
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Window selector */}
|
||||
@@ -787,87 +905,47 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices, selecte
|
||||
<>
|
||||
{/* Trend Strength & Confidence Chart */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">
|
||||
Trend Strength & Confidence — {ticker} / {selectedWindow}
|
||||
</h2>
|
||||
<ResponsiveContainer width="100%" height={280}>
|
||||
<LineChart data={chartData} margin={{ top: 5, right: 20, bottom: 40, left: 0 }}>
|
||||
<CartesianGrid strokeDasharray="3 3" stroke="#334155" />
|
||||
<XAxis
|
||||
dataKey="timestamp"
|
||||
type="number"
|
||||
domain={['dataMin', 'dataMax']}
|
||||
scale="time"
|
||||
tick={<ChartXTick />}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickCount={8}
|
||||
/>
|
||||
<YAxis
|
||||
yAxisId="left"
|
||||
domain={[0, 100]}
|
||||
tick={{ fill: '#94a3b8', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `${v}%`}
|
||||
/>
|
||||
{hasPrice && (
|
||||
<YAxis
|
||||
yAxisId="right"
|
||||
orientation="right"
|
||||
tick={{ fill: '#e879f9', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `$${v}`}
|
||||
domain={['dataMin - 2', 'dataMax + 2']}
|
||||
/>
|
||||
)}
|
||||
<Tooltip content={TrendTooltip} />
|
||||
<Legend wrapperStyle={{ color: '#94a3b8', fontSize: 12 }} />
|
||||
<Line
|
||||
yAxisId="left"
|
||||
type="monotone"
|
||||
dataKey="strength"
|
||||
name="Trend Strength"
|
||||
stroke="#3b82f6"
|
||||
strokeWidth={2}
|
||||
dot={{ r: 3, fill: '#3b82f6' }}
|
||||
activeDot={{ r: 5 }}
|
||||
/>
|
||||
<Line
|
||||
yAxisId="left"
|
||||
type="monotone"
|
||||
dataKey="confidence"
|
||||
name="Confidence"
|
||||
stroke="#10b981"
|
||||
strokeWidth={2}
|
||||
dot={{ r: 3, fill: '#10b981' }}
|
||||
activeDot={{ r: 5 }}
|
||||
/>
|
||||
<Line
|
||||
yAxisId="left"
|
||||
type="monotone"
|
||||
dataKey="contradiction"
|
||||
name="Contradiction"
|
||||
stroke="#f59e0b"
|
||||
strokeWidth={1.5}
|
||||
strokeDasharray="5 5"
|
||||
dot={{ r: 2, fill: '#f59e0b' }}
|
||||
/>
|
||||
{hasPrice && (
|
||||
<Line
|
||||
yAxisId="right"
|
||||
type="monotone"
|
||||
dataKey="price"
|
||||
name="Price"
|
||||
stroke="#e879f9"
|
||||
strokeWidth={2}
|
||||
dot={{ r: 3, fill: '#e879f9' }}
|
||||
activeDot={{ r: 5 }}
|
||||
connectNulls
|
||||
/>
|
||||
)}
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
<div className="mb-3 flex items-center justify-between">
|
||||
<h2 className="text-sm font-medium text-gray-400">
|
||||
Trend Strength & Confidence — {ticker} / {selectedWindow}
|
||||
</h2>
|
||||
<button
|
||||
onClick={() => setFullscreen(true)}
|
||||
className="rounded-md border border-surface-700 px-2 py-1 text-xs text-gray-400 hover:bg-surface-800 hover:text-gray-200"
|
||||
title="Expand chart"
|
||||
>
|
||||
⛶ Expand
|
||||
</button>
|
||||
</div>
|
||||
{chartContent(280)}
|
||||
</Card>
|
||||
|
||||
{/* Fullscreen overlay */}
|
||||
{fullscreen && (
|
||||
<div
|
||||
className="fixed inset-0 z-50 flex flex-col bg-surface-950/95 p-6"
|
||||
onClick={(e) => { if (e.target === e.currentTarget) setFullscreen(false); }}
|
||||
role="dialog"
|
||||
aria-label="Expanded chart"
|
||||
>
|
||||
<div className="mb-4 flex items-center justify-between">
|
||||
<h2 className="text-lg font-medium text-gray-200">
|
||||
Trend Strength & Confidence — {ticker} / {selectedWindow}
|
||||
</h2>
|
||||
<button
|
||||
onClick={() => setFullscreen(false)}
|
||||
className="rounded-md border border-surface-700 px-3 py-1.5 text-sm text-gray-400 hover:bg-surface-800 hover:text-gray-200"
|
||||
>
|
||||
✕ Close
|
||||
</button>
|
||||
</div>
|
||||
<div className="flex-1 min-h-0">
|
||||
{chartContent(Math.max(400, window.innerHeight - 160))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Direction Timeline */}
|
||||
{/* Direction Timeline */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">
|
||||
|
||||
@@ -1,9 +1,89 @@
|
||||
import { useState } from 'react';
|
||||
import { useModelPerformance, useModelFailures } from '../api/hooks';
|
||||
import {
|
||||
useModelPerformance,
|
||||
useModelFailures,
|
||||
useValidationSummary,
|
||||
useValidationCalibration,
|
||||
useValidationICByHorizon,
|
||||
useValidationGateStatus,
|
||||
useValidationAttributionSources,
|
||||
useValidationAttributionCatalysts,
|
||||
useValidationAttributionLayers,
|
||||
} from '../api/hooks';
|
||||
import type {
|
||||
ValidationSummary,
|
||||
ValidationCalibration,
|
||||
CalibrationBucket,
|
||||
ValidationICByHorizon,
|
||||
ICByHorizonEntry,
|
||||
ValidationGateStatus,
|
||||
SourceAttributionResponse,
|
||||
CatalystAttributionResponse,
|
||||
LayerAttributionResponse,
|
||||
SourceAttribution,
|
||||
CatalystAttribution,
|
||||
LayerAttribution,
|
||||
} from '../api/hooks';
|
||||
import { LoadingSpinner, DateRangeSelector, StatusBadge, Card } from '../components/ui';
|
||||
import { AlertTriangle, ShieldCheck, ShieldX } from 'lucide-react';
|
||||
|
||||
type Tab = 'extraction' | 'validation';
|
||||
|
||||
/**
 * Model Performance page: a header, a two-tab bar, and the body of the active tab.
 *
 * State held here:
 * - `hours`: the value shown by `DateRangeSelector` and passed to `ExtractionTab` (initially 24).
 * - `activeTab`: which tab is selected (initially `'extraction'`).
 *
 * The date-range selector is rendered only while the extraction tab is active.
 */
export function OpsModelPage() {
  const [hours, setHours] = useState(24);
  const [activeTab, setActiveTab] = useState<Tab>('extraction');

  // Tabs in display order; both buttons share identical markup apart from id and label.
  const tabs: { id: Tab; label: string }[] = [
    { id: 'extraction', label: 'Extraction Performance' },
    { id: 'validation', label: 'Model Validation' },
  ];
  const showingExtraction = activeTab === 'extraction';

  return (
    <div className="space-y-6">
      <div className="flex items-center justify-between">
        <h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
        {showingExtraction && <DateRangeSelector value={hours} onChange={setHours} />}
      </div>

      {/* Tab bar */}
      <div className="flex border-b border-surface-700" role="tablist" aria-label="Model performance tabs">
        {tabs.map(({ id, label }) => {
          const selected = activeTab === id;
          const tone = selected
            ? 'border-b-2 border-brand-500 text-brand-400'
            : 'text-gray-400 hover:text-gray-200';
          return (
            <button
              key={id}
              role="tab"
              aria-selected={selected}
              onClick={() => setActiveTab(id)}
              className={`px-4 py-2 text-sm font-medium transition-colors ${tone}`}
            >
              {label}
            </button>
          );
        })}
      </div>

      {showingExtraction ? <ExtractionTab hours={hours} /> : <ValidationTab />}
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Extraction Performance Tab (existing content) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
function ExtractionTab({ hours }: { hours: number }) {
|
||||
const { data: perf, isLoading } = useModelPerformance(hours);
|
||||
const { data: failures } = useModelFailures(hours);
|
||||
|
||||
@@ -13,11 +93,6 @@ export function OpsModelPage() {
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
|
||||
<DateRangeSelector value={hours} onChange={setHours} />
|
||||
</div>
|
||||
|
||||
{/* Key metrics */}
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
|
||||
<StatCard label="Total Extractions" value={String(p.total_extractions ?? '—')} />
|
||||
@@ -63,6 +138,482 @@ export function OpsModelPage() {
|
||||
);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Model Validation Tab (new) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Model Validation tab: runs every validation query and passes each result
 * (data, loading flag, error) to its own section component, so each section
 * renders its own loading / error / empty state.
 */
function ValidationTab() {
  // Hooks are called unconditionally and in a fixed order on every render.
  const summary = useValidationSummary();
  const calibration = useValidationCalibration();
  const icByHorizon = useValidationICByHorizon();
  const gate = useValidationGateStatus();
  const sources = useValidationAttributionSources();
  const catalysts = useValidationAttributionCatalysts();
  const layers = useValidationAttributionLayers();

  return (
    <div className="space-y-6">
      {/* Gate Status */}
      <GateStatusSection data={gate.data} isLoading={gate.isLoading} error={gate.error} />

      {/* Summary Cards */}
      <SummaryCardsSection data={summary.data} isLoading={summary.isLoading} error={summary.error} />

      {/* Calibration Table */}
      <CalibrationTableSection
        data={calibration.data}
        isLoading={calibration.isLoading}
        error={calibration.error}
      />

      {/* IC by Horizon Table */}
      <ICByHorizonSection data={icByHorizon.data} isLoading={icByHorizon.isLoading} error={icByHorizon.error} />

      {/* Source Attribution Table */}
      <SourceAttributionSection data={sources.data} isLoading={sources.isLoading} error={sources.error} />

      {/* Catalyst Attribution Table */}
      <CatalystAttributionSection data={catalysts.data} isLoading={catalysts.isLoading} error={catalysts.error} />

      {/* Layer Attribution Table */}
      <LayerAttributionSection data={layers.data} isLoading={layers.isLoading} error={layers.error} />
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Gate Status Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Live-trading gate card.
 *
 * Shows a spinner while loading, an error card on failure, an "unknown"
 * card when the response carries no `gate_status`, and otherwise the
 * PASS/FAIL verdict with an optional reason and per-threshold table.
 * A missing or falsy `passed` flag is rendered as FAIL.
 */
function GateStatusSection({ data, isLoading, error }: {
  data: ValidationGateStatus | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load gate status" />;

  // gate_status is read as a loosely-typed record; its fields are cast below.
  const gate = data?.gate_status as Record<string, unknown> | null;
  if (!gate) {
    return (
      <Card className="flex items-center gap-3">
        <ShieldX size={20} className="text-yellow-400" />
        <div>
          <div className="text-sm font-medium text-yellow-400">Gate Status Unknown</div>
          <div className="text-xs text-gray-500">No gate evaluation data available</div>
        </div>
      </Card>
    );
  }

  const passed = gate.passed as boolean | undefined;
  const reason = gate.reason as string | undefined;
  const thresholds = gate.threshold_results as Array<Record<string, unknown>> | undefined;

  const verdictColor = passed ? 'text-green-400' : 'text-red-400';
  const VerdictIcon = passed ? ShieldCheck : ShieldX;

  // Rows are only built when at least one threshold result exists.
  const thresholdRows =
    thresholds && thresholds.length > 0
      ? thresholds.map((t, i) => (
          <tr key={i} className="border-b border-surface-800">
            <td className="py-1.5 pr-4 text-gray-300">{String(t.name ?? '')}</td>
            <td className="py-1.5 pr-4 font-mono text-gray-400">{fmtThreshold(t.threshold)}</td>
            <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtThreshold(t.actual)}</td>
            <td className="py-1.5">
              <StatusBadge status={t.passed ? 'success' : 'failed'} />
            </td>
          </tr>
        ))
      : null;

  return (
    <Card>
      <div className="mb-3 flex items-center gap-3">
        <VerdictIcon size={20} className={verdictColor} />
        <div>
          <div className={`text-sm font-medium ${verdictColor}`}>
            Live Trading Gate: {passed ? 'PASS' : 'FAIL'}
          </div>
          {reason && <div className="text-xs text-gray-500">{reason}</div>}
        </div>
      </div>

      {thresholdRows && (
        <div className="overflow-x-auto">
          <table className="w-full text-left text-xs">
            <thead>
              <tr className="border-b border-surface-700 text-gray-500">
                <th className="pb-2 pr-4 font-medium">Threshold</th>
                <th className="pb-2 pr-4 font-medium">Required</th>
                <th className="pb-2 pr-4 font-medium">Actual</th>
                <th className="pb-2 font-medium">Status</th>
              </tr>
            </thead>
            <tbody>{thresholdRows}</tbody>
          </table>
        </div>
      )}
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Summary Cards Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Grid of headline validation metrics taken from `data.snapshot`.
 *
 * Shows a spinner while loading, an error card on failure, and an
 * explanatory card when no snapshot exists yet.
 *
 * Colour rules: win rate and directional accuracy are compared against 0.53;
 * IC values use `colorForIC`; Brier score is green below 0.25; ECE is green
 * below 0.15 and yellow otherwise; excess return is green when positive and
 * red otherwise. A metric that is null/undefined is shown as '—' in the
 * neutral colour, so missing data is never presented as a warning or failure.
 */
function SummaryCardsSection({ data, isLoading, error }: {
  data: ValidationSummary | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load validation summary" />;

  const snap = data?.snapshot;
  if (!snap) {
    return (
      <Card>
        <p className="text-sm text-gray-500">No validation data available yet. Metrics will appear once predictions have been evaluated.</p>
      </Card>
    );
  }

  // Same neutral class StatCard uses as its default colour.
  const neutral = 'text-gray-100';

  const brier = snap.brier_score;
  const brierColor = brier != null && brier < 0.25 ? 'text-green-400' : neutral;

  // Previously a missing ECE rendered '—' in yellow; keep it neutral instead.
  const ece = snap.calibration_error;
  const eceColor = ece == null ? neutral : ece < 0.15 ? 'text-green-400' : 'text-yellow-400';

  // Previously a missing excess return rendered '—' in red; keep it neutral instead.
  const excess = snap.avg_excess_return_vs_spy;
  const excessColor = excess == null ? neutral : excess > 0 ? 'text-green-400' : 'text-red-400';

  return (
    <div className="grid grid-cols-2 gap-3 sm:grid-cols-3 lg:grid-cols-5">
      <StatCard label="Predictions" value={String(snap.prediction_count ?? '—')} />
      <StatCard
        label="Win Rate"
        value={fmtPct(snap.win_rate)}
        color={colorForRate(snap.win_rate, 0.53)}
      />
      <StatCard
        label="Directional Accuracy"
        value={fmtPct(snap.directional_accuracy)}
        color={colorForRate(snap.directional_accuracy, 0.53)}
      />
      <StatCard
        label="IC"
        value={fmtIC(snap.information_coefficient)}
        color={colorForIC(snap.information_coefficient)}
      />
      <StatCard
        label="Rank IC"
        value={fmtIC(snap.rank_information_coefficient)}
        color={colorForIC(snap.rank_information_coefficient)}
      />
      <StatCard
        label="Brier Score"
        value={brier != null ? brier.toFixed(4) : '—'}
        color={brierColor}
      />
      <StatCard
        label="ECE"
        value={ece != null ? ece.toFixed(4) : '—'}
        color={eceColor}
      />
      <StatCard
        label="Excess vs SPY"
        value={fmtPct(excess)}
        color={excessColor}
      />
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Calibration Table Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Calibration table: one row per confidence bucket from `data.buckets`.
 *
 * Shows a spinner while loading, an error card on failure, and a
 * placeholder card when there are no buckets.
 */
function CalibrationTableSection({ data, isLoading, error }: {
  data: ValidationCalibration | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load calibration data" />;

  const buckets = data?.buckets;
  const hasBuckets = !!buckets && buckets.length > 0;

  if (!buckets || !hasBuckets) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Calibration</h2>
        <p className="text-sm text-gray-500">No calibration data available</p>
      </Card>
    );
  }

  const rows = buckets.map((b: CalibrationBucket, i: number) => (
    <CalibrationRow key={i} bucket={b} />
  ));

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Calibration by Confidence Bucket</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Bucket</th>
              <th className="pb-2 pr-4 font-medium">Avg Confidence</th>
              <th className="pb-2 pr-4 font-medium">Observed Win Rate</th>
              <th className="pb-2 pr-4 font-medium">Count</th>
              <th className="pb-2 font-medium">Status</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/**
 * One calibration table row.
 *
 * A bucket is flagged when its own `miscalibrated` field is truthy or when
 * average confidence and observed win rate differ by more than 0.15;
 * flagged rows get an amber background and a warning label.
 */
function CalibrationRow({ bucket }: { bucket: CalibrationBucket }) {
  const gap = Math.abs(bucket.avg_confidence - bucket.observed_win_rate);
  const flagged = bucket.miscalibrated || gap > 0.15;

  const rowClass = `border-b border-surface-800 ${flagged ? 'bg-amber-900/20' : ''}`;

  const statusCell = flagged ? (
    <span className="inline-flex items-center gap-1 text-amber-400">
      <AlertTriangle size={14} />
      <span>Miscalibrated</span>
    </span>
  ) : (
    <span className="text-green-400">OK</span>
  );

  return (
    <tr className={rowClass}>
      <td className="py-1.5 pr-4 font-mono text-gray-300">
        [{fmtPctShort(bucket.bucket_low)}, {fmtPctShort(bucket.bucket_high)})
      </td>
      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(bucket.avg_confidence)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPctShort(bucket.observed_win_rate)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-400">{bucket.prediction_count}</td>
      <td className="py-1.5">{statusCell}</td>
    </tr>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* IC by Horizon Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Table of information coefficient and rank IC per horizon, from
 * `data.horizons`.
 *
 * Shows a spinner while loading, an error card on failure, and a
 * placeholder card when there are no horizon entries.
 */
function ICByHorizonSection({ data, isLoading, error }: {
  data: ValidationICByHorizon | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load IC by horizon data" />;

  const horizons = data?.horizons;
  if (!horizons || horizons.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">IC by Horizon</h2>
        <p className="text-sm text-gray-500">No IC data available</p>
      </Card>
    );
  }

  // Both IC columns are tinted by colorForIC.
  const rows = horizons.map((h: ICByHorizonEntry, i: number) => {
    const icClass = `py-1.5 pr-4 font-mono ${colorForIC(h.information_coefficient)}`;
    const rankIcClass = `py-1.5 pr-4 font-mono ${colorForIC(h.rank_information_coefficient)}`;
    return (
      <tr key={i} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 font-mono text-gray-300">{h.horizon}</td>
        <td className={icClass}>{fmtIC(h.information_coefficient)}</td>
        <td className={rankIcClass}>{fmtIC(h.rank_information_coefficient)}</td>
        <td className="py-1.5 font-mono text-gray-400">{h.prediction_count}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Information Coefficient by Horizon</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Horizon</th>
              <th className="pb-2 pr-4 font-medium">IC</th>
              <th className="pb-2 pr-4 font-medium">Rank IC</th>
              <th className="pb-2 font-medium">Predictions</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Source Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-source performance table (win rate, IC, average return, duplicate
 * rate) built from `data.sources`.
 *
 * Shows a spinner while loading, an error card on failure, and a
 * placeholder card when there are no sources.
 */
function SourceAttributionSection({ data, isLoading, error }: {
  data: SourceAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load source attribution data" />;

  const sources = data?.sources;
  if (!sources || sources.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Source Performance</h2>
        <p className="text-sm text-gray-500">No source attribution data available</p>
      </Card>
    );
  }

  // Win rate is coloured against the 0.53 threshold; IC uses colorForIC.
  const rows = sources.map((s: SourceAttribution, i: number) => {
    const winRateClass = `py-1.5 pr-4 font-mono ${colorForRate(s.win_rate, 0.53)}`;
    const icClass = `py-1.5 pr-4 font-mono ${colorForIC(s.information_coefficient)}`;
    return (
      <tr key={i} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300">{s.source}</td>
        <td className={winRateClass}>{fmtPct(s.win_rate)}</td>
        <td className={icClass}>{fmtIC(s.information_coefficient)}</td>
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(s.avg_future_return)}</td>
        <td className="py-1.5 font-mono text-gray-300">{fmtPct(s.duplicate_rate)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Source Performance</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Source</th>
              <th className="pb-2 pr-4 font-medium">Win Rate</th>
              <th className="pb-2 pr-4 font-medium">IC</th>
              <th className="pb-2 pr-4 font-medium">Avg Return</th>
              <th className="pb-2 font-medium">Duplicate Rate</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Catalyst Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-catalyst-type table (win rate, average return, IC) built from
 * `data.catalysts`.
 *
 * Shows a spinner while loading, an error card on failure, and a
 * placeholder card when there are no catalysts.
 */
function CatalystAttributionSection({ data, isLoading, error }: {
  data: CatalystAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load catalyst attribution data" />;

  const catalysts = data?.catalysts;
  if (!catalysts || catalysts.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
        <p className="text-sm text-gray-500">No catalyst attribution data available</p>
      </Card>
    );
  }

  // Win rate is coloured against the 0.53 threshold; IC uses colorForIC.
  const rows = catalysts.map((c: CatalystAttribution, i: number) => {
    const winRateClass = `py-1.5 pr-4 font-mono ${colorForRate(c.win_rate, 0.53)}`;
    const icClass = `py-1.5 font-mono ${colorForIC(c.information_coefficient)}`;
    return (
      <tr key={i} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300">{c.catalyst_type}</td>
        <td className={winRateClass}>{fmtPct(c.win_rate)}</td>
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(c.avg_future_return)}</td>
        <td className={icClass}>{fmtIC(c.information_coefficient)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Catalyst Type</th>
              <th className="pb-2 pr-4 font-medium">Win Rate</th>
              <th className="pb-2 pr-4 font-medium">Avg Return</th>
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Layer Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-layer attribution table (contribution, dominant win rate, dominant IC)
 * built from `data.layers`.
 *
 * Shows a spinner while loading, an error card on failure, and a
 * placeholder card when there are no layers.
 */
function LayerAttributionSection({ data, isLoading, error }: {
  data: LayerAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load layer attribution data" />;

  const layers = data?.layers;
  if (!layers || layers.length === 0) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Layer Attribution</h2>
        <p className="text-sm text-gray-500">No layer attribution data available</p>
      </Card>
    );
  }

  const rows = layers.map((l: LayerAttribution, i: number) => {
    const winRateClass = `py-1.5 pr-4 font-mono ${colorForRate(l.dominant_win_rate, 0.53)}`;
    const icClass = `py-1.5 font-mono ${colorForIC(l.dominant_ic)}`;
    return (
      <tr key={i} className="border-b border-surface-800">
        <td className="py-1.5 pr-4 text-gray-300 capitalize">{l.layer}</td>
        {/* NOTE(review): fmtPct multiplies by 100 — confirm avg_contribution_pct is a 0–1 fraction. */}
        <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(l.avg_contribution_pct)}</td>
        <td className={winRateClass}>{fmtPct(l.dominant_win_rate)}</td>
        <td className={icClass}>{fmtIC(l.dominant_ic)}</td>
      </tr>
    );
  });

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Layer Attribution</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              <th className="pb-2 pr-4 font-medium">Layer</th>
              <th className="pb-2 pr-4 font-medium">Contribution %</th>
              <th className="pb-2 pr-4 font-medium">Dominant Win Rate</th>
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>{rows}</tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Shared helpers */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
function StatCard({ label, value, color = 'text-gray-100' }: { label: string; value: string; color?: string }) {
|
||||
return (
|
||||
<Card className="text-center">
|
||||
@@ -71,3 +622,53 @@ function StatCard({ label, value, color = 'text-gray-100' }: { label: string; va
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/** Red-tinted card that displays a single error message. */
function ErrorCard(props: { message: string }) {
  const { message } = props;
  const body = <p className="text-sm text-red-400">{message}</p>;

  return <Card className="border-red-700/50 bg-red-900/20">{body}</Card>;
}
|
||||
|
||||
/**
 * Render a fraction as a percentage with one decimal place
 * (0.1234 → "12.3%"). Null or undefined yields an em dash.
 */
function fmtPct(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return '—';
  }
  const scaled = v * 100;
  return scaled.toFixed(1) + '%';
}
|
||||
|
||||
/**
 * Render a fraction as a whole-number percentage (0.9 → "90%"), used for
 * bucket bounds. Null or undefined yields an em dash.
 */
function fmtPctShort(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return '—';
  }
  const scaled = v * 100;
  return scaled.toFixed(0) + '%';
}
|
||||
|
||||
/**
 * Render an information-coefficient value with four decimal places.
 * Null or undefined yields an em dash.
 */
function fmtIC(v: number | null | undefined): string {
  return v === null || v === undefined ? '—' : v.toFixed(4);
}
|
||||
|
||||
/**
 * Render a loosely-typed threshold value for display.
 *
 * Null/undefined → em dash; integers are printed as-is; other numbers get
 * four decimal places; any non-number is passed through `String()`.
 */
function fmtThreshold(v: unknown): string {
  if (v === null || v === undefined) return '—';
  if (typeof v !== 'number') return String(v);
  return Number.isInteger(v) ? String(v) : v.toFixed(4);
}
|
||||
|
||||
/**
 * Text-colour class for a win rate / accuracy value: green when the value
 * is at or above `threshold`, red otherwise, neutral gray when missing.
 */
function colorForRate(v: number | null | undefined, threshold: number): string {
  if (v === null || v === undefined) {
    return 'text-gray-100';
  }
  if (v >= threshold) {
    return 'text-green-400';
  }
  return 'text-red-400';
}
|
||||
|
||||
/**
 * Text-colour class for an information-coefficient value:
 * green at 0.03 or above, yellow when positive but below 0.03,
 * red at zero or below, and gray when the value is missing.
 */
function colorForIC(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return 'text-gray-400';
  }
  if (v >= 0.03) {
    return 'text-green-400';
  }
  return v > 0 ? 'text-yellow-400' : 'text-red-400';
}
|
||||
|
||||
@@ -1,13 +1,92 @@
|
||||
/**
|
||||
* Recommendation detail page with validation context.
|
||||
*
|
||||
* Shows original confidence alongside calibrated confidence (historical win rate),
|
||||
* evidence quality indicators, source reliability, and live eligibility status.
|
||||
*
|
||||
* Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
|
||||
*/
|
||||
import { useParams, Link } from '@tanstack/react-router';
|
||||
import { useRecommendation } from '../api/hooks';
|
||||
import { AlertTriangle, ShieldCheck, ShieldX, Info } from 'lucide-react';
|
||||
import {
|
||||
useRecommendation,
|
||||
useValidationCalibration,
|
||||
useValidationGateStatus,
|
||||
useValidationAttributionSources,
|
||||
} from '../api/hooks';
|
||||
import { StatusBadge, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';
|
||||
|
||||
export function RecommendationDetailPage() {
|
||||
const { id } = useParams({ from: '/recommendations/$id' });
|
||||
const { data: rec, isLoading } = useRecommendation(id);
|
||||
const { data: calibration } = useValidationCalibration();
|
||||
const { data: gateData } = useValidationGateStatus();
|
||||
const { data: sourcesData } = useValidationAttributionSources();
|
||||
|
||||
if (isLoading || !rec) return <LoadingSpinner />;
|
||||
|
||||
// --- Calibration: find the bucket matching this recommendation's confidence ---
|
||||
const matchingBucket = calibration?.buckets?.find(
|
||||
(b) => rec.confidence >= b.bucket_low && rec.confidence < b.bucket_high,
|
||||
);
|
||||
// Handle edge case: confidence of exactly 1.0 falls in the last bucket [0.90, 1.00]
|
||||
const calibratedBucket =
|
||||
matchingBucket ??
|
||||
(rec.confidence >= 1.0
|
||||
? calibration?.buckets?.find((b) => b.bucket_high >= 1.0)
|
||||
: undefined);
|
||||
|
||||
const historicalWinRate = calibratedBucket?.observed_win_rate;
|
||||
|
||||
// --- Evidence counts ---
|
||||
const totalEvidenceCount = rec.evidence.length;
|
||||
// Compute duplicate evidence: group by normalized title, count extras
|
||||
const titleCounts = new Map<string, number>();
|
||||
for (const ev of rec.evidence) {
|
||||
const key = (ev.title ?? '').toLowerCase().trim();
|
||||
titleCounts.set(key, (titleCounts.get(key) ?? 0) + 1);
|
||||
}
|
||||
let duplicateEvidenceCount = 0;
|
||||
for (const count of titleCounts.values()) {
|
||||
if (count > 1) duplicateEvidenceCount += count - 1;
|
||||
}
|
||||
const uniqueEvidenceCount = totalEvidenceCount - duplicateEvidenceCount;
|
||||
const duplicateRatio = totalEvidenceCount > 0 ? duplicateEvidenceCount / totalEvidenceCount : 0;
|
||||
const hasDuplicateWarning = duplicateRatio > 0.2;
|
||||
|
||||
// --- Source reliability: find primary contributing sources ---
|
||||
const evidenceSources = new Map<string, number>();
|
||||
for (const ev of rec.evidence) {
|
||||
const src = ev.source_type ?? ev.publisher ?? 'unknown';
|
||||
evidenceSources.set(src, (evidenceSources.get(src) ?? 0) + ev.weight);
|
||||
}
|
||||
// Sort by total weight descending to find primary source
|
||||
const sortedSources = [...evidenceSources.entries()].sort((a, b) => b[1] - a[1]);
|
||||
const primarySourceType = sortedSources[0]?.[0];
|
||||
|
||||
// Look up source reliability from attribution data
|
||||
const primarySourceAttribution = sourcesData?.sources?.find(
|
||||
(s) => s.source_type === primarySourceType || s.source === primarySourceType,
|
||||
);
|
||||
// Source reliability is approximated from win_rate via Bayesian shrinkage
|
||||
// The attribution data has win_rate which is the observed metric
|
||||
const primarySourceWinRate = primarySourceAttribution?.win_rate;
|
||||
// Bayesian shrinkage: reliability = 0.5 + (n/(n+30)) * (win_rate - 0.5)
|
||||
const primarySourceCount = primarySourceAttribution?.prediction_count ?? 0;
|
||||
const primarySourceReliability =
|
||||
primarySourceWinRate != null
|
||||
? 0.5 + (primarySourceCount / (primarySourceCount + 30)) * (primarySourceWinRate - 0.5)
|
||||
: undefined;
|
||||
const hasLowReliabilityWarning =
|
||||
primarySourceReliability != null && primarySourceReliability < 0.4;
|
||||
|
||||
// --- Gate status ---
|
||||
const gateStatus = gateData?.gate_status as {
|
||||
passed?: boolean;
|
||||
reason?: string;
|
||||
threshold_results?: Array<{ name: string; threshold: number; actual: number; passed: boolean }>;
|
||||
} | null;
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -28,6 +107,137 @@ export function RecommendationDetailPage() {
|
||||
</dl>
|
||||
</Card>
|
||||
|
||||
{/* Validation Context Card — Requirements 13.1–13.7 */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">Validation Context</h2>
|
||||
<dl className="grid grid-cols-2 gap-x-8 gap-y-3 text-sm sm:grid-cols-3">
|
||||
{/* 13.1: Original confidence alongside calibrated confidence */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Original Confidence</dt>
|
||||
<dd className="text-gray-200">{(rec.confidence * 100).toFixed(1)}%</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Calibrated Confidence</dt>
|
||||
<dd className="text-gray-200">
|
||||
{historicalWinRate != null
|
||||
? `${(historicalWinRate * 100).toFixed(1)}%`
|
||||
: 'N/A'}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.2: Historical win rate for similar confidence levels */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Historical Win Rate</dt>
|
||||
<dd className="text-gray-200">
|
||||
{historicalWinRate != null ? (
|
||||
<span>
|
||||
{(historicalWinRate * 100).toFixed(1)}%
|
||||
{calibratedBucket && (
|
||||
<span className="ml-1 text-xs text-gray-500">
|
||||
({calibratedBucket.prediction_count} predictions)
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
'N/A'
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.3: Evidence count, unique evidence count, duplicate evidence count */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Evidence Count</dt>
|
||||
<dd className="text-gray-200">{totalEvidenceCount}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Unique Evidence</dt>
|
||||
<dd className="text-gray-200">{uniqueEvidenceCount}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="flex items-center gap-1 text-gray-500">
|
||||
Duplicate Evidence
|
||||
{/* 13.6: Warning badge when duplicate evidence count > 20% of total */}
|
||||
{hasDuplicateWarning && (
|
||||
<span
|
||||
className="inline-flex items-center gap-0.5 rounded-full border border-yellow-700/50 bg-yellow-900/40 px-1.5 py-0.5 text-[10px] font-medium text-yellow-400"
|
||||
title="Duplicate evidence exceeds 20% of total — potential evidence inflation"
|
||||
>
|
||||
<AlertTriangle size={10} />
|
||||
>20%
|
||||
</span>
|
||||
)}
|
||||
</dt>
|
||||
<dd className="text-gray-200">
|
||||
{duplicateEvidenceCount}
|
||||
{totalEvidenceCount > 0 && (
|
||||
<span className="ml-1 text-xs text-gray-500">
|
||||
({(duplicateRatio * 100).toFixed(0)}%)
|
||||
</span>
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.4: Source reliability indicator */}
|
||||
<div>
|
||||
<dt className="flex items-center gap-1 text-gray-500">
|
||||
Primary Source Reliability
|
||||
{/* 13.7: Warning badge when primary source reliability < 0.4 */}
|
||||
{hasLowReliabilityWarning && (
|
||||
<span
|
||||
className="inline-flex items-center gap-0.5 rounded-full border border-red-700/50 bg-red-900/40 px-1.5 py-0.5 text-[10px] font-medium text-red-400"
|
||||
title="Primary source reliability is below 0.4 — low or unknown reliability"
|
||||
>
|
||||
<AlertTriangle size={10} />
|
||||
Low
|
||||
</span>
|
||||
)}
|
||||
</dt>
|
||||
<dd className="text-gray-200">
|
||||
{primarySourceReliability != null ? (
|
||||
<span>
|
||||
{primarySourceReliability.toFixed(3)}
|
||||
{primarySourceType && (
|
||||
<span className="ml-1 text-xs text-gray-500">({primarySourceType})</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
'N/A'
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.5: Live eligibility status with reason */}
|
||||
<div className="col-span-2">
|
||||
<dt className="text-gray-500">Live Eligibility</dt>
|
||||
<dd>
|
||||
{gateStatus != null ? (
|
||||
<div className="flex items-center gap-2">
|
||||
{gateStatus.passed ? (
|
||||
<span className="inline-flex items-center gap-1 text-green-400">
|
||||
<ShieldCheck size={14} />
|
||||
Gate Passed
|
||||
</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center gap-1 text-red-400">
|
||||
<ShieldX size={14} />
|
||||
Gate Failed
|
||||
</span>
|
||||
)}
|
||||
{gateStatus.reason && (
|
||||
<span className="text-xs text-gray-500">{gateStatus.reason}</span>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<span className="inline-flex items-center gap-1 text-gray-500">
|
||||
<Info size={14} />
|
||||
N/A — no gate evaluation available
|
||||
</span>
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
</dl>
|
||||
</Card>
|
||||
|
||||
{rec.thesis && (
|
||||
<Card>
|
||||
<h2 className="mb-2 text-sm font-medium text-gray-400">Thesis</h2>
|
||||
|
||||
@@ -0,0 +1,275 @@
|
||||
import { useParams, Link } from '@tanstack/react-router';
|
||||
import { useReport } from '../api/hooks';
|
||||
import { LoadingSpinner, StatusBadge, Card } from '../components/ui';
|
||||
import { ArrowLeft } from 'lucide-react';
|
||||
|
||||
/**
 * Profit-and-loss section of a report.
 * NOTE(review): units (fractions vs. percentages, currency) are not visible
 * here — confirm against the report API.
 */
interface PLSection {
  // P&L amounts
  realized_pnl: number;
  unrealized_pnl: number;

  // Returns
  daily_return: number;
  cumulative_return: number;

  // Trade outcome statistics
  win_count: number;
  loss_count: number;
  win_rate: number;
  profit_factor: number;
  sharpe_ratio: number;

  // Free-text summary of the section
  summary: string;

  // Optional list of fields whose computed and snapshot values differ
  validation_warnings?: Array<{
    field_name: string;
    computed_value: number;
    snapshot_value: number;
    pct_difference: number;
  }>;
}
|
||||
|
||||
/** One position row in a report's position-performance section. */
interface PositionDetail {
  ticker: string;
  status: string;

  // Prices
  entry_price: number;
  current_or_exit_price: number;

  // Result for the position
  pnl: number;
  pnl_pct: number;

  // How long the position was held, in hours
  hold_duration_hours: number;
}
|
||||
|
||||
/** Risk section of a report. */
interface RiskMetrics {
  current_risk_tier: string;
  portfolio_heat: number;

  // Drawdown figures
  max_drawdown: number;
  current_drawdown_pct: number;

  reserve_pool_balance: number;
  circuit_breaker_event_count: number;

  // Free-text summary of the section
  summary: string;
}
|
||||
|
||||
interface ModelWindow {
|
||||
lookback: string;
|
||||
win_rate: number | null;
|
||||
directional_accuracy: number | null;
|
||||
information_coefficient: number | null;
|
||||
calibration_error: number | null;
|
||||
brier_score: number | null;
|
||||
}
|
||||
|
||||
interface ReportData {
|
||||
pnl: PLSection;
|
||||
recommendation_accuracy: {
|
||||
total_evaluated: number;
|
||||
act_count: number;
|
||||
skip_count: number;
|
||||
acted_win_rate: number;
|
||||
avg_confidence_acted: number;
|
||||
avg_confidence_skipped: number;
|
||||
summary: string;
|
||||
validation_warnings?: { field_name: string; pct_difference: number }[];
|
||||
};
|
||||
position_performance: {
|
||||
positions: PositionDetail[];
|
||||
summary: string;
|
||||
};
|
||||
risk_metrics: RiskMetrics;
|
||||
model_quality: {
|
||||
windows: ModelWindow[];
|
||||
summary: string;
|
||||
validation_warnings?: { field_name: string; pct_difference: number }[];
|
||||
};
|
||||
executive_summary: string;
|
||||
validation_status: string;
|
||||
}
|
||||
|
||||
/**
 * Bordered tile with a small label, a prominent value and, when `sub` is
 * non-empty, a muted secondary line underneath.
 */
function MetricCard(props: { label: string; value: string; sub?: string }) {
  const { label, value, sub } = props;
  const secondaryLine = sub ? (
    <div className="text-xs text-gray-500 mt-0.5">{sub}</div>
  ) : null;

  return (
    <div className="rounded-lg bg-surface-800 border border-surface-700 p-3">
      <div className="text-xs text-gray-400 mb-1">{label}</div>
      <div className="text-lg font-semibold text-gray-100">{value}</div>
      {secondaryLine}
    </div>
  );
}
|
||||
|
||||
/**
 * Formats a fractional ratio as a percentage with two decimals
 * (0.1234 → "12.34%").
 *
 * The report payload reaches this page through a type cast rather than runtime
 * validation, so a field may be absent. Missing or non-finite input yields "—",
 * the placeholder the Model Quality table already uses, instead of "NaN%".
 *
 * @param v Ratio where 1 means 100%; may be null/undefined at runtime.
 * @returns The formatted percentage, or "—" when there is nothing to show.
 */
function pct(v: number | null | undefined): string {
  if (typeof v !== 'number' || !Number.isFinite(v)) return '—';
  return `${(v * 100).toFixed(2)}%`;
}
|
||||
|
||||
/**
 * Formats an amount as dollars with two decimals, putting the minus sign in
 * front of the currency symbol (-30.2 → "-$30.20").
 *
 * Missing or non-finite input yields "—" rather than "-$NaN", since the report
 * payload is not validated before it reaches this page.
 *
 * @param v Amount in dollars; may be null/undefined at runtime.
 * @returns The formatted amount, or "—" when there is nothing to show.
 */
function dollar(v: number | null | undefined): string {
  if (typeof v !== 'number' || !Number.isFinite(v)) return '—';
  const magnitude = Math.abs(v).toFixed(2);
  // Take the sign from the rounded text so that -0.001 prints "$0.00", not "-$0.00".
  const isNegative = v < 0 && Number(magnitude) !== 0;
  return isNegative ? `-$${magnitude}` : `$${magnitude}`;
}
|
||||
|
||||
/** Placeholder shown wherever the report payload lacks a usable number. */
const MISSING_VALUE = '—';

/**
 * Runs `format` on `value` when it is a finite number and returns the
 * placeholder otherwise, so an incomplete payload degrades to "—" instead of
 * throwing on `undefined.toFixed(...)`.
 */
function formatOrMissing(
  value: number | null | undefined,
  format: (n: number) => string,
): string {
  return typeof value === 'number' && Number.isFinite(value) ? format(value) : MISSING_VALUE;
}

/**
 * Detail view for a single trading report, addressed by the `$id` route param.
 *
 * Renders the header (type, period, validation badge) followed by the
 * executive summary, P&L, recommendation accuracy, positions, risk metrics and
 * model quality cards. Shows a spinner while loading and "Report not found"
 * when the query yields no data.
 *
 * `report_data` only reaches `ReportData` through a double cast, i.e. nothing
 * checks its shape at runtime. Every section is therefore treated as optional:
 * absent sections render as empty and absent numbers render as "—".
 */
export function ReportDetailPage() {
  const { id } = useParams({ from: '/reports/$id' });
  const { data, isLoading } = useReport(id);

  if (isLoading) return <LoadingSpinner />;
  if (!data) return <div className="text-gray-400">Report not found</div>;

  // Partial<> makes the compiler enforce the guards below.
  const report = data.report_data as unknown as Partial<ReportData>;
  const pnl: Partial<PLSection> = report.pnl ?? {};
  const accuracy: Partial<ReportData['recommendation_accuracy']> =
    report.recommendation_accuracy ?? {};
  const accuracyWarnings = accuracy.validation_warnings ?? [];
  const positions = report.position_performance?.positions ?? [];
  const risk: Partial<RiskMetrics> = report.risk_metrics ?? {};
  const modelWindows = report.model_quality?.windows ?? [];

  const fixed = (digits: number) => (n: number) => n.toFixed(digits);
  // Only show the W/L breakdown when both counts exist (avoids "undefinedW / undefinedL").
  const winLossSub =
    pnl.win_count != null && pnl.loss_count != null
      ? `${pnl.win_count}W / ${pnl.loss_count}L`
      : undefined;

  return (
    <div className="space-y-6">
      {/* Header */}
      <div className="flex items-center gap-3">
        <Link to="/reports" className="text-gray-400 hover:text-gray-200">
          <ArrowLeft size={20} />
        </Link>
        <div>
          <h1 className="text-xl font-semibold text-gray-100">
            {data.report_type === 'daily' ? 'Daily' : 'Weekly'} Report
          </h1>
          <p className="text-sm text-gray-400">
            {data.period_start === data.period_end
              ? data.period_start
              : `${data.period_start} → ${data.period_end}`}
            {' · '}
            <StatusBadge status={data.validation_status} />
          </p>
        </div>
      </div>

      {/* Executive Summary */}
      {report.executive_summary && (
        <Card>
          <h2 className="text-sm font-medium text-gray-300 mb-2">Executive Summary</h2>
          <p className="text-sm text-gray-200 whitespace-pre-wrap leading-relaxed">
            {report.executive_summary}
          </p>
        </Card>
      )}

      {/* P&L Section */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">P&L</h2>
        <div className="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
          <MetricCard label="Realized P&L" value={formatOrMissing(pnl.realized_pnl, dollar)} />
          <MetricCard label="Unrealized P&L" value={formatOrMissing(pnl.unrealized_pnl, dollar)} />
          <MetricCard label="Daily Return" value={formatOrMissing(pnl.daily_return, pct)} />
          <MetricCard label="Cumulative Return" value={formatOrMissing(pnl.cumulative_return, pct)} />
          <MetricCard label="Win Rate" value={formatOrMissing(pnl.win_rate, pct)} sub={winLossSub} />
          <MetricCard label="Profit Factor" value={formatOrMissing(pnl.profit_factor, fixed(2))} />
          <MetricCard label="Sharpe Ratio" value={formatOrMissing(pnl.sharpe_ratio, fixed(2))} />
        </div>
        {pnl.summary && (
          <p className="text-xs text-gray-400 mt-2">{pnl.summary}</p>
        )}
      </Card>

      {/* Recommendation Accuracy */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">Recommendation Accuracy</h2>
        <div className="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
          <MetricCard label="Total Evaluated" value={formatOrMissing(accuracy.total_evaluated, String)} />
          <MetricCard label="Acted" value={formatOrMissing(accuracy.act_count, String)} />
          <MetricCard label="Skipped" value={formatOrMissing(accuracy.skip_count, String)} />
          <MetricCard label="Acted Win Rate" value={formatOrMissing(accuracy.acted_win_rate, pct)} />
          <MetricCard label="Avg Confidence (Acted)" value={formatOrMissing(accuracy.avg_confidence_acted, fixed(3))} />
          <MetricCard label="Avg Confidence (Skipped)" value={formatOrMissing(accuracy.avg_confidence_skipped, fixed(3))} />
        </div>
        {accuracyWarnings.length > 0 && (
          <div className="mt-2 rounded bg-yellow-900/20 border border-yellow-700/30 p-2">
            <span className="text-xs text-yellow-400">⚠ Validation warnings:</span>
            {accuracyWarnings.map((w, i) => (
              <span key={i} className="text-xs text-yellow-300 ml-2">{w.field_name} ({formatOrMissing(w.pct_difference, fixed(1))}% off)</span>
            ))}
          </div>
        )}
        {accuracy.summary && (
          <p className="text-xs text-gray-400 mt-2">{accuracy.summary}</p>
        )}
      </Card>

      {/* Position Performance */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">
          Positions ({positions.length})
        </h2>
        {positions.length > 0 ? (
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="text-left text-xs text-gray-400 border-b border-surface-700">
                  <th className="pb-2 pr-4">Ticker</th>
                  <th className="pb-2 pr-4">Status</th>
                  <th className="pb-2 pr-4">Entry</th>
                  <th className="pb-2 pr-4">Current/Exit</th>
                  <th className="pb-2 pr-4">P&L</th>
                  <th className="pb-2 pr-4">P&L %</th>
                  <th className="pb-2">Hold (hrs)</th>
                </tr>
              </thead>
              <tbody>
                {positions.map((p, i) => (
                  <tr key={i} className="border-b border-surface-800 text-gray-200">
                    <td className="py-1.5 pr-4 font-mono font-semibold text-brand-300">{p.ticker}</td>
                    <td className="py-1.5 pr-4"><StatusBadge status={p.status} /></td>
                    <td className="py-1.5 pr-4">{formatOrMissing(p.entry_price, (n) => `$${n.toFixed(2)}`)}</td>
                    <td className="py-1.5 pr-4">{formatOrMissing(p.current_or_exit_price, (n) => `$${n.toFixed(2)}`)}</td>
                    <td className={`py-1.5 pr-4 font-mono ${p.pnl >= 0 ? 'text-green-400' : 'text-red-400'}`}>
                      {formatOrMissing(p.pnl, dollar)}
                    </td>
                    <td className={`py-1.5 pr-4 ${p.pnl_pct >= 0 ? 'text-green-400' : 'text-red-400'}`}>
                      {formatOrMissing(p.pnl_pct, (n) => `${n.toFixed(2)}%`)}
                    </td>
                    <td className="py-1.5 text-gray-400">{formatOrMissing(p.hold_duration_hours, fixed(1))}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        ) : (
          <p className="text-sm text-gray-500">No positions during this period.</p>
        )}
        {report.position_performance?.summary && (
          <p className="text-xs text-gray-400 mt-3">{report.position_performance.summary}</p>
        )}
      </Card>

      {/* Risk Metrics */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">Risk Metrics</h2>
        <div className="grid grid-cols-2 md:grid-cols-3 gap-3">
          <MetricCard label="Risk Tier" value={risk.current_risk_tier ?? MISSING_VALUE} />
          <MetricCard label="Portfolio Heat" value={formatOrMissing(risk.portfolio_heat, pct)} />
          <MetricCard label="Max Drawdown" value={formatOrMissing(risk.max_drawdown, pct)} />
          <MetricCard label="Current Drawdown" value={formatOrMissing(risk.current_drawdown_pct, pct)} />
          <MetricCard label="Reserve Pool" value={formatOrMissing(risk.reserve_pool_balance, dollar)} />
          <MetricCard label="Circuit Breaker Events" value={formatOrMissing(risk.circuit_breaker_event_count, String)} />
        </div>
        {risk.summary && (
          <p className="text-xs text-gray-400 mt-3">{risk.summary}</p>
        )}
      </Card>

      {/* Model Quality */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">Model Quality</h2>
        {modelWindows.length > 0 ? (
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="text-left text-xs text-gray-400 border-b border-surface-700">
                  <th className="pb-2 pr-4">Window</th>
                  <th className="pb-2 pr-4">Win Rate</th>
                  <th className="pb-2 pr-4">Dir. Accuracy</th>
                  <th className="pb-2 pr-4">IC</th>
                  <th className="pb-2 pr-4">ECE</th>
                  <th className="pb-2">Brier</th>
                </tr>
              </thead>
              <tbody>
                {modelWindows.map((w, i) => (
                  <tr key={i} className="border-b border-surface-800 text-gray-200">
                    <td className="py-1.5 pr-4 font-medium">{w.lookback}</td>
                    <td className="py-1.5 pr-4">{formatOrMissing(w.win_rate, pct)}</td>
                    <td className="py-1.5 pr-4">{formatOrMissing(w.directional_accuracy, pct)}</td>
                    <td className="py-1.5 pr-4">{formatOrMissing(w.information_coefficient, fixed(4))}</td>
                    <td className="py-1.5 pr-4">{formatOrMissing(w.calibration_error, fixed(4))}</td>
                    <td className="py-1.5">{formatOrMissing(w.brier_score, fixed(4))}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        ) : (
          <p className="text-sm text-gray-500">No model quality data available.</p>
        )}
        {report.model_quality?.summary && (
          <p className="text-xs text-gray-400 mt-3">{report.model_quality.summary}</p>
        )}
      </Card>
    </div>
  );
}
|
||||
@@ -0,0 +1,79 @@
|
||||
import { useState } from 'react';
|
||||
import { useNavigate } from '@tanstack/react-router';
|
||||
import { useReports } from '../api/hooks';
|
||||
import { DataTable, type Column } from '../components/DataTable';
|
||||
import { StatusBadge, LoadingSpinner } from '../components/ui';
|
||||
import type { ReportListItem } from '../api/hooks';
|
||||
|
||||
/**
 * Trading reports list: a report-type filter above a table of up to 50
 * reports. Clicking a row navigates to that report's detail route.
 */
export function ReportsPage() {
  const navigate = useNavigate();
  const [reportType, setReportType] = useState('');
  // The empty "All Types" selection is sent as an omitted filter.
  const reportsQuery = useReports({
    report_type: reportType || undefined,
    limit: 50,
  });

  if (reportsQuery.isLoading) return <LoadingSpinner />;

  // Single-day reports show one date; longer periods show "start → end".
  const renderPeriod = (r: ReportListItem) => {
    if (r.period_start === r.period_end) {
      return r.period_start;
    }
    return `${r.period_start} → ${r.period_end}`;
  };

  const typeColumn: Column<ReportListItem> = {
    key: 'report_type',
    header: 'Type',
    render: (r) => (
      <span className="inline-flex items-center rounded px-2 py-0.5 text-xs font-medium bg-surface-700 text-brand-300 capitalize">
        {r.report_type}
      </span>
    ),
  };
  const periodColumn: Column<ReportListItem> = {
    key: 'period_start',
    header: 'Period',
    render: renderPeriod,
  };
  const validationColumn: Column<ReportListItem> = {
    key: 'validation_status',
    header: 'Validation',
    render: (r) => <StatusBadge status={r.validation_status} />,
  };
  const generatedColumn: Column<ReportListItem> = {
    key: 'generated_at',
    header: 'Generated',
    render: (r) => (
      <span className="text-xs text-gray-400">
        {new Date(r.generated_at).toLocaleString()}
      </span>
    ),
  };
  const columns: Column<ReportListItem>[] = [
    typeColumn,
    periodColumn,
    validationColumn,
    generatedColumn,
  ];

  const openReport = (row: ReportListItem) =>
    navigate({ to: '/reports/$id', params: { id: row.id } });

  return (
    <div>
      <div className="mb-4 flex items-center justify-between">
        <h1 className="text-xl font-semibold text-gray-100">
          Trading Reports
        </h1>
        <select
          value={reportType}
          onChange={(event) => setReportType(event.target.value)}
          className="rounded border border-surface-600 bg-surface-800 px-3 py-1.5 text-sm text-gray-200 focus:border-brand-500 focus:outline-none"
          aria-label="Filter by report type"
        >
          <option value="">All Types</option>
          <option value="daily">Daily</option>
          <option value="weekly">Weekly</option>
        </select>
      </div>
      <DataTable<ReportListItem>
        data={reportsQuery.data ?? []}
        columns={columns}
        keyField="id"
        onRowClick={openReport}
      />
    </div>
  );
}
|
||||
@@ -116,7 +116,7 @@ export function TradingPage() {
|
||||
|
||||
{/* Paper Trading Reset */}
|
||||
<ResetCard
|
||||
onReset={() => resetTrading.mutate(0)}
|
||||
onReset={(params) => resetTrading.mutate(params)}
|
||||
isResetting={resetTrading.isPending}
|
||||
/>
|
||||
|
||||
@@ -490,26 +490,101 @@ function ApprovalRow({ approval, onReview }: {
|
||||
|
||||
|
||||
function ResetCard({ onReset, isResetting }: {
|
||||
onReset: () => void;
|
||||
onReset: (params: { initial_capital?: number; reserve_pct?: number }) => void;
|
||||
isResetting: boolean;
|
||||
}) {
|
||||
const [showConfirm, setShowConfirm] = useState(false);
|
||||
const [capitalInput, setCapitalInput] = useState('100000');
|
||||
const [reservePct, setReservePct] = useState(20);
|
||||
const [useCustomCapital, setUseCustomCapital] = useState(false);
|
||||
|
||||
const capital = parseFloat(capitalInput) || 0;
|
||||
const reserveAmount = capital * (reservePct / 100);
|
||||
const activeAmount = capital - reserveAmount;
|
||||
|
||||
return (
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">Paper Trading Account</h2>
|
||||
|
||||
{/* Capital & Reserve Configuration */}
|
||||
<div className="mb-4 space-y-3 rounded-lg border border-surface-700 bg-surface-950 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
onClick={() => setUseCustomCapital(!useCustomCapital)}
|
||||
className={`relative inline-flex h-5 w-9 shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors focus:outline-none focus:ring-2 focus:ring-brand-500 focus:ring-offset-2 focus:ring-offset-surface-900 ${
|
||||
useCustomCapital ? 'bg-brand-600' : 'bg-surface-700'
|
||||
}`}
|
||||
role="switch"
|
||||
aria-checked={useCustomCapital}
|
||||
aria-label="Set custom initial capital"
|
||||
>
|
||||
<span className={`pointer-events-none inline-block h-4 w-4 rounded-full bg-white shadow transition-transform ${
|
||||
useCustomCapital ? 'translate-x-4' : 'translate-x-0'
|
||||
}`} />
|
||||
</button>
|
||||
<span className="text-sm text-gray-300">Set initial capital</span>
|
||||
<span className="text-[10px] text-gray-600">(otherwise uses broker account balance)</span>
|
||||
</div>
|
||||
|
||||
{useCustomCapital && (
|
||||
<div className="flex items-center gap-2">
|
||||
<label htmlFor="reset-capital" className="text-xs text-gray-500">Capital $</label>
|
||||
<input
|
||||
id="reset-capital"
|
||||
type="number"
|
||||
min={0}
|
||||
step={1000}
|
||||
value={capitalInput}
|
||||
onChange={(e) => setCapitalInput(e.target.value)}
|
||||
className="w-36 rounded-md border border-surface-700 bg-surface-900 px-2 py-1 text-sm font-mono text-gray-200"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<div className="flex items-center justify-between">
|
||||
<label htmlFor="reset-reserve" className="text-xs text-gray-500">
|
||||
Reserve pool: {reservePct}%
|
||||
</label>
|
||||
<span className="text-xs text-gray-600">
|
||||
Active: {100 - reservePct}%
|
||||
</span>
|
||||
</div>
|
||||
<input
|
||||
id="reset-reserve"
|
||||
type="range"
|
||||
min={0}
|
||||
max={50}
|
||||
step={5}
|
||||
value={reservePct}
|
||||
onChange={(e) => setReservePct(Number(e.target.value))}
|
||||
className="mt-1 w-full accent-brand-600"
|
||||
/>
|
||||
{useCustomCapital && capital > 0 && (
|
||||
<div className="mt-1 flex justify-between text-[10px] text-gray-600">
|
||||
<span>Reserve: ${reserveAmount.toLocaleString(undefined, { maximumFractionDigits: 0 })}</span>
|
||||
<span>Active: ${activeAmount.toLocaleString(undefined, { maximumFractionDigits: 0 })}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Reset Button */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<p className="text-sm text-gray-300">Full Reset</p>
|
||||
<p className="text-[10px] text-gray-600">
|
||||
Liquidates all broker positions, cancels open orders, wipes local trading history,
|
||||
and syncs capital from the broker account.
|
||||
and sets capital from {useCustomCapital ? 'the amount above' : 'the broker account balance'}.
|
||||
</p>
|
||||
<p className="mt-1 text-[10px] text-gray-600 italic">
|
||||
Note: To reset the Alpaca paper account balance itself, use the Alpaca dashboard.
|
||||
</p>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => setShowConfirm(true)}
|
||||
disabled={isResetting}
|
||||
className="rounded-md border border-red-700/50 bg-red-900/20 px-3 py-1.5 text-sm font-medium text-red-400 hover:bg-red-900/40 disabled:opacity-50"
|
||||
className="shrink-0 rounded-md border border-red-700/50 bg-red-900/20 px-3 py-1.5 text-sm font-medium text-red-400 hover:bg-red-900/40 disabled:opacity-50"
|
||||
>
|
||||
Reset Everything
|
||||
</button>
|
||||
@@ -519,11 +594,20 @@ function ResetCard({ onReset, isResetting }: {
|
||||
<p className="text-sm text-red-300">
|
||||
This will <span className="font-semibold">permanently delete</span> all positions, orders,
|
||||
trading decisions, stop levels, portfolio snapshots, and backtest data.
|
||||
All broker positions will be liquidated and capital will be set from the broker's account balance.
|
||||
All broker positions will be liquidated.
|
||||
{useCustomCapital
|
||||
? ` Capital will be set to $${capital.toLocaleString()} (${reservePct}% reserve / ${100 - reservePct}% active).`
|
||||
: ` Capital will be set from the broker's account balance (${reservePct}% reserve / ${100 - reservePct}% active).`}
|
||||
</p>
|
||||
<div className="mt-3 flex gap-2">
|
||||
<button
|
||||
onClick={() => { onReset(); setShowConfirm(false); }}
|
||||
onClick={() => {
|
||||
onReset({
|
||||
initial_capital: useCustomCapital ? capital : undefined,
|
||||
reserve_pct: reservePct / 100,
|
||||
});
|
||||
setShowConfirm(false);
|
||||
}}
|
||||
disabled={isResetting}
|
||||
className="rounded-md bg-red-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-red-700 disabled:opacity-50"
|
||||
>
|
||||
|
||||
@@ -30,6 +30,8 @@ import { HomePage } from './pages/Home';
|
||||
import { GlobalEventsPage } from './pages/GlobalEvents';
|
||||
import { GlobalEventDetailPage } from './pages/GlobalEventDetail';
|
||||
import { AgentsPage } from './pages/Agents';
|
||||
import { ReportsPage } from './pages/Reports';
|
||||
import { ReportDetailPage } from './pages/ReportDetail';
|
||||
|
||||
// Root route wraps everything in the app shell layout
|
||||
const rootRoute = createRootRoute({
|
||||
@@ -167,6 +169,17 @@ const agentsRoute = createRoute({
|
||||
component: AgentsPage,
|
||||
});
|
||||
|
||||
const reportsRoute = createRoute({
|
||||
getParentRoute: () => rootRoute,
|
||||
path: '/reports',
|
||||
component: ReportsPage,
|
||||
});
|
||||
const reportDetailRoute = createRoute({
|
||||
getParentRoute: () => rootRoute,
|
||||
path: '/reports/$id',
|
||||
component: ReportDetailPage,
|
||||
});
|
||||
|
||||
const routeTree = rootRoute.addChildren([
|
||||
indexRoute,
|
||||
companiesRoute,
|
||||
@@ -192,6 +205,8 @@ const routeTree = rootRoute.addChildren([
|
||||
globalEventsRoute,
|
||||
globalEventDetailRoute,
|
||||
agentsRoute,
|
||||
reportsRoute,
|
||||
reportDetailRoute,
|
||||
]);
|
||||
|
||||
export const router = createRouter({ routeTree });
|
||||
|
||||
@@ -73,6 +73,97 @@ export const mockVariantPerfHistory = [
|
||||
{ hour: '2026-04-10T11:00:00Z', invocations: 12, successes: 11, avg_duration_ms: 1300, avg_confidence: 0.82 },
|
||||
];
|
||||
|
||||
// Validation: Model Quality & Calibration mock data
|
||||
export const mockValidationSummary = {
|
||||
snapshot: {
|
||||
id: 'ms-1',
|
||||
generated_at: '2026-04-11T12:00:00Z',
|
||||
lookback_window: '30d',
|
||||
horizon: '7d',
|
||||
prediction_count: 150,
|
||||
win_rate: 0.58,
|
||||
directional_accuracy: 0.56,
|
||||
information_coefficient: 0.045,
|
||||
rank_information_coefficient: 0.038,
|
||||
avg_return: 0.012,
|
||||
avg_excess_return_vs_spy: 0.003,
|
||||
avg_excess_return_vs_sector: 0.002,
|
||||
calibration_error: 0.08,
|
||||
brier_score: 0.21,
|
||||
buy_win_rate: 0.61,
|
||||
sell_win_rate: 0.54,
|
||||
hold_win_rate: 0.50,
|
||||
metadata: {},
|
||||
},
|
||||
gate_status: {
|
||||
passed: true,
|
||||
reason: 'all thresholds met',
|
||||
threshold_results: [
|
||||
{ name: 'min_prediction_count', threshold: 100, actual: 150, passed: true },
|
||||
{ name: 'min_ic', threshold: 0.03, actual: 0.045, passed: true },
|
||||
{ name: 'min_win_rate', threshold: 0.53, actual: 0.58, passed: true },
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
export const mockValidationCalibration = {
|
||||
buckets: [
|
||||
{ bucket_low: 0.50, bucket_high: 0.60, avg_confidence: 0.55, observed_win_rate: 0.52, prediction_count: 30, miscalibrated: false },
|
||||
{ bucket_low: 0.60, bucket_high: 0.70, avg_confidence: 0.65, observed_win_rate: 0.58, prediction_count: 40, miscalibrated: false },
|
||||
{ bucket_low: 0.70, bucket_high: 0.80, avg_confidence: 0.75, observed_win_rate: 0.55, prediction_count: 35, miscalibrated: true },
|
||||
{ bucket_low: 0.80, bucket_high: 0.90, avg_confidence: 0.85, observed_win_rate: 0.70, prediction_count: 25, miscalibrated: false },
|
||||
{ bucket_low: 0.90, bucket_high: 1.00, avg_confidence: 0.95, observed_win_rate: 0.72, prediction_count: 20, miscalibrated: true },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const mockValidationGateStatus = {
|
||||
gate_status: {
|
||||
passed: false,
|
||||
reason: 'failed: min_ic below threshold',
|
||||
threshold_results: [
|
||||
{ name: 'min_prediction_count', threshold: 100, actual: 150, passed: true },
|
||||
{ name: 'min_ic', threshold: 0.03, actual: 0.02, passed: false },
|
||||
{ name: 'min_win_rate', threshold: 0.53, actual: 0.58, passed: true },
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
export const mockValidationICByHorizon = {
|
||||
horizons: [
|
||||
{ horizon: '1h', information_coefficient: 0.02, rank_information_coefficient: 0.015, prediction_count: 120, generated_at: '2026-04-11T12:00:00Z' },
|
||||
{ horizon: '7d', information_coefficient: 0.045, rank_information_coefficient: 0.038, prediction_count: 100, generated_at: '2026-04-11T12:00:00Z' },
|
||||
],
|
||||
lookback: '30d',
|
||||
};
|
||||
|
||||
export const mockValidationAttributionSources = {
|
||||
sources: [
|
||||
{ source: 'Reuters', source_type: 'news_api', prediction_count: 50, avg_weight: 0.6, avg_contribution_score: 0.3, win_rate: 0.62, avg_future_return: 0.015, avg_excess_return_vs_spy: 0.005, information_coefficient: 0.05, duplicate_rate: 0.1 },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const mockValidationAttributionCatalysts = {
|
||||
catalysts: [
|
||||
{ catalyst_type: 'earnings', prediction_count: 40, win_rate: 0.65, avg_future_return: 0.02, avg_excess_return_vs_spy: 0.008, information_coefficient: 0.06 },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const mockValidationAttributionLayers = {
|
||||
layers: [
|
||||
{ layer: 'company', avg_contribution_pct: 0.55, dominant_win_rate: 0.60, dominant_ic: 0.04 },
|
||||
{ layer: 'macro', avg_contribution_pct: 0.30, dominant_win_rate: 0.52, dominant_ic: 0.02 },
|
||||
{ layer: 'competitive', avg_contribution_pct: 0.15, dominant_win_rate: 0.48, dominant_ic: null },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const handlers = [
|
||||
// Query API (proxied at /api/)
|
||||
http.get('/api/companies', () => HttpResponse.json(mockCompanies)),
|
||||
@@ -242,4 +333,24 @@ export const handlers = [
|
||||
const body = await request.json() as Record<string, unknown>;
|
||||
return HttpResponse.json({ enabled: body.enabled, previous_enabled: true, toggled_by: 'operator' });
|
||||
}),
|
||||
|
||||
// Trading Reports
|
||||
http.get('/api/reports', () => HttpResponse.json([
|
||||
{ id: 'rpt-1', report_type: 'daily', period_start: '2025-01-15', period_end: '2025-01-15', validation_status: 'passed', generated_at: '2025-01-15T21:30:00Z' },
|
||||
])),
|
||||
http.get('/api/reports/:id', ({ params }) => {
|
||||
if (params.id === 'rpt-1') {
|
||||
return HttpResponse.json({ id: 'rpt-1', report_type: 'daily', period_start: '2025-01-15', period_end: '2025-01-15', report_data: { pnl: { realized_pnl: 125.5 }, executive_summary: 'Test' }, validation_status: 'passed', generated_at: '2025-01-15T21:30:00Z', created_at: '2025-01-15T21:30:05Z' });
|
||||
}
|
||||
return new HttpResponse(null, { status: 404 });
|
||||
}),
|
||||
|
||||
// Validation: Model Quality & Calibration endpoints
|
||||
http.get('/api/validation/summary', () => HttpResponse.json(mockValidationSummary)),
|
||||
http.get('/api/validation/calibration', () => HttpResponse.json(mockValidationCalibration)),
|
||||
http.get('/api/validation/gate-status', () => HttpResponse.json(mockValidationGateStatus)),
|
||||
http.get('/api/validation/ic-by-horizon', () => HttpResponse.json(mockValidationICByHorizon)),
|
||||
http.get('/api/validation/attribution/sources', () => HttpResponse.json(mockValidationAttributionSources)),
|
||||
http.get('/api/validation/attribution/catalysts', () => HttpResponse.json(mockValidationAttributionCatalysts)),
|
||||
http.get('/api/validation/attribution/layers', () => HttpResponse.json(mockValidationAttributionLayers)),
|
||||
];
|
||||
|
||||
@@ -169,6 +169,55 @@ describe('Global Events page', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('OpsModel validation tab', () => {
  // Per-test timeout in milliseconds, passed as the third argument to `it`.
  const TIMEOUT_MS = 10000;

  /** Renders the model ops route and waits until its heading is on screen. */
  const renderModelPage = async () => {
    renderRoute('/ops/model');
    await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
  };

  /** Clicks the "Model Validation" tab button. */
  const openValidationTab = async () => {
    await userEvent.click(screen.getByText('Model Validation'));
  };

  it('renders Model Validation tab with summary cards', async () => {
    await renderModelPage();

    // The tab buttons should be present
    for (const tabLabel of ['Extraction Performance', 'Model Validation']) {
      expect(screen.getByText(tabLabel)).toBeInTheDocument();
    }

    // Click the Model Validation tab button
    await openValidationTab();

    // Summary cards should render key metric labels unique to the validation summary
    await waitFor(() => {
      for (const metricLabel of ['Brier Score', 'ECE', 'Directional Accuracy', 'Excess vs SPY']) {
        expect(screen.getByText(metricLabel)).toBeInTheDocument();
      }
    });
  }, TIMEOUT_MS);

  it('renders calibration table with miscalibration warning', async () => {
    await renderModelPage();
    await openValidationTab();

    await waitFor(() => {
      expect(screen.getByText('Calibration by Confidence Bucket')).toBeInTheDocument();
    });

    // Miscalibrated buckets should show warning text
    const flaggedBuckets = screen.getAllByText('Miscalibrated');
    expect(flaggedBuckets.length).toBeGreaterThanOrEqual(1);
  }, TIMEOUT_MS);

  it('renders gate status pass/fail indicator', async () => {
    await renderModelPage();
    await openValidationTab();

    // The gate-status endpoint returns passed: false
    await waitFor(() => {
      expect(screen.getByText(/Live Trading Gate: FAIL/)).toBeInTheDocument();
    });
  }, TIMEOUT_MS);
});
|
||||
|
||||
describe('Agents page', () => {
|
||||
it('renders agent list in sidebar', async () => {
|
||||
renderRoute('/agents');
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Frontend hook tests for trading reports.
|
||||
*
|
||||
* Tests useReports and useReport hooks with MSW mocks.
|
||||
* Requirements validated: 5.4, 5.5
|
||||
*/
|
||||
import { renderHook, waitFor } from '@testing-library/react';
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
|
||||
import { http, HttpResponse } from 'msw';
|
||||
import { type ReactNode, createElement } from 'react';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { useReports, useReport } from '../api/hooks';
|
||||
import { server } from './mocks/server';
|
||||
|
||||
const mockReportList = [
|
||||
{
|
||||
id: 'rpt-1',
|
||||
report_type: 'daily',
|
||||
period_start: '2025-01-15',
|
||||
period_end: '2025-01-15',
|
||||
validation_status: 'passed',
|
||||
generated_at: '2025-01-15T21:30:00Z',
|
||||
},
|
||||
{
|
||||
id: 'rpt-2',
|
||||
report_type: 'weekly',
|
||||
period_start: '2025-01-13',
|
||||
period_end: '2025-01-17',
|
||||
validation_status: 'warnings',
|
||||
generated_at: '2025-01-18T10:00:00Z',
|
||||
},
|
||||
];
|
||||
|
||||
const mockReportDetail = {
|
||||
id: 'rpt-1',
|
||||
report_type: 'daily',
|
||||
period_start: '2025-01-15',
|
||||
period_end: '2025-01-15',
|
||||
validation_status: 'passed',
|
||||
generated_at: '2025-01-15T21:30:00Z',
|
||||
created_at: '2025-01-15T21:30:05Z',
|
||||
report_data: {
|
||||
pnl: { realized_pnl: 125.5, unrealized_pnl: -30.2 },
|
||||
executive_summary: 'Test executive summary',
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Builds a `renderHook` wrapper around a fresh QueryClient, with retries
 * disabled and `gcTime` set to 0.
 */
function createWrapper() {
  const client = new QueryClient({
    defaultOptions: { queries: { retry: false, gcTime: 0 } },
  });

  const Wrapper = ({ children }: { children: ReactNode }) =>
    createElement(QueryClientProvider, { client }, children);

  return Wrapper;
}
|
||||
|
||||
describe('useReports', () => {
  it('fetches report list with default params', async () => {
    server.use(http.get('/api/reports', () => HttpResponse.json(mockReportList)));

    const hook = renderHook(() => useReports(), { wrapper: createWrapper() });
    await waitFor(() => expect(hook.result.current.isSuccess).toBe(true));

    const { data } = hook.result.current;
    expect(data).toHaveLength(2);
    expect(data![0].id).toBe('rpt-1');
    expect(data![0].report_type).toBe('daily');
    expect(data![1].report_type).toBe('weekly');
  });

  it('passes query params for filtering', async () => {
    // Records the URL the hook requested so the query string can be inspected.
    let requestedUrl = '';
    const recordingHandler = http.get('/api/reports', ({ request }) => {
      requestedUrl = request.url;
      return HttpResponse.json([mockReportList[0]]);
    });
    server.use(recordingHandler);

    const hook = renderHook(() => useReports({ report_type: 'daily', limit: 10 }), {
      wrapper: createWrapper(),
    });
    await waitFor(() => expect(hook.result.current.isSuccess).toBe(true));

    expect(requestedUrl).toContain('report_type=daily');
    expect(requestedUrl).toContain('limit=10');
    expect(hook.result.current.data).toHaveLength(1);
  });

  it('handles error state', async () => {
    const failingHandler = http.get('/api/reports', () => new HttpResponse(null, { status: 500 }));
    server.use(failingHandler);

    const hook = renderHook(() => useReports(), { wrapper: createWrapper() });

    await waitFor(() => expect(hook.result.current.isError).toBe(true));
  });
});
|
||||
|
||||
// Tests for the single-report hook: successful fetch by id, no fetching
// when the id is undefined, and the error state on a 404 response.
describe('useReport', () => {
  it('fetches single report by id', async () => {
    const detailHandler = http.get('/api/reports/rpt-1', () =>
      HttpResponse.json(mockReportDetail),
    );
    server.use(detailHandler);

    const hook = renderHook(() => useReport('rpt-1'), { wrapper: createWrapper() });

    await waitFor(() => {
      expect(hook.result.current.isSuccess).toBe(true);
    });

    const detail = hook.result.current.data!;
    expect(detail.id).toBe('rpt-1');
    expect(detail.report_data).toBeDefined();
    expect(detail.report_data.pnl).toBeDefined();
    expect(detail.created_at).toBe('2025-01-15T21:30:05Z');
  });

  it('does not fetch when id is undefined', async () => {
    // No handler is registered here: without an id the hook is expected to
    // remain idle/loading and never start a request.
    const hook = renderHook(() => useReport(undefined), { wrapper: createWrapper() });

    expect(hook.result.current.isFetching).toBe(false);
  });

  it('handles 404 error', async () => {
    const missingHandler = http.get(
      '/api/reports/nonexistent',
      () => new HttpResponse(null, { status: 404 }),
    );
    server.use(missingHandler);

    const hook = renderHook(() => useReport('nonexistent'), { wrapper: createWrapper() });

    await waitFor(() => {
      expect(hook.result.current.isError).toBe(true);
    });
  });
});
|
||||
@@ -90,6 +90,25 @@ spec:
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: backfill-market-data
|
||||
image: {{ $root.Values.image.registry }}/{{ $svc.image }}:{{ $root.Values.image.tag }}
|
||||
imagePullPolicy: {{ $root.Values.image.pullPolicy }}
|
||||
command: ["sh", "-c", "python /app/scripts/backfill_market_data.py 2>/dev/null || echo 'Backfill script not available — skipping'"]
|
||||
securityContext:
|
||||
{{- include "stonks.containerSecurityContext" $root | nindent 12 }}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: stonks-config
|
||||
{{- range $svc.secrets }}
|
||||
- secretRef:
|
||||
name: {{ . }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests: { cpu: 50m, memory: 64Mi }
|
||||
limits: { cpu: 200m, memory: 256Mi }
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ $svc.image }}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
## Live-Math stage overrides
|
||||
## Helm merges these with the base values.yaml.
|
||||
## Runs the dual-pipeline signal engine with its own paper trading account
|
||||
## for validating the math-upgraded signal pipeline in production conditions.
|
||||
|
||||
## Image tag — overridden by Kargo during promotion
|
||||
image:
|
||||
tag: latest
|
||||
|
||||
## Config overrides: paper broker (separate account), dedicated DB/Redis namespace
|
||||
config:
|
||||
BROKER_MODE: "paper"
|
||||
BROKER_PROVIDER: "alpaca"
|
||||
LOG_LEVEL: "INFO"
|
||||
TRADING_ENABLED: "true"
|
||||
POSTGRES_DB: "stonks_live_math"
|
||||
REDIS_DB: "3"
|
||||
DEPLOY_STAGE: "live-math"
|
||||
POSTGRES_USER: "stonks_live_math"
|
||||
OLLAMA_BASE_URL: "http://10.1.1.12:2701"
|
||||
MARKET_DATA_BASE_URL: "https://api.polygon.io"
|
||||
|
||||
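## Secrets — dedicated paper trading account for live-math.
## WARNING: the values below are committed in plaintext. Rotate them and supply
## them from a secret store (or an untracked override file) instead of this file.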
|
||||
secrets:
|
||||
core:
|
||||
POSTGRES_PASSWORD: "St0nks0racl3!"
|
||||
MINIO_ACCESS_KEY: "AKIA6V7J3N9B5P0D2YQH"
|
||||
MINIO_SECRET_KEY: "8fG3!v2rJ7$wN@9mLpQ6zXbC4tKdPqW1"
|
||||
REDIS_PASSWORD: "PSCh4ng3me!"
|
||||
broker:
|
||||
BROKER_API_KEY: "PK64RS7NH24XPBI3IDEU3BB72Y"
|
||||
BROKER_API_SECRET: "Ho4D84392vB4s2TkGi52ra5FcxEskGfJSZYRKHa3qrYq"
|
||||
BROKER_BASE_URL: "https://paper-api.alpaca.markets"
|
||||
market:
|
||||
MARKET_DATA_API_KEY: "NPwKtrLvoBxcKt3Byp5PEvuZiBZU_d8E"
|
||||
|
||||
## Live-math-specific ingress hostnames
|
||||
ingress:
|
||||
hosts:
|
||||
queryApi: stonks-math-api.celestium.life
|
||||
symbolRegistry: stonks-math-registry.celestium.life
|
||||
dashboard: stonks-math.celestium.life
|
||||
superset: stonks-math-dash.celestium.life
|
||||
trino: stonks-math-trino.celestium.life
|
||||
tradingEngine: stonks-math-trading.celestium.life
|
||||
|
||||
## Scale: same as production (single replicas for most services)
|
||||
services:
|
||||
extractor:
|
||||
replicas: 1
|
||||
@@ -37,7 +37,7 @@ services:
|
||||
liveness: { path: /docs, port: 8000, initialDelay: 10, period: 30 }
|
||||
|
||||
ingestion:
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: ingestion
|
||||
command: "python -m services.ingestion.worker"
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
|
||||
extractor:
|
||||
replicas: 8
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: extractor
|
||||
command: "python -m services.extractor.main"
|
||||
@@ -127,6 +127,17 @@ services:
|
||||
requests: { cpu: 50m, memory: 64Mi }
|
||||
limits: { cpu: 200m, memory: 128Mi }
|
||||
|
||||
signalEngine:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: signal-engine
|
||||
command: "python -m services.signal_engine.main"
|
||||
tier: processing
|
||||
secrets: [stonks-core-secrets, stonks-market-secrets]
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 128Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
|
||||
lakePublisher:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
@@ -144,7 +155,7 @@ services:
|
||||
command: "uvicorn services.api.app:app --host 0.0.0.0 --port 8000"
|
||||
tier: api
|
||||
port: 8000
|
||||
secrets: [stonks-core-secrets]
|
||||
secrets: [stonks-core-secrets, stonks-market-secrets]
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 128Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
@@ -181,8 +192,8 @@ config:
|
||||
OLLAMA_RETRY_BASE_DELAY: "1.0"
|
||||
OLLAMA_RETRY_MAX_DELAY: "10.0"
|
||||
OLLAMA_RETRY_BACKOFF_MULTIPLIER: "2.0"
|
||||
VLLM_BASE_URL: "http://192.168.42.254:8000"
|
||||
VLLM_MODEL: "AxionML/Qwen3.5-9B-NVFP4"
|
||||
VLLM_BASE_URL: "http://10.1.1.12:2701"
|
||||
VLLM_MODEL: "qwen3.5:9b-fast"
|
||||
VLLM_TIMEOUT: "120"
|
||||
VLLM_MAX_RETRIES: "2"
|
||||
VLLM_TEMPERATURE: "0.7"
|
||||
@@ -194,7 +205,7 @@ config:
|
||||
TRINO_ICEBERG_CATALOG: "iceberg"
|
||||
BROKER_MODE: "paper"
|
||||
BROKER_PROVIDER: ""
|
||||
MARKET_DATA_BASE_URL: ""
|
||||
MARKET_DATA_BASE_URL: "https://api.polygon.io"
|
||||
MARKET_DATA_PROVIDER: "polygon"
|
||||
RETENTION_RAW_MARKET_DAYS: "90"
|
||||
RETENTION_RAW_NEWS_DAYS: "180"
|
||||
@@ -231,11 +242,11 @@ secrets:
|
||||
MINIO_SECRET_KEY: "8fG3!v2rJ7$wN@9mLpQ6zXbC4tKdPqW1"
|
||||
REDIS_PASSWORD: "PSCh4ng3me!"
|
||||
broker:
|
||||
BROKER_API_KEY: ""
|
||||
BROKER_API_SECRET: ""
|
||||
BROKER_BASE_URL: ""
|
||||
BROKER_API_KEY: "PKECQBNHD6ZLKEXZZVIFTOLX72"
|
||||
BROKER_API_SECRET: "5pV8zfUn92zAUL4TAwFor3Lk8RqNBcRzN12Y1HJjU7Gn"
|
||||
BROKER_BASE_URL: "https://paper-api.alpaca.markets"
|
||||
market:
|
||||
MARKET_DATA_API_KEY: ""
|
||||
MARKET_DATA_API_KEY: "NPwKtrLvoBxcKt3Byp5PEvuZiBZU_d8E"
|
||||
gmail:
|
||||
GMAIL_SENDER: "celes@celestium.life"
|
||||
GMAIL_RECIPIENT: "celes@celestium.life"
|
||||
|
||||
@@ -7,8 +7,8 @@ CREATE TABLE IF NOT EXISTS ai_agents (
|
||||
name VARCHAR(100) NOT NULL UNIQUE,
|
||||
slug VARCHAR(100) NOT NULL UNIQUE,
|
||||
purpose TEXT NOT NULL DEFAULT '',
|
||||
model_provider VARCHAR(50) NOT NULL DEFAULT 'vllm',
|
||||
model_name VARCHAR(200) NOT NULL DEFAULT 'qwen3.5:9b',
|
||||
model_provider VARCHAR(50) NOT NULL DEFAULT 'ollama',
|
||||
model_name VARCHAR(200) NOT NULL DEFAULT 'qwen3.5:9b-fast',
|
||||
system_prompt TEXT NOT NULL DEFAULT '',
|
||||
user_prompt_template TEXT NOT NULL DEFAULT '',
|
||||
prompt_version VARCHAR(100) NOT NULL DEFAULT '',
|
||||
@@ -37,8 +37,8 @@ SELECT * FROM (VALUES
|
||||
'Document Intelligence Extractor',
|
||||
'document-extractor',
|
||||
'Extracts structured intelligence (sentiment, catalysts, impact scores, key facts, risks) from company news, SEC filings, earnings transcripts, and press releases.',
|
||||
'vllm',
|
||||
'AxionML/Qwen3.5-9B-NVFP4',
|
||||
'ollama',
|
||||
'qwen3.5:9b-fast',
|
||||
E'You are a financial document analyst. Extract structured data as JSON. Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. Every field in the schema is required. Use "other" for catalyst_type if unsure. Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max.',
|
||||
'document-intel-v2',
|
||||
'2.0.0',
|
||||
@@ -48,8 +48,8 @@ SELECT * FROM (VALUES
|
||||
'Global Event Classifier',
|
||||
'event-classifier',
|
||||
'Classifies global/geopolitical news into structured macro events with impact type, severity, affected regions/sectors/commodities, and estimated duration.',
|
||||
'vllm',
|
||||
'AxionML/Qwen3.5-9B-NVFP4',
|
||||
'ollama',
|
||||
'qwen3.5:9b-fast',
|
||||
E'You classify MACRO-LEVEL global news into structured event JSON. Return ONLY a single JSON object. No markdown, no explanation. Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences.\n\nCRITICAL: Only classify articles about MACRO events that affect entire markets, sectors, or economies. Examples: trade wars, interest rate changes, commodity supply disruptions, regulatory changes, geopolitical conflicts, natural disasters.\n\nDO NOT classify as macro events: individual company earnings, lawsuits against a single company, single-company management changes, individual stock analysis, company-specific debt or bankruptcy, product launches by one company. For these, set severity to "low", confidence below 0.3, and leave affected_regions, affected_sectors, and affected_commodities as empty arrays.',
|
||||
'event-classification-v1',
|
||||
'1.0.0',
|
||||
@@ -59,8 +59,8 @@ SELECT * FROM (VALUES
|
||||
'Thesis Rewriter',
|
||||
'thesis-rewriter',
|
||||
'Rewrites deterministic trade thesis summaries into clear, professional analyst prose. Optional layer — system falls back to deterministic thesis if this fails.',
|
||||
'vllm',
|
||||
'AxionML/Qwen3.5-9B-NVFP4',
|
||||
'ollama',
|
||||
'qwen3.5:9b-fast',
|
||||
E'You are a concise financial analyst. You rewrite structured trade thesis summaries into clear, professional prose suitable for an internal research note.\n\nSTRICT RULES:\n1. Do NOT add any information that is not present in the input.\n2. Do NOT fabricate numbers, dates, company names, or analyst opinions.\n3. Keep the rewrite under 150 words.\n4. Preserve all factual claims, risk notes, and evidence counts from the input.\n5. Use a neutral, professional tone. Avoid hype or marketing language.\n6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.',
|
||||
'thesis-rewrite-v1',
|
||||
'1.0.0',
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
-- Migration 035: Model Validation, Calibration, and Signal Quality
|
||||
-- Creates tables for prediction snapshots, outcomes, evidence links, and metric snapshots
|
||||
-- Plus views for prediction performance and source performance analysis
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: prediction_snapshots
|
||||
-- Immutable snapshot of a prediction at generation time
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS prediction_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
ticker VARCHAR(20) NOT NULL,
|
||||
"window" VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(50) NOT NULL,
|
||||
direction VARCHAR(20) NOT NULL,
|
||||
action VARCHAR(20) NOT NULL,
|
||||
mode VARCHAR(50) NOT NULL,
|
||||
strength FLOAT NOT NULL,
|
||||
confidence FLOAT NOT NULL,
|
||||
contradiction FLOAT NOT NULL DEFAULT 0.0,
|
||||
p_bull FLOAT,
|
||||
p_bear FLOAT,
|
||||
score_company FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_macro FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_competitive FLOAT NOT NULL DEFAULT 0.0,
|
||||
evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
unique_source_count INTEGER NOT NULL DEFAULT 0,
|
||||
duplicate_evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
price_at_prediction FLOAT,
|
||||
spy_price_at_prediction FLOAT,
|
||||
sector_etf_price_at_prediction FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_ticker ON prediction_snapshots(ticker);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_generated ON prediction_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_horizon ON prediction_snapshots(horizon);
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: prediction_outcomes
|
||||
-- Realized outcome for a prediction at a specific horizon
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS prediction_outcomes (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
future_price FLOAT,
|
||||
future_return FLOAT,
|
||||
spy_future_price FLOAT,
|
||||
spy_return FLOAT,
|
||||
sector_etf_future_price FLOAT,
|
||||
sector_etf_return FLOAT,
|
||||
excess_return_vs_spy FLOAT,
|
||||
excess_return_vs_sector FLOAT,
|
||||
direction_correct BOOLEAN,
|
||||
profitable BOOLEAN,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_prediction ON prediction_outcomes(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_horizon ON prediction_outcomes(horizon);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_evaluated ON prediction_outcomes(evaluated_at);
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: signal_evidence_links
|
||||
-- Link between a prediction and a contributing evidence document
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS signal_evidence_links (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
document_id VARCHAR(200),
|
||||
signal_id VARCHAR(200),
|
||||
ticker VARCHAR(20),
|
||||
source VARCHAR(200),
|
||||
source_type VARCHAR(50),
|
||||
catalyst_type VARCHAR(50),
|
||||
sentiment VARCHAR(20),
|
||||
impact FLOAT,
|
||||
extraction_confidence FLOAT,
|
||||
weight FLOAT,
|
||||
is_duplicate BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
canonical_evidence_key VARCHAR(64),
|
||||
contribution_score FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_prediction ON signal_evidence_links(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_document ON signal_evidence_links(document_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_ticker ON signal_evidence_links(ticker);
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: model_metric_snapshots
|
||||
-- Aggregate model quality metrics for a lookback/horizon combination
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS model_metric_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
lookback_window VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
prediction_count INTEGER NOT NULL DEFAULT 0,
|
||||
win_rate FLOAT,
|
||||
directional_accuracy FLOAT,
|
||||
information_coefficient FLOAT,
|
||||
rank_information_coefficient FLOAT,
|
||||
avg_return FLOAT,
|
||||
avg_excess_return_vs_spy FLOAT,
|
||||
avg_excess_return_vs_sector FLOAT,
|
||||
calibration_error FLOAT,
|
||||
brier_score FLOAT,
|
||||
buy_win_rate FLOAT,
|
||||
sell_win_rate FLOAT,
|
||||
hold_win_rate FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_generated ON model_metric_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_lookback ON model_metric_snapshots(lookback_window);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_horizon ON model_metric_snapshots(horizon);
|
||||
|
||||
-- ============================================================================
|
||||
-- View: v_prediction_performance
|
||||
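-- Joins prediction snapshots with outcomes for flat analysis
-- (inner join: snapshots that have no outcome row yet are not listed; a
-- snapshot with several outcome rows appears once per outcome)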
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE VIEW v_prediction_performance AS
|
||||
SELECT
|
||||
ps.ticker,
|
||||
ps.direction,
|
||||
ps.action,
|
||||
ps.confidence,
|
||||
ps.strength,
|
||||
ps.contradiction,
|
||||
ps.p_bull,
|
||||
ps.score_company,
|
||||
ps.score_macro,
|
||||
ps.score_competitive,
|
||||
ps.evidence_count,
|
||||
ps.unique_source_count,
|
||||
ps.duplicate_evidence_count,
|
||||
ps.price_at_prediction,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.excess_return_vs_sector,
|
||||
po.direction_correct,
|
||||
po.profitable,
|
||||
po.horizon,
|
||||
ps.generated_at,
|
||||
po.evaluated_at
|
||||
FROM prediction_snapshots ps
|
||||
JOIN prediction_outcomes po ON po.prediction_id = ps.id;
|
||||
|
||||
-- ============================================================================
|
||||
-- View: v_source_performance
|
||||
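-- Joins evidence links with snapshots and outcomes for source attribution
-- (one row per evidence link per outcome: a prediction with several outcome
-- rows repeats each of its links, and links without any outcome are omitted)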
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE VIEW v_source_performance AS
|
||||
SELECT
|
||||
sel.source,
|
||||
sel.source_type,
|
||||
sel.catalyst_type,
|
||||
sel.sentiment,
|
||||
sel.weight,
|
||||
sel.contribution_score,
|
||||
sel.is_duplicate,
|
||||
po.direction_correct,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.horizon,
|
||||
ps.generated_at
|
||||
FROM signal_evidence_links sel
|
||||
JOIN prediction_snapshots ps ON ps.id = sel.prediction_id
|
||||
JOIN prediction_outcomes po ON po.prediction_id = sel.prediction_id;
|
||||
@@ -0,0 +1,52 @@
|
||||
-- Seed saved queries for model validation, calibration, and signal quality analysis
|
||||
-- Uses the prediction_snapshots, prediction_outcomes, signal_evidence_links,
|
||||
-- model_metric_snapshots tables and the v_prediction_performance / v_source_performance views.
|
||||
|
||||
INSERT INTO saved_queries (name, description, sql_text) VALUES
|
||||
|
||||
('Prediction Snapshot Overview', 'Recent prediction snapshots with confidence and evidence counts',
|
||||
'SELECT ticker, direction, action, mode, round(confidence::numeric, 3) AS confidence, round(strength::numeric, 3) AS strength, evidence_count, unique_source_count, duplicate_evidence_count, round(price_at_prediction::numeric, 2) AS price, generated_at FROM prediction_snapshots ORDER BY generated_at DESC LIMIT 50'),
|
||||
|
||||
('Predictions by Ticker', 'Prediction count and avg confidence per ticker',
|
||||
'SELECT ticker, count(*) AS predictions, round(avg(confidence)::numeric, 3) AS avg_confidence, round(avg(strength)::numeric, 3) AS avg_strength, count(*) FILTER (WHERE action = ''buy'') AS buys, count(*) FILTER (WHERE action = ''sell'') AS sells, count(*) FILTER (WHERE action = ''hold'') AS holds, count(*) FILTER (WHERE action = ''watch'') AS watches FROM prediction_snapshots GROUP BY ticker ORDER BY predictions DESC'),
|
||||
|
||||
-- Confidence buckets. The 0.50 boundary is tested explicitly so that values
-- below 0.50 land in their own bucket instead of being reported as
-- [0.50, 0.60) by the ELSE branch.
('Prediction Confidence Distribution', 'Predictions grouped by confidence bucket',
'SELECT CASE WHEN confidence >= 0.90 THEN ''[0.90, 1.00]'' WHEN confidence >= 0.80 THEN ''[0.80, 0.90)'' WHEN confidence >= 0.70 THEN ''[0.70, 0.80)'' WHEN confidence >= 0.60 THEN ''[0.60, 0.70)'' WHEN confidence >= 0.50 THEN ''[0.50, 0.60)'' ELSE ''[0.00, 0.50)'' END AS bucket, count(*) AS count, round(avg(confidence)::numeric, 3) AS avg_conf, count(*) FILTER (WHERE action = ''buy'') AS buys, count(*) FILTER (WHERE action = ''sell'') AS sells FROM prediction_snapshots GROUP BY 1 ORDER BY bucket'),
|
||||
|
||||
('Evidence Deduplication Quality', 'Duplicate evidence rate per ticker — high rates suggest source overlap',
|
||||
'SELECT ticker, count(*) AS total_links, sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END) AS duplicates, round(sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END)::numeric / NULLIF(count(*), 0) * 100, 1) AS dupe_pct, count(DISTINCT source_type) AS source_types FROM signal_evidence_links GROUP BY ticker ORDER BY dupe_pct DESC'),
|
||||
|
||||
('Evidence Source Breakdown', 'Evidence links by source type with duplicate rates',
|
||||
'SELECT source_type, count(*) AS total, sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END) AS duplicates, round(sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END)::numeric / NULLIF(count(*), 0) * 100, 1) AS dupe_pct, round(avg(weight)::numeric, 3) AS avg_weight, round(avg(contribution_score)::numeric, 4) AS avg_contribution FROM signal_evidence_links GROUP BY source_type ORDER BY total DESC'),
|
||||
|
||||
('Evidence by Catalyst Type', 'Evidence links grouped by catalyst type',
|
||||
'SELECT catalyst_type, count(*) AS total, round(avg(impact)::numeric, 3) AS avg_impact, round(avg(extraction_confidence)::numeric, 3) AS avg_extraction_conf, count(DISTINCT ticker) AS tickers FROM signal_evidence_links WHERE catalyst_type IS NOT NULL GROUP BY catalyst_type ORDER BY total DESC'),
|
||||
|
||||
('Prediction Performance', 'Prediction outcomes with returns and accuracy (uses v_prediction_performance view)',
|
||||
'SELECT ticker, direction, action, round(confidence::numeric, 3) AS confidence, round(future_return::numeric, 4) AS future_return, round(excess_return_vs_spy::numeric, 4) AS excess_vs_spy, direction_correct, profitable, horizon, generated_at FROM v_prediction_performance ORDER BY generated_at DESC LIMIT 50'),
|
||||
|
||||
('Win Rate by Ticker', 'Directional accuracy and profitability per ticker',
|
||||
'SELECT ticker, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(CASE WHEN profitable THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS profit_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy FROM v_prediction_performance GROUP BY ticker HAVING count(*) >= 5 ORDER BY win_rate_pct DESC'),
|
||||
|
||||
('Win Rate by Horizon', 'Directional accuracy across prediction horizons',
|
||||
'SELECT horizon, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy FROM v_prediction_performance GROUP BY horizon ORDER BY outcomes DESC'),
|
||||
|
||||
('Source Performance', 'Per-source win rate and returns (uses v_source_performance view)',
|
||||
'SELECT source, source_type, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy, round(avg(CASE WHEN is_duplicate THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS dupe_pct FROM v_source_performance GROUP BY source, source_type HAVING count(*) >= 10 ORDER BY win_rate_pct DESC'),
|
||||
|
||||
('Catalyst Performance', 'Win rate by catalyst type',
|
||||
'SELECT catalyst_type, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return FROM v_source_performance WHERE catalyst_type IS NOT NULL GROUP BY catalyst_type HAVING count(*) >= 5 ORDER BY win_rate_pct DESC'),
|
||||
|
||||
('Model Quality Timeline', 'Model metric snapshots over time for the 30d/7d window',
|
||||
'SELECT generated_at, prediction_count, round(win_rate::numeric, 3) AS win_rate, round(information_coefficient::numeric, 4) AS ic, round(rank_information_coefficient::numeric, 4) AS rank_ic, round(calibration_error::numeric, 4) AS ece, round(brier_score::numeric, 4) AS brier, round(avg_excess_return_vs_spy::numeric, 4) AS excess_spy FROM model_metric_snapshots WHERE lookback_window = ''30d'' AND horizon = ''7d'' ORDER BY generated_at DESC LIMIT 30'),
|
||||
|
||||
('Quality Gate Status', 'Latest quality gate evaluation from risk_configs',
|
||||
'SELECT key, value, updated_at FROM risk_configs WHERE key = ''model_quality_gate'' ORDER BY updated_at DESC LIMIT 1'),
|
||||
|
||||
('High Duplicate Predictions', 'Predictions where duplicate evidence exceeds 50% — potential inflation risk',
|
||||
'SELECT ticker, direction, action, round(confidence::numeric, 3) AS confidence, evidence_count, duplicate_evidence_count, round(duplicate_evidence_count::numeric / NULLIF(evidence_count, 0) * 100, 1) AS dupe_pct, generated_at FROM prediction_snapshots WHERE evidence_count > 0 AND duplicate_evidence_count::float / NULLIF(evidence_count, 0) > 0.5 ORDER BY dupe_pct DESC LIMIT 30'),
|
||||
|
||||
('Prediction vs SPY', 'Average excess return vs SPY by action type',
|
||||
'SELECT ps.action, count(*) AS outcomes, round(avg(po.future_return)::numeric, 4) AS avg_return, round(avg(po.excess_return_vs_spy)::numeric, 4) AS avg_excess_spy, round(avg(CASE WHEN po.direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct FROM prediction_snapshots ps JOIN prediction_outcomes po ON po.prediction_id = ps.id GROUP BY ps.action ORDER BY avg_excess_spy DESC')
|
||||
|
||||
ON CONFLICT (name) DO UPDATE SET sql_text = EXCLUDED.sql_text, description = EXCLUDED.description;
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Integrity check saved queries for the SQL Explorer
|
||||
-- These validate data consistency across the model validation pipeline.
|
||||
|
||||
INSERT INTO saved_queries (name, description, sql_text) VALUES
|
||||
|
||||
('⚕ Duplicate Snapshots', 'Detect duplicate prediction snapshots (same ticker+timestamp)',
|
||||
'SELECT ticker, generated_at, count(*) AS duplicates FROM prediction_snapshots GROUP BY ticker, generated_at HAVING count(*) > 1 ORDER BY duplicates DESC'),
|
||||
|
||||
('⚕ Orphaned Evidence Links', 'Evidence links referencing non-existent snapshots',
|
||||
'SELECT sel.id, sel.prediction_id, sel.ticker, sel.source_type FROM signal_evidence_links sel WHERE NOT EXISTS (SELECT 1 FROM prediction_snapshots ps WHERE ps.id = sel.prediction_id) LIMIT 20'),
|
||||
|
||||
('⚕ Evidence Count Mismatches', 'Snapshots where stored evidence_count differs from actual link count',
|
||||
'SELECT ps.id, ps.ticker, ps.evidence_count AS stored, count(sel.id) AS actual, ps.evidence_count - count(sel.id) AS diff FROM prediction_snapshots ps LEFT JOIN signal_evidence_links sel ON sel.prediction_id = ps.id GROUP BY ps.id, ps.ticker, ps.evidence_count HAVING ps.evidence_count != count(sel.id) ORDER BY abs(ps.evidence_count - count(sel.id)) DESC LIMIT 20'),
|
||||
|
||||
('⚕ Contribution Score Integrity', 'Snapshots where contribution scores do not sum to 1.0 (±0.01)',
|
||||
'SELECT prediction_id, round(sum(contribution_score)::numeric, 6) AS score_sum, count(*) AS links FROM signal_evidence_links WHERE contribution_score IS NOT NULL GROUP BY prediction_id HAVING abs(sum(contribution_score) - 1.0) > 0.01 LIMIT 20'),
|
||||
|
||||
('⚕ Canonical Key Consistency', 'Documents producing different canonical keys across predictions (should be 0)',
|
||||
'SELECT document_id, count(DISTINCT canonical_evidence_key) AS key_variants, array_agg(DISTINCT canonical_evidence_key) AS keys FROM signal_evidence_links WHERE document_id IS NOT NULL AND canonical_evidence_key IS NOT NULL GROUP BY document_id HAVING count(DISTINCT canonical_evidence_key) > 1 LIMIT 20'),
|
||||
|
||||
('⚕ Out-of-Range Values', 'Snapshots with confidence or strength outside [0, 1]',
|
||||
'SELECT id, ticker, confidence, strength, generated_at FROM prediction_snapshots WHERE confidence < 0 OR confidence > 1 OR strength < 0 OR strength > 1 LIMIT 20'),
|
||||
|
||||
('⚕ Unmatched Snapshots', 'Prediction snapshots with no matching recommendation',
|
||||
'SELECT ps.id, ps.ticker, ps.action, ps.confidence, ps.generated_at FROM prediction_snapshots ps WHERE NOT EXISTS (SELECT 1 FROM recommendations r WHERE r.ticker = ps.ticker AND r.generated_at = ps.generated_at) LIMIT 20'),
|
||||
|
||||
('⚕ Zero Evidence Rate', 'Percentage of snapshots with no evidence links by action type',
|
||||
'SELECT ps.action, count(*) AS total, count(*) FILTER (WHERE NOT EXISTS (SELECT 1 FROM signal_evidence_links sel WHERE sel.prediction_id = ps.id)) AS zero_evidence, round(count(*) FILTER (WHERE NOT EXISTS (SELECT 1 FROM signal_evidence_links sel WHERE sel.prediction_id = ps.id))::numeric / NULLIF(count(*), 0) * 100, 1) AS zero_pct FROM prediction_snapshots ps GROUP BY ps.action ORDER BY zero_pct DESC'),
|
||||
|
||||
-- LEFT JOIN keeps snapshots that have no evidence links at all, so a
-- snapshot storing duplicate_evidence_count > 0 with zero links is still
-- reported (actual_dupes = 0). An inner join would silently drop it.
('⚕ Duplicate Evidence Mismatches', 'Snapshots where stored duplicate count differs from actual is_duplicate count',
'SELECT ps.id, ps.ticker, ps.duplicate_evidence_count AS stored_dupes, count(sel.id) FILTER (WHERE sel.is_duplicate) AS actual_dupes FROM prediction_snapshots ps LEFT JOIN signal_evidence_links sel ON sel.prediction_id = ps.id GROUP BY ps.id, ps.ticker, ps.duplicate_evidence_count HAVING ps.duplicate_evidence_count != count(sel.id) FILTER (WHERE sel.is_duplicate) LIMIT 20'),
|
||||
|
||||
('⚕ Missing Price Data', 'Snapshots missing ticker or SPY price at prediction time',
|
||||
'SELECT ticker, count(*) AS total, count(*) FILTER (WHERE price_at_prediction IS NULL) AS null_price, count(*) FILTER (WHERE spy_price_at_prediction IS NULL) AS null_spy FROM prediction_snapshots GROUP BY ticker HAVING count(*) FILTER (WHERE price_at_prediction IS NULL) > 0 OR count(*) FILTER (WHERE spy_price_at_prediction IS NULL) > 0 ORDER BY null_price DESC'),
|
||||
|
||||
('⚕ Outcome Integrity', 'Prediction outcomes with impossible values (return outside [-1, 10] or NULL direction_correct)',
|
||||
'SELECT po.id, ps.ticker, po.horizon, po.future_return, po.direction_correct, po.profitable FROM prediction_outcomes po JOIN prediction_snapshots ps ON ps.id = po.prediction_id WHERE po.future_return < -1 OR po.future_return > 10 OR po.direction_correct IS NULL LIMIT 20'),
|
||||
|
||||
('⚕ Pipeline Health Summary', 'Overall validation pipeline health dashboard',
|
||||
'SELECT ''Snapshots'' AS metric, count(*)::text AS value FROM prediction_snapshots UNION ALL SELECT ''Evidence Links'', count(*)::text FROM signal_evidence_links UNION ALL SELECT ''Outcomes'', count(*)::text FROM prediction_outcomes UNION ALL SELECT ''Metric Snapshots'', count(*)::text FROM model_metric_snapshots UNION ALL SELECT ''Duplicate Evidence %'', round(avg(CASE WHEN is_duplicate THEN 100.0 ELSE 0.0 END)::numeric, 1)::text FROM signal_evidence_links UNION ALL SELECT ''Zero-Evidence Snapshots'', count(*)::text FROM prediction_snapshots ps WHERE NOT EXISTS (SELECT 1 FROM signal_evidence_links sel WHERE sel.prediction_id = ps.id) UNION ALL SELECT ''Avg Confidence'', round(avg(confidence)::numeric, 3)::text FROM prediction_snapshots UNION ALL SELECT ''Distinct Tickers'', count(DISTINCT ticker)::text FROM prediction_snapshots')
|
||||
|
||||
ON CONFLICT (name) DO UPDATE SET sql_text = EXCLUDED.sql_text, description = EXCLUDED.description;
|
||||
@@ -0,0 +1,50 @@
|
||||
-- Migration 038: Trading Reports
|
||||
-- Creates the trading_reports table for storing periodic performance reports
|
||||
-- and seeds the Report Summarizer AI agent for report section summarization.
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: trading_reports
|
||||
-- Stores daily and weekly trading performance reports as structured JSONB
|
||||
-- ============================================================================
|
||||
-- One row per generated report. A report is identified by its type and the
-- period it covers; the payload itself lives in report_data.
CREATE TABLE IF NOT EXISTS trading_reports (
    id                UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    report_type       VARCHAR(20)  NOT NULL,                   -- 'daily' or 'weekly' (see chk_report_type)
    period_start      DATE         NOT NULL,
    period_end        DATE         NOT NULL,
    report_data       JSONB        NOT NULL,                   -- structured report body
    validation_status VARCHAR(20)  NOT NULL DEFAULT 'passed',
    generated_at      TIMESTAMPTZ  NOT NULL,                   -- supplied by the writer
    created_at        TIMESTAMPTZ  NOT NULL DEFAULT now(),     -- row insertion time

    CONSTRAINT chk_report_type
        CHECK (report_type IN ('daily', 'weekly')),
    CONSTRAINT uq_trading_reports_period
        UNIQUE (report_type, period_start, period_end)
);

-- Lookup by report type.
CREATE INDEX IF NOT EXISTS idx_trading_reports_type
    ON trading_reports (report_type);

-- Lookup by covered period.
CREATE INDEX IF NOT EXISTS idx_trading_reports_period
    ON trading_reports (period_start, period_end);

-- Newest-first listing by generation time.
CREATE INDEX IF NOT EXISTS idx_trading_reports_generated
    ON trading_reports (generated_at DESC);
|
||||
|
||||
-- ============================================================================
|
||||
-- Seed: Report Summarizer Agent
|
||||
-- Generates concise natural-language summaries of trading performance report
|
||||
-- sections. Uses chunked data within the 8k-token context window.
|
||||
-- Only inserted if the slug does not already exist (idempotent).
|
||||
-- ============================================================================
|
||||
-- Insert the Report Summarizer agent exactly once: the NOT EXISTS guard makes
-- the statement a no-op when a row with this slug is already present.
INSERT INTO ai_agents (
    name, slug, purpose,
    model_provider, model_name,
    system_prompt, prompt_version, schema_version,
    temperature, max_tokens, timeout_seconds, max_retries,
    source
)
SELECT
    v.name, v.slug, v.purpose,
    v.model_provider, v.model_name,
    v.system_prompt, v.prompt_version, v.schema_version,
    v.temperature, v.max_tokens, v.timeout_seconds, v.max_retries,
    v.source
FROM (VALUES (
    'Report Summarizer',
    'report-summarizer',
    'Generates concise natural-language summaries of trading performance report sections. Processes chunked data within the 8k-token context window.',
    'ollama',
    'qwen3.5:9b-fast',
    E'You are a concise financial performance analyst. You summarize trading performance data into clear, professional prose.\n\nSTRICT RULES:\n1. Do NOT fabricate any data not present in the input.\n2. Do NOT add opinions, predictions, or recommendations.\n3. Keep each summary under 200 words.\n4. Highlight notable trends, outliers, and changes from prior periods.\n5. Use precise numbers from the input data.\n6. Use a neutral, professional tone.\n7. Return ONLY the summary text. No JSON, no markdown, no commentary.',
    'report-summarizer-v1',
    '1.0.0',
    0.0,
    1024,
    60,
    2,
    'system'
)) AS v(
    name, slug, purpose,
    model_provider, model_name,
    system_prompt, prompt_version, schema_version,
    temperature, max_tokens, timeout_seconds, max_retries,
    source
)
WHERE NOT EXISTS (
    SELECT 1 FROM ai_agents WHERE slug = 'report-summarizer'
);
|
||||
@@ -0,0 +1,51 @@
|
||||
-- Migration 039: Signal Engine Outputs
|
||||
-- Creates the signal_engine_outputs table for persisting dual-pipeline evaluations.
|
||||
|
||||
-- One row per evaluation of a ticker: both pipelines' results, their delta,
-- the optional trade plan and the complete output kept for audit.
CREATE TABLE IF NOT EXISTS signal_engine_outputs (
    id                      UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    ticker                  TEXT         NOT NULL,
    evaluated_at            TIMESTAMPTZ  NOT NULL,
    price                   NUMERIC      NOT NULL,

    -- Heuristic pipeline
    heuristic_verdict       TEXT         NOT NULL,
    heuristic_confidence    NUMERIC      NOT NULL,
    heuristic_s_total       NUMERIC      NOT NULL,

    -- Probabilistic pipeline
    probabilistic_verdict   TEXT         NOT NULL,
    probabilistic_p_up      NUMERIC      NOT NULL,
    probabilistic_entropy   NUMERIC      NOT NULL,
    probabilistic_ev_r      NUMERIC      NOT NULL,

    -- Delta analysis
    delta_agreement         BOOLEAN      NOT NULL,
    delta_confidence_delta  NUMERIC      NOT NULL,
    delta_reasons           JSONB        NOT NULL DEFAULT CAST('[]' AS jsonb),

    -- Trade plan (null when no BUY verdict)
    trade_plan              JSONB,

    -- Full output for audit
    full_output             JSONB        NOT NULL,

    -- Exit signals
    exit_signals            JSONB        NOT NULL DEFAULT CAST('[]' AS jsonb),

    -- Metadata
    pipeline_mode           TEXT         NOT NULL DEFAULT 'dual_pipeline',
    shadow_mode             BOOLEAN      NOT NULL DEFAULT false,
    created_at              TIMESTAMPTZ  NOT NULL DEFAULT now()
);

-- Per-ticker time-range queries.
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_ticker_time
    ON signal_engine_outputs (ticker, evaluated_at);

-- Global time-range queries.
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_evaluated
    ON signal_engine_outputs (evaluated_at);

-- Filtering by the two pipelines' verdicts.
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_verdicts
    ON signal_engine_outputs (heuristic_verdict, probabilistic_verdict);
|
||||
@@ -9,6 +9,8 @@ spec:
|
||||
provider:
|
||||
job:
|
||||
spec:
|
||||
ttlSecondsAfterFinished: 3600
|
||||
backoffLimit: 0
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
"""Backfill 90 days of daily OHLCV bars from Polygon into market_snapshots.
|
||||
|
||||
Run as: python scripts/backfill_market_data.py
|
||||
Requires env vars: POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST,
|
||||
POSTGRES_PORT, POSTGRES_DB, MARKET_DATA_API_KEY,
|
||||
MARKET_DATA_BASE_URL (optional, defaults to polygon).
|
||||
|
||||
Skips if market_snapshots already has >= 50 bars.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
|
||||
|
||||
async def backfill() -> None:
    """Backfill the last 90 days of daily bars for every active company.

    Does nothing when ``MARKET_DATA_API_KEY`` is unset or when
    ``market_snapshots`` already holds at least 50 rows of type ``'bar'``.
    Otherwise, for each active ticker, it fetches daily aggregates from
    ``{base_url}/v2/aggs/ticker/...`` and inserts every bar whose ``t``
    timestamp is not already stored for that ticker.

    A failed fetch for one ticker is reported and skipped; the remaining
    tickers are still processed. The connection pool is closed on every exit
    path, including when an exception propagates.

    Raises:
        KeyError: if one of the required ``POSTGRES_*`` variables is missing.
    """
    api_key = os.environ.get("MARKET_DATA_API_KEY", "")
    if not api_key:
        print("No MARKET_DATA_API_KEY set — skipping backfill.")
        return

    base_url = os.environ.get("MARKET_DATA_BASE_URL", "https://api.polygon.io")
    # NOTE(review): user/password are interpolated into the DSN unescaped, so
    # credentials containing URL-reserved characters would need encoding.
    dsn = (
        f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}"
        f"@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}"
        f"/{os.environ['POSTGRES_DB']}"
    )
    pool = await asyncpg.create_pool(dsn=dsn)

    try:
        # Check if backfill is needed
        count = await pool.fetchval(
            "SELECT count(*) FROM market_snapshots WHERE snapshot_type = 'bar'"
        )
        if count >= 50:
            print(f"Market data has {count} bars — skipping backfill.")
            return

        print(f"Only {count} market bars found — backfilling 90 days from Polygon...")

        tickers = [
            r["ticker"]
            for r in await pool.fetch(
                "SELECT ticker FROM companies WHERE active = TRUE ORDER BY ticker"
            )
        ]
        to_d = date.today().isoformat()
        from_d = (date.today() - timedelta(days=90)).isoformat()
        total = 0

        async with httpx.AsyncClient(timeout=30) as client:
            for ticker in tickers:
                url = f"{base_url}/v2/aggs/ticker/{ticker}/range/1/day/{from_d}/{to_d}"
                try:
                    resp = await client.get(
                        url,
                        params={
                            "apiKey": api_key,
                            "adjusted": "true",
                            "sort": "asc",
                            "limit": "500",
                        },
                    )
                    resp.raise_for_status()
                    # Treat an explicit `"results": null` like a missing key.
                    bars = resp.json().get("results") or []
                except Exception as e:
                    # httpx status errors embed the full request URL, which
                    # carries the API key as a query parameter — redact it
                    # before it reaches the logs.
                    reason = str(e).replace(api_key, "***")
                    print(f" {ticker}: fetch failed ({reason})")
                    continue

                if not bars:
                    # Nothing to insert; skip the per-ticker lookups.
                    continue

                existing = {
                    r["bar_ts"]
                    for r in await pool.fetch(
                        "SELECT DISTINCT (data->>'t')::bigint AS bar_ts "
                        "FROM market_snapshots WHERE ticker = $1 AND snapshot_type = 'bar'",
                        ticker,
                    )
                    if r["bar_ts"]
                }
                co = await pool.fetchrow(
                    "SELECT id FROM companies WHERE ticker = $1", ticker
                )
                cid = co["id"] if co else None
                inserted = 0

                for bar in bars:
                    bar_ts = bar.get("t")
                    if not bar_ts or bar_ts in existing:
                        continue
                    bj = json.dumps(bar)
                    ch = hashlib.sha256(bj.encode()).hexdigest()
                    # Bar timestamps are divided by 1000 before conversion,
                    # i.e. they are handled as epoch milliseconds.
                    ca = datetime.fromtimestamp(bar_ts / 1000, tz=timezone.utc)
                    await pool.execute(
                        "INSERT INTO market_snapshots "
                        "(company_id, ticker, snapshot_type, data, source_provider, "
                        "captured_at, content_hash) "
                        "VALUES ($1, $2, 'bar', $3::jsonb, 'polygon_backfill', $4, $5)",
                        cid,
                        ticker,
                        bj,
                        ca,
                        ch,
                    )
                    # Guard against the same timestamp appearing twice in one response.
                    existing.add(bar_ts)
                    inserted += 1

                total += inserted
                if inserted:
                    print(f" {ticker}: {inserted} bars")
    finally:
        await pool.close()

    print(f"Backfill complete: {total} bars across {len(tickers)} tickers")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Best-effort entry point: a failed backfill is reported on stderr but the
    # process still exits 0 so that startup is never blocked by it.
    try:
        asyncio.run(backfill())
    except Exception as exc:
        sys.stderr.write(f"Backfill failed: {exc}\n")
    sys.exit(0)  # Don't block startup on backfill failure
|
||||
Executable
+360
@@ -0,0 +1,360 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# factory-reset.sh — Factory reset a Stonks Oracle stage
|
||||
#
|
||||
# Drops and recreates the database, flushes Redis keys, empties S3 buckets,
|
||||
# re-runs migrations, and re-seeds the symbol registry.
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/factory-reset.sh <stage> [--component <component>] [--yes] [--verbose]
|
||||
#
|
||||
# Stages:
|
||||
# production — stonks-oracle namespace, DB=stonks, Redis DB=0, buckets=stonks-*
|
||||
# paper — stonks-paper namespace, DB=stonks_paper, Redis DB=2, buckets=paper-stonks-*
|
||||
# beta — stonks-beta namespace, DB=stonks_beta, Redis DB=1, buckets=beta-stonks-*
|
||||
#
|
||||
# Components (optional, default: all):
|
||||
# all — Full factory reset (DB + S3 + Redis)
|
||||
# db — Database only (drop/recreate + migrations + seed)
|
||||
# s3 — S3 buckets only (empty all stage buckets)
|
||||
# redis — Redis only (flush stage keys)
|
||||
# computed — Computed data only (trends, recommendations, orders, positions)
|
||||
#
|
||||
# Examples:
|
||||
# bash scripts/factory-reset.sh beta # Full reset of beta
|
||||
# bash scripts/factory-reset.sh production --component db # DB-only reset of production
|
||||
# bash scripts/factory-reset.sh paper --component computed # Clear computed data in paper
|
||||
# bash scripts/factory-reset.sh beta --verbose # Full reset with per-object output
|
||||
#
|
||||
# Requirements:
|
||||
# - kubectl access to the cluster
|
||||
# - mc (MinIO client) configured with alias "stonks"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Positional stage plus optional flags. Defaults: full reset, interactive
# confirmation, terse output.
STAGE="${1:-}"
COMPONENT="all"
AUTO_YES=false
VERBOSE=false

if [[ -z "$STAGE" ]]; then
  echo "Usage: bash scripts/factory-reset.sh <stage> [--component <component>] [--yes] [--verbose]"
  echo "Stages: production, paper, beta"
  echo "Components: all, db, s3, redis, computed"
  echo "Flags: --yes Skip confirmation prompt"
  echo " --verbose Show detailed per-object output"
  exit 1
fi

shift
while [[ $# -gt 0 ]]; do
  case $1 in
    --component)
      # With `set -u`, reading "$2" when it is absent aborts with an opaque
      # "unbound variable" error, so check for the value explicitly.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --component requires a value. Use: all, db, s3, redis, computed"
        exit 1
      fi
      COMPONENT="$2"
      shift 2
      ;;
    --yes|-y) AUTO_YES=true; shift ;;
    --verbose|-v) VERBOSE=true; shift ;;
    *) echo "Unknown option: $1"; exit 1 ;;
  esac
done

# Reject an unknown component now, before the user is asked to confirm a
# destructive operation that would then fail at dispatch time anyway.
case "$COMPONENT" in
  all|db|s3|redis|computed) ;;
  *)
    echo "ERROR: Unknown component '$COMPONENT'. Use: all, db, s3, redis, computed"
    exit 1
    ;;
esac
|
||||
|
||||
# Map stage to namespace, DB name, Redis DB, and bucket prefix
|
||||
# Resolve per-stage settings: Kubernetes namespace, Postgres database,
# Redis DB index, S3 bucket prefix, Redis key prefix and deploy-stage label.
case "$STAGE" in
  production|prod)
    NAMESPACE="stonks-oracle";  DB_NAME="stonks";        REDIS_DB=0
    BUCKET_PREFIX="stonks-";    REDIS_KEY_PREFIX="stonks:"
    DEPLOY_STAGE=""
    ;;
  paper)
    NAMESPACE="stonks-paper";   DB_NAME="stonks_paper";  REDIS_DB=2
    BUCKET_PREFIX="paper-stonks-"; REDIS_KEY_PREFIX="stonks:paper:"
    DEPLOY_STAGE="paper"
    ;;
  beta)
    NAMESPACE="stonks-beta";    DB_NAME="stonks_beta";   REDIS_DB=1
    BUCKET_PREFIX="beta-stonks-"; REDIS_KEY_PREFIX="stonks:beta:"
    DEPLOY_STAGE="beta"
    ;;
  *)
    echo "ERROR: Unknown stage '$STAGE'. Use: production, paper, beta"
    exit 1
    ;;
esac
|
||||
|
||||
# Shared infrastructure endpoints used by every stage.
PG_POD="postgresql-1"
PG_NS="postgresql-service"
PG_USER="postgres"
REDIS_HOST="redis-master.redis-service.svc.cluster.local"
REDIS_PORT="6379"
# Prefer a password supplied through the environment so the secret does not
# have to live in the repository; the previous literal is kept as the default
# so existing invocations keep working.
# NOTE(review): rotate this default and remove it once callers export REDIS_PASSWORD.
REDIS_PASSWORD="${REDIS_PASSWORD:-PSCh4ng3me!}"
MC_ALIAS="stonks"

# S3 bucket suffixes; the full bucket name is "${BUCKET_PREFIX}<suffix>".
BUCKET_SUFFIXES=(
  "audit"
  "lakehouse"
  "llm-prompts"
  "llm-results"
  "normalized"
  "raw-filings"
  "raw-market"
  "raw-news"
)
|
||||
|
||||
# Summarise what is about to be reset.
printf '%s\n' \
  "============================================" \
  " Stonks Oracle Factory Reset" \
  "============================================" \
  " Stage: $STAGE" \
  " Namespace: $NAMESPACE" \
  " Database: $DB_NAME" \
  " Redis DB: $REDIS_DB" \
  " Buckets: ${BUCKET_PREFIX}*" \
  " Component: $COMPONENT"
if [[ "$VERBOSE" == true ]]; then
  printf '%s\n' " Verbose: ON"
fi
printf '%s\n' "============================================" ""

# Safety confirmation: unless --yes was given, the operator must type the
# stage name back before anything destructive runs.
if [[ "$AUTO_YES" != true ]]; then
  read -rp "⚠️ This will DESTROY data. Type '$STAGE' to confirm: " confirm
  if [[ "$confirm" != "$STAGE" ]]; then
    echo "Aborted."
    exit 1
  fi
else
  echo "⚠️ --yes flag set, skipping confirmation"
fi
echo ""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper: scale down all deployments in the namespace
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scale every deployment in $NAMESPACE to zero replicas and wait (up to 60s)
# for the pods to disappear. All kubectl failures are tolerated.
scale_down() {
  echo "--- Scaling down $NAMESPACE deployments ---"

  local deployments
  deployments=$(kubectl get deployments -n "$NAMESPACE" -o name 2>/dev/null || true)

  # Nothing deployed (or the listing failed): there is nothing to wait for.
  if [[ -z "$deployments" ]]; then
    echo " ✓ All deployments scaled to 0"
    return
  fi

  local count
  count=$(wc -l <<< "$deployments")

  # Verbose mode keeps kubectl's stdout; otherwise all output is discarded.
  if [[ "$VERBOSE" == true ]]; then
    xargs -I{} kubectl scale {} --replicas=0 -n "$NAMESPACE" <<< "$deployments" 2>/dev/null || true
  else
    xargs -I{} kubectl scale {} --replicas=0 -n "$NAMESPACE" <<< "$deployments" &>/dev/null || true
  fi

  echo " Waiting for $count deployments to terminate..."
  kubectl wait --for=delete pod --all -n "$NAMESPACE" --timeout=60s 2>/dev/null || true

  echo " ✓ All deployments scaled to 0"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper: scale up all deployments in the namespace
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bring the namespace's deployments back up.
#
# If an ArgoCD application whose listing mentions $NAMESPACE exists, replica
# counts are left to ArgoCD; otherwise every deployment is scaled to 1
# replica. The scheduler deployment is then restarted and its rollout awaited
# (up to 120s). No kubectl failure aborts the script.
scale_up() {
  echo "--- Scaling up $NAMESPACE deployments ---"

  # Capture the listing first: piping kubectl straight into `grep -q` under
  # `set -o pipefail` can make the pipeline fail (grep exits early, kubectl
  # gets SIGPIPE) even when the application is present.
  local argo_apps
  argo_apps=$(kubectl get application -n argocd 2>/dev/null || true)

  # ArgoCD will auto-heal and restore replica counts, just trigger a sync
  if grep -q "$NAMESPACE" <<< "$argo_apps"; then
    echo " ArgoCD will restore replicas via self-heal"
  else
    echo " Manually restoring replicas..."
    local deployments
    deployments=$(kubectl get deployments -n "$NAMESPACE" -o name 2>/dev/null || true)
    if [[ "$VERBOSE" == true ]]; then
      echo "$deployments" | xargs -I{} kubectl scale {} --replicas=1 -n "$NAMESPACE" 2>/dev/null || true
    else
      echo "$deployments" | xargs -I{} kubectl scale {} --replicas=1 -n "$NAMESPACE" &>/dev/null || true
    fi
  fi
  echo " ✓ Scale-up triggered"

  # Wait for the scheduler pod (which runs migrations via init containers)
  # to be fully ready before other services start querying the DB.
  echo " Waiting for scheduler pod (runs migrations)..."
  kubectl rollout restart deployment/scheduler -n "$NAMESPACE" 2>/dev/null || true
  # Only claim readiness when the rollout actually completed; a timeout is
  # reported as a warning instead of a false success.
  if kubectl rollout status deployment/scheduler -n "$NAMESPACE" --timeout=120s 2>/dev/null; then
    echo " ✓ Scheduler ready (migrations applied)"
  else
    echo " ⚠ Scheduler rollout not confirmed within 120s — verify the deployment manually"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: Database
|
||||
# ---------------------------------------------------------------------------
|
||||
# Drop and recreate $DB_NAME, apply every SQL file in infra/migrations in
# name order, then bring the stage back up and run the symbol-registry seed
# inside the scheduler pod.
#
# Must be run from the repository root (migrations are read from the relative
# path infra/migrations). Returns 1 without touching the database when no
# migration files can be found.
reset_db() {
  echo "--- Resetting database: $DB_NAME ---"

  # Locate the migrations BEFORE anything destructive happens: discovering
  # that they are missing only after DROP DATABASE would leave the stage with
  # an empty database. A glob is used instead of parsing `ls` output so file
  # names are never word-split; glob results are already sorted.
  local migrations=(infra/migrations/*.sql)
  if [[ ! -e "${migrations[0]}" ]]; then
    echo " ✗ No migration files found in infra/migrations (run from the repository root)" >&2
    return 1
  fi
  local count=${#migrations[@]}

  # Terminate active connections
  local terminate_sql="SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$DB_NAME' AND pid <> pg_backend_pid();"
  if [[ "$VERBOSE" == true ]]; then
    kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -c \
      "$terminate_sql" \
      2>/dev/null || true
  else
    kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -tAc \
      "$terminate_sql" \
      &>/dev/null || true
  fi

  # Drop and recreate
  kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -c \
    "DROP DATABASE IF EXISTS $DB_NAME;"
  kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -c \
    "CREATE DATABASE $DB_NAME OWNER stonks;"
  echo " ✓ Database recreated"

  # Run migrations. A failing file does not stop the run (best effort, as
  # before), but it is counted so the summary does not claim full success.
  # NOTE(review): psql without ON_ERROR_STOP exits 0 even when individual
  # statements fail, so this only detects kubectl/connection-level failures.
  echo " Running $count migrations..."
  local migration
  local failed=0
  for migration in "${migrations[@]}"; do
    if [[ "$VERBOSE" == true ]]; then
      echo " Applying $(basename "$migration")..."
    fi
    if ! kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -i -- psql -U stonks -d "$DB_NAME" < "$migration" 2>/dev/null; then
      failed=$((failed + 1))
      echo " ⚠ $(basename "$migration") could not be applied"
    fi
  done
  if (( failed > 0 )); then
    echo " ⚠ Migrations finished with $failed failure(s) ($count files)"
  else
    echo " ✓ Migrations applied ($count files)"
  fi

  # Seed symbol registry
  echo " Seeding symbol registry..."
  # The seed runs inside the scheduler pod, so the stage has to be up first.
  scale_up
  sleep 10
  local scheduler_pod
  # `|| true`: a failed lookup must fall through to the "no pod" message
  # rather than abort the whole script under `set -e`.
  scheduler_pod=$(kubectl get pods -n "$NAMESPACE" -l app=scheduler -o name 2>/dev/null | head -1 || true)
  if [[ -n "$scheduler_pod" ]]; then
    if kubectl exec -n "$NAMESPACE" "$scheduler_pod" -c scheduler -- \
      python -m services.symbol_registry.seed 2>/dev/null; then
      echo " ✓ Seeded"
    else
      echo " ⚠ Seed failed (will retry on next restart)"
    fi
  else
    echo " ⚠ No scheduler pod available — seed will run on next deployment"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: Computed data only (trends, recommendations, orders, positions)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Delete all rows from the computed-data tables (trends, recommendations,
# orders, positions, backtests, ...) in $DB_NAME, leaving the schema and every
# other table untouched.
#
# Returns 1 with an explicit message when the psql call fails; previously the
# script just exited silently because stderr is suppressed and `set -e` is on.
# NOTE(review): position_stop_levels is cleared after positions — confirm this
# matches the FK direction between the two tables.
reset_computed() {
  echo "--- Clearing computed data in $DB_NAME ---"
  if ! kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -d "$DB_NAME" -c "
    -- Order matters due to FK constraints
    DELETE FROM recommendation_evidence;
    DELETE FROM risk_evaluations;
    DELETE FROM order_events;
    DELETE FROM orders;
    DELETE FROM trading_decisions;
    DELETE FROM positions;
    DELETE FROM portfolio_snapshots;
    DELETE FROM reserve_pool_ledger;
    DELETE FROM risk_tier_history;
    DELETE FROM circuit_breaker_events;
    DELETE FROM notifications;
    DELETE FROM recommendations;
    DELETE FROM trend_evidence;
    DELETE FROM trend_projections;
    DELETE FROM trend_history;
    DELETE FROM trend_windows;
    DELETE FROM backtest_trades;
    DELETE FROM backtest_runs;
    DELETE FROM position_stop_levels;
  " 2>/dev/null; then
    echo " ✗ Failed to clear computed data in $DB_NAME" >&2
    return 1
  fi
  echo " ✓ Computed data cleared"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: S3 buckets
|
||||
# ---------------------------------------------------------------------------
|
||||
# Remove every object from each "${BUCKET_PREFIX}<suffix>" bucket reachable
# through the mc alias $MC_ALIAS. Buckets that cannot be listed are skipped;
# removal errors are tolerated.
reset_s3() {
  echo "--- Emptying S3 buckets: ${BUCKET_PREFIX}* ---"
  local suffix bucket removed
  for suffix in "${BUCKET_SUFFIXES[@]}"; do
    bucket="${BUCKET_PREFIX}${suffix}"
    if mc ls "${MC_ALIAS}/${bucket}" &>/dev/null; then
      echo -n " Emptying ${bucket}..."
      if [[ "$VERBOSE" == true ]]; then
        echo ""
        mc rm --recursive --force "${MC_ALIAS}/${bucket}/" 2>/dev/null || true
      else
        # Count the lines mc printed. The `|| true` sits OUTSIDE the command
        # substitution: with `pipefail`, an inner `|| echo "0"` would append
        # a second line ("N", then "0") to the captured count when mc fails.
        removed=$(mc rm --recursive --force "${MC_ALIAS}/${bucket}/" 2>/dev/null | wc -l) || true
        echo " ${removed} objects removed"
      fi
      echo " ✓ ${bucket} emptied"
    else
      echo " ⚠ ${bucket} not found (skipping)"
    fi
  done
  echo " ✓ All S3 buckets emptied"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: Redis
|
||||
# ---------------------------------------------------------------------------
|
||||
# Delete the stage's Redis keys. The preferred path runs a short Python
# snippet inside the scheduler deployment that removes every key matching
# 'stonks:*' in DB $REDIS_DB; if that command fails for any reason, the whole
# Redis DB is flushed via redis-cli on the Redis master pod instead.
reset_redis() {
  echo "--- Flushing Redis DB $REDIS_DB ---"

  local purge_script="
import redis
r = redis.from_url('redis://:${REDIS_PASSWORD}@${REDIS_HOST}:${REDIS_PORT}/${REDIS_DB}')
keys = r.keys('stonks:*')
if keys:
    r.delete(*keys)
    print(f' Deleted {len(keys)} keys')
else:
    print(' No keys to delete')
"

  if ! kubectl exec -n "$NAMESPACE" deployment/scheduler -c scheduler -- python -c "$purge_script" 2>/dev/null; then
    # Fallback: flush the entire Redis DB if no scheduler pod
    echo " Falling back to FLUSHDB..."
    kubectl exec -n redis-service redis-master-0 -- redis-cli -a "$REDIS_PASSWORD" -n "$REDIS_DB" FLUSHDB 2>/dev/null || true
  fi

  echo " ✓ Redis flushed"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Execute based on component selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Run the reset steps for the selected component. Only the DB-affecting
# variants (all, db) scale the stage down first and back up afterwards.
case "$COMPONENT" in
  all)      scale_down; reset_db; reset_s3; reset_redis; scale_up ;;
  db)       scale_down; reset_db; scale_up ;;
  s3)       reset_s3 ;;
  redis)    reset_redis ;;
  computed) reset_computed ;;
  *)
    echo "ERROR: Unknown component '$COMPONENT'. Use: all, db, s3, redis, computed"
    exit 1
    ;;
esac

# Closing summary and operator hints.
printf '%s\n' \
  "" \
  "============================================" \
  " Factory reset complete: $STAGE / $COMPONENT" \
  "============================================" \
  "" \
  "Next steps:" \
  " - ArgoCD will auto-restore pod replicas" \
  " - Migrations and seed run automatically on scheduler init" \
  " - Ingestion will begin on the next scheduler cycle (~15s)" \
  " - First aggregation will run within ~15 minutes"
|
||||
@@ -64,6 +64,7 @@ from services.shared.metrics import (
|
||||
AGGREGATION_WINDOWS_COMPUTED,
|
||||
)
|
||||
from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow
|
||||
from services.trading.model_quality_gate import QualityGateResult, evaluate_quality_gate
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1576,10 +1577,34 @@ async def aggregate_company(
|
||||
# Mid-cycle changes take effect on the next cycle.
|
||||
probabilistic = await fetch_probabilistic_scoring_enabled(pool)
|
||||
pipeline_mode = "probabilistic" if probabilistic else "heuristic"
|
||||
|
||||
# --- Quality gate evaluation (Req 11.2, 11.3) ---
|
||||
# Evaluate model quality gate at the start of each aggregation cycle.
|
||||
# When the gate fails, all recommendations are forced to paper mode.
|
||||
# Gate evaluation failure defaults to paper-only (fail-safe).
|
||||
quality_gate_passed = False
|
||||
try:
|
||||
gate_result: QualityGateResult = await evaluate_quality_gate(pool)
|
||||
quality_gate_passed = gate_result.passed
|
||||
logger.info(
|
||||
"Quality gate for %s cycle: %s — %s",
|
||||
ticker,
|
||||
"PASS" if gate_result.passed else "FAIL",
|
||||
gate_result.reason,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Quality gate evaluation failed for %s cycle — "
|
||||
"defaulting to paper-only mode (fail-safe)",
|
||||
ticker,
|
||||
)
|
||||
quality_gate_passed = False
|
||||
|
||||
logger.info(
|
||||
"Aggregation cycle for %s: pipeline_mode=%s",
|
||||
"Aggregation cycle for %s: pipeline_mode=%s quality_gate=%s",
|
||||
ticker,
|
||||
pipeline_mode,
|
||||
"passed" if quality_gate_passed else "failed",
|
||||
)
|
||||
|
||||
# --- Regime detection (Req 7.1, 7.2, 7.3, 7.8, 7.9) ---
|
||||
@@ -1647,6 +1672,20 @@ async def aggregate_company(
|
||||
ticker_returns=ticker_returns,
|
||||
ticker_volumes=ticker_volumes,
|
||||
)
|
||||
|
||||
# When quality gate fails, annotate the trend summary so the
|
||||
# recommendation engine forces paper mode (Req 11.2, 11.3).
|
||||
if not quality_gate_passed:
|
||||
ctx = summary.market_context
|
||||
if isinstance(ctx, dict):
|
||||
ctx["quality_gate_passed"] = False
|
||||
elif ctx is not None and hasattr(ctx, "model_dump"):
|
||||
ctx_dict = ctx.model_dump()
|
||||
ctx_dict["quality_gate_passed"] = False
|
||||
summary.market_context = ctx_dict
|
||||
else:
|
||||
summary.market_context = {"quality_gate_passed": False}
|
||||
|
||||
summaries.append(summary)
|
||||
|
||||
return summaries
|
||||
|
||||
+596
-5
@@ -43,6 +43,11 @@ from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
||||
from services.shared.redis_keys import PIPELINE_ENABLED_KEY, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
||||
from services.validation.attribution import (
|
||||
compute_catalyst_attribution,
|
||||
compute_layer_attribution,
|
||||
compute_source_attribution,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("query_api")
|
||||
|
||||
@@ -471,12 +476,13 @@ async def list_trend_history(
|
||||
@app.get("/api/market/prices/{ticker}")
|
||||
async def get_market_prices(
|
||||
ticker: str,
|
||||
limit: int = Query(default=30, le=200),
|
||||
limit: int = Query(default=200, le=500),
|
||||
):
|
||||
"""Return historical close prices for a ticker from market_snapshots.
|
||||
|
||||
Each row has a bar_date (from the Polygon bar timestamp) and OHLCV data.
|
||||
Ordered oldest-first for chart rendering.
|
||||
Ordered oldest-first for chart rendering. Also returns 90-day high/low
|
||||
computed from all bars in the last 90 days.
|
||||
"""
|
||||
ticker = ticker.upper()
|
||||
rows = await pool.fetch(
|
||||
@@ -515,7 +521,124 @@ async def get_market_prices(
|
||||
"bar_timestamp": bar_ts,
|
||||
"captured_at": r["captured_at"].isoformat() if r["captured_at"] else None,
|
||||
})
|
||||
return results
|
||||
|
||||
# Compute 90-day high/low from all bars in the window
|
||||
cutoff_90d = datetime.now(timezone.utc) - timedelta(days=90)
|
||||
range_row = await pool.fetchrow(
|
||||
"""SELECT
|
||||
MIN((data->>'l')::float) AS low_90d,
|
||||
MAX((data->>'h')::float) AS high_90d
|
||||
FROM market_snapshots
|
||||
WHERE ticker = $1 AND snapshot_type = 'bar'
|
||||
AND captured_at >= $2""",
|
||||
ticker, cutoff_90d,
|
||||
)
|
||||
low_90d = range_row["low_90d"] if range_row else None
|
||||
high_90d = range_row["high_90d"] if range_row else None
|
||||
|
||||
return {
|
||||
"bars": results,
|
||||
"range_90d": {"low": low_90d, "high": high_90d},
|
||||
}
|
||||
|
||||
|
||||
@app.post("/api/market/backfill/{ticker}")
async def backfill_market_prices(ticker: str, days: int = Query(default=90, ge=0, le=365)):
    """Backfill daily OHLCV bars from Polygon for the last N days.

    Fetches daily aggregate bars from the provider's range endpoint and
    inserts any bars whose ``t`` timestamp is not yet stored for the ticker
    in ``market_snapshots``.

    Args:
        ticker: Symbol to backfill; upper-cased before use.
        days: Size of the look-back window in days (0-365).

    Returns:
        A dict with ``ticker``, ``inserted`` and ``total_bars``; ``days`` is
        included when the provider returned at least one bar.

    Raises:
        HTTPException: 503 when no API key is configured; 502 when the
            provider request fails or returns an error status.
    """
    ticker = ticker.upper()
    api_key = config.market_data.api_key
    if not api_key:
        raise HTTPException(503, "No market data API key configured")

    import hashlib
    from datetime import date, timedelta
    from urllib.parse import quote

    import httpx

    to_date = date.today().isoformat()
    from_date = (date.today() - timedelta(days=days)).isoformat()

    # The ticker comes straight from the request path: escape it so characters
    # such as '?' or '#' cannot rewrite the upstream URL. ':' stays literal
    # because it would also have been sent unescaped before this change.
    url = (
        f"{config.market_data.base_url}/v2/aggs/ticker/{quote(ticker, safe=':')}"
        f"/range/1/day/{from_date}/{to_date}"
    )
    params = {"apiKey": api_key, "adjusted": "true", "sort": "asc", "limit": "500"}

    try:
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.get(url, params=params)
            resp.raise_for_status()
            data = resp.json()
    except httpx.HTTPStatusError as exc:
        # The httpx message contains the full URL including the apiKey query
        # parameter, so surface only the status code (and drop the chained
        # exception) to keep the key out of responses and tracebacks.
        raise HTTPException(
            502, f"Market data provider returned HTTP {exc.response.status_code}"
        ) from None
    except httpx.HTTPError as exc:
        raise HTTPException(
            502, f"Market data request failed: {type(exc).__name__}"
        ) from None

    # Treat an explicit `"results": null` like a missing key.
    bars = data.get("results") or []
    if not bars:
        return {"ticker": ticker, "inserted": 0, "total_bars": 0}

    # Find existing bar timestamps to avoid duplicates
    existing = await pool.fetch(
        """SELECT DISTINCT (data->>'t')::bigint AS bar_ts
           FROM market_snapshots
           WHERE ticker = $1 AND snapshot_type = 'bar'""",
        ticker,
    )
    existing_ts = {r["bar_ts"] for r in existing if r["bar_ts"] is not None}

    # Look up company_id (nullable)
    company_row = await pool.fetchrow(
        "SELECT id FROM companies WHERE ticker = $1", ticker,
    )
    company_id = company_row["id"] if company_row else None

    inserted = 0
    for bar in bars:
        bar_ts = bar.get("t")
        if bar_ts is None or bar_ts in existing_ts:
            continue
        bar_json = json.dumps(bar)
        content_hash = hashlib.sha256(bar_json.encode()).hexdigest()
        # Bar timestamps are divided by 1000 before conversion, i.e. they are
        # handled as epoch milliseconds.
        captured_at = datetime.fromtimestamp(bar_ts / 1000, tz=timezone.utc)
        await pool.execute(
            """INSERT INTO market_snapshots
               (company_id, ticker, snapshot_type, data, source_provider, captured_at, content_hash)
               VALUES ($1, $2, 'bar', $3::jsonb, 'polygon_backfill', $4, $5)""",
            company_id, ticker, bar_json, captured_at, content_hash,
        )
        # Guard against the same timestamp appearing twice in one response.
        existing_ts.add(bar_ts)
        inserted += 1

    return {"ticker": ticker, "inserted": inserted, "total_bars": len(bars), "days": days}
|
||||
|
||||
|
||||
@app.post("/api/market/backfill-all")
async def backfill_all_market_prices(days: int = Query(default=90, le=365)):
    """Backfill daily bars for ALL active companies from Polygon.

    Runs the per-ticker backfill for every active company, one after the
    other. A failure for one ticker is recorded in the result list and does
    not stop the remaining tickers.

    Args:
        days: Look-back window in days, forwarded to the per-ticker backfill.

    Returns:
        A dict with ``total_inserted``, ``tickers`` (number of tickers
        processed) and ``details`` (one entry per ticker; failed tickers carry
        an ``error`` string).

    Raises:
        HTTPException: 503 when no market data API key is configured.
    """
    api_key = config.market_data.api_key
    if not api_key:
        raise HTTPException(503, "No market data API key configured")

    rows = await pool.fetch(
        "SELECT ticker FROM companies WHERE active = TRUE ORDER BY ticker",
    )
    results = []
    for row in rows:
        ticker = row["ticker"]
        try:
            result = await backfill_market_prices(ticker, days)
            results.append(result)
        except Exception as e:
            # HTTP client errors can embed the request URL, whose query string
            # carries the API key. Redact it before the text is logged or
            # returned to the caller.
            reason = str(e).replace(api_key, "***")
            logger.warning("Backfill failed for %s: %s", ticker, reason)
            results.append({"ticker": ticker, "inserted": 0, "error": reason})

    total_inserted = sum(r.get("inserted", 0) for r in results)
    return {"total_inserted": total_inserted, "tickers": len(results), "details": results}
|
||||
|
||||
|
||||
@app.get("/api/trends/{trend_id}")
|
||||
@@ -1061,7 +1184,12 @@ async def get_order(order_id: str):
|
||||
async def list_positions(
|
||||
ticker: Optional[str] = None,
|
||||
):
|
||||
"""List current positions."""
|
||||
"""List current positions with Polygon market prices overlaid.
|
||||
|
||||
The current_price from the broker (Alpaca paper) can be stale or
|
||||
inaccurate. We overlay the latest close from market_snapshots
|
||||
(Polygon daily bars) and recompute unrealized P&L from that.
|
||||
"""
|
||||
if ticker:
|
||||
rows = await pool.fetch(
|
||||
"""SELECT p.id, p.broker_account_id, p.ticker, p.quantity,
|
||||
@@ -1077,7 +1205,28 @@ async def list_positions(
|
||||
p.unrealized_pnl, p.realized_pnl, p.updated_at
|
||||
FROM positions p ORDER BY p.ticker""",
|
||||
)
|
||||
return [_row_to_dict(r) for r in rows]
|
||||
|
||||
# Enrich with latest Polygon close for comparison.
|
||||
# Use whichever price is more recent: broker sync or Polygon bar.
|
||||
tickers = list({r["ticker"] for r in rows})
|
||||
price_map: dict[str, float] = {}
|
||||
if tickers:
|
||||
price_rows = await pool.fetch(
|
||||
"""SELECT DISTINCT ON (ticker) ticker, (data->>'c')::float AS close
|
||||
FROM market_snapshots
|
||||
WHERE ticker = ANY($1) AND snapshot_type = 'bar'
|
||||
ORDER BY ticker, captured_at DESC""",
|
||||
tickers,
|
||||
)
|
||||
price_map = {r["ticker"]: r["close"] for r in price_rows if r["close"]}
|
||||
|
||||
results = []
|
||||
for r in rows:
|
||||
d = _row_to_dict(r)
|
||||
polygon_price = price_map.get(d["ticker"])
|
||||
d["polygon_price"] = polygon_price
|
||||
results.append(d)
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -3625,3 +3774,445 @@ async def get_variant_performance_history(
|
||||
agent_id, variant_id, hours,
|
||||
)
|
||||
return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model Validation Dashboard (Requirements 12.1, 12.2, 12.3, 12.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Lookback window identifiers accepted by the validation endpoints.
_VALID_LOOKBACKS = {"7d", "30d", "90d", "all"}
# Prediction horizon identifiers accepted by the validation endpoints.
_VALID_HORIZONS = {"1h", "6h", "1d", "7d", "30d"}
|
||||
|
||||
|
||||
@app.get("/api/validation/summary")
async def get_validation_summary(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Return the newest metric snapshot together with the quality gate status.

    The snapshot is the most recently generated ``model_metric_snapshots`` row
    for the requested lookback/horizon pair (``None`` when there is none).
    The gate status is the parsed ``config`` of the ``risk_configs`` row named
    ``model_quality_gate`` (``None`` when that row is missing).

    Raises HTTP 400 for an unknown lookback or horizon.

    Requirement 12.1
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    latest_snapshot_sql = """SELECT id, generated_at, lookback_window, horizon,
                  prediction_count, win_rate, directional_accuracy,
                  information_coefficient, rank_information_coefficient,
                  avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                  calibration_error, brier_score,
                  buy_win_rate, sell_win_rate, hold_win_rate,
                  metadata
           FROM model_metric_snapshots
           WHERE lookback_window = $1 AND horizon = $2
           ORDER BY generated_at DESC
           LIMIT 1"""
    metric_row = await pool.fetchrow(latest_snapshot_sql, lookback, horizon)

    if metric_row:
        snapshot = _row_to_dict(metric_row)
        # metadata is stored as JSONB and must be decoded for the response.
        snapshot["metadata"] = _parse_jsonb(snapshot.get("metadata"))
    else:
        snapshot = None

    quality_gate_row = await pool.fetchrow(
        "SELECT config, updated_at FROM risk_configs WHERE name = 'model_quality_gate'",
    )
    gate_status = _parse_jsonb(quality_gate_row["config"]) if quality_gate_row else None

    return {"snapshot": snapshot, "gate_status": gate_status}
|
||||
|
||||
|
||||
@app.get("/api/validation/calibration")
async def get_validation_calibration(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Build the confidence calibration table for one lookback/horizon pair.

    Rows with a non-null confidence are read from ``v_prediction_performance``
    and grouped into five fixed confidence buckets covering 0.50 to 1.00.
    Each bucket reports its average confidence, observed win rate, row count,
    and a ``miscalibrated`` flag that is set when the two rates differ by
    more than 0.15. Confidences below 0.50 fall in no bucket.

    Raises HTTP 400 for an unknown lookback or horizon.

    Requirement 12.2
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    # "all" means no time filter; otherwise bind the window length as $2.
    sql_args: list[Any] = [horizon]
    window_clause = ""
    if lookback != "all":
        window_clause = "AND generated_at >= NOW() - make_interval(days => $2)"
        sql_args.append({"7d": 7, "30d": 30, "90d": 90}[lookback])

    rows = await pool.fetch(
        f"""SELECT confidence, direction_correct
            FROM v_prediction_performance
            WHERE horizon = $1
              {window_clause}
              AND confidence IS NOT NULL""",
        *sql_args,
    )

    bucket_bounds = [
        (0.50, 0.60),
        (0.60, 0.70),
        (0.70, 0.80),
        (0.80, 0.90),
        (0.90, 1.00),
    ]

    # One pass over the rows: each row lands in at most one bucket because
    # the ranges are disjoint (only the last bucket includes its upper bound).
    grouped: list[list[tuple[float, Any]]] = [[] for _ in bucket_bounds]
    for rec in rows:
        conf = float(rec["confidence"])
        for slot, (low, high) in enumerate(bucket_bounds):
            below_top = conf <= high if high == 1.00 else conf < high
            if low <= conf and below_top:
                grouped[slot].append((conf, rec["direction_correct"]))
                break

    buckets = []
    for (low, high), members in zip(bucket_bounds, grouped):
        count = len(members)
        if count == 0:
            entry = {
                "bucket_low": low,
                "bucket_high": high,
                "avg_confidence": 0.0,
                "observed_win_rate": 0.0,
                "prediction_count": 0,
                "miscalibrated": False,
            }
        else:
            avg_conf = sum(conf for conf, _ in members) / count
            # Only an explicit True counts as a win.
            win_rate = sum(1 for _, correct in members if correct is True) / count
            entry = {
                "bucket_low": low,
                "bucket_high": high,
                "avg_confidence": round(avg_conf, 4),
                "observed_win_rate": round(win_rate, 4),
                "prediction_count": count,
                "miscalibrated": abs(avg_conf - win_rate) > 0.15,
            }
        buckets.append(entry)

    return {"buckets": buckets, "lookback": lookback, "horizon": horizon}
|
||||
|
||||
|
||||
@app.get("/api/validation/ic-by-horizon")
async def get_validation_ic_by_horizon(
    lookback: str = Query(default="30d"),
):
    """Report IC and Rank IC for each prediction horizon.

    For the given lookback, the newest ``model_metric_snapshots`` row of every
    horizon is selected (``DISTINCT ON (horizon)``). Entries are returned in
    the order 1h, 6h, 1d, 7d, 30d; any other horizon value sorts last.

    Raises HTTP 400 for an unknown lookback.

    Requirement 12.3
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")

    latest_per_horizon = await pool.fetch(
        """SELECT DISTINCT ON (horizon)
                  horizon,
                  information_coefficient,
                  rank_information_coefficient,
                  prediction_count,
                  generated_at
           FROM model_metric_snapshots
           WHERE lookback_window = $1
           ORDER BY horizon, generated_at DESC""",
        lookback,
    )

    def _optional_float(value):
        # Keep NULLs as None rather than coercing them to a number.
        return float(value) if value is not None else None

    canonical_rank = {"1h": 0, "6h": 1, "1d": 2, "7d": 3, "30d": 4}
    horizons = sorted(
        (
            {
                "horizon": rec["horizon"],
                "information_coefficient": _optional_float(rec["information_coefficient"]),
                "rank_information_coefficient": _optional_float(rec["rank_information_coefficient"]),
                "prediction_count": rec["prediction_count"],
                "generated_at": rec["generated_at"].isoformat() if rec["generated_at"] else None,
            }
            for rec in latest_per_horizon
        ),
        key=lambda item: canonical_rank.get(item["horizon"], 99),
    )

    return {"horizons": horizons, "lookback": lookback}
|
||||
|
||||
|
||||
@app.get("/api/validation/gate-status")
async def get_validation_gate_status():
    """Return the stored quality gate evaluation.

    Reads the ``risk_configs`` row whose ``name`` is ``model_quality_gate``.
    When the row exists, the parsed config and its ``updated_at`` timestamp
    (ISO format, or ``None``) are returned; otherwise the response carries a
    ``None`` gate status and an explanatory message.

    Requirement 12.7
    """
    gate_row = await pool.fetchrow(
        "SELECT config, updated_at FROM risk_configs WHERE name = 'model_quality_gate'",
    )

    if gate_row:
        last_update = gate_row.get("updated_at")
        return {
            "gate_status": _parse_jsonb(gate_row["config"]),
            "updated_at": gate_row["updated_at"].isoformat() if last_update else None,
        }

    return {
        "gate_status": None,
        "message": "No gate evaluation found. Model metrics may not have been computed yet.",
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Attribution Endpoints (Requirements 12.4, 12.5, 12.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Lookback identifier -> window length in days, as passed to the
# attribution computations below.
_LOOKBACK_TO_DAYS: dict[str, int] = {
    "7d": 7,
    "30d": 30,
    "90d": 90,
    # "all" is expressed as a fixed 3650-day window rather than "no limit".
    "all": 3650,
}
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/sources")
async def get_validation_attribution_sources(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Return attribution metrics broken down by source.

    Validates the lookback and horizon, converts the lookback to a number of
    days, and delegates to ``compute_source_attribution``. Each result is
    serialised with ``asdict``.

    Raises HTTP 400 for invalid parameters and HTTP 500 (after logging the
    traceback) when the computation fails.

    Requirement 12.4
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    window_days = _LOOKBACK_TO_DAYS[lookback]
    try:
        attributions = await compute_source_attribution(pool, lookback_days=window_days, horizon=horizon)
    except Exception:
        logger.exception("Failed to compute source attribution")
        raise HTTPException(500, "Failed to compute source attribution")

    payload = {
        "sources": [asdict(item) for item in attributions],
        "lookback": lookback,
        "horizon": horizon,
    }
    return payload
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/catalysts")
async def get_validation_attribution_catalysts(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Return attribution metrics broken down by catalyst type.

    Validates the lookback and horizon, converts the lookback to a number of
    days, and delegates to ``compute_catalyst_attribution``. Each result is
    serialised with ``asdict``.

    Raises HTTP 400 for invalid parameters and HTTP 500 (after logging the
    traceback) when the computation fails.

    Requirement 12.5
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    window_days = _LOOKBACK_TO_DAYS[lookback]
    try:
        attributions = await compute_catalyst_attribution(pool, lookback_days=window_days, horizon=horizon)
    except Exception:
        logger.exception("Failed to compute catalyst attribution")
        raise HTTPException(500, "Failed to compute catalyst attribution")

    payload = {
        "catalysts": [asdict(item) for item in attributions],
        "lookback": lookback,
        "horizon": horizon,
    }
    return payload
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/layers")
async def get_validation_attribution_layers(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Return attribution metrics per signal layer (company, macro, competitive).

    Validates the lookback and horizon, converts the lookback to a number of
    days, and delegates to ``compute_layer_attribution``. Each result is
    serialised with ``asdict``.

    Raises HTTP 400 for invalid parameters and HTTP 500 (after logging the
    traceback) when the computation fails.

    Requirement 12.6
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    window_days = _LOOKBACK_TO_DAYS[lookback]
    try:
        attributions = await compute_layer_attribution(pool, lookback_days=window_days, horizon=horizon)
    except Exception:
        logger.exception("Failed to compute layer attribution")
        raise HTTPException(500, "Failed to compute layer attribution")

    payload = {
        "layers": [asdict(item) for item in attributions],
        "lookback": lookback,
        "horizon": horizon,
    }
    return payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trading Reports
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.get("/api/reports")
async def list_reports(
    report_type: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    limit: int = Query(default=20, ge=0, le=100),
    offset: int = Query(default=0, ge=0),
):
    """Paginated list of trading reports with optional filtering.

    Query params:
    - report_type: 'daily' or 'weekly'
    - start_date: ISO date (YYYY-MM-DD) — filter period_start >= this
    - end_date: ISO date (YYYY-MM-DD) — filter period_end <= this
    - limit: max results (default 20, max 100, must not be negative)
    - offset: pagination offset (default 0)

    Returns a list of report summaries ordered by generated_at, newest first.
    Raises HTTP 400 for an unknown report_type or an unparseable date.

    Requirements: 5.4, 5.5, 5.6
    """
    from datetime import date as _date

    conditions: list[str] = []
    params: list[Any] = []

    def _bind(value: Any) -> str:
        """Register a query argument and return its $n placeholder."""
        params.append(value)
        return f"${len(params)}"

    if report_type:
        if report_type not in ("daily", "weekly"):
            raise HTTPException(400, "report_type must be 'daily' or 'weekly'")
        conditions.append(f"report_type = {_bind(report_type)}")

    date_filters = (
        ("start_date", start_date, "period_start >="),
        ("end_date", end_date, "period_end <="),
    )
    for label, raw_value, comparison in date_filters:
        if not raw_value:
            continue
        try:
            parsed = _date.fromisoformat(raw_value)
        except ValueError:
            raise HTTPException(400, f"{label} must be YYYY-MM-DD")
        # Bind the parsed date object, not the raw string: asyncpg rejects
        # str values for parameters typed as date.
        conditions.append(f"{comparison} {_bind(parsed)}::date")

    where = f"WHERE {' AND '.join(conditions)}" if conditions else ""

    limit_placeholder = _bind(limit)
    offset_placeholder = _bind(offset)
    query = f"""
        SELECT id, report_type, period_start, period_end,
               validation_status, generated_at
        FROM trading_reports
        {where}
        ORDER BY generated_at DESC
        LIMIT {limit_placeholder} OFFSET {offset_placeholder}
    """

    rows = await pool.fetch(query, *params)
    return [
        {
            "id": str(r["id"]),
            "report_type": r["report_type"],
            "period_start": r["period_start"].isoformat(),
            "period_end": r["period_end"].isoformat(),
            "validation_status": r["validation_status"],
            "generated_at": r["generated_at"].isoformat(),
        }
        for r in rows
    ]
|
||||
|
||||
|
||||
@app.get("/api/reports/{report_id}")
async def get_report(report_id: str):
    """Fetch a single report including full report_data JSONB.

    Args:
        report_id: UUID of the report, as text.

    Returns the report fields with timestamps in ISO format. ``report_data``
    is decoded when the driver hands it back as a JSON string.

    Raises HTTP 400 when report_id is not a valid UUID and HTTP 404 when no
    report has that id.

    Requirements: 5.4, 5.5
    """
    from uuid import UUID

    # Validate up front so a malformed id is a client error, not a driver
    # exception raised while encoding the $1::uuid argument.
    try:
        report_uuid = UUID(report_id)
    except (ValueError, AttributeError, TypeError):
        raise HTTPException(400, "report_id must be a valid UUID")

    row = await pool.fetchrow(
        """SELECT id, report_type, period_start, period_end,
                  report_data, validation_status, generated_at, created_at
           FROM trading_reports
           WHERE id = $1::uuid""",
        report_uuid,
    )
    if row is None:
        raise HTTPException(404, "Report not found")

    return {
        "id": str(row["id"]),
        "report_type": row["report_type"],
        "period_start": row["period_start"].isoformat(),
        "period_end": row["period_end"].isoformat(),
        "report_data": json.loads(row["report_data"]) if isinstance(row["report_data"], str) else row["report_data"],
        "validation_status": row["validation_status"],
        "generated_at": row["generated_at"].isoformat(),
        "created_at": row["created_at"].isoformat(),
    }
|
||||
|
||||
@@ -48,6 +48,7 @@ from services.shared.schemas import (
|
||||
TrendSummary,
|
||||
TrendWindow,
|
||||
)
|
||||
from services.validation.prediction_snapshot import create_prediction_snapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -741,6 +742,92 @@ def _map_time_horizon_prefix(window: str) -> str:
|
||||
return mapping.get(window, "window_")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch evidence signals and docs for prediction snapshot (Requirement 1.1)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_EVIDENCE_SIGNALS_QUERY = """
|
||||
SELECT
|
||||
di.document_id::text AS document_id,
|
||||
di.id::text AS signal_id,
|
||||
dir.ticker,
|
||||
d.source_type AS source,
|
||||
d.source_type,
|
||||
dir.catalyst_type,
|
||||
dir.sentiment,
|
||||
dir.impact_score AS impact,
|
||||
di.confidence AS extraction_confidence,
|
||||
di.source_credibility AS weight
|
||||
FROM document_impact_records dir
|
||||
JOIN document_intelligence di ON di.id = dir.intelligence_id
|
||||
JOIN documents d ON d.id = di.document_id
|
||||
WHERE di.document_id = ANY($1::uuid[])
|
||||
AND di.validation_status = 'valid'
|
||||
"""
|
||||
|
||||
_EVIDENCE_DOCS_QUERY = """
|
||||
SELECT
|
||||
d.id::text AS document_id,
|
||||
COALESCE(d.title, '') AS title,
|
||||
COALESCE(d.url, '') AS url
|
||||
FROM documents d
|
||||
WHERE d.id = ANY($1::uuid[])
|
||||
"""
|
||||
|
||||
|
||||
async def _fetch_evidence_for_snapshot(
|
||||
pool: asyncpg.Pool,
|
||||
document_ids: list[str],
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
"""Fetch evidence signals and document metadata for prediction snapshot.
|
||||
|
||||
Filters out non-UUID document IDs (e.g. synthetic pattern IDs) since
|
||||
they cannot be looked up in the documents table.
|
||||
|
||||
Returns (evidence_signals, evidence_docs).
|
||||
"""
|
||||
# Filter to valid UUIDs only
|
||||
valid_ids: list[str] = []
|
||||
for doc_id in document_ids:
|
||||
try:
|
||||
_uuid.UUID(doc_id)
|
||||
valid_ids.append(doc_id)
|
||||
except (ValueError, AttributeError):
|
||||
continue
|
||||
|
||||
if not valid_ids:
|
||||
return [], []
|
||||
|
||||
signal_rows = await pool.fetch(_EVIDENCE_SIGNALS_QUERY, valid_ids)
|
||||
evidence_signals = [
|
||||
{
|
||||
"document_id": row["document_id"],
|
||||
"signal_id": row["signal_id"],
|
||||
"ticker": row["ticker"] or "",
|
||||
"source": row["source"] or "",
|
||||
"source_type": row["source_type"] or "",
|
||||
"catalyst_type": row["catalyst_type"] or "",
|
||||
"sentiment": row["sentiment"] or "",
|
||||
"impact": float(row["impact"] or 0.0),
|
||||
"extraction_confidence": float(row["extraction_confidence"] or 0.0),
|
||||
"weight": float(row["weight"] or 0.0),
|
||||
}
|
||||
for row in signal_rows
|
||||
]
|
||||
|
||||
doc_rows = await pool.fetch(_EVIDENCE_DOCS_QUERY, valid_ids)
|
||||
evidence_docs = [
|
||||
{
|
||||
"document_id": row["document_id"],
|
||||
"title": row["title"],
|
||||
"url": row["url"],
|
||||
}
|
||||
for row in doc_rows
|
||||
]
|
||||
|
||||
return evidence_signals, evidence_docs
|
||||
|
||||
|
||||
async def generate_recommendation(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
@@ -847,6 +934,22 @@ async def generate_recommendation(
|
||||
eligibility_result=result,
|
||||
)
|
||||
|
||||
# 7b. Capture prediction snapshot for model validation (Requirements 1.1, 1.6)
|
||||
try:
|
||||
all_doc_ids = list(summary.top_supporting_evidence) + list(summary.top_opposing_evidence)
|
||||
evidence_signals, evidence_docs = await _fetch_evidence_for_snapshot(
|
||||
pool, all_doc_ids,
|
||||
)
|
||||
await create_prediction_snapshot(
|
||||
pool, rec, summary, evidence_signals, evidence_docs,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to create prediction snapshot for %s/%s — recommendation "
|
||||
"persisted but snapshot creation failed",
|
||||
ticker, rec_id, exc_info=True,
|
||||
)
|
||||
|
||||
# 8. Publish prediction facts to analytical tables (Requirement 9.4)
|
||||
if minio_client is not None:
|
||||
try:
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
"""Data collector for trading performance reports.
|
||||
|
||||
Queries all relevant trading data for a reporting period and returns
|
||||
a CollectedData bundle for downstream section builders.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CollectedData:
    """Raw data collected for a reporting period.

    Every field defaults to an empty value, so a bare ``CollectedData()``
    represents a period with no data at all.
    """

    # Rows from trading_decisions created within the period.
    trading_decisions: list[dict] = field(default_factory=list)
    # Rows from orders created within the period.
    orders: list[dict] = field(default_factory=list)
    # Positions with quantity > 0 at collection time.
    open_positions: list[dict] = field(default_factory=list)
    # Positions with quantity = 0 that were updated within the period.
    closed_positions: list[dict] = field(default_factory=list)
    # Latest portfolio snapshot inside the period, if any.
    portfolio_snapshot: dict | None = None
    # Latest portfolio snapshot dated before the period start, if any.
    previous_portfolio_snapshot: dict | None = None
    # Rows from recommendations created within the period.
    recommendations: list[dict] = field(default_factory=list)
    # Prediction outcomes evaluated within the period.
    prediction_outcomes: list[dict] = field(default_factory=list)
    # Model metric snapshots generated within the period.
    model_metric_snapshots: list[dict] = field(default_factory=list)
    # Trading decisions whose circuit_breaker_status was not 'clear'.
    circuit_breaker_events: list[dict] = field(default_factory=list)
    # Most recent balance_after from reserve_pool_ledger (0.0 when empty).
    reserve_pool_balance: float = 0.0
|
||||
|
||||
|
||||
def _row_dict(row: asyncpg.Record) -> dict[str, Any]:
|
||||
"""Convert asyncpg Record to dict with UUID→str coercion."""
|
||||
d = dict(row)
|
||||
for k, v in d.items():
|
||||
if isinstance(v, uuid.UUID):
|
||||
d[k] = str(v)
|
||||
return d
|
||||
|
||||
|
||||
async def collect_report_data(
    pool: asyncpg.Pool,
    period_start: date,
    period_end: date,
) -> CollectedData:
    """Gather every dataset a trading report needs for one period.

    A single connection is taken from the pool and the individual fetch
    helpers run on it one after another: trading decisions, orders, open and
    closed positions, current and previous portfolio snapshots,
    recommendations, prediction outcomes, model metric snapshots, circuit
    breaker events, and the reserve pool balance.

    Returns a CollectedData bundle holding the raw query results. List fields
    are empty when the period has no matching rows.
    """
    async with pool.acquire() as conn:
        # Keyword arguments are evaluated top to bottom, so the queries run
        # sequentially on the shared connection.
        collected = CollectedData(
            trading_decisions=await _fetch_trading_decisions(conn, period_start, period_end),
            orders=await _fetch_orders(conn, period_start, period_end),
            open_positions=await _fetch_open_positions(conn),
            closed_positions=await _fetch_closed_positions(conn, period_start, period_end),
            portfolio_snapshot=await _fetch_portfolio_snapshot(conn, period_start, period_end),
            previous_portfolio_snapshot=await _fetch_previous_portfolio_snapshot(conn, period_start),
            recommendations=await _fetch_recommendations(conn, period_start, period_end),
            prediction_outcomes=await _fetch_prediction_outcomes(conn, period_start, period_end),
            model_metric_snapshots=await _fetch_model_metric_snapshots(conn, period_start, period_end),
            circuit_breaker_events=await _fetch_circuit_breaker_events(conn, period_start, period_end),
            reserve_pool_balance=await _fetch_reserve_pool_balance(conn),
        )

    return collected
|
||||
|
||||
|
||||
async def _fetch_trading_decisions(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Return trading decisions created from period_start through the end of period_end, oldest first."""
    sql = """SELECT id, recommendation_id, decision, skip_reason, ticker,
                  computed_position_size, computed_share_quantity,
                  risk_tier_at_decision, portfolio_heat_at_decision,
                  active_pool_at_decision, reserve_pool_at_decision,
                  circuit_breaker_status, correlation_check_result,
                  sector_exposure_check_result, earnings_proximity_flag,
                  is_micro_trade, decision_trace, created_at
           FROM trading_decisions
           WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
           ORDER BY created_at"""
    records = await conn.fetch(sql, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_orders(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Return orders created from period_start through the end of period_end, oldest first."""
    sql = """SELECT id, recommendation_id, broker_account_id, ticker, side,
                  order_type, quantity, limit_price, stop_price, status,
                  broker_order_id, fill_price, fill_quantity,
                  submitted_at, filled_at, cancelled_at, rejected_at,
                  rejection_reason, created_at
           FROM orders
           WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
           ORDER BY created_at"""
    records = await conn.fetch(sql, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_open_positions(conn: asyncpg.Connection) -> list[dict]:
    """Return all positions whose quantity is above zero, ordered by ticker."""
    sql = """SELECT id, broker_account_id, ticker, quantity,
                  avg_entry_price, current_price,
                  unrealized_pnl, realized_pnl, updated_at
           FROM positions
           WHERE quantity > 0
           ORDER BY ticker"""
    records = await conn.fetch(sql)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_closed_positions(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Return zero-quantity positions last updated within the period, ordered by update time."""
    sql = """SELECT id, broker_account_id, ticker, quantity,
                  avg_entry_price, current_price,
                  unrealized_pnl, realized_pnl, updated_at
           FROM positions
           WHERE quantity = 0
             AND updated_at >= $1::date
             AND updated_at < ($2::date + INTERVAL '1 day')
           ORDER BY updated_at"""
    records = await conn.fetch(sql, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_portfolio_snapshot(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> dict | None:
    """Return the latest portfolio snapshot dated inside the period (inclusive), or None."""
    sql = """SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
                  daily_return, cumulative_return, unrealized_pnl, realized_pnl,
                  win_count, loss_count, win_rate, sharpe_ratio,
                  max_drawdown, current_drawdown_pct, portfolio_heat,
                  risk_tier, positions, metrics, created_at
           FROM portfolio_snapshots
           WHERE snapshot_date >= $1 AND snapshot_date <= $2
           ORDER BY snapshot_date DESC
           LIMIT 1"""
    latest = await conn.fetchrow(sql, period_start, period_end)
    if latest:
        return _row_dict(latest)
    return None
|
||||
|
||||
|
||||
async def _fetch_previous_portfolio_snapshot(
    conn: asyncpg.Connection,
    period_start: date,
) -> dict | None:
    """Return the latest portfolio snapshot dated strictly before period_start, or None."""
    sql = """SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
                  daily_return, cumulative_return, unrealized_pnl, realized_pnl,
                  win_count, loss_count, win_rate, sharpe_ratio,
                  max_drawdown, current_drawdown_pct, portfolio_heat,
                  risk_tier, positions, metrics, created_at
           FROM portfolio_snapshots
           WHERE snapshot_date < $1
           ORDER BY snapshot_date DESC
           LIMIT 1"""
    prior = await conn.fetchrow(sql, period_start)
    if prior:
        return _row_dict(prior)
    return None
|
||||
|
||||
|
||||
async def _fetch_recommendations(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Return recommendations created from period_start through the end of period_end, oldest first."""
    sql = """SELECT id, ticker, company_id, action, mode, confidence,
                  time_horizon, thesis, portfolio_pct, max_loss_pct,
                  model_version, generated_at, created_at
           FROM recommendations
           WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
           ORDER BY created_at"""
    records = await conn.fetch(sql, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_prediction_outcomes(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Return prediction outcomes evaluated within the period, oldest first.

    Each row is joined to its prediction snapshot to add the ticker,
    direction, action, and confidence of the original prediction.
    """
    sql = """SELECT po.id, po.prediction_id, po.evaluated_at, po.horizon,
                  po.future_price, po.future_return,
                  po.spy_future_price, po.spy_return,
                  po.sector_etf_future_price, po.sector_etf_return,
                  po.excess_return_vs_spy, po.excess_return_vs_sector,
                  po.direction_correct, po.profitable,
                  ps.ticker, ps.direction, ps.action, ps.confidence
           FROM prediction_outcomes po
           JOIN prediction_snapshots ps ON ps.id = po.prediction_id
           WHERE po.evaluated_at >= $1::date
             AND po.evaluated_at < ($2::date + INTERVAL '1 day')
           ORDER BY po.evaluated_at"""
    records = await conn.fetch(sql, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_model_metric_snapshots(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Return model metric snapshots generated within the period, newest first."""
    sql = """SELECT id, generated_at, lookback_window, horizon,
                  prediction_count, win_rate, directional_accuracy,
                  information_coefficient, rank_information_coefficient,
                  avg_return, avg_excess_return_vs_spy,
                  avg_excess_return_vs_sector,
                  calibration_error, brier_score,
                  buy_win_rate, sell_win_rate, hold_win_rate,
                  created_at
           FROM model_metric_snapshots
           WHERE generated_at >= $1::date
             AND generated_at < ($2::date + INTERVAL '1 day')
           ORDER BY generated_at DESC"""
    records = await conn.fetch(sql, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_circuit_breaker_events(
    conn: asyncpg.Connection,
    period_start: date,
    period_end: date,
) -> list[dict]:
    """Load trading decisions made while a circuit breaker was active.

    A row qualifies when its ``circuit_breaker_status`` differs from
    ``'clear'`` and its ``created_at`` lies in
    ``[period_start, period_end + 1 day)``.  Rows are ordered by
    ``created_at`` ascending and converted with ``_row_dict``.
    """
    query = """SELECT id, recommendation_id, decision, ticker,
                  circuit_breaker_status, decision_trace, created_at
           FROM trading_decisions
           WHERE circuit_breaker_status != 'clear'
             AND created_at >= $1::date
             AND created_at < ($2::date + INTERVAL '1 day')
           ORDER BY created_at"""
    records = await conn.fetch(query, period_start, period_end)
    return [_row_dict(record) for record in records]
|
||||
|
||||
|
||||
async def _fetch_reserve_pool_balance(conn: asyncpg.Connection) -> float:
    """Return ``balance_after`` from the newest reserve_pool_ledger row.

    Falls back to ``0.0`` when the ledger query yields no row.
    """
    latest = await conn.fetchrow(
        "SELECT balance_after FROM reserve_pool_ledger ORDER BY created_at DESC LIMIT 1",
    )
    if not latest:
        return 0.0
    return float(latest["balance_after"])
|
||||
@@ -0,0 +1,279 @@
|
||||
"""Report generator — orchestrates collection, building, validation, summarization, and storage.
|
||||
|
||||
Provides three public functions:
|
||||
- generate_report: full pipeline from data collection to assembled ReportData
|
||||
- store_report: upsert into trading_reports table
|
||||
- process_report_job: Redis queue job handler with retry and dedup
|
||||
|
||||
Requirements: 5.1, 5.2, 5.3, 6.3, 6.4, 6.5
|
||||
Design: Report Generator
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.reporting.collector import collect_report_data
|
||||
from services.reporting.models import ReportData, ReportType
|
||||
from services.reporting.sections import (
|
||||
build_model_quality_section,
|
||||
build_pnl_section,
|
||||
build_position_performance_section,
|
||||
build_recommendation_accuracy_section,
|
||||
build_risk_metrics_section,
|
||||
)
|
||||
from services.reporting.summarizer import (
|
||||
generate_executive_summary,
|
||||
summarize_section,
|
||||
)
|
||||
from services.reporting.validator import (
|
||||
compute_validation_status,
|
||||
validate_model_quality,
|
||||
validate_recommendation_accuracy,
|
||||
)
|
||||
from services.shared.agent_config import AgentConfigResolver
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Retry configuration for process_report_job
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_RETRIES = 3
|
||||
_BACKOFF_SECONDS = (30, 60, 120)
|
||||
|
||||
# In-memory set tracking in-progress jobs to reject duplicates.
|
||||
# Key format: "{report_type}:{period_start}:{period_end}"
|
||||
_in_progress_jobs: set[str] = set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate_report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def generate_report(
    pool: asyncpg.Pool,
    report_type: ReportType,
    period_start: date,
    period_end: date,
) -> ReportData:
    """Run the whole report pipeline and return the assembled ReportData.

    Steps, in order:

    1. Collect the period's data through the collector.
    2. Build the five sections with the section builders.
    3. Attach validator warnings to recommendation_accuracy and model_quality.
    4. Summarize every section (one awaited call per section, sequentially).
    5. Produce the executive summary from the section summaries.
    6. Assemble ReportData and derive its validation status.
    """
    collected = await collect_report_data(pool, period_start, period_end)

    # Build every section before validating or summarizing any of them.
    pnl = build_pnl_section(collected)
    rec_accuracy = build_recommendation_accuracy_section(collected)
    position_perf = build_position_performance_section(collected)
    risk_metrics = build_risk_metrics_section(collected)
    model_quality = build_model_quality_section(collected)

    # Only these two sections are validated.
    rec_accuracy.validation_warnings = validate_recommendation_accuracy(
        rec_accuracy, collected.prediction_outcomes,
    )
    model_quality.validation_warnings = validate_model_quality(
        model_quality, collected.model_metric_snapshots,
    )

    resolver = AgentConfigResolver(pool)

    # The tuple order fixes both the order of the summarizer calls and the
    # key order of the dict handed to the executive summary.
    named_sections = (
        ("pnl", pnl),
        ("recommendation_accuracy", rec_accuracy),
        ("position_performance", position_perf),
        ("risk_metrics", risk_metrics),
        ("model_quality", model_quality),
    )
    section_summaries = {}
    for section_name, section in named_sections:
        section.summary = await summarize_section(
            pool, resolver, section_name, section.model_dump(),
        )
        section_summaries[section_name] = section.summary

    executive_summary = await generate_executive_summary(
        pool, resolver, section_summaries,
    )

    report = ReportData(
        pnl=pnl,
        recommendation_accuracy=rec_accuracy,
        position_performance=position_perf,
        risk_metrics=risk_metrics,
        model_quality=model_quality,
        executive_summary=executive_summary,
        generated_at=datetime.now(timezone.utc),
        period_start=period_start,
        period_end=period_end,
        report_type=ReportType(report_type),
    )

    # The status is computed from the assembled report, so it is set last.
    report.validation_status = compute_validation_status(report)
    return report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# store_report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_UPSERT_SQL = """\
|
||||
INSERT INTO trading_reports
|
||||
(report_type, period_start, period_end, report_data, validation_status, generated_at)
|
||||
VALUES
|
||||
($1, $2, $3, $4::jsonb, $5, $6)
|
||||
ON CONFLICT (report_type, period_start, period_end)
|
||||
DO UPDATE SET
|
||||
report_data = EXCLUDED.report_data,
|
||||
validation_status = EXCLUDED.validation_status,
|
||||
generated_at = EXCLUDED.generated_at
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
|
||||
async def store_report(
    pool: asyncpg.Pool,
    report: ReportData,
) -> str:
    """Persist a report into trading_reports and return its id as a string.

    The statement is an ``INSERT ... ON CONFLICT (report_type, period_start,
    period_end) DO UPDATE``, so storing a report for an already-stored
    type/period replaces the existing row's data instead of failing.
    """
    type_value = report.report_type.value
    params = (
        type_value,
        report.period_start,
        report.period_end,
        report.model_dump_json(),  # bound to the $4::jsonb placeholder
        report.validation_status.value,
        report.generated_at,
    )
    row = await pool.fetchrow(_UPSERT_SQL, *params)
    report_id = str(row["id"])  # type: ignore[index]
    logger.info(
        "Stored report %s (type=%s, period=%s to %s)",
        report_id,
        type_value,
        report.period_start,
        report.period_end,
    )
    return report_id
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_report_job
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _job_key(report_type: str, period_start: str, period_end: str) -> str:
|
||||
"""Build a dedup key for an in-progress job."""
|
||||
return f"{report_type}:{period_start}:{period_end}"
|
||||
|
||||
|
||||
async def process_report_job(
    pool: asyncpg.Pool,
    job: dict,
) -> None:
    """Process a report generation job from the Redis queue.

    Deserializes the job payload, then runs generate_report followed by
    store_report.  Up to ``_MAX_RETRIES`` attempts are made; after every
    failed attempt except the last the handler sleeps for the matching
    ``_BACKOFF_SECONDS`` entry (30s, then 60s, with the current three-attempt
    setting).  When every attempt fails the error is logged with its
    traceback and the job is dropped; nothing is raised to the caller.

    An invalid payload is logged and ignored.  A job whose report type and
    period are already being processed by this process is rejected as a
    duplicate; the dedup key is built from the *parsed* values so equivalent
    spellings of the same period map to the same key.

    Expected job payload::

        {
            "report_type": "daily" | "weekly",
            "period_start": "YYYY-MM-DD",
            "period_end": "YYYY-MM-DD"
        }
    """
    # Validate payload.  A missing key yields "" which fails parsing; a
    # non-string value surfaces as TypeError.
    try:
        report_type = ReportType(job.get("report_type", ""))
        period_start = date.fromisoformat(job.get("period_start", ""))
        period_end = date.fromisoformat(job.get("period_end", ""))
    except (ValueError, TypeError) as exc:
        logger.error("Invalid report job payload: %s — %s", job, exc)
        return

    # Reject duplicate in-progress jobs.  There is no await between the
    # membership check and the add, so two coroutines on the same event loop
    # cannot both pass the check for one key.
    key = _job_key(
        report_type.value, period_start.isoformat(), period_end.isoformat(),
    )
    if key in _in_progress_jobs:
        logger.warning(
            "Duplicate report job rejected (already in progress): %s", key,
        )
        return

    _in_progress_jobs.add(key)
    try:
        last_error: Exception | None = None
        for attempt in range(_MAX_RETRIES):
            try:
                report = await generate_report(
                    pool, report_type, period_start, period_end,
                )
                await store_report(pool, report)
                logger.info(
                    "Report job completed: %s (attempt %d)", key, attempt + 1,
                )
                return
            except Exception as exc:
                last_error = exc
                if attempt < _MAX_RETRIES - 1:
                    # Clamp the index so raising _MAX_RETRIES beyond the
                    # length of the backoff table reuses the last delay
                    # instead of raising IndexError.
                    backoff = _BACKOFF_SECONDS[
                        min(attempt, len(_BACKOFF_SECONDS) - 1)
                    ]
                    logger.warning(
                        "Report job %s failed (attempt %d/%d): %s — retrying in %ds",
                        key,
                        attempt + 1,
                        _MAX_RETRIES,
                        exc,
                        backoff,
                    )
                    await asyncio.sleep(backoff)

        # All retries exhausted: keep the traceback of the final failure.
        logger.error(
            "Report job %s failed after %d attempts: %s",
            key,
            _MAX_RETRIES,
            last_error,
            exc_info=last_error,
        )
    finally:
        # Always release the key so a later job for this period can run.
        _in_progress_jobs.discard(key)
|
||||
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ReportType(str, Enum):
    """Kind of report; members are also plain strings equal to their value."""

    DAILY = "daily"
    WEEKLY = "weekly"
|
||||
|
||||
|
||||
class ValidationStatus(str, Enum):
    """Overall validation outcome of a report; members are also plain strings."""

    PASSED = "passed"
    WARNINGS = "warnings"
|
||||
|
||||
|
||||
class ValidationWarning(BaseModel):
    """One discrepancy between a computed value and a snapshot value.

    NOTE(review): presumably emitted by the validator when the two values
    differ by too much; confirm the threshold and sign convention there.
    """

    # Name of the field the two values belong to.
    field_name: str
    computed_value: float
    snapshot_value: float
    pct_difference: float
|
||||
|
||||
|
||||
class PLSection(BaseModel):
    """Profit-and-loss section of a report.

    All numeric fields are required; ``summary`` and ``validation_warnings``
    start empty and can be filled in after construction.
    """

    realized_pnl: float
    unrealized_pnl: float
    daily_return: float
    cumulative_return: float
    win_count: int
    loss_count: int
    win_rate: float
    profit_factor: float
    sharpe_ratio: float
    # Natural-language summary text; empty until set.
    summary: str = ""
    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RecommendationAccuracySection(BaseModel):
    """Recommendation-accuracy section: act/skip counts, win rate, confidence.

    ``summary`` and ``validation_warnings`` start empty and can be filled in
    after construction.
    """

    total_evaluated: int
    act_count: int
    skip_count: int
    acted_win_rate: float
    avg_confidence_acted: float
    avg_confidence_skipped: float
    # Natural-language summary text; empty until set.
    summary: str = ""
    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PositionDetail(BaseModel):
    """Performance figures for a single position, open or closed."""

    ticker: str
    entry_price: float
    # Current price for an open position, exit price for a closed one.
    current_or_exit_price: float
    pnl: float
    pnl_pct: float
    hold_duration_hours: float
    status: str  # "open" or "closed"
|
||||
|
||||
|
||||
class PositionPerformanceSection(BaseModel):
    """Position-performance section: a list of per-position details."""

    positions: list[PositionDetail] = Field(default_factory=list)
    # Natural-language summary text; empty until set.
    summary: str = ""
|
||||
|
||||
|
||||
class RiskMetricsSection(BaseModel):
    """Risk section: tier, heat, drawdowns, reserve balance, breaker events."""

    current_risk_tier: str
    portfolio_heat: float
    max_drawdown: float
    current_drawdown_pct: float
    reserve_pool_balance: float
    circuit_breaker_event_count: int
    # Natural-language summary text; empty until set.
    summary: str = ""
|
||||
|
||||
|
||||
class ModelQualityWindow(BaseModel):
    """Model-quality metrics for one lookback window.

    Every metric may be ``None`` but has no default, so each must be passed
    explicitly when constructing a window.
    """

    # Lookback label such as "7d", "30d" or "90d".
    lookback: str
    win_rate: float | None
    directional_accuracy: float | None
    information_coefficient: float | None
    calibration_error: float | None
    brier_score: float | None
|
||||
|
||||
|
||||
class ModelQualitySection(BaseModel):
    """Model-quality section: one entry per lookback window, plus warnings."""

    windows: list[ModelQualityWindow] = Field(default_factory=list)
    # Natural-language summary text; empty until set.
    summary: str = ""
    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ReportData(BaseModel):
    """Complete report: the five sections plus report-level metadata.

    This is the top-level structure that gets stored as JSONB.
    """

    # Sections
    pnl: PLSection
    recommendation_accuracy: RecommendationAccuracySection
    position_performance: PositionPerformanceSection
    risk_metrics: RiskMetricsSection
    model_quality: ModelQualitySection

    # Report-level summary and status; both have defaults.
    executive_summary: str = ""
    validation_status: ValidationStatus = ValidationStatus.PASSED

    # Identity of the report
    generated_at: datetime
    period_start: date
    period_end: date
    report_type: ReportType
|
||||
@@ -0,0 +1,370 @@
|
||||
"""Section builders for trading performance reports.
|
||||
|
||||
Each builder takes a CollectedData bundle and returns a typed Pydantic
|
||||
section model. All builders handle zero-activity gracefully by returning
|
||||
zero values and empty lists when no data is available.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.reporting.collector import CollectedData
|
||||
from services.reporting.models import (
|
||||
ModelQualitySection,
|
||||
ModelQualityWindow,
|
||||
PLSection,
|
||||
PositionDetail,
|
||||
PositionPerformanceSection,
|
||||
RecommendationAccuracySection,
|
||||
RiskMetricsSection,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_pnl_section(data: CollectedData) -> PLSection:
    """Assemble the P&L section from the portfolio snapshot and closed positions.

    Every field except ``profit_factor`` is read from
    ``data.portfolio_snapshot`` (missing or falsy values become zero).
    ``profit_factor`` is total gains divided by total losses over
    ``data.closed_positions``, or ``0.0`` when there are no losses.
    Without a snapshot an all-zero section is returned.
    """
    snapshot = data.portfolio_snapshot
    if snapshot is None:
        return PLSection(
            realized_pnl=0.0,
            unrealized_pnl=0.0,
            daily_return=0.0,
            cumulative_return=0.0,
            win_count=0,
            loss_count=0,
            win_rate=0.0,
            profit_factor=0.0,
            sharpe_ratio=0.0,
        )

    def _as_float(field: str) -> float:
        # None and other falsy values are treated as zero.
        return float(snapshot.get(field, 0) or 0)

    def _as_int(field: str) -> int:
        return int(snapshot.get(field, 0) or 0)

    # Accumulate winners and losers separately; losses are kept positive.
    total_gain = 0.0
    total_loss = 0.0
    for position in data.closed_positions:
        realized = float(position.get("realized_pnl", 0) or 0)
        if realized > 0:
            total_gain += realized
        elif realized < 0:
            total_loss += abs(realized)

    if total_loss > 0:
        profit_factor = total_gain / total_loss
    else:
        profit_factor = 0.0

    return PLSection(
        realized_pnl=_as_float("realized_pnl"),
        unrealized_pnl=_as_float("unrealized_pnl"),
        daily_return=_as_float("daily_return"),
        cumulative_return=_as_float("cumulative_return"),
        win_count=_as_int("win_count"),
        loss_count=_as_int("loss_count"),
        win_rate=_as_float("win_rate"),
        profit_factor=profit_factor,
        sharpe_ratio=_as_float("sharpe_ratio"),
    )
|
||||
|
||||
|
||||
def build_recommendation_accuracy_section(
    data: CollectedData,
) -> RecommendationAccuracySection:
    """Assemble the recommendation-accuracy section.

    Every trading decision is counted as acted (``decision == "act"``,
    case-insensitive) or skipped (anything else).  Confidence comes from the
    recommendation referenced by the decision's ``recommendation_id``.
    Profitability of an acted decision is taken from the last prediction
    outcome collected for the decision's ticker.

    Returns an all-zero section when there are no trading decisions.
    """
    if not data.trading_decisions:
        return RecommendationAccuracySection(
            total_evaluated=0,
            act_count=0,
            skip_count=0,
            acted_win_rate=0.0,
            avg_confidence_acted=0.0,
            avg_confidence_skipped=0.0,
        )

    # recommendation id (as string) -> recommendation, for confidence lookup.
    recommendations: dict[str, dict] = {}
    for recommendation in data.recommendations:
        identifier = str(recommendation.get("id", ""))
        if identifier:
            recommendations[identifier] = recommendation

    # Outcomes carry the ticker of their prediction snapshot; decisions are
    # matched to outcomes by ticker because they share no direct key here.
    outcomes_for: dict[str, list[dict]] = {}
    for outcome in data.prediction_outcomes:
        symbol = outcome.get("ticker", "")
        if not symbol:
            continue
        if symbol not in outcomes_for:
            outcomes_for[symbol] = []
        outcomes_for[symbol].append(outcome)

    def _mean(values: list[float]) -> float:
        return (sum(values) / len(values)) if values else 0.0

    acted = 0
    skipped = 0
    wins = 0
    acted_with_outcome = 0
    acted_confidences: list[float] = []
    skipped_confidences: list[float] = []

    for decision_row in data.trading_decisions:
        verdict = str(decision_row.get("decision", "")).lower()
        recommendation = recommendations.get(
            str(decision_row.get("recommendation_id", "")), {},
        )
        confidence = recommendation.get("confidence")
        symbol = decision_row.get("ticker", "")

        if verdict != "act":
            skipped += 1
            if confidence is not None:
                skipped_confidences.append(float(confidence))
            continue

        acted += 1
        if confidence is not None:
            acted_confidences.append(float(confidence))

        candidates = outcomes_for.get(symbol, [])
        if candidates:
            # The last collected outcome for the ticker decides win or loss.
            acted_with_outcome += 1
            if candidates[-1].get("profitable"):
                wins += 1

    if acted_with_outcome > 0:
        acted_win_rate = wins / acted_with_outcome
    else:
        acted_win_rate = 0.0

    return RecommendationAccuracySection(
        total_evaluated=acted + skipped,
        act_count=acted,
        skip_count=skipped,
        acted_win_rate=acted_win_rate,
        avg_confidence_acted=_mean(acted_confidences),
        avg_confidence_skipped=_mean(skipped_confidences),
    )
|
||||
|
||||
|
||||
def build_position_performance_section(
    data: CollectedData,
) -> PositionPerformanceSection:
    """Build the position performance section.

    Emits one PositionDetail per entry in ``data.open_positions`` followed by
    one per entry in ``data.closed_positions``, carrying entry price,
    current/exit price, P&L, P&L % and hold duration.

    Open positions: P&L is ``(current_price - avg_entry_price) * quantity``
    and P&L % is relative to ``avg_entry_price * quantity``.

    Closed positions: P&L is the stored ``realized_pnl``.  The stored
    quantity is not used (the original author notes it is 0 for closed
    rows), so the original size is estimated as
    ``abs(realized_pnl / (exit - entry))`` and P&L % is taken against that
    estimated cost.  It is 0.0 when the entry price is not positive or the
    exit price equals the entry price.

    NOTE(review): hold duration is measured from ``updated_at`` to now for
    both open and closed rows; confirm this is the intended anchor.
    """
    positions: list[PositionDetail] = []
    now = datetime.now(timezone.utc)

    # Open positions
    for pos in data.open_positions:
        entry_price = float(pos.get("avg_entry_price", 0) or 0)
        current_price = float(pos.get("current_price", 0) or 0)
        quantity = float(pos.get("quantity", 0) or 0)

        pnl = (current_price - entry_price) * quantity
        cost_basis = entry_price * quantity
        pnl_pct = (pnl / cost_basis * 100) if cost_basis > 0 else 0.0

        positions.append(
            PositionDetail(
                ticker=pos.get("ticker", ""),
                entry_price=entry_price,
                current_or_exit_price=current_price,
                pnl=pnl,
                pnl_pct=pnl_pct,
                hold_duration_hours=_compute_hold_hours(pos.get("updated_at"), now),
                status="open",
            )
        )

    # Closed positions
    for pos in data.closed_positions:
        entry_price = float(pos.get("avg_entry_price", 0) or 0)
        exit_price = float(pos.get("current_price", 0) or 0)
        realized_pnl = float(pos.get("realized_pnl", 0) or 0)

        pnl_pct = 0.0
        if entry_price > 0 and exit_price != entry_price:
            # Unequal prices guarantee a non-zero divisor here.
            est_quantity = abs(realized_pnl / (exit_price - entry_price))
            est_cost = entry_price * est_quantity
            # est_cost is 0 when realized_pnl is 0; keep the 0.0 default then.
            pnl_pct = (realized_pnl / est_cost * 100) if est_cost > 0 else 0.0

        positions.append(
            PositionDetail(
                ticker=pos.get("ticker", ""),
                entry_price=entry_price,
                current_or_exit_price=exit_price,
                pnl=realized_pnl,
                pnl_pct=pnl_pct,
                hold_duration_hours=_compute_hold_hours(pos.get("updated_at"), now),
                status="closed",
            )
        )

    return PositionPerformanceSection(positions=positions)
|
||||
|
||||
|
||||
def _compute_hold_hours(updated_at: datetime | str | None, now: datetime) -> float:
|
||||
"""Compute hold duration in hours from updated_at to now."""
|
||||
if updated_at is None:
|
||||
return 0.0
|
||||
if isinstance(updated_at, str):
|
||||
try:
|
||||
updated_at = datetime.fromisoformat(updated_at)
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
if not isinstance(updated_at, datetime):
|
||||
return 0.0
|
||||
# Ensure timezone-aware comparison
|
||||
if updated_at.tzinfo is None:
|
||||
updated_at = updated_at.replace(tzinfo=timezone.utc)
|
||||
delta = now - updated_at
|
||||
return max(delta.total_seconds() / 3600.0, 0.0)
|
||||
|
||||
|
||||
def build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection:
    """Assemble the risk-metrics section.

    Risk tier, portfolio heat and both drawdown figures come from
    ``data.portfolio_snapshot``; without a snapshot they default to
    ``"unknown"`` and ``0.0``.  The reserve pool balance and the number of
    circuit-breaker events are taken from ``data`` in either case.
    """
    snapshot = data.portfolio_snapshot

    if snapshot is None:
        tier = "unknown"
        heat = 0.0
        worst_drawdown = 0.0
        drawdown_now = 0.0
    else:
        # Missing or falsy snapshot values fall back to the same defaults.
        tier = str(snapshot.get("risk_tier", "unknown") or "unknown")
        heat = float(snapshot.get("portfolio_heat", 0) or 0)
        worst_drawdown = float(snapshot.get("max_drawdown", 0) or 0)
        drawdown_now = float(snapshot.get("current_drawdown_pct", 0) or 0)

    return RiskMetricsSection(
        current_risk_tier=tier,
        portfolio_heat=heat,
        max_drawdown=worst_drawdown,
        current_drawdown_pct=drawdown_now,
        reserve_pool_balance=data.reserve_pool_balance,
        circuit_breaker_event_count=len(data.circuit_breaker_events),
    )
|
||||
|
||||
|
||||
def build_model_quality_section(data: CollectedData) -> ModelQualitySection:
    """Assemble the model-quality section for the 7d, 30d and 90d lookbacks.

    For each lookback the first matching entry of
    ``data.model_metric_snapshots`` is used; the list is expected to arrive
    newest first, which makes that entry the latest.  A lookback with no
    snapshot gets a window whose metrics are all ``None``.  When there are
    no snapshots at all, the section has no windows.
    """
    snapshots = data.model_metric_snapshots
    if not snapshots:
        return ModelQualitySection(windows=[])

    ordered_lookbacks = ("7d", "30d", "90d")
    wanted = set(ordered_lookbacks)
    metric_names = (
        "win_rate",
        "directional_accuracy",
        "information_coefficient",
        "calibration_error",
        "brier_score",
    )

    # Keep only the first snapshot seen for each wanted lookback.
    first_seen: dict[str, dict] = {}
    for row in snapshots:
        label = row.get("lookback_window", "")
        if label in wanted:
            first_seen.setdefault(label, row)

    windows: list[ModelQualityWindow] = []
    for lookback in ordered_lookbacks:
        row = first_seen.get(lookback)
        if row is None:
            metrics = dict.fromkeys(metric_names)
        else:
            metrics = {name: _safe_float(row.get(name)) for name in metric_names}
        windows.append(ModelQualityWindow(lookback=lookback, **metrics))

    return ModelQualitySection(windows=windows)
|
||||
|
||||
|
||||
def _safe_float(value: object) -> float | None:
|
||||
"""Convert a value to float, returning None for None/invalid values."""
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
f = float(value) # type: ignore[arg-type]
|
||||
# Replace NaN/inf with None
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return None
|
||||
return f
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
@@ -0,0 +1,437 @@
|
||||
"""AI-powered report summarizer with chunking and deterministic fallback.
|
||||
|
||||
Generates natural-language summaries for trading performance report sections
|
||||
using the Report_Summarizer_Agent (resolved via AgentConfigResolver + llm_factory).
|
||||
Data is chunked to fit within the 8k-token context window of the local model.
|
||||
|
||||
Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.6
|
||||
Design: AI Summarizer
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.extractor.llm_factory import build_llm_client
|
||||
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
||||
from services.shared.config import load_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
CHUNK_SIZE_LIMIT = 6000 # characters per chunk
|
||||
MAX_SUMMARY_WORDS = 200 # per section summary
|
||||
MAX_EXECUTIVE_SUMMARY_WORDS = 300
|
||||
|
||||
_REPORT_SUMMARIZER_SLUG = "report-summarizer"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Chunking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def chunk_data(serialized: str, max_chars: int = CHUNK_SIZE_LIMIT) -> list[str]:
    """Split ``serialized`` into newline-aligned chunks of about ``max_chars``.

    Lines are never broken: a chunk is a run of whole lines, each keeping its
    trailing ``"\\n"``.  A chunk stays within ``max_chars`` unless a single
    line is itself longer, in which case that line forms its own oversized
    chunk.

    Concatenating the returned chunks reproduces ``serialized`` exactly.
    Empty input yields ``[""]``, so the result always has at least one chunk.
    """
    if not serialized:
        return [""]

    # Rebuild the lines with their newline terminators.  The final piece has
    # no terminator and is dropped when empty (input ended with "\n").
    pieces = serialized.split("\n")
    tail = pieces.pop()
    segments = [piece + "\n" for piece in pieces]
    if tail:
        segments.append(tail)

    chunks: list[str] = []
    buffer: list[str] = []
    buffered = 0
    for segment in segments:
        # Close the running chunk before it would overflow; an empty buffer
        # always accepts the segment, however long it is.
        if buffer and buffered + len(segment) > max_chars:
            chunks.append("".join(buffer))
            buffer = []
            buffered = 0
        buffer.append(segment)
        buffered += len(segment)

    if buffer:
        chunks.append("".join(buffer))

    return chunks or [""]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Performance logging
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _log_performance(
    pool: asyncpg.Pool,
    resolved: ResolvedAgentConfig,
    success: bool,
    duration_ms: int,
    input_text: str,
    output_text: str,
    error_message: str | None = None,
) -> None:
    """Record one summarizer invocation in agent_performance_log.

    Best effort: any exception raised while preparing or executing the
    insert is logged as a warning and suppressed.  Token counts are
    estimated as one token per four characters of text.
    """
    insert_sql = """INSERT INTO agent_performance_log
               (agent_id, variant_id, document_id, ticker, success,
                duration_ms, confidence, retry_count,
                input_tokens, output_tokens, error_message)
               VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)"""
    try:
        params = (
            resolved.agent_id,
            resolved.variant_id,
            None,  # document_id: report summaries have none
            None,  # ticker: report summaries have none
            success,
            duration_ms,
            0.0,  # confidence: summaries carry no confidence score
            0,  # retry_count
            len(input_text) // 4,  # input token estimate
            len(output_text) // 4,  # output token estimate
            error_message,
        )
        await pool.execute(insert_sql, *params)
    except Exception:
        logger.warning("Failed to log summarizer performance", exc_info=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM summarization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _summarize_chunk(
    resolved: ResolvedAgentConfig,
    section_name: str,
    chunk: str,
) -> str:
    """Ask the report summarizer LLM to summarize one chunk of section data.

    A fresh LLM client is built for the call and is always closed afterwards,
    whether the call succeeds or raises.

    Returns:
        The model's raw text output with surrounding whitespace removed.

    Raises:
        RuntimeError: If the attempt reports an error or the output is blank.
            Other exceptions from the client propagate unchanged so the caller
            can decide on retries / fallback.
    """
    settings = load_config()
    llm = build_llm_client(resolved, settings.ollama, settings.vllm)
    try:
        attempt = await llm.call_llm(
            prompts={
                "system": resolved.system_prompt,
                "user": f"Summarize this {section_name} data:\n{chunk}",
            },
            json_schema={},  # plain text, no structured output
            document_text="",
        )
        if attempt.error:
            raise RuntimeError(f"LLM error: {attempt.error}")

        text = attempt.raw_output.strip()
        if not text:
            raise RuntimeError("LLM returned empty response")
        return text
    finally:
        await llm.close()
|
||||
|
||||
|
||||
async def _merge_summaries(
    resolved: ResolvedAgentConfig,
    section_name: str,
    summaries: list[str],
) -> str:
    """Combine several chunk summaries into one summary with a single LLM call.

    The summaries are joined with blank lines and sent with an instruction to
    stay within ``MAX_SUMMARY_WORDS`` words. The client is always closed.

    Returns:
        The merged summary text with surrounding whitespace removed.

    Raises:
        RuntimeError: If the attempt reports an error or the output is blank.
    """
    combined = "\n\n".join(summaries)
    settings = load_config()
    llm = build_llm_client(resolved, settings.ollama, settings.vllm)
    try:
        user_prompt = (
            f"Merge these {section_name} summaries into a single coherent "
            f"summary of no more than {MAX_SUMMARY_WORDS} words:\n{combined}"
        )
        attempt = await llm.call_llm(
            prompts={"system": resolved.system_prompt, "user": user_prompt},
            json_schema={},
            document_text="",
        )
        if attempt.error:
            raise RuntimeError(f"LLM merge error: {attempt.error}")

        merged = attempt.raw_output.strip()
        if not merged:
            raise RuntimeError("LLM returned empty merge response")
        return merged
    finally:
        await llm.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Section summarization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def summarize_section(
    pool: asyncpg.Pool,
    resolver: AgentConfigResolver,
    section_name: str,
    section_data: dict,
) -> str:
    """Generate an AI summary for one report section.

    Steps:
      1. Resolve the report summarizer agent; if it is missing, return the
         deterministic summary immediately.
      2. Serialize the section data to JSON and split it with ``chunk_data``.
      3. Summarize each chunk sequentially.
      4. If there are several chunk summaries, merge them with one more LLM
         call; if that merge fails, join the chunk summaries with newlines.
      5. Truncate the result to ``MAX_SUMMARY_WORDS`` words, preferring to end
         on a sentence boundary.
      6. Write ONE row to ``agent_performance_log`` for the whole call
         (success or failure).
      7. On any failure in steps 3-5, fall back to the deterministic summary.

    Returns:
        The summary text (AI-generated or deterministic fallback).
    """
    resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
    if resolved is None:
        logger.error(
            "Report summarizer agent not found (slug=%s) — using deterministic fallback",
            _REPORT_SUMMARIZER_SLUG,
        )
        return build_deterministic_summary(section_name, section_data)

    serialized = json.dumps(section_data, indent=2, default=str)
    chunks = chunk_data(serialized)

    start = time.monotonic()
    try:
        # Summarize each chunk
        chunk_summaries: list[str] = []
        for chunk in chunks:
            summary = await _summarize_chunk(resolved, section_name, chunk)
            chunk_summaries.append(summary)

        # Merge if multiple chunks
        if len(chunk_summaries) > 1:
            try:
                final_summary = await _merge_summaries(
                    resolved, section_name, chunk_summaries,
                )
            except Exception:
                # Merge failed — fall back to concatenation of chunk summaries.
                # exc_info keeps the underlying error visible in the logs.
                logger.warning(
                    "Chunk merge LLM call failed for section %s — concatenating summaries",
                    section_name,
                    exc_info=True,
                )
                final_summary = "\n".join(chunk_summaries)
        else:
            final_summary = chunk_summaries[0]

        # Truncate to MAX_SUMMARY_WORDS at sentence boundary
        words = final_summary.split()
        if len(words) > MAX_SUMMARY_WORDS:
            truncated = " ".join(words[:MAX_SUMMARY_WORDS])
            # Words were re-joined with single spaces, so a sentence-ending
            # period is either the last character or is followed by a space.
            # Searching for a bare "." would also hit decimal points such as
            # "12.5%" and cut the summary in the middle of a number.
            if truncated.endswith("."):
                last_period = len(truncated) - 1
            else:
                last_period = truncated.rfind(". ")
            # Only cut back to the sentence end if that keeps more than half
            # of the text; otherwise keep the hard word-count cut.
            if last_period > len(truncated) // 2:
                truncated = truncated[: last_period + 1]
            final_summary = truncated

        duration_ms = int((time.monotonic() - start) * 1000)
        await _log_performance(
            pool, resolved, True, duration_ms, serialized, final_summary,
        )
        return final_summary

    except Exception as exc:
        duration_ms = int((time.monotonic() - start) * 1000)
        logger.warning(
            "AI summarization failed for section %s: %s — using deterministic fallback",
            section_name,
            exc,
        )
        await _log_performance(
            pool, resolved, False, duration_ms, serialized, "",
            error_message=str(exc),
        )
        return build_deterministic_summary(section_name, section_data)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Deterministic fallback summaries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DETERMINISTIC_TEMPLATES: dict[str, str] = {
|
||||
"pnl": (
|
||||
"P&L Summary: Realized P&L ${realized_pnl}, unrealized ${unrealized_pnl}, "
|
||||
"daily return {daily_return}%, win rate {win_rate}%."
|
||||
),
|
||||
"recommendation_accuracy": (
|
||||
"Recommendation Accuracy: {total_evaluated} evaluated, "
|
||||
"{act_count} acted ({acted_win_rate}% win rate), "
|
||||
"{skip_count} skipped. "
|
||||
"Avg confidence acted {avg_confidence_acted}, skipped {avg_confidence_skipped}."
|
||||
),
|
||||
"position_performance": (
|
||||
"Position Performance: {position_count} positions tracked during the period."
|
||||
),
|
||||
"risk_metrics": (
|
||||
"Risk Metrics: Risk tier {current_risk_tier}, portfolio heat {portfolio_heat}, "
|
||||
"max drawdown {max_drawdown}, current drawdown {current_drawdown_pct}%, "
|
||||
"reserve pool ${reserve_pool_balance}, "
|
||||
"{circuit_breaker_event_count} circuit breaker events."
|
||||
),
|
||||
"model_quality": (
|
||||
"Model Quality: {window_count} lookback windows evaluated."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def build_deterministic_summary(section_name: str, section_data: dict) -> str:
    """Render a template-based summary for a section without calling the LLM.

    Used when AI summarization is unavailable or has failed.

    Behaviour:
      * Unknown section name: a generic one-liner reporting how many entries
        ``section_data`` contains.
      * Known section: the matching template is filled from a shallow copy of
        ``section_data``. For ``position_performance`` and ``model_quality``
        a count (``position_count`` / ``window_count``) is derived from the
        ``positions`` / ``windows`` entry first.
      * No defaults are supplied for missing keys: if formatting raises
        ``KeyError``, ``ValueError`` or ``TypeError``, a warning is logged and
        a fixed "template formatting failed" sentence is returned.
    """
    template = _DETERMINISTIC_TEMPLATES.get(section_name)
    if template is None:
        # Generic fallback for unknown sections
        return f"{section_name} summary: {len(section_data)} metrics reported."

    try:
        # Work on a copy so the caller's dict is never mutated.
        values = dict(section_data)

        # Derive the count fields that two of the templates expect.
        if section_name == "position_performance":
            values["position_count"] = len(values.get("positions", []))
        elif section_name == "model_quality":
            values["window_count"] = len(values.get("windows", []))

        return template.format(**values)
    except (KeyError, ValueError, TypeError) as exc:
        logger.warning(
            "Deterministic summary template failed for %s: %s",
            section_name,
            exc,
        )
        return f"{section_name} summary: data available but template formatting failed."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Executive summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def generate_executive_summary(
    pool: asyncpg.Pool,
    resolver: AgentConfigResolver,
    section_summaries: dict[str, str],
) -> str:
    """Generate an executive summary from all section summaries.

    The section summaries are concatenated as ``"name: summary"`` paragraphs
    and split with ``chunk_data``. If that yields several chunks, each chunk
    is summarized first. A final LLM call then synthesizes the result, which
    is truncated to ``MAX_EXECUTIVE_SUMMARY_WORDS`` words, preferring a
    sentence boundary. One ``agent_performance_log`` row is written per call.

    Falls back to the plain concatenation of section summaries when the
    summarizer agent cannot be resolved or any step raises.

    Returns:
        The executive summary text, or the concatenated section summaries on
        fallback.
    """
    resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
    concatenated = "\n\n".join(
        f"{name}: {summary}" for name, summary in section_summaries.items()
    )

    if resolved is None:
        logger.error(
            "Report summarizer agent not found — using concatenated summaries as executive summary",
        )
        return concatenated

    chunks = chunk_data(concatenated)

    start = time.monotonic()
    try:
        # Summarize chunks if needed
        if len(chunks) > 1:
            chunk_summaries: list[str] = []
            for chunk in chunks:
                summary = await _summarize_chunk(resolved, "executive", chunk)
                chunk_summaries.append(summary)
            input_text = "\n\n".join(chunk_summaries)
        else:
            input_text = chunks[0]

        # Final executive summary call
        cfg = load_config()
        client = build_llm_client(resolved, cfg.ollama, cfg.vllm)
        try:
            prompts = {
                "system": resolved.system_prompt,
                "user": (
                    f"Synthesize these trading performance section summaries into "
                    f"a concise executive summary of no more than "
                    f"{MAX_EXECUTIVE_SUMMARY_WORDS} words:\n{input_text}"
                ),
            }
            attempt = await client.call_llm(
                prompts=prompts,
                json_schema={},
                document_text="",
            )
        finally:
            # Always release the client, even if the call raised.
            await client.close()

        if attempt.error:
            raise RuntimeError(f"Executive summary LLM error: {attempt.error}")
        if not attempt.raw_output.strip():
            raise RuntimeError("Executive summary LLM returned empty response")

        executive = attempt.raw_output.strip()

        # Truncate to MAX_EXECUTIVE_SUMMARY_WORDS at sentence boundary
        words = executive.split()
        if len(words) > MAX_EXECUTIVE_SUMMARY_WORDS:
            truncated = " ".join(words[:MAX_EXECUTIVE_SUMMARY_WORDS])
            # Words were re-joined with single spaces, so a sentence-ending
            # period is either the last character or is followed by a space.
            # Searching for a bare "." would also hit decimal points such as
            # "12.5%" and cut the summary in the middle of a number.
            if truncated.endswith("."):
                last_period = len(truncated) - 1
            else:
                last_period = truncated.rfind(". ")
            # Only cut back to the sentence end if that keeps more than half
            # of the text; otherwise keep the hard word-count cut.
            if last_period > len(truncated) // 2:
                truncated = truncated[: last_period + 1]
            executive = truncated

        duration_ms = int((time.monotonic() - start) * 1000)
        await _log_performance(
            pool, resolved, True, duration_ms, concatenated, executive,
        )
        return executive

    except Exception as exc:
        duration_ms = int((time.monotonic() - start) * 1000)
        logger.warning(
            "Executive summary generation failed: %s — using concatenated summaries",
            exc,
        )
        await _log_performance(
            pool, resolved, False, duration_ms, concatenated, "",
            error_message=str(exc),
        )
        return concatenated
|
||||
@@ -0,0 +1,175 @@
|
||||
"""Report validator — cross-checks computed metrics against live data.
|
||||
|
||||
Compares report section values against prediction_outcomes and
|
||||
model_metric_snapshots, flagging discrepancies that exceed the
|
||||
configured threshold.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
|
||||
from services.reporting.models import (
|
||||
ModelQualitySection,
|
||||
RecommendationAccuracySection,
|
||||
ReportData,
|
||||
ValidationStatus,
|
||||
ValidationWarning,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DISCREPANCY_THRESHOLD_PCT = 5.0
|
||||
|
||||
|
||||
def _sanitize(value: float | None) -> float:
|
||||
"""Replace None, NaN, and infinity with 0.0."""
|
||||
if value is None:
|
||||
return 0.0
|
||||
if math.isnan(value) or math.isinf(value):
|
||||
return 0.0
|
||||
return value
|
||||
|
||||
|
||||
def _check_discrepancy(
    field_name: str,
    computed: float,
    snapshot: float,
) -> ValidationWarning | None:
    """Return a warning when ``computed`` deviates too far from ``snapshot``.

    Both inputs are passed through ``_sanitize`` first. The relative
    difference is measured against ``snapshot`` and compared with
    ``DISCREPANCY_THRESHOLD_PCT``; a warning is produced only when the
    difference is strictly greater than the threshold.

    Edge cases:
      - both values are 0 → no warning
      - snapshot is 0 and computed is not → treated as a 100% difference
      - a NULL snapshot is expected to be skipped by the caller beforehand

    Returns:
        A ``ValidationWarning`` carrying the sanitized values and the
        percentage difference rounded to 4 decimals, or ``None``.
    """
    computed = _sanitize(computed)
    snapshot = _sanitize(snapshot)

    if snapshot != 0.0:
        pct_diff = abs(computed - snapshot) / abs(snapshot) * 100.0
    elif computed != 0.0:
        # Non-zero computed with zero snapshot → 100% discrepancy
        pct_diff = 100.0
    else:
        return None

    if pct_diff <= DISCREPANCY_THRESHOLD_PCT:
        return None

    return ValidationWarning(
        field_name=field_name,
        computed_value=computed,
        snapshot_value=snapshot,
        pct_difference=round(pct_diff, 4),
    )
|
||||
|
||||
|
||||
def validate_recommendation_accuracy(
    section: RecommendationAccuracySection,
    prediction_outcomes: list[dict],
) -> list[ValidationWarning]:
    """Cross-check the reported acted win rate against prediction outcomes.

    The reference win rate is the share of entries in ``prediction_outcomes``
    whose ``"profitable"`` value is truthy. It is compared with
    ``section.acted_win_rate`` via ``_check_discrepancy``.

    Returns:
        A list holding at most one warning; empty when there are no outcomes
        to compare against or the values agree within the threshold.
    """
    if not prediction_outcomes:
        return []

    total = len(prediction_outcomes)
    wins = len([po for po in prediction_outcomes if po.get("profitable")])
    # NOTE(review): this reference rate is a 0-1 fraction — confirm that
    # section.acted_win_rate uses the same scale rather than a percentage.
    computed_win_rate = wins / total if total > 0 else 0.0

    warning = _check_discrepancy(
        "acted_win_rate",
        section.acted_win_rate,
        computed_win_rate,
    )
    return [] if warning is None else [warning]
|
||||
|
||||
|
||||
def validate_model_quality(
    section: ModelQualitySection,
    metric_snapshots: list[dict],
) -> list[ValidationWarning]:
    """Compare reported model-quality metrics with stored metric snapshots.

    For every window in ``section.windows`` the snapshot with the same
    ``lookback_window`` is looked up and five metrics are compared
    (win_rate, directional_accuracy, information_coefficient,
    calibration_error, brier_score). A metric is skipped when either side is
    ``None``; windows without a snapshot are skipped entirely.

    Returns:
        One warning per metric whose discrepancy exceeds the threshold, named
        ``"<lookback>_<metric>"``.
    """
    if not metric_snapshots:
        return []

    # Keep only the FIRST snapshot seen per lookback window. The original
    # author notes the collector orders by generated_at DESC, which would make
    # the first one the latest — that ordering is not verifiable from here.
    first_snapshot_for: dict[str, dict] = {}
    for candidate in metric_snapshots:
        window_key = candidate.get("lookback_window", "")
        if window_key:
            first_snapshot_for.setdefault(window_key, candidate)

    # (attribute on the section window, key in the snapshot dict)
    compared_metrics = (
        ("win_rate", "win_rate"),
        ("directional_accuracy", "directional_accuracy"),
        ("information_coefficient", "information_coefficient"),
        ("calibration_error", "calibration_error"),
        ("brier_score", "brier_score"),
    )

    found: list[ValidationWarning] = []
    for mq_window in section.windows:
        snapshot = first_snapshot_for.get(mq_window.lookback)
        if snapshot is None:
            continue

        for attr_name, snapshot_key in compared_metrics:
            reported = getattr(mq_window, attr_name, None)
            stored = snapshot.get(snapshot_key)

            # NULL on either side → nothing to compare
            if stored is None or reported is None:
                continue

            stored_float = _sanitize(float(stored))
            reported_float = _sanitize(reported)

            warning = _check_discrepancy(
                f"{mq_window.lookback}_{attr_name}",
                reported_float,
                stored_float,
            )
            if warning is not None:
                found.append(warning)

    return found
|
||||
|
||||
|
||||
def compute_validation_status(report: ReportData) -> ValidationStatus:
    """Derive the overall validation status of a report.

    Checks the ``validation_warnings`` of the pnl, recommendation_accuracy
    and model_quality sections, in that order.

    Returns:
        ``ValidationStatus.WARNINGS`` if any of those sections has warnings,
        otherwise ``ValidationStatus.PASSED``.
    """
    has_warnings = bool(
        report.pnl.validation_warnings
        or report.recommendation_accuracy.validation_warnings
        or report.model_quality.validation_warnings
    )
    return ValidationStatus.WARNINGS if has_warnings else ValidationStatus.PASSED
|
||||
+172
-1
@@ -10,8 +10,9 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio as aioredis
|
||||
@@ -26,6 +27,7 @@ from services.shared.redis_keys import (
|
||||
QUEUE_INGESTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
QUEUE_PREFIX,
|
||||
QUEUE_REPORT_GENERATION,
|
||||
lock_key,
|
||||
queue_key,
|
||||
rate_limit_key,
|
||||
@@ -498,6 +500,163 @@ async def schedule_cycle(pool: asyncpg.Pool, rds: aioredis.Redis) -> int:
|
||||
return enqueued
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report generation: queue consumer + scheduled triggers
|
||||
# Requirements: 6.1, 6.2, 6.3, 6.4, 6.5
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Time zone used for all market-close / day-of-week decisions (US Eastern).
_ET = ZoneInfo("America/New_York")

# Poll the report generation queue every N scheduler cycles.
# 15s tick × 4 cycles = ~1 minute
REPORT_CONSUMER_CYCLE_INTERVAL: int = 4

# Evaluate the daily/weekly report triggers every N scheduler cycles.
# 15s tick × 20 cycles = ~5 minutes
REPORT_SCHEDULE_CYCLE_INTERVAL: int = 20

# Redis key prefix for report schedule dedup markers
_REPORT_DEDUPE_PREFIX = f"{QUEUE_PREFIX}:report_dedupe"
# Marker lifetime in seconds (24 hours) — prevents re-enqueuing the same
# report within a day.
_REPORT_DEDUPE_TTL = 24 * 60 * 60
|
||||
|
||||
|
||||
def _report_dedupe_key(report_type: str, period_start: str, period_end: str) -> str:
    """Return the Redis key that marks a (type, start, end) report as enqueued.

    The key is ``_REPORT_DEDUPE_PREFIX`` followed by the three arguments,
    colon-separated.
    """
    return "{}:{}:{}:{}".format(
        _REPORT_DEDUPE_PREFIX, report_type, period_start, period_end,
    )
|
||||
|
||||
|
||||
async def consume_report_generation_jobs(
    pool: asyncpg.Pool,
    rds: aioredis.Redis,
) -> int:
    """Pop and process jobs from the report generation queue.

    Pops up to 5 jobs per invocation to avoid blocking the scheduler loop.
    Each payload is decoded from JSON and handed to ``process_report_job``
    from the reporting generator module.

    Malformed payloads (invalid JSON, or JSON that is not an object) are
    logged and dropped without stopping the batch. A job whose processing
    raises is logged with its traceback and is not re-queued.

    Returns:
        The number of jobs processed successfully.
    """
    # Imported lazily, as in the original; presumably to avoid an import
    # cycle or startup cost — not verifiable from this file.
    from services.reporting.generator import process_report_job

    report_queue = queue_key(QUEUE_REPORT_GENERATION)
    processed = 0

    for _ in range(5):
        raw = await rds.lpop(report_queue)
        if raw is None:
            # Queue drained
            break

        try:
            job = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            logger.error("Invalid report generation job payload: %s", raw)
            continue

        # json.loads also accepts lists, strings and numbers. Those have no
        # .get(), so without this guard the logging call below would raise
        # AttributeError and abort the remaining jobs in this batch.
        if not isinstance(job, dict):
            logger.error("Invalid report generation job payload: %s", raw)
            continue

        logger.info(
            "Processing report generation job: type=%s period=%s to %s",
            job.get("report_type"),
            job.get("period_start"),
            job.get("period_end"),
        )

        try:
            await process_report_job(pool, job)
            processed += 1
        except Exception:
            logger.exception(
                "Failed to process report generation job: %s", job,
            )

    if processed > 0:
        logger.info("Processed %d report generation jobs", processed)
    return processed
|
||||
|
||||
|
||||
async def maybe_enqueue_daily_report(
    rds: aioredis.Redis,
    now_et: datetime,
) -> bool:
    """Enqueue a daily report job if it's after 16:30 ET on a weekday.

    A Redis marker (``SET NX`` with a TTL) ensures the same day's report is
    enqueued only once. If pushing the job fails, the marker is removed again
    so a later scheduler tick can retry instead of silently skipping the
    report until the marker expires.

    Args:
        rds: Redis client used for the dedupe marker and the queue.
        now_et: Current time; callers are expected to pass Eastern Time.

    Returns:
        True if a job was enqueued, False otherwise.

    Raises:
        Exception: Whatever the Redis client raises while enqueuing the job.
    """
    # Only on weekdays (Mon=0 .. Fri=4)
    if now_et.weekday() > 4:
        return False

    # Only after 16:30 ET
    if now_et.hour < 16 or (now_et.hour == 16 and now_et.minute < 30):
        return False

    # A daily report covers a single day: start and end are the same date.
    today = now_et.date()
    period_start = today.isoformat()
    period_end = today.isoformat()

    dedupe = _report_dedupe_key("daily", period_start, period_end)
    created = await rds.set(dedupe, "1", nx=True, ex=_REPORT_DEDUPE_TTL)
    if not created:
        # Marker already present → this report was enqueued earlier.
        return False

    job = json.dumps({
        "report_type": "daily",
        "period_start": period_start,
        "period_end": period_end,
    })
    try:
        await rds.rpush(queue_key(QUEUE_REPORT_GENERATION), job)
    except Exception:
        # Roll back the marker; otherwise the report would be considered
        # "already enqueued" although nothing reached the queue.
        try:
            await rds.delete(dedupe)
        except Exception:
            logger.warning(
                "Failed to clear report dedupe key %s after enqueue error",
                dedupe,
                exc_info=True,
            )
        raise

    logger.info("Enqueued daily report for %s", period_start)
    return True
|
||||
|
||||
|
||||
async def maybe_enqueue_weekly_report(
    rds: aioredis.Redis,
    now_et: datetime,
) -> bool:
    """Enqueue a weekly report job on Saturday.

    The report covers the preceding Monday through Friday. A Redis marker
    (``SET NX`` with a TTL) ensures the same week's report is enqueued only
    once. If pushing the job fails, the marker is removed again so a later
    scheduler tick can retry instead of silently skipping the report.

    Args:
        rds: Redis client used for the dedupe marker and the queue.
        now_et: Current time; callers are expected to pass Eastern Time.

    Returns:
        True if a job was enqueued, False otherwise.

    Raises:
        Exception: Whatever the Redis client raises while enqueuing the job.
    """
    # Only on Saturday (weekday() == 5)
    if now_et.weekday() != 5:
        return False

    today = now_et.date()
    # Previous Monday = today - 5 days, previous Friday = today - 1 day
    period_start = (today - timedelta(days=5)).isoformat()
    period_end = (today - timedelta(days=1)).isoformat()

    dedupe = _report_dedupe_key("weekly", period_start, period_end)
    created = await rds.set(dedupe, "1", nx=True, ex=_REPORT_DEDUPE_TTL)
    if not created:
        # Marker already present → this report was enqueued earlier.
        return False

    job = json.dumps({
        "report_type": "weekly",
        "period_start": period_start,
        "period_end": period_end,
    })
    try:
        await rds.rpush(queue_key(QUEUE_REPORT_GENERATION), job)
    except Exception:
        # Roll back the marker; otherwise the report would be considered
        # "already enqueued" although nothing reached the queue.
        try:
            await rds.delete(dedupe)
        except Exception:
            logger.warning(
                "Failed to clear report dedupe key %s after enqueue error",
                dedupe,
                exc_info=True,
            )
        raise

    logger.info(
        "Enqueued weekly report for %s to %s", period_start, period_end,
    )
    return True
|
||||
|
||||
|
||||
async def check_report_schedule(rds: aioredis.Redis) -> None:
    """Run the daily and weekly report triggers against the current time.

    Intended to be called periodically from the main loop. The current time
    is taken once in Eastern Time (``_ET``) and passed to both triggers,
    daily first and then weekly.
    """
    now_et = datetime.now(tz=_ET)
    for trigger in (maybe_enqueue_daily_report, maybe_enqueue_weekly_report):
        await trigger(rds, now_et)
|
||||
|
||||
|
||||
async def enqueue_periodic_aggregation(pool: asyncpg.Pool, rds: aioredis.Redis) -> int:
|
||||
"""Enqueue aggregation jobs for all active tickers.
|
||||
|
||||
@@ -544,6 +703,8 @@ async def main() -> None:
|
||||
retry_counter = 0
|
||||
cleanup_counter = 0
|
||||
aggregation_counter = 0
|
||||
report_consumer_counter = 0
|
||||
report_schedule_counter = 0
|
||||
try:
|
||||
while True:
|
||||
try:
|
||||
@@ -576,6 +737,16 @@ async def main() -> None:
|
||||
if aggregation_counter >= AGGREGATION_CYCLE_INTERVAL:
|
||||
aggregation_counter = 0
|
||||
await enqueue_periodic_aggregation(pool, rds)
|
||||
# Consume report generation jobs (~1 minute)
|
||||
report_consumer_counter += 1
|
||||
if report_consumer_counter >= REPORT_CONSUMER_CYCLE_INTERVAL:
|
||||
report_consumer_counter = 0
|
||||
await consume_report_generation_jobs(pool, rds)
|
||||
# Check report schedule triggers (~5 minutes)
|
||||
report_schedule_counter += 1
|
||||
if report_schedule_counter >= REPORT_SCHEDULE_CYCLE_INTERVAL:
|
||||
report_schedule_counter = 0
|
||||
await check_report_schedule(rds)
|
||||
finally:
|
||||
await release_lock(rds, "scheduler_cycle")
|
||||
except Exception:
|
||||
|
||||
@@ -68,6 +68,9 @@ QUEUE_LAKE_PUBLISH = "lake_publish"
|
||||
QUEUE_TRADE = "trade"
|
||||
QUEUE_BROKER = "broker_orders"
|
||||
QUEUE_MACRO_CLASSIFICATION = "macro_classification"
|
||||
QUEUE_REPORT_GENERATION = "report_generation"
|
||||
QUEUE_REPORT_GENERATION = "report_generation"
|
||||
QUEUE_SIGNAL_ENGINE = "signal_engine"
|
||||
|
||||
# --- Trading engine ---
|
||||
QUEUE_TRADING_DECISIONS = "trading_decisions"
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
# Signal Engine - dual-pipeline signal evaluation (heuristic + probabilistic)
|
||||
@@ -0,0 +1,355 @@
|
||||
"""Signal engine configuration loaded from risk_configs + environment.
|
||||
|
||||
Defines ``SignalEngineConfig`` (the top-level dataclass) and four derived
|
||||
sub-configs — ``HardFilterConfig``, ``HeuristicConfig``,
|
||||
``ProbabilisticConfig``, ``ExitConfig`` — that expose relevant subsets for
|
||||
cleaner function signatures.
|
||||
|
||||
``load_config()`` reads from the ``risk_configs`` table's JSONB ``config``
|
||||
column and falls back to safe defaults on any error. Environment variables
|
||||
with the ``SIGNAL_ENGINE_`` prefix override database values.
|
||||
|
||||
Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sub-configs — thin wrappers over relevant subsets of SignalEngineConfig
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class HardFilterConfig:
    """Threshold values consumed by the pre-pipeline hard filter engine."""

    # Valuation threshold for the hard filter.
    valuation_min: float = 0.3
    # Earnings-proximity threshold, in days.
    earnings_days: int = 5
    # Macro-bias value associated with skipping.
    macro_bias_skip: float = -1.0
|
||||
|
||||
|
||||
@dataclass
class HeuristicConfig:
    """Threshold values for the heuristic (deterministic) pipeline verdict."""

    # BUY thresholds
    buy_confidence: float = 0.7
    buy_s_total: float = 1.2
    buy_valuation_min: float = 0.5
    # WATCH threshold
    watch_confidence: float = 0.55
    # macro_bias must be > this for BUY
    macro_bias_threshold: float = 0.0
    # earnings_proximity must be > this for BUY
    earnings_days_threshold: int = 5
|
||||
|
||||
|
||||
@dataclass
class ProbabilisticConfig:
    """Threshold values for the probabilistic (Bayesian) pipeline verdict."""

    # BUY thresholds
    buy_p_up: float = 0.6
    buy_entropy_max: float = 0.9
    buy_ev_r_min: float = 1.5
    buy_valuation_min: float = 0.5
    # WATCH thresholds
    watch_p_up: float = 0.55
    watch_entropy_max: float = 0.95
    # Entropy level associated with skipping
    entropy_skip: float = 0.95

    # Regime priors
    regime_prior_bull: float = 0.58
    regime_prior_range: float = 0.5
    regime_prior_bear: float = 0.42

    # Fundamental gates (same semantics as heuristic)
    macro_bias_threshold: float = 0.0
    earnings_days_threshold: int = 5
|
||||
|
||||
|
||||
@dataclass
class ExitConfig:
    """Settings consumed by the exit engine."""

    # ATR multiplier for the trailing stop.
    trailing_stop_atr_multiplier: float = 2.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Top-level config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _default_timeframe_weights() -> dict[str, float]:
    """Return a fresh copy of the default per-timeframe weights."""
    return {
        "M30": 0.03,
        "H1": 0.07,
        "H4": 0.15,
        "D": 0.30,
        "W": 0.30,
        "M": 0.15,
    }


@dataclass
class SignalEngineConfig:
    """Signal engine settings sourced from risk_configs plus the environment.

    Every field has a default, so an instance can be created without any
    database rows or environment variables. The ``*_config`` properties build
    a new sub-config object from the current field values on each access.
    """

    # Pipeline switches
    dual_pipeline_enabled: bool = False
    heuristic_pipeline_enabled: bool = True
    probabilistic_pipeline_enabled: bool = True
    shadow_mode: bool = False

    # Timeframe weights (each instance gets its own dict)
    timeframe_weights: dict[str, float] = field(
        default_factory=_default_timeframe_weights
    )

    # Hard filter thresholds
    hard_filter_valuation_min: float = 0.3
    hard_filter_earnings_days: int = 5
    hard_filter_macro_bias_skip: float = -1.0

    # Heuristic verdict thresholds
    heuristic_buy_confidence: float = 0.70
    heuristic_buy_s_total: float = 1.2
    heuristic_buy_valuation_min: float = 0.5
    heuristic_watch_confidence: float = 0.55

    # Probabilistic verdict thresholds
    prob_buy_p_up: float = 0.60
    prob_buy_entropy_max: float = 0.90
    prob_buy_ev_r_min: float = 1.5
    prob_buy_valuation_min: float = 0.5
    prob_watch_p_up: float = 0.55
    prob_watch_entropy_max: float = 0.95
    prob_entropy_skip: float = 0.95

    # Regime priors
    regime_prior_bull: float = 0.58
    regime_prior_range: float = 0.50
    regime_prior_bear: float = 0.42

    # Exit engine
    trailing_stop_atr_multiplier: float = 2.0

    # Polling
    polling_interval_seconds: int = 30

    # -- Derived sub-configs ------------------------------------------------

    @property
    def hard_filter_config(self) -> HardFilterConfig:
        """Hard-filter thresholds copied from the ``hard_filter_*`` fields."""
        values = {
            "valuation_min": self.hard_filter_valuation_min,
            "earnings_days": self.hard_filter_earnings_days,
            "macro_bias_skip": self.hard_filter_macro_bias_skip,
        }
        return HardFilterConfig(**values)

    @property
    def heuristic_config(self) -> HeuristicConfig:
        """Heuristic thresholds copied from the ``heuristic_*`` fields.

        ``macro_bias_threshold`` is fixed at 0.0 and the earnings threshold
        reuses ``hard_filter_earnings_days``.
        """
        values = {
            "buy_confidence": self.heuristic_buy_confidence,
            "buy_s_total": self.heuristic_buy_s_total,
            "buy_valuation_min": self.heuristic_buy_valuation_min,
            "watch_confidence": self.heuristic_watch_confidence,
            "macro_bias_threshold": 0.0,
            "earnings_days_threshold": self.hard_filter_earnings_days,
        }
        return HeuristicConfig(**values)

    @property
    def probabilistic_config(self) -> ProbabilisticConfig:
        """Probabilistic thresholds copied from ``prob_*`` and regime priors.

        ``macro_bias_threshold`` is fixed at 0.0 and the earnings threshold
        reuses ``hard_filter_earnings_days``.
        """
        values = {
            "buy_p_up": self.prob_buy_p_up,
            "buy_entropy_max": self.prob_buy_entropy_max,
            "buy_ev_r_min": self.prob_buy_ev_r_min,
            "buy_valuation_min": self.prob_buy_valuation_min,
            "watch_p_up": self.prob_watch_p_up,
            "watch_entropy_max": self.prob_watch_entropy_max,
            "entropy_skip": self.prob_entropy_skip,
            "regime_prior_bull": self.regime_prior_bull,
            "regime_prior_range": self.regime_prior_range,
            "regime_prior_bear": self.regime_prior_bear,
            "macro_bias_threshold": 0.0,
            "earnings_days_threshold": self.hard_filter_earnings_days,
        }
        return ProbabilisticConfig(**values)

    @property
    def exit_config(self) -> ExitConfig:
        """Exit-engine settings copied from ``trailing_stop_atr_multiplier``."""
        return ExitConfig(
            trailing_stop_atr_multiplier=self.trailing_stop_atr_multiplier,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config loading helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# SQL to fetch all signal_engine_* keys from the active risk_configs row's
|
||||
# JSONB config column. The query extracts each top-level key/value pair and
|
||||
# filters to those prefixed with ``signal_engine_``.
|
||||
_CONFIG_QUERY = """
|
||||
SELECT key, value
|
||||
FROM (
|
||||
SELECT key, value
|
||||
FROM risk_configs,
|
||||
jsonb_each_text(config)
|
||||
WHERE active = TRUE
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 1
|
||||
) sub
|
||||
WHERE key LIKE 'signal_engine_%'
|
||||
"""
|
||||
|
||||
# Mapping from risk_configs JSON key → SignalEngineConfig field name.
|
||||
# Keys in the DB are prefixed ``signal_engine_`` which is stripped to match
|
||||
# the dataclass field names.
|
||||
_FIELD_TYPES: dict[str, type] = {
|
||||
"dual_pipeline_enabled": bool,
|
||||
"heuristic_pipeline_enabled": bool,
|
||||
"probabilistic_pipeline_enabled": bool,
|
||||
"shadow_mode": bool,
|
||||
"timeframe_weights": dict,
|
||||
"hard_filter_valuation_min": float,
|
||||
"hard_filter_earnings_days": int,
|
||||
"hard_filter_macro_bias_skip": float,
|
||||
"heuristic_buy_confidence": float,
|
||||
"heuristic_buy_s_total": float,
|
||||
"heuristic_buy_valuation_min": float,
|
||||
"heuristic_watch_confidence": float,
|
||||
"prob_buy_p_up": float,
|
||||
"prob_buy_entropy_max": float,
|
||||
"prob_buy_ev_r_min": float,
|
||||
"prob_buy_valuation_min": float,
|
||||
"prob_watch_p_up": float,
|
||||
"prob_watch_entropy_max": float,
|
||||
"prob_entropy_skip": float,
|
||||
"regime_prior_bull": float,
|
||||
"regime_prior_range": float,
|
||||
"regime_prior_bear": float,
|
||||
"trailing_stop_atr_multiplier": float,
|
||||
"polling_interval_seconds": int,
|
||||
}
|
||||
|
||||
|
||||
def _parse_value(raw: str, target_type: type) -> Any:
|
||||
"""Coerce a raw string value from the DB/env into *target_type*.
|
||||
|
||||
Booleans accept ``true``/``false`` (case-insensitive).
|
||||
Dicts are parsed as JSON.
|
||||
"""
|
||||
if target_type is bool:
|
||||
return raw.lower() in ("true", "1", "yes")
|
||||
if target_type is dict:
|
||||
return json.loads(raw)
|
||||
if target_type is int:
|
||||
return int(raw)
|
||||
if target_type is float:
|
||||
return float(raw)
|
||||
return raw
|
||||
|
||||
|
||||
def _apply_db_rows(
    config: SignalEngineConfig,
    rows: list[tuple[str, str]],
) -> None:
    """Copy ``(key, value)`` rows from the database onto *config* in place.

    Each key has its ``signal_engine_`` prefix removed and is then looked up
    in ``_FIELD_TYPES``.  Keys without an entry are skipped with a debug log.
    Values that cannot be coerced to the expected type are skipped with a
    warning, leaving the field at whatever value it already had.
    """
    for db_key, raw_value in rows:
        attr_name = db_key.removeprefix("signal_engine_")
        expected_type = _FIELD_TYPES.get(attr_name)

        if expected_type is not None:
            try:
                coerced = _parse_value(raw_value, expected_type)
                setattr(config, attr_name, coerced)
            except (ValueError, TypeError, json.JSONDecodeError):
                logger.warning(
                    "Invalid value for signal_engine config key %s: %r — keeping default",
                    db_key,
                    raw_value,
                )
        else:
            logger.debug("Ignoring unknown signal_engine config key: %s", db_key)
|
||||
|
||||
|
||||
def _apply_env_overrides(config: SignalEngineConfig) -> None:
    """Apply ``SIGNAL_ENGINE_*`` environment variables on top of *config*.

    The part of the variable name after the ``SIGNAL_ENGINE_`` prefix is
    lower-cased to obtain the field name, e.g.
    ``SIGNAL_ENGINE_DUAL_PIPELINE_ENABLED=true`` sets ``dual_pipeline_enabled``.
    Variables that do not map to an entry in ``_FIELD_TYPES`` are ignored
    silently; values that fail to parse are logged and leave the field as is.
    """
    env_prefix = "SIGNAL_ENGINE_"
    prefix_len = len(env_prefix)

    matching = (
        (name, raw) for name, raw in os.environ.items() if name.startswith(env_prefix)
    )
    for name, raw in matching:
        attr_name = name[prefix_len:].lower()
        expected_type = _FIELD_TYPES.get(attr_name)
        if expected_type is None:
            continue

        try:
            coerced = _parse_value(raw, expected_type)
            setattr(config, attr_name, coerced)
        except (ValueError, TypeError, json.JSONDecodeError):
            logger.warning(
                "Invalid env override %s=%r — keeping previous value",
                name,
                raw,
            )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def load_config(pool: Any) -> SignalEngineConfig:
    """Load signal engine configuration from the database and environment.

    1. Start with safe defaults (``SignalEngineConfig()``).
    2. Query ``risk_configs`` for keys prefixed ``signal_engine_``.
    3. Apply matching values over the defaults.
    4. Apply environment variable overrides (``SIGNAL_ENGINE_*``).
    5. On any DB error, fall back to defaults with ``dual_pipeline_enabled=False``.

    The *pool* argument is an ``asyncpg.Pool`` (typed as ``Any`` to avoid a
    hard import dependency at module level).

    Environment overrides are applied even after a DB failure, so an explicit
    ``SIGNAL_ENGINE_DUAL_PIPELINE_ENABLED`` can still re-enable the pipeline.

    Requirements: 13.1, 13.6, 13.7
    """
    config = SignalEngineConfig()

    # Steps 2-3 — read from risk_configs and overlay on the defaults
    try:
        rows = await pool.fetch(_CONFIG_QUERY)
        if rows:
            _apply_db_rows(config, [(r["key"], r["value"]) for r in rows])
    except Exception:
        logger.warning(
            "Failed to load signal engine config from risk_configs — "
            "defaulting to disabled (fail-safe)",
            exc_info=True,
        )
        # The failure may have happened part-way through applying rows.
        # Discard any partially applied values so the fallback really is
        # "defaults", then force the fail-safe: dual pipeline stays off.
        config = SignalEngineConfig()
        config.dual_pipeline_enabled = False

    # Step 4 — environment overrides (always applied, even after DB failure)
    _apply_env_overrides(config)

    logger.info(
        "Signal engine config loaded: dual_pipeline_enabled=%s, "
        "heuristic=%s, probabilistic=%s, shadow_mode=%s, "
        "polling_interval=%ds",
        config.dual_pipeline_enabled,
        config.heuristic_pipeline_enabled,
        config.probabilistic_pipeline_enabled,
        config.shadow_mode,
        config.polling_interval_seconds,
    )

    return config
|
||||
@@ -0,0 +1,136 @@
|
||||
"""Multi-Timeframe Confluence Engine.
|
||||
|
||||
Evaluates signals across multiple timeframes and computes weighted confluence
|
||||
scores. Signals must trigger on at least 2 timeframes **and** include at
|
||||
least one higher-timeframe anchor (D, W, or M) to pass the confluence filter.
|
||||
|
||||
The weighted confluence score is:
|
||||
|
||||
C_confluence = Σ(w_tf · s_tf)
|
||||
|
||||
where ``w_tf`` is the timeframe weight and ``s_tf`` is the signal strength on
|
||||
that timeframe (only summed over timeframes where the signal triggered).
|
||||
|
||||
Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import Counter
|
||||
|
||||
from services.signal_engine.models import (
|
||||
ConfluenceSignal,
|
||||
SignalDirection,
|
||||
SignalResult,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Higher-timeframe anchors — at least one must be present for a signal to pass.
# Labels are matched against the timeframe keys of each signal's results
# ("D", "W", "M").
HIGHER_TIMEFRAME_ANCHORS: frozenset[str] = frozenset({"D", "W", "M"})

# Minimum number of timeframes a signal must trigger on; signals seen on
# fewer timeframes are discarded by ``compute_confluence``.
MIN_TIMEFRAME_COUNT: int = 2
|
||||
|
||||
|
||||
def _dominant_direction(results: dict[str, SignalResult]) -> SignalDirection:
    """Pick the majority direction among per-timeframe results.

    Each timeframe casts one vote with its ``direction``.  Only bullish and
    bearish votes are compared; whichever has strictly more votes wins, and
    an equal count (including zero of each) yields ``NEUTRAL``.
    """
    votes: Counter[SignalDirection] = Counter(
        result.direction for result in results.values()
    )
    bull_votes = votes[SignalDirection.BULLISH]
    bear_votes = votes[SignalDirection.BEARISH]

    if bull_votes == bear_votes:
        return SignalDirection.NEUTRAL
    return SignalDirection.BULLISH if bull_votes > bear_votes else SignalDirection.BEARISH
|
||||
|
||||
|
||||
def compute_confluence(
    signal_results: dict[str, dict[str, SignalResult]],
    weights: dict[str, float],
) -> list[ConfluenceSignal]:
    """Score each signal type across timeframes and keep those that qualify.

    Args:
        signal_results: ``{signal_type: {timeframe: SignalResult}}``.  Every
            timeframe key present for a signal type is treated as active.
        weights: ``{timeframe: weight}``, e.g.
            ``{"M30": 0.03, "H1": 0.07, "H4": 0.15, "D": 0.30, "W": 0.30, "M": 0.15}``.
            Timeframes missing from *weights* contribute with weight ``0.0``.

    Returns:
        One :class:`ConfluenceSignal` per signal type that passes both gates:

        1. it is present on at least :data:`MIN_TIMEFRAME_COUNT` timeframes;
        2. at least one of its timeframes is in
           :data:`HIGHER_TIMEFRAME_ANCHORS` (D, W or M).

        The score is ``Σ(weight[tf] · strength[tf])`` over the signal's
        timeframes; the direction is the majority vote across them.

    Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6
    """
    passed: list[ConfluenceSignal] = []

    for signal_type, by_timeframe in signal_results.items():
        timeframes = [*by_timeframe]
        n_timeframes = len(timeframes)

        # 3.3 — too few timeframes
        if n_timeframes < MIN_TIMEFRAME_COUNT:
            logger.debug(
                "Signal %s discarded: only %d timeframe(s) triggered (need >= %d)",
                signal_type,
                n_timeframes,
                MIN_TIMEFRAME_COUNT,
            )
            continue

        # 3.4 — no D/W/M anchor
        if HIGHER_TIMEFRAME_ANCHORS.isdisjoint(timeframes):
            logger.debug(
                "Signal %s discarded: no higher-timeframe anchor (D/W/M) "
                "among active timeframes %s",
                signal_type,
                timeframes,
            )
            continue

        # 3.2 — weighted score, accumulated in timeframe order
        strengths: dict[str, float] = {
            tf: result.strength for tf, result in by_timeframe.items()
        }
        score = 0.0
        for tf, strength in strengths.items():
            score += weights.get(tf, 0.0) * strength

        majority = _dominant_direction(by_timeframe)

        passed.append(
            ConfluenceSignal(
                signal_type=signal_type,
                direction=majority,
                confluence_score=score,
                active_timeframes=timeframes,
                per_timeframe=strengths,
            )
        )

        logger.debug(
            "Signal %s passed confluence: score=%.4f, direction=%s, "
            "timeframes=%s",
            signal_type,
            score,
            majority.value,
            timeframes,
        )

    return passed
|
||||
@@ -0,0 +1,137 @@
|
||||
"""Signal cluster classification and within-cluster correlation penalty.
|
||||
|
||||
Groups signals into four clusters — momentum, structure, volatility,
|
||||
fundamentals — and applies exponential decay within each cluster to prevent
|
||||
likelihood ratio stacking inflation in the Bayesian pipeline.
|
||||
|
||||
Within a cluster the strongest signal (by ``|log_lr|``) contributes at full
|
||||
weight; subsequent signals contribute at ``0.5^(n-1)`` decay. Signals in
|
||||
different clusters are treated as independent (no penalty). Single-signal
|
||||
clusters receive no penalty.
|
||||
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
|
||||
from services.signal_engine.models import LikelihoodRatio
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signal cluster enum
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SignalCluster(str, Enum):
    """Correlation cluster for grouping related signals.

    Members are also ``str`` instances (``str`` mixin), so each member
    compares equal to its lowercase value.  The trailing comments list the
    signal families that ``_SIGNAL_CLUSTER_MAP`` assigns to each cluster.
    """

    MOMENTUM = "momentum"  # MA stack, RSI
    STRUCTURE = "structure"  # Fibonacci, Elliott Wave, Cup & Handle
    VOLATILITY = "volatility"  # ATR-based, Bollinger-derived
    FUNDAMENTALS = "fundamentals"  # valuation, earnings, macro
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signal type → cluster mapping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Lookup table from signal type to its cluster, built from one tuple of
# signal types per cluster so the grouping is visible at a glance.
_SIGNAL_CLUSTER_MAP: dict[str, SignalCluster] = {
    signal_type: cluster
    for cluster, signal_types in (
        (SignalCluster.MOMENTUM, ("ma_stack", "rsi")),
        (SignalCluster.STRUCTURE, ("fibonacci", "elliott_wave", "cup_handle")),
        (SignalCluster.VOLATILITY, ("atr", "bollinger")),
        (SignalCluster.FUNDAMENTALS, ("valuation", "earnings", "macro")),
    )
    for signal_type in signal_types
}

# Multiplier applied once per rank step inside a cluster: the strongest
# signal keeps factor 1, the next 0.5, then 0.25, and so on.
_WITHIN_CLUSTER_DECAY = 0.5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def classify_signal(signal_type: str) -> SignalCluster:
    """Return the correlation cluster registered for *signal_type*.

    Signal types missing from ``_SIGNAL_CLUSTER_MAP`` are logged at warning
    level and placed in ``SignalCluster.FUNDAMENTALS`` so that they still
    take part in the within-cluster penalty instead of bypassing it.
    """
    try:
        return _SIGNAL_CLUSTER_MAP[signal_type]
    except KeyError:
        logger.warning(
            "Unknown signal type %r — defaulting to FUNDAMENTALS cluster",
            signal_type,
        )
        return SignalCluster.FUNDAMENTALS
|
||||
|
||||
|
||||
def apply_correlation_penalty(
    likelihood_ratios: list[LikelihoodRatio],
) -> list[LikelihoodRatio]:
    """Down-weight correlated signals that share a cluster.

    Signals are grouped by their ``cluster`` attribute.  Inside each group
    they are ranked by ``abs(log_lr)`` from strongest to weakest (ties keep
    their input order) and the signal at 0-based rank ``r`` gets
    ``penalized_log_lr = log_lr * 0.5 ** r``.  The strongest signal of a
    cluster, and any signal alone in its cluster, therefore keeps its full
    ``log_lr``.  Nothing is penalised across clusters.

    Returns:
        A new list, in the same order as the input, of freshly constructed
        :class:`LikelihoodRatio` objects; the inputs are not mutated.  An
        empty input yields an empty list.
    """
    if not likelihood_ratios:
        return []

    # Positions of the members of each cluster, in input order.
    positions_by_cluster: dict[str, list[int]] = defaultdict(list)
    for position, item in enumerate(likelihood_ratios):
        positions_by_cluster[item.cluster].append(position)

    # Decay factor for every input position, derived from its in-cluster rank.
    decay_at: dict[int, float] = {}
    for positions in positions_by_cluster.values():
        strongest_first = sorted(
            positions,
            key=lambda p: abs(likelihood_ratios[p].log_lr),
            reverse=True,
        )
        for rank, position in enumerate(strongest_first):
            decay_at[position] = _WITHIN_CLUSTER_DECAY ** rank  # 1, 0.5, 0.25, ...

    return [
        LikelihoodRatio(
            signal_type=item.signal_type,
            cluster=item.cluster,
            lr=item.lr,
            log_lr=item.log_lr,
            penalized_log_lr=item.log_lr * decay_at[position],
            hit_rate=item.hit_rate,
            strength=item.strength,
        )
        for position, item in enumerate(likelihood_ratios)
    ]
|
||||
@@ -0,0 +1,139 @@
|
||||
"""Delta Analyzer — compares heuristic and probabilistic pipeline verdicts.
|
||||
|
||||
Computes agreement flags, confidence deltas, disagreement reasons, and
|
||||
tracks a rolling 100-evaluation agreement rate per ticker in Redis.
|
||||
|
||||
Requirements: 9.1, 9.2, 9.3, 9.4, 9.5, 9.6
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import redis.asyncio
|
||||
|
||||
from services.signal_engine.models import (
|
||||
DeltaResult,
|
||||
HeuristicResult,
|
||||
ProbabilisticResult,
|
||||
Verdict,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redis key pattern for rolling agreement tracking; the full key is
# "<prefix>:<ticker>" and holds a list of "1"/"0" entries, newest first.
_AGREEMENT_KEY_PREFIX = "stonks:signal_engine:agreement"

# Maximum number of evaluations to track for rolling agreement rate
# (the Redis list is trimmed to this length after every push).
_ROLLING_WINDOW = 100

# Agreement rate threshold below which a warning is logged
_AGREEMENT_WARNING_THRESHOLD = 0.50
|
||||
|
||||
|
||||
def _compute_disagreement_reasons(
    heuristic: HeuristicResult,
    probabilistic: ProbabilisticResult,
) -> list[str]:
    """List machine-readable reasons why the two pipelines disagree.

    Returns an empty list when both verdicts are equal.  Otherwise every
    threshold check that fails contributes its tag (heuristic checks first,
    then probabilistic ones), followed by at most one tag naming which
    pipeline said BUY while the other did not.
    """
    h_verdict = heuristic.verdict
    p_verdict = probabilistic.verdict
    if h_verdict == p_verdict:
        return []

    # (check failed?, tag) pairs — order defines the order of the output.
    threshold_checks = (
        (heuristic.confidence < 0.70, "heuristic_confidence_below_threshold"),
        (heuristic.s_total < 1.2, "heuristic_s_total_below_threshold"),
        (probabilistic.p_up < 0.60, "probabilistic_p_up_below_threshold"),
        (probabilistic.entropy > 0.90, "probabilistic_entropy_too_high"),
        (probabilistic.ev_r < 1.5, "EV_R_below_threshold"),
    )
    found: list[str] = [tag for failed, tag in threshold_checks if failed]

    # Which side, if any, was the lone BUY.
    if h_verdict == Verdict.BUY and p_verdict != Verdict.BUY:
        found.append("heuristic_buy_probabilistic_disagrees")
    elif p_verdict == Verdict.BUY and h_verdict != Verdict.BUY:
        found.append("probabilistic_buy_heuristic_disagrees")

    return found
|
||||
|
||||
|
||||
async def analyze_delta(
    heuristic: HeuristicResult,
    probabilistic: ProbabilisticResult,
    redis_client: redis.asyncio.Redis,
    ticker: str,
) -> DeltaResult:
    """Compare the two pipeline verdicts and update agreement statistics.

    * ``agreement`` — whether both verdicts are equal.
    * ``confidence_delta`` — ``|heuristic.confidence - probabilistic.p_up|``,
      rounded to 6 decimals in the result.
    * ``disagreement_reasons`` — from ``_compute_disagreement_reasons``.
    * ``rolling_agreement_rate`` — share of "agree" entries among the last
      ``_ROLLING_WINDOW`` evaluations stored in a per-ticker Redis list.
      Stays ``None`` if Redis fails; the failure is logged, not raised.

    A warning is logged when the rolling rate is below
    ``_AGREEMENT_WARNING_THRESHOLD``.
    """
    verdicts_match = heuristic.verdict == probabilistic.verdict
    confidence_gap = abs(heuristic.confidence - probabilistic.p_up)
    reasons = _compute_disagreement_reasons(heuristic, probabilistic)

    rate: float | None = None
    redis_key = f"{_AGREEMENT_KEY_PREFIX}:{ticker}"
    last_index = _ROLLING_WINDOW - 1

    try:
        # Newest outcome goes to the head; the list is then cut to the window.
        await redis_client.lpush(redis_key, "1" if verdicts_match else "0")
        await redis_client.ltrim(redis_key, 0, last_index)
        history = await redis_client.lrange(redis_key, 0, last_index)
        if history:
            # Entries may come back as bytes or str depending on the client.
            agreed = len([entry for entry in history if entry in (b"1", "1")])
            rate = agreed / len(history)
    except Exception:
        logger.warning(
            "Failed to update rolling agreement rate in Redis for %s",
            ticker,
            exc_info=True,
        )

    if rate is not None and rate < _AGREEMENT_WARNING_THRESHOLD:
        logger.warning(
            "Persistent pipeline disagreement for %s: rolling agreement rate %.2f "
            "(below %.2f threshold over last %d evaluations)",
            ticker,
            rate,
            _AGREEMENT_WARNING_THRESHOLD,
            _ROLLING_WINDOW,
        )

    return DeltaResult(
        agreement=verdicts_match,
        confidence_delta=round(confidence_gap, 6),
        heuristic_verdict=heuristic.verdict.value,
        probabilistic_verdict=probabilistic.verdict.value,
        disagreement_reasons=reasons,
        rolling_agreement_rate=rate,
    )
|
||||
@@ -0,0 +1,154 @@
|
||||
"""Exit engine — position-level exit management.
|
||||
|
||||
Evaluates stop-loss hits, take-profit targets, and trailing ATR-based stops
|
||||
for open positions. Called once per evaluation tick *before* the signal
|
||||
pipelines run so that exit signals take priority over new entry signals.
|
||||
|
||||
Priority order (first match wins per position):
|
||||
1. stop_loss hit → EXIT_FULL, reason ``"stop_hit"``
|
||||
2. target_2 hit → EXIT_FULL, reason ``"target_2_hit"``
|
||||
3. trailing stop → EXIT_FULL, reason ``"trailing_stop_hit"``
|
||||
4. target_1 hit → EXIT_HALF, reason ``"target_1_hit"``
|
||||
|
||||
Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from services.signal_engine.config import ExitConfig
|
||||
from services.signal_engine.models import (
|
||||
ExitSignal,
|
||||
ExitType,
|
||||
OpenPositionState,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def evaluate_exits(
    positions: list[OpenPositionState],
    current_prices: dict[str, float],
    config: ExitConfig,
) -> list[ExitSignal]:
    """Run the exit checks over every open position.

    The price used for a position is ``current_prices[pos.ticker]`` when the
    ticker is present, otherwise the position's own ``current_price``.

    Parameters
    ----------
    positions:
        Snapshots of open positions to evaluate.
    current_prices:
        Latest prices keyed by ticker symbol.
    config:
        Exit engine configuration, passed through to the per-position check.

    Returns
    -------
    list[ExitSignal]
        At most one signal per position, in input order; positions for which
        no exit condition matched are omitted.
    """
    outcomes = (
        _evaluate_single_position(
            position,
            current_prices.get(position.ticker, position.current_price),
            config,
        )
        for position in positions
    )
    return [outcome for outcome in outcomes if outcome is not None]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _evaluate_single_position(
    pos: OpenPositionState,
    price: float,
    config: ExitConfig,
) -> ExitSignal | None:
    """Return the first exit condition that *price* satisfies, if any.

    Checks run in priority order:

    1. ``price <= stop_loss``  → full exit, ``"stop_hit"``
    2. ``price >= target_2``   → full exit, ``"target_2_hit"``
    3. after a partial exit: ``price <= trailing stop`` → full exit,
       ``"trailing_stop_hit"``
    4. before a partial exit: ``price >= target_1`` → half exit,
       ``"target_1_hit"``

    ``None`` is returned when nothing matches.
    """

    def _exit(kind: ExitType, why: str) -> ExitSignal:
        # All signals share the position identity and the evaluated price.
        return ExitSignal(
            position_id=pos.position_id,
            ticker=pos.ticker,
            exit_type=kind,
            reason=why,
            price=price,
        )

    if price <= pos.stop_loss:
        return _exit(ExitType.EXIT_FULL, "stop_hit")

    if price >= pos.target_2:
        return _exit(ExitType.EXIT_FULL, "target_2_hit")

    if pos.partial_exit_done:
        # Once half the position is out, only the trailing stop remains;
        # target_1 is not re-evaluated.
        if price <= _compute_trailing_stop(pos, price, config):
            return _exit(ExitType.EXIT_FULL, "trailing_stop_hit")
        return None

    if price >= pos.target_1:
        return _exit(ExitType.EXIT_HALF, "target_1_hit")

    return None
|
||||
|
||||
|
||||
def _compute_trailing_stop(
|
||||
pos: OpenPositionState,
|
||||
price: float,
|
||||
config: ExitConfig,
|
||||
) -> float:
|
||||
"""Compute the effective trailing stop level.
|
||||
|
||||
The trailing stop is ``price - ATR * multiplier``, but it only
|
||||
ratchets **upward** — if the position already has a higher trailing
|
||||
stop recorded, that value is kept.
|
||||
|
||||
When ATR is unavailable (``None``), the existing ``trailing_stop``
|
||||
on the position is returned as-is. If neither is set, returns 0.0
|
||||
(effectively no trailing stop).
|
||||
"""
|
||||
existing = pos.trailing_stop if pos.trailing_stop is not None else 0.0
|
||||
|
||||
if pos.atr is None:
|
||||
return existing
|
||||
|
||||
new_level = price - pos.atr * config.trailing_stop_atr_multiplier
|
||||
|
||||
# Ratchet upward only
|
||||
return max(existing, new_level)
|
||||
@@ -0,0 +1,233 @@
|
||||
"""Output Formatter — assembles the structured SignalOutput contract.
|
||||
|
||||
Populates trade plans based on verdict combinations and maps
|
||||
``SignalOutput`` to the existing ``Recommendation`` schema for
|
||||
trading engine compatibility.
|
||||
|
||||
Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 12.1, 12.2, 12.3, 12.4, 12.5
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.shared.schemas import (
|
||||
ActionType,
|
||||
PositionSizing,
|
||||
Recommendation,
|
||||
RecommendationMode,
|
||||
)
|
||||
from services.signal_engine.config import SignalEngineConfig
|
||||
from services.signal_engine.models import (
|
||||
DeltaResult,
|
||||
ExitSignal,
|
||||
HeuristicResult,
|
||||
ProbabilisticResult,
|
||||
SignalOutput,
|
||||
TradePlan,
|
||||
Verdict,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Position sizing constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Full position sizing (heuristic-only or dual confirmed).
# The max-loss value is also used unchanged for probabilistic-only plans.
_FULL_POSITION_SIZE_PCT = 0.02
_FULL_MAX_LOSS_PCT = 0.005

# Reduced position sizing for probabilistic-only BUY (50% of standard)
_REDUCED_POSITION_SIZE_PCT = 0.01

# Trade plan price levels (relative to entry): each factor is multiplied by
# the entry price to obtain the stop-loss and the two profit targets.
_STOP_LOSS_FACTOR = 0.95
_TARGET_1_FACTOR = 1.05
_TARGET_2_FACTOR = 1.10
|
||||
|
||||
|
||||
def _build_trade_plan(
    price: float,
    *,
    dual_confirmed: bool = False,
    probabilistic_only: bool = False,
) -> TradePlan:
    """Create a :class:`TradePlan` around *price* for the given confirmation mode.

    Position size:

    - ``dual_confirmed`` → full size (takes precedence if both flags are set)
    - ``probabilistic_only`` → reduced size (50% of full)
    - neither flag (heuristic-only) → full size

    The max-loss percentage is the same in every mode.  Stop-loss and the two
    targets are *price* scaled by the module-level factors, rounded to six
    decimals.  Both flags are stored on the plan as passed.
    """
    use_reduced_size = probabilistic_only and not dual_confirmed
    size_pct = _REDUCED_POSITION_SIZE_PCT if use_reduced_size else _FULL_POSITION_SIZE_PCT

    def _level(factor: float) -> float:
        return round(price * factor, 6)

    return TradePlan(
        entry_price=price,
        stop_loss=_level(_STOP_LOSS_FACTOR),
        target_1=_level(_TARGET_1_FACTOR),
        target_2=_level(_TARGET_2_FACTOR),
        position_size_pct=size_pct,
        max_loss_pct=_FULL_MAX_LOSS_PCT,
        dual_confirmed=dual_confirmed,
        probabilistic_only=probabilistic_only,
    )
|
||||
|
||||
|
||||
def format_output(
    ticker: str,
    price: float,
    heuristic: HeuristicResult,
    probabilistic: ProbabilisticResult,
    delta: DeltaResult,
    exit_signals: list[ExitSignal],
    config: SignalEngineConfig,
) -> SignalOutput:
    """Bundle pipeline results into a ``SignalOutput``.

    A trade plan is attached only when at least one pipeline says BUY:

    - both BUY → ``dual_confirmed`` plan
    - only probabilistic BUY → ``probabilistic_only`` plan (reduced sizing)
    - only heuristic BUY → plan with neither flag set
    - no BUY → ``trade_plan`` is ``None``

    The output is timestamped with the current UTC time and carries the full
    ``model_dump()`` of both pipeline results for audit.
    """
    # Flags for _build_trade_plan, keyed by (heuristic BUY?, probabilistic BUY?).
    plan_flags_by_outcome = {
        (True, True): {"dual_confirmed": True, "probabilistic_only": False},
        (False, True): {"dual_confirmed": False, "probabilistic_only": True},
        (True, False): {"dual_confirmed": False, "probabilistic_only": False},
    }
    outcome = (
        bool(heuristic.verdict == Verdict.BUY),
        bool(probabilistic.verdict == Verdict.BUY),
    )
    plan_flags = plan_flags_by_outcome.get(outcome)
    plan: TradePlan | None = (
        None if plan_flags is None else _build_trade_plan(price, **plan_flags)
    )

    return SignalOutput(
        ticker=ticker,
        timestamp=datetime.now(tz=timezone.utc),
        price=price,
        # Heuristic pipeline
        heuristic_verdict=heuristic.verdict.value,
        heuristic_confidence=heuristic.confidence,
        heuristic_s_total=heuristic.s_total,
        # Probabilistic pipeline
        probabilistic_verdict=probabilistic.verdict.value,
        probabilistic_p_up=probabilistic.p_up,
        probabilistic_entropy=probabilistic.entropy,
        probabilistic_ev_r=probabilistic.ev_r,
        # Delta analysis
        delta_agreement=delta.agreement,
        delta_confidence_delta=delta.confidence_delta,
        delta_reasons=delta.disagreement_reasons,
        # Plan and exits
        trade_plan=plan,
        exit_signals=exit_signals,
        # Audit payloads
        heuristic_detail=heuristic.model_dump(),
        probabilistic_detail=probabilistic.model_dump(),
        # Mode metadata
        pipeline_mode="dual_pipeline",
        shadow_mode=config.shadow_mode,
    )
|
||||
|
||||
|
||||
def signal_output_to_recommendation(output: SignalOutput) -> Recommendation:
    """Convert a ``SignalOutput`` into a ``Recommendation``.

    Confidence (clamped to ``[0, 1]``) and the first thesis sentence depend on
    the attached trade plan:

    - dual-confirmed plan: ``max(heuristic_confidence, probabilistic_p_up)``
    - probabilistic-only plan: ``probabilistic_p_up * 0.8``
    - any other plan (heuristic-only): ``heuristic_confidence``
    - no plan: ``max(heuristic_confidence, probabilistic_p_up)``

    Action:

    - either verdict is BUY → ``ActionType.BUY``
    - otherwise either verdict is WATCH → ``ActionType.WATCH``
    - otherwise → ``ActionType.HOLD``

    Position sizing is taken from the trade plan when present, else it is a
    default ``PositionSizing()``.  Mode is always
    ``RecommendationMode.PAPER_ELIGIBLE``.
    """
    plan = output.trade_plan
    h_verdict = output.heuristic_verdict
    p_verdict = output.probabilistic_verdict

    # Confidence, headline and sizing all hinge on the same plan state.
    if plan is None:
        raw_confidence = max(output.heuristic_confidence, output.probabilistic_p_up)
        headline = f"No BUY signal (H={h_verdict}, P={p_verdict})"
        sizing = PositionSizing()
    else:
        if plan.dual_confirmed:
            raw_confidence = max(output.heuristic_confidence, output.probabilistic_p_up)
            headline = "Dual-pipeline confirmed BUY signal"
        elif plan.probabilistic_only:
            raw_confidence = output.probabilistic_p_up * 0.8
            headline = "Probabilistic-only BUY signal (reduced sizing)"
        else:
            raw_confidence = output.heuristic_confidence
            headline = "Heuristic-only BUY signal"
        sizing = PositionSizing(
            portfolio_pct=plan.position_size_pct,
            max_loss_pct=plan.max_loss_pct,
        )

    capped = min(1.0, raw_confidence)
    bounded_confidence = max(0.0, capped)

    verdict_pair = (h_verdict, p_verdict)
    if Verdict.BUY.value in verdict_pair:
        chosen_action = ActionType.BUY
    elif Verdict.WATCH.value in verdict_pair:
        chosen_action = ActionType.WATCH
    else:
        chosen_action = ActionType.HOLD

    thesis_sentences = [headline]
    if output.delta_reasons:
        thesis_sentences.append(f"Delta reasons: {', '.join(output.delta_reasons)}")

    return Recommendation(
        recommendation_id=output.output_id,
        ticker=output.ticker,
        action=chosen_action,
        mode=RecommendationMode.PAPER_ELIGIBLE,
        confidence=bounded_confidence,
        time_horizon="signal_engine",
        thesis="; ".join(thesis_sentences),
        position_sizing=sizing,
        pipeline_mode="dual_pipeline",
        p_bull=output.probabilistic_p_up,
        expected_value=output.probabilistic_ev_r,
        generated_at=output.timestamp,
    )
|
||||
@@ -0,0 +1,80 @@
|
||||
"""Hard Filter Engine — pre-pipeline gating for the dual-pipeline signal engine.
|
||||
|
||||
Evaluates macro bias, valuation score, and earnings proximity to short-circuit
|
||||
both pipelines before evaluation. All conditions are checked and all triggered
|
||||
reasons are collected (no short-circuit on first match).
|
||||
|
||||
Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from services.signal_engine.config import HardFilterConfig
|
||||
from services.signal_engine.models import NormalizedInput
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class HardFilterResult:
    """Result object produced by the hard-filter stage.

    A result with ``filtered=True`` tells the caller to **skip** the ticker,
    i.e. neither pipeline is run for it.  ``reasons`` names every filter
    that fired.
    """

    # True when at least one hard filter fired.
    filtered: bool = False
    # Names of the filters that fired; each instance gets its own list.
    reasons: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def evaluate_hard_filters(
    normalized: NormalizedInput,
    config: HardFilterConfig,
) -> HardFilterResult:
    """Run every pre-pipeline hard filter against one normalized input.

    All three checks are always evaluated; the function never stops at the
    first hit, so ``reasons`` can hold more than one entry:

    - ``"macro_bias_negative"`` when ``macro_bias == config.macro_bias_skip``
      (exact equality).
    - ``"valuation_below_threshold"`` when ``valuation_score`` is present and
      ``< config.valuation_min``.
    - ``"earnings_block"`` when ``earnings_proximity_days`` is present and
      ``<= config.earnings_days``.

    A ``None`` valuation score or earnings proximity never fires its filter,
    so missing data cannot cause a skip on its own.

    Args:
        normalized: Input for the ticker being evaluated.
        config: Thresholds for the three filters.

    Returns:
        A :class:`HardFilterResult`; ``filtered`` is ``True`` exactly when
        ``reasons`` is non-empty.
    """
    valuation = normalized.valuation_score
    days_to_earnings = normalized.earnings_proximity_days

    # (reason label, did it fire?) in requirement order 4.1 → 4.3.
    # NOTE(review): a negative earnings_proximity_days also satisfies "<=" —
    # confirm the upstream normalizer never produces negative values.
    checks = (
        (
            "macro_bias_negative",
            normalized.macro_bias == config.macro_bias_skip,
        ),
        (
            "valuation_below_threshold",
            valuation is not None and valuation < config.valuation_min,
        ),
        (
            "earnings_block",
            days_to_earnings is not None
            and days_to_earnings <= config.earnings_days,
        ),
    )
    reasons: list[str] = [label for label, fired in checks if fired]

    if not reasons:
        return HardFilterResult(filtered=False, reasons=reasons)

    logger.info(
        "Hard filter triggered for %s: %s",
        normalized.ticker,
        ", ".join(reasons),
    )
    return HardFilterResult(filtered=True, reasons=reasons)
|
||||
@@ -0,0 +1,299 @@
|
||||
"""Heuristic Pipeline (Pipeline A) — Deterministic scoring and verdict.
|
||||
|
||||
Computes ``S_total = S_company + S_macro + S_competitive`` from confluence-
|
||||
filtered signals and produces a confidence-gated BUY / WATCH / SKIP verdict.
|
||||
|
||||
The pipeline reuses the existing ``compute_signal_weight`` infrastructure
|
||||
from ``services.aggregation.scoring`` for signal weighting and follows the
|
||||
three-layer signal aggregation model (company, macro, competitive).
|
||||
|
||||
Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from services.signal_engine.config import HeuristicConfig
|
||||
from services.signal_engine.models import (
|
||||
ConfluenceSignal,
|
||||
HeuristicResult,
|
||||
NormalizedInput,
|
||||
SignalDirection,
|
||||
Verdict,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signal classification — which confluence signals belong to which layer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Layer 1 — signal types counted towards the company score.
COMPANY_SIGNAL_TYPES: frozenset[str] = frozenset(
    ("fibonacci", "ma_stack", "rsi", "cup_handle", "elliott_wave")
)

# Layer 3 — signal types counted towards the competitive score.
# Empty for now; reserved for future expansion.
COMPETITIVE_SIGNAL_TYPES: frozenset[str] = frozenset()

# Multiplier that turns macro_bias into S_macro.
_MACRO_WEIGHT: float = 0.5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Score computation helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_s_company(confluence_signals: list[ConfluenceSignal]) -> tuple[float, list[dict]]:
|
||||
"""Sum confluence scores for company-level signals.
|
||||
|
||||
Returns the total S_company score and a list of per-signal weight
|
||||
breakdowns for audit.
|
||||
"""
|
||||
s_company = 0.0
|
||||
weights: list[dict] = []
|
||||
|
||||
for sig in confluence_signals:
|
||||
if sig.signal_type in COMPANY_SIGNAL_TYPES:
|
||||
# Direction-aware: bullish contributes positively, bearish negatively
|
||||
direction_sign = _direction_sign(sig.direction)
|
||||
contribution = sig.confluence_score * direction_sign
|
||||
s_company += contribution
|
||||
weights.append({
|
||||
"signal_type": sig.signal_type,
|
||||
"layer": "company",
|
||||
"confluence_score": sig.confluence_score,
|
||||
"direction": sig.direction.value,
|
||||
"contribution": contribution,
|
||||
"active_timeframes": sig.active_timeframes,
|
||||
})
|
||||
|
||||
return s_company, weights
|
||||
|
||||
|
||||
def _compute_s_macro(normalized: NormalizedInput) -> float:
    """Return the macro-layer score for this input.

    The score is ``normalized.macro_bias * _MACRO_WEIGHT``, so its sign
    follows the sign of ``macro_bias``.  ``macro_bias`` is documented on the
    model as lying in [-1.0, 1.0]; that range is not enforced here.
    """
    bias = normalized.macro_bias
    return bias * _MACRO_WEIGHT
|
||||
|
||||
|
||||
def _compute_s_competitive(confluence_signals: list[ConfluenceSignal]) -> float:
|
||||
"""Sum confluence scores for competitive-layer signals.
|
||||
|
||||
Currently returns 0.0 as no competitive signal types are defined in
|
||||
the signal library. This is a placeholder for future expansion.
|
||||
"""
|
||||
s_competitive = 0.0
|
||||
for sig in confluence_signals:
|
||||
if sig.signal_type in COMPETITIVE_SIGNAL_TYPES:
|
||||
direction_sign = _direction_sign(sig.direction)
|
||||
s_competitive += sig.confluence_score * direction_sign
|
||||
return s_competitive
|
||||
|
||||
|
||||
def _direction_sign(direction: SignalDirection) -> float:
    """Translate a signal direction into a multiplier.

    Returns ``1.0`` for bullish, ``-1.0`` for bearish and ``0.0`` for any
    other value.
    """
    return (
        1.0
        if direction == SignalDirection.BULLISH
        else (-1.0 if direction == SignalDirection.BEARISH else 0.0)
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Confidence computation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_confidence(
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
) -> float:
|
||||
"""Compute pipeline confidence from confluence signals.
|
||||
|
||||
Confidence is derived from:
|
||||
1. **Base confidence** — average signal strength across all confluence
|
||||
signals (mean of confluence_score values).
|
||||
2. **Source count boost** — more active signals increase confidence
|
||||
(diminishing returns, capped contribution).
|
||||
3. **Signal agreement boost** — if all signals point in the same
|
||||
direction, confidence is boosted.
|
||||
4. **Contradiction penalty** — if signals disagree on direction,
|
||||
confidence is penalised.
|
||||
|
||||
Returns a value clamped to [0.0, 1.0].
|
||||
"""
|
||||
if not confluence_signals:
|
||||
return 0.0
|
||||
|
||||
# 1. Base confidence: average confluence score (already weighted by
|
||||
# timeframe importance)
|
||||
total_score = sum(s.confluence_score for s in confluence_signals)
|
||||
base_confidence = total_score / len(confluence_signals)
|
||||
|
||||
# 2. Source count factor: more signals → higher confidence, with
|
||||
# diminishing returns. 1 signal → 0.6, 2 → 0.75, 3 → 0.85,
|
||||
# 4 → 0.90, 5+ → 0.95 (asymptotic).
|
||||
n = len(confluence_signals)
|
||||
source_factor = 1.0 - (0.4 / n) # approaches 1.0 as n grows
|
||||
|
||||
# 3. Signal agreement / contradiction
|
||||
directions = [s.direction for s in confluence_signals]
|
||||
bullish_count = sum(1 for d in directions if d == SignalDirection.BULLISH)
|
||||
bearish_count = sum(1 for d in directions if d == SignalDirection.BEARISH)
|
||||
|
||||
if n == 1:
|
||||
agreement_factor = 1.0
|
||||
elif bullish_count == n or bearish_count == n:
|
||||
# Perfect agreement — boost
|
||||
agreement_factor = 1.15
|
||||
elif bullish_count > 0 and bearish_count > 0:
|
||||
# Contradiction — penalty proportional to minority fraction
|
||||
minority = min(bullish_count, bearish_count)
|
||||
contradiction_ratio = minority / n
|
||||
agreement_factor = 1.0 - (0.3 * contradiction_ratio)
|
||||
else:
|
||||
# Mix of directional and neutral — mild boost
|
||||
agreement_factor = 1.05
|
||||
|
||||
confidence = base_confidence * source_factor * agreement_factor
|
||||
return max(0.0, min(confidence, 1.0))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verdict logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _determine_verdict(
    confidence: float,
    s_total: float,
    normalized: NormalizedInput,
    config: HeuristicConfig,
) -> tuple[Verdict, list[str]]:
    """Map confidence, total score and context onto BUY / WATCH / SKIP.

    Decision order:

    1. **BUY** when all five conditions hold: confidence, ``s_total`` and
       valuation each ``>=`` their BUY threshold, and macro bias and days to
       earnings each ``>`` their threshold.
    2. **WATCH** when BUY failed but ``confidence >= config.watch_confidence``.
    3. **SKIP** otherwise.

    A missing ``valuation_score`` is treated as ``0.0`` and a missing
    ``earnings_proximity_days`` as ``0``.
    NOTE(review): with non-negative thresholds those defaults make the
    corresponding BUY condition fail — confirm this is the intended
    conservative behaviour for missing data.

    Returns:
        The verdict and the human-readable reasoning lines behind it.
    """
    valuation = 0.0 if normalized.valuation_score is None else normalized.valuation_score
    days_to_earnings = (
        0 if normalized.earnings_proximity_days is None else normalized.earnings_proximity_days
    )

    # Outcome of each BUY gate, keyed by the name used in the reasoning text.
    gates = {
        "confidence": confidence >= config.buy_confidence,
        "s_total": s_total >= config.buy_s_total,
        "valuation": valuation >= config.buy_valuation_min,
        "macro_bias": normalized.macro_bias > config.macro_bias_threshold,
        "earnings_proximity": days_to_earnings > config.earnings_days_threshold,
    }
    unmet = [name for name, passed in gates.items() if not passed]

    # --- BUY: every gate passed ---
    if not unmet:
        return Verdict.BUY, [
            f"BUY: all conditions met — confidence={confidence:.3f} "
            f"(>= {config.buy_confidence}), S_total={s_total:.3f} "
            f"(>= {config.buy_s_total}), valuation={valuation:.2f} "
            f"(>= {config.buy_valuation_min}), macro_bias={normalized.macro_bias:.2f} "
            f"(> {config.macro_bias_threshold}), earnings_days={days_to_earnings} "
            f"(> {config.earnings_days_threshold})"
        ]

    # --- WATCH: confident enough, but at least one BUY gate failed ---
    if confidence >= config.watch_confidence:
        lines = [
            f"WATCH: confidence={confidence:.3f} (>= {config.watch_confidence}) "
            f"but BUY conditions not fully met — failed: {', '.join(unmet)}"
        ]
        lines.extend(f" - {name} not met" for name in unmet)
        return Verdict.WATCH, lines

    # --- SKIP: below the watch threshold ---
    return Verdict.SKIP, [
        f"SKIP: confidence={confidence:.3f} < {config.watch_confidence} "
        f"(watch threshold)"
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_heuristic_pipeline(
    normalized: NormalizedInput,
    confluence_signals: list[ConfluenceSignal],
    config: HeuristicConfig,
) -> HeuristicResult:
    """Execute the deterministic heuristic pipeline for one evaluation tick.

    The three layer scores are summed into
    ``S_total = S_company + S_macro + S_competitive``, a confidence value is
    computed from the confluence signals, and both feed the BUY / WATCH /
    SKIP decision.  One INFO line summarising the outcome is logged.

    Args:
        normalized: Unified input for the ticker being evaluated.
        confluence_signals: Signals that passed multi-timeframe confluence
            filtering.
        config: Thresholds used by the verdict logic.

    Returns:
        A :class:`HeuristicResult` carrying the verdict, confidence, the
        total and per-layer scores, the company-layer weight breakdown and
        the reasoning lines.

    Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7
    """
    # Layer scores.
    company_score, weight_breakdown = _compute_s_company(confluence_signals)
    macro_score = _compute_s_macro(normalized)
    competitive_score = _compute_s_competitive(confluence_signals)
    total_score = company_score + macro_score + competitive_score

    # Confidence and verdict.
    pipeline_confidence = _compute_confidence(confluence_signals)
    decision, reasons = _determine_verdict(
        pipeline_confidence, total_score, normalized, config
    )

    logger.info(
        "Heuristic pipeline [%s]: verdict=%s confidence=%.3f "
        "S_total=%.3f (company=%.3f macro=%.3f competitive=%.3f) "
        "signals=%d",
        normalized.ticker,
        decision.value,
        pipeline_confidence,
        total_score,
        company_score,
        macro_score,
        competitive_score,
        len(confluence_signals),
    )

    result_fields = dict(
        verdict=decision,
        confidence=pipeline_confidence,
        s_total=total_score,
        s_company=company_score,
        s_macro=macro_score,
        s_competitive=competitive_score,
        signal_weights=weight_breakdown,
        reasoning=reasons,
    )
    return HeuristicResult(**result_fields)
|
||||
@@ -0,0 +1,180 @@
|
||||
"""Signal engine entry point — asyncio event loop and queue polling.
|
||||
|
||||
Connects to PostgreSQL and Redis, loads configuration from ``risk_configs``,
|
||||
and polls the ``stonks:queue:signal_engine`` queue indefinitely. Each
|
||||
queue message triggers a full evaluation tick via ``evaluate_tick()``.
|
||||
|
||||
When ``dual_pipeline_enabled`` is ``False`` the worker sleeps and retries
|
||||
(fail-safe: the existing pipeline continues unchanged).
|
||||
|
||||
Requirements: 13.1, 13.6, 13.7, 16.1, 16.6
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio
|
||||
|
||||
from services.shared.config import load_config as load_app_config
|
||||
from services.shared.redis_keys import QUEUE_SIGNAL_ENGINE, queue_key
|
||||
from services.signal_engine.config import load_config as load_signal_config
|
||||
from services.signal_engine.worker import evaluate_tick
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# BLPOP timeout in seconds — how long to wait for a queue message before
|
||||
# looping back to check the enabled flag.
|
||||
_BLPOP_TIMEOUT = 5
|
||||
|
||||
|
||||
async def main() -> None:
    """Start the signal engine worker loop.

    1. Configure logging to stdout.
    2. Create the PostgreSQL pool (asyncpg) and the Redis client
       (redis.asyncio) from ``services.shared.config``.
    3. Load the signal engine config via ``load_config(pool)``; on failure
       fall back to ``SignalEngineConfig()`` (disabled, fail-safe) and log
       the active configuration.
    4. Loop forever:

       * While ``dual_pipeline_enabled`` is false: sleep
         ``polling_interval_seconds``, reload the config and retry.
       * While enabled: re-read the config once per
         ``polling_interval_seconds`` so that switching the flag off takes
         effect without a restart.  A failed refresh falls back to the
         disabled default (fail-safe).
       * BLPOP one message from ``stonks:queue:signal_engine``, parse it as
         JSON (``{"ticker": "AAPL", "triggered_at": "..."}``) and call
         ``evaluate_tick(pool, redis, ticker, config)``.  Malformed messages
         are logged and dropped; errors raised by ``evaluate_tick`` are
         logged and the loop continues.

    5. On exit, close the pool and the Redis client.

    Requirements: 13.1, 13.6, 13.7, 16.1, 16.6
    """
    # Imported here (not only in the startup error path) because the disabled
    # default is also needed when a periodic refresh fails.
    from services.signal_engine.config import SignalEngineConfig

    # --- Setup logging ---
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
        stream=sys.stdout,
    )

    logger.info("Signal engine starting up")

    # --- Load shared app config for connection details ---
    app_config = load_app_config()

    # --- Connect to PostgreSQL ---
    pool = await asyncpg.create_pool(
        dsn=app_config.postgres.dsn,
        min_size=2,
        max_size=10,
    )
    logger.info("Connected to PostgreSQL at %s", app_config.postgres.host)

    # --- Connect to Redis ---
    redis_client = redis.asyncio.from_url(
        app_config.redis.url,
        decode_responses=True,
    )
    logger.info("Connected to Redis at %s", app_config.redis.host)

    # --- Load signal engine config ---
    try:
        config = await load_signal_config(pool)
    except Exception:
        logger.warning(
            "Failed to load signal engine config at startup — "
            "defaulting to disabled (fail-safe)",
            exc_info=True,
        )
        config = SignalEngineConfig()  # dual_pipeline_enabled=False

    logger.info(
        "Signal engine config: dual_pipeline_enabled=%s, "
        "heuristic=%s, probabilistic=%s, shadow_mode=%s, "
        "polling_interval=%ds",
        config.dual_pipeline_enabled,
        config.heuristic_pipeline_enabled,
        config.probabilistic_pipeline_enabled,
        config.shadow_mode,
        config.polling_interval_seconds,
    )

    # --- Queue key ---
    signal_queue = queue_key(QUEUE_SIGNAL_ENGINE)
    logger.info("Polling queue: %s", signal_queue)

    # Monotonic clock used to schedule config refreshes while enabled.
    loop = asyncio.get_running_loop()
    last_config_load = loop.time()

    # --- Main loop ---
    try:
        while True:
            # Disabled: wait, reload, and re-check the flag.
            if not config.dual_pipeline_enabled:
                logger.debug(
                    "Dual pipeline disabled — sleeping %ds before retry",
                    config.polling_interval_seconds,
                )
                await asyncio.sleep(config.polling_interval_seconds)

                # Reload config to pick up flag changes
                try:
                    config = await load_signal_config(pool)
                    last_config_load = loop.time()
                except Exception:
                    logger.warning(
                        "Failed to reload signal engine config — "
                        "keeping disabled (fail-safe)",
                        exc_info=True,
                    )
                continue

            # Enabled: refresh the config periodically.  Without this the
            # flag check above would only ever see the value loaded before
            # the worker was enabled, and the switch could not turn it off.
            if loop.time() - last_config_load >= config.polling_interval_seconds:
                last_config_load = loop.time()
                try:
                    config = await load_signal_config(pool)
                except Exception:
                    logger.warning(
                        "Failed to refresh signal engine config — "
                        "defaulting to disabled (fail-safe)",
                        exc_info=True,
                    )
                    config = SignalEngineConfig()  # dual_pipeline_enabled=False
                if not config.dual_pipeline_enabled:
                    logger.info("Dual pipeline disabled — pausing queue processing")
                    continue

            # BLPOP: blocking pop from the signal engine queue
            try:
                result = await redis_client.blpop(
                    signal_queue,
                    timeout=_BLPOP_TIMEOUT,
                )
            except Exception:
                logger.warning(
                    "Redis BLPOP failed — sleeping before retry",
                    exc_info=True,
                )
                await asyncio.sleep(5)
                continue

            if result is None:
                # Timeout — no message, loop back
                continue

            # result is (queue_name, message)
            _, raw_message = result

            # Parse the queue message
            try:
                message = json.loads(raw_message)
                ticker = message["ticker"]
                # Reject payloads whose ticker is not a usable string so they
                # are reported as invalid messages rather than failing later.
                if not isinstance(ticker, str) or not ticker:
                    raise TypeError("ticker must be a non-empty string")
            except (json.JSONDecodeError, KeyError, TypeError):
                logger.warning(
                    "Invalid queue message — skipping: %s",
                    raw_message,
                )
                continue

            logger.info("Processing evaluation tick for %s", ticker)

            # Run the evaluation tick; one failing ticker must not stop the worker.
            try:
                await evaluate_tick(pool, redis_client, ticker, config)
            except Exception:
                logger.error(
                    "Unhandled error in evaluate_tick for %s",
                    ticker,
                    exc_info=True,
                )

    except KeyboardInterrupt:
        logger.info("Signal engine shutting down (KeyboardInterrupt)")
    finally:
        await pool.close()
        await redis_client.aclose()
        logger.info("Signal engine shut down")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,271 @@
|
||||
"""Pydantic data models for the dual-pipeline signal engine.
|
||||
|
||||
Defines all input, intermediate, and output models consumed by the heuristic
|
||||
pipeline, probabilistic pipeline, delta analyzer, exit engine, and output
|
||||
formatter. Every model is a Pydantic ``BaseModel`` subclass with field-level
|
||||
constraints where applicable.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Market data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class OHLCVBar(BaseModel):
    """One open / high / low / close / volume bar of a single timeframe."""

    # When the bar is stamped.
    timestamp: datetime

    # Price fields.
    open: float
    high: float
    low: float
    close: float

    # Volume field.
    volume: float
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Position state (for exit engine)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class OpenPositionState(BaseModel):
    """Point-in-time view of an open position, used for exit evaluation."""

    # Identity.
    position_id: str
    ticker: str

    # Price levels.
    entry_price: float
    current_price: float
    stop_loss: float
    target_1: float
    target_2: float

    # Optional / stateful fields; all have defaults.
    trailing_stop: float | None = None
    partial_exit_done: bool = False
    atr: float | None = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Normalized input consumed by both pipelines
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class NormalizedInput(BaseModel):
    """Single input structure shared by the heuristic and probabilistic pipelines."""

    ticker: str
    evaluated_at: datetime

    # OHLCV bars per timeframe label.
    bars: dict[str, list[OHLCVBar]]  # {"M30": [...], "H1": [...], ...}

    # Fundamental / macro context.  The ranges in the trailing comments are
    # documentation only; no Field constraint enforces them.
    valuation_score: float | None = None  # [0.0, 1.0]
    earnings_proximity_days: int | None = None
    macro_bias: float = 0.0  # [-1.0, 1.0]

    # Open positions handed to exit evaluation.
    open_positions: list[OpenPositionState] = Field(default_factory=list)

    # Price-series helpers (used by probabilistic pipeline).
    closing_prices: list[float] = Field(default_factory=list)
    returns: list[float] = Field(default_factory=list)
    current_price: float | None = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signal evaluation primitives
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SignalDirection(str, Enum):
    """Direction of a signal.

    Members are also ``str`` instances, so each compares equal to its
    lowercase string value.
    """

    BULLISH = "bullish"
    BEARISH = "bearish"
    NEUTRAL = "neutral"
|
||||
|
||||
|
||||
class SignalResult(BaseModel):
    """What one signal evaluator reports for one timeframe."""

    signal_type: str
    timeframe: str
    direction: SignalDirection

    # Both scores are validated to lie within [0.0, 1.0].
    strength: float = Field(ge=0.0, le=1.0)
    confidence: float = Field(ge=0.0, le=1.0)

    # Free-form extra data; empty dict by default.
    metadata: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multi-timeframe confluence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ConfluenceSignal(BaseModel):
    """A signal that survived the multi-timeframe confluence filter."""

    signal_type: str
    direction: SignalDirection

    # Score for this signal; no range constraint is declared.
    confluence_score: float

    # Timeframe labels on which the signal is active.
    active_timeframes: list[str]

    # One float per timeframe label.
    per_timeframe: dict[str, float]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pipeline verdicts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class Verdict(str, Enum):
    """Pipeline verdict.

    Members are also ``str`` instances, so each compares equal to its
    uppercase string value.
    """

    BUY = "BUY"
    WATCH = "WATCH"
    SKIP = "SKIP"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Heuristic pipeline output
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HeuristicResult(BaseModel):
    """Result of the heuristic (deterministic) pipeline."""

    verdict: Verdict

    # Validated to lie within [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)

    # Total score followed by its three layer components.
    s_total: float
    s_company: float
    s_macro: float
    s_competitive: float

    # Audit payloads; both default to an empty list.
    signal_weights: list[dict] = Field(default_factory=list)
    reasoning: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Probabilistic pipeline output
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class LikelihoodRatio(BaseModel):
    """Likelihood ratio of one signal, as used for Bayesian updating."""

    # Which signal, and which cluster it is assigned to.
    signal_type: str
    cluster: str

    # The ratio and its log forms.
    lr: float
    log_lr: float
    penalized_log_lr: float

    # Inputs recorded alongside the ratio.
    hit_rate: float
    strength: float
|
||||
|
||||
|
||||
class ProbabilisticResult(BaseModel):
    """Result of the probabilistic (Bayesian) pipeline."""

    verdict: Verdict

    # Both validated to lie within [0.0, 1.0].
    p_up: float = Field(ge=0.0, le=1.0)
    entropy: float = Field(ge=0.0, le=1.0)

    # Unconstrained numeric outputs.
    ev_r: float
    prior: float
    posterior: float

    # Per-signal likelihood ratios; empty list by default.
    likelihood_ratios: list[LikelihoodRatio] = Field(default_factory=list)

    regime: str

    # Reasoning lines; empty list by default.
    reasoning: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Delta analyzer output
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class DeltaResult(BaseModel):
    """Result of the delta analyzer, which compares the two pipelines."""

    agreement: bool
    confidence_delta: float

    # Verdicts are stored as plain strings here, not as Verdict members.
    heuristic_verdict: str
    probabilistic_verdict: str

    # Reasons for disagreement; empty list by default.
    disagreement_reasons: list[str] = Field(default_factory=list)

    # Optional; None when not supplied.
    rolling_agreement_rate: float | None = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Exit engine
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ExitType(str, Enum):
    """Kind of exit carried by an exit signal.

    Members are also ``str`` instances, so each compares equal to its
    string value.
    """

    EXIT_HALF = "EXIT_HALF"
    EXIT_FULL = "EXIT_FULL"
|
||||
|
||||
|
||||
class ExitSignal(BaseModel):
    """Exit instruction for one open position."""

    # Which position.
    position_id: str
    ticker: str

    # What kind of exit, why, and at which price.
    exit_type: ExitType
    reason: str
    price: float
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trade plan
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TradePlan(BaseModel):
    """Trade plan that may accompany a BUY signal."""

    # Price levels.
    entry_price: float
    stop_loss: float
    target_1: float
    target_2: float

    # Sizing; both validated to lie within [0.0, 1.0].
    position_size_pct: float = Field(ge=0.0, le=1.0)
    max_loss_pct: float = Field(ge=0.0, le=1.0)

    # Flags recording which pipeline(s) backed the signal; both default False.
    dual_confirmed: bool = False
    probabilistic_only: bool = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Structured output contract
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SignalOutput(BaseModel):
    """Structured output contract consumed by the trading engine and audit systems."""

    # Identity; output_id defaults to a freshly generated UUID4 string.
    output_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    ticker: str
    timestamp: datetime
    price: float

    # --- Heuristic pipeline ---
    heuristic_verdict: str
    heuristic_confidence: float
    heuristic_s_total: float

    # --- Probabilistic pipeline ---
    probabilistic_verdict: str
    probabilistic_p_up: float
    probabilistic_entropy: float
    probabilistic_ev_r: float

    # --- Delta analysis ---
    delta_agreement: bool
    delta_confidence_delta: float
    delta_reasons: list[str] = Field(default_factory=list)

    # --- Optional trade plan and exit signals ---
    trade_plan: TradePlan | None = None
    exit_signals: list[ExitSignal] = Field(default_factory=list)

    # --- Detail payloads for audit / dashboard ---
    heuristic_detail: dict = Field(default_factory=dict)
    probabilistic_detail: dict = Field(default_factory=dict)

    # --- Pipeline mode metadata ---
    pipeline_mode: str = "dual_pipeline"
    shadow_mode: bool = False
|
||||
@@ -0,0 +1,459 @@
|
||||
"""Input Normalizer — fetches and assembles NormalizedInput for a single tick.
|
||||
|
||||
Queries multiple data sources (market snapshots, trend windows, earnings
|
||||
calendar, macro impact records, position stop levels) and assembles them
|
||||
into a single ``NormalizedInput`` consumed by both pipelines.
|
||||
|
||||
Missing data sources produce sentinel values (``None`` / empty list) with a
|
||||
logged warning — the normalizer never crashes on unavailable data.
|
||||
|
||||
Requirements: 1.1, 1.2, 1.3, 1.4, 1.5
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
from .config import SignalEngineConfig
|
||||
from .models import NormalizedInput, OHLCVBar, OpenPositionState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeframe labels evaluated by the signal engine, shortest first.
TIMEFRAMES = ("M30", "H1", "H4", "D", "W", "M")

# ---------------------------------------------------------------------------
# Direction → numeric bias mapping (same semantics as aggregation worker)
# ---------------------------------------------------------------------------
_DIRECTION_TO_BIAS: dict[str, float] = dict(
    positive=1.0,
    negative=-1.0,
    mixed=0.0,
    neutral=0.0,
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _validate_monotonic_timestamps(
|
||||
bars: list[OHLCVBar],
|
||||
timeframe: str,
|
||||
ticker: str,
|
||||
) -> list[OHLCVBar]:
|
||||
"""Return *bars* sorted by timestamp, warning on non-monotonic input.
|
||||
|
||||
If timestamps are already strictly increasing the list is returned
|
||||
unchanged. Otherwise the bars are sorted and a warning is logged.
|
||||
"""
|
||||
if len(bars) <= 1:
|
||||
return bars
|
||||
|
||||
is_monotonic = all(
|
||||
bars[i].timestamp < bars[i + 1].timestamp for i in range(len(bars) - 1)
|
||||
)
|
||||
if is_monotonic:
|
||||
return bars
|
||||
|
||||
logger.warning(
|
||||
"%s/%s: OHLCV timestamps not monotonically increasing — sorting",
|
||||
ticker,
|
||||
timeframe,
|
||||
)
|
||||
return sorted(bars, key=lambda b: b.timestamp)
|
||||
|
||||
|
||||
def _polygon_bar_to_ohlcv(row: asyncpg.Record) -> OHLCVBar | None:
|
||||
"""Convert a market_snapshots row (JSONB data column) to an OHLCVBar.
|
||||
|
||||
Polygon bar format stored in ``data``:
|
||||
t — timestamp in epoch milliseconds
|
||||
o — open
|
||||
h — high
|
||||
l — low
|
||||
c — close
|
||||
v — volume
|
||||
|
||||
Returns ``None`` if the row cannot be parsed.
|
||||
"""
|
||||
data = row["data"]
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
try:
|
||||
ts_ms = data.get("t")
|
||||
if ts_ms is None:
|
||||
return None
|
||||
return OHLCVBar(
|
||||
timestamp=datetime.fromtimestamp(int(ts_ms) / 1000, tz=timezone.utc),
|
||||
open=float(data.get("o", 0)),
|
||||
high=float(data.get("h", 0)),
|
||||
low=float(data.get("l", 0)),
|
||||
close=float(data.get("c", 0)),
|
||||
volume=float(data.get("v", 0)),
|
||||
)
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data-source fetchers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _fetch_bars(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
) -> dict[str, list[OHLCVBar]]:
|
||||
"""Fetch OHLCV bars from ``market_snapshots`` for all timeframes.
|
||||
|
||||
The current database stores daily bars (``snapshot_type = 'bar'``) from
|
||||
Polygon. Intraday bars are stored with ``snapshot_type = 'intraday_bar'``
|
||||
when available.
|
||||
|
||||
For timeframes that have no dedicated data yet (H4, W, M) we derive them
|
||||
from daily bars where possible:
|
||||
- **W** (weekly): group daily bars by ISO week.
|
||||
- **M** (monthly): group daily bars by calendar month.
|
||||
- **H4 / H1 / M30**: sourced from intraday snapshots when present;
|
||||
otherwise left empty.
|
||||
|
||||
Returns a dict keyed by timeframe label with validated bar lists.
|
||||
"""
|
||||
bars: dict[str, list[OHLCVBar]] = {tf: [] for tf in TIMEFRAMES}
|
||||
|
||||
# --- Daily bars --------------------------------------------------------
|
||||
try:
|
||||
rows = await pool.fetch(
|
||||
"SELECT data FROM market_snapshots "
|
||||
"WHERE ticker = $1 AND snapshot_type = 'bar' "
|
||||
"ORDER BY captured_at ASC",
|
||||
ticker,
|
||||
)
|
||||
daily: list[OHLCVBar] = []
|
||||
for row in rows:
|
||||
bar = _polygon_bar_to_ohlcv(row)
|
||||
if bar is not None:
|
||||
daily.append(bar)
|
||||
bars["D"] = daily
|
||||
except Exception:
|
||||
logger.warning("%s: failed to fetch daily bars", ticker, exc_info=True)
|
||||
|
||||
# --- Intraday bars (M30, H1) ------------------------------------------
|
||||
try:
|
||||
intraday_rows = await pool.fetch(
|
||||
"SELECT data FROM market_snapshots "
|
||||
"WHERE ticker = $1 AND snapshot_type = 'intraday_bar' "
|
||||
"ORDER BY captured_at ASC",
|
||||
ticker,
|
||||
)
|
||||
intraday: list[OHLCVBar] = []
|
||||
for row in intraday_rows:
|
||||
bar = _polygon_bar_to_ohlcv(row)
|
||||
if bar is not None:
|
||||
intraday.append(bar)
|
||||
|
||||
# Assign intraday bars to M30 and H1 buckets.
|
||||
# The actual timespan depends on the source config; we store them
|
||||
# under M30 (shortest) and duplicate to H1 for now. When dedicated
|
||||
# H1 bars are ingested they will replace this.
|
||||
if intraday:
|
||||
bars["M30"] = intraday
|
||||
bars["H1"] = intraday
|
||||
except Exception:
|
||||
logger.warning("%s: failed to fetch intraday bars", ticker, exc_info=True)
|
||||
|
||||
# --- Derive H4 from intraday (4-hour grouping) ------------------------
|
||||
# Left empty when no intraday data — sentinel value per Req 1.3.
|
||||
|
||||
# --- Derive weekly bars from daily ------------------------------------
|
||||
if bars["D"]:
|
||||
bars["W"] = _aggregate_bars_by_period(bars["D"], period="week")
|
||||
|
||||
# --- Derive monthly bars from daily -----------------------------------
|
||||
if bars["D"]:
|
||||
bars["M"] = _aggregate_bars_by_period(bars["D"], period="month")
|
||||
|
||||
return bars
|
||||
|
||||
|
||||
def _aggregate_bars_by_period(
|
||||
daily_bars: list[OHLCVBar],
|
||||
period: str,
|
||||
) -> list[OHLCVBar]:
|
||||
"""Aggregate daily bars into weekly or monthly bars.
|
||||
|
||||
Groups by ISO week (period="week") or calendar month (period="month"),
|
||||
then computes OHLCV aggregates per group.
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
|
||||
groups: OrderedDict[tuple[int, int], list[OHLCVBar]] = OrderedDict()
|
||||
for bar in daily_bars:
|
||||
if period == "week":
|
||||
iso = bar.timestamp.isocalendar()
|
||||
key = (iso[0], iso[1]) # (year, week)
|
||||
else:
|
||||
key = (bar.timestamp.year, bar.timestamp.month)
|
||||
groups.setdefault(key, []).append(bar)
|
||||
|
||||
result: list[OHLCVBar] = []
|
||||
for group_bars in groups.values():
|
||||
if not group_bars:
|
||||
continue
|
||||
result.append(
|
||||
OHLCVBar(
|
||||
timestamp=group_bars[0].timestamp, # period open timestamp
|
||||
open=group_bars[0].open,
|
||||
high=max(b.high for b in group_bars),
|
||||
low=min(b.low for b in group_bars),
|
||||
close=group_bars[-1].close,
|
||||
volume=sum(b.volume for b in group_bars),
|
||||
)
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
async def _fetch_fundamentals(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
) -> tuple[float | None, int | None]:
|
||||
"""Fetch valuation_score and earnings_proximity_days.
|
||||
|
||||
- **valuation_score**: derived from the latest ``trend_windows`` confidence
|
||||
for the ticker (entity_type='company', entity_id=ticker).
|
||||
- **earnings_proximity_days**: days until the next earnings date from
|
||||
``earnings_calendar``.
|
||||
|
||||
Returns ``(valuation_score, earnings_proximity_days)`` with ``None``
|
||||
sentinels for unavailable data.
|
||||
"""
|
||||
valuation_score: float | None = None
|
||||
earnings_proximity_days: int | None = None
|
||||
|
||||
# --- Valuation score from trend_windows --------------------------------
|
||||
try:
|
||||
row = await pool.fetchrow(
|
||||
"SELECT confidence FROM trend_windows "
|
||||
"WHERE entity_type = 'company' AND entity_id = $1 "
|
||||
"ORDER BY generated_at DESC LIMIT 1",
|
||||
ticker,
|
||||
)
|
||||
if row is not None:
|
||||
valuation_score = float(row["confidence"])
|
||||
else:
|
||||
logger.warning("%s: no trend_windows data — valuation_score=None", ticker)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"%s: failed to fetch valuation_score", ticker, exc_info=True
|
||||
)
|
||||
|
||||
# --- Earnings proximity from earnings_calendar -------------------------
|
||||
try:
|
||||
row = await pool.fetchrow(
|
||||
"SELECT earnings_date FROM earnings_calendar "
|
||||
"WHERE ticker = $1 AND earnings_date >= CURRENT_DATE "
|
||||
"ORDER BY earnings_date ASC LIMIT 1",
|
||||
ticker,
|
||||
)
|
||||
if row is not None:
|
||||
delta = row["earnings_date"] - datetime.now(timezone.utc).date()
|
||||
earnings_proximity_days = delta.days
|
||||
else:
|
||||
logger.warning(
|
||||
"%s: no upcoming earnings in calendar — earnings_proximity_days=None",
|
||||
ticker,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"%s: failed to fetch earnings_proximity_days", ticker, exc_info=True
|
||||
)
|
||||
|
||||
return valuation_score, earnings_proximity_days
|
||||
|
||||
|
||||
async def _fetch_macro_bias(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
) -> float:
|
||||
"""Compute macro_bias for *ticker* from recent ``macro_impact_records``.
|
||||
|
||||
Averages the numeric bias of the most recent impact records (up to 10)
|
||||
weighted by their confidence. The direction string is mapped to a float
|
||||
via ``_DIRECTION_TO_BIAS``.
|
||||
|
||||
Returns 0.0 (neutral) when no records are found or on error.
|
||||
"""
|
||||
try:
|
||||
rows = await pool.fetch(
|
||||
"SELECT impact_direction, macro_impact_score, confidence "
|
||||
"FROM macro_impact_records "
|
||||
"WHERE ticker = $1 "
|
||||
"ORDER BY computed_at DESC LIMIT 10",
|
||||
ticker,
|
||||
)
|
||||
if not rows:
|
||||
logger.warning("%s: no macro_impact_records — macro_bias=0.0", ticker)
|
||||
return 0.0
|
||||
|
||||
weighted_sum = 0.0
|
||||
weight_total = 0.0
|
||||
for row in rows:
|
||||
direction = row["impact_direction"] or "neutral"
|
||||
bias = _DIRECTION_TO_BIAS.get(direction, 0.0)
|
||||
score = float(row["macro_impact_score"] or 0.0)
|
||||
conf = float(row["confidence"] or 0.5)
|
||||
w = score * conf
|
||||
weighted_sum += bias * w
|
||||
weight_total += w
|
||||
|
||||
if weight_total == 0.0:
|
||||
return 0.0
|
||||
|
||||
# Clamp to [-1.0, 1.0]
|
||||
raw = weighted_sum / weight_total
|
||||
return max(-1.0, min(1.0, raw))
|
||||
except Exception:
|
||||
logger.warning("%s: failed to fetch macro_bias", ticker, exc_info=True)
|
||||
return 0.0
|
||||
|
||||
|
||||
async def _fetch_open_positions(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
) -> list[OpenPositionState]:
|
||||
"""Fetch open positions for *ticker* from ``position_stop_levels``.
|
||||
|
||||
Joins with ``positions`` for current_price when available.
|
||||
Returns an empty list on error or when no positions exist.
|
||||
"""
|
||||
try:
|
||||
rows = await pool.fetch(
|
||||
"SELECT psl.id, psl.ticker, psl.entry_price, "
|
||||
" psl.stop_loss_price, psl.take_profit_price, "
|
||||
" psl.trailing_stop_active, psl.atr_value, "
|
||||
" psl.atr_multiplier, psl.reward_risk_ratio, "
|
||||
" COALESCE(p.current_price, psl.entry_price) AS current_price "
|
||||
"FROM position_stop_levels psl "
|
||||
"LEFT JOIN positions p ON p.ticker = psl.ticker "
|
||||
"WHERE psl.ticker = $1 AND psl.active = TRUE",
|
||||
ticker,
|
||||
)
|
||||
positions: list[OpenPositionState] = []
|
||||
for row in rows:
|
||||
entry = float(row["entry_price"])
|
||||
current = float(row["current_price"])
|
||||
stop = float(row["stop_loss_price"])
|
||||
tp = float(row["take_profit_price"])
|
||||
atr = float(row["atr_value"]) if row["atr_value"] else None
|
||||
rr = float(row["reward_risk_ratio"]) if row["reward_risk_ratio"] else 2.0
|
||||
|
||||
# Derive target_2 from reward-risk ratio if only one TP level
|
||||
target_1 = tp
|
||||
target_2 = entry + (tp - entry) * rr if rr > 1.0 else tp
|
||||
|
||||
positions.append(
|
||||
OpenPositionState(
|
||||
position_id=str(row["id"]),
|
||||
ticker=row["ticker"],
|
||||
entry_price=entry,
|
||||
current_price=current,
|
||||
stop_loss=stop,
|
||||
target_1=target_1,
|
||||
target_2=target_2,
|
||||
trailing_stop=None, # computed by exit engine at runtime
|
||||
partial_exit_done=bool(row["trailing_stop_active"]),
|
||||
atr=atr,
|
||||
)
|
||||
)
|
||||
return positions
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"%s: failed to fetch open positions", ticker, exc_info=True
|
||||
)
|
||||
return []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def normalize_input(
    pool: asyncpg.Pool,
    ticker: str,
    config: SignalEngineConfig,
) -> NormalizedInput:
    """Gather everything one evaluation tick needs into a ``NormalizedInput``.

    Data sources, all fetched concurrently:
    - OHLCV bars per timeframe (``_fetch_bars``)
    - valuation score and earnings proximity (``_fetch_fundamentals``)
    - macro bias (``_fetch_macro_bias``)
    - open positions (``_fetch_open_positions``)

    After fetching, each timeframe's bars are passed through
    ``_validate_monotonic_timestamps`` (Req 1.4).  Daily closes and their
    simple returns are derived for regime classification, and
    ``current_price`` is taken from the latest close of the shortest
    timeframe that has any bars (``None`` when there are no bars at all).

    ``config`` is accepted but not read by this function.

    Requirements: 1.1, 1.2, 1.3, 1.4, 1.5
    """
    evaluated_at = datetime.now(timezone.utc)

    # Schedule every fetcher up front so they run concurrently, then
    # collect the results in a fixed order.
    pending_bars = asyncio.create_task(_fetch_bars(pool, ticker))
    pending_fundamentals = asyncio.create_task(_fetch_fundamentals(pool, ticker))
    pending_macro = asyncio.create_task(_fetch_macro_bias(pool, ticker))
    pending_positions = asyncio.create_task(_fetch_open_positions(pool, ticker))

    bars = await pending_bars
    valuation_score, earnings_proximity_days = await pending_fundamentals
    macro_bias = await pending_macro
    open_positions = await pending_positions

    # Req 1.4: timestamps must be monotonic within each timeframe.
    for label in TIMEFRAMES:
        bars[label] = _validate_monotonic_timestamps(bars[label], label, ticker)

    # Daily closes and bar-over-bar simple returns feed the regime
    # classifier used by the probabilistic pipeline.
    closing_prices: list[float] = [bar.close for bar in bars.get("D", [])]
    returns: list[float] = []
    for previous, latest in zip(closing_prices, closing_prices[1:]):
        if previous != 0:
            returns.append((latest - previous) / previous)
        else:
            # A zero previous close would divide by zero; record a flat return.
            returns.append(0.0)

    # TIMEFRAMES runs shortest → longest, so the first non-empty series
    # is the finest-grained one available.
    current_price: float | None = next(
        (bars[label][-1].close for label in TIMEFRAMES if bars[label]),
        None,
    )

    return NormalizedInput(
        ticker=ticker,
        evaluated_at=evaluated_at,
        bars=bars,
        valuation_score=valuation_score,
        earnings_proximity_days=earnings_proximity_days,
        macro_bias=macro_bias,
        open_positions=open_positions,
        closing_prices=closing_prices,
        returns=returns,
        current_price=current_price,
    )
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Database persistence for signal engine outputs.
|
||||
|
||||
Persists ``SignalOutput`` instances to the ``signal_engine_outputs`` table.
|
||||
Persistence failures are logged and swallowed — they never block signal
|
||||
emission to the trading queue.
|
||||
|
||||
Requirements: 15.1, 15.4
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.signal_engine.models import SignalOutput
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# INSERT statement for the signal_engine_outputs table.
|
||||
_INSERT_SQL = """
|
||||
INSERT INTO signal_engine_outputs (
|
||||
id,
|
||||
ticker,
|
||||
evaluated_at,
|
||||
price,
|
||||
heuristic_verdict,
|
||||
heuristic_confidence,
|
||||
heuristic_s_total,
|
||||
probabilistic_verdict,
|
||||
probabilistic_p_up,
|
||||
probabilistic_entropy,
|
||||
probabilistic_ev_r,
|
||||
delta_agreement,
|
||||
delta_confidence_delta,
|
||||
delta_reasons,
|
||||
trade_plan,
|
||||
full_output,
|
||||
exit_signals,
|
||||
pipeline_mode,
|
||||
shadow_mode
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
||||
$11, $12, $13, $14, $15, $16, $17, $18, $19
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
async def persist_signal_output(
    pool: asyncpg.Pool,
    output: SignalOutput,
) -> None:
    """Persist a SignalOutput to the signal_engine_outputs table.

    The nested trade plan and exit signals are dumped with
    ``model_dump(mode="json")`` so that every value is JSON-compatible
    before ``json.dumps`` sees it.  A plain ``model_dump()`` keeps Python
    objects such as ``datetime``, which ``json.dumps`` rejects.  Because
    this function swallows errors, that would drop the record with only a
    log line.

    Any exception (serialization or database) is logged at error level and
    swallowed, so persistence never blocks the caller.

    Args:
        pool: asyncpg pool used to run the INSERT.
        output: The signal output to store; its fields map positionally to
            the 19 parameters of ``_INSERT_SQL``.

    Requirements: 15.1, 15.4
    """
    try:
        trade_plan_json: str | None = None
        if output.trade_plan is not None:
            trade_plan_json = json.dumps(output.trade_plan.model_dump(mode="json"))

        exit_signals_json = json.dumps(
            [signal.model_dump(mode="json") for signal in output.exit_signals]
        )
        delta_reasons_json = json.dumps(output.delta_reasons)
        full_output_json = output.model_dump_json()

        await pool.execute(
            _INSERT_SQL,
            output.output_id,               # $1  id
            output.ticker,                  # $2  ticker
            output.timestamp,               # $3  evaluated_at
            output.price,                   # $4  price
            output.heuristic_verdict,       # $5  heuristic_verdict
            output.heuristic_confidence,    # $6  heuristic_confidence
            output.heuristic_s_total,       # $7  heuristic_s_total
            output.probabilistic_verdict,   # $8  probabilistic_verdict
            output.probabilistic_p_up,      # $9  probabilistic_p_up
            output.probabilistic_entropy,   # $10 probabilistic_entropy
            output.probabilistic_ev_r,      # $11 probabilistic_ev_r
            output.delta_agreement,         # $12 delta_agreement
            output.delta_confidence_delta,  # $13 delta_confidence_delta
            delta_reasons_json,             # $14 delta_reasons (JSONB)
            trade_plan_json,                # $15 trade_plan (JSONB, nullable)
            full_output_json,               # $16 full_output (JSONB)
            exit_signals_json,              # $17 exit_signals (JSONB)
            output.pipeline_mode,           # $18 pipeline_mode
            output.shadow_mode,             # $19 shadow_mode
        )

        logger.debug(
            "Persisted signal output %s for %s",
            output.output_id,
            output.ticker,
        )
    except Exception:
        logger.error(
            "Failed to persist signal output %s for %s — continuing",
            output.output_id,
            output.ticker,
            exc_info=True,
        )
|
||||
@@ -0,0 +1,380 @@
|
||||
"""Probabilistic Pipeline (Pipeline B) — Bayesian inference and verdict.
|
||||
|
||||
Computes a posterior probability via regime-based priors, likelihood ratio
|
||||
accumulation with correlation penalty, entropy gating, and expected value
|
||||
calculation. Produces a BUY / WATCH / SKIP verdict.
|
||||
|
||||
The pipeline reuses the existing ``classify_regime`` infrastructure from
|
||||
``services.aggregation.regime`` for regime classification and wraps the
|
||||
Bayesian math with signal-cluster correlation penalties from
|
||||
``services.signal_engine.correlation``.
|
||||
|
||||
Requirements: 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9,
|
||||
14.1, 14.2, 14.3, 14.4, 14.5
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
|
||||
from services.aggregation.regime import MarketRegime, RegimeClassification
|
||||
from services.signal_engine.config import ProbabilisticConfig
|
||||
from services.signal_engine.correlation import (
|
||||
apply_correlation_penalty,
|
||||
classify_signal,
|
||||
)
|
||||
from services.signal_engine.models import (
|
||||
ConfluenceSignal,
|
||||
LikelihoodRatio,
|
||||
NormalizedInput,
|
||||
ProbabilisticResult,
|
||||
SignalDirection,
|
||||
Verdict,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default hit rate used when no historical hit rate is available.
|
||||
_DEFAULT_HIT_RATE: float = 0.6
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regime → prior mapping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _regime_to_prior(
|
||||
regime: RegimeClassification,
|
||||
config: ProbabilisticConfig,
|
||||
) -> float:
|
||||
"""Map a regime classification to a prior probability.
|
||||
|
||||
Mapping (Req 14.2):
|
||||
- TREND_FOLLOWING with positive trend_indicator → bull prior (0.58)
|
||||
- TREND_FOLLOWING with negative trend_indicator → bear prior (0.42)
|
||||
- MEAN_REVERSION → range prior (0.50)
|
||||
- PANIC → bear prior (0.42)
|
||||
- UNCERTAINTY → range prior (0.50)
|
||||
"""
|
||||
if regime.regime == MarketRegime.TREND_FOLLOWING:
|
||||
if regime.trend_indicator > 0:
|
||||
return config.regime_prior_bull
|
||||
return config.regime_prior_bear
|
||||
if regime.regime == MarketRegime.MEAN_REVERSION:
|
||||
return config.regime_prior_range
|
||||
if regime.regime == MarketRegime.PANIC:
|
||||
return config.regime_prior_bear
|
||||
# UNCERTAINTY or any unknown → range prior
|
||||
return config.regime_prior_range
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Likelihood ratio computation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_likelihood_ratios(
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
) -> list[LikelihoodRatio]:
|
||||
"""Compute raw likelihood ratios for each confluence signal.
|
||||
|
||||
For each signal:
|
||||
- h = hit rate (use confidence as proxy, default 0.6)
|
||||
- s = signal strength (confluence_score)
|
||||
- P(sig|up) = h * s + (1 - h) * (1 - s) * 0.5
|
||||
- P(sig|down) = 1 - P(sig|up)
|
||||
- LR = P(sig|up) / P(sig|down)
|
||||
|
||||
Direction-aware: bearish signals invert the LR (use 1/LR) so that
|
||||
bearish evidence reduces P_up.
|
||||
|
||||
Requirements: 6.2
|
||||
"""
|
||||
ratios: list[LikelihoodRatio] = []
|
||||
|
||||
for sig in confluence_signals:
|
||||
h = _DEFAULT_HIT_RATE
|
||||
s = sig.confluence_score
|
||||
|
||||
# Clamp inputs to valid ranges to avoid numerical issues
|
||||
h = max(0.01, min(h, 0.99))
|
||||
s = max(0.01, min(s, 0.99))
|
||||
|
||||
p_sig_up = h * s + (1.0 - h) * (1.0 - s) * 0.5
|
||||
p_sig_down = 1.0 - p_sig_up
|
||||
|
||||
# Guard against division by zero / near-zero
|
||||
if p_sig_down < 1e-10:
|
||||
p_sig_down = 1e-10
|
||||
|
||||
lr = p_sig_up / p_sig_down
|
||||
|
||||
# Bearish signals: invert the LR so it reduces P_up
|
||||
if sig.direction == SignalDirection.BEARISH:
|
||||
lr = 1.0 / lr if lr > 1e-10 else 1e10
|
||||
|
||||
log_lr = math.log(lr) if lr > 0 else 0.0
|
||||
|
||||
cluster = classify_signal(sig.signal_type)
|
||||
|
||||
ratios.append(
|
||||
LikelihoodRatio(
|
||||
signal_type=sig.signal_type,
|
||||
cluster=cluster.value,
|
||||
lr=lr,
|
||||
log_lr=log_lr,
|
||||
penalized_log_lr=log_lr, # will be updated by penalty
|
||||
hit_rate=h,
|
||||
strength=s,
|
||||
)
|
||||
)
|
||||
|
||||
return ratios
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Log-odds / sigmoid helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _logit(p: float) -> float:
|
||||
"""Compute logit(p) = log(p / (1 - p)).
|
||||
|
||||
Clamps p to (1e-10, 1 - 1e-10) to avoid infinities.
|
||||
"""
|
||||
p = max(1e-10, min(p, 1.0 - 1e-10))
|
||||
return math.log(p / (1.0 - p))
|
||||
|
||||
|
||||
def _sigmoid(x: float) -> float:
|
||||
"""Compute sigmoid(x) = 1 / (1 + exp(-x)).
|
||||
|
||||
Clamps the exponent to avoid overflow.
|
||||
"""
|
||||
if x > 500:
|
||||
return 1.0
|
||||
if x < -500:
|
||||
return 0.0
|
||||
return 1.0 / (1.0 + math.exp(-x))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shannon entropy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _shannon_entropy(p: float) -> float:
|
||||
"""Compute Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p).
|
||||
|
||||
Returns 0.0 at the boundaries (p = 0 or p = 1).
|
||||
Result is in [0, 1] for binary entropy.
|
||||
"""
|
||||
if p <= 0.0 or p >= 1.0:
|
||||
return 0.0
|
||||
return -(p * math.log2(p) + (1.0 - p) * math.log2(1.0 - p))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# EV_R computation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_ev_r(
|
||||
p_up: float,
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
) -> float:
|
||||
"""Compute expected value per unit risk.
|
||||
|
||||
EV_R = P_up · E[win_R] - (1 - P_up) · 1.0
|
||||
|
||||
E[win_R] is estimated as the average confluence_score × 2.0
|
||||
(heuristic for expected win in R-units). Falls back to 1.0 if
|
||||
no signals are available.
|
||||
"""
|
||||
if confluence_signals:
|
||||
avg_score = sum(s.confluence_score for s in confluence_signals) / len(
|
||||
confluence_signals
|
||||
)
|
||||
e_win_r = avg_score * 2.0
|
||||
else:
|
||||
e_win_r = 1.0
|
||||
|
||||
return p_up * e_win_r - (1.0 - p_up) * 1.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verdict logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _determine_verdict(
|
||||
p_up: float,
|
||||
entropy: float,
|
||||
ev_r: float,
|
||||
normalized: NormalizedInput,
|
||||
config: ProbabilisticConfig,
|
||||
) -> tuple[Verdict, list[str]]:
|
||||
"""Apply threshold logic to determine BUY / WATCH / SKIP verdict.
|
||||
|
||||
Returns the verdict and a list of reasoning strings.
|
||||
|
||||
Requirements: 6.6, 6.7, 6.8
|
||||
"""
|
||||
reasoning: list[str] = []
|
||||
|
||||
valuation_score = (
|
||||
normalized.valuation_score if normalized.valuation_score is not None else 0.0
|
||||
)
|
||||
|
||||
# --- Entropy gating (Req 6.4) ---
|
||||
if entropy > config.entropy_skip:
|
||||
reasoning.append(
|
||||
f"SKIP: entropy={entropy:.4f} > {config.entropy_skip} (high_entropy)"
|
||||
)
|
||||
return Verdict.SKIP, reasoning
|
||||
|
||||
# --- Check BUY conditions (Req 6.6) ---
|
||||
buy_conditions = {
|
||||
"p_up": p_up >= config.buy_p_up,
|
||||
"entropy": entropy <= config.buy_entropy_max,
|
||||
"ev_r": ev_r >= config.buy_ev_r_min,
|
||||
"macro_bias": normalized.macro_bias > config.macro_bias_threshold,
|
||||
"valuation": valuation_score >= config.buy_valuation_min,
|
||||
}
|
||||
|
||||
all_buy_met = all(buy_conditions.values())
|
||||
|
||||
if all_buy_met:
|
||||
reasoning.append(
|
||||
f"BUY: all conditions met — P_up={p_up:.4f} "
|
||||
f"(>= {config.buy_p_up}), entropy={entropy:.4f} "
|
||||
f"(<= {config.buy_entropy_max}), EV_R={ev_r:.4f} "
|
||||
f"(>= {config.buy_ev_r_min}), macro_bias={normalized.macro_bias:.2f} "
|
||||
f"(> {config.macro_bias_threshold}), valuation={valuation_score:.2f} "
|
||||
f"(>= {config.buy_valuation_min})"
|
||||
)
|
||||
return Verdict.BUY, reasoning
|
||||
|
||||
# --- Check WATCH conditions (Req 6.7) ---
|
||||
watch_conditions = {
|
||||
"p_up": p_up >= config.watch_p_up,
|
||||
"entropy": entropy <= config.watch_entropy_max,
|
||||
}
|
||||
|
||||
if all(watch_conditions.values()):
|
||||
failed_buy = [k for k, v in buy_conditions.items() if not v]
|
||||
reasoning.append(
|
||||
f"WATCH: P_up={p_up:.4f} (>= {config.watch_p_up}), "
|
||||
f"entropy={entropy:.4f} (<= {config.watch_entropy_max}) "
|
||||
f"but BUY conditions not fully met — failed: {', '.join(failed_buy)}"
|
||||
)
|
||||
return Verdict.WATCH, reasoning
|
||||
|
||||
# --- SKIP (Req 6.8) ---
|
||||
reasoning.append(
|
||||
f"SKIP: P_up={p_up:.4f}, entropy={entropy:.4f}, EV_R={ev_r:.4f} "
|
||||
f"— does not meet WATCH or BUY thresholds"
|
||||
)
|
||||
return Verdict.SKIP, reasoning
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_probabilistic_pipeline(
    normalized: NormalizedInput,
    confluence_signals: list[ConfluenceSignal],
    regime: RegimeClassification,
    config: ProbabilisticConfig,
) -> ProbabilisticResult:
    """Run the Bayesian probabilistic pipeline (Pipeline B) for one tick.

    Stages, in order:
    1. Regime-based prior via ``_regime_to_prior``.
    2. Raw likelihood ratio per signal via ``_compute_likelihood_ratios``.
    3. Correlation penalty via ``apply_correlation_penalty()``.
    4. Log-odds accumulation: logit(P_post) = logit(P_prior) + Σ penalized log(LR_i),
       mapped back to P_up with the sigmoid.
    5. Shannon entropy of P_up.
    6. EV_R = P_up · E[win_R] - (1 - P_up) · 1.0.
    7. BUY / WATCH / SKIP verdict via ``_determine_verdict``.

    Each stage appends a human-readable line to the result's reasoning,
    and a summary is logged at info level.

    Args:
        normalized: The unified input structure for this evaluation tick.
        confluence_signals: Signals that passed multi-timeframe confluence
            filtering.
        regime: The current market regime classification.
        config: Probabilistic pipeline thresholds.

    Returns:
        A :class:`ProbabilisticResult` carrying the verdict, prior,
        posterior (equal to P_up), entropy, EV_R, the penalized likelihood
        ratios, the regime value and the reasoning trail.

    Requirements: 6.1–6.9, 14.1–14.5
    """
    # 1. Regime-based prior (Req 6.1, 14.2)
    prior = _regime_to_prior(regime, config)
    trail: list[str] = [
        f"Regime={regime.regime.value}, trend_indicator={regime.trend_indicator:.1f} "
        f"→ prior={prior:.2f}"
    ]

    # 2 + 3. Likelihood ratios (Req 6.2), then correlation penalty (Req 7.1–7.4)
    penalized_lrs = apply_correlation_penalty(
        _compute_likelihood_ratios(confluence_signals)
    )

    # 4. Accumulate evidence in log-odds space (Req 6.3, 14.3)
    logit_prior = _logit(prior)
    evidence = sum(item.penalized_log_lr for item in penalized_lrs)
    logit_posterior = logit_prior + evidence
    p_up = _sigmoid(logit_posterior)
    trail.append(
        f"logit(prior)={logit_prior:.4f} + Σ penalized_log_lr={evidence:.4f} "
        f"= logit(posterior)={logit_posterior:.4f} → P_up={p_up:.4f}"
    )

    # 5. Shannon entropy (Req 6.4)
    entropy = _shannon_entropy(p_up)
    trail.append(f"Shannon entropy H={entropy:.4f}")

    # 6. EV_R (Req 6.5)
    ev_r = _compute_ev_r(p_up, confluence_signals)
    trail.append(f"EV_R={ev_r:.4f}")

    # 7. Verdict (Req 6.6, 6.7, 6.8)
    verdict, verdict_notes = _determine_verdict(
        p_up, entropy, ev_r, normalized, config
    )
    trail.extend(verdict_notes)

    logger.info(
        "Probabilistic pipeline [%s]: verdict=%s P_up=%.4f "
        "entropy=%.4f EV_R=%.4f prior=%.2f regime=%s signals=%d",
        normalized.ticker,
        verdict.value,
        p_up,
        entropy,
        ev_r,
        prior,
        regime.regime.value,
        len(confluence_signals),
    )

    return ProbabilisticResult(
        verdict=verdict,
        p_up=p_up,
        entropy=entropy,
        ev_r=ev_r,
        prior=prior,
        posterior=p_up,
        likelihood_ratios=penalized_lrs,
        regime=regime.regime.value,
        reasoning=trail,
    )
|
||||
@@ -0,0 +1 @@
|
||||
# Signal Library - technical signal evaluators (Fibonacci, MA Stack, RSI, Cup & Handle, Elliott Wave)
|
||||
@@ -0,0 +1,127 @@
|
||||
"""Base protocol and common helpers for signal evaluators.
|
||||
|
||||
Defines the ``SignalEvaluator`` protocol that every signal in the Signal
|
||||
Library must satisfy, plus shared utility functions for swing detection,
|
||||
lookback validation, and simple moving average computation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
from services.signal_engine.models import OHLCVBar, SignalResult
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Signal evaluator protocol
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SignalEvaluator(Protocol):
    """Structural interface every Signal Library evaluator must satisfy.

    An evaluator looks at the OHLCV bars of one timeframe and either
    reports a ``SignalResult`` (the signal fired) or ``None`` (not enough
    data, or the signal did not fire).
    """

    def evaluate(
        self,
        bars: list[OHLCVBar],
        timeframe: str,
    ) -> SignalResult | None:
        """Evaluate the signal against one timeframe's bars.

        Args:
            bars: OHLCV bars for a single timeframe.
            timeframe: Label of the timeframe the bars belong to.

        Returns:
            A ``SignalResult`` when the signal triggers, otherwise
            ``None`` (including when there is insufficient data).
        """
        ...
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Common helper functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def find_swing_high(
    bars: list[OHLCVBar],
    lookback: int,
) -> tuple[int, float] | None:
    """Locate the bar with the highest high among the most recent bars.

    Args:
        bars: OHLCV bar series (oldest-first).
        lookback: How many trailing bars to inspect.

    Returns:
        ``(index, price)`` where *index* is the position within *bars* and
        *price* is the high of the selected bar.  When several bars share
        the maximum, the most recent one is returned.  ``None`` is returned
        if *lookback* is not positive or *bars* holds fewer than *lookback*
        entries.
    """
    total = len(bars)
    if total < lookback or lookback <= 0:
        return None

    start = total - lookback
    peak_idx = start
    peak = bars[start].high
    # ``>=`` (not ``>``) so that the latest bar wins when highs are equal.
    for idx in range(start, total):
        candidate = bars[idx].high
        if candidate >= peak:
            peak_idx, peak = idx, candidate

    return (peak_idx, peak)
|
||||
|
||||
|
||||
def find_swing_low(
    bars: list[OHLCVBar],
    lookback: int,
) -> tuple[int, float] | None:
    """Locate the bar with the lowest low among the most recent bars.

    Args:
        bars: OHLCV bar series (oldest-first).
        lookback: How many trailing bars to inspect.

    Returns:
        ``(index, price)`` where *index* is the position within *bars* and
        *price* is the low of the selected bar.  When several bars share
        the minimum, the most recent one is returned.  ``None`` is returned
        if *lookback* is not positive or *bars* holds fewer than *lookback*
        entries.
    """
    total = len(bars)
    if total < lookback or lookback <= 0:
        return None

    start = total - lookback
    trough_idx = start
    trough = bars[start].low
    # ``<=`` (not ``<``) so that the latest bar wins when lows are equal.
    for idx in range(start, total):
        candidate = bars[idx].low
        if candidate <= trough:
            trough_idx, trough = idx, candidate

    return (trough_idx, trough)
|
||||
|
||||
|
||||
def validate_lookback(bars: list[OHLCVBar], min_bars: int) -> bool:
    """Tell whether *bars* is long enough to be evaluated.

    Args:
        bars: OHLCV bar series.
        min_bars: Required number of entries.

    Returns:
        ``True`` when *bars* holds at least *min_bars* entries.
    """
    available = len(bars)
    return available >= min_bars
|
||||
|
||||
|
||||
def compute_sma(bars: list[OHLCVBar], period: int) -> float | None:
    """Average the close prices of the trailing *period* bars.

    Args:
        bars: OHLCV bar series (oldest-first).
        period: Number of most recent bars included in the average.

    Returns:
        Arithmetic mean of the last *period* closes.  ``None`` when
        *period* is not positive or *bars* holds fewer than *period*
        entries.
    """
    if period <= 0:
        return None
    if len(bars) < period:
        return None

    recent_closes = [bar.close for bar in bars[-period:]]
    return sum(recent_closes) / period
|
||||
@@ -0,0 +1,206 @@
|
||||
"""Cup & Handle pattern signal evaluator.
|
||||
|
||||
Detects the Cup & Handle chart pattern — a bullish continuation pattern
|
||||
consisting of a U-shaped price recovery (the cup) followed by a small
|
||||
consolidation pullback (the handle).
|
||||
|
||||
Pattern detection algorithm:
|
||||
1. Find the left rim (local high in the first third of bars).
|
||||
2. Find the cup bottom (lowest low between left rim and right rim area).
|
||||
3. Find the right rim (local high in the last third of bars, near left rim price).
|
||||
4. Identify the handle as a small pullback after the right rim (last few bars).
|
||||
|
||||
Pattern completeness scoring:
|
||||
- Cup depth: ``(left_rim - bottom) / left_rim`` — valid range 12–33%.
|
||||
- Symmetry: how close left_rim and right_rim prices are (within 5% = perfect).
|
||||
- Handle: small pullback (< 50% of cup depth) after right rim.
|
||||
|
||||
The signal is always BULLISH (cup & handle is a bullish continuation pattern).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from services.signal_engine.models import OHLCVBar, SignalDirection, SignalResult
|
||||
from services.signal_engine.signals.base import validate_lookback
|
||||
|
||||
# Default minimum number of bars required for cup & handle detection
# (default value of ``CupHandleEvaluator``'s ``min_bars`` argument).
DEFAULT_MIN_BARS: int = 30

# Cup depth valid range (as fraction of left rim price). Patterns whose
# depth falls outside [_CUP_DEPTH_MIN, _CUP_DEPTH_MAX] are rejected.
_CUP_DEPTH_MIN: float = 0.12  # 12%
_CUP_DEPTH_MAX: float = 0.33  # 33%

# Symmetry: largest difference between left and right rim prices, as a
# fraction of left rim price, that still earns a "perfect" symmetry score.
_SYMMETRY_PERFECT_PCT: float = 0.05  # 5%

# Handle: maximum pullback as fraction of cup depth. A deeper handle
# invalidates the pattern.
_HANDLE_MAX_RETRACE: float = 0.50  # 50% of cup depth

# Handle lookback: fraction of the bar count, taken from the end of the
# series, that is scanned for the handle (the evaluator uses at least 2 bars).
_HANDLE_LOOKBACK_FRACTION: float = 0.15  # last 15% of bars

# Confidence multiplier: reported confidence = completeness * this factor.
_CONFIDENCE_MULTIPLIER: float = 0.90
|
||||
|
||||
|
||||
class CupHandleEvaluator:
    """Signal evaluator that recognises the Cup & Handle chart pattern.

    Satisfies the :class:`~services.signal_engine.signals.base.SignalEvaluator`
    protocol.

    Parameters
    ----------
    min_bars:
        Smallest number of OHLCV bars for which :meth:`evaluate` attempts
        detection.  Defaults to ``30``.
    """

    def __init__(self, min_bars: int = DEFAULT_MIN_BARS) -> None:
        self.min_bars = min_bars

    # ------------------------------------------------------------------
    # Public API (SignalEvaluator protocol)
    # ------------------------------------------------------------------

    def evaluate(
        self,
        bars: list[OHLCVBar],
        timeframe: str,
    ) -> SignalResult | None:
        """Look for a Cup & Handle pattern in *bars* for *timeframe*.

        The result is always BULLISH.  ``None`` is returned when fewer than
        :attr:`min_bars` bars are supplied or when any pattern constraint
        (rim prices, cup depth, rim symmetry, handle depth) is violated.
        """
        if not validate_lookback(bars, self.min_bars):
            return None

        bar_count = len(bars)
        third = bar_count // 3
        if third < 1:
            return None

        # Left rim: highest high of the first third.  ``max`` keeps the
        # earliest bar when several highs are equal.
        left_rim_idx = max(range(third), key=lambda i: bars[i].high)
        left_rim_price = bars[left_rim_idx].high
        if left_rim_price <= 0:
            return None

        # Right rim: highest high of the last third (earliest on ties).
        tail_start = bar_count - third
        if tail_start >= bar_count:
            return None
        right_rim_idx = max(range(tail_start, bar_count), key=lambda i: bars[i].high)
        right_rim_price = bars[right_rim_idx].high

        # Cup bottom: lowest low strictly between the two rims.
        first_inner = left_rim_idx + 1
        if first_inner >= right_rim_idx:
            return None
        bottom_idx = min(range(first_inner, right_rim_idx), key=lambda i: bars[i].low)
        bottom_price = bars[bottom_idx].low

        # Cup depth must be positive and inside the accepted band.
        cup_depth = left_rim_price - bottom_price
        if cup_depth <= 0:
            return None
        cup_depth_pct = cup_depth / left_rim_price
        if cup_depth_pct > _CUP_DEPTH_MAX or cup_depth_pct < _CUP_DEPTH_MIN:
            return None

        # Symmetry: perfect up to _SYMMETRY_PERFECT_PCT, then a linear decay
        # that reaches 0.0 at a 20% rim difference.
        rim_diff_pct = abs(left_rim_price - right_rim_price) / left_rim_price
        decay_end = 0.20
        symmetry_score = (
            1.0
            if rim_diff_pct <= _SYMMETRY_PERFECT_PCT
            else max(
                0.0,
                1.0 - (rim_diff_pct - _SYMMETRY_PERFECT_PCT) / (decay_end - _SYMMETRY_PERFECT_PCT),
            )
        )
        if symmetry_score <= 0.0:
            # Rims differ by 20% or more: not a cup.
            return None

        # Handle: pullback from the right rim measured over the trailing bars.
        handle_lookback = max(2, int(bar_count * _HANDLE_LOOKBACK_FRACTION))
        handle_low = min(b.low for b in bars[-handle_lookback:])
        handle_depth = right_rim_price - handle_low
        handle_retrace = handle_depth / cup_depth
        if handle_retrace > _HANDLE_MAX_RETRACE:
            # Handle retraces too much of the cup.
            return None
        # Shallow handle scores high; the score falls to 0.0 at the maximum retrace.
        handle_score = (
            1.0
            if handle_retrace <= 0
            else 1.0 - (handle_retrace / _HANDLE_MAX_RETRACE)
        )

        # Depth quality: best at the centre of the accepted band, 0.0 at its edges.
        ideal_depth = (_CUP_DEPTH_MIN + _CUP_DEPTH_MAX) / 2.0  # 0.225
        depth_deviation = abs(cup_depth_pct - ideal_depth) / ((_CUP_DEPTH_MAX - _CUP_DEPTH_MIN) / 2.0)
        depth_score = max(0.0, 1.0 - depth_deviation)

        # Weighted blend of the three component scores, clamped to [0, 1].
        completeness = (
            0.35 * symmetry_score
            + 0.35 * depth_score
            + 0.30 * handle_score
        )
        completeness = max(0.0, min(1.0, completeness))

        details = {
            "left_rim": left_rim_price,
            "left_rim_idx": left_rim_idx,
            "right_rim": right_rim_price,
            "right_rim_idx": right_rim_idx,
            "bottom": bottom_price,
            "bottom_idx": bottom_idx,
            "cup_depth_pct": round(cup_depth_pct, 4),
            "handle_depth": round(handle_depth, 4),
            "handle_retrace_pct": round(handle_retrace, 4),
            "symmetry_score": round(symmetry_score, 4),
            "depth_score": round(depth_score, 4),
            "handle_score": round(handle_score, 4),
            "completeness": round(completeness, 4),
        }
        return SignalResult(
            signal_type="cup_handle",
            timeframe=timeframe,
            strength=completeness,
            direction=SignalDirection.BULLISH,
            confidence=completeness * _CONFIDENCE_MULTIPLIER,
            metadata=details,
        )
|
||||
@@ -0,0 +1,499 @@
|
||||
"""Elliott Wave signal evaluator.
|
||||
|
||||
Detects Elliott Wave patterns — impulse waves (5-wave structure) and
|
||||
corrective waves (3-wave structure) — using a simplified zigzag pivot
|
||||
filter. Produces a signal with the current wave position and projected
|
||||
direction.
|
||||
|
||||
Wave detection algorithm (simplified):
|
||||
1. Find significant pivot points (local highs and lows) using a zigzag
|
||||
filter that identifies reversals of at least X% of the price range.
|
||||
2. Count alternating pivots to identify wave structure.
|
||||
3. Five alternating pivots = impulse wave (bullish if trending up,
|
||||
bearish if trending down).
|
||||
4. Three alternating pivots after an impulse = corrective wave.
|
||||
|
||||
Signal logic:
|
||||
- Impulse wave 3 or 5: strong signal in the trend direction.
|
||||
- Corrective wave (A, B, C): signal in the opposite direction
|
||||
(anticipating next impulse).
|
||||
- Ambiguous wave count: return ``None``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from services.signal_engine.models import OHLCVBar, SignalDirection, SignalResult
|
||||
from services.signal_engine.signals.base import validate_lookback
|
||||
|
||||
# Default minimum number of bars required for evaluation
DEFAULT_MIN_BARS: int = 30

# Default zigzag reversal threshold, as a fraction of the overall price range
# (highest high minus lowest low of the evaluated bars).
_DEFAULT_ZIGZAG_PCT: float = 0.05  # 5%

# Wave type labels (reported under ``wave_type`` in the signal metadata)
WAVE_TYPE_IMPULSE: str = "impulse"
WAVE_TYPE_CORRECTIVE: str = "corrective"

# Number of consecutive pivots examined when testing for an impulse wave
_IMPULSE_WAVE_COUNT: int = 5
# Number of consecutive pivots examined when testing for a corrective wave
_CORRECTIVE_WAVE_COUNT: int = 3

# Confidence multiplier for wave clarity (confidence = clarity * this factor, capped at 1.0)
_CONFIDENCE_MULTIPLIER: float = 0.85
|
||||
|
||||
|
||||
class ElliottWaveEvaluator:
    """Signal evaluator built on a simplified Elliott Wave pivot count.

    Satisfies the :class:`~services.signal_engine.signals.base.SignalEvaluator`
    protocol.

    Parameters
    ----------
    min_bars:
        Smallest number of OHLCV bars for which :meth:`evaluate` attempts
        detection.  Defaults to ``30``.
    zigzag_pct:
        Reversal size, as a fraction of the overall price range, that the
        zigzag filter requires before confirming a pivot.  Defaults to
        ``0.05`` (5%).
    """

    def __init__(
        self,
        min_bars: int = DEFAULT_MIN_BARS,
        zigzag_pct: float = _DEFAULT_ZIGZAG_PCT,
    ) -> None:
        self.min_bars = min_bars
        self.zigzag_pct = zigzag_pct

    # ------------------------------------------------------------------
    # Public API (SignalEvaluator protocol)
    # ------------------------------------------------------------------

    def evaluate(
        self,
        bars: list[OHLCVBar],
        timeframe: str,
    ) -> SignalResult | None:
        """Classify the wave structure of *bars* for *timeframe*.

        ``None`` is returned when fewer than :attr:`min_bars` bars are
        supplied, when the bars span no price range (flat market), when
        fewer than three pivots are found, or when the pivots cannot be
        classified as an impulse or corrective wave.
        """
        if not validate_lookback(bars, self.min_bars):
            return None

        # The zigzag threshold is a share of the full high-low span.
        price_range = max(b.high for b in bars) - min(b.low for b in bars)
        if price_range <= 0:
            return None  # flat market

        pivots = _find_zigzag_pivots(bars, price_range * self.zigzag_pct)
        if len(pivots) < _CORRECTIVE_WAVE_COUNT:
            return None  # too few pivots for any wave structure

        wave_info = _classify_waves(pivots, price_range)
        if wave_info is None:
            return None  # ambiguous wave count

        wave_type = wave_info["wave_type"]
        current_position = wave_info["current_position"]
        trend_up = wave_info["trend_up"]
        clarity = wave_info["clarity"]

        # Clarity is scaled by a weight that depends on the wave kind:
        # impulse waves 3 and 5 count fully, other impulse positions 0.6,
        # corrective waves 0.7.
        if wave_type == WAVE_TYPE_IMPULSE:
            weight = 1.0 if current_position in (3, 5) else 0.6
        else:
            weight = 0.7
        strength: float = min(1.0, clarity * weight)

        # Impulse signals follow the trend; corrective signals anticipate
        # the next impulse in the original trend.  Both resolve to the
        # direction of ``trend_up``.
        direction: SignalDirection = (
            SignalDirection.BULLISH if trend_up else SignalDirection.BEARISH
        )

        confidence = min(1.0, clarity * _CONFIDENCE_MULTIPLIER)

        # Copy only the reported fields of each pivot into the metadata.
        pivot_meta = [
            {field: pivot[field] for field in ("index", "price", "type")}
            for pivot in pivots
        ]

        return SignalResult(
            signal_type="elliott_wave",
            timeframe=timeframe,
            strength=strength,
            direction=direction,
            confidence=confidence,
            metadata={
                "wave_count": len(pivots),
                "wave_type": wave_type,
                "current_wave_position": current_position,
                "trend_up": trend_up,
                "clarity": round(clarity, 4),
                "pivots": pivot_meta,
            },
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _find_zigzag_pivots(
|
||||
bars: list[OHLCVBar],
|
||||
threshold: float,
|
||||
) -> list[dict]:
|
||||
"""Find significant pivot points using a zigzag filter.
|
||||
|
||||
A pivot is a local high or low where the price reverses by at least
|
||||
*threshold* from the last confirmed pivot.
|
||||
|
||||
Returns a list of dicts with keys: ``index``, ``price``, ``type``
|
||||
(``"high"`` or ``"low"``).
|
||||
"""
|
||||
if len(bars) < 2:
|
||||
return []
|
||||
|
||||
pivots: list[dict] = []
|
||||
|
||||
# Seed with the first bar's high and low as candidates
|
||||
last_high_idx = 0
|
||||
last_high = bars[0].high
|
||||
last_low_idx = 0
|
||||
last_low = bars[0].low
|
||||
|
||||
# Direction: 1 = looking for a high (trending up), -1 = looking for a low
|
||||
# Start by determining initial direction from first two bars
|
||||
if bars[1].close >= bars[0].close:
|
||||
direction = 1 # trending up, looking for a high
|
||||
else:
|
||||
direction = -1 # trending down, looking for a low
|
||||
|
||||
for i in range(1, len(bars)):
|
||||
bar = bars[i]
|
||||
|
||||
if direction == 1:
|
||||
# Trending up — track the highest high
|
||||
if bar.high >= last_high:
|
||||
last_high = bar.high
|
||||
last_high_idx = i
|
||||
# Check for reversal: price dropped by threshold from the high
|
||||
if last_high - bar.low >= threshold:
|
||||
# Confirm the high as a pivot
|
||||
pivots.append({
|
||||
"index": last_high_idx,
|
||||
"price": last_high,
|
||||
"type": "high",
|
||||
})
|
||||
# Switch direction: now looking for a low
|
||||
direction = -1
|
||||
last_low = bar.low
|
||||
last_low_idx = i
|
||||
else:
|
||||
# Trending down — track the lowest low
|
||||
if bar.low <= last_low:
|
||||
last_low = bar.low
|
||||
last_low_idx = i
|
||||
# Check for reversal: price rose by threshold from the low
|
||||
if bar.high - last_low >= threshold:
|
||||
# Confirm the low as a pivot
|
||||
pivots.append({
|
||||
"index": last_low_idx,
|
||||
"price": last_low,
|
||||
"type": "low",
|
||||
})
|
||||
# Switch direction: now looking for a high
|
||||
direction = 1
|
||||
last_high = bar.high
|
||||
last_high_idx = i
|
||||
|
||||
# Add the final unconfirmed pivot (the current trend endpoint)
|
||||
if direction == 1 and (not pivots or pivots[-1]["type"] != "high"):
|
||||
pivots.append({
|
||||
"index": last_high_idx,
|
||||
"price": last_high,
|
||||
"type": "high",
|
||||
})
|
||||
elif direction == -1 and (not pivots or pivots[-1]["type"] != "low"):
|
||||
pivots.append({
|
||||
"index": last_low_idx,
|
||||
"price": last_low,
|
||||
"type": "low",
|
||||
})
|
||||
|
||||
return pivots
|
||||
|
||||
|
||||
def _classify_waves(
    pivots: list[dict],
    price_range: float,
) -> dict | None:
    """Decide whether the pivots describe an impulse or a corrective wave.

    Candidates are tried in this order and the first match wins:

    1. the last five pivots as an impulse;
    2. the first five pivots as an impulse followed by the next three
       pivots as a correction (needs at least eight pivots);
    3. the last three pivots as a correction.

    The trend passed to every check is "up" when the last pivot price is
    above the first pivot price.

    Returns:
        The dict produced by the matching check (``wave_type``,
        ``current_position``, ``trend_up``, ``clarity``), or ``None`` when
        fewer than three pivots are given or nothing matches.
    """
    pivot_count = len(pivots)
    if pivot_count < _CORRECTIVE_WAVE_COUNT:
        return None

    trend_up = pivots[-1]["price"] > pivots[0]["price"]

    # 1. Impulse formed by the most recent pivots.
    if pivot_count >= _IMPULSE_WAVE_COUNT:
        tail_impulse = _check_impulse(
            pivots[-_IMPULSE_WAVE_COUNT:], trend_up, price_range,
        )
        if tail_impulse is not None:
            return tail_impulse

    # 2. Early impulse followed by a correction.
    combined = _IMPULSE_WAVE_COUNT + _CORRECTIVE_WAVE_COUNT
    if pivot_count >= combined:
        head_impulse = _check_impulse(
            pivots[:_IMPULSE_WAVE_COUNT], trend_up, price_range,
        )
        if head_impulse is not None:
            follow_up = _check_corrective(
                pivots[_IMPULSE_WAVE_COUNT:combined], trend_up, price_range,
            )
            if follow_up is not None:
                return follow_up

    # 3. Correction formed by the most recent pivots; a miss here means
    #    the count is ambiguous and the check's ``None`` is passed through.
    return _check_corrective(
        pivots[-_CORRECTIVE_WAVE_COUNT:], trend_up, price_range,
    )
|
||||
|
||||
|
||||
def _check_impulse(
    pivots: list[dict],
    trend_up: bool,
    price_range: float,
) -> dict | None:
    """Test whether exactly five pivots form an impulse wave.

    The pivot prices are handed to the bullish validator when *trend_up*
    is true and to the bearish validator otherwise.

    Returns:
        ``None`` when the pivot count is not five or validation fails;
        otherwise a dict with ``wave_type`` (impulse), ``current_position``
        (always 5, the final impulse wave), ``trend_up`` and ``clarity``.
    """
    if len(pivots) != _IMPULSE_WAVE_COUNT:
        return None

    prices = [pivot["price"] for pivot in pivots]

    validator = _validate_bullish_impulse if trend_up else _validate_bearish_impulse
    if not validator(prices):
        return None

    return {
        "wave_type": WAVE_TYPE_IMPULSE,
        "current_position": 5,
        "trend_up": trend_up,
        # How clean the wave structure is, in [0, 1].
        "clarity": _compute_impulse_clarity(prices, trend_up, price_range),
    }
|
||||
|
||||
|
||||
def _validate_bullish_impulse(prices: list[float]) -> bool:
|
||||
"""Validate a 5-pivot sequence as a bullish impulse.
|
||||
|
||||
Simplified rules:
|
||||
- The overall trend is up (last > first).
|
||||
- Wave 3 (pivot 2 to pivot 3) should be the largest move or
|
||||
at least not the shortest.
|
||||
- Wave 2 should not retrace below wave 1 start.
|
||||
- Wave 4 should not overlap wave 1 end.
|
||||
"""
|
||||
if len(prices) != 5:
|
||||
return False
|
||||
|
||||
# Overall upward trend
|
||||
if prices[-1] <= prices[0]:
|
||||
return False
|
||||
|
||||
# Compute wave magnitudes
|
||||
waves = [abs(prices[i + 1] - prices[i]) for i in range(4)]
|
||||
|
||||
# Wave 3 (index 2) should not be the shortest impulse wave
|
||||
# Impulse waves are waves 0, 2, 4 (odd-indexed moves in 0-based)
|
||||
impulse_waves = [waves[0], waves[2]]
|
||||
if len(waves) > 3:
|
||||
impulse_waves.append(waves[3])
|
||||
|
||||
# Wave 3 (waves[2]) should be significant
|
||||
if waves[2] < min(waves[0], waves[2]) * 0.5:
|
||||
return False
|
||||
|
||||
# The pattern should show alternating direction
|
||||
# Check that consecutive pivots alternate in direction
|
||||
for i in range(3):
|
||||
move_a = prices[i + 1] - prices[i]
|
||||
move_b = prices[i + 2] - prices[i + 1]
|
||||
# Consecutive moves should be in opposite directions
|
||||
if move_a * move_b >= 0:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _validate_bearish_impulse(prices: list[float]) -> bool:
|
||||
"""Validate a 5-pivot sequence as a bearish impulse.
|
||||
|
||||
Mirror of bullish validation with inverted price direction.
|
||||
"""
|
||||
if len(prices) != 5:
|
||||
return False
|
||||
|
||||
# Overall downward trend
|
||||
if prices[-1] >= prices[0]:
|
||||
return False
|
||||
|
||||
# Compute wave magnitudes
|
||||
waves = [abs(prices[i + 1] - prices[i]) for i in range(4)]
|
||||
|
||||
# Wave 3 (waves[2]) should be significant
|
||||
if waves[2] < min(waves[0], waves[2]) * 0.5:
|
||||
return False
|
||||
|
||||
# Check alternating direction
|
||||
for i in range(3):
|
||||
move_a = prices[i + 1] - prices[i]
|
||||
move_b = prices[i + 2] - prices[i + 1]
|
||||
if move_a * move_b >= 0:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _compute_impulse_clarity(
|
||||
prices: list[float],
|
||||
trend_up: bool,
|
||||
price_range: float,
|
||||
) -> float:
|
||||
"""Compute wave clarity for an impulse wave.
|
||||
|
||||
Clarity is based on:
|
||||
- How well the pivots alternate (already validated).
|
||||
- How proportional the wave magnitudes are.
|
||||
- How significant the waves are relative to the price range.
|
||||
"""
|
||||
if price_range <= 0:
|
||||
return 0.0
|
||||
|
||||
waves = [abs(prices[i + 1] - prices[i]) for i in range(4)]
|
||||
total_movement = sum(waves)
|
||||
|
||||
# Significance: total wave movement relative to price range
|
||||
significance = min(1.0, total_movement / (price_range * 2.0))
|
||||
|
||||
# Proportionality: wave 3 should be the largest or close to it
|
||||
max_wave = max(waves)
|
||||
if max_wave <= 0:
|
||||
return 0.0
|
||||
|
||||
wave3_ratio = waves[2] / max_wave # 1.0 if wave 3 is the largest
|
||||
|
||||
# Overall clarity
|
||||
clarity = 0.5 * significance + 0.5 * wave3_ratio
|
||||
return max(0.0, min(1.0, clarity))
|
||||
|
||||
|
||||
def _check_corrective(
|
||||
pivots: list[dict],
|
||||
trend_up: bool,
|
||||
price_range: float,
|
||||
) -> dict | None:
|
||||
"""Check if 3 pivots form a valid corrective wave (A-B-C).
|
||||
|
||||
A corrective wave moves against the main trend:
|
||||
- For a bullish main trend: corrective wave moves down (A down, B up, C down).
|
||||
- For a bearish main trend: corrective wave moves up (A up, B down, C up).
|
||||
"""
|
||||
if len(pivots) != _CORRECTIVE_WAVE_COUNT:
|
||||
return None
|
||||
|
||||
prices = [p["price"] for p in pivots]
|
||||
|
||||
# Check alternating direction
|
||||
move_a = prices[1] - prices[0]
|
||||
move_b = prices[2] - prices[1]
|
||||
|
||||
# Moves must be in opposite directions
|
||||
if move_a * move_b >= 0:
|
||||
return None
|
||||
|
||||
# For a bullish main trend, the corrective wave should move down overall
|
||||
if trend_up:
|
||||
if prices[2] >= prices[0]:
|
||||
return None # not a downward correction
|
||||
else:
|
||||
if prices[2] <= prices[0]:
|
||||
return None # not an upward correction
|
||||
|
||||
# Compute clarity
|
||||
waves = [abs(prices[1] - prices[0]), abs(prices[2] - prices[1])]
|
||||
total_movement = sum(waves)
|
||||
|
||||
if price_range <= 0:
|
||||
return 0.0
|
||||
|
||||
significance = min(1.0, total_movement / price_range)
|
||||
clarity = significance * 0.8 # corrective waves are inherently less clear
|
||||
|
||||
# Current position is wave C (the last wave in the correction)
|
||||
return {
|
||||
"wave_type": WAVE_TYPE_CORRECTIVE,
|
||||
"current_position": 3, # wave C
|
||||
"trend_up": trend_up,
|
||||
"clarity": max(0.0, min(1.0, clarity)),
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
"""Fibonacci retracement signal evaluator.
|
||||
|
||||
Computes retracement levels using ``L(r) = SH - r * (SH - SL)`` for the
|
||||
standard ratios [0.236, 0.382, 0.5, 0.618, 0.786] and produces a signal
|
||||
based on the proximity of the current price to the nearest level.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from services.signal_engine.models import OHLCVBar, SignalDirection, SignalResult
|
||||
from services.signal_engine.signals.base import (
|
||||
find_swing_high,
|
||||
find_swing_low,
|
||||
validate_lookback,
|
||||
)
|
||||
|
||||
# Standard Fibonacci retracement ratios; one level is computed per ratio.
RETRACEMENT_RATIOS: list[float] = [0.236, 0.382, 0.5, 0.618, 0.786]

# Ratios considered "key" levels — proximity to these yields higher confidence
# (strength * 1.2, capped at 1.0, instead of strength * 0.8).
_KEY_RATIOS: set[float] = {0.5, 0.618}

# Default minimum number of bars required for evaluation. The evaluator also
# uses its ``min_bars`` value as the swing high/low lookback window.
DEFAULT_MIN_BARS: int = 20
|
||||
|
||||
|
||||
class FibonacciEvaluator:
    """Signal evaluator based on Fibonacci retracement levels.

    Satisfies the :class:`~services.signal_engine.signals.base.SignalEvaluator`
    protocol.

    Parameters
    ----------
    min_bars:
        Smallest number of OHLCV bars for which :meth:`evaluate` produces
        a signal; also the size of the trailing window searched for the
        swing high and swing low.  Defaults to ``20``.
    """

    def __init__(self, min_bars: int = DEFAULT_MIN_BARS) -> None:
        self.min_bars = min_bars

    # ------------------------------------------------------------------
    # Public API (SignalEvaluator protocol)
    # ------------------------------------------------------------------

    def evaluate(
        self,
        bars: list[OHLCVBar],
        timeframe: str,
    ) -> SignalResult | None:
        """Score the latest close against the retracement levels of *bars*.

        ``None`` is returned when fewer than :attr:`min_bars` bars are
        supplied, when no swing high/low can be determined, or when the
        swing high is not strictly above the swing low (flat market, so
        there is no retracement range).
        """
        if not validate_lookback(bars, self.min_bars):
            return None

        # Swing extremes over the trailing ``min_bars`` bars.
        swing_high = find_swing_high(bars, self.min_bars)
        swing_low = find_swing_low(bars, self.min_bars)
        if swing_high is None or swing_low is None:
            return None

        sh_price = swing_high[1]
        sl_price = swing_low[1]
        if sh_price <= sl_price:
            return None

        price_range = sh_price - sl_price
        current_price = bars[-1].close

        # L(r) = SH - r * (SH - SL) for every standard ratio.
        levels: dict[float, float] = {}
        for ratio in RETRACEMENT_RATIOS:
            levels[ratio] = sh_price - ratio * price_range

        # Closest level to the latest close; ``min`` keeps the first ratio
        # in list order when distances are equal.
        nearest_ratio: float = min(
            RETRACEMENT_RATIOS,
            key=lambda ratio: abs(current_price - levels[ratio]),
        )
        nearest_level: float = levels[nearest_ratio]
        min_distance: float = abs(current_price - nearest_level)

        # Strength falls linearly with distance, clamped to [0, 1].
        strength = max(0.0, min(1.0, 1.0 - (min_distance / price_range)))

        # BULLISH while the close is at or above the swing low, else BEARISH.
        # NOTE(review): the swing-low window includes the last bar, so for
        # bars with close >= low this looks like it always yields BULLISH —
        # confirm whether that is intended.
        direction = (
            SignalDirection.BULLISH
            if current_price >= sl_price
            else SignalDirection.BEARISH
        )

        # Key ratios boost confidence (capped at 1.0); others dampen it.
        is_key_level = nearest_ratio in _KEY_RATIOS
        confidence = min(1.0, strength * 1.2) if is_key_level else strength * 0.8

        details = {
            "swing_high": sh_price,
            "swing_low": sl_price,
            "retracement_levels": levels,
            "nearest_ratio": nearest_ratio,
            "nearest_level": nearest_level,
            "distance_to_nearest": min_distance,
            "current_price": current_price,
        }
        return SignalResult(
            signal_type="fibonacci",
            timeframe=timeframe,
            strength=strength,
            direction=direction,
            confidence=confidence,
            metadata=details,
        )
|
||||
@@ -0,0 +1,182 @@
|
||||
"""Moving average stack signal evaluator.
|
||||
|
||||
Detects bullish alignment (MA_10 > MA_20 > MA_50 > MA_200) and bearish
|
||||
alignment (MA_10 < MA_20 < MA_50 < MA_200), producing a signal strength
|
||||
proportional to the degree of alignment.
|
||||
|
||||
Full alignment (all 4 MAs in strict order) yields strength 1.0, partial
alignment (3 consecutive MAs in strict order) yields 0.6, and no alignment
returns ``None`` (no signal).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from services.signal_engine.models import OHLCVBar, SignalDirection, SignalResult
|
||||
from services.signal_engine.signals.base import compute_sma, validate_lookback
|
||||
|
||||
# MA periods used for stack evaluation
|
||||
MA_PERIODS: list[int] = [10, 20, 50, 200]
|
||||
|
||||
# Minimum number of bars required (longest MA period)
|
||||
MIN_BARS: int = 200
|
||||
|
||||
# Strength values
|
||||
_FULL_ALIGNMENT_STRENGTH: float = 1.0
|
||||
_PARTIAL_ALIGNMENT_STRENGTH: float = 0.6
|
||||
|
||||
# Confidence multiplier (high confidence for clear alignment patterns)
|
||||
_CONFIDENCE_MULTIPLIER: float = 0.9
|
||||
|
||||
|
||||
class MAStackEvaluator:
    """Signal evaluator that scores how well the 10/20/50/200 SMAs are stacked.

    Conforms to the
    :class:`~services.signal_engine.signals.base.SignalEvaluator` protocol.

    The four simple moving averages are compared in period order.  A stack in
    which every average is strictly above (bullish) or strictly below
    (bearish) the next slower one has strength 1.0.  When only three
    neighbouring averages (MA_10..MA_50 or MA_20..MA_200) are strictly ordered
    the strength is 0.6.  Any other arrangement produces no signal.
    """

    # ------------------------------------------------------------------
    # Public API (SignalEvaluator protocol)
    # ------------------------------------------------------------------

    def evaluate(
        self,
        bars: list[OHLCVBar],
        timeframe: str,
    ) -> SignalResult | None:
        """Score the moving-average stack of *bars* for *timeframe*.

        Returns ``None`` if the look-back validation for ``MIN_BARS`` fails,
        if any of the four averages cannot be computed, or if the averages
        show neither full nor partial alignment.
        """
        if not validate_lookback(bars, MIN_BARS):
            return None

        averages = [compute_sma(bars, period) for period in (10, 20, 50, 200)]

        # compute_sma signals insufficient data with None; bail out defensively.
        if any(value is None for value in averages):
            return None

        fast, medium, slow, trend = averages

        # Full alignment is tested first so it always wins over a partial match.
        if fast > medium > slow > trend:
            direction = SignalDirection.BULLISH
            strength = _FULL_ALIGNMENT_STRENGTH
            label = "full_bullish"
        elif fast < medium < slow < trend:
            direction = SignalDirection.BEARISH
            strength = _FULL_ALIGNMENT_STRENGTH
            label = "full_bearish"
        elif self._check_partial_bullish(averages):
            direction = SignalDirection.BULLISH
            strength = _PARTIAL_ALIGNMENT_STRENGTH
            label = "partial_bullish"
        elif self._check_partial_bearish(averages):
            direction = SignalDirection.BEARISH
            strength = _PARTIAL_ALIGNMENT_STRENGTH
            label = "partial_bearish"
        else:
            # Neither direction is aligned: stay silent.
            return None

        return self._build_result(
            direction=direction,
            strength=strength,
            alignment=label,
            timeframe=timeframe,
            ma_values=averages,
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _check_partial_bullish(ma_values: list[float]) -> bool:
        """Tell whether three neighbouring averages are strictly descending.

        *ma_values* is ordered (MA_10, MA_20, MA_50, MA_200).  The windows
        ``[0:3]`` and ``[1:4]`` are inspected; a faster average sitting above
        each slower one is the bullish arrangement.
        """
        return any(
            ma_values[start] > ma_values[start + 1] > ma_values[start + 2]
            for start in (0, 1)
        )

    @staticmethod
    def _check_partial_bearish(ma_values: list[float]) -> bool:
        """Tell whether three neighbouring averages are strictly ascending.

        *ma_values* is ordered (MA_10, MA_20, MA_50, MA_200).  The windows
        ``[0:3]`` and ``[1:4]`` are inspected; a faster average sitting below
        each slower one is the bearish arrangement.
        """
        return any(
            ma_values[start] < ma_values[start + 1] < ma_values[start + 2]
            for start in (0, 1)
        )

    @staticmethod
    def _build_result(
        *,
        direction: SignalDirection,
        strength: float,
        alignment: str,
        timeframe: str,
        ma_values: list[float],
    ) -> SignalResult:
        """Assemble the ``SignalResult`` emitted for an MA stack signal.

        Confidence is the strength scaled by ``_CONFIDENCE_MULTIPLIER``; the
        four averages and the alignment label are attached as metadata.
        """
        details = {
            f"ma_{period}": ma_values[position]
            for position, period in enumerate((10, 20, 50, 200))
        }
        details["alignment"] = alignment

        return SignalResult(
            signal_type="ma_stack",
            timeframe=timeframe,
            strength=strength,
            direction=direction,
            confidence=strength * _CONFIDENCE_MULTIPLIER,
            metadata=details,
        )
|
||||
@@ -0,0 +1,149 @@
|
||||
"""RSI (Relative Strength Index) signal evaluator.
|
||||
|
||||
Computes the standard 14-period RSI using Wilder's smoothing method and
|
||||
produces overbought (RSI > 70 → BEARISH) or oversold (RSI < 30 → BULLISH)
|
||||
signals with strength scaled by distance from the threshold.
|
||||
|
||||
When RSI is between 30 and 70 (neutral zone), no signal is produced.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from services.signal_engine.models import OHLCVBar, SignalDirection, SignalResult
|
||||
from services.signal_engine.signals.base import validate_lookback
|
||||
|
||||
# Default RSI period (standard Wilder 14-period)
DEFAULT_RSI_PERIOD: int = 14

# Minimum bars required: period + 1 (one extra close is needed to form the
# first price change)
DEFAULT_MIN_BARS: int = DEFAULT_RSI_PERIOD + 1  # 15

# Overbought / oversold thresholds
OVERBOUGHT_THRESHOLD: float = 70.0
OVERSOLD_THRESHOLD: float = 30.0

# Maximum possible distance from threshold (used for strength scaling)
_MAX_DISTANCE_OVERBOUGHT: float = 100.0 - OVERBOUGHT_THRESHOLD  # 30
_MAX_DISTANCE_OVERSOLD: float = OVERSOLD_THRESHOLD - 0.0  # 30

# Confidence multiplier
_CONFIDENCE_MULTIPLIER: float = 0.85

# RSI reported for a series with neither gains nor losses.  The ratio
# avg_gain / avg_loss is 0/0 there, so the midpoint is used instead of
# pretending the market is maximally overbought.
_NEUTRAL_RSI: float = 50.0


def compute_rsi(bars: list[OHLCVBar], period: int = DEFAULT_RSI_PERIOD) -> float | None:
    """Compute RSI using Wilder's smoothing method.

    The first ``period`` close-to-close changes seed the average gain and
    average loss with a plain mean; every later change is folded in with
    Wilder's recursive smoothing ``(prev * (period - 1) + value) / period``.

    Args:
        bars: OHLCV bar series (oldest-first); only ``close`` is read.
        period: RSI period (default 14).  Must be at least 1.

    Returns:
        RSI value in [0, 100], or ``None`` if ``period`` is smaller than 1 or
        fewer than ``period + 1`` bars are supplied.  A series with no losses
        yields 100.0; a series with no price movement at all yields the
        neutral value 50.0.
    """
    # A non-positive period would divide by zero (or by a negative number)
    # below, so it is treated like any other "cannot compute" case.
    if period < 1 or len(bars) < period + 1:
        return None

    closes = [bar.close for bar in bars]

    # Close-to-close price changes, oldest first.
    changes = [current - previous for previous, current in zip(closes, closes[1:])]

    # Seed the averages with the simple mean of the first `period` changes.
    seed = changes[:period]
    avg_gain = sum(max(0.0, change) for change in seed) / period
    avg_loss = sum(max(0.0, -change) for change in seed) / period

    # Wilder smoothing for every change after the seed window.
    for change in changes[period:]:
        avg_gain = (avg_gain * (period - 1) + max(0.0, change)) / period
        avg_loss = (avg_loss * (period - 1) + max(0.0, -change)) / period

    if avg_loss == 0.0:
        # No losses at all: either the price never moved (neutral) or it
        # only went up (RSI saturates at 100).  Both avoid dividing by zero.
        if avg_gain == 0.0:
            return _NEUTRAL_RSI
        return 100.0

    relative_strength = avg_gain / avg_loss
    return 100.0 - (100.0 / (1.0 + relative_strength))
|
||||
|
||||
|
||||
class RSIEvaluator:
    """Evaluator that turns an RSI reading into an overbought/oversold signal.

    Conforms to the
    :class:`~services.signal_engine.signals.base.SignalEvaluator` protocol.

    Parameters
    ----------
    period:
        Number of periods used for the RSI calculation.  Defaults to ``14``.
    """

    def __init__(self, period: int = DEFAULT_RSI_PERIOD) -> None:
        self.period = period
        # One extra bar is needed to form the first price change.
        self.min_bars = period + 1

    # ------------------------------------------------------------------
    # Public API (SignalEvaluator protocol)
    # ------------------------------------------------------------------

    def evaluate(
        self,
        bars: list[OHLCVBar],
        timeframe: str,
    ) -> SignalResult | None:
        """Produce an RSI signal for *bars* on *timeframe*.

        ``None`` is returned when the look-back validation for
        ``period + 1`` bars fails, when the RSI cannot be computed, or when
        the RSI lies in the neutral zone (30–70 inclusive).
        """
        if not validate_lookback(bars, self.min_bars):
            return None

        reading = compute_rsi(bars, self.period)
        if reading is None:
            return None

        if reading > OVERBOUGHT_THRESHOLD:
            # Overbought: anticipate a reversal downwards.
            zone = "overbought"
            direction = SignalDirection.BEARISH
            scaled = (reading - OVERBOUGHT_THRESHOLD) / _MAX_DISTANCE_OVERBOUGHT
        elif reading < OVERSOLD_THRESHOLD:
            # Oversold: anticipate a reversal upwards.
            zone = "oversold"
            direction = SignalDirection.BULLISH
            scaled = (OVERSOLD_THRESHOLD - reading) / _MAX_DISTANCE_OVERSOLD
        else:
            # Neutral zone (30 ≤ RSI ≤ 70): nothing to report.
            return None

        # Strength grows linearly with the distance past the threshold and is
        # clamped to [0, 1].
        strength = min(1.0, max(0.0, scaled))

        return SignalResult(
            signal_type="rsi",
            timeframe=timeframe,
            strength=strength,
            direction=direction,
            confidence=strength * _CONFIDENCE_MULTIPLIER,
            metadata={
                "rsi": reading,
                "period": self.period,
                "zone": zone,
            },
        )
|
||||
@@ -0,0 +1,300 @@
|
||||
"""Top-level orchestrator for a single evaluation tick.
|
||||
|
||||
Coordinates input normalization, exit evaluation, hard filters, signal
|
||||
evaluation, both pipelines (concurrent), delta analysis, output formatting,
|
||||
persistence, and Redis queue publication.
|
||||
|
||||
Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio
|
||||
|
||||
from services.aggregation.regime import classify_regime
|
||||
from services.signal_engine.config import SignalEngineConfig
|
||||
from services.signal_engine.confluence import compute_confluence
|
||||
from services.signal_engine.delta import analyze_delta
|
||||
from services.signal_engine.exit_engine import evaluate_exits
|
||||
from services.signal_engine.formatter import (
|
||||
format_output,
|
||||
signal_output_to_recommendation,
|
||||
)
|
||||
from services.signal_engine.hard_filter import evaluate_hard_filters
|
||||
from services.signal_engine.heuristic import run_heuristic_pipeline
|
||||
from services.signal_engine.models import (
|
||||
HeuristicResult,
|
||||
NormalizedInput,
|
||||
ProbabilisticResult,
|
||||
SignalOutput,
|
||||
SignalResult,
|
||||
Verdict,
|
||||
)
|
||||
from services.signal_engine.normalizer import normalize_input
|
||||
from services.signal_engine.persistence import persist_signal_output
|
||||
from services.signal_engine.probabilistic import run_probabilistic_pipeline
|
||||
from services.signal_engine.signals.cup_handle import CupHandleEvaluator
|
||||
from services.signal_engine.signals.elliott_wave import ElliottWaveEvaluator
|
||||
from services.signal_engine.signals.fibonacci import FibonacciEvaluator
|
||||
from services.signal_engine.signals.ma_stack import MAStackEvaluator
|
||||
from services.signal_engine.signals.rsi import RSIEvaluator
|
||||
|
||||
logger = logging.getLogger(__name__)

# Name of the Redis list that receives trading recommendations.
_TRADING_QUEUE = "stonks:queue:trading_decisions"

# One shared instance of every evaluator in the signal library; they are run
# in this order for each timeframe.
_EVALUATORS = [
    FibonacciEvaluator(),
    MAStackEvaluator(),
    RSIEvaluator(),
    CupHandleEvaluator(),
    ElliottWaveEvaluator(),
]

# Placeholder results substituted for a pipeline that raised.
# NOTE(review): these are module-level singletons holding list fields; this
# presumably relies on downstream code never mutating them — confirm.
_SKIP_HEURISTIC = HeuristicResult(
    verdict=Verdict.SKIP,
    confidence=0.0,
    s_total=0.0,
    s_company=0.0,
    s_macro=0.0,
    s_competitive=0.0,
    signal_weights=[],
    reasoning=["pipeline_error: heuristic pipeline raised an exception"],
)

_SKIP_PROBABILISTIC = ProbabilisticResult(
    verdict=Verdict.SKIP,
    p_up=0.5,
    entropy=1.0,
    ev_r=0.0,
    prior=0.5,
    posterior=0.5,
    likelihood_ratios=[],
    regime="uncertainty",
    reasoning=["pipeline_error: probabilistic pipeline raised an exception"],
)
|
||||
|
||||
|
||||
def _evaluate_signals(
    normalized: NormalizedInput,
) -> dict[str, dict[str, SignalResult]]:
    """Apply every registered evaluator to every timeframe that has bars.

    The outcome is keyed first by the result's ``signal_type`` and then by
    timeframe: ``{signal_type: {timeframe: SignalResult}}``.  Only signals
    that actually fired are included; an evaluator that returns ``None`` or
    raises contributes nothing for that timeframe (a raise is logged as a
    warning and evaluation carries on).
    """
    from services.signal_engine.normalizer import TIMEFRAMES

    fired: dict[str, dict[str, SignalResult]] = {}

    for evaluator in _EVALUATORS:
        for tf in TIMEFRAMES:
            series = normalized.bars.get(tf, [])
            if not series:
                # No data for this timeframe: nothing to evaluate.
                continue

            try:
                outcome = evaluator.evaluate(series, tf)
            except Exception:
                # One broken evaluator must not abort the whole tick.
                logger.warning(
                    "Signal evaluator %s failed on %s/%s",
                    type(evaluator).__name__,
                    normalized.ticker,
                    tf,
                    exc_info=True,
                )
            else:
                if outcome is not None:
                    fired.setdefault(outcome.signal_type, {})[tf] = outcome

    return fired
|
||||
|
||||
|
||||
async def evaluate_tick(
    pool: asyncpg.Pool,
    redis_client: redis.asyncio.Redis,
    ticker: str,
    config: SignalEngineConfig,
) -> SignalOutput | None:
    """Execute one complete evaluation pass for *ticker*.

    The pass proceeds in this order:

    1. Fetch and normalize the inputs once via ``normalize_input``.
    2. Evaluate exit conditions for the open positions.
    3. Apply hard filters; a filtered ticker ends the tick immediately.
    4. Evaluate every signal on every timeframe.
    5. Compute confluence from the fired signals.
    6. Classify the market regime with ``classify_regime()``.
    7. Await both pipelines through ``asyncio.gather``; a pipeline that
       raises is replaced by its SKIP placeholder.
    8. Run delta analysis on the two pipeline results.
    9. Format the combined output.
    10. Persist the output to the database.
    11. Push a recommendation onto the Redis trading queue when at least one
        pipeline says BUY and shadow mode is off.

    Returns ``None`` when the ticker is hard-filtered or when both pipelines
    raised; otherwise the formatted ``SignalOutput``.

    Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6
    """
    tick_started_at = time.monotonic()

    # Step 1: Normalize inputs
    normalized = await normalize_input(pool, ticker, config)

    # Step 2: Exit conditions come before the pipelines (Req 8.6).  A missing
    # price is passed on as 0.0.
    current_price = normalized.current_price or 0.0
    price_by_ticker = {ticker: current_price}
    exit_signals = evaluate_exits(
        normalized.open_positions,
        price_by_ticker,
        config.exit_config,
    )

    # Step 3: Hard filters.  Note that the exit signals computed above are
    # only consumed by format_output, so they are dropped on this early exit.
    screening = evaluate_hard_filters(normalized, config.hard_filter_config)
    if screening.filtered:
        logger.info(
            "Ticker %s hard-filtered: %s",
            ticker,
            ", ".join(screening.reasons),
        )
        return None

    # Step 4: Evaluate signals across timeframes
    signal_results = _evaluate_signals(normalized)

    # Step 5: Compute confluence
    confluence_signals = compute_confluence(signal_results, config.timeframe_weights)

    # Step 6: Classify regime
    regime = classify_regime(normalized.closing_prices, normalized.returns)

    # Step 7: Run both pipelines through a single gather call.
    # NOTE(review): the wrapped pipeline calls are not awaited, so presumably
    # they are synchronous and execute back-to-back — confirm.
    pipelines_started_at = time.monotonic()

    async def _run_heuristic() -> HeuristicResult:
        return run_heuristic_pipeline(
            normalized, confluence_signals, config.heuristic_config
        )

    async def _run_probabilistic() -> ProbabilisticResult:
        return run_probabilistic_pipeline(
            normalized, confluence_signals, regime, config.probabilistic_config
        )

    heuristic_outcome, probabilistic_outcome = await asyncio.gather(
        _run_heuristic(),
        _run_probabilistic(),
        return_exceptions=True,
    )

    pipeline_elapsed = time.monotonic() - pipelines_started_at

    # With return_exceptions=True a failure arrives as the exception object.
    heuristic_failed = isinstance(heuristic_outcome, BaseException)
    probabilistic_failed = isinstance(probabilistic_outcome, BaseException)

    heuristic_result: HeuristicResult
    probabilistic_result: ProbabilisticResult

    if heuristic_failed:
        logger.error(
            "Heuristic pipeline failed for %s: %s",
            ticker,
            heuristic_outcome,
            exc_info=heuristic_outcome,
        )
        heuristic_result = _SKIP_HEURISTIC
    else:
        heuristic_result = heuristic_outcome

    if probabilistic_failed:
        logger.error(
            "Probabilistic pipeline failed for %s: %s",
            ticker,
            probabilistic_outcome,
            exc_info=probabilistic_outcome,
        )
        probabilistic_result = _SKIP_PROBABILISTIC
    else:
        probabilistic_result = probabilistic_outcome

    # Nothing usable was produced: give up on this tick.
    if heuristic_failed and probabilistic_failed:
        logger.error(
            "Both pipelines failed for %s — skipping tick",
            ticker,
        )
        return None

    logger.info(
        "Pipelines completed for %s in %.3fs — heuristic=%s, probabilistic=%s",
        ticker,
        pipeline_elapsed,
        heuristic_result.verdict.value,
        probabilistic_result.verdict.value,
    )

    # Step 8: Delta analysis
    delta = await analyze_delta(
        heuristic_result, probabilistic_result, redis_client, ticker
    )

    # Step 9: Format output
    output = format_output(
        ticker,
        current_price,
        heuristic_result,
        probabilistic_result,
        delta,
        exit_signals,
        config,
    )

    # Step 10: Persist to database
    await persist_signal_output(pool, output)

    # Step 11: Publish to the trading queue — only for a BUY from at least
    # one pipeline, and never in shadow mode.
    has_buy = (
        heuristic_result.verdict == Verdict.BUY
        or probabilistic_result.verdict == Verdict.BUY
    )

    if has_buy:
        if config.shadow_mode:
            logger.info(
                "Shadow mode: BUY signal for %s persisted but not published to trading queue",
                ticker,
            )
        else:
            try:
                recommendation = signal_output_to_recommendation(output)
                await redis_client.rpush(
                    _TRADING_QUEUE,
                    recommendation.model_dump_json(),
                )
                logger.info(
                    "Published trading recommendation for %s to %s",
                    ticker,
                    _TRADING_QUEUE,
                )
            except Exception:
                # Publishing is best-effort; the output is already persisted.
                logger.error(
                    "Failed to publish trading recommendation for %s",
                    ticker,
                    exc_info=True,
                )

    # Report the wall-clock duration of the whole tick.
    logger.info(
        "Evaluation tick for %s completed in %.3fs",
        ticker,
        time.monotonic() - tick_started_at,
    )

    return output
|
||||
@@ -68,6 +68,7 @@ class CapitalRequest(BaseModel):
|
||||
"""Body for POST /api/trading/reset."""
|
||||
|
||||
initial_capital: float = 0.0
|
||||
reserve_pct: float | None = None
|
||||
|
||||
|
||||
class BacktestRequest(BaseModel):
|
||||
@@ -430,7 +431,8 @@ async def reset_paper_trading(body: CapitalRequest) -> dict[str, Any]:
|
||||
else:
|
||||
capital = 100_000.0
|
||||
|
||||
reserve_pct = engine.config.reserve_siphon_pct
|
||||
reserve_pct = body.reserve_pct if body.reserve_pct is not None else engine.config.reserve_siphon_pct
|
||||
reserve_pct = max(0.0, min(1.0, reserve_pct)) # clamp 0-100%
|
||||
reserve = capital * reserve_pct
|
||||
active = capital - reserve
|
||||
|
||||
|
||||
@@ -4,6 +4,10 @@ Task 32: Fetches historical recommendations from the database, simulates
|
||||
the decision logic chronologically using evaluate_recommendation(), tracks
|
||||
simulated positions and equity curve, and persists results to backtest_runs
|
||||
and backtest_trades tables.
|
||||
|
||||
Supports a validation mode (Requirements 15.1–15.5) that generates prediction
|
||||
snapshots and evaluates outcomes using only data available at each historical
|
||||
point in time, preventing future data leakage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -39,12 +43,22 @@ class BacktestReplay:
|
||||
self.pool = pool
|
||||
self._perf = PerformanceComputer()
|
||||
|
||||
async def run(self, config: BacktestConfig, backtest_id: str | None = None) -> BacktestResult:
|
||||
async def run(
|
||||
self,
|
||||
config: BacktestConfig,
|
||||
backtest_id: str | None = None,
|
||||
validation_mode: bool = False,
|
||||
) -> BacktestResult:
|
||||
"""Execute a full backtest replay.
|
||||
|
||||
Args:
|
||||
config: Backtest configuration (date range, capital, risk tier).
|
||||
backtest_id: Optional pre-generated ID. If not provided, one is generated.
|
||||
validation_mode: When True, creates prediction snapshots for each
|
||||
historical recommendation using only data available at that point
|
||||
in time, evaluates outcomes, and computes model metrics over the
|
||||
backtest period. Snapshots are tagged with the backtest_id.
|
||||
(Requirements 15.1–15.5)
|
||||
|
||||
Returns:
|
||||
BacktestResult with metrics, trade log, and equity curve.
|
||||
@@ -87,6 +101,7 @@ class BacktestReplay:
|
||||
daily_returns: list[float] = []
|
||||
prev_value = config.initial_capital
|
||||
trade_log: list[dict] = []
|
||||
validation_snapshot_ids: list[str] = [] # track snapshot IDs for validation mode
|
||||
|
||||
# Pre-load company sectors and latest prices for enrichment
|
||||
company_sectors: dict[str, str] = {}
|
||||
@@ -172,6 +187,25 @@ class BacktestReplay:
|
||||
now=sim_time,
|
||||
)
|
||||
|
||||
# --- Validation mode: create prediction snapshot (Req 15.1, 15.2, 15.4) ---
|
||||
if validation_mode and self.pool is not None:
|
||||
try:
|
||||
snapshot_id = await self._create_validation_snapshot(
|
||||
rec=rec,
|
||||
sim_time=sim_time,
|
||||
backtest_id=backtest_id,
|
||||
company_sectors=company_sectors,
|
||||
)
|
||||
if snapshot_id is not None:
|
||||
validation_snapshot_ids.append(snapshot_id)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Validation snapshot failed for %s at %s, continuing backtest",
|
||||
rec.get("ticker", "?"),
|
||||
sim_time,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
if decision.decision == "act":
|
||||
act_count += 1
|
||||
ticker = decision.ticker
|
||||
@@ -348,6 +382,10 @@ class BacktestReplay:
|
||||
# Persist results
|
||||
await self._persist_results(result, closed_trades)
|
||||
|
||||
# --- Validation mode: evaluate outcomes and compute metrics (Req 15.3, 15.5) ---
|
||||
if validation_mode and self.pool is not None and validation_snapshot_ids:
|
||||
await self._run_validation_evaluation(backtest_id)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as exc:
|
||||
@@ -356,6 +394,210 @@ class BacktestReplay:
|
||||
await self._persist_failed_run(backtest_id, config, str(exc))
|
||||
raise
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Validation mode helpers (Requirements 15.1–15.5)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# SQL to fetch the close price at or before a specific time — prevents
|
||||
# future data leakage by only returning data available at that point.
|
||||
_CLOSE_AT_TIME_SQL = """
|
||||
SELECT (data->>'c')::float AS close
|
||||
FROM market_snapshots
|
||||
WHERE ticker = $1
|
||||
AND snapshot_type = 'bar'
|
||||
AND data->>'c' IS NOT NULL
|
||||
AND captured_at <= $2
|
||||
ORDER BY captured_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
_COMPANY_SECTOR_SQL = """
|
||||
SELECT sector FROM companies WHERE ticker = $1 AND active = TRUE LIMIT 1
|
||||
"""
|
||||
|
||||
_SECTOR_ETF_MAP: dict[str, str] = {
|
||||
"Technology": "XLK",
|
||||
"Consumer Cyclical": "XLY",
|
||||
"Financial Services": "XLF",
|
||||
"Healthcare": "XLV",
|
||||
"Energy": "XLE",
|
||||
"Communication Services": "XLC",
|
||||
"Industrials": "XLI",
|
||||
"Consumer Defensive": "XLP",
|
||||
"Real Estate": "XLRE",
|
||||
"Utilities": "XLU",
|
||||
}
|
||||
|
||||
async def _fetch_close_at_time(
|
||||
self,
|
||||
ticker: str,
|
||||
target_time: datetime,
|
||||
) -> float | None:
|
||||
"""Fetch the close price for *ticker* at or before *target_time*.
|
||||
|
||||
Ensures no future data leakage — only market data with
|
||||
``captured_at <= target_time`` is considered (Requirement 15.4).
|
||||
"""
|
||||
if self.pool is None:
|
||||
return None
|
||||
row = await self.pool.fetchrow(self._CLOSE_AT_TIME_SQL, ticker, target_time)
|
||||
if row is None:
|
||||
return None
|
||||
return row["close"]
|
||||
|
||||
async def _create_validation_snapshot(
|
||||
self,
|
||||
rec: dict,
|
||||
sim_time: datetime,
|
||||
backtest_id: str,
|
||||
company_sectors: dict[str, str],
|
||||
) -> str | None:
|
||||
"""Create a prediction snapshot using only data available at *sim_time*.
|
||||
|
||||
Fetches ticker, SPY, and sector ETF prices as of *sim_time* to prevent
|
||||
future data leakage (Requirements 15.1, 15.2, 15.4). The snapshot is
|
||||
tagged with *backtest_id* in its metadata field (Requirement 15.5).
|
||||
|
||||
Returns the snapshot UUID on success, or ``None`` on failure.
|
||||
"""
|
||||
from services.validation.prediction_snapshot import (
|
||||
SECTOR_ETF_MAP,
|
||||
)
|
||||
|
||||
ticker = rec.get("ticker", "")
|
||||
if not ticker:
|
||||
return None
|
||||
|
||||
# Fetch prices using only data available at sim_time (Req 15.4)
|
||||
ticker_price = await self._fetch_close_at_time(ticker, sim_time)
|
||||
spy_price = await self._fetch_close_at_time("SPY", sim_time)
|
||||
|
||||
# Sector ETF price
|
||||
sector = company_sectors.get(ticker)
|
||||
sector_etf_ticker = SECTOR_ETF_MAP.get(sector) if sector else None
|
||||
sector_etf_price: float | None = None
|
||||
if sector_etf_ticker is not None:
|
||||
sector_etf_price = await self._fetch_close_at_time(
|
||||
sector_etf_ticker, sim_time
|
||||
)
|
||||
|
||||
snapshot_id = str(uuid.uuid4())
|
||||
|
||||
# Build metadata tagged with backtest_id (Req 15.5)
|
||||
metadata: dict = {
|
||||
"backtest_id": backtest_id,
|
||||
"source": "backtest_validation",
|
||||
}
|
||||
|
||||
# Map recommendation fields to snapshot columns
|
||||
direction = rec.get("direction", rec.get("trend_direction", "neutral"))
|
||||
action = rec.get("action", "watch")
|
||||
mode = rec.get("mode", "informational")
|
||||
confidence = float(rec.get("confidence", 0.5))
|
||||
strength = float(rec.get("strength", rec.get("trend_strength", 0.5)))
|
||||
contradiction = float(rec.get("contradiction", rec.get("contradiction_score", 0.0)))
|
||||
p_bull = rec.get("p_bull")
|
||||
if p_bull is not None:
|
||||
p_bull = float(p_bull)
|
||||
p_bear = (1.0 - p_bull) if p_bull is not None else None
|
||||
window = rec.get("window", rec.get("trend_window", "7d"))
|
||||
horizon = rec.get("time_horizon", rec.get("horizon", "7d"))
|
||||
|
||||
# Insert the snapshot directly — we bypass create_prediction_snapshot()
|
||||
# because that function fetches *latest* prices (not point-in-time).
|
||||
insert_sql = """
|
||||
INSERT INTO prediction_snapshots (
|
||||
id, generated_at, ticker, "window", horizon, direction, action, mode,
|
||||
strength, confidence, contradiction, p_bull, p_bear,
|
||||
score_company, score_macro, score_competitive,
|
||||
evidence_count, unique_source_count, duplicate_evidence_count,
|
||||
price_at_prediction, spy_price_at_prediction,
|
||||
sector_etf_price_at_prediction, metadata
|
||||
) VALUES (
|
||||
$1::uuid, $2, $3, $4, $5, $6, $7, $8,
|
||||
$9, $10, $11, $12, $13,
|
||||
$14, $15, $16,
|
||||
$17, $18, $19,
|
||||
$20, $21, $22,
|
||||
$23::jsonb
|
||||
)
|
||||
"""
|
||||
await self.pool.execute(
|
||||
insert_sql,
|
||||
snapshot_id,
|
||||
sim_time,
|
||||
ticker,
|
||||
str(window),
|
||||
str(horizon),
|
||||
str(direction),
|
||||
str(action),
|
||||
str(mode),
|
||||
strength,
|
||||
confidence,
|
||||
contradiction,
|
||||
p_bull,
|
||||
p_bear,
|
||||
float(rec.get("score_company", 0.0)),
|
||||
float(rec.get("score_macro", 0.0)),
|
||||
float(rec.get("score_competitive", 0.0)),
|
||||
int(rec.get("evidence_count", 0)),
|
||||
int(rec.get("unique_source_count", 0)),
|
||||
int(rec.get("duplicate_evidence_count", 0)),
|
||||
ticker_price,
|
||||
spy_price,
|
||||
sector_etf_price,
|
||||
json.dumps(metadata),
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
"Validation snapshot %s created for %s at %s (backtest %s)",
|
||||
snapshot_id,
|
||||
ticker,
|
||||
sim_time,
|
||||
backtest_id,
|
||||
)
|
||||
return snapshot_id
|
||||
|
||||
async def _run_validation_evaluation(self, backtest_id: str) -> None:
|
||||
"""Evaluate prediction outcomes and compute metrics for the backtest.
|
||||
|
||||
Calls the outcome evaluator and metrics engine after the backtest
|
||||
completes (Requirements 15.3, 15.5). Failures are logged but do
|
||||
not block the backtest result.
|
||||
"""
|
||||
from services.validation.metrics import compute_and_store_metric_snapshots
|
||||
from services.validation.outcome_evaluator import evaluate_matured_predictions
|
||||
|
||||
# Step 1: Evaluate matured predictions (Req 15.3)
|
||||
try:
|
||||
outcomes_count = await evaluate_matured_predictions(self.pool)
|
||||
logger.info(
|
||||
"Backtest %s validation: %d prediction outcomes evaluated",
|
||||
backtest_id,
|
||||
outcomes_count,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Backtest %s: outcome evaluation failed, continuing",
|
||||
backtest_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Step 2: Compute and store metric snapshots (Req 15.5)
|
||||
try:
|
||||
snapshots = await compute_and_store_metric_snapshots(self.pool)
|
||||
logger.info(
|
||||
"Backtest %s validation: %d metric snapshots computed",
|
||||
backtest_id,
|
||||
len(snapshots),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Backtest %s: metric snapshot computation failed, continuing",
|
||||
backtest_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Database helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
+52
-11
@@ -598,8 +598,9 @@ class TradingEngine:
|
||||
initial_capital, invested, available, reserve_balance, open_count,
|
||||
)
|
||||
|
||||
# Compute initial correlation matrix from market data
|
||||
await self._compute_correlation_matrix()
|
||||
# Compute initial correlation matrix from market data (non-blocking)
|
||||
# Runs in background so the engine can start trading immediately
|
||||
asyncio.create_task(self._compute_correlation_matrix(), name="correlation_matrix")
|
||||
|
||||
async def _decision_loop(self) -> None:
|
||||
"""Poll recommendations and evaluate them in a continuous loop.
|
||||
@@ -778,10 +779,6 @@ class TradingEngine:
|
||||
continue
|
||||
|
||||
# --- Buy path ---
|
||||
# Set dedup key for buys
|
||||
if self.redis is not None:
|
||||
await self.redis.set(trading_dedupe_key(rec_id), "1", ex=86400)
|
||||
|
||||
# Check if we already hold this ticker — don't double up
|
||||
try:
|
||||
existing_pos = await self.pool.fetchrow(
|
||||
@@ -789,6 +786,9 @@ class TradingEngine:
|
||||
ticker,
|
||||
)
|
||||
if existing_pos:
|
||||
# Permanent skip — safe to dedup
|
||||
if self.redis is not None:
|
||||
await self.redis.set(trading_dedupe_key(rec_id), "1", ex=86400)
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
@@ -864,6 +864,19 @@ class TradingEngine:
|
||||
# Persist decision
|
||||
await self._persist_decision(decision)
|
||||
|
||||
# Set dedup key only for permanent outcomes (act or
|
||||
# non-retryable skips). Do NOT dedup
|
||||
# outside_trading_window — those should be retried
|
||||
# when the market opens.
|
||||
retryable_skips = {"outside_trading_window"}
|
||||
if decision.skip_reason not in retryable_skips:
|
||||
if self.redis is not None:
|
||||
await self.redis.set(
|
||||
trading_dedupe_key(rec_id), "1", ex=86400,
|
||||
)
|
||||
if rec_id:
|
||||
self.processed_recommendation_ids.add(rec_id)
|
||||
|
||||
except Exception:
|
||||
logger.exception("Error evaluating recommendation %s", rec.get("recommendation_id", "?"))
|
||||
|
||||
@@ -933,6 +946,27 @@ class TradingEngine:
|
||||
if pos_match is None:
|
||||
continue
|
||||
|
||||
# Suppress take-profit when a strong buy signal is active
|
||||
if trigger.trigger_type == "take_profit" and self.pool is not None:
|
||||
try:
|
||||
active_buy = await self.pool.fetchrow(
|
||||
"SELECT confidence FROM recommendations "
|
||||
"WHERE ticker = $1 AND action = 'buy' "
|
||||
"AND mode IN ('paper_eligible', 'live_eligible') "
|
||||
"AND generated_at > NOW() - INTERVAL '2 hours' "
|
||||
"ORDER BY confidence DESC LIMIT 1",
|
||||
trigger.ticker,
|
||||
)
|
||||
if active_buy and float(active_buy["confidence"]) >= 0.80:
|
||||
logger.info(
|
||||
"Suppressing take-profit for %s — active buy signal (confidence=%.3f)",
|
||||
trigger.ticker,
|
||||
float(active_buy["confidence"]),
|
||||
)
|
||||
continue
|
||||
except Exception:
|
||||
pass # On error, proceed with the take-profit
|
||||
|
||||
await self._submit_sell_order(
|
||||
trigger.ticker,
|
||||
pos_match.quantity,
|
||||
@@ -1339,12 +1373,19 @@ class TradingEngine:
|
||||
return
|
||||
|
||||
try:
|
||||
rows = await self.pool.fetch(
|
||||
"SELECT ticker, captured_at::date AS dt, (data->>'c')::float AS close "
|
||||
"FROM market_snapshots "
|
||||
"WHERE snapshot_type = 'bar' AND captured_at > NOW() - INTERVAL '30 days' "
|
||||
"ORDER BY ticker, captured_at"
|
||||
rows = await asyncio.wait_for(
|
||||
self.pool.fetch(
|
||||
"SELECT ms.ticker, ms.captured_at::date AS dt, (ms.data->>'c')::float AS close "
|
||||
"FROM market_snapshots ms "
|
||||
"JOIN companies c ON c.ticker = ms.ticker AND c.active = TRUE "
|
||||
"WHERE ms.snapshot_type = 'bar' AND ms.captured_at > NOW() - INTERVAL '30 days' "
|
||||
"ORDER BY ms.ticker, ms.captured_at"
|
||||
),
|
||||
timeout=30.0,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("Correlation matrix query timed out — skipping")
|
||||
return
|
||||
except Exception:
|
||||
logger.debug("Could not query market_snapshots for correlation matrix")
|
||||
return
|
||||
|
||||
@@ -0,0 +1,329 @@
|
||||
"""Quality gate for live trading eligibility.
|
||||
|
||||
Evaluates aggregate model metrics against configurable thresholds and
|
||||
determines whether the system meets minimum quality standards for live
|
||||
trading. When any threshold is not met, the gate forces all
|
||||
recommendations to paper mode (fail-safe).
|
||||
|
||||
Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger("trading_engine.quality_gate")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class QualityGateConfig:
    """Thresholds that a metric snapshot is checked against by the quality gate.

    All fields carry defaults, so ``QualityGateConfig()`` is the baseline policy.
    """

    # Lower bound on the snapshot's prediction_count.
    min_prediction_count: int = 100
    # Lower bound on the snapshot's information_coefficient.
    min_ic: float = 0.03
    # Lower bound on the snapshot's win_rate.
    min_win_rate: float = 0.53
    # Upper bound on the snapshot's calibration_error.
    max_ece: float = 0.15
    # Lower bound on the snapshot's avg_excess_return_vs_spy.
    min_excess_return_vs_spy: float = 0.0
    # Snapshots older than this many hours are treated as stale.
    max_snapshot_age_hours: int = 24
|
||||
|
||||
|
||||
@dataclass
class GateThresholdResult:
    """Outcome of comparing one snapshot metric with its configured limit."""

    # Identifier of the check, e.g. "min_ic" or "max_ece".
    name: str
    # Configured limit the metric was compared against.
    threshold: float
    # Metric value that was compared.
    actual: float
    # True when the metric satisfied the limit.
    passed: bool
|
||||
|
||||
|
||||
@dataclass
class QualityGateResult:
    """Complete record of one quality-gate evaluation."""

    # Overall verdict of the evaluation.
    passed: bool
    # Timestamp at which the evaluation ran.
    evaluated_at: datetime
    # Per-threshold outcomes; empty when no thresholds were evaluated.
    threshold_results: list[GateThresholdResult] = field(default_factory=list)
    # Human-readable explanation of the verdict.
    reason: str = ""
    # Identifier of the metric snapshot that was inspected, if any.
    snapshot_id: str | None = None
    # Thresholds that were in force for this evaluation.
    config: QualityGateConfig = field(default_factory=QualityGateConfig)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Threshold evaluation helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _evaluate_thresholds(
    snapshot: dict,
    config: QualityGateConfig,
) -> list[GateThresholdResult]:
    """Compare each gated metric in ``snapshot`` with its limit in ``config``.

    Returns one ``GateThresholdResult`` per check, in the order:
    min_prediction_count, min_ic, min_win_rate, max_ece,
    min_excess_return_vs_spy.  A metric that is ``None`` or absent is
    replaced by a fallback: 0.0 for the "min" checks and 1.0 for ``max_ece``.
    """
    checks: list[GateThresholdResult] = []

    def _record(name: str, limit: float, observed: float, ok: bool) -> None:
        checks.append(
            GateThresholdResult(name=name, threshold=limit, actual=observed, passed=ok)
        )

    def _metric(key: str, fallback: float) -> float:
        # Only None triggers the fallback; a stored 0 is kept as-is.
        raw = snapshot.get(key)
        return float(fallback if raw is None else raw)

    # Prediction count: any falsy value (None, 0) counts as zero, and the
    # comparison uses the raw (unconverted) value.
    count = snapshot.get("prediction_count") or 0
    _record(
        "min_prediction_count",
        float(config.min_prediction_count),
        float(count),
        count >= config.min_prediction_count,
    )

    ic = _metric("information_coefficient", 0.0)
    _record("min_ic", config.min_ic, ic, ic >= config.min_ic)

    win_rate = _metric("win_rate", 0.0)
    _record("min_win_rate", config.min_win_rate, win_rate, win_rate >= config.min_win_rate)

    # Calibration error is an upper bound, so a missing value falls back to 1.0.
    ece = _metric("calibration_error", 1.0)
    _record("max_ece", config.max_ece, ece, ece <= config.max_ece)

    excess = _metric("avg_excess_return_vs_spy", 0.0)
    _record(
        "min_excess_return_vs_spy",
        config.min_excess_return_vs_spy,
        excess,
        excess >= config.min_excess_return_vs_spy,
    )

    return checks
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def evaluate_quality_gate(
    pool: asyncpg.Pool,
    config: QualityGateConfig | None = None,
) -> QualityGateResult:
    """Run the model quality gate against the newest metric snapshot.

    The newest ``model_metric_snapshots`` row with ``lookback_window = '30d'``
    and ``horizon = '7d'`` is fetched.  The gate fails without evaluating any
    threshold when the query fails, no row exists, or the row is older than
    ``config.max_snapshot_age_hours``.  Otherwise every threshold is
    evaluated and the gate passes only if all of them pass.

    Args:
        pool: Database pool used for the snapshot query and result storage.
        config: Thresholds to apply; loaded via ``load_gate_config_from_db``
            when omitted.

    Returns:
        The evaluation result, which is also handed to ``_store_gate_result``.
    """
    if config is None:
        config = await load_gate_config_from_db(pool)

    now = datetime.now(tz=timezone.utc)

    async def _fail_closed(why: str, snap_id: str | None) -> QualityGateResult:
        # Shared fail-safe path: no thresholds evaluated, gate not passed.
        verdict = QualityGateResult(
            passed=False,
            evaluated_at=now,
            threshold_results=[],
            reason=why,
            snapshot_id=snap_id,
            config=config,
        )
        logger.warning("Quality gate: %s", verdict.reason)
        await _store_gate_result(pool, verdict)
        return verdict

    # Newest snapshot for the 30d lookback / 7d horizon window.
    try:
        row = await pool.fetchrow(
            """SELECT id, generated_at, prediction_count, win_rate,
                      directional_accuracy, information_coefficient,
                      rank_information_coefficient, avg_return,
                      avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                      calibration_error, brier_score,
                      buy_win_rate, sell_win_rate, hold_win_rate
               FROM model_metric_snapshots
               WHERE lookback_window = '30d' AND horizon = '7d'
               ORDER BY generated_at DESC
               LIMIT 1""",
        )
    except Exception:
        logger.exception("Failed to query model_metric_snapshots")
        row = None

    if row is None:
        return await _fail_closed(
            "no model metric snapshot available — defaulting to paper-only",
            None,
        )

    snapshot = dict(row)
    snapshot_id = str(snapshot["id"])

    # Staleness check against the configured maximum age.
    age_hours = (now - snapshot["generated_at"]).total_seconds() / 3600.0
    if age_hours > config.max_snapshot_age_hours:
        return await _fail_closed(
            f"most recent snapshot is {age_hours:.1f}h old "
            f"(max {config.max_snapshot_age_hours}h) — defaulting to paper-only",
            snapshot_id,
        )

    threshold_results = _evaluate_thresholds(snapshot, config)
    failures = [check for check in threshold_results if not check.passed]
    passed = not failures
    if passed:
        reason = "all thresholds met"
    else:
        reason = "failed: " + ", ".join(
            f"{check.name}(actual={check.actual:.4f}, threshold={check.threshold:.4f})"
            for check in failures
        )

    result = QualityGateResult(
        passed=passed,
        evaluated_at=now,
        threshold_results=threshold_results,
        reason=reason,
        snapshot_id=snapshot_id,
        config=config,
    )

    for check in threshold_results:
        logger.info(
            "Quality gate threshold %s: actual=%.4f threshold=%.4f %s",
            check.name,
            check.actual,
            check.threshold,
            "PASS" if check.passed else "FAIL",
        )
    logger.info("Quality gate result: %s — %s", "PASS" if passed else "FAIL", reason)

    await _store_gate_result(pool, result)
    return result
|
||||
|
||||
|
||||
async def load_gate_config_from_db(
    pool: asyncpg.Pool,
) -> QualityGateConfig:
    """Load gate thresholds from risk_configs, with defaults.

    Looks for a ``risk_configs`` row with ``name = 'model_quality_gate_config'``.
    If found, merges stored thresholds over the defaults.  The default
    config is returned when:

    * the query fails or no row exists,
    * the stored value is not valid JSON,
    * the decoded JSON is not an object, or
    * any stored threshold cannot be converted to its numeric type.

    Args:
        pool: Database pool used for the lookup.

    Returns:
        The merged configuration, or ``QualityGateConfig()`` on any problem.
    """
    defaults = QualityGateConfig()
    try:
        row = await pool.fetchrow(
            "SELECT config FROM risk_configs WHERE name = 'model_quality_gate_config'",
        )
    except Exception:
        logger.warning("Failed to load gate config from risk_configs — using defaults")
        return defaults

    if row is None:
        return defaults

    try:
        raw = row["config"]
        cfg = raw if isinstance(raw, dict) else json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    # Valid JSON that is not an object (list, number, null, ...) has no
    # thresholds to merge.
    if not isinstance(cfg, dict):
        logger.warning("Invalid gate config JSON in risk_configs — using defaults")
        return defaults

    try:
        return QualityGateConfig(
            min_prediction_count=int(
                cfg.get("min_prediction_count", defaults.min_prediction_count)
            ),
            min_ic=float(cfg.get("min_ic", defaults.min_ic)),
            min_win_rate=float(cfg.get("min_win_rate", defaults.min_win_rate)),
            max_ece=float(cfg.get("max_ece", defaults.max_ece)),
            min_excess_return_vs_spy=float(
                cfg.get("min_excess_return_vs_spy", defaults.min_excess_return_vs_spy)
            ),
            max_snapshot_age_hours=int(
                cfg.get("max_snapshot_age_hours", defaults.max_snapshot_age_hours)
            ),
        )
    except (TypeError, ValueError):
        # e.g. a null, a nested object, or a non-numeric string as a threshold.
        logger.warning("Invalid gate config values in risk_configs — using defaults")
        return defaults
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _gate_result_to_json(result: QualityGateResult) -> str:
|
||||
"""Serialize a QualityGateResult to JSON for storage in risk_configs."""
|
||||
payload = {
|
||||
"passed": result.passed,
|
||||
"evaluated_at": result.evaluated_at.isoformat(),
|
||||
"reason": result.reason,
|
||||
"snapshot_id": result.snapshot_id,
|
||||
"config": asdict(result.config),
|
||||
"threshold_results": [asdict(tr) for tr in result.threshold_results],
|
||||
}
|
||||
return json.dumps(payload, default=str)
|
||||
|
||||
|
||||
async def _store_gate_result(pool: asyncpg.Pool, result: QualityGateResult) -> None:
    """Write the gate result to the ``'model_quality_gate'`` row of risk_configs.

    The row is inserted, or its ``config``/``updated_at`` columns are
    overwritten when the insert conflicts.  Database errors are logged and
    swallowed; serialization errors are not caught.
    """
    # Serialized before the try so only the database call is best-effort.
    serialized = _gate_result_to_json(result)
    upsert_sql = """INSERT INTO risk_configs (name, config, updated_at)
               VALUES ('model_quality_gate', $1::jsonb, NOW())
               ON CONFLICT (name) WHERE active = TRUE
               DO UPDATE SET config = $1::jsonb, updated_at = NOW()"""
    try:
        await pool.execute(upsert_sql, serialized)
    except Exception:
        logger.exception("Failed to store quality gate result in risk_configs")
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,591 @@
|
||||
"""Attribution Engine — per-source, per-catalyst, and per-layer performance.
|
||||
|
||||
Joins signal evidence links with prediction outcomes to compute attribution
|
||||
metrics that identify which sources, catalyst types, and signal layers
|
||||
contribute most to accurate predictions.
|
||||
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class SourceAttribution:
    """Aggregated performance figures for one evidence source."""

    # Source name the rows were grouped by.
    source: str
    # Source type reported for the group.
    source_type: str
    # Number of rows in the group.
    prediction_count: int
    # Mean of the non-null weights.
    avg_weight: float
    # Mean of the non-null contribution scores.
    avg_contribution_score: float
    # Share of rows with a known direction that were correct.
    win_rate: float
    # Mean of the non-null future returns.
    avg_future_return: float
    # Mean of the non-null excess returns versus SPY.
    avg_excess_return_vs_spy: float
    # Correlation of contribution score with future return; None if not computed.
    information_coefficient: float | None
    # Share of rows flagged as duplicates.
    duplicate_rate: float
|
||||
|
||||
|
||||
@dataclass
class CatalystAttribution:
    """Aggregated performance figures for one catalyst type."""

    # Catalyst type the rows were grouped by.
    catalyst_type: str
    # Number of rows in the group.
    prediction_count: int
    # Share of rows with a known direction that were correct.
    win_rate: float
    # Mean of the non-null future returns.
    avg_future_return: float
    # Mean of the non-null excess returns versus SPY.
    avg_excess_return_vs_spy: float
    # Correlation of contribution score with future return; None if not computed.
    information_coefficient: float | None
|
||||
|
||||
|
||||
@dataclass
class LayerAttribution:
    """Aggregated performance figures for one signal layer."""

    # Layer name: company, macro, or competitive.
    layer: str
    # Mean share of the total score contributed by this layer.
    avg_contribution_pct: float
    # Win rate over predictions where this layer exceeds 30% contribution.
    dominant_win_rate: float
    # IC over predictions where this layer exceeds 30% contribution.
    dominant_ic: float | None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure computation helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _pearson_correlation(xs: list[float], ys: list[float]) -> float | None:
|
||||
"""Compute Pearson correlation coefficient between two lists.
|
||||
|
||||
Returns None if the lists have fewer than 2 elements or if either
|
||||
has zero variance. Guards against NaN/infinity.
|
||||
"""
|
||||
n = len(xs)
|
||||
if n < 2:
|
||||
return None
|
||||
|
||||
mean_x = sum(xs) / n
|
||||
mean_y = sum(ys) / n
|
||||
|
||||
cov = 0.0
|
||||
var_x = 0.0
|
||||
var_y = 0.0
|
||||
|
||||
for x, y in zip(xs, ys):
|
||||
dx = x - mean_x
|
||||
dy = y - mean_y
|
||||
cov += dx * dy
|
||||
var_x += dx * dx
|
||||
var_y += dy * dy
|
||||
|
||||
if var_x == 0.0 or var_y == 0.0:
|
||||
return None
|
||||
|
||||
r = cov / math.sqrt(var_x * var_y)
|
||||
|
||||
if math.isnan(r) or math.isinf(r):
|
||||
return None
|
||||
|
||||
return max(-1.0, min(1.0, r))
|
||||
|
||||
|
||||
def _compute_ic(
|
||||
contribution_scores: list[float],
|
||||
future_returns: list[float],
|
||||
) -> float | None:
|
||||
"""Compute IC (Pearson correlation) between contribution scores and returns.
|
||||
|
||||
Returns None when fewer than 30 data points.
|
||||
"""
|
||||
if len(contribution_scores) < 30 or len(future_returns) < 30:
|
||||
return None
|
||||
|
||||
n = min(len(contribution_scores), len(future_returns))
|
||||
return _pearson_correlation(contribution_scores[:n], future_returns[:n])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL queries — source attribution via v_source_performance
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Per-row source performance data from v_source_performance for one horizon
# ($1), restricted to rows whose generated_at is on or after the cutoff ($2).
_SOURCE_ATTRIBUTION_SQL = """
    SELECT
        source,
        source_type,
        weight,
        contribution_score,
        is_duplicate,
        direction_correct,
        future_return,
        excess_return_vs_spy
    FROM v_source_performance
    WHERE horizon = $1
      AND generated_at >= $2
"""

# Same projection as _SOURCE_ATTRIBUTION_SQL, without the generated_at cutoff.
_SOURCE_ATTRIBUTION_ALL_SQL = """
    SELECT
        source,
        source_type,
        weight,
        contribution_score,
        is_duplicate,
        direction_correct,
        future_return,
        excess_return_vs_spy
    FROM v_source_performance
    WHERE horizon = $1
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL queries — catalyst attribution via v_source_performance
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Per-row catalyst performance data from v_source_performance for one horizon
# ($1), restricted to rows whose generated_at is on or after the cutoff ($2).
_CATALYST_ATTRIBUTION_SQL = """
    SELECT
        catalyst_type,
        weight,
        contribution_score,
        direction_correct,
        future_return,
        excess_return_vs_spy
    FROM v_source_performance
    WHERE horizon = $1
      AND generated_at >= $2
"""

# Same projection as _CATALYST_ATTRIBUTION_SQL, without the generated_at cutoff.
_CATALYST_ATTRIBUTION_ALL_SQL = """
    SELECT
        catalyst_type,
        weight,
        contribution_score,
        direction_correct,
        future_return,
        excess_return_vs_spy
    FROM v_source_performance
    WHERE horizon = $1
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL queries — layer attribution via prediction_snapshots + outcomes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Layer scores from prediction_snapshots joined to their outcomes for one
# horizon ($1), restricted to snapshots generated on or after the cutoff ($2).
_LAYER_ATTRIBUTION_SQL = """
    SELECT
        ps.score_company,
        ps.score_macro,
        ps.score_competitive,
        po.direction_correct,
        po.future_return
    FROM prediction_snapshots ps
    JOIN prediction_outcomes po ON po.prediction_id = ps.id
    WHERE po.horizon = $1
      AND ps.generated_at >= $2
"""

# Same projection as _LAYER_ATTRIBUTION_SQL, without the generated_at cutoff.
_LAYER_ATTRIBUTION_ALL_SQL = """
    SELECT
        ps.score_company,
        ps.score_macro,
        ps.score_competitive,
        po.direction_correct,
        po.future_return
    FROM prediction_snapshots ps
    JOIN prediction_outcomes po ON po.prediction_id = ps.id
    WHERE po.horizon = $1
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source attribution (Requirements 7.1, 7.2, 7.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def compute_source_attribution(
    pool: asyncpg.Pool,
    lookback_days: int = 30,
    horizon: str = "7d",
) -> list[SourceAttribution]:
    """Compute per-source performance metrics.

    Queries v_source_performance for rows at ``horizon`` generated within
    the last ``lookback_days`` days, groups them by source (a missing source
    is grouped as "unknown"), and computes per group: prediction count, most
    common source type, avg weight, avg contribution score, win rate, avg
    future return, avg excess return vs SPY, IC, and duplicate rate.
    Averages skip null values and are 0.0 when no value is present.

    Args:
        pool: Database pool used for the query.
        lookback_days: Size of the lookback window in days.
        horizon: Outcome horizon to filter on.

    Returns:
        SourceAttribution entries sorted by prediction count descending;
        an empty list when the query fails or returns no rows.
    """
    from collections import Counter

    now = datetime.now().astimezone()
    cutoff = now - timedelta(days=lookback_days)

    try:
        rows = await pool.fetch(_SOURCE_ATTRIBUTION_SQL, horizon, cutoff)
    except Exception:
        logger.exception(
            "Failed to query source attribution for horizon=%s lookback=%dd",
            horizon,
            lookback_days,
        )
        return []

    if not rows:
        return []

    def _present(group: list[dict], column: str) -> list:
        # Non-null values of one column within a group.
        return [r[column] for r in group if r.get(column) is not None]

    def _mean(values: list) -> float:
        return sum(values) / len(values) if values else 0.0

    # Group rows by source
    source_groups: dict[str, list[dict]] = {}
    for row in rows:
        r = dict(row)
        source_groups.setdefault(r.get("source") or "unknown", []).append(r)

    results: list[SourceAttribution] = []

    for source, group in source_groups.items():
        count = len(group)

        # Source type: the most common non-empty value in the group.  Ties
        # resolve to the value seen first, so a uniform group is unaffected.
        type_counts = Counter(
            r["source_type"] for r in group if r.get("source_type")
        )
        source_type = type_counts.most_common(1)[0][0] if type_counts else "unknown"

        # Win rate over rows whose direction outcome is known
        directions = _present(group, "direction_correct")
        win_count = sum(1 for d in directions if d is True)
        win_rate = win_count / len(directions) if directions else 0.0

        # IC uses only rows where both score and return are present, so the
        # two series stay aligned pair by pair.
        pairs = [
            (r["contribution_score"], r["future_return"])
            for r in group
            if r.get("contribution_score") is not None
            and r.get("future_return") is not None
        ]
        ic = _compute_ic([s for s, _ in pairs], [f for _, f in pairs])

        # Duplicate rate: is_duplicate=true / total
        dup_count = sum(1 for r in group if r.get("is_duplicate") is True)

        results.append(
            SourceAttribution(
                source=source,
                source_type=source_type,
                prediction_count=count,
                avg_weight=_mean(_present(group, "weight")),
                avg_contribution_score=_mean(_present(group, "contribution_score")),
                win_rate=win_rate,
                avg_future_return=_mean(_present(group, "future_return")),
                avg_excess_return_vs_spy=_mean(_present(group, "excess_return_vs_spy")),
                information_coefficient=ic,
                duplicate_rate=dup_count / count,
            )
        )

    # Sort by prediction count descending
    results.sort(key=lambda a: a.prediction_count, reverse=True)

    logger.info(
        "Computed source attribution for %d sources (horizon=%s, lookback=%dd)",
        len(results),
        horizon,
        lookback_days,
    )

    return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Catalyst attribution (Requirements 7.3, 7.4)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def compute_catalyst_attribution(
    pool: asyncpg.Pool,
    lookback_days: int = 30,
    horizon: str = "7d",
) -> list[CatalystAttribution]:
    """Aggregate v_source_performance rows into per-catalyst-type metrics.

    Rows at ``horizon`` generated within the last ``lookback_days`` days are
    bucketed by catalyst_type (a missing type is bucketed as "unknown").
    Each bucket yields its row count, win rate, mean future return, mean
    excess return vs SPY, and IC.  Means skip nulls and are 0.0 when no
    value is present.

    Returns the buckets ordered by row count, largest first; an empty list
    when the query fails or returns nothing.
    """
    cutoff = datetime.now().astimezone() - timedelta(days=lookback_days)

    try:
        fetched = await pool.fetch(_CATALYST_ATTRIBUTION_SQL, horizon, cutoff)
    except Exception:
        logger.exception(
            "Failed to query catalyst attribution for horizon=%s lookback=%dd",
            horizon,
            lookback_days,
        )
        return []

    if not fetched:
        return []

    def _average(values: list) -> float:
        return sum(values) / len(values) if values else 0.0

    # Bucket the rows by catalyst type, preserving first-seen order.
    buckets: dict[str, list[dict]] = {}
    for record in map(dict, fetched):
        label = record.get("catalyst_type") or "unknown"
        buckets.setdefault(label, []).append(record)

    attributions: list[CatalystAttribution] = []

    for label, members in buckets.items():
        # Win rate is taken over rows whose direction outcome is known.
        verdicts = [
            m["direction_correct"]
            for m in members
            if m.get("direction_correct") is not None
        ]
        wins = sum(1 for verdict in verdicts if verdict is True)
        win_rate = wins / len(verdicts) if verdicts else 0.0

        realized = [
            m["future_return"] for m in members if m.get("future_return") is not None
        ]
        versus_spy = [
            m["excess_return_vs_spy"]
            for m in members
            if m.get("excess_return_vs_spy") is not None
        ]

        # IC needs both values on the same row so the series stay aligned.
        paired = [
            (m["contribution_score"], m["future_return"])
            for m in members
            if m.get("contribution_score") is not None
            and m.get("future_return") is not None
        ]
        ic = _compute_ic(
            [score for score, _ in paired],
            [outcome for _, outcome in paired],
        )

        attributions.append(
            CatalystAttribution(
                catalyst_type=label,
                prediction_count=len(members),
                win_rate=win_rate,
                avg_future_return=_average(realized),
                avg_excess_return_vs_spy=_average(versus_spy),
                information_coefficient=ic,
            )
        )

    attributions.sort(key=lambda entry: entry.prediction_count, reverse=True)

    logger.info(
        "Computed catalyst attribution for %d catalyst types "
        "(horizon=%s, lookback=%dd)",
        len(attributions),
        horizon,
        lookback_days,
    )

    return attributions
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Layer attribution (Requirements 7.5, 7.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def compute_layer_attribution(
    pool: asyncpg.Pool,
    lookback_days: int = 30,
    horizon: str = "7d",
) -> list[LayerAttribution]:
    """Aggregate performance per signal layer (company, macro, competitive).

    Reads score_company, score_macro and score_competitive together with the
    outcome columns for predictions at ``horizon`` generated within the last
    ``lookback_days`` days.  A null score counts as 0.0, and rows whose three
    scores do not sum to a positive total are ignored.

    Per layer:
      - avg_contribution_pct: mean of layer_score / total_score
      - dominant_win_rate: win rate over rows where the layer's share
        exceeds 30%
      - dominant_ic: IC between layer score and future return over those
        same rows

    Returns one entry per layer in the order company, macro, competitive
    (all zero / None when no rows match), or an empty list when the query
    fails.
    """
    cutoff = datetime.now().astimezone() - timedelta(days=lookback_days)

    try:
        rows = await pool.fetch(_LAYER_ATTRIBUTION_SQL, horizon, cutoff)
    except Exception:
        logger.exception(
            "Failed to query layer attribution for horizon=%s lookback=%dd",
            horizon,
            lookback_days,
        )
        return []

    if not rows:
        return [
            LayerAttribution(
                layer=empty_layer,
                avg_contribution_pct=0.0,
                dominant_win_rate=0.0,
                dominant_ic=None,
            )
            for empty_layer in ("company", "macro", "competitive")
        ]

    layer_fields = (
        ("company", "score_company"),
        ("macro", "score_macro"),
        ("competitive", "score_competitive"),
    )

    # The total score of a row does not depend on the layer, so compute it once.
    scored: list[tuple[dict, float]] = []
    for row in rows:
        rec = dict(row)
        total = (
            (rec.get("score_company") or 0.0)
            + (rec.get("score_macro") or 0.0)
            + (rec.get("score_competitive") or 0.0)
        )
        scored.append((rec, total))

    results: list[LayerAttribution] = []

    for layer_name, score_field in layer_fields:
        # Share of the total score held by this layer, per usable row.
        shares = [
            ((rec.get(score_field) or 0.0) / total, rec)
            for rec, total in scored
            if total > 0.0
        ]
        avg_contribution_pct = (
            sum(share for share, _ in shares) / len(shares) if shares else 0.0
        )

        # Rows where this layer holds more than 30% of the total.
        dominant = [rec for share, rec in shares if share > 0.30]

        verdicts = [
            rec["direction_correct"]
            for rec in dominant
            if rec.get("direction_correct") is not None
        ]
        if verdicts:
            dominant_win_rate = sum(1 for v in verdicts if v is True) / len(verdicts)
        else:
            dominant_win_rate = 0.0

        with_outcome = [rec for rec in dominant if rec.get("future_return") is not None]
        dominant_ic = _compute_ic(
            [rec.get(score_field) or 0.0 for rec in with_outcome],
            [rec["future_return"] for rec in with_outcome],
        )

        results.append(
            LayerAttribution(
                layer=layer_name,
                avg_contribution_pct=avg_contribution_pct,
                dominant_win_rate=dominant_win_rate,
                dominant_ic=dominant_ic,
            )
        )

    logger.info(
        "Computed layer attribution for 3 layers (horizon=%s, lookback=%dd)",
        horizon,
        lookback_days,
    )

    return results
|
||||
@@ -0,0 +1,135 @@
|
||||
"""Calibration Engine — Bayesian shrinkage source reliability and weight adjustment.
|
||||
|
||||
Computes source reliability scores using Bayesian shrinkage from historical
|
||||
prediction outcomes, and adjusts evidence weights based on source performance.
|
||||
Updates the existing source_accuracy table with reliability scores.
|
||||
|
||||
Requirements: 8.1, 8.2, 8.3, 8.4, 8.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure functions — testable without a database
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_source_reliability(
    observed_win_rate: float,
    sample_count: int,
    prior_strength: int = 30,
) -> float:
    """Shrink an observed win rate toward the 0.5 prior (Bayesian shrinkage).

    reliability = 0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)

    Args:
        observed_win_rate: Fraction of wins observed for the source.
        sample_count: Number of observations (n) behind the win rate.
        prior_strength: Pseudo-count pulling the estimate toward 0.5.

    Returns:
        A value clamped to [0.0, 1.0]. With no samples (n <= 0) the prior
        mean 0.5 is returned; as n grows the result approaches
        observed_win_rate.
    """
    prior_mean = 0.5

    if sample_count > 0:
        # Weight given to the observation relative to the prior.
        trust = sample_count / (sample_count + prior_strength)
        estimate = prior_mean + trust * (observed_win_rate - prior_mean)
        # The estimate is already in range for win rates in [0, 1]; the clamp
        # only matters for out-of-range inputs.
        return max(0.0, min(1.0, estimate))

    return prior_mean
|
||||
|
||||
|
||||
def compute_adjusted_evidence_weight(
    base_weight: float,
    reliability: float,
) -> float:
    """Scale an evidence weight by source reliability.

    The weight is multiplied by (0.5 + reliability) and the product is
    clamped to the range [0.1, 2.0].
    """
    multiplier = 0.5 + reliability
    scaled = base_weight * multiplier
    return max(0.1, min(2.0, scaled))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL queries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Per-source outcome tallies read from the v_source_performance view.
# Rows with a NULL direction_correct are filtered out, so sample_count is the
# number of rows with a known outcome and win_count is the subset of those
# where direction_correct = TRUE. One result row per source.
_SOURCE_PERFORMANCE_SQL = """
SELECT
    source,
    COUNT(*) AS sample_count,
    COUNT(*) FILTER (WHERE direction_correct = TRUE) AS win_count
FROM v_source_performance
WHERE direction_correct IS NOT NULL
GROUP BY source
"""

# Upsert into source_accuracy keyed on source_id.
# Parameters: $1 = source_id, $2 = accuracy_ratio, $3 = sample_count.
# A new source is inserted; an existing one has accuracy_ratio and
# sample_count overwritten. last_updated is set to NOW() in both cases.
_UPSERT_SOURCE_ACCURACY_SQL = """
INSERT INTO source_accuracy (source_id, accuracy_ratio, sample_count, last_updated)
VALUES ($1, $2, $3, NOW())
ON CONFLICT (source_id)
DO UPDATE SET
    accuracy_ratio = EXCLUDED.accuracy_ratio,
    sample_count = EXCLUDED.sample_count,
    last_updated = NOW()
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database-backed function
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def update_source_reliabilities(
    pool: asyncpg.Pool,
) -> int:
    """Refresh the stored reliability score of every source.

    Steps:
      1. Fetch per-source sample and win counts with _SOURCE_PERFORMANCE_SQL.
      2. Turn each win rate into a shrunk reliability via
         compute_source_reliability.
      3. Upsert the reliability (as accuracy_ratio) and the sample count
         with _UPSERT_SOURCE_ACCURACY_SQL.

    A failed fetch is logged and yields 0. A failed upsert is logged and
    skipped, so the remaining sources are still processed.

    Returns:
        Number of sources whose upsert succeeded.
    """
    try:
        performance = await pool.fetch(_SOURCE_PERFORMANCE_SQL)
    except Exception:
        logger.exception("Failed to query source performance for reliability update")
        return 0

    if not performance:
        logger.info("No source performance data available for reliability update")
        return 0

    written = 0
    for record in performance:
        source = record["source"]
        samples = record["sample_count"]
        wins = record["win_count"]

        # Fall back to the neutral 0.5 rate when a source has no samples.
        if samples > 0:
            win_rate = wins / samples
        else:
            win_rate = 0.5
        score = compute_source_reliability(win_rate, samples)

        try:
            await pool.execute(_UPSERT_SOURCE_ACCURACY_SQL, source, score, samples)
        except Exception:
            logger.exception(
                "Failed to upsert source reliability for source=%s", source
            )
        else:
            written += 1

    logger.info("Updated source reliabilities for %d sources", written)
    return written
|
||||
@@ -0,0 +1,637 @@
|
||||
"""Metrics Engine — computes calibration, IC, Brier, and benchmark metrics.
|
||||
|
||||
Aggregates model quality metrics across configurable lookback windows and
|
||||
prediction horizons. Stores periodic snapshots for time-series analysis
|
||||
of model performance trends.
|
||||
|
||||
Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 6.1, 6.2, 6.3, 6.4, 6.5,
|
||||
9.1, 9.2, 9.3, 9.4, 10.1, 10.2, 10.3, 10.4, 10.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# (low, high) confidence ranges used to group predictions for calibration.
CONFIDENCE_BUCKETS: list[tuple[float, float]] = [
    (0.50, 0.60),
    (0.60, 0.70),
    (0.70, 0.80),
    (0.80, 0.90),
    (0.90, 1.00),
]

# Lookback window labels, in the order snapshots are computed.
LOOKBACK_WINDOWS: list[str] = ["7d", "30d", "90d", "all"]

# Length of each lookback window; None means "no cutoff" (all-time).
LOOKBACK_DURATIONS: dict[str, timedelta | None] = {
    **{f"{days}d": timedelta(days=days) for days in (7, 30, 90)},
    "all": None,
}

# Prediction horizons for which metrics are computed.
EVALUATION_HORIZONS: list[str] = ["1h", "6h", "1d", "7d", "30d"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class CalibrationBucket:
    """Calibration statistics for the predictions falling in one confidence range."""

    # Bounds of the confidence range this bucket covers.
    bucket_low: float
    bucket_high: float
    # Mean stated confidence of the predictions in the bucket.
    avg_confidence: float
    # Fraction of the bucket's predictions that turned out correct.
    observed_win_rate: float
    # Number of predictions in the bucket.
    prediction_count: int
    # True when |avg_confidence - win_rate| > 0.15.
    miscalibrated: bool
|
||||
|
||||
|
||||
@dataclass
class ModelMetricSnapshot:
    """Model-quality metrics aggregated for one (lookback window, horizon) pair."""

    # --- Identity and scope ---
    id: str
    generated_at: datetime
    lookback_window: str
    horizon: str
    prediction_count: int

    # --- Directional performance ---
    win_rate: float
    directional_accuracy: float

    # --- Score/return correlations (may be None) ---
    information_coefficient: float | None
    rank_information_coefficient: float | None

    # --- Returns, absolute and relative to benchmarks ---
    avg_return: float
    avg_excess_return_vs_spy: float
    avg_excess_return_vs_sector: float

    # --- Probability quality ---
    calibration_error: float  # ECE
    brier_score: float

    # --- Win rate broken down by action ---
    buy_win_rate: float
    sell_win_rate: float
    hold_win_rate: float

    # Free-form extra values; each instance gets its own dict.
    metadata: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure computation functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_calibration_error(
    confidences: list[float],
    outcomes: list[bool],
) -> tuple[float, list[CalibrationBucket]]:
    """Compute the expected calibration error (ECE) and per-bucket statistics.

    ECE = Σ (n_b / N) * |avg_conf_b - win_rate_b|

    Predictions are grouped into the ranges listed in CONFIDENCE_BUCKETS.
    Every range is half-open, [low, high), except a range whose upper bound
    is 1.00, which also includes its upper bound. A bucket is flagged as
    miscalibrated when |avg_confidence - observed_win_rate| > 0.15.

    Args:
        confidences: Stated confidence of each prediction.
        outcomes: Whether each prediction turned out correct; paired with
            ``confidences`` by position.

    Returns:
        ``(ece, buckets)`` with one CalibrationBucket per configured range,
        in configuration order. Empty ranges are reported with zeroed
        statistics and contribute nothing to the ECE. ``(0.0, [])`` is
        returned when either input is empty.

    Notes:
        N is the number of (confidence, outcome) pairs. If the inputs differ
        in length, only the common prefix is used, matching how
        compute_information_coefficient treats mismatched inputs. Pairs whose
        confidence falls outside every range still count toward N.
    """
    if not confidences or not outcomes:
        return 0.0, []

    # zip() stops at the shorter input, so N must be the number of pairs
    # actually examined rather than len(confidences).
    pairs = list(zip(confidences, outcomes))
    total = len(pairs)

    buckets: list[CalibrationBucket] = []
    ece = 0.0

    for low, high in CONFIDENCE_BUCKETS:
        # The top range is closed on the right: [0.90, 1.00].
        closed_right = high == 1.00
        members = [
            (conf, hit)
            for conf, hit in pairs
            if low <= conf and (conf <= high if closed_right else conf < high)
        ]
        count = len(members)

        if count == 0:
            # Empty bucket: recorded for completeness, excluded from the ECE.
            buckets.append(
                CalibrationBucket(
                    bucket_low=low,
                    bucket_high=high,
                    avg_confidence=0.0,
                    observed_win_rate=0.0,
                    prediction_count=0,
                    miscalibrated=False,
                )
            )
            continue

        avg_conf = sum(conf for conf, _ in members) / count
        win_rate = sum(1.0 for _, hit in members if hit) / count
        diff = abs(avg_conf - win_rate)

        buckets.append(
            CalibrationBucket(
                bucket_low=low,
                bucket_high=high,
                avg_confidence=avg_conf,
                observed_win_rate=win_rate,
                prediction_count=count,
                miscalibrated=diff > 0.15,
            )
        )
        ece += (count / total) * diff

    return ece, buckets
|
||||
|
||||
|
||||
def compute_brier_score(
    p_bulls: list[float],
    outcomes: list[bool],
) -> float:
    """Brier score: mean((p_bull - outcome)^2) over paired predictions.

    Args:
        p_bulls: Predicted probability for each prediction.
        outcomes: Realized result per prediction; a truthy value is scored
            as 1.0 and a falsy value as 0.0.

    Returns:
        The mean squared error over the (probability, outcome) pairs, which
        lies in [0.0, 1.0] for probabilities in [0.0, 1.0]. Returns 0.0 when
        either input is empty.

    Notes:
        The inputs are paired by position. If they differ in length only the
        common prefix is scored, and the mean is taken over that many pairs
        (matching how compute_information_coefficient treats mismatched
        inputs).
    """
    if not p_bulls or not outcomes:
        return 0.0

    total = 0.0
    pair_count = 0
    for p, o in zip(p_bulls, outcomes):
        actual = 1.0 if o else 0.0
        total += (p - actual) ** 2
        pair_count += 1

    # Divide by the number of pairs actually scored; zip() truncates to the
    # shorter input, so len(p_bulls) would be the wrong denominator.
    return total / pair_count
|
||||
|
||||
|
||||
def _pearson_correlation(xs: list[float], ys: list[float]) -> float | None:
|
||||
"""Compute Pearson correlation coefficient between two lists.
|
||||
|
||||
Returns None if the lists have fewer than 2 elements or if either
|
||||
has zero variance. Guards against NaN/infinity.
|
||||
"""
|
||||
n = len(xs)
|
||||
if n < 2:
|
||||
return None
|
||||
|
||||
mean_x = sum(xs) / n
|
||||
mean_y = sum(ys) / n
|
||||
|
||||
cov = 0.0
|
||||
var_x = 0.0
|
||||
var_y = 0.0
|
||||
|
||||
for x, y in zip(xs, ys):
|
||||
dx = x - mean_x
|
||||
dy = y - mean_y
|
||||
cov += dx * dy
|
||||
var_x += dx * dx
|
||||
var_y += dy * dy
|
||||
|
||||
if var_x == 0.0 or var_y == 0.0:
|
||||
return None
|
||||
|
||||
r = cov / math.sqrt(var_x * var_y)
|
||||
|
||||
# Guard against floating-point drift
|
||||
if math.isnan(r) or math.isinf(r):
|
||||
return None
|
||||
|
||||
# Clamp to [-1.0, 1.0]
|
||||
return max(-1.0, min(1.0, r))
|
||||
|
||||
|
||||
def _rank_data(values: list[float]) -> list[float]:
|
||||
"""Compute fractional ranks for a list of values (average tie-breaking)."""
|
||||
n = len(values)
|
||||
indexed = sorted(range(n), key=lambda i: values[i])
|
||||
|
||||
ranks = [0.0] * n
|
||||
i = 0
|
||||
while i < n:
|
||||
# Find the end of the tie group
|
||||
j = i + 1
|
||||
while j < n and values[indexed[j]] == values[indexed[i]]:
|
||||
j += 1
|
||||
|
||||
# Average rank for the tie group (1-based)
|
||||
avg_rank = (i + j + 1) / 2.0
|
||||
for k in range(i, j):
|
||||
ranks[indexed[k]] = avg_rank
|
||||
|
||||
i = j
|
||||
|
||||
return ranks
|
||||
|
||||
|
||||
def compute_information_coefficient(
    scores: list[float],
    returns: list[float],
) -> float | None:
    """Information coefficient: Pearson correlation of scores and future returns.

    The two lists are paired by position and truncated to the shorter one.

    Returns:
        None when either list has fewer than 30 entries; otherwise the
        result of _pearson_correlation on the paired values.
    """
    usable = min(len(scores), len(returns))
    # Too few points for a meaningful correlation.
    if usable < 30:
        return None

    return _pearson_correlation(scores[:usable], returns[:usable])
|
||||
|
||||
|
||||
def compute_rank_information_coefficient(
    scores: list[float],
    returns: list[float],
) -> float | None:
    """Rank IC: Spearman correlation of prediction scores and future returns.

    Both lists are truncated to the shorter length, converted to ranks with
    _rank_data, and the ranks are passed to _pearson_correlation.

    Returns:
        None when either list has fewer than 30 entries; otherwise the
        result of _pearson_correlation on the ranks.
    """
    usable = min(len(scores), len(returns))
    # Too few points for a meaningful correlation.
    if usable < 30:
        return None

    score_ranks = _rank_data(scores[:usable])
    return_ranks = _rank_data(returns[:usable])
    return _pearson_correlation(score_ranks, return_ranks)
|
||||
|
||||
|
||||
def compute_contribution_scores(
    weights: list[float],
) -> list[float]:
    """Normalize document weights into contribution scores.

    Each score is weight_i / sum(weights), so the scores sum to 1.0. When
    the weights sum to zero, every entry gets an equal share 1/n instead.
    An empty input produces an empty list.
    """
    count = len(weights)
    if count == 0:
        return []

    denominator = sum(weights)
    if denominator != 0.0:
        return [weight / denominator for weight in weights]

    # No weight mass to distribute: split evenly.
    return [1.0 / count] * count
|
||||
|
||||
|
||||
def compute_hit_rate_improvement(win_rate: float) -> float:
    """Relative improvement of the win rate over a random 50/50 baseline.

    Computed as (win_rate - 0.5) / 0.5.
    """
    baseline = 0.5
    edge = win_rate - baseline
    return edge / baseline
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL queries for v_prediction_performance view
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Rows of v_prediction_performance for one horizon, with no time cutoff.
# Parameters: $1 = horizon label. Used for the "all" lookback window.
_PERFORMANCE_DATA_SQL = """
SELECT
    ticker,
    direction,
    action,
    confidence,
    strength,
    p_bull,
    score_company,
    score_macro,
    score_competitive,
    future_return,
    excess_return_vs_spy,
    excess_return_vs_sector,
    direction_correct,
    profitable,
    horizon,
    generated_at
FROM v_prediction_performance
WHERE horizon = $1
"""

# Same columns as _PERFORMANCE_DATA_SQL, restricted to predictions generated
# at or after a cutoff.
# Parameters: $1 = horizon label, $2 = earliest generated_at to include.
_PERFORMANCE_DATA_WITH_LOOKBACK_SQL = """
SELECT
    ticker,
    direction,
    action,
    confidence,
    strength,
    p_bull,
    score_company,
    score_macro,
    score_competitive,
    future_return,
    excess_return_vs_spy,
    excess_return_vs_sector,
    direction_correct,
    profitable,
    horizon,
    generated_at
FROM v_prediction_performance
WHERE horizon = $1
  AND generated_at >= $2
"""

# Insert one row into model_metric_snapshots.
# Takes 18 positional parameters in the column order listed below; $1 is cast
# to uuid and $18 (metadata) is cast to jsonb.
_INSERT_METRIC_SNAPSHOT_SQL = """
INSERT INTO model_metric_snapshots (
    id, generated_at, lookback_window, horizon,
    prediction_count, win_rate, directional_accuracy,
    information_coefficient, rank_information_coefficient,
    avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector,
    calibration_error, brier_score,
    buy_win_rate, sell_win_rate, hold_win_rate,
    metadata
) VALUES (
    $1::uuid, $2, $3, $4,
    $5, $6, $7,
    $8, $9,
    $10, $11, $12,
    $13, $14,
    $15, $16, $17,
    $18::jsonb
)
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Metric computation from raw rows
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_metrics_from_rows(
    rows: list[dict],
    lookback_window: str,
    horizon: str,
) -> ModelMetricSnapshot:
    """Aggregate prediction-performance rows into a ModelMetricSnapshot.

    Args:
        rows: Row dicts with the columns selected from
            v_prediction_performance. Missing or None values are skipped for
            the metric that needs them.
        lookback_window: Label stored on the snapshot.
        horizon: Label stored on the snapshot.

    Returns:
        A snapshot with a fresh UUID and the current local time. With no
        rows, every numeric metric is 0.0, both information coefficients are
        None and the metadata is empty.
    """

    def _is_win(row: dict) -> bool:
        return row.get("direction_correct") is True

    def _win_rate(subset: list[dict]) -> float:
        # Share of rows with direction_correct True; 0.0 for an empty subset.
        if not subset:
            return 0.0
        return sum(1 for row in subset if _is_win(row)) / len(subset)

    def _rows_with_action(name: str) -> list[dict]:
        return [row for row in rows if (row.get("action") or "").lower() == name]

    def _non_null(column: str) -> list:
        return [row[column] for row in rows if row.get(column) is not None]

    def _mean(values: list) -> float:
        return sum(values) / len(values) if values else 0.0

    now = datetime.now().astimezone()
    snapshot_id = str(uuid.uuid4())
    prediction_count = len(rows)

    if prediction_count == 0:
        # Nothing to aggregate: emit an all-zero snapshot.
        return ModelMetricSnapshot(
            id=snapshot_id,
            generated_at=now,
            lookback_window=lookback_window,
            horizon=horizon,
            prediction_count=0,
            win_rate=0.0,
            directional_accuracy=0.0,
            information_coefficient=None,
            rank_information_coefficient=None,
            avg_return=0.0,
            avg_excess_return_vs_spy=0.0,
            avg_excess_return_vs_sector=0.0,
            calibration_error=0.0,
            brier_score=0.0,
            buy_win_rate=0.0,
            sell_win_rate=0.0,
            hold_win_rate=0.0,
            metadata={},
        )

    # --- Win rate; directional accuracy is the same metric under another name ---
    win_rate = sum(1 for row in rows if _is_win(row)) / prediction_count
    directional_accuracy = win_rate

    # --- Per-action win rates ---
    buy_rows = _rows_with_action("buy")
    sell_rows = _rows_with_action("sell")
    hold_rows = _rows_with_action("hold")
    buy_win_rate = _win_rate(buy_rows)
    sell_win_rate = _win_rate(sell_rows)
    hold_win_rate = _win_rate(hold_rows)

    # --- Average return and excess returns (Requirements 9.1, 9.2) ---
    returns_list = _non_null("future_return")
    excess_spy_list = _non_null("excess_return_vs_spy")
    excess_sector_list = _non_null("excess_return_vs_sector")
    avg_return = _mean(returns_list)
    avg_excess_return_vs_spy = _mean(excess_spy_list)
    avg_excess_return_vs_sector = _mean(excess_sector_list)

    # --- Calibration error (ECE) (Requirements 5.1, 5.2, 5.3, 5.5) ---
    with_confidence = [row for row in rows if row.get("confidence") is not None]
    confidences = [row["confidence"] for row in with_confidence]
    outcomes = [_is_win(row) for row in with_confidence]
    ece, _buckets = compute_calibration_error(confidences, outcomes)

    # --- Brier score (Requirement 5.4) ---
    with_p_bull = [row for row in rows if row.get("p_bull") is not None]
    p_bulls = [row["p_bull"] for row in with_p_bull]
    brier_outcomes = [_is_win(row) for row in with_p_bull]
    brier = compute_brier_score(p_bulls, brier_outcomes)

    # --- IC and rank IC (Requirements 6.1, 6.2, 6.5) ---
    # Only rows that carry both a strength and a realized return take part.
    ic_pairs = [
        (row["strength"], row["future_return"])
        for row in rows
        if row.get("strength") is not None and row.get("future_return") is not None
    ]
    ic_scores = [strength for strength, _ in ic_pairs]
    ic_returns = [realized for _, realized in ic_pairs]
    ic = compute_information_coefficient(ic_scores, ic_returns)
    rank_ic = compute_rank_information_coefficient(ic_scores, ic_returns)

    # --- Hit rate improvement (Requirement 9.4) ---
    hit_rate_improvement = compute_hit_rate_improvement(win_rate)

    # --- Metadata (Requirement 10.5) ---
    metadata: dict = {
        "hit_rate_improvement": hit_rate_improvement,
        "buy_count": len(buy_rows),
        "sell_count": len(sell_rows),
        "hold_count": len(hold_rows),
        "returns_count": len(returns_list),
        "excess_spy_count": len(excess_spy_list),
        "excess_sector_count": len(excess_sector_list),
    }

    return ModelMetricSnapshot(
        id=snapshot_id,
        generated_at=now,
        lookback_window=lookback_window,
        horizon=horizon,
        prediction_count=prediction_count,
        win_rate=win_rate,
        directional_accuracy=directional_accuracy,
        information_coefficient=ic,
        rank_information_coefficient=rank_ic,
        avg_return=avg_return,
        avg_excess_return_vs_spy=avg_excess_return_vs_spy,
        avg_excess_return_vs_sector=avg_excess_return_vs_sector,
        calibration_error=ece,
        brier_score=brier,
        buy_win_rate=buy_win_rate,
        sell_win_rate=sell_win_rate,
        hold_win_rate=hold_win_rate,
        metadata=metadata,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point (Requirements 10.1, 10.2, 10.3, 10.4, 10.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def compute_and_store_metric_snapshots(
    pool: asyncpg.Pool,
) -> list[ModelMetricSnapshot]:
    """Compute and persist a metric snapshot per lookback/horizon combination.

    Iterates every window in LOOKBACK_WINDOWS and every horizon in
    EVALUATION_HORIZONS. For each pair it loads the matching rows from
    v_prediction_performance, computes the metrics with
    _compute_metrics_from_rows, and inserts the result into
    model_metric_snapshots.

    A failure for one combination is logged and that combination is
    skipped; the others are still processed.

    Returns:
        The snapshots that were stored successfully.
    """
    stored: list[ModelMetricSnapshot] = []
    # One reference time so all windows share the same cutoff origin.
    started_at = datetime.now().astimezone()

    for lookback in LOOKBACK_WINDOWS:
        window = LOOKBACK_DURATIONS[lookback]

        for horizon in EVALUATION_HORIZONS:
            try:
                # A window of None means all-time: no generated_at cutoff.
                if window is None:
                    query_args = (_PERFORMANCE_DATA_SQL, horizon)
                else:
                    query_args = (
                        _PERFORMANCE_DATA_WITH_LOOKBACK_SQL,
                        horizon,
                        started_at - window,
                    )
                records = await pool.fetch(*query_args)

                # asyncpg Records are converted to plain dicts before aggregation.
                snapshot = _compute_metrics_from_rows(
                    [dict(record) for record in records], lookback, horizon
                )

                await pool.execute(
                    _INSERT_METRIC_SNAPSHOT_SQL,
                    snapshot.id,
                    snapshot.generated_at,
                    snapshot.lookback_window,
                    snapshot.horizon,
                    snapshot.prediction_count,
                    snapshot.win_rate,
                    snapshot.directional_accuracy,
                    snapshot.information_coefficient,
                    snapshot.rank_information_coefficient,
                    snapshot.avg_return,
                    snapshot.avg_excess_return_vs_spy,
                    snapshot.avg_excess_return_vs_sector,
                    snapshot.calibration_error,
                    snapshot.brier_score,
                    snapshot.buy_win_rate,
                    snapshot.sell_win_rate,
                    snapshot.hold_win_rate,
                    json.dumps(snapshot.metadata),
                )
            except Exception:
                logger.exception(
                    "Failed to compute metrics for lookback=%s horizon=%s",
                    lookback,
                    horizon,
                )
            else:
                stored.append(snapshot)

    logger.info(
        "Computed %d metric snapshots across %d lookback/horizon combinations",
        len(stored),
        len(LOOKBACK_WINDOWS) * len(EVALUATION_HORIZONS),
    )

    return stored
|
||||
@@ -0,0 +1,414 @@
|
||||
"""Outcome Evaluator — matches predictions with realized market outcomes.
|
||||
|
||||
Runs periodically to evaluate prediction snapshots whose horizon has elapsed.
|
||||
For each snapshot, fetches future prices at the horizon endpoint and computes
|
||||
returns, excess returns, directional accuracy, and profitability across all
|
||||
five evaluation horizons (1h, 6h, 1d, 7d, 30d).
|
||||
|
||||
Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 4.10
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Horizon label -> time that must elapse after a prediction before that
# horizon can be evaluated.
HORIZON_DURATIONS: dict[str, timedelta] = dict(
    [
        ("1h", timedelta(hours=1)),
        ("6h", timedelta(hours=6)),
        ("1d", timedelta(days=1)),
        ("7d", timedelta(days=7)),
        ("30d", timedelta(days=30)),
    ]
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class PredictionOutcome:
    """What actually happened to one prediction at one evaluation horizon."""

    # --- Identity ---
    id: str  # UUID
    prediction_id: str
    evaluated_at: datetime
    horizon: str  # 1h, 6h, 1d, 7d, 30d

    # --- Ticker price and return at the horizon ---
    future_price: float
    future_return: float

    # --- Benchmark prices and returns; None when not available ---
    spy_future_price: float | None
    spy_return: float | None
    sector_etf_future_price: float | None
    sector_etf_return: float | None

    # --- Ticker return relative to each benchmark; None when not available ---
    excess_return_vs_spy: float | None
    excess_return_vs_sector: float | None

    # --- Verdicts ---
    direction_correct: bool
    profitable: bool

    # Free-form extra values; each instance gets its own dict.
    metadata: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL statements
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Find matured predictions: snapshots where generated_at + horizon_duration <= NOW()
# and no outcome has been recorded yet for that (prediction_id, horizon) pair.
# We evaluate ALL 5 horizons for each snapshot, not just the snapshot's own horizon.
# Parameters: $1 = horizon duration (cast to interval), $2 = horizon label
# matched against prediction_outcomes.horizon.
_MATURED_PREDICTIONS_SQL = """
SELECT
    ps.id,
    ps.generated_at,
    ps.ticker,
    ps.horizon AS snapshot_horizon,
    ps.direction,
    ps.action,
    ps.price_at_prediction,
    ps.spy_price_at_prediction,
    ps.sector_etf_price_at_prediction
FROM prediction_snapshots ps
WHERE ps.generated_at + $1::interval <= NOW()
  AND NOT EXISTS (
      SELECT 1 FROM prediction_outcomes po
      WHERE po.prediction_id = ps.id AND po.horizon = $2
  )
"""

# Fetch the close price for a ticker at or before a specific time.
# Uses the closest bar before or at the target time: the newest 'bar'
# snapshot with a non-NULL data->>'c' and captured_at <= $2.
# Parameters: $1 = ticker, $2 = target time.
_CLOSE_AT_TIME_SQL = """
SELECT (data->>'c')::float AS close
FROM market_snapshots
WHERE ticker = $1
  AND snapshot_type = 'bar'
  AND data->>'c' IS NOT NULL
  AND captured_at <= $2
ORDER BY captured_at DESC
LIMIT 1
"""

# Insert one row into prediction_outcomes.
# Takes 15 positional parameters in the column order listed below; $1 and $2
# are cast to uuid and $15 (metadata) is cast to jsonb.
_INSERT_OUTCOME_SQL = """
INSERT INTO prediction_outcomes (
    id, prediction_id, evaluated_at, horizon,
    future_price, future_return,
    spy_future_price, spy_return,
    sector_etf_future_price, sector_etf_return,
    excess_return_vs_spy, excess_return_vs_sector,
    direction_correct, profitable,
    metadata
) VALUES (
    $1::uuid, $2::uuid, $3, $4,
    $5, $6,
    $7, $8,
    $9, $10,
    $11, $12,
    $13, $14,
    $15::jsonb
)
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Price fetching at a specific time
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _fetch_close_at_time(
    pool: asyncpg.Pool,
    ticker: str,
    target_time: datetime,
) -> float | None:
    """Look up a ticker's close price as of ``target_time``.

    Runs _CLOSE_AT_TIME_SQL with the ticker and the target time and returns
    the ``close`` column of the row it yields.

    Returns:
        The close price, or None when the query returns no row.
    """
    record = await pool.fetchrow(_CLOSE_AT_TIME_SQL, ticker, target_time)
    return None if record is None else record["close"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sector ETF lookup (reuse pattern from prediction_snapshot)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SECTOR_ETF_MAP: dict[str, str] = {
|
||||
"Technology": "XLK",
|
||||
"Consumer Cyclical": "XLY",
|
||||
"Financial Services": "XLF",
|
||||
"Healthcare": "XLV",
|
||||
"Energy": "XLE",
|
||||
"Communication Services": "XLC",
|
||||
"Industrials": "XLI",
|
||||
"Consumer Defensive": "XLP",
|
||||
"Real Estate": "XLRE",
|
||||
"Utilities": "XLU",
|
||||
}
|
||||
|
||||
_COMPANY_SECTOR_SQL = """
|
||||
SELECT sector FROM companies WHERE ticker = $1 AND active = TRUE LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
async def _fetch_sector_etf_ticker(pool: asyncpg.Pool, ticker: str) -> str | None:
    """Resolve the sector ETF ticker that benchmarks a company ticker.

    Reads the company's sector with _COMPANY_SECTOR_SQL and maps it through
    _SECTOR_ETF_MAP.

    Returns:
        The ETF ticker, or None when the query returns no row, the sector is
        NULL, or the sector has no entry in _SECTOR_ETF_MAP.
    """
    record = await pool.fetchrow(_COMPANY_SECTOR_SQL, ticker)
    sector = record["sector"] if record is not None else None
    if sector is None:
        return None
    return _SECTOR_ETF_MAP.get(sector)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Return computation helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_return(current_price: float, future_price: float) -> float:
|
||||
"""Compute simple return: (future - current) / current."""
|
||||
if current_price == 0.0:
|
||||
return 0.0
|
||||
return (future_price - current_price) / current_price
|
||||
|
||||
|
||||
def _is_direction_correct(direction: str, future_return: float) -> bool:
|
||||
"""Determine if the predicted direction matches the realized return.
|
||||
|
||||
bullish + positive return = True
|
||||
bearish + negative return = True
|
||||
All other combinations = False
|
||||
"""
|
||||
direction_lower = direction.lower()
|
||||
if direction_lower == "bullish" and future_return > 0.0:
|
||||
return True
|
||||
if direction_lower == "bearish" and future_return < 0.0:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_profitable(action: str, future_return: float) -> bool:
|
||||
"""Determine if the predicted action would have been profitable.
|
||||
|
||||
buy + positive return = True
|
||||
sell + negative return = True
|
||||
All other combinations = False
|
||||
"""
|
||||
action_lower = action.lower()
|
||||
if action_lower == "buy" and future_return > 0.0:
|
||||
return True
|
||||
if action_lower == "sell" and future_return < 0.0:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Single prediction evaluation (Requirements 4.2–4.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def evaluate_single_prediction(
    pool: asyncpg.Pool,
    snapshot: dict,
    horizon: str,
) -> PredictionOutcome | None:
    """Score one prediction snapshot at one evaluation horizon.

    The target time is ``snapshot["generated_at"]`` plus the duration that
    ``HORIZON_DURATIONS`` holds for ``horizon``. Close prices at that time are
    looked up for the ticker, for SPY, and for the ticker's sector ETF; from
    them the function derives simple returns, excess returns versus both
    benchmarks, direction correctness, and profitability.

    Args:
        pool: asyncpg connection pool used for every lookup.
        snapshot: Mapping that must contain ``id``, ``generated_at``,
            ``ticker``, ``direction``, ``action``, ``price_at_prediction``,
            ``spy_price_at_prediction`` and ``sector_etf_price_at_prediction``.
        horizon: Key into ``HORIZON_DURATIONS``.

    Returns:
        A new ``PredictionOutcome``, or ``None`` when the ticker's future price
        is unavailable (Requirement 4.10) or when the snapshot's
        ``price_at_prediction`` is NULL or zero. Benchmark fields are left as
        ``None`` whenever their baseline or future price is missing.
    """
    horizon_delta = HORIZON_DURATIONS[horizon]
    target_time = snapshot["generated_at"] + horizon_delta
    ticker = snapshot["ticker"]

    # The ticker's own future price is mandatory; without it there is nothing
    # to score for this horizon.
    future_price = await _fetch_close_at_time(pool, ticker, target_time)
    if future_price is None:
        logger.debug(
            "Future price unavailable for %s at horizon %s (target %s), skipping",
            ticker,
            horizon,
            target_time,
        )
        return None

    base_price = snapshot["price_at_prediction"]
    if base_price is None or base_price == 0.0:
        logger.warning(
            "Price at prediction is NULL or zero for snapshot %s, skipping horizon %s",
            snapshot["id"],
            horizon,
        )
        return None

    # Ticker return (Requirement 4.2)
    future_return = _compute_return(base_price, future_price)

    # SPY benchmark (Requirement 4.3): only attempted with a usable baseline.
    spy_future_price: float | None = None
    spy_return: float | None = None
    spy_base = snapshot["spy_price_at_prediction"]
    if not (spy_base is None or spy_base == 0.0):
        spy_future_price = await _fetch_close_at_time(pool, "SPY", target_time)
        if spy_future_price is not None:
            spy_return = _compute_return(spy_base, spy_future_price)

    # Sector ETF benchmark (Requirement 4.4): needs a usable baseline, a known
    # ETF mapping for the ticker, and a future price for that ETF.
    sector_etf_future_price: float | None = None
    sector_etf_return: float | None = None
    sector_base = snapshot["sector_etf_price_at_prediction"]
    if not (sector_base is None or sector_base == 0.0):
        etf_symbol = await _fetch_sector_etf_ticker(pool, ticker)
        if etf_symbol is not None:
            sector_etf_future_price = await _fetch_close_at_time(
                pool, etf_symbol, target_time
            )
            if sector_etf_future_price is not None:
                sector_etf_return = _compute_return(
                    sector_base, sector_etf_future_price
                )

    # Excess returns (Requirement 4.5): defined only when the benchmark return
    # exists; the ticker return is always a number at this point.
    excess_return_vs_spy = (
        None if spy_return is None else future_return - spy_return
    )
    excess_return_vs_sector = (
        None if sector_etf_return is None else future_return - sector_etf_return
    )

    # Direction correctness (Requirement 4.6) and profitability (Requirement 4.7)
    direction_correct = _is_direction_correct(snapshot["direction"], future_return)
    profitable = _is_profitable(snapshot["action"], future_return)

    # Timezone-aware "now" expressed in the local zone.
    evaluated_at = datetime.now().astimezone()

    return PredictionOutcome(
        id=str(uuid.uuid4()),
        prediction_id=str(snapshot["id"]),
        evaluated_at=evaluated_at,
        horizon=horizon,
        future_price=future_price,
        future_return=future_return,
        spy_future_price=spy_future_price,
        spy_return=spy_return,
        sector_etf_future_price=sector_etf_future_price,
        sector_etf_return=sector_etf_return,
        excess_return_vs_spy=excess_return_vs_spy,
        excess_return_vs_sector=excess_return_vs_sector,
        direction_correct=direction_correct,
        profitable=profitable,
        metadata={
            "ticker": ticker,
            "horizon": horizon,
            "price_at_prediction": base_price,
            "future_price": future_price,
        },
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Store outcome (Requirement 4.9)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _store_outcome(
    conn: asyncpg.Connection,
    outcome: PredictionOutcome,
) -> None:
    """Insert one prediction outcome row using ``_INSERT_OUTCOME_SQL``.

    Args:
        conn: Open asyncpg connection on which the statement is executed.
        outcome: The outcome to persist; its ``metadata`` dict is serialized
            to a JSON string before being sent.
    """
    # Values are handed to the query positionally, in exactly this order.
    params = (
        outcome.id,
        outcome.prediction_id,
        outcome.evaluated_at,
        outcome.horizon,
        outcome.future_price,
        outcome.future_return,
        outcome.spy_future_price,
        outcome.spy_return,
        outcome.sector_etf_future_price,
        outcome.sector_etf_return,
        outcome.excess_return_vs_spy,
        outcome.excess_return_vs_sector,
        outcome.direction_correct,
        outcome.profitable,
        json.dumps(outcome.metadata),
    )
    await conn.execute(_INSERT_OUTCOME_SQL, *params)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point (Requirements 4.1, 4.8, 4.9, 4.10)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def evaluate_matured_predictions(
    pool: asyncpg.Pool,
) -> int:
    """Evaluate every matured prediction snapshot for every known horizon.

    For each ``(horizon, duration)`` entry in ``HORIZON_DURATIONS`` the
    function runs ``_MATURED_PREDICTIONS_SQL`` with that duration and horizon.
    NOTE(review): the query is defined elsewhere; it is expected to return
    snapshots whose horizon has elapsed and that have no stored outcome for
    that horizon yet — confirm against the SQL.

    Each returned row is scored with ``evaluate_single_prediction``. A ``None``
    result (e.g. future price not yet available) is skipped so the pair can be
    picked up again on a later run (Requirement 4.10). Any exception raised
    while scoring or storing a single row is logged and does not stop the
    remaining rows.

    Returns:
        The number of outcomes successfully stored.
    """
    recorded = 0

    for horizon, duration in HORIZON_DURATIONS.items():
        matured_rows = await pool.fetch(_MATURED_PREDICTIONS_SQL, duration, horizon)
        if not matured_rows:
            continue

        logger.info(
            "Found %d matured predictions for horizon %s", len(matured_rows), horizon
        )

        for record in matured_rows:
            snapshot = dict(record)
            try:
                outcome = await evaluate_single_prediction(pool, snapshot, horizon)
                # None means "nothing to store yet" — leave it for the next run.
                if outcome is not None:
                    async with pool.acquire() as conn, conn.transaction():
                        await _store_outcome(conn, outcome)
                    recorded += 1
            except Exception:
                # One bad snapshot must not abort the rest of the batch.
                logger.exception(
                    "Failed to evaluate snapshot %s at horizon %s",
                    snapshot["id"],
                    horizon,
                )

    logger.info("Outcome evaluation complete: %d outcomes recorded", recorded)
    return recorded
|
||||
@@ -0,0 +1,540 @@
|
||||
"""Prediction Snapshot Writer — captures immutable prediction state at generation time.
|
||||
|
||||
Creates frozen records of every recommendation with prices, evidence links,
|
||||
duplicate detection, and contribution scores so that predictions can be
|
||||
evaluated against future outcomes without hindsight bias.
|
||||
|
||||
Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.1, 3.2, 3.3, 3.4
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import urllib.parse
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.shared.schemas import Recommendation, TrendSummary
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps a company's sector name (as stored in companies.sector) to the ticker
# of the Select Sector SPDR ETF used as its sector benchmark. Sectors that are
# absent from this map get no sector-ETF price on their snapshots.
SECTOR_ETF_MAP: dict[str, str] = {
    "Technology": "XLK",
    "Consumer Cyclical": "XLY",
    "Financial Services": "XLF",
    "Healthcare": "XLV",
    "Energy": "XLE",
    "Communication Services": "XLC",
    "Industrials": "XLI",
    "Consumer Defensive": "XLP",
    "Real Estate": "XLRE",
    "Utilities": "XLU",
    # Previously missing: the materials sector benchmark.
    "Basic Materials": "XLB",
}
|
||||
|
||||
EVALUATION_HORIZONS: list[str] = ["1h", "6h", "1d", "7d", "30d"]
|
||||
|
||||
MAX_SINGLE_DOCUMENT_WEIGHT: float = 1.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataclasses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class PredictionSnapshot:
|
||||
"""Immutable snapshot of a prediction at generation time."""
|
||||
|
||||
id: str # UUID
|
||||
generated_at: datetime
|
||||
ticker: str
|
||||
window: str
|
||||
horizon: str
|
||||
direction: str # bullish/bearish/mixed/neutral
|
||||
action: str # buy/sell/hold/watch
|
||||
mode: str # informational/paper_eligible/live_eligible
|
||||
strength: float
|
||||
confidence: float
|
||||
contradiction: float
|
||||
p_bull: float | None
|
||||
p_bear: float | None
|
||||
score_company: float
|
||||
score_macro: float
|
||||
score_competitive: float
|
||||
evidence_count: int
|
||||
unique_source_count: int
|
||||
duplicate_evidence_count: int
|
||||
price_at_prediction: float | None
|
||||
spy_price_at_prediction: float | None
|
||||
sector_etf_price_at_prediction: float | None
|
||||
metadata: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class SignalEvidenceLink:
    """One evidence document/signal that contributed to a prediction."""

    # --- identifiers ---
    id: str  # UUID rendered as a string
    prediction_id: str
    document_id: str
    signal_id: str
    ticker: str

    # --- descriptive signal fields ---
    source: str
    source_type: str
    catalyst_type: str
    sentiment: str
    impact: float
    extraction_confidence: float

    # --- weighting and de-duplication ---
    weight: float  # capped at MAX_SINGLE_DOCUMENT_WEIGHT
    is_duplicate: bool
    canonical_evidence_key: str
    contribution_score: float  # weight / total weight; scores sum to 1.0

    # Free-form extra context; every instance gets its own dict.
    metadata: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Canonical evidence key computation (Requirements 2.3, 17.4)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_canonical_evidence_key(title: str, url: str) -> str:
    """Return the SHA-256 hex digest of ``normalized(title) + normalized(url)``.

    Normalization:
    - Title: leading/trailing whitespace stripped, then lowercased.
    - URL: lowercased, then reduced to scheme, netloc and path (path
      parameters, query string and fragment are dropped).

    Robustness:
    - ``None`` for either argument is treated as an empty string.
    - If ``urllib.parse.urlparse`` rejects the URL with ``ValueError`` (for
      example an unbalanced ``[`` in the host), the URL is normalized on a
      best-effort basis by cutting it at the first ``#`` or ``?``.

    Args:
        title: Document title.
        url: Document URL.

    Returns:
        A 64-character lowercase hexadecimal digest.
    """
    normalized_title = (title or "").strip().lower()

    lowered_url = (url or "").lower()
    try:
        parsed = urllib.parse.urlparse(lowered_url)
        normalized_url = urllib.parse.urlunparse(
            (parsed.scheme, parsed.netloc, parsed.path, "", "", "")
        )
    except ValueError:
        # A single malformed URL must not abort key computation; strip the
        # fragment and query by hand instead.
        normalized_url = lowered_url.split("#", 1)[0].split("?", 1)[0]

    combined = normalized_title + normalized_url
    return hashlib.sha256(combined.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Contribution score computation (Requirements 2.5, 17.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_contribution_scores(weights: list[float]) -> list[float]:
    """Normalize weights into contribution scores that sum to 1.0.

    Each score is ``weight_i / sum(weights)`` after non-positive weights
    (negative values, zero, and NaN) have been replaced by ``0.0``. That
    replacement is what guarantees every score lies in [0.0, 1.0]; dividing
    raw negative weights could produce scores below 0 or above 1.

    Args:
        weights: Per-document weights, in any order.

    Returns:
        A list of the same length as ``weights`` whose entries sum to 1.0
        (within floating-point tolerance). An empty input yields an empty
        list. When no weight is positive, every entry gets an equal share.
    """
    if not weights:
        return []

    # "w > 0.0" is False for negatives, zero and NaN alike, so all of those
    # collapse to a zero contribution.
    usable = [w if w > 0.0 else 0.0 for w in weights]

    total = sum(usable)
    if total <= 0.0:
        # Nothing carries positive weight — distribute equally.
        count = len(usable)
        return [1.0 / count] * count

    return [w / total for w in usable]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Price fetching (Requirements 1.2, 1.3, 1.4, 1.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_LATEST_CLOSE_SQL = """
|
||||
SELECT (data->>'c')::float AS close
|
||||
FROM market_snapshots
|
||||
WHERE ticker = $1 AND snapshot_type = 'bar' AND data->>'c' IS NOT NULL
|
||||
ORDER BY captured_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
async def fetch_latest_close_price(
    pool: asyncpg.Pool,
    ticker: str,
) -> float | None:
    """Look up the most recent close price for ``ticker``.

    Runs ``_LATEST_CLOSE_SQL`` with the ticker as its only argument and reads
    the ``close`` column of the single row it returns.

    Returns:
        The close price, or ``None`` when the query returns no row (no market
        data for the ticker).
    """
    latest_bar = await pool.fetchrow(_LATEST_CLOSE_SQL, ticker)
    return None if latest_bar is None else latest_bar["close"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sector ETF lookup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_COMPANY_SECTOR_SQL = """
|
||||
SELECT sector FROM companies WHERE ticker = $1 AND active = TRUE LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
async def _fetch_sector_etf_ticker(pool: asyncpg.Pool, ticker: str) -> str | None:
    """Resolve a company ticker to the ticker of its sector ETF.

    Runs ``_COMPANY_SECTOR_SQL`` for the ticker and translates the returned
    sector through ``SECTOR_ETF_MAP``.

    Returns:
        The ETF ticker, or ``None`` when no row is found, the row's sector is
        NULL, or the sector has no entry in ``SECTOR_ETF_MAP``.
    """
    company = await pool.fetchrow(_COMPANY_SECTOR_SQL, ticker)
    sector = None if company is None else company["sector"]
    if sector is None:
        return None
    return SECTOR_ETF_MAP.get(sector)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Layer score computation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_layer_scores(
|
||||
evidence_signals: list[dict],
|
||||
) -> tuple[float, float, float]:
|
||||
"""Compute company, macro, and competitive layer scores from evidence signals.
|
||||
|
||||
Each signal's source_type determines its layer:
|
||||
- company: news_api, filings_api, web_scrape
|
||||
- macro: macro events (source_type containing 'macro')
|
||||
- competitive: competitive signals (source_type containing 'competitive' or 'pattern')
|
||||
|
||||
Returns (score_company, score_macro, score_competitive) as fractions summing to 1.0.
|
||||
"""
|
||||
company_weight = 0.0
|
||||
macro_weight = 0.0
|
||||
competitive_weight = 0.0
|
||||
|
||||
for sig in evidence_signals:
|
||||
w = sig.get("weight", 0.0)
|
||||
source_type = sig.get("source_type", "").lower()
|
||||
catalyst_type = sig.get("catalyst_type", "").lower()
|
||||
|
||||
if "macro" in source_type or catalyst_type == "macro":
|
||||
macro_weight += w
|
||||
elif "competitive" in source_type or "pattern" in source_type:
|
||||
competitive_weight += w
|
||||
else:
|
||||
company_weight += w
|
||||
|
||||
total = company_weight + macro_weight + competitive_weight
|
||||
if total == 0.0:
|
||||
return (0.0, 0.0, 0.0)
|
||||
|
||||
return (
|
||||
round(company_weight / total, 6),
|
||||
round(macro_weight / total, 6),
|
||||
round(competitive_weight / total, 6),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL statements
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_INSERT_SNAPSHOT_SQL = """
|
||||
INSERT INTO prediction_snapshots (
|
||||
id, generated_at, ticker, "window", horizon, direction, action, mode,
|
||||
strength, confidence, contradiction, p_bull, p_bear,
|
||||
score_company, score_macro, score_competitive,
|
||||
evidence_count, unique_source_count, duplicate_evidence_count,
|
||||
price_at_prediction, spy_price_at_prediction, sector_etf_price_at_prediction,
|
||||
metadata
|
||||
) VALUES (
|
||||
$1::uuid, $2, $3, $4, $5, $6, $7, $8,
|
||||
$9, $10, $11, $12, $13,
|
||||
$14, $15, $16,
|
||||
$17, $18, $19,
|
||||
$20, $21, $22,
|
||||
$23::jsonb
|
||||
)
|
||||
"""
|
||||
|
||||
_INSERT_EVIDENCE_LINK_SQL = """
|
||||
INSERT INTO signal_evidence_links (
|
||||
id, prediction_id, document_id, signal_id, ticker,
|
||||
source, source_type, catalyst_type, sentiment,
|
||||
impact, extraction_confidence, weight,
|
||||
is_duplicate, canonical_evidence_key, contribution_score,
|
||||
metadata
|
||||
) VALUES (
|
||||
$1::uuid, $2::uuid, $3, $4, $5,
|
||||
$6, $7, $8, $9,
|
||||
$10, $11, $12,
|
||||
$13, $14, $15,
|
||||
$16::jsonb
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point (Requirements 1.1–1.7, 2.1–2.6, 3.1–3.4)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def create_prediction_snapshot(
    pool: asyncpg.Pool,
    recommendation: Recommendation,
    trend_summary: TrendSummary,
    evidence_signals: list[dict],
    evidence_docs: list[dict],
) -> PredictionSnapshot:
    """Create and persist a prediction snapshot together with its evidence links.

    Steps:
        1. Fetch the latest close prices (ticker, SPY, sector ETF) from
           market_snapshots; a missing price is logged and stored as NULL.
        2. Compute a canonical evidence key per signal and flag every repeat
           of an already-seen key as a duplicate.
        3. Clamp each signal weight into [0, MAX_SINGLE_DOCUMENT_WEIGHT].
        4. Compute contribution scores over the non-duplicate links only
           (one vote per canonical key); duplicates get 0.0.
        5. Insert the snapshot row and all evidence-link rows in a single
           transaction.

    Signals whose document has neither a title nor a URL — which includes
    signals whose ``document_id`` does not appear in ``evidence_docs`` — are
    keyed by their ``document_id`` (or, if that is empty too, by their own
    link id). Without this, all such signals would share the hash of the empty
    string and every one after the first would be wrongly marked duplicate.

    Args:
        pool: asyncpg connection pool.
        recommendation: The generated Recommendation object.
        trend_summary: The TrendSummary used to generate the recommendation.
        evidence_signals: List of dicts with signal fields (source, source_type,
            catalyst_type, sentiment, impact, extraction_confidence, weight,
            document_id, signal_id, ticker).
        evidence_docs: List of dicts with document metadata (title, url,
            document_id).

    Returns:
        The persisted PredictionSnapshot.
    """
    ticker = recommendation.ticker

    # 1. Fetch prices — handle NULL gracefully (Requirement 1.5)
    ticker_price = await fetch_latest_close_price(pool, ticker)
    if ticker_price is None:
        logger.warning("No market price available for %s at snapshot time", ticker)

    spy_price = await fetch_latest_close_price(pool, "SPY")
    if spy_price is None:
        logger.warning("No SPY price available at snapshot time")

    sector_etf_ticker = await _fetch_sector_etf_ticker(pool, ticker)
    sector_etf_price: float | None = None
    if sector_etf_ticker is not None:
        sector_etf_price = await fetch_latest_close_price(pool, sector_etf_ticker)
        if sector_etf_price is None:
            logger.warning(
                "No sector ETF price available for %s (%s) at snapshot time",
                sector_etf_ticker,
                ticker,
            )
    else:
        logger.warning("No sector ETF mapping found for ticker %s", ticker)

    # 2. Build a doc lookup for canonical key computation. When two docs share
    #    a document_id, the later one wins.
    doc_lookup: dict[str, dict] = {
        doc.get("document_id", ""): doc for doc in evidence_docs
    }

    # 3. Process evidence signals: compute canonical keys, detect duplicates,
    #    clamp weights
    processed_links: list[dict] = []
    seen_canonical_keys: set[str] = set()

    for sig in evidence_signals:
        doc_id = sig.get("document_id", "")
        doc_meta = doc_lookup.get(doc_id, {})
        # "or" rather than a .get default: the key may exist with a None value.
        title = doc_meta.get("title") or ""
        url = doc_meta.get("url") or ""
        link_id = str(uuid.uuid4())

        if title.strip() or url.strip():
            canonical_key = compute_canonical_evidence_key(title, url)
        else:
            # No usable title/URL: fall back to an identity that cannot
            # collide with unrelated documents.
            fallback_identity = (
                f"document_id:{doc_id}" if doc_id else f"link_id:{link_id}"
            )
            canonical_key = hashlib.sha256(
                fallback_identity.encode("utf-8")
            ).hexdigest()

        # Detect duplicates: same canonical key seen earlier in this snapshot
        is_duplicate = canonical_key in seen_canonical_keys
        seen_canonical_keys.add(canonical_key)

        # Clamp weight into [0, MAX_SINGLE_DOCUMENT_WEIGHT] (Requirement 3.3).
        # The lower bound keeps contribution scores inside [0, 1].
        raw_weight = sig.get("weight") or 0.0
        clamped_weight = max(0.0, min(raw_weight, MAX_SINGLE_DOCUMENT_WEIGHT))

        processed_links.append({
            "id": link_id,
            "document_id": doc_id,
            "signal_id": sig.get("signal_id", ""),
            "ticker": sig.get("ticker", ticker),
            "source": sig.get("source", ""),
            "source_type": sig.get("source_type", ""),
            "catalyst_type": sig.get("catalyst_type", ""),
            "sentiment": sig.get("sentiment", ""),
            "impact": sig.get("impact", 0.0),
            "extraction_confidence": sig.get("extraction_confidence", 0.0),
            "weight": clamped_weight,
            "is_duplicate": is_duplicate,
            "canonical_evidence_key": canonical_key,
        })

    # 4. Compute contribution scores — one vote per canonical key (Requirement 3.4)
    #    Only non-duplicate links contribute to the weight pool.
    non_dup_weights = [
        link["weight"] for link in processed_links if not link["is_duplicate"]
    ]
    non_dup_scores = iter(compute_contribution_scores(non_dup_weights))

    # Scores come back in the same order as the non-duplicate links, so they
    # can be consumed sequentially; duplicates get 0.0.
    for link in processed_links:
        link["contribution_score"] = (
            0.0 if link["is_duplicate"] else next(non_dup_scores)
        )

    # 5. Compute deduplication quality metrics (Requirements 3.1, 3.2)
    unique_source_count = len(
        {link["source"] for link in processed_links if not link["is_duplicate"]}
    )
    duplicate_evidence_count = sum(
        1 for link in processed_links if link["is_duplicate"]
    )

    # 6. Compute layer scores from the raw evidence signals
    score_company, score_macro, score_competitive = _compute_layer_scores(
        evidence_signals
    )

    # 7. Build metadata from trend summary context (Requirement 1.7)
    metadata: dict = {}
    market_context = trend_summary.market_context
    if market_context is not None:
        metadata["market_context"] = {
            "ticker": market_context.ticker,
            "price_change_pct": market_context.price_change_pct,
            "avg_volume": market_context.avg_volume,
            "volume_change_pct": market_context.volume_change_pct,
            "volatility": market_context.volatility,
            "latest_close": market_context.latest_close,
            "bars_available": market_context.bars_available,
        }
    if sector_etf_ticker is not None:
        metadata["sector_etf_ticker"] = sector_etf_ticker

    # 8. Build the snapshot
    snapshot_id = str(uuid.uuid4())
    p_bull = trend_summary.p_bull
    snapshot = PredictionSnapshot(
        id=snapshot_id,
        generated_at=recommendation.generated_at,
        ticker=ticker,
        window=trend_summary.window.value,
        horizon=recommendation.time_horizon,
        direction=trend_summary.trend_direction.value,
        action=recommendation.action.value,
        mode=recommendation.mode.value,
        strength=trend_summary.trend_strength,
        confidence=recommendation.confidence,
        contradiction=trend_summary.contradiction_score,
        p_bull=p_bull,
        # p_bear is the complement of p_bull when p_bull is known.
        p_bear=1.0 - p_bull if p_bull is not None else None,
        score_company=score_company,
        score_macro=score_macro,
        score_competitive=score_competitive,
        evidence_count=len(processed_links),
        unique_source_count=unique_source_count,
        duplicate_evidence_count=duplicate_evidence_count,
        price_at_prediction=ticker_price,
        spy_price_at_prediction=spy_price,
        sector_etf_price_at_prediction=sector_etf_price,
        metadata=metadata,
    )

    # 9. Build evidence link objects
    evidence_link_objects: list[SignalEvidenceLink] = [
        SignalEvidenceLink(
            id=link["id"],
            prediction_id=snapshot_id,
            document_id=link["document_id"],
            signal_id=link["signal_id"],
            ticker=link["ticker"],
            source=link["source"],
            source_type=link["source_type"],
            catalyst_type=link["catalyst_type"],
            sentiment=link["sentiment"],
            impact=link["impact"],
            extraction_confidence=link["extraction_confidence"],
            weight=link["weight"],
            is_duplicate=link["is_duplicate"],
            canonical_evidence_key=link["canonical_evidence_key"],
            contribution_score=link["contribution_score"],
        )
        for link in processed_links
    ]

    # 10. Persist in a transaction (Requirements 1.6, 2.6): the snapshot row
    #     and all of its links are written together or not at all.
    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(
                _INSERT_SNAPSHOT_SQL,
                snapshot.id,
                snapshot.generated_at,
                snapshot.ticker,
                snapshot.window,
                snapshot.horizon,
                snapshot.direction,
                snapshot.action,
                snapshot.mode,
                snapshot.strength,
                snapshot.confidence,
                snapshot.contradiction,
                snapshot.p_bull,
                snapshot.p_bear,
                snapshot.score_company,
                snapshot.score_macro,
                snapshot.score_competitive,
                snapshot.evidence_count,
                snapshot.unique_source_count,
                snapshot.duplicate_evidence_count,
                snapshot.price_at_prediction,
                snapshot.spy_price_at_prediction,
                snapshot.sector_etf_price_at_prediction,
                json.dumps(snapshot.metadata),
            )

            for link in evidence_link_objects:
                await conn.execute(
                    _INSERT_EVIDENCE_LINK_SQL,
                    link.id,
                    link.prediction_id,
                    link.document_id,
                    link.signal_id,
                    link.ticker,
                    link.source,
                    link.source_type,
                    link.catalyst_type,
                    link.sentiment,
                    link.impact,
                    link.extraction_confidence,
                    link.weight,
                    link.is_duplicate,
                    link.canonical_evidence_key,
                    link.contribution_score,
                    json.dumps(link.metadata),
                )

    logger.info(
        "Created prediction snapshot %s for %s: %d evidence links "
        "(%d unique sources, %d duplicates), prices: ticker=%s spy=%s sector_etf=%s",
        snapshot_id,
        ticker,
        len(evidence_link_objects),
        unique_source_count,
        duplicate_evidence_count,
        ticker_price,
        spy_price,
        sector_etf_price,
    )

    return snapshot
|
||||
@@ -0,0 +1,689 @@
|
||||
"""Unit tests for model validation, calibration, and signal quality modules.
|
||||
|
||||
Covers prediction snapshot writer, outcome evaluator, metrics engine,
|
||||
calibration engine, and quality gate — all pure-function / deterministic tests.
|
||||
|
||||
Requirements: 1.1, 2.3, 2.4, 2.5, 3.3, 4.2, 4.5, 4.6, 4.7,
|
||||
5.3, 5.4, 6.1, 6.2, 6.5, 8.1, 8.2, 8.3, 11.1, 11.6
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
import pytest
|
||||
|
||||
# -- Quality Gate --
|
||||
from services.trading.model_quality_gate import (
|
||||
QualityGateConfig,
|
||||
_evaluate_thresholds,
|
||||
)
|
||||
|
||||
# -- Calibration Engine --
|
||||
from services.validation.calibration import (
|
||||
compute_adjusted_evidence_weight,
|
||||
compute_source_reliability,
|
||||
)
|
||||
|
||||
# -- Metrics Engine --
|
||||
from services.validation.metrics import (
|
||||
compute_brier_score,
|
||||
compute_calibration_error,
|
||||
compute_information_coefficient,
|
||||
compute_rank_information_coefficient,
|
||||
)
|
||||
|
||||
# -- Outcome Evaluator --
|
||||
from services.validation.outcome_evaluator import (
|
||||
_compute_return,
|
||||
_is_direction_correct,
|
||||
_is_profitable,
|
||||
)
|
||||
|
||||
# -- Prediction Snapshot Writer --
|
||||
from services.validation.prediction_snapshot import (
|
||||
MAX_SINGLE_DOCUMENT_WEIGHT,
|
||||
compute_canonical_evidence_key,
|
||||
compute_contribution_scores,
|
||||
)
|
||||
|
||||
# ===================================================================
|
||||
# 8.2 — Prediction Snapshot Writer unit tests
|
||||
# Requirements: 1.1, 2.3, 2.4, 2.5, 3.3
|
||||
# ===================================================================
|
||||
|
||||
|
||||
class TestCanonicalEvidenceKey:
    """Behavioural checks for compute_canonical_evidence_key."""

    def test_known_title_url_produces_expected_sha256(self):
        """A fixed title/URL pair always hashes to the same known digest."""
        expected_digest = (
            "abd5818d51579a7af51cd06861289c7f1fdc97c0f522e8ba13ce9b4aad01cb6f"
        )
        actual = compute_canonical_evidence_key(
            "Test Article", "https://example.com/article?ref=123"
        )
        assert actual == expected_digest

    def test_empty_inputs(self):
        """Empty title plus empty URL hashes like the empty byte string."""
        assert compute_canonical_evidence_key("", "") == hashlib.sha256(b"").hexdigest()

    def test_unicode_inputs(self):
        """Non-ASCII title and URL hash to the known digest."""
        expected_digest = (
            "553553928bb4e36abdf283ff3c52df0695fca09809159650a9bdcb4fb2c5f62b"
        )
        actual = compute_canonical_evidence_key(
            "日本語テスト", "https://example.com/日本語"
        )
        assert actual == expected_digest

    def test_normalization_case_insensitive(self):
        """Letter case in title and URL does not affect the key."""
        title, url = "test article", "https://example.com/path"
        lower_key = compute_canonical_evidence_key(title, url)
        upper_key = compute_canonical_evidence_key(title.upper(), url.upper())
        assert lower_key == upper_key

    def test_normalization_strips_query_params(self):
        """The URL query string does not affect the key."""
        bare_url = "https://example.com/article"
        tracked_url = bare_url + "?utm_source=twitter&ref=123"
        assert compute_canonical_evidence_key(
            "title", tracked_url
        ) == compute_canonical_evidence_key("title", bare_url)

    def test_normalization_strips_whitespace(self):
        """Whitespace around the title does not affect the key."""
        url = "https://example.com"
        assert compute_canonical_evidence_key(
            "test", url
        ) == compute_canonical_evidence_key(" test ", url)
|
||||
|
||||
|
||||
class TestDuplicateDetection:
    """Checks duplicate detection driven by canonical evidence keys."""

    def test_three_docs_two_sharing_key_one_duplicate(self):
        """Of 3 docs, the 2 that normalize to one key yield exactly 1 duplicate."""
        # Mirrors the "seen key" bookkeeping used by create_prediction_snapshot:
        # a doc is a duplicate when its key was produced by an earlier doc.
        title_url_pairs = [
            ("Breaking News", "https://news.com/article"),
            ("breaking news", "https://news.com/article?ref=1"),
            ("Other Story", "https://other.com/story"),
        ]

        already_seen: set[str] = set()
        flags: list[bool] = []
        for title, url in title_url_pairs:
            key = compute_canonical_evidence_key(title, url)
            flags.append(key in already_seen)
            already_seen.add(key)

        assert flags == [False, True, False]
        assert flags.count(True) == 1
|
||||
|
||||
|
||||
class TestContributionScores:
    """Behavioural checks for compute_contribution_scores."""

    def test_known_weights(self):
        """Weights that already sum to 1.0 come back unchanged."""
        weights = [0.5, 0.3, 0.2]
        result = compute_contribution_scores(weights)
        assert result == pytest.approx(weights)
        assert sum(result) == pytest.approx(1.0)

    def test_single_doc(self):
        """A lone document receives the whole contribution."""
        assert compute_contribution_scores([0.7]) == pytest.approx([1.0])

    def test_empty_input(self):
        """No weights in, no scores out."""
        assert compute_contribution_scores([]) == []

    def test_all_zero_weights(self):
        """When every weight is zero the contribution is shared equally."""
        result = compute_contribution_scores([0.0, 0.0, 0.0])
        assert len(result) == 3
        for score in result:
            assert score == pytest.approx(1.0 / 3.0)

    def test_scores_sum_to_one(self):
        """Arbitrary positive weights are normalized to sum to 1.0."""
        result = compute_contribution_scores([1.0, 2.0, 3.0, 4.0])
        assert sum(result) == pytest.approx(1.0)
        assert result == pytest.approx([0.1, 0.2, 0.3, 0.4])
|
||||
|
||||
|
||||
class TestWeightClamping:
    """Checks clamping of raw weights against MAX_SINGLE_DOCUMENT_WEIGHT."""

    def test_weight_above_max_clamped(self):
        """A raw weight of 1.5 is capped at MAX_SINGLE_DOCUMENT_WEIGHT (expected 1.0)."""
        assert min(1.5, MAX_SINGLE_DOCUMENT_WEIGHT) == 1.0

    def test_weight_at_max_unchanged(self):
        """A raw weight equal to the maximum passes through untouched."""
        assert min(1.0, MAX_SINGLE_DOCUMENT_WEIGHT) == 1.0

    def test_weight_below_max_unchanged(self):
        """A raw weight under the maximum passes through untouched."""
        assert min(0.5, MAX_SINGLE_DOCUMENT_WEIGHT) == 0.5
|
||||
|
||||
|
||||
# ===================================================================
|
||||
# 8.3 — Outcome Evaluator unit tests
|
||||
# Requirements: 4.2, 4.5, 4.6, 4.7
|
||||
# ===================================================================
|
||||
|
||||
|
||||
class TestComputeReturn:
    """Unit tests covering _compute_return."""

    def test_positive_return(self):
        """Moving from 100 to 110 yields a return of 0.10."""
        observed = _compute_return(100.0, 110.0)
        assert observed == pytest.approx(0.10)

    def test_negative_return(self):
        """Moving from 100 to 90 yields a return of -0.10."""
        observed = _compute_return(100.0, 90.0)
        assert observed == pytest.approx(-0.10)

    def test_zero_return(self):
        """An unchanged price yields a return of 0.0."""
        observed = _compute_return(100.0, 100.0)
        assert observed == pytest.approx(0.0)

    def test_zero_current_price(self):
        """A starting price of 0 yields exactly 0.0 instead of dividing by zero."""
        observed = _compute_return(0.0, 110.0)
        assert observed == 0.0
|
||||
|
||||
|
||||
class TestDirectionCorrect:
    """Unit tests covering _is_direction_correct."""

    def test_bullish_positive_return(self):
        """A bullish call with a gain is correct."""
        verdict = _is_direction_correct("bullish", 0.05)
        assert verdict is True

    def test_bullish_negative_return(self):
        """A bullish call with a loss is incorrect."""
        verdict = _is_direction_correct("bullish", -0.05)
        assert verdict is False

    def test_bearish_negative_return(self):
        """A bearish call with a loss is correct."""
        verdict = _is_direction_correct("bearish", -0.05)
        assert verdict is True

    def test_bearish_positive_return(self):
        """A bearish call with a gain is incorrect."""
        verdict = _is_direction_correct("bearish", 0.05)
        assert verdict is False

    def test_bullish_zero_return(self):
        """A bullish call with a flat return is incorrect (gain must be strict)."""
        verdict = _is_direction_correct("bullish", 0.0)
        assert verdict is False

    def test_bearish_zero_return(self):
        """A bearish call with a flat return is incorrect (loss must be strict)."""
        verdict = _is_direction_correct("bearish", 0.0)
        assert verdict is False

    def test_mixed_direction(self):
        """A mixed call is never counted as correct, whatever the return."""
        for realized in (0.05, -0.05):
            assert _is_direction_correct("mixed", realized) is False

    def test_case_insensitive(self):
        """Direction labels are matched regardless of letter case."""
        for label, realized in (("Bullish", 0.05), ("BEARISH", -0.05)):
            assert _is_direction_correct(label, realized) is True
|
||||
|
||||
|
||||
class TestIsProfitable:
    """Unit tests covering _is_profitable."""

    def test_buy_positive_return(self):
        """Buying ahead of a gain is profitable."""
        verdict = _is_profitable("buy", 0.05)
        assert verdict is True

    def test_buy_negative_return(self):
        """Buying ahead of a loss is not profitable."""
        verdict = _is_profitable("buy", -0.05)
        assert verdict is False

    def test_sell_negative_return(self):
        """Selling ahead of a loss is profitable."""
        verdict = _is_profitable("sell", -0.05)
        assert verdict is True

    def test_sell_positive_return(self):
        """Selling ahead of a gain is not profitable."""
        verdict = _is_profitable("sell", 0.05)
        assert verdict is False

    def test_hold_any_return(self):
        """Holding is never counted as profitable, whatever the return."""
        for realized in (0.05, -0.05):
            assert _is_profitable("hold", realized) is False

    def test_case_insensitive(self):
        """Action labels are matched regardless of letter case."""
        for label, realized in (("Buy", 0.05), ("SELL", -0.05)):
            assert _is_profitable(label, realized) is True
|
||||
|
||||
|
||||
class TestExcessReturn:
    """Excess return is the ticker return minus the benchmark (SPY) return."""

    def test_excess_return_vs_spy(self):
        """Ticker up 10% against SPY up 5% leaves a 5% excess."""
        ticker = _compute_return(100.0, 110.0)  # 0.10
        benchmark = _compute_return(100.0, 105.0)  # 0.05
        assert ticker - benchmark == pytest.approx(0.05)

    def test_negative_excess_return(self):
        """Ticker up 3% against SPY up 5% leaves a -2% excess."""
        ticker = _compute_return(100.0, 103.0)  # 0.03
        benchmark = _compute_return(100.0, 105.0)  # 0.05
        assert ticker - benchmark == pytest.approx(-0.02)

    def test_zero_excess_return(self):
        """Identical ticker and benchmark returns leave no excess."""
        ticker = _compute_return(100.0, 110.0)
        benchmark = _compute_return(100.0, 110.0)
        assert ticker - benchmark == pytest.approx(0.0)
|
||||
|
||||
|
||||
# ===================================================================
|
||||
# 8.4 — Metrics Engine unit tests
|
||||
# Requirements: 5.3, 5.4, 6.1, 6.2, 6.5
|
||||
# ===================================================================
|
||||
|
||||
|
||||
class TestCalibrationError:
    """Tests for compute_calibration_error (ECE)."""

    def test_perfect_calibration_ece_zero(self):
        """Perfect calibration → ECE = 0.0.

        Every prediction has confidence 0.75 (expected to land in the
        [0.70, 0.80) bucket) and exactly 75% of the outcomes are wins, so
        the observed win rate matches the average confidence.
        """
        confidences = [0.75] * 100
        outcomes = [True] * 75 + [False] * 25
        ece, _buckets = compute_calibration_error(confidences, outcomes)
        assert ece == pytest.approx(0.0, abs=1e-9)

    def test_all_overconfident_positive_ece(self):
        """All overconfident (high confidence, low win rate) → positive ECE."""
        # All predictions at 0.95 confidence but only 50% win rate.
        confidences = [0.95] * 100
        outcomes = [True] * 50 + [False] * 50
        ece, _buckets = compute_calibration_error(confidences, outcomes)
        assert ece > 0.0
        # ECE should be |0.95 - 0.50| = 0.45
        assert ece == pytest.approx(0.45, abs=0.01)

    def test_empty_input_returns_zero(self):
        """Empty input → ECE = 0.0, empty buckets."""
        ece, buckets = compute_calibration_error([], [])
        assert ece == 0.0
        assert buckets == []

    def test_miscalibrated_flag(self):
        """Buckets with |avg_conf - win_rate| > 0.15 are flagged."""
        # All in the [0.90, 1.00] bucket with 0% win rate → diff = 0.95
        confidences = [0.95] * 20
        outcomes = [False] * 20
        _ece, buckets = compute_calibration_error(confidences, outcomes)
        # Find the [0.90, 1.00] bucket. The boundary is matched with a
        # tolerance because a boundary computed arithmetically (for example
        # 0.5 + 4 * 0.1) need not be bit-identical to the literal 0.90, and an
        # exact comparison would then select no bucket at all.
        high_bucket = [b for b in buckets if b.bucket_low == pytest.approx(0.90)]
        assert len(high_bucket) == 1
        assert high_bucket[0].miscalibrated is True

    def test_ece_in_valid_range(self):
        """ECE is always in [0.0, 1.0]."""
        confidences = [0.55, 0.65, 0.75, 0.85, 0.95]
        outcomes = [False, True, False, True, False]
        ece, _ = compute_calibration_error(confidences, outcomes)
        assert 0.0 <= ece <= 1.0
|
||||
|
||||
|
||||
class TestBrierScore:
    """Unit tests covering compute_brier_score."""

    def test_all_correct_at_p1(self):
        """Certain-bullish forecasts that all come true score 0.0."""
        forecasts = [1.0] * 10
        realized = [True] * 10
        score = compute_brier_score(forecasts, realized)
        assert score == pytest.approx(0.0)

    def test_all_wrong_at_p1(self):
        """Certain-bullish forecasts that all fail score 1.0."""
        forecasts = [1.0] * 10
        realized = [False] * 10
        score = compute_brier_score(forecasts, realized)
        assert score == pytest.approx(1.0)

    def test_all_correct_at_p0(self):
        """Certain-bearish forecasts (p=0.0) that all come true score 0.0."""
        forecasts = [0.0] * 10
        realized = [False] * 10
        score = compute_brier_score(forecasts, realized)
        assert score == pytest.approx(0.0)

    def test_empty_input(self):
        """With nothing to score the result is exactly 0.0."""
        score = compute_brier_score([], [])
        assert score == 0.0

    def test_mixed_predictions(self):
        """A mix of hits and misses scores strictly between 0 and 1."""
        forecasts = [0.8, 0.6, 0.3]
        realized = [True, False, True]
        score = compute_brier_score(forecasts, realized)
        assert 0.0 < score < 1.0
|
||||
|
||||
|
||||
class TestInformationCoefficient:
    """Unit tests covering compute_information_coefficient (Pearson IC)."""

    def test_perfect_positive_correlation(self):
        """Returns that rise linearly with the scores give IC = 1.0."""
        signal = list(range(30))
        realized = [value * 2.0 + 1.0 for value in signal]  # y = 2x + 1
        coefficient = compute_information_coefficient(signal, realized)
        assert coefficient is not None
        assert coefficient == pytest.approx(1.0, abs=1e-9)

    def test_perfect_negative_correlation(self):
        """Returns that fall linearly with the scores give IC = -1.0."""
        signal = list(range(30))
        realized = [-value * 2.0 for value in signal]
        coefficient = compute_information_coefficient(signal, realized)
        assert coefficient is not None
        assert coefficient == pytest.approx(-1.0, abs=1e-9)

    def test_fewer_than_30_returns_none(self):
        """With only 29 observations no coefficient is reported."""
        signal = list(range(29))
        realized = list(range(29))
        coefficient = compute_information_coefficient(signal, realized)
        assert coefficient is None

    def test_ic_in_valid_range(self):
        """For valid data the coefficient stays within [-1.0, 1.0]."""
        signal = [float(index % 7) for index in range(50)]
        realized = [float(index % 5) for index in range(50)]
        coefficient = compute_information_coefficient(signal, realized)
        assert coefficient is not None
        assert -1.0 <= coefficient <= 1.0
|
||||
|
||||
|
||||
class TestRankInformationCoefficient:
    """Unit tests covering compute_rank_information_coefficient (Spearman Rank IC)."""

    def test_perfect_rank_correlation(self):
        """Identical orderings give Rank IC = 1.0."""
        signal = list(range(30))
        realized = list(range(30))  # same ordering as the scores
        coefficient = compute_rank_information_coefficient(signal, realized)
        assert coefficient is not None
        assert coefficient == pytest.approx(1.0, abs=1e-9)

    def test_perfect_anti_rank_correlation(self):
        """Exactly reversed orderings give Rank IC = -1.0."""
        signal = list(range(30))
        realized = list(range(29, -1, -1))  # scores in reverse order
        coefficient = compute_rank_information_coefficient(signal, realized)
        assert coefficient is not None
        assert coefficient == pytest.approx(-1.0, abs=1e-9)

    def test_fewer_than_30_returns_none(self):
        """With only 29 observations no coefficient is reported."""
        signal = list(range(29))
        realized = list(range(29))
        coefficient = compute_rank_information_coefficient(signal, realized)
        assert coefficient is None
|
||||
|
||||
|
||||
# ===================================================================
|
||||
# 8.5 — Calibration Engine unit tests
|
||||
# Requirements: 8.1, 8.2, 8.3
|
||||
# ===================================================================
|
||||
|
||||
|
||||
class TestSourceReliability:
    """Unit tests covering compute_source_reliability (Bayesian shrinkage)."""

    def test_zero_samples_returns_prior(self):
        """With no samples the result is the prior mean, 0.5."""
        estimate = compute_source_reliability(0.8, 0)
        assert estimate == 0.5

    def test_large_sample_approaches_observed(self):
        """With n=1000 and a 0.8 win rate the result sits just under 0.8."""
        estimate = compute_source_reliability(0.8, 1000)
        assert estimate == pytest.approx(0.7912621359223302)
        # Near the observed win rate, but still pulled slightly toward the prior.
        assert abs(estimate - 0.8) < 0.02

    def test_moderate_sample(self):
        """With n=30 and a 0.7 win rate the result is 0.6.

        0.5 + (30/60) * (0.7 - 0.5) = 0.5 + 0.5 * 0.2 = 0.6
        """
        estimate = compute_source_reliability(0.7, 30)
        assert estimate == pytest.approx(0.6)

    def test_reliability_in_range(self):
        """The result stays within [0.0, 1.0], including for extreme win rates."""
        for win_rate, sample_count in ((0.0, 100), (1.0, 100), (0.5, 1)):
            assert 0.0 <= compute_source_reliability(win_rate, sample_count) <= 1.0

    def test_negative_sample_count_returns_prior(self):
        """A negative sample count behaves like zero samples and returns 0.5."""
        estimate = compute_source_reliability(0.8, -5)
        assert estimate == 0.5
|
||||
|
||||
|
||||
class TestAdjustedEvidenceWeight:
    """Unit tests covering compute_adjusted_evidence_weight."""

    def test_reliability_half_gives_base_weight(self):
        """Reliability 0.5 leaves the base weight as is: base * (0.5 + 0.5)."""
        adjusted = compute_adjusted_evidence_weight(1.0, 0.5)
        assert adjusted == pytest.approx(1.0)

    def test_high_reliability_increases_weight(self):
        """Reliability 1.0 scales the base weight by 1.5."""
        adjusted = compute_adjusted_evidence_weight(1.0, 1.0)
        assert adjusted == pytest.approx(1.5)

    def test_low_reliability_decreases_weight(self):
        """Reliability 0.0 scales the base weight by 0.5."""
        adjusted = compute_adjusted_evidence_weight(1.0, 0.0)
        assert adjusted == pytest.approx(0.5)

    def test_clamped_to_upper_bound(self):
        """A large base weight with high reliability is capped at 2.0."""
        adjusted = compute_adjusted_evidence_weight(3.0, 1.0)
        assert adjusted == 2.0

    def test_clamped_to_lower_bound(self):
        """A small base weight with low reliability is floored at 0.1."""
        adjusted = compute_adjusted_evidence_weight(0.1, 0.0)
        assert adjusted == 0.1

    def test_mid_range_not_clamped(self):
        """Ordinary inputs land inside the bounds with no clamping applied."""
        adjusted = compute_adjusted_evidence_weight(0.8, 0.6)
        # 0.8 * (0.5 + 0.6) = 0.8 * 1.1 = 0.88
        assert adjusted == pytest.approx(0.88)
        assert 0.1 <= adjusted <= 2.0
|
||||
|
||||
|
||||
# ===================================================================
|
||||
# 8.6 — Quality Gate unit tests
|
||||
# Requirements: 11.1, 11.6
|
||||
# ===================================================================
|
||||
|
||||
|
||||
class TestQualityGate:
|
||||
"""Tests for _evaluate_thresholds and QualityGateConfig."""
|
||||
|
||||
def _make_passing_snapshot(self) -> dict:
|
||||
"""Return a metric snapshot dict that meets all default thresholds."""
|
||||
return {
|
||||
"prediction_count": 200,
|
||||
"information_coefficient": 0.10,
|
||||
"win_rate": 0.60,
|
||||
"calibration_error": 0.08,
|
||||
"avg_excess_return_vs_spy": 0.02,
|
||||
}
|
||||
|
||||
def test_all_thresholds_met_pass(self):
|
||||
"""All thresholds met → every result is passed=True."""
|
||||
config = QualityGateConfig()
|
||||
snapshot = self._make_passing_snapshot()
|
||||
|
||||
results = _evaluate_thresholds(snapshot, config)
|
||||
|
||||
assert len(results) == 5
|
||||
assert all(r.passed for r in results), (
|
||||
f"Expected all thresholds to pass, but got: "
|
||||
f"{[(r.name, r.passed) for r in results]}"
|
||||
)
|
||||
|
||||
def test_one_threshold_failed_ic_below_min(self):
|
||||
"""IC below min_ic → that threshold fails, others pass."""
|
||||
config = QualityGateConfig()
|
||||
snapshot = self._make_passing_snapshot()
|
||||
snapshot["information_coefficient"] = 0.01 # below min_ic=0.03
|
||||
|
||||
results = _evaluate_thresholds(snapshot, config)
|
||||
|
||||
results_by_name = {r.name: r for r in results}
|
||||
assert results_by_name["min_ic"].passed is False
|
||||
assert results_by_name["min_ic"].actual == pytest.approx(0.01)
|
||||
assert results_by_name["min_ic"].threshold == pytest.approx(0.03)
|
||||
# All other thresholds should still pass
|
||||
for name, result in results_by_name.items():
|
||||
if name != "min_ic":
|
||||
assert result.passed is True, f"{name} should pass but didn't"
|
||||
|
||||
def test_all_thresholds_below_all_fail(self):
|
||||
"""All metric values below thresholds → all results are passed=False."""
|
||||
config = QualityGateConfig()
|
||||
snapshot = {
|
||||
"prediction_count": 10, # below 100
|
||||
"information_coefficient": 0.0, # below 0.03
|
||||
"win_rate": 0.40, # below 0.53
|
||||
"calibration_error": 0.50, # above 0.15
|
||||
"avg_excess_return_vs_spy": -0.05, # below 0.0
|
||||
}
|
||||
|
||||
results = _evaluate_thresholds(snapshot, config)
|
||||
|
||||
assert len(results) == 5
|
||||
assert all(not r.passed for r in results), (
|
||||
f"Expected all thresholds to fail, but got: "
|
||||
f"{[(r.name, r.passed) for r in results]}"
|
||||
)
|
||||
|
||||
def test_failsafe_none_values_treated_as_worst_case(self):
|
||||
"""Missing (None) metric values are treated as worst-case defaults.
|
||||
|
||||
This tests the fail-safe behavior: when no snapshots exist,
|
||||
the snapshot dict would have None values. _evaluate_thresholds
|
||||
treats None as 0 for min-thresholds and 1.0 for max_ece,
|
||||
causing all thresholds to fail → paper-only.
|
||||
"""
|
||||
config = QualityGateConfig()
|
||||
snapshot = {
|
||||
"prediction_count": None,
|
||||
"information_coefficient": None,
|
||||
"win_rate": None,
|
||||
"calibration_error": None,
|
||||
"avg_excess_return_vs_spy": None,
|
||||
}
|
||||
|
||||
results = _evaluate_thresholds(snapshot, config)
|
||||
|
||||
results_by_name = {r.name: r for r in results}
|
||||
# prediction_count: None → 0, below 100 → fail
|
||||
assert results_by_name["min_prediction_count"].passed is False
|
||||
assert results_by_name["min_prediction_count"].actual == 0.0
|
||||
# IC: None → 0.0, below 0.03 → fail
|
||||
assert results_by_name["min_ic"].passed is False
|
||||
assert results_by_name["min_ic"].actual == 0.0
|
||||
# win_rate: None → 0.0, below 0.53 → fail
|
||||
assert results_by_name["min_win_rate"].passed is False
|
||||
assert results_by_name["min_win_rate"].actual == 0.0
|
||||
# calibration_error: None → 1.0 (worst-case), above 0.15 → fail
|
||||
assert results_by_name["max_ece"].passed is False
|
||||
assert results_by_name["max_ece"].actual == 1.0
|
||||
# excess_return: None → 0.0, equal to min 0.0 → pass (>= 0.0)
|
||||
assert results_by_name["min_excess_return_vs_spy"].passed is True
|
||||
assert results_by_name["min_excess_return_vs_spy"].actual == 0.0
|
||||
|
||||
def test_stale_snapshot_age_exceeds_max(self):
|
||||
"""Snapshot age exceeding max_snapshot_age_hours causes gate failure.
|
||||
|
||||
The evaluate_quality_gate async function checks snapshot age
|
||||
before calling _evaluate_thresholds. Here we verify the config
|
||||
field is respected by testing the age comparison logic directly.
|
||||
"""
|
||||
config = QualityGateConfig(max_snapshot_age_hours=24)
|
||||
age_hours = 30.0 # 30 hours old, exceeds 24h max
|
||||
|
||||
assert age_hours > config.max_snapshot_age_hours
|
||||
|
||||
def test_threshold_boundary_exact_values(self):
|
||||
"""Metric values exactly at threshold boundaries → pass.
|
||||
|
||||
min thresholds use >=, max thresholds use <=.
|
||||
"""
|
||||
config = QualityGateConfig()
|
||||
snapshot = {
|
||||
"prediction_count": 100, # exactly min_prediction_count
|
||||
"information_coefficient": 0.03, # exactly min_ic
|
||||
"win_rate": 0.53, # exactly min_win_rate
|
||||
"calibration_error": 0.15, # exactly max_ece
|
||||
"avg_excess_return_vs_spy": 0.0, # exactly min_excess_return
|
||||
}
|
||||
|
||||
results = _evaluate_thresholds(snapshot, config)
|
||||
|
||||
assert all(r.passed for r in results), (
|
||||
f"Boundary values should pass, but got: "
|
||||
f"{[(r.name, r.passed, r.actual, r.threshold) for r in results]}"
|
||||
)
|
||||
|
||||
def test_custom_config_thresholds(self):
|
||||
"""Custom QualityGateConfig thresholds are respected."""
|
||||
config = QualityGateConfig(
|
||||
min_prediction_count=50,
|
||||
min_ic=0.01,
|
||||
min_win_rate=0.51,
|
||||
max_ece=0.20,
|
||||
min_excess_return_vs_spy=-0.01,
|
||||
)
|
||||
snapshot = {
|
||||
"prediction_count": 60,
|
||||
"information_coefficient": 0.02,
|
||||
"win_rate": 0.52,
|
||||
"calibration_error": 0.18,
|
||||
"avg_excess_return_vs_spy": -0.005,
|
||||
}
|
||||
|
||||
results = _evaluate_thresholds(snapshot, config)
|
||||
|
||||
assert all(r.passed for r in results), (
|
||||
f"Custom thresholds should pass, but got: "
|
||||
f"{[(r.name, r.passed) for r in results]}"
|
||||
)
|
||||
@@ -0,0 +1,660 @@
|
||||
"""Property-based tests for model validation, calibration, and signal quality.
|
||||
|
||||
Feature: model-validation-calibration
|
||||
|
||||
Tests correctness properties from the design specification covering
|
||||
canonical evidence key determinism/idempotence, contribution score
|
||||
invariants, calibration error bounds, Brier score bounds, information
|
||||
coefficient bounds, source reliability shrinkage, and quality gate
|
||||
determinism.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import urllib.parse
|
||||
|
||||
from hypothesis import given, settings
|
||||
from hypothesis import strategies as st
|
||||
|
||||
from services.validation.prediction_snapshot import (
|
||||
compute_canonical_evidence_key,
|
||||
compute_contribution_scores,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Titles: any text of up to 200 characters (whitespace and unicode included).
title_strategy = st.text(min_size=0, max_size=200)


def _assemble_url(scheme: str, host: str, path: str, query: str) -> str:
    """Join generated parts into a URL, leaving params and fragment empty."""
    return urllib.parse.urlunparse((scheme, host, path, "", query, ""))


# URLs: http(s) URLs with a generated host, up to four path segments and an
# optional query string of one to four key=value pairs.
url_strategy = st.builds(
    _assemble_url,
    scheme=st.sampled_from(["http", "https"]),
    host=st.from_regex(r"[a-z0-9]{1,20}\.[a-z]{2,6}", fullmatch=True),
    path=st.from_regex(r"(/[a-z0-9\-]{0,15}){0,4}", fullmatch=True),
    query=st.from_regex(
        r"([a-z]{1,8}=[a-z0-9]{1,8}(&[a-z]{1,8}=[a-z0-9]{1,8}){0,3})?",
        fullmatch=True,
    ),
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 4: Canonical Evidence Key Determinism and Normalization Idempotence
|
||||
# Validates: Requirements 2.3, 17.4
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(title=title_strategy, url=url_strategy)
@settings(max_examples=100)
def test_canonical_evidence_key_determinism(title: str, url: str) -> None:
    """**Validates: Requirements 2.3, 17.4**

    Hashing the same (title, url) pair twice SHALL yield the same canonical
    evidence key (determinism), and that key SHALL look like a SHA256 hex
    digest: 64 lowercase hexadecimal characters.
    """
    key1 = compute_canonical_evidence_key(title, url)
    key2 = compute_canonical_evidence_key(title, url)
    assert key1 == key2, (
        f"Determinism violated: same inputs produced different keys: "
        f"{key1!r} != {key2!r}"
    )

    # Shape check: 64 characters, all drawn from the lowercase hex alphabet.
    digest_length = len(key1)
    assert digest_length == 64, f"Expected 64-char hex digest, got {digest_length}"
    stray_characters = set(key1) - set("0123456789abcdef")
    assert not stray_characters, (
        f"Key contains non-hex characters: {key1!r}"
    )
|
||||
|
||||
|
||||
@given(title=title_strategy, url=url_strategy)
@settings(max_examples=100)
def test_canonical_evidence_key_normalization_idempotence(title: str, url: str) -> None:
    """**Validates: Requirements 2.3, 17.4**

    Feeding already-normalized inputs to the key function SHALL give the
    same key as feeding it the raw inputs (idempotence).

    The normalization applied here:
    - Title: strip leading/trailing whitespace, then lowercase
    - URL: lowercase, then keep only scheme, netloc and path (query dropped)
    """
    # Key for the raw, unnormalized inputs.
    key_original = compute_canonical_evidence_key(title, url)

    # Normalize up front, mirroring what the function is expected to do internally.
    url_parts = urllib.parse.urlparse(url.lower())
    clean_url = urllib.parse.urlunparse(
        (url_parts.scheme, url_parts.netloc, url_parts.path, "", "", "")
    )
    clean_title = title.strip().lower()

    # Key for the pre-normalized inputs.
    key_from_normalized = compute_canonical_evidence_key(clean_title, clean_url)

    assert key_original == key_from_normalized, (
        f"Idempotence violated: key from original inputs ({key_original!r}) "
        f"differs from key from pre-normalized inputs ({key_from_normalized!r}). "
        f"title={title!r}, url={url!r}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies for contribution score tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# A single strictly positive, finite document weight.
_positive_weight = st.floats(
    min_value=0.01,
    max_value=1000.0,
    allow_nan=False,
    allow_infinity=False,
)

# Between 1 and 50 such weights per generated example.
positive_weights_strategy = st.lists(_positive_weight, min_size=1, max_size=50)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 7: Contribution Score Sum-to-One and Range
|
||||
# Validates: Requirements 2.5, 17.7
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(weights=positive_weights_strategy)
@settings(max_examples=100)
def test_contribution_scores_sum_to_one_and_range(weights: list[float]) -> None:
    """**Validates: Requirements 2.5, 17.7**

    Given any non-empty list of positive document weights, there SHALL be
    one contribution score per weight, every score SHALL lie in [0.0, 1.0],
    and the scores SHALL total 1.0 (within a tolerance of 1e-9).
    """
    scores = compute_contribution_scores(weights)

    # One score per input weight.
    expected_count = len(weights)
    assert len(scores) == expected_count, (
        f"Expected {expected_count} scores, got {len(scores)}"
    )

    # Range check on every individual score.
    for i, score in enumerate(scores):
        assert 0.0 <= score <= 1.0, (
            f"Score at index {i} is {score}, expected in [0.0, 1.0]. "
            f"weights={weights}"
        )

    # The scores form a distribution: their total is 1.0 within tolerance.
    total = sum(scores)
    deviation = abs(total - 1.0)
    assert deviation < 1e-9, (
        f"Scores sum to {total}, expected 1.0 within 1e-9 tolerance. "
        f"weights={weights}"
    )
|
||||
|
||||
|
||||
def test_contribution_scores_empty_input() -> None:
    """**Validates: Requirements 2.5, 17.7**

    An empty weight list SHALL produce an empty score list.
    """
    no_weights: list[float] = []
    scores = compute_contribution_scores(no_weights)
    assert scores == [], f"Expected empty list for empty input, got {scores}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies for calibration error tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Confidence values: finite floats in [0.50, 1.00].
confidence_strategy = st.floats(
    min_value=0.50,
    max_value=1.00,
    allow_nan=False,
    allow_infinity=False,
)

# Outcomes: True or False.
outcome_strategy = st.booleans()

# One (confidence, outcome) pair per prediction; 1 to 100 predictions per example.
_prediction_pair = st.tuples(confidence_strategy, outcome_strategy)
prediction_pairs_strategy = st.lists(_prediction_pair, min_size=1, max_size=100)
|
||||
|
||||
# Import metric functions
|
||||
from services.validation.metrics import (
|
||||
compute_brier_score,
|
||||
compute_calibration_error,
|
||||
compute_information_coefficient,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 1: Calibration Error Range and Round-Trip
|
||||
# Validates: Requirements 5.1, 5.3, 17.1
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(pairs=prediction_pairs_strategy)
@settings(max_examples=100)
def test_calibration_error_range(pairs: list[tuple[float, bool]]) -> None:
    """**Validates: Requirements 5.1, 5.3, 17.1**

    Given any predictions with confidences in [0.50, 1.00] and boolean
    outcomes, the Expected Calibration Error (ECE) SHALL lie in [0.0, 1.0],
    and every populated bucket SHALL report an average confidence and an
    observed win rate in [0.0, 1.0].
    """
    confidences = [pair[0] for pair in pairs]
    outcomes = [pair[1] for pair in pairs]

    ece, buckets = compute_calibration_error(confidences, outcomes)

    assert 0.0 <= ece <= 1.0, (
        f"ECE {ece} is outside [0.0, 1.0]. "
        f"confidences={confidences}, outcomes={outcomes}"
    )

    # Only buckets that received predictions carry meaningful statistics.
    populated = [candidate for candidate in buckets if candidate.prediction_count > 0]
    for bucket in populated:
        assert 0.0 <= bucket.avg_confidence <= 1.0, (
            f"Bucket [{bucket.bucket_low}, {bucket.bucket_high}) has "
            f"avg_confidence={bucket.avg_confidence} outside [0.0, 1.0]"
        )
        assert 0.0 <= bucket.observed_win_rate <= 1.0, (
            f"Bucket [{bucket.bucket_low}, {bucket.bucket_high}) has "
            f"observed_win_rate={bucket.observed_win_rate} outside [0.0, 1.0]"
        )
|
||||
|
||||
|
||||
def test_calibration_error_zero_when_perfectly_calibrated() -> None:
|
||||
"""**Validates: Requirements 5.1, 5.3, 17.1**
|
||||
|
||||
When every bucket's observed win rate exactly matches its average
|
||||
confidence, ECE SHALL be 0.0.
|
||||
|
||||
Constructs a scenario with predictions in multiple buckets where the
|
||||
fraction of True outcomes in each bucket equals the bucket's average
|
||||
confidence.
|
||||
"""
|
||||
# For each bucket midpoint, place predictions so win_rate == avg_confidence.
|
||||
# Use 100 predictions per bucket at the midpoint confidence.
|
||||
# Set exactly round(100 * midpoint) outcomes to True.
|
||||
bucket_midpoints = [0.55, 0.65, 0.75, 0.85, 0.95]
|
||||
n_per_bucket = 100
|
||||
|
||||
confidences: list[float] = []
|
||||
outcomes: list[bool] = []
|
||||
|
||||
for midpoint in bucket_midpoints:
|
||||
n_true = round(n_per_bucket * midpoint)
|
||||
n_false = n_per_bucket - n_true
|
||||
|
||||
confidences.extend([midpoint] * n_per_bucket)
|
||||
outcomes.extend([True] * n_true + [False] * n_false)
|
||||
|
||||
ece, buckets = compute_calibration_error(confidences, outcomes)
|
||||
|
||||
assert ece == 0.0, (
|
||||
f"ECE should be 0.0 for perfectly calibrated predictions, got {ece}. "
|
||||
f"Buckets: {[(b.avg_confidence, b.observed_win_rate, b.prediction_count) for b in buckets]}"
|
||||
)
|
||||
|
||||
# Verify each non-empty bucket has matching avg_confidence and win_rate
|
||||
for bucket in buckets:
|
||||
if bucket.prediction_count > 0:
|
||||
assert bucket.avg_confidence == bucket.observed_win_rate, (
|
||||
f"Bucket [{bucket.bucket_low}, {bucket.bucket_high}) has "
|
||||
f"avg_confidence={bucket.avg_confidence} != "
|
||||
f"observed_win_rate={bucket.observed_win_rate}"
|
||||
)
|
||||
assert not bucket.miscalibrated, (
|
||||
f"Bucket [{bucket.bucket_low}, {bucket.bucket_high}) should not "
|
||||
f"be flagged as miscalibrated when perfectly calibrated"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies for Brier score tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Finite bullish probabilities covering the whole closed interval [0.0, 1.0].
p_bull_strategy = st.floats(
    min_value=0.0,
    max_value=1.0,
    allow_nan=False,
    allow_infinity=False,
)

# Boolean outcome paired with each probability.
brier_outcome_strategy = st.booleans()

# Lists of 1 to 100 (p_bull, outcome) tuples.
brier_pairs_strategy = st.lists(
    elements=st.tuples(p_bull_strategy, brier_outcome_strategy),
    min_size=1,
    max_size=100,
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 2: Brier Score Range and Perfect Prediction
|
||||
# Validates: Requirements 5.4, 17.2
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(pairs=brier_pairs_strategy)
@settings(max_examples=100)
def test_brier_score_range(pairs: list[tuple[float, bool]]) -> None:
    """**Validates: Requirements 5.4, 17.2**

    Range property for the Brier score: for every generated list of
    (p_bull, outcome) pairs with p_bull in [0.0, 1.0] and a boolean outcome,
    ``compute_brier_score`` SHALL return a value in [0.0, 1.0].
    """
    p_bulls = [pair[0] for pair in pairs]
    outcomes = [pair[1] for pair in pairs]

    brier = compute_brier_score(p_bulls, outcomes)

    assert 0.0 <= brier <= 1.0, (
        f"Brier score {brier} is outside [0.0, 1.0]. "
        f"p_bulls={p_bulls}, outcomes={outcomes}"
    )
|
||||
|
||||
|
||||
@given(n=st.integers(min_value=1, max_value=100))
@settings(max_examples=100)
def test_brier_score_perfect_prediction(n: int) -> None:
    """**Validates: Requirements 5.4, 17.2**

    A forecast that is always exactly right must score 0.0. Two scenarios of
    length ``n`` are checked in turn: p_bull = 1.0 with every outcome True
    (bullish), then p_bull = 0.0 with every outcome False (bearish).
    """
    perfect_cases = (
        ("bullish", 1.0, True),
        ("bearish", 0.0, False),
    )

    for direction, p_bull, outcome in perfect_cases:
        brier = compute_brier_score([p_bull] * n, [outcome] * n)
        assert brier == 0.0, (
            f"Brier score should be 0.0 for perfect {direction} predictions, "
            f"got {brier} with n={n}"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies for Information Coefficient tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Finite floats in [-100, 100]; used for both the score and the return side.
ic_score_strategy = st.floats(
    min_value=-100.0,
    max_value=100.0,
    allow_nan=False,
    allow_infinity=False,
)

# Between 30 and 100 (score, return) tuples per generated example.
ic_pairs_strategy = st.lists(
    elements=st.tuples(ic_score_strategy, ic_score_strategy),
    min_size=30,
    max_size=100,
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 3: Information Coefficient Range and Perfect Correlation
|
||||
# Validates: Requirements 6.1, 6.2, 17.3
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(pairs=ic_pairs_strategy)
@settings(max_examples=100)
def test_information_coefficient_range(pairs: list[tuple[float, float]]) -> None:
    """**Validates: Requirements 6.1, 6.2, 17.3**

    Range property for the Information Coefficient (Pearson correlation):
    given at least 30 finite (score, return) pairs, the result SHALL either
    be None (zero variance) or lie in [-1.0, 1.0].
    """
    scores = [pair[0] for pair in pairs]
    returns = [pair[1] for pair in pairs]

    ic = compute_information_coefficient(scores, returns)

    # None is an accepted result, so there is nothing to bound in that case.
    if ic is None:
        return

    assert -1.0 <= ic <= 1.0, (
        f"IC {ic} is outside [-1.0, 1.0]. "
        f"scores={scores[:5]}..., returns={returns[:5]}..."
    )
|
||||
|
||||
|
||||
@given(
    scores=st.lists(
        st.floats(
            min_value=-100.0,
            max_value=100.0,
            allow_nan=False,
            allow_infinity=False,
        ),
        min_size=30,
        max_size=100,
    ).filter(lambda xs: max(xs) - min(xs) > 1e-6),
    a=st.floats(
        min_value=0.01, max_value=100.0, allow_nan=False, allow_infinity=False
    ),
    b=st.floats(
        min_value=-100.0, max_value=100.0, allow_nan=False, allow_infinity=False
    ),
)
@settings(max_examples=100)
def test_information_coefficient_perfect_positive_correlation(
    scores: list[float], a: float, b: float
) -> None:
    """**Validates: Requirements 6.1, 6.2, 17.3**

    Returns are built as an increasing affine function of the scores
    (``returns = a * scores + b`` with ``a > 0``), so the Information
    Coefficient SHALL be defined and equal to 1.0 within 1e-6.

    The ``scores`` strategy filters out lists whose spread is 1e-6 or less so
    that the input always has some variance.
    """
    returns = list(map(lambda s: a * s + b, scores))

    ic = compute_information_coefficient(scores, returns)

    assert ic is not None, (
        f"IC should not be None for perfectly correlated data with variance. "
        f"a={a}, b={b}, scores={scores[:5]}..."
    )

    deviation = abs(ic - 1.0)
    assert deviation < 1e-6, (
        f"IC should be 1.0 for perfectly positively correlated data, "
        f"got {ic}. a={a}, b={b}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies for source reliability tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from services.validation.calibration import compute_source_reliability
|
||||
|
||||
# Finite observed win rates across the closed interval [0.0, 1.0].
observed_win_rate_strategy = st.floats(
    min_value=0.0,
    max_value=1.0,
    allow_nan=False,
    allow_infinity=False,
)

# Sample counts from 0 (no evidence) up to 100,000.
sample_count_strategy = st.integers(min_value=0, max_value=100_000)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 5: Source Reliability Bayesian Shrinkage Bounds and Convergence
|
||||
# Validates: Requirements 8.1, 8.2, 17.5
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(
    observed_win_rate=observed_win_rate_strategy,
    sample_count=sample_count_strategy,
)
@settings(max_examples=100)
def test_source_reliability_range(observed_win_rate: float, sample_count: int) -> None:
    """**Validates: Requirements 8.1, 8.2, 17.5**

    Range property for source reliability: for any observed win rate in
    [0.0, 1.0] and any non-negative sample count, the value returned by
    ``compute_source_reliability`` SHALL lie in [0.0, 1.0].
    """
    reliability = compute_source_reliability(observed_win_rate, sample_count)

    assert 0.0 <= reliability <= 1.0, (
        f"Reliability {reliability} is outside [0.0, 1.0]. "
        f"observed_win_rate={observed_win_rate}, sample_count={sample_count}"
    )
|
||||
|
||||
|
||||
def test_source_reliability_zero_samples() -> None:
    """**Validates: Requirements 8.1, 8.2, 17.5**

    With no samples at all, reliability SHALL be exactly 0.5 (the prior
    mean), whatever observed win rate is supplied.
    """
    baseline = compute_source_reliability(observed_win_rate=0.8, sample_count=0)
    assert baseline == 0.5, (
        f"Reliability should be 0.5 when sample_count=0, got {baseline}"
    )

    # Sweep the win-rate range to show the input is ignored without evidence.
    for win_rate in (0.0, 0.25, 0.5, 0.75, 1.0):
        value = compute_source_reliability(
            observed_win_rate=win_rate, sample_count=0
        )
        assert value == 0.5, (
            f"Reliability should be 0.5 when sample_count=0 regardless of "
            f"observed_win_rate={win_rate}, got {value}"
        )
|
||||
|
||||
|
||||
@given(
    observed_win_rate=st.floats(
        min_value=0.0,
        max_value=1.0,
        allow_nan=False,
        allow_infinity=False,
    ),
)
@settings(max_examples=100)
def test_source_reliability_convergence(observed_win_rate: float) -> None:
    """**Validates: Requirements 8.1, 8.2, 17.5**

    Convergence property: as the sample count grows, reliability SHALL
    approach the observed win rate. The test uses a sample count of 10,000
    and requires the two values to differ by less than 0.01.
    """
    reliability = compute_source_reliability(observed_win_rate, sample_count=10_000)

    gap = abs(reliability - observed_win_rate)
    assert gap < 0.01, (
        f"Reliability {reliability} should be within 0.01 of "
        f"observed_win_rate {observed_win_rate} when sample_count=10000. "
        f"Difference: {gap}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Strategies for quality gate tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from services.trading.model_quality_gate import (
|
||||
QualityGateConfig,
|
||||
_evaluate_thresholds,
|
||||
)
|
||||
|
||||
# Snapshot dict strategy: generate each metric value in a reasonable range
|
||||
# Shared building blocks: the same ranges are used for the snapshot metrics
# and for the thresholds they are compared against.
_signed_unit_floats = st.floats(
    min_value=-1.0, max_value=1.0, allow_nan=False, allow_infinity=False
)
_unit_floats = st.floats(
    min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False
)
_prediction_counts = st.integers(min_value=0, max_value=10_000)

# Metric snapshot passed to _evaluate_thresholds as a plain dict.
snapshot_strategy = st.fixed_dictionaries(
    {
        "prediction_count": _prediction_counts,
        "information_coefficient": _signed_unit_floats,
        "win_rate": _unit_floats,
        "calibration_error": _unit_floats,
        "avg_excess_return_vs_spy": _signed_unit_floats,
    }
)

# QualityGateConfig instances with every threshold drawn from the matching
# metric range.
gate_config_strategy = st.builds(
    QualityGateConfig,
    min_prediction_count=_prediction_counts,
    min_ic=_signed_unit_floats,
    min_win_rate=_unit_floats,
    max_ece=_unit_floats,
    min_excess_return_vs_spy=_signed_unit_floats,
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 6: Quality Gate Determinism and Threshold Monotonicity
|
||||
# Validates: Requirements 11.1, 17.6
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(snapshot=snapshot_strategy, config=gate_config_strategy)
@settings(max_examples=100)
def test_quality_gate_determinism(
    snapshot: dict, config: QualityGateConfig
) -> None:
    """**Validates: Requirements 11.1, 17.6**

    Determinism property: evaluating the same metric snapshot against the
    same gate configuration twice SHALL yield the same number of threshold
    results, with identical name, threshold, actual value and pass/fail flag
    at every position, and therefore the same overall verdict.
    """
    first_run = _evaluate_thresholds(snapshot, config)
    second_run = _evaluate_thresholds(snapshot, config)

    assert len(first_run) == len(second_run), (
        f"Different number of threshold results: {len(first_run)} vs {len(second_run)}"
    )

    # Compare the two runs position by position.
    for first, second in zip(first_run, second_run):
        assert first.name == second.name, (
            f"Threshold name mismatch: {first.name!r} vs {second.name!r}"
        )
        assert first.threshold == second.threshold, (
            f"Threshold value mismatch for {first.name}: "
            f"{first.threshold} vs {second.threshold}"
        )
        assert first.actual == second.actual, (
            f"Actual value mismatch for {first.name}: "
            f"{first.actual} vs {second.actual}"
        )
        assert first.passed == second.passed, (
            f"Determinism violated for threshold {first.name}: "
            f"first call passed={first.passed}, second call passed={second.passed}. "
            f"actual={first.actual}, threshold={first.threshold}"
        )

    # The aggregate verdict (all thresholds passed) must agree as well.
    verdict_first = all(result.passed for result in first_run)
    verdict_second = all(result.passed for result in second_run)
    assert verdict_first == verdict_second, (
        f"Overall gate determinism violated: "
        f"first call passed={verdict_first}, second call passed={verdict_second}"
    )
|
||||
|
||||
|
||||
@given(
    snapshot=snapshot_strategy,
    config=gate_config_strategy,
    relax_amount=st.floats(
        min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False
    ),
    threshold_to_relax=st.sampled_from(
        [
            "min_prediction_count",
            "min_ic",
            "min_win_rate",
            "max_ece",
            "min_excess_return_vs_spy",
        ]
    ),
)
@settings(max_examples=100)
def test_quality_gate_threshold_monotonicity(
    snapshot: dict,
    config: QualityGateConfig,
    relax_amount: float,
    threshold_to_relax: str,
) -> None:
    """**Validates: Requirements 11.1, 17.6**

    Monotonicity property: if the gate passes under a configuration, making
    any single threshold easier to satisfy (lowering a ``min_*`` value or
    raising ``max_ece``) SHALL NOT make the gate fail.

    Examples where the gate does not pass under the original configuration
    are accepted without further checks.
    """
    baseline_results = _evaluate_thresholds(snapshot, config)
    if not all(result.passed for result in baseline_results):
        # The property only constrains configurations that pass to begin with.
        return

    from dataclasses import replace

    # Each entry lazily builds the single-field override for one threshold, so
    # only the selected relaxation is ever computed.
    relaxations = {
        # Lower the minimum count (whole predictions, floored at zero).
        "min_prediction_count": lambda: {
            "min_prediction_count": max(
                0, config.min_prediction_count - int(relax_amount * 1000)
            )
        },
        # Lower the minimum IC.
        "min_ic": lambda: {"min_ic": config.min_ic - relax_amount},
        # Lower the minimum win rate.
        "min_win_rate": lambda: {
            "min_win_rate": config.min_win_rate - relax_amount
        },
        # Raise the maximum tolerated ECE.
        "max_ece": lambda: {"max_ece": config.max_ece + relax_amount},
        # Lower the minimum excess return.
        "min_excess_return_vs_spy": lambda: {
            "min_excess_return_vs_spy": (
                config.min_excess_return_vs_spy - relax_amount
            )
        },
    }

    build_override = relaxations.get(threshold_to_relax)
    if build_override is None:
        return  # pragma: no cover

    relaxed_config = replace(config, **build_override())

    relaxed_results = _evaluate_thresholds(snapshot, config=relaxed_config)
    relaxed_passed = all(result.passed for result in relaxed_results)

    assert relaxed_passed, (
        f"Monotonicity violated: gate passed with original config but failed "
        f"after relaxing {threshold_to_relax}. "
        f"Original config: min_prediction_count={config.min_prediction_count}, "
        f"min_ic={config.min_ic}, min_win_rate={config.min_win_rate}, "
        f"max_ece={config.max_ece}, "
        f"min_excess_return_vs_spy={config.min_excess_return_vs_spy}. "
        f"Relaxed threshold: {threshold_to_relax} by {relax_amount}. "
        f"Failed thresholds: "
        f"{[(r.name, r.actual, r.threshold) for r in relaxed_results if not r.passed]}"
    )
|
||||
@@ -0,0 +1,110 @@
|
||||
# Feature: trading-feedback-engine, Property 1: Chunking round-trip and size constraint
|
||||
"""Property-based tests for report data chunking.
|
||||
|
||||
Feature: trading-feedback-engine
|
||||
|
||||
Tests the chunking round-trip and size constraint property from the design
|
||||
specification: for any input string, splitting it into chunks with a maximum
|
||||
size limit produces chunks where (a) every chunk is ≤ the size limit in
|
||||
characters (for chunks that don't contain a single oversized line), (b) no
|
||||
chunk is empty (except when the input itself is empty, which produces exactly
|
||||
one empty chunk), and (c) concatenating all chunks in order reconstructs the
|
||||
original input string.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from hypothesis import given, settings
|
||||
from hypothesis import strategies as st
|
||||
|
||||
from services.reporting.summarizer import chunk_data
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property 1: Chunking Round-Trip and Size Constraint
|
||||
# Validates: Requirements 2.2
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@given(
    text=st.text(),
    max_chars=st.integers(min_value=1, max_value=10000),
)
@settings(max_examples=100)
def test_chunk_data_round_trip(text: str, max_chars: int) -> None:
    """**Validates: Requirements 2.2**

    Round-trip property: for any input string and any max_chars ≥ 1, joining
    the chunks returned by ``chunk_data`` in order SHALL give back the
    original string exactly.
    """
    chunks = chunk_data(text, max_chars)

    reconstructed = "".join(chunks)

    assert reconstructed == text, (
        f"Round-trip failed: concatenation of {len(chunks)} chunks does not "
        f"equal original input.\n"
        f" original length: {len(text)}\n"
        f" reconstructed length: {len(reconstructed)}\n"
        f" max_chars: {max_chars}"
    )
|
||||
|
||||
|
||||
@given(
    text=st.text(),
    max_chars=st.integers(min_value=1, max_value=10000),
)
@settings(max_examples=100)
def test_chunk_data_no_empty_chunks(text: str, max_chars: int) -> None:
    """**Validates: Requirements 2.2**

    For any max_chars ≥ 1, ``chunk_data`` SHALL never emit an empty chunk for
    non-empty input. Empty input is the single exception and SHALL produce
    exactly one empty chunk.
    """
    chunks = chunk_data(text, max_chars)

    # Special case: empty input maps to a single empty chunk.
    if text == "":
        assert chunks == [""], (
            f"Empty input should produce exactly [''], got {chunks!r}"
        )
        return

    for index, chunk in enumerate(chunks):
        assert chunk != "", (
            f"Chunk {index} is empty for non-empty input.\n"
            f" input length: {len(text)}\n"
            f" max_chars: {max_chars}\n"
            f" total chunks: {len(chunks)}"
        )
|
||||
|
||||
|
||||
@given(
    text=st.text(),
    max_chars=st.integers(min_value=1, max_value=10000),
)
@settings(max_examples=100)
def test_chunk_data_size_constraint(text: str, max_chars: int) -> None:
    """**Validates: Requirements 2.2**

    Size property: every chunk returned by ``chunk_data`` SHALL be at most
    ``max_chars`` characters long, unless it is oversized because of a single
    long line (``chunk_data`` is specified not to break mid-line, so such a
    line is emitted as its own chunk).

    The check applied to an oversized chunk is that, after splitting on
    ``"\\n"``, it contains at most one non-empty line.
    """
    chunks = chunk_data(text, max_chars)

    for index, chunk in enumerate(chunks):
        if len(chunk) <= max_chars:
            continue

        # Oversized chunk: only acceptable when a single long line caused it.
        # A trailing "\n" yields an empty last piece, which filter() discards.
        # NOTE(review): interior blank lines are discarded too, so a chunk made
        # of blank lines plus one long line is accepted by this check.
        non_empty_lines = list(filter(None, chunk.split("\n")))
        assert len(non_empty_lines) <= 1, (
            f"Chunk {index} exceeds max_chars={max_chars} "
            f"(len={len(chunk)}) but contains multiple non-empty lines, "
            f"which should not happen.\n"
            f" lines: {non_empty_lines!r}"
        )
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user