Compare commits
101 Commits
24db0e97f6
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 2f2a7665e7 | |||
| 7eaff3ae58 | |||
| f99fc7fdc1 | |||
| 23834a9333 | |||
| 485dd12024 | |||
| fe8debeb17 | |||
| 5ed9f001b1 | |||
| 18f857e1a3 | |||
| 61bcb5aa57 | |||
| f468e30af0 | |||
| 7e2343ec2c | |||
| 3a8c6f6c80 | |||
| 139fbd6342 | |||
| 63b62c5e9f | |||
| 3acfdfd11f | |||
| ab212db8be | |||
| 7102358f51 | |||
| bc077bfcc8 | |||
| 376fcb4bb4 | |||
| cc21fd9e8f | |||
| affb65d7f4 | |||
| 32535540fe | |||
| 751cce0509 | |||
| 0732894414 | |||
| d9110d03a6 | |||
| 76f6bd5677 | |||
| 32d290bea7 | |||
| 7fcc8a6c07 | |||
| 5d2ffd9163 | |||
| 6169efdc89 | |||
| c42f2223d8 | |||
| 1f08820f11 | |||
| 2f2ea65fb4 | |||
| 9a60ce127b | |||
| 414f476620 | |||
| a5f2bcde55 | |||
| 34ffdad00c | |||
| 601b85764b | |||
| 51b6f3d34a | |||
| bddaf44ffc | |||
| 5209cc522e | |||
| fa18b1a7c2 | |||
| 4b254adad2 | |||
| af2e554edd | |||
| facce5dbb5 | |||
| 2f2c0d24f6 | |||
| 861423c1e3 | |||
| 13f863ef30 | |||
| 2538da3f1e | |||
| fa4ad6b15a | |||
| 5109c85a3e | |||
| 9975c2098b | |||
| e976363259 | |||
| b6e2718007 | |||
| cb3eb230d6 | |||
| 963a5c462c | |||
| 82892b7a3e | |||
| 6f54fd07fa | |||
| 99b7dcee98 | |||
| 4f7358f4e3 | |||
| 0665cef7e3 | |||
| 48eca672a9 | |||
| f159b20c87 | |||
| 97fe2249fe | |||
| 951b733ac3 | |||
| 531e33b0ce | |||
| 24c753f6e6 | |||
| 6880f11c26 | |||
| eead4f1381 | |||
| 007189c0a5 | |||
| f9ee1532dc | |||
| ac29e62033 | |||
| 7eecd71a0d | |||
| bb40a3cb8e | |||
| 4e010bc048 | |||
| 8c3c1aab43 | |||
| cfcfd655e7 | |||
| aaf8cee927 | |||
| f264e924f0 | |||
| a36702e5f3 | |||
| 01d77c153d | |||
| 8d227b62f6 | |||
| b38fb24f14 | |||
| 5c64043892 | |||
| 11c6457559 | |||
| f151747d56 | |||
| 49bff9de50 | |||
| 27b84fcd2e | |||
| 7e8d518946 | |||
| 23f2134754 | |||
| 4954318f7b | |||
| 3b22f5e1fc | |||
| c188677330 | |||
| 58613955e4 | |||
| b1770f37df | |||
| 2e4a9b1e08 | |||
| 416206e37b | |||
| 0a009cdc99 | |||
| 2ab52afc73 | |||
| 1aae36382c | |||
| 98bbec9b8d |
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Minimal MCP server for OpenAI chat completions.
|
||||
* Accepts ANY model string (gpt-5.2, gpt-5.4, etc.) — no hardcoded enum.
|
||||
* Communicates over stdio using JSON-RPC (MCP protocol).
|
||||
*/
|
||||
|
||||
import { createInterface } from "readline";
|
||||
|
||||
// API key sent as the Bearer token on every OpenAI request; read once at startup.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
  // Fail fast with a non-zero exit code. The diagnostic goes to stderr because
  // stdout carries the JSON-RPC responses of this server.
  process.stderr.write("ERROR: OPENAI_API_KEY environment variable is required\n");
  process.exit(1);
}
|
||||
|
||||
// Server identity returned as `serverInfo` in the `initialize` response.
const SERVER_INFO = {
  name: "openai-chat",
  version: "1.0.0",
};
|
||||
|
||||
// Tool catalogue returned by `tools/list`. The `model` property is a free-form
// string (no enum) so that any model name can be passed through unchanged.
const TOOLS = [
  {
    name: "openai_chat",
    description:
      "Send messages to OpenAI chat completions API. Supports all OpenAI models including GPT-5.x series.",
    inputSchema: {
      type: "object",
      properties: {
        model: {
          type: "string",
          description:
            "OpenAI model name (e.g. gpt-5.2, gpt-5.4, gpt-4o, etc.)",
          default: "gpt-5.2",
        },
        messages: {
          type: "array",
          description: "Array of chat messages",
          items: {
            type: "object",
            properties: {
              role: {
                type: "string",
                enum: ["system", "user", "assistant"],
              },
              content: { type: "string" },
            },
            required: ["role", "content"],
          },
        },
      },
      required: ["messages"],
    },
  },
];
|
||||
|
||||
/**
 * Send one chat-completions request to the OpenAI API and return the text of
 * the first choice.
 *
 * @param {string} model - Model name, forwarded to the API unchanged.
 * @param {Array<{role: string, content: string}>} messages - Chat messages, forwarded unchanged.
 * @returns {Promise<string>} Content of the first choice, or "(no response)"
 *   when the response carries no such content.
 * @throws {Error} When the API answers with a non-2xx status; the message
 *   includes the status code and the raw response body.
 */
async function callOpenAI(model, messages) {
  const resp = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${OPENAI_API_KEY}`,
    },
    body: JSON.stringify({ model, messages }),
  });

  if (!resp.ok) {
    // Read the body as text so the error is reported even if it is not JSON.
    const errText = await resp.text();
    throw new Error(`OpenAI API error ${resp.status}: ${errText}`);
  }

  const data = await resp.json();
  return data.choices?.[0]?.message?.content ?? "(no response)";
}
|
||||
|
||||
/**
 * Serialize a successful JSON-RPC 2.0 response.
 *
 * @param {string|number|null} id - Id of the request being answered.
 * @param {*} result - JSON-serializable result payload.
 * @returns {string} The response as a single-line JSON string.
 */
function jsonRpcResponse(id, result) {
  return JSON.stringify({ jsonrpc: "2.0", id, result });
}
|
||||
|
||||
/**
 * Serialize a JSON-RPC 2.0 error response.
 *
 * @param {string|number|null|undefined} id - Id of the failed request. An
 *   undefined id is emitted as `null`, because JSON.stringify would otherwise
 *   drop the member and JSON-RPC requires `id` to be present in a response.
 * @param {number} code - JSON-RPC error code (e.g. -32601, -32602).
 * @param {string} message - Short human-readable description of the error.
 * @returns {string} The error response as a single-line JSON string.
 */
function jsonRpcError(id, code, message) {
  return JSON.stringify({ jsonrpc: "2.0", id: id ?? null, error: { code, message } });
}
|
||||
|
||||
/**
 * Dispatch one decoded JSON-RPC message to the matching MCP handler.
 *
 * Supported methods: `initialize`, `tools/list`, `tools/call` (tool
 * `openai_chat` only), `ping`, and any `notifications/*` message.
 *
 * @param {*} req - The decoded message; expected to be an object with
 *   `id`, `method` and optional `params`.
 * @returns {Promise<string|null>} The serialized JSON-RPC response, or `null`
 *   when the message is a notification and must not be answered.
 */
async function handleRequest(req) {
  // A non-object message (null, number, string, array) cannot carry a method;
  // answer with "Invalid Request" instead of throwing while destructuring.
  if (req === null || typeof req !== "object" || Array.isArray(req)) {
    return jsonRpcError(null, -32600, "Invalid Request");
  }

  const { id, method, params } = req;

  switch (method) {
    case "initialize":
      return jsonRpcResponse(id, {
        protocolVersion: "2024-11-05",
        capabilities: { tools: {} },
        serverInfo: SERVER_INFO,
      });

    case "notifications/initialized":
      return null; // no response needed for notifications

    case "tools/list":
      return jsonRpcResponse(id, { tools: TOOLS });

    case "tools/call": {
      const toolName = params?.name;
      if (toolName !== "openai_chat") {
        return jsonRpcError(id, -32602, `Unknown tool: ${toolName}`);
      }

      const args = params?.arguments ?? {};
      // `||` is intentional: an empty model string also falls back to the default.
      const model = args.model || "gpt-5.2";
      const messages = args.messages;

      // Require a real, non-empty array; a truthy non-array (e.g. a string)
      // would otherwise pass a bare `.length` check and reach the API.
      if (!Array.isArray(messages) || messages.length === 0) {
        return jsonRpcError(id, -32602, "messages array is required");
      }

      try {
        const content = await callOpenAI(model, messages);
        return jsonRpcResponse(id, {
          content: [{ type: "text", text: content }],
        });
      } catch (err) {
        // Upstream failures are reported as a tool result flagged `isError`,
        // not as a JSON-RPC protocol error.
        return jsonRpcResponse(id, {
          content: [{ type: "text", text: `Error: ${err.message}` }],
          isError: true,
        });
      }
    }

    case "ping":
      return jsonRpcResponse(id, {});

    default:
      // Unknown notifications are ignored silently; the type guard keeps a
      // non-string `method` from throwing on `.startsWith`.
      if (typeof method === "string" && method.startsWith("notifications/")) {
        return null;
      }
      return jsonRpcError(id, -32601, `Method not found: ${method}`);
  }
}
|
||||
|
||||
// stdio transport: one JSON-RPC message per stdin line, one response per
// stdout line. Diagnostics go to stderr so stdout only carries responses.
const rl = createInterface({ input: process.stdin });

rl.on("line", async (line) => {
  // Blank lines carry no message; ignore them instead of reporting an error.
  if (line.trim() === "") return;

  let req;
  try {
    req = JSON.parse(line);
  } catch (err) {
    // Malformed JSON: log it and answer with the JSON-RPC "Parse error" code.
    // The request id is unknown at this point, so it is sent as null.
    process.stderr.write(`Parse error: ${err.message}\n`);
    process.stdout.write(jsonRpcError(null, -32700, "Parse error") + "\n");
    return;
  }

  try {
    const resp = await handleRequest(req);
    if (resp) {
      process.stdout.write(resp + "\n");
    }
  } catch (err) {
    // A failure inside the handler is not a parse error; report it as such.
    process.stderr.write(`Internal error: ${err.message}\n`);
    const id = req?.id;
    // Only requests (messages carrying an id) may be answered.
    if (id !== undefined && id !== null) {
      process.stdout.write(jsonRpcError(id, -32603, "Internal error") + "\n");
    }
  }
});
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "d76705a8-fb91-4fce-b59e-c4b3b0dbbd83", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,723 @@
|
||||
# Design Document — Dual-Pipeline Signal Engine
|
||||
|
||||
## Overview
|
||||
|
||||
The dual-pipeline signal engine is a new service at `services/signal_engine/` that runs as an independent Kubernetes deployment alongside the existing aggregation → recommendation pipeline. It implements a concurrent dual-pipeline architecture where both a heuristic (deterministic scoring) and probabilistic (Bayesian inference) pipeline evaluate the same normalized inputs per ticker per evaluation tick, producing independent BUY/WATCH/SKIP verdicts. A delta analyzer compares the two verdicts, and an output formatter assembles a structured `SignalOutput` contract published to the existing `trading_decisions` Redis queue.
|
||||
|
||||
The engine introduces several new components — Input Normalizer, Signal Library (Fibonacci, MA Stack, RSI, Cup & Handle, Elliott Wave), Multi-Timeframe Engine, Hard Filter Engine, Exit Engine, Delta Analyzer, and Output Formatter — while reusing existing infrastructure: `compute_signal_weight`, `compute_bayesian_posterior`, `classify_regime`, `WeightedSignal`, `BayesianPosterior`, and `RegimeClassification` from `services/aggregation/`.
|
||||
|
||||
The service is toggled via `dual_pipeline_enabled` in the `risk_configs` table (default: false, fail-safe). When disabled, the existing pipeline operates unchanged. When enabled, the signal engine runs alongside the existing pipeline with support for shadow mode (dual-pipeline output persisted but not forwarded to trading).
|
||||
|
||||
### Design Rationale
|
||||
|
||||
- **Separate service, not inline extension**: The signal engine has a fundamentally different evaluation cadence (multi-timeframe technical signals) and data flow (OHLCV bars, not document intelligence). Embedding it in the aggregation worker would couple two distinct concerns.
|
||||
- **Reuse existing math**: The Bayesian posterior, regime classification, and signal weighting functions are battle-tested. The probabilistic pipeline wraps them with regime-based priors and likelihood ratio accumulation rather than reimplementing.
|
||||
- **Concurrent pipelines via asyncio.gather**: Both pipelines share the same `NormalizedInput` reference and run concurrently. If one fails, the other completes normally with the failed pipeline producing a SKIP verdict.
|
||||
- **Signal clustering for correlation penalty**: The Bayesian pipeline groups signals into four clusters (momentum, structure, volatility, fundamentals) and applies exponential decay within each cluster to prevent likelihood ratio stacking inflation from correlated signals.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Flow
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[Evaluation Tick<br/>Redis queue: signal_engine] --> B[Input Normalizer]
|
||||
B --> C[Hard Filter Engine]
|
||||
C -->|filtered out| D[SKIP verdict for both pipelines]
|
||||
C -->|passed| E[Signal Library]
|
||||
E --> F[Multi-Timeframe Engine]
|
||||
F --> G{asyncio.gather}
|
||||
G --> H[Heuristic Pipeline]
|
||||
G --> I[Probabilistic Pipeline]
|
||||
H --> J[Delta Analyzer]
|
||||
I --> J
|
||||
J --> K[Output Formatter]
|
||||
K --> L[SignalOutput]
|
||||
L --> M[Redis: trading_decisions queue]
|
||||
L --> N[PostgreSQL: signal_engine_outputs]
|
||||
|
||||
subgraph Exit Path
|
||||
B --> O[Exit Engine]
|
||||
O --> K
|
||||
end
|
||||
```
|
||||
|
||||
### Trigger Mechanism
|
||||
|
||||
The signal engine polls a new Redis queue `stonks:queue:signal_engine`. Evaluation ticks are enqueued by the scheduler service after aggregation completes for a ticker. The queue message contains `{"ticker": "AAPL", "triggered_at": "2024-01-15T10:00:00Z"}`.
|
||||
|
||||
### Integration Points
|
||||
|
||||
| Component | Integration | Direction |
|
||||
|---|---|---|
|
||||
| Scheduler | Enqueues ticks to `signal_engine` queue | Scheduler → Signal Engine |
|
||||
| Market data tables | OHLCV bars, closing prices, returns | Signal Engine reads |
|
||||
| `macro_impact_records` | Macro bias computation | Signal Engine reads |
|
||||
| `trend_windows` | Fundamental/valuation context | Signal Engine reads |
|
||||
| `risk_configs` | Feature flags, thresholds | Signal Engine reads |
|
||||
| `classify_regime()` | Regime classification for priors | Signal Engine calls |
|
||||
| `compute_signal_weight()` | Heuristic signal weighting | Signal Engine calls |
|
||||
| `compute_bayesian_posterior()` | Bayesian accumulation | Signal Engine calls |
|
||||
| Redis `trading_decisions` | SignalOutput publication | Signal Engine → Trading Engine |
|
||||
| `signal_engine_outputs` table | Persistence for audit | Signal Engine writes |
|
||||
| Redis rolling agreement | Delta analyzer metrics | Signal Engine writes |
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### Module Structure
|
||||
|
||||
```
|
||||
services/signal_engine/
|
||||
├── __init__.py
|
||||
├── main.py # Entry point: asyncio event loop, queue polling
|
||||
├── worker.py # Top-level orchestrator per evaluation tick
|
||||
├── config.py # SignalEngineConfig, loaded from risk_configs + env
|
||||
├── models.py # All Pydantic models (NormalizedInput, SignalResult, etc.)
|
||||
├── normalizer.py # Input Normalizer — fetches and assembles NormalizedInput
|
||||
├── signals/
|
||||
│ ├── __init__.py
|
||||
│ ├── base.py # SignalEvaluator protocol, SignalResult model
|
||||
│ ├── fibonacci.py # Fibonacci retracement evaluator
|
||||
│ ├── ma_stack.py # Moving average stack evaluator
|
||||
│ ├── rsi.py # RSI evaluator
|
||||
│ ├── cup_handle.py # Cup & Handle pattern detector
|
||||
│ └── elliott_wave.py # Elliott Wave detector
|
||||
├── confluence.py # Multi-Timeframe Confluence Engine
|
||||
├── hard_filter.py # Hard Filter Engine
|
||||
├── heuristic.py # Heuristic Pipeline (Pipeline A)
|
||||
├── probabilistic.py # Probabilistic Pipeline (Pipeline B)
|
||||
├── correlation.py # Signal cluster classification + correlation penalty
|
||||
├── exit_engine.py # Exit Engine — position-level exit management
|
||||
├── delta.py # Delta Analyzer
|
||||
├── formatter.py # Output Formatter
|
||||
└── persistence.py # Database persistence for signal_engine_outputs
|
||||
```
|
||||
|
||||
### Key Function Signatures
|
||||
|
||||
#### `main.py` — Entry Point
|
||||
|
||||
```python
|
||||
async def main() -> None:
|
||||
"""Start the signal engine worker loop.
|
||||
|
||||
Connects to PostgreSQL and Redis, loads config from risk_configs,
|
||||
and polls the signal_engine queue indefinitely.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `worker.py` — Orchestrator
|
||||
|
||||
```python
|
||||
async def evaluate_tick(
|
||||
pool: asyncpg.Pool,
|
||||
redis: redis.asyncio.Redis,
|
||||
ticker: str,
|
||||
config: SignalEngineConfig,
|
||||
) -> SignalOutput | None:
|
||||
"""Run a full evaluation tick for a single ticker.
|
||||
|
||||
1. Normalize inputs
|
||||
2. Evaluate exit conditions for open positions
|
||||
3. Run hard filters
|
||||
4. Evaluate signals across timeframes
|
||||
5. Run both pipelines concurrently
|
||||
6. Compute delta analysis
|
||||
7. Format and publish output
|
||||
|
||||
Returns None if the ticker is hard-filtered or both pipelines fail.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `normalizer.py` — Input Normalizer
|
||||
|
||||
```python
|
||||
async def normalize_input(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
config: SignalEngineConfig,
|
||||
) -> NormalizedInput:
|
||||
"""Fetch and assemble all data needed for a single evaluation tick.
|
||||
|
||||
Sources:
|
||||
- OHLCV bars from market_data_bars (M30, H1, H4, D, W, M)
|
||||
- Fundamental metrics from trend_windows + companies
|
||||
- Macro context from macro_impact_records + global_events
|
||||
- Open position state from the trading engine's portfolio
|
||||
|
||||
Missing data sources produce sentinel values (None/empty list)
|
||||
with a logged warning.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `signals/base.py` — Signal Evaluator Protocol
|
||||
|
||||
```python
|
||||
from typing import Protocol
|
||||
|
||||
class SignalEvaluator(Protocol):
|
||||
"""Protocol for all signal evaluators in the Signal Library."""
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
bars: list[OHLCVBar],
|
||||
timeframe: str,
|
||||
) -> SignalResult | None:
|
||||
"""Evaluate a signal on a single timeframe's bar data.
|
||||
|
||||
Returns None when insufficient data is available.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### `confluence.py` — Multi-Timeframe Engine
|
||||
|
||||
```python
|
||||
def compute_confluence(
|
||||
signal_results: dict[str, dict[str, SignalResult]],
|
||||
weights: dict[str, float],
|
||||
) -> list[ConfluenceSignal]:
|
||||
"""Compute weighted confluence scores across timeframes.
|
||||
|
||||
Args:
|
||||
signal_results: {signal_type: {timeframe: SignalResult}}
|
||||
weights: {timeframe: weight} e.g. {"M30": 0.03, "D": 0.30, ...}
|
||||
|
||||
Returns:
|
||||
List of ConfluenceSignal objects that pass the minimum
|
||||
confluence threshold (≥2 timeframes, ≥1 of D/W/M).
|
||||
"""
|
||||
```
|
||||
|
||||
#### `hard_filter.py` — Hard Filter Engine
|
||||
|
||||
```python
|
||||
def evaluate_hard_filters(
|
||||
normalized: NormalizedInput,
|
||||
config: HardFilterConfig,
|
||||
) -> HardFilterResult:
|
||||
"""Evaluate pre-pipeline hard filters.
|
||||
|
||||
Checks:
|
||||
- macro_bias == -1.0 → SKIP
|
||||
- valuation_score < threshold → SKIP
|
||||
- earnings_proximity_days <= threshold → SKIP
|
||||
|
||||
Returns HardFilterResult with filtered=True/False and all triggered reasons.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `heuristic.py` — Heuristic Pipeline
|
||||
|
||||
```python
|
||||
def run_heuristic_pipeline(
|
||||
normalized: NormalizedInput,
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
config: HeuristicConfig,
|
||||
) -> HeuristicResult:
|
||||
"""Run the deterministic heuristic pipeline.
|
||||
|
||||
Computes S_total = S_company + S_macro + S_competitive using
|
||||
existing compute_signal_weight() and weighted sentiment averaging.
|
||||
Produces BUY/WATCH/SKIP verdict based on confidence and score thresholds.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `probabilistic.py` — Probabilistic Pipeline
|
||||
|
||||
```python
|
||||
def run_probabilistic_pipeline(
|
||||
normalized: NormalizedInput,
|
||||
confluence_signals: list[ConfluenceSignal],
|
||||
regime: RegimeClassification,
|
||||
config: ProbabilisticConfig,
|
||||
) -> ProbabilisticResult:
|
||||
"""Run the Bayesian probabilistic pipeline.
|
||||
|
||||
1. Initialize regime-based prior (bull=0.58, range=0.50, bear=0.42)
|
||||
2. Compute likelihood ratios per signal with correlation penalty
|
||||
3. Accumulate via log-odds: logit(P_post) = logit(P_prior) + Σ log(LR_i)
|
||||
4. Apply entropy gating
|
||||
5. Compute EV_R = P_up · E[win_R] - (1 - P_up) · 1.0
|
||||
6. Produce BUY/WATCH/SKIP verdict
|
||||
"""
|
||||
```
|
||||
|
||||
#### `correlation.py` — Signal Correlation Penalty
|
||||
|
||||
```python
|
||||
class SignalCluster(str, Enum):
|
||||
MOMENTUM = "momentum" # MA stack, RSI
|
||||
STRUCTURE = "structure" # Fibonacci, Elliott Wave
|
||||
VOLATILITY = "volatility" # ATR-based, Bollinger-derived
|
||||
FUNDAMENTALS = "fundamentals" # valuation, earnings, macro
|
||||
|
||||
def classify_signal(signal_type: str) -> SignalCluster:
|
||||
"""Map a signal type to its correlation cluster."""
|
||||
|
||||
def apply_correlation_penalty(
|
||||
likelihood_ratios: list[LikelihoodRatio],
|
||||
) -> list[LikelihoodRatio]:
|
||||
"""Apply within-cluster decay penalty to correlated signals.
|
||||
|
||||
Within each cluster, signals are ranked by LR magnitude.
|
||||
The strongest contributes at full weight; subsequent signals
|
||||
contribute at 0.5^(n-1) decay, where n is the signal's rank within its cluster (n=1 is the strongest).
|
||||
|
||||
Cross-cluster signals are independent (no penalty).
|
||||
"""
|
||||
```
|
||||
|
||||
#### `exit_engine.py` — Exit Engine
|
||||
|
||||
```python
|
||||
def evaluate_exits(
|
||||
positions: list[OpenPositionState],
|
||||
current_prices: dict[str, float],
|
||||
config: ExitConfig,
|
||||
) -> list[ExitSignal]:
|
||||
"""Evaluate exit conditions for all open positions.
|
||||
|
||||
Checks: stop_loss hit, target_1 hit (EXIT_HALF), target_2 hit (EXIT_FULL),
|
||||
trailing stop hit (EXIT_FULL for remaining).
|
||||
|
||||
Trailing stop activates after EXIT_HALF and ratchets upward only.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `delta.py` — Delta Analyzer
|
||||
|
||||
```python
|
||||
async def analyze_delta(
|
||||
heuristic: HeuristicResult,
|
||||
probabilistic: ProbabilisticResult,
|
||||
redis: redis.asyncio.Redis,
|
||||
ticker: str,
|
||||
) -> DeltaResult:
|
||||
"""Compare pipeline verdicts and track agreement metrics.
|
||||
|
||||
Computes agreement flag, confidence delta, disagreement reasons.
|
||||
Updates rolling 100-evaluation agreement rate in Redis.
|
||||
Logs warning when agreement rate drops below 0.50.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `formatter.py` — Output Formatter
|
||||
|
||||
```python
|
||||
def format_output(
|
||||
ticker: str,
|
||||
price: float,
|
||||
heuristic: HeuristicResult,
|
||||
probabilistic: ProbabilisticResult,
|
||||
delta: DeltaResult,
|
||||
exit_signals: list[ExitSignal],
|
||||
config: SignalEngineConfig,
|
||||
) -> SignalOutput:
|
||||
"""Assemble the structured SignalOutput contract.
|
||||
|
||||
Populates trade_plan based on verdict combination:
|
||||
- Both BUY → dual_confirmed, full position sizing
|
||||
- Probabilistic-only BUY → probabilistic_only, 50% position sizing
|
||||
- Heuristic-only BUY → standard position sizing
|
||||
- No BUY → no trade_plan (WATCH/SKIP persisted for analysis)
|
||||
"""
|
||||
|
||||
def signal_output_to_recommendation(output: SignalOutput) -> Recommendation:
|
||||
"""Map a SignalOutput to the existing Recommendation schema.
|
||||
|
||||
Enables the trading engine to consume dual-pipeline outputs
|
||||
without modification to its core evaluate_recommendation logic.
|
||||
"""
|
||||
```
|
||||
|
||||
#### `persistence.py` — Database Persistence
|
||||
|
||||
```python
|
||||
async def persist_signal_output(
|
||||
pool: asyncpg.Pool,
|
||||
output: SignalOutput,
|
||||
) -> None:
|
||||
"""Persist a SignalOutput to the signal_engine_outputs table.
|
||||
|
||||
Logs and continues on database errors (persistence failure
|
||||
does not block signal emission to the trading queue).
|
||||
"""
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
All new data models are Pydantic `BaseModel` subclasses defined in `services/signal_engine/models.py`. Existing models (`WeightedSignal`, `BayesianPosterior`, `RegimeClassification`, `TrendSummary`, `Recommendation`, `PositionSizing`) are imported from `services/aggregation/` and `services/shared/schemas.py`.
|
||||
|
||||
### OHLCVBar
|
||||
|
||||
```python
|
||||
class OHLCVBar(BaseModel):
|
||||
"""Single OHLCV bar for a timeframe."""
|
||||
timestamp: datetime
|
||||
open: float
|
||||
high: float
|
||||
low: float
|
||||
close: float
|
||||
volume: float
|
||||
```
|
||||
|
||||
### NormalizedInput
|
||||
|
||||
```python
|
||||
class NormalizedInput(BaseModel):
|
||||
"""Unified input structure consumed by both pipelines."""
|
||||
ticker: str
|
||||
evaluated_at: datetime
|
||||
|
||||
# Multi-timeframe OHLCV bars
|
||||
bars: dict[str, list[OHLCVBar]] # {"M30": [...], "H1": [...], ...}
|
||||
|
||||
# Fundamental metrics
|
||||
valuation_score: float | None = None # [0.0, 1.0]
|
||||
earnings_proximity_days: int | None = None
|
||||
|
||||
# Macro context
|
||||
macro_bias: float = 0.0 # [-1.0, 1.0]
|
||||
|
||||
# Open position state (for exit engine)
|
||||
open_positions: list[OpenPositionState] = Field(default_factory=list)
|
||||
|
||||
# Market data for regime classification
|
||||
closing_prices: list[float] = Field(default_factory=list)
|
||||
returns: list[float] = Field(default_factory=list)
|
||||
|
||||
# Current price (latest close from shortest available timeframe)
|
||||
current_price: float | None = None
|
||||
```
|
||||
|
||||
### OpenPositionState
|
||||
|
||||
```python
|
||||
class OpenPositionState(BaseModel):
|
||||
"""Snapshot of an open position for exit evaluation."""
|
||||
position_id: str
|
||||
ticker: str
|
||||
entry_price: float
|
||||
current_price: float
|
||||
stop_loss: float
|
||||
target_1: float
|
||||
target_2: float
|
||||
trailing_stop: float | None = None
|
||||
partial_exit_done: bool = False
|
||||
atr: float | None = None
|
||||
```
|
||||
|
||||
### SignalResult
|
||||
|
||||
```python
|
||||
class SignalDirection(str, Enum):
|
||||
BULLISH = "bullish"
|
||||
BEARISH = "bearish"
|
||||
NEUTRAL = "neutral"
|
||||
|
||||
class SignalResult(BaseModel):
|
||||
"""Output from a single signal evaluator on a single timeframe."""
|
||||
signal_type: str # e.g. "fibonacci", "ma_stack", "rsi"
|
||||
timeframe: str # e.g. "D", "H4"
|
||||
strength: float = Field(ge=0.0, le=1.0)
|
||||
direction: SignalDirection
|
||||
confidence: float = Field(ge=0.0, le=1.0)
|
||||
metadata: dict = Field(default_factory=dict) # signal-specific details
|
||||
```
|
||||
|
||||
### ConfluenceSignal
|
||||
|
||||
```python
|
||||
class ConfluenceSignal(BaseModel):
|
||||
"""A signal that passed multi-timeframe confluence filtering."""
|
||||
signal_type: str
|
||||
direction: SignalDirection
|
||||
confluence_score: float # weighted sum across timeframes
|
||||
active_timeframes: list[str] # which timeframes triggered
|
||||
per_timeframe: dict[str, float] # {timeframe: strength}
|
||||
```
|
||||
|
||||
### Verdict
|
||||
|
||||
```python
|
||||
class Verdict(str, Enum):
|
||||
BUY = "BUY"
|
||||
WATCH = "WATCH"
|
||||
SKIP = "SKIP"
|
||||
```
|
||||
|
||||
### HeuristicResult
|
||||
|
||||
```python
|
||||
class HeuristicResult(BaseModel):
|
||||
"""Output from the heuristic (deterministic) pipeline."""
|
||||
verdict: Verdict
|
||||
confidence: float = Field(ge=0.0, le=1.0)
|
||||
s_total: float
|
||||
s_company: float
|
||||
s_macro: float
|
||||
s_competitive: float
|
||||
signal_weights: list[dict] = Field(default_factory=list)
|
||||
reasoning: list[str] = Field(default_factory=list)
|
||||
```
|
||||
|
||||
### LikelihoodRatio
|
||||
|
||||
```python
|
||||
class LikelihoodRatio(BaseModel):
|
||||
"""A single signal's likelihood ratio for Bayesian updating."""
|
||||
signal_type: str
|
||||
cluster: str # SignalCluster value
|
||||
lr: float # P(sig|up) / P(sig|down)
|
||||
log_lr: float # log(lr)
|
||||
penalized_log_lr: float # after correlation penalty
|
||||
hit_rate: float
|
||||
strength: float
|
||||
```
|
||||
|
||||
### ProbabilisticResult
|
||||
|
||||
```python
|
||||
class ProbabilisticResult(BaseModel):
|
||||
"""Output from the probabilistic (Bayesian) pipeline."""
|
||||
verdict: Verdict
|
||||
p_up: float = Field(ge=0.0, le=1.0)
|
||||
entropy: float = Field(ge=0.0, le=1.0)
|
||||
ev_r: float
|
||||
prior: float
|
||||
posterior: float
|
||||
likelihood_ratios: list[LikelihoodRatio] = Field(default_factory=list)
|
||||
regime: str
|
||||
reasoning: list[str] = Field(default_factory=list)
|
||||
```
|
||||
|
||||
### DeltaResult
|
||||
|
||||
```python
|
||||
class DeltaResult(BaseModel):
|
||||
"""Output from the delta analyzer comparing both pipelines."""
|
||||
agreement: bool
|
||||
confidence_delta: float
|
||||
heuristic_verdict: str
|
||||
probabilistic_verdict: str
|
||||
disagreement_reasons: list[str] = Field(default_factory=list)
|
||||
rolling_agreement_rate: float | None = None
|
||||
```
|
||||
|
||||
### ExitSignal
|
||||
|
||||
```python
|
||||
class ExitType(str, Enum):
|
||||
EXIT_HALF = "EXIT_HALF"
|
||||
EXIT_FULL = "EXIT_FULL"
|
||||
|
||||
class ExitSignal(BaseModel):
|
||||
"""An exit signal for an open position."""
|
||||
position_id: str
|
||||
ticker: str
|
||||
exit_type: ExitType
|
||||
reason: str # "stop_hit", "target_1_hit", "target_2_hit", "trailing_stop_hit"
|
||||
price: float
|
||||
```
|
||||
|
||||
### TradePlan
|
||||
|
||||
```python
|
||||
class TradePlan(BaseModel):
|
||||
"""Optional trade plan attached to a BUY signal."""
|
||||
entry_price: float
|
||||
stop_loss: float
|
||||
target_1: float
|
||||
target_2: float
|
||||
position_size_pct: float = Field(ge=0.0, le=1.0)
|
||||
max_loss_pct: float = Field(ge=0.0, le=1.0)
|
||||
dual_confirmed: bool = False
|
||||
probabilistic_only: bool = False
|
||||
```
|
||||
|
||||
### SignalOutput
|
||||
|
||||
```python
|
||||
class SignalOutput(BaseModel):
|
||||
"""The structured output contract consumed by the trading engine and audit systems."""
|
||||
output_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
ticker: str
|
||||
timestamp: datetime
|
||||
price: float
|
||||
|
||||
# Heuristic pipeline results
|
||||
heuristic_verdict: str
|
||||
heuristic_confidence: float
|
||||
heuristic_s_total: float
|
||||
|
||||
# Probabilistic pipeline results
|
||||
probabilistic_verdict: str
|
||||
probabilistic_p_up: float
|
||||
probabilistic_entropy: float
|
||||
probabilistic_ev_r: float
|
||||
|
||||
# Delta analysis
|
||||
delta_agreement: bool
|
||||
delta_confidence_delta: float
|
||||
delta_reasons: list[str] = Field(default_factory=list)
|
||||
|
||||
# Optional trade plan (populated when at least one pipeline says BUY)
|
||||
trade_plan: TradePlan | None = None
|
||||
|
||||
# Exit signals for open positions
|
||||
exit_signals: list[ExitSignal] = Field(default_factory=list)
|
||||
|
||||
# Full pipeline results for audit (stored as JSONB)
|
||||
heuristic_detail: dict = Field(default_factory=dict)
|
||||
probabilistic_detail: dict = Field(default_factory=dict)
|
||||
|
||||
# Pipeline mode metadata
|
||||
pipeline_mode: str = "dual_pipeline"
|
||||
shadow_mode: bool = False
|
||||
```
|
||||
|
||||
### SignalEngineConfig
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SignalEngineConfig:
|
||||
"""Configuration loaded from risk_configs + environment."""
|
||||
dual_pipeline_enabled: bool = False
|
||||
heuristic_pipeline_enabled: bool = True
|
||||
probabilistic_pipeline_enabled: bool = True
|
||||
shadow_mode: bool = False
|
||||
|
||||
# Timeframe weights
|
||||
timeframe_weights: dict[str, float] = field(default_factory=lambda: {
|
||||
"M30": 0.03, "H1": 0.07, "H4": 0.15,
|
||||
"D": 0.30, "W": 0.30, "M": 0.15,
|
||||
})
|
||||
|
||||
# Hard filter thresholds
|
||||
hard_filter_valuation_min: float = 0.3
|
||||
hard_filter_earnings_days: int = 5
|
||||
hard_filter_macro_bias_skip: float = -1.0
|
||||
|
||||
# Heuristic verdict thresholds
|
||||
heuristic_buy_confidence: float = 0.70
|
||||
heuristic_buy_s_total: float = 1.2
|
||||
heuristic_buy_valuation_min: float = 0.5
|
||||
heuristic_watch_confidence: float = 0.55
|
||||
|
||||
# Probabilistic verdict thresholds
|
||||
prob_buy_p_up: float = 0.60
|
||||
prob_buy_entropy_max: float = 0.90
|
||||
prob_buy_ev_r_min: float = 1.5
|
||||
prob_buy_valuation_min: float = 0.5
|
||||
prob_watch_p_up: float = 0.55
|
||||
prob_watch_entropy_max: float = 0.95
|
||||
prob_entropy_skip: float = 0.95
|
||||
|
||||
# Regime priors
|
||||
regime_prior_bull: float = 0.58
|
||||
regime_prior_range: float = 0.50
|
||||
regime_prior_bear: float = 0.42
|
||||
|
||||
# Exit engine
|
||||
trailing_stop_atr_multiplier: float = 2.0
|
||||
|
||||
# Polling
|
||||
polling_interval_seconds: int = 30
|
||||
```
|
||||
|
||||
### HardFilterConfig / HeuristicConfig / ProbabilisticConfig / ExitConfig
|
||||
|
||||
These are derived from `SignalEngineConfig` fields for cleaner function signatures — simple `@dataclass` wrappers over the relevant subset of config values.
|
||||
|
||||
---
|
||||
|
||||
### Database Migration (039)
|
||||
|
||||
```sql
|
||||
-- Migration 039: Signal Engine Outputs
|
||||
-- Creates the signal_engine_outputs table for persisting dual-pipeline evaluations.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS signal_engine_outputs (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
ticker TEXT NOT NULL,
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
price NUMERIC NOT NULL,
|
||||
|
||||
-- Heuristic pipeline
|
||||
heuristic_verdict TEXT NOT NULL,
|
||||
heuristic_confidence NUMERIC NOT NULL,
|
||||
heuristic_s_total NUMERIC NOT NULL,
|
||||
|
||||
-- Probabilistic pipeline
|
||||
probabilistic_verdict TEXT NOT NULL,
|
||||
probabilistic_p_up NUMERIC NOT NULL,
|
||||
probabilistic_entropy NUMERIC NOT NULL,
|
||||
probabilistic_ev_r NUMERIC NOT NULL,
|
||||
|
||||
-- Delta analysis
|
||||
delta_agreement BOOLEAN NOT NULL,
|
||||
delta_confidence_delta NUMERIC NOT NULL,
|
||||
delta_reasons JSONB NOT NULL DEFAULT '[]'::jsonb,
|
||||
|
||||
-- Trade plan (null when no BUY verdict)
|
||||
trade_plan JSONB,
|
||||
|
||||
-- Full output for audit
|
||||
full_output JSONB NOT NULL,
|
||||
|
||||
-- Exit signals
|
||||
exit_signals JSONB NOT NULL DEFAULT '[]'::jsonb,
|
||||
|
||||
-- Metadata
|
||||
pipeline_mode TEXT NOT NULL DEFAULT 'dual_pipeline',
|
||||
shadow_mode BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Index for per-ticker time-range queries
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_ticker_time
|
||||
ON signal_engine_outputs (ticker, evaluated_at);
|
||||
|
||||
-- Index for global time-range queries
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_evaluated
|
||||
ON signal_engine_outputs (evaluated_at);
|
||||
|
||||
-- Index for filtering by verdict
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_verdicts
|
||||
ON signal_engine_outputs (heuristic_verdict, probabilistic_verdict);
|
||||
```
|
||||
|
||||
### Helm / Deployment Configuration
|
||||
|
||||
Add to `values.yaml` under `services:`:
|
||||
|
||||
```yaml
|
||||
signalEngine:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: signal-engine
|
||||
command: "python -m services.signal_engine.main"
|
||||
tier: processing
|
||||
secrets: [stonks-core-secrets, stonks-market-secrets]
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 128Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
```
|
||||
|
||||
Add to `redis_keys.py`:
|
||||
|
||||
```python
|
||||
QUEUE_SIGNAL_ENGINE = "signal_engine"
|
||||
```
|
||||
|
||||
The service uses the existing `stonks-config` ConfigMap and `stonks-core-secrets` for database/Redis credentials. No new ingress or network policy is needed — the signal engine is a queue-polling worker with no HTTP interface.
|
||||
|
||||
---
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
# Requirements Document — Dual-Pipeline Signal Engine
|
||||
|
||||
## Introduction
|
||||
|
||||
The Stonks Oracle platform currently operates a single aggregation pipeline that can run in either heuristic or probabilistic mode (toggled via `probabilistic_scoring_enabled`). This feature replaces the single-pipeline toggle with a dual-pipeline architecture where both pipelines run concurrently per evaluation tick, produce independent verdicts (BUY/WATCH/SKIP), and emit a structured output contract for downstream consumers (trading engine, delta analysis, dashboards).
|
||||
|
||||
The dual-pipeline engine introduces:
|
||||
- **Pipeline A (Heuristic)**: Deterministic scoring using the existing `S_total = S_company + S_macro + S_competitive` formula with signal weighting, producing a confidence-gated verdict.
|
||||
- **Pipeline B (Probabilistic)**: Bayesian inference using the existing `bayesian.py` infrastructure with regime-based priors, likelihood ratios, entropy gating, and expected value calculation.
|
||||
- **Hard Filter Engine**: Pre-pipeline filters that short-circuit both pipelines before evaluation.
|
||||
- **Multi-Timeframe Engine**: Signal evaluation across M30, H1, H4, D, W, M timeframes with weighted confluence scoring.
|
||||
- **Exit Engine**: Position-level exit management (stop hit, targets, trailing ATR-based).
|
||||
- **Delta Analyzer**: Compares heuristic vs probabilistic verdicts to generate training signals for future model tuning.
|
||||
- **Output Formatter**: Structured `SignalOutput` contract consumed by the trading engine and delta analysis.
|
||||
|
||||
The design must address the signal independence assumption in the Bayesian pipeline — correlated signals (MA+RSI, Fib+Elliott) require a correlation penalty or signal clustering into categories (momentum, structure, volatility, fundamentals) to prevent likelihood ratio stacking inflation.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Signal_Engine**: The top-level orchestrator in `services/signal_engine/` that coordinates input normalization, hard filters, both pipelines, delta analysis, and output formatting per evaluation tick.
|
||||
- **Heuristic_Pipeline**: Pipeline A — deterministic scoring that computes `S_total = S_company + S_macro + S_competitive` with signal weighting and produces a confidence-gated BUY/WATCH/SKIP verdict.
|
||||
- **Probabilistic_Pipeline**: Pipeline B — Bayesian inference pipeline that computes posterior probability via log-likelihood accumulation with regime-based priors, entropy gating, and expected value calculation.
|
||||
- **Input_Normalizer**: The component that ingests multi-timeframe OHLCV data, fundamentals, macro context, and open positions into a unified `NormalizedInput` structure consumed by both pipelines.
|
||||
- **Signal_Library**: The collection of technical signal evaluators (Fibonacci retracement, MA stack, RSI, Cup & Handle, Elliott Wave) that produce scored signals per timeframe.
|
||||
- **Multi_Timeframe_Engine**: The component that evaluates signals across six timeframes (M30, H1, H4, D, W, M) and computes weighted confluence scores.
|
||||
- **Hard_Filter_Engine**: The pre-pipeline filter stage that evaluates macro bias, valuation score, and earnings proximity to short-circuit evaluation before either pipeline runs.
|
||||
- **Exit_Engine**: The position management component that evaluates stop hits, take-profit targets, and trailing ATR-based stops for open positions.
|
||||
- **Delta_Analyzer**: The component that compares heuristic and probabilistic verdicts, tracks agreement rates, measures confidence deltas, and records disagreement reasons as training signals.
|
||||
- **Output_Formatter**: The component that assembles the structured `SignalOutput` contract from both pipeline results, delta analysis, and optional trade plan.
|
||||
- **SignalOutput**: The structured output contract containing ticker, timestamp, price, heuristic verdict/confidence/S_total, probabilistic verdict/P_up/entropy/EV_R, delta analysis, and optional trade plan.
|
||||
- **Verdict**: A pipeline decision of BUY, WATCH, or SKIP with associated confidence and reasoning.
|
||||
- **Confluence**: The condition where a signal triggers across multiple timeframes; requires activation on at least 2 timeframes including at least one of D, W, or M.
|
||||
- **Entropy_Gate**: Shannon entropy threshold used in the probabilistic pipeline to detect high-uncertainty states and force SKIP verdicts.
|
||||
- **EV_R**: Expected value per unit of risk, computed as `P_up · E[win_R] - (1 - P_up) · 1.0`, used as a quality gate in the probabilistic pipeline.
|
||||
- **Signal_Cluster**: A grouping of correlated signals (momentum, structure, volatility, fundamentals) used to prevent likelihood ratio stacking inflation in the Bayesian pipeline.
|
||||
- **Likelihood_Ratio**: The ratio `P(signal|up) / P(signal|down)` used in Bayesian updating, where `P(sig|up) = h·s + (1-h)·(1-s)·0.5`.
|
||||
- **Regime_Prior**: The initial probability assigned based on market regime classification: bull=0.58, range=0.50, bear=0.42.
|
||||
- **OHLCV**: Open, High, Low, Close, Volume — standard market data bar format.
|
||||
- **ATR**: Average True Range — a volatility measure used for trailing stop calculations.
|
||||
- **Fibonacci_Retracement**: A technical analysis tool computing price levels as `L(r) = SH - r·(SH - SL)` where SH is swing high, SL is swing low, and r is a retracement ratio (0.236, 0.382, 0.5, 0.618, 0.786).
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Input Normalization
|
||||
|
||||
**User Story:** As a signal engine operator, I want all market data, fundamentals, macro context, and open positions normalized into a single input structure, so that both pipelines consume identical inputs per evaluation tick.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN an evaluation tick is triggered for a ticker, THE Input_Normalizer SHALL construct a `NormalizedInput` containing multi-timeframe OHLCV bars (M30, H1, H4, D, W, M), fundamental metrics (valuation_score, earnings_proximity_days), macro context (macro_bias as float in [-1.0, 1.0]), and open position state (entry_price, current_price, stop_loss, targets).
|
||||
2. THE Input_Normalizer SHALL source OHLCV data from the existing market data tables, fundamental metrics from the existing company and trend data, and macro context from the existing `macro_impact_records` and `global_events` tables.
|
||||
3. IF any required data source is unavailable or returns an error, THEN THE Input_Normalizer SHALL populate the corresponding field with a sentinel value (`None` for optional fields, empty list for OHLCV bars) and log a warning identifying the missing source.
|
||||
4. THE Input_Normalizer SHALL validate that all OHLCV bars have monotonically increasing timestamps within each timeframe series.
|
||||
5. THE Input_Normalizer SHALL produce identical `NormalizedInput` instances for both pipelines within the same evaluation tick (shared reference, no independent fetches).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 2: Signal Library — Technical Signal Evaluation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want a library of technical signal evaluators that produce scored signals per timeframe, so that both pipelines can consume standardized signal assessments.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Library SHALL implement Fibonacci retracement signal evaluation using the formula `L(r) = SH - r·(SH - SL)` for retracement ratios [0.236, 0.382, 0.5, 0.618, 0.786], where SH is the swing high and SL is the swing low within the evaluation window.
|
||||
2. THE Signal_Library SHALL implement moving average stack evaluation that detects bullish alignment (MA_10 > MA_20 > MA_50 > MA_200) and bearish alignment (MA_10 < MA_20 < MA_50 < MA_200), producing a signal strength proportional to the degree of alignment.
|
||||
3. THE Signal_Library SHALL implement RSI evaluation using the standard 14-period RSI formula, producing overbought signals (RSI > 70) and oversold signals (RSI < 30) with strength scaled by distance from the threshold.
|
||||
4. THE Signal_Library SHALL implement Cup & Handle pattern detection that identifies the cup formation (U-shaped price recovery) and handle (small consolidation), producing a signal with confidence proportional to pattern completeness.
|
||||
5. THE Signal_Library SHALL implement Elliott Wave detection that identifies impulse waves (5-wave structure) and corrective waves (3-wave structure), producing a signal with the current wave position and projected direction.
|
||||
6. WHEN a signal evaluator receives insufficient data for its calculation (fewer bars than the required lookback period), THE Signal_Library SHALL return a null signal with a reason code indicating insufficient data rather than producing a partial evaluation.
|
||||
7. FOR ALL signal evaluators, THE Signal_Library SHALL produce output conforming to a common `SignalResult` structure containing: signal_type, timeframe, strength (float in [0.0, 1.0]), direction (bullish/bearish/neutral), confidence (float in [0.0, 1.0]), and metadata specific to the signal type.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 3: Multi-Timeframe Confluence Engine
|
||||
|
||||
**User Story:** As a quantitative analyst, I want signals evaluated across multiple timeframes with weighted confluence scoring, so that the engine prioritizes signals confirmed across longer timeframes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Multi_Timeframe_Engine SHALL evaluate each signal type across six timeframes with the following weights: M30=0.03, H1=0.07, H4=0.15, D=0.30, W=0.30, M=0.15.
|
||||
2. THE Multi_Timeframe_Engine SHALL compute a weighted confluence score as `C_confluence = Σ(w_tf · s_tf)` where `w_tf` is the timeframe weight and `s_tf` is the signal strength on that timeframe (0.0 if the signal did not trigger).
|
||||
3. WHEN a signal triggers on fewer than 2 timeframes, THE Multi_Timeframe_Engine SHALL discard the signal from further pipeline processing (minimum confluence threshold).
|
||||
4. WHEN a signal triggers on 2 or more timeframes but none of D, W, or M are included, THE Multi_Timeframe_Engine SHALL discard the signal from further pipeline processing (higher-timeframe anchor requirement).
|
||||
5. THE Multi_Timeframe_Engine SHALL pass the confluence-filtered signals and their weighted scores to both the Heuristic_Pipeline and Probabilistic_Pipeline.
|
||||
6. FOR ALL signal sets, activating a signal on an additional timeframe with non-zero weight while holding its strengths on the other timeframes fixed SHALL never lower the confluence score, and SHALL raise it whenever the added strength is greater than 0.0 (monotonicity with respect to timeframe activation count and weight).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 4: Hard Filter Engine — Pre-Pipeline Gating
|
||||
|
||||
**User Story:** As a risk manager, I want hard filters that short-circuit both pipelines before evaluation, so that clearly unfavorable conditions produce immediate SKIP verdicts without wasting computation.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN the macro_bias value from the NormalizedInput equals -1.0, THE Hard_Filter_Engine SHALL produce an immediate SKIP verdict for both pipelines with reason "macro_bias_negative".
|
||||
2. WHEN the valuation_score from the NormalizedInput is below 0.3, THE Hard_Filter_Engine SHALL produce an immediate SKIP verdict for both pipelines with reason "valuation_below_threshold".
|
||||
3. WHEN the earnings_proximity_days from the NormalizedInput is 5 or fewer, THE Hard_Filter_Engine SHALL produce an immediate SKIP verdict for both pipelines with reason "earnings_block".
|
||||
4. WHEN multiple hard filters trigger simultaneously, THE Hard_Filter_Engine SHALL record all triggered filter reasons in the SKIP verdict (not just the first).
|
||||
5. WHEN no hard filters trigger, THE Hard_Filter_Engine SHALL pass the NormalizedInput through to both pipelines without modification.
|
||||
6. THE Hard_Filter_Engine SHALL execute before either pipeline begins evaluation, and both pipelines SHALL receive the same filter decision.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 5: Heuristic Pipeline — Deterministic Scoring and Verdict
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the heuristic pipeline to produce a deterministic BUY/WATCH/SKIP verdict based on composite scoring of company, macro, and competitive signals, so that the system maintains a transparent, auditable scoring path.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Heuristic_Pipeline SHALL compute a total score `S_total = S_company + S_macro + S_competitive` using the existing three-layer signal aggregation with the current `WeightedSignal` abstraction.
|
||||
2. THE Heuristic_Pipeline SHALL compute signal weights using the formula `W_signal = gate · recency · credibility · (1 + novelty) · market_context` consistent with the existing `compute_signal_weight` function in `scoring.py`.
|
||||
3. THE Heuristic_Pipeline SHALL compute a confidence value from the existing trend confidence formula incorporating source count, extraction confidence, signal agreement, and contradiction penalty.
|
||||
4. THE Heuristic_Pipeline SHALL produce a BUY verdict WHEN confidence >= 0.70 AND S_total >= 1.2 AND valuation_score >= 0.5 AND macro_bias > 0 AND earnings_proximity_days > 5.
|
||||
5. THE Heuristic_Pipeline SHALL produce a WATCH verdict WHEN confidence >= 0.55 AND the BUY conditions are not fully met.
|
||||
6. THE Heuristic_Pipeline SHALL produce a SKIP verdict WHEN confidence < 0.55.
|
||||
7. THE Heuristic_Pipeline SHALL emit a `HeuristicResult` containing: verdict (BUY/WATCH/SKIP), confidence (float), S_total (float), S_company (float), S_macro (float), S_competitive (float), signal_weights (list), and reasoning (list of strings explaining the verdict).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 6: Probabilistic Pipeline — Bayesian Inference and Verdict
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the probabilistic pipeline to produce a Bayesian BUY/WATCH/SKIP verdict using regime-based priors, likelihood ratios, entropy gating, and expected value calculation, so that the system captures uncertainty structure and risk-adjusted expected outcomes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Probabilistic_Pipeline SHALL initialize the prior probability based on the current market regime classification: bull regime → P_prior = 0.58, range regime → P_prior = 0.50, bear regime → P_prior = 0.42.
|
||||
2. THE Probabilistic_Pipeline SHALL compute likelihood ratios for each signal using `P(sig|up) = h·s + (1-h)·(1-s)·0.5` and `LR = P(sig|up) / P(sig|down)`, where h is the signal's historical hit rate and s is the signal strength.
|
||||
3. THE Probabilistic_Pipeline SHALL update the posterior using log-odds accumulation: `logit(P_post) = logit(P_prior) + Σ log(LR_i)`, converting back to probability via the sigmoid function.
|
||||
4. THE Probabilistic_Pipeline SHALL compute Shannon entropy `H = -P_up·log₂(P_up) - (1-P_up)·log₂(1-P_up)` and apply entropy gating: WHEN H > 0.95, THE Probabilistic_Pipeline SHALL force a SKIP verdict with reason "high_entropy".
|
||||
5. THE Probabilistic_Pipeline SHALL compute expected value per unit risk as `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0` where `E[win_R]` is the expected win in risk units derived from signal strength and historical reward-risk ratios.
|
||||
6. THE Probabilistic_Pipeline SHALL produce a BUY verdict WHEN P_up >= 0.60 AND entropy <= 0.90 AND EV_R >= 1.5 AND macro_bias > 0 AND valuation_score >= 0.5.
|
||||
7. THE Probabilistic_Pipeline SHALL produce a WATCH verdict WHEN P_up >= 0.55 AND entropy <= 0.95 AND the BUY conditions are not fully met.
|
||||
8. THE Probabilistic_Pipeline SHALL produce a SKIP verdict in all other cases.
|
||||
9. THE Probabilistic_Pipeline SHALL emit a `ProbabilisticResult` containing: verdict (BUY/WATCH/SKIP), P_up (float), entropy (float), EV_R (float), prior (float), posterior (float), likelihood_ratios (list), regime (string), and reasoning (list of strings).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 7: Signal Correlation Penalty — Preventing LR Stacking Inflation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want correlated signals grouped into clusters with a correlation penalty applied to prevent likelihood ratio stacking inflation, so that the Bayesian pipeline does not overstate confidence from redundant signals.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Probabilistic_Pipeline SHALL classify each signal into one of four clusters: momentum (MA stack, RSI), structure (Fibonacci retracement, Elliott Wave), volatility (ATR-based signals, Bollinger-derived), and fundamentals (valuation, earnings, macro).
|
||||
2. WHEN multiple signals within the same cluster produce likelihood ratios in the same direction, THE Probabilistic_Pipeline SHALL apply a within-cluster penalty: only the strongest LR in the cluster contributes at full weight, and subsequent LRs in the same cluster contribute at a decay factor of 0.5^(n-1) where n is the signal's rank within the cluster by LR magnitude.
|
||||
3. THE Probabilistic_Pipeline SHALL apply no penalty across different clusters (signals from different clusters are treated as independent).
|
||||
4. WHEN a cluster contains only one signal, THE Probabilistic_Pipeline SHALL apply no penalty to that signal.
|
||||
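5. FOR ALL signal sets whose likelihood ratios all favor the same direction, THE Probabilistic_Pipeline SHALL produce a posterior probability that is no further from the prior than the posterior computed without the correlation penalty (the penalty only reduces confidence, never inflates it); for bullish signal sets this means the penalized posterior is less than or equal to the unpenalized posterior.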
|
||||
|
||||
---
|
||||
|
||||
### Requirement 8: Exit Engine — Position Management
|
||||
|
||||
**User Story:** As a trader, I want the signal engine to evaluate exit conditions for open positions, so that stop hits, take-profit targets, and trailing stops are managed as part of the signal evaluation cycle.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN the current price of an open position hits or crosses below the stop_loss level, THE Exit_Engine SHALL emit an EXIT_FULL signal for that position with reason "stop_hit".
|
||||
2. WHEN the current price of an open position hits or crosses above the first take-profit target (target_1), THE Exit_Engine SHALL emit an EXIT_HALF signal for that position with reason "target_1_hit".
|
||||
3. WHEN the current price of an open position hits or crosses above the second take-profit target (target_2), THE Exit_Engine SHALL emit an EXIT_FULL signal for that position with reason "target_2_hit".
|
||||
4. WHEN a partial exit has been executed (EXIT_HALF), THE Exit_Engine SHALL activate a trailing stop at `current_price - ATR · trailing_multiplier` and update the trailing stop upward as the price advances (the trailing stop moves up but does not move down).
|
||||
5. WHEN the trailing stop is active and the current price hits or crosses below the trailing stop level, THE Exit_Engine SHALL emit an EXIT_FULL signal for the remaining position with reason "trailing_stop_hit".
|
||||
6. THE Exit_Engine SHALL evaluate exit conditions before the signal pipelines run for new entry signals, so that exit signals take priority over new entry signals for the same ticker.
|
||||
7. THE Exit_Engine SHALL emit exit signals as part of the `SignalOutput` contract with the position identifier, exit type (EXIT_HALF/EXIT_FULL), and reason.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 9: Delta Analyzer — Pipeline Agreement Tracking
|
||||
|
||||
**User Story:** As a model developer, I want the delta analyzer to compare heuristic and probabilistic verdicts and record disagreement details, so that I can generate training signals for future model tuning.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN both pipelines produce verdicts for the same ticker and tick, THE Delta_Analyzer SHALL compute an agreement flag (true if both verdicts are identical, false otherwise).
|
||||
2. THE Delta_Analyzer SHALL compute a confidence delta as `|heuristic_confidence - probabilistic_P_up|` representing the magnitude of disagreement between the two pipelines.
|
||||
3. WHEN the pipelines disagree on verdict, THE Delta_Analyzer SHALL record the disagreement reason by identifying which conditions differed (e.g., "heuristic_confidence_below_threshold", "probabilistic_entropy_too_high", "EV_R_below_threshold").
|
||||
4. THE Delta_Analyzer SHALL track a rolling agreement rate over the last 100 evaluations per ticker, stored in Redis for dashboard consumption.
|
||||
5. THE Delta_Analyzer SHALL emit a `DeltaResult` containing: agreement (bool), confidence_delta (float), heuristic_verdict (string), probabilistic_verdict (string), disagreement_reasons (list of strings), and rolling_agreement_rate (float).
|
||||
6. WHEN the rolling agreement rate drops below 0.50 for a ticker, THE Delta_Analyzer SHALL log a warning indicating persistent pipeline disagreement for operator review.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 10: Output Formatter — Structured SignalOutput Contract
|
||||
|
||||
**User Story:** As a downstream system consumer, I want the signal engine to emit a structured `SignalOutput` contract, so that the trading engine, delta analysis dashboard, and audit systems can consume a consistent output format.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Output_Formatter SHALL produce a `SignalOutput` containing: ticker (string), timestamp (datetime), price (float), heuristic section (verdict, confidence, S_total), probabilistic section (verdict, P_up, entropy, EV_R), delta section (agreement, confidence_delta, disagreement_reasons), and optional trade_plan section.
|
||||
2. WHEN the heuristic pipeline produces a BUY verdict, THE Output_Formatter SHALL populate the trade_plan section with entry_price, stop_loss, target_1, target_2, and position_size derived from the heuristic confidence and existing position sizing logic.
|
||||
3. WHEN the probabilistic pipeline produces a BUY verdict but the heuristic pipeline does not, THE Output_Formatter SHALL populate the trade_plan section with a "probabilistic_only" flag and reduced position sizing (50% of standard).
|
||||
4. WHEN both pipelines produce a BUY verdict, THE Output_Formatter SHALL populate the trade_plan section with full position sizing and a "dual_confirmed" flag.
|
||||
5. THE Output_Formatter SHALL serialize the `SignalOutput` as a Pydantic model with JSON serialization support for Redis queue publishing and database persistence.
|
||||
6. FOR ALL valid pipeline results, THE Output_Formatter SHALL produce a `SignalOutput` that round-trips through JSON serialization and deserialization without data loss (parse(format(output)) produces an equivalent object).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 11: Dual Pipeline Orchestration
|
||||
|
||||
**User Story:** As a signal engine operator, I want both pipelines to run concurrently per evaluation tick sharing the same inputs, so that the system produces independent verdicts without redundant data fetching.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN an evaluation tick is triggered, THE Signal_Engine SHALL execute the Input_Normalizer once, then pass the resulting `NormalizedInput` to the Hard_Filter_Engine, then (if not filtered) execute both the Heuristic_Pipeline and Probabilistic_Pipeline concurrently using `asyncio.gather`.
|
||||
2. THE Signal_Engine SHALL enforce that both pipelines receive identical `NormalizedInput` references (no independent data fetches that could produce different snapshots).
|
||||
3. WHEN either pipeline raises an exception during evaluation, THE Signal_Engine SHALL catch the exception, log the error with full traceback, and produce a SKIP verdict for the failed pipeline with reason "pipeline_error" while allowing the other pipeline to complete normally.
|
||||
4. THE Signal_Engine SHALL measure and log the wall-clock execution time of each pipeline per tick for performance monitoring.
|
||||
5. THE Signal_Engine SHALL publish the assembled `SignalOutput` to the existing Redis queue (`stonks:queue:trading_decisions`) for consumption by the trading engine.
|
||||
6. THE Signal_Engine SHALL persist each `SignalOutput` to a database table for historical analysis and audit.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 12: Integration with Existing Trading Engine
|
||||
|
||||
**User Story:** As a platform operator, I want the dual-pipeline signal engine to integrate with the existing trading engine, so that the trading engine can consume `SignalOutput` verdicts and make execution decisions.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Engine SHALL publish `SignalOutput` to the existing `stonks:queue:trading_decisions` Redis queue in a format compatible with the existing `TradingEngine.evaluate_recommendation` interface.
|
||||
2. THE Signal_Engine SHALL map the `SignalOutput` trade_plan to the existing `Recommendation` schema fields (action, confidence, position_sizing) so that the trading engine can process dual-pipeline outputs without modification to its core evaluation logic.
|
||||
3. WHEN the `SignalOutput` has a "dual_confirmed" flag, THE Signal_Engine SHALL set the recommendation confidence to the maximum of heuristic_confidence and probabilistic_P_up.
|
||||
4. WHEN the `SignalOutput` has a "probabilistic_only" flag, THE Signal_Engine SHALL set the recommendation confidence to `probabilistic_P_up · 0.8` (20% confidence haircut for single-pipeline confirmation).
|
||||
5. WHEN neither pipeline produces a BUY verdict, THE Signal_Engine SHALL NOT publish a trading recommendation to the queue (WATCH and SKIP verdicts are persisted for analysis but not forwarded to the trading engine).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 13: Configuration and Feature Flags
|
||||
|
||||
**User Story:** As a platform operator, I want the dual-pipeline engine configurable via the existing `risk_configs` table and environment variables, so that I can tune thresholds, enable/disable individual pipelines, and adjust timeframe weights without code changes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Engine SHALL support a `dual_pipeline_enabled` feature flag in `risk_configs` that toggles the entire dual-pipeline engine on or off, defaulting to false for safe rollout.
|
||||
2. THE Signal_Engine SHALL support independent enable/disable flags for each pipeline: `heuristic_pipeline_enabled` and `probabilistic_pipeline_enabled`, both defaulting to true when the dual-pipeline engine is enabled.
|
||||
3. THE Signal_Engine SHALL support configurable timeframe weights via a `timeframe_weights` JSON object in `risk_configs`, defaulting to `{"M30": 0.03, "H1": 0.07, "H4": 0.15, "D": 0.30, "W": 0.30, "M": 0.15}`.
|
||||
4. THE Signal_Engine SHALL support configurable hard filter thresholds: `hard_filter_valuation_min` (default 0.3), `hard_filter_earnings_days` (default 5), and `hard_filter_macro_bias_skip` (default -1.0).
|
||||
5. THE Signal_Engine SHALL support configurable verdict thresholds for both pipelines via `risk_configs` JSON, including heuristic confidence thresholds (BUY: 0.70, WATCH: 0.55) and probabilistic thresholds (BUY: P_up 0.60, entropy 0.90, EV_R 1.5; WATCH: P_up 0.55, entropy 0.95).
|
||||
6. IF the `dual_pipeline_enabled` flag fails to read from the database, THEN THE Signal_Engine SHALL default to disabled (fail-safe behavior) and log a warning.
|
||||
7. THE Signal_Engine SHALL log the active configuration at startup and on each configuration change for auditability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 14: Regime-Based Prior Engine
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the probabilistic pipeline's prior probability to adapt based on the current market regime, so that the Bayesian inference starts from a regime-appropriate baseline rather than a fixed 0.50.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Probabilistic_Pipeline SHALL use the existing `classify_regime` function from `services/aggregation/regime.py` to determine the current market regime for each ticker.
|
||||
2. THE Probabilistic_Pipeline SHALL map regime classifications to prior probabilities: trend_following with positive trend_indicator → 0.58 (bull), trend_following with negative trend_indicator → 0.42 (bear), mean_reversion → 0.50 (range), panic → 0.42 (bear), uncertainty → 0.50 (range).
|
||||
3. THE Probabilistic_Pipeline SHALL convert the regime prior to log-odds before accumulating likelihood ratios: `logit(P_prior) = log(P_prior / (1 - P_prior))`.
|
||||
4. WHEN market data is insufficient for regime classification (fewer than 100 days of price history), THE Probabilistic_Pipeline SHALL use the uncertainty prior of 0.50.
|
||||
5. THE Probabilistic_Pipeline SHALL record the regime classification and prior probability in the `ProbabilisticResult` for auditability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 15: Database Schema for Signal Engine Output
|
||||
|
||||
**User Story:** As a platform operator, I want signal engine outputs persisted to a dedicated database table, so that historical evaluations are available for analysis, backtesting, and audit.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Engine SHALL persist each `SignalOutput` to a `signal_engine_outputs` table with columns for: id (UUID primary key), ticker (text), evaluated_at (timestamptz), price (numeric), heuristic_verdict (text), heuristic_confidence (numeric), heuristic_s_total (numeric), probabilistic_verdict (text), probabilistic_p_up (numeric), probabilistic_entropy (numeric), probabilistic_ev_r (numeric), delta_agreement (boolean), delta_confidence_delta (numeric), delta_reasons (JSONB), trade_plan (JSONB), full_output (JSONB), exit_signals (JSONB), pipeline_mode (text), shadow_mode (boolean), created_at (timestamptz).
|
||||
2. THE Signal_Engine SHALL create an index on `(ticker, evaluated_at)` for efficient time-range queries per ticker.
|
||||
3. THE Signal_Engine SHALL create an index on `evaluated_at` for efficient global time-range queries.
|
||||
4. WHEN persisting fails due to a database error, THE Signal_Engine SHALL log the error and continue processing (persistence failure does not block signal emission to the trading queue).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 16: Backward Compatibility and Migration Path
|
||||
|
||||
**User Story:** As a platform operator, I want the dual-pipeline engine to coexist with the existing single-pipeline aggregation, so that the rollout is incremental and reversible.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN `dual_pipeline_enabled` is false, THE Signal_Engine SHALL NOT run, and the existing aggregation pipeline SHALL continue to operate unchanged.
|
||||
2. WHEN `dual_pipeline_enabled` is true, THE Signal_Engine SHALL run alongside the existing aggregation pipeline, with the trading engine consuming `SignalOutput` from the dual-pipeline engine instead of `Recommendation` from the existing recommendation worker.
|
||||
3. THE Signal_Engine SHALL reuse the existing `WeightedSignal`, `BayesianPosterior`, `RegimeClassification`, and `TrendSummary` data structures from `services/aggregation/` rather than duplicating them.
|
||||
4. THE Signal_Engine SHALL reuse the existing `compute_signal_weight`, `compute_bayesian_posterior`, and `classify_regime` functions rather than reimplementing the underlying math.
|
||||
5. THE Signal_Engine SHALL add the new `signal_engine_outputs` table via a new database migration without modifying existing tables.
|
||||
6. THE Signal_Engine SHALL support running in "shadow mode" where both the existing pipeline and the dual-pipeline engine run, but only the existing pipeline's output is forwarded to the trading engine (dual-pipeline output is persisted for comparison only).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 17: Property-Based Testing for Dual-Pipeline Correctness
|
||||
|
||||
**User Story:** As a developer, I want comprehensive property-based tests validating the mathematical correctness and structural invariants of the dual-pipeline engine, so that edge cases and numerical stability issues are caught before deployment.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE test suite SHALL include property-based tests for the Fibonacci retracement formula verifying that `L(r) = SH - r·(SH - SL)` produces values in [SL, SH] for all r in [0, 1] and all SH > SL > 0.
|
||||
2. THE test suite SHALL include property-based tests for the Bayesian log-odds update verifying that `logit(P_post) = logit(P_prior) + Σ log(LR_i)` round-trips correctly: converting P_prior to logit, adding log-LRs, and converting back via sigmoid produces a valid probability in (0, 1).
|
||||
3. THE test suite SHALL include property-based tests for the entropy gate verifying that Shannon entropy is maximized at P_up = 0.5 and equals 0.0 at P_up = 0.0 or P_up = 1.0, and is symmetric around 0.5.
|
||||
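4. THE test suite SHALL include property-based tests for the signal correlation penalty verifying that, for any signal set whose likelihood ratios all favor the same direction, the penalized posterior is no further from the prior than the unpenalized posterior (for bullish signal sets, the penalized posterior is less than or equal to the unpenalized posterior).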
|
||||
5. THE test suite SHALL include property-based tests for the multi-timeframe confluence score verifying monotonicity: activating a signal on an additional timeframe with non-zero weight always increases or maintains the confluence score.
|
||||
6. THE test suite SHALL include property-based tests for the `SignalOutput` contract verifying round-trip serialization: `SignalOutput.model_validate_json(output.model_dump_json())` produces an equivalent object for all valid outputs.
|
||||
7. THE test suite SHALL include property-based tests for the hard filter engine verifying that macro_bias = -1.0 always produces SKIP, valuation_score < 0.3 always produces SKIP, and earnings_proximity_days <= 5 always produces SKIP, regardless of all other input values.
|
||||
8. THE test suite SHALL include property-based tests for the EV_R calculation verifying that `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0` is monotonically increasing with P_up for fixed E[win_R] > 0.
|
||||
@@ -0,0 +1,345 @@
|
||||
# Implementation Plan: Dual-Pipeline Signal Engine
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the dual-pipeline signal engine as a new service at `services/signal_engine/` that runs as an independent Kubernetes deployment. The engine evaluates both a heuristic (deterministic scoring) and a probabilistic (Bayesian inference) pipeline concurrently per ticker per evaluation tick, producing independent BUY/WATCH/SKIP verdicts. Implementation proceeds incrementally: infrastructure first, then core models, signal library, pipelines, orchestration, integration, and deployment.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] 1. Project scaffolding, configuration, and data models
|
||||
- [x] 1.1 Create service directory structure and `__init__.py` files
|
||||
- Create `services/signal_engine/` with all subdirectories per the design module structure
|
||||
- Create `services/signal_engine/__init__.py`, `services/signal_engine/signals/__init__.py`
|
||||
- _Requirements: 11.1, 13.1_
|
||||
|
||||
- [x] 1.2 Implement `models.py` — all Pydantic data models
|
||||
- Define `OHLCVBar`, `NormalizedInput`, `OpenPositionState`, `SignalResult`, `SignalDirection`
|
||||
- Define `ConfluenceSignal`, `Verdict`, `HeuristicResult`, `LikelihoodRatio`, `ProbabilisticResult`
|
||||
- Define `DeltaResult`, `ExitSignal`, `ExitType`, `TradePlan`, `SignalOutput`
|
||||
- All models must use Pydantic `BaseModel` with proper field constraints (`ge`, `le`)
|
||||
- _Requirements: 1.1, 2.7, 5.7, 6.9, 9.5, 10.1, 10.5_
|
||||
|
||||
- [x] 1.3 Implement `config.py` — `SignalEngineConfig` and sub-configs
|
||||
- Define `SignalEngineConfig` dataclass with all fields from the design
|
||||
- Define `HardFilterConfig`, `HeuristicConfig`, `ProbabilisticConfig`, `ExitConfig` as derived sub-configs
|
||||
- Implement `load_config()` that reads from `risk_configs` table + environment variables
|
||||
- Default `dual_pipeline_enabled` to `False` (fail-safe)
|
||||
- _Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7_
|
||||
|
||||
- [x] 1.4 Add `QUEUE_SIGNAL_ENGINE` to `services/shared/redis_keys.py`
|
||||
- Add `QUEUE_SIGNAL_ENGINE = "signal_engine"` constant
|
||||
- _Requirements: 11.1_
|
||||
|
||||
- [x] 1.5 Write property test for `SignalOutput` round-trip serialization
|
||||
- **Requirement 17.6: SignalOutput round-trip serialization**
|
||||
- Generate arbitrary valid `SignalOutput` instances with Hypothesis
|
||||
- Verify `SignalOutput.model_validate_json(output.model_dump_json())` produces equivalent object
|
||||
- File: `tests/test_pbt_signal_engine_models.py`
|
||||
- _Requirements: 10.5, 17.6_
|
||||
|
||||
- [x] 2. Input Normalizer and Hard Filter Engine
|
||||
- [x] 2.1 Implement `normalizer.py` — Input Normalizer
|
||||
- Implement `normalize_input(pool, ticker, config) -> NormalizedInput`
|
||||
- Fetch OHLCV bars from `market_data_bars` for M30, H1, H4, D, W, M timeframes
|
||||
- Fetch fundamental metrics (valuation_score, earnings_proximity_days) from company/trend data
|
||||
- Fetch macro context (macro_bias) from `macro_impact_records` and `global_events`
|
||||
- Fetch open position state from trading engine portfolio tables
|
||||
- Populate sentinel values (`None`, empty list) for unavailable data with logged warnings
|
||||
- Validate monotonically increasing timestamps within each timeframe series
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5_
|
||||
|
||||
- [x] 2.2 Implement `hard_filter.py` — Hard Filter Engine
|
||||
- Implement `evaluate_hard_filters(normalized, config) -> HardFilterResult`
|
||||
- Check `macro_bias == -1.0` → SKIP with reason "macro_bias_negative"
|
||||
- Check `valuation_score < 0.3` → SKIP with reason "valuation_below_threshold"
|
||||
- Check `earnings_proximity_days <= 5` → SKIP with reason "earnings_block"
|
||||
- Record all triggered filter reasons (not just first)
|
||||
- Return `HardFilterResult` with `filtered: bool` and `reasons: list[str]`
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6_
|
||||
|
||||
- [x] 2.3 Write property tests for hard filter engine
|
||||
- **Requirement 17.7: Hard filter determinism**
|
||||
- Generate arbitrary `NormalizedInput` with `macro_bias = -1.0` → always SKIP
|
||||
- Generate arbitrary `NormalizedInput` with `valuation_score < 0.3` → always SKIP
|
||||
- Generate arbitrary `NormalizedInput` with `earnings_proximity_days <= 5` → always SKIP
|
||||
- Verify these hold regardless of all other input values
|
||||
- File: `tests/test_pbt_signal_engine_hard_filter.py`
|
||||
- _Requirements: 4.1, 4.2, 4.3, 17.7_
|
||||
|
||||
- [x] 3. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 4. Signal Library — Technical Signal Evaluators
|
||||
- [x] 4.1 Implement `signals/base.py` — SignalEvaluator protocol
|
||||
- Define `SignalEvaluator` protocol with `evaluate(bars, timeframe) -> SignalResult | None`
|
||||
- Define common helper functions for swing high/low detection, lookback validation
|
||||
- _Requirements: 2.6, 2.7_
|
||||
|
||||
- [x] 4.2 Implement `signals/fibonacci.py` — Fibonacci retracement evaluator
|
||||
- Implement `L(r) = SH - r·(SH - SL)` for ratios [0.236, 0.382, 0.5, 0.618, 0.786]
|
||||
- Detect swing high and swing low within the evaluation window
|
||||
- Produce signal strength based on proximity of current price to retracement levels
|
||||
- Return `None` with reason code when insufficient data
|
||||
- _Requirements: 2.1, 2.6, 2.7_
|
||||
|
||||
- [x] 4.3 Write property test for Fibonacci retracement formula
|
||||
- **Requirement 17.1: Fibonacci retracement bounds**
|
||||
- For all `r` in [0, 1] and all `SH > SL > 0`, verify `L(r)` is in [SL, SH]
|
||||
- File: `tests/test_pbt_signal_engine_fibonacci.py`
|
||||
- _Requirements: 2.1, 17.1_
|
||||
|
||||
- [x] 4.4 Implement `signals/ma_stack.py` — Moving average stack evaluator
|
||||
- Detect bullish alignment (MA_10 > MA_20 > MA_50 > MA_200)
|
||||
- Detect bearish alignment (MA_10 < MA_20 < MA_50 < MA_200)
|
||||
- Produce signal strength proportional to degree of alignment
|
||||
- Return `None` when insufficient bars for MA_200 calculation
|
||||
- _Requirements: 2.2, 2.6, 2.7_
|
||||
|
||||
- [x] 4.5 Implement `signals/rsi.py` — RSI evaluator
|
||||
- Implement standard 14-period RSI formula
|
||||
- Produce overbought signals (RSI > 70) and oversold signals (RSI < 30)
|
||||
- Scale strength by distance from threshold
|
||||
- Return `None` when fewer than 14 bars available
|
||||
- _Requirements: 2.3, 2.6, 2.7_
|
||||
|
||||
- [x] 4.6 Implement `signals/cup_handle.py` — Cup & Handle pattern detector
|
||||
- Identify cup formation (U-shaped price recovery) and handle (small consolidation)
|
||||
- Produce signal with confidence proportional to pattern completeness
|
||||
- Return `None` when insufficient data or no pattern detected
|
||||
- _Requirements: 2.4, 2.6, 2.7_
|
||||
|
||||
- [x] 4.7 Implement `signals/elliott_wave.py` — Elliott Wave detector
|
||||
- Identify impulse waves (5-wave structure) and corrective waves (3-wave structure)
|
||||
- Produce signal with current wave position and projected direction
|
||||
- Return `None` when insufficient data or ambiguous wave count
|
||||
- _Requirements: 2.5, 2.6, 2.7_
|
||||
|
||||
- [x] 5. Multi-Timeframe Confluence Engine
|
||||
- [x] 5.1 Implement `confluence.py` — Multi-Timeframe Engine
|
||||
- Implement `compute_confluence(signal_results, weights) -> list[ConfluenceSignal]`
|
||||
- Compute weighted confluence score: `C_confluence = Σ(w_tf · s_tf)`
|
||||
- Apply minimum confluence threshold: discard signals triggering on < 2 timeframes
|
||||
- Apply higher-timeframe anchor: discard signals without at least one of D, W, or M
|
||||
- Return `ConfluenceSignal` objects with active timeframes and per-timeframe strengths
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6_
|
||||
|
||||
- [x] 5.2 Write property test for confluence score monotonicity
|
||||
- **Requirement 17.5: Confluence score monotonicity**
|
||||
- Verify that activating a signal on an additional timeframe with non-zero weight always increases or maintains the confluence score
|
||||
- File: `tests/test_pbt_signal_engine_confluence.py`
|
||||
- _Requirements: 3.6, 17.5_
|
||||
|
||||
- [x] 6. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 7. Heuristic Pipeline (Pipeline A)
|
||||
- [x] 7.1 Implement `heuristic.py` — Heuristic Pipeline
|
||||
- Implement `run_heuristic_pipeline(normalized, confluence_signals, config) -> HeuristicResult`
|
||||
- Compute `S_total = S_company + S_macro + S_competitive` using existing `compute_signal_weight()`
|
||||
- Compute confidence from source count, extraction confidence, signal agreement, contradiction penalty
|
||||
- BUY verdict: confidence >= 0.70 AND S_total >= 1.2 AND valuation_score >= 0.5 AND macro_bias > 0 AND earnings_proximity_days > 5
|
||||
- WATCH verdict: confidence >= 0.55 AND BUY conditions not fully met
|
||||
- SKIP verdict: confidence < 0.55
|
||||
- Emit `HeuristicResult` with all required fields and reasoning
|
||||
- _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7_
|
||||
|
||||
- [x] 7.2 Write unit tests for heuristic pipeline verdict logic
|
||||
- Test BUY threshold conditions
|
||||
- Test WATCH threshold conditions
|
||||
- Test SKIP conditions
|
||||
- Test edge cases at threshold boundaries
|
||||
- File: `tests/test_signal_engine_heuristic.py`
|
||||
- _Requirements: 5.4, 5.5, 5.6_
|
||||
|
||||
- [x] 8. Probabilistic Pipeline (Pipeline B) and Correlation Penalty
|
||||
- [x] 8.1 Implement `correlation.py` — Signal cluster classification and penalty
|
||||
- Define `SignalCluster` enum: MOMENTUM, STRUCTURE, VOLATILITY, FUNDAMENTALS
|
||||
- Implement `classify_signal(signal_type) -> SignalCluster`
|
||||
- Implement `apply_correlation_penalty(likelihood_ratios) -> list[LikelihoodRatio]`
|
||||
- Within-cluster decay: rank LRs by strength within each cluster; the strongest LR keeps full weight, and the n-th strongest (n = 1, 2, 3, …) is weighted by 0.5^(n-1)
|
||||
- No penalty across different clusters
|
||||
- Single-signal clusters receive no penalty
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.4_
|
||||
|
||||
- [x] 8.2 Implement `probabilistic.py` — Probabilistic Pipeline
|
||||
- Implement `run_probabilistic_pipeline(normalized, confluence_signals, regime, config) -> ProbabilisticResult`
|
||||
- Initialize regime-based prior: bull=0.58, range=0.50, bear=0.42
|
||||
- Compute likelihood ratios: `P(sig|up) = h·s + (1-h)·(1-s)·0.5`, `LR = P(sig|up) / P(sig|down)`
|
||||
- Apply correlation penalty via `apply_correlation_penalty()`
|
||||
- Accumulate via log-odds: `logit(P_post) = logit(P_prior) + Σ log(LR_i)`
|
||||
- Compute Shannon entropy and apply entropy gating (H > 0.95 → SKIP)
|
||||
- Compute `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0`
|
||||
- BUY: P_up >= 0.60 AND entropy <= 0.90 AND EV_R >= 1.5 AND macro_bias > 0 AND valuation_score >= 0.5
|
||||
- WATCH: P_up >= 0.55 AND entropy <= 0.95 AND BUY conditions not fully met
|
||||
- SKIP: all other cases
|
||||
- Use existing `classify_regime()` from `services/aggregation/regime.py`
|
||||
- _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 14.1, 14.2, 14.3, 14.4, 14.5_
|
||||
|
||||
- [x] 8.3 Write property test for Bayesian log-odds round-trip
|
||||
- **Requirement 17.2: Bayesian log-odds update correctness**
|
||||
- Verify `logit(P_post) = logit(P_prior) + Σ log(LR_i)` round-trips correctly
|
||||
- Converting P_prior to logit, adding log-LRs, converting back via sigmoid produces valid probability in (0, 1)
|
||||
- File: `tests/test_pbt_signal_engine_bayesian.py`
|
||||
- _Requirements: 6.3, 17.2_
|
||||
|
||||
- [x] 8.4 Write property test for entropy gate
|
||||
- **Requirement 17.3: Entropy gate properties**
|
||||
- Verify Shannon entropy is maximized at P_up = 0.5
|
||||
- Verify entropy equals 0.0 at P_up = 0.0 or P_up = 1.0
|
||||
- Verify entropy is symmetric around 0.5
|
||||
- File: `tests/test_pbt_signal_engine_bayesian.py`
|
||||
- _Requirements: 6.4, 17.3_
|
||||
|
||||
- [x] 8.5 Write property test for signal correlation penalty
|
||||
- **Requirement 17.4: Correlation penalty reduces confidence**
|
||||
- Verify penalized posterior is always <= unpenalized posterior for any signal set with correlated signals
|
||||
- File: `tests/test_pbt_signal_engine_correlation.py`
|
||||
- _Requirements: 7.5, 17.4_
|
||||
|
||||
- [x] 8.6 Write property test for EV_R monotonicity
|
||||
- **Requirement 17.8: EV_R monotonically increasing with P_up**
|
||||
- Verify `EV_R = P_up · E[win_R] - (1 - P_up) · 1.0` is monotonically increasing with P_up for fixed E[win_R] > 0
|
||||
- File: `tests/test_pbt_signal_engine_bayesian.py`
|
||||
- _Requirements: 6.5, 17.8_
|
||||
|
||||
- [x] 9. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 10. Exit Engine
|
||||
- [x] 10.1 Implement `exit_engine.py` — Exit Engine
|
||||
- Implement `evaluate_exits(positions, current_prices, config) -> list[ExitSignal]`
|
||||
- Check stop_loss hit → EXIT_FULL with reason "stop_hit"
|
||||
- Check target_1 hit → EXIT_HALF with reason "target_1_hit"
|
||||
- Check target_2 hit → EXIT_FULL with reason "target_2_hit"
|
||||
- Trailing stop: activate after EXIT_HALF at `current_price - ATR · trailing_multiplier`
|
||||
- Trailing stop ratchets upward only (never moves down)
|
||||
- Trailing stop hit → EXIT_FULL with reason "trailing_stop_hit"
|
||||
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7_
|
||||
|
||||
- [x] 10.2 Write unit tests for exit engine
|
||||
- Test stop_loss trigger
|
||||
- Test target_1 partial exit
|
||||
- Test target_2 full exit
|
||||
- Test trailing stop activation and ratchet behavior
|
||||
- File: `tests/test_signal_engine_exit.py`
|
||||
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5_
|
||||
|
||||
- [x] 11. Delta Analyzer and Output Formatter
|
||||
- [x] 11.1 Implement `delta.py` — Delta Analyzer
|
||||
- Implement `analyze_delta(heuristic, probabilistic, redis, ticker) -> DeltaResult`
|
||||
- Compute agreement flag (both verdicts identical)
|
||||
- Compute confidence delta: `|heuristic_confidence - probabilistic_P_up|`
|
||||
- Record disagreement reasons when verdicts differ
|
||||
- Track rolling 100-evaluation agreement rate in Redis
|
||||
- Log warning when agreement rate drops below 0.50
|
||||
- _Requirements: 9.1, 9.2, 9.3, 9.4, 9.5, 9.6_
|
||||
|
||||
- [x] 11.2 Implement `formatter.py` — Output Formatter
|
||||
- Implement `format_output(ticker, price, heuristic, probabilistic, delta, exit_signals, config) -> SignalOutput`
|
||||
- Both BUY → `dual_confirmed`, full position sizing
|
||||
- Probabilistic-only BUY → `probabilistic_only`, 50% position sizing
|
||||
- Heuristic-only BUY → standard position sizing
|
||||
- No BUY → no trade_plan (WATCH/SKIP persisted for analysis)
|
||||
- Implement `signal_output_to_recommendation(output) -> Recommendation`
|
||||
- Map `SignalOutput` to existing `Recommendation` schema for trading engine compatibility
|
||||
- Dual confirmed: confidence = max(heuristic_confidence, probabilistic_P_up)
|
||||
- Probabilistic only: confidence = probabilistic_P_up · 0.8 (20% haircut)
|
||||
- _Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 12.1, 12.2, 12.3, 12.4, 12.5_
|
||||
|
||||
- [x] 11.3 Write unit tests for output formatter
|
||||
- Test dual_confirmed trade plan generation
|
||||
- Test probabilistic_only trade plan with 50% sizing
|
||||
- Test heuristic-only trade plan
|
||||
- Test no-BUY case (no trade_plan)
|
||||
- Test `signal_output_to_recommendation` mapping
|
||||
- File: `tests/test_signal_engine_formatter.py`
|
||||
- _Requirements: 10.2, 10.3, 10.4, 12.3, 12.4_
|
||||
|
||||
- [x] 12. Orchestrator, Persistence, and Main Entry Point
|
||||
- [x] 12.1 Implement `persistence.py` — Database persistence
|
||||
- Implement `persist_signal_output(pool, output) -> None`
|
||||
- Insert into `signal_engine_outputs` table
|
||||
- Log and continue on database errors (non-blocking)
|
||||
- _Requirements: 15.1, 15.4_
|
||||
|
||||
- [x] 12.2 Implement `worker.py` — Top-level orchestrator
|
||||
- Implement `evaluate_tick(pool, redis, ticker, config) -> SignalOutput | None`
|
||||
- Step 1: Normalize inputs (single fetch, shared reference)
|
||||
- Step 2: Evaluate exit conditions for open positions
|
||||
- Step 3: Run hard filters (short-circuit if filtered)
|
||||
- Step 4: Evaluate signals across timeframes via Signal Library
|
||||
- Step 5: Compute confluence
|
||||
- Step 6: Classify regime via existing `classify_regime()`
|
||||
- Step 7: Run both pipelines concurrently via `asyncio.gather` with exception handling
|
||||
- Step 8: Compute delta analysis
|
||||
- Step 9: Format output
|
||||
- Step 10: Persist to database and publish to Redis queue
|
||||
- Catch pipeline exceptions → SKIP verdict for failed pipeline, other continues
|
||||
- Measure and log wall-clock execution time per pipeline
|
||||
- _Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6_
|
||||
|
||||
- [x] 12.3 Implement `main.py` — Entry point with asyncio event loop
|
||||
- Connect to PostgreSQL (asyncpg pool) and Redis (redis.asyncio)
|
||||
- Load config from `risk_configs` table
|
||||
- Log active configuration at startup
|
||||
- Poll `stonks:queue:signal_engine` queue indefinitely
|
||||
- Check `dual_pipeline_enabled` flag; if disabled, sleep and retry
|
||||
- On config read failure, default to disabled (fail-safe)
|
||||
- Support shadow mode (persist but don't forward to trading queue)
|
||||
- _Requirements: 13.1, 13.6, 13.7, 16.1, 16.6_
|
||||
|
||||
- [x] 12.4 Write integration tests for worker orchestration
|
||||
- Test full tick evaluation with mocked DB/Redis
|
||||
- Test pipeline failure isolation (one fails, other completes)
|
||||
- Test hard filter short-circuit
|
||||
- Test shadow mode behavior
|
||||
- File: `tests/test_signal_engine_worker.py`
|
||||
- _Requirements: 11.3, 16.6_
|
||||
|
||||
- [x] 13. Checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
- [x] 14. Database migration and infrastructure
|
||||
- [x] 14.1 Create database migration `infra/migrations/039_signal_engine_outputs.sql`
|
||||
- Create `signal_engine_outputs` table per the design schema
|
||||
- Create index on `(ticker, evaluated_at)` for per-ticker time-range queries
|
||||
- Create index on `evaluated_at` for global time-range queries
|
||||
- Create index on `(heuristic_verdict, probabilistic_verdict)` for verdict filtering
|
||||
- _Requirements: 15.1, 15.2, 15.3_
|
||||
|
||||
- [x] 14.2 Add signal engine service to Helm chart
|
||||
- Add `signalEngine` entry to `infra/helm/stonks-oracle/values.yaml`
|
||||
- Configure: replicas=1, command=`python -m services.signal_engine.main`, tier=processing
|
||||
- Set resource requests/limits per design (requests: 100m CPU / 128Mi memory; limits: 500m CPU / 256Mi memory)
|
||||
- Reference existing secrets: `stonks-core-secrets`, `stonks-market-secrets`
|
||||
- _Requirements: 11.1, 13.1_
|
||||
|
||||
- [x] 15. Trading engine integration and backward compatibility
|
||||
- [x] 15.1 Wire signal engine output to trading engine queue
|
||||
- Publish `SignalOutput` (mapped to `Recommendation`) to `stonks:queue:trading_decisions`
|
||||
- Only publish when at least one pipeline produces BUY verdict
|
||||
- WATCH/SKIP verdicts persisted for analysis but not forwarded
|
||||
- Ensure trading engine can consume without modification via `signal_output_to_recommendation()`
|
||||
- _Requirements: 12.1, 12.2, 12.5, 16.2_
|
||||
|
||||
- [x] 15.2 Ensure backward compatibility with existing pipeline
|
||||
- Verify `dual_pipeline_enabled=false` means signal engine does not run
|
||||
- Verify existing aggregation pipeline operates unchanged when flag is off
|
||||
- Reuse existing `WeightedSignal`, `BayesianPosterior`, `RegimeClassification` (import, don't duplicate)
|
||||
- Reuse existing `compute_signal_weight`, `compute_bayesian_posterior`, `classify_regime` functions
|
||||
- No modifications to existing tables (new migration only adds new table)
|
||||
- _Requirements: 16.1, 16.2, 16.3, 16.4, 16.5_
|
||||
|
||||
- [x] 16. Final checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation between major phases
|
||||
- Property-based tests use Hypothesis with `@settings(max_examples=100)` per project conventions
|
||||
- PBT test files are prefixed `test_pbt_*` per project conventions
|
||||
- The service reuses existing math functions from `services/aggregation/` — no reimplementation
|
||||
- All configuration is loaded from `risk_configs` table with fail-safe defaults
|
||||
- Shadow mode allows running alongside existing pipeline without affecting trading decisions
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "b595d834-7e72-4fab-87a9-65c92115a069", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,975 @@
|
||||
# Design Document — Model Validation, Calibration, and Signal Quality
|
||||
|
||||
## Overview
|
||||
|
||||
This design adds a closed-loop model validation layer to Stonks Oracle. The system currently generates trend summaries and trading recommendations with confidence scores, but has no mechanism to evaluate whether those predictions are accurate, whether confidence scores are well-calibrated, which sources contribute to correct predictions, or whether the system outperforms simple benchmarks.
|
||||
|
||||
The validation layer introduces five new service modules under `services/validation/`, a quality gate in `services/trading/`, seven new API endpoints under `/api/validation/`, a database migration (035) with four new tables and two SQL views, and an upgraded OpsModel dashboard page. The architecture follows the existing patterns: pure computation modules with asyncpg for persistence, FastAPI endpoints in `services/api/app.py`, and React/TanStack Query hooks on the frontend.
|
||||
|
||||
### Design Rationale
|
||||
|
||||
A prediction engine without outcome tracking is flying blind. The validation layer closes the feedback loop by:
|
||||
|
||||
1. **Capturing immutable snapshots** at prediction time — preventing hindsight bias in evaluation
|
||||
2. **Evaluating outcomes** across multiple horizons (1h, 6h, 1d, 7d, 30d) — matching the system's multi-window trend architecture
|
||||
3. **Computing calibration metrics** (ECE, Brier score) — measuring whether confidence scores mean what they claim
|
||||
4. **Tracking information coefficients** (IC, Rank IC) — measuring linear and ordinal predictive power
|
||||
5. **Attributing performance** to sources, catalysts, and signal layers — identifying the most valuable information channels
|
||||
6. **Recalibrating confidence** via Bayesian shrinkage — learning from the system's own track record
|
||||
7. **Gating live trading** on minimum quality thresholds — preventing real capital risk on a poorly performing model
|
||||
|
||||
The design reuses existing infrastructure (asyncpg, FastAPI, TanStack Query, Recharts) and integrates with the existing `source_accuracy` table from the signal-math-upgrade spec.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Data Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph "Prediction Capture (Real-time)"
|
||||
A[Recommendation Engine] -->|generates| B[Prediction_Snapshot_Writer]
|
||||
B --> C[prediction_snapshots table]
|
||||
B --> D[signal_evidence_links table]
|
||||
B -->|computes| E[canonical_evidence_key<br/>duplicate detection<br/>contribution scores]
|
||||
end
|
||||
|
||||
subgraph "Outcome Evaluation (Periodic)"
|
||||
F[Outcome_Evaluator<br/>scheduled job] -->|reads matured snapshots| C
|
||||
F -->|fetches future prices| G[market_snapshots table]
|
||||
F -->|computes returns| H[prediction_outcomes table]
|
||||
F -->|evaluates 5 horizons| H
|
||||
end
|
||||
|
||||
subgraph "Metrics Computation (Periodic)"
|
||||
I[Metrics_Engine] -->|reads| H
|
||||
I -->|reads| C
|
||||
I -->|reads| D
|
||||
I -->|computes| J[model_metric_snapshots table]
|
||||
I -->|computes| K[Calibration: ECE, Brier]
|
||||
I -->|computes| L[IC, Rank IC by horizon]
|
||||
I -->|computes| M[Benchmark: excess returns]
|
||||
end
|
||||
|
||||
subgraph "Attribution (Periodic)"
|
||||
N[Attribution_Engine] -->|joins| D
|
||||
N -->|joins| H
|
||||
N -->|computes| O[Per-source metrics]
|
||||
N -->|computes| P[Per-catalyst metrics]
|
||||
N -->|computes| Q[Per-layer metrics]
|
||||
end
|
||||
|
||||
subgraph "Calibration (Periodic)"
|
||||
R[Calibration_Engine] -->|reads| H
|
||||
R -->|reads| D
|
||||
R -->|computes Bayesian shrinkage| S[source_accuracy table<br/>reliability scores]
|
||||
end
|
||||
|
||||
subgraph "Safety Gate (Per-cycle)"
|
||||
T[Quality_Gate] -->|reads latest| J
|
||||
T -->|evaluates thresholds| U{Pass?}
|
||||
U -->|yes| V[Live trading allowed]
|
||||
U -->|no| W[Force paper mode]
|
||||
T -->|stores result| X[risk_configs table<br/>model_quality_gate key]
|
||||
end
|
||||
|
||||
subgraph "Dashboard (Frontend)"
|
||||
Y[Dashboard_API<br/>7 endpoints] -->|reads| J
|
||||
Y -->|reads| C
|
||||
Y -->|reads| H
|
||||
Y -->|reads| D
|
||||
Z[OpsModel.tsx<br/>upgraded page] -->|fetches| Y
|
||||
end
|
||||
|
||||
subgraph "Backtest Integration"
|
||||
AA[BacktestReplay] -->|validation mode| B
|
||||
AA -->|validation mode| F
|
||||
AA -->|triggers| I
|
||||
end
|
||||
```
|
||||
|
||||
### Scheduling Strategy
|
||||
|
||||
The validation components run on different cadences:
|
||||
|
||||
| Component | Trigger | Cadence |
|
||||
|-----------|---------|---------|
|
||||
| Prediction_Snapshot_Writer | Synchronous — called by recommendation engine | Every recommendation |
|
||||
| Outcome_Evaluator | Scheduled job | Every 1 hour |
|
||||
| Metrics_Engine | After Outcome_Evaluator completes | Every 1 hour |
|
||||
| Attribution_Engine | Called by Metrics_Engine | Every 1 hour |
|
||||
| Calibration_Engine | After Metrics_Engine completes | Every 6 hours |
|
||||
| Quality_Gate | Start of each aggregation cycle | Every aggregation cycle |
|
||||
|
||||
### Sector ETF Mapping
|
||||
|
||||
The system needs a mapping from company sectors to sector ETFs for benchmark comparison. This is stored as a configuration constant:
|
||||
|
||||
```python
|
||||
SECTOR_ETF_MAP: dict[str, str] = {
|
||||
"Technology": "XLK",
|
||||
"Consumer Cyclical": "XLY",
|
||||
"Financial Services": "XLF",
|
||||
"Healthcare": "XLV",
|
||||
"Energy": "XLE",
|
||||
"Communication Services": "XLC",
|
||||
"Industrials": "XLI",
|
||||
"Consumer Defensive": "XLP",
|
||||
"Real Estate": "XLRE",
|
||||
"Utilities": "XLU",
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### New Modules
|
||||
|
||||
| Module | File | Responsibility |
|
||||
|--------|------|----------------|
|
||||
| Prediction Snapshot Writer | `services/validation/prediction_snapshot.py` | Captures immutable prediction state at generation time |
|
||||
| Outcome Evaluator | `services/validation/outcome_evaluator.py` | Matches predictions with realized market outcomes |
|
||||
| Metrics Engine | `services/validation/metrics.py` | Computes calibration, IC, Brier, benchmark metrics |
|
||||
| Attribution Engine | `services/validation/attribution.py` | Per-source, per-catalyst, per-layer performance |
|
||||
| Calibration Engine | `services/validation/calibration.py` | Bayesian shrinkage source reliability, weight adjustment |
|
||||
| Quality Gate | `services/trading/model_quality_gate.py` | Safety gate for live trading eligibility |
|
||||
|
||||
### Modified Modules
|
||||
|
||||
| Module | File | Changes |
|
||||
|--------|------|---------|
|
||||
| Query API | `services/api/app.py` | 7 new `/api/validation/*` endpoints |
|
||||
| Aggregation Worker | `services/aggregation/worker.py` | Call Quality_Gate at cycle start |
|
||||
| Recommendation Engine | `services/recommendation/eligibility.py` | Call Prediction_Snapshot_Writer after recommendation |
|
||||
| Backtest Replay | `services/trading/backtest_replay.py` | Validation mode support |
|
||||
| Frontend Hooks | `frontend/src/api/hooks.ts` | 7 new validation hooks |
|
||||
| OpsModel Page | `frontend/src/pages/OpsModel.tsx` | Full dashboard upgrade |
|
||||
| AppLayout | `frontend/src/components/AppLayout.tsx` | Nav item update (if needed) |
|
||||
|
||||
### Component Interface Details
|
||||
|
||||
#### 1. Prediction Snapshot Writer (`services/validation/prediction_snapshot.py`)
|
||||
|
||||
```python
|
||||
SECTOR_ETF_MAP: dict[str, str] = {
|
||||
"Technology": "XLK",
|
||||
"Consumer Cyclical": "XLY",
|
||||
"Financial Services": "XLF",
|
||||
"Healthcare": "XLV",
|
||||
"Energy": "XLE",
|
||||
"Communication Services": "XLC",
|
||||
"Industrials": "XLI",
|
||||
"Consumer Defensive": "XLP",
|
||||
"Real Estate": "XLRE",
|
||||
"Utilities": "XLU",
|
||||
}
|
||||
|
||||
EVALUATION_HORIZONS: list[str] = ["1h", "6h", "1d", "7d", "30d"]
|
||||
|
||||
MAX_SINGLE_DOCUMENT_WEIGHT: float = 1.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class PredictionSnapshot:
|
||||
"""Immutable snapshot of a prediction at generation time."""
|
||||
id: str # UUID
|
||||
generated_at: datetime
|
||||
ticker: str
|
||||
window: str
|
||||
horizon: str
|
||||
direction: str # bullish/bearish/mixed/neutral
|
||||
action: str # buy/sell/hold/watch
|
||||
mode: str # informational/paper_eligible/live_eligible
|
||||
strength: float
|
||||
confidence: float
|
||||
contradiction: float
|
||||
p_bull: float | None
|
||||
p_bear: float | None
|
||||
score_company: float
|
||||
score_macro: float
|
||||
score_competitive: float
|
||||
evidence_count: int
|
||||
unique_source_count: int
|
||||
duplicate_evidence_count: int
|
||||
price_at_prediction: float | None
|
||||
spy_price_at_prediction: float | None
|
||||
sector_etf_price_at_prediction: float | None
|
||||
metadata: dict
|
||||
|
||||
|
||||
@dataclass
|
||||
class SignalEvidenceLink:
|
||||
"""Link between a prediction and a contributing evidence document."""
|
||||
id: str # UUID
|
||||
prediction_id: str
|
||||
document_id: str
|
||||
signal_id: str
|
||||
ticker: str
|
||||
source: str
|
||||
source_type: str
|
||||
catalyst_type: str
|
||||
sentiment: str
|
||||
impact: float
|
||||
extraction_confidence: float
|
||||
weight: float # clamped to MAX_SINGLE_DOCUMENT_WEIGHT
|
||||
is_duplicate: bool
|
||||
canonical_evidence_key: str
|
||||
contribution_score: float # weight / total_weight, sums to 1.0
|
||||
metadata: dict
|
||||
|
||||
|
||||
def compute_canonical_evidence_key(title: str, url: str) -> str:
|
||||
"""SHA256 of normalized(title) + normalized(url).
|
||||
|
||||
Normalization: lowercase, strip whitespace for title;
|
||||
lowercase, strip query params for URL.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def create_prediction_snapshot(
|
||||
pool: asyncpg.Pool,
|
||||
recommendation: Recommendation,
|
||||
trend_summary: TrendSummary,
|
||||
evidence_signals: list[WeightedSignal],
|
||||
evidence_docs: list[dict], # document metadata from recommendation_evidence
|
||||
) -> PredictionSnapshot:
|
||||
"""Create and persist a prediction snapshot with evidence links.
|
||||
|
||||
1. Fetches current prices (ticker, SPY, sector ETF) from market_snapshots
|
||||
2. Computes canonical evidence keys and duplicate detection
|
||||
3. Clamps individual document weights to MAX_SINGLE_DOCUMENT_WEIGHT
|
||||
4. Computes contribution scores (one-vote-per-canonical-key dedup)
|
||||
5. Persists snapshot and evidence links in a transaction
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def fetch_latest_close_price(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
) -> float | None:
|
||||
"""Fetch most recent close price from market_snapshots for a ticker."""
|
||||
...
|
||||
```
|
||||
|
||||
#### 2. Outcome Evaluator (`services/validation/outcome_evaluator.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class PredictionOutcome:
|
||||
"""Realized outcome for a prediction at a specific horizon."""
|
||||
id: str # UUID
|
||||
prediction_id: str
|
||||
evaluated_at: datetime
|
||||
horizon: str # 1h, 6h, 1d, 7d, 30d
|
||||
future_price: float
|
||||
future_return: float
|
||||
spy_future_price: float | None
|
||||
spy_return: float | None
|
||||
sector_etf_future_price: float | None
|
||||
sector_etf_return: float | None
|
||||
excess_return_vs_spy: float | None
|
||||
excess_return_vs_sector: float | None
|
||||
direction_correct: bool
|
||||
profitable: bool
|
||||
metadata: dict
|
||||
|
||||
|
||||
HORIZON_DURATIONS: dict[str, timedelta] = {
|
||||
"1h": timedelta(hours=1),
|
||||
"6h": timedelta(hours=6),
|
||||
"1d": timedelta(days=1),
|
||||
"7d": timedelta(days=7),
|
||||
"30d": timedelta(days=30),
|
||||
}
|
||||
|
||||
|
||||
async def evaluate_matured_predictions(
|
||||
pool: asyncpg.Pool,
|
||||
) -> int:
|
||||
"""Evaluate all matured prediction snapshots.
|
||||
|
||||
Finds snapshots where horizon has elapsed and outcome not yet recorded.
|
||||
For each, fetches future prices and computes returns.
|
||||
Skips horizons where future price is unavailable (retries next run).
|
||||
|
||||
Returns count of outcomes recorded.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def evaluate_single_prediction(
|
||||
pool: asyncpg.Pool,
|
||||
snapshot: PredictionSnapshot,
|
||||
horizon: str,
|
||||
) -> PredictionOutcome | None:
|
||||
"""Evaluate a single prediction at a specific horizon.
|
||||
|
||||
Returns None if future price is unavailable.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 3. Metrics Engine (`services/validation/metrics.py`)
|
||||
|
||||
```python
|
||||
CONFIDENCE_BUCKETS: list[tuple[float, float]] = [
|
||||
(0.50, 0.60),
|
||||
(0.60, 0.70),
|
||||
(0.70, 0.80),
|
||||
(0.80, 0.90),
|
||||
(0.90, 1.00),
|
||||
]
|
||||
|
||||
LOOKBACK_WINDOWS: list[str] = ["7d", "30d", "90d", "all"]
|
||||
|
||||
|
||||
@dataclass
|
||||
class CalibrationBucket:
|
||||
"""Calibration metrics for a single confidence bucket."""
|
||||
bucket_low: float
|
||||
bucket_high: float
|
||||
avg_confidence: float
|
||||
observed_win_rate: float
|
||||
prediction_count: int
|
||||
miscalibrated: bool # |avg_confidence - win_rate| > 0.15
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelMetricSnapshot:
|
||||
"""Aggregate model quality metrics for a lookback/horizon combination."""
|
||||
id: str
|
||||
generated_at: datetime
|
||||
lookback_window: str
|
||||
horizon: str
|
||||
prediction_count: int
|
||||
win_rate: float
|
||||
directional_accuracy: float
|
||||
information_coefficient: float | None
|
||||
rank_information_coefficient: float | None
|
||||
avg_return: float
|
||||
avg_excess_return_vs_spy: float
|
||||
avg_excess_return_vs_sector: float
|
||||
calibration_error: float # ECE
|
||||
brier_score: float
|
||||
buy_win_rate: float
|
||||
sell_win_rate: float
|
||||
hold_win_rate: float
|
||||
metadata: dict
|
||||
|
||||
|
||||
def compute_calibration_error(
|
||||
confidences: list[float],
|
||||
outcomes: list[bool],
|
||||
) -> tuple[float, list[CalibrationBucket]]:
|
||||
"""Compute ECE and calibration buckets.
|
||||
|
||||
ECE = Σ (n_b / N) * |avg_conf_b - win_rate_b|
|
||||
|
||||
Returns (ece, buckets).
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_brier_score(
|
||||
p_bulls: list[float],
|
||||
outcomes: list[bool],
|
||||
) -> float:
|
||||
"""Brier score = mean((p_bull - outcome)^2).
|
||||
|
||||
outcome is 1.0 when price moved in predicted direction, 0.0 otherwise.
|
||||
Returns value in [0.0, 1.0].
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_information_coefficient(
|
||||
scores: list[float],
|
||||
returns: list[float],
|
||||
) -> float | None:
|
||||
"""Pearson correlation between prediction scores and future returns.
|
||||
|
||||
Returns None when fewer than 30 data points.
|
||||
Returns value in [-1.0, 1.0].
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_rank_information_coefficient(
|
||||
scores: list[float],
|
||||
returns: list[float],
|
||||
) -> float | None:
|
||||
"""Spearman rank correlation between prediction scores and future returns.
|
||||
|
||||
Returns None when fewer than 30 data points.
|
||||
Returns value in [-1.0, 1.0].
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_contribution_scores(
|
||||
weights: list[float],
|
||||
) -> list[float]:
|
||||
"""Compute contribution scores from document weights.
|
||||
|
||||
Each score = weight_i / sum(weights). Sums to 1.0.
|
||||
Each score in [0.0, 1.0].
|
||||
Returns empty list for empty input.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def compute_and_store_metric_snapshots(
|
||||
pool: asyncpg.Pool,
|
||||
) -> list[ModelMetricSnapshot]:
|
||||
"""Compute metric snapshots for all lookback/horizon combinations.
|
||||
|
||||
Lookback windows: 7d, 30d, 90d, all-time.
|
||||
Horizons: 1h, 6h, 1d, 7d, 30d.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 4. Attribution Engine (`services/validation/attribution.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SourceAttribution:
|
||||
"""Performance metrics for a single source."""
|
||||
source: str
|
||||
source_type: str
|
||||
prediction_count: int
|
||||
avg_weight: float
|
||||
avg_contribution_score: float
|
||||
win_rate: float
|
||||
avg_future_return: float
|
||||
avg_excess_return_vs_spy: float
|
||||
information_coefficient: float | None
|
||||
duplicate_rate: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class CatalystAttribution:
|
||||
"""Performance metrics for a single catalyst type."""
|
||||
catalyst_type: str
|
||||
prediction_count: int
|
||||
win_rate: float
|
||||
avg_future_return: float
|
||||
avg_excess_return_vs_spy: float
|
||||
information_coefficient: float | None
|
||||
|
||||
|
||||
@dataclass
|
||||
class LayerAttribution:
|
||||
"""Performance metrics for a signal layer."""
|
||||
layer: str # company, macro, competitive
|
||||
avg_contribution_pct: float
|
||||
dominant_win_rate: float # win rate when this layer > 30% contribution
|
||||
dominant_ic: float | None # IC when this layer > 30% contribution
|
||||
|
||||
|
||||
async def compute_source_attribution(
|
||||
pool: asyncpg.Pool,
|
||||
lookback_days: int = 30,
|
||||
horizon: str = "7d",
|
||||
) -> list[SourceAttribution]:
|
||||
...
|
||||
|
||||
|
||||
async def compute_catalyst_attribution(
|
||||
pool: asyncpg.Pool,
|
||||
lookback_days: int = 30,
|
||||
horizon: str = "7d",
|
||||
) -> list[CatalystAttribution]:
|
||||
...
|
||||
|
||||
|
||||
async def compute_layer_attribution(
|
||||
pool: asyncpg.Pool,
|
||||
lookback_days: int = 30,
|
||||
horizon: str = "7d",
|
||||
) -> list[LayerAttribution]:
|
||||
...
|
||||
```
|
||||
|
||||
#### 5. Calibration Engine (`services/validation/calibration.py`)
|
||||
|
||||
```python
|
||||
def compute_source_reliability(
|
||||
observed_win_rate: float,
|
||||
sample_count: int,
|
||||
prior_strength: int = 30,
|
||||
) -> float:
|
||||
"""Bayesian shrinkage source reliability.
|
||||
|
||||
reliability = 0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)
|
||||
|
||||
Returns value in [0.0, 1.0].
|
||||
When n=0, returns 0.5 (prior mean).
|
||||
As n→∞, approaches observed_win_rate.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_adjusted_evidence_weight(
|
||||
base_weight: float,
|
||||
reliability: float,
|
||||
) -> float:
|
||||
"""Adjusted weight = base_weight * (0.5 + reliability), clamped to [0.1, 2.0]."""
|
||||
...
|
||||
|
||||
|
||||
async def update_source_reliabilities(
|
||||
pool: asyncpg.Pool,
|
||||
) -> int:
|
||||
"""Recompute and store source reliability scores from latest outcomes.
|
||||
|
||||
Uses the existing source_accuracy table, updating accuracy_ratio
|
||||
with the Bayesian shrinkage formula.
|
||||
|
||||
Returns count of sources updated.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 6. Quality Gate (`services/trading/model_quality_gate.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class QualityGateConfig:
|
||||
"""Configurable thresholds for live trading eligibility."""
|
||||
min_prediction_count: int = 100
|
||||
min_ic: float = 0.03
|
||||
min_win_rate: float = 0.53
|
||||
max_ece: float = 0.15
|
||||
min_excess_return_vs_spy: float = 0.0
|
||||
max_snapshot_age_hours: int = 24
|
||||
|
||||
|
||||
@dataclass
|
||||
class GateThresholdResult:
|
||||
"""Result for a single threshold check."""
|
||||
name: str
|
||||
threshold: float
|
||||
actual: float
|
||||
passed: bool
|
||||
|
||||
|
||||
@dataclass
|
||||
class QualityGateResult:
|
||||
"""Full gate evaluation result."""
|
||||
passed: bool
|
||||
evaluated_at: datetime
|
||||
threshold_results: list[GateThresholdResult]
|
||||
reason: str # "all thresholds met" or "failed: ..."
|
||||
snapshot_id: str | None
|
||||
config: QualityGateConfig
|
||||
|
||||
|
||||
async def evaluate_quality_gate(
|
||||
pool: asyncpg.Pool,
|
||||
config: QualityGateConfig | None = None,
|
||||
) -> QualityGateResult:
|
||||
"""Evaluate model quality gate from latest metric snapshot.
|
||||
|
||||
Reads the most recent model_metric_snapshot for the 30d lookback
|
||||
and 7d horizon (the primary evaluation window).
|
||||
|
||||
If no snapshot exists or snapshot is stale (>24h), defaults to
|
||||
paper-only mode (fail-safe).
|
||||
|
||||
Stores result in risk_configs under 'model_quality_gate' key.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def load_gate_config_from_db(
|
||||
pool: asyncpg.Pool,
|
||||
) -> QualityGateConfig:
|
||||
"""Load gate thresholds from risk_configs, with defaults."""
|
||||
...
|
||||
```
|
||||
|
||||
#### 7. Dashboard API Endpoints
|
||||
|
||||
Seven new endpoints added to `services/api/app.py`:
|
||||
|
||||
| Endpoint | Method | Returns |
|
||||
|----------|--------|---------|
|
||||
| `/api/validation/summary` | GET | Latest model metric snapshot + gate status |
|
||||
| `/api/validation/calibration` | GET | Calibration table with buckets |
|
||||
| `/api/validation/ic-by-horizon` | GET | IC and Rank IC per horizon |
|
||||
| `/api/validation/attribution/sources` | GET | Per-source performance |
|
||||
| `/api/validation/attribution/catalysts` | GET | Per-catalyst performance |
|
||||
| `/api/validation/attribution/layers` | GET | Per-layer performance |
|
||||
| `/api/validation/gate-status` | GET | Quality gate evaluation detail |
|
||||
|
||||
All endpoints accept optional `lookback` (default "30d") and `horizon` (default "7d") query parameters.
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
### Database Schema (Migration 035)
|
||||
|
||||
#### prediction_snapshots
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS prediction_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
ticker VARCHAR(20) NOT NULL,
|
||||
window VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
direction VARCHAR(20) NOT NULL,
|
||||
action VARCHAR(20) NOT NULL,
|
||||
mode VARCHAR(30) NOT NULL,
|
||||
strength FLOAT NOT NULL,
|
||||
confidence FLOAT NOT NULL,
|
||||
contradiction FLOAT NOT NULL DEFAULT 0.0,
|
||||
p_bull FLOAT,
|
||||
p_bear FLOAT,
|
||||
score_company FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_macro FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_competitive FLOAT NOT NULL DEFAULT 0.0,
|
||||
evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
unique_source_count INTEGER NOT NULL DEFAULT 0,
|
||||
duplicate_evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
price_at_prediction FLOAT,
|
||||
spy_price_at_prediction FLOAT,
|
||||
sector_etf_price_at_prediction FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_ticker ON prediction_snapshots(ticker);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_generated ON prediction_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_horizon ON prediction_snapshots(horizon);
|
||||
```
|
||||
|
||||
#### prediction_outcomes
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS prediction_outcomes (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
future_price FLOAT,
|
||||
future_return FLOAT,
|
||||
spy_future_price FLOAT,
|
||||
spy_return FLOAT,
|
||||
sector_etf_future_price FLOAT,
|
||||
sector_etf_return FLOAT,
|
||||
excess_return_vs_spy FLOAT,
|
||||
excess_return_vs_sector FLOAT,
|
||||
direction_correct BOOLEAN,
|
||||
profitable BOOLEAN,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_prediction ON prediction_outcomes(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_horizon ON prediction_outcomes(horizon);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_evaluated ON prediction_outcomes(evaluated_at);
|
||||
```
|
||||
|
||||
#### signal_evidence_links
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS signal_evidence_links (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
document_id VARCHAR(200),
|
||||
signal_id VARCHAR(200),
|
||||
ticker VARCHAR(20),
|
||||
source VARCHAR(200),
|
||||
source_type VARCHAR(50),
|
||||
catalyst_type VARCHAR(50),
|
||||
sentiment VARCHAR(20),
|
||||
impact FLOAT,
|
||||
extraction_confidence FLOAT,
|
||||
weight FLOAT,
|
||||
is_duplicate BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
canonical_evidence_key VARCHAR(64),
|
||||
contribution_score FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_prediction ON signal_evidence_links(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_document ON signal_evidence_links(document_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_ticker ON signal_evidence_links(ticker);
|
||||
```
|
||||
|
||||
#### model_metric_snapshots
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS model_metric_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
lookback_window VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
prediction_count INTEGER NOT NULL DEFAULT 0,
|
||||
win_rate FLOAT,
|
||||
directional_accuracy FLOAT,
|
||||
information_coefficient FLOAT,
|
||||
rank_information_coefficient FLOAT,
|
||||
avg_return FLOAT,
|
||||
avg_excess_return_vs_spy FLOAT,
|
||||
avg_excess_return_vs_sector FLOAT,
|
||||
calibration_error FLOAT,
|
||||
brier_score FLOAT,
|
||||
buy_win_rate FLOAT,
|
||||
sell_win_rate FLOAT,
|
||||
hold_win_rate FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_generated ON model_metric_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_lookback ON model_metric_snapshots(lookback_window);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_horizon ON model_metric_snapshots(horizon);
|
||||
```
|
||||
|
||||
#### SQL Explorer Views
|
||||
|
||||
```sql
|
||||
CREATE OR REPLACE VIEW v_prediction_performance AS
|
||||
SELECT
|
||||
ps.ticker,
|
||||
ps.direction,
|
||||
ps.action,
|
||||
ps.confidence,
|
||||
ps.strength,
|
||||
ps.contradiction,
|
||||
ps.p_bull,
|
||||
ps.score_company,
|
||||
ps.score_macro,
|
||||
ps.score_competitive,
|
||||
ps.evidence_count,
|
||||
ps.unique_source_count,
|
||||
ps.duplicate_evidence_count,
|
||||
ps.price_at_prediction,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.excess_return_vs_sector,
|
||||
po.direction_correct,
|
||||
po.profitable,
|
||||
po.horizon,
|
||||
ps.generated_at,
|
||||
po.evaluated_at
|
||||
FROM prediction_snapshots ps
|
||||
JOIN prediction_outcomes po ON po.prediction_id = ps.id;
|
||||
|
||||
CREATE OR REPLACE VIEW v_source_performance AS
|
||||
SELECT
|
||||
sel.source,
|
||||
sel.source_type,
|
||||
sel.catalyst_type,
|
||||
sel.sentiment,
|
||||
sel.weight,
|
||||
sel.contribution_score,
|
||||
sel.is_duplicate,
|
||||
po.direction_correct,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.horizon,
|
||||
ps.generated_at
|
||||
FROM signal_evidence_links sel
|
||||
JOIN prediction_snapshots ps ON ps.id = sel.prediction_id
|
||||
JOIN prediction_outcomes po ON po.prediction_id = sel.prediction_id;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Correctness Properties
|
||||
|
||||
*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.*
|
||||
|
||||
The following properties were derived from the acceptance criteria through systematic prework analysis. Each property is universally quantified and maps to specific requirements. After reflection, 7 unique properties remain — one for each property-based testing (PBT) requirement in Requirement 17. Redundant properties from Requirements 2, 5, 6, 8, and 11 were consolidated with their corresponding Requirement 17 counterparts.
|
||||
|
||||
### Property 1: Calibration Error Range and Round-Trip
|
||||
|
||||
*For any* valid distribution of predictions across confidence buckets (where each prediction has a confidence in [0.5, 1.0] and a boolean outcome), the Expected Calibration Error (ECE) SHALL be in [0.0, 1.0]. Furthermore, when every bucket's observed win rate exactly matches its average confidence, ECE SHALL be 0.0.
|
||||
|
||||
**Validates: Requirements 5.1, 5.3, 17.1**
|
||||
|
||||
### Property 2: Brier Score Range and Perfect Prediction
|
||||
|
||||
*For any* list of (p_bull, outcome) pairs where p_bull ∈ [0.0, 1.0] and outcome ∈ {0.0, 1.0}, the Brier score SHALL be in [0.0, 1.0]. Furthermore, when all predictions have p_bull = 1.0 and outcome = 1.0 (or p_bull = 0.0 and outcome = 0.0), the Brier score SHALL be 0.0.
|
||||
|
||||
**Validates: Requirements 5.4, 17.2**
|
||||
|
||||
### Property 3: Information Coefficient Range and Perfect Correlation
|
||||
|
||||
*For any* list of (score, return) pairs with at least 30 elements where scores and returns are finite floats, the Information Coefficient (Pearson correlation) SHALL be in [-1.0, 1.0]. Furthermore, when scores and returns are perfectly positively linearly correlated (returns = a * scores + b, a > 0), IC SHALL be 1.0 (within floating-point tolerance).
|
||||
|
||||
**Validates: Requirements 6.1, 6.2, 17.3**
|
||||
|
||||
### Property 4: Canonical Evidence Key Determinism and Normalization Idempotence
|
||||
|
||||
*For any* (title, url) string pair, computing the canonical evidence key SHALL be deterministic — the same inputs always produce the same key. Furthermore, key computation SHALL be idempotent under normalization: computing the key from already-normalized inputs (lowercased, trimmed title; lowercased, query-stripped URL) SHALL produce the same key as computing it from the original, un-normalized inputs.
|
||||
|
||||
**Validates: Requirements 2.3, 17.4**
|
||||
|
||||
### Property 5: Source Reliability Bayesian Shrinkage Bounds and Convergence
|
||||
|
||||
*For any* observed_win_rate ∈ [0.0, 1.0] and sample_count ≥ 0, the source reliability computed via Bayesian shrinkage SHALL be in [0.0, 1.0]. When sample_count = 0, reliability SHALL be exactly 0.5. As sample_count increases toward infinity, reliability SHALL approach the observed_win_rate monotonically.
|
||||
|
||||
**Validates: Requirements 8.1, 8.2, 17.5**
|
||||
|
||||
### Property 6: Quality Gate Determinism and Threshold Monotonicity
|
||||
|
||||
*For any* set of model metric values and quality gate configuration, the gate evaluation result SHALL be deterministic — the same inputs always produce the same pass/fail result. Furthermore, for any configuration where the gate passes, relaxing any single threshold (decreasing a minimum threshold or increasing a maximum threshold, making it easier to satisfy) SHALL NOT cause the gate to fail (monotonicity).
|
||||
|
||||
**Validates: Requirements 11.1, 17.6**
|
||||
|
||||
### Property 7: Contribution Score Sum-to-One and Range
|
||||
|
||||
*For any* non-empty list of positive document weights, the computed contribution scores SHALL each be in [0.0, 1.0] and SHALL sum to 1.0 (within floating-point tolerance of 1e-9). For an empty weight list, the result SHALL be an empty list.
|
||||
|
||||
**Validates: Requirements 2.5, 17.7**
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Price Data Unavailability
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Ticker price unavailable at snapshot time | Store NULL for `price_at_prediction`, log warning, continue |
|
||||
| SPY price unavailable at snapshot time | Store NULL for `spy_price_at_prediction`, log warning, continue |
|
||||
| Sector ETF price unavailable at snapshot time | Store NULL for `sector_etf_price_at_prediction`, log warning, continue |
|
||||
| Sector not found in SECTOR_ETF_MAP | Store NULL for sector ETF price, log warning |
|
||||
| Future price unavailable at evaluation time | Skip that horizon, retry on next Outcome_Evaluator run |
|
||||
| SPY/sector ETF future price unavailable | Store NULL for excess returns, still compute ticker return |
|
||||
|
||||
### Metrics Computation Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Zero predictions in a confidence bucket | Exclude bucket from ECE computation |
|
||||
| Fewer than 30 predictions for IC/Rank IC | Return NULL instead of unreliable correlation |
|
||||
| All predictions in same confidence bucket | ECE = |avg_confidence - win_rate| for that single bucket |
|
||||
| Division by zero in contribution scores (total weight = 0) | Return equal contribution scores (1/n) |
|
||||
| Single evidence document | Contribution score = 1.0 |
|
||||
| NaN/infinity in metric computation | Guard with `math.isnan`/`math.isinf` checks, return 0.0 or NULL |
|
||||
|
||||
### Quality Gate Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| No model_metric_snapshots exist | Default to paper-only mode (fail-safe) |
|
||||
| Most recent snapshot older than 24 hours | Default to paper-only mode (fail-safe) |
|
||||
| risk_configs table unreachable | Default to paper-only mode, log warning |
|
||||
| Invalid threshold values in risk_configs | Use default thresholds, log warning |
|
||||
| Gate evaluation fails mid-computation | Default to paper-only mode, log error |
|
||||
|
||||
### Database Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| prediction_snapshots insert fails | Log error, do not block recommendation generation |
|
||||
| signal_evidence_links insert fails | Log error, snapshot still created (partial data) |
|
||||
| prediction_outcomes insert fails | Log error, retry on next Outcome_Evaluator run |
|
||||
| model_metric_snapshots insert fails | Log error, stale metrics used until next successful computation |
|
||||
| source_accuracy update fails | Log error, continue with stale reliability data |
|
||||
|
||||
### Canonical Evidence Key Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Empty title | Use empty string in hash computation |
|
||||
| Empty URL | Use empty string in hash computation |
|
||||
| URL with no query parameters | Use URL as-is after lowercasing |
|
||||
| Non-ASCII characters in title/URL | Encode as UTF-8 before hashing |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Dual Testing Approach
|
||||
|
||||
The model validation feature requires both property-based tests (for mathematical correctness of metric computations) and example-based unit tests (for specific behaviors, integration points, and edge cases). Property-based testing is appropriate here because the feature contains several pure mathematical functions (ECE, Brier score, IC, Bayesian shrinkage, contribution scores) with clear input/output behavior and universal properties.
|
||||
|
||||
### Property-Based Testing
|
||||
|
||||
**Library:** Hypothesis (already in use — `.hypothesis/` directory exists, project convention established)
|
||||
|
||||
**Configuration:**
|
||||
- Run 100 examples per property: `@settings(max_examples=100)`
|
||||
- File naming: `tests/test_pbt_model_validation.py`
|
||||
- Tag format: `# Feature: model-validation-calibration, Property N: <title>`
|
||||
|
||||
**Property tests to implement (one test per correctness property):**
|
||||
|
||||
| Property | Test Function | Key Generators |
|
||||
|----------|---------------|----------------|
|
||||
| 1: ECE range and round-trip | `test_calibration_error_range_and_roundtrip` | `st.lists(st.tuples(st.floats(0.5, 1.0), st.booleans()))` |
|
||||
| 2: Brier score range and perfect | `test_brier_score_range_and_perfect` | `st.lists(st.tuples(st.floats(0.0, 1.0), st.sampled_from([0.0, 1.0])))` |
|
||||
| 3: IC range and perfect correlation | `test_information_coefficient_range_and_perfect` | `st.lists(st.floats(-10, 10), min_size=30)` with linear transform |
|
||||
| 4: Canonical key determinism and idempotence | `test_canonical_key_determinism_and_idempotence` | `st.text()` pairs for title and URL |
|
||||
| 5: Source reliability bounds and convergence | `test_source_reliability_bounds_and_convergence` | `st.floats(0.0, 1.0)` for win_rate, `st.integers(0, 10000)` for n |
|
||||
| 6: Quality gate determinism and monotonicity | `test_quality_gate_determinism_and_monotonicity` | Custom strategy for `QualityGateConfig` and metric values |
|
||||
| 7: Contribution score sum-to-one | `test_contribution_score_sum_to_one` | `st.lists(st.floats(0.01, 100.0), min_size=1)` |
|
||||
|
||||
### Example-Based Unit Tests
|
||||
|
||||
**File:** `tests/test_model_validation_unit.py`
|
||||
|
||||
| Test Area | Examples |
|
||||
|-----------|----------|
|
||||
| Canonical evidence key | Known title/URL → expected SHA256, empty inputs, unicode |
|
||||
| Duplicate detection | 3 docs with 2 sharing a key → 1 marked duplicate |
|
||||
| Contribution scores | [0.5, 0.3, 0.2] → [0.5, 0.3, 0.2], single doc → [1.0] |
|
||||
| ECE specific values | Perfect calibration → 0.0, all overconfident → positive ECE |
|
||||
| Brier score specific values | All correct at p=1.0 → 0.0, all wrong at p=1.0 → 1.0 |
|
||||
| IC specific values | Perfect correlation → 1.0, anti-correlation → -1.0, < 30 → None |
|
||||
| Source reliability | n=0 → 0.5, n=1000 with wr=0.8 → ≈0.8, n=30 with wr=0.7 → 0.6 |
|
||||
| Adjusted evidence weight | reliability=0.5 → base*1.0, clamping to [0.1, 2.0] |
|
||||
| Quality gate | All thresholds met → pass, one failed → fail with reason |
|
||||
| Quality gate fail-safe | No snapshots → paper-only, stale snapshot → paper-only |
|
||||
| Direction correct logic | bullish+positive → true, bullish+negative → false |
|
||||
| Profitable logic | buy+positive → true, sell+negative → true |
|
||||
| Future return computation | price 100→110 → 0.10, price 100→90 → -0.10 |
|
||||
| Excess return | ticker 10%, SPY 5% → excess 5% |
|
||||
| Weight clamping | weight 1.5 → clamped to 1.0 |
|
||||
|
||||
### Frontend Tests
|
||||
|
||||
**File:** `frontend/src/test/pages.test.tsx` (extend existing)
|
||||
|
||||
| Test Area | Strategy |
|
||||
|-----------|----------|
|
||||
| OpsModel page renders validation tabs | MSW mock for `/api/validation/summary` |
|
||||
| Calibration table renders buckets | MSW mock for `/api/validation/calibration` |
|
||||
| Gate status indicator | MSW mock for `/api/validation/gate-status` |
|
||||
| Miscalibration warning badge | Mock data with miscalibrated bucket |
|
||||
|
||||
### Integration Tests
|
||||
|
||||
**File:** `tests/test_model_validation_integration.py`
|
||||
|
||||
| Test Area | Strategy |
|
||||
|-----------|----------|
|
||||
| Snapshot creation with mock DB | asyncpg mock, verify INSERT queries |
|
||||
| Outcome evaluation with mock prices | asyncpg mock, verify return computation |
|
||||
| Metrics computation end-to-end | In-memory data, verify all metrics computed |
|
||||
| API endpoint responses | FastAPI TestClient with mock pool |
|
||||
|
||||
### Test File Structure
|
||||
|
||||
```
|
||||
tests/
|
||||
├── test_pbt_model_validation.py # 7 property-based tests
|
||||
├── test_model_validation_unit.py # Example-based unit tests
|
||||
└── test_model_validation_integration.py # Integration tests (optional)
|
||||
|
||||
frontend/src/test/
|
||||
└── pages.test.tsx # Extended with validation page tests
|
||||
```
|
||||
@@ -0,0 +1,286 @@
|
||||
# Requirements Document — Model Validation, Calibration, and Signal Quality
|
||||
|
||||
## Introduction
|
||||
|
||||
The Stonks Oracle platform generates trend summaries and trading recommendations from a three-layer signal aggregation engine. While the pipeline produces directional predictions with confidence scores, there is no systematic mechanism to evaluate whether those predictions are accurate, whether confidence scores are well-calibrated, which sources and signal types contribute to correct predictions, or whether the system outperforms simple benchmarks. The platform also lacks safety gates that prevent live trading when model quality is insufficient.
|
||||
|
||||
This feature adds a complete model validation layer: prediction outcome tracking, calibration analysis, information coefficient metrics, signal and source attribution, evidence deduplication quality tracking, confidence recalibration, benchmark comparison, an upgraded Model Performance dashboard, and safety gates for live trading eligibility. The goal is to transform Stonks Oracle from a signal dashboard with paper trading into a statistically validated prediction engine with closed-loop feedback.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Prediction_Snapshot_Writer**: A new service component in `services/validation/prediction_snapshot.py` that captures the full state of every recommendation and trend prediction at generation time, including prices, evidence links, and duplicate counts.
|
||||
- **Outcome_Evaluator**: A new service component in `services/validation/outcome_evaluator.py` that runs periodically to compute realized future returns and directional accuracy for matured prediction snapshots across multiple horizons.
|
||||
- **Metrics_Engine**: A new service component in `services/validation/metrics.py` that computes aggregate model quality metrics including calibration error, information coefficient, Brier score, and win rates over configurable lookback windows.
|
||||
- **Attribution_Engine**: A new service component in `services/validation/attribution.py` that computes per-source, per-catalyst-type, and per-signal-layer performance metrics by joining evidence links with prediction outcomes.
|
||||
- **Calibration_Engine**: A new service component in `services/validation/calibration.py` that computes source reliability scores using Bayesian shrinkage and adjusts evidence weights based on historical source performance.
|
||||
- **Quality_Gate**: A new service component in `services/trading/model_quality_gate.py` that evaluates aggregate model metrics against configurable thresholds and determines whether the system meets minimum quality standards for live trading.
|
||||
- **Information_Coefficient**: The Pearson correlation between predicted scores and realized future returns, measuring the linear predictive power of the model. Abbreviated as IC.
|
||||
- **Rank_Information_Coefficient**: The Spearman rank correlation between predicted scores and realized future returns, measuring ordinal predictive power. Abbreviated as Rank IC.
|
||||
- **Calibration_Error**: The Expected Calibration Error (ECE), computed as the weighted average of the absolute difference between predicted confidence and observed win rate across confidence buckets.
|
||||
- **Brier_Score**: The mean squared error between the predicted bullish probability and the binary actual outcome (1 if price went up, 0 otherwise), measuring probabilistic forecast accuracy.
|
||||
- **Canonical_Evidence_Key**: A normalized identifier for a piece of evidence, computed as SHA256 of the normalized title concatenated with the normalized URL, used to detect duplicate evidence across different ingestion paths.
|
||||
- **Excess_Return**: The return of a prediction minus the return of a benchmark (SPY for broad market, sector ETF for sector-relative) over the same horizon, measuring alpha generation.
|
||||
- **Prediction_Snapshot**: A frozen record of a prediction at generation time, capturing all inputs (prices, scores, evidence) needed to evaluate the prediction against future outcomes without hindsight bias.
|
||||
- **Model_Metric_Snapshot**: A periodic aggregate of model quality metrics over a lookback window and horizon, stored for time-series analysis of model performance trends.
|
||||
- **Source_Reliability**: A Bayesian-shrunk estimate of a source's historical win rate, computed as `0.5 + (n/(n+30)) * (observed_win_rate - 0.5)`, which regresses toward 0.5 for sources with few observations.
|
||||
- **Dashboard_API**: The set of API endpoints under `/api/validation/` that serve model quality metrics, calibration tables, attribution data, and gate status to the frontend.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Prediction Snapshot Capture
|
||||
|
||||
**User Story:** As a quantitative analyst, I want every recommendation and trend prediction captured as an immutable snapshot at generation time, so that I can evaluate predictions against future outcomes without hindsight bias.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a recommendation is generated by the Recommendation_Engine, THE Prediction_Snapshot_Writer SHALL create a prediction_snapshots record containing the ticker, generation timestamp, trend window, prediction horizon, direction, action, mode, strength, confidence, contradiction score, bullish probability, bearish probability, company score, macro score, competitive score, evidence count, unique source count, duplicate evidence count, price at prediction time, SPY price at prediction time, and sector ETF price at prediction time.
|
||||
2. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL record the current market price for the predicted ticker by querying the most recent close price from the market_snapshots table.
|
||||
3. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL record the current SPY price by querying the most recent close price for ticker SPY from the market_snapshots table.
|
||||
4. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL record the current sector ETF price by looking up the sector for the predicted ticker and querying the most recent close price for the corresponding sector ETF from the market_snapshots table.
|
||||
5. IF the market price, SPY price, or sector ETF price is unavailable at snapshot time, THEN THE Prediction_Snapshot_Writer SHALL store NULL for the unavailable price fields and log a warning, rather than failing the snapshot creation.
|
||||
6. THE Prediction_Snapshot_Writer SHALL store prediction snapshots in a new `prediction_snapshots` database table with a UUID primary key and indexed columns for ticker, generated_at, and horizon.
|
||||
7. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL store a JSONB metadata field containing any additional context from the trend summary market_context and recommendation risk_checks fields.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 2: Signal Evidence Link Tracking
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to know which specific evidence documents contributed to each prediction, so that I can attribute prediction success or failure to individual sources and signal types.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a prediction snapshot is created, THE Prediction_Snapshot_Writer SHALL create signal_evidence_links records for each document that contributed to the prediction, linking the prediction_id to the document_id and signal_id.
|
||||
2. THE signal_evidence_links record SHALL capture the source identifier, source type, catalyst type, sentiment, impact score, extraction confidence, weight assigned during aggregation, duplicate status, canonical evidence key, and contribution score for each contributing document.
|
||||
3. WHEN recording evidence links, THE Prediction_Snapshot_Writer SHALL compute the canonical_evidence_key as the SHA256 hash of the concatenation of the normalized (lowercased, whitespace-trimmed) document title and the normalized (lowercased, query-parameters-stripped) document URL.
|
||||
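4. WHEN recording evidence links, THE Prediction_Snapshot_Writer SHALL mark a link as `is_duplicate = true` when a previously recorded link for the same prediction and ticker shares the same canonical_evidence_key; the first link recorded for a given canonical_evidence_key SHALL remain `is_duplicate = false` so that each canonical document is still counted once.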
|
||||
5. THE Prediction_Snapshot_Writer SHALL compute the contribution_score for each evidence link as the ratio of that document's effective weight to the total effective weight across all documents for the prediction.
|
||||
6. THE signal_evidence_links table SHALL have a foreign key constraint from prediction_id to prediction_snapshots(id) and indexes on prediction_id, document_id, and ticker.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 3: Evidence Deduplication Quality Tracking
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the system to track evidence deduplication quality per prediction, so that I can identify when predictions are inflated by counting the same information multiple times from different sources.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN creating a prediction snapshot, THE Prediction_Snapshot_Writer SHALL compute the unique_source_count as the number of distinct source identifiers across all non-duplicate evidence links for that prediction.
|
||||
2. WHEN creating a prediction snapshot, THE Prediction_Snapshot_Writer SHALL compute the duplicate_evidence_count as the number of evidence links marked as `is_duplicate = true` for that prediction.
|
||||
3. THE Prediction_Snapshot_Writer SHALL enforce a maximum single-document weight cap of 1.0, clamping any individual document's effective weight to prevent a single piece of evidence from dominating the prediction.
|
||||
4. WHEN computing contribution scores, THE Prediction_Snapshot_Writer SHALL count each canonical evidence key at most once per ticker per window, applying the one-vote-per-canonical-document deduplication rule.
|
||||
5. THE Metrics_Engine SHALL compute a duplicate_rate metric as the ratio of duplicate_evidence_count to total evidence_count across predictions in the lookback window.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 4: Prediction Outcome Evaluation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want realized market outcomes automatically matched to historical predictions, so that I can measure whether the system's directional calls and confidence scores correspond to actual price movements.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Outcome_Evaluator SHALL run on a periodic schedule, evaluating prediction snapshots whose horizon has elapsed and whose outcome has not yet been recorded.
|
||||
2. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute the future_return as `(future_price - price_at_prediction) / price_at_prediction` using the closing price at the horizon endpoint.
|
||||
3. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute the SPY return over the same horizon as `(spy_future_price - spy_price_at_prediction) / spy_price_at_prediction`.
|
||||
4. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute the sector ETF return over the same horizon as `(sector_etf_future_price - sector_etf_price_at_prediction) / sector_etf_price_at_prediction`.
|
||||
5. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL compute excess_return_vs_spy as `future_return - spy_return` and excess_return_vs_sector as `future_return - sector_etf_return`.
|
||||
6. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL determine direction_correct as true when the prediction direction is bullish and future_return is positive, or when the prediction direction is bearish and future_return is negative.
|
||||
7. WHEN evaluating a prediction snapshot, THE Outcome_Evaluator SHALL determine profitable as true when the prediction action is buy and future_return is positive, or when the prediction action is sell and future_return is negative.
|
||||
8. THE Outcome_Evaluator SHALL evaluate each prediction across all applicable horizons: 1 hour, 6 hours, 1 day, 7 days, and 30 days.
|
||||
9. THE Outcome_Evaluator SHALL store evaluation results in a new `prediction_outcomes` table with a foreign key to prediction_snapshots and indexed columns for prediction_id, horizon, and evaluated_at.
|
||||
10. IF the future price is unavailable at the horizon endpoint (market data gap), THEN THE Outcome_Evaluator SHALL skip that horizon evaluation and retry on the next run.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 5: Calibration Analysis
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to measure how well the system's confidence scores predict actual win rates, so that I can identify overconfident or underconfident predictions and recalibrate the model.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL compute calibration metrics by grouping evaluated predictions into confidence buckets: [0.50, 0.60), [0.60, 0.70), [0.70, 0.80), [0.80, 0.90), [0.90, 1.00].
|
||||
2. FOR EACH confidence bucket, THE Metrics_Engine SHALL compute the average confidence, the observed win rate (fraction of direction_correct outcomes), and the prediction count.
|
||||
3. THE Metrics_Engine SHALL compute the Expected Calibration Error (ECE) as the weighted average of `|avg_confidence - observed_win_rate|` across all buckets, weighted by the fraction of predictions in each bucket.
|
||||
4. THE Metrics_Engine SHALL compute the Brier Score as `mean((p_bull - actual_outcome)^2)` across all evaluated predictions, where actual_outcome is 1.0 when the price went up over the horizon (future_return is positive) and 0.0 otherwise, consistent with the Brier_Score glossary definition.
|
||||
5. THE Metrics_Engine SHALL flag calibration buckets where `|avg_confidence - observed_win_rate| > 0.15` as miscalibrated for dashboard highlighting.
|
||||
6. THE Metrics_Engine SHALL compute calibration metrics separately for each prediction horizon (1h, 6h, 1d, 7d, 30d).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 6: Information Coefficient Metrics
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to measure the correlation between the system's prediction scores and realized returns, so that I can assess whether higher-scored predictions actually produce higher returns.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL compute the Information Coefficient (IC) as the Pearson correlation between prediction scores and future returns across all evaluated predictions in the lookback window.
|
||||
2. THE Metrics_Engine SHALL compute the Rank Information Coefficient (Rank IC) as the Spearman rank correlation between prediction scores and future returns across all evaluated predictions in the lookback window.
|
||||
3. THE Metrics_Engine SHALL compute IC and Rank IC separately for each prediction horizon (1h, 6h, 1d, 7d, 30d).
|
||||
4. THE Metrics_Engine SHALL compute return statistics by confidence decile, grouping predictions into 10 equal-sized bins by confidence and computing the average future return and average excess return for each decile.
|
||||
5. WHEN fewer than 30 evaluated predictions exist for a given horizon, THE Metrics_Engine SHALL report IC and Rank IC as NULL rather than computing unreliable correlations from small samples.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 7: Source and Signal Attribution
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to know which sources, source types, and catalyst types contribute to accurate predictions, so that I can identify the most valuable information channels and deprioritize unreliable ones.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Attribution_Engine SHALL compute per-source performance metrics by joining signal_evidence_links with prediction_outcomes, grouping by source identifier.
|
||||
2. FOR EACH source, THE Attribution_Engine SHALL compute: prediction count, average weight, average contribution score, win rate, average future return, average excess return vs SPY, and information coefficient.
|
||||
3. THE Attribution_Engine SHALL compute the same performance metrics grouped by source_type (e.g., news_api, filings_api, web_scrape, market_api).
|
||||
4. THE Attribution_Engine SHALL compute the same performance metrics grouped by catalyst_type (e.g., earnings, product, legal, macro, m_and_a).
|
||||
5. THE Attribution_Engine SHALL compute layer attribution metrics for the three signal layers (company, macro, competitive) by using the score_company, score_macro, and score_competitive fields from prediction snapshots.
|
||||
6. FOR EACH layer, THE Attribution_Engine SHALL compute the average contribution percentage, the win rate when that layer is the dominant contributor, and the IC of predictions where that layer contributes more than 30% of the total score.
|
||||
7. THE Attribution_Engine SHALL compute a per-source duplicate_rate as the fraction of evidence links from that source marked as is_duplicate.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 8: Confidence Recalibration via Source Reliability
|
||||
|
||||
**User Story:** As a quantitative analyst, I want source credibility weights adjusted based on historical prediction accuracy using Bayesian shrinkage, so that the system learns from its own track record and improves over time.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Calibration_Engine SHALL compute source reliability using Bayesian shrinkage: `reliability = 0.5 + (n / (n + 30)) * (observed_win_rate - 0.5)`, where n is the number of evaluated predictions involving that source and observed_win_rate is the fraction of correct directional calls.
|
||||
2. WHEN a source has zero evaluated predictions, THE Calibration_Engine SHALL assign a reliability of 0.5 (the prior mean).
|
||||
3. THE Calibration_Engine SHALL compute an adjusted evidence weight for each source as `adjusted_weight = base_weight * (0.5 + reliability)`, clamped to the range [0.1, 2.0].
|
||||
4. THE Calibration_Engine SHALL update source reliability scores after each outcome evaluation cycle, using the latest prediction outcomes.
|
||||
5. THE Calibration_Engine SHALL store source reliability scores in the existing `source_accuracy` table, extending it with a reliability column or using the existing accuracy_ratio field with the Bayesian shrinkage formula.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 9: Benchmark Comparison
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the system's prediction performance compared against simple benchmarks, so that I can determine whether the model adds value beyond naive strategies.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL compute the average excess return of all buy predictions versus a buy-and-hold SPY strategy over the same horizons.
|
||||
2. THE Metrics_Engine SHALL compute the average excess return of all buy predictions versus a buy-and-hold sector ETF strategy over the same horizons.
|
||||
3. THE Metrics_Engine SHALL compute the win rate of the system's directional predictions compared to a random 50/50 baseline, reporting the statistical significance using a binomial test when the prediction count exceeds 100.
|
||||
4. THE Metrics_Engine SHALL compute the hit rate improvement, defined as `(system_win_rate - 0.5) / 0.5`, representing the percentage improvement over random guessing.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 10: Model Metric Snapshots
|
||||
|
||||
**User Story:** As a quantitative analyst, I want aggregate model metrics stored as time-series snapshots, so that I can track whether model quality is improving or degrading over time.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Metrics_Engine SHALL periodically compute and store model_metric_snapshots containing all aggregate metrics for each combination of lookback window and prediction horizon.
|
||||
2. EACH model_metric_snapshot SHALL contain: prediction count, win rate, directional accuracy, IC, Rank IC, average return, average excess return vs SPY, average excess return vs sector, calibration error (ECE), Brier score, and per-action win rates (buy, sell, hold).
|
||||
3. THE Metrics_Engine SHALL store model_metric_snapshots in a new `model_metric_snapshots` database table with a UUID primary key and indexed columns for generated_at, lookback_window, and horizon.
|
||||
4. THE Metrics_Engine SHALL compute snapshots for lookback windows of 7 days, 30 days, 90 days, and all-time.
|
||||
5. THE Metrics_Engine SHALL store a JSONB metadata field in each snapshot for extensibility, containing any additional computed metrics not captured in dedicated columns.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 11: Safety Gate for Live Trading
|
||||
|
||||
**User Story:** As a platform operator, I want live trading automatically disabled when model quality metrics fall below minimum thresholds, so that the system does not risk real capital on a poorly performing model.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Quality_Gate SHALL evaluate the following minimum thresholds for live trading eligibility: minimum prediction count of 100, minimum IC of 0.03, minimum win rate of 0.53, maximum ECE of 0.15, and minimum excess return vs SPY of 0.0.
|
||||
2. WHEN any threshold is not met, THE Quality_Gate SHALL force all recommendations to paper mode, overriding any live_eligible mode assignments.
|
||||
3. THE Quality_Gate SHALL evaluate gate status at the start of each aggregation cycle by reading the most recent model_metric_snapshot.
|
||||
4. THE Quality_Gate SHALL log the gate evaluation result including which thresholds passed and which failed, with their actual values.
|
||||
5. THE Quality_Gate SHALL store the gate evaluation result in the `risk_configs` table under a `model_quality_gate` key, making it available to the recommendation engine and dashboard.
|
||||
6. IF the model_metric_snapshots table is empty or the most recent snapshot is older than 24 hours, THEN THE Quality_Gate SHALL default to paper-only mode (fail-safe behavior).
|
||||
7. THE Quality_Gate SHALL support configurable thresholds via the `risk_configs` table, with the default values specified in acceptance criterion 1 used when no override is configured.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 12: Model Performance Dashboard Upgrade
|
||||
|
||||
**User Story:** As a platform operator, I want a comprehensive model performance dashboard showing prediction accuracy, calibration, attribution, and gate status, so that I can monitor model quality and make informed decisions about live trading.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Dashboard_API SHALL expose a `/api/validation/summary` endpoint returning the latest model metric snapshot with summary cards for: prediction count, win rate, directional accuracy, IC, Rank IC, Brier score, calibration error, average excess return vs SPY, average excess return vs sector, and live trading gate status.
|
||||
2. THE Dashboard_API SHALL expose a `/api/validation/calibration` endpoint returning the calibration table with confidence buckets, average confidence, observed win rate, prediction count, and miscalibration flag for each bucket.
|
||||
3. THE Dashboard_API SHALL expose a `/api/validation/ic-by-horizon` endpoint returning IC and Rank IC values for each prediction horizon.
|
||||
4. THE Dashboard_API SHALL expose a `/api/validation/attribution/sources` endpoint returning per-source performance metrics including win rate, IC, average return, and duplicate rate.
|
||||
5. THE Dashboard_API SHALL expose a `/api/validation/attribution/catalysts` endpoint returning per-catalyst-type performance metrics.
|
||||
6. THE Dashboard_API SHALL expose a `/api/validation/attribution/layers` endpoint returning per-signal-layer (company, macro, competitive) performance metrics.
|
||||
7. THE Dashboard_API SHALL expose a `/api/validation/gate-status` endpoint returning the current quality gate evaluation with pass/fail status for each threshold.
|
||||
8. THE frontend OpsModel page SHALL be upgraded to display the model validation summary cards, calibration table, IC-by-horizon table, source performance table, catalyst truth table, layer attribution table, and gate status indicator.
|
||||
9. THE frontend SHALL highlight miscalibrated confidence buckets where `|avg_confidence - observed_win_rate| > 0.15` with a visual warning indicator.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 13: Recommendation Display Enhancements
|
||||
|
||||
**User Story:** As a platform operator, I want each recommendation to display its validation context including calibrated confidence, historical win rate, and evidence quality indicators, so that I can assess the reliability of individual predictions.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN displaying a recommendation, THE frontend SHALL show the original confidence alongside the calibrated confidence (based on the historical win rate for that confidence bucket).
|
||||
2. WHEN displaying a recommendation, THE frontend SHALL show the historical win rate for predictions with similar confidence levels.
|
||||
3. WHEN displaying a recommendation, THE frontend SHALL show the evidence count, unique source count, and duplicate evidence count.
|
||||
4. WHEN displaying a recommendation, THE frontend SHALL show a source reliability indicator based on the Bayesian-shrunk reliability score of the primary contributing sources.
|
||||
5. WHEN displaying a recommendation, THE frontend SHALL show the live eligibility status with the reason (gate passed, or which threshold failed).
|
||||
6. WHEN the duplicate evidence count exceeds 20% of the total evidence count, THE frontend SHALL display a warning badge indicating potential evidence inflation.
|
||||
7. WHEN the primary contributing source has a reliability score below 0.4, THE frontend SHALL display a warning badge indicating unknown or low source reliability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 14: SQL Explorer Views
|
||||
|
||||
**User Story:** As a quantitative analyst, I want pre-built SQL views joining predictions with outcomes and evidence with performance, so that I can run ad-hoc analysis in the SQL Explorer without writing complex joins.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE database migration SHALL create a view `v_prediction_performance` that joins prediction_snapshots with prediction_outcomes on `prediction_snapshots.id = prediction_outcomes.prediction_id`, providing a single flat table with prediction inputs and realized outcomes.
|
||||
2. THE database migration SHALL create a view `v_source_performance` that joins signal_evidence_links with prediction_outcomes (via prediction_id), providing per-evidence-link outcome data for source attribution analysis.
|
||||
3. THE v_prediction_performance view SHALL include columns for ticker, direction, action, confidence, strength, price_at_prediction, future_return, excess_return_vs_spy, direction_correct, profitable, horizon, generated_at, and evaluated_at.
|
||||
4. THE v_source_performance view SHALL include columns for source, source_type, catalyst_type, sentiment, weight, contribution_score, is_duplicate, direction_correct, future_return, and excess_return_vs_spy.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 15: Backtest Replay Integration
|
||||
|
||||
**User Story:** As a quantitative analyst, I want to replay historical data through the prediction snapshot and outcome evaluation pipeline, so that I can assess model quality on historical data without future data leakage.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Backtest_Replay service SHALL support a validation mode that generates prediction snapshots and evaluates outcomes using only data available at each historical point in time.
|
||||
2. WHEN running in validation mode, THE Backtest_Replay service SHALL process historical recommendations chronologically, creating prediction snapshots with the market prices that were available at each recommendation's generation time.
|
||||
3. WHEN running in validation mode, THE Backtest_Replay service SHALL evaluate prediction outcomes using market prices from the appropriate future horizon relative to each prediction's generation time.
|
||||
4. THE Backtest_Replay service SHALL prevent future data leakage by ensuring that no market data with a timestamp after the prediction generation time is used during snapshot creation.
|
||||
5. WHEN a backtest validation run completes, THE Backtest_Replay service SHALL trigger a model metrics computation over the backtest period, storing the results as model_metric_snapshots tagged with the backtest_id.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 16: Database Schema
|
||||
|
||||
**User Story:** As a developer, I want the new database tables created via a migration script following the existing migration conventions, so that the schema changes are applied consistently across all environments.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE database migration SHALL create the `prediction_snapshots` table with columns: id (UUID PK), generated_at (TIMESTAMPTZ), ticker (VARCHAR), window (VARCHAR), horizon (VARCHAR), direction (VARCHAR), action (VARCHAR), mode (VARCHAR), strength (FLOAT), confidence (FLOAT), contradiction (FLOAT), p_bull (FLOAT), p_bear (FLOAT), score_company (FLOAT), score_macro (FLOAT), score_competitive (FLOAT), evidence_count (INTEGER), unique_source_count (INTEGER), duplicate_evidence_count (INTEGER), price_at_prediction (FLOAT), spy_price_at_prediction (FLOAT), sector_etf_price_at_prediction (FLOAT), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
2. THE database migration SHALL create the `prediction_outcomes` table with columns: id (UUID PK), prediction_id (UUID FK to prediction_snapshots), evaluated_at (TIMESTAMPTZ), horizon (VARCHAR), future_price (FLOAT), future_return (FLOAT), spy_future_price (FLOAT), spy_return (FLOAT), sector_etf_future_price (FLOAT), sector_etf_return (FLOAT), excess_return_vs_spy (FLOAT), excess_return_vs_sector (FLOAT), direction_correct (BOOLEAN), profitable (BOOLEAN), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
3. THE database migration SHALL create the `signal_evidence_links` table with columns: id (UUID PK), prediction_id (UUID FK to prediction_snapshots), document_id (VARCHAR), signal_id (VARCHAR), ticker (VARCHAR), source (VARCHAR), source_type (VARCHAR), catalyst_type (VARCHAR), sentiment (VARCHAR), impact (FLOAT), extraction_confidence (FLOAT), weight (FLOAT), is_duplicate (BOOLEAN), canonical_evidence_key (VARCHAR), contribution_score (FLOAT), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
4. THE database migration SHALL create the `model_metric_snapshots` table with columns: id (UUID PK), generated_at (TIMESTAMPTZ), lookback_window (VARCHAR), horizon (VARCHAR), prediction_count (INTEGER), win_rate (FLOAT), directional_accuracy (FLOAT), information_coefficient (FLOAT), rank_information_coefficient (FLOAT), avg_return (FLOAT), avg_excess_return_vs_spy (FLOAT), avg_excess_return_vs_sector (FLOAT), calibration_error (FLOAT), brier_score (FLOAT), buy_win_rate (FLOAT), sell_win_rate (FLOAT), hold_win_rate (FLOAT), metadata (JSONB), created_at (TIMESTAMPTZ).
|
||||
5. THE database migration SHALL create appropriate indexes on prediction_snapshots (ticker, generated_at, horizon), prediction_outcomes (prediction_id, horizon, evaluated_at), signal_evidence_links (prediction_id, document_id, ticker), and model_metric_snapshots (generated_at, lookback_window, horizon).
|
||||
6. THE database migration SHALL be numbered as `035_model_validation.sql`, following the existing migration numbering convention.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 17: Property-Based Testing for Validation Metrics
|
||||
|
||||
**User Story:** As a developer, I want property-based tests validating the mathematical correctness of all validation metric computations, so that edge cases and numerical stability issues are caught before deployment.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE test suite SHALL include a property-based test for calibration error verifying that ECE is in [0.0, 1.0] for all valid distributions of predictions across confidence buckets, and that ECE is 0.0 when every bucket's observed win rate exactly matches its average confidence (perfect-calibration property).
|
||||
2. THE test suite SHALL include a property-based test for Brier score verifying that the score is in [0.0, 1.0] for all valid probability-outcome pairs, and that the score is 0.0 when all predictions are perfectly correct with probability 1.0.
|
||||
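3. THE test suite SHALL include a property-based test for information coefficient verifying that IC is in [-1.0, 1.0] for all valid score-return pairs, and that IC is 1.0 (within floating-point tolerance) when returns are a strictly increasing linear function of non-constant scores, i.e., when scores and returns are perfectly positively correlated.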
|
||||
4. THE test suite SHALL include a property-based test for the canonical evidence key verifying that the key is deterministic (same inputs always produce the same key) and that normalization is idempotent (normalizing an already-normalized input produces the same key).
|
||||
5. THE test suite SHALL include a property-based test for source reliability Bayesian shrinkage verifying that reliability is always in [0.0, 1.0], that reliability approaches 0.5 as sample count approaches 0, and that reliability approaches the observed win rate as sample count approaches infinity.
|
||||
6. THE test suite SHALL include a property-based test for the quality gate verifying that the gate result is deterministic for the same metric inputs, and that relaxing any single threshold (making it easier to pass) never causes a previously passing gate to fail (monotonicity property).
|
||||
7. THE test suite SHALL include a property-based test for contribution score computation verifying that all contribution scores for a single prediction sum to 1.0 (within floating-point tolerance) and that each individual score is in [0.0, 1.0].
|
||||
@@ -0,0 +1,260 @@
|
||||
# Implementation Plan: Model Validation, Calibration, and Signal Quality
|
||||
|
||||
## Overview
|
||||
|
||||
Add a closed-loop model validation layer to Stonks Oracle: prediction snapshot capture, outcome evaluation, calibration/IC metrics, source/catalyst/layer attribution, Bayesian source reliability, a quality gate for live trading, 7 new API endpoints, an upgraded OpsModel dashboard, and backtest replay integration. Implementation follows the four-phase priority order from the spec, with each phase building on the previous one.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] 1. Database migration 035 — schema foundation
|
||||
- [x] 1.1 Create `infra/migrations/035_model_validation.sql` with all tables, indexes, and views
|
||||
- Create `prediction_snapshots` table with all columns from design (id UUID PK, generated_at, ticker, window, horizon, direction, action, mode, strength, confidence, contradiction, p_bull, p_bear, score_company, score_macro, score_competitive, evidence_count, unique_source_count, duplicate_evidence_count, price_at_prediction, spy_price_at_prediction, sector_etf_price_at_prediction, metadata JSONB, created_at)
|
||||
- Create `prediction_outcomes` table with FK to prediction_snapshots (id UUID PK, prediction_id, evaluated_at, horizon, future_price, future_return, spy_future_price, spy_return, sector_etf_future_price, sector_etf_return, excess_return_vs_spy, excess_return_vs_sector, direction_correct, profitable, metadata JSONB, created_at)
|
||||
- Create `signal_evidence_links` table with FK to prediction_snapshots (id UUID PK, prediction_id, document_id, signal_id, ticker, source, source_type, catalyst_type, sentiment, impact, extraction_confidence, weight, is_duplicate, canonical_evidence_key, contribution_score, metadata JSONB, created_at)
|
||||
- Create `model_metric_snapshots` table (id UUID PK, generated_at, lookback_window, horizon, prediction_count, win_rate, directional_accuracy, information_coefficient, rank_information_coefficient, avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector, calibration_error, brier_score, buy_win_rate, sell_win_rate, hold_win_rate, metadata JSONB, created_at)
|
||||
- Create indexes on prediction_snapshots (ticker, generated_at, horizon), prediction_outcomes (prediction_id, horizon, evaluated_at), signal_evidence_links (prediction_id, document_id, ticker), model_metric_snapshots (generated_at, lookback_window, horizon)
|
||||
- Create `v_prediction_performance` view joining prediction_snapshots with prediction_outcomes
|
||||
- Create `v_source_performance` view joining signal_evidence_links with prediction_snapshots and prediction_outcomes
|
||||
- _Requirements: 16.1, 16.2, 16.3, 16.4, 16.5, 16.6, 14.1, 14.2, 14.3, 14.4_
|
||||
|
||||
- [x] 2. Phase 1 — Prediction capture, outcome evaluation, core metrics, and dashboard API
|
||||
- [x] 2.1 Implement Prediction Snapshot Writer (`services/validation/prediction_snapshot.py`)
|
||||
- Create `services/validation/__init__.py`
|
||||
- Define `SECTOR_ETF_MAP`, `EVALUATION_HORIZONS`, `MAX_SINGLE_DOCUMENT_WEIGHT` constants
|
||||
- Implement `PredictionSnapshot` and `SignalEvidenceLink` dataclasses
|
||||
- Implement `compute_canonical_evidence_key(title, url)` — SHA256 of normalized title + normalized URL (lowercase, strip whitespace for title; lowercase, strip query params for URL)
|
||||
- Implement `fetch_latest_close_price(pool, ticker)` — query most recent close from market_snapshots
|
||||
- Implement `create_prediction_snapshot(pool, recommendation, trend_summary, evidence_signals, evidence_docs)` — fetch prices (ticker, SPY, sector ETF), compute canonical keys, detect duplicates, clamp weights to MAX_SINGLE_DOCUMENT_WEIGHT, compute contribution scores (one-vote-per-canonical-key), persist snapshot + evidence links in a transaction
|
||||
- Implement `compute_contribution_scores(weights)` — each score = weight_i / sum(weights), sums to 1.0
|
||||
- Handle NULL prices gracefully (log warning, store NULL, don't fail)
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.1, 3.2, 3.3, 3.4_
|
||||
|
||||
- [x] 2.2 Write property test for canonical evidence key determinism and idempotence
|
||||
- **Property 4: Canonical Evidence Key Determinism and Normalization Idempotence**
|
||||
- Test that same (title, url) always produces same key
|
||||
- Test that normalizing already-normalized input produces same key
|
||||
- **Validates: Requirements 2.3, 17.4**
|
||||
|
||||
- [x] 2.3 Write property test for contribution score sum-to-one and range
|
||||
- **Property 7: Contribution Score Sum-to-One and Range**
|
||||
- Test that all scores in [0.0, 1.0] and sum to 1.0 (within 1e-9 tolerance)
|
||||
- Test that empty input returns empty list
|
||||
- **Validates: Requirements 2.5, 17.7**
|
||||
|
||||
- [x] 2.4 Implement Outcome Evaluator (`services/validation/outcome_evaluator.py`)
|
||||
- Define `PredictionOutcome` dataclass and `HORIZON_DURATIONS` mapping
|
||||
- Implement `evaluate_matured_predictions(pool)` — find snapshots where horizon elapsed and outcome not recorded, evaluate each
|
||||
- Implement `evaluate_single_prediction(pool, snapshot, horizon)` — fetch future price at horizon endpoint, compute future_return, SPY return, sector ETF return, excess returns, direction_correct, profitable; return None if future price unavailable
|
||||
- Evaluate across all 5 horizons: 1h, 6h, 1d, 7d, 30d
|
||||
- Skip horizons where future price is unavailable (retry next run)
|
||||
- Store results in prediction_outcomes table
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 4.10_
|
||||
|
||||
- [x] 2.5 Implement Metrics Engine (`services/validation/metrics.py`)
|
||||
- Define `CONFIDENCE_BUCKETS`, `LOOKBACK_WINDOWS` constants
|
||||
- Define `CalibrationBucket` and `ModelMetricSnapshot` dataclasses
|
||||
- Implement `compute_calibration_error(confidences, outcomes)` — group into 5 confidence buckets, compute ECE as weighted average of |avg_conf - win_rate|, flag miscalibrated buckets (|diff| > 0.15)
|
||||
- Implement `compute_brier_score(p_bulls, outcomes)` — mean((p_bull - outcome)^2)
|
||||
- Implement `compute_information_coefficient(scores, returns)` — Pearson correlation; return None when fewer than 30 data points are available
|
||||
- Implement `compute_rank_information_coefficient(scores, returns)` — Spearman rank correlation; return None when fewer than 30 data points are available
|
||||
- Implement `compute_contribution_scores(weights)` — weight_i / sum(weights), sums to 1.0
|
||||
- Implement benchmark metrics: average excess return vs SPY, vs sector ETF, hit rate improvement
|
||||
- Implement `compute_and_store_metric_snapshots(pool)` — compute for all lookback/horizon combinations (4 lookbacks × 5 horizons), persist to model_metric_snapshots
|
||||
- _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 6.1, 6.2, 6.3, 6.4, 6.5, 9.1, 9.2, 9.3, 9.4, 10.1, 10.2, 10.3, 10.4, 10.5_
|
||||
|
||||
- [x] 2.6 Write property test for ECE range and round-trip
|
||||
- **Property 1: Calibration Error Range and Round-Trip**
|
||||
- Test ECE in [0.0, 1.0] for all valid distributions
|
||||
- Test ECE = 0.0 when every bucket's win rate matches avg confidence
|
||||
- **Validates: Requirements 5.1, 5.3, 17.1**
|
||||
|
||||
- [x] 2.7 Write property test for Brier score range and perfect prediction
|
||||
- **Property 2: Brier Score Range and Perfect Prediction**
|
||||
- Test Brier in [0.0, 1.0] for all valid (p_bull, outcome) pairs
|
||||
- Test Brier = 0.0 when all predictions perfectly correct
|
||||
- **Validates: Requirements 5.4, 17.2**
|
||||
|
||||
- [x] 2.8 Write property test for IC range and perfect correlation
|
||||
- **Property 3: Information Coefficient Range and Perfect Correlation**
|
||||
- Test IC in [-1.0, 1.0] for all valid (score, return) pairs with ≥30 elements
|
||||
- Test IC = 1.0 for perfectly positively correlated data
|
||||
- **Validates: Requirements 6.1, 6.2, 17.3**
|
||||
|
||||
- [x] 2.9 Implement Dashboard API endpoints in `services/api/app.py`
|
||||
- Add `/api/validation/summary` GET — return latest model_metric_snapshot + gate status
|
||||
- Add `/api/validation/calibration` GET — return calibration table with buckets
|
||||
- Add `/api/validation/ic-by-horizon` GET — return IC and Rank IC per horizon
|
||||
- Add `/api/validation/gate-status` GET — return quality gate evaluation detail
|
||||
- All endpoints accept optional `lookback` (default "30d") and `horizon` (default "7d") query params
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.7_
|
||||
|
||||
- [x] 2.10 Add frontend validation API hooks in `frontend/src/api/hooks.ts`
|
||||
- Add `useValidationSummary(lookback?, horizon?)` hook for `/api/validation/summary`
|
||||
- Add `useValidationCalibration(lookback?, horizon?)` hook for `/api/validation/calibration`
|
||||
- Add `useValidationICByHorizon(lookback?)` hook for `/api/validation/ic-by-horizon`
|
||||
- Add `useValidationGateStatus()` hook for `/api/validation/gate-status`
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.7_
|
||||
|
||||
- [x] 2.11 Upgrade OpsModel page (`frontend/src/pages/OpsModel.tsx`) — Phase 1 dashboard
|
||||
- Add tabbed layout: existing "Extraction Performance" tab + new "Model Validation" tab
|
||||
- Add summary cards: prediction count, win rate, directional accuracy, IC, Rank IC, Brier score, ECE, avg excess return vs SPY, gate status
|
||||
- Add calibration table with confidence buckets, avg confidence, observed win rate, count, miscalibration flag
|
||||
- Highlight miscalibrated buckets (|avg_confidence - observed_win_rate| > 0.15) with warning indicator
|
||||
- Add IC-by-horizon table showing IC and Rank IC for each horizon
|
||||
- Add gate status indicator (pass/fail with threshold details)
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.7, 12.8, 12.9_
|
||||
|
||||
- [x] 3. Checkpoint — Phase 1 verification
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
- [x] 4. Phase 2 — Attribution engine and source/catalyst truth tables
|
||||
- [x] 4.1 Implement Attribution Engine (`services/validation/attribution.py`)
|
||||
- Define `SourceAttribution`, `CatalystAttribution`, `LayerAttribution` dataclasses
|
||||
- Implement `compute_source_attribution(pool, lookback_days, horizon)` — join signal_evidence_links with prediction_outcomes, group by source; compute prediction count, avg weight, avg contribution score, win rate, avg future return, avg excess return vs SPY, IC, duplicate rate
|
||||
- Implement `compute_catalyst_attribution(pool, lookback_days, horizon)` — same metrics grouped by catalyst_type
|
||||
- Implement `compute_layer_attribution(pool, lookback_days, horizon)` — compute per-layer (company, macro, competitive) avg contribution %, plus win rate and IC over the predictions where that layer is dominant (layer contribution > 30%)
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7_
|
||||
|
||||
- [x] 4.2 Implement Calibration Engine (`services/validation/calibration.py`)
|
||||
- Implement `compute_source_reliability(observed_win_rate, sample_count, prior_strength=30)` — Bayesian shrinkage: `0.5 + (n / (n + prior_strength)) * (observed_win_rate - 0.5)` where `n = sample_count`; return 0.5 when n=0
|
||||
- Implement `compute_adjusted_evidence_weight(base_weight, reliability)` — `base_weight * (0.5 + reliability)`, clamped to [0.1, 2.0]
|
||||
- Implement `update_source_reliabilities(pool)` — recompute from latest outcomes, update source_accuracy table
|
||||
- _Requirements: 8.1, 8.2, 8.3, 8.4, 8.5_
|
||||
|
||||
- [x] 4.3 Write property test for source reliability Bayesian shrinkage bounds and convergence
|
||||
- **Property 5: Source Reliability Bayesian Shrinkage Bounds and Convergence**
|
||||
- Test reliability in [0.0, 1.0] for all valid inputs
|
||||
- Test reliability = 0.5 when sample_count = 0
|
||||
- Test reliability approaches observed_win_rate as sample_count → ∞
|
||||
- **Validates: Requirements 8.1, 8.2, 17.5**
|
||||
|
||||
- [x] 4.4 Add attribution API endpoints in `services/api/app.py`
|
||||
- Add `/api/validation/attribution/sources` GET — return per-source performance metrics
|
||||
- Add `/api/validation/attribution/catalysts` GET — return per-catalyst performance metrics
|
||||
- Add `/api/validation/attribution/layers` GET — return per-layer performance metrics
|
||||
- All endpoints accept optional `lookback` (default "30d") and `horizon` (default "7d") query params
|
||||
- _Requirements: 12.4, 12.5, 12.6_
|
||||
|
||||
- [x] 4.5 Add frontend attribution hooks in `frontend/src/api/hooks.ts`
|
||||
- Add `useValidationAttributionSources(lookback?, horizon?)` hook
|
||||
- Add `useValidationAttributionCatalysts(lookback?, horizon?)` hook
|
||||
- Add `useValidationAttributionLayers(lookback?, horizon?)` hook
|
||||
- _Requirements: 12.4, 12.5, 12.6_
|
||||
|
||||
- [x] 4.6 Extend OpsModel page with attribution tables
|
||||
- Add source performance table (source, win rate, IC, avg return, duplicate rate)
|
||||
- Add catalyst truth table (catalyst type, win rate, avg return, IC)
|
||||
- Add layer attribution table (company/macro/competitive contribution %, dominant win rate, IC)
|
||||
- _Requirements: 12.4, 12.5, 12.6, 12.8_
|
||||
|
||||
- [x] 5. Checkpoint — Phase 2 verification
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
- [x] 6. Phase 3 — Quality gate, recommendation enhancements, and pipeline wiring
|
||||
- [x] 6.1 Implement Quality Gate (`services/trading/model_quality_gate.py`)
|
||||
- Define `QualityGateConfig` dataclass with default thresholds (min_prediction_count=100, min_ic=0.03, min_win_rate=0.53, max_ece=0.15, min_excess_return_vs_spy=0.0, max_snapshot_age_hours=24)
|
||||
- Define `GateThresholdResult` and `QualityGateResult` dataclasses
|
||||
- Implement `evaluate_quality_gate(pool, config)` — read most recent model_metric_snapshot (30d lookback, 7d horizon), evaluate each threshold, store result in risk_configs under 'model_quality_gate' key
|
||||
- Implement `load_gate_config_from_db(pool)` — load thresholds from risk_configs with defaults
|
||||
- Default to paper-only mode when no snapshots exist or the most recent snapshot is stale (older than `max_snapshot_age_hours`, default 24h)
|
||||
- Log gate evaluation result with threshold pass/fail details
|
||||
- _Requirements: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7_
|
||||
|
||||
- [x] 6.2 Write property test for quality gate determinism and threshold monotonicity
|
||||
- **Property 6: Quality Gate Determinism and Threshold Monotonicity**
|
||||
- Test same inputs always produce same pass/fail result
|
||||
- Test relaxing any threshold never causes a previously passing gate to fail
|
||||
- **Validates: Requirements 11.1, 17.6**
|
||||
|
||||
- [x] 6.3 Wire Quality Gate into aggregation cycle (`services/aggregation/worker.py`)
|
||||
- Call `evaluate_quality_gate` at the start of each aggregation cycle
|
||||
- When gate fails, force all recommendations to paper mode
|
||||
- Log gate status at cycle start
|
||||
- _Requirements: 11.2, 11.3_
|
||||
|
||||
- [x] 6.4 Wire Prediction Snapshot Writer into recommendation engine
|
||||
- After recommendation is generated in `services/recommendation/eligibility.py` or the calling code, call `create_prediction_snapshot` to capture the prediction state
|
||||
- Pass recommendation, trend_summary, evidence signals, and evidence docs
|
||||
- Handle snapshot creation failure gracefully (log error, don't block recommendation)
|
||||
- _Requirements: 1.1, 1.6_
|
||||
|
||||
- [x] 6.5 Enhance recommendation display on frontend
|
||||
- Update `frontend/src/pages/RecommendationDetail` (or relevant recommendation display component) to show:
|
||||
- Original confidence alongside calibrated confidence (historical win rate for that bucket)
|
||||
- Historical win rate for similar confidence levels
|
||||
- Evidence count, unique evidence count, duplicate evidence count
|
||||
- Source reliability indicator for primary contributing sources
|
||||
- Live eligibility status with reason (gate passed or which threshold failed)
|
||||
- Add warning badge when duplicate evidence count > 20% of total evidence count
|
||||
- Add warning badge when primary source reliability < 0.4
|
||||
- _Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7_
|
||||
|
||||
- [x] 7. Checkpoint — Phase 3 verification
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
- [x] 8. Phase 4 — Backtest replay integration and unit tests
|
||||
- [x] 8.1 Add validation mode to BacktestReplay (`services/trading/backtest_replay.py`)
|
||||
- Add `validation_mode: bool = False` parameter to `BacktestReplay.run()`
|
||||
- When validation_mode=True, create prediction snapshots for each historical recommendation using only data available at that point in time
|
||||
- Evaluate prediction outcomes using market prices from the appropriate future horizon
|
||||
- Prevent future data leakage: no market data after prediction generation time used during snapshot creation
|
||||
- After backtest completes, trigger model metrics computation over the backtest period, tag snapshots with backtest_id
|
||||
- _Requirements: 15.1, 15.2, 15.3, 15.4, 15.5_
|
||||
|
||||
- [x] 8.2 Write unit tests for prediction snapshot writer (`tests/test_model_validation_unit.py`)
|
||||
- Test canonical evidence key: known title/URL → expected SHA256, empty inputs, unicode
|
||||
- Test duplicate detection: 3 docs with 2 sharing a key → 1 marked duplicate
|
||||
- Test contribution scores: [0.5, 0.3, 0.2] → [0.5, 0.3, 0.2], single doc → [1.0]
|
||||
- Test weight clamping: weight 1.5 → clamped to 1.0
|
||||
- _Requirements: 1.1, 2.3, 2.4, 2.5, 3.3_
|
||||
|
||||
- [x] 8.3 Write unit tests for outcome evaluator (`tests/test_model_validation_unit.py`)
|
||||
- Test future return computation: price 100→110 → 0.10, price 100→90 → -0.10
|
||||
- Test direction_correct logic: bullish+positive → true, bullish+negative → false
|
||||
- Test profitable logic: buy+positive → true, sell+negative → true
|
||||
- Test excess return: ticker 10%, SPY 5% → excess 5%
|
||||
- _Requirements: 4.2, 4.5, 4.6, 4.7_
|
||||
|
||||
- [x] 8.4 Write unit tests for metrics engine (`tests/test_model_validation_unit.py`)
|
||||
- Test ECE specific values: perfect calibration → 0.0, all overconfident → positive ECE
|
||||
- Test Brier score: all correct at p=1.0 → 0.0, all wrong at p=1.0 → 1.0
|
||||
- Test IC: perfect correlation → 1.0, anti-correlation → -1.0, < 30 → None
|
||||
- _Requirements: 5.3, 5.4, 6.1, 6.2, 6.5_
|
||||
|
||||
- [x] 8.5 Write unit tests for calibration engine (`tests/test_model_validation_unit.py`)
|
||||
- Test source reliability: n=0 → 0.5, n=1000 with wr=0.8 → ≈0.8, n=30 with wr=0.7 → 0.6
|
||||
- Test adjusted evidence weight: reliability=0.5 → base*1.0, clamping to [0.1, 2.0]
|
||||
- _Requirements: 8.1, 8.2, 8.3_
|
||||
|
||||
- [x] 8.6 Write unit tests for quality gate (`tests/test_model_validation_unit.py`)
|
||||
- Test all thresholds met → pass
|
||||
- Test one threshold failed → fail with reason
|
||||
- Test fail-safe: no snapshots → paper-only, stale snapshot → paper-only
|
||||
- _Requirements: 11.1, 11.6_
|
||||
|
||||
- [x] 8.7 Write frontend tests for validation dashboard (`frontend/src/test/pages.test.tsx`)
|
||||
- Add MSW mock handlers for `/api/validation/summary`, `/api/validation/calibration`, `/api/validation/gate-status`
|
||||
- Test OpsModel page renders validation tab with summary cards
|
||||
- Test calibration table renders buckets with miscalibration warning
|
||||
- Test gate status indicator renders pass/fail
|
||||
- _Requirements: 12.8, 12.9_
|
||||
|
||||
- [x] 9. Final checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation after each phase
|
||||
- Property tests validate the 7 universal correctness properties from the design document
|
||||
- Unit tests validate specific examples, edge cases, and integration points
|
||||
- The design uses Python for backend and TypeScript for frontend — no language selection needed
|
||||
- Migration number is 035 (existing migrations go up to 034)
|
||||
- All new service modules go under `services/validation/` except the quality gate which goes in `services/trading/`
|
||||
- The 7 new API endpoints are added to the existing `services/api/app.py`
|
||||
- Frontend hooks follow existing patterns in `frontend/src/api/hooks.ts`
|
||||
- Phase 1 delivers the core feedback loop (capture → evaluate → measure → display)
|
||||
- Phase 2 adds attribution depth (which sources/catalysts/layers work best)
|
||||
- Phase 3 adds safety (quality gate) and UX (recommendation warnings)
|
||||
- Phase 4 adds historical analysis (backtest validation mode) and comprehensive tests
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "b595d834-7e72-4fab-87a9-65c92115a069", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,732 @@
|
||||
# Design Document — Signal Math Upgrade
|
||||
|
||||
## Overview
|
||||
|
||||
This design upgrades the Stonks Oracle signal processing pipeline from deterministic heuristic formulas to a probabilistic, regime-aware, and adaptive mathematical framework. The upgrade spans all pipeline stages — signal scoring, trend assembly, macro impact, competitive signals, trend projection, and recommendation generation — while preserving the existing `WeightedSignal` abstraction, three-layer architecture, database schema, and dataclass interfaces.
|
||||
|
||||
The core transformation replaces:
|
||||
- **Binary confidence gate** → smooth sigmoid transition
|
||||
- **Weighted sentiment average** → Bayesian log-likelihood accumulation with Beta posterior
|
||||
- **Fixed recency decay** → adaptive event-specific half-lives
|
||||
- **Linear macro exposure** → multiplicative compounding exposure
|
||||
- **Additive macro integration** → conditional multiplicative modifiers
|
||||
- **Simple contradiction ratio** → weighted disagreement entropy
|
||||
- **Heuristic trend confidence** → Bayesian confidence derived from the Beta posterior (`1 - 4αβ/(α+β)²`)
|
||||
- **Threshold-based direction** → entropy-based mixed signal detection
|
||||
- **Simple momentum** → exponentially weighted momentum with volatility scaling
|
||||
- **Confidence/strength gates** → expected value recommendation gate
|
||||
- **Fixed relationship transfer** → graph-distance attenuated competitive signals
|
||||
|
||||
All changes are gated behind a `probabilistic_scoring_enabled` feature flag in `risk_configs`, allowing incremental rollout with instant rollback. New outputs (P_bull, α, β, entropy, regime, EV) are stored in existing JSONB columns — no database migrations required.
|
||||
|
||||
### Design Rationale
|
||||
|
||||
Markets are fundamentally probabilistic and regime-dependent. The current pipeline collapses rich evidence into binary sentiment labels and fixed-weight averages, losing uncertainty structure. A Bayesian framework preserves the full posterior distribution, enabling the system to distinguish between "strongly bullish" and "weakly bullish with high uncertainty" — a distinction that directly impacts position sizing and risk management.
|
||||
|
||||
The regime detector adapts scoring thresholds to market conditions (panic vs. trending vs. mean-reverting), and the expected value gate ensures recommendations only proceed when the risk-adjusted outcome is positive. Together, these changes transform the pipeline from a sentiment aggregator into a probabilistic forecasting engine.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Pipeline Flow
|
||||
|
||||
The upgraded pipeline maintains the existing three-layer architecture but introduces new computation stages within each layer. The feature flag controls which computation path is taken at each stage.
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph "Layer 1: Company Signals"
|
||||
A[Document Intelligence Records] --> B[Signal Scorer]
|
||||
B --> |"probabilistic=false"| C1[Binary Gate + Fixed Decay]
|
||||
B --> |"probabilistic=true"| C2[Sigmoid Gate + Adaptive Decay<br/>+ Info Gain + Source Accuracy]
|
||||
C1 --> D[WeightedSignal list]
|
||||
C2 --> D
|
||||
end
|
||||
|
||||
subgraph "Layer 2: Macro Signals"
|
||||
E[Global Events] --> F[Macro Scorer]
|
||||
F --> |"probabilistic=false"| G1[Linear Weighted Sum]
|
||||
F --> |"probabilistic=true"| G2[Multiplicative Exposure]
|
||||
G1 --> H[Macro WeightedSignals]
|
||||
G2 --> H
|
||||
end
|
||||
|
||||
subgraph "Layer 3: Competitive Signals"
|
||||
I[Pattern Matcher] --> J[Signal Propagation]
|
||||
J --> |"probabilistic=false"| K1[Flat Transfer Strength]
|
||||
J --> |"probabilistic=true"| K2[Graph-Distance Attenuation]
|
||||
K1 --> L[Competitive WeightedSignals]
|
||||
K2 --> L
|
||||
end
|
||||
|
||||
subgraph "Regime Detection (new)"
|
||||
M[Market Data] --> N[Regime Detector]
|
||||
N --> O{Regime Classification}
|
||||
O --> P[trend-following / panic / mean-reversion / uncertainty]
|
||||
end
|
||||
|
||||
subgraph "Trend Assembly"
|
||||
D --> Q[Merge Signals]
|
||||
H --> |"probabilistic=false"| Q
|
||||
H --> |"probabilistic=true"| R[Conditional Macro Modifier]
|
||||
R --> Q
|
||||
L --> Q
|
||||
Q --> S[Trend Assembler]
|
||||
S --> |"probabilistic=false"| T1[Heuristic Confidence + Threshold Direction]
|
||||
S --> |"probabilistic=true"| T2[Bayesian Posterior + Entropy Direction<br/>+ Regime-Adjusted Thresholds]
|
||||
P --> T2
|
||||
T1 --> U[TrendSummary]
|
||||
T2 --> U
|
||||
end
|
||||
|
||||
subgraph "Projection"
|
||||
U --> V[Projection Engine]
|
||||
V --> |"probabilistic=false"| W1[Simple Momentum]
|
||||
V --> |"probabilistic=true"| W2[EW Momentum + Vol Scaling]
|
||||
W1 --> X[TrendProjection]
|
||||
W2 --> X
|
||||
end
|
||||
|
||||
subgraph "Recommendation"
|
||||
U --> Y[Recommendation Engine]
|
||||
X --> Y
|
||||
Y --> |"probabilistic=false"| Z1[Confidence + Strength Gates]
|
||||
Y --> |"probabilistic=true"| Z2[EV Gate + Existing Gates]
|
||||
Z1 --> AA[Recommendation]
|
||||
Z2 --> AA
|
||||
end
|
||||
```
|
||||
|
||||
### Feature Flag Control Flow
|
||||
|
||||
The feature flag `probabilistic_scoring_enabled` is read from the `risk_configs` table's `config` JSONB column at the start of each aggregation cycle. It propagates through all pipeline stages via the existing `AggregationConfig` dataclass.
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant W as Worker (aggregate_company)
|
||||
participant DB as PostgreSQL (risk_configs)
|
||||
participant S as Signal Scorer
|
||||
participant T as Trend Assembler
|
||||
participant R as Recommendation Engine
|
||||
|
||||
W->>DB: SELECT config FROM risk_configs WHERE active=TRUE
|
||||
DB-->>W: {"macro_enabled": true, "competitive_enabled": true, "probabilistic_scoring_enabled": false}
|
||||
W->>W: Log pipeline mode (heuristic or probabilistic)
|
||||
W->>S: compute_signal_weight(..., probabilistic=flag)
|
||||
S-->>W: WeightedSignal (with or without Bayesian fields)
|
||||
W->>T: assemble_trend_summary(..., probabilistic=flag)
|
||||
T-->>W: TrendSummary (with or without entropy/regime)
|
||||
W->>R: evaluate_eligibility(..., probabilistic=flag)
|
||||
R-->>W: Recommendation (with or without EV gate)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### New Modules
|
||||
|
||||
| Module | File | Responsibility |
|
||||
|--------|------|----------------|
|
||||
| Bayesian Accumulator | `services/aggregation/bayesian.py` | Log-likelihood accumulation, Beta posterior, P_bull, Bayesian confidence |
|
||||
| Regime Detector | `services/aggregation/regime.py` | EMA computation, volatility ratio, regime classification, threshold adjustment |
|
||||
| Adaptive Decay | integrated into `scoring.py` | Event-specific half-life computation from impact, surprise, market reaction |
|
||||
| Information Gain | integrated into `scoring.py` | Surprise weighting from event type base rates |
|
||||
| Source Accuracy | `services/aggregation/source_accuracy.py` | Historical prediction accuracy tracking per source |
|
||||
| Entropy Detector | integrated into `bayesian.py` | Shannon entropy for mixed signal detection |
|
||||
| EV Gate | integrated into `eligibility.py` | Expected value computation for recommendation eligibility |
|
||||
|
||||
### Modified Modules
|
||||
|
||||
| Module | File | Changes |
|
||||
|--------|------|---------|
|
||||
| Signal Scorer | `services/aggregation/scoring.py` | Sigmoid gate, info gain factor, adaptive decay, regime multiplier, source accuracy factor |
|
||||
| Trend Assembler | `services/aggregation/worker.py` | Bayesian confidence, entropy-based direction, regime-adjusted thresholds, entropy-based contradiction |
|
||||
| Contradiction | `services/aggregation/contradiction.py` | Weighted disagreement entropy replacing minority/majority ratio |
|
||||
| Macro Scorer | `services/aggregation/interpolation.py` | Multiplicative exposure formula, conditional integration mode |
|
||||
| Competitive Scorer | `services/aggregation/signal_propagation.py` | Graph-distance attenuation with historical correlation |
|
||||
| Projection Engine | `services/aggregation/projection.py` | Exponentially weighted momentum, volatility scaling |
|
||||
| Recommendation | `services/recommendation/eligibility.py` | EV gate, P_bull-based position sizing adjustments |
|
||||
| Config | `services/shared/config.py` | New probabilistic config parameters |
|
||||
| Schemas | `services/shared/schemas.py` | Optional new fields on TrendSummary, Recommendation |
|
||||
|
||||
### Component Interface Details
|
||||
|
||||
#### 1. Bayesian Accumulator (`services/aggregation/bayesian.py`)
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class BayesianPosterior:
|
||||
"""Bayesian posterior state from signal accumulation."""
|
||||
p_bull: float # σ(L_t), bullish probability [0, 1]
|
||||
alpha: float # Beta distribution α parameter (≥ 1.0)
|
||||
beta: float # Beta distribution β parameter (≥ 1.0)
|
||||
log_likelihood: float # Raw log-likelihood accumulation L_t
|
||||
bayesian_confidence: float # 1 - 4αβ/(α+β)², [0, 1]
|
||||
entropy: float # Shannon entropy H, [0, 1]
|
||||
signal_count: int # Number of signals processed
|
||||
|
||||
# Uninformative prior (no evidence)
|
||||
PRIOR = BayesianPosterior(
|
||||
p_bull=0.5, alpha=1.0, beta=1.0,
|
||||
log_likelihood=0.0, bayesian_confidence=0.0,
|
||||
entropy=1.0, signal_count=0,
|
||||
)
|
||||
|
||||
|
||||
def compute_bayesian_posterior(
|
||||
signals: list[WeightedSignal],
|
||||
) -> BayesianPosterior:
|
||||
"""Accumulate weighted signals into a Bayesian posterior.
|
||||
|
||||
Computes:
|
||||
- Log-likelihood: L_t = Σ(w_i · s_i)
|
||||
- Bullish probability: P_bull = σ(L_t)
|
||||
- Beta posterior: α = 1 + W_bull, β = 1 + W_bear
|
||||
- Bayesian confidence: C = 1 - 4αβ/(α+β)²
|
||||
- Shannon entropy: H = -p·log₂(p) - (1-p)·log₂(1-p)
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_entropy(p_bull: float) -> float:
|
||||
"""Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p).
|
||||
|
||||
Returns value in [0, 1]. Maximum at p=0.5, zero at p=0 or p=1.
|
||||
Handles edge cases p=0 and p=1 by returning 0.0.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 2. Regime Detector (`services/aggregation/regime.py`)
|
||||
|
||||
```python
|
||||
class MarketRegime(str, Enum):
|
||||
TREND_FOLLOWING = "trend_following"
|
||||
PANIC = "panic"
|
||||
MEAN_REVERSION = "mean_reversion"
|
||||
UNCERTAINTY = "uncertainty"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegimeClassification:
|
||||
"""Result of regime detection for a ticker."""
|
||||
regime: MarketRegime
|
||||
trend_indicator: float # R = sign(EMA_20 - EMA_100)
|
||||
volatility_ratio: float # V_r = σ_20 / σ_100
|
||||
bullish_threshold: float # Adjusted ±threshold for direction
|
||||
bearish_threshold: float
|
||||
contradiction_penalty_multiplier: float # 0.4 default, 0.6 for uncertainty
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RegimeConfig:
|
||||
ema_short_period: int = 20
|
||||
ema_long_period: int = 100
|
||||
vol_short_period: int = 20
|
||||
vol_long_period: int = 100
|
||||
panic_vol_ratio: float = 1.5
|
||||
trend_vol_ratio: float = 1.2
|
||||
mean_reversion_vol_ratio: float = 1.0
|
||||
default_threshold: float = 0.15
|
||||
panic_threshold: float = 0.10
|
||||
mean_reversion_threshold: float = 0.20
|
||||
uncertainty_contradiction_multiplier: float = 0.6
|
||||
|
||||
|
||||
def classify_regime(
|
||||
closing_prices: list[float],
|
||||
returns: list[float],
|
||||
config: RegimeConfig = RegimeConfig(),
|
||||
) -> RegimeClassification:
|
||||
"""Classify market regime from price and return history.
|
||||
|
||||
Requires at least 100 days of price history for EMA_100.
|
||||
Falls back to UNCERTAINTY when data is insufficient.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_ema(values: list[float], period: int) -> float:
|
||||
"""Compute exponential moving average over the last `period` values."""
|
||||
...
|
||||
```
|
||||
|
||||
#### 3. Source Accuracy Tracker (`services/aggregation/source_accuracy.py`)
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SourceAccuracy:
|
||||
"""Per-source historical prediction accuracy."""
|
||||
source_id: str
|
||||
accuracy_ratio: float # [0, 1] fraction of correct directional calls
|
||||
sample_count: int # Number of signals with known outcomes
|
||||
last_updated: datetime
|
||||
|
||||
@property
|
||||
def accuracy_factor(self) -> float:
|
||||
"""Multiplicative factor for credibility weight.
|
||||
|
||||
Returns 1.0 (neutral) when sample_count < 10.
|
||||
Otherwise scales linearly from 0.5 (0% accuracy) to 1.5 (100% accuracy).
|
||||
"""
|
||||
if self.sample_count < 10:
|
||||
return 1.0
|
||||
return 0.5 + self.accuracy_ratio
|
||||
|
||||
|
||||
async def fetch_source_accuracy(
|
||||
pool: asyncpg.Pool,
|
||||
source_ids: list[str],
|
||||
) -> dict[str, SourceAccuracy]:
|
||||
"""Fetch accuracy metrics for a batch of sources."""
|
||||
...
|
||||
|
||||
|
||||
async def update_source_accuracy(
|
||||
pool: asyncpg.Pool,
|
||||
source_id: str,
|
||||
realized_outcomes: list[tuple[str, float]], # (predicted_direction, actual_7d_return)
|
||||
) -> None:
|
||||
"""Update accuracy metrics for a source based on realized price data."""
|
||||
...
|
||||
```
|
||||
|
||||
#### 4. Extended ScoringConfig
|
||||
|
||||
New fields added to the existing `ScoringConfig` dataclass in `scoring.py`:
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class ScoringConfig:
|
||||
# ... existing fields preserved ...
|
||||
|
||||
# Probabilistic scoring toggle (mirrors feature flag for local use)
|
||||
probabilistic: bool = False
|
||||
|
||||
# Sigmoid gate parameters
|
||||
sigmoid_steepness: float = 5.0 # k in σ(k·(x - midpoint))
|
||||
sigmoid_midpoint: float = 0.5 # midpoint of sigmoid transition
|
||||
|
||||
# Information gain parameters
|
||||
info_gain_lambda: float = 0.3 # scaling parameter λ
|
||||
info_gain_max: float = 3.0 # maximum clamp for info gain factor
|
||||
default_base_rate: float = 0.1 # fallback when event type rate unknown
|
||||
|
||||
# Adaptive decay parameters (β scaling factors)
|
||||
adaptive_decay_impact_scale: float = 1.0 # max β_impact
|
||||
adaptive_decay_surprise_scale: float = 1.0 # max β_surprise at r=3.0
|
||||
adaptive_decay_market_scale: float = 0.5 # max β_market_reaction
|
||||
|
||||
# Regime multiplier parameters
|
||||
regime_return_weight: float = 0.15 # coefficient for |z_r|
|
||||
regime_volume_weight: float = 0.10 # coefficient for |z_v|
|
||||
regime_multiplier_max: float = 2.5 # M_regime ceiling
|
||||
```
|
||||
|
||||
#### 5. Extended WeightedSignal
|
||||
|
||||
The existing `WeightedSignal` dataclass gains optional fields:
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class WeightedSignal:
|
||||
"""A document intelligence reference paired with its computed weight."""
|
||||
document_id: str
|
||||
weight: SignalWeight
|
||||
sentiment_value: float
|
||||
impact_score: float
|
||||
|
||||
# New optional fields for probabilistic mode
|
||||
info_gain_factor: float = 1.0 # r = 1 + λ·(-log₂ P(event_type))
|
||||
source_accuracy_factor: float = 1.0 # [0.5, 1.5] from historical accuracy
|
||||
adaptive_half_life: float | None = None # τ_i when adaptive decay is active
|
||||
```
|
||||
|
||||
#### 6. Extended SignalWeight
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class SignalWeight:
|
||||
"""Breakdown of a document's aggregation weight."""
|
||||
recency: float
|
||||
credibility: float
|
||||
novelty_bonus: float
|
||||
confidence_gate: float
|
||||
market_ctx_multiplier: float
|
||||
combined: float
|
||||
|
||||
# New optional fields for probabilistic mode
|
||||
sigmoid_gate: float | None = None # Smooth gate value [0, 1]
|
||||
info_gain_factor: float = 1.0 # Surprise multiplier
|
||||
source_accuracy_factor: float = 1.0 # Historical accuracy multiplier
|
||||
regime_multiplier: float | None = None # M_regime replacing M_context
|
||||
```
|
||||
|
||||
#### 7. Extended TrendSummary
|
||||
|
||||
New optional fields on the existing Pydantic model:
|
||||
|
||||
```python
|
||||
class TrendSummary(BaseModel):
|
||||
# ... all existing fields preserved ...
|
||||
|
||||
# New optional fields for probabilistic mode
|
||||
p_bull: float | None = None # Bayesian bullish probability
|
||||
alpha: float | None = None # Beta posterior α
|
||||
beta_param: float | None = None # Beta posterior β (named to avoid shadowing)
|
||||
bayesian_confidence: float | None = None # 1 - 4αβ/(α+β)²
|
||||
entropy: float | None = None # Shannon entropy H
|
||||
regime: str | None = None # Market regime classification
|
||||
pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic"
|
||||
```
|
||||
|
||||
#### 8. Extended Recommendation
|
||||
|
||||
```python
|
||||
class Recommendation(BaseModel):
|
||||
# ... all existing fields preserved ...
|
||||
|
||||
# New optional fields for probabilistic mode
|
||||
expected_value: float | None = None # EV = P_bull·R_up - P_bear·R_down
|
||||
p_bull: float | None = None # Bayesian bullish probability used
|
||||
pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
### Database Storage Strategy
|
||||
|
||||
All new mathematical outputs are stored in existing JSONB columns. The only new database migration required is the one that creates the `source_accuracy` table (described below).
|
||||
|
||||
#### trend_windows table
|
||||
|
||||
The `market_context` JSONB column (currently stores volatility/volume data) is extended to include probabilistic outputs:
|
||||
|
||||
```json
|
||||
{
|
||||
"volatility": 1.23,
|
||||
"volume_change_pct": 45.2,
|
||||
"price_change_pct": -2.1,
|
||||
"probabilistic": {
|
||||
"p_bull": 0.72,
|
||||
"alpha": 8.3,
|
||||
"beta": 3.1,
|
||||
"log_likelihood": 0.94,
|
||||
"bayesian_confidence": 0.61,
|
||||
"entropy": 0.42,
|
||||
"regime": "trend_following",
|
||||
"regime_volatility_ratio": 0.85,
|
||||
"pipeline_mode": "probabilistic",
|
||||
"contradiction_entropy": 0.31,
|
||||
"macro_modifier": 1.15
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### recommendations table
|
||||
|
||||
The existing `invalidation_conditions` JSONB column stores recommendation-level data, but the `recommendations` table has no dedicated metadata JSONB column. The new EV and probabilistic fields are therefore written under a new key within the existing decision trace flow: the probabilistic fields are added to the thesis text, and the structured data is stored in the `risk_checks` JSONB column of the `recommendation_evaluations` table:
|
||||
|
||||
```json
|
||||
{
|
||||
"ev": 0.0082,
|
||||
"p_bull": 0.72,
|
||||
"r_up": 0.034,
|
||||
"r_down": 0.012,
|
||||
"pipeline_mode": "probabilistic",
|
||||
"ev_threshold": 0.005
|
||||
}
|
||||
```
|
||||
|
||||
#### risk_configs table
|
||||
|
||||
The `config` JSONB column gains the new feature flag:
|
||||
|
||||
```json
|
||||
{
|
||||
"macro_enabled": true,
|
||||
"competitive_enabled": true,
|
||||
"probabilistic_scoring_enabled": false
|
||||
}
|
||||
```
|
||||
|
||||
#### source_accuracy table (new — Requirement 4)
|
||||
|
||||
This is the only new database table required; it is created via a migration:
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS source_accuracy (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
source_id VARCHAR(200) NOT NULL,
|
||||
accuracy_ratio FLOAT NOT NULL DEFAULT 0.5,
|
||||
sample_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
UNIQUE(source_id)
|
||||
);
|
||||
CREATE INDEX idx_source_accuracy_source ON source_accuracy(source_id);
|
||||
```
|
||||
|
||||
Note: This is the only schema addition. All other new outputs use existing JSONB columns.
|
||||
|
||||
### Event Type Base Rates
|
||||
|
||||
Information gain computation requires empirical base rates for event types. These are stored as a configuration constant (not in the database) and can be tuned over time:
|
||||
|
||||
```python
|
||||
EVENT_TYPE_BASE_RATES: dict[str, float] = {
|
||||
"earnings": 0.25, # Quarterly, common
|
||||
"product_launch": 0.10, # Moderately rare
|
||||
"regulatory": 0.08, # Somewhat rare
|
||||
"legal": 0.05, # Rare
|
||||
"m_and_a": 0.03, # Very rare
|
||||
"management_change": 0.06,
|
||||
"partnership": 0.12,
|
||||
"market_expansion": 0.09,
|
||||
"restructuring": 0.04,
|
||||
"dividend": 0.15,
|
||||
}
|
||||
DEFAULT_BASE_RATE = 0.1 # For unknown event types
|
||||
```
|
||||
|
||||
### Configuration Hierarchy
|
||||
|
||||
```
|
||||
risk_configs.config (DB, runtime)
|
||||
└── probabilistic_scoring_enabled: bool
|
||||
└── AggregationConfig.probabilistic: bool (in-memory)
|
||||
└── ScoringConfig.probabilistic: bool (per-cycle)
|
||||
├── scoring.py: sigmoid vs binary gate
|
||||
├── scoring.py: adaptive vs fixed decay
|
||||
├── scoring.py: info gain factor
|
||||
├── scoring.py: regime multiplier vs market context
|
||||
├── worker.py: Bayesian vs heuristic confidence
|
||||
├── worker.py: entropy vs threshold direction
|
||||
├── contradiction.py: entropy vs ratio
|
||||
├── interpolation.py: multiplicative vs linear
|
||||
├── signal_propagation.py: graph-distance vs flat
|
||||
├── projection.py: EW momentum vs simple
|
||||
└── eligibility.py: EV gate vs threshold-only
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Correctness Properties
|
||||
|
||||
*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.*
|
||||
|
||||
The following properties were derived from the acceptance criteria through systematic prework analysis. Each property is universally quantified and maps to specific requirements. Redundant properties were consolidated during reflection (e.g., requirements 17.1–17.7 duplicate properties already stated in requirements 1–15).
|
||||
|
||||
### Property 1: Sigmoid Gate Monotonicity
|
||||
|
||||
*For any* two extraction confidence values x₁, x₂ ∈ [0.0, 1.0] where x₁ ≤ x₂, the sigmoid gate σ(5·(x₁ - 0.5)) SHALL be less than or equal to σ(5·(x₂ - 0.5)). Higher confidence always produces equal or higher gate values.
|
||||
|
||||
**Validates: Requirements 2.6, 17.1**
|
||||
|
||||
### Property 2: Beta Posterior Evidence Accumulation
|
||||
|
||||
*For any* sequence of weighted signal sets where each successive set contains one additional signal, the sum α + β of the Beta posterior parameters SHALL increase monotonically. Evidence always accumulates — adding a signal never reduces the total evidence mass.
|
||||
|
||||
**Validates: Requirements 1.3, 17.2**
|
||||
|
||||
### Property 3: Bayesian Confidence Symmetry and Divergence
|
||||
|
||||
*For any* Beta posterior with parameters α, β ≥ 1.0, the Bayesian confidence C = 1 - 4αβ/(α+β)² SHALL equal 0.0 when α = β (maximum uncertainty) and SHALL increase monotonically as the ratio max(α/β, β/α) increases. Confidence reflects evidence concentration, not evidence volume.
|
||||
|
||||
**Validates: Requirements 1.4, 17.3**
|
||||
|
||||
### Property 4: Bayesian Posterior Round-Trip Consistency
|
||||
|
||||
*For any* set of weighted signals with uniform weights, computing the Beta posterior and extracting the mean P_bull = α/(α+β) SHALL produce a value within 0.05 of σ(L_t) where L_t is the log-likelihood accumulation. The two probabilistic representations are consistent.
|
||||
|
||||
**Validates: Requirements 1.7, 17.7**
|
||||
|
||||
### Property 5: Adaptive Decay Lower Bound
|
||||
|
||||
*For any* valid combination of impact_score ∈ [0, 1], information gain factor r ∈ [1.0, 3.0], and market context multiplier ∈ [1.0, 1.45], the adaptive half-life τ_i SHALL be greater than or equal to the base half-life τ_base. Adaptive decay is always slower or equal to fixed decay, never faster.
|
||||
|
||||
**Validates: Requirements 5.7, 17.4**
|
||||
|
||||
### Property 6: Information Gain Monotonicity
|
||||
|
||||
*For any* two event type base rates p₁, p₂ ∈ (0, 1] where p₁ < p₂, the information gain factor r(p₁) SHALL be greater than or equal to r(p₂). Rarer events always receive higher surprise weight.
|
||||
|
||||
**Validates: Requirements 3.5**
|
||||
|
||||
### Property 7: Multiplicative Macro Exposure Monotonicity
|
||||
|
||||
*For any* overlap configuration (O_geo, O_supply, O_commodity, O_sector) and any dimension k where O_k = 0, setting O_k to any positive value SHALL increase the total macro impact score. Multi-dimensional exposure always compounds — it never reduces impact.
|
||||
|
||||
**Validates: Requirements 10.7, 17.5**
|
||||
|
||||
### Property 8: Shannon Entropy Range and Maximum
|
||||
|
||||
*For any* bullish probability P_bull ∈ (0, 1), the Shannon entropy H = -P_bull·log₂(P_bull) - (1-P_bull)·log₂(1-P_bull) SHALL be in the range (0, 1], with the maximum value of 1.0 occurring at P_bull = 0.5.
|
||||
|
||||
**Validates: Requirements 9.7**
|
||||
|
||||
### Property 9: Contradiction Entropy Monotonicity
|
||||
|
||||
*For any* set of weighted signals containing both positive and negative sentiment signals, the contradiction entropy score SHALL increase monotonically as the weight distribution f_pos approaches 0.5 (equal split). More balanced disagreement always produces higher contradiction.
|
||||
|
||||
**Validates: Requirements 15.7**
|
||||
|
||||
### Property 10: Exponentially Weighted Momentum Direction
|
||||
|
||||
*For any* sequence of monotonically increasing signed trend strengths (each ΔS_{t-k} > 0), the exponentially weighted momentum M_t SHALL be positive. Consistently strengthening bullish trends always produce positive momentum.
|
||||
|
||||
**Validates: Requirements 13.6, 17.6**
|
||||
|
||||
### Property 11: Competitive Signal Distance Attenuation
|
||||
|
||||
*For any* source-target company pair with fixed source signal strength S_source and historical correlation ρ_historical, the transfer strength S_transfer SHALL decrease monotonically with increasing graph distance d_network. Closer competitors always receive stronger signal transfer.
|
||||
|
||||
**Validates: Requirements 12.7**
|
||||
|
||||
### Property 12: Expected Value Directional Consistency
|
||||
|
||||
*For any* Bayesian bullish probability P_bull > 0.5 and estimated returns R_up > R_down ≥ 0 (both expressed as non-negative magnitudes), the expected value EV = P_bull · R_up - (1 - P_bull) · R_down SHALL be positive. When the model is bullish and upside exceeds downside, EV is always positive.
|
||||
|
||||
**Validates: Requirements 17.8**
|
||||
|
||||
### Property 13: Bayesian Confidence Monotonic with Agreeing Signals
|
||||
|
||||
*For any* set of weighted signals where all signals agree on direction (all positive or all negative), adding one more agreeing signal SHALL increase the Bayesian confidence C. More agreeing evidence always increases confidence.
|
||||
|
||||
**Validates: Requirements 8.6**
|
||||
|
||||
### Property 14: Numerical Stability Across All Formulas
|
||||
|
||||
*For any* valid input combination to any formula in the probabilistic pipeline (sigmoid gate, Beta posterior, Bayesian confidence, adaptive decay, regime multiplier, Shannon entropy, multiplicative exposure, EW momentum, expected value), the output SHALL be a finite float (not NaN, not infinity) within the documented range for that formula. This includes regime multiplier M_regime ∈ [1.0, 2.5], entropy H ∈ [0, 1], P_bull ∈ [0, 1], confidence ∈ [0, 1], and M_adj ∈ [-2.0, 2.0].
|
||||
|
||||
**Validates: Requirements 17.9, 6.4**
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Numerical Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| P_bull = 0.0 or 1.0 (entropy undefined) | Return H = 0.0 (no uncertainty at extremes) |
|
||||
| σ_20 = 0.0 (zero volatility for momentum scaling) | Use floor max(σ_20, 0.01) per Req 13.4 |
|
||||
| σ_20 = 0.0 or σ_100 = 0.0 (volatility ratio) | Default to uncertainty regime |
|
||||
| log₂(0) in entropy computation | Guard with `if p <= 0 or p >= 1: return 0.0` |
|
||||
| log₂(0) in information gain (base_rate = 0) | Base rates must be > 0; use default 0.1 for unknown |
|
||||
| Division by zero in z-score (σ = 0) | Use M_regime = 1.0 when σ = 0 |
|
||||
| Empty signal list | Return uninformative prior (P_bull=0.5, α=1, β=1, C=0) |
|
||||
| All neutral signals (no positive or negative) | Contradiction = 0.0, direction = neutral |
|
||||
| Extremely large weights (overflow risk) | Python floats handle up to ~1.8e308; clamp combined weight if needed |
|
||||
| NaN from upstream data | Validate inputs; skip signals with NaN weight or sentiment |
|
||||
|
||||
### Feature Flag Failure Modes
|
||||
|
||||
| Failure | Behavior |
|
||||
|---------|----------|
|
||||
| `risk_configs` table unreachable | Default to `probabilistic_scoring_enabled = false` (heuristic mode) |
|
||||
| `config` JSONB missing the key | Default to `false` |
|
||||
| Invalid value type for flag | Default to `false`, log warning |
|
||||
| Flag changes mid-cycle | Flag is read once at cycle start; change takes effect next cycle |
|
||||
|
||||
### Source Accuracy Failures
|
||||
|
||||
| Failure | Behavior |
|
||||
|---------|----------|
|
||||
| `source_accuracy` table unreachable | Use neutral factor 1.0 for all sources |
|
||||
| Accuracy update fails | Log error, continue with stale accuracy data |
|
||||
| Corrupted accuracy data (ratio > 1.0 or < 0.0) | Clamp to [0.0, 1.0] |
|
||||
|
||||
### Regime Detection Failures
|
||||
|
||||
| Failure | Behavior |
|
||||
|---------|----------|
|
||||
| Market data unavailable | Default to uncertainty regime with default thresholds |
|
||||
| Insufficient price history (< 100 days) | Default to uncertainty regime |
|
||||
| Price data contains gaps | Use available data; EMA computation handles gaps gracefully |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Dual Testing Approach
|
||||
|
||||
The signal math upgrade requires both property-based tests (for mathematical correctness) and example-based unit tests (for specific behaviors and integration points). Property-based testing is highly appropriate here because the feature consists primarily of pure mathematical functions with clear input/output behavior, universal properties that hold across wide input spaces, and well-defined range invariants.
|
||||
|
||||
### Property-Based Testing
|
||||
|
||||
**Library:** Hypothesis (already in use per `.hypothesis/` directory and project conventions)
|
||||
|
||||
**Configuration:**
|
||||
- Minimum 100 iterations per property: `@settings(max_examples=100)`
|
||||
- File naming: `test_pbt_signal_math.py` (or split by module)
|
||||
- Tag format: `# Feature: signal-math-upgrade, Property N: <title>`
|
||||
|
||||
**Property tests to implement (one test per correctness property):**
|
||||
|
||||
| Property | Test File | Key Generators |
|
||||
|----------|-----------|----------------|
|
||||
| 1: Sigmoid monotonicity | `test_pbt_signal_math.py` | `st.floats(0.0, 1.0)` pairs |
|
||||
| 2: Evidence accumulation | `test_pbt_signal_math.py` | `st.lists(weighted_signal_strategy)` |
|
||||
| 3: Confidence symmetry/divergence | `test_pbt_signal_math.py` | `st.floats(1.0, 100.0)` for α, β |
|
||||
| 4: Posterior round-trip | `test_pbt_signal_math.py` | `st.lists(uniform_weight_signal_strategy)` |
|
||||
| 5: Adaptive decay lower bound | `test_pbt_signal_math.py` | `st.floats` for impact, surprise, market |
|
||||
| 6: Info gain monotonicity | `test_pbt_signal_math.py` | `st.floats(0.001, 1.0)` pairs |
|
||||
| 7: Macro exposure monotonicity | `test_pbt_signal_math.py` | `st.floats(0.0, 1.0)` for overlaps |
|
||||
| 8: Entropy range/maximum | `test_pbt_signal_math.py` | `st.floats(0.001, 0.999)` for P_bull |
|
||||
| 9: Contradiction monotonicity | `test_pbt_signal_math.py` | Signal sets with varying weight splits |
|
||||
| 10: EW momentum direction | `test_pbt_signal_math.py` | `st.lists(st.floats)` monotonic sequences |
|
||||
| 11: Distance attenuation | `test_pbt_signal_math.py` | `st.integers(1, 3)` for distance |
|
||||
| 12: EV directional consistency | `test_pbt_signal_math.py` | `st.floats(0.5, 1.0)` for P_bull |
|
||||
| 13: Confidence with agreeing signals | `test_pbt_signal_math.py` | Growing lists of same-direction signals |
|
||||
| 14: Numerical stability | `test_pbt_signal_math.py` | Broad `st.floats` for all formula inputs |
|
||||
|
||||
### Example-Based Unit Tests
|
||||
|
||||
**File:** `test_signal_math_unit.py`
|
||||
|
||||
| Test Area | Examples |
|
||||
|-----------|----------|
|
||||
| Sigmoid gate specific values | x=0.5→0.5, x=0.2→<0.05, x=0.8→>0.95 |
|
||||
| Uninformative prior | Empty signals → P_bull=0.5, α=1, β=1, C=0 |
|
||||
| Default base rate | Unknown event type → base_rate=0.1 |
|
||||
| Info gain clamp | Very rare event → factor ≤ 3.0 |
|
||||
| Source accuracy threshold | sample_count < 10 → factor=1.0 |
|
||||
| Adaptive decay edge cases | All zeros → τ_base, all max → 6×τ_base |
|
||||
| Regime classification | Specific (R, V_r) → expected regime |
|
||||
| Regime thresholds | panic→0.10, mean_reversion→0.20, etc. |
|
||||
| Entropy direction mapping | H>0.9→mixed, P_bull>0.65→bullish, etc. |
|
||||
| Zero overlap → zero impact | All overlaps zero → S_macro=0 |
|
||||
| Max overlap value | All overlaps 1.0 → ≈severity×0.724 |
|
||||
| Macro fallback behaviors | Only macro → additive, only company → no modifier |
|
||||
| Graph distance cutoff | d>3 → no propagation |
|
||||
| Momentum fallback | <2 cycles → heuristic fallback |
|
||||
| EV threshold behavior | EV>0.005→proceed, EV≤0.005→informational |
|
||||
| Feature flag behaviors | flag=false→heuristic, flag=true→probabilistic |
|
||||
| Heuristic equivalence | flag=false produces identical outputs to current system |
|
||||
|
||||
### Integration Tests
|
||||
|
||||
| Test Area | Scope |
|
||||
|-----------|-------|
|
||||
| Source accuracy persistence | Write/read from source_accuracy table |
|
||||
| Regime persistence | Store/retrieve regime in JSONB |
|
||||
| EV persistence | Store/retrieve EV in recommendation_evaluations |
|
||||
| Feature flag reading | Read probabilistic_scoring_enabled from risk_configs |
|
||||
| End-to-end pipeline | Full aggregation cycle with probabilistic=true |
|
||||
|
||||
### Test Organization
|
||||
|
||||
```
|
||||
tests/
|
||||
├── test_pbt_signal_math.py # All 14 property-based tests
|
||||
├── test_signal_math_unit.py # Example-based unit tests
|
||||
├── test_bayesian.py # Bayesian accumulator unit tests
|
||||
├── test_regime.py # Regime detector unit tests
|
||||
├── test_source_accuracy.py # Source accuracy tracker tests
|
||||
└── test_signal_math_integration.py # Integration tests (DB required)
|
||||
```
|
||||
@@ -0,0 +1,293 @@
|
||||
# Requirements Document — Signal Math Upgrade
|
||||
|
||||
## Introduction
|
||||
|
||||
The Stonks Oracle platform uses a three-layer signal aggregation engine (company-specific, macro, competitive) to produce market intelligence and drive paper-trading decisions. The current mathematical models are structurally too deterministic and too linear for a market system that is fundamentally probabilistic, regime-dependent, and nonlinear. The pipeline behaves as weighted sentiment aggregation with heuristics rather than a probabilistic forecasting engine.
|
||||
|
||||
This feature upgrades the signal processing mathematics across all pipeline stages — from signal scoring through trend assembly, macro impact, competitive signals, trend projection, and recommendation generation — to replace heuristic formulas with probabilistic, regime-aware, and adaptive alternatives. The goal is to transform prediction quality while preserving the existing `WeightedSignal` abstraction, three-layer architecture, and database schema compatibility.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Aggregation_Engine**: The core pipeline in `services/aggregation/worker.py` that merges signals from all three layers and computes `TrendSummary` objects across five time windows.
|
||||
- **Signal_Scorer**: The scoring module in `services/aggregation/scoring.py` that transforms raw intelligence records into `WeightedSignal` objects with composite aggregation weights.
|
||||
- **Trend_Assembler**: The component in `services/aggregation/worker.py` that derives trend direction, strength, confidence, and contradiction from merged weighted signals.
|
||||
- **Macro_Scorer**: The macro impact scoring module in `services/aggregation/interpolation.py` that computes per-company impact from global events using overlap-based exposure profiles.
|
||||
- **Competitive_Scorer**: The competitive signal modules in `services/aggregation/pattern_matcher.py` and `services/aggregation/signal_propagation.py` that mine historical patterns and propagate cross-company signals.
|
||||
- **Projection_Engine**: The trend projection module in `services/aggregation/projection.py` that computes forward-looking trend estimates from momentum and macro decay.
|
||||
- **Recommendation_Engine**: The recommendation pipeline in `services/recommendation/` that translates trend assessments into actionable buy/sell/hold/watch decisions with position sizing.
|
||||
- **WeightedSignal**: The core data abstraction pairing a document reference with a composite aggregation weight, sentiment value, and impact score.
|
||||
- **Beta_Distribution**: A probability distribution on [0, 1] parameterized by α and β, used to model the posterior probability of bullish vs bearish sentiment.
|
||||
- **Regime_Detector**: A new component that classifies the current market regime (trend-following, panic, mean-reversion, uncertainty) from price and volume statistics.
|
||||
- **Sigmoid_Function**: The logistic function σ(x) = 1/(1+e^(-x)) used to convert log-likelihood accumulations into probabilities.
|
||||
- **Adaptive_Decay**: A recency decay mechanism where the half-life varies per signal based on event impact, surprise, and market reaction rather than using a fixed constant per window.
|
||||
- **Information_Gain**: A measure of how surprising an event is relative to its base rate, computed as -log₂ P(event_type), used to weight novel signals more heavily.
|
||||
- **Entropy**: Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p), measured in bits so that H ranges over [0, 1], used to detect mixed sentiment states where the probability distribution is spread rather than concentrated.
|
||||
- **EMA**: Exponential Moving Average, a weighted moving average giving more weight to recent observations, used for trend and volatility regime detection.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Probabilistic Sentiment Accumulation via Bayesian Evidence
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the signal scoring layer to accumulate sentiment evidence probabilistically using Bayesian methods, so that the system captures uncertainty structure instead of collapsing sentiment into binary ±1 labels.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a set of weighted signals is provided for a ticker and window, THE Signal_Scorer SHALL compute a log-likelihood accumulation L_t = Σ(w_i · s_i) where w_i is the combined signal weight and s_i is the sentiment value.
|
||||
2. WHEN the log-likelihood L_t has been computed, THE Signal_Scorer SHALL convert the accumulation to a bullish probability using the Sigmoid_Function: P_bull = σ(L_t) = 1/(1+e^(-L_t)).
|
||||
3. WHEN weighted signals are provided, THE Signal_Scorer SHALL maintain a Beta_Distribution posterior with parameters α_t = α_0 + W_bull and β_t = β_0 + W_bear, where W_bull is the sum of combined weights for positive signals and W_bear is the sum for negative signals, and α_0 = β_0 = 1.0 as uninformative priors.
|
||||
4. THE Signal_Scorer SHALL compute Bayesian confidence from the Beta_Distribution posterior parameters as C = 1 - 4αβ/(α+β)² (equivalently ((α-β)/(α+β))²), where C ranges from 0.0 (maximum uncertainty at α=β) to approaching 1.0 (strong evidence concentration).
|
||||
5. WHEN no signals exist for a ticker and window, THE Signal_Scorer SHALL return P_bull = 0.5, α = 1.0, β = 1.0, and C = 0.0, representing the uninformative prior state.
|
||||
6. THE Signal_Scorer SHALL preserve the existing `WeightedSignal` dataclass interface, adding the Bayesian posterior fields (P_bull, α, β, Bayesian confidence) as additional output alongside the existing weighted sentiment average.
|
||||
7. FOR ALL valid sets of weighted signals, computing the Beta posterior then extracting P_bull SHALL produce a value within 0.05 of σ(L_t) when signal weights are uniform (round-trip consistency between the two probabilistic representations).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 2: Sigmoid Confidence Gate Replacing Binary Gate
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the binary confidence gate replaced with a smooth sigmoid transition, so that marginally confident signals contribute proportionally rather than being completely discarded or fully included.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a document signal has extraction confidence x, THE Signal_Scorer SHALL compute a soft gate value p = σ(5·(x - 0.5)) = 1/(1+e^(-5·(x-0.5))) instead of the current binary 0/1 gate.
|
||||
2. WHEN extraction confidence is 0.5, THE Signal_Scorer SHALL produce a gate value of 0.5 (the sigmoid midpoint).
|
||||
3. WHEN extraction confidence is below 0.2, THE Signal_Scorer SHALL produce a gate value below 0.19 (σ(5·(0.2 - 0.5)) = σ(-1.5) ≈ 0.182), strongly attenuating very low confidence signals.
|
||||
4. WHEN extraction confidence is above 0.8, THE Signal_Scorer SHALL produce a gate value above 0.81 (σ(5·(0.8 - 0.5)) = σ(1.5) ≈ 0.818), preserving most of the weight for high confidence signals.
|
||||
5. THE Signal_Scorer SHALL use the sigmoid gate value as a multiplicative factor in the combined weight formula in place of the current binary G_conf.
|
||||
6. FOR ALL extraction confidence values in [0.0, 1.0], THE Signal_Scorer SHALL produce gate values that are monotonically increasing (higher confidence always produces equal or higher gate values).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 3: Information Gain Surprise Weighting
|
||||
|
||||
**User Story:** As a quantitative analyst, I want signals weighted by their information gain (surprise factor), so that rare and unexpected events receive proportionally higher influence than routine signals.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a signal has a known event type (e.g., earnings, product_launch, regulatory, legal, m_and_a), THE Signal_Scorer SHALL compute an information gain factor r = 1 + λ·(-log₂ P(event_type)), where P(event_type) is the empirical base rate of that event type and λ is a configurable scaling parameter with default 0.3.
|
||||
2. WHEN the event type base rate is not available, THE Signal_Scorer SHALL use a default base rate of 0.1 (treating the event as moderately rare).
|
||||
3. THE Signal_Scorer SHALL multiply the information gain factor r into the combined weight formula as an additional multiplicative component.
|
||||
4. THE Signal_Scorer SHALL clamp the information gain factor to a maximum of 3.0 to prevent extremely rare events from dominating the aggregation.
|
||||
5. FOR ALL event types with base rate in (0, 1], THE Signal_Scorer SHALL produce information gain factors that are monotonically non-increasing with increasing base rate (rarer events always receive equal or higher surprise weight, with equality only once the 3.0 clamp is reached).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 4: Historical Source Accuracy Tracking
|
||||
|
||||
**User Story:** As a quantitative analyst, I want source credibility to incorporate historical prediction accuracy, so that sources with a track record of correct directional calls receive higher weight.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Signal_Scorer SHALL maintain a per-source accuracy metric computed as the fraction of past signals from that source where the predicted direction matched the subsequent 7-day price movement direction.
|
||||
2. WHEN a source has at least 10 historical signals with known outcomes, THE Signal_Scorer SHALL incorporate the source accuracy as a multiplicative factor on the credibility weight, scaled linearly from 0.5 (0% accuracy) to 1.5 (100% accuracy).
|
||||
3. WHEN a source has fewer than 10 historical signals, THE Signal_Scorer SHALL use a neutral accuracy factor of 1.0 (no adjustment).
|
||||
4. THE Signal_Scorer SHALL update source accuracy metrics asynchronously after each aggregation cycle, using realized price data from the market data tables.
|
||||
5. THE Signal_Scorer SHALL store source accuracy metrics in a database table with columns for source identifier, accuracy ratio, sample count, and last updated timestamp.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 5: Adaptive Recency Decay with Event-Specific Half-Lives
|
||||
|
||||
**User Story:** As a quantitative analyst, I want recency decay half-lives to adapt based on event characteristics, so that high-impact events persist longer in the aggregation while routine signals decay faster.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN computing recency decay for a signal, THE Signal_Scorer SHALL use an adaptive half-life τ_i = τ_base · (1 + β_impact) · (1 + β_surprise) · (1 + β_market_reaction), where τ_base is the current fixed half-life for the window.
|
||||
2. THE Signal_Scorer SHALL compute β_impact from the signal's impact score, scaled linearly from 0.0 (impact_score = 0) to 1.0 (impact_score = 1.0).
|
||||
3. THE Signal_Scorer SHALL compute β_surprise from the information gain factor (Requirement 3), scaled linearly from 0.0 (r = 1.0, no surprise) to 1.0 (r = 3.0, maximum surprise).
|
||||
4. THE Signal_Scorer SHALL compute β_market_reaction from the market context multiplier, scaled linearly from 0.0 (multiplier = 1.0, no market reaction) to 0.5 (multiplier = 1.45, maximum market reaction).
|
||||
5. WHEN all three β factors are at their maximum, THE Signal_Scorer SHALL produce an adaptive half-life of at most 6× the base half-life (τ_base · 2.0 · 2.0 · 1.5 = 6.0 · τ_base).
|
||||
6. WHEN all three β factors are zero (routine, unsurprising signal in calm market), THE Signal_Scorer SHALL produce the same half-life as the current fixed system (τ_base).
|
||||
7. FOR ALL combinations of impact, surprise, and market reaction values, THE Signal_Scorer SHALL produce adaptive half-lives that are greater than or equal to τ_base (adaptive decay is always slower or equal to the base decay, never faster).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 6: Volatility-Adjusted Normalization (Regime-Aware Scoring)
|
||||
|
||||
**User Story:** As a quantitative analyst, I want signal weights normalized by current market volatility and volume conditions, so that the same signal magnitude is interpreted differently in calm vs volatile markets.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN market data is available for a ticker, THE Signal_Scorer SHALL compute a return z-score z_r = (r_t - μ_20) / σ_20, where r_t is the current return, μ_20 is the 20-day mean return, and σ_20 is the 20-day return standard deviation.
|
||||
2. WHEN market data is available for a ticker, THE Signal_Scorer SHALL compute a volume z-score z_v = (log(V_t) - μ_V) / σ_V, where V_t is the current volume, μ_V is the 20-day mean of log-volume, and σ_V is the 20-day standard deviation of log-volume.
|
||||
3. THE Signal_Scorer SHALL compute a regime multiplier M_regime = 1 + 0.15·|z_r| + 0.10·|z_v|, which amplifies signal weights during abnormal market conditions.
|
||||
4. THE Signal_Scorer SHALL clamp M_regime to the range [1.0, 2.5] to prevent extreme z-scores from producing runaway weight amplification.
|
||||
5. WHEN market data is not available for a ticker, THE Signal_Scorer SHALL use M_regime = 1.0 (no regime adjustment).
|
||||
6. THE Signal_Scorer SHALL replace the current market context multiplier (M_context) with M_regime in the combined weight formula.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 7: Regime Detection and Classification
|
||||
|
||||
**User Story:** As a quantitative analyst, I want the system to detect and classify the current market regime for each ticker, so that scoring thresholds and behavior adapt to whether the market is trending, panicking, mean-reverting, or uncertain.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN market data is available, THE Regime_Detector SHALL compute a trend indicator R = sign(EMA_20 - EMA_100), where EMA_20 and EMA_100 are exponential moving averages of closing prices over 20 and 100 days respectively.
|
||||
2. WHEN market data is available, THE Regime_Detector SHALL compute a volatility ratio V_r = σ_20 / σ_100, where σ_20 and σ_100 are the 20-day and 100-day return standard deviations.
|
||||
3. THE Regime_Detector SHALL classify the market regime into one of four categories based on R and V_r: trend-following (R ≠ 0 AND V_r < 1.2), panic (V_r > 1.5), mean-reversion (R = 0 AND V_r < 1.0), uncertainty (all other cases).
|
||||
4. WHEN the regime is classified as panic, THE Aggregation_Engine SHALL reduce the bullish/bearish threshold from ±0.15 to ±0.10 (making the system more sensitive to directional signals during high-volatility periods).
|
||||
5. WHEN the regime is classified as mean-reversion, THE Aggregation_Engine SHALL increase the bullish/bearish threshold from ±0.15 to ±0.20 (requiring stronger evidence for directional calls in range-bound markets).
|
||||
6. WHEN the regime is classified as trend-following, THE Aggregation_Engine SHALL use the default thresholds of ±0.15.
|
||||
7. WHEN the regime is classified as uncertainty, THE Aggregation_Engine SHALL use the default thresholds of ±0.15 and increase the contradiction penalty multiplier from 0.4 to 0.6.
|
||||
8. THE Regime_Detector SHALL persist the current regime classification per ticker to the database for auditability and dashboard display.
|
||||
9. WHEN market data is insufficient to compute EMA_100 (fewer than 100 days of price history), THE Regime_Detector SHALL default to the uncertainty regime.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 8: Bayesian Posterior Confidence Replacing Heuristic Confidence
|
||||
|
||||
**User Story:** As a quantitative analyst, I want trend confidence derived from the Bayesian posterior distribution rather than the current heuristic weighted formula, so that confidence reflects actual evidence concentration rather than an ad-hoc combination of factors.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN computing trend confidence, THE Trend_Assembler SHALL use the Bayesian confidence C = 1 - 4αβ/(α+β)² from the Beta_Distribution posterior (Requirement 1) as the primary confidence component with weight 0.5.
|
||||
2. THE Trend_Assembler SHALL retain the source count factor (min(N_unique/15, 0.8)) as a secondary confidence component with weight 0.25, rewarding evidence breadth.
|
||||
3. THE Trend_Assembler SHALL retain the contradiction penalty (contradiction_score × 0.4) as a confidence reduction.
|
||||
4. THE Trend_Assembler SHALL compute the combined confidence as: confidence = 0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction, clamped to [0.0, 1.0].
|
||||
5. THE Trend_Assembler SHALL preserve the existing confidence thresholds for recommendation eligibility (0.35 minimum, 0.50 paper, 0.70 live) without modification.
|
||||
6. FOR ALL signal sets where all signals agree on direction, THE Trend_Assembler SHALL produce Bayesian confidence that increases monotonically with the number of agreeing signals.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 9: Entropy-Based Mixed Signal Detection
|
||||
|
||||
**User Story:** As a quantitative analyst, I want mixed trend detection based on Shannon entropy rather than simple contradiction thresholds, so that the system can distinguish between genuine uncertainty (high entropy) and weak signal (low total weight).
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN the bullish probability P_bull has been computed from the Bayesian posterior, THE Trend_Assembler SHALL compute Shannon entropy H = -P_bull·log₂(P_bull) - (1-P_bull)·log₂(1-P_bull).
|
||||
2. WHEN H > 0.9 (entropy close to maximum of 1.0, indicating near-equal probability of bullish and bearish), THE Trend_Assembler SHALL classify the trend direction as mixed, regardless of the weighted sentiment average.
|
||||
3. WHEN H ≤ 0.9 AND P_bull > 0.65, THE Trend_Assembler SHALL classify the trend direction as bullish.
|
||||
4. WHEN H ≤ 0.9 AND P_bull < 0.35, THE Trend_Assembler SHALL classify the trend direction as bearish.
|
||||
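5. WHEN H ≤ 0.9 AND 0.35 ≤ P_bull ≤ 0.65, THE Trend_Assembler SHALL classify the trend direction as neutral. (Note: binary entropy exceeds 0.9 for every P_bull in approximately (0.316, 0.684), so this condition cannot be satisfied under the H > 0.9 threshold of criterion 2; the entropy and probability thresholds need to be reconciled before this branch is reachable.)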
|
||||
6. THE Trend_Assembler SHALL persist the entropy value H alongside the trend summary for auditability.
|
||||
7. FOR ALL P_bull values in (0, 1), THE Trend_Assembler SHALL compute entropy values in (0, 1], with maximum entropy of 1.0 occurring at P_bull = 0.5.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 10: Multiplicative Macro Exposure Scoring
|
||||
|
||||
**User Story:** As a quantitative analyst, I want macro impact computed using multiplicative exposure rather than linear weighted sums, so that a company exposed across multiple dimensions receives compounding impact rather than simple addition.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN computing macro impact for a company, THE Macro_Scorer SHALL use the multiplicative exposure formula S_macro = severity · (1 - Π_k(1 - w_k · O_k)), where O_k are the overlap components (geographic, supply chain, commodity, sector) and w_k are their respective weights.
|
||||
2. THE Macro_Scorer SHALL use the following overlap weights: w_geo = 0.35, w_supply = 0.25, w_commodity = 0.25, w_sector = 0.15 (matching the current linear weight distribution).
|
||||
3. WHEN a company has zero overlap across all dimensions, THE Macro_Scorer SHALL produce S_macro = 0.0 (no impact).
|
||||
4. WHEN a company has maximum overlap across all dimensions (all O_k = 1.0), THE Macro_Scorer SHALL produce S_macro = severity · (1 - (1-0.35)·(1-0.25)·(1-0.25)·(1-0.15)), which is approximately severity · 0.689.
|
||||
5. THE Macro_Scorer SHALL preserve the existing severity weight mapping (critical=1.0, high=0.75, moderate=0.5, low=0.25).
|
||||
6. THE Macro_Scorer SHALL preserve the existing resilience modifier (R_tier) applied after the multiplicative exposure computation.
|
||||
7. FOR ALL overlap configurations, THE Macro_Scorer SHALL produce impact scores where adding a non-zero overlap in any dimension increases the total impact (monotonicity property).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 11: Conditional Macro Signal Integration
|
||||
|
||||
**User Story:** As a quantitative analyst, I want macro signals treated as conditional modifiers on company signals rather than additive contributions, so that macro context amplifies or dampens existing company-level evidence rather than independently shifting the trend.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN both company signals and macro signals exist for a ticker, THE Aggregation_Engine SHALL apply macro impact as a multiplicative modifier on the company signal strength: S_adjusted = S_company · (1 + M_macro · sign_alignment), where M_macro is the normalized macro impact and sign_alignment is +1 when macro and company signals agree in direction, -1 when they disagree.
|
||||
2. THE Aggregation_Engine SHALL clamp the macro modifier (1 + M_macro · sign_alignment) to the range [0.5, 1.5] to prevent macro signals from inverting or excessively amplifying company signals.
|
||||
3. WHEN only macro signals exist (no company signals), THE Aggregation_Engine SHALL fall back to the current additive behavior with the existing macro weight of 0.3, preserving the macro-only suppression safety mechanism.
|
||||
4. WHEN only company signals exist (macro layer disabled or no macro events), THE Aggregation_Engine SHALL use company signals without modification (modifier = 1.0).
|
||||
5. THE Aggregation_Engine SHALL log the macro modifier value applied to each ticker for auditability.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 12: Graph-Distance Competitive Signal Attenuation
|
||||
|
||||
**User Story:** As a quantitative analyst, I want competitive signal transfer attenuated by network graph distance and historical correlation, so that signals propagate more strongly to closely related competitors and decay for distant relationships.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN propagating a signal from a source company to a target company, THE Competitive_Scorer SHALL compute transfer strength as S_transfer = S_source · ρ_historical · e^(-d_network), where S_source is the source signal strength, ρ_historical is the historical price correlation between the two companies, and d_network is the graph distance in the competitor relationship network.
|
||||
2. THE Competitive_Scorer SHALL compute graph distance d_network as the shortest path length in the competitor relationship graph, where direct competitors have distance 1, competitors-of-competitors have distance 2, and so on.
|
||||
3. WHEN the graph distance exceeds 3, THE Competitive_Scorer SHALL not propagate the signal (at d_network = 4 the attenuation is e^(-4) ≈ 0.018, below meaningful contribution).
|
||||
4. THE Competitive_Scorer SHALL compute ρ_historical as the 90-day rolling Pearson correlation of daily returns between the source and target companies.
|
||||
5. WHEN historical correlation data is insufficient (fewer than 30 trading days of overlapping data), THE Competitive_Scorer SHALL use a default correlation of 0.3 for same-sector companies and 0.1 for cross-sector companies.
|
||||
6. THE Competitive_Scorer SHALL preserve the existing relationship strength threshold (R_relationship ≥ 0.2) as a pre-filter before applying the graph-distance attenuation.
|
||||
7. FOR ALL source-target pairs, THE Competitive_Scorer SHALL produce transfer strengths that decrease monotonically with increasing graph distance (closer competitors always receive stronger signal transfer).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 13: Exponentially Weighted Momentum
|
||||
|
||||
**User Story:** As a quantitative analyst, I want trend momentum computed using exponentially weighted historical changes rather than a simple current-minus-previous difference, so that the momentum estimate is smoother and less sensitive to single-cycle noise.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN computing trend momentum, THE Projection_Engine SHALL use an exponentially weighted sum M_t = Σ_{k=0}^{K-1} λ^k · ΔS_{t-k}, where ΔS_{t-k} is the signed strength change at lag k, λ = 0.7 is the decay factor, and K is the number of available historical cycles (up to 10).
|
||||
2. THE Projection_Engine SHALL normalize the momentum by dividing by the geometric series sum Σ λ^k to produce a value in [-1, 1].
|
||||
3. WHEN fewer than 2 historical cycles are available, THE Projection_Engine SHALL fall back to the current heuristic (momentum = direction_sign × strength × 0.5).
|
||||
4. THE Projection_Engine SHALL compute volatility-scaled momentum M_adj = M_t / max(σ_20, 0.01), where σ_20 is the 20-day return standard deviation, to normalize momentum relative to the ticker's typical price movement.
|
||||
5. THE Projection_Engine SHALL clamp M_adj to [-2.0, 2.0] to prevent division by very small σ_20 from producing extreme values.
|
||||
6. FOR ALL sequences of monotonically increasing signed strengths, THE Projection_Engine SHALL produce positive momentum values (correctly detecting strengthening bullish trends).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 14: Expected Value Recommendation Gate
|
||||
|
||||
**User Story:** As a quantitative analyst, I want recommendation eligibility based on expected value rather than simple confidence and strength thresholds, so that the system only recommends trades with positive risk-adjusted expected outcomes.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN evaluating recommendation eligibility, THE Recommendation_Engine SHALL compute expected value EV = P_bull · R_up - P_bear · R_down, where P_bull is the Bayesian bullish probability, P_bear = 1 - P_bull, R_up is the estimated upside return, and R_down is the estimated downside return.
|
||||
2. THE Recommendation_Engine SHALL estimate R_up and R_down from the trend strength and the ticker's 20-day historical volatility: R_up = strength · σ_20 · √(horizon_days) and R_down = (1 - strength) · σ_20 · √(horizon_days), where horizon_days corresponds to the trend window duration.
|
||||
3. WHEN EV is positive and exceeds a configurable threshold (default 0.005, representing 0.5% expected return), THE Recommendation_Engine SHALL allow the recommendation to proceed through the existing eligibility gates.
|
||||
4. WHEN EV is at or below the threshold (including any negative EV), THE Recommendation_Engine SHALL force the recommendation to informational mode regardless of confidence and strength.
|
||||
5. THE Recommendation_Engine SHALL persist the computed EV alongside the recommendation for auditability.
|
||||
6. THE Recommendation_Engine SHALL preserve all existing eligibility gates (confidence ≥ 0.35, strength ≥ 0.10, contradiction ≤ 0.60, evidence ≥ 2, direction ≠ neutral) as additional requirements beyond the EV gate.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 15: Contradiction Handling via Weighted Disagreement Entropy
|
||||
|
||||
**User Story:** As a quantitative analyst, I want contradiction detection to use weighted disagreement entropy rather than a simple minority/majority ratio, so that the system better distinguishes between a few strong dissenting signals and many weak ones.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN computing contradiction, THE Trend_Assembler SHALL compute weighted disagreement entropy using the effective weight distribution across positive and negative signal groups.
|
||||
2. THE Trend_Assembler SHALL compute the positive weight fraction f_pos = W_positive / (W_positive + W_negative) and negative weight fraction f_neg = W_negative / (W_positive + W_negative), where W_positive and W_negative are the sums of effective weights (combined_weight × impact_score) for each sentiment group.
|
||||
3. THE Trend_Assembler SHALL compute contradiction entropy as H_contradiction = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg), normalized to [0, 1] (maximum at f_pos = f_neg = 0.5).
|
||||
4. THE Trend_Assembler SHALL weight the contradiction entropy by the total evidence mass: contradiction_score = H_contradiction · min(1.0, (W_positive + W_negative) / W_threshold), where W_threshold is a configurable parameter (default 5.0) representing the evidence mass at which contradiction becomes fully significant.
|
||||
5. WHEN only positive or only negative signals exist (no disagreement), THE Trend_Assembler SHALL produce a contradiction score of 0.0.
|
||||
6. THE Trend_Assembler SHALL preserve the existing `ContradictionResult` interface, populating the overall score with the entropy-based value and retaining the `DisagreementDetail` objects for catalyst-level analysis.
|
||||
7. FOR ALL signal sets with both positive and negative signals, THE Trend_Assembler SHALL produce contradiction scores that increase monotonically as the weight distribution approaches equal split (f_pos → 0.5).
|
||||
|
||||
---
|
||||
|
||||
### Requirement 16: Backward Compatibility and Migration
|
||||
|
||||
**User Story:** As a platform operator, I want the mathematical upgrades to be backward-compatible with the existing database schema and deployable incrementally, so that the upgrade does not require downtime or data migration.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Aggregation_Engine SHALL preserve the existing `WeightedSignal`, `SignalWeight`, `TrendSummary`, and `Recommendation` dataclass interfaces, adding new fields as optional attributes with default values.
|
||||
2. THE Aggregation_Engine SHALL store new mathematical outputs (P_bull, α, β, entropy, regime, EV) in the existing JSONB metadata fields of `trend_windows` and `recommendations` tables rather than requiring new columns.
|
||||
3. THE Aggregation_Engine SHALL support a feature flag `probabilistic_scoring_enabled` in `risk_configs` that toggles between the current heuristic pipeline and the new probabilistic pipeline, defaulting to `false` for safe rollout.
|
||||
4. WHEN `probabilistic_scoring_enabled` is false, THE Aggregation_Engine SHALL produce identical outputs to the current system (no behavioral change).
|
||||
5. WHEN `probabilistic_scoring_enabled` is true, THE Aggregation_Engine SHALL use the new Bayesian, regime-aware, and adaptive formulas for all pipeline stages.
|
||||
6. IF the feature flag toggle fails to read from the database, THEN THE Aggregation_Engine SHALL default to the current heuristic pipeline (fail-safe behavior).
|
||||
7. THE Aggregation_Engine SHALL log which pipeline mode (heuristic or probabilistic) is active at the start of each aggregation cycle.
|
||||
|
||||
---
|
||||
|
||||
### Requirement 17: Property-Based Testing for Mathematical Correctness
|
||||
|
||||
**User Story:** As a developer, I want comprehensive property-based tests validating the mathematical correctness of all new formulas, so that edge cases and numerical stability issues are caught before deployment.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE test suite SHALL include property-based tests using Hypothesis for the sigmoid confidence gate verifying monotonicity (higher confidence input always produces higher or equal gate output) across all float inputs in [0.0, 1.0].
|
||||
2. THE test suite SHALL include property-based tests for the Beta_Distribution posterior verifying that α + β increases monotonically with the number of signals processed (evidence always accumulates).
|
||||
3. THE test suite SHALL include property-based tests for the Bayesian confidence formula verifying that confidence is 0.0 when α = β (maximum uncertainty) and approaches 1.0 as the ratio α/β or β/α increases.
|
||||
4. THE test suite SHALL include property-based tests for the adaptive decay verifying that the adaptive half-life is always greater than or equal to the base half-life for all valid input combinations.
|
||||
5. THE test suite SHALL include property-based tests for the multiplicative macro exposure verifying monotonicity (adding non-zero overlap in any dimension increases total impact).
|
||||
6. THE test suite SHALL include property-based tests for the exponentially weighted momentum verifying that monotonically increasing strength sequences produce positive momentum.
|
||||
7. THE test suite SHALL include a round-trip property test verifying that computing the Beta posterior from signals, extracting P_bull, then reconstructing approximate signal weights produces values consistent with the original inputs.
|
||||
8. THE test suite SHALL include property-based tests for the expected value computation verifying that EV is positive when P_bull > 0.5 and R_up > R_down (basic directional consistency).
|
||||
9. THE test suite SHALL include property-based tests for numerical stability verifying that no formula produces NaN, infinity, or values outside documented ranges for any valid input combination.
|
||||
10. THE test suite SHALL use `@settings(max_examples=100)` and follow the project convention of `test_pbt_*` file naming.
|
||||
@@ -0,0 +1,349 @@
|
||||
# Implementation Plan: Signal Math Upgrade
|
||||
|
||||
## Overview
|
||||
|
||||
Upgrade the Stonks Oracle signal processing pipeline from deterministic heuristic formulas to a probabilistic, regime-aware, and adaptive mathematical framework. Implementation proceeds in layers: foundations (config, schemas, new modules), then each pipeline stage (scoring → trend assembly → macro → competitive → projection → recommendation), then integration wiring, and finally testing. All changes are gated behind the `probabilistic_scoring_enabled` feature flag.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [ ] 1. Foundation: Configuration and schema extensions
|
||||
- [x] 1.1 Extend `ScoringConfig` with probabilistic parameters in `services/aggregation/scoring.py`
|
||||
- Add `probabilistic: bool = False` toggle field
|
||||
- Add sigmoid gate parameters: `sigmoid_steepness`, `sigmoid_midpoint`
|
||||
- Add information gain parameters: `info_gain_lambda`, `info_gain_max`, `default_base_rate`
|
||||
- Add adaptive decay parameters: `adaptive_decay_impact_scale`, `adaptive_decay_surprise_scale`, `adaptive_decay_market_scale`
|
||||
- Add regime multiplier parameters: `regime_return_weight`, `regime_volume_weight`, `regime_multiplier_max`
|
||||
- All new fields must have defaults matching the design document values
|
||||
- _Requirements: 2.5, 3.1, 5.1, 6.3, 16.1_
|
||||
|
||||
- [x] 1.2 Extend `SignalWeight` and `WeightedSignal` dataclasses in `services/aggregation/scoring.py`
|
||||
- Add optional fields to `SignalWeight`: `sigmoid_gate`, `info_gain_factor`, `source_accuracy_factor`, `regime_multiplier`
|
||||
- Add optional fields to `WeightedSignal`: `info_gain_factor`, `source_accuracy_factor`, `adaptive_half_life`
|
||||
- All new fields must have defaults (None or 1.0) for backward compatibility
|
||||
- _Requirements: 16.1, 2.5, 3.3, 4.2_
|
||||
|
||||
- [x] 1.3 Extend `TrendSummary` Pydantic model in `services/shared/schemas.py`
|
||||
- Add optional fields: `p_bull`, `alpha`, `beta_param`, `bayesian_confidence`, `entropy`, `regime`, `pipeline_mode`
|
||||
- `pipeline_mode` defaults to `"heuristic"`; all others default to `None`
|
||||
- _Requirements: 16.1, 1.6, 9.6_
|
||||
|
||||
- [x] 1.4 Extend `Recommendation` model in `services/shared/schemas.py` (or `services/recommendation/eligibility.py`)
|
||||
- Add optional fields: `expected_value`, `p_bull`, `pipeline_mode`
|
||||
- `pipeline_mode` defaults to `"heuristic"`; all others default to `None`
|
||||
- _Requirements: 16.1, 14.5_
|
||||
|
||||
- [x] 1.5 Add `probabilistic_scoring_enabled` feature flag support in `services/shared/config.py`
|
||||
- Read `probabilistic_scoring_enabled` from `risk_configs.config` JSONB
|
||||
- Default to `False` when key is missing, value is invalid, or DB is unreachable
|
||||
- Propagate flag through `AggregationConfig` dataclass
|
||||
- Log which pipeline mode is active at cycle start
|
||||
- _Requirements: 16.3, 16.4, 16.5, 16.6, 16.7_
|
||||
|
||||
- [x] 1.6 Create database migration `infra/migrations/034_source_accuracy.sql`
|
||||
- Create `source_accuracy` table with columns: `id UUID PRIMARY KEY DEFAULT gen_random_uuid()`, `source_id VARCHAR(200) NOT NULL`, `accuracy_ratio FLOAT NOT NULL DEFAULT 0.5`, `sample_count INTEGER NOT NULL DEFAULT 0`, `last_updated TIMESTAMPTZ`, `created_at TIMESTAMPTZ`
|
||||
- Add `UNIQUE(source_id)` constraint and `idx_source_accuracy_source` index
|
||||
- _Requirements: 4.5_
|
||||
|
||||
- [x] 2. Checkpoint — Verify foundation compiles and existing tests pass
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
- [ ] 3. New module: Bayesian Accumulator (`services/aggregation/bayesian.py`)
|
||||
- [x] 3.1 Implement `BayesianPosterior` dataclass and `compute_bayesian_posterior` function
|
||||
- Create frozen dataclass with fields: `p_bull`, `alpha`, `beta`, `log_likelihood`, `bayesian_confidence`, `entropy`, `signal_count`
|
||||
- Define `PRIOR` class-level constant for uninformative prior (p_bull=0.5, α=1.0, β=1.0, C=0.0, H=1.0)
|
||||
- Implement log-likelihood accumulation: `L_t = Σ(w_i · s_i)` using `weight.combined * sentiment_value`
|
||||
- Compute `P_bull = σ(L_t)` via sigmoid function
|
||||
- Compute Beta posterior: `α = 1 + W_bull`, `β = 1 + W_bear` from positive/negative weight sums
|
||||
- Compute Bayesian confidence: `C = 1 - 4αβ/(α+β)²`
|
||||
- Compute Shannon entropy via `compute_entropy`
|
||||
- Return `PRIOR` for empty signal lists
|
||||
- Skip signals with NaN weight or sentiment
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6_
|
||||
|
||||
- [x] 3.2 Implement `compute_entropy` function
|
||||
- Shannon entropy: `H = -p·log₂(p) - (1-p)·log₂(1-p)`
|
||||
- Return 0.0 for p ≤ 0 or p ≥ 1 (edge cases)
|
||||
- Return value in [0, 1] with maximum 1.0 at p=0.5
|
||||
- _Requirements: 9.1, 9.7_
|
||||
|
||||
- [x] 3.3 Write property test for sigmoid gate monotonicity
|
||||
- **Property 1: Sigmoid Gate Monotonicity**
|
||||
- **Validates: Requirements 2.6, 17.1**
|
||||
|
||||
- [x] 3.4 Write property test for Beta posterior evidence accumulation
|
||||
- **Property 2: Beta Posterior Evidence Accumulation**
|
||||
- **Validates: Requirements 1.3, 17.2**
|
||||
|
||||
- [x] 3.5 Write property test for Bayesian confidence symmetry and divergence
|
||||
- **Property 3: Bayesian Confidence Symmetry and Divergence**
|
||||
- **Validates: Requirements 1.4, 17.3**
|
||||
|
||||
- [x] 3.6 Write property test for Bayesian posterior round-trip consistency
|
||||
- **Property 4: Bayesian Posterior Round-Trip Consistency**
|
||||
- **Validates: Requirements 1.7, 17.7**
|
||||
|
||||
- [x] 3.7 Write property test for Shannon entropy range and maximum
|
||||
- **Property 8: Shannon Entropy Range and Maximum**
|
||||
- **Validates: Requirements 9.7**
|
||||
|
||||
- [x] 3.8 Write property test for Bayesian confidence monotonic with agreeing signals
|
||||
- **Property 13: Bayesian Confidence Monotonic with Agreeing Signals**
|
||||
- **Validates: Requirements 8.6**
|
||||
|
||||
- [ ] 4. New module: Regime Detector (`services/aggregation/regime.py`)
|
||||
- [x] 4.1 Implement `MarketRegime` enum, `RegimeClassification` and `RegimeConfig` dataclasses
|
||||
- `MarketRegime`: `TREND_FOLLOWING`, `PANIC`, `MEAN_REVERSION`, `UNCERTAINTY`
|
||||
- `RegimeClassification`: `regime`, `trend_indicator`, `volatility_ratio`, `bullish_threshold`, `bearish_threshold`, `contradiction_penalty_multiplier`
|
||||
- `RegimeConfig`: all configurable parameters with defaults from design
|
||||
- _Requirements: 7.3_
|
||||
|
||||
- [x] 4.2 Implement `compute_ema` and `classify_regime` functions
|
||||
- `compute_ema`: exponential moving average over last N values
|
||||
- `classify_regime`: compute trend indicator `R = sign(EMA_20 - EMA_100)` and volatility ratio `V_r = σ_20 / σ_100`
|
||||
- Classification rules: trend-following (R≠0 AND V_r<1.2), panic (V_r>1.5), mean-reversion (R=0 AND V_r<1.0), uncertainty (all other)
|
||||
- Adjust thresholds per regime: panic→±0.10, mean-reversion→±0.20, trend-following→±0.15, uncertainty→±0.15 with contradiction multiplier 0.6
|
||||
- Default to uncertainty when data is insufficient (<100 days) or σ values are zero
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9_
|
||||
|
||||
- [ ] 5. New module: Source Accuracy Tracker (`services/aggregation/source_accuracy.py`)
|
||||
- [x] 5.1 Implement `SourceAccuracy` dataclass and database functions
|
||||
- `SourceAccuracy` dataclass with `source_id`, `accuracy_ratio`, `sample_count`, `last_updated`
|
||||
- `accuracy_factor` property: return 1.0 when sample_count < 10, else `0.5 + accuracy_ratio`
|
||||
- `fetch_source_accuracy`: batch fetch from `source_accuracy` table via asyncpg
|
||||
- `update_source_accuracy`: update accuracy metrics from realized price outcomes
|
||||
- Handle DB unreachable: return neutral factor 1.0 for all sources
|
||||
- Clamp corrupted accuracy_ratio to [0.0, 1.0]
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4, 4.5_
|
||||
|
||||
- [x] 6. Checkpoint — Verify new modules compile and unit tests pass
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
- [ ] 7. Signal Scorer upgrades (`services/aggregation/scoring.py`)
|
||||
- [x] 7.1 Implement sigmoid confidence gate
|
||||
- Add `sigmoid_gate(x, steepness, midpoint)` function: `σ(k·(x - midpoint))`
|
||||
- When `probabilistic=True`, replace binary gate with sigmoid gate in `compute_signal_weight`
|
||||
- When `probabilistic=False`, preserve existing binary gate behavior
|
||||
- _Requirements: 2.1, 2.2, 2.3, 2.4, 2.5_
|
||||
|
||||
- [x] 7.2 Implement information gain surprise weighting
|
||||
- Add `EVENT_TYPE_BASE_RATES` constant dict and `DEFAULT_BASE_RATE = 0.1`
|
||||
- Add `compute_info_gain(event_type, lambda_param, max_gain, default_base_rate)` function: `r = 1 + λ·(-log₂ P(event_type))`, clamped to at most `max_gain` (3.0 by default)
|
||||
- Integrate as multiplicative factor in combined weight when `probabilistic=True`
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5_
|
||||
|
||||
- [x] 7.3 Implement adaptive recency decay
|
||||
- Add `compute_adaptive_half_life(base_half_life, impact_score, info_gain_factor, market_multiplier, config)` function
|
||||
- Compute `β_impact`, `β_surprise`, `β_market_reaction` scaling factors per design
|
||||
- `τ_i = τ_base · (1 + β_impact) · (1 + β_surprise) · (1 + β_market_reaction)`
|
||||
- When `probabilistic=True`, use adaptive half-life in `recency_weight`; otherwise use fixed
|
||||
- _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7_
|
||||
|
||||
- [x] 7.4 Implement regime multiplier replacing market context multiplier
|
||||
- Add `compute_regime_multiplier(returns, volumes, config)` function
|
||||
- Compute z-scores for return and volume, then `M_regime = 1 + 0.15·|z_r| + 0.10·|z_v|`
|
||||
- Clamp to [1.0, 2.5]; default to 1.0 when data unavailable or σ=0
|
||||
- When `probabilistic=True`, use `M_regime` instead of `M_context` in combined weight
|
||||
- _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5_
|
||||
|
||||
- [x] 7.5 Integrate source accuracy factor into `compute_signal_weight`
|
||||
- Accept optional `source_accuracy_factor` parameter
|
||||
- When `probabilistic=True`, multiply into combined weight formula
|
||||
- When `probabilistic=False`, ignore (factor = 1.0)
|
||||
- _Requirements: 4.2, 4.3_
|
||||
|
||||
- [x] 7.6 Update `compute_signal_weight` to branch on `probabilistic` flag
|
||||
- When `probabilistic=True`: use sigmoid gate × recency (adaptive) × credibility × (1 + novelty) × info_gain × source_accuracy × regime_multiplier
|
||||
- When `probabilistic=False`: preserve exact current formula (binary gate × recency × credibility × (1 + novelty) × market_context)
|
||||
- Populate all new optional fields on `SignalWeight` and `WeightedSignal`
|
||||
- _Requirements: 16.4, 16.5_
|
||||
|
||||
- [x] 7.7 Write property test for information gain monotonicity
|
||||
- **Property 6: Information Gain Monotonicity**
|
||||
- **Validates: Requirements 3.5**
|
||||
|
||||
- [x] 7.8 Write property test for adaptive decay lower bound
|
||||
- **Property 5: Adaptive Decay Lower Bound**
|
||||
- **Validates: Requirements 5.7, 17.4**
|
||||
|
||||
- [ ] 8. Contradiction upgrade (`services/aggregation/contradiction.py`)
|
||||
- [x] 8.1 Implement weighted disagreement entropy contradiction
|
||||
- Compute `f_pos = W_positive / (W_positive + W_negative)` and `f_neg = 1 - f_pos`
|
||||
- Compute `H_contradiction = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg)`
|
||||
- Weight by evidence mass: `contradiction_score = H_contradiction · min(1.0, (W_pos + W_neg) / W_threshold)`
|
||||
- Return 0.0 when only one direction exists
|
||||
- Preserve existing `ContradictionResult` interface
|
||||
- When `probabilistic=False`, preserve existing minority/majority ratio behavior
|
||||
- _Requirements: 15.1, 15.2, 15.3, 15.4, 15.5, 15.6, 15.7_
|
||||
|
||||
- [x] 8.2 Write property test for contradiction entropy monotonicity
|
||||
- **Property 9: Contradiction Entropy Monotonicity**
|
||||
- **Validates: Requirements 15.7**
|
||||
|
||||
- [ ] 9. Trend Assembly upgrades (`services/aggregation/worker.py`)
|
||||
- [x] 9.1 Integrate Bayesian posterior into trend assembly
|
||||
- When `probabilistic=True`, call `compute_bayesian_posterior` on merged signals
|
||||
- Use Bayesian confidence formula for trend confidence: `0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction`
|
||||
- Use entropy-based direction: H>0.9→mixed, P_bull>0.65→bullish, P_bull<0.35→bearish, else neutral
|
||||
- Apply regime-adjusted thresholds from `RegimeClassification`
|
||||
- Populate new `TrendSummary` fields: `p_bull`, `alpha`, `beta_param`, `bayesian_confidence`, `entropy`, `regime`, `pipeline_mode`
|
||||
- Store probabilistic outputs in `market_context` JSONB under `"probabilistic"` key
|
||||
- When `probabilistic=False`, preserve exact current heuristic behavior
|
||||
- _Requirements: 1.1, 1.2, 8.1, 8.2, 8.3, 8.4, 8.5, 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 7.8, 16.4, 16.5_
|
||||
|
||||
- [x] 9.2 Wire regime detection into the aggregation cycle
|
||||
- Call `classify_regime` with closing prices and returns for each ticker
|
||||
- Pass `RegimeClassification` to trend assembly for threshold adjustment
|
||||
- Default to uncertainty regime when market data is unavailable
|
||||
- Persist regime classification in JSONB for auditability
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.8, 7.9_
|
||||
|
||||
- [ ] 10. Macro scoring upgrade (`services/aggregation/interpolation.py`)
|
||||
- [x] 10.1 Implement multiplicative macro exposure formula
|
||||
- When `probabilistic=True`, compute `S_macro = severity · (1 - Π_k(1 - w_k · O_k))` instead of linear weighted sum
|
||||
- Preserve overlap weights: w_geo=0.35, w_supply=0.25, w_commodity=0.25, w_sector=0.15
|
||||
- Preserve severity mapping and resilience modifier
|
||||
- When `probabilistic=False`, preserve exact current linear formula
|
||||
- _Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6_
|
||||
|
||||
- [x] 10.2 Implement conditional macro signal integration
|
||||
- When `probabilistic=True` and both company and macro signals exist, apply macro as multiplicative modifier: `S_adjusted = S_company · clamp(1 + M_macro · sign_alignment, 0.5, 1.5)`
|
||||
- When only macro signals exist, fall back to additive behavior with weight 0.3
|
||||
- When only company signals exist, use modifier = 1.0
|
||||
- Log macro modifier value per ticker
|
||||
- When `probabilistic=False`, preserve current additive merge behavior
|
||||
- _Requirements: 11.1, 11.2, 11.3, 11.4, 11.5_
|
||||
|
||||
- [x] 10.3 Write property test for multiplicative macro exposure monotonicity
|
||||
- **Property 7: Multiplicative Macro Exposure Monotonicity**
|
||||
- **Validates: Requirements 10.7, 17.5**
|
||||
|
||||
- [ ] 11. Competitive signal upgrade (`services/aggregation/signal_propagation.py`)
|
||||
- [x] 11.1 Implement graph-distance attenuation for competitive signals
|
||||
- When `probabilistic=True`, compute `S_transfer = S_source · ρ_historical · e^(-d_network)` instead of flat transfer
|
||||
- Compute graph distance as the shortest path in the competitor relationship graph (maximum distance 3; signals are not propagated beyond that)
|
||||
- Use 90-day rolling Pearson correlation for `ρ_historical`; default to 0.3 (same-sector) or 0.1 (cross-sector) when data is insufficient (<30 days)
|
||||
- Preserve existing relationship strength threshold (R ≥ 0.2) as pre-filter
|
||||
- When `probabilistic=False`, preserve exact current flat transfer behavior
|
||||
- _Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7_
|
||||
|
||||
- [x] 11.2 Write property test for competitive signal distance attenuation
|
||||
- **Property 11: Competitive Signal Distance Attenuation**
|
||||
- **Validates: Requirements 12.7**
|
||||
|
||||
- [ ] 12. Projection upgrade (`services/aggregation/projection.py`)
|
||||
- [x] 12.1 Implement exponentially weighted momentum
|
||||
- When `probabilistic=True`, compute `M_t = Σ_{k=0}^{K-1} λ^k · ΔS_{t-k}` with λ=0.7, K up to 10
|
||||
- Normalize by geometric series sum to produce value in [-1, 1]
|
||||
- Fall back to current heuristic when fewer than 2 historical cycles available
|
||||
- Compute volatility-scaled momentum: `M_adj = M_t / max(σ_20, 0.01)`, clamped to [-2.0, 2.0]
|
||||
- When `probabilistic=False`, preserve exact current simple momentum behavior
|
||||
- _Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6_
|
||||
|
||||
- [x] 12.2 Write property test for exponentially weighted momentum direction
|
||||
- **Property 10: Exponentially Weighted Momentum Direction**
|
||||
- **Validates: Requirements 13.6, 17.6**
|
||||
|
||||
- [ ] 13. Recommendation upgrade (`services/recommendation/eligibility.py`)
|
||||
- [x] 13.1 Implement expected value recommendation gate
|
||||
- When `probabilistic=True`, compute `EV = P_bull · R_up - P_bear · R_down`
|
||||
- Estimate `R_up = strength · σ_20 · √(horizon_days)` and `R_down = (1 - strength) · σ_20 · √(horizon_days)`
|
||||
- When EV > threshold (default 0.005), allow recommendation through existing gates
|
||||
- When EV ≤ threshold, force recommendation to informational mode
|
||||
- Persist EV in `risk_checks` JSONB of `recommendation_evaluations`
|
||||
- Populate `expected_value`, `p_bull`, `pipeline_mode` on Recommendation model
|
||||
- Preserve all existing eligibility gates as additional requirements
|
||||
- When `probabilistic=False`, skip EV gate entirely
|
||||
- _Requirements: 14.1, 14.2, 14.3, 14.4, 14.5, 14.6_
|
||||
|
||||
- [x] 13.2 Write property test for expected value directional consistency
|
||||
- **Property 12: Expected Value Directional Consistency**
|
||||
- **Validates: Requirements 17.8**
|
||||
|
||||
- [x] 14. Checkpoint — Verify all pipeline stages compile and existing tests still pass
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
- [ ] 15. Integration wiring and feature flag plumbing
|
||||
- [x] 15.1 Wire feature flag through the aggregation worker entry point
|
||||
- Read `probabilistic_scoring_enabled` from `risk_configs` at cycle start in `services/aggregation/worker.py`
|
||||
- Pass flag to `ScoringConfig`, trend assembly, contradiction, macro, competitive, and projection stages
|
||||
- Log pipeline mode at cycle start
|
||||
- Ensure flag is read once per cycle (mid-cycle changes take effect next cycle)
|
||||
- _Requirements: 16.3, 16.6, 16.7_
|
||||
|
||||
- [x] 15.2 Wire source accuracy fetch into the scoring pipeline
|
||||
- At cycle start, batch-fetch source accuracy for all source IDs in the current signal set
|
||||
- Pass `source_accuracy_factor` to `compute_signal_weight` for each signal
|
||||
- Handle DB errors gracefully (default to 1.0)
|
||||
- _Requirements: 4.1, 4.2, 4.3_
|
||||
|
||||
- [x] 15.3 Wire regime detection into the aggregation cycle
|
||||
- Fetch closing prices and returns for each ticker from market data
|
||||
- Call `classify_regime` and pass result to trend assembly and scoring stages
|
||||
- Handle missing market data (default to uncertainty regime)
|
||||
- _Requirements: 7.1, 7.8, 7.9_
|
||||
|
||||
- [x] 15.4 Store probabilistic outputs in existing JSONB columns
|
||||
- Store Bayesian fields in `trend_windows.market_context` JSONB under `"probabilistic"` key
|
||||
- Store EV fields in `recommendation_evaluations.risk_checks` JSONB
|
||||
- Store regime classification in trend window JSONB
|
||||
- _Requirements: 16.2_
|
||||
|
||||
- [ ] 16. Numerical stability and edge case hardening
|
||||
- [x] 16.1 Add input validation and edge case guards across all new functions
|
||||
- Guard `log₂(0)` in entropy and information gain computations
|
||||
- Floor `max(σ_20, 0.01)` for momentum volatility scaling
|
||||
- Default to uncertainty regime when σ values are zero
|
||||
- Return `M_regime = 1.0` when z-score σ = 0
|
||||
- Skip signals with NaN weight or sentiment
|
||||
- Clamp all outputs to documented ranges
|
||||
- _Requirements: 17.9, 6.4_
|
||||
|
||||
- [x] 16.2 Write property test for numerical stability across all formulas
|
||||
- **Property 14: Numerical Stability Across All Formulas**
|
||||
- **Validates: Requirements 17.9, 6.4**
|
||||
|
||||
- [ ] 17. Unit tests for all new and modified modules
|
||||
- [x] 17.1 Write unit tests for Bayesian accumulator (`tests/test_bayesian.py`)
|
||||
- Test uninformative prior (empty signals → P_bull=0.5, α=1, β=1, C=0)
|
||||
- Test specific sigmoid gate values (x=0.5→0.5, x=0.2→<0.05, x=0.8→>0.95)
|
||||
- Test entropy direction mapping (H>0.9→mixed, P_bull>0.65→bullish, etc.)
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5_
|
||||
|
||||
- [x] 17.2 Write unit tests for regime detector (`tests/test_regime.py`)
|
||||
- Test specific (R, V_r) → expected regime classification
|
||||
- Test threshold adjustments per regime (panic→0.10, mean_reversion→0.20)
|
||||
- Test insufficient data fallback to uncertainty
|
||||
- _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9_
|
||||
|
||||
- [x] 17.3 Write unit tests for source accuracy tracker (`tests/test_source_accuracy.py`)
|
||||
- Test accuracy_factor property: sample_count < 10 → 1.0, else 0.5 + ratio
|
||||
- Test corrupted data clamping
|
||||
- _Requirements: 4.1, 4.2, 4.3_
|
||||
|
||||
- [x] 17.4 Write unit tests for signal scoring upgrades (`tests/test_signal_math_unit.py`)
|
||||
- Test info gain clamp (very rare event → factor ≤ 3.0)
|
||||
- Test default base rate (unknown event type → 0.1)
|
||||
- Test adaptive decay edge cases (all zeros → τ_base, all max → 6×τ_base)
|
||||
- Test zero overlap → zero macro impact
|
||||
- Test max overlap (all O_k = 1) → ≈severity×0.689, i.e. `severity · (1 - 0.65·0.75·0.75·0.85)`
|
||||
- Test macro fallback behaviors (only macro → additive, only company → no modifier)
|
||||
- Test graph distance cutoff (d>3 → no propagation)
|
||||
- Test momentum fallback (<2 cycles → heuristic)
|
||||
- Test EV threshold behavior (EV>0.005→proceed, EV≤0.005→informational)
|
||||
- Test feature flag behaviors (flag=false→heuristic, flag=true→probabilistic)
|
||||
- _Requirements: 3.1, 3.4, 5.5, 5.6, 10.3, 10.4, 11.3, 13.3, 14.3, 14.4, 16.4, 16.5_
|
||||
|
||||
- [x] 18. Final checkpoint — Ensure all tests pass
|
||||
- Ensure all tests pass; ask the user if questions arise.
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation after each major phase
|
||||
- Property tests validate the 14 universal correctness properties from the design document
|
||||
- Unit tests validate specific examples, edge cases, and integration points
|
||||
- The design uses Python throughout — no language selection needed
|
||||
- Migration number is 034 (existing migrations go up to 033)
|
||||
- All new dataclass fields use optional defaults for backward compatibility
|
||||
- Feature flag `probabilistic_scoring_enabled` gates every behavioral change
|
||||
@@ -0,0 +1 @@
|
||||
{"specId": "d76705a8-fb91-4fce-b59e-c4b3b0dbbd83", "workflowType": "requirements-first", "specType": "feature"}
|
||||
@@ -0,0 +1,802 @@
|
||||
# Design Document — Trading Feedback Engine
|
||||
|
||||
## Overview
|
||||
|
||||
This design adds a periodic trading performance reporting system to Stonks Oracle. The system collects trading data (P&L, recommendations, positions, risk metrics, model quality), generates structured JSON reports with AI-powered summaries, validates report metrics against live data, and stores reports for retrieval via API.
|
||||
|
||||
The core challenge is fitting AI summarization within the 8k-token context window of the `qwen3.5:9b-fast` model on the local Ollama instance. The design addresses this with a chunking strategy that serializes report section data into ≤6,000-character chunks, summarizes each chunk independently, then merges chunk summaries into a final section summary. This hierarchical summarization approach keeps each LLM call well within the token budget while producing coherent narratives.
|
||||
|
||||
### Design Rationale
|
||||
|
||||
A trading system without periodic performance feedback forces the operator to manually query tables and compute metrics. The feedback engine closes this gap by:
|
||||
|
||||
1. **Automating data collection** — pulling from 7+ tables (trading_decisions, orders, positions, portfolio_snapshots, recommendations, prediction_outcomes, model_metric_snapshots) into a single structured report
|
||||
2. **AI-powered summarization** — using the existing agent infrastructure (ai_agents, AgentConfigResolver, llm_factory) to generate natural-language summaries that highlight trends and anomalies
|
||||
3. **Cross-validation** — comparing computed metrics against live validation data (prediction_outcomes, model_metric_snapshots) and flagging discrepancies >5%
|
||||
4. **Persistent storage** — storing reports as JSONB for historical comparison and trend analysis
|
||||
5. **Scheduled generation** — daily (after market close) and weekly (Saturday) reports via Redis queue jobs
|
||||
|
||||
The design reuses existing infrastructure: asyncpg for persistence, FastAPI for API endpoints, Redis queues for async job processing, the ai_agents/AgentConfigResolver/llm_factory stack for LLM access, and TanStack Query hooks on the frontend.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### High-Level Data Flow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph "Scheduling (Trigger)"
|
||||
A[Scheduler Service] -->|after 16:30 ET daily| B[Redis Queue<br/>stonks:queue:report_generation]
|
||||
A -->|Saturday weekly| B
|
||||
C[Manual API Trigger] --> B
|
||||
end
|
||||
|
||||
subgraph "Report Generation (Async Worker)"
|
||||
B --> D[Report Generator<br/>services/reporting/generator.py]
|
||||
D -->|1. Collect| E[Data Collector<br/>services/reporting/collector.py]
|
||||
E -->|queries| F[(trading_decisions<br/>orders, positions<br/>portfolio_snapshots<br/>recommendations)]
|
||||
D -->|2. Build sections| G[Section Builder<br/>services/reporting/sections.py]
|
||||
G -->|P&L, accuracy,<br/>positions, risk,<br/>model quality| H[Report Sections]
|
||||
D -->|3. Validate| I[Report Validator<br/>services/reporting/validator.py]
|
||||
I -->|cross-check| J[(prediction_outcomes<br/>model_metric_snapshots)]
|
||||
D -->|4. Summarize| K[AI Summarizer<br/>services/reporting/summarizer.py]
|
||||
K -->|chunk & summarize| L[Report_Summarizer_Agent<br/>via AgentConfigResolver<br/>+ llm_factory]
|
||||
D -->|5. Store| M[(trading_reports table)]
|
||||
end
|
||||
|
||||
subgraph "API Layer"
|
||||
N[GET /api/reports] -->|paginated list| M
|
||||
O[GET /api/reports/:id] -->|full report| M
|
||||
end
|
||||
|
||||
subgraph "Frontend"
|
||||
P[useReports hook] --> N
|
||||
Q[useReport hook] --> O
|
||||
end
|
||||
```
|
||||
|
||||
### Scheduling Strategy
|
||||
|
||||
| Component | Trigger | Cadence |
|
||||
|-----------|---------|---------|
|
||||
| Daily Report | Scheduler after 16:30 ET | Every trading day |
|
||||
| Weekly Report | Scheduler on Saturday | Weekly (Mon–Fri coverage) |
|
||||
| Report Generator Worker | Redis queue consumer | On-demand from queue |
|
||||
| AI Summarizer | Called by generator | Per report section |
|
||||
|
||||
### Chunking Strategy
|
||||
|
||||
The `qwen3.5:9b-fast` model has an 8k-token context window. With the system prompt (~200 tokens) and response budget (~200 tokens), roughly 7,600 tokens remain for input. At ~4 chars/token for structured data, that's ~30,400 characters. The 6,000-character chunk limit provides a 5x safety margin to account for JSON overhead, prompt framing, and tokenization variance.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A[Section Data<br/>e.g. 15,000 chars] --> B{> 6,000 chars?}
|
||||
B -->|No| C[Single LLM call<br/>→ summary]
|
||||
B -->|Yes| D[Split into chunks<br/>≤ 6,000 chars each]
|
||||
D --> E[Chunk 1 → LLM → summary 1]
|
||||
D --> F[Chunk 2 → LLM → summary 2]
|
||||
D --> G[Chunk N → LLM → summary N]
|
||||
E --> H[Merge summaries<br/>→ final LLM call<br/>→ section summary]
|
||||
F --> H
|
||||
G --> H
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Components and Interfaces
|
||||
|
||||
### New Modules
|
||||
|
||||
| Module | File | Responsibility |
|
||||
|--------|------|----------------|
|
||||
| Report Data Collector | `services/reporting/collector.py` | Queries trading data for a reporting period |
|
||||
| Report Section Builder | `services/reporting/sections.py` | Builds structured report sections from raw data |
|
||||
| Report Validator | `services/reporting/validator.py` | Cross-checks metrics against validation tables |
|
||||
| AI Summarizer | `services/reporting/summarizer.py` | Chunks data and generates AI summaries |
|
||||
| Report Generator | `services/reporting/generator.py` | Orchestrates the full report generation pipeline |
|
||||
| Report Models | `services/reporting/models.py` | Pydantic models for report structure and serialization |
|
||||
|
||||
### Modified Modules
|
||||
|
||||
| Module | File | Changes |
|
||||
|--------|------|---------|
|
||||
| Query API | `services/api/app.py` | 2 new `/api/reports` endpoints |
|
||||
| Redis Keys | `services/shared/redis_keys.py` | New `QUEUE_REPORT_GENERATION` constant |
|
||||
| Frontend Hooks | `frontend/src/api/hooks.ts` | 2 new report hooks |
|
||||
| DB Migration | `infra/migrations/038_trading_reports.sql` | New table + agent seed |
|
||||
|
||||
### Component Interface Details
|
||||
|
||||
#### 1. Report Models (`services/reporting/models.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ReportType(str, Enum):
|
||||
DAILY = "daily"
|
||||
WEEKLY = "weekly"
|
||||
|
||||
|
||||
class ValidationStatus(str, Enum):
|
||||
PASSED = "passed"
|
||||
WARNINGS = "warnings"
|
||||
|
||||
|
||||
class ValidationWarning(BaseModel):
|
||||
field_name: str
|
||||
computed_value: float
|
||||
snapshot_value: float
|
||||
pct_difference: float
|
||||
|
||||
|
||||
class PLSection(BaseModel):
|
||||
realized_pnl: float
|
||||
unrealized_pnl: float
|
||||
daily_return: float
|
||||
cumulative_return: float
|
||||
win_count: int
|
||||
loss_count: int
|
||||
win_rate: float
|
||||
profit_factor: float
|
||||
sharpe_ratio: float
|
||||
summary: str = ""
|
||||
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RecommendationAccuracySection(BaseModel):
|
||||
total_evaluated: int
|
||||
act_count: int
|
||||
skip_count: int
|
||||
acted_win_rate: float
|
||||
avg_confidence_acted: float
|
||||
avg_confidence_skipped: float
|
||||
summary: str = ""
|
||||
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PositionDetail(BaseModel):
|
||||
ticker: str
|
||||
entry_price: float
|
||||
current_or_exit_price: float
|
||||
pnl: float
|
||||
pnl_pct: float
|
||||
hold_duration_hours: float
|
||||
status: str # "open" or "closed"
|
||||
|
||||
|
||||
class PositionPerformanceSection(BaseModel):
|
||||
positions: list[PositionDetail] = Field(default_factory=list)
|
||||
summary: str = ""
|
||||
|
||||
|
||||
class RiskMetricsSection(BaseModel):
|
||||
current_risk_tier: str
|
||||
portfolio_heat: float
|
||||
max_drawdown: float
|
||||
current_drawdown_pct: float
|
||||
reserve_pool_balance: float
|
||||
circuit_breaker_event_count: int
|
||||
summary: str = ""
|
||||
|
||||
|
||||
class ModelQualityWindow(BaseModel):
|
||||
lookback: str
|
||||
win_rate: float | None
|
||||
directional_accuracy: float | None
|
||||
information_coefficient: float | None
|
||||
calibration_error: float | None
|
||||
brier_score: float | None
|
||||
|
||||
|
||||
class ModelQualitySection(BaseModel):
|
||||
windows: list[ModelQualityWindow] = Field(default_factory=list)
|
||||
summary: str = ""
|
||||
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ReportData(BaseModel):
|
||||
"""Top-level report structure stored as JSONB."""
|
||||
pnl: PLSection
|
||||
recommendation_accuracy: RecommendationAccuracySection
|
||||
position_performance: PositionPerformanceSection
|
||||
risk_metrics: RiskMetricsSection
|
||||
model_quality: ModelQualitySection
|
||||
executive_summary: str = ""
|
||||
validation_status: ValidationStatus = ValidationStatus.PASSED
|
||||
generated_at: datetime
|
||||
period_start: date
|
||||
period_end: date
|
||||
report_type: ReportType
|
||||
```
|
||||
|
||||
#### 2. Report Data Collector (`services/reporting/collector.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime
|
||||
import asyncpg
|
||||
|
||||
|
||||
@dataclass
|
||||
class CollectedData:
|
||||
"""Raw data collected for a reporting period."""
|
||||
trading_decisions: list[dict]
|
||||
orders: list[dict]
|
||||
open_positions: list[dict]
|
||||
closed_positions: list[dict]
|
||||
portfolio_snapshot: dict | None
|
||||
previous_portfolio_snapshot: dict | None
|
||||
recommendations: list[dict]
|
||||
prediction_outcomes: list[dict]
|
||||
model_metric_snapshots: list[dict]
|
||||
circuit_breaker_events: list[dict]
|
||||
reserve_pool_balance: float
|
||||
|
||||
|
||||
async def collect_report_data(
|
||||
pool: asyncpg.Pool,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> CollectedData:
|
||||
"""Query all trading data for the reporting period.
|
||||
|
||||
Queries: trading_decisions, orders, positions, portfolio_snapshots,
|
||||
recommendations, prediction_outcomes, model_metric_snapshots,
|
||||
circuit_breaker_events, reserve_pool_ledger.
|
||||
|
||||
Returns CollectedData with all raw query results.
|
||||
If no trading_decisions exist, returns empty lists (zero-activity).
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 3. Report Section Builder (`services/reporting/sections.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from services.reporting.models import (
|
||||
PLSection, RecommendationAccuracySection,
|
||||
PositionPerformanceSection, PositionDetail,
|
||||
RiskMetricsSection, ModelQualitySection, ModelQualityWindow,
|
||||
)
|
||||
from services.reporting.collector import CollectedData
|
||||
|
||||
|
||||
def build_pnl_section(data: CollectedData) -> PLSection:
|
||||
"""Build P&L section from collected data.
|
||||
|
||||
Computes realized/unrealized P&L, daily return, cumulative return,
|
||||
win/loss counts, win rate, profit factor, and Sharpe ratio from
|
||||
portfolio_snapshot and closed positions.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_recommendation_accuracy_section(data: CollectedData) -> RecommendationAccuracySection:
|
||||
"""Build recommendation accuracy section.
|
||||
|
||||
Joins trading_decisions with prediction_outcomes to compute
|
||||
act/skip breakdown, win rate of acted recommendations, and
|
||||
average confidence of acted vs skipped.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_position_performance_section(data: CollectedData) -> PositionPerformanceSection:
|
||||
"""Build position performance section.
|
||||
|
||||
Lists each position (open and closed) with entry price,
|
||||
current/exit price, P&L, P&L%, and hold duration.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection:
|
||||
"""Build risk metrics section.
|
||||
|
||||
Extracts current risk tier, portfolio heat, max drawdown,
|
||||
current drawdown %, reserve pool balance, and circuit breaker
|
||||
event count from collected data.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_model_quality_section(data: CollectedData) -> ModelQualitySection:
|
||||
"""Build model quality section.
|
||||
|
||||
Extracts latest model_metric_snapshot values for 7d, 30d, 90d
|
||||
lookback windows.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 4. Report Validator (`services/reporting/validator.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
import asyncpg
|
||||
from services.reporting.models import (
|
||||
ReportData, ValidationStatus, ValidationWarning,
|
||||
)
|
||||
|
||||
|
||||
DISCREPANCY_THRESHOLD_PCT = 5.0
|
||||
|
||||
|
||||
def validate_recommendation_accuracy(
|
||||
section: "RecommendationAccuracySection",
|
||||
prediction_outcomes: list[dict],
|
||||
) -> list[ValidationWarning]:
|
||||
"""Cross-reference reported win rates with prediction_outcomes.
|
||||
|
||||
Compares computed win rate against direction_correct/profitable
|
||||
fields from prediction_outcomes for the same tickers and period.
|
||||
Returns warnings for discrepancies > 5%.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def validate_model_quality(
|
||||
section: "ModelQualitySection",
|
||||
metric_snapshots: list[dict],
|
||||
) -> list[ValidationWarning]:
|
||||
"""Compare reported model quality metrics against model_metric_snapshots.
|
||||
|
||||
Flags discrepancies > 5% between computed and snapshot values
|
||||
for win_rate, directional_accuracy, IC, ECE, and Brier score.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def compute_validation_status(report: ReportData) -> ValidationStatus:
|
||||
"""Determine overall validation status.
|
||||
|
||||
Returns 'passed' if no warnings across all sections,
|
||||
'warnings' if any section has validation warnings.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 5. AI Summarizer (`services/reporting/summarizer.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
import asyncpg
|
||||
from services.shared.agent_config import AgentConfigResolver
|
||||
|
||||
|
||||
CHUNK_SIZE_LIMIT = 6000 # characters per chunk
|
||||
MAX_SUMMARY_WORDS = 200 # per section summary
|
||||
MAX_EXECUTIVE_SUMMARY_WORDS = 300
|
||||
|
||||
|
||||
def chunk_data(serialized: str, max_chars: int = CHUNK_SIZE_LIMIT) -> list[str]:
|
||||
"""Split serialized data into chunks of at most max_chars.
|
||||
|
||||
Prefers newline boundaries to avoid breaking JSON structures; a single line longer than max_chars is split mid-line so the size limit always holds.
|
||||
Each chunk is ≤ max_chars characters.
|
||||
Returns at least one chunk (even if empty input).
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def summarize_section(
|
||||
pool: asyncpg.Pool,
|
||||
resolver: AgentConfigResolver,
|
||||
section_name: str,
|
||||
section_data: str,
|
||||
) -> str:
|
||||
"""Generate AI summary for a report section.
|
||||
|
||||
1. Serialize section data to string
|
||||
2. Chunk if > CHUNK_SIZE_LIMIT
|
||||
3. Summarize each chunk via Report_Summarizer_Agent
|
||||
4. If multiple chunks, merge summaries with a final LLM call
|
||||
5. Log each invocation to agent_performance_log
|
||||
6. On failure after max_retries, fall back to deterministic summary
|
||||
|
||||
Uses AgentConfigResolver to resolve agent config by slug
|
||||
'report-summarizer', then llm_factory to build the LLM client.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
def build_deterministic_summary(section_name: str, section_data: dict) -> str:
|
||||
"""Build a fallback deterministic summary from raw metrics.
|
||||
|
||||
Produces a template-based text summary when AI summarization fails.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def generate_executive_summary(
|
||||
pool: asyncpg.Pool,
|
||||
resolver: AgentConfigResolver,
|
||||
section_summaries: dict[str, str],
|
||||
) -> str:
|
||||
"""Generate executive summary from all section summaries.
|
||||
|
||||
Concatenates section summaries, chunks if needed, and produces
|
||||
a ≤300-word synthesis via the Report_Summarizer_Agent.
|
||||
Falls back to concatenated section summaries on failure.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 6. Report Generator (`services/reporting/generator.py`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
from datetime import date
|
||||
import asyncpg
|
||||
from services.reporting.models import ReportData, ReportType
|
||||
|
||||
|
||||
async def generate_report(
|
||||
pool: asyncpg.Pool,
|
||||
report_type: ReportType,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> ReportData:
|
||||
"""Orchestrate full report generation.
|
||||
|
||||
1. Collect data via collector
|
||||
2. Build sections via section builder
|
||||
3. Validate sections via validator
|
||||
4. Generate AI summaries via summarizer
|
||||
5. Generate executive summary
|
||||
6. Assemble final ReportData
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def store_report(
|
||||
pool: asyncpg.Pool,
|
||||
report: ReportData,
|
||||
) -> str:
|
||||
"""Store report in trading_reports table.
|
||||
|
||||
Uses INSERT ... ON CONFLICT (report_type, period_start, period_end)
|
||||
DO UPDATE to handle regeneration of existing reports.
|
||||
|
||||
Returns the report UUID.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
async def process_report_job(
|
||||
pool: asyncpg.Pool,
|
||||
job: dict,
|
||||
) -> None:
|
||||
"""Process a report generation job from the Redis queue.
|
||||
|
||||
Deserializes job payload, calls generate_report + store_report.
|
||||
Handles retries with exponential backoff (up to 3 retries: 30s, 60s, 120s).
|
||||
Rejects duplicate jobs for the same report_type + period.
|
||||
"""
|
||||
...
|
||||
```
|
||||
|
||||
#### 7. API Endpoints (added to `services/api/app.py`)
|
||||
|
||||
| Endpoint | Method | Parameters | Returns |
|
||||
|----------|--------|------------|---------|
|
||||
| `GET /api/reports` | GET | `report_type`, `start_date`, `end_date`, `limit`, `offset` | Paginated list: id, report_type, period_start, period_end, validation_status, generated_at |
|
||||
| `GET /api/reports/{report_id}` | GET | — | Full report including report_data JSONB |
|
||||
|
||||
#### 8. Frontend Hooks (added to `frontend/src/api/hooks.ts`)
|
||||
|
||||
```typescript
|
||||
export interface ReportListItem {
|
||||
id: string;
|
||||
report_type: string;
|
||||
period_start: string;
|
||||
period_end: string;
|
||||
validation_status: string;
|
||||
generated_at: string;
|
||||
}
|
||||
|
||||
export interface ReportDetail extends ReportListItem {
|
||||
report_data: Record<string, unknown>;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
/**
 * Fetch a paginated, optionally filtered list of reports from `/api/reports`.
 *
 * Only parameters that were actually supplied are added to the query string.
 * String filters are skipped when empty; `limit` and `offset` are skipped only
 * when null/undefined, so an explicit `0` is still sent to the server.
 *
 * @param params Optional filters (`report_type`, `start_date`, `end_date`)
 *               and pagination controls (`limit`, `offset`).
 * @returns The result of `useGet` for the assembled path, keyed by
 *          `['reports', params]`.
 */
export function useReports(params?: {
  report_type?: string;
  start_date?: string;
  end_date?: string;
  limit?: number;
  offset?: number;
}) {
  const qs = new URLSearchParams();
  if (params?.report_type) qs.set('report_type', params.report_type);
  if (params?.start_date) qs.set('start_date', params.start_date);
  if (params?.end_date) qs.set('end_date', params.end_date);
  // Nullish checks rather than truthiness: 0 is a legitimate value here and
  // must not be dropped from the request.
  if (params?.limit != null) qs.set('limit', String(params.limit));
  if (params?.offset != null) qs.set('offset', String(params.offset));
  // Serialize once and only append '?' when at least one parameter is set.
  const query = qs.toString();
  const path = query ? `/api/reports?${query}` : '/api/reports';
  return useGet<ReportListItem[]>(['reports', params], 'query', path);
}
|
||||
|
||||
export function useReport(id: string | undefined) {
|
||||
return useGet<ReportDetail>(
|
||||
['report', id], 'query', `/api/reports/${id}`, !!id
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Models
|
||||
|
||||
### Database Schema (Migration 038)
|
||||
|
||||
#### trading_reports
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS trading_reports (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
report_type VARCHAR(20) NOT NULL,
|
||||
period_start DATE NOT NULL,
|
||||
period_end DATE NOT NULL,
|
||||
report_data JSONB NOT NULL,
|
||||
validation_status VARCHAR(20) NOT NULL DEFAULT 'passed',
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT uq_trading_reports_period UNIQUE (report_type, period_start, period_end),
|
||||
CONSTRAINT chk_report_type CHECK (report_type IN ('daily', 'weekly'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_type ON trading_reports(report_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_period ON trading_reports(period_start, period_end);
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_generated ON trading_reports(generated_at DESC);
|
||||
```
|
||||
|
||||
#### Report Summarizer Agent Seed
|
||||
|
||||
```sql
|
||||
INSERT INTO ai_agents (name, slug, purpose, model_provider, model_name, system_prompt, prompt_version, schema_version, temperature, max_tokens, timeout_seconds, max_retries, source)
|
||||
SELECT * FROM (VALUES
|
||||
(
|
||||
'Report Summarizer',
|
||||
'report-summarizer',
|
||||
'Generates concise natural-language summaries of trading performance report sections. Processes chunked data within the 8k-token context window.',
|
||||
'ollama',
|
||||
'qwen3.5:9b-fast',
|
||||
E'You are a concise financial performance analyst. You summarize trading performance data into clear, professional prose.\n\nSTRICT RULES:\n1. Do NOT fabricate any data not present in the input.\n2. Do NOT add opinions, predictions, or recommendations.\n3. Keep each summary under 200 words.\n4. Highlight notable trends, outliers, and changes from prior periods.\n5. Use precise numbers from the input data.\n6. Use a neutral, professional tone.\n7. Return ONLY the summary text. No JSON, no markdown, no commentary.',
|
||||
'report-summarizer-v1',
|
||||
'1.0.0',
|
||||
0.0,
|
||||
1024,
|
||||
60,
|
||||
2,
|
||||
'system'
|
||||
)
|
||||
) AS v(name, slug, purpose, model_provider, model_name, system_prompt, prompt_version, schema_version, temperature, max_tokens, timeout_seconds, max_retries, source)
|
||||
WHERE NOT EXISTS (SELECT 1 FROM ai_agents WHERE slug = 'report-summarizer');
|
||||
```
|
||||
|
||||
### Report JSONB Structure
|
||||
|
||||
The `report_data` column stores a JSON object matching the `ReportData` Pydantic model:
|
||||
|
||||
```json
|
||||
{
|
||||
"pnl": {
|
||||
"realized_pnl": 125.50,
|
||||
"unrealized_pnl": -30.20,
|
||||
"daily_return": 0.012,
|
||||
"cumulative_return": 0.085,
|
||||
"win_count": 8,
|
||||
"loss_count": 3,
|
||||
"win_rate": 0.727,
|
||||
"profit_factor": 2.15,
|
||||
"sharpe_ratio": 1.42,
|
||||
"summary": "AI-generated summary...",
|
||||
"validation_warnings": []
|
||||
},
|
||||
"recommendation_accuracy": {
|
||||
"total_evaluated": 15,
|
||||
"act_count": 8,
|
||||
"skip_count": 7,
|
||||
"acted_win_rate": 0.75,
|
||||
"avg_confidence_acted": 0.72,
|
||||
"avg_confidence_skipped": 0.48,
|
||||
"summary": "AI-generated summary...",
|
||||
"validation_warnings": []
|
||||
},
|
||||
"position_performance": {
|
||||
"positions": [
|
||||
{
|
||||
"ticker": "AAPL",
|
||||
"entry_price": 185.50,
|
||||
"current_or_exit_price": 192.30,
|
||||
"pnl": 68.00,
|
||||
"pnl_pct": 3.66,
|
||||
"hold_duration_hours": 72.5,
|
||||
"status": "open"
|
||||
}
|
||||
],
|
||||
"summary": "AI-generated summary..."
|
||||
},
|
||||
"risk_metrics": {
|
||||
"current_risk_tier": "moderate",
|
||||
"portfolio_heat": 0.12,
|
||||
"max_drawdown": 0.08,
|
||||
"current_drawdown_pct": 0.03,
|
||||
"reserve_pool_balance": 450.00,
|
||||
"circuit_breaker_event_count": 1,
|
||||
"summary": "AI-generated summary..."
|
||||
},
|
||||
"model_quality": {
|
||||
"windows": [
|
||||
{
|
||||
"lookback": "7d",
|
||||
"win_rate": 0.65,
|
||||
"directional_accuracy": 0.62,
|
||||
"information_coefficient": 0.08,
|
||||
"calibration_error": 0.12,
|
||||
"brier_score": 0.22
|
||||
}
|
||||
],
|
||||
"summary": "AI-generated summary...",
|
||||
"validation_warnings": []
|
||||
},
|
||||
"executive_summary": "AI-generated executive summary...",
|
||||
"validation_status": "passed",
|
||||
"generated_at": "2025-01-15T21:30:00Z",
|
||||
"period_start": "2025-01-15",
|
||||
"period_end": "2025-01-15",
|
||||
"report_type": "daily"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Correctness Properties
|
||||
|
||||
*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.*
|
||||
|
||||
The following properties were derived from the acceptance criteria through systematic prework analysis. After reflection, 5 unique properties remain. Report section structure checks (3.1–3.5) are subsumed by the round-trip property — if a ReportData object survives serialization and deserialization, its structure is correct by construction (Pydantic enforces required fields). Validation status computation (4.4) is subsumed by the discrepancy detection property. ISO 8601 datetime formatting (8.4) is verified as part of the round-trip property since Pydantic's JSON serialization uses ISO 8601 by default and the round-trip would fail if datetimes were mangled.
|
||||
|
||||
### Property 1: Chunking Round-Trip and Size Constraint
|
||||
|
||||
*For any* input string, splitting it into chunks with a maximum size limit SHALL produce chunks where (a) every chunk is ≤ the size limit in characters, (b) no chunk is empty (except when the input itself is empty, which produces exactly one empty chunk), and (c) concatenating all chunks in order reconstructs the original input string.
|
||||
|
||||
**Validates: Requirements 2.2**
|
||||
|
||||
### Property 2: Report Serialization Round-Trip
|
||||
|
||||
*For any* valid ReportData object (with valid P&L, recommendation accuracy, position performance, risk metrics, and model quality sections), serializing to JSON and then deserializing back SHALL produce a ReportData object equivalent to the original. All datetime fields in the serialized JSON SHALL be in ISO 8601 format.
|
||||
|
||||
**Validates: Requirements 8.1, 8.2, 8.3, 8.4**
|
||||
|
||||
### Property 3: Validation Discrepancy Detection Correctness
|
||||
|
||||
*For any* pair of computed metric value and snapshot metric value (both finite, non-negative floats), the validation function SHALL produce a warning if and only if the percentage difference exceeds 5%. The percentage difference SHALL be computed as `|computed - snapshot| / snapshot * 100` when snapshot > 0, and SHALL flag any non-zero computed value when snapshot is 0.
|
||||
|
||||
**Validates: Requirements 4.1, 4.2, 4.3, 4.4**
|
||||
|
||||
### Property 4: Recommendation Accuracy Aggregation
|
||||
|
||||
*For any* non-empty list of trading decisions with associated prediction outcomes (each having a boolean `direction_correct`, boolean `profitable`, and float `excess_return_vs_spy`), the computed win rate SHALL equal the count of profitable outcomes divided by total outcomes, the directional accuracy SHALL equal the count of direction-correct outcomes divided by total outcomes, and the average excess return SHALL equal the arithmetic mean of all excess_return_vs_spy values. The two rates (win rate and directional accuracy) SHALL lie in [0.0, 1.0], and the average excess return SHALL be finite.
|
||||
|
||||
**Validates: Requirements 1.4**
|
||||
|
||||
### Property 5: Portfolio Period-Over-Period Delta Computation
|
||||
|
||||
*For any* two valid portfolio snapshots (current and previous) with non-negative portfolio_value, active_pool, reserve_pool, and finite cumulative_return, the period-over-period deltas SHALL equal (current - previous) for each field. When no previous snapshot exists, the deltas SHALL be zero.
|
||||
|
||||
**Validates: Requirements 1.3**
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Data Collection Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| No trading_decisions for period | Generate zero-activity report with note "No trading activity during this period" |
|
||||
| No portfolio_snapshot for period | Use most recent snapshot before period_start; if none exists, use zero values |
|
||||
| No prediction_outcomes for period | Skip recommendation accuracy validation; set validation_warnings noting missing data |
|
||||
| No model_metric_snapshots for period | Model quality section shows NULL values for all metrics |
|
||||
| Database connection failure during collection | Propagate error to job processor for retry |
|
||||
|
||||
### AI Summarization Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| LLM timeout (>60s) | Retry up to max_retries (from agent config, default 2) |
|
||||
| LLM returns empty response | Treat as failure, retry |
|
||||
| LLM returns response > 200 words | Truncate to at most 200 words, ending at the last complete sentence within the limit |
|
||||
| All LLM retries exhausted | Fall back to deterministic template summary |
|
||||
| AgentConfigResolver returns None (agent not found) | Log error, use deterministic summary for all sections |
|
||||
| Chunk merge LLM call fails | Use concatenation of chunk summaries (joined with newlines) |
|
||||
|
||||
### Validation Edge Cases
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Snapshot value is 0 and computed value is non-zero | Flag as warning with pct_difference = 100.0 |
|
||||
| Both snapshot and computed values are 0 | No warning (0% difference) |
|
||||
| Snapshot value is NULL | Skip validation for that metric, no warning |
|
||||
| Computed value is NaN or infinity | Replace with 0.0, log warning |
|
||||
| No prediction_outcomes to cross-reference | Skip recommendation accuracy validation entirely |
|
||||
|
||||
### Report Storage Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Unique constraint violation on insert | Use ON CONFLICT DO UPDATE to upsert |
|
||||
| JSONB serialization failure | Log error with report structure, propagate to job processor |
|
||||
| Report exceeds PostgreSQL JSONB size limit (~255 MB) | Extremely unlikely given report structure; log error if it occurs |
|
||||
|
||||
### Job Processing Failures
|
||||
|
||||
| Scenario | Handling |
|
||||
|----------|----------|
|
||||
| Job fails on first attempt | Retry with exponential backoff: 30s, 60s, 120s |
|
||||
| Job fails after 3 retries | Mark job as failed, log error with full context |
|
||||
| Duplicate job submitted for same period | Reject with log message, return without error |
|
||||
| Redis connection failure | Job stays in queue, picked up on reconnection |
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Property-Based Tests (Hypothesis)
|
||||
|
||||
Property-based tests use the Hypothesis library with `@settings(max_examples=100)`. Test files are prefixed `test_pbt_*` per project convention.
|
||||
|
||||
| Property | Test File | What It Tests |
|
||||
|----------|-----------|---------------|
|
||||
| Property 1: Chunking Round-Trip | `tests/test_pbt_report_chunking.py` | `chunk_data()` preserves content and respects size limits |
|
||||
| Property 2: Report Serialization Round-Trip | `tests/test_pbt_report_serialization.py` | `ReportData.model_dump_json()` → `ReportData.model_validate_json()` round-trip |
|
||||
| Property 3: Validation Discrepancy Detection | `tests/test_pbt_report_validation.py` | Discrepancy detection correctly flags >5% differences |
|
||||
| Property 4: Recommendation Accuracy Aggregation | `tests/test_pbt_report_sections.py` | `build_recommendation_accuracy_section()` computes correct aggregates |
|
||||
| Property 5: Portfolio Delta Computation | `tests/test_pbt_report_sections.py` | `build_pnl_section()` computes correct period-over-period deltas |
|
||||
|
||||
Each property test is tagged with a comment referencing the design property:
|
||||
```python
|
||||
# Feature: trading-feedback-engine, Property 1: Chunking round-trip and size constraint
|
||||
```
|
||||
|
||||
### Unit Tests (pytest)
|
||||
|
||||
| Test File | Coverage |
|
||||
|-----------|----------|
|
||||
| `tests/test_report_sections.py` | Section builders with known inputs, edge cases (empty data, single position, zero-activity) |
|
||||
| `tests/test_report_validator.py` | Specific discrepancy scenarios, boundary cases (exactly 5%), NULL snapshot values |
|
||||
| `tests/test_report_summarizer.py` | Deterministic fallback summary, chunk splitting edge cases (empty input, single char) |
|
||||
| `tests/test_report_models.py` | Pydantic model validation, enum constraints, default values |
|
||||
| `tests/test_report_generator.py` | Orchestration with mocked dependencies, zero-activity report, upsert behavior |
|
||||
|
||||
### Integration Tests
|
||||
|
||||
| Test File | Coverage |
|
||||
|-----------|----------|
|
||||
| `tests/test_report_api.py` | API endpoints with seeded database, pagination, filtering by report_type and date range |
|
||||
| `tests/test_report_storage.py` | Store/retrieve round-trip against real asyncpg pool, upsert behavior, unique constraint |
|
||||
|
||||
### Frontend Tests (Vitest)
|
||||
|
||||
| Test File | Coverage |
|
||||
|-----------|----------|
|
||||
| `frontend/src/test/reports.test.ts` | useReports and useReport hooks with MSW mocks, loading/error states |
|
||||
|
||||
### Test Configuration
|
||||
|
||||
- Python PBT: Hypothesis with `@settings(max_examples=100)`, files prefixed `test_pbt_*`
|
||||
- Python unit/integration: pytest with pytest-asyncio for async code
|
||||
- Frontend: Vitest with MSW for deterministic API mocking
|
||||
- Lint: `ruff check services/` before all commits
|
||||
- CI: Woodpecker runs all tests automatically on push to Gitea
|
||||
@@ -0,0 +1,117 @@
|
||||
# Requirements Document
|
||||
|
||||
## Introduction
|
||||
|
||||
The Trading Feedback Engine generates periodic performance reports from the Stonks Oracle trading system. Reports cover trading P&L, recommendation accuracy, position performance, risk metrics, and model quality trends. An AI agent (registered in the `ai_agents` table) summarizes sections of the report by processing data in small chunks that fit within the 8k-token context window. Reports are validated against live data from the prediction outcomes and model metric snapshots tables, stored in the database for retrieval, and exposed via API endpoints.
|
||||
|
||||
## Glossary
|
||||
|
||||
- **Feedback_Engine**: The backend service that orchestrates report generation, data collection, AI summarization, and report storage.
|
||||
- **Report_Summarizer_Agent**: The AI agent registered in the `ai_agents` table that generates natural-language summaries for report sections. Uses the existing `AgentConfigResolver` and `llm_factory` infrastructure.
|
||||
- **Report**: A structured JSON document containing trading performance metrics, AI-generated summaries, and validation data for a specific period (daily or weekly).
|
||||
- **Report_Section**: A self-contained portion of a report (e.g., P&L summary, recommendation accuracy, position performance) that can be independently generated and summarized.
|
||||
- **Chunk**: A subset of data rows small enough to fit within the 8k-token context window when serialized, allowing the Report_Summarizer_Agent to process it in a single LLM call.
|
||||
- **Portfolio_Snapshot**: A daily record in the `portfolio_snapshots` table containing portfolio value, pool balances, returns, win/loss counts, Sharpe ratio, max drawdown, and risk tier.
|
||||
- **Prediction_Outcome**: A record in the `prediction_outcomes` table containing realized returns, direction correctness, and excess returns vs benchmarks for a prediction at a specific horizon.
|
||||
- **Model_Metric_Snapshot**: A record in the `model_metric_snapshots` table containing aggregate model quality metrics (win rate, IC, ECE, Brier score) for a lookback/horizon combination.
|
||||
- **Trading_Decision**: A record in the `trading_decisions` table capturing the act/skip decision, skip reason, position sizing, risk tier, circuit breaker status, and decision trace for a recommendation evaluation.
|
||||
- **Validation_Data**: Live data from `prediction_outcomes`, `model_metric_snapshots`, and `signal_evidence_links` used to cross-check report claims against actual measured performance.
|
||||
- **Query_API**: The existing FastAPI service (`services/api/app.py`) that serves HTTP endpoints for the dashboard and external consumers.
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement 1: Report Data Collection
|
||||
|
||||
**User Story:** As a trader, I want the feedback engine to collect all relevant trading data for a reporting period, so that reports reflect the complete picture of trading activity.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN a report generation is triggered for a date range, THE Feedback_Engine SHALL query trading_decisions, orders, positions, portfolio_snapshots, recommendations, prediction_outcomes, and model_metric_snapshots for that period.
|
||||
2. WHEN collecting trading decision data, THE Feedback_Engine SHALL include the decision type, skip reason, ticker, computed position size, risk tier, circuit breaker status, and correlation check result for each Trading_Decision.
|
||||
3. WHEN collecting portfolio data, THE Feedback_Engine SHALL retrieve the most recent Portfolio_Snapshot within the reporting period and compute period-over-period changes in portfolio value, active pool, reserve pool, and cumulative return.
|
||||
4. WHEN collecting recommendation accuracy data, THE Feedback_Engine SHALL join recommendations with Prediction_Outcomes to compute win rate, directional accuracy, and average excess return vs SPY for the period.
|
||||
5. IF no trading_decisions exist for the requested period, THEN THE Feedback_Engine SHALL generate a report with zero-activity sections and a note indicating no trading occurred.
|
||||
|
||||
### Requirement 2: Chunked AI Summarization
|
||||
|
||||
**User Story:** As a trader, I want AI-generated summaries in my reports, so that I can quickly understand performance trends without reading raw numbers.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Report_Summarizer_Agent SHALL be registered in the `ai_agents` table with slug `report-summarizer`, model `qwen3.5:9b-fast`, and source `system`.
|
||||
2. WHEN generating a summary for a Report_Section, THE Feedback_Engine SHALL serialize the section data into Chunks of no more than 6,000 characters each to stay within the 8k-token context window.
|
||||
3. WHEN a Report_Section contains data that exceeds a single Chunk, THE Feedback_Engine SHALL split the data into multiple Chunks, summarize each Chunk independently, and then produce a final merged summary from the individual Chunk summaries.
|
||||
4. WHEN invoking the Report_Summarizer_Agent, THE Feedback_Engine SHALL use the existing `AgentConfigResolver` and `llm_factory` infrastructure to resolve model configuration and build the LLM client.
|
||||
5. WHEN invoking the Report_Summarizer_Agent, THE Feedback_Engine SHALL log each invocation to the `agent_performance_log` table with agent_id, success status, duration_ms, and token estimates.
|
||||
6. IF the Report_Summarizer_Agent fails after max_retries, THEN THE Feedback_Engine SHALL fall back to a deterministic text summary built from the raw metrics and continue report generation.
|
||||
|
||||
### Requirement 3: Report Structure and Content
|
||||
|
||||
**User Story:** As a trader, I want reports to cover P&L, recommendation accuracy, position performance, risk metrics, and model quality, so that I have a comprehensive view of system performance.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Report SHALL contain a P&L section with realized P&L, unrealized P&L, daily return, cumulative return, win count, loss count, win rate, profit factor, and Sharpe ratio for the reporting period.
|
||||
2. THE Report SHALL contain a recommendation accuracy section with total recommendations evaluated, act/skip breakdown, win rate of acted-upon recommendations, and average confidence of acted vs skipped recommendations.
|
||||
3. THE Report SHALL contain a position performance section listing each position held during the period with ticker, entry price, current or exit price, unrealized or realized P&L, P&L percentage, and hold duration.
|
||||
4. THE Report SHALL contain a risk metrics section with current risk tier, portfolio heat, max drawdown, current drawdown percentage, reserve pool balance, and a count of circuit breaker events during the period.
|
||||
5. THE Report SHALL contain a model quality section with the latest Model_Metric_Snapshot values for win rate, directional accuracy, information coefficient, calibration error (ECE), and Brier score across the 7d, 30d, and 90d lookback windows.
|
||||
6. THE Report SHALL contain an AI-generated executive summary that synthesizes the key findings from all sections into a concise narrative of no more than 300 words.
|
||||
|
||||
### Requirement 4: Report Validation Against Live Data
|
||||
|
||||
**User Story:** As a trader, I want report metrics to be cross-checked against live validation data, so that I can trust the accuracy of the reported numbers.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. WHEN generating the recommendation accuracy section, THE Feedback_Engine SHALL cross-reference reported win rates with the `direction_correct` and `profitable` fields from Prediction_Outcomes for the same tickers and period.
|
||||
2. WHEN generating the model quality section, THE Feedback_Engine SHALL compare the reported metrics against the most recent Model_Metric_Snapshot records and flag discrepancies greater than 5% between computed and snapshot values.
|
||||
3. WHEN a validation discrepancy is detected, THE Feedback_Engine SHALL include a `validation_warnings` array in the report section with the field name, computed value, snapshot value, and percentage difference.
|
||||
4. THE Report SHALL include a `validation_status` field set to `passed` when no discrepancies exceed 5%, or `warnings` when one or more discrepancies are detected.
|
||||
|
||||
### Requirement 5: Report Storage and Retrieval
|
||||
|
||||
**User Story:** As a trader, I want reports stored in the database and accessible via API, so that I can review historical performance at any time.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL store each generated Report as a row in a `trading_reports` table with columns for id (UUID), report_type (daily/weekly), period_start (DATE), period_end (DATE), report_data (JSONB), validation_status (VARCHAR), generated_at (TIMESTAMPTZ), and created_at (TIMESTAMPTZ).
|
||||
2. THE Feedback_Engine SHALL enforce a unique constraint on (report_type, period_start, period_end) to prevent duplicate reports for the same period.
|
||||
3. WHEN a report for an existing period is regenerated, THE Feedback_Engine SHALL update the existing row with the new report_data, validation_status, and generated_at timestamp.
|
||||
4. THE Query_API SHALL expose a `GET /api/reports` endpoint that returns a paginated list of reports with id, report_type, period_start, period_end, validation_status, and generated_at.
|
||||
5. THE Query_API SHALL expose a `GET /api/reports/{report_id}` endpoint that returns the full report including report_data JSONB.
|
||||
6. THE Query_API SHALL support filtering reports by report_type and date range via query parameters on the `GET /api/reports` endpoint.
|
||||
|
||||
### Requirement 6: Periodic Report Generation
|
||||
|
||||
**User Story:** As a trader, I want reports generated automatically on a daily and weekly schedule, so that I always have up-to-date performance feedback.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL generate a daily report after market close (after 16:30 ET) covering the current trading day.
|
||||
2. THE Feedback_Engine SHALL generate a weekly report on Saturday covering the Monday-through-Friday trading week.
|
||||
3. WHEN a scheduled report generation is triggered, THE Feedback_Engine SHALL enqueue a report generation job on a Redis queue for asynchronous processing.
|
||||
4. IF a report generation job fails, THEN THE Feedback_Engine SHALL retry the job up to 3 times with exponential backoff before marking the job as failed.
|
||||
5. WHILE a report generation job is in progress for a given period, THE Feedback_Engine SHALL reject duplicate job submissions for the same report_type and period.
|
||||
|
||||
### Requirement 7: Agent Registration and Editability
|
||||
|
||||
**User Story:** As a trader, I want the report summarizer agent registered in the ai_agents table, so that I can edit its prompts, model, and parameters through the existing agent management API.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL register the Report_Summarizer_Agent in the `ai_agents` table via a database migration with slug `report-summarizer`, source `system`, model_provider `ollama`, and model_name `qwen3.5:9b-fast`.
|
||||
2. THE Report_Summarizer_Agent system prompt SHALL instruct the model to produce concise financial performance summaries, avoid fabricating data not present in the input, and keep each summary under 200 words.
|
||||
3. THE Report_Summarizer_Agent SHALL support variant creation and activation through the existing agent variants system, allowing A/B testing of different summarization prompts.
|
||||
4. WHEN the Report_Summarizer_Agent configuration is updated via the agent management API, THE Feedback_Engine SHALL pick up the new configuration within 60 seconds via the `AgentConfigResolver` TTL cache.
|
||||
|
||||
### Requirement 8: Report Serialization Round-Trip
|
||||
|
||||
**User Story:** As a developer, I want report data to survive serialization and deserialization without data loss, so that stored reports are always faithful to the generated content.
|
||||
|
||||
#### Acceptance Criteria
|
||||
|
||||
1. THE Feedback_Engine SHALL serialize Report objects to JSON for storage in the `report_data` JSONB column.
|
||||
2. THE Feedback_Engine SHALL deserialize stored JSON back into Report objects for API responses.
|
||||
3. FOR ALL valid Report objects, serializing to JSON then deserializing back SHALL produce an equivalent Report object (round-trip property).
|
||||
4. THE Feedback_Engine SHALL use ISO 8601 format for all datetime fields in serialized reports.
|
||||
@@ -0,0 +1,195 @@
|
||||
# Implementation Plan: Trading Feedback Engine
|
||||
|
||||
## Overview
|
||||
|
||||
Add a periodic trading performance reporting system to Stonks Oracle. The system collects trading data, generates structured JSON reports with AI-powered summaries, validates metrics against live data, and stores reports for retrieval via API. Implementation follows the four-phase approach from the design: foundation → validation & AI → generator & API → scheduling & tests.
|
||||
|
||||
## Tasks
|
||||
|
||||
- [x] 1. Database migration 038 — trading_reports table and report-summarizer agent
|
||||
- [x] 1.1 Create `infra/migrations/038_trading_reports.sql`
|
||||
- Create `trading_reports` table with columns: id (UUID PK, gen_random_uuid()), report_type (VARCHAR(20) NOT NULL), period_start (DATE NOT NULL), period_end (DATE NOT NULL), report_data (JSONB NOT NULL), validation_status (VARCHAR(20) NOT NULL DEFAULT 'passed'), generated_at (TIMESTAMPTZ NOT NULL), created_at (TIMESTAMPTZ NOT NULL DEFAULT NOW())
|
||||
- Add UNIQUE constraint on (report_type, period_start, period_end)
|
||||
- Add CHECK constraint: report_type IN ('daily', 'weekly')
|
||||
- Create indexes: idx_trading_reports_type, idx_trading_reports_period, idx_trading_reports_generated
|
||||
- Seed Report_Summarizer_Agent into ai_agents table with slug 'report-summarizer', model_provider 'ollama', model_name 'qwen3.5:9b-fast', source 'system', temperature 0.0, max_tokens 1024, timeout_seconds 60, max_retries 2
|
||||
- Use WHERE NOT EXISTS guard on agent insert to be idempotent
|
||||
- _Requirements: 5.1, 5.2, 7.1, 7.2_
|
||||
|
||||
- [x] 1.2 Add `QUEUE_REPORT_GENERATION` constant to `services/shared/redis_keys.py`
|
||||
- Add `QUEUE_REPORT_GENERATION = "report_generation"` following existing queue naming convention
|
||||
- _Requirements: 6.3_
|
||||
|
||||
- [x] 2. Phase 1 — Report models, data collector, and section builders
|
||||
- [x] 2.1 Create report models (`services/reporting/models.py`)
|
||||
- Create `services/reporting/__init__.py`
|
||||
- Define enums: ReportType (daily, weekly), ValidationStatus (passed, warnings)
|
||||
- Define Pydantic models: ValidationWarning, PLSection, RecommendationAccuracySection, PositionDetail, PositionPerformanceSection, RiskMetricsSection, ModelQualityWindow, ModelQualitySection, ReportData
|
||||
- ReportData includes all sections, executive_summary, validation_status, generated_at, period_start, period_end, report_type
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 8.1, 8.2, 8.4_
|
||||
|
||||
- [x] 2.2 Implement data collector (`services/reporting/collector.py`)
|
||||
- Define CollectedData dataclass with fields: trading_decisions, orders, open_positions, closed_positions, portfolio_snapshot, previous_portfolio_snapshot, recommendations, prediction_outcomes, model_metric_snapshots, circuit_breaker_events, reserve_pool_balance
|
||||
- Implement `collect_report_data(pool, period_start, period_end)` → CollectedData
|
||||
- Query trading_decisions, orders, positions (open + closed), portfolio_snapshots (current + previous), recommendations, prediction_outcomes, model_metric_snapshots, circuit_breaker_events, reserve_pool_ledger for the period
|
||||
- Return empty lists for tables with no data (zero-activity case)
|
||||
- Use `_row_dict()` pattern for UUID conversion from asyncpg rows
|
||||
- _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5_
|
||||
|
||||
- [x] 2.3 Implement section builders (`services/reporting/sections.py`)
|
||||
- Implement `build_pnl_section(data: CollectedData) -> PLSection` — compute realized/unrealized P&L, daily return, cumulative return, win/loss counts, win rate, profit factor, Sharpe ratio from portfolio_snapshot and closed positions
|
||||
- Implement `build_recommendation_accuracy_section(data: CollectedData) -> RecommendationAccuracySection` — join trading_decisions with prediction_outcomes, compute act/skip breakdown, win rate of acted, avg confidence acted vs skipped
|
||||
- Implement `build_position_performance_section(data: CollectedData) -> PositionPerformanceSection` — list each position with ticker, entry price, current/exit price, P&L, P&L%, hold duration
|
||||
- Implement `build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection` — extract risk tier, portfolio heat, max drawdown, current drawdown %, reserve pool balance, circuit breaker event count
|
||||
- Implement `build_model_quality_section(data: CollectedData) -> ModelQualitySection` — extract model_metric_snapshot values for 7d, 30d, 90d lookback windows
|
||||
- Handle zero-activity gracefully (zero values, empty lists)
|
||||
- _Requirements: 1.3, 1.4, 3.1, 3.2, 3.3, 3.4, 3.5_
|
||||
|
||||
- [x] 3. Checkpoint — Verify foundation modules
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/reporting/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"` to verify models and section builders
|
||||
|
||||
- [x] 4. Phase 2 — Report validator and AI summarizer
|
||||
- [x] 4.1 Implement report validator (`services/reporting/validator.py`)
|
||||
- Define `DISCREPANCY_THRESHOLD_PCT = 5.0`
|
||||
- Implement `validate_recommendation_accuracy(section, prediction_outcomes)` → list[ValidationWarning] — compare computed win rate against direction_correct/profitable from prediction_outcomes, flag >5% discrepancies
|
||||
- Implement `validate_model_quality(section, metric_snapshots)` → list[ValidationWarning] — compare reported metrics against model_metric_snapshots for win_rate, directional_accuracy, IC, ECE, Brier score, flag >5% discrepancies
|
||||
- Implement `compute_validation_status(report: ReportData)` → ValidationStatus — return 'passed' if no warnings, 'warnings' if any section has validation_warnings
|
||||
- Handle edge cases: snapshot=0 with computed≠0 → 100% difference; both=0 → no warning; snapshot=NULL → skip; computed=NaN → replace with 0.0
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4_
|
||||
|
||||
- [x] 4.2 Implement AI summarizer (`services/reporting/summarizer.py`)
|
||||
- Define constants: CHUNK_SIZE_LIMIT=6000, MAX_SUMMARY_WORDS=200, MAX_EXECUTIVE_SUMMARY_WORDS=300
|
||||
- Implement `chunk_data(serialized: str, max_chars: int)` → list[str] — split on newline boundaries (hard-splitting any single line longer than max_chars), each chunk ≤ max_chars, at least one chunk returned
|
||||
- Implement `summarize_section(pool, resolver, section_name, section_data)` → str — serialize, chunk if needed, summarize each chunk via Report_Summarizer_Agent (resolved by slug 'report-summarizer'), merge if multiple chunks, log to agent_performance_log, fall back to deterministic on failure
|
||||
- Implement `build_deterministic_summary(section_name, section_data)` → str — template-based fallback summary from raw metrics
|
||||
- Implement `generate_executive_summary(pool, resolver, section_summaries)` → str — concatenate section summaries, chunk if needed, produce ≤300-word synthesis, fall back to concatenation on failure
|
||||
- Use AgentConfigResolver + llm_factory for LLM access
|
||||
- Log each invocation to agent_performance_log with agent_id, success, duration_ms, token estimates
|
||||
- _Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.6_
|
||||
|
||||
- [x] 5. Checkpoint — Verify validator and summarizer
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/reporting/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"` to verify validator and summarizer
|
||||
|
||||
- [x] 6. Phase 3 — Report generator orchestrator and API endpoints
|
||||
- [x] 6.1 Implement report generator (`services/reporting/generator.py`)
|
||||
- Implement `generate_report(pool, report_type, period_start, period_end)` → ReportData — orchestrate: collect data → build sections → validate → summarize → assemble ReportData
|
||||
- Implement `store_report(pool, report)` → str (UUID) — INSERT ... ON CONFLICT (report_type, period_start, period_end) DO UPDATE for upsert, return report id
|
||||
- Implement `process_report_job(pool, job: dict)` → None — deserialize job payload, call generate_report + store_report, handle retries with exponential backoff (30s, 60s, 120s; up to 3 retries), reject duplicate jobs for same report_type + period
|
||||
- _Requirements: 5.1, 5.2, 5.3, 6.3, 6.4, 6.5_
|
||||
|
||||
- [x] 6.2 Add API endpoints to `services/api/app.py`
|
||||
- Add `GET /api/reports` — paginated list with query params: report_type, start_date, end_date, limit (default 20), offset (default 0); returns id, report_type, period_start, period_end, validation_status, generated_at
|
||||
- Add `GET /api/reports/{report_id}` — full report including report_data JSONB
|
||||
- Use asyncpg pool from existing app state
|
||||
- Return 404 for non-existent report_id
|
||||
- _Requirements: 5.4, 5.5, 5.6_
|
||||
|
||||
- [x] 6.3 Add frontend hooks to `frontend/src/api/hooks.ts`
|
||||
- Add `ReportListItem` and `ReportDetail` TypeScript interfaces
|
||||
- Implement `useReports(params?)` hook — builds query string from report_type, start_date, end_date, limit, offset; uses `useGet` with 'query' base
|
||||
- Implement `useReport(id)` hook — fetches single report by id, enabled only when id is defined
|
||||
- _Requirements: 5.4, 5.5_
|
||||
|
||||
- [x] 7. Checkpoint — Verify generator and API
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"` to verify generator and API endpoints
|
||||
|
||||
- [x] 8. Phase 4 — Scheduling, property-based tests, unit tests, and frontend tests
|
||||
- [x] 8.1 Wire Redis queue integration and scheduler
|
||||
- Add report generation job consumer to the scheduler service that listens on `stonks:queue:report_generation`
|
||||
- Add daily report trigger (after 16:30 ET on trading days) and weekly report trigger (Saturday) to the scheduler
|
||||
- Job payload: `{"report_type": "daily"|"weekly", "period_start": "YYYY-MM-DD", "period_end": "YYYY-MM-DD"}`
|
||||
- _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5_
|
||||
|
||||
- [x] 8.2 Write property test: Chunking Round-Trip and Size Constraint
|
||||
- **Property 1: Chunking Round-Trip and Size Constraint**
|
||||
- File: `tests/test_pbt_report_chunking.py`
|
||||
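- Use Hypothesis `@settings(max_examples=100)` with a single `@given(st.text(), st.integers(min_value=1, max_value=10000))` — Hypothesis rejects `@given` applied more than once to the same test, so both strategies go in one call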
|
||||
- Assert: every chunk ≤ max_chars, no empty chunks (except empty input → one empty chunk), concatenation of chunks == original input
|
||||
- **Validates: Requirements 2.2**
|
||||
|
||||
- [x] 8.3 Write property test: Report Serialization Round-Trip
|
||||
- **Property 2: Report Serialization Round-Trip**
|
||||
- File: `tests/test_pbt_report_serialization.py`
|
||||
- Use Hypothesis with custom strategies for ReportData (valid PLSection, RecommendationAccuracySection, etc.)
|
||||
- Assert: `ReportData.model_validate_json(report.model_dump_json())` == original report
|
||||
- Assert: all datetime fields in serialized JSON are ISO 8601 format
|
||||
- **Validates: Requirements 8.1, 8.2, 8.3, 8.4**
|
||||
|
||||
- [x] 8.4 Write property test: Validation Discrepancy Detection Correctness
|
||||
- **Property 3: Validation Discrepancy Detection Correctness**
|
||||
- File: `tests/test_pbt_report_validation.py`
|
||||
- Use Hypothesis with `@given(st.floats(min_value=0, max_value=1e6), st.floats(min_value=0, max_value=1e6))`
|
||||
- Assert: warning iff |computed - snapshot| / snapshot * 100 > 5% (when snapshot > 0); flag any non-zero computed when snapshot == 0; no warning when both == 0
|
||||
- **Validates: Requirements 4.1, 4.2, 4.3, 4.4**
|
||||
|
||||
- [x] 8.5 Write property test: Recommendation Accuracy Aggregation
|
||||
- **Property 4: Recommendation Accuracy Aggregation**
|
||||
- File: `tests/test_pbt_report_sections.py`
|
||||
- Use Hypothesis with lists of trading decisions + prediction outcomes (direction_correct bool, profitable bool, excess_return_vs_spy float)
|
||||
- Assert: win_rate == count(profitable) / total, directional_accuracy == count(direction_correct) / total, avg excess return == mean(excess_return_vs_spy), all rates in [0.0, 1.0]
|
||||
- **Validates: Requirements 1.4**
|
||||
|
||||
- [x] 8.6 Write property test: Portfolio Period-Over-Period Delta Computation
|
||||
- **Property 5: Portfolio Period-Over-Period Delta Computation**
|
||||
- File: `tests/test_pbt_report_sections.py`
|
||||
- Use Hypothesis with two portfolio snapshots (non-negative portfolio_value, active_pool, reserve_pool, finite cumulative_return)
|
||||
- Assert: deltas == (current - previous) for each field; when no previous snapshot, deltas == 0
|
||||
- **Validates: Requirements 1.3**
|
||||
|
||||
- [x] 8.7 Write unit tests for section builders
|
||||
- File: `tests/test_report_sections.py`
|
||||
- Test each section builder with known inputs and expected outputs
|
||||
- Test edge cases: empty data (zero-activity), single position, no portfolio snapshot
|
||||
- _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5_
|
||||
|
||||
- [x] 8.8 Write unit tests for report validator
|
||||
- File: `tests/test_report_validator.py`
|
||||
- Test specific discrepancy scenarios: exactly 5% (no warning), 5.1% (warning), snapshot=0 computed≠0, both=0, NULL snapshot
|
||||
- _Requirements: 4.1, 4.2, 4.3, 4.4_
|
||||
|
||||
- [x] 8.9 Write unit tests for AI summarizer
|
||||
- File: `tests/test_report_summarizer.py`
|
||||
- Test deterministic fallback summary generation
|
||||
- Test chunk_data edge cases: empty input, single character, exactly at limit, one char over limit
|
||||
- _Requirements: 2.2, 2.6_
|
||||
|
||||
- [x] 8.10 Write unit tests for report generator
|
||||
- File: `tests/test_report_generator.py`
|
||||
- Test orchestration with mocked dependencies (collector, sections, validator, summarizer)
|
||||
- Test zero-activity report generation
|
||||
- Test upsert behavior (regeneration of existing report)
|
||||
- _Requirements: 5.1, 5.2, 5.3_
|
||||
|
||||
- [x] 8.11 Write API integration tests
|
||||
- File: `tests/test_report_api.py`
|
||||
- Test GET /api/reports with pagination, filtering by report_type and date range
|
||||
- Test GET /api/reports/{report_id} with valid and invalid IDs
|
||||
- _Requirements: 5.4, 5.5, 5.6_
|
||||
|
||||
- [x] 8.12 Write frontend hook tests
|
||||
- File: `frontend/src/test/reports.test.ts`
|
||||
- Test useReports and useReport hooks with MSW mocks
|
||||
- Test loading and error states
|
||||
- _Requirements: 5.4, 5.5_
|
||||
|
||||
- [x] 9. Final checkpoint — Full test suite and lint
|
||||
- Ensure all tests pass, ask the user if questions arise.
|
||||
- Run `.venv/bin/ruff check services/`
|
||||
- Run `.venv/bin/python -m pytest tests/ -x --tb=short -q -k "report"`
|
||||
- Run frontend tests: `cd frontend && npx vitest --run`
|
||||
|
||||
## Notes
|
||||
|
||||
- Tasks marked with `*` are optional and can be skipped for a faster MVP
|
||||
- Each task references specific requirements for traceability
|
||||
- Checkpoints ensure incremental validation after each phase
|
||||
- Property tests validate the 5 universal correctness properties from the design document
|
||||
- Unit tests validate specific examples and edge cases
|
||||
- The design document contains full interface signatures — use those as the implementation guide
|
||||
- Always run `.venv/bin/ruff check services/` before committing Python changes
|
||||
@@ -93,8 +93,35 @@ Ingestion jobs MUST include `source_id`, `source_type`, `ticker`, `company_id`,
|
||||
- The `competitor_relationships` table uses UUID company IDs — queries must join through `companies` to match by ticker
|
||||
- The dashboard Docker build uses TypeScript strict mode — unused imports that pass local diagnostics will fail in CI
|
||||
- Ingestion jobs require `source_id` from the `sources` table — don't just pass `ticker`
|
||||
- **Bash `!` in passwords/strings**: Bash interprets `!` inside double quotes as history expansion. NEVER use double quotes around strings containing `!`. Use single quotes instead: `'St0nks0racl3!'`. For kubectl exec with psql, use: `kubectl exec ... -- psql -U postgres -c "ALTER USER x WITH PASSWORD '"'"'password!'"'"';"` (single-quote escaping trick)
|
||||
|
||||
## No Premature Simplification
|
||||
Do NOT "simplify" code on impulse. When the urge arises to simplify a section, STOP and do this instead:
|
||||
|
||||
1. **Evaluate the section**: Read the full function/module, not just the part that looks complex.
|
||||
2. **Map the dependencies**: Identify every caller, every consumer, every downstream component that depends on this code's behavior, return shape, or side effects.
|
||||
3. **Assess blast radius**: Would changing this function break other implementations? Check imports, tests, API contracts, database queries, and frontend expectations.
|
||||
4. **Respect intentional complexity**: If the code is complex because the domain is complex (financial math, multi-layer signal aggregation, Bayesian shrinkage), the complexity is load-bearing. Simplifying it will introduce bugs.
|
||||
5. **Only simplify when**: The complexity is accidental (dead code, redundant branches, copy-paste artifacts) AND you have confirmed no downstream dependencies break.
|
||||
|
||||
This codebase has interconnected layers (ingestion → extraction → aggregation → recommendation → trading → validation). A "simple" change to a scoring function can cascade through trend summaries, recommendations, snapshot capture, and outcome evaluation. Always trace the full path before refactoring.
|
||||
|
||||
## Documentation
|
||||
- Do NOT create large summary/success markdown files after each step
|
||||
- Keep notes short, concise, and organized under `docs/notes/`
|
||||
- If a note isn't useful for future reference, don't write it
|
||||
|
||||
## Documentation Maintenance on Feature Changes
|
||||
When implementing a feature or fix that introduces an impactful change, update the relevant documentation as part of the same commit or task. "Impactful" means any change that affects how someone installs, deploys, configures, operates, or understands the system. Specifically:
|
||||
|
||||
- **New database migrations**: Update `docs/architecture-data-pipeline.md` or `docs/api-reference.md` if new tables, views, or endpoints are added. Update `project-context.md` steering file with the new migration number.
|
||||
- **New API endpoints**: Update `docs/api-reference.md` with the endpoint path, method, parameters, and response shape.
|
||||
- **New services or service changes**: Update `docs/architecture-docker-compose.md` and `docs/docker-deployment.md` if a new service is added or an existing service's configuration changes.
|
||||
- **Helm chart changes**: Update `docs/helm-reference.md` if new values, services, or config options are added.
|
||||
- **New environment variables or secrets**: Update `docs/LOCAL_DEV_SETUP.md` and the project-context steering file.
|
||||
- **Install/deploy script changes**: Update `deploy-docker.sh`, `docs/docker-deployment.md`, or the relevant runme scripts if the deploy process changes.
|
||||
- **Frontend route or page additions**: Update `docs/api-reference.md` (if it covers UI routes) and ensure the nav item is documented.
|
||||
- **README.md**: Update the top-level `README.md` when a major new capability is added (new signal layer, new dashboard section, new trading feature).
|
||||
- **Steering files**: Update `.kiro/steering/project-context.md` when migration numbers advance, new services are added, or key conventions change.
|
||||
|
||||
The goal is that someone reading the docs can always understand the current state of the system without reading the source code. When in doubt, update the doc.
|
||||
|
||||
@@ -81,13 +81,14 @@ When a full reset is needed:
|
||||
## Database Migrations
|
||||
- Located in `infra/migrations/001_*.sql` through `030_*.sql`
|
||||
- Applied automatically by `runmefirst.sh` in sorted order
|
||||
- Next migration number: **031**
|
||||
- Next migration number: **038**
|
||||
- Key migrations:
|
||||
- 016: Global news interpolation (global_events, macro_impact_records, exposure_profiles, trend_projections)
|
||||
- 017: Competitive intelligence (competitor_relationships, competitive_signal_records)
|
||||
- 024: Trend history time-series table
|
||||
- 026: AI agents management (ai_agents, agent_performance_log)
|
||||
- 027: Agent variants (agent_variants table for A/B testing)
|
||||
- 035: Model validation (prediction_snapshots, prediction_outcomes, signal_evidence_links, model_metric_snapshots, v_prediction_performance, v_source_performance)
|
||||
|
||||
## Key Conventions
|
||||
- All services use `services/shared/config.py` for configuration via env vars
|
||||
|
||||
+12
-3
@@ -63,8 +63,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -99,8 +102,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.ingestion.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.ingestion.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -135,8 +141,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.parser.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.parser.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
||||
+55
-4
@@ -28,8 +28,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.extractor.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.extractor.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -64,8 +67,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.aggregation.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.aggregation.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -100,8 +106,50 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.recommendation.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.recommendation.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
requests:
|
||||
memory: 1Gi
|
||||
cpu: 1000m
|
||||
limits:
|
||||
memory: 2Gi
|
||||
cpu: 4000m
|
||||
depends_on: []
|
||||
build-signal-engine:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
privileged: true
|
||||
settings:
|
||||
repo: registry.celestium.life/stonks-oracle/signal-engine
|
||||
registry: registry.celestium.life
|
||||
custom_dns: 192.168.42.1
|
||||
buildx_image: registry.celestium.life/dockerhub-cache/moby/buildkit:buildx-stable-1
|
||||
add_host: registry.celestium.life:10.1.1.12
|
||||
buildx_flags: --driver-opt network=host
|
||||
buildkitd_config: "[registry.\"docker.io\"]\n mirrors = [\"registry.celestium.life/v2/dockerhub-cache\"]\n[registry.\"ghcr.io\"]\n mirrors = [\"registry.celestium.life/v2/ghcr-cache\"]\n"
|
||||
http_proxy: ""
|
||||
https_proxy: ""
|
||||
no_proxy: ""
|
||||
logins:
|
||||
- registry: https://registry.celestium.life
|
||||
username:
|
||||
from_secret: harbor_username
|
||||
password:
|
||||
from_secret: harbor_password
|
||||
tags:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.signal_engine.main
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -136,8 +184,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.risk.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.risk.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
||||
+16
-4
@@ -28,8 +28,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.adapters.broker_adapter
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.adapters.broker_adapter
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -64,8 +67,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=python -m services.lake_publisher.worker
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=python -m services.lake_publisher.worker
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -100,8 +106,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.api.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.api.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
@@ -136,8 +145,11 @@ steps:
|
||||
- ${CI_COMMIT_SHA}
|
||||
- latest
|
||||
dockerfile: docker/Dockerfile
|
||||
no_cache: true
|
||||
context: .
|
||||
build_args: SERVICE_CMD=uvicorn services.trading.app:app --host 0.0.0.0 --port 8000
|
||||
build_args:
|
||||
- CACHE_BUST=${CI_COMMIT_SHA}
|
||||
- SERVICE_CMD=uvicorn services.trading.app:app --host 0.0.0.0 --port 8000
|
||||
backend_options:
|
||||
kubernetes:
|
||||
resources:
|
||||
|
||||
Executable
+518
@@ -0,0 +1,518 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# deploy-docker.sh — Deploy Stonks Oracle to a Docker host via SSH
|
||||
#
|
||||
# Usage: bash deploy-docker.sh [OPTIONS]
|
||||
#
|
||||
# Options:
|
||||
# --host USER@HOST SSH target (default: celes@192.168.42.254)
|
||||
# --ollama-url URL Ollama API URL (currently overridden: Step 2 always uses the Docker Ollama container at http://ollama:11434)
|
||||
# --ollama-model MODEL Ollama model name (default: qwen3.5:9b-fast)
|
||||
# --dir PATH Remote install directory (default: /home/celes/stonks-oracle)
|
||||
#
|
||||
# Examples:
|
||||
# bash deploy-docker.sh
|
||||
# bash deploy-docker.sh --ollama-url http://10.1.1.12:2701 --ollama-model qwen3.6
|
||||
# bash deploy-docker.sh --host user@myserver --dir /opt/stonks
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Configuration (override via flags or environment)
|
||||
# -------------------------------------------------------
|
||||
# Deployment defaults. Each value can be overridden through the matching
# DEPLOY_* environment variable; the --host, --dir, --ollama-url and
# --ollama-model flags parsed further down take precedence over both.
REMOTE_HOST="${DEPLOY_HOST:-celes@192.168.42.254}"
REMOTE_DIR="${DEPLOY_DIR:-/home/celes/stonks-oracle}"
OLLAMA_URL="${DEPLOY_OLLAMA_URL:-}"
OLLAMA_MODEL="${DEPLOY_OLLAMA_MODEL:-qwen3.5:9b-fast}"

# SECURITY(review): the default clone URL embeds a username and password in
# plain text, so the credential is readable by anyone with access to this
# script and is forwarded to the remote host. Prefer setting DEPLOY_REPO_URL
# (or using an SSH remote / credential helper) and rotate this credential.
# The literal default is kept only so existing invocations keep working.
REPO_URL="${DEPLOY_REPO_URL:-http://admin:St0nks0racl3!@10.1.1.12:30300/admin/stonks-oracle.git}"
|
||||
|
||||
# Parse command-line flags
|
||||
# parse_args ARGS...
# Applies command-line overrides to REMOTE_HOST, OLLAMA_URL, OLLAMA_MODEL and
# REMOTE_DIR. Every recognised flag takes exactly one value. Exits with
# status 1 on an unknown flag or on a flag that is missing its value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case $1 in
      --host|--ollama-url|--ollama-model|--dir)
        # Without this guard a trailing flag dies with a cryptic
        # "$2: unbound variable" under `set -u`.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for option: $1" >&2
          exit 1
        fi
        case $1 in
          --host)         REMOTE_HOST="$2" ;;
          --ollama-url)   OLLAMA_URL="$2" ;;
          --ollama-model) OLLAMA_MODEL="$2" ;;
          --dir)          REMOTE_DIR="$2" ;;
        esac
        shift 2
        ;;
      *) echo "Unknown option: $1"; exit 1 ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
echo "=== Stonks Oracle Docker Deployment ==="
|
||||
echo " Target: ${REMOTE_HOST}:${REMOTE_DIR}"
|
||||
echo " Model: ${OLLAMA_MODEL}"
|
||||
echo " Ollama: Docker container (GPU-accelerated)"
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 0: Ensure prerequisites (multi-distro support)
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 0: Checking prerequisites ---"
|
||||
ssh "$REMOTE_HOST" bash -s <<'REMOTE_SCRIPT'
|
||||
set -euo pipefail
|
||||
|
||||
# --- Detect OS and package manager ---
|
||||
# detect_os
# Populates four globals describing the host and prints a one-line summary:
#   OS_ID / OS_LIKE  from /etc/os-release (ID / ID_LIKE), "rhel" when only
#                    /etc/redhat-release exists, otherwise "unknown"
#   IS_WSL           "true" when /proc/version mentions "microsoft"
#   PKG_MGR          first available of apt, dnf, yum, pacman, zypper,
#                    otherwise "unknown"
detect_os() {
  local candidate

  # Distribution identity; start from "unknown" and refine.
  OS_ID="unknown"
  OS_LIKE="unknown"
  if [ -f /etc/os-release ]; then
    . /etc/os-release
    OS_ID="${ID:-unknown}"
    OS_LIKE="${ID_LIKE:-$OS_ID}"
  elif [ -f /etc/redhat-release ]; then
    OS_ID="rhel"
    OS_LIKE="rhel"
  fi

  # WSL kernels advertise "microsoft" in /proc/version.
  if grep -qi microsoft /proc/version 2>/dev/null; then
    IS_WSL=true
  else
    IS_WSL=false
  fi

  # Probe package managers in priority order; apt is detected via apt-get.
  PKG_MGR="unknown"
  for candidate in apt-get dnf yum pacman zypper; do
    if command -v "$candidate" &>/dev/null; then
      PKG_MGR="${candidate%-get}"
      break
    fi
  done

  echo " Detected: OS=$OS_ID (like=$OS_LIKE), pkg=$PKG_MGR, WSL=$IS_WSL"
}
|
||||
|
||||
# install_pkg PACKAGE
# Installs one package non-interactively through the manager named in
# PKG_MGR. Prints an error and exits with status 1 when PKG_MGR is not one of
# apt, dnf, yum, pacman or zypper.
install_pkg() {
  local package_name="$1"
  local -a install_cmd

  case "$PKG_MGR" in
    apt)    install_cmd=(apt-get install -y) ;;
    dnf)    install_cmd=(dnf -y install) ;;
    yum)    install_cmd=(yum -y install) ;;
    pacman) install_cmd=(pacman -S --noconfirm) ;;
    zypper) install_cmd=(zypper install -y) ;;
    *) echo " ERROR: Unknown package manager"; exit 1 ;;
  esac

  sudo "${install_cmd[@]}" "$package_name"
}
|
||||
|
||||
# update_pkg_cache
# Refreshes the package index for the manager named in PKG_MGR. Nothing is
# run for dnf/yum (they refresh on their own) or for an unrecognised manager.
update_pkg_cache() {
  if [ "$PKG_MGR" = "apt" ]; then
    sudo apt-get update -qq
  elif [ "$PKG_MGR" = "pacman" ]; then
    sudo pacman -Sy
  elif [ "$PKG_MGR" = "zypper" ]; then
    sudo zypper refresh -q
  fi
}
|
||||
|
||||
detect_os
|
||||
|
||||
# --- Git ---
|
||||
# Make sure git exists on the remote host; install it through the detected
# package manager only when it is missing.
if command -v git &>/dev/null; then
  echo " ✓ Git present"
else
  echo " Installing git..."
  update_pkg_cache
  install_pkg git
  echo " ✓ Git installed"
fi
|
||||
|
||||
# --- Docker Engine ---
|
||||
# Install Docker CE (engine, CLI, buildx and compose plugins) unless a docker
# binary is present AND `docker info` succeeds.
if command -v docker &>/dev/null && docker info &>/dev/null; then
  echo " ✓ Docker already installed ($(docker --version | cut -d' ' -f3 | tr -d ','))"
else
  echo " Installing Docker CE..."
  case "$PKG_MGR" in
    apt)
      # Debian/Ubuntu/WSL
      sudo apt-get update -qq
      sudo apt-get install -y ca-certificates curl gnupg
      sudo install -m 0755 -d /etc/apt/keyrings
      curl -fsSL https://download.docker.com/linux/${OS_ID}/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg 2>/dev/null
      sudo chmod a+r /etc/apt/keyrings/docker.gpg
      echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/${OS_ID} $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
        sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
      sudo apt-get update -qq
      sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
      ;;
    dnf|yum)
      # RHEL/Rocky/Fedora/CentOS
      sudo "$PKG_MGR" -y install dnf-plugins-core 2>/dev/null || true
      # Plain assignment on purpose: this runs at script top level, where
      # `local` is an error and would abort the script under `set -e`.
      repo_distro="rhel"
      if [[ "$OS_ID" == "fedora" ]]; then repo_distro="fedora"; fi
      # Try the dnf plugin first, then fall back to yum-config-manager.
      sudo dnf config-manager --add-repo "https://download.docker.com/linux/${repo_distro}/docker-ce.repo" 2>/dev/null || \
        sudo yum-config-manager --add-repo "https://download.docker.com/linux/${repo_distro}/docker-ce.repo" 2>/dev/null
      sudo "$PKG_MGR" -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
      ;;
    pacman)
      # Arch Linux
      sudo pacman -S --noconfirm docker docker-compose docker-buildx
      ;;
    zypper)
      # openSUSE
      sudo zypper install -y docker docker-compose docker-buildx
      ;;
  esac
  # Best-effort: hosts without systemd or usermod must not abort the deployment.
  sudo systemctl enable --now docker 2>/dev/null || true
  sudo usermod -aG docker "$(whoami)" 2>/dev/null || true
  echo " ✓ Docker installed and started"
fi
|
||||
|
||||
# --- Docker Compose plugin ---
|
||||
# The rest of the deployment relies on `docker compose`; stop here when the
# plugin is still unavailable.
if ! docker compose version &>/dev/null; then
  echo " ERROR: docker compose plugin not found after Docker install"
  exit 1
fi
echo " ✓ Docker Compose plugin available ($(docker compose version --short))"
|
||||
|
||||
# --- NVIDIA Driver (skip on WSL — uses host driver) ---
|
||||
# NVIDIA driver: skipped under WSL, reported when nvidia-smi already exists,
# otherwise installed per package manager. Install failures only warn.
driver_failed_msg=" ⚠ NVIDIA driver install failed — install manually"
if [ "$IS_WSL" = "true" ]; then
  echo " ✓ WSL detected — using host Windows NVIDIA driver"
elif command -v nvidia-smi &>/dev/null; then
  echo " ✓ NVIDIA driver present ($(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1))"
else
  echo " Installing NVIDIA drivers..."
  case "$PKG_MGR" in
    apt)
      # Prefer the pinned 560 driver, then the generic meta-package.
      sudo apt-get install -y nvidia-driver-560 2>/dev/null || \
        sudo apt-get install -y nvidia-driver 2>/dev/null || \
        echo "$driver_failed_msg"
      ;;
    dnf|yum)
      sudo dnf -y install epel-release 2>/dev/null || true
      sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo 2>/dev/null || true
      sudo dnf -y module install nvidia-driver:latest-dkms 2>/dev/null || \
        echo "$driver_failed_msg"
      ;;
    pacman)
      sudo pacman -S --noconfirm nvidia nvidia-utils 2>/dev/null || \
        echo "$driver_failed_msg"
      ;;
    zypper)
      echo " ⚠ NVIDIA driver: install manually for openSUSE"
      ;;
  esac
fi
|
||||
|
||||
# --- NVIDIA Container Toolkit ---
|
||||
# NVIDIA Container Toolkit: skipped when nvidia-ctk already exists, or when
# WSL GPU passthrough already works. Otherwise installed per package manager
# and wired into Docker.
if command -v nvidia-ctk &>/dev/null; then
  echo " ✓ NVIDIA Container Toolkit already installed"
elif [ "$IS_WSL" = "true" ] && docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null; then
  echo " ✓ WSL GPU passthrough working (no nvidia-ctk needed)"
else
  echo " Installing NVIDIA Container Toolkit..."
  case "$PKG_MGR" in
    apt)
      curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg 2>/dev/null
      curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
        sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
        sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null
      sudo apt-get update -qq
      sudo apt-get install -y nvidia-container-toolkit
      ;;
    dnf|yum)
      curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
        sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo > /dev/null
      sudo "$PKG_MGR" -y install nvidia-container-toolkit
      ;;
    pacman)
      sudo pacman -S --noconfirm nvidia-container-toolkit 2>/dev/null || \
        echo " ⚠ Install nvidia-container-toolkit from AUR"
      ;;
    zypper)
      echo " ⚠ NVIDIA Container Toolkit: install manually for openSUSE"
      ;;
  esac
  # Only configure Docker and report success when the toolkit really landed.
  # The zypper branch and a failed pacman install leave nvidia-ctk missing.
  if command -v nvidia-ctk &>/dev/null; then
    sudo nvidia-ctk runtime configure --runtime=docker 2>/dev/null || true
    sudo systemctl restart docker 2>/dev/null || true
    echo " ✓ NVIDIA Container Toolkit installed and Docker configured"
  else
    echo " ⚠ NVIDIA Container Toolkit not installed — GPU passthrough needs manual setup"
  fi
fi
|
||||
|
||||
# --- Verify GPU is accessible from Docker ---
|
||||
# Smoke test: run nvidia-smi inside a CUDA base container. Failure only warns.
if ! docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi &>/dev/null; then
  echo " ⚠ GPU passthrough test failed — may need a reboot or manual NVIDIA setup"
else
  echo " ✓ GPU passthrough verified"
fi
|
||||
|
||||
# --- Firewall (open required ports if firewall is active) ---
|
||||
# Open the published TCP ports when an active firewall is found: firewalld is
# tried first, then ufw. Individual rule failures are ignored.
published_ports="3000 8001 8002 8003 8004 9000 9001 11434"
if command -v firewall-cmd &>/dev/null && systemctl is-active firewalld &>/dev/null; then
  echo " Configuring firewalld..."
  for tcp_port in $published_ports; do
    sudo firewall-cmd --permanent --add-port="${tcp_port}/tcp" 2>/dev/null || true
  done
  sudo firewall-cmd --reload 2>/dev/null || true
  echo " ✓ Firewall ports opened"
elif command -v ufw &>/dev/null && sudo ufw status 2>/dev/null | grep -q "active"; then
  echo " Configuring ufw..."
  for tcp_port in $published_ports; do
    sudo ufw allow "${tcp_port}/tcp" 2>/dev/null || true
  done
  echo " ✓ UFW ports opened"
fi
|
||||
REMOTE_SCRIPT
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 1: Clone or update the repo on the remote host
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 1: Syncing repository ---"
|
||||
ssh "$REMOTE_HOST" bash -s -- "$REMOTE_DIR" "$REPO_URL" <<'REMOTE_SCRIPT'
|
||||
set -euo pipefail
|
||||
REMOTE_DIR="$1"
|
||||
REPO_URL="$2"
|
||||
|
||||
# Clone on first deployment; otherwise fetch and hard-reset the existing
# checkout to origin/main, discarding any local changes on the host.
if [ ! -d "$REMOTE_DIR/.git" ]; then
  echo " Cloning fresh..."
  git clone "$REPO_URL" "$REMOTE_DIR"
  cd "$REMOTE_DIR"
else
  echo " Updating existing repo..."
  cd "$REMOTE_DIR"
  git fetch origin
  git reset --hard origin/main
fi

echo " ✓ Repo synced at $(git log --oneline -1)"
|
||||
REMOTE_SCRIPT
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 2: Configure Ollama (always the Docker container; no detection is performed)
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 2: Configuring Ollama ---"
|
||||
# Always use the Docker Ollama container with GPU passthrough
|
||||
# The ollama/ollama image ships with CUDA runtime built-in
|
||||
USE_DOCKER_OLLAMA=true
|
||||
OLLAMA_URL="http://ollama:11434"
|
||||
echo " Using Docker Ollama container (GPU-accelerated via NVIDIA passthrough)"
|
||||
echo " Host-accessible at localhost:11434"
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 3: Create .env and compose override
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 3: Configuring environment ---"
|
||||
ssh "$REMOTE_HOST" bash -s -- "$REMOTE_DIR" "$OLLAMA_URL" "$OLLAMA_MODEL" "$USE_DOCKER_OLLAMA" <<'REMOTE_SCRIPT'
|
||||
set -euo pipefail
|
||||
REMOTE_DIR="$1"
|
||||
OLLAMA_URL="$2"
|
||||
OLLAMA_MODEL="$3"
|
||||
USE_DOCKER_OLLAMA="$4"
|
||||
cd "$REMOTE_DIR"
|
||||
|
||||
# Read API keys from key files in the remote install directory, if they exist
|
||||
# Print the contents of key file $1, or the fallback value $2 when the file
# does not exist in the current directory.
read_key_file() {
  if [ -f "$1" ]; then
    cat "$1"
  else
    printf '%s' "$2"
  fi
}

# API credentials default to empty; the broker URL defaults to Alpaca paper trading.
POLYGON_KEY=$(read_key_file polygon.io.key "")
ALPACA_KEY=$(read_key_file alpaca.key "")
ALPACA_SECRET=$(read_key_file alpaca.secret "")
ALPACA_URL=$(read_key_file alpaca.url "https://paper-api.alpaca.markets")

# Regenerate .env from scratch on every deployment.
cat <<EOF > .env
# Stonks Oracle — Docker Deployment Environment
MARKET_DATA_API_KEY=${POLYGON_KEY}
BROKER_API_KEY=${ALPACA_KEY}
BROKER_API_SECRET=${ALPACA_SECRET}
BROKER_BASE_URL=${ALPACA_URL}
TRADING_ENABLED=true
TRADING_RISK_TIER=moderate
TRADING_MAX_OPEN_POSITIONS=15
OLLAMA_MODEL=${OLLAMA_MODEL}
MACRO_ENABLED=true
COMPETITIVE_ENABLED=true
EOF
|
||||
|
||||
# Create compose override based on Ollama configuration
|
||||
if [ "$USE_DOCKER_OLLAMA" = "true" ]; then
|
||||
# Using Docker Ollama — no override needed, default compose handles it
|
||||
rm -f docker-compose.override.yml
|
||||
echo " ✓ Using Docker Ollama container"
|
||||
else
|
||||
# Using external Ollama — disable the container and point services to it
|
||||
# Determine if URL is localhost (needs host-gateway) or remote
|
||||
if echo "$OLLAMA_URL" | grep -qE "localhost|127\.0\.0\.1"; then
|
||||
DOCKER_OLLAMA_URL="http://host.docker.internal:$(echo "$OLLAMA_URL" | grep -oP ':\K[0-9]+')"
|
||||
cat > docker-compose.override.yml <<EOF
|
||||
services:
|
||||
ollama:
|
||||
entrypoint: ["true"]
|
||||
restart: "no"
|
||||
ports: []
|
||||
extractor:
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
OLLAMA_BASE_URL: "${DOCKER_OLLAMA_URL}"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
recommendation:
|
||||
environment:
|
||||
OLLAMA_BASE_URL: "${DOCKER_OLLAMA_URL}"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
EOF
|
||||
else
|
||||
# Remote Ollama — containers can reach it directly
|
||||
cat > docker-compose.override.yml <<EOF
|
||||
services:
|
||||
ollama:
|
||||
entrypoint: ["true"]
|
||||
restart: "no"
|
||||
extractor:
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
OLLAMA_BASE_URL: "${OLLAMA_URL}"
|
||||
recommendation:
|
||||
environment:
|
||||
OLLAMA_BASE_URL: "${OLLAMA_URL}"
|
||||
EOF
|
||||
fi
|
||||
echo " ✓ Override created — services will use external Ollama at ${OLLAMA_URL}"
|
||||
fi
|
||||
|
||||
echo " ✓ .env configured (polygon=$([ -n "$POLYGON_KEY" ] && echo 'set' || echo 'empty'), alpaca=$([ -n "$ALPACA_KEY" ] && echo 'set' || echo 'empty'))"
|
||||
REMOTE_SCRIPT
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 4: Build and start all services
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 4: Building and starting services ---"
|
||||
ssh "$REMOTE_HOST" bash -s -- "$REMOTE_DIR" "$USE_DOCKER_OLLAMA" <<'REMOTE_SCRIPT'
|
||||
set -euo pipefail
|
||||
REMOTE_DIR="$1"
|
||||
USE_DOCKER_OLLAMA="$2"
|
||||
cd "$REMOTE_DIR"
|
||||
|
||||
# Stop any existing deployment
|
||||
docker compose down 2>/dev/null || true
|
||||
|
||||
# Build all images
|
||||
echo " Building images (this may take a few minutes)..."
|
||||
docker compose build --quiet 2>&1 | tail -5
|
||||
|
||||
# Start infrastructure
|
||||
echo " Starting infrastructure..."
|
||||
if [ "$USE_DOCKER_OLLAMA" = "true" ]; then
|
||||
docker compose up -d postgres redis minio minio-init ollama
|
||||
else
|
||||
docker compose up -d postgres redis minio minio-init
|
||||
fi
|
||||
|
||||
# Wait for infrastructure to be healthy
|
||||
echo " Waiting for infrastructure health checks..."
|
||||
# Poll each infrastructure service (up to 30 tries, 2 s apart) until
# `docker compose ps` reports it healthy. Services that never get there are
# collected and reported; deployment continues either way.
pending_services=""
for svc in postgres redis minio; do
  svc_ready=false
  for _ in $(seq 1 30); do
    # Match the literal "(healthy)" marker: a bare "healthy" pattern also
    # matches the "(unhealthy)" status.
    if docker compose ps "$svc" 2>/dev/null | grep -qF '(healthy)'; then
      svc_ready=true
      break
    fi
    sleep 2
  done
  if [ "$svc_ready" != "true" ]; then
    pending_services="$pending_services $svc"
  fi
done

if [ -z "$pending_services" ]; then
  echo " ✓ Infrastructure healthy"
else
  echo " ⚠ Timed out waiting for healthy status:${pending_services} — continuing anyway"
fi
|
||||
|
||||
# Start all application services
|
||||
echo " Starting application services..."
|
||||
docker compose up -d
|
||||
|
||||
echo " Waiting for services to stabilize..."
|
||||
sleep 20
|
||||
|
||||
# Show status
|
||||
echo ""
|
||||
echo " Service Status:"
|
||||
docker compose ps --format "table {{.Name}}\t{{.Status}}" 2>/dev/null | head -25 || docker compose ps
|
||||
REMOTE_SCRIPT
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 5: Seed the database
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 5: Seeding database ---"
|
||||
ssh "$REMOTE_HOST" bash -s -- "$REMOTE_DIR" <<'REMOTE_SCRIPT'
|
||||
set -euo pipefail
|
||||
cd "$1"
|
||||
|
||||
# Wait for query-api to be healthy
|
||||
# Poll query-api (up to 30 tries, 3 s apart) until `docker compose ps`
# reports it healthy. A timeout only warns; seeding is still attempted.
query_api_ready=false
for _ in $(seq 1 30); do
  # Match the literal "(healthy)" marker: a bare "healthy" pattern also
  # matches the "(unhealthy)" status.
  if docker compose ps query-api 2>/dev/null | grep -qF '(healthy)'; then
    query_api_ready=true
    break
  fi
  sleep 3
done
if [ "$query_api_ready" != "true" ]; then
  echo " ⚠ query-api did not report healthy in time — attempting seed anyway"
fi
|
||||
|
||||
# Run the symbol registry seed
|
||||
echo " Seeding symbol registry..."
|
||||
docker compose exec -T scheduler python -m services.symbol_registry.seed 2>/dev/null && echo " ✓ Database seeded" || echo " ⚠ Seed skipped (may already be seeded or service not ready)"
|
||||
REMOTE_SCRIPT
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Step 6: Ensure Ollama model is available
|
||||
# -------------------------------------------------------
|
||||
echo "--- Step 6: Checking Ollama model ---"
|
||||
ssh "$REMOTE_HOST" bash -s -- "$OLLAMA_URL" "$OLLAMA_MODEL" "$USE_DOCKER_OLLAMA" "$REMOTE_DIR" <<'REMOTE_SCRIPT'
|
||||
set -euo pipefail
|
||||
OLLAMA_URL="$1"
|
||||
OLLAMA_MODEL="$2"
|
||||
USE_DOCKER_OLLAMA="$3"
|
||||
REMOTE_DIR="$4"
|
||||
|
||||
# Make sure the requested model is available, pulling it when absent.
# Docker mode talks to the `ollama` compose service; otherwise the HTTP API
# at OLLAMA_URL is used.
# The model name is matched as a fixed string (-F): names such as
# "qwen3.5:9b-fast" contain ".", which grep would otherwise treat as a
# wildcard. "--" guards against names that begin with "-".
if [ "$USE_DOCKER_OLLAMA" = "true" ]; then
  # Pull via Docker container
  cd "$REMOTE_DIR"
  if docker compose exec -T ollama ollama list 2>/dev/null | grep -qF -- "$OLLAMA_MODEL"; then
    echo " ✓ Model $OLLAMA_MODEL already available"
  else
    echo " Pulling $OLLAMA_MODEL via Docker Ollama..."
    docker compose exec -T ollama ollama pull "$OLLAMA_MODEL"
    echo " ✓ Model pulled"
  fi
else
  # Check via API
  if curl -sf "$OLLAMA_URL/api/tags" 2>/dev/null | grep -qF -- "$OLLAMA_MODEL"; then
    echo " ✓ Model $OLLAMA_MODEL already available at $OLLAMA_URL"
  else
    echo " Pulling $OLLAMA_MODEL via $OLLAMA_URL..."
    curl -sf "$OLLAMA_URL/api/pull" -d "{\"name\":\"$OLLAMA_MODEL\"}" | tail -1
    echo " ✓ Model pulled"
  fi
fi
|
||||
REMOTE_SCRIPT
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# Done
|
||||
# -------------------------------------------------------
|
||||
# Final summary: take the host part of USER@HOST and list the service
# endpoints plus a few follow-up commands.
REMOTE_IP=$(echo "$REMOTE_HOST" | cut -d@ -f2)
cat <<EOF
=== Deployment Complete ===

Endpoints:
 Dashboard: http://${REMOTE_IP}:3000
 Query API: http://${REMOTE_IP}:8004
 Symbol Registry: http://${REMOTE_IP}:8001
 Trading Engine: http://${REMOTE_IP}:8002
 Risk Engine: http://${REMOTE_IP}:8003
 MinIO Console: http://${REMOTE_IP}:9001
 Superset: http://${REMOTE_IP}:8088
 Ollama: http://${REMOTE_IP}:11434

Commands:
 ssh $REMOTE_HOST 'cd $REMOTE_DIR && docker compose logs -f'
 ssh $REMOTE_HOST 'cd $REMOTE_DIR && docker compose ps'
 ssh $REMOTE_HOST 'cd $REMOTE_DIR && docker compose down'
EOF
|
||||
@@ -82,6 +82,13 @@ services:
|
||||
- "11434:11434"
|
||||
volumes:
|
||||
- ollama_models:/root/.ollama
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
|
||||
trino:
|
||||
image: trinodb/trino:latest
|
||||
@@ -312,6 +319,10 @@ services:
|
||||
<<: *app-env
|
||||
ports:
|
||||
- "8003:8000"
|
||||
networks:
|
||||
default:
|
||||
aliases:
|
||||
- risk
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
|
||||
@@ -16,7 +16,9 @@ WORKDIR /app
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
ARG CACHE_BUST
|
||||
COPY services/ /app/services/
|
||||
COPY scripts/ /app/scripts/
|
||||
COPY tests/ /app/tests/
|
||||
COPY conftest.py /app/conftest.py
|
||||
|
||||
|
||||
+104
-13
@@ -1,6 +1,6 @@
|
||||
# AI Agent Building Guide
|
||||
|
||||
Stonks Oracle uses three AI agents powered by a local Ollama instance. Each agent has a dedicated purpose in the pipeline, a database-backed configuration, and support for A/B testing through variants. This guide covers how each agent works, how to configure them, how to create and test variants, and how to monitor performance.
|
||||
Stonks Oracle uses three AI agents powered by local LLM inference (Ollama or vLLM). Each agent has a dedicated purpose in the pipeline, a database-backed configuration, and support for A/B testing through variants. This guide covers how each agent works, how to configure them, how to create and test variants, and how to monitor performance.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
@@ -8,6 +8,7 @@ Stonks Oracle uses three AI agents powered by a local Ollama instance. Each agen
|
||||
- [Document Intelligence Extractor](#1-document-intelligence-extractor)
|
||||
- [Global Event Classifier](#2-global-event-classifier)
|
||||
- [Thesis Rewriter](#3-thesis-rewriter)
|
||||
- [LLM Provider Abstraction](#llm-provider-abstraction)
|
||||
- [Database Schema](#database-schema)
|
||||
- [ai_agents Table](#ai_agents-table)
|
||||
- [agent_variants Table](#agent_variants-table)
|
||||
@@ -30,9 +31,10 @@ Three agents are seeded into the `ai_agents` table on first migration (migration
|
||||
| **Slug** | `document-extractor` |
|
||||
| **Purpose** | Extracts structured intelligence (sentiment, catalysts, impact scores, key facts, risks) from company news, SEC filings, earnings transcripts, and press releases |
|
||||
| **Default Model** | `qwen3.5:9b-fast` (Ollama) |
|
||||
| **Supported Providers** | `ollama`, `vllm` |
|
||||
| **Prompt Version** | `document-intel-v2` |
|
||||
| **Schema Version** | `2.0.0` |
|
||||
| **Entry Point** | `services/extractor/main.py` → `services/extractor/client.py` |
|
||||
| **Entry Point** | `services/extractor/main.py` → `services/extractor/llm_factory.py` → `services/extractor/client.py` (Ollama) or `services/extractor/vllm_client.py` (vLLM) |
|
||||
|
||||
**Input Data:**
|
||||
- Normalized document text (fetched from MinIO or passed in the Redis job payload)
|
||||
@@ -40,7 +42,7 @@ Three agents are seeded into the `ai_agents` table on first migration (migration
|
||||
- List of tracked tickers for company identification
|
||||
- Document ID for traceability
|
||||
|
||||
**Output Schema** (`ExtractionResult`):
|
||||
**Output Schema** (`ExtractionResult` — defined in `services/extractor/schemas.py`):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -81,6 +83,7 @@ Use "other" for catalyst_type if unsure. Keep evidence_spans short
|
||||
- Includes tracked ticker list with rules for company identification
|
||||
- Includes the full JSON schema field descriptions
|
||||
- Truncates documents to 8,000 characters to limit inference time
|
||||
- When an active variant has `input_token_limit > 0`, truncation uses `input_token_limit * 4` characters instead
|
||||
|
||||
---
|
||||
|
||||
@@ -91,6 +94,7 @@ Use "other" for catalyst_type if unsure. Keep evidence_spans short
|
||||
| **Slug** | `event-classifier` |
|
||||
| **Purpose** | Classifies global/geopolitical news into structured macro events with impact type, severity, affected regions/sectors/commodities, and estimated duration |
|
||||
| **Default Model** | `qwen3.5:9b-fast` (Ollama) |
|
||||
| **Supported Providers** | `ollama`, `vllm` |
|
||||
| **Prompt Version** | `event-classification-v1` |
|
||||
| **Schema Version** | `1.0.0` |
|
||||
| **Entry Point** | `services/extractor/main.py` → `services/extractor/event_classifier.py` |
|
||||
@@ -99,7 +103,7 @@ Use "other" for catalyst_type if unsure. Keep evidence_spans short
|
||||
- Normalized text of a macro news article (from the `stonks:queue:macro_classification` Redis queue)
|
||||
- Document ID for traceability
|
||||
|
||||
**Output Schema** (`GlobalEvent`):
|
||||
**Output Schema** (`GlobalEvent` — defined in `services/extractor/event_classifier.py`):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -141,9 +145,11 @@ as empty arrays.
|
||||
```
|
||||
|
||||
**User Prompt Template** (built by `build_event_classification_prompt()` in `services/extractor/event_classifier.py`):
|
||||
- Includes anti-hallucination rules
|
||||
- Includes anti-hallucination rules (no fabrication, severity "critical" reserved for multi-country events)
|
||||
- Lists all valid enum values for each field
|
||||
- Truncates articles to 6,000 characters
|
||||
- When an active variant has `input_token_limit > 0`, truncation uses `input_token_limit * 4` characters instead
|
||||
- If a variant overrides the system prompt, the classifier appends the JSON output instructions whenever they are not already present in the override
|
||||
|
||||
---
|
||||
|
||||
@@ -154,6 +160,7 @@ as empty arrays.
|
||||
| **Slug** | `thesis-rewriter` |
|
||||
| **Purpose** | Rewrites deterministic trade thesis summaries into clear, professional analyst prose. Optional layer — the system falls back to the deterministic thesis if this fails |
|
||||
| **Default Model** | `qwen3.5:9b-fast` (Ollama) |
|
||||
| **Supported Providers** | `ollama`, `vllm` |
|
||||
| **Prompt Version** | `thesis-rewrite-v1` |
|
||||
| **Schema Version** | `1.0.0` |
|
||||
| **Entry Point** | `services/recommendation/main.py` → `services/recommendation/thesis_llm.py` |
|
||||
@@ -165,6 +172,7 @@ as empty arrays.
|
||||
**Output Schema:**
|
||||
- Plain text (not JSON). The model returns only the rewritten thesis as a string, under 150 words.
|
||||
- On failure or empty response, the original deterministic thesis is returned unchanged.
|
||||
- A `_strip_thinking_block()` post-processor removes `<think>` XML tags and "Thinking Process:" blocks that some models (e.g. Qwen3) emit before the actual response.
|
||||
|
||||
**System Prompt:**
|
||||
|
||||
@@ -182,11 +190,37 @@ STRICT RULES:
|
||||
5. Use a neutral, professional tone. Avoid hype or marketing language.
|
||||
6. Return ONLY the rewritten thesis text. No JSON, no markdown, no
|
||||
commentary.
|
||||
7. Do NOT show your thinking process. Do NOT include any reasoning
|
||||
steps. Output ONLY the final rewritten text.
|
||||
```
|
||||
|
||||
**User Prompt Template** (built by `build_thesis_rewrite_prompt()` in `services/recommendation/thesis_llm.py`):
|
||||
- Includes the deterministic thesis between delimiters
|
||||
- Includes trend context: ticker, window, direction, strength, confidence, contradiction score, top catalysts, top risks
|
||||
- Appends `/no_think` suffix to suppress reasoning mode on models that support it (e.g. Qwen3)
|
||||
- Ollama calls also set `"think": false` in the request payload
|
||||
|
||||
---
|
||||
|
||||
## LLM Provider Abstraction
|
||||
|
||||
All three agents support both **Ollama** and **vLLM** as inference providers. The provider is determined by the `model_provider` field of the active variant when it sets one, and otherwise by the base agent config.
|
||||
|
||||
**Module:** `services/extractor/llm_factory.py`
|
||||
|
||||
The `build_llm_client()` factory function routes to the correct client:
|
||||
|
||||
| `model_provider` value | Client class | API endpoint |
|
||||
|------------------------|-------------|--------------|
|
||||
| `ollama` (default), `""`, `None` | `OllamaClient` (`services/extractor/client.py`) | `{OLLAMA_BASE_URL}/api/chat` |
|
||||
| `vllm` | `VLLMClient` (`services/extractor/vllm_client.py`) | `{VLLM_BASE_URL}/v1/chat/completions` (OpenAI-compatible) |
|
||||
| Unknown value | `OllamaClient` (with warning log) | Falls back to Ollama |
|
||||
|
||||
Both clients implement the `LLMClient` protocol (`services/shared/llm_protocol.py`), providing `call_llm()` and `close()` methods.
|
||||
|
||||
**Provider switching at runtime:** When a variant changes the `model_provider`, the extractor worker detects this during its periodic config refresh (every 100 jobs) and creates a new client instance. The old client is closed gracefully. A safety guard prevents switching to Ollama if `OLLAMA_BASE_URL` is empty.
|
||||
|
||||
**vLLM health check:** At startup, if the resolved provider is `vllm`, the extractor runs a health check against the vLLM endpoint. If it fails, the worker falls back to Ollama automatically.
|
||||
|
||||
---
|
||||
|
||||
@@ -202,8 +236,8 @@ Defined in migration `026_ai_agents.sql`. Stores the base configuration for each
|
||||
| `name` | `VARCHAR(100)` | — | Human-readable name (unique) |
|
||||
| `slug` | `VARCHAR(100)` | — | URL-safe identifier (unique), used by `AgentConfigResolver` |
|
||||
| `purpose` | `TEXT` | `''` | Description of what the agent does |
|
||||
| `model_provider` | `VARCHAR(50)` | `'ollama'` | LLM provider |
|
||||
| `model_name` | `VARCHAR(200)` | `'qwen3.5:9b'` | Model identifier |
|
||||
| `model_provider` | `VARCHAR(50)` | `'ollama'` | LLM provider (`ollama` or `vllm`) |
|
||||
| `model_name` | `VARCHAR(200)` | `'qwen3.5:9b-fast'` | Model identifier |
|
||||
| `system_prompt` | `TEXT` | `''` | System prompt sent to the model |
|
||||
| `user_prompt_template` | `TEXT` | `''` | User prompt template (optional — code-defined templates take precedence) |
|
||||
| `prompt_version` | `VARCHAR(100)` | `''` | Version tag for prompt tracking |
|
||||
@@ -297,13 +331,20 @@ The `AgentConfigResolver` is the central mechanism for resolving runtime agent c
|
||||
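2. **COALESCE-based override**: The SQL query uses `COALESCE(variant_column, agent_column)` for every configuration field that exists on both tables. If an active variant exists and has a non-NULL value for a field, that value is used. Otherwise, the base agent's value is used. The variant-only fields (`context_window`, `input_token_limit`, `token_budget`) have no agent-level counterpart and fall back to `0` instead.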
|
||||
|
||||
```sql
|
||||
SELECT a.id AS agent_id,
|
||||
v.id AS variant_id,
|
||||
SELECT a.id AS agent_id,
|
||||
v.id AS variant_id,
|
||||
COALESCE(v.model_provider, a.model_provider) AS model_provider,
|
||||
COALESCE(v.model_name, a.model_name) AS model_name,
|
||||
COALESCE(v.system_prompt, a.system_prompt) AS system_prompt,
|
||||
COALESCE(v.user_prompt_template, a.user_prompt_template) AS user_prompt_template,
|
||||
-- ... all other fields ...
|
||||
COALESCE(v.prompt_version, a.prompt_version) AS prompt_version,
|
||||
COALESCE(v.temperature, a.temperature) AS temperature,
|
||||
COALESCE(v.max_tokens, a.max_tokens) AS max_tokens,
|
||||
COALESCE(v.context_window, 0) AS context_window,
|
||||
COALESCE(v.input_token_limit, 0) AS input_token_limit,
|
||||
COALESCE(v.token_budget, 0) AS token_budget,
|
||||
COALESCE(v.timeout_seconds, a.timeout_seconds) AS timeout_seconds,
|
||||
COALESCE(v.max_retries, a.max_retries) AS max_retries
|
||||
FROM ai_agents a
|
||||
LEFT JOIN agent_variants v
|
||||
ON v.agent_id = a.id AND v.is_active = TRUE
|
||||
@@ -361,7 +402,10 @@ resolver.invalidate() # Clear all entries
|
||||
|
||||
### Config Refresh in Workers
|
||||
|
||||
The extractor and recommendation workers periodically re-resolve their agent config (every 100 jobs for the extractor, every 50 jobs for the recommendation worker). If the resolved model changes, the worker creates a new `OllamaClient` instance with the updated configuration.
|
||||
The extractor and recommendation workers periodically re-resolve their agent config to pick up variant swaps and model changes:
|
||||
|
||||
- **Extractor worker** (`services/extractor/main.py`): Re-resolves both `document-extractor` and `event-classifier` configs every **100 jobs**. If the resolved model or provider changes, the worker creates a new LLM client instance via `build_llm_client()` and closes the old one. A safety guard prevents switching to Ollama if `OLLAMA_BASE_URL` is empty.
|
||||
- **Recommendation worker** (`services/recommendation/main.py`): Re-resolves the `thesis-rewriter` config every **50 jobs**. If the model changes, a new `OllamaConfig` is built.
|
||||
|
||||
---
|
||||
|
||||
@@ -373,7 +417,7 @@ Every agent invocation is logged to `agent_performance_log` with the `agent_id`
|
||||
|
||||
- **Document extractor**: Logged in `services/extractor/main.py` after each extraction. Records success/failure, duration, confidence, retry count, token estimates.
|
||||
- **Event classifier**: Logged in `services/extractor/event_classifier.py` after each classification. Same fields.
|
||||
- **Thesis rewriter**: Logged in `services/recommendation/thesis_llm.py` after each rewrite attempt. Confidence is always 0.0 (not applicable for rewrites).
|
||||
- **Thesis rewriter**: Logged in `services/recommendation/thesis_llm.py` after each rewrite attempt. Confidence is always 0.0 (not applicable for rewrites). `document_id` is always NULL.
|
||||
|
||||
### Querying for Variant Comparison
|
||||
|
||||
@@ -464,6 +508,8 @@ All agent endpoints are served by the Query API (`services/api/app.py`) under th
|
||||
}
|
||||
```
|
||||
|
||||
All fields except `name` have defaults. The `slug` is auto-generated from `name` if not provided. The `model_name` defaults to `llama3.1:8b` for user-created agents.
|
||||
|
||||
**Update Agent Request Body** (all fields optional):
|
||||
|
||||
```json
|
||||
@@ -509,6 +555,30 @@ All agent endpoints are served by the Query API (`services/api/app.py`) under th
|
||||
| `PUT` | `/api/agents/{agent_id}/variants/{variant_id}` | Partial update a variant |
|
||||
| `DELETE` | `/api/agents/{agent_id}/variants/{variant_id}` | Delete a variant (returns 400 if active) |
|
||||
|
||||
**Create Variant Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"variant_name": "Llama 3.1 8B Test",
|
||||
"variant_slug": "llama-3-1-8b-test",
|
||||
"description": "Testing llama3.1:8b as an alternative",
|
||||
"model_provider": "ollama",
|
||||
"model_name": "llama3.1:8b",
|
||||
"system_prompt": "",
|
||||
"user_prompt_template": "",
|
||||
"prompt_version": "",
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 32768,
|
||||
"context_window": 0,
|
||||
"input_token_limit": 0,
|
||||
"token_budget": 0,
|
||||
"timeout_seconds": 120,
|
||||
"max_retries": 2
|
||||
}
|
||||
```
|
||||
|
||||
Required fields: `variant_name`, `model_name`. The `variant_slug` is auto-generated from `variant_name` if not provided.
|
||||
|
||||
### Clone Endpoints
|
||||
|
||||
| Method | Path | Description |
|
||||
@@ -516,7 +586,7 @@ All agent endpoints are served by the Query API (`services/api/app.py`) under th
|
||||
| `POST` | `/api/agents/{agent_id}/clone` | Clone an agent's base config as a new variant |
|
||||
| `POST` | `/api/agents/{agent_id}/variants/{variant_id}/clone` | Clone an existing variant as a new variant |
|
||||
|
||||
Clone requests copy all configuration fields from the source, with optional overrides in the request body.
|
||||
Clone requests copy all configuration fields from the source, with optional overrides in the request body. The `variant_name` field is required. All other fields default to the source's values if not provided.
|
||||
|
||||
### Activate / Deactivate
|
||||
|
||||
@@ -525,6 +595,8 @@ Clone requests copy all configuration fields from the source, with optional over
|
||||
| `POST` | `/api/agents/{agent_id}/variants/{variant_id}/activate` | Set a variant as active (deactivates any other active variant in a single transaction) |
|
||||
| `POST` | `/api/agents/{agent_id}/variants/deactivate` | Deactivate the currently active variant (agent falls back to base config) |
|
||||
|
||||
The activate endpoint uses a database transaction to atomically deactivate the current variant and activate the new one, ensuring that an agent never has more than one active variant at any time.
|
||||
|
||||
### Per-Variant Performance
|
||||
|
||||
| Method | Path | Description |
|
||||
@@ -532,6 +604,8 @@ Clone requests copy all configuration fields from the source, with optional over
|
||||
| `GET` | `/api/agents/{agent_id}/variants/{variant_id}/performance` | Aggregated metrics for a specific variant |
|
||||
| `GET` | `/api/agents/{agent_id}/variants/{variant_id}/performance/history` | Hourly time-series for a specific variant |
|
||||
|
||||
Both endpoints accept the same `hours` query parameter (default 24, max 720) and return the same response shape as the agent-level performance endpoints.
|
||||
|
||||
---
|
||||
|
||||
## Step-by-Step: Creating and Activating a Variant
|
||||
@@ -616,3 +690,20 @@ curl -s -X PUT \
|
||||
```
|
||||
|
||||
Then re-activate and compare again.
|
||||
|
||||
### 7. Switch to vLLM Provider
|
||||
|
||||
To test a variant using vLLM instead of Ollama:
|
||||
|
||||
```bash
|
||||
curl -s -X POST https://stonks-api.celestium.life/api/agents/$AGENT_ID/clone \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"variant_name": "vLLM Qwen3 Test",
|
||||
"description": "Testing extraction with vLLM backend",
|
||||
"model_provider": "vllm",
|
||||
"model_name": "Qwen/Qwen3-8B"
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
Once the variant is activated, the extractor worker will detect the provider change during its next config refresh and build a `VLLMClient` instead of an `OllamaClient`. Ensure the `VLLM_BASE_URL` environment variable is set in the extractor deployment.
|
||||
+185
-19
@@ -142,14 +142,35 @@ Trend projection for a specific trend window.
|
||||
### 1.5 Market Prices
|
||||
|
||||
#### `GET /api/market/prices/{ticker}`
|
||||
Historical close prices from `market_snapshots`.
|
||||
Historical OHLCV bars from `market_snapshots`, deduplicated by bar timestamp and ordered oldest-first. Also returns the 90-day high/low range.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `limit` | int | `30` | max `200` | Max bars returned |
|
||||
| `limit` | int | `200` | max `500` | Max bars returned |
|
||||
|
||||
- **Path params:** `ticker` (auto-uppercased)
|
||||
- **Response:** Array of OHLCV objects ordered oldest-first
|
||||
- **Response:** `{ bars: [{ ticker, close, open, high, low, volume, bar_timestamp, captured_at }], range_90d: { low, high } }`
|
||||
|
||||
#### `POST /api/market/backfill/{ticker}`
|
||||
Backfill daily OHLCV bars from Polygon for the last N days. Deduplicates by bar timestamp.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `days` | int | `90` | max `365` | Number of days to backfill |
|
||||
|
||||
- **Path params:** `ticker` (auto-uppercased)
|
||||
- **Response:** `{ ticker, inserted, total_bars, days }`
|
||||
- **Errors:** `503` — No market data API key configured
|
||||
|
||||
#### `POST /api/market/backfill-all`
|
||||
Backfill daily bars for all active companies from Polygon.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `days` | int | `90` | max `365` | Number of days to backfill |
|
||||
|
||||
- **Response:** `{ total_inserted, tickers, details[] }` — each detail has `{ ticker, inserted }` or `{ ticker, inserted: 0, error }`
|
||||
- **Errors:** `503` — No market data API key configured
|
||||
|
||||
### 1.6 Recommendations
|
||||
|
||||
@@ -224,8 +245,6 @@ Get audit events for any entity type and ID.
|
||||
|
||||
- **Path params:** `entity_type` (string), `entity_id` (string)
|
||||
- **Response:** Array of audit event objects
|
||||
- **Errors:** `404` — No audit events found
|
||||
|
||||
|
||||
### 1.10 Admin: Source Health
|
||||
|
||||
@@ -331,6 +350,8 @@ Approve or reject a pending operator approval request.
|
||||
#### `GET /api/admin/trading/lockouts`
|
||||
List active symbol lockouts (news-shock, cooldown, manual).
|
||||
|
||||
- **Response:** Array of lockout objects
|
||||
|
||||
#### `POST /api/admin/trading/lockouts`
|
||||
Create a manual symbol lockout.
|
||||
|
||||
@@ -353,7 +374,6 @@ Update operator approval settings.
|
||||
- **Body:** `{ auto_approve_paper?: bool, require_approval_for_live?: bool, approval_timeout_minutes?: int }`
|
||||
- **Response:** Updated approval settings
|
||||
|
||||
|
||||
### 1.13 Operational Dashboard
|
||||
|
||||
#### `GET /api/ops/ingestion/throughput`
|
||||
@@ -450,7 +470,7 @@ Trino catalog/schema/table/column metadata for the schema browser.
|
||||
#### `GET /api/analytics/pg-schema`
|
||||
PostgreSQL table/column metadata with primary keys, foreign keys, and row estimates.
|
||||
|
||||
- **Response:** `{ catalog: "postgresql", schema: "public", tables[] }`
|
||||
- **Response:** `{ catalog: "postgresql", schema: "public", tables[{ name, row_estimate, columns[{ name, type, nullable, primary_key?, references?, has_default? }] }] }`
|
||||
|
||||
#### `POST /api/analytics/pg-query`
|
||||
Run read-only SQL against PostgreSQL directly. Only SELECT statements allowed.
|
||||
@@ -462,17 +482,19 @@ Run read-only SQL against PostgreSQL directly. Only SELECT statements allowed.
|
||||
#### `GET /api/analytics/saved-queries`
|
||||
List all saved queries.
|
||||
|
||||
- **Response:** Array of `{ id, name, description, sql_text, created_by, created_at, updated_at }`
|
||||
|
||||
#### `POST /api/analytics/saved-queries` (201)
|
||||
Save a new query.
|
||||
|
||||
- **Body:** `{ name: string, description?: string, sql_text: string }`
|
||||
- **Response:** `{ id, name, description, sql_text, created_by, created_at }`
|
||||
|
||||
#### `DELETE /api/analytics/saved-queries/{query_id}`
|
||||
Delete a saved query.
|
||||
|
||||
- **Errors:** `404` — Query not found
|
||||
|
||||
|
||||
### 1.16 Macro Signal Layer
|
||||
|
||||
#### `GET /api/admin/macro/status`
|
||||
@@ -501,9 +523,13 @@ List recent global events with filtering.
|
||||
| `limit` | int | `50` | max `200` | Page size |
|
||||
| `offset` | int | `0` | — | Pagination offset |
|
||||
|
||||
- **Response:** Array of global event objects with `id`, `event_types`, `severity`, `affected_regions`, `affected_sectors`, `affected_commodities`, `summary`, `key_facts`, `estimated_duration`, `confidence`, `source_document_id`, `created_at`
|
||||
|
||||
#### `GET /api/macro/events/{event_id}`
|
||||
Event detail with affected companies and macro impact scores.
|
||||
|
||||
- **Path params:** `event_id` (UUID string)
|
||||
- **Response:** Global event object + `impacts[]` (each with `company_id`, `ticker`, `macro_impact_score`, `impact_direction`, `contributing_factors`, `confidence`, `legal_name`, `sector`)
|
||||
- **Errors:** `404` — Global event not found
|
||||
|
||||
#### `GET /api/macro/impacts/{ticker}`
|
||||
@@ -515,7 +541,8 @@ Macro impacts and exposure profile for a specific company.
|
||||
| `limit` | int | `50` | max `200` | Page size |
|
||||
| `offset` | int | `0` | — | Pagination offset |
|
||||
|
||||
- **Response:** `{ exposure_profile, impacts[] }`
|
||||
- **Path params:** `ticker` (auto-uppercased)
|
||||
- **Response:** `{ exposure_profile, impacts[] }` — each impact includes `event_summary`, `event_severity`, `event_types`, `affected_regions`
|
||||
|
||||
### 1.18 Competitive Signal Layer
|
||||
|
||||
@@ -540,6 +567,7 @@ Historical patterns for a company.
|
||||
| `catalyst_type` | string | — | Filter by catalyst type |
|
||||
| `time_horizon` | string | — | Filter by time horizon |
|
||||
|
||||
- **Path params:** `ticker` (string)
|
||||
- **Response:** `{ ticker, patterns[], count }`
|
||||
|
||||
#### `GET /api/patterns/{ticker}/competitors`
|
||||
@@ -555,6 +583,7 @@ Cross-company patterns showing how this company's catalysts affected competitors
|
||||
#### `GET /api/patterns/{ticker}/competitive-signals`
|
||||
Recent competitive signals targeting this company (limit 100).
|
||||
|
||||
- **Path params:** `ticker` (string)
|
||||
- **Response:** `{ ticker, competitive_signals[], count }`
|
||||
|
||||
#### `GET /api/patterns/{ticker}/decisions`
|
||||
@@ -564,9 +593,9 @@ Major corporate decision history with trend outcomes and pattern statistics.
|
||||
|-----------|------|---------|-------------|
|
||||
| `time_horizon` | string | — | Filter by time horizon |
|
||||
|
||||
- **Path params:** `ticker` (string)
|
||||
- **Response:** `{ ticker, decisions[], count }` — each decision includes `pattern_statistics[]`
|
||||
|
||||
|
||||
### 1.20 AI Agents
|
||||
|
||||
#### `GET /api/agents`
|
||||
@@ -576,9 +605,12 @@ List all AI agent configurations.
|
||||
|-----------|------|---------|-------------|
|
||||
| `active_only` | bool | `false` | Only show active agents |
|
||||
|
||||
- **Response:** Array of agent objects with `id`, `name`, `slug`, `purpose`, `model_provider`, `model_name`, `system_prompt`, `user_prompt_template`, `prompt_version`, `schema_version`, `temperature`, `max_tokens`, `timeout_seconds`, `max_retries`, `active`, `source`, `created_at`, `updated_at`
|
||||
|
||||
#### `GET /api/agents/{agent_id}`
|
||||
Get a single agent configuration.
|
||||
|
||||
- **Path params:** `agent_id` (UUID string)
|
||||
- **Errors:** `404` — Agent not found
|
||||
|
||||
#### `POST /api/agents` (201)
|
||||
@@ -603,9 +635,9 @@ Create a new user-defined agent.
|
||||
| `max_retries` | int | `2` | Max retry attempts |
|
||||
|
||||
#### `PUT /api/agents/{agent_id}`
|
||||
Update an agent configuration. Partial updates supported.
|
||||
Update an agent configuration. Partial updates supported — only provided fields are changed.
|
||||
|
||||
- **Body:** `AgentUpdateBody` — all fields optional (same fields as create)
|
||||
- **Body:** `AgentUpdateBody` — all fields optional (same fields as create plus `active`)
|
||||
- **Errors:** `400` — No fields to update; `404` — Agent not found
|
||||
|
||||
#### `DELETE /api/agents/{agent_id}`
|
||||
@@ -636,6 +668,8 @@ Hourly performance time-series for an agent.
|
||||
#### `GET /api/agents/{agent_id}/variants`
|
||||
List all variants for an agent, ordered by `created_at` ascending.
|
||||
|
||||
- **Response:** Array of variant objects with `id`, `agent_id`, `variant_name`, `variant_slug`, `description`, `model_provider`, `model_name`, `system_prompt`, `user_prompt_template`, `prompt_version`, `temperature`, `max_tokens`, `context_window`, `input_token_limit`, `token_budget`, `timeout_seconds`, `max_retries`, `is_active`, `created_at`, `updated_at`
|
||||
|
||||
#### `GET /api/agents/{agent_id}/variants/{variant_id}`
|
||||
Get a single variant.
|
||||
|
||||
@@ -680,13 +714,13 @@ Delete a variant. Cannot delete active variants.
|
||||
#### `POST /api/agents/{agent_id}/clone` (201)
|
||||
Clone an agent's configuration as a new variant with optional overrides.
|
||||
|
||||
- **Body:** `VariantCloneBody { variant_name, variant_slug?, ...optional overrides }`
|
||||
- **Body:** `VariantCloneBody { variant_name, variant_slug?, description?, model_provider?, model_name?, system_prompt?, user_prompt_template?, prompt_version?, temperature?, max_tokens?, context_window?, input_token_limit?, token_budget?, timeout_seconds?, max_retries? }`
|
||||
- **Errors:** `404` — Agent not found; `409` — Duplicate slug
|
||||
|
||||
#### `POST /api/agents/{agent_id}/variants/{variant_id}/clone` (201)
|
||||
Clone an existing variant as a new variant with optional overrides.
|
||||
|
||||
- **Body:** `VariantCloneBody`
|
||||
- **Body:** `VariantCloneBody` (same as above)
|
||||
- **Errors:** `404` — Source variant not found; `409` — Duplicate slug
|
||||
|
||||
#### `POST /api/agents/{agent_id}/variants/{variant_id}/activate`
|
||||
@@ -697,6 +731,8 @@ Set a variant as the active variant for its agent. Deactivates any currently act
|
||||
#### `POST /api/agents/{agent_id}/variants/deactivate`
|
||||
Deactivate the currently active variant. Agent falls back to base configuration.
|
||||
|
||||
- **Response:** `{ deactivated: true }`
|
||||
|
||||
#### `GET /api/agents/{agent_id}/variants/{variant_id}/performance`
|
||||
Aggregated performance metrics for a specific variant.
|
||||
|
||||
@@ -704,6 +740,8 @@ Aggregated performance metrics for a specific variant.
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `hours` | int | `24` | max `720` | Time window |
|
||||
|
||||
- **Response:** Same shape as agent performance (invocations, successes, failures, durations, confidence, tokens, success_rate)
|
||||
|
||||
#### `GET /api/agents/{agent_id}/variants/{variant_id}/performance/history`
|
||||
Hourly performance time-series for a specific variant.
|
||||
|
||||
@@ -711,6 +749,108 @@ Hourly performance time-series for a specific variant.
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `hours` | int | `24` | max `720` | Time window |
|
||||
|
||||
- **Response:** Array of `{ hour, invocations, successes, avg_duration_ms, avg_confidence }`
|
||||
|
||||
### 1.22 Model Validation
|
||||
|
||||
#### `GET /api/validation/summary`
|
||||
Latest model metric snapshot plus quality gate status.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ snapshot: { id, generated_at, lookback_window, horizon, prediction_count, win_rate, directional_accuracy, information_coefficient, rank_information_coefficient, avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector, calibration_error, brier_score, buy_win_rate, sell_win_rate, hold_win_rate, metadata }, gate_status }`
|
||||
- **Errors:** `400` — Invalid lookback or horizon value
|
||||
|
||||
#### `GET /api/validation/calibration`
|
||||
Calibration table with confidence buckets showing predicted vs observed win rates.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ buckets: [{ bucket_low, bucket_high, avg_confidence, observed_win_rate, prediction_count, miscalibrated }], lookback, horizon }`
|
||||
- Buckets: 0.50–0.60, 0.60–0.70, 0.70–0.80, 0.80–0.90, 0.90–1.00
|
||||
- `miscalibrated` is `true` when `|avg_confidence - observed_win_rate| > 0.15`
|
||||
- **Errors:** `400` — Invalid lookback or horizon value
|
||||
|
||||
#### `GET /api/validation/ic-by-horizon`
|
||||
Information Coefficient and Rank IC per prediction horizon.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
|
||||
- **Response:** `{ horizons: [{ horizon, information_coefficient, rank_information_coefficient, prediction_count, generated_at }], lookback }`
|
||||
- Horizons ordered: `1h`, `6h`, `1d`, `7d`, `30d`
|
||||
- **Errors:** `400` — Invalid lookback value
|
||||
|
||||
#### `GET /api/validation/gate-status`
|
||||
Quality gate evaluation detail from `risk_configs` where `name = 'model_quality_gate'`.
|
||||
|
||||
- **Response:** `{ gate_status, updated_at }` or `{ gate_status: null, message: "No gate evaluation found..." }`
|
||||
|
||||
### 1.23 Attribution
|
||||
|
||||
#### `GET /api/validation/attribution/sources`
|
||||
Per-source performance metrics: win rate, IC, average return, duplicate rate.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ sources[], lookback, horizon }`
|
||||
- **Errors:** `400` — Invalid lookback or horizon; `500` — Computation failed
|
||||
|
||||
#### `GET /api/validation/attribution/catalysts`
|
||||
Per-catalyst-type performance metrics: win rate, IC, average return.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ catalysts[], lookback, horizon }`
|
||||
- **Errors:** `400` — Invalid lookback or horizon; `500` — Computation failed
|
||||
|
||||
#### `GET /api/validation/attribution/layers`
|
||||
Per-signal-layer (company, macro, competitive) performance metrics.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `lookback` | string | `"30d"` | `7d`, `30d`, `90d`, `all` | Lookback window |
|
||||
| `horizon` | string | `"7d"` | `1h`, `6h`, `1d`, `7d`, `30d` | Prediction horizon |
|
||||
|
||||
- **Response:** `{ layers[], lookback, horizon }` — each layer has `avg_contribution_pct`, `dominant_win_rate`, `dominant_ic`
|
||||
- **Errors:** `400` — Invalid lookback or horizon; `500` — Computation failed
|
||||
|
||||
### 1.24 Trading Reports
|
||||
|
||||
#### `GET /api/reports`
|
||||
Paginated list of trading reports with optional filtering.
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `report_type` | string | — | `daily` or `weekly` | Filter by report type |
|
||||
| `start_date` | string | — | ISO date (YYYY-MM-DD) | Filter `period_start >= this` |
|
||||
| `end_date` | string | — | ISO date (YYYY-MM-DD) | Filter `period_end <= this` |
|
||||
| `limit` | int | `20` | max `100` | Page size |
|
||||
| `offset` | int | `0` | min `0` | Pagination offset |
|
||||
|
||||
- **Response:** Array of `{ id, report_type, period_start, period_end, validation_status, generated_at }`
|
||||
- **Errors:** `400` — Invalid `report_type` or date format
|
||||
|
||||
#### `GET /api/reports/{report_id}`
|
||||
Fetch a single report including full `report_data` JSONB.
|
||||
|
||||
- **Path params:** `report_id` (UUID string)
|
||||
- **Response:** `{ id, report_type, period_start, period_end, report_data, validation_status, generated_at, created_at }`
|
||||
- **Errors:** `404` — Report not found
|
||||
|
||||
---
|
||||
|
||||
## 2. Symbol Registry API
|
||||
@@ -756,6 +896,7 @@ List tracked companies.
|
||||
#### `GET /companies/{company_id}`
|
||||
Get a single company.
|
||||
|
||||
- **Path params:** `company_id` (UUID string)
|
||||
- **Errors:** `404` — Company not found
|
||||
|
||||
#### `PUT /companies/{company_id}`
|
||||
@@ -783,14 +924,18 @@ List aliases for a company.
|
||||
Create a new watchlist.
|
||||
|
||||
- **Body:** `{ name: string, description?: string }`
|
||||
- **Response:** `{ id, name, description, active }`
|
||||
- **Errors:** `409` — Watchlist name already exists
|
||||
|
||||
#### `GET /watchlists`
|
||||
List all watchlists.
|
||||
|
||||
- **Response:** Array of `{ id, name, description, active }`
|
||||
|
||||
#### `POST /watchlists/{watchlist_id}/members/{company_id}` (201)
|
||||
Add a company to a watchlist.
|
||||
|
||||
- **Response:** `{ status: "added" }`
|
||||
- **Errors:** `409` — Already a member; `404` — Watchlist or company not found
|
||||
|
||||
#### `GET /watchlists/{watchlist_id}/members`
|
||||
@@ -814,11 +959,14 @@ Add a data source for a company.
|
||||
| `retention_days` | int | `365` | — | Data retention period |
|
||||
| `access_policy` | string | `"internal"` | `internal`, `public`, `restricted` | Access policy |
|
||||
|
||||
- **Response:** `{ id, source_type, source_name, credibility_score, active }`
|
||||
- **Errors:** `404` — Company not found; `422` — Invalid source_type or access_policy
|
||||
|
||||
#### `GET /companies/{company_id}/sources`
|
||||
List sources for a company.
|
||||
|
||||
- **Response:** Array of `{ id, source_type, source_name, config, credibility_score, retention_days, access_policy, active }`
|
||||
|
||||
### 2.6 Exposure Profiles
|
||||
|
||||
#### `GET /companies/{company_id}/exposure`
|
||||
@@ -848,6 +996,8 @@ Create or update an exposure profile. Archives the previous active version.
|
||||
#### `GET /companies/{company_id}/exposure/history`
|
||||
Get all exposure profile versions for a company, ordered by version descending.
|
||||
|
||||
- **Response:** Array of `ExposureProfileResponse`
|
||||
|
||||
### 2.7 Competitor Relationships
|
||||
|
||||
#### `POST /companies/{company_id}/competitors` (201)
|
||||
@@ -863,10 +1013,11 @@ Create a competitor relationship. Records an audit event.
|
||||
| `bidirectional` | bool | `true` | — | Bidirectional relationship |
|
||||
| `source` | string | `"manual"` | `manual`, `inferred` | Data source |
|
||||
|
||||
- **Response:** `CompetitorRelationship { id, company_a_id, company_b_id, relationship_type, strength, bidirectional, source, active, created_at, updated_at }`
|
||||
- **Errors:** `400` — Self-reference; `404` — Company not found; `409` — Relationship already exists
|
||||
|
||||
#### `GET /companies/{company_id}/competitors`
|
||||
List active competitor relationships, enriched with ticker and legal_name of the other company.
|
||||
List active competitor relationships, enriched with `ticker` and `legal_name` of the other company. Ordered by strength descending.
|
||||
|
||||
- **Errors:** `404` — Company not found
|
||||
|
||||
@@ -879,6 +1030,7 @@ Update a competitor relationship. Records an audit event with previous state.
|
||||
#### `DELETE /companies/{company_id}/competitors/{relationship_id}`
|
||||
Soft-delete a competitor relationship (sets `active=false`). Records an audit event.
|
||||
|
||||
- **Response:** `{ status: "deleted", id }`
|
||||
- **Errors:** `404` — Active relationship not found
|
||||
|
||||
### 2.8 Competitor Inference
|
||||
@@ -923,7 +1075,7 @@ Diagnostic endpoint showing engine internals for troubleshooting.
|
||||
#### `GET /api/trading/status`
|
||||
Return current engine state.
|
||||
|
||||
- **Response:** `{ enabled, paused, risk_tier, circuit_breaker_status, active_pool, reserve_pool, portfolio_heat, open_positions, last_decision_at }`
|
||||
- **Response:** `{ enabled, paused, risk_tier, circuit_breaker_status, active_pool, reserve_pool, portfolio_heat, open_positions, open_position_count, max_open_positions, absolute_position_cap, last_decision_at }`
|
||||
- **Errors:** `503` — Engine not initialised
|
||||
|
||||
#### `PUT /api/trading/config`
|
||||
@@ -960,7 +1112,13 @@ Resume the trading engine.
|
||||
#### `POST /api/trading/reset`
|
||||
Full paper trading reset: liquidate broker positions, cancel orders, clear trading state, reset capital.
|
||||
|
||||
- **Body:** `{ initial_capital?: float (default 0.0) }` — if 0, uses broker balance or defaults to 100,000
|
||||
- **Body:** `CapitalRequest`
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `initial_capital` | float | `0.0` | If 0, uses broker balance or defaults to 100,000 |
|
||||
| `reserve_pct` | float | `null` | Reserve pool share, expressed as a fraction between 0 and 1 (e.g. `0.2` = 20%). If null, uses the engine config value `reserve_siphon_pct` |
|
||||
|
||||
- **Response:** `{ reset: true, initial_capital, active_pool, reserve_pool, broker: { orders_cancelled, positions_closed, portfolio_value, cash, buying_power } }`
|
||||
- **Errors:** `503` — Engine not initialised; `500` — Database reset failed
|
||||
|
||||
@@ -977,6 +1135,8 @@ Return recent trading decisions from the database.
|
||||
| `limit` | int | `50` | max `200` | Page size |
|
||||
| `offset` | int | `0` | — | Pagination offset |
|
||||
|
||||
- **Response:** Array of `{ id, recommendation_id, decision, skip_reason, ticker, computed_position_size, computed_share_quantity, risk_tier_at_decision, portfolio_heat_at_decision, active_pool_at_decision, reserve_pool_at_decision, circuit_breaker_status, is_micro_trade, created_at }`
|
||||
|
||||
### 3.5 Performance Metrics
|
||||
|
||||
#### `GET /api/trading/metrics`
|
||||
@@ -992,6 +1152,8 @@ Return historical daily portfolio snapshots.
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
| `limit` | int | `30` | max `365` | Max snapshots |
|
||||
|
||||
- **Response:** Array of `{ id, snapshot_date, portfolio_value, active_pool, reserve_pool, daily_return, cumulative_return, unrealized_pnl, realized_pnl, win_count, loss_count, win_rate, sharpe_ratio, max_drawdown, current_drawdown_pct, portfolio_heat, risk_tier, created_at }`
|
||||
|
||||
### 3.6 Backtesting
|
||||
|
||||
#### `POST /api/trading/backtest`
|
||||
@@ -1012,6 +1174,7 @@ Launch a backtest run asynchronously.
|
||||
#### `GET /api/trading/backtest/{backtest_id}`
|
||||
Retrieve backtest results.
|
||||
|
||||
- **Path params:** `backtest_id` (UUID string)
|
||||
- **Response:** `{ id, start_date, end_date, initial_capital, risk_tier, config, total_return, sharpe_ratio, max_drawdown, win_rate, profit_factor, trade_count, equity_curve[], trades[], status, completed_at, created_at }`
|
||||
- Status values: `running`, `completed`, `not_found`, `pending`
|
||||
|
||||
@@ -1037,10 +1200,11 @@ Update notification preferences.
|
||||
|
||||
All fields optional.
|
||||
|
||||
- **Response:** `{ updated: { ...changed fields } }`
|
||||
- **Errors:** `503` — Engine not initialised
|
||||
|
||||
#### `GET /api/trading/notifications/history`
|
||||
Return recent notifications.
|
||||
Return recent notifications (placeholder — currently returns an empty array).
|
||||
|
||||
| Parameter | Type | Default | Constraints | Description |
|
||||
|-----------|------|---------|-------------|-------------|
|
||||
@@ -1116,6 +1280,8 @@ List pending approval requests.
|
||||
#### `GET /approvals/{approval_id}`
|
||||
Get a single approval request.
|
||||
|
||||
- **Path params:** `approval_id` (UUID string)
|
||||
- **Response:** Approval request object
|
||||
- **Errors:** `404` — Approval not found; `503` — Database not ready
|
||||
|
||||
#### `POST /approvals/{approval_id}/review`
|
||||
@@ -1138,4 +1304,4 @@ Approve or reject a pending approval request.
|
||||
Expire stale approvals that have passed their expiration time.
|
||||
|
||||
- **Response:** `{ expired: int, items: [] }`
|
||||
- **Errors:** `503` — Database not ready
|
||||
- **Errors:** `503` — Database not ready
|
||||
|
||||
@@ -18,13 +18,13 @@ flowchart TB
|
||||
end
|
||||
|
||||
%% ── Scheduler ─────────────────────────────────────────────────
|
||||
scheduler["<b>Scheduler</b><br/><i>services.scheduler.app</i><br/>Cadence polling, rate limiting,<br/>backoff & stale recovery"]
|
||||
scheduler["<b>Scheduler</b><br/><i>services.scheduler.app</i><br/>Cadence polling, rate limiting,<br/>backoff, stale recovery,<br/>periodic aggregation,<br/>report scheduling"]
|
||||
|
||||
sources -.->|"API polling<br/>on cadence"| scheduler
|
||||
|
||||
%% ── Ingestion Queue ───────────────────────────────────────────
|
||||
q_ingestion[["stonks:queue:ingestion"]]
|
||||
scheduler -->|"rpush job"| q_ingestion
|
||||
scheduler -->|"rpush job<br/>(company, macro,<br/>global market)"| q_ingestion
|
||||
|
||||
%% ── Ingestion Worker ──────────────────────────────────────────
|
||||
ingestion["<b>Ingestion</b><br/><i>services.ingestion.worker</i><br/>Adapter dispatch, dedupe,<br/>raw artifact upload"]
|
||||
@@ -42,7 +42,7 @@ flowchart TB
|
||||
|
||||
%% ── Parsing Queue ─────────────────────────────────────────────
|
||||
q_parsing[["stonks:queue:parsing"]]
|
||||
ingestion -->|"rpush<br/>(news, filings,<br/>web_scrape)"| q_parsing
|
||||
ingestion -->|"rpush<br/>(news, filings,<br/>web_scrape, macro)"| q_parsing
|
||||
|
||||
%% ── Parser Worker ─────────────────────────────────────────────
|
||||
parser["<b>Parser</b><br/><i>services.parser.worker</i><br/>HTML parsing, quality scoring,<br/>company mention detection"]
|
||||
@@ -50,7 +50,7 @@ flowchart TB
|
||||
q_parsing -->|"lpop"| parser
|
||||
|
||||
minio_norm[("MinIO<br/><i>Normalized Text</i><br/><i>Parser Output JSON</i>")]
|
||||
parser -->|"upload normalized text"| minio_norm
|
||||
parser -->|"upload normalized text<br/>+ structured output"| minio_norm
|
||||
parser -->|"update document status,<br/>insert mentions"| pg_docs
|
||||
```
|
||||
|
||||
@@ -70,18 +70,23 @@ flowchart TB
|
||||
parser -->|"rpush<br/>(standard docs)"| q_extraction
|
||||
parser -->|"rpush<br/>(macro_event docs)"| q_macro
|
||||
|
||||
%% ── Scheduler Recovery ────────────────────────────────────────
|
||||
scheduler_recovery(("Scheduler<br/><i>stale recovery &<br/>failed retry</i>"))
|
||||
scheduler_recovery -.->|"re-enqueue orphaned<br/>parsed docs"| q_extraction
|
||||
scheduler_recovery -.->|"re-enqueue orphaned<br/>macro docs"| q_macro
|
||||
|
||||
%% ── Extractor Worker ──────────────────────────────────────────
|
||||
subgraph extractor_svc ["Extractor Service"]
|
||||
direction TB
|
||||
ext_main["<b>Extractor</b><br/><i>services.extractor.main</i><br/>Alternates between queues<br/>(2 extraction : 1 macro)"]
|
||||
ext_main["<b>Extractor</b><br/><i>services.extractor.main</i><br/>Alternates between queues<br/>(2 extraction : 1 macro)<br/>Token budget enforcement"]
|
||||
end
|
||||
|
||||
q_extraction -->|"lpop"| ext_main
|
||||
q_macro -->|"lpop"| ext_main
|
||||
|
||||
%% ── Ollama LLM ───────────────────────────────────────────────
|
||||
ollama["<b>Ollama</b><br/><i>LLM Inference</i><br/>document-extractor agent<br/>event-classifier agent"]
|
||||
ext_main <-->|"HTTP /api/generate"| ollama
|
||||
ollama["<b>Ollama / vLLM</b><br/><i>LLM Inference</i><br/>document-extractor agent<br/>event-classifier agent"]
|
||||
ext_main <-->|"HTTP /api/generate<br/>(AgentConfigResolver<br/>selects model + variant)"| ollama
|
||||
|
||||
%% ── Signal Layer 1: Company ───────────────────────────────────
|
||||
subgraph layer1 ["Layer 1 — Company Signals"]
|
||||
@@ -95,7 +100,7 @@ flowchart TB
|
||||
subgraph layer2 ["Layer 2 — Macro Signals"]
|
||||
direction LR
|
||||
ge["global_events"]
|
||||
mir["macro_impact_records<br/><i>per-company interpolation</i>"]
|
||||
mir["macro_impact_records<br/><i>per-company interpolation<br/>via exposure profiles</i>"]
|
||||
ge --> mir
|
||||
end
|
||||
|
||||
@@ -106,6 +111,10 @@ flowchart TB
|
||||
q_agg[["stonks:queue:aggregation"]]
|
||||
ext_main -->|"rpush<br/>(per ticker)"| q_agg
|
||||
|
||||
%% ── Scheduler Periodic Aggregation ────────────────────────────
|
||||
scheduler_agg(("Scheduler<br/><i>periodic aggregation<br/>every ~15 min</i>"))
|
||||
scheduler_agg -.->|"rpush all<br/>active tickers"| q_agg
|
||||
|
||||
%% ── Aggregation Worker ────────────────────────────────────────
|
||||
aggregation["<b>Aggregation</b><br/><i>services.aggregation.main</i><br/>Trend windows, scoring,<br/>contradiction detection"]
|
||||
|
||||
@@ -133,6 +142,8 @@ flowchart TB
|
||||
|
||||
## Recommendation → Trading → Broker
|
||||
|
||||
The recommendation worker consumes from the recommendation queue. The trading engine does **not** consume from a queue — it polls the `recommendations` table in PostgreSQL on a configurable interval, evaluates each recommendation through its decision pipeline, and pushes "act" decisions to the broker queue.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
%% ── Recommendation Queue ──────────────────────────────────────
|
||||
@@ -144,19 +155,23 @@ flowchart TB
|
||||
|
||||
q_rec -->|"lpop"| recommendation
|
||||
|
||||
ollama_thesis["<b>Ollama</b><br/><i>thesis-rewriter agent</i><br/>(optional LLM rewrite)"]
|
||||
ollama_thesis["<b>Ollama / vLLM</b><br/><i>thesis-rewriter agent</i><br/>(AgentConfigResolver<br/>selects model + variant)"]
|
||||
recommendation <-->|"rewrite thesis<br/>(trading-eligible only)"| ollama_thesis
|
||||
|
||||
pg_recs[("PostgreSQL<br/><i>recommendations,<br/>recommendation_evidence,<br/>risk_evaluations</i>")]
|
||||
recommendation -->|"persist recommendation<br/>+ evidence + risk eval"| pg_recs
|
||||
|
||||
%% ── Lake Publication (inline) ─────────────────────────────────
|
||||
minio_rec_lake[("MinIO<br/><i>Lakehouse</i><br/>recommendation facts")]
|
||||
recommendation -->|"publish_recommendation_facts<br/>(Parquet)"| minio_rec_lake
|
||||
|
||||
%% ── Trading Engine ────────────────────────────────────────────
|
||||
subgraph trading_loop ["Trading Engine Decision Loop"]
|
||||
direction TB
|
||||
poll["Poll recommendations<br/><i>action IN (buy, sell)<br/>mode IN (paper, live)<br/>generated_at > last_poll</i>"]
|
||||
dedup_check["Redis dedup check<br/><i>stonks:dedupe:trading:*</i>"]
|
||||
evaluate["evaluate_recommendation<br/><i>Circuit breaker check<br/>Trading window check<br/>Confidence gate<br/>Sector exposure check<br/>Correlation check<br/>Earnings blackout</i>"]
|
||||
size["Position sizing<br/><i>Kelly criterion,<br/>risk tier limits</i>"]
|
||||
evaluate["evaluate_recommendation<br/><i>Circuit breaker check<br/>Trading window check<br/>Confidence gate<br/>Sector exposure check<br/>Correlation check<br/>Earnings blackout<br/>Max positions check</i>"]
|
||||
size["Position sizing<br/><i>Kelly criterion,<br/>risk tier limits,<br/>micro-trade support</i>"]
|
||||
decide{{"Decision"}}
|
||||
poll --> dedup_check --> evaluate --> size --> decide
|
||||
end
|
||||
@@ -170,22 +185,30 @@ flowchart TB
|
||||
|
||||
pg_decisions[("PostgreSQL<br/><i>trading_decisions</i>")]
|
||||
|
||||
%% ── Manual Override ───────────────────────────────────────────
|
||||
trading_api(("Trading API<br/><i>POST /override/order</i>"))
|
||||
trading_api -->|"rpush<br/>manual order"| q_broker
|
||||
|
||||
%% ── Broker Adapter ────────────────────────────────────────────
|
||||
broker["<b>Broker Adapter</b><br/><i>services.adapters.broker_service</i><br/>Risk evaluation, idempotency,<br/>order submission, fill tracking"]
|
||||
broker["<b>Broker Adapter</b><br/><i>services.adapters.broker_service</i><br/>Idempotency, risk evaluation,<br/>approval gate, order submission,<br/>fill tracking, position sync"]
|
||||
|
||||
q_broker -->|"lpop"| broker
|
||||
|
||||
%% ── Risk Engine ───────────────────────────────────────────────
|
||||
risk["<b>Risk Engine</b><br/><i>services.risk.app</i><br/>POST /evaluate<br/>Approval workflow"]
|
||||
broker <-->|"evaluate order"| risk
|
||||
risk["<b>Risk Engine</b><br/><i>services.risk.app</i><br/>evaluate_order()<br/>Position limits, sector exposure,<br/>daily loss caps, approval workflow"]
|
||||
broker -->|"evaluate order<br/>(inline call)"| risk
|
||||
|
||||
%% ── Alpaca ────────────────────────────────────────────────────
|
||||
alpaca["<b>Alpaca</b><br/><i>Paper Trading API</i><br/>Order submission,<br/>position sync"]
|
||||
broker <-->|"submit order /<br/>sync positions"| alpaca
|
||||
alpaca["<b>Alpaca</b><br/><i>Paper Trading API</i><br/>Order submission,<br/>position sync,<br/>account state"]
|
||||
broker <-->|"submit order /<br/>sync positions /<br/>sync order status"| alpaca
|
||||
|
||||
pg_orders[("PostgreSQL<br/><i>orders, order_events,<br/>positions,<br/>portfolio_snapshots</i>")]
|
||||
pg_orders[("PostgreSQL<br/><i>orders, order_events,<br/>positions,<br/>portfolio_snapshots,<br/>broker_accounts</i>")]
|
||||
broker -->|"persist order,<br/>events, positions"| pg_orders
|
||||
|
||||
%% ── Lake Publication (broker inline) ──────────────────────────
|
||||
minio_broker_lake[("MinIO<br/><i>Lakehouse</i><br/>order + fill + position facts")]
|
||||
broker -->|"publish_trade_order<br/>publish_trade_fill<br/>publish_positions_daily_batch<br/>(Parquet)"| minio_broker_lake
|
||||
|
||||
%% ── Notifications ─────────────────────────────────────────────
|
||||
subgraph notifications ["Notifications"]
|
||||
direction LR
|
||||
@@ -198,28 +221,32 @@ flowchart TB
|
||||
|
||||
## Analytical Branch — Lake Publisher
|
||||
|
||||
The lake publisher runs as a separate worker, consuming from its own queue and writing partitioned Parquet fact tables to MinIO for analytical queries.
|
||||
The lake publisher runs as a separate worker, consuming from its own queue and writing partitioned Parquet fact tables to MinIO for analytical queries. Some services (broker adapter, recommendation worker) also publish facts directly to MinIO inline, bypassing the queue.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
%% ── Lake Publish Queue ────────────────────────────────────────
|
||||
q_lake[["stonks:queue:lake_publish"]]
|
||||
|
||||
various(("Various Services<br/><i>ingestion, extractor,<br/>recommendation,<br/>broker adapter</i>"))
|
||||
various -->|"enqueue_lake_job"| q_lake
|
||||
various(("Upstream Services<br/><i>via enqueue_lake_job()</i>"))
|
||||
various -->|"rpush job<br/>(job_type + entity_id)"| q_lake
|
||||
|
||||
%% ── Lake Publisher Worker ─────────────────────────────────────
|
||||
lake["<b>Lake Publisher</b><br/><i>services.lake_publisher.jobs</i><br/>Transforms operational data<br/>into analytical facts"]
|
||||
lake["<b>Lake Publisher</b><br/><i>services.lake_publisher.jobs</i><br/>Transforms operational data<br/>into analytical facts<br/><i>15 job types supported</i>"]
|
||||
|
||||
q_lake -->|"lpop"| lake
|
||||
|
||||
pg_source[("PostgreSQL<br/><i>Operational Tables</i><br/>documents, extractions,<br/>orders, positions, events")]
|
||||
pg_source[("PostgreSQL<br/><i>Operational Tables</i><br/>documents, extractions,<br/>orders, positions, events,<br/>global_events, macro_impacts,<br/>competitive_signals")]
|
||||
lake -->|"query source data"| pg_source
|
||||
|
||||
%% ── MinIO Parquet ─────────────────────────────────────────────
|
||||
minio_lake[("MinIO<br/><i>Lakehouse Bucket</i><br/>Partitioned Parquet<br/>/year=/month=/day=")]
|
||||
lake -->|"write Parquet files"| minio_lake
|
||||
|
||||
%% ── Inline Publishers ─────────────────────────────────────────
|
||||
inline(("Inline Publishers<br/><i>broker adapter,<br/>recommendation worker</i>"))
|
||||
inline -->|"publish_* functions<br/>(direct Parquet write)"| minio_lake
|
||||
|
||||
%% ── Trino ─────────────────────────────────────────────────────
|
||||
trino["<b>Trino</b><br/><i>SQL Query Engine</i><br/>Hive connector → MinIO"]
|
||||
minio_lake -->|"read via<br/>Hive Metastore"| trino
|
||||
@@ -238,18 +265,40 @@ flowchart LR
|
||||
query_api --> dashboard
|
||||
```
|
||||
|
||||
## Report Generation
|
||||
|
||||
The scheduler manages report generation as a sub-loop, enqueuing daily and weekly report jobs to a dedicated queue and consuming them inline.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
scheduler["<b>Scheduler</b><br/><i>report schedule check</i><br/>daily @ 16:30 ET<br/>weekly @ Saturday"]
|
||||
|
||||
q_report[["stonks:queue:report_generation"]]
|
||||
scheduler -->|"rpush<br/>(daily/weekly)"| q_report
|
||||
|
||||
scheduler_consumer["<b>Scheduler</b><br/><i>report consumer loop</i><br/>pops up to 5 jobs/cycle"]
|
||||
q_report -->|"lpop"| scheduler_consumer
|
||||
|
||||
generator["<b>Report Generator</b><br/><i>services.reporting.generator</i>"]
|
||||
scheduler_consumer -->|"process_report_job()"| generator
|
||||
|
||||
pg_reports[("PostgreSQL<br/><i>trading_reports</i>")]
|
||||
generator -->|"persist report"| pg_reports
|
||||
```
|
||||
|
||||
## Complete Queue Topology
|
||||
|
||||
| Queue | Full Key | Producer(s) | Consumer |
|
||||
|-------|----------|-------------|----------|
|
||||
| Ingestion | `stonks:queue:ingestion` | Scheduler | Ingestion Worker |
|
||||
| Parsing | `stonks:queue:parsing` | Ingestion Worker | Parser Worker |
|
||||
| Extraction | `stonks:queue:extraction` | Parser (standard docs) | Extractor Worker |
|
||||
| Macro Classification | `stonks:queue:macro_classification` | Parser (macro_event docs), Scheduler | Extractor Worker |
|
||||
| Aggregation | `stonks:queue:aggregation` | Extractor Worker | Aggregation Worker |
|
||||
| Recommendation | `stonks:queue:recommendation` | Aggregation Worker | Recommendation Worker |
|
||||
| Broker Orders | `stonks:queue:broker_orders` | Trading Engine, Trading API (manual overrides) | Broker Adapter |
|
||||
| Lake Publish | `stonks:queue:lake_publish` | Various services | Lake Publisher |
|
||||
| Ingestion | `stonks:queue:ingestion` | Scheduler (company, macro, global market sources) | Ingestion Worker |
|
||||
| Parsing | `stonks:queue:parsing` | Ingestion Worker (news, filings, web_scrape, macro) | Parser Worker |
|
||||
| Extraction | `stonks:queue:extraction` | Parser (standard docs), Scheduler (stale recovery) | Extractor Worker |
|
||||
| Macro Classification | `stonks:queue:macro_classification` | Parser (macro_event docs), Scheduler (stale/failed recovery) | Extractor Worker |
|
||||
| Aggregation | `stonks:queue:aggregation` | Extractor Worker (per ticker), Scheduler (periodic, all tickers) | Aggregation Worker |
|
||||
| Recommendation | `stonks:queue:recommendation` | Aggregation Worker (ticker + window, 5 min dedup TTL) | Recommendation Worker |
|
||||
| Broker Orders | `stonks:queue:broker_orders` | Trading Engine (act decisions), Trading API (manual overrides) | Broker Adapter |
|
||||
| Lake Publish | `stonks:queue:lake_publish` | Various services (via `enqueue_lake_job()`) | Lake Publisher |
|
||||
| Report Generation | `stonks:queue:report_generation` | Scheduler (daily/weekly triggers) | Scheduler (inline consumer) |
|
||||
|
||||
Dead-letter queues follow the pattern `stonks:dlq:<queue_name>` and are populated when a job exhausts its retry budget.
|
||||
|
||||
@@ -257,18 +306,25 @@ Dead-letter queues follow the pattern `stonks:dlq:<queue_name>` and are populate
|
||||
|
||||
| Store | Role | Key Tables / Buckets |
|
||||
|-------|------|---------------------|
|
||||
| **PostgreSQL** | Structured operational data | `documents`, `document_intelligence`, `document_impact_records`, `global_events`, `macro_impact_records`, `competitive_signal_records`, `trend_windows`, `trend_history`, `trend_projections`, `recommendations`, `recommendation_evidence`, `risk_evaluations`, `orders`, `order_events`, `positions`, `portfolio_snapshots`, `trading_decisions` |
|
||||
| **Redis** | Queues, dedup markers, rate limits, circuit breaker state | `stonks:queue:*`, `stonks:dedupe:*`, `stonks:ratelimit:*`, `stonks:trading:circuit_breaker:*`, `stonks:dlq:*` |
|
||||
| **MinIO** | Object storage for raw artifacts, normalized text, and analytical Parquet files | Raw artifacts bucket, normalized text bucket, lakehouse bucket (partitioned Parquet) |
|
||||
| **PostgreSQL** | Structured operational data | `documents`, `document_intelligence`, `document_impact_records`, `document_company_mentions`, `global_events`, `macro_impact_records`, `exposure_profiles`, `competitive_signal_records`, `competitor_relationships`, `trend_windows`, `trend_history`, `trend_projections`, `recommendations`, `recommendation_evidence`, `risk_evaluations`, `orders`, `order_events`, `positions`, `portfolio_snapshots`, `trading_decisions`, `circuit_breaker_events`, `reserve_pool_ledger`, `risk_tier_history`, `broker_accounts`, `ingestion_runs`, `sources`, `companies`, `company_aliases`, `ai_agents`, `agent_variants`, `agent_performance_log`, `risk_configs`, `trading_reports` |
|
||||
| **Redis** | Queues, dedup markers, rate limits, circuit breaker state, pipeline toggle | `stonks:queue:*` (9 queues), `stonks:dedupe:*`, `stonks:dedupe:trading:*`, `stonks:ratelimit:*`, `stonks:trading:circuit_breaker:*`, `stonks:trading:notification_rate:*`, `stonks:order_idempotency:*`, `stonks:lock:*`, `stonks:cache:*`, `stonks:retry:*`, `stonks:rec_dedup:*`, `stonks:pipeline:enabled`, `stonks:dlq:*` |
|
||||
| **MinIO** | Object storage for raw artifacts, normalized text, and analytical Parquet files | Raw artifacts bucket, normalized text bucket, parser output bucket, lakehouse bucket (partitioned Parquet: documents, extractions, market bars/quotes, orders, fills, positions, PnL, global events, macro impacts, trend projections, competitive signals, competitor relationships, recommendations) |
|
||||
|
||||
## External Integration Points
|
||||
|
||||
| Integration | Service | Protocol | Purpose |
|
||||
|-------------|---------|----------|---------|
|
||||
| **Polygon.io** | Ingestion (via adapters) | HTTPS REST | News articles, market bars, grouped daily data |
|
||||
| **SEC EDGAR** | Ingestion (via FilingsDataAdapter) | HTTPS REST | 10-K, 10-Q filings |
|
||||
| **Ollama** | Extractor, Recommendation | HTTP `/api/generate` | LLM inference for document extraction, event classification, thesis rewriting |
|
||||
| **Alpaca** | Broker Adapter | HTTPS REST | Paper trading order submission, position sync, account state |
|
||||
| **Polygon.io** | Ingestion (via PolygonNewsAdapter, PolygonMarketAdapter) | HTTPS REST | News articles, market bars, grouped daily data, intraday bars |
|
||||
| **SEC EDGAR** | Ingestion (via SECEdgarAdapter) | HTTPS REST | 10-K, 10-Q filings |
|
||||
| **Macro News** | Ingestion (via MacroNewsAdapter) | HTTPS REST | Geopolitical and economic event articles |
|
||||
| **Ollama / vLLM** | Extractor, Recommendation | HTTP `/api/generate` | LLM inference for document extraction (document-extractor agent), event classification (event-classifier agent), and thesis rewriting (thesis-rewriter agent). The model and variant are selected via `AgentConfigResolver` with a 60s TTL cache. |
|
||||
| **Alpaca** | Broker Adapter | HTTPS REST | Paper/live trading: order submission, position sync, account state, order status polling |
|
||||
| **AWS SNS** | Trading Engine (notifications) | boto3 SDK | SMS alerts for circuit breaker trips, order fills, stop-loss triggers |
|
||||
| **Gmail** | Trading Engine (notifications) | SMTP (port 587 STARTTLS) | Email alerts for trading events |
|
||||
| **Trino** | Query API, Superset | JDBC / HTTP | SQL queries over lakehouse Parquet files |
|
||||
| **Trino** | Query API, Superset | HTTP | SQL queries over lakehouse Parquet files via Hive Metastore |
|
||||
|
||||
## Pipeline Toggle
|
||||
|
||||
The pipeline can be paused globally via the Redis key `stonks:pipeline:enabled`. When set to `"0"`, all queue workers (ingestion, parser, extractor, aggregation, recommendation, broker adapter, lake publisher) enter a sleep loop and stop processing jobs. The scheduler also skips scheduling cycles when the toggle is off. The toggle can be set via the Query API's pipeline control endpoints.
|
||||
|
||||
Setting `PIPELINE_DEFAULT_OFF=true` on the scheduler initializes the toggle to OFF on first boot, which is useful for staged deployments where you want to verify infrastructure before enabling the pipeline.
|
||||
|
||||
@@ -53,7 +53,7 @@ graph TB
|
||||
subgraph trading_tier ["Trading Tier"]
|
||||
direction LR
|
||||
trading_engine["trading-engine<br/><i>docker/Dockerfile</i><br/><i>uvicorn services.trading.app</i><br/>host :8002 → :8000"]
|
||||
risk_engine["risk-engine<br/><i>docker/Dockerfile</i><br/><i>uvicorn services.risk.app</i><br/>host :8003 → :8000"]
|
||||
risk_engine["risk-engine<br/><i>docker/Dockerfile</i><br/><i>uvicorn services.risk.app</i><br/>host :8003 → :8000<br/><i>alias: risk</i>"]
|
||||
broker_adapter["broker-adapter<br/><i>docker/Dockerfile</i><br/><i>python -m services.adapters.broker_service</i><br/><i>no host port</i>"]
|
||||
end
|
||||
|
||||
@@ -320,3 +320,4 @@ All containers share the default Docker Compose network. Services reference each
|
||||
| `hive-metastore` | Hive Metastore container | trino (thrift://hive-metastore:9083) |
|
||||
| `trino` | Trino container | superset (trino:8080) |
|
||||
| `query-api` | Query API container | dashboard (nginx proxy upstream) |
|
||||
| `risk` | risk-engine container (network alias) | trading-engine (risk evaluation calls) |
|
||||
|
||||
@@ -11,7 +11,7 @@ graph TB
|
||||
%% ── External traffic ──────────────────────────────────────────
|
||||
internet((Internet))
|
||||
|
||||
subgraph traefik ["kube-system (Traefik Ingress Controller)"]
|
||||
subgraph traefik ["kube-system · Traefik Ingress Controller"]
|
||||
direction LR
|
||||
ing_dash["stonks.celestium.life"]
|
||||
ing_api["stonks-api.celestium.life"]
|
||||
@@ -28,47 +28,55 @@ graph TB
|
||||
direction TB
|
||||
|
||||
%% ── API Tier (ingress-facing) ─────────────────────────────
|
||||
subgraph api_tier ["API Tier"]
|
||||
subgraph api_tier ["API Tier · tier: api"]
|
||||
direction LR
|
||||
query_api["query-api<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
symbol_registry["symbol-registry<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
query_api["query-api<br/><i>Deployment · 1 replica</i><br/>:8000<br/><i>readiness: /docs</i>"]
|
||||
symbol_registry["symbol-registry<br/><i>Deployment · 1 replica</i><br/>:8000<br/><i>readiness: /docs · liveness: /docs</i>"]
|
||||
end
|
||||
|
||||
%% ── Frontend Tier ─────────────────────────────────────────
|
||||
subgraph frontend_tier ["Frontend Tier"]
|
||||
dashboard["dashboard<br/><i>Deployment (1 replica)</i><br/>:8080<br/><i>nginx-unprivileged</i>"]
|
||||
subgraph frontend_tier ["Frontend Tier · tier: frontend"]
|
||||
dashboard["dashboard<br/><i>Deployment · 1 replica</i><br/>:8080<br/><i>nginx-unprivileged</i><br/><i>readiness: / · liveness: /</i>"]
|
||||
end
|
||||
|
||||
%% ── Trading Tier ──────────────────────────────────────────
|
||||
subgraph trading_tier ["Trading Tier"]
|
||||
subgraph trading_tier ["Trading Tier · tier: trading"]
|
||||
direction LR
|
||||
trading_engine["trading-engine<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
risk_engine["risk-engine<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
||||
broker_adapter["broker-adapter<br/><i>Deployment (1 replica)</i><br/><i>queue-driven worker</i>"]
|
||||
trading_engine["trading-engine<br/><i>Deployment · 1 replica</i><br/>:8000<br/><i>readiness: /ready · liveness: /health</i>"]
|
||||
risk_engine["risk-engine<br/><i>Deployment · 1 replica</i><br/>:8000"]
|
||||
broker_adapter["broker-adapter<br/><i>Deployment · 1 replica</i><br/><i>queue-driven worker · pipeline-gated</i>"]
|
||||
end
|
||||
|
||||
%% ── Orchestration Tier ────────────────────────────────────
|
||||
subgraph orchestration_tier ["Orchestration Tier"]
|
||||
scheduler["scheduler<br/><i>Deployment (1 replica)</i><br/><i>runs migrations + seed</i>"]
|
||||
subgraph orchestration_tier ["Orchestration Tier · tier: orchestration"]
|
||||
scheduler["scheduler<br/><i>Deployment · 1 replica · pipeline-gated</i><br/><i>init: migrations → seed → backfill</i>"]
|
||||
end
|
||||
|
||||
%% ── Ingestion Tier ────────────────────────────────────────
|
||||
subgraph ingestion_tier ["Ingestion Tier · tier: ingestion"]
|
||||
ingestion["ingestion<br/><i>Deployment · 1 replica · pipeline-gated</i><br/><i>queue-driven worker</i>"]
|
||||
end
|
||||
|
||||
%% ── Processing Tier (pipeline workers) ────────────────────
|
||||
subgraph processing_tier ["Processing Tier (pipeline workers)"]
|
||||
subgraph processing_tier ["Processing Tier · tier: processing"]
|
||||
direction LR
|
||||
ingestion["ingestion<br/><i>Deployment (2 replicas)</i>"]
|
||||
parser["parser<br/><i>Deployment (2 replicas)</i>"]
|
||||
extractor["extractor<br/><i>Deployment (1 replica)</i>"]
|
||||
aggregation["aggregation<br/><i>Deployment (4 replicas)</i>"]
|
||||
recommendation["recommendation<br/><i>Deployment (1 replica)</i>"]
|
||||
parser["parser<br/><i>Deployment · 2 replicas · pipeline-gated</i>"]
|
||||
extractor["extractor<br/><i>Deployment · 1 replica · pipeline-gated</i>"]
|
||||
aggregation["aggregation<br/><i>Deployment · 4 replicas · pipeline-gated</i>"]
|
||||
recommendation["recommendation<br/><i>Deployment · 1 replica · pipeline-gated</i>"]
|
||||
end
|
||||
|
||||
%% ── Analytics Tier ────────────────────────────────────────
|
||||
subgraph analytics_tier ["Analytics Tier"]
|
||||
subgraph analytics_tier ["Analytics Tier · tier: analytics"]
|
||||
direction LR
|
||||
lake_publisher["lake-publisher<br/><i>Deployment (1 replica)</i><br/><i>queue-driven worker</i>"]
|
||||
hive_metastore["hive-metastore<br/><i>Deployment (1 replica)</i><br/>:9083<br/><i>apache/hive:4.0.0</i>"]
|
||||
trino["trino<br/><i>Deployment (1 replica)</i><br/>:8080<br/><i>trinodb/trino:latest</i>"]
|
||||
superset["superset<br/><i>Deployment (1 replica)</i><br/>:8088<br/><i>custom image</i>"]
|
||||
lake_publisher["lake-publisher<br/><i>Deployment · 1 replica · pipeline-gated</i><br/><i>queue-driven worker</i>"]
|
||||
hive_metastore["hive-metastore<br/><i>Deployment · 1 replica</i><br/>:9083<br/><i>apache/hive:4.0.0</i><br/><i>PVC: hive-metastore-data</i>"]
|
||||
trino["trino<br/><i>Deployment · 1 replica</i><br/>:8080<br/><i>trinodb/trino:latest</i><br/><i>readiness: /v1/info</i>"]
|
||||
end
|
||||
|
||||
%% ── Superset (tier: dashboard in template) ────────────────
|
||||
subgraph superset_block ["Superset · tier: dashboard"]
|
||||
superset["superset<br/><i>Deployment · 1 replica</i><br/>:8088<br/><i>custom image</i><br/><i>PVC: superset-data</i><br/><i>readiness: /health</i>"]
|
||||
end
|
||||
|
||||
%% ── Helm Secrets ──────────────────────────────────────────
|
||||
@@ -99,7 +107,7 @@ graph TB
|
||||
end
|
||||
|
||||
subgraph ollama_ns ["ollama-service namespace"]
|
||||
ollama[("Ollama<br/>ollama:11434<br/><i>GPU: 4070 Ti Super</i>")]
|
||||
ollama[("Ollama<br/>ollama:11434<br/><i>GPU: 4070 Ti Super 16GB</i>")]
|
||||
end
|
||||
|
||||
%% ── Ingress Routes ────────────────────────────────────────────
|
||||
@@ -191,6 +199,7 @@ graph TB
|
||||
sec_broker -.-> broker_adapter
|
||||
|
||||
sec_market -.-> ingestion
|
||||
sec_market -.-> query_api
|
||||
|
||||
sec_gmail -.-> trading_engine
|
||||
|
||||
@@ -216,7 +225,9 @@ graph TB
|
||||
classDef tradingSvc fill:#e8a838,stroke:#b07d1a,color:#fff
|
||||
classDef processSvc fill:#9b59b6,stroke:#6c3483,color:#fff
|
||||
classDef orchSvc fill:#1abc9c,stroke:#148f77,color:#fff
|
||||
classDef ingestionSvc fill:#e67e22,stroke:#bf6516,color:#fff
|
||||
classDef analyticsSvc fill:#e74c3c,stroke:#a93226,color:#fff
|
||||
classDef supersetSvc fill:#c0392b,stroke:#96281b,color:#fff
|
||||
classDef extSvc fill:#95a5a6,stroke:#717d7e,color:#fff
|
||||
classDef secretSvc fill:#f5f5dc,stroke:#999,color:#333
|
||||
classDef configSvc fill:#dfe6e9,stroke:#999,color:#333
|
||||
@@ -225,8 +236,10 @@ graph TB
|
||||
class dashboard frontendSvc
|
||||
class trading_engine,risk_engine,broker_adapter tradingSvc
|
||||
class scheduler orchSvc
|
||||
class ingestion,parser,extractor,aggregation,recommendation processSvc
|
||||
class lake_publisher,hive_metastore,trino,superset analyticsSvc
|
||||
class ingestion ingestionSvc
|
||||
class parser,extractor,aggregation,recommendation processSvc
|
||||
class lake_publisher,hive_metastore,trino analyticsSvc
|
||||
class superset supersetSvc
|
||||
class postgres,redis,minio,ollama extSvc
|
||||
class sec_core,sec_broker,sec_market,sec_gmail,sec_dashboard secretSvc
|
||||
class configmap configSvc
|
||||
@@ -284,8 +297,8 @@ The following services have **no inbound network policy** — they are queue-dri
|
||||
|
||||
| Service | Tier | Behavior |
|
||||
|---------|------|----------|
|
||||
| scheduler | orchestration | Polls DB, enqueues to Redis |
|
||||
| ingestion | processing | Reads from `stonks:queue:ingestion`, writes to DB/MinIO/Redis |
|
||||
| scheduler | orchestration | Polls DB, enqueues to Redis. Runs migrations + seed + backfill as init containers |
|
||||
| ingestion | ingestion | Reads from `stonks:queue:ingestion`, writes to DB/MinIO/Redis. Egress to Polygon.io/News APIs |
|
||||
| parser | processing | Reads from `stonks:queue:parsing`, writes to DB/Redis |
|
||||
| extractor | processing | Reads from `stonks:queue:extraction`, calls Ollama, writes to DB/Redis |
|
||||
| aggregation | processing | Reads from `stonks:queue:aggregation`, writes to DB/Redis |
|
||||
@@ -294,22 +307,24 @@ The following services have **no inbound network policy** — they are queue-dri
|
||||
|
||||
## Service Tier Summary
|
||||
|
||||
| Tier | Services | Ingress? | Replicas | Notes |
|
||||
|------|----------|----------|----------|-------|
|
||||
| **api** | query-api, symbol-registry | Yes (Traefik) | 1 each | FastAPI, readiness probes on `/docs` |
|
||||
| **frontend** | dashboard | Yes (Traefik) | 1 | nginx-unprivileged on :8080, proxies to API services |
|
||||
| **trading** | trading-engine, risk-engine, broker-adapter | trading-engine: Yes; risk-engine: internal only; broker-adapter: denied | 1 each | trading-engine has egress to Alpaca + Gmail |
|
||||
| **orchestration** | scheduler | No | 1 | Runs DB migrations + seed as init containers |
|
||||
| **processing** | ingestion, parser, extractor, aggregation, recommendation | No | 2, 2, 1, 4, 1 | Pipeline-gated by `pipelineEnabled` toggle |
|
||||
| **analytics** | lake-publisher, trino, hive-metastore, superset | trino + superset: Yes; others: No | 1 each | lake-publisher is pipeline-gated |
|
||||
| Tier | Services | Ingress? | Replicas | Pipeline-Gated? | Notes |
|
||||
|------|----------|----------|----------|-----------------|-------|
|
||||
| **api** | query-api, symbol-registry | Yes (Traefik) | 1 each | No | FastAPI, readiness probes on `/docs` |
|
||||
| **frontend** | dashboard | Yes (Traefik) | 1 | No | nginx-unprivileged on :8080, proxies to API services |
|
||||
| **trading** | trading-engine, risk-engine, broker-adapter | trading-engine: Yes; risk-engine: internal only; broker-adapter: denied | 1 each | broker-adapter only | trading-engine has egress to Alpaca + Gmail |
|
||||
| **orchestration** | scheduler | No | 1 | Yes | Runs DB migrations + seed + backfill as init containers |
|
||||
| **ingestion** | ingestion | No | 1 | Yes | Fetches from external APIs (Polygon.io, news, filings) |
|
||||
| **processing** | parser, extractor, aggregation, recommendation | No | 2, 1, 4, 1 | Yes | Queue-driven pipeline workers |
|
||||
| **analytics** | lake-publisher, trino, hive-metastore | trino: Yes (Traefik); others: No | 1 each | lake-publisher only | trino + hive-metastore gated by `trino.enabled` / `hiveMetastore.enabled` |
|
||||
| **dashboard** (Superset) | superset | Yes (Traefik) | 1 | No | Gated by `superset.enabled`, custom image with trino + psycopg2 drivers |
|
||||
|
||||
## Secret Consumption Map
|
||||
|
||||
| Secret | Keys | Consumers |
|
||||
|--------|------|-----------|
|
||||
| `stonks-core-secrets` | POSTGRES_PASSWORD, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, REDIS_PASSWORD | All 13 app services + hive-metastore, trino, superset |
|
||||
| `stonks-core-secrets` | POSTGRES_PASSWORD, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, REDIS_PASSWORD | All 13 app services + hive-metastore (init), trino (init), superset |
|
||||
| `stonks-broker-secrets` | BROKER_API_KEY, BROKER_API_SECRET, BROKER_BASE_URL | ingestion, trading-engine, risk-engine, broker-adapter |
|
||||
| `stonks-market-secrets` | MARKET_DATA_API_KEY | ingestion |
|
||||
| `stonks-market-secrets` | MARKET_DATA_API_KEY | ingestion, query-api |
|
||||
| `stonks-gmail-secrets` | GMAIL_SENDER, GMAIL_RECIPIENT, GMAIL_APP_PASSWORD | trading-engine |
|
||||
| `stonks-dashboard-secrets` | SUPERSET_SECRET_KEY, SUPERSET_ADMIN_PASSWORD | superset |
|
||||
|
||||
@@ -336,10 +351,10 @@ These services run outside the `stonks-oracle` namespace and are referenced via
|
||||
|
||||
The analytics stack runs within the `stonks-oracle` namespace:
|
||||
|
||||
1. **Lake Publisher** writes Parquet fact tables to MinIO at `s3a://stonks-lakehouse/warehouse`
|
||||
2. **Hive Metastore** (Apache Hive 4.0.0) manages table metadata, backed by embedded Derby DB with a PVC for persistence. Connects to MinIO for S3A filesystem access.
|
||||
3. **Trino** queries the lakehouse via Hive Metastore (thrift://hive-metastore:9083). Exposes two catalogs: `lakehouse` (Hive connector) and `iceberg` (Iceberg connector). Both connect to MinIO for data access.
|
||||
4. **Superset** connects to Trino for lakehouse queries and to PostgreSQL for its metadata DB. Uses Redis for caching. Exposed externally via Traefik ingress.
|
||||
1. **Lake Publisher** writes Parquet fact tables to MinIO at `s3a://stonks-lakehouse/warehouse`. Pipeline-gated — scales to 0 when `pipelineEnabled: false`.
|
||||
2. **Hive Metastore** (Apache Hive 4.0.0) manages table metadata, backed by embedded Derby DB with a PVC (`hive-metastore-data`) for persistence. Connects to MinIO for S3A filesystem access. Gated by `hiveMetastore.enabled`.
|
||||
3. **Trino** queries the lakehouse via Hive Metastore (`thrift://hive-metastore:9083`). Exposes two catalogs: `lakehouse` (Hive connector) and `iceberg` (Iceberg connector). Both connect to MinIO for data access. Gated by `trino.enabled`. Readiness probe on `/v1/info`.
|
||||
4. **Superset** connects to Trino for lakehouse queries and to PostgreSQL for its metadata DB. Uses Redis for caching. Exposed externally via Traefik ingress. Gated by `superset.enabled`. Uses custom image (`registry.celestium.life/stonks-oracle/superset:latest`) with trino + psycopg2 drivers. PVC (`superset-data`) for persistence.
|
||||
|
||||
## Ingress Routes
|
||||
|
||||
@@ -353,3 +368,13 @@ All ingress resources use the `traefik` IngressClass with TLS certificates issue
|
||||
| `stonks-trading.celestium.life` | trading-engine | 8000 | `stonks-trading-tls` |
|
||||
| `stonks-dash.celestium.life` | superset | 8088 | `stonks-dash-tls` |
|
||||
| `stonks-trino.celestium.life` | trino | 8080 | `stonks-trino-tls` |
|
||||
|
||||
## Deployment Stages
|
||||
|
||||
The Helm chart supports multiple deployment stages via value override files:
|
||||
|
||||
| Stage | Override File | Namespace | Key Differences |
|
||||
|-------|--------------|-----------|-----------------|
|
||||
| **Production** | `values.yaml` (base) | `stonks-oracle` | Full analytics stack, all services |
|
||||
| **Paper** | `values-paper.yaml` | `stonks-oracle` | `BROKER_MODE=paper`, `DEPLOY_STAGE=paper`, separate DB (`stonks_paper`), Redis DB 2, paper-specific ingress hostnames |
|
||||
| **Beta** | `values-beta.yaml` | `stonks-oracle-beta` | `DEPLOY_STAGE=beta`, `LOG_LEVEL=DEBUG`, separate DB (`stonks_beta`), Redis DB 1, analytics stack disabled, beta-specific ingress hostnames |
|
||||
|
||||
+284
-15
@@ -5,6 +5,7 @@ This guide covers running the full Stonks Oracle platform locally using Docker C
|
||||
## Prerequisites
|
||||
|
||||
- Docker Engine 24+ and Docker Compose v2
|
||||
- NVIDIA GPU with drivers and NVIDIA Container Toolkit (for Ollama LLM inference)
|
||||
- At least 16 GB RAM (Ollama + Trino + all services)
|
||||
- API keys for Polygon.io and Alpaca (optional — platform runs in degraded mode without them)
|
||||
|
||||
@@ -14,20 +15,54 @@ This guide covers running the full Stonks Oracle platform locally using Docker C
|
||||
# 1. Clone the repository
|
||||
git clone <repo-url> && cd stonks-oracle
|
||||
|
||||
# 2. Configure API keys
|
||||
cp .env.example .env # or edit the existing .env
|
||||
# Fill in MARKET_DATA_API_KEY, BROKER_API_KEY, BROKER_API_SECRET
|
||||
# 2. Configure API keys (create .env in the repo root)
|
||||
cat > .env <<'EOF'
|
||||
MARKET_DATA_API_KEY=your_polygon_key
|
||||
BROKER_API_KEY=your_alpaca_key
|
||||
BROKER_API_SECRET=your_alpaca_secret
|
||||
BROKER_BASE_URL=https://paper-api.alpaca.markets
|
||||
EOF
|
||||
|
||||
# 3. Start everything
|
||||
docker compose up -d
|
||||
|
||||
# 4. Verify all services are healthy
|
||||
# 4. Pull an LLM model into Ollama
|
||||
docker compose exec ollama ollama pull qwen3.5:9b-fast
|
||||
|
||||
# 5. Seed the database
|
||||
docker compose exec scheduler python -m services.symbol_registry.seed
|
||||
|
||||
# 6. Verify all services are healthy
|
||||
docker compose ps
|
||||
|
||||
# 5. Access the dashboard
|
||||
# 7. Access the dashboard
|
||||
open http://localhost:3000
|
||||
```
|
||||
|
||||
### Automated Deployment
|
||||
|
||||
The `deploy-docker.sh` script automates the full deployment to a remote host via SSH, including prerequisite installation, repository sync, environment configuration, image builds, service startup, database seeding, and Ollama model pulling:
|
||||
|
||||
```bash
|
||||
# Deploy with defaults (GPU-accelerated Docker Ollama)
|
||||
bash deploy-docker.sh
|
||||
|
||||
# Specify a custom Ollama model
|
||||
bash deploy-docker.sh --ollama-model qwen3.6
|
||||
|
||||
# Deploy to a different host
|
||||
bash deploy-docker.sh --host user@myserver --dir /opt/stonks
|
||||
```
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--host` | `celes@192.168.42.254` | SSH target (`USER@HOST`) |
|
||||
| `--ollama-url` | (auto — Docker container) | Ollama API URL |
|
||||
| `--ollama-model` | `qwen3.5:9b-fast` | Ollama model to pull |
|
||||
| `--dir` | `~/stonks-oracle` | Remote install directory |
|
||||
|
||||
The script detects the target OS and package manager (apt, dnf, yum, pacman, zypper) and installs Docker, NVIDIA drivers, and the NVIDIA Container Toolkit as needed. It also handles WSL environments and firewall configuration.
|
||||
|
||||
---
|
||||
|
||||
## Service Inventory
|
||||
@@ -63,6 +98,8 @@ open http://localhost:3000
|
||||
| `query-api` | `docker/Dockerfile` | `uvicorn services.api.app:app --host 0.0.0.0 --port 8000` | `8004:8000` | postgres (healthy), redis (healthy), minio (healthy) |
|
||||
| `dashboard` | `frontend/Dockerfile` | nginx (built-in) | `3000:8080` | query-api (healthy) |
|
||||
|
||||
The `risk-engine` service has a Docker network alias of `risk` so the dashboard's nginx reverse proxy can resolve it as `http://risk:8000`.
|
||||
|
||||
### Port Summary
|
||||
|
||||
| Port | Service | Protocol |
|
||||
@@ -109,15 +146,27 @@ The `.env` file is loaded by `ingestion`, `broker-adapter`, and `trading-engine`
|
||||
|
||||
```dotenv
|
||||
# Stonks Oracle — Environment Variables
|
||||
# These are loaded by ingestion, broker-adapter, and trading-engine services.
|
||||
# Loaded by: ingestion, broker-adapter, trading-engine
|
||||
|
||||
# Polygon.io market data API key (required for live data ingestion)
|
||||
# ── Required for live data ingestion ──
|
||||
MARKET_DATA_API_KEY=
|
||||
|
||||
# Alpaca broker credentials (required for paper/live trading)
|
||||
# ── Required for paper/live trading ──
|
||||
BROKER_API_KEY=
|
||||
BROKER_API_SECRET=
|
||||
BROKER_BASE_URL=https://paper-api.alpaca.markets
|
||||
|
||||
# ── Trading engine settings (optional) ──
|
||||
TRADING_ENABLED=true
|
||||
TRADING_RISK_TIER=moderate
|
||||
TRADING_MAX_OPEN_POSITIONS=15
|
||||
|
||||
# ── LLM model (optional) ──
|
||||
OLLAMA_MODEL=qwen3.5:9b-fast
|
||||
|
||||
# ── Signal layers (optional) ──
|
||||
MACRO_ENABLED=true
|
||||
COMPETITIVE_ENABLED=true
|
||||
```
|
||||
|
||||
| Variable | Required | Default | Used By | Description |
|
||||
@@ -181,10 +230,21 @@ All application services support additional environment variables loaded via `se
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default LLM model for extraction |
|
||||
| `OLLAMA_TIMEOUT` | `120` | Ollama request timeout (seconds) |
|
||||
| `OLLAMA_MAX_RETRIES` | `2` | Max retries for Ollama requests |
|
||||
| `OLLAMA_RETRY_BASE_DELAY` | `1.0` | Base delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_MAX_DELAY` | `10.0` | Maximum delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_BACKOFF_MULTIPLIER` | `2.0` | Backoff multiplier for retries |
|
||||
| `VLLM_BASE_URL` | `http://192.168.42.254:8000` | vLLM server URL (if using vLLM instead of Ollama) |
|
||||
| `VLLM_MODEL` | `RedHatAI/Qwen3.6-35B-A3B-NVFP4` | vLLM model name |
|
||||
| `VLLM_TIMEOUT` | `120` | vLLM request timeout (seconds) |
|
||||
| `VLLM_MAX_RETRIES` | `2` | Max retries for vLLM requests |
|
||||
| `VLLM_TEMPERATURE` | `0.7` | vLLM sampling temperature |
|
||||
| `VLLM_MAX_TOKENS` | `4096` | vLLM max output tokens |
|
||||
| `VLLM_API_KEY` | (empty) | vLLM API key (if required) |
|
||||
| `TRINO_HOST` | `localhost` | Trino hostname |
|
||||
| `TRINO_PORT` | `8080` | Trino port |
|
||||
| `TRINO_CATALOG` | `lakehouse` | Trino catalog name |
|
||||
| `TRINO_SCHEMA` | `stonks` | Trino schema name |
|
||||
| `TRINO_ICEBERG_CATALOG` | `iceberg` | Trino Iceberg catalog name |
|
||||
| `MARKET_DATA_BASE_URL` | `https://api.polygon.io` | Polygon.io base URL |
|
||||
| `MARKET_DATA_PROVIDER` | `polygon` | Market data provider |
|
||||
| `BROKER_MODE` | `paper` | Broker mode: `paper` or `live` |
|
||||
@@ -193,8 +253,59 @@ All application services support additional environment variables loaded via `se
|
||||
| `TRADING_RISK_TIER` | `moderate` | Risk tier: `conservative`, `moderate`, `aggressive` |
|
||||
| `TRADING_POLLING_INTERVAL_SECONDS` | `60` | Recommendation polling interval |
|
||||
| `TRADING_MAX_OPEN_POSITIONS` | `10` | Maximum concurrent open positions |
|
||||
| `TRADING_RESERVE_SIPHON_PCT` | `0.20` | Percentage of profits siphoned to reserve pool |
|
||||
| `TRADING_STOP_LOSS_CHECK_INTERVAL_SECONDS` | `300` | Stop-loss check interval |
|
||||
| `TRADING_FAST_STOP_LOSS_INTERVAL_SECONDS` | `60` | Fast stop-loss check interval |
|
||||
| `TRADING_GRADUAL_ENTRY_TRANCHES` | `3` | Number of tranches for gradual entry |
|
||||
| `TRADING_GRADUAL_ENTRY_THRESHOLD_DOLLARS` | `30.0` | Dollar threshold for gradual entry |
|
||||
| `TRADING_ABSOLUTE_POSITION_CAP` | `50.0` | Maximum position size (dollars) |
|
||||
| `TRADING_ACTIVE_POOL_MINIMUM` | `100.0` | Minimum active pool balance |
|
||||
| `TRADING_EMERGENCY_DRAWDOWN_THRESHOLD_PCT` | `0.40` | Emergency drawdown threshold |
|
||||
| `TRADING_RESERVE_HIGH_WATER_PCT` | `0.30` | Reserve high-water mark percentage |
|
||||
| `TRADING_MICRO_TRADING_ENABLED` | `false` | Enable micro-trading mode |
|
||||
| `TRADING_MICRO_TRADING_INTERVAL_SECONDS` | `300` | Micro-trading polling interval |
|
||||
| `TRADING_MICRO_TRADING_ALLOCATION_CAP_PCT` | `0.03` | Micro-trading allocation cap |
|
||||
| `TRADING_MICRO_TRADING_MAX_DAILY` | `10` | Max micro-trades per day |
|
||||
| `TRADING_MICRO_TRADING_MAX_HOLD_MINUTES` | `120` | Max micro-trade hold time |
|
||||
| `TRADING_SNS_TOPIC_ARN` | (empty) | AWS SNS topic ARN for notifications |
|
||||
| `TRADING_SNS_PHONE_NUMBER` | (empty) | Phone number for SNS notifications |
|
||||
| `TRADING_GMAIL_SENDER` | (empty) | Gmail sender address for notifications |
|
||||
| `TRADING_GMAIL_RECIPIENT` | (empty) | Gmail recipient address for notifications |
|
||||
| `MACRO_ENABLED` | `true` | Enable macro signal layer |
|
||||
| `MACRO_SIGNAL_WEIGHT` | `0.3` | Relative weight of macro vs company signals |
|
||||
| `MACRO_CONFIDENCE_THRESHOLD` | `0.4` | Minimum confidence for macro event inclusion |
|
||||
| `MACRO_SHORT_TERM_STALENESS_HOURS` | `48` | Hours before short-term events get accelerated decay |
|
||||
| `PROJECTION_CONFIDENCE_THRESHOLD` | `0.3` | Minimum confidence for projections to influence recommendations |
|
||||
| `COMPETITIVE_ENABLED` | `true` | Enable competitive signal layer |
|
||||
| `COMPETITIVE_SIGNAL_WEIGHT` | `0.2` | Relative weight of competitive signals |
|
||||
| `COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD` | `0.3` | Minimum confidence for pattern inclusion |
|
||||
| `COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD` | `0.2` | Minimum strength for signal propagation |
|
||||
| `COMPETITIVE_ROUTINE_LOOKBACK_DAYS` | `180` | Lookback window for routine patterns |
|
||||
| `COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS` | `365` | Lookback window for major decisions |
|
||||
| `COMPETITIVE_MIN_PATTERN_SAMPLES` | `3` | Minimum samples for pattern matching |
|
||||
| `COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER` | `1.3` | Weight multiplier for major decision patterns |
|
||||
| `COMPETITIVE_STALENESS_WINDOW_DAYS` | `180` | Window for staleness decay on competitive signals |
|
||||
| `COMPETITIVE_STALENESS_RECENT_DAYS` | `90` | Days within which signals are considered recent |
|
||||
| `COMPETITIVE_STALENESS_DECAY_PENALTY` | `0.5` | Decay penalty for stale competitive signals |
|
||||
| `COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD` | `5` | Consecutive propagation failures before operator alert |
|
||||
| `ALERT_SOURCE_FAILURE_THRESHOLD` | `3` | Consecutive source failures before alert fires |
|
||||
| `ALERT_SOURCE_FAILURE_WINDOW_HOURS` | `6` | Lookback window for source failure alerting |
|
||||
| `ALERT_SCHEMA_FAILURE_RATE_THRESHOLD` | `0.3` | Extraction failure rate (30%) that triggers alert |
|
||||
| `ALERT_SCHEMA_FAILURE_WINDOW_HOURS` | `1` | Lookback window for schema failure spike |
|
||||
| `ALERT_LAKE_LAG_THRESHOLD_MINUTES` | `60` | Minutes since last lake publish before alert |
|
||||
| `ALERT_BROKER_ERROR_THRESHOLD` | `3` | Consecutive broker errors before alert |
|
||||
| `ALERT_BROKER_ERROR_WINDOW_HOURS` | `1` | Lookback window for broker error alerting |
|
||||
| `ALERT_CHECK_INTERVAL_SECONDS` | `120` | How often alerting rules are evaluated |
|
||||
| `RETENTION_RAW_MARKET_DAYS` | `90` | Retention period for raw market data (days) |
|
||||
| `RETENTION_RAW_NEWS_DAYS` | `180` | Retention period for raw news articles (days) |
|
||||
| `RETENTION_RAW_FILINGS_DAYS` | `365` | Retention period for raw SEC filings (days) |
|
||||
| `RETENTION_NORMALIZED_DAYS` | `180` | Retention period for normalized documents (days) |
|
||||
| `RETENTION_LLM_PROMPTS_DAYS` | `365` | Retention period for LLM prompt archives (days) |
|
||||
| `RETENTION_LLM_RESULTS_DAYS` | `365` | Retention period for LLM extraction results (days) |
|
||||
| `RETENTION_LAKEHOUSE_DAYS` | `730` | Retention period for lakehouse Parquet files (days) |
|
||||
| `RETENTION_AUDIT_DAYS` | `730` | Retention period for audit trail artifacts (days) |
|
||||
| `RETENTION_CLEANUP_INTERVAL_HOURS` | `24` | How often the retention cleanup worker runs |
|
||||
| `RETENTION_BATCH_SIZE` | `1000` | Number of objects processed per cleanup batch |
|
||||
| `LOG_LEVEL` | `INFO` | Logging level |
|
||||
| `JSON_LOGS` | `true` | Enable structured JSON logging |
|
||||
| `DEPLOY_STAGE` | (empty) | Deployment stage prefix for bucket names |
|
||||
@@ -203,6 +314,107 @@ See `services/shared/config.py` for the complete list of all supported environme
|
||||
|
||||
---
|
||||
|
||||
## LLM Provider Configuration
|
||||
|
||||
Stonks Oracle supports two LLM backends: **Ollama** (local, self-hosted) and **vLLM** (high-performance inference server). The active provider is configured per-agent in the `ai_agents` database table, but the connection details come from environment variables.
|
||||
|
||||
### Option A: Bundled Ollama (default)
|
||||
|
||||
The `docker-compose.yml` includes an Ollama container with GPU passthrough via the NVIDIA Container Toolkit. On first start, pull a model:
|
||||
|
||||
```bash
|
||||
docker compose exec ollama ollama pull qwen3.5:9b-fast
|
||||
```
|
||||
|
||||
No additional configuration needed — services connect to `http://ollama:11434` by default.
|
||||
|
||||
The Ollama container requests all available NVIDIA GPUs via the `deploy.resources.reservations.devices` configuration. If no GPU is available, Ollama falls back to CPU inference (significantly slower).
|
||||
|
||||
### Option B: External Ollama
|
||||
|
||||
If Ollama is already running on the host (e.g. with GPU access), create a `docker-compose.override.yml`:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
ollama:
|
||||
entrypoint: ["true"]
|
||||
restart: "no"
|
||||
ports: []
|
||||
extractor:
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
OLLAMA_BASE_URL: "http://host.docker.internal:11434"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
recommendation:
|
||||
environment:
|
||||
OLLAMA_BASE_URL: "http://host.docker.internal:11434"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
```
|
||||
|
||||
This disables the bundled Ollama container and routes services to the host's instance. Replace the port if your Ollama runs on a non-standard port. For a remote Ollama instance (not on localhost), replace `host.docker.internal` with the remote IP and remove the `extra_hosts` block.
|
||||
|
||||
### Option C: vLLM Server
|
||||
|
||||
For higher throughput or quantized models (e.g. `RedHatAI/Qwen3.6-35B-A3B-NVFP4`), point services at a vLLM server. Add to your `.env`:
|
||||
|
||||
```dotenv
|
||||
VLLM_BASE_URL=http://192.168.42.254:8000
|
||||
VLLM_MODEL=RedHatAI/Qwen3.6-35B-A3B-NVFP4
|
||||
VLLM_TIMEOUT=120
|
||||
VLLM_TEMPERATURE=0.7
|
||||
```
|
||||
|
||||
Then update the `ai_agents` table to use the vLLM provider:
|
||||
|
||||
```sql
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'RedHatAI/Qwen3.6-35B-A3B-NVFP4' WHERE active = true;
|
||||
```
|
||||
|
||||
Or use the API:
|
||||
|
||||
```bash
|
||||
curl -X PUT http://localhost:8004/api/admin/agents/document-extractor \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"model_provider": "vllm", "model_name": "RedHatAI/Qwen3.6-35B-A3B-NVFP4"}'
|
||||
```
|
||||
|
||||
### Option D: Mixed (Ollama + vLLM)
|
||||
|
||||
You can run different agents on different providers. For example, use vLLM for the high-volume extractor and Ollama for the thesis rewriter:
|
||||
|
||||
```sql
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'RedHatAI/Qwen3.6-35B-A3B-NVFP4' WHERE slug = 'document-extractor';
|
||||
UPDATE ai_agents SET model_provider = 'vllm', model_name = 'RedHatAI/Qwen3.6-35B-A3B-NVFP4' WHERE slug = 'event-classifier';
|
||||
UPDATE ai_agents SET model_provider = 'ollama', model_name = 'qwen3.5:9b-fast' WHERE slug = 'thesis-rewriter';
|
||||
```
|
||||
|
||||
Both `OLLAMA_BASE_URL` and `VLLM_BASE_URL` must be set in the environment for mixed mode.
|
||||
|
||||
### Automated Deployment
|
||||
|
||||
The `deploy-docker.sh` script handles LLM configuration automatically. By default, it uses the Docker Ollama container with GPU passthrough (NVIDIA Container Toolkit):
|
||||
|
||||
```bash
|
||||
# Deploy with defaults (Docker Ollama, GPU-accelerated)
|
||||
bash deploy-docker.sh
|
||||
|
||||
# Specify a custom model
|
||||
bash deploy-docker.sh --ollama-model qwen3.6
|
||||
|
||||
# Specify a different host and directory
|
||||
bash deploy-docker.sh --host user@myserver --dir /opt/stonks
|
||||
```
|
||||
|
||||
If an external Ollama URL is provided via `--ollama-url`, the script creates a `docker-compose.override.yml` that disables the bundled container and routes services to the external instance.
|
||||
|
||||
---
|
||||
|
||||
## Volume Mounts and Data Persistence
|
||||
|
||||
Docker Compose defines five named volumes for persistent data:
|
||||
@@ -299,6 +511,9 @@ docker compose ps query-api
|
||||
|
||||
# Inspect health check details for a container
|
||||
docker inspect --format='{{json .State.Health}}' stonks-oracle-query-api-1 | python -m json.tool
|
||||
|
||||
# Wait for all services to be healthy
|
||||
docker compose up -d --wait
|
||||
```
|
||||
|
||||
---
|
||||
@@ -309,17 +524,19 @@ docker inspect --format='{{json .State.Health}}' stonks-oracle-query-api-1 | pyt
|
||||
|
||||
Used by all application services except the scheduler. Accepts a `SERVICE_CMD` build argument that determines which service the container runs.
|
||||
|
||||
**Base image**: `python:3.12-slim`
|
||||
**Base image**: `python:3.12-slim` (via Harbor proxy cache in CI)
|
||||
|
||||
**Build arguments**:
|
||||
|
||||
| Argument | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `SERVICE_CMD` | `python -m services.scheduler.app` | The command executed when the container starts |
|
||||
| `CACHE_BUST` | (none) | Optional cache-busting argument to force rebuild of source layers |
|
||||
|
||||
**What gets copied**:
|
||||
- `requirements.txt` → pip dependencies installed
|
||||
- `services/` → all service source code
|
||||
- `scripts/` → operational scripts
|
||||
- `tests/` → test files (available for in-container testing)
|
||||
- `conftest.py` → pytest configuration
|
||||
|
||||
@@ -357,7 +574,7 @@ A specialized variant of the generic Dockerfile used only by the `scheduler` ser
|
||||
|
||||
Extends the official Apache Superset image with additional database drivers.
|
||||
|
||||
**Base image**: `apache/superset:latest`
|
||||
**Base image**: `apache/superset:latest` (via Harbor proxy cache in CI)
|
||||
|
||||
**Additional packages**: `trino[sqlalchemy]`, `psycopg2-binary`, `redis`
|
||||
|
||||
@@ -376,7 +593,9 @@ Multi-stage build for the React dashboard.
|
||||
**Stage 2 — Serve** (base: `nginxinc/nginx-unprivileged:alpine`):
|
||||
- Serves the built static files on port 8080
|
||||
- Uses `frontend/nginx.conf` for SPA fallback and API reverse proxying
|
||||
- Proxies `/api/` → `query-api:8000`, `/registry/` → `symbol-registry:8000`, `/risk/` → `risk-engine:8000`, `/trading/` → `trading-engine:8000`
|
||||
- Proxies `/api/` → `query-api:8000`, `/registry/` → `symbol-registry:8000`, `/risk/` → `risk:8000`, `/trading/` → `trading-engine:8000`
|
||||
- SSE stream endpoint (`/api/ops/pipeline/stream`) has buffering disabled for real-time delivery
|
||||
- Static assets under `/assets/` are cached with 1-year expiry
|
||||
|
||||
### Building Custom Images
|
||||
|
||||
@@ -398,6 +617,9 @@ docker build -t my-dashboard \
|
||||
|
||||
# Rebuild all images
|
||||
docker compose build
|
||||
|
||||
# Rebuild without cache (force fresh build)
|
||||
docker compose build --no-cache
|
||||
```
|
||||
|
||||
---
|
||||
@@ -456,6 +678,9 @@ Services with `condition: service_healthy` wait until the dependency's health ch
|
||||
# Start all services in the background
|
||||
docker compose up -d
|
||||
|
||||
# Start all services and wait for health checks
|
||||
docker compose up -d --wait
|
||||
|
||||
# Start only infrastructure (useful for local development)
|
||||
docker compose up -d postgres redis minio minio-init ollama
|
||||
|
||||
@@ -534,6 +759,9 @@ docker compose exec query-api python -c "from services.shared.config import load
|
||||
|
||||
# Open a psql shell in the postgres container
|
||||
docker compose exec postgres psql -U stonks -d stonks
|
||||
|
||||
# Seed the database
|
||||
docker compose exec scheduler python -m services.symbol_registry.seed
|
||||
```
|
||||
|
||||
### Full Reset
|
||||
@@ -575,11 +803,16 @@ The dashboard container runs nginx with reverse proxy rules that route API reque
|
||||
| Path | Proxied To | Service |
|
||||
|------|-----------|---------|
|
||||
| `/api/` | `http://query-api:8000` | Query API |
|
||||
| `/api/ops/pipeline/stream` | `http://query-api:8000` (SSE, no buffering) | Query API (real-time pipeline stream) |
|
||||
| `/registry/` | `http://symbol-registry:8000/` | Symbol Registry API |
|
||||
| `/risk/` | `http://risk-engine:8000/` | Risk Engine API |
|
||||
| `/risk/` | `http://risk:8000/` | Risk Engine (via network alias) |
|
||||
| `/trading/` | `http://trading-engine:8000/` | Trading Engine API |
|
||||
|
||||
All other paths serve the React SPA with `try_files` fallback to `index.html`.
|
||||
The `risk-engine` service has a network alias of `risk` in `docker-compose.yml` so the nginx upstream resolves correctly.
|
||||
|
||||
All other paths serve the React SPA with `try_files` fallback to `index.html`. Static assets under `/assets/` are served with 1-year cache headers.
|
||||
|
||||
Security headers applied: `X-Frame-Options: SAMEORIGIN`, `X-Content-Type-Options: nosniff`, `Referrer-Policy: strict-origin-when-cross-origin`.
|
||||
|
||||
---
|
||||
|
||||
@@ -610,10 +843,34 @@ docker compose up -d # Migrations re-applied on fresh init
|
||||
|
||||
### Ollama model not available
|
||||
|
||||
The extractor service needs an LLM model loaded in Ollama. Pull a model manually:
|
||||
The extractor service needs an LLM model loaded. Pull a model manually:
|
||||
|
||||
```bash
|
||||
docker compose exec ollama ollama pull qwen3.5:9b
|
||||
# If using bundled Ollama container:
|
||||
docker compose exec ollama ollama pull qwen3.5:9b-fast
|
||||
|
||||
# If using host Ollama:
|
||||
ollama pull qwen3.5:9b-fast
|
||||
|
||||
# If using vLLM, ensure the model is loaded on the vLLM server
|
||||
curl http://your-vllm-host:8000/v1/models
|
||||
```
|
||||
|
||||
### Ollama port conflict (address already in use)
|
||||
|
||||
If Ollama is already running on the host, the bundled container will fail to bind port 11434. Use the external Ollama configuration described in the "LLM Provider Configuration" section above, or use `deploy-docker.sh`, which handles this automatically.
|
||||
|
||||
### GPU not detected by Ollama container
|
||||
|
||||
Ensure the NVIDIA Container Toolkit is installed and Docker is configured:
|
||||
|
||||
```bash
|
||||
# Verify GPU passthrough works
|
||||
docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi
|
||||
|
||||
# If it fails, reconfigure Docker runtime
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
### Port conflicts
|
||||
@@ -625,3 +882,15 @@ query-api:
|
||||
ports:
|
||||
- "9004:8000" # Changed from 8004 to 9004
|
||||
```
|
||||
|
||||
### Container runs out of memory
|
||||
|
||||
The full stack requires at least 16 GB RAM. If services are being OOM-killed:
|
||||
|
||||
```bash
|
||||
# Check which containers are using the most memory
|
||||
docker stats --no-stream
|
||||
|
||||
# Reduce memory usage by stopping non-essential services
|
||||
docker compose stop trino hive-metastore superset
|
||||
```
|
||||
@@ -0,0 +1,915 @@
|
||||
# Stonks Oracle — Mathematical Reference
|
||||
|
||||
Every equation, formula, threshold, and constant used in the signal processing, aggregation, recommendation, and trading pipeline. Organized by pipeline stage.
|
||||
|
||||
Code references are provided so each formula can be traced to its implementation.
|
||||
|
||||
---
|
||||
|
||||
## 1. Signal Scoring
|
||||
|
||||
**Source:** `services/aggregation/scoring.py`
|
||||
|
||||
### 1.1 Combined Signal Weight
|
||||
|
||||
Each document signal receives a composite weight:
|
||||
|
||||
```
|
||||
W_combined = G_conf × W_recency × W_credibility × (1 + B_novelty) × M_context
|
||||
```
|
||||
|
||||
| Component | Symbol | Formula | Range |
|
||||
|---|---|---|---|
|
||||
| Confidence gate | G_conf | 1 if extraction_confidence ≥ 0.2, else 0 | {0, 1} |
|
||||
| Recency decay | W_recency | 2^(−t_age / t_half) | [0.01, 1.0] |
|
||||
| Credibility | W_credibility | clamp(credibility, 0.1, 1.0)^α | [0.1, 1.0] |
|
||||
| Novelty bonus | B_novelty | novelty_score × 0.25 | [0, 0.25] |
|
||||
| Market context | M_context | 1 + boost_vol + boost_surge | [1.0, 1.45] |
|
||||
|
||||
### 1.2 Recency Decay
|
||||
|
||||
```
|
||||
W_recency = max( 2^(−t_age / t_half), 0.01 )
|
||||
```
|
||||
|
||||
where `t_age` is document age in hours and half-lives by window are:
|
||||
|
||||
| Window | t_half (hours) |
|
||||
|---|---|
|
||||
| intraday | 2 |
|
||||
| 1d | 12 |
|
||||
| 7d | 72 |
|
||||
| 30d | 240 |
|
||||
| 90d | 720 |
|
||||
|
||||
### 1.3 Credibility Weight
|
||||
|
||||
```
|
||||
W_credibility = clamp(c_raw, 0.1, 1.0)^α where α = 1.0 (default)
|
||||
```
|
||||
|
||||
α > 1 penalizes low-credibility sources more aggressively; α < 1 flattens the curve.
|
||||
|
||||
### 1.4 Market Context Multiplier
|
||||
|
||||
```
|
||||
boost_vol = min( ln(1 + max(σ − 1.0, 0)) × 0.15, 0.30 )
|
||||
|
||||
boost_surge = 0.15 if ΔV% > 50%, else 0
|
||||
|
||||
M_context = 1.0 + boost_vol + boost_surge
|
||||
```
|
||||
|
||||
where σ is price volatility and ΔV% is volume change percentage.
|
||||
|
||||
### 1.5 Weighted Sentiment Average
|
||||
|
||||
```
|
||||
S_avg = Σ(W_combined_i × impact_i × sentiment_i) / Σ(W_combined_i × impact_i)
|
||||
```
|
||||
|
||||
- sentiment_i ∈ {+1.0 (positive), −1.0 (negative), 0.0 (neutral/mixed)}
|
||||
- impact_i ∈ [0, 1] from extraction
|
||||
- Returns 0.0 when denominator = 0
|
||||
|
||||
---
|
||||
|
||||
## 1B. Probabilistic Signal Scoring (Feature-Flagged)
|
||||
|
||||
**Source:** `services/aggregation/scoring.py`
|
||||
**Active when:** `probabilistic_scoring_enabled = true` in `risk_configs.config` JSONB
|
||||
|
||||
When the probabilistic pipeline is enabled, the combined weight formula changes:
|
||||
|
||||
### 1B.1 Combined Signal Weight (Probabilistic)
|
||||
|
||||
```
|
||||
W_combined = G_sigmoid × W_recency(adaptive) × W_credibility × (1 + B_novelty) × R_info × F_accuracy × M_regime
|
||||
```
|
||||
|
||||
| Component | Symbol | Formula | Range |
|
||||
|---|---|---|---|
|
||||
| Sigmoid gate | G_sigmoid | σ(k·(x − midpoint)) = 1/(1+e^(−5·(x−0.5))) | (0, 1) |
|
||||
| Adaptive recency | W_recency | 2^(−t_age / τ_adaptive) | [0.01, 1.0] |
|
||||
| Credibility | W_credibility | same as heuristic | [0.1, 1.0] |
|
||||
| Novelty bonus | B_novelty | same as heuristic | [0, 0.25] |
|
||||
| Information gain | R_info | 1 + λ·(−log₂ P(event_type)) | [1.0, 3.0] |
|
||||
| Source accuracy | F_accuracy | 0.5 + accuracy_ratio (if samples ≥ 10, else 1.0) | [0.5, 1.5] |
|
||||
| Regime multiplier | M_regime | 1 + 0.15·\|z_r\| + 0.10·\|z_v\| | [1.0, 2.5] |
|
||||
|
||||
### 1B.2 Sigmoid Confidence Gate
|
||||
|
||||
Replaces the binary 0/1 gate with a smooth transition:
|
||||
|
||||
```
|
||||
G_sigmoid = σ(k·(x − m)) = 1 / (1 + e^(−k·(x−m)))
|
||||
```
|
||||
|
||||
Default: k = 5.0, m = 0.5. At x=0.5 → 0.5; at x=0.2 → ~0.18; at x=0.8 → ~0.82.
|
||||
|
||||
### 1B.3 Information Gain (Surprise Weighting)
|
||||
|
||||
```
|
||||
R_info = min(1 + λ·(−log₂ P(event_type)), 3.0)
|
||||
```
|
||||
|
||||
| Event Type | P(event_type) | R_info (λ=0.3) |
|
||||
|---|---|---|
|
||||
| earnings | 0.25 | 1.60 |
|
||||
| dividend | 0.15 | 1.82 |
|
||||
| product_launch | 0.10 | 2.00 |
|
||||
| regulatory | 0.08 | 2.09 |
|
||||
| management_change | 0.06 | 2.22 |
|
||||
| legal | 0.05 | 2.30 |
|
||||
| restructuring | 0.04 | 2.39 |
|
||||
| m_and_a | 0.03 | 2.52 |
|
||||
| unknown | 0.10 (default) | 2.00 |
|
||||
|
||||
### 1B.4 Adaptive Recency Decay
|
||||
|
||||
```
|
||||
τ_adaptive = τ_base × (1 + β_impact) × (1 + β_surprise) × (1 + β_market)
|
||||
```
|
||||
|
||||
| Factor | Formula | Range |
|
||||
|---|---|---|
|
||||
| β_impact | impact_score × 1.0 | [0, 1.0] |
|
||||
| β_surprise | (R_info − 1) / 2 × 1.0 | [0, 1.0] |
|
||||
| β_market | (M_regime − 1) / 0.45 × 0.5 | [0, 0.5] |
|
||||
|
||||
Maximum adaptive half-life: 6× base (when all factors at max).
|
||||
Minimum: τ_base (adaptive decay is never faster than fixed).
|
||||
|
||||
### 1B.5 Regime Multiplier
|
||||
|
||||
```
|
||||
z_r = (r_t − μ_20) / σ_20 (return z-score)
|
||||
z_v = (ln(V_t) − μ_V) / σ_V (log-volume z-score)
|
||||
M_regime = clamp(1 + 0.15·|z_r| + 0.10·|z_v|, 1.0, 2.5)
|
||||
```
|
||||
|
||||
Defaults to 1.0 when market data unavailable or σ = 0.
|
||||
|
||||
### 1B.6 Source Accuracy Factor
|
||||
|
||||
```
|
||||
F_accuracy = 0.5 + clamp(accuracy_ratio, 0, 1) if sample_count ≥ 10
|
||||
F_accuracy = 1.0 if sample_count < 10
|
||||
```
|
||||
|
||||
Stored in `source_accuracy` table, updated asynchronously from realized 7-day price outcomes.
|
||||
|
||||
---
|
||||
|
||||
## 2. Trend Summary Assembly
|
||||
|
||||
**Source:** `services/aggregation/worker.py`
|
||||
|
||||
### 2.1 Trend Direction
|
||||
|
||||
| Condition | Direction |
|
||||
|---|---|
|
||||
| S_avg ≥ 0.15 | Bullish |
|
||||
| S_avg ≤ −0.15 | Bearish |
|
||||
| contradiction > 0.10 AND \|S_avg\| < 0.30 | Mixed |
|
||||
| otherwise | Neutral |
|
||||
|
||||
### 2.2 Trend Strength
|
||||
|
||||
```
|
||||
strength = min(|S_avg|, 1.0)
|
||||
```
|
||||
|
||||
### 2.3 Contradiction Score
|
||||
|
||||
**Source:** `services/aggregation/contradiction.py`
|
||||
|
||||
```
|
||||
contradiction = W_minority / (W_positive + W_negative)
|
||||
```
|
||||
|
||||
where:
|
||||
```
|
||||
W_positive = Σ(W_combined_i × impact_i) for signals with sentiment > 0
|
||||
W_negative = Σ(W_combined_i × impact_i) for signals with sentiment < 0
|
||||
W_minority = min(W_positive, W_negative)
|
||||
```
|
||||
|
||||
Range: [0, 0.5]. 0 = full agreement, 0.5 = equal-weight disagreement (the maximum, since W_minority can never exceed half of the total).
|
||||
|
||||
### 2.4 Trend Confidence
|
||||
|
||||
```
|
||||
confidence = clamp(0.3 × F_count + 0.3 × C_avg + 0.4 × A_agreement − P_contradiction, 0, 1)
|
||||
```
|
||||
|
||||
| Component | Formula |
|
||||
|---|---|
|
||||
| F_count (source count) | min(N_unique / 15, 0.8) |
|
||||
| C_avg (extraction confidence) | mean of extraction confidences |
|
||||
| A_agreement (signal agreement) | fraction_same_direction × min(1, log₂(N_unique + 1) / log₂(8)) |
|
||||
| P_contradiction | contradiction_score × 0.4 |
|
||||
|
||||
---
|
||||
|
||||
## 2B. Probabilistic Trend Assembly (Feature-Flagged)
|
||||
|
||||
**Source:** `services/aggregation/worker.py`, `services/aggregation/bayesian.py`
|
||||
**Active when:** `probabilistic_scoring_enabled = true`
|
||||
|
||||
### 2B.1 Bayesian Posterior Accumulation
|
||||
|
||||
```
|
||||
L_t = Σ(W_combined_i × sentiment_i) (log-likelihood)
|
||||
P_bull = σ(L_t) = 1 / (1 + e^(−L_t)) (bullish probability)
|
||||
α = 1 + W_bull (W_bull = Σ W_combined for positive signals)
|
||||
β = 1 + W_bear (W_bear = Σ W_combined for negative signals)
|
||||
C_bayesian = 1 − 4αβ / (α + β)² (Bayesian confidence)
|
||||
H = −P_bull·log₂(P_bull) − (1−P_bull)·log₂(1−P_bull) (Shannon entropy)
|
||||
```
|
||||
|
||||
Uninformative prior (no signals): P_bull=0.5, α=1, β=1, C=0, H=1.0.
|
||||
|
||||
### 2B.2 Entropy-Based Direction
|
||||
|
||||
| Condition | Direction |
|
||||
|---|---|
|
||||
| H > 0.9 | Mixed |
|
||||
| P_bull > 0.65 | Bullish |
|
||||
| P_bull < 0.35 | Bearish |
|
||||
| otherwise | Neutral |
|
||||
|
||||
### 2B.3 Bayesian Trend Confidence
|
||||
|
||||
```
|
||||
confidence = clamp(0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility − P_contradiction, 0, 1)
|
||||
```
|
||||
|
||||
| Component | Formula |
|
||||
|---|---|
|
||||
| C_bayesian | 1 − 4αβ/(α+β)² from Beta posterior |
|
||||
| F_count | min(N_unique_sources / 15, 0.8) |
|
||||
| C_avg_credibility | mean credibility weight across active signals |
|
||||
| P_contradiction | contradiction_entropy × regime.contradiction_penalty_multiplier |
|
||||
|
||||
### 2B.4 Weighted Disagreement Entropy (Contradiction)
|
||||
|
||||
**Source:** `services/aggregation/contradiction.py`
|
||||
|
||||
```
|
||||
f_pos = W_positive / (W_positive + W_negative)
|
||||
f_neg = 1 − f_pos
|
||||
H_contradiction = −f_pos·log₂(f_pos) − f_neg·log₂(f_neg)
|
||||
contradiction_score = H_contradiction × min(1.0, (W_positive + W_negative) / W_threshold)
|
||||
```
|
||||
|
||||
W_threshold default = 5.0. Returns 0.0 when only one direction exists.
|
||||
|
||||
### 2B.5 Regime Detection
|
||||
|
||||
**Source:** `services/aggregation/regime.py`
|
||||
|
||||
```
|
||||
R = sign(EMA_20 − EMA_100) (trend indicator)
|
||||
V_r = σ_20 / σ_100 (volatility ratio)
|
||||
```
|
||||
|
||||
| Condition | Regime | Threshold | Contradiction Mult |
|
||||
|---|---|---|---|
|
||||
| V_r > 1.5 | Panic | ±0.10 | 0.4 |
|
||||
| R ≠ 0 AND V_r < 1.2 | Trend-following | ±0.15 | 0.4 |
|
||||
| R = 0 AND V_r < 1.0 | Mean-reversion | ±0.20 | 0.4 |
|
||||
| otherwise | Uncertainty | ±0.15 | 0.6 |
|
||||
|
||||
Falls back to Uncertainty when data < 100 days or σ = 0.
|
||||
|
||||
---
|
||||
|
||||
## 3. Macro Impact Scoring (Layer 2)
|
||||
|
||||
**Source:** `services/aggregation/interpolation.py`
|
||||
|
||||
### 3.1 Overlap Components
|
||||
|
||||
**Geographic overlap:**
|
||||
```
|
||||
O_geo = Σ revenue_pct_r for each event region r in company's revenue mix
|
||||
```
|
||||
Range: [0, 1]
|
||||
|
||||
**Supply chain overlap:**
|
||||
```
|
||||
O_supply = |event_regions ∩ supply_regions| / |supply_regions|
|
||||
```
|
||||
|
||||
**Commodity overlap:**
|
||||
```
|
||||
O_commodity = |event_commodities ∩ company_commodities| / |company_commodities|
|
||||
```
|
||||
|
||||
**Sector overlap:**
|
||||
```
|
||||
O_sector = 1.0 if company_sector ∈ event_affected_sectors, else 0.0
|
||||
```
|
||||
|
||||
### 3.2 Raw Macro Impact Score
|
||||
|
||||
```
|
||||
S_raw = W_severity × (0.35 × O_geo + 0.25 × O_supply + 0.25 × O_commodity + 0.15 × O_sector)
|
||||
```
|
||||
|
||||
Severity weights:
|
||||
|
||||
| Severity | W_severity |
|
||||
|---|---|
|
||||
| critical | 1.0 |
|
||||
| high | 0.75 |
|
||||
| moderate | 0.5 |
|
||||
| low | 0.25 |
|
||||
|
||||
### 3.3 Resilience Modifier
|
||||
|
||||
For international events, the raw score is adjusted by market position:
|
||||
|
||||
```
|
||||
S_final = clamp(S_raw × R_tier, 0, 1)
|
||||
```
|
||||
|
||||
| Market Position Tier | R_tier |
|
||||
|---|---|
|
||||
| Global leader | 0.70 |
|
||||
| Multinational | 0.85 |
|
||||
| Regional | 1.00 |
|
||||
| Domestic | 1.20 |
|
||||
|
||||
For domestic-only events, R_tier = 1.0 regardless of tier.
|
||||
|
||||
### 3B. Multiplicative Macro Exposure (Probabilistic)
|
||||
|
||||
**Active when:** `probabilistic_scoring_enabled = true`
|
||||
|
||||
```
|
||||
S_raw = W_severity × (1 − Π_k(1 − w_k × O_k))
|
||||
= W_severity × (1 − (1−0.35·O_geo)(1−0.25·O_supply)(1−0.25·O_commodity)(1−0.15·O_sector))
|
||||
```
|
||||
|
||||
Zero overlap → 0.0. Max overlap (all 1.0) → severity × 0.689.
|
||||
|
||||
### 3B.1 Conditional Macro Integration
|
||||
|
||||
When both company and macro signals exist:
|
||||
```
|
||||
modifier = clamp(1 + M_macro × sign_alignment, 0.5, 1.5)
|
||||
S_adjusted = S_company × modifier
|
||||
```
|
||||
|
||||
sign_alignment = +1 (agree), −1 (disagree), 0 (neutral/mixed).
|
||||
|
||||
When only macro signals exist: additive fallback with weight 0.3.
|
||||
When only company signals exist: modifier = 1.0.
|
||||
|
||||
### 3.4 Macro Impact Confidence
|
||||
|
||||
```
|
||||
confidence = min(event_confidence × min(O_total + 0.3, 1.0), 1.0)
|
||||
```
|
||||
|
||||
where O_total = O_geo + O_supply + O_commodity + O_sector.
|
||||
|
||||
### 3.5 Accelerated Staleness Decay
|
||||
|
||||
For short-term events older than 48 hours:
|
||||
|
||||
```
|
||||
decay_standard = e^(−0.693 × t_age_hours / t_half_hours) (t_half default = 168h)
|
||||
decay_accelerated = decay_standard × 0.5
|
||||
```
|
||||
|
||||
### 3.6 Macro Signal as WeightedSignal
|
||||
|
||||
When merged into the aggregation engine:
|
||||
|
||||
```
|
||||
impact_score_macro = macro_impact_score × W_macro (W_macro = 0.3 default)
|
||||
sentiment_value = +1 if positive, −1 if negative
|
||||
```
|
||||
|
||||
Recency decay uses the global event's publication time.
|
||||
|
||||
---
|
||||
|
||||
## 4. Competitive Signals (Layer 3)
|
||||
|
||||
### 4.1 Pattern Confidence
|
||||
|
||||
**Source:** `services/aggregation/pattern_matcher.py`
|
||||
|
||||
```
|
||||
confidence = F_sample × 0.4 + F_consistency × 0.4 + F_recency × 0.2
|
||||
```
|
||||
|
||||
| Factor | Formula |
|
||||
|---|---|
|
||||
| F_sample | min(N_samples / 20, 1.0) |
|
||||
| F_consistency | max(pct_bullish, pct_bearish) |
|
||||
| F_recency | 1.0 if age ≤ 7d; 0.7 if age ≤ 90d; 0.4 otherwise |
|
||||
|
||||
**Modifiers:**
|
||||
- Major corporate decision (m&a, earnings, legal): confidence × 1.3
|
||||
- Insufficient data (N_samples < min_pattern_samples): cap at 0.25
|
||||
- Stale data (age > staleness_window_days): confidence × staleness_decay_penalty
|
||||
|
||||
**Lookback windows:**
|
||||
- Routine signals: 180 days
|
||||
- Major corporate decisions: 365 days
|
||||
|
||||
### 4.2 Cross-Company Signal Strength
|
||||
|
||||
**Source:** `services/aggregation/signal_propagation.py`
|
||||
|
||||
```
|
||||
S_competitive = clamp(S_pattern_avg × R_relationship × C_pattern × I_source, 0, 1)
|
||||
```
|
||||
|
||||
| Component | Description |
|
||||
|---|---|
|
||||
| S_pattern_avg | Average historical outcome strength [0, 1] |
|
||||
| R_relationship | Relationship strength from competitor_relationships [0, 1] |
|
||||
| C_pattern | Pattern confidence from §4.1 |
|
||||
| I_source | Source document's impact_score [0, 1] |
|
||||
|
||||
**Threshold gate:** Skipped if R_relationship < propagation_strength_threshold (default 0.2).
|
||||
|
||||
### 4B. Graph-Distance Attenuation (Probabilistic)
|
||||
|
||||
**Active when:** `probabilistic_scoring_enabled = true`
|
||||
|
||||
```
|
||||
S_transfer = S_source × ρ_historical × e^(−d_network)
|
||||
```
|
||||
|
||||
| Component | Description |
|
||||
|---|---|
|
||||
| S_source | Source signal strength |
|
||||
| ρ_historical | 90-day rolling Pearson correlation (default 0.3 same-sector, 0.1 cross-sector) |
|
||||
| d_network | Shortest path in competitor graph (capped at 3) |
|
||||
|
||||
No propagation when d_network > 3; even at the maximum distance d_network = 3 the attenuation factor is already e^(−3) ≈ 0.05.
|
||||
|
||||
### 4.3 Competitive Signal as WeightedSignal
|
||||
|
||||
```
|
||||
impact_score_competitive = S_competitive × W_competitive (W_competitive = 0.2 default)
|
||||
direction = majority historical outcome (bullish or bearish)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Trend Projection
|
||||
|
||||
**Source:** `services/aggregation/projection.py`
|
||||
|
||||
### 5.1 Trend Momentum
|
||||
|
||||
```
|
||||
momentum = S_current_signed − S_previous_signed
|
||||
```
|
||||
|
||||
where `S_signed = direction_sign × strength` (bullish = +1, bearish = −1, neutral = 0).
|
||||
|
||||
When no previous data exists:
|
||||
```
|
||||
momentum = direction_sign × strength × 0.5
|
||||
```
|
||||
|
||||
Range: [−1, 1]
|
||||
|
||||
### 5.2 Macro Decay Projection
|
||||
|
||||
For each active macro event projected forward by `H` days:
|
||||
|
||||
```
|
||||
F_future = 2^(−(t_current + H) / t_half)
|
||||
I_projected = macro_impact_score × F_future × W_severity
|
||||
```
|
||||
|
||||
Decay half-lives:
|
||||
|
||||
| Duration | t_half (days) |
|
||||
|---|---|
|
||||
| short_term | 1.0 |
|
||||
| medium_term | 7.0 |
|
||||
| long_term | 30.0 |
|
||||
|
||||
Aggregate direction: bullish if W_pos > 1.2 × W_neg; bearish if W_neg > 1.2 × W_pos; otherwise mixed if both are > 0.
|
||||
|
||||
### 5.3 Projection Blending
|
||||
|
||||
```
|
||||
W_macro_blend = min(S_macro_projected × 0.4, 0.4)
|
||||
W_company = 1.0 − W_macro_blend
|
||||
|
||||
S_blended = W_company × S_momentum_projected + W_macro_blend × S_macro_signed
|
||||
```
|
||||
|
||||
**Catalyst boost:** `min(N_catalysts × 0.02, 0.1)` added to projected strength.
|
||||
|
||||
**Projected confidence:**
|
||||
```
|
||||
C_projected = C_base × 0.8 + min(S_macro × 0.15, 0.1)
|
||||
```
|
||||
|
||||
**Divergence detection:** Flagged when projected direction ≠ current trend direction.
|
||||
|
||||
### 5B. Exponentially Weighted Momentum (Probabilistic)
|
||||
|
||||
**Source:** `services/aggregation/projection.py`
|
||||
**Active when:** `probabilistic_scoring_enabled = true`
|
||||
|
||||
```
|
||||
M_t = Σ_{k=0}^{K-1} λ^k × ΔS_{t-k} (λ = 0.7, K ≤ 10)
|
||||
M_normalized = M_t / Σ_{k=0}^{K-1} λ^k (range: [−1, 1])
|
||||
M_adj = clamp(M_normalized / max(σ_20, 0.01), −2.0, 2.0)
|
||||
```
|
||||
|
||||
Falls back to heuristic momentum when < 2 historical cycles available.
|
||||
|
||||
---
|
||||
|
||||
## 6. Data Quality Suppression
|
||||
|
||||
**Source:** `services/recommendation/suppression.py`
|
||||
|
||||
### 6.1 Data Quality Score
|
||||
|
||||
```
|
||||
Q = 0.4 × Q_confidence + 0.3 × Q_freshness + 0.3 × Q_coverage
|
||||
```
|
||||
|
||||
| Component | Formula |
|
||||
|---|---|
|
||||
| Q_confidence | min(C_avg_extraction / 0.8, 1.0) |
|
||||
| Q_freshness | max(0, 1 − t_newest_hours / 168) |
|
||||
| Q_coverage | (N_valid / N_total) × min(N_valid / 10, 1.0) |
|
||||
|
||||
**Suppression triggers** (any one → informational only):
|
||||
|
||||
| Check | Threshold |
|
||||
|---|---|
|
||||
| Avg extraction confidence | < 0.40 |
|
||||
| Evidence staleness | > 168 hours (7 days) |
|
||||
| Source type diversity | < 1 distinct type |
|
||||
| Extraction failure rate | > 50% |
|
||||
| Valid document count | < 2 |
|
||||
| Data quality score | < 0.30 |
|
||||
|
||||
### 6.2 Safety Suppression
|
||||
|
||||
- **Macro-only:** If trend driven solely by macro signals with zero company evidence → forced informational
|
||||
- **Pattern-only:** If trend driven solely by pattern/competitive signals with no company or macro support → forced informational
|
||||
|
||||
---
|
||||
|
||||
## 7. Recommendation Eligibility
|
||||
|
||||
**Source:** `services/recommendation/eligibility.py`
|
||||
|
||||
### 7.1 Gate Checks (all must pass)
|
||||
|
||||
| Check | Threshold |
|
||||
|---|---|
|
||||
| Confidence | ≥ 0.35 |
|
||||
| Trend strength | ≥ 0.10 |
|
||||
| Contradiction score | ≤ 0.60 |
|
||||
| Evidence count | ≥ 2 |
|
||||
| Direction | ≠ neutral |
|
||||
|
||||
### 7.2 Action Mapping
|
||||
|
||||
| Condition | Action |
|
||||
|---|---|
|
||||
| Bullish AND strength ≥ 0.25 | BUY |
|
||||
| Bearish AND strength ≥ 0.25 | SELL |
|
||||
| Directional AND confidence ≥ 0.50 | HOLD |
|
||||
| Mixed or weak | WATCH |
|
||||
|
||||
### 7.3 Mode Escalation
|
||||
|
||||
| Mode | Requirements |
|
||||
|---|---|
|
||||
| live_eligible | confidence ≥ 0.70, contradiction ≤ 0.25, evidence ≥ 5 |
|
||||
| paper_eligible | confidence ≥ 0.50 |
|
||||
| informational | everything else (WATCH/HOLD always informational) |
|
||||
|
||||
### 7B. Expected Value Gate (Probabilistic)
|
||||
|
||||
**Active when:** `probabilistic_scoring_enabled = true`
|
||||
|
||||
```
|
||||
R_up = strength × σ_20 × √(horizon_days)
|
||||
R_down = (1 − strength) × σ_20 × √(horizon_days)
|
||||
EV = P_bull × R_up − (1 − P_bull) × R_down
|
||||
```
|
||||
|
||||
| Horizon window | horizon_days |
|
||||
|---|---|
|
||||
| intraday / 1d | 1 |
|
||||
| 7d | 7 |
|
||||
| 30d | 30 |
|
||||
| 90d | 90 |
|
||||
|
||||
- EV > 0.005 (0.5% expected return): recommendation proceeds through existing gates
|
||||
- EV ≤ 0.005: forced to informational mode regardless of confidence/strength
|
||||
- All existing eligibility gates (§7.1) remain as additional requirements
|
||||
|
||||
### 7.4 Position Sizing
|
||||
|
||||
```
|
||||
portfolio_pct = base + C_factor × S_factor × range × P_contradiction × P_evidence
|
||||
```
|
||||
|
||||
| Component | Formula | Default |
|
||||
|---|---|---|
|
||||
| base | base_portfolio_pct | 0.01 (1%) |
|
||||
| range | max_portfolio_pct − base_portfolio_pct | 0.09 (9%) |
|
||||
| C_factor | confidence_sizing_weight × confidence | 0.8 × confidence |
|
||||
| S_factor | 0.5 + 0.5 × trend_strength | [0.5, 1.0] |
|
||||
| P_contradiction | 1 − (contradiction_penalty × contradiction_score) | penalty = 0.5 |
|
||||
| P_evidence | 0.50 if evidence < 3; 0.75 if evidence < 5; 1.0 otherwise | |
|
||||
|
||||
Clamped to [base × 0.5, max_portfolio_pct].
|
||||
|
||||
**Max loss percentage** uses the same structure with base = 0.003 (0.3%) and max = 0.02 (2%).
|
||||
|
||||
---
|
||||
|
||||
## 8. Trading Engine — Position Sizing
|
||||
|
||||
**Source:** `services/trading/position_sizer.py`
|
||||
|
||||
### 8.1 Base Allocation
|
||||
|
||||
```
|
||||
raw_pct = (max_position_pct × 0.5) × (confidence / min_confidence) × multiplier
|
||||
clamped_pct = min(raw_pct, max_position_pct)
|
||||
dollar_amount = min(active_pool × clamped_pct, absolute_position_cap)
|
||||
```
|
||||
|
||||
### 8.2 Correlation Reduction
|
||||
|
||||
```
|
||||
ρ_avg = Σ(ρ_i × w_i) / Σ(w_i) for existing positions
|
||||
```
|
||||
|
||||
| ρ_avg | Action |
|
||||
|---|---|
|
||||
| > 0.8 | Reject order |
|
||||
| 0.5 < ρ_avg ≤ 0.8 | Reduce: factor = 1 − (ρ_avg − 0.5) / 0.3 |
|
||||
| ≤ 0.5 | No reduction |
|
||||
|
||||
### 8.3 Sector Exposure Reduction
|
||||
|
||||
```
|
||||
available = max(max_sector_pct × active_pool − current_sector_exposure, 0)
|
||||
dollar_amount = min(dollar_amount, available)
|
||||
```
|
||||
|
||||
### 8.4 Diversification Bonus
|
||||
|
||||
If < 3 sectors held AND entering a new sector: dollar_amount × 1.2 (capped at max_position_pct).
|
||||
|
||||
### 8.5 Earnings Proximity
|
||||
|
||||
| Days to earnings | Action |
|
||||
|---|---|
|
||||
| ≤ 1 | Reject |
|
||||
| > 1 and ≤ 3 | 50% reduction |
|
||||
| > 3 | No adjustment |
|
||||
|
||||
### 8.6 Portfolio Heat Check
|
||||
|
||||
```
|
||||
heat_new = dollar_amount × atr_multiplier × 0.02
|
||||
heat_max = max_portfolio_heat × active_pool
|
||||
|
||||
Reject if: heat_current + heat_new > heat_max
|
||||
```
|
||||
|
||||
### 8.7 Share Rounding
|
||||
|
||||
```
|
||||
shares = floor(dollar_amount / current_price)
|
||||
final_dollar = shares × current_price
|
||||
```
|
||||
|
||||
Reject if shares = 0.
|
||||
|
||||
---
|
||||
|
||||
## 9. Stop-Loss and Take-Profit
|
||||
|
||||
**Source:** `services/trading/stop_loss_manager.py`
|
||||
|
||||
### 9.1 Initial Levels
|
||||
|
||||
```
|
||||
stop_distance = ATR × M_atr
|
||||
stop_loss = entry_price − stop_distance
|
||||
take_profit = entry_price + stop_distance × R_reward_risk
|
||||
```
|
||||
|
||||
| Trade type | M_atr | R_reward_risk |
|
||||
|---|---|---|
|
||||
| Standard | risk_tier.stop_loss_atr_multiplier | risk_tier.reward_risk_ratio |
|
||||
| Micro-trade | 1.0 | 1.5 |
|
||||
|
||||
### 9.2 Dynamic Tightening
|
||||
|
||||
| Condition | Effective multiplier |
|
||||
|---|---|
|
||||
| High-severity macro event | base × 0.5 |
|
||||
| Earnings within 3 days | base × 0.7 |
|
||||
| Portfolio heat > 80% of max | base × 0.7 |
|
||||
| Normal | base |
|
||||
|
||||
### 9.3 Trailing Stop Activation
|
||||
|
||||
Activates when:
|
||||
```
|
||||
favorable_move = current_price − entry_price > 0.5 × (take_profit − entry_price)
|
||||
```
|
||||
|
||||
Once active, stop-loss floor = entry_price (breakeven).
|
||||
|
||||
---
|
||||
|
||||
## 10. Risk Management
|
||||
|
||||
### 10.1 Position Limits
|
||||
|
||||
**Source:** `services/risk/engine.py`
|
||||
|
||||
| Limit | Default | Formula |
|
||||
|---|---|---|
|
||||
| Max position % | 5% | position_value / portfolio_value ≤ 0.05 |
|
||||
| Max position value | $10,000 | existing + new ≤ $10,000 |
|
||||
| Max shares/order | 1,000 | quantity ≤ 1,000 |
|
||||
| Max sector % | 25% | sector_value / portfolio_value ≤ 0.25 |
|
||||
| Max daily loss % | 2% | \|daily_pnl\| / portfolio_value ≤ 0.02 |
|
||||
| Max daily loss $ | $1,000 | \|daily_pnl\| ≤ $1,000 |
|
||||
| Max daily trades | 20 | trade_count < 20 |
|
||||
|
||||
### 10.2 Order Clamping
|
||||
|
||||
**Source:** `services/risk/engine.py` — `clamp_order_to_position_limits()`
|
||||
|
||||
When a buy order exceeds position limits, instead of rejecting:
|
||||
|
||||
```
|
||||
max_allowed_value = min(
|
||||
max_position_value − existing_value,
|
||||
max_position_pct × portfolio_value − existing_value
|
||||
)
|
||||
clamped_shares = min( floor(max_allowed_value / price_per_share), max_shares_per_order )
|
||||
```
|
||||
|
||||
### 10.3 News Shock Lockout
|
||||
|
||||
Trigger: impact_score ≥ 0.80 for catalyst ∈ {earnings, legal, m_and_a}
|
||||
Duration: 60 minutes (configurable)
|
||||
|
||||
### 10.4 Symbol Cooldown
|
||||
|
||||
Duration: 15 minutes between trades on same symbol.
|
||||
Max concurrent positions per symbol: 1.
|
||||
|
||||
---
|
||||
|
||||
## 11. Circuit Breaker
|
||||
|
||||
**Source:** `services/trading/circuit_breaker.py`
|
||||
|
||||
| Trigger | Condition | Cooldown |
|
||||
|---|---|---|
|
||||
| Daily loss | \|daily_pnl\| / portfolio_value > 0.05 | 2 hours |
|
||||
| Single position | position_loss_pct > 0.15 | 48 hours |
|
||||
| Volatility | ≥ 3 stop-losses within 30-minute window | 2 hours |
|
||||
|
||||
---
|
||||
|
||||
## 12. Risk Tier Auto-Adjustment
|
||||
|
||||
**Source:** `services/trading/risk_tier_controller.py`
|
||||
|
||||
Tiers: conservative → moderate → aggressive
|
||||
|
||||
**Downgrade** (any one triggers, drops one level):
|
||||
- 30-day win rate < 40%
|
||||
- Current drawdown > 15%
|
||||
|
||||
**Upgrade** (all must be true, raises one level):
|
||||
- 30-day win rate > 55%
|
||||
- Reserve pool > 20% of portfolio
|
||||
- Current drawdown < 5%
|
||||
|
||||
---
|
||||
|
||||
## 13. Portfolio Rebalancing
|
||||
|
||||
**Source:** `services/trading/rebalancer.py`
|
||||
|
||||
### 13.1 Single-Stock Rebalancing
|
||||
|
||||
```
|
||||
excess = market_value − max_position_pct × active_pool
|
||||
sell_qty = min( floor(excess / current_price), position_quantity )
|
||||
```
|
||||
|
||||
### 13.2 Sector Rebalancing
|
||||
|
||||
```
|
||||
sector_excess = Σ(market_value_i) − max_sector_pct × active_pool
|
||||
```
|
||||
|
||||
Sell from lowest-confidence positions first until excess is covered.
|
||||
|
||||
### 13.3 Max Positions Enforcement
|
||||
|
||||
```
|
||||
excess_count = N_positions − max_positions
|
||||
```
|
||||
|
||||
Sell entire lowest-confidence positions until count is within limit.
|
||||
|
||||
---
|
||||
|
||||
## Constants Summary
|
||||
|
||||
| Constant | Value | Location |
|
||||
|---|---|---|
|
||||
| Confidence gate floor | 0.20 | scoring.py |
|
||||
| Min recency weight | 0.01 | scoring.py |
|
||||
| Credibility floor/ceiling | 0.10 / 1.0 | scoring.py |
|
||||
| Novelty bonus max | 0.25 (25%) | scoring.py |
|
||||
| Volatility boost threshold | 1.0 price units | scoring.py |
|
||||
| Volatility boost max | 0.30 (30%) | scoring.py |
|
||||
| Volume surge threshold | 50% | scoring.py |
|
||||
| Volume surge boost | 0.15 (15%) | scoring.py |
|
||||
| Bullish/bearish threshold | ±0.15 | worker.py |
|
||||
| Mixed threshold | contradiction > 0.10, \|S\| < 0.30 | worker.py |
|
||||
| Macro signal weight | 0.30 | config.py |
|
||||
| Competitive signal weight | 0.20 | config.py |
|
||||
| Macro confidence threshold | 0.40 | interpolation.py |
|
||||
| Staleness accelerated decay | 0.50× | interpolation.py |
|
||||
| Short-term staleness hours | 48 | interpolation.py |
|
||||
| Pattern min samples | configurable | pattern_matcher.py |
|
||||
| Major decision weight multiplier | 1.3× | pattern_matcher.py |
|
||||
| Routine lookback | 180 days | pattern_matcher.py |
|
||||
| Major decision lookback | 365 days | pattern_matcher.py |
|
||||
| Propagation strength threshold | 0.20 | signal_propagation.py |
|
||||
| Data quality min score | 0.30 | suppression.py |
|
||||
| Evidence staleness max | 168 hours (7 days) | suppression.py |
|
||||
| Recommendation min confidence | 0.35 | eligibility.py |
|
||||
| Recommendation min strength | 0.10 | eligibility.py |
|
||||
| Action strength threshold | 0.25 | eligibility.py |
|
||||
| Live confidence threshold | 0.70 | eligibility.py |
|
||||
| Paper confidence threshold | 0.50 | eligibility.py |
|
||||
| Base portfolio allocation | 1% | eligibility.py |
|
||||
| Max portfolio allocation | 10% | eligibility.py |
|
||||
| Circuit breaker daily loss | 5% | circuit_breaker.py |
|
||||
| Circuit breaker single position | 15% | circuit_breaker.py |
|
||||
| Stop-loss cluster threshold | 3 hits / 30 min | circuit_breaker.py |
|
||||
| Tier downgrade win rate | < 40% | risk_tier_controller.py |
|
||||
| Tier upgrade win rate | > 55% | risk_tier_controller.py |
|
||||
| Tier upgrade max drawdown | < 5% | risk_tier_controller.py |
|
||||
| Tier upgrade min reserve | > 20% | risk_tier_controller.py |
|
||||
| **Probabilistic pipeline** | | |
|
||||
| Sigmoid steepness (k) | 5.0 | scoring.py |
|
||||
| Sigmoid midpoint (m) | 0.5 | scoring.py |
|
||||
| Info gain lambda (λ) | 0.3 | scoring.py |
|
||||
| Info gain max clamp | 3.0 | scoring.py |
|
||||
| Default base rate | 0.10 | scoring.py |
|
||||
| Adaptive decay impact scale | 1.0 | scoring.py |
|
||||
| Adaptive decay surprise scale | 1.0 | scoring.py |
|
||||
| Adaptive decay market scale | 0.5 | scoring.py |
|
||||
| Regime return weight | 0.15 | scoring.py |
|
||||
| Regime volume weight | 0.10 | scoring.py |
|
||||
| Regime multiplier max | 2.5 | scoring.py |
|
||||
| Source accuracy min samples | 10 | source_accuracy.py |
|
||||
| Contradiction W_threshold | 5.0 | contradiction.py |
|
||||
| EMA short period | 20 days | regime.py |
|
||||
| EMA long period | 100 days | regime.py |
|
||||
| Panic volatility ratio | > 1.5 | regime.py |
|
||||
| Trend-following vol ratio | < 1.2 | regime.py |
|
||||
| Mean-reversion vol ratio | < 1.0 | regime.py |
|
||||
| Panic threshold | ±0.10 | regime.py |
|
||||
| Mean-reversion threshold | ±0.20 | regime.py |
|
||||
| Uncertainty contradiction mult | 0.6 | regime.py |
|
||||
| EW momentum decay (λ) | 0.7 | projection.py |
|
||||
| EW momentum max lags (K) | 10 | projection.py |
|
||||
| Volatility floor (σ min) | 0.01 | projection.py |
|
||||
| Momentum clamp | ±2.0 | projection.py |
|
||||
| EV threshold | 0.005 (0.5%) | eligibility.py |
|
||||
| Graph distance max | 3 | signal_propagation.py |
|
||||
| Default correlation (same-sector) | 0.3 | signal_propagation.py |
|
||||
| Default correlation (cross-sector) | 0.1 | signal_propagation.py |
|
||||
+49
-31
@@ -94,7 +94,7 @@ Each key under `services` defines a Kubernetes Deployment. The deployments templ
|
||||
| `image` | string | yes | Image name appended to `image.registry`. Also used as the Deployment name and pod label (`app: <image>`). |
|
||||
| `command` | string | no | Shell command passed as `["sh", "-c", "<command>"]`. Omit for images with a built-in entrypoint (e.g., dashboard/nginx). |
|
||||
| `tier` | string | yes | Service tier label (`stonks-oracle/tier`). One of: `api`, `frontend`, `processing`, `trading`, `orchestration`, `analytics`, `ingestion`. |
|
||||
| `port` | int | no | Container port. When set, a Kubernetes Service is created mapping `port → port`. |
|
||||
| `port` | int | no | Container port. When set, a Kubernetes Service is created mapping `port -> port`. |
|
||||
| `pipeline` | bool | no | If `true`, replicas are set to 0 when `pipelineEnabled` is `false`. |
|
||||
| `secrets` | list(string) | no | List of Secret names to mount via `envFrom.secretRef`. |
|
||||
| `resources` | object | yes | Kubernetes resource requests and limits (`cpu`, `memory`). |
|
||||
@@ -118,9 +118,10 @@ Each key under `services` defines a Kubernetes Deployment. The deployments templ
|
||||
| `resources.limits` | cpu: 200m, memory: 128Mi |
|
||||
| `probes` | — |
|
||||
|
||||
The scheduler deployment has two init containers (not configurable via values):
|
||||
The scheduler deployment has three init containers (not configurable via values):
|
||||
1. **run-migrations** — applies all SQL files from `infra/migrations/*.sql` in sorted order.
|
||||
2. **seed-if-empty** — runs `python -m services.symbol_registry.seed` if the `companies` table is empty.
|
||||
3. **backfill-market-data** — runs `scripts/backfill_market_data.py` if available (skips gracefully if not).
|
||||
|
||||
#### symbolRegistry
|
||||
|
||||
@@ -141,7 +142,7 @@ The scheduler deployment has two init containers (not configurable via values):
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| `replicas` | `2` |
|
||||
| `replicas` | `1` |
|
||||
| `pipeline` | `true` |
|
||||
| `image` | `ingestion` |
|
||||
| `command` | `python -m services.ingestion.worker` |
|
||||
@@ -274,7 +275,7 @@ Single replica is recommended — the extractor is bottlenecked by the shared Ol
|
||||
| `command` | `uvicorn services.api.app:app --host 0.0.0.0 --port 8000` |
|
||||
| `tier` | `api` |
|
||||
| `port` | `8000` |
|
||||
| `secrets` | `stonks-core-secrets` |
|
||||
| `secrets` | `stonks-core-secrets`, `stonks-market-secrets` |
|
||||
| `resources.requests` | cpu: 100m, memory: 128Mi |
|
||||
| `resources.limits` | cpu: 500m, memory: 256Mi |
|
||||
| `probes.readiness` | path: `/docs`, port: 8000, initialDelay: 5s, period: 10s |
|
||||
@@ -323,7 +324,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `config.OLLAMA_BASE_URL` | string | `""` (empty) | Ollama API base URL. Set to the cluster-internal or external Ollama endpoint. |
|
||||
| `config.OLLAMA_BASE_URL` | string | `http://10.1.1.12:2701` | Ollama API base URL. Points to the external Ollama endpoint by default. |
|
||||
| `config.OLLAMA_MODEL` | string | `qwen3.5:9b-fast` | Default LLM model for extraction and classification agents. |
|
||||
| `config.OLLAMA_TIMEOUT` | string | `240` | Request timeout in seconds for Ollama API calls. |
|
||||
| `config.OLLAMA_MAX_RETRIES` | string | `2` | Maximum retry attempts for failed Ollama requests. |
|
||||
@@ -331,6 +332,17 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
| `config.OLLAMA_RETRY_MAX_DELAY` | string | `10.0` | Maximum delay cap in seconds for Ollama retry backoff. |
|
||||
| `config.OLLAMA_RETRY_BACKOFF_MULTIPLIER` | string | `2.0` | Multiplier for exponential backoff between Ollama retries. |
|
||||
|
||||
### vLLM
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `config.VLLM_BASE_URL` | string | `http://10.1.1.12:2701` | vLLM API base URL. Alternative LLM backend using OpenAI-compatible API. |
|
||||
| `config.VLLM_MODEL` | string | `qwen3.5:9b-fast` | vLLM model identifier. |
|
||||
| `config.VLLM_TIMEOUT` | string | `120` | Request timeout in seconds for vLLM API calls. |
|
||||
| `config.VLLM_MAX_RETRIES` | string | `2` | Maximum retry attempts for failed vLLM requests. |
|
||||
| `config.VLLM_TEMPERATURE` | string | `0.7` | Sampling temperature for vLLM generation (0.0-1.0). |
|
||||
| `config.VLLM_API_KEY` | string | `""` (empty) | API key for vLLM authentication. Leave empty if not required. |
|
||||
|
||||
### Analytics / Trino
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
@@ -347,7 +359,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|-----|------|---------|-------------|
|
||||
| `config.BROKER_MODE` | string | `paper` | Broker execution mode. `paper` for simulated trading, `live` for real orders. |
|
||||
| `config.BROKER_PROVIDER` | string | `""` (empty) | Broker provider name (e.g., `alpaca`). |
|
||||
| `config.MARKET_DATA_BASE_URL` | string | `""` (empty) | Market data API base URL (e.g., `https://api.polygon.io`). |
|
||||
| `config.MARKET_DATA_BASE_URL` | string | `https://api.polygon.io` | Market data API base URL. |
|
||||
| `config.MARKET_DATA_PROVIDER` | string | `polygon` | Market data provider identifier. |
|
||||
| `config.TRADING_ENABLED` | string | `true` | Master toggle for the trading engine. Set to `false` to disable order submission. |
|
||||
| `config.TRADING_RISK_TIER` | string | `moderate` | Default risk tier for position sizing. Options: `conservative`, `moderate`, `aggressive`. |
|
||||
@@ -376,6 +388,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
| `config.LOG_LEVEL` | string | `INFO` | Python logging level. Options: `DEBUG`, `INFO`, `WARNING`, `ERROR`. |
|
||||
| `config.JSON_LOGS` | string | `true` | Emit structured JSON logs when `true`. |
|
||||
| `config.DEPLOY_STAGE` | string | `""` (empty) | Deployment stage identifier. Used to isolate Redis keys and MinIO buckets per stage (e.g., `beta`, `paper`). |
|
||||
| `config.TZ` | string | `America/Los_Angeles` | Container timezone. Affects log timestamps and any time-aware formatting. The frontend uses the browser's local timezone for display. |
|
||||
|
||||
### Alerting
|
||||
|
||||
@@ -383,7 +396,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|-----|------|---------|-------------|
|
||||
| `config.ALERT_SOURCE_FAILURE_THRESHOLD` | string | `3` | Number of consecutive source failures before firing an alert. |
|
||||
| `config.ALERT_SOURCE_FAILURE_WINDOW_HOURS` | string | `6` | Time window (hours) for evaluating source failure count. |
|
||||
| `config.ALERT_SCHEMA_FAILURE_RATE_THRESHOLD` | string | `0.3` | Schema validation failure rate (0.0–1.0) that triggers an alert. |
|
||||
| `config.ALERT_SCHEMA_FAILURE_RATE_THRESHOLD` | string | `0.3` | Schema validation failure rate (0.0-1.0) that triggers an alert. |
|
||||
| `config.ALERT_SCHEMA_FAILURE_WINDOW_HOURS` | string | `1` | Time window (hours) for evaluating schema failure rate. |
|
||||
| `config.ALERT_LAKE_LAG_THRESHOLD_MINUTES` | string | `60` | Minutes of lakehouse publish lag before alerting. |
|
||||
| `config.ALERT_BROKER_ERROR_THRESHOLD` | string | `3` | Number of broker errors before firing an alert. |
|
||||
@@ -394,7 +407,7 @@ All keys under `config` are rendered into a Kubernetes ConfigMap named `stonks-c
|
||||
|
||||
## `secrets` — Kubernetes Secrets
|
||||
|
||||
Secrets are rendered into five Kubernetes Secret objects. In the base `values.yaml`, all secret values default to empty strings. Inject real values at deploy time using `--set` flags or a values override file.
|
||||
Secrets are rendered into five Kubernetes Secret objects. Inject real values at deploy time using `--set` flags or a values override file. The base `values.yaml` contains placeholder values — override them for each environment.
|
||||
|
||||
### Secret Objects
|
||||
|
||||
@@ -402,32 +415,32 @@ Secrets are rendered into five Kubernetes Secret objects. In the base `values.ya
|
||||
|-------------|-----------|-------------|
|
||||
| `stonks-core-secrets` | `secrets.core` | All services |
|
||||
| `stonks-broker-secrets` | `secrets.broker` | ingestion, trading-engine, risk-engine, broker-adapter |
|
||||
| `stonks-market-secrets` | `secrets.market` | ingestion |
|
||||
| `stonks-market-secrets` | `secrets.market` | ingestion, query-api |
|
||||
| `stonks-gmail-secrets` | `secrets.gmail` | trading-engine |
|
||||
| `stonks-dashboard-secrets` | `secrets.dashboard` | superset |
|
||||
|
||||
### `secrets.core`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `POSTGRES_PASSWORD` | string | `""` | PostgreSQL password. |
|
||||
| `MINIO_ACCESS_KEY` | string | `""` | MinIO access key (AWS-style). |
|
||||
| `MINIO_SECRET_KEY` | string | `""` | MinIO secret key. |
|
||||
| `REDIS_PASSWORD` | string | `""` | Redis authentication password. |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `POSTGRES_PASSWORD` | string | PostgreSQL password. |
|
||||
| `MINIO_ACCESS_KEY` | string | MinIO access key (AWS-style). |
|
||||
| `MINIO_SECRET_KEY` | string | MinIO secret key. |
|
||||
| `REDIS_PASSWORD` | string | Redis authentication password. |
|
||||
|
||||
### `secrets.broker`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `BROKER_API_KEY` | string | `""` | Broker API key (e.g., Alpaca paper trading key). |
|
||||
| `BROKER_API_SECRET` | string | `""` | Broker API secret. |
|
||||
| `BROKER_BASE_URL` | string | `""` | Broker API base URL (e.g., `https://paper-api.alpaca.markets`). |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `BROKER_API_KEY` | string | Broker API key (e.g., Alpaca paper trading key). |
|
||||
| `BROKER_API_SECRET` | string | Broker API secret. |
|
||||
| `BROKER_BASE_URL` | string | Broker API base URL (e.g., `https://paper-api.alpaca.markets`). |
|
||||
|
||||
### `secrets.market`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `MARKET_DATA_API_KEY` | string | `""` | Market data provider API key (e.g., Polygon.io). |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `MARKET_DATA_API_KEY` | string | Market data provider API key (e.g., Polygon.io). |
|
||||
|
||||
### `secrets.gmail`
|
||||
|
||||
@@ -439,10 +452,10 @@ Secrets are rendered into five Kubernetes Secret objects. In the base `values.ya
|
||||
|
||||
### `secrets.dashboard`
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| `SUPERSET_SECRET_KEY` | string | `""` | Flask secret key for Superset session encryption. |
|
||||
| `SUPERSET_ADMIN_PASSWORD` | string | `""` | Superset admin user password. |
|
||||
| Key | Type | Description |
|
||||
|-----|------|-------------|
|
||||
| `SUPERSET_SECRET_KEY` | string | Flask secret key for Superset session encryption. |
|
||||
| `SUPERSET_ADMIN_PASSWORD` | string | Superset admin user password. |
|
||||
|
||||
### Injecting Secrets at Deploy Time
|
||||
|
||||
@@ -595,15 +608,20 @@ Key overrides:
|
||||
| `pipelineEnabled` | `true` | Services are deployed (ArgoCD health checks), but the pipeline defaults to OFF via `PIPELINE_DEFAULT_OFF`. |
|
||||
| `config.DEPLOY_STAGE` | `beta` | Isolates Redis keys (`stonks:beta:*`) and MinIO buckets (`beta-stonks-*`). |
|
||||
| `config.POSTGRES_DB` | `stonks_beta` | Separate database for beta data. |
|
||||
| `config.POSTGRES_USER` | `stonks_beta` | Separate database user for beta. |
|
||||
| `config.REDIS_DB` | `1` | Separate Redis DB index. |
|
||||
| `config.LOG_LEVEL` | `DEBUG` | Verbose logging for debugging. |
|
||||
| `config.TRADING_ENABLED` | `false` | Safety net — no order submission in beta. |
|
||||
| `config.PIPELINE_DEFAULT_OFF` | `true` | Scheduler won't enqueue jobs unless explicitly enabled. |
|
||||
| `config.TRADING_ENABLED` | `true` | Trading engine is active but constrained by paper broker mode. |
|
||||
| `config.PIPELINE_DEFAULT_OFF` | `true` | Scheduler won't enqueue jobs unless explicitly enabled via the UI. |
|
||||
| `config.BROKER_MODE` | `paper` | Simulated order execution. |
|
||||
| `config.BROKER_PROVIDER` | `alpaca` | Alpaca paper trading API. |
|
||||
| `config.OLLAMA_MODEL` | `qwen3.6` | May use a different model version for testing. |
|
||||
| `trino.enabled` | `false` | Analytics stack disabled in beta. |
|
||||
| `hiveMetastore.enabled` | `false` | Analytics stack disabled in beta. |
|
||||
| `superset.enabled` | `false` | Analytics stack disabled in beta. |
|
||||
|
||||
Beta also configures vLLM settings (`VLLM_BASE_URL`, `VLLM_MODEL`, etc.) for testing alternative LLM backends.
|
||||
|
||||
Beta ingress hostnames:
|
||||
|
||||
| Service | Hostname |
|
||||
@@ -648,11 +666,11 @@ Paper ingress hostnames:
|
||||
|
||||
```
|
||||
values-beta.yaml values-paper.yaml values.yaml (base)
|
||||
Beta → Paper Trading → Production
|
||||
Beta -> Paper Trading -> Production
|
||||
Integration Simulated orders Live trading
|
||||
testing Real market data Real orders
|
||||
Pipeline OFF Pipeline ON Pipeline ON
|
||||
Trading OFF Trading ON Trading ON
|
||||
Trading ON Trading ON Trading ON
|
||||
Analytics OFF Analytics ON Analytics ON
|
||||
```
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ scrape_configs:
|
||||
scrape_interval: 15s
|
||||
scrape_timeout: 10s
|
||||
metrics_path: /metrics
|
||||
static_targets:
|
||||
static_configs:
|
||||
- targets:
|
||||
# Docker Compose
|
||||
- "query-api:8000"
|
||||
@@ -124,6 +124,7 @@ All metrics are defined in `services/shared/metrics.py`. Metric names use the `s
|
||||
| `stonks_orders_rejected_total` | Counter | `reason_category` | Orders rejected before broker submission |
|
||||
| `stonks_orders_filled_total` | Counter | `side` | Orders filled by broker |
|
||||
| `stonks_orders_duplicates_prevented_total` | Counter | `detected_via` | Duplicate orders prevented by idempotency checks |
|
||||
| `stonks_orders_clamped_total` | Counter | — | Orders auto-clamped to fit within position limits |
|
||||
| `stonks_risk_evaluations_total` | Counter | `result` | Risk evaluations performed |
|
||||
| `stonks_risk_check_failures_total` | Counter | `check_name` | Individual risk check failures |
|
||||
| `stonks_positions_synced_total` | Counter | — | Position sync operations completed |
|
||||
|
||||
+117
-10
@@ -41,6 +41,7 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
| `recommendation` | `stonks:queue:recommendation` | Aggregation | Recommendation |
|
||||
| `broker_orders` | `stonks:queue:broker_orders` | Trading Engine, Trading API | Broker Adapter |
|
||||
| `lake_publish` | `stonks:queue:lake_publish` | Various services | Lake Publisher |
|
||||
| `report_generation` | `stonks:queue:report_generation` | Scheduler | Scheduler (inline consumer) |
|
||||
|
||||
### Queue Message Schemas
|
||||
|
||||
@@ -131,11 +132,20 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
}
|
||||
```
|
||||
|
||||
**Report Generation Job** (`stonks:queue:report_generation`):
|
||||
```json
|
||||
{
|
||||
"report_type": "daily | weekly",
|
||||
"period_start": "2025-01-01",
|
||||
"period_end": "2025-01-01"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Scheduler
|
||||
|
||||
**Purpose**: Triggers ingestion cycles for tracked companies and sources on a configurable cadence. Polls the symbol registry for active companies and their configured sources, respects per-source polling intervals and backoff windows, coordinates rate limits across source types, and enqueues ingestion jobs for downstream workers. Also runs periodic maintenance: stale document recovery, failed extraction retries, and data retention cleanup.
|
||||
**Purpose**: Triggers ingestion cycles for tracked companies and sources on a configurable cadence. Polls the symbol registry for active companies and their configured sources, respects per-source polling intervals and backoff windows, coordinates rate limits across source types, and enqueues ingestion jobs for downstream workers. Also runs periodic maintenance: stale document recovery, failed extraction retries, data retention cleanup, periodic aggregation re-runs, and automated report generation (daily/weekly).
|
||||
|
||||
**Entry Point**: `services.scheduler.app`
|
||||
|
||||
@@ -176,12 +186,16 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
| `recommendations` | Write (delete) | Retention cleanup |
|
||||
| `order_events` | Write (delete) | Retention cleanup |
|
||||
| `model_performance_metrics` | Write (delete) | Retention cleanup |
|
||||
| `ingestion_runs` | Write (delete) | Retention cleanup |
|
||||
| `trading_reports` | Write | Report generation storage |
|
||||
|
||||
### Redis Queues
|
||||
|
||||
| Direction | Queue | Purpose |
|
||||
|---|---|---|
|
||||
| Publish | `stonks:queue:ingestion` | Enqueue ingestion jobs for due sources |
|
||||
| Publish | `stonks:queue:aggregation` | Periodic aggregation re-runs |
|
||||
| Publish/Consume | `stonks:queue:report_generation` | Enqueue and consume report generation jobs |
|
||||
| Read | `stonks:pipeline:enabled` | Pipeline toggle (skip cycle if `"0"`) |
|
||||
| Read/Write | `stonks:lock:scheduler_cycle` | Distributed lock for single-writer |
|
||||
| Read/Write | `stonks:ratelimit:*` | Per-source-type and global Polygon rate limits |
|
||||
@@ -195,6 +209,8 @@ All queues use the `stonks:queue:<name>` key pattern (configurable via `DEPLOY_S
|
||||
- **Stale document recovery**: Every ~5 minutes, re-enqueues documents stuck in `parsed` status for >240 minutes.
|
||||
- **Failed extraction retry**: Every ~10 minutes, re-enqueues `extraction_failed` documents older than 60 minutes.
|
||||
- **Data retention cleanup**: Every ~25 minutes, deletes old rows from 10 tables with configurable retention windows (14–90 days).
|
||||
- **Periodic aggregation**: Re-enqueues aggregation jobs for all active tickers to keep trend summaries fresh.
|
||||
- **Report generation**: Enqueues daily and weekly report jobs on schedule; consumes them inline via `process_report_job` with retry logic (3 attempts, exponential backoff 30s/60s/120s).
|
||||
|
||||
---
|
||||
|
||||
@@ -281,7 +297,7 @@ None — this service is purely HTTP-driven.
|
||||
### MinIO Buckets
|
||||
|
||||
- `stonks-raw-market` — Raw market data JSON
|
||||
- `stonks-raw-news` — Raw news article JSON
|
||||
- `stonks-raw-news` — Raw news article JSON (also used for macro news)
|
||||
- `stonks-raw-filings` — Raw SEC filing data
|
||||
- `stonks-normalized` — Normalized text (written by parser)
|
||||
|
||||
@@ -296,6 +312,13 @@ None — this service is purely HTTP-driven.
|
||||
| `broker` | `AlpacaBrokerAdapter` | Alpaca |
|
||||
| `macro_news` | `MacroNewsAdapter` | Polygon.io |
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
- Macro news jobs (`source_type=macro_news`) may lack a `company_id` — the worker handles this gracefully
|
||||
- Macro news documents are typed as `macro_event` so the parser routes them to the macro classification queue
|
||||
- Duplicate documents detected via content hash are linked to the current company (except for `macro_news`)
|
||||
- Tracks `last_published_at` per source to fetch only newer articles on subsequent runs
|
||||
|
||||
---
|
||||
|
||||
## 4. Parser
|
||||
@@ -349,7 +372,7 @@ None — this service is purely HTTP-driven.
|
||||
|
||||
## 5. Extractor
|
||||
|
||||
**Purpose**: Performs LLM-based intelligence extraction from documents using Ollama. Handles two pipelines: (1) standard document extraction producing `DocumentIntelligence` with per-company impact records, and (2) macro event classification producing `GlobalEventSchema` with company-level macro impact interpolation. Supports AI agent configuration with variant-based A/B testing.
|
||||
**Purpose**: Performs LLM-based intelligence extraction from documents using Ollama or a remote vLLM inference server. Handles two pipelines: (1) standard document extraction producing `DocumentIntelligence` with per-company impact records, and (2) macro event classification producing `GlobalEventSchema` with company-level macro impact interpolation. Supports AI agent configuration with variant-based A/B testing and provider routing (Ollama or vLLM).
|
||||
|
||||
**Entry Point**: `services.extractor.main`
|
||||
|
||||
@@ -363,9 +386,16 @@ None — this service is purely HTTP-driven.
|
||||
| `REDIS_*` | _(see shared)_ | Redis connection |
|
||||
| `MINIO_*` | _(see shared)_ | MinIO connection |
|
||||
| `OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama API endpoint |
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default LLM model |
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default Ollama model |
|
||||
| `OLLAMA_TIMEOUT` | `120` | Request timeout (seconds) |
|
||||
| `OLLAMA_MAX_RETRIES` | `2` | Max retry attempts |
|
||||
| `VLLM_BASE_URL` | `http://192.168.42.254:8000` | vLLM inference server endpoint |
|
||||
| `VLLM_MODEL` | `RedHatAI/Qwen3.6-35B-A3B-NVFP4` | Default vLLM model |
|
||||
| `VLLM_TIMEOUT` | `120` | vLLM request timeout (seconds) |
|
||||
| `VLLM_MAX_RETRIES` | `2` | vLLM max retry attempts |
|
||||
| `VLLM_MAX_TOKENS` | `4096` | vLLM max output tokens |
|
||||
| `VLLM_TEMPERATURE` | `0.7` | vLLM sampling temperature |
|
||||
| `VLLM_API_KEY` | _(empty)_ | Optional API key for authenticated vLLM deployments |
|
||||
| `MACRO_CONFIDENCE_THRESHOLD` | `0.4` | Minimum confidence for macro event inclusion |
|
||||
| `LOG_LEVEL` | `INFO` | Logging level |
|
||||
|
||||
@@ -395,6 +425,7 @@ None — this service is purely HTTP-driven.
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
- **LLM provider routing**: The `AgentConfigResolver` resolves agent configuration from the DB, including a `model_provider` field (`"ollama"` or `"vllm"`). The `build_llm_client` factory returns the appropriate client (`OllamaClient` or `VLLMClient`).
|
||||
- Alternates between macro and extraction queues (1 macro per 3 jobs) to prevent starvation
|
||||
- Resolves agent configuration from the DB with a 60-second TTL cache (`AgentConfigResolver`)
|
||||
- Supports separate models for document extraction and event classification
|
||||
@@ -565,7 +596,7 @@ None — this service is purely HTTP-driven.
|
||||
| `risk_tier_history` | Read/Write | Risk tier change audit trail |
|
||||
| `circuit_breaker_events` | Read/Write | Circuit breaker trigger/reset events |
|
||||
| `positions` | Read | Current open positions |
|
||||
| `position_stop_levels` | Read/Write | Stop-loss and take-profit levels |
|
||||
| `position_stop_levels` | Read/Write | Stop-loss and take-profit levels per position |
|
||||
| `orders` | Read | Order history for dedup |
|
||||
| `backtest_runs` | Read/Write | Backtest configuration and results |
|
||||
| `backtest_trades` | Read/Write | Individual trades within a backtest |
|
||||
@@ -652,7 +683,7 @@ None — called synchronously by the broker adapter and via HTTP.
|
||||
| `positions` | Write (upsert) | Sync positions from Alpaca |
|
||||
| `broker_accounts` | Write (upsert) | Register/update broker account |
|
||||
| `daily_risk_snapshots` | Read | Daily portfolio state for risk evaluation |
|
||||
| `risk_configs` | Read | Active risk configuration |
|
||||
| `risk_configs` | Read | Active risk configuration for order evaluation |
|
||||
| `approval_requests` | Write | Create approval requests for gated orders |
|
||||
| `audit_events` | Write | Full audit trail |
|
||||
|
||||
@@ -728,7 +759,7 @@ None — called synchronously by the broker adapter and via HTTP.
|
||||
|
||||
## 12. Query API
|
||||
|
||||
**Purpose**: Read-only FastAPI service for analytics, evidence drill-down, and admin controls. Serves the React dashboard and external integrations with endpoints for companies, documents, trends, recommendations, orders, positions, portfolio metrics, global events, macro impacts, competitive signals, trend projections, AI agents, dead-letter queues, pipeline control, SQL explorer, saved queries, audit trail, DevOps metrics, and Prometheus metrics.
|
||||
**Purpose**: Read-only FastAPI service for analytics, evidence drill-down, and admin controls. Serves the React dashboard and external integrations with endpoints for companies, documents, trends, recommendations, orders, positions, portfolio metrics, global events, macro impacts, competitive signals, trend projections, AI agents, dead-letter queues, pipeline control, SQL explorer, saved queries, audit trail, DevOps metrics, Prometheus metrics, model validation, and trading reports.
|
||||
|
||||
**Entry Point**: `services.api.app` (FastAPI)
|
||||
|
||||
@@ -745,6 +776,7 @@ None — called synchronously by the broker adapter and via HTTP.
|
||||
| `TRINO_PORT` | `8080` | Trino port |
|
||||
| `TRINO_CATALOG` | `lakehouse` | Trino catalog |
|
||||
| `TRINO_SCHEMA` | `stonks` | Trino schema |
|
||||
| `TRINO_ICEBERG_CATALOG` | `iceberg` | Trino Iceberg catalog |
|
||||
| `LOG_LEVEL` | `INFO` | Logging level |
|
||||
|
||||
### Database Tables
|
||||
@@ -757,9 +789,9 @@ The Query API reads from nearly all tables in the database, including:
|
||||
| `sources` | Source configurations |
|
||||
| `documents`, `document_company_mentions` | Document timelines |
|
||||
| `document_intelligence`, `document_impact_records` | Intelligence extraction results |
|
||||
| `trend_windows`, `trend_history`, `trend_projections` | Trend summaries and projections |
|
||||
| `trend_windows`, `trend_history`, `trend_projections`, `trend_evidence` | Trend summaries and projections |
|
||||
| `recommendations`, `recommendation_evidence` | Recommendation history with evidence |
|
||||
| `risk_evaluations` | Risk evaluation results |
|
||||
| `risk_evaluations`, `risk_configs` | Risk evaluation results and configuration |
|
||||
| `orders`, `order_events` | Order history and lifecycle |
|
||||
| `positions`, `portfolio_snapshots` | Portfolio state |
|
||||
| `global_events`, `macro_impact_records` | Macro event data |
|
||||
@@ -768,6 +800,13 @@ The Query API reads from nearly all tables in the database, including:
|
||||
| `audit_events` | Audit trail |
|
||||
| `market_snapshots` | Market price data |
|
||||
| `watchlists`, `watchlist_members` | Watchlist data |
|
||||
| `ingestion_runs` | Ingestion throughput and source health |
|
||||
| `model_performance_metrics` | Model quality metrics |
|
||||
| `prediction_snapshots`, `prediction_outcomes` | Model validation and calibration |
|
||||
| `trading_decisions` | Trading decision history |
|
||||
| `trading_reports` | Generated daily/weekly reports |
|
||||
| `approval_requests` | Pending approval workflow |
|
||||
| `symbol_lockouts` | Active trading lockouts per symbol |
|
||||
|
||||
### Redis Queues
|
||||
|
||||
@@ -776,15 +815,22 @@ The Query API reads from nearly all tables in the database, including:
|
||||
| Read/Write | `stonks:pipeline:enabled` | Pipeline toggle control |
|
||||
| Read | `stonks:queue:*` | Queue depth monitoring for DLQ and DevOps metrics |
|
||||
| Read | `stonks:dlq:*` | Dead-letter queue inspection and replay |
|
||||
| Read | `stonks:ratelimit:*` | Rate limit status monitoring |
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
- Exposes `/metrics` endpoint for Prometheus scraping
|
||||
- Trace context propagation via `x-trace-id` header middleware
|
||||
- SQL explorer endpoint for ad-hoc Trino queries
|
||||
- SQL explorer endpoint for ad-hoc Trino queries (`/analytics/query`)
|
||||
- PostgreSQL schema explorer (`/pg/schema`, `/pg/query`)
|
||||
- Dead-letter queue management (list, inspect, replay)
|
||||
- Pipeline control (enable/disable via Redis toggle)
|
||||
- Saved queries with CRUD operations
|
||||
- Macro and competitive layer toggle endpoints
|
||||
- Model validation endpoints (summary, calibration, IC by horizon, gate status, attribution)
|
||||
- Trading report listing and retrieval
|
||||
- SSE pipeline health stream (`/pipeline/stream`)
|
||||
- Market price backfill endpoints
|
||||
|
||||
---
|
||||
|
||||
@@ -1042,6 +1088,67 @@ All services load configuration from environment variables via `services/shared/
|
||||
| `OLLAMA_MODEL` | `qwen3.5:9b` | Default model |
|
||||
| `OLLAMA_TIMEOUT` | `120` | Request timeout (seconds) |
|
||||
| `OLLAMA_MAX_RETRIES` | `2` | Max retry attempts |
|
||||
| `OLLAMA_RETRY_BASE_DELAY` | `1.0` | Base delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_MAX_DELAY` | `10.0` | Maximum delay between retries (seconds) |
|
||||
| `OLLAMA_RETRY_BACKOFF_MULTIPLIER` | `2.0` | Backoff multiplier |
|
||||
|
||||
### vLLM
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `VLLM_BASE_URL` | `http://192.168.42.254:8000` | vLLM inference server endpoint |
|
||||
| `VLLM_MODEL` | `RedHatAI/Qwen3.6-35B-A3B-NVFP4` | Default vLLM model |
|
||||
| `VLLM_TIMEOUT` | `120` | Request timeout (seconds) |
|
||||
| `VLLM_MAX_RETRIES` | `2` | Max retry attempts |
|
||||
| `VLLM_MAX_TOKENS` | `4096` | Max output tokens |
|
||||
| `VLLM_TEMPERATURE` | `0.7` | Sampling temperature |
|
||||
| `VLLM_API_KEY` | _(empty)_ | Optional API key for authenticated deployments |
|
||||
| `VLLM_RETRY_BASE_DELAY` | `1.0` | Base delay between retries (seconds) |
|
||||
| `VLLM_RETRY_MAX_DELAY` | `10.0` | Maximum delay between retries (seconds) |
|
||||
| `VLLM_RETRY_BACKOFF_MULTIPLIER` | `2.0` | Backoff multiplier |
|
||||
|
||||
### Trino
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `TRINO_HOST` | `localhost` | Trino host |
|
||||
| `TRINO_PORT` | `8080` | Trino port |
|
||||
| `TRINO_CATALOG` | `lakehouse` | Trino catalog |
|
||||
| `TRINO_SCHEMA` | `stonks` | Trino schema |
|
||||
| `TRINO_ICEBERG_CATALOG` | `iceberg` | Trino Iceberg catalog |
|
||||
|
||||
### Market Data
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `MARKET_DATA_API_KEY` | _(empty)_ | Polygon.io API key |
|
||||
| `MARKET_DATA_BASE_URL` | `https://api.polygon.io` | Polygon base URL |
|
||||
| `MARKET_DATA_PROVIDER` | `polygon` | Market data provider |
|
||||
|
||||
### Broker
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `BROKER_MODE` | `paper` | Trading mode (`paper` or `live`) |
|
||||
| `BROKER_PROVIDER` | `alpaca` | Broker provider |
|
||||
| `BROKER_API_KEY` | _(none)_ | Alpaca API key |
|
||||
| `BROKER_API_SECRET` | _(none)_ | Alpaca API secret |
|
||||
| `BROKER_BASE_URL` | _(none)_ | Alpaca base URL |
|
||||
|
||||
### Retention
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `RETENTION_RAW_MARKET_DAYS` | `90` | Raw market data retention (days) |
|
||||
| `RETENTION_RAW_NEWS_DAYS` | `180` | Raw news data retention (days) |
|
||||
| `RETENTION_RAW_FILINGS_DAYS` | `365` | Raw filings retention (days) |
|
||||
| `RETENTION_NORMALIZED_DAYS` | `180` | Normalized text retention (days) |
|
||||
| `RETENTION_LLM_PROMPTS_DAYS` | `365` | LLM prompt retention (days) |
|
||||
| `RETENTION_LLM_RESULTS_DAYS` | `365` | LLM result retention (days) |
|
||||
| `RETENTION_LAKEHOUSE_DAYS` | `730` | Lakehouse data retention (days) |
|
||||
| `RETENTION_AUDIT_DAYS` | `730` | Audit log retention (days) |
|
||||
| `RETENTION_CLEANUP_INTERVAL_HOURS` | `24` | Cleanup interval (hours) |
|
||||
| `RETENTION_BATCH_SIZE` | `1000` | Rows deleted per batch |
|
||||
|
||||
### Observability
|
||||
|
||||
|
||||
+234
-3
@@ -256,8 +256,13 @@ export interface MarketPrice {
|
||||
captured_at: string;
|
||||
}
|
||||
|
||||
export function useMarketPrices(ticker: string | undefined, limit = 30) {
|
||||
return useGet<MarketPrice[]>(
|
||||
/**
 * Response envelope used as the result type of `useMarketPrices`.
 */
export interface MarketPriceResponse {
  /** Price bars for the requested ticker. */
  bars: MarketPrice[];
  /** Low/high pair for the 90-day range; either bound may be null. */
  range_90d: {
    low: number | null;
    high: number | null;
  };
}
|
||||
|
||||
export function useMarketPrices(ticker: string | undefined, limit = 200) {
|
||||
return useGet<MarketPriceResponse>(
|
||||
['market-prices', ticker, limit],
|
||||
'query',
|
||||
`/api/market/prices/${ticker}?limit=${limit}`,
|
||||
@@ -265,6 +270,18 @@ export function useMarketPrices(ticker: string | undefined, limit = 30) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Mutation hook that backfills 90 days of daily bars from Polygon for a
 * single ticker.
 *
 * The mutation variable is the ticker symbol. It POSTs an empty body to
 * `/api/market/backfill/{ticker}` and, on success, invalidates the
 * `['market-prices', ticker]` queries.
 */
export function useBackfillMarketPrices() {
  const queryClient = useQueryClient();

  type BackfillResult = { ticker: string; inserted: number; total_bars: number };

  const requestBackfill = (ticker: string) =>
    apiPost<BackfillResult>('query', `/api/market/backfill/${ticker}`, {});

  return useMutation({
    mutationFn: requestBackfill,
    onSuccess(_result, ticker) {
      queryClient.invalidateQueries({ queryKey: ['market-prices', ticker] });
    },
  });
}
|
||||
|
||||
/**
 * Query hook for a single trend summary at `/api/trends/{id}`.
 *
 * @param id - trend identifier. The last argument handed to `useGet` is
 *   `false` while `id` is undefined or empty (presumably the query's
 *   "enabled" flag — confirm against `useGet`).
 */
export function useTrend(id: string | undefined) {
  const hasId = Boolean(id);
  const path = `/api/trends/${id}`;
  return useGet<TrendSummary>(['trend', id], 'query', path, hasId);
}
|
||||
@@ -372,6 +389,7 @@ export interface Position {
|
||||
quantity: number;
|
||||
avg_entry_price: number;
|
||||
current_price: number | null;
|
||||
polygon_price: number | null;
|
||||
unrealized_pnl: number | null;
|
||||
realized_pnl: number | null;
|
||||
updated_at: string;
|
||||
@@ -662,12 +680,18 @@ export interface CompetitiveSignal {
|
||||
}
|
||||
|
||||
/**
 * Query hook for the competitive signals of a ticker.
 *
 * The endpoint may answer with either a bare array or a
 * `{ competitive_signals: [...] }` wrapper; both are normalised so that
 * `data` is always `CompetitiveSignal[]` (or `undefined` while there is no
 * payload). All other fields of the `useGet` result are passed through.
 *
 * @param ticker - ticker symbol; the `!!ticker` flag handed to `useGet` is
 *   false while it is undefined or empty.
 */
export function useCompetitiveSignals(ticker: string | undefined) {
  type WrappedSignals = { competitive_signals: CompetitiveSignal[] };

  const query = useGet<CompetitiveSignal[] | WrappedSignals>(
    ['competitive-signals', ticker],
    'query',
    `/api/patterns/${ticker}/competitive-signals`,
    !!ticker,
  );

  const payload = query.data;
  let signals: CompetitiveSignal[] | undefined;
  if (!payload) {
    signals = undefined;
  } else if (Array.isArray(payload)) {
    signals = payload;
  } else {
    // Wrapper form: unwrap, falling back to an empty list if the key is absent.
    signals = (payload as WrappedSignals).competitive_signals ?? [];
  }

  return { ...query, data: signals };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -861,3 +885,210 @@ export function useToggleMacro() {
|
||||
onSuccess: () => qc.invalidateQueries({ queryKey: ['macro-status'] }),
|
||||
});
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation: Model Quality & Calibration (Requirements 12.1, 12.2, 12.3, 12.7)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One model-metric snapshot, as carried in `ValidationSummary.snapshot`.
 * Every metric value other than `prediction_count` is nullable.
 */
export interface ModelMetricSnapshot {
  // Identity and scope of the snapshot
  id: string;
  generated_at: string;
  lookback_window: string;
  horizon: string;
  prediction_count: number;

  // Overall metrics
  win_rate: number | null;
  directional_accuracy: number | null;
  information_coefficient: number | null;
  rank_information_coefficient: number | null;

  // Return metrics
  avg_return: number | null;
  avg_excess_return_vs_spy: number | null;
  avg_excess_return_vs_sector: number | null;

  // Calibration metrics
  calibration_error: number | null;
  brier_score: number | null;

  // Win rate split by buy / sell / hold
  buy_win_rate: number | null;
  sell_win_rate: number | null;
  hold_win_rate: number | null;

  /** Free-form extra data; shape is not defined here. */
  metadata: Record<string, unknown> | null;
}
|
||||
|
||||
/** Result type of `useValidationSummary` (`/api/validation/summary`). */
export interface ValidationSummary {
  /** Metric snapshot, or null when none is provided. */
  snapshot: ModelMetricSnapshot | null;
  /** Untyped gate-status object, or null. */
  gate_status: Record<string, unknown> | null;
}
|
||||
|
||||
/** One entry of `ValidationCalibration.buckets`. */
export interface CalibrationBucket {
  // Bucket bounds
  bucket_low: number;
  bucket_high: number;

  // Values reported for the bucket
  avg_confidence: number;
  observed_win_rate: number;
  prediction_count: number;

  /** Flag supplied by the API; the threshold behind it is not visible here. */
  miscalibrated: boolean;
}
|
||||
|
||||
/** Result type of `useValidationCalibration` (`/api/validation/calibration`). */
export interface ValidationCalibration {
  buckets: CalibrationBucket[];
  /** Lookback string reported in the response. */
  lookback: string;
  /** Horizon string reported in the response. */
  horizon: string;
}
|
||||
|
||||
/** One entry of `ValidationICByHorizon.horizons`. */
export interface ICByHorizonEntry {
  horizon: string;
  /** Nullable information coefficient. */
  information_coefficient: number | null;
  /** Nullable rank information coefficient. */
  rank_information_coefficient: number | null;
  prediction_count: number;
  /** Generation timestamp string, or null. */
  generated_at: string | null;
}
|
||||
|
||||
/** Result type of `useValidationICByHorizon` (`/api/validation/ic-by-horizon`). */
export interface ValidationICByHorizon {
  horizons: ICByHorizonEntry[];
  /** Lookback string reported in the response. */
  lookback: string;
}
|
||||
|
||||
/** Result type of `useValidationGateStatus` (`/api/validation/gate-status`). */
export interface ValidationGateStatus {
  /** Untyped gate-status object, or null. */
  gate_status: Record<string, unknown> | null;
  /** Optional; may also be null. */
  updated_at?: string | null;
  /** Optional message string. */
  message?: string;
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/summary`.
 *
 * Empty-string arguments are left out of the query string. Both arguments are
 * always part of the query key.
 *
 * @param lookback - value for the `lookback` query parameter (default `'30d'`).
 * @param horizon - value for the `horizon` query parameter (default `'7d'`).
 */
export function useValidationSummary(lookback = '30d', horizon = '7d') {
  const search = new URLSearchParams();
  const filters: Array<[string, string]> = [
    ['lookback', lookback],
    ['horizon', horizon],
  ];
  for (const [name, value] of filters) {
    if (value) search.set(name, value);
  }

  const encoded = search.toString();
  const path = encoded ? `/api/validation/summary?${encoded}` : '/api/validation/summary';
  return useGet<ValidationSummary>(['validation-summary', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/calibration`.
 *
 * Empty-string arguments are left out of the query string. Both arguments are
 * always part of the query key.
 *
 * @param lookback - value for the `lookback` query parameter (default `'30d'`).
 * @param horizon - value for the `horizon` query parameter (default `'7d'`).
 */
export function useValidationCalibration(lookback = '30d', horizon = '7d') {
  const search = new URLSearchParams();
  const filters: Array<[string, string]> = [
    ['lookback', lookback],
    ['horizon', horizon],
  ];
  for (const [name, value] of filters) {
    if (value) search.set(name, value);
  }

  const encoded = search.toString();
  const path = encoded ? `/api/validation/calibration?${encoded}` : '/api/validation/calibration';
  return useGet<ValidationCalibration>(['validation-calibration', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/ic-by-horizon`.
 *
 * @param lookback - value for the `lookback` query parameter (default
 *   `'30d'`); an empty string is left out of the query string but is still
 *   part of the query key.
 */
export function useValidationICByHorizon(lookback = '30d') {
  const search = new URLSearchParams();
  if (lookback) {
    search.set('lookback', lookback);
  }

  const encoded = search.toString();
  const path = encoded ? `/api/validation/ic-by-horizon?${encoded}` : '/api/validation/ic-by-horizon';
  return useGet<ValidationICByHorizon>(['validation-ic-by-horizon', lookback], 'query', path);
}
|
||||
|
||||
/** Query hook for `/api/validation/gate-status`; takes no parameters. */
export function useValidationGateStatus() {
  const queryKey = ['validation-gate-status'];
  const path = '/api/validation/gate-status';
  return useGet<ValidationGateStatus>(queryKey, 'query', path);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validation: Attribution — Sources, Catalysts, Layers (Requirements 12.4, 12.5, 12.6)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One entry of `SourceAttributionResponse.sources`. */
export interface SourceAttribution {
  // Which source the entry describes
  source: string;
  source_type: string;

  // Values reported for that source
  prediction_count: number;
  avg_weight: number;
  avg_contribution_score: number;
  win_rate: number;
  avg_future_return: number;
  avg_excess_return_vs_spy: number;
  /** The only nullable metric in this entry. */
  information_coefficient: number | null;
  duplicate_rate: number;
}
|
||||
|
||||
/** Result type of `useValidationAttributionSources`. */
export interface SourceAttributionResponse {
  sources: SourceAttribution[];
  /** Lookback string reported in the response. */
  lookback: string;
  /** Horizon string reported in the response. */
  horizon: string;
}
|
||||
|
||||
/** One entry of `CatalystAttributionResponse.catalysts`. */
export interface CatalystAttribution {
  catalyst_type: string;
  prediction_count: number;
  win_rate: number;
  avg_future_return: number;
  avg_excess_return_vs_spy: number;
  /** The only nullable metric in this entry. */
  information_coefficient: number | null;
}
|
||||
|
||||
/** Result type of `useValidationAttributionCatalysts`. */
export interface CatalystAttributionResponse {
  catalysts: CatalystAttribution[];
  /** Lookback string reported in the response. */
  lookback: string;
  /** Horizon string reported in the response. */
  horizon: string;
}
|
||||
|
||||
/** One entry of `LayerAttributionResponse.layers`. */
export interface LayerAttribution {
  layer: string;
  avg_contribution_pct: number;
  dominant_win_rate: number;
  /** Nullable, unlike the other numeric fields. */
  dominant_ic: number | null;
}
|
||||
|
||||
/** Result type of `useValidationAttributionLayers`. */
export interface LayerAttributionResponse {
  layers: LayerAttribution[];
  /** Lookback string reported in the response. */
  lookback: string;
  /** Horizon string reported in the response. */
  horizon: string;
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/sources`.
 *
 * Empty-string arguments are left out of the query string. Both arguments are
 * always part of the query key.
 *
 * @param lookback - value for the `lookback` query parameter (default `'30d'`).
 * @param horizon - value for the `horizon` query parameter (default `'7d'`).
 */
export function useValidationAttributionSources(lookback = '30d', horizon = '7d') {
  const search = new URLSearchParams();
  const filters: Array<[string, string]> = [
    ['lookback', lookback],
    ['horizon', horizon],
  ];
  for (const [name, value] of filters) {
    if (value) search.set(name, value);
  }

  const encoded = search.toString();
  const path = encoded
    ? `/api/validation/attribution/sources?${encoded}`
    : '/api/validation/attribution/sources';
  return useGet<SourceAttributionResponse>(['validation-attribution-sources', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/catalysts`.
 *
 * Empty-string arguments are left out of the query string. Both arguments are
 * always part of the query key.
 *
 * @param lookback - value for the `lookback` query parameter (default `'30d'`).
 * @param horizon - value for the `horizon` query parameter (default `'7d'`).
 */
export function useValidationAttributionCatalysts(lookback = '30d', horizon = '7d') {
  const search = new URLSearchParams();
  const filters: Array<[string, string]> = [
    ['lookback', lookback],
    ['horizon', horizon],
  ];
  for (const [name, value] of filters) {
    if (value) search.set(name, value);
  }

  const encoded = search.toString();
  const path = encoded
    ? `/api/validation/attribution/catalysts?${encoded}`
    : '/api/validation/attribution/catalysts';
  return useGet<CatalystAttributionResponse>(['validation-attribution-catalysts', lookback, horizon], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for `/api/validation/attribution/layers`.
 *
 * Empty-string arguments are left out of the query string. Both arguments are
 * always part of the query key.
 *
 * @param lookback - value for the `lookback` query parameter (default `'30d'`).
 * @param horizon - value for the `horizon` query parameter (default `'7d'`).
 */
export function useValidationAttributionLayers(lookback = '30d', horizon = '7d') {
  const search = new URLSearchParams();
  const filters: Array<[string, string]> = [
    ['lookback', lookback],
    ['horizon', horizon],
  ];
  for (const [name, value] of filters) {
    if (value) search.set(name, value);
  }

  const encoded = search.toString();
  const path = encoded
    ? `/api/validation/attribution/layers?${encoded}`
    : '/api/validation/attribution/layers';
  return useGet<LayerAttributionResponse>(['validation-attribution-layers', lookback, horizon], 'query', path);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Trading Reports
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Element type of the list returned by `useReports`; also the base of `ReportDetail`. */
export interface ReportListItem {
  id: string;
  report_type: string;

  // Period covered by the report
  period_start: string;
  period_end: string;

  validation_status: string;
  generated_at: string;
}
|
||||
|
||||
/** Result type of `useReport`: the list-item fields plus the report payload. */
export interface ReportDetail extends ReportListItem {
  /** Untyped report body; its shape is not defined here. */
  report_data: Record<string, unknown>;
  created_at: string;
}
|
||||
|
||||
/**
 * Query hook for the report list at `/api/reports`.
 *
 * Only truthy filter values are sent, so an empty string — and a `limit` or
 * `offset` of `0` — is omitted from the query string. The whole `params`
 * object is part of the query key.
 *
 * @param params - optional filters; each maps to the query parameter of the
 *   same name.
 */
export function useReports(params?: {
  report_type?: string;
  start_date?: string;
  end_date?: string;
  limit?: number;
  offset?: number;
}) {
  const search = new URLSearchParams();
  const filters: Array<[string, string | number | undefined]> = [
    ['report_type', params?.report_type],
    ['start_date', params?.start_date],
    ['end_date', params?.end_date],
    ['limit', params?.limit],
    ['offset', params?.offset],
  ];
  for (const [name, value] of filters) {
    if (value) search.set(name, String(value));
  }

  const encoded = search.toString();
  const path = encoded ? `/api/reports?${encoded}` : '/api/reports';
  return useGet<ReportListItem[]>(['reports', params], 'query', path);
}
|
||||
|
||||
/**
 * Query hook for a single report at `/api/reports/{id}`.
 *
 * @param id - report identifier. The last argument handed to `useGet` is
 *   `false` while `id` is undefined or empty (presumably the query's
 *   "enabled" flag — confirm against `useGet`).
 */
export function useReport(id: string | undefined) {
  const hasId = Boolean(id);
  const path = `/api/reports/${id}`;
  return useGet<ReportDetail>(['report', id], 'query', path, hasId);
}
|
||||
|
||||
@@ -22,6 +22,8 @@ export interface TradingEngineStatus {
|
||||
portfolio_heat: number;
|
||||
portfolio_value: number;
|
||||
open_position_count: number;
|
||||
max_open_positions: number;
|
||||
absolute_position_cap: number;
|
||||
last_decision_at: string | null;
|
||||
micro_trading_enabled: boolean;
|
||||
uptime_seconds: number | null;
|
||||
@@ -314,9 +316,12 @@ export function useBacktestLaunch() {
|
||||
export function useResetPaperTrading() {
|
||||
const qc = useQueryClient();
|
||||
return useMutation({
|
||||
mutationFn: (initial_capital: number = 0) =>
|
||||
mutationFn: (params: { initial_capital?: number; reserve_pct?: number } = {}) =>
|
||||
apiPost<{ reset: boolean; initial_capital: number; active_pool: number; reserve_pool: number; broker: Record<string, number> }>(
|
||||
'trading', '/api/trading/reset', { initial_capital },
|
||||
'trading', '/api/trading/reset', {
|
||||
initial_capital: params.initial_capital ?? 0,
|
||||
reserve_pct: params.reserve_pct ?? undefined,
|
||||
},
|
||||
),
|
||||
onSuccess: () => {
|
||||
qc.invalidateQueries({ queryKey: ['trading-status'] });
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
Globe,
|
||||
BarChart3,
|
||||
Bot,
|
||||
ClipboardList,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface NavItem {
|
||||
@@ -40,6 +41,7 @@ const navItems: NavItem[] = [
|
||||
{ to: '/positions', label: 'Positions', icon: <Wallet size={18} />, group: 'Trading' },
|
||||
{ to: '/trading', label: 'Trading Controls', icon: <ShieldCheck size={18} />, group: 'Trading' },
|
||||
{ to: '/trading/engine', label: 'Trading Engine', icon: <BarChart3 size={18} />, group: 'Trading' },
|
||||
{ to: '/reports', label: 'Reports', icon: <ClipboardList size={18} />, group: 'Trading' },
|
||||
{ to: '/ops/pipeline', label: 'Pipeline', icon: <Activity size={18} />, group: 'Ops' },
|
||||
{ to: '/ops/ingestion', label: 'Ingestion', icon: <Download size={18} />, group: 'Ops' },
|
||||
{ to: '/ops/model', label: 'Model Perf', icon: <Cpu size={18} />, group: 'Ops' },
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useParams, useNavigate } from '@tanstack/react-router';
|
||||
import { useState } from 'react';
|
||||
import { useParams, useNavigate, Link } from '@tanstack/react-router';
|
||||
import { useState, useEffect } from 'react';
|
||||
import {
|
||||
useCompany,
|
||||
useCompanySources,
|
||||
@@ -14,13 +14,15 @@ import {
|
||||
useTrends,
|
||||
useTrendHistory,
|
||||
useMarketPrices,
|
||||
useDocument,
|
||||
usePositions,
|
||||
} from '../api/hooks';
|
||||
import { StatusBadge, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';
|
||||
import { DataTable, type Column } from '../components/DataTable';
|
||||
import type { Source, Alias, MacroImpactRecord, CompetitorRelationship, HistoricalPattern, CompetitiveSignal, CorporateDecision, TrendSummary, MarketPrice } from '../api/hooks';
|
||||
import {
|
||||
LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer,
|
||||
CartesianGrid, Legend,
|
||||
CartesianGrid, Legend, ReferenceLine,
|
||||
} from 'recharts';
|
||||
|
||||
const sourceCols: Column<Source>[] = [
|
||||
@@ -42,8 +44,12 @@ export function CompanyDetailPage() {
|
||||
const { data: signals } = useCompetitiveSignals(company?.ticker);
|
||||
const { data: decisions } = useCorporateDecisions(company?.ticker);
|
||||
const { data: trends } = useTrends({ ticker: company?.ticker, limit: 200 });
|
||||
const { data: trendHistory } = useTrendHistory({ ticker: company?.ticker, limit: 500 });
|
||||
const { data: marketPrices } = useMarketPrices(company?.ticker, 200);
|
||||
const [selectedWindow, setSelectedWindow] = useState('7d');
|
||||
const { data: trendHistory } = useTrendHistory({ ticker: company?.ticker, window: selectedWindow, limit: 500 });
|
||||
const { data: marketPriceData } = useMarketPrices(company?.ticker, 200);
|
||||
const marketPrices = marketPriceData?.bars ?? [];
|
||||
const range90d = marketPriceData?.range_90d ?? { low: null, high: null };
|
||||
const { data: positions } = usePositions(company?.ticker);
|
||||
const [tab, setTab] = useState<'trends' | 'sources' | 'aliases' | 'macro' | 'competitors' | 'patterns' | 'signals' | 'decisions'>('trends');
|
||||
|
||||
if (isLoading || !company) return <LoadingSpinner />;
|
||||
@@ -82,7 +88,10 @@ export function CompanyDetailPage() {
|
||||
</div>
|
||||
|
||||
{tab === 'trends' && (
|
||||
<TrendHistoryChart trends={trendHistory ?? []} latestTrends={trends ?? []} ticker={company.ticker} marketPrices={marketPrices ?? []} />
|
||||
<div className="space-y-4">
|
||||
<PositionCard positions={positions ?? []} ticker={company.ticker} />
|
||||
<TrendHistoryChart trends={trendHistory ?? []} latestTrends={trends ?? []} ticker={company.ticker} marketPrices={marketPrices} range90d={range90d} selectedWindow={selectedWindow} onWindowChange={setSelectedWindow} />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{tab === 'sources' && (
|
||||
@@ -444,62 +453,7 @@ function CompetitiveSignalsPanel({ signals }: { signals: CompetitiveSignal[] })
|
||||
) : (
|
||||
<div className="space-y-2">
|
||||
{signals.map((s) => (
|
||||
<div key={s.id}>
|
||||
<div
|
||||
className="flex items-center justify-between rounded-lg border border-cyan-700/30 bg-cyan-900/10 p-3 cursor-pointer hover:border-cyan-500/50"
|
||||
onClick={() => setExpandedId(expandedId === s.id ? null : s.id)}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="rounded bg-cyan-900/40 border border-cyan-700/50 px-1.5 py-0.5 text-[10px] font-medium text-cyan-400">COMPETITIVE</span>
|
||||
<span className="font-mono text-sm text-brand-300">{s.source_ticker}</span>
|
||||
<span className="text-xs text-gray-400">→</span>
|
||||
<StatusBadge status={s.catalyst_type} />
|
||||
<StatusBadge status={s.signal_direction} />
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<ConfidenceBar value={s.signal_strength} />
|
||||
<span className="text-xs text-gray-500">{new Date(s.computed_at).toLocaleDateString()}</span>
|
||||
</div>
|
||||
</div>
|
||||
{expandedId === s.id && (
|
||||
<Card className="mt-1 ml-4">
|
||||
<dl className="grid grid-cols-2 gap-x-6 gap-y-2 text-xs sm:grid-cols-3">
|
||||
<div>
|
||||
<dt className="text-gray-500">Source Ticker</dt>
|
||||
<dd className="font-mono text-gray-200">{s.source_ticker}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Target Ticker</dt>
|
||||
<dd className="font-mono text-gray-200">{s.target_ticker}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Catalyst Type</dt>
|
||||
<dd className="text-gray-200">{s.catalyst_type}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Pattern Confidence</dt>
|
||||
<dd><ConfidenceBar value={s.pattern_confidence} /></dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Signal Strength</dt>
|
||||
<dd><ConfidenceBar value={s.signal_strength} /></dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Relationship Strength</dt>
|
||||
<dd><ConfidenceBar value={s.relationship_strength} /></dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Source Document</dt>
|
||||
<dd className="font-mono text-gray-400 text-[10px]">{s.source_document_id}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Computed At</dt>
|
||||
<dd className="text-gray-200">{new Date(s.computed_at).toLocaleString()}</dd>
|
||||
</div>
|
||||
</dl>
|
||||
</Card>
|
||||
)}
|
||||
</div>
|
||||
<SignalRow key={s.id} signal={s} expanded={expandedId === s.id} onToggle={() => setExpandedId(expandedId === s.id ? null : s.id)} />
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
@@ -507,6 +461,88 @@ function CompetitiveSignalsPanel({ signals }: { signals: CompetitiveSignal[] })
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * One collapsible row of the competitive-signals list.
 *
 * The header is always rendered: source ticker, catalyst/direction badges, a
 * link to the source document, signal strength and the computed date.
 * Clicking the header calls `onToggle`. When `expanded` is true, a detail
 * card listing every field of the signal is rendered below it.
 *
 * The source document is looked up with `useDocument` so its title can be
 * used as the link label. Until a title is available the label falls back to
 * `doc:` plus the first 8 characters of the document id.
 *
 * @param signal - the signal to display.
 * @param expanded - whether the detail card is shown.
 * @param onToggle - called when the header is clicked.
 */
function SignalRow({ signal: s, expanded, onToggle }: { signal: CompetitiveSignal; expanded: boolean; onToggle: () => void }) {
  const { data: sourceDoc } = useDocument(s.source_document_id);
  const docLabel = sourceDoc?.title ?? `doc:${s.source_document_id.slice(0, 8)}…`;
  const computedAt = new Date(s.computed_at);

  // Clicks on a document link must not bubble up and toggle the row.
  const swallowClick = (e: { stopPropagation: () => void }) => e.stopPropagation();

  const header = (
    <div
      className="flex items-center justify-between rounded-lg border border-cyan-700/30 bg-cyan-900/10 p-3 cursor-pointer hover:border-cyan-500/50"
      onClick={onToggle}
    >
      <div className="flex items-center gap-3">
        <span className="rounded bg-cyan-900/40 border border-cyan-700/50 px-1.5 py-0.5 text-[10px] font-medium text-cyan-400">COMPETITIVE</span>
        <span className="font-mono text-sm text-brand-300">{s.source_ticker}</span>
        <span className="text-xs text-gray-400">→</span>
        <StatusBadge status={s.catalyst_type} />
        <StatusBadge status={s.signal_direction} />
      </div>
      <div className="flex items-center gap-3">
        <Link
          to="/documents/$id"
          params={{ id: s.source_document_id }}
          className="max-w-[180px] truncate text-xs text-brand-400 hover:underline"
          onClick={swallowClick}
          title={sourceDoc?.title ?? s.source_document_id}
        >
          {docLabel}
        </Link>
        <ConfidenceBar value={s.signal_strength} />
        <span className="text-xs text-gray-500">{computedAt.toLocaleDateString()}</span>
      </div>
    </div>
  );

  // Built lazily so the detail entries are only created for an expanded row.
  const renderDetails = () => {
    const entries = [
      { label: 'Source Ticker', valueClass: 'font-mono text-gray-200', value: s.source_ticker },
      { label: 'Target Ticker', valueClass: 'font-mono text-gray-200', value: s.target_ticker },
      { label: 'Catalyst Type', valueClass: 'text-gray-200', value: s.catalyst_type },
      { label: 'Pattern Confidence', valueClass: undefined, value: <ConfidenceBar value={s.pattern_confidence} /> },
      { label: 'Signal Strength', valueClass: undefined, value: <ConfidenceBar value={s.signal_strength} /> },
      { label: 'Relationship Strength', valueClass: undefined, value: <ConfidenceBar value={s.relationship_strength} /> },
      {
        label: 'Source Document',
        valueClass: undefined,
        value: (
          <Link
            to="/documents/$id"
            params={{ id: s.source_document_id }}
            className="text-brand-400 hover:underline"
            onClick={swallowClick}
          >
            {docLabel}
          </Link>
        ),
      },
      { label: 'Computed At', valueClass: 'text-gray-200', value: computedAt.toLocaleString() },
    ];

    return (
      <Card className="mt-1 ml-4">
        <dl className="grid grid-cols-2 gap-x-6 gap-y-2 text-xs sm:grid-cols-3">
          {entries.map((entry) => (
            <div key={entry.label}>
              <dt className="text-gray-500">{entry.label}</dt>
              <dd className={entry.valueClass}>{entry.value}</dd>
            </div>
          ))}
        </dl>
      </Card>
    );
  };

  return (
    <div>
      {header}
      {expanded ? renderDetails() : null}
    </div>
  );
}
|
||||
|
||||
function DecisionsPanel({ decisions }: { decisions: CorporateDecision[] }) {
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
@@ -568,13 +604,30 @@ interface ChartPoint {
|
||||
price?: number;
|
||||
}
|
||||
|
||||
/**
 * Custom X-axis tick for the trend chart: a rotated two-part label with the
 * date in bold and the hour in a dimmer colour.
 *
 * @param x - horizontal position of the tick.
 * @param y - vertical position of the tick.
 * @param payload - tick payload; `value` is passed to `new Date(...)` (the
 *   axis uses the `timestamp` data key, so this is an epoch-millisecond value).
 * @returns the SVG group for the tick, or `null` when the payload or either
 *   coordinate is missing or not finite.
 */
function ChartXTick({ x, y, payload }: { x?: number; y?: number; payload?: { value: number } }) {
  // Check for presence explicitly: a coordinate of exactly 0 is a valid
  // position and must not be dropped by a truthiness test.
  if (!payload || typeof x !== 'number' || typeof y !== 'number') return null;
  if (!Number.isFinite(x) || !Number.isFinite(y)) return null;

  const d = new Date(payload.value);
  const dateStr = d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
  const timeStr = d.toLocaleTimeString('en-US', { hour: 'numeric', hour12: true });

  return (
    <g transform={`translate(${x},${y + 4})`}>
      <text x={0} y={0} textAnchor="end" fontSize={10} transform="rotate(-35)">
        <tspan fill="#e2e8f0" fontWeight="bold">{dateStr} </tspan>
        <tspan fill="#94a3b8">{timeStr}</tspan>
      </text>
    </g>
  );
}
|
||||
|
||||
function TrendTooltip({ active, payload, label }: Record<string, unknown>) {
|
||||
if (!active) return null;
|
||||
const items = payload as Array<{ name: string; value: number; color: string; dataKey: string }> | undefined;
|
||||
if (!items?.length) return null;
|
||||
const ts = typeof label === 'number' ? new Date(label).toLocaleString('en-US', { month: 'short', day: 'numeric', hour: 'numeric', minute: '2-digit' }) : String(label ?? '');
|
||||
return (
|
||||
<div className="rounded-lg border border-surface-700 bg-surface-900 px-3 py-2 text-xs shadow-lg">
|
||||
<div className="mb-1 text-gray-400">{String(label ?? '')}</div>
|
||||
<div className="mb-1 text-gray-400">{ts}</div>
|
||||
{items.map((item, i) => (
|
||||
<div key={i} className="flex justify-between gap-4" style={{ color: item.color }}>
|
||||
<span>{item.name}:</span>
|
||||
@@ -587,12 +640,64 @@ function TrendTooltip({ active, payload, label }: Record<string, unknown>) {
|
||||
);
|
||||
}
|
||||
|
||||
function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { trends: TrendSummary[]; latestTrends: TrendSummary[]; ticker: string; marketPrices: MarketPrice[] }) {
|
||||
const [selectedWindow, setSelectedWindow] = useState('7d');
|
||||
/**
 * Summary card for the open position in `ticker`.
 *
 * Uses the first entry of `positions` whose ticker matches and whose quantity
 * is greater than zero. If there is none, nothing is rendered.
 *
 * @param positions - positions to search.
 * @param ticker - ticker symbol of the company being displayed.
 * @returns a card with shares, average entry, current price, market value and
 *   unrealized P&L, or `null` when there is no matching open position.
 */
function PositionCard({ positions, ticker }: { positions: import('../api/hooks').Position[]; ticker: string }) {
  const pos = positions.find((p) => p.ticker === ticker && p.quantity > 0);
  if (!pos) return null;

  // A current price of 0 is treated the same as a missing price (shown as "—").
  const marketValue = pos.current_price ? pos.quantity * pos.current_price : null;

  // The amount is rendered through Math.abs, so the sign has to be written
  // explicitly for losses too; otherwise a negative P&L shows without a minus.
  const isLoss = (pos.unrealized_pnl ?? 0) < 0;
  const pnlColor = isLoss ? 'text-red-400' : 'text-green-400';
  const pnlSign = isLoss ? '-' : '+';

  return (
    <Card>
      <div className="flex items-center justify-between">
        <h2 className="text-sm font-medium text-gray-400">Open Position</h2>
        <StatusBadge status="active" />
      </div>
      <dl className="mt-2 grid grid-cols-2 gap-x-8 gap-y-2 text-sm sm:grid-cols-5">
        <div>
          <dt className="text-gray-500">Shares</dt>
          <dd className="font-mono text-gray-200">{pos.quantity}</dd>
        </div>
        <div>
          <dt className="text-gray-500">Avg Entry</dt>
          <dd className="font-mono text-gray-200">${pos.avg_entry_price.toFixed(2)}</dd>
        </div>
        <div>
          <dt className="text-gray-500">Current Price</dt>
          <dd className="font-mono text-gray-200">{pos.current_price ? `$${pos.current_price.toFixed(2)}` : '—'}</dd>
        </div>
        <div>
          <dt className="text-gray-500">Market Value</dt>
          <dd className="font-mono text-gray-200">{marketValue ? `$${marketValue.toLocaleString(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 })}` : '—'}</dd>
        </div>
        <div>
          <dt className="text-gray-500">Unrealized P&L</dt>
          <dd className={`font-mono font-semibold ${pnlColor}`}>
            {pos.unrealized_pnl != null ? `${pnlSign}$${Math.abs(pos.unrealized_pnl).toFixed(2)}` : '—'}
          </dd>
        </div>
      </dl>
    </Card>
  );
}
|
||||
|
||||
function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices, range90d, selectedWindow, onWindowChange }: { trends: TrendSummary[]; latestTrends: TrendSummary[]; ticker: string; marketPrices: MarketPrice[]; range90d: { low: number | null; high: number | null }; selectedWindow: string; onWindowChange: (w: string) => void }) {
|
||||
|
||||
// Determine the time range for the selected window to filter data
|
||||
const windowHours: Record<string, number> = {
|
||||
intraday: 24,
|
||||
'1d': 48,
|
||||
'7d': 7 * 24,
|
||||
'30d': 30 * 24,
|
||||
'90d': 90 * 24,
|
||||
};
|
||||
const hoursBack = windowHours[selectedWindow] ?? 7 * 24;
|
||||
const cutoffTs = Date.now() - hoursBack * 3600_000;
|
||||
|
||||
// Use history data for charts — filter to selected window and time range
|
||||
const filtered = (trends ?? [])
|
||||
.filter((t) => t.entity_id === ticker && t.window === selectedWindow)
|
||||
.filter((t) => t.entity_id === ticker && t.window === selectedWindow && new Date(t.generated_at).getTime() >= cutoffTs)
|
||||
.sort((a, b) => new Date(a.generated_at).getTime() - new Date(b.generated_at).getTime());
|
||||
|
||||
// Build a price lookup — match by closest timestamp to each trend point
|
||||
@@ -600,19 +705,30 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
.filter((p) => p.bar_timestamp != null && p.close != null)
|
||||
.sort((a, b) => a.bar_timestamp - b.bar_timestamp);
|
||||
|
||||
// Filter prices to the selected window's time range (use all prices if sparse)
|
||||
const windowPrices = sortedPrices.length <= 20 ? sortedPrices : sortedPrices.filter((p) => p.bar_timestamp >= cutoffTs);
|
||||
|
||||
function findClosestPrice(ts: number): number | undefined {
|
||||
if (sortedPrices.length === 0) return undefined;
|
||||
let best = sortedPrices[0];
|
||||
if (windowPrices.length === 0) return undefined;
|
||||
let best = windowPrices[0];
|
||||
let bestDiff = Math.abs(ts - best.bar_timestamp);
|
||||
for (const p of sortedPrices) {
|
||||
for (const p of windowPrices) {
|
||||
const diff = Math.abs(ts - p.bar_timestamp);
|
||||
if (diff < bestDiff) {
|
||||
best = p;
|
||||
bestDiff = diff;
|
||||
}
|
||||
}
|
||||
// Only match if within 2 hours (for intraday) or 36 hours (for daily)
|
||||
const maxGap = selectedWindow === 'intraday' ? 2 * 3600_000 : 36 * 3600_000;
|
||||
// Match if within reasonable gap for the window type
|
||||
// With sparse price data (~1 bar per 4-6 hours), use wider tolerances
|
||||
const maxGapHours: Record<string, number> = {
|
||||
intraday: 6,
|
||||
'1d': 12,
|
||||
'7d': 36,
|
||||
'30d': 72,
|
||||
'90d': 168,
|
||||
};
|
||||
const maxGap = (maxGapHours[selectedWindow] ?? 36) * 3600_000;
|
||||
return bestDiff <= maxGap ? best.close : undefined;
|
||||
}
|
||||
|
||||
@@ -620,7 +736,7 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
const trendTs = new Date(t.generated_at).getTime();
|
||||
const price = findClosestPrice(trendTs);
|
||||
return {
|
||||
time: new Date(t.generated_at).toLocaleDateString('en-US', { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' }),
|
||||
time: String(trendTs),
|
||||
timestamp: trendTs,
|
||||
strength: +(t.trend_strength * 100).toFixed(1),
|
||||
confidence: +(t.confidence * 100).toFixed(1),
|
||||
@@ -634,6 +750,34 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
|
||||
const hasPrice = chartData.some((pt) => pt.price != null);
|
||||
|
||||
// Compute market open/close vertical markers for intraday and 1d windows
|
||||
const showMarketMarkers = selectedWindow === 'intraday' || selectedWindow === '1d';
|
||||
const marketMarkers: { ts: number; label: string }[] = [];
|
||||
if (showMarketMarkers && chartData.length > 0) {
|
||||
const minTs = chartData[0].timestamp;
|
||||
const maxTs = chartData[chartData.length - 1].timestamp;
|
||||
// Walk each day in the range and compute 9:30 AM ET (open) and 4:00 PM ET (close)
|
||||
const dayMs = 86400_000;
|
||||
const startDay = new Date(minTs);
|
||||
startDay.setUTCHours(0, 0, 0, 0);
|
||||
for (let d = startDay.getTime(); d <= maxTs + dayMs; d += dayMs) {
|
||||
const date = new Date(d);
|
||||
const dow = date.getUTCDay();
|
||||
if (dow === 0 || dow === 6) continue; // skip weekends
|
||||
// ET offset: EDT = UTC-4, EST = UTC-5. Approximate with -4 (summer).
|
||||
// 9:30 AM ET = 13:30 UTC (EDT)
|
||||
const openTs = d + 13 * 3600_000 + 30 * 60_000;
|
||||
// 4:00 PM ET = 20:00 UTC (EDT)
|
||||
const closeTs = d + 20 * 3600_000;
|
||||
if (openTs >= minTs && openTs <= maxTs) {
|
||||
marketMarkers.push({ ts: openTs, label: 'Open' });
|
||||
}
|
||||
if (closeTs >= minTs && closeTs <= maxTs) {
|
||||
marketMarkers.push({ ts: closeTs, label: 'Close' });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Available windows from the data (check both history and latest)
|
||||
const allTrends = [...(trends ?? []), ...(latestTrends ?? [])];
|
||||
const availableWindows = [...new Set(allTrends.filter((t) => t.entity_id === ticker).map((t) => t.window))];
|
||||
@@ -645,6 +789,94 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
.sort((a, b) => new Date(b.generated_at).getTime() - new Date(a.generated_at).getTime());
|
||||
const latest = latestForWindow[0] ?? (filtered.length > 0 ? filtered[filtered.length - 1] : null);
|
||||
|
||||
const [fullscreen, setFullscreen] = useState(false);
|
||||
|
||||
// Close fullscreen on Escape key
|
||||
useEffect(() => {
|
||||
if (!fullscreen) return;
|
||||
const handler = (e: KeyboardEvent) => { if (e.key === 'Escape') setFullscreen(false); };
|
||||
window.addEventListener('keydown', handler);
|
||||
return () => window.removeEventListener('keydown', handler);
|
||||
}, [fullscreen]);
|
||||
|
||||
// Shared chart content — rendered at different sizes
|
||||
const chartContent = (height: number) => (
|
||||
<ResponsiveContainer width="100%" height={height}>
|
||||
<LineChart data={chartData} margin={{ top: 5, right: 20, bottom: 70, left: 0 }}>
|
||||
<CartesianGrid strokeDasharray="3 3" stroke="#334155" />
|
||||
{marketMarkers.map((m, i) => (
|
||||
<ReferenceLine
|
||||
key={`market-${i}`}
|
||||
yAxisId="left"
|
||||
x={m.ts}
|
||||
stroke={m.label === 'Open' ? '#22c55e' : '#ef4444'}
|
||||
strokeDasharray="4 4"
|
||||
strokeWidth={1}
|
||||
strokeOpacity={0.5}
|
||||
label={{ value: m.label, position: 'top', fill: m.label === 'Open' ? '#22c55e' : '#ef4444', fontSize: 9 }}
|
||||
/>
|
||||
))}
|
||||
<XAxis
|
||||
dataKey="timestamp"
|
||||
type="number"
|
||||
domain={['dataMin', 'dataMax']}
|
||||
scale="time"
|
||||
tick={<ChartXTick />}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickCount={8}
|
||||
/>
|
||||
<YAxis
|
||||
yAxisId="left"
|
||||
domain={[0, 100]}
|
||||
tick={{ fill: '#94a3b8', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `${v}%`}
|
||||
/>
|
||||
{hasPrice && (
|
||||
<YAxis
|
||||
yAxisId="right"
|
||||
orientation="right"
|
||||
tick={{ fill: '#e879f9', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `$${v}`}
|
||||
domain={[
|
||||
range90d.low != null ? Math.floor(range90d.low * 0.97) : 'dataMin - 2',
|
||||
range90d.high != null ? Math.ceil(range90d.high * 1.03) : 'dataMax + 2',
|
||||
]}
|
||||
/>
|
||||
)}
|
||||
<Tooltip content={TrendTooltip} />
|
||||
<Legend verticalAlign="bottom" wrapperStyle={{ color: '#94a3b8', fontSize: 12, paddingTop: 24 }} />
|
||||
{hasPrice && range90d.high != null && (
|
||||
<ReferenceLine
|
||||
yAxisId="right"
|
||||
y={range90d.high}
|
||||
stroke="#22c55e"
|
||||
strokeDasharray="6 3"
|
||||
strokeWidth={1.5}
|
||||
label={{ value: `90d High $${range90d.high.toFixed(2)}`, position: 'insideTopRight', fill: '#22c55e', fontSize: 10 }}
|
||||
/>
|
||||
)}
|
||||
{hasPrice && range90d.low != null && (
|
||||
<ReferenceLine
|
||||
yAxisId="right"
|
||||
y={range90d.low}
|
||||
stroke="#ef4444"
|
||||
strokeDasharray="6 3"
|
||||
strokeWidth={1.5}
|
||||
label={{ value: `90d Low $${range90d.low.toFixed(2)}`, position: 'insideBottomRight', fill: '#ef4444', fontSize: 10 }}
|
||||
/>
|
||||
)}
|
||||
<Line yAxisId="left" type="monotone" dataKey="strength" name="Trend Strength" stroke="#3b82f6" strokeWidth={2} dot={{ r: 3, fill: '#3b82f6' }} activeDot={{ r: 5 }} />
|
||||
<Line yAxisId="left" type="monotone" dataKey="confidence" name="Confidence" stroke="#10b981" strokeWidth={2} dot={{ r: 3, fill: '#10b981' }} activeDot={{ r: 5 }} />
|
||||
<Line yAxisId="left" type="monotone" dataKey="contradiction" name="Contradiction" stroke="#f59e0b" strokeWidth={1.5} strokeDasharray="5 5" dot={{ r: 2, fill: '#f59e0b' }} />
|
||||
{hasPrice && (
|
||||
<Line yAxisId="right" type="monotone" dataKey="price" name="Price" stroke="#e879f9" strokeWidth={2} dot={{ r: 3, fill: '#e879f9' }} activeDot={{ r: 5 }} connectNulls />
|
||||
)}
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Window selector */}
|
||||
@@ -653,7 +885,7 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
{(availableWindows.length > 0 ? availableWindows : WINDOW_ORDER).map((w) => (
|
||||
<button
|
||||
key={w}
|
||||
onClick={() => setSelectedWindow(w)}
|
||||
onClick={() => onWindowChange(w)}
|
||||
className={`rounded-md px-3 py-1 text-xs font-medium transition-colors ${
|
||||
selectedWindow === w
|
||||
? 'bg-brand-600 text-white'
|
||||
@@ -673,83 +905,47 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
<>
|
||||
{/* Trend Strength & Confidence Chart */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">
|
||||
Trend Strength & Confidence — {ticker} / {selectedWindow}
|
||||
</h2>
|
||||
<ResponsiveContainer width="100%" height={280}>
|
||||
<LineChart data={chartData} margin={{ top: 5, right: 20, bottom: 5, left: 0 }}>
|
||||
<CartesianGrid strokeDasharray="3 3" stroke="#334155" />
|
||||
<XAxis
|
||||
dataKey="time"
|
||||
tick={{ fill: '#94a3b8', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
/>
|
||||
<YAxis
|
||||
yAxisId="left"
|
||||
domain={[0, 100]}
|
||||
tick={{ fill: '#94a3b8', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `${v}%`}
|
||||
/>
|
||||
{hasPrice && (
|
||||
<YAxis
|
||||
yAxisId="right"
|
||||
orientation="right"
|
||||
tick={{ fill: '#e879f9', fontSize: 11 }}
|
||||
tickLine={{ stroke: '#475569' }}
|
||||
tickFormatter={(v) => `$${v}`}
|
||||
domain={['dataMin - 2', 'dataMax + 2']}
|
||||
/>
|
||||
)}
|
||||
<Tooltip content={TrendTooltip} />
|
||||
<Legend wrapperStyle={{ color: '#94a3b8', fontSize: 12 }} />
|
||||
<Line
|
||||
yAxisId="left"
|
||||
type="monotone"
|
||||
dataKey="strength"
|
||||
name="Trend Strength"
|
||||
stroke="#3b82f6"
|
||||
strokeWidth={2}
|
||||
dot={{ r: 3, fill: '#3b82f6' }}
|
||||
activeDot={{ r: 5 }}
|
||||
/>
|
||||
<Line
|
||||
yAxisId="left"
|
||||
type="monotone"
|
||||
dataKey="confidence"
|
||||
name="Confidence"
|
||||
stroke="#10b981"
|
||||
strokeWidth={2}
|
||||
dot={{ r: 3, fill: '#10b981' }}
|
||||
activeDot={{ r: 5 }}
|
||||
/>
|
||||
<Line
|
||||
yAxisId="left"
|
||||
type="monotone"
|
||||
dataKey="contradiction"
|
||||
name="Contradiction"
|
||||
stroke="#f59e0b"
|
||||
strokeWidth={1.5}
|
||||
strokeDasharray="5 5"
|
||||
dot={{ r: 2, fill: '#f59e0b' }}
|
||||
/>
|
||||
{hasPrice && (
|
||||
<Line
|
||||
yAxisId="right"
|
||||
type="monotone"
|
||||
dataKey="price"
|
||||
name="Price"
|
||||
stroke="#e879f9"
|
||||
strokeWidth={2}
|
||||
dot={{ r: 3, fill: '#e879f9' }}
|
||||
activeDot={{ r: 5 }}
|
||||
connectNulls
|
||||
/>
|
||||
)}
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
<div className="mb-3 flex items-center justify-between">
|
||||
<h2 className="text-sm font-medium text-gray-400">
|
||||
Trend Strength & Confidence — {ticker} / {selectedWindow}
|
||||
</h2>
|
||||
<button
|
||||
onClick={() => setFullscreen(true)}
|
||||
className="rounded-md border border-surface-700 px-2 py-1 text-xs text-gray-400 hover:bg-surface-800 hover:text-gray-200"
|
||||
title="Expand chart"
|
||||
>
|
||||
⛶ Expand
|
||||
</button>
|
||||
</div>
|
||||
{chartContent(280)}
|
||||
</Card>
|
||||
|
||||
{/* Fullscreen overlay */}
|
||||
{fullscreen && (
|
||||
<div
|
||||
className="fixed inset-0 z-50 flex flex-col bg-surface-950/95 p-6"
|
||||
onClick={(e) => { if (e.target === e.currentTarget) setFullscreen(false); }}
|
||||
role="dialog"
|
||||
aria-label="Expanded chart"
|
||||
>
|
||||
<div className="mb-4 flex items-center justify-between">
|
||||
<h2 className="text-lg font-medium text-gray-200">
|
||||
Trend Strength & Confidence — {ticker} / {selectedWindow}
|
||||
</h2>
|
||||
<button
|
||||
onClick={() => setFullscreen(false)}
|
||||
className="rounded-md border border-surface-700 px-3 py-1.5 text-sm text-gray-400 hover:bg-surface-800 hover:text-gray-200"
|
||||
>
|
||||
✕ Close
|
||||
</button>
|
||||
</div>
|
||||
<div className="flex-1 min-h-0">
|
||||
{chartContent(Math.max(400, window.innerHeight - 160))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Direction Timeline */}
|
||||
{/* Direction Timeline */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">
|
||||
@@ -764,13 +960,13 @@ function TrendHistoryChart({ trends, latestTrends, ticker, marketPrices }: { tre
|
||||
'bg-gray-600';
|
||||
const height = Math.max(8, pt.strength * 0.5);
|
||||
return (
|
||||
<div key={i} className="flex flex-col items-center gap-1" title={`${pt.time}: ${pt.directionLabel} (${pt.strength}%)`}>
|
||||
<div key={i} className="flex flex-col items-center gap-1" title={`${new Date(pt.timestamp).toLocaleString('en-US', { month: 'short', day: 'numeric', hour: 'numeric', minute: '2-digit' })}: ${pt.directionLabel} (${pt.strength}%)`}>
|
||||
<div
|
||||
className={`w-3 rounded-sm ${color}`}
|
||||
style={{ height: `${height}px` }}
|
||||
/>
|
||||
{i % Math.max(1, Math.floor(chartData.length / 8)) === 0 && (
|
||||
<span className="text-[9px] text-gray-500 -rotate-45 origin-top-left whitespace-nowrap">{pt.time}</span>
|
||||
<span className="text-[9px] text-gray-500 -rotate-45 origin-top-left whitespace-nowrap">{new Date(pt.timestamp).toLocaleTimeString('en-US', { hour: 'numeric', minute: '2-digit' })}</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -1,9 +1,89 @@
|
||||
import { useState } from 'react';
|
||||
import { useModelPerformance, useModelFailures } from '../api/hooks';
|
||||
import {
|
||||
useModelPerformance,
|
||||
useModelFailures,
|
||||
useValidationSummary,
|
||||
useValidationCalibration,
|
||||
useValidationICByHorizon,
|
||||
useValidationGateStatus,
|
||||
useValidationAttributionSources,
|
||||
useValidationAttributionCatalysts,
|
||||
useValidationAttributionLayers,
|
||||
} from '../api/hooks';
|
||||
import type {
|
||||
ValidationSummary,
|
||||
ValidationCalibration,
|
||||
CalibrationBucket,
|
||||
ValidationICByHorizon,
|
||||
ICByHorizonEntry,
|
||||
ValidationGateStatus,
|
||||
SourceAttributionResponse,
|
||||
CatalystAttributionResponse,
|
||||
LayerAttributionResponse,
|
||||
SourceAttribution,
|
||||
CatalystAttribution,
|
||||
LayerAttribution,
|
||||
} from '../api/hooks';
|
||||
import { LoadingSpinner, DateRangeSelector, StatusBadge, Card } from '../components/ui';
|
||||
import { AlertTriangle, ShieldCheck, ShieldX } from 'lucide-react';
|
||||
|
||||
/** Identifier of the tab shown on the model page: extraction metrics or model validation. */
type Tab = 'extraction' | 'validation';
|
||||
|
||||
/**
 * Top-level "Model Performance" page.
 *
 * Owns two pieces of state: the look-back window in hours (initially 24) and
 * the active tab (initially 'extraction'). It renders the page title, the tab
 * bar, and the body of whichever tab is active. The date-range selector is
 * only rendered while the extraction tab is active.
 */
export function OpsModelPage() {
  const [hours, setHours] = useState(24);
  const [activeTab, setActiveTab] = useState<Tab>('extraction');

  const onExtraction = activeTab === 'extraction';
  const onValidation = activeTab === 'validation';

  // Both tab buttons share the same base classes; only the active one gets the underline.
  const tabClassName = (selected: boolean) =>
    `px-4 py-2 text-sm font-medium transition-colors ${
      selected
        ? 'border-b-2 border-brand-500 text-brand-400'
        : 'text-gray-400 hover:text-gray-200'
    }`;

  return (
    <div className="space-y-6">
      <div className="flex items-center justify-between">
        <h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
        {onExtraction && <DateRangeSelector value={hours} onChange={setHours} />}
      </div>

      {/* Tab bar */}
      <div className="flex border-b border-surface-700" role="tablist" aria-label="Model performance tabs">
        <button
          role="tab"
          aria-selected={onExtraction}
          onClick={() => setActiveTab('extraction')}
          className={tabClassName(onExtraction)}
        >
          Extraction Performance
        </button>
        <button
          role="tab"
          aria-selected={onValidation}
          onClick={() => setActiveTab('validation')}
          className={tabClassName(onValidation)}
        >
          Model Validation
        </button>
      </div>

      {onExtraction ? <ExtractionTab hours={hours} /> : <ValidationTab />}
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Extraction Performance Tab (existing content) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
function ExtractionTab({ hours }: { hours: number }) {
|
||||
const { data: perf, isLoading } = useModelPerformance(hours);
|
||||
const { data: failures } = useModelFailures(hours);
|
||||
|
||||
@@ -13,11 +93,6 @@ export function OpsModelPage() {
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-xl font-semibold text-gray-100">Model Performance</h1>
|
||||
<DateRangeSelector value={hours} onChange={setHours} />
|
||||
</div>
|
||||
|
||||
{/* Key metrics */}
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
|
||||
<StatCard label="Total Extractions" value={String(p.total_extractions ?? '—')} />
|
||||
@@ -63,6 +138,482 @@ export function OpsModelPage() {
|
||||
);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Model Validation Tab (new) */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Body of the "Model Validation" tab.
 *
 * Calls one validation hook per section and hands each section its own
 * `data` / `isLoading` / `error` triple, so every section renders its
 * loading, error, and empty states independently of the others.
 */
function ValidationTab() {
  // Hook call order is fixed; it must not vary between renders.
  const summaryQuery = useValidationSummary();
  const calibrationQuery = useValidationCalibration();
  const icQuery = useValidationICByHorizon();
  const gateQuery = useValidationGateStatus();
  const sourcesQuery = useValidationAttributionSources();
  const catalystsQuery = useValidationAttributionCatalysts();
  const layersQuery = useValidationAttributionLayers();

  return (
    <div className="space-y-6">
      {/* Gate Status */}
      <GateStatusSection
        data={gateQuery.data}
        isLoading={gateQuery.isLoading}
        error={gateQuery.error}
      />

      {/* Summary Cards */}
      <SummaryCardsSection
        data={summaryQuery.data}
        isLoading={summaryQuery.isLoading}
        error={summaryQuery.error}
      />

      {/* Calibration Table */}
      <CalibrationTableSection
        data={calibrationQuery.data}
        isLoading={calibrationQuery.isLoading}
        error={calibrationQuery.error}
      />

      {/* IC by Horizon Table */}
      <ICByHorizonSection
        data={icQuery.data}
        isLoading={icQuery.isLoading}
        error={icQuery.error}
      />

      {/* Source Attribution Table */}
      <SourceAttributionSection
        data={sourcesQuery.data}
        isLoading={sourcesQuery.isLoading}
        error={sourcesQuery.error}
      />

      {/* Catalyst Attribution Table */}
      <CatalystAttributionSection
        data={catalystsQuery.data}
        isLoading={catalystsQuery.isLoading}
        error={catalystsQuery.error}
      />

      {/* Layer Attribution Table */}
      <LayerAttributionSection
        data={layersQuery.data}
        isLoading={layersQuery.isLoading}
        error={layersQuery.error}
      />
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Gate Status Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Live-trading gate card.
 *
 * Render order: spinner while loading, error card on failure, an "unknown"
 * card when the response carries no `gate_status`, otherwise a PASS/FAIL
 * header (with the optional reason) followed by a table of per-threshold
 * results when `threshold_results` is a non-empty array.
 *
 * Note: a `gate_status` without a truthy `passed` field is shown as FAIL.
 */
function GateStatusSection({ data, isLoading, error }: {
  data: ValidationGateStatus | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load gate status" />;

  const gate = data?.gate_status as Record<string, unknown> | null;
  if (!gate) {
    return (
      <Card className="flex items-center gap-3">
        <ShieldX size={20} className="text-yellow-400" />
        <div>
          <div className="text-sm font-medium text-yellow-400">Gate Status Unknown</div>
          <div className="text-xs text-gray-500">No gate evaluation data available</div>
        </div>
      </Card>
    );
  }

  // The payload is loosely typed, so each field is narrowed at the point of use.
  const isPassing = gate.passed as boolean | undefined;
  const note = gate.reason as string | undefined;
  const checks = gate.threshold_results as Array<Record<string, unknown>> | undefined;

  const tone = isPassing ? 'text-green-400' : 'text-red-400';
  const GateIcon = isPassing ? ShieldCheck : ShieldX;
  const verdict = isPassing ? 'PASS' : 'FAIL';

  return (
    <Card>
      <div className="mb-3 flex items-center gap-3">
        <GateIcon size={20} className={tone} />
        <div>
          <div className={`text-sm font-medium ${tone}`}>
            Live Trading Gate: {verdict}
          </div>
          {note && <div className="text-xs text-gray-500">{note}</div>}
        </div>
      </div>

      {checks && checks.length > 0 && (
        <div className="overflow-x-auto">
          <table className="w-full text-left text-xs">
            <thead>
              <tr className="border-b border-surface-700 text-gray-500">
                {['Threshold', 'Required', 'Actual'].map((heading) => (
                  <th key={heading} className="pb-2 pr-4 font-medium">{heading}</th>
                ))}
                <th className="pb-2 font-medium">Status</th>
              </tr>
            </thead>
            <tbody>
              {checks.map((check, idx) => (
                <tr key={idx} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300">{String(check.name ?? '')}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-400">{fmtThreshold(check.threshold)}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtThreshold(check.actual)}</td>
                  <td className="py-1.5">
                    <StatusBadge status={check.passed ? 'success' : 'failed'} />
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      )}
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Summary Cards Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Grid of headline validation metrics.
 *
 * Render order: spinner while loading, error card on failure, an explanatory
 * placeholder when the response has no `snapshot`, otherwise one stat card
 * per metric.
 *
 * Colour rules:
 * - Win rate / directional accuracy: `colorForRate` with a 0.53 threshold.
 * - IC / rank IC: `colorForIC`.
 * - Brier score: green below 0.25, neutral otherwise.
 * - ECE: green below 0.15, yellow otherwise.
 * - Excess vs SPY: green when positive, red otherwise.
 *
 * A metric that is missing (null/undefined) is shown as '—' in the neutral
 * colour, so absent data is never presented as a warning or a loss.
 */
function SummaryCardsSection({ data, isLoading, error }: {
  data: ValidationSummary | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load validation summary" />;

  const snap = data?.snapshot;
  if (!snap) {
    return (
      <Card>
        <p className="text-sm text-gray-500">No validation data available yet. Metrics will appear once predictions have been evaluated.</p>
      </Card>
    );
  }

  const neutral = 'text-gray-100';

  const brier = snap.brier_score;
  const brierColor = brier != null && brier < 0.25 ? 'text-green-400' : neutral;

  // Previously a missing ECE fell through to the yellow "warning" colour.
  const ece = snap.calibration_error;
  let eceColor = neutral;
  if (ece != null) {
    eceColor = ece < 0.15 ? 'text-green-400' : 'text-yellow-400';
  }

  // Previously a missing excess return fell through to the red "loss" colour.
  const excess = snap.avg_excess_return_vs_spy;
  let excessColor = neutral;
  if (excess != null) {
    excessColor = excess > 0 ? 'text-green-400' : 'text-red-400';
  }

  return (
    <div className="grid grid-cols-2 gap-3 sm:grid-cols-3 lg:grid-cols-5">
      <StatCard label="Predictions" value={String(snap.prediction_count ?? '—')} />
      <StatCard
        label="Win Rate"
        value={fmtPct(snap.win_rate)}
        color={colorForRate(snap.win_rate, 0.53)}
      />
      <StatCard
        label="Directional Accuracy"
        value={fmtPct(snap.directional_accuracy)}
        color={colorForRate(snap.directional_accuracy, 0.53)}
      />
      <StatCard
        label="IC"
        value={fmtIC(snap.information_coefficient)}
        color={colorForIC(snap.information_coefficient)}
      />
      <StatCard
        label="Rank IC"
        value={fmtIC(snap.rank_information_coefficient)}
        color={colorForIC(snap.rank_information_coefficient)}
      />
      <StatCard
        label="Brier Score"
        value={brier != null ? brier.toFixed(4) : '—'}
        color={brierColor}
      />
      <StatCard
        label="ECE"
        value={ece != null ? ece.toFixed(4) : '—'}
        color={eceColor}
      />
      <StatCard
        label="Excess vs SPY"
        value={fmtPct(excess)}
        color={excessColor}
      />
    </div>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Calibration Table Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Calibration table: one row per confidence bucket.
 *
 * Render order: spinner while loading, error card on failure, a "no data"
 * card when `buckets` is missing or empty, otherwise the table with one
 * `CalibrationRow` per bucket.
 */
function CalibrationTableSection({ data, isLoading, error }: {
  data: ValidationCalibration | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load calibration data" />;

  const rows = data?.buckets;
  const hasRows = Boolean(rows) && rows!.length > 0;

  if (!hasRows) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Calibration</h2>
        <p className="text-sm text-gray-500">No calibration data available</p>
      </Card>
    );
  }

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Calibration by Confidence Bucket</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {['Bucket', 'Avg Confidence', 'Observed Win Rate', 'Count'].map((heading) => (
                <th key={heading} className="pb-2 pr-4 font-medium">{heading}</th>
              ))}
              <th className="pb-2 font-medium">Status</th>
            </tr>
          </thead>
          <tbody>
            {rows!.map((entry: CalibrationBucket, idx: number) => (
              <CalibrationRow key={idx} bucket={entry} />
            ))}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/**
 * One row of the calibration table.
 *
 * A bucket is flagged as miscalibrated when its own `miscalibrated` field is
 * truthy, or when average confidence and observed win rate differ by more
 * than 0.15 in absolute terms. Flagged rows get an amber background and a
 * warning label; other rows show "OK".
 */
function CalibrationRow({ bucket }: { bucket: CalibrationBucket }) {
  const gap = Math.abs(bucket.avg_confidence - bucket.observed_win_rate);
  const flagged = bucket.miscalibrated || gap > 0.15;

  const rowClass = `border-b border-surface-800 ${flagged ? 'bg-amber-900/20' : ''}`;
  const numericCell = 'py-1.5 pr-4 font-mono text-gray-300';

  const status = flagged ? (
    <span className="inline-flex items-center gap-1 text-amber-400">
      <AlertTriangle size={14} />
      <span>Miscalibrated</span>
    </span>
  ) : (
    <span className="text-green-400">OK</span>
  );

  return (
    <tr className={rowClass}>
      <td className={numericCell}>
        [{fmtPctShort(bucket.bucket_low)}, {fmtPctShort(bucket.bucket_high)})
      </td>
      <td className={numericCell}>{fmtPctShort(bucket.avg_confidence)}</td>
      <td className={numericCell}>{fmtPctShort(bucket.observed_win_rate)}</td>
      <td className="py-1.5 pr-4 font-mono text-gray-400">{bucket.prediction_count}</td>
      <td className="py-1.5">{status}</td>
    </tr>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* IC by Horizon Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Information-coefficient table, one row per horizon.
 *
 * Render order: spinner while loading, error card on failure, a "no data"
 * card when `horizons` is missing or empty, otherwise the table. IC and
 * rank-IC cells are coloured via `colorForIC`.
 */
function ICByHorizonSection({ data, isLoading, error }: {
  data: ValidationICByHorizon | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load IC by horizon data" />;

  const entries = data?.horizons;
  const isEmpty = !entries || entries.length === 0;

  if (isEmpty) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">IC by Horizon</h2>
        <p className="text-sm text-gray-500">No IC data available</p>
      </Card>
    );
  }

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Information Coefficient by Horizon</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {['Horizon', 'IC', 'Rank IC'].map((heading) => (
                <th key={heading} className="pb-2 pr-4 font-medium">{heading}</th>
              ))}
              <th className="pb-2 font-medium">Predictions</th>
            </tr>
          </thead>
          <tbody>
            {entries.map((entry: ICByHorizonEntry, idx: number) => {
              const icTone = colorForIC(entry.information_coefficient);
              const rankTone = colorForIC(entry.rank_information_coefficient);
              return (
                <tr key={idx} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{entry.horizon}</td>
                  <td className={`py-1.5 pr-4 font-mono ${icTone}`}>
                    {fmtIC(entry.information_coefficient)}
                  </td>
                  <td className={`py-1.5 pr-4 font-mono ${rankTone}`}>
                    {fmtIC(entry.rank_information_coefficient)}
                  </td>
                  <td className="py-1.5 font-mono text-gray-400">{entry.prediction_count}</td>
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Source Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-source performance table.
 *
 * Render order: spinner while loading, error card on failure, a "no data"
 * card when `sources` is missing or empty, otherwise one row per source with
 * win rate (coloured against a 0.53 threshold), IC (coloured via
 * `colorForIC`), average return, and duplicate rate.
 */
function SourceAttributionSection({ data, isLoading, error }: {
  data: SourceAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load source attribution data" />;

  const entries = data?.sources;
  const isEmpty = !entries || entries.length === 0;

  if (isEmpty) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Source Performance</h2>
        <p className="text-sm text-gray-500">No source attribution data available</p>
      </Card>
    );
  }

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Source Performance</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {['Source', 'Win Rate', 'IC', 'Avg Return'].map((heading) => (
                <th key={heading} className="pb-2 pr-4 font-medium">{heading}</th>
              ))}
              <th className="pb-2 font-medium">Duplicate Rate</th>
            </tr>
          </thead>
          <tbody>
            {entries.map((entry: SourceAttribution, idx: number) => {
              const winTone = colorForRate(entry.win_rate, 0.53);
              const icTone = colorForIC(entry.information_coefficient);
              return (
                <tr key={idx} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300">{entry.source}</td>
                  <td className={`py-1.5 pr-4 font-mono ${winTone}`}>
                    {fmtPct(entry.win_rate)}
                  </td>
                  <td className={`py-1.5 pr-4 font-mono ${icTone}`}>
                    {fmtIC(entry.information_coefficient)}
                  </td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_future_return)}</td>
                  <td className="py-1.5 font-mono text-gray-300">{fmtPct(entry.duplicate_rate)}</td>
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Catalyst Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-catalyst-type performance table ("Catalyst Truth Table").
 *
 * Render order: spinner while loading, error card on failure, a "no data"
 * card when `catalysts` is missing or empty, otherwise one row per catalyst
 * type with win rate (coloured against a 0.53 threshold), average return,
 * and IC (coloured via `colorForIC`).
 */
function CatalystAttributionSection({ data, isLoading, error }: {
  data: CatalystAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load catalyst attribution data" />;

  const entries = data?.catalysts;
  const isEmpty = !entries || entries.length === 0;

  if (isEmpty) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
        <p className="text-sm text-gray-500">No catalyst attribution data available</p>
      </Card>
    );
  }

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Catalyst Truth Table</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {['Catalyst Type', 'Win Rate', 'Avg Return'].map((heading) => (
                <th key={heading} className="pb-2 pr-4 font-medium">{heading}</th>
              ))}
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>
            {entries.map((entry: CatalystAttribution, idx: number) => {
              const winTone = colorForRate(entry.win_rate, 0.53);
              const icTone = colorForIC(entry.information_coefficient);
              return (
                <tr key={idx} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300">{entry.catalyst_type}</td>
                  <td className={`py-1.5 pr-4 font-mono ${winTone}`}>
                    {fmtPct(entry.win_rate)}
                  </td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_future_return)}</td>
                  <td className={`py-1.5 font-mono ${icTone}`}>
                    {fmtIC(entry.information_coefficient)}
                  </td>
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Layer Attribution Section */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
/**
 * Per-layer attribution table.
 *
 * Render order: spinner while loading, error card on failure, a "no data"
 * card when `layers` is missing or empty, otherwise one row per layer with
 * its average contribution, dominant win rate (coloured against a 0.53
 * threshold), and dominant IC (coloured via `colorForIC`).
 */
function LayerAttributionSection({ data, isLoading, error }: {
  data: LayerAttributionResponse | undefined;
  isLoading: boolean;
  error: Error | null;
}) {
  if (isLoading) return <LoadingSpinner />;
  if (error) return <ErrorCard message="Failed to load layer attribution data" />;

  const entries = data?.layers;
  const isEmpty = !entries || entries.length === 0;

  if (isEmpty) {
    return (
      <Card>
        <h2 className="mb-2 text-sm font-medium text-gray-400">Layer Attribution</h2>
        <p className="text-sm text-gray-500">No layer attribution data available</p>
      </Card>
    );
  }

  return (
    <Card>
      <h2 className="mb-3 text-sm font-medium text-gray-400">Layer Attribution</h2>
      <div className="overflow-x-auto">
        <table className="w-full text-left text-xs">
          <thead>
            <tr className="border-b border-surface-700 text-gray-500">
              {['Layer', 'Contribution %', 'Dominant Win Rate'].map((heading) => (
                <th key={heading} className="pb-2 pr-4 font-medium">{heading}</th>
              ))}
              <th className="pb-2 font-medium">IC</th>
            </tr>
          </thead>
          <tbody>
            {entries.map((entry: LayerAttribution, idx: number) => {
              const winTone = colorForRate(entry.dominant_win_rate, 0.53);
              const icTone = colorForIC(entry.dominant_ic);
              return (
                <tr key={idx} className="border-b border-surface-800">
                  <td className="py-1.5 pr-4 text-gray-300 capitalize">{entry.layer}</td>
                  <td className="py-1.5 pr-4 font-mono text-gray-300">{fmtPct(entry.avg_contribution_pct)}</td>
                  <td className={`py-1.5 pr-4 font-mono ${winTone}`}>
                    {fmtPct(entry.dominant_win_rate)}
                  </td>
                  <td className={`py-1.5 font-mono ${icTone}`}>
                    {fmtIC(entry.dominant_ic)}
                  </td>
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </Card>
  );
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Shared helpers */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
function StatCard({ label, value, color = 'text-gray-100' }: { label: string; value: string; color?: string }) {
|
||||
return (
|
||||
<Card className="text-center">
|
||||
@@ -71,3 +622,53 @@ function StatCard({ label, value, color = 'text-gray-100' }: { label: string; va
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/** Red-tinted card that displays a single error message line. */
function ErrorCard({ message }: { message: string }) {
  const body = <p className="text-sm text-red-400">{message}</p>;
  return <Card className="border-red-700/50 bg-red-900/20">{body}</Card>;
}
|
||||
|
||||
/**
 * Render a fraction as a percentage with one decimal place (0.5 -> "50.0%").
 * Returns '—' when the value is null or undefined.
 */
function fmtPct(v: number | null | undefined): string {
  return v == null ? '—' : `${(v * 100).toFixed(1)}%`;
}
|
||||
|
||||
/**
 * Render a fraction as a whole-number percentage (0.5 -> "50%"); used for
 * bucket display. Returns '—' when the value is null or undefined.
 */
function fmtPctShort(v: number | null | undefined): string {
  return v == null ? '—' : `${(v * 100).toFixed(0)}%`;
}
|
||||
|
||||
/**
 * Render an IC value with a fixed four decimal places.
 *
 * @param v - Value to format; may be null or undefined.
 * @returns `v` with four decimals, or an em dash ('—') when `v` is null/undefined.
 */
function fmtIC(v: number | null | undefined): string {
  const missing = v === null || v === undefined;
  return missing ? '—' : v.toFixed(4);
}
|
||||
|
||||
/**
 * Turn a threshold value of unknown type into display text.
 *
 * - null / undefined  -> '—'
 * - integer numbers   -> printed as-is via `String`
 * - other numbers     -> four decimal places
 * - anything else     -> `String(v)`
 *
 * @param v - The raw threshold value.
 * @returns The display string.
 */
function fmtThreshold(v: unknown): string {
  if (v === null || v === undefined) {
    return '—';
  }
  // Everything except a non-integer number is stringified directly.
  if (typeof v !== 'number' || Number.isInteger(v)) {
    return String(v);
  }
  return v.toFixed(4);
}
|
||||
|
||||
/**
 * Pick the text colour class for a win rate / accuracy value.
 *
 * @param v - The rate to classify; may be null or undefined.
 * @param threshold - Minimum value (inclusive) that counts as good.
 * @returns 'text-gray-100' when `v` is null/undefined, 'text-green-400' when
 *          `v >= threshold`, and 'text-red-400' otherwise.
 */
function colorForRate(v: number | null | undefined, threshold: number): string {
  if (v === null || v === undefined) {
    return 'text-gray-100';
  }
  if (v >= threshold) {
    return 'text-green-400';
  }
  return 'text-red-400';
}
|
||||
|
||||
/**
 * Pick the text colour class for an IC value.
 *
 * @param v - The IC value to classify; may be null or undefined.
 * @returns 'text-gray-400' when `v` is null/undefined, 'text-green-400' when
 *          `v >= 0.03`, 'text-yellow-400' when `0 < v < 0.03`, and
 *          'text-red-400' for zero, negative or NaN values.
 */
function colorForIC(v: number | null | undefined): string {
  if (v === null || v === undefined) {
    return 'text-gray-400';
  }
  // Full class names are kept as literals (no string building) so they stay greppable.
  return v >= 0.03 ? 'text-green-400' : v > 0 ? 'text-yellow-400' : 'text-red-400';
}
|
||||
|
||||
@@ -2,6 +2,55 @@ import { useParams } from '@tanstack/react-router';
|
||||
import { useOrder } from '../api/hooks';
|
||||
import { StatusBadge, LoadingSpinner, Card } from '../components/ui';
|
||||
|
||||
/**
 * Minimal syntax colouring for an already-serialised JSON string (read-only display).
 *
 * The text is scanned with one tokenising regex. Every recognised token is wrapped in a
 * `<span>` with a text-colour class; whatever the regex does not match (indentation,
 * newlines, the space after a colon, any stray text) is passed through as a plain string.
 *
 * @param json - JSON text to colourise, e.g. the output of `JSON.stringify(value, null, 2)`.
 * @returns A fragment of plain strings and coloured `<span>` elements, in source order.
 */
function highlightJson(json: string): React.ReactNode {
  // Capture groups, in order:
  //   1 = object key (string literal followed by optional whitespace and ':')
  //   2 = string value, 3 = number, 4 = boolean, 5 = null, 6 = structural character
  const tokenRe = /("(?:\\.|[^"\\])*")\s*:|("(?:\\.|[^"\\])*")|(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)|(\btrue\b|\bfalse\b)|(\bnull\b)|([{}[\],])/g;

  // Colour class per capture group; slot 0 (the whole match) is never used.
  const classByGroup = [
    '',
    'text-cyan-400',
    'text-green-400',
    'text-yellow-300',
    'text-purple-400',
    'text-red-400',
    'text-gray-500',
  ];

  const nodes: React.ReactNode[] = [];
  let cursor = 0;

  for (let m = tokenRe.exec(json); m !== null; m = tokenRe.exec(json)) {
    const start = m.index;

    // Pass through whatever sat between the previous token and this one.
    if (start > cursor) {
      nodes.push(json.slice(cursor, start));
    }

    // The first capture group that matched decides the colour.
    let group = 1;
    while (group < classByGroup.length && !m[group]) {
      group += 1;
    }

    if (group < classByGroup.length) {
      // The match offset is unique per token, so it doubles as the React key.
      nodes.push(<span key={start} className={classByGroup[group]}>{m[group]}</span>);
      if (group === 1) {
        // The key pattern consumed the ':' (and any whitespace before it); re-emit the colon uncoloured.
        nodes.push(':');
      }
    }

    cursor = start + m[0].length;
  }

  // Trailing text after the last token.
  if (cursor < json.length) {
    nodes.push(json.slice(cursor));
  }

  return <>{nodes}</>;
}
|
||||
|
||||
export function OrderDetailPage() {
|
||||
const { id } = useParams({ from: '/orders/$id' });
|
||||
const { data: order, isLoading } = useOrder(id);
|
||||
@@ -33,8 +82,8 @@ export function OrderDetailPage() {
|
||||
{order.decision_trace && Object.keys(order.decision_trace).length > 0 && (
|
||||
<Card>
|
||||
<h2 className="mb-2 text-sm font-medium text-gray-400">Decision Trace</h2>
|
||||
<pre className="overflow-x-auto rounded bg-surface-950 p-3 text-xs text-gray-300">
|
||||
{JSON.stringify(order.decision_trace, null, 2)}
|
||||
<pre className="overflow-x-auto rounded bg-surface-950 p-3 text-xs leading-relaxed">
|
||||
{highlightJson(JSON.stringify(order.decision_trace, null, 2))}
|
||||
</pre>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { usePositions } from '../api/hooks';
|
||||
import { Link } from '@tanstack/react-router';
|
||||
import { usePositions, useCompanies } from '../api/hooks';
|
||||
import { DataTable, type Column } from '../components/DataTable';
|
||||
import { LoadingSpinner } from '../components/ui';
|
||||
import type { Position } from '../api/hooks';
|
||||
@@ -13,18 +14,38 @@ function pnlColor(v: number | null | undefined) {
|
||||
return v >= 0 ? 'text-green-400' : 'text-red-400';
|
||||
}
|
||||
|
||||
const columns: Column<Position>[] = [
|
||||
{ key: 'ticker', header: 'Ticker', className: 'font-mono font-semibold text-brand-300' },
|
||||
{ key: 'quantity', header: 'Qty' },
|
||||
{ key: 'avg_entry_price', header: 'Entry', render: (r) => <span>{fmtUsd(r.avg_entry_price)}</span> },
|
||||
{ key: 'current_price', header: 'Current', render: (r) => <span>{fmtUsd(r.current_price)}</span> },
|
||||
{ key: 'unrealized_pnl', header: 'Unrealized P&L', render: (r) => <span className={pnlColor(r.unrealized_pnl)}>{fmtUsd(r.unrealized_pnl)}</span> },
|
||||
{ key: 'realized_pnl', header: 'Realized P&L', render: (r) => <span className={pnlColor(r.realized_pnl)}>{fmtUsd(r.realized_pnl)}</span> },
|
||||
{ key: 'updated_at', header: 'Updated', render: (r) => <span className="text-xs">{new Date(r.updated_at).toLocaleString()}</span> },
|
||||
];
|
||||
|
||||
export function PositionsPage() {
|
||||
const { data, isLoading } = usePositions();
|
||||
const { data: companies } = useCompanies();
|
||||
|
||||
// Build ticker → company ID lookup
|
||||
const tickerToId: Record<string, string> = {};
|
||||
for (const c of companies ?? []) {
|
||||
tickerToId[c.ticker] = c.id;
|
||||
}
|
||||
|
||||
const posColumns: Column<Position>[] = [
|
||||
{
|
||||
key: 'ticker',
|
||||
header: 'Ticker',
|
||||
render: (r) => {
|
||||
const companyId = tickerToId[r.ticker];
|
||||
return companyId ? (
|
||||
<Link to="/companies/$id" params={{ id: companyId }} className="font-mono font-semibold text-brand-300 hover:underline">
|
||||
{r.ticker}
|
||||
</Link>
|
||||
) : (
|
||||
<span className="font-mono font-semibold text-brand-300">{r.ticker}</span>
|
||||
);
|
||||
},
|
||||
},
|
||||
{ key: 'quantity', header: 'Qty' },
|
||||
{ key: 'avg_entry_price', header: 'Entry', render: (r) => <span>{fmtUsd(r.avg_entry_price)}</span> },
|
||||
{ key: 'current_price', header: 'Current', render: (r) => <span>{fmtUsd(r.current_price)}</span> },
|
||||
{ key: 'unrealized_pnl', header: 'Unrealized P&L', render: (r) => <span className={pnlColor(r.unrealized_pnl)}>{fmtUsd(r.unrealized_pnl)}</span> },
|
||||
{ key: 'realized_pnl', header: 'Realized P&L', render: (r) => <span className={pnlColor(r.realized_pnl)}>{fmtUsd(r.realized_pnl)}</span> },
|
||||
{ key: 'updated_at', header: 'Updated', render: (r) => <span className="text-xs">{new Date(r.updated_at).toLocaleString()}</span> },
|
||||
];
|
||||
|
||||
if (isLoading) return <LoadingSpinner />;
|
||||
|
||||
@@ -58,7 +79,7 @@ export function PositionsPage() {
|
||||
<h1 className="mb-4 text-xl font-semibold text-gray-100">Positions</h1>
|
||||
<DataTable<Position>
|
||||
data={positions}
|
||||
columns={columns}
|
||||
columns={posColumns}
|
||||
keyField="id"
|
||||
footerRow={footer}
|
||||
/>
|
||||
|
||||
@@ -1,13 +1,92 @@
|
||||
/**
|
||||
* Recommendation detail page with validation context.
|
||||
*
|
||||
* Shows original confidence alongside calibrated confidence (historical win rate),
|
||||
* evidence quality indicators, source reliability, and live eligibility status.
|
||||
*
|
||||
* Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6, 13.7
|
||||
*/
|
||||
import { useParams, Link } from '@tanstack/react-router';
|
||||
import { useRecommendation } from '../api/hooks';
|
||||
import { AlertTriangle, ShieldCheck, ShieldX, Info } from 'lucide-react';
|
||||
import {
|
||||
useRecommendation,
|
||||
useValidationCalibration,
|
||||
useValidationGateStatus,
|
||||
useValidationAttributionSources,
|
||||
} from '../api/hooks';
|
||||
import { StatusBadge, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';
|
||||
|
||||
export function RecommendationDetailPage() {
|
||||
const { id } = useParams({ from: '/recommendations/$id' });
|
||||
const { data: rec, isLoading } = useRecommendation(id);
|
||||
const { data: calibration } = useValidationCalibration();
|
||||
const { data: gateData } = useValidationGateStatus();
|
||||
const { data: sourcesData } = useValidationAttributionSources();
|
||||
|
||||
if (isLoading || !rec) return <LoadingSpinner />;
|
||||
|
||||
// --- Calibration: find the bucket matching this recommendation's confidence ---
|
||||
const matchingBucket = calibration?.buckets?.find(
|
||||
(b) => rec.confidence >= b.bucket_low && rec.confidence < b.bucket_high,
|
||||
);
|
||||
// Handle edge case: confidence of exactly 1.0 falls in the last bucket [0.90, 1.00]
|
||||
const calibratedBucket =
|
||||
matchingBucket ??
|
||||
(rec.confidence >= 1.0
|
||||
? calibration?.buckets?.find((b) => b.bucket_high >= 1.0)
|
||||
: undefined);
|
||||
|
||||
const historicalWinRate = calibratedBucket?.observed_win_rate;
|
||||
|
||||
// --- Evidence counts ---
|
||||
const totalEvidenceCount = rec.evidence.length;
|
||||
// Compute duplicate evidence: group by normalized title, count extras
|
||||
const titleCounts = new Map<string, number>();
|
||||
for (const ev of rec.evidence) {
|
||||
const key = (ev.title ?? '').toLowerCase().trim();
|
||||
titleCounts.set(key, (titleCounts.get(key) ?? 0) + 1);
|
||||
}
|
||||
let duplicateEvidenceCount = 0;
|
||||
for (const count of titleCounts.values()) {
|
||||
if (count > 1) duplicateEvidenceCount += count - 1;
|
||||
}
|
||||
const uniqueEvidenceCount = totalEvidenceCount - duplicateEvidenceCount;
|
||||
const duplicateRatio = totalEvidenceCount > 0 ? duplicateEvidenceCount / totalEvidenceCount : 0;
|
||||
const hasDuplicateWarning = duplicateRatio > 0.2;
|
||||
|
||||
// --- Source reliability: find primary contributing sources ---
|
||||
const evidenceSources = new Map<string, number>();
|
||||
for (const ev of rec.evidence) {
|
||||
const src = ev.source_type ?? ev.publisher ?? 'unknown';
|
||||
evidenceSources.set(src, (evidenceSources.get(src) ?? 0) + ev.weight);
|
||||
}
|
||||
// Sort by total weight descending to find primary source
|
||||
const sortedSources = [...evidenceSources.entries()].sort((a, b) => b[1] - a[1]);
|
||||
const primarySourceType = sortedSources[0]?.[0];
|
||||
|
||||
// Look up source reliability from attribution data
|
||||
const primarySourceAttribution = sourcesData?.sources?.find(
|
||||
(s) => s.source_type === primarySourceType || s.source === primarySourceType,
|
||||
);
|
||||
// Source reliability is approximated from win_rate via Bayesian shrinkage
|
||||
// The attribution data has win_rate which is the observed metric
|
||||
const primarySourceWinRate = primarySourceAttribution?.win_rate;
|
||||
// Bayesian shrinkage: reliability = 0.5 + (n/(n+30)) * (win_rate - 0.5)
|
||||
const primarySourceCount = primarySourceAttribution?.prediction_count ?? 0;
|
||||
const primarySourceReliability =
|
||||
primarySourceWinRate != null
|
||||
? 0.5 + (primarySourceCount / (primarySourceCount + 30)) * (primarySourceWinRate - 0.5)
|
||||
: undefined;
|
||||
const hasLowReliabilityWarning =
|
||||
primarySourceReliability != null && primarySourceReliability < 0.4;
|
||||
|
||||
// --- Gate status ---
|
||||
const gateStatus = gateData?.gate_status as {
|
||||
passed?: boolean;
|
||||
reason?: string;
|
||||
threshold_results?: Array<{ name: string; threshold: number; actual: number; passed: boolean }>;
|
||||
} | null;
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -28,6 +107,137 @@ export function RecommendationDetailPage() {
|
||||
</dl>
|
||||
</Card>
|
||||
|
||||
{/* Validation Context Card — Requirements 13.1–13.7 */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">Validation Context</h2>
|
||||
<dl className="grid grid-cols-2 gap-x-8 gap-y-3 text-sm sm:grid-cols-3">
|
||||
{/* 13.1: Original confidence alongside calibrated confidence */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Original Confidence</dt>
|
||||
<dd className="text-gray-200">{(rec.confidence * 100).toFixed(1)}%</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Calibrated Confidence</dt>
|
||||
<dd className="text-gray-200">
|
||||
{historicalWinRate != null
|
||||
? `${(historicalWinRate * 100).toFixed(1)}%`
|
||||
: 'N/A'}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.2: Historical win rate for similar confidence levels */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Historical Win Rate</dt>
|
||||
<dd className="text-gray-200">
|
||||
{historicalWinRate != null ? (
|
||||
<span>
|
||||
{(historicalWinRate * 100).toFixed(1)}%
|
||||
{calibratedBucket && (
|
||||
<span className="ml-1 text-xs text-gray-500">
|
||||
({calibratedBucket.prediction_count} predictions)
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
'N/A'
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.3: Evidence count, unique evidence count, duplicate evidence count */}
|
||||
<div>
|
||||
<dt className="text-gray-500">Evidence Count</dt>
|
||||
<dd className="text-gray-200">{totalEvidenceCount}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="text-gray-500">Unique Evidence</dt>
|
||||
<dd className="text-gray-200">{uniqueEvidenceCount}</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className="flex items-center gap-1 text-gray-500">
|
||||
Duplicate Evidence
|
||||
{/* 13.6: Warning badge when duplicate evidence count > 20% of total */}
|
||||
{hasDuplicateWarning && (
|
||||
<span
|
||||
className="inline-flex items-center gap-0.5 rounded-full border border-yellow-700/50 bg-yellow-900/40 px-1.5 py-0.5 text-[10px] font-medium text-yellow-400"
|
||||
title="Duplicate evidence exceeds 20% of total — potential evidence inflation"
|
||||
>
|
||||
<AlertTriangle size={10} />
|
||||
>20%
|
||||
</span>
|
||||
)}
|
||||
</dt>
|
||||
<dd className="text-gray-200">
|
||||
{duplicateEvidenceCount}
|
||||
{totalEvidenceCount > 0 && (
|
||||
<span className="ml-1 text-xs text-gray-500">
|
||||
({(duplicateRatio * 100).toFixed(0)}%)
|
||||
</span>
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.4: Source reliability indicator */}
|
||||
<div>
|
||||
<dt className="flex items-center gap-1 text-gray-500">
|
||||
Primary Source Reliability
|
||||
{/* 13.7: Warning badge when primary source reliability < 0.4 */}
|
||||
{hasLowReliabilityWarning && (
|
||||
<span
|
||||
className="inline-flex items-center gap-0.5 rounded-full border border-red-700/50 bg-red-900/40 px-1.5 py-0.5 text-[10px] font-medium text-red-400"
|
||||
title="Primary source reliability is below 0.4 — low or unknown reliability"
|
||||
>
|
||||
<AlertTriangle size={10} />
|
||||
Low
|
||||
</span>
|
||||
)}
|
||||
</dt>
|
||||
<dd className="text-gray-200">
|
||||
{primarySourceReliability != null ? (
|
||||
<span>
|
||||
{primarySourceReliability.toFixed(3)}
|
||||
{primarySourceType && (
|
||||
<span className="ml-1 text-xs text-gray-500">({primarySourceType})</span>
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
'N/A'
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
|
||||
{/* 13.5: Live eligibility status with reason */}
|
||||
<div className="col-span-2">
|
||||
<dt className="text-gray-500">Live Eligibility</dt>
|
||||
<dd>
|
||||
{gateStatus != null ? (
|
||||
<div className="flex items-center gap-2">
|
||||
{gateStatus.passed ? (
|
||||
<span className="inline-flex items-center gap-1 text-green-400">
|
||||
<ShieldCheck size={14} />
|
||||
Gate Passed
|
||||
</span>
|
||||
) : (
|
||||
<span className="inline-flex items-center gap-1 text-red-400">
|
||||
<ShieldX size={14} />
|
||||
Gate Failed
|
||||
</span>
|
||||
)}
|
||||
{gateStatus.reason && (
|
||||
<span className="text-xs text-gray-500">{gateStatus.reason}</span>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<span className="inline-flex items-center gap-1 text-gray-500">
|
||||
<Info size={14} />
|
||||
N/A — no gate evaluation available
|
||||
</span>
|
||||
)}
|
||||
</dd>
|
||||
</div>
|
||||
</dl>
|
||||
</Card>
|
||||
|
||||
{rec.thesis && (
|
||||
<Card>
|
||||
<h2 className="mb-2 text-sm font-medium text-gray-400">Thesis</h2>
|
||||
|
||||
@@ -0,0 +1,275 @@
|
||||
import { useParams, Link } from '@tanstack/react-router';
|
||||
import { useReport } from '../api/hooks';
|
||||
import { LoadingSpinner, StatusBadge, Card } from '../components/ui';
|
||||
import { ArrowLeft } from 'lucide-react';
|
||||
|
||||
/**
 * Profit-and-loss section of a report payload; this is the type of `ReportData.pnl`.
 *
 * NOTE(review): the detail page prints `daily_return`, `cumulative_return` and `win_rate`
 * through `pct()`, which multiplies by 100, so they are presumably fractions (0.05 = 5%) —
 * confirm against the backend.
 */
interface PLSection {
  realized_pnl: number;
  unrealized_pnl: number;
  daily_return: number;
  cumulative_return: number;
  win_count: number;
  loss_count: number;
  win_rate: number;
  profit_factor: number;
  sharpe_ratio: number;
  /** Free-text summary; the page only renders it when non-empty. */
  summary: string;
  /** Optional per-field discrepancies between a computed value and a snapshot value. */
  validation_warnings?: Array<{
    field_name: string;
    computed_value: number;
    snapshot_value: number;
    pct_difference: number;
  }>;
}
|
||||
|
||||
/**
 * One row of the positions table (`ReportData.position_performance.positions`).
 *
 * NOTE(review): `pnl_pct` is printed with a '%' suffix and no scaling, so it is presumably
 * already expressed in percent units (unlike the fraction-style fields fed to `pct()`) —
 * confirm against the backend.
 */
interface PositionDetail {
  ticker: string;
  entry_price: number;
  /** Shown under the "Current/Exit" column. */
  current_or_exit_price: number;
  pnl: number;
  pnl_pct: number;
  hold_duration_hours: number;
  /** Passed straight to `StatusBadge`. */
  status: string;
}
|
||||
|
||||
/**
 * Risk section of a report payload (`ReportData.risk_metrics`).
 *
 * NOTE(review): `portfolio_heat`, `max_drawdown` and `current_drawdown_pct` are printed
 * through `pct()`, which multiplies by 100, so they are presumably fractions — confirm.
 */
interface RiskMetrics {
  current_risk_tier: string;
  portfolio_heat: number;
  max_drawdown: number;
  current_drawdown_pct: number;
  /** Printed as a dollar amount. */
  reserve_pool_balance: number;
  circuit_breaker_event_count: number;
  /** Free-text summary; the page only renders it when non-empty. */
  summary: string;
}
|
||||
|
||||
/**
 * One row of the model-quality table (`ReportData.model_quality.windows`).
 *
 * Every metric is nullable; the detail page prints '—' for a null metric.
 */
interface ModelWindow {
  /** Row label, shown under the "Window" column. */
  lookback: string;
  win_rate: number | null;
  directional_accuracy: number | null;
  /** Shown under the "IC" column. */
  information_coefficient: number | null;
  /** Shown under the "ECE" column. */
  calibration_error: number | null;
  /** Shown under the "Brier" column. */
  brier_score: number | null;
}
|
||||
|
||||
/**
 * Shape the report detail page assumes for a report's `report_data` payload.
 *
 * The page force-casts the payload to this type; nothing here is validated at runtime.
 */
interface ReportData {
  /** Profit-and-loss figures. */
  pnl: PLSection;

  /** Counts and win rate for acted vs. skipped recommendations. */
  recommendation_accuracy: {
    total_evaluated: number;
    act_count: number;
    skip_count: number;
    acted_win_rate: number;
    avg_confidence_acted: number;
    avg_confidence_skipped: number;
    summary: string;
    validation_warnings?: Array<{ field_name: string; pct_difference: number }>;
  };

  /** Per-position rows plus a free-text summary. */
  position_performance: {
    positions: Array<PositionDetail>;
    summary: string;
  };

  /** Risk tier, drawdown and reserve figures. */
  risk_metrics: RiskMetrics;

  /** Model metrics per lookback window plus a free-text summary. */
  model_quality: {
    windows: Array<ModelWindow>;
    summary: string;
    validation_warnings?: Array<{ field_name: string; pct_difference: number }>;
  };

  executive_summary: string;
  validation_status: string;
}
|
||||
|
||||
/**
 * Small bordered tile showing a label, a prominent value and an optional sub-line.
 *
 * @param props.label - Caption shown above the value.
 * @param props.value - Pre-formatted value text.
 * @param props.sub - Optional secondary text; only rendered when truthy.
 */
function MetricCard(props: { label: string; value: string; sub?: string }) {
  const { label, value, sub } = props;
  const subLine = sub && <div className="text-xs text-gray-500 mt-0.5">{sub}</div>;

  return (
    <div className="rounded-lg bg-surface-800 border border-surface-700 p-3">
      <div className="text-xs text-gray-400 mb-1">{label}</div>
      <div className="text-lg font-semibold text-gray-100">{value}</div>
      {subLine}
    </div>
  );
}
|
||||
|
||||
/**
 * Format a fraction as a percentage string with two decimals (0.1234 -> "12.34%").
 *
 * The report payload is force-cast from untyped data, so a metric can reach this
 * function as null, undefined or a non-finite number even though callers type it as
 * `number`. Those cases return '—' instead of the misleading "0.00%" (null coerces
 * to 0) or "NaN%" that plain arithmetic would produce.
 *
 * @param v - Fraction to format; nullish and non-finite values are tolerated.
 * @returns The percentage text, or '—' when there is no usable number.
 */
function pct(v: number | null | undefined): string {
  if (v == null || !Number.isFinite(v)) {
    return '—';
  }
  return `${(v * 100).toFixed(2)}%`;
}
|
||||
|
||||
/**
 * Format a number as a dollar amount with two decimals, placing the minus sign
 * before the currency symbol ("-$3.25").
 *
 * The report payload is force-cast from untyped data, so nullish or non-finite
 * values are tolerated and rendered as '—' rather than throwing on `.toFixed`
 * or printing "-$NaN".
 *
 * @param v - Amount to format; nullish and non-finite values are tolerated.
 * @returns The dollar text, or '—' when there is no usable number.
 */
function dollar(v: number | null | undefined): string {
  if (v == null || !Number.isFinite(v)) {
    return '—';
  }
  const magnitude = Math.abs(v).toFixed(2);
  // Decide the sign after rounding so that e.g. -0.001 prints "$0.00", not "-$0.00".
  const isNegative = v < 0 && Number(magnitude) !== 0;
  return isNegative ? `-$${magnitude}` : `$${magnitude}`;
}
|
||||
|
||||
/**
 * Detail view for a single trading report, addressed by the `$id` route param.
 *
 * Shows a spinner while the report query is loading, a "Report not found" placeholder
 * when no data came back, and otherwise the report's sections in order: header,
 * executive summary, P&L, recommendation accuracy, positions, risk metrics and
 * model quality.
 */
export function ReportDetailPage() {
  const { id } = useParams({ from: '/reports/$id' });
  const { data, isLoading } = useReport(id);

  if (isLoading) {
    return <LoadingSpinner />;
  }
  if (!data) {
    return <div className="text-gray-400">Report not found</div>;
  }

  // The payload is force-cast to ReportData; nothing in this component validates its shape.
  const report = data.report_data as unknown as ReportData;
  const {
    pnl,
    recommendation_accuracy: accuracy,
    position_performance: positionPerf,
    risk_metrics: risk,
    model_quality: modelQuality,
  } = report;
  const positions = positionPerf.positions;
  const windows = modelQuality.windows;
  const accuracyWarnings = accuracy.validation_warnings;

  // Anything other than 'daily' is labelled "Weekly".
  const reportKind = data.report_type === 'daily' ? 'Daily' : 'Weekly';
  // A single-day period is shown once instead of as a range.
  const periodLabel =
    data.period_start === data.period_end
      ? data.period_start
      : `${data.period_start} → ${data.period_end}`;

  // Green for zero-or-positive values, red for negative ones.
  const signClass = (n: number) => (n >= 0 ? 'text-green-400' : 'text-red-400');
  // Nullable model metrics fall back to an em dash.
  const pctOrDash = (n: number | null) => (n != null ? pct(n) : '—');
  const fixed4OrDash = (n: number | null) => (n != null ? n.toFixed(4) : '—');

  return (
    <div className="space-y-6">
      {/* Header */}
      <div className="flex items-center gap-3">
        <Link to="/reports" className="text-gray-400 hover:text-gray-200">
          <ArrowLeft size={20} />
        </Link>
        <div>
          <h1 className="text-xl font-semibold text-gray-100">{reportKind} Report</h1>
          <p className="text-sm text-gray-400">
            {periodLabel}
            {' · '}
            <StatusBadge status={data.validation_status} />
          </p>
        </div>
      </div>

      {/* Executive Summary */}
      {report.executive_summary && (
        <Card>
          <h2 className="text-sm font-medium text-gray-300 mb-2">Executive Summary</h2>
          <p className="text-sm text-gray-200 whitespace-pre-wrap leading-relaxed">
            {report.executive_summary}
          </p>
        </Card>
      )}

      {/* P&L Section */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">P&L</h2>
        <div className="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
          <MetricCard label="Realized P&L" value={dollar(pnl.realized_pnl)} />
          <MetricCard label="Unrealized P&L" value={dollar(pnl.unrealized_pnl)} />
          <MetricCard label="Daily Return" value={pct(pnl.daily_return)} />
          <MetricCard label="Cumulative Return" value={pct(pnl.cumulative_return)} />
          <MetricCard
            label="Win Rate"
            value={pct(pnl.win_rate)}
            sub={`${pnl.win_count}W / ${pnl.loss_count}L`}
          />
          <MetricCard label="Profit Factor" value={pnl.profit_factor.toFixed(2)} />
          <MetricCard label="Sharpe Ratio" value={pnl.sharpe_ratio.toFixed(2)} />
        </div>
        {pnl.summary && <p className="text-xs text-gray-400 mt-2">{pnl.summary}</p>}
      </Card>

      {/* Recommendation Accuracy */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">Recommendation Accuracy</h2>
        <div className="grid grid-cols-2 md:grid-cols-4 gap-3 mb-3">
          <MetricCard label="Total Evaluated" value={String(accuracy.total_evaluated)} />
          <MetricCard label="Acted" value={String(accuracy.act_count)} />
          <MetricCard label="Skipped" value={String(accuracy.skip_count)} />
          <MetricCard label="Acted Win Rate" value={pct(accuracy.acted_win_rate)} />
          <MetricCard label="Avg Confidence (Acted)" value={accuracy.avg_confidence_acted.toFixed(3)} />
          <MetricCard label="Avg Confidence (Skipped)" value={accuracy.avg_confidence_skipped.toFixed(3)} />
        </div>
        {accuracyWarnings && accuracyWarnings.length > 0 && (
          <div className="mt-2 rounded bg-yellow-900/20 border border-yellow-700/30 p-2">
            <span className="text-xs text-yellow-400">⚠ Validation warnings:</span>
            {accuracyWarnings.map((warning, idx) => (
              <span key={idx} className="text-xs text-yellow-300 ml-2">{warning.field_name} ({warning.pct_difference.toFixed(1)}% off)</span>
            ))}
          </div>
        )}
        {accuracy.summary && <p className="text-xs text-gray-400 mt-2">{accuracy.summary}</p>}
      </Card>

      {/* Position Performance */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">
          Positions ({positions.length})
        </h2>
        {positions.length > 0 ? (
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="text-left text-xs text-gray-400 border-b border-surface-700">
                  <th className="pb-2 pr-4">Ticker</th>
                  <th className="pb-2 pr-4">Status</th>
                  <th className="pb-2 pr-4">Entry</th>
                  <th className="pb-2 pr-4">Current/Exit</th>
                  <th className="pb-2 pr-4">P&L</th>
                  <th className="pb-2 pr-4">P&L %</th>
                  <th className="pb-2">Hold (hrs)</th>
                </tr>
              </thead>
              <tbody>
                {positions.map((pos, idx) => (
                  <tr key={idx} className="border-b border-surface-800 text-gray-200">
                    <td className="py-1.5 pr-4 font-mono font-semibold text-brand-300">{pos.ticker}</td>
                    <td className="py-1.5 pr-4"><StatusBadge status={pos.status} /></td>
                    <td className="py-1.5 pr-4">${pos.entry_price.toFixed(2)}</td>
                    <td className="py-1.5 pr-4">${pos.current_or_exit_price.toFixed(2)}</td>
                    <td className={`py-1.5 pr-4 font-mono ${signClass(pos.pnl)}`}>
                      {dollar(pos.pnl)}
                    </td>
                    <td className={`py-1.5 pr-4 ${signClass(pos.pnl_pct)}`}>
                      {pos.pnl_pct.toFixed(2)}%
                    </td>
                    <td className="py-1.5 text-gray-400">{pos.hold_duration_hours.toFixed(1)}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        ) : (
          <p className="text-sm text-gray-500">No positions during this period.</p>
        )}
        {positionPerf.summary && (
          <p className="text-xs text-gray-400 mt-3">{positionPerf.summary}</p>
        )}
      </Card>

      {/* Risk Metrics */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">Risk Metrics</h2>
        <div className="grid grid-cols-2 md:grid-cols-3 gap-3">
          <MetricCard label="Risk Tier" value={risk.current_risk_tier} />
          <MetricCard label="Portfolio Heat" value={pct(risk.portfolio_heat)} />
          <MetricCard label="Max Drawdown" value={pct(risk.max_drawdown)} />
          <MetricCard label="Current Drawdown" value={pct(risk.current_drawdown_pct)} />
          <MetricCard label="Reserve Pool" value={dollar(risk.reserve_pool_balance)} />
          <MetricCard label="Circuit Breaker Events" value={String(risk.circuit_breaker_event_count)} />
        </div>
        {risk.summary && <p className="text-xs text-gray-400 mt-3">{risk.summary}</p>}
      </Card>

      {/* Model Quality */}
      <Card>
        <h2 className="text-sm font-medium text-gray-300 mb-3">Model Quality</h2>
        {windows.length > 0 ? (
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="text-left text-xs text-gray-400 border-b border-surface-700">
                  <th className="pb-2 pr-4">Window</th>
                  <th className="pb-2 pr-4">Win Rate</th>
                  <th className="pb-2 pr-4">Dir. Accuracy</th>
                  <th className="pb-2 pr-4">IC</th>
                  <th className="pb-2 pr-4">ECE</th>
                  <th className="pb-2">Brier</th>
                </tr>
              </thead>
              <tbody>
                {windows.map((win, idx) => (
                  <tr key={idx} className="border-b border-surface-800 text-gray-200">
                    <td className="py-1.5 pr-4 font-medium">{win.lookback}</td>
                    <td className="py-1.5 pr-4">{pctOrDash(win.win_rate)}</td>
                    <td className="py-1.5 pr-4">{pctOrDash(win.directional_accuracy)}</td>
                    <td className="py-1.5 pr-4">{fixed4OrDash(win.information_coefficient)}</td>
                    <td className="py-1.5 pr-4">{fixed4OrDash(win.calibration_error)}</td>
                    <td className="py-1.5">{fixed4OrDash(win.brier_score)}</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        ) : (
          <p className="text-sm text-gray-500">No model quality data available.</p>
        )}
        {modelQuality.summary && (
          <p className="text-xs text-gray-400 mt-3">{modelQuality.summary}</p>
        )}
      </Card>
    </div>
  );
}
|
||||
@@ -0,0 +1,79 @@
|
||||
import { useState } from 'react';
|
||||
import { useNavigate } from '@tanstack/react-router';
|
||||
import { useReports } from '../api/hooks';
|
||||
import { DataTable, type Column } from '../components/DataTable';
|
||||
import { StatusBadge, LoadingSpinner } from '../components/ui';
|
||||
import type { ReportListItem } from '../api/hooks';
|
||||
|
||||
/**
 * List page for trading reports.
 *
 * Fetches up to 50 reports, optionally filtered by the report type chosen in the
 * dropdown, and renders them in a table. Clicking a row navigates to that report's
 * detail route. A spinner replaces the whole page while the query is loading.
 */
export function ReportsPage() {
  const navigate = useNavigate();
  const [reportType, setReportType] = useState('');

  // An empty selection means "All Types": the filter is sent as undefined rather than ''.
  const { data, isLoading } = useReports({
    report_type: reportType || undefined,
    limit: 50,
  });

  if (isLoading) {
    return <LoadingSpinner />;
  }

  const typeColumn: Column<ReportListItem> = {
    key: 'report_type',
    header: 'Type',
    render: (r) => (
      <span className="inline-flex items-center rounded px-2 py-0.5 text-xs font-medium bg-surface-700 text-brand-300 capitalize">
        {r.report_type}
      </span>
    ),
  };

  const periodColumn: Column<ReportListItem> = {
    key: 'period_start',
    header: 'Period',
    // A single-day period is shown once instead of as a range.
    render: (r) => {
      if (r.period_start === r.period_end) {
        return r.period_start;
      }
      return `${r.period_start} → ${r.period_end}`;
    },
  };

  const validationColumn: Column<ReportListItem> = {
    key: 'validation_status',
    header: 'Validation',
    render: (r) => <StatusBadge status={r.validation_status} />,
  };

  const generatedColumn: Column<ReportListItem> = {
    key: 'generated_at',
    header: 'Generated',
    render: (r) => {
      const stamp = new Date(r.generated_at).toLocaleString();
      return <span className="text-xs text-gray-400">{stamp}</span>;
    },
  };

  const columns: Column<ReportListItem>[] = [
    typeColumn,
    periodColumn,
    validationColumn,
    generatedColumn,
  ];

  const openReport = (row: ReportListItem) =>
    navigate({ to: '/reports/$id', params: { id: row.id } });

  return (
    <div>
      <div className="mb-4 flex items-center justify-between">
        <h1 className="text-xl font-semibold text-gray-100">Trading Reports</h1>
        <select
          value={reportType}
          onChange={(e) => setReportType(e.target.value)}
          className="rounded border border-surface-600 bg-surface-800 px-3 py-1.5 text-sm text-gray-200 focus:border-brand-500 focus:outline-none"
          aria-label="Filter by report type"
        >
          <option value="">All Types</option>
          <option value="daily">Daily</option>
          <option value="weekly">Weekly</option>
        </select>
      </div>
      <DataTable<ReportListItem>
        data={data ?? []}
        columns={columns}
        keyField="id"
        onRowClick={openReport}
      />
    </div>
  );
}
|
||||
@@ -116,7 +116,7 @@ export function TradingPage() {
|
||||
|
||||
{/* Paper Trading Reset */}
|
||||
<ResetCard
|
||||
onReset={() => resetTrading.mutate(0)}
|
||||
onReset={(params) => resetTrading.mutate(params)}
|
||||
isResetting={resetTrading.isPending}
|
||||
/>
|
||||
|
||||
@@ -490,26 +490,101 @@ function ApprovalRow({ approval, onReview }: {
|
||||
|
||||
|
||||
function ResetCard({ onReset, isResetting }: {
|
||||
onReset: () => void;
|
||||
onReset: (params: { initial_capital?: number; reserve_pct?: number }) => void;
|
||||
isResetting: boolean;
|
||||
}) {
|
||||
const [showConfirm, setShowConfirm] = useState(false);
|
||||
const [capitalInput, setCapitalInput] = useState('100000');
|
||||
const [reservePct, setReservePct] = useState(20);
|
||||
const [useCustomCapital, setUseCustomCapital] = useState(false);
|
||||
|
||||
const capital = parseFloat(capitalInput) || 0;
|
||||
const reserveAmount = capital * (reservePct / 100);
|
||||
const activeAmount = capital - reserveAmount;
|
||||
|
||||
return (
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">Paper Trading Account</h2>
|
||||
|
||||
{/* Capital & Reserve Configuration */}
|
||||
<div className="mb-4 space-y-3 rounded-lg border border-surface-700 bg-surface-950 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<button
|
||||
onClick={() => setUseCustomCapital(!useCustomCapital)}
|
||||
className={`relative inline-flex h-5 w-9 shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors focus:outline-none focus:ring-2 focus:ring-brand-500 focus:ring-offset-2 focus:ring-offset-surface-900 ${
|
||||
useCustomCapital ? 'bg-brand-600' : 'bg-surface-700'
|
||||
}`}
|
||||
role="switch"
|
||||
aria-checked={useCustomCapital}
|
||||
aria-label="Set custom initial capital"
|
||||
>
|
||||
<span className={`pointer-events-none inline-block h-4 w-4 rounded-full bg-white shadow transition-transform ${
|
||||
useCustomCapital ? 'translate-x-4' : 'translate-x-0'
|
||||
}`} />
|
||||
</button>
|
||||
<span className="text-sm text-gray-300">Set initial capital</span>
|
||||
<span className="text-[10px] text-gray-600">(otherwise uses broker account balance)</span>
|
||||
</div>
|
||||
|
||||
{useCustomCapital && (
|
||||
<div className="flex items-center gap-2">
|
||||
<label htmlFor="reset-capital" className="text-xs text-gray-500">Capital $</label>
|
||||
<input
|
||||
id="reset-capital"
|
||||
type="number"
|
||||
min={0}
|
||||
step={1000}
|
||||
value={capitalInput}
|
||||
onChange={(e) => setCapitalInput(e.target.value)}
|
||||
className="w-36 rounded-md border border-surface-700 bg-surface-900 px-2 py-1 text-sm font-mono text-gray-200"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<div className="flex items-center justify-between">
|
||||
<label htmlFor="reset-reserve" className="text-xs text-gray-500">
|
||||
Reserve pool: {reservePct}%
|
||||
</label>
|
||||
<span className="text-xs text-gray-600">
|
||||
Active: {100 - reservePct}%
|
||||
</span>
|
||||
</div>
|
||||
<input
|
||||
id="reset-reserve"
|
||||
type="range"
|
||||
min={0}
|
||||
max={50}
|
||||
step={5}
|
||||
value={reservePct}
|
||||
onChange={(e) => setReservePct(Number(e.target.value))}
|
||||
className="mt-1 w-full accent-brand-600"
|
||||
/>
|
||||
{useCustomCapital && capital > 0 && (
|
||||
<div className="mt-1 flex justify-between text-[10px] text-gray-600">
|
||||
<span>Reserve: ${reserveAmount.toLocaleString(undefined, { maximumFractionDigits: 0 })}</span>
|
||||
<span>Active: ${activeAmount.toLocaleString(undefined, { maximumFractionDigits: 0 })}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Reset Button */}
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<p className="text-sm text-gray-300">Full Reset</p>
|
||||
<p className="text-[10px] text-gray-600">
|
||||
Liquidates all broker positions, cancels open orders, wipes local trading history,
|
||||
and syncs capital from the broker account.
|
||||
and sets capital from {useCustomCapital ? 'the amount above' : 'the broker account balance'}.
|
||||
</p>
|
||||
<p className="mt-1 text-[10px] text-gray-600 italic">
|
||||
Note: To reset the Alpaca paper account balance itself, use the Alpaca dashboard.
|
||||
</p>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => setShowConfirm(true)}
|
||||
disabled={isResetting}
|
||||
className="rounded-md border border-red-700/50 bg-red-900/20 px-3 py-1.5 text-sm font-medium text-red-400 hover:bg-red-900/40 disabled:opacity-50"
|
||||
className="shrink-0 rounded-md border border-red-700/50 bg-red-900/20 px-3 py-1.5 text-sm font-medium text-red-400 hover:bg-red-900/40 disabled:opacity-50"
|
||||
>
|
||||
Reset Everything
|
||||
</button>
|
||||
@@ -519,11 +594,20 @@ function ResetCard({ onReset, isResetting }: {
|
||||
<p className="text-sm text-red-300">
|
||||
This will <span className="font-semibold">permanently delete</span> all positions, orders,
|
||||
trading decisions, stop levels, portfolio snapshots, and backtest data.
|
||||
All broker positions will be liquidated and capital will be set from the broker's account balance.
|
||||
All broker positions will be liquidated.
|
||||
{useCustomCapital
|
||||
? ` Capital will be set to $${capital.toLocaleString()} (${reservePct}% reserve / ${100 - reservePct}% active).`
|
||||
: ` Capital will be set from the broker's account balance (${reservePct}% reserve / ${100 - reservePct}% active).`}
|
||||
</p>
|
||||
<div className="mt-3 flex gap-2">
|
||||
<button
|
||||
onClick={() => { onReset(); setShowConfirm(false); }}
|
||||
onClick={() => {
|
||||
onReset({
|
||||
initial_capital: useCustomCapital ? capital : undefined,
|
||||
reserve_pct: reservePct / 100,
|
||||
});
|
||||
setShowConfirm(false);
|
||||
}}
|
||||
disabled={isResetting}
|
||||
className="rounded-md bg-red-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-red-700 disabled:opacity-50"
|
||||
>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useState } from 'react';
|
||||
import { useState, useRef, useEffect } from 'react';
|
||||
import { useNavigate, Link } from '@tanstack/react-router';
|
||||
import { useTrends, useDocument } from '../api/hooks';
|
||||
import { TrendArrow, ConfidenceBar, LoadingSpinner, TickerFilter, Card } from '../components/ui';
|
||||
import { TrendArrow, ConfidenceBar, LoadingSpinner, Card } from '../components/ui';
|
||||
import type { TrendSummary } from '../api/hooks';
|
||||
|
||||
const WINDOWS = ['intraday', '1d', '7d', '30d', '90d'];
|
||||
@@ -9,8 +9,17 @@ const WINDOWS = ['intraday', '1d', '7d', '30d', '90d'];
|
||||
export function TrendsPage() {
|
||||
const navigate = useNavigate();
|
||||
const [ticker, setTicker] = useState('');
|
||||
const [debouncedTicker, setDebouncedTicker] = useState('');
|
||||
const [window, setWindow] = useState<string | undefined>(undefined);
|
||||
const { data, isLoading } = useTrends({ ticker: ticker || undefined, window, limit: 100 });
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
// Debounce ticker search — only query after 300ms of no typing
|
||||
useEffect(() => {
|
||||
const timer = setTimeout(() => setDebouncedTicker(ticker), 300);
|
||||
return () => clearTimeout(timer);
|
||||
}, [ticker]);
|
||||
|
||||
const { data, isLoading } = useTrends({ ticker: debouncedTicker || undefined, window, limit: 100 });
|
||||
|
||||
if (isLoading) return <LoadingSpinner />;
|
||||
|
||||
@@ -19,7 +28,15 @@ export function TrendsPage() {
|
||||
<div className="mb-4 flex items-center justify-between">
|
||||
<h1 className="text-xl font-semibold text-gray-100">Trends</h1>
|
||||
<div className="flex items-center gap-3">
|
||||
<TickerFilter value={ticker} onChange={setTicker} />
|
||||
<input
|
||||
ref={inputRef}
|
||||
type="text"
|
||||
placeholder="Ticker…"
|
||||
value={ticker}
|
||||
onChange={(e) => setTicker(e.target.value.toUpperCase())}
|
||||
className="w-24 rounded-md border border-surface-700 bg-surface-900 px-2 py-1 text-xs text-gray-200 placeholder-gray-500 focus:border-brand-500 focus:outline-none"
|
||||
aria-label="Filter by ticker"
|
||||
/>
|
||||
<div className="inline-flex rounded-md border border-surface-700" role="group" aria-label="Window selector">
|
||||
<button
|
||||
onClick={() => setWindow(undefined)}
|
||||
|
||||
@@ -35,6 +35,8 @@ export function TradingOverview() {
|
||||
const resume = useResumeTradingEngine();
|
||||
const updateConfig = useUpdateTradingConfig();
|
||||
const [selectedTier, setSelectedTier] = useState<string | null>(null);
|
||||
const [maxPositions, setMaxPositions] = useState<number | null>(null);
|
||||
const [positionCap, setPositionCap] = useState<number | null>(null);
|
||||
|
||||
if (isLoading) return <LoadingSpinner />;
|
||||
if (!status) return <p className="text-gray-500">No trading status available</p>;
|
||||
@@ -131,6 +133,68 @@ export function TradingOverview() {
|
||||
<StatCard label="Portfolio Heat" value={fmtPct(status.portfolio_heat)} />
|
||||
</div>
|
||||
|
||||
{/* Position Limits */}
|
||||
<Card>
|
||||
<h2 className="mb-3 text-sm font-medium text-gray-400">Position Limits</h2>
|
||||
<div className="grid grid-cols-1 gap-4 sm:grid-cols-2">
|
||||
<div>
|
||||
<label htmlFor="max-positions" className="block text-xs text-gray-500 mb-1">
|
||||
Max Open Positions
|
||||
</label>
|
||||
<div className="flex items-center gap-2">
|
||||
<input
|
||||
id="max-positions"
|
||||
type="number"
|
||||
min={1}
|
||||
max={50}
|
||||
value={maxPositions ?? status.max_open_positions ?? 10}
|
||||
onChange={(e) => setMaxPositions(Number(e.target.value))}
|
||||
className="w-20 rounded-md border border-surface-700 bg-surface-950 px-2 py-1.5 text-sm text-gray-200 focus:border-brand-500 focus:outline-none"
|
||||
/>
|
||||
<button
|
||||
onClick={() => {
|
||||
const val = maxPositions ?? status.max_open_positions ?? 10;
|
||||
updateConfig.mutate({ max_open_positions: val });
|
||||
}}
|
||||
disabled={updateConfig.isPending}
|
||||
className="rounded-md bg-brand-700 px-3 py-1.5 text-xs font-medium text-white hover:bg-brand-600 disabled:opacity-50"
|
||||
>
|
||||
Apply
|
||||
</button>
|
||||
<span className="text-xs text-gray-500">
|
||||
Current: {status.open_position_count ?? 0} / {status.max_open_positions ?? 10}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<label htmlFor="position-cap" className="block text-xs text-gray-500 mb-1">
|
||||
Absolute Position Cap ($)
|
||||
</label>
|
||||
<div className="flex items-center gap-2">
|
||||
<input
|
||||
id="position-cap"
|
||||
type="number"
|
||||
min={10}
|
||||
step={10}
|
||||
value={positionCap ?? status.absolute_position_cap ?? 50}
|
||||
onChange={(e) => setPositionCap(Number(e.target.value))}
|
||||
className="w-24 rounded-md border border-surface-700 bg-surface-950 px-2 py-1.5 text-sm text-gray-200 focus:border-brand-500 focus:outline-none"
|
||||
/>
|
||||
<button
|
||||
onClick={() => {
|
||||
const val = positionCap ?? status.absolute_position_cap ?? 50;
|
||||
updateConfig.mutate({ absolute_position_cap: val });
|
||||
}}
|
||||
disabled={updateConfig.isPending}
|
||||
className="rounded-md bg-brand-700 px-3 py-1.5 text-xs font-medium text-white hover:bg-brand-600 disabled:opacity-50"
|
||||
>
|
||||
Apply
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Card>
|
||||
|
||||
{/* Portfolio Heat Gauge */}
|
||||
<Card>
|
||||
<h2 className="mb-2 text-sm font-medium text-gray-400">Portfolio Heat</h2>
|
||||
|
||||
@@ -30,6 +30,8 @@ import { HomePage } from './pages/Home';
|
||||
import { GlobalEventsPage } from './pages/GlobalEvents';
|
||||
import { GlobalEventDetailPage } from './pages/GlobalEventDetail';
|
||||
import { AgentsPage } from './pages/Agents';
|
||||
import { ReportsPage } from './pages/Reports';
|
||||
import { ReportDetailPage } from './pages/ReportDetail';
|
||||
|
||||
// Root route wraps everything in the app shell layout
|
||||
const rootRoute = createRootRoute({
|
||||
@@ -167,6 +169,17 @@ const agentsRoute = createRoute({
|
||||
component: AgentsPage,
|
||||
});
|
||||
|
||||
const reportsRoute = createRoute({
|
||||
getParentRoute: () => rootRoute,
|
||||
path: '/reports',
|
||||
component: ReportsPage,
|
||||
});
|
||||
const reportDetailRoute = createRoute({
|
||||
getParentRoute: () => rootRoute,
|
||||
path: '/reports/$id',
|
||||
component: ReportDetailPage,
|
||||
});
|
||||
|
||||
const routeTree = rootRoute.addChildren([
|
||||
indexRoute,
|
||||
companiesRoute,
|
||||
@@ -192,6 +205,8 @@ const routeTree = rootRoute.addChildren([
|
||||
globalEventsRoute,
|
||||
globalEventDetailRoute,
|
||||
agentsRoute,
|
||||
reportsRoute,
|
||||
reportDetailRoute,
|
||||
]);
|
||||
|
||||
export const router = createRouter({ routeTree });
|
||||
|
||||
@@ -73,6 +73,97 @@ export const mockVariantPerfHistory = [
|
||||
{ hour: '2026-04-10T11:00:00Z', invocations: 12, successes: 11, avg_duration_ms: 1300, avg_confidence: 0.82 },
|
||||
];
|
||||
|
||||
// Validation: Model Quality & Calibration mock data
|
||||
export const mockValidationSummary = {
|
||||
snapshot: {
|
||||
id: 'ms-1',
|
||||
generated_at: '2026-04-11T12:00:00Z',
|
||||
lookback_window: '30d',
|
||||
horizon: '7d',
|
||||
prediction_count: 150,
|
||||
win_rate: 0.58,
|
||||
directional_accuracy: 0.56,
|
||||
information_coefficient: 0.045,
|
||||
rank_information_coefficient: 0.038,
|
||||
avg_return: 0.012,
|
||||
avg_excess_return_vs_spy: 0.003,
|
||||
avg_excess_return_vs_sector: 0.002,
|
||||
calibration_error: 0.08,
|
||||
brier_score: 0.21,
|
||||
buy_win_rate: 0.61,
|
||||
sell_win_rate: 0.54,
|
||||
hold_win_rate: 0.50,
|
||||
metadata: {},
|
||||
},
|
||||
gate_status: {
|
||||
passed: true,
|
||||
reason: 'all thresholds met',
|
||||
threshold_results: [
|
||||
{ name: 'min_prediction_count', threshold: 100, actual: 150, passed: true },
|
||||
{ name: 'min_ic', threshold: 0.03, actual: 0.045, passed: true },
|
||||
{ name: 'min_win_rate', threshold: 0.53, actual: 0.58, passed: true },
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
export const mockValidationCalibration = {
|
||||
buckets: [
|
||||
{ bucket_low: 0.50, bucket_high: 0.60, avg_confidence: 0.55, observed_win_rate: 0.52, prediction_count: 30, miscalibrated: false },
|
||||
{ bucket_low: 0.60, bucket_high: 0.70, avg_confidence: 0.65, observed_win_rate: 0.58, prediction_count: 40, miscalibrated: false },
|
||||
{ bucket_low: 0.70, bucket_high: 0.80, avg_confidence: 0.75, observed_win_rate: 0.55, prediction_count: 35, miscalibrated: true },
|
||||
{ bucket_low: 0.80, bucket_high: 0.90, avg_confidence: 0.85, observed_win_rate: 0.70, prediction_count: 25, miscalibrated: false },
|
||||
{ bucket_low: 0.90, bucket_high: 1.00, avg_confidence: 0.95, observed_win_rate: 0.72, prediction_count: 20, miscalibrated: true },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const mockValidationGateStatus = {
|
||||
gate_status: {
|
||||
passed: false,
|
||||
reason: 'failed: min_ic below threshold',
|
||||
threshold_results: [
|
||||
{ name: 'min_prediction_count', threshold: 100, actual: 150, passed: true },
|
||||
{ name: 'min_ic', threshold: 0.03, actual: 0.02, passed: false },
|
||||
{ name: 'min_win_rate', threshold: 0.53, actual: 0.58, passed: true },
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
export const mockValidationICByHorizon = {
|
||||
horizons: [
|
||||
{ horizon: '1h', information_coefficient: 0.02, rank_information_coefficient: 0.015, prediction_count: 120, generated_at: '2026-04-11T12:00:00Z' },
|
||||
{ horizon: '7d', information_coefficient: 0.045, rank_information_coefficient: 0.038, prediction_count: 100, generated_at: '2026-04-11T12:00:00Z' },
|
||||
],
|
||||
lookback: '30d',
|
||||
};
|
||||
|
||||
export const mockValidationAttributionSources = {
|
||||
sources: [
|
||||
{ source: 'Reuters', source_type: 'news_api', prediction_count: 50, avg_weight: 0.6, avg_contribution_score: 0.3, win_rate: 0.62, avg_future_return: 0.015, avg_excess_return_vs_spy: 0.005, information_coefficient: 0.05, duplicate_rate: 0.1 },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const mockValidationAttributionCatalysts = {
|
||||
catalysts: [
|
||||
{ catalyst_type: 'earnings', prediction_count: 40, win_rate: 0.65, avg_future_return: 0.02, avg_excess_return_vs_spy: 0.008, information_coefficient: 0.06 },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const mockValidationAttributionLayers = {
|
||||
layers: [
|
||||
{ layer: 'company', avg_contribution_pct: 0.55, dominant_win_rate: 0.60, dominant_ic: 0.04 },
|
||||
{ layer: 'macro', avg_contribution_pct: 0.30, dominant_win_rate: 0.52, dominant_ic: 0.02 },
|
||||
{ layer: 'competitive', avg_contribution_pct: 0.15, dominant_win_rate: 0.48, dominant_ic: null },
|
||||
],
|
||||
lookback: '30d',
|
||||
horizon: '7d',
|
||||
};
|
||||
|
||||
export const handlers = [
|
||||
// Query API (proxied at /api/)
|
||||
http.get('/api/companies', () => HttpResponse.json(mockCompanies)),
|
||||
@@ -242,4 +333,24 @@ export const handlers = [
|
||||
const body = await request.json() as Record<string, unknown>;
|
||||
return HttpResponse.json({ enabled: body.enabled, previous_enabled: true, toggled_by: 'operator' });
|
||||
}),
|
||||
|
||||
// Trading Reports
|
||||
http.get('/api/reports', () => HttpResponse.json([
|
||||
{ id: 'rpt-1', report_type: 'daily', period_start: '2025-01-15', period_end: '2025-01-15', validation_status: 'passed', generated_at: '2025-01-15T21:30:00Z' },
|
||||
])),
|
||||
http.get('/api/reports/:id', ({ params }) => {
|
||||
if (params.id === 'rpt-1') {
|
||||
return HttpResponse.json({ id: 'rpt-1', report_type: 'daily', period_start: '2025-01-15', period_end: '2025-01-15', report_data: { pnl: { realized_pnl: 125.5 }, executive_summary: 'Test' }, validation_status: 'passed', generated_at: '2025-01-15T21:30:00Z', created_at: '2025-01-15T21:30:05Z' });
|
||||
}
|
||||
return new HttpResponse(null, { status: 404 });
|
||||
}),
|
||||
|
||||
// Validation: Model Quality & Calibration endpoints
|
||||
http.get('/api/validation/summary', () => HttpResponse.json(mockValidationSummary)),
|
||||
http.get('/api/validation/calibration', () => HttpResponse.json(mockValidationCalibration)),
|
||||
http.get('/api/validation/gate-status', () => HttpResponse.json(mockValidationGateStatus)),
|
||||
http.get('/api/validation/ic-by-horizon', () => HttpResponse.json(mockValidationICByHorizon)),
|
||||
http.get('/api/validation/attribution/sources', () => HttpResponse.json(mockValidationAttributionSources)),
|
||||
http.get('/api/validation/attribution/catalysts', () => HttpResponse.json(mockValidationAttributionCatalysts)),
|
||||
http.get('/api/validation/attribution/layers', () => HttpResponse.json(mockValidationAttributionLayers)),
|
||||
];
|
||||
|
||||
@@ -169,6 +169,55 @@ describe('Global Events page', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('OpsModel validation tab', () => {
|
||||
it('renders Model Validation tab with summary cards', async () => {
|
||||
renderRoute('/ops/model');
|
||||
await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
|
||||
|
||||
// The tab buttons should be present
|
||||
expect(screen.getByText('Extraction Performance')).toBeInTheDocument();
|
||||
expect(screen.getByText('Model Validation')).toBeInTheDocument();
|
||||
|
||||
// Click the Model Validation tab button
|
||||
await userEvent.click(screen.getByText('Model Validation'));
|
||||
|
||||
// Summary cards should render key metric labels unique to the validation summary
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText('Brier Score')).toBeInTheDocument();
|
||||
expect(screen.getByText('ECE')).toBeInTheDocument();
|
||||
expect(screen.getByText('Directional Accuracy')).toBeInTheDocument();
|
||||
expect(screen.getByText('Excess vs SPY')).toBeInTheDocument();
|
||||
});
|
||||
}, 10000);
|
||||
|
||||
it('renders calibration table with miscalibration warning', async () => {
|
||||
renderRoute('/ops/model');
|
||||
await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
|
||||
|
||||
await userEvent.click(screen.getByText('Model Validation'));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText('Calibration by Confidence Bucket')).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Miscalibrated buckets should show warning text
|
||||
const miscalWarnings = screen.getAllByText('Miscalibrated');
|
||||
expect(miscalWarnings.length).toBeGreaterThanOrEqual(1);
|
||||
}, 10000);
|
||||
|
||||
it('renders gate status pass/fail indicator', async () => {
|
||||
renderRoute('/ops/model');
|
||||
await waitFor(() => expect(screen.getByText('Model Performance')).toBeInTheDocument());
|
||||
|
||||
await userEvent.click(screen.getByText('Model Validation'));
|
||||
|
||||
// The gate-status endpoint returns passed: false
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/Live Trading Gate: FAIL/)).toBeInTheDocument();
|
||||
});
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
describe('Agents page', () => {
|
||||
it('renders agent list in sidebar', async () => {
|
||||
renderRoute('/agents');
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Frontend hook tests for trading reports.
|
||||
*
|
||||
* Tests useReports and useReport hooks with MSW mocks.
|
||||
* Requirements validated: 5.4, 5.5
|
||||
*/
|
||||
import { renderHook, waitFor } from '@testing-library/react';
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
|
||||
import { http, HttpResponse } from 'msw';
|
||||
import { type ReactNode, createElement } from 'react';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { useReports, useReport } from '../api/hooks';
|
||||
import { server } from './mocks/server';
|
||||
|
||||
const mockReportList = [
|
||||
{
|
||||
id: 'rpt-1',
|
||||
report_type: 'daily',
|
||||
period_start: '2025-01-15',
|
||||
period_end: '2025-01-15',
|
||||
validation_status: 'passed',
|
||||
generated_at: '2025-01-15T21:30:00Z',
|
||||
},
|
||||
{
|
||||
id: 'rpt-2',
|
||||
report_type: 'weekly',
|
||||
period_start: '2025-01-13',
|
||||
period_end: '2025-01-17',
|
||||
validation_status: 'warnings',
|
||||
generated_at: '2025-01-18T10:00:00Z',
|
||||
},
|
||||
];
|
||||
|
||||
const mockReportDetail = {
|
||||
id: 'rpt-1',
|
||||
report_type: 'daily',
|
||||
period_start: '2025-01-15',
|
||||
period_end: '2025-01-15',
|
||||
validation_status: 'passed',
|
||||
generated_at: '2025-01-15T21:30:00Z',
|
||||
created_at: '2025-01-15T21:30:05Z',
|
||||
report_data: {
|
||||
pnl: { realized_pnl: 125.5, unrealized_pnl: -30.2 },
|
||||
executive_summary: 'Test executive summary',
|
||||
},
|
||||
};
|
||||
|
||||
function createWrapper() {
|
||||
const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
queries: { retry: false, gcTime: 0 },
|
||||
},
|
||||
});
|
||||
return function Wrapper({ children }: { children: ReactNode }) {
|
||||
return createElement(QueryClientProvider, { client: queryClient }, children);
|
||||
};
|
||||
}
|
||||
|
||||
describe('useReports', () => {
|
||||
it('fetches report list with default params', async () => {
|
||||
server.use(
|
||||
http.get('/api/reports', () => HttpResponse.json(mockReportList)),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() => useReports(), {
|
||||
wrapper: createWrapper(),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.isSuccess).toBe(true));
|
||||
|
||||
expect(result.current.data).toHaveLength(2);
|
||||
expect(result.current.data![0].id).toBe('rpt-1');
|
||||
expect(result.current.data![0].report_type).toBe('daily');
|
||||
expect(result.current.data![1].report_type).toBe('weekly');
|
||||
});
|
||||
|
||||
it('passes query params for filtering', async () => {
|
||||
let capturedUrl = '';
|
||||
server.use(
|
||||
http.get('/api/reports', ({ request }) => {
|
||||
capturedUrl = request.url;
|
||||
return HttpResponse.json([mockReportList[0]]);
|
||||
}),
|
||||
);
|
||||
|
||||
const { result } = renderHook(
|
||||
() => useReports({ report_type: 'daily', limit: 10 }),
|
||||
{ wrapper: createWrapper() },
|
||||
);
|
||||
|
||||
await waitFor(() => expect(result.current.isSuccess).toBe(true));
|
||||
|
||||
expect(capturedUrl).toContain('report_type=daily');
|
||||
expect(capturedUrl).toContain('limit=10');
|
||||
expect(result.current.data).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('handles error state', async () => {
|
||||
server.use(
|
||||
http.get('/api/reports', () =>
|
||||
new HttpResponse(null, { status: 500 }),
|
||||
),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() => useReports(), {
|
||||
wrapper: createWrapper(),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.isError).toBe(true));
|
||||
});
|
||||
});
|
||||
|
||||
describe('useReport', () => {
|
||||
it('fetches single report by id', async () => {
|
||||
server.use(
|
||||
http.get('/api/reports/rpt-1', () =>
|
||||
HttpResponse.json(mockReportDetail),
|
||||
),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() => useReport('rpt-1'), {
|
||||
wrapper: createWrapper(),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.isSuccess).toBe(true));
|
||||
|
||||
expect(result.current.data!.id).toBe('rpt-1');
|
||||
expect(result.current.data!.report_data).toBeDefined();
|
||||
expect(result.current.data!.report_data.pnl).toBeDefined();
|
||||
expect(result.current.data!.created_at).toBe('2025-01-15T21:30:05Z');
|
||||
});
|
||||
|
||||
it('does not fetch when id is undefined', async () => {
|
||||
const { result } = renderHook(() => useReport(undefined), {
|
||||
wrapper: createWrapper(),
|
||||
});
|
||||
|
||||
// Should stay in idle/loading state without fetching
|
||||
expect(result.current.isFetching).toBe(false);
|
||||
});
|
||||
|
||||
it('handles 404 error', async () => {
|
||||
server.use(
|
||||
http.get('/api/reports/nonexistent', () =>
|
||||
new HttpResponse(null, { status: 404 }),
|
||||
),
|
||||
);
|
||||
|
||||
const { result } = renderHook(() => useReport('nonexistent'), {
|
||||
wrapper: createWrapper(),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.isError).toBe(true));
|
||||
});
|
||||
});
|
||||
@@ -90,6 +90,25 @@ spec:
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: backfill-market-data
|
||||
image: {{ $root.Values.image.registry }}/{{ $svc.image }}:{{ $root.Values.image.tag }}
|
||||
imagePullPolicy: {{ $root.Values.image.pullPolicy }}
|
||||
command: ["sh", "-c", "python /app/scripts/backfill_market_data.py 2>/dev/null || echo 'Backfill script not available — skipping'"]
|
||||
securityContext:
|
||||
{{- include "stonks.containerSecurityContext" $root | nindent 12 }}
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: stonks-config
|
||||
{{- range $svc.secrets }}
|
||||
- secretRef:
|
||||
name: {{ . }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests: { cpu: 50m, memory: 64Mi }
|
||||
limits: { cpu: 200m, memory: 256Mi }
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ $svc.image }}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
## Live-Math stage overrides
|
||||
## Helm merges these with the base values.yaml.
|
||||
## Runs the dual-pipeline signal engine with its own paper trading account
|
||||
## for validating the math-upgraded signal pipeline in production conditions.
|
||||
|
||||
## Image tag — overridden by Kargo during promotion
|
||||
image:
|
||||
tag: latest
|
||||
|
||||
## Config overrides: paper broker (separate account), dedicated DB/Redis namespace
|
||||
config:
|
||||
BROKER_MODE: "paper"
|
||||
BROKER_PROVIDER: "alpaca"
|
||||
LOG_LEVEL: "INFO"
|
||||
TRADING_ENABLED: "true"
|
||||
POSTGRES_DB: "stonks_live_math"
|
||||
REDIS_DB: "3"
|
||||
DEPLOY_STAGE: "live-math"
|
||||
POSTGRES_USER: "stonks_live_math"
|
||||
OLLAMA_BASE_URL: "http://10.1.1.12:2701"
|
||||
MARKET_DATA_BASE_URL: "https://api.polygon.io"
|
||||
|
||||
## Secrets — dedicated paper trading account for live-math
|
||||
secrets:
|
||||
core:
|
||||
POSTGRES_PASSWORD: "St0nks0racl3!"
|
||||
MINIO_ACCESS_KEY: "AKIA6V7J3N9B5P0D2YQH"
|
||||
MINIO_SECRET_KEY: "8fG3!v2rJ7$wN@9mLpQ6zXbC4tKdPqW1"
|
||||
REDIS_PASSWORD: "PSCh4ng3me!"
|
||||
broker:
|
||||
BROKER_API_KEY: "PK64RS7NH24XPBI3IDEU3BB72Y"
|
||||
BROKER_API_SECRET: "Ho4D84392vB4s2TkGi52ra5FcxEskGfJSZYRKHa3qrYq"
|
||||
BROKER_BASE_URL: "https://paper-api.alpaca.markets"
|
||||
market:
|
||||
MARKET_DATA_API_KEY: "NPwKtrLvoBxcKt3Byp5PEvuZiBZU_d8E"
|
||||
|
||||
## Live-math-specific ingress hostnames
|
||||
ingress:
|
||||
hosts:
|
||||
queryApi: stonks-math-api.celestium.life
|
||||
symbolRegistry: stonks-math-registry.celestium.life
|
||||
dashboard: stonks-math.celestium.life
|
||||
superset: stonks-math-dash.celestium.life
|
||||
trino: stonks-math-trino.celestium.life
|
||||
tradingEngine: stonks-math-trading.celestium.life
|
||||
|
||||
## Scale: same as production (single replicas for most services)
|
||||
services:
|
||||
extractor:
|
||||
replicas: 1
|
||||
@@ -37,7 +37,7 @@ services:
|
||||
liveness: { path: /docs, port: 8000, initialDelay: 10, period: 30 }
|
||||
|
||||
ingestion:
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: ingestion
|
||||
command: "python -m services.ingestion.worker"
|
||||
@@ -127,6 +127,17 @@ services:
|
||||
requests: { cpu: 50m, memory: 64Mi }
|
||||
limits: { cpu: 200m, memory: 128Mi }
|
||||
|
||||
signalEngine:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
image: signal-engine
|
||||
command: "python -m services.signal_engine.main"
|
||||
tier: processing
|
||||
secrets: [stonks-core-secrets, stonks-market-secrets]
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 128Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
|
||||
lakePublisher:
|
||||
replicas: 1
|
||||
pipeline: true
|
||||
@@ -144,7 +155,7 @@ services:
|
||||
command: "uvicorn services.api.app:app --host 0.0.0.0 --port 8000"
|
||||
tier: api
|
||||
port: 8000
|
||||
secrets: [stonks-core-secrets]
|
||||
secrets: [stonks-core-secrets, stonks-market-secrets]
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 128Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
@@ -174,15 +185,15 @@ config:
|
||||
REDIS_DB: "0"
|
||||
MINIO_ENDPOINT: "minio.minio-service.svc.cluster.local:80"
|
||||
MINIO_SECURE: "false"
|
||||
OLLAMA_BASE_URL: ""
|
||||
OLLAMA_BASE_URL: "http://10.1.1.12:2701"
|
||||
OLLAMA_MODEL: "qwen3.5:9b-fast"
|
||||
OLLAMA_TIMEOUT: "240"
|
||||
OLLAMA_MAX_RETRIES: "2"
|
||||
OLLAMA_RETRY_BASE_DELAY: "1.0"
|
||||
OLLAMA_RETRY_MAX_DELAY: "10.0"
|
||||
OLLAMA_RETRY_BACKOFF_MULTIPLIER: "2.0"
|
||||
VLLM_BASE_URL: "http://192.168.42.254:8000"
|
||||
VLLM_MODEL: "AxionML/Qwen3.5-9B-NVFP4"
|
||||
VLLM_BASE_URL: "http://10.1.1.12:2701"
|
||||
VLLM_MODEL: "qwen3.5:9b-fast"
|
||||
VLLM_TIMEOUT: "120"
|
||||
VLLM_MAX_RETRIES: "2"
|
||||
VLLM_TEMPERATURE: "0.7"
|
||||
@@ -194,7 +205,7 @@ config:
|
||||
TRINO_ICEBERG_CATALOG: "iceberg"
|
||||
BROKER_MODE: "paper"
|
||||
BROKER_PROVIDER: ""
|
||||
MARKET_DATA_BASE_URL: ""
|
||||
MARKET_DATA_BASE_URL: "https://api.polygon.io"
|
||||
MARKET_DATA_PROVIDER: "polygon"
|
||||
RETENTION_RAW_MARKET_DAYS: "90"
|
||||
RETENTION_RAW_NEWS_DAYS: "180"
|
||||
@@ -221,20 +232,21 @@ config:
|
||||
TRADING_RISK_TIER: "moderate"
|
||||
TRADING_ABSOLUTE_POSITION_CAP: "10000.0"
|
||||
TRADING_MAX_OPEN_POSITIONS: "10"
|
||||
TZ: "America/Los_Angeles"
|
||||
|
||||
## Secrets
|
||||
secrets:
|
||||
core:
|
||||
POSTGRES_PASSWORD: ""
|
||||
MINIO_ACCESS_KEY: ""
|
||||
MINIO_SECRET_KEY: ""
|
||||
REDIS_PASSWORD: ""
|
||||
POSTGRES_PASSWORD: "St0nks0racl3!"
|
||||
MINIO_ACCESS_KEY: "AKIA6V7J3N9B5P0D2YQH"
|
||||
MINIO_SECRET_KEY: "8fG3!v2rJ7$wN@9mLpQ6zXbC4tKdPqW1"
|
||||
REDIS_PASSWORD: "PSCh4ng3me!"
|
||||
broker:
|
||||
BROKER_API_KEY: ""
|
||||
BROKER_API_SECRET: ""
|
||||
BROKER_BASE_URL: ""
|
||||
BROKER_API_KEY: "PKECQBNHD6ZLKEXZZVIFTOLX72"
|
||||
BROKER_API_SECRET: "5pV8zfUn92zAUL4TAwFor3Lk8RqNBcRzN12Y1HJjU7Gn"
|
||||
BROKER_BASE_URL: "https://paper-api.alpaca.markets"
|
||||
market:
|
||||
MARKET_DATA_API_KEY: ""
|
||||
MARKET_DATA_API_KEY: "NPwKtrLvoBxcKt3Byp5PEvuZiBZU_d8E"
|
||||
gmail:
|
||||
GMAIL_SENDER: "celes@celestium.life"
|
||||
GMAIL_RECIPIENT: "celes@celestium.life"
|
||||
|
||||
@@ -8,7 +8,7 @@ CREATE TABLE IF NOT EXISTS ai_agents (
|
||||
slug VARCHAR(100) NOT NULL UNIQUE,
|
||||
purpose TEXT NOT NULL DEFAULT '',
|
||||
model_provider VARCHAR(50) NOT NULL DEFAULT 'ollama',
|
||||
model_name VARCHAR(200) NOT NULL DEFAULT 'qwen3.5:9b',
|
||||
model_name VARCHAR(200) NOT NULL DEFAULT 'qwen3.5:9b-fast',
|
||||
system_prompt TEXT NOT NULL DEFAULT '',
|
||||
user_prompt_template TEXT NOT NULL DEFAULT '',
|
||||
prompt_version VARCHAR(100) NOT NULL DEFAULT '',
|
||||
|
||||
@@ -1,22 +1,23 @@
|
||||
-- Sync ai_agents system_prompt and model_name to match code defaults.
|
||||
-- The original 026 seed used abbreviated prompts and the base model name;
|
||||
-- this migration brings them in line with the authoritative prompts defined
|
||||
-- in the Python service code and the actual deployed model tag.
|
||||
-- Sync ai_agents system_prompt to match code defaults.
|
||||
-- The original 026 seed used abbreviated prompts; this migration brings
|
||||
-- them in line with the authoritative prompts defined in the Python
|
||||
-- service code.
|
||||
--
|
||||
-- NOTE: model_name and model_provider are NOT overwritten here.
|
||||
-- They are configured per-environment via the API or direct DB update
|
||||
-- and should not be reset by migrations.
|
||||
|
||||
UPDATE ai_agents
|
||||
SET system_prompt = E'You are a financial document analyst. Extract structured data as JSON. Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. Every field in the schema is required. Use "other" for catalyst_type if unsure. Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max.',
|
||||
model_name = 'qwen3.5:9b-fast',
|
||||
updated_at = NOW()
|
||||
WHERE slug = 'document-extractor';
|
||||
|
||||
UPDATE ai_agents
|
||||
SET system_prompt = E'You classify MACRO-LEVEL global news into structured event JSON. Return ONLY a single JSON object. No markdown, no explanation. Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences.\n\nCRITICAL: Only classify articles about MACRO events that affect entire markets, sectors, or economies. Examples: trade wars, interest rate changes, commodity supply disruptions, regulatory changes, geopolitical conflicts, natural disasters.\n\nDO NOT classify as macro events: individual company earnings, lawsuits against a single company, single-company management changes, individual stock analysis, company-specific debt or bankruptcy, product launches by one company. For these, set severity to "low", confidence below 0.3, and leave affected_regions, affected_sectors, and affected_commodities as empty arrays.',
|
||||
model_name = 'qwen3.5:9b-fast',
|
||||
updated_at = NOW()
|
||||
WHERE slug = 'event-classifier';
|
||||
|
||||
UPDATE ai_agents
|
||||
SET system_prompt = E'You are a concise financial analyst. You rewrite structured trade thesis summaries into clear, professional prose suitable for an internal research note.\n\nSTRICT RULES:\n1. Do NOT add any information that is not present in the input.\n2. Do NOT fabricate numbers, dates, company names, or analyst opinions.\n3. Keep the rewrite under 150 words.\n4. Preserve all factual claims, risk notes, and evidence counts from the input.\n5. Use a neutral, professional tone. Avoid hype or marketing language.\n6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.',
|
||||
model_name = 'qwen3.5:9b-fast',
|
||||
updated_at = NOW()
|
||||
WHERE slug = 'thesis-rewriter';
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
-- Fix agent default model_provider and model_name to match production config.
|
||||
-- The original migration 026 seeded with 'ollama'/'qwen3.5:9b-fast' but production
|
||||
-- uses vLLM. This migration updates agents that still have the old defaults,
|
||||
-- preserving any user customizations (only updates if model_name matches the old default).
|
||||
|
||||
UPDATE ai_agents
|
||||
SET model_provider = 'vllm',
|
||||
model_name = 'AxionML/Qwen3.5-9B-NVFP4',
|
||||
max_tokens = 2048,
|
||||
updated_at = NOW()
|
||||
WHERE slug IN ('document-extractor', 'event-classifier', 'thesis-rewriter')
|
||||
AND source = 'system'
|
||||
AND model_name = 'qwen3.5:9b-fast';
|
||||
@@ -0,0 +1,12 @@
|
||||
-- Seed a default risk_configs row with all signal layers explicitly enabled.
|
||||
-- This ensures fresh deployments have macro and competitive layers active
|
||||
-- without requiring manual API calls or DB patches.
|
||||
-- Idempotent: skips if an active config already exists.
|
||||
|
||||
INSERT INTO risk_configs (name, trading_mode, config, active)
|
||||
SELECT 'default', 'paper',
|
||||
'{"macro_enabled": true, "competitive_enabled": true}'::jsonb,
|
||||
TRUE
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM risk_configs WHERE active = TRUE
|
||||
);
|
||||
@@ -0,0 +1,16 @@
|
||||
-- Stop hardcoding agent model_name in migrations.
|
||||
--
|
||||
-- Migration 029 previously forced model_name='qwen3.5:9b-fast' on every
|
||||
-- deploy, overwriting per-environment model configuration. That migration
|
||||
-- has been fixed to only sync system_prompt (not model_name).
|
||||
--
|
||||
-- This migration updates agents still on the old ollama provider/model
|
||||
-- to use vllm with the default VLLM model. Agents already configured
|
||||
-- with a different model (e.g. via the API) are left untouched.
|
||||
|
||||
UPDATE ai_agents
|
||||
SET model_provider = 'vllm',
|
||||
model_name = 'AxionML/Qwen3.5-9B-NVFP4',
|
||||
updated_at = NOW()
|
||||
WHERE model_name IN ('qwen3.5:9b-fast', 'qwen3.5:9b')
|
||||
AND source = 'system';
|
||||
@@ -0,0 +1,18 @@
|
||||
-- Source accuracy tracking table for historical prediction accuracy per source.
|
||||
--
|
||||
-- Stores per-source accuracy metrics (fraction of correct directional calls)
|
||||
-- used by the probabilistic scoring pipeline to weight source credibility.
|
||||
-- See Requirement 4.5: source accuracy metrics stored with source identifier,
|
||||
-- accuracy ratio, sample count, and last updated timestamp.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS source_accuracy (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
source_id VARCHAR(200) NOT NULL,
|
||||
accuracy_ratio FLOAT NOT NULL DEFAULT 0.5,
|
||||
sample_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
UNIQUE(source_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_source_accuracy_source ON source_accuracy(source_id);
|
||||
@@ -0,0 +1,176 @@
|
||||
-- Migration 035: Model Validation, Calibration, and Signal Quality
|
||||
-- Creates tables for prediction snapshots, outcomes, evidence links, and metric snapshots
|
||||
-- Plus views for prediction performance and source performance analysis
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: prediction_snapshots
|
||||
-- Immutable snapshot of a prediction at generation time
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS prediction_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
ticker VARCHAR(20) NOT NULL,
|
||||
"window" VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(50) NOT NULL,
|
||||
direction VARCHAR(20) NOT NULL,
|
||||
action VARCHAR(20) NOT NULL,
|
||||
mode VARCHAR(50) NOT NULL,
|
||||
strength FLOAT NOT NULL,
|
||||
confidence FLOAT NOT NULL,
|
||||
contradiction FLOAT NOT NULL DEFAULT 0.0,
|
||||
p_bull FLOAT,
|
||||
p_bear FLOAT,
|
||||
score_company FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_macro FLOAT NOT NULL DEFAULT 0.0,
|
||||
score_competitive FLOAT NOT NULL DEFAULT 0.0,
|
||||
evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
unique_source_count INTEGER NOT NULL DEFAULT 0,
|
||||
duplicate_evidence_count INTEGER NOT NULL DEFAULT 0,
|
||||
price_at_prediction FLOAT,
|
||||
spy_price_at_prediction FLOAT,
|
||||
sector_etf_price_at_prediction FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_ticker ON prediction_snapshots(ticker);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_generated ON prediction_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_snap_horizon ON prediction_snapshots(horizon);
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: prediction_outcomes
|
||||
-- Realized outcome for a prediction at a specific horizon
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS prediction_outcomes (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
future_price FLOAT,
|
||||
future_return FLOAT,
|
||||
spy_future_price FLOAT,
|
||||
spy_return FLOAT,
|
||||
sector_etf_future_price FLOAT,
|
||||
sector_etf_return FLOAT,
|
||||
excess_return_vs_spy FLOAT,
|
||||
excess_return_vs_sector FLOAT,
|
||||
direction_correct BOOLEAN,
|
||||
profitable BOOLEAN,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_prediction ON prediction_outcomes(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_horizon ON prediction_outcomes(horizon);
|
||||
CREATE INDEX IF NOT EXISTS idx_pred_out_evaluated ON prediction_outcomes(evaluated_at);
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: signal_evidence_links
|
||||
-- Link between a prediction and a contributing evidence document
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS signal_evidence_links (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
prediction_id UUID NOT NULL REFERENCES prediction_snapshots(id),
|
||||
document_id VARCHAR(200),
|
||||
signal_id VARCHAR(200),
|
||||
ticker VARCHAR(20),
|
||||
source VARCHAR(200),
|
||||
source_type VARCHAR(50),
|
||||
catalyst_type VARCHAR(50),
|
||||
sentiment VARCHAR(20),
|
||||
impact FLOAT,
|
||||
extraction_confidence FLOAT,
|
||||
weight FLOAT,
|
||||
is_duplicate BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
canonical_evidence_key VARCHAR(64),
|
||||
contribution_score FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_prediction ON signal_evidence_links(prediction_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_document ON signal_evidence_links(document_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_sig_ev_ticker ON signal_evidence_links(ticker);
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: model_metric_snapshots
|
||||
-- Aggregate model quality metrics for a lookback/horizon combination
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS model_metric_snapshots (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
lookback_window VARCHAR(20) NOT NULL,
|
||||
horizon VARCHAR(20) NOT NULL,
|
||||
prediction_count INTEGER NOT NULL DEFAULT 0,
|
||||
win_rate FLOAT,
|
||||
directional_accuracy FLOAT,
|
||||
information_coefficient FLOAT,
|
||||
rank_information_coefficient FLOAT,
|
||||
avg_return FLOAT,
|
||||
avg_excess_return_vs_spy FLOAT,
|
||||
avg_excess_return_vs_sector FLOAT,
|
||||
calibration_error FLOAT,
|
||||
brier_score FLOAT,
|
||||
buy_win_rate FLOAT,
|
||||
sell_win_rate FLOAT,
|
||||
hold_win_rate FLOAT,
|
||||
metadata JSONB DEFAULT '{}',
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_generated ON model_metric_snapshots(generated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_lookback ON model_metric_snapshots(lookback_window);
|
||||
CREATE INDEX IF NOT EXISTS idx_model_snap_horizon ON model_metric_snapshots(horizon);
|
||||
|
||||
-- ============================================================================
|
||||
-- View: v_prediction_performance
|
||||
-- Joins prediction snapshots with outcomes for flat analysis
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE VIEW v_prediction_performance AS
|
||||
SELECT
|
||||
ps.ticker,
|
||||
ps.direction,
|
||||
ps.action,
|
||||
ps.confidence,
|
||||
ps.strength,
|
||||
ps.contradiction,
|
||||
ps.p_bull,
|
||||
ps.score_company,
|
||||
ps.score_macro,
|
||||
ps.score_competitive,
|
||||
ps.evidence_count,
|
||||
ps.unique_source_count,
|
||||
ps.duplicate_evidence_count,
|
||||
ps.price_at_prediction,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.excess_return_vs_sector,
|
||||
po.direction_correct,
|
||||
po.profitable,
|
||||
po.horizon,
|
||||
ps.generated_at,
|
||||
po.evaluated_at
|
||||
FROM prediction_snapshots ps
|
||||
JOIN prediction_outcomes po ON po.prediction_id = ps.id;
|
||||
|
||||
-- ============================================================================
|
||||
-- View: v_source_performance
|
||||
-- Joins evidence links with snapshots and outcomes for source attribution
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE VIEW v_source_performance AS
|
||||
SELECT
|
||||
sel.source,
|
||||
sel.source_type,
|
||||
sel.catalyst_type,
|
||||
sel.sentiment,
|
||||
sel.weight,
|
||||
sel.contribution_score,
|
||||
sel.is_duplicate,
|
||||
po.direction_correct,
|
||||
po.future_return,
|
||||
po.excess_return_vs_spy,
|
||||
po.horizon,
|
||||
ps.generated_at
|
||||
FROM signal_evidence_links sel
|
||||
JOIN prediction_snapshots ps ON ps.id = sel.prediction_id
|
||||
JOIN prediction_outcomes po ON po.prediction_id = sel.prediction_id;
|
||||
@@ -0,0 +1,52 @@
|
||||
-- Seed saved queries for model validation, calibration, and signal quality analysis
|
||||
-- Uses the prediction_snapshots, prediction_outcomes, signal_evidence_links,
|
||||
-- model_metric_snapshots tables and the v_prediction_performance / v_source_performance views.
|
||||
|
||||
INSERT INTO saved_queries (name, description, sql_text) VALUES
|
||||
|
||||
('Prediction Snapshot Overview', 'Recent prediction snapshots with confidence and evidence counts',
|
||||
'SELECT ticker, direction, action, mode, round(confidence::numeric, 3) AS confidence, round(strength::numeric, 3) AS strength, evidence_count, unique_source_count, duplicate_evidence_count, round(price_at_prediction::numeric, 2) AS price, generated_at FROM prediction_snapshots ORDER BY generated_at DESC LIMIT 50'),
|
||||
|
||||
('Predictions by Ticker', 'Prediction count and avg confidence per ticker',
|
||||
'SELECT ticker, count(*) AS predictions, round(avg(confidence)::numeric, 3) AS avg_confidence, round(avg(strength)::numeric, 3) AS avg_strength, count(*) FILTER (WHERE action = ''buy'') AS buys, count(*) FILTER (WHERE action = ''sell'') AS sells, count(*) FILTER (WHERE action = ''hold'') AS holds, count(*) FILTER (WHERE action = ''watch'') AS watches FROM prediction_snapshots GROUP BY ticker ORDER BY predictions DESC'),
|
||||
|
||||
('Prediction Confidence Distribution', 'Predictions grouped by confidence bucket',
|
||||
'SELECT CASE WHEN confidence >= 0.90 THEN ''[0.90, 1.00]'' WHEN confidence >= 0.80 THEN ''[0.80, 0.90)'' WHEN confidence >= 0.70 THEN ''[0.70, 0.80)'' WHEN confidence >= 0.60 THEN ''[0.60, 0.70)'' ELSE ''[0.50, 0.60)'' END AS bucket, count(*) AS count, round(avg(confidence)::numeric, 3) AS avg_conf, count(*) FILTER (WHERE action = ''buy'') AS buys, count(*) FILTER (WHERE action = ''sell'') AS sells FROM prediction_snapshots GROUP BY 1 ORDER BY bucket'),
|
||||
|
||||
('Evidence Deduplication Quality', 'Duplicate evidence rate per ticker — high rates suggest source overlap',
|
||||
'SELECT ticker, count(*) AS total_links, sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END) AS duplicates, round(sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END)::numeric / NULLIF(count(*), 0) * 100, 1) AS dupe_pct, count(DISTINCT source_type) AS source_types FROM signal_evidence_links GROUP BY ticker ORDER BY dupe_pct DESC'),
|
||||
|
||||
('Evidence Source Breakdown', 'Evidence links by source type with duplicate rates',
|
||||
'SELECT source_type, count(*) AS total, sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END) AS duplicates, round(sum(CASE WHEN is_duplicate THEN 1 ELSE 0 END)::numeric / NULLIF(count(*), 0) * 100, 1) AS dupe_pct, round(avg(weight)::numeric, 3) AS avg_weight, round(avg(contribution_score)::numeric, 4) AS avg_contribution FROM signal_evidence_links GROUP BY source_type ORDER BY total DESC'),
|
||||
|
||||
('Evidence by Catalyst Type', 'Evidence links grouped by catalyst type',
|
||||
'SELECT catalyst_type, count(*) AS total, round(avg(impact)::numeric, 3) AS avg_impact, round(avg(extraction_confidence)::numeric, 3) AS avg_extraction_conf, count(DISTINCT ticker) AS tickers FROM signal_evidence_links WHERE catalyst_type IS NOT NULL GROUP BY catalyst_type ORDER BY total DESC'),
|
||||
|
||||
('Prediction Performance', 'Prediction outcomes with returns and accuracy (uses v_prediction_performance view)',
|
||||
'SELECT ticker, direction, action, round(confidence::numeric, 3) AS confidence, round(future_return::numeric, 4) AS future_return, round(excess_return_vs_spy::numeric, 4) AS excess_vs_spy, direction_correct, profitable, horizon, generated_at FROM v_prediction_performance ORDER BY generated_at DESC LIMIT 50'),
|
||||
|
||||
('Win Rate by Ticker', 'Directional accuracy and profitability per ticker',
|
||||
'SELECT ticker, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(CASE WHEN profitable THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS profit_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy FROM v_prediction_performance GROUP BY ticker HAVING count(*) >= 5 ORDER BY win_rate_pct DESC'),
|
||||
|
||||
('Win Rate by Horizon', 'Directional accuracy across prediction horizons',
|
||||
'SELECT horizon, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy FROM v_prediction_performance GROUP BY horizon ORDER BY outcomes DESC'),
|
||||
|
||||
('Source Performance', 'Per-source win rate and returns (uses v_source_performance view)',
|
||||
'SELECT source, source_type, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return, round(avg(excess_return_vs_spy)::numeric, 4) AS avg_excess_spy, round(avg(CASE WHEN is_duplicate THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS dupe_pct FROM v_source_performance GROUP BY source, source_type HAVING count(*) >= 10 ORDER BY win_rate_pct DESC'),
|
||||
|
||||
('Catalyst Performance', 'Win rate by catalyst type',
|
||||
'SELECT catalyst_type, count(*) AS outcomes, round(avg(CASE WHEN direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct, round(avg(future_return)::numeric, 4) AS avg_return FROM v_source_performance WHERE catalyst_type IS NOT NULL GROUP BY catalyst_type HAVING count(*) >= 5 ORDER BY win_rate_pct DESC'),
|
||||
|
||||
('Model Quality Timeline', 'Model metric snapshots over time for the 30d/7d window',
|
||||
'SELECT generated_at, prediction_count, round(win_rate::numeric, 3) AS win_rate, round(information_coefficient::numeric, 4) AS ic, round(rank_information_coefficient::numeric, 4) AS rank_ic, round(calibration_error::numeric, 4) AS ece, round(brier_score::numeric, 4) AS brier, round(avg_excess_return_vs_spy::numeric, 4) AS excess_spy FROM model_metric_snapshots WHERE lookback_window = ''30d'' AND horizon = ''7d'' ORDER BY generated_at DESC LIMIT 30'),
|
||||
|
||||
('Quality Gate Status', 'Latest quality gate evaluation from risk_configs',
|
||||
'SELECT key, value, updated_at FROM risk_configs WHERE key = ''model_quality_gate'' ORDER BY updated_at DESC LIMIT 1'),
|
||||
|
||||
('High Duplicate Predictions', 'Predictions where duplicate evidence exceeds 50% — potential inflation risk',
|
||||
'SELECT ticker, direction, action, round(confidence::numeric, 3) AS confidence, evidence_count, duplicate_evidence_count, round(duplicate_evidence_count::numeric / NULLIF(evidence_count, 0) * 100, 1) AS dupe_pct, generated_at FROM prediction_snapshots WHERE evidence_count > 0 AND duplicate_evidence_count::float / NULLIF(evidence_count, 0) > 0.5 ORDER BY dupe_pct DESC LIMIT 30'),
|
||||
|
||||
('Prediction vs SPY', 'Average excess return vs SPY by action type',
|
||||
'SELECT ps.action, count(*) AS outcomes, round(avg(po.future_return)::numeric, 4) AS avg_return, round(avg(po.excess_return_vs_spy)::numeric, 4) AS avg_excess_spy, round(avg(CASE WHEN po.direction_correct THEN 1.0 ELSE 0.0 END)::numeric * 100, 1) AS win_rate_pct FROM prediction_snapshots ps JOIN prediction_outcomes po ON po.prediction_id = ps.id GROUP BY ps.action ORDER BY avg_excess_spy DESC')
|
||||
|
||||
ON CONFLICT (name) DO UPDATE SET sql_text = EXCLUDED.sql_text, description = EXCLUDED.description;
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Integrity check saved queries for the SQL Explorer
|
||||
-- These validate data consistency across the model validation pipeline.
|
||||
|
||||
INSERT INTO saved_queries (name, description, sql_text) VALUES
|
||||
|
||||
('⚕ Duplicate Snapshots', 'Detect duplicate prediction snapshots (same ticker+timestamp)',
|
||||
'SELECT ticker, generated_at, count(*) AS duplicates FROM prediction_snapshots GROUP BY ticker, generated_at HAVING count(*) > 1 ORDER BY duplicates DESC'),
|
||||
|
||||
('⚕ Orphaned Evidence Links', 'Evidence links referencing non-existent snapshots',
|
||||
'SELECT sel.id, sel.prediction_id, sel.ticker, sel.source_type FROM signal_evidence_links sel WHERE NOT EXISTS (SELECT 1 FROM prediction_snapshots ps WHERE ps.id = sel.prediction_id) LIMIT 20'),
|
||||
|
||||
('⚕ Evidence Count Mismatches', 'Snapshots where stored evidence_count differs from actual link count',
|
||||
'SELECT ps.id, ps.ticker, ps.evidence_count AS stored, count(sel.id) AS actual, ps.evidence_count - count(sel.id) AS diff FROM prediction_snapshots ps LEFT JOIN signal_evidence_links sel ON sel.prediction_id = ps.id GROUP BY ps.id, ps.ticker, ps.evidence_count HAVING ps.evidence_count != count(sel.id) ORDER BY abs(ps.evidence_count - count(sel.id)) DESC LIMIT 20'),
|
||||
|
||||
('⚕ Contribution Score Integrity', 'Snapshots where contribution scores do not sum to 1.0 (±0.01)',
|
||||
'SELECT prediction_id, round(sum(contribution_score)::numeric, 6) AS score_sum, count(*) AS links FROM signal_evidence_links WHERE contribution_score IS NOT NULL GROUP BY prediction_id HAVING abs(sum(contribution_score) - 1.0) > 0.01 LIMIT 20'),
|
||||
|
||||
('⚕ Canonical Key Consistency', 'Documents producing different canonical keys across predictions (should be 0)',
|
||||
'SELECT document_id, count(DISTINCT canonical_evidence_key) AS key_variants, array_agg(DISTINCT canonical_evidence_key) AS keys FROM signal_evidence_links WHERE document_id IS NOT NULL AND canonical_evidence_key IS NOT NULL GROUP BY document_id HAVING count(DISTINCT canonical_evidence_key) > 1 LIMIT 20'),
|
||||
|
||||
('⚕ Out-of-Range Values', 'Snapshots with confidence or strength outside [0, 1]',
|
||||
'SELECT id, ticker, confidence, strength, generated_at FROM prediction_snapshots WHERE confidence < 0 OR confidence > 1 OR strength < 0 OR strength > 1 LIMIT 20'),
|
||||
|
||||
('⚕ Unmatched Snapshots', 'Prediction snapshots with no matching recommendation',
|
||||
'SELECT ps.id, ps.ticker, ps.action, ps.confidence, ps.generated_at FROM prediction_snapshots ps WHERE NOT EXISTS (SELECT 1 FROM recommendations r WHERE r.ticker = ps.ticker AND r.generated_at = ps.generated_at) LIMIT 20'),
|
||||
|
||||
('⚕ Zero Evidence Rate', 'Percentage of snapshots with no evidence links by action type',
|
||||
'SELECT ps.action, count(*) AS total, count(*) FILTER (WHERE NOT EXISTS (SELECT 1 FROM signal_evidence_links sel WHERE sel.prediction_id = ps.id)) AS zero_evidence, round(count(*) FILTER (WHERE NOT EXISTS (SELECT 1 FROM signal_evidence_links sel WHERE sel.prediction_id = ps.id))::numeric / NULLIF(count(*), 0) * 100, 1) AS zero_pct FROM prediction_snapshots ps GROUP BY ps.action ORDER BY zero_pct DESC'),
|
||||
|
||||
('⚕ Duplicate Evidence Mismatches', 'Snapshots where stored duplicate count differs from actual is_duplicate count',
|
||||
'SELECT ps.id, ps.ticker, ps.duplicate_evidence_count AS stored_dupes, count(sel.id) FILTER (WHERE sel.is_duplicate) AS actual_dupes FROM prediction_snapshots ps JOIN signal_evidence_links sel ON sel.prediction_id = ps.id GROUP BY ps.id, ps.ticker, ps.duplicate_evidence_count HAVING ps.duplicate_evidence_count != count(sel.id) FILTER (WHERE sel.is_duplicate) LIMIT 20'),
|
||||
|
||||
('⚕ Missing Price Data', 'Snapshots missing ticker or SPY price at prediction time',
|
||||
'SELECT ticker, count(*) AS total, count(*) FILTER (WHERE price_at_prediction IS NULL) AS null_price, count(*) FILTER (WHERE spy_price_at_prediction IS NULL) AS null_spy FROM prediction_snapshots GROUP BY ticker HAVING count(*) FILTER (WHERE price_at_prediction IS NULL) > 0 OR count(*) FILTER (WHERE spy_price_at_prediction IS NULL) > 0 ORDER BY null_price DESC'),
|
||||
|
||||
('⚕ Outcome Integrity', 'Prediction outcomes with impossible values (return outside [-1, 10] or NULL direction_correct)',
|
||||
'SELECT po.id, ps.ticker, po.horizon, po.future_return, po.direction_correct, po.profitable FROM prediction_outcomes po JOIN prediction_snapshots ps ON ps.id = po.prediction_id WHERE po.future_return < -1 OR po.future_return > 10 OR po.direction_correct IS NULL LIMIT 20'),
|
||||
|
||||
('⚕ Pipeline Health Summary', 'Overall validation pipeline health dashboard',
|
||||
'SELECT ''Snapshots'' AS metric, count(*)::text AS value FROM prediction_snapshots UNION ALL SELECT ''Evidence Links'', count(*)::text FROM signal_evidence_links UNION ALL SELECT ''Outcomes'', count(*)::text FROM prediction_outcomes UNION ALL SELECT ''Metric Snapshots'', count(*)::text FROM model_metric_snapshots UNION ALL SELECT ''Duplicate Evidence %'', round(avg(CASE WHEN is_duplicate THEN 100.0 ELSE 0.0 END)::numeric, 1)::text FROM signal_evidence_links UNION ALL SELECT ''Zero-Evidence Snapshots'', count(*)::text FROM prediction_snapshots ps WHERE NOT EXISTS (SELECT 1 FROM signal_evidence_links sel WHERE sel.prediction_id = ps.id) UNION ALL SELECT ''Avg Confidence'', round(avg(confidence)::numeric, 3)::text FROM prediction_snapshots UNION ALL SELECT ''Distinct Tickers'', count(DISTINCT ticker)::text FROM prediction_snapshots')
|
||||
|
||||
ON CONFLICT (name) DO UPDATE SET sql_text = EXCLUDED.sql_text, description = EXCLUDED.description;
|
||||
@@ -0,0 +1,50 @@
|
||||
-- Migration 038: Trading Reports
|
||||
-- Creates the trading_reports table for storing periodic performance reports
|
||||
-- and seeds the Report Summarizer AI agent for report section summarization.
|
||||
|
||||
-- ============================================================================
|
||||
-- Table: trading_reports
|
||||
-- Stores daily and weekly trading performance reports as structured JSONB
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS trading_reports (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
report_type VARCHAR(20) NOT NULL,
|
||||
period_start DATE NOT NULL,
|
||||
period_end DATE NOT NULL,
|
||||
report_data JSONB NOT NULL,
|
||||
validation_status VARCHAR(20) NOT NULL DEFAULT 'passed',
|
||||
generated_at TIMESTAMPTZ NOT NULL,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
CONSTRAINT uq_trading_reports_period UNIQUE (report_type, period_start, period_end),
|
||||
CONSTRAINT chk_report_type CHECK (report_type IN ('daily', 'weekly'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_type ON trading_reports(report_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_period ON trading_reports(period_start, period_end);
|
||||
CREATE INDEX IF NOT EXISTS idx_trading_reports_generated ON trading_reports(generated_at DESC);
|
||||
|
||||
-- ============================================================================
|
||||
-- Seed: Report Summarizer Agent
|
||||
-- Generates concise natural-language summaries of trading performance report
|
||||
-- sections. Uses chunked data within the 8k-token context window.
|
||||
-- Only inserted if the slug does not already exist (idempotent).
|
||||
-- ============================================================================
|
||||
INSERT INTO ai_agents (name, slug, purpose, model_provider, model_name, system_prompt, prompt_version, schema_version, temperature, max_tokens, timeout_seconds, max_retries, source)
|
||||
SELECT * FROM (VALUES
|
||||
(
|
||||
'Report Summarizer',
|
||||
'report-summarizer',
|
||||
'Generates concise natural-language summaries of trading performance report sections. Processes chunked data within the 8k-token context window.',
|
||||
'ollama',
|
||||
'qwen3.5:9b-fast',
|
||||
E'You are a concise financial performance analyst. You summarize trading performance data into clear, professional prose.\n\nSTRICT RULES:\n1. Do NOT fabricate any data not present in the input.\n2. Do NOT add opinions, predictions, or recommendations.\n3. Keep each summary under 200 words.\n4. Highlight notable trends, outliers, and changes from prior periods.\n5. Use precise numbers from the input data.\n6. Use a neutral, professional tone.\n7. Return ONLY the summary text. No JSON, no markdown, no commentary.',
|
||||
'report-summarizer-v1',
|
||||
'1.0.0',
|
||||
0.0,
|
||||
1024,
|
||||
60,
|
||||
2,
|
||||
'system'
|
||||
)
|
||||
) AS v(name, slug, purpose, model_provider, model_name, system_prompt, prompt_version, schema_version, temperature, max_tokens, timeout_seconds, max_retries, source)
|
||||
WHERE NOT EXISTS (SELECT 1 FROM ai_agents WHERE slug = 'report-summarizer');
|
||||
@@ -0,0 +1,51 @@
|
||||
-- Migration 039: Signal Engine Outputs
|
||||
-- Creates the signal_engine_outputs table for persisting dual-pipeline evaluations.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS signal_engine_outputs (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
ticker TEXT NOT NULL,
|
||||
evaluated_at TIMESTAMPTZ NOT NULL,
|
||||
price NUMERIC NOT NULL,
|
||||
|
||||
-- Heuristic pipeline
|
||||
heuristic_verdict TEXT NOT NULL,
|
||||
heuristic_confidence NUMERIC NOT NULL,
|
||||
heuristic_s_total NUMERIC NOT NULL,
|
||||
|
||||
-- Probabilistic pipeline
|
||||
probabilistic_verdict TEXT NOT NULL,
|
||||
probabilistic_p_up NUMERIC NOT NULL,
|
||||
probabilistic_entropy NUMERIC NOT NULL,
|
||||
probabilistic_ev_r NUMERIC NOT NULL,
|
||||
|
||||
-- Delta analysis
|
||||
delta_agreement BOOLEAN NOT NULL,
|
||||
delta_confidence_delta NUMERIC NOT NULL,
|
||||
delta_reasons JSONB NOT NULL DEFAULT '[]'::jsonb,
|
||||
|
||||
-- Trade plan (null when no BUY verdict)
|
||||
trade_plan JSONB,
|
||||
|
||||
-- Full output for audit
|
||||
full_output JSONB NOT NULL,
|
||||
|
||||
-- Exit signals
|
||||
exit_signals JSONB NOT NULL DEFAULT '[]'::jsonb,
|
||||
|
||||
-- Metadata
|
||||
pipeline_mode TEXT NOT NULL DEFAULT 'dual_pipeline',
|
||||
shadow_mode BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Index for per-ticker time-range queries
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_ticker_time
|
||||
ON signal_engine_outputs (ticker, evaluated_at);
|
||||
|
||||
-- Index for global time-range queries
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_evaluated
|
||||
ON signal_engine_outputs (evaluated_at);
|
||||
|
||||
-- Index for filtering by verdict
|
||||
CREATE INDEX IF NOT EXISTS idx_signal_engine_outputs_verdicts
|
||||
ON signal_engine_outputs (heuristic_verdict, probabilistic_verdict);
|
||||
@@ -85,10 +85,10 @@ elif [ "$HTTP_CODE" = "404" ]; then
|
||||
--data-urlencode "repo_root_path=/data/git/repositories" \
|
||||
--data-urlencode "lfs_root_path=/data/git/lfs" \
|
||||
--data-urlencode "run_user=git" \
|
||||
--data-urlencode "domain=gitea-service.git-server.svc.cluster.local" \
|
||||
--data-urlencode "domain=git.celestium.life" \
|
||||
--data-urlencode "ssh_port=22" \
|
||||
--data-urlencode "http_port=3000" \
|
||||
--data-urlencode "app_url=http://gitea-service.git-server.svc.cluster.local:3000/" \
|
||||
--data-urlencode "app_url=https://git.celestium.life/" \
|
||||
--data-urlencode "log_root_path=/data/gitea/log" \
|
||||
--data-urlencode "admin_name=${GITEA_ADMIN_USER}" \
|
||||
--data-urlencode "admin_passwd=${GITEA_ADMIN_PASSWORD}" \
|
||||
|
||||
@@ -9,6 +9,8 @@ spec:
|
||||
provider:
|
||||
job:
|
||||
spec:
|
||||
ttlSecondsAfterFinished: 3600
|
||||
backoffLimit: 0
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
|
||||
@@ -13,4 +13,7 @@ spec:
|
||||
storageClassName: ""
|
||||
nfs:
|
||||
server: 192.168.42.8
|
||||
path: /volume1/Kubernetes/pipelines/woodpecker
|
||||
path: /volume1/Kubernetes/woodpecker
|
||||
claimRef:
|
||||
namespace: woodpecker
|
||||
name: data-woodpecker-server-0
|
||||
|
||||
@@ -13,9 +13,9 @@ server:
|
||||
WOODPECKER_SERVER_ADDR: "0.0.0.0:8000"
|
||||
WOODPECKER_GRPC_ADDR: "0.0.0.0:9000"
|
||||
WOODPECKER_GITEA: "true"
|
||||
WOODPECKER_GITEA_URL: "http://gitea-service.git-server.svc.cluster.local:3000"
|
||||
WOODPECKER_GITEA_CLIENT: "8fb7fc0f-98f6-42b5-b066-6cc4d745de4f"
|
||||
WOODPECKER_GITEA_SECRET: "gto_izanujbxlcxzc23znan56m3uie6s4ta2lgvro2yhgmuwvw3vutkq"
|
||||
WOODPECKER_GITEA_URL: "https://git.celestium.life"
|
||||
WOODPECKER_GITEA_CLIENT: "5f40e5f2-0153-458e-be5a-2ed5fd1b9054"
|
||||
WOODPECKER_GITEA_SECRET: "gto_h3rindnfegcurodm2vvujm7gzr6t5ly4rs2eto2wg57epwoi2x6q"
|
||||
WOODPECKER_AGENT_SECRET: "01eede973f522dbea9c1f09afc020ed0934a6f946d5832be5fecacb0da04ce23"
|
||||
WOODPECKER_ADMIN: "admin"
|
||||
WOODPECKER_PLUGINS_PRIVILEGED: "woodpeckerci/plugin-docker-buildx"
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
"""Backfill 90 days of daily OHLCV bars from Polygon into market_snapshots.
|
||||
|
||||
Run as: python scripts/backfill_market_data.py
|
||||
Requires env vars: POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST,
|
||||
POSTGRES_PORT, POSTGRES_DB, MARKET_DATA_API_KEY,
|
||||
MARKET_DATA_BASE_URL (optional, defaults to polygon).
|
||||
|
||||
Skips if market_snapshots already has >= 50 bars.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
|
||||
|
||||
async def backfill() -> None:
    """Backfill 90 days of daily bars into ``market_snapshots``.

    Does nothing when ``MARKET_DATA_API_KEY`` is unset or when the table
    already holds at least 50 rows with ``snapshot_type = 'bar'``.
    Otherwise, for every active company ticker, fetches daily aggregates
    from ``{MARKET_DATA_BASE_URL}/v2/aggs/...`` and inserts each bar whose
    ``t`` timestamp is not already stored for that ticker.

    A failed fetch for one ticker is reported and skipped; it does not stop
    the remaining tickers.

    Raises:
        KeyError: if any ``POSTGRES_*`` environment variable is missing.
    """
    api_key = os.environ.get("MARKET_DATA_API_KEY", "")
    if not api_key:
        print("No MARKET_DATA_API_KEY set — skipping backfill.")
        return

    base_url = os.environ.get("MARKET_DATA_BASE_URL", "https://api.polygon.io")

    # Credentials go in as keyword arguments instead of being formatted into
    # a DSN string: a password containing '@', ':' or '/' would otherwise
    # produce an unparsable (or mis-parsed) connection URL.
    pool = await asyncpg.create_pool(
        user=os.environ["POSTGRES_USER"],
        password=os.environ["POSTGRES_PASSWORD"],
        host=os.environ["POSTGRES_HOST"],
        port=int(os.environ["POSTGRES_PORT"]),
        database=os.environ["POSTGRES_DB"],
    )

    # try/finally guarantees the pool is released on every exit path,
    # including unexpected database or HTTP client errors.
    try:
        # Check if backfill is needed
        count = await pool.fetchval(
            "SELECT count(*) FROM market_snapshots WHERE snapshot_type = 'bar'"
        )
        if count >= 50:
            print(f"Market data has {count} bars — skipping backfill.")
            return

        print(f"Only {count} market bars found — backfilling 90 days from Polygon...")

        tickers = [
            r["ticker"]
            for r in await pool.fetch(
                "SELECT ticker FROM companies WHERE active = TRUE ORDER BY ticker"
            )
        ]
        to_d = date.today().isoformat()
        from_d = (date.today() - timedelta(days=90)).isoformat()
        total = 0

        async with httpx.AsyncClient(timeout=30) as client:
            for ticker in tickers:
                url = f"{base_url}/v2/aggs/ticker/{ticker}/range/1/day/{from_d}/{to_d}"
                try:
                    resp = await client.get(
                        url,
                        params={
                            "apiKey": api_key,
                            "adjusted": "true",
                            "sort": "asc",
                            "limit": "500",
                        },
                    )
                    resp.raise_for_status()
                    # `or []` also covers a present-but-null "results" field,
                    # which would otherwise break the loop below.
                    bars = resp.json().get("results") or []
                except Exception as e:
                    print(f" {ticker}: fetch failed ({e})")
                    continue

                # Bar timestamps already stored for this ticker; used to keep
                # the insert idempotent across runs.
                existing = {
                    r["bar_ts"]
                    for r in await pool.fetch(
                        "SELECT DISTINCT (data->>'t')::bigint AS bar_ts "
                        "FROM market_snapshots WHERE ticker = $1 AND snapshot_type = 'bar'",
                        ticker,
                    )
                    if r["bar_ts"]
                }
                co = await pool.fetchrow(
                    "SELECT id FROM companies WHERE ticker = $1", ticker
                )
                cid = co["id"] if co else None
                inserted = 0

                for bar in bars:
                    bar_ts = bar.get("t")
                    if not bar_ts or bar_ts in existing:
                        continue
                    bj = json.dumps(bar)
                    ch = hashlib.sha256(bj.encode()).hexdigest()
                    # The code divides `t` by 1000, i.e. treats it as epoch milliseconds.
                    ca = datetime.fromtimestamp(bar_ts / 1000, tz=timezone.utc)
                    await pool.execute(
                        "INSERT INTO market_snapshots "
                        "(company_id, ticker, snapshot_type, data, source_provider, "
                        "captured_at, content_hash) "
                        "VALUES ($1, $2, 'bar', $3::jsonb, 'polygon_backfill', $4, $5)",
                        cid,
                        ticker,
                        bj,
                        ca,
                        ch,
                    )
                    existing.add(bar_ts)
                    inserted += 1

                total += inserted
                if inserted:
                    print(f" {ticker}: {inserted} bars")
    finally:
        await pool.close()

    print(f"Backfill complete: {total} bars across {len(tickers)} tickers")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Best-effort entry point: any failure is reported on stderr but the
    # process still exits 0.
    try:
        asyncio.run(backfill())
    except Exception as e:
        print(f"Backfill failed: {e}", file=sys.stderr)
        sys.exit(0)  # Don't block startup on backfill failure
|
||||
Executable
+360
@@ -0,0 +1,360 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# factory-reset.sh — Factory reset a Stonks Oracle stage
|
||||
#
|
||||
# Drops and recreates the database, flushes Redis keys, empties S3 buckets,
|
||||
# re-runs migrations, and re-seeds the symbol registry.
|
||||
#
|
||||
# Usage:
|
||||
# bash scripts/factory-reset.sh <stage> [--component <component>] [--yes] [--verbose]
|
||||
#
|
||||
# Stages:
|
||||
# production — stonks-oracle namespace, DB=stonks, Redis DB=0, buckets=stonks-*
|
||||
# paper — stonks-paper namespace, DB=stonks_paper, Redis DB=2, buckets=paper-stonks-*
|
||||
# beta — stonks-beta namespace, DB=stonks_beta, Redis DB=1, buckets=beta-stonks-*
|
||||
#
|
||||
# Components (optional, default: all):
|
||||
# all — Full factory reset (DB + S3 + Redis)
|
||||
# db — Database only (drop/recreate + migrations + seed)
|
||||
# s3 — S3 buckets only (empty all stage buckets)
|
||||
# redis — Redis only (flush stage keys)
|
||||
# computed — Computed data only (trends, recommendations, orders, positions)
|
||||
#
|
||||
# Examples:
|
||||
# bash scripts/factory-reset.sh beta # Full reset of beta
|
||||
# bash scripts/factory-reset.sh production --component db # DB-only reset of production
|
||||
# bash scripts/factory-reset.sh paper --component computed # Clear computed data in paper
|
||||
# bash scripts/factory-reset.sh beta --verbose # Full reset with per-object output
|
||||
#
|
||||
# Requirements:
|
||||
# - kubectl access to the cluster
|
||||
# - mc (MinIO client) configured with alias "stonks"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# First positional argument is the stage; everything after it is a flag.
STAGE="${1:-}"
COMPONENT="all"
AUTO_YES=false
VERBOSE=false

if [[ -z "$STAGE" ]]; then
  echo "Usage: bash scripts/factory-reset.sh <stage> [--component <component>] [--yes] [--verbose]"
  echo "Stages: production, paper, beta"
  echo "Components: all, db, s3, redis, computed"
  echo "Flags: --yes Skip confirmation prompt"
  echo " --verbose Show detailed per-object output"
  exit 1
fi

shift
while [[ $# -gt 0 ]]; do
  case "$1" in
    --component)
      # Without this guard, `--component` as the last argument makes "$2"
      # trip `set -u` and the script dies with an opaque "unbound variable".
      if [[ $# -lt 2 ]]; then
        echo "ERROR: --component requires a value. Use: all, db, s3, redis, computed"
        exit 1
      fi
      COMPONENT="$2"
      shift 2
      ;;
    --yes|-y)
      AUTO_YES=true
      shift
      ;;
    --verbose|-v)
      VERBOSE=true
      shift
      ;;
    *)
      echo "Unknown option: $1"
      exit 1
      ;;
  esac
done
|
||||
|
||||
# Map stage to namespace, DB name, Redis DB, and bucket prefix
case "$STAGE" in
  production|prod)
    NAMESPACE="stonks-oracle"
    DB_NAME="stonks"
    REDIS_DB=0
    BUCKET_PREFIX="stonks-"
    REDIS_KEY_PREFIX="stonks:"
    DEPLOY_STAGE=""
    ;;
  paper)
    NAMESPACE="stonks-paper"
    DB_NAME="stonks_paper"
    REDIS_DB=2
    BUCKET_PREFIX="paper-stonks-"
    REDIS_KEY_PREFIX="stonks:paper:"
    DEPLOY_STAGE="paper"
    ;;
  beta)
    NAMESPACE="stonks-beta"
    DB_NAME="stonks_beta"
    REDIS_DB=1
    BUCKET_PREFIX="beta-stonks-"
    REDIS_KEY_PREFIX="stonks:beta:"
    DEPLOY_STAGE="beta"
    ;;
  *)
    echo "ERROR: Unknown stage '$STAGE'. Use: production, paper, beta"
    exit 1
    ;;
esac

# Shared infrastructure endpoints (identical for every stage)
PG_POD="postgresql-1"
PG_NS="postgresql-service"
PG_USER="postgres"
REDIS_HOST="redis-master.redis-service.svc.cluster.local"
REDIS_PORT="6379"
# SECURITY: a credential committed to the repository. The value can now be
# supplied through the REDIS_PASSWORD environment variable; the literal is
# kept only as a fallback so existing invocations keep working.
# TODO: rotate this password and remove the fallback.
REDIS_PASSWORD="${REDIS_PASSWORD:-PSCh4ng3me!}"
MC_ALIAS="stonks"

# S3 bucket suffixes; each is appended to BUCKET_PREFIX to form a bucket name
BUCKET_SUFFIXES=(
  "audit"
  "lakehouse"
  "llm-prompts"
  "llm-results"
  "normalized"
  "raw-filings"
  "raw-market"
  "raw-news"
)
|
||||
|
||||
# Summarise what is about to be reset before asking for confirmation.
echo "============================================"
echo " Stonks Oracle Factory Reset"
echo "============================================"
echo " Stage: $STAGE"
echo " Namespace: $NAMESPACE"
echo " Database: $DB_NAME"
echo " Redis DB: $REDIS_DB"
echo " Buckets: ${BUCKET_PREFIX}*"
echo " Component: $COMPONENT"
if [[ "$VERBOSE" == true ]]; then
  echo " Verbose: ON"
fi
echo "============================================"
echo ""

# Safety confirmation
if [[ "$AUTO_YES" == true ]]; then
  echo "⚠️ --yes flag set, skipping confirmation"
else
  confirm=""
  # `read` returns non-zero at EOF (closed or non-interactive stdin). Under
  # `set -e` that used to kill the script before "Aborted." was printed;
  # treat it as an explicit refusal instead.
  if ! read -rp "⚠️ This will DESTROY data. Type '$STAGE' to confirm: " confirm; then
    confirm=""
  fi
  if [[ "$confirm" != "$STAGE" ]]; then
    echo "Aborted."
    exit 1
  fi
fi
echo ""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper: scale down all deployments in the namespace
|
||||
# ---------------------------------------------------------------------------
|
||||
scale_down() {
  # Scale every deployment in $NAMESPACE to zero replicas and wait up to 60s
  # for the namespace's pods to disappear. Best-effort: failures are reported
  # but never abort the script.
  echo "--- Scaling down $NAMESPACE deployments ---"
  local deployments
  deployments=$(kubectl get deployments -n "$NAMESPACE" -o name 2>/dev/null || true)
  if [[ -z "$deployments" ]]; then
    echo " ✓ All deployments scaled to 0"
    return 0
  fi

  # grep -c prints a bare number; `wc -l` pads its output on BSD/macOS.
  local count
  count=$(grep -c . <<<"$deployments" || true)

  local failed=false
  if [[ "$VERBOSE" == true ]]; then
    echo "$deployments" | xargs -I{} kubectl scale {} --replicas=0 -n "$NAMESPACE" 2>/dev/null || failed=true
  else
    echo "$deployments" | xargs -I{} kubectl scale {} --replicas=0 -n "$NAMESPACE" &>/dev/null || failed=true
  fi

  echo " Waiting for $count deployments to terminate..."
  # Non-zero here also covers "no pods matched", so it is deliberately ignored.
  kubectl wait --for=delete pod --all -n "$NAMESPACE" --timeout=60s 2>/dev/null || true

  if [[ "$failed" == true ]]; then
    echo " ⚠ Some deployments could not be scaled down"
  else
    echo " ✓ All deployments scaled to 0"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper: scale up all deployments in the namespace
|
||||
# ---------------------------------------------------------------------------
|
||||
scale_up() {
  # Bring the namespace's deployments back, then restart the scheduler and
  # wait up to 120s for its rollout. Best-effort: never aborts the script.
  echo "--- Scaling up $NAMESPACE deployments ---"

  # Capture the listing first: piping kubectl straight into `grep -q` under
  # `pipefail` can report failure when grep exits early and kubectl gets
  # SIGPIPE, which would wrongly select the manual path.
  local argo_apps
  argo_apps=$(kubectl get application -n argocd 2>/dev/null || true)

  # ArgoCD will auto-heal and restore replica counts, just trigger a sync
  if grep -q "$NAMESPACE" <<<"$argo_apps"; then
    echo " ArgoCD will restore replicas via self-heal"
  else
    echo " Manually restoring replicas..."
    local deployments
    deployments=$(kubectl get deployments -n "$NAMESPACE" -o name 2>/dev/null || true)
    if [[ -n "$deployments" ]]; then
      if [[ "$VERBOSE" == true ]]; then
        echo "$deployments" | xargs -I{} kubectl scale {} --replicas=1 -n "$NAMESPACE" 2>/dev/null || true
      else
        echo "$deployments" | xargs -I{} kubectl scale {} --replicas=1 -n "$NAMESPACE" &>/dev/null || true
      fi
    fi
  fi
  echo " ✓ Scale-up triggered"

  # Wait for the scheduler pod (which runs migrations via init containers)
  # to be fully ready before other services start querying the DB.
  echo " Waiting for scheduler pod (runs migrations)..."
  kubectl rollout restart deployment/scheduler -n "$NAMESPACE" 2>/dev/null || true
  # Only claim readiness when the rollout actually completed.
  if kubectl rollout status deployment/scheduler -n "$NAMESPACE" --timeout=120s 2>/dev/null; then
    echo " ✓ Scheduler ready (migrations applied)"
  else
    echo " ⚠ Scheduler rollout did not complete within 120s — migrations may not be applied yet"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: Database
|
||||
# ---------------------------------------------------------------------------
|
||||
reset_db() {
  # Drop and recreate $DB_NAME, apply every infra/migrations/*.sql file in
  # glob (sorted) order, then scale the stage back up and run the symbol
  # registry seed inside a scheduler pod.
  # Must be run from the repository root (the migrations path is relative).
  echo "--- Resetting database: $DB_NAME ---"

  # Terminate active connections so DROP DATABASE is not blocked.
  local terminate_sql="SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '$DB_NAME' AND pid <> pg_backend_pid();"
  if [[ "$VERBOSE" == true ]]; then
    kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -c \
      "$terminate_sql" \
      2>/dev/null || true
  else
    kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -tAc \
      "$terminate_sql" \
      &>/dev/null || true
  fi

  # Drop and recreate. These are not guarded: a failure aborts via `set -e`.
  kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -c \
    "DROP DATABASE IF EXISTS $DB_NAME;"
  kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -c \
    "CREATE DATABASE $DB_NAME OWNER stonks;"
  echo " ✓ Database recreated"

  # Run migrations. A glob is used instead of parsing `ls`, so file names are
  # never word-split; an unmatched glob stays literal and is detected below.
  local migrations=(infra/migrations/*.sql)
  local count=0
  local failed=0
  local migration
  if [[ -e "${migrations[0]}" ]]; then
    count=${#migrations[@]}
    echo " Running $count migrations..."
    for migration in "${migrations[@]}"; do
      if [[ "$VERBOSE" == true ]]; then
        echo " Applying $(basename "$migration")..."
      fi
      # Still best-effort (the loop continues), but failures are now counted
      # and named instead of being silently reported as success.
      if ! kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -i -- psql -U stonks -d "$DB_NAME" < "$migration" 2>/dev/null; then
        failed=$((failed + 1))
        echo " ⚠ $(basename "$migration") failed to apply"
      fi
    done
  else
    echo " Running $count migrations..."
    echo " ⚠ No files match infra/migrations/*.sql — run this script from the repository root"
  fi
  if [[ "$failed" -eq 0 ]]; then
    echo " ✓ Migrations applied ($count files)"
  else
    echo " ⚠ Migrations finished: $failed of $count files failed"
  fi

  # Seed symbol registry
  echo " Seeding symbol registry..."
  # Wait for at least one pod to be ready
  scale_up
  sleep 10
  local scheduler_pod
  # `|| true`: under `set -e` + `pipefail` a failing kubectl here used to
  # abort the script before the "no scheduler pod" fallback could run.
  scheduler_pod=$(kubectl get pods -n "$NAMESPACE" -l app=scheduler -o name 2>/dev/null | head -1 || true)
  if [[ -n "$scheduler_pod" ]]; then
    if kubectl exec -n "$NAMESPACE" "$scheduler_pod" -c scheduler -- \
      python -m services.symbol_registry.seed 2>/dev/null; then
      echo " ✓ Seeded"
    else
      echo " ⚠ Seed failed (will retry on next restart)"
    fi
  else
    echo " ⚠ No scheduler pod available — seed will run on next deployment"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: Computed data only (trends, recommendations, orders, positions)
|
||||
# ---------------------------------------------------------------------------
|
||||
reset_computed() {
  # Delete all rows from the computed-data tables of $DB_NAME.
  # The statements are sent to psql as one multi-statement -c string.
  # NOTE(review): position_stop_levels is deleted after positions — confirm it
  # has no non-cascading foreign key to positions.
  echo "--- Clearing computed data in $DB_NAME ---"
  # stderr is no longer discarded: with `set -e` a failure used to terminate
  # the whole script without printing any reason.
  if ! kubectl exec -n "$PG_NS" "$PG_POD" -c postgres -- psql -U "$PG_USER" -d "$DB_NAME" -c "
    -- Order matters due to FK constraints
    DELETE FROM recommendation_evidence;
    DELETE FROM risk_evaluations;
    DELETE FROM order_events;
    DELETE FROM orders;
    DELETE FROM trading_decisions;
    DELETE FROM positions;
    DELETE FROM portfolio_snapshots;
    DELETE FROM reserve_pool_ledger;
    DELETE FROM risk_tier_history;
    DELETE FROM circuit_breaker_events;
    DELETE FROM notifications;
    DELETE FROM recommendations;
    DELETE FROM trend_evidence;
    DELETE FROM trend_projections;
    DELETE FROM trend_history;
    DELETE FROM trend_windows;
    DELETE FROM backtest_trades;
    DELETE FROM backtest_runs;
    DELETE FROM position_stop_levels;
  "; then
    echo " ✗ Failed to clear computed data in $DB_NAME (see psql output above)" >&2
    return 1
  fi
  echo " ✓ Computed data cleared"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: S3 buckets
|
||||
# ---------------------------------------------------------------------------
|
||||
reset_s3() {
  # Remove every object from each ${BUCKET_PREFIX}<suffix> bucket reachable
  # through the "$MC_ALIAS" mc alias. Missing buckets are skipped.
  echo "--- Emptying S3 buckets: ${BUCKET_PREFIX}* ---"
  local suffix bucket output removed
  for suffix in "${BUCKET_SUFFIXES[@]}"; do
    bucket="${BUCKET_PREFIX}${suffix}"
    if mc ls "${MC_ALIAS}/${bucket}" &>/dev/null; then
      echo -n " Emptying ${bucket}..."
      if [[ "$VERBOSE" == true ]]; then
        echo ""
        mc rm --recursive --force "${MC_ALIAS}/${bucket}/" 2>/dev/null || true
      else
        # Capture first, count afterwards. The former
        # `mc rm ... | wc -l || echo "0"` produced two lines ("N" and "0")
        # whenever mc exited non-zero, because `pipefail` fails the pipeline
        # after wc has already printed its count.
        output=$(mc rm --recursive --force "${MC_ALIAS}/${bucket}/" 2>/dev/null || true)
        removed=0
        if [[ -n "$output" ]]; then
          removed=$(grep -c '' <<<"$output" || true)
        fi
        echo " ${removed} objects removed"
      fi
      echo " ✓ ${bucket} emptied"
    else
      echo " ⚠ ${bucket} not found (skipping)"
    fi
  done
  echo " ✓ All S3 buckets emptied"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reset: Redis
|
||||
# ---------------------------------------------------------------------------
|
||||
reset_redis() {
  # Delete all keys matching 'stonks:*' in Redis DB $REDIS_DB by running a
  # short Python snippet inside the scheduler deployment. If that fails for
  # any reason, fall back to FLUSHDB on the redis-master-0 pod.
  # NOTE(review): REDIS_KEY_PREFIX is defined per stage but not used here;
  # the pattern is the fixed 'stonks:*' — confirm that is intended.
  echo "--- Flushing Redis DB $REDIS_DB ---"

  # Connection details are passed as argv and the snippet is single-quoted,
  # so the password is never spliced into Python source or a redis:// URL
  # (characters such as ' @ : / in the password would break either).
  if kubectl exec -n "$NAMESPACE" deployment/scheduler -c scheduler -- python -c '
import sys

import redis

host, port, db, password = sys.argv[1:5]
r = redis.Redis(host=host, port=int(port), db=int(db), password=password)
keys = r.keys("stonks:*")
if keys:
    r.delete(*keys)
    print(f" Deleted {len(keys)} keys")
else:
    print(" No keys to delete")
' "$REDIS_HOST" "$REDIS_PORT" "$REDIS_DB" "$REDIS_PASSWORD" 2>/dev/null; then
    echo " ✓ Redis flushed"
    return 0
  fi

  # Fallback: flush the entire Redis DB if no scheduler pod
  echo " Falling back to FLUSHDB..."
  if kubectl exec -n redis-service redis-master-0 -- redis-cli -a "$REDIS_PASSWORD" -n "$REDIS_DB" FLUSHDB 2>/dev/null; then
    echo " ✓ Redis flushed"
  else
    # Still best-effort (the script continues), but no longer claims success.
    echo " ⚠ FLUSHDB failed — Redis DB $REDIS_DB was NOT flushed"
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Execute based on component selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Run the reset steps for the selected component.
# Note: reset_db already calls scale_up internally (it needs a scheduler pod
# for seeding); the trailing scale_up below restarts the scheduler once more.
case "$COMPONENT" in
  all)
    scale_down
    reset_db
    reset_s3
    reset_redis
    scale_up
    ;;
  db)
    scale_down
    reset_db
    scale_up
    ;;
  s3)
    reset_s3
    ;;
  redis)
    reset_redis
    ;;
  computed)
    reset_computed
    ;;
  *)
    echo "ERROR: Unknown component '$COMPONENT'. Use: all, db, s3, redis, computed"
    exit 1
    ;;
esac

echo ""
echo "============================================"
echo " Factory reset complete: $STAGE / $COMPONENT"
echo "============================================"

# These hints describe what follows a deployment restart, which only the
# `all` and `db` components perform; printing them for s3/redis/computed
# was misleading.
case "$COMPONENT" in
  all|db)
    echo ""
    echo "Next steps:"
    echo " - ArgoCD will auto-restore pod replicas"
    echo " - Migrations and seed run automatically on scheduler init"
    echo " - Ingestion will begin on the next scheduler cycle (~15s)"
    echo " - First aggregation will run within ~15 minutes"
    ;;
esac
|
||||
@@ -76,7 +76,7 @@ from services.shared.metrics import (
|
||||
RISK_CHECK_FAILURES,
|
||||
RISK_EVALUATIONS_TOTAL,
|
||||
)
|
||||
from services.shared.redis_keys import QUEUE_BROKER, queue_key
|
||||
from services.shared.redis_keys import QUEUE_BROKER, is_pipeline_enabled, queue_key
|
||||
|
||||
logger = logging.getLogger("broker_service")
|
||||
|
||||
@@ -428,10 +428,16 @@ async def sync_positions(
|
||||
account_uuid: str,
|
||||
minio_client: Any | None = None,
|
||||
) -> None:
|
||||
"""Sync current positions from Alpaca to PostgreSQL and publish to lake."""
|
||||
"""Sync current positions from Alpaca to PostgreSQL and publish to lake.
|
||||
|
||||
Performs a full reconciliation: upserts positions that Alpaca reports,
|
||||
then removes any DB positions that Alpaca no longer holds (e.g. after
|
||||
a paper reset or full liquidation).
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
try:
|
||||
positions = await adapter.get_positions()
|
||||
broker_tickers = {pos.ticker for pos in positions}
|
||||
async with pool.acquire() as conn:
|
||||
for pos in positions:
|
||||
await conn.execute(
|
||||
@@ -444,7 +450,20 @@ async def sync_positions(
|
||||
pos.unrealized_pnl,
|
||||
now,
|
||||
)
|
||||
logger.info("Synced %d positions from Alpaca", len(positions))
|
||||
# Remove positions that the broker no longer reports (closed/liquidated)
|
||||
if broker_tickers:
|
||||
await conn.execute(
|
||||
"DELETE FROM positions WHERE broker_account_id = $1::uuid AND ticker != ALL($2::varchar[])",
|
||||
account_uuid,
|
||||
list(broker_tickers),
|
||||
)
|
||||
else:
|
||||
# Broker reports zero positions — clear all local positions for this account
|
||||
await conn.execute(
|
||||
"DELETE FROM positions WHERE broker_account_id = $1::uuid",
|
||||
account_uuid,
|
||||
)
|
||||
logger.info("Synced %d positions from Alpaca (reconciled)", len(positions))
|
||||
POSITIONS_SYNCED.inc()
|
||||
|
||||
# Publish positions snapshot to analytical lake
|
||||
@@ -923,6 +942,9 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
result = await rds.lpop(queue)
|
||||
raw = str(result) if result else None
|
||||
if raw:
|
||||
|
||||
@@ -135,11 +135,11 @@ class PolygonMarketAdapter(MarketDataAdapter):
|
||||
if config.get("limit"):
|
||||
params["limit"] = str(config["limit"])
|
||||
elif endpoint_key == "intraday_bars":
|
||||
# Intraday: fetch hourly bars for today
|
||||
# Intraday: fetch 15-minute bars for today
|
||||
from datetime import date as date_cls
|
||||
today = date_cls.today().isoformat()
|
||||
multiplier = str(config.get("multiplier", 1))
|
||||
timespan = config.get("timespan", "hour")
|
||||
multiplier = str(config.get("multiplier", 15))
|
||||
timespan = config.get("timespan", "minute")
|
||||
path = self.INTRADAY_BARS.format(
|
||||
ticker=ticker,
|
||||
multiplier=multiplier,
|
||||
@@ -149,7 +149,7 @@ class PolygonMarketAdapter(MarketDataAdapter):
|
||||
)
|
||||
params["adjusted"] = str(config.get("adjusted", True)).lower()
|
||||
params["sort"] = "asc"
|
||||
params["limit"] = str(config.get("limit", 50))
|
||||
params["limit"] = str(config.get("limit", 100))
|
||||
elif endpoint_key == "grouped_daily":
|
||||
# Grouped daily: returns bars for ALL tickers for a given date
|
||||
target_date = config.get("date", "")
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
"""Bayesian accumulator for probabilistic sentiment aggregation.
|
||||
|
||||
Accumulates weighted signals into a Bayesian posterior using
|
||||
log-likelihood accumulation, Beta distribution parameters, and
|
||||
Shannon entropy for mixed-signal detection.
|
||||
|
||||
Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 9.1, 9.7
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
|
||||
from services.aggregation.scoring import WeightedSignal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BayesianPosterior:
    """Bayesian posterior state from signal accumulation.

    Immutable value object (frozen dataclass); all fields are plain numbers.
    """

    p_bull: float  # σ(L_t), bullish probability [0, 1]
    alpha: float  # Beta distribution α parameter (≥ 1.0)
    beta: float  # Beta distribution β parameter (≥ 1.0)
    log_likelihood: float  # Raw log-likelihood accumulation L_t
    bayesian_confidence: float  # 1 - 4αβ/(α+β)², [0, 1]
    entropy: float  # Shannon entropy H, [0, 1]
    signal_count: int  # Number of signals processed
|
||||
|
||||
|
||||
# Uninformative prior (no evidence)
|
||||
# Posterior returned when there is nothing to accumulate: p_bull = 0.5,
# Beta(1, 1), zero confidence, maximum entropy, zero signals.
PRIOR = BayesianPosterior(
    p_bull=0.5,
    alpha=1.0,
    beta=1.0,
    log_likelihood=0.0,
    bayesian_confidence=0.0,
    entropy=1.0,
    signal_count=0,
)
|
||||
|
||||
|
||||
def compute_entropy(p_bull: float) -> float:
    """Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p).

    Args:
        p_bull: Bullish probability.

    Returns:
        Entropy in [0, 1]: maximum (1.0) at p=0.5, zero at p=0 or p=1.
        Inputs with p≤0 or p≥1 return 0.0 so that log₂(0) is never evaluated.
    """
    if p_bull <= 0.0 or p_bull >= 1.0:
        return 0.0
    q = 1.0 - p_bull
    return -(p_bull * math.log2(p_bull) + q * math.log2(q))
|
||||
|
||||
|
||||
def compute_bayesian_posterior(
    signals: list[WeightedSignal],
) -> BayesianPosterior:
    """Accumulate weighted signals into a Bayesian posterior.

    Computes:
    - Log-likelihood: L_t = Σ(w_i · s_i)
    - Bullish probability: P_bull = σ(L_t)
    - Beta posterior: α = 1 + W_bull, β = 1 + W_bear
    - Bayesian confidence: C = 1 - 4αβ/(α+β)²
    - Shannon entropy: H = -p·log₂(p) - (1-p)·log₂(1-p)

    Here w_i is ``sig.weight.combined`` and s_i is ``sig.sentiment_value``.
    W_bull / W_bear are the summed weights of signals with strictly positive /
    strictly negative sentiment; zero-sentiment signals count towards
    ``signal_count`` only.

    Returns PRIOR for empty signal lists, or when every signal was skipped.
    Skips signals whose weight or sentiment is NaN or infinite.
    """
    if not signals:
        return PRIOR

    log_likelihood = 0.0
    w_bull = 0.0
    w_bear = 0.0
    count = 0

    for sig in signals:
        combined = sig.weight.combined
        sentiment = sig.sentiment_value

        # Skip non-finite inputs. NaN was already skipped; ±inf is skipped too
        # because inf·0 is NaN and inf/inf in the confidence formula is NaN,
        # either of which would poison the whole posterior.
        if not (math.isfinite(combined) and math.isfinite(sentiment)):
            continue

        log_likelihood += combined * sentiment

        if sentiment > 0.0:
            w_bull += combined
        elif sentiment < 0.0:
            w_bear += combined

        count += 1

    if count == 0:
        return PRIOR

    # P_bull via sigmoid: σ(L_t) = 1 / (1 + exp(-L_t))
    # Guard against overflow in exp for very large |L_t|
    if log_likelihood > 500.0:
        p_bull = 1.0
    elif log_likelihood < -500.0:
        p_bull = 0.0
    else:
        p_bull = 1.0 / (1.0 + math.exp(-log_likelihood))

    # Beta posterior parameters
    alpha = 1.0 + w_bull
    beta_param = 1.0 + w_bear

    # Bayesian confidence: C = 1 - 4αβ/(α+β)²
    ab_sum = alpha + beta_param
    bayesian_confidence = 1.0 - (4.0 * alpha * beta_param) / (ab_sum * ab_sum)
    # Clamp to [0, 1] to guard against floating-point rounding
    bayesian_confidence = max(0.0, min(1.0, bayesian_confidence))

    # Shannon entropy
    entropy = compute_entropy(p_bull)

    return BayesianPosterior(
        p_bull=p_bull,
        alpha=alpha,
        beta=beta_param,
        log_likelihood=log_likelihood,
        bayesian_confidence=bayesian_confidence,
        entropy=entropy,
        signal_count=count,
    )
|
||||
@@ -4,10 +4,11 @@ Analyses weighted signals to detect and represent disagreement explicitly,
|
||||
rather than collapsing contradictory evidence into a single unsupported
|
||||
conclusion.
|
||||
|
||||
Requirements: 6.4, 6.5
|
||||
Requirements: 6.4, 6.5, 15.1–15.7
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
|
||||
from services.aggregation.scoring import WeightedSignal
|
||||
@@ -35,6 +36,9 @@ class ContradictionResult:
|
||||
def detect_contradictions(
|
||||
signals: list[WeightedSignal],
|
||||
catalyst_entries: list[CatalystEntry] | None = None,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
w_threshold: float = 5.0,
|
||||
) -> ContradictionResult:
|
||||
"""Run contradiction detection across multiple dimensions.
|
||||
|
||||
@@ -42,6 +46,16 @@ def detect_contradictions(
|
||||
1. Sentiment disagreement — the core positive-vs-negative split
|
||||
2. Catalyst disagreement — same catalyst type with opposing sentiment
|
||||
|
||||
When ``probabilistic`` is True, the overall score uses weighted
|
||||
disagreement entropy (Req 15.1–15.7) instead of the minority/majority
|
||||
ratio. When False, the existing ratio formula is preserved exactly.
|
||||
|
||||
Args:
|
||||
signals: Weighted signals to analyse.
|
||||
catalyst_entries: Optional catalyst metadata for per-catalyst analysis.
|
||||
probabilistic: Use entropy-based scoring when True.
|
||||
w_threshold: Evidence mass threshold for entropy weighting (default 5.0).
|
||||
|
||||
Returns a ContradictionResult with an overall score and per-dimension
|
||||
disagreement details.
|
||||
"""
|
||||
@@ -55,7 +69,10 @@ def detect_contradictions(
|
||||
catalyst_details = _detect_catalyst_disagreement(signals, catalyst_entries)
|
||||
details.extend(catalyst_details)
|
||||
|
||||
score = _compute_overall_score(signals)
|
||||
if probabilistic:
|
||||
score = _compute_entropy_score(signals, w_threshold)
|
||||
else:
|
||||
score = _compute_overall_score(signals)
|
||||
|
||||
return ContradictionResult(score=score, details=details)
|
||||
|
||||
@@ -82,6 +99,58 @@ def _compute_overall_score(signals: list[WeightedSignal]) -> float:
|
||||
return round(minority / total, 4)
|
||||
|
||||
|
||||
def _compute_entropy_score(
|
||||
signals: list[WeightedSignal],
|
||||
w_threshold: float = 5.0,
|
||||
) -> float:
|
||||
"""Weighted disagreement entropy — probabilistic contradiction score.
|
||||
|
||||
Computes Shannon entropy over the positive/negative weight distribution,
|
||||
weighted by evidence mass relative to a configurable threshold.
|
||||
|
||||
Formula:
|
||||
f_pos = W_pos / (W_pos + W_neg)
|
||||
f_neg = 1 - f_pos
|
||||
H = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg) (in [0, 1])
|
||||
score = H · min(1.0, (W_pos + W_neg) / W_threshold)
|
||||
|
||||
Returns 0.0 when only one direction exists (no disagreement).
|
||||
|
||||
Requirements: 15.1–15.7
|
||||
"""
|
||||
if not signals:
|
||||
return 0.0
|
||||
|
||||
pos_weight = 0.0
|
||||
neg_weight = 0.0
|
||||
for sig in signals:
|
||||
w = sig.weight.combined * sig.impact_score
|
||||
if sig.sentiment_value > 0:
|
||||
pos_weight += w
|
||||
elif sig.sentiment_value < 0:
|
||||
neg_weight += w
|
||||
|
||||
# No disagreement when only one direction exists (Req 15.5)
|
||||
if pos_weight <= 0.0 or neg_weight <= 0.0:
|
||||
return 0.0
|
||||
|
||||
total = pos_weight + neg_weight
|
||||
|
||||
# Compute weight fractions (Req 15.2)
|
||||
f_pos = pos_weight / total
|
||||
f_neg = neg_weight / total # = 1 - f_pos
|
||||
|
||||
# Shannon entropy H = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg) (Req 15.3)
|
||||
# Guard against log₂(0) — already handled by the early return above
|
||||
h_contradiction = -f_pos * math.log2(f_pos) - f_neg * math.log2(f_neg)
|
||||
|
||||
# Weight by evidence mass (Req 15.4)
|
||||
evidence_factor = min(1.0, total / w_threshold) if w_threshold > 0.0 else 1.0
|
||||
score = h_contradiction * evidence_factor
|
||||
|
||||
return round(score, 4)
|
||||
|
||||
|
||||
def _detect_sentiment_disagreement(
|
||||
signals: list[WeightedSignal],
|
||||
) -> DisagreementDetail | None:
|
||||
|
||||
@@ -283,27 +283,82 @@ def _determine_impact_direction(
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _compute_multiplicative_exposure(
    geo_overlap: float,
    supply_overlap: float,
    commodity_overlap: float,
    sector_match: float,
) -> float:
    """Compute multiplicative (probabilistic-union) exposure.

    Formula: 1 - Π_k(1 - w_k · O_k)

    Each dimension contributes a term w_k · O_k; the result is the
    complement of the product of the per-dimension "miss" factors.
    For terms in [0, 1] this value never exceeds the linear weighted sum
    Σ_k w_k · O_k (the two are equal when at most one term is non-zero),
    so exposure across several dimensions compounds sub-additively.

    Args:
        geo_overlap: Geographic overlap O_geo.
        supply_overlap: Supply-chain overlap O_supply.
        commodity_overlap: Commodity overlap O_commodity.
        sector_match: Sector match O_sector.

    Returns:
        The compounded exposure. With the weights listed in the
        ``compute_macro_impact`` docstring (0.35 / 0.25 / 0.25 / 0.15) and
        every overlap equal to 1.0 the result is about 0.689; the actual
        upper bound depends on the module-level ``*_WEIGHT`` constants.
        Inputs are not clamped here.

    Requirements: 10.1, 10.4, 10.7
    """
    weighted_terms = (
        (GEO_WEIGHT, geo_overlap),
        (SUPPLY_WEIGHT, supply_overlap),
        (COMMODITY_WEIGHT, commodity_overlap),
        (SECTOR_WEIGHT, sector_match),
    )
    # Probability-style "miss on every dimension" factor.
    miss_product = 1.0
    for weight, overlap in weighted_terms:
        miss_product *= 1.0 - weight * overlap
    return 1.0 - miss_product
|
||||
|
||||
|
||||
def _compute_linear_exposure(
    geo_overlap: float,
    supply_overlap: float,
    commodity_overlap: float,
    sector_match: float,
) -> float:
    """Linear weighted-sum exposure (the original heuristic formula).

    Formula:
        w_geo·O_geo + w_supply·O_supply + w_commodity·O_commodity + w_sector·O_sector

    Args:
        geo_overlap: Geographic overlap O_geo.
        supply_overlap: Supply-chain overlap O_supply.
        commodity_overlap: Commodity overlap O_commodity.
        sector_match: Sector match O_sector.

    Returns:
        The weighted sum. It lies in [0, 1] provided every overlap is in
        [0, 1] and the module-level weights sum to 1 (inputs are not
        clamped here).
    """
    geo_term = GEO_WEIGHT * geo_overlap
    supply_term = SUPPLY_WEIGHT * supply_overlap
    commodity_term = COMMODITY_WEIGHT * commodity_overlap
    sector_term = SECTOR_WEIGHT * sector_match
    return geo_term + supply_term + commodity_term + sector_term
|
||||
|
||||
|
||||
def compute_macro_impact(
|
||||
event: GlobalEvent,
|
||||
profile: ExposureProfileSchema,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
) -> MacroImpactRecord:
|
||||
"""Compute the macro impact of a global event on a company.
|
||||
|
||||
Scoring formula:
|
||||
When ``probabilistic=False`` (default), uses the linear weighted-sum:
|
||||
raw_score = severity_weight * (
|
||||
0.35 * geographic_overlap +
|
||||
0.25 * supply_chain_overlap +
|
||||
0.25 * commodity_overlap +
|
||||
0.15 * sector_match
|
||||
)
|
||||
final_score = apply_resilience_modifier(raw_score, tier, is_international)
|
||||
|
||||
When ``probabilistic=True``, uses multiplicative compounding exposure:
|
||||
raw_score = severity_weight * (1 - Π_k(1 - w_k · O_k))
|
||||
|
||||
In both modes, the resilience modifier is applied after the raw score.
|
||||
|
||||
Args:
|
||||
event: The classified global event.
|
||||
profile: The company's exposure profile.
|
||||
probabilistic: Use multiplicative formula when True.
|
||||
|
||||
Returns:
|
||||
A MacroImpactRecord with the computed score and metadata.
|
||||
|
||||
Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
@@ -360,13 +415,16 @@ def compute_macro_impact(
|
||||
# Severity weight
|
||||
severity_weight = SEVERITY_WEIGHTS.get(event.severity, 0.25)
|
||||
|
||||
# Raw score
|
||||
raw_score = severity_weight * (
|
||||
GEO_WEIGHT * geo_overlap
|
||||
+ SUPPLY_WEIGHT * supply_overlap
|
||||
+ COMMODITY_WEIGHT * commodity_overlap
|
||||
+ SECTOR_WEIGHT * sector_match
|
||||
)
|
||||
# Raw score: multiplicative or linear depending on mode
|
||||
if probabilistic:
|
||||
exposure = _compute_multiplicative_exposure(
|
||||
geo_overlap, supply_overlap, commodity_overlap, sector_match,
|
||||
)
|
||||
else:
|
||||
exposure = _compute_linear_exposure(
|
||||
geo_overlap, supply_overlap, commodity_overlap, sector_match,
|
||||
)
|
||||
raw_score = severity_weight * exposure
|
||||
|
||||
# Determine if event is international (affects multiple regions)
|
||||
is_international = len(event.affected_regions) > 1
|
||||
@@ -406,19 +464,27 @@ def compute_macro_impact_with_sector(
|
||||
event: GlobalEvent,
|
||||
profile: ExposureProfileSchema,
|
||||
company_sector: str = "",
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
) -> MacroImpactRecord:
|
||||
"""Compute macro impact with explicit sector matching.
|
||||
|
||||
Like compute_macro_impact but accepts a company_sector parameter
|
||||
for proper sector_match computation.
|
||||
|
||||
When ``probabilistic=True``, uses multiplicative compounding exposure.
|
||||
When ``probabilistic=False``, uses the original linear weighted sum.
|
||||
|
||||
Args:
|
||||
event: The classified global event.
|
||||
profile: The company's exposure profile.
|
||||
company_sector: The company's GICS sector name.
|
||||
probabilistic: Use multiplicative formula when True.
|
||||
|
||||
Returns:
|
||||
A MacroImpactRecord with the computed score and metadata.
|
||||
|
||||
Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
@@ -472,13 +538,16 @@ def compute_macro_impact_with_sector(
|
||||
# Severity weight
|
||||
severity_weight = SEVERITY_WEIGHTS.get(event.severity, 0.25)
|
||||
|
||||
# Raw score
|
||||
raw_score = severity_weight * (
|
||||
GEO_WEIGHT * geo_overlap
|
||||
+ SUPPLY_WEIGHT * supply_overlap
|
||||
+ COMMODITY_WEIGHT * commodity_overlap
|
||||
+ SECTOR_WEIGHT * sector_match
|
||||
)
|
||||
# Raw score: multiplicative or linear depending on mode
|
||||
if probabilistic:
|
||||
exposure = _compute_multiplicative_exposure(
|
||||
geo_overlap, supply_overlap, commodity_overlap, sector_match,
|
||||
)
|
||||
else:
|
||||
exposure = _compute_linear_exposure(
|
||||
geo_overlap, supply_overlap, commodity_overlap, sector_match,
|
||||
)
|
||||
raw_score = severity_weight * exposure
|
||||
|
||||
# International check
|
||||
is_international = len(event.affected_regions) > 1
|
||||
@@ -588,6 +657,154 @@ def _infer_commodities(sector: str, industry: str) -> list[str]:
|
||||
return sector_commodities.get(sector, [])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Conditional macro signal integration (Requirements: 11.1–11.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_conditional_macro_modifier(
    company_strength: float,
    company_direction: str,
    macro_impact: float,
    macro_direction: str,
) -> float:
    """Compute the multiplicative macro modifier for conditional integration.

    When both company and macro signals exist, macro acts as a modifier:
        S_adjusted = S_company · clamp(1 + M_macro · sign_alignment, 0.5, 1.5)

    sign_alignment is +1 when macro and company agree in direction,
    -1 when they disagree, and 0 when either direction carries no sign
    (neutral, mixed, or any unrecognised label), which yields a modifier
    of exactly 1.0.

    Args:
        company_strength: The company-level signal strength (absolute).
            Accepted for interface compatibility; not used by the formula.
        company_direction: Company trend direction (bullish/bearish/neutral/mixed).
        macro_impact: Normalized macro impact score, expected in [0, 1].
        macro_direction: Macro impact direction (positive/negative/mixed/neutral).

    Returns:
        The multiplicative modifier in [0.5, 1.5]. A NaN ``macro_impact``
        is treated as "no macro information" and returns 1.0.

    Requirements: 11.1, 11.2
    """
    import math  # local import keeps this block independent of module imports

    direction_sign = {
        "bullish": 1,
        "positive": 1,
        "bearish": -1,
        "negative": -1,
    }
    company_sign = direction_sign.get(company_direction, 0)
    macro_sign = direction_sign.get(macro_direction, 0)

    # Product of the signs: +1 agree, -1 disagree, 0 if either is unsigned.
    sign_alignment = float(company_sign * macro_sign)

    # Without an alignment signal the modifier is neutral. A NaN impact must
    # not reach the clamp below: min(1.5, nan) evaluates to 1.5, which would
    # silently apply the maximum boost.
    if sign_alignment == 0.0 or math.isnan(macro_impact):
        return 1.0

    raw_modifier = 1.0 + macro_impact * sign_alignment
    return max(0.5, min(1.5, raw_modifier))
|
||||
|
||||
|
||||
def integrate_macro_signals(
    company_signals: list,
    macro_signals: list,
    company_direction: str,
    macro_impacts: list,
    ticker: str = "",
    *,
    probabilistic: bool = False,
    macro_signal_weight: float = 0.3,
) -> tuple[list, float]:
    """Merge macro-layer signals with company-layer signals.

    Heuristic mode (``probabilistic=False``):
        The two lists are concatenated (company first) and the modifier
        reported is 1.0.

    Probabilistic mode (``probabilistic=True``):
        - Company and macro both present: a single modifier is derived from
          the mean ``macro_impact_score`` and the direction with the largest
          summed score across ``macro_impacts``; shallow copies of the
          company signals with ``impact_score`` scaled by that modifier are
          returned. The macro signals themselves are not part of the result.
        - Macro only: the macro signals are returned as-is, modifier 1.0.
        - Otherwise (company only, or nothing at all): the company signals
          are returned as-is, modifier 1.0.

    Args:
        company_signals: WeightedSignal list from company layer.
        macro_signals: WeightedSignal list from macro layer.
        company_direction: Derived company trend direction string.
        macro_impacts: Records read via ``macro_impact_score`` and
            ``impact_direction`` attributes (missing attributes default to
            0.0 and "neutral").
        ticker: Ticker symbol, used in log messages only.
        probabilistic: Use the conditional modifier when True.
        macro_signal_weight: Weight for the macro-only fallback (default 0.3).
            NOTE(review): in this function the value is only written to the
            log; confirm the weight is applied by the caller.

    Returns:
        Tuple of (merged_signals, macro_modifier_applied); the modifier is
        1.0 whenever no scaling took place.

    Requirements: 11.1, 11.2, 11.3, 11.4, 11.5
    """
    if not probabilistic:
        # Heuristic mode: plain concatenation, no scaling.
        return [*company_signals, *macro_signals], 1.0

    company_present = len(company_signals) != 0
    macro_present = len(macro_signals) != 0

    if not macro_present:
        # Company-only (Req 11.4); also reached when both lists are empty.
        logger.info("Company-only signals for %s: macro modifier=1.0", ticker)
        return list(company_signals), 1.0

    if not company_present:
        # Macro-only fallback (Req 11.3).
        logger.info(
            "Macro-only fallback for %s: using additive merge with weight %.2f",
            ticker, macro_signal_weight,
        )
        return list(macro_signals), 1.0

    # Both layers present: summarise the macro impacts.
    impact_total = 0.0
    score_by_direction: dict[str, float] = {}
    for record in macro_impacts:
        record_score = getattr(record, "macro_impact_score", 0.0)
        record_direction = getattr(record, "impact_direction", "neutral")
        impact_total += record_score
        score_by_direction[record_direction] = (
            score_by_direction.get(record_direction, 0.0) + record_score
        )

    avg_macro_impact = impact_total / len(macro_impacts) if macro_impacts else impact_total

    # Dominant direction = the one carrying the largest summed score.
    if score_by_direction:
        macro_direction = max(score_by_direction, key=score_by_direction.get)
    else:
        macro_direction = "neutral"

    modifier = compute_conditional_macro_modifier(
        company_strength=0.0,  # not used in current formula
        company_direction=company_direction,
        macro_impact=avg_macro_impact,
        macro_direction=macro_direction,
    )

    logger.info(
        "Macro modifier for %s: %.4f (avg_impact=%.4f, macro_dir=%s, company_dir=%s)",
        ticker, modifier, avg_macro_impact, macro_direction, company_direction,
    )

    from copy import copy

    def _scaled(signal):
        # Work on a shallow copy so the caller's signal objects stay untouched.
        clone = copy(signal)
        clone.impact_score = signal.impact_score * modifier
        return clone

    return [_scaled(signal) for signal in company_signals], modifier
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PostgreSQL persistence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -23,6 +23,7 @@ from services.shared.logging import inject_trace_context, setup_logging
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_AGGREGATION,
|
||||
QUEUE_RECOMMENDATION,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
|
||||
@@ -134,6 +135,10 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(redis_client):
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
raw = await redis_client.lpop(queue)
|
||||
if raw is None:
|
||||
await asyncio.sleep(1)
|
||||
|
||||
@@ -4,7 +4,7 @@ Computes TrendProjection objects by combining current trend momentum,
|
||||
macro signal decay trajectories, and upcoming catalyst outlook.
|
||||
Projections are persisted alongside trend_window records.
|
||||
|
||||
Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.9
|
||||
Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.9, 13.1, 13.2, 13.3, 13.4, 13.5, 13.6
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -126,6 +126,87 @@ def _direction_sign(direction: str) -> float:
|
||||
return 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Exponentially weighted momentum (Requirements: 13.1–13.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_ew_momentum(
    strength_changes: list[float],
    lambda_decay: float = 0.7,
) -> float:
    """Exponentially weighted momentum over recent strength changes.

    Formula: M_t = Σ_{k=0}^{K-1} λ^k · ΔS_{t-k}, divided by the sum of the
    weights λ^k that were actually used, then clamped to [-1, 1].

    Only the first 10 entries are considered. NaN entries are skipped, but
    the remaining entries keep the weight of their original position.

    Args:
        strength_changes: Signed strength changes ΔS, most recent first.
        lambda_decay: Decay factor λ (default 0.7); intended to be in (0, 1)
            but not validated here.

    Returns:
        Normalized momentum in [-1, 1]. Returns 0.0 when fewer than two
        changes are supplied, when no usable weight accumulates, or when the
        result is not finite.

    Requirements: 13.1, 13.2, 13.3, 13.6
    """
    if len(strength_changes) < 2:
        return 0.0

    # At most the 10 most recent changes take part.
    recent = strength_changes[:10]

    numerator = 0.0
    denominator = 0.0
    for lag, delta in enumerate(recent):
        if not math.isnan(delta):
            decay_weight = lambda_decay ** lag
            numerator += decay_weight * delta
            denominator += decay_weight

    if denominator == 0.0:
        return 0.0

    momentum = numerator / denominator
    if not math.isfinite(momentum):
        # Infinite inputs would otherwise leak inf/NaN to the caller.
        return 0.0
    return min(1.0, max(-1.0, momentum))
|
||||
|
||||
|
||||
def compute_volatility_scaled_momentum(
    momentum: float,
    sigma_20: float,
) -> float:
    """Scale momentum by volatility.

    Formula: M_adj = M_t / max(σ_20, 0.01), clamped to [-2.0, 2.0].

    The 0.01 floor on the denominator prevents division by zero (or by a
    negative value) for very quiet tickers.

    Args:
        momentum: Raw or EW momentum value.
        sigma_20: 20-day return standard deviation.

    Returns:
        Volatility-scaled momentum in [-2.0, 2.0]; 0.0 when the quotient is
        NaN or infinite.

    Requirements: 13.4, 13.5
    """
    floored_sigma = max(sigma_20, 0.01)
    ratio = momentum / floored_sigma

    if not math.isfinite(ratio):
        return 0.0
    if ratio > 2.0:
        return 2.0
    if ratio < -2.0:
        return -2.0
    return ratio
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Macro signal decay projection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,170 @@
|
||||
"""Regime detector for market regime classification.
|
||||
|
||||
Classifies the current market regime for each ticker based on
|
||||
EMA trend indicators and volatility ratios. Adjusts scoring
|
||||
thresholds and contradiction penalties per regime.
|
||||
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import statistics
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class MarketRegime(str, Enum):
    """The four market regimes a ticker can be classified into.

    Members are also plain strings, so they compare equal to their
    lowercase value.
    """

    # Directional trend with contained volatility.
    TREND_FOLLOWING = "trend_following"
    # Short-term volatility far above long-term volatility.
    PANIC = "panic"
    # No trend direction and subdued volatility.
    MEAN_REVERSION = "mean_reversion"
    # Anything else, including insufficient data.
    UNCERTAINTY = "uncertainty"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RegimeClassification:
    """Immutable outcome of regime detection for one ticker."""

    # Detected regime category.
    regime: MarketRegime
    # R = sign(EMA_20 - EMA_100): -1.0, 0.0 or 1.0.
    trend_indicator: float
    # V_r = σ_20 / σ_100.
    volatility_ratio: float
    # Regime-adjusted positive threshold for a bullish call.
    bullish_threshold: float
    # Regime-adjusted negative threshold for a bearish call.
    bearish_threshold: float
    # 0.4 by default, 0.6 in the uncertainty regime.
    contradiction_penalty_multiplier: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RegimeConfig:
    """Tunable parameters for regime detection (immutable)."""

    # Look-back lengths for the short/long EMA of closing prices.
    ema_short_period: int = 20
    ema_long_period: int = 100

    # Look-back lengths for the short/long return standard deviation.
    vol_short_period: int = 20
    vol_long_period: int = 100

    # Volatility-ratio cut-offs: panic above, trend / mean-reversion below.
    panic_vol_ratio: float = 1.5
    trend_vol_ratio: float = 1.2
    mean_reversion_vol_ratio: float = 1.0

    # Direction thresholds (applied as ±value) per regime.
    default_threshold: float = 0.15
    panic_threshold: float = 0.10
    mean_reversion_threshold: float = 0.20

    # Contradiction penalty multiplier used in the uncertainty regime.
    uncertainty_contradiction_multiplier: float = 0.6
|
||||
|
||||
|
||||
# Default uncertainty classification used when data is insufficient
|
||||
# Shared fallback result: uncertainty regime, no trend, neutral volatility
# ratio, ±0.15 direction thresholds and the 0.6 contradiction multiplier.
_DEFAULT_UNCERTAINTY = RegimeClassification(
    regime=MarketRegime.UNCERTAINTY,
    trend_indicator=0.0,
    volatility_ratio=1.0,
    bullish_threshold=0.15,
    bearish_threshold=-0.15,
    contradiction_penalty_multiplier=0.6,
)
|
||||
|
||||
|
||||
def compute_ema(values: list[float], period: int) -> float:
    """Exponential moving average of the trailing ``period`` values.

    The smoothing factor is 2 / (period + 1). Only the last ``period``
    entries of *values* are used (all of them when fewer are available);
    the first of those seeds the average and each later one is folded in.

    Args:
        values: Series, oldest first.
        period: EMA period; must be >= 1.

    Returns:
        The EMA after the final value.

    Raises:
        ValueError: If *values* is empty or *period* < 1.
    """
    if period < 1 or not values:
        raise ValueError("values must be non-empty and period must be >= 1")

    # Slicing already copes with len(values) < period.
    window = values[-period:]
    alpha = 2.0 / (period + 1)

    average = window[0]
    for index in range(1, len(window)):
        average = (window[index] - average) * alpha + average
    return average
|
||||
|
||||
|
||||
def _sign(x: float) -> float:
|
||||
"""Return -1.0, 0.0, or 1.0 for the sign of *x*."""
|
||||
if x > 0.0:
|
||||
return 1.0
|
||||
if x < 0.0:
|
||||
return -1.0
|
||||
return 0.0
|
||||
|
||||
|
||||
def classify_regime(
    closing_prices: list[float],
    returns: list[float],
    config: RegimeConfig = RegimeConfig(),
) -> RegimeClassification:
    """Classify market regime from price and return history.

    Steps:
        1. Trend indicator R = sign(EMA_short - EMA_long) of closing prices.
        2. Volatility ratio V_r = σ_short / σ_long of returns (sample
           standard deviation).
        3. Rules, first match wins:
           - V_r > panic_vol_ratio                    → PANIC
           - R ≠ 0 and V_r < trend_vol_ratio          → TREND_FOLLOWING
           - R = 0 and V_r < mean_reversion_vol_ratio → MEAN_REVERSION
           - otherwise                                → UNCERTAINTY

    Falls back to an UNCERTAINTY classification (trend 0.0, ratio 1.0) when
    fewer than ``config.ema_long_period`` prices or ``config.vol_long_period``
    returns are supplied, or when either standard deviation is zero or NaN.
    The fallback thresholds are taken from *config*, so a caller-supplied
    configuration is honoured on every path; with the default config it is
    equal to the module-level default uncertainty result.

    Args:
        closing_prices: Closing prices, oldest first.
        returns: Period returns, oldest first.
        config: Detection parameters.

    Returns:
        The regime together with the direction thresholds (±threshold) and
        the contradiction penalty multiplier that apply to it.

    Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9
    """
    # Contradiction multiplier for every regime except uncertainty.
    standard_contradiction_mult = 0.4

    def _uncertain() -> RegimeClassification:
        # Config-aware fallback instead of a hard-coded constant.
        return RegimeClassification(
            regime=MarketRegime.UNCERTAINTY,
            trend_indicator=0.0,
            volatility_ratio=1.0,
            bullish_threshold=config.default_threshold,
            bearish_threshold=-config.default_threshold,
            contradiction_penalty_multiplier=config.uncertainty_contradiction_multiplier,
        )

    # Insufficient price or return history → uncertainty.
    if len(closing_prices) < config.ema_long_period:
        return _uncertain()
    if len(returns) < config.vol_long_period:
        return _uncertain()

    # --- Trend indicator: R = sign(EMA_short - EMA_long) ---
    ema_short = compute_ema(closing_prices, config.ema_short_period)
    ema_long = compute_ema(closing_prices, config.ema_long_period)
    trend_indicator = _sign(ema_short - ema_long)

    # --- Volatility ratio: V_r = σ_short / σ_long ---
    short_returns = returns[-config.vol_short_period:]
    long_returns = returns[-config.vol_long_period:]

    # statistics.stdev needs at least two data points.
    if len(short_returns) < 2 or len(long_returns) < 2:
        return _uncertain()

    sigma_short = statistics.stdev(short_returns)
    sigma_long = statistics.stdev(long_returns)

    # A zero or NaN deviation makes the ratio meaningless.
    if sigma_long == 0.0 or sigma_short == 0.0:
        return _uncertain()
    if math.isnan(sigma_short) or math.isnan(sigma_long):
        return _uncertain()

    volatility_ratio = sigma_short / sigma_long

    # --- Classification rules (Req 7.3) ---
    if volatility_ratio > config.panic_vol_ratio:
        # Panic takes priority over every other rule.
        regime = MarketRegime.PANIC
        threshold = config.panic_threshold
        contradiction_mult = standard_contradiction_mult
    elif trend_indicator != 0.0 and volatility_ratio < config.trend_vol_ratio:
        regime = MarketRegime.TREND_FOLLOWING
        threshold = config.default_threshold
        contradiction_mult = standard_contradiction_mult
    elif trend_indicator == 0.0 and volatility_ratio < config.mean_reversion_vol_ratio:
        regime = MarketRegime.MEAN_REVERSION
        threshold = config.mean_reversion_threshold
        contradiction_mult = standard_contradiction_mult
    else:
        regime = MarketRegime.UNCERTAINTY
        threshold = config.default_threshold
        contradiction_mult = config.uncertainty_contradiction_multiplier

    return RegimeClassification(
        regime=regime,
        trend_indicator=trend_indicator,
        volatility_ratio=volatility_ratio,
        bullish_threshold=threshold,
        bearish_threshold=-threshold,
        contradiction_penalty_multiplier=contradiction_mult,
    )
|
||||
+322
-16
@@ -4,7 +4,7 @@ integration for aggregation.
|
||||
Provides scoring functions used by the aggregation engine to weight
|
||||
document intelligence signals when computing trend summaries.
|
||||
|
||||
Requirements: 6.1, 6.2, 6.5
|
||||
Requirements: 2.1–2.6, 3.1–3.5, 4.2–4.3, 5.1–5.7, 6.1–6.5, 16.4–16.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -14,6 +14,24 @@ from datetime import datetime, timezone
|
||||
|
||||
from services.shared.schemas import MarketContext
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Event type base rates for information gain computation (Req 3.1)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Base rate P(event_type) per event type; looked up by compute_info_gain,
# where a lower rate yields a larger surprise weight.
EVENT_TYPE_BASE_RATES: dict[str, float] = {
    "earnings": 0.25,
    "product_launch": 0.10,
    "regulatory": 0.08,
    "legal": 0.05,
    "m_and_a": 0.03,
    "management_change": 0.06,
    "partnership": 0.12,
    "market_expansion": 0.09,
    "restructuring": 0.04,
    "dividend": 0.15,
}

# Module-level default base rate (10%).
DEFAULT_BASE_RATE = 0.1
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ScoringConfig:
|
||||
@@ -62,6 +80,37 @@ class ScoringConfig:
|
||||
volume_surge_threshold_pct: float = 50.0
|
||||
volume_surge_boost: float = 0.15
|
||||
|
||||
# --- Probabilistic scoring parameters ---
|
||||
|
||||
# Toggle: when True, use probabilistic formulas (sigmoid gate,
|
||||
# adaptive decay, info gain, regime multiplier, source accuracy).
|
||||
# When False, preserve exact current heuristic behaviour.
|
||||
probabilistic: bool = False
|
||||
|
||||
# Sigmoid gate parameters — smooth replacement for binary confidence gate.
|
||||
# Gate value: σ(k·(x - midpoint)) where k = steepness.
|
||||
sigmoid_steepness: float = 5.0
|
||||
sigmoid_midpoint: float = 0.5
|
||||
|
||||
# Information gain parameters — surprise weighting for rare events.
|
||||
# r = 1 + λ·(-log₂ P(event_type)), clamped to info_gain_max.
|
||||
info_gain_lambda: float = 0.3
|
||||
info_gain_max: float = 3.0
|
||||
default_base_rate: float = 0.1
|
||||
|
||||
# Adaptive decay parameters — β scaling factors for event-specific
|
||||
# half-life adjustment: τ_i = τ_base · (1+β_impact)·(1+β_surprise)·(1+β_market).
|
||||
adaptive_decay_impact_scale: float = 1.0
|
||||
adaptive_decay_surprise_scale: float = 1.0
|
||||
adaptive_decay_market_scale: float = 0.5
|
||||
|
||||
# Regime multiplier parameters — replaces market context multiplier.
|
||||
# M_regime = 1 + regime_return_weight·|z_r| + regime_volume_weight·|z_v|,
|
||||
# clamped to [1.0, regime_multiplier_max].
|
||||
regime_return_weight: float = 0.15
|
||||
regime_volume_weight: float = 0.10
|
||||
regime_multiplier_max: float = 2.5
|
||||
|
||||
|
||||
# Singleton default config
|
||||
DEFAULT_CONFIG = ScoringConfig()
|
||||
@@ -77,6 +126,8 @@ def recency_weight(
|
||||
reference_time: datetime,
|
||||
window: str,
|
||||
config: ScoringConfig = DEFAULT_CONFIG,
|
||||
*,
|
||||
half_life_override: float | None = None,
|
||||
) -> float:
|
||||
"""Compute an exponential recency decay weight for a document.
|
||||
|
||||
@@ -87,6 +138,8 @@ def recency_weight(
|
||||
reference_time: The "now" anchor for the aggregation window (tz-aware).
|
||||
window: One of the TrendWindow values (e.g. "7d").
|
||||
config: Scoring parameters.
|
||||
half_life_override: If provided, use this half-life instead of the
|
||||
window-based default (used for adaptive decay).
|
||||
|
||||
Returns:
|
||||
A weight in [config.min_recency_weight, 1.0].
|
||||
@@ -102,7 +155,7 @@ def recency_weight(
|
||||
return 1.0
|
||||
|
||||
age_hours = age_seconds / 3600.0
|
||||
half_life = config.half_life_hours.get(window, 72.0)
|
||||
half_life = half_life_override if half_life_override is not None else config.half_life_hours.get(window, 72.0)
|
||||
|
||||
weight = math.pow(2.0, -age_hours / half_life)
|
||||
return max(weight, config.min_recency_weight)
|
||||
@@ -170,6 +223,188 @@ def market_context_multiplier(
|
||||
return 1.0 + boost
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sigmoid confidence gate (Req 2.1–2.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def sigmoid_gate(
    x: float,
    steepness: float = 5.0,
    midpoint: float = 0.5,
) -> float:
    """Smooth confidence gate: σ(k·(x - midpoint)).

    Used in probabilistic mode in place of the binary 0/1 confidence gate;
    a higher confidence yields a larger gate value.

    Args:
        x: Extraction confidence value, typically in [0, 1].
        steepness: Steepness parameter k (default 5.0).
        midpoint: Input at which the gate equals 0.5 (default 0.5).

    Returns:
        Gate value in (0, 1); saturates to exactly 0.0 or 1.0 once
        |k·(x - midpoint)| exceeds 500.
    """
    exponent = steepness * (x - midpoint)

    # Saturate far tails so math.exp cannot overflow.
    if exponent > 500.0:
        return 1.0
    if exponent < -500.0:
        return 0.0

    return 1.0 / (1.0 + math.exp(-exponent))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Information gain surprise weighting (Req 3.1–3.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_info_gain(
    event_type: str | None,
    lambda_param: float = 0.3,
    max_gain: float = 3.0,
    default_base_rate: float = 0.1,
) -> float:
    """Surprise weighting for an event type.

    Formula: r = 1 + λ·(-log₂ P(event_type)), clamped to [1.0, max_gain].

    P is looked up in ``EVENT_TYPE_BASE_RATES``; event types missing from
    the table use ``default_base_rate``. The rarer the event, the larger r.

    Args:
        event_type: Event type string (e.g. "earnings", "m_and_a"), or None.
        lambda_param: Scaling parameter λ (default 0.3).
        max_gain: Upper clamp for the factor (default 3.0).
        default_base_rate: Fallback base rate for unknown event types.

    Returns:
        Information gain factor r in [1.0, max_gain]. Returns 1.0 when
        ``event_type`` is None or when no positive base rate is available.
    """
    if event_type is None:
        return 1.0

    rate = EVENT_TYPE_BASE_RATES.get(event_type, default_base_rate)
    if rate <= 0.0:
        # log₂ is undefined for non-positive rates: retry with the fallback,
        # and give up on surprise weighting if that is unusable too.
        rate = default_base_rate
        if rate <= 0.0:
            return 1.0

    gain = 1.0 + lambda_param * (-math.log2(rate))
    return min(max(gain, 1.0), max_gain)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adaptive recency decay (Req 5.1–5.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_adaptive_half_life(
    base_half_life: float,
    impact_score: float,
    info_gain_factor: float,
    market_multiplier: float,
    config: ScoringConfig,
) -> float:
    """Compute adaptive half-life for event-specific recency decay.

    Formula: τ_i = τ_base · (1 + β_impact) · (1 + β_surprise) · (1 + β_market)

    where
        β_impact   = impact_score · adaptive_decay_impact_scale
        β_surprise = ((info_gain_factor - 1) / 2) · adaptive_decay_surprise_scale
        β_market   = ((market_multiplier - 1) / 0.45) · adaptive_decay_market_scale
                     when market_multiplier > 1, else 0

    The result is never below ``base_half_life`` (decay is never faster),
    and a non-finite product (NaN/inf caused by bad upstream inputs) falls
    back to ``base_half_life`` instead of propagating.

    Args:
        base_half_life: Fixed half-life for the window (hours).
        impact_score: Signal impact score in [0, 1].
        info_gain_factor: Information gain factor r in [1.0, 3.0].
        market_multiplier: Market context/regime multiplier, >= 1.0. β_market
            is normalised against 1.45, so larger multipliers yield
            β_market above ``adaptive_decay_market_scale``.
        config: Scoring config providing the three ``adaptive_decay_*_scale``
            attributes.

    Returns:
        Adaptive half-life in hours, >= base_half_life.
    """
    # β_impact: 0 → 0, 1 → adaptive_decay_impact_scale.
    beta_impact = impact_score * config.adaptive_decay_impact_scale

    # β_surprise: r = 1 → 0, r = 3 → adaptive_decay_surprise_scale.
    beta_surprise = ((info_gain_factor - 1.0) / 2.0) * config.adaptive_decay_surprise_scale

    # β_market: 1.0 → 0, 1.45 → adaptive_decay_market_scale.
    if market_multiplier > 1.0:
        beta_market = ((market_multiplier - 1.0) / 0.45) * config.adaptive_decay_market_scale
    else:
        beta_market = 0.0

    tau = base_half_life * (1.0 + beta_impact) * (1.0 + beta_surprise) * (1.0 + beta_market)

    # max(nan, base) evaluates to nan, so the ">= base" guarantee needs an
    # explicit finiteness check before the floor is applied.
    if math.isnan(tau) or math.isinf(tau):
        return base_half_life

    # Ensure adaptive half-life is never less than base (Property 5).
    return max(tau, base_half_life)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regime multiplier (Req 6.1–6.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_regime_multiplier(
    returns: list[float] | None,
    volumes: list[float] | None,
    config: ScoringConfig = DEFAULT_CONFIG,
) -> float:
    """Regime-aware multiplier from return and volume z-scores.

    Formula:
        M_regime = 1 + regime_return_weight·|z_r| + regime_volume_weight·|z_v|
    clamped to [1.0, config.regime_multiplier_max].

    z_r is the z-score of the latest return against the last (up to) 20
    non-NaN returns; z_v is the same for log-volumes, with volumes floored
    at 1.0 before the log. Both use the population standard deviation and
    default to 0.0 when fewer than two usable values exist or the window
    has no spread.

    Args:
        returns: Recent daily returns, oldest first; fewer than two usable
            values yields 1.0.
        volumes: Recent daily volumes, oldest first; optional.
        config: Scoring config with regime multiplier parameters.

    Returns:
        Regime multiplier in [1.0, config.regime_multiplier_max]; 1.0 when
        the computed value is NaN or infinite.
    """

    def _latest_zscore(window: list[float]) -> float:
        # Population z-score of the window's last element; 0.0 if flat.
        mean = sum(window) / len(window)
        variance = sum((x - mean) ** 2 for x in window) / len(window)
        sigma = math.sqrt(variance)
        if sigma > 0.0:
            return (window[-1] - mean) / sigma
        return 0.0

    if not returns or len(returns) < 2:
        return 1.0

    usable_returns = [r for r in returns if not math.isnan(r)]
    if len(usable_returns) < 2:
        return 1.0

    # Return z-score: z_r = (r_t - μ_20) / σ_20
    z_return = _latest_zscore(usable_returns[-20:])

    # Volume z-score: z_v = (log(V_t) - μ_V) / σ_V
    z_volume = 0.0
    if volumes and len(volumes) >= 2:
        usable_volumes = [v for v in volumes if not math.isnan(v)]
        if len(usable_volumes) >= 2:
            # Floor at 1.0 so zero/negative volumes cannot break the log.
            log_window = [math.log(max(v, 1.0)) for v in usable_volumes[-20:]]
            z_volume = _latest_zscore(log_window)

    multiplier = (
        1.0
        + config.regime_return_weight * abs(z_return)
        + config.regime_volume_weight * abs(z_volume)
    )
    # Guard against NaN/inf from upstream data.
    if math.isnan(multiplier) or math.isinf(multiplier):
        return 1.0
    return max(1.0, min(multiplier, config.regime_multiplier_max))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Combined document signal weight
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -186,6 +421,12 @@ class SignalWeight:
|
||||
market_ctx_multiplier: float # >= 1.0
|
||||
combined: float
|
||||
|
||||
# New optional fields for probabilistic mode
|
||||
sigmoid_gate: float | None = None # Smooth gate value [0, 1]
|
||||
info_gain_factor: float = 1.0 # Surprise multiplier
|
||||
source_accuracy_factor: float = 1.0 # Historical accuracy multiplier
|
||||
regime_multiplier: float | None = None # M_regime replacing M_context
|
||||
|
||||
|
||||
def compute_signal_weight(
|
||||
published_at: datetime,
|
||||
@@ -196,18 +437,23 @@ def compute_signal_weight(
|
||||
extraction_confidence: float = 0.5,
|
||||
market_ctx: MarketContext | None = None,
|
||||
config: ScoringConfig = DEFAULT_CONFIG,
|
||||
*,
|
||||
event_type: str | None = None,
|
||||
impact_score: float = 0.5,
|
||||
source_accuracy_factor: float = 1.0,
|
||||
returns: list[float] | None = None,
|
||||
volumes: list[float] | None = None,
|
||||
) -> SignalWeight:
|
||||
"""Compute the combined aggregation weight for a single document signal.
|
||||
|
||||
The formula is:
|
||||
When ``config.probabilistic`` is False (default), the formula is:
|
||||
combined = confidence_gate * recency * credibility
|
||||
* (1 + novelty_bonus) * market_ctx_multiplier
|
||||
|
||||
where novelty_bonus = novelty_score * config.novelty_bonus_max
|
||||
and market_ctx_multiplier >= 1.0 based on volatility/volume features.
|
||||
|
||||
Documents with extraction_confidence below config.confidence_floor
|
||||
receive a combined weight of 0.0 (gated out).
|
||||
When ``config.probabilistic`` is True, the formula is:
|
||||
combined = sigmoid_gate * recency(adaptive) * credibility
|
||||
* (1 + novelty_bonus) * info_gain * source_accuracy
|
||||
* regime_multiplier
|
||||
|
||||
Args:
|
||||
published_at: Document publication time.
|
||||
@@ -218,27 +464,82 @@ def compute_signal_weight(
|
||||
extraction_confidence: Extraction confidence from the model (0-1).
|
||||
market_ctx: Optional market context features for the symbol.
|
||||
config: Scoring parameters.
|
||||
event_type: Optional event type for information gain computation.
|
||||
impact_score: Signal impact score in [0, 1] (default 0.5).
|
||||
source_accuracy_factor: Historical source accuracy factor (default 1.0).
|
||||
returns: Optional list of recent daily returns for regime multiplier.
|
||||
volumes: Optional list of recent daily volumes for regime multiplier.
|
||||
|
||||
Returns:
|
||||
A ``SignalWeight`` with the component breakdown and combined score.
|
||||
"""
|
||||
# Confidence gate
|
||||
gate = 1.0 if extraction_confidence >= config.confidence_floor else 0.0
|
||||
|
||||
rec = recency_weight(published_at, reference_time, window, config)
|
||||
cred = credibility_weight(source_credibility, config)
|
||||
bonus = novelty_score * config.novelty_bonus_max
|
||||
mkt_mult = market_context_multiplier(market_ctx, config)
|
||||
|
||||
combined = gate * rec * cred * (1.0 + bonus) * mkt_mult
|
||||
if not config.probabilistic:
|
||||
# --- Heuristic mode: preserve exact current formula ---
|
||||
gate = 1.0 if extraction_confidence >= config.confidence_floor else 0.0
|
||||
rec = recency_weight(published_at, reference_time, window, config)
|
||||
mkt_mult = market_context_multiplier(market_ctx, config)
|
||||
|
||||
combined = gate * rec * cred * (1.0 + bonus) * mkt_mult
|
||||
|
||||
return SignalWeight(
|
||||
recency=rec,
|
||||
credibility=cred,
|
||||
novelty_bonus=bonus,
|
||||
confidence_gate=gate,
|
||||
market_ctx_multiplier=mkt_mult,
|
||||
combined=combined,
|
||||
)
|
||||
|
||||
# --- Probabilistic mode ---
|
||||
|
||||
# 1. Sigmoid confidence gate (Req 2.1–2.5)
|
||||
sg = sigmoid_gate(extraction_confidence, config.sigmoid_steepness, config.sigmoid_midpoint)
|
||||
|
||||
# 2. Information gain factor (Req 3.1–3.5)
|
||||
ig = compute_info_gain(
|
||||
event_type,
|
||||
lambda_param=config.info_gain_lambda,
|
||||
max_gain=config.info_gain_max,
|
||||
default_base_rate=config.default_base_rate,
|
||||
)
|
||||
|
||||
# 3. Regime multiplier (Req 6.1–6.5) — replaces market_context_multiplier
|
||||
rm = compute_regime_multiplier(returns, volumes, config)
|
||||
|
||||
# 4. Adaptive recency decay (Req 5.1–5.7)
|
||||
base_half_life = config.half_life_hours.get(window, 72.0)
|
||||
adaptive_hl = compute_adaptive_half_life(
|
||||
base_half_life=base_half_life,
|
||||
impact_score=impact_score,
|
||||
info_gain_factor=ig,
|
||||
market_multiplier=rm,
|
||||
config=config,
|
||||
)
|
||||
rec = recency_weight(
|
||||
published_at, reference_time, window, config,
|
||||
half_life_override=adaptive_hl,
|
||||
)
|
||||
|
||||
# 5. Source accuracy factor (Req 4.2–4.3)
|
||||
saf = source_accuracy_factor
|
||||
|
||||
# 6. Combined weight
|
||||
combined = sg * rec * cred * (1.0 + bonus) * ig * saf * rm
|
||||
|
||||
return SignalWeight(
|
||||
recency=rec,
|
||||
credibility=cred,
|
||||
novelty_bonus=bonus,
|
||||
confidence_gate=gate,
|
||||
market_ctx_multiplier=mkt_mult,
|
||||
confidence_gate=sg, # sigmoid gate value in probabilistic mode
|
||||
market_ctx_multiplier=rm, # regime multiplier stored here for compat
|
||||
combined=combined,
|
||||
sigmoid_gate=sg,
|
||||
info_gain_factor=ig,
|
||||
source_accuracy_factor=saf,
|
||||
regime_multiplier=rm,
|
||||
)
|
||||
|
||||
|
||||
@@ -256,6 +557,11 @@ class WeightedSignal:
|
||||
sentiment_value: float # numeric sentiment: +1 positive, -1 negative, 0 neutral/mixed
|
||||
impact_score: float
|
||||
|
||||
# New optional fields for probabilistic mode
|
||||
info_gain_factor: float = 1.0 # r = 1 + λ·(-log₂ P(event_type))
|
||||
source_accuracy_factor: float = 1.0 # [0.5, 1.5] from historical accuracy
|
||||
adaptive_half_life: float | None = None # τ_i when adaptive decay is active
|
||||
|
||||
|
||||
def sentiment_to_numeric(sentiment: str) -> float:
|
||||
"""Map a sentiment label to a signed numeric value."""
|
||||
|
||||
@@ -8,11 +8,12 @@ competitive_signal_records.
|
||||
Also converts pattern and competitive signals into WeightedSignal
|
||||
objects for the aggregation engine.
|
||||
|
||||
Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 9.1
|
||||
Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 9.1, 12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
@@ -76,6 +77,38 @@ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Graph-distance attenuation (Requirements: 12.1–12.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_graph_distance_attenuation(
    source_strength: float,
    correlation: float,
    distance: int,
) -> float:
    """Compute attenuated transfer strength using graph distance.

    Formula: S_transfer = S_source · ρ_historical · e^(-d_network)

    Args:
        source_strength: Source signal strength S_source, expected in [0, 1].
        correlation: Historical price correlation ρ_historical, expected
            in [0, 1].
        distance: Graph distance d_network (shortest path length). Only
            distances 1, 2 and 3 produce a transfer.

    Returns:
        Transfer strength, always non-negative. Returns 0.0 when
        ``distance`` is below 1 or exceeds 3, and when the product of the
        inputs would be negative (e.g. a negative correlation).

    Requirements: 12.1, 12.7
    """
    # Distances outside the supported hop range [1, 3] transfer nothing.
    if distance < 1 or distance > 3:
        return 0.0

    transfer = source_strength * correlation * math.exp(-distance)

    # Enforce the documented non-negative contract: an out-of-range negative
    # input must not flip the sign of the propagated signal. max() with 0.0
    # as the first argument also maps a NaN product to 0.0, because NaN never
    # compares greater than 0.0.
    return max(0.0, transfer)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# propagate_signals
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -87,10 +120,20 @@ async def propagate_signals(
|
||||
impact_score: float,
|
||||
document_id: str,
|
||||
config: Optional[CompetitiveConfig] = None,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
) -> list[CompetitiveSignalRecord]:
|
||||
"""Look up competitors, query cross-company patterns, produce weighted
|
||||
competitive signals, and persist them.
|
||||
|
||||
When ``probabilistic=True``, uses graph-distance attenuation:
|
||||
S_transfer = S_source · ρ_historical · e^(-d_network)
|
||||
with 90-day rolling Pearson correlation for ρ_historical and shortest
|
||||
path in the competitor relationship graph for d_network (capped at 3).
|
||||
|
||||
When ``probabilistic=False``, preserves the existing flat transfer
|
||||
behavior.
|
||||
|
||||
Args:
|
||||
pool: asyncpg connection pool.
|
||||
ticker: Source company ticker that received the catalyst.
|
||||
@@ -98,9 +141,12 @@ async def propagate_signals(
|
||||
impact_score: The source document's impact score.
|
||||
document_id: The source document ID.
|
||||
config: Optional competitive config overrides.
|
||||
probabilistic: Use graph-distance attenuation when True.
|
||||
|
||||
Returns:
|
||||
List of CompetitiveSignalRecord objects produced and persisted.
|
||||
|
||||
Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7
|
||||
"""
|
||||
cfg = config or CompetitiveConfig()
|
||||
now = datetime.now(timezone.utc)
|
||||
@@ -127,7 +173,7 @@ async def propagate_signals(
|
||||
# Determine the competitor ticker (the other side of the relationship)
|
||||
competitor_ticker = ticker_b if ticker_a == ticker else ticker_a
|
||||
|
||||
# Threshold gating (Req 4.5)
|
||||
# Threshold gating (Req 4.5 / Req 12.6)
|
||||
if rel_strength < cfg.propagation_strength_threshold:
|
||||
logger.info(
|
||||
"Skipping propagation %s→%s: relationship strength %.3f "
|
||||
@@ -161,14 +207,39 @@ async def propagate_signals(
|
||||
)
|
||||
continue
|
||||
|
||||
# Compute signal strength (Req 4.3)
|
||||
raw_strength = (
|
||||
pattern.avg_strength
|
||||
* rel_strength
|
||||
* pattern.pattern_confidence
|
||||
* impact_score
|
||||
)
|
||||
signal_strength = min(max(raw_strength, 0.0), 1.0)
|
||||
if probabilistic:
|
||||
# Graph-distance attenuation (Req 12.1–12.7)
|
||||
# For direct competitors, graph distance = 1
|
||||
graph_distance = 1
|
||||
|
||||
# Use relationship strength as a proxy for historical
|
||||
# correlation when full correlation data is unavailable.
|
||||
# Default correlation: 0.3 same-sector, 0.1 cross-sector.
|
||||
# Here we use rel_strength as a reasonable proxy since
|
||||
# the full 90-day Pearson correlation requires market data
|
||||
# that is fetched asynchronously in the integration layer.
|
||||
correlation = max(rel_strength, 0.1)
|
||||
|
||||
source_strength = (
|
||||
pattern.avg_strength
|
||||
* pattern.pattern_confidence
|
||||
* impact_score
|
||||
)
|
||||
raw_strength = compute_graph_distance_attenuation(
|
||||
source_strength=min(max(source_strength, 0.0), 1.0),
|
||||
correlation=correlation,
|
||||
distance=graph_distance,
|
||||
)
|
||||
signal_strength = min(max(raw_strength, 0.0), 1.0)
|
||||
else:
|
||||
# Flat transfer (existing behavior, Req 4.3)
|
||||
raw_strength = (
|
||||
pattern.avg_strength
|
||||
* rel_strength
|
||||
* pattern.pattern_confidence
|
||||
* impact_score
|
||||
)
|
||||
signal_strength = min(max(raw_strength, 0.0), 1.0)
|
||||
|
||||
# Determine direction
|
||||
direction = (
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
"""Source accuracy tracker for historical prediction accuracy per source.
|
||||
|
||||
Tracks per-source accuracy metrics (fraction of correct directional calls)
|
||||
used by the probabilistic scoring pipeline to weight source credibility.
|
||||
Accuracy data is stored in the ``source_accuracy`` database table and
|
||||
fetched in batch at the start of each aggregation cycle.
|
||||
|
||||
Requirements: 4.1, 4.2, 4.3, 4.4, 4.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class SourceAccuracy:
    """Per-source historical prediction accuracy.

    Attributes:
        source_id: Unique identifier for the signal source.
        accuracy_ratio: Fraction of correct directional calls, in [0, 1].
        sample_count: Number of signals with known outcomes.
        last_updated: Timestamp of the most recent accuracy update.
    """

    source_id: str
    accuracy_ratio: float
    sample_count: int
    last_updated: datetime

    @property
    def accuracy_factor(self) -> float:
        """Multiplicative factor for credibility weight.

        Returns 1.0 (neutral) when sample_count < 10 or when
        accuracy_ratio is NaN. Otherwise scales linearly from 0.5
        (0% accuracy) to 1.5 (100% accuracy). Corrupted accuracy_ratio
        values outside [0, 1] are clamped before computing the factor.
        """
        if self.sample_count < 10:
            return 1.0

        ratio = self.accuracy_ratio
        # NaN is the only float that is unequal to itself. Without this
        # guard, min(1.0, nan) evaluates to 1.0 and a corrupted row would
        # receive the maximum 1.5 boost instead of the neutral factor.
        if ratio != ratio:
            return 1.0

        clamped = max(0.0, min(1.0, ratio))
        return 0.5 + clamped
|
||||
|
||||
|
||||
async def fetch_source_accuracy(
    pool: asyncpg.Pool,
    source_ids: list[str],
) -> dict[str, SourceAccuracy]:
    """Fetch accuracy metrics for a batch of sources.

    Queries the ``source_accuracy`` table for all requested *source_ids*
    in a single round-trip. Returns a mapping from source_id to its
    :class:`SourceAccuracy` record.

    When the database is unreachable or the query fails, returns an empty
    dict so that callers fall back to the neutral accuracy factor of 1.0.
    Individual rows whose ``accuracy_ratio`` or ``sample_count`` cannot be
    converted (NULL, non-numeric, NaN) are skipped with a warning, so one
    corrupted row neither raises nor hides the remaining sources.

    Args:
        pool: asyncpg connection pool used for the query.
        source_ids: Source identifiers to look up. An empty list
            short-circuits without touching the database.

    Returns:
        Mapping of source_id to ``SourceAccuracy`` for every valid row
        found; sources without a usable row are absent from the mapping.
    """
    if not source_ids:
        return {}

    try:
        rows = await pool.fetch(
            """
            SELECT source_id, accuracy_ratio, sample_count, last_updated
            FROM source_accuracy
            WHERE source_id = ANY($1::varchar[])
            """,
            source_ids,
        )
    except Exception:
        logger.warning(
            "Failed to fetch source accuracy; defaulting to neutral factor",
            exc_info=True,
        )
        return {}

    result: dict[str, SourceAccuracy] = {}
    for row in rows:
        sid = row["source_id"]
        try:
            ratio = float(row["accuracy_ratio"])
            sample_count = int(row["sample_count"])
        except (TypeError, ValueError):
            # NULL or non-numeric column: leave this source out so the
            # caller applies the neutral factor instead of crashing.
            logger.warning(
                "Skipping malformed source_accuracy row for %s",
                sid,
                exc_info=True,
            )
            continue

        # NaN is the only float unequal to itself; clamping it with
        # min()/max() would silently turn it into a perfect 1.0 ratio.
        if ratio != ratio:
            logger.warning(
                "Skipping source_accuracy row for %s: accuracy_ratio is NaN",
                sid,
            )
            continue

        # Clamp corrupted accuracy_ratio to [0.0, 1.0]
        ratio = max(0.0, min(1.0, ratio))
        result[sid] = SourceAccuracy(
            source_id=sid,
            accuracy_ratio=ratio,
            sample_count=sample_count,
            last_updated=row["last_updated"],
        )
    return result
|
||||
|
||||
|
||||
async def update_source_accuracy(
    pool: asyncpg.Pool,
    source_id: str,
    realized_outcomes: list[tuple[str, float]],
) -> None:
    """Update accuracy metrics for a source from realized price outcomes.

    Each element of *realized_outcomes* is a ``(predicted_direction,
    actual_7d_return)`` pair. A prediction is considered correct when:

    * ``predicted_direction`` is ``"bullish"`` and ``actual_7d_return > 0``
    * ``predicted_direction`` is ``"bearish"`` and ``actual_7d_return < 0``

    Neutral predictions, zero returns, and NaN returns (outcome unknown)
    are excluded from the accuracy calculation.

    The function upserts the ``source_accuracy`` row, merging the new
    outcomes with any existing sample count and accuracy ratio as a
    sample-weighted average. Database failures are logged and swallowed
    so the caller continues with the previously stored data.

    Args:
        pool: asyncpg connection pool used for the upsert.
        source_id: Identifier of the source whose metrics are updated.
        realized_outcomes: ``(predicted_direction, actual_7d_return)``
            pairs; the direction is matched case-insensitively.
    """
    if not realized_outcomes:
        return

    # Count correct directional calls from the new outcomes.
    correct = 0
    total = 0
    for predicted_direction, actual_return in realized_outcomes:
        direction = predicted_direction.lower()
        if direction not in ("bullish", "bearish"):
            continue

        # Classify the realized move with explicit comparisons: a zero
        # return and a NaN return (which fails every comparison) both fall
        # through to the else branch and are excluded, rather than a NaN
        # being counted as an incorrect call.
        if actual_return > 0:
            realized = "bullish"
        elif actual_return < 0:
            realized = "bearish"
        else:
            continue

        total += 1
        if direction == realized:
            correct += 1

    # Nothing scoreable in this batch: leave the stored row untouched.
    if total == 0:
        return

    now = datetime.now(timezone.utc)

    try:
        # $2 is the batch accuracy and $3 the batch size; on conflict the
        # stored ratio becomes the sample-weighted mean of old and new.
        await pool.execute(
            """
            INSERT INTO source_accuracy (source_id, accuracy_ratio, sample_count, last_updated)
            VALUES ($1, $2, $3, $4)
            ON CONFLICT (source_id) DO UPDATE SET
                accuracy_ratio = (
                    source_accuracy.accuracy_ratio * source_accuracy.sample_count
                    + $2 * $3
                ) / NULLIF(source_accuracy.sample_count + $3, 0),
                sample_count = source_accuracy.sample_count + $3,
                last_updated = $4
            """,
            source_id,
            correct / total,
            total,
            now,
        )
    except Exception:
        logger.warning(
            "Failed to update source accuracy for %s; continuing with stale data",
            source_id,
            exc_info=True,
        )
|
||||
+570
-15
@@ -19,6 +19,10 @@ from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.aggregation.bayesian import (
|
||||
BayesianPosterior,
|
||||
compute_bayesian_posterior,
|
||||
)
|
||||
from services.aggregation.contradiction import CatalystEntry, detect_contradictions
|
||||
from services.aggregation.evidence import (
|
||||
EvidenceRankConfig,
|
||||
@@ -28,6 +32,7 @@ from services.aggregation.evidence import (
|
||||
from services.aggregation.evidence import (
|
||||
rank_evidence as _rank_evidence_composite,
|
||||
)
|
||||
from services.aggregation.interpolation import integrate_macro_signals
|
||||
from services.aggregation.market_context import fetch_market_context
|
||||
from services.aggregation.pattern_matcher import find_self_patterns
|
||||
from services.aggregation.projection import (
|
||||
@@ -35,6 +40,11 @@ from services.aggregation.projection import (
|
||||
compute_projection,
|
||||
persist_trend_projection,
|
||||
)
|
||||
from services.aggregation.regime import (
|
||||
MarketRegime,
|
||||
RegimeClassification,
|
||||
classify_regime,
|
||||
)
|
||||
from services.aggregation.scoring import (
|
||||
ScoringConfig,
|
||||
WeightedSignal,
|
||||
@@ -46,6 +56,7 @@ from services.aggregation.signal_propagation import (
|
||||
CompetitiveSignalRecord,
|
||||
build_pattern_weighted_signals,
|
||||
)
|
||||
from services.aggregation.source_accuracy import fetch_source_accuracy
|
||||
from services.shared.metrics import (
|
||||
AGGREGATION_CONTRADICTION_SCORE,
|
||||
AGGREGATION_DURATION,
|
||||
@@ -53,6 +64,7 @@ from services.shared.metrics import (
|
||||
AGGREGATION_WINDOWS_COMPUTED,
|
||||
)
|
||||
from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow
|
||||
from services.trading.model_quality_gate import QualityGateResult, evaluate_quality_gate
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -80,6 +92,7 @@ class AggregationConfig:
|
||||
macro_enabled: bool = True # runtime toggle state
|
||||
competitive_signal_weight: float = 0.2 # relative weight of pattern signals
|
||||
competitive_enabled: bool = True # runtime toggle state
|
||||
probabilistic_scoring_enabled: bool = False # probabilistic pipeline toggle
|
||||
|
||||
def effective_windows(self) -> list[str]:
|
||||
if self.windows:
|
||||
@@ -232,6 +245,59 @@ async def fetch_competitive_enabled(pool: asyncpg.Pool) -> bool | None:
|
||||
return row["competitive_enabled"].lower() == "true"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch probabilistic scoring toggle from risk_configs
|
||||
#
|
||||
# PROBABILISTIC PIPELINE TOGGLE (Requirements 16.3, 16.4, 16.5, 16.6, 16.7):
|
||||
# - Read once per aggregation cycle from the risk_configs table.
|
||||
# - When False (default): the heuristic pipeline is used — identical outputs
|
||||
# to the current system.
|
||||
# - When True: the new Bayesian, regime-aware, and adaptive formulas are
|
||||
# used for all pipeline stages.
|
||||
# - Defaults to False when the key is missing, the value is invalid, or the
|
||||
# database is unreachable (fail-safe to heuristic mode).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PROBABILISTIC_TOGGLE_QUERY = """
|
||||
SELECT config->>'probabilistic_scoring_enabled' AS probabilistic_scoring_enabled
|
||||
FROM risk_configs
|
||||
WHERE active = TRUE
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
async def fetch_probabilistic_scoring_enabled(pool: asyncpg.Pool) -> bool:
    """Read the probabilistic-scoring toggle from the risk_configs table.

    Only an explicit string ``"true"`` (any letter case) enables the
    probabilistic pipeline. A missing row, a missing key, a value that is
    not the string ``"true"``/``"false"``, or any exception raised while
    reading all yield False, so every failure falls back to the heuristic
    pipeline.

    Args:
        pool: asyncpg connection pool used to run the toggle query.

    Returns:
        True when the toggle is explicitly enabled, False otherwise.

    Requirements: 16.3, 16.6
    """
    try:
        record = await pool.fetchrow(_PROBABILISTIC_TOGGLE_QUERY)
        value = None if record is None else record["probabilistic_scoring_enabled"]
        if value is None:
            # No active config row, or the key is absent from the config.
            return False

        # Normalise once; non-string values have no valid normal form.
        normalized = value.lower() if isinstance(value, str) else None
        if normalized == "true":
            return True
        if normalized == "false":
            return False

        logger.warning(
            "Invalid probabilistic_scoring_enabled value %r in "
            "risk_configs; defaulting to heuristic pipeline",
            value,
        )
        return False
    except Exception:
        logger.warning(
            "Failed to read probabilistic_scoring_enabled from risk_configs; "
            "defaulting to heuristic pipeline",
            exc_info=True,
        )
        return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch competitive signals targeting a ticker within a time window
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -366,6 +432,9 @@ def build_macro_weighted_signals(
|
||||
window: str,
|
||||
macro_signal_weight: float = 0.3,
|
||||
config: ScoringConfig | None = None,
|
||||
*,
|
||||
returns: list[float] | None = None,
|
||||
volumes: list[float] | None = None,
|
||||
) -> list[WeightedSignal]:
|
||||
"""Convert macro impact records into WeightedSignal objects.
|
||||
|
||||
@@ -375,6 +444,9 @@ def build_macro_weighted_signals(
|
||||
- impact_score = macro_impact_score * macro_signal_weight
|
||||
- recency decay from the global event's publication time
|
||||
- confidence gating from the macro record's confidence
|
||||
|
||||
When ``config.probabilistic`` is True, passes returns/volumes for
|
||||
regime multiplier computation.
|
||||
"""
|
||||
cfg = config or ScoringConfig()
|
||||
signals: list[WeightedSignal] = []
|
||||
@@ -387,6 +459,8 @@ def build_macro_weighted_signals(
|
||||
novelty_score=0.5,
|
||||
extraction_confidence=mir.confidence,
|
||||
config=cfg,
|
||||
returns=returns,
|
||||
volumes=volumes,
|
||||
)
|
||||
sentiment = _DIRECTION_TO_SENTIMENT.get(mir.impact_direction, 0.0)
|
||||
impact = mir.macro_impact_score * macro_signal_weight
|
||||
@@ -412,11 +486,24 @@ def build_weighted_signals(
|
||||
window: str,
|
||||
market_ctx: Any | None = None,
|
||||
config: ScoringConfig | None = None,
|
||||
*,
|
||||
source_accuracy_map: dict[str, float] | None = None,
|
||||
returns: list[float] | None = None,
|
||||
volumes: list[float] | None = None,
|
||||
) -> list[WeightedSignal]:
|
||||
"""Convert impact records into WeightedSignal objects using the scoring module."""
|
||||
"""Convert impact records into WeightedSignal objects using the scoring module.
|
||||
|
||||
When ``config.probabilistic`` is True, passes source accuracy factors,
|
||||
event types, and market data (returns/volumes) to the scoring pipeline
|
||||
for regime multiplier and adaptive decay computation.
|
||||
"""
|
||||
cfg = config or ScoringConfig()
|
||||
accuracy_map = source_accuracy_map or {}
|
||||
signals: list[WeightedSignal] = []
|
||||
for imp in impacts:
|
||||
# Look up source accuracy factor for this document's source
|
||||
saf = accuracy_map.get(imp.document_id, 1.0)
|
||||
|
||||
sw = compute_signal_weight(
|
||||
published_at=imp.published_at,
|
||||
reference_time=reference_time,
|
||||
@@ -426,6 +513,11 @@ def build_weighted_signals(
|
||||
extraction_confidence=imp.confidence,
|
||||
market_ctx=market_ctx,
|
||||
config=cfg,
|
||||
event_type=imp.catalyst_type if cfg.probabilistic else None,
|
||||
impact_score=imp.impact_score,
|
||||
source_accuracy_factor=saf,
|
||||
returns=returns,
|
||||
volumes=volumes,
|
||||
)
|
||||
signals.append(
|
||||
WeightedSignal(
|
||||
@@ -433,6 +525,8 @@ def build_weighted_signals(
|
||||
weight=sw,
|
||||
sentiment_value=sentiment_to_numeric(imp.sentiment),
|
||||
impact_score=imp.impact_score,
|
||||
info_gain_factor=sw.info_gain_factor,
|
||||
source_accuracy_factor=sw.source_accuracy_factor,
|
||||
)
|
||||
)
|
||||
return signals
|
||||
@@ -649,10 +743,15 @@ def assemble_trend_summary(
|
||||
market_ctx: Any | None = None,
|
||||
max_evidence: int = MAX_EVIDENCE_REFS,
|
||||
reference_time: datetime | None = None,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
regime: RegimeClassification | None = None,
|
||||
) -> TrendSummary:
|
||||
"""Build a complete TrendSummary from weighted signals and impact records."""
|
||||
result = assemble_trend_with_evidence(
|
||||
ticker, window, signals, impacts, market_ctx, max_evidence, reference_time,
|
||||
probabilistic=probabilistic,
|
||||
regime=regime,
|
||||
)
|
||||
return result.summary
|
||||
|
||||
@@ -665,8 +764,25 @@ def assemble_trend_with_evidence(
|
||||
market_ctx: Any | None = None,
|
||||
max_evidence: int = MAX_EVIDENCE_REFS,
|
||||
reference_time: datetime | None = None,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
regime: RegimeClassification | None = None,
|
||||
) -> AssembledTrend:
|
||||
"""Build a TrendSummary and return detailed evidence rankings for persistence."""
|
||||
"""Build a TrendSummary and return detailed evidence rankings for persistence.
|
||||
|
||||
When ``probabilistic`` is True:
|
||||
- Computes Bayesian posterior from merged signals
|
||||
- Uses Bayesian confidence formula for trend confidence
|
||||
- Uses entropy-based direction classification
|
||||
- Applies regime-adjusted thresholds
|
||||
- Populates probabilistic TrendSummary fields
|
||||
- Stores probabilistic outputs in market_context JSONB
|
||||
|
||||
When ``probabilistic`` is False:
|
||||
- Preserves exact current heuristic behavior (no changes)
|
||||
|
||||
Requirements: 1.1, 1.2, 8.1–8.5, 9.1–9.6, 7.8, 16.4, 16.5
|
||||
"""
|
||||
if reference_time is None:
|
||||
reference_time = datetime.now(timezone.utc)
|
||||
|
||||
@@ -677,15 +793,102 @@ def assemble_trend_with_evidence(
|
||||
CatalystEntry(document_id=imp.document_id, catalyst_type=imp.catalyst_type)
|
||||
for imp in impacts
|
||||
]
|
||||
contradiction_result = detect_contradictions(signals, catalyst_entries)
|
||||
contradiction_result = detect_contradictions(
|
||||
signals, catalyst_entries, probabilistic=probabilistic,
|
||||
)
|
||||
contradiction = contradiction_result.score
|
||||
|
||||
direction = derive_trend_direction(avg_sentiment, contradiction)
|
||||
confidence = compute_trend_confidence(signals, contradiction)
|
||||
if not probabilistic:
|
||||
# --- Heuristic mode: preserve exact current behavior ---
|
||||
direction = derive_trend_direction(avg_sentiment, contradiction)
|
||||
confidence = compute_trend_confidence(signals, contradiction)
|
||||
|
||||
# Get detailed evidence rankings for persistence
|
||||
ev_config = EvidenceRankConfig(max_refs=max_evidence)
|
||||
supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, ev_config)
|
||||
|
||||
supporting = list(dict.fromkeys(r.document_id for r in supporting_ranked))
|
||||
opposing = list(dict.fromkeys(r.document_id for r in opposing_ranked))
|
||||
|
||||
catalysts, risks = extract_catalysts_and_risks(impacts, signals)
|
||||
|
||||
# Trend strength: absolute value of weighted sentiment, clamped to [0, 1]
|
||||
strength = round(min(abs(avg_sentiment), 1.0), 4)
|
||||
|
||||
summary = TrendSummary(
|
||||
entity_type="company",
|
||||
entity_id=ticker,
|
||||
window=TrendWindow(window),
|
||||
trend_direction=direction,
|
||||
trend_strength=strength,
|
||||
confidence=confidence,
|
||||
top_supporting_evidence=supporting,
|
||||
top_opposing_evidence=opposing,
|
||||
dominant_catalysts=catalysts,
|
||||
material_risks=risks,
|
||||
contradiction_score=contradiction,
|
||||
disagreement_details=contradiction_result.details,
|
||||
market_context=market_ctx,
|
||||
generated_at=reference_time,
|
||||
)
|
||||
|
||||
return AssembledTrend(
|
||||
summary=summary,
|
||||
supporting_evidence=supporting_ranked,
|
||||
opposing_evidence=opposing_ranked,
|
||||
)
|
||||
|
||||
# --- Probabilistic mode (Req 8.1–8.5, 9.1–9.6) ---
|
||||
|
||||
# Default to uncertainty regime when not provided (Req 7.9)
|
||||
if regime is None:
|
||||
regime = RegimeClassification(
|
||||
regime=MarketRegime.UNCERTAINTY,
|
||||
trend_indicator=0.0,
|
||||
volatility_ratio=1.0,
|
||||
bullish_threshold=0.15,
|
||||
bearish_threshold=-0.15,
|
||||
contradiction_penalty_multiplier=0.6,
|
||||
)
|
||||
|
||||
# Compute Bayesian posterior from merged signals (Req 1.1, 1.2)
|
||||
posterior: BayesianPosterior = compute_bayesian_posterior(signals)
|
||||
|
||||
# --- Bayesian confidence formula (Req 8.1–8.4) ---
|
||||
# confidence = 0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction
|
||||
active = [s for s in signals if s.weight.combined > 0]
|
||||
unique_sources = len({s.document_id for s in active if s.document_id}) if active else 0
|
||||
f_count = min(unique_sources / 15.0, 0.8)
|
||||
|
||||
avg_credibility = (
|
||||
sum(s.weight.credibility for s in active) / len(active) if active else 0.0
|
||||
)
|
||||
|
||||
# Contradiction penalty uses regime-adjusted multiplier (Req 7.7)
|
||||
contradiction_penalty = contradiction * regime.contradiction_penalty_multiplier
|
||||
|
||||
confidence = (
|
||||
0.5 * posterior.bayesian_confidence
|
||||
+ 0.25 * f_count
|
||||
+ 0.25 * avg_credibility
|
||||
- contradiction_penalty
|
||||
)
|
||||
confidence = round(max(0.0, min(1.0, confidence)), 4)
|
||||
|
||||
# --- Entropy-based direction (Req 9.1–9.5) ---
|
||||
# Fixed P_bull thresholds for direction: 0.65 / 0.35
|
||||
if posterior.entropy > 0.9:
|
||||
direction = TrendDirection.MIXED
|
||||
elif posterior.p_bull > 0.65:
|
||||
direction = TrendDirection.BULLISH
|
||||
elif posterior.p_bull < 0.35:
|
||||
direction = TrendDirection.BEARISH
|
||||
else:
|
||||
direction = TrendDirection.NEUTRAL
|
||||
|
||||
# Get detailed evidence rankings for persistence
|
||||
config = EvidenceRankConfig(max_refs=max_evidence)
|
||||
supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, config)
|
||||
ev_config = EvidenceRankConfig(max_refs=max_evidence)
|
||||
supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, ev_config)
|
||||
|
||||
supporting = list(dict.fromkeys(r.document_id for r in supporting_ranked))
|
||||
opposing = list(dict.fromkeys(r.document_id for r in opposing_ranked))
|
||||
@@ -695,6 +898,30 @@ def assemble_trend_with_evidence(
|
||||
# Trend strength: absolute value of weighted sentiment, clamped to [0, 1]
|
||||
strength = round(min(abs(avg_sentiment), 1.0), 4)
|
||||
|
||||
# Build probabilistic JSONB data for market_context storage
|
||||
probabilistic_data = {
|
||||
"p_bull": round(posterior.p_bull, 6),
|
||||
"alpha": round(posterior.alpha, 4),
|
||||
"beta": round(posterior.beta, 4),
|
||||
"log_likelihood": round(posterior.log_likelihood, 6),
|
||||
"bayesian_confidence": round(posterior.bayesian_confidence, 6),
|
||||
"entropy": round(posterior.entropy, 6),
|
||||
"regime": regime.regime.value,
|
||||
"regime_volatility_ratio": round(regime.volatility_ratio, 4),
|
||||
"pipeline_mode": "probabilistic",
|
||||
"contradiction_entropy": round(contradiction, 4),
|
||||
}
|
||||
|
||||
# Enrich market_context with probabilistic outputs
|
||||
if market_ctx is not None and hasattr(market_ctx, "model_dump"):
|
||||
enriched_ctx_data = market_ctx.model_dump()
|
||||
enriched_ctx_data["probabilistic"] = probabilistic_data
|
||||
enriched_market_ctx = enriched_ctx_data
|
||||
elif isinstance(market_ctx, dict):
|
||||
enriched_market_ctx = {**market_ctx, "probabilistic": probabilistic_data}
|
||||
else:
|
||||
enriched_market_ctx = {"probabilistic": probabilistic_data}
|
||||
|
||||
summary = TrendSummary(
|
||||
entity_type="company",
|
||||
entity_id=ticker,
|
||||
@@ -708,8 +935,16 @@ def assemble_trend_with_evidence(
|
||||
material_risks=risks,
|
||||
contradiction_score=contradiction,
|
||||
disagreement_details=contradiction_result.details,
|
||||
market_context=market_ctx,
|
||||
market_context=enriched_market_ctx,
|
||||
generated_at=reference_time,
|
||||
# Probabilistic fields (Req 9.6, 16.1)
|
||||
p_bull=round(posterior.p_bull, 6),
|
||||
alpha=round(posterior.alpha, 4),
|
||||
beta_param=round(posterior.beta, 4),
|
||||
bayesian_confidence=round(posterior.bayesian_confidence, 6),
|
||||
entropy=round(posterior.entropy, 6),
|
||||
regime=regime.regime.value,
|
||||
pipeline_mode="probabilistic",
|
||||
)
|
||||
|
||||
return AssembledTrend(
|
||||
@@ -782,7 +1017,12 @@ async def persist_trend_summary(
|
||||
json.dumps(summary.material_risks),
|
||||
summary.contradiction_score,
|
||||
json.dumps([d.model_dump() for d in summary.disagreement_details]),
|
||||
json.dumps(summary.market_context.model_dump() if summary.market_context else {}, default=str),
|
||||
json.dumps(
|
||||
summary.market_context.model_dump()
|
||||
if hasattr(summary.market_context, "model_dump")
|
||||
else (summary.market_context if summary.market_context else {}),
|
||||
default=str,
|
||||
),
|
||||
summary.generated_at,
|
||||
)
|
||||
trend_id = str(row["id"])
|
||||
@@ -841,7 +1081,11 @@ async def persist_trend_evidence(
|
||||
supporting: list[RankedEvidence],
|
||||
opposing: list[RankedEvidence],
|
||||
) -> int:
|
||||
"""Insert evidence mapping rows for a trend window. Returns count inserted."""
|
||||
"""Insert evidence mapping rows for a trend window. Returns count inserted.
|
||||
|
||||
Deletes any existing evidence for this trend window first to prevent
|
||||
duplicate accumulation across aggregation cycles.
|
||||
"""
|
||||
rows: list[tuple[str, str, str, float, float, float, float, float, float]] = []
|
||||
for ev in supporting:
|
||||
# Skip non-UUID document IDs (e.g. pattern signal synthetic IDs)
|
||||
@@ -861,6 +1105,12 @@ async def persist_trend_evidence(
|
||||
ev.recency_component, ev.confidence_component, ev.sentiment_value,
|
||||
))
|
||||
|
||||
# Clear stale evidence before inserting fresh rows
|
||||
await pool.execute(
|
||||
"DELETE FROM trend_evidence WHERE trend_window_id = $1",
|
||||
trend_window_id,
|
||||
)
|
||||
|
||||
if not rows:
|
||||
return 0
|
||||
|
||||
@@ -923,6 +1173,131 @@ async def _build_macro_event_infos(
|
||||
return infos
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regime detection helper (Req 7.1, 7.2, 7.3, 7.8, 7.9)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CLOSING_PRICES_QUERY = """
|
||||
SELECT close
|
||||
FROM market_data_daily
|
||||
WHERE ticker = $1
|
||||
ORDER BY bar_date DESC
|
||||
LIMIT 120
|
||||
"""
|
||||
|
||||
_DAILY_RETURNS_QUERY = """
|
||||
SELECT (close - LAG(close) OVER (ORDER BY bar_date)) / NULLIF(LAG(close) OVER (ORDER BY bar_date), 0) AS daily_return
|
||||
FROM market_data_daily
|
||||
WHERE ticker = $1
|
||||
ORDER BY bar_date DESC
|
||||
LIMIT 120
|
||||
"""
|
||||
|
||||
_DAILY_VOLUMES_QUERY = """
|
||||
SELECT volume
|
||||
FROM market_data_daily
|
||||
WHERE ticker = $1
|
||||
ORDER BY bar_date DESC
|
||||
LIMIT 30
|
||||
"""
|
||||
|
||||
# Default uncertainty regime used when market data is unavailable
|
||||
_DEFAULT_UNCERTAINTY_REGIME = RegimeClassification(
|
||||
regime=MarketRegime.UNCERTAINTY,
|
||||
trend_indicator=0.0,
|
||||
volatility_ratio=1.0,
|
||||
bullish_threshold=0.15,
|
||||
bearish_threshold=-0.15,
|
||||
contradiction_penalty_multiplier=0.6,
|
||||
)
|
||||
|
||||
|
||||
async def _classify_ticker_regime(
    pool: asyncpg.Pool,
    ticker: str,
) -> RegimeClassification:
    """Determine the market regime for ``ticker`` from stored daily bars.

    Runs ``_CLOSING_PRICES_QUERY`` and ``_DAILY_RETURNS_QUERY`` against
    ``pool``, puts both series into chronological order (dropping NULLs)
    and passes them to ``classify_regime``.

    ``_DEFAULT_UNCERTAINTY_REGIME`` is returned instead when the price
    query yields no rows, when either series is empty after NULL removal,
    or when any step raises (the failure is logged as a warning).

    Requirements: 7.1, 7.2, 7.3, 7.8, 7.9
    """
    try:
        price_rows = await pool.fetch(_CLOSING_PRICES_QUERY, ticker)
        if not price_rows:
            logger.info(
                "No market data for %s — defaulting to uncertainty regime",
                ticker,
            )
            return _DEFAULT_UNCERTAINTY_REGIME

        # The query sorts newest-first; iterate backwards for oldest-first.
        closing_prices: list[float] = []
        for price_row in reversed(price_rows):
            if price_row["close"] is not None:
                closing_prices.append(float(price_row["close"]))

        return_rows = await pool.fetch(_DAILY_RETURNS_QUERY, ticker)

        # Same ordering fix for returns; rows with a NULL return are skipped.
        returns: list[float] = []
        for return_row in reversed(return_rows):
            if return_row["daily_return"] is not None:
                returns.append(float(return_row["daily_return"]))

        if closing_prices and returns:
            return classify_regime(closing_prices, returns)

        logger.info(
            "Insufficient market data for %s — defaulting to uncertainty regime",
            ticker,
        )
        return _DEFAULT_UNCERTAINTY_REGIME

    except Exception:
        # Best-effort: regime detection must never abort the caller.
        logger.warning(
            "Failed to classify regime for %s — defaulting to uncertainty regime",
            ticker,
            exc_info=True,
        )
        return _DEFAULT_UNCERTAINTY_REGIME
|
||||
|
||||
|
||||
async def _fetch_ticker_market_data(
    pool: asyncpg.Pool,
    ticker: str,
) -> tuple[list[float] | None, list[float] | None]:
    """Load recent daily returns and volumes for ``ticker``.

    Executes ``_DAILY_RETURNS_QUERY`` and then ``_DAILY_VOLUMES_QUERY``
    on ``pool``.  Each result is reversed into chronological order with
    NULL values dropped.

    Returns:
        ``(returns, volumes)``.  Either element is ``None`` when its query
        produced no usable values.  If anything raises, a warning is
        logged and ``(None, None)`` is returned.
    """

    def _oldest_first(records, column):
        # Rows come back newest-first; an empty series collapses to None.
        series = [
            float(rec[column])
            for rec in reversed(records or [])
            if rec[column] is not None
        ]
        return series or None

    try:
        return_rows = await pool.fetch(_DAILY_RETURNS_QUERY, ticker)
        returns = _oldest_first(return_rows, "daily_return")

        volume_rows = await pool.fetch(_DAILY_VOLUMES_QUERY, ticker)
        volumes = _oldest_first(volume_rows, "volume")
    except Exception:
        logger.warning(
            "Failed to fetch market data for %s scoring — "
            "regime multiplier will default to 1.0",
            ticker,
            exc_info=True,
        )
        return None, None

    return returns, volumes
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main aggregation entry point for a single ticker + window
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -934,6 +1309,12 @@ async def aggregate_company_window(
|
||||
window: str,
|
||||
reference_time: datetime | None = None,
|
||||
config: AggregationConfig | None = None,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
regime: RegimeClassification | None = None,
|
||||
source_accuracy_map: dict[str, float] | None = None,
|
||||
ticker_returns: list[float] | None = None,
|
||||
ticker_volumes: list[float] | None = None,
|
||||
) -> TrendSummary:
|
||||
"""Compute and persist a trend summary for one ticker and one window.
|
||||
|
||||
@@ -944,14 +1325,47 @@ async def aggregate_company_window(
|
||||
4. Build weighted signals using the scoring module.
|
||||
5. Check macro toggle and fetch/merge macro signals if enabled.
|
||||
6. Check competitive toggle and fetch/merge pattern/competitive signals if enabled.
|
||||
7. Assemble the TrendSummary.
|
||||
7. Assemble the TrendSummary (probabilistic or heuristic).
|
||||
8. Persist to trend_windows table.
|
||||
|
||||
When ``probabilistic`` is True, the scoring config is set to
|
||||
probabilistic mode, source accuracy factors are passed to signal
|
||||
scoring, and macro integration uses the conditional modifier.
|
||||
|
||||
Returns the assembled TrendSummary.
|
||||
"""
|
||||
cfg = config or AggregationConfig()
|
||||
scoring_cfg = cfg.effective_scoring()
|
||||
|
||||
# When probabilistic mode is active, create a scoring config with
|
||||
# probabilistic=True so all downstream scoring uses the new formulas.
|
||||
if probabilistic and not scoring_cfg.probabilistic:
|
||||
scoring_cfg = ScoringConfig(
|
||||
half_life_hours=scoring_cfg.half_life_hours,
|
||||
min_recency_weight=scoring_cfg.min_recency_weight,
|
||||
credibility_floor=scoring_cfg.credibility_floor,
|
||||
credibility_ceiling=scoring_cfg.credibility_ceiling,
|
||||
credibility_exponent=scoring_cfg.credibility_exponent,
|
||||
novelty_bonus_max=scoring_cfg.novelty_bonus_max,
|
||||
confidence_floor=scoring_cfg.confidence_floor,
|
||||
volatility_recency_boost_threshold=scoring_cfg.volatility_recency_boost_threshold,
|
||||
volatility_recency_boost_max=scoring_cfg.volatility_recency_boost_max,
|
||||
volume_surge_threshold_pct=scoring_cfg.volume_surge_threshold_pct,
|
||||
volume_surge_boost=scoring_cfg.volume_surge_boost,
|
||||
probabilistic=True,
|
||||
sigmoid_steepness=scoring_cfg.sigmoid_steepness,
|
||||
sigmoid_midpoint=scoring_cfg.sigmoid_midpoint,
|
||||
info_gain_lambda=scoring_cfg.info_gain_lambda,
|
||||
info_gain_max=scoring_cfg.info_gain_max,
|
||||
default_base_rate=scoring_cfg.default_base_rate,
|
||||
adaptive_decay_impact_scale=scoring_cfg.adaptive_decay_impact_scale,
|
||||
adaptive_decay_surprise_scale=scoring_cfg.adaptive_decay_surprise_scale,
|
||||
adaptive_decay_market_scale=scoring_cfg.adaptive_decay_market_scale,
|
||||
regime_return_weight=scoring_cfg.regime_return_weight,
|
||||
regime_volume_weight=scoring_cfg.regime_volume_weight,
|
||||
regime_multiplier_max=scoring_cfg.regime_multiplier_max,
|
||||
)
|
||||
|
||||
if reference_time is None:
|
||||
reference_time = datetime.now(timezone.utc)
|
||||
|
||||
@@ -965,9 +1379,13 @@ async def aggregate_company_window(
|
||||
# 2. Fetch market context
|
||||
market_ctx = await fetch_market_context(pool, ticker, window, reference_time)
|
||||
|
||||
# 3. Build weighted signals
|
||||
# 3. Build weighted signals — pass source accuracy and market data
|
||||
# when in probabilistic mode (Req 4.1–4.3, 6.1–6.5)
|
||||
signals = build_weighted_signals(
|
||||
impacts, reference_time, window, market_ctx, scoring_cfg,
|
||||
source_accuracy_map=source_accuracy_map if probabilistic else None,
|
||||
returns=ticker_returns if probabilistic else None,
|
||||
volumes=ticker_volumes if probabilistic else None,
|
||||
)
|
||||
|
||||
# 4. Check macro toggle and merge macro signals
|
||||
@@ -981,6 +1399,7 @@ async def aggregate_company_window(
|
||||
if db_toggle is not None:
|
||||
macro_enabled = db_toggle
|
||||
|
||||
macro_modifier = 1.0
|
||||
if macro_enabled:
|
||||
macro_impacts = await fetch_macro_impact_records(
|
||||
pool, ticker, window_start, reference_time,
|
||||
@@ -992,11 +1411,31 @@ async def aggregate_company_window(
|
||||
window,
|
||||
macro_signal_weight=cfg.macro_signal_weight,
|
||||
config=scoring_cfg,
|
||||
returns=ticker_returns if probabilistic else None,
|
||||
volumes=ticker_volumes if probabilistic else None,
|
||||
)
|
||||
signals = signals + macro_signals
|
||||
|
||||
if probabilistic:
|
||||
# Probabilistic mode: use conditional macro modifier (Req 11.1–11.5)
|
||||
company_direction = derive_trend_direction(
|
||||
weighted_sentiment_average(signals),
|
||||
).value
|
||||
signals, macro_modifier = integrate_macro_signals(
|
||||
company_signals=signals,
|
||||
macro_signals=macro_signals,
|
||||
company_direction=company_direction,
|
||||
macro_impacts=macro_impacts,
|
||||
ticker=ticker,
|
||||
probabilistic=True,
|
||||
macro_signal_weight=cfg.macro_signal_weight,
|
||||
)
|
||||
else:
|
||||
# Heuristic mode: simple additive merge (current behavior)
|
||||
signals = signals + macro_signals
|
||||
|
||||
logger.info(
|
||||
"Merged %d macro signals for %s/%s",
|
||||
len(macro_signals), ticker, window,
|
||||
"Merged %d macro signals for %s/%s (modifier=%.4f)",
|
||||
len(macro_signals), ticker, window, macro_modifier,
|
||||
)
|
||||
|
||||
# 5. Check competitive toggle and merge pattern/competitive signals
|
||||
@@ -1055,9 +1494,17 @@ async def aggregate_company_window(
|
||||
market_ctx=market_ctx if market_ctx.has_data else None,
|
||||
max_evidence=cfg.max_evidence,
|
||||
reference_time=reference_time,
|
||||
probabilistic=probabilistic,
|
||||
regime=regime,
|
||||
)
|
||||
summary = assembled.summary
|
||||
|
||||
# 6b. Enrich probabilistic JSONB with macro modifier (Req 16.2)
|
||||
if probabilistic and macro_modifier != 1.0:
|
||||
ctx = summary.market_context
|
||||
if isinstance(ctx, dict) and "probabilistic" in ctx:
|
||||
ctx["probabilistic"]["macro_modifier"] = round(macro_modifier, 4)
|
||||
|
||||
# 7. Persist trend window
|
||||
trend_id = await persist_trend_summary(pool, summary)
|
||||
|
||||
@@ -1126,11 +1573,119 @@ async def aggregate_company(
|
||||
if reference_time is None:
|
||||
reference_time = datetime.now(timezone.utc)
|
||||
|
||||
# Read probabilistic scoring flag once per cycle (Requirement 16.7).
|
||||
# Mid-cycle changes take effect on the next cycle.
|
||||
probabilistic = await fetch_probabilistic_scoring_enabled(pool)
|
||||
pipeline_mode = "probabilistic" if probabilistic else "heuristic"
|
||||
|
||||
# --- Quality gate evaluation (Req 11.2, 11.3) ---
|
||||
# Evaluate model quality gate at the start of each aggregation cycle.
|
||||
# When the gate fails, all recommendations are forced to paper mode.
|
||||
# Gate evaluation failure defaults to paper-only (fail-safe).
|
||||
quality_gate_passed = False
|
||||
try:
|
||||
gate_result: QualityGateResult = await evaluate_quality_gate(pool)
|
||||
quality_gate_passed = gate_result.passed
|
||||
logger.info(
|
||||
"Quality gate for %s cycle: %s — %s",
|
||||
ticker,
|
||||
"PASS" if gate_result.passed else "FAIL",
|
||||
gate_result.reason,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Quality gate evaluation failed for %s cycle — "
|
||||
"defaulting to paper-only mode (fail-safe)",
|
||||
ticker,
|
||||
)
|
||||
quality_gate_passed = False
|
||||
|
||||
logger.info(
|
||||
"Aggregation cycle for %s: pipeline_mode=%s quality_gate=%s",
|
||||
ticker,
|
||||
pipeline_mode,
|
||||
"passed" if quality_gate_passed else "failed",
|
||||
)
|
||||
|
||||
# --- Regime detection (Req 7.1, 7.2, 7.3, 7.8, 7.9) ---
|
||||
# Classify market regime for this ticker using closing prices and returns.
|
||||
# Default to uncertainty regime when market data is unavailable.
|
||||
regime: RegimeClassification | None = None
|
||||
ticker_returns: list[float] | None = None
|
||||
ticker_volumes: list[float] | None = None
|
||||
source_accuracy_map: dict[str, float] | None = None
|
||||
|
||||
if probabilistic:
|
||||
regime = await _classify_ticker_regime(pool, ticker)
|
||||
logger.info(
|
||||
"Regime for %s: %s (trend_indicator=%.1f, vol_ratio=%.2f, "
|
||||
"bullish_threshold=%.2f, contradiction_mult=%.1f)",
|
||||
ticker,
|
||||
regime.regime.value,
|
||||
regime.trend_indicator,
|
||||
regime.volatility_ratio,
|
||||
regime.bullish_threshold,
|
||||
regime.contradiction_penalty_multiplier,
|
||||
)
|
||||
|
||||
# Fetch market data (returns/volumes) for regime multiplier in scoring
|
||||
# (Req 6.1–6.5). Fetched once per cycle and reused across all windows.
|
||||
ticker_returns, ticker_volumes = await _fetch_ticker_market_data(pool, ticker)
|
||||
|
||||
# Batch-fetch source accuracy for all sources in the signal set
|
||||
# (Req 4.1–4.3). Fetched once per cycle; individual signals look up
|
||||
# their factor from this map. DB errors default to empty map (factor 1.0).
|
||||
try:
|
||||
# Fetch all source IDs from the longest window to cover all signals
|
||||
longest_window = max(
|
||||
cfg.effective_windows(),
|
||||
key=lambda w: WINDOW_DURATIONS.get(w, timedelta(days=7)),
|
||||
)
|
||||
longest_duration = WINDOW_DURATIONS.get(longest_window, timedelta(days=90))
|
||||
window_start = reference_time - longest_duration
|
||||
all_impacts = await fetch_impact_records(pool, ticker, window_start, reference_time)
|
||||
source_ids = list({imp.document_id for imp in all_impacts})
|
||||
if source_ids:
|
||||
sa_records = await fetch_source_accuracy(pool, source_ids)
|
||||
source_accuracy_map = {
|
||||
sid: sa.accuracy_factor for sid, sa in sa_records.items()
|
||||
}
|
||||
logger.info(
|
||||
"Fetched source accuracy for %s: %d/%d sources have records",
|
||||
ticker, len(sa_records), len(source_ids),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to fetch source accuracy for %s — defaulting to neutral factor",
|
||||
ticker,
|
||||
exc_info=True,
|
||||
)
|
||||
source_accuracy_map = None
|
||||
|
||||
summaries: list[TrendSummary] = []
|
||||
for window in cfg.effective_windows():
|
||||
summary = await aggregate_company_window(
|
||||
pool, ticker, window, reference_time, cfg,
|
||||
probabilistic=probabilistic,
|
||||
regime=regime,
|
||||
source_accuracy_map=source_accuracy_map,
|
||||
ticker_returns=ticker_returns,
|
||||
ticker_volumes=ticker_volumes,
|
||||
)
|
||||
|
||||
# When quality gate fails, annotate the trend summary so the
|
||||
# recommendation engine forces paper mode (Req 11.2, 11.3).
|
||||
if not quality_gate_passed:
|
||||
ctx = summary.market_context
|
||||
if isinstance(ctx, dict):
|
||||
ctx["quality_gate_passed"] = False
|
||||
elif ctx is not None and hasattr(ctx, "model_dump"):
|
||||
ctx_dict = ctx.model_dump()
|
||||
ctx_dict["quality_gate_passed"] = False
|
||||
summary.market_context = ctx_dict
|
||||
else:
|
||||
summary.market_context = {"quality_gate_passed": False}
|
||||
|
||||
summaries.append(summary)
|
||||
|
||||
return summaries
|
||||
|
||||
+622
-8
@@ -41,8 +41,13 @@ from services.shared.audit import get_entity_audit_trail, get_order_audit_trail,
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import new_trace_id, set_trace_context, setup_logging
|
||||
from services.shared.redis_keys import PREFIX, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||
from services.shared.redis_keys import PIPELINE_ENABLED_KEY, QUEUE_BROKER, QUEUE_PREFIX, queue_key
|
||||
from services.shared.schemas import MAJOR_DECISION_CATALYSTS
|
||||
from services.validation.attribution import (
|
||||
compute_catalyst_attribution,
|
||||
compute_layer_attribution,
|
||||
compute_source_attribution,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("query_api")
|
||||
|
||||
@@ -471,12 +476,13 @@ async def list_trend_history(
|
||||
@app.get("/api/market/prices/{ticker}")
|
||||
async def get_market_prices(
|
||||
ticker: str,
|
||||
limit: int = Query(default=30, le=200),
|
||||
limit: int = Query(default=200, le=500),
|
||||
):
|
||||
"""Return historical close prices for a ticker from market_snapshots.
|
||||
|
||||
Each row has a bar_date (from the Polygon bar timestamp) and OHLCV data.
|
||||
Ordered oldest-first for chart rendering.
|
||||
Ordered oldest-first for chart rendering. Also returns 90-day high/low
|
||||
computed from all bars in the last 90 days.
|
||||
"""
|
||||
ticker = ticker.upper()
|
||||
rows = await pool.fetch(
|
||||
@@ -515,7 +521,124 @@ async def get_market_prices(
|
||||
"bar_timestamp": bar_ts,
|
||||
"captured_at": r["captured_at"].isoformat() if r["captured_at"] else None,
|
||||
})
|
||||
return results
|
||||
|
||||
# Compute 90-day high/low from all bars in the window
|
||||
cutoff_90d = datetime.now(timezone.utc) - timedelta(days=90)
|
||||
range_row = await pool.fetchrow(
|
||||
"""SELECT
|
||||
MIN((data->>'l')::float) AS low_90d,
|
||||
MAX((data->>'h')::float) AS high_90d
|
||||
FROM market_snapshots
|
||||
WHERE ticker = $1 AND snapshot_type = 'bar'
|
||||
AND captured_at >= $2""",
|
||||
ticker, cutoff_90d,
|
||||
)
|
||||
low_90d = range_row["low_90d"] if range_row else None
|
||||
high_90d = range_row["high_90d"] if range_row else None
|
||||
|
||||
return {
|
||||
"bars": results,
|
||||
"range_90d": {"low": low_90d, "high": high_90d},
|
||||
}
|
||||
|
||||
|
||||
@app.post("/api/market/backfill/{ticker}")
async def backfill_market_prices(ticker: str, days: int = Query(default=90, le=365)):
    """Backfill daily OHLCV bars from Polygon for the last N days.

    Requests daily aggregate bars from the provider's range endpoint and
    inserts into ``market_snapshots`` every bar whose ``t`` timestamp is
    not already stored for the ticker.

    Args:
        ticker: Symbol from the URL path; upper-cased before use.
        days: How many days back from today the requested range starts.

    Returns:
        ``{"ticker", "inserted", "total_bars", "days"}``, or the first three
        keys with zero counts when the provider returned no bars.

    Raises:
        HTTPException: 503 when no API key is configured; 502 when the
            provider request fails or returns an error status.
    """
    ticker = ticker.upper()
    api_key = config.market_data.api_key
    if not api_key:
        raise HTTPException(503, "No market data API key configured")

    import hashlib
    from datetime import date, timedelta
    from urllib.parse import quote

    import httpx

    to_date = date.today().isoformat()
    from_date = (date.today() - timedelta(days=days)).isoformat()

    # The ticker comes straight from the request path, so escape it before
    # embedding it in the upstream URL.  ':' stays literal so prefixed
    # symbols (e.g. "I:SPX") are sent unchanged.
    url = (
        f"{config.market_data.base_url}/v2/aggs/ticker/{quote(ticker, safe=':')}"
        f"/range/1/day/{from_date}/{to_date}"
    )
    params = {"apiKey": api_key, "adjusted": "true", "sort": "asc", "limit": "500"}

    # httpx error messages embed the full request URL, which here carries the
    # API key as a query parameter.  Callers log and return str(exc), so the
    # error is replaced with a sanitized HTTPException and the original
    # exception context is suppressed.
    try:
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.get(url, params=params)
            resp.raise_for_status()
            data = resp.json()
    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        logger.warning("Backfill request for %s returned HTTP %s", ticker, status)
        raise HTTPException(
            502, f"Market data provider returned HTTP {status} for {ticker}",
        ) from None
    except httpx.HTTPError as exc:
        failure = type(exc).__name__
        logger.warning("Backfill request for %s failed: %s", ticker, failure)
        raise HTTPException(
            502, f"Market data request failed for {ticker}: {failure}",
        ) from None

    bars = data.get("results", [])
    if not bars:
        return {"ticker": ticker, "inserted": 0, "total_bars": 0}

    # Find existing bar timestamps to avoid duplicates
    existing = await pool.fetch(
        """SELECT DISTINCT (data->>'t')::bigint AS bar_ts
           FROM market_snapshots
           WHERE ticker = $1 AND snapshot_type = 'bar'""",
        ticker,
    )
    existing_ts = {r["bar_ts"] for r in existing if r["bar_ts"] is not None}

    # Look up company_id (nullable)
    company_row = await pool.fetchrow(
        "SELECT id FROM companies WHERE ticker = $1", ticker,
    )
    company_id = company_row["id"] if company_row else None

    inserted = 0
    for bar in bars:
        bar_ts = bar.get("t")
        if bar_ts is None or bar_ts in existing_ts:
            continue
        bar_json = json.dumps(bar)
        content_hash = hashlib.sha256(bar_json.encode()).hexdigest()
        # Bar timestamps are divided by 1000 (treated as epoch milliseconds).
        captured_at = datetime.fromtimestamp(bar_ts / 1000, tz=timezone.utc)
        await pool.execute(
            """INSERT INTO market_snapshots
               (company_id, ticker, snapshot_type, data, source_provider, captured_at, content_hash)
               VALUES ($1, $2, 'bar', $3::jsonb, 'polygon_backfill', $4, $5)""",
            company_id, ticker, bar_json, captured_at, content_hash,
        )
        # Track the timestamp so a duplicate within this response is skipped.
        existing_ts.add(bar_ts)
        inserted += 1

    return {"ticker": ticker, "inserted": inserted, "total_bars": len(bars), "days": days}
|
||||
|
||||
|
||||
@app.post("/api/market/backfill-all")
async def backfill_all_market_prices(days: int = Query(default=90, le=365)):
    """Run the per-ticker backfill for every active company.

    Tickers are read from ``companies`` (``active = TRUE``, ordered by
    ticker) and processed one at a time via ``backfill_market_prices``.
    A failure for one ticker is logged and recorded in the response; the
    remaining tickers are still processed.

    Returns:
        ``{"total_inserted", "tickers", "details"}`` where ``details`` holds
        one result (or error entry) per ticker.

    Raises:
        HTTPException: 503 when no market data API key is configured.
    """
    if not config.market_data.api_key:
        raise HTTPException(503, "No market data API key configured")

    company_rows = await pool.fetch(
        "SELECT ticker FROM companies WHERE active = TRUE ORDER BY ticker",
    )

    details = []
    total_inserted = 0
    for company in company_rows:
        symbol = company["ticker"]
        try:
            outcome = await backfill_market_prices(symbol, days)
        except Exception as exc:
            logger.warning("Backfill failed for %s: %s", symbol, exc)
            outcome = {"ticker": symbol, "inserted": 0, "error": str(exc)}
        details.append(outcome)
        total_inserted += outcome.get("inserted", 0)

    return {"total_inserted": total_inserted, "tickers": len(details), "details": details}
|
||||
|
||||
|
||||
@app.get("/api/trends/{trend_id}")
|
||||
@@ -1061,7 +1184,12 @@ async def get_order(order_id: str):
|
||||
async def list_positions(
|
||||
ticker: Optional[str] = None,
|
||||
):
|
||||
"""List current positions."""
|
||||
"""List current positions with Polygon market prices overlaid.
|
||||
|
||||
The current_price from the broker (Alpaca paper) can be stale or
|
||||
inaccurate. We overlay the latest close from market_snapshots
|
||||
(Polygon daily bars) and recompute unrealized P&L from that.
|
||||
"""
|
||||
if ticker:
|
||||
rows = await pool.fetch(
|
||||
"""SELECT p.id, p.broker_account_id, p.ticker, p.quantity,
|
||||
@@ -1077,7 +1205,28 @@ async def list_positions(
|
||||
p.unrealized_pnl, p.realized_pnl, p.updated_at
|
||||
FROM positions p ORDER BY p.ticker""",
|
||||
)
|
||||
return [_row_to_dict(r) for r in rows]
|
||||
|
||||
# Enrich with latest Polygon close for comparison.
|
||||
# Use whichever price is more recent: broker sync or Polygon bar.
|
||||
tickers = list({r["ticker"] for r in rows})
|
||||
price_map: dict[str, float] = {}
|
||||
if tickers:
|
||||
price_rows = await pool.fetch(
|
||||
"""SELECT DISTINCT ON (ticker) ticker, (data->>'c')::float AS close
|
||||
FROM market_snapshots
|
||||
WHERE ticker = ANY($1) AND snapshot_type = 'bar'
|
||||
ORDER BY ticker, captured_at DESC""",
|
||||
tickers,
|
||||
)
|
||||
price_map = {r["ticker"]: r["close"] for r in price_rows if r["close"]}
|
||||
|
||||
results = []
|
||||
for r in rows:
|
||||
d = _row_to_dict(r)
|
||||
polygon_price = price_map.get(d["ticker"])
|
||||
d["polygon_price"] = polygon_price
|
||||
results.append(d)
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1948,7 +2097,7 @@ async def retry_failed_extractions_endpoint():
|
||||
# Pipeline On/Off Toggle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PIPELINE_ENABLED_KEY = f"{PREFIX}:pipeline:enabled"
|
||||
_PIPELINE_ENABLED_KEY = PIPELINE_ENABLED_KEY
|
||||
|
||||
|
||||
@app.get("/api/ops/pipeline/toggle")
|
||||
@@ -1966,10 +2115,33 @@ async def set_pipeline_toggle(body: dict[str, Any]):
|
||||
|
||||
Accepts: { "enabled": true/false }
|
||||
Workers check this flag before processing jobs.
|
||||
When disabling, optionally flush all pipeline queues so in-flight
|
||||
work stops immediately.
|
||||
"""
|
||||
enabled = body.get("enabled", True)
|
||||
flush = body.get("flush", not enabled) # default: flush when disabling
|
||||
await rds.set(_PIPELINE_ENABLED_KEY, "1" if enabled else "0")
|
||||
return {"pipeline_enabled": enabled, "message": f"Pipeline {'enabled' if enabled else 'disabled'}"}
|
||||
|
||||
flushed_counts: dict[str, int] = {}
|
||||
if flush and not enabled:
|
||||
from services.shared.redis_keys import QUEUE_PREFIX
|
||||
# Flush all pipeline queues
|
||||
queue_names = [
|
||||
"ingestion", "parsing", "extraction", "macro_classification",
|
||||
"aggregation", "recommendation", "lake_publish",
|
||||
]
|
||||
for qname in queue_names:
|
||||
qkey = f"{QUEUE_PREFIX}:{qname}"
|
||||
count = await rds.llen(qkey)
|
||||
if count > 0:
|
||||
await rds.delete(qkey)
|
||||
flushed_counts[qname] = count
|
||||
|
||||
msg = f"Pipeline {'enabled' if enabled else 'disabled'}"
|
||||
if flushed_counts:
|
||||
total = sum(flushed_counts.values())
|
||||
msg += f" — flushed {total} queued jobs"
|
||||
return {"pipeline_enabled": enabled, "flushed": flushed_counts, "message": msg}
|
||||
|
||||
|
||||
@app.get("/api/ops/sources/coverage-gaps")
|
||||
@@ -3602,3 +3774,445 @@ async def get_variant_performance_history(
|
||||
agent_id, variant_id, hours,
|
||||
)
|
||||
return [_row_to_dict(r) for r in rows]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model Validation Dashboard (Requirements 12.1, 12.2, 12.3, 12.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_VALID_LOOKBACKS = {"7d", "30d", "90d", "all"}
|
||||
_VALID_HORIZONS = {"1h", "6h", "1d", "7d", "30d"}
|
||||
|
||||
|
||||
@app.get("/api/validation/summary")
async def get_validation_summary(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Return the newest metric snapshot together with the stored gate config.

    Looks up the most recent ``model_metric_snapshots`` row matching the
    requested lookback/horizon pair and the ``model_quality_gate`` row of
    ``risk_configs``.  Either part of the response is ``None`` when the
    corresponding row does not exist.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not one of
            the accepted values.

    Requirement 12.1
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    # Newest snapshot for this lookback/horizon combination.
    latest = await pool.fetchrow(
        """SELECT id, generated_at, lookback_window, horizon,
                  prediction_count, win_rate, directional_accuracy,
                  information_coefficient, rank_information_coefficient,
                  avg_return, avg_excess_return_vs_spy, avg_excess_return_vs_sector,
                  calibration_error, brier_score,
                  buy_win_rate, sell_win_rate, hold_win_rate,
                  metadata
           FROM model_metric_snapshots
           WHERE lookback_window = $1 AND horizon = $2
           ORDER BY generated_at DESC
           LIMIT 1""",
        lookback, horizon,
    )

    snapshot = None
    if latest:
        snapshot = _row_to_dict(latest)
        snapshot["metadata"] = _parse_jsonb(snapshot.get("metadata"))

    # Stored gate evaluation, if one has been written.
    gate = await pool.fetchrow(
        "SELECT config, updated_at FROM risk_configs WHERE name = 'model_quality_gate'",
    )
    gate_status = _parse_jsonb(gate["config"]) if gate else None

    return {"snapshot": snapshot, "gate_status": gate_status}
|
||||
|
||||
|
||||
@app.get("/api/validation/calibration")
async def get_validation_calibration(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Build the confidence-bucket calibration table.

    Reads ``confidence`` and ``direction_correct`` from
    ``v_prediction_performance`` for the requested horizon (optionally
    limited to the lookback period) and groups the rows into five buckets
    spanning 0.50 to 1.00.  Each bucket reports its average confidence,
    observed win rate, row count and a ``miscalibrated`` flag that is set
    when the two averages differ by more than 0.15.  Empty buckets report
    zeros and ``miscalibrated=False``.

    Raises:
        HTTPException: 400 when ``lookback`` or ``horizon`` is not one of
            the accepted values.

    Requirement 12.2
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    # "all" applies no time filter; otherwise the day count is bound as $2.
    params: list[Any] = [horizon]
    lookback_condition = ""
    if lookback != "all":
        params.append({"7d": 7, "30d": 30, "90d": 90}[lookback])
        lookback_condition = "AND generated_at >= NOW() - make_interval(days => $2)"

    rows = await pool.fetch(
        f"""SELECT confidence, direction_correct
            FROM v_prediction_performance
            WHERE horizon = $1
              {lookback_condition}
              AND confidence IS NOT NULL""",
        *params,
    )

    edges = [
        (0.50, 0.60),
        (0.60, 0.70),
        (0.70, 0.80),
        (0.80, 0.90),
        (0.90, 1.00),
    ]

    # Assign every row to its bucket in one pass.  Buckets are half-open
    # except the last, which also includes its upper bound.
    members: list[list[tuple[float, Any]]] = [[] for _ in edges]
    for rec in rows:
        conf = float(rec["confidence"])
        for pos, (low, high) in enumerate(edges):
            below_top = conf <= high if high == 1.00 else conf < high
            if low <= conf and below_top:
                members[pos].append((conf, rec["direction_correct"]))
                break

    buckets = []
    for (low, high), entries in zip(edges, members):
        count = len(entries)
        if count == 0:
            avg_confidence = 0.0
            observed_win_rate = 0.0
            miscalibrated = False
        else:
            avg_conf = sum(conf for conf, _ in entries) / count
            # Only an explicit True counts as a win.
            win_rate = sum(1 for _, correct in entries if correct is True) / count
            avg_confidence = round(avg_conf, 4)
            observed_win_rate = round(win_rate, 4)
            miscalibrated = abs(avg_conf - win_rate) > 0.15
        buckets.append({
            "bucket_low": low,
            "bucket_high": high,
            "avg_confidence": avg_confidence,
            "observed_win_rate": observed_win_rate,
            "prediction_count": count,
            "miscalibrated": miscalibrated,
        })

    return {"buckets": buckets, "lookback": lookback, "horizon": horizon}
|
||||
|
||||
|
||||
@app.get("/api/validation/ic-by-horizon")
async def get_validation_ic_by_horizon(
    lookback: str = Query(default="30d"),
):
    """Return the newest IC / Rank IC snapshot for each prediction horizon.

    For the requested lookback window the most recent row per horizon is
    read from ``model_metric_snapshots`` (``DISTINCT ON (horizon)`` ordered
    by ``generated_at DESC``). The result is ordered 1h, 6h, 1d, 7d, 30d;
    any other horizon value sorts after those.

    Args:
        lookback: Lookback window label; must be a member of
            ``_VALID_LOOKBACKS``.

    Returns:
        ``{"horizons": [...], "lookback": lookback}`` where each entry holds
        ``horizon``, ``information_coefficient``,
        ``rank_information_coefficient``, ``prediction_count`` and
        ``generated_at`` (ISO string or ``None``).

    Raises:
        HTTPException: 400 when ``lookback`` is not a valid window.

    Requirement 12.3
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")

    snapshot_rows = await pool.fetch(
        """SELECT DISTINCT ON (horizon)
                  horizon,
                  information_coefficient,
                  rank_information_coefficient,
                  prediction_count,
                  generated_at
           FROM model_metric_snapshots
           WHERE lookback_window = $1
           ORDER BY horizon, generated_at DESC""",
        lookback,
    )

    def _float_or_none(value):
        # NULL metric columns stay None instead of being coerced.
        return float(value) if value is not None else None

    # Canonical display order; unknown horizons are pushed to the end.
    canonical_rank = {"1h": 0, "6h": 1, "1d": 2, "7d": 3, "30d": 4}

    entries = [
        {
            "horizon": row["horizon"],
            "information_coefficient": _float_or_none(row["information_coefficient"]),
            "rank_information_coefficient": _float_or_none(row["rank_information_coefficient"]),
            "prediction_count": row["prediction_count"],
            "generated_at": row["generated_at"].isoformat() if row["generated_at"] else None,
        }
        for row in snapshot_rows
    ]
    ordered = sorted(entries, key=lambda entry: canonical_rank.get(entry["horizon"], 99))

    return {"horizons": ordered, "lookback": lookback}
|
||||
|
||||
|
||||
@app.get("/api/validation/gate-status")
async def get_validation_gate_status():
    """Return the stored model quality gate evaluation.

    Reads the ``risk_configs`` row whose ``name`` is ``'model_quality_gate'``
    and returns its ``config`` column (decoded by ``_parse_jsonb``) together
    with the row's ``updated_at`` timestamp as an ISO string.

    Returns:
        ``{"gate_status": <decoded config>, "updated_at": <ISO str or None>}``
        when the row exists; otherwise ``{"gate_status": None, "message": ...}``.

    Requirement 12.7
    """
    gate_row = await pool.fetchrow(
        "SELECT config, updated_at FROM risk_configs WHERE name = 'model_quality_gate'",
    )

    if gate_row:
        decoded_gate = _parse_jsonb(gate_row["config"])
        if gate_row.get("updated_at"):
            updated_iso = gate_row["updated_at"].isoformat()
        else:
            updated_iso = None
        return {
            "gate_status": decoded_gate,
            "updated_at": updated_iso,
        }

    # No gate row has been written yet.
    return {
        "gate_status": None,
        "message": "No gate evaluation found. Model metrics may not have been computed yet.",
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Attribution Endpoints (Requirements 12.4, 12.5, 12.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_LOOKBACK_TO_DAYS: dict[str, int] = {
|
||||
"7d": 7,
|
||||
"30d": 30,
|
||||
"90d": 90,
|
||||
"all": 3650,
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/sources")
async def get_validation_attribution_sources(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Per-source attribution metrics for a lookback window and horizon.

    Validates both query parameters, converts the lookback label to days via
    ``_LOOKBACK_TO_DAYS`` and delegates to ``compute_source_attribution``.
    Each result is serialised with ``asdict``; the metric fields are those
    defined by the result dataclass.

    Returns:
        ``{"sources": [...], "lookback": lookback, "horizon": horizon}``.

    Raises:
        HTTPException: 400 for an invalid ``lookback`` or ``horizon``;
            500 when the attribution computation raises.

    Requirement 12.4
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    window_days = _LOOKBACK_TO_DAYS[lookback]

    try:
        source_stats = await compute_source_attribution(pool, lookback_days=window_days, horizon=horizon)
    except Exception:
        # The full traceback goes to the log; the client gets a generic 500.
        logger.exception("Failed to compute source attribution")
        raise HTTPException(500, "Failed to compute source attribution")
    else:
        serialised = list(map(asdict, source_stats))
        return {
            "sources": serialised,
            "lookback": lookback,
            "horizon": horizon,
        }
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/catalysts")
async def get_validation_attribution_catalysts(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Per-catalyst-type attribution metrics for a lookback window and horizon.

    Validates both query parameters, converts the lookback label to days via
    ``_LOOKBACK_TO_DAYS`` and delegates to ``compute_catalyst_attribution``.
    Each result is serialised with ``asdict``; the metric fields are those
    defined by the result dataclass.

    Returns:
        ``{"catalysts": [...], "lookback": lookback, "horizon": horizon}``.

    Raises:
        HTTPException: 400 for an invalid ``lookback`` or ``horizon``;
            500 when the attribution computation raises.

    Requirement 12.5
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    window_days = _LOOKBACK_TO_DAYS[lookback]

    try:
        catalyst_stats = await compute_catalyst_attribution(pool, lookback_days=window_days, horizon=horizon)
    except Exception:
        # The full traceback goes to the log; the client gets a generic 500.
        logger.exception("Failed to compute catalyst attribution")
        raise HTTPException(500, "Failed to compute catalyst attribution")
    else:
        serialised = list(map(asdict, catalyst_stats))
        return {
            "catalysts": serialised,
            "lookback": lookback,
            "horizon": horizon,
        }
|
||||
|
||||
|
||||
@app.get("/api/validation/attribution/layers")
async def get_validation_attribution_layers(
    lookback: str = Query(default="30d"),
    horizon: str = Query(default="7d"),
):
    """Per-signal-layer attribution metrics for a lookback window and horizon.

    Validates both query parameters, converts the lookback label to days via
    ``_LOOKBACK_TO_DAYS`` and delegates to ``compute_layer_attribution``.
    Each result is serialised with ``asdict``; the metric fields are those
    defined by the result dataclass.

    Returns:
        ``{"layers": [...], "lookback": lookback, "horizon": horizon}``.

    Raises:
        HTTPException: 400 for an invalid ``lookback`` or ``horizon``;
            500 when the attribution computation raises.

    Requirement 12.6
    """
    if lookback not in _VALID_LOOKBACKS:
        raise HTTPException(400, f"Invalid lookback: {lookback}. Must be one of {sorted(_VALID_LOOKBACKS)}")
    if horizon not in _VALID_HORIZONS:
        raise HTTPException(400, f"Invalid horizon: {horizon}. Must be one of {sorted(_VALID_HORIZONS)}")

    window_days = _LOOKBACK_TO_DAYS[lookback]

    try:
        layer_stats = await compute_layer_attribution(pool, lookback_days=window_days, horizon=horizon)
    except Exception:
        # The full traceback goes to the log; the client gets a generic 500.
        logger.exception("Failed to compute layer attribution")
        raise HTTPException(500, "Failed to compute layer attribution")
    else:
        serialised = list(map(asdict, layer_stats))
        return {
            "layers": serialised,
            "lookback": lookback,
            "horizon": horizon,
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trading Reports
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.get("/api/reports")
async def list_reports(
    report_type: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    limit: int = Query(default=20, ge=0, le=100),
    offset: int = Query(default=0, ge=0),
):
    """Paginated list of trading reports with optional filtering.

    Query params:
    - report_type: 'daily' or 'weekly'
    - start_date: ISO date (YYYY-MM-DD) — filter period_start >= this
    - end_date: ISO date (YYYY-MM-DD) — filter period_end <= this
    - limit: max results (default 20, min 0, max 100)
    - offset: pagination offset (default 0)

    Returns:
        A list of report summaries (``id``, ``report_type``, ``period_start``,
        ``period_end``, ``validation_status``, ``generated_at``), newest
        ``generated_at`` first. ``report_data`` is not included.

    Raises:
        HTTPException: 400 for an unknown ``report_type`` or an unparseable
            ``start_date`` / ``end_date``.

    Requirements: 5.4, 5.5, 5.6
    """
    from datetime import date as _date

    def _parse_iso_date(raw: str, field_name: str) -> _date:
        # Return the parsed date so it can be bound as a real date value.
        try:
            return _date.fromisoformat(raw)
        except ValueError as exc:
            raise HTTPException(400, f"{field_name} must be YYYY-MM-DD") from exc

    conditions: list[str] = []
    params: list[Any] = []

    if report_type:
        if report_type not in ("daily", "weekly"):
            raise HTTPException(400, "report_type must be 'daily' or 'weekly'")
        params.append(report_type)
        conditions.append(f"report_type = ${len(params)}")

    if start_date:
        # Bind a datetime.date: the driver's date codec does not accept the
        # raw string for a parameter typed by the ::date cast.
        params.append(_parse_iso_date(start_date, "start_date"))
        conditions.append(f"period_start >= ${len(params)}::date")

    if end_date:
        params.append(_parse_iso_date(end_date, "end_date"))
        conditions.append(f"period_end <= ${len(params)}::date")

    where = f"WHERE {' AND '.join(conditions)}" if conditions else ""

    # Only placeholders and fixed fragments are interpolated into the SQL
    # text; every user-supplied value travels as a bound parameter.
    limit_idx = len(params) + 1
    query = f"""
        SELECT id, report_type, period_start, period_end,
               validation_status, generated_at
        FROM trading_reports
        {where}
        ORDER BY generated_at DESC
        LIMIT ${limit_idx} OFFSET ${limit_idx + 1}
    """
    params.extend([limit, offset])

    rows = await pool.fetch(query, *params)
    return [
        {
            "id": str(r["id"]),
            "report_type": r["report_type"],
            "period_start": r["period_start"].isoformat(),
            "period_end": r["period_end"].isoformat(),
            "validation_status": r["validation_status"],
            "generated_at": r["generated_at"].isoformat(),
        }
        for r in rows
    ]
|
||||
|
||||
|
||||
@app.get("/api/reports/{report_id}")
async def get_report(report_id: str):
    """Fetch a single report including full report_data JSONB.

    Args:
        report_id: UUID of the report, as text.

    Returns:
        The report's ``id``, ``report_type``, ``period_start``, ``period_end``,
        ``report_data``, ``validation_status``, ``generated_at`` and
        ``created_at``. ``report_data`` is JSON-decoded when the driver
        returns it as a string.

    Raises:
        HTTPException: 404 when ``report_id`` is not a valid UUID or no
            report with that id exists.

    Requirements: 5.4, 5.5
    """
    import uuid

    # A malformed id cannot match any row. Reject it here so the driver's
    # uuid codec does not fail on it and produce a 500.
    try:
        report_uuid = uuid.UUID(report_id)
    except ValueError as exc:
        raise HTTPException(404, "Report not found") from exc

    row = await pool.fetchrow(
        """SELECT id, report_type, period_start, period_end,
                  report_data, validation_status, generated_at, created_at
           FROM trading_reports
           WHERE id = $1::uuid""",
        report_uuid,
    )
    if row is None:
        raise HTTPException(404, "Report not found")

    return {
        "id": str(row["id"]),
        "report_type": row["report_type"],
        "period_start": row["period_start"].isoformat(),
        "period_end": row["period_end"].isoformat(),
        "report_data": json.loads(row["report_data"]) if isinstance(row["report_data"], str) else row["report_data"],
        "validation_status": row["validation_status"],
        "generated_at": row["generated_at"].isoformat(),
        "created_at": row["created_at"].isoformat(),
    }
|
||||
|
||||
@@ -140,6 +140,11 @@ class OllamaClient:
|
||||
max_retries: int | None = None,
|
||||
http_client: httpx.AsyncClient | None = None,
|
||||
) -> None:
|
||||
if not config.base_url or not config.base_url.startswith(("http://", "https://")):
|
||||
raise ValueError(
|
||||
f"OllamaClient requires a valid base_url (got {config.base_url!r}). "
|
||||
"Set OLLAMA_BASE_URL environment variable."
|
||||
)
|
||||
self._config = config
|
||||
self._max_retries = max_retries if max_retries is not None else config.max_retries
|
||||
self._base_delay = config.retry_base_delay
|
||||
|
||||
+44
-23
@@ -27,6 +27,7 @@ from services.shared.redis_keys import (
|
||||
QUEUE_AGGREGATION,
|
||||
QUEUE_EXTRACTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
|
||||
@@ -421,6 +422,10 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(redis_client):
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
# Alternate: every 3rd job from macro queue, rest from extraction
|
||||
# This prevents macro events from starving regular extractions
|
||||
raw = None
|
||||
@@ -486,17 +491,25 @@ async def main() -> None:
|
||||
model_changed = new_cfg.model != extractor_client._config.model
|
||||
|
||||
if provider_changed or model_changed:
|
||||
logger.info(
|
||||
"Extractor provider switch: old_provider=%s new_provider=%s "
|
||||
"model=%s variant=%s",
|
||||
old_provider, new_provider,
|
||||
new_resolved.model_name, new_resolved.variant_id,
|
||||
)
|
||||
await extractor_client.close()
|
||||
extractor_client = build_llm_client(
|
||||
new_resolved, config.ollama, config.vllm,
|
||||
)
|
||||
extractor_provider = new_provider
|
||||
# Guard: don't switch to ollama if base_url is empty
|
||||
if new_provider == "ollama" and not config.ollama.base_url:
|
||||
logger.warning(
|
||||
"DB resolved provider=ollama but OLLAMA_BASE_URL is empty — "
|
||||
"keeping current %s client. Fix the agent config in the UI.",
|
||||
extractor_provider,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Extractor provider switch: old_provider=%s new_provider=%s "
|
||||
"model=%s variant=%s",
|
||||
old_provider, new_provider,
|
||||
new_resolved.model_name, new_resolved.variant_id,
|
||||
)
|
||||
await extractor_client.close()
|
||||
extractor_client = build_llm_client(
|
||||
new_resolved, config.ollama, config.vllm,
|
||||
)
|
||||
extractor_provider = new_provider
|
||||
else:
|
||||
# Same provider and model — just update config in-place
|
||||
extractor_client._config = new_cfg # type: ignore[assignment]
|
||||
@@ -517,18 +530,26 @@ async def main() -> None:
|
||||
cls_model_changed = new_cls_cfg.model != classifier_client._config.model
|
||||
|
||||
if cls_provider_changed or cls_model_changed:
|
||||
logger.info(
|
||||
"Classifier provider switch: old_provider=%s new_provider=%s "
|
||||
"model=%s variant=%s",
|
||||
old_cls_provider, new_cls_provider,
|
||||
new_cls_resolved.model_name, new_cls_resolved.variant_id,
|
||||
)
|
||||
if classifier_client is not extractor_client:
|
||||
await classifier_client.close()
|
||||
classifier_client = build_llm_client(
|
||||
new_cls_resolved, config.ollama, config.vllm,
|
||||
)
|
||||
classifier_provider = new_cls_provider
|
||||
# Guard: don't switch to ollama if base_url is empty
|
||||
if new_cls_provider == "ollama" and not config.ollama.base_url:
|
||||
logger.warning(
|
||||
"DB resolved classifier provider=ollama but OLLAMA_BASE_URL is empty — "
|
||||
"keeping current %s client. Fix the agent config in the UI.",
|
||||
classifier_provider,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Classifier provider switch: old_provider=%s new_provider=%s "
|
||||
"model=%s variant=%s",
|
||||
old_cls_provider, new_cls_provider,
|
||||
new_cls_resolved.model_name, new_cls_resolved.variant_id,
|
||||
)
|
||||
if classifier_client is not extractor_client:
|
||||
await classifier_client.close()
|
||||
classifier_client = build_llm_client(
|
||||
new_cls_resolved, config.ollama, config.vllm,
|
||||
)
|
||||
classifier_provider = new_cls_provider
|
||||
elif classifier_client is extractor_client and new_cls_cfg.model != extractor_client._config.model:
|
||||
classifier_client = build_llm_client(
|
||||
new_cls_resolved, config.ollama, config.vllm,
|
||||
|
||||
@@ -41,6 +41,7 @@ from services.shared.redis_keys import (
|
||||
QUEUE_INGESTION,
|
||||
QUEUE_PARSING,
|
||||
dedupe_key,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
from services.shared.storage import (
|
||||
@@ -265,6 +266,9 @@ async def main():
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
raw = await rds.lpop(queue)
|
||||
if raw:
|
||||
job = json.loads(raw)
|
||||
|
||||
@@ -54,7 +54,7 @@ from services.lake_publisher.worker import (
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_minio, get_pg_pool, get_redis
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import QUEUE_LAKE_PUBLISH, queue_key
|
||||
from services.shared.redis_keys import QUEUE_LAKE_PUBLISH, is_pipeline_enabled, queue_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -865,6 +865,9 @@ async def run_worker(
|
||||
logger.info("Lake publisher worker started, listening on %s", queue)
|
||||
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(poll_interval)
|
||||
continue
|
||||
raw = await rds.lpop(queue) # type: ignore[misc]
|
||||
if raw is None:
|
||||
await asyncio.sleep(poll_interval)
|
||||
|
||||
@@ -35,7 +35,13 @@ from services.shared.metrics import (
|
||||
PARSE_LOW_QUALITY_TOTAL,
|
||||
PARSE_QUALITY_SCORE,
|
||||
)
|
||||
from services.shared.redis_keys import QUEUE_EXTRACTION, QUEUE_MACRO_CLASSIFICATION, QUEUE_PARSING, queue_key
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_EXTRACTION,
|
||||
QUEUE_MACRO_CLASSIFICATION,
|
||||
QUEUE_PARSING,
|
||||
is_pipeline_enabled,
|
||||
queue_key,
|
||||
)
|
||||
from services.shared.storage import upload_normalized_text, upload_parser_output
|
||||
|
||||
logger = logging.getLogger("parser_worker")
|
||||
@@ -260,6 +266,9 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(rds):
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
raw = await rds.lpop(queue)
|
||||
if raw:
|
||||
job = json.loads(raw)
|
||||
|
||||
@@ -9,10 +9,11 @@ Evaluates trend summaries against configurable thresholds to decide:
|
||||
All decisions are rule-based with no model involvement. The LLM is only
|
||||
used downstream for optional thesis wording (a separate task).
|
||||
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4
|
||||
Requirements: 7.1, 7.2, 7.3, 7.4, 14.1, 14.2, 14.3, 14.4, 14.5, 14.6
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
|
||||
@@ -78,6 +79,10 @@ class EligibilityConfig:
|
||||
# Contradiction penalty: higher contradiction → smaller position
|
||||
contradiction_sizing_penalty: float = 0.5
|
||||
|
||||
# --- Expected value gate (Requirement 14) ---
|
||||
# EV threshold: minimum expected value to allow recommendation through
|
||||
ev_threshold: float = 0.005
|
||||
|
||||
|
||||
DEFAULT_ELIGIBILITY_CONFIG = EligibilityConfig()
|
||||
|
||||
@@ -98,6 +103,11 @@ class EligibilityResult:
|
||||
time_horizon: str = ""
|
||||
invalidation_conditions: list[str] = field(default_factory=list)
|
||||
|
||||
# Probabilistic pipeline fields (Req 14.5, 16.2)
|
||||
ev_value: float | None = None
|
||||
p_bull: float | None = None
|
||||
pipeline_mode: str = "heuristic"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gate checks
|
||||
@@ -318,6 +328,57 @@ def _derive_invalidation_conditions(
|
||||
return conditions
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Expected value computation (Requirements: 14.1–14.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Horizon days mapping for EV computation
|
||||
_EV_HORIZON_DAYS: dict[str, float] = {
|
||||
"intraday": 1.0,
|
||||
"1d": 1.0,
|
||||
"7d": 7.0,
|
||||
"30d": 30.0,
|
||||
"90d": 90.0,
|
||||
}
|
||||
|
||||
|
||||
def compute_expected_value(
    p_bull: float,
    strength: float,
    sigma_20: float,
    horizon_days: float,
) -> float:
    """Expected value of a recommendation, used by the EV gate.

    Formula:
        R_up   = strength · σ_20 · √(horizon_days)
        R_down = (1 - strength) · σ_20 · √(horizon_days)
        EV     = P_bull · R_up - (1 - P_bull) · R_down

    Args:
        p_bull: Bullish probability; documented range [0, 1] (not clamped here).
        strength: Trend strength; documented range [0, 1] (not clamped here).
        sigma_20: 20-day return standard deviation.
        horizon_days: Projection horizon in days; negative values are
            treated as 0.

    Returns:
        The expected value (may be negative), or 0.0 when the result is
        NaN or infinite.

    Requirements: 14.1, 14.2
    """
    # Volatility is scaled by the square root of the (non-negative) horizon.
    horizon_scale = math.sqrt(max(horizon_days, 0.0))

    upside = strength * sigma_20 * horizon_scale
    downside = (1.0 - strength) * sigma_20 * horizon_scale

    expected = p_bull * upside - (1.0 - p_bull) * downside

    # Extreme inputs can produce NaN/inf; report those as "no edge".
    return expected if math.isfinite(expected) else 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -326,6 +387,10 @@ def _derive_invalidation_conditions(
|
||||
def evaluate_eligibility(
|
||||
summary: TrendSummary,
|
||||
config: EligibilityConfig = DEFAULT_ELIGIBILITY_CONFIG,
|
||||
*,
|
||||
probabilistic: bool = False,
|
||||
p_bull: float | None = None,
|
||||
sigma_20: float = 0.01,
|
||||
) -> EligibilityResult:
|
||||
"""Evaluate a trend summary for recommendation eligibility.
|
||||
|
||||
@@ -335,8 +400,27 @@ def evaluate_eligibility(
|
||||
3. Determines the highest allowed execution mode
|
||||
4. Computes position sizing from portfolio rules
|
||||
5. Derives invalidation conditions
|
||||
6. (probabilistic) Applies EV gate: EV > threshold to proceed
|
||||
|
||||
When ``probabilistic=True``:
|
||||
- Computes EV = P_bull · R_up - P_bear · R_down
|
||||
- When EV > threshold (default 0.005), allows recommendation through
|
||||
- When EV ≤ threshold, forces recommendation to informational mode
|
||||
- Populates expected_value, p_bull, pipeline_mode on result
|
||||
|
||||
When ``probabilistic=False``:
|
||||
- Skips EV gate entirely (existing behavior)
|
||||
|
||||
Args:
|
||||
summary: The current trend summary.
|
||||
config: Eligibility configuration thresholds.
|
||||
probabilistic: Use EV gate when True.
|
||||
p_bull: Bayesian bullish probability (required when probabilistic=True).
|
||||
sigma_20: 20-day return standard deviation for EV computation.
|
||||
|
||||
Returns an EligibilityResult with the full decision trace.
|
||||
|
||||
Requirements: 14.1, 14.2, 14.3, 14.4, 14.5, 14.6
|
||||
"""
|
||||
rejection_reasons = _check_gates(summary, config)
|
||||
|
||||
@@ -353,6 +437,21 @@ def evaluate_eligibility(
|
||||
if not eligible:
|
||||
mode = RecommendationMode.INFORMATIONAL
|
||||
|
||||
# EV gate (Requirement 14.1–14.6)
|
||||
ev_value: float | None = None
|
||||
if probabilistic and p_bull is not None:
|
||||
horizon_days = _EV_HORIZON_DAYS.get(summary.window.value, 7.0)
|
||||
ev_value = compute_expected_value(
|
||||
p_bull=p_bull,
|
||||
strength=summary.trend_strength,
|
||||
sigma_20=sigma_20,
|
||||
horizon_days=horizon_days,
|
||||
)
|
||||
|
||||
if ev_value <= config.ev_threshold:
|
||||
# Force to informational mode (Req 14.4)
|
||||
mode = RecommendationMode.INFORMATIONAL
|
||||
|
||||
return EligibilityResult(
|
||||
eligible=eligible,
|
||||
action=action,
|
||||
@@ -361,4 +460,7 @@ def evaluate_eligibility(
|
||||
rejection_reasons=rejection_reasons,
|
||||
time_horizon=horizon,
|
||||
invalidation_conditions=invalidation,
|
||||
ev_value=ev_value,
|
||||
p_bull=p_bull if probabilistic else None,
|
||||
pipeline_mode="probabilistic" if probabilistic else "heuristic",
|
||||
)
|
||||
|
||||
@@ -12,7 +12,7 @@ from services.recommendation.worker import generate_recommendation
|
||||
from services.shared.agent_config import AgentConfigResolver
|
||||
from services.shared.config import OllamaConfig, load_config
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import QUEUE_RECOMMENDATION, queue_key
|
||||
from services.shared.redis_keys import QUEUE_RECOMMENDATION, is_pipeline_enabled, queue_key
|
||||
|
||||
logger = logging.getLogger("recommendation_main")
|
||||
|
||||
@@ -62,6 +62,10 @@ async def main() -> None:
|
||||
|
||||
try:
|
||||
while True:
|
||||
if not await is_pipeline_enabled(redis_client):
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
raw = await redis_client.lpop(queue)
|
||||
if raw is None:
|
||||
await asyncio.sleep(1)
|
||||
|
||||
@@ -20,7 +20,7 @@ import asyncpg
|
||||
import httpx
|
||||
|
||||
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
||||
from services.shared.config import OllamaConfig
|
||||
from services.shared.config import OllamaConfig, VLLMConfig
|
||||
from services.shared.schemas import TrendSummary
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -37,7 +37,8 @@ STRICT RULES:
|
||||
3. Keep the rewrite under 150 words.
|
||||
4. Preserve all factual claims, risk notes, and evidence counts from the input.
|
||||
5. Use a neutral, professional tone. Avoid hype or marketing language.
|
||||
6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary."""
|
||||
6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.
|
||||
7. Do NOT show your thinking process. Do NOT include any reasoning steps. Output ONLY the final rewritten text."""
|
||||
|
||||
|
||||
def build_thesis_rewrite_prompt(
|
||||
@@ -75,7 +76,7 @@ Rewrite the following structured thesis into clear, professional analyst prose.
|
||||
{context_block}
|
||||
--- END CONTEXT ---
|
||||
|
||||
Return ONLY the rewritten thesis. No other text."""
|
||||
Return ONLY the rewritten thesis. No other text. /no_think"""
|
||||
|
||||
return {
|
||||
"system": THESIS_SYSTEM_PROMPT,
|
||||
@@ -115,26 +116,42 @@ async def rewrite_thesis_with_llm(
|
||||
|
||||
# Resolve thesis-rewriter config from DB for variant override
|
||||
resolved: ResolvedAgentConfig | None = None
|
||||
effective_config = config
|
||||
effective_config: OllamaConfig | VLLMConfig = config
|
||||
use_vllm = False
|
||||
if pool is not None:
|
||||
try:
|
||||
resolver = AgentConfigResolver(pool, ttl_seconds=60)
|
||||
resolved = await resolver.resolve("thesis-rewriter")
|
||||
if resolved is not None:
|
||||
effective_config = OllamaConfig(
|
||||
base_url=config.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
retry_base_delay=config.retry_base_delay,
|
||||
retry_max_delay=config.retry_max_delay,
|
||||
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
||||
max_tokens=resolved.max_tokens,
|
||||
context_window=resolved.context_window,
|
||||
)
|
||||
provider = (resolved.model_provider or "").strip().lower()
|
||||
if provider == "vllm":
|
||||
use_vllm = True
|
||||
# Import load_config to get vllm base_url from env
|
||||
from services.shared.config import load_config as _load_config
|
||||
_cfg = _load_config()
|
||||
effective_config = VLLMConfig(
|
||||
base_url=_cfg.vllm.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
max_tokens=resolved.max_tokens,
|
||||
temperature=0.0,
|
||||
)
|
||||
else:
|
||||
effective_config = OllamaConfig(
|
||||
base_url=config.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
retry_base_delay=config.retry_base_delay,
|
||||
retry_max_delay=config.retry_max_delay,
|
||||
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
||||
max_tokens=resolved.max_tokens,
|
||||
context_window=resolved.context_window,
|
||||
)
|
||||
logger.info(
|
||||
"Thesis rewriter using resolved config: model=%s variant=%s",
|
||||
resolved.model_name, resolved.variant_id,
|
||||
"Thesis rewriter using resolved config: model=%s variant=%s provider=%s",
|
||||
resolved.model_name, resolved.variant_id, provider or "ollama",
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
@@ -177,7 +194,10 @@ async def rewrite_thesis_with_llm(
|
||||
client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)
|
||||
|
||||
try:
|
||||
rewritten = await _call_ollama_thesis(client, effective_config, prompts)
|
||||
if use_vllm:
|
||||
rewritten = await _call_vllm_thesis(client, effective_config, prompts) # type: ignore[arg-type]
|
||||
else:
|
||||
rewritten = await _call_ollama_thesis(client, effective_config, prompts) # type: ignore[arg-type]
|
||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||
|
||||
if rewritten:
|
||||
@@ -296,8 +316,16 @@ async def _call_ollama_thesis(
|
||||
}
|
||||
|
||||
# Support context_window override via num_ctx (Requirement 10.4)
|
||||
options: dict[str, object] = {}
|
||||
if config.context_window > 0:
|
||||
payload["options"] = {"num_ctx": config.context_window}
|
||||
options["num_ctx"] = config.context_window
|
||||
# Disable thinking/reasoning mode for models that support it (e.g. Qwen3)
|
||||
options["num_predict"] = options.get("num_predict", 512)
|
||||
if options:
|
||||
payload["options"] = options
|
||||
|
||||
# Qwen3 thinking mode control: /no_think suffix or think parameter
|
||||
payload["think"] = False
|
||||
|
||||
resp = await client.post(
|
||||
f"{config.base_url}/api/chat",
|
||||
@@ -317,4 +345,94 @@ async def _call_ollama_thesis(
|
||||
len(content),
|
||||
)
|
||||
|
||||
return content.strip()
|
||||
return _strip_thinking_block(content.strip())
|
||||
|
||||
|
||||
def _strip_thinking_block(text: str) -> str:
|
||||
"""Remove thinking/reasoning blocks from model output.
|
||||
|
||||
Some models (e.g. Qwen) emit chain-of-thought either in <think> XML tags
|
||||
or as plain-text "Thinking Process:" blocks before the actual response.
|
||||
This strips both patterns to return only the final thesis text.
|
||||
"""
|
||||
import re
|
||||
# Remove <think>...</think> blocks (greedy, handles multiline)
|
||||
cleaned = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
|
||||
# Handle unclosed <think> tag (model cut off mid-thought)
|
||||
cleaned = re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)
|
||||
# Remove plain-text "Thinking Process:" blocks followed by the actual thesis
|
||||
# Pattern: everything from "Thinking Process:" up to "</think>" or the final
|
||||
# clean thesis (identified by the last paragraph that doesn't start with numbering/bullets)
|
||||
cleaned = re.sub(
|
||||
r"(?:Thinking Process:|Thought Process:|Chain of Thought:).*?(?=\n[A-Z]{2,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?))",
|
||||
"",
|
||||
cleaned,
|
||||
flags=re.DOTALL | re.IGNORECASE,
|
||||
)
|
||||
# Fallback: if "Thinking Process:" still present, take only text after last "</think>" or
|
||||
# after the thinking block ends (heuristic: last substantial paragraph)
|
||||
if "thinking process:" in cleaned.lower():
|
||||
# Find the actual thesis — it's typically the last coherent paragraph
|
||||
# that starts with a ticker symbol pattern
|
||||
match = re.search(
|
||||
r"\n([A-Z]{1,5}\s+(?:shows?|demonstrates?|exhibits?|displays?|maintains?)\s.+)",
|
||||
cleaned,
|
||||
flags=re.DOTALL,
|
||||
)
|
||||
if match:
|
||||
cleaned = match.group(1)
|
||||
return cleaned.strip()
|
||||
|
||||
|
||||
async def _call_vllm_thesis(
    client: httpx.AsyncClient,
    config: VLLMConfig,
    prompts: dict[str, str],
) -> str:
    """Make a vLLM chat completion call for thesis rewriting.

    Posts to the OpenAI-compatible ``/v1/chat/completions`` endpoint with
    thinking mode disabled through ``chat_template_kwargs``.

    Args:
        client: HTTP client used for the request.
        config: vLLM settings; ``base_url``, ``model``, ``max_tokens``,
            ``temperature`` and ``api_key`` are read.
        prompts: Mapping with ``"system"`` and ``"user"`` prompt text.

    Returns:
        The first choice's message text with reasoning blocks removed, or an
        empty string when the response carries no text content.

    Raises:
        Whatever ``client.post`` / ``resp.raise_for_status()`` raise; HTTP
        error statuses are not converted into an empty result here.
    """
    start = time.monotonic()

    payload: dict[str, object] = {
        "model": config.model,
        "messages": [
            {"role": "system", "content": prompts["system"]},
            {"role": "user", "content": prompts["user"]},
        ],
        "max_tokens": config.max_tokens,
        "temperature": config.temperature,
        "stream": False,
        # Disable thinking/reasoning mode for Qwen3 models on vLLM
        "chat_template_kwargs": {"enable_thinking": False},
    }

    headers: dict[str, str] = {"Content-Type": "application/json"}
    if config.api_key:
        headers["Authorization"] = f"Bearer {config.api_key}"

    resp = await client.post(
        f"{config.base_url}/v1/chat/completions",
        json=payload,
        headers=headers,
    )
    _ = resp.raise_for_status()

    duration_ms = int((time.monotonic() - start) * 1000)

    # Walk the response defensively: "content" may be JSON null and any
    # level may have an unexpected shape, so use text only when it is
    # really a string.
    body = resp.json()
    content: str = ""
    choices = body.get("choices") if isinstance(body, dict) else None
    if isinstance(choices, list) and choices:
        first_choice = choices[0]
        msg = first_choice.get("message") if isinstance(first_choice, dict) else None
        if isinstance(msg, dict):
            raw_content = msg.get("content")
            if isinstance(raw_content, str):
                content = raw_content

    logger.debug(
        "vLLM thesis call completed in %dms, response length=%d",
        duration_ms,
        len(content),
    )

    return _strip_thinking_block(content.strip())
|
||||
|
||||
@@ -48,6 +48,7 @@ from services.shared.schemas import (
|
||||
TrendSummary,
|
||||
TrendWindow,
|
||||
)
|
||||
from services.validation.prediction_snapshot import create_prediction_snapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -606,6 +607,13 @@ async def persist_recommendation(
|
||||
"invalidation_conditions": eligibility_result.invalidation_conditions,
|
||||
"risk_classification": risk_class,
|
||||
}
|
||||
|
||||
# Store probabilistic EV fields in risk_checks JSONB (Req 16.2)
|
||||
if eligibility_result.pipeline_mode == "probabilistic":
|
||||
risk_checks["ev"] = eligibility_result.ev_value
|
||||
risk_checks["p_bull"] = eligibility_result.p_bull
|
||||
risk_checks["pipeline_mode"] = eligibility_result.pipeline_mode
|
||||
risk_checks["ev_threshold"] = 0.005
|
||||
await pool.execute(
|
||||
_INSERT_RISK_EVALUATION,
|
||||
rec_id,
|
||||
@@ -734,6 +742,92 @@ def _map_time_horizon_prefix(window: str) -> str:
|
||||
return mapping.get(window, "window_")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch evidence signals and docs for prediction snapshot (Requirement 1.1)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_EVIDENCE_SIGNALS_QUERY = """
|
||||
SELECT
|
||||
di.document_id::text AS document_id,
|
||||
di.id::text AS signal_id,
|
||||
dir.ticker,
|
||||
d.source_type AS source,
|
||||
d.source_type,
|
||||
dir.catalyst_type,
|
||||
dir.sentiment,
|
||||
dir.impact_score AS impact,
|
||||
di.confidence AS extraction_confidence,
|
||||
di.source_credibility AS weight
|
||||
FROM document_impact_records dir
|
||||
JOIN document_intelligence di ON di.id = dir.intelligence_id
|
||||
JOIN documents d ON d.id = di.document_id
|
||||
WHERE di.document_id = ANY($1::uuid[])
|
||||
AND di.validation_status = 'valid'
|
||||
"""
|
||||
|
||||
_EVIDENCE_DOCS_QUERY = """
|
||||
SELECT
|
||||
d.id::text AS document_id,
|
||||
COALESCE(d.title, '') AS title,
|
||||
COALESCE(d.url, '') AS url
|
||||
FROM documents d
|
||||
WHERE d.id = ANY($1::uuid[])
|
||||
"""
|
||||
|
||||
|
||||
async def _fetch_evidence_for_snapshot(
|
||||
pool: asyncpg.Pool,
|
||||
document_ids: list[str],
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
"""Fetch evidence signals and document metadata for prediction snapshot.
|
||||
|
||||
Filters out non-UUID document IDs (e.g. synthetic pattern IDs) since
|
||||
they cannot be looked up in the documents table.
|
||||
|
||||
Returns (evidence_signals, evidence_docs).
|
||||
"""
|
||||
# Filter to valid UUIDs only
|
||||
valid_ids: list[str] = []
|
||||
for doc_id in document_ids:
|
||||
try:
|
||||
_uuid.UUID(doc_id)
|
||||
valid_ids.append(doc_id)
|
||||
except (ValueError, AttributeError):
|
||||
continue
|
||||
|
||||
if not valid_ids:
|
||||
return [], []
|
||||
|
||||
signal_rows = await pool.fetch(_EVIDENCE_SIGNALS_QUERY, valid_ids)
|
||||
evidence_signals = [
|
||||
{
|
||||
"document_id": row["document_id"],
|
||||
"signal_id": row["signal_id"],
|
||||
"ticker": row["ticker"] or "",
|
||||
"source": row["source"] or "",
|
||||
"source_type": row["source_type"] or "",
|
||||
"catalyst_type": row["catalyst_type"] or "",
|
||||
"sentiment": row["sentiment"] or "",
|
||||
"impact": float(row["impact"] or 0.0),
|
||||
"extraction_confidence": float(row["extraction_confidence"] or 0.0),
|
||||
"weight": float(row["weight"] or 0.0),
|
||||
}
|
||||
for row in signal_rows
|
||||
]
|
||||
|
||||
doc_rows = await pool.fetch(_EVIDENCE_DOCS_QUERY, valid_ids)
|
||||
evidence_docs = [
|
||||
{
|
||||
"document_id": row["document_id"],
|
||||
"title": row["title"],
|
||||
"url": row["url"],
|
||||
}
|
||||
for row in doc_rows
|
||||
]
|
||||
|
||||
return evidence_signals, evidence_docs
|
||||
|
||||
|
||||
async def generate_recommendation(
|
||||
pool: asyncpg.Pool,
|
||||
ticker: str,
|
||||
@@ -840,6 +934,22 @@ async def generate_recommendation(
|
||||
eligibility_result=result,
|
||||
)
|
||||
|
||||
# 7b. Capture prediction snapshot for model validation (Requirements 1.1, 1.6)
|
||||
try:
|
||||
all_doc_ids = list(summary.top_supporting_evidence) + list(summary.top_opposing_evidence)
|
||||
evidence_signals, evidence_docs = await _fetch_evidence_for_snapshot(
|
||||
pool, all_doc_ids,
|
||||
)
|
||||
await create_prediction_snapshot(
|
||||
pool, rec, summary, evidence_signals, evidence_docs,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to create prediction snapshot for %s/%s — recommendation "
|
||||
"persisted but snapshot creation failed",
|
||||
ticker, rec_id, exc_info=True,
|
||||
)
|
||||
|
||||
# 8. Publish prediction facts to analytical tables (Requirement 9.4)
|
||||
if minio_client is not None:
|
||||
try:
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
"""Data collector for trading performance reports.
|
||||
|
||||
Queries all relevant trading data for a reporting period and returns
|
||||
a CollectedData bundle for downstream section builders.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CollectedData:
    """Bundle of raw query results gathered for one reporting period.

    Every list field defaults to a fresh empty list and the snapshot fields
    default to ``None``, so a default-constructed instance represents a
    period with no activity.
    """

    # Rows scoped to the reporting period
    trading_decisions: list[dict] = field(default_factory=list)
    orders: list[dict] = field(default_factory=list)
    # Position rows, split by whether quantity is still above zero
    open_positions: list[dict] = field(default_factory=list)
    closed_positions: list[dict] = field(default_factory=list)
    # Latest snapshot inside the period / latest one before it started
    portfolio_snapshot: dict | None = None
    previous_portfolio_snapshot: dict | None = None
    recommendations: list[dict] = field(default_factory=list)
    prediction_outcomes: list[dict] = field(default_factory=list)
    model_metric_snapshots: list[dict] = field(default_factory=list)
    circuit_breaker_events: list[dict] = field(default_factory=list)
    reserve_pool_balance: float = 0.0
|
||||
|
||||
|
||||
def _row_dict(row: asyncpg.Record) -> dict[str, Any]:
|
||||
"""Convert asyncpg Record to dict with UUID→str coercion."""
|
||||
d = dict(row)
|
||||
for k, v in d.items():
|
||||
if isinstance(v, uuid.UUID):
|
||||
d[k] = str(v)
|
||||
return d
|
||||
|
||||
|
||||
async def collect_report_data(
    pool: asyncpg.Pool,
    period_start: date,
    period_end: date,
) -> CollectedData:
    """Gather every dataset a report needs for the given period.

    A single pooled connection is acquired and the individual fetch helpers
    are awaited on it one after another, covering: trading_decisions,
    orders, positions (open and closed), portfolio_snapshots,
    recommendations, prediction_outcomes, model_metric_snapshots,
    circuit-breaker decisions and the reserve_pool_ledger balance.

    Returns a CollectedData holding the raw results. When the period has no
    rows the list fields are simply empty (zero-activity report).
    """
    window = (period_start, period_end)

    async with pool.acquire() as conn:
        # Dict displays evaluate in source order, so the queries run in the
        # sequence written here.
        bundle = {
            "trading_decisions": await _fetch_trading_decisions(conn, *window),
            "orders": await _fetch_orders(conn, *window),
            "open_positions": await _fetch_open_positions(conn),
            "closed_positions": await _fetch_closed_positions(conn, *window),
            "portfolio_snapshot": await _fetch_portfolio_snapshot(conn, *window),
            "previous_portfolio_snapshot": await _fetch_previous_portfolio_snapshot(
                conn, period_start
            ),
            "recommendations": await _fetch_recommendations(conn, *window),
            "prediction_outcomes": await _fetch_prediction_outcomes(conn, *window),
            "model_metric_snapshots": await _fetch_model_metric_snapshots(conn, *window),
            "circuit_breaker_events": await _fetch_circuit_breaker_events(conn, *window),
            "reserve_pool_balance": await _fetch_reserve_pool_balance(conn),
        }

    return CollectedData(**bundle)
|
||||
|
||||
|
||||
async def _fetch_trading_decisions(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch trading decisions created within the period."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, recommendation_id, decision, skip_reason, ticker,
|
||||
computed_position_size, computed_share_quantity,
|
||||
risk_tier_at_decision, portfolio_heat_at_decision,
|
||||
active_pool_at_decision, reserve_pool_at_decision,
|
||||
circuit_breaker_status, correlation_check_result,
|
||||
sector_exposure_check_result, earnings_proximity_flag,
|
||||
is_micro_trade, decision_trace, created_at
|
||||
FROM trading_decisions
|
||||
WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY created_at""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_orders(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch orders created within the period."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, recommendation_id, broker_account_id, ticker, side,
|
||||
order_type, quantity, limit_price, stop_price, status,
|
||||
broker_order_id, fill_price, fill_quantity,
|
||||
submitted_at, filled_at, cancelled_at, rejected_at,
|
||||
rejection_reason, created_at
|
||||
FROM orders
|
||||
WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY created_at""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_open_positions(conn: asyncpg.Connection) -> list[dict]:
|
||||
"""Fetch currently open positions (quantity > 0)."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, broker_account_id, ticker, quantity,
|
||||
avg_entry_price, current_price,
|
||||
unrealized_pnl, realized_pnl, updated_at
|
||||
FROM positions
|
||||
WHERE quantity > 0
|
||||
ORDER BY ticker""",
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_closed_positions(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch positions closed during the period (quantity = 0, updated within period)."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, broker_account_id, ticker, quantity,
|
||||
avg_entry_price, current_price,
|
||||
unrealized_pnl, realized_pnl, updated_at
|
||||
FROM positions
|
||||
WHERE quantity = 0
|
||||
AND updated_at >= $1::date
|
||||
AND updated_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY updated_at""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_portfolio_snapshot(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> dict | None:
|
||||
"""Fetch the most recent portfolio snapshot within the period."""
|
||||
row = await conn.fetchrow(
|
||||
"""SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
|
||||
daily_return, cumulative_return, unrealized_pnl, realized_pnl,
|
||||
win_count, loss_count, win_rate, sharpe_ratio,
|
||||
max_drawdown, current_drawdown_pct, portfolio_heat,
|
||||
risk_tier, positions, metrics, created_at
|
||||
FROM portfolio_snapshots
|
||||
WHERE snapshot_date >= $1 AND snapshot_date <= $2
|
||||
ORDER BY snapshot_date DESC
|
||||
LIMIT 1""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return _row_dict(row) if row else None
|
||||
|
||||
|
||||
async def _fetch_previous_portfolio_snapshot(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
) -> dict | None:
|
||||
"""Fetch the most recent portfolio snapshot before the period start."""
|
||||
row = await conn.fetchrow(
|
||||
"""SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
|
||||
daily_return, cumulative_return, unrealized_pnl, realized_pnl,
|
||||
win_count, loss_count, win_rate, sharpe_ratio,
|
||||
max_drawdown, current_drawdown_pct, portfolio_heat,
|
||||
risk_tier, positions, metrics, created_at
|
||||
FROM portfolio_snapshots
|
||||
WHERE snapshot_date < $1
|
||||
ORDER BY snapshot_date DESC
|
||||
LIMIT 1""",
|
||||
period_start,
|
||||
)
|
||||
return _row_dict(row) if row else None
|
||||
|
||||
|
||||
async def _fetch_recommendations(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch recommendations created within the period."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, ticker, company_id, action, mode, confidence,
|
||||
time_horizon, thesis, portfolio_pct, max_loss_pct,
|
||||
model_version, generated_at, created_at
|
||||
FROM recommendations
|
||||
WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY created_at""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_prediction_outcomes(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch prediction outcomes evaluated within the period."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT po.id, po.prediction_id, po.evaluated_at, po.horizon,
|
||||
po.future_price, po.future_return,
|
||||
po.spy_future_price, po.spy_return,
|
||||
po.sector_etf_future_price, po.sector_etf_return,
|
||||
po.excess_return_vs_spy, po.excess_return_vs_sector,
|
||||
po.direction_correct, po.profitable,
|
||||
ps.ticker, ps.direction, ps.action, ps.confidence
|
||||
FROM prediction_outcomes po
|
||||
JOIN prediction_snapshots ps ON ps.id = po.prediction_id
|
||||
WHERE po.evaluated_at >= $1::date
|
||||
AND po.evaluated_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY po.evaluated_at""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_model_metric_snapshots(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch model metric snapshots generated within the period."""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, generated_at, lookback_window, horizon,
|
||||
prediction_count, win_rate, directional_accuracy,
|
||||
information_coefficient, rank_information_coefficient,
|
||||
avg_return, avg_excess_return_vs_spy,
|
||||
avg_excess_return_vs_sector,
|
||||
calibration_error, brier_score,
|
||||
buy_win_rate, sell_win_rate, hold_win_rate,
|
||||
created_at
|
||||
FROM model_metric_snapshots
|
||||
WHERE generated_at >= $1::date
|
||||
AND generated_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY generated_at DESC""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_circuit_breaker_events(
|
||||
conn: asyncpg.Connection,
|
||||
period_start: date,
|
||||
period_end: date,
|
||||
) -> list[dict]:
|
||||
"""Fetch circuit breaker events from trading decisions within the period.
|
||||
|
||||
Circuit breaker events are trading decisions where
|
||||
circuit_breaker_status is not 'clear' (i.e. a breaker was active).
|
||||
"""
|
||||
rows = await conn.fetch(
|
||||
"""SELECT id, recommendation_id, decision, ticker,
|
||||
circuit_breaker_status, decision_trace, created_at
|
||||
FROM trading_decisions
|
||||
WHERE circuit_breaker_status != 'clear'
|
||||
AND created_at >= $1::date
|
||||
AND created_at < ($2::date + INTERVAL '1 day')
|
||||
ORDER BY created_at""",
|
||||
period_start,
|
||||
period_end,
|
||||
)
|
||||
return [_row_dict(r) for r in rows]
|
||||
|
||||
|
||||
async def _fetch_reserve_pool_balance(conn: asyncpg.Connection) -> float:
|
||||
"""Fetch the latest reserve pool balance."""
|
||||
row = await conn.fetchrow(
|
||||
"SELECT balance_after FROM reserve_pool_ledger ORDER BY created_at DESC LIMIT 1",
|
||||
)
|
||||
return float(row["balance_after"]) if row else 0.0
|
||||
@@ -0,0 +1,279 @@
|
||||
"""Report generator — orchestrates collection, building, validation, summarization, and storage.
|
||||
|
||||
Provides three public functions:
|
||||
- generate_report: full pipeline from data collection to assembled ReportData
|
||||
- store_report: upsert into trading_reports table
|
||||
- process_report_job: Redis queue job handler with retry and dedup
|
||||
|
||||
Requirements: 5.1, 5.2, 5.3, 6.3, 6.4, 6.5
|
||||
Design: Report Generator
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.reporting.collector import collect_report_data
|
||||
from services.reporting.models import ReportData, ReportType
|
||||
from services.reporting.sections import (
|
||||
build_model_quality_section,
|
||||
build_pnl_section,
|
||||
build_position_performance_section,
|
||||
build_recommendation_accuracy_section,
|
||||
build_risk_metrics_section,
|
||||
)
|
||||
from services.reporting.summarizer import (
|
||||
generate_executive_summary,
|
||||
summarize_section,
|
||||
)
|
||||
from services.reporting.validator import (
|
||||
compute_validation_status,
|
||||
validate_model_quality,
|
||||
validate_recommendation_accuracy,
|
||||
)
|
||||
from services.shared.agent_config import AgentConfigResolver
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Retry configuration for process_report_job
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_RETRIES = 3
|
||||
_BACKOFF_SECONDS = (30, 60, 120)
|
||||
|
||||
# In-memory set tracking in-progress jobs to reject duplicates.
|
||||
# Key format: "{report_type}:{period_start}:{period_end}"
|
||||
_in_progress_jobs: set[str] = set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate_report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def generate_report(
    pool: asyncpg.Pool,
    report_type: ReportType,
    period_start: date,
    period_end: date,
) -> ReportData:
    """Run the whole report pipeline and return the assembled ReportData.

    Steps, in order:
      1. collect raw data for the period;
      2. build the five sections (P&L, recommendation accuracy, position
         performance, risk metrics, model quality);
      3. attach validator warnings to the recommendation-accuracy and
         model-quality sections;
      4. summarize each section, one call at a time, through a fresh
         AgentConfigResolver;
      5. produce the executive summary from the section summaries;
      6. assemble ReportData and derive its validation status.

    Nothing is persisted here.
    """
    # 1. Collect
    collected = await collect_report_data(pool, period_start, period_end)

    # 2. Build
    pnl = build_pnl_section(collected)
    rec_accuracy = build_recommendation_accuracy_section(collected)
    position_perf = build_position_performance_section(collected)
    risk_metrics = build_risk_metrics_section(collected)
    model_quality = build_model_quality_section(collected)

    # 3. Validate
    rec_accuracy.validation_warnings = validate_recommendation_accuracy(
        rec_accuracy, collected.prediction_outcomes,
    )
    model_quality.validation_warnings = validate_model_quality(
        model_quality, collected.model_metric_snapshots,
    )

    # 4. Summarize — sequentially, in this fixed order
    resolver = AgentConfigResolver(pool)
    named_sections = (
        ("pnl", pnl),
        ("recommendation_accuracy", rec_accuracy),
        ("position_performance", position_perf),
        ("risk_metrics", risk_metrics),
        ("model_quality", model_quality),
    )
    for section_name, section in named_sections:
        section.summary = await summarize_section(
            pool, resolver, section_name, section.model_dump(),
        )

    # 5. Executive summary
    section_summaries = {
        section_name: section.summary for section_name, section in named_sections
    }
    executive_summary = await generate_executive_summary(
        pool, resolver, section_summaries,
    )

    # 6. Assemble
    report = ReportData(
        pnl=pnl,
        recommendation_accuracy=rec_accuracy,
        position_performance=position_perf,
        risk_metrics=risk_metrics,
        model_quality=model_quality,
        executive_summary=executive_summary,
        generated_at=datetime.now(timezone.utc),
        period_start=period_start,
        period_end=period_end,
        report_type=ReportType(report_type),
    )

    # The status is computed from the assembled report
    report.validation_status = compute_validation_status(report)

    return report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# store_report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Insert a report, or overwrite the payload/status/timestamp of the existing
# row for the same (report_type, period_start, period_end); returns its id.
_UPSERT_SQL = """\
INSERT INTO trading_reports
    (report_type, period_start, period_end, report_data, validation_status, generated_at)
VALUES
    ($1, $2, $3, $4::jsonb, $5, $6)
ON CONFLICT (report_type, period_start, period_end)
DO UPDATE SET
    report_data = EXCLUDED.report_data,
    validation_status = EXCLUDED.validation_status,
    generated_at = EXCLUDED.generated_at
RETURNING id
"""


async def store_report(
    pool: asyncpg.Pool,
    report: ReportData,
) -> str:
    """Persist a report into trading_reports and return its id as a string.

    The statement is an upsert keyed on (report_type, period_start,
    period_end), so regenerating a report for the same period replaces the
    stored payload instead of creating a second row. The report body is sent
    as its JSON serialization and cast to jsonb by the statement.
    """
    params = (
        report.report_type.value,
        report.period_start,
        report.period_end,
        report.model_dump_json(),
        report.validation_status.value,
        report.generated_at,
    )
    stored = await pool.fetchrow(_UPSERT_SQL, *params)
    report_id = str(stored["id"])  # type: ignore[index]

    logger.info(
        "Stored report %s (type=%s, period=%s to %s)",
        report_id,
        report.report_type.value,
        report.period_start,
        report.period_end,
    )
    return report_id
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# process_report_job
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _job_key(report_type: str, period_start: str, period_end: str) -> str:
|
||||
"""Build a dedup key for an in-progress job."""
|
||||
return f"{report_type}:{period_start}:{period_end}"
|
||||
|
||||
|
||||
async def process_report_job(
    pool: asyncpg.Pool,
    job: dict,
) -> None:
    """Process a report generation job from the Redis queue.

    Deserializes the job payload, then calls generate_report + store_report.
    Up to ``_MAX_RETRIES`` attempts are made; after a failed attempt that is
    not the last one the coroutine sleeps for the matching entry of
    ``_BACKOFF_SECONDS`` (so with 3 attempts: 30s, then 60s).
    Rejects duplicate jobs for the same report_type + period while one is
    already running in this process (the registry is an in-memory set).

    Failures never propagate to the caller: an invalid payload, a duplicate,
    or exhausted retries are logged and the function returns ``None``.

    Expected job payload::

        {
            "report_type": "daily" | "weekly",
            "period_start": "YYYY-MM-DD",
            "period_end": "YYYY-MM-DD"
        }
    """
    report_type_str = job.get("report_type", "")
    period_start_str = job.get("period_start", "")
    period_end_str = job.get("period_end", "")

    # Validate payload
    try:
        report_type = ReportType(report_type_str)
        period_start = date.fromisoformat(period_start_str)
        period_end = date.fromisoformat(period_end_str)
    except (ValueError, TypeError) as exc:
        logger.error("Invalid report job payload: %s — %s", job, exc)
        return

    if period_end < period_start:
        # An inverted window can only yield an empty, misleading report.
        logger.error(
            "Invalid report job payload: %s — %s",
            job,
            "period_end is before period_start",
        )
        return

    # Reject duplicate in-progress jobs. The key is built from the parsed
    # values so that equivalent payloads written differently (any alternate
    # date spelling accepted by date.fromisoformat) map to the same key.
    key = _job_key(
        report_type.value, period_start.isoformat(), period_end.isoformat(),
    )
    if key in _in_progress_jobs:
        logger.warning(
            "Duplicate report job rejected (already in progress): %s", key,
        )
        return

    _in_progress_jobs.add(key)
    try:
        last_error: Exception | None = None
        for attempt in range(_MAX_RETRIES):
            try:
                report = await generate_report(
                    pool, report_type, period_start, period_end,
                )
                await store_report(pool, report)
                logger.info(
                    "Report job completed: %s (attempt %d)", key, attempt + 1,
                )
                return
            except Exception as exc:
                last_error = exc
                # Sleep only when another attempt will follow.
                if attempt < _MAX_RETRIES - 1:
                    backoff = _BACKOFF_SECONDS[attempt]
                    logger.warning(
                        "Report job %s failed (attempt %d/%d): %s — retrying in %ds",
                        key,
                        attempt + 1,
                        _MAX_RETRIES,
                        exc,
                        backoff,
                    )
                    await asyncio.sleep(backoff)

        # All retries exhausted
        logger.error(
            "Report job %s failed after %d attempts: %s",
            key,
            _MAX_RETRIES,
            last_error,
        )
    finally:
        # Always release the key so the same period can be re-queued later.
        _in_progress_jobs.discard(key)
|
||||
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ReportType(str, Enum):
    """Cadence of a trading report; members are also plain strings."""

    DAILY = "daily"
    WEEKLY = "weekly"
|
||||
|
||||
|
||||
class ValidationStatus(str, Enum):
    """Outcome of report validation; members are also plain strings."""

    PASSED = "passed"
    WARNINGS = "warnings"
|
||||
|
||||
|
||||
class ValidationWarning(BaseModel):
    """A single discrepancy between a computed value and a snapshot value."""

    # Name of the report field the warning refers to
    field_name: str
    # The two values being compared and how far apart they are
    computed_value: float
    snapshot_value: float
    pct_difference: float
|
||||
|
||||
|
||||
class PLSection(BaseModel):
    """Profit-and-loss section of a report."""

    # P&L totals
    realized_pnl: float
    unrealized_pnl: float
    # Returns
    daily_return: float
    cumulative_return: float
    # Win/loss statistics
    win_count: int
    loss_count: int
    win_rate: float
    profit_factor: float
    sharpe_ratio: float
    # Narrative text; empty until a summary is attached
    summary: str = ""
    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RecommendationAccuracySection(BaseModel):
    """Recommendation-accuracy section: acted vs skipped breakdown."""

    total_evaluated: int
    # How many recommendations were acted on vs skipped
    act_count: int
    skip_count: int
    acted_win_rate: float
    # Average confidence, split by acted / skipped
    avg_confidence_acted: float
    avg_confidence_skipped: float
    # Narrative text; empty until a summary is attached
    summary: str = ""
    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PositionDetail(BaseModel):
    """Performance figures for one position listed in a report."""

    ticker: str
    entry_price: float
    # Current price for an open position, exit price for a closed one
    current_or_exit_price: float
    pnl: float
    pnl_pct: float
    hold_duration_hours: float
    status: str  # "open" or "closed"
|
||||
|
||||
|
||||
class PositionPerformanceSection(BaseModel):
    """Position-performance section: a list of per-position details."""

    positions: list[PositionDetail] = Field(default_factory=list)
    # Narrative text; empty until a summary is attached
    summary: str = ""
|
||||
|
||||
|
||||
class RiskMetricsSection(BaseModel):
    """Risk-metrics section of a report."""

    current_risk_tier: str
    portfolio_heat: float
    # Drawdown figures
    max_drawdown: float
    current_drawdown_pct: float
    reserve_pool_balance: float
    circuit_breaker_event_count: int
    # Narrative text; empty until a summary is attached
    summary: str = ""
|
||||
|
||||
|
||||
class ModelQualityWindow(BaseModel):
    """Model-quality metrics for one lookback window.

    Every metric must be supplied but may be ``None``.
    """

    lookback: str
    win_rate: float | None
    directional_accuracy: float | None
    information_coefficient: float | None
    calibration_error: float | None
    brier_score: float | None
|
||||
|
||||
|
||||
class ModelQualitySection(BaseModel):
    """Model-quality section: one entry per lookback window."""

    windows: list[ModelQualityWindow] = Field(default_factory=list)
    # Narrative text; empty until a summary is attached
    summary: str = ""
    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ReportData(BaseModel):
    """Complete report document; this is the structure stored as JSONB."""

    # The five report sections
    pnl: PLSection
    recommendation_accuracy: RecommendationAccuracySection
    position_performance: PositionPerformanceSection
    risk_metrics: RiskMetricsSection
    model_quality: ModelQualitySection
    # Report-level narrative and validation outcome
    executive_summary: str = ""
    validation_status: ValidationStatus = ValidationStatus.PASSED
    # Report identity: when it was generated and what it covers
    generated_at: datetime
    period_start: date
    period_end: date
    report_type: ReportType
|
||||
@@ -0,0 +1,370 @@
|
||||
"""Section builders for trading performance reports.
|
||||
|
||||
Each builder takes a CollectedData bundle and returns a typed Pydantic
|
||||
section model. All builders handle zero-activity gracefully by returning
|
||||
zero values and empty lists when no data is available.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.reporting.collector import CollectedData
|
||||
from services.reporting.models import (
|
||||
ModelQualitySection,
|
||||
ModelQualityWindow,
|
||||
PLSection,
|
||||
PositionDetail,
|
||||
PositionPerformanceSection,
|
||||
RecommendationAccuracySection,
|
||||
RiskMetricsSection,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_pnl_section(data: CollectedData) -> PLSection:
    """Assemble the P&L section from the period's portfolio snapshot.

    Realized/unrealized P&L, returns, win/loss counts, win rate and Sharpe
    ratio are read straight from ``data.portfolio_snapshot`` (missing or
    NULL values become zero). The profit factor is derived from
    ``data.closed_positions`` as total gains divided by total absolute
    losses, and is reported as 0.0 when there are no losses.

    Without a snapshot, a section with every metric at zero is returned.
    """
    snapshot = data.portfolio_snapshot

    if snapshot is None:
        return PLSection(
            realized_pnl=0.0,
            unrealized_pnl=0.0,
            daily_return=0.0,
            cumulative_return=0.0,
            win_count=0,
            loss_count=0,
            win_rate=0.0,
            profit_factor=0.0,
            sharpe_ratio=0.0,
        )

    def _as_float(key: str) -> float:
        # Treat absent and NULL snapshot values alike as zero.
        return float(snapshot.get(key, 0) or 0)

    def _as_int(key: str) -> int:
        return int(snapshot.get(key, 0) or 0)

    # Profit factor = sum of gains / abs(sum of losses) over closed positions
    realized_values = [
        float(position.get("realized_pnl", 0) or 0)
        for position in data.closed_positions
    ]
    total_gains = sum((value for value in realized_values if value > 0), 0.0)
    total_losses = sum((abs(value) for value in realized_values if value < 0), 0.0)
    profit_factor = (total_gains / total_losses) if total_losses > 0 else 0.0

    return PLSection(
        realized_pnl=_as_float("realized_pnl"),
        unrealized_pnl=_as_float("unrealized_pnl"),
        daily_return=_as_float("daily_return"),
        cumulative_return=_as_float("cumulative_return"),
        win_count=_as_int("win_count"),
        loss_count=_as_int("loss_count"),
        win_rate=_as_float("win_rate"),
        profit_factor=profit_factor,
        sharpe_ratio=_as_float("sharpe_ratio"),
    )
|
||||
|
||||
|
||||
def build_recommendation_accuracy_section(
    data: CollectedData,
) -> RecommendationAccuracySection:
    """Summarise how trading decisions on recommendations turned out.

    Every trading decision is counted as acted (``decision`` equals ``"act"``,
    case-insensitively) or skipped (anything else). The confidence for a
    decision is read from the recommendation referenced by its
    ``recommendation_id``; decisions whose recommendation is unknown or has
    no confidence are still counted but do not contribute to the averages.

    The acted win rate is a fraction in [0.0, 1.0]: among acted decisions
    whose ticker has at least one prediction outcome, the share whose last
    listed outcome for that ticker is flagged ``profitable``. Outcomes are
    matched to decisions by ticker only.

    Returns an all-zero section when there are no trading decisions.
    """
    decisions = data.trading_decisions
    if not decisions:
        return RecommendationAccuracySection(
            total_evaluated=0,
            act_count=0,
            skip_count=0,
            acted_win_rate=0.0,
            avg_confidence_acted=0.0,
            avg_confidence_skipped=0.0,
        )

    def mean(values: list[float]) -> float:
        return (sum(values) / len(values)) if values else 0.0

    # Recommendations keyed by stringified id, used for the confidence lookup.
    # Entries whose id stringifies to "" are left out.
    recommendations: dict[str, dict] = {
        key: recommendation
        for recommendation in data.recommendations
        if (key := str(recommendation.get("id", "")))
    }

    # Prediction outcomes grouped by ticker, in the order they were collected.
    # Decisions carry a recommendation_id rather than a prediction id, so the
    # ticker is the join key used here.
    outcomes_by_symbol: dict[str, list[dict]] = {}
    for outcome in data.prediction_outcomes:
        symbol = outcome.get("ticker", "")
        if not symbol:
            continue
        if symbol in outcomes_by_symbol:
            outcomes_by_symbol[symbol].append(outcome)
        else:
            outcomes_by_symbol[symbol] = [outcome]

    acted = 0
    skipped = 0
    wins = 0
    acted_with_outcome = 0
    acted_confidences: list[float] = []
    skipped_confidences: list[float] = []

    for row in decisions:
        verdict = str(row.get("decision", "")).lower()
        linked = recommendations.get(str(row.get("recommendation_id", "")), {})
        confidence = linked.get("confidence")
        symbol = row.get("ticker", "")

        if verdict != "act":
            skipped += 1
            if confidence is not None:
                skipped_confidences.append(float(confidence))
            continue

        acted += 1
        if confidence is not None:
            acted_confidences.append(float(confidence))

        # Profitability is judged from the last listed outcome for the ticker.
        history = outcomes_by_symbol.get(symbol, [])
        if not history:
            continue
        acted_with_outcome += 1
        if history[-1].get("profitable"):
            wins += 1

    if acted_with_outcome > 0:
        win_rate = wins / acted_with_outcome
    else:
        win_rate = 0.0

    return RecommendationAccuracySection(
        total_evaluated=acted + skipped,
        act_count=acted,
        skip_count=skipped,
        acted_win_rate=win_rate,
        avg_confidence_acted=mean(acted_confidences),
        avg_confidence_skipped=mean(skipped_confidences),
    )
|
||||
|
||||
|
||||
def build_position_performance_section(
    data: CollectedData,
) -> PositionPerformanceSection:
    """Build the per-position performance listing.

    Emits one ``PositionDetail`` per entry in ``data.open_positions``
    (status ``"open"``) followed by one per entry in
    ``data.closed_positions`` (status ``"closed"``).

    Open positions:
        P&L is ``(current_price - avg_entry_price) * quantity`` and P&L% is
        that P&L relative to the cost basis ``avg_entry_price * quantity``
        (0.0 when the cost basis is not positive).

    Closed positions:
        P&L is the stored ``realized_pnl``. The stored quantity is not used
        (closed rows are expected to carry quantity 0), so the original
        quantity is estimated as
        ``abs(realized_pnl / (current_price - avg_entry_price))`` and P&L% is
        ``realized_pnl`` relative to the estimated cost. P&L% is 0.0 when the
        entry price is not positive, when entry and exit prices are equal,
        or when the estimated cost is not positive.

    ``hold_duration_hours`` is the time elapsed between each row's
    ``updated_at`` and the moment this function runs.
    """
    positions: list[PositionDetail] = []
    now = datetime.now(timezone.utc)

    # Open positions: mark-to-market P&L from current price and quantity.
    for pos in data.open_positions:
        entry_price = float(pos.get("avg_entry_price", 0) or 0)
        current_price = float(pos.get("current_price", 0) or 0)
        quantity = float(pos.get("quantity", 0) or 0)

        pnl = (current_price - entry_price) * quantity
        cost_basis = entry_price * quantity
        pnl_pct = (pnl / cost_basis * 100) if cost_basis > 0 else 0.0

        positions.append(
            PositionDetail(
                ticker=pos.get("ticker", ""),
                entry_price=entry_price,
                current_or_exit_price=current_price,
                pnl=pnl,
                pnl_pct=pnl_pct,
                # NOTE(review): measured from updated_at, which may differ
                # from the time the position was opened — confirm.
                hold_duration_hours=_compute_hold_hours(pos.get("updated_at"), now),
                status="open",
            )
        )

    # Closed positions: realized P&L is authoritative; P&L% is estimated.
    for pos in data.closed_positions:
        entry_price = float(pos.get("avg_entry_price", 0) or 0)
        current_price = float(pos.get("current_price", 0) or 0)
        realized_pnl = float(pos.get("realized_pnl", 0) or 0)

        pnl_pct = 0.0
        if entry_price > 0 and current_price != entry_price:
            # Reconstruct the original quantity from the realized P&L and the
            # per-share price move, then express the P&L against that cost.
            price_diff = current_price - entry_price
            est_quantity = abs(realized_pnl / price_diff)
            est_cost = entry_price * est_quantity
            if est_cost > 0:
                pnl_pct = realized_pnl / est_cost * 100

        positions.append(
            PositionDetail(
                ticker=pos.get("ticker", ""),
                entry_price=entry_price,
                current_or_exit_price=current_price,
                pnl=realized_pnl,
                pnl_pct=pnl_pct,
                # NOTE(review): for a closed row this is time since
                # updated_at, not necessarily the holding period — confirm.
                hold_duration_hours=_compute_hold_hours(pos.get("updated_at"), now),
                status="closed",
            )
        )

    return PositionPerformanceSection(positions=positions)
|
||||
|
||||
|
||||
def _compute_hold_hours(updated_at: datetime | str | None, now: datetime) -> float:
|
||||
"""Compute hold duration in hours from updated_at to now."""
|
||||
if updated_at is None:
|
||||
return 0.0
|
||||
if isinstance(updated_at, str):
|
||||
try:
|
||||
updated_at = datetime.fromisoformat(updated_at)
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
if not isinstance(updated_at, datetime):
|
||||
return 0.0
|
||||
# Ensure timezone-aware comparison
|
||||
if updated_at.tzinfo is None:
|
||||
updated_at = updated_at.replace(tzinfo=timezone.utc)
|
||||
delta = now - updated_at
|
||||
return max(delta.total_seconds() / 3600.0, 0.0)
|
||||
|
||||
|
||||
def build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection:
    """Assemble the risk metrics section of the report.

    Risk tier, portfolio heat, max drawdown and current drawdown % are read
    from ``data.portfolio_snapshot``. A missing snapshot, or a missing/falsy
    value in it, yields ``"unknown"`` for the tier and 0.0 for the numeric
    fields. The reserve pool balance and the number of circuit breaker
    events always come straight from ``data``.
    """
    snapshot = data.portfolio_snapshot
    if snapshot is None:
        # An empty mapping produces exactly the defaults used when there is
        # no snapshot, so both cases share one code path.
        snapshot = {}

    def metric(key: str) -> float:
        return float(snapshot.get(key, 0) or 0)

    risk_tier = str(snapshot.get("risk_tier", "unknown") or "unknown")

    return RiskMetricsSection(
        current_risk_tier=risk_tier,
        portfolio_heat=metric("portfolio_heat"),
        max_drawdown=metric("max_drawdown"),
        current_drawdown_pct=metric("current_drawdown_pct"),
        reserve_pool_balance=data.reserve_pool_balance,
        circuit_breaker_event_count=len(data.circuit_breaker_events),
    )
|
||||
|
||||
|
||||
def build_model_quality_section(data: CollectedData) -> ModelQualitySection:
    """Assemble the model quality section of the report.

    Produces one ``ModelQualityWindow`` for each of the 7d, 30d and 90d
    lookbacks, in that order. For every lookback the first matching entry of
    ``data.model_metric_snapshots`` is used (the list is expected to be
    ordered newest first); its metrics are passed through ``_safe_float``.
    A lookback with no snapshot gets a window whose metrics are all ``None``.

    Returns a section with no windows when there are no snapshots at all.
    """
    snapshots = data.model_metric_snapshots
    if not snapshots:
        return ModelQualitySection(windows=[])

    lookbacks = ("7d", "30d", "90d")
    metric_names = (
        "win_rate",
        "directional_accuracy",
        "information_coefficient",
        "calibration_error",
        "brier_score",
    )

    # Keep only the first snapshot seen for each lookback of interest.
    wanted = frozenset(lookbacks)
    first_seen: dict[str, dict] = {}
    for row in snapshots:
        label = row.get("lookback_window", "")
        if label in wanted:
            first_seen.setdefault(label, row)

    windows: list[ModelQualityWindow] = []
    for lookback in lookbacks:
        row = first_seen.get(lookback)
        if row is None:
            metrics = dict.fromkeys(metric_names)
        else:
            metrics = {name: _safe_float(row.get(name)) for name in metric_names}
        windows.append(ModelQualityWindow(lookback=lookback, **metrics))

    return ModelQualitySection(windows=windows)
|
||||
|
||||
|
||||
def _safe_float(value: object) -> float | None:
|
||||
"""Convert a value to float, returning None for None/invalid values."""
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
f = float(value) # type: ignore[arg-type]
|
||||
# Replace NaN/inf with None
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return None
|
||||
return f
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
@@ -0,0 +1,437 @@
|
||||
"""AI-powered report summarizer with chunking and deterministic fallback.
|
||||
|
||||
Generates natural-language summaries for trading performance report sections
|
||||
using the Report_Summarizer_Agent (resolved via AgentConfigResolver + llm_factory).
|
||||
Data is chunked to fit within the 8k-token context window of the local model.
|
||||
|
||||
Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.6
|
||||
Design: AI Summarizer
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.extractor.llm_factory import build_llm_client
|
||||
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
||||
from services.shared.config import load_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Default upper bound, in characters, for each chunk produced by chunk_data().
CHUNK_SIZE_LIMIT = 6000 # characters per chunk
|
||||
# Word cap for a section summary: quoted in the merge prompt and enforced by
# truncation in summarize_section().
MAX_SUMMARY_WORDS = 200 # per section summary
|
||||
# Word cap for the executive summary: quoted in its prompt and enforced by
# truncation in generate_executive_summary().
MAX_EXECUTIVE_SUMMARY_WORDS = 300
|
||||
|
||||
# Slug handed to AgentConfigResolver.resolve() to look up the summarizer agent.
_REPORT_SUMMARIZER_SLUG = "report-summarizer"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Chunking
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def chunk_data(serialized: str, max_chars: int = CHUNK_SIZE_LIMIT) -> list[str]:
    """Split ``serialized`` into consecutive chunks on newline boundaries.

    Lines (each including its trailing ``"\\n"``, if any) are packed greedily
    into chunks so that a chunk holding more than one line never exceeds
    ``max_chars`` characters. A line is never broken: a single line longer
    than ``max_chars`` becomes a chunk of its own and is the only way a
    chunk can exceed the limit.

    Concatenating the returned chunks reproduces ``serialized`` exactly.
    At least one chunk is always returned; empty input yields ``[""]``.
    """
    if not serialized:
        return [""]

    # Rebuild the lines with their newline terminators attached. The final
    # piece has no terminator and is dropped when empty (input ended in "\n").
    pieces = serialized.split("\n")
    lines = [piece + "\n" for piece in pieces[:-1]]
    if pieces[-1]:
        lines.append(pieces[-1])

    # Walk the lines while tracking offsets into the original string; each
    # chunk is then a plain slice of it.
    chunks: list[str] = []
    chunk_start = 0  # offset where the chunk being built begins
    cursor = 0  # offset just past the last line placed in that chunk
    for line in lines:
        pending = cursor - chunk_start
        if pending and pending + len(line) > max_chars:
            # The line does not fit: close the current chunk before it.
            chunks.append(serialized[chunk_start:cursor])
            chunk_start = cursor
        cursor += len(line)

    if cursor > chunk_start:
        chunks.append(serialized[chunk_start:cursor])

    return chunks or [""]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Performance logging
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _log_performance(
    pool: asyncpg.Pool,
    resolved: ResolvedAgentConfig,
    success: bool,
    duration_ms: int,
    input_text: str,
    output_text: str,
    error_message: str | None = None,
) -> None:
    """Record one summarizer invocation in ``agent_performance_log``.

    Document id and ticker are stored as NULL, confidence as 0.0 and retry
    count as 0. Token counts are estimated as one token per four characters
    of the input/output text. Any exception raised while building or
    executing the insert is logged as a warning and not propagated.
    """
    insert_sql = """INSERT INTO agent_performance_log
               (agent_id, variant_id, document_id, ticker, success,
                duration_ms, confidence, retry_count,
                input_tokens, output_tokens, error_message)
               VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)"""
    try:
        row = (
            resolved.agent_id,
            resolved.variant_id,
            None,  # document_id: not applicable to report summaries
            None,  # ticker: not applicable to report summaries
            success,
            duration_ms,
            0.0,  # confidence: summaries carry no confidence score
            0,  # retry_count
            len(input_text) // 4,  # estimated input tokens
            len(output_text) // 4,  # estimated output tokens
            error_message,
        )
        await pool.execute(insert_sql, *row)
    except Exception:
        logger.warning("Failed to log summarizer performance", exc_info=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM summarization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _summarize_chunk(
    resolved: ResolvedAgentConfig,
    section_name: str,
    chunk: str,
) -> str:
    """Ask the report summarizer LLM to summarize one chunk of section data.

    A fresh LLM client is built for the call and always closed afterwards.

    Returns:
        The model's raw output with surrounding whitespace stripped.

    Raises:
        RuntimeError: if the attempt reports an error or the output is
            blank, so the caller can decide on a fallback.
    """
    config = load_config()
    llm = build_llm_client(resolved, config.ollama, config.vllm)
    try:
        user_prompt = f"Summarize this {section_name} data:\n{chunk}"
        attempt = await llm.call_llm(
            prompts={"system": resolved.system_prompt, "user": user_prompt},
            json_schema={},  # free-form text, no structured output requested
            document_text="",
        )
        if attempt.error:
            raise RuntimeError(f"LLM error: {attempt.error}")

        text = attempt.raw_output.strip()
        if not text:
            raise RuntimeError("LLM returned empty response")
        return text
    finally:
        await llm.close()
|
||||
|
||||
|
||||
async def _merge_summaries(
    resolved: ResolvedAgentConfig,
    section_name: str,
    summaries: list[str],
) -> str:
    """Ask the report summarizer LLM to fuse chunk summaries into one.

    The summaries are joined with blank lines and sent with an instruction
    to stay within ``MAX_SUMMARY_WORDS`` words. A fresh LLM client is built
    for the call and always closed afterwards.

    Returns:
        The model's raw output with surrounding whitespace stripped.

    Raises:
        RuntimeError: if the attempt reports an error or the output is blank.
    """
    combined = "\n\n".join(summaries)
    config = load_config()
    llm = build_llm_client(resolved, config.ollama, config.vllm)
    try:
        user_prompt = (
            f"Merge these {section_name} summaries into a single coherent "
            f"summary of no more than {MAX_SUMMARY_WORDS} words:\n{combined}"
        )
        attempt = await llm.call_llm(
            prompts={"system": resolved.system_prompt, "user": user_prompt},
            json_schema={},
            document_text="",
        )
        if attempt.error:
            raise RuntimeError(f"LLM merge error: {attempt.error}")

        merged = attempt.raw_output.strip()
        if not merged:
            raise RuntimeError("LLM returned empty merge response")
        return merged
    finally:
        await llm.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Section summarization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def summarize_section(
    pool: asyncpg.Pool,
    resolver: AgentConfigResolver,
    section_name: str,
    section_data: dict,
) -> str:
    """Generate an AI summary for one report section.

    1. Resolve the report summarizer agent; if it cannot be resolved, return
       the deterministic template summary without calling the model.
    2. Serialize ``section_data`` to indented JSON and split it with
       ``chunk_data``.
    3. Summarize each chunk, one after another, via ``_summarize_chunk``.
    4. With several chunks, merge the chunk summaries with one more LLM
       call; if that call fails, join the chunk summaries with newlines.
    5. Cap the result at ``MAX_SUMMARY_WORDS`` words, preferring to end on
       the last period when it lies in the second half of the capped text.
    6. Record a single row in ``agent_performance_log`` for the whole call
       (success or failure), covering all LLM calls made for the section.
    7. If chunk summarization fails, log it and return the deterministic
       template summary.
    """
    resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
    if resolved is None:
        logger.error(
            "Report summarizer agent not found (slug=%s) — using deterministic fallback",
            _REPORT_SUMMARIZER_SLUG,
        )
        return build_deterministic_summary(section_name, section_data)

    # default=str keeps non-JSON-native values (dates, decimals, ...) serializable.
    serialized = json.dumps(section_data, indent=2, default=str)
    chunks = chunk_data(serialized)

    start = time.monotonic()
    try:
        # Summarize each chunk
        chunk_summaries: list[str] = []
        for chunk in chunks:
            summary = await _summarize_chunk(resolved, section_name, chunk)
            chunk_summaries.append(summary)

        # Merge if multiple chunks
        if len(chunk_summaries) > 1:
            try:
                final_summary = await _merge_summaries(
                    resolved, section_name, chunk_summaries,
                )
            except Exception:
                # Merge failed — fall back to concatenation of chunk summaries.
                # exc_info records why the merge failed; without it the
                # exception would be discarded with no trace in the logs.
                logger.warning(
                    "Chunk merge LLM call failed for section %s — concatenating summaries",
                    section_name,
                    exc_info=True,
                )
                final_summary = "\n".join(chunk_summaries)
        else:
            final_summary = chunk_summaries[0]

        # Truncate to MAX_SUMMARY_WORDS at sentence boundary
        words = final_summary.split()
        if len(words) > MAX_SUMMARY_WORDS:
            truncated = " ".join(words[:MAX_SUMMARY_WORDS])
            # End on the last period only if that keeps more than half of
            # the capped text; otherwise keep the hard word cut.
            last_period = truncated.rfind(".")
            if last_period > len(truncated) // 2:
                truncated = truncated[: last_period + 1]
            final_summary = truncated

        duration_ms = int((time.monotonic() - start) * 1000)
        await _log_performance(
            pool, resolved, True, duration_ms, serialized, final_summary,
        )
        return final_summary

    except Exception as exc:
        duration_ms = int((time.monotonic() - start) * 1000)
        logger.warning(
            "AI summarization failed for section %s: %s — using deterministic fallback",
            section_name,
            exc,
        )
        await _log_performance(
            pool, resolved, False, duration_ms, serialized, "",
            error_message=str(exc),
        )
        return build_deterministic_summary(section_name, section_data)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Deterministic fallback summaries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DETERMINISTIC_TEMPLATES: dict[str, str] = {
|
||||
"pnl": (
|
||||
"P&L Summary: Realized P&L ${realized_pnl}, unrealized ${unrealized_pnl}, "
|
||||
"daily return {daily_return}%, win rate {win_rate}%."
|
||||
),
|
||||
"recommendation_accuracy": (
|
||||
"Recommendation Accuracy: {total_evaluated} evaluated, "
|
||||
"{act_count} acted ({acted_win_rate}% win rate), "
|
||||
"{skip_count} skipped. "
|
||||
"Avg confidence acted {avg_confidence_acted}, skipped {avg_confidence_skipped}."
|
||||
),
|
||||
"position_performance": (
|
||||
"Position Performance: {position_count} positions tracked during the period."
|
||||
),
|
||||
"risk_metrics": (
|
||||
"Risk Metrics: Risk tier {current_risk_tier}, portfolio heat {portfolio_heat}, "
|
||||
"max drawdown {max_drawdown}, current drawdown {current_drawdown_pct}%, "
|
||||
"reserve pool ${reserve_pool_balance}, "
|
||||
"{circuit_breaker_event_count} circuit breaker events."
|
||||
),
|
||||
"model_quality": (
|
||||
"Model Quality: {window_count} lookback windows evaluated."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def build_deterministic_summary(section_name: str, section_data: dict) -> str:
    """Render a template-based summary for a section without using the LLM.

    Sections with an entry in ``_DETERMINISTIC_TEMPLATES`` are rendered by
    filling that template from a copy of ``section_data``. For
    ``position_performance`` and ``model_quality`` a count of the
    ``positions`` / ``windows`` list is added first. Unknown sections get a
    generic line reporting how many metrics were supplied. If formatting
    raises ``KeyError``, ``ValueError`` or ``TypeError``, the failure is
    logged and a generic "formatting failed" line is returned instead.
    """
    template = _DETERMINISTIC_TEMPLATES.get(section_name)
    if template is None:
        # No template registered for this section name.
        return f"{section_name} summary: {len(section_data)} metrics reported."

    # section -> (placeholder to add, list-valued key whose length fills it)
    derived_counts = {
        "position_performance": ("position_count", "positions"),
        "model_quality": ("window_count", "windows"),
    }

    try:
        # Work on a copy so the caller's dict is left untouched.
        values = dict(section_data)
        if section_name in derived_counts:
            count_key, list_key = derived_counts[section_name]
            values[count_key] = len(values.get(list_key, []))
        return template.format(**values)
    except (KeyError, ValueError, TypeError) as exc:
        logger.warning(
            "Deterministic summary template failed for %s: %s",
            section_name,
            exc,
        )
        return f"{section_name} summary: data available but template formatting failed."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Executive summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def generate_executive_summary(
    pool: asyncpg.Pool,
    resolver: AgentConfigResolver,
    section_summaries: dict[str, str],
) -> str:
    """Synthesize an executive summary from the per-section summaries.

    The section summaries are joined as ``"name: summary"`` paragraphs. If
    the joined text spans several chunks, each chunk is first condensed via
    ``_summarize_chunk``. A final LLM call then produces the synthesis,
    which is capped at ``MAX_EXECUTIVE_SUMMARY_WORDS`` words (ending on the
    last period when it lies in the second half of the capped text).

    One row is written to ``agent_performance_log`` per call. When the
    summarizer agent cannot be resolved, or any step fails, the joined
    section summaries are returned unchanged.
    """
    resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
    concatenated = "\n\n".join(
        f"{name}: {summary}" for name, summary in section_summaries.items()
    )

    if resolved is None:
        logger.error(
            "Report summarizer agent not found — using concatenated summaries as executive summary",
        )
        return concatenated

    chunks = chunk_data(concatenated)
    started = time.monotonic()

    def elapsed_ms() -> int:
        return int((time.monotonic() - started) * 1000)

    try:
        # Condense chunk by chunk only when the text did not fit in one chunk.
        if len(chunks) > 1:
            condensed = [
                await _summarize_chunk(resolved, "executive", chunk)
                for chunk in chunks
            ]
            input_text = "\n\n".join(condensed)
        else:
            input_text = chunks[0]

        # Final synthesis call; the client is closed whatever happens.
        config = load_config()
        llm = build_llm_client(resolved, config.ollama, config.vllm)
        try:
            user_prompt = (
                f"Synthesize these trading performance section summaries into "
                f"a concise executive summary of no more than "
                f"{MAX_EXECUTIVE_SUMMARY_WORDS} words:\n{input_text}"
            )
            attempt = await llm.call_llm(
                prompts={"system": resolved.system_prompt, "user": user_prompt},
                json_schema={},
                document_text="",
            )
        finally:
            await llm.close()

        if attempt.error:
            raise RuntimeError(f"Executive summary LLM error: {attempt.error}")
        executive = attempt.raw_output.strip()
        if not executive:
            raise RuntimeError("Executive summary LLM returned empty response")

        # Enforce the word cap, preferring to stop at a sentence boundary.
        tokens = executive.split()
        if len(tokens) > MAX_EXECUTIVE_SUMMARY_WORDS:
            clipped = " ".join(tokens[:MAX_EXECUTIVE_SUMMARY_WORDS])
            cut = clipped.rfind(".")
            executive = clipped[: cut + 1] if cut > len(clipped) // 2 else clipped

        await _log_performance(
            pool, resolved, True, elapsed_ms(), concatenated, executive,
        )
        return executive

    except Exception as exc:
        duration_ms = elapsed_ms()
        logger.warning(
            "Executive summary generation failed: %s — using concatenated summaries",
            exc,
        )
        await _log_performance(
            pool, resolved, False, duration_ms, concatenated, "",
            error_message=str(exc),
        )
        return concatenated
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user