feat: trading feedback engine — periodic performance reports with AI summarization
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled

- Migration 038: trading_reports table + report-summarizer agent seed
- 6 reporting modules: models, collector, sections, validator, summarizer, generator
- API endpoints: GET /api/reports (paginated, filterable), GET /api/reports/{id}
- Frontend hooks: useReports, useReport with TanStack Query
- Scheduler: daily (after 16:30 ET) and weekly (Saturday) report triggers
- Redis queue consumer for async report generation with retry/dedup
- 5 property-based tests (chunking, serialization, validation, accuracy, deltas)
- 109 unit/integration tests across all modules
- 6 frontend hook tests with MSW mocks
This commit is contained in:
Celes Renata
2026-05-01 22:13:09 +00:00
parent 376fcb4bb4
commit bc077bfcc8
28 changed files with 6771 additions and 1 deletions
+1
View File
@@ -0,0 +1 @@
+306
View File
@@ -0,0 +1,306 @@
"""Data collector for trading performance reports.
Queries all relevant trading data for a reporting period and returns
a CollectedData bundle for downstream section builders.
"""
from __future__ import annotations
import logging
import uuid
from dataclasses import dataclass, field
from datetime import date
from typing import Any
import asyncpg
logger = logging.getLogger(__name__)
@dataclass
class CollectedData:
"""Raw data collected for a reporting period."""
trading_decisions: list[dict] = field(default_factory=list)
orders: list[dict] = field(default_factory=list)
open_positions: list[dict] = field(default_factory=list)
closed_positions: list[dict] = field(default_factory=list)
portfolio_snapshot: dict | None = None
previous_portfolio_snapshot: dict | None = None
recommendations: list[dict] = field(default_factory=list)
prediction_outcomes: list[dict] = field(default_factory=list)
model_metric_snapshots: list[dict] = field(default_factory=list)
circuit_breaker_events: list[dict] = field(default_factory=list)
reserve_pool_balance: float = 0.0
def _row_dict(row: asyncpg.Record) -> dict[str, Any]:
"""Convert asyncpg Record to dict with UUID→str coercion."""
d = dict(row)
for k, v in d.items():
if isinstance(v, uuid.UUID):
d[k] = str(v)
return d
async def collect_report_data(
pool: asyncpg.Pool,
period_start: date,
period_end: date,
) -> CollectedData:
"""Query all trading data for the reporting period.
Queries: trading_decisions, orders, positions, portfolio_snapshots,
recommendations, prediction_outcomes, model_metric_snapshots,
circuit_breaker_events, reserve_pool_ledger.
Returns CollectedData with all raw query results.
If no trading_decisions exist, returns empty lists (zero-activity).
"""
async with pool.acquire() as conn:
trading_decisions = await _fetch_trading_decisions(conn, period_start, period_end)
orders = await _fetch_orders(conn, period_start, period_end)
open_positions = await _fetch_open_positions(conn)
closed_positions = await _fetch_closed_positions(conn, period_start, period_end)
portfolio_snapshot = await _fetch_portfolio_snapshot(conn, period_start, period_end)
previous_portfolio_snapshot = await _fetch_previous_portfolio_snapshot(conn, period_start)
recommendations = await _fetch_recommendations(conn, period_start, period_end)
prediction_outcomes = await _fetch_prediction_outcomes(conn, period_start, period_end)
model_metric_snapshots = await _fetch_model_metric_snapshots(conn, period_start, period_end)
circuit_breaker_events = await _fetch_circuit_breaker_events(conn, period_start, period_end)
reserve_pool_balance = await _fetch_reserve_pool_balance(conn)
return CollectedData(
trading_decisions=trading_decisions,
orders=orders,
open_positions=open_positions,
closed_positions=closed_positions,
portfolio_snapshot=portfolio_snapshot,
previous_portfolio_snapshot=previous_portfolio_snapshot,
recommendations=recommendations,
prediction_outcomes=prediction_outcomes,
model_metric_snapshots=model_metric_snapshots,
circuit_breaker_events=circuit_breaker_events,
reserve_pool_balance=reserve_pool_balance,
)
async def _fetch_trading_decisions(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch trading decisions created within the period."""
rows = await conn.fetch(
"""SELECT id, recommendation_id, decision, skip_reason, ticker,
computed_position_size, computed_share_quantity,
risk_tier_at_decision, portfolio_heat_at_decision,
active_pool_at_decision, reserve_pool_at_decision,
circuit_breaker_status, correlation_check_result,
sector_exposure_check_result, earnings_proximity_flag,
is_micro_trade, decision_trace, created_at
FROM trading_decisions
WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
ORDER BY created_at""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_orders(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch orders created within the period."""
rows = await conn.fetch(
"""SELECT id, recommendation_id, broker_account_id, ticker, side,
order_type, quantity, limit_price, stop_price, status,
broker_order_id, fill_price, fill_quantity,
submitted_at, filled_at, cancelled_at, rejected_at,
rejection_reason, created_at
FROM orders
WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
ORDER BY created_at""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_open_positions(conn: asyncpg.Connection) -> list[dict]:
"""Fetch currently open positions (quantity > 0)."""
rows = await conn.fetch(
"""SELECT id, broker_account_id, ticker, quantity,
avg_entry_price, current_price,
unrealized_pnl, realized_pnl, updated_at
FROM positions
WHERE quantity > 0
ORDER BY ticker""",
)
return [_row_dict(r) for r in rows]
async def _fetch_closed_positions(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch positions closed during the period (quantity = 0, updated within period)."""
rows = await conn.fetch(
"""SELECT id, broker_account_id, ticker, quantity,
avg_entry_price, current_price,
unrealized_pnl, realized_pnl, updated_at
FROM positions
WHERE quantity = 0
AND updated_at >= $1::date
AND updated_at < ($2::date + INTERVAL '1 day')
ORDER BY updated_at""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_portfolio_snapshot(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> dict | None:
"""Fetch the most recent portfolio snapshot within the period."""
row = await conn.fetchrow(
"""SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
daily_return, cumulative_return, unrealized_pnl, realized_pnl,
win_count, loss_count, win_rate, sharpe_ratio,
max_drawdown, current_drawdown_pct, portfolio_heat,
risk_tier, positions, metrics, created_at
FROM portfolio_snapshots
WHERE snapshot_date >= $1 AND snapshot_date <= $2
ORDER BY snapshot_date DESC
LIMIT 1""",
period_start,
period_end,
)
return _row_dict(row) if row else None
async def _fetch_previous_portfolio_snapshot(
conn: asyncpg.Connection,
period_start: date,
) -> dict | None:
"""Fetch the most recent portfolio snapshot before the period start."""
row = await conn.fetchrow(
"""SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
daily_return, cumulative_return, unrealized_pnl, realized_pnl,
win_count, loss_count, win_rate, sharpe_ratio,
max_drawdown, current_drawdown_pct, portfolio_heat,
risk_tier, positions, metrics, created_at
FROM portfolio_snapshots
WHERE snapshot_date < $1
ORDER BY snapshot_date DESC
LIMIT 1""",
period_start,
)
return _row_dict(row) if row else None
async def _fetch_recommendations(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch recommendations created within the period."""
rows = await conn.fetch(
"""SELECT id, ticker, company_id, action, mode, confidence,
time_horizon, thesis, portfolio_pct, max_loss_pct,
model_version, generated_at, created_at
FROM recommendations
WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
ORDER BY created_at""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_prediction_outcomes(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch prediction outcomes evaluated within the period."""
rows = await conn.fetch(
"""SELECT po.id, po.prediction_id, po.evaluated_at, po.horizon,
po.future_price, po.future_return,
po.spy_future_price, po.spy_return,
po.sector_etf_future_price, po.sector_etf_return,
po.excess_return_vs_spy, po.excess_return_vs_sector,
po.direction_correct, po.profitable,
ps.ticker, ps.direction, ps.action, ps.confidence
FROM prediction_outcomes po
JOIN prediction_snapshots ps ON ps.id = po.prediction_id
WHERE po.evaluated_at >= $1::date
AND po.evaluated_at < ($2::date + INTERVAL '1 day')
ORDER BY po.evaluated_at""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_model_metric_snapshots(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch model metric snapshots generated within the period."""
rows = await conn.fetch(
"""SELECT id, generated_at, lookback_window, horizon,
prediction_count, win_rate, directional_accuracy,
information_coefficient, rank_information_coefficient,
avg_return, avg_excess_return_vs_spy,
avg_excess_return_vs_sector,
calibration_error, brier_score,
buy_win_rate, sell_win_rate, hold_win_rate,
created_at
FROM model_metric_snapshots
WHERE generated_at >= $1::date
AND generated_at < ($2::date + INTERVAL '1 day')
ORDER BY generated_at DESC""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_circuit_breaker_events(
conn: asyncpg.Connection,
period_start: date,
period_end: date,
) -> list[dict]:
"""Fetch circuit breaker events from trading decisions within the period.
Circuit breaker events are trading decisions where
circuit_breaker_status is not 'clear' (i.e. a breaker was active).
"""
rows = await conn.fetch(
"""SELECT id, recommendation_id, decision, ticker,
circuit_breaker_status, decision_trace, created_at
FROM trading_decisions
WHERE circuit_breaker_status != 'clear'
AND created_at >= $1::date
AND created_at < ($2::date + INTERVAL '1 day')
ORDER BY created_at""",
period_start,
period_end,
)
return [_row_dict(r) for r in rows]
async def _fetch_reserve_pool_balance(conn: asyncpg.Connection) -> float:
"""Fetch the latest reserve pool balance."""
row = await conn.fetchrow(
"SELECT balance_after FROM reserve_pool_ledger ORDER BY created_at DESC LIMIT 1",
)
return float(row["balance_after"]) if row else 0.0
+279
View File
@@ -0,0 +1,279 @@
"""Report generator — orchestrates collection, building, validation, summarization, and storage.
Provides three public functions:
- generate_report: full pipeline from data collection to assembled ReportData
- store_report: upsert into trading_reports table
- process_report_job: Redis queue job handler with retry and dedup
Requirements: 5.1, 5.2, 5.3, 6.3, 6.4, 6.5
Design: Report Generator
"""
from __future__ import annotations
import asyncio
import logging
from datetime import date, datetime, timezone
import asyncpg
from services.reporting.collector import collect_report_data
from services.reporting.models import ReportData, ReportType
from services.reporting.sections import (
build_model_quality_section,
build_pnl_section,
build_position_performance_section,
build_recommendation_accuracy_section,
build_risk_metrics_section,
)
from services.reporting.summarizer import (
generate_executive_summary,
summarize_section,
)
from services.reporting.validator import (
compute_validation_status,
validate_model_quality,
validate_recommendation_accuracy,
)
from services.shared.agent_config import AgentConfigResolver
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Retry configuration for process_report_job
# ---------------------------------------------------------------------------
_MAX_RETRIES = 3
_BACKOFF_SECONDS = (30, 60, 120)
# In-memory set tracking in-progress jobs to reject duplicates.
# Key format: "{report_type}:{period_start}:{period_end}"
_in_progress_jobs: set[str] = set()
# ---------------------------------------------------------------------------
# generate_report
# ---------------------------------------------------------------------------
async def generate_report(
pool: asyncpg.Pool,
report_type: ReportType,
period_start: date,
period_end: date,
) -> ReportData:
"""Orchestrate full report generation.
1. Collect data via collector
2. Build all 5 sections via section builders
3. Validate recommendation_accuracy and model_quality via validator
4. Create AgentConfigResolver and summarize each section
5. Generate executive summary
6. Assemble final ReportData
"""
# 1. Collect data
data = await collect_report_data(pool, period_start, period_end)
# 2. Build sections
pnl = build_pnl_section(data)
rec_accuracy = build_recommendation_accuracy_section(data)
position_perf = build_position_performance_section(data)
risk_metrics = build_risk_metrics_section(data)
model_quality = build_model_quality_section(data)
# 3. Validate
rec_warnings = validate_recommendation_accuracy(
rec_accuracy, data.prediction_outcomes,
)
rec_accuracy.validation_warnings = rec_warnings
mq_warnings = validate_model_quality(
model_quality, data.model_metric_snapshots,
)
model_quality.validation_warnings = mq_warnings
# 4. Summarize each section
resolver = AgentConfigResolver(pool)
pnl.summary = await summarize_section(
pool, resolver, "pnl", pnl.model_dump(),
)
rec_accuracy.summary = await summarize_section(
pool, resolver, "recommendation_accuracy", rec_accuracy.model_dump(),
)
position_perf.summary = await summarize_section(
pool, resolver, "position_performance", position_perf.model_dump(),
)
risk_metrics.summary = await summarize_section(
pool, resolver, "risk_metrics", risk_metrics.model_dump(),
)
model_quality.summary = await summarize_section(
pool, resolver, "model_quality", model_quality.model_dump(),
)
# 5. Generate executive summary
section_summaries = {
"pnl": pnl.summary,
"recommendation_accuracy": rec_accuracy.summary,
"position_performance": position_perf.summary,
"risk_metrics": risk_metrics.summary,
"model_quality": model_quality.summary,
}
executive_summary = await generate_executive_summary(
pool, resolver, section_summaries,
)
# 6. Assemble ReportData
report = ReportData(
pnl=pnl,
recommendation_accuracy=rec_accuracy,
position_performance=position_perf,
risk_metrics=risk_metrics,
model_quality=model_quality,
executive_summary=executive_summary,
generated_at=datetime.now(timezone.utc),
period_start=period_start,
period_end=period_end,
report_type=ReportType(report_type),
)
# Set validation status based on all warnings
report.validation_status = compute_validation_status(report)
return report
# ---------------------------------------------------------------------------
# store_report
# ---------------------------------------------------------------------------
_UPSERT_SQL = """\
INSERT INTO trading_reports
(report_type, period_start, period_end, report_data, validation_status, generated_at)
VALUES
($1, $2, $3, $4::jsonb, $5, $6)
ON CONFLICT (report_type, period_start, period_end)
DO UPDATE SET
report_data = EXCLUDED.report_data,
validation_status = EXCLUDED.validation_status,
generated_at = EXCLUDED.generated_at
RETURNING id
"""
async def store_report(
pool: asyncpg.Pool,
report: ReportData,
) -> str:
"""Store report in trading_reports table via upsert.
Uses INSERT ... ON CONFLICT (report_type, period_start, period_end)
DO UPDATE to handle regeneration of existing reports.
Returns the report UUID as a string.
"""
row = await pool.fetchrow(
_UPSERT_SQL,
report.report_type.value,
report.period_start,
report.period_end,
report.model_dump_json(),
report.validation_status.value,
report.generated_at,
)
report_id = str(row["id"]) # type: ignore[index]
logger.info(
"Stored report %s (type=%s, period=%s to %s)",
report_id,
report.report_type.value,
report.period_start,
report.period_end,
)
return report_id
# ---------------------------------------------------------------------------
# process_report_job
# ---------------------------------------------------------------------------
def _job_key(report_type: str, period_start: str, period_end: str) -> str:
"""Build a dedup key for an in-progress job."""
return f"{report_type}:{period_start}:{period_end}"
async def process_report_job(
pool: asyncpg.Pool,
job: dict,
) -> None:
"""Process a report generation job from the Redis queue.
Deserializes job payload, calls generate_report + store_report.
Handles retries with exponential backoff (30s, 60s, 120s up to 3 attempts).
Rejects duplicate jobs for the same report_type + period.
Expected job payload::
{
"report_type": "daily" | "weekly",
"period_start": "YYYY-MM-DD",
"period_end": "YYYY-MM-DD"
}
"""
report_type_str = job.get("report_type", "")
period_start_str = job.get("period_start", "")
period_end_str = job.get("period_end", "")
# Validate payload
try:
report_type = ReportType(report_type_str)
period_start = date.fromisoformat(period_start_str)
period_end = date.fromisoformat(period_end_str)
except (ValueError, TypeError) as exc:
logger.error("Invalid report job payload: %s%s", job, exc)
return
# Reject duplicate in-progress jobs
key = _job_key(report_type_str, period_start_str, period_end_str)
if key in _in_progress_jobs:
logger.warning(
"Duplicate report job rejected (already in progress): %s", key,
)
return
_in_progress_jobs.add(key)
try:
last_error: Exception | None = None
for attempt in range(_MAX_RETRIES):
try:
report = await generate_report(
pool, report_type, period_start, period_end,
)
await store_report(pool, report)
logger.info(
"Report job completed: %s (attempt %d)", key, attempt + 1,
)
return
except Exception as exc:
last_error = exc
if attempt < _MAX_RETRIES - 1:
backoff = _BACKOFF_SECONDS[attempt]
logger.warning(
"Report job %s failed (attempt %d/%d): %s — retrying in %ds",
key,
attempt + 1,
_MAX_RETRIES,
exc,
backoff,
)
await asyncio.sleep(backoff)
# All retries exhausted
logger.error(
"Report job %s failed after %d attempts: %s",
key,
_MAX_RETRIES,
last_error,
)
finally:
_in_progress_jobs.discard(key)
+104
View File
@@ -0,0 +1,104 @@
from __future__ import annotations
from datetime import date, datetime
from enum import Enum
from pydantic import BaseModel, Field
class ReportType(str, Enum):
DAILY = "daily"
WEEKLY = "weekly"
class ValidationStatus(str, Enum):
PASSED = "passed"
WARNINGS = "warnings"
class ValidationWarning(BaseModel):
field_name: str
computed_value: float
snapshot_value: float
pct_difference: float
class PLSection(BaseModel):
realized_pnl: float
unrealized_pnl: float
daily_return: float
cumulative_return: float
win_count: int
loss_count: int
win_rate: float
profit_factor: float
sharpe_ratio: float
summary: str = ""
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
class RecommendationAccuracySection(BaseModel):
total_evaluated: int
act_count: int
skip_count: int
acted_win_rate: float
avg_confidence_acted: float
avg_confidence_skipped: float
summary: str = ""
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
class PositionDetail(BaseModel):
ticker: str
entry_price: float
current_or_exit_price: float
pnl: float
pnl_pct: float
hold_duration_hours: float
status: str # "open" or "closed"
class PositionPerformanceSection(BaseModel):
positions: list[PositionDetail] = Field(default_factory=list)
summary: str = ""
class RiskMetricsSection(BaseModel):
current_risk_tier: str
portfolio_heat: float
max_drawdown: float
current_drawdown_pct: float
reserve_pool_balance: float
circuit_breaker_event_count: int
summary: str = ""
class ModelQualityWindow(BaseModel):
lookback: str
win_rate: float | None
directional_accuracy: float | None
information_coefficient: float | None
calibration_error: float | None
brier_score: float | None
class ModelQualitySection(BaseModel):
windows: list[ModelQualityWindow] = Field(default_factory=list)
summary: str = ""
validation_warnings: list[ValidationWarning] = Field(default_factory=list)
class ReportData(BaseModel):
"""Top-level report structure stored as JSONB."""
pnl: PLSection
recommendation_accuracy: RecommendationAccuracySection
position_performance: PositionPerformanceSection
risk_metrics: RiskMetricsSection
model_quality: ModelQualitySection
executive_summary: str = ""
validation_status: ValidationStatus = ValidationStatus.PASSED
generated_at: datetime
period_start: date
period_end: date
report_type: ReportType
+370
View File
@@ -0,0 +1,370 @@
"""Section builders for trading performance reports.
Each builder takes a CollectedData bundle and returns a typed Pydantic
section model. All builders handle zero-activity gracefully by returning
zero values and empty lists when no data is available.
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from services.reporting.collector import CollectedData
from services.reporting.models import (
ModelQualitySection,
ModelQualityWindow,
PLSection,
PositionDetail,
PositionPerformanceSection,
RecommendationAccuracySection,
RiskMetricsSection,
)
logger = logging.getLogger(__name__)
def build_pnl_section(data: CollectedData) -> PLSection:
"""Build P&L section from collected data.
Computes realized/unrealized P&L, daily return, cumulative return,
win/loss counts, win rate, profit factor, and Sharpe ratio from
portfolio_snapshot and closed positions.
"""
snap = data.portfolio_snapshot
if snap is None:
return PLSection(
realized_pnl=0.0,
unrealized_pnl=0.0,
daily_return=0.0,
cumulative_return=0.0,
win_count=0,
loss_count=0,
win_rate=0.0,
profit_factor=0.0,
sharpe_ratio=0.0,
)
# Compute profit factor from closed positions:
# sum of gains / abs(sum of losses)
gains = 0.0
losses = 0.0
for pos in data.closed_positions:
rpnl = float(pos.get("realized_pnl", 0) or 0)
if rpnl > 0:
gains += rpnl
elif rpnl < 0:
losses += abs(rpnl)
profit_factor = (gains / losses) if losses > 0 else 0.0
return PLSection(
realized_pnl=float(snap.get("realized_pnl", 0) or 0),
unrealized_pnl=float(snap.get("unrealized_pnl", 0) or 0),
daily_return=float(snap.get("daily_return", 0) or 0),
cumulative_return=float(snap.get("cumulative_return", 0) or 0),
win_count=int(snap.get("win_count", 0) or 0),
loss_count=int(snap.get("loss_count", 0) or 0),
win_rate=float(snap.get("win_rate", 0) or 0),
profit_factor=profit_factor,
sharpe_ratio=float(snap.get("sharpe_ratio", 0) or 0),
)
def build_recommendation_accuracy_section(
data: CollectedData,
) -> RecommendationAccuracySection:
"""Build recommendation accuracy section.
Joins trading_decisions with prediction_outcomes to compute
act/skip breakdown, win rate of acted recommendations, and
average confidence of acted vs skipped.
"""
if not data.trading_decisions:
return RecommendationAccuracySection(
total_evaluated=0,
act_count=0,
skip_count=0,
acted_win_rate=0.0,
avg_confidence_acted=0.0,
avg_confidence_skipped=0.0,
)
# Build lookup: recommendation_id -> prediction_outcome
# prediction_outcomes are joined with prediction_snapshots in the collector,
# so they carry ticker, direction, action, confidence from the snapshot.
# trading_decisions reference recommendations via recommendation_id.
# We need to match trading_decisions -> recommendations -> prediction_outcomes.
#
# The collector fetches prediction_outcomes joined with prediction_snapshots
# (po.prediction_id = ps.id). Trading decisions reference recommendation_id.
# Recommendations and prediction_snapshots share the same ticker, so we
# match by recommendation_id on the trading_decision side.
# Build recommendation_id -> recommendation dict for confidence lookup
rec_by_id: dict[str, dict] = {}
for rec in data.recommendations:
rec_id = str(rec.get("id", ""))
if rec_id:
rec_by_id[rec_id] = rec
# Build prediction_id -> prediction_outcome for profitability lookup
# We also need to map recommendation_id -> prediction_outcome.
# The link is: trading_decision.recommendation_id -> recommendation.id
# and prediction_outcome has ticker from prediction_snapshots.
# We match by ticker between recommendation and prediction_outcome.
outcome_by_ticker: dict[str, list[dict]] = {}
for po in data.prediction_outcomes:
ticker = po.get("ticker", "")
if ticker:
outcome_by_ticker.setdefault(ticker, []).append(po)
act_count = 0
skip_count = 0
acted_wins = 0
acted_total_with_outcome = 0
confidence_acted: list[float] = []
confidence_skipped: list[float] = []
for td in data.trading_decisions:
decision = str(td.get("decision", "")).lower()
rec_id = str(td.get("recommendation_id", ""))
rec = rec_by_id.get(rec_id, {})
conf = rec.get("confidence")
ticker = td.get("ticker", "")
if decision == "act":
act_count += 1
if conf is not None:
confidence_acted.append(float(conf))
# Check profitability from prediction_outcomes for this ticker
ticker_outcomes = outcome_by_ticker.get(ticker, [])
if ticker_outcomes:
# Use the most recent outcome for this ticker
latest = ticker_outcomes[-1]
acted_total_with_outcome += 1
if latest.get("profitable"):
acted_wins += 1
else:
skip_count += 1
if conf is not None:
confidence_skipped.append(float(conf))
total_evaluated = act_count + skip_count
acted_win_rate = (
(acted_wins / acted_total_with_outcome)
if acted_total_with_outcome > 0
else 0.0
)
avg_confidence_acted = (
(sum(confidence_acted) / len(confidence_acted))
if confidence_acted
else 0.0
)
avg_confidence_skipped = (
(sum(confidence_skipped) / len(confidence_skipped))
if confidence_skipped
else 0.0
)
return RecommendationAccuracySection(
total_evaluated=total_evaluated,
act_count=act_count,
skip_count=skip_count,
acted_win_rate=acted_win_rate,
avg_confidence_acted=avg_confidence_acted,
avg_confidence_skipped=avg_confidence_skipped,
)
def build_position_performance_section(
data: CollectedData,
) -> PositionPerformanceSection:
"""Build position performance section.
Lists each position (open and closed) with entry price,
current/exit price, P&L, P&L%, and hold duration.
"""
positions: list[PositionDetail] = []
now = datetime.now(timezone.utc)
# Open positions
for pos in data.open_positions:
entry_price = float(pos.get("avg_entry_price", 0) or 0)
current_price = float(pos.get("current_price", 0) or 0)
quantity = float(pos.get("quantity", 0) or 0)
pnl = (current_price - entry_price) * quantity
cost_basis = entry_price * quantity
pnl_pct = (pnl / cost_basis * 100) if cost_basis > 0 else 0.0
# Hold duration from updated_at to now
updated_at = pos.get("updated_at")
hold_hours = _compute_hold_hours(updated_at, now)
positions.append(
PositionDetail(
ticker=pos.get("ticker", ""),
entry_price=entry_price,
current_or_exit_price=current_price,
pnl=pnl,
pnl_pct=pnl_pct,
hold_duration_hours=hold_hours,
status="open",
)
)
# Closed positions
for pos in data.closed_positions:
entry_price = float(pos.get("avg_entry_price", 0) or 0)
current_price = float(pos.get("current_price", 0) or 0)
realized_pnl = float(pos.get("realized_pnl", 0) or 0)
cost_basis = entry_price * float(pos.get("quantity", 0) or 0)
# For closed positions, quantity is 0 in the DB, so use realized_pnl
# directly. P&L% is based on the original cost basis which we can
# approximate from entry_price and the realized_pnl.
# If entry_price is available, compute pnl_pct from realized_pnl / cost.
# Since quantity=0 for closed, we estimate original quantity from
# realized_pnl and price difference, or just use realized_pnl directly.
if entry_price > 0 and current_price != entry_price:
# Estimate original quantity from realized_pnl / (exit - entry)
price_diff = current_price - entry_price
if price_diff != 0:
est_quantity = abs(realized_pnl / price_diff)
est_cost = entry_price * est_quantity
pnl_pct = (realized_pnl / est_cost * 100) if est_cost > 0 else 0.0
else:
pnl_pct = 0.0
else:
pnl_pct = 0.0
updated_at = pos.get("updated_at")
hold_hours = _compute_hold_hours(updated_at, now)
positions.append(
PositionDetail(
ticker=pos.get("ticker", ""),
entry_price=entry_price,
current_or_exit_price=current_price,
pnl=realized_pnl,
pnl_pct=pnl_pct,
hold_duration_hours=hold_hours,
status="closed",
)
)
return PositionPerformanceSection(positions=positions)
def _compute_hold_hours(updated_at: datetime | str | None, now: datetime) -> float:
"""Compute hold duration in hours from updated_at to now."""
if updated_at is None:
return 0.0
if isinstance(updated_at, str):
try:
updated_at = datetime.fromisoformat(updated_at)
except (ValueError, TypeError):
return 0.0
if not isinstance(updated_at, datetime):
return 0.0
# Ensure timezone-aware comparison
if updated_at.tzinfo is None:
updated_at = updated_at.replace(tzinfo=timezone.utc)
delta = now - updated_at
return max(delta.total_seconds() / 3600.0, 0.0)
def build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection:
"""Build risk metrics section.
Extracts current risk tier, portfolio heat, max drawdown,
current drawdown %, reserve pool balance, and circuit breaker
event count from collected data.
"""
snap = data.portfolio_snapshot
if snap is None:
return RiskMetricsSection(
current_risk_tier="unknown",
portfolio_heat=0.0,
max_drawdown=0.0,
current_drawdown_pct=0.0,
reserve_pool_balance=data.reserve_pool_balance,
circuit_breaker_event_count=len(data.circuit_breaker_events),
)
return RiskMetricsSection(
current_risk_tier=str(snap.get("risk_tier", "unknown") or "unknown"),
portfolio_heat=float(snap.get("portfolio_heat", 0) or 0),
max_drawdown=float(snap.get("max_drawdown", 0) or 0),
current_drawdown_pct=float(snap.get("current_drawdown_pct", 0) or 0),
reserve_pool_balance=data.reserve_pool_balance,
circuit_breaker_event_count=len(data.circuit_breaker_events),
)
def build_model_quality_section(data: CollectedData) -> ModelQualitySection:
"""Build model quality section.
Extracts latest model_metric_snapshot values for 7d, 30d, 90d
lookback windows.
"""
if not data.model_metric_snapshots:
return ModelQualitySection(windows=[])
# Group by lookback_window, take the latest (first in list since
# collector orders by generated_at DESC)
target_windows = {"7d", "30d", "90d"}
latest_by_window: dict[str, dict] = {}
for snap in data.model_metric_snapshots:
window = snap.get("lookback_window", "")
if window in target_windows and window not in latest_by_window:
latest_by_window[window] = snap
windows: list[ModelQualityWindow] = []
for w in ("7d", "30d", "90d"):
snap = latest_by_window.get(w)
if snap is None:
windows.append(
ModelQualityWindow(
lookback=w,
win_rate=None,
directional_accuracy=None,
information_coefficient=None,
calibration_error=None,
brier_score=None,
)
)
else:
windows.append(
ModelQualityWindow(
lookback=w,
win_rate=_safe_float(snap.get("win_rate")),
directional_accuracy=_safe_float(snap.get("directional_accuracy")),
information_coefficient=_safe_float(
snap.get("information_coefficient")
),
calibration_error=_safe_float(snap.get("calibration_error")),
brier_score=_safe_float(snap.get("brier_score")),
)
)
return ModelQualitySection(windows=windows)
def _safe_float(value: object) -> float | None:
"""Convert a value to float, returning None for None/invalid values."""
if value is None:
return None
try:
f = float(value) # type: ignore[arg-type]
# Replace NaN/inf with None
if f != f or f == float("inf") or f == float("-inf"):
return None
return f
except (ValueError, TypeError):
return None
+437
View File
@@ -0,0 +1,437 @@
"""AI-powered report summarizer with chunking and deterministic fallback.
Generates natural-language summaries for trading performance report sections
using the Report_Summarizer_Agent (resolved via AgentConfigResolver + llm_factory).
Data is chunked to fit within the 8k-token context window of the local model.
Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.6
Design: AI Summarizer
"""
from __future__ import annotations
import json
import logging
import time
import asyncpg
from services.extractor.llm_factory import build_llm_client
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
from services.shared.config import load_config
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
CHUNK_SIZE_LIMIT = 6000 # characters per chunk
MAX_SUMMARY_WORDS = 200 # per section summary
MAX_EXECUTIVE_SUMMARY_WORDS = 300
_REPORT_SUMMARIZER_SLUG = "report-summarizer"
# ---------------------------------------------------------------------------
# Chunking
# ---------------------------------------------------------------------------
def chunk_data(serialized: str, max_chars: int = CHUNK_SIZE_LIMIT) -> list[str]:
"""Split serialized data into chunks of at most *max_chars* characters.
Splits on newline boundaries to avoid breaking JSON structures.
Each chunk is ≤ *max_chars* characters. Returns at least one chunk
(even for empty input).
Round-trip property: ``"".join(chunk_data(s, n)) == s`` for all *s*.
If a single line (including its trailing newline) exceeds *max_chars*,
it is included as its own chunk (we never break mid-line).
"""
if not serialized:
return [""]
# Split into segments where each segment includes its trailing "\n"
# (except possibly the last one if the string doesn't end with "\n").
# This preserves the exact original when chunks are concatenated.
segments: list[str] = []
start = 0
while start < len(serialized):
nl = serialized.find("\n", start)
if nl == -1:
# Last segment, no trailing newline
segments.append(serialized[start:])
break
else:
# Include the newline in this segment
segments.append(serialized[start : nl + 1])
start = nl + 1
chunks: list[str] = []
current_parts: list[str] = []
current_len = 0
for segment in segments:
if current_parts and current_len + len(segment) > max_chars:
# Flush current chunk
chunks.append("".join(current_parts))
current_parts = [segment]
current_len = len(segment)
else:
current_parts.append(segment)
current_len += len(segment)
# Flush remaining
if current_parts:
chunks.append("".join(current_parts))
return chunks if chunks else [""]
# ---------------------------------------------------------------------------
# Performance logging
# ---------------------------------------------------------------------------
async def _log_performance(
pool: asyncpg.Pool,
resolved: ResolvedAgentConfig,
success: bool,
duration_ms: int,
input_text: str,
output_text: str,
error_message: str | None = None,
) -> None:
"""Insert a row into agent_performance_log for a summarizer invocation."""
try:
await pool.execute(
"""INSERT INTO agent_performance_log
(agent_id, variant_id, document_id, ticker, success,
duration_ms, confidence, retry_count,
input_tokens, output_tokens, error_message)
VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
resolved.agent_id,
resolved.variant_id,
None, # no document_id for report summaries
None, # no ticker for report summaries
success,
duration_ms,
0.0, # no confidence score for summaries
0,
len(input_text) // 4, # token estimate
len(output_text) // 4, # token estimate
error_message,
)
except Exception:
logger.warning("Failed to log summarizer performance", exc_info=True)
# ---------------------------------------------------------------------------
# LLM summarization helpers
# ---------------------------------------------------------------------------
async def _summarize_chunk(
resolved: ResolvedAgentConfig,
section_name: str,
chunk: str,
) -> str:
"""Summarize a single chunk via the Report_Summarizer_Agent LLM client.
Returns the raw text output from the model.
Raises on failure so the caller can handle retries / fallback.
"""
cfg = load_config()
client = build_llm_client(resolved, cfg.ollama, cfg.vllm)
try:
prompts = {
"system": resolved.system_prompt,
"user": f"Summarize this {section_name} data:\n{chunk}",
}
attempt = await client.call_llm(
prompts=prompts,
json_schema={}, # plain text, no structured output
document_text="",
)
if attempt.error:
raise RuntimeError(f"LLM error: {attempt.error}")
if not attempt.raw_output.strip():
raise RuntimeError("LLM returned empty response")
return attempt.raw_output.strip()
finally:
await client.close()
async def _merge_summaries(
resolved: ResolvedAgentConfig,
section_name: str,
summaries: list[str],
) -> str:
"""Merge multiple chunk summaries into a single coherent summary."""
combined = "\n\n".join(summaries)
cfg = load_config()
client = build_llm_client(resolved, cfg.ollama, cfg.vllm)
try:
prompts = {
"system": resolved.system_prompt,
"user": (
f"Merge these {section_name} summaries into a single coherent "
f"summary of no more than {MAX_SUMMARY_WORDS} words:\n{combined}"
),
}
attempt = await client.call_llm(
prompts=prompts,
json_schema={},
document_text="",
)
if attempt.error:
raise RuntimeError(f"LLM merge error: {attempt.error}")
if not attempt.raw_output.strip():
raise RuntimeError("LLM returned empty merge response")
return attempt.raw_output.strip()
finally:
await client.close()
# ---------------------------------------------------------------------------
# Section summarization
# ---------------------------------------------------------------------------
async def summarize_section(
pool: asyncpg.Pool,
resolver: AgentConfigResolver,
section_name: str,
section_data: dict,
) -> str:
"""Generate AI summary for a report section.
1. Serialize section data to JSON string
2. Chunk if > CHUNK_SIZE_LIMIT
3. Summarize each chunk via Report_Summarizer_Agent
4. If multiple chunks, merge summaries with a final LLM call
5. Log each invocation to agent_performance_log
6. On failure, fall back to deterministic summary
"""
resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
if resolved is None:
logger.error(
"Report summarizer agent not found (slug=%s) — using deterministic fallback",
_REPORT_SUMMARIZER_SLUG,
)
return build_deterministic_summary(section_name, section_data)
serialized = json.dumps(section_data, indent=2, default=str)
chunks = chunk_data(serialized)
start = time.monotonic()
try:
# Summarize each chunk
chunk_summaries: list[str] = []
for chunk in chunks:
summary = await _summarize_chunk(resolved, section_name, chunk)
chunk_summaries.append(summary)
# Merge if multiple chunks
if len(chunk_summaries) > 1:
try:
final_summary = await _merge_summaries(
resolved, section_name, chunk_summaries,
)
except Exception:
# Merge failed — fall back to concatenation of chunk summaries
logger.warning(
"Chunk merge LLM call failed for section %s — concatenating summaries",
section_name,
)
final_summary = "\n".join(chunk_summaries)
else:
final_summary = chunk_summaries[0]
# Truncate to MAX_SUMMARY_WORDS at sentence boundary
words = final_summary.split()
if len(words) > MAX_SUMMARY_WORDS:
truncated = " ".join(words[:MAX_SUMMARY_WORDS])
# Try to end at a sentence boundary
last_period = truncated.rfind(".")
if last_period > len(truncated) // 2:
truncated = truncated[: last_period + 1]
final_summary = truncated
duration_ms = int((time.monotonic() - start) * 1000)
await _log_performance(
pool, resolved, True, duration_ms, serialized, final_summary,
)
return final_summary
except Exception as exc:
duration_ms = int((time.monotonic() - start) * 1000)
logger.warning(
"AI summarization failed for section %s: %s — using deterministic fallback",
section_name,
exc,
)
await _log_performance(
pool, resolved, False, duration_ms, serialized, "",
error_message=str(exc),
)
return build_deterministic_summary(section_name, section_data)
# ---------------------------------------------------------------------------
# Deterministic fallback summaries
# ---------------------------------------------------------------------------
_DETERMINISTIC_TEMPLATES: dict[str, str] = {
"pnl": (
"P&L Summary: Realized P&L ${realized_pnl}, unrealized ${unrealized_pnl}, "
"daily return {daily_return}%, win rate {win_rate}%."
),
"recommendation_accuracy": (
"Recommendation Accuracy: {total_evaluated} evaluated, "
"{act_count} acted ({acted_win_rate}% win rate), "
"{skip_count} skipped. "
"Avg confidence acted {avg_confidence_acted}, skipped {avg_confidence_skipped}."
),
"position_performance": (
"Position Performance: {position_count} positions tracked during the period."
),
"risk_metrics": (
"Risk Metrics: Risk tier {current_risk_tier}, portfolio heat {portfolio_heat}, "
"max drawdown {max_drawdown}, current drawdown {current_drawdown_pct}%, "
"reserve pool ${reserve_pool_balance}, "
"{circuit_breaker_event_count} circuit breaker events."
),
"model_quality": (
"Model Quality: {window_count} lookback windows evaluated."
),
}
def build_deterministic_summary(section_name: str, section_data: dict) -> str:
"""Build a fallback deterministic summary from raw metrics.
Produces a template-based text summary when AI summarization fails.
"""
template = _DETERMINISTIC_TEMPLATES.get(section_name)
if template is None:
# Generic fallback for unknown sections
return f"{section_name} summary: {len(section_data)} metrics reported."
try:
# Prepare template variables with safe defaults
data = dict(section_data)
# Add computed fields for templates that need them
if section_name == "position_performance":
positions = data.get("positions", [])
data["position_count"] = len(positions)
elif section_name == "model_quality":
windows = data.get("windows", [])
data["window_count"] = len(windows)
return template.format(**data)
except (KeyError, ValueError, TypeError) as exc:
logger.warning(
"Deterministic summary template failed for %s: %s",
section_name,
exc,
)
return f"{section_name} summary: data available but template formatting failed."
# ---------------------------------------------------------------------------
# Executive summary
# ---------------------------------------------------------------------------
async def generate_executive_summary(
pool: asyncpg.Pool,
resolver: AgentConfigResolver,
section_summaries: dict[str, str],
) -> str:
"""Generate executive summary from all section summaries.
Concatenates section summaries, chunks if needed, and produces
a ≤300-word synthesis via the Report_Summarizer_Agent.
Falls back to concatenated section summaries on failure.
"""
resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
concatenated = "\n\n".join(
f"{name}: {summary}" for name, summary in section_summaries.items()
)
if resolved is None:
logger.error(
"Report summarizer agent not found — using concatenated summaries as executive summary",
)
return concatenated
chunks = chunk_data(concatenated)
start = time.monotonic()
try:
# Summarize chunks if needed
if len(chunks) > 1:
chunk_summaries: list[str] = []
for chunk in chunks:
summary = await _summarize_chunk(resolved, "executive", chunk)
chunk_summaries.append(summary)
input_text = "\n\n".join(chunk_summaries)
else:
input_text = chunks[0]
# Final executive summary call
cfg = load_config()
client = build_llm_client(resolved, cfg.ollama, cfg.vllm)
try:
prompts = {
"system": resolved.system_prompt,
"user": (
f"Synthesize these trading performance section summaries into "
f"a concise executive summary of no more than "
f"{MAX_EXECUTIVE_SUMMARY_WORDS} words:\n{input_text}"
),
}
attempt = await client.call_llm(
prompts=prompts,
json_schema={},
document_text="",
)
finally:
await client.close()
if attempt.error:
raise RuntimeError(f"Executive summary LLM error: {attempt.error}")
if not attempt.raw_output.strip():
raise RuntimeError("Executive summary LLM returned empty response")
executive = attempt.raw_output.strip()
# Truncate to MAX_EXECUTIVE_SUMMARY_WORDS at sentence boundary
words = executive.split()
if len(words) > MAX_EXECUTIVE_SUMMARY_WORDS:
truncated = " ".join(words[:MAX_EXECUTIVE_SUMMARY_WORDS])
last_period = truncated.rfind(".")
if last_period > len(truncated) // 2:
truncated = truncated[: last_period + 1]
executive = truncated
duration_ms = int((time.monotonic() - start) * 1000)
await _log_performance(
pool, resolved, True, duration_ms, concatenated, executive,
)
return executive
except Exception as exc:
duration_ms = int((time.monotonic() - start) * 1000)
logger.warning(
"Executive summary generation failed: %s — using concatenated summaries",
exc,
)
await _log_performance(
pool, resolved, False, duration_ms, concatenated, "",
error_message=str(exc),
)
return concatenated
+175
View File
@@ -0,0 +1,175 @@
"""Report validator — cross-checks computed metrics against live data.
Compares report section values against prediction_outcomes and
model_metric_snapshots, flagging discrepancies that exceed the
configured threshold.
"""
from __future__ import annotations
import logging
import math
from services.reporting.models import (
ModelQualitySection,
RecommendationAccuracySection,
ReportData,
ValidationStatus,
ValidationWarning,
)
logger = logging.getLogger(__name__)
DISCREPANCY_THRESHOLD_PCT = 5.0
def _sanitize(value: float | None) -> float:
"""Replace None, NaN, and infinity with 0.0."""
if value is None:
return 0.0
if math.isnan(value) or math.isinf(value):
return 0.0
return value
def _check_discrepancy(
field_name: str,
computed: float,
snapshot: float,
) -> ValidationWarning | None:
"""Compare computed vs snapshot and return a warning if >5% discrepancy.
Edge cases:
- snapshot=0 and computed≠0 → 100% difference → warning
- both=0 → 0% difference → no warning
- snapshot is handled upstream (NULL → skip before calling this)
"""
computed = _sanitize(computed)
snapshot = _sanitize(snapshot)
if snapshot == 0.0 and computed == 0.0:
return None
if snapshot == 0.0:
# Non-zero computed with zero snapshot → 100% discrepancy
pct_diff = 100.0
else:
pct_diff = abs(computed - snapshot) / abs(snapshot) * 100.0
if pct_diff > DISCREPANCY_THRESHOLD_PCT:
return ValidationWarning(
field_name=field_name,
computed_value=computed,
snapshot_value=snapshot,
pct_difference=round(pct_diff, 4),
)
return None
def validate_recommendation_accuracy(
section: RecommendationAccuracySection,
prediction_outcomes: list[dict],
) -> list[ValidationWarning]:
"""Cross-reference reported win rates with prediction_outcomes.
Computes win_rate from prediction_outcomes (count profitable / total)
and compares against section.acted_win_rate. Returns warnings for
discrepancies > 5%.
"""
warnings: list[ValidationWarning] = []
if not prediction_outcomes:
return warnings
total = len(prediction_outcomes)
profitable_count = sum(
1 for po in prediction_outcomes if po.get("profitable")
)
computed_win_rate = profitable_count / total if total > 0 else 0.0
w = _check_discrepancy(
"acted_win_rate",
section.acted_win_rate,
computed_win_rate,
)
if w is not None:
warnings.append(w)
return warnings
def validate_model_quality(
section: ModelQualitySection,
metric_snapshots: list[dict],
) -> list[ValidationWarning]:
"""Compare reported model quality metrics against model_metric_snapshots.
For each window in the section, finds the matching snapshot by
lookback_window and compares win_rate, directional_accuracy,
information_coefficient, calibration_error, and brier_score.
Flags discrepancies > 5%.
"""
warnings: list[ValidationWarning] = []
if not metric_snapshots:
return warnings
# Build lookup: lookback_window → latest snapshot (first match since
# collector orders by generated_at DESC)
snap_by_window: dict[str, dict] = {}
for snap in metric_snapshots:
window = snap.get("lookback_window", "")
if window and window not in snap_by_window:
snap_by_window[window] = snap
metric_fields = [
("win_rate", "win_rate"),
("directional_accuracy", "directional_accuracy"),
("information_coefficient", "information_coefficient"),
("calibration_error", "calibration_error"),
("brier_score", "brier_score"),
]
for mq_window in section.windows:
snap = snap_by_window.get(mq_window.lookback)
if snap is None:
continue
for section_attr, snap_key in metric_fields:
section_value = getattr(mq_window, section_attr, None)
snapshot_value = snap.get(snap_key)
# NULL snapshot → skip
if snapshot_value is None:
continue
# NULL section value → skip
if section_value is None:
continue
snapshot_float = _sanitize(float(snapshot_value))
section_float = _sanitize(section_value)
w = _check_discrepancy(
f"{mq_window.lookback}_{section_attr}",
section_float,
snapshot_float,
)
if w is not None:
warnings.append(w)
return warnings
def compute_validation_status(report: ReportData) -> ValidationStatus:
"""Determine overall validation status.
Returns 'passed' if no warnings across all sections,
'warnings' if any section has validation warnings.
"""
if report.pnl.validation_warnings:
return ValidationStatus.WARNINGS
if report.recommendation_accuracy.validation_warnings:
return ValidationStatus.WARNINGS
if report.model_quality.validation_warnings:
return ValidationStatus.WARNINGS
return ValidationStatus.PASSED