feat: trading feedback engine — periodic performance reports with AI summarization

- Migration 038: trading_reports table + report-summarizer agent seed
- 6 reporting modules: models, collector, sections, validator, summarizer, generator
- API endpoints: GET /api/reports (paginated, filterable), GET /api/reports/{id}
- Frontend hooks: useReports, useReport with TanStack Query
- Scheduler: daily (after 16:30 ET) and weekly (Saturday) report triggers
- Redis queue consumer for async report generation with retry/dedup
- 5 property-based tests (chunking, serialization, validation, accuracy, deltas)
- 109 unit/integration tests across all modules
- 6 frontend hook tests with MSW mocks
2026-05-01 22:13:09 +00:00
parent 376fcb4bb4
commit bc077bfcc8
28 changed files with 6771 additions and 1 deletions
@@ -0,0 +1 @@
+
@@ -0,0 +1,306 @@
+"""Data collector for trading performance reports.
+
+Queries all relevant trading data for a reporting period and returns
+a CollectedData bundle for downstream section builders.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid
+from dataclasses import dataclass, field
+from datetime import date
+from typing import Any
+
+import asyncpg
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CollectedData:
+    """Raw data collected for a reporting period.
+
+    Filled in by ``collect_report_data``.  Every row is a plain ``dict``
+    produced by ``_row_dict`` (UUID values already converted to ``str``).
+    """
+
+    # trading_decisions rows created within the period, oldest first.
+    trading_decisions: list[dict] = field(default_factory=list)
+    # orders rows created within the period, oldest first.
+    orders: list[dict] = field(default_factory=list)
+    # positions rows with quantity > 0 at collection time (not period-filtered).
+    open_positions: list[dict] = field(default_factory=list)
+    # positions rows with quantity = 0 whose updated_at falls within the period.
+    closed_positions: list[dict] = field(default_factory=list)
+    # Latest portfolio_snapshots row dated inside the period; None if there is none.
+    portfolio_snapshot: dict | None = None
+    # Latest portfolio_snapshots row dated before period_start; None if there is none.
+    previous_portfolio_snapshot: dict | None = None
+    # recommendations rows created within the period, oldest first.
+    recommendations: list[dict] = field(default_factory=list)
+    # prediction_outcomes evaluated within the period, joined with the
+    # ticker/direction/action/confidence of their prediction_snapshots row.
+    prediction_outcomes: list[dict] = field(default_factory=list)
+    # model_metric_snapshots generated within the period, newest first.
+    model_metric_snapshots: list[dict] = field(default_factory=list)
+    # trading_decisions rows in the period whose circuit_breaker_status != 'clear'.
+    circuit_breaker_events: list[dict] = field(default_factory=list)
+    # balance_after of the newest reserve_pool_ledger row; 0.0 when the ledger is empty.
+    reserve_pool_balance: float = 0.0
+
+
+def _row_dict(row: asyncpg.Record) -> dict[str, Any]:
+    """Return *row* as a plain dict, rendering every UUID value as a string.
+
+    Only top-level values are inspected; all other value types are passed
+    through unchanged.
+    """
+    return {
+        column: str(value) if isinstance(value, uuid.UUID) else value
+        for column, value in dict(row).items()
+    }
+
+
+async def collect_report_data(
+    pool: asyncpg.Pool,
+    period_start: date,
+    period_end: date,
+) -> CollectedData:
+    """Gather every dataset a report needs for ``period_start``..``period_end``.
+
+    One connection is acquired from *pool* and the helper queries run on it
+    one after another, in this order: trading_decisions, orders, open
+    positions, closed positions, in-period portfolio snapshot, previous
+    portfolio snapshot, recommendations, prediction_outcomes,
+    model_metric_snapshots, circuit breaker events, reserve pool balance.
+
+    Open positions and the reserve pool balance are "as of now" lookups and
+    take no period arguments.  No explicit transaction is opened around the
+    queries.
+
+    Returns a CollectedData bundle.  A period with no matching rows yields
+    empty lists and ``None`` snapshots (zero-activity).
+    """
+    async with pool.acquire() as conn:
+        # Keyword arguments are evaluated top to bottom, so the queries are
+        # issued sequentially in the order listed here.
+        return CollectedData(
+            trading_decisions=await _fetch_trading_decisions(
+                conn, period_start, period_end,
+            ),
+            orders=await _fetch_orders(conn, period_start, period_end),
+            open_positions=await _fetch_open_positions(conn),
+            closed_positions=await _fetch_closed_positions(
+                conn, period_start, period_end,
+            ),
+            portfolio_snapshot=await _fetch_portfolio_snapshot(
+                conn, period_start, period_end,
+            ),
+            previous_portfolio_snapshot=await _fetch_previous_portfolio_snapshot(
+                conn, period_start,
+            ),
+            recommendations=await _fetch_recommendations(
+                conn, period_start, period_end,
+            ),
+            prediction_outcomes=await _fetch_prediction_outcomes(
+                conn, period_start, period_end,
+            ),
+            model_metric_snapshots=await _fetch_model_metric_snapshots(
+                conn, period_start, period_end,
+            ),
+            circuit_breaker_events=await _fetch_circuit_breaker_events(
+                conn, period_start, period_end,
+            ),
+            reserve_pool_balance=await _fetch_reserve_pool_balance(conn),
+        )
+
+
+async def _fetch_trading_decisions(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return the trading decisions whose ``created_at`` lies in the period.
+
+    The filter is a half-open window: from ``period_start`` up to, but not
+    including, the day after ``period_end``.  Rows come back ordered by
+    ``created_at`` and are converted with ``_row_dict``.
+    """
+    sql = """SELECT id, recommendation_id, decision, skip_reason, ticker,
+                  computed_position_size, computed_share_quantity,
+                  risk_tier_at_decision, portfolio_heat_at_decision,
+                  active_pool_at_decision, reserve_pool_at_decision,
+                  circuit_breaker_status, correlation_check_result,
+                  sector_exposure_check_result, earnings_proximity_flag,
+                  is_micro_trade, decision_trace, created_at
+           FROM trading_decisions
+           WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
+           ORDER BY created_at"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_orders(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return the orders whose ``created_at`` lies in the period.
+
+    Uses the same half-open window as the other period queries
+    (``period_end`` inclusive).  Rows are ordered by ``created_at`` and
+    converted with ``_row_dict``.
+    """
+    sql = """SELECT id, recommendation_id, broker_account_id, ticker, side,
+                  order_type, quantity, limit_price, stop_price, status,
+                  broker_order_id, fill_price, fill_quantity,
+                  submitted_at, filled_at, cancelled_at, rejected_at,
+                  rejection_reason, created_at
+           FROM orders
+           WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
+           ORDER BY created_at"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_open_positions(conn: asyncpg.Connection) -> list[dict]:
+    """Return all positions that currently have ``quantity > 0``.
+
+    The query is not period-filtered: it reflects the table at call time.
+    Rows are ordered by ticker and converted with ``_row_dict``.
+    """
+    # NOTE(review): a row with negative quantity (if short positions are ever
+    # stored that way) matches neither this query nor the closed-position
+    # query — confirm against the positions schema.
+    sql = """SELECT id, broker_account_id, ticker, quantity,
+                  avg_entry_price, current_price,
+                  unrealized_pnl, realized_pnl, updated_at
+           FROM positions
+           WHERE quantity > 0
+           ORDER BY ticker"""
+    records = await conn.fetch(sql)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_closed_positions(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return positions treated as closed during the period.
+
+    A position qualifies when ``quantity = 0`` and its ``updated_at`` falls
+    in the half-open window from ``period_start`` up to, but excluding, the
+    day after ``period_end``.  ``updated_at`` is the only timestamp checked,
+    so any zero-quantity row touched in the window is included.  Rows are
+    ordered by ``updated_at`` and converted with ``_row_dict``.
+    """
+    sql = """SELECT id, broker_account_id, ticker, quantity,
+                  avg_entry_price, current_price,
+                  unrealized_pnl, realized_pnl, updated_at
+           FROM positions
+           WHERE quantity = 0
+             AND updated_at >= $1::date
+             AND updated_at < ($2::date + INTERVAL '1 day')
+           ORDER BY updated_at"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_portfolio_snapshot(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> dict | None:
+    """Return the newest portfolio snapshot dated inside the period.
+
+    Both bounds are inclusive (``period_start <= snapshot_date <= period_end``).
+    Returns ``None`` when no snapshot exists in that range.
+    """
+    sql = """SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
+                  daily_return, cumulative_return, unrealized_pnl, realized_pnl,
+                  win_count, loss_count, win_rate, sharpe_ratio,
+                  max_drawdown, current_drawdown_pct, portfolio_heat,
+                  risk_tier, positions, metrics, created_at
+           FROM portfolio_snapshots
+           WHERE snapshot_date >= $1 AND snapshot_date <= $2
+           ORDER BY snapshot_date DESC
+           LIMIT 1"""
+    record = await conn.fetchrow(sql, period_start, period_end)
+    if not record:
+        return None
+    return _row_dict(record)
+
+
+async def _fetch_previous_portfolio_snapshot(
+    conn: asyncpg.Connection,
+    period_start: date,
+) -> dict | None:
+    """Return the newest portfolio snapshot dated strictly before the period.
+
+    Returns ``None`` when no snapshot precedes ``period_start``.
+    """
+    sql = """SELECT id, snapshot_date, portfolio_value, active_pool, reserve_pool,
+                  daily_return, cumulative_return, unrealized_pnl, realized_pnl,
+                  win_count, loss_count, win_rate, sharpe_ratio,
+                  max_drawdown, current_drawdown_pct, portfolio_heat,
+                  risk_tier, positions, metrics, created_at
+           FROM portfolio_snapshots
+           WHERE snapshot_date < $1
+           ORDER BY snapshot_date DESC
+           LIMIT 1"""
+    record = await conn.fetchrow(sql, period_start)
+    if not record:
+        return None
+    return _row_dict(record)
+
+
+async def _fetch_recommendations(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return the recommendations whose ``created_at`` lies in the period.
+
+    The window is half-open: from ``period_start`` up to, but excluding, the
+    day after ``period_end``.  Rows are ordered by ``created_at`` and
+    converted with ``_row_dict``.
+    """
+    sql = """SELECT id, ticker, company_id, action, mode, confidence,
+                  time_horizon, thesis, portfolio_pct, max_loss_pct,
+                  model_version, generated_at, created_at
+           FROM recommendations
+           WHERE created_at >= $1::date AND created_at < ($2::date + INTERVAL '1 day')
+           ORDER BY created_at"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_prediction_outcomes(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return prediction outcomes whose ``evaluated_at`` lies in the period.
+
+    Each outcome is inner-joined to its ``prediction_snapshots`` row, so the
+    result also carries that snapshot's ticker, direction, action and
+    confidence.  Outcomes without a matching snapshot are dropped by the
+    join.  Rows are ordered by ``evaluated_at`` and converted with
+    ``_row_dict``.
+    """
+    sql = """SELECT po.id, po.prediction_id, po.evaluated_at, po.horizon,
+                  po.future_price, po.future_return,
+                  po.spy_future_price, po.spy_return,
+                  po.sector_etf_future_price, po.sector_etf_return,
+                  po.excess_return_vs_spy, po.excess_return_vs_sector,
+                  po.direction_correct, po.profitable,
+                  ps.ticker, ps.direction, ps.action, ps.confidence
+           FROM prediction_outcomes po
+           JOIN prediction_snapshots ps ON ps.id = po.prediction_id
+           WHERE po.evaluated_at >= $1::date
+             AND po.evaluated_at < ($2::date + INTERVAL '1 day')
+           ORDER BY po.evaluated_at"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_model_metric_snapshots(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return model metric snapshots whose ``generated_at`` lies in the period.
+
+    Unlike the other period queries, rows are ordered newest first
+    (``generated_at DESC``).  Each row is converted with ``_row_dict``.
+    """
+    sql = """SELECT id, generated_at, lookback_window, horizon,
+                  prediction_count, win_rate, directional_accuracy,
+                  information_coefficient, rank_information_coefficient,
+                  avg_return, avg_excess_return_vs_spy,
+                  avg_excess_return_vs_sector,
+                  calibration_error, brier_score,
+                  buy_win_rate, sell_win_rate, hold_win_rate,
+                  created_at
+           FROM model_metric_snapshots
+           WHERE generated_at >= $1::date
+             AND generated_at < ($2::date + INTERVAL '1 day')
+           ORDER BY generated_at DESC"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_circuit_breaker_events(
+    conn: asyncpg.Connection,
+    period_start: date,
+    period_end: date,
+) -> list[dict]:
+    """Return in-period trading decisions made while a circuit breaker was active.
+
+    There is no dedicated events table here: an "event" is a
+    ``trading_decisions`` row whose ``circuit_breaker_status`` differs from
+    ``'clear'``.  Because the comparison is ``!=``, rows with a NULL status
+    are not returned.  Rows are ordered by ``created_at`` and converted with
+    ``_row_dict``.
+    """
+    sql = """SELECT id, recommendation_id, decision, ticker,
+                  circuit_breaker_status, decision_trace, created_at
+           FROM trading_decisions
+           WHERE circuit_breaker_status != 'clear'
+             AND created_at >= $1::date
+             AND created_at < ($2::date + INTERVAL '1 day')
+           ORDER BY created_at"""
+    records = await conn.fetch(sql, period_start, period_end)
+    return [_row_dict(record) for record in records]
+
+
+async def _fetch_reserve_pool_balance(conn: asyncpg.Connection) -> float:
+    """Return ``balance_after`` from the newest reserve pool ledger entry.
+
+    "Newest" means the highest ``created_at``.  Returns ``0.0`` when the
+    ledger has no rows.
+    """
+    latest = await conn.fetchrow(
+        "SELECT balance_after FROM reserve_pool_ledger ORDER BY created_at DESC LIMIT 1",
+    )
+    if not latest:
+        return 0.0
+    return float(latest["balance_after"])
@@ -0,0 +1,279 @@
+"""Report generator — orchestrates collection, building, validation, summarization, and storage.
+
+Provides three public functions:
+- generate_report: full pipeline from data collection to assembled ReportData
+- store_report: upsert into trading_reports table
+- process_report_job: Redis queue job handler with retry and dedup
+
+Requirements: 5.1, 5.2, 5.3, 6.3, 6.4, 6.5
+Design: Report Generator
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from datetime import date, datetime, timezone
+
+import asyncpg
+
+from services.reporting.collector import collect_report_data
+from services.reporting.models import ReportData, ReportType
+from services.reporting.sections import (
+    build_model_quality_section,
+    build_pnl_section,
+    build_position_performance_section,
+    build_recommendation_accuracy_section,
+    build_risk_metrics_section,
+)
+from services.reporting.summarizer import (
+    generate_executive_summary,
+    summarize_section,
+)
+from services.reporting.validator import (
+    compute_validation_status,
+    validate_model_quality,
+    validate_recommendation_accuracy,
+)
+from services.shared.agent_config import AgentConfigResolver
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Retry configuration for process_report_job
+# ---------------------------------------------------------------------------
+
+# Maximum number of generate + store attempts made for a single job.
+_MAX_RETRIES = 3
+# Seconds to sleep after a failed attempt, looked up by the zero-based index
+# of the attempt that just failed.  No sleep follows the final attempt.
+_BACKOFF_SECONDS = (30, 60, 120)
+
+# In-memory set tracking in-progress jobs to reject duplicates.
+# Key format: "{report_type}:{period_start}:{period_end}"
+# This is a module-level set, so it only sees jobs handled by this process.
+_in_progress_jobs: set[str] = set()
+
+
+# ---------------------------------------------------------------------------
+# generate_report
+# ---------------------------------------------------------------------------
+
+
+async def generate_report(
+    pool: asyncpg.Pool,
+    report_type: ReportType,
+    period_start: date,
+    period_end: date,
+) -> ReportData:
+    """Run the full reporting pipeline and return the assembled report.
+
+    Steps, in order:
+
+    1. Collect raw data for the period.
+    2. Build the five sections (pnl, recommendation_accuracy,
+       position_performance, risk_metrics, model_quality).
+    3. Validate recommendation_accuracy and model_quality and attach the
+       resulting warnings to those sections.
+    4. Summarize each section, one at a time, through a shared
+       AgentConfigResolver.
+    5. Generate the executive summary from the section summaries.
+    6. Assemble ReportData and derive its validation status.
+
+    Nothing is persisted here; exceptions from any step propagate to the
+    caller.
+    """
+    collected = await collect_report_data(pool, period_start, period_end)
+
+    pnl = build_pnl_section(collected)
+    rec_accuracy = build_recommendation_accuracy_section(collected)
+    position_perf = build_position_performance_section(collected)
+    risk_metrics = build_risk_metrics_section(collected)
+    model_quality = build_model_quality_section(collected)
+
+    # Warnings are attached before summarization, so each section dump handed
+    # to the summarizer already contains them.
+    rec_accuracy.validation_warnings = validate_recommendation_accuracy(
+        rec_accuracy, collected.prediction_outcomes,
+    )
+    model_quality.validation_warnings = validate_model_quality(
+        model_quality, collected.model_metric_snapshots,
+    )
+
+    resolver = AgentConfigResolver(pool)
+    named_sections = (
+        ("pnl", pnl),
+        ("recommendation_accuracy", rec_accuracy),
+        ("position_performance", position_perf),
+        ("risk_metrics", risk_metrics),
+        ("model_quality", model_quality),
+    )
+
+    # Summaries are requested sequentially in the order above; the mapping
+    # keeps that order for the executive summary.
+    section_summaries = {}
+    for section_name, section in named_sections:
+        section.summary = await summarize_section(
+            pool, resolver, section_name, section.model_dump(),
+        )
+        section_summaries[section_name] = section.summary
+
+    executive_summary = await generate_executive_summary(
+        pool, resolver, section_summaries,
+    )
+
+    assembled = ReportData(
+        pnl=pnl,
+        recommendation_accuracy=rec_accuracy,
+        position_performance=position_perf,
+        risk_metrics=risk_metrics,
+        model_quality=model_quality,
+        executive_summary=executive_summary,
+        generated_at=datetime.now(timezone.utc),
+        period_start=period_start,
+        period_end=period_end,
+        report_type=ReportType(report_type),
+    )
+    # The status is derived from the finished report, after all warnings are in place.
+    assembled.validation_status = compute_validation_status(assembled)
+    return assembled
+
+
+# ---------------------------------------------------------------------------
+# store_report
+# ---------------------------------------------------------------------------
+
+# Insert-or-replace statement used by store_report.
+# Parameters: $1 report_type, $2 period_start, $3 period_end,
+# $4 report JSON (cast to jsonb), $5 validation_status, $6 generated_at.
+# On a conflict on (report_type, period_start, period_end) the existing row
+# keeps its id and has report_data, validation_status and generated_at
+# overwritten.  The row id is returned in both cases.
+_UPSERT_SQL = """\
+INSERT INTO trading_reports
+    (report_type, period_start, period_end, report_data, validation_status, generated_at)
+VALUES
+    ($1, $2, $3, $4::jsonb, $5, $6)
+ON CONFLICT (report_type, period_start, period_end)
+DO UPDATE SET
+    report_data = EXCLUDED.report_data,
+    validation_status = EXCLUDED.validation_status,
+    generated_at = EXCLUDED.generated_at
+RETURNING id
+"""
+
+
+async def store_report(
+    pool: asyncpg.Pool,
+    report: ReportData,
+) -> str:
+    """Persist *report* in ``trading_reports`` and return its id as a string.
+
+    The write is an upsert keyed on (report_type, period_start, period_end):
+    regenerating a report for the same type and period overwrites the stored
+    data instead of adding a second row.  The whole report is sent as its
+    JSON dump.
+    """
+    upsert_args = (
+        report.report_type.value,
+        report.period_start,
+        report.period_end,
+        report.model_dump_json(),
+        report.validation_status.value,
+        report.generated_at,
+    )
+    record = await pool.fetchrow(_UPSERT_SQL, *upsert_args)
+    stored_id = str(record["id"])  # type: ignore[index]
+
+    logger.info(
+        "Stored report %s (type=%s, period=%s to %s)",
+        stored_id,
+        report.report_type.value,
+        report.period_start,
+        report.period_end,
+    )
+    return stored_id
+
+
+# ---------------------------------------------------------------------------
+# process_report_job
+# ---------------------------------------------------------------------------
+
+
+def _job_key(report_type: str, period_start: str, period_end: str) -> str:
+    """Return the colon-separated key identifying a job: type, start, end."""
+    return "{}:{}:{}".format(report_type, period_start, period_end)
+
+
+async def process_report_job(
+    pool: asyncpg.Pool,
+    job: dict,
+) -> None:
+    """Process a report generation job from the Redis queue.
+
+    Parses the job payload, then calls generate_report + store_report.
+
+    Expected job payload::
+
+        {
+            "report_type": "daily" | "weekly",
+            "period_start": "YYYY-MM-DD",
+            "period_end": "YYYY-MM-DD"
+        }
+
+    Behavior:
+
+    - An invalid payload is logged and dropped; nothing is raised.
+    - A job whose (report_type, period_start, period_end) is already being
+      processed in this process is logged and dropped.  The dedup key is
+      built from the *parsed* values, so payloads that spell the same
+      period differently still collide.
+    - Up to ``_MAX_RETRIES`` attempts are made.  After each failed attempt
+      except the last, the handler sleeps for the matching
+      ``_BACKOFF_SECONDS`` entry (30s, then 60s with the default of three
+      attempts).
+    - When every attempt fails, the last error is logged with its
+      traceback and the function returns normally.
+    """
+    # Validate payload
+    try:
+        report_type = ReportType(job.get("report_type", ""))
+        period_start = date.fromisoformat(job.get("period_start", ""))
+        period_end = date.fromisoformat(job.get("period_end", ""))
+    except (ValueError, TypeError) as exc:
+        logger.error("Invalid report job payload: %s — %s", job, exc)
+        return
+
+    # Build the key from the normalized values rather than the raw payload
+    # strings: two payloads that parse to the same report must share a key.
+    key = _job_key(
+        report_type.value, period_start.isoformat(), period_end.isoformat(),
+    )
+
+    # Reject duplicate in-progress jobs
+    if key in _in_progress_jobs:
+        logger.warning(
+            "Duplicate report job rejected (already in progress): %s", key,
+        )
+        return
+
+    _in_progress_jobs.add(key)
+    try:
+        last_error: Exception | None = None
+        for attempt in range(1, _MAX_RETRIES + 1):
+            try:
+                report = await generate_report(
+                    pool, report_type, period_start, period_end,
+                )
+                await store_report(pool, report)
+            except Exception as exc:
+                # Queue-consumer boundary: any failure is retried, never raised.
+                last_error = exc
+                if attempt == _MAX_RETRIES:
+                    break
+                # Clamp the lookup so raising _MAX_RETRIES beyond the table
+                # length reuses the last delay instead of failing.
+                backoff = _BACKOFF_SECONDS[
+                    min(attempt, len(_BACKOFF_SECONDS)) - 1
+                ]
+                logger.warning(
+                    "Report job %s failed (attempt %d/%d): %s — retrying in %ds",
+                    key,
+                    attempt,
+                    _MAX_RETRIES,
+                    exc,
+                    backoff,
+                )
+                await asyncio.sleep(backoff)
+            else:
+                logger.info(
+                    "Report job completed: %s (attempt %d)", key, attempt,
+                )
+                return
+
+        # All retries exhausted; keep the traceback of the final failure.
+        logger.error(
+            "Report job %s failed after %d attempts: %s",
+            key,
+            _MAX_RETRIES,
+            last_error,
+            exc_info=last_error,
+        )
+    finally:
+        # Always release the key so a later job for this period can run.
+        _in_progress_jobs.discard(key)
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+from datetime import date, datetime
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+class ReportType(str, Enum):
+    """Kind of report; the string values are what job payloads and storage use."""
+
+    DAILY = "daily"
+    WEEKLY = "weekly"
+
+
+class ValidationStatus(str, Enum):
+    """Overall validation outcome recorded on a report."""
+
+    PASSED = "passed"
+    WARNINGS = "warnings"
+
+
+class ValidationWarning(BaseModel):
+    """One validation finding attached to a report section.
+
+    NOTE(review): judging by the field names, this pairs a value computed by
+    the section builder with the corresponding stored snapshot value and
+    their percentage difference — confirm against the validator module.
+    """
+
+    field_name: str
+    computed_value: float
+    snapshot_value: float
+    pct_difference: float
+
+
+class PLSection(BaseModel):
+    """Profit-and-loss section of a report.
+
+    ``summary`` starts empty and is filled in during report generation.
+    """
+
+    realized_pnl: float
+    unrealized_pnl: float
+    daily_return: float
+    cumulative_return: float
+    win_count: int
+    loss_count: int
+    win_rate: float
+    # Gains divided by losses over closed positions; builders use 0.0 when
+    # there are no losses.
+    profit_factor: float
+    sharpe_ratio: float
+    summary: str = ""
+    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
+
+
+class RecommendationAccuracySection(BaseModel):
+    """Act/skip breakdown of trading decisions and how acted ones performed.
+
+    ``summary`` starts empty and ``validation_warnings`` starts as an empty
+    list; both are filled in during report generation.
+    """
+
+    # act_count + skip_count.
+    total_evaluated: int
+    act_count: int
+    skip_count: int
+    # Fraction (0..1) of acted decisions with an outcome that were profitable.
+    acted_win_rate: float
+    avg_confidence_acted: float
+    avg_confidence_skipped: float
+    summary: str = ""
+    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
+
+
+class PositionDetail(BaseModel):
+    """Performance figures for a single open or closed position."""
+
+    ticker: str
+    entry_price: float
+    # Current price for an open position, exit price for a closed one.
+    current_or_exit_price: float
+    pnl: float
+    # Expressed in percent (builders multiply the ratio by 100).
+    pnl_pct: float
+    hold_duration_hours: float
+    status: str  # "open" or "closed"
+
+
+class PositionPerformanceSection(BaseModel):
+    """Per-position performance list plus a generated summary."""
+
+    positions: list[PositionDetail] = Field(default_factory=list)
+    summary: str = ""
+
+
+class RiskMetricsSection(BaseModel):
+    """Risk-related figures for the report plus a generated summary.
+
+    NOTE(review): the builder for this section is not visible here; units
+    and scales of the numeric fields should be confirmed against it.
+    """
+
+    current_risk_tier: str
+    portfolio_heat: float
+    max_drawdown: float
+    current_drawdown_pct: float
+    reserve_pool_balance: float
+    circuit_breaker_event_count: int
+    summary: str = ""
+
+
+class ModelQualityWindow(BaseModel):
+    """Model quality metrics for one lookback window.
+
+    Every metric accepts ``None`` but has no default, so each one must still
+    be passed explicitly when constructing the model.
+    """
+
+    lookback: str
+    win_rate: float | None
+    directional_accuracy: float | None
+    information_coefficient: float | None
+    calibration_error: float | None
+    brier_score: float | None
+
+
+class ModelQualitySection(BaseModel):
+    """Model quality metrics per lookback window, plus summary and warnings.
+
+    ``summary`` starts empty and ``validation_warnings`` starts as an empty
+    list; both are filled in during report generation.
+    """
+
+    windows: list[ModelQualityWindow] = Field(default_factory=list)
+    summary: str = ""
+    validation_warnings: list[ValidationWarning] = Field(default_factory=list)
+
+
+class ReportData(BaseModel):
+    """Top-level report structure stored as JSONB.
+
+    Holds the five report sections, the executive summary, the overall
+    validation status, and the identifying metadata (type, period,
+    generation time).
+    """
+
+    pnl: PLSection
+    recommendation_accuracy: RecommendationAccuracySection
+    position_performance: PositionPerformanceSection
+    risk_metrics: RiskMetricsSection
+    model_quality: ModelQualitySection
+    executive_summary: str = ""
+    # Defaults to PASSED until a status is assigned explicitly.
+    validation_status: ValidationStatus = ValidationStatus.PASSED
+    generated_at: datetime
+    # Reporting period bounds as plain dates.
+    period_start: date
+    period_end: date
+    report_type: ReportType
@@ -0,0 +1,370 @@
+"""Section builders for trading performance reports.
+
+Each builder takes a CollectedData bundle and returns a typed Pydantic
+section model.  All builders handle zero-activity gracefully by returning
+zero values and empty lists when no data is available.
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timezone
+
+from services.reporting.collector import CollectedData
+from services.reporting.models import (
+    ModelQualitySection,
+    ModelQualityWindow,
+    PLSection,
+    PositionDetail,
+    PositionPerformanceSection,
+    RecommendationAccuracySection,
+    RiskMetricsSection,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def build_pnl_section(data: CollectedData) -> PLSection:
+    """Assemble the P&L section for a report.
+
+    Realized/unrealized P&L, daily and cumulative return, win/loss counts,
+    win rate and Sharpe ratio are copied from the in-period portfolio
+    snapshot; missing or null values become zero.  Profit factor is derived
+    from the closed positions as total gains divided by total losses.
+
+    When there is no snapshot for the period, every field is zero.
+    """
+    snapshot = data.portfolio_snapshot
+    if snapshot is None:
+        # Zero-activity report: nothing to read, so everything is zero.
+        return PLSection(
+            realized_pnl=0.0,
+            unrealized_pnl=0.0,
+            daily_return=0.0,
+            cumulative_return=0.0,
+            win_count=0,
+            loss_count=0,
+            win_rate=0.0,
+            profit_factor=0.0,
+            sharpe_ratio=0.0,
+        )
+
+    def _snapshot_float(key: str) -> float:
+        # Missing keys and null values both collapse to 0.
+        return float(snapshot.get(key, 0) or 0)
+
+    def _snapshot_int(key: str) -> int:
+        return int(snapshot.get(key, 0) or 0)
+
+    total_gains = 0.0
+    total_losses = 0.0
+    for closed in data.closed_positions:
+        realized = float(closed.get("realized_pnl", 0) or 0)
+        if realized > 0:
+            total_gains += realized
+        elif realized < 0:
+            total_losses += abs(realized)
+
+    # With no losing position the ratio is undefined; 0.0 is reported, even
+    # if there were gains.
+    if total_losses > 0:
+        profit_factor = total_gains / total_losses
+    else:
+        profit_factor = 0.0
+
+    return PLSection(
+        realized_pnl=_snapshot_float("realized_pnl"),
+        unrealized_pnl=_snapshot_float("unrealized_pnl"),
+        daily_return=_snapshot_float("daily_return"),
+        cumulative_return=_snapshot_float("cumulative_return"),
+        win_count=_snapshot_int("win_count"),
+        loss_count=_snapshot_int("loss_count"),
+        win_rate=_snapshot_float("win_rate"),
+        profit_factor=profit_factor,
+        sharpe_ratio=_snapshot_float("sharpe_ratio"),
+    )
+
+
+def build_recommendation_accuracy_section(
+    data: CollectedData,
+) -> RecommendationAccuracySection:
+    """Assemble the recommendation accuracy section for a report.
+
+    Every trading decision is counted as acted (``decision`` equals "act",
+    case-insensitively) or skipped (anything else).  Confidence comes from
+    the recommendation referenced by the decision's ``recommendation_id``.
+    Profitability of an acted decision comes from the last prediction
+    outcome collected for the decision's ticker; acted decisions whose
+    ticker has no outcome are left out of the win rate.
+
+    With no trading decisions, every field is zero.
+    """
+    if not data.trading_decisions:
+        return RecommendationAccuracySection(
+            total_evaluated=0,
+            act_count=0,
+            skip_count=0,
+            acted_win_rate=0.0,
+            avg_confidence_acted=0.0,
+            avg_confidence_skipped=0.0,
+        )
+
+    def _mean(values: list[float]) -> float:
+        return (sum(values) / len(values)) if values else 0.0
+
+    # Recommendations by id (as string), for the confidence lookup.
+    recommendation_by_id: dict[str, dict] = {}
+    for recommendation in data.recommendations:
+        recommendation_id = str(recommendation.get("id", ""))
+        if recommendation_id:
+            recommendation_by_id[recommendation_id] = recommendation
+
+    # Outcomes are linked to decisions by ticker only.  Later entries
+    # overwrite earlier ones, so each ticker maps to the last outcome in
+    # collection order.
+    latest_outcome_by_ticker: dict[str, dict] = {}
+    for outcome in data.prediction_outcomes:
+        outcome_ticker = outcome.get("ticker", "")
+        if outcome_ticker:
+            latest_outcome_by_ticker[outcome_ticker] = outcome
+
+    act_count = 0
+    skip_count = 0
+    wins = 0
+    acted_with_outcome = 0
+    acted_confidences: list[float] = []
+    skipped_confidences: list[float] = []
+
+    for decision_row in data.trading_decisions:
+        acted = str(decision_row.get("decision", "")).lower() == "act"
+        linked_rec = recommendation_by_id.get(
+            str(decision_row.get("recommendation_id", "")), {},
+        )
+        confidence = linked_rec.get("confidence")
+        decision_ticker = decision_row.get("ticker", "")
+
+        if not acted:
+            skip_count += 1
+            if confidence is not None:
+                skipped_confidences.append(float(confidence))
+            continue
+
+        act_count += 1
+        if confidence is not None:
+            acted_confidences.append(float(confidence))
+
+        if decision_ticker in latest_outcome_by_ticker:
+            acted_with_outcome += 1
+            if latest_outcome_by_ticker[decision_ticker].get("profitable"):
+                wins += 1
+
+    if acted_with_outcome > 0:
+        acted_win_rate = wins / acted_with_outcome
+    else:
+        acted_win_rate = 0.0
+
+    return RecommendationAccuracySection(
+        total_evaluated=act_count + skip_count,
+        act_count=act_count,
+        skip_count=skip_count,
+        acted_win_rate=acted_win_rate,
+        avg_confidence_acted=_mean(acted_confidences),
+        avg_confidence_skipped=_mean(skipped_confidences),
+    )
+
+
+def build_position_performance_section(
+    data: CollectedData,
+) -> PositionPerformanceSection:
+    """Build the position performance section.
+
+    Emits one ``PositionDetail`` per entry of ``data.open_positions``
+    (status ``"open"``) followed by one per entry of
+    ``data.closed_positions`` (status ``"closed"``).
+
+    * Open positions: P&L is ``(current_price - avg_entry_price) * quantity``
+      and P&L% is that amount relative to ``avg_entry_price * quantity``.
+    * Closed positions: P&L is the stored ``realized_pnl``; P&L% is estimated
+      by ``_closed_position_pnl_pct`` because the stored quantity is not used.
+
+    Missing or falsy numeric fields are treated as 0.
+    """
+    positions: list[PositionDetail] = []
+    now = datetime.now(timezone.utc)
+
+    # Open positions
+    for pos in data.open_positions:
+        entry_price = float(pos.get("avg_entry_price", 0) or 0)
+        current_price = float(pos.get("current_price", 0) or 0)
+        quantity = float(pos.get("quantity", 0) or 0)
+
+        pnl = (current_price - entry_price) * quantity
+        cost_basis = entry_price * quantity
+        pnl_pct = (pnl / cost_basis * 100) if cost_basis > 0 else 0.0
+
+        # NOTE(review): this is the time elapsed since ``updated_at``; it is
+        # only the hold duration if ``updated_at`` marks position entry —
+        # confirm against the collector query.
+        hold_hours = _compute_hold_hours(pos.get("updated_at"), now)
+
+        positions.append(
+            PositionDetail(
+                ticker=pos.get("ticker", ""),
+                entry_price=entry_price,
+                current_or_exit_price=current_price,
+                pnl=pnl,
+                pnl_pct=pnl_pct,
+                hold_duration_hours=hold_hours,
+                status="open",
+            )
+        )
+
+    # Closed positions
+    for pos in data.closed_positions:
+        entry_price = float(pos.get("avg_entry_price", 0) or 0)
+        # ``current_price`` is reported as the exit price for closed rows.
+        exit_price = float(pos.get("current_price", 0) or 0)
+        realized_pnl = float(pos.get("realized_pnl", 0) or 0)
+
+        pnl_pct = _closed_position_pnl_pct(entry_price, exit_price, realized_pnl)
+
+        # NOTE(review): same caveat as above — for a closed row this is the
+        # time since the last update, not necessarily the holding period.
+        hold_hours = _compute_hold_hours(pos.get("updated_at"), now)
+
+        positions.append(
+            PositionDetail(
+                ticker=pos.get("ticker", ""),
+                entry_price=entry_price,
+                current_or_exit_price=exit_price,
+                pnl=realized_pnl,
+                pnl_pct=pnl_pct,
+                hold_duration_hours=hold_hours,
+                status="closed",
+            )
+        )
+
+    return PositionPerformanceSection(positions=positions)
+
+
+def _closed_position_pnl_pct(
+    entry_price: float,
+    exit_price: float,
+    realized_pnl: float,
+) -> float:
+    """Estimate the percentage return of a closed position.
+
+    The stored quantity is not used for closed positions, so the original
+    size is inferred as ``abs(realized_pnl / (exit - entry))`` and the return
+    is measured against ``entry_price * size``.
+
+    Returns 0.0 when *entry_price* is not positive, when the entry and exit
+    prices are equal, or when the inferred cost is not positive (for example
+    when ``realized_pnl`` is 0).
+    """
+    if not (entry_price > 0 and exit_price != entry_price):
+        return 0.0
+    est_quantity = abs(realized_pnl / (exit_price - entry_price))
+    est_cost = entry_price * est_quantity
+    return (realized_pnl / est_cost * 100) if est_cost > 0 else 0.0
+
+
+def _compute_hold_hours(updated_at: datetime | str | None, now: datetime) -> float:
+    """Return the hours elapsed from *updated_at* to *now*, never negative.
+
+    Args:
+        updated_at: A ``datetime``, an ISO-8601 string, or ``None``.
+        now: The reference instant.
+
+    Returns:
+        Elapsed hours as a float.  0.0 is returned when *updated_at* is
+        ``None``, is not a datetime/parseable string, or lies after *now*.
+
+    Naive datetimes on either side are interpreted as UTC so that the
+    subtraction never mixes naive and aware values.
+    """
+    if isinstance(updated_at, str):
+        text = updated_at.strip()
+        # datetime.fromisoformat() only accepts a trailing "Z" from
+        # Python 3.11 onwards; spell the UTC offset out for older versions.
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            updated_at = datetime.fromisoformat(text)
+        except ValueError:
+            return 0.0
+    if not isinstance(updated_at, datetime):
+        # Covers None as well as any unexpected type.
+        return 0.0
+    if updated_at.tzinfo is None:
+        updated_at = updated_at.replace(tzinfo=timezone.utc)
+    if now.tzinfo is None:
+        now = now.replace(tzinfo=timezone.utc)
+    delta = now - updated_at
+    return max(delta.total_seconds() / 3600.0, 0.0)
+
+
+def build_risk_metrics_section(data: CollectedData) -> RiskMetricsSection:
+    """Assemble the risk metrics section from the collected data.
+
+    Risk tier, portfolio heat, max drawdown and current drawdown % are read
+    from ``data.portfolio_snapshot``.  When there is no snapshot, or a value
+    is missing/falsy, the tier is ``"unknown"`` and the numeric metrics are
+    0.0.  The reserve pool balance and the circuit breaker event count come
+    straight from *data* in every case.
+    """
+    # An absent snapshot behaves exactly like one with no keys.
+    snapshot = data.portfolio_snapshot if data.portfolio_snapshot is not None else {}
+
+    def metric(key: str) -> float:
+        return float(snapshot.get(key, 0) or 0)
+
+    return RiskMetricsSection(
+        current_risk_tier=str(snapshot.get("risk_tier", "unknown") or "unknown"),
+        portfolio_heat=metric("portfolio_heat"),
+        max_drawdown=metric("max_drawdown"),
+        current_drawdown_pct=metric("current_drawdown_pct"),
+        reserve_pool_balance=data.reserve_pool_balance,
+        circuit_breaker_event_count=len(data.circuit_breaker_events),
+    )
+
+
+def build_model_quality_section(data: CollectedData) -> ModelQualitySection:
+    """Assemble the model quality section.
+
+    Returns an empty section when no metric snapshots were collected.
+    Otherwise returns exactly three windows, in the order 7d, 30d, 90d.
+    Each window takes its metrics from the first snapshot in
+    ``data.model_metric_snapshots`` with a matching ``lookback_window``
+    (the original comment states the collector orders them by
+    generated_at DESC, making the first match the latest).  A window with
+    no snapshot has every metric set to ``None``.
+    """
+    snapshots = data.model_metric_snapshots
+    if not snapshots:
+        return ModelQualitySection(windows=[])
+
+    lookbacks = ("7d", "30d", "90d")
+    wanted = frozenset(lookbacks)
+    metric_names = (
+        "win_rate",
+        "directional_accuracy",
+        "information_coefficient",
+        "calibration_error",
+        "brier_score",
+    )
+
+    # First occurrence wins, so later (older) rows never overwrite it.
+    first_by_window: dict[str, dict] = {}
+    for row in snapshots:
+        key = row.get("lookback_window", "")
+        if key in wanted:
+            first_by_window.setdefault(key, row)
+
+    windows: list[ModelQualityWindow] = []
+    for lookback in lookbacks:
+        # A missing window reads from an empty dict, so every metric is None.
+        row = first_by_window.get(lookback, {})
+        metrics = {name: _safe_float(row.get(name)) for name in metric_names}
+        windows.append(ModelQualityWindow(lookback=lookback, **metrics))
+
+    return ModelQualitySection(windows=windows)
+
+
+def _safe_float(value: object) -> float | None:
+    """Convert *value* to a finite float, or return ``None``.
+
+    ``None`` is returned when *value* is ``None``, cannot be converted by
+    ``float()`` (wrong type, unparseable string, integer too large for a
+    float), or converts to NaN or +/- infinity.
+    """
+    import math  # local import keeps this helper self-contained
+
+    if value is None:
+        return None
+    try:
+        result = float(value)  # type: ignore[arg-type]
+    except (ValueError, TypeError, OverflowError):
+        # OverflowError: e.g. float(10 ** 400)
+        return None
+    return result if math.isfinite(result) else None
@@ -0,0 +1,437 @@
+"""AI-powered report summarizer with chunking and deterministic fallback.
+
+Generates natural-language summaries for trading performance report sections
+using the Report_Summarizer_Agent (resolved via AgentConfigResolver + llm_factory).
+Data is chunked to fit within the 8k-token context window of the local model.
+
+Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 3.6
+Design: AI Summarizer
+"""
+from __future__ import annotations
+
+import json
+import logging
+import time
+
+import asyncpg
+
+from services.extractor.llm_factory import build_llm_client
+from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
+from services.shared.config import load_config
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# Default ``max_chars`` for chunk_data().  With the len(text) // 4 token
+# estimate used in _log_performance() this is roughly 1,500 tokens per chunk.
+CHUNK_SIZE_LIMIT = 6000  # characters per chunk
+# Word cap for one section summary: requested in the merge prompt and
+# enforced by truncation in summarize_section().
+MAX_SUMMARY_WORDS = 200  # per section summary
+# Word cap for the executive summary: requested in the prompt and enforced
+# by truncation in generate_executive_summary().
+MAX_EXECUTIVE_SUMMARY_WORDS = 300
+
+# Agent slug passed to AgentConfigResolver.resolve() to look up the summarizer.
+_REPORT_SUMMARIZER_SLUG = "report-summarizer"
+
+
+# ---------------------------------------------------------------------------
+# Chunking
+# ---------------------------------------------------------------------------
+
+
+def chunk_data(serialized: str, max_chars: int = CHUNK_SIZE_LIMIT) -> list[str]:
+    """Split *serialized* into newline-aligned chunks.
+
+    Lines are packed greedily: a chunk is closed as soon as adding the next
+    line would push it past *max_chars*.  Lines are never broken, so a chunk
+    stays within *max_chars* unless it consists of a single line that is
+    itself longer than the limit.
+
+    Always returns at least one chunk; empty input yields ``[""]``.
+
+    Round-trip property: ``"".join(chunk_data(s, n)) == s`` for all *s*.
+    """
+    if not serialized:
+        return [""]
+
+    # Cut after every "\n" so each line keeps its own terminator; the final
+    # piece is kept only when the text does not end with a newline.
+    pieces = serialized.split("\n")
+    segments = [piece + "\n" for piece in pieces[:-1]]
+    if pieces[-1]:
+        segments.append(pieces[-1])
+
+    chunks: list[str] = []
+    buffer: list[str] = []
+    buffered = 0
+    for segment in segments:
+        # Close the running chunk before it would overflow.
+        if buffer and buffered + len(segment) > max_chars:
+            chunks.append("".join(buffer))
+            buffer, buffered = [], 0
+        buffer.append(segment)
+        buffered += len(segment)
+
+    if buffer:
+        chunks.append("".join(buffer))
+
+    return chunks or [""]
+
+
+# ---------------------------------------------------------------------------
+# Performance logging
+# ---------------------------------------------------------------------------
+
+
+async def _log_performance(
+    pool: asyncpg.Pool,
+    resolved: ResolvedAgentConfig,
+    success: bool,
+    duration_ms: int,
+    input_text: str,
+    output_text: str,
+    error_message: str | None = None,
+) -> None:
+    """Record one summarizer run in ``agent_performance_log``.
+
+    Best effort: any exception raised while building or executing the
+    insert is logged as a warning and swallowed, so a logging problem never
+    fails the report.  Document id and ticker are stored as NULL,
+    confidence as 0.0 and retry count as 0.
+    """
+    try:
+        # Rough token counts: about four characters per token.
+        input_tokens = len(input_text) // 4
+        output_tokens = len(output_text) // 4
+        await pool.execute(
+            """INSERT INTO agent_performance_log
+                   (agent_id, variant_id, document_id, ticker, success,
+                    duration_ms, confidence, retry_count,
+                    input_tokens, output_tokens, error_message)
+               VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
+            resolved.agent_id,
+            resolved.variant_id,
+            None,  # document_id: report summaries have none
+            None,  # ticker: report summaries have none
+            success,
+            duration_ms,
+            0.0,  # confidence: not produced for summaries
+            0,  # retry_count
+            input_tokens,
+            output_tokens,
+            error_message,
+        )
+    except Exception:
+        logger.warning("Failed to log summarizer performance", exc_info=True)
+
+
+# ---------------------------------------------------------------------------
+# LLM summarization helpers
+# ---------------------------------------------------------------------------
+
+
+async def _summarize_chunk(
+    resolved: ResolvedAgentConfig,
+    section_name: str,
+    chunk: str,
+) -> str:
+    """Ask the summarizer model for a summary of one chunk.
+
+    A fresh LLM client is built for the call and always closed afterwards.
+
+    Returns:
+        The model output with surrounding whitespace removed.
+
+    Raises:
+        RuntimeError: If the attempt reports an error or the output is
+            blank.  Callers are expected to handle fallback.
+    """
+    settings = load_config()
+    llm = build_llm_client(resolved, settings.ollama, settings.vllm)
+    try:
+        result = await llm.call_llm(
+            prompts={
+                "system": resolved.system_prompt,
+                "user": f"Summarize this {section_name} data:\n{chunk}",
+            },
+            json_schema={},  # plain text, no structured output
+            document_text="",
+        )
+        if result.error:
+            raise RuntimeError(f"LLM error: {result.error}")
+        text = result.raw_output.strip()
+        if not text:
+            raise RuntimeError("LLM returned empty response")
+        return text
+    finally:
+        await llm.close()
+
+
+async def _merge_summaries(
+    resolved: ResolvedAgentConfig,
+    section_name: str,
+    summaries: list[str],
+) -> str:
+    """Fuse several chunk summaries into one summary with a single LLM call.
+
+    The summaries are joined with blank lines and the model is asked for a
+    coherent text of at most ``MAX_SUMMARY_WORDS`` words.  The client built
+    for the call is always closed.
+
+    Raises:
+        RuntimeError: If the attempt reports an error or the output is blank.
+    """
+    combined = "\n\n".join(summaries)
+    settings = load_config()
+    llm = build_llm_client(resolved, settings.ollama, settings.vllm)
+    try:
+        instruction = (
+            f"Merge these {section_name} summaries into a single coherent "
+            f"summary of no more than {MAX_SUMMARY_WORDS} words:\n{combined}"
+        )
+        result = await llm.call_llm(
+            prompts={"system": resolved.system_prompt, "user": instruction},
+            json_schema={},
+            document_text="",
+        )
+        if result.error:
+            raise RuntimeError(f"LLM merge error: {result.error}")
+        merged = result.raw_output.strip()
+        if not merged:
+            raise RuntimeError("LLM returned empty merge response")
+        return merged
+    finally:
+        await llm.close()
+
+
+# ---------------------------------------------------------------------------
+# Section summarization
+# ---------------------------------------------------------------------------
+
+
+async def summarize_section(
+    pool: asyncpg.Pool,
+    resolver: AgentConfigResolver,
+    section_name: str,
+    section_data: dict,
+) -> str:
+    """Produce the natural-language summary for one report section.
+
+    Steps:
+
+    1. Resolve the summarizer agent; if it is not found, return the
+       deterministic summary immediately (nothing is logged to the DB).
+    2. Serialize *section_data* as indented JSON and split it into chunks.
+    3. Summarize the chunks one after another.
+    4. With more than one chunk, merge the partial summaries through a
+       further LLM call; if only that merge fails, join them with newlines.
+    5. Cap the result at ``MAX_SUMMARY_WORDS`` words, cutting back to the
+       last "." when that period lies in the second half of the text.
+    6. Write one row to ``agent_performance_log`` for the whole call.
+
+    Any other failure is logged, recorded as an unsuccessful run, and
+    answered with the deterministic summary.
+    """
+    resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
+    if resolved is None:
+        logger.error(
+            "Report summarizer agent not found (slug=%s) — using deterministic fallback",
+            _REPORT_SUMMARIZER_SLUG,
+        )
+        return build_deterministic_summary(section_name, section_data)
+
+    serialized = json.dumps(section_data, indent=2, default=str)
+    chunks = chunk_data(serialized)
+
+    started = time.monotonic()
+
+    def elapsed_ms() -> int:
+        return int((time.monotonic() - started) * 1000)
+
+    try:
+        # Sequential on purpose: one chunk at a time, in order.
+        partials: list[str] = [
+            await _summarize_chunk(resolved, section_name, piece)
+            for piece in chunks
+        ]
+
+        if len(partials) <= 1:
+            final_summary = partials[0]
+        else:
+            try:
+                final_summary = await _merge_summaries(
+                    resolved, section_name, partials,
+                )
+            except Exception:
+                # The partial summaries are still usable without the merge.
+                logger.warning(
+                    "Chunk merge LLM call failed for section %s — concatenating summaries",
+                    section_name,
+                )
+                final_summary = "\n".join(partials)
+
+        # Enforce the word cap, preferring to stop at the end of a sentence.
+        tokens = final_summary.split()
+        if len(tokens) > MAX_SUMMARY_WORDS:
+            clipped = " ".join(tokens[:MAX_SUMMARY_WORDS])
+            cut = clipped.rfind(".")
+            final_summary = (
+                clipped[: cut + 1] if cut > len(clipped) // 2 else clipped
+            )
+
+        duration_ms = elapsed_ms()
+        await _log_performance(
+            pool, resolved, True, duration_ms, serialized, final_summary,
+        )
+        return final_summary
+
+    except Exception as exc:
+        duration_ms = elapsed_ms()
+        logger.warning(
+            "AI summarization failed for section %s: %s — using deterministic fallback",
+            section_name,
+            exc,
+        )
+        await _log_performance(
+            pool, resolved, False, duration_ms, serialized, "",
+            error_message=str(exc),
+        )
+        return build_deterministic_summary(section_name, section_data)
+
+
+# ---------------------------------------------------------------------------
+# Deterministic fallback summaries
+# ---------------------------------------------------------------------------
+
+# ``str.format`` templates for the deterministic fallback summaries, keyed by
+# section name.  Placeholders are filled from the section's data dict;
+# ``position_count`` and ``window_count`` are computed by
+# build_deterministic_summary() before formatting.
+_DETERMINISTIC_TEMPLATES: dict[str, str] = {
+    # NOTE(review): daily_return and win_rate are printed verbatim followed
+    # by "%"; confirm the P&L section already stores them as percentages.
+    "pnl": (
+        "P&L Summary: Realized P&L ${realized_pnl}, unrealized ${unrealized_pnl}, "
+        "daily return {daily_return}%, win rate {win_rate}%."
+    ),
+    # acted_win_rate is a 0..1 fraction (wins / acted with outcome), so it is
+    # rendered with the "%" presentation type: 0.5 -> "50.0%".
+    "recommendation_accuracy": (
+        "Recommendation Accuracy: {total_evaluated} evaluated, "
+        "{act_count} acted ({acted_win_rate:.1%} win rate), "
+        "{skip_count} skipped. "
+        "Avg confidence acted {avg_confidence_acted}, skipped {avg_confidence_skipped}."
+    ),
+    "position_performance": (
+        "Position Performance: {position_count} positions tracked during the period."
+    ),
+    "risk_metrics": (
+        "Risk Metrics: Risk tier {current_risk_tier}, portfolio heat {portfolio_heat}, "
+        "max drawdown {max_drawdown}, current drawdown {current_drawdown_pct}%, "
+        "reserve pool ${reserve_pool_balance}, "
+        "{circuit_breaker_event_count} circuit breaker events."
+    ),
+    "model_quality": (
+        "Model Quality: {window_count} lookback windows evaluated."
+    ),
+}
+
+
+def build_deterministic_summary(section_name: str, section_data: dict) -> str:
+    """Render a template-based summary for a section without using the LLM.
+
+    Sections without a template get a generic line reporting how many
+    entries *section_data* has.  For known sections the template is filled
+    from a copy of *section_data*; ``position_performance`` and
+    ``model_quality`` first receive a count derived from their list field.
+    If formatting raises ``KeyError``, ``ValueError`` or ``TypeError`` the
+    problem is logged and a generic failure line is returned instead.
+    """
+    if section_name not in _DETERMINISTIC_TEMPLATES:
+        # Generic fallback for unknown sections
+        return f"{section_name} summary: {len(section_data)} metrics reported."
+
+    template = _DETERMINISTIC_TEMPLATES[section_name]
+
+    # section -> (list field to count, template variable receiving the count)
+    derived_counts = {
+        "position_performance": ("positions", "position_count"),
+        "model_quality": ("windows", "window_count"),
+    }
+
+    try:
+        values = dict(section_data)  # copy: the caller's dict is not mutated
+        if section_name in derived_counts:
+            list_key, count_key = derived_counts[section_name]
+            values[count_key] = len(values.get(list_key, []))
+        return template.format(**values)
+    except (KeyError, ValueError, TypeError) as exc:
+        logger.warning(
+            "Deterministic summary template failed for %s: %s",
+            section_name,
+            exc,
+        )
+        return f"{section_name} summary: data available but template formatting failed."
+
+
+# ---------------------------------------------------------------------------
+# Executive summary
+# ---------------------------------------------------------------------------
+
+
+async def generate_executive_summary(
+    pool: asyncpg.Pool,
+    resolver: AgentConfigResolver,
+    section_summaries: dict[str, str],
+) -> str:
+    """Synthesize the executive summary from the per-section summaries.
+
+    The section summaries are joined as ``"name: summary"`` paragraphs.  If
+    that text spans several chunks, each chunk is condensed first and the
+    condensed texts feed the final synthesis call.  The result is capped at
+    ``MAX_EXECUTIVE_SUMMARY_WORDS`` words, cutting back to the last "." when
+    that period lies in the second half of the text.
+
+    One row is written to ``agent_performance_log`` per call.  When the
+    agent cannot be resolved, or any step fails, the joined section
+    summaries are returned unchanged.
+    """
+    resolved = await resolver.resolve(_REPORT_SUMMARIZER_SLUG)
+    concatenated = "\n\n".join(
+        f"{name}: {summary}" for name, summary in section_summaries.items()
+    )
+
+    if resolved is None:
+        logger.error(
+            "Report summarizer agent not found — using concatenated summaries as executive summary",
+        )
+        return concatenated
+
+    chunks = chunk_data(concatenated)
+
+    started = time.monotonic()
+
+    def elapsed_ms() -> int:
+        return int((time.monotonic() - started) * 1000)
+
+    try:
+        # Condense first only when the joined text does not fit in one chunk.
+        if len(chunks) > 1:
+            condensed: list[str] = [
+                await _summarize_chunk(resolved, "executive", piece)
+                for piece in chunks
+            ]
+            input_text = "\n\n".join(condensed)
+        else:
+            input_text = chunks[0]
+
+        # Final executive summary call
+        settings = load_config()
+        llm = build_llm_client(resolved, settings.ollama, settings.vllm)
+        try:
+            instruction = (
+                f"Synthesize these trading performance section summaries into "
+                f"a concise executive summary of no more than "
+                f"{MAX_EXECUTIVE_SUMMARY_WORDS} words:\n{input_text}"
+            )
+            result = await llm.call_llm(
+                prompts={"system": resolved.system_prompt, "user": instruction},
+                json_schema={},
+                document_text="",
+            )
+        finally:
+            await llm.close()
+
+        if result.error:
+            raise RuntimeError(f"Executive summary LLM error: {result.error}")
+        executive = result.raw_output.strip()
+        if not executive:
+            raise RuntimeError("Executive summary LLM returned empty response")
+
+        # Enforce the word cap, preferring to stop at the end of a sentence.
+        tokens = executive.split()
+        if len(tokens) > MAX_EXECUTIVE_SUMMARY_WORDS:
+            clipped = " ".join(tokens[:MAX_EXECUTIVE_SUMMARY_WORDS])
+            cut = clipped.rfind(".")
+            executive = clipped[: cut + 1] if cut > len(clipped) // 2 else clipped
+
+        duration_ms = elapsed_ms()
+        await _log_performance(
+            pool, resolved, True, duration_ms, concatenated, executive,
+        )
+        return executive
+
+    except Exception as exc:
+        duration_ms = elapsed_ms()
+        logger.warning(
+            "Executive summary generation failed: %s — using concatenated summaries",
+            exc,
+        )
+        await _log_performance(
+            pool, resolved, False, duration_ms, concatenated, "",
+            error_message=str(exc),
+        )
+        return concatenated
@@ -0,0 +1,175 @@
+"""Report validator — cross-checks computed metrics against live data.
+
+Compares report section values against prediction_outcomes and
+model_metric_snapshots, flagging discrepancies that exceed the
+configured threshold.
+"""
+
+from __future__ import annotations
+
+import logging
+import math
+
+from services.reporting.models import (
+    ModelQualitySection,
+    RecommendationAccuracySection,
+    ReportData,
+    ValidationStatus,
+    ValidationWarning,
+)
+
+logger = logging.getLogger(__name__)
+
+# Largest relative difference, in percent, tolerated between a reported
+# value and its reference value.  _check_discrepancy() emits a warning only
+# when the difference is strictly greater than this.
+DISCREPANCY_THRESHOLD_PCT = 5.0
+
+
+def _sanitize(value: float | None) -> float:
+    """Return *value* unchanged when it is a finite number, otherwise 0.0.
+
+    ``None``, NaN and positive or negative infinity all map to 0.0.
+    """
+    if value is not None and math.isfinite(value):
+        return value
+    return 0.0
+
+
+def _check_discrepancy(
+    field_name: str,
+    computed: float,
+    snapshot: float,
+) -> ValidationWarning | None:
+    """Return a warning when *computed* strays too far from *snapshot*.
+
+    Both inputs are sanitized first (None/NaN/infinity become 0.0).  The
+    difference is measured relative to *snapshot*, in percent, and a
+    ``ValidationWarning`` is returned only when it exceeds
+    ``DISCREPANCY_THRESHOLD_PCT``.
+
+    Edge cases:
+    - both values 0 → no warning
+    - snapshot 0, computed non-zero → treated as a 100% difference
+    - callers skip NULL snapshot values before calling this function
+    """
+    computed = _sanitize(computed)
+    snapshot = _sanitize(snapshot)
+
+    if snapshot != 0.0:
+        pct_diff = abs(computed - snapshot) / abs(snapshot) * 100.0
+    elif computed != 0.0:
+        # Nothing to divide by: any non-zero value counts as fully off.
+        pct_diff = 100.0
+    else:
+        return None
+
+    if pct_diff > DISCREPANCY_THRESHOLD_PCT:
+        return ValidationWarning(
+            field_name=field_name,
+            computed_value=computed,
+            snapshot_value=snapshot,
+            pct_difference=round(pct_diff, 4),
+        )
+    return None
+
+
+def validate_recommendation_accuracy(
+    section: RecommendationAccuracySection,
+    prediction_outcomes: list[dict],
+) -> list[ValidationWarning]:
+    """Cross-check the reported acted win rate against prediction outcomes.
+
+    The reference win rate is the share of entries in *prediction_outcomes*
+    whose ``profitable`` value is truthy.  It is compared with
+    ``section.acted_win_rate`` via ``_check_discrepancy``.
+
+    Returns a list holding at most one warning (field ``acted_win_rate``);
+    the list is empty when there are no outcomes or the rates agree.
+
+    NOTE(review): the reference rate covers every outcome passed in, not
+    only outcomes of acted-on recommendations — confirm this is the
+    intended comparison.
+    """
+    if not prediction_outcomes:
+        return []
+
+    wins = sum(1 for outcome in prediction_outcomes if outcome.get("profitable"))
+    reference_rate = wins / len(prediction_outcomes)
+
+    warning = _check_discrepancy(
+        "acted_win_rate",
+        section.acted_win_rate,
+        reference_rate,
+    )
+    return [] if warning is None else [warning]
+
+
+def validate_model_quality(
+    section: ModelQualitySection,
+    metric_snapshots: list[dict],
+) -> list[ValidationWarning]:
+    """Cross-check reported model quality metrics against metric snapshots.
+
+    Each window in *section* is matched to the first snapshot with the same
+    ``lookback_window`` (the original comment states the collector orders
+    them by generated_at DESC, making the first match the latest).  For
+    win_rate, directional_accuracy, information_coefficient,
+    calibration_error and brier_score, a warning named
+    ``"<lookback>_<metric>"`` is produced when ``_check_discrepancy`` flags
+    the pair.  A metric is skipped when either side is ``None``; a window is
+    skipped when it has no snapshot.
+    """
+    if not metric_snapshots:
+        return []
+
+    # First occurrence per window wins; rows without a window are ignored.
+    reference_by_window: dict[str, dict] = {}
+    for row in metric_snapshots:
+        key = row.get("lookback_window", "")
+        if key:
+            reference_by_window.setdefault(key, row)
+
+    metric_names = (
+        "win_rate",
+        "directional_accuracy",
+        "information_coefficient",
+        "calibration_error",
+        "brier_score",
+    )
+
+    found: list[ValidationWarning] = []
+    for window in section.windows:
+        reference = reference_by_window.get(window.lookback)
+        if reference is None:
+            continue
+
+        for metric in metric_names:
+            reported = getattr(window, metric, None)
+            expected = reference.get(metric)
+
+            # Nothing to compare when either side is missing.
+            if expected is None or reported is None:
+                continue
+
+            expected_value = _sanitize(float(expected))
+            reported_value = _sanitize(reported)
+
+            warning = _check_discrepancy(
+                f"{window.lookback}_{metric}",
+                reported_value,
+                expected_value,
+            )
+            if warning is not None:
+                found.append(warning)
+
+    return found
+
+
+def compute_validation_status(report: ReportData) -> ValidationStatus:
+    """Derive the overall validation status of a report.
+
+    Returns ``ValidationStatus.WARNINGS`` as soon as the pnl,
+    recommendation_accuracy or model_quality section (checked in that
+    order) carries any validation warnings, and ``ValidationStatus.PASSED``
+    otherwise.
+    """
+    checked_sections = ("pnl", "recommendation_accuracy", "model_quality")
+    # The generator is lazy: later sections are not read once one has warnings.
+    has_warnings = any(
+        getattr(report, name).validation_warnings for name in checked_sections
+    )
+    return ValidationStatus.WARNINGS if has_warnings else ValidationStatus.PASSED