feat: trading feedback engine — periodic performance reports with AI summarization
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled

- Migration 038: trading_reports table + report-summarizer agent seed
- 6 reporting modules: models, collector, sections, validator, summarizer, generator
- API endpoints: GET /api/reports (paginated, filterable), GET /api/reports/{id}
- Frontend hooks: useReports, useReport with TanStack Query
- Scheduler: daily (after 16:30 ET) and weekly (Saturday) report triggers
- Redis queue consumer for async report generation with retry/dedup
- 5 property-based tests (chunking, serialization, validation, accuracy, deltas)
- 109 unit/integration tests across all modules
- 6 frontend hook tests with MSW mocks
This commit is contained in:
Celes Renata
2026-05-01 22:13:09 +00:00
parent 376fcb4bb4
commit bc077bfcc8
28 changed files with 6771 additions and 1 deletions
+551
View File
@@ -0,0 +1,551 @@
"""Unit tests for report validator.
Tests the validation functions from services.reporting.validator with
specific discrepancy scenarios, boundary cases, and edge cases.
Requirements validated: 4.1, 4.2, 4.3, 4.4
"""
from __future__ import annotations

from datetime import date, datetime, timezone

from services.reporting.models import (
    ModelQualitySection,
    ModelQualityWindow,
    PLSection,
    PositionPerformanceSection,
    RecommendationAccuracySection,
    ReportData,
    ReportType,
    RiskMetricsSection,
    ValidationStatus,
    ValidationWarning,
)
from services.reporting.validator import (
    _check_discrepancy,
    compute_validation_status,
    validate_model_quality,
    validate_recommendation_accuracy,
)
# ── Helpers ──────────────────────────────────────────────────────────────
def _make_report(**overrides: object) -> ReportData:
    """Create a baseline daily ``ReportData`` for the validator tests.

    All numeric section fields are zero and the position-performance and
    model-quality sections are constructed with no arguments.  Any keyword
    passed in *overrides* replaces the baseline value of the same
    ``ReportData`` field before the report is constructed.
    """
    zeroed_pnl = PLSection(
        realized_pnl=0.0,
        unrealized_pnl=0.0,
        daily_return=0.0,
        cumulative_return=0.0,
        win_count=0,
        loss_count=0,
        win_rate=0.0,
        profit_factor=0.0,
        sharpe_ratio=0.0,
    )
    zeroed_accuracy = RecommendationAccuracySection(
        total_evaluated=0,
        act_count=0,
        skip_count=0,
        acted_win_rate=0.0,
        avg_confidence_acted=0.0,
        avg_confidence_skipped=0.0,
    )
    empty_positions = PositionPerformanceSection()
    zeroed_risk = RiskMetricsSection(
        current_risk_tier="moderate",
        portfolio_heat=0.0,
        max_drawdown=0.0,
        current_drawdown_pct=0.0,
        reserve_pool_balance=0.0,
        circuit_breaker_event_count=0,
    )
    empty_model_quality = ModelQualitySection()

    baseline: dict = {
        "pnl": zeroed_pnl,
        "recommendation_accuracy": zeroed_accuracy,
        "position_performance": empty_positions,
        "risk_metrics": zeroed_risk,
        "model_quality": empty_model_quality,
        "generated_at": datetime(2025, 1, 15, 21, 30, tzinfo=timezone.utc),
        "period_start": date(2025, 1, 15),
        "period_end": date(2025, 1, 15),
        "report_type": ReportType.DAILY,
    }
    # Caller-supplied fields win over the baseline ones.
    return ReportData(**{**baseline, **overrides})
# ═══════════════════════════════════════════════════════════════════════
# 1. _check_discrepancy — boundary tests
# Requirements validated: 4.1, 4.2, 4.3
# ═══════════════════════════════════════════════════════════════════════
class TestCheckDiscrepancy:
    """Boundary and edge-case tests for ``_check_discrepancy``.

    Every call uses the argument order ``(field_name, computed, snapshot)``.
    The tests expect ``None`` when no warning is due and otherwise an object
    exposing ``field_name``, ``computed_value``, ``snapshot_value`` and
    ``pct_difference``.
    """

    def test_exactly_5_percent_no_warning(self) -> None:
        """Exactly 5% discrepancy does NOT trigger a warning (threshold is >5%)."""
        # snapshot=100, computed=105 → |105-100|/100*100 = 5.0%
        result = _check_discrepancy("test_field", 105.0, 100.0)
        assert result is None

    def test_just_above_5_percent_triggers_warning(self) -> None:
        """5.1% discrepancy triggers a warning."""
        # snapshot=100, computed=105.1 → |105.1-100|/100*100 = 5.1%
        result = _check_discrepancy("test_field", 105.1, 100.0)
        assert result is not None
        assert result.field_name == "test_field"
        assert result.computed_value == 105.1
        assert result.snapshot_value == 100.0
        # Tolerance absorbs floating-point error in the percentage.
        assert abs(result.pct_difference - 5.1) < 0.01

    def test_snapshot_zero_computed_nonzero_warns(self) -> None:
        """snapshot=0 with computed≠0 → 100% discrepancy → warning."""
        result = _check_discrepancy("test_field", 42.0, 0.0)
        assert result is not None
        assert result.pct_difference == 100.0

    def test_both_zero_no_warning(self) -> None:
        """Both snapshot=0 and computed=0 → no warning."""
        result = _check_discrepancy("test_field", 0.0, 0.0)
        assert result is None

    def test_large_discrepancy(self) -> None:
        """A large discrepancy (50%) triggers a warning."""
        # snapshot=100, computed=150 → 50%
        result = _check_discrepancy("big_diff", 150.0, 100.0)
        assert result is not None
        assert abs(result.pct_difference - 50.0) < 0.01

    def test_small_discrepancy_no_warning(self) -> None:
        """A small discrepancy (1%) does not trigger a warning."""
        # snapshot=100, computed=101 → 1%
        result = _check_discrepancy("small_diff", 101.0, 100.0)
        assert result is None

    def test_computed_below_snapshot(self) -> None:
        """Discrepancy is detected when computed < snapshot too."""
        # snapshot=100, computed=94 → 6%
        result = _check_discrepancy("below", 94.0, 100.0)
        assert result is not None
        assert abs(result.pct_difference - 6.0) < 0.01

    def test_nan_computed_sanitized_to_zero(self) -> None:
        """NaN computed value is sanitized to 0.0 before comparison."""
        result = _check_discrepancy("nan_field", float("nan"), 100.0)
        # sanitized computed=0.0, snapshot=100 → 100% discrepancy
        assert result is not None
        assert result.computed_value == 0.0
        assert result.pct_difference == 100.0

    def test_inf_computed_sanitized_to_zero(self) -> None:
        """Infinity computed value is sanitized to 0.0 before comparison."""
        result = _check_discrepancy("inf_field", float("inf"), 100.0)
        # sanitized computed=0.0, snapshot=100 → 100% discrepancy, same as NaN
        assert result is not None
        assert result.computed_value == 0.0
        assert result.pct_difference == 100.0

    def test_snapshot_zero_computed_zero_small(self) -> None:
        """snapshot=0.0 and computed=0.0 exactly → no warning."""
        # NOTE(review): same inputs and expectation as
        # test_both_zero_no_warning (only the field name differs); kept so
        # the collected test IDs stay stable.
        result = _check_discrepancy("zero_zero", 0.0, 0.0)
        assert result is None
# ═══════════════════════════════════════════════════════════════════════
# 2. validate_recommendation_accuracy
# Requirements validated: 4.1
# ═══════════════════════════════════════════════════════════════════════
class TestValidateRecommendationAccuracy:
    """Checks ``validate_recommendation_accuracy`` against prediction outcomes.

    Each test passes a ``RecommendationAccuracySection`` together with a list
    of ``{"profitable": bool}`` outcome dicts and inspects the returned
    warning list.
    """

    @staticmethod
    def _outcomes(*profitable_flags: bool) -> list[dict]:
        """Build one ``{"profitable": flag}`` outcome dict per flag, in order."""
        return [{"profitable": flag} for flag in profitable_flags]

    def test_matching_data_no_warnings(self) -> None:
        """When section win rate matches prediction outcomes, no warnings."""
        # 2 of the 4 outcomes are profitable, i.e. a 0.5 win rate.
        reported = RecommendationAccuracySection(
            total_evaluated=4,
            act_count=4,
            skip_count=0,
            acted_win_rate=0.5,
            avg_confidence_acted=0.7,
            avg_confidence_skipped=0.0,
        )
        observed = self._outcomes(True, False, True, False)

        found = validate_recommendation_accuracy(reported, observed)

        assert found == []

    def test_discrepancy_triggers_warning(self) -> None:
        """When section win rate differs >5% from outcomes, a warning is raised."""
        # Outcomes give 1/2 = 0.5 while the section claims 0.8 (60% apart).
        reported = RecommendationAccuracySection(
            total_evaluated=2,
            act_count=2,
            skip_count=0,
            acted_win_rate=0.8,
            avg_confidence_acted=0.7,
            avg_confidence_skipped=0.0,
        )
        observed = self._outcomes(True, False)

        found = validate_recommendation_accuracy(reported, observed)

        assert len(found) == 1
        assert found[0].field_name == "acted_win_rate"

    def test_no_outcomes_returns_empty(self) -> None:
        """When there are no prediction outcomes, validation is skipped."""
        reported = RecommendationAccuracySection(
            total_evaluated=5,
            act_count=3,
            skip_count=2,
            acted_win_rate=0.6,
            avg_confidence_acted=0.7,
            avg_confidence_skipped=0.4,
        )

        found = validate_recommendation_accuracy(reported, [])

        assert found == []

    def test_all_profitable_matching(self) -> None:
        """All outcomes profitable and section says 1.0 → no warning."""
        reported = RecommendationAccuracySection(
            total_evaluated=3,
            act_count=3,
            skip_count=0,
            acted_win_rate=1.0,
            avg_confidence_acted=0.9,
            avg_confidence_skipped=0.0,
        )
        observed = self._outcomes(True, True, True)

        found = validate_recommendation_accuracy(reported, observed)

        assert found == []
# ═══════════════════════════════════════════════════════════════════════
# 3. validate_model_quality
# Requirements validated: 4.2, 4.3
# ═══════════════════════════════════════════════════════════════════════
class TestValidateModelQuality:
    """Checks ``validate_model_quality`` against metric snapshots.

    Each test pairs a ``ModelQualitySection`` (one or more lookback windows)
    with a list of snapshot dicts keyed by ``"lookback_window"`` and inspects
    the returned warning list.
    """

    # Reference metric sets reused by the tests; key order matches the
    # keyword order used when building windows and snapshots.
    _METRICS_7D = {
        "win_rate": 0.65,
        "directional_accuracy": 0.62,
        "information_coefficient": 0.08,
        "calibration_error": 0.12,
        "brier_score": 0.22,
    }
    _METRICS_30D = {
        "win_rate": 0.60,
        "directional_accuracy": 0.58,
        "information_coefficient": 0.06,
        "calibration_error": 0.15,
        "brier_score": 0.25,
    }
    _METRICS_90D = {
        "win_rate": 0.55,
        "directional_accuracy": 0.53,
        "information_coefficient": 0.04,
        "calibration_error": 0.18,
        "brier_score": 0.28,
    }

    @staticmethod
    def _window(lookback: str, **metrics: object) -> ModelQualityWindow:
        """Build a ``ModelQualityWindow`` for *lookback* with the given metrics."""
        return ModelQualityWindow(lookback=lookback, **metrics)

    @staticmethod
    def _snapshot(lookback: str, **metrics: object) -> dict:
        """Build a snapshot dict for *lookback* with the given metrics."""
        return {"lookback_window": lookback, **metrics}

    def test_matching_data_no_warnings(self) -> None:
        """When section metrics match snapshots, no warnings are produced."""
        reported = ModelQualitySection(
            windows=[self._window("7d", **self._METRICS_7D)],
        )
        stored = [self._snapshot("7d", **self._METRICS_7D)]

        found = validate_model_quality(reported, stored)

        assert found == []

    def test_discrepancy_triggers_warnings(self) -> None:
        """When section metrics differ >5% from snapshots, warnings are raised."""
        # Section win rate 0.80 vs snapshot 0.65 is roughly 23% apart.
        inflated = {**self._METRICS_7D, "win_rate": 0.80}
        reported = ModelQualitySection(windows=[self._window("7d", **inflated)])
        stored = [self._snapshot("7d", **self._METRICS_7D)]

        found = validate_model_quality(reported, stored)

        assert len(found) == 1
        assert found[0].field_name == "7d_win_rate"

    def test_null_snapshot_value_skipped(self) -> None:
        """When a snapshot metric is NULL (None), that metric is skipped."""
        reported = ModelQualitySection(
            windows=[self._window("7d", **self._METRICS_7D)],
        )
        # Every snapshot metric is None (NULL), so nothing can be compared.
        all_null = dict.fromkeys(self._METRICS_7D)
        stored = [self._snapshot("7d", **all_null)]

        found = validate_model_quality(reported, stored)

        assert found == []

    def test_no_snapshots_returns_empty(self) -> None:
        """When there are no metric snapshots, validation is skipped."""
        reported = ModelQualitySection(
            windows=[self._window("7d", **self._METRICS_7D)],
        )

        found = validate_model_quality(reported, [])

        assert found == []

    def test_multiple_windows_validated(self) -> None:
        """Validation runs across all lookback windows."""
        # The 30d section win rate 0.90 vs snapshot 0.60 is 50% apart.
        inflated_30d = {**self._METRICS_30D, "win_rate": 0.90}
        reported = ModelQualitySection(
            windows=[
                self._window("7d", **self._METRICS_7D),
                self._window("30d", **inflated_30d),
            ],
        )
        stored = [
            self._snapshot("7d", **self._METRICS_7D),
            self._snapshot("30d", **self._METRICS_30D),
        ]

        found = validate_model_quality(reported, stored)

        # Only the 30d win rate is expected to be flagged.
        assert len(found) == 1
        assert found[0].field_name == "30d_win_rate"

    def test_null_section_value_skipped(self) -> None:
        """When a section metric is None, that metric is skipped."""
        all_null = dict.fromkeys(self._METRICS_7D)
        reported = ModelQualitySection(windows=[self._window("7d", **all_null)])
        stored = [self._snapshot("7d", **self._METRICS_7D)]

        found = validate_model_quality(reported, stored)

        assert found == []

    def test_no_matching_window_in_snapshots(self) -> None:
        """When section has a window not in snapshots, it is skipped."""
        reported = ModelQualitySection(
            windows=[self._window("90d", **self._METRICS_90D)],
        )
        stored = [self._snapshot("7d", **self._METRICS_7D)]

        found = validate_model_quality(reported, stored)

        assert found == []
# ═══════════════════════════════════════════════════════════════════════
# 4. compute_validation_status
# Requirements validated: 4.4
# ═══════════════════════════════════════════════════════════════════════
class TestComputeValidationStatus:
    """Tests for compute_validation_status.

    Each test builds a report with ``_make_report`` and checks the overall
    status: ``PASSED`` when no section carries validation warnings and
    ``WARNINGS`` when at least one section does.

    ``ValidationWarning`` comes from the module-level
    ``services.reporting.models`` import instead of being re-imported inside
    every test method.
    """

    @staticmethod
    def _pnl_with_warnings(warnings: list[ValidationWarning]) -> PLSection:
        """Return an all-zero ``PLSection`` carrying the given warnings."""
        return PLSection(
            realized_pnl=0.0,
            unrealized_pnl=0.0,
            daily_return=0.0,
            cumulative_return=0.0,
            win_count=0,
            loss_count=0,
            win_rate=0.0,
            profit_factor=0.0,
            sharpe_ratio=0.0,
            validation_warnings=warnings,
        )

    def test_no_warnings_returns_passed(self) -> None:
        """When no sections have warnings, status is PASSED."""
        report = _make_report()
        status = compute_validation_status(report)
        assert status == ValidationStatus.PASSED

    def test_pnl_warnings_returns_warnings(self) -> None:
        """When P&L section has warnings, status is WARNINGS."""
        report = _make_report(
            pnl=self._pnl_with_warnings(
                [
                    ValidationWarning(
                        field_name="test",
                        computed_value=1.0,
                        snapshot_value=0.5,
                        pct_difference=100.0,
                    ),
                ],
            ),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS

    def test_recommendation_accuracy_warnings_returns_warnings(self) -> None:
        """When recommendation accuracy section has warnings, status is WARNINGS."""
        report = _make_report(
            recommendation_accuracy=RecommendationAccuracySection(
                total_evaluated=0,
                act_count=0,
                skip_count=0,
                acted_win_rate=0.0,
                avg_confidence_acted=0.0,
                avg_confidence_skipped=0.0,
                validation_warnings=[
                    ValidationWarning(
                        field_name="acted_win_rate",
                        computed_value=0.8,
                        snapshot_value=0.5,
                        pct_difference=60.0,
                    ),
                ],
            ),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS

    def test_model_quality_warnings_returns_warnings(self) -> None:
        """When model quality section has warnings, status is WARNINGS."""
        report = _make_report(
            model_quality=ModelQualitySection(
                validation_warnings=[
                    ValidationWarning(
                        field_name="7d_win_rate",
                        computed_value=0.9,
                        snapshot_value=0.65,
                        pct_difference=38.46,
                    ),
                ],
            ),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS

    def test_multiple_sections_with_warnings(self) -> None:
        """When multiple sections have warnings, status is still WARNINGS."""
        # The same warning instance is attached to both sections.
        w = ValidationWarning(
            field_name="x",
            computed_value=1.0,
            snapshot_value=0.0,
            pct_difference=100.0,
        )
        report = _make_report(
            pnl=self._pnl_with_warnings([w]),
            model_quality=ModelQualitySection(validation_warnings=[w]),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS