bc077bfcc8
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
- Migration 038: trading_reports table + report-summarizer agent seed
- 6 reporting modules: models, collector, sections, validator, summarizer, generator
- API endpoints: GET /api/reports (paginated, filterable), GET /api/reports/{id}
- Frontend hooks: useReports, useReport with TanStack Query
- Scheduler: daily (after 16:30 ET) and weekly (Saturday) report triggers
- Redis queue consumer for async report generation with retry/dedup
- 5 property-based tests (chunking, serialization, validation, accuracy, deltas)
- 109 unit/integration tests across all modules
- 6 frontend hook tests with MSW mocks
552 lines
21 KiB
Python
552 lines
21 KiB
Python
"""Unit tests for report validator.

Tests the validation functions from services.reporting.validator with
specific discrepancy scenarios, boundary cases, and edge cases.

Requirements validated: 4.1, 4.2, 4.3, 4.4
"""

from __future__ import annotations

from datetime import date, datetime, timezone

from services.reporting.models import (
    ModelQualitySection,
    ModelQualityWindow,
    PLSection,
    PositionPerformanceSection,
    RecommendationAccuracySection,
    ReportData,
    ReportType,
    RiskMetricsSection,
    ValidationStatus,
    ValidationWarning,
)
from services.reporting.validator import (
    _check_discrepancy,
    compute_validation_status,
    validate_model_quality,
    validate_recommendation_accuracy,
)


# ── Helpers ──────────────────────────────────────────────────────────────


def _make_report(**overrides: object) -> ReportData:
    """Return a ``ReportData`` filled with neutral baseline values.

    The P&L, recommendation-accuracy and risk sections carry zeroed numeric
    fields, the position-performance and model-quality sections are built
    with no arguments, and the report is a DAILY report covering 2025-01-15.
    Any keyword argument replaces the baseline field of the same name.
    """
    baseline_pnl = PLSection(
        realized_pnl=0.0,
        unrealized_pnl=0.0,
        daily_return=0.0,
        cumulative_return=0.0,
        win_count=0,
        loss_count=0,
        win_rate=0.0,
        profit_factor=0.0,
        sharpe_ratio=0.0,
    )
    baseline_accuracy = RecommendationAccuracySection(
        total_evaluated=0,
        act_count=0,
        skip_count=0,
        acted_win_rate=0.0,
        avg_confidence_acted=0.0,
        avg_confidence_skipped=0.0,
    )
    baseline_positions = PositionPerformanceSection()
    baseline_risk = RiskMetricsSection(
        current_risk_tier="moderate",
        portfolio_heat=0.0,
        max_drawdown=0.0,
        current_drawdown_pct=0.0,
        reserve_pool_balance=0.0,
        circuit_breaker_event_count=0,
    )
    baseline_quality = ModelQualitySection()

    fields: dict = {
        "pnl": baseline_pnl,
        "recommendation_accuracy": baseline_accuracy,
        "position_performance": baseline_positions,
        "risk_metrics": baseline_risk,
        "model_quality": baseline_quality,
        "generated_at": datetime(2025, 1, 15, 21, 30, tzinfo=timezone.utc),
        "period_start": date(2025, 1, 15),
        "period_end": date(2025, 1, 15),
        "report_type": ReportType.DAILY,
    }
    # Caller-supplied values win over the baseline.
    return ReportData(**{**fields, **overrides})


# ═══════════════════════════════════════════════════════════════════════
# 1. _check_discrepancy — boundary tests
# Requirements validated: 4.1, 4.2, 4.3
# ═══════════════════════════════════════════════════════════════════════


class TestCheckDiscrepancy:
    """Tests for _check_discrepancy boundary and edge cases.

    Each test calls ``_check_discrepancy(field_name, computed, snapshot)``
    and expects ``None`` when no warning is due, or a warning object exposing
    ``field_name``, ``computed_value``, ``snapshot_value`` and
    ``pct_difference`` otherwise.
    """

    def test_exactly_5_percent_no_warning(self) -> None:
        """Exactly 5% discrepancy does NOT trigger a warning (threshold is >5%)."""
        # snapshot=100, computed=105 → |105-100|/100*100 = 5.0%
        result = _check_discrepancy("test_field", 105.0, 100.0)
        assert result is None

    def test_just_above_5_percent_triggers_warning(self) -> None:
        """5.1% discrepancy triggers a warning."""
        # snapshot=100, computed=105.1 → |105.1-100|/100*100 = 5.1%
        result = _check_discrepancy("test_field", 105.1, 100.0)
        assert result is not None
        assert result.field_name == "test_field"
        assert result.computed_value == 105.1
        assert result.snapshot_value == 100.0
        # Tolerance absorbs floating-point error in the percentage.
        assert abs(result.pct_difference - 5.1) < 0.01

    def test_snapshot_zero_computed_nonzero_warns(self) -> None:
        """snapshot=0 with computed≠0 → 100% discrepancy → warning."""
        result = _check_discrepancy("test_field", 42.0, 0.0)
        assert result is not None
        assert result.pct_difference == 100.0

    def test_both_zero_no_warning(self) -> None:
        """Both snapshot=0 and computed=0 → no warning."""
        result = _check_discrepancy("test_field", 0.0, 0.0)
        assert result is None

    def test_large_discrepancy(self) -> None:
        """A large discrepancy (50%) triggers a warning."""
        # snapshot=100, computed=150 → 50%
        result = _check_discrepancy("big_diff", 150.0, 100.0)
        assert result is not None
        assert abs(result.pct_difference - 50.0) < 0.01

    def test_small_discrepancy_no_warning(self) -> None:
        """A small discrepancy (1%) does not trigger a warning."""
        # snapshot=100, computed=101 → 1%
        result = _check_discrepancy("small_diff", 101.0, 100.0)
        assert result is None

    def test_computed_below_snapshot(self) -> None:
        """Discrepancy is detected when computed < snapshot too."""
        # snapshot=100, computed=94 → 6%
        result = _check_discrepancy("below", 94.0, 100.0)
        assert result is not None
        assert abs(result.pct_difference - 6.0) < 0.01

    def test_nan_computed_sanitized_to_zero(self) -> None:
        """NaN computed value is sanitized to 0.0 before comparison."""
        result = _check_discrepancy("nan_field", float("nan"), 100.0)
        # sanitized computed=0.0, snapshot=100 → 100% discrepancy
        assert result is not None
        assert result.computed_value == 0.0
        assert result.pct_difference == 100.0

    def test_inf_computed_sanitized_to_zero(self) -> None:
        """Infinity computed value is sanitized to 0.0 before comparison."""
        result = _check_discrepancy("inf_field", float("inf"), 100.0)
        # sanitized computed=0.0, snapshot=100 → 100% discrepancy, same as
        # the NaN case above
        assert result is not None
        assert result.computed_value == 0.0
        assert result.pct_difference == 100.0

    def test_snapshot_zero_computed_zero_small(self) -> None:
        """snapshot=0.0 and computed=0.0 exactly → no warning."""
        result = _check_discrepancy("zero_zero", 0.0, 0.0)
        assert result is None


# ═══════════════════════════════════════════════════════════════════════
# 2. validate_recommendation_accuracy
# Requirements validated: 4.1
# ═══════════════════════════════════════════════════════════════════════


class TestValidateRecommendationAccuracy:
    """Exercises validate_recommendation_accuracy against prediction outcomes."""

    @staticmethod
    def _outcomes(*flags: bool) -> list:
        """Build one ``{"profitable": flag}`` outcome dict per flag, in order."""
        return [{"profitable": flag} for flag in flags]

    def test_matching_data_no_warnings(self) -> None:
        """A section win rate equal to the outcome win rate yields no warnings."""
        # 2 of 4 outcomes are profitable → 0.5, which is what the section reports
        accuracy = RecommendationAccuracySection(
            total_evaluated=4,
            act_count=4,
            skip_count=0,
            acted_win_rate=0.5,
            avg_confidence_acted=0.7,
            avg_confidence_skipped=0.0,
        )
        observed = self._outcomes(True, False, True, False)

        found = validate_recommendation_accuracy(accuracy, observed)

        assert found == []

    def test_discrepancy_triggers_warning(self) -> None:
        """A section win rate more than 5% away from the outcomes is flagged."""
        # 1 of 2 outcomes is profitable → 0.5, but the section claims 0.8
        accuracy = RecommendationAccuracySection(
            total_evaluated=2,
            act_count=2,
            skip_count=0,
            acted_win_rate=0.8,
            avg_confidence_acted=0.7,
            avg_confidence_skipped=0.0,
        )
        observed = self._outcomes(True, False)

        found = validate_recommendation_accuracy(accuracy, observed)

        assert len(found) == 1
        assert found[0].field_name == "acted_win_rate"

    def test_no_outcomes_returns_empty(self) -> None:
        """With an empty outcome list, validation produces no warnings."""
        accuracy = RecommendationAccuracySection(
            total_evaluated=5,
            act_count=3,
            skip_count=2,
            acted_win_rate=0.6,
            avg_confidence_acted=0.7,
            avg_confidence_skipped=0.4,
        )

        found = validate_recommendation_accuracy(accuracy, [])

        assert found == []

    def test_all_profitable_matching(self) -> None:
        """Every outcome profitable and a reported rate of 1.0 → no warning."""
        accuracy = RecommendationAccuracySection(
            total_evaluated=3,
            act_count=3,
            skip_count=0,
            acted_win_rate=1.0,
            avg_confidence_acted=0.9,
            avg_confidence_skipped=0.0,
        )
        observed = self._outcomes(True, True, True)

        found = validate_recommendation_accuracy(accuracy, observed)

        assert found == []


# ═══════════════════════════════════════════════════════════════════════
# 3. validate_model_quality
# Requirements validated: 4.2, 4.3
# ═══════════════════════════════════════════════════════════════════════


class TestValidateModelQuality:
    """Exercises validate_model_quality against metric snapshots."""

    # Baseline metric values per lookback window; individual tests override
    # single entries to create a mismatch or a missing value.
    _METRICS = {
        "7d": {
            "win_rate": 0.65,
            "directional_accuracy": 0.62,
            "information_coefficient": 0.08,
            "calibration_error": 0.12,
            "brier_score": 0.22,
        },
        "30d": {
            "win_rate": 0.60,
            "directional_accuracy": 0.58,
            "information_coefficient": 0.06,
            "calibration_error": 0.15,
            "brier_score": 0.25,
        },
        "90d": {
            "win_rate": 0.55,
            "directional_accuracy": 0.53,
            "information_coefficient": 0.04,
            "calibration_error": 0.18,
            "brier_score": 0.28,
        },
    }

    # Every metric name mapped to None, for the "value missing" scenarios.
    _NO_VALUES = dict.fromkeys(_METRICS["7d"])

    @classmethod
    def _window(cls, lookback: str, **overrides: object) -> ModelQualityWindow:
        """Build a section window from the baseline metrics for *lookback*."""
        return ModelQualityWindow(
            lookback=lookback,
            **{**cls._METRICS[lookback], **overrides},
        )

    @classmethod
    def _snapshot(cls, lookback: str, **overrides: object) -> dict:
        """Build a snapshot row dict from the baseline metrics for *lookback*."""
        return {
            "lookback_window": lookback,
            **cls._METRICS[lookback],
            **overrides,
        }

    def test_matching_data_no_warnings(self) -> None:
        """Section metrics identical to the snapshot produce no warnings."""
        quality = ModelQualitySection(windows=[self._window("7d")])
        rows = [self._snapshot("7d")]

        found = validate_model_quality(quality, rows)

        assert found == []

    def test_discrepancy_triggers_warnings(self) -> None:
        """A section metric more than 5% off its snapshot is flagged."""
        # section win_rate 0.80 vs snapshot 0.65 → ~23% off
        quality = ModelQualitySection(
            windows=[self._window("7d", win_rate=0.80)],
        )
        rows = [self._snapshot("7d")]

        found = validate_model_quality(quality, rows)

        assert len(found) == 1
        assert found[0].field_name == "7d_win_rate"

    def test_null_snapshot_value_skipped(self) -> None:
        """Snapshot metrics that are NULL (None) are not compared."""
        quality = ModelQualitySection(windows=[self._window("7d")])
        rows = [self._snapshot("7d", **self._NO_VALUES)]

        found = validate_model_quality(quality, rows)

        assert found == []

    def test_no_snapshots_returns_empty(self) -> None:
        """With an empty snapshot list, validation produces no warnings."""
        quality = ModelQualitySection(windows=[self._window("7d")])

        found = validate_model_quality(quality, [])

        assert found == []

    def test_multiple_windows_validated(self) -> None:
        """Every lookback window in the section is checked."""
        # 7d matches its snapshot; 30d reports 0.90 against a snapshot of
        # 0.60 → 50% off
        quality = ModelQualitySection(
            windows=[
                self._window("7d"),
                self._window("30d", win_rate=0.90),
            ],
        )
        rows = [self._snapshot("7d"), self._snapshot("30d")]

        found = validate_model_quality(quality, rows)

        # Only 30d_win_rate should be flagged
        assert len(found) == 1
        assert found[0].field_name == "30d_win_rate"

    def test_null_section_value_skipped(self) -> None:
        """Section metrics that are None are not compared."""
        quality = ModelQualitySection(
            windows=[self._window("7d", **self._NO_VALUES)],
        )
        rows = [self._snapshot("7d")]

        found = validate_model_quality(quality, rows)

        assert found == []

    def test_no_matching_window_in_snapshots(self) -> None:
        """A section window with no snapshot for its lookback is skipped."""
        quality = ModelQualitySection(windows=[self._window("90d")])
        rows = [self._snapshot("7d")]

        found = validate_model_quality(quality, rows)

        assert found == []


# ═══════════════════════════════════════════════════════════════════════
# 4. compute_validation_status
# Requirements validated: 4.4
# ═══════════════════════════════════════════════════════════════════════


class TestComputeValidationStatus:
    """Tests for compute_validation_status.

    Each test builds a report via ``_make_report``, optionally attaching
    ``ValidationWarning`` objects to one or more sections, and checks the
    ``ValidationStatus`` returned for the report.
    """

    @staticmethod
    def _pnl_with(warnings: list[ValidationWarning]) -> PLSection:
        """Return a zeroed P&L section carrying the given validation warnings."""
        return PLSection(
            realized_pnl=0.0,
            unrealized_pnl=0.0,
            daily_return=0.0,
            cumulative_return=0.0,
            win_count=0,
            loss_count=0,
            win_rate=0.0,
            profit_factor=0.0,
            sharpe_ratio=0.0,
            validation_warnings=warnings,
        )

    def test_no_warnings_returns_passed(self) -> None:
        """When no sections have warnings, status is PASSED."""
        report = _make_report()
        status = compute_validation_status(report)
        assert status == ValidationStatus.PASSED

    def test_pnl_warnings_returns_warnings(self) -> None:
        """When P&L section has warnings, status is WARNINGS."""
        report = _make_report(
            pnl=self._pnl_with(
                [
                    ValidationWarning(
                        field_name="test",
                        computed_value=1.0,
                        snapshot_value=0.5,
                        pct_difference=100.0,
                    ),
                ],
            ),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS

    def test_recommendation_accuracy_warnings_returns_warnings(self) -> None:
        """When recommendation accuracy section has warnings, status is WARNINGS."""
        report = _make_report(
            recommendation_accuracy=RecommendationAccuracySection(
                total_evaluated=0,
                act_count=0,
                skip_count=0,
                acted_win_rate=0.0,
                avg_confidence_acted=0.0,
                avg_confidence_skipped=0.0,
                validation_warnings=[
                    ValidationWarning(
                        field_name="acted_win_rate",
                        computed_value=0.8,
                        snapshot_value=0.5,
                        pct_difference=60.0,
                    ),
                ],
            ),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS

    def test_model_quality_warnings_returns_warnings(self) -> None:
        """When model quality section has warnings, status is WARNINGS."""
        report = _make_report(
            model_quality=ModelQualitySection(
                validation_warnings=[
                    ValidationWarning(
                        field_name="7d_win_rate",
                        computed_value=0.9,
                        snapshot_value=0.65,
                        pct_difference=38.46,
                    ),
                ],
            ),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS

    def test_multiple_sections_with_warnings(self) -> None:
        """When multiple sections have warnings, status is still WARNINGS."""
        # The same warning object is attached to both sections.
        w = ValidationWarning(
            field_name="x",
            computed_value=1.0,
            snapshot_value=0.0,
            pct_difference=100.0,
        )
        report = _make_report(
            pnl=self._pnl_with([w]),
            model_quality=ModelQualitySection(validation_warnings=[w]),
        )
        status = compute_validation_status(report)
        assert status == ValidationStatus.WARNINGS
|