# Feature: trading-feedback-engine, Property 4: Recommendation accuracy aggregation
# Feature: trading-feedback-engine, Property 5: Portfolio period-over-period delta computation
"""Property-based tests for report section builders.

Feature: trading-feedback-engine

Property 4 tests the recommendation accuracy aggregation property from the
design specification: for any non-empty list of trading decisions with
associated prediction outcomes, the computed acted_win_rate SHALL equal the
count of profitable outcomes divided by total acted outcomes with prediction
data, and all rate values SHALL be in [0.0, 1.0].

Property 5 tests the portfolio period-over-period delta computation property
from the design specification: for any two valid portfolio snapshots (current
and previous), the period-over-period deltas SHALL equal (current - previous)
for each field. When no previous snapshot exists, the deltas SHALL be zero.
"""

from __future__ import annotations

import uuid

from hypothesis import given, settings
from hypothesis import strategies as st

from services.reporting.collector import CollectedData
from services.reporting.sections import (
    build_pnl_section,
    build_recommendation_accuracy_section,
)

# ---------------------------------------------------------------------------
# Property 4: Recommendation Accuracy Aggregation
# Validates: Requirements 1.4
# ---------------------------------------------------------------------------

# Strategy: generate a list of unique tickers, then build matching
# trading_decisions, recommendations, and prediction_outcomes.

# Ticker stems: 1-5 uppercase letters (Unicode category "Lu"), so no digits.
_ticker_strategy = st.text(
    alphabet=st.characters(whitelist_categories=("Lu",)),
    min_size=1,
    max_size=5,
)

# Confidence values are finite floats in [0.0, 1.0].
_confidence_strategy = st.floats(
    min_value=0.0,
    max_value=1.0,
    allow_nan=False,
    allow_infinity=False,
)

# Excess returns are finite floats in [-1.0, 1.0].
_excess_return_strategy = st.floats(
    min_value=-1.0,
    max_value=1.0,
    allow_nan=False,
    allow_infinity=False,
)


def _mean_or_zero(values: list[float]) -> float:
    """Return the arithmetic mean of *values*, or 0.0 for an empty list."""
    if not values:
        return 0.0
    return sum(values) / len(values)


@st.composite
def recommendation_accuracy_data(draw: st.DrawFn) -> tuple[CollectedData, dict]:
    """Build a CollectedData instance plus the aggregates it should yield.

    Between 1 and 20 rows are generated. Each row contributes one trading
    decision, one recommendation (linked through ``recommendation_id``) and
    one prediction outcome (sharing the decision's ticker).

    Returns:
        A ``(CollectedData, expected)`` pair, where ``expected`` maps
        ``total_evaluated``, ``act_count``, ``skip_count``,
        ``acted_win_rate``, ``avg_confidence_acted`` and
        ``avg_confidence_skipped`` to values computed here independently of
        the section builder.
    """
    row_count = draw(st.integers(min_value=1, max_value=20))

    def draw_column(elements):
        # One list of exactly ``row_count`` values per generated column.
        return draw(st.lists(elements, min_size=row_count, max_size=row_count))

    stems = [draw(_ticker_strategy) for _ in range(row_count)]
    # Stems contain letters only, so suffixing the row index makes every
    # ticker unique.
    tickers = [f"{stem}{position}" for position, stem in enumerate(stems)]

    decisions = draw_column(st.sampled_from(["act", "skip"]))
    confidences = draw_column(_confidence_strategy)
    profitable_flags = draw_column(st.booleans())
    direction_correct_flags = draw_column(st.booleans())
    excess_returns = draw_column(_excess_return_strategy)

    trading_decisions = []
    recommendations = []
    prediction_outcomes = []

    # Independent bookkeeping used to derive the expected aggregates.
    acted_confidences: list[float] = []
    skipped_confidences: list[float] = []
    acted_wins = 0

    rows = zip(
        tickers,
        decisions,
        confidences,
        profitable_flags,
        direction_correct_flags,
        excess_returns,
    )
    for ticker, decision, confidence, profitable, direction_correct, excess_return in rows:
        rec_id = str(uuid.uuid4())

        trading_decisions.append(
            {
                "id": str(uuid.uuid4()),
                "recommendation_id": rec_id,
                "decision": decision,
                "ticker": ticker,
            }
        )
        recommendations.append({"id": rec_id, "confidence": confidence})
        prediction_outcomes.append(
            {
                "ticker": ticker,
                "profitable": profitable,
                "direction_correct": direction_correct,
                "excess_return_vs_spy": excess_return,
            }
        )

        if decision != "act":
            skipped_confidences.append(confidence)
            continue

        acted_confidences.append(confidence)
        if profitable:
            acted_wins += 1

    data = CollectedData(
        trading_decisions=trading_decisions,
        recommendations=recommendations,
        prediction_outcomes=prediction_outcomes,
    )

    act_count = len(acted_confidences)
    skip_count = len(skipped_confidences)
    # Every acted decision has a prediction outcome with the same ticker, so
    # the win-rate denominator is simply the number of acted decisions.
    acted_win_rate = (acted_wins / act_count) if act_count > 0 else 0.0

    expected = {
        "total_evaluated": act_count + skip_count,
        "act_count": act_count,
        "skip_count": skip_count,
        "acted_win_rate": acted_win_rate,
        "avg_confidence_acted": _mean_or_zero(acted_confidences),
        "avg_confidence_skipped": _mean_or_zero(skipped_confidences),
    }
    return data, expected


@given(data_and_expected=recommendation_accuracy_data())
@settings(max_examples=100)
def test_recommendation_accuracy_aggregation(
    data_and_expected: tuple[CollectedData, dict],
) -> None:
    """**Validates: Requirements 1.4**

    The recommendation accuracy section built from generated data must
    report the same act/skip counts, acted win rate and average confidence
    values as the independently computed expectation, and every rate must
    lie in [0.0, 1.0].
    """
    data, expected = data_and_expected
    section = build_recommendation_accuracy_section(data)

    count_fields = ("total_evaluated", "act_count", "skip_count")
    rate_fields = ("acted_win_rate", "avg_confidence_acted", "avg_confidence_skipped")

    # Counts must match exactly.
    for field in count_fields:
        actual = getattr(section, field)
        wanted = expected[field]
        assert actual == wanted, f"{field} mismatch: got {actual}, expected {wanted}"

    # Rates and averages are floats; compare with an absolute tolerance.
    for field in rate_fields:
        actual = getattr(section, field)
        wanted = expected[field]
        assert abs(actual - wanted) < 1e-9, (
            f"{field} mismatch: got {actual}, expected {wanted}"
        )

    # All rate values must be in [0.0, 1.0].
    for field in rate_fields:
        actual = getattr(section, field)
        assert 0.0 <= actual <= 1.0, f"{field} out of range: {actual}"


# ---------------------------------------------------------------------------
# Property 5: Portfolio Period-Over-Period Delta Computation
# Validates: Requirements 1.3
# ---------------------------------------------------------------------------

_non_negative_float = st.floats(
    min_value=0.0,
    max_value=1e8,
    allow_nan=False,
    allow_infinity=False,
)

_finite_float = st.floats(
    min_value=-1e6,
    max_value=1e6,
    allow_nan=False,
    allow_infinity=False,
)

# Snapshot keys paired with the strategy that generates each value. The order
# fixes both the dict key order and the order in which values are drawn.
_SNAPSHOT_FIELD_STRATEGIES = (
    ("portfolio_value", _non_negative_float),
    ("active_pool", _non_negative_float),
    ("reserve_pool", _non_negative_float),
    ("cumulative_return", _finite_float),
    ("realized_pnl", _finite_float),
    ("unrealized_pnl", _finite_float),
    ("daily_return", _finite_float),
    ("win_count", st.integers(min_value=0, max_value=10000)),
    ("loss_count", st.integers(min_value=0, max_value=10000)),
    (
        "win_rate",
        st.floats(
            min_value=0.0,
            max_value=1.0,
            allow_nan=False,
            allow_infinity=False,
        ),
    ),
    ("sharpe_ratio", _finite_float),
)


@st.composite
def portfolio_snapshot_pair(draw: st.DrawFn) -> tuple[dict, dict]:
    """Generate two independent portfolio snapshot dicts.

    Each snapshot holds non-negative ``portfolio_value``, ``active_pool`` and
    ``reserve_pool``; finite ``cumulative_return``, ``realized_pnl``,
    ``unrealized_pnl``, ``daily_return`` and ``sharpe_ratio``; integer
    ``win_count`` / ``loss_count`` in [0, 10000]; and ``win_rate`` in
    [0.0, 1.0].

    Returns:
        A ``(current, previous)`` tuple; the current snapshot is drawn first.
    """

    def draw_snapshot() -> dict:
        return {key: draw(strategy) for key, strategy in _SNAPSHOT_FIELD_STRATEGIES}

    current = draw_snapshot()
    previous = draw_snapshot()
    return current, previous


@given(snapshots=portfolio_snapshot_pair())
@settings(max_examples=100)
def test_portfolio_delta_with_both_snapshots(
    snapshots: tuple[dict, dict],
) -> None:
    """**Validates: Requirements 1.3**

    A P&L section is built from each of two generated snapshots. For
    cumulative_return, realized_pnl and unrealized_pnl, the difference
    between the two sections must equal the difference between the two
    snapshots. The current section must also mirror the current snapshot's
    cumulative_return, realized_pnl, unrealized_pnl, daily_return and
    win_rate.
    """
    current_snap, previous_snap = snapshots

    data_current = CollectedData(portfolio_snapshot=current_snap)
    data_previous = CollectedData(portfolio_snapshot=previous_snap)

    section_current = build_pnl_section(data_current)
    section_previous = build_pnl_section(data_previous)

    # Section-level delta must equal snapshot-level delta.
    for field in ("cumulative_return", "realized_pnl", "unrealized_pnl"):
        observed = getattr(section_current, field) - getattr(section_previous, field)
        wanted = float(current_snap[field]) - float(previous_snap[field])
        assert abs(observed - wanted) < 1e-9, (
            f"{field} delta mismatch: got {observed}, expected {wanted}"
        )

    # Section values must faithfully reflect the snapshot they were built from.
    mirrored_fields = (
        "cumulative_return",
        "realized_pnl",
        "unrealized_pnl",
        "daily_return",
        "win_rate",
    )
    for field in mirrored_fields:
        assert abs(getattr(section_current, field) - float(current_snap[field])) < 1e-9


@given(
    portfolio_value=_non_negative_float,
    active_pool=_non_negative_float,
    reserve_pool=_non_negative_float,
    cumulative_return=_finite_float,
)
@settings(max_examples=100)
def test_portfolio_delta_no_previous_snapshot(
    portfolio_value: float,
    active_pool: float,
    reserve_pool: float,
    cumulative_return: float,
) -> None:
    """**Validates: Requirements 1.3**

    With ``portfolio_snapshot=None`` the P&L section must report zero for
    every checked field, so deltas measured against it start from a zero
    baseline.
    """
    # NOTE: the generated arguments are not read by the assertions below; the
    # section is always built from a CollectedData without a snapshot.
    section = build_pnl_section(CollectedData(portfolio_snapshot=None))

    # (field, expected zero); the zero's own repr appears in the message, so
    # integer counters use 0 and float metrics use 0.0.
    zero_expectations = (
        ("realized_pnl", 0.0),
        ("unrealized_pnl", 0.0),
        ("daily_return", 0.0),
        ("cumulative_return", 0.0),
        ("win_count", 0),
        ("loss_count", 0),
        ("win_rate", 0.0),
        ("sharpe_ratio", 0.0),
        ("profit_factor", 0.0),
    )
    for field, zero in zero_expectations:
        actual = getattr(section, field)
        assert actual == zero, (
            f"Expected {zero} {field} with no snapshot, got {actual}"
        )