# stonks-oracle/tests/test_pbt_report_sections.py

# Feature: trading-feedback-engine, Property 4: Recommendation accuracy aggregation
# Feature: trading-feedback-engine, Property 5: Portfolio period-over-period delta computation
"""Property-based tests for report section builders.

Feature: trading-feedback-engine

Property 4 tests the recommendation accuracy aggregation property from the
design specification: for any non-empty list of trading decisions with
associated prediction outcomes, the computed acted_win_rate SHALL equal the
count of profitable outcomes divided by total acted outcomes with prediction
data, and all rate values SHALL be in [0.0, 1.0].

Property 5 tests the portfolio period-over-period delta computation property
from the design specification: for any two valid portfolio snapshots (current
and previous), the period-over-period deltas SHALL equal (current - previous)
for each field. When no previous snapshot exists, the deltas SHALL be zero.
"""
from __future__ import annotations

import uuid

from hypothesis import given, settings
from hypothesis import strategies as st

from services.reporting.collector import CollectedData
from services.reporting.sections import (
    build_pnl_section,
    build_recommendation_accuracy_section,
)

# ---------------------------------------------------------------------------
# Property 4: Recommendation Accuracy Aggregation
# Validates: Requirements 1.4
# ---------------------------------------------------------------------------

# Strategy: generate a list of unique tickers, then build matching
# trading_decisions, recommendations, and prediction_outcomes.

# Ticker-like strings: one to five characters taken from the Unicode "Lu"
# (uppercase letter) category.
_ticker_strategy = st.text(
    st.characters(whitelist_categories=("Lu",)),
    min_size=1,
    max_size=5,
)

# Confidence values: finite floats in the closed interval [0.0, 1.0].
_confidence_strategy = st.floats(
    0.0,
    1.0,
    allow_nan=False,
    allow_infinity=False,
)

# Excess-return values: finite floats in the closed interval [-1.0, 1.0].
_excess_return_strategy = st.floats(
    -1.0,
    1.0,
    allow_nan=False,
    allow_infinity=False,
)


@st.composite
def recommendation_accuracy_data(draw: st.DrawFn) -> tuple[CollectedData, dict]:
    """Generate ``CollectedData`` together with independently computed expectations.

    Between 1 and 20 rows are generated. Each row yields one trading decision
    ("act" or "skip"), one recommendation carrying a confidence, and one
    prediction outcome that shares the decision's ticker.

    Args:
        draw: Hypothesis draw function injected by ``st.composite``.

    Returns:
        A ``(data, expected)`` tuple. ``expected`` maps ``total_evaluated``,
        ``act_count``, ``skip_count``, ``acted_win_rate``,
        ``avg_confidence_acted`` and ``avg_confidence_skipped`` to values
        computed directly from the generated rows.
    """
    n = draw(st.integers(min_value=1, max_value=20))

    # Appending the row index keeps tickers unique: the base strings contain
    # only uppercase letters, so the trailing digits cannot collide.
    tickers = [draw(_ticker_strategy) for _ in range(n)]
    tickers = [f"{base}{index}" for index, base in enumerate(tickers)]

    def draw_column(elements: st.SearchStrategy) -> list:
        """Draw exactly ``n`` values from ``elements``."""
        return draw(st.lists(elements, min_size=n, max_size=n))

    decisions = draw_column(st.sampled_from(["act", "skip"]))
    confidences = draw_column(_confidence_strategy)
    profitable_flags = draw_column(st.booleans())
    direction_correct_flags = draw_column(st.booleans())
    excess_returns = draw_column(_excess_return_strategy)

    # Ids are drawn through Hypothesis instead of uuid.uuid4() so that every
    # part of an example is replayable and shrinkable; unique=True guarantees
    # that recommendation ids and decision ids never collide.
    drawn_ids = [
        str(value)
        for value in draw(
            st.lists(st.uuids(), min_size=2 * n, max_size=2 * n, unique=True)
        )
    ]
    recommendation_ids = drawn_ids[:n]
    decision_ids = drawn_ids[n:]

    trading_decisions = [
        {
            "id": decision_id,
            "recommendation_id": rec_id,
            "decision": decision,
            "ticker": ticker,
        }
        for decision_id, rec_id, decision, ticker in zip(
            decision_ids, recommendation_ids, decisions, tickers
        )
    ]
    recommendations = [
        {"id": rec_id, "confidence": confidence}
        for rec_id, confidence in zip(recommendation_ids, confidences)
    ]
    prediction_outcomes = [
        {
            "ticker": ticker,
            "profitable": profitable,
            "direction_correct": direction_correct,
            "excess_return_vs_spy": excess_return,
        }
        for ticker, profitable, direction_correct, excess_return in zip(
            tickers, profitable_flags, direction_correct_flags, excess_returns
        )
    ]

    data = CollectedData(
        trading_decisions=trading_decisions,
        recommendations=recommendations,
        prediction_outcomes=prediction_outcomes,
    )

    # Expected values, derived from the raw columns rather than from `data`.
    acted_confidences = [
        confidence
        for decision, confidence in zip(decisions, confidences)
        if decision == "act"
    ]
    skipped_confidences = [
        confidence
        for decision, confidence in zip(decisions, confidences)
        if decision != "act"
    ]
    # Every acted decision has a matching prediction outcome by ticker, so
    # the win-rate denominator is simply the number of acted decisions.
    acted_with_outcome = len(acted_confidences)
    acted_wins = sum(
        1
        for decision, profitable in zip(decisions, profitable_flags)
        if decision == "act" and profitable
    )

    def mean_or_zero(values: list[float]) -> float:
        """Return the arithmetic mean of ``values``, or 0.0 when empty."""
        return (sum(values) / len(values)) if values else 0.0

    expected = {
        "total_evaluated": len(acted_confidences) + len(skipped_confidences),
        "act_count": len(acted_confidences),
        "skip_count": len(skipped_confidences),
        "acted_win_rate": (
            (acted_wins / acted_with_outcome) if acted_with_outcome > 0 else 0.0
        ),
        "avg_confidence_acted": mean_or_zero(acted_confidences),
        "avg_confidence_skipped": mean_or_zero(skipped_confidences),
    }

    return data, expected


@given(data_and_expected=recommendation_accuracy_data())
@settings(max_examples=100)
def test_recommendation_accuracy_aggregation(
    data_and_expected: tuple[CollectedData, dict],
) -> None:
    """**Validates: Requirements 1.4**

    Builds the recommendation accuracy section from generated data and
    compares it with the independently computed expectations:

    * ``total_evaluated``, ``act_count`` and ``skip_count`` must match exactly;
    * ``acted_win_rate``, ``avg_confidence_acted`` and
      ``avg_confidence_skipped`` must match within an absolute tolerance of
      1e-9;
    * those three rate/average values must lie in [0.0, 1.0].
    """
    collected, wanted = data_and_expected
    section = build_recommendation_accuracy_section(collected)

    count_fields = ("total_evaluated", "act_count", "skip_count")
    rate_fields = (
        "acted_win_rate",
        "avg_confidence_acted",
        "avg_confidence_skipped",
    )

    # Exact equality for the integer counters.
    for field in count_fields:
        assert getattr(section, field) == wanted[field], (
            f"{field} mismatch: got {getattr(section, field)}, "
            f"expected {wanted[field]}"
        )

    # Tolerance-based comparison for the floating-point aggregates.
    for field in rate_fields:
        assert abs(getattr(section, field) - wanted[field]) < 1e-9, (
            f"{field} mismatch: got {getattr(section, field)}, "
            f"expected {wanted[field]}"
        )

    # Range check: every rate/average must stay within the unit interval.
    for field in rate_fields:
        assert 0.0 <= getattr(section, field) <= 1.0, (
            f"{field} out of range: {getattr(section, field)}"
        )


# ---------------------------------------------------------------------------
# Property 5: Portfolio Period-Over-Period Delta Computation
# Validates: Requirements 1.3
# ---------------------------------------------------------------------------

# Finite floats in [0.0, 1e8]; used for values that are generated as
# non-negative (portfolio value and pool sizes).
_non_negative_float = st.floats(
    0.0,
    1e8,
    allow_nan=False,
    allow_infinity=False,
)

# Finite floats in [-1e6, 1e6]; used for signed quantities such as returns
# and PnL figures.
_finite_float = st.floats(
    -1e6,
    1e6,
    allow_nan=False,
    allow_infinity=False,
)


@st.composite
def portfolio_snapshot_pair(draw: st.DrawFn) -> tuple[dict, dict]:
    """Generate a ``(current, previous)`` pair of portfolio snapshot dicts.

    Both snapshots share the same schema: non-negative ``portfolio_value``,
    ``active_pool`` and ``reserve_pool``; finite signed ``cumulative_return``,
    ``realized_pnl``, ``unrealized_pnl``, ``daily_return`` and
    ``sharpe_ratio``; ``win_count`` / ``loss_count`` in [0, 10000]; and a
    ``win_rate`` in [0.0, 1.0]. The two snapshots are drawn independently,
    current first.
    """
    trade_count = st.integers(min_value=0, max_value=10000)
    unit_interval = st.floats(
        min_value=0.0, max_value=1.0,
        allow_nan=False, allow_infinity=False,
    )

    # Field order matters: it fixes both the dict key order and the order in
    # which values are drawn.
    schema = (
        ("portfolio_value", _non_negative_float),
        ("active_pool", _non_negative_float),
        ("reserve_pool", _non_negative_float),
        ("cumulative_return", _finite_float),
        ("realized_pnl", _finite_float),
        ("unrealized_pnl", _finite_float),
        ("daily_return", _finite_float),
        ("win_count", trade_count),
        ("loss_count", trade_count),
        ("win_rate", unit_interval),
        ("sharpe_ratio", _finite_float),
    )

    def draw_snapshot() -> dict:
        """Draw one snapshot, one value per schema field."""
        return {key: draw(strategy) for key, strategy in schema}

    current = draw_snapshot()
    previous = draw_snapshot()
    return current, previous


@given(snapshots=portfolio_snapshot_pair())
@settings(max_examples=100)
def test_portfolio_delta_with_both_snapshots(
    snapshots: tuple[dict, dict],
) -> None:
    """**Validates: Requirements 1.3**

    Builds one PnL section from the current snapshot and one from the
    previous snapshot, then checks that the difference between the two
    sections equals ``current - previous`` of the raw snapshot values for
    ``cumulative_return``, ``realized_pnl`` and ``unrealized_pnl`` (absolute
    tolerance 1e-9).

    It additionally checks that the current section reproduces the current
    snapshot's ``cumulative_return``, ``realized_pnl``, ``unrealized_pnl``,
    ``daily_return`` and ``win_rate`` within the same tolerance.
    """
    current_snap, previous_snap = snapshots

    # One CollectedData per snapshot, then one section per CollectedData.
    data_current, data_previous = (
        CollectedData(portfolio_snapshot=snap) for snap in (current_snap, previous_snap)
    )
    section_current = build_pnl_section(data_current)
    section_previous = build_pnl_section(data_previous)

    # Section-level delta must match the snapshot-level delta.
    for field in ("cumulative_return", "realized_pnl", "unrealized_pnl"):
        observed = getattr(section_current, field) - getattr(section_previous, field)
        wanted = float(current_snap[field]) - float(previous_snap[field])
        assert abs(observed - wanted) < 1e-9, (
            f"{field} delta mismatch: "
            f"got {observed}, expected {wanted}"
        )

    # The section must faithfully carry over the snapshot's own values.
    mirrored_fields = (
        "cumulative_return",
        "realized_pnl",
        "unrealized_pnl",
        "daily_return",
        "win_rate",
    )
    for field in mirrored_fields:
        assert abs(getattr(section_current, field) - float(current_snap[field])) < 1e-9


@given(
    portfolio_value=_non_negative_float,
    active_pool=_non_negative_float,
    reserve_pool=_non_negative_float,
    cumulative_return=_finite_float,
)
@settings(max_examples=100)
def test_portfolio_delta_no_previous_snapshot(
    portfolio_value: float,
    active_pool: float,
    reserve_pool: float,
    cumulative_return: float,
) -> None:
    """**Validates: Requirements 1.3**

    With ``portfolio_snapshot=None`` the PnL section is expected to report
    zero for every field checked below, so any delta taken against that
    baseline is zero as well.

    NOTE(review): the four generated arguments are not used by the body, so
    every Hypothesis example exercises the same input.
    """
    section = build_pnl_section(CollectedData(portfolio_snapshot=None))

    # (attribute, expected zero) pairs; the zero's type (float vs int) is
    # reflected in the failure message.
    zero_expectations = (
        ("realized_pnl", 0.0),
        ("unrealized_pnl", 0.0),
        ("daily_return", 0.0),
        ("cumulative_return", 0.0),
        ("win_count", 0),
        ("loss_count", 0),
        ("win_rate", 0.0),
        ("sharpe_ratio", 0.0),
        ("profit_factor", 0.0),
    )
    for field, zero in zero_expectations:
        assert getattr(section, field) == zero, (
            f"Expected {zero} {field} with no snapshot, got {getattr(section, field)}"
        )