Files
stonks-oracle/tests/test_paper_trading_simulation.py
T
Celes Renata e21f162e48 fix: dampen agreement factor by sample size in trend confidence to prevent low-evidence inflation
Agreement of 1-2 signals was inflating confidence to paper-eligible
levels (0.575) even with low credibility sources. Added log2-based
dampener that scales agreement contribution by unique source count,
saturating at n=7. Single signals now cap at 0.39 confidence,
2 signals at 0.49 — both correctly below paper threshold (0.50).
2026-04-17 03:41:39 +00:00

628 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Paper trading simulation scenarios.

End-to-end scenarios that exercise the full recommendation-to-execution
pipeline through the paper trading adapter, risk engine, and position
tracking. Each scenario simulates a realistic trading session using
real logic from all service modules — no mocked business logic.

Covers:
  - Single-symbol buy-and-sell round trips with P&L verification
  - Multi-symbol portfolio construction and diversification
  - Risk engine rejection scenarios (position limits, daily loss, lockouts)
  - Idempotent order submission under replay conditions
  - Insufficient funds and insufficient shares edge cases
  - Recommendation-driven order flow (bullish → buy, bearish → sell)
  - Portfolio drawdown halting via daily loss limits
  - News-shock lockout preventing trades during high-impact events

Requirements: 7.1-7.4, 8.1-8.5
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
import pytest
from services.adapters.broker_adapter import (
OrderRequest,
OrderSide,
OrderStatus,
OrderType,
TradingMode,
)
from services.adapters.paper_trading import PaperTradingAdapter
from services.aggregation.worker import (
ImpactRow,
assemble_trend_with_evidence,
build_weighted_signals,
)
from services.recommendation.eligibility import evaluate_eligibility
from services.recommendation.worker import build_recommendation
from services.risk.engine import (
AccountRiskState,
DailyLossLimits,
NewsShockLockout,
PortfolioRiskConfig,
PositionLimits,
ProposedOrder,
RiskCheckResult,
SectorExposureLimits,
SymbolCooldown,
evaluate_order,
)
from services.shared.schemas import (
ActionType,
RecommendationMode,
)
# Fixed, timezone-aware (UTC) reference timestamp for this module. It is used
# to derive ``published_at`` values for generated impact rows and is passed as
# the reference time to the aggregation and recommendation calls and, where a
# test supplies ``now=``, to the risk engine.
NOW = datetime(2026, 4, 11, 14, 0, 0, tzinfo=timezone.utc)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _bullish_impacts(ticker: str, count: int = 3) -> list[ImpactRow]:
    """Build ``count`` positive-sentiment "earnings" impact rows for ``ticker``.

    Row ``i`` (0-based) is published ``i + 1`` hours before ``NOW``; its
    confidence and impact score grow linearly with ``i``.
    """

    def _row(idx: int) -> ImpactRow:
        # One synthetic bullish document; later indices are older but stronger.
        quarter = idx + 1
        return ImpactRow(
            document_id=f"doc-bull-{ticker}-{idx}",
            confidence=0.80 + idx * 0.02,
            novelty_score=0.6,
            source_credibility=0.8,
            sentiment="positive",
            impact_score=0.70 + idx * 0.03,
            catalyst_type="earnings",
            key_facts=[f"Strong Q{quarter} results for {ticker}"],
            risks=[],
            published_at=NOW - timedelta(hours=idx + 1),
        )

    return [_row(idx) for idx in range(count)]
def _bearish_impacts(ticker: str, count: int = 3) -> list[ImpactRow]:
    """Build ``count`` negative-sentiment "legal" impact rows for ``ticker``.

    Row ``i`` (0-based) is published ``i + 1`` hours before ``NOW``; its
    confidence and impact score grow linearly with ``i``.
    """

    def _row(idx: int) -> ImpactRow:
        # One synthetic bearish document carrying a single risk note.
        return ImpactRow(
            document_id=f"doc-bear-{ticker}-{idx}",
            confidence=0.78 + idx * 0.02,
            novelty_score=0.55,
            source_credibility=0.75,
            sentiment="negative",
            impact_score=0.65 + idx * 0.03,
            catalyst_type="legal",
            key_facts=[f"Regulatory action against {ticker}"],
            risks=[f"Potential fine for {ticker}"],
            published_at=NOW - timedelta(hours=idx + 1),
        )

    return [_row(idx) for idx in range(count)]
def _build_trend_and_recommendation(impacts, ticker, window="7d"):
    """Feed ``impacts`` through aggregation, eligibility and recommendation.

    All steps that take a reference time use ``NOW``.

    Returns:
        A ``(summary, eligibility, recommendation)`` tuple, in that order.
    """
    weighted = build_weighted_signals(impacts, NOW, window)
    trend_summary = assemble_trend_with_evidence(
        ticker,
        window,
        weighted,
        impacts,
        reference_time=NOW,
    ).summary
    verdict = evaluate_eligibility(trend_summary)
    recommendation = build_recommendation(
        trend_summary, verdict, reference_time=NOW,
    )
    return trend_summary, verdict, recommendation
def _risk_state_from_adapter(adapter: PaperTradingAdapter) -> AccountRiskState:
    """Snapshot the paper adapter's in-memory account as an ``AccountRiskState``.

    Only positions whose ``is_open`` flag is truthy contribute. Each one is
    valued at ``quantity * avg_entry_price``.
    """
    account = adapter.account
    # Filter once; the same set of open positions drives both the per-symbol
    # exposure map and the open-position count.
    open_positions = {
        symbol: position
        for symbol, position in account.positions.items()
        if position.is_open
    }
    exposure_by_symbol = {
        symbol: position.quantity * position.avg_entry_price
        for symbol, position in open_positions.items()
    }
    return AccountRiskState(
        account_id=account.account_id,
        portfolio_value=account.portfolio_value,
        cash=account.cash,
        buying_power=account.buying_power,
        positions_by_symbol=exposure_by_symbol,
        open_position_count=len(open_positions),
    )
# ---------------------------------------------------------------------------
# Scenario 1: Single-symbol buy-sell round trip
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
class TestSingleSymbolRoundTrip:
    """Open a position, close it, and reconcile realized P&L against cash."""

    async def test_buy_hold_sell_profit(self):
        """Buying 50 AAPL at 180 and selling at 195 realizes a 750 profit."""
        starting_cash = 100_000.0
        adapter = PaperTradingAdapter(initial_cash=starting_cash)

        # The bullish signal set must yield a BUY before anything is traded.
        _, _, recommendation = _build_trend_and_recommendation(
            _bullish_impacts("AAPL"), "AAPL",
        )
        assert recommendation.action == ActionType.BUY

        # Open the position.
        entry = await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.BUY,
                quantity=50,
                order_type=OrderType.LIMIT,
                limit_price=180.0,
            )
        )
        assert entry.status == OrderStatus.FILLED
        assert adapter.account.cash == pytest.approx(starting_cash - 50 * 180.0)

        # Exactly one open position, for the full quantity.
        held = await adapter.get_positions()
        assert len(held) == 1
        assert held[0].ticker == "AAPL"
        assert held[0].quantity == 50

        # Close the position 15 per share above the entry price.
        exit_fill = await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.SELL,
                quantity=50,
                order_type=OrderType.LIMIT,
                limit_price=195.0,
            )
        )
        assert exit_fill.status == OrderStatus.FILLED
        assert exit_fill.raw_response["realized_pnl"] == pytest.approx(50 * 15.0)

        # Cash ends at the starting balance plus the realized profit.
        assert adapter.account.cash == pytest.approx(starting_cash + 50 * 15.0)

        # Nothing remains open.
        held = await adapter.get_positions()
        assert len(held) == 0

    async def test_buy_hold_sell_loss(self):
        """Buying 20 TSLA at 250 and selling at 230 realizes a 400 loss."""
        starting_cash = 100_000.0
        adapter = PaperTradingAdapter(initial_cash=starting_cash)

        await adapter.submit_order(
            OrderRequest(
                ticker="TSLA",
                side=OrderSide.BUY,
                quantity=20,
                order_type=OrderType.LIMIT,
                limit_price=250.0,
            )
        )
        exit_fill = await adapter.submit_order(
            OrderRequest(
                ticker="TSLA",
                side=OrderSide.SELL,
                quantity=20,
                order_type=OrderType.LIMIT,
                limit_price=230.0,
            )
        )

        assert exit_fill.raw_response["realized_pnl"] == pytest.approx(-400.0)
        assert adapter.account.cash == pytest.approx(starting_cash - 400.0)
# ---------------------------------------------------------------------------
# Scenario 2: Multi-symbol portfolio construction
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
class TestMultiSymbolPortfolio:
    """Portfolio construction and partial liquidation across several tickers."""

    async def test_build_three_position_portfolio(self):
        """Three filled buys leave three positions and an unchanged portfolio value."""
        starting_cash = 100_000.0
        adapter = PaperTradingAdapter(initial_cash=starting_cash)
        # (ticker, quantity, limit price)
        legs = [
            ("AAPL", 20, 180.0),
            ("MSFT", 15, 420.0),
            ("GOOGL", 10, 175.0),
        ]

        for symbol, shares, price in legs:
            fill = await adapter.submit_order(
                OrderRequest(
                    ticker=symbol,
                    side=OrderSide.BUY,
                    quantity=shares,
                    order_type=OrderType.LIMIT,
                    limit_price=price,
                )
            )
            assert fill.status == OrderStatus.FILLED

        spent = sum(shares * price for _, shares, price in legs)
        assert adapter.account.cash == pytest.approx(starting_cash - spent)

        held = await adapter.get_positions()
        assert {position.ticker for position in held} == {"AAPL", "MSFT", "GOOGL"}

        # Cash plus positions valued at entry equals the starting balance.
        assert adapter.account.portfolio_value == pytest.approx(starting_cash)

    async def test_partial_liquidation(self):
        """Selling all AAPL leaves MSFT as the only open position."""
        adapter = PaperTradingAdapter(initial_cash=50_000.0)

        # Sequence: open AAPL, open MSFT, then fully close AAPL.
        sequence = [
            ("AAPL", OrderSide.BUY, 30, 150.0),
            ("MSFT", OrderSide.BUY, 10, 400.0),
            ("AAPL", OrderSide.SELL, 30, 155.0),
        ]
        for symbol, side, shares, price in sequence:
            await adapter.submit_order(
                OrderRequest(
                    ticker=symbol,
                    side=side,
                    quantity=shares,
                    order_type=OrderType.LIMIT,
                    limit_price=price,
                )
            )

        held = await adapter.get_positions()
        assert len(held) == 1
        assert held[0].ticker == "MSFT"
# ---------------------------------------------------------------------------
# Scenario 3: Risk engine blocks unsafe orders
# ---------------------------------------------------------------------------
class TestRiskEngineBlocking:
    """Each test configures one limit, violates it, and expects a rejection."""

    def test_position_size_limit_blocks_large_order(self):
        """A 10,000 order is rejected under a 5,000 per-position cap."""
        limits = PortfolioRiskConfig(
            position_limits=PositionLimits(max_position_value=5_000.0),
        )
        account = AccountRiskState(portfolio_value=100_000.0, cash=100_000.0)
        oversized = ProposedOrder(
            ticker="AAPL",
            sector="Technology",
            estimated_value=10_000.0,
            quantity=50,
        )

        outcome = evaluate_order(oversized, limits, account)

        assert not outcome.passed
        failed_names = [
            check.check_name
            for check in outcome.checks
            if check.result == RiskCheckResult.FAIL
        ]
        assert "max_position_value" in failed_names

    def test_sector_concentration_blocks_overweight(self):
        """Adding 5,000 to 18,000 of Technology is rejected under a 20% sector cap."""
        limits = PortfolioRiskConfig(
            sector_exposure=SectorExposureLimits(max_sector_pct=0.20),
        )
        account = AccountRiskState(
            portfolio_value=100_000.0,
            positions_by_sector={"Technology": 18_000.0},
        )
        overweight = ProposedOrder(
            ticker="NVDA",
            sector="Technology",
            estimated_value=5_000.0,
            quantity=20,
        )

        outcome = evaluate_order(overweight, limits, account)

        assert not outcome.passed

    def test_daily_loss_halt_blocks_further_trading(self):
        """A -2,500 daily P&L breaches a 2% / 2,000 daily loss limit."""
        limits = PortfolioRiskConfig(
            daily_loss=DailyLossLimits(
                max_daily_loss_pct=0.02,
                max_daily_loss_value=2_000.0,
            ),
        )
        account = AccountRiskState(portfolio_value=100_000.0, daily_pnl=-2_500.0)
        small_order = ProposedOrder(
            ticker="AAPL",
            sector="Technology",
            estimated_value=1_000.0,
            quantity=5,
        )

        outcome = evaluate_order(small_order, limits, account)

        assert not outcome.passed
        # At least one check whose name starts with "daily_loss" must have failed.
        assert any(
            check.check_name.startswith("daily_loss")
            and check.result == RiskCheckResult.FAIL
            for check in outcome.checks
        )

    def test_news_shock_lockout_blocks_trade(self):
        """A symbol locked until 45 minutes after ``NOW`` cannot be traded at ``NOW``."""
        limits = PortfolioRiskConfig(
            news_shock=NewsShockLockout(enabled=True, lockout_minutes=60),
        )
        account = AccountRiskState(
            portfolio_value=100_000.0,
            locked_symbols={"AAPL": NOW + timedelta(minutes=45)},
        )
        locked_order = ProposedOrder(
            ticker="AAPL",
            sector="Technology",
            estimated_value=1_000.0,
            quantity=5,
        )

        outcome = evaluate_order(locked_order, limits, account, now=NOW)

        assert not outcome.passed
        failed_names = [
            check.check_name
            for check in outcome.checks
            if check.result == RiskCheckResult.FAIL
        ]
        assert "news_shock_lockout" in failed_names

    def test_symbol_cooldown_blocks_rapid_retrade(self):
        """A trade 5 minutes ago blocks a retrade under a 15-minute cooldown."""
        limits = PortfolioRiskConfig(
            symbol_cooldown=SymbolCooldown(cooldown_minutes=15),
        )
        account = AccountRiskState(
            portfolio_value=100_000.0,
            last_trade_times={"AAPL": NOW - timedelta(minutes=5)},
        )
        retrade = ProposedOrder(
            ticker="AAPL",
            sector="Technology",
            estimated_value=1_000.0,
            quantity=5,
        )

        outcome = evaluate_order(retrade, limits, account, now=NOW)

        assert not outcome.passed
# ---------------------------------------------------------------------------
# Scenario 4: Recommendation-driven order flow
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
class TestRecommendationDrivenOrders:
    """Full path: signals -> recommendation -> risk check -> paper fill."""

    async def test_bullish_recommendation_to_paper_buy(self):
        """A BUY recommendation passes default risk checks and fills on paper."""
        adapter = PaperTradingAdapter(initial_cash=100_000.0)

        _, _, recommendation = _build_trend_and_recommendation(
            _bullish_impacts("AAPL", count=4), "AAPL",
        )
        assert recommendation.action == ActionType.BUY
        assert recommendation.confidence > 0

        # Size the proposed order from the recommendation and run it through
        # the risk engine with a default configuration.
        account_state = _risk_state_from_adapter(adapter)
        sized_value = (
            recommendation.position_sizing.portfolio_pct
            * account_state.portfolio_value
        )
        candidate = ProposedOrder(
            ticker="AAPL",
            sector="Technology",
            estimated_value=sized_value,
            quantity=10,
            confidence=recommendation.confidence,
            recommendation_id=recommendation.recommendation_id,
        )
        verdict = evaluate_order(candidate, PortfolioRiskConfig(), account_state)
        assert verdict.passed

        # Submit the matching paper order.
        fill = await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.BUY,
                quantity=10,
                order_type=OrderType.LIMIT,
                limit_price=180.0,
            )
        )
        assert fill.status == OrderStatus.FILLED

    async def test_bearish_recommendation_to_paper_sell(self):
        """A SELL recommendation closes an existing TSLA position at a 200 loss."""
        adapter = PaperTradingAdapter(initial_cash=100_000.0)

        # Seed a position so there is something to sell.
        await adapter.submit_order(
            OrderRequest(
                ticker="TSLA",
                side=OrderSide.BUY,
                quantity=20,
                order_type=OrderType.LIMIT,
                limit_price=250.0,
            )
        )

        _, _, recommendation = _build_trend_and_recommendation(
            _bearish_impacts("TSLA", count=3), "TSLA",
        )
        assert recommendation.action == ActionType.SELL

        # Exit 10 per share below the entry price.
        fill = await adapter.submit_order(
            OrderRequest(
                ticker="TSLA",
                side=OrderSide.SELL,
                quantity=20,
                order_type=OrderType.LIMIT,
                limit_price=240.0,
            )
        )
        assert fill.status == OrderStatus.FILLED
        assert fill.raw_response["realized_pnl"] == pytest.approx(-200.0)

    async def test_low_confidence_recommendation_is_informational(self):
        """Two weak, low-credibility signals yield an informational recommendation."""
        first_weak = ImpactRow(
            document_id="doc-weak-1",
            confidence=0.40,
            novelty_score=0.3,
            source_credibility=0.5,
            sentiment="positive",
            impact_score=0.3,
            catalyst_type="other",
            key_facts=["Minor update"],
            risks=[],
            published_at=NOW - timedelta(hours=1),
        )
        second_weak = ImpactRow(
            document_id="doc-weak-2",
            confidence=0.35,
            novelty_score=0.2,
            source_credibility=0.4,
            sentiment="positive",
            impact_score=0.25,
            catalyst_type="other",
            key_facts=["Routine filing"],
            risks=[],
            published_at=NOW - timedelta(hours=3),
        )

        result = _build_trend_and_recommendation([first_weak, second_weak], "XYZ")
        recommendation = result[2]

        assert recommendation.mode == RecommendationMode.INFORMATIONAL
# ---------------------------------------------------------------------------
# Scenario 5: Idempotent order submission
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
class TestIdempotentOrderSubmission:
    """Replaying an order with the same idempotency key must not execute twice."""

    async def test_duplicate_buy_only_fills_once(self):
        """Submitting the same keyed buy twice yields one order, one debit, one lot."""
        adapter = PaperTradingAdapter(initial_cash=100_000.0)
        keyed_buy = OrderRequest(
            ticker="AAPL",
            side=OrderSide.BUY,
            quantity=10,
            order_type=OrderType.LIMIT,
            limit_price=150.0,
            idempotency_key="idem-buy-1",
        )

        first = await adapter.submit_order(keyed_buy)
        replay = await adapter.submit_order(keyed_buy)

        # Both submissions resolve to the same broker order.
        assert first.broker_order_id == replay.broker_order_id
        # Cash reflects a single 10 x 150 purchase.
        assert adapter.account.cash == pytest.approx(100_000.0 - 1_500.0)
        # The position holds 10 shares, not 20.
        position = adapter.account.get_position("AAPL")
        assert position.quantity == 10
# ---------------------------------------------------------------------------
# Scenario 6: Insufficient funds and shares
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
class TestInsufficientResources:
    """Orders that exceed available cash or held shares are rejected."""

    async def test_buy_exceeding_cash_rejected(self):
        """An 18,000 buy against 5,000 of cash is rejected for insufficient cash."""
        adapter = PaperTradingAdapter(initial_cash=5_000.0)

        outcome = await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.BUY,
                quantity=100,
                order_type=OrderType.LIMIT,
                limit_price=180.0,
            )
        )

        assert outcome.status == OrderStatus.REJECTED
        assert outcome.error is not None
        assert "Insufficient cash" in outcome.error

    async def test_sell_more_than_held_rejected(self):
        """Selling 20 shares while holding 10 is rejected for insufficient shares."""
        adapter = PaperTradingAdapter(initial_cash=100_000.0)
        await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.BUY,
                quantity=10,
                order_type=OrderType.LIMIT,
                limit_price=150.0,
            )
        )

        outcome = await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.SELL,
                quantity=20,
                order_type=OrderType.LIMIT,
                limit_price=155.0,
            )
        )

        assert outcome.status == OrderStatus.REJECTED
        assert outcome.error is not None
        assert "Insufficient shares" in outcome.error
# ---------------------------------------------------------------------------
# Scenario 7: Portfolio drawdown halts trading
# ---------------------------------------------------------------------------
class TestDrawdownHalt:
    """A losing session that trips the daily loss limit blocks new orders."""

    def test_cumulative_losses_trigger_halt(self):
        """With a -3,200 daily P&L and a 3,000 value limit, new orders are rejected."""
        limits = PortfolioRiskConfig(
            daily_loss=DailyLossLimits(
                max_daily_loss_pct=0.03,
                max_daily_loss_value=3_000.0,
                max_daily_trades=50,
            ),
        )
        # Account snapshot after several losing trades earlier in the day.
        account = AccountRiskState(
            portfolio_value=97_000.0,
            cash=47_000.0,
            daily_pnl=-3_200.0,
            daily_trade_count=8,
        )
        next_order = ProposedOrder(
            ticker="NVDA",
            sector="Technology",
            estimated_value=2_000.0,
            quantity=5,
        )

        outcome = evaluate_order(next_order, limits, account)

        assert not outcome.passed
        # Only the absolute-value limit is asserted here. The percentage limit
        # is also configured, but its check is not verified by this test.
        failed_names = set()
        for check in outcome.checks:
            if check.result == RiskCheckResult.FAIL:
                failed_names.add(check.check_name)
        assert "daily_loss_value" in failed_names
# ---------------------------------------------------------------------------
# Scenario 8: Full session simulation
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
class TestFullTradingSession:
    """A multi-trade session with a winner, a loser, and a held position."""

    async def test_morning_session_with_mixed_results(self):
        """Two closed round trips and one open buy reconcile cash, positions, events."""
        initial_cash = 100_000.0
        adapter = PaperTradingAdapter(initial_cash=100_000.0)

        async def _limit(symbol, side, shares, price):
            # Submit one limit order and hand back the adapter's response.
            return await adapter.submit_order(
                OrderRequest(
                    ticker=symbol,
                    side=side,
                    quantity=shares,
                    order_type=OrderType.LIMIT,
                    limit_price=price,
                )
            )

        # Trade 1: AAPL round trip, +5 per share on 30 shares.
        await _limit("AAPL", OrderSide.BUY, 30, 180.0)
        aapl_exit = await _limit("AAPL", OrderSide.SELL, 30, 185.0)
        assert aapl_exit.raw_response["realized_pnl"] == pytest.approx(150.0)

        # Trade 2: TSLA round trip, -8 per share on 10 shares.
        await _limit("TSLA", OrderSide.BUY, 10, 250.0)
        tsla_exit = await _limit("TSLA", OrderSide.SELL, 10, 242.0)
        assert tsla_exit.raw_response["realized_pnl"] == pytest.approx(-80.0)

        # Trade 3: MSFT is bought and left open.
        await _limit("MSFT", OrderSide.BUY, 5, 420.0)

        # Only the MSFT position should remain.
        held = await adapter.get_positions()
        assert len(held) == 1
        assert held[0].ticker == "MSFT"

        # Cash = initial + AAPL profit - TSLA loss - MSFT cost.
        expected_cash = initial_cash + 150.0 - 80.0 - (5 * 420.0)
        assert adapter.account.cash == pytest.approx(expected_cash)

        # The audit trail is expected to hold three events per filled order
        # (submitted, accepted, fill) across the five orders above.
        orders_filled = 5
        events_per_fill = 3
        assert len(adapter.account.order_events) == orders_filled * events_per_fill

    async def test_account_info_reflects_session(self):
        """``get_account`` reports id, paper mode, cash and portfolio value after a buy."""
        adapter = PaperTradingAdapter(initial_cash=50_000.0, account_id="sim-session")
        await adapter.submit_order(
            OrderRequest(
                ticker="AAPL",
                side=OrderSide.BUY,
                quantity=10,
                order_type=OrderType.LIMIT,
                limit_price=180.0,
            )
        )

        snapshot = await adapter.get_account()

        assert snapshot.account_id == "sim-session"
        assert snapshot.mode == TradingMode.PAPER
        assert snapshot.cash == pytest.approx(50_000.0 - 1_800.0)
        assert snapshot.portfolio_value == pytest.approx(50_000.0)