# stonks-oracle/tests/test_aggregation_scoring.py

"""Tests for aggregation scoring — recency decay, source credibility weighting,
and market context integration."""
from datetime import datetime, timedelta, timezone

from services.aggregation.scoring import (
    DEFAULT_CONFIG,
    ScoringConfig,
    WeightedSignal,
    compute_signal_weight,
    credibility_weight,
    market_context_multiplier,
    recency_weight,
    sentiment_to_numeric,
    weighted_sentiment_average,
)
from services.shared.schemas import MarketContext

# ---------------------------------------------------------------------------
# recency_weight
# ---------------------------------------------------------------------------


def test_recency_weight_at_zero_age():
    """Zero elapsed time between publication and reference yields weight 1.0."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    weight = recency_weight(reference, reference, "7d")
    assert weight == 1.0


def test_recency_weight_future_document():
    """Publication one hour after the reference time still yields exactly 1.0."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    one_hour_ahead = reference + timedelta(hours=1)
    weight = recency_weight(one_hour_ahead, reference, "7d")
    assert weight == 1.0


def test_recency_weight_at_one_half_life():
    """A document exactly one half-life old should weigh ~0.5."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    # Read the "7d" half-life from the default config so the age tracks it.
    age = timedelta(hours=DEFAULT_CONFIG.half_life_hours["7d"])
    weight = recency_weight(reference - age, reference, "7d")
    deviation = abs(weight - 0.5)
    assert deviation < 1e-9


def test_recency_weight_very_old_clamps_to_min():
    """A year-old document lands exactly on the configured minimum weight."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    year_old = reference - timedelta(days=365)
    floor = DEFAULT_CONFIG.min_recency_weight
    assert recency_weight(year_old, reference, "7d") == floor


def test_recency_weight_different_windows():
    """The same 24h-old document weighs less in "intraday" than in "90d"."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    day_old = reference - timedelta(hours=24)
    short_window_weight = recency_weight(day_old, reference, "intraday")
    long_window_weight = recency_weight(day_old, reference, "90d")
    assert short_window_weight < long_window_weight


def test_recency_weight_naive_datetimes():
    """Naive datetimes are treated as UTC.

    Both timestamps are built without ``tzinfo``; a document exactly one
    half-life old must still weigh ~0.5.
    """
    now = datetime(2026, 4, 11, 12, 0, 0)
    # Read the half-life from the default config instead of hard-coding the
    # hour count, so this test stays aligned with the aware-datetime
    # half-life test if the configured value changes.
    half_life_7d = DEFAULT_CONFIG.half_life_hours["7d"]
    published = now - timedelta(hours=half_life_7d)
    w = recency_weight(published, now, "7d")
    assert abs(w - 0.5) < 1e-9


# ---------------------------------------------------------------------------
# credibility_weight
# ---------------------------------------------------------------------------


def test_credibility_weight_high():
    """Credibility 1.0 maps to a weight of ~1.0 under the default config."""
    weight = credibility_weight(1.0)
    deviation = abs(weight - 1.0)
    assert deviation < 1e-9


def test_credibility_weight_low_clamped():
    """Credibility 0.0 is raised to the configured credibility floor."""
    floor = DEFAULT_CONFIG.credibility_floor
    assert abs(credibility_weight(0.0) - floor) < 1e-9


def test_credibility_weight_mid():
    """Credibility 0.5 comes back as ~0.5 under the default config."""
    weight = credibility_weight(0.5)
    deviation = abs(weight - 0.5)
    assert deviation < 1e-9


def test_credibility_weight_custom_exponent():
    """With exponent 2.0, credibility 0.5 is weighted ~0.25."""
    squared_cfg = ScoringConfig(credibility_exponent=2.0)
    weight = credibility_weight(0.5, config=squared_cfg)
    deviation = abs(weight - 0.25)
    assert deviation < 1e-9


# ---------------------------------------------------------------------------
# compute_signal_weight
# ---------------------------------------------------------------------------


def test_signal_weight_gates_low_confidence():
    """Extraction confidence under the floor zeroes the gate and the combined weight."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    # 0.1 is below the default 0.2 confidence floor.
    low_confidence = 0.1
    gated = compute_signal_weight(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=0.8,
        extraction_confidence=low_confidence,
    )
    assert gated.combined == 0.0
    assert gated.confidence_gate == 0.0


def test_signal_weight_fresh_high_credibility():
    """A fresh, fully credible doc with novelty 0.5 gets the expected strong weight."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    weight = compute_signal_weight(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=1.0,
        novelty_score=0.5,
        extraction_confidence=0.8,
    )
    # Expected factors: recency 1.0, credibility 1.0, novelty bonus 0.125;
    # the confidence gate is open (1.0) and so does not scale the product.
    recency_part = 1.0
    credibility_part = 1.0
    novelty_bonus = 0.125
    expected = recency_part * credibility_part * (1.0 + novelty_bonus)
    deviation = abs(weight.combined - expected)
    assert deviation < 1e-9


def test_signal_weight_novelty_bonus():
    """Novelty 1.0 must yield a larger combined weight than novelty 0.0."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    stale_news = compute_signal_weight(
        reference, reference, "7d", 0.8, novelty_score=0.0, extraction_confidence=0.8
    )
    fresh_news = compute_signal_weight(
        reference, reference, "7d", 0.8, novelty_score=1.0, extraction_confidence=0.8
    )
    assert stale_news.combined < fresh_news.combined


# ---------------------------------------------------------------------------
# sentiment helpers
# ---------------------------------------------------------------------------


def test_sentiment_to_numeric():
    """Each sentiment label maps to its expected numeric value."""
    expected_by_label = (
        ("positive", 1.0),
        ("negative", -1.0),
        ("neutral", 0.0),
        ("mixed", 0.0),
        ("unknown", 0.0),
    )
    for label, expected in expected_by_label:
        assert sentiment_to_numeric(label) == expected


def test_weighted_sentiment_average_empty():
    """An empty signal list averages to 0.0."""
    no_signals = []
    assert weighted_sentiment_average(no_signals) == 0.0


def test_weighted_sentiment_average_single():
    """A lone positive signal averages to ~1.0."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    weight = compute_signal_weight(reference, reference, "7d", 0.8, extraction_confidence=0.8)
    only_signal = WeightedSignal("doc1", weight, sentiment_value=1.0, impact_score=0.7)
    average = weighted_sentiment_average([only_signal])
    # With a single positive signal the average equals that signal's sentiment.
    assert abs(average - 1.0) < 1e-9


def test_weighted_sentiment_average_opposing():
    """Two signals sharing one weight but with opposite sentiment average to ~0."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    shared_weight = compute_signal_weight(
        reference, reference, "7d", 0.8, extraction_confidence=0.8
    )
    opposing = [
        WeightedSignal(doc_id, shared_weight, sentiment_value=sentiment, impact_score=0.5)
        for doc_id, sentiment in (("doc1", 1.0), ("doc2", -1.0))
    ]
    average = weighted_sentiment_average(opposing)
    assert abs(average) < 1e-9


# ---------------------------------------------------------------------------
# market_context_multiplier
# ---------------------------------------------------------------------------


def test_market_context_multiplier_none():
    """Passing ``None`` as the market context gives a neutral multiplier of 1.0."""
    multiplier = market_context_multiplier(None)
    assert multiplier == 1.0


def test_market_context_multiplier_no_data():
    """A context reporting zero available bars gives a multiplier of 1.0."""
    empty_ctx = MarketContext(ticker="AAPL", bars_available=0)
    multiplier = market_context_multiplier(empty_ctx)
    assert multiplier == 1.0


def test_market_context_multiplier_low_volatility():
    """Volatility 0.5 with a 10% volume change leaves the multiplier at 1.0."""
    calm_ctx = MarketContext(
        ticker="AAPL",
        volatility=0.5,
        volume_change_pct=10.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(calm_ctx)
    assert multiplier == 1.0


def test_market_context_multiplier_high_volatility():
    """Volatility 3.0 boosts the multiplier above 1.0 without exceeding the configured cap."""
    volatile_ctx = MarketContext(
        ticker="AAPL",
        volatility=3.0,
        volume_change_pct=10.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(volatile_ctx)
    # Upper bound: both configured boosts applied in full on top of 1.0.
    ceiling = 1.0 + DEFAULT_CONFIG.volatility_recency_boost_max + DEFAULT_CONFIG.volume_surge_boost
    assert 1.0 < multiplier <= ceiling


def test_market_context_multiplier_volume_surge():
    """An 80% volume change with low volatility adds exactly the volume surge boost."""
    surge_ctx = MarketContext(
        ticker="AAPL",
        volatility=0.5,
        volume_change_pct=80.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(surge_ctx)
    expected = 1.0 + DEFAULT_CONFIG.volume_surge_boost
    assert abs(multiplier - expected) < 1e-9


def test_market_context_multiplier_both_triggers():
    """High volatility plus a volume surge exceeds the volume boost alone."""
    stressed_ctx = MarketContext(
        ticker="AAPL",
        volatility=3.0,
        volume_change_pct=80.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(stressed_ctx)
    # The volume-only result is the baseline the stacked result must beat.
    volume_only = 1.0 + DEFAULT_CONFIG.volume_surge_boost
    assert multiplier > volume_only


# ---------------------------------------------------------------------------
# compute_signal_weight with market context
# ---------------------------------------------------------------------------


def test_signal_weight_with_market_context_boost():
    """Supplying a volatile, high-volume market context raises the combined weight."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    stressed_ctx = MarketContext(
        ticker="AAPL",
        volatility=3.0,
        volume_change_pct=80.0,
        bars_available=10,
    )

    baseline = compute_signal_weight(
        reference, reference, "7d", 0.8, extraction_confidence=0.8
    )
    boosted = compute_signal_weight(
        reference, reference, "7d", 0.8, extraction_confidence=0.8, market_ctx=stressed_ctx
    )

    assert boosted.combined > baseline.combined
    assert boosted.market_ctx_multiplier > 1.0
    # Without a context the multiplier is exactly neutral.
    assert baseline.market_ctx_multiplier == 1.0


def test_signal_weight_market_context_gated_still_zero():
    """A strongly boosting market context does not lift a low-confidence doc off zero."""
    reference = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
    extreme_ctx = MarketContext(
        ticker="AAPL",
        volatility=5.0,
        volume_change_pct=100.0,
        bars_available=10,
    )

    gated = compute_signal_weight(
        reference, reference, "7d", 0.8, extraction_confidence=0.1, market_ctx=extreme_ctx
    )
    assert gated.combined == 0.0