Files
stonks-oracle/tests/test_aggregation_scoring.py
T
Celes Renata c85c0068a2 fix: clean up utcnow deprecation warnings, fix 12 failing tests, add CI/CD pipeline manifests
- Replace all datetime.utcnow() with datetime.now(tz=timezone.utc) across 8 files
- Fix 12 failing tests to match current implementation behavior
- Fix pytest_plugins in non-top-level conftest (moved to root conftest.py)
- Auto-fix 189 lint issues (import sorting, unused imports)
- Add CI/CD pipeline infrastructure (ARC, ArgoCD, Kargo manifests)
- Add values-beta.yaml and values-paper.yaml for staged deployments
- Update GitHub Actions workflow to use self-hosted-gremlin runners
- Add integration-test job to CI pipeline

Result: 1596 passed, 0 failed, 0 warnings
2026-04-18 03:59:28 +00:00

248 lines
9.3 KiB
Python

"""Tests for aggregation scoring — recency decay, source credibility weighting,
and market context integration."""
from datetime import datetime, timedelta, timezone
from services.aggregation.scoring import (
DEFAULT_CONFIG,
ScoringConfig,
WeightedSignal,
compute_signal_weight,
credibility_weight,
market_context_multiplier,
recency_weight,
sentiment_to_numeric,
weighted_sentiment_average,
)
from services.shared.schemas import MarketContext
# ---------------------------------------------------------------------------
# recency_weight
# ---------------------------------------------------------------------------
def test_recency_weight_at_zero_age():
    """Publishing exactly at the reference instant must yield a weight of 1.0."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    weight = recency_weight(reference, reference, "7d")
    assert weight == 1.0
def test_recency_weight_future_document():
    """A publication time one hour after the reference must give exactly 1.0."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    one_hour_ahead = reference + timedelta(hours=1)
    weight = recency_weight(one_hour_ahead, reference, "7d")
    assert weight == 1.0
def test_recency_weight_at_one_half_life():
    """A document aged by exactly one "7d" half-life should weigh about 0.5."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    # The half-life comes from the default config (noted as 72 hours upstream).
    age = timedelta(hours=DEFAULT_CONFIG.half_life_hours["7d"])
    weight = recency_weight(reference - age, reference, "7d")
    assert abs(weight - 0.5) < 1e-9
def test_recency_weight_very_old_clamps_to_min():
    """A year-old document must land exactly on the configured minimum weight."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    year_old = reference - timedelta(days=365)
    weight = recency_weight(year_old, reference, "7d")
    floor = DEFAULT_CONFIG.min_recency_weight
    assert weight == floor
def test_recency_weight_different_windows():
    """For a 24-hour-old document, "intraday" must weigh less than "90d"."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    day_old = reference - timedelta(hours=24)
    short_window = recency_weight(day_old, reference, "intraday")
    long_window = recency_weight(day_old, reference, "90d")
    assert long_window > short_window
def test_recency_weight_naive_datetimes():
    """Naive datetimes are accepted and decay like aware ones.

    Both timestamps are built without tzinfo (the scoring code is expected
    to treat them as UTC).  A document aged by exactly one "7d" half-life
    must therefore weigh about 0.5.
    """
    now = datetime(2026, 4, 11, 12, 0, 0)
    # Read the half-life from the default config instead of hard-coding 72,
    # so retuning the config does not break this test for an unrelated reason.
    half_life_7d = DEFAULT_CONFIG.half_life_hours["7d"]
    published = now - timedelta(hours=half_life_7d)
    w = recency_weight(published, now, "7d")
    assert abs(w - 0.5) < 1e-9
# ---------------------------------------------------------------------------
# credibility_weight
# ---------------------------------------------------------------------------
def test_credibility_weight_high():
    """A credibility of 1.0 should map to a weight of (approximately) 1.0."""
    weight = credibility_weight(1.0)
    assert abs(weight - 1.0) < 1e-9
def test_credibility_weight_low_clamped():
    """A credibility of 0.0 should come back as the default credibility floor."""
    floor = DEFAULT_CONFIG.credibility_floor
    assert abs(credibility_weight(0.0) - floor) < 1e-9
def test_credibility_weight_mid():
    """With the default config, a credibility of 0.5 maps to a weight of 0.5."""
    weight = credibility_weight(0.5)
    assert abs(weight - 0.5) < 1e-9
def test_credibility_weight_custom_exponent():
    """An exponent of 2.0 turns a credibility of 0.5 into a weight of 0.25."""
    squared = ScoringConfig(credibility_exponent=2.0)
    weight = credibility_weight(0.5, config=squared)
    assert abs(weight - 0.25) < 1e-9
# ---------------------------------------------------------------------------
# compute_signal_weight
# ---------------------------------------------------------------------------
def test_signal_weight_gates_low_confidence():
    """Low extraction confidence zeroes both the gate and the combined weight."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    # 0.1 sits under the default confidence floor (0.2 per the upstream note).
    weight = compute_signal_weight(
        reference, reference, "7d", 0.8, extraction_confidence=0.1
    )
    assert weight.combined == 0.0
    assert weight.confidence_gate == 0.0
def test_signal_weight_fresh_high_credibility():
    """A zero-age, fully credible document with novelty 0.5 weighs 1.125."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    weight = compute_signal_weight(
        reference,
        reference,
        "7d",
        1.0,
        novelty_score=0.5,
        extraction_confidence=0.8,
    )
    # Expected factors: recency 1.0, credibility 1.0, novelty bonus 0.125,
    # and an open confidence gate.
    recency, credibility, bonus = 1.0, 1.0, 0.125
    target = recency * credibility * (1.0 + bonus)
    assert abs(weight.combined - target) < 1e-9
def test_signal_weight_novelty_bonus():
    """Novelty 1.0 must give a larger combined weight than novelty 0.0."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    # Everything except the novelty score is held constant across both calls.
    common = dict(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=0.8,
        extraction_confidence=0.8,
    )
    stale = compute_signal_weight(novelty_score=0.0, **common)
    novel = compute_signal_weight(novelty_score=1.0, **common)
    assert stale.combined < novel.combined
# ---------------------------------------------------------------------------
# sentiment helpers
# ---------------------------------------------------------------------------
def test_sentiment_to_numeric():
    """Each sentiment label must map to its expected numeric value."""
    expectations = (
        ("positive", 1.0),
        ("negative", -1.0),
        ("neutral", 0.0),
        ("mixed", 0.0),
        ("unknown", 0.0),
    )
    for label, value in expectations:
        assert sentiment_to_numeric(label) == value
def test_weighted_sentiment_average_empty():
    """Averaging an empty list of signals must return exactly 0.0."""
    no_signals = []
    assert weighted_sentiment_average(no_signals) == 0.0
def test_weighted_sentiment_average_single():
    """A lone signal with sentiment 1.0 must average to 1.0."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    weight = compute_signal_weight(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=0.8,
        extraction_confidence=0.8,
    )
    only_signal = WeightedSignal("doc1", weight, sentiment_value=1.0, impact_score=0.7)
    result = weighted_sentiment_average([only_signal])
    # With a single positive signal the average equals that signal's sentiment.
    assert abs(result - 1.0) < 1e-9
def test_weighted_sentiment_average_opposing():
    """Two signals with equal weight and opposite sentiment average to ~0."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    shared_weight = compute_signal_weight(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=0.8,
        extraction_confidence=0.8,
    )
    # Same weight object and impact score; only the sentiment sign differs.
    opposing = [
        WeightedSignal(doc_id, shared_weight, sentiment_value=sentiment, impact_score=0.5)
        for doc_id, sentiment in (("doc1", 1.0), ("doc2", -1.0))
    ]
    result = weighted_sentiment_average(opposing)
    assert abs(result) < 1e-9
# ---------------------------------------------------------------------------
# market_context_multiplier
# ---------------------------------------------------------------------------
def test_market_context_multiplier_none():
    """Passing None as the market context must yield a multiplier of 1.0."""
    multiplier = market_context_multiplier(None)
    assert multiplier == 1.0
def test_market_context_multiplier_no_data():
    """A context reporting zero available bars must yield a multiplier of 1.0."""
    empty_ctx = MarketContext(ticker="AAPL", bars_available=0)
    multiplier = market_context_multiplier(empty_ctx)
    assert multiplier == 1.0
def test_market_context_multiplier_low_volatility():
    """Volatility 0.5 with volume_change_pct 10.0 must leave the multiplier at 1.0."""
    calm_ctx = MarketContext(
        ticker="AAPL",
        volatility=0.5,
        volume_change_pct=10.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(calm_ctx)
    assert multiplier == 1.0
def test_market_context_multiplier_high_volatility():
    """Volatility 3.0 must boost the multiplier above 1.0, within the config cap."""
    volatile_ctx = MarketContext(
        ticker="AAPL",
        volatility=3.0,
        volume_change_pct=10.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(volatile_ctx)
    assert multiplier > 1.0
    # Upper bound: 1.0 plus both configured boost terms.
    ceiling = 1.0 + DEFAULT_CONFIG.volatility_recency_boost_max + DEFAULT_CONFIG.volume_surge_boost
    assert multiplier <= ceiling
def test_market_context_multiplier_volume_surge():
    """volume_change_pct 80.0 with volatility 0.5 adds exactly the surge boost."""
    surge_ctx = MarketContext(
        ticker="AAPL",
        volatility=0.5,
        volume_change_pct=80.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(surge_ctx)
    target = 1.0 + DEFAULT_CONFIG.volume_surge_boost
    assert abs(multiplier - target) < 1e-9
def test_market_context_multiplier_both_triggers():
    """Volatility 3.0 plus volume_change_pct 80.0 must exceed the surge boost alone."""
    hot_ctx = MarketContext(
        ticker="AAPL",
        volatility=3.0,
        volume_change_pct=80.0,
        bars_available=5,
    )
    multiplier = market_context_multiplier(hot_ctx)
    # The volume-surge-only value; the combined result must be strictly larger.
    surge_only = 1.0 + DEFAULT_CONFIG.volume_surge_boost
    assert multiplier > surge_only
# ---------------------------------------------------------------------------
# compute_signal_weight with market context
# ---------------------------------------------------------------------------
def test_signal_weight_with_market_context_boost():
    """A turbulent market context must raise the combined weight and multiplier."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    turbulent = MarketContext(
        ticker="AAPL",
        volatility=3.0,
        volume_change_pct=80.0,
        bars_available=10,
    )
    # Identical inputs for both calls; only the market context differs.
    shared_args = dict(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=0.8,
        extraction_confidence=0.8,
    )
    baseline = compute_signal_weight(**shared_args)
    boosted = compute_signal_weight(**shared_args, market_ctx=turbulent)
    assert boosted.combined > baseline.combined
    assert boosted.market_ctx_multiplier > 1.0
    assert baseline.market_ctx_multiplier == 1.0
def test_signal_weight_market_context_gated_still_zero():
    """A market context must not lift a confidence-0.1 document above zero."""
    reference = datetime(2026, 4, 11, 12, tzinfo=timezone.utc)
    extreme_ctx = MarketContext(
        ticker="AAPL",
        volatility=5.0,
        volume_change_pct=100.0,
        bars_available=10,
    )
    weight = compute_signal_weight(
        published_at=reference,
        reference_time=reference,
        window="7d",
        source_credibility=0.8,
        extraction_confidence=0.1,
        market_ctx=extreme_ctx,
    )
    assert weight.combined == 0.0