Files
stonks-oracle/tests/test_evidence_ranking.py
T

137 lines
4.0 KiB
Python

"""Tests for evidence ranking — composite scoring for supporting/opposing docs.
Requirements: 6.5
"""
from datetime import datetime, timedelta, timezone
from services.aggregation.evidence import (
EvidenceRankConfig,
compute_evidence_rank,
rank_evidence,
rank_evidence_detailed,
)
from services.aggregation.scoring import WeightedSignal, compute_signal_weight
# Fixed, timezone-aware reference instant; _sw measures signal ages back from
# it, so the computed weights do not depend on the wall clock.
NOW = datetime(year=2026, month=4, day=11, hour=12, tzinfo=timezone.utc)
def _sw(
    doc_id: str = "doc-1",
    sentiment: float = 1.0,
    impact: float = 0.7,
    credibility: float = 0.8,
    confidence: float = 0.8,
    age_hours: float = 1.0,
) -> WeightedSignal:
    """Build a ``WeightedSignal`` fixture published ``age_hours`` before ``NOW``.

    The weight comes from ``compute_signal_weight`` evaluated over the "7d"
    window with ``NOW`` as the reference time, using ``credibility`` as the
    source credibility and ``confidence`` as the extraction confidence.
    ``sentiment`` and ``impact`` are passed through to the signal unchanged.
    """
    signal_weight = compute_signal_weight(
        published_at=NOW - timedelta(hours=age_hours),
        reference_time=NOW,
        window="7d",
        source_credibility=credibility,
        extraction_confidence=confidence,
    )
    return WeightedSignal(
        document_id=doc_id,
        weight=signal_weight,
        sentiment_value=sentiment,
        impact_score=impact,
    )
# ---------------------------------------------------------------------------
# compute_evidence_rank
# ---------------------------------------------------------------------------
def test_rank_score_positive():
    """A ranked signal has a positive score and keeps its id and sentiment."""
    result = compute_evidence_rank(
        _sw("d1", sentiment=1.0, impact=0.9, credibility=1.0)
    )

    assert result.rank_score > 0
    # The ranked record must carry over the identifying fields of its signal.
    assert result.document_id == "d1"
    assert result.sentiment_value == 1.0
def test_higher_impact_ranks_higher():
    """With everything else equal, impact 0.9 outranks impact 0.3."""
    minor_signal = _sw("low", impact=0.3)
    major_signal = _sw("high", impact=0.9)

    minor_rank, major_rank = (
        compute_evidence_rank(signal) for signal in (minor_signal, major_signal)
    )

    assert major_rank.rank_score > minor_rank.rank_score
def test_fresher_doc_ranks_higher():
    """A 1-hour-old document outranks an otherwise equal 100-hour-old one."""
    stale_signal = _sw("old", age_hours=100.0)
    recent_signal = _sw("fresh", age_hours=1.0)

    stale_rank, recent_rank = (
        compute_evidence_rank(signal) for signal in (stale_signal, recent_signal)
    )

    assert recent_rank.rank_score > stale_rank.rank_score
def test_higher_credibility_ranks_higher():
    """With everything else equal, credibility 1.0 outranks credibility 0.2."""
    doubtful_signal = _sw("low", credibility=0.2)
    trusted_signal = _sw("high", credibility=1.0)

    doubtful_rank, trusted_rank = (
        compute_evidence_rank(signal)
        for signal in (doubtful_signal, trusted_signal)
    )

    assert trusted_rank.rank_score > doubtful_rank.rank_score
# ---------------------------------------------------------------------------
# rank_evidence
# ---------------------------------------------------------------------------
def test_rank_evidence_separates_sides():
    """Positive docs are supporting, negative opposing, zero-sentiment neither."""
    mixed_signals = [
        _sw("pos1", sentiment=1.0, impact=0.9),
        _sw("pos2", sentiment=1.0, impact=0.3),
        _sw("neg1", sentiment=-1.0, impact=0.7),
        _sw("neutral", sentiment=0.0, impact=0.5),
    ]

    supporting_ids, opposing_ids = rank_evidence(mixed_signals)

    # Both positive-sentiment documents end up on the supporting side.
    assert "pos1" in supporting_ids
    assert "pos2" in supporting_ids
    # The negative-sentiment document ends up on the opposing side.
    assert "neg1" in opposing_ids
    # The zero-sentiment document is excluded from both lists.
    assert "neutral" not in supporting_ids
    assert "neutral" not in opposing_ids
def test_rank_evidence_ordered_by_composite():
    """The higher-impact, higher-credibility document is listed first."""
    supporting_ids, _opposing_ids = rank_evidence(
        [
            # Deliberately listed weakest-first so input order cannot pass the test.
            _sw("weak", sentiment=1.0, impact=0.2, credibility=0.3),
            _sw("strong", sentiment=1.0, impact=0.9, credibility=1.0),
        ]
    )

    assert supporting_ids[0] == "strong"
def test_rank_evidence_respects_max_refs():
    """``max_refs=3`` caps the supporting list even when 20 documents qualify."""
    positive_signals = [_sw(f"d{i}", sentiment=1.0) for i in range(20)]

    supporting_ids, opposing_ids = rank_evidence(
        positive_signals,
        config=EvidenceRankConfig(max_refs=3),
    )

    assert len(supporting_ids) == 3
    # Every input is positive, so nothing may appear on the opposing side.
    assert len(opposing_ids) == 0
def test_rank_evidence_empty():
    """Ranking no signals yields two empty lists."""
    supporting_ids, opposing_ids = rank_evidence([])

    assert supporting_ids == []
    assert opposing_ids == []
# ---------------------------------------------------------------------------
# rank_evidence_detailed
# ---------------------------------------------------------------------------
def test_detailed_returns_ranked_evidence_objects():
    """The detailed variant returns ranked objects, not bare document ids."""
    supporting, opposing = rank_evidence_detailed(
        [
            _sw("pos1", sentiment=1.0, impact=0.9),
            _sw("neg1", sentiment=-1.0, impact=0.7),
        ]
    )

    # Exactly one entry per side, each exposing the ranked-evidence attributes.
    assert len(supporting) == 1
    top_supporting = supporting[0]
    assert top_supporting.document_id == "pos1"
    assert top_supporting.rank_score > 0

    assert len(opposing) == 1
    assert opposing[0].document_id == "neg1"