# Files
#
# 142 lines
# 4.4 KiB
# Python

"""Evidence ranking for supporting and opposing documents.
Ranks document signals by a composite score that considers multiple
factors beyond raw weight, producing explainable evidence lists for
trend summaries.
Requirements: 6.5
"""
from __future__ import annotations
from dataclasses import dataclass
from services.aggregation.scoring import WeightedSignal
@dataclass(frozen=True)
class EvidenceRankConfig:
    """Immutable tuning knobs for the composite evidence ranking score.

    Each ``*_factor`` is the multiplier applied to one term of the
    composite score; the default factors nominally add up to 1.0.

    Attributes:
        weight_factor: How much the combined signal weight matters
            (recency * credibility * novelty * market).
        impact_factor: How much the document's impact score matters.
        recency_factor: How much recency alone matters; favours fresh
            evidence in the ranking.
        confidence_factor: How much extraction confidence matters.
        max_refs: Maximum evidence refs kept per side
            (supporting / opposing).
    """

    weight_factor: float = 0.40
    impact_factor: float = 0.30
    recency_factor: float = 0.20
    confidence_factor: float = 0.10
    max_refs: int = 10
# Module-wide default configuration, used as the default ``config`` argument
# of the ranking functions below. Sharing one instance is safe because
# EvidenceRankConfig is a frozen dataclass.
DEFAULT_RANK_CONFIG = EvidenceRankConfig()
@dataclass
class RankedEvidence:
    """One document's composite ranking score together with its breakdown.

    Attributes:
        document_id: Identifier of the ranked document.
        rank_score: Composite score used to order evidence.
        weight_component: Contribution of the combined signal weight.
        impact_component: Contribution of the impact score.
        recency_component: Contribution of recency alone.
        confidence_component: Contribution of the confidence term.
        sentiment_value: Sentiment of the signal: +1 / -1 / 0.
    """

    document_id: str
    rank_score: float
    weight_component: float
    impact_component: float
    recency_component: float
    confidence_component: float
    sentiment_value: float
def compute_evidence_rank(
    signal: WeightedSignal,
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> RankedEvidence:
    """Score a single signal and return the score with its breakdown.

    The composite is the sum of four terms, each an input scaled by its
    configured factor:
      - ``signal.weight.combined`` * ``config.weight_factor``
        (combined weight: recency decay, credibility, novelty, market ctx)
      - ``signal.impact_score`` * ``config.impact_factor``
      - ``signal.weight.recency`` * ``config.recency_factor``
        (extra boost for freshness)
      - ``signal.weight.credibility`` * ``config.confidence_factor``
        (extraction confidence, via the credibility part of the weight)

    The inputs are expected to lie in [0, 1] (not checked here), which
    bounds the composite by the sum of the factors.

    Args:
        signal: The weighted signal to score.
        config: Factors applied to each term.

    Returns:
        A RankedEvidence carrying the signal's document ID and sentiment,
        with the total and every term rounded to six decimal places.
    """
    digits = 6
    weights = signal.weight

    from_weight = weights.combined * config.weight_factor
    from_impact = signal.impact_score * config.impact_factor
    from_recency = weights.recency * config.recency_factor
    from_confidence = weights.credibility * config.confidence_factor

    # The total is built from the unrounded terms, then rounded on its own.
    total = from_weight + from_impact + from_recency + from_confidence

    return RankedEvidence(
        document_id=signal.document_id,
        rank_score=round(total, digits),
        weight_component=round(from_weight, digits),
        impact_component=round(from_impact, digits),
        recency_component=round(from_recency, digits),
        confidence_component=round(from_confidence, digits),
        sentiment_value=signal.sentiment_value,
    )
def rank_evidence(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[str], list[str]]:
    """Rank signals into top supporting and opposing document ID lists.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Neutral/mixed signals (``sentiment_value == 0.0``) are excluded.

    The partitioning, scoring, ordering and capping are delegated to
    ``rank_evidence_detailed`` so the two entry points cannot drift
    apart; this function only projects the result down to document IDs.

    Args:
        signals: Weighted signals to rank.
        config: Ranking factors and the per-side cap (``max_refs``).

    Returns:
        ``(supporting_ids, opposing_ids)``, each ordered by descending
        rank score and capped at ``config.max_refs``.
    """
    supporting, opposing = rank_evidence_detailed(signals, config)
    return (
        [ranked.document_id for ranked in supporting],
        [ranked.document_id for ranked in opposing],
    )
def rank_evidence_detailed(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[RankedEvidence], list[RankedEvidence]]:
    """Rank signals and return full RankedEvidence objects per side.

    Same selection as ``rank_evidence``, but the score breakdown is kept
    for callers that need explainability.

    Args:
        signals: Weighted signals to rank.
        config: Ranking factors and the per-side cap (``max_refs``).

    Returns:
        ``(supporting, opposing)``: positive-sentiment and negative-sentiment
        evidence respectively, each ordered by descending ``rank_score`` and
        sliced to ``config.max_refs``. Signals whose ``sentiment_value``
        equals 0.0 are left out.
    """

    def _by_score(item: RankedEvidence) -> float:
        return item.rank_score

    pro: list[RankedEvidence] = []
    con: list[RankedEvidence] = []
    for signal in signals:
        if signal.sentiment_value == 0.0:
            continue  # neutral signals take no side
        entry = compute_evidence_rank(signal, config)
        bucket = pro if signal.sentiment_value > 0 else con
        bucket.append(entry)

    limit = config.max_refs
    # sorted() is stable, so equally scored items keep their input order.
    top_pro = sorted(pro, key=_by_score, reverse=True)[:limit]
    top_con = sorted(con, key=_by_score, reverse=True)[:limit]
    return (top_pro, top_con)