142 lines
4.4 KiB
Python
142 lines
4.4 KiB
Python
"""Evidence ranking for supporting and opposing documents.
|
|
|
|
Ranks document signals by a composite score that considers multiple
|
|
factors beyond raw weight, producing explainable evidence lists for
|
|
trend summaries.
|
|
|
|
Requirements: 6.5
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from services.aggregation.scoring import WeightedSignal
|
|
|
|
|
|
@dataclass(frozen=True)
class EvidenceRankConfig:
    """Weights for the composite evidence ranking score.

    Each ``*_factor`` scales one input of the composite score; the default
    factors sum to 1.0. The dataclass is frozen, so an instance can be
    shared (e.g. as a default argument) without risk of mutation.
    """

    # How much the combined signal weight matters
    # (recency * credibility * novelty * market)
    weight_factor: float = 0.40
    # How much the document's impact score matters
    impact_factor: float = 0.30
    # How much recency alone matters (favours fresh evidence in the ranking)
    recency_factor: float = 0.20
    # How much extraction confidence matters
    confidence_factor: float = 0.10
    # Maximum evidence refs per side (supporting / opposing)
    max_refs: int = 10
|
|
|
|
|
|
# Shared default configuration; the ranking functions in this module use it
# when the caller does not pass an explicit ``config``.
DEFAULT_RANK_CONFIG = EvidenceRankConfig()
|
|
|
|
|
|
@dataclass
class RankedEvidence:
    """A document with its composite ranking score and breakdown.

    The four ``*_component`` fields hold the individual weighted
    contributions to ``rank_score`` so callers can explain why a document
    ranked where it did.
    """

    # Identifier of the document this evidence refers to
    document_id: str
    # Composite score used to order evidence
    rank_score: float
    # Contribution of the combined signal weight
    weight_component: float
    # Contribution of the document's impact score
    impact_component: float
    # Contribution of recency alone
    recency_component: float
    # Contribution of extraction confidence
    confidence_component: float
    sentiment_value: float  # +1 / -1 / 0
|
|
|
|
|
|
def compute_evidence_rank(
    signal: WeightedSignal,
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> RankedEvidence:
    """Compute a composite ranking score for a single signal.

    The score is the sum of four weighted components:

    - combined signal weight (``signal.weight.combined``)
    - raw impact score (``signal.impact_score``)
    - recency weight alone (extra boost for freshness in the ranking)
    - extraction confidence (via the credibility component of the weight)

    Assuming every input lies in [0, 1] (not enforced here), the composite
    is bounded by the sum of the factor weights in ``config``.

    Args:
        signal: The weighted signal to score.
        config: Factor weights applied to each component.

    Returns:
        A ``RankedEvidence`` carrying the composite score and its per-component
        breakdown, each rounded to 6 decimal places, together with the
        signal's document ID and sentiment value.
    """
    weight = signal.weight

    weight_component = weight.combined * config.weight_factor
    impact_component = signal.impact_score * config.impact_factor
    recency_component = weight.recency * config.recency_factor
    confidence_component = weight.credibility * config.confidence_factor

    # Sum the unrounded components so rounding error does not accumulate
    # in the final score.
    rank_score = (
        weight_component
        + impact_component
        + recency_component
        + confidence_component
    )

    return RankedEvidence(
        document_id=signal.document_id,
        rank_score=round(rank_score, 6),
        weight_component=round(weight_component, 6),
        impact_component=round(impact_component, 6),
        recency_component=round(recency_component, 6),
        confidence_component=round(confidence_component, 6),
        sentiment_value=signal.sentiment_value,
    )
|
|
|
|
|
|
def rank_evidence(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[str], list[str]]:
    """Rank signals into top supporting and opposing document ID lists.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Neutral/mixed signals (sentiment value of 0) are excluded.

    Args:
        signals: Weighted signals to rank.
        config: Factor weights and the per-side cap (``max_refs``).

    Returns:
        ``(supporting_ids, opposing_ids)``, each ordered by descending
        rank score and capped at ``config.max_refs``.
    """
    # Partitioning, scoring, sorting and capping live in
    # rank_evidence_detailed; this function only projects the document IDs,
    # so the two entry points cannot drift apart.
    supporting, opposing = rank_evidence_detailed(signals, config)
    return (
        [evidence.document_id for evidence in supporting],
        [evidence.document_id for evidence in opposing],
    )
|
|
|
|
|
|
def rank_evidence_detailed(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[RankedEvidence], list[RankedEvidence]]:
    """Rank signals into top supporting and opposing evidence objects.

    Like ``rank_evidence`` but returns full ``RankedEvidence`` objects, which
    is useful when callers need the score breakdown for explainability.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Signals whose sentiment value is exactly 0 are excluded.

    Args:
        signals: Weighted signals to rank.
        config: Factor weights and the per-side cap (``max_refs``).

    Returns:
        ``(supporting, opposing)``, each ordered by descending rank score
        (ties keep their input order) and capped at ``config.max_refs``.
        A negative ``max_refs`` is treated as 0.
    """
    supporting: list[RankedEvidence] = []
    opposing: list[RankedEvidence] = []

    for sig in signals:
        if sig.sentiment_value == 0.0:
            continue
        ranked = compute_evidence_rank(sig, config)
        if sig.sentiment_value > 0:
            supporting.append(ranked)
        else:
            opposing.append(ranked)

    # list.sort is stable, also with reverse=True, so equally scored
    # documents keep the order in which they appeared in ``signals``.
    supporting.sort(key=lambda r: r.rank_score, reverse=True)
    opposing.sort(key=lambda r: r.rank_score, reverse=True)

    # Slicing with a negative bound means "drop the last N", not "keep at
    # most N"; clamp so that a negative cap yields no refs instead.
    limit = max(config.max_refs, 0)

    return (
        supporting[:limit],
        opposing[:limit],
    )
|