"""Evidence ranking for supporting and opposing documents. Ranks document signals by a composite score that considers multiple factors beyond raw weight, producing explainable evidence lists for trend summaries. Requirements: 6.5 """ from __future__ import annotations from dataclasses import dataclass from services.aggregation.scoring import WeightedSignal @dataclass(frozen=True) class EvidenceRankConfig: """Weights for the composite evidence ranking score.""" # How much the combined signal weight matters (recency * credibility * novelty * market) weight_factor: float = 0.40 # How much the document's impact score matters impact_factor: float = 0.30 # How much recency alone matters (favours fresh evidence in the ranking) recency_factor: float = 0.20 # How much extraction confidence matters confidence_factor: float = 0.10 # Maximum evidence refs per side (supporting / opposing) max_refs: int = 10 DEFAULT_RANK_CONFIG = EvidenceRankConfig() @dataclass class RankedEvidence: """A document with its composite ranking score and breakdown.""" document_id: str rank_score: float weight_component: float impact_component: float recency_component: float confidence_component: float sentiment_value: float # +1 / -1 / 0 def compute_evidence_rank( signal: WeightedSignal, config: EvidenceRankConfig = DEFAULT_RANK_CONFIG, ) -> RankedEvidence: """Compute a composite ranking score for a single signal. The score blends: - combined signal weight (captures recency decay, credibility, novelty, market ctx) - raw impact score - recency weight alone (extra boost for freshness in the ranking) - extraction confidence (via the credibility component of the weight) All components are in [0, 1] so the composite is bounded by the sum of the factor weights. """ w = signal.weight weight_component = w.combined * config.weight_factor impact_component = signal.impact_score * config.impact_factor recency_component = w.recency * config.recency_factor confidence_component = w.credibility * config.confidence_factor rank_score = weight_component + impact_component + recency_component + confidence_component return RankedEvidence( document_id=signal.document_id, rank_score=round(rank_score, 6), weight_component=round(weight_component, 6), impact_component=round(impact_component, 6), recency_component=round(recency_component, 6), confidence_component=round(confidence_component, 6), sentiment_value=signal.sentiment_value, ) def rank_evidence( signals: list[WeightedSignal], config: EvidenceRankConfig = DEFAULT_RANK_CONFIG, ) -> tuple[list[str], list[str]]: """Rank signals into top supporting and opposing document ID lists. Supporting = positive sentiment, Opposing = negative sentiment. Neutral/mixed signals are excluded. Returns (supporting_ids, opposing_ids) each capped at config.max_refs. """ supporting: list[RankedEvidence] = [] opposing: list[RankedEvidence] = [] for sig in signals: if sig.sentiment_value == 0.0: continue ranked = compute_evidence_rank(sig, config) if sig.sentiment_value > 0: supporting.append(ranked) else: opposing.append(ranked) supporting.sort(key=lambda r: r.rank_score, reverse=True) opposing.sort(key=lambda r: r.rank_score, reverse=True) return ( [r.document_id for r in supporting[: config.max_refs]], [r.document_id for r in opposing[: config.max_refs]], ) def rank_evidence_detailed( signals: list[WeightedSignal], config: EvidenceRankConfig = DEFAULT_RANK_CONFIG, ) -> tuple[list[RankedEvidence], list[RankedEvidence]]: """Like rank_evidence but returns full RankedEvidence objects. Useful when callers need the score breakdown for explainability. """ supporting: list[RankedEvidence] = [] opposing: list[RankedEvidence] = [] for sig in signals: if sig.sentiment_value == 0.0: continue ranked = compute_evidence_rank(sig, config) if sig.sentiment_value > 0: supporting.append(ranked) else: opposing.append(ranked) supporting.sort(key=lambda r: r.rank_score, reverse=True) opposing.sort(key=lambda r: r.rank_score, reverse=True) return ( supporting[: config.max_refs], opposing[: config.max_refs], )