phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
"""Evidence ranking for supporting and opposing documents.

Ranks document signals by a composite score that considers multiple
factors beyond raw weight, producing explainable evidence lists for
trend summaries.

Requirements: 6.5
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from services.aggregation.scoring import WeightedSignal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EvidenceRankConfig:
    """Weights for the composite evidence ranking score.

    Each ``*_factor`` is the multiplier applied to one input of the
    composite score; the defaults sum to 1.0.  Instances are frozen, so a
    single configuration object can be shared safely (for example as a
    default argument value).
    """

    # How much the combined signal weight matters
    # (recency * credibility * novelty * market).
    weight_factor: float = 0.40

    # How much the document's impact score matters.
    impact_factor: float = 0.30

    # How much recency alone matters (favours fresh evidence in the ranking).
    recency_factor: float = 0.20

    # How much extraction confidence matters.
    confidence_factor: float = 0.10

    # Maximum evidence refs per side (supporting / opposing).
    max_refs: int = 10
|
||||
|
||||
|
||||
# Shared default configuration, used as the default `config` argument of the
# ranking functions in this module.  EvidenceRankConfig is a frozen dataclass,
# so sharing one instance across calls is safe.
DEFAULT_RANK_CONFIG = EvidenceRankConfig()
|
||||
|
||||
|
||||
@dataclass
class RankedEvidence:
    """A document with its composite ranking score and breakdown.

    The four ``*_component`` fields are the individual terms whose sum is
    ``rank_score``; they are kept so callers can explain why a document
    ranked where it did.
    """

    # Identifier of the document the signal was derived from.
    document_id: str
    # Composite score used for ordering (higher ranks first).
    rank_score: float
    # Contribution of the combined signal weight.
    weight_component: float
    # Contribution of the document's impact score.
    impact_component: float
    # Contribution of recency on its own.
    recency_component: float
    # Contribution of the confidence term.
    confidence_component: float
    sentiment_value: float  # +1 / -1 / 0
|
||||
|
||||
|
||||
def compute_evidence_rank(
    signal: WeightedSignal,
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> RankedEvidence:
    """Compute a composite ranking score for a single signal.

    The score is the sum of four terms, each an input scaled by its
    factor from ``config``:

    - ``signal.weight.combined`` * ``weight_factor`` (combined signal weight)
    - ``signal.impact_score`` * ``impact_factor`` (raw impact score)
    - ``signal.weight.recency`` * ``recency_factor`` (extra boost for freshness)
    - ``signal.weight.credibility`` * ``confidence_factor`` (extraction
      confidence, taken from the credibility component of the weight)

    Assuming every input lies in [0, 1], the composite is bounded by the
    sum of the factor weights.

    Args:
        signal: The weighted signal to score.
        config: Factor weights; defaults to ``DEFAULT_RANK_CONFIG``.

    Returns:
        A ``RankedEvidence`` carrying the total score, each term of the
        breakdown, and the signal's sentiment value.  All numeric score
        fields are rounded to 6 decimal places.
    """
    weight = signal.weight

    weight_component = weight.combined * config.weight_factor
    impact_component = signal.impact_score * config.impact_factor
    recency_component = weight.recency * config.recency_factor
    confidence_component = weight.credibility * config.confidence_factor

    # Sum the unrounded terms so rounding error does not accumulate in the
    # total; each value is rounded only when stored.
    rank_score = (
        weight_component
        + impact_component
        + recency_component
        + confidence_component
    )

    return RankedEvidence(
        document_id=signal.document_id,
        rank_score=round(rank_score, 6),
        weight_component=round(weight_component, 6),
        impact_component=round(impact_component, 6),
        recency_component=round(recency_component, 6),
        confidence_component=round(confidence_component, 6),
        sentiment_value=signal.sentiment_value,
    )
|
||||
|
||||
|
||||
def rank_evidence(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[str], list[str]]:
    """Rank signals into top supporting and opposing document ID lists.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Neutral/mixed signals are excluded.

    Args:
        signals: Weighted signals to rank.
        config: Factor weights and per-side cap; defaults to
            ``DEFAULT_RANK_CONFIG``.

    Returns:
        ``(supporting_ids, opposing_ids)``, each ordered by descending
        rank score and capped at ``config.max_refs``.
    """
    # Partitioning, scoring, ordering and capping live in
    # rank_evidence_detailed; delegating keeps both entry points in
    # agreement.  Only the projection to document IDs happens here.
    supporting, opposing = rank_evidence_detailed(signals, config)
    return (
        [evidence.document_id for evidence in supporting],
        [evidence.document_id for evidence in opposing],
    )
|
||||
|
||||
|
||||
def rank_evidence_detailed(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[RankedEvidence], list[RankedEvidence]]:
    """Rank signals and return full RankedEvidence objects per side.

    Useful when callers need the score breakdown for explainability.

    Signals with strictly positive sentiment are supporting and those with
    strictly negative sentiment are opposing.  Anything else (zero, or a
    value such as NaN that is neither positive nor negative) is excluded.

    Args:
        signals: Weighted signals to rank.
        config: Factor weights and per-side cap; defaults to
            ``DEFAULT_RANK_CONFIG``.

    Returns:
        ``(supporting, opposing)``, each sorted by descending
        ``rank_score`` and holding at most ``config.max_refs`` items.
        Signals with equal scores keep their input order.
    """
    # A negative cap would make the slice below drop items from the tail
    # instead of limiting the list, so treat it as "no refs".
    limit = max(config.max_refs, 0)

    supporting: list[RankedEvidence] = []
    opposing: list[RankedEvidence] = []

    for sig in signals:
        sentiment = sig.sentiment_value
        if sentiment > 0:
            supporting.append(compute_evidence_rank(sig, config))
        elif sentiment < 0:
            opposing.append(compute_evidence_rank(sig, config))
        # Neutral (0) and unordered (NaN) sentiments fall through unranked.

    # list.sort is stable, so ties preserve the original signal order.
    supporting.sort(key=lambda r: r.rank_score, reverse=True)
    opposing.sort(key=lambda r: r.rank_score, reverse=True)

    return (
        supporting[:limit],
        opposing[:limit],
    )
|
||||
Reference in New Issue
Block a user