phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,169 @@
"""Contradiction detection and disagreement representation.

Analyses weighted signals to detect and represent disagreement explicitly,
rather than collapsing contradictory evidence into a single unsupported
conclusion.

Requirements: 6.4, 6.5
"""
|
||||
from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass

from services.aggregation.scoring import WeightedSignal
from services.shared.schemas import DisagreementDetail
|
||||
|
||||
|
||||
@dataclass
class CatalystEntry:
    """Minimal per-document catalyst record used by contradiction detection.

    Exists so this module does not have to import ImpactRow, which would
    create a circular dependency with worker.py.
    """

    # Identifier of the document; looked up against the signals' document_id.
    document_id: str
    # Catalyst label; entries sharing a label are compared with each other.
    catalyst_type: str
|
||||
|
||||
|
||||
@dataclass
class ContradictionResult:
    """Outcome of a complete contradiction analysis run."""

    # Overall contradiction score in the 0-1 range; keeps the semantics of
    # the existing compute_contradiction_score.
    score: float
    # Disagreement records collected by the individual detection passes.
    details: list[DisagreementDetail]
|
||||
|
||||
|
||||
def detect_contradictions(
    signals: list[WeightedSignal],
    catalyst_entries: list[CatalystEntry] | None = None,
) -> ContradictionResult:
    """Run every contradiction analysis and bundle the findings.

    Two dimensions are examined:

    1. Sentiment disagreement: the core positive-vs-negative split.
    2. Catalyst disagreement: one catalyst type carrying opposing
       sentiment. This pass only runs when ``catalyst_entries`` is
       non-empty.

    The overall score is always derived from ``signals`` alone.

    Returns:
        A ContradictionResult holding the overall score plus the
        per-dimension disagreement details (sentiment first, then
        catalysts).
    """
    findings: list[DisagreementDetail] = []

    sentiment_finding = _detect_sentiment_disagreement(signals)
    if sentiment_finding is not None:
        findings.append(sentiment_finding)

    # None and an empty list both mean "no catalyst information supplied".
    if catalyst_entries:
        findings += _detect_catalyst_disagreement(signals, catalyst_entries)

    return ContradictionResult(
        score=_compute_overall_score(signals),
        details=findings,
    )
|
||||
|
||||
|
||||
def _compute_overall_score(signals: list[WeightedSignal]) -> float:
    """Return the minority/majority weight ratio (backward-compatible formula).

    Each signal contributes ``weight.combined * impact_score`` to the
    positive or the negative side, chosen by the sign of
    ``sentiment_value``. Signals with a zero sentiment value contribute to
    neither side.

    Signals whose effective weight is zero or negative are skipped, exactly
    as the per-dimension detectors in this module do. Without that filter a
    negative weight could drive the result outside the documented 0-1 range.

    Args:
        signals: Weighted signals to analyse; may be empty.

    Returns:
        The lighter side's share of the total weight, rounded to 4 decimal
        places, or 0.0 when no signal carries usable weight.
    """
    pos_weight = 0.0
    neg_weight = 0.0
    for sig in signals:
        w = sig.weight.combined * sig.impact_score
        if w <= 0:
            # Non-positive weights carry no evidence; keep in step with the
            # sentiment and catalyst detectors, which drop them as well.
            continue
        if sig.sentiment_value > 0:
            pos_weight += w
        elif sig.sentiment_value < 0:
            neg_weight += w

    total = pos_weight + neg_weight
    if total <= 0.0:
        # Covers empty input and input with only neutral/unweighted signals.
        return 0.0

    return round(min(pos_weight, neg_weight) / total, 4)
|
||||
|
||||
|
||||
def _detect_sentiment_disagreement(
    signals: list[WeightedSignal],
) -> DisagreementDetail | None:
    """Report a sentiment split when both polarities are present.

    A signal counts only if its effective weight
    (``weight.combined * impact_score``) is positive and its
    ``sentiment_value`` is non-zero.

    Returns:
        A DisagreementDetail for the "sentiment" dimension, or None when
        either the positive or the negative side has no counted signal.
    """
    bullish_ids: list[str] = []
    bearish_ids: list[str] = []
    bullish_total = 0.0
    bearish_total = 0.0

    for signal in signals:
        effective = signal.weight.combined * signal.impact_score
        if effective <= 0:
            continue
        polarity = signal.sentiment_value
        if polarity > 0:
            bullish_ids.append(signal.document_id)
            bullish_total += effective
        elif polarity < 0:
            bearish_ids.append(signal.document_id)
            bearish_total += effective

    # Disagreement needs at least one counted signal on each side.
    if not (bullish_ids and bearish_ids):
        return None

    combined = bullish_total + bearish_total
    if combined > 0:
        minority_share = min(bullish_total, bearish_total) / combined
    else:
        minority_share = 0.0

    summary = (
        f"Sentiment split: {len(bullish_ids)} positive vs {len(bearish_ids)} negative signals "
        f"(minority weight ratio {minority_share:.0%})"
    )
    return DisagreementDetail(
        dimension="sentiment",
        positive_doc_ids=bullish_ids,
        negative_doc_ids=bearish_ids,
        positive_weight=round(bullish_total, 4),
        negative_weight=round(bearish_total, 4),
        description=summary,
    )
|
||||
|
||||
|
||||
def _detect_catalyst_disagreement(
    signals: list[WeightedSignal],
    catalyst_entries: list[CatalystEntry],
) -> list[DisagreementDetail]:
    """Detect catalyst types that carry both positive and negative signals.

    Each catalyst entry is joined to its signal by ``document_id``. Entries
    are dropped when no signal matches, when the signal's effective weight
    (``weight.combined * impact_score``) is not positive, or when its
    ``sentiment_value`` is zero.

    A document is counted at most once per catalyst type, so a repeated
    ``(document_id, catalyst_type)`` entry neither duplicates the document
    id in the output nor double-counts its weight.

    Args:
        signals: Weighted signals providing sentiment and weight per document.
        catalyst_entries: Per-document catalyst labels to group by.

    Returns:
        One DisagreementDetail per catalyst type that has at least one
        positive and one negative document, in first-seen catalyst order.
        Empty when no catalyst shows opposing sentiment.
    """
    # document_id -> (sentiment_value, effective weight); only usable weights.
    sig_lookup: dict[str, tuple[float, float]] = {}
    for sig in signals:
        w = sig.weight.combined * sig.impact_score
        if w > 0:
            sig_lookup[sig.document_id] = (sig.sentiment_value, w)

    # catalyst_type -> {document_id: (sentiment_value, weight)}. Keying the
    # inner mapping by document id is what prevents double counting.
    catalyst_groups: defaultdict[str, dict[str, tuple[float, float]]] = defaultdict(dict)
    for entry in catalyst_entries:
        matched = sig_lookup.get(entry.document_id)
        if matched is None or matched[0] == 0.0:
            continue
        catalyst_groups[entry.catalyst_type].setdefault(entry.document_id, matched)

    details: list[DisagreementDetail] = []
    for catalyst, members in catalyst_groups.items():
        pos_ids = [doc_id for doc_id, (sv, _) in members.items() if sv > 0]
        neg_ids = [doc_id for doc_id, (sv, _) in members.items() if sv < 0]
        if not pos_ids or not neg_ids:
            continue

        pos_w = sum(w for sv, w in members.values() if sv > 0)
        neg_w = sum(w for sv, w in members.values() if sv < 0)

        details.append(
            DisagreementDetail(
                dimension=f"catalyst:{catalyst}",
                positive_doc_ids=pos_ids,
                negative_doc_ids=neg_ids,
                positive_weight=round(pos_w, 4),
                negative_weight=round(neg_w, 4),
                description=(
                    f"Catalyst '{catalyst}' has {len(pos_ids)} positive and "
                    f"{len(neg_ids)} negative signals"
                ),
            )
        )

    return details
|
||||
Reference in New Issue
Block a user