170 lines
5.3 KiB
Python
170 lines
5.3 KiB
Python
"""Contradiction detection and disagreement representation.
|
|
|
|
Analyses weighted signals to detect and represent disagreement explicitly,
|
|
rather than collapsing contradictory evidence into a single unsupported
|
|
conclusion.
|
|
|
|
Requirements: 6.4, 6.5
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from services.aggregation.scoring import WeightedSignal
|
|
from services.shared.schemas import DisagreementDetail
|
|
|
|
|
|
@dataclass
class CatalystEntry:
    """Pairing of a document with the catalyst type attributed to it.

    Contradiction detection needs only these two fields, so this small
    carrier is used instead of ImpactRow; importing ImpactRow here would
    create a circular dependency with worker.py.
    """

    # Identifier of the document the catalyst was extracted from.
    document_id: str
    # Catalyst category label; entries are grouped by this value.
    catalyst_type: str
|
|
|
|
|
|
@dataclass
class ContradictionResult:
    """Outcome of a complete contradiction analysis run."""

    # Overall contradiction score in the 0-1 range; same semantics as the
    # existing compute_contradiction_score.
    score: float
    # Disagreement records, one per dimension in which a split was found.
    details: list[DisagreementDetail]
|
|
|
|
|
|
def detect_contradictions(
    signals: list[WeightedSignal],
    catalyst_entries: list[CatalystEntry] | None = None,
) -> ContradictionResult:
    """Analyse the signals for disagreement along several dimensions.

    Dimensions examined:
      1. Sentiment — positive and negative signals are both present.
      2. Catalyst — a single catalyst type carries both positive and
         negative signals. Only evaluated when ``catalyst_entries`` is
         provided and non-empty.

    The overall score is computed from ``signals`` alone and does not depend
    on how many per-dimension details were produced.

    Returns:
        A ContradictionResult holding the overall score and the list of
        per-dimension disagreement details (possibly empty).
    """
    findings: list[DisagreementDetail] = []

    sentiment_split = _detect_sentiment_disagreement(signals)
    if sentiment_split is not None:
        findings.append(sentiment_split)

    # An empty list and None are treated the same: no catalyst analysis.
    if catalyst_entries:
        findings.extend(_detect_catalyst_disagreement(signals, catalyst_entries))

    return ContradictionResult(
        score=_compute_overall_score(signals),
        details=findings,
    )
|
|
|
|
|
|
def _compute_overall_score(signals: list[WeightedSignal]) -> float:
    """Return the minority side's share of the total directional weight.

    Every signal contributes ``weight.combined * impact_score`` to the
    positive or the negative side according to the sign of its
    ``sentiment_value``; a sentiment value of zero contributes to neither.
    The score is ``min(pos, neg) / (pos + neg)`` rounded to four decimals:
    0.0 when all weight sits on one side, 0.5 for an even split.

    Signals whose effective weight is not strictly positive (zero, negative
    or NaN) are ignored, matching the filtering applied by the sentiment and
    catalyst detail helpers. Without that filter a negative weight could
    push the result outside the documented 0-1 range. For inputs with
    non-negative weights the result is identical to the legacy formula.

    Returns:
        The rounded score, or 0.0 when ``signals`` is empty or no signal
        carries any directional weight.
    """
    pos_weight = 0.0
    neg_weight = 0.0
    for sig in signals:
        w = sig.weight.combined * sig.impact_score
        # ``not w > 0`` (rather than ``w <= 0``) also rejects NaN.
        if not w > 0:
            continue
        if sig.sentiment_value > 0:
            pos_weight += w
        elif sig.sentiment_value < 0:
            neg_weight += w

    total = pos_weight + neg_weight
    if total <= 0.0:
        return 0.0

    return round(min(pos_weight, neg_weight) / total, 4)
|
|
|
|
|
|
def _detect_sentiment_disagreement(
    signals: list[WeightedSignal],
) -> DisagreementDetail | None:
    """Report a sentiment split when both polarities are represented.

    A signal counts only if its effective weight
    (``weight.combined * impact_score``) is strictly positive and its
    ``sentiment_value`` is non-zero.

    Returns:
        A DisagreementDetail for the "sentiment" dimension listing the
        document ids and summed weights of each side, or None when either
        side has no counted signal.
    """
    bullish_ids: list[str] = []
    bearish_ids: list[str] = []
    bullish_total = 0.0
    bearish_total = 0.0

    for signal in signals:
        effective = signal.weight.combined * signal.impact_score
        if effective <= 0:
            # Weightless signals cannot support either side.
            continue
        if signal.sentiment_value > 0:
            bullish_ids.append(signal.document_id)
            bullish_total += effective
        elif signal.sentiment_value < 0:
            bearish_ids.append(signal.document_id)
            bearish_total += effective

    # A disagreement requires at least one counted signal on each side.
    if not (bullish_ids and bearish_ids):
        return None

    combined = bullish_total + bearish_total
    if combined > 0:
        minority_pct = min(bullish_total, bearish_total) / combined
    else:
        minority_pct = 0.0

    summary = (
        f"Sentiment split: {len(bullish_ids)} positive vs {len(bearish_ids)} negative signals "
        f"(minority weight ratio {minority_pct:.0%})"
    )
    return DisagreementDetail(
        dimension="sentiment",
        positive_doc_ids=bullish_ids,
        negative_doc_ids=bearish_ids,
        positive_weight=round(bullish_total, 4),
        negative_weight=round(bearish_total, 4),
        description=summary,
    )
|
|
|
|
|
|
def _detect_catalyst_disagreement(
    signals: list[WeightedSignal],
    catalyst_entries: list[CatalystEntry],
) -> list[DisagreementDetail]:
    """Find catalyst types that carry both positive and negative signals.

    Each catalyst entry is joined to its signal by ``document_id``. Signals
    with a non-positive effective weight (``weight.combined * impact_score``)
    and entries whose signal has a zero sentiment value are left out.

    Returns:
        One DisagreementDetail per catalyst type that has at least one
        positive and one negative document, in order of first appearance of
        the catalyst type in ``catalyst_entries``; empty when there is none.
    """
    # document_id -> (sentiment_value, effective weight). If a document id
    # occurs more than once in ``signals`` the last usable signal wins.
    usable: dict[str, tuple[float, float]] = {}
    for signal in signals:
        effective = signal.weight.combined * signal.impact_score
        if effective > 0:
            usable[signal.document_id] = (signal.sentiment_value, effective)

    # catalyst_type -> [(document_id, sentiment_value, effective weight), ...]
    by_catalyst: dict[str, list[tuple[str, float, float]]] = {}
    for entry in catalyst_entries:
        hit = usable.get(entry.document_id)
        if hit is None:
            continue
        sentiment, weight = hit
        if sentiment != 0.0:
            by_catalyst.setdefault(entry.catalyst_type, []).append(
                (entry.document_id, sentiment, weight)
            )

    details: list[DisagreementDetail] = []
    for catalyst, members in by_catalyst.items():
        positives = [(doc_id, w) for doc_id, sv, w in members if sv > 0]
        negatives = [(doc_id, w) for doc_id, sv, w in members if sv < 0]
        # Only a catalyst with documents on both sides is a disagreement.
        if not positives or not negatives:
            continue

        pos_ids = [doc_id for doc_id, _ in positives]
        neg_ids = [doc_id for doc_id, _ in negatives]
        details.append(
            DisagreementDetail(
                dimension=f"catalyst:{catalyst}",
                positive_doc_ids=pos_ids,
                negative_doc_ids=neg_ids,
                positive_weight=round(sum(w for _, w in positives), 4),
                negative_weight=round(sum(w for _, w in negatives), 4),
                description=(
                    f"Catalyst '{catalyst}' has {len(pos_ids)} positive and "
                    f"{len(neg_ids)} negative signals"
                ),
            )
        )

    return details
|