phase 14-15: docker build validation and helm deployment

2026-04-11 11:59:45 -07:00
parent 7394d241c9
commit ce10afa034
179 changed files with 32559 additions and 576 deletions
@@ -0,0 +1,169 @@
+"""Contradiction detection and disagreement representation.
+
+Analyses weighted signals to detect and represent disagreement explicitly,
+rather than collapsing contradictory evidence into a single unsupported
+conclusion.
+
+Requirements: 6.4, 6.5
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from services.aggregation.scoring import WeightedSignal
+from services.shared.schemas import DisagreementDetail
+
+
+@dataclass
+class CatalystEntry:
+    """Lightweight carrier for per-document catalyst info needed by
+    contradiction detection.  Avoids importing ImpactRow and creating
+    a circular dependency with worker.py."""
+
+    # Joined against WeightedSignal.document_id when signals are grouped
+    # by catalyst; entries with no matching weighted signal are ignored.
+    document_id: str
+    # Grouping key for catalyst disagreement; surfaces in the
+    # "catalyst:<type>" dimension label of the resulting detail.
+    catalyst_type: str
+
+
+@dataclass
+class ContradictionResult:
+    """Full contradiction analysis output."""
+
+    score: float  # 0-1, same semantics as existing compute_contradiction_score
+    # Per-dimension findings: the sentiment detail (if any) first, then one
+    # entry per catalyst type with opposing signals.  Empty when no
+    # disagreement was detected.
+    details: list[DisagreementDetail]
+
+
+def detect_contradictions(
+    signals: list[WeightedSignal],
+    catalyst_entries: list[CatalystEntry] | None = None,
+) -> ContradictionResult:
+    """Run contradiction detection across multiple dimensions.
+
+    Analyses:
+    1. Sentiment disagreement — the core positive-vs-negative split
+    2. Catalyst disagreement — same catalyst type with opposing sentiment
+
+    Returns a ContradictionResult with an overall score and per-dimension
+    disagreement details.
+    """
+    details: list[DisagreementDetail] = []
+
+    sentiment_detail = _detect_sentiment_disagreement(signals)
+    if sentiment_detail is not None:
+        details.append(sentiment_detail)
+
+    if catalyst_entries:
+        catalyst_details = _detect_catalyst_disagreement(signals, catalyst_entries)
+        details.extend(catalyst_details)
+
+    score = _compute_overall_score(signals)
+
+    return ContradictionResult(score=score, details=details)
+
+
+def _compute_overall_score(signals: list[WeightedSignal]) -> float:
+    """Minority/majority weight ratio — backward-compatible formula."""
+    if not signals:
+        return 0.0
+
+    pos_weight = 0.0
+    neg_weight = 0.0
+    for sig in signals:
+        w = sig.weight.combined * sig.impact_score
+        if sig.sentiment_value > 0:
+            pos_weight += w
+        elif sig.sentiment_value < 0:
+            neg_weight += w
+
+    total = pos_weight + neg_weight
+    if total == 0.0:
+        return 0.0
+
+    minority = min(pos_weight, neg_weight)
+    return round(minority / total, 4)
+
+
+def _detect_sentiment_disagreement(
+    signals: list[WeightedSignal],
+) -> DisagreementDetail | None:
+    """Detect when both positive and negative sentiment signals exist."""
+    pos_ids: list[str] = []
+    neg_ids: list[str] = []
+    pos_weight = 0.0
+    neg_weight = 0.0
+
+    for sig in signals:
+        w = sig.weight.combined * sig.impact_score
+        if w <= 0:
+            continue
+        if sig.sentiment_value > 0:
+            pos_ids.append(sig.document_id)
+            pos_weight += w
+        elif sig.sentiment_value < 0:
+            neg_ids.append(sig.document_id)
+            neg_weight += w
+
+    if not pos_ids or not neg_ids:
+        return None
+
+    total = pos_weight + neg_weight
+    minority_pct = min(pos_weight, neg_weight) / total if total > 0 else 0.0
+
+    return DisagreementDetail(
+        dimension="sentiment",
+        positive_doc_ids=pos_ids,
+        negative_doc_ids=neg_ids,
+        positive_weight=round(pos_weight, 4),
+        negative_weight=round(neg_weight, 4),
+        description=(
+            f"Sentiment split: {len(pos_ids)} positive vs {len(neg_ids)} negative signals "
+            f"(minority weight ratio {minority_pct:.0%})"
+        ),
+    )
+
+
+def _detect_catalyst_disagreement(
+    signals: list[WeightedSignal],
+    catalyst_entries: list[CatalystEntry],
+) -> list[DisagreementDetail]:
+    """Detect when the same catalyst type has both positive and negative signals."""
+    # Build lookup: document_id → (sentiment_value, combined_weight)
+    sig_lookup: dict[str, tuple[float, float]] = {}
+    for sig in signals:
+        w = sig.weight.combined * sig.impact_score
+        if w > 0:
+            sig_lookup[sig.document_id] = (sig.sentiment_value, w)
+
+    # Group by catalyst type
+    from collections import defaultdict
+    catalyst_groups: dict[str, list[tuple[str, float, float]]] = defaultdict(list)
+    for entry in catalyst_entries:
+        if entry.document_id in sig_lookup:
+            sent_val, weight = sig_lookup[entry.document_id]
+            if sent_val != 0.0:
+                catalyst_groups[entry.catalyst_type].append(
+                    (entry.document_id, sent_val, weight)
+                )
+
+    details: list[DisagreementDetail] = []
+    for catalyst, entries in catalyst_groups.items():
+        pos_ids = [doc_id for doc_id, sv, _ in entries if sv > 0]
+        neg_ids = [doc_id for doc_id, sv, _ in entries if sv < 0]
+        if not pos_ids or not neg_ids:
+            continue
+
+        pos_w = sum(w for _, sv, w in entries if sv > 0)
+        neg_w = sum(w for _, sv, w in entries if sv < 0)
+
+        details.append(DisagreementDetail(
+            dimension=f"catalyst:{catalyst}",
+            positive_doc_ids=pos_ids,
+            negative_doc_ids=neg_ids,
+            positive_weight=round(pos_w, 4),
+            negative_weight=round(neg_w, 4),
+            description=(
+                f"Catalyst '{catalyst}' has {len(pos_ids)} positive and "
+                f"{len(neg_ids)} negative signals"
+            ),
+        ))
+
+    return details