fix: confidence formula now uses unique doc count + signal agreement instead of raw signal count — prevents 99.9% inflation

2026-04-16 23:48:05 +00:00
parent 1a5fb2e36a
commit 18eb150c75
1 changed files with 17 additions and 7 deletions
@@ -580,8 +580,9 @@ def compute_trend_confidence(
    """Derive an overall confidence for the trend summary.

    Confidence is based on:
-    - Number of contributing signals (more = higher base)
+    - Number of UNIQUE source documents (not raw signal count)
    - Average extraction confidence of contributing signals
+    - Signal agreement (fraction of directional signals that share the majority direction)
    - Contradiction penalty (high contradiction lowers confidence)

    Returns a value in [0, 1].
@@ -593,18 +594,27 @@ def compute_trend_confidence(
    if not active:
        return 0.0

-    # Base confidence from signal count (diminishing returns)
-    count_factor = min(len(active) / 20.0, 1.0)
+    # Count unique source documents — competitive signals from the same doc
+    # shouldn't inflate confidence
+    unique_sources = len({s.document_id for s in active if s.document_id})
+    count_factor = min(unique_sources / 15.0, 0.8)  # Capped at 0.8, which is reached at 12 unique docs (12 / 15 = 0.8)

-    # Average extraction confidence (from the confidence_gate — if gated,
-    # the signal wouldn't be in active list, so we use the raw confidence
-    # from the weight breakdown).
+    # Average extraction confidence
    avg_conf = sum(s.weight.credibility for s in active) / len(active)

+    # Signal agreement: what fraction of signals agree on direction
+    bullish = sum(1 for s in active if s.sentiment_value > 0)
+    bearish = sum(1 for s in active if s.sentiment_value < 0)
+    total = bullish + bearish
+    if total > 0:
+        agreement = max(bullish, bearish) / total
+    else:
+        agreement = 0.5
+
    # Contradiction penalty
    contradiction_penalty = contradiction_score * 0.4

-    confidence = (0.4 * count_factor + 0.6 * avg_conf) - contradiction_penalty
+    confidence = (0.3 * count_factor + 0.3 * avg_conf + 0.4 * agreement) - contradiction_penalty
    return round(max(0.0, min(1.0, confidence)), 4)