feat: add 7 tests for confidence agreement dampener — sample-size boundary coverage

2026-04-17 03:48:08 +00:00
parent e21f162e48
commit 5fc78bd9b4
1 changed file with 77 additions and 1 deletion
@@ -215,10 +215,86 @@ def test_confidence_penalized_by_contradiction():


 # ---------------------------------------------------------------------------
-# assemble_trend_summary
+# Agreement dampener — sample-size scaling
 # ---------------------------------------------------------------------------


+def test_single_signal_never_paper_eligible():
+    """A lone signal must stay under the 0.50 paper threshold, however credible."""
+    # Best case: the weight is built with 1.0 for both credibility inputs.
+    top_weight = compute_signal_weight(NOW, NOW, "7d", 1.0, extraction_confidence=1.0)
+    lone_signal = WeightedSignal("d0", top_weight, 1.0, 0.5)
+    conf = compute_trend_confidence([lone_signal], 0.0)
+    assert conf < 0.50, f"Single signal confidence {conf} should be below paper threshold 0.50"
+
+
+def test_two_signals_below_paper_threshold():
+    """A pair of agreeing signals built with 0.8 inputs must stay under 0.50."""
+    weight = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    pair = [WeightedSignal(ident, weight, 1.0, 0.5) for ident in ("d0", "d1")]
+    conf = compute_trend_confidence(pair, 0.0)
+    assert conf < 0.50, f"Two-signal confidence {conf} should be below paper threshold 0.50"
+
+
+def test_dampener_saturates_at_seven_plus():
+    """Compare confidence for 7 versus 15 identical agreeing signals.
+
+    Intent: from 7 unique sources on, the dampener is expected to be ~1.0.
+    Only the ordering c15 > c7 and the floor c7 > 0.55 are asserted here.
+    """
+    weight = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    seven = [WeightedSignal(f"d{k}", weight, 1.0, 0.5) for k in range(7)]
+    c7 = compute_trend_confidence(seven, 0.0)
+    fifteen = [WeightedSignal(f"d{k}", weight, 1.0, 0.5) for k in range(15)]
+    c15 = compute_trend_confidence(fifteen, 0.0)
+    # Any remaining gap is attributed to count_factor rather than the dampener.
+    assert c15 > c7  # more docs still helps via count_factor
+    assert c7 > 0.55, f"7-signal confidence {c7} should be well above paper threshold"
+
+
+def test_three_good_signals_paper_eligible():
+    """Three agreeing signals built with 0.7 inputs must reach the 0.50 threshold."""
+    weight = compute_signal_weight(NOW, NOW, "7d", 0.7, extraction_confidence=0.7)
+    trio = [WeightedSignal(ident, weight, 1.0, 0.5) for ident in ("d0", "d1", "d2")]
+    conf = compute_trend_confidence(trio, 0.0)
+    assert conf >= 0.50, f"Three good-signal confidence {conf} should reach paper threshold 0.50"
+
+
+def test_dampener_monotonically_increases_with_sources():
+    """Each larger batch of agreeing unique sources must score strictly higher."""
+    weight = compute_signal_weight(NOW, NOW, "7d", 0.6, extraction_confidence=0.6)
+    prev = 0.0  # floor that the n=1 confidence must already exceed
+    for n in (1, 2, 3, 5, 7, 10, 15):
+        batch = [WeightedSignal(f"d{k}", weight, 1.0, 0.5) for k in range(n)]
+        conf = compute_trend_confidence(batch, 0.0)
+        assert conf > prev, f"Confidence should increase: n={n} conf={conf} <= prev={prev}"
+        prev = conf
+
+
+def test_low_credibility_two_signals_not_paper_eligible():
+    """A pair of agreeing signals built with 0.3 inputs must stay under 0.50."""
+    weak_weight = compute_signal_weight(NOW, NOW, "7d", 0.3, extraction_confidence=0.3)
+    pair = [WeightedSignal(ident, weak_weight, 1.0, 0.5) for ident in ("d0", "d1")]
+    conf = compute_trend_confidence(pair, 0.0)
+    assert conf < 0.50, f"Two low-cred signals confidence {conf} should be below 0.50"
+
+
+def test_mixed_sentiment_dampened_correctly():
+    """A 2-vs-1 split across three signals must stay under the 0.50 threshold.
+
+    Author's estimate: agreement 2/3 scaled by a ~0.667 dampener gives ~0.445.
+    """
+    weight = compute_signal_weight(NOW, NOW, "7d", 0.6, extraction_confidence=0.6)
+    # +1.0 twice (bullish) and -1.0 once (bearish) in the third field.
+    directions = (1.0, 1.0, -1.0)
+    mixed = [
+        WeightedSignal(f"d{idx}", weight, side, 0.5) for idx, side in enumerate(directions)
+    ]
+    conf = compute_trend_confidence(mixed, 0.0)
+    # The disagreement and the small sample are both expected to pull this down.
+    assert conf < 0.50, f"Mixed 3-signal confidence {conf} should be below 0.50"
+
+
 def test_assemble_trend_summary_bullish():
    impacts = [
        _make_impact("d1", sentiment="positive", impact_score=0.8),