feat: add 7 tests for confidence agreement dampener — sample-size boundary coverage

This commit is contained in:
Celes Renata
2026-04-17 03:48:08 +00:00
parent e21f162e48
commit 5fc78bd9b4
+77 -1
View File
@@ -215,10 +215,86 @@ def test_confidence_penalized_by_contradiction():
# ---------------------------------------------------------------------------
# assemble_trend_summary
# Agreement dampener — sample-size scaling
# ---------------------------------------------------------------------------
def test_single_signal_never_paper_eligible():
    """A lone signal must stay under the 0.50 paper threshold, even at best-case inputs."""
    # Best case: credibility and extraction confidence are both pinned at 1.0.
    top_weight = compute_signal_weight(
        NOW, NOW, "7d", 1.0, extraction_confidence=1.0,
    )
    lone_signal = WeightedSignal("d0", top_weight, 1.0, 0.5)
    conf = compute_trend_confidence([lone_signal], 0.0)
    assert conf < 0.50, (
        f"Single signal confidence {conf} should be below paper threshold 0.50"
    )
def test_two_signals_below_paper_threshold():
    """A pair of agreeing signals built from 0.8 inputs must not reach 0.50."""
    weight = compute_signal_weight(
        NOW, NOW, "7d", 0.8, extraction_confidence=0.8,
    )
    pair = []
    for i in range(2):
        pair.append(WeightedSignal(f"d{i}", weight, 1.0, 0.5))
    conf = compute_trend_confidence(pair, 0.0)
    assert conf < 0.50, (
        f"Two-signal confidence {conf} should be below paper threshold 0.50"
    )
def test_dampener_saturates_at_seven_plus():
    """Seven agreeing sources clear the paper threshold; fifteen score higher still.

    The intent is that the dampener is at or near 1.0 from seven unique
    sources onward, so any remaining gap between 7 and 15 signals comes from
    the count factor rather than from the agreement component.
    """
    weight = compute_signal_weight(
        NOW, NOW, "7d", 0.8, extraction_confidence=0.8,
    )

    def agreeing(count):
        # Build `count` identical, same-direction signals with distinct ids.
        return [WeightedSignal(f"d{i}", weight, 1.0, 0.5) for i in range(count)]

    c7 = compute_trend_confidence(agreeing(7), 0.0)
    c15 = compute_trend_confidence(agreeing(15), 0.0)

    # Extra documents are still expected to help through count_factor.
    assert c15 > c7
    # Seven sources should land comfortably above the 0.50 paper threshold.
    assert c7 > 0.55, (
        f"7-signal confidence {c7} should be well above paper threshold"
    )
def test_three_good_signals_paper_eligible():
    """Three agreeing signals built from 0.7 inputs must reach the 0.50 threshold."""
    weight = compute_signal_weight(
        NOW, NOW, "7d", 0.7, extraction_confidence=0.7,
    )
    trio = []
    for i in range(3):
        trio.append(WeightedSignal(f"d{i}", weight, 1.0, 0.5))
    conf = compute_trend_confidence(trio, 0.0)
    assert conf >= 0.50, (
        f"Three good-signal confidence {conf} should reach paper threshold 0.50"
    )
def test_dampener_monotonically_increases_with_sources():
    """Each larger set of agreeing unique sources must score strictly higher."""
    weight = compute_signal_weight(
        NOW, NOW, "7d", 0.6, extraction_confidence=0.6,
    )
    source_counts = (1, 2, 3, 5, 7, 10, 15)
    # Starting from 0.0 also requires the single-source case to be positive.
    prev = 0.0
    for n in source_counts:
        batch = [WeightedSignal(f"d{i}", weight, 1.0, 0.5) for i in range(n)]
        conf = compute_trend_confidence(batch, 0.0)
        assert conf > prev, (
            f"Confidence should increase: n={n} conf={conf} <= prev={prev}"
        )
        prev = conf
def test_low_credibility_two_signals_not_paper_eligible():
    """Two agreeing signals built from low (0.3) inputs must stay under 0.50."""
    weak_weight = compute_signal_weight(
        NOW, NOW, "7d", 0.3, extraction_confidence=0.3,
    )
    weak_pair = []
    for i in range(2):
        weak_pair.append(WeightedSignal(f"d{i}", weak_weight, 1.0, 0.5))
    conf = compute_trend_confidence(weak_pair, 0.0)
    assert conf < 0.50, (
        f"Two low-cred signals confidence {conf} should be below 0.50"
    )
def test_mixed_sentiment_dampened_correctly():
    """A 2-vs-1 split across three signals must stay under the 0.50 threshold.

    Disagreement lowers the agreement term, and the small-sample dampener is
    expected to shrink it further.
    """
    weight = compute_signal_weight(
        NOW, NOW, "7d", 0.6, extraction_confidence=0.6,
    )
    # Two bullish (+1.0) and one bearish (-1.0): expected agreement 2/3 = 0.667.
    directions = (1.0, 1.0, -1.0)
    split = [
        WeightedSignal(f"d{i}", weight, direction, 0.5)
        for i, direction in enumerate(directions)
    ]
    conf = compute_trend_confidence(split, 0.0)
    # Expected per the dampener design: 0.667 * dampener(3) ≈ 0.667 * 0.667 ≈ 0.445,
    # which is well short of the paper threshold.
    assert conf < 0.50, (
        f"Mixed 3-signal confidence {conf} should be below 0.50"
    )
def test_assemble_trend_summary_bullish():
impacts = [
_make_impact("d1", sentiment="positive", impact_score=0.8),