diff --git a/tests/test_aggregation_worker.py b/tests/test_aggregation_worker.py
index 6112757..799fdcf 100644
--- a/tests/test_aggregation_worker.py
+++ b/tests/test_aggregation_worker.py
@@ -215,10 +215,86 @@ def test_confidence_penalized_by_contradiction():
 
 
 # ---------------------------------------------------------------------------
-# assemble_trend_summary
+# Agreement dampener — sample-size scaling
 # ---------------------------------------------------------------------------
 
 
+def test_single_signal_never_paper_eligible():
+    """One signal agreeing with itself should not reach paper threshold (0.50)."""
+    # Use the highest credibility possible
+    sw = compute_signal_weight(NOW, NOW, "7d", 1.0, extraction_confidence=1.0)
+    signals = [WeightedSignal("d0", sw, 1.0, 0.5)]
+    conf = compute_trend_confidence(signals, 0.0)
+    assert conf < 0.50, f"Single signal confidence {conf} should be below paper threshold 0.50"
+
+
+def test_two_signals_below_paper_threshold():
+    """Two agreeing signals with moderate credibility should stay below paper threshold."""
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(2)]
+    conf = compute_trend_confidence(signals, 0.0)
+    assert conf < 0.50, f"Two-signal confidence {conf} should be below paper threshold 0.50"
+
+
+def test_dampener_saturates_at_seven_plus():
+    """With 7+ unique sources the dampener should be at or near 1.0,
+    producing the same confidence as the undampened formula would."""
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    c7 = compute_trend_confidence(
+        [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(7)], 0.0,
+    )
+    c15 = compute_trend_confidence(
+        [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(15)], 0.0,
+    )
+    # 7 signals should be close to 15 signals (difference only from count_factor)
+    # The agreement component should be identical (both dampened to ~1.0)
+    assert c15 > c7  # more docs still helps via count_factor
+    assert c7 > 0.55, f"7-signal confidence {c7} should be well above paper threshold"
+
+
+def test_three_good_signals_paper_eligible():
+    """Three signals with decent credibility (0.6+) should reach paper threshold."""
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.7, extraction_confidence=0.7)
+    signals = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(3)]
+    conf = compute_trend_confidence(signals, 0.0)
+    assert conf >= 0.50, f"Three good-signal confidence {conf} should reach paper threshold 0.50"
+
+
+def test_dampener_monotonically_increases_with_sources():
+    """Confidence should strictly increase as we add more agreeing unique sources."""
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.6, extraction_confidence=0.6)
+    prev = 0.0
+    for n in [1, 2, 3, 5, 7, 10, 15]:
+        signals = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(n)]
+        conf = compute_trend_confidence(signals, 0.0)
+        assert conf > prev, f"Confidence should increase: n={n} conf={conf} <= prev={prev}"
+        prev = conf
+
+
+def test_low_credibility_two_signals_not_paper_eligible():
+    """Two low-credibility signals agreeing should not be paper-eligible."""
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.3, extraction_confidence=0.3)
+    signals = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(2)]
+    conf = compute_trend_confidence(signals, 0.0)
+    assert conf < 0.50, f"Two low-cred signals confidence {conf} should be below 0.50"
+
+
+def test_mixed_sentiment_dampened_correctly():
+    """Mixed signals (some bullish, some bearish) should have lower agreement
+    AND the dampener should further reduce it for small samples."""
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.6, extraction_confidence=0.6)
+    # 2 bullish, 1 bearish — agreement = 2/3 = 0.667
+    signals = [
+        WeightedSignal("d0", sw, 1.0, 0.5),
+        WeightedSignal("d1", sw, 1.0, 0.5),
+        WeightedSignal("d2", sw, -1.0, 0.5),
+    ]
+    conf = compute_trend_confidence(signals, 0.0)
+    # With dampener: agreement 0.667 * dampener(3) ≈ 0.667 * 0.667 ≈ 0.445
+    # Should be well below paper threshold
+    assert conf < 0.50, f"Mixed 3-signal confidence {conf} should be below 0.50"
+
+
 def test_assemble_trend_summary_bullish():
     impacts = [
         _make_impact("d1", sentiment="positive", impact_score=0.8),