From e21f162e48983ddd72a430c544ada1e7927863d7 Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Fri, 17 Apr 2026 03:41:39 +0000 Subject: [PATCH] fix: dampen agreement factor by sample size in trend confidence to prevent low-evidence inflation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agreement of 1-2 signals was inflating confidence to paper-eligible levels (0.575) even with low-credibility sources. Added a log2-based dampener that scales the agreement contribution by unique source count, saturating at n=7. A single signal now caps at 0.39 confidence and two signals at 0.49 — both correctly below the paper threshold (0.50). --- services/aggregation/worker.py | 11 ++++++++++- tests/test_paper_trading_simulation.py | 18 +++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/services/aggregation/worker.py b/services/aggregation/worker.py index 929acc2..4e3f66d 100644 --- a/services/aggregation/worker.py +++ b/services/aggregation/worker.py @@ -10,6 +10,7 @@ from __future__ import annotations import json import logging +import math import time import uuid as _uuid from dataclasses import dataclass @@ -582,7 +583,9 @@ def compute_trend_confidence( Confidence is based on: - Number of UNIQUE source documents (not raw signal count) - Average extraction confidence of contributing signals - - Signal agreement (what fraction point the same direction) + - Signal agreement (what fraction point the same direction), + dampened by unique source count so that 1-2 sources agreeing + doesn't inflate confidence the same way 7+ sources agreeing does - Contradiction penalty (high contradiction lowers confidence) Returns a value in [0, 1]. @@ -611,6 +614,12 @@ def compute_trend_confidence( else: agreement = 0.5 + # Dampen agreement by unique source count: 1-2 sources agreeing is far + # less meaningful than 7+ sources agreeing. Uses log2(n+1)/log2(8), capped + # at 1.0, so the dampener reaches 1.0 exactly at n=7 unique sources.
+ agreement_dampener = min(1.0, math.log2(unique_sources + 1) / math.log2(8)) + agreement *= agreement_dampener + # Contradiction penalty contradiction_penalty = contradiction_score * 0.4 diff --git a/tests/test_paper_trading_simulation.py b/tests/test_paper_trading_simulation.py index b0c5020..9037805 100644 --- a/tests/test_paper_trading_simulation.py +++ b/tests/test_paper_trading_simulation.py @@ -426,11 +426,11 @@ class TestRecommendationDrivenOrders: impacts = [ ImpactRow( document_id="doc-weak-1", - confidence=0.20, - novelty_score=0.1, - source_credibility=0.2, + confidence=0.40, + novelty_score=0.3, + source_credibility=0.5, sentiment="positive", - impact_score=0.1, + impact_score=0.3, catalyst_type="other", key_facts=["Minor update"], risks=[], @@ -438,11 +438,11 @@ class TestRecommendationDrivenOrders: ), ImpactRow( document_id="doc-weak-2", - confidence=0.15, - novelty_score=0.1, - source_credibility=0.2, - sentiment="negative", - impact_score=0.1, + confidence=0.35, + novelty_score=0.2, + source_credibility=0.4, + sentiment="positive", + impact_score=0.25, catalyst_type="other", key_facts=["Routine filing"], risks=[],