fix: dampen agreement factor by sample size in trend confidence to prevent low-evidence inflation

Agreement among just 1-2 signals was inflating confidence to paper-eligible
levels (0.575), even with low-credibility sources. Added a log2-based
dampener that scales the agreement contribution by unique source count,
saturating at n=7. A single signal now caps at 0.39 confidence and
2 signals at 0.49 — both correctly below the paper threshold (0.50).
This commit is contained in:
Celes Renata
2026-04-17 03:41:39 +00:00
parent d80d44e2fc
commit e21f162e48
2 changed files with 19 additions and 10 deletions
+10 -1
View File
@@ -10,6 +10,7 @@ from __future__ import annotations
import json import json
import logging import logging
import math
import time import time
import uuid as _uuid import uuid as _uuid
from dataclasses import dataclass from dataclasses import dataclass
@@ -582,7 +583,9 @@ def compute_trend_confidence(
Confidence is based on: Confidence is based on:
- Number of UNIQUE source documents (not raw signal count) - Number of UNIQUE source documents (not raw signal count)
- Average extraction confidence of contributing signals - Average extraction confidence of contributing signals
- Signal agreement (what fraction point the same direction) - Signal agreement (what fraction point the same direction),
dampened by sample size so that 1-2 signals agreeing doesn't
inflate confidence the same way 10+ signals agreeing does
- Contradiction penalty (high contradiction lowers confidence) - Contradiction penalty (high contradiction lowers confidence)
Returns a value in [0, 1]. Returns a value in [0, 1].
@@ -611,6 +614,12 @@ def compute_trend_confidence(
else: else:
agreement = 0.5 agreement = 0.5
# Dampen agreement by sample size: 1-2 signals agreeing is far less
# meaningful than 7+ signals agreeing. Uses log2(n+1)/log2(8), clamped
# to 1.0, so the dampener reaches 1.0 at exactly n=7 unique sources.
agreement_dampener = min(1.0, math.log2(unique_sources + 1) / math.log2(8))
agreement *= agreement_dampener
# Contradiction penalty # Contradiction penalty
contradiction_penalty = contradiction_score * 0.4 contradiction_penalty = contradiction_score * 0.4
+9 -9
View File
@@ -426,11 +426,11 @@ class TestRecommendationDrivenOrders:
impacts = [ impacts = [
ImpactRow( ImpactRow(
document_id="doc-weak-1", document_id="doc-weak-1",
confidence=0.20, confidence=0.40,
novelty_score=0.1, novelty_score=0.3,
source_credibility=0.2, source_credibility=0.5,
sentiment="positive", sentiment="positive",
impact_score=0.1, impact_score=0.3,
catalyst_type="other", catalyst_type="other",
key_facts=["Minor update"], key_facts=["Minor update"],
risks=[], risks=[],
@@ -438,11 +438,11 @@ class TestRecommendationDrivenOrders:
), ),
ImpactRow( ImpactRow(
document_id="doc-weak-2", document_id="doc-weak-2",
confidence=0.15, confidence=0.35,
novelty_score=0.1, novelty_score=0.2,
source_credibility=0.2, source_credibility=0.4,
sentiment="negative", sentiment="positive",
impact_score=0.1, impact_score=0.25,
catalyst_type="other", catalyst_type="other",
key_facts=["Routine filing"], key_facts=["Routine filing"],
risks=[], risks=[],