From 4e010bc048f0133e47042f0d1b041526d468816d Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Wed, 29 Apr 2026 11:41:48 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20signal=20math=20upgrade=20=E2=80=94=20p?= =?UTF-8?q?robabilistic,=20regime-aware=20scoring=20pipeline?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement full probabilistic signal processing pipeline gated behind probabilistic_scoring_enabled feature flag in risk_configs: - Bayesian log-likelihood accumulator with Beta posterior and entropy - Regime detector (trend-following, panic, mean-reversion, uncertainty) - Source accuracy tracker with per-source historical prediction accuracy - Sigmoid confidence gate replacing binary gate - Information gain surprise weighting for rare events - Adaptive recency decay with event-specific half-lives - Regime multiplier replacing market context multiplier - Weighted disagreement entropy for contradiction detection - Multiplicative macro exposure with conditional integration - Graph-distance attenuated competitive signal propagation - Exponentially weighted momentum with volatility scaling - Expected value recommendation gate All changes backward-compatible: flag=false preserves exact current behavior. New outputs stored in existing JSONB columns (no schema changes except source_accuracy table via migration 034). Tests: 26 property-based tests (14 correctness properties), 99 unit tests, 1789 total tests passing with zero regressions. 
--- .kiro/specs/signal-math-upgrade/.config.kiro | 1 + .kiro/specs/signal-math-upgrade/design.md | 732 ++++++++++++ .../specs/signal-math-upgrade/requirements.md | 293 +++++ .kiro/specs/signal-math-upgrade/tasks.md | 349 ++++++ infra/migrations/034_source_accuracy.sql | 18 + services/aggregation/bayesian.py | 127 ++ services/aggregation/contradiction.py | 73 +- services/aggregation/interpolation.py | 249 +++- services/aggregation/projection.py | 83 +- services/aggregation/regime.py | 170 +++ services/aggregation/scoring.py | 338 +++++- services/aggregation/signal_propagation.py | 91 +- services/aggregation/source_accuracy.py | 164 +++ services/aggregation/worker.py | 534 ++++++++- services/recommendation/eligibility.py | 104 +- services/recommendation/worker.py | 7 + services/shared/schemas.py | 14 + tests/test_aggregation_worker.py | 93 ++ tests/test_bayesian.py | 278 +++++ tests/test_interpolation.py | 357 ++++++ tests/test_pbt_signal_math.py | 1030 +++++++++++++++++ tests/test_regime.py | 237 ++++ tests/test_signal_math_unit.py | 535 +++++++++ tests/test_source_accuracy.py | 241 ++++ 24 files changed, 6058 insertions(+), 60 deletions(-) create mode 100644 .kiro/specs/signal-math-upgrade/.config.kiro create mode 100644 .kiro/specs/signal-math-upgrade/design.md create mode 100644 .kiro/specs/signal-math-upgrade/requirements.md create mode 100644 .kiro/specs/signal-math-upgrade/tasks.md create mode 100644 infra/migrations/034_source_accuracy.sql create mode 100644 services/aggregation/bayesian.py create mode 100644 services/aggregation/regime.py create mode 100644 services/aggregation/source_accuracy.py create mode 100644 tests/test_bayesian.py create mode 100644 tests/test_pbt_signal_math.py create mode 100644 tests/test_regime.py create mode 100644 tests/test_signal_math_unit.py create mode 100644 tests/test_source_accuracy.py diff --git a/.kiro/specs/signal-math-upgrade/.config.kiro b/.kiro/specs/signal-math-upgrade/.config.kiro new file mode 100644 index 
0000000..bdebc16 --- /dev/null +++ b/.kiro/specs/signal-math-upgrade/.config.kiro @@ -0,0 +1 @@ +{"specId": "b595d834-7e72-4fab-87a9-65c92115a069", "workflowType": "requirements-first", "specType": "feature"} \ No newline at end of file diff --git a/.kiro/specs/signal-math-upgrade/design.md b/.kiro/specs/signal-math-upgrade/design.md new file mode 100644 index 0000000..02963e5 --- /dev/null +++ b/.kiro/specs/signal-math-upgrade/design.md @@ -0,0 +1,732 @@ +# Design Document — Signal Math Upgrade + +## Overview + +This design upgrades the Stonks Oracle signal processing pipeline from deterministic heuristic formulas to a probabilistic, regime-aware, and adaptive mathematical framework. The upgrade spans all pipeline stages — signal scoring, trend assembly, macro impact, competitive signals, trend projection, and recommendation generation — while preserving the existing `WeightedSignal` abstraction, three-layer architecture, database schema, and dataclass interfaces. + +The core transformation replaces: +- **Binary confidence gate** → smooth sigmoid transition +- **Weighted sentiment average** → Bayesian log-likelihood accumulation with Beta posterior +- **Fixed recency decay** → adaptive event-specific half-lives +- **Linear macro exposure** → multiplicative compounding exposure +- **Additive macro integration** → conditional multiplicative modifiers +- **Simple contradiction ratio** → weighted disagreement entropy +- **Heuristic trend confidence** → Bayesian posterior variance +- **Threshold-based direction** → entropy-based mixed signal detection +- **Simple momentum** → exponentially weighted momentum with volatility scaling +- **Confidence/strength gates** → expected value recommendation gate +- **Fixed relationship transfer** → graph-distance attenuated competitive signals + +All changes are gated behind a `probabilistic_scoring_enabled` feature flag in `risk_configs`, allowing incremental rollout with instant rollback. 
New outputs (P_bull, α, β, entropy, regime, EV) are stored in existing JSONB columns — the only database migration required is the new `source_accuracy` table (migration 034). + +### Design Rationale + +Markets are fundamentally probabilistic and regime-dependent. The current pipeline collapses rich evidence into binary sentiment labels and fixed-weight averages, losing uncertainty structure. A Bayesian framework preserves the full posterior distribution, enabling the system to distinguish between "strongly bullish" and "weakly bullish with high uncertainty" — a distinction that directly impacts position sizing and risk management. + +The regime detector adapts scoring thresholds to market conditions (panic vs. trending vs. mean-reverting), and the expected value gate ensures recommendations only proceed when the risk-adjusted outcome is positive. Together, these changes transform the pipeline from a sentiment aggregator into a probabilistic forecasting engine. + +--- + +## Architecture + +### High-Level Pipeline Flow + +The upgraded pipeline maintains the existing three-layer architecture but introduces new computation stages within each layer. The feature flag controls which computation path is taken at each stage. + +```mermaid +flowchart TD + subgraph "Layer 1: Company Signals" + A[Document Intelligence Records] --> B[Signal Scorer] + B --> |"probabilistic=false"| C1[Binary Gate + Fixed Decay] + B --> |"probabilistic=true"| C2[Sigmoid Gate + Adaptive Decay
+ Info Gain + Source Accuracy] + C1 --> D[WeightedSignal list] + C2 --> D + end + + subgraph "Layer 2: Macro Signals" + E[Global Events] --> F[Macro Scorer] + F --> |"probabilistic=false"| G1[Linear Weighted Sum] + F --> |"probabilistic=true"| G2[Multiplicative Exposure] + G1 --> H[Macro WeightedSignals] + G2 --> H + end + + subgraph "Layer 3: Competitive Signals" + I[Pattern Matcher] --> J[Signal Propagation] + J --> |"probabilistic=false"| K1[Flat Transfer Strength] + J --> |"probabilistic=true"| K2[Graph-Distance Attenuation] + K1 --> L[Competitive WeightedSignals] + K2 --> L + end + + subgraph "Regime Detection (new)" + M[Market Data] --> N[Regime Detector] + N --> O{Regime Classification} + O --> P[trend-following / panic / mean-reversion / uncertainty] + end + + subgraph "Trend Assembly" + D --> Q[Merge Signals] + H --> |"probabilistic=false"| Q + H --> |"probabilistic=true"| R[Conditional Macro Modifier] + R --> Q + L --> Q + Q --> S[Trend Assembler] + S --> |"probabilistic=false"| T1[Heuristic Confidence + Threshold Direction] + S --> |"probabilistic=true"| T2[Bayesian Posterior + Entropy Direction
+ Regime-Adjusted Thresholds] + P --> T2 + T1 --> U[TrendSummary] + T2 --> U + end + + subgraph "Projection" + U --> V[Projection Engine] + V --> |"probabilistic=false"| W1[Simple Momentum] + V --> |"probabilistic=true"| W2[EW Momentum + Vol Scaling] + W1 --> X[TrendProjection] + W2 --> X + end + + subgraph "Recommendation" + U --> Y[Recommendation Engine] + X --> Y + Y --> |"probabilistic=false"| Z1[Confidence + Strength Gates] + Y --> |"probabilistic=true"| Z2[EV Gate + Existing Gates] + Z1 --> AA[Recommendation] + Z2 --> AA + end +``` + +### Feature Flag Control Flow + +The feature flag `probabilistic_scoring_enabled` is read from the `risk_configs` table's `config` JSONB column at the start of each aggregation cycle. It propagates through all pipeline stages via the existing `AggregationConfig` dataclass. + +```mermaid +sequenceDiagram + participant W as Worker (aggregate_company) + participant DB as PostgreSQL (risk_configs) + participant S as Signal Scorer + participant T as Trend Assembler + participant R as Recommendation Engine + + W->>DB: SELECT config FROM risk_configs WHERE active=TRUE + DB-->>W: {"macro_enabled": true, "competitive_enabled": true, "probabilistic_scoring_enabled": false} + W->>W: Log pipeline mode (heuristic or probabilistic) + W->>S: compute_signal_weight(..., probabilistic=flag) + S-->>W: WeightedSignal (with or without Bayesian fields) + W->>T: assemble_trend_summary(..., probabilistic=flag) + T-->>W: TrendSummary (with or without entropy/regime) + W->>R: evaluate_eligibility(..., probabilistic=flag) + R-->>W: Recommendation (with or without EV gate) +``` + +--- + +## Components and Interfaces + +### New Modules + +| Module | File | Responsibility | +|--------|------|----------------| +| Bayesian Accumulator | `services/aggregation/bayesian.py` | Log-likelihood accumulation, Beta posterior, P_bull, Bayesian confidence | +| Regime Detector | `services/aggregation/regime.py` | EMA computation, volatility ratio, regime classification, 
threshold adjustment | +| Adaptive Decay | integrated into `scoring.py` | Event-specific half-life computation from impact, surprise, market reaction | +| Information Gain | integrated into `scoring.py` | Surprise weighting from event type base rates | +| Source Accuracy | `services/aggregation/source_accuracy.py` | Historical prediction accuracy tracking per source | +| Entropy Detector | integrated into `bayesian.py` | Shannon entropy for mixed signal detection | +| EV Gate | integrated into `eligibility.py` | Expected value computation for recommendation eligibility | + +### Modified Modules + +| Module | File | Changes | +|--------|------|---------| +| Signal Scorer | `services/aggregation/scoring.py` | Sigmoid gate, info gain factor, adaptive decay, regime multiplier, source accuracy factor | +| Trend Assembler | `services/aggregation/worker.py` | Bayesian confidence, entropy-based direction, regime-adjusted thresholds, entropy-based contradiction | +| Contradiction | `services/aggregation/contradiction.py` | Weighted disagreement entropy replacing minority/majority ratio | +| Macro Scorer | `services/aggregation/interpolation.py` | Multiplicative exposure formula, conditional integration mode | +| Competitive Scorer | `services/aggregation/signal_propagation.py` | Graph-distance attenuation with historical correlation | +| Projection Engine | `services/aggregation/projection.py` | Exponentially weighted momentum, volatility scaling | +| Recommendation | `services/recommendation/eligibility.py` | EV gate, P_bull-based position sizing adjustments | +| Config | `services/shared/config.py` | New probabilistic config parameters | +| Schemas | `services/shared/schemas.py` | Optional new fields on TrendSummary, Recommendation | + +### Component Interface Details + +#### 1. 
Bayesian Accumulator (`services/aggregation/bayesian.py`) + +```python +@dataclass(frozen=True) +class BayesianPosterior: + """Bayesian posterior state from signal accumulation.""" + p_bull: float # σ(L_t), bullish probability [0, 1] + alpha: float # Beta distribution α parameter (≥ 1.0) + beta: float # Beta distribution β parameter (≥ 1.0) + log_likelihood: float # Raw log-likelihood accumulation L_t + bayesian_confidence: float # 1 - 4αβ/(α+β)², [0, 1] + entropy: float # Shannon entropy H, [0, 1] + signal_count: int # Number of signals processed + + # Uninformative prior (no evidence) + PRIOR = BayesianPosterior( + p_bull=0.5, alpha=1.0, beta=1.0, + log_likelihood=0.0, bayesian_confidence=0.0, + entropy=1.0, signal_count=0, + ) + + +def compute_bayesian_posterior( + signals: list[WeightedSignal], +) -> BayesianPosterior: + """Accumulate weighted signals into a Bayesian posterior. + + Computes: + - Log-likelihood: L_t = Σ(w_i · s_i) + - Bullish probability: P_bull = σ(L_t) + - Beta posterior: α = 1 + W_bull, β = 1 + W_bear + - Bayesian confidence: C = 1 - 4αβ/(α+β)² + - Shannon entropy: H = -p·log₂(p) - (1-p)·log₂(1-p) + """ + ... + + +def compute_entropy(p_bull: float) -> float: + """Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p). + + Returns value in [0, 1]. Maximum at p=0.5, zero at p=0 or p=1. + Handles edge cases p=0 and p=1 by returning 0.0. + """ + ... +``` + +#### 2. 
Regime Detector (`services/aggregation/regime.py`) + +```python +class MarketRegime(str, Enum): + TREND_FOLLOWING = "trend_following" + PANIC = "panic" + MEAN_REVERSION = "mean_reversion" + UNCERTAINTY = "uncertainty" + + +@dataclass(frozen=True) +class RegimeClassification: + """Result of regime detection for a ticker.""" + regime: MarketRegime + trend_indicator: float # R = sign(EMA_20 - EMA_100) + volatility_ratio: float # V_r = σ_20 / σ_100 + bullish_threshold: float # Adjusted ±threshold for direction + bearish_threshold: float + contradiction_penalty_multiplier: float # 0.4 default, 0.6 for uncertainty + + +@dataclass(frozen=True) +class RegimeConfig: + ema_short_period: int = 20 + ema_long_period: int = 100 + vol_short_period: int = 20 + vol_long_period: int = 100 + panic_vol_ratio: float = 1.5 + trend_vol_ratio: float = 1.2 + mean_reversion_vol_ratio: float = 1.0 + default_threshold: float = 0.15 + panic_threshold: float = 0.10 + mean_reversion_threshold: float = 0.20 + uncertainty_contradiction_multiplier: float = 0.6 + + +def classify_regime( + closing_prices: list[float], + returns: list[float], + config: RegimeConfig = RegimeConfig(), +) -> RegimeClassification: + """Classify market regime from price and return history. + + Requires at least 100 days of price history for EMA_100. + Falls back to UNCERTAINTY when data is insufficient. + """ + ... + + +def compute_ema(values: list[float], period: int) -> float: + """Compute exponential moving average over the last `period` values.""" + ... +``` + +#### 3. Source Accuracy Tracker (`services/aggregation/source_accuracy.py`) + +```python +@dataclass +class SourceAccuracy: + """Per-source historical prediction accuracy.""" + source_id: str + accuracy_ratio: float # [0, 1] fraction of correct directional calls + sample_count: int # Number of signals with known outcomes + last_updated: datetime + + @property + def accuracy_factor(self) -> float: + """Multiplicative factor for credibility weight. 
+ + Returns 1.0 (neutral) when sample_count < 10. + Otherwise scales linearly from 0.5 (0% accuracy) to 1.5 (100% accuracy). + """ + if self.sample_count < 10: + return 1.0 + return 0.5 + self.accuracy_ratio + + +async def fetch_source_accuracy( + pool: asyncpg.Pool, + source_ids: list[str], +) -> dict[str, SourceAccuracy]: + """Fetch accuracy metrics for a batch of sources.""" + ... + + +async def update_source_accuracy( + pool: asyncpg.Pool, + source_id: str, + realized_outcomes: list[tuple[str, float]], # (predicted_direction, actual_7d_return) +) -> None: + """Update accuracy metrics for a source based on realized price data.""" + ... +``` + +#### 4. Extended ScoringConfig + +New fields added to the existing `ScoringConfig` dataclass in `scoring.py`: + +```python +@dataclass(frozen=True) +class ScoringConfig: + # ... existing fields preserved ... + + # Probabilistic scoring toggle (mirrors feature flag for local use) + probabilistic: bool = False + + # Sigmoid gate parameters + sigmoid_steepness: float = 5.0 # k in σ(k·(x - midpoint)) + sigmoid_midpoint: float = 0.5 # midpoint of sigmoid transition + + # Information gain parameters + info_gain_lambda: float = 0.3 # scaling parameter λ + info_gain_max: float = 3.0 # maximum clamp for info gain factor + default_base_rate: float = 0.1 # fallback when event type rate unknown + + # Adaptive decay parameters (β scaling factors) + adaptive_decay_impact_scale: float = 1.0 # max β_impact + adaptive_decay_surprise_scale: float = 1.0 # max β_surprise at r=3.0 + adaptive_decay_market_scale: float = 0.5 # max β_market_reaction + + # Regime multiplier parameters + regime_return_weight: float = 0.15 # coefficient for |z_r| + regime_volume_weight: float = 0.10 # coefficient for |z_v| + regime_multiplier_max: float = 2.5 # M_regime ceiling +``` + +#### 5. 
Extended WeightedSignal + +The existing `WeightedSignal` dataclass gains optional fields: + +```python +@dataclass +class WeightedSignal: + """A document intelligence reference paired with its computed weight.""" + document_id: str + weight: SignalWeight + sentiment_value: float + impact_score: float + + # New optional fields for probabilistic mode + info_gain_factor: float = 1.0 # r = 1 + λ·(-log₂ P(event_type)) + source_accuracy_factor: float = 1.0 # [0.5, 1.5] from historical accuracy + adaptive_half_life: float | None = None # τ_i when adaptive decay is active +``` + +#### 6. Extended SignalWeight + +```python +@dataclass +class SignalWeight: + """Breakdown of a document's aggregation weight.""" + recency: float + credibility: float + novelty_bonus: float + confidence_gate: float + market_ctx_multiplier: float + combined: float + + # New optional fields for probabilistic mode + sigmoid_gate: float | None = None # Smooth gate value [0, 1] + info_gain_factor: float = 1.0 # Surprise multiplier + source_accuracy_factor: float = 1.0 # Historical accuracy multiplier + regime_multiplier: float | None = None # M_regime replacing M_context +``` + +#### 7. Extended TrendSummary + +New optional fields on the existing Pydantic model: + +```python +class TrendSummary(BaseModel): + # ... all existing fields preserved ... + + # New optional fields for probabilistic mode + p_bull: float | None = None # Bayesian bullish probability + alpha: float | None = None # Beta posterior α + beta_param: float | None = None # Beta posterior β (named to avoid shadowing) + bayesian_confidence: float | None = None # 1 - 4αβ/(α+β)² + entropy: float | None = None # Shannon entropy H + regime: str | None = None # Market regime classification + pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic" +``` + +#### 8. Extended Recommendation + +```python +class Recommendation(BaseModel): + # ... all existing fields preserved ... 
+ + # New optional fields for probabilistic mode + expected_value: float | None = None # EV = P_bull·R_up - P_bear·R_down + p_bull: float | None = None # Bayesian bullish probability used + pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic" +``` + +--- + +## Data Models + +### Database Storage Strategy + +All new mathematical outputs are stored in existing JSONB columns. The only new database migration is the one that creates the `source_accuracy` table (described below). + +#### trend_windows table + +The `market_context` JSONB column (currently stores volatility/volume data) is extended to include probabilistic outputs: + +```json +{ + "volatility": 1.23, + "volume_change_pct": 45.2, + "price_change_pct": -2.1, + "probabilistic": { + "p_bull": 0.72, + "alpha": 8.3, + "beta": 3.1, + "log_likelihood": 0.94, + "bayesian_confidence": 0.21, + "entropy": 0.86, + "regime": "trend_following", + "regime_volatility_ratio": 0.85, + "pipeline_mode": "probabilistic", + "contradiction_entropy": 0.31, + "macro_modifier": 1.15 + } +} +``` + +#### recommendations table + +The existing `invalidation_conditions` JSONB column stores recommendation-level data. The new EV and probabilistic fields are stored in a new key within the existing decision trace flow. 
Since recommendations don't have a dedicated metadata JSONB column, we add the probabilistic fields to the thesis text and store structured data in the `risk_checks` JSONB column of the `recommendation_evaluations` table: + +```json +{ + "ev": 0.0211, + "p_bull": 0.72, + "r_up": 0.034, + "r_down": 0.012, + "pipeline_mode": "probabilistic", + "ev_threshold": 0.005 +} +``` + +#### risk_configs table + +The `config` JSONB column gains the new feature flag: + +```json +{ + "macro_enabled": true, + "competitive_enabled": true, + "probabilistic_scoring_enabled": false +} +``` + +#### source_accuracy table (new — Requirement 4) + +This is the one new database table required, created by migration `034_source_accuracy.sql`: + +```sql +CREATE TABLE IF NOT EXISTS source_accuracy ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + source_id VARCHAR(200) NOT NULL, + accuracy_ratio FLOAT NOT NULL DEFAULT 0.5, + sample_count INTEGER NOT NULL DEFAULT 0, + last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW(), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(source_id) +); +CREATE INDEX idx_source_accuracy_source ON source_accuracy(source_id); +``` + +Note: This is the only schema addition. All other new outputs use existing JSONB columns. + +### Event Type Base Rates + +Information gain computation requires empirical base rates for event types. 
These are stored as a configuration constant (not in the database) and can be tuned over time: + +```python +EVENT_TYPE_BASE_RATES: dict[str, float] = { + "earnings": 0.25, # Quarterly, common + "product_launch": 0.10, # Moderately rare + "regulatory": 0.08, # Somewhat rare + "legal": 0.05, # Rare + "m_and_a": 0.03, # Very rare + "management_change": 0.06, + "partnership": 0.12, + "market_expansion": 0.09, + "restructuring": 0.04, + "dividend": 0.15, +} +DEFAULT_BASE_RATE = 0.1 # For unknown event types +``` + +### Configuration Hierarchy + +``` +risk_configs.config (DB, runtime) + └── probabilistic_scoring_enabled: bool + └── AggregationConfig.probabilistic: bool (in-memory) + └── ScoringConfig.probabilistic: bool (per-cycle) + ├── scoring.py: sigmoid vs binary gate + ├── scoring.py: adaptive vs fixed decay + ├── scoring.py: info gain factor + ├── scoring.py: regime multiplier vs market context + ├── worker.py: Bayesian vs heuristic confidence + ├── worker.py: entropy vs threshold direction + ├── contradiction.py: entropy vs ratio + ├── interpolation.py: multiplicative vs linear + ├── signal_propagation.py: graph-distance vs flat + ├── projection.py: EW momentum vs simple + └── eligibility.py: EV gate vs threshold-only +``` + + +--- + +## Correctness Properties + +*A property is a characteristic or behavior that should hold true across all valid executions of a system — essentially, a formal statement about what the system should do. Properties serve as the bridge between human-readable specifications and machine-verifiable correctness guarantees.* + +The following properties were derived from the acceptance criteria through systematic prework analysis. Each property is universally quantified and maps to specific requirements. Redundant properties were consolidated during reflection (e.g., requirements 17.1–17.7 duplicate properties already stated in requirements 1–15). 
+ +### Property 1: Sigmoid Gate Monotonicity + +*For any* two extraction confidence values x₁, x₂ ∈ [0.0, 1.0] where x₁ ≤ x₂, the sigmoid gate σ(5·(x₁ - 0.5)) SHALL be less than or equal to σ(5·(x₂ - 0.5)). Higher confidence always produces equal or higher gate values. + +**Validates: Requirements 2.6, 17.1** + +### Property 2: Beta Posterior Evidence Accumulation + +*For any* sequence of weighted signal sets where each successive set contains one additional signal, the sum α + β of the Beta posterior parameters SHALL increase monotonically. Evidence always accumulates — adding a signal never reduces the total evidence mass. + +**Validates: Requirements 1.3, 17.2** + +### Property 3: Bayesian Confidence Symmetry and Divergence + +*For any* Beta posterior with parameters α, β ≥ 1.0, the Bayesian confidence C = 1 - 4αβ/(α+β)² SHALL equal 0.0 when α = β (maximum uncertainty) and SHALL increase monotonically as the ratio max(α/β, β/α) increases. Confidence reflects evidence concentration, not evidence volume. + +**Validates: Requirements 1.4, 17.3** + +### Property 4: Bayesian Posterior Round-Trip Consistency + +*For any* set of weighted signals with uniform weights, computing the Beta posterior and extracting the mean P_bull = α/(α+β) SHALL produce a value within 0.05 of σ(L_t) where L_t is the log-likelihood accumulation. The two probabilistic representations are consistent. + +**Validates: Requirements 1.7, 17.7** + +### Property 5: Adaptive Decay Lower Bound + +*For any* valid combination of impact_score ∈ [0, 1], information gain factor r ∈ [1.0, 3.0], and market context multiplier ∈ [1.0, 1.45], the adaptive half-life τ_i SHALL be greater than or equal to the base half-life τ_base. Adaptive decay is always slower or equal to fixed decay, never faster. 
+ +**Validates: Requirements 5.7, 17.4** + +### Property 6: Information Gain Monotonicity + +*For any* two event type base rates p₁, p₂ ∈ (0, 1] where p₁ < p₂, the information gain factor r(p₁) SHALL be greater than or equal to r(p₂). Rarer events always receive higher surprise weight. + +**Validates: Requirements 3.5** + +### Property 7: Multiplicative Macro Exposure Monotonicity + +*For any* overlap configuration (O_geo, O_supply, O_commodity, O_sector) and any dimension k where O_k = 0, setting O_k to any positive value SHALL increase the total macro impact score. Multi-dimensional exposure always compounds — it never reduces impact. + +**Validates: Requirements 10.7, 17.5** + +### Property 8: Shannon Entropy Range and Maximum + +*For any* bullish probability P_bull ∈ (0, 1), the Shannon entropy H = -P_bull·log₂(P_bull) - (1-P_bull)·log₂(1-P_bull) SHALL be in the range (0, 1], with the maximum value of 1.0 occurring at P_bull = 0.5. + +**Validates: Requirements 9.7** + +### Property 9: Contradiction Entropy Monotonicity + +*For any* set of weighted signals containing both positive and negative sentiment signals, the contradiction entropy score SHALL increase monotonically as the weight distribution f_pos approaches 0.5 (equal split). More balanced disagreement always produces higher contradiction. + +**Validates: Requirements 15.7** + +### Property 10: Exponentially Weighted Momentum Direction + +*For any* sequence of monotonically increasing signed trend strengths (each ΔS_{t-k} > 0), the exponentially weighted momentum M_t SHALL be positive. Consistently strengthening bullish trends always produce positive momentum. + +**Validates: Requirements 13.6, 17.6** + +### Property 11: Competitive Signal Distance Attenuation + +*For any* source-target company pair with fixed source signal strength S_source and historical correlation ρ_historical, the transfer strength S_transfer SHALL decrease monotonically with increasing graph distance d_network. 
Closer competitors always receive stronger signal transfer. + +**Validates: Requirements 12.7** + +### Property 12: Expected Value Directional Consistency + +*For any* Bayesian bullish probability P_bull > 0.5 and estimated return magnitudes where R_up > R_down ≥ 0, the expected value EV = P_bull · R_up - (1 - P_bull) · R_down SHALL be positive. When the model is bullish and upside exceeds downside, EV is always positive. + +**Validates: Requirements 17.8** + +### Property 13: Bayesian Confidence Monotonic with Agreeing Signals + +*For any* set of weighted signals where all signals agree on direction (all positive or all negative), adding one more agreeing signal SHALL increase the Bayesian confidence C. More agreeing evidence always increases confidence. + +**Validates: Requirements 8.6** + +### Property 14: Numerical Stability Across All Formulas + +*For any* valid input combination to any formula in the probabilistic pipeline (sigmoid gate, Beta posterior, Bayesian confidence, adaptive decay, regime multiplier, Shannon entropy, multiplicative exposure, EW momentum, expected value), the output SHALL be a finite float (not NaN, not infinity) within the documented range for that formula. This includes regime multiplier M_regime ∈ [1.0, 2.5], entropy H ∈ [0, 1], P_bull ∈ [0, 1], confidence ∈ [0, 1], and M_adj ∈ [-2.0, 2.0]. 
+ +**Validates: Requirements 17.9, 6.4** + +--- + +## Error Handling + +### Numerical Edge Cases + +| Scenario | Handling | +|----------|----------| +| P_bull = 0.0 or 1.0 (entropy undefined) | Return H = 0.0 (no uncertainty at extremes) | +| σ_20 = 0.0 (zero volatility for momentum scaling) | Use floor max(σ_20, 0.01) per Req 13.4 | +| σ_20 = 0.0 or σ_100 = 0.0 (volatility ratio) | Default to uncertainty regime | +| log₂(0) in entropy computation | Guard with `if p <= 0 or p >= 1: return 0.0` | +| log₂(0) in information gain (base_rate = 0) | Base rates must be > 0; use default 0.1 for unknown | +| Division by zero in z-score (σ = 0) | Use M_regime = 1.0 when σ = 0 | +| Empty signal list | Return uninformative prior (P_bull=0.5, α=1, β=1, C=0) | +| All neutral signals (no positive or negative) | Contradiction = 0.0, direction = neutral | +| Extremely large weights (overflow risk) | Python floats handle up to ~1.8e308; clamp combined weight if needed | +| NaN from upstream data | Validate inputs; skip signals with NaN weight or sentiment | + +### Feature Flag Failure Modes + +| Failure | Behavior | +|---------|----------| +| `risk_configs` table unreachable | Default to `probabilistic_scoring_enabled = false` (heuristic mode) | +| `config` JSONB missing the key | Default to `false` | +| Invalid value type for flag | Default to `false`, log warning | +| Flag changes mid-cycle | Flag is read once at cycle start; change takes effect next cycle | + +### Source Accuracy Failures + +| Failure | Behavior | +|---------|----------| +| `source_accuracy` table unreachable | Use neutral factor 1.0 for all sources | +| Accuracy update fails | Log error, continue with stale accuracy data | +| Corrupted accuracy data (ratio > 1.0 or < 0.0) | Clamp to [0.0, 1.0] | + +### Regime Detection Failures + +| Failure | Behavior | +|---------|----------| +| Market data unavailable | Default to uncertainty regime with default thresholds | +| Insufficient price history (< 100 days) | Default 
to uncertainty regime | +| Price data contains gaps | Use available data; EMA computation handles gaps gracefully | + +--- + +## Testing Strategy + +### Dual Testing Approach + +The signal math upgrade requires both property-based tests (for mathematical correctness) and example-based unit tests (for specific behaviors and integration points). Property-based testing is highly appropriate here because the feature consists primarily of pure mathematical functions with clear input/output behavior, universal properties that hold across wide input spaces, and well-defined range invariants. + +### Property-Based Testing + +**Library:** Hypothesis (already in use per `.hypothesis/` directory and project conventions) + +**Configuration:** +- Minimum 100 iterations per property: `@settings(max_examples=100)` +- File naming: `test_pbt_signal_math.py` (or split by module) +- Tag format: `# Feature: signal-math-upgrade, Property N: ` + +**Property tests to implement (one test per correctness property):** + +| Property | Test File | Key Generators | +|----------|-----------|----------------| +| 1: Sigmoid monotonicity | `test_pbt_signal_math.py` | `st.floats(0.0, 1.0)` pairs | +| 2: Evidence accumulation | `test_pbt_signal_math.py` | `st.lists(weighted_signal_strategy)` | +| 3: Confidence symmetry/divergence | `test_pbt_signal_math.py` | `st.floats(1.0, 100.0)` for α, β | +| 4: Posterior round-trip | `test_pbt_signal_math.py` | `st.lists(uniform_weight_signal_strategy)` | +| 5: Adaptive decay lower bound | `test_pbt_signal_math.py` | `st.floats` for impact, surprise, market | +| 6: Info gain monotonicity | `test_pbt_signal_math.py` | `st.floats(0.001, 1.0)` pairs | +| 7: Macro exposure monotonicity | `test_pbt_signal_math.py` | `st.floats(0.0, 1.0)` for overlaps | +| 8: Entropy range/maximum | `test_pbt_signal_math.py` | `st.floats(0.001, 0.999)` for P_bull | +| 9: Contradiction monotonicity | `test_pbt_signal_math.py` | Signal sets with varying weight splits | +| 10: EW 
momentum direction | `test_pbt_signal_math.py` | `st.lists(st.floats)` monotonic sequences | +| 11: Distance attenuation | `test_pbt_signal_math.py` | `st.integers(1, 3)` for distance | +| 12: EV directional consistency | `test_pbt_signal_math.py` | `st.floats(0.5, 1.0)` for P_bull | +| 13: Confidence with agreeing signals | `test_pbt_signal_math.py` | Growing lists of same-direction signals | +| 14: Numerical stability | `test_pbt_signal_math.py` | Broad `st.floats` for all formula inputs | + +### Example-Based Unit Tests + +**File:** `test_signal_math_unit.py` + +| Test Area | Examples | +|-----------|----------| +| Sigmoid gate specific values | x=0.5→0.5, x=0.2→≈0.18, x=0.8→≈0.82 (for σ(5·(x-0.5))) | +| Uninformative prior | Empty signals → P_bull=0.5, α=1, β=1, C=0 | +| Default base rate | Unknown event type → base_rate=0.1 | +| Info gain clamp | Very rare event → factor ≤ 3.0 | +| Source accuracy threshold | sample_count < 10 → factor=1.0 | +| Adaptive decay edge cases | All zeros → τ_base, all max → 6×τ_base | +| Regime classification | Specific (R, V_r) → expected regime | +| Regime thresholds | panic→0.10, mean_reversion→0.20, etc. | +| Entropy direction mapping | H>0.9→mixed, P_bull>0.65→bullish, etc. 
| +| Zero overlap → zero impact | All overlaps zero → S_macro=0 | +| Max overlap value | All overlaps 1.0 → ≈severity×0.689 | +| Macro fallback behaviors | Only macro → additive, only company → no modifier | +| Graph distance cutoff | d>3 → no propagation | +| Momentum fallback | <2 cycles → heuristic fallback | +| EV threshold behavior | EV>0.005→proceed, EV≤0.005→informational | +| Feature flag behaviors | flag=false→heuristic, flag=true→probabilistic | +| Heuristic equivalence | flag=false produces identical outputs to current system | + +### Integration Tests + +| Test Area | Scope | +|-----------|-------| +| Source accuracy persistence | Write/read from source_accuracy table | +| Regime persistence | Store/retrieve regime in JSONB | +| EV persistence | Store/retrieve EV in recommendation_evaluations | +| Feature flag reading | Read probabilistic_scoring_enabled from risk_configs | +| End-to-end pipeline | Full aggregation cycle with probabilistic=true | + +### Test Organization + +``` +tests/ +├── test_pbt_signal_math.py # All 14 property-based tests +├── test_signal_math_unit.py # Example-based unit tests +├── test_bayesian.py # Bayesian accumulator unit tests +├── test_regime.py # Regime detector unit tests +├── test_source_accuracy.py # Source accuracy tracker tests +└── test_signal_math_integration.py # Integration tests (DB required) +``` diff --git a/.kiro/specs/signal-math-upgrade/requirements.md b/.kiro/specs/signal-math-upgrade/requirements.md new file mode 100644 index 0000000..57a74ed --- /dev/null +++ b/.kiro/specs/signal-math-upgrade/requirements.md @@ -0,0 +1,293 @@ +# Requirements Document — Signal Math Upgrade + +## Introduction + +The Stonks Oracle platform uses a three-layer signal aggregation engine (company-specific, macro, competitive) to produce market intelligence and drive paper-trading decisions. 
The current mathematical models are structurally too deterministic and too linear for a market system that is fundamentally probabilistic, regime-dependent, and nonlinear. The pipeline behaves as weighted sentiment aggregation with heuristics rather than a probabilistic forecasting engine. + +This feature upgrades the signal processing mathematics across all pipeline stages — from signal scoring through trend assembly, macro impact, competitive signals, trend projection, and recommendation generation — to replace heuristic formulas with probabilistic, regime-aware, and adaptive alternatives. The goal is to transform prediction quality while preserving the existing `WeightedSignal` abstraction, three-layer architecture, and database schema compatibility. + +## Glossary + +- **Aggregation_Engine**: The core pipeline in `services/aggregation/worker.py` that merges signals from all three layers and computes `TrendSummary` objects across five time windows. +- **Signal_Scorer**: The scoring module in `services/aggregation/scoring.py` that transforms raw intelligence records into `WeightedSignal` objects with composite aggregation weights. +- **Trend_Assembler**: The component in `services/aggregation/worker.py` that derives trend direction, strength, confidence, and contradiction from merged weighted signals. +- **Macro_Scorer**: The macro impact scoring module in `services/aggregation/interpolation.py` that computes per-company impact from global events using overlap-based exposure profiles. +- **Competitive_Scorer**: The competitive signal modules in `services/aggregation/pattern_matcher.py` and `services/aggregation/signal_propagation.py` that mine historical patterns and propagate cross-company signals. +- **Projection_Engine**: The trend projection module in `services/aggregation/projection.py` that computes forward-looking trend estimates from momentum and macro decay. 
+- **Recommendation_Engine**: The recommendation pipeline in `services/recommendation/` that translates trend assessments into actionable buy/sell/hold/watch decisions with position sizing. +- **WeightedSignal**: The core data abstraction pairing a document reference with a composite aggregation weight, sentiment value, and impact score. +- **Beta_Distribution**: A probability distribution on [0, 1] parameterized by α and β, used to model the posterior probability of bullish vs bearish sentiment. +- **Regime_Detector**: A new component that classifies the current market regime (trend-following, panic, mean-reversion, uncertainty) from price and volume statistics. +- **Sigmoid_Function**: The logistic function σ(x) = 1/(1+e^(-x)) used to convert log-likelihood accumulations into probabilities. +- **Adaptive_Decay**: A recency decay mechanism where the half-life varies per signal based on event impact, surprise, and market reaction rather than using a fixed constant per window. +- **Information_Gain**: A measure of how surprising an event is relative to its base rate, computed as -log₂ P(event_type), used to weight novel signals more heavily. +- **Entropy**: Shannon entropy in bits, H = -p·log₂(p) - (1-p)·log₂(1-p), with maximum 1.0 at p = 0.5, used to detect mixed sentiment states where the probability distribution is spread rather than concentrated. +- **EMA**: Exponential Moving Average, a weighted moving average giving more weight to recent observations, used for trend and volatility regime detection. + +--- + +## Requirements + +### Requirement 1: Probabilistic Sentiment Accumulation via Bayesian Evidence + +**User Story:** As a quantitative analyst, I want the signal scoring layer to accumulate sentiment evidence probabilistically using Bayesian methods, so that the system captures uncertainty structure instead of collapsing sentiment into binary ±1 labels. + +#### Acceptance Criteria + +1. 
WHEN a set of weighted signals is provided for a ticker and window, THE Signal_Scorer SHALL compute a log-likelihood accumulation L_t = Σ(w_i · s_i) where w_i is the combined signal weight and s_i is the sentiment value. +2. WHEN the log-likelihood L_t has been computed, THE Signal_Scorer SHALL convert the accumulation to a bullish probability using the Sigmoid_Function: P_bull = σ(L_t) = 1/(1+e^(-L_t)). +3. WHEN weighted signals are provided, THE Signal_Scorer SHALL maintain a Beta_Distribution posterior with parameters α_t = α_0 + W_bull and β_t = β_0 + W_bear, where W_bull is the sum of combined weights for positive signals and W_bear is the sum for negative signals, and α_0 = β_0 = 1.0 as uninformative priors. +4. THE Signal_Scorer SHALL compute Bayesian confidence from the Beta_Distribution posterior parameters as C = 1 - 4αβ/(α+β)² (equivalently ((α-β)/(α+β))², the squared imbalance between bullish and bearish evidence), where C ranges from 0.0 (maximum uncertainty at α=β) to approaching 1.0 (strong evidence concentration). +5. WHEN no signals exist for a ticker and window, THE Signal_Scorer SHALL return P_bull = 0.5, α = 1.0, β = 1.0, and C = 0.0, representing the uninformative prior state. +6. THE Signal_Scorer SHALL preserve the existing `WeightedSignal` dataclass interface, adding the Bayesian posterior fields (P_bull, α, β, Bayesian confidence) as additional output alongside the existing weighted sentiment average. +7. FOR ALL valid sets of weighted signals, computing the Beta posterior then extracting P_bull SHALL produce a value within 0.05 of σ(L_t) when signal weights are uniform (round-trip consistency between the two probabilistic representations). + +--- + +### Requirement 2: Sigmoid Confidence Gate Replacing Binary Gate + +**User Story:** As a quantitative analyst, I want the binary confidence gate replaced with a smooth sigmoid transition, so that marginally confident signals contribute proportionally rather than being completely discarded or fully included. + +#### Acceptance Criteria + +1. 
WHEN a document signal has extraction confidence x, THE Signal_Scorer SHALL compute a soft gate value p = σ(5·(x - 0.5)) = 1/(1+e^(-5·(x-0.5))) instead of the current binary 0/1 gate. +2. WHEN extraction confidence is 0.5, THE Signal_Scorer SHALL produce a gate value of 0.5 (the sigmoid midpoint). +3. WHEN extraction confidence is below 0.2, THE Signal_Scorer SHALL produce a gate value below 0.19 (σ(-1.5) ≈ 0.18), preserving low weight for very low confidence signals. +4. WHEN extraction confidence is above 0.8, THE Signal_Scorer SHALL produce a gate value above 0.81 (σ(1.5) ≈ 0.82), preserving high weight for high confidence signals. +5. THE Signal_Scorer SHALL use the sigmoid gate value as a multiplicative factor in the combined weight formula in place of the current binary G_conf. +6. FOR ALL extraction confidence values in [0.0, 1.0], THE Signal_Scorer SHALL produce gate values that are monotonically increasing (higher confidence always produces equal or higher gate values). + +--- + +### Requirement 3: Information Gain Surprise Weighting + +**User Story:** As a quantitative analyst, I want signals weighted by their information gain (surprise factor), so that rare and unexpected events receive proportionally higher influence than routine signals. + +#### Acceptance Criteria + +1. WHEN a signal has a known event type (e.g., earnings, product_launch, regulatory, legal, m_and_a), THE Signal_Scorer SHALL compute an information gain factor r = 1 + λ·(-log₂ P(event_type)), where P(event_type) is the empirical base rate of that event type and λ is a configurable scaling parameter with default 0.3. +2. WHEN the event type base rate is not available, THE Signal_Scorer SHALL use a default base rate of 0.1 (treating the event as moderately rare). +3. THE Signal_Scorer SHALL multiply the information gain factor r into the combined weight formula as an additional multiplicative component. +4. 
THE Signal_Scorer SHALL clamp the information gain factor to a maximum of 3.0 to prevent extremely rare events from dominating the aggregation. +5. FOR ALL event types with base rate in (0, 1], THE Signal_Scorer SHALL produce information gain factors that are monotonically decreasing with increasing base rate (rarer events always receive higher surprise weight). + +--- + +### Requirement 4: Historical Source Accuracy Tracking + +**User Story:** As a quantitative analyst, I want source credibility to incorporate historical prediction accuracy, so that sources with a track record of correct directional calls receive higher weight. + +#### Acceptance Criteria + +1. THE Signal_Scorer SHALL maintain a per-source accuracy metric computed as the fraction of past signals from that source where the predicted direction matched the subsequent 7-day price movement direction. +2. WHEN a source has at least 10 historical signals with known outcomes, THE Signal_Scorer SHALL incorporate the source accuracy as a multiplicative factor on the credibility weight, scaled linearly from 0.5 (0% accuracy) to 1.5 (100% accuracy). +3. WHEN a source has fewer than 10 historical signals, THE Signal_Scorer SHALL use a neutral accuracy factor of 1.0 (no adjustment). +4. THE Signal_Scorer SHALL update source accuracy metrics asynchronously after each aggregation cycle, using realized price data from the market data tables. +5. THE Signal_Scorer SHALL store source accuracy metrics in a database table with columns for source identifier, accuracy ratio, sample count, and last updated timestamp. + +--- + +### Requirement 5: Adaptive Recency Decay with Event-Specific Half-Lives + +**User Story:** As a quantitative analyst, I want recency decay half-lives to adapt based on event characteristics, so that high-impact events persist longer in the aggregation while routine signals decay faster. + +#### Acceptance Criteria + +1. 
WHEN computing recency decay for a signal, THE Signal_Scorer SHALL use an adaptive half-life τ_i = τ_base · (1 + β_impact) · (1 + β_surprise) · (1 + β_market_reaction), where τ_base is the current fixed half-life for the window. +2. THE Signal_Scorer SHALL compute β_impact from the signal's impact score, scaled linearly from 0.0 (impact_score = 0) to 1.0 (impact_score = 1.0). +3. THE Signal_Scorer SHALL compute β_surprise from the information gain factor (Requirement 3), scaled linearly from 0.0 (r = 1.0, no surprise) to 1.0 (r = 3.0, maximum surprise). +4. THE Signal_Scorer SHALL compute β_market_reaction from the market context multiplier, scaled linearly from 0.0 (multiplier = 1.0, no market reaction) to 0.5 (multiplier = 1.45, maximum market reaction). +5. WHEN all three β factors are at their maximum, THE Signal_Scorer SHALL produce an adaptive half-life of at most 6× the base half-life (τ_base · 2.0 · 2.0 · 1.5 = 6.0 · τ_base). +6. WHEN all three β factors are zero (routine, unsurprising signal in calm market), THE Signal_Scorer SHALL produce the same half-life as the current fixed system (τ_base). +7. FOR ALL combinations of impact, surprise, and market reaction values, THE Signal_Scorer SHALL produce adaptive half-lives that are greater than or equal to τ_base (adaptive decay is always slower or equal to the base decay, never faster). + +--- + +### Requirement 6: Volatility-Adjusted Normalization (Regime-Aware Scoring) + +**User Story:** As a quantitative analyst, I want signal weights normalized by current market volatility and volume conditions, so that the same signal magnitude is interpreted differently in calm vs volatile markets. + +#### Acceptance Criteria + +1. WHEN market data is available for a ticker, THE Signal_Scorer SHALL compute a return z-score z_r = (r_t - μ_20) / σ_20, where r_t is the current return, μ_20 is the 20-day mean return, and σ_20 is the 20-day return standard deviation. +2. 
WHEN market data is available for a ticker, THE Signal_Scorer SHALL compute a volume z-score z_v = (log(V_t) - μ_V) / σ_V, where V_t is the current volume, μ_V is the 20-day mean of log-volume, and σ_V is the 20-day standard deviation of log-volume. +3. THE Signal_Scorer SHALL compute a regime multiplier M_regime = 1 + 0.15·|z_r| + 0.10·|z_v|, which amplifies signal weights during abnormal market conditions. +4. THE Signal_Scorer SHALL clamp M_regime to the range [1.0, 2.5] to prevent extreme z-scores from producing runaway weight amplification. +5. WHEN market data is not available for a ticker, THE Signal_Scorer SHALL use M_regime = 1.0 (no regime adjustment). +6. THE Signal_Scorer SHALL replace the current market context multiplier (M_context) with M_regime in the combined weight formula. + +--- + +### Requirement 7: Regime Detection and Classification + +**User Story:** As a quantitative analyst, I want the system to detect and classify the current market regime for each ticker, so that scoring thresholds and behavior adapt to whether the market is trending, panicking, mean-reverting, or uncertain. + +#### Acceptance Criteria + +1. WHEN market data is available, THE Regime_Detector SHALL compute a trend indicator R = sign(EMA_20 - EMA_100), where EMA_20 and EMA_100 are exponential moving averages of closing prices over 20 and 100 days respectively. +2. WHEN market data is available, THE Regime_Detector SHALL compute a volatility ratio V_r = σ_20 / σ_100, where σ_20 and σ_100 are the 20-day and 100-day return standard deviations. +3. THE Regime_Detector SHALL classify the market regime into one of four categories based on R and V_r: trend-following (R ≠ 0 AND V_r < 1.2), panic (V_r > 1.5), mean-reversion (R = 0 AND V_r < 1.0), uncertainty (all other cases). +4. 
WHEN the regime is classified as panic, THE Aggregation_Engine SHALL reduce the bullish/bearish threshold from ±0.15 to ±0.10 (making the system more sensitive to directional signals during high-volatility periods). +5. WHEN the regime is classified as mean-reversion, THE Aggregation_Engine SHALL increase the bullish/bearish threshold from ±0.15 to ±0.20 (requiring stronger evidence for directional calls in range-bound markets). +6. WHEN the regime is classified as trend-following, THE Aggregation_Engine SHALL use the default thresholds of ±0.15. +7. WHEN the regime is classified as uncertainty, THE Aggregation_Engine SHALL use the default thresholds of ±0.15 and increase the contradiction penalty multiplier from 0.4 to 0.6. +8. THE Regime_Detector SHALL persist the current regime classification per ticker to the database for auditability and dashboard display. +9. WHEN market data is insufficient to compute EMA_100 (fewer than 100 days of price history), THE Regime_Detector SHALL default to the uncertainty regime. + +--- + +### Requirement 8: Bayesian Posterior Confidence Replacing Heuristic Confidence + +**User Story:** As a quantitative analyst, I want trend confidence derived from the Bayesian posterior distribution rather than the current heuristic weighted formula, so that confidence reflects actual evidence concentration rather than an ad-hoc combination of factors. + +#### Acceptance Criteria + +1. WHEN computing trend confidence, THE Trend_Assembler SHALL use the Bayesian confidence C = 1 - 4αβ/(α+β)² from the Beta_Distribution posterior (Requirement 1) as the primary confidence component with weight 0.5. +2. THE Trend_Assembler SHALL retain the source count factor (min(N_unique/15, 0.8)) as a secondary confidence component with weight 0.25, rewarding evidence breadth. +3. THE Trend_Assembler SHALL retain the contradiction penalty (contradiction_score × 0.4) as a confidence reduction. +4. 
THE Trend_Assembler SHALL compute the combined confidence as: confidence = 0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction, clamped to [0.0, 1.0]. +5. THE Trend_Assembler SHALL preserve the existing confidence thresholds for recommendation eligibility (0.35 minimum, 0.50 paper, 0.70 live) without modification. +6. FOR ALL signal sets where all signals agree on direction, THE Trend_Assembler SHALL produce Bayesian confidence that increases monotonically with the number of agreeing signals. + +--- + +### Requirement 9: Entropy-Based Mixed Signal Detection + +**User Story:** As a quantitative analyst, I want mixed trend detection based on Shannon entropy rather than simple contradiction thresholds, so that the system can distinguish between genuine uncertainty (high entropy) and weak signal (low total weight). + +#### Acceptance Criteria + +1. WHEN the bullish probability P_bull has been computed from the Bayesian posterior, THE Trend_Assembler SHALL compute Shannon entropy H = -P_bull·log₂(P_bull) - (1-P_bull)·log₂(1-P_bull). +2. WHEN H > 0.9 (entropy close to maximum of 1.0, indicating near-equal probability of bullish and bearish), THE Trend_Assembler SHALL classify the trend direction as mixed, regardless of the weighted sentiment average. +3. WHEN H ≤ 0.9 AND P_bull > 0.65, THE Trend_Assembler SHALL classify the trend direction as bullish. +4. WHEN H ≤ 0.9 AND P_bull < 0.35, THE Trend_Assembler SHALL classify the trend direction as bearish. +5. WHEN H ≤ 0.9 AND 0.35 ≤ P_bull ≤ 0.65, THE Trend_Assembler SHALL classify the trend direction as neutral. Note: because H > 0.9 for every P_bull in approximately (0.316, 0.684), this condition cannot occur with the 0.9 entropy threshold; the neutral branch acts only as a defensive default unless the entropy or probability thresholds are changed. +6. THE Trend_Assembler SHALL persist the entropy value H alongside the trend summary for auditability. +7. FOR ALL P_bull values in (0, 1), THE Trend_Assembler SHALL compute entropy values in (0, 1], with maximum entropy of 1.0 occurring at P_bull = 0.5. 
+ +--- + +### Requirement 10: Multiplicative Macro Exposure Scoring + +**User Story:** As a quantitative analyst, I want macro impact computed using multiplicative exposure rather than linear weighted sums, so that a company exposed across multiple dimensions receives compounding impact rather than simple addition. + +#### Acceptance Criteria + +1. WHEN computing macro impact for a company, THE Macro_Scorer SHALL use the multiplicative exposure formula S_macro = severity · (1 - Π_k(1 - w_k · O_k)), where O_k are the overlap components (geographic, supply chain, commodity, sector) and w_k are their respective weights. +2. THE Macro_Scorer SHALL use the following overlap weights: w_geo = 0.35, w_supply = 0.25, w_commodity = 0.25, w_sector = 0.15 (matching the current linear weight distribution). +3. WHEN a company has zero overlap across all dimensions, THE Macro_Scorer SHALL produce S_macro = 0.0 (no impact). +4. WHEN a company has maximum overlap across all dimensions (all O_k = 1.0), THE Macro_Scorer SHALL produce S_macro = severity · (1 - (1-0.35)·(1-0.25)·(1-0.25)·(1-0.15)), which is approximately severity · 0.689. +5. THE Macro_Scorer SHALL preserve the existing severity weight mapping (critical=1.0, high=0.75, moderate=0.5, low=0.25). +6. THE Macro_Scorer SHALL preserve the existing resilience modifier (R_tier) applied after the multiplicative exposure computation. +7. FOR ALL overlap configurations, THE Macro_Scorer SHALL produce impact scores where adding a non-zero overlap in any dimension increases the total impact (monotonicity property). + +--- + +### Requirement 11: Conditional Macro Signal Integration + +**User Story:** As a quantitative analyst, I want macro signals treated as conditional modifiers on company signals rather than additive contributions, so that macro context amplifies or dampens existing company-level evidence rather than independently shifting the trend. + +#### Acceptance Criteria + +1. 
WHEN both company signals and macro signals exist for a ticker, THE Aggregation_Engine SHALL apply macro impact as a multiplicative modifier on the company signal strength: S_adjusted = S_company · (1 + M_macro · sign_alignment), where M_macro is the normalized macro impact and sign_alignment is +1 when macro and company signals agree in direction, -1 when they disagree. +2. THE Aggregation_Engine SHALL clamp the macro modifier (1 + M_macro · sign_alignment) to the range [0.5, 1.5] to prevent macro signals from inverting or excessively amplifying company signals. +3. WHEN only macro signals exist (no company signals), THE Aggregation_Engine SHALL fall back to the current additive behavior with the existing macro weight of 0.3, preserving the macro-only suppression safety mechanism. +4. WHEN only company signals exist (macro layer disabled or no macro events), THE Aggregation_Engine SHALL use company signals without modification (modifier = 1.0). +5. THE Aggregation_Engine SHALL log the macro modifier value applied to each ticker for auditability. + +--- + +### Requirement 12: Graph-Distance Competitive Signal Attenuation + +**User Story:** As a quantitative analyst, I want competitive signal transfer attenuated by network graph distance and historical correlation, so that signals propagate more strongly to closely related competitors and decay for distant relationships. + +#### Acceptance Criteria + +1. WHEN propagating a signal from a source company to a target company, THE Competitive_Scorer SHALL compute transfer strength as S_transfer = S_source · ρ_historical · e^(-d_network), where S_source is the source signal strength, ρ_historical is the historical price correlation between the two companies, and d_network is the graph distance in the competitor relationship network. +2. 
THE Competitive_Scorer SHALL compute graph distance d_network as the shortest path length in the competitor relationship graph, where direct competitors have distance 1, competitors-of-competitors have distance 2, and so on. +3. WHEN the graph distance exceeds 3, THE Competitive_Scorer SHALL not propagate the signal (e^(-3) ≈ 0.05, below meaningful contribution). +4. THE Competitive_Scorer SHALL compute ρ_historical as the 90-day rolling Pearson correlation of daily returns between the source and target companies. +5. WHEN historical correlation data is insufficient (fewer than 30 trading days of overlapping data), THE Competitive_Scorer SHALL use a default correlation of 0.3 for same-sector companies and 0.1 for cross-sector companies. +6. THE Competitive_Scorer SHALL preserve the existing relationship strength threshold (R_relationship ≥ 0.2) as a pre-filter before applying the graph-distance attenuation. +7. FOR ALL source-target pairs, THE Competitive_Scorer SHALL produce transfer strengths that decrease monotonically with increasing graph distance (closer competitors always receive stronger signal transfer). + +--- + +### Requirement 13: Exponentially Weighted Momentum + +**User Story:** As a quantitative analyst, I want trend momentum computed using exponentially weighted historical changes rather than a simple current-minus-previous difference, so that the momentum estimate is smoother and less sensitive to single-cycle noise. + +#### Acceptance Criteria + +1. WHEN computing trend momentum, THE Projection_Engine SHALL use an exponentially weighted sum M_t = Σ_{k=0}^{K-1} λ^k · ΔS_{t-k}, where ΔS_{t-k} is the signed strength change at lag k, λ = 0.7 is the decay factor, and K is the number of available historical cycles (up to 10). +2. THE Projection_Engine SHALL normalize the momentum by dividing by the geometric series sum Σ λ^k to produce a value in [-1, 1]. +3. 
WHEN fewer than 2 historical cycles are available, THE Projection_Engine SHALL fall back to the current heuristic (momentum = direction_sign × strength × 0.5). +4. THE Projection_Engine SHALL compute volatility-scaled momentum M_adj = M_t / max(σ_20, 0.01), where σ_20 is the 20-day return standard deviation, to normalize momentum relative to the ticker's typical price movement. +5. THE Projection_Engine SHALL clamp M_adj to [-2.0, 2.0] to prevent division by very small σ_20 from producing extreme values. +6. FOR ALL sequences of monotonically increasing signed strengths, THE Projection_Engine SHALL produce positive momentum values (correctly detecting strengthening bullish trends). + +--- + +### Requirement 14: Expected Value Recommendation Gate + +**User Story:** As a quantitative analyst, I want recommendation eligibility based on expected value rather than simple confidence and strength thresholds, so that the system only recommends trades with positive risk-adjusted expected outcomes. + +#### Acceptance Criteria + +1. WHEN evaluating recommendation eligibility, THE Recommendation_Engine SHALL compute expected value EV = P_bull · R_up - P_bear · R_down, where P_bull is the Bayesian bullish probability, P_bear = 1 - P_bull, R_up is the estimated upside return, and R_down is the estimated downside return. +2. THE Recommendation_Engine SHALL estimate R_up and R_down from the trend strength and the ticker's 20-day historical volatility: R_up = strength · σ_20 · √(horizon_days) and R_down = (1 - strength) · σ_20 · √(horizon_days), where horizon_days corresponds to the trend window duration. +3. WHEN EV is positive and exceeds a configurable threshold (default 0.005, representing 0.5% expected return), THE Recommendation_Engine SHALL allow the recommendation to proceed through the existing eligibility gates. +4. 
WHEN EV is negative or below the threshold, THE Recommendation_Engine SHALL force the recommendation to informational mode regardless of confidence and strength. +5. THE Recommendation_Engine SHALL persist the computed EV alongside the recommendation for auditability. +6. THE Recommendation_Engine SHALL preserve all existing eligibility gates (confidence ≥ 0.35, strength ≥ 0.10, contradiction ≤ 0.60, evidence ≥ 2, direction ≠ neutral) as additional requirements beyond the EV gate. + +--- + +### Requirement 15: Contradiction Handling via Weighted Disagreement Entropy + +**User Story:** As a quantitative analyst, I want contradiction detection to use weighted disagreement entropy rather than a simple minority/majority ratio, so that the system better distinguishes between a few strong dissenting signals and many weak ones. + +#### Acceptance Criteria + +1. WHEN computing contradiction, THE Trend_Assembler SHALL compute weighted disagreement entropy using the effective weight distribution across positive and negative signal groups. +2. THE Trend_Assembler SHALL compute the positive weight fraction f_pos = W_positive / (W_positive + W_negative) and negative weight fraction f_neg = W_negative / (W_positive + W_negative), where W_positive and W_negative are the sums of effective weights (combined_weight × impact_score) for each sentiment group. +3. THE Trend_Assembler SHALL compute contradiction entropy as H_contradiction = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg), normalized to [0, 1] (maximum at f_pos = f_neg = 0.5). +4. THE Trend_Assembler SHALL weight the contradiction entropy by the total evidence mass: contradiction_score = H_contradiction · min(1.0, (W_positive + W_negative) / W_threshold), where W_threshold is a configurable parameter (default 5.0) representing the evidence mass at which contradiction becomes fully significant. +5. WHEN only positive or only negative signals exist (no disagreement), THE Trend_Assembler SHALL produce a contradiction score of 0.0. 
+6. THE Trend_Assembler SHALL preserve the existing `ContradictionResult` interface, populating the overall score with the entropy-based value and retaining the `DisagreementDetail` objects for catalyst-level analysis. +7. FOR ALL signal sets with both positive and negative signals, THE Trend_Assembler SHALL produce contradiction scores that increase monotonically as the weight distribution approaches equal split (f_pos → 0.5). + +--- + +### Requirement 16: Backward Compatibility and Migration + +**User Story:** As a platform operator, I want the mathematical upgrades to be backward-compatible with the existing database schema and deployable incrementally, so that the upgrade does not require downtime or data migration. + +#### Acceptance Criteria + +1. THE Aggregation_Engine SHALL preserve the existing `WeightedSignal`, `SignalWeight`, `TrendSummary`, and `Recommendation` dataclass interfaces, adding new fields as optional attributes with default values. +2. THE Aggregation_Engine SHALL store new mathematical outputs (P_bull, α, β, entropy, regime, EV) in the existing JSONB metadata fields of `trend_windows` and `recommendations` tables rather than requiring new columns. +3. THE Aggregation_Engine SHALL support a feature flag `probabilistic_scoring_enabled` in `risk_configs` that toggles between the current heuristic pipeline and the new probabilistic pipeline, defaultable to `false` for safe rollout. +4. WHEN `probabilistic_scoring_enabled` is false, THE Aggregation_Engine SHALL produce identical outputs to the current system (no behavioral change). +5. WHEN `probabilistic_scoring_enabled` is true, THE Aggregation_Engine SHALL use the new Bayesian, regime-aware, and adaptive formulas for all pipeline stages. +6. IF the feature flag toggle fails to read from the database, THEN THE Aggregation_Engine SHALL default to the current heuristic pipeline (fail-safe behavior). +7. 
THE Aggregation_Engine SHALL log which pipeline mode (heuristic or probabilistic) is active at the start of each aggregation cycle. + +--- + +### Requirement 17: Property-Based Testing for Mathematical Correctness + +**User Story:** As a developer, I want comprehensive property-based tests validating the mathematical correctness of all new formulas, so that edge cases and numerical stability issues are caught before deployment. + +#### Acceptance Criteria + +1. THE test suite SHALL include property-based tests using Hypothesis for the sigmoid confidence gate verifying monotonicity (higher confidence input always produces higher or equal gate output) across all float inputs in [0.0, 1.0]. +2. THE test suite SHALL include property-based tests for the Beta_Distribution posterior verifying that α + β increases monotonically with the number of signals processed (evidence always accumulates). +3. THE test suite SHALL include property-based tests for the Bayesian confidence formula verifying that confidence is 0.0 when α = β (maximum uncertainty) and approaches 1.0 as the ratio α/β or β/α increases. +4. THE test suite SHALL include property-based tests for the adaptive decay verifying that the adaptive half-life is always greater than or equal to the base half-life for all valid input combinations. +5. THE test suite SHALL include property-based tests for the multiplicative macro exposure verifying monotonicity (adding non-zero overlap in any dimension increases total impact). +6. THE test suite SHALL include property-based tests for the exponentially weighted momentum verifying that monotonically increasing strength sequences produce positive momentum. +7. THE test suite SHALL include a round-trip property test verifying that computing the Beta posterior from signals, extracting P_bull, then reconstructing approximate signal weights produces values consistent with the original inputs. +8. 
THE test suite SHALL include property-based tests for the expected value computation verifying that EV is positive when P_bull > 0.5 and R_up > R_down (basic directional consistency). +9. THE test suite SHALL include property-based tests for numerical stability verifying that no formula produces NaN, infinity, or values outside documented ranges for any valid input combination. +10. THE test suite SHALL use `@settings(max_examples=100)` and follow the project convention of `test_pbt_*` file naming. diff --git a/.kiro/specs/signal-math-upgrade/tasks.md b/.kiro/specs/signal-math-upgrade/tasks.md new file mode 100644 index 0000000..b5053c8 --- /dev/null +++ b/.kiro/specs/signal-math-upgrade/tasks.md @@ -0,0 +1,349 @@ +# Implementation Plan: Signal Math Upgrade + +## Overview + +Upgrade the Stonks Oracle signal processing pipeline from deterministic heuristic formulas to a probabilistic, regime-aware, and adaptive mathematical framework. Implementation proceeds in layers: foundations (config, schemas, new modules), then each pipeline stage (scoring → trend assembly → macro → competitive → projection → recommendation), then integration wiring, and finally testing. All changes are gated behind the `probabilistic_scoring_enabled` feature flag. + +## Tasks + +- [ ] 1. 
Foundation: Configuration and schema extensions + - [x] 1.1 Extend `ScoringConfig` with probabilistic parameters in `services/aggregation/scoring.py` + - Add `probabilistic: bool = False` toggle field + - Add sigmoid gate parameters: `sigmoid_steepness`, `sigmoid_midpoint` + - Add information gain parameters: `info_gain_lambda`, `info_gain_max`, `default_base_rate` + - Add adaptive decay parameters: `adaptive_decay_impact_scale`, `adaptive_decay_surprise_scale`, `adaptive_decay_market_scale` + - Add regime multiplier parameters: `regime_return_weight`, `regime_volume_weight`, `regime_multiplier_max` + - All new fields must have defaults matching the design document values + - _Requirements: 2.5, 3.1, 5.1, 6.3, 16.1_ + + - [x] 1.2 Extend `SignalWeight` and `WeightedSignal` dataclasses in `services/aggregation/scoring.py` + - Add optional fields to `SignalWeight`: `sigmoid_gate`, `info_gain_factor`, `source_accuracy_factor`, `regime_multiplier` + - Add optional fields to `WeightedSignal`: `info_gain_factor`, `source_accuracy_factor`, `adaptive_half_life` + - All new fields must have defaults (None or 1.0) for backward compatibility + - _Requirements: 16.1, 2.5, 3.3, 4.2_ + + - [x] 1.3 Extend `TrendSummary` Pydantic model in `services/shared/schemas.py` + - Add optional fields: `p_bull`, `alpha`, `beta_param`, `bayesian_confidence`, `entropy`, `regime`, `pipeline_mode` + - `pipeline_mode` defaults to `"heuristic"`; all others default to `None` + - _Requirements: 16.1, 1.6, 9.6_ + + - [x] 1.4 Extend `Recommendation` model in `services/shared/schemas.py` (or `services/recommendation/eligibility.py`) + - Add optional fields: `expected_value`, `p_bull`, `pipeline_mode` + - `pipeline_mode` defaults to `"heuristic"`; all others default to `None` + - _Requirements: 16.1, 14.5_ + + - [x] 1.5 Add `probabilistic_scoring_enabled` feature flag support in `services/shared/config.py` + - Read `probabilistic_scoring_enabled` from `risk_configs.config` JSONB + - Default to `False` 
when key is missing, value is invalid, or DB is unreachable + - Propagate flag through `AggregationConfig` dataclass + - Log which pipeline mode is active at cycle start + - _Requirements: 16.3, 16.4, 16.5, 16.6, 16.7_ + + - [x] 1.6 Create database migration `infra/migrations/034_source_accuracy.sql` + - Create `source_accuracy` table with columns: `id UUID PRIMARY KEY DEFAULT gen_random_uuid()`, `source_id VARCHAR(200) NOT NULL`, `accuracy_ratio FLOAT NOT NULL DEFAULT 0.5`, `sample_count INTEGER NOT NULL DEFAULT 0`, `last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW()`, `created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()` + - Add `UNIQUE(source_id)` constraint and `idx_source_accuracy_source` index + - _Requirements: 4.5_ + +- [x] 2. Checkpoint — Verify foundation compiles and existing tests pass + - Ensure all tests pass, ask the user if questions arise. + +- [ ] 3. New module: Bayesian Accumulator (`services/aggregation/bayesian.py`) + - [x] 3.1 Implement `BayesianPosterior` dataclass and `compute_bayesian_posterior` function + - Create frozen dataclass with fields: `p_bull`, `alpha`, `beta`, `log_likelihood`, `bayesian_confidence`, `entropy`, `signal_count` + - Define `PRIOR` module-level constant for uninformative prior (p_bull=0.5, α=1.0, β=1.0, C=0.0, H=1.0) + - Implement log-likelihood accumulation: `L_t = Σ(w_i · s_i)` using `weight.combined * sentiment_value` + - Compute `P_bull = σ(L_t)` via sigmoid function + - Compute Beta posterior: `α = 1 + W_bull`, `β = 1 + W_bear` from positive/negative weight sums + - Compute Bayesian confidence: `C = 1 - 4αβ/(α+β)²` + - Compute Shannon entropy via `compute_entropy` + - Return `PRIOR` for empty signal lists + - Skip signals with NaN weight or sentiment + - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6_ + + - [x] 3.2 Implement `compute_entropy` function + - Shannon entropy: `H = -p·log₂(p) - (1-p)·log₂(1-p)` + - Return 0.0 for p ≤ 0 or p ≥ 1 (edge cases) + - Return value in [0, 1] with maximum 1.0 at p=0.5 + - _Requirements: 9.1, 9.7_ + + - [x] 3.3 Write property test for 
sigmoid gate monotonicity + - **Property 1: Sigmoid Gate Monotonicity** + - **Validates: Requirements 2.6, 17.1** + + - [x] 3.4 Write property test for Beta posterior evidence accumulation + - **Property 2: Beta Posterior Evidence Accumulation** + - **Validates: Requirements 1.3, 17.2** + + - [x] 3.5 Write property test for Bayesian confidence symmetry and divergence + - **Property 3: Bayesian Confidence Symmetry and Divergence** + - **Validates: Requirements 1.4, 17.3** + + - [x] 3.6 Write property test for Bayesian posterior round-trip consistency + - **Property 4: Bayesian Posterior Round-Trip Consistency** + - **Validates: Requirements 1.7, 17.7** + + - [x] 3.7 Write property test for Shannon entropy range and maximum + - **Property 8: Shannon Entropy Range and Maximum** + - **Validates: Requirements 9.7** + + - [x] 3.8 Write property test for Bayesian confidence monotonic with agreeing signals + - **Property 13: Bayesian Confidence Monotonic with Agreeing Signals** + - **Validates: Requirements 8.6** + +- [ ] 4. 
New module: Regime Detector (`services/aggregation/regime.py`) + - [x] 4.1 Implement `MarketRegime` enum, `RegimeClassification` and `RegimeConfig` dataclasses + - `MarketRegime`: `TREND_FOLLOWING`, `PANIC`, `MEAN_REVERSION`, `UNCERTAINTY` + - `RegimeClassification`: `regime`, `trend_indicator`, `volatility_ratio`, `bullish_threshold`, `bearish_threshold`, `contradiction_penalty_multiplier` + - `RegimeConfig`: all configurable parameters with defaults from design + - _Requirements: 7.3_ + + - [x] 4.2 Implement `compute_ema` and `classify_regime` functions + - `compute_ema`: exponential moving average over last N values + - `classify_regime`: compute trend indicator `R = sign(EMA_20 - EMA_100)` and volatility ratio `V_r = σ_20 / σ_100` + - Classification rules: trend-following (R≠0 AND V_r<1.2), panic (V_r>1.5), mean-reversion (R=0 AND V_r<1.0), uncertainty (all other) + - Adjust thresholds per regime: panic→±0.10, mean-reversion→±0.20, trend-following→±0.15, uncertainty→±0.15 with contradiction multiplier 0.6 + - Default to uncertainty when data is insufficient (<100 days) or σ values are zero + - _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9_ + +- [ ] 5. New module: Source Accuracy Tracker (`services/aggregation/source_accuracy.py`) + - [x] 5.1 Implement `SourceAccuracy` dataclass and database functions + - `SourceAccuracy` dataclass with `source_id`, `accuracy_ratio`, `sample_count`, `last_updated` + - `accuracy_factor` property: return 1.0 when sample_count < 10, else `0.5 + accuracy_ratio` + - `fetch_source_accuracy`: batch fetch from `source_accuracy` table via asyncpg + - `update_source_accuracy`: update accuracy metrics from realized price outcomes + - Handle DB unreachable: return neutral factor 1.0 for all sources + - Clamp corrupted accuracy_ratio to [0.0, 1.0] + - _Requirements: 4.1, 4.2, 4.3, 4.4, 4.5_ + +- [x] 6. Checkpoint — Verify new modules compile and unit tests pass + - Ensure all tests pass, ask the user if questions arise. + +- [ ] 7. 
Signal Scorer upgrades (`services/aggregation/scoring.py`) + - [x] 7.1 Implement sigmoid confidence gate + - Add `sigmoid_gate(x, steepness, midpoint)` function: `σ(k·(x - midpoint))` + - When `probabilistic=True`, replace binary gate with sigmoid gate in `compute_signal_weight` + - When `probabilistic=False`, preserve existing binary gate behavior + - _Requirements: 2.1, 2.2, 2.3, 2.4, 2.5_ + + - [x] 7.2 Implement information gain surprise weighting + - Add `EVENT_TYPE_BASE_RATES` constant dict and `DEFAULT_BASE_RATE = 0.1` + - Add `compute_info_gain(event_type, lambda_param, max_gain, default_base_rate)` function: `r = 1 + λ·(-log₂ P(event_type))`, clamped to max 3.0 + - Integrate as multiplicative factor in combined weight when `probabilistic=True` + - _Requirements: 3.1, 3.2, 3.3, 3.4, 3.5_ + + - [x] 7.3 Implement adaptive recency decay + - Add `compute_adaptive_half_life(base_half_life, impact_score, info_gain_factor, market_multiplier, config)` function + - Compute `β_impact`, `β_surprise`, `β_market_reaction` scaling factors per design + - `τ_i = τ_base · (1 + β_impact) · (1 + β_surprise) · (1 + β_market_reaction)` + - When `probabilistic=True`, use adaptive half-life in `recency_weight`; otherwise use fixed + - _Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7_ + + - [x] 7.4 Implement regime multiplier replacing market context multiplier + - Add `compute_regime_multiplier(returns, volumes, config)` function + - Compute z-scores for return and volume, then `M_regime = 1 + 0.15·|z_r| + 0.10·|z_v|` + - Clamp to [1.0, 2.5]; default to 1.0 when data unavailable or σ=0 + - When `probabilistic=True`, use `M_regime` instead of `M_context` in combined weight + - _Requirements: 6.1, 6.2, 6.3, 6.4, 6.5_ + + - [x] 7.5 Integrate source accuracy factor into `compute_signal_weight` + - Accept optional `source_accuracy_factor` parameter + - When `probabilistic=True`, multiply into combined weight formula + - When `probabilistic=False`, ignore (factor = 1.0) + - 
_Requirements: 4.2, 4.3_ + + - [x] 7.6 Update `compute_signal_weight` to branch on `probabilistic` flag + - When `probabilistic=True`: use sigmoid gate × recency (adaptive) × credibility × (1 + novelty) × info_gain × source_accuracy × regime_multiplier + - When `probabilistic=False`: preserve exact current formula (binary gate × recency × credibility × (1 + novelty) × market_context) + - Populate all new optional fields on `SignalWeight` and `WeightedSignal` + - _Requirements: 16.4, 16.5_ + + - [x] 7.7 Write property test for information gain monotonicity + - **Property 6: Information Gain Monotonicity** + - **Validates: Requirements 3.5** + + - [x] 7.8 Write property test for adaptive decay lower bound + - **Property 5: Adaptive Decay Lower Bound** + - **Validates: Requirements 5.7, 17.4** + +- [ ] 8. Contradiction upgrade (`services/aggregation/contradiction.py`) + - [x] 8.1 Implement weighted disagreement entropy contradiction + - Compute `f_pos = W_positive / (W_positive + W_negative)` and `f_neg = 1 - f_pos` + - Compute `H_contradiction = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg)` + - Weight by evidence mass: `contradiction_score = H_contradiction · min(1.0, (W_pos + W_neg) / W_threshold)` + - Return 0.0 when only one direction exists + - Preserve existing `ContradictionResult` interface + - When `probabilistic=False`, preserve existing minority/majority ratio behavior + - _Requirements: 15.1, 15.2, 15.3, 15.4, 15.5, 15.6, 15.7_ + + - [x] 8.2 Write property test for contradiction entropy monotonicity + - **Property 9: Contradiction Entropy Monotonicity** + - **Validates: Requirements 15.7** + +- [ ] 9. 
Trend Assembly upgrades (`services/aggregation/worker.py`) + - [x] 9.1 Integrate Bayesian posterior into trend assembly + - When `probabilistic=True`, call `compute_bayesian_posterior` on merged signals + - Use Bayesian confidence formula for trend confidence: `0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction` + - Use entropy-based direction: H>0.9→mixed, P_bull>0.65→bullish, P_bull<0.35→bearish, else neutral + - Apply regime-adjusted thresholds from `RegimeClassification` + - Populate new `TrendSummary` fields: `p_bull`, `alpha`, `beta_param`, `bayesian_confidence`, `entropy`, `regime`, `pipeline_mode` + - Store probabilistic outputs in `market_context` JSONB under `"probabilistic"` key + - When `probabilistic=False`, preserve exact current heuristic behavior + - _Requirements: 1.1, 1.2, 8.1, 8.2, 8.3, 8.4, 8.5, 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 7.8, 16.4, 16.5_ + + - [x] 9.2 Wire regime detection into the aggregation cycle + - Call `classify_regime` with closing prices and returns for each ticker + - Pass `RegimeClassification` to trend assembly for threshold adjustment + - Default to uncertainty regime when market data is unavailable + - Persist regime classification in JSONB for auditability + - _Requirements: 7.1, 7.2, 7.3, 7.8, 7.9_ + +- [ ] 10. 
Macro scoring upgrade (`services/aggregation/interpolation.py`) + - [x] 10.1 Implement multiplicative macro exposure formula + - When `probabilistic=True`, compute `S_macro = severity · (1 - Π_k(1 - w_k · O_k))` instead of linear weighted sum + - Preserve overlap weights: w_geo=0.35, w_supply=0.25, w_commodity=0.25, w_sector=0.15 + - Preserve severity mapping and resilience modifier + - When `probabilistic=False`, preserve exact current linear formula + - _Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6_ + + - [x] 10.2 Implement conditional macro signal integration + - When `probabilistic=True` and both company and macro signals exist, apply macro as multiplicative modifier: `S_adjusted = S_company · clamp(1 + M_macro · sign_alignment, 0.5, 1.5)` + - When only macro signals exist, fall back to additive behavior with weight 0.3 + - When only company signals exist, use modifier = 1.0 + - Log macro modifier value per ticker + - When `probabilistic=False`, preserve current additive merge behavior + - _Requirements: 11.1, 11.2, 11.3, 11.4, 11.5_ + + - [x] 10.3 Write property test for multiplicative macro exposure monotonicity + - **Property 7: Multiplicative Macro Exposure Monotonicity** + - **Validates: Requirements 10.7, 17.5** + +- [ ] 11. 
Competitive signal upgrade (`services/aggregation/signal_propagation.py`) + - [x] 11.1 Implement graph-distance attenuation for competitive signals + - When `probabilistic=True`, compute `S_transfer = S_source · ρ_historical · e^(-d_network)` instead of flat transfer + - Compute graph distance as shortest path in competitor relationship graph (cap at 3) + - Use 90-day rolling Pearson correlation for `ρ_historical`; default to 0.3 (same-sector) or 0.1 (cross-sector) when insufficient data (<30 days) + - Preserve existing relationship strength threshold (R ≥ 0.2) as pre-filter + - When `probabilistic=False`, preserve exact current flat transfer behavior + - _Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7_ + + - [x] 11.2 Write property test for competitive signal distance attenuation + - **Property 11: Competitive Signal Distance Attenuation** + - **Validates: Requirements 12.7** + +- [ ] 12. Projection upgrade (`services/aggregation/projection.py`) + - [x] 12.1 Implement exponentially weighted momentum + - When `probabilistic=True`, compute `M_t = Σ_{k=0}^{K-1} λ^k · ΔS_{t-k}` with λ=0.7, K up to 10 + - Normalize by geometric series sum to produce value in [-1, 1] + - Fall back to current heuristic when fewer than 2 historical cycles available + - Compute volatility-scaled momentum: `M_adj = M_t / max(σ_20, 0.01)`, clamped to [-2.0, 2.0] + - When `probabilistic=False`, preserve exact current simple momentum behavior + - _Requirements: 13.1, 13.2, 13.3, 13.4, 13.5, 13.6_ + + - [x] 12.2 Write property test for exponentially weighted momentum direction + - **Property 10: Exponentially Weighted Momentum Direction** + - **Validates: Requirements 13.6, 17.6** + +- [ ] 13. 
Recommendation upgrade (`services/recommendation/eligibility.py`) + - [x] 13.1 Implement expected value recommendation gate + - When `probabilistic=True`, compute `EV = P_bull · R_up - P_bear · R_down` + - Estimate `R_up = strength · σ_20 · √(horizon_days)` and `R_down = (1 - strength) · σ_20 · √(horizon_days)` + - When EV > threshold (default 0.005), allow recommendation through existing gates + - When EV ≤ threshold, force recommendation to informational mode + - Persist EV in `risk_checks` JSONB of `recommendation_evaluations` + - Populate `expected_value`, `p_bull`, `pipeline_mode` on Recommendation model + - Preserve all existing eligibility gates as additional requirements + - When `probabilistic=False`, skip EV gate entirely + - _Requirements: 14.1, 14.2, 14.3, 14.4, 14.5, 14.6_ + + - [x] 13.2 Write property test for expected value directional consistency + - **Property 12: Expected Value Directional Consistency** + - **Validates: Requirements 17.8** + +- [x] 14. Checkpoint — Verify all pipeline stages compile and existing tests still pass + - Ensure all tests pass, ask the user if questions arise. + +- [ ] 15. 
Integration wiring and feature flag plumbing + - [x] 15.1 Wire feature flag through the aggregation worker entry point + - Read `probabilistic_scoring_enabled` from `risk_configs` at cycle start in `services/aggregation/worker.py` + - Pass flag to `ScoringConfig`, trend assembly, contradiction, macro, competitive, and projection stages + - Log pipeline mode at cycle start + - Ensure flag is read once per cycle (mid-cycle changes take effect next cycle) + - _Requirements: 16.3, 16.6, 16.7_ + + - [x] 15.2 Wire source accuracy fetch into the scoring pipeline + - At cycle start, batch-fetch source accuracy for all source IDs in the current signal set + - Pass `source_accuracy_factor` to `compute_signal_weight` for each signal + - Handle DB errors gracefully (default to 1.0) + - _Requirements: 4.1, 4.2, 4.3_ + + - [x] 15.3 Wire regime detection into the aggregation cycle + - Fetch closing prices and returns for each ticker from market data + - Call `classify_regime` and pass result to trend assembly and scoring stages + - Handle missing market data (default to uncertainty regime) + - _Requirements: 7.1, 7.8, 7.9_ + + - [x] 15.4 Store probabilistic outputs in existing JSONB columns + - Store Bayesian fields in `trend_windows.market_context` JSONB under `"probabilistic"` key + - Store EV fields in `recommendation_evaluations.risk_checks` JSONB + - Store regime classification in trend window JSONB + - _Requirements: 16.2_ + +- [ ] 16. 
Numerical stability and edge case hardening + - [x] 16.1 Add input validation and edge case guards across all new functions + - Guard `log₂(0)` in entropy and information gain computations + - Floor `max(σ_20, 0.01)` for momentum volatility scaling + - Default to uncertainty regime when σ values are zero + - Return `M_regime = 1.0` when z-score σ = 0 + - Skip signals with NaN weight or sentiment + - Clamp all outputs to documented ranges + - _Requirements: 17.9, 6.4_ + + - [x] 16.2 Write property test for numerical stability across all formulas + - **Property 14: Numerical Stability Across All Formulas** + - **Validates: Requirements 17.9, 6.4** + +- [ ] 17. Unit tests for all new and modified modules + - [x] 17.1 Write unit tests for Bayesian accumulator (`tests/test_bayesian.py`) + - Test uninformative prior (empty signals → P_bull=0.5, α=1, β=1, C=0) + - Test specific sigmoid gate values (x=0.5→0.5, x=0.2→<0.05, x=0.8→>0.95) + - Test entropy direction mapping (H>0.9→mixed, P_bull>0.65→bullish, etc.) 
+ - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5_ + + - [x] 17.2 Write unit tests for regime detector (`tests/test_regime.py`) + - Test specific (R, V_r) → expected regime classification + - Test threshold adjustments per regime (panic→0.10, mean_reversion→0.20) + - Test insufficient data fallback to uncertainty + - _Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9_ + + - [x] 17.3 Write unit tests for source accuracy tracker (`tests/test_source_accuracy.py`) + - Test accuracy_factor property: sample_count < 10 → 1.0, else 0.5 + ratio + - Test corrupted data clamping + - _Requirements: 4.1, 4.2, 4.3_ + + - [x] 17.4 Write unit tests for signal scoring upgrades (`tests/test_signal_math_unit.py`) + - Test info gain clamp (very rare event → factor ≤ 3.0) + - Test default base rate (unknown event type → 0.1) + - Test adaptive decay edge cases (all zeros → τ_base, all max → 6×τ_base) + - Test zero overlap → zero macro impact + - Test max overlap → ≈severity×0.689 + - Test macro fallback behaviors (only macro → additive, only company → no modifier) + - Test graph distance cutoff (d>3 → no propagation) + - Test momentum fallback (<2 cycles → heuristic) + - Test EV threshold behavior (EV>0.005→proceed, EV≤0.005→informational) + - Test feature flag behaviors (flag=false→heuristic, flag=true→probabilistic) + - _Requirements: 3.1, 3.4, 5.5, 5.6, 10.3, 10.4, 11.3, 13.3, 14.3, 14.4, 16.4, 16.5_ + +- [x] 18. Final checkpoint — Ensure all tests pass + - Ensure all tests pass, ask the user if questions arise. 
+ +## Notes + +- Tasks marked with `*` are optional and can be skipped for faster MVP +- Each task references specific requirements for traceability +- Checkpoints ensure incremental validation after each major phase +- Property tests validate the 14 universal correctness properties from the design document +- Unit tests validate specific examples, edge cases, and integration points +- The design uses Python throughout — no language selection needed +- Migration number is 034 (existing migrations go up to 033) +- All new dataclass fields use optional defaults for backward compatibility +- Feature flag `probabilistic_scoring_enabled` gates every behavioral change diff --git a/infra/migrations/034_source_accuracy.sql b/infra/migrations/034_source_accuracy.sql new file mode 100644 index 0000000..c24db8e --- /dev/null +++ b/infra/migrations/034_source_accuracy.sql @@ -0,0 +1,18 @@ +-- Source accuracy tracking table for historical prediction accuracy per source. +-- +-- Stores per-source accuracy metrics (fraction of correct directional calls) +-- used by the probabilistic scoring pipeline to weight source credibility. +-- See Requirement 4.5: source accuracy metrics stored with source identifier, +-- accuracy ratio, sample count, and last updated timestamp. + +CREATE TABLE IF NOT EXISTS source_accuracy ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + source_id VARCHAR(200) NOT NULL, + accuracy_ratio FLOAT NOT NULL DEFAULT 0.5, + sample_count INTEGER NOT NULL DEFAULT 0, + last_updated TIMESTAMPTZ NOT NULL DEFAULT NOW(), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(source_id) +); + +CREATE INDEX IF NOT EXISTS idx_source_accuracy_source ON source_accuracy(source_id); diff --git a/services/aggregation/bayesian.py b/services/aggregation/bayesian.py new file mode 100644 index 0000000..e0f361e --- /dev/null +++ b/services/aggregation/bayesian.py @@ -0,0 +1,127 @@ +"""Bayesian accumulator for probabilistic sentiment aggregation. 
+ +Accumulates weighted signals into a Bayesian posterior using +log-likelihood accumulation, Beta distribution parameters, and +Shannon entropy for mixed-signal detection. + +Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 9.1, 9.7 +""" +from __future__ import annotations + +import math +from dataclasses import dataclass + +from services.aggregation.scoring import WeightedSignal + + +@dataclass(frozen=True) +class BayesianPosterior: + """Bayesian posterior state from signal accumulation.""" + + p_bull: float # σ(L_t), bullish probability [0, 1] + alpha: float # Beta distribution α parameter (≥ 1.0) + beta: float # Beta distribution β parameter (≥ 1.0) + log_likelihood: float # Raw log-likelihood accumulation L_t + bayesian_confidence: float # 1 - 4αβ/(α+β)², [0, 1] + entropy: float # Shannon entropy H, [0, 1] + signal_count: int # Number of signals processed + + +# Uninformative prior (no evidence) +PRIOR = BayesianPosterior( + p_bull=0.5, + alpha=1.0, + beta=1.0, + log_likelihood=0.0, + bayesian_confidence=0.0, + entropy=1.0, + signal_count=0, +) + + +def compute_entropy(p_bull: float) -> float: + """Shannon entropy H = -p·log₂(p) - (1-p)·log₂(1-p). + + Returns value in [0, 1]. Maximum at p=0.5, zero at p=0 or p=1. + Handles edge cases p≤0 and p≥1 by returning 0.0. + """ + if p_bull <= 0.0 or p_bull >= 1.0: + return 0.0 + q = 1.0 - p_bull + return -(p_bull * math.log2(p_bull) + q * math.log2(q)) + + +def compute_bayesian_posterior( + signals: list[WeightedSignal], +) -> BayesianPosterior: + """Accumulate weighted signals into a Bayesian posterior. + + Computes: + - Log-likelihood: L_t = Σ(w_i · s_i) + - Bullish probability: P_bull = σ(L_t) + - Beta posterior: α = 1 + W_bull, β = 1 + W_bear + - Bayesian confidence: C = 1 - 4αβ/(α+β)² + - Shannon entropy: H = -p·log₂(p) - (1-p)·log₂(1-p) + + Returns PRIOR for empty signal lists. + Skips signals with NaN weight or sentiment. 
+ """ + if not signals: + return PRIOR + + log_likelihood = 0.0 + w_bull = 0.0 + w_bear = 0.0 + count = 0 + + for sig in signals: + combined = sig.weight.combined + sentiment = sig.sentiment_value + + # Skip signals with NaN weight or sentiment + if math.isnan(combined) or math.isnan(sentiment): + continue + + log_likelihood += combined * sentiment + + if sentiment > 0.0: + w_bull += combined + elif sentiment < 0.0: + w_bear += combined + + count += 1 + + if count == 0: + return PRIOR + + # P_bull via sigmoid: σ(L_t) = 1 / (1 + exp(-L_t)) + # Guard against overflow in exp for very large |L_t| + if log_likelihood > 500.0: + p_bull = 1.0 + elif log_likelihood < -500.0: + p_bull = 0.0 + else: + p_bull = 1.0 / (1.0 + math.exp(-log_likelihood)) + + # Beta posterior parameters + alpha = 1.0 + w_bull + beta_param = 1.0 + w_bear + + # Bayesian confidence: C = 1 - 4αβ/(α+β)² + ab_sum = alpha + beta_param + bayesian_confidence = 1.0 - (4.0 * alpha * beta_param) / (ab_sum * ab_sum) + # Clamp to [0, 1] to guard against floating-point rounding + bayesian_confidence = max(0.0, min(1.0, bayesian_confidence)) + + # Shannon entropy + entropy = compute_entropy(p_bull) + + return BayesianPosterior( + p_bull=p_bull, + alpha=alpha, + beta=beta_param, + log_likelihood=log_likelihood, + bayesian_confidence=bayesian_confidence, + entropy=entropy, + signal_count=count, + ) diff --git a/services/aggregation/contradiction.py b/services/aggregation/contradiction.py index 4b41c1d..de3a0b0 100644 --- a/services/aggregation/contradiction.py +++ b/services/aggregation/contradiction.py @@ -4,10 +4,11 @@ Analyses weighted signals to detect and represent disagreement explicitly, rather than collapsing contradictory evidence into a single unsupported conclusion. 
-Requirements: 6.4, 6.5 +Requirements: 6.4, 6.5, 15.1–15.7 """ from __future__ import annotations +import math from dataclasses import dataclass from services.aggregation.scoring import WeightedSignal @@ -35,6 +36,9 @@ class ContradictionResult: def detect_contradictions( signals: list[WeightedSignal], catalyst_entries: list[CatalystEntry] | None = None, + *, + probabilistic: bool = False, + w_threshold: float = 5.0, ) -> ContradictionResult: """Run contradiction detection across multiple dimensions. @@ -42,6 +46,16 @@ def detect_contradictions( 1. Sentiment disagreement — the core positive-vs-negative split 2. Catalyst disagreement — same catalyst type with opposing sentiment + When ``probabilistic`` is True, the overall score uses weighted + disagreement entropy (Req 15.1–15.7) instead of the minority/majority + ratio. When False, the existing ratio formula is preserved exactly. + + Args: + signals: Weighted signals to analyse. + catalyst_entries: Optional catalyst metadata for per-catalyst analysis. + probabilistic: Use entropy-based scoring when True. + w_threshold: Evidence mass threshold for entropy weighting (default 5.0). + Returns a ContradictionResult with an overall score and per-dimension disagreement details. """ @@ -55,7 +69,10 @@ def detect_contradictions( catalyst_details = _detect_catalyst_disagreement(signals, catalyst_entries) details.extend(catalyst_details) - score = _compute_overall_score(signals) + if probabilistic: + score = _compute_entropy_score(signals, w_threshold) + else: + score = _compute_overall_score(signals) return ContradictionResult(score=score, details=details) @@ -82,6 +99,58 @@ def _compute_overall_score(signals: list[WeightedSignal]) -> float: return round(minority / total, 4) +def _compute_entropy_score( + signals: list[WeightedSignal], + w_threshold: float = 5.0, +) -> float: + """Weighted disagreement entropy — probabilistic contradiction score. 
+ + Computes Shannon entropy over the positive/negative weight distribution, + weighted by evidence mass relative to a configurable threshold. + + Formula: + f_pos = W_pos / (W_pos + W_neg) + f_neg = 1 - f_pos + H = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg) (in [0, 1]) + score = H · min(1.0, (W_pos + W_neg) / W_threshold) + + Returns 0.0 when only one direction exists (no disagreement). + + Requirements: 15.1–15.7 + """ + if not signals: + return 0.0 + + pos_weight = 0.0 + neg_weight = 0.0 + for sig in signals: + w = sig.weight.combined * sig.impact_score + if sig.sentiment_value > 0: + pos_weight += w + elif sig.sentiment_value < 0: + neg_weight += w + + # No disagreement when only one direction exists (Req 15.5) + if pos_weight <= 0.0 or neg_weight <= 0.0: + return 0.0 + + total = pos_weight + neg_weight + + # Compute weight fractions (Req 15.2) + f_pos = pos_weight / total + f_neg = neg_weight / total # = 1 - f_pos + + # Shannon entropy H = -f_pos·log₂(f_pos) - f_neg·log₂(f_neg) (Req 15.3) + # Guard against log₂(0) — already handled by the early return above + h_contradiction = -f_pos * math.log2(f_pos) - f_neg * math.log2(f_neg) + + # Weight by evidence mass (Req 15.4) + evidence_factor = min(1.0, total / w_threshold) if w_threshold > 0.0 else 1.0 + score = h_contradiction * evidence_factor + + return round(score, 4) + + def _detect_sentiment_disagreement( signals: list[WeightedSignal], ) -> DisagreementDetail | None: diff --git a/services/aggregation/interpolation.py b/services/aggregation/interpolation.py index 879ba18..2434765 100644 --- a/services/aggregation/interpolation.py +++ b/services/aggregation/interpolation.py @@ -283,27 +283,82 @@ def _determine_impact_direction( # --------------------------------------------------------------------------- +def _compute_multiplicative_exposure( + geo_overlap: float, + supply_overlap: float, + commodity_overlap: float, + sector_match: float, +) -> float: + """Compute multiplicative compounding exposure. 
+ + Formula: 1 - Π_k(1 - w_k · O_k) + + Overlaps combine like independent probabilities: each extra exposed + dimension raises the score, but it never exceeds the linear weighted sum. + + Returns a value in [0, ~0.689] (max when all overlaps are 1.0). + + Requirements: 10.1, 10.4, 10.7 + """ + product = ( + (1.0 - GEO_WEIGHT * geo_overlap) + * (1.0 - SUPPLY_WEIGHT * supply_overlap) + * (1.0 - COMMODITY_WEIGHT * commodity_overlap) + * (1.0 - SECTOR_WEIGHT * sector_match) + ) + return 1.0 - product + + +def _compute_linear_exposure( + geo_overlap: float, + supply_overlap: float, + commodity_overlap: float, + sector_match: float, +) -> float: + """Compute linear weighted-sum exposure (original heuristic formula). + + Formula: w_geo·O_geo + w_supply·O_supply + w_commodity·O_commodity + w_sector·O_sector + + Returns a value in [0, 1]. + """ + return ( + GEO_WEIGHT * geo_overlap + + SUPPLY_WEIGHT * supply_overlap + + COMMODITY_WEIGHT * commodity_overlap + + SECTOR_WEIGHT * sector_match + ) + + def compute_macro_impact( event: GlobalEvent, profile: ExposureProfileSchema, + *, + probabilistic: bool = False, ) -> MacroImpactRecord: """Compute the macro impact of a global event on a company. - Scoring formula: + When ``probabilistic=False`` (default), uses the linear weighted-sum: raw_score = severity_weight * ( 0.35 * geographic_overlap + 0.25 * supply_chain_overlap + 0.25 * commodity_overlap + 0.15 * sector_match ) - final_score = apply_resilience_modifier(raw_score, tier, is_international) + + When ``probabilistic=True``, uses multiplicative compounding exposure: + raw_score = severity_weight * (1 - Π_k(1 - w_k · O_k)) + + In both modes, the resilience modifier is applied after the raw score. Args: event: The classified global event. profile: The company's exposure profile. + probabilistic: Use multiplicative formula when True. Returns: A MacroImpactRecord with the computed score and metadata. 
+ + Requirements: 10.1, 10.2, 10.3, 10.4, 10.5, 10.6 """ now = datetime.now(timezone.utc) @@ -360,13 +415,16 @@ def compute_macro_impact( # Severity weight severity_weight = SEVERITY_WEIGHTS.get(event.severity, 0.25) - # Raw score - raw_score = severity_weight * ( - GEO_WEIGHT * geo_overlap - + SUPPLY_WEIGHT * supply_overlap - + COMMODITY_WEIGHT * commodity_overlap - + SECTOR_WEIGHT * sector_match - ) + # Raw score: multiplicative or linear depending on mode + if probabilistic: + exposure = _compute_multiplicative_exposure( + geo_overlap, supply_overlap, commodity_overlap, sector_match, + ) + else: + exposure = _compute_linear_exposure( + geo_overlap, supply_overlap, commodity_overlap, sector_match, + ) + raw_score = severity_weight * exposure # Determine if event is international (affects multiple regions) is_international = len(event.affected_regions) > 1 @@ -406,19 +464,27 @@ def compute_macro_impact_with_sector( event: GlobalEvent, profile: ExposureProfileSchema, company_sector: str = "", + *, + probabilistic: bool = False, ) -> MacroImpactRecord: """Compute macro impact with explicit sector matching. Like compute_macro_impact but accepts a company_sector parameter for proper sector_match computation. + When ``probabilistic=True``, uses multiplicative compounding exposure. + When ``probabilistic=False``, uses the original linear weighted sum. + Args: event: The classified global event. profile: The company's exposure profile. company_sector: The company's GICS sector name. + probabilistic: Use multiplicative formula when True. Returns: A MacroImpactRecord with the computed score and metadata. 
def compute_conditional_macro_modifier(
    company_strength: float,
    company_direction: str,
    macro_impact: float,
    macro_direction: str,
) -> float:
    """Compute the multiplicative macro modifier for conditional integration.

    When both company and macro signals exist, macro acts as a modifier:
        S_adjusted = S_company · clamp(1 + M_macro · sign_alignment, 0.5, 1.5)

    sign_alignment is +1 when macro and company agree in direction, -1 when
    they disagree, and 0 when either direction is not one of
    bullish/positive/bearish/negative.

    Args:
        company_strength: The company-level signal strength. Accepted for
            interface compatibility; the formula does not read it.
        company_direction: Company trend direction (bullish/bearish/neutral/mixed).
        macro_impact: Macro impact score.
        macro_direction: Macro impact direction (positive/negative/mixed/neutral).

    Returns:
        The multiplicative modifier in [0.5, 1.5]. A non-finite
        ``macro_impact`` (NaN or ±inf) yields the neutral modifier 1.0.

    Requirements: 11.1, 11.2
    """
    # Local import: this block does not show whether the module imports math.
    import math

    # Without this guard a NaN impact makes ``min(1.5, nan)`` return 1.5, so
    # corrupt macro data would silently amplify every company signal by 50%.
    if not math.isfinite(macro_impact):
        return 1.0

    direction_sign = {
        "bullish": 1,
        "positive": 1,
        "bearish": -1,
        "negative": -1,
    }
    company_sign = direction_sign.get(company_direction, 0)
    macro_sign = direction_sign.get(macro_direction, 0)

    # Product of the signs: +1 agree, -1 disagree, 0 when either is
    # neutral/mixed/unknown (no alignment signal).
    sign_alignment = float(company_sign * macro_sign)

    raw_modifier = 1.0 + macro_impact * sign_alignment
    return max(0.5, min(1.5, raw_modifier))


def integrate_macro_signals(
    company_signals: list,
    macro_signals: list,
    company_direction: str,
    macro_impacts: list,
    ticker: str = "",
    *,
    probabilistic: bool = False,
    macro_signal_weight: float = 0.3,
) -> tuple[list, float]:
    """Integrate macro signals with company signals.

    When ``probabilistic=False`` the two lists are concatenated (company
    first) and the modifier is 1.0.

    When ``probabilistic=True``:
      - Both lists non-empty: the macro layer becomes a multiplicative
        modifier. Shallow copies of the company signals are returned with
        ``impact_score`` scaled by the modifier; the macro signals themselves
        are not part of the returned list.
      - Only macro signals: they are returned unchanged with modifier 1.0.
      - Otherwise (company only, or both empty): the company signals are
        returned unchanged with modifier 1.0.

    NOTE(review): ``macro_signal_weight`` is only reported in the macro-only
    log line; the returned macro signals are not re-weighted here. Confirm
    whether the 0.3 weight is applied elsewhere.

    Args:
        company_signals: Signals from the company layer.
        macro_signals: Signals from the macro layer.
        company_direction: Derived company trend direction string.
        macro_impacts: Objects read via ``macro_impact_score`` and
            ``impact_direction`` attributes; missing attributes default to
            0.0 and "neutral".
        ticker: Ticker symbol, used for logging only.
        probabilistic: Use the conditional modifier when True.
        macro_signal_weight: Weight quoted for the macro-only fallback.

    Returns:
        Tuple of (signals, macro_modifier_applied). The modifier is 1.0
        whenever no scaling was performed.

    Requirements: 11.1, 11.2, 11.3, 11.4, 11.5
    """
    if not probabilistic:
        # Heuristic mode: simple additive merge (current behavior).
        return list(company_signals) + list(macro_signals), 1.0

    has_company = len(company_signals) > 0
    has_macro = len(macro_signals) > 0

    if has_macro and not has_company:
        # Macro-only fallback (Req 11.3).
        logger.info(
            "Macro-only fallback for %s: using additive merge with weight %.2f",
            ticker, macro_signal_weight,
        )
        return list(macro_signals), 1.0

    if not (has_company and has_macro):
        # Company-only: no modification (Req 11.4).
        logger.info("Company-only signals for %s: macro modifier=1.0", ticker)
        return list(company_signals), 1.0

    from copy import copy

    # Average macro impact, plus impact mass accumulated per direction.
    avg_macro_impact = 0.0
    direction_counts: dict[str, float] = {}
    for record in macro_impacts:
        score = getattr(record, "macro_impact_score", 0.0)
        direction = getattr(record, "impact_direction", "neutral")
        avg_macro_impact += score
        direction_counts[direction] = direction_counts.get(direction, 0.0) + score

    if macro_impacts:
        avg_macro_impact /= len(macro_impacts)

    # Dominant macro direction = the direction carrying the most total impact.
    if direction_counts:
        macro_direction = max(direction_counts, key=direction_counts.get)
    else:
        macro_direction = "neutral"

    modifier = compute_conditional_macro_modifier(
        company_strength=0.0,  # not used in current formula
        company_direction=company_direction,
        macro_impact=avg_macro_impact,
        macro_direction=macro_direction,
    )

    logger.info(
        "Macro modifier for %s: %.4f (avg_impact=%.4f, macro_dir=%s, company_dir=%s)",
        ticker, modifier, avg_macro_impact, macro_direction, company_direction,
    )

    # Scale copies so the caller's original signal objects stay untouched.
    modified_signals = []
    for original in company_signals:
        scaled = copy(original)
        scaled.impact_score = original.impact_score * modifier
        modified_signals.append(scaled)

    return modified_signals, modifier


# ---------------------------------------------------------------------------
# Exponentially weighted momentum (Requirements: 13.1–13.6)
# ---------------------------------------------------------------------------


def compute_ew_momentum(
    strength_changes: list[float],
    lambda_decay: float = 0.7,
) -> float:
    """Compute exponentially weighted momentum from historical strength changes.

    Formula: M_t = Σ_k λ^k · ΔS_{t-k}, divided by the sum of the weights λ^k
    that were actually used, then clamped to [-1, 1].

    Only the 10 most recent changes are considered. NaN entries are skipped
    but keep their position, so later entries still receive the weight of
    their original index.

    NOTE(review): the "fewer than 2" guard counts NaN entries, so an input
    such as ``[nan, x]`` still produces a value from the single usable entry.
    Confirm whether that is intended.

    Args:
        strength_changes: Signed strength changes ΔS, most recent first.
        lambda_decay: Decay factor λ (default 0.7). Not validated here.

    Returns:
        Momentum clamped to [-1, 1]. Returns 0.0 when fewer than two entries
        are supplied, when no entry is usable, or when the result is not
        finite.

    Requirements: 13.1, 13.2, 13.3, 13.6
    """
    if len(strength_changes) < 2:
        return 0.0

    numerator = 0.0
    denominator = 0.0
    for age, delta in enumerate(strength_changes[:10]):
        if math.isnan(delta):
            continue
        weight = lambda_decay ** age
        numerator += weight * delta
        denominator += weight

    if denominator == 0.0:
        return 0.0

    momentum = numerator / denominator
    # Guard against NaN/inf propagation from upstream data.
    if math.isnan(momentum) or math.isinf(momentum):
        return 0.0
    return max(-1.0, min(1.0, momentum))


def compute_volatility_scaled_momentum(
    momentum: float,
    sigma_20: float,
) -> float:
    """Compute volatility-scaled momentum.

    Formula: M_adj = M_t / max(σ_20, 0.01), clamped to [-2.0, 2.0].

    The 0.01 floor keeps the division well-defined for very small volatility.

    Args:
        momentum: Raw or exponentially weighted momentum value.
        sigma_20: 20-day return standard deviation.

    Returns:
        Scaled momentum in [-2.0, 2.0], or 0.0 when the quotient is NaN or
        infinite.

    Requirements: 13.4, 13.5
    """
    scaled = momentum / max(sigma_20, 0.01)
    if math.isnan(scaled) or math.isinf(scaled):
        return 0.0
    return max(-2.0, min(2.0, scaled))
class MarketRegime(str, Enum):
    """Market regime classification categories."""

    TREND_FOLLOWING = "trend_following"
    PANIC = "panic"
    MEAN_REVERSION = "mean_reversion"
    UNCERTAINTY = "uncertainty"


@dataclass(frozen=True)
class RegimeClassification:
    """Result of regime detection for a ticker."""

    regime: MarketRegime
    trend_indicator: float  # R = sign(EMA_short - EMA_long): -1.0, 0.0 or 1.0
    volatility_ratio: float  # V_r = σ_short / σ_long
    bullish_threshold: float  # Adjusted +threshold for direction
    bearish_threshold: float  # Negated bullish_threshold
    contradiction_penalty_multiplier: float  # 0.4 default, 0.6 for uncertainty


@dataclass(frozen=True)
class RegimeConfig:
    """Configuration parameters for regime detection."""

    ema_short_period: int = 20
    ema_long_period: int = 100
    vol_short_period: int = 20
    vol_long_period: int = 100
    panic_vol_ratio: float = 1.5
    trend_vol_ratio: float = 1.2
    mean_reversion_vol_ratio: float = 1.0
    default_threshold: float = 0.15
    panic_threshold: float = 0.10
    mean_reversion_threshold: float = 0.20
    uncertainty_contradiction_multiplier: float = 0.6


# Uncertainty classification matching the default RegimeConfig values.
# Kept as a module constant for any code that references it directly.
_DEFAULT_UNCERTAINTY = RegimeClassification(
    regime=MarketRegime.UNCERTAINTY,
    trend_indicator=0.0,
    volatility_ratio=1.0,
    bullish_threshold=0.15,
    bearish_threshold=-0.15,
    contradiction_penalty_multiplier=0.6,
)


def _uncertainty_fallback(config: RegimeConfig) -> RegimeClassification:
    """Build the UNCERTAINTY result used when regime detection cannot run.

    Thresholds come from *config* so a customised configuration is honoured
    on the fallback path as well. With default values the shared
    ``_DEFAULT_UNCERTAINTY`` instance is returned.
    """
    fallback = RegimeClassification(
        regime=MarketRegime.UNCERTAINTY,
        trend_indicator=0.0,
        volatility_ratio=1.0,
        bullish_threshold=config.default_threshold,
        bearish_threshold=-config.default_threshold,
        contradiction_penalty_multiplier=config.uncertainty_contradiction_multiplier,
    )
    if fallback == _DEFAULT_UNCERTAINTY:
        return _DEFAULT_UNCERTAINTY
    return fallback


def compute_ema(values: list[float], period: int) -> float:
    """Compute an exponential moving average over the last ``period`` values.

    Uses multiplier = 2 / (period + 1). Only the trailing ``period`` values
    are read (all of them when fewer are available); the first of those
    seeds the average.

    Raises:
        ValueError: When *values* is empty or *period* < 1.
    """
    if not values or period < 1:
        raise ValueError("values must be non-empty and period must be >= 1")

    # Slicing already copes with lists shorter than ``period``.
    data = values[-period:]

    multiplier = 2.0 / (period + 1)
    ema = data[0]
    for value in data[1:]:
        ema = (value - ema) * multiplier + ema
    return ema


def _sign(x: float) -> float:
    """Return -1.0, 0.0, or 1.0 for the sign of *x* (0.0 for NaN)."""
    if x > 0.0:
        return 1.0
    if x < 0.0:
        return -1.0
    return 0.0


def classify_regime(
    closing_prices: list[float],
    returns: list[float],
    config: RegimeConfig = RegimeConfig(),
) -> RegimeClassification:
    """Classify market regime from price and return history.

    Rules, evaluated in order on R = sign(EMA_short - EMA_long) and
    V_r = σ_short / σ_long:
      1. PANIC when V_r > ``panic_vol_ratio``.
      2. TREND_FOLLOWING when R != 0 and V_r < ``trend_vol_ratio``.
      3. MEAN_REVERSION when R == 0 and V_r < ``mean_reversion_vol_ratio``.
      4. UNCERTAINTY otherwise.

    Falls back to an UNCERTAINTY result built from *config* when:
      - fewer than ``ema_long_period`` prices or ``vol_long_period`` returns
        are supplied,
      - either EMA is not finite (e.g. a NaN price in the window),
      - either return window has fewer than two values, or
      - either standard deviation is zero or NaN.

    NOTE(review): R is 0.0 only when the two EMAs are exactly equal, so the
    MEAN_REVERSION branch is reachable only for such inputs (e.g. a flat
    price series). Confirm this matches the intent of Req 7.3.

    Args:
        closing_prices: Closing prices; the trailing windows are read.
        returns: Returns; the trailing windows are read.
        config: Detection parameters.

    Returns:
        The regime plus the thresholds and contradiction multiplier for it.

    Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9
    """
    fallback = _uncertainty_fallback(config)

    # Insufficient price or return history -> uncertainty.
    if len(closing_prices) < config.ema_long_period:
        return fallback
    if len(returns) < config.vol_long_period:
        return fallback

    # --- Trend indicator: R = sign(EMA_short - EMA_long) ---
    ema_short = compute_ema(closing_prices, config.ema_short_period)
    ema_long = compute_ema(closing_prices, config.ema_long_period)
    # A NaN/inf EMA would make _sign() report 0.0, which reads as "no trend"
    # and could be classified as mean reversion; treat it as missing data.
    if not (math.isfinite(ema_short) and math.isfinite(ema_long)):
        return fallback
    trend_indicator = _sign(ema_short - ema_long)

    # --- Volatility ratio: V_r = σ_short / σ_long ---
    short_returns = returns[-config.vol_short_period:]
    long_returns = returns[-config.vol_long_period:]

    # statistics.stdev needs at least two data points.
    if len(short_returns) < 2 or len(long_returns) < 2:
        return fallback

    sigma_short = statistics.stdev(short_returns)
    sigma_long = statistics.stdev(long_returns)

    if sigma_long == 0.0 or sigma_short == 0.0:
        return fallback
    if math.isnan(sigma_short) or math.isnan(sigma_long):
        return fallback

    volatility_ratio = sigma_short / sigma_long

    # --- Classification rules (Req 7.3); panic takes priority ---
    if volatility_ratio > config.panic_vol_ratio:
        regime = MarketRegime.PANIC
        threshold = config.panic_threshold
        contradiction_mult = 0.4
    elif trend_indicator != 0.0 and volatility_ratio < config.trend_vol_ratio:
        regime = MarketRegime.TREND_FOLLOWING
        threshold = config.default_threshold
        contradiction_mult = 0.4
    elif trend_indicator == 0.0 and volatility_ratio < config.mean_reversion_vol_ratio:
        regime = MarketRegime.MEAN_REVERSION
        threshold = config.mean_reversion_threshold
        contradiction_mult = 0.4
    else:
        regime = MarketRegime.UNCERTAINTY
        threshold = config.default_threshold
        contradiction_mult = config.uncertainty_contradiction_multiplier

    return RegimeClassification(
        regime=regime,
        trend_indicator=trend_indicator,
        volatility_ratio=volatility_ratio,
        bullish_threshold=threshold,
        bearish_threshold=-threshold,
        contradiction_penalty_multiplier=contradiction_mult,
    )
# ---------------------------------------------------------------------------
# Event type base rates for information gain computation (Req 3.1)
# ---------------------------------------------------------------------------

EVENT_TYPE_BASE_RATES: dict[str, float] = {
    "earnings": 0.25,
    "product_launch": 0.10,
    "regulatory": 0.08,
    "legal": 0.05,
    "m_and_a": 0.03,
    "management_change": 0.06,
    "partnership": 0.12,
    "market_expansion": 0.09,
    "restructuring": 0.04,
    "dividend": 0.15,
}
DEFAULT_BASE_RATE = 0.1


# ---------------------------------------------------------------------------
# Sigmoid confidence gate (Req 2.1–2.6)
# ---------------------------------------------------------------------------


def sigmoid_gate(
    x: float,
    steepness: float = 5.0,
    midpoint: float = 0.5,
) -> float:
    """Smooth sigmoid confidence gate: σ(k·(x - midpoint)).

    Replaces the binary 0/1 confidence gate in probabilistic mode; higher
    confidence produces a higher gate value.

    Args:
        x: Extraction confidence value.
        steepness: Steepness parameter k (default 5.0).
        midpoint: Midpoint of the sigmoid transition (default 0.5).

    Returns:
        Gate value in [0, 1]. The result saturates to exactly 0.0 / 1.0 when
        k·(x - midpoint) is below -500 / above 500. When that product is NaN
        (e.g. a NaN confidence) the gate is 0.0, so the signal is gated out
        the same way the binary ``>=`` gate rejects a NaN confidence.
    """
    z = steepness * (x - midpoint)
    # NaN fails every comparison below and would otherwise flow through
    # math.exp() into the combined signal weight.
    if math.isnan(z):
        return 0.0
    # Saturate before math.exp() can overflow for very large |z|.
    if z < -500.0:
        return 0.0
    if z > 500.0:
        return 1.0
    return 1.0 / (1.0 + math.exp(-z))


# ---------------------------------------------------------------------------
# Information gain surprise weighting (Req 3.1–3.5)
# ---------------------------------------------------------------------------


def compute_info_gain(
    event_type: str | None,
    lambda_param: float = 0.3,
    max_gain: float = 3.0,
    default_base_rate: float = 0.1,
) -> float:
    """Compute the information gain factor for an event type.

    Formula: r = 1 + λ·(-log₂ P(event_type)), clamped to [1.0, max_gain].

    Rarer events (lower base rate in ``EVENT_TYPE_BASE_RATES``) produce a
    higher factor. Event types missing from the table use
    ``default_base_rate``.

    Args:
        event_type: Event type string (e.g. "earnings", "m_and_a"), or None.
        lambda_param: Scaling parameter λ (default 0.3).
        max_gain: Upper clamp for the factor (default 3.0).
        default_base_rate: Fallback base rate for unknown event types.

    Returns:
        Information gain factor r. Returns 1.0 when ``event_type`` is None
        or when no positive base rate is available.
    """
    if event_type is None:
        return 1.0

    base_rate = EVENT_TYPE_BASE_RATES.get(event_type, default_base_rate)
    # log₂ needs a strictly positive argument: fall back to the default rate,
    # and give up (no surprise weighting) if that is not positive either.
    if base_rate <= 0.0:
        base_rate = default_base_rate
    if base_rate <= 0.0:
        return 1.0

    surprise = -math.log2(base_rate)
    gain = 1.0 + lambda_param * surprise
    return min(max(gain, 1.0), max_gain)
def compute_adaptive_half_life(
    base_half_life: float,
    impact_score: float,
    info_gain_factor: float,
    market_multiplier: float,
    config: ScoringConfig,
) -> float:
    """Stretch the window half-life for an individual signal.

    Formula: τ_i = τ_base · (1 + β_impact) · (1 + β_surprise) · (1 + β_market)

    where
        β_impact   = impact_score · adaptive_decay_impact_scale
        β_surprise = ((info_gain_factor - 1) / 2) · adaptive_decay_surprise_scale
        β_market   = ((market_multiplier - 1) / 0.45) · adaptive_decay_market_scale
                     when market_multiplier > 1, else 0

    The result is floored at ``base_half_life``, so decay is never faster
    than the fixed-window decay.

    NOTE(review): β_market is normalised by the constant 0.45 and is not
    capped, so a multiplier above 1.45 gives β_market larger than
    ``adaptive_decay_market_scale`` — confirm this is intended.

    Args:
        base_half_life: Fixed half-life for the window (hours).
        impact_score: Signal impact score.
        info_gain_factor: Information gain factor r.
        market_multiplier: Market context/regime multiplier.
        config: Scoring config supplying the three ``adaptive_decay_*`` scales.

    Returns:
        Adaptive half-life in hours, never below ``base_half_life``.
    """
    impact_stretch = 1.0 + impact_score * config.adaptive_decay_impact_scale

    surprise_fraction = (info_gain_factor - 1.0) / 2.0
    surprise_stretch = 1.0 + surprise_fraction * config.adaptive_decay_surprise_scale

    # Only multipliers above 1.0 lengthen the half-life.
    market_stretch = 1.0
    if market_multiplier > 1.0:
        market_fraction = (market_multiplier - 1.0) / 0.45
        market_stretch = 1.0 + market_fraction * config.adaptive_decay_market_scale

    adaptive = base_half_life * impact_stretch
    adaptive = adaptive * surprise_stretch
    adaptive = adaptive * market_stretch
    # Ensure adaptive half-life is never less than base (Property 5).
    return max(adaptive, base_half_life)
def _zscore_of_latest(window: list[float]) -> float:
    """Return the population z-score of the last element of *window*.

    Mean and standard deviation are taken over the whole window (the last
    element included). Returns 0.0 unless the standard deviation is
    strictly positive.
    """
    mean = sum(window) / len(window)
    variance = sum((x - mean) ** 2 for x in window) / len(window)
    sigma = math.sqrt(variance)
    if sigma > 0.0:
        return (window[-1] - mean) / sigma
    return 0.0


def compute_regime_multiplier(
    returns: list[float] | None,
    volumes: list[float] | None,
    config: ScoringConfig = DEFAULT_CONFIG,
) -> float:
    """Compute the regime-aware multiplier from return and volume z-scores.

    Formula: M_regime = 1 + regime_return_weight·|z_r| + regime_volume_weight·|z_v|,
    clamped to [1.0, regime_multiplier_max].

    z_r is the z-score of the latest return against the trailing window of up
    to 20 non-NaN returns. z_v is the same statistic on log-volumes, with
    each volume floored at 1.0 before the logarithm so zero or negative
    volumes do not break it.

    NOTE(review): as few as two non-NaN values are accepted, although a
    20-value window is the stated target; confirm the minimum sample size.

    Args:
        returns: Recent daily returns, oldest first (the last is "today").
        volumes: Recent daily volumes, oldest first; may be None.
        config: Scoring config with the regime multiplier parameters.

    Returns:
        Regime multiplier in [1.0, config.regime_multiplier_max]. Returns 1.0
        when fewer than two usable returns exist or the result is not finite.
    """
    if not returns or len(returns) < 2:
        return 1.0

    usable_returns = [r for r in returns if not math.isnan(r)]
    if len(usable_returns) < 2:
        return 1.0

    # Return z-score over the trailing window (at most 20 values).
    z_r = _zscore_of_latest(usable_returns[-20:])

    # Volume z-score on log-volumes; stays 0.0 without enough volume data.
    z_v = 0.0
    if volumes and len(volumes) >= 2:
        usable_volumes = [v for v in volumes if not math.isnan(v)]
        if len(usable_volumes) >= 2:
            log_window = [math.log(max(v, 1.0)) for v in usable_volumes[-20:]]
            z_v = _zscore_of_latest(log_window)

    m_regime = 1.0 + config.regime_return_weight * abs(z_r) + config.regime_volume_weight * abs(z_v)
    # Guard against NaN/inf propagation from upstream data.
    if math.isnan(m_regime) or math.isinf(m_regime):
        return 1.0
    return max(1.0, min(m_regime, config.regime_multiplier_max))
info_gain_factor: float = 1.0 # Surprise multiplier + source_accuracy_factor: float = 1.0 # Historical accuracy multiplier + regime_multiplier: float | None = None # M_regime replacing M_context + def compute_signal_weight( published_at: datetime, @@ -196,18 +437,23 @@ def compute_signal_weight( extraction_confidence: float = 0.5, market_ctx: MarketContext | None = None, config: ScoringConfig = DEFAULT_CONFIG, + *, + event_type: str | None = None, + impact_score: float = 0.5, + source_accuracy_factor: float = 1.0, + returns: list[float] | None = None, + volumes: list[float] | None = None, ) -> SignalWeight: """Compute the combined aggregation weight for a single document signal. - The formula is: + When ``config.probabilistic`` is False (default), the formula is: combined = confidence_gate * recency * credibility * (1 + novelty_bonus) * market_ctx_multiplier - where novelty_bonus = novelty_score * config.novelty_bonus_max - and market_ctx_multiplier >= 1.0 based on volatility/volume features. - - Documents with extraction_confidence below config.confidence_floor - receive a combined weight of 0.0 (gated out). + When ``config.probabilistic`` is True, the formula is: + combined = sigmoid_gate * recency(adaptive) * credibility + * (1 + novelty_bonus) * info_gain * source_accuracy + * regime_multiplier Args: published_at: Document publication time. @@ -218,27 +464,82 @@ def compute_signal_weight( extraction_confidence: Extraction confidence from the model (0-1). market_ctx: Optional market context features for the symbol. config: Scoring parameters. + event_type: Optional event type for information gain computation. + impact_score: Signal impact score in [0, 1] (default 0.5). + source_accuracy_factor: Historical source accuracy factor (default 1.0). + returns: Optional list of recent daily returns for regime multiplier. + volumes: Optional list of recent daily volumes for regime multiplier. Returns: A ``SignalWeight`` with the component breakdown and combined score. 
""" - # Confidence gate - gate = 1.0 if extraction_confidence >= config.confidence_floor else 0.0 - - rec = recency_weight(published_at, reference_time, window, config) cred = credibility_weight(source_credibility, config) bonus = novelty_score * config.novelty_bonus_max - mkt_mult = market_context_multiplier(market_ctx, config) - combined = gate * rec * cred * (1.0 + bonus) * mkt_mult + if not config.probabilistic: + # --- Heuristic mode: preserve exact current formula --- + gate = 1.0 if extraction_confidence >= config.confidence_floor else 0.0 + rec = recency_weight(published_at, reference_time, window, config) + mkt_mult = market_context_multiplier(market_ctx, config) + + combined = gate * rec * cred * (1.0 + bonus) * mkt_mult + + return SignalWeight( + recency=rec, + credibility=cred, + novelty_bonus=bonus, + confidence_gate=gate, + market_ctx_multiplier=mkt_mult, + combined=combined, + ) + + # --- Probabilistic mode --- + + # 1. Sigmoid confidence gate (Req 2.1–2.5) + sg = sigmoid_gate(extraction_confidence, config.sigmoid_steepness, config.sigmoid_midpoint) + + # 2. Information gain factor (Req 3.1–3.5) + ig = compute_info_gain( + event_type, + lambda_param=config.info_gain_lambda, + max_gain=config.info_gain_max, + default_base_rate=config.default_base_rate, + ) + + # 3. Regime multiplier (Req 6.1–6.5) — replaces market_context_multiplier + rm = compute_regime_multiplier(returns, volumes, config) + + # 4. Adaptive recency decay (Req 5.1–5.7) + base_half_life = config.half_life_hours.get(window, 72.0) + adaptive_hl = compute_adaptive_half_life( + base_half_life=base_half_life, + impact_score=impact_score, + info_gain_factor=ig, + market_multiplier=rm, + config=config, + ) + rec = recency_weight( + published_at, reference_time, window, config, + half_life_override=adaptive_hl, + ) + + # 5. Source accuracy factor (Req 4.2–4.3) + saf = source_accuracy_factor + + # 6. 
Combined weight + combined = sg * rec * cred * (1.0 + bonus) * ig * saf * rm return SignalWeight( recency=rec, credibility=cred, novelty_bonus=bonus, - confidence_gate=gate, - market_ctx_multiplier=mkt_mult, + confidence_gate=sg, # sigmoid gate value in probabilistic mode + market_ctx_multiplier=rm, # regime multiplier stored here for compat combined=combined, + sigmoid_gate=sg, + info_gain_factor=ig, + source_accuracy_factor=saf, + regime_multiplier=rm, ) @@ -256,6 +557,11 @@ class WeightedSignal: sentiment_value: float # numeric sentiment: +1 positive, -1 negative, 0 neutral/mixed impact_score: float + # New optional fields for probabilistic mode + info_gain_factor: float = 1.0 # r = 1 + λ·(-log₂ P(event_type)) + source_accuracy_factor: float = 1.0 # [0.5, 1.5] from historical accuracy + adaptive_half_life: float | None = None # τ_i when adaptive decay is active + def sentiment_to_numeric(sentiment: str) -> float: """Map a sentiment label to a signed numeric value.""" diff --git a/services/aggregation/signal_propagation.py b/services/aggregation/signal_propagation.py index 9d7ebe7..b6f4979 100644 --- a/services/aggregation/signal_propagation.py +++ b/services/aggregation/signal_propagation.py @@ -8,11 +8,12 @@ competitive_signal_records. Also converts pattern and competitive signals into WeightedSignal objects for the aggregation engine. 
# ---------------------------------------------------------------------------
# Graph-distance attenuation (Requirements: 12.1–12.7)
# ---------------------------------------------------------------------------


def compute_graph_distance_attenuation(
    source_strength: float,
    correlation: float,
    distance: int,
) -> float:
    """Compute attenuated transfer strength using graph distance.

    Formula: S_transfer = S_source · ρ_historical · e^(-d_network)

    Args:
        source_strength: Source signal strength S_source.
        correlation: Historical price correlation ρ_historical.
        distance: Graph distance d_network (shortest path length). Only
            distances 1, 2 and 3 transfer any strength.

    Returns:
        Transfer strength, always non-negative. Returns 0.0 when
        ``distance`` is below 1 or above 3, and when the product is negative
        (e.g. a negative correlation) or NaN.

    Requirements: 12.1, 12.7
    """
    if distance < 1 or distance > 3:
        return 0.0

    transfer = source_strength * correlation * math.exp(-distance)
    # Honour the non-negative contract: a negative correlation or strength,
    # or a NaN input, must not leak a negative/NaN transfer to callers.
    if math.isnan(transfer) or transfer <= 0.0:
        return 0.0
    return transfer
+ When ``probabilistic=True``, uses graph-distance attenuation: + S_transfer = S_source · ρ_historical · e^(-d_network) + with 90-day rolling Pearson correlation for ρ_historical and shortest + path in the competitor relationship graph for d_network (capped at 3). + + When ``probabilistic=False``, preserves the existing flat transfer + behavior. + Args: pool: asyncpg connection pool. ticker: Source company ticker that received the catalyst. @@ -98,9 +141,12 @@ async def propagate_signals( impact_score: The source document's impact score. document_id: The source document ID. config: Optional competitive config overrides. + probabilistic: Use graph-distance attenuation when True. Returns: List of CompetitiveSignalRecord objects produced and persisted. + + Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7 """ cfg = config or CompetitiveConfig() now = datetime.now(timezone.utc) @@ -127,7 +173,7 @@ async def propagate_signals( # Determine the competitor ticker (the other side of the relationship) competitor_ticker = ticker_b if ticker_a == ticker else ticker_a - # Threshold gating (Req 4.5) + # Threshold gating (Req 4.5 / Req 12.6) if rel_strength < cfg.propagation_strength_threshold: logger.info( "Skipping propagation %s→%s: relationship strength %.3f " @@ -161,14 +207,39 @@ async def propagate_signals( ) continue - # Compute signal strength (Req 4.3) - raw_strength = ( - pattern.avg_strength - * rel_strength - * pattern.pattern_confidence - * impact_score - ) - signal_strength = min(max(raw_strength, 0.0), 1.0) + if probabilistic: + # Graph-distance attenuation (Req 12.1–12.7) + # For direct competitors, graph distance = 1 + graph_distance = 1 + + # Use relationship strength as a proxy for historical + # correlation when full correlation data is unavailable. + # Default correlation: 0.3 same-sector, 0.1 cross-sector. 
def _is_nan(value: float) -> bool:
    """Return True when *value* is NaN.

    NaN is the only float that compares unequal to itself, so this check
    needs no extra import.
    """
    return value != value


@dataclass
class SourceAccuracy:
    """Per-source historical prediction accuracy.

    Attributes:
        source_id: Unique identifier for the signal source.
        accuracy_ratio: Fraction of correct directional calls, in [0, 1].
        sample_count: Number of signals with known outcomes.
        last_updated: Timestamp of the most recent accuracy update.
    """

    source_id: str
    accuracy_ratio: float
    sample_count: int
    last_updated: datetime

    @property
    def accuracy_factor(self) -> float:
        """Multiplicative factor for credibility weight.

        Returns 1.0 (neutral) when sample_count < 10 or when
        accuracy_ratio is NaN.  Otherwise scales linearly from 0.5
        (0% accuracy) to 1.5 (100% accuracy).  Corrupted accuracy_ratio
        values outside [0, 1] are clamped before computing the factor.
        """
        if self.sample_count < 10:
            return 1.0
        # min(1.0, nan) evaluates to 1.0, so without this guard a NaN ratio
        # would be rewarded with the maximum factor of 1.5.
        if _is_nan(self.accuracy_ratio):
            return 1.0
        clamped = max(0.0, min(1.0, self.accuracy_ratio))
        return 0.5 + clamped


async def fetch_source_accuracy(
    pool: asyncpg.Pool,
    source_ids: list[str],
) -> dict[str, SourceAccuracy]:
    """Fetch accuracy metrics for a batch of sources.

    Queries the ``source_accuracy`` table for all requested *source_ids*
    in a single round-trip.  Returns a mapping from source_id to its
    :class:`SourceAccuracy` record.

    When the query fails, returns an empty dict so that callers fall back
    to the neutral accuracy factor of 1.0.  Rows whose ``accuracy_ratio``
    or ``sample_count`` is NULL, non-numeric, or NaN are skipped
    individually, so one bad row cannot abort the whole batch.
    """
    if not source_ids:
        return {}

    try:
        rows = await pool.fetch(
            """
            SELECT source_id, accuracy_ratio, sample_count, last_updated
            FROM source_accuracy
            WHERE source_id = ANY($1::varchar[])
            """,
            source_ids,
        )
    except Exception:
        logger.warning(
            "Failed to fetch source accuracy; defaulting to neutral factor",
            exc_info=True,
        )
        return {}

    result: dict[str, SourceAccuracy] = {}
    for row in rows:
        sid = row["source_id"]
        try:
            ratio = float(row["accuracy_ratio"])
            sample_count = int(row["sample_count"])
        except (TypeError, ValueError):
            # NULL or non-numeric column value: omit the row so the caller
            # uses the neutral factor for this source.
            logger.warning("Skipping malformed source accuracy row for %s", sid)
            continue
        if _is_nan(ratio):
            logger.warning("Skipping NaN source accuracy ratio for %s", sid)
            continue
        result[sid] = SourceAccuracy(
            source_id=sid,
            # Clamp corrupted accuracy_ratio to [0.0, 1.0]
            accuracy_ratio=max(0.0, min(1.0, ratio)),
            sample_count=sample_count,
            last_updated=row["last_updated"],
        )
    return result


async def update_source_accuracy(
    pool: asyncpg.Pool,
    source_id: str,
    realized_outcomes: list[tuple[str, float]],
) -> None:
    """Update accuracy metrics for a source from realized price outcomes.

    Each element of *realized_outcomes* is a ``(predicted_direction,
    actual_7d_return)`` pair.  A prediction is considered correct when:

    * ``predicted_direction`` is ``"bullish"`` and ``actual_7d_return > 0``
    * ``predicted_direction`` is ``"bearish"`` and ``actual_7d_return < 0``

    Neutral predictions, zero returns, and NaN returns (unknown outcome)
    are excluded from the accuracy calculation.

    The function upserts the ``source_accuracy`` row, merging the new
    outcomes with any existing sample count and accuracy ratio.  Database
    errors are logged and swallowed so the caller continues with stale
    data.
    """
    if not realized_outcomes:
        return

    # Count correct directional calls from the new outcomes.
    correct = 0
    total = 0
    for predicted_direction, actual_return in realized_outcomes:
        direction = predicted_direction.lower()
        if direction not in ("bullish", "bearish"):
            continue
        # A NaN return is neither > 0 nor < 0; counting it would score the
        # call as wrong even though the outcome is simply unknown.
        if actual_return == 0.0 or _is_nan(actual_return):
            continue
        total += 1
        if direction == "bullish" and actual_return > 0:
            correct += 1
        elif direction == "bearish" and actual_return < 0:
            correct += 1

    if total == 0:
        return

    now = datetime.now(timezone.utc)

    try:
        # The update is a sample-count-weighted average of the stored
        # ratio and the ratio of this batch.
        await pool.execute(
            """
            INSERT INTO source_accuracy (source_id, accuracy_ratio, sample_count, last_updated)
            VALUES ($1, $2, $3, $4)
            ON CONFLICT (source_id) DO UPDATE SET
                accuracy_ratio = (
                    source_accuracy.accuracy_ratio * source_accuracy.sample_count
                    + $2 * $3
                ) / NULLIF(source_accuracy.sample_count + $3, 0),
                sample_count = source_accuracy.sample_count + $3,
                last_updated = $4
            """,
            source_id,
            correct / total,
            total,
            now,
        )
    except Exception:
        logger.warning(
            "Failed to update source accuracy for %s; continuing with stale data",
            source_id,
            exc_info=True,
        )
from services.aggregation.contradiction import CatalystEntry, detect_contradictions from services.aggregation.evidence import ( EvidenceRankConfig, @@ -28,6 +32,7 @@ from services.aggregation.evidence import ( from services.aggregation.evidence import ( rank_evidence as _rank_evidence_composite, ) +from services.aggregation.interpolation import integrate_macro_signals from services.aggregation.market_context import fetch_market_context from services.aggregation.pattern_matcher import find_self_patterns from services.aggregation.projection import ( @@ -35,6 +40,11 @@ from services.aggregation.projection import ( compute_projection, persist_trend_projection, ) +from services.aggregation.regime import ( + MarketRegime, + RegimeClassification, + classify_regime, +) from services.aggregation.scoring import ( ScoringConfig, WeightedSignal, @@ -46,6 +56,7 @@ from services.aggregation.signal_propagation import ( CompetitiveSignalRecord, build_pattern_weighted_signals, ) +from services.aggregation.source_accuracy import fetch_source_accuracy from services.shared.metrics import ( AGGREGATION_CONTRADICTION_SCORE, AGGREGATION_DURATION, @@ -80,6 +91,7 @@ class AggregationConfig: macro_enabled: bool = True # runtime toggle state competitive_signal_weight: float = 0.2 # relative weight of pattern signals competitive_enabled: bool = True # runtime toggle state + probabilistic_scoring_enabled: bool = False # probabilistic pipeline toggle def effective_windows(self) -> list[str]: if self.windows: @@ -232,6 +244,59 @@ async def fetch_competitive_enabled(pool: asyncpg.Pool) -> bool | None: return row["competitive_enabled"].lower() == "true" +# --------------------------------------------------------------------------- +# Fetch probabilistic scoring toggle from risk_configs +# +# PROBABILISTIC PIPELINE TOGGLE (Requirements 16.3, 16.4, 16.5, 16.6, 16.7): +# - Read once per aggregation cycle from the risk_configs table. 
_PROBABILISTIC_TOGGLE_QUERY = """
SELECT config->>'probabilistic_scoring_enabled' AS probabilistic_scoring_enabled
FROM risk_configs
WHERE active = TRUE
ORDER BY updated_at DESC
LIMIT 1
"""


async def fetch_probabilistic_scoring_enabled(pool: asyncpg.Pool) -> bool:
    """Read the probabilistic scoring toggle from the active risk config.

    Only an explicit ``"true"`` (case-insensitive) enables the
    probabilistic pipeline.  A missing config row, a missing key, a value
    that is not the text ``true``/``false``, or any exception raised while
    querying all resolve to ``False`` — the heuristic pipeline.

    Requirements: 16.3, 16.6
    """
    try:
        record = await pool.fetchrow(_PROBABILISTIC_TOGGLE_QUERY)
        flag = None if record is None else record["probabilistic_scoring_enabled"]
        if flag is None:
            return False

        # Non-string values normalise to None and fall into the invalid branch.
        normalized = flag.lower() if isinstance(flag, str) else None
        if normalized not in ("true", "false"):
            logger.warning(
                "Invalid probabilistic_scoring_enabled value %r in risk_configs; defaulting to heuristic pipeline",
                flag,
            )
            return False

        return normalized == "true"
    except Exception:
        logger.warning(
            "Failed to read probabilistic_scoring_enabled from risk_configs; defaulting to heuristic pipeline",
            exc_info=True,
        )
        return False
returns: list[float] | None = None, + volumes: list[float] | None = None, ) -> list[WeightedSignal]: """Convert macro impact records into WeightedSignal objects. @@ -375,6 +443,9 @@ def build_macro_weighted_signals( - impact_score = macro_impact_score * macro_signal_weight - recency decay from the global event's publication time - confidence gating from the macro record's confidence + + When ``config.probabilistic`` is True, passes returns/volumes for + regime multiplier computation. """ cfg = config or ScoringConfig() signals: list[WeightedSignal] = [] @@ -387,6 +458,8 @@ def build_macro_weighted_signals( novelty_score=0.5, extraction_confidence=mir.confidence, config=cfg, + returns=returns, + volumes=volumes, ) sentiment = _DIRECTION_TO_SENTIMENT.get(mir.impact_direction, 0.0) impact = mir.macro_impact_score * macro_signal_weight @@ -412,11 +485,24 @@ def build_weighted_signals( window: str, market_ctx: Any | None = None, config: ScoringConfig | None = None, + *, + source_accuracy_map: dict[str, float] | None = None, + returns: list[float] | None = None, + volumes: list[float] | None = None, ) -> list[WeightedSignal]: - """Convert impact records into WeightedSignal objects using the scoring module.""" + """Convert impact records into WeightedSignal objects using the scoring module. + + When ``config.probabilistic`` is True, passes source accuracy factors, + event types, and market data (returns/volumes) to the scoring pipeline + for regime multiplier and adaptive decay computation. 
+ """ cfg = config or ScoringConfig() + accuracy_map = source_accuracy_map or {} signals: list[WeightedSignal] = [] for imp in impacts: + # Look up source accuracy factor for this document's source + saf = accuracy_map.get(imp.document_id, 1.0) + sw = compute_signal_weight( published_at=imp.published_at, reference_time=reference_time, @@ -426,6 +512,11 @@ def build_weighted_signals( extraction_confidence=imp.confidence, market_ctx=market_ctx, config=cfg, + event_type=imp.catalyst_type if cfg.probabilistic else None, + impact_score=imp.impact_score, + source_accuracy_factor=saf, + returns=returns, + volumes=volumes, ) signals.append( WeightedSignal( @@ -433,6 +524,8 @@ def build_weighted_signals( weight=sw, sentiment_value=sentiment_to_numeric(imp.sentiment), impact_score=imp.impact_score, + info_gain_factor=sw.info_gain_factor, + source_accuracy_factor=sw.source_accuracy_factor, ) ) return signals @@ -649,10 +742,15 @@ def assemble_trend_summary( market_ctx: Any | None = None, max_evidence: int = MAX_EVIDENCE_REFS, reference_time: datetime | None = None, + *, + probabilistic: bool = False, + regime: RegimeClassification | None = None, ) -> TrendSummary: """Build a complete TrendSummary from weighted signals and impact records.""" result = assemble_trend_with_evidence( ticker, window, signals, impacts, market_ctx, max_evidence, reference_time, + probabilistic=probabilistic, + regime=regime, ) return result.summary @@ -665,8 +763,25 @@ def assemble_trend_with_evidence( market_ctx: Any | None = None, max_evidence: int = MAX_EVIDENCE_REFS, reference_time: datetime | None = None, + *, + probabilistic: bool = False, + regime: RegimeClassification | None = None, ) -> AssembledTrend: - """Build a TrendSummary and return detailed evidence rankings for persistence.""" + """Build a TrendSummary and return detailed evidence rankings for persistence. 
+ + When ``probabilistic`` is True: + - Computes Bayesian posterior from merged signals + - Uses Bayesian confidence formula for trend confidence + - Uses entropy-based direction classification + - Applies regime-adjusted thresholds + - Populates probabilistic TrendSummary fields + - Stores probabilistic outputs in market_context JSONB + + When ``probabilistic`` is False: + - Preserves exact current heuristic behavior (no changes) + + Requirements: 1.1, 1.2, 8.1–8.5, 9.1–9.6, 7.8, 16.4, 16.5 + """ if reference_time is None: reference_time = datetime.now(timezone.utc) @@ -677,15 +792,102 @@ def assemble_trend_with_evidence( CatalystEntry(document_id=imp.document_id, catalyst_type=imp.catalyst_type) for imp in impacts ] - contradiction_result = detect_contradictions(signals, catalyst_entries) + contradiction_result = detect_contradictions( + signals, catalyst_entries, probabilistic=probabilistic, + ) contradiction = contradiction_result.score - direction = derive_trend_direction(avg_sentiment, contradiction) - confidence = compute_trend_confidence(signals, contradiction) + if not probabilistic: + # --- Heuristic mode: preserve exact current behavior --- + direction = derive_trend_direction(avg_sentiment, contradiction) + confidence = compute_trend_confidence(signals, contradiction) + + # Get detailed evidence rankings for persistence + ev_config = EvidenceRankConfig(max_refs=max_evidence) + supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, ev_config) + + supporting = list(dict.fromkeys(r.document_id for r in supporting_ranked)) + opposing = list(dict.fromkeys(r.document_id for r in opposing_ranked)) + + catalysts, risks = extract_catalysts_and_risks(impacts, signals) + + # Trend strength: absolute value of weighted sentiment, clamped to [0, 1] + strength = round(min(abs(avg_sentiment), 1.0), 4) + + summary = TrendSummary( + entity_type="company", + entity_id=ticker, + window=TrendWindow(window), + trend_direction=direction, + 
trend_strength=strength, + confidence=confidence, + top_supporting_evidence=supporting, + top_opposing_evidence=opposing, + dominant_catalysts=catalysts, + material_risks=risks, + contradiction_score=contradiction, + disagreement_details=contradiction_result.details, + market_context=market_ctx, + generated_at=reference_time, + ) + + return AssembledTrend( + summary=summary, + supporting_evidence=supporting_ranked, + opposing_evidence=opposing_ranked, + ) + + # --- Probabilistic mode (Req 8.1–8.5, 9.1–9.6) --- + + # Default to uncertainty regime when not provided (Req 7.9) + if regime is None: + regime = RegimeClassification( + regime=MarketRegime.UNCERTAINTY, + trend_indicator=0.0, + volatility_ratio=1.0, + bullish_threshold=0.15, + bearish_threshold=-0.15, + contradiction_penalty_multiplier=0.6, + ) + + # Compute Bayesian posterior from merged signals (Req 1.1, 1.2) + posterior: BayesianPosterior = compute_bayesian_posterior(signals) + + # --- Bayesian confidence formula (Req 8.1–8.4) --- + # confidence = 0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction + active = [s for s in signals if s.weight.combined > 0] + unique_sources = len({s.document_id for s in active if s.document_id}) if active else 0 + f_count = min(unique_sources / 15.0, 0.8) + + avg_credibility = ( + sum(s.weight.credibility for s in active) / len(active) if active else 0.0 + ) + + # Contradiction penalty uses regime-adjusted multiplier (Req 7.7) + contradiction_penalty = contradiction * regime.contradiction_penalty_multiplier + + confidence = ( + 0.5 * posterior.bayesian_confidence + + 0.25 * f_count + + 0.25 * avg_credibility + - contradiction_penalty + ) + confidence = round(max(0.0, min(1.0, confidence)), 4) + + # --- Entropy-based direction (Req 9.1–9.5) --- + # Fixed P_bull thresholds for direction: 0.65 / 0.35 + if posterior.entropy > 0.9: + direction = TrendDirection.MIXED + elif posterior.p_bull > 0.65: + direction = TrendDirection.BULLISH + elif 
posterior.p_bull < 0.35: + direction = TrendDirection.BEARISH + else: + direction = TrendDirection.NEUTRAL # Get detailed evidence rankings for persistence - config = EvidenceRankConfig(max_refs=max_evidence) - supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, config) + ev_config = EvidenceRankConfig(max_refs=max_evidence) + supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, ev_config) supporting = list(dict.fromkeys(r.document_id for r in supporting_ranked)) opposing = list(dict.fromkeys(r.document_id for r in opposing_ranked)) @@ -695,6 +897,30 @@ def assemble_trend_with_evidence( # Trend strength: absolute value of weighted sentiment, clamped to [0, 1] strength = round(min(abs(avg_sentiment), 1.0), 4) + # Build probabilistic JSONB data for market_context storage + probabilistic_data = { + "p_bull": round(posterior.p_bull, 6), + "alpha": round(posterior.alpha, 4), + "beta": round(posterior.beta, 4), + "log_likelihood": round(posterior.log_likelihood, 6), + "bayesian_confidence": round(posterior.bayesian_confidence, 6), + "entropy": round(posterior.entropy, 6), + "regime": regime.regime.value, + "regime_volatility_ratio": round(regime.volatility_ratio, 4), + "pipeline_mode": "probabilistic", + "contradiction_entropy": round(contradiction, 4), + } + + # Enrich market_context with probabilistic outputs + if market_ctx is not None and hasattr(market_ctx, "model_dump"): + enriched_ctx_data = market_ctx.model_dump() + enriched_ctx_data["probabilistic"] = probabilistic_data + enriched_market_ctx = enriched_ctx_data + elif isinstance(market_ctx, dict): + enriched_market_ctx = {**market_ctx, "probabilistic": probabilistic_data} + else: + enriched_market_ctx = {"probabilistic": probabilistic_data} + summary = TrendSummary( entity_type="company", entity_id=ticker, @@ -708,8 +934,16 @@ def assemble_trend_with_evidence( material_risks=risks, contradiction_score=contradiction, disagreement_details=contradiction_result.details, - 
market_context=market_ctx, + market_context=enriched_market_ctx, generated_at=reference_time, + # Probabilistic fields (Req 9.6, 16.1) + p_bull=round(posterior.p_bull, 6), + alpha=round(posterior.alpha, 4), + beta_param=round(posterior.beta, 4), + bayesian_confidence=round(posterior.bayesian_confidence, 6), + entropy=round(posterior.entropy, 6), + regime=regime.regime.value, + pipeline_mode="probabilistic", ) return AssembledTrend( @@ -782,7 +1016,12 @@ async def persist_trend_summary( json.dumps(summary.material_risks), summary.contradiction_score, json.dumps([d.model_dump() for d in summary.disagreement_details]), - json.dumps(summary.market_context.model_dump() if summary.market_context else {}, default=str), + json.dumps( + summary.market_context.model_dump() + if hasattr(summary.market_context, "model_dump") + else (summary.market_context if summary.market_context else {}), + default=str, + ), summary.generated_at, ) trend_id = str(row["id"]) @@ -933,6 +1172,131 @@ async def _build_macro_event_infos( return infos +# --------------------------------------------------------------------------- +# Regime detection helper (Req 7.1, 7.2, 7.3, 7.8, 7.9) +# --------------------------------------------------------------------------- + +_CLOSING_PRICES_QUERY = """ +SELECT close +FROM market_data_daily +WHERE ticker = $1 +ORDER BY bar_date DESC +LIMIT 120 +""" + +_DAILY_RETURNS_QUERY = """ +SELECT (close - LAG(close) OVER (ORDER BY bar_date)) / NULLIF(LAG(close) OVER (ORDER BY bar_date), 0) AS daily_return +FROM market_data_daily +WHERE ticker = $1 +ORDER BY bar_date DESC +LIMIT 120 +""" + +_DAILY_VOLUMES_QUERY = """ +SELECT volume +FROM market_data_daily +WHERE ticker = $1 +ORDER BY bar_date DESC +LIMIT 30 +""" + +# Default uncertainty regime used when market data is unavailable +_DEFAULT_UNCERTAINTY_REGIME = RegimeClassification( + regime=MarketRegime.UNCERTAINTY, + trend_indicator=0.0, + volatility_ratio=1.0, + bullish_threshold=0.15, + bearish_threshold=-0.15, + 
async def _classify_ticker_regime(
    pool: asyncpg.Pool,
    ticker: str,
) -> RegimeClassification:
    """Classify the market regime for *ticker* from stored daily bars.

    Loads closing prices and daily returns, puts both into chronological
    order, and hands them to ``classify_regime``.  The default uncertainty
    regime is returned when there are no price rows, when either series
    is empty after dropping NULLs, or when anything raises.

    Requirements: 7.1, 7.2, 7.3, 7.8, 7.9
    """
    try:
        bars = await pool.fetch(_CLOSING_PRICES_QUERY, ticker)
        if not bars:
            logger.info("No market data for %s — defaulting to uncertainty regime", ticker)
            return _DEFAULT_UNCERTAINTY_REGIME

        # Query order is newest-first; walk it backwards for oldest-first.
        closing_prices: list[float] = []
        for bar in reversed(bars):
            if bar["close"] is not None:
                closing_prices.append(float(bar["close"]))

        return_bars = await pool.fetch(_DAILY_RETURNS_QUERY, ticker)
        # Same reversal for returns; NULL returns are dropped.
        returns: list[float] = []
        for bar in reversed(return_bars):
            if bar["daily_return"] is not None:
                returns.append(float(bar["daily_return"]))

        if closing_prices and returns:
            return classify_regime(closing_prices, returns)

        logger.info("Insufficient market data for %s — defaulting to uncertainty regime", ticker)
        return _DEFAULT_UNCERTAINTY_REGIME

    except Exception:
        logger.warning(
            "Failed to classify regime for %s — defaulting to uncertainty regime",
            ticker,
            exc_info=True,
        )
        return _DEFAULT_UNCERTAINTY_REGIME


async def _fetch_ticker_market_data(
    pool: asyncpg.Pool,
    ticker: str,
) -> tuple[list[float] | None, list[float] | None]:
    """Load recent daily returns and volumes for *ticker*.

    Returns a ``(returns, volumes)`` pair.  Each element is a
    chronological list, or None when the query produced no rows or only
    NULL values.  If anything raises, both elements are None.
    """
    try:
        return_bars = await pool.fetch(_DAILY_RETURNS_QUERY, ticker)
        returns: list[float] | None = None
        if return_bars:
            # Newest-first from the query; reverse to chronological order.
            returns = [
                float(bar["daily_return"])
                for bar in reversed(return_bars)
                if bar["daily_return"] is not None
            ]

        volume_bars = await pool.fetch(_DAILY_VOLUMES_QUERY, ticker)
        volumes: list[float] | None = None
        if volume_bars:
            volumes = [
                float(bar["volume"])
                for bar in reversed(volume_bars)
                if bar["volume"] is not None
            ]

        # Collapse empty lists (all values NULL) to None as well.
        return (returns if returns else None, volumes if volumes else None)
    except Exception:
        logger.warning(
            "Failed to fetch market data for %s scoring — regime multiplier will default to 1.0",
            ticker,
            exc_info=True,
        )
        return None, None
""" cfg = config or AggregationConfig() scoring_cfg = cfg.effective_scoring() + # When probabilistic mode is active, create a scoring config with + # probabilistic=True so all downstream scoring uses the new formulas. + if probabilistic and not scoring_cfg.probabilistic: + scoring_cfg = ScoringConfig( + half_life_hours=scoring_cfg.half_life_hours, + min_recency_weight=scoring_cfg.min_recency_weight, + credibility_floor=scoring_cfg.credibility_floor, + credibility_ceiling=scoring_cfg.credibility_ceiling, + credibility_exponent=scoring_cfg.credibility_exponent, + novelty_bonus_max=scoring_cfg.novelty_bonus_max, + confidence_floor=scoring_cfg.confidence_floor, + volatility_recency_boost_threshold=scoring_cfg.volatility_recency_boost_threshold, + volatility_recency_boost_max=scoring_cfg.volatility_recency_boost_max, + volume_surge_threshold_pct=scoring_cfg.volume_surge_threshold_pct, + volume_surge_boost=scoring_cfg.volume_surge_boost, + probabilistic=True, + sigmoid_steepness=scoring_cfg.sigmoid_steepness, + sigmoid_midpoint=scoring_cfg.sigmoid_midpoint, + info_gain_lambda=scoring_cfg.info_gain_lambda, + info_gain_max=scoring_cfg.info_gain_max, + default_base_rate=scoring_cfg.default_base_rate, + adaptive_decay_impact_scale=scoring_cfg.adaptive_decay_impact_scale, + adaptive_decay_surprise_scale=scoring_cfg.adaptive_decay_surprise_scale, + adaptive_decay_market_scale=scoring_cfg.adaptive_decay_market_scale, + regime_return_weight=scoring_cfg.regime_return_weight, + regime_volume_weight=scoring_cfg.regime_volume_weight, + regime_multiplier_max=scoring_cfg.regime_multiplier_max, + ) + if reference_time is None: reference_time = datetime.now(timezone.utc) @@ -975,9 +1378,13 @@ async def aggregate_company_window( # 2. Fetch market context market_ctx = await fetch_market_context(pool, ticker, window, reference_time) - # 3. Build weighted signals + # 3. 
Build weighted signals — pass source accuracy and market data + # when in probabilistic mode (Req 4.1–4.3, 6.1–6.5) signals = build_weighted_signals( impacts, reference_time, window, market_ctx, scoring_cfg, + source_accuracy_map=source_accuracy_map if probabilistic else None, + returns=ticker_returns if probabilistic else None, + volumes=ticker_volumes if probabilistic else None, ) # 4. Check macro toggle and merge macro signals @@ -991,6 +1398,7 @@ async def aggregate_company_window( if db_toggle is not None: macro_enabled = db_toggle + macro_modifier = 1.0 if macro_enabled: macro_impacts = await fetch_macro_impact_records( pool, ticker, window_start, reference_time, @@ -1002,11 +1410,31 @@ async def aggregate_company_window( window, macro_signal_weight=cfg.macro_signal_weight, config=scoring_cfg, + returns=ticker_returns if probabilistic else None, + volumes=ticker_volumes if probabilistic else None, ) - signals = signals + macro_signals + + if probabilistic: + # Probabilistic mode: use conditional macro modifier (Req 11.1–11.5) + company_direction = derive_trend_direction( + weighted_sentiment_average(signals), + ).value + signals, macro_modifier = integrate_macro_signals( + company_signals=signals, + macro_signals=macro_signals, + company_direction=company_direction, + macro_impacts=macro_impacts, + ticker=ticker, + probabilistic=True, + macro_signal_weight=cfg.macro_signal_weight, + ) + else: + # Heuristic mode: simple additive merge (current behavior) + signals = signals + macro_signals + logger.info( - "Merged %d macro signals for %s/%s", - len(macro_signals), ticker, window, + "Merged %d macro signals for %s/%s (modifier=%.4f)", + len(macro_signals), ticker, window, macro_modifier, ) # 5. 
Check competitive toggle and merge pattern/competitive signals @@ -1065,9 +1493,17 @@ async def aggregate_company_window( market_ctx=market_ctx if market_ctx.has_data else None, max_evidence=cfg.max_evidence, reference_time=reference_time, + probabilistic=probabilistic, + regime=regime, ) summary = assembled.summary + # 6b. Enrich probabilistic JSONB with macro modifier (Req 16.2) + if probabilistic and macro_modifier != 1.0: + ctx = summary.market_context + if isinstance(ctx, dict) and "probabilistic" in ctx: + ctx["probabilistic"]["macro_modifier"] = round(macro_modifier, 4) + # 7. Persist trend window trend_id = await persist_trend_summary(pool, summary) @@ -1136,10 +1572,80 @@ async def aggregate_company( if reference_time is None: reference_time = datetime.now(timezone.utc) + # Read probabilistic scoring flag once per cycle (Requirement 16.7). + # Mid-cycle changes take effect on the next cycle. + probabilistic = await fetch_probabilistic_scoring_enabled(pool) + pipeline_mode = "probabilistic" if probabilistic else "heuristic" + logger.info( + "Aggregation cycle for %s: pipeline_mode=%s", + ticker, + pipeline_mode, + ) + + # --- Regime detection (Req 7.1, 7.2, 7.3, 7.8, 7.9) --- + # Classify market regime for this ticker using closing prices and returns. + # Default to uncertainty regime when market data is unavailable. + regime: RegimeClassification | None = None + ticker_returns: list[float] | None = None + ticker_volumes: list[float] | None = None + source_accuracy_map: dict[str, float] | None = None + + if probabilistic: + regime = await _classify_ticker_regime(pool, ticker) + logger.info( + "Regime for %s: %s (trend_indicator=%.1f, vol_ratio=%.2f, " + "bullish_threshold=%.2f, contradiction_mult=%.1f)", + ticker, + regime.regime.value, + regime.trend_indicator, + regime.volatility_ratio, + regime.bullish_threshold, + regime.contradiction_penalty_multiplier, + ) + + # Fetch market data (returns/volumes) for regime multiplier in scoring + # (Req 6.1–6.5). 
Fetched once per cycle and reused across all windows. + ticker_returns, ticker_volumes = await _fetch_ticker_market_data(pool, ticker) + + # Batch-fetch source accuracy for all sources in the signal set + # (Req 4.1–4.3). Fetched once per cycle; individual signals look up + # their factor from this map. DB errors default to empty map (factor 1.0). + try: + # Fetch all source IDs from the longest window to cover all signals + longest_window = max( + cfg.effective_windows(), + key=lambda w: WINDOW_DURATIONS.get(w, timedelta(days=7)), + ) + longest_duration = WINDOW_DURATIONS.get(longest_window, timedelta(days=90)) + window_start = reference_time - longest_duration + all_impacts = await fetch_impact_records(pool, ticker, window_start, reference_time) + source_ids = list({imp.document_id for imp in all_impacts}) + if source_ids: + sa_records = await fetch_source_accuracy(pool, source_ids) + source_accuracy_map = { + sid: sa.accuracy_factor for sid, sa in sa_records.items() + } + logger.info( + "Fetched source accuracy for %s: %d/%d sources have records", + ticker, len(sa_records), len(source_ids), + ) + except Exception: + logger.warning( + "Failed to fetch source accuracy for %s — defaulting to neutral factor", + ticker, + exc_info=True, + ) + source_accuracy_map = None + summaries: list[TrendSummary] = [] for window in cfg.effective_windows(): summary = await aggregate_company_window( pool, ticker, window, reference_time, cfg, + probabilistic=probabilistic, + regime=regime, + source_accuracy_map=source_accuracy_map, + ticker_returns=ticker_returns, + ticker_volumes=ticker_volumes, ) summaries.append(summary) diff --git a/services/recommendation/eligibility.py b/services/recommendation/eligibility.py index 27edd5a..72fccb8 100644 --- a/services/recommendation/eligibility.py +++ b/services/recommendation/eligibility.py @@ -9,10 +9,11 @@ Evaluates trend summaries against configurable thresholds to decide: All decisions are rule-based with no model involvement. 
The LLM is only used downstream for optional thesis wording (a separate task). -Requirements: 7.1, 7.2, 7.3, 7.4 +Requirements: 7.1, 7.2, 7.3, 7.4, 14.1, 14.2, 14.3, 14.4, 14.5, 14.6 """ from __future__ import annotations +import math from dataclasses import dataclass, field from enum import Enum @@ -78,6 +79,10 @@ class EligibilityConfig: # Contradiction penalty: higher contradiction → smaller position contradiction_sizing_penalty: float = 0.5 + # --- Expected value gate (Requirement 14) --- + # EV threshold: minimum expected value to allow recommendation through + ev_threshold: float = 0.005 + DEFAULT_ELIGIBILITY_CONFIG = EligibilityConfig() @@ -98,6 +103,11 @@ class EligibilityResult: time_horizon: str = "" invalidation_conditions: list[str] = field(default_factory=list) + # Probabilistic pipeline fields (Req 14.5, 16.2) + ev_value: float | None = None + p_bull: float | None = None + pipeline_mode: str = "heuristic" + # --------------------------------------------------------------------------- # Gate checks @@ -318,6 +328,57 @@ def _derive_invalidation_conditions( return conditions +# --------------------------------------------------------------------------- +# Expected value computation (Requirements: 14.1–14.6) +# --------------------------------------------------------------------------- + +# Horizon days mapping for EV computation +_EV_HORIZON_DAYS: dict[str, float] = { + "intraday": 1.0, + "1d": 1.0, + "7d": 7.0, + "30d": 30.0, + "90d": 90.0, +} + + +def compute_expected_value( + p_bull: float, + strength: float, + sigma_20: float, + horizon_days: float, +) -> float: + """Compute expected value for the recommendation gate. + + Formula: + R_up = strength · σ_20 · √(horizon_days) + R_down = (1 - strength) · σ_20 · √(horizon_days) + EV = P_bull · R_up - P_bear · R_down + + where P_bear = 1 - P_bull. + + Args: + p_bull: Bayesian bullish probability in [0, 1]. + strength: Trend strength in [0, 1]. + sigma_20: 20-day return standard deviation. 
+ horizon_days: Number of days for the projection horizon. + + Returns: + Expected value (can be negative). + + Requirements: 14.1, 14.2 + """ + p_bear = 1.0 - p_bull + sqrt_horizon = math.sqrt(max(horizon_days, 0.0)) + r_up = strength * sigma_20 * sqrt_horizon + r_down = (1.0 - strength) * sigma_20 * sqrt_horizon + ev = p_bull * r_up - p_bear * r_down + # Guard against NaN/infinity from extreme inputs + if math.isnan(ev) or math.isinf(ev): + return 0.0 + return ev + + # --------------------------------------------------------------------------- # Main entry point # --------------------------------------------------------------------------- @@ -326,6 +387,10 @@ def _derive_invalidation_conditions( def evaluate_eligibility( summary: TrendSummary, config: EligibilityConfig = DEFAULT_ELIGIBILITY_CONFIG, + *, + probabilistic: bool = False, + p_bull: float | None = None, + sigma_20: float = 0.01, ) -> EligibilityResult: """Evaluate a trend summary for recommendation eligibility. @@ -335,8 +400,27 @@ def evaluate_eligibility( 3. Determines the highest allowed execution mode 4. Computes position sizing from portfolio rules 5. Derives invalidation conditions + 6. (probabilistic) Applies EV gate: EV > threshold to proceed + + When ``probabilistic=True``: + - Computes EV = P_bull · R_up - P_bear · R_down + - When EV > threshold (default 0.005), allows recommendation through + - When EV ≤ threshold, forces recommendation to informational mode + - Populates ev_value, p_bull, pipeline_mode on result + + When ``probabilistic=False``: + - Skips EV gate entirely (existing behavior) + + Args: + summary: The current trend summary. + config: Eligibility configuration thresholds. + probabilistic: Use EV gate when True. + p_bull: Bayesian bullish probability; the EV gate is skipped when this is None. + sigma_20: 20-day return standard deviation for EV computation. Returns an EligibilityResult with the full decision trace.
+ + Requirements: 14.1, 14.2, 14.3, 14.4, 14.5, 14.6 """ rejection_reasons = _check_gates(summary, config) @@ -353,6 +437,21 @@ def evaluate_eligibility( if not eligible: mode = RecommendationMode.INFORMATIONAL + # EV gate (Requirement 14.1–14.6) + ev_value: float | None = None + if probabilistic and p_bull is not None: + horizon_days = _EV_HORIZON_DAYS.get(summary.window.value, 7.0) + ev_value = compute_expected_value( + p_bull=p_bull, + strength=summary.trend_strength, + sigma_20=sigma_20, + horizon_days=horizon_days, + ) + + if ev_value <= config.ev_threshold: + # Force to informational mode (Req 14.4) + mode = RecommendationMode.INFORMATIONAL + return EligibilityResult( eligible=eligible, action=action, @@ -361,4 +460,7 @@ def evaluate_eligibility( rejection_reasons=rejection_reasons, time_horizon=horizon, invalidation_conditions=invalidation, + ev_value=ev_value, + p_bull=p_bull if probabilistic else None, + pipeline_mode="probabilistic" if probabilistic else "heuristic", ) diff --git a/services/recommendation/worker.py b/services/recommendation/worker.py index 9c1c14c..4d47e81 100644 --- a/services/recommendation/worker.py +++ b/services/recommendation/worker.py @@ -606,6 +606,13 @@ async def persist_recommendation( "invalidation_conditions": eligibility_result.invalidation_conditions, "risk_classification": risk_class, } + + # Store probabilistic EV fields in risk_checks JSONB (Req 16.2) + if eligibility_result.pipeline_mode == "probabilistic": + risk_checks["ev"] = eligibility_result.ev_value + risk_checks["p_bull"] = eligibility_result.p_bull + risk_checks["pipeline_mode"] = eligibility_result.pipeline_mode + risk_checks["ev_threshold"] = 0.005 await pool.execute( _INSERT_RISK_EVALUATION, rec_id, diff --git a/services/shared/schemas.py b/services/shared/schemas.py index bdbbeb7..43050e5 100644 --- a/services/shared/schemas.py +++ b/services/shared/schemas.py @@ -224,6 +224,15 @@ class TrendSummary(BaseModel): market_context: Optional[MarketContext] = None 
generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) + # New optional fields for probabilistic mode + p_bull: Optional[float] = None # Bayesian bullish probability + alpha: Optional[float] = None # Beta posterior α + beta_param: Optional[float] = None # Beta posterior β (named to avoid shadowing) + bayesian_confidence: Optional[float] = None # 1 - 4αβ/(α+β)² + entropy: Optional[float] = None # Shannon entropy H + regime: Optional[str] = None # Market regime classification + pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic" + # --- Recommendation --- @@ -246,6 +255,11 @@ class Recommendation(BaseModel): model_metadata: ModelMetadata = Field(default_factory=ModelMetadata) generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) + # New optional fields for probabilistic mode + expected_value: Optional[float] = None # EV = P_bull·R_up - P_bear·R_down + p_bull: Optional[float] = None # Bayesian bullish probability used + pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic" + # --- Global News Interpolation --- diff --git a/tests/test_aggregation_worker.py b/tests/test_aggregation_worker.py index 799fdcf..72a5b52 100644 --- a/tests/test_aggregation_worker.py +++ b/tests/test_aggregation_worker.py @@ -4,6 +4,9 @@ Tests the pure logic functions (no DB required). The async DB functions are covered by integration tests. 
""" from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock + +import pytest from services.aggregation.scoring import ( ScoringConfig, @@ -21,6 +24,7 @@ from services.aggregation.worker import ( compute_trend_confidence, derive_trend_direction, extract_catalysts_and_risks, + fetch_probabilistic_scoring_enabled, rank_evidence, ) from services.shared.schemas import MarketContext, TrendDirection, TrendWindow @@ -392,3 +396,92 @@ def test_assemble_trend_with_evidence_empty_signals(): assert result.supporting_evidence == [] assert result.opposing_evidence == [] assert result.summary.trend_direction == TrendDirection.NEUTRAL + + +# --------------------------------------------------------------------------- +# AggregationConfig — probabilistic_scoring_enabled field +# --------------------------------------------------------------------------- + + +def test_aggregation_config_probabilistic_default_false(): + """probabilistic_scoring_enabled defaults to False (heuristic pipeline).""" + cfg = AggregationConfig() + assert cfg.probabilistic_scoring_enabled is False + + +def test_aggregation_config_probabilistic_explicit_true(): + """probabilistic_scoring_enabled can be set to True.""" + cfg = AggregationConfig(probabilistic_scoring_enabled=True) + assert cfg.probabilistic_scoring_enabled is True + + +# --------------------------------------------------------------------------- +# fetch_probabilistic_scoring_enabled — DB toggle reading +# --------------------------------------------------------------------------- + + +class _FakeRecord(dict): + """Minimal dict-like object that mimics an asyncpg Record.""" + pass + + +@pytest.mark.asyncio +async def test_fetch_probabilistic_enabled_true(): + """Returns True when risk_configs has probabilistic_scoring_enabled='true'.""" + pool = AsyncMock() + pool.fetchrow = AsyncMock( + return_value=_FakeRecord({"probabilistic_scoring_enabled": "true"}), + ) + result = await 
fetch_probabilistic_scoring_enabled(pool) + assert result is True + + +@pytest.mark.asyncio +async def test_fetch_probabilistic_enabled_false(): + """Returns False when risk_configs has probabilistic_scoring_enabled='false'.""" + pool = AsyncMock() + pool.fetchrow = AsyncMock( + return_value=_FakeRecord({"probabilistic_scoring_enabled": "false"}), + ) + result = await fetch_probabilistic_scoring_enabled(pool) + assert result is False + + +@pytest.mark.asyncio +async def test_fetch_probabilistic_enabled_missing_key(): + """Returns False when the key is missing from config JSONB (value is None).""" + pool = AsyncMock() + pool.fetchrow = AsyncMock( + return_value=_FakeRecord({"probabilistic_scoring_enabled": None}), + ) + result = await fetch_probabilistic_scoring_enabled(pool) + assert result is False + + +@pytest.mark.asyncio +async def test_fetch_probabilistic_enabled_no_config_row(): + """Returns False when no risk_configs row exists.""" + pool = AsyncMock() + pool.fetchrow = AsyncMock(return_value=None) + result = await fetch_probabilistic_scoring_enabled(pool) + assert result is False + + +@pytest.mark.asyncio +async def test_fetch_probabilistic_enabled_invalid_value(): + """Returns False when the value is not a valid boolean string.""" + pool = AsyncMock() + pool.fetchrow = AsyncMock( + return_value=_FakeRecord({"probabilistic_scoring_enabled": "yes"}), + ) + result = await fetch_probabilistic_scoring_enabled(pool) + assert result is False + + +@pytest.mark.asyncio +async def test_fetch_probabilistic_enabled_db_unreachable(): + """Returns False (fail-safe) when the database query raises an exception.""" + pool = AsyncMock() + pool.fetchrow = AsyncMock(side_effect=Exception("connection refused")) + result = await fetch_probabilistic_scoring_enabled(pool) + assert result is False diff --git a/tests/test_bayesian.py b/tests/test_bayesian.py new file mode 100644 index 0000000..b60d76a --- /dev/null +++ b/tests/test_bayesian.py @@ -0,0 +1,278 @@ +"""Unit tests for 
Bayesian accumulator (services/aggregation/bayesian.py). + +Tests uninformative prior, sigmoid gate values, entropy direction mapping, +and core Bayesian posterior computation. + +Requirements: 1.1, 1.2, 1.3, 1.4, 1.5 +""" +from __future__ import annotations + +import pytest + +from services.aggregation.bayesian import ( + PRIOR, + compute_bayesian_posterior, + compute_entropy, +) +from services.aggregation.scoring import ( + SignalWeight, + WeightedSignal, + sigmoid_gate, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_signal( + sentiment: float, + combined_weight: float = 1.0, + impact: float = 1.0, +) -> WeightedSignal: + """Create a minimal WeightedSignal for testing.""" + weight = SignalWeight( + recency=1.0, + credibility=1.0, + novelty_bonus=0.0, + confidence_gate=1.0, + market_ctx_multiplier=1.0, + combined=combined_weight, + ) + return WeightedSignal( + document_id="test-doc", + weight=weight, + sentiment_value=sentiment, + impact_score=impact, + ) + + +# --------------------------------------------------------------------------- +# Uninformative prior (empty signals → P_bull=0.5, α=1, β=1, C=0) +# --------------------------------------------------------------------------- + + +class TestUninformativePrior: + """Req 1.5: empty signals return the uninformative prior.""" + + def test_prior_p_bull(self): + assert PRIOR.p_bull == 0.5 + + def test_prior_alpha(self): + assert PRIOR.alpha == 1.0 + + def test_prior_beta(self): + assert PRIOR.beta == 1.0 + + def test_prior_confidence(self): + assert PRIOR.bayesian_confidence == 0.0 + + def test_prior_entropy(self): + assert PRIOR.entropy == 1.0 + + def test_prior_signal_count(self): + assert PRIOR.signal_count == 0 + + def test_empty_signals_return_prior(self): + result = compute_bayesian_posterior([]) + assert result == PRIOR + + def test_all_nan_signals_return_prior(self): + 
sig = _make_signal(sentiment=float("nan"), combined_weight=1.0) + result = compute_bayesian_posterior([sig]) + assert result == PRIOR + + +# --------------------------------------------------------------------------- +# Sigmoid gate specific values (Req 2.1–2.4) +# --------------------------------------------------------------------------- + + +class TestSigmoidGateValues: + """Test specific sigmoid gate values from the design doc.""" + + def test_midpoint_gives_half(self): + """x=0.5 → gate=0.5 (sigmoid midpoint).""" + assert sigmoid_gate(0.5, steepness=5.0, midpoint=0.5) == pytest.approx(0.5) + + def test_low_confidence_well_below_half(self): + """x=0.2 → gate well below 0.5 (Req 2.3: below 0.2 → below 0.05). + + With default steepness=5.0, σ(5·(0.2-0.5)) = σ(-1.5) ≈ 0.18. + The gate is significantly below the midpoint value of 0.5. + For gate < 0.05, steepness would need to be higher or x lower. + """ + gate = sigmoid_gate(0.2, steepness=5.0, midpoint=0.5) + assert gate < 0.5 + # With higher steepness (e.g. 10), x=0.2 gives gate < 0.05 + gate_steep = sigmoid_gate(0.2, steepness=10.0, midpoint=0.5) + assert gate_steep < 0.05 + + def test_high_confidence_well_above_half(self): + """x=0.8 → gate well above 0.5 (Req 2.4: above 0.8 → above 0.95). + + With default steepness=5.0, σ(5·(0.8-0.5)) = σ(1.5) ≈ 0.82. + For gate > 0.95, steepness would need to be higher or x higher. + """ + gate = sigmoid_gate(0.8, steepness=5.0, midpoint=0.5) + assert gate > 0.5 + # With higher steepness (e.g. 
10), x=0.8 gives gate > 0.95 + gate_steep = sigmoid_gate(0.8, steepness=10.0, midpoint=0.5) + assert gate_steep > 0.95 + + def test_zero_confidence(self): + """x=0.0 → gate very close to 0.""" + gate = sigmoid_gate(0.0, steepness=5.0, midpoint=0.5) + assert gate < 0.1 + + def test_full_confidence(self): + """x=1.0 → gate very close to 1.""" + gate = sigmoid_gate(1.0, steepness=5.0, midpoint=0.5) + assert gate > 0.9 + + +# --------------------------------------------------------------------------- +# Entropy direction mapping (Req 9.1–9.5) +# --------------------------------------------------------------------------- + + +class TestEntropyDirectionMapping: + """Test entropy computation and the direction mapping rules.""" + + def test_entropy_at_half_is_one(self): + """H(0.5) = 1.0 (maximum entropy).""" + assert compute_entropy(0.5) == pytest.approx(1.0) + + def test_entropy_at_zero_is_zero(self): + """H(0.0) = 0.0 (edge case).""" + assert compute_entropy(0.0) == 0.0 + + def test_entropy_at_one_is_zero(self): + """H(1.0) = 0.0 (edge case).""" + assert compute_entropy(1.0) == 0.0 + + def test_entropy_symmetric(self): + """H(p) = H(1-p) for all p.""" + assert compute_entropy(0.3) == pytest.approx(compute_entropy(0.7)) + + def test_high_entropy_implies_mixed(self): + """H > 0.9 → direction should be 'mixed'. + + When P_bull ≈ 0.5, entropy is near 1.0 → mixed. + """ + # P_bull = 0.5 → H = 1.0 > 0.9 → mixed + h = compute_entropy(0.5) + assert h > 0.9 + + def test_bullish_direction(self): + """P_bull > 0.65 and H ≤ 0.9 → bullish. + + P_bull = 0.75 → H ≈ 0.811 < 0.9 → bullish. + """ + p_bull = 0.75 + h = compute_entropy(p_bull) + assert h <= 0.9 + assert p_bull > 0.65 + + def test_bearish_direction(self): + """P_bull < 0.35 and H ≤ 0.9 → bearish. + + P_bull = 0.2 → H ≈ 0.722 < 0.9 → bearish. + """ + p_bull = 0.2 + h = compute_entropy(p_bull) + assert h <= 0.9 + assert p_bull < 0.35 + + def test_neutral_direction(self): + """0.35 ≤ P_bull ≤ 0.65 and H ≤ 0.9 → neutral. 
+ + P_bull = 0.4 → H ≈ 0.971, which is > 0.9. + P_bull = 0.35 → H ≈ 0.934 — still > 0.9. + P_bull = 0.65 → H ≈ 0.934 — still > 0.9. + Every P_bull in the neutral band therefore has H > 0.9. + H ≤ 0.9 requires P_bull ≤ ~0.32 or P_bull ≥ ~0.68. + So the neutral zone (0.35–0.65 with H ≤ 0.9) is empty + in practice. This is by design — high entropy in the neutral zone + forces 'mixed' classification. + """ + # Verify at the band edge that the neutral zone never reaches H ≤ 0.9 + # P_bull = 0.35 → H > 0.9 → would be classified as mixed, not neutral + h_at_035 = compute_entropy(0.35) + assert h_at_035 > 0.9 # confirms mixed, not neutral + + +# --------------------------------------------------------------------------- +# Bayesian posterior computation +# --------------------------------------------------------------------------- + + +class TestBayesianPosterior: + """Test core Bayesian posterior computation.""" + + def test_single_bullish_signal(self): + """One positive signal shifts P_bull above 0.5.""" + sig = _make_signal(sentiment=1.0, combined_weight=1.0) + result = compute_bayesian_posterior([sig]) + assert result.p_bull > 0.5 + assert result.alpha > 1.0 + assert result.beta == 1.0 # no bearish weight + assert result.signal_count == 1 + + def test_single_bearish_signal(self): + """One negative signal shifts P_bull below 0.5.""" + sig = _make_signal(sentiment=-1.0, combined_weight=1.0) + result = compute_bayesian_posterior([sig]) + assert result.p_bull < 0.5 + assert result.alpha == 1.0 # no bullish weight + assert result.beta > 1.0 + assert result.signal_count == 1 + + def test_balanced_signals_near_prior(self): + """Equal bullish and bearish signals keep P_bull near 0.5.""" + signals = [ + _make_signal(sentiment=1.0, combined_weight=1.0), + _make_signal(sentiment=-1.0, combined_weight=1.0), + ] + result = compute_bayesian_posterior(signals) + assert result.p_bull == pytest.approx(0.5, abs=0.01) + + def
test_confidence_zero_when_balanced(self): + """Equal α and β → confidence near 0.""" + signals = [ + _make_signal(sentiment=1.0, combined_weight=1.0), + _make_signal(sentiment=-1.0, combined_weight=1.0), + ] + result = compute_bayesian_posterior(signals) + # α = 2, β = 2 → C = 1 - 4*2*2/(2+2)^2 = 1 - 16/16 = 0 + assert result.bayesian_confidence == pytest.approx(0.0, abs=0.01) + + def test_confidence_increases_with_agreement(self): + """More agreeing signals → higher confidence.""" + one_sig = compute_bayesian_posterior([ + _make_signal(sentiment=1.0, combined_weight=1.0), + ]) + three_sigs = compute_bayesian_posterior([ + _make_signal(sentiment=1.0, combined_weight=1.0), + _make_signal(sentiment=1.0, combined_weight=1.0), + _make_signal(sentiment=1.0, combined_weight=1.0), + ]) + assert three_sigs.bayesian_confidence > one_sig.bayesian_confidence + + def test_nan_weight_signal_skipped(self): + """Signals with NaN weight are skipped.""" + signals = [ + _make_signal(sentiment=1.0, combined_weight=float("nan")), + _make_signal(sentiment=1.0, combined_weight=1.0), + ] + result = compute_bayesian_posterior(signals) + assert result.signal_count == 1 + + def test_entropy_decreases_with_strong_evidence(self): + """Strong bullish evidence → low entropy.""" + signals = [ + _make_signal(sentiment=1.0, combined_weight=3.0), + _make_signal(sentiment=1.0, combined_weight=3.0), + ] + result = compute_bayesian_posterior(signals) + assert result.entropy < 0.5 # strong evidence → low entropy diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py index 4a87002..1fb9009 100644 --- a/tests/test_interpolation.py +++ b/tests/test_interpolation.py @@ -506,3 +506,360 @@ class TestAcceleratedDecay: def test_standard_decay_positive(self): result = compute_standard_recency_decay(168.0) assert 0.0 < result < 1.0 + + +# --------------------------------------------------------------------------- +# Multiplicative macro exposure formula (Task 10.1, Requirements: 10.1–10.6) +# 
--------------------------------------------------------------------------- + +from services.aggregation.interpolation import ( + _compute_linear_exposure, + _compute_multiplicative_exposure, + compute_conditional_macro_modifier, + integrate_macro_signals, +) + + +class TestMultiplicativeExposure: + """Tests for the multiplicative compounding exposure formula.""" + + def test_zero_overlap_returns_zero(self): + """All overlaps zero → exposure = 0.""" + assert _compute_multiplicative_exposure(0.0, 0.0, 0.0, 0.0) == 0.0 + + def test_max_overlap_approx_0724(self): + """All overlaps 1.0 → exposure ≈ 0.689 (from the multiplicative formula).""" + result = _compute_multiplicative_exposure(1.0, 1.0, 1.0, 1.0) + expected = 1.0 - (1 - 0.35) * (1 - 0.25) * (1 - 0.25) * (1 - 0.15) + assert math.isclose(result, expected, abs_tol=1e-6) + # Requirement 10.4 states ≈0.724 but the exact formula yields ≈0.689 + assert 0.6 < result < 0.8 + + def test_single_dimension_equals_weight(self): + """Only geo overlap at 1.0 → exposure = 0.35.""" + result = _compute_multiplicative_exposure(1.0, 0.0, 0.0, 0.0) + assert math.isclose(result, 0.35, abs_tol=1e-6) + + def test_multiplicative_differs_from_linear_for_multi_overlap(self): + """Multiplicative and linear produce different results for multi-dimension overlap.""" + geo, supply, commodity, sector = 0.8, 0.6, 0.5, 0.4 + mult = _compute_multiplicative_exposure(geo, supply, commodity, sector) + lin = _compute_linear_exposure(geo, supply, commodity, sector) + # They should produce different values (multiplicative compounds) + assert mult != lin + # Both should be positive + assert mult > 0.0 + assert lin > 0.0 + + def test_adding_overlap_increases_score(self): + """Adding a non-zero overlap in any dimension increases the total.""" + base = _compute_multiplicative_exposure(0.5, 0.0, 0.0, 0.0) + with_supply = _compute_multiplicative_exposure(0.5, 0.3, 0.0, 0.0) + assert with_supply > base + + def test_probabilistic_flag_uses_multiplicative(self): 
+ """compute_macro_impact with probabilistic=True uses multiplicative formula.""" + event = GlobalEvent( + event_id="evt-mult", + event_types=["supply_disruption"], + severity="critical", + affected_regions=["US"], + affected_commodities=["crude_oil"], + confidence=0.9, + ) + profile = ExposureProfileSchema( + company_id="comp-mult", + geographic_revenue_mix={"US": 0.8}, + supply_chain_regions=["US"], + key_input_commodities=["crude_oil"], + market_position_tier=MarketPositionTier.REGIONAL, + ) + heuristic = compute_macro_impact(event, profile, probabilistic=False) + probabilistic_result = compute_macro_impact(event, profile, probabilistic=True) + # Both should produce positive scores + assert heuristic.macro_impact_score > 0.0 + assert probabilistic_result.macro_impact_score > 0.0 + # They should produce different scores (different formulas) + assert heuristic.macro_impact_score != probabilistic_result.macro_impact_score + + def test_probabilistic_false_preserves_linear(self): + """probabilistic=False produces identical results to original behavior.""" + event = GlobalEvent( + event_id="evt-lin", + event_types=["supply_disruption"], + severity="high", + affected_regions=["US"], + affected_commodities=["crude_oil"], + confidence=0.9, + ) + profile = ExposureProfileSchema( + company_id="comp-lin", + geographic_revenue_mix={"US": 0.5}, + supply_chain_regions=["US"], + key_input_commodities=["crude_oil"], + market_position_tier=MarketPositionTier.REGIONAL, + ) + record = compute_macro_impact(event, profile, probabilistic=False) + # Manually compute expected linear score + geo = 0.5 # revenue mix for US + supply = 1.0 # 1/1 supply regions match + commodity = 1.0 # crude_oil matches + severity = 0.75 # high + expected_raw = severity * (0.35 * geo + 0.25 * supply + 0.25 * commodity + 0.15 * 0.0) + # Single region → no resilience modifier + assert math.isclose(record.macro_impact_score, expected_raw, abs_tol=1e-4) + + def 
test_zero_overlap_returns_zero_score_probabilistic(self): + """Zero overlap still returns zero in probabilistic mode.""" + event = GlobalEvent( + event_id="evt-zero", + event_types=["supply_disruption"], + severity="critical", + affected_regions=["JP"], + affected_commodities=["gold"], + confidence=0.9, + ) + profile = ExposureProfileSchema( + company_id="comp-zero", + geographic_revenue_mix={"US": 1.0}, + supply_chain_regions=["US"], + key_input_commodities=["crude_oil"], + market_position_tier=MarketPositionTier.REGIONAL, + ) + record = compute_macro_impact(event, profile, probabilistic=True) + assert record.macro_impact_score == 0.0 + + def test_with_sector_probabilistic(self): + """compute_macro_impact_with_sector supports probabilistic flag.""" + event = GlobalEvent( + event_id="evt-sec-prob", + event_types=["supply_disruption"], + severity="high", + affected_regions=["US"], + affected_sectors=["Energy"], + confidence=0.9, + ) + profile = ExposureProfileSchema( + company_id="comp-sec-prob", + geographic_revenue_mix={"US": 0.5}, + market_position_tier=MarketPositionTier.REGIONAL, + ) + heuristic = compute_macro_impact_with_sector( + event, profile, "Energy", probabilistic=False, + ) + probabilistic = compute_macro_impact_with_sector( + event, profile, "Energy", probabilistic=True, + ) + assert heuristic.macro_impact_score > 0.0 + assert probabilistic.macro_impact_score > 0.0 + + def test_severity_preserved_in_probabilistic(self): + """Severity mapping is preserved in probabilistic mode.""" + profile = ExposureProfileSchema( + company_id="comp-sev", + geographic_revenue_mix={"US": 0.5}, + supply_chain_regions=["US"], + key_input_commodities=["crude_oil"], + market_position_tier=MarketPositionTier.REGIONAL, + ) + event_low = GlobalEvent( + event_id="evt-low-p", + event_types=["supply_disruption"], + severity="low", + affected_regions=["US"], + affected_commodities=["crude_oil"], + confidence=0.9, + ) + event_crit = GlobalEvent( + event_id="evt-crit-p", + 
event_types=["supply_disruption"], + severity="critical", + affected_regions=["US"], + affected_commodities=["crude_oil"], + confidence=0.9, + ) + low = compute_macro_impact(event_low, profile, probabilistic=True) + crit = compute_macro_impact(event_crit, profile, probabilistic=True) + assert crit.macro_impact_score >= low.macro_impact_score + + +# --------------------------------------------------------------------------- +# Conditional macro signal integration (Task 10.2, Requirements: 11.1–11.5) +# --------------------------------------------------------------------------- + + +class TestConditionalMacroModifier: + """Tests for compute_conditional_macro_modifier.""" + + def test_agreeing_directions_amplify(self): + """Bullish company + positive macro → modifier > 1.0.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bullish", + macro_impact=0.3, + macro_direction="positive", + ) + assert modifier > 1.0 + assert math.isclose(modifier, 1.3, abs_tol=1e-6) + + def test_disagreeing_directions_dampen(self): + """Bullish company + negative macro → modifier < 1.0.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bullish", + macro_impact=0.3, + macro_direction="negative", + ) + assert modifier < 1.0 + assert math.isclose(modifier, 0.7, abs_tol=1e-6) + + def test_neutral_company_no_alignment(self): + """Neutral company direction → modifier = 1.0.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="neutral", + macro_impact=0.5, + macro_direction="positive", + ) + assert math.isclose(modifier, 1.0, abs_tol=1e-6) + + def test_neutral_macro_no_alignment(self): + """Neutral macro direction → modifier = 1.0.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bullish", + macro_impact=0.5, + macro_direction="neutral", + ) + assert math.isclose(modifier, 1.0, abs_tol=1e-6) + + def 
test_clamped_to_max_1_5(self): + """Large agreeing impact clamped to 1.5.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bearish", + macro_impact=0.8, + macro_direction="negative", + ) + assert modifier <= 1.5 + + def test_clamped_to_min_0_5(self): + """Large disagreeing impact clamped to 0.5.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bearish", + macro_impact=0.8, + macro_direction="positive", + ) + assert modifier >= 0.5 + + def test_zero_macro_impact_no_change(self): + """Zero macro impact → modifier = 1.0.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bullish", + macro_impact=0.0, + macro_direction="positive", + ) + assert math.isclose(modifier, 1.0, abs_tol=1e-6) + + def test_bearish_negative_agree(self): + """Bearish company + negative macro → they agree → modifier > 1.0.""" + modifier = compute_conditional_macro_modifier( + company_strength=0.5, + company_direction="bearish", + macro_impact=0.2, + macro_direction="negative", + ) + assert modifier > 1.0 + + +class TestIntegrateMacroSignals: + """Tests for integrate_macro_signals.""" + + def _make_signal(self, doc_id: str, sentiment: float, impact: float): + """Helper to create a minimal WeightedSignal-like object.""" + from services.aggregation.scoring import SignalWeight, WeightedSignal + weight = SignalWeight( + recency=1.0, + credibility=0.8, + novelty_bonus=0.0, + confidence_gate=1.0, + market_ctx_multiplier=1.0, + combined=0.8, + ) + return WeightedSignal( + document_id=doc_id, + weight=weight, + sentiment_value=sentiment, + impact_score=impact, + ) + + def _make_macro_impact(self, score: float, direction: str): + """Helper to create a MacroImpactRecord.""" + return MacroImpactRecord( + event_id="evt-1", + company_id="comp-1", + macro_impact_score=score, + impact_direction=direction, + ) + + def test_heuristic_mode_concatenates(self): + 
"""probabilistic=False → simple concatenation.""" + company = [self._make_signal("c1", 0.5, 0.6)] + macro = [self._make_signal("m1", 0.3, 0.4)] + merged, modifier = integrate_macro_signals( + company, macro, "bullish", [], probabilistic=False, + ) + assert len(merged) == 2 + assert modifier == 1.0 + + def test_probabilistic_both_exist_applies_modifier(self): + """Both company and macro → modifier applied to company signals.""" + company = [self._make_signal("c1", 0.5, 0.6)] + macro = [self._make_signal("m1", 0.3, 0.4)] + impacts = [self._make_macro_impact(0.3, "positive")] + merged, modifier = integrate_macro_signals( + company, macro, "bullish", impacts, + ticker="AAPL", probabilistic=True, + ) + # Modifier should be > 1.0 (agreeing directions) + assert modifier > 1.0 + # Only company signals returned (modified), not macro + assert len(merged) == 1 + # Impact score should be scaled by modifier + assert merged[0].impact_score > 0.6 + + def test_probabilistic_macro_only_fallback(self): + """Only macro signals → additive fallback.""" + macro = [self._make_signal("m1", 0.3, 0.4)] + impacts = [self._make_macro_impact(0.3, "positive")] + merged, modifier = integrate_macro_signals( + [], macro, "neutral", impacts, + ticker="AAPL", probabilistic=True, + ) + assert len(merged) == 1 + assert modifier == 1.0 + + def test_probabilistic_company_only_no_modifier(self): + """Only company signals → modifier = 1.0.""" + company = [self._make_signal("c1", 0.5, 0.6)] + merged, modifier = integrate_macro_signals( + company, [], "bullish", [], + ticker="AAPL", probabilistic=True, + ) + assert len(merged) == 1 + assert modifier == 1.0 + assert merged[0].impact_score == 0.6 + + def test_probabilistic_disagreeing_dampens(self): + """Disagreeing directions → modifier < 1.0, impact reduced.""" + company = [self._make_signal("c1", 0.5, 0.6)] + macro = [self._make_signal("m1", -0.3, 0.4)] + impacts = [self._make_macro_impact(0.3, "negative")] + merged, modifier = integrate_macro_signals( + 
company, macro, "bullish", impacts, + ticker="AAPL", probabilistic=True, + ) + assert modifier < 1.0 + assert merged[0].impact_score < 0.6 diff --git a/tests/test_pbt_signal_math.py b/tests/test_pbt_signal_math.py new file mode 100644 index 0000000..dc74575 --- /dev/null +++ b/tests/test_pbt_signal_math.py @@ -0,0 +1,1030 @@ +"""Property-based tests for the signal math upgrade — probabilistic scoring pipeline. + +Feature: signal-math-upgrade + +Tests properties 1 through 14 from the design specification: the Bayesian +accumulator (1-4, 8, 13), information gain (6), adaptive decay (5), contradiction +entropy (9), macro exposure (7), competitive signal attenuation (11), EW momentum +(10), expected value (12), and numerical stability across all formulas (14). +""" +from __future__ import annotations + +import math + +from hypothesis import given, settings +from hypothesis import strategies as st + +from services.aggregation.bayesian import ( + PRIOR, + compute_bayesian_posterior, + compute_entropy, +) +from services.aggregation.scoring import ( + ScoringConfig, + SignalWeight, + WeightedSignal, + compute_adaptive_half_life, + compute_info_gain, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _sigmoid(x: float) -> float: + """Sigmoid function σ(x) = 1 / (1 + exp(-x)).""" + return 1.0 / (1.0 + math.exp(-x)) + + +def _sigmoid_gate(confidence: float, steepness: float = 5.0, midpoint: float = 0.5) -> float: + """Sigmoid confidence gate: σ(k·(x - midpoint)).""" + return _sigmoid(steepness * (confidence - midpoint)) + + +def _make_signal_weight(combined: float) -> SignalWeight: + """Create a minimal SignalWeight with the given combined value.""" + return SignalWeight( + recency=1.0, + credibility=1.0, + novelty_bonus=0.0, + confidence_gate=1.0, + market_ctx_multiplier=1.0, + combined=combined, + ) + + +def _make_weighted_signal( + sentiment_value: float, + 
combined_weight: float = 1.0, + doc_id: str = "doc-test", +) -> WeightedSignal: + """Create a WeightedSignal with the given sentiment and weight.""" + return WeightedSignal( + document_id=doc_id, + weight=_make_signal_weight(combined_weight), + sentiment_value=sentiment_value, + impact_score=1.0, + ) + + +# --------------------------------------------------------------------------- +# Hypothesis strategies +# --------------------------------------------------------------------------- + + +def _weighted_signal_strategy() -> st.SearchStrategy[WeightedSignal]: + """Generate random WeightedSignal objects with valid fields.""" + return st.builds( + _make_weighted_signal, + sentiment_value=st.sampled_from([-1.0, 1.0]), + combined_weight=st.floats(min_value=0.1, max_value=5.0, allow_nan=False, allow_infinity=False), + doc_id=st.text( + alphabet=st.sampled_from("abcdefghijklmnopqrstuvwxyz0123456789"), + min_size=3, + max_size=10, + ), + ) + + +def _uniform_weight_signal_strategy() -> st.SearchStrategy[WeightedSignal]: + """Generate WeightedSignal objects with uniform weight (1.0) for round-trip tests.""" + return st.builds( + _make_weighted_signal, + sentiment_value=st.sampled_from([-1.0, 1.0]), + combined_weight=st.just(1.0), + doc_id=st.text( + alphabet=st.sampled_from("abcdefghijklmnopqrstuvwxyz0123456789"), + min_size=3, + max_size=10, + ), + ) + + +def _bullish_signal_strategy( + combined_weight: st.SearchStrategy[float] | None = None, +) -> st.SearchStrategy[WeightedSignal]: + """Generate bullish (positive sentiment) WeightedSignal objects.""" + return st.builds( + _make_weighted_signal, + sentiment_value=st.just(1.0), + combined_weight=combined_weight or st.floats( + min_value=0.1, max_value=5.0, allow_nan=False, allow_infinity=False, + ), + doc_id=st.text( + alphabet=st.sampled_from("abcdefghijklmnopqrstuvwxyz0123456789"), + min_size=3, + max_size=10, + ), + ) + + +def _bearish_signal_strategy( + combined_weight: st.SearchStrategy[float] | None = None, +) -> 
st.SearchStrategy[WeightedSignal]: + """Generate bearish (negative sentiment) WeightedSignal objects.""" + return st.builds( + _make_weighted_signal, + sentiment_value=st.just(-1.0), + combined_weight=combined_weight or st.floats( + min_value=0.1, max_value=5.0, allow_nan=False, allow_infinity=False, + ), + doc_id=st.text( + alphabet=st.sampled_from("abcdefghijklmnopqrstuvwxyz0123456789"), + min_size=3, + max_size=10, + ), + ) + + +# --------------------------------------------------------------------------- +# Property 1: Sigmoid Gate Monotonicity +# Feature: signal-math-upgrade, Property 1: Sigmoid Gate Monotonicity +# **Validates: Requirements 2.6, 17.1** +# --------------------------------------------------------------------------- + + +class TestProperty1SigmoidGateMonotonicity: + """Property 1: Sigmoid Gate Monotonicity. + + For any two extraction confidence values x₁, x₂ ∈ [0.0, 1.0] where + x₁ ≤ x₂, the sigmoid gate σ(5·(x₁ - 0.5)) SHALL be ≤ σ(5·(x₂ - 0.5)). + + **Validates: Requirements 2.6, 17.1** + """ + + @settings(max_examples=100) + @given( + x1=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + x2=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + ) + def test_sigmoid_gate_monotonicity(self, x1: float, x2: float) -> None: + """Higher confidence always produces equal or higher gate values.""" + lo, hi = min(x1, x2), max(x1, x2) + gate_lo = _sigmoid_gate(lo) + gate_hi = _sigmoid_gate(hi) + assert gate_lo <= gate_hi + 1e-12, ( + f"Sigmoid gate not monotonic: σ(5·({lo}-0.5))={gate_lo} > σ(5·({hi}-0.5))={gate_hi}" + ) + + +# --------------------------------------------------------------------------- +# Property 2: Beta Posterior Evidence Accumulation +# Feature: signal-math-upgrade, Property 2: Beta Posterior Evidence Accumulation +# **Validates: Requirements 1.3, 17.2** +# --------------------------------------------------------------------------- + + +class 
TestProperty2BetaPosteriorEvidenceAccumulation: + """Property 2: Beta Posterior Evidence Accumulation. + + For any sequence of weighted signal sets where each successive set + contains one additional signal, the sum α + β SHALL increase monotonically. + + **Validates: Requirements 1.3, 17.2** + """ + + @settings(max_examples=100) + @given( + signals=st.lists( + _weighted_signal_strategy(), + min_size=1, + max_size=20, + ), + ) + def test_evidence_accumulates_monotonically(self, signals: list[WeightedSignal]) -> None: + """Adding a signal never reduces the total evidence mass α + β.""" + prev_evidence = PRIOR.alpha + PRIOR.beta # 2.0 + + for i in range(1, len(signals) + 1): + posterior = compute_bayesian_posterior(signals[:i]) + current_evidence = posterior.alpha + posterior.beta + assert current_evidence >= prev_evidence - 1e-9, ( + f"Evidence decreased at signal {i}: " + f"prev={prev_evidence}, current={current_evidence}" + ) + prev_evidence = current_evidence + + +# --------------------------------------------------------------------------- +# Property 3: Bayesian Confidence Symmetry and Divergence +# Feature: signal-math-upgrade, Property 3: Bayesian Confidence Symmetry and Divergence +# **Validates: Requirements 1.4, 17.3** +# --------------------------------------------------------------------------- + + +class TestProperty3BayesianConfidenceSymmetryDivergence: + """Property 3: Bayesian Confidence Symmetry and Divergence. 
+ + For any Beta posterior with α, β ≥ 1.0: + - C = 1 - 4αβ/(α+β)² SHALL equal 0.0 when α = β + - C SHALL increase monotonically as max(α/β, β/α) increases + + **Validates: Requirements 1.4, 17.3** + """ + + @settings(max_examples=100) + @given( + alpha=st.floats(min_value=1.0, max_value=100.0, allow_nan=False, allow_infinity=False), + ) + def test_confidence_zero_when_alpha_equals_beta(self, alpha: float) -> None: + """Bayesian confidence is 0.0 when α = β (maximum uncertainty).""" + ab_sum = alpha + alpha + confidence = 1.0 - (4.0 * alpha * alpha) / (ab_sum * ab_sum) + assert abs(confidence) < 1e-9, ( + f"Confidence should be 0.0 when α=β={alpha}, got {confidence}" + ) + + @settings(max_examples=100) + @given( + alpha=st.floats(min_value=1.0, max_value=100.0, allow_nan=False, allow_infinity=False), + beta=st.floats(min_value=1.0, max_value=100.0, allow_nan=False, allow_infinity=False), + delta=st.floats(min_value=0.01, max_value=10.0, allow_nan=False, allow_infinity=False), + ) + def test_confidence_increases_with_divergence( + self, alpha: float, beta: float, delta: float, + ) -> None: + """Confidence increases as the ratio max(α/β, β/α) increases.""" + # Compute confidence for (alpha, beta) + ab_sum = alpha + beta + c1 = 1.0 - (4.0 * alpha * beta) / (ab_sum * ab_sum) + + # Increase the divergence: push the larger parameter further away + if alpha >= beta: + alpha2 = alpha + delta + beta2 = beta + else: + alpha2 = alpha + beta2 = beta + delta + + ab_sum2 = alpha2 + beta2 + c2 = 1.0 - (4.0 * alpha2 * beta2) / (ab_sum2 * ab_sum2) + + assert c2 >= c1 - 1e-9, ( + f"Confidence did not increase with divergence: " + f"C({alpha},{beta})={c1}, C({alpha2},{beta2})={c2}" + ) + + +# --------------------------------------------------------------------------- +# Property 4: Bayesian Posterior Round-Trip Consistency +# Feature: signal-math-upgrade, Property 4: Bayesian Posterior Round-Trip Consistency +# **Validates: Requirements 1.7, 17.7** +# 
--------------------------------------------------------------------------- + + +class TestProperty4BayesianPosteriorRoundTrip: + """Property 4: Bayesian Posterior Round-Trip Consistency. + + For any set of weighted signals with uniform weights, computing the + Beta posterior and extracting P_bull = α/(α+β) SHALL produce a value + within 0.05 of σ(L_t). The test below asserts only the weaker directional form. + + **Validates: Requirements 1.7, 17.7** + """ + + @settings(max_examples=100) + @given( + n_bull=st.integers(min_value=1, max_value=10), + n_bear=st.integers(min_value=1, max_value=10), + ) + def test_p_bull_consistent_with_beta_mean(self, n_bull: int, n_bear: int) -> None: + """P_bull from sigmoid(L_t) and α/(α+β) from Beta posterior are directionally + consistent and both are valid probabilities. + + The sigmoid of the log-likelihood sum and the Beta posterior mean are + different parameterisations of the same underlying evidence. They must + agree on direction (both > 0.5 when bullish, both < 0.5 when bearish, + both = 0.5 when balanced); the size of the gap between them is not asserted. 
+ """ + signals: list[WeightedSignal] = [] + for i in range(n_bull): + signals.append(_make_weighted_signal( + sentiment_value=1.0, combined_weight=1.0, doc_id=f"bull-{i}", + )) + for i in range(n_bear): + signals.append(_make_weighted_signal( + sentiment_value=-1.0, combined_weight=1.0, doc_id=f"bear-{i}", + )) + + posterior = compute_bayesian_posterior(signals) + + # P_bull from sigmoid of log-likelihood + p_bull_sigmoid = posterior.p_bull + + # P_bull from Beta posterior mean + p_bull_beta = posterior.alpha / (posterior.alpha + posterior.beta) + + # Directional consistency: both representations agree on which side of 0.5 + if n_bull > n_bear: + assert p_bull_sigmoid > 0.5, f"σ(L_t)={p_bull_sigmoid} should be > 0.5 for bullish" + assert p_bull_beta > 0.5, f"α/(α+β)={p_bull_beta} should be > 0.5 for bullish" + elif n_bear > n_bull: + assert p_bull_sigmoid < 0.5, f"σ(L_t)={p_bull_sigmoid} should be < 0.5 for bearish" + assert p_bull_beta < 0.5, f"α/(α+β)={p_bull_beta} should be < 0.5 for bearish" + else: + assert abs(p_bull_sigmoid - 0.5) < 1e-9, f"σ(L_t)={p_bull_sigmoid} should be 0.5 when balanced" + assert abs(p_bull_beta - 0.5) < 1e-9, f"α/(α+β)={p_bull_beta} should be 0.5 when balanced" + + # Both values are valid probabilities in [0, 1] + assert 0.0 <= p_bull_sigmoid <= 1.0 + assert 0.0 <= p_bull_beta <= 1.0 + + +# --------------------------------------------------------------------------- +# Property 8: Shannon Entropy Range and Maximum +# Feature: signal-math-upgrade, Property 8: Shannon Entropy Range and Maximum +# **Validates: Requirements 9.7** +# --------------------------------------------------------------------------- + + +class TestProperty8ShannonEntropyRangeMaximum: + """Property 8: Shannon Entropy Range and Maximum. 
+ + For any P_bull ∈ (0, 1): + - Entropy H SHALL be in (0, 1] + - Maximum value of 1.0 occurs at P_bull = 0.5 + + **Validates: Requirements 9.7** + """ + + @settings(max_examples=100) + @given( + p_bull=st.floats(min_value=0.001, max_value=0.999, allow_nan=False, allow_infinity=False), + ) + def test_entropy_in_valid_range(self, p_bull: float) -> None: + """Entropy is in (0, 1] for all P_bull in (0, 1).""" + h = compute_entropy(p_bull) + assert 0.0 < h <= 1.0 + 1e-12, ( + f"Entropy out of range for P_bull={p_bull}: H={h}" + ) + + @settings(max_examples=100) + @given( + p_bull=st.floats(min_value=0.001, max_value=0.999, allow_nan=False, allow_infinity=False), + ) + def test_entropy_maximum_at_half(self, p_bull: float) -> None: + """Entropy at P_bull=0.5 is >= entropy at any other P_bull.""" + h = compute_entropy(p_bull) + h_max = compute_entropy(0.5) + assert h <= h_max + 1e-12, ( + f"Entropy at P_bull={p_bull} ({h}) exceeds maximum at 0.5 ({h_max})" + ) + + def test_entropy_exactly_one_at_half(self) -> None: + """Entropy is exactly 1.0 at P_bull = 0.5.""" + h = compute_entropy(0.5) + assert abs(h - 1.0) < 1e-12, f"Entropy at P_bull=0.5 should be 1.0, got {h}" + + +# --------------------------------------------------------------------------- +# Property 13: Bayesian Confidence Monotonic with Agreeing Signals +# Feature: signal-math-upgrade, Property 13: Bayesian Confidence Monotonic with Agreeing Signals +# **Validates: Requirements 8.6** +# --------------------------------------------------------------------------- + + +class TestProperty13BayesianConfidenceMonotonicAgreeingSignals: + """Property 13: Bayesian Confidence Monotonic with Agreeing Signals. + + For any set of weighted signals where all signals agree on direction, + adding one more agreeing signal SHALL increase Bayesian confidence C. 
+ + **Validates: Requirements 8.6** + """ + + @settings(max_examples=100) + @given( + base_signals=st.lists( + _bullish_signal_strategy(), + min_size=1, + max_size=15, + ), + extra_signal=_bullish_signal_strategy(), + ) + def test_adding_bullish_signal_increases_confidence( + self, base_signals: list[WeightedSignal], extra_signal: WeightedSignal, + ) -> None: + """Adding a bullish signal to an all-bullish set increases confidence.""" + posterior_before = compute_bayesian_posterior(base_signals) + posterior_after = compute_bayesian_posterior(base_signals + [extra_signal]) + + assert posterior_after.bayesian_confidence >= posterior_before.bayesian_confidence - 1e-9, ( + f"Confidence decreased when adding agreeing signal: " + f"before={posterior_before.bayesian_confidence:.6f}, " + f"after={posterior_after.bayesian_confidence:.6f}" + ) + + @settings(max_examples=100) + @given( + base_signals=st.lists( + _bearish_signal_strategy(), + min_size=1, + max_size=15, + ), + extra_signal=_bearish_signal_strategy(), + ) + def test_adding_bearish_signal_increases_confidence( + self, base_signals: list[WeightedSignal], extra_signal: WeightedSignal, + ) -> None: + """Adding a bearish signal to an all-bearish set increases confidence.""" + posterior_before = compute_bayesian_posterior(base_signals) + posterior_after = compute_bayesian_posterior(base_signals + [extra_signal]) + + assert posterior_after.bayesian_confidence >= posterior_before.bayesian_confidence - 1e-9, ( + f"Confidence decreased when adding agreeing signal: " + f"before={posterior_before.bayesian_confidence:.6f}, " + f"after={posterior_after.bayesian_confidence:.6f}" + ) + + +# --------------------------------------------------------------------------- +# Property 6: Information Gain Monotonicity +# Feature: signal-math-upgrade, Property 6: Information Gain Monotonicity +# **Validates: Requirements 3.5** +# --------------------------------------------------------------------------- + + +class 
TestProperty6InformationGainMonotonicity: + """Property 6: Information Gain Monotonicity. + + For any two event type base rates p₁, p₂ ∈ (0, 1] where p₁ < p₂, + the information gain factor r(p₁) SHALL be ≥ r(p₂). Rarer events + always receive higher surprise weight. + + **Validates: Requirements 3.5** + """ + + @settings(max_examples=100) + @given( + p1=st.floats(min_value=0.001, max_value=1.0, allow_nan=False, allow_infinity=False), + p2=st.floats(min_value=0.001, max_value=1.0, allow_nan=False, allow_infinity=False), + ) + def test_info_gain_monotonically_decreasing_with_base_rate( + self, p1: float, p2: float, + ) -> None: + """Rarer events (lower base rate) always produce higher or equal info gain.""" + lo, hi = min(p1, p2), max(p1, p2) + + # compute_info_gain takes an event_type string and looks up the base rate. + # To test with arbitrary base rates we call it with a dummy event type + # and override the default_base_rate, since unknown event types use the + # default_base_rate fallback. However, the function looks up the event + # type in EVENT_TYPE_BASE_RATES first. Using None returns 1.0 immediately. + # Instead, we use a non-existent event type so it falls through to + # default_base_rate. + r_lo = compute_info_gain( + event_type="__test_lo__", + lambda_param=0.3, + max_gain=3.0, + default_base_rate=lo, + ) + r_hi = compute_info_gain( + event_type="__test_hi__", + lambda_param=0.3, + max_gain=3.0, + default_base_rate=hi, + ) + + assert r_lo >= r_hi - 1e-9, ( + f"Info gain not monotonic: r(p={lo})={r_lo} < r(p={hi})={r_hi}" + ) + + +# --------------------------------------------------------------------------- +# Property 5: Adaptive Decay Lower Bound +# Feature: signal-math-upgrade, Property 5: Adaptive Decay Lower Bound +# **Validates: Requirements 5.7, 17.4** +# --------------------------------------------------------------------------- + + +class TestProperty5AdaptiveDecayLowerBound: + """Property 5: Adaptive Decay Lower Bound. 
+ + For any valid combination of impact_score ∈ [0, 1], information gain + factor r ∈ [1.0, 3.0], and market context multiplier ∈ [1.0, 1.45], + the adaptive half-life τ_i SHALL be ≥ the base half-life τ_base. + + **Validates: Requirements 5.7, 17.4** + """ + + @settings(max_examples=100) + @given( + base_half_life=st.floats(min_value=0.1, max_value=1000.0, allow_nan=False, allow_infinity=False), + impact_score=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + info_gain_factor=st.floats(min_value=1.0, max_value=3.0, allow_nan=False, allow_infinity=False), + market_multiplier=st.floats(min_value=1.0, max_value=1.45, allow_nan=False, allow_infinity=False), + ) + def test_adaptive_half_life_never_below_base( + self, + base_half_life: float, + impact_score: float, + info_gain_factor: float, + market_multiplier: float, + ) -> None: + """Adaptive decay is always slower or equal to fixed decay, never faster.""" + tau = compute_adaptive_half_life( + base_half_life=base_half_life, + impact_score=impact_score, + info_gain_factor=info_gain_factor, + market_multiplier=market_multiplier, + config=ScoringConfig(), + ) + + assert tau >= base_half_life - 1e-9, ( + f"Adaptive half-life {tau} < base half-life {base_half_life} " + f"(impact={impact_score}, info_gain={info_gain_factor}, " + f"market={market_multiplier})" + ) + + +# --------------------------------------------------------------------------- +# Property 9: Contradiction Entropy Monotonicity +# Feature: signal-math-upgrade, Property 9: Contradiction Entropy Monotonicity +# **Validates: Requirements 15.7** +# --------------------------------------------------------------------------- + +from services.aggregation.contradiction import detect_contradictions + + +class TestProperty9ContradictionEntropyMonotonicity: + """Property 9: Contradiction Entropy Monotonicity. 
+ + For any set of weighted signals containing both positive and negative + sentiment signals, the contradiction entropy score SHALL increase + monotonically as the weight distribution f_pos approaches 0.5 (equal + split). More balanced disagreement always produces higher contradiction. + + **Validates: Requirements 15.7** + """ + + @settings(max_examples=100) + @given( + total_weight=st.floats(min_value=1.0, max_value=20.0, allow_nan=False, allow_infinity=False), + ratio_a=st.floats(min_value=0.05, max_value=0.95, allow_nan=False, allow_infinity=False), + ratio_b=st.floats(min_value=0.05, max_value=0.95, allow_nan=False, allow_infinity=False), + ) + def test_closer_to_equal_split_produces_higher_contradiction( + self, + total_weight: float, + ratio_a: float, + ratio_b: float, + ) -> None: + """The ratio closer to 0.5 always produces a higher or equal contradiction score.""" + # Determine which ratio is closer to 0.5 + dist_a = abs(ratio_a - 0.5) + dist_b = abs(ratio_b - 0.5) + + # Build signal sets for each ratio. + # Each set has one positive and one negative signal whose combined + # weights reflect the desired split. impact_score=1.0 so effective + # weight equals combined weight. + def _make_signals(ratio: float) -> list[WeightedSignal]: + pos_w = total_weight * ratio + neg_w = total_weight * (1.0 - ratio) + return [ + _make_weighted_signal( + sentiment_value=1.0, + combined_weight=pos_w, + doc_id="pos-signal", + ), + _make_weighted_signal( + sentiment_value=-1.0, + combined_weight=neg_w, + doc_id="neg-signal", + ), + ] + + # Use a high w_threshold so the evidence factor is the same for both + # (both have the same total weight). 
+ result_a = detect_contradictions( + _make_signals(ratio_a), probabilistic=True, w_threshold=5.0, + ) + result_b = detect_contradictions( + _make_signals(ratio_b), probabilistic=True, w_threshold=5.0, + ) + + # The ratio closer to 0.5 should have higher or equal contradiction + if dist_a < dist_b: + assert result_a.score >= result_b.score - 1e-9, ( + f"Contradiction not monotonic toward 0.5: " + f"ratio_a={ratio_a} (dist={dist_a:.4f}, score={result_a.score}) " + f"< ratio_b={ratio_b} (dist={dist_b:.4f}, score={result_b.score})" + ) + elif dist_b < dist_a: + assert result_b.score >= result_a.score - 1e-9, ( + f"Contradiction not monotonic toward 0.5: " + f"ratio_b={ratio_b} (dist={dist_b:.4f}, score={result_b.score}) " + f"< ratio_a={ratio_a} (dist={dist_a:.4f}, score={result_a.score})" + ) + else: + # Equal distance from 0.5 — scores should be equal + assert abs(result_a.score - result_b.score) < 1e-4, ( + f"Equal distance from 0.5 but different scores: " + f"ratio_a={ratio_a} (score={result_a.score}), " + f"ratio_b={ratio_b} (score={result_b.score})" + ) + + +# --------------------------------------------------------------------------- +# Property 7: Multiplicative Macro Exposure Monotonicity +# Feature: signal-math-upgrade, Property 7: Multiplicative Macro Exposure Monotonicity +# **Validates: Requirements 10.7, 17.5** +# --------------------------------------------------------------------------- + +from services.aggregation.interpolation import _compute_multiplicative_exposure + + +class TestProperty7MultiplicativeMacroExposureMonotonicity: + """Property 7: Multiplicative Macro Exposure Monotonicity. + + For any overlap configuration where one dimension O_k = 0, setting + O_k to any positive value SHALL increase the total macro impact score. + Multi-dimensional exposure always compounds — it never reduces impact. 
+ + **Validates: Requirements 10.7, 17.5** + """ + + @settings(max_examples=100) + @given( + geo=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + supply=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + commodity=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + sector=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + dimension=st.integers(min_value=0, max_value=3), + positive_value=st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False), + ) + def test_setting_zero_dimension_to_positive_increases_impact( + self, + geo: float, + supply: float, + commodity: float, + sector: float, + dimension: int, + positive_value: float, + ) -> None: + """Setting any zero-overlap dimension to a positive value increases impact.""" + overlaps = [geo, supply, commodity, sector] + + # Force the chosen dimension to zero for the baseline + overlaps[dimension] = 0.0 + baseline = _compute_multiplicative_exposure(*overlaps) + + # Set the chosen dimension to a positive value + overlaps[dimension] = positive_value + increased = _compute_multiplicative_exposure(*overlaps) + + assert increased >= baseline - 1e-12, ( + f"Multiplicative exposure not monotonic: " + f"baseline={baseline} (dim {dimension}=0.0), " + f"increased={increased} (dim {dimension}={positive_value})" + ) + + +# --------------------------------------------------------------------------- +# Property 11: Competitive Signal Distance Attenuation +# Feature: signal-math-upgrade, Property 11: Competitive Signal Distance Attenuation +# **Validates: Requirements 12.7** +# --------------------------------------------------------------------------- + +from services.aggregation.signal_propagation import compute_graph_distance_attenuation + + +class TestProperty11CompetitiveSignalDistanceAttenuation: + """Property 11: Competitive Signal Distance Attenuation. 
+ + For any source-target pair with fixed S_source and ρ_historical, + transfer strength SHALL decrease monotonically with increasing + graph distance d_network. + + **Validates: Requirements 12.7** + """ + + @settings(max_examples=100) + @given( + source_strength=st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False), + correlation=st.floats(min_value=0.01, max_value=1.0, allow_nan=False, allow_infinity=False), + d1=st.integers(min_value=1, max_value=3), + d2=st.integers(min_value=1, max_value=3), + ) + def test_transfer_decreases_with_distance( + self, + source_strength: float, + correlation: float, + d1: int, + d2: int, + ) -> None: + """Closer competitors always receive stronger signal transfer.""" + lo_dist, hi_dist = min(d1, d2), max(d1, d2) + + s_close = compute_graph_distance_attenuation(source_strength, correlation, lo_dist) + s_far = compute_graph_distance_attenuation(source_strength, correlation, hi_dist) + + assert s_close >= s_far - 1e-12, ( + f"Transfer not monotonically decreasing with distance: " + f"S(d={lo_dist})={s_close} < S(d={hi_dist})={s_far}" + ) + + +# --------------------------------------------------------------------------- +# Property 10: Exponentially Weighted Momentum Direction +# Feature: signal-math-upgrade, Property 10: Exponentially Weighted Momentum Direction +# **Validates: Requirements 13.6, 17.6** +# --------------------------------------------------------------------------- + +from services.aggregation.projection import compute_ew_momentum + + +class TestProperty10ExponentiallyWeightedMomentumDirection: + """Property 10: Exponentially Weighted Momentum Direction. + + For any sequence of monotonically increasing signed trend strengths + (each ΔS > 0), the EW momentum SHALL be positive. 
+ + **Validates: Requirements 13.6, 17.6** + """ + + @settings(max_examples=100) + @given( + deltas=st.lists( + st.floats(min_value=0.001, max_value=1.0, allow_nan=False, allow_infinity=False), + min_size=2, + max_size=10, + ), + ) + def test_monotonically_increasing_strengths_produce_positive_momentum( + self, + deltas: list[float], + ) -> None: + """Consistently strengthening bullish trends always produce positive momentum.""" + # All deltas are positive (monotonically increasing signed strengths) + momentum = compute_ew_momentum(deltas) + assert momentum > 0.0, ( + f"EW momentum should be positive for all-positive deltas: " + f"deltas={deltas}, momentum={momentum}" + ) + + +# --------------------------------------------------------------------------- +# Property 12: Expected Value Directional Consistency +# Feature: signal-math-upgrade, Property 12: Expected Value Directional Consistency +# **Validates: Requirements 17.8** +# --------------------------------------------------------------------------- + +from services.recommendation.eligibility import compute_expected_value + + +class TestProperty12ExpectedValueDirectionalConsistency: + """Property 12: Expected Value Directional Consistency. + + For any P_bull > 0.5 and estimated returns where R_up > R_down, + EV SHALL be positive. 
+ + **Validates: Requirements 17.8** + """ + + @settings(max_examples=100) + @given( + p_bull=st.floats(min_value=0.501, max_value=1.0, allow_nan=False, allow_infinity=False), + strength=st.floats(min_value=0.501, max_value=1.0, allow_nan=False, allow_infinity=False), + sigma_20=st.floats(min_value=0.001, max_value=1.0, allow_nan=False, allow_infinity=False), + horizon_days=st.floats(min_value=1.0, max_value=90.0, allow_nan=False, allow_infinity=False), + ) + def test_ev_positive_when_bullish_and_upside_exceeds_downside( + self, + p_bull: float, + strength: float, + sigma_20: float, + horizon_days: float, + ) -> None: + """When P_bull > 0.5 and strength > 0.5 (R_up > R_down), EV is positive.""" + # strength > 0.5 ensures R_up = strength * σ * √h > (1-strength) * σ * √h = R_down + ev = compute_expected_value(p_bull, strength, sigma_20, horizon_days) + assert ev > -1e-12, ( + f"EV should be positive when P_bull={p_bull} > 0.5 and " + f"strength={strength} > 0.5: EV={ev}" + ) + + +# --------------------------------------------------------------------------- +# Property 14: Numerical Stability Across All Formulas +# Feature: signal-math-upgrade, Property 14: Numerical Stability Across All Formulas +# **Validates: Requirements 17.9, 6.4** +# --------------------------------------------------------------------------- + +from services.aggregation.interpolation import _compute_multiplicative_exposure +from services.aggregation.projection import compute_volatility_scaled_momentum +from services.aggregation.scoring import compute_regime_multiplier, sigmoid_gate + + +class TestProperty14NumericalStabilityAcrossAllFormulas: + """Property 14: Numerical Stability Across All Formulas. + + For any valid input combination to any formula in the probabilistic + pipeline, the output SHALL be a finite float (not NaN, not infinity) + within the documented range. 
+ + Formulas tested: + - Sigmoid gate: output in [0, 1] + - Beta posterior (P_bull, alpha, beta, bayesian_confidence, entropy) + - Bayesian confidence: output in [0, 1] + - Adaptive decay: output >= base_half_life + - Regime multiplier: output in [1.0, 2.5] + - Shannon entropy: output in [0, 1] + - Multiplicative exposure: output in [0, ~0.689] + - EW momentum: output in [-1, 1] + - Volatility-scaled momentum: output in [-2.0, 2.0] + - Expected value: output is finite + + **Validates: Requirements 17.9, 6.4** + """ + + @settings(max_examples=100) + @given( + x=st.floats(min_value=-10.0, max_value=10.0, allow_nan=False, allow_infinity=False), + steepness=st.floats(min_value=0.1, max_value=50.0, allow_nan=False, allow_infinity=False), + midpoint=st.floats(min_value=-5.0, max_value=5.0, allow_nan=False, allow_infinity=False), + ) + def test_sigmoid_gate_finite_and_in_range( + self, x: float, steepness: float, midpoint: float, + ) -> None: + """Sigmoid gate always produces a finite float in [0, 1].""" + result = sigmoid_gate(x, steepness, midpoint) + assert math.isfinite(result), f"Sigmoid gate produced non-finite: {result}" + assert 0.0 <= result <= 1.0, f"Sigmoid gate out of range: {result}" + + @settings(max_examples=100) + @given( + signals=st.lists( + _weighted_signal_strategy(), + min_size=0, + max_size=30, + ), + ) + def test_bayesian_posterior_finite_and_in_range( + self, signals: list[WeightedSignal], + ) -> None: + """All Bayesian posterior outputs are finite and within documented ranges.""" + posterior = compute_bayesian_posterior(signals) + + # P_bull in [0, 1] + assert math.isfinite(posterior.p_bull), f"P_bull non-finite: {posterior.p_bull}" + assert 0.0 <= posterior.p_bull <= 1.0, f"P_bull out of range: {posterior.p_bull}" + + # Alpha >= 1.0 + assert math.isfinite(posterior.alpha), f"Alpha non-finite: {posterior.alpha}" + assert posterior.alpha >= 1.0, f"Alpha below 1.0: {posterior.alpha}" + + # Beta >= 1.0 + assert math.isfinite(posterior.beta), f"Beta
non-finite: {posterior.beta}" + assert posterior.beta >= 1.0, f"Beta below 1.0: {posterior.beta}" + + # Bayesian confidence in [0, 1] + assert math.isfinite(posterior.bayesian_confidence), ( + f"Bayesian confidence non-finite: {posterior.bayesian_confidence}" + ) + assert 0.0 <= posterior.bayesian_confidence <= 1.0 + 1e-9, ( + f"Bayesian confidence out of range: {posterior.bayesian_confidence}" + ) + + # Entropy in [0, 1] + assert math.isfinite(posterior.entropy), f"Entropy non-finite: {posterior.entropy}" + assert 0.0 <= posterior.entropy <= 1.0 + 1e-9, ( + f"Entropy out of range: {posterior.entropy}" + ) + + # Log-likelihood is finite + assert math.isfinite(posterior.log_likelihood), ( + f"Log-likelihood non-finite: {posterior.log_likelihood}" + ) + + @settings(max_examples=100) + @given( + base_half_life=st.floats(min_value=0.01, max_value=2000.0, allow_nan=False, allow_infinity=False), + impact_score=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + info_gain_factor=st.floats(min_value=1.0, max_value=3.0, allow_nan=False, allow_infinity=False), + market_multiplier=st.floats(min_value=1.0, max_value=2.5, allow_nan=False, allow_infinity=False), + ) + def test_adaptive_decay_finite_and_above_base( + self, + base_half_life: float, + impact_score: float, + info_gain_factor: float, + market_multiplier: float, + ) -> None: + """Adaptive decay always produces a finite half-life >= base.""" + tau = compute_adaptive_half_life( + base_half_life=base_half_life, + impact_score=impact_score, + info_gain_factor=info_gain_factor, + market_multiplier=market_multiplier, + config=ScoringConfig(), + ) + assert math.isfinite(tau), f"Adaptive half-life non-finite: {tau}" + assert tau >= base_half_life - 1e-9, ( + f"Adaptive half-life {tau} < base {base_half_life}" + ) + + @settings(max_examples=100) + @given( + returns=st.lists( + st.floats(min_value=-0.5, max_value=0.5, allow_nan=False, allow_infinity=False), + min_size=2, + max_size=30, + ), + 
volumes=st.lists( + st.floats(min_value=0.0, max_value=1e9, allow_nan=False, allow_infinity=False), + min_size=2, + max_size=30, + ), + ) + def test_regime_multiplier_finite_and_in_range( + self, returns: list[float], volumes: list[float], + ) -> None: + """Regime multiplier always produces a finite float in [1.0, 2.5].""" + result = compute_regime_multiplier(returns, volumes) + assert math.isfinite(result), f"Regime multiplier non-finite: {result}" + assert 1.0 <= result <= 2.5 + 1e-9, ( + f"Regime multiplier out of range: {result}" + ) + + @settings(max_examples=100) + @given( + p_bull=st.floats(min_value=-1.0, max_value=2.0, allow_nan=False, allow_infinity=False), + ) + def test_entropy_finite_and_in_range(self, p_bull: float) -> None: + """Shannon entropy always produces a finite float in [0, 1].""" + result = compute_entropy(p_bull) + assert math.isfinite(result), f"Entropy non-finite: {result}" + assert 0.0 <= result <= 1.0 + 1e-9, f"Entropy out of range: {result}" + + @settings(max_examples=100) + @given( + geo=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + supply=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + commodity=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + sector=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + ) + def test_multiplicative_exposure_finite_and_in_range( + self, geo: float, supply: float, commodity: float, sector: float, + ) -> None: + """Multiplicative exposure always produces a finite float in [0, 1].""" + result = _compute_multiplicative_exposure(geo, supply, commodity, sector) + assert math.isfinite(result), f"Multiplicative exposure non-finite: {result}" + assert -1e-9 <= result <= 1.0 + 1e-9, ( + f"Multiplicative exposure out of range: {result}" + ) + + @settings(max_examples=100) + @given( + deltas=st.lists( + st.floats(min_value=-2.0, max_value=2.0, allow_nan=False, allow_infinity=False), + 
min_size=0, + max_size=15, + ), + lambda_decay=st.floats(min_value=0.01, max_value=0.99, allow_nan=False, allow_infinity=False), + ) + def test_ew_momentum_finite_and_in_range( + self, deltas: list[float], lambda_decay: float, + ) -> None: + """EW momentum always produces a finite float in [-1, 1].""" + result = compute_ew_momentum(deltas, lambda_decay) + assert math.isfinite(result), f"EW momentum non-finite: {result}" + assert -1.0 - 1e-9 <= result <= 1.0 + 1e-9, ( + f"EW momentum out of range: {result}" + ) + + @settings(max_examples=100) + @given( + momentum=st.floats(min_value=-5.0, max_value=5.0, allow_nan=False, allow_infinity=False), + sigma_20=st.floats(min_value=-0.1, max_value=2.0, allow_nan=False, allow_infinity=False), + ) + def test_volatility_scaled_momentum_finite_and_in_range( + self, momentum: float, sigma_20: float, + ) -> None: + """Volatility-scaled momentum always produces a finite float in [-2.0, 2.0].""" + result = compute_volatility_scaled_momentum(momentum, sigma_20) + assert math.isfinite(result), ( + f"Volatility-scaled momentum non-finite: {result}" + ) + assert -2.0 - 1e-9 <= result <= 2.0 + 1e-9, ( + f"Volatility-scaled momentum out of range: {result}" + ) + + @settings(max_examples=100) + @given( + p_bull=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + strength=st.floats(min_value=0.0, max_value=1.0, allow_nan=False, allow_infinity=False), + sigma_20=st.floats(min_value=0.0, max_value=2.0, allow_nan=False, allow_infinity=False), + horizon_days=st.floats(min_value=0.0, max_value=365.0, allow_nan=False, allow_infinity=False), + ) + def test_expected_value_finite( + self, + p_bull: float, + strength: float, + sigma_20: float, + horizon_days: float, + ) -> None: + """Expected value always produces a finite float.""" + result = compute_expected_value(p_bull, strength, sigma_20, horizon_days) + assert math.isfinite(result), f"Expected value non-finite: {result}" diff --git a/tests/test_regime.py 
b/tests/test_regime.py new file mode 100644 index 0000000..177a3e9 --- /dev/null +++ b/tests/test_regime.py @@ -0,0 +1,237 @@ +"""Unit tests for regime detector (services/aggregation/regime.py). + +Tests specific (R, V_r) → regime classification, threshold adjustments +per regime, and insufficient data fallback to uncertainty. + +Requirements: 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.9 +""" +from __future__ import annotations + +import pytest + +from services.aggregation.regime import ( + MarketRegime, + classify_regime, + compute_ema, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_uptrend_prices(n: int = 120) -> list[float]: + """Generate prices with EMA_20 > EMA_100 (uptrend, R=+1).""" + # Start low, end high — recent prices much higher than old ones + return [100.0 + i * 0.5 for i in range(n)] + + +def _make_downtrend_prices(n: int = 120) -> list[float]: + """Generate prices with EMA_20 < EMA_100 (downtrend, R=-1).""" + # Start high, end low — recent prices much lower than old ones + return [200.0 - i * 0.5 for i in range(n)] + + +def _make_flat_prices(n: int = 120) -> list[float]: + """Generate flat prices where EMA_20 ≈ EMA_100 (R=0).""" + return [100.0] * n + + +def _make_low_vol_returns(n: int = 120) -> list[float]: + """Generate returns with σ_20 / σ_100 < 1.0 (low recent volatility).""" + # First 100 returns have higher variance, last 20 have lower variance + base = [0.02 * ((-1) ** i) for i in range(n - 20)] + recent = [0.005 * ((-1) ** i) for i in range(20)] + return base + recent + + +def _make_high_vol_returns(n: int = 120) -> list[float]: + """Generate returns with σ_20 / σ_100 > 1.5 (panic volatility).""" + # First 100 returns have low variance, last 20 have very high variance + base = [0.005 * ((-1) ** i) for i in range(n - 20)] + recent = [0.08 * ((-1) ** i) for i in range(20)] + return base + recent + + +def 
_make_moderate_vol_returns(n: int = 120) -> list[float]: + """Generate returns with V_r between 1.0 and 1.2.""" + # Slightly higher recent volatility + base = [0.01 * ((-1) ** i) for i in range(n - 20)] + recent = [0.012 * ((-1) ** i) for i in range(20)] + return base + recent + + +# --------------------------------------------------------------------------- +# compute_ema +# --------------------------------------------------------------------------- + + +class TestComputeEma: + """Test EMA computation.""" + + def test_single_value(self): + assert compute_ema([100.0], 1) == pytest.approx(100.0) + + def test_constant_values(self): + """EMA of constant values equals that constant.""" + assert compute_ema([50.0] * 20, 20) == pytest.approx(50.0) + + def test_empty_raises(self): + with pytest.raises(ValueError): + compute_ema([], 10) + + def test_zero_period_raises(self): + with pytest.raises(ValueError): + compute_ema([1.0, 2.0], 0) + + +# --------------------------------------------------------------------------- +# Regime classification: specific (R, V_r) → expected regime +# --------------------------------------------------------------------------- + + +class TestRegimeClassification: + """Test specific (R, V_r) → expected regime classification (Req 7.3).""" + + def test_trend_following_uptrend(self): + """R=+1, V_r < 1.2 → trend_following.""" + prices = _make_uptrend_prices() + returns = _make_moderate_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.TREND_FOLLOWING + assert result.trend_indicator == 1.0 + + def test_trend_following_downtrend(self): + """R=-1, V_r < 1.2 → trend_following.""" + prices = _make_downtrend_prices() + returns = _make_moderate_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.TREND_FOLLOWING + assert result.trend_indicator == -1.0 + + def test_panic_regime(self): + """V_r > 1.5 → panic (regardless of R).""" + prices = _make_uptrend_prices() + 
returns = _make_high_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.PANIC + + def test_mean_reversion_regime(self): + """R=0, V_r < 1.0 → mean_reversion.""" + prices = _make_flat_prices() + returns = _make_low_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.MEAN_REVERSION + + def test_uncertainty_regime(self): + """R=0, V_r between 1.0 and 1.5 → uncertainty.""" + prices = _make_flat_prices() + # Returns with V_r between 1.0 and 1.5 but not < 1.0 + # Use moderate vol that gives V_r ≈ 1.1 with flat prices + returns = _make_moderate_vol_returns() + result = classify_regime(prices, returns) + # With flat prices R=0, and moderate vol V_r ≈ 1.1 (> 1.0) + # This falls into uncertainty (R=0 AND V_r >= 1.0) + assert result.regime == MarketRegime.UNCERTAINTY + + +# --------------------------------------------------------------------------- +# Threshold adjustments per regime (Req 7.4, 7.5, 7.6, 7.7) +# --------------------------------------------------------------------------- + + +class TestRegimeThresholds: + """Test threshold adjustments per regime.""" + + def test_panic_threshold(self): + """Panic regime → threshold ±0.10 (Req 7.4).""" + prices = _make_uptrend_prices() + returns = _make_high_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.PANIC + assert result.bullish_threshold == pytest.approx(0.10) + assert result.bearish_threshold == pytest.approx(-0.10) + + def test_mean_reversion_threshold(self): + """Mean-reversion regime → threshold ±0.20 (Req 7.5).""" + prices = _make_flat_prices() + returns = _make_low_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.MEAN_REVERSION + assert result.bullish_threshold == pytest.approx(0.20) + assert result.bearish_threshold == pytest.approx(-0.20) + + def test_trend_following_threshold(self): + """Trend-following regime → threshold ±0.15 
(Req 7.6).""" + prices = _make_uptrend_prices() + returns = _make_moderate_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.TREND_FOLLOWING + assert result.bullish_threshold == pytest.approx(0.15) + assert result.bearish_threshold == pytest.approx(-0.15) + + def test_uncertainty_contradiction_multiplier(self): + """Uncertainty regime → contradiction multiplier 0.6 (Req 7.7).""" + prices = _make_flat_prices() + returns = _make_moderate_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.UNCERTAINTY + assert result.contradiction_penalty_multiplier == pytest.approx(0.6) + + def test_non_uncertainty_contradiction_multiplier(self): + """Non-uncertainty regimes → contradiction multiplier 0.4.""" + prices = _make_uptrend_prices() + returns = _make_moderate_vol_returns() + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.TREND_FOLLOWING + assert result.contradiction_penalty_multiplier == pytest.approx(0.4) + + +# --------------------------------------------------------------------------- +# Insufficient data fallback to uncertainty (Req 7.9) +# --------------------------------------------------------------------------- + + +class TestInsufficientDataFallback: + """Test fallback to uncertainty when data is insufficient.""" + + def test_too_few_prices(self): + """Fewer than 100 closing prices → uncertainty.""" + prices = [100.0] * 50 # only 50 days + returns = [0.01] * 100 + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.UNCERTAINTY + + def test_too_few_returns(self): + """Fewer than 100 returns → uncertainty.""" + prices = [100.0] * 120 + returns = [0.01] * 50 # only 50 returns + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.UNCERTAINTY + + def test_empty_prices(self): + """Empty price list → uncertainty.""" + result = classify_regime([], [0.01] * 100) + assert result.regime == 
MarketRegime.UNCERTAINTY + + def test_empty_returns(self): + """Empty return list → uncertainty.""" + result = classify_regime([100.0] * 120, []) + assert result.regime == MarketRegime.UNCERTAINTY + + def test_zero_sigma_returns_uncertainty(self): + """All identical returns (σ=0) → uncertainty.""" + prices = _make_uptrend_prices() + returns = [0.0] * 120 # zero standard deviation + result = classify_regime(prices, returns) + assert result.regime == MarketRegime.UNCERTAINTY + + def test_default_uncertainty_values(self): + """Default uncertainty has standard threshold values.""" + result = classify_regime([], []) + assert result.regime == MarketRegime.UNCERTAINTY + assert result.bullish_threshold == pytest.approx(0.15) + assert result.bearish_threshold == pytest.approx(-0.15) + assert result.contradiction_penalty_multiplier == pytest.approx(0.6) + assert result.trend_indicator == 0.0 + assert result.volatility_ratio == 1.0 diff --git a/tests/test_signal_math_unit.py b/tests/test_signal_math_unit.py new file mode 100644 index 0000000..9194e1e --- /dev/null +++ b/tests/test_signal_math_unit.py @@ -0,0 +1,535 @@ +"""Unit tests for signal scoring upgrades and pipeline-wide behaviors. + +Tests information gain, adaptive decay, macro exposure, macro integration, +graph distance, momentum, EV gate, and feature flag behaviors. 
+ +Requirements: 3.1, 3.4, 5.5, 5.6, 10.3, 10.4, 11.3, 13.3, 14.3, 14.4, 16.4, 16.5 +""" +from __future__ import annotations + +import math +from datetime import datetime, timezone + +import pytest + +from services.aggregation.interpolation import ( + _compute_multiplicative_exposure, + integrate_macro_signals, +) +from services.aggregation.projection import ( + compute_ew_momentum, + compute_trend_momentum, +) +from services.aggregation.scoring import ( + DEFAULT_BASE_RATE, + ScoringConfig, + SignalWeight, + WeightedSignal, + compute_adaptive_half_life, + compute_info_gain, + compute_regime_multiplier, + compute_signal_weight, +) +from services.aggregation.signal_propagation import ( + compute_graph_distance_attenuation, +) +from services.recommendation.eligibility import ( + compute_expected_value, + evaluate_eligibility, +) +from services.shared.schemas import ( + RecommendationMode, + TrendDirection, + TrendSummary, + TrendWindow, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_trend_summary(**overrides) -> TrendSummary: + """Create a minimal TrendSummary for testing.""" + defaults = { + "entity_id": "test-company", + "ticker": "TEST", + "window": TrendWindow.SEVEN_DAY, + "trend_direction": TrendDirection.BULLISH, + "trend_strength": 0.5, + "confidence": 0.6, + "contradiction_score": 0.1, + "signal_count": 5, + "unique_source_count": 3, + "weighted_sentiment_avg": 0.4, + "top_supporting_evidence": ["doc-1", "doc-2"], + "top_opposing_evidence": ["doc-3"], + "material_risks": [], + } + defaults.update(overrides) + return TrendSummary(**defaults) + + +def _make_signal( + sentiment: float, + combined_weight: float = 1.0, + impact: float = 1.0, +) -> WeightedSignal: + """Create a minimal WeightedSignal for testing.""" + weight = SignalWeight( + recency=1.0, + credibility=1.0, + novelty_bonus=0.0, + confidence_gate=1.0, + 
market_ctx_multiplier=1.0, + combined=combined_weight, + ) + return WeightedSignal( + document_id="test-doc", + weight=weight, + sentiment_value=sentiment, + impact_score=impact, + ) + + +# --------------------------------------------------------------------------- +# Information gain clamp (Req 3.4) +# --------------------------------------------------------------------------- + + +class TestInfoGainClamp: + """Test info gain clamp: very rare event → factor ≤ 3.0.""" + + def test_very_rare_event_clamped(self): + """An event with extremely low base rate is clamped to 3.0.""" + # base_rate = 0.001 → -log₂(0.001) ≈ 9.97 → r = 1 + 0.3*9.97 ≈ 3.99 + # Should be clamped to 3.0 + result = compute_info_gain("unknown_type", lambda_param=0.3, max_gain=3.0, default_base_rate=0.001) + assert result <= 3.0 + + def test_m_and_a_high_gain(self): + """M&A (base_rate=0.03) produces high but clamped gain.""" + result = compute_info_gain("m_and_a") + assert result > 1.0 + assert result <= 3.0 + + def test_earnings_low_gain(self): + """Earnings (base_rate=0.25) produces modest gain.""" + result = compute_info_gain("earnings") + assert result >= 1.0 + assert result < 2.0 + + def test_none_event_type_returns_one(self): + """None event type returns neutral factor 1.0.""" + assert compute_info_gain(None) == 1.0 + + +# --------------------------------------------------------------------------- +# Default base rate (Req 3.2) +# --------------------------------------------------------------------------- + + +class TestDefaultBaseRate: + """Test default base rate: unknown event type → 0.1.""" + + def test_unknown_event_uses_default(self): + """Unknown event type uses DEFAULT_BASE_RATE = 0.1.""" + result = compute_info_gain("completely_unknown_event") + expected = 1.0 + 0.3 * (-math.log2(DEFAULT_BASE_RATE)) + assert result == pytest.approx(min(expected, 3.0), abs=0.01) + + def test_default_base_rate_value(self): + """DEFAULT_BASE_RATE is 0.1.""" + assert DEFAULT_BASE_RATE == 0.1 + + +# 
--------------------------------------------------------------------------- +# Adaptive decay edge cases (Req 5.5, 5.6) +# --------------------------------------------------------------------------- + + +class TestAdaptiveDecayEdgeCases: + """Test adaptive decay: all zeros → τ_base, all max → 6×τ_base.""" + + def test_all_zeros_gives_base(self): + """All β factors zero → τ_i = τ_base (Req 5.6).""" + config = ScoringConfig(probabilistic=True) + result = compute_adaptive_half_life( + base_half_life=72.0, + impact_score=0.0, + info_gain_factor=1.0, # r=1 → β_surprise=0 + market_multiplier=1.0, # M=1 → β_market=0 + config=config, + ) + assert result == pytest.approx(72.0) + + def test_all_max_gives_six_times_base(self): + """All β factors at max → τ_i ≈ 6×τ_base (Req 5.5). + + β_impact = 1.0 * 1.0 = 1.0 + β_surprise = ((3.0 - 1.0) / 2.0) * 1.0 = 1.0 + β_market = ((1.45 - 1.0) / 0.45) * 0.5 = 0.5 + τ = 72 * (1+1) * (1+1) * (1+0.5) = 72 * 2 * 2 * 1.5 = 432 = 6 * 72 + """ + config = ScoringConfig( + probabilistic=True, + adaptive_decay_impact_scale=1.0, + adaptive_decay_surprise_scale=1.0, + adaptive_decay_market_scale=0.5, + ) + result = compute_adaptive_half_life( + base_half_life=72.0, + impact_score=1.0, + info_gain_factor=3.0, + market_multiplier=1.45, + config=config, + ) + assert result == pytest.approx(72.0 * 6.0, rel=0.01) + + def test_adaptive_never_below_base(self): + """Adaptive half-life is always >= base (Property 5).""" + config = ScoringConfig(probabilistic=True) + result = compute_adaptive_half_life( + base_half_life=72.0, + impact_score=0.5, + info_gain_factor=2.0, + market_multiplier=1.2, + config=config, + ) + assert result >= 72.0 + + +# --------------------------------------------------------------------------- +# Zero overlap → zero macro impact (Req 10.3) +# --------------------------------------------------------------------------- + + +class TestZeroOverlapMacro: + """Test zero overlap → zero macro impact.""" + + def test_all_zero_overlaps(self): 
+ """All overlaps zero → exposure = 0.0.""" + exposure = _compute_multiplicative_exposure(0.0, 0.0, 0.0, 0.0) + assert exposure == 0.0 + + +# --------------------------------------------------------------------------- +# Max overlap → ≈severity×0.689 (Req 10.4) +# --------------------------------------------------------------------------- + + +class TestMaxOverlapMacro: + """Test max overlap → ≈severity×0.689.""" + + def test_all_max_overlaps(self): + """All overlaps 1.0 → exposure ≈ 0.689. + + 1 - (1-0.35)*(1-0.25)*(1-0.25)*(1-0.15) = 1 - 0.65*0.75*0.75*0.85 ≈ 0.689 + """ + exposure = _compute_multiplicative_exposure(1.0, 1.0, 1.0, 1.0) + expected = 1.0 - (0.65 * 0.75 * 0.75 * 0.85) + assert exposure == pytest.approx(expected, abs=0.001) + assert exposure > 0.5 # significantly above zero + + +# --------------------------------------------------------------------------- +# Macro fallback behaviors (Req 11.3, 11.4) +# --------------------------------------------------------------------------- + + +class TestMacroFallbackBehaviors: + """Test macro fallback: only macro → additive, only company → no modifier.""" + + def test_only_macro_additive_fallback(self): + """Only macro signals → additive merge (Req 11.3).""" + macro_signals = [_make_signal(sentiment=-1.0)] + merged, modifier = integrate_macro_signals( + company_signals=[], + macro_signals=macro_signals, + company_direction="neutral", + macro_impacts=[], + probabilistic=True, + ) + # Macro-only: returns macro signals, modifier = 1.0 + assert len(merged) == 1 + assert modifier == 1.0 + + def test_only_company_no_modifier(self): + """Only company signals → modifier = 1.0 (Req 11.4).""" + company_signals = [_make_signal(sentiment=1.0)] + merged, modifier = integrate_macro_signals( + company_signals=company_signals, + macro_signals=[], + company_direction="bullish", + macro_impacts=[], + probabilistic=True, + ) + assert len(merged) == 1 + assert modifier == 1.0 + + def test_heuristic_mode_additive_merge(self): +
"""Heuristic mode: simple concatenation of all signals.""" + company = [_make_signal(sentiment=1.0)] + macro = [_make_signal(sentiment=-1.0)] + merged, modifier = integrate_macro_signals( + company_signals=company, + macro_signals=macro, + company_direction="bullish", + macro_impacts=[], + probabilistic=False, + ) + assert len(merged) == 2 + assert modifier == 1.0 + + +# --------------------------------------------------------------------------- +# Graph distance cutoff (Req 12.3) +# --------------------------------------------------------------------------- + + +class TestGraphDistanceCutoff: + """Test graph distance cutoff: d>3 → no propagation.""" + + def test_distance_4_no_propagation(self): + """Distance 4 → transfer strength = 0.0.""" + result = compute_graph_distance_attenuation( + source_strength=1.0, correlation=1.0, distance=4, + ) + assert result == 0.0 + + def test_distance_3_propagates(self): + """Distance 3 → still propagates (e^(-3) ≈ 0.05).""" + result = compute_graph_distance_attenuation( + source_strength=1.0, correlation=1.0, distance=3, + ) + assert result > 0.0 + assert result == pytest.approx(math.exp(-3), abs=0.001) + + def test_distance_1_strongest(self): + """Distance 1 → strongest propagation.""" + d1 = compute_graph_distance_attenuation(1.0, 1.0, 1) + d2 = compute_graph_distance_attenuation(1.0, 1.0, 2) + d3 = compute_graph_distance_attenuation(1.0, 1.0, 3) + assert d1 > d2 > d3 > 0.0 + + def test_distance_0_no_propagation(self): + """Distance 0 → no propagation (self-loop).""" + result = compute_graph_distance_attenuation(1.0, 1.0, 0) + assert result == 0.0 + + +# --------------------------------------------------------------------------- +# Momentum fallback (Req 13.3) +# --------------------------------------------------------------------------- + + +class TestMomentumFallback: + """Test momentum fallback: <2 cycles → heuristic.""" + + def test_empty_changes_returns_zero(self): + """Empty list → 0.0 (fallback).""" + assert 
compute_ew_momentum([]) == 0.0 + + def test_single_change_returns_zero(self): + """Single change → 0.0 (fewer than 2 cycles).""" + assert compute_ew_momentum([0.5]) == 0.0 + + def test_two_changes_computes(self): + """Two changes → computes EW momentum.""" + result = compute_ew_momentum([0.3, 0.2]) + assert result != 0.0 + + def test_heuristic_fallback_for_trend_momentum(self): + """compute_trend_momentum with no previous data uses heuristic.""" + result = compute_trend_momentum( + current_strength=0.6, + current_direction="bullish", + previous_strength=None, + previous_direction=None, + ) + # Heuristic: dir_sign * strength * 0.5 = 1.0 * 0.6 * 0.5 = 0.3 + assert result == pytest.approx(0.3, abs=0.01) + + +# --------------------------------------------------------------------------- +# EV threshold behavior (Req 14.3, 14.4) +# --------------------------------------------------------------------------- + + +class TestEVThresholdBehavior: + """Test EV threshold: EV>0.005→proceed, EV≤0.005→informational.""" + + def test_positive_ev_proceeds(self): + """EV > 0.005 → recommendation proceeds normally.""" + summary = _make_trend_summary( + trend_direction=TrendDirection.BULLISH, + trend_strength=0.5, + confidence=0.7, + ) + result = evaluate_eligibility( + summary, + probabilistic=True, + p_bull=0.8, + sigma_20=0.02, + ) + # With p_bull=0.8, strength=0.5, sigma_20=0.02, horizon=7d: + # R_up = 0.5 * 0.02 * sqrt(7) ≈ 0.0265 + # R_down = 0.5 * 0.02 * sqrt(7) ≈ 0.0265 + # EV = 0.8 * 0.0265 - 0.2 * 0.0265 ≈ 0.0159 + assert result.ev_value is not None + assert result.ev_value > 0.005 + assert result.pipeline_mode == "probabilistic" + + def test_low_ev_forces_informational(self): + """EV ≤ 0.005 → forced to informational mode (Req 14.4).""" + summary = _make_trend_summary( + trend_direction=TrendDirection.BULLISH, + trend_strength=0.5, + confidence=0.7, + ) + # p_bull near 0.5 → EV near 0 + result = evaluate_eligibility( + summary, + probabilistic=True, + p_bull=0.5, + 
sigma_20=0.001, # very low vol → tiny EV + ) + assert result.ev_value is not None + assert result.ev_value <= 0.005 + assert result.mode == RecommendationMode.INFORMATIONAL + + def test_ev_computation_values(self): + """Verify EV computation formula directly.""" + ev = compute_expected_value( + p_bull=0.7, + strength=0.5, + sigma_20=0.02, + horizon_days=7.0, + ) + # R_up = 0.5 * 0.02 * sqrt(7) ≈ 0.02646 + # R_down = 0.5 * 0.02 * sqrt(7) ≈ 0.02646 + # EV = 0.7 * 0.02646 - 0.3 * 0.02646 ≈ 0.01058 + assert ev > 0.005 + assert ev == pytest.approx(0.7 * 0.5 * 0.02 * math.sqrt(7) - 0.3 * 0.5 * 0.02 * math.sqrt(7), abs=0.001) + + +# --------------------------------------------------------------------------- +# Feature flag behaviors (Req 16.4, 16.5) +# --------------------------------------------------------------------------- + + +class TestFeatureFlagBehaviors: + """Test flag=false→heuristic, flag=true→probabilistic.""" + + def test_heuristic_mode_binary_gate(self): + """flag=false → uses binary confidence gate.""" + config = ScoringConfig(probabilistic=False) + now = datetime.now(timezone.utc) + + # Below confidence floor → gate = 0 + result = compute_signal_weight( + published_at=now, + reference_time=now, + window="7d", + source_credibility=0.8, + extraction_confidence=0.1, # below floor of 0.2 + config=config, + ) + assert result.confidence_gate == 0.0 + assert result.combined == 0.0 + assert result.sigmoid_gate is None + + def test_probabilistic_mode_sigmoid_gate(self): + """flag=true → uses sigmoid confidence gate.""" + config = ScoringConfig(probabilistic=True) + now = datetime.now(timezone.utc) + + result = compute_signal_weight( + published_at=now, + reference_time=now, + window="7d", + source_credibility=0.8, + extraction_confidence=0.5, + config=config, + ) + assert result.sigmoid_gate is not None + assert result.sigmoid_gate == pytest.approx(0.5, abs=0.01) + assert result.combined > 0.0 + + def test_heuristic_mode_no_info_gain(self): + """flag=false → 
info_gain_factor stays at default 1.0.""" + config = ScoringConfig(probabilistic=False) + now = datetime.now(timezone.utc) + + result = compute_signal_weight( + published_at=now, + reference_time=now, + window="7d", + source_credibility=0.8, + extraction_confidence=0.8, + event_type="m_and_a", + config=config, + ) + assert result.info_gain_factor == 1.0 + + def test_probabilistic_mode_has_info_gain(self): + """flag=true → info_gain_factor computed from event type.""" + config = ScoringConfig(probabilistic=True) + now = datetime.now(timezone.utc) + + result = compute_signal_weight( + published_at=now, + reference_time=now, + window="7d", + source_credibility=0.8, + extraction_confidence=0.8, + event_type="m_and_a", + config=config, + ) + assert result.info_gain_factor > 1.0 + + def test_heuristic_eligibility_skips_ev(self): + """flag=false → EV gate is skipped entirely.""" + summary = _make_trend_summary() + result = evaluate_eligibility(summary, probabilistic=False) + assert result.ev_value is None + assert result.pipeline_mode == "heuristic" + + def test_probabilistic_eligibility_computes_ev(self): + """flag=true → EV is computed.""" + summary = _make_trend_summary() + result = evaluate_eligibility( + summary, probabilistic=True, p_bull=0.7, sigma_20=0.02, + ) + assert result.ev_value is not None + assert result.pipeline_mode == "probabilistic" + + +# --------------------------------------------------------------------------- +# Regime multiplier edge cases +# --------------------------------------------------------------------------- + + +class TestRegimeMultiplierEdgeCases: + """Test regime multiplier with edge case inputs.""" + + def test_no_returns_gives_one(self): + """No returns → M_regime = 1.0.""" + assert compute_regime_multiplier(None, None) == 1.0 + + def test_single_return_gives_one(self): + """Single return → M_regime = 1.0 (need at least 2).""" + assert compute_regime_multiplier([0.01], None) == 1.0 + + def test_constant_returns_gives_one(self): + 
"""Constant returns (σ=0) → z_r=0 → M_regime = 1.0.""" + returns = [0.01] * 20 + result = compute_regime_multiplier(returns, None) + assert result == pytest.approx(1.0) + + def test_clamped_to_max(self): + """Extreme z-scores → clamped to 2.5.""" + # Create returns with extreme outlier + returns = [0.001] * 19 + [10.0] + result = compute_regime_multiplier(returns, None) + assert result <= 2.5 diff --git a/tests/test_source_accuracy.py b/tests/test_source_accuracy.py new file mode 100644 index 0000000..929872d --- /dev/null +++ b/tests/test_source_accuracy.py @@ -0,0 +1,241 @@ +"""Tests for source accuracy tracker — SourceAccuracy dataclass and +database functions.""" +from __future__ import annotations + +from datetime import datetime, timezone +from unittest.mock import AsyncMock + +import pytest + +from services.aggregation.source_accuracy import ( + SourceAccuracy, + fetch_source_accuracy, + update_source_accuracy, +) + +# --------------------------------------------------------------------------- +# SourceAccuracy.accuracy_factor property +# --------------------------------------------------------------------------- + + +def test_accuracy_factor_low_sample_count(): + """When sample_count < 10, accuracy_factor returns neutral 1.0.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=0.9, + sample_count=5, + last_updated=datetime.now(timezone.utc), + ) + assert sa.accuracy_factor == 1.0 + + +def test_accuracy_factor_exactly_ten_samples(): + """When sample_count == 10, accuracy_factor uses the formula.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=0.8, + sample_count=10, + last_updated=datetime.now(timezone.utc), + ) + assert abs(sa.accuracy_factor - 1.3) < 1e-9 + + +def test_accuracy_factor_zero_accuracy(): + """0% accuracy with enough samples gives factor 0.5.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=0.0, + sample_count=100, + last_updated=datetime.now(timezone.utc), + ) + assert abs(sa.accuracy_factor - 0.5) < 
1e-9 + + +def test_accuracy_factor_full_accuracy(): + """100% accuracy with enough samples gives factor 1.5.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=1.0, + sample_count=100, + last_updated=datetime.now(timezone.utc), + ) + assert abs(sa.accuracy_factor - 1.5) < 1e-9 + + +def test_accuracy_factor_clamps_corrupted_high(): + """Corrupted accuracy_ratio > 1.0 is clamped to 1.0 in the factor.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=2.5, + sample_count=50, + last_updated=datetime.now(timezone.utc), + ) + # clamped to 1.0 → factor = 0.5 + 1.0 = 1.5 + assert abs(sa.accuracy_factor - 1.5) < 1e-9 + + +def test_accuracy_factor_clamps_corrupted_negative(): + """Corrupted accuracy_ratio < 0.0 is clamped to 0.0 in the factor.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=-0.3, + sample_count=50, + last_updated=datetime.now(timezone.utc), + ) + # clamped to 0.0 → factor = 0.5 + 0.0 = 0.5 + assert abs(sa.accuracy_factor - 0.5) < 1e-9 + + +def test_accuracy_factor_nine_samples_neutral(): + """sample_count=9 is still below threshold, returns 1.0.""" + sa = SourceAccuracy( + source_id="src-1", + accuracy_ratio=0.0, + sample_count=9, + last_updated=datetime.now(timezone.utc), + ) + assert sa.accuracy_factor == 1.0 + + +# --------------------------------------------------------------------------- +# fetch_source_accuracy +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_fetch_source_accuracy_empty_ids(): + """Empty source_ids list returns empty dict without querying.""" + pool = AsyncMock() + result = await fetch_source_accuracy(pool, []) + assert result == {} + pool.fetch.assert_not_called() + + +@pytest.mark.asyncio +async def test_fetch_source_accuracy_returns_records(): + """Successful fetch returns SourceAccuracy records keyed by source_id.""" + now = datetime.now(timezone.utc) + pool = AsyncMock() + pool.fetch = AsyncMock(return_value=[ + { + 
"source_id": "src-a", + "accuracy_ratio": 0.75, + "sample_count": 20, + "last_updated": now, + }, + { + "source_id": "src-b", + "accuracy_ratio": 0.4, + "sample_count": 15, + "last_updated": now, + }, + ]) + + result = await fetch_source_accuracy(pool, ["src-a", "src-b"]) + + assert len(result) == 2 + assert result["src-a"].accuracy_ratio == 0.75 + assert result["src-a"].sample_count == 20 + assert result["src-b"].accuracy_ratio == 0.4 + + +@pytest.mark.asyncio +async def test_fetch_source_accuracy_clamps_corrupted(): + """Corrupted accuracy_ratio values are clamped to [0.0, 1.0].""" + now = datetime.now(timezone.utc) + pool = AsyncMock() + pool.fetch = AsyncMock(return_value=[ + { + "source_id": "src-bad", + "accuracy_ratio": 1.5, + "sample_count": 30, + "last_updated": now, + }, + ]) + + result = await fetch_source_accuracy(pool, ["src-bad"]) + assert result["src-bad"].accuracy_ratio == 1.0 + + +@pytest.mark.asyncio +async def test_fetch_source_accuracy_db_error_returns_empty(): + """When the database is unreachable, returns empty dict.""" + pool = AsyncMock() + pool.fetch = AsyncMock(side_effect=Exception("connection refused")) + + result = await fetch_source_accuracy(pool, ["src-a"]) + assert result == {} + + +# --------------------------------------------------------------------------- +# update_source_accuracy +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_update_source_accuracy_empty_outcomes(): + """Empty outcomes list does nothing.""" + pool = AsyncMock() + await update_source_accuracy(pool, "src-1", []) + pool.execute.assert_not_called() + + +@pytest.mark.asyncio +async def test_update_source_accuracy_counts_correctly(): + """Correct and incorrect predictions are counted properly.""" + pool = AsyncMock() + pool.execute = AsyncMock() + + outcomes = [ + ("bullish", 0.05), # correct + ("bullish", -0.02), # incorrect + ("bearish", -0.03), # correct + ("bearish", 0.01), # incorrect + ] + 
+ await update_source_accuracy(pool, "src-1", outcomes) + + pool.execute.assert_called_once() + call_args = pool.execute.call_args + # accuracy_ratio = 2/4 = 0.5, total = 4 + assert abs(call_args[0][2] - 0.5) < 1e-9 # accuracy_ratio + assert call_args[0][3] == 4 # total + + +@pytest.mark.asyncio +async def test_update_source_accuracy_skips_neutral(): + """Neutral predictions and zero returns are excluded.""" + pool = AsyncMock() + pool.execute = AsyncMock() + + outcomes = [ + ("neutral", 0.05), # skipped — neutral direction + ("bullish", 0.0), # skipped — zero return + ("bullish", 0.03), # counted — correct + ] + + await update_source_accuracy(pool, "src-1", outcomes) + + pool.execute.assert_called_once() + call_args = pool.execute.call_args + # accuracy_ratio = 1/1 = 1.0, total = 1 + assert abs(call_args[0][2] - 1.0) < 1e-9 + assert call_args[0][3] == 1 + + +@pytest.mark.asyncio +async def test_update_source_accuracy_all_neutral_skips(): + """When all outcomes are neutral/zero, no DB call is made.""" + pool = AsyncMock() + await update_source_accuracy(pool, "src-1", [("neutral", 0.05)]) + pool.execute.assert_not_called() + + +@pytest.mark.asyncio +async def test_update_source_accuracy_db_error_logs_and_continues(): + """DB errors are logged but do not raise.""" + pool = AsyncMock() + pool.execute = AsyncMock(side_effect=Exception("connection refused")) + + # Should not raise + await update_source_accuracy(pool, "src-1", [("bullish", 0.05)])