feat: signal math upgrade — probabilistic, regime-aware scoring pipeline

Implement full probabilistic signal processing pipeline gated behind probabilistic_scoring_enabled feature flag in risk_configs: - Bayesian log-likelihood accumulator with Beta posterior and entropy - Regime detector (trend-following, panic, mean-reversion, uncertainty) - Source accuracy tracker with per-source historical prediction accuracy - Sigmoid confidence gate replacing binary gate - Information gain surprise weighting for rare events - Adaptive recency decay with event-specific half-lives - Regime multiplier replacing market context multiplier - Weighted disagreement entropy for contradiction detection - Multiplicative macro exposure with conditional integration - Graph-distance attenuated competitive signal propagation - Exponentially weighted momentum with volatility scaling - Expected value recommendation gate All changes backward-compatible: flag=false preserves exact current behavior. New outputs stored in existing JSONB columns (no schema changes except source_accuracy table via migration 034). Tests: 26 property-based tests (14 correctness properties), 99 unit tests, 1789 total tests passing with zero regressions.
2026-04-29 11:41:48 +00:00
parent 8c3c1aab43
commit 4e010bc048
24 changed files with 6058 additions and 60 deletions
@@ -19,6 +19,10 @@ from typing import Any

 import asyncpg

+from services.aggregation.bayesian import (
+    BayesianPosterior,
+    compute_bayesian_posterior,
+)
 from services.aggregation.contradiction import CatalystEntry, detect_contradictions
 from services.aggregation.evidence import (
    EvidenceRankConfig,
@@ -28,6 +32,7 @@ from services.aggregation.evidence import (
 from services.aggregation.evidence import (
    rank_evidence as _rank_evidence_composite,
 )
+from services.aggregation.interpolation import integrate_macro_signals
 from services.aggregation.market_context import fetch_market_context
 from services.aggregation.pattern_matcher import find_self_patterns
 from services.aggregation.projection import (
@@ -35,6 +40,11 @@ from services.aggregation.projection import (
    compute_projection,
    persist_trend_projection,
 )
+from services.aggregation.regime import (
+    MarketRegime,
+    RegimeClassification,
+    classify_regime,
+)
 from services.aggregation.scoring import (
    ScoringConfig,
    WeightedSignal,
@@ -46,6 +56,7 @@ from services.aggregation.signal_propagation import (
    CompetitiveSignalRecord,
    build_pattern_weighted_signals,
 )
+from services.aggregation.source_accuracy import fetch_source_accuracy
 from services.shared.metrics import (
    AGGREGATION_CONTRADICTION_SCORE,
    AGGREGATION_DURATION,
@@ -80,6 +91,7 @@ class AggregationConfig:
    macro_enabled: bool = True  # runtime toggle state
    competitive_signal_weight: float = 0.2  # relative weight of pattern signals
    competitive_enabled: bool = True  # runtime toggle state
+    probabilistic_scoring_enabled: bool = False  # probabilistic pipeline toggle

    def effective_windows(self) -> list[str]:
        if self.windows:
@@ -232,6 +244,59 @@ async def fetch_competitive_enabled(pool: asyncpg.Pool) -> bool | None:
    return row["competitive_enabled"].lower() == "true"


+# ---------------------------------------------------------------------------
+# Fetch probabilistic scoring toggle from risk_configs
+#
+# PROBABILISTIC PIPELINE TOGGLE (Requirements 16.3, 16.4, 16.5, 16.6, 16.7):
+# - Read once per aggregation cycle from the risk_configs table.
+# - When False (default): the heuristic pipeline is used — identical outputs
+#   to the current system.
+# - When True: the new Bayesian, regime-aware, and adaptive formulas are
+#   used for all pipeline stages.
+# - Defaults to False when the key is missing, the value is invalid, or the
+#   database is unreachable (fail-safe to heuristic mode).
+# ---------------------------------------------------------------------------
+
+_PROBABILISTIC_TOGGLE_QUERY = """
+SELECT config->>'probabilistic_scoring_enabled' AS probabilistic_scoring_enabled
+FROM risk_configs
+WHERE active = TRUE
+ORDER BY updated_at DESC
+LIMIT 1
+"""
+
+
+async def fetch_probabilistic_scoring_enabled(pool: asyncpg.Pool) -> bool:
+    """Check probabilistic scoring toggle from risk_configs table.
+
+    Returns True when explicitly enabled, False in all other cases
+    (missing key, invalid value, no config row, DB error).
+    This is fail-safe: any failure defaults to the heuristic pipeline.
+
+    Requirements: 16.3, 16.6
+    """
+    try:
+        row = await pool.fetchrow(_PROBABILISTIC_TOGGLE_QUERY)
+        if row is None or row["probabilistic_scoring_enabled"] is None:
+            return False
+        raw = row["probabilistic_scoring_enabled"]
+        if not isinstance(raw, str) or raw.lower() not in ("true", "false"):
+            logger.warning(
+                "Invalid probabilistic_scoring_enabled value %r in "
+                "risk_configs; defaulting to heuristic pipeline",
+                raw,
+            )
+            return False
+        return raw.lower() == "true"
+    except Exception:
+        logger.warning(
+            "Failed to read probabilistic_scoring_enabled from risk_configs; "
+            "defaulting to heuristic pipeline",
+            exc_info=True,
+        )
+        return False
+
+
 # ---------------------------------------------------------------------------
 # Fetch competitive signals targeting a ticker within a time window
 # ---------------------------------------------------------------------------
@@ -366,6 +431,9 @@ def build_macro_weighted_signals(
    window: str,
    macro_signal_weight: float = 0.3,
    config: ScoringConfig | None = None,
+    *,
+    returns: list[float] | None = None,
+    volumes: list[float] | None = None,
 ) -> list[WeightedSignal]:
    """Convert macro impact records into WeightedSignal objects.

@@ -375,6 +443,9 @@ def build_macro_weighted_signals(
    - impact_score = macro_impact_score * macro_signal_weight
    - recency decay from the global event's publication time
    - confidence gating from the macro record's confidence
+
+    When ``config.probabilistic`` is True, passes returns/volumes for
+    regime multiplier computation.
    """
    cfg = config or ScoringConfig()
    signals: list[WeightedSignal] = []
@@ -387,6 +458,8 @@ def build_macro_weighted_signals(
            novelty_score=0.5,
            extraction_confidence=mir.confidence,
            config=cfg,
+            returns=returns,
+            volumes=volumes,
        )
        sentiment = _DIRECTION_TO_SENTIMENT.get(mir.impact_direction, 0.0)
        impact = mir.macro_impact_score * macro_signal_weight
@@ -412,11 +485,24 @@ def build_weighted_signals(
    window: str,
    market_ctx: Any | None = None,
    config: ScoringConfig | None = None,
+    *,
+    source_accuracy_map: dict[str, float] | None = None,
+    returns: list[float] | None = None,
+    volumes: list[float] | None = None,
 ) -> list[WeightedSignal]:
-    """Convert impact records into WeightedSignal objects using the scoring module."""
+    """Convert impact records into WeightedSignal objects using the scoring module.
+
+    When ``config.probabilistic`` is True, passes source accuracy factors,
+    event types, and market data (returns/volumes) to the scoring pipeline
+    for regime multiplier and adaptive decay computation.
+    """
    cfg = config or ScoringConfig()
+    accuracy_map = source_accuracy_map or {}
    signals: list[WeightedSignal] = []
    for imp in impacts:
+        # Look up source accuracy factor for this document's source
+        saf = accuracy_map.get(imp.document_id, 1.0)
+
        sw = compute_signal_weight(
            published_at=imp.published_at,
            reference_time=reference_time,
@@ -426,6 +512,11 @@ def build_weighted_signals(
            extraction_confidence=imp.confidence,
            market_ctx=market_ctx,
            config=cfg,
+            event_type=imp.catalyst_type if cfg.probabilistic else None,
+            impact_score=imp.impact_score,
+            source_accuracy_factor=saf,
+            returns=returns,
+            volumes=volumes,
        )
        signals.append(
            WeightedSignal(
@@ -433,6 +524,8 @@ def build_weighted_signals(
                weight=sw,
                sentiment_value=sentiment_to_numeric(imp.sentiment),
                impact_score=imp.impact_score,
+                info_gain_factor=sw.info_gain_factor,
+                source_accuracy_factor=sw.source_accuracy_factor,
            )
        )
    return signals
@@ -649,10 +742,15 @@ def assemble_trend_summary(
    market_ctx: Any | None = None,
    max_evidence: int = MAX_EVIDENCE_REFS,
    reference_time: datetime | None = None,
+    *,
+    probabilistic: bool = False,
+    regime: RegimeClassification | None = None,
 ) -> TrendSummary:
    """Build a complete TrendSummary from weighted signals and impact records."""
    result = assemble_trend_with_evidence(
        ticker, window, signals, impacts, market_ctx, max_evidence, reference_time,
+        probabilistic=probabilistic,
+        regime=regime,
    )
    return result.summary

@@ -665,8 +763,25 @@ def assemble_trend_with_evidence(
    market_ctx: Any | None = None,
    max_evidence: int = MAX_EVIDENCE_REFS,
    reference_time: datetime | None = None,
+    *,
+    probabilistic: bool = False,
+    regime: RegimeClassification | None = None,
 ) -> AssembledTrend:
-    """Build a TrendSummary and return detailed evidence rankings for persistence."""
+    """Build a TrendSummary and return detailed evidence rankings for persistence.
+
+    When ``probabilistic`` is True:
+    - Computes Bayesian posterior from merged signals
+    - Uses Bayesian confidence formula for trend confidence
+    - Uses entropy-based direction classification
+    - Applies regime-adjusted thresholds
+    - Populates probabilistic TrendSummary fields
+    - Stores probabilistic outputs in market_context JSONB
+
+    When ``probabilistic`` is False:
+    - Preserves exact current heuristic behavior (no changes)
+
+    Requirements: 1.1, 1.2, 8.1–8.5, 9.1–9.6, 7.8, 16.4, 16.5
+    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

@@ -677,15 +792,102 @@ def assemble_trend_with_evidence(
        CatalystEntry(document_id=imp.document_id, catalyst_type=imp.catalyst_type)
        for imp in impacts
    ]
-    contradiction_result = detect_contradictions(signals, catalyst_entries)
+    contradiction_result = detect_contradictions(
+        signals, catalyst_entries, probabilistic=probabilistic,
+    )
    contradiction = contradiction_result.score

-    direction = derive_trend_direction(avg_sentiment, contradiction)
-    confidence = compute_trend_confidence(signals, contradiction)
+    if not probabilistic:
+        # --- Heuristic mode: preserve exact current behavior ---
+        direction = derive_trend_direction(avg_sentiment, contradiction)
+        confidence = compute_trend_confidence(signals, contradiction)
+
+        # Get detailed evidence rankings for persistence
+        ev_config = EvidenceRankConfig(max_refs=max_evidence)
+        supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, ev_config)
+
+        supporting = list(dict.fromkeys(r.document_id for r in supporting_ranked))
+        opposing = list(dict.fromkeys(r.document_id for r in opposing_ranked))
+
+        catalysts, risks = extract_catalysts_and_risks(impacts, signals)
+
+        # Trend strength: absolute value of weighted sentiment, clamped to [0, 1]
+        strength = round(min(abs(avg_sentiment), 1.0), 4)
+
+        summary = TrendSummary(
+            entity_type="company",
+            entity_id=ticker,
+            window=TrendWindow(window),
+            trend_direction=direction,
+            trend_strength=strength,
+            confidence=confidence,
+            top_supporting_evidence=supporting,
+            top_opposing_evidence=opposing,
+            dominant_catalysts=catalysts,
+            material_risks=risks,
+            contradiction_score=contradiction,
+            disagreement_details=contradiction_result.details,
+            market_context=market_ctx,
+            generated_at=reference_time,
+        )
+
+        return AssembledTrend(
+            summary=summary,
+            supporting_evidence=supporting_ranked,
+            opposing_evidence=opposing_ranked,
+        )
+
+    # --- Probabilistic mode (Req 8.1–8.5, 9.1–9.6) ---
+
+    # Default to uncertainty regime when not provided (Req 7.9)
+    if regime is None:
+        regime = RegimeClassification(
+            regime=MarketRegime.UNCERTAINTY,
+            trend_indicator=0.0,
+            volatility_ratio=1.0,
+            bullish_threshold=0.15,
+            bearish_threshold=-0.15,
+            contradiction_penalty_multiplier=0.6,
+        )
+
+    # Compute Bayesian posterior from merged signals (Req 1.1, 1.2)
+    posterior: BayesianPosterior = compute_bayesian_posterior(signals)
+
+    # --- Bayesian confidence formula (Req 8.1–8.4) ---
+    # confidence = 0.5 × C_bayesian + 0.25 × F_count + 0.25 × C_avg_credibility - P_contradiction
+    active = [s for s in signals if s.weight.combined > 0]
+    unique_sources = len({s.document_id for s in active if s.document_id}) if active else 0
+    f_count = min(unique_sources / 15.0, 0.8)
+
+    avg_credibility = (
+        sum(s.weight.credibility for s in active) / len(active) if active else 0.0
+    )
+
+    # Contradiction penalty uses regime-adjusted multiplier (Req 7.7)
+    contradiction_penalty = contradiction * regime.contradiction_penalty_multiplier
+
+    confidence = (
+        0.5 * posterior.bayesian_confidence
+        + 0.25 * f_count
+        + 0.25 * avg_credibility
+        - contradiction_penalty
+    )
+    confidence = round(max(0.0, min(1.0, confidence)), 4)
+
+    # --- Entropy-based direction (Req 9.1–9.5) ---
+    # Fixed P_bull thresholds for direction: 0.65 / 0.35
+    if posterior.entropy > 0.9:
+        direction = TrendDirection.MIXED
+    elif posterior.p_bull > 0.65:
+        direction = TrendDirection.BULLISH
+    elif posterior.p_bull < 0.35:
+        direction = TrendDirection.BEARISH
+    else:
+        direction = TrendDirection.NEUTRAL

    # Get detailed evidence rankings for persistence
-    config = EvidenceRankConfig(max_refs=max_evidence)
-    supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, config)
+    ev_config = EvidenceRankConfig(max_refs=max_evidence)
+    supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, ev_config)

    supporting = list(dict.fromkeys(r.document_id for r in supporting_ranked))
    opposing = list(dict.fromkeys(r.document_id for r in opposing_ranked))
@@ -695,6 +897,30 @@ def assemble_trend_with_evidence(
    # Trend strength: absolute value of weighted sentiment, clamped to [0, 1]
    strength = round(min(abs(avg_sentiment), 1.0), 4)

+    # Build probabilistic JSONB data for market_context storage
+    probabilistic_data = {
+        "p_bull": round(posterior.p_bull, 6),
+        "alpha": round(posterior.alpha, 4),
+        "beta": round(posterior.beta, 4),
+        "log_likelihood": round(posterior.log_likelihood, 6),
+        "bayesian_confidence": round(posterior.bayesian_confidence, 6),
+        "entropy": round(posterior.entropy, 6),
+        "regime": regime.regime.value,
+        "regime_volatility_ratio": round(regime.volatility_ratio, 4),
+        "pipeline_mode": "probabilistic",
+        "contradiction_entropy": round(contradiction, 4),
+    }
+
+    # Enrich market_context with probabilistic outputs
+    if market_ctx is not None and hasattr(market_ctx, "model_dump"):
+        enriched_ctx_data = market_ctx.model_dump()
+        enriched_ctx_data["probabilistic"] = probabilistic_data
+        enriched_market_ctx = enriched_ctx_data
+    elif isinstance(market_ctx, dict):
+        enriched_market_ctx = {**market_ctx, "probabilistic": probabilistic_data}
+    else:
+        enriched_market_ctx = {"probabilistic": probabilistic_data}
+
    summary = TrendSummary(
        entity_type="company",
        entity_id=ticker,
@@ -708,8 +934,16 @@ def assemble_trend_with_evidence(
        material_risks=risks,
        contradiction_score=contradiction,
        disagreement_details=contradiction_result.details,
-        market_context=market_ctx,
+        market_context=enriched_market_ctx,
        generated_at=reference_time,
+        # Probabilistic fields (Req 9.6, 16.1)
+        p_bull=round(posterior.p_bull, 6),
+        alpha=round(posterior.alpha, 4),
+        beta_param=round(posterior.beta, 4),
+        bayesian_confidence=round(posterior.bayesian_confidence, 6),
+        entropy=round(posterior.entropy, 6),
+        regime=regime.regime.value,
+        pipeline_mode="probabilistic",
    )

    return AssembledTrend(
@@ -782,7 +1016,12 @@ async def persist_trend_summary(
        json.dumps(summary.material_risks),
        summary.contradiction_score,
        json.dumps([d.model_dump() for d in summary.disagreement_details]),
-        json.dumps(summary.market_context.model_dump() if summary.market_context else {}, default=str),
+        json.dumps(
+            summary.market_context.model_dump()
+            if hasattr(summary.market_context, "model_dump")
+            else (summary.market_context if summary.market_context else {}),
+            default=str,
+        ),
        summary.generated_at,
    )
    trend_id = str(row["id"])
@@ -933,6 +1172,131 @@ async def _build_macro_event_infos(
    return infos


+# ---------------------------------------------------------------------------
+# Regime detection helper (Req 7.1, 7.2, 7.3, 7.8, 7.9)
+# ---------------------------------------------------------------------------
+
+_CLOSING_PRICES_QUERY = """
+SELECT close
+FROM market_data_daily
+WHERE ticker = $1
+ORDER BY bar_date DESC
+LIMIT 120
+"""
+
+_DAILY_RETURNS_QUERY = """
+SELECT (close - LAG(close) OVER (ORDER BY bar_date)) / NULLIF(LAG(close) OVER (ORDER BY bar_date), 0) AS daily_return
+FROM market_data_daily
+WHERE ticker = $1
+ORDER BY bar_date DESC
+LIMIT 120
+"""
+
+_DAILY_VOLUMES_QUERY = """
+SELECT volume
+FROM market_data_daily
+WHERE ticker = $1
+ORDER BY bar_date DESC
+LIMIT 30
+"""
+
+# Default uncertainty regime used when market data is unavailable
+_DEFAULT_UNCERTAINTY_REGIME = RegimeClassification(
+    regime=MarketRegime.UNCERTAINTY,
+    trend_indicator=0.0,
+    volatility_ratio=1.0,
+    bullish_threshold=0.15,
+    bearish_threshold=-0.15,
+    contradiction_penalty_multiplier=0.6,
+)
+
+
+async def _classify_ticker_regime(
+    pool: asyncpg.Pool,
+    ticker: str,
+) -> RegimeClassification:
+    """Classify market regime for a ticker from historical price data.
+
+    Fetches closing prices and daily returns, then delegates to
+    ``classify_regime``.  Falls back to the uncertainty regime when
+    market data is unavailable or insufficient.
+
+    Requirements: 7.1, 7.2, 7.3, 7.8, 7.9
+    """
+    try:
+        price_rows = await pool.fetch(_CLOSING_PRICES_QUERY, ticker)
+        if not price_rows:
+            logger.info(
+                "No market data for %s — defaulting to uncertainty regime",
+                ticker,
+            )
+            return _DEFAULT_UNCERTAINTY_REGIME
+
+        # Prices come in DESC order; reverse to chronological
+        closing_prices = [float(r["close"]) for r in reversed(price_rows) if r["close"] is not None]
+
+        return_rows = await pool.fetch(_DAILY_RETURNS_QUERY, ticker)
+        # Returns come in DESC order; reverse to chronological, skip NULLs
+        returns = [
+            float(r["daily_return"])
+            for r in reversed(return_rows)
+            if r["daily_return"] is not None
+        ]
+
+        if not closing_prices or not returns:
+            logger.info(
+                "Insufficient market data for %s — defaulting to uncertainty regime",
+                ticker,
+            )
+            return _DEFAULT_UNCERTAINTY_REGIME
+
+        return classify_regime(closing_prices, returns)
+
+    except Exception:
+        logger.warning(
+            "Failed to classify regime for %s — defaulting to uncertainty regime",
+            ticker,
+            exc_info=True,
+        )
+        return _DEFAULT_UNCERTAINTY_REGIME
+
+
+async def _fetch_ticker_market_data(
+    pool: asyncpg.Pool,
+    ticker: str,
+) -> tuple[list[float] | None, list[float] | None]:
+    """Fetch recent daily returns and volumes for regime multiplier scoring.
+
+    Returns (returns, volumes) where each is a chronological list or None
+    if data is unavailable.  Used by the probabilistic scoring pipeline
+    to compute regime multiplier M_regime in ``compute_signal_weight``.
+    """
+    try:
+        return_rows = await pool.fetch(_DAILY_RETURNS_QUERY, ticker)
+        returns = [
+            float(r["daily_return"])
+            for r in reversed(return_rows)
+            if r["daily_return"] is not None
+        ] if return_rows else None
+
+        volume_rows = await pool.fetch(_DAILY_VOLUMES_QUERY, ticker)
+        volumes = [
+            float(r["volume"])
+            for r in reversed(volume_rows)
+            if r["volume"] is not None
+        ] if volume_rows else None
+
+        return returns or None, volumes or None
+    except Exception:
+        logger.warning(
+            "Failed to fetch market data for %s scoring — "
+            "regime multiplier will default to 1.0",
+            ticker,
+            exc_info=True,
+        )
+        return None, None
+
+
 # ---------------------------------------------------------------------------
 # Main aggregation entry point for a single ticker + window
 # ---------------------------------------------------------------------------
@@ -944,6 +1308,12 @@ async def aggregate_company_window(
    window: str,
    reference_time: datetime | None = None,
    config: AggregationConfig | None = None,
+    *,
+    probabilistic: bool = False,
+    regime: RegimeClassification | None = None,
+    source_accuracy_map: dict[str, float] | None = None,
+    ticker_returns: list[float] | None = None,
+    ticker_volumes: list[float] | None = None,
 ) -> TrendSummary:
    """Compute and persist a trend summary for one ticker and one window.

@@ -954,14 +1324,47 @@ async def aggregate_company_window(
    4. Build weighted signals using the scoring module.
    5. Check macro toggle and fetch/merge macro signals if enabled.
    6. Check competitive toggle and fetch/merge pattern/competitive signals if enabled.
-    7. Assemble the TrendSummary.
+    7. Assemble the TrendSummary (probabilistic or heuristic).
    8. Persist to trend_windows table.

+    When ``probabilistic`` is True, the scoring config is set to
+    probabilistic mode, source accuracy factors are passed to signal
+    scoring, and macro integration uses the conditional modifier.
+
    Returns the assembled TrendSummary.
    """
    cfg = config or AggregationConfig()
    scoring_cfg = cfg.effective_scoring()

+    # When probabilistic mode is active, create a scoring config with
+    # probabilistic=True so all downstream scoring uses the new formulas.
+    if probabilistic and not scoring_cfg.probabilistic:
+        scoring_cfg = ScoringConfig(
+            half_life_hours=scoring_cfg.half_life_hours,
+            min_recency_weight=scoring_cfg.min_recency_weight,
+            credibility_floor=scoring_cfg.credibility_floor,
+            credibility_ceiling=scoring_cfg.credibility_ceiling,
+            credibility_exponent=scoring_cfg.credibility_exponent,
+            novelty_bonus_max=scoring_cfg.novelty_bonus_max,
+            confidence_floor=scoring_cfg.confidence_floor,
+            volatility_recency_boost_threshold=scoring_cfg.volatility_recency_boost_threshold,
+            volatility_recency_boost_max=scoring_cfg.volatility_recency_boost_max,
+            volume_surge_threshold_pct=scoring_cfg.volume_surge_threshold_pct,
+            volume_surge_boost=scoring_cfg.volume_surge_boost,
+            probabilistic=True,
+            sigmoid_steepness=scoring_cfg.sigmoid_steepness,
+            sigmoid_midpoint=scoring_cfg.sigmoid_midpoint,
+            info_gain_lambda=scoring_cfg.info_gain_lambda,
+            info_gain_max=scoring_cfg.info_gain_max,
+            default_base_rate=scoring_cfg.default_base_rate,
+            adaptive_decay_impact_scale=scoring_cfg.adaptive_decay_impact_scale,
+            adaptive_decay_surprise_scale=scoring_cfg.adaptive_decay_surprise_scale,
+            adaptive_decay_market_scale=scoring_cfg.adaptive_decay_market_scale,
+            regime_return_weight=scoring_cfg.regime_return_weight,
+            regime_volume_weight=scoring_cfg.regime_volume_weight,
+            regime_multiplier_max=scoring_cfg.regime_multiplier_max,
+        )
+
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

@@ -975,9 +1378,13 @@ async def aggregate_company_window(
    # 2. Fetch market context
    market_ctx = await fetch_market_context(pool, ticker, window, reference_time)

-    # 3. Build weighted signals
+    # 3. Build weighted signals — pass source accuracy and market data
+    #    when in probabilistic mode (Req 4.1–4.3, 6.1–6.5)
    signals = build_weighted_signals(
        impacts, reference_time, window, market_ctx, scoring_cfg,
+        source_accuracy_map=source_accuracy_map if probabilistic else None,
+        returns=ticker_returns if probabilistic else None,
+        volumes=ticker_volumes if probabilistic else None,
    )

    # 4. Check macro toggle and merge macro signals
@@ -991,6 +1398,7 @@ async def aggregate_company_window(
    if db_toggle is not None:
        macro_enabled = db_toggle

+    macro_modifier = 1.0
    if macro_enabled:
        macro_impacts = await fetch_macro_impact_records(
            pool, ticker, window_start, reference_time,
@@ -1002,11 +1410,31 @@ async def aggregate_company_window(
                window,
                macro_signal_weight=cfg.macro_signal_weight,
                config=scoring_cfg,
+                returns=ticker_returns if probabilistic else None,
+                volumes=ticker_volumes if probabilistic else None,
            )
-            signals = signals + macro_signals
+
+            if probabilistic:
+                # Probabilistic mode: use conditional macro modifier (Req 11.1–11.5)
+                company_direction = derive_trend_direction(
+                    weighted_sentiment_average(signals),
+                ).value
+                signals, macro_modifier = integrate_macro_signals(
+                    company_signals=signals,
+                    macro_signals=macro_signals,
+                    company_direction=company_direction,
+                    macro_impacts=macro_impacts,
+                    ticker=ticker,
+                    probabilistic=True,
+                    macro_signal_weight=cfg.macro_signal_weight,
+                )
+            else:
+                # Heuristic mode: simple additive merge (current behavior)
+                signals = signals + macro_signals
+
            logger.info(
-                "Merged %d macro signals for %s/%s",
-                len(macro_signals), ticker, window,
+                "Merged %d macro signals for %s/%s (modifier=%.4f)",
+                len(macro_signals), ticker, window, macro_modifier,
            )

    # 5. Check competitive toggle and merge pattern/competitive signals
@@ -1065,9 +1493,17 @@ async def aggregate_company_window(
        market_ctx=market_ctx if market_ctx.has_data else None,
        max_evidence=cfg.max_evidence,
        reference_time=reference_time,
+        probabilistic=probabilistic,
+        regime=regime,
    )
    summary = assembled.summary

+    # 6b. Enrich probabilistic JSONB with macro modifier (Req 16.2)
+    if probabilistic and macro_modifier != 1.0:
+        ctx = summary.market_context
+        if isinstance(ctx, dict) and "probabilistic" in ctx:
+            ctx["probabilistic"]["macro_modifier"] = round(macro_modifier, 4)
+
    # 7. Persist trend window
    trend_id = await persist_trend_summary(pool, summary)

@@ -1136,10 +1572,80 @@ async def aggregate_company(
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

+    # Read probabilistic scoring flag once per cycle (Requirement 16.7).
+    # Mid-cycle changes take effect on the next cycle.
+    probabilistic = await fetch_probabilistic_scoring_enabled(pool)
+    pipeline_mode = "probabilistic" if probabilistic else "heuristic"
+    logger.info(
+        "Aggregation cycle for %s: pipeline_mode=%s",
+        ticker,
+        pipeline_mode,
+    )
+
+    # --- Regime detection (Req 7.1, 7.2, 7.3, 7.8, 7.9) ---
+    # Classify market regime for this ticker using closing prices and returns.
+    # Default to uncertainty regime when market data is unavailable.
+    regime: RegimeClassification | None = None
+    ticker_returns: list[float] | None = None
+    ticker_volumes: list[float] | None = None
+    source_accuracy_map: dict[str, float] | None = None
+
+    if probabilistic:
+        regime = await _classify_ticker_regime(pool, ticker)
+        logger.info(
+            "Regime for %s: %s (trend_indicator=%.1f, vol_ratio=%.2f, "
+            "bullish_threshold=%.2f, contradiction_mult=%.1f)",
+            ticker,
+            regime.regime.value,
+            regime.trend_indicator,
+            regime.volatility_ratio,
+            regime.bullish_threshold,
+            regime.contradiction_penalty_multiplier,
+        )
+
+        # Fetch market data (returns/volumes) for regime multiplier in scoring
+        # (Req 6.1–6.5). Fetched once per cycle and reused across all windows.
+        ticker_returns, ticker_volumes = await _fetch_ticker_market_data(pool, ticker)
+
+        # Batch-fetch source accuracy for all sources in the signal set
+        # (Req 4.1–4.3). Fetched once per cycle; individual signals look up
+        # their factor from this map. DB errors default to empty map (factor 1.0).
+        try:
+            # Fetch all source IDs from the longest window to cover all signals
+            longest_window = max(
+                cfg.effective_windows(),
+                key=lambda w: WINDOW_DURATIONS.get(w, timedelta(days=7)),
+            )
+            longest_duration = WINDOW_DURATIONS.get(longest_window, timedelta(days=90))
+            window_start = reference_time - longest_duration
+            all_impacts = await fetch_impact_records(pool, ticker, window_start, reference_time)
+            source_ids = list({imp.document_id for imp in all_impacts})
+            if source_ids:
+                sa_records = await fetch_source_accuracy(pool, source_ids)
+                source_accuracy_map = {
+                    sid: sa.accuracy_factor for sid, sa in sa_records.items()
+                }
+                logger.info(
+                    "Fetched source accuracy for %s: %d/%d sources have records",
+                    ticker, len(sa_records), len(source_ids),
+                )
+        except Exception:
+            logger.warning(
+                "Failed to fetch source accuracy for %s — defaulting to neutral factor",
+                ticker,
+                exc_info=True,
+            )
+            source_accuracy_map = None
+
    summaries: list[TrendSummary] = []
    for window in cfg.effective_windows():
        summary = await aggregate_company_window(
            pool, ticker, window, reference_time, cfg,
+            probabilistic=probabilistic,
+            regime=regime,
+            source_accuracy_map=source_accuracy_map,
+            ticker_returns=ticker_returns,
+            ticker_volumes=ticker_volumes,
        )
        summaries.append(summary)