feat: competitive intelligence & historical pattern matching layer

2026-04-14 19:42:48 +00:00
parent b478022ba3
commit f7a11d14ea
203 changed files with 20155 additions and 97 deletions
@@ -40,6 +40,17 @@ from services.shared.metrics import (
    AGGREGATION_SIGNALS_PROCESSED,
    AGGREGATION_WINDOWS_COMPUTED,
 )
+from services.aggregation.pattern_matcher import find_self_patterns
+from services.aggregation.projection import (
+    MacroEventInfo,
+    TrendProjection,
+    compute_projection,
+    persist_trend_projection,
+)
+from services.aggregation.signal_propagation import (
+    CompetitiveSignalRecord,
+    build_pattern_weighted_signals,
+)
 from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow

 logger = logging.getLogger(__name__)
@@ -64,6 +75,10 @@ class AggregationConfig:
    windows: list[str] | None = None  # None = all windows
    scoring: ScoringConfig | None = None
    max_evidence: int = MAX_EVIDENCE_REFS
+    macro_signal_weight: float = 0.3  # relative weight of macro vs company signals
+    macro_enabled: bool = True  # runtime toggle state
+    competitive_signal_weight: float = 0.2  # relative weight of pattern signals
+    competitive_enabled: bool = True  # runtime toggle state

    def effective_windows(self) -> list[str]:
        if self.windows:
@@ -154,6 +169,236 @@ async def fetch_impact_records(



+# ---------------------------------------------------------------------------
+# Fetch macro toggle state from risk_configs
+#
+# MACRO LAYER TOGGLE BEHAVIOR (Requirements 11.2, 11.3, 11.4):
+# - The toggle state is read fresh from PostgreSQL at the start of each
+#   aggregation cycle (no caching), so changes take effect immediately on
+#   the next cycle.
+# - When disabled: ingestion and classification continue normally (historical
+#   data is preserved), but interpolation and aggregation integration are
+#   skipped — the aggregation engine produces trends using only company-
+#   specific signals.
+# - When re-enabled: the engine resumes computing macro impact scores using
+#   the most recent GlobalEvent classifications, including any events that
+#   were ingested and classified while the layer was disabled.
+# ---------------------------------------------------------------------------
+
+_MACRO_TOGGLE_QUERY = """
+SELECT config->>'macro_enabled' AS macro_enabled
+FROM risk_configs
+WHERE active = TRUE
+ORDER BY updated_at DESC
+LIMIT 1
+"""
+
+
+async def fetch_macro_enabled(pool: asyncpg.Pool) -> bool | None:
+    """Read the macro-layer toggle from the newest active ``risk_configs`` row.
+
+    Args:
+        pool: asyncpg pool used to run the toggle query.
+
+    Returns:
+        ``True`` when the stored text equals ``"true"`` (case-insensitive),
+        ``False`` for any other stored text, and ``None`` when there is no
+        active row or the ``macro_enabled`` key is absent/null, so the
+        caller can fall back to the ``AggregationConfig`` default.
+    """
+    record = await pool.fetchrow(_MACRO_TOGGLE_QUERY)
+    raw_flag = None if record is None else record["macro_enabled"]
+    if raw_flag is None:
+        return None
+    # The query extracts the flag with ``->>``, so it arrives as text.
+    return raw_flag.lower() == "true"
+
+
+# ---------------------------------------------------------------------------
+# Fetch competitive toggle state from risk_configs
+# ---------------------------------------------------------------------------
+
+_COMPETITIVE_TOGGLE_QUERY = """
+SELECT config->>'competitive_enabled' AS competitive_enabled
+FROM risk_configs
+WHERE active = TRUE
+ORDER BY updated_at DESC
+LIMIT 1
+"""
+
+
+async def fetch_competitive_enabled(pool: asyncpg.Pool) -> bool | None:
+    """Read the competitive-layer toggle from the newest active config row.
+
+    Args:
+        pool: asyncpg pool used to run the toggle query.
+
+    Returns:
+        ``None`` when no active ``risk_configs`` row exists or the
+        ``competitive_enabled`` key is absent/null (the caller then uses the
+        ``AggregationConfig`` default); otherwise whether the stored text
+        equals ``"true"`` ignoring case.
+    """
+    record = await pool.fetchrow(_COMPETITIVE_TOGGLE_QUERY)
+    if record is None:
+        return None
+
+    stored_text = record["competitive_enabled"]
+    if stored_text is None:
+        return None
+
+    return stored_text.lower() == "true"
+
+
+# ---------------------------------------------------------------------------
+# Fetch competitive signals targeting a ticker within a time window
+# ---------------------------------------------------------------------------
+
+_COMPETITIVE_SIGNALS_QUERY = """
+SELECT source_document_id, source_ticker, target_ticker, catalyst_type,
+       pattern_confidence, signal_direction, signal_strength,
+       relationship_strength, computed_at
+FROM competitive_signal_records
+WHERE target_ticker = $1
+  AND computed_at >= $2
+  AND computed_at <= $3
+ORDER BY computed_at DESC
+"""
+
+
+async def fetch_competitive_signals(
+    pool: asyncpg.Pool,
+    ticker: str,
+    window_start: datetime,
+    window_end: datetime,
+) -> list[CompetitiveSignalRecord]:
+    """Load the competitive signals aimed at ``ticker`` inside a time range.
+
+    Args:
+        pool: asyncpg pool used to run the query.
+        ticker: Value matched against ``target_ticker``.
+        window_start: Inclusive lower bound on ``computed_at``.
+        window_end: Inclusive upper bound on ``computed_at``.
+
+    Returns:
+        One ``CompetitiveSignalRecord`` per row, in the query's order
+        (most recently computed first).
+    """
+    fetched = await pool.fetch(
+        _COMPETITIVE_SIGNALS_QUERY, ticker, window_start, window_end,
+    )
+
+    records: list[CompetitiveSignalRecord] = []
+    for rec in fetched:
+        # The document id is stringified and the numeric columns are coerced
+        # to float; the remaining columns are passed through untouched.
+        records.append(
+            CompetitiveSignalRecord(
+                source_document_id=str(rec["source_document_id"]),
+                source_ticker=rec["source_ticker"],
+                target_ticker=rec["target_ticker"],
+                catalyst_type=rec["catalyst_type"],
+                pattern_confidence=float(rec["pattern_confidence"]),
+                signal_direction=rec["signal_direction"],
+                signal_strength=float(rec["signal_strength"]),
+                relationship_strength=float(rec["relationship_strength"]),
+                computed_at=rec["computed_at"],
+            )
+        )
+    return records
+
+
+# ---------------------------------------------------------------------------
+# Fetch macro impact records for a ticker within a time window
+# ---------------------------------------------------------------------------
+
+# Parameters: $1 = ticker, $2 / $3 = inclusive lower / upper bound on
+# mir.computed_at. Each impact record is joined to its global event and to
+# that event's source document, so every row also carries
+# source_document_id and the document's published_at (aliased as
+# event_published_at). Rows come back most recently computed first.
+_MACRO_IMPACT_QUERY = """
+SELECT
+    mir.event_id,
+    mir.company_id,
+    mir.ticker,
+    mir.macro_impact_score,
+    mir.impact_direction,
+    mir.contributing_factors,
+    mir.confidence,
+    mir.computed_at,
+    ge.source_document_id,
+    d.published_at AS event_published_at
+FROM macro_impact_records mir
+JOIN global_events ge ON ge.id = mir.event_id
+JOIN documents d ON d.id = ge.source_document_id
+WHERE mir.ticker = $1
+  AND mir.computed_at >= $2
+  AND mir.computed_at <= $3
+ORDER BY mir.computed_at DESC
+"""
+
+
+@dataclass
+class MacroImpactRow:
+    """Parsed row from the macro impact query.
+
+    Instances are built by ``_parse_macro_impact_row`` from the columns
+    selected by ``_MACRO_IMPACT_QUERY``.
+    """
+
+    # Identifiers; the parser stringifies event_id and company_id.
+    event_id: str
+    company_id: str
+    ticker: str
+    # Score and direction of the macro impact; the parser substitutes 0.0
+    # and "neutral" respectively when the column is NULL.
+    macro_impact_score: float
+    impact_direction: str
+    # Decoded from JSON text when stored as a string; anything that is not
+    # a list after decoding becomes an empty list.
+    contributing_factors: list[str]
+    # The parser substitutes 0.5 when the column is NULL.
+    confidence: float
+    computed_at: datetime
+    # Document behind the global event, and that document's published_at.
+    source_document_id: str
+    event_published_at: datetime
+
+
+def _parse_macro_impact_row(row: Any) -> MacroImpactRow:
+    """Convert an asyncpg Record to a MacroImpactRow.
+
+    Args:
+        row: Mapping-like record exposing the columns selected by
+            ``_MACRO_IMPACT_QUERY``.
+
+    Returns:
+        A ``MacroImpactRow`` with ids stringified, numeric columns coerced
+        to ``float`` and NULL columns replaced by defaults (score ``0.0``,
+        direction ``"neutral"``, confidence ``0.5``).
+
+    Raises:
+        json.JSONDecodeError: If ``contributing_factors`` is a string that
+            is not valid JSON.
+    """
+    factors = row["contributing_factors"]
+    if isinstance(factors, str):
+        # The column may arrive as JSON text rather than a decoded list.
+        factors = json.loads(factors)
+
+    # Default only on NULL: a stored confidence of 0.0 is a real value and
+    # must not be silently promoted to the 0.5 fallback.
+    raw_confidence = row["confidence"]
+    confidence = 0.5 if raw_confidence is None else float(raw_confidence)
+
+    return MacroImpactRow(
+        event_id=str(row["event_id"]),
+        company_id=str(row["company_id"]),
+        ticker=row["ticker"],
+        macro_impact_score=float(row["macro_impact_score"] or 0.0),
+        impact_direction=row["impact_direction"] or "neutral",
+        contributing_factors=factors if isinstance(factors, list) else [],
+        confidence=confidence,
+        computed_at=row["computed_at"],
+        source_document_id=str(row["source_document_id"]),
+        event_published_at=row["event_published_at"],
+    )
+
+
+async def fetch_macro_impact_records(
+    pool: asyncpg.Pool,
+    ticker: str,
+    window_start: datetime,
+    window_end: datetime,
+) -> list[MacroImpactRow]:
+    """Load and parse the macro impact records of ``ticker`` in a time range.
+
+    Args:
+        pool: asyncpg pool used to run ``_MACRO_IMPACT_QUERY``.
+        ticker: Ticker the impact records must match.
+        window_start: Inclusive lower bound on ``computed_at``.
+        window_end: Inclusive upper bound on ``computed_at``.
+
+    Returns:
+        Parsed rows in the query's order (most recently computed first).
+    """
+    fetched = await pool.fetch(
+        _MACRO_IMPACT_QUERY, ticker, window_start, window_end,
+    )
+    parsed: list[MacroImpactRow] = []
+    for record in fetched:
+        parsed.append(_parse_macro_impact_row(record))
+    return parsed
+
+
+# ---------------------------------------------------------------------------
+# Convert macro impact records to WeightedSignals
+# ---------------------------------------------------------------------------
+
+_DIRECTION_TO_SENTIMENT: dict[str, float] = {
+    "positive": 1.0,
+    "negative": -1.0,
+    "mixed": 0.0,
+    "neutral": 0.0,
+}
+
+
+def build_macro_weighted_signals(
+    macro_impacts: list[MacroImpactRow],
+    reference_time: datetime,
+    window: str,
+    macro_signal_weight: float = 0.3,
+    config: ScoringConfig | None = None,
+) -> list[WeightedSignal]:
+    """Turn macro impact rows into ``WeightedSignal`` objects.
+
+    Each row yields one signal:
+    - ``document_id`` is the row's ``source_document_id``.
+    - ``weight`` comes from ``compute_signal_weight``, fed with the event's
+      publication time, the row's confidence (used both as source
+      credibility and as extraction confidence) and a fixed novelty of 0.5.
+    - ``sentiment_value`` is looked up from ``impact_direction``; unknown
+      directions map to 0.0.
+    - ``impact_score`` is ``macro_impact_score * macro_signal_weight``.
+
+    Args:
+        macro_impacts: Rows to convert.
+        reference_time: Reference instant handed to the weight computation.
+        window: Window label handed to the weight computation.
+        macro_signal_weight: Multiplier applied to each macro impact score.
+        config: Scoring configuration; a default ``ScoringConfig`` is used
+            when omitted.
+
+    Returns:
+        One signal per input row, in input order.
+    """
+    scoring = config if config else ScoringConfig()
+
+    def _to_signal(record: MacroImpactRow) -> WeightedSignal:
+        # The weight is computed first, matching the original evaluation order.
+        weight = compute_signal_weight(
+            published_at=record.event_published_at,
+            reference_time=reference_time,
+            window=window,
+            source_credibility=record.confidence,
+            novelty_score=0.5,
+            extraction_confidence=record.confidence,
+            config=scoring,
+        )
+        return WeightedSignal(
+            document_id=record.source_document_id,
+            weight=weight,
+            sentiment_value=_DIRECTION_TO_SENTIMENT.get(
+                record.impact_direction, 0.0,
+            ),
+            impact_score=record.macro_impact_score * macro_signal_weight,
+        )
+
+    return [_to_signal(record) for record in macro_impacts]
+
+
 # ---------------------------------------------------------------------------
 # Build weighted signals from impact records
 # ---------------------------------------------------------------------------
@@ -544,6 +789,61 @@ async def persist_trend_evidence(
    return len(rows)


+# ---------------------------------------------------------------------------
+# Build MacroEventInfo objects for projection computation
+# ---------------------------------------------------------------------------
+
+_MACRO_EVENT_INFO_QUERY = """
+SELECT
+    mir.event_id,
+    mir.macro_impact_score,
+    mir.impact_direction,
+    mir.confidence,
+    ge.estimated_duration,
+    ge.severity,
+    d.published_at AS event_published_at
+FROM macro_impact_records mir
+JOIN global_events ge ON ge.id = mir.event_id
+JOIN documents d ON d.id = ge.source_document_id
+WHERE mir.ticker = $1
+  AND mir.computed_at >= $2
+  AND mir.computed_at <= $3
+ORDER BY mir.computed_at DESC
+"""
+
+
+async def _build_macro_event_infos(
+    pool: asyncpg.Pool,
+    ticker: str,
+    window_start: datetime,
+    reference_time: datetime,
+) -> list[MacroEventInfo]:
+    """Fetch macro impact records and build MacroEventInfo objects for projection.
+
+    Args:
+        pool: asyncpg pool used to run ``_MACRO_EVENT_INFO_QUERY``.
+        ticker: Ticker the impact records must match.
+        window_start: Inclusive lower bound on ``computed_at``.
+        reference_time: Inclusive upper bound on ``computed_at`` and the
+            instant against which each event's age is measured.
+
+    Returns:
+        One ``MacroEventInfo`` per row, in the query's order (most recently
+        computed first). NULL columns fall back to defaults: score ``0.0``,
+        direction ``"neutral"``, confidence ``0.5``, duration
+        ``"short_term"``, severity ``"low"``.
+    """
+    rows = await pool.fetch(
+        _MACRO_EVENT_INFO_QUERY, ticker, window_start, reference_time,
+    )
+    infos: list[MacroEventInfo] = []
+    for row in rows:
+        published_at = row["event_published_at"]
+        age_hours = 0.0
+        if published_at is not None:
+            # Clamp at zero so an event published after reference_time does
+            # not get a negative age.
+            age_hours = max(
+                (reference_time - published_at).total_seconds() / 3600.0, 0.0,
+            )
+
+        # Default only on NULL: a stored confidence of 0.0 is a real value
+        # and must not be silently promoted to the 0.5 fallback.
+        raw_confidence = row["confidence"]
+        confidence = 0.5 if raw_confidence is None else float(raw_confidence)
+
+        infos.append(
+            MacroEventInfo(
+                event_id=str(row["event_id"]),
+                macro_impact_score=float(row["macro_impact_score"] or 0.0),
+                impact_direction=row["impact_direction"] or "neutral",
+                confidence=confidence,
+                estimated_duration=row["estimated_duration"] or "short_term",
+                severity=row["severity"] or "low",
+                event_age_hours=age_hours,
+            )
+        )
+    return infos
+
+
 # ---------------------------------------------------------------------------
 # Main aggregation entry point for a single ticker + window
 # ---------------------------------------------------------------------------
@@ -563,8 +863,10 @@ async def aggregate_company_window(
    2. Fetch document impact records from PostgreSQL.
    3. Fetch market context for the ticker.
    4. Build weighted signals using the scoring module.
-    5. Assemble the TrendSummary.
-    6. Persist to trend_windows table.
+    5. Check macro toggle and fetch/merge macro signals if enabled.
+    6. Check competitive toggle and fetch/merge pattern/competitive signals if enabled.
+    7. Assemble the TrendSummary.
+    8. Persist to trend_windows table.

    Returns the assembled TrendSummary.
    """
@@ -589,7 +891,83 @@ async def aggregate_company_window(
        impacts, reference_time, window, market_ctx, scoring_cfg,
    )

-    # 4. Assemble trend summary with evidence details
+    # 4. Check macro toggle and merge macro signals
+    # (Requirement 11.2, 11.3, 11.4): Toggle state is read from the DB on
+    # every aggregation cycle. When disabled, macro signals are skipped but
+    # ingestion/classification continue independently — so when re-enabled,
+    # the most recent classifications (including those ingested while disabled)
+    # are immediately available for impact computation.
+    macro_enabled = cfg.macro_enabled
+    db_toggle = await fetch_macro_enabled(pool)
+    if db_toggle is not None:
+        macro_enabled = db_toggle
+
+    if macro_enabled:
+        macro_impacts = await fetch_macro_impact_records(
+            pool, ticker, window_start, reference_time,
+        )
+        if macro_impacts:
+            macro_signals = build_macro_weighted_signals(
+                macro_impacts,
+                reference_time,
+                window,
+                macro_signal_weight=cfg.macro_signal_weight,
+                config=scoring_cfg,
+            )
+            signals = signals + macro_signals
+            logger.info(
+                "Merged %d macro signals for %s/%s",
+                len(macro_signals), ticker, window,
+            )
+
+    # 5. Check competitive toggle and merge pattern/competitive signals
+    # (Requirements 5.1-5.6): Same toggle pattern as macro layer. When
+    # disabled, pattern mining remains queryable but aggregation skips
+    # competitive signals — no degradation of existing behavior.
+    competitive_enabled = cfg.competitive_enabled
+    db_competitive_toggle = await fetch_competitive_enabled(pool)
+    if db_competitive_toggle is not None:
+        competitive_enabled = db_competitive_toggle
+
+    if competitive_enabled:
+        try:
+            # Get unique catalyst types from the impact records
+            catalyst_types = {imp.catalyst_type for imp in impacts}
+
+            # Query self-company historical patterns for each catalyst type
+            all_patterns = []
+            for cat_type in catalyst_types:
+                patterns = await find_self_patterns(pool, ticker, cat_type)
+                all_patterns.extend(patterns)
+
+            # Fetch competitive signals targeting this ticker
+            comp_signals = await fetch_competitive_signals(
+                pool, ticker, window_start, reference_time,
+            )
+
+            # Convert to WeightedSignal objects
+            if all_patterns or comp_signals:
+                pattern_weighted = build_pattern_weighted_signals(
+                    patterns=all_patterns,
+                    competitive_signals=comp_signals,
+                    reference_time=reference_time,
+                    window=window,
+                )
+                signals = signals + pattern_weighted
+                logger.info(
+                    "Merged %d pattern/competitive signals for %s/%s "
+                    "(patterns=%d, competitive=%d)",
+                    len(pattern_weighted), ticker, window,
+                    len(all_patterns), len(comp_signals),
+                )
+        except Exception:
+            logger.exception(
+                "Failed to fetch pattern/competitive signals for %s/%s — "
+                "continuing with company+macro signals only",
+                ticker, window,
+            )
+
+    # 6. Assemble trend summary with evidence details
    assembled = assemble_trend_with_evidence(
        ticker=ticker,
        window=window,
@@ -601,10 +979,10 @@ async def aggregate_company_window(
    )
    summary = assembled.summary

-    # 5. Persist trend window
+    # 7. Persist trend window
    trend_id = await persist_trend_summary(pool, summary)

-    # 6. Persist evidence mappings
+    # 8. Persist evidence mappings
    evidence_count = await persist_trend_evidence(
        pool, trend_id,
        assembled.supporting_evidence,
@@ -617,6 +995,33 @@ async def aggregate_company_window(
        summary.trend_strength, summary.confidence, len(signals), evidence_count,
    )

+    # 9. Compute and persist trend projection
+    try:
+        macro_event_infos: list[MacroEventInfo] = []
+        if macro_enabled:
+            macro_event_infos = await _build_macro_event_infos(
+                pool, ticker, window_start, reference_time,
+            )
+
+        projection = compute_projection(
+            summary=summary,
+            macro_events=macro_event_infos if macro_event_infos else None,
+            macro_enabled=macro_enabled,
+            upcoming_catalysts=summary.dominant_catalysts[:3] if summary.dominant_catalysts else None,
+        )
+        await persist_trend_projection(pool, trend_id, projection)
+        logger.info(
+            "Persisted projection for %s/%s: direction=%s strength=%.3f confidence=%.3f diverges=%s",
+            ticker, window, projection.projected_direction,
+            projection.projected_strength, projection.projected_confidence,
+            projection.diverges_from_current,
+        )
+    except Exception:
+        logger.exception(
+            "Failed to compute/persist projection for trend %s (%s/%s) — continuing",
+            trend_id, ticker, window,
+        )
+
    # Prometheus metrics
    AGGREGATION_WINDOWS_COMPUTED.labels(window=window).inc()
    AGGREGATION_SIGNALS_PROCESSED.labels(window=window).inc(len(signals))