feat: competitive intelligence & historical pattern matching layer

2026-04-14 19:42:48 +00:00
parent b478022ba3
commit f7a11d14ea
203 changed files with 20155 additions and 97 deletions
@@ -4,13 +4,13 @@ Aggregates company-level trend summaries into sector and market-level
 summaries, enabling top-down views of sentiment and risk across the
 portfolio.

-Requirements: 6.3, 6.4, 6.5
+Requirements: 6.1, 6.2, 6.3, 6.4, 6.5
 """
 from __future__ import annotations

 import json
 import logging
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime, timedelta, timezone

 import asyncpg
@@ -42,6 +42,126 @@ class CompanyTrendRow:
    top_opposing_evidence: list[str]


+@dataclass
+class SectorMacroImpact:
+    """Aggregated macro impact data for a single sector.
+
+    One instance summarises the macro impact records attributed to a sector
+    and is consumed by the sector and market rollups to fold macro signals
+    into trend strength, confidence and direction.
+
+    Requirements: 6.1, 6.2, 6.3
+    """
+
+    # Sector name this aggregate belongs to.
+    sector: str
+    total_impact: float  # sum of macro_impact_score over the sector's impact records
+    avg_impact: float  # mean macro_impact_score per impact record
+    company_count: int  # number of companies affected
+    # Mean of +1 (positive) / -1 (negative) over records that carry a
+    # non-neutral direction; 0.0 when no record has one. Range is [-1, 1].
+    net_direction: float
+    event_ids: list[str] = field(default_factory=list)  # contributing event IDs
+
+
+# Threshold for disproportionate sector impact (Requirement 6.3).
+# Expressed as a fraction of total macro impact: a sector is flagged only
+# when its share is strictly greater than this value.
+SECTOR_CONCENTRATION_THRESHOLD = 0.60
+
+
+# ---------------------------------------------------------------------------
+# Fetch sector-level macro impact aggregates
+# ---------------------------------------------------------------------------
+
+# One row per macro impact record of an active company. company_id is
+# selected so that companies can be counted distinctly: the same company may
+# appear once per event it is exposed to.
+_SECTOR_MACRO_IMPACT_QUERY = """
+SELECT
+    c.sector,
+    mir.company_id,
+    mir.event_id,
+    mir.macro_impact_score,
+    mir.impact_direction
+FROM macro_impact_records mir
+JOIN companies c ON c.id = mir.company_id AND c.active = TRUE
+WHERE mir.computed_at >= $1
+  AND mir.computed_at <= $2
+ORDER BY c.sector, mir.macro_impact_score DESC
+"""
+
+
+async def fetch_sector_macro_impacts(
+    pool: asyncpg.Pool,
+    window_start: datetime,
+    window_end: datetime,
+) -> dict[str, SectorMacroImpact]:
+    """Fetch macro impact records aggregated by sector for a time range.
+
+    Only records of active companies whose ``computed_at`` lies within
+    ``[window_start, window_end]`` (both bounds inclusive) are considered.
+
+    Args:
+        pool: Connection pool used to run the query.
+        window_start: Inclusive lower bound on ``computed_at``.
+        window_end: Inclusive upper bound on ``computed_at``.
+
+    Returns:
+        A mapping of sector name to SectorMacroImpact. Companies without a
+        sector are grouped under ``"Unknown"``. For each sector:
+
+        - ``total_impact`` is the sum of scores over all records,
+        - ``avg_impact`` is the mean score per record,
+        - ``company_count`` is the number of *distinct* companies,
+        - ``net_direction`` is the mean of +1/-1 over records with a
+          positive/negative direction (0.0 when there are none),
+        - ``event_ids`` is the sorted list of unique contributing events.
+    """
+    rows = await pool.fetch(_SECTOR_MACRO_IMPACT_QUERY, window_start, window_end)
+
+    # Unknown direction labels are treated like neutral ones (sign 0).
+    direction_map = {"positive": 1.0, "negative": -1.0, "mixed": 0.0, "neutral": 0.0}
+
+    # Accumulate per-sector
+    sector_data: dict[str, dict] = {}
+
+    for row in rows:
+        sector = str(row["sector"]) if row["sector"] else "Unknown"
+        score = float(row["macro_impact_score"] or 0.0)
+        direction = row["impact_direction"] or "neutral"
+
+        d = sector_data.get(sector)
+        if d is None:
+            d = sector_data[sector] = {
+                "total": 0.0,
+                "records": 0,
+                "companies": set(),
+                "dir_sum": 0.0,
+                "dir_count": 0,
+                "event_ids": set(),
+            }
+
+        d["total"] += score
+        d["records"] += 1
+        # A set keeps a company exposed to several events from being counted
+        # once per event, which would overstate the sector's breadth.
+        d["companies"].add(row["company_id"])
+
+        dir_val = direction_map.get(direction, 0.0)
+        if dir_val != 0.0:
+            # Neutral/mixed records do not dilute the net direction.
+            d["dir_sum"] += dir_val
+            d["dir_count"] += 1
+
+        # Skip NULL event ids rather than recording the string "None".
+        if row["event_id"] is not None:
+            d["event_ids"].add(str(row["event_id"]))
+
+    result: dict[str, SectorMacroImpact] = {}
+    for sector, d in sector_data.items():
+        records = d["records"]
+        dir_count = d["dir_count"]
+        result[sector] = SectorMacroImpact(
+            sector=sector,
+            total_impact=d["total"],
+            avg_impact=d["total"] / records if records else 0.0,
+            company_count=len(d["companies"]),
+            net_direction=d["dir_sum"] / dir_count if dir_count else 0.0,
+            event_ids=sorted(d["event_ids"]),
+        )
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Sector macro concentration helper (Requirement 6.3)
+# ---------------------------------------------------------------------------
+
+
+def compute_sector_macro_concentration(
+    sector_impacts: dict[str, SectorMacroImpact],
+) -> list[tuple[str, float]]:
+    """Return each sector's share of the overall macro impact.
+
+    The share is the sector's ``total_impact`` divided by the sum of
+    ``total_impact`` over every sector in ``sector_impacts``. The result is
+    a list of (sector, fraction) tuples ordered from the largest share to
+    the smallest; an empty list is returned when the overall impact is not
+    positive. A sector whose fraction is > SECTOR_CONCENTRATION_THRESHOLD
+    is considered disproportionately affected.
+    """
+    grand_total = sum(impact.total_impact for impact in sector_impacts.values())
+    if grand_total <= 0.0:
+        # Nothing to apportion (also avoids dividing by zero).
+        return []
+
+    shares = (
+        (name, impact.total_impact / grand_total)
+        for name, impact in sector_impacts.items()
+    )
+    # sorted() is stable, so equal shares keep the mapping's iteration order.
+    return sorted(shares, key=lambda pair: pair[1], reverse=True)
+
+
 # ---------------------------------------------------------------------------
 # Fetch latest company trends for a given window
 # ---------------------------------------------------------------------------
@@ -141,11 +261,22 @@ def rollup_trends(
    entity_id: str,
    window: str,
    reference_time: datetime,
+    macro_impacts: dict[str, SectorMacroImpact] | None = None,
 ) -> TrendSummary:
    """Aggregate a list of company-level trends into a single rollup summary.

    Each company trend is weighted by its confidence to produce a
    confidence-weighted average of direction, strength, and contradiction.
+
+    When macro_impacts is provided:
+    - For sector rollups: incorporates the sector's macro signal into
+      strength and confidence, weighted by constituent company exposure.
+    - For market rollups: aggregates macro signals across all sectors and
+      surfaces disproportionately affected sectors (>60% concentration)
+      in material_risks or dominant_catalysts.
+
+    When macro_impacts is None or empty, produces identical output to
+    the original company-only rollup.
    """
    if not trends:
        return TrendSummary(
@@ -204,16 +335,70 @@ def rollup_trends(
    avg_contradiction = weighted_contradiction / total_weight
    avg_confidence = total_weight / len(trends)

+    # --- Incorporate macro impact signals when available ---
+    macro_strength_adj = 0.0
+    macro_confidence_adj = 0.0
+    macro_catalysts: list[str] = []
+    macro_risks: list[str] = []
+
+    if macro_impacts:
+        if entity_type == "sector":
+            # Sector rollup: incorporate this sector's macro signal
+            sector_macro = macro_impacts.get(entity_id)
+            if sector_macro and sector_macro.total_impact > 0:
+                # Weight macro contribution by avg impact and company breadth
+                breadth = min(sector_macro.company_count / max(len(trends), 1), 1.0)
+                macro_strength_adj = sector_macro.avg_impact * breadth * 0.3
+                macro_confidence_adj = sector_macro.avg_impact * breadth * 0.1
+                # Nudge direction based on macro net direction
+                avg_direction += sector_macro.net_direction * macro_strength_adj * 0.5
+
+        elif entity_type == "market":
+            # Market rollup: aggregate macro signals across all sectors
+            total_macro = sum(si.total_impact for si in macro_impacts.values())
+            if total_macro > 0:
+                total_companies = sum(si.company_count for si in macro_impacts.values())
+                breadth = min(total_companies / max(len(trends), 1), 1.0)
+                avg_macro = total_macro / max(len(macro_impacts), 1)
+                macro_strength_adj = avg_macro * breadth * 0.3
+                macro_confidence_adj = avg_macro * breadth * 0.1
+
+                # Aggregate net direction across sectors
+                dir_sum = sum(
+                    si.net_direction * si.total_impact
+                    for si in macro_impacts.values()
+                )
+                net_dir = dir_sum / total_macro if total_macro > 0 else 0.0
+                avg_direction += net_dir * macro_strength_adj * 0.5
+
+                # Surface disproportionately affected sectors (Requirement 6.3)
+                concentration = compute_sector_macro_concentration(macro_impacts)
+                for sector, fraction in concentration:
+                    if fraction > SECTOR_CONCENTRATION_THRESHOLD:
+                        si = macro_impacts[sector]
+                        label = f"Macro: {sector} ({fraction:.0%} of macro impact)"
+                        if si.net_direction < 0:
+                            macro_risks.append(label)
+                        else:
+                            macro_catalysts.append(label)
+
+    # Apply macro adjustments to strength and confidence
+    adj_strength = avg_strength + macro_strength_adj
+    adj_confidence = avg_confidence + macro_confidence_adj
+
    # Derive direction
    direction = _derive_rollup_direction(avg_direction, avg_contradiction)

-    # Top catalysts
+    # Top catalysts (macro catalysts prepended when present)
    sorted_catalysts = sorted(catalyst_weights.items(), key=lambda x: x[1], reverse=True)
-    catalysts = [c for c, _ in sorted_catalysts[:5]]
+    catalysts = macro_catalysts + [c for c, _ in sorted_catalysts[:5]]
+    catalysts = catalysts[:5]

-    # Top risks (deduplicated, by weight)
+    # Top risks (macro risks prepended when present, deduplicated)
    sorted_risks = sorted(risk_set.items(), key=lambda x: x[1], reverse=True)
-    risks = [r for r, _ in sorted_risks[:5]]
+    base_risks = [r for r, _ in sorted_risks[:5]]
+    risks = macro_risks + base_risks
+    risks = risks[:5]

    # Disagreement details
    disagreement = _build_rollup_disagreement(trends, entity_id)
@@ -223,8 +408,8 @@ def rollup_trends(
        entity_id=entity_id,
        window=TrendWindow(window),
        trend_direction=direction,
-        trend_strength=round(min(abs(avg_strength), 1.0), 4),
-        confidence=round(max(0.0, min(avg_confidence, 1.0)), 4),
+        trend_strength=round(min(abs(adj_strength), 1.0), 4),
+        confidence=round(max(0.0, min(adj_confidence, 1.0)), 4),
        top_supporting_evidence=list(dict.fromkeys(all_supporting))[:10],
        top_opposing_evidence=list(dict.fromkeys(all_opposing))[:10],
        dominant_catalysts=catalysts,
@@ -341,11 +526,14 @@ async def aggregate_sector(
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
+    macro_impacts: dict[str, SectorMacroImpact] | None = None,
 ) -> TrendSummary:
    """Compute and persist a sector-level rollup for one window.

    Fetches the latest company trends, filters to the given sector,
-    and rolls them up into a single sector summary.
+    and rolls them up into a single sector summary. When macro_impacts
+    is provided, incorporates macro signals weighted by constituent
+    company exposure.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
@@ -355,7 +543,14 @@ async def aggregate_sector(
    all_trends = await fetch_latest_company_trends(pool, window, since)
    sector_trends = [t for t in all_trends if t.sector == sector]

-    summary = rollup_trends(sector_trends, "sector", sector, window, reference_time)
+    # Fetch macro impacts if not provided
+    if macro_impacts is None:
+        macro_impacts = await fetch_sector_macro_impacts(pool, since, reference_time)
+
+    summary = rollup_trends(
+        sector_trends, "sector", sector, window, reference_time,
+        macro_impacts=macro_impacts,
+    )

    if sector_trends:
        rollup_id = await persist_rollup(pool, summary)
@@ -373,10 +568,13 @@ async def aggregate_market(
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
+    macro_impacts: dict[str, SectorMacroImpact] | None = None,
 ) -> TrendSummary:
    """Compute and persist a market-wide rollup for one window.

-    Aggregates all company trends regardless of sector.
+    Aggregates all company trends regardless of sector. When macro_impacts
+    is provided, aggregates macro signals across all sectors and surfaces
+    disproportionately affected sectors in material_risks or dominant_catalysts.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
@@ -385,7 +583,14 @@ async def aggregate_market(

    all_trends = await fetch_latest_company_trends(pool, window, since)

-    summary = rollup_trends(all_trends, "market", "all", window, reference_time)
+    # Fetch macro impacts if not provided
+    if macro_impacts is None:
+        macro_impacts = await fetch_sector_macro_impacts(pool, since, reference_time)
+
+    summary = rollup_trends(
+        all_trends, "market", "all", window, reference_time,
+        macro_impacts=macro_impacts,
+    )

    if all_trends:
        rollup_id = await persist_rollup(pool, summary)
@@ -403,6 +608,7 @@ async def aggregate_all_sectors(
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
+    macro_impacts: dict[str, SectorMacroImpact] | None = None,
 ) -> list[TrendSummary]:
    """Compute sector rollups for every sector that has company trends."""
    if reference_time is None:
@@ -412,6 +618,10 @@ async def aggregate_all_sectors(

    all_trends = await fetch_latest_company_trends(pool, window, since)

+    # Fetch macro impacts once for all sectors if not provided
+    if macro_impacts is None:
+        macro_impacts = await fetch_sector_macro_impacts(pool, since, reference_time)
+
    # Group by sector
    sectors: dict[str, list[CompanyTrendRow]] = {}
    for t in all_trends:
@@ -419,7 +629,10 @@ async def aggregate_all_sectors(

    summaries: list[TrendSummary] = []
    for sector, trends in sectors.items():
-        summary = rollup_trends(trends, "sector", sector, window, reference_time)
+        summary = rollup_trends(
+            trends, "sector", sector, window, reference_time,
+            macro_impacts=macro_impacts,
+        )
        if trends:
            _id = await persist_rollup(pool, summary)
        summaries.append(summary)