# Source file: stonks-oracle/services/aggregation/rollups.py (Python; 653 lines, 22 KiB)
"""Sector and market-level rollup aggregation.
Aggregates company-level trend summaries into sector and market-level
summaries, enabling top-down views of sentiment and risk across the
portfolio.
Requirements: 6.1, 6.2, 6.3, 6.4, 6.5
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
import asyncpg
from services.shared.schemas import (
DisagreementDetail,
TrendDirection,
TrendSummary,
TrendWindow,
)
logger = logging.getLogger(__name__)
@dataclass
class CompanyTrendRow:
    """A company-level trend summary fetched from the DB for rollup.

    Instances are produced by ``_parse_company_trend_row`` from rows of the
    ``trend_windows`` table joined with ``companies``, and consumed by
    ``rollup_trends``.
    """

    entity_id: str  # ticker
    sector: str  # sector from the companies table; "Unknown" when absent
    window: str  # trend window identifier, kept as a plain string
    trend_direction: str  # compared against TrendDirection ``.value`` strings
    trend_strength: float
    confidence: float  # doubles as this company's weight in rollup_trends
    contradiction_score: float
    dominant_catalysts: list[str]
    material_risks: list[str]
    top_supporting_evidence: list[str]
    top_opposing_evidence: list[str]
@dataclass
class SectorMacroImpact:
    """Aggregated macro impact data for a single sector.

    Used to incorporate macro signals into sector and market rollups.
    Built by ``fetch_sector_macro_impacts`` and read by ``rollup_trends``
    and ``compute_sector_macro_concentration``.

    Requirements: 6.1, 6.2, 6.3
    """

    sector: str  # sector name; also the key under which this record is stored
    total_impact: float  # sum of macro_impact_score across the sector's impact records
    avg_impact: float  # average macro_impact_score
    company_count: int  # number of companies affected (as counted by the builder of this record)
    net_direction: float  # net direction: > 0 leans positive, < 0 leans negative, 0 mixed/neutral
    event_ids: list[str] = field(default_factory=list)  # contributing event IDs
# Threshold for disproportionate sector impact (Requirement 6.3).
# In market rollups, a sector whose share of the total macro impact is
# strictly greater than this fraction is surfaced as a macro risk or catalyst.
SECTOR_CONCENTRATION_THRESHOLD = 0.60
# ---------------------------------------------------------------------------
# Fetch sector-level macro impact aggregates
# ---------------------------------------------------------------------------
# One row per macro impact record (one event applied to one active company)
# computed inside the [$1, $2] time range.  company_id is selected so that
# distinct companies can be counted per sector.
_SECTOR_MACRO_IMPACT_QUERY = """
SELECT
    c.sector,
    mir.company_id,
    mir.event_id,
    mir.macro_impact_score,
    mir.impact_direction
FROM macro_impact_records mir
JOIN companies c ON c.id = mir.company_id AND c.active = TRUE
WHERE mir.computed_at >= $1
  AND mir.computed_at <= $2
ORDER BY c.sector, mir.macro_impact_score DESC
"""


async def fetch_sector_macro_impacts(
    pool: asyncpg.Pool,
    window_start: datetime,
    window_end: datetime,
) -> dict[str, SectorMacroImpact]:
    """Fetch macro impact records aggregated by sector for a time range.

    Args:
        pool: asyncpg connection pool used to run the query.
        window_start: inclusive lower bound on ``computed_at``.
        window_end: inclusive upper bound on ``computed_at``.

    Returns:
        A mapping of sector name to SectorMacroImpact.  Rows with a missing
        sector are grouped under "Unknown".  For each sector:

        * ``total_impact`` is the sum of scores over all impact records,
        * ``avg_impact`` is the mean score per impact record,
        * ``company_count`` is the number of *distinct* companies hit (a
          company affected by several events is counted once),
        * ``net_direction`` is the mean of +1/-1 over the records that have
          a positive/negative direction (mixed, neutral and unrecognised
          directions are ignored), or 0.0 when there are none,
        * ``event_ids`` is the sorted list of distinct contributing events.
    """
    rows = await pool.fetch(_SECTOR_MACRO_IMPACT_QUERY, window_start, window_end)

    direction_map = {"positive": 1.0, "negative": -1.0, "mixed": 0.0, "neutral": 0.0}

    # Accumulate per-sector
    sector_data: dict[str, dict] = {}
    for row in rows:
        sector = str(row["sector"]) if row["sector"] else "Unknown"
        score = float(row["macro_impact_score"] or 0.0)
        direction = row["impact_direction"] or "neutral"

        if sector not in sector_data:
            sector_data[sector] = {
                "total": 0.0,
                "count": 0,
                "dir_sum": 0.0,
                "dir_count": 0,
                "company_ids": set(),
                "event_ids": set(),
            }
        d = sector_data[sector]
        d["total"] += score
        d["count"] += 1
        dir_val = direction_map.get(direction, 0.0)
        if dir_val != 0.0:
            d["dir_sum"] += dir_val
            d["dir_count"] += 1
        # Sets collapse repeated hits on the same company / by the same event.
        d["company_ids"].add(str(row["company_id"]))
        d["event_ids"].add(str(row["event_id"]))

    result: dict[str, SectorMacroImpact] = {}
    for sector, d in sector_data.items():
        record_count = d["count"]
        avg = d["total"] / record_count if record_count > 0 else 0.0
        net_dir = d["dir_sum"] / d["dir_count"] if d["dir_count"] > 0 else 0.0
        result[sector] = SectorMacroImpact(
            sector=sector,
            total_impact=d["total"],
            avg_impact=avg,
            # Distinct companies, not impact records: rollups divide this by
            # the number of company trends to measure breadth.
            company_count=len(d["company_ids"]),
            net_direction=net_dir,
            event_ids=sorted(d["event_ids"]),
        )
    return result
# ---------------------------------------------------------------------------
# Sector macro concentration helper (Requirement 6.3)
# ---------------------------------------------------------------------------
def compute_sector_macro_concentration(
    sector_impacts: dict[str, SectorMacroImpact],
) -> list[tuple[str, float]]:
    """Return each sector's share of the total macro impact, largest first.

    The share is ``total_impact`` of the sector divided by the sum of
    ``total_impact`` over all sectors.  An empty list is returned when that
    sum is zero or negative.  Callers compare the shares against
    SECTOR_CONCENTRATION_THRESHOLD to spot disproportionately affected
    sectors.
    """
    grand_total = sum(impact.total_impact for impact in sector_impacts.values())
    if grand_total <= 0.0:
        return []

    shares: list[tuple[str, float]] = []
    for name, impact in sector_impacts.items():
        shares.append((name, impact.total_impact / grand_total))

    # Stable sort: sectors with equal shares keep their mapping order.
    return sorted(shares, key=lambda share: share[1], reverse=True)
# ---------------------------------------------------------------------------
# Fetch latest company trends for a given window
# ---------------------------------------------------------------------------
# Latest company-level trend per ticker for one window.
#   $1 = window value, $2 = lower bound on generated_at.
# DISTINCT ON (tw.entity_id) together with the ORDER BY on
# generated_at DESC keeps only the newest row for each entity; the join
# restricts results to active companies and supplies the sector.
_LATEST_COMPANY_TRENDS_QUERY = """
SELECT DISTINCT ON (tw.entity_id)
tw.entity_id,
c.sector,
tw.window,
tw.trend_direction,
tw.trend_strength,
tw.confidence,
tw.contradiction_score,
tw.dominant_catalysts,
tw.material_risks,
tw.top_supporting_evidence,
tw.top_opposing_evidence
FROM trend_windows tw
JOIN companies c ON c.ticker = tw.entity_id AND c.active = TRUE
WHERE tw.entity_type = 'company'
AND tw.window = $1
AND tw.generated_at >= $2
ORDER BY tw.entity_id, tw.generated_at DESC
"""
def _parse_jsonb_list(val: object) -> list[str]:
"""Safely parse a JSONB column that should be a list of strings."""
if isinstance(val, list):
return [str(v) for v in val]
if isinstance(val, str):
parsed = json.loads(val)
if isinstance(parsed, list):
return [str(v) for v in parsed]
return []
def _parse_company_trend_row(row: object) -> CompanyTrendRow:
    """Build a CompanyTrendRow from a subscriptable DB row.

    The row only needs to support ``row[column_name]``.  NULL text columns
    become "" (sector becomes "Unknown", also when it is an empty string),
    NULL numeric columns become 0.0, and the JSONB list columns go through
    ``_parse_jsonb_list``.

    Raises:
        TypeError: if ``row`` has no ``__getitem__``.
    """
    lookup = getattr(row, "__getitem__", None)
    if lookup is None:
        raise TypeError(f"Expected a mapping-like row, got {type(row)}")

    def _text(column: str, fallback: str = "") -> str:
        raw = lookup(column)
        if raw is None:
            return fallback
        return str(raw)

    def _number(column: str) -> float:
        raw = lookup(column)
        if raw is None:
            return 0.0
        return float(raw)

    # Columns are read in declaration order, one statement each.
    ticker = _text("entity_id")
    sector_name = _text("sector", "Unknown")
    if not sector_name:
        sector_name = "Unknown"
    window_name = _text("window")
    direction = _text("trend_direction")
    strength = _number("trend_strength")
    confidence = _number("confidence")
    contradiction = _number("contradiction_score")
    catalysts = _parse_jsonb_list(lookup("dominant_catalysts"))
    risks = _parse_jsonb_list(lookup("material_risks"))
    supporting = _parse_jsonb_list(lookup("top_supporting_evidence"))
    opposing = _parse_jsonb_list(lookup("top_opposing_evidence"))

    return CompanyTrendRow(
        entity_id=ticker,
        sector=sector_name,
        window=window_name,
        trend_direction=direction,
        trend_strength=strength,
        confidence=confidence,
        contradiction_score=contradiction,
        dominant_catalysts=catalysts,
        material_risks=risks,
        top_supporting_evidence=supporting,
        top_opposing_evidence=opposing,
    )
async def fetch_latest_company_trends(
    pool: asyncpg.Pool,
    window: str,
    since: datetime,
) -> list[CompanyTrendRow]:
    """Load the newest company-level trend per ticker for ``window``.

    Runs ``_LATEST_COMPANY_TRENDS_QUERY`` with ``window`` and ``since`` as
    its two parameters and converts every returned record into a
    CompanyTrendRow.
    """
    records = await pool.fetch(_LATEST_COMPANY_TRENDS_QUERY, window, since)
    return list(map(_parse_company_trend_row, records))
# ---------------------------------------------------------------------------
# Pure rollup logic
# ---------------------------------------------------------------------------
# Direction mapping for numeric aggregation: each company's direction string
# is converted to a signed value so directions can be confidence-averaged.
# Mixed and neutral both contribute 0.
_DIRECTION_VALUES = {
    TrendDirection.BULLISH.value: 1.0,
    TrendDirection.BEARISH.value: -1.0,
    TrendDirection.MIXED.value: 0.0,
    TrendDirection.NEUTRAL.value: 0.0,
}
# Cut-offs applied to the averaged direction in _derive_rollup_direction:
# at or above BULLISH_THRESHOLD is bullish, at or below BEARISH_THRESHOLD is
# bearish, anything in between is neutral (unless classified as mixed).
BULLISH_THRESHOLD = 0.15
BEARISH_THRESHOLD = -0.15
def _neutral_rollup(
    entity_type: str,
    entity_id: str,
    window: str,
    reference_time: datetime,
) -> TrendSummary:
    """Return the neutral, zero-confidence summary used when nothing can be aggregated."""
    return TrendSummary(
        entity_type=entity_type,
        entity_id=entity_id,
        window=TrendWindow(window),
        trend_direction=TrendDirection.NEUTRAL,
        trend_strength=0.0,
        confidence=0.0,
        generated_at=reference_time,
    )


def rollup_trends(
    trends: list[CompanyTrendRow],
    entity_type: str,
    entity_id: str,
    window: str,
    reference_time: datetime,
    macro_impacts: dict[str, SectorMacroImpact] | None = None,
) -> TrendSummary:
    """Aggregate a list of company-level trends into a single rollup summary.

    Each company trend is weighted by its confidence to produce a
    confidence-weighted average of direction, strength, and contradiction.

    When macro_impacts is provided:
    - For sector rollups (``entity_type == "sector"``): incorporates the macro
      signal stored under ``entity_id`` into strength and confidence, scaled
      by the share of constituent companies that are exposed.
    - For market rollups (``entity_type == "market"``): aggregates macro
      signals across all sectors and surfaces disproportionately affected
      sectors (share above SECTOR_CONCENTRATION_THRESHOLD) in material_risks
      (net direction negative) or dominant_catalysts (otherwise).

    When macro_impacts is None or empty, the result depends on the company
    trends only.

    Args:
        trends: company trends to aggregate.
        entity_type: "sector" or "market"; other values skip macro handling.
        entity_id: identifier of the rollup entity (sector name for sectors).
        window: trend window value, converted with ``TrendWindow(window)``.
        reference_time: timestamp stored as ``generated_at``.
        macro_impacts: optional per-sector macro aggregates.

    Returns:
        The rollup TrendSummary.  A neutral summary with zero strength and
        confidence is returned when ``trends`` is empty or carries no
        positive total confidence.
    """
    if not trends:
        return _neutral_rollup(entity_type, entity_id, window, reference_time)

    total_weight = 0.0
    weighted_direction = 0.0
    weighted_strength = 0.0
    weighted_contradiction = 0.0
    catalyst_weights: dict[str, float] = {}
    risk_weights: dict[str, float] = {}
    all_supporting: list[str] = []
    all_opposing: list[str] = []

    for t in trends:
        w = t.confidence
        total_weight += w
        dir_val = _DIRECTION_VALUES.get(t.trend_direction, 0.0)
        weighted_direction += w * dir_val
        weighted_strength += w * t.trend_strength
        weighted_contradiction += w * t.contradiction_score
        # Catalysts are ranked by the summed confidence of companies citing them.
        for cat in t.dominant_catalysts:
            catalyst_weights[cat] = catalyst_weights.get(cat, 0.0) + w
        # Risks are normalised (trimmed, lower-cased) and ranked by the
        # highest confidence among the companies reporting them.
        for risk in t.material_risks:
            norm = risk.strip().lower()
            risk_weights[norm] = max(risk_weights.get(norm, w), w)
        all_supporting.extend(t.top_supporting_evidence)
        all_opposing.extend(t.top_opposing_evidence)

    # No usable weight: averaging would divide by zero (or flip signs).
    if total_weight <= 0.0:
        return _neutral_rollup(entity_type, entity_id, window, reference_time)

    avg_direction = weighted_direction / total_weight
    avg_strength = weighted_strength / total_weight
    avg_contradiction = weighted_contradiction / total_weight
    avg_confidence = total_weight / len(trends)

    # --- Incorporate macro impact signals when available ---
    macro_strength_adj = 0.0
    macro_confidence_adj = 0.0
    macro_catalysts: list[str] = []
    macro_risks: list[str] = []

    if macro_impacts:
        if entity_type == "sector":
            # Sector rollup: incorporate this sector's macro signal
            sector_macro = macro_impacts.get(entity_id)
            if sector_macro and sector_macro.total_impact > 0:
                # Weight macro contribution by avg impact and company breadth
                breadth = min(sector_macro.company_count / max(len(trends), 1), 1.0)
                macro_strength_adj = sector_macro.avg_impact * breadth * 0.3
                macro_confidence_adj = sector_macro.avg_impact * breadth * 0.1
                # Nudge direction based on macro net direction
                avg_direction += sector_macro.net_direction * macro_strength_adj * 0.5
        elif entity_type == "market":
            # Market rollup: aggregate macro signals across all sectors
            total_macro = sum(si.total_impact for si in macro_impacts.values())
            if total_macro > 0:
                total_companies = sum(si.company_count for si in macro_impacts.values())
                breadth = min(total_companies / max(len(trends), 1), 1.0)
                # Company-weighted mean of the per-sector average scores.
                # This keeps the market adjustment on the same scale as the
                # sector branch; averaging sector *totals* would grow with
                # the number of impacted companies and saturate the result.
                avg_macro = sum(
                    si.avg_impact * si.company_count
                    for si in macro_impacts.values()
                ) / max(total_companies, 1)
                macro_strength_adj = avg_macro * breadth * 0.3
                macro_confidence_adj = avg_macro * breadth * 0.1
                # Aggregate net direction across sectors, weighted by impact
                net_dir = sum(
                    si.net_direction * si.total_impact
                    for si in macro_impacts.values()
                ) / total_macro
                avg_direction += net_dir * macro_strength_adj * 0.5

                # Surface disproportionately affected sectors (Requirement 6.3)
                for sector, fraction in compute_sector_macro_concentration(macro_impacts):
                    if fraction > SECTOR_CONCENTRATION_THRESHOLD:
                        label = f"Macro: {sector} ({fraction:.0%} of macro impact)"
                        if macro_impacts[sector].net_direction < 0:
                            macro_risks.append(label)
                        else:
                            macro_catalysts.append(label)

    # Apply macro adjustments to strength and confidence
    adj_strength = avg_strength + macro_strength_adj
    adj_confidence = avg_confidence + macro_confidence_adj

    # Derive direction
    direction = _derive_rollup_direction(avg_direction, avg_contradiction)

    # Top catalysts (macro catalysts prepended when present)
    sorted_catalysts = sorted(catalyst_weights.items(), key=lambda x: x[1], reverse=True)
    catalysts = (macro_catalysts + [c for c, _ in sorted_catalysts[:5]])[:5]

    # Top risks (macro risks prepended when present, deduplicated in order)
    sorted_risks = sorted(risk_weights.items(), key=lambda x: x[1], reverse=True)
    base_risks = [r for r, _ in sorted_risks[:5]]
    risks = list(dict.fromkeys(macro_risks + base_risks))[:5]

    # Disagreement details
    disagreement = _build_rollup_disagreement(trends, entity_id)

    return TrendSummary(
        entity_type=entity_type,
        entity_id=entity_id,
        window=TrendWindow(window),
        trend_direction=direction,
        trend_strength=round(min(abs(adj_strength), 1.0), 4),
        confidence=round(max(0.0, min(adj_confidence, 1.0)), 4),
        # dict.fromkeys removes duplicate evidence while keeping first-seen order.
        top_supporting_evidence=list(dict.fromkeys(all_supporting))[:10],
        top_opposing_evidence=list(dict.fromkeys(all_opposing))[:10],
        dominant_catalysts=catalysts,
        material_risks=risks,
        contradiction_score=round(max(0.0, min(avg_contradiction, 1.0)), 4),
        disagreement_details=disagreement,
        generated_at=reference_time,
    )
def _derive_rollup_direction(
    avg_direction: float,
    avg_contradiction: float,
) -> TrendDirection:
    """Classify an averaged direction value as a TrendDirection.

    A weak signal (absolute direction below 0.3) combined with a
    contradiction score above 0.10 is MIXED.  Otherwise the value is
    BULLISH at or above BULLISH_THRESHOLD, BEARISH at or below
    BEARISH_THRESHOLD, and NEUTRAL in between.
    """
    weak_signal = abs(avg_direction) < 0.3
    contested = avg_contradiction > 0.10
    if contested and weak_signal:
        return TrendDirection.MIXED

    if avg_direction >= BULLISH_THRESHOLD:
        verdict = TrendDirection.BULLISH
    elif avg_direction <= BEARISH_THRESHOLD:
        verdict = TrendDirection.BEARISH
    else:
        verdict = TrendDirection.NEUTRAL
    return verdict
def _build_rollup_disagreement(
    trends: list[CompanyTrendRow],
    entity_id: str,
) -> list[DisagreementDetail]:
    """Summarise the bullish-versus-bearish split among constituent companies.

    Returns a single-element list describing which companies are bullish and
    which are bearish (with the summed confidence on each side), or an empty
    list when either side has no companies.  Company tickers are carried in
    the ``positive_doc_ids`` / ``negative_doc_ids`` fields.
    """
    bull_value = TrendDirection.BULLISH.value
    bear_value = TrendDirection.BEARISH.value

    bulls: list[str] = []
    bears: list[str] = []
    bull_weight = 0.0
    bear_weight = 0.0
    for company in trends:
        stance = company.trend_direction
        if stance == bull_value:
            bulls.append(company.entity_id)
            bull_weight += company.confidence
        elif stance == bear_value:
            bears.append(company.entity_id)
            bear_weight += company.confidence

    # A disagreement needs at least one company on each side.
    if not (bulls and bears):
        return []

    detail = DisagreementDetail(
        dimension="company_direction",
        positive_doc_ids=bulls,
        negative_doc_ids=bears,
        positive_weight=round(bull_weight, 4),
        negative_weight=round(bear_weight, 4),
        description=(
            f"{entity_id}: {len(bulls)} bullish vs "
            f"{len(bears)} bearish companies"
        ),
    )
    return [detail]
# ---------------------------------------------------------------------------
# Persist rollup (reuses the same trend_windows table)
# ---------------------------------------------------------------------------
# NOTE: despite the name, this statement is a plain INSERT with no
# ON CONFLICT clause, so every execution adds a new trend_windows row and
# returns its id.  Parameters $7-$10, $12 and $13 are cast to jsonb and are
# therefore passed as JSON-encoded strings.
_UPSERT_TREND = """
INSERT INTO trend_windows (
entity_type, entity_id, window, trend_direction, trend_strength,
confidence, top_supporting_evidence, top_opposing_evidence,
dominant_catalysts, material_risks, contradiction_score,
disagreement_details, market_context, generated_at
) VALUES (
$1, $2, $3, $4, $5,
$6, $7::jsonb, $8::jsonb,
$9::jsonb, $10::jsonb, $11,
$12::jsonb, $13::jsonb, $14
)
RETURNING id
"""
async def persist_rollup(
    pool: asyncpg.Pool,
    summary: TrendSummary,
) -> str:
    """Store ``summary`` as a new trend_windows row and return the row id as a string.

    List-valued fields are JSON-encoded for the jsonb columns; the
    ``market_context`` column always receives an empty JSON object.
    """
    disagreement_payload = [
        detail.model_dump() for detail in summary.disagreement_details
    ]
    # Positional parameters, in the $1..$14 order expected by _UPSERT_TREND.
    params = (
        summary.entity_type,
        summary.entity_id,
        summary.window.value,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        json.dumps(summary.top_supporting_evidence),
        json.dumps(summary.top_opposing_evidence),
        json.dumps(summary.dominant_catalysts),
        json.dumps(summary.material_risks),
        summary.contradiction_score,
        json.dumps(disagreement_payload),
        json.dumps({}),
        summary.generated_at,
    )
    inserted = await pool.fetchrow(_UPSERT_TREND, *params)
    return str(inserted["id"])  # type: ignore[index]
# ---------------------------------------------------------------------------
# High-level rollup entry points
# ---------------------------------------------------------------------------
async def aggregate_sector(
    pool: asyncpg.Pool,
    sector: str,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
    macro_impacts: dict[str, SectorMacroImpact] | None = None,
) -> TrendSummary:
    """Build the rollup for one sector and window, persisting it when non-empty.

    The latest company trends are fetched for every company and then
    narrowed to ``sector``.  ``reference_time`` defaults to the current UTC
    time and ``since`` to ``reference_time`` minus the window's lookback.
    Macro impacts are fetched for the ``since``..``reference_time`` range
    when the caller passes None.  The summary is always returned, but it is
    only written to the database when the sector has at least one trend.
    """
    now = datetime.now(timezone.utc) if reference_time is None else reference_time
    cutoff = now - _window_lookback(window) if since is None else since

    every_trend = await fetch_latest_company_trends(pool, window, cutoff)
    members = [trend for trend in every_trend if trend.sector == sector]

    # Fetch macro impacts if not provided
    impacts = macro_impacts
    if impacts is None:
        impacts = await fetch_sector_macro_impacts(pool, cutoff, now)

    summary = rollup_trends(
        members, "sector", sector, window, now,
        macro_impacts=impacts,
    )
    if not members:
        # Nothing to aggregate: hand back the neutral summary unpersisted.
        return summary

    rollup_id = await persist_rollup(pool, summary)
    logger.info(
        "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d",
        rollup_id, sector, window, summary.trend_direction.value,
        summary.trend_strength, len(members),
    )
    return summary
async def aggregate_market(
    pool: asyncpg.Pool,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
    macro_impacts: dict[str, SectorMacroImpact] | None = None,
) -> TrendSummary:
    """Build the market-wide rollup for one window, persisting it when non-empty.

    All latest company trends are aggregated regardless of sector under the
    entity id "all".  ``reference_time`` defaults to the current UTC time and
    ``since`` to ``reference_time`` minus the window's lookback.  Macro
    impacts are fetched for the ``since``..``reference_time`` range when the
    caller passes None.  The summary is always returned, but it is only
    written to the database when at least one company trend was found.
    """
    now = datetime.now(timezone.utc) if reference_time is None else reference_time
    cutoff = now - _window_lookback(window) if since is None else since

    company_trends = await fetch_latest_company_trends(pool, window, cutoff)

    # Fetch macro impacts if not provided
    impacts = macro_impacts
    if impacts is None:
        impacts = await fetch_sector_macro_impacts(pool, cutoff, now)

    summary = rollup_trends(
        company_trends, "market", "all", window, now,
        macro_impacts=impacts,
    )
    if not company_trends:
        # Nothing to aggregate: hand back the neutral summary unpersisted.
        return summary

    rollup_id = await persist_rollup(pool, summary)
    logger.info(
        "Persisted market rollup %s for %s: direction=%s strength=%.3f companies=%d",
        rollup_id, window, summary.trend_direction.value,
        summary.trend_strength, len(company_trends),
    )
    return summary
async def aggregate_all_sectors(
    pool: asyncpg.Pool,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
    macro_impacts: dict[str, SectorMacroImpact] | None = None,
) -> list[TrendSummary]:
    """Compute and persist sector rollups for every sector that has company trends.

    Company trends and macro impacts are each fetched once and shared by all
    sectors.

    Args:
        pool: asyncpg connection pool.
        window: trend window value to aggregate.
        reference_time: timestamp for the rollups; defaults to now (UTC).
        since: lower bound for company trends and macro impacts; defaults to
            ``reference_time`` minus the window's lookback.
        macro_impacts: pre-fetched per-sector macro aggregates; fetched for
            the ``since``..``reference_time`` range when None.

    Returns:
        One persisted summary per sector, in first-seen sector order.  The
        list is empty when no company trends were found.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
    if since is None:
        since = reference_time - _window_lookback(window)

    all_trends = await fetch_latest_company_trends(pool, window, since)

    # Fetch macro impacts once for all sectors if not provided
    if macro_impacts is None:
        macro_impacts = await fetch_sector_macro_impacts(pool, since, reference_time)

    # Group by sector; every group holds at least one trend by construction.
    sectors: dict[str, list[CompanyTrendRow]] = {}
    for t in all_trends:
        sectors.setdefault(t.sector, []).append(t)

    summaries: list[TrendSummary] = []
    for sector, trends in sectors.items():
        summary = rollup_trends(
            trends, "sector", sector, window, reference_time,
            macro_impacts=macro_impacts,
        )
        rollup_id = await persist_rollup(pool, summary)
        # Same log line as aggregate_sector so both paths are traceable alike.
        logger.info(
            "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d",
            rollup_id, sector, window, summary.trend_direction.value,
            summary.trend_strength, len(trends),
        )
        summaries.append(summary)
    return summaries
def _window_lookback(window: str) -> timedelta:
    """Return how far back to search for recent company trends of ``window``.

    Each known window gets a lookback a little longer than the window
    itself; unrecognised windows fall back to eight days.
    """
    fallback = timedelta(days=8)
    lookbacks = {
        TrendWindow.INTRADAY.value: timedelta(hours=24),
        TrendWindow.ONE_DAY.value: timedelta(days=2),
        TrendWindow.SEVEN_DAY.value: timedelta(days=8),
        TrendWindow.THIRTY_DAY.value: timedelta(days=35),
        TrendWindow.NINETY_DAY.value: timedelta(days=95),
    }
    found = lookbacks.get(window)
    return fallback if found is None else found