"""Sector and market-level rollup aggregation. Aggregates company-level trend summaries into sector and market-level summaries, enabling top-down views of sentiment and risk across the portfolio. Requirements: 6.3, 6.4, 6.5 """ from __future__ import annotations import json import logging from dataclasses import dataclass from datetime import datetime, timedelta, timezone import asyncpg from services.shared.schemas import ( DisagreementDetail, TrendDirection, TrendSummary, TrendWindow, ) logger = logging.getLogger(__name__) @dataclass class CompanyTrendRow: """A company-level trend summary fetched from the DB for rollup.""" entity_id: str # ticker sector: str window: str trend_direction: str trend_strength: float confidence: float contradiction_score: float dominant_catalysts: list[str] material_risks: list[str] top_supporting_evidence: list[str] top_opposing_evidence: list[str] # --------------------------------------------------------------------------- # Fetch latest company trends for a given window # --------------------------------------------------------------------------- _LATEST_COMPANY_TRENDS_QUERY = """ SELECT DISTINCT ON (tw.entity_id) tw.entity_id, c.sector, tw.window, tw.trend_direction, tw.trend_strength, tw.confidence, tw.contradiction_score, tw.dominant_catalysts, tw.material_risks, tw.top_supporting_evidence, tw.top_opposing_evidence FROM trend_windows tw JOIN companies c ON c.ticker = tw.entity_id AND c.active = TRUE WHERE tw.entity_type = 'company' AND tw.window = $1 AND tw.generated_at >= $2 ORDER BY tw.entity_id, tw.generated_at DESC """ def _parse_jsonb_list(val: object) -> list[str]: """Safely parse a JSONB column that should be a list of strings.""" if isinstance(val, list): return [str(v) for v in val] if isinstance(val, str): parsed = json.loads(val) if isinstance(parsed, list): return [str(v) for v in parsed] return [] def _parse_company_trend_row(row: object) -> CompanyTrendRow: """Convert an asyncpg Record to a CompanyTrendRow.""" # asyncpg Records support dict() but aren't typed; use getattr-style access get = getattr(row, "__getitem__", None) if get is None: raise TypeError(f"Expected a mapping-like row, got {type(row)}") def _str(key: str, default: str = "") -> str: val = get(key) return str(val) if val is not None else default def _float(key: str) -> float: val = get(key) return float(val) if val is not None else 0.0 return CompanyTrendRow( entity_id=_str("entity_id"), sector=_str("sector", "Unknown") or "Unknown", window=_str("window"), trend_direction=_str("trend_direction"), trend_strength=_float("trend_strength"), confidence=_float("confidence"), contradiction_score=_float("contradiction_score"), dominant_catalysts=_parse_jsonb_list(get("dominant_catalysts")), material_risks=_parse_jsonb_list(get("material_risks")), top_supporting_evidence=_parse_jsonb_list(get("top_supporting_evidence")), top_opposing_evidence=_parse_jsonb_list(get("top_opposing_evidence")), ) async def fetch_latest_company_trends( pool: asyncpg.Pool, window: str, since: datetime, ) -> list[CompanyTrendRow]: """Fetch the most recent company-level trend for each ticker in a window.""" rows = await pool.fetch(_LATEST_COMPANY_TRENDS_QUERY, window, since) return [_parse_company_trend_row(r) for r in rows] # --------------------------------------------------------------------------- # Pure rollup logic # --------------------------------------------------------------------------- # Direction mapping for numeric aggregation _DIRECTION_VALUES = { TrendDirection.BULLISH.value: 1.0, TrendDirection.BEARISH.value: -1.0, TrendDirection.MIXED.value: 0.0, TrendDirection.NEUTRAL.value: 0.0, } BULLISH_THRESHOLD = 0.15 BEARISH_THRESHOLD = -0.15 def rollup_trends( trends: list[CompanyTrendRow], entity_type: str, entity_id: str, window: str, reference_time: datetime, ) -> TrendSummary: """Aggregate a list of company-level trends into a single rollup summary. Each company trend is weighted by its confidence to produce a confidence-weighted average of direction, strength, and contradiction. """ if not trends: return TrendSummary( entity_type=entity_type, entity_id=entity_id, window=TrendWindow(window), trend_direction=TrendDirection.NEUTRAL, trend_strength=0.0, confidence=0.0, generated_at=reference_time, ) total_weight = 0.0 weighted_direction = 0.0 weighted_strength = 0.0 weighted_contradiction = 0.0 catalyst_weights: dict[str, float] = {} risk_set: dict[str, float] = {} all_supporting: list[str] = [] all_opposing: list[str] = [] for t in trends: w = t.confidence total_weight += w dir_val = _DIRECTION_VALUES.get(t.trend_direction, 0.0) weighted_direction += w * dir_val weighted_strength += w * t.trend_strength weighted_contradiction += w * t.contradiction_score for cat in t.dominant_catalysts: catalyst_weights[cat] = catalyst_weights.get(cat, 0.0) + w for risk in t.material_risks: norm = risk.strip().lower() if norm not in risk_set: risk_set[norm] = w else: risk_set[norm] = max(risk_set[norm], w) all_supporting.extend(t.top_supporting_evidence) all_opposing.extend(t.top_opposing_evidence) if total_weight == 0.0: return TrendSummary( entity_type=entity_type, entity_id=entity_id, window=TrendWindow(window), trend_direction=TrendDirection.NEUTRAL, trend_strength=0.0, confidence=0.0, generated_at=reference_time, ) avg_direction = weighted_direction / total_weight avg_strength = weighted_strength / total_weight avg_contradiction = weighted_contradiction / total_weight avg_confidence = total_weight / len(trends) # Derive direction direction = _derive_rollup_direction(avg_direction, avg_contradiction) # Top catalysts sorted_catalysts = sorted(catalyst_weights.items(), key=lambda x: x[1], reverse=True) catalysts = [c for c, _ in sorted_catalysts[:5]] # Top risks (deduplicated, by weight) sorted_risks = sorted(risk_set.items(), key=lambda x: x[1], reverse=True) risks = [r for r, _ in sorted_risks[:5]] # Disagreement details disagreement = _build_rollup_disagreement(trends, entity_id) return TrendSummary( entity_type=entity_type, entity_id=entity_id, window=TrendWindow(window), trend_direction=direction, trend_strength=round(min(abs(avg_strength), 1.0), 4), confidence=round(max(0.0, min(avg_confidence, 1.0)), 4), top_supporting_evidence=list(dict.fromkeys(all_supporting))[:10], top_opposing_evidence=list(dict.fromkeys(all_opposing))[:10], dominant_catalysts=catalysts, material_risks=risks, contradiction_score=round(max(0.0, min(avg_contradiction, 1.0)), 4), disagreement_details=disagreement, generated_at=reference_time, ) def _derive_rollup_direction( avg_direction: float, avg_contradiction: float, ) -> TrendDirection: """Map averaged direction value to a TrendDirection.""" if avg_contradiction > 0.10 and abs(avg_direction) < 0.3: return TrendDirection.MIXED if avg_direction >= BULLISH_THRESHOLD: return TrendDirection.BULLISH if avg_direction <= BEARISH_THRESHOLD: return TrendDirection.BEARISH return TrendDirection.NEUTRAL def _build_rollup_disagreement( trends: list[CompanyTrendRow], entity_id: str, ) -> list[DisagreementDetail]: """Build disagreement details showing which companies are bullish vs bearish.""" bullish_ids: list[str] = [] bearish_ids: list[str] = [] bullish_weight = 0.0 bearish_weight = 0.0 for t in trends: if t.trend_direction == TrendDirection.BULLISH.value: bullish_ids.append(t.entity_id) bullish_weight += t.confidence elif t.trend_direction == TrendDirection.BEARISH.value: bearish_ids.append(t.entity_id) bearish_weight += t.confidence if not bullish_ids or not bearish_ids: return [] return [ DisagreementDetail( dimension="company_direction", positive_doc_ids=bullish_ids, negative_doc_ids=bearish_ids, positive_weight=round(bullish_weight, 4), negative_weight=round(bearish_weight, 4), description=( f"{entity_id}: {len(bullish_ids)} bullish vs " f"{len(bearish_ids)} bearish companies" ), ) ] # --------------------------------------------------------------------------- # Persist rollup (reuses the same trend_windows table) # --------------------------------------------------------------------------- _UPSERT_TREND = """ INSERT INTO trend_windows ( entity_type, entity_id, window, trend_direction, trend_strength, confidence, top_supporting_evidence, top_opposing_evidence, dominant_catalysts, material_risks, contradiction_score, disagreement_details, market_context, generated_at ) VALUES ( $1, $2, $3, $4, $5, $6, $7::jsonb, $8::jsonb, $9::jsonb, $10::jsonb, $11, $12::jsonb, $13::jsonb, $14 ) RETURNING id """ async def persist_rollup( pool: asyncpg.Pool, summary: TrendSummary, ) -> str: """Insert a rollup trend summary and return its UUID.""" row = await pool.fetchrow( _UPSERT_TREND, summary.entity_type, summary.entity_id, summary.window.value, summary.trend_direction.value, summary.trend_strength, summary.confidence, json.dumps(summary.top_supporting_evidence), json.dumps(summary.top_opposing_evidence), json.dumps(summary.dominant_catalysts), json.dumps(summary.material_risks), summary.contradiction_score, json.dumps([d.model_dump() for d in summary.disagreement_details]), json.dumps({}), summary.generated_at, ) return str(row["id"]) # type: ignore[index] # --------------------------------------------------------------------------- # High-level rollup entry points # --------------------------------------------------------------------------- async def aggregate_sector( pool: asyncpg.Pool, sector: str, window: str, reference_time: datetime | None = None, since: datetime | None = None, ) -> TrendSummary: """Compute and persist a sector-level rollup for one window. Fetches the latest company trends, filters to the given sector, and rolls them up into a single sector summary. """ if reference_time is None: reference_time = datetime.now(timezone.utc) if since is None: since = reference_time - _window_lookback(window) all_trends = await fetch_latest_company_trends(pool, window, since) sector_trends = [t for t in all_trends if t.sector == sector] summary = rollup_trends(sector_trends, "sector", sector, window, reference_time) if sector_trends: rollup_id = await persist_rollup(pool, summary) logger.info( "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d", rollup_id, sector, window, summary.trend_direction.value, summary.trend_strength, len(sector_trends), ) return summary async def aggregate_market( pool: asyncpg.Pool, window: str, reference_time: datetime | None = None, since: datetime | None = None, ) -> TrendSummary: """Compute and persist a market-wide rollup for one window. Aggregates all company trends regardless of sector. """ if reference_time is None: reference_time = datetime.now(timezone.utc) if since is None: since = reference_time - _window_lookback(window) all_trends = await fetch_latest_company_trends(pool, window, since) summary = rollup_trends(all_trends, "market", "all", window, reference_time) if all_trends: rollup_id = await persist_rollup(pool, summary) logger.info( "Persisted market rollup %s for %s: direction=%s strength=%.3f companies=%d", rollup_id, window, summary.trend_direction.value, summary.trend_strength, len(all_trends), ) return summary async def aggregate_all_sectors( pool: asyncpg.Pool, window: str, reference_time: datetime | None = None, since: datetime | None = None, ) -> list[TrendSummary]: """Compute sector rollups for every sector that has company trends.""" if reference_time is None: reference_time = datetime.now(timezone.utc) if since is None: since = reference_time - _window_lookback(window) all_trends = await fetch_latest_company_trends(pool, window, since) # Group by sector sectors: dict[str, list[CompanyTrendRow]] = {} for t in all_trends: sectors.setdefault(t.sector, []).append(t) summaries: list[TrendSummary] = [] for sector, trends in sectors.items(): summary = rollup_trends(trends, "sector", sector, window, reference_time) if trends: _id = await persist_rollup(pool, summary) summaries.append(summary) return summaries def _window_lookback(window: str) -> timedelta: """Return a reasonable lookback for finding recent company trends.""" mapping = { TrendWindow.INTRADAY.value: timedelta(hours=24), TrendWindow.ONE_DAY.value: timedelta(days=2), TrendWindow.SEVEN_DAY.value: timedelta(days=8), TrendWindow.THIRTY_DAY.value: timedelta(days=35), TrendWindow.NINETY_DAY.value: timedelta(days=95), } return mapping.get(window, timedelta(days=8))