phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,439 @@
|
||||
"""Sector and market-level rollup aggregation.

Aggregates company-level trend summaries into sector and market-level
summaries, enabling top-down views of sentiment and risk across the
portfolio.

Requirements: 6.3, 6.4, 6.5
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.shared.schemas import (
|
||||
DisagreementDetail,
|
||||
TrendDirection,
|
||||
TrendSummary,
|
||||
TrendWindow,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CompanyTrendRow:
    """A company-level trend summary fetched from the DB for rollup.

    One instance corresponds to one row returned by the latest-company-trends
    query: the ``trend_windows`` columns plus the company's sector.
    """

    entity_id: str  # ticker (joined against companies.ticker)
    sector: str  # companies.sector; the row parser substitutes "Unknown" when missing
    window: str  # trend window value, e.g. a TrendWindow member's ``.value``
    trend_direction: str  # direction value as stored, compared against TrendDirection values
    trend_strength: float
    confidence: float  # used as the aggregation weight during rollup
    contradiction_score: float
    dominant_catalysts: list[str]
    material_risks: list[str]
    top_supporting_evidence: list[str]
    top_opposing_evidence: list[str]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Fetch latest company trends for a given window
# ---------------------------------------------------------------------------
|
||||
|
||||
# Newest trend row per active company for a single window.
#   $1 = window value, $2 = inclusive lower bound on generated_at.
# DISTINCT ON (entity_id) combined with ORDER BY entity_id, generated_at DESC
# keeps only the most recently generated row for each ticker.
_LATEST_COMPANY_TRENDS_QUERY = """
    SELECT DISTINCT ON (tw.entity_id)
        tw.entity_id,
        c.sector,
        tw.window,
        tw.trend_direction,
        tw.trend_strength,
        tw.confidence,
        tw.contradiction_score,
        tw.dominant_catalysts,
        tw.material_risks,
        tw.top_supporting_evidence,
        tw.top_opposing_evidence
    FROM trend_windows tw
    JOIN companies c ON c.ticker = tw.entity_id AND c.active = TRUE
    WHERE tw.entity_type = 'company'
      AND tw.window = $1
      AND tw.generated_at >= $2
    ORDER BY tw.entity_id, tw.generated_at DESC
"""
|
||||
|
||||
|
||||
def _parse_jsonb_list(val: object) -> list[str]:
    """Safely parse a JSONB column that should be a list of strings.

    Args:
        val: Either an already-decoded list, or a JSON-encoded string.

    Returns:
        The list elements converted with ``str()``. Any input that is not a
        list (or a string that decodes to one) yields an empty list, and so
        does a string that is not valid JSON.
    """
    if isinstance(val, str):
        try:
            val = json.loads(val)
        except json.JSONDecodeError:
            # Malformed text is treated like any other non-list value rather
            # than aborting the whole row parse.
            return []
    if isinstance(val, list):
        return [str(item) for item in val]
    return []
|
||||
|
||||
|
||||
def _parse_company_trend_row(row: object) -> CompanyTrendRow:
    """Build a CompanyTrendRow from a subscriptable DB row.

    Args:
        row: Any object supporting ``row[column_name]`` (e.g. an asyncpg Record).

    Returns:
        The populated CompanyTrendRow. NULL text columns become ``""``, NULL
        numeric columns become ``0.0``, and a NULL or empty sector becomes
        ``"Unknown"``.

    Raises:
        TypeError: If ``row`` has no ``__getitem__``.
    """
    # Records are untyped, so subscript through the bound __getitem__.
    lookup = getattr(row, "__getitem__", None)
    if lookup is None:
        raise TypeError(f"Expected a mapping-like row, got {type(row)}")

    def text(column: str, fallback: str = "") -> str:
        raw = lookup(column)
        if raw is None:
            return fallback
        return str(raw)

    def number(column: str) -> float:
        raw = lookup(column)
        return 0.0 if raw is None else float(raw)

    def string_list(column: str) -> list[str]:
        return _parse_jsonb_list(lookup(column))

    return CompanyTrendRow(
        entity_id=text("entity_id"),
        # `or` also covers an empty-string sector, not just NULL.
        sector=text("sector", "Unknown") or "Unknown",
        window=text("window"),
        trend_direction=text("trend_direction"),
        trend_strength=number("trend_strength"),
        confidence=number("confidence"),
        contradiction_score=number("contradiction_score"),
        dominant_catalysts=string_list("dominant_catalysts"),
        material_risks=string_list("material_risks"),
        top_supporting_evidence=string_list("top_supporting_evidence"),
        top_opposing_evidence=string_list("top_opposing_evidence"),
    )
|
||||
|
||||
|
||||
async def fetch_latest_company_trends(
    pool: asyncpg.Pool,
    window: str,
    since: datetime,
) -> list[CompanyTrendRow]:
    """Load the newest company-level trend per ticker for one window.

    Args:
        pool: Connection pool used to run the query.
        window: Window value bound to the query's ``$1``.
        since: Lower bound on ``generated_at`` bound to the query's ``$2``.

    Returns:
        One parsed CompanyTrendRow per record returned by the query.
    """
    records = await pool.fetch(_LATEST_COMPANY_TRENDS_QUERY, window, since)
    return list(map(_parse_company_trend_row, records))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Pure rollup logic
# ---------------------------------------------------------------------------
|
||||
|
||||
# Signed score assigned to each direction value for numeric aggregation:
# bullish and bearish pull in opposite directions, mixed and neutral add nothing.
_DIRECTION_VALUES: dict[str, float] = {
    TrendDirection.BULLISH.value: 1.0,
    TrendDirection.BEARISH.value: -1.0,
    TrendDirection.MIXED.value: 0.0,
    TrendDirection.NEUTRAL.value: 0.0,
}

# Cut-offs applied to the averaged direction score when labelling a rollup.
BULLISH_THRESHOLD = 0.15
BEARISH_THRESHOLD = -0.15
|
||||
|
||||
|
||||
def _neutral_rollup(
    entity_type: str,
    entity_id: str,
    window: str,
    reference_time: datetime,
) -> TrendSummary:
    """Return the zero-strength, zero-confidence NEUTRAL summary for "nothing to aggregate"."""
    return TrendSummary(
        entity_type=entity_type,
        entity_id=entity_id,
        window=TrendWindow(window),
        trend_direction=TrendDirection.NEUTRAL,
        trend_strength=0.0,
        confidence=0.0,
        generated_at=reference_time,
    )


def rollup_trends(
    trends: list[CompanyTrendRow],
    entity_type: str,
    entity_id: str,
    window: str,
    reference_time: datetime,
) -> TrendSummary:
    """Aggregate a list of company-level trends into a single rollup summary.

    Each company trend is weighted by its confidence to produce a
    confidence-weighted average of direction, strength, and contradiction.

    Args:
        trends: Company-level rows to aggregate.
        entity_type: Entity type recorded on the summary (e.g. "sector").
        entity_id: Entity id recorded on the summary.
        window: Window value; converted with ``TrendWindow(window)``.
        reference_time: Timestamp stored as ``generated_at``.

    Returns:
        The rollup summary. When ``trends`` is empty, or no row carries a
        positive confidence, a NEUTRAL summary with zero strength and zero
        confidence is returned instead.

    Notes:
        * The reported confidence is the plain mean of the per-company weights.
        * Catalysts are ranked by accumulated weight (top 5).
        * Risks are keyed by their stripped, lower-cased text, ranked by the
          highest weight of any company citing them (top 5), and emitted in
          that normalized form.
        * Evidence lists are de-duplicated in first-seen order and capped at 10.
    """
    if not trends:
        return _neutral_rollup(entity_type, entity_id, window, reference_time)

    total_weight = 0.0
    weighted_direction = 0.0
    weighted_strength = 0.0
    weighted_contradiction = 0.0
    catalyst_weights: dict[str, float] = {}
    risk_weights: dict[str, float] = {}
    all_supporting: list[str] = []
    all_opposing: list[str] = []

    for t in trends:
        # A negative confidence is invalid input. Letting it through could
        # drive the weight total to (or below) zero and flip or blow up the
        # averages, so such rows contribute zero weight instead.
        w = max(t.confidence, 0.0)
        total_weight += w
        # Unrecognized direction values contribute nothing to the direction score.
        weighted_direction += w * _DIRECTION_VALUES.get(t.trend_direction, 0.0)
        weighted_strength += w * t.trend_strength
        weighted_contradiction += w * t.contradiction_score

        for cat in t.dominant_catalysts:
            catalyst_weights[cat] = catalyst_weights.get(cat, 0.0) + w

        for risk in t.material_risks:
            norm = risk.strip().lower()
            risk_weights[norm] = max(risk_weights.get(norm, w), w)

        all_supporting.extend(t.top_supporting_evidence)
        all_opposing.extend(t.top_opposing_evidence)

    if total_weight <= 0.0:
        # No row carries any weight, so a weighted average is undefined.
        return _neutral_rollup(entity_type, entity_id, window, reference_time)

    avg_direction = weighted_direction / total_weight
    avg_strength = weighted_strength / total_weight
    avg_contradiction = weighted_contradiction / total_weight
    avg_confidence = total_weight / len(trends)

    direction = _derive_rollup_direction(avg_direction, avg_contradiction)

    # sorted() is stable, so ties keep first-seen order.
    sorted_catalysts = sorted(catalyst_weights.items(), key=lambda x: x[1], reverse=True)
    catalysts = [c for c, _ in sorted_catalysts[:5]]

    sorted_risks = sorted(risk_weights.items(), key=lambda x: x[1], reverse=True)
    risks = [r for r, _ in sorted_risks[:5]]

    disagreement = _build_rollup_disagreement(trends, entity_id)

    return TrendSummary(
        entity_type=entity_type,
        entity_id=entity_id,
        window=TrendWindow(window),
        trend_direction=direction,
        trend_strength=round(min(abs(avg_strength), 1.0), 4),
        confidence=round(max(0.0, min(avg_confidence, 1.0)), 4),
        # dict.fromkeys de-duplicates while preserving first-seen order.
        top_supporting_evidence=list(dict.fromkeys(all_supporting))[:10],
        top_opposing_evidence=list(dict.fromkeys(all_opposing))[:10],
        dominant_catalysts=catalysts,
        material_risks=risks,
        contradiction_score=round(max(0.0, min(avg_contradiction, 1.0)), 4),
        disagreement_details=disagreement,
        generated_at=reference_time,
    )
|
||||
|
||||
|
||||
def _derive_rollup_direction(
    avg_direction: float,
    avg_contradiction: float,
) -> TrendDirection:
    """Translate an averaged direction score into a TrendDirection label.

    A weak score (magnitude below 0.3) combined with contradiction above 0.10
    is labelled MIXED. Otherwise the score is compared with the bullish and
    bearish thresholds, and anything in between is NEUTRAL.
    """
    weak_signal = abs(avg_direction) < 0.3
    if weak_signal and avg_contradiction > 0.10:
        return TrendDirection.MIXED

    if avg_direction >= BULLISH_THRESHOLD:
        verdict = TrendDirection.BULLISH
    elif avg_direction <= BEARISH_THRESHOLD:
        verdict = TrendDirection.BEARISH
    else:
        verdict = TrendDirection.NEUTRAL
    return verdict
|
||||
|
||||
|
||||
def _build_rollup_disagreement(
    trends: list[CompanyTrendRow],
    entity_id: str,
) -> list[DisagreementDetail]:
    """Summarize the bullish-versus-bearish split among the given companies.

    Returns:
        A single-element list naming the companies on each side and the summed
        confidence per side, or an empty list when either side has no members.
    """
    bull_key = TrendDirection.BULLISH.value
    bear_key = TrendDirection.BEARISH.value
    tickers: dict[str, list[str]] = {bull_key: [], bear_key: []}
    weights: dict[str, float] = {bull_key: 0.0, bear_key: 0.0}

    for company in trends:
        side = company.trend_direction
        if side not in tickers:
            # Mixed, neutral, and unknown directions take no side.
            continue
        tickers[side].append(company.entity_id)
        weights[side] += company.confidence

    bulls = tickers[bull_key]
    bears = tickers[bear_key]
    if not (bulls and bears):
        return []

    detail = DisagreementDetail(
        dimension="company_direction",
        # For rollups the "doc id" fields carry company ids.
        positive_doc_ids=bulls,
        negative_doc_ids=bears,
        positive_weight=round(weights[bull_key], 4),
        negative_weight=round(weights[bear_key], 4),
        description=(
            f"{entity_id}: {len(bulls)} bullish vs "
            f"{len(bears)} bearish companies"
        ),
    )
    return [detail]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Persist rollup (reuses the same trend_windows table)
# ---------------------------------------------------------------------------
|
||||
|
||||
# NOTE: despite the name this is a plain INSERT (there is no ON CONFLICT
# clause), so every call adds a new row.
# "window" is double-quoted because WINDOW is a reserved key word in
# PostgreSQL and cannot appear bare in an INSERT column list.
_UPSERT_TREND = """
    INSERT INTO trend_windows (
        entity_type, entity_id, "window", trend_direction, trend_strength,
        confidence, top_supporting_evidence, top_opposing_evidence,
        dominant_catalysts, material_risks, contradiction_score,
        disagreement_details, market_context, generated_at
    ) VALUES (
        $1, $2, $3, $4, $5,
        $6, $7::jsonb, $8::jsonb,
        $9::jsonb, $10::jsonb, $11,
        $12::jsonb, $13::jsonb, $14
    )
    RETURNING id
"""


async def persist_rollup(
    pool: asyncpg.Pool,
    summary: TrendSummary,
) -> str:
    """Insert a rollup trend summary and return its id as a string.

    Args:
        pool: Connection pool used to run the INSERT.
        summary: Rollup to store. List-valued fields and the disagreement
            details are JSON-encoded before being bound to the jsonb columns.

    Returns:
        ``str()`` of the ``id`` returned by the INSERT.

    Raises:
        RuntimeError: If the statement unexpectedly returns no row.
    """
    params = (
        summary.entity_type,
        summary.entity_id,
        summary.window.value,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        json.dumps(summary.top_supporting_evidence),
        json.dumps(summary.top_opposing_evidence),
        json.dumps(summary.dominant_catalysts),
        json.dumps(summary.material_risks),
        summary.contradiction_score,
        json.dumps([d.model_dump() for d in summary.disagreement_details]),
        json.dumps({}),  # market_context is always stored empty for rollups
        summary.generated_at,
    )
    row = await pool.fetchrow(_UPSERT_TREND, *params)
    if row is None:
        # fetchrow() yields None when no row comes back; fail with a clear
        # message instead of a "NoneType is not subscriptable" TypeError.
        raise RuntimeError(
            "INSERT into trend_windows returned no row for "
            f"{summary.entity_type}/{summary.entity_id}"
        )
    return str(row["id"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# High-level rollup entry points
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def aggregate_sector(
    pool: asyncpg.Pool,
    sector: str,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
) -> TrendSummary:
    """Build the rollup for one sector and window, persisting it when non-empty.

    Args:
        pool: Connection pool for reading company trends and writing the rollup.
        sector: Sector name; compared for exact equality with each row's sector.
        window: Window value to aggregate.
        reference_time: Timestamp for the summary; defaults to the current UTC time.
        since: Oldest ``generated_at`` to consider; defaults to
            ``reference_time`` minus the window's lookback.

    Returns:
        The sector summary. It is only written to the database when at least
        one company trend matched the sector.
    """
    as_of = reference_time if reference_time is not None else datetime.now(timezone.utc)
    cutoff = since if since is not None else as_of - _window_lookback(window)

    fetched = await fetch_latest_company_trends(pool, window, cutoff)
    members = [row for row in fetched if row.sector == sector]

    summary = rollup_trends(members, "sector", sector, window, as_of)
    if not members:
        # Nothing matched: hand back the neutral summary without storing it.
        return summary

    rollup_id = await persist_rollup(pool, summary)
    logger.info(
        "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d",
        rollup_id,
        sector,
        window,
        summary.trend_direction.value,
        summary.trend_strength,
        len(members),
    )
    return summary
|
||||
|
||||
|
||||
async def aggregate_market(
    pool: asyncpg.Pool,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
) -> TrendSummary:
    """Build the market-wide rollup for one window, persisting it when non-empty.

    Every fetched company trend is included regardless of sector, and the
    summary is recorded as entity type "market" with entity id "all".

    Args:
        pool: Connection pool for reading company trends and writing the rollup.
        window: Window value to aggregate.
        reference_time: Timestamp for the summary; defaults to the current UTC time.
        since: Oldest ``generated_at`` to consider; defaults to
            ``reference_time`` minus the window's lookback.

    Returns:
        The market summary. It is only written to the database when at least
        one company trend was found.
    """
    as_of = reference_time if reference_time is not None else datetime.now(timezone.utc)
    cutoff = since if since is not None else as_of - _window_lookback(window)

    companies = await fetch_latest_company_trends(pool, window, cutoff)
    summary = rollup_trends(companies, "market", "all", window, as_of)
    if not companies:
        # No input rows: hand back the neutral summary without storing it.
        return summary

    rollup_id = await persist_rollup(pool, summary)
    logger.info(
        "Persisted market rollup %s for %s: direction=%s strength=%.3f companies=%d",
        rollup_id,
        window,
        summary.trend_direction.value,
        summary.trend_strength,
        len(companies),
    )
    return summary
|
||||
|
||||
|
||||
async def aggregate_all_sectors(
    pool: asyncpg.Pool,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
) -> list[TrendSummary]:
    """Compute and persist sector rollups for every sector that has company trends.

    Company trends are fetched once and grouped by sector, so each sector is
    rolled up from the same snapshot.

    Args:
        pool: Connection pool for reading company trends and writing rollups.
        window: Window value to aggregate.
        reference_time: Timestamp for the summaries; defaults to the current UTC time.
        since: Oldest ``generated_at`` to consider; defaults to
            ``reference_time`` minus the window's lookback.

    Returns:
        One persisted summary per sector, in order of first appearance in the
        fetched rows. Empty when no company trends were found.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
    if since is None:
        since = reference_time - _window_lookback(window)

    all_trends = await fetch_latest_company_trends(pool, window, since)

    # Group by sector. A group only exists once a row has been appended to it,
    # so no group is ever empty.
    sectors: dict[str, list[CompanyTrendRow]] = {}
    for t in all_trends:
        sectors.setdefault(t.sector, []).append(t)

    summaries: list[TrendSummary] = []
    for sector, trends in sectors.items():
        summary = rollup_trends(trends, "sector", sector, window, reference_time)
        rollup_id = await persist_rollup(pool, summary)
        logger.info(
            "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d",
            rollup_id,
            sector,
            window,
            summary.trend_direction.value,
            summary.trend_strength,
            len(trends),
        )
        summaries.append(summary)

    return summaries
|
||||
|
||||
|
||||
def _window_lookback(window: str) -> timedelta:
    """Return how far back to search for company trends of the given window.

    Each known window gets a lookback slightly longer than the window itself.
    Any unrecognized window value falls back to 8 days.
    """
    spans = (
        (TrendWindow.INTRADAY, timedelta(hours=24)),
        (TrendWindow.ONE_DAY, timedelta(days=2)),
        (TrendWindow.SEVEN_DAY, timedelta(days=8)),
        (TrendWindow.THIRTY_DAY, timedelta(days=35)),
        (TrendWindow.NINETY_DAY, timedelta(days=95)),
    )
    by_value = {member.value: span for member, span in spans}
    fallback = timedelta(days=8)
    return by_value.get(window, fallback)
|
||||
Reference in New Issue
Block a user