phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,169 @@
|
||||
"""Contradiction detection and disagreement representation.
|
||||
|
||||
Analyses weighted signals to detect and represent disagreement explicitly,
|
||||
rather than collapsing contradictory evidence into a single unsupported
|
||||
conclusion.
|
||||
|
||||
Requirements: 6.4, 6.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from services.aggregation.scoring import WeightedSignal
|
||||
from services.shared.schemas import DisagreementDetail
|
||||
|
||||
|
||||
@dataclass
class CatalystEntry:
    """Pairing of one document with one catalyst type.

    Contradiction detection needs nothing beyond these two fields, so this
    small carrier is used in place of ImpactRow; that keeps this module free
    of a circular dependency with worker.py.
    """

    # Identifier of the document; looked up against the signals' document_id.
    document_id: str
    # Catalyst label used as the grouping key when searching for disagreement.
    catalyst_type: str
|
||||
|
||||
|
||||
@dataclass
class ContradictionResult:
    """Outcome of a contradiction analysis run."""

    # Overall contradiction score, 0-1; same semantics as the existing
    # compute_contradiction_score.
    score: float
    # One entry per dimension in which disagreement was detected.
    details: list[DisagreementDetail]
|
||||
|
||||
|
||||
def detect_contradictions(
    signals: list[WeightedSignal],
    catalyst_entries: list[CatalystEntry] | None = None,
) -> ContradictionResult:
    """Analyse signals for disagreement along several dimensions.

    Dimensions examined:
      1. Sentiment -- the core positive-vs-negative split across all signals.
      2. Catalyst -- opposing sentiment within a single catalyst type; only
         evaluated when ``catalyst_entries`` is non-empty.

    Args:
        signals: Weighted document signals to analyse.
        catalyst_entries: Optional document-to-catalyst pairings.

    Returns:
        A ContradictionResult carrying the overall score and the list of
        per-dimension disagreement details (possibly empty).
    """
    findings: list[DisagreementDetail] = []

    sentiment_finding = _detect_sentiment_disagreement(signals)
    if sentiment_finding is not None:
        findings.append(sentiment_finding)

    # None and an empty list both skip the catalyst dimension.
    if catalyst_entries:
        findings.extend(_detect_catalyst_disagreement(signals, catalyst_entries))

    return ContradictionResult(
        score=_compute_overall_score(signals),
        details=findings,
    )
|
||||
|
||||
|
||||
def _compute_overall_score(signals: list[WeightedSignal]) -> float:
|
||||
"""Minority/majority weight ratio — backward-compatible formula."""
|
||||
if not signals:
|
||||
return 0.0
|
||||
|
||||
pos_weight = 0.0
|
||||
neg_weight = 0.0
|
||||
for sig in signals:
|
||||
w = sig.weight.combined * sig.impact_score
|
||||
if sig.sentiment_value > 0:
|
||||
pos_weight += w
|
||||
elif sig.sentiment_value < 0:
|
||||
neg_weight += w
|
||||
|
||||
total = pos_weight + neg_weight
|
||||
if total == 0.0:
|
||||
return 0.0
|
||||
|
||||
minority = min(pos_weight, neg_weight)
|
||||
return round(minority / total, 4)
|
||||
|
||||
|
||||
def _detect_sentiment_disagreement(
|
||||
signals: list[WeightedSignal],
|
||||
) -> DisagreementDetail | None:
|
||||
"""Detect when both positive and negative sentiment signals exist."""
|
||||
pos_ids: list[str] = []
|
||||
neg_ids: list[str] = []
|
||||
pos_weight = 0.0
|
||||
neg_weight = 0.0
|
||||
|
||||
for sig in signals:
|
||||
w = sig.weight.combined * sig.impact_score
|
||||
if w <= 0:
|
||||
continue
|
||||
if sig.sentiment_value > 0:
|
||||
pos_ids.append(sig.document_id)
|
||||
pos_weight += w
|
||||
elif sig.sentiment_value < 0:
|
||||
neg_ids.append(sig.document_id)
|
||||
neg_weight += w
|
||||
|
||||
if not pos_ids or not neg_ids:
|
||||
return None
|
||||
|
||||
total = pos_weight + neg_weight
|
||||
minority_pct = min(pos_weight, neg_weight) / total if total > 0 else 0.0
|
||||
|
||||
return DisagreementDetail(
|
||||
dimension="sentiment",
|
||||
positive_doc_ids=pos_ids,
|
||||
negative_doc_ids=neg_ids,
|
||||
positive_weight=round(pos_weight, 4),
|
||||
negative_weight=round(neg_weight, 4),
|
||||
description=(
|
||||
f"Sentiment split: {len(pos_ids)} positive vs {len(neg_ids)} negative signals "
|
||||
f"(minority weight ratio {minority_pct:.0%})"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _detect_catalyst_disagreement(
|
||||
signals: list[WeightedSignal],
|
||||
catalyst_entries: list[CatalystEntry],
|
||||
) -> list[DisagreementDetail]:
|
||||
"""Detect when the same catalyst type has both positive and negative signals."""
|
||||
# Build lookup: document_id → (sentiment_value, combined_weight)
|
||||
sig_lookup: dict[str, tuple[float, float]] = {}
|
||||
for sig in signals:
|
||||
w = sig.weight.combined * sig.impact_score
|
||||
if w > 0:
|
||||
sig_lookup[sig.document_id] = (sig.sentiment_value, w)
|
||||
|
||||
# Group by catalyst type
|
||||
from collections import defaultdict
|
||||
catalyst_groups: dict[str, list[tuple[str, float, float]]] = defaultdict(list)
|
||||
for entry in catalyst_entries:
|
||||
if entry.document_id in sig_lookup:
|
||||
sent_val, weight = sig_lookup[entry.document_id]
|
||||
if sent_val != 0.0:
|
||||
catalyst_groups[entry.catalyst_type].append(
|
||||
(entry.document_id, sent_val, weight)
|
||||
)
|
||||
|
||||
details: list[DisagreementDetail] = []
|
||||
for catalyst, entries in catalyst_groups.items():
|
||||
pos_ids = [doc_id for doc_id, sv, _ in entries if sv > 0]
|
||||
neg_ids = [doc_id for doc_id, sv, _ in entries if sv < 0]
|
||||
if not pos_ids or not neg_ids:
|
||||
continue
|
||||
|
||||
pos_w = sum(w for _, sv, w in entries if sv > 0)
|
||||
neg_w = sum(w for _, sv, w in entries if sv < 0)
|
||||
|
||||
details.append(DisagreementDetail(
|
||||
dimension=f"catalyst:{catalyst}",
|
||||
positive_doc_ids=pos_ids,
|
||||
negative_doc_ids=neg_ids,
|
||||
positive_weight=round(pos_w, 4),
|
||||
negative_weight=round(neg_w, 4),
|
||||
description=(
|
||||
f"Catalyst '{catalyst}' has {len(pos_ids)} positive and "
|
||||
f"{len(neg_ids)} negative signals"
|
||||
),
|
||||
))
|
||||
|
||||
return details
|
||||
@@ -0,0 +1,141 @@
|
||||
"""Evidence ranking for supporting and opposing documents.
|
||||
|
||||
Ranks document signals by a composite score that considers multiple
|
||||
factors beyond raw weight, producing explainable evidence lists for
|
||||
trend summaries.
|
||||
|
||||
Requirements: 6.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from services.aggregation.scoring import WeightedSignal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EvidenceRankConfig:
    """Tunable factors for the composite evidence ranking score."""

    # Share given to the combined signal weight
    # (recency * credibility * novelty * market).
    weight_factor: float = 0.40
    # Share given to the document's impact score.
    impact_factor: float = 0.30
    # Share given to recency on its own, so fresh evidence ranks higher.
    recency_factor: float = 0.20
    # Share given to extraction confidence.
    confidence_factor: float = 0.10
    # Cap on evidence references per side (supporting / opposing).
    max_refs: int = 10
|
||||
|
||||
|
||||
# Shared default configuration; serves as the default ``config`` argument of
# the ranking functions in this module.
DEFAULT_RANK_CONFIG = EvidenceRankConfig()
|
||||
|
||||
|
||||
@dataclass
class RankedEvidence:
    """Ranking outcome for one document, including the per-factor breakdown."""

    document_id: str
    # Composite score: the four factor contributions added together.
    rank_score: float
    # Contribution of the combined signal weight.
    weight_component: float
    # Contribution of the impact score.
    impact_component: float
    # Contribution of the recency weight.
    recency_component: float
    # Contribution of the credibility weight (stands in for confidence).
    confidence_component: float
    # Sentiment of the underlying signal: +1 / -1 / 0.
    sentiment_value: float
|
||||
|
||||
|
||||
def compute_evidence_rank(
    signal: WeightedSignal,
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> RankedEvidence:
    """Score a single signal for evidence ranking.

    Four contributions are added together, each scaled by its factor from
    ``config``:
      - ``weight.combined`` * ``weight_factor``
      - ``impact_score`` * ``impact_factor``
      - ``weight.recency`` * ``recency_factor`` (extra boost for freshness)
      - ``weight.credibility`` * ``confidence_factor`` (credibility is used
        as the extraction-confidence term)

    Provided every input lies in [0, 1], the composite is bounded by the sum
    of the factor weights.

    Returns:
        A RankedEvidence with the composite score and each contribution,
        all rounded to six decimals.
    """
    weights = signal.weight

    weight_part = weights.combined * config.weight_factor
    impact_part = signal.impact_score * config.impact_factor
    recency_part = weights.recency * config.recency_factor
    confidence_part = weights.credibility * config.confidence_factor

    composite = weight_part + impact_part + recency_part + confidence_part

    return RankedEvidence(
        document_id=signal.document_id,
        rank_score=round(composite, 6),
        weight_component=round(weight_part, 6),
        impact_component=round(impact_part, 6),
        recency_component=round(recency_part, 6),
        confidence_component=round(confidence_part, 6),
        sentiment_value=signal.sentiment_value,
    )
|
||||
|
||||
|
||||
def rank_evidence(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[str], list[str]]:
    """Rank signals into top supporting and opposing document ID lists.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Neutral/mixed signals (sentiment value 0) are excluded.

    The filtering, scoring, sorting and capping are delegated to
    rank_evidence_detailed so that both entry points always agree; this
    function only projects the ranked objects down to their document IDs.

    Args:
        signals: Weighted document signals to rank.
        config: Ranking factors and the per-side cap (``max_refs``).

    Returns:
        ``(supporting_ids, opposing_ids)``, each ordered by descending rank
        score and capped at ``config.max_refs``.
    """
    supporting, opposing = rank_evidence_detailed(signals, config)
    return (
        [ranked.document_id for ranked in supporting],
        [ranked.document_id for ranked in opposing],
    )
|
||||
|
||||
|
||||
def rank_evidence_detailed(
    signals: list[WeightedSignal],
    config: EvidenceRankConfig = DEFAULT_RANK_CONFIG,
) -> tuple[list[RankedEvidence], list[RankedEvidence]]:
    """Rank signals and return full RankedEvidence objects for each side.

    Counterpart of rank_evidence for callers that need the score breakdown
    for explainability. Signals with a sentiment value of 0 are skipped;
    positive sentiment goes to the supporting side, everything else to the
    opposing side.

    Returns:
        ``(supporting, opposing)``, each sorted by descending rank score and
        capped at ``config.max_refs``.
    """
    # Pair every non-neutral signal's ranking with the side it belongs to.
    scored = [
        (signal.sentiment_value > 0, compute_evidence_rank(signal, config))
        for signal in signals
        if signal.sentiment_value != 0.0
    ]

    def _by_score(item: RankedEvidence) -> float:
        return item.rank_score

    supporting = sorted(
        (ranked for is_supporting, ranked in scored if is_supporting),
        key=_by_score,
        reverse=True,
    )
    opposing = sorted(
        (ranked for is_supporting, ranked in scored if not is_supporting),
        key=_by_score,
        reverse=True,
    )

    limit = config.max_refs
    return supporting[:limit], opposing[:limit]
|
||||
@@ -0,0 +1,57 @@
|
||||
"""Aggregation worker entrypoint - polls Redis for aggregation jobs."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.aggregation.worker import aggregate_company
|
||||
from services.shared.config import load_config
|
||||
from services.shared.logging import setup_logging
|
||||
from services.shared.redis_keys import QUEUE_AGGREGATION, queue_key
|
||||
|
||||
# Logger for the aggregation worker entrypoint (fixed name, not __name__).
logger = logging.getLogger("aggregation_main")
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the aggregation worker loop.

    Loads configuration, sets up logging, opens a PostgreSQL pool and a Redis
    client, then polls the aggregation queue forever. Each queue entry is
    expected to be a JSON object with a ``ticker`` key; the ticker is passed
    to aggregate_company. When the queue is empty the loop sleeps one second
    before polling again.

    A malformed entry (invalid JSON, not an object, or no usable ticker) is
    logged and discarded instead of terminating the worker. A failure inside
    aggregate_company is logged and the loop continues. The Redis client and
    the pool are closed when the loop exits for any reason.
    """
    config = load_config()
    setup_logging("aggregation", level=config.log_level, json_output=config.json_logs)

    # Imported before any resource is opened so an import failure leaks nothing.
    import redis.asyncio as aioredis

    pool = await asyncpg.create_pool(dsn=config.postgres.dsn, min_size=2, max_size=8)
    try:
        redis_client = aioredis.from_url(config.redis.url)
        try:
            queue = queue_key(QUEUE_AGGREGATION)
            logger.info("Aggregation worker started, polling %s", queue)

            while True:
                raw = await redis_client.lpop(queue)
                if raw is None:
                    await asyncio.sleep(1)
                    continue

                # One bad message must not take the whole worker down.
                try:
                    job = json.loads(raw)
                except (TypeError, ValueError):
                    logger.error("Discarding malformed aggregation job: %r", raw)
                    continue

                ticker = job.get("ticker") if isinstance(job, dict) else None
                if not isinstance(ticker, str) or not ticker:
                    logger.error("Discarding aggregation job without a ticker: %r", raw)
                    continue

                logger.info("Processing aggregation job for %s", ticker)

                try:
                    summaries = await aggregate_company(pool, ticker)
                    logger.info(
                        "Aggregation complete for %s: %d windows",
                        ticker, len(summaries),
                    )
                except Exception:
                    # Top-level boundary: log with traceback, keep polling.
                    logger.exception("Aggregation failed for %s", ticker)
        finally:
            await redis_client.close()
    finally:
        # Runs even when creating the Redis client fails.
        await pool.close()
|
||||
|
||||
|
||||
# Script entry point: run the worker coroutine when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,150 @@
|
||||
"""Market context feature computation for aggregation windows.
|
||||
|
||||
Fetches recent market snapshots from PostgreSQL and computes context
|
||||
features (price change, volume trend, volatility) that enrich trend
|
||||
summaries and modulate signal weighting.
|
||||
|
||||
Requirements: 6.1, 6.2
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.shared.schemas import MarketContext, TrendWindow
|
||||
|
||||
# Map TrendWindow values to lookback durations in days.
# fetch_market_context falls back to 8 days for a window that is not listed.
WINDOW_LOOKBACK_DAYS: dict[str, int] = {
    TrendWindow.INTRADAY.value: 1,
    TrendWindow.ONE_DAY.value: 2,
    TrendWindow.SEVEN_DAY.value: 8,
    TrendWindow.THIRTY_DAY.value: 35,
    TrendWindow.NINETY_DAY.value: 95,
}
|
||||
|
||||
|
||||
async def fetch_market_context(
    pool: asyncpg.Pool,
    ticker: str,
    window: str,
    reference_time: datetime | None = None,
) -> MarketContext:
    """Assemble the MarketContext for *ticker* over the trend *window*.

    Reads ``market_snapshots`` rows for the ticker captured between
    ``reference_time - lookback`` and ``reference_time`` (both inclusive),
    oldest first, and derives from them:
      - price_change_pct: change from first to last close, in percent
      - avg_volume: mean volume across bars
      - volume_change_pct: second-half vs first-half average volume, in percent
      - volatility: standard deviation of close prices
      - latest_close / latest_bar_at

    Args:
        pool: asyncpg connection pool.
        ticker: Ticker symbol to query.
        window: TrendWindow value; unknown values use an 8-day lookback.
        reference_time: End of the lookback range; defaults to the current
            UTC time.

    Returns:
        The computed context, or ``MarketContext(ticker=ticker)`` (all other
        fields left at their defaults) when no rows or no usable bars exist.
    """
    moment = datetime.now(timezone.utc) if reference_time is None else reference_time
    window_start = moment - timedelta(days=WINDOW_LOOKBACK_DAYS.get(window, 8))

    query = """
        SELECT data, captured_at
        FROM market_snapshots
        WHERE ticker = $1
        AND captured_at >= $2
        AND captured_at <= $3
        ORDER BY captured_at ASC
        """
    rows = await pool.fetch(query, ticker, window_start, moment)

    # No rows and rows without a usable close both yield the bare context.
    bars = _extract_bars(rows) if rows else []
    if not bars:
        return MarketContext(ticker=ticker)

    return _compute_context(ticker, bars)
|
||||
|
||||
|
||||
def _extract_bars(rows: list[Any]) -> list[dict[str, Any]]:
|
||||
"""Extract OHLCV bar dicts from market_snapshot rows.
|
||||
|
||||
The ``data`` column is JSONB. Polygon prev-day bars store fields like
|
||||
``o``, ``h``, ``l``, ``c``, ``v``, ``t``. We normalise to a common
|
||||
dict with ``close``, ``volume``, ``captured_at``.
|
||||
"""
|
||||
bars: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
data = row["data"]
|
||||
if isinstance(data, str):
|
||||
import json
|
||||
data = json.loads(data)
|
||||
|
||||
# Polygon-style single bar or list of bars
|
||||
items = data if isinstance(data, list) else [data]
|
||||
for item in items:
|
||||
close = item.get("c") or item.get("close")
|
||||
volume = item.get("v") or item.get("volume")
|
||||
if close is not None:
|
||||
bars.append({
|
||||
"close": float(close),
|
||||
"volume": float(volume) if volume is not None else 0.0,
|
||||
"captured_at": row["captured_at"],
|
||||
})
|
||||
return bars
|
||||
|
||||
|
||||
def _compute_context(ticker: str, bars: list[dict[str, Any]]) -> MarketContext:
    """Derive market context features from a non-empty, time-ordered bar list.

    Computes:
      - price change from the first to the last close, in percent (0.0 when
        the first close is 0)
      - mean volume
      - volume change of the second half of the bars relative to the first
        half, in percent (0.0 with fewer than two bars or a first-half
        average that is not positive)
      - volatility as the population standard deviation of the closes
        (0.0 with a single bar)
    """
    closes = [bar["close"] for bar in bars]
    volumes = [bar["volume"] for bar in bars]
    count = len(bars)
    opening, latest = closes[0], closes[-1]

    if opening != 0:
        price_change_pct = (latest - opening) / opening * 100.0
    else:
        price_change_pct = 0.0

    avg_volume = sum(volumes) / count if volumes else 0.0

    # Volume trend: later half measured against the earlier half.
    volume_change_pct = 0.0
    half = count // 2
    if half > 0:
        early, late = volumes[:half], volumes[half:]
        early_avg = sum(early) / half
        late_avg = sum(late) / len(late)
        if early_avg > 0:
            volume_change_pct = (late_avg - early_avg) / early_avg * 100.0

    # Volatility: divides by the number of closes, not by n - 1.
    volatility = 0.0
    if count > 1:
        mean_close = sum(closes) / count
        variance = sum((c - mean_close) ** 2 for c in closes) / count
        volatility = math.sqrt(variance)

    return MarketContext(
        ticker=ticker,
        price_change_pct=round(price_change_pct, 4),
        avg_volume=round(avg_volume, 2),
        volume_change_pct=round(volume_change_pct, 4),
        volatility=round(volatility, 6),
        latest_close=latest,
        latest_bar_at=bars[-1]["captured_at"],
        bars_available=count,
    )
|
||||
@@ -0,0 +1,439 @@
|
||||
"""Sector and market-level rollup aggregation.
|
||||
|
||||
Aggregates company-level trend summaries into sector and market-level
|
||||
summaries, enabling top-down views of sentiment and risk across the
|
||||
portfolio.
|
||||
|
||||
Requirements: 6.3, 6.4, 6.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.shared.schemas import (
|
||||
DisagreementDetail,
|
||||
TrendDirection,
|
||||
TrendSummary,
|
||||
TrendWindow,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CompanyTrendRow:
    """One company-level trend summary, as loaded from the DB for rollup."""

    # Ticker of the company.
    entity_id: str
    sector: str
    window: str
    # Stored as the string value of a TrendDirection member.
    trend_direction: str
    trend_strength: float
    # Used as this company's weight during rollup.
    confidence: float
    contradiction_score: float
    dominant_catalysts: list[str]
    material_risks: list[str]
    top_supporting_evidence: list[str]
    top_opposing_evidence: list[str]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch latest company trends for a given window
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_LATEST_COMPANY_TRENDS_QUERY = """
|
||||
SELECT DISTINCT ON (tw.entity_id)
|
||||
tw.entity_id,
|
||||
c.sector,
|
||||
tw.window,
|
||||
tw.trend_direction,
|
||||
tw.trend_strength,
|
||||
tw.confidence,
|
||||
tw.contradiction_score,
|
||||
tw.dominant_catalysts,
|
||||
tw.material_risks,
|
||||
tw.top_supporting_evidence,
|
||||
tw.top_opposing_evidence
|
||||
FROM trend_windows tw
|
||||
JOIN companies c ON c.ticker = tw.entity_id AND c.active = TRUE
|
||||
WHERE tw.entity_type = 'company'
|
||||
AND tw.window = $1
|
||||
AND tw.generated_at >= $2
|
||||
ORDER BY tw.entity_id, tw.generated_at DESC
|
||||
"""
|
||||
|
||||
|
||||
def _parse_jsonb_list(val: object) -> list[str]:
|
||||
"""Safely parse a JSONB column that should be a list of strings."""
|
||||
if isinstance(val, list):
|
||||
return [str(v) for v in val]
|
||||
if isinstance(val, str):
|
||||
parsed = json.loads(val)
|
||||
if isinstance(parsed, list):
|
||||
return [str(v) for v in parsed]
|
||||
return []
|
||||
|
||||
|
||||
def _parse_company_trend_row(row: object) -> CompanyTrendRow:
|
||||
"""Convert an asyncpg Record to a CompanyTrendRow."""
|
||||
# asyncpg Records support dict() but aren't typed; use getattr-style access
|
||||
get = getattr(row, "__getitem__", None)
|
||||
if get is None:
|
||||
raise TypeError(f"Expected a mapping-like row, got {type(row)}")
|
||||
|
||||
def _str(key: str, default: str = "") -> str:
|
||||
val = get(key)
|
||||
return str(val) if val is not None else default
|
||||
|
||||
def _float(key: str) -> float:
|
||||
val = get(key)
|
||||
return float(val) if val is not None else 0.0
|
||||
|
||||
return CompanyTrendRow(
|
||||
entity_id=_str("entity_id"),
|
||||
sector=_str("sector", "Unknown") or "Unknown",
|
||||
window=_str("window"),
|
||||
trend_direction=_str("trend_direction"),
|
||||
trend_strength=_float("trend_strength"),
|
||||
confidence=_float("confidence"),
|
||||
contradiction_score=_float("contradiction_score"),
|
||||
dominant_catalysts=_parse_jsonb_list(get("dominant_catalysts")),
|
||||
material_risks=_parse_jsonb_list(get("material_risks")),
|
||||
top_supporting_evidence=_parse_jsonb_list(get("top_supporting_evidence")),
|
||||
top_opposing_evidence=_parse_jsonb_list(get("top_opposing_evidence")),
|
||||
)
|
||||
|
||||
|
||||
async def fetch_latest_company_trends(
    pool: asyncpg.Pool,
    window: str,
    since: datetime,
) -> list[CompanyTrendRow]:
    """Load the newest company-level trend per ticker for one window.

    Args:
        pool: asyncpg connection pool.
        window: Window value to filter on.
        since: Only trends generated at or after this time are considered.

    Returns:
        One CompanyTrendRow per row returned by the query.
    """
    records = await pool.fetch(_LATEST_COMPANY_TRENDS_QUERY, window, since)
    return list(map(_parse_company_trend_row, records))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure rollup logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Direction mapping for numeric aggregation: bullish counts as +1, bearish
# as -1, mixed and neutral as 0. Keys are the string values of TrendDirection.
_DIRECTION_VALUES = {
    TrendDirection.BULLISH.value: 1.0,
    TrendDirection.BEARISH.value: -1.0,
    TrendDirection.MIXED.value: 0.0,
    TrendDirection.NEUTRAL.value: 0.0,
}

# Cut-offs on the averaged direction value: at or above BULLISH_THRESHOLD is
# bullish, at or below BEARISH_THRESHOLD is bearish.
BULLISH_THRESHOLD = 0.15
BEARISH_THRESHOLD = -0.15
|
||||
|
||||
|
||||
def rollup_trends(
    trends: list[CompanyTrendRow],
    entity_type: str,
    entity_id: str,
    window: str,
    reference_time: datetime,
) -> TrendSummary:
    """Combine company-level trends into one rollup summary.

    Each company is weighted by its confidence. Direction, strength and
    contradiction are confidence-weighted averages; the rollup confidence is
    the mean confidence across companies. Catalysts are ranked by summed
    weight and risks by the highest weight of any company reporting them
    (risks are compared after stripping and lower-casing); the top five of
    each are kept. Evidence IDs are concatenated in input order,
    de-duplicated, and capped at ten per side.

    Returns:
        The rollup TrendSummary. When *trends* is empty or all confidences
        sum to zero, a neutral summary with zero strength and confidence.
    """

    def _neutral_summary() -> TrendSummary:
        return TrendSummary(
            entity_type=entity_type,
            entity_id=entity_id,
            window=TrendWindow(window),
            trend_direction=TrendDirection.NEUTRAL,
            trend_strength=0.0,
            confidence=0.0,
            generated_at=reference_time,
        )

    if not trends:
        return _neutral_summary()

    confidence_sum = 0.0
    direction_sum = 0.0
    strength_sum = 0.0
    contradiction_sum = 0.0
    catalyst_scores: dict[str, float] = {}
    risk_scores: dict[str, float] = {}
    supporting_ids: list[str] = []
    opposing_ids: list[str] = []

    for trend in trends:
        weight = trend.confidence
        confidence_sum += weight
        direction_sum += weight * _DIRECTION_VALUES.get(trend.trend_direction, 0.0)
        strength_sum += weight * trend.trend_strength
        contradiction_sum += weight * trend.contradiction_score

        for catalyst in trend.dominant_catalysts:
            catalyst_scores[catalyst] = catalyst_scores.get(catalyst, 0.0) + weight

        for risk in trend.material_risks:
            key = risk.strip().lower()
            # Keep the highest weight seen for each normalised risk.
            risk_scores[key] = max(risk_scores.get(key, weight), weight)

        supporting_ids.extend(trend.top_supporting_evidence)
        opposing_ids.extend(trend.top_opposing_evidence)

    if confidence_sum == 0.0:
        return _neutral_summary()

    mean_direction = direction_sum / confidence_sum
    mean_strength = strength_sum / confidence_sum
    mean_contradiction = contradiction_sum / confidence_sum
    mean_confidence = confidence_sum / len(trends)

    direction = _derive_rollup_direction(mean_direction, mean_contradiction)

    def _score(pair: tuple[str, float]) -> float:
        return pair[1]

    top_catalysts = [
        name for name, _ in sorted(catalyst_scores.items(), key=_score, reverse=True)[:5]
    ]
    top_risks = [
        name for name, _ in sorted(risk_scores.items(), key=_score, reverse=True)[:5]
    ]

    disagreement = _build_rollup_disagreement(trends, entity_id)

    return TrendSummary(
        entity_type=entity_type,
        entity_id=entity_id,
        window=TrendWindow(window),
        trend_direction=direction,
        trend_strength=round(min(abs(mean_strength), 1.0), 4),
        confidence=round(max(0.0, min(mean_confidence, 1.0)), 4),
        # dict.fromkeys de-duplicates while preserving first-seen order.
        top_supporting_evidence=list(dict.fromkeys(supporting_ids))[:10],
        top_opposing_evidence=list(dict.fromkeys(opposing_ids))[:10],
        dominant_catalysts=top_catalysts,
        material_risks=top_risks,
        contradiction_score=round(max(0.0, min(mean_contradiction, 1.0)), 4),
        disagreement_details=disagreement,
        generated_at=reference_time,
    )
|
||||
|
||||
|
||||
def _derive_rollup_direction(
    avg_direction: float,
    avg_contradiction: float,
) -> TrendDirection:
    """Translate the averaged direction value into a TrendDirection.

    MIXED takes precedence: it is returned when the average contradiction
    exceeds 0.10 while the absolute direction stays below 0.3. Otherwise the
    bullish / bearish thresholds decide, with NEUTRAL in between.
    """
    conflicted = avg_contradiction > 0.10 and abs(avg_direction) < 0.3
    if conflicted:
        result = TrendDirection.MIXED
    elif avg_direction >= BULLISH_THRESHOLD:
        result = TrendDirection.BULLISH
    elif avg_direction <= BEARISH_THRESHOLD:
        result = TrendDirection.BEARISH
    else:
        result = TrendDirection.NEUTRAL
    return result
|
||||
|
||||
|
||||
def _build_rollup_disagreement(
|
||||
trends: list[CompanyTrendRow],
|
||||
entity_id: str,
|
||||
) -> list[DisagreementDetail]:
|
||||
"""Build disagreement details showing which companies are bullish vs bearish."""
|
||||
bullish_ids: list[str] = []
|
||||
bearish_ids: list[str] = []
|
||||
bullish_weight = 0.0
|
||||
bearish_weight = 0.0
|
||||
|
||||
for t in trends:
|
||||
if t.trend_direction == TrendDirection.BULLISH.value:
|
||||
bullish_ids.append(t.entity_id)
|
||||
bullish_weight += t.confidence
|
||||
elif t.trend_direction == TrendDirection.BEARISH.value:
|
||||
bearish_ids.append(t.entity_id)
|
||||
bearish_weight += t.confidence
|
||||
|
||||
if not bullish_ids or not bearish_ids:
|
||||
return []
|
||||
|
||||
return [
|
||||
DisagreementDetail(
|
||||
dimension="company_direction",
|
||||
positive_doc_ids=bullish_ids,
|
||||
negative_doc_ids=bearish_ids,
|
||||
positive_weight=round(bullish_weight, 4),
|
||||
negative_weight=round(bearish_weight, 4),
|
||||
description=(
|
||||
f"{entity_id}: {len(bullish_ids)} bullish vs "
|
||||
f"{len(bearish_ids)} bearish companies"
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persist rollup (reuses the same trend_windows table)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_UPSERT_TREND = """
|
||||
INSERT INTO trend_windows (
|
||||
entity_type, entity_id, window, trend_direction, trend_strength,
|
||||
confidence, top_supporting_evidence, top_opposing_evidence,
|
||||
dominant_catalysts, material_risks, contradiction_score,
|
||||
disagreement_details, market_context, generated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5,
|
||||
$6, $7::jsonb, $8::jsonb,
|
||||
$9::jsonb, $10::jsonb, $11,
|
||||
$12::jsonb, $13::jsonb, $14
|
||||
)
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
|
||||
async def persist_rollup(
    pool: asyncpg.Pool,
    summary: TrendSummary,
) -> str:
    """Store a rollup trend summary and return the new row's id as a string.

    List-valued fields and the disagreement details are serialised to JSON
    text for the jsonb columns. The market context column always receives an
    empty JSON object.
    """
    disagreement_json = json.dumps(
        [detail.model_dump() for detail in summary.disagreement_details]
    )
    params = (
        summary.entity_type,
        summary.entity_id,
        summary.window.value,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        json.dumps(summary.top_supporting_evidence),
        json.dumps(summary.top_opposing_evidence),
        json.dumps(summary.dominant_catalysts),
        json.dumps(summary.material_risks),
        summary.contradiction_score,
        disagreement_json,
        json.dumps({}),
        summary.generated_at,
    )
    inserted = await pool.fetchrow(_UPSERT_TREND, *params)
    return str(inserted["id"])  # type: ignore[index]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# High-level rollup entry points
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def aggregate_sector(
    pool: asyncpg.Pool,
    sector: str,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
) -> TrendSummary:
    """Compute a sector-level rollup for one window and persist it.

    Loads the latest company trends for the window, keeps those whose sector
    equals *sector*, and rolls them up. The summary is persisted (and logged)
    only when at least one company matched.

    Args:
        pool: asyncpg connection pool.
        sector: Sector name to roll up.
        window: Window value.
        reference_time: Timestamp for the rollup; defaults to the current
            UTC time.
        since: Oldest trend generation time to consider; defaults to
            ``reference_time`` minus the window's lookback.

    Returns:
        The rollup summary, neutral when no company matched.
    """
    now = reference_time if reference_time is not None else datetime.now(timezone.utc)
    cutoff = since if since is not None else now - _window_lookback(window)

    company_trends = await fetch_latest_company_trends(pool, window, cutoff)
    sector_trends = [trend for trend in company_trends if trend.sector == sector]

    summary = rollup_trends(sector_trends, "sector", sector, window, now)
    if not sector_trends:
        return summary

    rollup_id = await persist_rollup(pool, summary)
    logger.info(
        "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d",
        rollup_id, sector, window, summary.trend_direction.value,
        summary.trend_strength, len(sector_trends),
    )
    return summary
|
||||
|
||||
|
||||
async def aggregate_market(
    pool: asyncpg.Pool,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
) -> TrendSummary:
    """Compute a market-wide rollup for one window and persist it.

    Every company trend returned for the window takes part, regardless of
    sector. The rollup is stored with entity type ``"market"`` and entity id
    ``"all"``, and only when at least one company trend exists.

    Args:
        pool: asyncpg connection pool.
        window: Window value.
        reference_time: Timestamp for the rollup; defaults to the current
            UTC time.
        since: Oldest trend generation time to consider; defaults to
            ``reference_time`` minus the window's lookback.

    Returns:
        The rollup summary, neutral when no company trends were found.
    """
    now = reference_time if reference_time is not None else datetime.now(timezone.utc)
    cutoff = since if since is not None else now - _window_lookback(window)

    company_trends = await fetch_latest_company_trends(pool, window, cutoff)
    summary = rollup_trends(company_trends, "market", "all", window, now)

    if not company_trends:
        return summary

    rollup_id = await persist_rollup(pool, summary)
    logger.info(
        "Persisted market rollup %s for %s: direction=%s strength=%.3f companies=%d",
        rollup_id, window, summary.trend_direction.value,
        summary.trend_strength, len(company_trends),
    )
    return summary
|
||||
|
||||
|
||||
async def aggregate_all_sectors(
    pool: asyncpg.Pool,
    window: str,
    reference_time: datetime | None = None,
    since: datetime | None = None,
) -> list[TrendSummary]:
    """Compute and persist a sector rollup for every sector that has company trends.

    Company trends are fetched once, grouped by their ``sector`` attribute,
    and each group is rolled up and persisted separately.

    Args:
        pool: Connection pool used for the trend lookup and the inserts.
        window: Trend window identifier.
        reference_time: Anchor time for the rollups; defaults to the current
            UTC time.
        since: Earliest time passed to the company-trend lookup; defaults
            to ``reference_time`` minus ``_window_lookback(window)``.

    Returns:
        One summary per sector, in the order sectors were first seen in the
        fetched trends. Empty when no company trends were found.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
    if since is None:
        since = reference_time - _window_lookback(window)

    all_trends = await fetch_latest_company_trends(pool, window, since)

    # Group by sector. Every group holds at least one trend by construction,
    # so no emptiness check is needed before persisting.
    trends_by_sector: dict[str, list[CompanyTrendRow]] = {}
    for trend in all_trends:
        trends_by_sector.setdefault(trend.sector, []).append(trend)

    summaries: list[TrendSummary] = []
    for sector, trends in trends_by_sector.items():
        summary = rollup_trends(trends, "sector", sector, window, reference_time)
        rollup_id = await persist_rollup(pool, summary)
        # Same log line as the single-sector path, so batch runs are traceable too.
        logger.info(
            "Persisted sector rollup %s for %s/%s: direction=%s strength=%.3f companies=%d",
            rollup_id, sector, window, summary.trend_direction.value,
            summary.trend_strength, len(trends),
        )
        summaries.append(summary)

    return summaries
|
||||
|
||||
|
||||
def _window_lookback(window: str) -> timedelta:
    """Return how far back to search for recent company trends for ``window``.

    Unrecognised window identifiers fall back to eight days.
    """
    lookback_days = {
        TrendWindow.INTRADAY.value: 1,
        TrendWindow.ONE_DAY.value: 2,
        TrendWindow.SEVEN_DAY.value: 8,
        TrendWindow.THIRTY_DAY.value: 35,
        TrendWindow.NINETY_DAY.value: 95,
    }
    return timedelta(days=lookback_days.get(window, 8))
|
||||
@@ -0,0 +1,285 @@
|
||||
"""Recency decay, source credibility weighting, and market context
|
||||
integration for aggregation.
|
||||
|
||||
Provides scoring functions used by the aggregation engine to weight
|
||||
document intelligence signals when computing trend summaries.
|
||||
|
||||
Requirements: 6.1, 6.2, 6.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from services.shared.schemas import MarketContext
|
||||
|
||||
|
||||
|
||||
def _default_half_life_hours() -> dict[str, float]:
    """Return a fresh per-window table of recency half-lives, in hours."""
    return {
        "intraday": 2.0,
        "1d": 12.0,
        "7d": 72.0,
        "30d": 240.0,
        "90d": 720.0,
    }


@dataclass(frozen=True)
class ScoringConfig:
    """Immutable bundle of tunable parameters for signal scoring."""

    # --- Recency decay ---
    # Exponential half-life (hours) keyed by window identifier. A document
    # whose age equals the half-life gets a recency weight of 0.5.
    half_life_hours: dict[str, float] = field(default_factory=_default_half_life_hours)

    # Lower bound on the recency weight, so very old documents keep a
    # trace-level contribution instead of decaying to zero.
    min_recency_weight: float = 0.01

    # --- Source credibility ---
    # Credibility scores are clamped into [floor, ceiling] before weighting.
    credibility_floor: float = 0.1
    credibility_ceiling: float = 1.0

    # Exponent applied to the clamped credibility. Values above 1 punish
    # low-credibility sources harder; values below 1 flatten the curve.
    credibility_exponent: float = 1.0

    # --- Novelty ---
    # Largest bonus added on top of the base weight: a novelty score of 1.0
    # earns the full bonus, 0.0 earns none.
    novelty_bonus_max: float = 0.25

    # --- Extraction confidence ---
    # Documents whose extraction confidence is below this value get zero
    # weight (considered too unreliable to aggregate).
    confidence_floor: float = 0.2

    # --- Market context modulation ---
    # Volatility (in price units) above this threshold amplifies signals,
    # since fresh data matters more in fast-moving markets.
    volatility_recency_boost_threshold: float = 1.0
    volatility_recency_boost_max: float = 0.30  # max extra multiplier

    # A volume change (percent) above this threshold adds a small fixed
    # boost, since high-volume moves carry more conviction.
    volume_surge_threshold_pct: float = 50.0
    volume_surge_boost: float = 0.15


# Shared default configuration instance.
DEFAULT_CONFIG = ScoringConfig()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recency decay
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def recency_weight(
    published_at: datetime,
    reference_time: datetime,
    window: str,
    config: ScoringConfig = DEFAULT_CONFIG,
) -> float:
    """Return the exponential recency-decay weight of a document.

    The weight halves for every half-life of document age::

        weight = 2 ** (-age_hours / half_life)

    Args:
        published_at: Publication time of the document. A naive value is
            interpreted as UTC.
        reference_time: The "now" anchor of the aggregation window. A naive
            value is interpreted as UTC.
        window: Window identifier used to look up the half-life (for
            example ``"7d"``). Unknown windows use a 72-hour half-life.
        config: Scoring parameters.

    Returns:
        ``1.0`` for documents published at or after ``reference_time``;
        otherwise the decayed weight, never lower than
        ``config.min_recency_weight``.
    """
    # Normalise naive timestamps to UTC so the subtraction below never
    # mixes naive and aware values.
    published = (
        published_at.replace(tzinfo=timezone.utc)
        if published_at.tzinfo is None
        else published_at
    )
    anchor = (
        reference_time.replace(tzinfo=timezone.utc)
        if reference_time.tzinfo is None
        else reference_time
    )

    elapsed_seconds = (anchor - published).total_seconds()
    if elapsed_seconds <= 0:
        # Published at or after the anchor: treat as maximally fresh.
        return 1.0

    half_life = config.half_life_hours.get(window, 72.0)
    decayed = math.pow(2.0, -(elapsed_seconds / 3600.0) / half_life)
    return max(decayed, config.min_recency_weight)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source credibility weighting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def credibility_weight(
    source_credibility: float,
    config: ScoringConfig = DEFAULT_CONFIG,
) -> float:
    """Turn a source credibility score into an aggregation weight.

    The score is first limited to
    ``[config.credibility_floor, config.credibility_ceiling]`` and then
    raised to the power ``config.credibility_exponent``.

    Args:
        source_credibility: Credibility score of the source or document
            intelligence record (nominally 0-1).
        config: Scoring parameters.

    Returns:
        The clamped score raised to the configured exponent.
    """
    bounded = min(source_credibility, config.credibility_ceiling)
    bounded = max(config.credibility_floor, bounded)
    return math.pow(bounded, config.credibility_exponent)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Market context adjustment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def market_context_multiplier(
    market_ctx: MarketContext | None,
    config: ScoringConfig = DEFAULT_CONFIG,
) -> float:
    """Return the multiplicative weight adjustment implied by market context.

    The result is ``1.0`` plus up to two boosts:

    * a volatility boost, applied when ``market_ctx.volatility`` exceeds
      ``config.volatility_recency_boost_threshold``; it grows with the
      logarithm of the excess and is capped at
      ``config.volatility_recency_boost_max``;
    * a fixed ``config.volume_surge_boost``, applied when
      ``market_ctx.volume_change_pct`` exceeds
      ``config.volume_surge_threshold_pct``.

    Returns ``1.0`` when ``market_ctx`` is ``None`` or reports no data.
    """
    if market_ctx is None or not market_ctx.has_data:
        return 1.0

    # Volatile markets make recent signals more valuable.
    volatility_boost = 0.0
    volatility = market_ctx.volatility
    threshold = config.volatility_recency_boost_threshold
    if volatility is not None and volatility > threshold:
        # log1p keeps extreme volatility from blowing up the weight.
        volatility_boost = min(
            math.log1p(volatility - threshold) * 0.15,
            config.volatility_recency_boost_max,
        )

    # High-volume moves get a flat boost.
    volume_boost = 0.0
    volume_change = market_ctx.volume_change_pct
    if volume_change is not None and volume_change > config.volume_surge_threshold_pct:
        volume_boost = config.volume_surge_boost

    return 1.0 + (volatility_boost + volume_boost)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Combined document signal weight
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class SignalWeight:
    """Breakdown of a document's aggregation weight.

    Holds each factor computed by ``compute_signal_weight`` together with
    their product, so callers can inspect why a document was weighted the
    way it was.
    """

    recency: float  # recency-decay weight
    credibility: float  # source-credibility weight
    novelty_bonus: float  # novelty_score * novelty_bonus_max
    confidence_gate: float  # 0.0 or 1.0
    market_ctx_multiplier: float  # >= 1.0
    combined: float  # product of the gate, the weights, (1 + bonus) and the multiplier
|
||||
|
||||
|
||||
def compute_signal_weight(
    published_at: datetime,
    reference_time: datetime,
    window: str,
    source_credibility: float,
    novelty_score: float = 0.5,
    extraction_confidence: float = 0.5,
    market_ctx: MarketContext | None = None,
    config: ScoringConfig = DEFAULT_CONFIG,
) -> SignalWeight:
    """Score one document signal and return the full weight breakdown.

    The combined weight is the product::

        combined = confidence_gate * recency * credibility
                   * (1 + novelty_bonus) * market_ctx_multiplier

    with ``novelty_bonus = novelty_score * config.novelty_bonus_max``. The
    confidence gate is ``1.0`` when ``extraction_confidence`` is at least
    ``config.confidence_floor`` and ``0.0`` otherwise, so low-confidence
    documents end up with a combined weight of zero.

    Args:
        published_at: Document publication time.
        reference_time: Aggregation anchor time.
        window: Trend window identifier.
        source_credibility: Source credibility score (0-1).
        novelty_score: Document novelty score (0-1).
        extraction_confidence: Extraction confidence from the model (0-1).
        market_ctx: Optional market context features for the symbol.
        config: Scoring parameters.

    Returns:
        A ``SignalWeight`` carrying every component and the combined score.
    """
    if extraction_confidence >= config.confidence_floor:
        confidence_gate = 1.0
    else:
        confidence_gate = 0.0

    recency = recency_weight(published_at, reference_time, window, config)
    credibility = credibility_weight(source_credibility, config)
    novelty_bonus = novelty_score * config.novelty_bonus_max
    market_multiplier = market_context_multiplier(market_ctx, config)

    # Multiply left to right, starting from the gate.
    combined = confidence_gate
    for factor in (recency, credibility, 1.0 + novelty_bonus, market_multiplier):
        combined *= factor

    return SignalWeight(
        recency=recency,
        credibility=credibility,
        novelty_bonus=novelty_bonus,
        confidence_gate=confidence_gate,
        market_ctx_multiplier=market_multiplier,
        combined=combined,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class WeightedSignal:
    """A document intelligence reference paired with its computed weight."""

    document_id: str  # identifier of the source document
    weight: SignalWeight  # component breakdown plus combined weight
    sentiment_value: float  # numeric sentiment: +1 positive, -1 negative, 0 neutral/mixed
    impact_score: float  # multiplied with weight.combined wherever signals are aggregated
|
||||
|
||||
|
||||
def sentiment_to_numeric(sentiment: str) -> float:
    """Convert a sentiment label into a signed number.

    Matching is case-insensitive: ``"positive"`` gives ``1.0``,
    ``"negative"`` gives ``-1.0``, and everything else (including
    ``"neutral"``, ``"mixed"`` and unknown labels) gives ``0.0``.
    """
    label = sentiment.lower()
    if label == "positive":
        return 1.0
    if label == "negative":
        return -1.0
    return 0.0
|
||||
|
||||
|
||||
def weighted_sentiment_average(signals: list[WeightedSignal]) -> float:
    """Return the sentiment average of ``signals``, weighted per signal.

    Each signal's weight is ``weight.combined * impact_score``. The result
    is the weighted mean of ``sentiment_value``, or ``0.0`` when the total
    weight is zero (which includes an empty input).
    """
    numerator = 0.0
    denominator = 0.0
    for signal in signals:
        effective_weight = signal.weight.combined * signal.impact_score
        numerator += effective_weight * signal.sentiment_value
        denominator += effective_weight

    # Explicit accumulation is kept on purpose: it fixes the summation order.
    return 0.0 if denominator == 0.0 else numerator / denominator
|
||||
@@ -1 +1,650 @@
|
||||
"""Aggregation worker - rolling trend summaries, contradiction detection, evidence ranking."""
|
||||
"""Aggregation worker - company-level rolling window trend summaries.
|
||||
|
||||
Queries document intelligence and market context for a given ticker,
|
||||
computes weighted signal scores, and produces TrendSummary objects
|
||||
persisted to the trend_windows table.
|
||||
|
||||
Requirements: 6.1, 6.2, 6.5
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.aggregation.contradiction import CatalystEntry, detect_contradictions
|
||||
from services.aggregation.evidence import (
|
||||
EvidenceRankConfig,
|
||||
RankedEvidence,
|
||||
rank_evidence as _rank_evidence_composite,
|
||||
rank_evidence_detailed,
|
||||
)
|
||||
from services.aggregation.market_context import fetch_market_context
|
||||
from services.aggregation.scoring import (
|
||||
ScoringConfig,
|
||||
WeightedSignal,
|
||||
compute_signal_weight,
|
||||
sentiment_to_numeric,
|
||||
weighted_sentiment_average,
|
||||
)
|
||||
from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow
|
||||
from services.shared.metrics import (
|
||||
AGGREGATION_CONTRADICTION_SCORE,
|
||||
AGGREGATION_DURATION,
|
||||
AGGREGATION_SIGNALS_PROCESSED,
|
||||
AGGREGATION_WINDOWS_COMPUTED,
|
||||
)
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Map TrendWindow values to lookback durations. Each duration is subtracted
# from the reference time to obtain the start of the window.
WINDOW_DURATIONS: dict[str, timedelta] = {
    TrendWindow.INTRADAY.value: timedelta(hours=12),
    TrendWindow.ONE_DAY.value: timedelta(days=1),
    TrendWindow.SEVEN_DAY.value: timedelta(days=7),
    TrendWindow.THIRTY_DAY.value: timedelta(days=30),
    TrendWindow.NINETY_DAY.value: timedelta(days=90),
}

# How many evidence document IDs to keep in supporting/opposing lists.
MAX_EVIDENCE_REFS = 10
|
||||
|
||||
|
||||
@dataclass
class AggregationConfig:
    """Selects the windows to compute and the scoring parameters to use."""

    windows: list[str] | None = None  # None (or empty) = every TrendWindow
    scoring: ScoringConfig | None = None  # None = default ScoringConfig
    max_evidence: int = MAX_EVIDENCE_REFS  # cap on evidence refs per side

    def effective_windows(self) -> list[str]:
        """Return the configured windows, or all ``TrendWindow`` values if unset."""
        return self.windows or [member.value for member in TrendWindow]

    def effective_scoring(self) -> ScoringConfig:
        """Return the configured scoring parameters, or a fresh default set."""
        if self.scoring:
            return self.scoring
        return ScoringConfig()
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch impact records for a ticker within a time window
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_IMPACT_QUERY = """
|
||||
SELECT
|
||||
di.document_id,
|
||||
di.confidence,
|
||||
di.novelty_score,
|
||||
di.source_credibility,
|
||||
dir.sentiment,
|
||||
dir.impact_score,
|
||||
dir.catalyst_type,
|
||||
dir.key_facts,
|
||||
dir.risks,
|
||||
d.published_at
|
||||
FROM document_impact_records dir
|
||||
JOIN document_intelligence di ON di.id = dir.intelligence_id
|
||||
JOIN documents d ON d.id = di.document_id
|
||||
WHERE dir.ticker = $1
|
||||
AND d.published_at >= $2
|
||||
AND d.published_at <= $3
|
||||
AND di.validation_status = 'valid'
|
||||
AND d.status != 'rejected'
|
||||
ORDER BY d.published_at DESC
|
||||
"""
|
||||
|
||||
|
||||
@dataclass
class ImpactRow:
    """Parsed row from the impact query.

    Field names mirror the columns selected by ``_IMPACT_QUERY``.
    """

    document_id: str  # stringified documents.id
    confidence: float  # extraction confidence of the intelligence record
    novelty_score: float
    source_credibility: float
    sentiment: str  # sentiment label of the impact record
    impact_score: float
    catalyst_type: str
    key_facts: list[str]
    risks: list[str]
    published_at: datetime  # publication time of the document
|
||||
|
||||
|
||||
def _parse_impact_row(row: Any) -> ImpactRow:
    """Convert an asyncpg Record from the impact query into an ``ImpactRow``.

    NULL columns are replaced by defaults: ``0.5`` for confidence, novelty
    and source credibility, ``0.0`` for impact score, ``"neutral"`` for
    sentiment and ``"other"`` for catalyst type. A stored numeric ``0`` is
    kept as ``0.0`` rather than replaced by the default, so a
    zero-confidence document is still gated out downstream.

    ``key_facts`` and ``risks`` may arrive as JSON text or as already
    decoded values; anything that is not a list after decoding becomes an
    empty list.

    Args:
        row: Mapping-like record exposing the selected columns by name.

    Returns:
        The parsed ``ImpactRow``.
    """

    def _float_or(column: str, default: float) -> float:
        # Only NULL falls back to the default; `value or default` would also
        # swallow a legitimate 0.0.
        value = row[column]
        return default if value is None else float(value)

    def _json_list(column: str) -> list[str]:
        # JSON columns can come back as text depending on the codec in use.
        value = row[column]
        if isinstance(value, str):
            value = json.loads(value)
        return value if isinstance(value, list) else []

    return ImpactRow(
        document_id=str(row["document_id"]),
        confidence=_float_or("confidence", 0.5),
        novelty_score=_float_or("novelty_score", 0.5),
        source_credibility=_float_or("source_credibility", 0.5),
        sentiment=row["sentiment"] or "neutral",
        impact_score=_float_or("impact_score", 0.0),
        catalyst_type=row["catalyst_type"] or "other",
        key_facts=_json_list("key_facts"),
        risks=_json_list("risks"),
        published_at=row["published_at"],
    )
|
||||
|
||||
|
||||
async def fetch_impact_records(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    window_end: datetime,
) -> list[ImpactRow]:
    """Load the validated impact records of ``ticker`` published in a time range.

    Runs ``_IMPACT_QUERY`` with the ticker and both bounds and parses every
    returned record with ``_parse_impact_row``.
    """
    records = await pool.fetch(_IMPACT_QUERY, ticker, window_start, window_end)
    parsed: list[ImpactRow] = []
    for record in records:
        parsed.append(_parse_impact_row(record))
    return parsed
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build weighted signals from impact records
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_weighted_signals(
    impacts: list[ImpactRow],
    reference_time: datetime,
    window: str,
    market_ctx: Any | None = None,
    config: ScoringConfig | None = None,
) -> list[WeightedSignal]:
    """Score every impact record and wrap it as a ``WeightedSignal``.

    Each record is weighted by ``compute_signal_weight`` and its sentiment
    label is converted with ``sentiment_to_numeric``. A default
    ``ScoringConfig`` is used when ``config`` is not given. The output
    keeps the order of ``impacts``.
    """
    scoring = config or ScoringConfig()
    return [
        WeightedSignal(
            document_id=impact.document_id,
            weight=compute_signal_weight(
                published_at=impact.published_at,
                reference_time=reference_time,
                window=window,
                source_credibility=impact.source_credibility,
                novelty_score=impact.novelty_score,
                extraction_confidence=impact.confidence,
                market_ctx=market_ctx,
                config=scoring,
            ),
            sentiment_value=sentiment_to_numeric(impact.sentiment),
            impact_score=impact.impact_score,
        )
        for impact in impacts
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Derive trend direction from weighted sentiment
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Thresholds for mapping numeric sentiment to direction.
BULLISH_THRESHOLD = 0.15  # average sentiment at or above this → bullish
BEARISH_THRESHOLD = -0.15  # average sentiment at or below this → bearish
# Contradiction score above this → mixed, but only while the absolute average
# sentiment stays below 0.3 (see derive_trend_direction).
MIXED_THRESHOLD = 0.10
|
||||
|
||||
|
||||
def derive_trend_direction(
    avg_sentiment: float,
    contradiction_score: float = 0.0,
    *,
    mixed_sentiment_ceiling: float = 0.3,
) -> TrendDirection:
    """Map a weighted average sentiment to a ``TrendDirection``.

    Rules, checked in order:

    1. MIXED when ``contradiction_score`` exceeds ``MIXED_THRESHOLD`` and
       the sentiment is weak (``abs(avg_sentiment)`` below
       ``mixed_sentiment_ceiling``). A strong average sentiment therefore
       keeps its direction even when contradiction is high.
    2. BULLISH when ``avg_sentiment >= BULLISH_THRESHOLD``.
    3. BEARISH when ``avg_sentiment <= BEARISH_THRESHOLD``.
    4. NEUTRAL otherwise.

    Args:
        avg_sentiment: Weighted average sentiment.
        contradiction_score: Disagreement score of the underlying signals.
        mixed_sentiment_ceiling: Absolute sentiment at or above which high
            contradiction no longer forces MIXED.

    Returns:
        The derived trend direction.
    """
    if (
        contradiction_score > MIXED_THRESHOLD
        and abs(avg_sentiment) < mixed_sentiment_ceiling
    ):
        return TrendDirection.MIXED
    if avg_sentiment >= BULLISH_THRESHOLD:
        return TrendDirection.BULLISH
    if avg_sentiment <= BEARISH_THRESHOLD:
        return TrendDirection.BEARISH
    return TrendDirection.NEUTRAL
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compute contradiction score
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_contradiction_score(signals: list[WeightedSignal]) -> float:
    """Measure how much weighted disagreement exists among signals.

    Every signal contributes ``weight.combined * impact_score`` to the
    positive or the negative side according to the sign of its
    ``sentiment_value``; signals with a sentiment value of zero are
    ignored. The score is the minority side's share of the total
    directional weight::

        score = min(pos_weight, neg_weight) / (pos_weight + neg_weight)

    With non-negative weights the result therefore lies in [0, 0.5]:
    ``0`` means all directional weight is on one side and ``0.5`` means
    positive and negative weight are equal.

    Returns:
        The score rounded to four decimals; ``0.0`` for empty input or
        when the total directional weight is zero.
    """
    if not signals:
        return 0.0

    pos_weight = 0.0
    neg_weight = 0.0
    for sig in signals:
        w = sig.weight.combined * sig.impact_score
        if sig.sentiment_value > 0:
            pos_weight += w
        elif sig.sentiment_value < 0:
            neg_weight += w

    total = pos_weight + neg_weight
    if total == 0.0:
        return 0.0

    # Minority share of the total, not minority/majority: peaks at 0.5.
    return round(min(pos_weight, neg_weight) / total, 4)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rank evidence (supporting vs opposing)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def rank_evidence(
    signals: list[WeightedSignal],
    max_refs: int = MAX_EVIDENCE_REFS,
) -> tuple[list[str], list[str]]:
    """Return the top supporting and opposing document IDs for ``signals``.

    Thin wrapper around the evidence ranking module: it builds an
    ``EvidenceRankConfig`` limited to ``max_refs`` references and returns
    whatever that module's ``rank_evidence`` produces for it.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Neutral/mixed signals are excluded from evidence lists.
    """
    return _rank_evidence_composite(signals, EvidenceRankConfig(max_refs=max_refs))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Extract dominant catalysts and material risks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def extract_catalysts_and_risks(
    impacts: list[ImpactRow],
    signals: list[WeightedSignal],
    *,
    max_catalysts: int = 5,
    max_risks: int = 5,
) -> tuple[list[str], list[str]]:
    """Return dominant catalyst types and material risks, ranked by signal weight.

    A document's weight is ``weight.combined * impact_score`` of its signal;
    impacts whose document has no signal or a non-positive weight are
    ignored entirely.

    Catalysts are ranked by cumulative weight across documents. Risks are
    ordered by the weight of the document that surfaced them, stripped of
    surrounding whitespace and de-duplicated case-insensitively (the
    highest-weighted spelling wins). Blank risk strings are dropped.

    Args:
        impacts: Impact records providing ``catalyst_type`` and ``risks``.
        signals: Weighted signals used to look up each document's weight.
        max_catalysts: Maximum number of catalyst types to return.
        max_risks: Maximum number of risks to return.

    Returns:
        ``(catalysts, risks)``, each ordered from most to least weighted.
    """
    # If several signals share a document_id, the last one's weight is used.
    weight_by_doc = {s.document_id: s.weight.combined * s.impact_score for s in signals}

    catalyst_weights: dict[str, float] = {}
    risk_entries: list[tuple[float, str]] = []
    for imp in impacts:
        w = weight_by_doc.get(imp.document_id, 0.0)
        if w <= 0.0:
            continue
        catalyst_weights[imp.catalyst_type] = catalyst_weights.get(imp.catalyst_type, 0.0) + w
        for risk in imp.risks:
            text = risk.strip()
            if text:  # skip blank entries so they never surface as a risk
                risk_entries.append((w, text))

    # Top catalysts by cumulative weight (stable for ties).
    ranked_catalysts = sorted(catalyst_weights.items(), key=lambda item: item[1], reverse=True)
    catalysts = [catalyst for catalyst, _ in ranked_catalysts[:max_catalysts]]

    # De-duplicated risks ordered by weight; the sort is stable, so
    # equal-weight risks keep their input order.
    risk_entries.sort(key=lambda entry: entry[0], reverse=True)
    seen_risks: set[str] = set()
    risks: list[str] = []
    for _, text in risk_entries:
        if len(risks) >= max_risks:
            break
        normalized = text.lower()
        if normalized not in seen_risks:
            seen_risks.add(normalized)
            risks.append(text)

    return catalysts, risks
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compute trend confidence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_trend_confidence(
    signals: list[WeightedSignal],
    contradiction_score: float,
) -> float:
    """Derive an overall confidence for the trend summary.

    Only signals with a positive combined weight ("active" signals) are
    considered. Confidence combines:

    - a count factor that grows linearly with the number of active signals
      and saturates at 20 signals (40% of the base);
    - the average ``weight.credibility`` of the active signals (60% of the
      base). Raw extraction confidence is not part of ``SignalWeight``; it
      only influences which signals are active, through the confidence gate;
    - a contradiction penalty of ``0.4 * contradiction_score``.

    Returns:
        A value in [0, 1], rounded to four decimals; ``0.0`` when there are
        no active signals.
    """
    if not signals:
        return 0.0

    active = [s for s in signals if s.weight.combined > 0]
    if not active:
        return 0.0

    # Linear in the number of active signals, capped at 20.
    count_factor = min(len(active) / 20.0, 1.0)

    # Mean source-credibility weight of the contributing signals.
    avg_credibility = sum(s.weight.credibility for s in active) / len(active)

    # High contradiction lowers confidence.
    contradiction_penalty = contradiction_score * 0.4

    confidence = (0.4 * count_factor + 0.6 * avg_credibility) - contradiction_penalty
    return round(max(0.0, min(1.0, confidence)), 4)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Assemble a TrendSummary from components
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class AssembledTrend:
    """A trend summary paired with its detailed evidence rankings."""

    summary: TrendSummary  # the assembled summary (carries the evidence document IDs)
    supporting_evidence: list[RankedEvidence]  # ranked supporting entries, kept for persistence
    opposing_evidence: list[RankedEvidence]  # ranked opposing entries, kept for persistence
|
||||
|
||||
|
||||
def assemble_trend_summary(
    ticker: str,
    window: str,
    signals: list[WeightedSignal],
    impacts: list[ImpactRow],
    market_ctx: Any | None = None,
    max_evidence: int = MAX_EVIDENCE_REFS,
    reference_time: datetime | None = None,
) -> TrendSummary:
    """Assemble a ``TrendSummary`` from weighted signals and impact records.

    Convenience wrapper around ``assemble_trend_with_evidence`` that
    forwards every argument and returns only the summary, dropping the
    detailed evidence rankings.
    """
    assembled = assemble_trend_with_evidence(
        ticker=ticker,
        window=window,
        signals=signals,
        impacts=impacts,
        market_ctx=market_ctx,
        max_evidence=max_evidence,
        reference_time=reference_time,
    )
    return assembled.summary
|
||||
|
||||
|
||||
def assemble_trend_with_evidence(
    ticker: str,
    window: str,
    signals: list[WeightedSignal],
    impacts: list[ImpactRow],
    market_ctx: Any | None = None,
    max_evidence: int = MAX_EVIDENCE_REFS,
    reference_time: datetime | None = None,
) -> AssembledTrend:
    """Assemble a company ``TrendSummary`` together with its ranked evidence.

    Computes the weighted average sentiment, runs contradiction detection
    over the signals and their catalyst types, derives direction and
    confidence, ranks supporting/opposing evidence (at most ``max_evidence``
    per the ranking config) and extracts catalysts and risks.

    Args:
        ticker: Company ticker; stored as the summary's entity id.
        window: Trend window identifier; converted with ``TrendWindow(window)``.
        signals: Weighted signals for the ticker and window.
        impacts: Impact records the signals were built from.
        market_ctx: Optional market context stored on the summary.
        max_evidence: Evidence reference limit given to the ranking config.
        reference_time: Timestamp stored as ``generated_at``; defaults to
            the current UTC time.

    Returns:
        The summary plus the detailed supporting and opposing rankings.
    """
    generated_at = datetime.now(timezone.utc) if reference_time is None else reference_time

    mean_sentiment = weighted_sentiment_average(signals)

    # Full contradiction detection (Requirement 6.4): per-document catalyst
    # types are handed over alongside the signals.
    contradictions = detect_contradictions(
        signals,
        [
            CatalystEntry(document_id=impact.document_id, catalyst_type=impact.catalyst_type)
            for impact in impacts
        ],
    )
    contradiction_score = contradictions.score

    trend_direction = derive_trend_direction(mean_sentiment, contradiction_score)
    trend_confidence = compute_trend_confidence(signals, contradiction_score)

    # Detailed rankings are returned for persistence; the summary itself
    # only stores the document IDs.
    ranked_supporting, ranked_opposing = rank_evidence_detailed(
        signals, EvidenceRankConfig(max_refs=max_evidence),
    )
    supporting_ids = [entry.document_id for entry in ranked_supporting]
    opposing_ids = [entry.document_id for entry in ranked_opposing]

    dominant_catalysts, material_risks = extract_catalysts_and_risks(impacts, signals)

    # Trend strength: magnitude of the weighted sentiment, capped at 1.
    trend_strength = round(min(abs(mean_sentiment), 1.0), 4)

    return AssembledTrend(
        summary=TrendSummary(
            entity_type="company",
            entity_id=ticker,
            window=TrendWindow(window),
            trend_direction=trend_direction,
            trend_strength=trend_strength,
            confidence=trend_confidence,
            top_supporting_evidence=supporting_ids,
            top_opposing_evidence=opposing_ids,
            dominant_catalysts=dominant_catalysts,
            material_risks=material_risks,
            contradiction_score=contradiction_score,
            disagreement_details=contradictions.details,
            market_context=market_ctx,
            generated_at=generated_at,
        ),
        supporting_evidence=ranked_supporting,
        opposing_evidence=ranked_opposing,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persist trend summary to PostgreSQL
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_UPSERT_TREND = """
|
||||
INSERT INTO trend_windows (
|
||||
entity_type, entity_id, window, trend_direction, trend_strength,
|
||||
confidence, top_supporting_evidence, top_opposing_evidence,
|
||||
dominant_catalysts, material_risks, contradiction_score,
|
||||
disagreement_details, market_context, generated_at
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5,
|
||||
$6, $7::jsonb, $8::jsonb,
|
||||
$9::jsonb, $10::jsonb, $11,
|
||||
$12::jsonb, $13::jsonb, $14
|
||||
)
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
|
||||
async def persist_trend_summary(
    pool: asyncpg.Pool,
    summary: TrendSummary,
) -> str:
    """Write ``summary`` to ``trend_windows`` and return the new row's id.

    List-valued fields, the disagreement details and the market context are
    serialised to JSON text for the jsonb columns; a missing market context
    is stored as an empty JSON object.

    Args:
        pool: Connection pool used for the insert.
        summary: The trend summary to store.

    Returns:
        The ``id`` returned by the insert, converted to ``str``.
    """
    # Positional order must match the $1..$14 placeholders of _UPSERT_TREND.
    params = (
        summary.entity_type,
        summary.entity_id,
        summary.window.value,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        json.dumps(summary.top_supporting_evidence),
        json.dumps(summary.top_opposing_evidence),
        json.dumps(summary.dominant_catalysts),
        json.dumps(summary.material_risks),
        summary.contradiction_score,
        json.dumps([detail.model_dump() for detail in summary.disagreement_details]),
        json.dumps(summary.market_context.model_dump() if summary.market_context else {}),
        summary.generated_at,
    )
    inserted = await pool.fetchrow(_UPSERT_TREND, *params)
    return str(inserted["id"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persist evidence mappings to trend_evidence table
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_INSERT_EVIDENCE = """
|
||||
INSERT INTO trend_evidence (
|
||||
trend_window_id, document_id, evidence_type,
|
||||
rank_score, weight_component, impact_component,
|
||||
recency_component, confidence_component, sentiment_value
|
||||
) VALUES (
|
||||
$1, $2::uuid, $3,
|
||||
$4, $5, $6,
|
||||
$7, $8, $9
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
async def persist_trend_evidence(
    pool: asyncpg.Pool,
    trend_window_id: str,
    supporting: list[RankedEvidence],
    opposing: list[RankedEvidence],
) -> int:
    """Store the evidence rows of a trend window and return how many were written.

    Supporting entries are labelled ``"supporting"`` and opposing entries
    ``"opposing"``; supporting rows come first. Nothing is sent to the
    database when both lists are empty.

    Args:
        pool: Connection pool used for the batch insert.
        trend_window_id: Id of the trend window the evidence belongs to.
        supporting: Ranked supporting evidence.
        opposing: Ranked opposing evidence.

    Returns:
        The number of rows handed to ``executemany``.
    """
    labelled_groups = (("supporting", supporting), ("opposing", opposing))
    rows: list[tuple[str, str, str, float, float, float, float, float, float]] = [
        (
            trend_window_id,
            evidence.document_id,
            evidence_type,
            evidence.rank_score,
            evidence.weight_component,
            evidence.impact_component,
            evidence.recency_component,
            evidence.confidence_component,
            evidence.sentiment_value,
        )
        for evidence_type, group in labelled_groups
        for evidence in group
    ]

    if rows:
        await pool.executemany(_INSERT_EVIDENCE, rows)
    return len(rows)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main aggregation entry point for a single ticker + window
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def aggregate_company_window(
    pool: asyncpg.Pool,
    ticker: str,
    window: str,
    reference_time: datetime | None = None,
    config: AggregationConfig | None = None,
) -> TrendSummary:
    """Build, store and return the trend summary for one ticker and window.

    The window covers ``reference_time - duration`` up to ``reference_time``,
    where ``duration`` is looked up in ``WINDOW_DURATIONS`` and falls back to
    seven days for a window name that is not listed there.

    Pipeline:
        1. Fetch the impact records for the ticker in the time range.
        2. Fetch the market context for the ticker and window.
        3. Turn both into weighted signals.
        4. Assemble the trend summary together with its evidence lists.
        5. Persist the summary, then the supporting/opposing evidence rows.
        6. Log the outcome and update the Prometheus metrics.

    Args:
        pool: Connection pool used for all reads and writes.
        ticker: Ticker symbol to aggregate.
        window: Window name, used as the ``WINDOW_DURATIONS`` key and as the
            metric label.
        reference_time: End of the window; the current UTC time when omitted.
        config: Aggregation settings; a default ``AggregationConfig`` when
            omitted.

    Returns:
        The assembled ``TrendSummary``.
    """
    settings = config or AggregationConfig()
    scoring_settings = settings.effective_scoring()

    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

    # Timer feeds the duration metric recorded at the end of the run.
    started_at = time.monotonic()
    lookback = WINDOW_DURATIONS.get(window, timedelta(days=7))
    range_start = reference_time - lookback

    # Inputs: per-document impacts first, then market context.
    impact_rows = await fetch_impact_records(pool, ticker, range_start, reference_time)
    market = await fetch_market_context(pool, ticker, window, reference_time)

    # Score the inputs.
    weighted = build_weighted_signals(
        impact_rows,
        reference_time,
        window,
        market,
        scoring_settings,
    )

    # Assemble the summary; market context is only forwarded when it has data.
    result = assemble_trend_with_evidence(
        ticker=ticker,
        window=window,
        signals=weighted,
        impacts=impact_rows,
        market_ctx=market if market.has_data else None,
        max_evidence=settings.max_evidence,
        reference_time=reference_time,
    )
    summary = result.summary

    # Persist the trend window first: the evidence rows need its id.
    stored_id = await persist_trend_summary(pool, summary)
    stored_evidence = await persist_trend_evidence(
        pool,
        stored_id,
        result.supporting_evidence,
        result.opposing_evidence,
    )

    logger.info(
        "Persisted trend %s for %s/%s: direction=%s strength=%.3f confidence=%.3f signals=%d evidence=%d",
        stored_id,
        ticker,
        window,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        len(weighted),
        stored_evidence,
    )

    # Prometheus metrics
    AGGREGATION_WINDOWS_COMPUTED.labels(window=window).inc()
    AGGREGATION_SIGNALS_PROCESSED.labels(window=window).inc(len(weighted))
    AGGREGATION_CONTRADICTION_SCORE.observe(summary.contradiction_score)
    AGGREGATION_DURATION.labels(window=window).observe(time.monotonic() - started_at)

    return summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Aggregate all windows for a single ticker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def aggregate_company(
    pool: asyncpg.Pool,
    ticker: str,
    reference_time: datetime | None = None,
    config: AggregationConfig | None = None,
) -> list[TrendSummary]:
    """Produce one trend summary per configured window for a ticker.

    Windows come from ``effective_windows()`` on the config and are processed
    one after another, each awaited before the next starts. Every window is
    computed against the same ``reference_time``.

    Args:
        pool: Connection pool passed through to each window aggregation.
        ticker: Ticker symbol to aggregate.
        reference_time: Shared end of all windows; the current UTC time when
            omitted.
        config: Aggregation settings; a default ``AggregationConfig`` when
            omitted.

    Returns:
        The summaries in the same order as the configured windows.
    """
    settings = config or AggregationConfig()
    if reference_time is None:
        # Resolve once so all windows are anchored to the same instant.
        reference_time = datetime.now(timezone.utc)

    return [
        await aggregate_company_window(pool, ticker, name, reference_time, settings)
        for name in settings.effective_windows()
    ]
|
||||
|
||||
Reference in New Issue
Block a user