"""Aggregation worker - company-level rolling window trend summaries. Queries document intelligence and market context for a given ticker, computes weighted signal scores, and produces TrendSummary objects persisted to the trend_windows table. Requirements: 6.1, 6.2, 6.5 """ from __future__ import annotations import json import logging import time import uuid as _uuid from dataclasses import dataclass from datetime import datetime, timedelta, timezone from typing import Any import asyncpg from services.aggregation.contradiction import CatalystEntry, detect_contradictions from services.aggregation.evidence import ( EvidenceRankConfig, RankedEvidence, rank_evidence_detailed, ) from services.aggregation.evidence import ( rank_evidence as _rank_evidence_composite, ) from services.aggregation.market_context import fetch_market_context from services.aggregation.pattern_matcher import find_self_patterns from services.aggregation.projection import ( MacroEventInfo, compute_projection, persist_trend_projection, ) from services.aggregation.scoring import ( ScoringConfig, WeightedSignal, compute_signal_weight, sentiment_to_numeric, weighted_sentiment_average, ) from services.aggregation.signal_propagation import ( CompetitiveSignalRecord, build_pattern_weighted_signals, ) from services.shared.metrics import ( AGGREGATION_CONTRADICTION_SCORE, AGGREGATION_DURATION, AGGREGATION_SIGNALS_PROCESSED, AGGREGATION_WINDOWS_COMPUTED, ) from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow logger = logging.getLogger(__name__) # Map TrendWindow values to lookback durations. WINDOW_DURATIONS: dict[str, timedelta] = { TrendWindow.INTRADAY.value: timedelta(hours=12), TrendWindow.ONE_DAY.value: timedelta(days=1), TrendWindow.SEVEN_DAY.value: timedelta(days=7), TrendWindow.THIRTY_DAY.value: timedelta(days=30), TrendWindow.NINETY_DAY.value: timedelta(days=90), } # How many evidence document IDs to keep in supporting/opposing lists. 
MAX_EVIDENCE_REFS = 10


@dataclass
class AggregationConfig:
    """Controls which windows to compute and scoring parameters.

    ``macro_enabled`` and ``competitive_enabled`` are fallback values: the
    aggregation entry point reads the toggles from ``risk_configs`` on every
    cycle and only uses these fields when no value is stored there.
    """

    windows: list[str] | None = None  # None (or empty) = all windows
    scoring: ScoringConfig | None = None  # None = default ScoringConfig()
    max_evidence: int = MAX_EVIDENCE_REFS
    macro_signal_weight: float = 0.3  # relative weight of macro vs company signals
    macro_enabled: bool = True  # runtime toggle state
    competitive_signal_weight: float = 0.2  # relative weight of pattern signals
    competitive_enabled: bool = True  # runtime toggle state

    def effective_windows(self) -> list[str]:
        """Return the configured windows, or every ``TrendWindow`` value if unset."""
        if self.windows:
            return self.windows
        return [w.value for w in TrendWindow]

    def effective_scoring(self) -> ScoringConfig:
        """Return the configured scoring config, or a default ``ScoringConfig``."""
        return self.scoring or ScoringConfig()


# ---------------------------------------------------------------------------
# Fetch impact records for a ticker within a time window
# ---------------------------------------------------------------------------

_IMPACT_QUERY = """
SELECT
    di.document_id,
    di.confidence,
    di.novelty_score,
    di.source_credibility,
    dir.sentiment,
    dir.impact_score,
    dir.catalyst_type,
    dir.key_facts,
    dir.risks,
    d.published_at
FROM document_impact_records dir
JOIN document_intelligence di ON di.id = dir.intelligence_id
JOIN documents d ON d.id = di.document_id
WHERE dir.ticker = $1
  AND d.published_at >= $2
  AND d.published_at <= $3
  AND di.validation_status = 'valid'
  AND d.status != 'rejected'
ORDER BY d.published_at DESC
"""


@dataclass
class ImpactRow:
    """Parsed row from the impact query."""

    document_id: str
    confidence: float
    novelty_score: float
    source_credibility: float
    sentiment: str
    impact_score: float
    catalyst_type: str
    key_facts: list[str]
    risks: list[str]
    published_at: datetime


def _float_or_default(value: Any, default: float) -> float:
    """Return ``float(value)``, substituting *default* only for ``None``.

    A plain ``value or default`` would also replace a legitimate ``0.0``
    (e.g. a zero-confidence extraction) with the default, silently inflating
    the signal; only a missing value should fall back.
    """
    return default if value is None else float(value)


def _json_list(value: Any) -> list:
    """Decode a JSON-encoded string if needed and return it only if it is a list.

    Anything that is not a list after decoding (``None``, dict, scalar) yields
    an empty list. Malformed JSON propagates the ``json`` decoding error.
    """
    if isinstance(value, str):
        value = json.loads(value)
    return value if isinstance(value, list) else []


def _parse_impact_row(row: Any) -> ImpactRow:
    """Convert an asyncpg Record (or any mapping) to an ImpactRow.

    Missing (``None``) numeric columns default to 0.5 for confidence, novelty
    and credibility and to 0.0 for impact; explicit zeros are preserved.
    Missing sentiment/catalyst default to ``"neutral"`` / ``"other"``.
    """
    return ImpactRow(
        document_id=str(row["document_id"]),
        confidence=_float_or_default(row["confidence"], 0.5),
        novelty_score=_float_or_default(row["novelty_score"], 0.5),
        source_credibility=_float_or_default(row["source_credibility"], 0.5),
        sentiment=row["sentiment"] or "neutral",
        impact_score=_float_or_default(row["impact_score"], 0.0),
        catalyst_type=row["catalyst_type"] or "other",
        key_facts=_json_list(row["key_facts"]),
        risks=_json_list(row["risks"]),
        published_at=row["published_at"],
    )


async def fetch_impact_records(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    window_end: datetime,
) -> list[ImpactRow]:
    """Fetch validated document impact records for a ticker in a time range.

    Both bounds are inclusive; rows come back newest first.
    """
    rows = await pool.fetch(_IMPACT_QUERY, ticker, window_start, window_end)
    return [_parse_impact_row(r) for r in rows]


# ---------------------------------------------------------------------------
# Fetch macro toggle state from risk_configs
#
# MACRO LAYER TOGGLE BEHAVIOR (Requirements 11.2, 11.3, 11.4):
# - The toggle state is read fresh from PostgreSQL at the start of each
#   aggregation cycle (no caching), so changes take effect immediately on
#   the next cycle.
# - When disabled: ingestion and classification continue normally (historical
#   data is preserved), but interpolation and aggregation integration are
#   skipped — the aggregation engine produces trends using only company-
#   specific signals.
# - When re-enabled: the engine resumes computing macro impact scores using
#   the most recent GlobalEvent classifications, including any events that
#   were ingested and classified while the layer was disabled.
# ---------------------------------------------------------------------------

_MACRO_TOGGLE_QUERY = """
SELECT config->>'macro_enabled' AS macro_enabled
FROM risk_configs
WHERE active = TRUE
ORDER BY updated_at DESC
LIMIT 1
"""


def _parse_toggle(row: Any, column: str) -> bool | None:
    """Interpret a text toggle column: ``None`` if absent, else case-insensitive ``"true"``."""
    if row is None or row[column] is None:
        return None
    return row[column].lower() == "true"


async def fetch_macro_enabled(pool: asyncpg.Pool) -> bool | None:
    """Check macro toggle state from risk_configs table.

    Returns True/False if explicitly set, or None if no config exists
    (caller should fall back to AggregationConfig default).
    """
    row = await pool.fetchrow(_MACRO_TOGGLE_QUERY)
    return _parse_toggle(row, "macro_enabled")


# ---------------------------------------------------------------------------
# Fetch competitive toggle state from risk_configs
# ---------------------------------------------------------------------------

_COMPETITIVE_TOGGLE_QUERY = """
SELECT config->>'competitive_enabled' AS competitive_enabled
FROM risk_configs
WHERE active = TRUE
ORDER BY updated_at DESC
LIMIT 1
"""


async def fetch_competitive_enabled(pool: asyncpg.Pool) -> bool | None:
    """Check competitive toggle state from risk_configs table.

    Returns True/False if explicitly set, or None if no config exists
    (caller should fall back to AggregationConfig default).
    """
    row = await pool.fetchrow(_COMPETITIVE_TOGGLE_QUERY)
    return _parse_toggle(row, "competitive_enabled")


# ---------------------------------------------------------------------------
# Fetch competitive signals targeting a ticker within a time window
# ---------------------------------------------------------------------------

_COMPETITIVE_SIGNALS_QUERY = """
SELECT
    source_document_id,
    source_ticker,
    target_ticker,
    catalyst_type,
    pattern_confidence,
    signal_direction,
    signal_strength,
    relationship_strength,
    computed_at
FROM competitive_signal_records
WHERE target_ticker = $1
  AND computed_at >= $2
  AND computed_at <= $3
ORDER BY computed_at DESC
LIMIT 500
"""


async def fetch_competitive_signals(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    window_end: datetime,
) -> list[CompetitiveSignalRecord]:
    """Fetch competitive signal records targeting a ticker in a time range.

    At most the 500 most recently computed records are returned.
    NOTE(review): the numeric columns are passed straight to ``float()``, so a
    NULL value would raise — presumably these columns are NOT NULL; confirm.
    """
    rows = await pool.fetch(
        _COMPETITIVE_SIGNALS_QUERY, ticker, window_start, window_end,
    )
    return [
        CompetitiveSignalRecord(
            source_document_id=str(row["source_document_id"]),
            source_ticker=row["source_ticker"],
            target_ticker=row["target_ticker"],
            catalyst_type=row["catalyst_type"],
            pattern_confidence=float(row["pattern_confidence"]),
            signal_direction=row["signal_direction"],
            signal_strength=float(row["signal_strength"]),
            relationship_strength=float(row["relationship_strength"]),
            computed_at=row["computed_at"],
        )
        for row in rows
    ]


# ---------------------------------------------------------------------------
# Fetch macro impact records for a ticker within a time window
# ---------------------------------------------------------------------------

_MACRO_IMPACT_QUERY = """
SELECT
    mir.event_id,
    mir.company_id,
    mir.ticker,
    mir.macro_impact_score,
    mir.impact_direction,
    mir.contributing_factors,
    mir.confidence,
    mir.computed_at,
    ge.source_document_id,
    d.published_at AS event_published_at
FROM macro_impact_records mir
JOIN global_events ge ON ge.id = mir.event_id
JOIN documents d ON d.id = ge.source_document_id
WHERE mir.ticker = $1
  AND mir.computed_at >= $2
  AND mir.computed_at <= $3
ORDER BY mir.computed_at DESC
"""


@dataclass
class MacroImpactRow:
    """Parsed row from the macro impact query."""

    event_id: str
    company_id: str
    ticker: str
    macro_impact_score: float
    impact_direction: str
    contributing_factors: list[str]
    confidence: float
    computed_at: datetime
    source_document_id: str
    event_published_at: datetime


def _parse_macro_impact_row(row: Any) -> MacroImpactRow:
    """Convert an asyncpg Record (or any mapping) to a MacroImpactRow.

    Missing score/confidence default to 0.0 / 0.5 (explicit zeros are kept);
    a missing direction defaults to ``"neutral"``.
    """
    return MacroImpactRow(
        event_id=str(row["event_id"]),
        company_id=str(row["company_id"]),
        ticker=row["ticker"],
        macro_impact_score=_float_or_default(row["macro_impact_score"], 0.0),
        impact_direction=row["impact_direction"] or "neutral",
        contributing_factors=_json_list(row["contributing_factors"]),
        confidence=_float_or_default(row["confidence"], 0.5),
        computed_at=row["computed_at"],
        source_document_id=str(row["source_document_id"]),
        event_published_at=row["event_published_at"],
    )


async def fetch_macro_impact_records(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    window_end: datetime,
) -> list[MacroImpactRow]:
    """Fetch macro impact records for a ticker in a time range.

    The range filters on ``computed_at`` (inclusive), newest first.
    """
    rows = await pool.fetch(_MACRO_IMPACT_QUERY, ticker, window_start, window_end)
    return [_parse_macro_impact_row(r) for r in rows]


# ---------------------------------------------------------------------------
# Convert macro impact records to WeightedSignals
# ---------------------------------------------------------------------------

_DIRECTION_TO_SENTIMENT: dict[str, float] = {
    "positive": 1.0,
    "negative": -1.0,
    "mixed": 0.0,
    "neutral": 0.0,
}


def build_macro_weighted_signals(
    macro_impacts: list[MacroImpactRow],
    reference_time: datetime,
    window: str,
    macro_signal_weight: float = 0.3,
    config: ScoringConfig | None = None,
) -> list[WeightedSignal]:
    """Convert macro impact records into WeightedSignal objects.

    Uses the same scoring pipeline as company signals:
    - document_id = source_document_id (for evidence tracing)
    - sentiment_value mapped from impact_direction (unknown directions → 0.0)
    - impact_score = macro_impact_score * macro_signal_weight
    - recency decay from the global event's publication time
    - confidence gating from the macro record's confidence, which is passed
      as both source credibility and extraction confidence; novelty is fixed
      at 0.5
    """
    cfg = config or ScoringConfig()
    signals: list[WeightedSignal] = []
    for mir in macro_impacts:
        sw = compute_signal_weight(
            published_at=mir.event_published_at,
            reference_time=reference_time,
            window=window,
            source_credibility=mir.confidence,
            novelty_score=0.5,
            extraction_confidence=mir.confidence,
            config=cfg,
        )
        sentiment = _DIRECTION_TO_SENTIMENT.get(mir.impact_direction, 0.0)
        impact = mir.macro_impact_score * macro_signal_weight
        signals.append(
            WeightedSignal(
                document_id=mir.source_document_id,
                weight=sw,
                sentiment_value=sentiment,
                impact_score=impact,
            )
        )
    return signals


# ---------------------------------------------------------------------------
# Build weighted signals from impact records
# ---------------------------------------------------------------------------


def build_weighted_signals(
    impacts: list[ImpactRow],
    reference_time: datetime,
    window: str,
    market_ctx: Any | None = None,
    config: ScoringConfig | None = None,
) -> list[WeightedSignal]:
    """Convert impact records into WeightedSignal objects using the scoring module.

    One signal is produced per impact row, in input order.
    """
    cfg = config or ScoringConfig()
    signals: list[WeightedSignal] = []
    for imp in impacts:
        sw = compute_signal_weight(
            published_at=imp.published_at,
            reference_time=reference_time,
            window=window,
            source_credibility=imp.source_credibility,
            novelty_score=imp.novelty_score,
            extraction_confidence=imp.confidence,
            market_ctx=market_ctx,
            config=cfg,
        )
        signals.append(
            WeightedSignal(
                document_id=imp.document_id,
                weight=sw,
                sentiment_value=sentiment_to_numeric(imp.sentiment),
                impact_score=imp.impact_score,
            )
        )
    return signals


# ---------------------------------------------------------------------------
# Derive trend direction
# from weighted sentiment
# ---------------------------------------------------------------------------

# Thresholds for mapping numeric sentiment to direction.
BULLISH_THRESHOLD = 0.15
BEARISH_THRESHOLD = -0.15
MIXED_THRESHOLD = 0.10  # contradiction score above this → mixed


def derive_trend_direction(
    avg_sentiment: float,
    contradiction_score: float = 0.0,
) -> TrendDirection:
    """Map a weighted average sentiment to a TrendDirection.

    The direction is MIXED when contradiction exceeds ``MIXED_THRESHOLD``
    *and* the average sentiment is not decisive (``abs(avg) < 0.3``); a
    strongly one-sided average keeps its BULLISH/BEARISH label even under
    contradiction. Otherwise the bullish/bearish thresholds apply, with
    NEUTRAL in between.
    """
    if contradiction_score > MIXED_THRESHOLD and abs(avg_sentiment) < 0.3:
        return TrendDirection.MIXED
    if avg_sentiment >= BULLISH_THRESHOLD:
        return TrendDirection.BULLISH
    if avg_sentiment <= BEARISH_THRESHOLD:
        return TrendDirection.BEARISH
    return TrendDirection.NEUTRAL


# ---------------------------------------------------------------------------
# Compute contradiction score
# ---------------------------------------------------------------------------


def compute_contradiction_score(signals: list[WeightedSignal]) -> float:
    """Measure how much disagreement exists among weighted signals.

    Each signal contributes ``weight.combined * impact_score`` to the positive
    or negative side according to the sign of its sentiment (neutral signals
    are ignored). The result is the minority side's share of the total,
    ``min(pos, neg) / (pos + neg)``, rounded to 4 decimals: 0 means full
    agreement and, for non-negative weights, 0.5 means equal-weight positive
    and negative signals. Returns 0.0 for no signals or a zero total.
    """
    if not signals:
        return 0.0

    pos_weight = 0.0
    neg_weight = 0.0
    for sig in signals:
        w = sig.weight.combined * sig.impact_score
        if sig.sentiment_value > 0:
            pos_weight += w
        elif sig.sentiment_value < 0:
            neg_weight += w

    total = pos_weight + neg_weight
    if total == 0.0:
        return 0.0
    minority = min(pos_weight, neg_weight)
    return round(minority / total, 4)


# ---------------------------------------------------------------------------
# Rank evidence (supporting vs opposing)
# ---------------------------------------------------------------------------


def rank_evidence(
    signals: list[WeightedSignal],
    max_refs: int = MAX_EVIDENCE_REFS,
) -> tuple[list[str], list[str]]:
    """Return top supporting and opposing document IDs ranked by composite score.

    Delegates to the evidence ranking module which considers multiple factors
    (weight, impact, recency, confidence) rather than raw weight alone.

    Supporting = positive sentiment, Opposing = negative sentiment.
    Neutral/mixed signals are excluded from evidence lists.
    """
    config = EvidenceRankConfig(max_refs=max_refs)
    return _rank_evidence_composite(signals, config)


# ---------------------------------------------------------------------------
# Extract dominant catalysts and material risks
# ---------------------------------------------------------------------------


def extract_catalysts_and_risks(
    impacts: list[ImpactRow],
    signals: list[WeightedSignal],
) -> tuple[list[str], list[str]]:
    """Return dominant catalyst types and material risks weighted by signal strength.

    Catalysts are ranked by cumulative weight (top 5). Risks are deduplicated
    case-insensitively and ordered by the weight of the signal that surfaced
    them (top 5). Impacts whose document has no signal, or a non-positive
    weight, are skipped.
    """
    catalyst_weights: dict[str, float] = {}
    risk_entries: list[tuple[float, str]] = []

    # Build a lookup from document_id to combined weight. When several
    # signals share a document_id, the last one in ``signals`` wins.
    weight_by_doc = {
        s.document_id: s.weight.combined * s.impact_score for s in signals
    }

    for imp in impacts:
        w = weight_by_doc.get(imp.document_id, 0.0)
        if w <= 0.0:
            continue
        catalyst_weights[imp.catalyst_type] = (
            catalyst_weights.get(imp.catalyst_type, 0.0) + w
        )
        for risk in imp.risks:
            risk_entries.append((w, risk))

    # Top catalysts by cumulative weight
    sorted_catalysts = sorted(
        catalyst_weights.items(), key=lambda x: x[1], reverse=True,
    )
    catalysts = [cat for cat, _ in sorted_catalysts[:5]]

    # Deduplicated risks ordered by weight
    seen_risks: set[str] = set()
    risks: list[str] = []
    risk_entries.sort(key=lambda x: x[0], reverse=True)
    for _, risk_text in risk_entries:
        normalized = risk_text.strip().lower()
        if normalized not in seen_risks:
            seen_risks.add(normalized)
            risks.append(risk_text.strip())
        if len(risks) >= 5:
            break

    return catalysts, risks


# ---------------------------------------------------------------------------
# Compute trend confidence
# ---------------------------------------------------------------------------


def compute_trend_confidence(
    signals: list[WeightedSignal],
    contradiction_score: float,
) -> float:
    """Derive an overall confidence for the trend summary.

    Confidence is based on:
    - Number of UNIQUE source documents (not raw signal count)
    - Average ``weight.credibility`` of contributing signals
    - Signal agreement (what fraction point the same direction)
    - Contradiction penalty (high contradiction lowers confidence)

    Only signals with a positive combined weight contribute.
    Returns a value in [0, 1], rounded to 4 decimals.
    """
    if not signals:
        return 0.0

    active = [s for s in signals if s.weight.combined > 0]
    if not active:
        return 0.0

    # Count unique source documents — competitive signals from the same doc
    # shouldn't inflate confidence
    unique_sources = len({s.document_id for s in active if s.document_id})
    # Scales as unique/15 but is capped at 0.8, so the cap is hit at 12 docs.
    count_factor = min(unique_sources / 15.0, 0.8)

    # NOTE(review): this averages the credibility component of the weight;
    # confirm whether extraction confidence was intended instead.
    avg_conf = sum(s.weight.credibility for s in active) / len(active)

    # Signal agreement: what fraction of signals agree on direction
    bullish = sum(1 for s in active if s.sentiment_value > 0)
    bearish = sum(1 for s in active if s.sentiment_value < 0)
    total = bullish + bearish
    if total > 0:
        agreement = max(bullish, bearish) / total
    else:
        agreement = 0.5  # only neutral signals: no directional evidence

    # Contradiction penalty
    contradiction_penalty = contradiction_score * 0.4

    confidence = (
        0.3 * count_factor + 0.3 * avg_conf + 0.4 * agreement
    ) - contradiction_penalty
    return round(max(0.0, min(1.0, confidence)), 4)


# ---------------------------------------------------------------------------
# Assemble a TrendSummary from components
# ---------------------------------------------------------------------------


@dataclass
class AssembledTrend:
    """A trend summary paired with its detailed evidence rankings."""

    summary: TrendSummary
    supporting_evidence: list[RankedEvidence]
    opposing_evidence: list[RankedEvidence]


def assemble_trend_summary(
    ticker: str,
    window: str,
    signals: list[WeightedSignal],
    impacts: list[ImpactRow],
    market_ctx: Any | None = None,
    max_evidence: int = MAX_EVIDENCE_REFS,
    reference_time: datetime | None = None,
) -> TrendSummary:
    """Build a complete TrendSummary from weighted signals and impact records.

    Thin wrapper around ``assemble_trend_with_evidence`` that drops the
    detailed evidence rankings.
    """
    result = assemble_trend_with_evidence(
        ticker, window, signals, impacts, market_ctx, max_evidence, reference_time,
    )
    return result.summary


def assemble_trend_with_evidence(
    ticker: str,
    window: str,
    signals: list[WeightedSignal],
    impacts: list[ImpactRow],
    market_ctx: Any | None = None,
    max_evidence: int = MAX_EVIDENCE_REFS,
    reference_time: datetime | None = None,
) -> AssembledTrend:
    """Build a TrendSummary and return detailed evidence rankings for persistence.

    ``window`` must be a valid ``TrendWindow`` value (it is converted with
    ``TrendWindow(window)``). ``reference_time`` defaults to the current UTC
    time and becomes the summary's ``generated_at``.
    """
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

    avg_sentiment = weighted_sentiment_average(signals)

    # Run full contradiction detection (Requirement 6.4)
    catalyst_entries = [
        CatalystEntry(document_id=imp.document_id, catalyst_type=imp.catalyst_type)
        for imp in impacts
    ]
    contradiction_result = detect_contradictions(signals, catalyst_entries)
    contradiction = contradiction_result.score

    direction = derive_trend_direction(avg_sentiment, contradiction)
    confidence = compute_trend_confidence(signals, contradiction)

    # Get detailed evidence rankings for persistence
    config = EvidenceRankConfig(max_refs=max_evidence)
    supporting_ranked, opposing_ranked = rank_evidence_detailed(signals, config)
    supporting = [r.document_id for r in supporting_ranked]
    opposing = [r.document_id for r in opposing_ranked]

    catalysts, risks = extract_catalysts_and_risks(impacts, signals)

    # Trend strength: absolute value of weighted sentiment, clamped to [0, 1]
    strength = round(min(abs(avg_sentiment), 1.0), 4)

    summary = TrendSummary(
        entity_type="company",
        entity_id=ticker,
        window=TrendWindow(window),
        trend_direction=direction,
        trend_strength=strength,
        confidence=confidence,
        top_supporting_evidence=supporting,
        top_opposing_evidence=opposing,
        dominant_catalysts=catalysts,
        material_risks=risks,
        contradiction_score=contradiction,
        disagreement_details=contradiction_result.details,
        market_context=market_ctx,
        generated_at=reference_time,
    )

    return AssembledTrend(
        summary=summary,
        supporting_evidence=supporting_ranked,
        opposing_evidence=opposing_ranked,
    )


# ---------------------------------------------------------------------------
# Persist trend summary to PostgreSQL
# ---------------------------------------------------------------------------

_UPSERT_TREND = """
INSERT INTO trend_windows (
    entity_type, entity_id, "window",
    trend_direction, trend_strength, confidence,
    top_supporting_evidence, top_opposing_evidence,
    dominant_catalysts, material_risks,
    contradiction_score, disagreement_details,
    market_context, generated_at
) VALUES (
    $1, $2, $3,
    $4, $5, $6,
    $7::jsonb, $8::jsonb,
    $9::jsonb, $10::jsonb,
    $11, $12::jsonb,
    $13::jsonb, $14
)
ON CONFLICT (entity_type, entity_id, "window")
DO UPDATE SET
    trend_direction = EXCLUDED.trend_direction,
    trend_strength = EXCLUDED.trend_strength,
    confidence = EXCLUDED.confidence,
    top_supporting_evidence = EXCLUDED.top_supporting_evidence,
    top_opposing_evidence = EXCLUDED.top_opposing_evidence,
    dominant_catalysts = EXCLUDED.dominant_catalysts,
    material_risks = EXCLUDED.material_risks,
    contradiction_score = EXCLUDED.contradiction_score,
    disagreement_details = EXCLUDED.disagreement_details,
    market_context = EXCLUDED.market_context,
    generated_at = EXCLUDED.generated_at
RETURNING id
"""


async def persist_trend_summary(
    pool: asyncpg.Pool,
    summary: TrendSummary,
) -> str:
    """Upsert a trend summary row and return its id as a string.

    A row already existing for the same (entity_type, entity_id, window) is
    overwritten in place, so the returned id is stable across cycles.
    """
    row = await pool.fetchrow(
        _UPSERT_TREND,
        summary.entity_type,
        summary.entity_id,
        summary.window.value,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        json.dumps(summary.top_supporting_evidence),
        json.dumps(summary.top_opposing_evidence),
        json.dumps(summary.dominant_catalysts),
        json.dumps(summary.material_risks),
        summary.contradiction_score,
        json.dumps([d.model_dump() for d in summary.disagreement_details]),
        # default=str stringifies values json cannot encode natively.
        json.dumps(
            summary.market_context.model_dump() if summary.market_context else {},
            default=str,
        ),
        summary.generated_at,
    )
    return str(row["id"])


# ---------------------------------------------------------------------------
# Persist evidence mappings to trend_evidence table
# ---------------------------------------------------------------------------

_INSERT_EVIDENCE = """
INSERT INTO trend_evidence (
    trend_window_id, document_id, evidence_type,
    rank_score, weight_component, impact_component,
    recency_component, confidence_component, sentiment_value
) VALUES (
    $1, $2::uuid, $3,
    $4, $5, $6,
    $7, $8, $9
)
"""


def _is_valid_uuid(val: str) -> bool:
    """Check if a string is a valid UUID (pattern signal IDs are not).

    Returns False rather than raising for any unusable value, including
    ``None`` (``uuid.UUID(None)`` raises ``TypeError``) and non-string
    objects (``AttributeError``).
    """
    try:
        _uuid.UUID(val)
    except (ValueError, AttributeError, TypeError):
        return False
    return True


async def persist_trend_evidence(
    pool: asyncpg.Pool,
    trend_window_id: str,
    supporting: list[RankedEvidence],
    opposing: list[RankedEvidence],
) -> int:
    """Insert evidence mapping rows for a trend window. Returns count inserted.

    Evidence whose document id is not a UUID (e.g. pattern signal synthetic
    IDs) is skipped, because the column is cast with ``::uuid``.

    NOTE(review): this only INSERTs; it does not remove rows previously
    written for ``trend_window_id``. Since the trend row is upserted and keeps
    its id, repeated cycles presumably accumulate evidence rows — confirm
    whether the table or another job handles that.
    """
    rows: list[tuple[str, str, str, float, float, float, float, float, float]] = []
    for evidence_type, ranked in (("supporting", supporting), ("opposing", opposing)):
        for ev in ranked:
            if not _is_valid_uuid(ev.document_id):
                continue
            rows.append((
                trend_window_id,
                ev.document_id,
                evidence_type,
                ev.rank_score,
                ev.weight_component,
                ev.impact_component,
                ev.recency_component,
                ev.confidence_component,
                ev.sentiment_value,
            ))

    if not rows:
        return 0

    await pool.executemany(_INSERT_EVIDENCE, rows)
    return len(rows)


# ---------------------------------------------------------------------------
# Build MacroEventInfo objects for projection computation
# ---------------------------------------------------------------------------

_MACRO_EVENT_INFO_QUERY = """
SELECT
    mir.event_id,
    mir.macro_impact_score,
    mir.impact_direction,
    mir.confidence,
    ge.estimated_duration,
    ge.severity,
    d.published_at AS event_published_at
FROM macro_impact_records mir
JOIN global_events ge ON ge.id = mir.event_id
JOIN documents d ON d.id = ge.source_document_id
WHERE mir.ticker = $1
  AND mir.computed_at >= $2
  AND mir.computed_at <= $3
ORDER BY mir.computed_at DESC
"""


async def _build_macro_event_infos(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    reference_time: datetime,
) -> list[MacroEventInfo]:
    """Fetch macro impact records and build MacroEventInfo objects for projection.

    Event age is measured from the source document's publication time to
    ``reference_time`` in hours, floored at 0 (and 0 when unpublished).
    """
    rows = await pool.fetch(
        _MACRO_EVENT_INFO_QUERY, ticker, window_start, reference_time,
    )
    infos: list[MacroEventInfo] = []
    for row in rows:
        published_at = row["event_published_at"]
        age_hours = 0.0
        if published_at:
            age_hours = max(
                (reference_time - published_at).total_seconds() / 3600.0,
                0.0,
            )
        # Default only when the value is missing: ``or 0.5`` would turn an
        # explicit zero confidence into 0.5.
        confidence = row["confidence"]
        infos.append(
            MacroEventInfo(
                event_id=str(row["event_id"]),
                macro_impact_score=float(row["macro_impact_score"] or 0.0),
                impact_direction=row["impact_direction"] or "neutral",
                confidence=0.5 if confidence is None else float(confidence),
                estimated_duration=row["estimated_duration"] or "short_term",
                severity=row["severity"] or "low",
                event_age_hours=age_hours,
            )
        )
    return infos


# ---------------------------------------------------------------------------
# Main aggregation entry point for a single ticker + window
# ---------------------------------------------------------------------------


async def aggregate_company_window(
    pool: asyncpg.Pool,
    ticker: str,
    window: str,
    reference_time: datetime | None = None,
    config: AggregationConfig | None = None,
) -> TrendSummary:
    """Compute and persist a trend summary for one ticker and one window.

    The lookback range is ``reference_time - WINDOW_DURATIONS[window]`` (a
    window missing from the table falls back to 7 days). Steps:
    1. Fetch document impact records from PostgreSQL.
    2. Fetch market context for the ticker.
    3. Build weighted signals using the scoring module.
    4. Check macro toggle and fetch/merge macro signals if enabled.
    5. Check competitive toggle and fetch/merge pattern/competitive signals
       if enabled (failures are logged and skipped).
    6. Assemble the TrendSummary.
    7. Persist to trend_windows table.
    8. Persist evidence mappings.
    9. Compute and persist the trend projection (failures are logged and
       skipped).

    Returns the assembled TrendSummary.
    """
    cfg = config or AggregationConfig()
    scoring_cfg = cfg.effective_scoring()
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

    _agg_start = time.monotonic()

    duration = WINDOW_DURATIONS.get(window, timedelta(days=7))
    window_start = reference_time - duration

    # 1. Fetch impact records
    impacts = await fetch_impact_records(pool, ticker, window_start, reference_time)

    # 2. Fetch market context
    market_ctx = await fetch_market_context(pool, ticker, window, reference_time)

    # 3. Build weighted signals
    signals = build_weighted_signals(
        impacts, reference_time, window, market_ctx, scoring_cfg,
    )

    # 4. Check macro toggle and merge macro signals
    # (Requirement 11.2, 11.3, 11.4): Toggle state is read from the DB on
    # every aggregation cycle. When disabled, macro signals are skipped but
    # ingestion/classification continue independently — so when re-enabled,
    # the most recent classifications (including those ingested while disabled)
    # are immediately available for impact computation.
    macro_enabled = cfg.macro_enabled
    db_toggle = await fetch_macro_enabled(pool)
    if db_toggle is not None:
        macro_enabled = db_toggle

    if macro_enabled:
        macro_impacts = await fetch_macro_impact_records(
            pool, ticker, window_start, reference_time,
        )
        if macro_impacts:
            macro_signals = build_macro_weighted_signals(
                macro_impacts,
                reference_time,
                window,
                macro_signal_weight=cfg.macro_signal_weight,
                config=scoring_cfg,
            )
            signals = signals + macro_signals
            logger.info(
                "Merged %d macro signals for %s/%s",
                len(macro_signals), ticker, window,
            )

    # 5. Check competitive toggle and merge pattern/competitive signals
    # (Requirements 5.1-5.6): Same toggle pattern as macro layer. When
    # disabled, pattern mining remains queryable but aggregation skips
    # competitive signals — no degradation of existing behavior.
    # NOTE(review): cfg.competitive_signal_weight is not passed anywhere in
    # this function — confirm whether build_pattern_weighted_signals should
    # receive it.
    competitive_enabled = cfg.competitive_enabled
    db_competitive_toggle = await fetch_competitive_enabled(pool)
    if db_competitive_toggle is not None:
        competitive_enabled = db_competitive_toggle

    if competitive_enabled:
        try:
            # Get unique catalyst types from the impact records; sorted so the
            # pattern order (and therefore signal order) is deterministic.
            catalyst_types = sorted({imp.catalyst_type for imp in impacts})

            # Query self-company historical patterns for each catalyst type
            all_patterns = []
            for cat_type in catalyst_types:
                patterns = await find_self_patterns(pool, ticker, cat_type)
                all_patterns.extend(patterns)

            # Fetch competitive signals targeting this ticker
            comp_signals = await fetch_competitive_signals(
                pool, ticker, window_start, reference_time,
            )

            # Convert to WeightedSignal objects
            if all_patterns or comp_signals:
                pattern_weighted = build_pattern_weighted_signals(
                    patterns=all_patterns,
                    competitive_signals=comp_signals,
                    reference_time=reference_time,
                    window=window,
                )
                signals = signals + pattern_weighted
                logger.info(
                    "Merged %d pattern/competitive signals for %s/%s "
                    "(patterns=%d, competitive=%d)",
                    len(pattern_weighted), ticker, window,
                    len(all_patterns), len(comp_signals),
                )
        except Exception:
            # Deliberate best-effort boundary: the competitive layer must not
            # take down the core company/macro aggregation.
            logger.exception(
                "Failed to fetch pattern/competitive signals for %s/%s — "
                "continuing with company+macro signals only",
                ticker, window,
            )

    # 6. Assemble trend summary with evidence details. Market context is only
    # attached when it exists and reports data.
    has_market_data = market_ctx is not None and market_ctx.has_data
    assembled = assemble_trend_with_evidence(
        ticker=ticker,
        window=window,
        signals=signals,
        impacts=impacts,
        market_ctx=market_ctx if has_market_data else None,
        max_evidence=cfg.max_evidence,
        reference_time=reference_time,
    )
    summary = assembled.summary

    # 7. Persist trend window
    trend_id = await persist_trend_summary(pool, summary)

    # 8. Persist evidence mappings
    evidence_count = await persist_trend_evidence(
        pool, trend_id, assembled.supporting_evidence, assembled.opposing_evidence,
    )

    logger.info(
        "Persisted trend %s for %s/%s: direction=%s strength=%.3f "
        "confidence=%.3f signals=%d evidence=%d",
        trend_id, ticker, window,
        summary.trend_direction.value,
        summary.trend_strength,
        summary.confidence,
        len(signals),
        evidence_count,
    )

    # 9. Compute and persist trend projection
    try:
        macro_event_infos: list[MacroEventInfo] = []
        if macro_enabled:
            macro_event_infos = await _build_macro_event_infos(
                pool, ticker, window_start, reference_time,
            )

        projection = compute_projection(
            summary=summary,
            macro_events=macro_event_infos if macro_event_infos else None,
            macro_enabled=macro_enabled,
            upcoming_catalysts=(
                summary.dominant_catalysts[:3] if summary.dominant_catalysts else None
            ),
        )

        await persist_trend_projection(pool, trend_id, projection)

        logger.info(
            "Persisted projection for %s/%s: direction=%s strength=%.3f "
            "confidence=%.3f diverges=%s",
            ticker, window,
            projection.projected_direction,
            projection.projected_strength,
            projection.projected_confidence,
            projection.diverges_from_current,
        )
    except Exception:
        # Best-effort: the trend itself is already persisted at this point.
        logger.exception(
            "Failed to compute/persist projection for trend %s (%s/%s) — continuing",
            trend_id, ticker, window,
        )

    # Prometheus metrics
    AGGREGATION_WINDOWS_COMPUTED.labels(window=window).inc()
    AGGREGATION_SIGNALS_PROCESSED.labels(window=window).inc(len(signals))
    AGGREGATION_CONTRADICTION_SCORE.observe(summary.contradiction_score)
    AGGREGATION_DURATION.labels(window=window).observe(time.monotonic() - _agg_start)

    return summary


# ---------------------------------------------------------------------------
# Aggregate all windows for a single ticker
# ---------------------------------------------------------------------------


async def aggregate_company(
    pool: asyncpg.Pool,
    ticker: str,
    reference_time: datetime | None = None,
    config: AggregationConfig | None = None,
) -> list[TrendSummary]:
    """Compute trend summaries for all configured windows for a ticker.

    Windows are processed sequentially and share one ``reference_time``
    (defaulting to the current UTC time) so all summaries describe the same
    instant. Summaries are returned in window order.
    """
    cfg = config or AggregationConfig()
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)

    summaries: list[TrendSummary] = []
    for window in cfg.effective_windows():
        summary = await aggregate_company_window(
            pool, ticker, window, reference_time, cfg,
        )
        summaries.append(summary)

    return summaries