feat: competitive intelligence & historical pattern matching layer

This commit is contained in:
Celes Renata
2026-04-14 19:42:48 +00:00
parent b478022ba3
commit f7a11d14ea
203 changed files with 20155 additions and 97 deletions
+410 -5
View File
@@ -40,6 +40,17 @@ from services.shared.metrics import (
AGGREGATION_SIGNALS_PROCESSED,
AGGREGATION_WINDOWS_COMPUTED,
)
from services.aggregation.pattern_matcher import find_self_patterns
from services.aggregation.projection import (
MacroEventInfo,
TrendProjection,
compute_projection,
persist_trend_projection,
)
from services.aggregation.signal_propagation import (
CompetitiveSignalRecord,
build_pattern_weighted_signals,
)
from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow
logger = logging.getLogger(__name__)
@@ -64,6 +75,10 @@ class AggregationConfig:
windows: list[str] | None = None # None = all windows
scoring: ScoringConfig | None = None
max_evidence: int = MAX_EVIDENCE_REFS
macro_signal_weight: float = 0.3 # relative weight of macro vs company signals
macro_enabled: bool = True # runtime toggle state
competitive_signal_weight: float = 0.2 # relative weight of pattern signals
competitive_enabled: bool = True # runtime toggle state
def effective_windows(self) -> list[str]:
if self.windows:
@@ -154,6 +169,236 @@ async def fetch_impact_records(
# ---------------------------------------------------------------------------
# Fetch macro toggle state from risk_configs
#
# MACRO LAYER TOGGLE BEHAVIOR (Requirements 11.2, 11.3, 11.4):
# - The toggle state is read fresh from PostgreSQL at the start of each
# aggregation cycle (no caching), so changes take effect immediately on
# the next cycle.
# - When disabled: ingestion and classification continue normally (historical
# data is preserved), but interpolation and aggregation integration are
# skipped — the aggregation engine produces trends using only company-
# specific signals.
# - When re-enabled: the engine resumes computing macro impact scores using
# the most recent GlobalEvent classifications, including any events that
# were ingested and classified while the layer was disabled.
# ---------------------------------------------------------------------------
# Yields at most one row: the 'macro_enabled' key of the `config` column,
# extracted as text by the ->> operator, taken from the most recently updated
# risk_configs row whose `active` flag is TRUE.
_MACRO_TOGGLE_QUERY = """
SELECT config->>'macro_enabled' AS macro_enabled
FROM risk_configs
WHERE active = TRUE
ORDER BY updated_at DESC
LIMIT 1
"""
async def fetch_macro_enabled(pool: asyncpg.Pool) -> bool | None:
    """Read the macro-layer toggle from the ``risk_configs`` table.

    Args:
        pool: Connection pool used to run ``_MACRO_TOGGLE_QUERY``.

    Returns:
        ``None`` when the query yields no row or the ``macro_enabled`` value
        is NULL, so the caller can fall back to its own default. Otherwise
        ``True`` if the stored text equals ``"true"`` case-insensitively and
        ``False`` for any other text.
    """
    record = await pool.fetchrow(_MACRO_TOGGLE_QUERY)
    raw_flag = None if record is None else record["macro_enabled"]
    if raw_flag is None:
        return None
    return raw_flag.lower() == "true"
# ---------------------------------------------------------------------------
# Fetch competitive toggle state from risk_configs
# ---------------------------------------------------------------------------
# Yields at most one row: the 'competitive_enabled' key of the `config`
# column, extracted as text by the ->> operator, taken from the most recently
# updated risk_configs row whose `active` flag is TRUE.
_COMPETITIVE_TOGGLE_QUERY = """
SELECT config->>'competitive_enabled' AS competitive_enabled
FROM risk_configs
WHERE active = TRUE
ORDER BY updated_at DESC
LIMIT 1
"""
async def fetch_competitive_enabled(pool: asyncpg.Pool) -> bool | None:
    """Read the competitive-layer toggle from the ``risk_configs`` table.

    Args:
        pool: Connection pool used to run ``_COMPETITIVE_TOGGLE_QUERY``.

    Returns:
        ``None`` when the query yields no row or the ``competitive_enabled``
        value is NULL, so the caller can fall back to its own default.
        Otherwise ``True`` if the stored text equals ``"true"``
        case-insensitively and ``False`` for any other text.
    """
    record = await pool.fetchrow(_COMPETITIVE_TOGGLE_QUERY)
    raw_flag = None if record is None else record["competitive_enabled"]
    if raw_flag is None:
        return None
    return raw_flag.lower() == "true"
# ---------------------------------------------------------------------------
# Fetch competitive signals targeting a ticker within a time window
# ---------------------------------------------------------------------------
# Parameters: $1 = target ticker, $2 / $3 = inclusive lower / upper bound on
# computed_at. Rows are returned newest first.
_COMPETITIVE_SIGNALS_QUERY = """
SELECT source_document_id, source_ticker, target_ticker, catalyst_type,
pattern_confidence, signal_direction, signal_strength,
relationship_strength, computed_at
FROM competitive_signal_records
WHERE target_ticker = $1
AND computed_at >= $2
AND computed_at <= $3
ORDER BY computed_at DESC
"""
async def fetch_competitive_signals(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    window_end: datetime,
) -> list[CompetitiveSignalRecord]:
    """Load the competitive signal records that target ``ticker``.

    Args:
        pool: Connection pool used to run ``_COMPETITIVE_SIGNALS_QUERY``.
        ticker: Value matched against ``target_ticker``.
        window_start: Inclusive lower bound on ``computed_at``.
        window_end: Inclusive upper bound on ``computed_at``.

    Returns:
        One ``CompetitiveSignalRecord`` per result row, in query order
        (newest ``computed_at`` first).
    """

    def _to_record(rec) -> CompetitiveSignalRecord:
        # The document id is stringified; the three numeric columns are
        # coerced to float.
        # NOTE(review): float(None) raises TypeError, so this assumes those
        # numeric columns are never NULL — confirm against the table schema.
        return CompetitiveSignalRecord(
            source_document_id=str(rec["source_document_id"]),
            source_ticker=rec["source_ticker"],
            target_ticker=rec["target_ticker"],
            catalyst_type=rec["catalyst_type"],
            pattern_confidence=float(rec["pattern_confidence"]),
            signal_direction=rec["signal_direction"],
            signal_strength=float(rec["signal_strength"]),
            relationship_strength=float(rec["relationship_strength"]),
            computed_at=rec["computed_at"],
        )

    records = await pool.fetch(
        _COMPETITIVE_SIGNALS_QUERY, ticker, window_start, window_end,
    )
    return [_to_record(rec) for rec in records]
# ---------------------------------------------------------------------------
# Fetch macro impact records for a ticker within a time window
# ---------------------------------------------------------------------------
# Parameters: $1 = ticker, $2 / $3 = inclusive lower / upper bound on
# mir.computed_at. Each macro impact record is inner-joined to its global
# event and to that event's source document, so records without a matching
# event or document are not returned. The document's published_at is exposed
# as event_published_at. Rows are returned newest first.
_MACRO_IMPACT_QUERY = """
SELECT
mir.event_id,
mir.company_id,
mir.ticker,
mir.macro_impact_score,
mir.impact_direction,
mir.contributing_factors,
mir.confidence,
mir.computed_at,
ge.source_document_id,
d.published_at AS event_published_at
FROM macro_impact_records mir
JOIN global_events ge ON ge.id = mir.event_id
JOIN documents d ON d.id = ge.source_document_id
WHERE mir.ticker = $1
AND mir.computed_at >= $2
AND mir.computed_at <= $3
ORDER BY mir.computed_at DESC
"""
@dataclass
class MacroImpactRow:
    """Typed view of one result row of the macro impact query.

    Identifiers are carried as strings. ``event_published_at`` holds the
    ``published_at`` of the document the global event was sourced from.
    """

    # --- identity of the record ---
    event_id: str
    company_id: str
    ticker: str

    # --- impact assessment ---
    macro_impact_score: float
    impact_direction: str
    contributing_factors: list[str]
    confidence: float

    # --- timing and provenance ---
    computed_at: datetime
    source_document_id: str
    event_published_at: datetime
def _parse_macro_impact_row(row: Any) -> MacroImpactRow:
    """Convert an asyncpg Record to a MacroImpactRow.

    NULL handling:
      * ``macro_impact_score`` NULL -> ``0.0``
      * ``confidence`` NULL -> ``0.5``
      * ``impact_direction`` NULL or empty -> ``"neutral"``

    An explicit confidence of ``0.0`` is preserved. The defaults are applied
    with ``is None`` checks rather than ``value or default``, because ``0.0``
    is falsy and would otherwise be silently promoted to ``0.5``.

    ``contributing_factors`` may arrive as a JSON-encoded string or as an
    already-decoded value; anything that is not a list after decoding is
    replaced by an empty list.

    Args:
        row: Mapping-like record exposing the columns selected by the macro
            impact query.

    Returns:
        The populated ``MacroImpactRow``.

    Raises:
        json.JSONDecodeError: If ``contributing_factors`` is a string that is
            not valid JSON.
    """
    factors = row["contributing_factors"]
    if isinstance(factors, str):
        factors = json.loads(factors)
    if not isinstance(factors, list):
        factors = []

    raw_score = row["macro_impact_score"]
    raw_confidence = row["confidence"]

    return MacroImpactRow(
        event_id=str(row["event_id"]),
        company_id=str(row["company_id"]),
        ticker=row["ticker"],
        macro_impact_score=0.0 if raw_score is None else float(raw_score),
        impact_direction=row["impact_direction"] or "neutral",
        contributing_factors=factors,
        # Only a missing confidence gets the 0.5 default; 0.0 stays 0.0.
        confidence=0.5 if raw_confidence is None else float(raw_confidence),
        computed_at=row["computed_at"],
        source_document_id=str(row["source_document_id"]),
        event_published_at=row["event_published_at"],
    )
async def fetch_macro_impact_records(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    window_end: datetime,
) -> list[MacroImpactRow]:
    """Load and parse the macro impact records for ``ticker``.

    Args:
        pool: Connection pool used to run ``_MACRO_IMPACT_QUERY``.
        ticker: Ticker the records must belong to.
        window_start: Inclusive lower bound on ``computed_at``.
        window_end: Inclusive upper bound on ``computed_at``.

    Returns:
        One ``MacroImpactRow`` per result row, in query order.
    """
    records = await pool.fetch(
        _MACRO_IMPACT_QUERY, ticker, window_start, window_end,
    )
    return list(map(_parse_macro_impact_row, records))
# ---------------------------------------------------------------------------
# Convert macro impact records to WeightedSignals
# ---------------------------------------------------------------------------
# Signed sentiment value for each impact_direction label. "mixed" and
# "neutral" both map to 0.0.
_DIRECTION_TO_SENTIMENT: dict[str, float] = {
"positive": 1.0,
"negative": -1.0,
"mixed": 0.0,
"neutral": 0.0,
}
def build_macro_weighted_signals(
    macro_impacts: list[MacroImpactRow],
    reference_time: datetime,
    window: str,
    macro_signal_weight: float = 0.3,
    config: ScoringConfig | None = None,
) -> list[WeightedSignal]:
    """Turn macro impact records into ``WeightedSignal`` objects.

    For every record, in input order:
      * ``document_id`` is the record's ``source_document_id``.
      * ``weight`` comes from ``compute_signal_weight``. It is called with the
        event's publication time, the record's confidence as both
        ``source_credibility`` and ``extraction_confidence``, and a fixed
        ``novelty_score`` of 0.5.
      * ``sentiment_value`` is looked up from ``impact_direction``
        (0.0 for unknown labels).
      * ``impact_score`` is ``macro_impact_score * macro_signal_weight``.

    Args:
        macro_impacts: Records to convert.
        reference_time: Passed through to ``compute_signal_weight``.
        window: Passed through to ``compute_signal_weight``.
        macro_signal_weight: Multiplier applied to each macro impact score.
        config: Scoring configuration; a default ``ScoringConfig()`` is
            created when this is falsy.

    Returns:
        One ``WeightedSignal`` per input record.
    """
    scoring = config if config else ScoringConfig()

    def _to_signal(record: MacroImpactRow) -> WeightedSignal:
        signal_weight = compute_signal_weight(
            published_at=record.event_published_at,
            reference_time=reference_time,
            window=window,
            source_credibility=record.confidence,
            novelty_score=0.5,
            extraction_confidence=record.confidence,
            config=scoring,
        )
        return WeightedSignal(
            document_id=record.source_document_id,
            weight=signal_weight,
            sentiment_value=_DIRECTION_TO_SENTIMENT.get(
                record.impact_direction, 0.0,
            ),
            impact_score=record.macro_impact_score * macro_signal_weight,
        )

    return [_to_signal(record) for record in macro_impacts]
# ---------------------------------------------------------------------------
# Build weighted signals from impact records
# ---------------------------------------------------------------------------
@@ -544,6 +789,61 @@ async def persist_trend_evidence(
return len(rows)
# ---------------------------------------------------------------------------
# Build MacroEventInfo objects for projection computation
# ---------------------------------------------------------------------------
# Parameters: $1 = ticker, $2 / $3 = inclusive lower / upper bound on
# mir.computed_at. Each macro impact record is inner-joined to its global
# event (for estimated_duration and severity) and to the event's source
# document (for published_at, exposed as event_published_at). Rows are
# returned newest first.
_MACRO_EVENT_INFO_QUERY = """
SELECT
mir.event_id,
mir.macro_impact_score,
mir.impact_direction,
mir.confidence,
ge.estimated_duration,
ge.severity,
d.published_at AS event_published_at
FROM macro_impact_records mir
JOIN global_events ge ON ge.id = mir.event_id
JOIN documents d ON d.id = ge.source_document_id
WHERE mir.ticker = $1
AND mir.computed_at >= $2
AND mir.computed_at <= $3
ORDER BY mir.computed_at DESC
"""
async def _build_macro_event_infos(
    pool: asyncpg.Pool,
    ticker: str,
    window_start: datetime,
    reference_time: datetime,
) -> list[MacroEventInfo]:
    """Fetch macro impact records and build MacroEventInfo objects for projection.

    NULL handling per row:
      * ``macro_impact_score`` NULL -> ``0.0``
      * ``confidence`` NULL -> ``0.5``
      * ``impact_direction`` NULL or empty -> ``"neutral"``
      * ``estimated_duration`` NULL or empty -> ``"short_term"``
      * ``severity`` NULL or empty -> ``"low"``

    An explicit confidence of ``0.0`` is preserved. The default is applied
    with an ``is None`` check, because ``value or 0.5`` would silently promote
    the falsy ``0.0`` to ``0.5``.

    ``event_age_hours`` is the time from the event's publication to
    ``reference_time`` in hours, clamped to be non-negative. It is ``0.0``
    when the publication time is NULL.

    Args:
        pool: Connection pool used to run ``_MACRO_EVENT_INFO_QUERY``.
        ticker: Ticker the records must belong to.
        window_start: Inclusive lower bound on ``computed_at``.
        reference_time: Inclusive upper bound on ``computed_at`` and the
            instant against which event age is measured.

    Returns:
        One ``MacroEventInfo`` per result row, in query order.
    """
    rows = await pool.fetch(
        _MACRO_EVENT_INFO_QUERY, ticker, window_start, reference_time,
    )

    infos: list[MacroEventInfo] = []
    for row in rows:
        published_at = row["event_published_at"]
        age_hours = 0.0
        if published_at is not None:
            # Clamp so an event published after reference_time gets age 0.
            age_hours = max(
                (reference_time - published_at).total_seconds() / 3600.0, 0.0,
            )

        raw_score = row["macro_impact_score"]
        raw_confidence = row["confidence"]

        infos.append(
            MacroEventInfo(
                event_id=str(row["event_id"]),
                macro_impact_score=0.0 if raw_score is None else float(raw_score),
                impact_direction=row["impact_direction"] or "neutral",
                # Only a missing confidence gets the 0.5 default.
                confidence=0.5 if raw_confidence is None else float(raw_confidence),
                estimated_duration=row["estimated_duration"] or "short_term",
                severity=row["severity"] or "low",
                event_age_hours=age_hours,
            )
        )
    return infos
# ---------------------------------------------------------------------------
# Main aggregation entry point for a single ticker + window
# ---------------------------------------------------------------------------
@@ -563,8 +863,10 @@ async def aggregate_company_window(
2. Fetch document impact records from PostgreSQL.
3. Fetch market context for the ticker.
4. Build weighted signals using the scoring module.
5. Assemble the TrendSummary.
6. Persist to trend_windows table.
5. Check macro toggle and fetch/merge macro signals if enabled.
6. Check competitive toggle and fetch/merge pattern/competitive signals if enabled.
7. Assemble the TrendSummary.
8. Persist to trend_windows table.
Returns the assembled TrendSummary.
"""
@@ -589,7 +891,83 @@ async def aggregate_company_window(
impacts, reference_time, window, market_ctx, scoring_cfg,
)
# 4. Assemble trend summary with evidence details
# 4. Check macro toggle and merge macro signals
# (Requirement 11.2, 11.3, 11.4): Toggle state is read from the DB on
# every aggregation cycle. When disabled, macro signals are skipped but
# ingestion/classification continue independently — so when re-enabled,
# the most recent classifications (including those ingested while disabled)
# are immediately available for impact computation.
macro_enabled = cfg.macro_enabled
db_toggle = await fetch_macro_enabled(pool)
if db_toggle is not None:
macro_enabled = db_toggle
if macro_enabled:
macro_impacts = await fetch_macro_impact_records(
pool, ticker, window_start, reference_time,
)
if macro_impacts:
macro_signals = build_macro_weighted_signals(
macro_impacts,
reference_time,
window,
macro_signal_weight=cfg.macro_signal_weight,
config=scoring_cfg,
)
signals = signals + macro_signals
logger.info(
"Merged %d macro signals for %s/%s",
len(macro_signals), ticker, window,
)
# 5. Check competitive toggle and merge pattern/competitive signals
# (Requirements 5.1-5.6): Same toggle pattern as macro layer. When
# disabled, pattern mining remains queryable but aggregation skips
# competitive signals — no degradation of existing behavior.
competitive_enabled = cfg.competitive_enabled
db_competitive_toggle = await fetch_competitive_enabled(pool)
if db_competitive_toggle is not None:
competitive_enabled = db_competitive_toggle
if competitive_enabled:
try:
# Get unique catalyst types from the impact records
catalyst_types = {imp.catalyst_type for imp in impacts}
# Query self-company historical patterns for each catalyst type
all_patterns = []
for cat_type in catalyst_types:
patterns = await find_self_patterns(pool, ticker, cat_type)
all_patterns.extend(patterns)
# Fetch competitive signals targeting this ticker
comp_signals = await fetch_competitive_signals(
pool, ticker, window_start, reference_time,
)
# Convert to WeightedSignal objects
if all_patterns or comp_signals:
pattern_weighted = build_pattern_weighted_signals(
patterns=all_patterns,
competitive_signals=comp_signals,
reference_time=reference_time,
window=window,
)
signals = signals + pattern_weighted
logger.info(
"Merged %d pattern/competitive signals for %s/%s "
"(patterns=%d, competitive=%d)",
len(pattern_weighted), ticker, window,
len(all_patterns), len(comp_signals),
)
except Exception:
logger.exception(
"Failed to fetch pattern/competitive signals for %s/%s"
"continuing with company+macro signals only",
ticker, window,
)
# 6. Assemble trend summary with evidence details
assembled = assemble_trend_with_evidence(
ticker=ticker,
window=window,
@@ -601,10 +979,10 @@ async def aggregate_company_window(
)
summary = assembled.summary
# 5. Persist trend window
# 7. Persist trend window
trend_id = await persist_trend_summary(pool, summary)
# 6. Persist evidence mappings
# 8. Persist evidence mappings
evidence_count = await persist_trend_evidence(
pool, trend_id,
assembled.supporting_evidence,
@@ -617,6 +995,33 @@ async def aggregate_company_window(
summary.trend_strength, summary.confidence, len(signals), evidence_count,
)
# 9. Compute and persist trend projection
try:
macro_event_infos: list[MacroEventInfo] = []
if macro_enabled:
macro_event_infos = await _build_macro_event_infos(
pool, ticker, window_start, reference_time,
)
projection = compute_projection(
summary=summary,
macro_events=macro_event_infos if macro_event_infos else None,
macro_enabled=macro_enabled,
upcoming_catalysts=summary.dominant_catalysts[:3] if summary.dominant_catalysts else None,
)
await persist_trend_projection(pool, trend_id, projection)
logger.info(
"Persisted projection for %s/%s: direction=%s strength=%.3f confidence=%.3f diverges=%s",
ticker, window, projection.projected_direction,
projection.projected_strength, projection.projected_confidence,
projection.diverges_from_current,
)
except Exception:
logger.exception(
"Failed to compute/persist projection for trend %s (%s/%s) — continuing",
trend_id, ticker, window,
)
# Prometheus metrics
AGGREGATION_WINDOWS_COMPUTED.labels(window=window).inc()
AGGREGATION_SIGNALS_PROCESSED.labels(window=window).inc(len(signals))