feat: competitive intelligence & historical pattern matching layer

2026-04-14 19:42:48 +00:00
parent b478022ba3
commit f7a11d14ea
203 changed files with 20155 additions and 97 deletions
@@ -110,6 +110,19 @@ BUCKET_RETENTION_FIELDS: dict[str, str] = {
 }


+@dataclass
+class MacroConfig:
+    """Configuration for the macro news interpolation layer.
+
+    Requirements: 5.6, 10.1, 10.2, 12.9
+    """
+    macro_signal_weight: float = 0.3  # relative weight of macro vs company signals (env: MACRO_SIGNAL_WEIGHT)
+    macro_enabled: bool = True  # runtime toggle state (default on); load_config() treats env MACRO_ENABLED as on only when it equals "true" case-insensitively
+    macro_confidence_threshold: float = 0.4  # minimum confidence for event inclusion (env: MACRO_CONFIDENCE_THRESHOLD)
+    macro_short_term_staleness_hours: int = 48  # hours after which short-term events get accelerated decay (env: MACRO_SHORT_TERM_STALENESS_HOURS)
+    projection_confidence_threshold: float = 0.3  # minimum confidence for projections to influence recommendations (env: PROJECTION_CONFIDENCE_THRESHOLD, no MACRO_ prefix)
+
+
@dataclass
 class AlertingConfig:
    """Thresholds for operational alerting rules.
@@ -135,6 +148,26 @@ class AlertingConfig:
    check_interval_seconds: int = 120


+@dataclass
+class CompetitiveConfig:
+    """Configuration for the competitive intelligence & historical pattern matching layer.
+
+    Requirements: 5.6, 6.1, 9.1, 9.2, 11.2, 11.3
+    """
+    competitive_signal_weight: float = 0.2  # env: COMPETITIVE_SIGNAL_WEIGHT; presumably the relative weight of competitive signals, like MacroConfig.macro_signal_weight - confirm
+    competitive_enabled: bool = True  # env: COMPETITIVE_ENABLED; load_config() turns this on only when the value equals "true" case-insensitively
+    pattern_confidence_threshold: float = 0.3  # env: COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD; presumably a minimum pattern confidence - confirm against the consumer
+    propagation_strength_threshold: float = 0.2  # env: COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD; presumably a minimum relationship/signal strength - confirm
+    routine_lookback_days: int = 180  # env: COMPETITIVE_ROUTINE_LOOKBACK_DAYS
+    major_decision_lookback_days: int = 365  # env: COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS
+    major_decision_weight_multiplier: float = 1.3  # env: COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER
+    staleness_window_days: int = 180  # env: COMPETITIVE_STALENESS_WINDOW_DAYS
+    staleness_recent_days: int = 90  # env: COMPETITIVE_STALENESS_RECENT_DAYS; NOTE(review): no check here relates this to staleness_window_days
+    staleness_decay_penalty: float = 0.5  # env: COMPETITIVE_STALENESS_DECAY_PENALTY
+    min_pattern_samples: int = 3  # env: COMPETITIVE_MIN_PATTERN_SAMPLES; presumably the sample count below which a pattern is flagged insufficient - confirm
+    propagation_failure_threshold: int = 5  # consecutive failures before operator alert (env: COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD)
+
+
@dataclass
 class AppConfig:
    postgres: PostgresConfig = field(default_factory=PostgresConfig)
@@ -146,6 +179,8 @@ class AppConfig:
    broker: BrokerConfig = field(default_factory=BrokerConfig)
    retention: RetentionConfig = field(default_factory=RetentionConfig)
    alerting: AlertingConfig = field(default_factory=AlertingConfig)
+    macro: MacroConfig = field(default_factory=MacroConfig)
+    competitive: CompetitiveConfig = field(default_factory=CompetitiveConfig)
    log_level: str = "INFO"
    json_logs: bool = True

@@ -222,6 +257,27 @@ def load_config() -> AppConfig:
            broker_error_window_hours=int(os.getenv("ALERT_BROKER_ERROR_WINDOW_HOURS", "1")),
            check_interval_seconds=int(os.getenv("ALERT_CHECK_INTERVAL_SECONDS", "120")),
        ),
+        macro=MacroConfig(
+            macro_signal_weight=float(os.getenv("MACRO_SIGNAL_WEIGHT", "0.3")),
+            macro_enabled=os.getenv("MACRO_ENABLED", "true").lower() == "true",
+            macro_confidence_threshold=float(os.getenv("MACRO_CONFIDENCE_THRESHOLD", "0.4")),
+            macro_short_term_staleness_hours=int(os.getenv("MACRO_SHORT_TERM_STALENESS_HOURS", "48")),
+            projection_confidence_threshold=float(os.getenv("PROJECTION_CONFIDENCE_THRESHOLD", "0.3")),
+        ),
+        competitive=CompetitiveConfig(
+            competitive_signal_weight=float(os.getenv("COMPETITIVE_SIGNAL_WEIGHT", "0.2")),
+            competitive_enabled=os.getenv("COMPETITIVE_ENABLED", "true").lower() == "true",
+            pattern_confidence_threshold=float(os.getenv("COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD", "0.3")),
+            propagation_strength_threshold=float(os.getenv("COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD", "0.2")),
+            routine_lookback_days=int(os.getenv("COMPETITIVE_ROUTINE_LOOKBACK_DAYS", "180")),
+            major_decision_lookback_days=int(os.getenv("COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS", "365")),
+            major_decision_weight_multiplier=float(os.getenv("COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER", "1.3")),
+            staleness_window_days=int(os.getenv("COMPETITIVE_STALENESS_WINDOW_DAYS", "180")),
+            staleness_recent_days=int(os.getenv("COMPETITIVE_STALENESS_RECENT_DAYS", "90")),
+            staleness_decay_penalty=float(os.getenv("COMPETITIVE_STALENESS_DECAY_PENALTY", "0.5")),
+            min_pattern_samples=int(os.getenv("COMPETITIVE_MIN_PATTERN_SAMPLES", "3")),
+            propagation_failure_threshold=int(os.getenv("COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD", "5")),
+        ),
        log_level=os.getenv("LOG_LEVEL", "INFO"),
        json_logs=os.getenv("JSON_LOGS", "true").lower() == "true",
    )
@@ -214,6 +214,7 @@ def _resolve_document_type(source_type: str) -> str:
        "news_api": "article",
        "filings_api": "filing",
        "web_scrape": "press_release",
+        "macro_news": "macro_event",
    }
    return mapping.get(source_type, "article")

@@ -64,3 +64,4 @@ QUEUE_RECOMMENDATION = "recommendation"
 QUEUE_LAKE_PUBLISH = "lake_publish"
 QUEUE_TRADE = "trade"
 QUEUE_BROKER = "broker_orders"
+QUEUE_MACRO_CLASSIFICATION = "macro_classification"
@@ -15,6 +15,7 @@ class DocumentType(str, Enum):
    FILING = "filing"
    TRANSCRIPT = "transcript"
    PRESS_RELEASE = "press_release"
+    MACRO_EVENT = "macro_event"


 class SourceType(str, Enum):
@@ -71,6 +72,37 @@ class TrendWindow(str, Enum):
    NINETY_DAY = "90d"


+class ImpactType(str, Enum):  # str mixin: members compare equal to their string values; element type of GlobalEventSchema.event_types
+    SUPPLY_DISRUPTION = "supply_disruption"
+    DEMAND_SHIFT = "demand_shift"
+    COST_INCREASE = "cost_increase"
+    REGULATORY_PRESSURE = "regulatory_pressure"
+    CURRENCY_IMPACT = "currency_impact"
+    COMMODITY_SHOCK = "commodity_shock"
+    TRADE_BARRIER = "trade_barrier"
+    GEOPOLITICAL_RISK = "geopolitical_risk"
+
+
+class SeverityLevel(str, Enum):  # str mixin enum; type of GlobalEventSchema.severity, which defaults to LOW
+    LOW = "low"
+    MODERATE = "moderate"
+    HIGH = "high"
+    CRITICAL = "critical"  # NOTE: comparisons like HIGH < CRITICAL compare the strings, not severity rank
+
+
+class MarketPositionTier(str, Enum):  # str mixin enum; type of ExposureProfileSchema.market_position_tier, which defaults to REGIONAL
+    GLOBAL_LEADER = "global_leader"
+    MULTINATIONAL = "multinational"
+    REGIONAL = "regional"
+    DOMESTIC = "domestic"
+
+
+class EstimatedDuration(str, Enum):  # str mixin enum; type of GlobalEventSchema.estimated_duration, which defaults to SHORT_TERM
+    SHORT_TERM = "short_term"  # presumably the events MacroConfig.macro_short_term_staleness_hours applies to - confirm
+    MEDIUM_TERM = "medium_term"
+    LONG_TERM = "long_term"
+
+
 # --- Document Intelligence ---

 class CompanyImpact(BaseModel):
@@ -182,6 +214,63 @@ class Recommendation(BaseModel):
    generated_at: datetime = Field(default_factory=datetime.utcnow)


+# --- Global News Interpolation ---
+
+class GlobalEventSchema(BaseModel):  # event record: impact types, severity, affected regions/sectors/commodities, summary and confidence; every field has a default
+    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # fresh uuid4 string per instance
+    event_types: List[ImpactType] = Field(default_factory=list)  # zero or more ImpactType values; empty by default
+    severity: SeverityLevel = SeverityLevel.LOW
+    affected_regions: List[str] = Field(default_factory=list)  # free-form strings; no vocabulary is enforced here
+    affected_sectors: List[str] = Field(default_factory=list)
+    affected_commodities: List[str] = Field(default_factory=list)
+    summary: str = ""
+    key_facts: List[str] = Field(default_factory=list)
+    estimated_duration: EstimatedDuration = EstimatedDuration.SHORT_TERM
+    confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
+    source_document_id: str = ""  # empty string (not None) when unset
+    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)  # ModelMetadata is defined outside this hunk
+    created_at: datetime = Field(default_factory=datetime.utcnow)  # naive UTC at instantiation; NOTE(review): datetime.utcnow is deprecated since Python 3.12
+
+
+class MacroImpactRecordSchema(BaseModel):  # per-company impact record keyed by event_id / company_id / ticker; every field has a default
+    event_id: str = ""  # presumably GlobalEventSchema.event_id - confirm; empty string when unset
+    company_id: str = ""
+    ticker: str = ""
+    macro_impact_score: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]; unsigned, direction is carried separately
+    impact_direction: str = "neutral"  # free-form string; NOTE(review): TrendProjectionSchema.projected_direction uses the TrendDirection enum - consider aligning
+    contributing_factors: List[str] = Field(default_factory=list)
+    confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
+    computed_at: datetime = Field(default_factory=datetime.utcnow)  # naive UTC at instantiation; NOTE(review): datetime.utcnow is deprecated since Python 3.12
+
+
+class ExposureProfileSchema(BaseModel):  # per-company exposure profile (geography, supply chain, commodities, jurisdictions); every field has a default
+    company_id: str = ""
+    geographic_revenue_mix: dict[str, float] = Field(default_factory=dict)  # presumably region -> revenue share - confirm; values are not range-checked here
+    supply_chain_regions: List[str] = Field(default_factory=list)
+    key_input_commodities: List[str] = Field(default_factory=list)
+    regulatory_jurisdictions: List[str] = Field(default_factory=list)
+    market_position_tier: MarketPositionTier = MarketPositionTier.REGIONAL
+    export_dependency_pct: float = Field(ge=0, le=1, default=0.0)  # despite the _pct suffix this is a 0..1 fraction, not 0..100
+    source: str = "manual"  # free-form string; other accepted values are not visible here
+    confidence: float = Field(ge=0, le=1, default=1.0)  # constrained to [0, 1]; defaults to 1.0 here, unlike the 0.5 used by the other schemas
+    version: int = 1
+    active: bool = True
+    created_at: datetime = Field(default_factory=datetime.utcnow)  # naive UTC at instantiation; NOTE(review): datetime.utcnow is deprecated since Python 3.12
+    updated_at: datetime = Field(default_factory=datetime.utcnow)  # separate utcnow() call, so it can differ slightly from created_at; nothing in this class refreshes it on change
+
+
+class TrendProjectionSchema(BaseModel):  # projected direction/strength/confidence for one trend window; every field has a default
+    trend_window_id: str = ""  # empty string (not None) when unset
+    projected_direction: TrendDirection = TrendDirection.NEUTRAL  # TrendDirection is defined outside this hunk
+    projected_strength: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
+    projected_confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]; presumably compared with MacroConfig.projection_confidence_threshold - confirm
+    projection_horizon: str = "7d"  # free-form string, not an enum; format presumably matches the TrendWindow values (e.g. "90d") - confirm
+    driving_factors: List[str] = Field(default_factory=list)
+    macro_contribution_pct: float = Field(ge=0, le=1, default=0.0)  # despite the _pct suffix this is a 0..1 fraction, not 0..100
+    diverges_from_current: bool = False
+    computed_at: datetime = Field(default_factory=datetime.utcnow)  # naive UTC at instantiation; NOTE(review): datetime.utcnow is deprecated since Python 3.12
+
+
 # --- Document Metadata ---

 class StorageRefs(BaseModel):
@@ -204,3 +293,73 @@ class DocumentMetadata(BaseModel):
    language: str = "en"
    content_hash: str = ""
    storage_refs: StorageRefs = Field(default_factory=StorageRefs)
+
+
+# --- Competitive Intelligence & Historical Patterns ---
+
+
+class RelationshipType(str, Enum):  # str mixin enum; type of CompetitorRelationshipSchema.relationship_type, which defaults to DIRECT_RIVAL
+    DIRECT_RIVAL = "direct_rival"
+    SAME_SECTOR = "same_sector"
+    OVERLAPPING_PRODUCTS = "overlapping_products"
+    SUPPLY_CHAIN_ADJACENT = "supply_chain_adjacent"
+
+
+class CatalystTier(str, Enum):  # str mixin enum; type of HistoricalPatternSchema.tier, which defaults to ROUTINE_SIGNAL
+    MAJOR_CORPORATE_DECISION = "major_corporate_decision"  # presumably assigned when catalyst_type is in MAJOR_DECISION_CATALYSTS - confirm
+    ROUTINE_SIGNAL = "routine_signal"
+
+
+# Major corporate decision catalyst types (Req 11.1)
+MAJOR_DECISION_CATALYSTS: frozenset[str] = frozenset({  # immutable set; presumably matched against the free-form catalyst_type strings on the schemas below - confirm
+    "m_and_a",
+    "legal",
+    "restructuring",
+    "leadership_change",
+    "strategic_pivot",
+    "buyback",
+    "dividend_change",
+})
+
+
+class CompetitorRelationshipSchema(BaseModel):  # typed, weighted relationship between two companies; every field has a default
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # fresh uuid4 string per instance
+    company_a_id: str = ""
+    company_b_id: str = ""  # NOTE(review): nothing here prevents company_a_id == company_b_id
+    relationship_type: RelationshipType = RelationshipType.DIRECT_RIVAL
+    strength: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
+    bidirectional: bool = True  # presumably False means the relationship applies only from A to B - confirm
+    source: str = "manual"  # free-form string; other accepted values are not visible here
+    active: bool = True
+    created_at: datetime = Field(default_factory=datetime.utcnow)  # naive UTC at instantiation; NOTE(review): datetime.utcnow is deprecated since Python 3.12
+    updated_at: datetime = Field(default_factory=datetime.utcnow)  # separate utcnow() call, so it can differ slightly from created_at
+
+
+class CompetitiveSignalRecordSchema(BaseModel):  # signal record linking a source document and source ticker to a target ticker; every field has a default
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # fresh uuid4 string per instance
+    source_document_id: str = ""  # empty string (not None) when unset
+    source_ticker: str = ""
+    target_ticker: str = ""
+    catalyst_type: str = ""  # free-form string, not an enum; see MAJOR_DECISION_CATALYSTS for the values listed as major decisions
+    pattern_confidence: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]
+    signal_direction: str = "neutral"  # free-form string; NOTE(review): TrendProjectionSchema.projected_direction uses the TrendDirection enum - consider aligning
+    signal_strength: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]
+    relationship_strength: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]; presumably copied from CompetitorRelationshipSchema.strength - confirm
+    computed_at: datetime = Field(default_factory=datetime.utcnow)  # naive UTC at instantiation; NOTE(review): datetime.utcnow is deprecated since Python 3.12
+
+
+class HistoricalPatternSchema(BaseModel):  # aggregate statistics for one (source_ticker, target_ticker, catalyst_type, time_horizon) combination; every field has a default
+    source_ticker: str = ""
+    target_ticker: str = ""
+    catalyst_type: str = ""  # free-form string, not an enum; see MAJOR_DECISION_CATALYSTS for the values listed as major decisions
+    time_horizon: str = "7d"  # free-form string; format presumably matches the TrendWindow values (e.g. "90d") - confirm
+    sample_count: int = 0  # NOTE(review): no ge=0 constraint, so negative counts are accepted
+    bullish_pct: float = Field(ge=0, le=1, default=0.0)  # despite the _pct suffix this is a 0..1 fraction, not 0..100
+    bearish_pct: float = Field(ge=0, le=1, default=0.0)  # 0..1 fraction; NOTE(review): no validator here keeps bullish_pct + bearish_pct <= 1
+    avg_strength: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]
+    avg_time_to_resolution: float = 0.0  # unit is not stated here (days? hours?) - confirm; unconstrained
+    pattern_confidence: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]
+    data_start: Optional[datetime] = None  # None when unset; NOTE(review): no check that data_start <= data_end
+    data_end: Optional[datetime] = None
+    tier: CatalystTier = CatalystTier.ROUTINE_SIGNAL
+    insufficient_data: bool = False  # presumably set when sample_count is below CompetitiveConfig.min_pattern_samples - confirm
@@ -48,6 +48,7 @@ SOURCE_BUCKET_MAP: dict[str, str] = {
    "filings_api": "stonks-raw-filings",
    "web_scrape": "stonks-raw-news",
    "broker": "stonks-raw-market",
+    "macro_news": "stonks-raw-news",
 }

 # Map artifact type to content type and file extension
@@ -75,10 +76,14 @@ def build_artifact_path(
    """Build a MinIO object path following the design convention.

    Pattern: {source_type}/{ticker}/{yyyy}/{mm}/{dd}/{document_id}/{artifact_name}.{ext}
+    For macro_news sources, uses macro/ prefix instead of ticker:
+      macro/{yyyy}/{mm}/{dd}/{document_id}/{artifact_name}.{ext}
    """
    ts = timestamp or datetime.now(timezone.utc)
+    # Macro sources use macro/ prefix instead of ticker (Requirement 1.1)
+    path_prefix = "macro" if source_type == "macro_news" else f"{source_type}/{ticker}"
    return (
-        f"{source_type}/{ticker}/"
+        f"{path_prefix}/"
        f"{ts.year}/{ts.month:02d}/{ts.day:02d}/"
        f"{document_id}/{artifact_name}.{ext}"
    )