feat: competitive intelligence & historical pattern matching layer

This commit is contained in:
Celes Renata
2026-04-14 19:42:48 +00:00
parent b478022ba3
commit f7a11d14ea
203 changed files with 20155 additions and 97 deletions
+56
View File
@@ -110,6 +110,19 @@ BUCKET_RETENTION_FIELDS: dict[str, str] = {
}
@dataclass
class MacroConfig:
    """Tunable settings for the macro news interpolation layer.

    Every field has a default, so ``MacroConfig()`` yields a usable
    configuration without any arguments.

    Requirements: 5.6, 10.1, 10.2, 12.9
    """

    # Relative weight of macro signals versus company signals.
    macro_signal_weight: float = 0.3

    # Runtime toggle state; the layer is on by default.
    macro_enabled: bool = True

    # Minimum confidence an event needs in order to be included.
    macro_confidence_threshold: float = 0.4

    # Hours after which short-term events get accelerated decay.
    macro_short_term_staleness_hours: int = 48

    # Minimum confidence for projections to influence recommendations.
    projection_confidence_threshold: float = 0.3
@dataclass
class AlertingConfig:
"""Thresholds for operational alerting rules.
@@ -135,6 +148,26 @@ class AlertingConfig:
check_interval_seconds: int = 120
@dataclass
class CompetitiveConfig:
    """Tunable settings for the competitive intelligence and historical
    pattern matching layer.

    Every field has a default, so ``CompetitiveConfig()`` is usable as-is.
    Only ``propagation_failure_threshold`` carries an explanation in the
    original source; the grouping comments below are reading aids and the
    exact semantics of the other fields should be confirmed against their
    consumers.

    Requirements: 5.6, 6.1, 9.1, 9.2, 11.2, 11.3
    """

    # --- Signal weight and layer toggle ---
    competitive_signal_weight: float = 0.2
    competitive_enabled: bool = True

    # --- Thresholds (presumably minimum values on a 0-1 scale; TODO confirm) ---
    pattern_confidence_threshold: float = 0.3
    propagation_strength_threshold: float = 0.2

    # --- Lookback windows, expressed in days ---
    routine_lookback_days: int = 180
    major_decision_lookback_days: int = 365
    major_decision_weight_multiplier: float = 1.3

    # --- Staleness handling, windows expressed in days ---
    staleness_window_days: int = 180
    staleness_recent_days: int = 90
    staleness_decay_penalty: float = 0.5

    # --- Pattern sampling and failure alerting ---
    min_pattern_samples: int = 3
    # Consecutive failures before an operator alert is raised.
    propagation_failure_threshold: int = 5
@dataclass
class AppConfig:
postgres: PostgresConfig = field(default_factory=PostgresConfig)
@@ -146,6 +179,8 @@ class AppConfig:
broker: BrokerConfig = field(default_factory=BrokerConfig)
retention: RetentionConfig = field(default_factory=RetentionConfig)
alerting: AlertingConfig = field(default_factory=AlertingConfig)
macro: MacroConfig = field(default_factory=MacroConfig)
competitive: CompetitiveConfig = field(default_factory=CompetitiveConfig)
log_level: str = "INFO"
json_logs: bool = True
@@ -222,6 +257,27 @@ def load_config() -> AppConfig:
broker_error_window_hours=int(os.getenv("ALERT_BROKER_ERROR_WINDOW_HOURS", "1")),
check_interval_seconds=int(os.getenv("ALERT_CHECK_INTERVAL_SECONDS", "120")),
),
macro=MacroConfig(
macro_signal_weight=float(os.getenv("MACRO_SIGNAL_WEIGHT", "0.3")),
macro_enabled=os.getenv("MACRO_ENABLED", "true").lower() == "true",
macro_confidence_threshold=float(os.getenv("MACRO_CONFIDENCE_THRESHOLD", "0.4")),
macro_short_term_staleness_hours=int(os.getenv("MACRO_SHORT_TERM_STALENESS_HOURS", "48")),
projection_confidence_threshold=float(os.getenv("PROJECTION_CONFIDENCE_THRESHOLD", "0.3")),
),
competitive=CompetitiveConfig(
competitive_signal_weight=float(os.getenv("COMPETITIVE_SIGNAL_WEIGHT", "0.2")),
competitive_enabled=os.getenv("COMPETITIVE_ENABLED", "true").lower() == "true",
pattern_confidence_threshold=float(os.getenv("COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD", "0.3")),
propagation_strength_threshold=float(os.getenv("COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD", "0.2")),
routine_lookback_days=int(os.getenv("COMPETITIVE_ROUTINE_LOOKBACK_DAYS", "180")),
major_decision_lookback_days=int(os.getenv("COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS", "365")),
major_decision_weight_multiplier=float(os.getenv("COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER", "1.3")),
staleness_window_days=int(os.getenv("COMPETITIVE_STALENESS_WINDOW_DAYS", "180")),
staleness_recent_days=int(os.getenv("COMPETITIVE_STALENESS_RECENT_DAYS", "90")),
staleness_decay_penalty=float(os.getenv("COMPETITIVE_STALENESS_DECAY_PENALTY", "0.5")),
min_pattern_samples=int(os.getenv("COMPETITIVE_MIN_PATTERN_SAMPLES", "3")),
propagation_failure_threshold=int(os.getenv("COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD", "5")),
),
log_level=os.getenv("LOG_LEVEL", "INFO"),
json_logs=os.getenv("JSON_LOGS", "true").lower() == "true",
)
+1
View File
@@ -214,6 +214,7 @@ def _resolve_document_type(source_type: str) -> str:
"news_api": "article",
"filings_api": "filing",
"web_scrape": "press_release",
"macro_news": "macro_event",
}
return mapping.get(source_type, "article")
+1
View File
@@ -64,3 +64,4 @@ QUEUE_RECOMMENDATION = "recommendation"
QUEUE_LAKE_PUBLISH = "lake_publish"
QUEUE_TRADE = "trade"
QUEUE_BROKER = "broker_orders"
QUEUE_MACRO_CLASSIFICATION = "macro_classification"
+159
View File
@@ -15,6 +15,7 @@ class DocumentType(str, Enum):
FILING = "filing"
TRANSCRIPT = "transcript"
PRESS_RELEASE = "press_release"
MACRO_EVENT = "macro_event"
class SourceType(str, Enum):
@@ -71,6 +72,37 @@ class TrendWindow(str, Enum):
NINETY_DAY = "90d"
# Kinds of impact a global event can be tagged with
# (the element type of GlobalEventSchema.event_types).
class ImpactType(str, Enum):
    # The str mixin makes each member compare equal to its plain string value.

    # Supply, demand and cost effects
    SUPPLY_DISRUPTION = "supply_disruption"
    DEMAND_SHIFT = "demand_shift"
    COST_INCREASE = "cost_increase"

    # Regulatory, currency and commodity effects
    REGULATORY_PRESSURE = "regulatory_pressure"
    CURRENCY_IMPACT = "currency_impact"
    COMMODITY_SHOCK = "commodity_shock"

    # Trade and geopolitical effects
    TRADE_BARRIER = "trade_barrier"
    GEOPOLITICAL_RISK = "geopolitical_risk"
# Severity labels for a global event (the type of GlobalEventSchema.severity).
class SeverityLevel(str, Enum):
    # Declared from LOW through CRITICAL; members are str-valued, so
    # SeverityLevel.LOW == "low".
    LOW = "low"
    MODERATE = "moderate"
    HIGH = "high"
    CRITICAL = "critical"
# Market position labels for a company
# (the type of ExposureProfileSchema.market_position_tier).
class MarketPositionTier(str, Enum):
    # Members are str-valued, so MarketPositionTier.REGIONAL == "regional".
    GLOBAL_LEADER = "global_leader"
    MULTINATIONAL = "multinational"
    REGIONAL = "regional"
    DOMESTIC = "domestic"
# Duration buckets for a global event
# (the type of GlobalEventSchema.estimated_duration).
class EstimatedDuration(str, Enum):
    # Members are str-valued, so EstimatedDuration.SHORT_TERM == "short_term".
    SHORT_TERM = "short_term"
    MEDIUM_TERM = "medium_term"
    LONG_TERM = "long_term"
# --- Document Intelligence ---
class CompanyImpact(BaseModel):
@@ -182,6 +214,63 @@ class Recommendation(BaseModel):
generated_at: datetime = Field(default_factory=datetime.utcnow)
# --- Global News Interpolation ---
# Structured record for one global (macro) event. Every field has a default,
# so an instance can be built with no arguments.
class GlobalEventSchema(BaseModel):
    # Identity: a fresh UUID4 string is generated when no id is supplied.
    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Classification
    event_types: List[ImpactType] = Field(default_factory=list)
    severity: SeverityLevel = SeverityLevel.LOW
    estimated_duration: EstimatedDuration = EstimatedDuration.SHORT_TERM

    # Scope of the event (free-form strings; naming scheme not shown here)
    affected_regions: List[str] = Field(default_factory=list)
    affected_sectors: List[str] = Field(default_factory=list)
    affected_commodities: List[str] = Field(default_factory=list)

    # Narrative content
    summary: str = ""
    key_facts: List[str] = Field(default_factory=list)

    # Validated to lie within [0, 1].
    confidence: float = Field(default=0.5, ge=0, le=1)

    # Provenance
    source_document_id: str = ""
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)

    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; kept because it is the factory the original uses.
    created_at: datetime = Field(default_factory=datetime.utcnow)
# Record tying one event (event_id) to one company (company_id / ticker)
# together with a computed macro impact score.
class MacroImpactRecordSchema(BaseModel):
    # Keys linking the record to its event and company; empty by default.
    event_id: str = ""
    company_id: str = ""
    ticker: str = ""

    # Score and confidence are both validated to lie within [0, 1].
    macro_impact_score: float = Field(default=0.0, ge=0, le=1)
    confidence: float = Field(default=0.5, ge=0, le=1)

    # Free-form string; only the default "neutral" is visible in this file.
    impact_direction: str = "neutral"
    contributing_factors: List[str] = Field(default_factory=list)

    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; kept because it is the factory the original uses.
    computed_at: datetime = Field(default_factory=datetime.utcnow)
# Exposure profile for a single company. Every field has a default.
class ExposureProfileSchema(BaseModel):
    company_id: str = ""

    # Exposure dimensions
    # presumably region name -> share of revenue; verify against the producer
    geographic_revenue_mix: dict[str, float] = Field(default_factory=dict)
    supply_chain_regions: List[str] = Field(default_factory=list)
    key_input_commodities: List[str] = Field(default_factory=list)
    regulatory_jurisdictions: List[str] = Field(default_factory=list)
    market_position_tier: MarketPositionTier = MarketPositionTier.REGIONAL
    # Despite the "_pct" suffix the validator bounds this to [0, 1].
    export_dependency_pct: float = Field(default=0.0, ge=0, le=1)

    # Provenance and versioning
    source: str = "manual"
    confidence: float = Field(default=1.0, ge=0, le=1)
    version: int = 1
    active: bool = True

    # NOTE(review): datetime.utcnow yields naive datetimes and is deprecated
    # since Python 3.12; kept because it is the factory the original uses.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
# Projection attached to a trend window (referenced by trend_window_id).
class TrendProjectionSchema(BaseModel):
    trend_window_id: str = ""

    # Projected outcome; strength and confidence are validated to [0, 1].
    projected_direction: TrendDirection = TrendDirection.NEUTRAL
    projected_strength: float = Field(default=0.5, ge=0, le=1)
    projected_confidence: float = Field(default=0.5, ge=0, le=1)
    # Free-form string, default "7d"; accepted values are not shown here.
    projection_horizon: str = "7d"

    # Explanation of the projection
    driving_factors: List[str] = Field(default_factory=list)
    # Despite the "_pct" suffix the validator bounds this to [0, 1].
    macro_contribution_pct: float = Field(default=0.0, ge=0, le=1)
    diverges_from_current: bool = False

    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; kept because it is the factory the original uses.
    computed_at: datetime = Field(default_factory=datetime.utcnow)
# --- Document Metadata ---
class StorageRefs(BaseModel):
@@ -204,3 +293,73 @@ class DocumentMetadata(BaseModel):
language: str = "en"
content_hash: str = ""
storage_refs: StorageRefs = Field(default_factory=StorageRefs)
# --- Competitive Intelligence & Historical Patterns ---
# Kinds of relationship between two companies
# (the type of CompetitorRelationshipSchema.relationship_type).
class RelationshipType(str, Enum):
    # Members are str-valued, so RelationshipType.DIRECT_RIVAL == "direct_rival".
    DIRECT_RIVAL = "direct_rival"
    SAME_SECTOR = "same_sector"
    OVERLAPPING_PRODUCTS = "overlapping_products"
    SUPPLY_CHAIN_ADJACENT = "supply_chain_adjacent"
# Two-level tier for catalysts (the type of HistoricalPatternSchema.tier).
class CatalystTier(str, Enum):
    # Members are str-valued, so CatalystTier.ROUTINE_SIGNAL == "routine_signal".
    MAJOR_CORPORATE_DECISION = "major_corporate_decision"
    ROUTINE_SIGNAL = "routine_signal"
# Catalyst type names that count as major corporate decisions (Req 11.1).
# A frozenset, so this module-level constant cannot be mutated in place.
MAJOR_DECISION_CATALYSTS: frozenset[str] = frozenset(
    (
        "m_and_a",
        "legal",
        "restructuring",
        "leadership_change",
        "strategic_pivot",
        "buyback",
        "dividend_change",
    )
)
# One relationship edge between two companies (company_a_id, company_b_id).
class CompetitorRelationshipSchema(BaseModel):
    # Identity: a fresh UUID4 string is generated when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Endpoints of the relationship; empty by default.
    company_a_id: str = ""
    company_b_id: str = ""

    # Nature of the relationship; strength is validated to lie within [0, 1].
    relationship_type: RelationshipType = RelationshipType.DIRECT_RIVAL
    strength: float = Field(default=0.5, ge=0, le=1)
    bidirectional: bool = True

    # Provenance and lifecycle
    source: str = "manual"
    active: bool = True

    # NOTE(review): datetime.utcnow yields naive datetimes and is deprecated
    # since Python 3.12; kept because it is the factory the original uses.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
# One competitive signal derived from a source document, pointing from
# source_ticker to target_ticker.
class CompetitiveSignalRecordSchema(BaseModel):
    # Identity: a fresh UUID4 string is generated when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Provenance and endpoints; empty by default.
    source_document_id: str = ""
    source_ticker: str = ""
    target_ticker: str = ""
    # Plain string; presumably compared against MAJOR_DECISION_CATALYSTS
    # elsewhere (TODO confirm).
    catalyst_type: str = ""

    # Scores, each validated to lie within [0, 1].
    pattern_confidence: float = Field(default=0.0, ge=0, le=1)
    signal_strength: float = Field(default=0.0, ge=0, le=1)
    relationship_strength: float = Field(default=0.0, ge=0, le=1)

    # Free-form string; only the default "neutral" is visible in this file.
    signal_direction: str = "neutral"

    # NOTE(review): datetime.utcnow yields a naive datetime and is deprecated
    # since Python 3.12; kept because it is the factory the original uses.
    computed_at: datetime = Field(default_factory=datetime.utcnow)
# Aggregated historical pattern for a (source_ticker, target_ticker,
# catalyst_type, time_horizon) combination.
class HistoricalPatternSchema(BaseModel):
    # Pattern key
    source_ticker: str = ""
    target_ticker: str = ""
    catalyst_type: str = ""
    # Free-form string, default "7d"; accepted values are not shown here.
    time_horizon: str = "7d"
    tier: CatalystTier = CatalystTier.ROUTINE_SIGNAL

    # Sample statistics. The *_pct fields, avg_strength and
    # pattern_confidence are validated to [0, 1]; sample_count and
    # avg_time_to_resolution carry no bounds.
    sample_count: int = 0
    bullish_pct: float = Field(default=0.0, ge=0, le=1)
    bearish_pct: float = Field(default=0.0, ge=0, le=1)
    avg_strength: float = Field(default=0.0, ge=0, le=1)
    # Unit is not stated in this file -- TODO confirm with the producer.
    avg_time_to_resolution: float = 0.0
    pattern_confidence: float = Field(default=0.0, ge=0, le=1)

    # Time span of the underlying data; None when not supplied.
    data_start: Optional[datetime] = None
    data_end: Optional[datetime] = None

    # Flag, False by default; presumably set when sample_count falls below
    # the configured minimum (TODO confirm).
    insufficient_data: bool = False
+6 -1
View File
@@ -48,6 +48,7 @@ SOURCE_BUCKET_MAP: dict[str, str] = {
"filings_api": "stonks-raw-filings",
"web_scrape": "stonks-raw-news",
"broker": "stonks-raw-market",
"macro_news": "stonks-raw-news",
}
# Map artifact type to content type and file extension
@@ -75,10 +76,14 @@ def build_artifact_path(
"""Build a MinIO object path following the design convention.
Pattern: {source_type}/{ticker}/{yyyy}/{mm}/{dd}/{document_id}/{artifact_name}.{ext}
For macro_news sources, uses macro/ prefix instead of ticker:
macro/{yyyy}/{mm}/{dd}/{document_id}/{artifact_name}.{ext}
"""
ts = timestamp or datetime.now(timezone.utc)
# Macro sources use macro/ prefix instead of ticker (Requirement 1.1)
path_prefix = "macro" if source_type == "macro_news" else f"{source_type}/{ticker}"
return (
f"{source_type}/{ticker}/"
f"{path_prefix}/"
f"{ts.year}/{ts.month:02d}/{ts.day:02d}/"
f"{document_id}/{artifact_name}.{ext}"
)