feat: competitive intelligence & historical pattern matching layer
This commit is contained in:
@@ -110,6 +110,19 @@ BUCKET_RETENTION_FIELDS: dict[str, str] = {
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class MacroConfig:
    """Configuration for the macro news interpolation layer.

    Every field has a default, so ``MacroConfig()`` is valid on its own.
    ``load_config`` builds an instance from the ``MACRO_*`` and
    ``PROJECTION_CONFIDENCE_THRESHOLD`` environment variables, falling back
    to the same defaults.

    Requirements: 5.6, 10.1, 10.2, 12.9
    """

    macro_signal_weight: float = 0.3  # relative weight of macro vs company signals
    macro_enabled: bool = True  # runtime toggle state (default on)
    macro_confidence_threshold: float = 0.4  # minimum confidence for event inclusion
    macro_short_term_staleness_hours: int = 48  # hours after which short-term events get accelerated decay
    projection_confidence_threshold: float = 0.3  # minimum confidence for projections to influence recommendations
|
||||
|
||||
|
||||
@dataclass
|
||||
class AlertingConfig:
|
||||
"""Thresholds for operational alerting rules.
|
||||
@@ -135,6 +148,26 @@ class AlertingConfig:
|
||||
check_interval_seconds: int = 120
|
||||
|
||||
|
||||
@dataclass
class CompetitiveConfig:
    """Configuration for the competitive intelligence & historical pattern matching layer.

    Every field has a default, so ``CompetitiveConfig()`` is valid on its
    own. ``load_config`` builds an instance from environment variables; the
    variable read for each field is noted beside it.

    Requirements: 5.6, 6.1, 9.1, 9.2, 11.2, 11.3
    """

    competitive_signal_weight: float = 0.2  # env: COMPETITIVE_SIGNAL_WEIGHT
    competitive_enabled: bool = True  # env: COMPETITIVE_ENABLED ("true", case-insensitive, enables)
    pattern_confidence_threshold: float = 0.3  # env: COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD
    propagation_strength_threshold: float = 0.2  # env: COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD
    routine_lookback_days: int = 180  # env: COMPETITIVE_ROUTINE_LOOKBACK_DAYS
    major_decision_lookback_days: int = 365  # env: COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS
    major_decision_weight_multiplier: float = 1.3  # env: COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER
    staleness_window_days: int = 180  # env: COMPETITIVE_STALENESS_WINDOW_DAYS
    staleness_recent_days: int = 90  # env: COMPETITIVE_STALENESS_RECENT_DAYS
    staleness_decay_penalty: float = 0.5  # env: COMPETITIVE_STALENESS_DECAY_PENALTY
    min_pattern_samples: int = 3  # env: COMPETITIVE_MIN_PATTERN_SAMPLES
    propagation_failure_threshold: int = 5  # consecutive failures before operator alert
|
||||
|
||||
|
||||
@dataclass
|
||||
class AppConfig:
|
||||
postgres: PostgresConfig = field(default_factory=PostgresConfig)
|
||||
@@ -146,6 +179,8 @@ class AppConfig:
|
||||
broker: BrokerConfig = field(default_factory=BrokerConfig)
|
||||
retention: RetentionConfig = field(default_factory=RetentionConfig)
|
||||
alerting: AlertingConfig = field(default_factory=AlertingConfig)
|
||||
macro: MacroConfig = field(default_factory=MacroConfig)
|
||||
competitive: CompetitiveConfig = field(default_factory=CompetitiveConfig)
|
||||
log_level: str = "INFO"
|
||||
json_logs: bool = True
|
||||
|
||||
@@ -222,6 +257,27 @@ def load_config() -> AppConfig:
|
||||
broker_error_window_hours=int(os.getenv("ALERT_BROKER_ERROR_WINDOW_HOURS", "1")),
|
||||
check_interval_seconds=int(os.getenv("ALERT_CHECK_INTERVAL_SECONDS", "120")),
|
||||
),
|
||||
macro=MacroConfig(
|
||||
macro_signal_weight=float(os.getenv("MACRO_SIGNAL_WEIGHT", "0.3")),
|
||||
macro_enabled=os.getenv("MACRO_ENABLED", "true").lower() == "true",
|
||||
macro_confidence_threshold=float(os.getenv("MACRO_CONFIDENCE_THRESHOLD", "0.4")),
|
||||
macro_short_term_staleness_hours=int(os.getenv("MACRO_SHORT_TERM_STALENESS_HOURS", "48")),
|
||||
projection_confidence_threshold=float(os.getenv("PROJECTION_CONFIDENCE_THRESHOLD", "0.3")),
|
||||
),
|
||||
competitive=CompetitiveConfig(
|
||||
competitive_signal_weight=float(os.getenv("COMPETITIVE_SIGNAL_WEIGHT", "0.2")),
|
||||
competitive_enabled=os.getenv("COMPETITIVE_ENABLED", "true").lower() == "true",
|
||||
pattern_confidence_threshold=float(os.getenv("COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD", "0.3")),
|
||||
propagation_strength_threshold=float(os.getenv("COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD", "0.2")),
|
||||
routine_lookback_days=int(os.getenv("COMPETITIVE_ROUTINE_LOOKBACK_DAYS", "180")),
|
||||
major_decision_lookback_days=int(os.getenv("COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS", "365")),
|
||||
major_decision_weight_multiplier=float(os.getenv("COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER", "1.3")),
|
||||
staleness_window_days=int(os.getenv("COMPETITIVE_STALENESS_WINDOW_DAYS", "180")),
|
||||
staleness_recent_days=int(os.getenv("COMPETITIVE_STALENESS_RECENT_DAYS", "90")),
|
||||
staleness_decay_penalty=float(os.getenv("COMPETITIVE_STALENESS_DECAY_PENALTY", "0.5")),
|
||||
min_pattern_samples=int(os.getenv("COMPETITIVE_MIN_PATTERN_SAMPLES", "3")),
|
||||
propagation_failure_threshold=int(os.getenv("COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD", "5")),
|
||||
),
|
||||
log_level=os.getenv("LOG_LEVEL", "INFO"),
|
||||
json_logs=os.getenv("JSON_LOGS", "true").lower() == "true",
|
||||
)
|
||||
|
||||
@@ -214,6 +214,7 @@ def _resolve_document_type(source_type: str) -> str:
|
||||
"news_api": "article",
|
||||
"filings_api": "filing",
|
||||
"web_scrape": "press_release",
|
||||
"macro_news": "macro_event",
|
||||
}
|
||||
return mapping.get(source_type, "article")
|
||||
|
||||
|
||||
@@ -64,3 +64,4 @@ QUEUE_RECOMMENDATION = "recommendation"
|
||||
QUEUE_LAKE_PUBLISH = "lake_publish"
|
||||
QUEUE_TRADE = "trade"
|
||||
QUEUE_BROKER = "broker_orders"
|
||||
QUEUE_MACRO_CLASSIFICATION = "macro_classification"
|
||||
|
||||
@@ -15,6 +15,7 @@ class DocumentType(str, Enum):
|
||||
FILING = "filing"
|
||||
TRANSCRIPT = "transcript"
|
||||
PRESS_RELEASE = "press_release"
|
||||
MACRO_EVENT = "macro_event"
|
||||
|
||||
|
||||
class SourceType(str, Enum):
|
||||
@@ -71,6 +72,37 @@ class TrendWindow(str, Enum):
|
||||
NINETY_DAY = "90d"
|
||||
|
||||
|
||||
class ImpactType(str, Enum):
    """String-valued impact categories.

    Used as the element type of ``GlobalEventSchema.event_types``. Because
    the enum also derives from ``str``, each member compares equal to its
    plain string value.
    """

    SUPPLY_DISRUPTION = "supply_disruption"
    DEMAND_SHIFT = "demand_shift"
    COST_INCREASE = "cost_increase"
    REGULATORY_PRESSURE = "regulatory_pressure"
    CURRENCY_IMPACT = "currency_impact"
    COMMODITY_SHOCK = "commodity_shock"
    TRADE_BARRIER = "trade_barrier"
    GEOPOLITICAL_RISK = "geopolitical_risk"
|
||||
|
||||
|
||||
class SeverityLevel(str, Enum):
    """String-valued severity levels.

    Used by ``GlobalEventSchema.severity``, which defaults to ``LOW``.
    """

    LOW = "low"
    MODERATE = "moderate"
    HIGH = "high"
    CRITICAL = "critical"
|
||||
|
||||
|
||||
class MarketPositionTier(str, Enum):
    """String-valued market position tiers.

    Used by ``ExposureProfileSchema.market_position_tier``, which defaults
    to ``REGIONAL``.
    """

    GLOBAL_LEADER = "global_leader"
    MULTINATIONAL = "multinational"
    REGIONAL = "regional"
    DOMESTIC = "domestic"
|
||||
|
||||
|
||||
class EstimatedDuration(str, Enum):
    """String-valued duration buckets.

    Used by ``GlobalEventSchema.estimated_duration``, which defaults to
    ``SHORT_TERM``.
    """

    SHORT_TERM = "short_term"
    MEDIUM_TERM = "medium_term"
    LONG_TERM = "long_term"
|
||||
|
||||
|
||||
# --- Document Intelligence ---
|
||||
|
||||
class CompanyImpact(BaseModel):
|
||||
@@ -182,6 +214,63 @@ class Recommendation(BaseModel):
|
||||
generated_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# --- Global News Interpolation ---
|
||||
|
||||
# Schema for a global event record (section "Global News Interpolation").
# Every field has a default, so the model can be built with no arguments.
# Documented with comments rather than a docstring so the generated schema
# description is unchanged.
class GlobalEventSchema(BaseModel):
    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # fresh UUID4 string per instance
    event_types: List[ImpactType] = Field(default_factory=list)
    severity: SeverityLevel = SeverityLevel.LOW
    affected_regions: List[str] = Field(default_factory=list)
    affected_sectors: List[str] = Field(default_factory=list)
    affected_commodities: List[str] = Field(default_factory=list)
    summary: str = ""
    key_facts: List[str] = Field(default_factory=list)
    estimated_duration: EstimatedDuration = EstimatedDuration.SHORT_TERM
    confidence: float = Field(ge=0, le=1, default=0.5)  # validated to lie in [0, 1]
    source_document_id: str = ""
    # NOTE(review): under Pydantic v2 the ``model_`` prefix is a protected
    # namespace and may emit a warning for this field name - confirm the
    # Pydantic version in use before renaming anything.
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept because other models in this file use it too.
    created_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# Schema for one macro impact record, keyed by event_id / company_id / ticker.
# Every field has a default, so the model can be built with no arguments.
class MacroImpactRecordSchema(BaseModel):
    event_id: str = ""
    company_id: str = ""
    ticker: str = ""
    macro_impact_score: float = Field(ge=0, le=1, default=0.0)  # validated to lie in [0, 1]
    impact_direction: str = "neutral"  # free-form string; allowed values are not constrained here
    contributing_factors: List[str] = Field(default_factory=list)
    confidence: float = Field(ge=0, le=1, default=0.5)  # validated to lie in [0, 1]
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept because other models in this file use it too.
    computed_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# Schema for a company's exposure profile.
# Every field has a default, so the model can be built with no arguments.
class ExposureProfileSchema(BaseModel):
    company_id: str = ""
    geographic_revenue_mix: dict[str, float] = Field(default_factory=dict)  # presumably region -> revenue share; verify against callers
    supply_chain_regions: List[str] = Field(default_factory=list)
    key_input_commodities: List[str] = Field(default_factory=list)
    regulatory_jurisdictions: List[str] = Field(default_factory=list)
    market_position_tier: MarketPositionTier = MarketPositionTier.REGIONAL
    export_dependency_pct: float = Field(ge=0, le=1, default=0.0)  # despite "pct", validated as a fraction in [0, 1]
    source: str = "manual"
    confidence: float = Field(ge=0, le=1, default=1.0)  # validated to lie in [0, 1]
    version: int = 1
    active: bool = True
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept because other models in this file use it too.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# Schema for a trend projection attached to a trend window (trend_window_id).
# Every field has a default, so the model can be built with no arguments.
class TrendProjectionSchema(BaseModel):
    trend_window_id: str = ""
    projected_direction: TrendDirection = TrendDirection.NEUTRAL
    projected_strength: float = Field(ge=0, le=1, default=0.5)  # validated to lie in [0, 1]
    projected_confidence: float = Field(ge=0, le=1, default=0.5)  # validated to lie in [0, 1]
    projection_horizon: str = "7d"  # free-form string; format is not validated here
    driving_factors: List[str] = Field(default_factory=list)
    macro_contribution_pct: float = Field(ge=0, le=1, default=0.0)  # despite "pct", validated as a fraction in [0, 1]
    diverges_from_current: bool = False
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept because other models in this file use it too.
    computed_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# --- Document Metadata ---
|
||||
|
||||
class StorageRefs(BaseModel):
|
||||
@@ -204,3 +293,73 @@ class DocumentMetadata(BaseModel):
|
||||
language: str = "en"
|
||||
content_hash: str = ""
|
||||
storage_refs: StorageRefs = Field(default_factory=StorageRefs)
|
||||
|
||||
|
||||
# --- Competitive Intelligence & Historical Patterns ---
|
||||
|
||||
|
||||
class RelationshipType(str, Enum):
    """String-valued relationship kinds between two companies.

    Used by ``CompetitorRelationshipSchema.relationship_type``, which
    defaults to ``DIRECT_RIVAL``.
    """

    DIRECT_RIVAL = "direct_rival"
    SAME_SECTOR = "same_sector"
    OVERLAPPING_PRODUCTS = "overlapping_products"
    SUPPLY_CHAIN_ADJACENT = "supply_chain_adjacent"
|
||||
|
||||
|
||||
class CatalystTier(str, Enum):
    """String-valued catalyst tiers.

    Used by ``HistoricalPatternSchema.tier``, which defaults to
    ``ROUTINE_SIGNAL``.
    """

    MAJOR_CORPORATE_DECISION = "major_corporate_decision"
    ROUTINE_SIGNAL = "routine_signal"
|
||||
|
||||
|
||||
# Major corporate decision catalyst types (Req 11.1).
# Immutable set of catalyst-type strings; membership tests are O(1).
MAJOR_DECISION_CATALYSTS: frozenset[str] = frozenset({
    "m_and_a",
    "legal",
    "restructuring",
    "leadership_change",
    "strategic_pivot",
    "buyback",
    "dividend_change",
})
|
||||
|
||||
|
||||
# Schema for a relationship between two companies (company_a_id, company_b_id).
# Every field has a default, so the model can be built with no arguments.
class CompetitorRelationshipSchema(BaseModel):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # fresh UUID4 string per instance
    company_a_id: str = ""
    company_b_id: str = ""
    relationship_type: RelationshipType = RelationshipType.DIRECT_RIVAL
    strength: float = Field(ge=0, le=1, default=0.5)  # validated to lie in [0, 1]
    bidirectional: bool = True
    source: str = "manual"
    active: bool = True
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept because other models in this file use it too.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# Schema for one competitive signal from a source ticker to a target ticker.
# Every field has a default, so the model can be built with no arguments.
class CompetitiveSignalRecordSchema(BaseModel):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # fresh UUID4 string per instance
    source_document_id: str = ""
    source_ticker: str = ""
    target_ticker: str = ""
    catalyst_type: str = ""  # free-form string; not validated against MAJOR_DECISION_CATALYSTS here
    pattern_confidence: float = Field(ge=0, le=1, default=0.0)  # validated to lie in [0, 1]
    signal_direction: str = "neutral"  # free-form string; allowed values are not constrained here
    signal_strength: float = Field(ge=0, le=1, default=0.0)  # validated to lie in [0, 1]
    relationship_strength: float = Field(ge=0, le=1, default=0.0)  # validated to lie in [0, 1]
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept because other models in this file use it too.
    computed_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# Schema for aggregated historical pattern statistics for a
# (source_ticker, target_ticker, catalyst_type, time_horizon) combination.
# Every field has a default, so the model can be built with no arguments.
class HistoricalPatternSchema(BaseModel):
    source_ticker: str = ""
    target_ticker: str = ""
    catalyst_type: str = ""
    time_horizon: str = "7d"  # free-form string; format is not validated here
    sample_count: int = 0
    bullish_pct: float = Field(ge=0, le=1, default=0.0)  # despite "pct", validated as a fraction in [0, 1]
    bearish_pct: float = Field(ge=0, le=1, default=0.0)  # despite "pct", validated as a fraction in [0, 1]
    avg_strength: float = Field(ge=0, le=1, default=0.0)  # validated to lie in [0, 1]
    avg_time_to_resolution: float = 0.0  # unit is not stated in this schema - TODO confirm with producer
    pattern_confidence: float = Field(ge=0, le=1, default=0.0)  # validated to lie in [0, 1]
    data_start: Optional[datetime] = None
    data_end: Optional[datetime] = None
    tier: CatalystTier = CatalystTier.ROUTINE_SIGNAL
    insufficient_data: bool = False
|
||||
|
||||
@@ -48,6 +48,7 @@ SOURCE_BUCKET_MAP: dict[str, str] = {
|
||||
"filings_api": "stonks-raw-filings",
|
||||
"web_scrape": "stonks-raw-news",
|
||||
"broker": "stonks-raw-market",
|
||||
"macro_news": "stonks-raw-news",
|
||||
}
|
||||
|
||||
# Map artifact type to content type and file extension
|
||||
@@ -75,10 +76,14 @@ def build_artifact_path(
|
||||
"""Build a MinIO object path following the design convention.
|
||||
|
||||
Pattern: {source_type}/{ticker}/{yyyy}/{mm}/{dd}/{document_id}/{artifact_name}.{ext}
|
||||
For macro_news sources, uses macro/ prefix instead of ticker:
|
||||
macro/{yyyy}/{mm}/{dd}/{document_id}/{artifact_name}.{ext}
|
||||
"""
|
||||
ts = timestamp or datetime.now(timezone.utc)
|
||||
# Macro sources use macro/ prefix instead of ticker (Requirement 1.1)
|
||||
path_prefix = "macro" if source_type == "macro_news" else f"{source_type}/{ticker}"
|
||||
return (
|
||||
f"{source_type}/{ticker}/"
|
||||
f"{path_prefix}/"
|
||||
f"{ts.year}/{ts.month:02d}/{ts.day:02d}/"
|
||||
f"{document_id}/{artifact_name}.{ext}"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user