"""Shared configuration loader for all services.""" import os from dataclasses import dataclass, field from typing import Optional @dataclass class PostgresConfig: host: str = "localhost" port: int = 5432 database: str = "stonks" user: str = "stonks" password: str = "stonks_dev" @property def dsn(self) -> str: return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}" @dataclass class RedisConfig: host: str = "localhost" port: int = 6379 db: int = 0 password: Optional[str] = None @property def url(self) -> str: auth = f":{self.password}@" if self.password else "" return f"redis://{auth}{self.host}:{self.port}/{self.db}" @dataclass class MinioConfig: endpoint: str = "localhost:9000" access_key: str = "minioadmin" secret_key: str = "minioadmin" secure: bool = False @dataclass class OllamaConfig: base_url: str = "http://localhost:11434" model: str = "llama3.1:8b" timeout: int = 120 max_retries: int = 2 retry_base_delay: float = 1.0 retry_max_delay: float = 10.0 retry_backoff_multiplier: float = 2.0 max_tokens: int = 32768 stall_timeout: float = 30.0 loop_window: int = 64 loop_threshold: float = 0.5 @dataclass class TrinoConfig: host: str = "localhost" port: int = 8080 catalog: str = "lakehouse" schema: str = "stonks" iceberg_catalog: str = "iceberg" @dataclass class MarketDataConfig: api_key: str = "" base_url: str = "https://api.polygon.io" provider: str = "polygon" @dataclass class BrokerConfig: mode: str = "paper" # paper | live provider: str = "alpaca" api_key: Optional[str] = None api_secret: Optional[str] = None base_url: Optional[str] = None @dataclass class RetentionConfig: """Default retention periods (days) per bucket class. These can be overridden per-bucket via the retention_policies DB table. The cleanup_interval_hours controls how often the retention worker runs. """ raw_market_days: int = 90 raw_news_days: int = 180 raw_filings_days: int = 365 normalized_days: int = 180 llm_prompts_days: int = 365 llm_results_days: int = 365 lakehouse_days: int = 730 audit_days: int = 730 cleanup_interval_hours: int = 24 batch_size: int = 1000 # Map bucket names to RetentionConfig field names BUCKET_RETENTION_FIELDS: dict[str, str] = { "stonks-raw-market": "raw_market_days", "stonks-raw-news": "raw_news_days", "stonks-raw-filings": "raw_filings_days", "stonks-normalized": "normalized_days", "stonks-llm-prompts": "llm_prompts_days", "stonks-llm-results": "llm_results_days", "stonks-lakehouse": "lakehouse_days", "stonks-audit": "audit_days", } @dataclass class MacroConfig: """Configuration for the macro news interpolation layer. Requirements: 5.6, 10.1, 10.2, 12.9 """ macro_signal_weight: float = 0.3 # relative weight of macro vs company signals macro_enabled: bool = True # runtime toggle state (default on) macro_confidence_threshold: float = 0.4 # minimum confidence for event inclusion macro_short_term_staleness_hours: int = 48 # hours after which short-term events get accelerated decay projection_confidence_threshold: float = 0.3 # minimum confidence for projections to influence recommendations @dataclass class AlertingConfig: """Thresholds for operational alerting rules. Requirements: 12.3 """ # Source failure alerting source_failure_threshold: int = 3 # consecutive failures before alert source_failure_window_hours: int = 6 # lookback window # Schema/extraction failure spike schema_failure_rate_threshold: float = 0.3 # 30% failure rate triggers alert schema_failure_window_hours: int = 1 # Analytical (lake publication) lag lake_lag_threshold_minutes: int = 60 # minutes since last successful publish # Broker issues broker_error_threshold: int = 3 # consecutive broker errors broker_error_window_hours: int = 1 # Evaluation interval check_interval_seconds: int = 120 @dataclass class CompetitiveConfig: """Configuration for the competitive intelligence & historical pattern matching layer. Requirements: 5.6, 6.1, 9.1, 9.2, 11.2, 11.3 """ competitive_signal_weight: float = 0.2 competitive_enabled: bool = True pattern_confidence_threshold: float = 0.3 propagation_strength_threshold: float = 0.2 routine_lookback_days: int = 180 major_decision_lookback_days: int = 365 major_decision_weight_multiplier: float = 1.3 staleness_window_days: int = 180 staleness_recent_days: int = 90 staleness_decay_penalty: float = 0.5 min_pattern_samples: int = 3 propagation_failure_threshold: int = 5 # consecutive failures before operator alert @dataclass class AppConfig: postgres: PostgresConfig = field(default_factory=PostgresConfig) redis: RedisConfig = field(default_factory=RedisConfig) minio: MinioConfig = field(default_factory=MinioConfig) ollama: OllamaConfig = field(default_factory=OllamaConfig) trino: TrinoConfig = field(default_factory=TrinoConfig) market_data: MarketDataConfig = field(default_factory=MarketDataConfig) broker: BrokerConfig = field(default_factory=BrokerConfig) retention: RetentionConfig = field(default_factory=RetentionConfig) alerting: AlertingConfig = field(default_factory=AlertingConfig) macro: MacroConfig = field(default_factory=MacroConfig) competitive: CompetitiveConfig = field(default_factory=CompetitiveConfig) log_level: str = "INFO" json_logs: bool = True def load_config() -> AppConfig: """Load configuration from environment variables with sensible defaults.""" return AppConfig( postgres=PostgresConfig( host=os.getenv("POSTGRES_HOST", "localhost"), port=int(os.getenv("POSTGRES_PORT", "5432")), database=os.getenv("POSTGRES_DB", "stonks"), user=os.getenv("POSTGRES_USER", "stonks"), password=os.getenv("POSTGRES_PASSWORD", "stonks_dev"), ), redis=RedisConfig( host=os.getenv("REDIS_HOST", "localhost"), port=int(os.getenv("REDIS_PORT", "6379")), db=int(os.getenv("REDIS_DB", "0")), password=os.getenv("REDIS_PASSWORD", None), ), minio=MinioConfig( endpoint=os.getenv("MINIO_ENDPOINT", "localhost:9000"), access_key=os.getenv("MINIO_ACCESS_KEY", "minioadmin"), secret_key=os.getenv("MINIO_SECRET_KEY", "minioadmin"), secure=os.getenv("MINIO_SECURE", "false").lower() == "true", ), ollama=OllamaConfig( base_url=os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"), model=os.getenv("OLLAMA_MODEL", "llama3.1:8b"), timeout=int(os.getenv("OLLAMA_TIMEOUT", "120")), max_retries=int(os.getenv("OLLAMA_MAX_RETRIES", "2")), retry_base_delay=float(os.getenv("OLLAMA_RETRY_BASE_DELAY", "1.0")), retry_max_delay=float(os.getenv("OLLAMA_RETRY_MAX_DELAY", "10.0")), retry_backoff_multiplier=float(os.getenv("OLLAMA_RETRY_BACKOFF_MULTIPLIER", "2.0")), ), trino=TrinoConfig( host=os.getenv("TRINO_HOST", "localhost"), port=int(os.getenv("TRINO_PORT", "8080")), catalog=os.getenv("TRINO_CATALOG", "lakehouse"), schema=os.getenv("TRINO_SCHEMA", "stonks"), iceberg_catalog=os.getenv("TRINO_ICEBERG_CATALOG", "iceberg"), ), market_data=MarketDataConfig( api_key=os.getenv("MARKET_DATA_API_KEY", ""), base_url=os.getenv("MARKET_DATA_BASE_URL", "https://api.polygon.io"), provider=os.getenv("MARKET_DATA_PROVIDER", "polygon"), ), broker=BrokerConfig( mode=os.getenv("BROKER_MODE", "paper"), provider=os.getenv("BROKER_PROVIDER", "alpaca"), api_key=os.getenv("BROKER_API_KEY", None), api_secret=os.getenv("BROKER_API_SECRET", None), base_url=os.getenv("BROKER_BASE_URL", None), ), retention=RetentionConfig( raw_market_days=int(os.getenv("RETENTION_RAW_MARKET_DAYS", "90")), raw_news_days=int(os.getenv("RETENTION_RAW_NEWS_DAYS", "180")), raw_filings_days=int(os.getenv("RETENTION_RAW_FILINGS_DAYS", "365")), normalized_days=int(os.getenv("RETENTION_NORMALIZED_DAYS", "180")), llm_prompts_days=int(os.getenv("RETENTION_LLM_PROMPTS_DAYS", "365")), llm_results_days=int(os.getenv("RETENTION_LLM_RESULTS_DAYS", "365")), lakehouse_days=int(os.getenv("RETENTION_LAKEHOUSE_DAYS", "730")), audit_days=int(os.getenv("RETENTION_AUDIT_DAYS", "730")), cleanup_interval_hours=int(os.getenv("RETENTION_CLEANUP_INTERVAL_HOURS", "24")), batch_size=int(os.getenv("RETENTION_BATCH_SIZE", "1000")), ), alerting=AlertingConfig( source_failure_threshold=int(os.getenv("ALERT_SOURCE_FAILURE_THRESHOLD", "3")), source_failure_window_hours=int(os.getenv("ALERT_SOURCE_FAILURE_WINDOW_HOURS", "6")), schema_failure_rate_threshold=float(os.getenv("ALERT_SCHEMA_FAILURE_RATE_THRESHOLD", "0.3")), schema_failure_window_hours=int(os.getenv("ALERT_SCHEMA_FAILURE_WINDOW_HOURS", "1")), lake_lag_threshold_minutes=int(os.getenv("ALERT_LAKE_LAG_THRESHOLD_MINUTES", "60")), broker_error_threshold=int(os.getenv("ALERT_BROKER_ERROR_THRESHOLD", "3")), broker_error_window_hours=int(os.getenv("ALERT_BROKER_ERROR_WINDOW_HOURS", "1")), check_interval_seconds=int(os.getenv("ALERT_CHECK_INTERVAL_SECONDS", "120")), ), macro=MacroConfig( macro_signal_weight=float(os.getenv("MACRO_SIGNAL_WEIGHT", "0.3")), macro_enabled=os.getenv("MACRO_ENABLED", "true").lower() == "true", macro_confidence_threshold=float(os.getenv("MACRO_CONFIDENCE_THRESHOLD", "0.4")), macro_short_term_staleness_hours=int(os.getenv("MACRO_SHORT_TERM_STALENESS_HOURS", "48")), projection_confidence_threshold=float(os.getenv("PROJECTION_CONFIDENCE_THRESHOLD", "0.3")), ), competitive=CompetitiveConfig( competitive_signal_weight=float(os.getenv("COMPETITIVE_SIGNAL_WEIGHT", "0.2")), competitive_enabled=os.getenv("COMPETITIVE_ENABLED", "true").lower() == "true", pattern_confidence_threshold=float(os.getenv("COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD", "0.3")), propagation_strength_threshold=float(os.getenv("COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD", "0.2")), routine_lookback_days=int(os.getenv("COMPETITIVE_ROUTINE_LOOKBACK_DAYS", "180")), major_decision_lookback_days=int(os.getenv("COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS", "365")), major_decision_weight_multiplier=float(os.getenv("COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER", "1.3")), staleness_window_days=int(os.getenv("COMPETITIVE_STALENESS_WINDOW_DAYS", "180")), staleness_recent_days=int(os.getenv("COMPETITIVE_STALENESS_RECENT_DAYS", "90")), staleness_decay_penalty=float(os.getenv("COMPETITIVE_STALENESS_DECAY_PENALTY", "0.5")), min_pattern_samples=int(os.getenv("COMPETITIVE_MIN_PATTERN_SAMPLES", "3")), propagation_failure_threshold=int(os.getenv("COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD", "5")), ), log_level=os.getenv("LOG_LEVEL", "INFO"), json_logs=os.getenv("JSON_LOGS", "true").lower() == "true", )