284 lines
12 KiB
Python
284 lines
12 KiB
Python
"""Shared configuration loader for all services."""
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass
class PostgresConfig:
    """Connection parameters for the PostgreSQL database."""

    host: str = "localhost"
    port: int = 5432
    database: str = "stonks"
    user: str = "stonks"
    password: str = "stonks_dev"

    @property
    def dsn(self) -> str:
        """Return a ``postgresql://`` connection URI built from the fields.

        The user, password and database name are percent-encoded so that
        reserved characters in them (``@``, ``:``, ``/``, ``#``, ...) cannot
        be mistaken for URI delimiters. Values made only of unreserved
        characters are emitted unchanged.
        """
        # Function-scope import: keeps the fix self-contained without
        # touching the module's import block.
        from urllib.parse import quote

        user = quote(self.user, safe="")
        password = quote(self.password, safe="")
        database = quote(self.database, safe="")
        return f"postgresql://{user}:{password}@{self.host}:{self.port}/{database}"
|
|
|
|
|
|
@dataclass
class RedisConfig:
    """Connection parameters for Redis."""

    host: str = "localhost"
    port: int = 6379
    db: int = 0
    # None (or an empty string) means "connect without a password".
    password: Optional[str] = None

    @property
    def url(self) -> str:
        """Return a ``redis://`` URL built from the fields.

        When a non-empty password is set it is placed in the userinfo part
        (``:password@``) and percent-encoded, so reserved characters such as
        ``@`` or ``/`` cannot break the URL. Without a password the userinfo
        part is omitted entirely.
        """
        # Function-scope import: keeps the fix self-contained without
        # touching the module's import block.
        from urllib.parse import quote

        auth = f":{quote(self.password, safe='')}@" if self.password else ""
        return f"redis://{auth}{self.host}:{self.port}/{self.db}"
|
|
|
|
|
|
@dataclass
class MinioConfig:
    """Connection parameters for the MinIO object store."""

    # Endpoint in "host:port" form, without a URL scheme.
    endpoint: str = "localhost:9000"

    # Credential pair used to authenticate against the endpoint.
    access_key: str = "minioadmin"
    secret_key: str = "minioadmin"

    # NOTE(review): presumably selects HTTPS over HTTP -- confirm against
    # the client that consumes this config.
    secure: bool = False
|
|
|
|
|
|
@dataclass
class OllamaConfig:
    """Settings for talking to an Ollama server.

    NOTE(review): units are not stated in this module; the timeout and
    delay fields are presumably seconds -- confirm against the client code.
    """

    # Where the server lives and which model to request.
    base_url: str = "http://localhost:11434"
    model: str = "llama3.1:8b"

    # Request timeout.
    timeout: int = 120

    # Retry policy: attempt count, first/maximum delay, and backoff factor.
    max_retries: int = 2
    retry_base_delay: float = 1.0
    retry_max_delay: float = 10.0
    retry_backoff_multiplier: float = 2.0

    # Generation limits and stall/loop detection knobs.
    # NOTE(review): exact semantics are defined by the consumer of this
    # config, not visible here.
    max_tokens: int = 32768
    stall_timeout: float = 30.0
    loop_window: int = 64
    loop_threshold: float = 0.5
|
|
|
|
|
|
@dataclass
class TrinoConfig:
    """Connection parameters and default namespaces for Trino."""

    # Coordinator address.
    host: str = "localhost"
    port: int = 8080

    # Default catalog and schema, plus the name of the Iceberg catalog.
    catalog: str = "lakehouse"
    schema: str = "stonks"
    iceberg_catalog: str = "iceberg"
|
|
|
|
|
|
@dataclass
class MarketDataConfig:
    """Settings for the market-data provider API."""

    # Empty string by default, i.e. no key configured.
    api_key: str = ""

    # Provider endpoint and the provider's identifier.
    base_url: str = "https://api.polygon.io"
    provider: str = "polygon"
|
|
|
|
|
|
@dataclass
class BrokerConfig:
    """Settings for the brokerage integration."""

    # Trading mode: "paper" or "live".
    mode: str = "paper"

    # Identifier of the brokerage provider.
    provider: str = "alpaca"

    # Credentials and endpoint override; all unset (None) by default.
    api_key: Optional[str] = None
    api_secret: Optional[str] = None
    base_url: Optional[str] = None
|
|
|
|
|
|
@dataclass
class RetentionConfig:
    """Default data-retention periods, in days, for each bucket class.

    Individual buckets may override these values through the
    ``retention_policies`` database table. ``cleanup_interval_hours`` sets
    how frequently the retention worker runs.
    """

    # Raw ingested data.
    raw_market_days: int = 90
    raw_news_days: int = 180
    raw_filings_days: int = 365

    # Normalized data.
    normalized_days: int = 180

    # LLM prompts and results.
    llm_prompts_days: int = 365
    llm_results_days: int = 365

    # Lakehouse and audit data.
    lakehouse_days: int = 730
    audit_days: int = 730

    # How often the retention worker runs.
    cleanup_interval_hours: int = 24

    # NOTE(review): presumably the number of objects handled per cleanup
    # batch -- confirm against the retention worker.
    batch_size: int = 1000
|
|
|
|
|
|
# Lookup table: storage bucket name -> name of the RetentionConfig field
# that holds that bucket's default retention period (in days).
BUCKET_RETENTION_FIELDS: dict[str, str] = {
    # Raw ingested data
    "stonks-raw-market": "raw_market_days",
    "stonks-raw-news": "raw_news_days",
    "stonks-raw-filings": "raw_filings_days",
    # Normalized data
    "stonks-normalized": "normalized_days",
    # LLM prompts and results
    "stonks-llm-prompts": "llm_prompts_days",
    "stonks-llm-results": "llm_results_days",
    # Lakehouse and audit data
    "stonks-lakehouse": "lakehouse_days",
    "stonks-audit": "audit_days",
}
|
|
|
|
|
|
@dataclass
class MacroConfig:
    """Tunables for the macro news interpolation layer.

    Requirements: 5.6, 10.1, 10.2, 12.9
    """

    # Relative weight of macro signals versus company signals.
    macro_signal_weight: float = 0.3

    # Runtime toggle state; on by default.
    macro_enabled: bool = True

    # Minimum confidence an event needs in order to be included.
    macro_confidence_threshold: float = 0.4

    # Age, in hours, after which short-term events get accelerated decay.
    macro_short_term_staleness_hours: int = 48

    # Minimum confidence a projection needs to influence recommendations.
    projection_confidence_threshold: float = 0.3
|
|
|
|
|
|
@dataclass
class AlertingConfig:
    """Threshold settings for the operational alerting rules.

    Requirements: 12.3
    """

    # --- Source failures ---
    # Consecutive failures before an alert, and the lookback window (hours).
    source_failure_threshold: int = 3
    source_failure_window_hours: int = 6

    # --- Schema / extraction failure spikes ---
    # A failure rate of 0.3 (30%) within the window triggers an alert.
    schema_failure_rate_threshold: float = 0.3
    schema_failure_window_hours: int = 1

    # --- Analytical (lake publication) lag ---
    # Minutes since the last successful publish.
    lake_lag_threshold_minutes: int = 60

    # --- Broker issues ---
    # Consecutive broker errors, and the associated window (hours).
    broker_error_threshold: int = 3
    broker_error_window_hours: int = 1

    # --- Evaluation interval ---
    check_interval_seconds: int = 120
|
|
|
|
|
|
@dataclass
class CompetitiveConfig:
    """Tunables for the competitive intelligence and historical pattern matching layer.

    Requirements: 5.6, 6.1, 9.1, 9.2, 11.2, 11.3
    """

    # Signal weight and on/off switch for the layer.
    competitive_signal_weight: float = 0.2
    competitive_enabled: bool = True

    # Cut-offs for pattern confidence and propagation strength.
    pattern_confidence_threshold: float = 0.3
    propagation_strength_threshold: float = 0.2

    # Lookback horizons (days) and the weight multiplier for major decisions.
    routine_lookback_days: int = 180
    major_decision_lookback_days: int = 365
    major_decision_weight_multiplier: float = 1.3

    # Staleness handling.
    # NOTE(review): how the window, recent-days and penalty values combine
    # is defined by the consumer of this config, not visible here.
    staleness_window_days: int = 180
    staleness_recent_days: int = 90
    staleness_decay_penalty: float = 0.5

    # Sample-count setting for patterns.
    min_pattern_samples: int = 3

    # Consecutive failures before an operator alert.
    propagation_failure_threshold: int = 5
|
|
|
|
|
|
@dataclass
class AppConfig:
    """Top-level configuration: one section per subsystem plus logging options.

    Each section defaults to a freshly constructed instance of its config
    class, so ``AppConfig()`` on its own yields a complete set of defaults.
    """

    # Infrastructure sections.
    postgres: PostgresConfig = field(default_factory=PostgresConfig)
    redis: RedisConfig = field(default_factory=RedisConfig)
    minio: MinioConfig = field(default_factory=MinioConfig)
    ollama: OllamaConfig = field(default_factory=OllamaConfig)
    trino: TrinoConfig = field(default_factory=TrinoConfig)

    # External providers.
    market_data: MarketDataConfig = field(default_factory=MarketDataConfig)
    broker: BrokerConfig = field(default_factory=BrokerConfig)

    # Operational policies.
    retention: RetentionConfig = field(default_factory=RetentionConfig)
    alerting: AlertingConfig = field(default_factory=AlertingConfig)

    # Signal layers.
    macro: MacroConfig = field(default_factory=MacroConfig)
    competitive: CompetitiveConfig = field(default_factory=CompetitiveConfig)

    # Logging options.
    log_level: str = "INFO"
    json_logs: bool = True
|
|
|
|
|
|
# Spellings (compared case-insensitively, after stripping whitespace) that
# count as "true" for boolean environment variables.
_TRUE_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_bool(name: str, default: bool) -> bool:
    """Read a boolean flag from the environment; *default* applies when unset."""
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUE_VALUES


def _env_number(name, default, cast):
    """Read a numeric variable via *cast*, naming the variable if parsing fails."""
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except ValueError as err:
        # Same exception type as a bare int()/float() call, but the message
        # tells the operator which variable is malformed.
        raise ValueError(
            f"environment variable {name}={raw!r} is not a valid {cast.__name__}"
        ) from err


def _env_int(name: str, default: int) -> int:
    """Read an integer environment variable; *default* applies when unset."""
    return _env_number(name, default, int)


def _env_float(name: str, default: float) -> float:
    """Read a float environment variable; *default* applies when unset."""
    return _env_number(name, default, float)


def load_config() -> AppConfig:
    """Load configuration from environment variables with sensible defaults.

    Every setting falls back to its built-in default when the corresponding
    variable is unset. Boolean flags are true for ``1``/``true``/``yes``/``on``
    (case-insensitive) and false for anything else.

    Returns:
        A fully populated ``AppConfig``.

    Raises:
        ValueError: If a numeric variable is set to a value that cannot be
            parsed; the message names the offending variable.
    """
    env = os.getenv
    return AppConfig(
        postgres=PostgresConfig(
            host=env("POSTGRES_HOST", "localhost"),
            port=_env_int("POSTGRES_PORT", 5432),
            database=env("POSTGRES_DB", "stonks"),
            user=env("POSTGRES_USER", "stonks"),
            password=env("POSTGRES_PASSWORD", "stonks_dev"),
        ),
        redis=RedisConfig(
            host=env("REDIS_HOST", "localhost"),
            port=_env_int("REDIS_PORT", 6379),
            db=_env_int("REDIS_DB", 0),
            password=env("REDIS_PASSWORD"),
        ),
        minio=MinioConfig(
            endpoint=env("MINIO_ENDPOINT", "localhost:9000"),
            access_key=env("MINIO_ACCESS_KEY", "minioadmin"),
            secret_key=env("MINIO_SECRET_KEY", "minioadmin"),
            secure=_env_bool("MINIO_SECURE", False),
        ),
        ollama=OllamaConfig(
            base_url=env("OLLAMA_BASE_URL", "http://localhost:11434"),
            model=env("OLLAMA_MODEL", "llama3.1:8b"),
            timeout=_env_int("OLLAMA_TIMEOUT", 120),
            max_retries=_env_int("OLLAMA_MAX_RETRIES", 2),
            retry_base_delay=_env_float("OLLAMA_RETRY_BASE_DELAY", 1.0),
            retry_max_delay=_env_float("OLLAMA_RETRY_MAX_DELAY", 10.0),
            retry_backoff_multiplier=_env_float("OLLAMA_RETRY_BACKOFF_MULTIPLIER", 2.0),
            # These four fields previously could not be set from the
            # environment; they follow the same OLLAMA_<FIELD> naming.
            max_tokens=_env_int("OLLAMA_MAX_TOKENS", 32768),
            stall_timeout=_env_float("OLLAMA_STALL_TIMEOUT", 30.0),
            loop_window=_env_int("OLLAMA_LOOP_WINDOW", 64),
            loop_threshold=_env_float("OLLAMA_LOOP_THRESHOLD", 0.5),
        ),
        trino=TrinoConfig(
            host=env("TRINO_HOST", "localhost"),
            port=_env_int("TRINO_PORT", 8080),
            catalog=env("TRINO_CATALOG", "lakehouse"),
            schema=env("TRINO_SCHEMA", "stonks"),
            iceberg_catalog=env("TRINO_ICEBERG_CATALOG", "iceberg"),
        ),
        market_data=MarketDataConfig(
            api_key=env("MARKET_DATA_API_KEY", ""),
            base_url=env("MARKET_DATA_BASE_URL", "https://api.polygon.io"),
            provider=env("MARKET_DATA_PROVIDER", "polygon"),
        ),
        broker=BrokerConfig(
            mode=env("BROKER_MODE", "paper"),
            provider=env("BROKER_PROVIDER", "alpaca"),
            api_key=env("BROKER_API_KEY"),
            api_secret=env("BROKER_API_SECRET"),
            base_url=env("BROKER_BASE_URL"),
        ),
        retention=RetentionConfig(
            raw_market_days=_env_int("RETENTION_RAW_MARKET_DAYS", 90),
            raw_news_days=_env_int("RETENTION_RAW_NEWS_DAYS", 180),
            raw_filings_days=_env_int("RETENTION_RAW_FILINGS_DAYS", 365),
            normalized_days=_env_int("RETENTION_NORMALIZED_DAYS", 180),
            llm_prompts_days=_env_int("RETENTION_LLM_PROMPTS_DAYS", 365),
            llm_results_days=_env_int("RETENTION_LLM_RESULTS_DAYS", 365),
            lakehouse_days=_env_int("RETENTION_LAKEHOUSE_DAYS", 730),
            audit_days=_env_int("RETENTION_AUDIT_DAYS", 730),
            cleanup_interval_hours=_env_int("RETENTION_CLEANUP_INTERVAL_HOURS", 24),
            batch_size=_env_int("RETENTION_BATCH_SIZE", 1000),
        ),
        alerting=AlertingConfig(
            source_failure_threshold=_env_int("ALERT_SOURCE_FAILURE_THRESHOLD", 3),
            source_failure_window_hours=_env_int("ALERT_SOURCE_FAILURE_WINDOW_HOURS", 6),
            schema_failure_rate_threshold=_env_float("ALERT_SCHEMA_FAILURE_RATE_THRESHOLD", 0.3),
            schema_failure_window_hours=_env_int("ALERT_SCHEMA_FAILURE_WINDOW_HOURS", 1),
            lake_lag_threshold_minutes=_env_int("ALERT_LAKE_LAG_THRESHOLD_MINUTES", 60),
            broker_error_threshold=_env_int("ALERT_BROKER_ERROR_THRESHOLD", 3),
            broker_error_window_hours=_env_int("ALERT_BROKER_ERROR_WINDOW_HOURS", 1),
            check_interval_seconds=_env_int("ALERT_CHECK_INTERVAL_SECONDS", 120),
        ),
        macro=MacroConfig(
            macro_signal_weight=_env_float("MACRO_SIGNAL_WEIGHT", 0.3),
            macro_enabled=_env_bool("MACRO_ENABLED", True),
            macro_confidence_threshold=_env_float("MACRO_CONFIDENCE_THRESHOLD", 0.4),
            macro_short_term_staleness_hours=_env_int("MACRO_SHORT_TERM_STALENESS_HOURS", 48),
            projection_confidence_threshold=_env_float("PROJECTION_CONFIDENCE_THRESHOLD", 0.3),
        ),
        competitive=CompetitiveConfig(
            competitive_signal_weight=_env_float("COMPETITIVE_SIGNAL_WEIGHT", 0.2),
            competitive_enabled=_env_bool("COMPETITIVE_ENABLED", True),
            pattern_confidence_threshold=_env_float("COMPETITIVE_PATTERN_CONFIDENCE_THRESHOLD", 0.3),
            propagation_strength_threshold=_env_float("COMPETITIVE_PROPAGATION_STRENGTH_THRESHOLD", 0.2),
            routine_lookback_days=_env_int("COMPETITIVE_ROUTINE_LOOKBACK_DAYS", 180),
            major_decision_lookback_days=_env_int("COMPETITIVE_MAJOR_DECISION_LOOKBACK_DAYS", 365),
            major_decision_weight_multiplier=_env_float("COMPETITIVE_MAJOR_DECISION_WEIGHT_MULTIPLIER", 1.3),
            staleness_window_days=_env_int("COMPETITIVE_STALENESS_WINDOW_DAYS", 180),
            staleness_recent_days=_env_int("COMPETITIVE_STALENESS_RECENT_DAYS", 90),
            staleness_decay_penalty=_env_float("COMPETITIVE_STALENESS_DECAY_PENALTY", 0.5),
            min_pattern_samples=_env_int("COMPETITIVE_MIN_PATTERN_SAMPLES", 3),
            propagation_failure_threshold=_env_int("COMPETITIVE_PROPAGATION_FAILURE_THRESHOLD", 5),
        ),
        log_level=env("LOG_LEVEL", "INFO"),
        json_logs=_env_bool("JSON_LOGS", True),
    )
|