Files
stonks-oracle/services/shared/schemas.py
T
Celes Renata 4e010bc048
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
feat: signal math upgrade — probabilistic, regime-aware scoring pipeline
Implement full probabilistic signal processing pipeline gated behind
probabilistic_scoring_enabled feature flag in risk_configs:

- Bayesian log-likelihood accumulator with Beta posterior and entropy
- Regime detector (trend-following, panic, mean-reversion, uncertainty)
- Source accuracy tracker with per-source historical prediction accuracy
- Sigmoid confidence gate replacing binary gate
- Information gain surprise weighting for rare events
- Adaptive recency decay with event-specific half-lives
- Regime multiplier replacing market context multiplier
- Weighted disagreement entropy for contradiction detection
- Multiplicative macro exposure with conditional integration
- Graph-distance attenuated competitive signal propagation
- Exponentially weighted momentum with volatility scaling
- Expected value recommendation gate

All changes backward-compatible: flag=false preserves exact current behavior.
New outputs stored in existing JSONB columns (no schema changes except
source_accuracy table via migration 034).

Tests: 26 property-based tests (14 correctness properties), 99 unit tests,
1789 total tests passing with zero regressions.
2026-04-29 11:41:48 +00:00

413 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Typed JSON schemas for document intelligence, trend summaries, and recommendations."""
from __future__ import annotations
import uuid
from datetime import datetime, timezone
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field
# --- Enums ---
class DocumentType(str, Enum):
    # Document categories referenced by DocumentIntelligence.document_type
    # and DocumentMetadata.document_type.  Members subclass ``str``, so each
    # one compares equal to its serialized value.
    ARTICLE = "article"
    FILING = "filing"
    TRANSCRIPT = "transcript"
    PRESS_RELEASE = "press_release"
    MACRO_EVENT = "macro_event"
class SourceType(str, Enum):
    # Source categories referenced by DocumentMetadata.source_type.
    # Members subclass ``str`` and compare equal to their serialized value.
    MARKET_API = "market_api"
    NEWS_API = "news_api"
    FILINGS_API = "filings_api"
    WEB_SCRAPE = "web_scrape"
    BROKER = "broker"
class Sentiment(str, Enum):
    # Sentiment labels referenced by CompanyImpact.sentiment.
    # Members subclass ``str`` and compare equal to their serialized value.
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
    MIXED = "mixed"
class CatalystType(str, Enum):
    # Catalyst labels referenced by CompanyImpact.catalyst_type.
    # Members subclass ``str`` and compare equal to their serialized value.
    EARNINGS = "earnings"
    PRODUCT = "product"
    LEGAL = "legal"
    MACRO = "macro"
    SUPPLY_CHAIN = "supply_chain"
    M_AND_A = "m_and_a"
    RATING_CHANGE = "rating_change"
    OTHER = "other"
class TrendDirection(str, Enum):
    # Direction labels referenced by TrendSummary.trend_direction and
    # TrendProjectionSchema.projected_direction.
    BULLISH = "bullish"
    BEARISH = "bearish"
    MIXED = "mixed"
    NEUTRAL = "neutral"
class ActionType(str, Enum):
    # Action labels referenced by Recommendation.action.
    # Members subclass ``str`` and compare equal to their serialized value.
    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
class RecommendationMode(str, Enum):
    # Mode labels referenced by Recommendation.mode.
    # Members subclass ``str`` and compare equal to their serialized value.
    INFORMATIONAL = "informational"
    PAPER_ELIGIBLE = "paper_eligible"
    LIVE_ELIGIBLE = "live_eligible"
class TrendWindow(str, Enum):
    # Window labels referenced by TrendSummary.window.  Note that the
    # serialized values are short codes ("1d", "7d", ...) that differ from
    # the member names.
    INTRADAY = "intraday"
    ONE_DAY = "1d"
    SEVEN_DAY = "7d"
    THIRTY_DAY = "30d"
    NINETY_DAY = "90d"
class ImpactType(str, Enum):
    # Impact labels; GlobalEventSchema.event_types holds a list of these.
    # Members subclass ``str`` and compare equal to their serialized value.
    SUPPLY_DISRUPTION = "supply_disruption"
    DEMAND_SHIFT = "demand_shift"
    COST_INCREASE = "cost_increase"
    REGULATORY_PRESSURE = "regulatory_pressure"
    CURRENCY_IMPACT = "currency_impact"
    COMMODITY_SHOCK = "commodity_shock"
    TRADE_BARRIER = "trade_barrier"
    GEOPOLITICAL_RISK = "geopolitical_risk"
class SeverityLevel(str, Enum):
    # Severity labels referenced by GlobalEventSchema.severity.
    # Members subclass ``str`` and compare equal to their serialized value.
    LOW = "low"
    MODERATE = "moderate"
    HIGH = "high"
    CRITICAL = "critical"
class MarketPositionTier(str, Enum):
    # Tier labels referenced by ExposureProfileSchema.market_position_tier.
    # Members subclass ``str`` and compare equal to their serialized value.
    GLOBAL_LEADER = "global_leader"
    MULTINATIONAL = "multinational"
    REGIONAL = "regional"
    DOMESTIC = "domestic"
class EstimatedDuration(str, Enum):
    # Duration labels referenced by GlobalEventSchema.estimated_duration.
    # Members subclass ``str`` and compare equal to their serialized value.
    SHORT_TERM = "short_term"
    MEDIUM_TERM = "medium_term"
    LONG_TERM = "long_term"
# --- Autonomous Trading Engine Enums ---
class TradingDecisionType(str, Enum):
    # Two-valued decision label.  Members subclass ``str`` and compare equal
    # to their serialized value.
    ACT = "act"
    SKIP = "skip"
class CircuitBreakerTriggerType(str, Enum):
    # Circuit-breaker trigger labels.  Members subclass ``str`` and compare
    # equal to their serialized value.
    DAILY_LOSS = "daily_loss"
    SINGLE_POSITION = "single_position"
    VOLATILITY = "volatility"
    MANUAL = "manual"
class ReservePoolTriggerType(str, Enum):
    # Reserve-pool trigger labels.  Members subclass ``str`` and compare
    # equal to their serialized value.
    PROFIT_SIPHON = "profit_siphon"
    EMERGENCY_LIQUIDATION = "emergency_liquidation"
    MANUAL_ADJUSTMENT = "manual_adjustment"
    INITIAL = "initial"
class NotificationChannel(str, Enum):
    # Notification channel labels.  Members subclass ``str`` and compare
    # equal to their serialized value.
    SMS = "sms"
    EMAIL = "email"
class RiskTierName(str, Enum):
    # Risk tier labels.  Members subclass ``str`` and compare equal to their
    # serialized value.
    CONSERVATIVE = "conservative"
    MODERATE = "moderate"
    AGGRESSIVE = "aggressive"
# --- Document Intelligence ---
class CompanyImpact(BaseModel):
    # One per-company entry of DocumentIntelligence.companies.
    # Fields without a default are required at construction time; only the
    # three list fields may be omitted (each then gets its own fresh list).
    ticker: str
    company_name: str
    relevance: float = Field(ge=0, le=1)  # required, validated to [0, 1]
    sentiment: Sentiment
    impact_score: float = Field(ge=0, le=1)  # required, validated to [0, 1]
    impact_horizon: str
    catalyst_type: CatalystType
    key_facts: List[str] = Field(default_factory=list)
    risks: List[str] = Field(default_factory=list)
    evidence_spans: List[str] = Field(default_factory=list)
class ModelMetadata(BaseModel):
    # Model provenance record embedded in DocumentIntelligence (``model``),
    # Recommendation and GlobalEventSchema (``model_metadata``).
    # All fields are optional strings with the defaults shown.
    provider: str = "ollama"
    model_name: str = ""
    prompt_version: str = ""
    schema_version: str = "2.0.0"
class DocumentIntelligence(BaseModel):
    # Extraction result for a single document.  Every field has a default,
    # so the model can be built with no arguments.
    # A fresh UUID4 string is generated per instance when no id is supplied.
    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    summary: str = ""
    companies: List[CompanyImpact] = Field(default_factory=list)
    macro_themes: List[str] = Field(default_factory=list)
    # The three scores below are validated to [0, 1] and default to 0.5.
    novelty_score: float = Field(ge=0, le=1, default=0.5)
    source_credibility: float = Field(ge=0, le=1, default=0.5)
    extraction_warnings: List[str] = Field(default_factory=list)
    confidence: float = Field(ge=0, le=1, default=0.5)
    model: ModelMetadata = Field(default_factory=ModelMetadata)
# --- Trend Summary ---
class MarketContext(BaseModel):
    """Recent market data features for a symbol, used to enrich aggregation."""

    ticker: str = ""
    # Numeric features default to None when they are not supplied.
    price_change_pct: Optional[float] = None  # % change over the window
    avg_volume: Optional[float] = None  # average daily volume
    volume_change_pct: Optional[float] = None  # volume vs prior period
    volatility: Optional[float] = None  # intra-window price std dev
    latest_close: Optional[float] = None
    latest_bar_at: Optional[datetime] = None
    bars_available: int = 0

    @property
    def has_data(self) -> bool:
        # True only when at least one bar is reported as available.
        return self.bars_available > 0
class DisagreementDetail(BaseModel):
    """Represents an explicit disagreement between document signals.

    Rather than collapsing contradictory signals into a single score,
    this captures the nature of the disagreement so downstream consumers
    can inspect *why* signals conflict.

    Requirements: 6.4
    """

    dimension: str = ""  # e.g. "sentiment", "catalyst", "impact_horizon"
    # Document ids on each side of the disagreement, with a weight per side.
    positive_doc_ids: List[str] = Field(default_factory=list)
    negative_doc_ids: List[str] = Field(default_factory=list)
    positive_weight: float = 0.0
    negative_weight: float = 0.0
    description: str = ""
class TrendSummary(BaseModel):
    # Trend summary for one entity over one TrendWindow.  Every field has a
    # default, so the model can be built with no arguments.
    entity_type: str = "company"
    entity_id: str = ""
    window: TrendWindow = TrendWindow.SEVEN_DAY
    trend_direction: TrendDirection = TrendDirection.NEUTRAL
    # Scores below are validated to the closed interval [0, 1].
    trend_strength: float = Field(ge=0, le=1, default=0.5)
    confidence: float = Field(ge=0, le=1, default=0.5)
    top_supporting_evidence: List[str] = Field(default_factory=list)
    top_opposing_evidence: List[str] = Field(default_factory=list)
    dominant_catalysts: List[str] = Field(default_factory=list)
    material_risks: List[str] = Field(default_factory=list)
    contradiction_score: float = Field(ge=0, le=1, default=0.0)
    disagreement_details: List[DisagreementDetail] = Field(default_factory=list)
    market_context: Optional[MarketContext] = None
    # Timezone-aware UTC timestamp taken when the instance is created.
    generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))

    # New optional fields for probabilistic mode.  They default to None and
    # carry no range validation.
    p_bull: Optional[float] = None  # Bayesian bullish probability
    alpha: Optional[float] = None  # Beta posterior alpha
    beta_param: Optional[float] = None  # Beta posterior beta (named to avoid shadowing)
    bayesian_confidence: Optional[float] = None  # 1 - 4*alpha*beta / (alpha + beta)**2
    entropy: Optional[float] = None  # Shannon entropy H
    regime: Optional[str] = None  # Market regime classification
    pipeline_mode: str = "heuristic"  # "heuristic" or "probabilistic"
# --- Recommendation ---
class PositionSizing(BaseModel):
    # Sizing hints embedded in Recommendation.position_sizing.  Both values
    # are fractions validated to [0, 1] (defaults: 0.02 and 0.005).
    portfolio_pct: float = Field(ge=0, le=1, default=0.02)
    max_loss_pct: float = Field(ge=0, le=1, default=0.005)
class Recommendation(BaseModel):
    # Recommendation record.  Every field has a default; the defaults are
    # WATCH / INFORMATIONAL with an empty ticker and thesis.
    # A fresh UUID4 string is generated per instance when no id is supplied.
    recommendation_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    ticker: str = ""
    action: ActionType = ActionType.WATCH
    mode: RecommendationMode = RecommendationMode.INFORMATIONAL
    confidence: float = Field(ge=0, le=1, default=0.5)  # validated to [0, 1]
    time_horizon: str = ""
    thesis: str = ""
    invalidation_conditions: List[str] = Field(default_factory=list)
    position_sizing: PositionSizing = Field(default_factory=PositionSizing)
    evidence_refs: List[str] = Field(default_factory=list)
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)
    # Timezone-aware UTC timestamp taken when the instance is created.
    generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))

    # New optional fields for probabilistic mode.  They default to None and
    # carry no range validation.
    expected_value: Optional[float] = None  # EV = P_bull * R_up - P_bear * R_down
    p_bull: Optional[float] = None  # Bayesian bullish probability used
    pipeline_mode: str = "heuristic"  # "heuristic" or "probabilistic"
# --- Global News Interpolation ---
class GlobalEventSchema(BaseModel):
    # Global event record.  Every field has a default, so the model can be
    # built with no arguments.
    # A fresh UUID4 string is generated per instance when no id is supplied.
    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    event_types: List[ImpactType] = Field(default_factory=list)
    severity: SeverityLevel = SeverityLevel.LOW
    affected_regions: List[str] = Field(default_factory=list)
    affected_sectors: List[str] = Field(default_factory=list)
    affected_commodities: List[str] = Field(default_factory=list)
    summary: str = ""
    key_facts: List[str] = Field(default_factory=list)
    estimated_duration: EstimatedDuration = EstimatedDuration.SHORT_TERM
    confidence: float = Field(ge=0, le=1, default=0.5)  # validated to [0, 1]
    source_document_id: str = ""
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)
    # Timezone-aware UTC timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
class MacroImpactRecordSchema(BaseModel):
    # Macro impact record keyed by event_id / company_id / ticker strings
    # (all default to empty).
    event_id: str = ""
    company_id: str = ""
    ticker: str = ""
    macro_impact_score: float = Field(ge=0, le=1, default=0.0)  # validated to [0, 1]
    # Free-form string, not an enum; "neutral" is the only value shown here.
    impact_direction: str = "neutral"
    contributing_factors: List[str] = Field(default_factory=list)
    confidence: float = Field(ge=0, le=1, default=0.5)  # validated to [0, 1]
    # Timezone-aware UTC timestamp taken when the instance is created.
    computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
class ExposureProfileSchema(BaseModel):
    # Company exposure profile.  Every field has a default, so the model can
    # be built with no arguments.
    company_id: str = ""
    # Mapping of string key -> float; the values are not range-validated.
    geographic_revenue_mix: dict[str, float] = Field(default_factory=dict)
    supply_chain_regions: List[str] = Field(default_factory=list)
    key_input_commodities: List[str] = Field(default_factory=list)
    regulatory_jurisdictions: List[str] = Field(default_factory=list)
    market_position_tier: MarketPositionTier = MarketPositionTier.REGIONAL
    export_dependency_pct: float = Field(ge=0, le=1, default=0.0)  # validated to [0, 1]
    source: str = "manual"
    confidence: float = Field(ge=0, le=1, default=1.0)  # validated to [0, 1]
    version: int = 1
    active: bool = True
    # Each timestamp has its own factory, so the two defaults are taken by
    # separate calls to datetime.now and may differ slightly.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
class TrendProjectionSchema(BaseModel):
    # Projected trend record.  Every field has a default, so the model can
    # be built with no arguments.
    trend_window_id: str = ""
    projected_direction: TrendDirection = TrendDirection.NEUTRAL
    # Scores below are validated to the closed interval [0, 1].
    projected_strength: float = Field(ge=0, le=1, default=0.5)
    projected_confidence: float = Field(ge=0, le=1, default=0.5)
    # Plain string (default "7d"); not validated against TrendWindow.
    projection_horizon: str = "7d"
    driving_factors: List[str] = Field(default_factory=list)
    macro_contribution_pct: float = Field(ge=0, le=1, default=0.0)
    diverges_from_current: bool = False
    # Timezone-aware UTC timestamp taken when the instance is created.
    computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
# --- Document Metadata ---
class StorageRefs(BaseModel):
    # Optional string references embedded in DocumentMetadata.storage_refs;
    # each one defaults to None when absent.
    raw_html: Optional[str] = None
    raw_payload: Optional[str] = None
    normalized_text: Optional[str] = None
class DocumentMetadata(BaseModel):
    # Descriptive metadata for a document.  Every field has a default, so
    # the model can be built with no arguments.
    # A fresh UUID4 string is generated per instance when no id is supplied.
    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    symbol_candidates: List[str] = Field(default_factory=list)
    source_type: SourceType = SourceType.NEWS_API
    publisher: str = ""
    url: Optional[str] = None
    canonical_url: Optional[str] = None
    title: str = ""
    published_at: Optional[datetime] = None
    # Timezone-aware UTC timestamp taken when the instance is created.
    retrieved_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    language: str = "en"
    content_hash: str = ""
    storage_refs: StorageRefs = Field(default_factory=StorageRefs)
# --- Competitive Intelligence & Historical Patterns ---
class RelationshipType(str, Enum):
    # Relationship labels referenced by
    # CompetitorRelationshipSchema.relationship_type.
    DIRECT_RIVAL = "direct_rival"
    SAME_SECTOR = "same_sector"
    OVERLAPPING_PRODUCTS = "overlapping_products"
    SUPPLY_CHAIN_ADJACENT = "supply_chain_adjacent"
class CatalystTier(str, Enum):
    # Tier labels referenced by HistoricalPatternSchema.tier.
    # Members subclass ``str`` and compare equal to their serialized value.
    MAJOR_CORPORATE_DECISION = "major_corporate_decision"
    ROUTINE_SIGNAL = "routine_signal"
# Major corporate decision catalyst types (Req 11.1)
# Immutable membership set of plain strings; entries are listed
# alphabetically for readability (set order carries no meaning).
MAJOR_DECISION_CATALYSTS: frozenset[str] = frozenset(
    (
        "buyback",
        "dividend_change",
        "leadership_change",
        "legal",
        "m_and_a",
        "restructuring",
        "strategic_pivot",
    )
)
class CompetitorRelationshipSchema(BaseModel):
    # Relationship record between two companies identified by id strings.
    # Every field has a default, so the model can be built with no arguments.
    # A fresh UUID4 string is generated per instance when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    company_a_id: str = ""
    company_b_id: str = ""
    relationship_type: RelationshipType = RelationshipType.DIRECT_RIVAL
    strength: float = Field(ge=0, le=1, default=0.5)  # validated to [0, 1]
    bidirectional: bool = True
    source: str = "manual"
    active: bool = True
    # Each timestamp has its own factory, so the two defaults are taken by
    # separate calls to datetime.now and may differ slightly.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
class CompetitiveSignalRecordSchema(BaseModel):
    # Signal record linking a source ticker to a target ticker.  Every field
    # has a default, so the model can be built with no arguments.
    # A fresh UUID4 string is generated per instance when no id is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    source_document_id: str = ""
    source_ticker: str = ""
    target_ticker: str = ""
    # Plain string; not validated against the CatalystType enum.
    catalyst_type: str = ""
    # Scores below are validated to [0, 1] and default to 0.0.
    pattern_confidence: float = Field(ge=0, le=1, default=0.0)
    signal_direction: str = "neutral"
    signal_strength: float = Field(ge=0, le=1, default=0.0)
    relationship_strength: float = Field(ge=0, le=1, default=0.0)
    # Timezone-aware UTC timestamp taken when the instance is created.
    computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
class HistoricalPatternSchema(BaseModel):
    # Aggregated historical pattern for a source/target ticker pair and
    # catalyst.  Every field has a default.
    source_ticker: str = ""
    target_ticker: str = ""
    # Plain string; not validated against the CatalystType enum.
    catalyst_type: str = ""
    time_horizon: str = "7d"
    sample_count: int = 0
    # Fractions and scores below are validated to [0, 1]; nothing here
    # enforces that bullish_pct and bearish_pct sum to 1.
    bullish_pct: float = Field(ge=0, le=1, default=0.0)
    bearish_pct: float = Field(ge=0, le=1, default=0.0)
    avg_strength: float = Field(ge=0, le=1, default=0.0)
    # NOTE(review): unit of avg_time_to_resolution is not stated here - confirm.
    avg_time_to_resolution: float = 0.0
    pattern_confidence: float = Field(ge=0, le=1, default=0.0)
    data_start: Optional[datetime] = None
    data_end: Optional[datetime] = None
    tier: CatalystTier = CatalystTier.ROUTINE_SIGNAL
    insufficient_data: bool = False