"""Typed JSON schemas for document intelligence, trend summaries, and recommendations.""" from __future__ import annotations import uuid from datetime import datetime, timezone from enum import Enum from typing import List, Optional from pydantic import BaseModel, Field # --- Enums --- class DocumentType(str, Enum): ARTICLE = "article" FILING = "filing" TRANSCRIPT = "transcript" PRESS_RELEASE = "press_release" MACRO_EVENT = "macro_event" class SourceType(str, Enum): MARKET_API = "market_api" NEWS_API = "news_api" FILINGS_API = "filings_api" WEB_SCRAPE = "web_scrape" BROKER = "broker" class Sentiment(str, Enum): POSITIVE = "positive" NEGATIVE = "negative" NEUTRAL = "neutral" MIXED = "mixed" class CatalystType(str, Enum): EARNINGS = "earnings" PRODUCT = "product" LEGAL = "legal" MACRO = "macro" SUPPLY_CHAIN = "supply_chain" M_AND_A = "m_and_a" RATING_CHANGE = "rating_change" OTHER = "other" class TrendDirection(str, Enum): BULLISH = "bullish" BEARISH = "bearish" MIXED = "mixed" NEUTRAL = "neutral" class ActionType(str, Enum): BUY = "buy" SELL = "sell" HOLD = "hold" WATCH = "watch" class RecommendationMode(str, Enum): INFORMATIONAL = "informational" PAPER_ELIGIBLE = "paper_eligible" LIVE_ELIGIBLE = "live_eligible" class TrendWindow(str, Enum): INTRADAY = "intraday" ONE_DAY = "1d" SEVEN_DAY = "7d" THIRTY_DAY = "30d" NINETY_DAY = "90d" class ImpactType(str, Enum): SUPPLY_DISRUPTION = "supply_disruption" DEMAND_SHIFT = "demand_shift" COST_INCREASE = "cost_increase" REGULATORY_PRESSURE = "regulatory_pressure" CURRENCY_IMPACT = "currency_impact" COMMODITY_SHOCK = "commodity_shock" TRADE_BARRIER = "trade_barrier" GEOPOLITICAL_RISK = "geopolitical_risk" class SeverityLevel(str, Enum): LOW = "low" MODERATE = "moderate" HIGH = "high" CRITICAL = "critical" class MarketPositionTier(str, Enum): GLOBAL_LEADER = "global_leader" MULTINATIONAL = "multinational" REGIONAL = "regional" DOMESTIC = "domestic" class EstimatedDuration(str, Enum): SHORT_TERM = "short_term" MEDIUM_TERM = "medium_term" LONG_TERM = "long_term" # --- Autonomous Trading Engine Enums --- class TradingDecisionType(str, Enum): ACT = "act" SKIP = "skip" class CircuitBreakerTriggerType(str, Enum): DAILY_LOSS = "daily_loss" SINGLE_POSITION = "single_position" VOLATILITY = "volatility" MANUAL = "manual" class ReservePoolTriggerType(str, Enum): PROFIT_SIPHON = "profit_siphon" EMERGENCY_LIQUIDATION = "emergency_liquidation" MANUAL_ADJUSTMENT = "manual_adjustment" INITIAL = "initial" class NotificationChannel(str, Enum): SMS = "sms" EMAIL = "email" class RiskTierName(str, Enum): CONSERVATIVE = "conservative" MODERATE = "moderate" AGGRESSIVE = "aggressive" # --- Document Intelligence --- class CompanyImpact(BaseModel): ticker: str company_name: str relevance: float = Field(ge=0, le=1) sentiment: Sentiment impact_score: float = Field(ge=0, le=1) impact_horizon: str catalyst_type: CatalystType key_facts: List[str] = Field(default_factory=list) risks: List[str] = Field(default_factory=list) evidence_spans: List[str] = Field(default_factory=list) class ModelMetadata(BaseModel): provider: str = "ollama" model_name: str = "" prompt_version: str = "" schema_version: str = "2.0.0" class DocumentIntelligence(BaseModel): document_id: str = Field(default_factory=lambda: str(uuid.uuid4())) document_type: DocumentType = DocumentType.ARTICLE summary: str = "" companies: List[CompanyImpact] = Field(default_factory=list) macro_themes: List[str] = Field(default_factory=list) novelty_score: float = Field(ge=0, le=1, default=0.5) source_credibility: float = Field(ge=0, le=1, default=0.5) extraction_warnings: List[str] = Field(default_factory=list) confidence: float = Field(ge=0, le=1, default=0.5) model: ModelMetadata = Field(default_factory=ModelMetadata) # --- Trend Summary --- class MarketContext(BaseModel): """Recent market data features for a symbol, used to enrich aggregation.""" ticker: str = "" price_change_pct: Optional[float] = None # % change over the window avg_volume: Optional[float] = None # average daily volume volume_change_pct: Optional[float] = None # volume vs prior period volatility: Optional[float] = None # intra-window price std dev latest_close: Optional[float] = None latest_bar_at: Optional[datetime] = None bars_available: int = 0 @property def has_data(self) -> bool: return self.bars_available > 0 class DisagreementDetail(BaseModel): """Represents an explicit disagreement between document signals. Rather than collapsing contradictory signals into a single score, this captures the nature of the disagreement so downstream consumers can inspect *why* signals conflict. Requirements: 6.4 """ dimension: str = "" # e.g. "sentiment", "catalyst", "impact_horizon" positive_doc_ids: List[str] = Field(default_factory=list) negative_doc_ids: List[str] = Field(default_factory=list) positive_weight: float = 0.0 negative_weight: float = 0.0 description: str = "" class TrendSummary(BaseModel): entity_type: str = "company" entity_id: str = "" window: TrendWindow = TrendWindow.SEVEN_DAY trend_direction: TrendDirection = TrendDirection.NEUTRAL trend_strength: float = Field(ge=0, le=1, default=0.5) confidence: float = Field(ge=0, le=1, default=0.5) top_supporting_evidence: List[str] = Field(default_factory=list) top_opposing_evidence: List[str] = Field(default_factory=list) dominant_catalysts: List[str] = Field(default_factory=list) material_risks: List[str] = Field(default_factory=list) contradiction_score: float = Field(ge=0, le=1, default=0.0) disagreement_details: List[DisagreementDetail] = Field(default_factory=list) market_context: Optional[MarketContext] = None generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) # New optional fields for probabilistic mode p_bull: Optional[float] = None # Bayesian bullish probability alpha: Optional[float] = None # Beta posterior α beta_param: Optional[float] = None # Beta posterior β (named to avoid shadowing) bayesian_confidence: Optional[float] = None # 1 - 4αβ/(α+β)² entropy: Optional[float] = None # Shannon entropy H regime: Optional[str] = None # Market regime classification pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic" # --- Recommendation --- class PositionSizing(BaseModel): portfolio_pct: float = Field(ge=0, le=1, default=0.02) max_loss_pct: float = Field(ge=0, le=1, default=0.005) class Recommendation(BaseModel): recommendation_id: str = Field(default_factory=lambda: str(uuid.uuid4())) ticker: str = "" action: ActionType = ActionType.WATCH mode: RecommendationMode = RecommendationMode.INFORMATIONAL confidence: float = Field(ge=0, le=1, default=0.5) time_horizon: str = "" thesis: str = "" invalidation_conditions: List[str] = Field(default_factory=list) position_sizing: PositionSizing = Field(default_factory=PositionSizing) evidence_refs: List[str] = Field(default_factory=list) model_metadata: ModelMetadata = Field(default_factory=ModelMetadata) generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) # New optional fields for probabilistic mode expected_value: Optional[float] = None # EV = P_bull·R_up - P_bear·R_down p_bull: Optional[float] = None # Bayesian bullish probability used pipeline_mode: str = "heuristic" # "heuristic" or "probabilistic" # --- Global News Interpolation --- class GlobalEventSchema(BaseModel): event_id: str = Field(default_factory=lambda: str(uuid.uuid4())) event_types: List[ImpactType] = Field(default_factory=list) severity: SeverityLevel = SeverityLevel.LOW affected_regions: List[str] = Field(default_factory=list) affected_sectors: List[str] = Field(default_factory=list) affected_commodities: List[str] = Field(default_factory=list) summary: str = "" key_facts: List[str] = Field(default_factory=list) estimated_duration: EstimatedDuration = EstimatedDuration.SHORT_TERM confidence: float = Field(ge=0, le=1, default=0.5) source_document_id: str = "" model_metadata: ModelMetadata = Field(default_factory=ModelMetadata) created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) class MacroImpactRecordSchema(BaseModel): event_id: str = "" company_id: str = "" ticker: str = "" macro_impact_score: float = Field(ge=0, le=1, default=0.0) impact_direction: str = "neutral" contributing_factors: List[str] = Field(default_factory=list) confidence: float = Field(ge=0, le=1, default=0.5) computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) class ExposureProfileSchema(BaseModel): company_id: str = "" geographic_revenue_mix: dict[str, float] = Field(default_factory=dict) supply_chain_regions: List[str] = Field(default_factory=list) key_input_commodities: List[str] = Field(default_factory=list) regulatory_jurisdictions: List[str] = Field(default_factory=list) market_position_tier: MarketPositionTier = MarketPositionTier.REGIONAL export_dependency_pct: float = Field(ge=0, le=1, default=0.0) source: str = "manual" confidence: float = Field(ge=0, le=1, default=1.0) version: int = 1 active: bool = True created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) class TrendProjectionSchema(BaseModel): trend_window_id: str = "" projected_direction: TrendDirection = TrendDirection.NEUTRAL projected_strength: float = Field(ge=0, le=1, default=0.5) projected_confidence: float = Field(ge=0, le=1, default=0.5) projection_horizon: str = "7d" driving_factors: List[str] = Field(default_factory=list) macro_contribution_pct: float = Field(ge=0, le=1, default=0.0) diverges_from_current: bool = False computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) # --- Document Metadata --- class StorageRefs(BaseModel): raw_html: Optional[str] = None raw_payload: Optional[str] = None normalized_text: Optional[str] = None class DocumentMetadata(BaseModel): document_id: str = Field(default_factory=lambda: str(uuid.uuid4())) document_type: DocumentType = DocumentType.ARTICLE symbol_candidates: List[str] = Field(default_factory=list) source_type: SourceType = SourceType.NEWS_API publisher: str = "" url: Optional[str] = None canonical_url: Optional[str] = None title: str = "" published_at: Optional[datetime] = None retrieved_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) language: str = "en" content_hash: str = "" storage_refs: StorageRefs = Field(default_factory=StorageRefs) # --- Competitive Intelligence & Historical Patterns --- class RelationshipType(str, Enum): DIRECT_RIVAL = "direct_rival" SAME_SECTOR = "same_sector" OVERLAPPING_PRODUCTS = "overlapping_products" SUPPLY_CHAIN_ADJACENT = "supply_chain_adjacent" class CatalystTier(str, Enum): MAJOR_CORPORATE_DECISION = "major_corporate_decision" ROUTINE_SIGNAL = "routine_signal" # Major corporate decision catalyst types (Req 11.1) MAJOR_DECISION_CATALYSTS: frozenset[str] = frozenset({ "m_and_a", "legal", "restructuring", "leadership_change", "strategic_pivot", "buyback", "dividend_change", }) class CompetitorRelationshipSchema(BaseModel): id: str = Field(default_factory=lambda: str(uuid.uuid4())) company_a_id: str = "" company_b_id: str = "" relationship_type: RelationshipType = RelationshipType.DIRECT_RIVAL strength: float = Field(ge=0, le=1, default=0.5) bidirectional: bool = True source: str = "manual" active: bool = True created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) class CompetitiveSignalRecordSchema(BaseModel): id: str = Field(default_factory=lambda: str(uuid.uuid4())) source_document_id: str = "" source_ticker: str = "" target_ticker: str = "" catalyst_type: str = "" pattern_confidence: float = Field(ge=0, le=1, default=0.0) signal_direction: str = "neutral" signal_strength: float = Field(ge=0, le=1, default=0.0) relationship_strength: float = Field(ge=0, le=1, default=0.0) computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc)) class HistoricalPatternSchema(BaseModel): source_ticker: str = "" target_ticker: str = "" catalyst_type: str = "" time_horizon: str = "7d" sample_count: int = 0 bullish_pct: float = Field(ge=0, le=1, default=0.0) bearish_pct: float = Field(ge=0, le=1, default=0.0) avg_strength: float = Field(ge=0, le=1, default=0.0) avg_time_to_resolution: float = 0.0 pattern_confidence: float = Field(ge=0, le=1, default=0.0) data_start: Optional[datetime] = None data_end: Optional[datetime] = None tier: CatalystTier = CatalystTier.ROUTINE_SIGNAL insufficient_data: bool = False