4e010bc048
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build-1 Pipeline was successful
ci/woodpecker/push/build-2 Pipeline was successful
ci/woodpecker/push/build-3 Pipeline was successful
ci/woodpecker/push/finalize Pipeline was successful
Build and Push / lint-and-test (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.adapters.broker_adapter name:broker-adapter]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.aggregation.worker name:aggregation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.extractor.worker name:extractor]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.ingestion.worker name:ingestion]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.lake_publisher.worker name:lake-publisher]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.parser.worker name:parser]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.recommendation.worker name:recommendation]) (push) Has been cancelled
Build and Push / build-services (map[cmd:python -m services.scheduler.app name:scheduler]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.api.app:app --host 0.0.0.0 --port 8000 name:query-api]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.risk.app:app --host 0.0.0.0 --port 8000 name:risk]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.symbol_registry.app:app --host 0.0.0.0 --port 8000 name:symbol-registry]) (push) Has been cancelled
Build and Push / build-services (map[cmd:uvicorn services.trading.app:app --host 0.0.0.0 --port 8000 name:trading-engine]) (push) Has been cancelled
Build and Push / build-dashboard (push) Has been cancelled
Build and Push / build-superset (push) Has been cancelled
Build and Push / integration-test (push) Has been cancelled
Build and Push / beta-gate (push) Has been cancelled
Implement full probabilistic signal processing pipeline gated behind the probabilistic_scoring_enabled feature flag in risk_configs:
- Bayesian log-likelihood accumulator with Beta posterior and entropy
- Regime detector (trend-following, panic, mean-reversion, uncertainty)
- Source accuracy tracker with per-source historical prediction accuracy
- Sigmoid confidence gate replacing binary gate
- Information gain surprise weighting for rare events
- Adaptive recency decay with event-specific half-lives
- Regime multiplier replacing market context multiplier
- Weighted disagreement entropy for contradiction detection
- Multiplicative macro exposure with conditional integration
- Graph-distance attenuated competitive signal propagation
- Exponentially weighted momentum with volatility scaling
- Expected value recommendation gate

All changes are backward-compatible: flag=false preserves exact current behavior. New outputs are stored in existing JSONB columns (no schema changes except the source_accuracy table via migration 034).

Tests: 26 property-based tests (14 correctness properties), 99 unit tests, 1789 total tests passing with zero regressions.
413 lines
14 KiB
Python
413 lines
14 KiB
Python
"""Typed JSON schemas for document intelligence, trend summaries, and recommendations."""
|
||
from __future__ import annotations
|
||
|
||
import uuid
|
||
from datetime import datetime, timezone
|
||
from enum import Enum
|
||
from typing import List, Optional
|
||
|
||
from pydantic import BaseModel, Field
|
||
|
||
# --- Enums ---
|
||
|
||
class DocumentType(str, Enum):
    """String-valued labels used by the ``document_type`` model fields.

    The ``str`` mix-in makes every member compare equal to its lowercase
    value, so members can be used wherever a plain string is expected.
    """

    ARTICLE = "article"
    FILING = "filing"
    TRANSCRIPT = "transcript"
    PRESS_RELEASE = "press_release"
    MACRO_EVENT = "macro_event"
|
||
|
||
|
||
class SourceType(str, Enum):
    """String-valued labels used by ``DocumentMetadata.source_type``.

    Members compare equal to their lowercase string values.
    """

    MARKET_API = "market_api"
    NEWS_API = "news_api"
    FILINGS_API = "filings_api"
    WEB_SCRAPE = "web_scrape"
    BROKER = "broker"
|
||
|
||
|
||
class Sentiment(str, Enum):
    """String-valued sentiment labels used by ``CompanyImpact.sentiment``.

    Members compare equal to their lowercase string values.
    """

    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
    MIXED = "mixed"
|
||
|
||
|
||
class CatalystType(str, Enum):
    """String-valued catalyst labels used by ``CompanyImpact.catalyst_type``.

    Members compare equal to their lowercase string values.
    """

    EARNINGS = "earnings"
    PRODUCT = "product"
    LEGAL = "legal"
    MACRO = "macro"
    SUPPLY_CHAIN = "supply_chain"
    M_AND_A = "m_and_a"
    RATING_CHANGE = "rating_change"
    OTHER = "other"
|
||
|
||
|
||
class TrendDirection(str, Enum):
    """String-valued direction labels for trend summaries and projections.

    Used by ``TrendSummary.trend_direction`` and
    ``TrendProjectionSchema.projected_direction``.
    """

    BULLISH = "bullish"
    BEARISH = "bearish"
    MIXED = "mixed"
    NEUTRAL = "neutral"
|
||
|
||
|
||
class ActionType(str, Enum):
    """String-valued action labels used by ``Recommendation.action``.

    Members compare equal to their lowercase string values.
    """

    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
|
||
|
||
|
||
class RecommendationMode(str, Enum):
    """String-valued mode labels used by ``Recommendation.mode``.

    Members compare equal to their lowercase string values.
    """

    INFORMATIONAL = "informational"
    PAPER_ELIGIBLE = "paper_eligible"
    LIVE_ELIGIBLE = "live_eligible"
|
||
|
||
|
||
class TrendWindow(str, Enum):
    """String-valued window labels used by ``TrendSummary.window``.

    Apart from ``INTRADAY`` the values are compact day counts ("1d", "7d",
    "30d", "90d"), so look members up by value rather than by member name.
    """

    INTRADAY = "intraday"
    ONE_DAY = "1d"
    SEVEN_DAY = "7d"
    THIRTY_DAY = "30d"
    NINETY_DAY = "90d"
|
||
|
||
|
||
class ImpactType(str, Enum):
    """String-valued impact labels used by ``GlobalEventSchema.event_types``.

    Members compare equal to their lowercase string values.
    """

    SUPPLY_DISRUPTION = "supply_disruption"
    DEMAND_SHIFT = "demand_shift"
    COST_INCREASE = "cost_increase"
    REGULATORY_PRESSURE = "regulatory_pressure"
    CURRENCY_IMPACT = "currency_impact"
    COMMODITY_SHOCK = "commodity_shock"
    TRADE_BARRIER = "trade_barrier"
    GEOPOLITICAL_RISK = "geopolitical_risk"
|
||
|
||
|
||
class SeverityLevel(str, Enum):
    """String-valued severity labels used by ``GlobalEventSchema.severity``.

    Members are plain strings, so comparing them with ``<`` / ``>`` orders
    them alphabetically, not in the LOW -> CRITICAL declaration order.
    """

    LOW = "low"
    MODERATE = "moderate"
    HIGH = "high"
    CRITICAL = "critical"
|
||
|
||
|
||
class MarketPositionTier(str, Enum):
    """String-valued labels for ``ExposureProfileSchema.market_position_tier``.

    Members compare equal to their lowercase string values.
    """

    GLOBAL_LEADER = "global_leader"
    MULTINATIONAL = "multinational"
    REGIONAL = "regional"
    DOMESTIC = "domestic"
|
||
|
||
|
||
class EstimatedDuration(str, Enum):
    """String-valued labels for ``GlobalEventSchema.estimated_duration``.

    Members compare equal to their lowercase string values.
    """

    SHORT_TERM = "short_term"
    MEDIUM_TERM = "medium_term"
    LONG_TERM = "long_term"
|
||
|
||
|
||
# --- Autonomous Trading Engine Enums ---
|
||
|
||
|
||
class TradingDecisionType(str, Enum):
    """Two-valued string enum: ``"act"`` or ``"skip"``.

    Not referenced by any model in this module; presumably consumed by the
    trading engine code — verify against callers.
    """

    ACT = "act"
    SKIP = "skip"
|
||
|
||
|
||
class CircuitBreakerTriggerType(str, Enum):
    """String-valued labels naming circuit-breaker trigger kinds.

    Not referenced by any model in this module; presumably consumed by the
    trading engine code — verify against callers.
    """

    DAILY_LOSS = "daily_loss"
    SINGLE_POSITION = "single_position"
    VOLATILITY = "volatility"
    MANUAL = "manual"
|
||
|
||
|
||
class ReservePoolTriggerType(str, Enum):
    """String-valued labels naming reserve-pool trigger kinds.

    Not referenced by any model in this module; presumably consumed by the
    trading engine code — verify against callers.
    """

    PROFIT_SIPHON = "profit_siphon"
    EMERGENCY_LIQUIDATION = "emergency_liquidation"
    MANUAL_ADJUSTMENT = "manual_adjustment"
    INITIAL = "initial"
|
||
|
||
|
||
class NotificationChannel(str, Enum):
    """Two-valued string enum: ``"sms"`` or ``"email"``.

    Not referenced by any model in this module.
    """

    SMS = "sms"
    EMAIL = "email"
|
||
|
||
|
||
class RiskTierName(str, Enum):
    """String-valued risk tier names.

    Not referenced by any model in this module. ``MODERATE`` shares its
    value ("moderate") with ``SeverityLevel.MODERATE``; because both are
    ``str`` subclasses, the two members compare equal to each other.
    """

    CONSERVATIVE = "conservative"
    MODERATE = "moderate"
    AGGRESSIVE = "aggressive"
|
||
|
||
|
||
# --- Document Intelligence ---
|
||
|
||
class CompanyImpact(BaseModel):
    # One per-company entry; DocumentIntelligence.companies holds a list of
    # these. Documented with comments (not a docstring) so the generated
    # JSON schema is unchanged.

    # Required fields: no defaults, so validation fails when any is missing.
    ticker: str
    company_name: str
    relevance: float = Field(ge=0, le=1)  # constrained to [0, 1]
    sentiment: Sentiment
    impact_score: float = Field(ge=0, le=1)  # constrained to [0, 1]
    impact_horizon: str  # free-form string; nothing here restricts its values
    catalyst_type: CatalystType

    # Optional lists; default_factory gives every instance its own empty list.
    key_facts: List[str] = Field(default_factory=list)
    risks: List[str] = Field(default_factory=list)
    evidence_spans: List[str] = Field(default_factory=list)
|
||
|
||
|
||
class ModelMetadata(BaseModel):
    # Provenance record embedded in DocumentIntelligence, Recommendation and
    # GlobalEventSchema. Every field has a default, so ModelMetadata() is valid.
    #
    # NOTE(review): under pydantic v2 the "model_" prefix is a protected
    # namespace by default, so `model_name` may emit a UserWarning at class
    # creation — confirm the pinned pydantic version before renaming or
    # configuring `protected_namespaces`.
    provider: str = "ollama"
    model_name: str = ""
    prompt_version: str = ""
    schema_version: str = "2.0.0"
|
||
|
||
|
||
class DocumentIntelligence(BaseModel):
    # Extraction result for one document. Every field has a default, so an
    # empty instance is valid; the default document_id is a fresh random
    # UUID4 string generated per instance.
    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    summary: str = ""
    companies: List[CompanyImpact] = Field(default_factory=list)
    macro_themes: List[str] = Field(default_factory=list)

    # Scores below are constrained to [0, 1] and default to the midpoint 0.5.
    novelty_score: float = Field(ge=0, le=1, default=0.5)
    source_credibility: float = Field(ge=0, le=1, default=0.5)
    extraction_warnings: List[str] = Field(default_factory=list)
    confidence: float = Field(ge=0, le=1, default=0.5)

    # Field is named `model`; the other schemas in this module call the same
    # kind of field `model_metadata`.
    model: ModelMetadata = Field(default_factory=ModelMetadata)
|
||
|
||
|
||
# --- Trend Summary ---
|
||
|
||
class MarketContext(BaseModel):
    """Recent market data features for a symbol, used to enrich aggregation."""

    ticker: str = ""
    # Numeric features default to None, i.e. "not provided".
    price_change_pct: Optional[float] = None  # % change over the window
    avg_volume: Optional[float] = None  # average daily volume
    volume_change_pct: Optional[float] = None  # volume vs prior period
    volatility: Optional[float] = None  # intra-window price std dev
    latest_close: Optional[float] = None
    latest_bar_at: Optional[datetime] = None
    bars_available: int = 0

    @property
    def has_data(self) -> bool:
        # True only when at least one bar is reported; the Optional feature
        # fields are not inspected.
        return self.bars_available > 0
|
||
|
||
|
||
class DisagreementDetail(BaseModel):
    """Represents an explicit disagreement between document signals.

    Rather than collapsing contradictory signals into a single score,
    this captures the nature of the disagreement so downstream consumers
    can inspect *why* signals conflict.

    Requirements: 6.4
    """

    dimension: str = ""  # e.g. "sentiment", "catalyst", "impact_horizon"

    # Document ids on each side of the disagreement; fresh list per instance.
    positive_doc_ids: List[str] = Field(default_factory=list)
    negative_doc_ids: List[str] = Field(default_factory=list)

    # Weights are plain floats: unlike the score fields elsewhere in this
    # module, no [0, 1] bound is enforced.
    positive_weight: float = 0.0
    negative_weight: float = 0.0
    description: str = ""
|
||
|
||
|
||
class TrendSummary(BaseModel):
    # Aggregated trend record for one entity over one window. Every field
    # has a default, so TrendSummary() is valid.
    entity_type: str = "company"
    entity_id: str = ""
    window: TrendWindow = TrendWindow.SEVEN_DAY
    trend_direction: TrendDirection = TrendDirection.NEUTRAL

    # Scores are constrained to [0, 1].
    trend_strength: float = Field(ge=0, le=1, default=0.5)
    confidence: float = Field(ge=0, le=1, default=0.5)
    top_supporting_evidence: List[str] = Field(default_factory=list)
    top_opposing_evidence: List[str] = Field(default_factory=list)
    dominant_catalysts: List[str] = Field(default_factory=list)
    material_risks: List[str] = Field(default_factory=list)
    contradiction_score: float = Field(ge=0, le=1, default=0.0)
    disagreement_details: List[DisagreementDetail] = Field(default_factory=list)
    market_context: Optional[MarketContext] = None

    # Timezone-aware UTC timestamp taken when the instance is created.
    generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))

    # New optional fields for probabilistic mode
    # (all default to None and carry no range constraints)
    p_bull: Optional[float] = None  # Bayesian bullish probability
    alpha: Optional[float] = None  # Beta posterior α
    beta_param: Optional[float] = None  # Beta posterior β (named to avoid shadowing)
    bayesian_confidence: Optional[float] = None  # 1 - 4αβ/(α+β)²
    entropy: Optional[float] = None  # Shannon entropy H
    regime: Optional[str] = None  # Market regime classification
    pipeline_mode: str = "heuristic"  # "heuristic" or "probabilistic"
|
||
|
||
|
||
# --- Recommendation ---
|
||
|
||
class PositionSizing(BaseModel):
    # Sizing limits embedded in Recommendation.position_sizing. Both values
    # are fractions constrained to [0, 1] (so the defaults are 0.02 and
    # 0.005, not 2 and 0.5).
    portfolio_pct: float = Field(ge=0, le=1, default=0.02)
    max_loss_pct: float = Field(ge=0, le=1, default=0.005)
|
||
|
||
|
||
class Recommendation(BaseModel):
    # Recommendation record for one ticker. Every field has a default; the
    # defaults are WATCH action and INFORMATIONAL mode.
    recommendation_id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # random UUID4 string
    ticker: str = ""
    action: ActionType = ActionType.WATCH
    mode: RecommendationMode = RecommendationMode.INFORMATIONAL
    confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
    time_horizon: str = ""
    thesis: str = ""
    invalidation_conditions: List[str] = Field(default_factory=list)
    position_sizing: PositionSizing = Field(default_factory=PositionSizing)
    evidence_refs: List[str] = Field(default_factory=list)

    # NOTE(review): "model_"-prefixed field names fall in pydantic v2's
    # default protected namespace and may emit a UserWarning — confirm the
    # pinned pydantic version.
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)

    # Timezone-aware UTC timestamp taken when the instance is created.
    generated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))

    # New optional fields for probabilistic mode
    expected_value: Optional[float] = None  # EV = P_bull·R_up - P_bear·R_down
    p_bull: Optional[float] = None  # Bayesian bullish probability used
    pipeline_mode: str = "heuristic"  # "heuristic" or "probabilistic"
|
||
|
||
|
||
# --- Global News Interpolation ---
|
||
|
||
class GlobalEventSchema(BaseModel):
    # Global event record. Every field has a default; event_id defaults to a
    # fresh random UUID4 string per instance.
    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    event_types: List[ImpactType] = Field(default_factory=list)  # one event may carry several impact types
    severity: SeverityLevel = SeverityLevel.LOW

    # Affected scope, held as free-form strings.
    affected_regions: List[str] = Field(default_factory=list)
    affected_sectors: List[str] = Field(default_factory=list)
    affected_commodities: List[str] = Field(default_factory=list)

    summary: str = ""
    key_facts: List[str] = Field(default_factory=list)
    estimated_duration: EstimatedDuration = EstimatedDuration.SHORT_TERM
    confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
    source_document_id: str = ""

    # NOTE(review): "model_"-prefixed field names fall in pydantic v2's
    # default protected namespace and may emit a UserWarning — confirm the
    # pinned pydantic version.
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)

    # Timezone-aware UTC timestamp taken when the instance is created.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
||
|
||
|
||
class MacroImpactRecordSchema(BaseModel):
    # Record tying an event_id to a company/ticker with an impact score.
    # Every field has a default.
    event_id: str = ""
    company_id: str = ""
    ticker: str = ""
    macro_impact_score: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]

    # Plain string defaulting to "neutral"; not typed as an enum here, so
    # any string passes validation.
    impact_direction: str = "neutral"
    contributing_factors: List[str] = Field(default_factory=list)
    confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]

    # Timezone-aware UTC timestamp taken when the instance is created.
    computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
||
|
||
|
||
class ExposureProfileSchema(BaseModel):
    # Exposure profile for one company. Every field has a default.
    company_id: str = ""

    # Mapping of string keys to float values; neither the individual values
    # nor their sum is validated here.
    # NOTE(review): this is the only annotation in the module using builtin
    # `dict[...]` rather than a `typing` generic; it needs Python 3.9+ when
    # pydantic resolves the annotation — confirm the supported runtime.
    geographic_revenue_mix: dict[str, float] = Field(default_factory=dict)
    supply_chain_regions: List[str] = Field(default_factory=list)
    key_input_commodities: List[str] = Field(default_factory=list)
    regulatory_jurisdictions: List[str] = Field(default_factory=list)
    market_position_tier: MarketPositionTier = MarketPositionTier.REGIONAL
    export_dependency_pct: float = Field(ge=0, le=1, default=0.0)  # fraction in [0, 1]
    source: str = "manual"
    confidence: float = Field(ge=0, le=1, default=1.0)  # defaults to full confidence
    version: int = 1
    active: bool = True

    # Each default_factory runs separately, so the two default timestamps of
    # a new instance may differ by a few microseconds.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
||
|
||
|
||
class TrendProjectionSchema(BaseModel):
    # Projected trend linked to a trend window by id. Every field has a
    # default.
    trend_window_id: str = ""
    projected_direction: TrendDirection = TrendDirection.NEUTRAL
    projected_strength: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
    projected_confidence: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]

    # Plain string, not a TrendWindow member; the default "7d" happens to
    # equal TrendWindow.SEVEN_DAY's value.
    projection_horizon: str = "7d"
    driving_factors: List[str] = Field(default_factory=list)
    macro_contribution_pct: float = Field(ge=0, le=1, default=0.0)  # fraction in [0, 1]
    diverges_from_current: bool = False

    # Timezone-aware UTC timestamp taken when the instance is created.
    computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
||
|
||
|
||
# --- Document Metadata ---
|
||
|
||
class StorageRefs(BaseModel):
    # Optional string references embedded in DocumentMetadata.storage_refs;
    # None means no reference was recorded. Presumably these are object-store
    # keys or paths — verify against the writer.
    raw_html: Optional[str] = None
    raw_payload: Optional[str] = None
    normalized_text: Optional[str] = None
|
||
|
||
|
||
class DocumentMetadata(BaseModel):
    # Descriptive metadata for one document. Every field has a default;
    # document_id defaults to a fresh random UUID4 string per instance.
    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    symbol_candidates: List[str] = Field(default_factory=list)
    source_type: SourceType = SourceType.NEWS_API
    publisher: str = ""

    # URLs are plain optional strings; no URL validation is applied here.
    url: Optional[str] = None
    canonical_url: Optional[str] = None
    title: str = ""

    # published_at may be absent; retrieved_at defaults to a timezone-aware
    # UTC timestamp taken when the instance is created.
    published_at: Optional[datetime] = None
    retrieved_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    language: str = "en"
    content_hash: str = ""
    storage_refs: StorageRefs = Field(default_factory=StorageRefs)
|
||
|
||
|
||
# --- Competitive Intelligence & Historical Patterns ---
|
||
|
||
|
||
class RelationshipType(str, Enum):
    """String labels for ``CompetitorRelationshipSchema.relationship_type``.

    Members compare equal to their lowercase string values.
    """

    DIRECT_RIVAL = "direct_rival"
    SAME_SECTOR = "same_sector"
    OVERLAPPING_PRODUCTS = "overlapping_products"
    SUPPLY_CHAIN_ADJACENT = "supply_chain_adjacent"
|
||
|
||
|
||
class CatalystTier(str, Enum):
    """Two-valued string enum used by ``HistoricalPatternSchema.tier``.

    Members compare equal to their lowercase string values.
    """

    MAJOR_CORPORATE_DECISION = "major_corporate_decision"
    ROUTINE_SIGNAL = "routine_signal"
|
||
|
||
|
||
# Major corporate decision catalyst types (Req 11.1)
|
||
# Immutable set of catalyst-type strings; membership tests are O(1).
# NOTE(review): only "m_and_a" and "legal" coincide with CatalystType values;
# the remaining entries have no CatalystType member, which is presumably why
# this holds plain strings rather than enum members — confirm with callers.
MAJOR_DECISION_CATALYSTS: frozenset[str] = frozenset({
    "m_and_a",
    "legal",
    "restructuring",
    "leadership_change",
    "strategic_pivot",
    "buyback",
    "dividend_change",
})
|
||
|
||
|
||
class CompetitorRelationshipSchema(BaseModel):
    # Relationship between two companies, identified by id. Every field has
    # a default; `id` defaults to a fresh random UUID4 string per instance.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    company_a_id: str = ""
    company_b_id: str = ""
    relationship_type: RelationshipType = RelationshipType.DIRECT_RIVAL
    strength: float = Field(ge=0, le=1, default=0.5)  # constrained to [0, 1]
    bidirectional: bool = True
    source: str = "manual"
    active: bool = True

    # Each default_factory runs separately, so the two default timestamps of
    # a new instance may differ by a few microseconds.
    created_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
    updated_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
||
|
||
|
||
class CompetitiveSignalRecordSchema(BaseModel):
    # Signal record from a source ticker to a target ticker. Every field has
    # a default; `id` defaults to a fresh random UUID4 string per instance.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    source_document_id: str = ""
    source_ticker: str = ""
    target_ticker: str = ""

    # Plain string rather than CatalystType, so values outside that enum
    # (e.g. entries of MAJOR_DECISION_CATALYSTS) are accepted.
    catalyst_type: str = ""
    pattern_confidence: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]
    signal_direction: str = "neutral"  # plain string; no enum constrains it here
    signal_strength: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]
    relationship_strength: float = Field(ge=0, le=1, default=0.0)  # constrained to [0, 1]

    # Timezone-aware UTC timestamp taken when the instance is created.
    computed_at: datetime = Field(default_factory=lambda: datetime.now(tz=timezone.utc))
|
||
|
||
|
||
class HistoricalPatternSchema(BaseModel):
    # Aggregate statistics for a (source_ticker, target_ticker, catalyst_type)
    # pattern. Every field has a default.
    source_ticker: str = ""
    target_ticker: str = ""
    catalyst_type: str = ""  # plain string rather than CatalystType
    time_horizon: str = "7d"
    sample_count: int = 0  # no lower bound is enforced here

    # Fractions constrained individually to [0, 1]; nothing here checks that
    # bullish_pct + bearish_pct stays at or below 1.
    bullish_pct: float = Field(ge=0, le=1, default=0.0)
    bearish_pct: float = Field(ge=0, le=1, default=0.0)
    avg_strength: float = Field(ge=0, le=1, default=0.0)
    avg_time_to_resolution: float = 0.0  # unbounded float; unit not stated here — TODO confirm
    pattern_confidence: float = Field(ge=0, le=1, default=0.0)

    # Optional bounds of the underlying data range.
    data_start: Optional[datetime] = None
    data_end: Optional[datetime] = None
    tier: CatalystTier = CatalystTier.ROUTINE_SIGNAL
    insufficient_data: bool = False
|