# stonks-oracle/services/shared/schemas.py
"""Typed JSON schemas for document intelligence, trend summaries, and recommendations."""
from __future__ import annotations

import uuid
from datetime import datetime, timezone
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, Field
# --- Enums ---
class DocumentType(str, Enum):
    """Kind of source document being processed.

    Members are ``str`` instances, so they compare equal to their raw
    string values (e.g. ``DocumentType.ARTICLE == "article"``).
    """

    ARTICLE = "article"
    FILING = "filing"
    TRANSCRIPT = "transcript"
    PRESS_RELEASE = "press_release"
class SourceType(str, Enum):
    """Channel through which a document or data point was obtained.

    Members are ``str`` instances and compare equal to their raw values.
    """

    MARKET_API = "market_api"
    NEWS_API = "news_api"
    FILINGS_API = "filings_api"
    WEB_SCRAPE = "web_scrape"
    BROKER = "broker"
class Sentiment(str, Enum):
    """Sentiment label attached to a company impact.

    Members are ``str`` instances and compare equal to their raw values.
    """

    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
    MIXED = "mixed"
class CatalystType(str, Enum):
    """Category of event driving a company impact.

    ``OTHER`` is the catch-all member. Members are ``str`` instances and
    compare equal to their raw values.
    """

    EARNINGS = "earnings"
    PRODUCT = "product"
    LEGAL = "legal"
    MACRO = "macro"
    SUPPLY_CHAIN = "supply_chain"
    M_AND_A = "m_and_a"
    RATING_CHANGE = "rating_change"
    OTHER = "other"
class TrendDirection(str, Enum):
    """Overall direction reported by a trend summary.

    Members are ``str`` instances and compare equal to their raw values.
    """

    BULLISH = "bullish"
    BEARISH = "bearish"
    MIXED = "mixed"
    NEUTRAL = "neutral"
class ActionType(str, Enum):
    """Action a recommendation can propose.

    Members are ``str`` instances and compare equal to their raw values.
    """

    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
class RecommendationMode(str, Enum):
    """Eligibility tier of a recommendation.

    Members are ``str`` instances and compare equal to their raw values.
    """

    INFORMATIONAL = "informational"
    PAPER_ELIGIBLE = "paper_eligible"
    LIVE_ELIGIBLE = "live_eligible"
class TrendWindow(str, Enum):
    """Look-back window over which a trend is summarised.

    Values are short codes (``"1d"``, ``"7d"``, ...) rather than the member
    names. Members are ``str`` instances and compare equal to those codes.
    """

    INTRADAY = "intraday"
    ONE_DAY = "1d"
    SEVEN_DAY = "7d"
    THIRTY_DAY = "30d"
    NINETY_DAY = "90d"
# --- Document Intelligence ---
class CompanyImpact(BaseModel):
    """Impact of a single document on a single company.

    The scalar fields have no defaults and must be supplied by the caller;
    the three list fields default to fresh empty lists.
    """

    ticker: str
    company_name: str
    # Constrained to the closed interval [0, 1].
    relevance: float = Field(ge=0, le=1)
    sentiment: Sentiment
    # Constrained to the closed interval [0, 1].
    impact_score: float = Field(ge=0, le=1)
    # Free-form string; the expected vocabulary is not defined in this module.
    impact_horizon: str
    catalyst_type: CatalystType
    key_facts: List[str] = Field(default_factory=list)
    risks: List[str] = Field(default_factory=list)
    evidence_spans: List[str] = Field(default_factory=list)
class ModelMetadata(BaseModel):
    """Identifies the model, prompt and schema version behind an output.

    All fields are optional strings with defaults.
    """

    provider: str = "ollama"
    # NOTE(review): under Pydantic v2 a field name starting with `model_` can
    # trigger a protected-namespace warning -- confirm the pinned version.
    model_name: str = ""
    prompt_version: str = ""
    schema_version: str = "2.0.0"
class DocumentIntelligence(BaseModel):
    """Structured extraction result for one document.

    Every field has a default, so an instance can be built with no
    arguments; ``document_id`` is then a freshly generated UUID4 string.
    """

    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    summary: str = ""
    companies: List[CompanyImpact] = Field(default_factory=list)
    macro_themes: List[str] = Field(default_factory=list)
    # The three scores below are each constrained to [0, 1] and default to 0.5.
    novelty_score: float = Field(default=0.5, ge=0, le=1)
    source_credibility: float = Field(default=0.5, ge=0, le=1)
    extraction_warnings: List[str] = Field(default_factory=list)
    confidence: float = Field(default=0.5, ge=0, le=1)
    model: ModelMetadata = Field(default_factory=ModelMetadata)
# --- Trend Summary ---
class MarketContext(BaseModel):
    """Recent market data features for a symbol, used to enrich aggregation.

    Each numeric feature is optional and defaults to ``None``. Use
    ``has_data`` to check whether any bars back this context.
    """

    ticker: str = ""
    price_change_pct: Optional[float] = None  # % change over the window
    avg_volume: Optional[float] = None  # average daily volume
    volume_change_pct: Optional[float] = None  # volume vs prior period
    volatility: Optional[float] = None  # intra-window price std dev
    latest_close: Optional[float] = None
    latest_bar_at: Optional[datetime] = None
    bars_available: int = 0

    @property
    def has_data(self) -> bool:
        """Return True when ``bars_available`` is greater than zero."""
        return self.bars_available > 0
class DisagreementDetail(BaseModel):
    """Represents an explicit disagreement between document signals.

    Rather than collapsing contradictory signals into a single score,
    this captures the nature of the disagreement so downstream consumers
    can inspect *why* signals conflict.

    Requirements: 6.4
    """

    dimension: str = ""  # e.g. "sentiment", "catalyst", "impact_horizon"
    # Document IDs on each side of the disagreement.
    positive_doc_ids: List[str] = Field(default_factory=list)
    negative_doc_ids: List[str] = Field(default_factory=list)
    # Weight on each side; no range constraint is enforced here.
    positive_weight: float = 0.0
    negative_weight: float = 0.0
    description: str = ""
class TrendSummary(BaseModel):
    """Aggregated trend for one entity over one look-back window.

    Every field has a default. ``generated_at`` is set at construction
    time to the current UTC time as a naive ``datetime``.
    """

    entity_type: str = "company"
    entity_id: str = ""
    window: TrendWindow = TrendWindow.SEVEN_DAY
    trend_direction: TrendDirection = TrendDirection.NEUTRAL
    # Both constrained to [0, 1], default 0.5.
    trend_strength: float = Field(default=0.5, ge=0, le=1)
    confidence: float = Field(default=0.5, ge=0, le=1)
    top_supporting_evidence: List[str] = Field(default_factory=list)
    top_opposing_evidence: List[str] = Field(default_factory=list)
    dominant_catalysts: List[str] = Field(default_factory=list)
    material_risks: List[str] = Field(default_factory=list)
    # Constrained to [0, 1], default 0.0.
    contradiction_score: float = Field(default=0.0, ge=0, le=1)
    disagreement_details: List[DisagreementDetail] = Field(default_factory=list)
    market_context: Optional[MarketContext] = None
    # `datetime.utcnow` is deprecated since Python 3.12. This yields the same
    # naive-UTC value, so comparisons against naive timestamps keep working.
    generated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
# --- Recommendation ---
class PositionSizing(BaseModel):
    """Sizing limits attached to a recommendation.

    Both values are constrained to [0, 1].
    """

    # presumably a fraction of the portfolio (0.02 == 2%) -- TODO confirm
    portfolio_pct: float = Field(default=0.02, ge=0, le=1)
    # presumably a fraction as well (0.005 == 0.5%) -- TODO confirm
    max_loss_pct: float = Field(default=0.005, ge=0, le=1)
class Recommendation(BaseModel):
    """A single recommendation for one ticker.

    Every field has a default: a no-argument instance is a ``WATCH`` action
    in ``INFORMATIONAL`` mode with a freshly generated UUID4 id.
    ``generated_at`` is set at construction time to the current UTC time as
    a naive ``datetime``.
    """

    recommendation_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    ticker: str = ""
    action: ActionType = ActionType.WATCH
    mode: RecommendationMode = RecommendationMode.INFORMATIONAL
    # Constrained to [0, 1], default 0.5.
    confidence: float = Field(default=0.5, ge=0, le=1)
    time_horizon: str = ""
    thesis: str = ""
    invalidation_conditions: List[str] = Field(default_factory=list)
    position_sizing: PositionSizing = Field(default_factory=PositionSizing)
    evidence_refs: List[str] = Field(default_factory=list)
    # NOTE(review): under Pydantic v2 a field name starting with `model_` can
    # trigger a protected-namespace warning -- confirm the pinned version.
    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)
    # `datetime.utcnow` is deprecated since Python 3.12. This yields the same
    # naive-UTC value, so comparisons against naive timestamps keep working.
    generated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
# --- Document Metadata ---
class StorageRefs(BaseModel):
    """Optional references to stored forms of a document.

    Each reference is a string or ``None``; the reference format (path,
    key, URL) is not defined in this module.
    """

    raw_html: Optional[str] = None
    raw_payload: Optional[str] = None
    normalized_text: Optional[str] = None
class DocumentMetadata(BaseModel):
    """Descriptive metadata for an ingested document.

    Every field has a default. ``document_id`` is a freshly generated UUID4
    string, and ``retrieved_at`` is set at construction time to the current
    UTC time as a naive ``datetime``.
    """

    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    symbol_candidates: List[str] = Field(default_factory=list)
    source_type: SourceType = SourceType.NEWS_API
    publisher: str = ""
    url: Optional[str] = None
    canonical_url: Optional[str] = None
    title: str = ""
    published_at: Optional[datetime] = None
    # `datetime.utcnow` is deprecated since Python 3.12. This yields the same
    # naive-UTC value, so comparisons against naive timestamps keep working.
    retrieved_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    language: str = "en"
    # The hashing algorithm is not specified in this module.
    content_hash: str = ""
    storage_refs: StorageRefs = Field(default_factory=StorageRefs)