phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -1 +1,45 @@
|
||||
# Ingestion Adapters
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
from .resilient import ResilientAdapter, RetryConfig, RetryStats, compute_delay
|
||||
from .broker_adapter import (
|
||||
AccountInfo,
|
||||
AlpacaBrokerAdapter,
|
||||
BrokerDataAdapter,
|
||||
OrderEventType,
|
||||
OrderRequest,
|
||||
OrderResponse,
|
||||
OrderSide,
|
||||
OrderStatus,
|
||||
OrderType,
|
||||
PositionInfo,
|
||||
TradingMode,
|
||||
)
|
||||
from .filings_adapter import FilingsDataAdapter, SECEdgarAdapter
|
||||
from .market_adapter import MarketDataAdapter, PolygonMarketAdapter
|
||||
from .news_adapter import NewsDataAdapter, PolygonNewsAdapter
|
||||
|
||||
__all__ = [
|
||||
"AccountInfo",
|
||||
"AdapterResult",
|
||||
"AlpacaBrokerAdapter",
|
||||
"BaseAdapter",
|
||||
"BrokerDataAdapter",
|
||||
"FilingsDataAdapter",
|
||||
"MarketDataAdapter",
|
||||
"NewsDataAdapter",
|
||||
"OrderEventType",
|
||||
"OrderRequest",
|
||||
"OrderResponse",
|
||||
"OrderSide",
|
||||
"OrderStatus",
|
||||
"OrderType",
|
||||
"PolygonMarketAdapter",
|
||||
"PolygonNewsAdapter",
|
||||
"PositionInfo",
|
||||
"ResilientAdapter",
|
||||
"RetryConfig",
|
||||
"RetryStats",
|
||||
"SECEdgarAdapter",
|
||||
"TradingMode",
|
||||
"compute_delay",
|
||||
]
|
||||
|
||||
@@ -1,29 +1,84 @@
|
||||
"""Base adapter interface for all external API integrations."""
|
||||
"""Base adapter interface for all external API integrations.
|
||||
|
||||
All ingestion adapters follow the same contract:
|
||||
1. Fetch external payloads for a given ticker/source config.
|
||||
2. Return a structured result with raw bytes, parsed items, and metadata.
|
||||
3. The ingestion worker handles MinIO upload, PostgreSQL metadata, and downstream job emission.
|
||||
|
||||
Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 3.1, 3.2, 3.3, 3.4
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class AdapterResult:
|
||||
"""Result of a single adapter fetch operation."""
|
||||
|
||||
source_type: str
|
||||
ticker: str
|
||||
items: List[Dict[str, Any]]
|
||||
items: list[dict[str, Any]]
|
||||
raw_payload: bytes
|
||||
content_hash: str
|
||||
fetched_at: datetime
|
||||
error: Optional[str] = None
|
||||
error: str | None = None
|
||||
# HTTP metadata for observability
|
||||
http_status: int | None = None
|
||||
response_time_ms: float | None = None
|
||||
# Additional metadata the adapter wants to pass downstream
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def ok(self) -> bool:
|
||||
"""True if the fetch succeeded without error."""
|
||||
return self.error is None and len(self.items) > 0
|
||||
|
||||
@property
|
||||
def item_count(self) -> int:
|
||||
return len(self.items)
|
||||
|
||||
|
||||
class BaseAdapter(ABC):
    """Interface for all ingestion adapters.

    Subclasses implement fetch() for their specific API and source_type()
    to identify the adapter class. The ingestion worker orchestrates
    persistence and downstream job emission.
    """

    @abstractmethod
    async def fetch(self, ticker: str, config: dict[str, Any]) -> "AdapterResult":
        """Fetch data for a given ticker using source config.

        Args:
            ticker: The company ticker symbol.
            config: Source-specific configuration from the sources table.

        Returns:
            AdapterResult with raw payload, parsed items, and metadata.
        """
        ...

    @abstractmethod
    def source_type(self) -> str:
        """Return the source type identifier for this adapter (e.g. 'market_api')."""
        ...

    def bucket_name(self) -> str:
        """Return the MinIO bucket name for raw artifact storage.

        The default is ``stonks-raw-`` followed by the source type with any
        ``_api`` substring removed and underscores turned into hyphens.
        Override in subclasses if the bucket differs from the default pattern.
        """
        suffix = self.source_type().replace("_api", "").replace("_", "-")
        return f"stonks-raw-{suffix}"

    def artifact_path(self, ticker: str, document_id: str, now: datetime) -> str:
        """Build the MinIO object path for a raw artifact.

        Pattern: {source_type}/{ticker}/{yyyy}/{mm}/{dd}/{document_id}/raw.json
        (relative key, no leading slash).
        """
        date_part = now.strftime("%Y/%m/%d")
        return f"{self.source_type()}/{ticker}/{date_part}/{document_id}/raw.json"
|
||||
|
||||
@@ -1,9 +1,19 @@
|
||||
"""Broker API adapter - paper/live trading, orders, positions, balances."""
|
||||
"""Broker API adapter interface for paper trading and order events.
|
||||
|
||||
The BrokerDataAdapter is the abstract interface for all broker integrations.
|
||||
AlpacaBrokerAdapter is the first concrete implementation, targeting the
|
||||
Alpaca Markets REST API for paper and live trading.
|
||||
|
||||
Requirements: 2.4, 2.5, 8.1, 8.3, 8.5
|
||||
"""
|
||||
import hashlib
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -12,97 +22,584 @@ from .base import AdapterResult, BaseAdapter
|
||||
logger = logging.getLogger("broker_adapter")
|
||||
|
||||
|
||||
class BrokerAdapter(BaseAdapter):
|
||||
"""Broker API adapter supporting paper and live modes."""
|
||||
# --- Broker-specific enums ---
|
||||
|
||||
def __init__(self, api_key: str = "", api_secret: str = "", base_url: str = "", mode: str = "paper"):
|
||||
self.api_key = api_key
|
||||
self.api_secret = api_secret
|
||||
self.base_url = base_url
|
||||
self.mode = mode # paper | live
|
||||
|
||||
class OrderSide(str, Enum):
    """Direction of an order: buy or sell (string-valued)."""

    BUY = "buy"
    SELL = "sell"
|
||||
|
||||
|
||||
class OrderType(str, Enum):
    """Supported order types (string-valued)."""

    MARKET = "market"
    LIMIT = "limit"
    STOP = "stop"
    STOP_LIMIT = "stop_limit"
|
||||
|
||||
|
||||
class OrderStatus(str, Enum):
    """Order states used by this module (string-valued)."""

    PENDING = "pending"
    SUBMITTED = "submitted"
    ACCEPTED = "accepted"
    PARTIALLY_FILLED = "partially_filled"
    FILLED = "filled"
    CANCELLED = "cancelled"
    REJECTED = "rejected"
    EXPIRED = "expired"
|
||||
|
||||
|
||||
class TradingMode(str, Enum):
    """Trading environment: paper or live (string-valued)."""

    PAPER = "paper"
    LIVE = "live"
|
||||
|
||||
|
||||
class OrderEventType(str, Enum):
    """Kinds of order events (string-valued)."""

    SUBMITTED = "submitted"
    ACCEPTED = "accepted"
    REJECTED = "rejected"
    FILL = "fill"
    PARTIAL_FILL = "partial_fill"
    CANCELLED = "cancelled"
    EXPIRED = "expired"
|
||||
|
||||
|
||||
# --- Data structures ---
|
||||
|
||||
|
||||
class OrderRequest:
    """Represents an order to be submitted to a broker."""

    def __init__(
        self,
        ticker: str,
        side: OrderSide,
        quantity: float,
        order_type: OrderType = OrderType.MARKET,
        limit_price: float | None = None,
        stop_price: float | None = None,
        time_in_force: str = "day",
        idempotency_key: str | None = None,
    ) -> None:
        """Store the order fields.

        Args:
            ticker: Symbol to trade.
            side: Buy or sell.
            quantity: Order quantity.
            order_type: Order type; defaults to a market order.
            limit_price: Optional limit price.
            stop_price: Optional stop price.
            time_in_force: Time-in-force string; defaults to "day".
            idempotency_key: Caller-supplied key; when missing or empty a
                random UUID4 string is generated.
        """
        self.ticker = ticker
        self.side = side
        self.quantity = quantity
        self.order_type = order_type
        self.limit_price = limit_price
        self.stop_price = stop_price
        self.time_in_force = time_in_force
        # Every request carries a key, so fall back to a fresh UUID.
        self.idempotency_key = idempotency_key or str(uuid.uuid4())

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict for audit/persistence.

        ``limit_price`` and ``stop_price`` are included only when set.
        """
        d: dict[str, Any] = {
            "ticker": self.ticker,
            "side": self.side.value,
            "quantity": self.quantity,
            "order_type": self.order_type.value,
            "time_in_force": self.time_in_force,
            "idempotency_key": self.idempotency_key,
        }
        if self.limit_price is not None:
            d["limit_price"] = self.limit_price
        if self.stop_price is not None:
            d["stop_price"] = self.stop_price
        return d
|
||||
|
||||
|
||||
class OrderResponse:
    """Represents a broker's response to an order submission."""

    def __init__(
        self,
        broker_order_id: str,
        status: OrderStatus,
        ticker: str,
        side: OrderSide,
        quantity: float,
        filled_quantity: float = 0.0,
        filled_avg_price: float | None = None,
        submitted_at: datetime | None = None,
        raw_response: dict[str, Any] | None = None,
        error: str | None = None,
    ) -> None:
        """Store the response fields.

        ``submitted_at`` defaults to the current UTC time and
        ``raw_response`` to an empty dict when not supplied.
        """
        self.broker_order_id = broker_order_id
        self.status = status
        self.ticker = ticker
        self.side = side
        self.quantity = quantity
        self.filled_quantity = filled_quantity
        self.filled_avg_price = filled_avg_price
        self.submitted_at = submitted_at or datetime.now(timezone.utc)
        self.raw_response = raw_response or {}
        self.error = error

    @property
    def ok(self) -> bool:
        """True when no error is set and the status is not rejected, cancelled or expired."""
        return self.error is None and self.status not in (
            OrderStatus.REJECTED,
            OrderStatus.CANCELLED,
            OrderStatus.EXPIRED,
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; ``raw_response`` is not included."""
        return {
            "broker_order_id": self.broker_order_id,
            "status": self.status.value,
            "ticker": self.ticker,
            "side": self.side.value,
            "quantity": self.quantity,
            "filled_quantity": self.filled_quantity,
            "filled_avg_price": self.filled_avg_price,
            "submitted_at": self.submitted_at.isoformat(),
            "error": self.error,
        }
|
||||
|
||||
|
||||
class PositionInfo:
    """Represents a current position from the broker."""

    def __init__(
        self,
        ticker: str,
        quantity: float,
        avg_entry_price: float,
        current_price: float,
        unrealized_pnl: float,
        market_value: float,
        side: str = "long",
    ) -> None:
        """Store the position fields; ``side`` defaults to "long"."""
        self.ticker = ticker
        self.quantity = quantity
        self.avg_entry_price = avg_entry_price
        self.current_price = current_price
        self.unrealized_pnl = unrealized_pnl
        self.market_value = market_value
        self.side = side

    def to_dict(self) -> dict[str, Any]:
        """Serialize all position fields to a plain dict."""
        return {
            "ticker": self.ticker,
            "quantity": self.quantity,
            "avg_entry_price": self.avg_entry_price,
            "current_price": self.current_price,
            "unrealized_pnl": self.unrealized_pnl,
            "market_value": self.market_value,
            "side": self.side,
        }
|
||||
|
||||
|
||||
class AccountInfo:
    """Represents broker account summary."""

    def __init__(
        self,
        account_id: str,
        buying_power: float,
        cash: float,
        portfolio_value: float,
        currency: str = "USD",
        mode: TradingMode = TradingMode.PAPER,
    ) -> None:
        """Store the account fields; defaults are USD and paper mode."""
        self.account_id = account_id
        self.buying_power = buying_power
        self.cash = cash
        self.portfolio_value = portfolio_value
        self.currency = currency
        self.mode = mode

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; ``mode`` is emitted as its string value."""
        return {
            "account_id": self.account_id,
            "buying_power": self.buying_power,
            "cash": self.cash,
            "portfolio_value": self.portfolio_value,
            "currency": self.currency,
            "mode": self.mode.value,
        }
|
||||
|
||||
|
||||
# --- Abstract interface ---
|
||||
|
||||
|
||||
class BrokerDataAdapter(BaseAdapter, ABC):
    """Abstract interface for broker API integrations.

    Extends BaseAdapter with broker-specific operations:
    - submit_order: place an order with idempotency key
    - cancel_order: cancel an existing order
    - get_order_status: check order state
    - get_positions: list current positions
    - get_account: retrieve account summary

    All concrete adapters must enforce:
    - Idempotent order submission via idempotency_key (Req 8.5)
    - Paper/live mode separation (Req 8.1)
    - Fail-closed on broker unavailability (Req 8.5)
    """

    def __init__(self, mode: TradingMode = TradingMode.PAPER) -> None:
        """Remember the trading mode; defaults to paper trading."""
        self._mode = mode

    @property
    def mode(self) -> TradingMode:
        """The trading mode this adapter was constructed with (read-only)."""
        return self._mode

    def source_type(self) -> str:
        """Return the source type identifier shared by broker adapters."""
        return "broker"

    @abstractmethod
    async def submit_order(self, order: OrderRequest) -> OrderResponse:
        """Submit an order to the broker.

        Must use order.idempotency_key to prevent duplicate submissions.
        Must fail closed if the broker is unavailable or returns ambiguous state.
        """
        ...

    @abstractmethod
    async def cancel_order(self, broker_order_id: str) -> OrderResponse:
        """Cancel an existing order by broker order ID."""
        ...

    @abstractmethod
    async def get_order_status(self, broker_order_id: str) -> OrderResponse:
        """Get the current status of an order."""
        ...

    @abstractmethod
    async def get_positions(self) -> list[PositionInfo]:
        """Get all current positions."""
        ...

    @abstractmethod
    async def get_account(self) -> AccountInfo:
        """Get account summary (balance, buying power, etc.)."""
        ...
|
||||
|
||||
|
||||
# --- Concrete Alpaca implementation ---
|
||||
|
||||
|
||||
class AlpacaBrokerAdapter(BrokerDataAdapter):
    """Concrete broker adapter for the Alpaca Markets REST API.

    Supports:
    - Paper trading via paper-api.alpaca.markets
    - Live trading via api.alpaca.markets
    - Order submission, cancellation, and status
    - Position and account queries

    Config options for fetch():
        endpoint: One of "positions", "orders", "account" (default "positions")
    """

    PAPER_BASE_URL: str = "https://paper-api.alpaca.markets"
    LIVE_BASE_URL: str = "https://api.alpaca.markets"

    # Alpaca order status -> internal OrderStatus. Unknown statuses fall back
    # to PENDING in _parse_order_response.
    # NOTE(review): "done_for_day" is mapped to FILLED; confirm against the
    # Alpaca order lifecycle docs that this is the intended meaning.
    _STATUS_MAP: dict[str, OrderStatus] = {
        "new": OrderStatus.SUBMITTED,
        "accepted": OrderStatus.ACCEPTED,
        "partially_filled": OrderStatus.PARTIALLY_FILLED,
        "filled": OrderStatus.FILLED,
        "done_for_day": OrderStatus.FILLED,
        "canceled": OrderStatus.CANCELLED,
        "expired": OrderStatus.EXPIRED,
        "replaced": OrderStatus.SUBMITTED,
        "pending_new": OrderStatus.PENDING,
        "pending_cancel": OrderStatus.PENDING,
        "pending_replace": OrderStatus.PENDING,
        "rejected": OrderStatus.REJECTED,
    }

    def __init__(
        self,
        api_key: str,
        api_secret: str,
        mode: TradingMode = TradingMode.PAPER,
        base_url: str | None = None,
    ) -> None:
        """Store credentials and pick the base URL.

        An explicit ``base_url`` wins (trailing slashes stripped); otherwise
        the live or paper URL is chosen from ``mode``.
        """
        super().__init__(mode=mode)
        self.api_key = api_key
        self.api_secret = api_secret
        if base_url:
            self.base_url = base_url.rstrip("/")
        elif mode == TradingMode.LIVE:
            self.base_url = self.LIVE_BASE_URL
        else:
            self.base_url = self.PAPER_BASE_URL

    def _headers(self) -> dict[str, str]:
        """Build the request headers sent with every Alpaca call."""
        # NOTE(review): key/secret auth uses the APCA-* headers; the Bearer
        # header carrying the API key looks redundant - confirm it is intended.
        return {
            "Authorization": f"Bearer {self.api_key}",
            "APCA-API-KEY-ID": self.api_key,
            "APCA-API-SECRET-KEY": self.api_secret,
            "Content-Type": "application/json",
        }

    async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
        """Fetch positions or recent orders for a ticker from Alpaca.

        This satisfies the BaseAdapter contract for the ingestion pipeline.
        The broker adapter uses fetch() to pull position/order snapshots
        that get persisted as raw artifacts.

        Errors never propagate: HTTP and other failures are logged and
        returned as an AdapterResult with ``error`` set and no items.
        """
        endpoint = config.get("endpoint", "positions")
        url = self._build_fetch_url(ticker, endpoint)

        async with httpx.AsyncClient(timeout=30) as client:
            t0 = time.monotonic()
            try:
                resp = await client.get(url, headers=self._headers())
                elapsed_ms = (time.monotonic() - t0) * 1000
                resp.raise_for_status()

                raw = resp.content
                data = resp.json()
                content_hash = hashlib.sha256(raw).hexdigest()
                # A single object becomes a one-item list; a JSON array is
                # used as-is; anything else yields no items.
                if isinstance(data, dict):
                    items = [data]
                elif isinstance(data, list):
                    items = data
                else:
                    items = []

                return AdapterResult(
                    source_type="broker",
                    ticker=ticker,
                    items=items,
                    raw_payload=raw,
                    content_hash=content_hash,
                    fetched_at=datetime.now(timezone.utc),
                    http_status=resp.status_code,
                    response_time_ms=round(elapsed_ms, 1),
                    metadata={
                        "provider": "alpaca",
                        "mode": self._mode.value,
                        "endpoint": endpoint,
                    },
                )
            except httpx.HTTPStatusError as e:
                elapsed_ms = (time.monotonic() - t0) * 1000
                logger.error("Alpaca HTTP error for %s: %s", ticker, e)
                return self._error_result(
                    ticker,
                    str(e),
                    elapsed_ms,
                    http_status=e.response.status_code if e.response else None,
                    raw=e.response.content if e.response else b"",
                )
            except Exception as e:
                elapsed_ms = (time.monotonic() - t0) * 1000
                logger.error("Alpaca fetch failed for %s: %s", ticker, e)
                return self._error_result(ticker, str(e), elapsed_ms)

    def _build_fetch_url(self, ticker: str, endpoint: str) -> str:
        """Build the URL for a fetch operation.

        "orders" and "account" select their endpoints; any other value falls
        back to the positions endpoint for ``ticker``.
        """
        if endpoint == "orders":
            return f"{self.base_url}/v2/orders?symbols={ticker}&status=all&limit=50"
        if endpoint == "account":
            return f"{self.base_url}/v2/account"
        # Default: positions for ticker
        return f"{self.base_url}/v2/positions/{ticker}"

    async def submit_order(self, order: OrderRequest) -> OrderResponse:
        """Submit an order to Alpaca with idempotency key.

        Fails closed: any network error or ambiguous response returns
        a rejected OrderResponse rather than risking duplicate orders.
        """
        if self._mode == TradingMode.LIVE:
            logger.warning(
                "LIVE order submission: %s %s %s",
                order.side.value,
                order.quantity,
                order.ticker,
            )

        payload: dict[str, Any] = {
            "symbol": order.ticker,
            "qty": str(order.quantity),
            "side": order.side.value,
            "type": order.order_type.value,
            "time_in_force": order.time_in_force,
            # Alpaca de-duplicates on client_order_id in the request body, so
            # the idempotency key must travel here for a replay to be refused.
            # NOTE(review): Alpaca caps client_order_id length; confirm that
            # caller-supplied keys stay within the documented limit.
            "client_order_id": order.idempotency_key,
        }
        if order.limit_price is not None and order.order_type in (
            OrderType.LIMIT,
            OrderType.STOP_LIMIT,
        ):
            payload["limit_price"] = str(order.limit_price)
        if order.stop_price is not None and order.order_type in (
            OrderType.STOP,
            OrderType.STOP_LIMIT,
        ):
            payload["stop_price"] = str(order.stop_price)

        # Header kept for proxies/auditing; it is not relied on for de-duplication.
        headers = {**self._headers(), "Idempotency-Key": order.idempotency_key}

        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.post(
                    f"{self.base_url}/v2/orders",
                    headers=headers,
                    json=payload,
                )
                resp.raise_for_status()
                data = resp.json()
                return self._parse_order_response(data)
            except httpx.HTTPStatusError as e:
                error_body = e.response.text if e.response else "unknown"
                logger.error("Order rejected by Alpaca: %s", error_body)
                return OrderResponse(
                    broker_order_id="",
                    status=OrderStatus.REJECTED,
                    ticker=order.ticker,
                    side=order.side,
                    quantity=order.quantity,
                    error=f"HTTP {e.response.status_code}: {error_body}" if e.response else str(e),
                    raw_response={"error": error_body},
                )
            except Exception as e:
                # Fail closed: treat any unexpected error as rejection
                logger.error("Order submission failed (fail-closed): %s", e)
                return OrderResponse(
                    broker_order_id="",
                    status=OrderStatus.REJECTED,
                    ticker=order.ticker,
                    side=order.side,
                    quantity=order.quantity,
                    error=f"fail-closed: {e}",
                )

    async def cancel_order(self, broker_order_id: str) -> OrderResponse:
        """Cancel an order on Alpaca.

        A 204 reply yields a CANCELLED response. Failures are logged and
        returned as a REJECTED response with ``error`` set. In both cases
        ticker/side/quantity are placeholders ("" / BUY / 0), not order data.
        """
        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.delete(
                    f"{self.base_url}/v2/orders/{broker_order_id}",
                    headers=self._headers(),
                )
                if resp.status_code == 204:
                    return OrderResponse(
                        broker_order_id=broker_order_id,
                        status=OrderStatus.CANCELLED,
                        ticker="",
                        side=OrderSide.BUY,
                        quantity=0,
                    )
                resp.raise_for_status()
                data = resp.json()
                return self._parse_order_response(data)
            except Exception as e:
                logger.error("Cancel failed for %s: %s", broker_order_id, e)
                return OrderResponse(
                    broker_order_id=broker_order_id,
                    status=OrderStatus.REJECTED,
                    ticker="",
                    side=OrderSide.BUY,
                    quantity=0,
                    error=str(e),
                )

    async def get_order_status(self, broker_order_id: str) -> OrderResponse:
        """Get order status from Alpaca.

        On any failure a placeholder response with status REJECTED and
        ``error`` set is returned; callers must check ``error`` before
        treating that status as the broker's verdict.
        """
        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(
                    f"{self.base_url}/v2/orders/{broker_order_id}",
                    headers=self._headers(),
                )
                resp.raise_for_status()
                data = resp.json()
                return self._parse_order_response(data)
            except Exception as e:
                logger.error("Get order status failed for %s: %s", broker_order_id, e)
                return OrderResponse(
                    broker_order_id=broker_order_id,
                    status=OrderStatus.REJECTED,
                    ticker="",
                    side=OrderSide.BUY,
                    quantity=0,
                    error=str(e),
                )

    async def get_positions(self) -> list[PositionInfo]:
        """Get all current positions from Alpaca.

        Returns an empty list when the reply is not a JSON array or when the
        request fails (the failure is logged, not raised).
        """
        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(
                    f"{self.base_url}/v2/positions",
                    headers=self._headers(),
                )
                resp.raise_for_status()
                data = resp.json()
                if not isinstance(data, list):
                    return []
                return [self._parse_position(p) for p in data if isinstance(p, dict)]
            except Exception as e:
                logger.error("Get positions failed: %s", e)
                return []

    async def get_account(self) -> AccountInfo:
        """Get account summary from Alpaca.

        On failure the error is logged and an AccountInfo with an empty
        account_id and zero balances is returned instead of raising.
        """
        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(
                    f"{self.base_url}/v2/account",
                    headers=self._headers(),
                )
                resp.raise_for_status()
                data = resp.json()
                return AccountInfo(
                    account_id=str(data.get("id", "")),
                    buying_power=float(data.get("buying_power", 0)),
                    cash=float(data.get("cash", 0)),
                    portfolio_value=float(data.get("portfolio_value", 0)),
                    currency=str(data.get("currency", "USD")),
                    mode=self._mode,
                )
            except Exception as e:
                logger.error("Get account failed: %s", e)
                return AccountInfo(
                    account_id="",
                    buying_power=0,
                    cash=0,
                    portfolio_value=0,
                    mode=self._mode,
                )

    def _parse_order_response(self, data: dict[str, Any]) -> OrderResponse:
        """Parse an Alpaca order response into an OrderResponse.

        Missing or unrecognized statuses map to PENDING; any side other than
        "sell" is treated as BUY; missing/empty numeric fields become 0
        (or None for the average fill price).
        """
        raw_status = str(data.get("status", "pending"))
        status = self._STATUS_MAP.get(raw_status, OrderStatus.PENDING)

        side_str = str(data.get("side", "buy"))
        side = OrderSide.SELL if side_str == "sell" else OrderSide.BUY

        filled_qty = float(data.get("filled_qty", 0) or 0)
        filled_avg = data.get("filled_avg_price")
        filled_avg_price = float(filled_avg) if filled_avg else None

        return OrderResponse(
            broker_order_id=str(data.get("id", "")),
            status=status,
            ticker=str(data.get("symbol", "")),
            side=side,
            quantity=float(data.get("qty", 0) or 0),
            filled_quantity=filled_qty,
            filled_avg_price=filled_avg_price,
            raw_response=data,
        )

    def _parse_position(self, data: dict[str, Any]) -> PositionInfo:
        """Parse an Alpaca position response into a PositionInfo.

        Missing or empty numeric fields are read as 0.
        """
        return PositionInfo(
            ticker=str(data.get("symbol", "")),
            quantity=float(data.get("qty", 0) or 0),
            avg_entry_price=float(data.get("avg_entry_price", 0) or 0),
            current_price=float(data.get("current_price", 0) or 0),
            unrealized_pnl=float(data.get("unrealized_pl", 0) or 0),
            market_value=float(data.get("market_value", 0) or 0),
            side=str(data.get("side", "long")),
        )

    def _error_result(
        self,
        ticker: str,
        error: str,
        elapsed_ms: float,
        http_status: int | None = None,
        raw: bytes = b"",
    ) -> AdapterResult:
        """Build an error AdapterResult for broker fetches (no items, empty hash)."""
        return AdapterResult(
            source_type="broker",
            ticker=ticker,
            items=[],
            raw_payload=raw,
            content_hash="",
            fetched_at=datetime.now(timezone.utc),
            error=error,
            http_status=http_status,
            response_time_ms=round(elapsed_ms, 1),
            metadata={"provider": "alpaca", "mode": self._mode.value},
        )
|
||||
|
||||
@@ -0,0 +1,832 @@
|
||||
"""Broker adapter service - standalone worker for sandbox order execution.
|
||||
|
||||
Runs the Alpaca broker adapter in sandbox (paper) mode, processing order
|
||||
requests from the broker queue, evaluating them through the risk engine,
|
||||
submitting to Alpaca's paper trading API, and persisting the full audit trail.
|
||||
|
||||
Also periodically syncs positions and account state from Alpaca.
|
||||
|
||||
Implements idempotent order submission keys and duplicate prevention:
|
||||
- Deterministic idempotency key generation from job attributes
|
||||
- Redis-based fast-path duplicate detection before broker submission
|
||||
- PostgreSQL UNIQUE constraint on idempotency_key as durable fallback
|
||||
|
||||
Requirements: 2.4, 8.1, 8.3, 8.5
|
||||
Design: Section 4.9 - Broker Adapter
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio as aioredis
|
||||
|
||||
from services.adapters.broker_adapter import (
|
||||
AlpacaBrokerAdapter,
|
||||
OrderRequest,
|
||||
OrderResponse,
|
||||
OrderSide,
|
||||
OrderStatus,
|
||||
OrderType,
|
||||
TradingMode,
|
||||
)
|
||||
from services.risk.engine import (
|
||||
AccountRiskState,
|
||||
PortfolioRiskConfig,
|
||||
ProposedOrder,
|
||||
evaluate_order,
|
||||
)
|
||||
from services.risk.approval import (
|
||||
ApprovalRequest,
|
||||
ApprovalStatus,
|
||||
compute_expiry,
|
||||
create_approval_request,
|
||||
requires_approval,
|
||||
)
|
||||
from services.shared.audit import (
|
||||
audit_approval_requested,
|
||||
audit_duplicate_prevented,
|
||||
audit_order_filled,
|
||||
audit_order_rejected,
|
||||
audit_order_submitted,
|
||||
audit_risk_evaluated,
|
||||
)
|
||||
from services.lake_publisher.worker import (
|
||||
publish_trade_order,
|
||||
publish_trade_fill,
|
||||
publish_positions_daily_batch,
|
||||
LAKEHOUSE_BUCKET,
|
||||
)
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.logging import Span, new_trace_id, set_trace_context, setup_logging
|
||||
from services.shared.metrics import (
|
||||
ORDERS_DUPLICATES_PREVENTED,
|
||||
ORDERS_FILLED,
|
||||
ORDERS_REJECTED,
|
||||
ORDERS_SUBMITTED,
|
||||
POSITIONS_SYNCED,
|
||||
RISK_CHECK_FAILURES,
|
||||
RISK_EVALUATIONS_TOTAL,
|
||||
)
|
||||
from services.shared.redis_keys import QUEUE_BROKER, queue_key
|
||||
|
||||
logger = logging.getLogger("broker_service")
|
||||
|
||||
POSITION_SYNC_INTERVAL = 60 # seconds
|
||||
|
||||
# Redis TTL for idempotency markers (24 hours)
|
||||
ORDER_IDEMPOTENCY_TTL = 86400
|
||||
ORDER_IDEMPOTENCY_PREFIX = "stonks:order_idempotency"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DB persistence helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_UPSERT_BROKER_ACCOUNT = """
|
||||
INSERT INTO broker_accounts (id, provider, account_id, mode, config, active)
|
||||
VALUES ($1::uuid, $2, $3, $4, $5::jsonb, TRUE)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
config = EXCLUDED.config,
|
||||
mode = EXCLUDED.mode,
|
||||
active = TRUE
|
||||
"""
|
||||
|
||||
_INSERT_ORDER = """
|
||||
INSERT INTO orders (
|
||||
id, recommendation_id, broker_account_id, ticker, side, order_type,
|
||||
quantity, limit_price, stop_price, status, idempotency_key,
|
||||
broker_order_id, decision_trace, submitted_at, filled_at,
|
||||
fill_price, fill_quantity
|
||||
) VALUES (
|
||||
$1::uuid, $2, $3::uuid, $4, $5, $6,
|
||||
$7, $8, $9, $10, $11,
|
||||
$12, $13::jsonb, $14, $15,
|
||||
$16, $17
|
||||
)
|
||||
ON CONFLICT (idempotency_key) DO UPDATE SET
|
||||
status = EXCLUDED.status,
|
||||
broker_order_id = EXCLUDED.broker_order_id,
|
||||
filled_at = EXCLUDED.filled_at,
|
||||
fill_price = EXCLUDED.fill_price,
|
||||
fill_quantity = EXCLUDED.fill_quantity,
|
||||
updated_at = NOW()
|
||||
"""
|
||||
|
||||
_INSERT_ORDER_EVENT = """
|
||||
INSERT INTO order_events (order_id, event_type, data, broker_timestamp)
|
||||
VALUES ($1::uuid, $2, $3::jsonb, $4)
|
||||
"""
|
||||
|
||||
_INSERT_RISK_EVALUATION = """
|
||||
INSERT INTO risk_evaluations (id, recommendation_id, eligible, allowed_mode, rejection_reasons, risk_checks, evaluated_at)
|
||||
VALUES ($1::uuid, $2::uuid, $3, $4, $5::jsonb, $6::jsonb, $7)
|
||||
"""
|
||||
|
||||
_UPSERT_POSITION = """
|
||||
INSERT INTO positions (broker_account_id, ticker, quantity, avg_entry_price, current_price, unrealized_pnl, updated_at)
|
||||
VALUES ($1::uuid, $2, $3, $4, $5, $6, $7)
|
||||
ON CONFLICT (broker_account_id, ticker)
|
||||
DO UPDATE SET
|
||||
quantity = EXCLUDED.quantity,
|
||||
avg_entry_price = EXCLUDED.avg_entry_price,
|
||||
current_price = EXCLUDED.current_price,
|
||||
unrealized_pnl = EXCLUDED.unrealized_pnl,
|
||||
updated_at = EXCLUDED.updated_at
|
||||
"""
|
||||
|
||||
_LOAD_RISK_CONFIG = """
|
||||
SELECT config FROM risk_configs WHERE active = TRUE ORDER BY updated_at DESC LIMIT 1
|
||||
"""
|
||||
|
||||
_LOAD_DAILY_SNAPSHOT = """
|
||||
SELECT portfolio_value, daily_pnl, daily_trade_count, positions_by_sector
|
||||
FROM daily_risk_snapshots
|
||||
WHERE account_id = $1 AND snapshot_date = CURRENT_DATE
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
_CHECK_ORDER_BY_IDEMPOTENCY_KEY = """
|
||||
SELECT id, status, broker_order_id FROM orders
|
||||
WHERE idempotency_key = $1
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Idempotency helpers (Requirement 8.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def generate_idempotency_key(job: dict[str, Any]) -> str:
    """Generate a deterministic idempotency key from job attributes.

    If the job carries a truthy ``idempotency_key`` it is returned as a string.
    Otherwise the key is derived from recommendation_id, ticker, side,
    quantity, order_type, limit_price and stop_price, so that a replayed queue
    message with the same content produces the same key and is detected as a
    duplicate.

    Args:
        job: Broker queue job payload.

    Returns:
        The explicit key, or the first 40 hex characters of the SHA-256 digest
        of the ``|``-joined field values.
    """
    explicit = job.get("idempotency_key")
    if explicit:
        return str(explicit)

    # (field, default) pairs in hash order. The order, the defaults and the
    # str() rendering all feed the digest, so changing any of them would
    # change the key generated for a given job.
    fields = (
        ("recommendation_id", ""),
        ("ticker", ""),
        ("side", "buy"),
        ("quantity", 0),
        ("order_type", "market"),
        ("limit_price", ""),
        ("stop_price", ""),
    )
    raw = "|".join(str(job.get(name, default)) for name, default in fields)
    return hashlib.sha256(raw.encode()).hexdigest()[:40]
|
||||
|
||||
|
||||
def _redis_idempotency_key(idempotency_key: str) -> str:
    """Build the Redis key for an order idempotency marker.

    Args:
        idempotency_key: The order's idempotency key.

    Returns:
        ``ORDER_IDEMPOTENCY_PREFIX`` and the key joined by a colon.
    """
    return f"{ORDER_IDEMPOTENCY_PREFIX}:{idempotency_key}"
|
||||
|
||||
|
||||
async def check_idempotency_redis(
    rds: aioredis.Redis,
    idempotency_key: str,
) -> str | None:
    """Fast-path: check Redis for a previously processed idempotency key.

    Args:
        rds: Redis client used for the lookup.
        idempotency_key: The order's idempotency key (not the Redis key).

    Returns:
        The order_id stored under the marker if one is found, None otherwise.
    """
    redis_key = _redis_idempotency_key(idempotency_key)
    cached = await rds.get(redis_key)
    if not cached:
        return None
    # A client that does not decode responses hands back bytes; str() on bytes
    # yields the repr ("b'...'") rather than the stored order_id.
    if isinstance(cached, (bytes, bytearray)):
        return cached.decode("utf-8")
    return str(cached)
|
||||
|
||||
|
||||
async def check_idempotency_db(
    pool: asyncpg.Pool,
    idempotency_key: str,
) -> dict[str, Any] | None:
    """Durable fallback: check PostgreSQL for an existing order with this key.

    Args:
        pool: Connection pool used to run the lookup query.
        idempotency_key: The order's idempotency key.

    Returns:
        A dict with string values for ``id``, ``status`` and
        ``broker_order_id`` if a row is found, None otherwise. A NULL/empty
        broker_order_id is returned as ``""``.
    """
    row = await pool.fetchrow(_CHECK_ORDER_BY_IDEMPOTENCY_KEY, idempotency_key)
    if not row:
        return None
    return {
        "id": str(row["id"]),
        "status": str(row["status"]),
        "broker_order_id": str(row["broker_order_id"] or ""),
    }
|
||||
|
||||
|
||||
async def mark_idempotency_redis(
    rds: aioredis.Redis,
    idempotency_key: str,
    order_id: str,
) -> None:
    """Set the Redis idempotency marker after an order is processed.

    Args:
        rds: Redis client used for the write.
        idempotency_key: The order's idempotency key.
        order_id: Value stored under the marker.

    The marker is written with an expiry of ``ORDER_IDEMPOTENCY_TTL``.
    """
    redis_key = _redis_idempotency_key(idempotency_key)
    await rds.set(redis_key, order_id, ex=ORDER_IDEMPOTENCY_TTL)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core service logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_order_request(job: dict[str, Any]) -> OrderRequest:
    """Build an OrderRequest from a broker queue job payload.

    Args:
        job: Broker queue job payload. ``ticker`` is required; the other
            fields fall back to defaults (side "buy", quantity 0, order type
            "market", time in force "day").

    Returns:
        The OrderRequest, carrying the key from ``generate_idempotency_key``.

    Raises:
        KeyError: If ``job`` has no ``ticker``.
    """
    # Any side other than the exact string "sell" is treated as a buy.
    side = OrderSide.SELL if job.get("side", "buy") == "sell" else OrderSide.BUY

    order_type_map = {
        "market": OrderType.MARKET,
        "limit": OrderType.LIMIT,
        "stop": OrderType.STOP,
        "stop_limit": OrderType.STOP_LIMIT,
    }
    # NOTE(review): an unrecognised order_type silently becomes a market order.
    order_type = order_type_map.get(job.get("order_type", "market"), OrderType.MARKET)

    return OrderRequest(
        ticker=job["ticker"],
        side=side,
        quantity=float(job.get("quantity", 0)),
        order_type=order_type,
        limit_price=job.get("limit_price"),
        stop_price=job.get("stop_price"),
        time_in_force=job.get("time_in_force", "day"),
        idempotency_key=generate_idempotency_key(job),
    )
|
||||
|
||||
|
||||
def build_proposed_order(job: dict[str, Any]) -> ProposedOrder:
    """Build a ProposedOrder for risk evaluation from a broker queue job.

    Args:
        job: Broker queue job payload. ``ticker`` is required; numeric fields
            default to 0, ``sector`` to "" and ``side`` to "buy".

    Returns:
        The ProposedOrder; the job's ``side`` is passed as its ``action``.

    Raises:
        KeyError: If ``job`` has no ``ticker``.
    """
    return ProposedOrder(
        recommendation_id=job.get("recommendation_id"),
        ticker=job["ticker"],
        sector=job.get("sector", ""),
        action=job.get("side", "buy"),
        quantity=float(job.get("quantity", 0)),
        estimated_value=float(job.get("estimated_value", 0)),
        confidence=float(job.get("confidence", 0)),
    )
|
||||
|
||||
|
||||
async def load_risk_config(pool: asyncpg.Pool) -> PortfolioRiskConfig:
    """Load the active risk configuration from the database.

    Args:
        pool: Connection pool used to run the lookup query.

    Returns:
        The config built from the stored JSON, or a default-constructed
        PortfolioRiskConfig when no row (or an empty config) is found.
    """
    row = await pool.fetchrow(_LOAD_RISK_CONFIG)
    if not row or not row["config"]:
        return PortfolioRiskConfig()

    stored = row["config"]
    # The column value may already be a dict or may still be JSON text.
    data = stored if isinstance(stored, dict) else json.loads(stored)
    return PortfolioRiskConfig.from_db_json(data)
|
||||
|
||||
|
||||
async def load_account_risk_state(
    pool: asyncpg.Pool,
    adapter: AlpacaBrokerAdapter,
    account_uuid: str,
) -> AccountRiskState:
    """Build an AccountRiskState from the broker and daily snapshot.

    Broker failures are logged and tolerated: the corresponding fields keep
    whatever values ``AccountRiskState(account_id=...)`` starts with.

    Args:
        pool: Connection pool used to read today's risk snapshot.
        adapter: Broker adapter queried for account info and positions.
        account_uuid: Account identifier used for the state and the snapshot
            lookup.

    Returns:
        The populated AccountRiskState.
    """
    state = AccountRiskState(account_id=account_uuid)

    # Get live account info from Alpaca
    try:
        acct = await adapter.get_account()
        state.portfolio_value = acct.portfolio_value
        state.cash = acct.cash
        state.buying_power = acct.buying_power
    except Exception as e:
        logger.warning("Failed to fetch account from Alpaca: %s", e)

    # Get positions from Alpaca
    try:
        positions = await adapter.get_positions()
        for pos in positions:
            state.positions_by_symbol[pos.ticker] = pos.market_value
        state.open_position_count = len(positions)
    except Exception as e:
        logger.warning("Failed to fetch positions from Alpaca: %s", e)

    # Overlay daily snapshot from DB
    row = await pool.fetchrow(_LOAD_DAILY_SNAPSHOT, account_uuid)
    if row:
        state.daily_pnl = float(row["daily_pnl"] or 0)
        state.daily_trade_count = int(row["daily_trade_count"] or 0)
        sector_data = row["positions_by_sector"]
        if sector_data:
            # The column value may already be a dict or may still be JSON text.
            state.positions_by_sector = (
                sector_data if isinstance(sector_data, dict) else json.loads(sector_data)
            )

    return state
|
||||
|
||||
|
||||
async def persist_order(
    pool: asyncpg.Pool,
    order_id: str,
    order: OrderRequest,
    resp: OrderResponse,
    account_uuid: str,
    risk_eval: dict[str, Any],
    recommendation_id: str | None = None,
) -> None:
    """Persist an order and its lifecycle events to PostgreSQL.

    The order row and its events are written in a single transaction. The
    risk evaluation is not written to its own table here; it is embedded,
    together with the request and the broker response, in the order's
    decision-trace JSON.

    Args:
        pool: Connection pool to acquire the connection from.
        order_id: Identifier of the order row.
        order: The request that was (or would have been) sent to the broker.
        resp: The broker response, or a synthetic one for rejected orders.
        account_uuid: Broker account the order belongs to.
        risk_eval: Risk evaluation summary stored in the decision trace.
        recommendation_id: Originating recommendation, if any.
    """
    now = datetime.now(timezone.utc)
    # The fill time is taken as "now"; it is only set for FILLED responses.
    filled_at = now if resp.status == OrderStatus.FILLED else None

    decision_trace = {
        "risk_evaluation": risk_eval,
        "order_request": order.to_dict(),
        "broker_response": resp.to_dict(),
    }

    async with pool.acquire() as conn:
        async with conn.transaction():
            await conn.execute(
                _INSERT_ORDER,
                order_id,
                recommendation_id,
                account_uuid,
                order.ticker,
                order.side.value,
                order.order_type.value,
                order.quantity,
                order.limit_price,
                order.stop_price,
                resp.status.value,
                order.idempotency_key,
                resp.broker_order_id,
                json.dumps(decision_trace),
                resp.submitted_at or now,
                filled_at,
                resp.filled_avg_price,
                resp.filled_quantity,
            )

            # Record order events: every order gets a "submitted" event ...
            await conn.execute(
                _INSERT_ORDER_EVENT,
                order_id,
                "submitted",
                json.dumps({"ticker": order.ticker, "side": order.side.value}),
                now,
            )

            # ... followed by a terminal event when the outcome is already known.
            if resp.status == OrderStatus.FILLED:
                await conn.execute(
                    _INSERT_ORDER_EVENT,
                    order_id,
                    "fill",
                    json.dumps({
                        "fill_price": resp.filled_avg_price,
                        "fill_qty": resp.filled_quantity,
                    }),
                    now,
                )
            elif resp.status == OrderStatus.REJECTED:
                await conn.execute(
                    _INSERT_ORDER_EVENT,
                    order_id,
                    "rejected",
                    json.dumps({"error": resp.error}),
                    now,
                )
|
||||
|
||||
|
||||
async def sync_positions(
    adapter: AlpacaBrokerAdapter,
    pool: asyncpg.Pool,
    account_uuid: str,
    minio_client: Any | None = None,
) -> None:
    """Sync current positions from Alpaca to PostgreSQL and publish to lake.

    Best-effort: any failure is logged and swallowed so that a periodic
    caller keeps running. A lake-publish failure is logged separately and
    does not undo the database upserts.

    Args:
        adapter: Broker adapter queried for the current positions.
        pool: Connection pool used for the position upserts.
        account_uuid: Broker account the positions belong to.
        minio_client: Client passed to the lake publisher; publishing is
            skipped when it is None or when there are no positions.
    """
    now = datetime.now(timezone.utc)
    try:
        positions = await adapter.get_positions()
        async with pool.acquire() as conn:
            for pos in positions:
                await conn.execute(
                    _UPSERT_POSITION,
                    account_uuid,
                    pos.ticker,
                    pos.quantity,
                    pos.avg_entry_price,
                    pos.current_price,
                    pos.unrealized_pnl,
                    now,
                )
        logger.info("Synced %d positions from Alpaca", len(positions))
        POSITIONS_SYNCED.inc()

        # Publish positions snapshot to analytical lake
        if minio_client is not None and positions:
            try:
                pos_dicts = [
                    {
                        "ticker": p.ticker,
                        "quantity": p.quantity,
                        "avg_entry_price": p.avg_entry_price,
                        # The lake record carries the current price as "close_price".
                        "close_price": p.current_price,
                        "unrealized_pnl": p.unrealized_pnl,
                    }
                    for p in positions
                ]
                publish_positions_daily_batch(
                    minio_client, pos_dicts, account_uuid, now,
                )
            except Exception as e:
                logger.warning("Failed to publish positions to lake: %s", e)
    except Exception as e:
        logger.error("Position sync failed: %s", e)
|
||||
|
||||
|
||||
async def register_broker_account(
    pool: asyncpg.Pool,
    account_uuid: str,
    adapter: AlpacaBrokerAdapter,
) -> None:
    """Register or update the broker account in PostgreSQL.

    Best-effort: a failure to reach the broker or the database is logged and
    swallowed.

    Args:
        pool: Connection pool used for the upsert.
        account_uuid: Identifier of the account row.
        adapter: Broker adapter queried for the account details; its ``mode``
            is stored with the account.
    """
    try:
        acct = await adapter.get_account()
        config_json = json.dumps({
            "provider": "alpaca",
            "buying_power": acct.buying_power,
            "cash": acct.cash,
            "portfolio_value": acct.portfolio_value,
        })
        await pool.execute(
            _UPSERT_BROKER_ACCOUNT,
            account_uuid,
            "alpaca",
            # Fall back to our own UUID when the broker reports no account id.
            acct.account_id or account_uuid,
            adapter.mode.value,
            config_json,
        )
        logger.info(
            "Registered Alpaca account: id=%s mode=%s portfolio=%.2f",
            acct.account_id, adapter.mode.value, acct.portfolio_value,
        )
    except Exception as e:
        logger.error("Failed to register broker account: %s", e)
|
||||
|
||||
|
||||
async def process_order_job(
    job: dict[str, Any],
    adapter: AlpacaBrokerAdapter,
    pool: asyncpg.Pool,
    account_uuid: str,
    rds: aioredis.Redis | None = None,
    minio_client: Any | None = None,
) -> None:
    """Process a single order job from the broker queue.

    1. Generate deterministic idempotency key
    2. Check Redis + DB for duplicate (Req 8.5)
    3. Build proposed order and run risk evaluation
    4. If risk rejects, persist the rejected order and stop
    5. If operator approval is required, create the approval request and stop
    6. Otherwise submit to Alpaca
    7. Persist order and events, publish to the lake
    8. Set Redis idempotency marker and record metrics/audit entries

    Args:
        job: Broker queue job payload.
        adapter: Broker adapter used for account state and order submission.
        pool: PostgreSQL connection pool.
        account_uuid: Broker account the order is placed for.
        rds: Redis client for the idempotency fast path; skipped when None.
        minio_client: Client for lake publishing; skipped when None.
    """
    ticker = job.get("ticker", "???")
    order_id = str(uuid.uuid4())
    idempotency_key = generate_idempotency_key(job)

    # --- Duplicate prevention (Requirement 8.5) ---
    # Fast path: Redis check
    if rds is not None:
        existing_order_id = await check_idempotency_redis(rds, idempotency_key)
        if existing_order_id:
            logger.info(
                "Duplicate order detected (redis) for %s key=%s existing=%s",
                ticker, idempotency_key[:16], existing_order_id,
            )
            ORDERS_DUPLICATES_PREVENTED.labels(detected_via="redis").inc()
            await audit_duplicate_prevented(
                pool, existing_order_id, ticker, idempotency_key, detected_via="redis",
            )
            return

    # Durable fallback: DB check
    existing = await check_idempotency_db(pool, idempotency_key)
    if existing:
        logger.info(
            "Duplicate order detected (db) for %s key=%s existing=%s status=%s",
            ticker, idempotency_key[:16], existing["id"], existing["status"],
        )
        ORDERS_DUPLICATES_PREVENTED.labels(detected_via="db").inc()
        await audit_duplicate_prevented(
            pool, existing["id"], ticker, idempotency_key, detected_via="db",
        )
        # Warm Redis cache for future fast-path hits
        if rds is not None:
            await mark_idempotency_redis(rds, idempotency_key, existing["id"])
        return

    # Risk evaluation
    risk_config = await load_risk_config(pool)
    risk_state = await load_account_risk_state(pool, adapter, account_uuid)
    proposed = build_proposed_order(job)
    evaluation = evaluate_order(proposed, risk_config, risk_state)

    risk_eval_dict = {
        "evaluation_id": evaluation.evaluation_id,
        "eligible": evaluation.eligible,
        "allowed_mode": evaluation.allowed_mode.value,
        "rejection_reasons": evaluation.rejection_reasons,
        "checks": [c.model_dump(mode="json") for c in evaluation.checks],
    }

    # Persist risk evaluation (best-effort: a failure here must not block the order)
    rec_id = job.get("recommendation_id")
    try:
        await pool.execute(
            _INSERT_RISK_EVALUATION,
            evaluation.evaluation_id,
            rec_id,
            evaluation.eligible,
            evaluation.allowed_mode.value,
            json.dumps(evaluation.rejection_reasons),
            json.dumps(risk_eval_dict["checks"]),
            evaluation.evaluated_at,
        )
    except Exception as e:
        logger.warning("Failed to persist risk evaluation: %s", e)

    # Audit: risk evaluation result
    await audit_risk_evaluated(
        pool,
        evaluation_id=evaluation.evaluation_id,
        recommendation_id=rec_id,
        ticker=ticker,
        eligible=evaluation.eligible,
        allowed_mode=evaluation.allowed_mode.value,
        rejection_reasons=evaluation.rejection_reasons,
        check_count=len(evaluation.checks),
    )

    if not evaluation.eligible:
        RISK_EVALUATIONS_TOTAL.labels(result="rejected").inc()
        for check in evaluation.checks:
            if check.result.value == "fail":
                RISK_CHECK_FAILURES.labels(check_name=check.check_name).inc()
        ORDERS_REJECTED.labels(reason_category="risk_engine").inc()
        logger.info(
            "Order rejected by risk engine for %s: %s",
            ticker, evaluation.rejection_reasons,
        )
        # Same message is stored on the order and written to the audit log.
        rejection_message = f"Risk rejected: {'; '.join(evaluation.rejection_reasons)}"

        # Persist the rejected order for audit
        order_req = build_order_request(job)
        rejected_resp = OrderResponse(
            broker_order_id="",
            status=OrderStatus.REJECTED,
            ticker=ticker,
            side=OrderSide.SELL if job.get("side") == "sell" else OrderSide.BUY,
            quantity=float(job.get("quantity", 0)),
            error=rejection_message,
        )
        await persist_order(
            pool, order_id, order_req, rejected_resp,
            account_uuid, risk_eval_dict, rec_id,
        )
        # Publish rejected order fact to analytical lake
        if minio_client is not None:
            try:
                publish_trade_order(
                    minio_client, order_id, ticker,
                    side=job.get("side", "buy"),
                    order_type=job.get("order_type", "market"),
                    quantity=float(job.get("quantity", 0)),
                    limit_price=job.get("limit_price"),
                    status="rejected",
                    broker_account=account_uuid,
                    submitted_at=datetime.now(timezone.utc),
                )
            except Exception as e:
                logger.warning("Failed to publish rejected order to lake: %s", e)
        # Audit: order rejected by risk engine
        await audit_order_rejected(
            pool, order_id, ticker,
            reason=rejection_message,
            source="risk_engine",
        )
        # Mark idempotency even for rejected orders to prevent reprocessing
        if rds is not None:
            await mark_idempotency_redis(rds, idempotency_key, order_id)
        return

    # --- Operator approval gate (Requirement 8.2) ---
    if requires_approval(risk_config, evaluation.allowed_mode):
        expiry = compute_expiry(risk_config)
        approval_req = ApprovalRequest(
            order_job=job,
            recommendation_id=rec_id,
            ticker=ticker,
            side=job.get("side", "buy"),
            quantity=float(job.get("quantity", 0)),
            estimated_value=float(job.get("estimated_value", 0)),
            risk_evaluation_id=evaluation.evaluation_id,
            expires_at=expiry,
        )
        try:
            await create_approval_request(pool, approval_req)
            logger.info(
                "Order for %s held for operator approval (id=%s, expires=%s)",
                ticker, approval_req.approval_id, expiry.isoformat(),
            )
            await audit_approval_requested(
                pool,
                approval_id=approval_req.approval_id,
                ticker=ticker,
                side=approval_req.side,
                quantity=approval_req.quantity,
                estimated_value=approval_req.estimated_value,
                recommendation_id=rec_id,
                expires_at=expiry.isoformat(),
            )
        except Exception as e:
            # NOTE(review): on failure the job is neither submitted nor re-queued here.
            logger.error("Failed to create approval request for %s: %s", ticker, e)
        # Do NOT mark idempotency — the job will be re-submitted after approval
        return

    # Submit to Alpaca
    order_req = build_order_request(job)
    RISK_EVALUATIONS_TOTAL.labels(result="passed").inc()

    # Audit: order submitted to broker
    await audit_order_submitted(
        pool,
        order_id=order_id,
        ticker=ticker,
        side=order_req.side.value,
        quantity=order_req.quantity,
        order_type=order_req.order_type.value,
        idempotency_key=order_req.idempotency_key,
        recommendation_id=rec_id,
        evaluation_id=evaluation.evaluation_id,
    )

    resp = await adapter.submit_order(order_req)

    await persist_order(
        pool, order_id, order_req, resp,
        account_uuid, risk_eval_dict, rec_id,
    )

    # Publish order fact to analytical lake
    if minio_client is not None:
        try:
            publish_trade_order(
                minio_client, order_id, ticker,
                side=order_req.side.value,
                order_type=order_req.order_type.value,
                quantity=order_req.quantity,
                limit_price=order_req.limit_price,
                status=resp.status.value,
                broker_account=account_uuid,
                submitted_at=resp.submitted_at or datetime.now(timezone.utc),
            )
        except Exception as e:
            logger.warning("Failed to publish order to lake: %s", e)

        # Publish fill fact if the order was filled
        if resp.status == OrderStatus.FILLED and resp.filled_avg_price is not None:
            try:
                fill_id = str(uuid.uuid4())
                publish_trade_fill(
                    minio_client, fill_id, order_id, ticker,
                    side=order_req.side.value,
                    fill_price=resp.filled_avg_price,
                    fill_quantity=resp.filled_quantity,
                    broker_account=account_uuid,
                    filled_at=datetime.now(timezone.utc),
                )
            except Exception as e:
                logger.warning("Failed to publish fill to lake: %s", e)

    # Mark idempotency after successful persistence
    if rds is not None:
        await mark_idempotency_redis(rds, idempotency_key, order_id)

    if resp.ok:
        mode = "paper" if adapter.mode == TradingMode.PAPER else "live"
        ORDERS_SUBMITTED.labels(
            side=order_req.side.value,
            order_type=order_req.order_type.value,
            mode=mode,
        ).inc()
        logger.info(
            "Order submitted to Alpaca: %s %s %.0f %s @ %s | broker_id=%s",
            resp.status.value, order_req.side.value, order_req.quantity,
            ticker, resp.filled_avg_price, resp.broker_order_id,
        )
        # Audit: order filled
        if resp.status == OrderStatus.FILLED:
            ORDERS_FILLED.labels(side=order_req.side.value).inc()
            await audit_order_filled(
                pool, order_id, ticker,
                side=order_req.side.value,
                fill_quantity=resp.filled_quantity,
                fill_price=resp.filled_avg_price,
                broker_order_id=resp.broker_order_id,
            )
    else:
        ORDERS_REJECTED.labels(reason_category="broker").inc()
        logger.warning(
            "Order failed for %s: %s (status=%s)",
            ticker, resp.error, resp.status.value,
        )
        # Audit: order rejected by broker
        await audit_order_rejected(
            pool, order_id, ticker,
            reason=resp.error or f"Broker status: {resp.status.value}",
            source="broker",
        )
|
||||
|
||||
|
||||
|
||||
async def position_sync_loop(
    adapter: AlpacaBrokerAdapter,
    pool: asyncpg.Pool,
    account_uuid: str,
    minio_client: Any | None = None,
) -> None:
    """Periodically sync positions from Alpaca to PostgreSQL and lake.

    Runs until the surrounding task is cancelled, sleeping
    ``POSITION_SYNC_INTERVAL`` seconds between syncs.

    Args:
        adapter: Broker adapter passed through to ``sync_positions``.
        pool: Connection pool passed through to ``sync_positions``.
        account_uuid: Broker account whose positions are synced.
        minio_client: Optional lake client passed through to ``sync_positions``.
    """
    while True:
        await sync_positions(adapter, pool, account_uuid, minio_client)
        await asyncio.sleep(POSITION_SYNC_INTERVAL)
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the broker service: set up dependencies, then drain the order queue.

    Registers the broker account, starts the background position sync, and
    polls the broker queue forever, handing each job to ``process_order_job``.
    A failing job is logged and does not stop the loop.
    """
    config = load_config()
    setup_logging("broker_service", level=config.log_level, json_output=config.json_logs)

    pool = await get_pg_pool(config)
    rds = get_redis(config)

    # Initialize MinIO client for lake publishing
    from minio import Minio
    minio_client = Minio(
        config.minio.endpoint,
        access_key=config.minio.access_key,
        secret_key=config.minio.secret_key,
        secure=config.minio.secure,
    )
    # Ensure lakehouse bucket exists
    if not minio_client.bucket_exists(LAKEHOUSE_BUCKET):
        minio_client.make_bucket(LAKEHOUSE_BUCKET)

    # Determine mode — default to paper for safety (Req 8.1)
    mode = TradingMode.LIVE if config.broker.mode == "live" else TradingMode.PAPER
    if mode == TradingMode.LIVE:
        logger.warning("LIVE trading mode enabled — orders will be submitted to real broker")

    adapter = AlpacaBrokerAdapter(
        api_key=config.broker.api_key or "",
        api_secret=config.broker.api_secret or "",
        mode=mode,
        base_url=config.broker.base_url,
    )

    # Generate a stable account UUID from the API key
    account_uuid = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"alpaca-{config.broker.api_key or 'default'}"))

    # Register broker account on startup
    await register_broker_account(pool, account_uuid, adapter)

    # Start position sync in background
    sync_task = asyncio.create_task(
        position_sync_loop(adapter, pool, account_uuid, minio_client)
    )

    queue = queue_key(QUEUE_BROKER)
    logger.info("Broker service started (mode=%s)", mode.value)

    try:
        while True:
            result = await rds.lpop(queue)
            if not result:
                # Queue is empty: back off before polling again.
                await asyncio.sleep(2)
                continue
            try:
                # A client that does not decode responses returns bytes;
                # str() on bytes would give "b'...'", which is not valid JSON.
                if isinstance(result, (bytes, bytearray)):
                    raw = result.decode("utf-8")
                else:
                    raw = str(result)
                job = json.loads(raw)
                await process_order_job(job, adapter, pool, account_uuid, rds, minio_client)
            except Exception:
                logger.exception("Error processing broker job")
    finally:
        sync_task.cancel()
        await pool.close()
        await rds.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -1,8 +1,17 @@
|
||||
"""Filings / Regulatory API adapter - fetches SEC-style submissions."""
|
||||
"""Filings / Regulatory API adapter interface and concrete SEC EDGAR provider.
|
||||
|
||||
The FilingsDataAdapter is the abstract interface for all filings data providers.
|
||||
SECEdgarAdapter is the first concrete implementation, targeting the SEC EDGAR
|
||||
full-text search system (EFTS) for company filings discovery.
|
||||
|
||||
Requirements: 2.3, 2.5, 3.1, 3.2, 3.3
|
||||
"""
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
import time
|
||||
from abc import ABC
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -11,48 +20,182 @@ from .base import AdapterResult, BaseAdapter
|
||||
logger = logging.getLogger("filings_adapter")
|
||||
|
||||
|
||||
class FilingsAdapter(BaseAdapter):
|
||||
"""Concrete adapter for SEC EDGAR or similar filings API."""
|
||||
class FilingsDataAdapter(BaseAdapter, ABC):
|
||||
"""Abstract interface for filings / regulatory data providers.
|
||||
|
||||
def __init__(self, base_url: str = "https://efts.sec.gov", user_agent: str = "StonksOracle/1.0"):
|
||||
self.base_url = base_url
|
||||
self.user_agent = user_agent
|
||||
Subclasses implement fetch() for their specific filings API.
|
||||
source_type() is concrete here since all filings adapters share the same type.
|
||||
"""
|
||||
|
||||
def source_type(self) -> str:
|
||||
return "filings_api"
|
||||
|
||||
async def fetch(self, ticker: str, config: Dict[str, Any]) -> AdapterResult:
|
||||
_cik = config.get("cik", "")
|
||||
endpoint = config.get("endpoint", f"/LATEST/search-index?q=%22{ticker}%22&dateRange=custom&startdt=2026-01-01&forms=8-K,10-Q,10-K")
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
class SECEdgarAdapter(FilingsDataAdapter):
|
||||
"""Concrete adapter for the SEC EDGAR full-text search system (EFTS).
|
||||
|
||||
Supports:
|
||||
- Full-text search (/LATEST/search-index) for 8-K, 10-Q, 10-K, and other forms
|
||||
- Filtering by date range, form type, and entity
|
||||
|
||||
The SEC EDGAR EFTS API is public and does not require an API key,
|
||||
but requires a descriptive User-Agent header per SEC fair-access policy.
|
||||
|
||||
Config options:
|
||||
cik: Company CIK number (optional, narrows search)
|
||||
forms: Comma-separated form types to search (default "8-K,10-Q,10-K")
|
||||
start_date: Only filings on or after this date, YYYY-MM-DD (optional)
|
||||
end_date: Only filings on or before this date, YYYY-MM-DD (optional)
|
||||
query: Custom search query override (optional, replaces ticker-based query)
|
||||
"""
|
||||
|
||||
SEARCH_ENDPOINT: str = "/LATEST/search-index"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_url: str = "https://efts.sec.gov",
|
||||
user_agent: str = "StonksOracle/1.0 ([email])",
|
||||
) -> None:
|
||||
self.base_url: str = base_url.rstrip("/")
|
||||
self.user_agent: str = user_agent
|
||||
|
||||
async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
|
||||
"""Fetch filings from SEC EDGAR EFTS for a given ticker.
|
||||
|
||||
Args:
|
||||
ticker: The company ticker symbol.
|
||||
config: Source-specific configuration from the sources table.
|
||||
|
||||
Returns:
|
||||
AdapterResult with raw payload, parsed filing items, and metadata.
|
||||
"""
|
||||
url, params, headers = self._build_request(ticker, config)
|
||||
|
||||
async with httpx.AsyncClient(timeout=30) as client:
|
||||
t0 = time.monotonic()
|
||||
try:
|
||||
resp = await client.get(url, headers=headers)
|
||||
resp = await client.get(url, params=params, headers=headers)
|
||||
elapsed_ms = (time.monotonic() - t0) * 1000
|
||||
resp.raise_for_status()
|
||||
|
||||
raw = resp.content
|
||||
data = resp.json()
|
||||
content_hash = hashlib.sha256(raw).hexdigest()
|
||||
items = self._extract_items(data)
|
||||
|
||||
hits = data.get("hits", {}).get("hits", [])
|
||||
return AdapterResult(
|
||||
source_type="filings_api",
|
||||
ticker=ticker,
|
||||
items=hits,
|
||||
items=items,
|
||||
raw_payload=raw,
|
||||
content_hash=content_hash,
|
||||
fetched_at=datetime.utcnow(),
|
||||
fetched_at=datetime.now(timezone.utc),
|
||||
http_status=resp.status_code,
|
||||
response_time_ms=round(elapsed_ms, 1),
|
||||
metadata={
|
||||
"provider": "sec_edgar",
|
||||
"results_count": len(items),
|
||||
"total_hits": self._total_hits(data),
|
||||
"query": params.get("q", ""),
|
||||
"forms": params.get("forms", ""),
|
||||
},
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
elapsed_ms = (time.monotonic() - t0) * 1000
|
||||
logger.error("SEC EDGAR HTTP error for %s: %s", ticker, e)
|
||||
return self._error_result(
|
||||
ticker, str(e), elapsed_ms,
|
||||
http_status=e.response.status_code if e.response else None,
|
||||
raw=e.response.content if e.response else b"",
|
||||
)
|
||||
except httpx.TimeoutException as e:
|
||||
elapsed_ms = (time.monotonic() - t0) * 1000
|
||||
logger.error("SEC EDGAR timeout for %s: %s", ticker, e)
|
||||
return self._error_result(ticker, f"timeout: {e}", elapsed_ms)
|
||||
except Exception as e:
|
||||
logger.error(f"Filings fetch failed for {ticker}: {e}")
|
||||
return AdapterResult(
|
||||
source_type="filings_api",
|
||||
ticker=ticker,
|
||||
items=[],
|
||||
raw_payload=b"",
|
||||
content_hash="",
|
||||
fetched_at=datetime.utcnow(),
|
||||
error=str(e),
|
||||
)
|
||||
elapsed_ms = (time.monotonic() - t0) * 1000
|
||||
logger.error("SEC EDGAR fetch failed for %s: %s", ticker, e)
|
||||
return self._error_result(ticker, str(e), elapsed_ms)
|
||||
|
||||
def _build_request(
    self, ticker: str, config: dict[str, Any]
) -> tuple[str, dict[str, str], dict[str, str]]:
    """Build the URL, query params, and headers for an EDGAR EFTS request.

    Args:
        ticker: Ticker symbol; used as a quoted phrase when no custom
            ``query`` is configured.
        config: Source configuration. Recognised keys: ``query``, ``forms``,
            ``start_date``, ``end_date`` and ``cik``.

    Returns:
        A ``(url, params, headers)`` tuple.
    """
    params: dict[str, str] = {}
    headers: dict[str, str] = {"User-Agent": self.user_agent}

    # Query: use custom override or default to ticker-based search
    query = config.get("query")
    params["q"] = str(query) if query else f'"{ticker}"'

    # Form types filter
    params["forms"] = str(config.get("forms", "8-K,10-Q,10-K"))

    # Date range: either bound on its own switches the search to a custom range
    if config.get("start_date"):
        params["dateRange"] = "custom"
        params["startdt"] = str(config["start_date"])
    if config.get("end_date"):
        params["dateRange"] = "custom"
        params["enddt"] = str(config["end_date"])

    # CIK filter (entity-level narrowing), appended to the query text
    cik = config.get("cik")
    if cik:
        params["q"] = f'{params["q"]} AND cik:{cik}'

    url = f"{self.base_url}{self.SEARCH_ENDPOINT}"
    return url, params, headers
|
||||
|
||||
def _extract_items(self, data: dict[str, Any]) -> list[dict[str, Any]]:
    """Extract the filing hits from an EDGAR EFTS response.

    EFTS returns results under hits.hits as a list of objects,
    each containing _source with fields like file_date, form_type,
    entity_name, file_num, and period_of_report.

    Args:
        data: Parsed JSON body of the response.

    Returns:
        The list found at ``data["hits"]["hits"]``, or an empty list when the
        body does not have that shape.
    """
    # A JSON body that is not an object (list, null, ...) carries no hits.
    if not isinstance(data, dict):
        return []
    hits_wrapper = data.get("hits", {})
    if not isinstance(hits_wrapper, dict):
        return []
    hits = hits_wrapper.get("hits", [])
    return hits if isinstance(hits, list) else []
|
||||
|
||||
def _total_hits(self, data: dict[str, Any]) -> int:
    """Extract total hit count from EFTS response.

    Accepts both shapes of ``hits.total``: an object with a ``value`` field
    or a bare integer.

    Args:
        data: Parsed JSON body of the response.

    Returns:
        The total hit count, or 0 when it is absent or the body does not
        have the expected shape.
    """
    # A JSON body that is not an object (list, null, ...) carries no total.
    if not isinstance(data, dict):
        return 0
    hits_wrapper = data.get("hits", {})
    if not isinstance(hits_wrapper, dict):
        return 0
    total = hits_wrapper.get("total", {})
    if isinstance(total, dict):
        # "or 0" also covers an explicit null value, which int() would reject.
        return int(total.get("value") or 0)
    if isinstance(total, int):
        return total
    return 0
|
||||
|
||||
def _error_result(
    self,
    ticker: str,
    error: str,
    elapsed_ms: float,
    http_status: int | None = None,
    raw: bytes = b"",
) -> AdapterResult:
    """Build an error AdapterResult for filings fetches.

    Args:
        ticker: Ticker the failed fetch was for.
        error: Error description stored on the result.
        elapsed_ms: Time spent on the request, in milliseconds.
        http_status: HTTP status code, when a response was received.
        raw: Raw response body, when one was received.

    Returns:
        An AdapterResult with no items, an empty content hash, the current
        UTC time as ``fetched_at`` and the elapsed time rounded to 0.1 ms.
    """
    return AdapterResult(
        source_type="filings_api",
        ticker=ticker,
        items=[],
        raw_payload=raw,
        content_hash="",
        fetched_at=datetime.now(timezone.utc),
        error=error,
        http_status=http_status,
        response_time_ms=round(elapsed_ms, 1),
        metadata={"provider": "sec_edgar"},
    )
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
"""Market data API adapter - fetches quotes, bars, and reference data."""
|
||||
"""Market data API adapter interface and concrete Polygon.io provider.
|
||||
|
||||
The MarketDataAdapter is the abstract interface for all market data providers.
|
||||
PolygonMarketAdapter is the first concrete implementation, targeting the
|
||||
Polygon.io REST API for previous-day bars, quotes, and ticker details.
|
||||
|
||||
Requirements: 2.1, 2.5, 3.1, 3.2, 3.3
|
||||
"""
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -12,48 +20,158 @@ logger = logging.getLogger("market_adapter")
|
||||
|
||||
|
||||
class MarketDataAdapter(BaseAdapter):
    """Abstract interface for market data providers.

    Subclasses implement fetch() for their specific market data API.
    source_type() is concrete here and always reports "market_api".
    """

    def source_type(self) -> str:
        """Return the source type identifier for market data adapters."""
        return "market_api"
|
||||
|
||||
class PolygonMarketAdapter(MarketDataAdapter):
|
||||
"""Concrete adapter for the Polygon.io REST API.
|
||||
|
||||
Supports:
|
||||
- Previous-day aggregate bars (/v2/aggs/ticker/{ticker}/prev)
|
||||
- Range aggregate bars (/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date})
|
||||
- Ticker details (/v3/reference/tickers/{ticker})
|
||||
|
||||
The endpoint is selected via the source config's "endpoint" field,
|
||||
defaulting to previous-day bars.
|
||||
"""
|
||||
|
||||
PREV_BARS = "/v2/aggs/ticker/{ticker}/prev"
|
||||
RANGE_BARS = "/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
|
||||
TICKER_DETAILS = "/v3/reference/tickers/{ticker}"
|
||||
|
||||
def __init__(self, api_key: str, base_url: str = "https://api.polygon.io") -> None:
|
||||
self.api_key: str = api_key
|
||||
self.base_url: str = base_url.rstrip("/")
|
||||
|
||||
async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
    """Fetch market data from Polygon.io for a given ticker.

    Config options:
        endpoint: One of "prev_bars" (default), "range_bars", "ticker_details"
        multiplier: Bar multiplier for range queries (default 1)
        timespan: Bar timespan for range queries (default "day")
        from_date: Start date for range queries (YYYY-MM-DD)
        to_date: End date for range queries (YYYY-MM-DD)
        adjusted: Forwarded (lower-cased) for bar endpoints only when set;
            otherwise the parameter is omitted from the request
        sort: Forwarded for range queries when set
        limit: Forwarded for range queries when set

    Returns:
        An AdapterResult carrying the raw body, its SHA-256 hash and the
        extracted items on success. HTTP status errors, timeouts and any
        other exception are converted into an error AdapterResult instead
        of being raised.
    """
    endpoint_key = config.get("endpoint", "prev_bars")
    url, params = self._build_request(ticker, endpoint_key, config)

    async with httpx.AsyncClient(timeout=30) as client:
        t0 = time.monotonic()
        try:
            resp = await client.get(url, params=params)
            # Measure latency before status validation so error paths
            # report the same kind of timing as successes.
            elapsed_ms = (time.monotonic() - t0) * 1000
            resp.raise_for_status()

            raw = resp.content
            data = resp.json()
            content_hash = hashlib.sha256(raw).hexdigest()
            items = self._extract_items(data, endpoint_key)

            return AdapterResult(
                source_type="market_api",
                ticker=ticker,
                items=items,
                raw_payload=raw,
                content_hash=content_hash,
                fetched_at=datetime.now(timezone.utc),
                http_status=resp.status_code,
                response_time_ms=round(elapsed_ms, 1),
                metadata={
                    "provider": "polygon",
                    "endpoint": endpoint_key,
                    "results_count": data.get("resultsCount", len(items)),
                    "request_id": data.get("request_id", ""),
                },
            )
        except httpx.HTTPStatusError as e:
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.error("Polygon HTTP error for %s: %s", ticker, e)
            return self._error_result(
                ticker, str(e), elapsed_ms,
                http_status=e.response.status_code if e.response else None,
                raw=e.response.content if e.response else b"",
            )
        except httpx.TimeoutException as e:
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.error("Polygon timeout for %s: %s", ticker, e)
            return self._error_result(ticker, f"timeout: {e}", elapsed_ms)
        except Exception as e:
            # Boundary handler: the adapter reports failures as results.
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.error("Polygon fetch failed for %s: %s", ticker, e)
            return self._error_result(ticker, str(e), elapsed_ms)
|
||||
|
||||
def _build_request(
|
||||
self, ticker: str, endpoint_key: str, config: dict[str, Any]
|
||||
) -> tuple[str, dict[str, str]]:
|
||||
"""Build the URL and query params for a Polygon request."""
|
||||
params: dict[str, str] = {"apiKey": self.api_key}
|
||||
|
||||
if endpoint_key == "range_bars":
|
||||
multiplier = str(config.get("multiplier", 1))
|
||||
timespan = config.get("timespan", "day")
|
||||
from_date = config.get("from_date", "")
|
||||
to_date = config.get("to_date", "")
|
||||
path = self.RANGE_BARS.format(
|
||||
ticker=ticker,
|
||||
multiplier=multiplier,
|
||||
timespan=timespan,
|
||||
from_date=from_date,
|
||||
to_date=to_date,
|
||||
)
|
||||
if config.get("adjusted") is not None:
|
||||
params["adjusted"] = str(config["adjusted"]).lower()
|
||||
if config.get("sort"):
|
||||
params["sort"] = config["sort"]
|
||||
if config.get("limit"):
|
||||
params["limit"] = str(config["limit"])
|
||||
elif endpoint_key == "ticker_details":
|
||||
path = self.TICKER_DETAILS.format(ticker=ticker)
|
||||
else:
|
||||
# Default: previous-day bars
|
||||
path = self.PREV_BARS.format(ticker=ticker)
|
||||
if config.get("adjusted") is not None:
|
||||
params["adjusted"] = str(config["adjusted"]).lower()
|
||||
|
||||
return f"{self.base_url}{path}", params
|
||||
|
||||
def _extract_items(self, data: dict[str, Any], endpoint_key: str) -> list[dict[str, Any]]:
|
||||
"""Extract the relevant items list from a Polygon response."""
|
||||
if endpoint_key == "ticker_details":
|
||||
results = data.get("results", {})
|
||||
return [results] if isinstance(results, dict) and results else []
|
||||
|
||||
# Aggregate endpoints return results as a list
|
||||
results = data.get("results", [])
|
||||
if isinstance(results, list):
|
||||
return results
|
||||
return [results] if results else []
|
||||
|
||||
def _error_result(
    self,
    ticker: str,
    error: str,
    elapsed_ms: float,
    http_status: int | None = None,
    raw: bytes = b"",
) -> AdapterResult:
    """Wrap a failed market-data fetch in an ``AdapterResult``.

    Args:
        ticker: Ticker the fetch was issued for.
        error: Human-readable failure description.
        elapsed_ms: Time spent on the request, in milliseconds.
        http_status: HTTP status code, when a response was received.
        raw: Raw response body, when one is available.

    Returns:
        An ``AdapterResult`` with no items, an empty content hash, the
        elapsed time rounded to one decimal and ``error`` populated.
    """
    failed_at = datetime.now(timezone.utc)
    latency_ms = round(elapsed_ms, 1)
    provider_meta = {"provider": "polygon"}
    return AdapterResult(
        source_type="market_api",
        ticker=ticker,
        items=[],
        raw_payload=raw,
        content_hash="",
        fetched_at=failed_at,
        error=error,
        http_status=http_status,
        response_time_ms=latency_ms,
        metadata=provider_meta,
    )
|
||||
|
||||
@@ -1,8 +1,17 @@
|
||||
"""News API adapter - fetches company-linked headlines and article metadata."""
|
||||
"""News API adapter interface and concrete Polygon.io news provider.
|
||||
|
||||
The NewsDataAdapter is the abstract interface for all news data providers.
|
||||
PolygonNewsAdapter is the first concrete implementation, targeting the
|
||||
Polygon.io REST API for company-linked news articles and headlines.
|
||||
|
||||
Requirements: 2.2, 2.5, 3.1, 3.2, 3.3
|
||||
"""
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
import time
|
||||
from abc import ABC
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -11,51 +20,147 @@ from .base import AdapterResult, BaseAdapter
|
||||
logger = logging.getLogger("news_adapter")
|
||||
|
||||
|
||||
class NewsDataAdapter(BaseAdapter, ABC):
    """Abstract interface for news data providers.

    Subclasses implement fetch() for their specific news API.
    source_type() is concrete here since all news adapters share the same type.
    """

    def source_type(self) -> str:
        """Return the source type identifier for news adapters."""
        return "news_api"
|
||||
|
||||
class PolygonNewsAdapter(NewsDataAdapter):
|
||||
"""Concrete adapter for the Polygon.io ticker news endpoint.
|
||||
|
||||
Supports:
|
||||
- Ticker news (/v2/reference/news?ticker={ticker})
|
||||
|
||||
Config options:
|
||||
limit: Max articles to return per request (default 20, max 1000)
|
||||
published_utc_gte: Only articles published on or after this date (YYYY-MM-DD)
|
||||
published_utc_lte: Only articles published on or before this date (YYYY-MM-DD)
|
||||
order: Sort order for results, "asc" or "desc" (default "desc")
|
||||
"""
|
||||
|
||||
NEWS_ENDPOINT = "/v2/reference/news"
|
||||
|
||||
def __init__(self, api_key: str, base_url: str = "https://api.polygon.io") -> None:
|
||||
self.api_key: str = api_key
|
||||
self.base_url: str = base_url.rstrip("/")
|
||||
|
||||
async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
    """Fetch news articles from Polygon.io for a given ticker.

    Args:
        ticker: The company ticker symbol.
        config: Source-specific configuration from the sources table.

    Returns:
        AdapterResult with raw payload, parsed article items, and metadata.
        HTTP status errors, timeouts and any other exception are converted
        into an error AdapterResult instead of being raised.
    """
    url, params = self._build_request(ticker, config)

    async with httpx.AsyncClient(timeout=30) as client:
        t0 = time.monotonic()
        try:
            resp = await client.get(url, params=params)
            # Measure latency before status validation so error paths
            # report the same kind of timing as successes.
            elapsed_ms = (time.monotonic() - t0) * 1000
            resp.raise_for_status()

            raw = resp.content
            data = resp.json()
            content_hash = hashlib.sha256(raw).hexdigest()
            items = self._extract_items(data)

            return AdapterResult(
                source_type="news_api",
                ticker=ticker,
                items=items,
                raw_payload=raw,
                content_hash=content_hash,
                fetched_at=datetime.now(timezone.utc),
                http_status=resp.status_code,
                response_time_ms=round(elapsed_ms, 1),
                metadata={
                    "provider": "polygon",
                    "results_count": data.get("count", len(items)),
                    "next_url": data.get("next_url", ""),
                    "request_id": data.get("request_id", ""),
                },
            )
        except httpx.HTTPStatusError as e:
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.error("Polygon news HTTP error for %s: %s", ticker, e)
            return self._error_result(
                ticker, str(e), elapsed_ms,
                http_status=e.response.status_code if e.response else None,
                raw=e.response.content if e.response else b"",
            )
        except httpx.TimeoutException as e:
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.error("Polygon news timeout for %s: %s", ticker, e)
            return self._error_result(ticker, f"timeout: {e}", elapsed_ms)
        except Exception as e:
            # Boundary handler: the adapter reports failures as results.
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.error("Polygon news fetch failed for %s: %s", ticker, e)
            return self._error_result(ticker, str(e), elapsed_ms)
|
||||
|
||||
def _build_request(
|
||||
self, ticker: str, config: dict[str, Any]
|
||||
) -> tuple[str, dict[str, str]]:
|
||||
"""Build the URL and query params for a Polygon news request."""
|
||||
params: dict[str, str] = {
|
||||
"apiKey": self.api_key,
|
||||
"ticker": ticker,
|
||||
}
|
||||
|
||||
limit = config.get("limit", 20)
|
||||
params["limit"] = str(min(int(limit), 1000))
|
||||
|
||||
if config.get("order"):
|
||||
params["order"] = config["order"]
|
||||
|
||||
if config.get("published_utc_gte"):
|
||||
params["published_utc.gte"] = config["published_utc_gte"]
|
||||
|
||||
if config.get("published_utc_lte"):
|
||||
params["published_utc.lte"] = config["published_utc_lte"]
|
||||
|
||||
url = f"{self.base_url}{self.NEWS_ENDPOINT}"
|
||||
return url, params
|
||||
|
||||
def _extract_items(self, data: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
"""Extract the article list from a Polygon news response.
|
||||
|
||||
Polygon returns articles under the "results" key as a list of objects,
|
||||
each containing fields like id, publisher, title, article_url, tickers,
|
||||
published_utc, description, and keywords.
|
||||
"""
|
||||
results = data.get("results", [])
|
||||
if isinstance(results, list):
|
||||
return results
|
||||
return []
|
||||
|
||||
def _error_result(
    self,
    ticker: str,
    error: str,
    elapsed_ms: float,
    http_status: int | None = None,
    raw: bytes = b"",
) -> AdapterResult:
    """Wrap a failed news fetch in an ``AdapterResult``.

    Args:
        ticker: Ticker the fetch was issued for.
        error: Human-readable failure description.
        elapsed_ms: Time spent on the request, in milliseconds.
        http_status: HTTP status code, when a response was received.
        raw: Raw response body, when one is available.

    Returns:
        An ``AdapterResult`` with no items, an empty content hash, the
        elapsed time rounded to one decimal and ``error`` populated.
    """
    failed_at = datetime.now(timezone.utc)
    latency_ms = round(elapsed_ms, 1)
    provider_meta = {"provider": "polygon"}
    return AdapterResult(
        source_type="news_api",
        ticker=ticker,
        items=[],
        raw_payload=raw,
        content_hash="",
        fetched_at=failed_at,
        error=error,
        http_status=http_status,
        response_time_ms=latency_ms,
        metadata=provider_meta,
    )
|
||||
|
||||
@@ -0,0 +1,603 @@
|
||||
"""Paper trading adapter - local order simulation and state sync.
|
||||
|
||||
Implements a fully local paper trading engine that simulates order
|
||||
execution without requiring a real broker API. Tracks positions,
|
||||
account balance, fills, and order events in-memory with PostgreSQL
|
||||
persistence for state sync and audit trail.
|
||||
|
||||
Requirements: 8.1, 8.3, 8.5, 2.4
|
||||
Design: Section 4.9 - Broker Adapter (paper mode)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.adapters.broker_adapter import (
|
||||
AccountInfo,
|
||||
BrokerDataAdapter,
|
||||
OrderEventType,
|
||||
OrderRequest,
|
||||
OrderResponse,
|
||||
OrderSide,
|
||||
OrderStatus,
|
||||
OrderType,
|
||||
PositionInfo,
|
||||
TradingMode,
|
||||
)
|
||||
from services.adapters.base import AdapterResult
|
||||
|
||||
logger = logging.getLogger("paper_trading")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-memory paper trading state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class PaperPosition:
    """Long-only holding in a single ticker for the paper account.

    Keeps the share count, the volume-weighted average entry price and the
    cumulative realized PnL.
    """

    def __init__(
        self,
        ticker: str,
        quantity: float = 0.0,
        avg_entry_price: float = 0.0,
        realized_pnl: float = 0.0,
    ) -> None:
        self.ticker = ticker
        self.quantity = quantity
        self.avg_entry_price = avg_entry_price
        self.realized_pnl = realized_pnl

    def apply_fill(self, side: OrderSide, fill_qty: float, fill_price: float) -> float:
        """Fold one fill into the position.

        Buys re-average the entry price. Any other side reduces the holding
        by at most the quantity currently held and realizes PnL against the
        average entry price.

        Returns:
            PnL realized by this fill (0.0 for buys and for sells against
            an empty position).
        """
        if side == OrderSide.BUY:
            cost_basis = self.avg_entry_price * self.quantity + fill_price * fill_qty
            self.quantity += fill_qty
            if self.quantity > 0:
                self.avg_entry_price = cost_basis / self.quantity
            return 0.0

        # Nothing held: a sell is a no-op for the position.
        if not self.quantity > 0:
            return 0.0

        closed_qty = min(fill_qty, self.quantity)
        pnl = closed_qty * (fill_price - self.avg_entry_price)
        self.quantity -= closed_qty
        self.realized_pnl += pnl
        if self.quantity <= 0:
            # Fully closed: reset so the next buy starts a fresh average.
            self.quantity = 0.0
            self.avg_entry_price = 0.0
        return pnl

    @property
    def is_open(self) -> bool:
        """True while a positive quantity is held."""
        return self.quantity > 0

    def to_position_info(self, current_price: float | None = None) -> PositionInfo:
        """Build the broker-facing ``PositionInfo`` snapshot.

        When ``current_price`` is omitted the average entry price is used as
        the mark, which makes the unrealized PnL zero.
        """
        mark = self.avg_entry_price if current_price is None else current_price
        holding = self.quantity > 0
        if holding:
            unrealized = (mark - self.avg_entry_price) * self.quantity
        else:
            unrealized = 0.0
        return PositionInfo(
            ticker=self.ticker,
            quantity=self.quantity,
            avg_entry_price=self.avg_entry_price,
            current_price=mark,
            unrealized_pnl=round(unrealized, 4),
            market_value=round(mark * self.quantity, 4),
            side="long" if holding else "flat",
        )
|
||||
|
||||
|
||||
class PaperAccount:
    """Mutable in-memory state of one paper trading account.

    Holds cash, per-ticker positions, order responses keyed by order id, the
    audit event list and the idempotency-key index.
    """

    def __init__(
        self,
        account_id: str = "paper-default",
        initial_cash: float = 100_000.0,
    ) -> None:
        self.account_id = account_id
        self.initial_cash = initial_cash
        self.cash = initial_cash
        self.positions: dict[str, PaperPosition] = {}
        self.orders: dict[str, OrderResponse] = {}
        self.order_events: list[dict[str, Any]] = []
        # Maps idempotency key -> order id of the first submission.
        self._seen_idempotency_keys: dict[str, str] = {}

    @property
    def portfolio_value(self) -> float:
        """Cash plus open positions valued at their average entry price."""
        open_cost_basis = [
            held.quantity * held.avg_entry_price
            for held in self.positions.values()
            if held.is_open
        ]
        return self.cash + sum(open_cost_basis)

    @property
    def buying_power(self) -> float:
        """Funds available for new buys; equal to the cash balance."""
        return self.cash

    def get_position(self, ticker: str) -> PaperPosition:
        """Return the position for ``ticker``, creating an empty one if absent."""
        try:
            return self.positions[ticker]
        except KeyError:
            fresh = PaperPosition(ticker=ticker)
            self.positions[ticker] = fresh
            return fresh

    def to_account_info(self) -> AccountInfo:
        """Summarize the account as an ``AccountInfo`` with 2-decimal amounts."""
        rounded_power = round(self.buying_power, 2)
        rounded_cash = round(self.cash, 2)
        rounded_value = round(self.portfolio_value, 2)
        return AccountInfo(
            account_id=self.account_id,
            buying_power=rounded_power,
            cash=rounded_cash,
            portfolio_value=rounded_value,
            currency="USD",
            mode=TradingMode.PAPER,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Paper trading adapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class PaperTradingAdapter(BrokerDataAdapter):
|
||||
"""Local paper trading adapter that simulates order execution.
|
||||
|
||||
All orders are filled immediately at the estimated price (market orders)
|
||||
or at the limit/stop price when applicable. No real broker API is called.
|
||||
|
||||
Features:
|
||||
- Idempotent order submission via idempotency_key (Req 8.5)
|
||||
- Full order event trail for audit (Req 8.3)
|
||||
- Position tracking with average entry price
|
||||
- Cash balance management
|
||||
- State sync to/from PostgreSQL
|
||||
|
||||
The adapter operates in PAPER mode only and rejects any attempt
|
||||
to switch to LIVE mode.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    account_id: str = "paper-default",
    initial_cash: float = 100_000.0,
    simulated_slippage_pct: float = 0.001,
) -> None:
    """Create a paper adapter with a fresh in-memory account.

    Args:
        account_id: Identifier given to the paper account.
        initial_cash: Starting cash balance.
        simulated_slippage_pct: Fractional slippage applied to market
            order fills (0.001 = 0.1%).
    """
    super().__init__(mode=TradingMode.PAPER)
    self.slippage_pct = simulated_slippage_pct
    self.account = PaperAccount(initial_cash=initial_cash, account_id=account_id)
|
||||
|
||||
def source_type(self) -> str:
    """Return the source type identifier used for broker snapshots."""
    identifier = "broker"
    return identifier
|
||||
|
||||
async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
    """Fetch paper positions/account as a raw artifact snapshot.

    Args:
        ticker: Ticker to snapshot. For the "orders" endpoint, "*" selects
            orders for every ticker.
        config: Reads ``endpoint``: "account", "orders", or anything else
            (default "positions") for the position in ``ticker``.

    Returns:
        An AdapterResult whose items are the snapshot dictionaries and whose
        raw payload is their JSON encoding. The position snapshot is empty
        when no open position exists.
    """
    endpoint = config.get("endpoint", "positions")
    now = datetime.now(timezone.utc)

    if endpoint == "account":
        items = [self.account.to_account_info().to_dict()]
    elif endpoint == "orders":
        items = [
            resp.to_dict()
            for resp in self.account.orders.values()
            if resp.ticker == ticker or ticker == "*"
        ]
    else:
        # Look the position up without get_position(): a read-only snapshot
        # must not insert an empty position for every ticker queried.
        pos = self.account.positions.get(ticker)
        if pos is not None and pos.is_open:
            items = [pos.to_position_info().to_dict()]
        else:
            items = []

    raw = json.dumps(items).encode()
    return AdapterResult(
        source_type="broker",
        ticker=ticker,
        items=items,
        raw_payload=raw,
        content_hash="",
        fetched_at=now,
        metadata={"provider": "paper", "mode": "paper", "endpoint": endpoint},
    )
|
||||
|
||||
async def submit_order(self, order: OrderRequest) -> OrderResponse:
    """Simulate order submission and immediate fill.

    Idempotency: if the same idempotency_key was already used,
    return the original response (Req 8.5).

    An order is rejected (and the rejection is stored and recorded as an
    event) when its quantity is not positive, when a buy costs more than
    the available cash, or when a sell exceeds the shares held. Otherwise
    it is filled in full at the simulated price, and the position and cash
    balance are updated.

    Args:
        order: The order to simulate.

    Returns:
        The stored OrderResponse with status FILLED or REJECTED.
    """
    acct = self.account

    # Idempotency check
    existing_id = acct._seen_idempotency_keys.get(order.idempotency_key)
    if existing_id and existing_id in acct.orders:
        logger.info("Duplicate order key %s — returning cached response", order.idempotency_key)
        return acct.orders[existing_id]

    now = datetime.now(timezone.utc)
    order_id = str(uuid.uuid4())

    # A zero or negative quantity would invert the cash and position
    # arithmetic below (a negative buy would credit cash).
    if order.quantity <= 0:
        return self._reject_order(
            order, order_id, now, f"Invalid quantity: {order.quantity}"
        )

    # Determine fill price based on order type
    fill_price = self._compute_fill_price(order)

    # Check if we have enough cash for buys
    if order.side == OrderSide.BUY:
        required_cash = fill_price * order.quantity
        if required_cash > acct.cash:
            return self._reject_order(
                order, order_id, now,
                f"Insufficient cash: need {required_cash:.2f}, have {acct.cash:.2f}",
            )

    # Check if we have enough shares for sells
    if order.side == OrderSide.SELL:
        # Plain lookup so a rejected sell does not create an empty position.
        held = acct.positions.get(order.ticker)
        held_qty = held.quantity if held is not None else 0.0
        if held_qty < order.quantity:
            return self._reject_order(
                order, order_id, now,
                f"Insufficient shares: need {order.quantity}, have {held_qty}",
            )

    # Simulate immediate fill
    position = acct.get_position(order.ticker)
    realized_pnl = position.apply_fill(order.side, order.quantity, fill_price)

    # Update cash
    notional = fill_price * order.quantity
    if order.side == OrderSide.BUY:
        acct.cash -= notional
    else:
        acct.cash += notional

    resp = OrderResponse(
        broker_order_id=order_id,
        status=OrderStatus.FILLED,
        ticker=order.ticker,
        side=order.side,
        quantity=order.quantity,
        filled_quantity=order.quantity,
        filled_avg_price=fill_price,
        submitted_at=now,
        raw_response={
            "realized_pnl": round(realized_pnl, 4),
            "cash_after": round(acct.cash, 2),
            "position_qty_after": position.quantity,
            "simulated": True,
        },
    )

    # Record events
    self._record_event(order_id, OrderEventType.SUBMITTED, {"ticker": order.ticker}, now)
    self._record_event(order_id, OrderEventType.ACCEPTED, {"ticker": order.ticker}, now)
    self._record_event(order_id, OrderEventType.FILL, {
        "fill_price": fill_price,
        "fill_qty": order.quantity,
        "realized_pnl": round(realized_pnl, 4),
    }, now)

    acct.orders[order_id] = resp
    acct._seen_idempotency_keys[order.idempotency_key] = order_id

    logger.info(
        "Paper fill: %s %s %.0f %s @ %.2f | cash=%.2f pnl=%.4f",
        order_id[:8], order.side.value, order.quantity,
        order.ticker, fill_price, acct.cash, realized_pnl,
    )

    return resp

def _reject_order(
    self,
    order: OrderRequest,
    order_id: str,
    timestamp: datetime,
    reason: str,
) -> OrderResponse:
    """Build, record and store a REJECTED response for ``order``."""
    resp = OrderResponse(
        broker_order_id=order_id,
        status=OrderStatus.REJECTED,
        ticker=order.ticker,
        side=order.side,
        quantity=order.quantity,
        submitted_at=timestamp,
        error=reason,
    )
    self._record_event(order_id, OrderEventType.REJECTED, resp.to_dict(), timestamp)
    self.account.orders[order_id] = resp
    # Remember the key so a retry returns this same rejection.
    self.account._seen_idempotency_keys[order.idempotency_key] = order_id
    return resp
|
||||
|
||||
async def cancel_order(self, broker_order_id: str) -> OrderResponse:
    """Cancel a paper order. Only pending orders can be cancelled.

    Args:
        broker_order_id: Id returned by ``submit_order``.

    Returns:
        A CANCELLED response (stored in place of the original) when the
        order was still cancellable. A REJECTED response with ``error`` set
        is returned, and nothing is stored or recorded, when the order is
        unknown or already in a terminal state.
    """
    existing = self.account.orders.get(broker_order_id)
    if existing is None:
        return OrderResponse(
            broker_order_id=broker_order_id,
            status=OrderStatus.REJECTED,
            ticker="",
            side=OrderSide.BUY,
            quantity=0,
            error=f"Order {broker_order_id} not found",
        )

    # Paper orders fill immediately, so they can't be cancelled
    if existing.status == OrderStatus.FILLED:
        refusal = "Cannot cancel a filled order"
    elif existing.status in (OrderStatus.REJECTED, OrderStatus.CANCELLED):
        # Already terminal: rewriting it to CANCELLED would corrupt the
        # stored outcome and add a spurious audit event.
        refusal = f"Cannot cancel an order in terminal state {existing.status.value}"
    else:
        refusal = None

    if refusal is not None:
        return OrderResponse(
            broker_order_id=broker_order_id,
            status=OrderStatus.REJECTED,
            ticker=existing.ticker,
            side=existing.side,
            quantity=existing.quantity,
            error=refusal,
        )

    now = datetime.now(timezone.utc)
    cancelled = OrderResponse(
        broker_order_id=broker_order_id,
        status=OrderStatus.CANCELLED,
        ticker=existing.ticker,
        side=existing.side,
        quantity=existing.quantity,
        submitted_at=existing.submitted_at,
    )
    self.account.orders[broker_order_id] = cancelled
    self._record_event(broker_order_id, OrderEventType.CANCELLED, {}, now)
    return cancelled
|
||||
|
||||
async def get_order_status(self, broker_order_id: str) -> OrderResponse:
    """Look up the stored response for a paper order.

    Returns:
        The stored OrderResponse, or a REJECTED placeholder with an
        explanatory ``error`` when the id is unknown.
    """
    found = self.account.orders.get(broker_order_id)
    if found is not None:
        return found
    return OrderResponse(
        broker_order_id=broker_order_id,
        status=OrderStatus.REJECTED,
        ticker="",
        side=OrderSide.BUY,
        quantity=0,
        error=f"Order {broker_order_id} not found",
    )
|
||||
|
||||
async def get_positions(self) -> list[PositionInfo]:
    """Return a PositionInfo snapshot for every open paper position."""
    snapshots = []
    for held in self.account.positions.values():
        if held.is_open:
            snapshots.append(held.to_position_info())
    return snapshots
|
||||
|
||||
async def get_account(self) -> AccountInfo:
    """Return the paper account summary as an AccountInfo."""
    summary = self.account.to_account_info()
    return summary
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
def _compute_fill_price(self, order: OrderRequest) -> float:
    """Determine the simulated fill price for an order.

    Limit and stop-limit orders fill at their limit price and stop orders
    at their stop price, provided that price is set. Everything else is
    treated as a market order: the limit price (or 100.0 when none is
    given) is used as the reference and slippage is applied against the
    trader, upward for buys and downward otherwise. The result is rounded
    to 4 decimals.
    """
    kind = order.order_type
    if kind == OrderType.STOP:
        pinned = order.stop_price
    elif kind == OrderType.LIMIT or kind == OrderType.STOP_LIMIT:
        pinned = order.limit_price
    else:
        pinned = None
    if pinned is not None:
        return pinned

    # Market-style fill: no quote feed exists, so fall back to a fixed
    # reference price when the caller supplied no estimate.
    reference = 100.0 if order.limit_price is None else order.limit_price
    if order.side == OrderSide.BUY:
        factor = 1 + self.slippage_pct
    else:
        factor = 1 - self.slippage_pct
    return round(reference * factor, 4)
|
||||
|
||||
def _record_event(
    self,
    order_id: str,
    event_type: OrderEventType,
    data: dict[str, Any],
    timestamp: datetime,
) -> None:
    """Append one entry to the account's in-memory order event list.

    The entry stores the order id, the event type's ``value``, the payload
    as given, and the timestamp in ISO-8601 form.
    """
    entry = {
        "order_id": order_id,
        "event_type": event_type.value,
        "data": data,
        "timestamp": timestamp.isoformat(),
    }
    self.account.order_events.append(entry)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State sync: persist and restore paper trading state to/from PostgreSQL
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# SQL for persisting paper orders to the orders table
|
||||
_INSERT_PAPER_ORDER = """
|
||||
INSERT INTO orders (
|
||||
id, recommendation_id, broker_account_id, ticker, side, order_type,
|
||||
quantity, limit_price, stop_price, status, idempotency_key,
|
||||
broker_order_id, decision_trace, submitted_at, filled_at,
|
||||
fill_price, fill_quantity
|
||||
) VALUES (
|
||||
$1::uuid, $2, $3, $4, $5, $6,
|
||||
$7, $8, $9, $10, $11,
|
||||
$12, $13::jsonb, $14, $15,
|
||||
$16, $17
|
||||
)
|
||||
ON CONFLICT (idempotency_key) DO NOTHING
|
||||
"""
|
||||
|
||||
_INSERT_PAPER_ORDER_EVENT = """
|
||||
INSERT INTO order_events (order_id, event_type, data, broker_timestamp)
|
||||
VALUES ($1::uuid, $2, $3::jsonb, $4)
|
||||
"""
|
||||
|
||||
_UPSERT_PAPER_POSITION = """
|
||||
INSERT INTO positions (broker_account_id, ticker, quantity, avg_entry_price, realized_pnl, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
ON CONFLICT (broker_account_id, ticker)
|
||||
DO UPDATE SET
|
||||
quantity = EXCLUDED.quantity,
|
||||
avg_entry_price = EXCLUDED.avg_entry_price,
|
||||
realized_pnl = EXCLUDED.realized_pnl,
|
||||
updated_at = EXCLUDED.updated_at
|
||||
"""
|
||||
|
||||
_UPSERT_PAPER_ACCOUNT = """
|
||||
INSERT INTO broker_accounts (id, provider, account_id, mode, config, active)
|
||||
VALUES ($1::uuid, 'paper', $2, 'paper', $3::jsonb, TRUE)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
config = EXCLUDED.config,
|
||||
active = TRUE
|
||||
"""
|
||||
|
||||
_LOAD_PAPER_POSITIONS = """
|
||||
SELECT ticker, quantity, avg_entry_price, COALESCE(realized_pnl, 0) AS realized_pnl
|
||||
FROM positions
|
||||
WHERE broker_account_id = $1 AND quantity > 0
|
||||
"""
|
||||
|
||||
_LOAD_PAPER_ACCOUNT_CONFIG = """
|
||||
SELECT config FROM broker_accounts
|
||||
WHERE account_id = $1 AND mode = 'paper' AND active = TRUE
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
_LOAD_PAPER_ORDERS = """
|
||||
SELECT
|
||||
id, ticker, side, order_type, quantity, status,
|
||||
idempotency_key, broker_order_id, fill_price, fill_quantity,
|
||||
submitted_at
|
||||
FROM orders
|
||||
WHERE broker_account_id = (
|
||||
SELECT id FROM broker_accounts WHERE account_id = $1 AND mode = 'paper' LIMIT 1
|
||||
)
|
||||
ORDER BY submitted_at DESC
|
||||
LIMIT 500
|
||||
"""
|
||||
|
||||
|
||||
async def sync_state_to_db(
    adapter: PaperTradingAdapter,
    pool: asyncpg.Pool,
    broker_account_uuid: str | None = None,
) -> None:
    """Persist the current paper trading state to PostgreSQL.

    Writes, inside a single transaction:
    - the broker_accounts row for the paper account
    - a positions row for every entry in ``account.positions``
    - an orders row for every entry in ``account.orders`` (idempotent via
      ON CONFLICT ... DO NOTHING)
    - an order_events row for every entry in ``account.order_events``

    This enables state recovery after restarts and provides the
    full execution audit trail (Requirement 8.3).

    Args:
        adapter: Paper trading adapter whose ``account`` is persisted.
        pool: asyncpg pool used to acquire the connection.
        broker_account_uuid: Row id to use for the broker account. When
            omitted, a deterministic UUIDv5 derived from the account id is used.

    Raises:
        Whatever asyncpg raises on failure; the transaction is rolled back and
        the in-memory event list is left untouched so a later sync can retry.
    """
    acct = adapter.account
    now = datetime.now(timezone.utc)
    acct_uuid = broker_account_uuid or str(uuid.uuid5(uuid.NAMESPACE_DNS, acct.account_id))

    async with pool.acquire() as conn:
        async with conn.transaction():
            # 1. Upsert broker account
            config_json = json.dumps({
                "initial_cash": acct.initial_cash,
                "current_cash": round(acct.cash, 2),
                "portfolio_value": round(acct.portfolio_value, 2),
                "slippage_pct": adapter.slippage_pct,
            })
            await conn.execute(_UPSERT_PAPER_ACCOUNT, acct_uuid, acct.account_id, config_json)

            # 2. Upsert positions
            for ticker, pos in acct.positions.items():
                await conn.execute(
                    _UPSERT_PAPER_POSITION,
                    acct_uuid,
                    ticker,
                    pos.quantity,
                    pos.avg_entry_price,
                    pos.realized_pnl,
                    now,
                )

            # 3. Insert orders (idempotent)
            for order_id, resp in acct.orders.items():
                # NOTE(review): the sync time is recorded as the fill time; the
                # response object is not read for an actual fill timestamp here.
                filled_at = now if resp.status == OrderStatus.FILLED else None
                await conn.execute(
                    _INSERT_PAPER_ORDER,
                    order_id,
                    None,  # recommendation_id
                    acct_uuid,
                    resp.ticker,
                    resp.side.value,
                    "market",  # paper orders are always market-simulated
                    resp.quantity,
                    # NOTE(review): the fill price is stored in the limit_price
                    # slot even though the order type is "market" -- confirm
                    # this is intended.
                    resp.filled_avg_price,  # limit_price
                    None,  # stop_price
                    resp.status.value,
                    order_id,  # use order_id as idempotency_key fallback
                    order_id,  # broker_order_id
                    json.dumps(resp.raw_response),
                    resp.submitted_at,
                    filled_at,
                    resp.filled_avg_price,
                    resp.filled_quantity,
                )

            # 4. Insert order events
            for event in acct.order_events:
                await conn.execute(
                    _INSERT_PAPER_ORDER_EVENT,
                    event["order_id"],
                    event["event_type"],
                    json.dumps(event["data"]),
                    datetime.fromisoformat(event["timestamp"]),
                )

    # Reached only when the transaction block exited without raising.
    logger.info(
        "Synced paper state to DB: account=%s positions=%d orders=%d events=%d",
        acct.account_id, len(acct.positions), len(acct.orders), len(acct.order_events),
    )

    # Clear events after sync to avoid re-inserting
    acct.order_events.clear()
|
||||
|
||||
|
||||
async def load_state_from_db(
    adapter: PaperTradingAdapter,
    pool: asyncpg.Pool,
) -> bool:
    """Restore paper trading state from PostgreSQL.

    Loads the saved cash balance and the open positions into
    ``adapter.account`` so the adapter can resume after a restart.

    Args:
        adapter: Paper trading adapter whose ``account`` is updated in place.
        pool: asyncpg pool used to acquire the connection.

    Returns:
        True if a saved account row was found, False otherwise (the account is
        left unchanged in that case).
    """
    acct = adapter.account

    async with pool.acquire() as conn:
        # Load account config
        row = await conn.fetchrow(_LOAD_PAPER_ACCOUNT_CONFIG, acct.account_id)
        if row is None:
            logger.info("No saved paper account state for %s", acct.account_id)
            return False

        # The config column may arrive as JSON text or as an already-decoded
        # object; a NULL column or a non-object value is treated as empty.
        raw_config = row["config"]
        config = json.loads(raw_config) if isinstance(raw_config, str) else raw_config
        if not isinstance(config, dict):
            config = {}
        saved_cash = config.get("current_cash")
        acct.cash = float(acct.initial_cash if saved_cash is None else saved_cash)

        # Load positions
        # NOTE(review): the external account id is passed here; confirm that
        # _LOAD_PAPER_POSITIONS resolves it to the broker_accounts row id that
        # sync_state_to_db stores in positions.broker_account_id.
        pos_rows = await conn.fetch(_LOAD_PAPER_POSITIONS, acct.account_id)
        for pr in pos_rows:
            ticker = pr["ticker"]
            acct.positions[ticker] = PaperPosition(
                ticker=ticker,
                quantity=float(pr["quantity"]),
                avg_entry_price=float(pr["avg_entry_price"] or 0),
                realized_pnl=float(pr["realized_pnl"]),
            )

    logger.info(
        "Loaded paper state from DB: account=%s cash=%.2f positions=%d",
        acct.account_id, acct.cash, len(acct.positions),
    )
    return True
|
||||
@@ -0,0 +1,241 @@
|
||||
"""Resilient adapter wrapper with rate-limit coordination, retries, and backoff.
|
||||
|
||||
Wraps any BaseAdapter with:
|
||||
- Per-source-type rate limiting via Redis (distributed across workers)
|
||||
- Exponential backoff with jitter on retryable failures
|
||||
- Configurable retry counts and retryable HTTP status codes
|
||||
- Graceful degradation when Redis is unavailable
|
||||
|
||||
Requirements: 2.5, 3.4
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import redis.asyncio as aioredis
|
||||
|
||||
from services.shared.redis_keys import rate_limit_key
|
||||
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
|
||||
logger = logging.getLogger("resilient_adapter")

# HTTP status codes that are safe to retry
RETRYABLE_STATUS_CODES: frozenset[int] = frozenset({429, 500, 502, 503, 504})


@dataclass
class RetryConfig:
    """Configuration for retry and rate-limit behavior."""

    # Number of retries after the first attempt.
    max_retries: int = 3
    # Backoff for the first retry, in seconds; doubled for each later retry.
    base_delay: float = 1.0
    # Upper bound, in seconds, applied to the exponential part of the backoff.
    max_delay: float = 60.0
    # Random jitter added on top of the delay, as a fraction of that delay.
    jitter_factor: float = 0.5
    # HTTP statuses treated as transient failures.
    retryable_status_codes: frozenset[int] = RETRYABLE_STATUS_CODES
    # Rate limit: max requests per window per source type
    rate_limit_max: int = 30
    rate_limit_window_seconds: int = 60


# Sensible defaults per source type
DEFAULT_RETRY_CONFIGS: dict[str, RetryConfig] = {
    "market_api": RetryConfig(max_retries=3, rate_limit_max=30),
    "news_api": RetryConfig(max_retries=3, rate_limit_max=20),
    "filings_api": RetryConfig(max_retries=2, rate_limit_max=10, base_delay=2.0),
    "web_scrape": RetryConfig(max_retries=2, rate_limit_max=10, base_delay=2.0),
    "broker": RetryConfig(max_retries=2, rate_limit_max=60, base_delay=0.5),
}
|
||||
|
||||
|
||||
def compute_delay(attempt: int, config: RetryConfig) -> float:
    """Compute backoff delay with jitter for a given attempt number.

    The delay is ``base_delay * 2 ** attempt`` capped at ``max_delay``, plus a
    random jitter of up to ``jitter_factor`` times the capped delay. The
    result can therefore exceed ``max_delay`` by the jitter amount.

    Args:
        attempt: Zero-based index of the attempt that just failed.
        config: Retry configuration supplying base delay, cap and jitter.

    Returns:
        Seconds to wait before the next attempt.
    """
    try:
        exp_delay = config.base_delay * (2 ** attempt)
    except OverflowError:
        # 2 ** attempt no longer fits in a float; the cap applies regardless.
        exp_delay = config.max_delay
    capped = min(exp_delay, config.max_delay)
    jitter = capped * config.jitter_factor * random.random()
    return capped + jitter
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class RetryStats:
|
||||
"""Tracks retry statistics for observability."""
|
||||
|
||||
attempts: int = 0
|
||||
total_delay: float = 0.0
|
||||
rate_limited_waits: int = 0
|
||||
last_error: str | None = None
|
||||
retryable: bool = False
|
||||
|
||||
|
||||
class ResilientAdapter:
    """Wraps a BaseAdapter with rate-limit coordination, retries, and backoff.

    Usage:
        adapter = PolygonMarketAdapter(api_key="...")
        resilient = ResilientAdapter(adapter, redis=rds)
        result = await resilient.fetch(ticker, config)

    If redis is None, rate limiting is skipped (local dev / testing).
    """

    def __init__(
        self,
        adapter: BaseAdapter,
        redis: aioredis.Redis | None = None,
        retry_config: RetryConfig | None = None,
    ) -> None:
        """Wrap *adapter*.

        Args:
            adapter: The adapter whose ``fetch`` is protected.
            redis: Async Redis client used for the shared rate-limit counter.
            retry_config: Explicit retry settings. When omitted, the default
                for the adapter's source type is used, falling back to a
                plain ``RetryConfig()`` for unknown source types.
        """
        self._adapter = adapter
        self._redis = redis
        source_type = adapter.source_type()
        self._config = retry_config or DEFAULT_RETRY_CONFIGS.get(
            source_type, RetryConfig()
        )

    @property
    def adapter(self) -> BaseAdapter:
        """Access the underlying adapter."""
        return self._adapter

    @property
    def config(self) -> RetryConfig:
        """The retry / rate-limit configuration in effect."""
        return self._config

    def source_type(self) -> str:
        """Return the wrapped adapter's source type."""
        return self._adapter.source_type()

    @property
    def _max_attempts(self) -> int:
        """Total attempts allowed: the first try plus ``max_retries`` retries.

        A negative ``max_retries`` is treated as zero so that ``fetch`` always
        performs at least one attempt.
        """
        return max(self._config.max_retries, 0) + 1

    async def _check_rate_limit(self) -> float:
        """Check distributed rate limit via Redis.

        Returns 0.0 if allowed, or the number of seconds to wait.
        """
        if self._redis is None:
            return 0.0

        source_type = self._adapter.source_type()
        window_sec = self._config.rate_limit_window_seconds
        # Use a time-bucketed key so counters auto-expire
        bucket = int(time.time()) // window_sec
        key = rate_limit_key(source_type, str(bucket))

        try:
            count = await self._redis.incr(key)
            if count == 1:
                # First hit in this bucket: set a TTL of two windows on the key.
                await self._redis.expire(key, window_sec * 2)
            if count > self._config.rate_limit_max:
                # Over limit — compute how long until the window rolls over
                elapsed_in_window = time.time() % window_sec
                wait = window_sec - elapsed_in_window
                return max(wait, 0.5)
        except Exception:
            # Redis unavailable — degrade gracefully, allow the request
            logger.warning("Redis rate-limit check failed, allowing request")
        return 0.0

    def _is_retryable(self, result: AdapterResult) -> bool:
        """Determine if a failed result is worth retrying."""
        if result.ok:
            return False
        # Retry on known retryable HTTP status codes
        if result.http_status and result.http_status in self._config.retryable_status_codes:
            return True
        if not result.error:
            return False
        # Retry on timeouts and connection errors, detected from the error text
        message = result.error.lower()
        if "timeout" in message:
            return True
        return any(kw in message for kw in ("connection", "connect", "reset", "refused"))

    def _extract_retry_after(self, result: AdapterResult) -> float | None:
        """Extract Retry-After hint from result metadata if present."""
        retry_after = result.metadata.get("retry_after")
        if retry_after is None:
            return None
        try:
            return float(retry_after)
        except (ValueError, TypeError):
            return None

    def _retry_delay(self, attempt: int, result: AdapterResult) -> float:
        """Return the seconds to wait before retrying after *result*.

        A 429 that carries a usable Retry-After hint waits for that hint,
        clamped to the range [0, max_delay]; everything else uses exponential
        backoff with jitter.
        """
        if result.http_status == 429:
            retry_after = self._extract_retry_after(result)
            if retry_after is not None:
                return max(0.0, min(retry_after, self._config.max_delay))
        return compute_delay(attempt, self._config)

    @staticmethod
    def _stats_metadata(stats: RetryStats, *, failed: bool = False) -> dict[str, Any]:
        """Serialise *stats* into the dict stored under ``metadata["retry_stats"]``.

        For failed fetches two extra keys are added: ``exhausted`` (True only
        when the final failure was retryable, i.e. the retry budget ran out,
        rather than the loop stopping on a non-retryable error) and
        ``last_error``.
        """
        payload: dict[str, Any] = {
            "attempts": stats.attempts,
            "total_delay": round(stats.total_delay, 2),
            "rate_limited_waits": stats.rate_limited_waits,
        }
        if failed:
            payload["exhausted"] = stats.retryable
            payload["last_error"] = stats.last_error
        return payload

    async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
        """Fetch with rate-limit coordination, retries, and exponential backoff.

        Returns the AdapterResult from the underlying adapter. On retryable
        failures, retries up to max_retries times with exponential backoff
        and jitter. Rate-limit waits are applied before each attempt.
        Non-retryable failures are returned immediately.

        The returned result's metadata includes retry stats under the
        "retry_stats" key.
        """
        stats = RetryStats()
        max_attempts = self._max_attempts
        last_result: AdapterResult | None = None

        for attempt in range(max_attempts):
            stats.attempts = attempt + 1

            # Rate limit check
            wait = await self._check_rate_limit()
            if wait > 0:
                stats.rate_limited_waits += 1
                logger.info(
                    "Rate limited for %s/%s, waiting %.1fs",
                    self.source_type(), ticker, wait,
                )
                stats.total_delay += wait
                await asyncio.sleep(wait)

            # Execute the fetch
            result = await self._adapter.fetch(ticker, config)
            last_result = result

            # Success — attach stats and return
            if result.ok:
                result.metadata["retry_stats"] = self._stats_metadata(stats)
                return result

            stats.last_error = result.error
            stats.retryable = self._is_retryable(result)
            if not stats.retryable:
                break

            # Don't sleep after the last attempt
            if attempt < max_attempts - 1:
                delay = self._retry_delay(attempt, result)
                logger.info(
                    "Retrying %s/%s (attempt %d/%d) after %.1fs: %s",
                    self.source_type(), ticker, attempt + 1,
                    max_attempts, delay, result.error,
                )
                stats.total_delay += delay
                await asyncio.sleep(delay)

        # Either the retry budget ran out or a non-retryable error stopped the
        # loop. The loop body runs at least once, so a result is always present.
        assert last_result is not None
        last_result.metadata["retry_stats"] = self._stats_metadata(stats, failed=True)
        return last_result
|
||||
@@ -0,0 +1,321 @@
|
||||
"""Web scrape adapter for curated URLs and article pages.
|
||||
|
||||
Fetches full article HTML from curated URLs (investor relations pages,
|
||||
press releases, earnings transcripts, etc.) using BeautifulSoup + requests
|
||||
with retry adapters, content hashing, boilerplate awareness, and quality scoring.
|
||||
|
||||
Inspired by Noctipede crawler patterns: BeautifulSoup + requests with retry
|
||||
adapters, content hashing, boilerplate stripping, quality scoring.
|
||||
|
||||
Requirements: 1.2, 2.5, 3.1, 3.2, 3.3, 3.4
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from urllib.parse import urlparse
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from services.shared.content import content_hash, normalize_url
|
||||
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
|
||||
logger = logging.getLogger("web_scrape_adapter")

# Default request settings
DEFAULT_TIMEOUT = 30  # seconds; used when the source config sets no "timeout"
DEFAULT_USER_AGENT = "StonksOracle/1.0 (+https://stonks-oracle.celestium.life)"
MAX_CONTENT_LENGTH = 10 * 1024 * 1024  # 10MB cap
|
||||
|
||||
|
||||
def _meta_content(soup: BeautifulSoup, **find_kwargs: Any) -> str | None:
    """Return the stripped ``content`` of the first matching <meta> tag.

    Returns None when no tag matches or its ``content`` is missing or empty.
    """
    tag = soup.find("meta", **find_kwargs)
    if tag is None:
        return None
    content = tag.get("content")
    if not content:
        return None
    return content.strip() if isinstance(content, str) else ""


def _json_ld_date_published(soup: BeautifulSoup) -> str | None:
    """Return the first non-empty ``datePublished`` found in JSON-LD scripts.

    Looks at top-level objects, top-level arrays, and objects nested under a
    top-level ``@graph`` array. Scripts that fail to parse are skipped.
    """
    for script in soup.find_all("script", type="application/ld+json"):
        raw = script.string
        if not raw or "datePublished" not in raw:
            continue
        try:
            ld = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            continue
        nodes = ld if isinstance(ld, list) else [ld]
        for node in nodes:
            if not isinstance(node, dict):
                continue
            graph = node.get("@graph")
            candidates = [node, *graph] if isinstance(graph, list) else [node]
            for candidate in candidates:
                if isinstance(candidate, dict) and candidate.get("datePublished"):
                    return str(candidate["datePublished"])
    return None


def extract_metadata_from_html(html: str, url: str) -> dict[str, str | None]:
    """Extract page metadata from HTML.

    Args:
        html: The page markup.
        url: The URL the page was requested from; used for the publisher and
            canonical-URL fallbacks and to resolve a relative canonical link.

    Returns:
        A dict with the keys ``title``, ``author``, ``publisher``,
        ``published_at``, ``canonical_url``, ``language`` and ``description``.
        Missing text fields are empty strings, a missing ``published_at`` is
        None, and a missing language defaults to "en".
    """
    from urllib.parse import urljoin

    soup = BeautifulSoup(html, "html.parser")
    meta: dict[str, str | None] = {}

    # Title: prefer og:title, then <title>
    title = _meta_content(soup, property="og:title")
    if title is None:
        title = soup.title.string.strip() if soup.title and soup.title.string else ""
    meta["title"] = title

    # Author
    meta["author"] = _meta_content(soup, attrs={"name": "author"}) or ""

    # Publisher: og:site_name, falling back to the URL's hostname
    publisher = _meta_content(soup, property="og:site_name")
    if publisher is None:
        publisher = urlparse(url).hostname or ""
    meta["publisher"] = publisher

    # Published date: article:published_time, then JSON-LD datePublished
    published = _meta_content(soup, property="article:published_time")
    meta["published_at"] = published or _json_ld_date_published(soup) or None

    # Canonical URL: <link rel="canonical">, then og:url, then the request URL
    canonical = soup.find("link", rel="canonical")
    href = canonical.get("href") if canonical is not None else None
    if href:
        # A canonical link may be relative; resolve it against the page URL.
        meta["canonical_url"] = urljoin(url, str(href).strip())
    else:
        meta["canonical_url"] = _meta_content(soup, property="og:url") or normalize_url(url)

    # Language: first five characters of <html lang="...">
    html_tag = soup.find("html")
    lang = html_tag.get("lang") if html_tag is not None else None
    meta["language"] = str(lang)[:5] if lang else "en"

    # Description for summary
    meta["description"] = (
        _meta_content(soup, property="og:description")
        or _meta_content(soup, attrs={"name": "description"})
        or ""
    )

    return meta
|
||||
|
||||
|
||||
def extract_body_text(html: str) -> str:
    """Extract main body text from HTML, stripping nav/footer/ads.

    Script, style, navigation, header, footer, aside, iframe and noscript
    elements are removed first. The text is then taken from the first
    <article>, else from the first <div> whose class contains a known
    article-body marker, else from <body> (or the whole document).

    Returns:
        The text with one non-empty, stripped line per line.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Remove non-content elements
    for tag in soup.find_all(
        ["script", "style", "nav", "footer", "header", "aside", "iframe", "noscript"]
    ):
        tag.decompose()

    # Try to find article body
    container = soup.find("article")
    if container is None:
        body_markers = ("article-body", "post-content", "entry-content", "story-body")
        for div in soup.find_all("div"):
            cls = div.get("class", [])
            if isinstance(cls, list):
                cls_str = " ".join(cls)
            else:
                cls_str = str(cls) if cls else ""
            if any(marker in cls_str for marker in body_markers):
                container = div
                break

    if container is None:
        # Fallback: use body, or the whole document when there is no <body>
        container = soup.find("body") or soup

    text = container.get_text(separator="\n", strip=True)

    # Collapse whitespace
    stripped_lines = (line.strip() for line in text.splitlines())
    return "\n".join(line for line in stripped_lines if line)
|
||||
|
||||
|
||||
class WebScrapeAdapter(BaseAdapter):
    """Adapter for fetching curated web pages and article URLs.

    Config options (from source config):
        urls: List of URLs to scrape for this company
        url: Single URL to scrape (alternative to urls)
        timeout: Request timeout in seconds (default 30)
        user_agent: Custom user agent string
        follow_links: Whether to follow article links from index pages (default False)
            NOTE(review): this option is not read anywhere in this class.
        max_pages: Max pages to fetch per cycle (default 5, capped at 20)
    """

    def __init__(self) -> None:
        pass

    def source_type(self) -> str:
        return "web_scrape"

    def bucket_name(self) -> str:
        """Web scrape artifacts go to the news raw bucket."""
        return "stonks-raw-news"

    async def _scrape_page(
        self,
        client: httpx.AsyncClient,
        ticker: str,
        url: str,
    ) -> tuple[dict[str, Any] | None, str | None, float, int | None]:
        """Fetch and parse one URL.

        Returns ``(item, error, elapsed_ms, http_status)``. Exactly one of
        ``item`` and ``error`` is set. ``elapsed_ms`` is the request time,
        measured once per URL. ``http_status`` is the response status when a
        response was received, otherwise None.
        """
        t0 = time.monotonic()
        try:
            resp = await client.get(url)
        except httpx.TimeoutException as e:
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.warning("Scrape timeout for %s/%s: %s", ticker, url, e)
            return None, f"Timeout for {url}: {e}", elapsed_ms, None
        except Exception as e:
            elapsed_ms = (time.monotonic() - t0) * 1000
            logger.warning("Scrape error for %s/%s: %s", ticker, url, e)
            return None, f"Error for {url}: {e}", elapsed_ms, None

        elapsed_ms = (time.monotonic() - t0) * 1000
        status = resp.status_code

        try:
            resp.raise_for_status()
        except httpx.HTTPStatusError as e:
            logger.warning("Scrape HTTP error for %s/%s: %s", ticker, url, e)
            return None, f"HTTP {status} for {url}: {e}", elapsed_ms, status

        # Content length guard
        raw_bytes = resp.content
        if len(raw_bytes) > MAX_CONTENT_LENGTH:
            return None, f"Content too large for {url}: {len(raw_bytes)} bytes", elapsed_ms, status

        try:
            html = resp.text
            meta = extract_metadata_from_html(html, url)
            body_text = extract_body_text(html)
            item: dict[str, Any] = {
                "url": url,
                "canonical_url": meta.get("canonical_url", normalize_url(url)),
                "title": meta.get("title", ""),
                "author": meta.get("author", ""),
                "publisher": meta.get("publisher", ""),
                "published_at": meta.get("published_at"),
                "language": meta.get("language", "en"),
                "description": meta.get("description", ""),
                "content_hash": content_hash(raw_bytes),
                "body_text": body_text,
                "body_length": len(body_text),
                "html_length": len(html),
                "http_status": status,
                "response_time_ms": round(elapsed_ms, 1),
            }
        except Exception as e:
            # Parsing is best-effort: one bad page must not abort the cycle.
            logger.warning("Scrape error for %s/%s: %s", ticker, url, e)
            return None, f"Error for {url}: {e}", elapsed_ms, status

        return item, None, elapsed_ms, status

    async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
        """Fetch HTML from curated URLs for a given ticker.

        Supports both single URL and multi-URL configs. Each URL is fetched
        and parsed into an item holding its metadata and extracted body text.
        The result's raw payload is a JSON manifest of the fetched pages (URL,
        content hash, lengths) plus the per-URL errors; the raw HTML itself is
        not part of the payload.

        Returns an error result when no URL is configured or no page could be
        fetched. In the latter case the result carries the status of the last
        HTTP error response, if there was one.
        """
        urls = config.get("urls", [])
        if isinstance(urls, str):
            # A bare string would otherwise be iterated character by character.
            urls = [urls]
        if not urls and config.get("url"):
            urls = [config["url"]]

        if not urls:
            return self._error_result(ticker, "No URLs configured for web_scrape source", 0)

        timeout = config.get("timeout", DEFAULT_TIMEOUT)
        user_agent = config.get("user_agent", DEFAULT_USER_AGENT)
        # Keep the page budget within 0..20; a negative value would otherwise
        # slice from the end of the list.
        max_pages = max(0, min(config.get("max_pages", 5), 20))

        items: list[dict[str, Any]] = []
        errors: list[str] = []
        total_elapsed = 0.0
        last_error_status: int | None = None

        async with httpx.AsyncClient(
            timeout=timeout,
            follow_redirects=True,
            headers={"User-Agent": user_agent},
        ) as client:
            for url in urls[:max_pages]:
                item, error, elapsed_ms, status = await self._scrape_page(client, ticker, url)
                total_elapsed += elapsed_ms
                if item is not None:
                    items.append(item)
                    continue
                errors.append(error or f"Error for {url}")
                if status is not None and status >= 400:
                    last_error_status = status

        if not items:
            error_msg = "; ".join(errors) if errors else "No pages fetched"
            return self._error_result(
                ticker, error_msg, total_elapsed, http_status=last_error_status
            )

        # One timestamp for both the manifest and the result.
        fetched_at = datetime.now(timezone.utc)

        # Combine the per-page summaries into a single artifact
        combined_raw = json.dumps({
            "ticker": ticker,
            "fetched_at": fetched_at.isoformat(),
            "pages": [
                {
                    "url": item["url"],
                    "content_hash": item["content_hash"],
                    "html_length": item["html_length"],
                    "body_length": item["body_length"],
                }
                for item in items
            ],
            "errors": errors,
        }).encode("utf-8")

        combined_hash = content_hash(
            b"".join(item["content_hash"].encode() for item in items)
        )

        return AdapterResult(
            source_type="web_scrape",
            ticker=ticker,
            items=items,
            raw_payload=combined_raw,
            content_hash=combined_hash,
            fetched_at=fetched_at,
            http_status=200,
            response_time_ms=round(total_elapsed, 1),
            metadata={
                "provider": "web_scrape",
                "pages_fetched": len(items),
                "pages_failed": len(errors),
                "errors": errors,
            },
        )

    def _error_result(
        self,
        ticker: str,
        error: str,
        elapsed_ms: float,
        http_status: int | None = None,
    ) -> AdapterResult:
        """Build an error AdapterResult for scrape fetches.

        Args:
            ticker: Ticker the fetch was for.
            error: Human-readable error text.
            elapsed_ms: Total request time spent, in milliseconds.
            http_status: Status of the failing HTTP response, when one was
                received; None otherwise.
        """
        return AdapterResult(
            source_type="web_scrape",
            ticker=ticker,
            items=[],
            raw_payload=b"",
            content_hash="",
            fetched_at=datetime.now(timezone.utc),
            error=error,
            http_status=http_status,
            response_time_ms=round(elapsed_ms, 1),
            metadata={"provider": "web_scrape"},
        )
|
||||
Reference in New Issue
Block a user