phase 14-15: docker build validation and helm deployment

This commit is contained in:
Celes Renata
2026-04-11 11:59:45 -07:00
parent 7394d241c9
commit ce10afa034
179 changed files with 32559 additions and 576 deletions
+170 -27
View File
@@ -1,8 +1,17 @@
"""Filings / Regulatory API adapter - fetches SEC-style submissions."""
"""Filings / Regulatory API adapter interface and concrete SEC EDGAR provider.
The FilingsDataAdapter is the abstract interface for all filings data providers.
SECEdgarAdapter is the first concrete implementation, targeting the SEC EDGAR
full-text search system (EFTS) for company filings discovery.
Requirements: 2.3, 2.5, 3.1, 3.2, 3.3
"""
import hashlib
import logging
from datetime import datetime
from typing import Any, Dict
import time
from abc import ABC
from datetime import datetime, timezone
from typing import Any
import httpx
@@ -11,48 +20,182 @@ from .base import AdapterResult, BaseAdapter
logger = logging.getLogger("filings_adapter")
class FilingsDataAdapter(BaseAdapter, ABC):
    """Abstract interface for filings / regulatory data providers.

    Subclasses implement fetch() for their specific filings API.
    source_type() is concrete here since all filings adapters share the
    same type.
    """

    def source_type(self) -> str:
        """Return the source-type identifier shared by all filings adapters."""
        return "filings_api"
class SECEdgarAdapter(FilingsDataAdapter):
    """Concrete adapter for the SEC EDGAR full-text search system (EFTS).

    Supports:
      - Full-text search (/LATEST/search-index) for 8-K, 10-Q, 10-K, and
        other forms
      - Filtering by date range, form type, and entity

    The SEC EDGAR EFTS API is public and does not require an API key,
    but requires a descriptive User-Agent header per SEC fair-access policy.

    Config options:
        cik: Company CIK number (optional, narrows search)
        forms: Form types to search, either a comma-separated string or a
            list/tuple of form names (default "8-K,10-Q,10-K")
        start_date: Only filings on or after this date, YYYY-MM-DD (optional)
        end_date: Only filings on or before this date, YYYY-MM-DD (optional)
        query: Custom search query override (optional, replaces the
            ticker-based query)
    """

    SEARCH_ENDPOINT: str = "/LATEST/search-index"
    # Form filter applied when the config does not provide a usable "forms".
    DEFAULT_FORMS: str = "8-K,10-Q,10-K"

    def __init__(
        self,
        base_url: str = "https://efts.sec.gov",
        # NOTE(review): "[email]" looks like a placeholder for a contact
        # address -- confirm callers pass a real User-Agent.
        user_agent: str = "StonksOracle/1.0 ([email])",
    ) -> None:
        """Store the API root (without trailing slash) and the User-Agent.

        Args:
            base_url: Root URL of the EFTS service; trailing slashes are
                stripped so it can be joined with SEARCH_ENDPOINT.
            user_agent: Value sent in the User-Agent header of every request.
        """
        self.base_url: str = base_url.rstrip("/")
        self.user_agent: str = user_agent

    async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
        """Fetch filings from SEC EDGAR EFTS for a given ticker.

        Failures while sending the request or parsing the response are
        logged and reported through an error AdapterResult instead of being
        raised.

        Args:
            ticker: The company ticker symbol.
            config: Source-specific configuration from the sources table.

        Returns:
            AdapterResult with raw payload, parsed filing items, and metadata.
        """
        url, params, headers = self._build_request(ticker, config)

        async with httpx.AsyncClient(timeout=30) as client:
            t0 = time.monotonic()
            try:
                # Exactly one request, carrying the structured query params.
                resp = await client.get(url, params=params, headers=headers)
                elapsed_ms = (time.monotonic() - t0) * 1000
                resp.raise_for_status()

                raw = resp.content
                data = resp.json()
                content_hash = hashlib.sha256(raw).hexdigest()
                items = self._extract_items(data)

                return AdapterResult(
                    source_type="filings_api",
                    ticker=ticker,
                    items=items,
                    raw_payload=raw,
                    content_hash=content_hash,
                    fetched_at=datetime.now(timezone.utc),
                    http_status=resp.status_code,
                    response_time_ms=round(elapsed_ms, 1),
                    metadata={
                        "provider": "sec_edgar",
                        "results_count": len(items),
                        "total_hits": self._total_hits(data),
                        "query": params.get("q", ""),
                        "forms": params.get("forms", ""),
                    },
                )
            except httpx.HTTPStatusError as e:
                elapsed_ms = (time.monotonic() - t0) * 1000
                logger.error("SEC EDGAR HTTP error for %s: %s", ticker, e)
                # Compare against None explicitly rather than relying on the
                # truthiness of the response object.
                response = e.response
                has_response = response is not None
                return self._error_result(
                    ticker,
                    str(e),
                    elapsed_ms,
                    http_status=response.status_code if has_response else None,
                    raw=response.content if has_response else b"",
                )
            except httpx.TimeoutException as e:
                elapsed_ms = (time.monotonic() - t0) * 1000
                logger.error("SEC EDGAR timeout for %s: %s", ticker, e)
                return self._error_result(ticker, f"timeout: {e}", elapsed_ms)
            except Exception as e:
                # Boundary handler: any other failure (transport error,
                # invalid JSON, unexpected payload shape) becomes an error
                # result so the caller always receives an AdapterResult.
                elapsed_ms = (time.monotonic() - t0) * 1000
                logger.error("SEC EDGAR fetch failed for %s: %s", ticker, e)
                return self._error_result(ticker, str(e), elapsed_ms)

    def _build_request(
        self, ticker: str, config: dict[str, Any]
    ) -> tuple[str, dict[str, str], dict[str, str]]:
        """Build the URL, query params, and headers for an EDGAR EFTS request.

        Args:
            ticker: Ticker symbol; searched as a quoted phrase unless the
                config supplies a "query" override.
            config: Source configuration (see the class docstring). ``None``
                is treated as an empty configuration.

        Returns:
            A ``(url, params, headers)`` tuple ready for ``httpx`` to send.
        """
        config = config or {}
        params: dict[str, str] = {}
        headers: dict[str, str] = {"User-Agent": self.user_agent}

        # Query: use custom override or default to ticker-based search
        query = config.get("query")
        params["q"] = str(query) if query else f'"{ticker}"'

        # Form types filter. A missing, None, or empty value falls back to
        # the default instead of being sent as the literal string "None".
        forms = config.get("forms") or self.DEFAULT_FORMS
        if isinstance(forms, (list, tuple)):
            # Join sequences explicitly; str() would send the Python repr.
            forms = ",".join(str(form).strip() for form in forms)
        params["forms"] = str(forms)

        # Date range: either bound switches the search to a custom range
        if config.get("start_date"):
            params["dateRange"] = "custom"
            params["startdt"] = str(config["start_date"])
        if config.get("end_date"):
            params["dateRange"] = "custom"
            params["enddt"] = str(config["end_date"])

        # CIK filter (entity-level narrowing), appended to the query text
        cik = config.get("cik")
        if cik:
            params["q"] = f'{params["q"]} AND cik:{cik}'

        url = f"{self.base_url}{self.SEARCH_ENDPOINT}"
        return url, params, headers

    def _extract_items(self, data: dict[str, Any]) -> list[dict[str, Any]]:
        """Extract the filing hits from an EDGAR EFTS response.

        EFTS returns results under hits.hits as a list of objects,
        each containing _source with fields like file_date, form_type,
        entity_name, file_num, and period_of_report.

        Returns:
            The list found at ``data["hits"]["hits"]``, or an empty list when
            either level is missing or has an unexpected type.
        """
        hits_wrapper = data.get("hits", {})
        if not isinstance(hits_wrapper, dict):
            return []
        hits = hits_wrapper.get("hits", [])
        if isinstance(hits, list):
            return hits
        return []

    def _total_hits(self, data: dict[str, Any]) -> int:
        """Extract the total hit count from an EFTS response.

        ``data["hits"]["total"]`` is accepted either as an object with a
        ``value`` field or as a bare integer. Anything else, including a
        null or non-numeric ``value``, yields 0 so that a malformed counter
        cannot fail a fetch whose filings parsed correctly.
        """
        hits_wrapper = data.get("hits", {})
        if not isinstance(hits_wrapper, dict):
            return 0
        total = hits_wrapper.get("total", {})
        if isinstance(total, dict):
            try:
                return int(total.get("value", 0))
            except (TypeError, ValueError):
                return 0
        if isinstance(total, int):
            return total
        return 0

    def _error_result(
        self,
        ticker: str,
        error: str,
        elapsed_ms: float,
        http_status: int | None = None,
        raw: bytes = b"",
    ) -> AdapterResult:
        """Build an error AdapterResult for filings fetches.

        Args:
            ticker: Ticker symbol the failed fetch was for.
            error: Human-readable description of the failure.
            elapsed_ms: Time spent on the request, in milliseconds.
            http_status: HTTP status code, when a response was received.
            raw: Raw response body, when one was received.

        Returns:
            An AdapterResult with no items, an empty content hash, and the
            error message set.
        """
        return AdapterResult(
            source_type="filings_api",
            ticker=ticker,
            items=[],
            raw_payload=raw,
            content_hash="",
            fetched_at=datetime.now(timezone.utc),
            error=error,
            http_status=http_status,
            response_time_ms=round(elapsed_ms, 1),
            metadata={"provider": "sec_edgar"},
        )