"""Filings / Regulatory API adapter interface and concrete SEC EDGAR provider. The FilingsDataAdapter is the abstract interface for all filings data providers. SECEdgarAdapter is the first concrete implementation, targeting the SEC EDGAR full-text search system (EFTS) for company filings discovery. Requirements: 2.3, 2.5, 3.1, 3.2, 3.3 """ import hashlib import logging import time from abc import ABC from datetime import datetime, timezone from typing import Any import httpx from .base import AdapterResult, BaseAdapter logger = logging.getLogger("filings_adapter") class FilingsDataAdapter(BaseAdapter, ABC): """Abstract interface for filings / regulatory data providers. Subclasses implement fetch() for their specific filings API. source_type() is concrete here since all filings adapters share the same type. """ def source_type(self) -> str: return "filings_api" class SECEdgarAdapter(FilingsDataAdapter): """Concrete adapter for the SEC EDGAR full-text search system (EFTS). Supports: - Full-text search (/LATEST/search-index) for 8-K, 10-Q, 10-K, and other forms - Filtering by date range, form type, and entity The SEC EDGAR EFTS API is public and does not require an API key, but requires a descriptive User-Agent header per SEC fair-access policy. Config options: cik: Company CIK number (optional, narrows search) forms: Comma-separated form types to search (default "8-K,10-Q,10-K") start_date: Only filings on or after this date, YYYY-MM-DD (optional) end_date: Only filings on or before this date, YYYY-MM-DD (optional) query: Custom search query override (optional, replaces ticker-based query) """ SEARCH_ENDPOINT: str = "/LATEST/search-index" def __init__( self, base_url: str = "https://efts.sec.gov", user_agent: str = "StonksOracle/1.0 ([email])", ) -> None: self.base_url: str = base_url.rstrip("/") self.user_agent: str = user_agent async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult: """Fetch filings from SEC EDGAR EFTS for a given ticker. Args: ticker: The company ticker symbol. config: Source-specific configuration from the sources table. Returns: AdapterResult with raw payload, parsed filing items, and metadata. """ url, params, headers = self._build_request(ticker, config) async with httpx.AsyncClient(timeout=30) as client: t0 = time.monotonic() try: resp = await client.get(url, params=params, headers=headers) elapsed_ms = (time.monotonic() - t0) * 1000 resp.raise_for_status() raw = resp.content data = resp.json() content_hash = hashlib.sha256(raw).hexdigest() items = self._extract_items(data) return AdapterResult( source_type="filings_api", ticker=ticker, items=items, raw_payload=raw, content_hash=content_hash, fetched_at=datetime.now(timezone.utc), http_status=resp.status_code, response_time_ms=round(elapsed_ms, 1), metadata={ "provider": "sec_edgar", "results_count": len(items), "total_hits": self._total_hits(data), "query": params.get("q", ""), "forms": params.get("forms", ""), }, ) except httpx.HTTPStatusError as e: elapsed_ms = (time.monotonic() - t0) * 1000 logger.error("SEC EDGAR HTTP error for %s: %s", ticker, e) return self._error_result( ticker, str(e), elapsed_ms, http_status=e.response.status_code if e.response else None, raw=e.response.content if e.response else b"", ) except httpx.TimeoutException as e: elapsed_ms = (time.monotonic() - t0) * 1000 logger.error("SEC EDGAR timeout for %s: %s", ticker, e) return self._error_result(ticker, f"timeout: {e}", elapsed_ms) except Exception as e: elapsed_ms = (time.monotonic() - t0) * 1000 logger.error("SEC EDGAR fetch failed for %s: %s", ticker, e) return self._error_result(ticker, str(e), elapsed_ms) def _build_request( self, ticker: str, config: dict[str, Any] ) -> tuple[str, dict[str, str], dict[str, str]]: """Build the URL, query params, and headers for an EDGAR EFTS request.""" params: dict[str, str] = {} headers: dict[str, str] = {"User-Agent": self.user_agent} # Query: use custom override or default to ticker-based search query = config.get("query") if query: params["q"] = str(query) else: params["q"] = f'"{ticker}"' # Form types filter forms = config.get("forms", "8-K,10-Q,10-K") params["forms"] = str(forms) # Date range if config.get("start_date"): params["dateRange"] = "custom" params["startdt"] = str(config["start_date"]) if config.get("end_date"): params["dateRange"] = "custom" params["enddt"] = str(config["end_date"]) # CIK filter (entity-level narrowing) cik = config.get("cik") if cik: params["q"] = f'{params["q"]} AND cik:{cik}' url = f"{self.base_url}{self.SEARCH_ENDPOINT}" return url, params, headers def _extract_items(self, data: dict[str, Any]) -> list[dict[str, Any]]: """Extract the filing hits from an EDGAR EFTS response. EFTS returns results under hits.hits as a list of objects, each containing _source with fields like file_date, form_type, entity_name, file_num, and period_of_report. """ hits_wrapper = data.get("hits", {}) if not isinstance(hits_wrapper, dict): return [] hits = hits_wrapper.get("hits", []) if isinstance(hits, list): return hits return [] def _total_hits(self, data: dict[str, Any]) -> int: """Extract total hit count from EFTS response.""" hits_wrapper = data.get("hits", {}) if not isinstance(hits_wrapper, dict): return 0 total = hits_wrapper.get("total", {}) if isinstance(total, dict): return int(total.get("value", 0)) if isinstance(total, int): return total return 0 def _error_result( self, ticker: str, error: str, elapsed_ms: float, http_status: int | None = None, raw: bytes = b"", ) -> AdapterResult: """Build an error AdapterResult for filings fetches.""" return AdapterResult( source_type="filings_api", ticker=ticker, items=[], raw_payload=raw, content_hash="", fetched_at=datetime.now(timezone.utc), error=error, http_status=http_status, response_time_ms=round(elapsed_ms, 1), metadata={"provider": "sec_edgar"}, )