diff --git a/.kiro/specs/stonks-oracle/tasks.md b/.kiro/specs/stonks-oracle/tasks.md index 2af1821..94fdd1e 100644 --- a/.kiro/specs/stonks-oracle/tasks.md +++ b/.kiro/specs/stonks-oracle/tasks.md @@ -17,11 +17,11 @@ - [x] Stand up Superset with environment-backed datasource configuration ## Phase 2 - Symbol Registry and Source Management -- [ ] Build symbol registry API endpoints for companies, aliases, watchlists, and sources -- [ ] Add source credibility, retention policy, and access policy fields -- [ ] Add source classes for market data API, news API, filings API, web scrape, and broker adapter -- [ ] Add admin validation for duplicate tickers, invalid URLs, and unsupported source types -- [ ] Add seed data support for an initial tracked watchlist +- [x] Build symbol registry API endpoints for companies, aliases, watchlists, and sources +- [x] Add source credibility, retention policy, and access policy fields +- [x] Add source classes for market data API, news API, filings API, web scrape, and broker adapter +- [x] Add admin validation for duplicate tickers, invalid URLs, and unsupported source types +- [x] Add seed data support for an initial tracked watchlist ## Phase 3 - External API Adapters - [ ] Implement scheduler for symbol and source polling windows diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..5a38e42 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1775811116, + "narHash": "sha256-t+HZK42pB6N+i5RGbuy7Xluez/VvWbembBdvzsc23Ss=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": 
"54170c54449ea4d6725efd30d719c5e505f1c10e", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..867b04f --- /dev/null +++ b/flake.nix @@ -0,0 +1,70 @@ +{ + description = "Stonks Oracle dev environment"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + python = pkgs.python312; + pythonPkgs = python.pkgs; + in + { + devShells.default = pkgs.mkShell { + buildInputs = [ + # Python + core deps + python + pythonPkgs.pip + pythonPkgs.virtualenv + + # Native build deps for asyncpg, etc. + pkgs.postgresql_16.lib + pkgs.openssl + pkgs.pkg-config + pkgs.gcc + + # Dev tools + pkgs.ruff + pkgs.kubectl + pkgs.kubernetes-helm + pkgs.docker-client + + # Useful extras + pkgs.jq + pkgs.yq-go + pkgs.curl + pkgs.git + ]; + + shellHook = '' + # Create venv if it doesn't exist + if [ ! -d .venv ]; then + echo "Creating Python venv..." + python -m venv .venv + fi + source .venv/bin/activate + + # Ensure nix-provided tools take precedence over venv + export PATH="${builtins.concatStringsSep ":" [ + "${pkgs.ruff}/bin" + "${pkgs.kubectl}/bin" + "${pkgs.kubernetes-helm}/bin" + ]}:$PATH" + + # Install deps if needed + if [ ! 
-f .venv/.installed ]; then + echo "Installing Python dependencies..." + pip install -q -r requirements.txt \ + && touch .venv/.installed + fi + + export PYTHONPATH="$PWD:$PYTHONPATH" + echo "Stonks Oracle dev shell ready. Python $(python --version), ruff $(ruff --version)" + ''; + }; + }); +} diff --git a/requirements.txt b/requirements.txt index 165de7d..0f988d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,5 +28,5 @@ trino>=0.330.0 pytest>=8.0.0 pytest-asyncio>=0.24.0 -# Code quality -ruff>=0.5.0 +# Code quality (provided by nix devshell, not pip) +# ruff diff --git a/services/symbol_registry/app.py b/services/symbol_registry/app.py index 67bc5b6..292a4b6 100644 --- a/services/symbol_registry/app.py +++ b/services/symbol_registry/app.py @@ -1,10 +1,12 @@ """Symbol Registry API - FastAPI application.""" +import re from contextlib import asynccontextmanager from typing import List, Optional +from urllib.parse import urlparse import asyncpg from fastapi import FastAPI, HTTPException -from pydantic import BaseModel +from pydantic import BaseModel, field_validator from services.shared.config import load_config from services.shared.db import get_pg_pool @@ -24,6 +26,19 @@ async def lifespan(app: FastAPI): app = FastAPI(title="Stonks Oracle - Symbol Registry", lifespan=lifespan) +@app.get("/health") +async def health():  # readiness probe: "ok" only if the pool answers SELECT 1, else 503 + try: + await pool.fetchval("SELECT 1") + return {"status": "ok"} + except Exception as exc: + raise HTTPException(503, "Database unavailable") from exc + +TICKER_PATTERN = re.compile(r"^[A-Z]{1,10}$") +VALID_SOURCE_TYPES = {"market_api", "news_api", "filings_api", "web_scrape", "broker"} +VALID_ACCESS_POLICIES = {"internal", "public", "restricted"} + + # --- Request/Response Models --- class CompanyCreate(BaseModel): @@ -34,6 +49,14 @@ class CompanyCreate(BaseModel): industry: Optional[str] = None market_cap_bucket: Optional[str] = None + @field_validator("ticker") + @classmethod + def 
validate_ticker(cls, v: str) -> str: + v = v.upper().strip() + if not TICKER_PATTERN.match(v): + raise ValueError(f"Ticker must be 1-10 uppercase letters, got: {v}") + return v + class CompanyResponse(BaseModel): id: str @@ -64,6 +87,41 @@ class SourceCreate(BaseModel): retention_days: int = 365 access_policy: str = "internal" + @field_validator("source_type") + @classmethod + def validate_source_type(cls, v: str) -> str: + """Reject source types outside VALID_SOURCE_TYPES.""" + if v not in VALID_SOURCE_TYPES: + raise ValueError(f"source_type must be one of {sorted(VALID_SOURCE_TYPES)}") + return v + + @field_validator("access_policy") + @classmethod + def validate_access_policy(cls, v: str) -> str: + """Reject access policies outside VALID_ACCESS_POLICIES.""" + if v not in VALID_ACCESS_POLICIES: + raise ValueError(f"access_policy must be one of {sorted(VALID_ACCESS_POLICIES)}") + return v + + @field_validator("config") + @classmethod + def validate_config_urls(cls, v: dict) -> dict: + """Validate the URL-bearing keys of the source config. + + ``base_url`` and ``url`` must be absolute HTTP(S) URLs with a host. + ``endpoint`` may instead be a relative path such as ``/query``; if it + carries a scheme or host it must meet the same absolute-URL rule. + Missing or empty keys are skipped. + """ + for key in ("base_url", "endpoint", "url"): + value = v.get(key) + if not value: + continue + parsed = urlparse(str(value)) + if key == "endpoint" and not (parsed.scheme or parsed.netloc): + continue  # relative endpoint path, nothing further to check + if parsed.scheme not in ("http", "https") or not parsed.netloc: + raise ValueError(f"config.{key} must be a valid HTTP(S) URL") + return v + -VALID_SOURCE_TYPES = {"market_api", "news_api", "filings_api", "web_scrape", "broker"} @@ -188,8 +246,10 @@ async def list_watchlist_members(watchlist_id: str): @app.post("/companies/{company_id}/sources", status_code=201) async def add_source(company_id: str, body: SourceCreate): - if body.source_type not in VALID_SOURCE_TYPES: - raise HTTPException(400, f"Invalid source_type. 
Must be one of: {VALID_SOURCE_TYPES}") + # Verify company exists + exists = await pool.fetchval("SELECT 1 FROM companies WHERE id = $1", company_id) + if not exists: + raise HTTPException(404, "Company not found") row = await pool.fetchrow( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score, retention_days, access_policy) VALUES ($1, $2, $3, $4, $5, $6, $7) diff --git a/services/symbol_registry/seed.py b/services/symbol_registry/seed.py new file mode 100644 index 0000000..e5044e9 --- /dev/null +++ b/services/symbol_registry/seed.py @@ -0,0 +1,184 @@ +"""Seed data for initial tracked watchlist. + +Run against a live database to populate the starter companies, aliases, +watchlist, and source configurations. + +Usage: + python -m services.symbol_registry.seed +""" +import asyncio +import logging + +import asyncpg + +from services.shared.config import load_config +from services.shared.db import get_pg_pool + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("seed") + +# --- Seed Companies --- +# Diverse mix: mega-cap tech, finance, healthcare, energy, consumer +COMPANIES = [ + {"ticker": "AAPL", "legal_name": "Apple Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Consumer Electronics", "market_cap_bucket": "mega"}, + {"ticker": "MSFT", "legal_name": "Microsoft Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Software", "market_cap_bucket": "mega"}, + {"ticker": "NVDA", "legal_name": "NVIDIA Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "mega"}, + {"ticker": "AMZN", "legal_name": "Amazon.com Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Internet Retail", "market_cap_bucket": "mega"}, + {"ticker": "GOOGL", "legal_name": "Alphabet Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, + {"ticker": "JPM", "legal_name": 
"JPMorgan Chase & Co.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, + {"ticker": "JNJ", "legal_name": "Johnson & Johnson", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, + {"ticker": "XOM", "legal_name": "Exxon Mobil Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, + {"ticker": "TSLA", "legal_name": "Tesla Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Auto Manufacturers", "market_cap_bucket": "large"}, + {"ticker": "META", "legal_name": "Meta Platforms Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, +] + +# --- Aliases --- +ALIASES = { + "AAPL": [("Apple", "brand"), ("iPhone", "product")], + "MSFT": [("Microsoft", "brand"), ("Azure", "product"), ("Windows", "product")], + "NVDA": [("NVIDIA", "brand"), ("GeForce", "product"), ("CUDA", "product")], + "AMZN": [("Amazon", "brand"), ("AWS", "product"), ("Prime", "product")], + "GOOGL": [("Google", "brand"), ("Alphabet", "legal_name"), ("YouTube", "product")], + "JPM": [("JPMorgan", "brand"), ("Chase", "brand")], + "JNJ": [("J&J", "brand")], + "XOM": [("Exxon", "brand"), ("ExxonMobil", "brand")], + "TSLA": [("Tesla", "brand")], + "META": [("Facebook", "brand"), ("Instagram", "product"), ("WhatsApp", "product")], +} + +# --- Source configs per company --- +# Alpha Vantage for market data (free: 25 req/day) +# NewsAPI for news (free: 100 req/day) +# SEC EDGAR for filings (free, rate-limited by user-agent) +# Alpaca for paper trading (free unlimited paper) + +SOURCES_PER_COMPANY = [ + { + "source_type": "market_api", + "source_name": "Alpha Vantage", + "credibility_score": 0.9, + "config": { + "provider": "alpha_vantage", + "base_url": "https://www.alphavantage.co", + "endpoint": "/query", + "functions": ["TIME_SERIES_DAILY", "GLOBAL_QUOTE", 
"OVERVIEW"], + }, + }, + { + "source_type": "news_api", + "source_name": "NewsAPI", + "credibility_score": 0.7, + "config": { + "provider": "newsapi", + "base_url": "https://newsapi.org", + "endpoint": "/v2/everything", + "page_size": 20, + }, + }, + { + "source_type": "filings_api", + "source_name": "SEC EDGAR", + "credibility_score": 1.0, + "config": { + "provider": "sec_edgar", + "base_url": "https://efts.sec.gov", + "forms": ["8-K", "10-Q", "10-K"], + "user_agent": "StonksOracle/1.0", + }, + }, +] + +# Broker source — one per account, not per company +BROKER_SOURCE = { + "source_type": "broker", + "source_name": "Alpaca Paper", + "credibility_score": 1.0, + "config": { + "provider": "alpaca", + "base_url": "https://paper-api.alpaca.markets", + "mode": "paper", + }, +} + + +async def seed(pool: asyncpg.Pool) -> None: + """Insert seed data. Skips existing records.""" + company_ids = {} + + # Companies + for c in COMPANIES: + row = await pool.fetchrow( + """INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT (ticker, exchange) DO UPDATE SET legal_name = EXCLUDED.legal_name + RETURNING id, ticker""", + c["ticker"], c["legal_name"], c["exchange"], + c["sector"], c["industry"], c["market_cap_bucket"], + ) + company_ids[row["ticker"]] = row["id"] + logger.info(f"Company: {row['ticker']} -> {row['id']}") + + # Aliases + for ticker, aliases in ALIASES.items(): + cid = company_ids.get(ticker) + if not cid: + continue + for alias, alias_type in aliases: + await pool.execute( + """INSERT INTO company_aliases (company_id, alias, alias_type) + VALUES ($1, $2, $3) ON CONFLICT DO NOTHING""", + cid, alias, alias_type, + ) + logger.info("Aliases seeded") + + # Watchlist + wl = await pool.fetchrow( + """INSERT INTO watchlists (name, description) + VALUES ('Starter 10', 'Initial tracked watchlist — 10 diverse mega/large-cap symbols') + ON CONFLICT (name) DO UPDATE SET description = 
EXCLUDED.description + RETURNING id""", + ) + wl_id = wl["id"] + for cid in company_ids.values(): + await pool.execute( + "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2) ON CONFLICT DO NOTHING", + wl_id, cid, + ) + logger.info(f"Watchlist 'Starter 10' -> {wl_id}") + + # Sources per company. NOTE(review): config is bound as a Python dict; asyncpg's default json/jsonb codec takes str, so confirm get_pg_pool registers a JSON codec. ON CONFLICT DO NOTHING presumably relies on a unique constraint on sources for idempotent re-runs (TODO verify both against the schema) + for ticker, cid in company_ids.items(): + for src in SOURCES_PER_COMPANY: + await pool.execute( + """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT DO NOTHING""", + cid, src["source_type"], src["source_name"], + src["config"], src["credibility_score"], + ) + # Broker source is account-level, so it is attached once, to the first seeded company (COMPANIES[0]) + if ticker == COMPANIES[0]["ticker"]: + await pool.execute( + """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT DO NOTHING""", + cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"], + BROKER_SOURCE["config"], BROKER_SOURCE["credibility_score"], + ) + logger.info("Sources seeded") + + total = await pool.fetchval("SELECT count(*) FROM companies") + logger.info(f"Seed complete: {total} companies, watchlist with {len(company_ids)} members") + + +async def main() -> None: + config = load_config() + pool = await get_pg_pool(config) + try: + await seed(pool) + finally: + await pool.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/test_symbol_registry.py b/tests/test_symbol_registry.py new file mode 100644 index 0000000..c940700 --- /dev/null +++ b/tests/test_symbol_registry.py @@ -0,0 +1,105 @@ +"""Tests for symbol registry validation and seed data.""" +import pytest +from pydantic import ValidationError + +# Import after path setup +from services.symbol_registry.app import CompanyCreate, SourceCreate, VALID_SOURCE_TYPES +from services.symbol_registry.seed import COMPANIES, ALIASES, SOURCES_PER_COMPANY + + +# --- CompanyCreate validation 
--- + +def test_ticker_uppercased(): + c = CompanyCreate(ticker="aapl", legal_name="Apple Inc.") + assert c.ticker == "AAPL" + + +def test_ticker_strips_whitespace(): + c = CompanyCreate(ticker=" MSFT ", legal_name="Microsoft") + assert c.ticker == "MSFT" + + +def test_ticker_rejects_numbers(): + with pytest.raises(ValidationError): + CompanyCreate(ticker="123", legal_name="Bad") + + +def test_ticker_rejects_empty(): + with pytest.raises(ValidationError): + CompanyCreate(ticker="", legal_name="Bad") + + +def test_ticker_rejects_too_long(): + with pytest.raises(ValidationError): + CompanyCreate(ticker="ABCDEFGHIJK", legal_name="Bad") + + +def test_ticker_rejects_special_chars(): + with pytest.raises(ValidationError): + CompanyCreate(ticker="AA-PL", legal_name="Bad") + + +# --- SourceCreate validation --- + +def test_source_valid_types(): + for st in VALID_SOURCE_TYPES: + s = SourceCreate(source_type=st, source_name="test") + assert s.source_type == st + + +def test_source_rejects_invalid_type(): + with pytest.raises(ValidationError): + SourceCreate(source_type="invalid_type", source_name="test") + + +def test_source_rejects_invalid_access_policy(): + with pytest.raises(ValidationError): + SourceCreate(source_type="market_api", source_name="test", access_policy="secret") + + +def test_source_validates_config_url(): + s = SourceCreate( + source_type="market_api", + source_name="test", + config={"base_url": "https://api.example.com"}, + ) + assert s.config["base_url"] == "https://api.example.com" + + +def test_source_rejects_bad_config_url(): + with pytest.raises(ValidationError): + SourceCreate( + source_type="market_api", + source_name="test", + config={"base_url": "not-a-url"}, + ) + + +# --- Seed data integrity --- + +def test_seed_companies_have_required_fields(): + for c in COMPANIES: + assert c["ticker"] + assert c["legal_name"] + assert c["exchange"] + assert c["sector"] + + +def test_seed_companies_unique_tickers(): + tickers = [c["ticker"] for c in COMPANIES] 
+ assert len(tickers) == len(set(tickers)) + + +def test_seed_aliases_reference_valid_tickers(): + tickers = {c["ticker"] for c in COMPANIES} + for ticker in ALIASES: + assert ticker in tickers, f"Alias references unknown ticker: {ticker}" + + +def test_seed_sources_have_valid_types(): + for src in SOURCES_PER_COMPANY: + assert src["source_type"] in VALID_SOURCE_TYPES + + +def test_seed_has_ten_companies(): + assert len(COMPANIES) == 10