phase 2: symbol registry validation, seed data, nix dev shell

- Enhanced CompanyCreate with ticker format validation (1-10 uppercase letters)
- Enhanced SourceCreate with pydantic validators for source_type, access_policy, config URLs
- Added /health endpoint to symbol registry
- Seed data: 10 companies (AAPL, MSFT, NVDA, AMZN, GOOGL, JPM, JNJ, XOM, TSLA, META)
- Seed sources: Alpha Vantage (market), NewsAPI (news), SEC EDGAR (filings), Alpaca (paper trading)
- Seed watchlist: 'Starter 10' with all companies and aliases
- Added flake.nix dev shell (nixos-25.11) with Python 3.12, ruff, pytest, kubectl, helm
- 30 passing tests, lint clean, Docker build verified
This commit is contained in:
Celes Renata
2026-04-11 03:41:41 -07:00
parent ebea70573b
commit 7394d241c9
7 changed files with 480 additions and 10 deletions
+53 -3
View File
@@ -1,10 +1,12 @@
"""Symbol Registry API - FastAPI application."""
import re
from contextlib import asynccontextmanager
from typing import List, Optional
from urllib.parse import urlparse
import asyncpg
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from pydantic import BaseModel, field_validator
from services.shared.config import load_config
from services.shared.db import get_pg_pool
@@ -24,6 +26,19 @@ async def lifespan(app: FastAPI):
app = FastAPI(title="Stonks Oracle - Symbol Registry", lifespan=lifespan)
@app.get("/health")
async def health():
    """Report service health by probing the database.

    Runs ``SELECT 1`` through the ``pool`` object defined elsewhere in this
    module.

    Returns:
        ``{"status": "ok"}`` when the probe query succeeds.

    Raises:
        HTTPException: 503 "Database unavailable" when the probe fails for
            any reason.
    """
    try:
        await pool.fetchval("SELECT 1")
    except Exception as exc:
        # Any failure here means "not healthy". Chain the original error so
        # the root cause is still visible in tracebacks and logs.
        raise HTTPException(503, "Database unavailable") from exc
    return {"status": "ok"}
# A ticker is 1-10 capital ASCII letters.
TICKER_PATTERN = re.compile(r"^[A-Z]{1,10}$")

# Accepted values for a source's `source_type`.
VALID_SOURCE_TYPES = {
    "market_api",
    "news_api",
    "filings_api",
    "web_scrape",
    "broker",
}

# Accepted values for a source's `access_policy`.
VALID_ACCESS_POLICIES = {
    "internal",
    "public",
    "restricted",
}
# --- Request/Response Models ---
class CompanyCreate(BaseModel):
@@ -34,6 +49,14 @@ class CompanyCreate(BaseModel):
industry: Optional[str] = None
market_cap_bucket: Optional[str] = None
@field_validator("ticker")
@classmethod
def validate_ticker(cls, v: str) -> str:
    """Normalize a ticker and check it against ``TICKER_PATTERN``.

    The input is upper-cased and stripped of surrounding whitespace before
    matching; the normalized form is what gets returned.

    Raises:
        ValueError: if the normalized ticker does not match the pattern.
    """
    normalized = v.upper().strip()
    if TICKER_PATTERN.match(normalized):
        return normalized
    raise ValueError(f"Ticker must be 1-10 uppercase letters, got: {normalized}")
class CompanyResponse(BaseModel):
id: str
@@ -64,6 +87,31 @@ class SourceCreate(BaseModel):
retention_days: int = 365
access_policy: str = "internal"
@field_validator("source_type")
@classmethod
def validate_source_type(cls, v: str) -> str:
    """Accept ``source_type`` only if it is a member of ``VALID_SOURCE_TYPES``.

    Returns the value unchanged.

    Raises:
        ValueError: if the value is not one of the allowed source types.
    """
    if v in VALID_SOURCE_TYPES:
        return v
    raise ValueError(f"source_type must be one of {VALID_SOURCE_TYPES}")
@field_validator("access_policy")
@classmethod
def validate_access_policy(cls, v: str) -> str:
    """Accept ``access_policy`` only if it is in ``VALID_ACCESS_POLICIES``.

    Returns the value unchanged.

    Raises:
        ValueError: if the value is not one of the allowed access policies.
    """
    if v in VALID_ACCESS_POLICIES:
        return v
    raise ValueError(f"access_policy must be one of {VALID_ACCESS_POLICIES}")
@field_validator("config")
@classmethod
def validate_config_urls(cls, v: dict) -> dict:
    """Validate the absolute-URL entries of the config dict.

    ``base_url`` and ``url``, when present and non-empty, must be HTTP(S)
    URLs with a host. ``endpoint`` is left unchecked, as before: it can be
    a relative path such as ``"/query"``, which has no scheme.

    Returns:
        The config dict, unchanged.

    Raises:
        ValueError: if ``base_url`` or ``url`` is not an HTTP(S) URL with a
            host component.
    """
    # NOTE(review): assumes the field is always a dict by the time this
    # runs; if the model declares it Optional, a None guard is needed here.
    for key in ("base_url", "url"):
        value = v.get(key)
        if not value:
            continue
        try:
            parsed = urlparse(str(value))
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. a broken IPv6
            # literal); report those with the same message as any other
            # invalid URL.
            parsed = None
        if (
            parsed is None
            or parsed.scheme not in ("http", "https")
            or not parsed.netloc  # "http://" alone is not a usable URL
        ):
            raise ValueError(f"config.{key} must be a valid HTTP(S) URL")
    return v
VALID_SOURCE_TYPES = {"market_api", "news_api", "filings_api", "web_scrape", "broker"}
@@ -188,8 +236,10 @@ async def list_watchlist_members(watchlist_id: str):
@app.post("/companies/{company_id}/sources", status_code=201)
async def add_source(company_id: str, body: SourceCreate):
if body.source_type not in VALID_SOURCE_TYPES:
raise HTTPException(400, f"Invalid source_type. Must be one of: {VALID_SOURCE_TYPES}")
# Verify company exists
exists = await pool.fetchval("SELECT 1 FROM companies WHERE id = $1", company_id)
if not exists:
raise HTTPException(404, "Company not found")
row = await pool.fetchrow(
"""INSERT INTO sources (company_id, source_type, source_name, config, credibility_score, retention_days, access_policy)
VALUES ($1, $2, $3, $4, $5, $6, $7)
+184
View File
@@ -0,0 +1,184 @@
"""Seed data for initial tracked watchlist.
Run against a live database to populate the starter companies, aliases,
watchlist, and source configurations.
Usage:
python -m services.symbol_registry.seed
"""
import asyncio
import logging
import asyncpg
from services.shared.config import load_config
from services.shared.db import get_pg_pool
# Configure root logging at INFO when this module is imported, so the
# logger.info progress messages below are emitted.
logging.basicConfig(level=logging.INFO)
# Module logger used by every seeding step.
logger = logging.getLogger("seed")
# --- Seed Companies ---
# Diverse mix: mega-cap tech, finance, healthcare, energy, consumer.
# Stored as a table: each row lists its values in _COMPANY_FIELDS order and
# is expanded into one dict per company below.
_COMPANY_FIELDS = (
    "ticker",
    "legal_name",
    "exchange",
    "sector",
    "industry",
    "market_cap_bucket",
)
_COMPANY_ROWS = (
    ("AAPL", "Apple Inc.", "NASDAQ", "Technology", "Consumer Electronics", "mega"),
    ("MSFT", "Microsoft Corporation", "NASDAQ", "Technology", "Software", "mega"),
    ("NVDA", "NVIDIA Corporation", "NASDAQ", "Technology", "Semiconductors", "mega"),
    ("AMZN", "Amazon.com Inc.", "NASDAQ", "Consumer Cyclical", "Internet Retail", "mega"),
    ("GOOGL", "Alphabet Inc.", "NASDAQ", "Technology", "Internet Content", "mega"),
    ("JPM", "JPMorgan Chase & Co.", "NYSE", "Financial Services", "Banks", "mega"),
    ("JNJ", "Johnson & Johnson", "NYSE", "Healthcare", "Drug Manufacturers", "mega"),
    ("XOM", "Exxon Mobil Corporation", "NYSE", "Energy", "Oil & Gas Integrated", "mega"),
    ("TSLA", "Tesla Inc.", "NASDAQ", "Consumer Cyclical", "Auto Manufacturers", "large"),
    ("META", "Meta Platforms Inc.", "NASDAQ", "Technology", "Internet Content", "mega"),
)
COMPANIES = [dict(zip(_COMPANY_FIELDS, row)) for row in _COMPANY_ROWS]
# --- Aliases ---
# ticker -> list of (alias, alias_type) pairs.
ALIASES = {
    "AAPL": [
        ("Apple", "brand"),
        ("iPhone", "product"),
    ],
    "MSFT": [
        ("Microsoft", "brand"),
        ("Azure", "product"),
        ("Windows", "product"),
    ],
    "NVDA": [
        ("NVIDIA", "brand"),
        ("GeForce", "product"),
        ("CUDA", "product"),
    ],
    "AMZN": [
        ("Amazon", "brand"),
        ("AWS", "product"),
        ("Prime", "product"),
    ],
    "GOOGL": [
        ("Google", "brand"),
        ("Alphabet", "legal_name"),
        ("YouTube", "product"),
    ],
    "JPM": [
        ("JPMorgan", "brand"),
        ("Chase", "brand"),
    ],
    "JNJ": [
        ("J&J", "brand"),
    ],
    "XOM": [
        ("Exxon", "brand"),
        ("ExxonMobil", "brand"),
    ],
    "TSLA": [
        ("Tesla", "brand"),
    ],
    "META": [
        ("Facebook", "brand"),
        ("Instagram", "product"),
        ("WhatsApp", "product"),
    ],
}
# --- Source configs per company ---
# Every entry here is attached to each seeded company.
#   Alpha Vantage - market data (free: 25 req/day)
#   NewsAPI       - news (free: 100 req/day)
#   SEC EDGAR     - filings (free, rate-limited by user-agent)
# Alpaca (paper trading, free unlimited paper) is configured separately.
SOURCES_PER_COMPANY = [
    # Market data
    {
        "source_type": "market_api",
        "source_name": "Alpha Vantage",
        "credibility_score": 0.9,
        "config": {
            "provider": "alpha_vantage",
            "base_url": "https://www.alphavantage.co",
            "endpoint": "/query",
            "functions": ["TIME_SERIES_DAILY", "GLOBAL_QUOTE", "OVERVIEW"],
        },
    },
    # News
    {
        "source_type": "news_api",
        "source_name": "NewsAPI",
        "credibility_score": 0.7,
        "config": {
            "provider": "newsapi",
            "base_url": "https://newsapi.org",
            "endpoint": "/v2/everything",
            "page_size": 20,
        },
    },
    # Regulatory filings
    {
        "source_type": "filings_api",
        "source_name": "SEC EDGAR",
        "credibility_score": 1.0,
        "config": {
            "provider": "sec_edgar",
            "base_url": "https://efts.sec.gov",
            "forms": ["8-K", "10-Q", "10-K"],
            "user_agent": "StonksOracle/1.0",
        },
    },
]
# Broker source: account-level (one per account), not one per company.
BROKER_SOURCE = {
    "source_type": "broker",
    "source_name": "Alpaca Paper",
    "credibility_score": 1.0,
    "config": {
        "provider": "alpaca",
        "base_url": "https://paper-api.alpaca.markets",
        "mode": "paper",
    },
}
async def _seed_companies(pool: asyncpg.Pool) -> dict:
    """Upsert every entry of COMPANIES; return a ticker -> company id map."""
    company_ids = {}
    for c in COMPANIES:
        # DO UPDATE (rather than DO NOTHING) so RETURNING always yields a row,
        # including for companies that already exist.
        row = await pool.fetchrow(
            """INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket)
               VALUES ($1, $2, $3, $4, $5, $6)
               ON CONFLICT (ticker, exchange) DO UPDATE SET legal_name = EXCLUDED.legal_name
               RETURNING id, ticker""",
            c["ticker"], c["legal_name"], c["exchange"],
            c["sector"], c["industry"], c["market_cap_bucket"],
        )
        company_ids[row["ticker"]] = row["id"]
        logger.info("Company: %s -> %s", row["ticker"], row["id"])
    return company_ids


async def _seed_aliases(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Insert the ALIASES of every seeded company, ignoring conflicts."""
    for ticker, aliases in ALIASES.items():
        cid = company_ids.get(ticker)
        if not cid:
            # Alias listed for a ticker that was not seeded; nothing to attach to.
            continue
        for alias, alias_type in aliases:
            await pool.execute(
                """INSERT INTO company_aliases (company_id, alias, alias_type)
                   VALUES ($1, $2, $3) ON CONFLICT DO NOTHING""",
                cid, alias, alias_type,
            )


async def _seed_watchlist(pool: asyncpg.Pool, company_ids: dict):
    """Upsert the 'Starter 10' watchlist, add all companies; return its id."""
    wl = await pool.fetchrow(
        """INSERT INTO watchlists (name, description)
           VALUES ('Starter 10', 'Initial tracked watchlist — 10 diverse mega/large-cap symbols')
           ON CONFLICT (name) DO UPDATE SET description = EXCLUDED.description
           RETURNING id""",
    )
    wl_id = wl["id"]
    for cid in company_ids.values():
        await pool.execute(
            "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2) ON CONFLICT DO NOTHING",
            wl_id, cid,
        )
    return wl_id


async def _insert_source(pool: asyncpg.Pool, company_id, src: dict) -> None:
    """Insert one source config row for a company, ignoring conflicts."""
    # NOTE(review): `config` is passed as a Python dict. If the column is
    # json/jsonb, asyncpg needs a type codec registered on the connection to
    # accept a dict - confirm get_pg_pool sets one up.
    # NOTE(review): ON CONFLICT DO NOTHING only deduplicates if `sources` has
    # a unique constraint covering these columns - verify against the schema,
    # otherwise re-running the seed inserts duplicates.
    await pool.execute(
        """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score)
           VALUES ($1, $2, $3, $4, $5)
           ON CONFLICT DO NOTHING""",
        company_id, src["source_type"], src["source_name"],
        src["config"], src["credibility_score"],
    )


async def _seed_sources(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Attach SOURCES_PER_COMPANY to each company, plus the broker source once."""
    # The broker source is account-level, so it is attached only to the first
    # company in COMPANIES.
    broker_ticker = COMPANIES[0]["ticker"] if COMPANIES else None
    for ticker, cid in company_ids.items():
        for src in SOURCES_PER_COMPANY:
            await _insert_source(pool, cid, src)
        if ticker == broker_ticker:
            await _insert_source(pool, cid, BROKER_SOURCE)


async def seed(pool: asyncpg.Pool) -> None:
    """Insert the starter companies, aliases, watchlist and sources.

    Companies and the 'Starter 10' watchlist are upserted: an existing row
    has its legal_name / description refreshed. Aliases, watchlist members
    and sources are inserted with ``ON CONFLICT DO NOTHING``.

    The steps run as separate statements on the pool, not in one
    transaction, so a failure part-way leaves the earlier steps applied.

    Args:
        pool: Connection pool used for every statement.
    """
    company_ids = await _seed_companies(pool)

    await _seed_aliases(pool, company_ids)
    logger.info("Aliases seeded")

    wl_id = await _seed_watchlist(pool, company_ids)
    logger.info("Watchlist 'Starter 10' -> %s", wl_id)

    await _seed_sources(pool, company_ids)
    logger.info("Sources seeded")

    total = await pool.fetchval("SELECT count(*) FROM companies")
    logger.info(
        "Seed complete: %s companies, watchlist with %s members",
        total, len(company_ids),
    )
async def main() -> None:
    """Build a pool from the loaded config, run the seed, then close the pool.

    The pool is closed whether or not seeding succeeds.
    """
    pool = await get_pg_pool(load_config())
    try:
        await seed(pool)
    finally:
        await pool.close()


# Script entry point: only runs when the module is executed directly.
if __name__ == "__main__":
    asyncio.run(main())