phase 2: symbol registry validation, seed data, nix dev shell
- Enhanced CompanyCreate with ticker format validation (1-10 uppercase letters)
- Enhanced SourceCreate with pydantic validators for source_type, access_policy, config URLs
- Added /health endpoint to symbol registry
- Seed data: 10 companies (AAPL, MSFT, NVDA, AMZN, GOOGL, JPM, JNJ, XOM, TSLA, META)
- Seed sources: Alpha Vantage (market), NewsAPI (news), SEC EDGAR (filings), Alpaca (paper trading)
- Seed watchlist: 'Starter 10' with all companies and aliases
- Added flake.nix dev shell (nixos-25.11) with Python 3.12, ruff, pytest, kubectl, helm
- 30 passing tests, lint clean, Docker build verified
This commit is contained in:
@@ -1,10 +1,12 @@
|
||||
"""Symbol Registry API - FastAPI application."""
|
||||
import re
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import asyncpg
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool
|
||||
@@ -24,6 +26,19 @@ async def lifespan(app: FastAPI):
|
||||
app = FastAPI(title="Stonks Oracle - Symbol Registry", lifespan=lifespan)
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Health probe: report OK only if a trivial query against ``pool`` succeeds.

    Returns:
        ``{"status": "ok"}`` when ``SELECT 1`` completes.

    Raises:
        HTTPException: 503 ("Database unavailable") if the query fails for
            any reason.
    """
    try:
        await pool.fetchval("SELECT 1")
    except Exception as exc:
        # Probe boundary: every failure maps to "unhealthy". Chain the cause
        # so the underlying database error stays visible in tracebacks.
        raise HTTPException(503, "Database unavailable") from exc
    return {"status": "ok"}
|
||||
|
||||
# A ticker is 1 to 10 uppercase ASCII letters, anchored at both ends.
TICKER_PATTERN = re.compile(r"^[A-Z]{1,10}$")

# Closed vocabularies checked by the request-model validators.
VALID_SOURCE_TYPES = {
    "market_api",
    "news_api",
    "filings_api",
    "web_scrape",
    "broker",
}
VALID_ACCESS_POLICIES = {
    "internal",
    "public",
    "restricted",
}
|
||||
|
||||
|
||||
# --- Request/Response Models ---
|
||||
|
||||
class CompanyCreate(BaseModel):
|
||||
@@ -34,6 +49,14 @@ class CompanyCreate(BaseModel):
|
||||
industry: Optional[str] = None
|
||||
market_cap_bucket: Optional[str] = None
|
||||
|
||||
@field_validator("ticker")
@classmethod
def validate_ticker(cls, v: str) -> str:
    """Normalize a ticker and reject anything that is not 1-10 letters A-Z.

    The input is upper-cased and stripped of surrounding whitespace before
    being matched against ``TICKER_PATTERN``; the normalized form is what is
    returned.

    Raises:
        ValueError: If the normalized value does not match the pattern.
    """
    ticker = v.upper().strip()
    if TICKER_PATTERN.match(ticker):
        return ticker
    raise ValueError(f"Ticker must be 1-10 uppercase letters, got: {ticker}")
|
||||
|
||||
|
||||
class CompanyResponse(BaseModel):
|
||||
id: str
|
||||
@@ -64,6 +87,31 @@ class SourceCreate(BaseModel):
|
||||
retention_days: int = 365
|
||||
access_policy: str = "internal"
|
||||
|
||||
@field_validator("source_type")
@classmethod
def validate_source_type(cls, v: str) -> str:
    """Reject a ``source_type`` that is not in ``VALID_SOURCE_TYPES``.

    Returns:
        The value unchanged when it is allowed.

    Raises:
        ValueError: If the value is not one of the allowed source types.
    """
    if v not in VALID_SOURCE_TYPES:
        # Sort the options: a set's repr order depends on string hash
        # randomization, so the raw set made this message differ per process.
        raise ValueError(f"source_type must be one of {sorted(VALID_SOURCE_TYPES)}")
    return v
|
||||
|
||||
@field_validator("access_policy")
@classmethod
def validate_access_policy(cls, v: str) -> str:
    """Reject an ``access_policy`` that is not in ``VALID_ACCESS_POLICIES``.

    Returns:
        The value unchanged when it is allowed.

    Raises:
        ValueError: If the value is not one of the allowed policies.
    """
    if v not in VALID_ACCESS_POLICIES:
        # Sort the options: a set's repr order depends on string hash
        # randomization, so the raw set made this message differ per process.
        raise ValueError(f"access_policy must be one of {sorted(VALID_ACCESS_POLICIES)}")
    return v
|
||||
|
||||
@field_validator("config")
@classmethod
def validate_config_urls(cls, v: dict) -> dict:
    """Validate the URL-bearing entries of a source ``config`` dict.

    ``base_url`` and ``url`` must be absolute HTTP(S) URLs (scheme and host
    both present) whenever they are set to a non-empty value. ``endpoint`` is
    deliberately not checked: it may be a bare path such as ``"/query"``.

    Returns:
        The config dict, unchanged.

    Raises:
        ValueError: If ``base_url`` or ``url`` is not an absolute HTTP(S) URL.
    """
    for key in ("base_url", "url"):
        value = v.get(key)
        if not value:
            # Missing or empty entries are optional and skipped.
            continue
        parsed = urlparse(str(value))
        # Require a host as well as the scheme, so "https://" alone is rejected.
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            raise ValueError(f"config.{key} must be a valid HTTP(S) URL")
    return v
|
||||
|
||||
|
||||
VALID_SOURCE_TYPES = {"market_api", "news_api", "filings_api", "web_scrape", "broker"}
|
||||
|
||||
@@ -188,8 +236,10 @@ async def list_watchlist_members(watchlist_id: str):
|
||||
|
||||
@app.post("/companies/{company_id}/sources", status_code=201)
|
||||
async def add_source(company_id: str, body: SourceCreate):
|
||||
if body.source_type not in VALID_SOURCE_TYPES:
|
||||
raise HTTPException(400, f"Invalid source_type. Must be one of: {VALID_SOURCE_TYPES}")
|
||||
# Verify company exists
|
||||
exists = await pool.fetchval("SELECT 1 FROM companies WHERE id = $1", company_id)
|
||||
if not exists:
|
||||
raise HTTPException(404, "Company not found")
|
||||
row = await pool.fetchrow(
|
||||
"""INSERT INTO sources (company_id, source_type, source_name, config, credibility_score, retention_days, access_policy)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||
|
||||
@@ -0,0 +1,184 @@
|
||||
"""Seed data for initial tracked watchlist.
|
||||
|
||||
Run against a live database to populate the starter companies, aliases,
|
||||
watchlist, and source configurations.
|
||||
|
||||
Usage:
|
||||
python -m services.symbol_registry.seed
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
import asyncpg
|
||||
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool
|
||||
|
||||
# Root logging is configured at import time so running the module as a script
# prints seeding progress at INFO level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("seed")
|
||||
|
||||
# --- Seed Companies ---
|
||||
# Diverse mix: mega-cap tech, finance, healthcare, energy, consumer
|
||||
# Column order shared by every row in the table below.
_COMPANY_FIELDS = (
    "ticker",
    "legal_name",
    "exchange",
    "sector",
    "industry",
    "market_cap_bucket",
)

# One dict per company, keyed by _COMPANY_FIELDS, in the order listed.
COMPANIES = [
    dict(zip(_COMPANY_FIELDS, row))
    for row in (
        ("AAPL", "Apple Inc.", "NASDAQ", "Technology", "Consumer Electronics", "mega"),
        ("MSFT", "Microsoft Corporation", "NASDAQ", "Technology", "Software", "mega"),
        ("NVDA", "NVIDIA Corporation", "NASDAQ", "Technology", "Semiconductors", "mega"),
        ("AMZN", "Amazon.com Inc.", "NASDAQ", "Consumer Cyclical", "Internet Retail", "mega"),
        ("GOOGL", "Alphabet Inc.", "NASDAQ", "Technology", "Internet Content", "mega"),
        ("JPM", "JPMorgan Chase & Co.", "NYSE", "Financial Services", "Banks", "mega"),
        ("JNJ", "Johnson & Johnson", "NYSE", "Healthcare", "Drug Manufacturers", "mega"),
        ("XOM", "Exxon Mobil Corporation", "NYSE", "Energy", "Oil & Gas Integrated", "mega"),
        ("TSLA", "Tesla Inc.", "NASDAQ", "Consumer Cyclical", "Auto Manufacturers", "large"),
        ("META", "Meta Platforms Inc.", "NASDAQ", "Technology", "Internet Content", "mega"),
    )
]
|
||||
|
||||
# --- Aliases ---
|
||||
# ticker -> list of (alias, alias_type) pairs inserted into company_aliases.
ALIASES = {
    "AAPL": [
        ("Apple", "brand"),
        ("iPhone", "product"),
    ],
    "MSFT": [
        ("Microsoft", "brand"),
        ("Azure", "product"),
        ("Windows", "product"),
    ],
    "NVDA": [
        ("NVIDIA", "brand"),
        ("GeForce", "product"),
        ("CUDA", "product"),
    ],
    "AMZN": [
        ("Amazon", "brand"),
        ("AWS", "product"),
        ("Prime", "product"),
    ],
    "GOOGL": [
        ("Google", "brand"),
        ("Alphabet", "legal_name"),
        ("YouTube", "product"),
    ],
    "JPM": [
        ("JPMorgan", "brand"),
        ("Chase", "brand"),
    ],
    "JNJ": [
        ("J&J", "brand"),
    ],
    "XOM": [
        ("Exxon", "brand"),
        ("ExxonMobil", "brand"),
    ],
    "TSLA": [
        ("Tesla", "brand"),
    ],
    "META": [
        ("Facebook", "brand"),
        ("Instagram", "product"),
        ("WhatsApp", "product"),
    ],
}
|
||||
|
||||
# --- Source configs per company ---
|
||||
# Alpha Vantage for market data (free: 25 req/day)
|
||||
# NewsAPI for news (free: 100 req/day)
|
||||
# SEC EDGAR for filings (free, rate-limited by user-agent)
|
||||
# Alpaca for paper trading (free unlimited paper)
|
||||
|
||||
# Market data provider.
_ALPHA_VANTAGE_SOURCE = {
    "source_type": "market_api",
    "source_name": "Alpha Vantage",
    "credibility_score": 0.9,
    "config": {
        "provider": "alpha_vantage",
        "base_url": "https://www.alphavantage.co",
        "endpoint": "/query",
        "functions": ["TIME_SERIES_DAILY", "GLOBAL_QUOTE", "OVERVIEW"],
    },
}

# News provider.
_NEWSAPI_SOURCE = {
    "source_type": "news_api",
    "source_name": "NewsAPI",
    "credibility_score": 0.7,
    "config": {
        "provider": "newsapi",
        "base_url": "https://newsapi.org",
        "endpoint": "/v2/everything",
        "page_size": 20,
    },
}

# Regulatory filings provider (no "endpoint" entry in its config).
_SEC_EDGAR_SOURCE = {
    "source_type": "filings_api",
    "source_name": "SEC EDGAR",
    "credibility_score": 1.0,
    "config": {
        "provider": "sec_edgar",
        "base_url": "https://efts.sec.gov",
        "forms": ["8-K", "10-Q", "10-K"],
        "user_agent": "StonksOracle/1.0",
    },
}

# Source templates attached to every seeded company, in insertion order.
SOURCES_PER_COMPANY = [
    _ALPHA_VANTAGE_SOURCE,
    _NEWSAPI_SOURCE,
    _SEC_EDGAR_SOURCE,
]
|
||||
|
||||
# Broker source — one per account, not per company
|
||||
# Single broker source template; its config selects Alpaca's paper mode.
BROKER_SOURCE = dict(
    source_type="broker",
    source_name="Alpaca Paper",
    credibility_score=1.0,
    config=dict(
        provider="alpaca",
        base_url="https://paper-api.alpaca.markets",
        mode="paper",
    ),
)
|
||||
|
||||
|
||||
# Shared by the per-company and broker inserts in _seed_sources.
_INSERT_SOURCE_SQL = """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score)
    VALUES ($1, $2, $3, $4, $5)
    ON CONFLICT DO NOTHING"""


async def _seed_companies(pool: asyncpg.Pool) -> dict:
    """Upsert every COMPANIES entry and return a ticker -> company id map."""
    company_ids = {}
    for c in COMPANIES:
        # On a (ticker, exchange) conflict only legal_name is refreshed; the
        # RETURNING clause yields the row id in both the insert and update case.
        row = await pool.fetchrow(
            """INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket)
               VALUES ($1, $2, $3, $4, $5, $6)
               ON CONFLICT (ticker, exchange) DO UPDATE SET legal_name = EXCLUDED.legal_name
               RETURNING id, ticker""",
            c["ticker"], c["legal_name"], c["exchange"],
            c["sector"], c["industry"], c["market_cap_bucket"],
        )
        company_ids[row["ticker"]] = row["id"]
        logger.info("Company: %s -> %s", row["ticker"], row["id"])
    return company_ids


async def _seed_aliases(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Insert ALIASES for every ticker that was seeded; conflicts are ignored."""
    for ticker, aliases in ALIASES.items():
        cid = company_ids.get(ticker)
        if not cid:
            # Alias table may mention tickers that are not in COMPANIES.
            continue
        for alias, alias_type in aliases:
            await pool.execute(
                """INSERT INTO company_aliases (company_id, alias, alias_type)
                   VALUES ($1, $2, $3) ON CONFLICT DO NOTHING""",
                cid, alias, alias_type,
            )
    logger.info("Aliases seeded")


async def _seed_watchlist(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Upsert the 'Starter 10' watchlist and add every seeded company to it."""
    wl = await pool.fetchrow(
        """INSERT INTO watchlists (name, description)
           VALUES ('Starter 10', 'Initial tracked watchlist — 10 diverse mega/large-cap symbols')
           ON CONFLICT (name) DO UPDATE SET description = EXCLUDED.description
           RETURNING id""",
    )
    wl_id = wl["id"]
    for cid in company_ids.values():
        await pool.execute(
            "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2) ON CONFLICT DO NOTHING",
            wl_id, cid,
        )
    logger.info("Watchlist 'Starter 10' -> %s", wl_id)


async def _seed_sources(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Attach SOURCES_PER_COMPANY to each company and BROKER_SOURCE to the first."""
    # NOTE(review): `config` is passed as a Python dict. asyncpg only accepts
    # that for json/jsonb parameters if a codec is registered on the
    # connection — confirm get_pg_pool sets one, else json.dumps is needed.
    # NOTE(review): ON CONFLICT DO NOTHING only de-duplicates re-runs if
    # `sources` has a matching unique constraint — confirm against the schema.
    for ticker, cid in company_ids.items():
        for src in SOURCES_PER_COMPANY:
            await pool.execute(
                _INSERT_SOURCE_SQL,
                cid, src["source_type"], src["source_name"],
                src["config"], src["credibility_score"],
            )
        # The broker source is account-level, so it is recorded once, against
        # the first company in COMPANIES.
        if ticker == COMPANIES[0]["ticker"]:
            await pool.execute(
                _INSERT_SOURCE_SQL,
                cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"],
                BROKER_SOURCE["config"], BROKER_SOURCE["credibility_score"],
            )
    logger.info("Sources seeded")


async def seed(pool: asyncpg.Pool) -> None:
    """Populate companies, aliases, the starter watchlist, and sources.

    Companies and the watchlist are upserted (an existing row has its
    ``legal_name`` / ``description`` refreshed); aliases, watchlist members,
    and sources are inserted with ``ON CONFLICT DO NOTHING``. Statements are
    issued one by one on ``pool`` and are not wrapped in a transaction.

    Args:
        pool: Connection pool used for every statement.
    """
    company_ids = await _seed_companies(pool)
    await _seed_aliases(pool, company_ids)
    await _seed_watchlist(pool, company_ids)
    await _seed_sources(pool, company_ids)

    # `total` counts all rows in companies, which can exceed the number seeded here.
    total = await pool.fetchval("SELECT count(*) FROM companies")
    logger.info(
        "Seed complete: %s companies, watchlist with %s members",
        total, len(company_ids),
    )
|
||||
|
||||
|
||||
async def main() -> None:
    """Build a pool from the loaded config, run the seed, and always close the pool."""
    pool = await get_pg_pool(load_config())
    try:
        await seed(pool)
    finally:
        # Close the pool even when seeding raises.
        await pool.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user