"""Seed data for initial tracked watchlist. Run against a live database to populate the starter companies, aliases, watchlist, and source configurations. Usage: python -m services.symbol_registry.seed """ import asyncio import logging import asyncpg from services.shared.config import load_config from services.shared.db import get_pg_pool logging.basicConfig(level=logging.INFO) logger = logging.getLogger("seed") # --- Seed Companies --- # Diverse mix: mega-cap tech, finance, healthcare, energy, consumer COMPANIES = [ {"ticker": "AAPL", "legal_name": "Apple Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Consumer Electronics", "market_cap_bucket": "mega"}, {"ticker": "MSFT", "legal_name": "Microsoft Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Software", "market_cap_bucket": "mega"}, {"ticker": "NVDA", "legal_name": "NVIDIA Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "mega"}, {"ticker": "AMZN", "legal_name": "Amazon.com Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Internet Retail", "market_cap_bucket": "mega"}, {"ticker": "GOOGL", "legal_name": "Alphabet Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, {"ticker": "JPM", "legal_name": "JPMorgan Chase & Co.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, {"ticker": "JNJ", "legal_name": "Johnson & Johnson", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, {"ticker": "XOM", "legal_name": "Exxon Mobil Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, {"ticker": "TSLA", "legal_name": "Tesla Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Auto Manufacturers", "market_cap_bucket": "large"}, {"ticker": "META", "legal_name": "Meta Platforms Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, ] # --- Aliases --- ALIASES = { "AAPL": [("Apple", "brand"), ("iPhone", "product")], "MSFT": [("Microsoft", "brand"), ("Azure", "product"), ("Windows", "product")], "NVDA": [("NVIDIA", "brand"), ("GeForce", "product"), ("CUDA", "product")], "AMZN": [("Amazon", "brand"), ("AWS", "product"), ("Prime", "product")], "GOOGL": [("Google", "brand"), ("Alphabet", "legal_name"), ("YouTube", "product")], "JPM": [("JPMorgan", "brand"), ("Chase", "brand")], "JNJ": [("J&J", "brand")], "XOM": [("Exxon", "brand"), ("ExxonMobil", "brand")], "TSLA": [("Tesla", "brand")], "META": [("Facebook", "brand"), ("Instagram", "product"), ("WhatsApp", "product")], } # --- Source configs per company --- # Alpha Vantage for market data (free: 25 req/day) # NewsAPI for news (free: 100 req/day) # SEC EDGAR for filings (free, rate-limited by user-agent) # Alpaca for paper trading (free unlimited paper) SOURCES_PER_COMPANY = [ { "source_type": "market_api", "source_name": "Alpha Vantage", "credibility_score": 0.9, "config": { "provider": "alpha_vantage", "base_url": "https://www.alphavantage.co", "endpoint": "/query", "functions": ["TIME_SERIES_DAILY", "GLOBAL_QUOTE", "OVERVIEW"], }, }, { "source_type": "news_api", "source_name": "NewsAPI", "credibility_score": 0.7, "config": { "provider": "newsapi", "base_url": "https://newsapi.org", "endpoint": "/v2/everything", "page_size": 20, }, }, { "source_type": "filings_api", "source_name": "SEC EDGAR", "credibility_score": 1.0, "config": { "provider": "sec_edgar", "base_url": "https://efts.sec.gov", "forms": ["8-K", "10-Q", "10-K"], "user_agent": "StonksOracle/1.0", }, }, ] # Broker source — one per account, not per company BROKER_SOURCE = { "source_type": "broker", "source_name": "Alpaca Paper", "credibility_score": 1.0, "config": { "provider": "alpaca", "base_url": "https://paper-api.alpaca.markets", "mode": "paper", }, } async def seed(pool: asyncpg.Pool) -> None: """Insert seed data. Skips existing records.""" company_ids = {} # Companies for c in COMPANIES: row = await pool.fetchrow( """INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT (ticker, exchange) DO UPDATE SET legal_name = EXCLUDED.legal_name RETURNING id, ticker""", c["ticker"], c["legal_name"], c["exchange"], c["sector"], c["industry"], c["market_cap_bucket"], ) company_ids[row["ticker"]] = row["id"] logger.info(f"Company: {row['ticker']} -> {row['id']}") # Aliases for ticker, aliases in ALIASES.items(): cid = company_ids.get(ticker) if not cid: continue for alias, alias_type in aliases: await pool.execute( """INSERT INTO company_aliases (company_id, alias, alias_type) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING""", cid, alias, alias_type, ) logger.info("Aliases seeded") # Watchlist wl = await pool.fetchrow( """INSERT INTO watchlists (name, description) VALUES ('Starter 10', 'Initial tracked watchlist — 10 diverse mega/large-cap symbols') ON CONFLICT (name) DO UPDATE SET description = EXCLUDED.description RETURNING id""", ) wl_id = wl["id"] for cid in company_ids.values(): await pool.execute( "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2) ON CONFLICT DO NOTHING", wl_id, cid, ) logger.info(f"Watchlist 'Starter 10' -> {wl_id}") # Sources per company for ticker, cid in company_ids.items(): for src in SOURCES_PER_COMPANY: await pool.execute( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) VALUES ($1, $2, $3, $4, $5) ON CONFLICT DO NOTHING""", cid, src["source_type"], src["source_name"], src["config"], src["credibility_score"], ) # Broker source only for the first company (account-level) if ticker == COMPANIES[0]["ticker"]: await pool.execute( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) VALUES ($1, $2, $3, $4, $5) ON CONFLICT DO NOTHING""", cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"], BROKER_SOURCE["config"], BROKER_SOURCE["credibility_score"], ) logger.info("Sources seeded") total = await pool.fetchval("SELECT count(*) FROM companies") logger.info(f"Seed complete: {total} companies, watchlist with {len(company_ids)} members") async def main() -> None: config = load_config() pool = await get_pg_pool(config) try: await seed(pool) finally: await pool.close() if __name__ == "__main__": asyncio.run(main())