"""Seed data for initial tracked watchlist. Run against a live database to populate the starter companies, aliases, watchlist, source configurations, macro news source, and competitor relationships. Usage: python -m services.symbol_registry.seed """ import asyncio import json import logging import asyncpg from services.shared.config import load_config from services.shared.db import get_pg_pool from services.shared.logging import setup_logging logger = logging.getLogger("seed") # --- Seed Companies (50 diverse large/mega-cap) --- COMPANIES = [ # Technology {"ticker": "AAPL", "legal_name": "Apple Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Consumer Electronics", "market_cap_bucket": "mega"}, {"ticker": "MSFT", "legal_name": "Microsoft Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Software", "market_cap_bucket": "mega"}, {"ticker": "NVDA", "legal_name": "NVIDIA Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "mega"}, {"ticker": "GOOGL", "legal_name": "Alphabet Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, {"ticker": "META", "legal_name": "Meta Platforms Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, {"ticker": "AVGO", "legal_name": "Broadcom Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "mega"}, {"ticker": "ORCL", "legal_name": "Oracle Corporation", "exchange": "NYSE", "sector": "Technology", "industry": "Software", "market_cap_bucket": "mega"}, {"ticker": "CRM", "legal_name": "Salesforce Inc.", "exchange": "NYSE", "sector": "Technology", "industry": "Software", "market_cap_bucket": "large"}, {"ticker": "AMD", "legal_name": "Advanced Micro Devices Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "large"}, {"ticker": "INTC", "legal_name": "Intel Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "large"}, {"ticker": "CSCO", "legal_name": "Cisco Systems Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Networking", "market_cap_bucket": "large"}, {"ticker": "ADBE", "legal_name": "Adobe Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Software", "market_cap_bucket": "large"}, # Consumer Cyclical {"ticker": "AMZN", "legal_name": "Amazon.com Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Internet Retail", "market_cap_bucket": "mega"}, {"ticker": "TSLA", "legal_name": "Tesla Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Auto Manufacturers", "market_cap_bucket": "large"}, {"ticker": "HD", "legal_name": "The Home Depot Inc.", "exchange": "NYSE", "sector": "Consumer Cyclical", "industry": "Home Improvement", "market_cap_bucket": "mega"}, {"ticker": "NKE", "legal_name": "Nike Inc.", "exchange": "NYSE", "sector": "Consumer Cyclical", "industry": "Footwear & Accessories", "market_cap_bucket": "large"}, {"ticker": "MCD", "legal_name": "McDonald's Corporation", "exchange": "NYSE", "sector": "Consumer Cyclical", "industry": "Restaurants", "market_cap_bucket": "large"}, {"ticker": "SBUX", "legal_name": "Starbucks Corporation", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Restaurants", "market_cap_bucket": "large"}, # Financial Services {"ticker": "JPM", "legal_name": "JPMorgan Chase & Co.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, {"ticker": "V", "legal_name": "Visa Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Credit Services", "market_cap_bucket": "mega"}, {"ticker": "MA", "legal_name": "Mastercard Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Credit Services", "market_cap_bucket": "mega"}, {"ticker": "BAC", "legal_name": "Bank of America Corporation", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, {"ticker": "GS", "legal_name": "Goldman Sachs Group Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Capital Markets", "market_cap_bucket": "large"}, {"ticker": "MS", "legal_name": "Morgan Stanley", "exchange": "NYSE", "sector": "Financial Services", "industry": "Capital Markets", "market_cap_bucket": "large"}, {"ticker": "BRK.B", "legal_name": "Berkshire Hathaway Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Insurance", "market_cap_bucket": "mega"}, # Healthcare {"ticker": "JNJ", "legal_name": "Johnson & Johnson", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, {"ticker": "UNH", "legal_name": "UnitedHealth Group Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Health Insurance", "market_cap_bucket": "mega"}, {"ticker": "LLY", "legal_name": "Eli Lilly and Company", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, {"ticker": "PFE", "legal_name": "Pfizer Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "large"}, {"ticker": "ABBV", "legal_name": "AbbVie Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, {"ticker": "MRK", "legal_name": "Merck & Co. Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, # Energy {"ticker": "XOM", "legal_name": "Exxon Mobil Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, {"ticker": "CVX", "legal_name": "Chevron Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, {"ticker": "COP", "legal_name": "ConocoPhillips", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas E&P", "market_cap_bucket": "large"}, {"ticker": "SLB", "legal_name": "Schlumberger Limited", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Services", "market_cap_bucket": "large"}, # Communication Services {"ticker": "NFLX", "legal_name": "Netflix Inc.", "exchange": "NASDAQ", "sector": "Communication Services", "industry": "Entertainment", "market_cap_bucket": "mega"}, {"ticker": "DIS", "legal_name": "The Walt Disney Company", "exchange": "NYSE", "sector": "Communication Services", "industry": "Entertainment", "market_cap_bucket": "large"}, {"ticker": "CMCSA", "legal_name": "Comcast Corporation", "exchange": "NASDAQ", "sector": "Communication Services", "industry": "Telecom", "market_cap_bucket": "large"}, {"ticker": "T", "legal_name": "AT&T Inc.", "exchange": "NYSE", "sector": "Communication Services", "industry": "Telecom", "market_cap_bucket": "large"}, # Industrials {"ticker": "CAT", "legal_name": "Caterpillar Inc.", "exchange": "NYSE", "sector": "Industrials", "industry": "Farm & Heavy Equipment", "market_cap_bucket": "large"}, {"ticker": "BA", "legal_name": "The Boeing Company", "exchange": "NYSE", "sector": "Industrials", "industry": "Aerospace & Defense", "market_cap_bucket": "large"}, {"ticker": "UPS", "legal_name": "United Parcel Service Inc.", "exchange": "NYSE", "sector": "Industrials", "industry": "Logistics", "market_cap_bucket": "large"}, {"ticker": "HON", "legal_name": "Honeywell International Inc.", "exchange": "NASDAQ", "sector": "Industrials", "industry": "Conglomerates", "market_cap_bucket": "large"}, # Consumer Defensive {"ticker": "PG", "legal_name": "Procter & Gamble Company", "exchange": "NYSE", "sector": "Consumer Defensive", "industry": "Household Products", "market_cap_bucket": "mega"}, {"ticker": "KO", "legal_name": "The Coca-Cola Company", "exchange": "NYSE", "sector": "Consumer Defensive", "industry": "Beverages", "market_cap_bucket": "mega"}, {"ticker": "PEP", "legal_name": "PepsiCo Inc.", "exchange": "NASDAQ", "sector": "Consumer Defensive", "industry": "Beverages", "market_cap_bucket": "mega"}, {"ticker": "WMT", "legal_name": "Walmart Inc.", "exchange": "NYSE", "sector": "Consumer Defensive", "industry": "Discount Stores", "market_cap_bucket": "mega"}, {"ticker": "COST", "legal_name": "Costco Wholesale Corporation", "exchange": "NASDAQ", "sector": "Consumer Defensive", "industry": "Discount Stores", "market_cap_bucket": "mega"}, # Real Estate / Utilities {"ticker": "AMT", "legal_name": "American Tower Corporation", "exchange": "NYSE", "sector": "Real Estate", "industry": "REIT - Specialty", "market_cap_bucket": "large"}, {"ticker": "NEE", "legal_name": "NextEra Energy Inc.", "exchange": "NYSE", "sector": "Utilities", "industry": "Utilities - Renewable", "market_cap_bucket": "large"}, ] # --- Aliases --- ALIASES = { "AAPL": [("Apple", "brand"), ("iPhone", "product")], "MSFT": [("Microsoft", "brand"), ("Azure", "product"), ("Windows", "product")], "NVDA": [("NVIDIA", "brand"), ("GeForce", "product"), ("CUDA", "product")], "AMZN": [("Amazon", "brand"), ("AWS", "product"), ("Prime", "product")], "GOOGL": [("Google", "brand"), ("Alphabet", "legal_name"), ("YouTube", "product")], "META": [("Facebook", "brand"), ("Instagram", "product"), ("WhatsApp", "product")], "JPM": [("JPMorgan", "brand"), ("Chase", "brand")], "JNJ": [("J&J", "brand")], "XOM": [("Exxon", "brand"), ("ExxonMobil", "brand")], "TSLA": [("Tesla", "brand")], "NFLX": [("Netflix", "brand")], "DIS": [("Disney", "brand"), ("Disney+", "product")], "V": [("Visa", "brand")], "MA": [("Mastercard", "brand")], "KO": [("Coca-Cola", "brand"), ("Coke", "brand")], "PEP": [("Pepsi", "brand"), ("PepsiCo", "brand")], "BA": [("Boeing", "brand")], "WMT": [("Walmart", "brand")], "COST": [("Costco", "brand")], "CRM": [("Salesforce", "brand")], "ORCL": [("Oracle", "brand")], "ADBE": [("Adobe", "brand"), ("Photoshop", "product")], "AMD": [("AMD", "brand"), ("Ryzen", "product")], "INTC": [("Intel", "brand")], "BAC": [("Bank of America", "brand"), ("BofA", "brand")], "GS": [("Goldman Sachs", "brand"), ("Goldman", "brand")], "UNH": [("UnitedHealth", "brand"), ("Optum", "product")], "LLY": [("Eli Lilly", "brand"), ("Lilly", "brand")], "PFE": [("Pfizer", "brand")], "CVX": [("Chevron", "brand")], "PG": [("P&G", "brand"), ("Procter & Gamble", "brand")], "HD": [("Home Depot", "brand")], "NKE": [("Nike", "brand")], "MCD": [("McDonald's", "brand")], "SBUX": [("Starbucks", "brand")], } # --- Competitor Relationships --- # (ticker_a, ticker_b, relationship_type, strength) COMPETITOR_RELATIONSHIPS = [ # Tech ecosystem rivals ("AAPL", "MSFT", "direct_rival", 0.75), ("AAPL", "GOOGL", "overlapping_products", 0.60), ("AAPL", "META", "overlapping_products", 0.40), ("GOOGL", "META", "direct_rival", 0.85), ("MSFT", "GOOGL", "overlapping_products", 0.70), ("MSFT", "AMZN", "overlapping_products", 0.65), ("MSFT", "ORCL", "direct_rival", 0.60), ("MSFT", "CRM", "overlapping_products", 0.55), ("MSFT", "ADBE", "overlapping_products", 0.40), ("CRM", "ORCL", "direct_rival", 0.70), ("CRM", "ADBE", "overlapping_products", 0.45), # Semiconductors ("NVDA", "AMD", "direct_rival", 0.80), ("NVDA", "INTC", "direct_rival", 0.65), ("NVDA", "AVGO", "same_sector", 0.50), ("AMD", "INTC", "direct_rival", 0.85), ("INTC", "AVGO", "same_sector", 0.40), # Supply chain: chips → tech ("NVDA", "AAPL", "supply_chain_adjacent", 0.50), ("NVDA", "MSFT", "supply_chain_adjacent", 0.60), ("NVDA", "TSLA", "supply_chain_adjacent", 0.45), ("NVDA", "META", "supply_chain_adjacent", 0.50), ("AVGO", "AAPL", "supply_chain_adjacent", 0.55), # Cloud rivals ("AMZN", "GOOGL", "overlapping_products", 0.55), # Financial services ("JPM", "BAC", "direct_rival", 0.80), ("JPM", "GS", "overlapping_products", 0.60), ("GS", "MS", "direct_rival", 0.85), ("V", "MA", "direct_rival", 0.90), # Healthcare / pharma ("JNJ", "PFE", "direct_rival", 0.65), ("JNJ", "ABBV", "direct_rival", 0.60), ("JNJ", "MRK", "direct_rival", 0.60), ("LLY", "PFE", "direct_rival", 0.70), ("LLY", "ABBV", "direct_rival", 0.65), ("LLY", "MRK", "direct_rival", 0.65), ("PFE", "MRK", "direct_rival", 0.75), ("PFE", "ABBV", "direct_rival", 0.70), ("ABBV", "MRK", "direct_rival", 0.70), # Energy ("XOM", "CVX", "direct_rival", 0.85), ("XOM", "COP", "same_sector", 0.55), ("CVX", "COP", "same_sector", 0.55), # Entertainment / streaming ("NFLX", "DIS", "direct_rival", 0.75), ("NFLX", "AMZN", "overlapping_products", 0.50), # Telecom ("CMCSA", "T", "direct_rival", 0.70), # Consumer: beverages ("KO", "PEP", "direct_rival", 0.90), # Consumer: restaurants ("MCD", "SBUX", "same_sector", 0.45), # Consumer: retail ("WMT", "COST", "direct_rival", 0.70), ("WMT", "AMZN", "overlapping_products", 0.60), ("COST", "AMZN", "overlapping_products", 0.40), ] # --- Source configs per company --- SOURCES_PER_COMPANY = [ { "source_type": "market_api", "source_name": "Polygon Market Data", "credibility_score": 0.9, "config": { "provider": "polygon", "endpoint": "intraday_bars", "adjusted": True, }, }, { "source_type": "news_api", "source_name": "Polygon News", "credibility_score": 0.7, "config": { "provider": "polygon", "limit": 20, "order": "desc", }, }, { "source_type": "filings_api", "source_name": "SEC EDGAR", "credibility_score": 1.0, "config": { "provider": "sec_edgar", "forms": "8-K,10-Q,10-K", "user_agent": "StonksOracle/1.0", }, }, ] BROKER_SOURCE = { "source_type": "broker", "source_name": "Alpaca Paper", "credibility_score": 1.0, "config": { "provider": "alpaca", "mode": "paper", }, } # Macro news source (global, not company-specific) MACRO_NEWS_SOURCE = { "source_type": "macro_news", "source_name": "Polygon Global News", "credibility_score": 0.7, "config": { "url": "https://api.polygon.io/v2/reference/news", "params": {"order": "desc"}, "results_key": "results", "provider": "polygon", "limit": 50, }, } async def seed(pool: asyncpg.Pool) -> None: """Insert seed data. Uses upsert for companies, skips existing aliases/sources.""" company_ids: dict[str, str] = {} # Companies — upsert on (ticker, exchange) for c in COMPANIES: row = await pool.fetchrow( """INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT (ticker, exchange) DO UPDATE SET legal_name = EXCLUDED.legal_name, sector = EXCLUDED.sector, industry = EXCLUDED.industry, market_cap_bucket = EXCLUDED.market_cap_bucket, updated_at = NOW() RETURNING id, ticker""", c["ticker"], c["legal_name"], c["exchange"], c["sector"], c["industry"], c["market_cap_bucket"], ) company_ids[row["ticker"]] = str(row["id"]) logger.info("Company: %s -> %s", row["ticker"], row["id"]) # Aliases for ticker, aliases in ALIASES.items(): cid = company_ids.get(ticker) if not cid: continue for alias, alias_type in aliases: await pool.execute( """INSERT INTO company_aliases (company_id, alias, alias_type) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING""", cid, alias, alias_type, ) logger.info("Aliases seeded") # Watchlist wl = await pool.fetchrow( """INSERT INTO watchlists (name, description) VALUES ('Starter 50', 'Initial tracked watchlist — 50 diverse mega/large-cap symbols') ON CONFLICT (name) DO UPDATE SET description = EXCLUDED.description RETURNING id""", ) wl_id = wl["id"] for cid in company_ids.values(): await pool.execute( "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2) ON CONFLICT DO NOTHING", wl_id, cid, ) logger.info("Watchlist 'Starter 50' -> %s", wl_id) # Sources per company for ticker, cid in company_ids.items(): existing = await pool.fetch( "SELECT source_type, source_name FROM sources WHERE company_id = $1", cid, ) existing_set = {(r["source_type"], r["source_name"]) for r in existing} for src in SOURCES_PER_COMPANY: key = (src["source_type"], src["source_name"]) if key in existing_set: continue await pool.execute( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) VALUES ($1, $2, $3, $4::jsonb, $5)""", cid, src["source_type"], src["source_name"], json.dumps(src["config"]), src["credibility_score"], ) # Broker source only for the first company if ticker == COMPANIES[0]["ticker"]: bkey = (BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"]) if bkey not in existing_set: await pool.execute( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) VALUES ($1, $2, $3, $4::jsonb, $5)""", cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"], json.dumps(BROKER_SOURCE["config"]), BROKER_SOURCE["credibility_score"], ) # Macro news source on the first company (global, but needs a company_id FK) if ticker == COMPANIES[0]["ticker"]: mkey = (MACRO_NEWS_SOURCE["source_type"], MACRO_NEWS_SOURCE["source_name"]) if mkey not in existing_set: await pool.execute( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) VALUES ($1, $2, $3, $4::jsonb, $5)""", cid, MACRO_NEWS_SOURCE["source_type"], MACRO_NEWS_SOURCE["source_name"], json.dumps(MACRO_NEWS_SOURCE["config"]), MACRO_NEWS_SOURCE["credibility_score"], ) logger.info("Sources seeded") # Competitor relationships rel_count = 0 for ticker_a, ticker_b, rel_type, strength in COMPETITOR_RELATIONSHIPS: cid_a = company_ids.get(ticker_a) cid_b = company_ids.get(ticker_b) if not cid_a or not cid_b: logger.warning("Skipping relationship %s-%s: company not found", ticker_a, ticker_b) continue # Use LEAST/GREATEST ordering for the unique index a_id = min(cid_a, cid_b) b_id = max(cid_a, cid_b) await pool.execute( """INSERT INTO competitor_relationships (company_a_id, company_b_id, relationship_type, strength, bidirectional, source) VALUES ($1, $2, $3, $4, TRUE, 'manual') ON CONFLICT (LEAST(company_a_id, company_b_id), GREATEST(company_a_id, company_b_id)) WHERE active = TRUE DO UPDATE SET strength = EXCLUDED.strength, relationship_type = EXCLUDED.relationship_type, updated_at = NOW()""", a_id, b_id, rel_type, strength, ) rel_count += 1 logger.info("Competitor relationships seeded: %d", rel_count) total = await pool.fetchval("SELECT count(*) FROM companies") sources_total = await pool.fetchval("SELECT count(*) FROM sources") rels_total = await pool.fetchval("SELECT count(*) FROM competitor_relationships WHERE active = TRUE") logger.info( "Seed complete: %d companies, %d sources, %d competitor relationships, watchlist with %d members", total, sources_total, rels_total, len(company_ids), ) async def main() -> None: config = load_config() setup_logging("seed", level=config.log_level, json_output=config.json_logs) pool = await get_pg_pool(config) try: await seed(pool) finally: await pool.close() if __name__ == "__main__": asyncio.run(main())