diff --git a/services/symbol_registry/seed.py b/services/symbol_registry/seed.py index b0be219..f896e0e 100644 --- a/services/symbol_registry/seed.py +++ b/services/symbol_registry/seed.py @@ -1,7 +1,7 @@ """Seed data for initial tracked watchlist. Run against a live database to populate the starter companies, aliases, -watchlist, and source configurations. +watchlist, source configurations, macro news source, and competitor relationships. Usage: python -m services.symbol_registry.seed @@ -18,19 +18,67 @@ from services.shared.logging import setup_logging logger = logging.getLogger("seed") -# --- Seed Companies --- -# Diverse mix: mega-cap tech, finance, healthcare, energy, consumer +# --- Seed Companies (50 diverse large/mega-cap) --- COMPANIES = [ + # Technology {"ticker": "AAPL", "legal_name": "Apple Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Consumer Electronics", "market_cap_bucket": "mega"}, {"ticker": "MSFT", "legal_name": "Microsoft Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Software", "market_cap_bucket": "mega"}, {"ticker": "NVDA", "legal_name": "NVIDIA Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "mega"}, - {"ticker": "AMZN", "legal_name": "Amazon.com Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Internet Retail", "market_cap_bucket": "mega"}, {"ticker": "GOOGL", "legal_name": "Alphabet Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, - {"ticker": "JPM", "legal_name": "JPMorgan Chase & Co.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, - {"ticker": "JNJ", "legal_name": "Johnson & Johnson", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, - {"ticker": "XOM", "legal_name": "Exxon Mobil Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, - {"ticker": "TSLA", "legal_name": "Tesla Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Auto Manufacturers", "market_cap_bucket": "large"}, {"ticker": "META", "legal_name": "Meta Platforms Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Internet Content", "market_cap_bucket": "mega"}, + {"ticker": "AVGO", "legal_name": "Broadcom Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "mega"}, + {"ticker": "ORCL", "legal_name": "Oracle Corporation", "exchange": "NYSE", "sector": "Technology", "industry": "Software", "market_cap_bucket": "mega"}, + {"ticker": "CRM", "legal_name": "Salesforce Inc.", "exchange": "NYSE", "sector": "Technology", "industry": "Software", "market_cap_bucket": "large"}, + {"ticker": "AMD", "legal_name": "Advanced Micro Devices Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "large"}, + {"ticker": "INTC", "legal_name": "Intel Corporation", "exchange": "NASDAQ", "sector": "Technology", "industry": "Semiconductors", "market_cap_bucket": "large"}, + {"ticker": "CSCO", "legal_name": "Cisco Systems Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Networking", "market_cap_bucket": "large"}, + {"ticker": "ADBE", "legal_name": "Adobe Inc.", "exchange": "NASDAQ", "sector": "Technology", "industry": "Software", "market_cap_bucket": "large"}, + # Consumer Cyclical + {"ticker": "AMZN", "legal_name": "Amazon.com Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Internet Retail", "market_cap_bucket": "mega"}, + {"ticker": "TSLA", "legal_name": "Tesla Inc.", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Auto Manufacturers", "market_cap_bucket": "large"}, + {"ticker": "HD", "legal_name": "The Home Depot Inc.", "exchange": "NYSE", "sector": "Consumer Cyclical", "industry": "Home Improvement", "market_cap_bucket": "mega"}, + {"ticker": "NKE", "legal_name": "Nike Inc.", "exchange": "NYSE", "sector": "Consumer Cyclical", "industry": "Footwear & Accessories", "market_cap_bucket": "large"}, + {"ticker": "MCD", "legal_name": "McDonald's Corporation", "exchange": "NYSE", "sector": "Consumer Cyclical", "industry": "Restaurants", "market_cap_bucket": "large"}, + {"ticker": "SBUX", "legal_name": "Starbucks Corporation", "exchange": "NASDAQ", "sector": "Consumer Cyclical", "industry": "Restaurants", "market_cap_bucket": "large"}, + # Financial Services + {"ticker": "JPM", "legal_name": "JPMorgan Chase & Co.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, + {"ticker": "V", "legal_name": "Visa Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Credit Services", "market_cap_bucket": "mega"}, + {"ticker": "MA", "legal_name": "Mastercard Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Credit Services", "market_cap_bucket": "mega"}, + {"ticker": "BAC", "legal_name": "Bank of America Corporation", "exchange": "NYSE", "sector": "Financial Services", "industry": "Banks", "market_cap_bucket": "mega"}, + {"ticker": "GS", "legal_name": "Goldman Sachs Group Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Capital Markets", "market_cap_bucket": "large"}, + {"ticker": "MS", "legal_name": "Morgan Stanley", "exchange": "NYSE", "sector": "Financial Services", "industry": "Capital Markets", "market_cap_bucket": "large"}, + {"ticker": "BRK.B", "legal_name": "Berkshire Hathaway Inc.", "exchange": "NYSE", "sector": "Financial Services", "industry": "Insurance", "market_cap_bucket": "mega"}, + # Healthcare + {"ticker": "JNJ", "legal_name": "Johnson & Johnson", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, + {"ticker": "UNH", "legal_name": "UnitedHealth Group Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Health Insurance", "market_cap_bucket": "mega"}, + {"ticker": "LLY", "legal_name": "Eli Lilly and Company", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, + {"ticker": "PFE", "legal_name": "Pfizer Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "large"}, + {"ticker": "ABBV", "legal_name": "AbbVie Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, + {"ticker": "MRK", "legal_name": "Merck & Co. Inc.", "exchange": "NYSE", "sector": "Healthcare", "industry": "Drug Manufacturers", "market_cap_bucket": "mega"}, + # Energy + {"ticker": "XOM", "legal_name": "Exxon Mobil Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, + {"ticker": "CVX", "legal_name": "Chevron Corporation", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Integrated", "market_cap_bucket": "mega"}, + {"ticker": "COP", "legal_name": "ConocoPhillips", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas E&P", "market_cap_bucket": "large"}, + {"ticker": "SLB", "legal_name": "Schlumberger Limited", "exchange": "NYSE", "sector": "Energy", "industry": "Oil & Gas Services", "market_cap_bucket": "large"}, + # Communication Services + {"ticker": "NFLX", "legal_name": "Netflix Inc.", "exchange": "NASDAQ", "sector": "Communication Services", "industry": "Entertainment", "market_cap_bucket": "mega"}, + {"ticker": "DIS", "legal_name": "The Walt Disney Company", "exchange": "NYSE", "sector": "Communication Services", "industry": "Entertainment", "market_cap_bucket": "large"}, + {"ticker": "CMCSA", "legal_name": "Comcast Corporation", "exchange": "NASDAQ", "sector": "Communication Services", "industry": "Telecom", "market_cap_bucket": "large"}, + {"ticker": "T", "legal_name": "AT&T Inc.", "exchange": "NYSE", "sector": "Communication Services", "industry": "Telecom", "market_cap_bucket": "large"}, + # Industrials + {"ticker": "CAT", "legal_name": "Caterpillar Inc.", "exchange": "NYSE", "sector": "Industrials", "industry": "Farm & Heavy Equipment", "market_cap_bucket": "large"}, + {"ticker": "BA", "legal_name": "The Boeing Company", "exchange": "NYSE", "sector": "Industrials", "industry": "Aerospace & Defense", "market_cap_bucket": "large"}, + {"ticker": "UPS", "legal_name": "United Parcel Service Inc.", "exchange": "NYSE", "sector": "Industrials", "industry": "Logistics", "market_cap_bucket": "large"}, + {"ticker": "HON", "legal_name": "Honeywell International Inc.", "exchange": "NASDAQ", "sector": "Industrials", "industry": "Conglomerates", "market_cap_bucket": "large"}, + # Consumer Defensive + {"ticker": "PG", "legal_name": "Procter & Gamble Company", "exchange": "NYSE", "sector": "Consumer Defensive", "industry": "Household Products", "market_cap_bucket": "mega"}, + {"ticker": "KO", "legal_name": "The Coca-Cola Company", "exchange": "NYSE", "sector": "Consumer Defensive", "industry": "Beverages", "market_cap_bucket": "mega"}, + {"ticker": "PEP", "legal_name": "PepsiCo Inc.", "exchange": "NASDAQ", "sector": "Consumer Defensive", "industry": "Beverages", "market_cap_bucket": "mega"}, + {"ticker": "WMT", "legal_name": "Walmart Inc.", "exchange": "NYSE", "sector": "Consumer Defensive", "industry": "Discount Stores", "market_cap_bucket": "mega"}, + {"ticker": "COST", "legal_name": "Costco Wholesale Corporation", "exchange": "NASDAQ", "sector": "Consumer Defensive", "industry": "Discount Stores", "market_cap_bucket": "mega"}, + # Real Estate / Utilities + {"ticker": "AMT", "legal_name": "American Tower Corporation", "exchange": "NYSE", "sector": "Real Estate", "industry": "REIT - Specialty", "market_cap_bucket": "large"}, + {"ticker": "NEE", "legal_name": "NextEra Energy Inc.", "exchange": "NYSE", "sector": "Utilities", "industry": "Utilities - Renewable", "market_cap_bucket": "large"}, ] # --- Aliases --- @@ -40,18 +88,102 @@ ALIASES = { "NVDA": [("NVIDIA", "brand"), ("GeForce", "product"), ("CUDA", "product")], "AMZN": [("Amazon", "brand"), ("AWS", "product"), ("Prime", "product")], "GOOGL": [("Google", "brand"), ("Alphabet", "legal_name"), ("YouTube", "product")], + "META": [("Facebook", "brand"), ("Instagram", "product"), ("WhatsApp", "product")], "JPM": [("JPMorgan", "brand"), ("Chase", "brand")], "JNJ": [("J&J", "brand")], "XOM": [("Exxon", "brand"), ("ExxonMobil", "brand")], "TSLA": [("Tesla", "brand")], - "META": [("Facebook", "brand"), ("Instagram", "product"), ("WhatsApp", "product")], + "NFLX": [("Netflix", "brand")], + "DIS": [("Disney", "brand"), ("Disney+", "product")], + "V": [("Visa", "brand")], + "MA": [("Mastercard", "brand")], + "KO": [("Coca-Cola", "brand"), ("Coke", "brand")], + "PEP": [("Pepsi", "brand"), ("PepsiCo", "brand")], + "BA": [("Boeing", "brand")], + "WMT": [("Walmart", "brand")], + "COST": [("Costco", "brand")], + "CRM": [("Salesforce", "brand")], + "ORCL": [("Oracle", "brand")], + "ADBE": [("Adobe", "brand"), ("Photoshop", "product")], + "AMD": [("AMD", "brand"), ("Ryzen", "product")], + "INTC": [("Intel", "brand")], + "BAC": [("Bank of America", "brand"), ("BofA", "brand")], + "GS": [("Goldman Sachs", "brand"), ("Goldman", "brand")], + "UNH": [("UnitedHealth", "brand"), ("Optum", "product")], + "LLY": [("Eli Lilly", "brand"), ("Lilly", "brand")], + "PFE": [("Pfizer", "brand")], + "CVX": [("Chevron", "brand")], + "PG": [("P&G", "brand"), ("Procter & Gamble", "brand")], + "HD": [("Home Depot", "brand")], + "NKE": [("Nike", "brand")], + "MCD": [("McDonald's", "brand")], + "SBUX": [("Starbucks", "brand")], } -# --- Source configs per company --- -# Polygon.io for market data and news (matches PolygonMarketAdapter and PolygonNewsAdapter) -# SEC EDGAR for filings (matches SECEdgarAdapter) -# Alpaca for paper trading (matches AlpacaBrokerAdapter) +# --- Competitor Relationships --- +# (ticker_a, ticker_b, relationship_type, strength) +COMPETITOR_RELATIONSHIPS = [ + # Tech ecosystem rivals + ("AAPL", "MSFT", "direct_rival", 0.75), + ("AAPL", "GOOGL", "overlapping_products", 0.60), + ("AAPL", "META", "overlapping_products", 0.40), + ("GOOGL", "META", "direct_rival", 0.85), + ("MSFT", "GOOGL", "overlapping_products", 0.70), + ("MSFT", "AMZN", "overlapping_products", 0.65), + ("MSFT", "ORCL", "direct_rival", 0.60), + ("MSFT", "CRM", "overlapping_products", 0.55), + ("MSFT", "ADBE", "overlapping_products", 0.40), + ("CRM", "ORCL", "direct_rival", 0.70), + ("CRM", "ADBE", "overlapping_products", 0.45), + # Semiconductors + ("NVDA", "AMD", "direct_rival", 0.80), + ("NVDA", "INTC", "direct_rival", 0.65), + ("NVDA", "AVGO", "same_sector", 0.50), + ("AMD", "INTC", "direct_rival", 0.85), + ("INTC", "AVGO", "same_sector", 0.40), + # Supply chain: chips → tech + ("NVDA", "AAPL", "supply_chain_adjacent", 0.50), + ("NVDA", "MSFT", "supply_chain_adjacent", 0.60), + ("NVDA", "TSLA", "supply_chain_adjacent", 0.45), + ("NVDA", "META", "supply_chain_adjacent", 0.50), + ("AVGO", "AAPL", "supply_chain_adjacent", 0.55), + # Cloud rivals + ("AMZN", "GOOGL", "overlapping_products", 0.55), + # Financial services + ("JPM", "BAC", "direct_rival", 0.80), + ("JPM", "GS", "overlapping_products", 0.60), + ("GS", "MS", "direct_rival", 0.85), + ("V", "MA", "direct_rival", 0.90), + # Healthcare / pharma + ("JNJ", "PFE", "direct_rival", 0.65), + ("JNJ", "ABBV", "direct_rival", 0.60), + ("JNJ", "MRK", "direct_rival", 0.60), + ("LLY", "PFE", "direct_rival", 0.70), + ("LLY", "ABBV", "direct_rival", 0.65), + ("LLY", "MRK", "direct_rival", 0.65), + ("PFE", "MRK", "direct_rival", 0.75), + ("PFE", "ABBV", "direct_rival", 0.70), + ("ABBV", "MRK", "direct_rival", 0.70), + # Energy + ("XOM", "CVX", "direct_rival", 0.85), + ("XOM", "COP", "same_sector", 0.55), + ("CVX", "COP", "same_sector", 0.55), + # Entertainment / streaming + ("NFLX", "DIS", "direct_rival", 0.75), + ("NFLX", "AMZN", "overlapping_products", 0.50), + # Telecom + ("CMCSA", "T", "direct_rival", 0.70), + # Consumer: beverages + ("KO", "PEP", "direct_rival", 0.90), + # Consumer: restaurants + ("MCD", "SBUX", "same_sector", 0.45), + # Consumer: retail + ("WMT", "COST", "direct_rival", 0.70), + ("WMT", "AMZN", "overlapping_products", 0.60), + ("COST", "AMZN", "overlapping_products", 0.40), +] +# --- Source configs per company --- SOURCES_PER_COMPANY = [ { "source_type": "market_api", @@ -85,7 +217,6 @@ SOURCES_PER_COMPANY = [ }, ] -# Broker source — one per account, not per company BROKER_SOURCE = { "source_type": "broker", "source_name": "Alpaca Paper", @@ -96,10 +227,24 @@ BROKER_SOURCE = { }, } +# Macro news source (global, not company-specific) +MACRO_NEWS_SOURCE = { + "source_type": "macro_news", + "source_name": "Polygon Global News", + "credibility_score": 0.7, + "config": { + "url": "https://api.polygon.io/v2/reference/news", + "params": {"order": "desc"}, + "results_key": "results", + "provider": "polygon", + "limit": 50, + }, +} + async def seed(pool: asyncpg.Pool) -> None: """Insert seed data. Uses upsert for companies, skips existing aliases/sources.""" - company_ids = {} + company_ids: dict[str, str] = {} # Companies — upsert on (ticker, exchange) for c in COMPANIES: @@ -116,8 +261,8 @@ async def seed(pool: asyncpg.Pool) -> None: c["ticker"], c["legal_name"], c["exchange"], c["sector"], c["industry"], c["market_cap_bucket"], ) - company_ids[row["ticker"]] = row["id"] - logger.info(f"Company: {row['ticker']} -> {row['id']}") + company_ids[row["ticker"]] = str(row["id"]) + logger.info("Company: %s -> %s", row["ticker"], row["id"]) # Aliases for ticker, aliases in ALIASES.items(): @@ -135,7 +280,7 @@ async def seed(pool: asyncpg.Pool) -> None: # Watchlist wl = await pool.fetchrow( """INSERT INTO watchlists (name, description) - VALUES ('Starter 10', 'Initial tracked watchlist — 10 diverse mega/large-cap symbols') + VALUES ('Starter 50', 'Initial tracked watchlist — 50 diverse mega/large-cap symbols') ON CONFLICT (name) DO UPDATE SET description = EXCLUDED.description RETURNING id""", ) @@ -145,9 +290,9 @@ async def seed(pool: asyncpg.Pool) -> None: "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2) ON CONFLICT DO NOTHING", wl_id, cid, ) - logger.info(f"Watchlist 'Starter 10' -> {wl_id}") + logger.info("Watchlist 'Starter 50' -> %s", wl_id) - # Sources per company — check for existing before inserting + # Sources per company for ticker, cid in company_ids.items(): existing = await pool.fetch( "SELECT source_type, source_name FROM sources WHERE company_id = $1", @@ -158,7 +303,6 @@ async def seed(pool: asyncpg.Pool) -> None: for src in SOURCES_PER_COMPANY: key = (src["source_type"], src["source_name"]) if key in existing_set: - logger.debug(f"Source {key} already exists for {ticker}, skipping") continue await pool.execute( """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) @@ -167,7 +311,7 @@ async def seed(pool: asyncpg.Pool) -> None: json.dumps(src["config"]), src["credibility_score"], ) - # Broker source only for the first company (account-level) + # Broker source only for the first company if ticker == COMPANIES[0]["ticker"]: bkey = (BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"]) if bkey not in existing_set: @@ -177,11 +321,49 @@ async def seed(pool: asyncpg.Pool) -> None: cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"], json.dumps(BROKER_SOURCE["config"]), BROKER_SOURCE["credibility_score"], ) + + # Macro news source on the first company (global, but needs a company_id FK) + if ticker == COMPANIES[0]["ticker"]: + mkey = (MACRO_NEWS_SOURCE["source_type"], MACRO_NEWS_SOURCE["source_name"]) + if mkey not in existing_set: + await pool.execute( + """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score) + VALUES ($1, $2, $3, $4::jsonb, $5)""", + cid, MACRO_NEWS_SOURCE["source_type"], MACRO_NEWS_SOURCE["source_name"], + json.dumps(MACRO_NEWS_SOURCE["config"]), MACRO_NEWS_SOURCE["credibility_score"], + ) logger.info("Sources seeded") + # Competitor relationships + rel_count = 0 + for ticker_a, ticker_b, rel_type, strength in COMPETITOR_RELATIONSHIPS: + cid_a = company_ids.get(ticker_a) + cid_b = company_ids.get(ticker_b) + if not cid_a or not cid_b: + logger.warning("Skipping relationship %s-%s: company not found", ticker_a, ticker_b) + continue + # Use LEAST/GREATEST ordering for the unique index + a_id = min(cid_a, cid_b) + b_id = max(cid_a, cid_b) + await pool.execute( + """INSERT INTO competitor_relationships + (company_a_id, company_b_id, relationship_type, strength, bidirectional, source) + VALUES ($1, $2, $3, $4, TRUE, 'manual') + ON CONFLICT (LEAST(company_a_id, company_b_id), GREATEST(company_a_id, company_b_id)) + WHERE active = TRUE + DO UPDATE SET strength = EXCLUDED.strength, relationship_type = EXCLUDED.relationship_type, updated_at = NOW()""", + a_id, b_id, rel_type, strength, + ) + rel_count += 1 + logger.info("Competitor relationships seeded: %d", rel_count) + total = await pool.fetchval("SELECT count(*) FROM companies") sources_total = await pool.fetchval("SELECT count(*) FROM sources") - logger.info(f"Seed complete: {total} companies, {sources_total} sources, watchlist with {len(company_ids)} members") + rels_total = await pool.fetchval("SELECT count(*) FROM competitor_relationships WHERE active = TRUE") + logger.info( + "Seed complete: %d companies, %d sources, %d competitor relationships, watchlist with %d members", + total, sources_total, rels_total, len(company_ids), + ) async def main() -> None: