199 lines
8.2 KiB
Python
199 lines
8.2 KiB
Python
"""Seed data for initial tracked watchlist.
|
|
|
|
Run against a live database to populate the starter companies, aliases,
|
|
watchlist, and source configurations.
|
|
|
|
Usage:
|
|
python -m services.symbol_registry.seed
|
|
"""
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
|
|
import asyncpg
|
|
|
|
from services.shared.config import load_config
|
|
from services.shared.db import get_pg_pool
|
|
from services.shared.logging import setup_logging
|
|
|
|
# Module-level logger; "seed" is also the name main() passes to setup_logging().
logger = logging.getLogger("seed")
|
|
|
|
# --- Seed Companies ---
# Diverse mix: mega-cap tech, finance, healthcare, energy, consumer.
# Each entry supplies exactly the columns written by the companies upsert in
# seed(): ticker, legal_name, exchange, sector, industry, market_cap_bucket.
COMPANIES = [
    {
        "ticker": "AAPL",
        "legal_name": "Apple Inc.",
        "exchange": "NASDAQ",
        "sector": "Technology",
        "industry": "Consumer Electronics",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "MSFT",
        "legal_name": "Microsoft Corporation",
        "exchange": "NASDAQ",
        "sector": "Technology",
        "industry": "Software",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "NVDA",
        "legal_name": "NVIDIA Corporation",
        "exchange": "NASDAQ",
        "sector": "Technology",
        "industry": "Semiconductors",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "AMZN",
        "legal_name": "Amazon.com Inc.",
        "exchange": "NASDAQ",
        "sector": "Consumer Cyclical",
        "industry": "Internet Retail",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "GOOGL",
        "legal_name": "Alphabet Inc.",
        "exchange": "NASDAQ",
        "sector": "Technology",
        "industry": "Internet Content",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "JPM",
        "legal_name": "JPMorgan Chase & Co.",
        "exchange": "NYSE",
        "sector": "Financial Services",
        "industry": "Banks",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "JNJ",
        "legal_name": "Johnson & Johnson",
        "exchange": "NYSE",
        "sector": "Healthcare",
        "industry": "Drug Manufacturers",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "XOM",
        "legal_name": "Exxon Mobil Corporation",
        "exchange": "NYSE",
        "sector": "Energy",
        "industry": "Oil & Gas Integrated",
        "market_cap_bucket": "mega",
    },
    {
        "ticker": "TSLA",
        "legal_name": "Tesla Inc.",
        "exchange": "NASDAQ",
        "sector": "Consumer Cyclical",
        "industry": "Auto Manufacturers",
        "market_cap_bucket": "large",
    },
    {
        "ticker": "META",
        "legal_name": "Meta Platforms Inc.",
        "exchange": "NASDAQ",
        "sector": "Technology",
        "industry": "Internet Content",
        "market_cap_bucket": "mega",
    },
]
|
|
|
|
# --- Aliases ---
# Maps a ticker from COMPANIES to (alias, alias_type) pairs; seed() inserts
# each pair into company_aliases for the matching company.
ALIASES = {
    "AAPL": [("Apple", "brand"), ("iPhone", "product")],
    "MSFT": [("Microsoft", "brand"), ("Azure", "product"), ("Windows", "product")],
    "NVDA": [("NVIDIA", "brand"), ("GeForce", "product"), ("CUDA", "product")],
    "AMZN": [("Amazon", "brand"), ("AWS", "product"), ("Prime", "product")],
    "GOOGL": [("Google", "brand"), ("Alphabet", "legal_name"), ("YouTube", "product")],
    "JPM": [("JPMorgan", "brand"), ("Chase", "brand")],
    "JNJ": [("J&J", "brand")],
    "XOM": [("Exxon", "brand"), ("ExxonMobil", "brand")],
    "TSLA": [("Tesla", "brand")],
    "META": [("Facebook", "brand"), ("Instagram", "product"), ("WhatsApp", "product")],
}
|
|
|
|
# --- Source configs per company ---
# Polygon.io for market data and news (matches PolygonMarketAdapter and PolygonNewsAdapter)
# SEC EDGAR for filings (matches SECEdgarAdapter)
# Alpaca for paper trading (matches AlpacaBrokerAdapter)

# seed() inserts one row per entry below for every seeded company; the
# "config" dict is serialized with json.dumps and stored as jsonb.
SOURCES_PER_COMPANY = [
    {
        "source_type": "market_api",
        "source_name": "Polygon Market Data",
        "credibility_score": 0.9,
        "config": {
            "provider": "polygon",
            "endpoint": "prev_bars",
            "adjusted": True,
        },
    },
    {
        "source_type": "news_api",
        "source_name": "Polygon News",
        "credibility_score": 0.7,
        "config": {
            "provider": "polygon",
            "limit": 20,
            "order": "desc",
        },
    },
    {
        "source_type": "filings_api",
        "source_name": "SEC EDGAR",
        "credibility_score": 1.0,
        "config": {
            "provider": "sec_edgar",
            "forms": "8-K,10-Q,10-K",
            "user_agent": "StonksOracle/1.0",
        },
    },
]
|
|
|
|
# Broker source — one per account, not per company.
# seed() attaches this single row to the first entry of COMPANIES.
BROKER_SOURCE = {
    "source_type": "broker",
    "source_name": "Alpaca Paper",
    "credibility_score": 1.0,
    "config": {
        "provider": "alpaca",
        "mode": "paper",
    },
}
|
|
|
|
|
|
async def _seed_companies(pool: asyncpg.Pool) -> dict:
    """Upsert every entry of COMPANIES and return a ticker -> company id map."""
    company_ids = {}
    for company in COMPANIES:
        # Upsert on (ticker, exchange): an existing row has its descriptive
        # columns refreshed instead of being duplicated.
        row = await pool.fetchrow(
            """
            INSERT INTO companies
                (ticker, legal_name, exchange, sector, industry, market_cap_bucket)
            VALUES ($1, $2, $3, $4, $5, $6)
            ON CONFLICT (ticker, exchange) DO UPDATE SET
                legal_name = EXCLUDED.legal_name,
                sector = EXCLUDED.sector,
                industry = EXCLUDED.industry,
                market_cap_bucket = EXCLUDED.market_cap_bucket,
                updated_at = NOW()
            RETURNING id, ticker
            """,
            company["ticker"],
            company["legal_name"],
            company["exchange"],
            company["sector"],
            company["industry"],
            company["market_cap_bucket"],
        )
        company_ids[row["ticker"]] = row["id"]
        logger.info("Company: %s -> %s", row["ticker"], row["id"])
    return company_ids


async def _seed_aliases(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Insert the ALIASES entries for every ticker present in company_ids."""
    for ticker, aliases in ALIASES.items():
        company_id = company_ids.get(ticker)
        if company_id is None:
            # Alias listed for a ticker that was not seeded; nothing to attach to.
            continue
        for alias, alias_type in aliases:
            await pool.execute(
                """
                INSERT INTO company_aliases (company_id, alias, alias_type)
                VALUES ($1, $2, $3) ON CONFLICT DO NOTHING
                """,
                company_id,
                alias,
                alias_type,
            )
    logger.info("Aliases seeded")


async def _seed_watchlist(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Upsert the 'Starter 10' watchlist and add every seeded company to it."""
    watchlist = await pool.fetchrow(
        """
        INSERT INTO watchlists (name, description)
        VALUES ('Starter 10', 'Initial tracked watchlist — 10 diverse mega/large-cap symbols')
        ON CONFLICT (name) DO UPDATE SET description = EXCLUDED.description
        RETURNING id
        """,
    )
    watchlist_id = watchlist["id"]
    for company_id in company_ids.values():
        await pool.execute(
            """
            INSERT INTO watchlist_members (watchlist_id, company_id)
            VALUES ($1, $2) ON CONFLICT DO NOTHING
            """,
            watchlist_id,
            company_id,
        )
    logger.info("Watchlist 'Starter 10' -> %s", watchlist_id)


async def _insert_source(pool: asyncpg.Pool, company_id, source: dict) -> None:
    """Insert one source definition (per-company or broker) for company_id."""
    await pool.execute(
        """
        INSERT INTO sources (company_id, source_type, source_name, config, credibility_score)
        VALUES ($1, $2, $3, $4::jsonb, $5)
        """,
        company_id,
        source["source_type"],
        source["source_name"],
        # Sent as JSON text and cast to jsonb by the statement.
        json.dumps(source["config"]),
        source["credibility_score"],
    )


async def _seed_sources(pool: asyncpg.Pool, company_ids: dict) -> None:
    """Insert missing per-company sources, plus the broker source on the first company."""
    # The broker source is account-level, so it is attached to a single
    # company: the first entry of COMPANIES.
    broker_ticker = COMPANIES[0]["ticker"] if COMPANIES else None
    broker_key = (BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"])

    for ticker, company_id in company_ids.items():
        # This INSERT has no ON CONFLICT clause, so look up what already
        # exists and skip those (source_type, source_name) pairs.
        rows = await pool.fetch(
            "SELECT source_type, source_name FROM sources WHERE company_id = $1",
            company_id,
        )
        existing = {(r["source_type"], r["source_name"]) for r in rows}

        for source in SOURCES_PER_COMPANY:
            key = (source["source_type"], source["source_name"])
            if key in existing:
                logger.debug("Source %s already exists for %s, skipping", key, ticker)
                continue
            await _insert_source(pool, company_id, source)

        if ticker == broker_ticker and broker_key not in existing:
            await _insert_source(pool, company_id, BROKER_SOURCE)
    logger.info("Sources seeded")


async def seed(pool: asyncpg.Pool) -> None:
    """Insert seed data. Uses upsert for companies, skips existing aliases/sources.

    Runs, in order: companies, aliases, the 'Starter 10' watchlist and its
    members, then sources. Finishes by logging the total row counts of the
    companies and sources tables.

    Args:
        pool: Connection pool that every statement is issued through.
    """
    company_ids = await _seed_companies(pool)
    await _seed_aliases(pool, company_ids)
    await _seed_watchlist(pool, company_ids)
    await _seed_sources(pool, company_ids)

    total = await pool.fetchval("SELECT count(*) FROM companies")
    sources_total = await pool.fetchval("SELECT count(*) FROM sources")
    logger.info(
        "Seed complete: %s companies, %s sources, watchlist with %s members",
        total,
        sources_total,
        len(company_ids),
    )
|
|
|
|
|
|
async def main() -> None:
    """Load config, set up logging, open the pool, and run seed().

    The pool is closed in a ``finally`` block, so it is released even when
    seeding raises.
    """
    config = load_config()
    setup_logging("seed", level=config.log_level, json_output=config.json_logs)
    pool = await get_pg_pool(config)
    try:
        await seed(pool)
    finally:
        await pool.close()
|
|
|
|
|
|
# Script entry point: only runs when executed directly or via `python -m`.
if __name__ == "__main__":
    asyncio.run(main())
|