phase 17: fix scheduler config parsing, worker entry points, and seed data for Polygon sources
This commit is contained in:
@@ -7,6 +7,7 @@ Usage:
|
||||
python -m services.symbol_registry.seed
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
import asyncpg
|
||||
@@ -47,32 +48,29 @@ ALIASES = {
|
||||
}
|
||||
|
||||
# --- Source configs per company ---
|
||||
# Alpha Vantage for market data (free: 25 req/day)
|
||||
# NewsAPI for news (free: 100 req/day)
|
||||
# SEC EDGAR for filings (free, rate-limited by user-agent)
|
||||
# Alpaca for paper trading (free unlimited paper)
|
||||
# Polygon.io for market data and news (matches PolygonMarketAdapter and PolygonNewsAdapter)
|
||||
# SEC EDGAR for filings (matches SECEdgarAdapter)
|
||||
# Alpaca for paper trading (matches AlpacaBrokerAdapter)
|
||||
|
||||
SOURCES_PER_COMPANY = [
|
||||
{
|
||||
"source_type": "market_api",
|
||||
"source_name": "Alpha Vantage",
|
||||
"source_name": "Polygon Market Data",
|
||||
"credibility_score": 0.9,
|
||||
"config": {
|
||||
"provider": "alpha_vantage",
|
||||
"base_url": "https://www.alphavantage.co",
|
||||
"endpoint": "/query",
|
||||
"functions": ["TIME_SERIES_DAILY", "GLOBAL_QUOTE", "OVERVIEW"],
|
||||
"provider": "polygon",
|
||||
"endpoint": "prev_bars",
|
||||
"adjusted": True,
|
||||
},
|
||||
},
|
||||
{
|
||||
"source_type": "news_api",
|
||||
"source_name": "NewsAPI",
|
||||
"source_name": "Polygon News",
|
||||
"credibility_score": 0.7,
|
||||
"config": {
|
||||
"provider": "newsapi",
|
||||
"base_url": "https://newsapi.org",
|
||||
"endpoint": "/v2/everything",
|
||||
"page_size": 20,
|
||||
"provider": "polygon",
|
||||
"limit": 20,
|
||||
"order": "desc",
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -81,8 +79,7 @@ SOURCES_PER_COMPANY = [
|
||||
"credibility_score": 1.0,
|
||||
"config": {
|
||||
"provider": "sec_edgar",
|
||||
"base_url": "https://efts.sec.gov",
|
||||
"forms": ["8-K", "10-Q", "10-K"],
|
||||
"forms": "8-K,10-Q,10-K",
|
||||
"user_agent": "StonksOracle/1.0",
|
||||
},
|
||||
},
|
||||
@@ -95,22 +92,26 @@ BROKER_SOURCE = {
|
||||
"credibility_score": 1.0,
|
||||
"config": {
|
||||
"provider": "alpaca",
|
||||
"base_url": "https://paper-api.alpaca.markets",
|
||||
"mode": "paper",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async def seed(pool: asyncpg.Pool) -> None:
|
||||
"""Insert seed data. Skips existing records."""
|
||||
"""Insert seed data. Uses upsert for companies, skips existing aliases/sources."""
|
||||
company_ids = {}
|
||||
|
||||
# Companies
|
||||
# Companies — upsert on (ticker, exchange)
|
||||
for c in COMPANIES:
|
||||
row = await pool.fetchrow(
|
||||
"""INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
ON CONFLICT (ticker, exchange) DO UPDATE SET legal_name = EXCLUDED.legal_name
|
||||
ON CONFLICT (ticker, exchange) DO UPDATE SET
|
||||
legal_name = EXCLUDED.legal_name,
|
||||
sector = EXCLUDED.sector,
|
||||
industry = EXCLUDED.industry,
|
||||
market_cap_bucket = EXCLUDED.market_cap_bucket,
|
||||
updated_at = NOW()
|
||||
RETURNING id, ticker""",
|
||||
c["ticker"], c["legal_name"], c["exchange"],
|
||||
c["sector"], c["industry"], c["market_cap_bucket"],
|
||||
@@ -146,29 +147,41 @@ async def seed(pool: asyncpg.Pool) -> None:
|
||||
)
|
||||
logger.info(f"Watchlist 'Starter 10' -> {wl_id}")
|
||||
|
||||
# Sources per company
|
||||
# Sources per company — check for existing before inserting
|
||||
for ticker, cid in company_ids.items():
|
||||
existing = await pool.fetch(
|
||||
"SELECT source_type, source_name FROM sources WHERE company_id = $1",
|
||||
cid,
|
||||
)
|
||||
existing_set = {(r["source_type"], r["source_name"]) for r in existing}
|
||||
|
||||
for src in SOURCES_PER_COMPANY:
|
||||
key = (src["source_type"], src["source_name"])
|
||||
if key in existing_set:
|
||||
logger.debug(f"Source {key} already exists for {ticker}, skipping")
|
||||
continue
|
||||
await pool.execute(
|
||||
"""INSERT INTO sources (company_id, source_type, source_name, config, credibility_score)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
ON CONFLICT DO NOTHING""",
|
||||
VALUES ($1, $2, $3, $4::jsonb, $5)""",
|
||||
cid, src["source_type"], src["source_name"],
|
||||
src["config"], src["credibility_score"],
|
||||
json.dumps(src["config"]), src["credibility_score"],
|
||||
)
|
||||
|
||||
# Broker source only for the first company (account-level)
|
||||
if ticker == COMPANIES[0]["ticker"]:
|
||||
await pool.execute(
|
||||
"""INSERT INTO sources (company_id, source_type, source_name, config, credibility_score)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
ON CONFLICT DO NOTHING""",
|
||||
cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"],
|
||||
BROKER_SOURCE["config"], BROKER_SOURCE["credibility_score"],
|
||||
)
|
||||
bkey = (BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"])
|
||||
if bkey not in existing_set:
|
||||
await pool.execute(
|
||||
"""INSERT INTO sources (company_id, source_type, source_name, config, credibility_score)
|
||||
VALUES ($1, $2, $3, $4::jsonb, $5)""",
|
||||
cid, BROKER_SOURCE["source_type"], BROKER_SOURCE["source_name"],
|
||||
json.dumps(BROKER_SOURCE["config"]), BROKER_SOURCE["credibility_score"],
|
||||
)
|
||||
logger.info("Sources seeded")
|
||||
|
||||
total = await pool.fetchval("SELECT count(*) FROM companies")
|
||||
logger.info(f"Seed complete: {total} companies, watchlist with {len(company_ids)} members")
|
||||
sources_total = await pool.fetchval("SELECT count(*) FROM sources")
|
||||
logger.info(f"Seed complete: {total} companies, {sources_total} sources, watchlist with {len(company_ids)} members")
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
|
||||
Reference in New Issue
Block a user