phase 0+1: project scaffold, k8s manifests, CI pipeline, steering, hooks, tests
- Repository structure for all services, infra, lakehouse, dashboards
- K8s manifests targeting stonks-oracle namespace with GHCR images
- Ingress via Traefik with ca-issuer TLS for internal services
- ConfigMap wired to existing cluster services (pg, redis, minio, ollama)
- GitHub Actions workflow for lint, test, multi-service container builds
- Dockerfile with build-arg CMD per service
- Makefile for local build/push/deploy
- Steering rules for TDD workflow, K8s conventions, project context
- Agent hooks for lint-on-save, test-on-save, k8s-validate, phase-commit
- Ruff linter config, all lint issues fixed
- 14 passing tests for schemas, config, redis keys
- PostgreSQL migrations, Trino catalogs, Superset config, MinIO lifecycle
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# Scheduler / Orchestrator Service
|
||||
@@ -0,0 +1,112 @@
|
||||
"""Scheduler - triggers ingestion cycles for tracked symbols and sources."""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio as aioredis
|
||||
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_INGESTION,
|
||||
lock_key,
|
||||
queue_key,
|
||||
rate_limit_key,
|
||||
)
|
||||
|
||||
# Root logging at INFO; this module logs under the "scheduler" name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("scheduler")

# Polling cadence per source class, in seconds. Written as minute multiples
# where that reads more naturally than the raw number.
CADENCES = {
    "market_api": 60,
    "news_api": 5 * 60,
    "filings_api": 60 * 60,
    "web_scrape": 30 * 60,
    "broker": 30,
}
|
||||
|
||||
|
||||
async def acquire_lock(rds: aioredis.Redis, name: str, ttl: int = 60) -> bool:
    """Try to take the named lock without blocking.

    Issues one ``SET key "1" NX EX ttl`` so the key is only written when it
    does not already exist and always carries an expiry.

    Args:
        rds: Async Redis client.
        name: Logical lock name; ``lock_key`` turns it into the Redis key.
        ttl: Seconds after which the lock expires on its own.

    Returns:
        ``True`` if this call created the lock, ``False`` otherwise.
    """
    # redis-py answers None (not False) when NX blocks the write; coerce so
    # the declared ``bool`` return type actually holds for callers.
    return bool(await rds.set(lock_key(name), "1", nx=True, ex=ttl))
|
||||
|
||||
|
||||
async def release_lock(rds: aioredis.Redis, name: str):
    """Drop the named lock by deleting its Redis key.

    The delete is unconditional: it does not check who created the key.
    """
    key = lock_key(name)
    await rds.delete(key)
|
||||
|
||||
|
||||
async def check_rate_limit(rds: aioredis.Redis, source_type: str, max_per_minute: int = 30) -> bool:
    """Count this call against the current minute's bucket for *source_type*.

    Every call increments the counter, including calls that end up over the
    limit. The bucket key is derived from the UTC minute (``YYYYmmddHHMM``).

    Args:
        rds: Async Redis client.
        source_type: Source class the counter is kept for.
        max_per_minute: Highest count still considered within the limit.

    Returns:
        ``True`` while the bucket's count is at or below *max_per_minute*.
    """
    # Function-scope import: the module only imports datetime/timedelta.
    from datetime import timezone

    # Aware UTC instead of the deprecated naive utcnow(); the formatted
    # bucket string is identical.
    minute_bucket = datetime.now(timezone.utc).strftime("%Y%m%d%H%M")
    key = rate_limit_key(source_type, minute_bucket)

    count = await rds.incr(key)
    if count == 1:
        # First hit in this bucket: give the key a 120 s expiry so finished
        # buckets do not accumulate.
        await rds.expire(key, 120)
    return count <= max_per_minute
|
||||
|
||||
|
||||
async def schedule_cycle(pool: asyncpg.Pool, rds: aioredis.Redis):
    """One scheduling pass: find due sources and enqueue ingestion jobs.

    Loads every active source joined to its active company, skips sources
    whose latest 'completed' or 'running' ingestion run started less than one
    cadence ago, skips sources whose type is over its rate limit, and pushes
    one JSON job per remaining source onto the ingestion queue.

    Args:
        pool: asyncpg pool used for the source and last-run queries.
        rds: Async Redis client used for rate limiting and the job queue.
    """
    # Function-scope import: the module only imports datetime/timedelta.
    from datetime import timezone

    sources = await pool.fetch(
        """SELECT s.id as source_id, s.company_id, s.source_type, s.source_name, s.config,
                  c.ticker, c.legal_name
           FROM sources s JOIN companies c ON s.company_id = c.id
           WHERE s.active = TRUE AND c.active = TRUE
           ORDER BY s.source_type, c.ticker"""
    )

    enqueued = 0
    for src in sources:
        source_type = src["source_type"]
        # Source types without an explicit cadence fall back to 600 seconds.
        cadence = CADENCES.get(source_type, 600)

        # Check last run
        last_run = await pool.fetchval(
            "SELECT MAX(started_at) FROM ingestion_runs WHERE source_id = $1 AND status IN ('completed', 'running')",
            src["source_id"],
        )
        if not _is_due(last_run, cadence, datetime.now(timezone.utc)):
            continue

        if not await check_rate_limit(rds, source_type):
            logger.warning("Rate limit hit for %s", source_type)
            continue

        job = {
            "source_id": str(src["source_id"]),
            "company_id": str(src["company_id"]),
            "ticker": src["ticker"],
            "source_type": source_type,
            "source_name": src["source_name"],
            "config": _job_config(src["config"]),
            # Drop tzinfo before formatting so the payload keeps the same
            # offset-less UTC ISO string that utcnow().isoformat() produced.
            "scheduled_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        }
        await rds.rpush(queue_key(QUEUE_INGESTION), json.dumps(job))
        enqueued += 1

    if enqueued:
        logger.info("Enqueued %s ingestion jobs", enqueued)


def _is_due(last_run, cadence, now):
    """Return True when there is no prior run or a full cadence has elapsed."""
    if not last_run:
        return True
    if last_run.tzinfo is None:
        # Naive timestamps are treated as UTC, as the original comparison
        # against utcnow() did.
        last_run = last_run.replace(tzinfo=now.tzinfo)
    # Aware subtraction accounts for any UTC offset on last_run, instead of
    # discarding the offset as replace(tzinfo=None) would.
    return (now - last_run) >= timedelta(seconds=cadence)


def _job_config(raw):
    """Return a source's config column as a plain dict for the job payload."""
    if not raw:
        return {}
    if isinstance(raw, (str, bytes, bytearray)):
        # asyncpg returns json/jsonb as text unless a type codec is
        # registered (pool setup is not visible here), and dict() on a JSON
        # string raises. Decode it first.
        raw = json.loads(raw)
        if not raw:
            return {}
    return dict(raw)
|
||||
|
||||
|
||||
async def main():
    """Run the scheduler loop until the process is stopped.

    Every 15 seconds, tries to take the "scheduler_cycle" lock (30 s TTL) and,
    if it gets it, runs one scheduling pass. Errors from a pass are logged and
    the loop continues. The Postgres pool and Redis client are closed when the
    loop exits.
    """
    config = load_config()
    pool = await get_pg_pool(config)
    rds = get_redis(config)

    logger.info("Scheduler started")
    try:
        while True:
            try:
                if await acquire_lock(rds, "scheduler_cycle", ttl=30):
                    try:
                        await schedule_cycle(pool, rds)
                    finally:
                        # Release even when the pass fails, so a failed pass
                        # does not leave the lock held until its TTL runs out.
                        await release_lock(rds, "scheduler_cycle")
            except Exception as e:
                # logger.exception keeps the traceback; message text unchanged.
                logger.exception("Scheduler cycle error: %s", e)
            await asyncio.sleep(15)
    finally:
        await pool.close()
        await rds.close()
|
||||
|
||||
|
||||
# Start the scheduler loop only when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user