feat: competitive intelligence & historical pattern matching layer
This commit is contained in:
@@ -50,6 +50,7 @@ DEFAULT_CADENCES: dict[str, int] = {
|
||||
"filings_api": 3600,
|
||||
"web_scrape": 1800,
|
||||
"broker": 30,
|
||||
"macro_news": 600,
|
||||
}
|
||||
|
||||
# Default rate limits per source type (requests per minute)
|
||||
@@ -59,6 +60,7 @@ DEFAULT_RATE_LIMITS: dict[str, int] = {
|
||||
"filings_api": 10,
|
||||
"web_scrape": 10,
|
||||
"broker": 60,
|
||||
"macro_news": 10,
|
||||
}
|
||||
|
||||
# How long to wait before retrying a failed source (seconds)
|
||||
@@ -141,9 +143,9 @@ def build_job_payload(
|
||||
"""Build the ingestion job payload for a source."""
|
||||
return {
|
||||
"source_id": str(source["source_id"]),
|
||||
"company_id": str(source["company_id"]),
|
||||
"ticker": source["ticker"],
|
||||
"legal_name": source["legal_name"],
|
||||
"company_id": str(source["company_id"]) if source.get("company_id") else None,
|
||||
"ticker": source.get("ticker") or "",
|
||||
"legal_name": source.get("legal_name") or "",
|
||||
"aliases": aliases,
|
||||
"source_type": source["source_type"],
|
||||
"source_name": source["source_name"],
|
||||
@@ -183,7 +185,7 @@ async def check_rate_limit(
|
||||
|
||||
|
||||
async def fetch_active_sources(pool: asyncpg.Pool) -> list[asyncpg.Record]:
|
||||
"""Fetch all active sources joined with their active companies."""
|
||||
"""Fetch all active company-specific sources joined with their active companies."""
|
||||
return await pool.fetch(
|
||||
"""SELECT s.id AS source_id,
|
||||
s.company_id,
|
||||
@@ -196,10 +198,33 @@ async def fetch_active_sources(pool: asyncpg.Pool) -> list[asyncpg.Record]:
|
||||
FROM sources s
|
||||
JOIN companies c ON s.company_id = c.id
|
||||
WHERE s.active = TRUE AND c.active = TRUE
|
||||
AND s.source_type != 'macro_news'
|
||||
ORDER BY s.source_type, c.ticker"""
|
||||
)
|
||||
|
||||
|
||||
async def fetch_macro_sources(pool: asyncpg.Pool) -> list[asyncpg.Record]:
    """Return every active source whose type is ``macro_news``.

    Unlike company-specific sources, these rows are read straight from
    ``sources`` with no join against ``companies``, so ``company_id`` is
    passed through unchanged and may be NULL. Rows come back ordered by
    source name. Macro sources are scheduled independently from
    company-specific sources.

    Requirements: 1.1
    """
    # Only columns of the sources table are selected: no ticker or
    # legal_name is available for macro sources.
    macro_query = """SELECT s.id AS source_id,
                  s.company_id,
                  s.source_type,
                  s.source_name,
                  s.config,
                  s.credibility_score
           FROM sources s
           WHERE s.active = TRUE AND s.source_type = 'macro_news'
           ORDER BY s.source_name"""
    return await pool.fetch(macro_query)
|
||||
|
||||
|
||||
async def fetch_aliases_for_company(pool: asyncpg.Pool, company_id: str) -> list[str]:
|
||||
"""Fetch all aliases for a company."""
|
||||
rows = await pool.fetch(
|
||||
@@ -287,9 +312,57 @@ async def schedule_cycle(pool: asyncpg.Pool, rds: aioredis.Redis) -> int:
|
||||
source_type, src["ticker"], src["source_name"],
|
||||
)
|
||||
|
||||
# --- Schedule macro news sources (Requirement 1.1) ---
|
||||
macro_sources = await fetch_macro_sources(pool)
|
||||
for src in macro_sources:
|
||||
source_id = src["source_id"]
|
||||
source_type = src["source_type"]
|
||||
source_config = _ensure_dict(src["config"])
|
||||
|
||||
last_run = await fetch_last_run(pool, source_id)
|
||||
|
||||
last_completed_at = None
|
||||
last_status = None
|
||||
retry_count = 0
|
||||
next_retry_at = None
|
||||
|
||||
if last_run:
|
||||
last_status = last_run["status"]
|
||||
last_completed_at = last_run["completed_at"] or last_run["started_at"]
|
||||
retry_count = last_run["retry_count"] or 0
|
||||
next_retry_at = last_run["next_retry_at"]
|
||||
|
||||
if not is_source_due(
|
||||
source_type=source_type,
|
||||
source_config=source_config,
|
||||
last_completed_at=last_completed_at,
|
||||
last_status=last_status,
|
||||
retry_count=retry_count,
|
||||
next_retry_at=next_retry_at,
|
||||
now=now,
|
||||
):
|
||||
skipped_not_due += 1
|
||||
continue
|
||||
|
||||
if not await check_rate_limit(rds, source_type, now):
|
||||
logger.warning(
|
||||
"Rate limit hit for macro_news, skipping %s",
|
||||
src["source_name"],
|
||||
)
|
||||
skipped_rate_limit += 1
|
||||
continue
|
||||
|
||||
job = build_job_payload(src, [], now)
|
||||
await rds.rpush(queue_key(QUEUE_INGESTION), json.dumps(job))
|
||||
enqueued += 1
|
||||
|
||||
logger.debug(
|
||||
"Enqueued macro_news job for %s", src["source_name"],
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Cycle complete: enqueued=%d skipped_not_due=%d skipped_rate_limit=%d total_sources=%d",
|
||||
enqueued, skipped_not_due, skipped_rate_limit, len(sources),
|
||||
enqueued, skipped_not_due, skipped_rate_limit, len(sources) + len(macro_sources),
|
||||
)
|
||||
return enqueued
|
||||
|
||||
|
||||
Reference in New Issue
Block a user