feat: competitive intelligence & historical pattern matching layer

2026-04-14 19:42:48 +00:00
parent b478022ba3
commit f7a11d14ea
203 changed files with 20155 additions and 97 deletions
@@ -50,6 +50,7 @@ DEFAULT_CADENCES: dict[str, int] = {
    "filings_api": 3600,
    "web_scrape": 1800,
    "broker": 30,
+    "macro_news": 600,
 }

 # Default rate limits per source type (requests per minute)
@@ -59,6 +60,7 @@ DEFAULT_RATE_LIMITS: dict[str, int] = {
    "filings_api": 10,
    "web_scrape": 10,
    "broker": 60,
+    "macro_news": 10,
 }

 # How long to wait before retrying a failed source (seconds)
@@ -141,9 +143,9 @@ def build_job_payload(
    """Build the ingestion job payload for a source."""
    return {
        "source_id": str(source["source_id"]),
-        "company_id": str(source["company_id"]),
-        "ticker": source["ticker"],
-        "legal_name": source["legal_name"],
+        "company_id": str(source["company_id"]) if source.get("company_id") else None,
+        "ticker": source.get("ticker") or "",
+        "legal_name": source.get("legal_name") or "",
        "aliases": aliases,
        "source_type": source["source_type"],
        "source_name": source["source_name"],
@@ -183,7 +185,7 @@ async def check_rate_limit(


 async def fetch_active_sources(pool: asyncpg.Pool) -> list[asyncpg.Record]:
-    """Fetch all active sources joined with their active companies."""
+    """Fetch all active company-specific sources joined with their active companies."""
    return await pool.fetch(
        """SELECT s.id AS source_id,
                  s.company_id,
@@ -196,10 +198,33 @@ async def fetch_active_sources(pool: asyncpg.Pool) -> list[asyncpg.Record]:
           FROM sources s
           JOIN companies c ON s.company_id = c.id
           WHERE s.active = TRUE AND c.active = TRUE
+             AND s.source_type != 'macro_news'
           ORDER BY s.source_type, c.ticker"""
    )


+async def fetch_macro_sources(pool: asyncpg.Pool) -> list[asyncpg.Record]:
+    """Return the active rows of ``sources`` whose source_type is 'macro_news'.
+
+    The query reads only the sources table (no join against companies), so a
+    row is returned whether or not its company_id is set. Results are sorted
+    by source_name.
+
+    Requirements: 1.1
+    """
+    macro_sql = """SELECT s.id AS source_id,
+                  s.company_id,
+                  s.source_type,
+                  s.source_name,
+                  s.config,
+                  s.credibility_score
+           FROM sources s
+           WHERE s.active = TRUE AND s.source_type = 'macro_news'
+           ORDER BY s.source_name"""
+    rows = await pool.fetch(macro_sql)
+    return rows
+
+
 async def fetch_aliases_for_company(pool: asyncpg.Pool, company_id: str) -> list[str]:
    """Fetch all aliases for a company."""
    rows = await pool.fetch(
@@ -287,9 +312,57 @@ async def schedule_cycle(pool: asyncpg.Pool, rds: aioredis.Redis) -> int:
            source_type, src["ticker"], src["source_name"],
        )

+    # --- Schedule macro news sources (Requirement 1.1) ---
+    macro_sources = await fetch_macro_sources(pool)
+    for src in macro_sources:
+        source_id = src["source_id"]
+        source_type = src["source_type"]
+        source_config = _ensure_dict(src["config"])
+
+        last_run = await fetch_last_run(pool, source_id)
+
+        last_completed_at = None
+        last_status = None
+        retry_count = 0
+        next_retry_at = None
+
+        if last_run:
+            last_status = last_run["status"]
+            last_completed_at = last_run["completed_at"] or last_run["started_at"]
+            retry_count = last_run["retry_count"] or 0
+            next_retry_at = last_run["next_retry_at"]
+
+        if not is_source_due(
+            source_type=source_type,
+            source_config=source_config,
+            last_completed_at=last_completed_at,
+            last_status=last_status,
+            retry_count=retry_count,
+            next_retry_at=next_retry_at,
+            now=now,
+        ):
+            skipped_not_due += 1
+            continue
+
+        if not await check_rate_limit(rds, source_type, now):
+            logger.warning(
+                "Rate limit hit for macro_news, skipping %s",
+                src["source_name"],
+            )
+            skipped_rate_limit += 1
+            continue
+
+        job = build_job_payload(src, [], now)
+        await rds.rpush(queue_key(QUEUE_INGESTION), json.dumps(job))
+        enqueued += 1
+
+        logger.debug(
+            "Enqueued macro_news job for %s", src["source_name"],
+        )
+
    logger.info(
        "Cycle complete: enqueued=%d skipped_not_due=%d skipped_rate_limit=%d total_sources=%d",
-        enqueued, skipped_not_due, skipped_rate_limit, len(sources),
+        enqueued, skipped_not_due, skipped_rate_limit, len(sources) + len(macro_sources),
    )
    return enqueued