phase 0+1: project scaffold, k8s manifests, CI pipeline, steering, hooks, tests
- Repository structure for all services, infra, lakehouse, dashboards
- K8s manifests targeting stonks-oracle namespace with GHCR images
- Ingress via Traefik with ca-issuer TLS for internal services
- ConfigMap wired to existing cluster services (pg, redis, minio, ollama)
- GitHub Actions workflow for lint, test, multi-service container builds
- Dockerfile with build-arg CMD per service
- Makefile for local build/push/deploy
- Steering rules for TDD workflow, K8s conventions, project context
- Agent hooks for lint-on-save, test-on-save, k8s-validate, phase-commit
- Ruff linter config, all lint issues fixed
- 14 passing tests for schemas, config, redis keys
- PostgreSQL migrations, Trino catalogs, Superset config, MinIO lifecycle
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# Ingestion Adapters
|
||||
@@ -0,0 +1,29 @@
|
||||
"""Base adapter interface for all external API integrations."""
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class AdapterResult:
    """Outcome of one adapter fetch for a single ticker.

    ``error`` defaults to ``None``; every other field must be supplied by the
    adapter that builds the result.
    """

    source_type: str  # identifier of the producing adapter, e.g. "news_api"
    ticker: str  # symbol the fetch was made for
    items: List[Dict[str, Any]]  # records extracted from the response
    raw_payload: bytes  # response body as received
    content_hash: str  # digest string supplied by the adapter
    fetched_at: datetime  # timestamp supplied by the adapter
    error: Optional[str] = None  # failure description, if any
|
||||
|
||||
|
||||
class BaseAdapter(ABC):
    """Abstract contract implemented by every ingestion adapter.

    Subclasses must provide both ``fetch`` and ``source_type``; the class
    cannot be instantiated until they do.
    """

    @abstractmethod
    async def fetch(self, ticker: str, config: Dict[str, Any]) -> AdapterResult:
        """Fetch data for ``ticker`` using the per-source ``config`` mapping.

        Returns:
            An ``AdapterResult`` describing what was fetched.
        """
        ...

    @abstractmethod
    def source_type(self) -> str:
        """Return the string identifier of the source this adapter reads."""
        ...
|
||||
@@ -0,0 +1,108 @@
|
||||
"""Broker API adapter - paper/live trading, orders, positions, balances."""
|
||||
import hashlib
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
|
||||
logger = logging.getLogger("broker_adapter")
|
||||
|
||||
|
||||
class BrokerAdapter(BaseAdapter):
    """Broker API adapter supporting paper and live modes.

    Talks to ``{base_url}/v2/positions``, ``/v2/orders`` and ``/v2/account``
    and sends ``api_key`` as a bearer token.

    NOTE(review): ``api_secret`` is stored but never sent with any request;
    confirm that this matches the broker's authentication scheme.
    """

    def __init__(self, api_key: str = "", api_secret: str = "", base_url: str = "", mode: str = "paper"):
        self.api_key = api_key
        self.api_secret = api_secret
        self.base_url = base_url
        self.mode = mode  # paper | live

    def source_type(self) -> str:
        """Return the source identifier used for broker results."""
        return "broker"

    def _headers(self) -> Dict[str, str]:
        """Build the auth and content-type headers sent with every request."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _failed_result(self, ticker: str, message: str) -> AdapterResult:
        """Build the empty result returned when a fetch fails."""
        return AdapterResult(
            source_type="broker",
            ticker=ticker,
            items=[],
            raw_payload=b"",
            content_hash="",
            fetched_at=datetime.utcnow(),
            error=message,
        )

    async def fetch(self, ticker: str, config: Dict[str, Any]) -> AdapterResult:
        """Fetch the position for a ticker.

        Args:
            ticker: Symbol whose position is requested.
            config: Accepted for interface compatibility; not read here.

        Returns:
            A result with the decoded JSON body as its single item on HTTP
            200. HTTP 4xx/5xx responses other than 404 and any raised
            exception produce a result with ``error`` set, so callers can
            tell a failed call from "nothing to report".
        """
        from urllib.parse import quote

        # Percent-encode the symbol so characters such as "/" cannot alter the path.
        symbol = quote(ticker, safe="")
        url = f"{self.base_url}/v2/positions/{symbol}"

        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(url, headers=self._headers())
                status = resp.status_code

                # NOTE(review): 404 keeps its previous meaning (empty, no
                # error) on the assumption that it signals "no open
                # position" - confirm against the broker API.
                if status >= 400 and status != 404:
                    message = f"HTTP {status} from broker"
                    logger.error("Broker fetch failed for %s: %s", ticker, message)
                    return self._failed_result(ticker, message)

                raw = resp.content
                data = resp.json() if status == 200 else {}
                return AdapterResult(
                    source_type="broker",
                    ticker=ticker,
                    items=[data] if data else [],
                    raw_payload=raw,
                    content_hash=hashlib.sha256(raw).hexdigest(),
                    fetched_at=datetime.utcnow(),
                )
            except Exception as e:
                logger.error("Broker fetch failed for %s: %s", ticker, e)
                return self._failed_result(ticker, str(e))

    async def submit_order(
        self,
        ticker: str,
        side: str,
        qty: float,
        order_type: str = "market",
        limit_price: Optional[float] = None,
        idempotency_key: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Submit an order to the broker and return its JSON response.

        Args:
            ticker: Symbol to trade.
            side: Order side, passed through unchanged.
            qty: Quantity; sent as a string.
            order_type: Order type, passed through unchanged.
            limit_price: Only sent when truthy and ``order_type`` is "limit".
            idempotency_key: Value for the ``Idempotency-Key`` header; a
                random UUID is generated when omitted.

        Returns:
            The decoded response body on success. On an HTTP error status a
            dict with ``error`` (response text) and ``status``; on any other
            failure a dict with ``error`` only. Nothing is raised.
        """
        if self.mode == "live":
            logger.warning("LIVE order submission")

        idem_key = idempotency_key or str(uuid.uuid4())
        payload = {
            "symbol": ticker,
            "qty": str(qty),
            "side": side,
            "type": order_type,
            "time_in_force": "day",
        }
        if limit_price and order_type == "limit":
            payload["limit_price"] = str(limit_price)

        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.post(
                    f"{self.base_url}/v2/orders",
                    headers={**self._headers(), "Idempotency-Key": idem_key},
                    json=payload,
                )
                resp.raise_for_status()
                return resp.json()
            except httpx.HTTPStatusError as e:
                logger.error("Order rejected: %s", e.response.text)
                return {"error": e.response.text, "status": e.response.status_code}
            except Exception as e:
                logger.error("Order submission failed: %s", e)
                return {"error": str(e)}

    async def get_account(self) -> Dict[str, Any]:
        """Return the decoded JSON body of ``GET /v2/account``.

        The HTTP status is not checked, and transport or decoding errors
        propagate to the caller.
        """
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.get(f"{self.base_url}/v2/account", headers=self._headers())
            return resp.json()
|
||||
@@ -0,0 +1,58 @@
|
||||
"""Filings / Regulatory API adapter - fetches SEC-style submissions."""
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
|
||||
logger = logging.getLogger("filings_adapter")
|
||||
|
||||
|
||||
class FilingsAdapter(BaseAdapter):
    """Concrete adapter for SEC EDGAR or similar filings API."""

    def __init__(self, base_url: str = "https://efts.sec.gov", user_agent: str = "StonksOracle/1.0"):
        self.base_url = base_url
        self.user_agent = user_agent

    def source_type(self) -> str:
        """Return the source identifier used for filings results."""
        return "filings_api"

    async def fetch(self, ticker: str, config: Dict[str, Any]) -> AdapterResult:
        """Search filings for a ticker.

        Args:
            ticker: Symbol to search for; it is percent-encoded before being
                placed in the default query string.
            config: Optional keys: ``endpoint`` (path and query appended to
                ``base_url``, used verbatim) and ``start_date`` (lower bound
                for the default query, default "2026-01-01").

        Returns:
            A result whose items are ``hits.hits`` from the response body, or
            a result with ``error`` set when the request or decoding fails.
        """
        from urllib.parse import quote

        endpoint = config.get("endpoint")
        if endpoint is None:
            # The start date used to be fixed in code; it can now be
            # overridden per source without changing the default.
            start_date = quote(str(config.get("start_date", "2026-01-01")), safe="-")
            symbol = quote(ticker, safe="")
            endpoint = (
                f"/LATEST/search-index?q=%22{symbol}%22"
                f"&dateRange=custom&startdt={start_date}&forms=8-K,10-Q,10-K"
            )
        url = f"{self.base_url}{endpoint}"
        headers = {"User-Agent": self.user_agent}

        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(url, headers=headers)
                resp.raise_for_status()
                raw = resp.content
                data = resp.json()
                hits = data.get("hits", {}).get("hits", [])
                return AdapterResult(
                    source_type="filings_api",
                    ticker=ticker,
                    items=hits,
                    raw_payload=raw,
                    content_hash=hashlib.sha256(raw).hexdigest(),
                    fetched_at=datetime.utcnow(),
                )
            except Exception as e:
                logger.error("Filings fetch failed for %s: %s", ticker, e)
                return AdapterResult(
                    source_type="filings_api",
                    ticker=ticker,
                    items=[],
                    raw_payload=b"",
                    content_hash="",
                    fetched_at=datetime.utcnow(),
                    error=str(e),
                )
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Market data API adapter - fetches quotes, bars, and reference data."""
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
|
||||
logger = logging.getLogger("market_adapter")
|
||||
|
||||
|
||||
class MarketDataAdapter(BaseAdapter):
    """Concrete adapter for a market data provider (e.g., Alpha Vantage, Polygon, Yahoo)."""

    def __init__(self, api_key: str = "", base_url: str = ""):
        self.api_key = api_key
        self.base_url = base_url

    def source_type(self) -> str:
        """Return the source identifier used for market data results."""
        return "market_api"

    async def fetch(self, ticker: str, config: Dict[str, Any]) -> AdapterResult:
        """Fetch market data for a ticker.

        Args:
            ticker: Symbol substituted into the endpoint's ``{ticker}``
                placeholder.
            config: Optional keys: ``endpoint`` (path template) and
                ``params`` (query parameters). ``config`` is not modified.

        Returns:
            A result whose items are the body's ``results`` value when the
            body is a dict (the body itself when that key is absent), or the
            body when it is not a dict; a non-list value is wrapped in a
            list. On failure, a result with ``error`` set.
        """
        endpoint = config.get("endpoint", "/v2/aggs/ticker/{ticker}/prev")
        url = f"{self.base_url}{endpoint.format(ticker=ticker)}"

        # Work on a copy: writing the API key into the caller's dict would
        # leak the secret into whatever holds the source config.
        params = dict(config.get("params") or {})
        if self.api_key:
            params["apiKey"] = self.api_key

        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(url, params=params)
                resp.raise_for_status()
                raw = resp.content
                data = resp.json()

                items = data.get("results", [data]) if isinstance(data, dict) else data
                return AdapterResult(
                    source_type="market_api",
                    ticker=ticker,
                    items=items if isinstance(items, list) else [items],
                    raw_payload=raw,
                    content_hash=hashlib.sha256(raw).hexdigest(),
                    fetched_at=datetime.utcnow(),
                )
            except Exception as e:
                logger.error("Market fetch failed for %s: %s", ticker, e)
                return AdapterResult(
                    source_type="market_api",
                    ticker=ticker,
                    items=[],
                    raw_payload=b"",
                    content_hash="",
                    fetched_at=datetime.utcnow(),
                    error=str(e),
                )
|
||||
@@ -0,0 +1,61 @@
|
||||
"""News API adapter - fetches company-linked headlines and article metadata."""
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import AdapterResult, BaseAdapter
|
||||
|
||||
logger = logging.getLogger("news_adapter")
|
||||
|
||||
|
||||
class NewsApiAdapter(BaseAdapter):
    """Concrete adapter for a news API provider."""

    def __init__(self, api_key: str = "", base_url: str = ""):
        self.api_key = api_key
        self.base_url = base_url

    def source_type(self) -> str:
        """Return the source identifier used for news results."""
        return "news_api"

    async def fetch(self, ticker: str, config: Dict[str, Any]) -> AdapterResult:
        """Fetch articles for a ticker.

        Args:
            ticker: Used as the ``q`` query parameter unless ``params``
                already supplies one.
            config: Optional keys: ``endpoint`` (path appended to
                ``base_url``) and ``params`` (query parameters).
                ``config`` is not modified.

        Returns:
            A result whose items are the body's ``articles`` list, or a
            result with ``error`` set when the request or decoding fails.
        """
        endpoint = config.get("endpoint", "/v2/everything")
        url = f"{self.base_url}{endpoint}"

        # Copy before filling in defaults and the API key so the caller's
        # dict is not mutated and never ends up holding the secret.
        params = dict(config.get("params") or {})
        params.setdefault("q", ticker)
        params.setdefault("sortBy", "publishedAt")
        params.setdefault("pageSize", 20)
        if self.api_key:
            params["apiKey"] = self.api_key

        async with httpx.AsyncClient(timeout=30) as client:
            try:
                resp = await client.get(url, params=params)
                resp.raise_for_status()
                raw = resp.content
                data = resp.json()
                articles = data.get("articles", [])
                return AdapterResult(
                    source_type="news_api",
                    ticker=ticker,
                    items=articles,
                    raw_payload=raw,
                    content_hash=hashlib.sha256(raw).hexdigest(),
                    fetched_at=datetime.utcnow(),
                )
            except Exception as e:
                logger.error("News fetch failed for %s: %s", ticker, e)
                return AdapterResult(
                    source_type="news_api",
                    ticker=ticker,
                    items=[],
                    raw_payload=b"",
                    content_hash="",
                    fetched_at=datetime.utcnow(),
                    error=str(e),
                )
|
||||
@@ -0,0 +1 @@
|
||||
# Aggregation Engine - trend summaries and signal aggregation
|
||||
@@ -0,0 +1 @@
|
||||
"""Aggregation worker - rolling trend summaries, contradiction detection, evidence ranking."""
|
||||
@@ -0,0 +1 @@
|
||||
# Query API - exposes companies, documents, trends, recommendations, and audit trails
|
||||
@@ -0,0 +1 @@
|
||||
"""Query API - FastAPI application for analytics, evidence drill-down, and admin controls."""
|
||||
@@ -0,0 +1 @@
|
||||
# Ollama Extraction Service
|
||||
@@ -0,0 +1 @@
|
||||
"""Extraction worker - sends documents to Ollama for structured intelligence extraction."""
|
||||
@@ -0,0 +1 @@
|
||||
# Ingestion Pipeline
|
||||
@@ -0,0 +1,182 @@
|
||||
"""Ingestion worker - processes jobs from the ingestion queue."""
|
||||
import asyncio
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio as aioredis
|
||||
from minio import Minio
|
||||
|
||||
from services.adapters.base import AdapterResult
|
||||
from services.adapters.filings_adapter import FilingsAdapter
|
||||
from services.adapters.market_adapter import MarketDataAdapter
|
||||
from services.adapters.news_adapter import NewsApiAdapter
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_minio, get_pg_pool, get_redis
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_INGESTION,
|
||||
QUEUE_PARSING,
|
||||
dedupe_key,
|
||||
queue_key,
|
||||
)
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("ingestion_worker")
|
||||
|
||||
# Bucket that receives the raw payload for each source type.
# "broker" shares the market bucket.
BUCKET_MAP = dict(
    market_api="stonks-raw-market",
    news_api="stonks-raw-news",
    filings_api="stonks-raw-filings",
    broker="stonks-raw-market",
)
|
||||
|
||||
|
||||
def build_storage_path(source_type: str, ticker: str, doc_id: str) -> str:
    """Return the object key for a raw payload.

    The key has the form
    ``<source_type>/<ticker>/<YYYY>/<MM>/<DD>/<doc_id>/raw.json``, where the
    date part comes from the current ``datetime.utcnow()``.
    """
    date_part = datetime.utcnow().strftime("%Y/%m/%d")
    return "/".join((source_type, ticker, date_part, doc_id, "raw.json"))
|
||||
|
||||
|
||||
async def store_raw_artifact(minio_client: Minio, bucket: str, path: str, data: bytes):
    """Upload ``data`` to ``bucket``/``path`` as ``application/json``.

    ``put_object`` is a blocking call, so it is run in the default executor
    to keep the event loop free while the upload is in flight. Any exception
    raised by the upload propagates to the caller.
    """
    from functools import partial

    upload = partial(
        minio_client.put_object,
        bucket,
        path,
        io.BytesIO(data),
        len(data),
        content_type="application/json",
    )
    await asyncio.get_running_loop().run_in_executor(None, upload)
|
||||
|
||||
|
||||
def _parse_published(value):
    """Convert an item's publish timestamp to ``datetime``, or ``None``.

    Missing, non-string and non-ISO-8601 values all yield ``None`` so that a
    single odd timestamp cannot fail the whole ingestion run.
    """
    if not value or not isinstance(value, str):
        return None
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z".
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        logger.warning("Unparseable publish timestamp: %r", value)
        return None


async def process_job(
    job: dict,
    pool: asyncpg.Pool,
    rds: aioredis.Redis,
    minio_client: Minio,
    adapters: dict,
):
    """Run one ingestion job end to end.

    Steps: record an ``ingestion_runs`` row, fetch through the adapter for
    the job's ``source_type``, store the raw payload, skip payloads whose
    hash was already seen, insert one ``documents`` row per new item and
    enqueue each for parsing, then mark the run completed. Any exception
    after the run row exists marks the run as failed instead of propagating.

    Args:
        job: Must contain ``source_type``, ``ticker``, ``source_id`` and
            ``company_id``; ``config`` is optional.
        pool: Postgres pool used for all SQL.
        rds: Redis client used for dedupe keys and the parsing queue.
        minio_client: Object store client for the raw payload.
        adapters: Mapping of source type to adapter instance.
    """
    source_type = job["source_type"]
    ticker = job["ticker"]
    source_id = job["source_id"]
    config = job.get("config", {})

    adapter = adapters.get(source_type)
    if not adapter:
        logger.warning(f"No adapter for source_type={source_type}")
        return

    # Record ingestion run
    run_id = await pool.fetchval(
        """INSERT INTO ingestion_runs (source_id, company_id, source_type, status)
        VALUES ($1, $2, $3, 'running') RETURNING id""",
        source_id, job["company_id"], source_type,
    )

    try:
        result: AdapterResult = await adapter.fetch(ticker, config)

        if result.error:
            await pool.execute(
                "UPDATE ingestion_runs SET status='failed', error_message=$2, completed_at=NOW() WHERE id=$1",
                run_id, result.error,
            )
            return

        # Store raw payload
        bucket = BUCKET_MAP.get(source_type, "stonks-raw-market")
        storage_path = build_storage_path(source_type, ticker, str(run_id))
        await store_raw_artifact(minio_client, bucket, storage_path, result.raw_payload)

        # Dedupe check. SET NX claims the hash atomically, so two workers
        # that fetch the same payload cannot both pass a separate GET.
        if result.content_hash:
            first_seen = await rds.set(dedupe_key(result.content_hash), "1", nx=True, ex=86400)
            if not first_seen:
                logger.info(f"Duplicate content for {ticker}, skipping")
                await pool.execute(
                    "UPDATE ingestion_runs SET status='completed', items_fetched=$2, items_new=0, completed_at=NOW() WHERE id=$1",
                    run_id, len(result.items),
                )
                return

        new_items = 0
        for item in result.items:
            item_json = json.dumps(item)
            item_hash = hashlib.sha256(item_json.encode()).hexdigest()

            # Check if document already exists
            exists = await pool.fetchval("SELECT 1 FROM documents WHERE content_hash = $1", item_hash)
            if exists:
                continue

            title = item.get("title", item.get("name", ""))
            url = item.get("url", item.get("link", ""))
            published = item.get("publishedAt", item.get("published_at"))

            # "source" is either a dict carrying a name or a plain value.
            source = item.get("source", "")
            publisher = source.get("name", "") if isinstance(source, dict) else str(source)

            doc_id = await pool.fetchval(
                """INSERT INTO documents (document_type, source_type, publisher, url, title, published_at, content_hash, raw_storage_ref, status)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'ingested')
                RETURNING id""",
                "filing" if source_type == "filings_api" else "article",
                source_type,
                publisher,
                url, title,
                _parse_published(published),
                item_hash,
                f"s3://{bucket}/{storage_path}",
            )

            # Enqueue for parsing
            await rds.rpush(queue_key(QUEUE_PARSING), json.dumps({
                "document_id": str(doc_id),
                "ticker": ticker,
                "source_type": source_type,
                "url": url,
            }))
            new_items += 1

        await pool.execute(
            "UPDATE ingestion_runs SET status='completed', items_fetched=$2, items_new=$3, completed_at=NOW() WHERE id=$1",
            run_id, len(result.items), new_items,
        )
        logger.info(f"Ingested {ticker}/{source_type}: {len(result.items)} fetched, {new_items} new")

    except Exception as e:
        logger.error(f"Ingestion error for {ticker}: {e}")
        await pool.execute(
            "UPDATE ingestion_runs SET status='failed', error_message=$2, completed_at=NOW() WHERE id=$1",
            run_id, str(e),
        )
|
||||
|
||||
|
||||
async def main():
    """Run the ingestion worker loop until the process is stopped.

    Pops jobs from the ingestion queue and hands each to ``process_job``,
    sleeping two seconds when the queue is empty. A job that cannot be
    decoded or that raises is logged and skipped so one bad job cannot stop
    the worker. The Postgres pool and Redis client are closed on exit.
    """
    config = load_config()
    pool = await get_pg_pool(config)
    rds = get_redis(config)
    minio_client = get_minio(config)

    # NOTE(review): the market adapter is given the broker API key and the
    # news adapter an empty key - confirm both are intended.
    adapters = {
        "market_api": MarketDataAdapter(
            api_key=config.broker.api_key or "",
            base_url="https://api.polygon.io",
        ),
        "news_api": NewsApiAdapter(
            api_key="",
            base_url="https://newsapi.org",
        ),
        "filings_api": FilingsAdapter(),
    }

    logger.info("Ingestion worker started")
    queue = queue_key(QUEUE_INGESTION)

    try:
        while True:
            raw = await rds.lpop(queue)
            if not raw:
                await asyncio.sleep(2)
                continue
            try:
                job = json.loads(raw)
                await process_job(job, pool, rds, minio_client, adapters)
            except Exception as e:
                # Covers malformed JSON, missing job keys and failures
                # before process_job's own error handling is reached.
                logger.error(f"Ingestion job error: {e}")
    finally:
        await pool.close()
        await rds.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1 @@
|
||||
# Lake Publisher - transforms operational data into analytical fact datasets
|
||||
@@ -0,0 +1 @@
|
||||
"""Lake publisher worker - writes partitioned Parquet facts to MinIO for Trino/Superset."""
|
||||
@@ -0,0 +1 @@
|
||||
# Scraper / Parser Service
|
||||
@@ -0,0 +1,209 @@
|
||||
"""Parser worker - HTML-to-text, boilerplate reduction, quality scoring."""
|
||||
import asyncio
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
import redis.asyncio as aioredis
|
||||
from minio import Minio
|
||||
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_minio, get_pg_pool, get_redis
|
||||
from services.shared.redis_keys import QUEUE_EXTRACTION, QUEUE_PARSING, queue_key
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("parser_worker")
|
||||
|
||||
# Simple boilerplate patterns to strip.
# The phrase patterns remove everything from the phrase to the end of its
# line (or to the end of the text when no newline follows), so they are
# meant for text that still has line breaks.
BOILERPLATE_PATTERNS = [
    re.compile(r"(?i)subscribe to our newsletter.*?(?:\n|$)"),
    re.compile(r"(?i)click here to read more.*?(?:\n|$)"),
    re.compile(r"(?i)advertisement\s*\n"),
    re.compile(r"(?i)copyright ©.*?(?:\n|$)"),
    re.compile(r"(?i)all rights reserved.*?(?:\n|$)"),
    re.compile(r"(?i)terms of (use|service).*?(?:\n|$)"),
    re.compile(r"(?i)privacy policy.*?(?:\n|$)"),
    # Bracketed ad markers only ("[Ad]", "[ads]", "[Advertisement]").
    # The previous pattern matched any bracket pair with "ad" somewhere
    # between, which removed text such as "[NASDAQ: AAPL]" and could span
    # from one bracket to a later, unrelated one.
    re.compile(r"[ \t]*\[\s*ad(?:s|vert(?:isements?)?)?\s*\]", re.IGNORECASE),
]
|
||||
|
||||
|
||||
# Tags that end a line of text; everything else is treated as inline.
_BLOCK_BREAK_RE = re.compile(
    r"<\s*/?\s*(?:br|p|div|li|ul|ol|tr|table|section|article|header|footer|nav"
    r"|aside|blockquote|pre|h[1-6])\b[^>]*>",
    re.IGNORECASE,
)


def strip_html_tags(html: str) -> str:
    """Convert HTML to plain text, one line per block-level element.

    Script and style elements are removed with their content, block-level
    tags become line breaks, all other tags become spaces, and character
    references are decoded once after tag removal. Runs of spaces are
    collapsed and blank lines dropped.

    Args:
        html: Markup to convert; an empty value yields "".

    Returns:
        The text with lines separated by ``\\n`` and no leading or trailing
        whitespace on any line.
    """
    # Imported here because the parameter shadows the module name.
    from html import unescape

    if not html:
        return ""

    text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL | re.IGNORECASE)

    # Line breaks in the markup source carry no meaning; only block tags do.
    text = re.sub(r"\s+", " ", text)
    text = _BLOCK_BREAK_RE.sub("\n", text)
    text = re.sub(r"<[^>]+>", " ", text)

    # One decoding pass covers named, decimal and hex references and avoids
    # double-decoding sequences such as "&amp;lt;".
    text = unescape(text).replace("\xa0", " ")

    lines = (re.sub(r"[ \t]+", " ", line).strip() for line in text.split("\n"))
    return "\n".join(line for line in lines if line)
|
||||
|
||||
|
||||
def reduce_boilerplate(text: str) -> str:
    """Delete every ``BOILERPLATE_PATTERNS`` match from ``text``.

    Patterns are applied in list order, each to the output of the previous
    one, and the final text is stripped of surrounding whitespace.
    """
    cleaned = text
    for rx in BOILERPLATE_PATTERNS:
        cleaned = rx.sub("", cleaned)
    return cleaned.strip()
|
||||
|
||||
|
||||
def score_quality(text: str) -> Tuple[float, str]:
    """Rate parsed text by its whitespace-separated word count.

    Returns:
        ``(score, confidence_label)``: under 20 words gives (0.1, "low"),
        under 50 gives (0.3, "low"), under 150 gives (0.6, "medium"), and
        anything longer gives (0.85, "high").
    """
    words = len(text.split())
    # (exclusive upper bound, score, label), checked in ascending order.
    tiers = (
        (20, 0.1, "low"),
        (50, 0.3, "low"),
        (150, 0.6, "medium"),
    )
    for limit, score, label in tiers:
        if words < limit:
            return score, label
    return 0.85, "high"
|
||||
|
||||
|
||||
def detect_company_mentions(text: str, aliases: List[dict]) -> List[dict]:
    """Detect company mentions using ticker, alias, and name matching.

    Matching is case-insensitive and requires the alias to stand on its own:
    it must not be directly preceded or followed by a word character, so the
    ticker "T" no longer matches inside "THE". Entries whose alias is
    missing, ``None`` or blank are skipped.

    Args:
        text: Text to search.
        aliases: Dicts with ``alias`` and ``company_id``; ``ticker`` and
            ``alias_type`` are optional.

    Returns:
        One dict per matching alias entry, in input order, with
        ``company_id``, ``ticker``, ``mention_type`` and a fixed
        ``confidence`` of 0.7.
    """
    if not text:
        return []

    haystack = text.upper()
    mentions = []
    for alias_info in aliases:
        alias = alias_info.get("alias")
        if not isinstance(alias, str) or not alias.strip():
            continue
        needle = re.escape(alias.strip().upper())
        if re.search(r"(?<!\w)" + needle + r"(?!\w)", haystack):
            mentions.append({
                "company_id": alias_info["company_id"],
                "ticker": alias_info.get("ticker", ""),
                "mention_type": alias_info.get("alias_type", "alias"),
                "confidence": 0.7,
            })
    return mentions
|
||||
|
||||
|
||||
async def fetch_html(url: str) -> Optional[str]:
    """Download a page and return its text, or ``None`` when unavailable.

    ``None`` is returned for an empty ``url`` and whenever the request fails
    or returns an error status; failures are logged as warnings. Redirects
    are followed and the request times out after 30 seconds.
    """
    if not url:
        return None

    request_headers = {"User-Agent": "StonksOracle/1.0"}
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        try:
            response = await client.get(url, headers=request_headers)
            response.raise_for_status()
            return response.text
        except Exception as e:
            logger.warning(f"Failed to fetch {url}: {e}")
            return None
|
||||
|
||||
|
||||
async def process_job(
    job: dict,
    pool: asyncpg.Pool,
    rds: aioredis.Redis,
    minio_client: Minio,
):
    """Parse one document and route it onward.

    Steps: fetch the HTML for the job's URL, store the raw HTML, convert it
    to text and strip boilerplate, score the text, store the normalized
    text, record company mentions, update the ``documents`` row, and enqueue
    the document for extraction unless its confidence is "low".

    Args:
        job: Must contain ``document_id`` and ``ticker``; ``url`` is optional.
        pool: Postgres pool used for all SQL.
        rds: Redis client used for the extraction queue.
        minio_client: Object store client for raw and normalized artifacts.
    """
    doc_id = job["document_id"]
    ticker = job["ticker"]
    url = job.get("url", "")

    # Fetch HTML if we have a URL
    html = await fetch_html(url) if url else None

    # One timestamp for both object keys, so the raw and normalized
    # artifacts of a document always share the same date folder.
    now = datetime.utcnow()
    date_prefix = f"{now.year}/{now.month:02d}/{now.day:02d}"

    if html:
        # Store raw HTML
        html_bytes = html.encode("utf-8")
        html_path = f"scrape/{ticker}/{date_prefix}/{doc_id}/raw.html"
        minio_client.put_object(
            "stonks-raw-news", html_path, io.BytesIO(html_bytes), len(html_bytes),
            content_type="text/html",
        )

        # Parse
        text = reduce_boilerplate(strip_html_tags(html))
    else:
        text = ""

    quality_score, confidence = score_quality(text)

    # Store normalized text
    if text:
        text_bytes = text.encode("utf-8")
        norm_path = f"parsed/{ticker}/{date_prefix}/{doc_id}/normalized.txt"
        minio_client.put_object(
            "stonks-normalized", norm_path, io.BytesIO(text_bytes), len(text_bytes),
            content_type="text/plain",
        )
    else:
        norm_path = None

    # Detect company mentions. The alias lookup is skipped when there is no
    # text, because its result would be discarded.
    mentions = []
    if text:
        aliases = await pool.fetch(
            """SELECT ca.company_id::text, ca.alias, ca.alias_type, c.ticker
            FROM company_aliases ca JOIN companies c ON ca.company_id = c.id
            UNION ALL
            SELECT c.id::text as company_id, c.ticker as alias, 'ticker' as alias_type, c.ticker
            FROM companies c
            UNION ALL
            SELECT c.id::text as company_id, c.legal_name as alias, 'legal_name' as alias_type, c.ticker
            FROM companies c"""
        )
        mentions = detect_company_mentions(text, [dict(a) for a in aliases])

    # Update document
    status = "parsed" if confidence != "low" else "low_quality"
    await pool.execute(
        """UPDATE documents SET
        normalized_storage_ref=$2, parse_quality_score=$3, parse_confidence=$4, status=$5, updated_at=NOW()
        WHERE id=$1""",
        doc_id, f"s3://stonks-normalized/{norm_path}" if norm_path else None,
        quality_score, confidence, status,
    )

    # Insert company mentions
    for m in mentions:
        await pool.execute(
            """INSERT INTO document_company_mentions (document_id, company_id, ticker, mention_type, confidence)
            VALUES ($1, $2, $3, $4, $5) ON CONFLICT DO NOTHING""",
            doc_id, m["company_id"], m["ticker"], m["mention_type"], m["confidence"],
        )

    # Only enqueue for extraction if quality is acceptable
    if confidence != "low":
        await rds.rpush(queue_key(QUEUE_EXTRACTION), json.dumps({
            "document_id": doc_id,
            "ticker": ticker,
            "normalized_text": text[:8000],  # Truncate for prompt
        }))
        logger.info(f"Parsed doc {doc_id} for {ticker}: quality={quality_score:.2f}, confidence={confidence}")
    else:
        logger.warning(f"Low quality parse for doc {doc_id}, skipping extraction")
|
||||
|
||||
|
||||
async def main():
    """Run the parser worker loop until the process is stopped.

    Pops jobs from the parsing queue and hands each to ``process_job``,
    sleeping two seconds when the queue is empty. Decoding and processing
    errors are logged and the loop continues. The Postgres pool and Redis
    client are closed on exit.
    """
    config = load_config()
    pool = await get_pg_pool(config)
    rds = get_redis(config)
    minio_client = get_minio(config)

    logger.info("Parser worker started")
    queue = queue_key(QUEUE_PARSING)

    try:
        while True:
            raw = await rds.lpop(queue)
            if not raw:
                await asyncio.sleep(2)
                continue
            try:
                # Decoding happens inside the guard so a malformed payload
                # is skipped instead of stopping the worker.
                job = json.loads(raw)
                await process_job(job, pool, rds, minio_client)
            except Exception as e:
                logger.error(f"Parse error: {e}")
    finally:
        await pool.close()
        await rds.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1 @@
|
||||
# Recommendation Engine - trade recommendations from aggregated signals
|
||||
@@ -0,0 +1 @@
|
||||
"""Recommendation worker - generates explainable trade recommendations from trend data."""
|
||||
@@ -0,0 +1 @@
|
||||
# Risk Engine - portfolio risk controls and trade eligibility
|
||||
@@ -0,0 +1 @@
|
||||
"""Risk engine - enforces guardrails, position limits, and trade eligibility checks."""
|
||||
@@ -0,0 +1 @@
|
||||
# Scheduler / Orchestrator Service
|
||||
@@ -0,0 +1,112 @@
|
||||
"""Scheduler - triggers ingestion cycles for tracked symbols and sources."""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import asyncpg
|
||||
import redis.asyncio as aioredis
|
||||
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool, get_redis
|
||||
from services.shared.redis_keys import (
|
||||
QUEUE_INGESTION,
|
||||
lock_key,
|
||||
queue_key,
|
||||
rate_limit_key,
|
||||
)
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("scheduler")
|
||||
|
||||
# Minimum number of seconds between ingestion runs, keyed by source type.
CADENCES = dict(
    market_api=60,
    news_api=300,
    filings_api=3600,
    web_scrape=1800,
    broker=30,
)
|
||||
|
||||
|
||||
async def acquire_lock(rds: aioredis.Redis, name: str, ttl: int = 60) -> bool:
    """Try to take the named lock for ``ttl`` seconds.

    The lock is a key written with ``SET NX EX``, so only one caller succeeds
    until the key expires or is deleted.

    Returns:
        ``True`` when the lock was acquired, ``False`` otherwise. The result
        is coerced to ``bool`` because the client call is not guaranteed to
        return a real boolean when the key already exists.
    """
    return bool(await rds.set(lock_key(name), "1", nx=True, ex=ttl))
|
||||
|
||||
|
||||
async def release_lock(rds: aioredis.Redis, name: str):
    """Delete the named lock key, whoever currently holds it."""
    key = lock_key(name)
    await rds.delete(key)
|
||||
|
||||
|
||||
async def check_rate_limit(rds: aioredis.Redis, source_type: str, max_per_minute: int = 30) -> bool:
    """Count one call against the per-minute budget for ``source_type``.

    A counter keyed by source type and the current UTC minute is
    incremented; the first increment also gives the key a 120-second expiry.

    Returns:
        ``True`` while the counter is at or below ``max_per_minute``.
    """
    minute_bucket = datetime.utcnow().strftime("%Y%m%d%H%M")
    counter_key = rate_limit_key(source_type, minute_bucket)
    hits = await rds.incr(counter_key)
    if hits == 1:
        await rds.expire(counter_key, 120)
    return not hits > max_per_minute
|
||||
|
||||
|
||||
def _decode_source_config(raw):
    """Return a source's ``config`` column as a plain dict.

    Accepts a mapping, a JSON string or bytes (the column may arrive as text
    when no JSON codec is registered on the connection), or an empty value.
    Anything that does not decode to a JSON object yields ``{}``.
    """
    if not raw:
        return {}
    if isinstance(raw, (str, bytes, bytearray)):
        try:
            raw = json.loads(raw)
        except ValueError:
            logger.warning("Ignoring source config that is not valid JSON")
            return {}
    if not isinstance(raw, dict):
        try:
            return dict(raw)
        except (TypeError, ValueError):
            logger.warning("Ignoring source config that is not a JSON object")
            return {}
    return dict(raw)


async def schedule_cycle(pool: asyncpg.Pool, rds: aioredis.Redis):
    """One scheduling pass: find due sources and enqueue ingestion jobs.

    A source is due when it has no completed or running run that started
    within its cadence (``CADENCES``, default 600 seconds). Due sources are
    skipped for this pass when their source type is over its rate limit.
    """
    sources = await pool.fetch(
        """SELECT s.id as source_id, s.company_id, s.source_type, s.source_name, s.config,
        c.ticker, c.legal_name
        FROM sources s JOIN companies c ON s.company_id = c.id
        WHERE s.active = TRUE AND c.active = TRUE
        ORDER BY s.source_type, c.ticker"""
    )

    enqueued = 0
    for src in sources:
        source_type = src["source_type"]
        cadence = CADENCES.get(source_type, 600)

        # Check last run
        last_run = await pool.fetchval(
            "SELECT MAX(started_at) FROM ingestion_runs WHERE source_id = $1 AND status IN ('completed', 'running')",
            src["source_id"],
        )
        if last_run and (datetime.utcnow() - last_run.replace(tzinfo=None)) < timedelta(seconds=cadence):
            continue

        if not await check_rate_limit(rds, source_type):
            logger.warning(f"Rate limit hit for {source_type}")
            continue

        job = {
            "source_id": str(src["source_id"]),
            "company_id": str(src["company_id"]),
            "ticker": src["ticker"],
            "source_type": source_type,
            "source_name": src["source_name"],
            "config": _decode_source_config(src["config"]),
            "scheduled_at": datetime.utcnow().isoformat(),
        }
        await rds.rpush(queue_key(QUEUE_INGESTION), json.dumps(job))
        enqueued += 1

    if enqueued:
        logger.info(f"Enqueued {enqueued} ingestion jobs")
|
||||
|
||||
|
||||
async def main():
    """Run the scheduler loop until the process is stopped.

    Every 15 seconds the loop tries to take the ``scheduler_cycle`` lock and,
    when it gets it, runs one scheduling pass. Errors are logged and the
    loop continues. The Postgres pool and Redis client are closed on exit.
    """
    config = load_config()
    pool = await get_pg_pool(config)
    rds = get_redis(config)

    logger.info("Scheduler started")
    try:
        while True:
            try:
                if await acquire_lock(rds, "scheduler_cycle", ttl=30):
                    try:
                        await schedule_cycle(pool, rds)
                    finally:
                        # Release even when the pass fails; otherwise the
                        # lock blocks scheduling until its TTL runs out.
                        await release_lock(rds, "scheduler_cycle")
            except Exception as e:
                logger.error(f"Scheduler cycle error: {e}")
            await asyncio.sleep(15)
    finally:
        await pool.close()
        await rds.close()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1 @@
|
||||
# Stonks Oracle - Shared modules
|
||||
@@ -0,0 +1,115 @@
|
||||
"""Shared configuration loader for all services."""
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class PostgresConfig:
    """Connection settings for PostgreSQL."""

    host: str = "localhost"
    port: int = 5432
    database: str = "stonks"
    user: str = "stonks"
    password: str = "stonks_dev"

    @property
    def dsn(self) -> str:
        """Return a ``postgresql://`` connection URI built from the fields.

        The user and password are percent-encoded so that characters such as
        "@", ":" or "/" in a credential cannot change how the URI is parsed.
        """
        from urllib.parse import quote

        user = quote(self.user, safe="")
        password = quote(self.password, safe="")
        return f"postgresql://{user}:{password}@{self.host}:{self.port}/{self.database}"
|
||||
|
||||
|
||||
@dataclass
class RedisConfig:
    """Connection settings for Redis."""

    host: str = "localhost"
    port: int = 6379
    db: int = 0
    password: Optional[str] = None

    @property
    def url(self) -> str:
        """Return a ``redis://`` URL built from the fields.

        The password, when set, is percent-encoded so that characters such
        as "@" or "/" cannot change how the URL is parsed. No auth part is
        emitted when the password is empty or ``None``.
        """
        from urllib.parse import quote

        auth = f":{quote(self.password, safe='')}@" if self.password else ""
        return f"redis://{auth}{self.host}:{self.port}/{self.db}"
|
||||
|
||||
|
||||
@dataclass
class MinioConfig:
    """Connection settings for MinIO."""

    endpoint: str = "localhost:9000"  # host:port, no scheme
    access_key: str = "minioadmin"
    secret_key: str = "minioadmin"
    secure: bool = False
|
||||
|
||||
|
||||
@dataclass
class OllamaConfig:
    """Connection settings for Ollama."""

    base_url: str = "http://localhost:11434"
    model: str = "llama3.1:8b"
    timeout: int = 120
|
||||
|
||||
|
||||
@dataclass
class TrinoConfig:
    """Connection settings for Trino."""

    host: str = "localhost"
    port: int = 8080
    catalog: str = "lakehouse"
    schema: str = "stonks"
|
||||
|
||||
|
||||
@dataclass
class BrokerConfig:
    """Broker settings; credentials and URL default to ``None``."""

    mode: str = "paper"  # paper | live
    api_key: Optional[str] = None
    api_secret: Optional[str] = None
    base_url: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class AppConfig:
    """Top-level configuration grouping one settings object per service.

    Each section defaults to a fresh instance of its config class, so
    ``AppConfig()`` is usable without arguments.
    """

    postgres: PostgresConfig = field(default_factory=PostgresConfig)
    redis: RedisConfig = field(default_factory=RedisConfig)
    minio: MinioConfig = field(default_factory=MinioConfig)
    ollama: OllamaConfig = field(default_factory=OllamaConfig)
    trino: TrinoConfig = field(default_factory=TrinoConfig)
    broker: BrokerConfig = field(default_factory=BrokerConfig)
    log_level: str = "INFO"
|
||||
|
||||
|
||||
def load_config() -> AppConfig:
    """Build an ``AppConfig`` from environment variables.

    Every setting falls back to a literal default when its variable is
    unset. Ports, the Redis db index and the Ollama timeout are parsed
    with ``int``; ``MINIO_SECURE`` is true only for the (case-insensitive)
    string ``"true"``.
    """
    env = os.getenv

    postgres = PostgresConfig(
        host=env("POSTGRES_HOST", "localhost"),
        port=int(env("POSTGRES_PORT", "5432")),
        database=env("POSTGRES_DB", "stonks"),
        user=env("POSTGRES_USER", "stonks"),
        password=env("POSTGRES_PASSWORD", "stonks_dev"),
    )
    redis = RedisConfig(
        host=env("REDIS_HOST", "localhost"),
        port=int(env("REDIS_PORT", "6379")),
        db=int(env("REDIS_DB", "0")),
        password=env("REDIS_PASSWORD"),
    )
    minio = MinioConfig(
        endpoint=env("MINIO_ENDPOINT", "localhost:9000"),
        access_key=env("MINIO_ACCESS_KEY", "minioadmin"),
        secret_key=env("MINIO_SECRET_KEY", "minioadmin"),
        secure=env("MINIO_SECURE", "false").lower() == "true",
    )
    ollama = OllamaConfig(
        base_url=env("OLLAMA_BASE_URL", "http://localhost:11434"),
        model=env("OLLAMA_MODEL", "llama3.1:8b"),
        timeout=int(env("OLLAMA_TIMEOUT", "120")),
    )
    trino = TrinoConfig(
        host=env("TRINO_HOST", "localhost"),
        port=int(env("TRINO_PORT", "8080")),
        catalog=env("TRINO_CATALOG", "lakehouse"),
        schema=env("TRINO_SCHEMA", "stonks"),
    )
    broker = BrokerConfig(
        mode=env("BROKER_MODE", "paper"),
        api_key=env("BROKER_API_KEY"),
        api_secret=env("BROKER_API_SECRET"),
        base_url=env("BROKER_BASE_URL"),
    )
    return AppConfig(
        postgres=postgres,
        redis=redis,
        minio=minio,
        ollama=ollama,
        trino=trino,
        broker=broker,
        log_level=env("LOG_LEVEL", "INFO"),
    )
|
||||
@@ -0,0 +1,33 @@
|
||||
"""Database connection helpers."""
|
||||
import asyncpg
|
||||
import redis.asyncio as aioredis
|
||||
from minio import Minio
|
||||
|
||||
from .config import AppConfig
|
||||
|
||||
|
||||
async def get_pg_pool(config: AppConfig) -> asyncpg.Pool:
    """Open an asyncpg connection pool for the configured PostgreSQL DSN.

    The pool is created with a minimum of 2 and a maximum of 10 connections.
    """
    pool_options = {
        "dsn": config.postgres.dsn,
        "min_size": 2,
        "max_size": 10,
    }
    return await asyncpg.create_pool(**pool_options)
|
||||
|
||||
|
||||
def get_redis(config: AppConfig) -> aioredis.Redis:
    """Build an async Redis client from the configured URL.

    ``decode_responses=True`` is passed so replies are decoded to ``str``.
    """
    redis_url = config.redis.url
    return aioredis.from_url(redis_url, decode_responses=True)
|
||||
|
||||
|
||||
def get_minio(config: AppConfig) -> Minio:
    """Build a MinIO client from the ``minio`` section of *config*."""
    settings = config.minio
    return Minio(
        settings.endpoint,
        access_key=settings.access_key,
        secret_key=settings.secret_key,
        secure=settings.secure,
    )
|
||||
@@ -0,0 +1,56 @@
|
||||
"""Redis key conventions and queue abstractions."""
|
||||
|
||||
# --- Key prefixes ---
# Every key built in this module starts with this root prefix.
PREFIX = "stonks"

LOCK_PREFIX = f"{PREFIX}:lock"  # distributed locks
RATE_LIMIT_PREFIX = f"{PREFIX}:ratelimit"  # rate limit counters
QUEUE_PREFIX = f"{PREFIX}:queue"  # job queues
DEDUPE_PREFIX = f"{PREFIX}:dedupe"  # dedupe markers
CACHE_PREFIX = f"{PREFIX}:cache"  # cache
RETRY_PREFIX = f"{PREFIX}:retry"  # retry backoff state


# --- Key builders ---
# Each helper appends its arguments to the matching prefix, ":"-separated.

def lock_key(resource: str) -> str:
    """Return the distributed-lock key for *resource*."""
    return f"{LOCK_PREFIX}:{resource}"


def rate_limit_key(source: str, window: str) -> str:
    """Return the rate-limit counter key for *source* within *window*."""
    return f"{RATE_LIMIT_PREFIX}:{source}:{window}"


def queue_key(queue_name: str) -> str:
    """Return the key of the job queue called *queue_name*."""
    return f"{QUEUE_PREFIX}:{queue_name}"


def dedupe_key(content_hash: str) -> str:
    """Return the dedupe-marker key for *content_hash*."""
    return f"{DEDUPE_PREFIX}:{content_hash}"


def cache_key(namespace: str, key: str) -> str:
    """Return the cache key for *key* inside *namespace*."""
    return f"{CACHE_PREFIX}:{namespace}:{key}"


def retry_key(job_id: str) -> str:
    """Return the retry-state key for *job_id*."""
    return f"{RETRY_PREFIX}:{job_id}"


# --- Queue names ---
QUEUE_INGESTION = "ingestion"
QUEUE_PARSING = "parsing"
QUEUE_EXTRACTION = "extraction"
QUEUE_AGGREGATION = "aggregation"
QUEUE_RECOMMENDATION = "recommendation"
QUEUE_LAKE_PUBLISH = "lake_publish"
QUEUE_TRADE = "trade"
|
||||
@@ -0,0 +1,169 @@
|
||||
"""Typed JSON schemas for document intelligence, trend summaries, and recommendations."""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# --- Enums ---
|
||||
|
||||
class DocumentType(str, Enum):
    """Document categories; each member is also a plain ``str`` value."""

    ARTICLE = "article"
    FILING = "filing"
    TRANSCRIPT = "transcript"
    PRESS_RELEASE = "press_release"
|
||||
|
||||
|
||||
class SourceType(str, Enum):
    """Data source categories; each member is also a plain ``str`` value."""

    MARKET_API = "market_api"
    NEWS_API = "news_api"
    FILINGS_API = "filings_api"
    WEB_SCRAPE = "web_scrape"
    BROKER = "broker"
|
||||
|
||||
|
||||
class Sentiment(str, Enum):
    """Sentiment labels; each member is also a plain ``str`` value."""

    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"
    MIXED = "mixed"
|
||||
|
||||
|
||||
class CatalystType(str, Enum):
    """Catalyst categories; each member is also a plain ``str`` value."""

    EARNINGS = "earnings"
    PRODUCT = "product"
    LEGAL = "legal"
    MACRO = "macro"
    SUPPLY_CHAIN = "supply_chain"
    M_AND_A = "m_and_a"
    RATING_CHANGE = "rating_change"
    OTHER = "other"
|
||||
|
||||
|
||||
class TrendDirection(str, Enum):
    """Trend direction labels; each member is also a plain ``str`` value."""

    BULLISH = "bullish"
    BEARISH = "bearish"
    MIXED = "mixed"
    NEUTRAL = "neutral"
|
||||
|
||||
|
||||
class ActionType(str, Enum):
    """Recommendation actions; each member is also a plain ``str`` value."""

    BUY = "buy"
    SELL = "sell"
    HOLD = "hold"
    WATCH = "watch"
|
||||
|
||||
|
||||
class RecommendationMode(str, Enum):
    """Recommendation modes; each member is also a plain ``str`` value."""

    INFORMATIONAL = "informational"
    PAPER_ELIGIBLE = "paper_eligible"
    LIVE_ELIGIBLE = "live_eligible"
|
||||
|
||||
|
||||
class TrendWindow(str, Enum):
    """Trend window labels; each member is also a plain ``str`` value."""

    INTRADAY = "intraday"
    ONE_DAY = "1d"
    SEVEN_DAY = "7d"
    THIRTY_DAY = "30d"
    NINETY_DAY = "90d"
|
||||
|
||||
|
||||
# --- Document Intelligence ---
|
||||
|
||||
class CompanyImpact(BaseModel):
    # One company's entry in DocumentIntelligence.companies.
    # relevance and impact_score are validated to lie within [0, 1];
    # the three list fields default to empty lists.

    ticker: str
    company_name: str
    relevance: float = Field(ge=0, le=1)
    sentiment: Sentiment
    impact_score: float = Field(ge=0, le=1)
    impact_horizon: str
    catalyst_type: CatalystType

    key_facts: List[str] = Field(default_factory=list)
    risks: List[str] = Field(default_factory=list)
    evidence_spans: List[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ModelMetadata(BaseModel):
    # Identifies the provider, model, prompt version and schema version
    # attached to a generated result.
    # NOTE(review): some pydantic v2 releases warn about field names that
    # start with "model_" (protected namespace) — confirm against the
    # pinned pydantic version.

    provider: str = "ollama"
    model_name: str = ""
    prompt_version: str = ""
    schema_version: str = "2.0.0"
|
||||
|
||||
|
||||
class DocumentIntelligence(BaseModel):
    # Structured extraction result for one document.
    # document_id defaults to a fresh UUID4 string; the three score fields
    # are validated to lie within [0, 1] and default to 0.5.

    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    summary: str = ""

    companies: List[CompanyImpact] = Field(default_factory=list)
    macro_themes: List[str] = Field(default_factory=list)

    novelty_score: float = Field(default=0.5, ge=0, le=1)
    source_credibility: float = Field(default=0.5, ge=0, le=1)
    extraction_warnings: List[str] = Field(default_factory=list)
    confidence: float = Field(default=0.5, ge=0, le=1)

    model: ModelMetadata = Field(default_factory=ModelMetadata)
|
||||
|
||||
|
||||
# --- Trend Summary ---
|
||||
|
||||
class TrendSummary(BaseModel):
    # Aggregated trend for one entity over one window.
    # All score fields are validated to lie within [0, 1].

    entity_type: str = "company"
    entity_id: str = ""
    window: TrendWindow = TrendWindow.SEVEN_DAY

    trend_direction: TrendDirection = TrendDirection.NEUTRAL
    trend_strength: float = Field(default=0.5, ge=0, le=1)
    confidence: float = Field(default=0.5, ge=0, le=1)

    top_supporting_evidence: List[str] = Field(default_factory=list)
    top_opposing_evidence: List[str] = Field(default_factory=list)
    dominant_catalysts: List[str] = Field(default_factory=list)
    material_risks: List[str] = Field(default_factory=list)
    contradiction_score: float = Field(default=0.0, ge=0, le=1)

    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12; switching to an aware timestamp would
    # change what callers receive, so it is left as is here.
    generated_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# --- Recommendation ---
|
||||
|
||||
class PositionSizing(BaseModel):
    # Sizing limits expressed as fractions: both fields are validated to
    # lie within [0, 1].

    portfolio_pct: float = Field(default=0.02, ge=0, le=1)
    max_loss_pct: float = Field(default=0.005, ge=0, le=1)
|
||||
|
||||
|
||||
class Recommendation(BaseModel):
    # A recommendation for one ticker. Defaults to an informational WATCH
    # with a fresh UUID4 string as its id.

    recommendation_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    ticker: str = ""
    action: ActionType = ActionType.WATCH
    mode: RecommendationMode = RecommendationMode.INFORMATIONAL
    confidence: float = Field(default=0.5, ge=0, le=1)

    time_horizon: str = ""
    thesis: str = ""
    invalidation_conditions: List[str] = Field(default_factory=list)
    position_sizing: PositionSizing = Field(default_factory=PositionSizing)
    evidence_refs: List[str] = Field(default_factory=list)

    model_metadata: ModelMetadata = Field(default_factory=ModelMetadata)
    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12; switching to an aware timestamp would
    # change what callers receive, so it is left as is here.
    generated_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
# --- Document Metadata ---
|
||||
|
||||
class StorageRefs(BaseModel):
    # Optional string references for the stored forms of a document;
    # every field defaults to None.

    raw_html: Optional[str] = None
    raw_payload: Optional[str] = None
    normalized_text: Optional[str] = None
|
||||
|
||||
|
||||
class DocumentMetadata(BaseModel):
    # Descriptive metadata for one document. document_id defaults to a
    # fresh UUID4 string and retrieved_at to the creation time.

    document_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    document_type: DocumentType = DocumentType.ARTICLE
    symbol_candidates: List[str] = Field(default_factory=list)

    source_type: SourceType = SourceType.NEWS_API
    publisher: str = ""
    url: Optional[str] = None
    canonical_url: Optional[str] = None
    title: str = ""

    published_at: Optional[datetime] = None
    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12; switching to an aware timestamp would
    # change what callers receive, so it is left as is here.
    retrieved_at: datetime = Field(default_factory=datetime.utcnow)

    language: str = "en"
    content_hash: str = ""
    storage_refs: StorageRefs = Field(default_factory=StorageRefs)
|
||||
@@ -0,0 +1 @@
|
||||
# Symbol Registry Service
|
||||
@@ -0,0 +1,209 @@
|
||||
"""Symbol Registry API - FastAPI application."""
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import List, Optional
|
||||
|
||||
import asyncpg
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.shared.config import load_config
|
||||
from services.shared.db import get_pg_pool
|
||||
|
||||
config = load_config()
|
||||
pool: Optional[asyncpg.Pool] = None
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the module-level PostgreSQL pool for the app's lifetime.

    The pool is created before the application starts serving and is
    closed when the context exits, including when it exits with an error.
    """
    global pool
    pool = await get_pg_pool(config)
    try:
        yield
    finally:
        # Without the finally, an exception raised into the generator at
        # the yield would skip the close and leak the connections.
        await pool.close()
|
||||
|
||||
|
||||
app = FastAPI(title="Stonks Oracle - Symbol Registry", lifespan=lifespan)
|
||||
|
||||
|
||||
# --- Request/Response Models ---
|
||||
|
||||
class CompanyCreate(BaseModel):
    # Request body for creating or replacing a company.
    # Only ticker and legal_name are required.

    ticker: str
    legal_name: str

    exchange: Optional[str] = None
    sector: Optional[str] = None
    industry: Optional[str] = None
    market_cap_bucket: Optional[str] = None
|
||||
|
||||
|
||||
class CompanyResponse(BaseModel):
    # Response shape of the company endpoints; mirrors the columns they
    # select from the companies table.
    # NOTE(review): id is declared as str — confirm the companies.id
    # column type converts cleanly (e.g. a UUID column may not).

    id: str
    ticker: str
    legal_name: str

    exchange: Optional[str]
    sector: Optional[str]
    industry: Optional[str]
    market_cap_bucket: Optional[str]

    active: bool
|
||||
|
||||
|
||||
class AliasCreate(BaseModel):
    # Request body for adding an alias to a company; alias_type defaults
    # to "brand".

    alias: str
    alias_type: str = "brand"
|
||||
|
||||
|
||||
class WatchlistCreate(BaseModel):
    # Request body for creating a watchlist; description is optional.

    name: str
    description: Optional[str] = None
|
||||
|
||||
|
||||
class SourceCreate(BaseModel):
    # Request body for attaching a source to a company.
    # source_type is checked against VALID_SOURCE_TYPES by the handler,
    # not by this model.

    source_type: str
    source_name: str

    config: dict = {}
    credibility_score: float = 0.5
    retention_days: int = 365
    access_policy: str = "internal"
|
||||
|
||||
|
||||
VALID_SOURCE_TYPES = {"market_api", "news_api", "filings_api", "web_scrape", "broker"}
|
||||
|
||||
|
||||
# --- Company Endpoints ---
|
||||
|
||||
@app.post("/companies", response_model=CompanyResponse, status_code=201)
async def create_company(body: CompanyCreate):
    # Insert a company and return the stored row. The ticker is
    # upper-cased before it is stored; a unique-constraint violation is
    # reported as HTTP 409.
    insert_sql = """INSERT INTO companies (ticker, legal_name, exchange, sector, industry, market_cap_bucket)
           VALUES ($1, $2, $3, $4, $5, $6)
           RETURNING id, ticker, legal_name, exchange, sector, industry, market_cap_bucket, active"""
    try:
        created = await pool.fetchrow(
            insert_sql,
            body.ticker.upper(),
            body.legal_name,
            body.exchange,
            body.sector,
            body.industry,
            body.market_cap_bucket,
        )
    except asyncpg.UniqueViolationError:
        raise HTTPException(409, f"Company {body.ticker} on {body.exchange} already exists")
    return dict(created)
|
||||
|
||||
|
||||
@app.get("/companies", response_model=List[CompanyResponse])
async def list_companies(active: bool = True):
    # Return companies whose active flag equals the query parameter
    # (default: active ones), ordered by ticker.
    query = "SELECT id, ticker, legal_name, exchange, sector, industry, market_cap_bucket, active FROM companies WHERE active = $1 ORDER BY ticker"
    records = await pool.fetch(query, active)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
@app.get("/companies/{company_id}", response_model=CompanyResponse)
async def get_company(company_id: str):
    # Look a company up by id; respond with HTTP 404 when no row matches.
    query = "SELECT id, ticker, legal_name, exchange, sector, industry, market_cap_bucket, active FROM companies WHERE id = $1"
    record = await pool.fetchrow(query, company_id)
    if record is None:
        raise HTTPException(404, "Company not found")
    return dict(record)
|
||||
|
||||
|
||||
@app.put("/companies/{company_id}", response_model=CompanyResponse)
async def update_company(company_id: str, body: CompanyCreate):
    # Replace every editable column of a company and return the stored
    # row. The ticker is upper-cased before it is stored.
    #
    # Responses:
    #   404 - no company has this id
    #   409 - the new values collide with an existing company; handled the
    #         same way as in create_company instead of surfacing as a 500
    try:
        row = await pool.fetchrow(
            """UPDATE companies SET ticker=$2, legal_name=$3, exchange=$4, sector=$5, industry=$6, market_cap_bucket=$7, updated_at=NOW()
               WHERE id=$1
               RETURNING id, ticker, legal_name, exchange, sector, industry, market_cap_bucket, active""",
            company_id, body.ticker.upper(), body.legal_name, body.exchange,
            body.sector, body.industry, body.market_cap_bucket,
        )
    except asyncpg.UniqueViolationError:
        raise HTTPException(409, f"Company {body.ticker} on {body.exchange} already exists")
    if not row:
        raise HTTPException(404, "Company not found")
    return dict(row)
|
||||
|
||||
|
||||
# --- Alias Endpoints ---
|
||||
|
||||
@app.post("/companies/{company_id}/aliases", status_code=201)
async def add_alias(company_id: str, body: AliasCreate):
    # Attach an alias to a company and return the stored alias row.
    #
    # Responses:
    #   404 - the insert violated a foreign key, i.e. the company does not
    #         exist (same mapping as add_watchlist_member)
    # NOTE(review): assumes company_aliases.company_id has a foreign key to
    # companies; if it does not, the except branch simply never fires.
    try:
        row = await pool.fetchrow(
            "INSERT INTO company_aliases (company_id, alias, alias_type) VALUES ($1, $2, $3) RETURNING id, alias, alias_type",
            company_id, body.alias, body.alias_type,
        )
    except asyncpg.ForeignKeyViolationError:
        raise HTTPException(404, "Company not found")
    return dict(row)
|
||||
|
||||
|
||||
@app.get("/companies/{company_id}/aliases")
async def list_aliases(company_id: str):
    # Return every alias row stored for the company; an unknown id yields
    # an empty list rather than an error.
    query = "SELECT id, alias, alias_type FROM company_aliases WHERE company_id = $1"
    records = await pool.fetch(query, company_id)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
# --- Watchlist Endpoints ---
|
||||
|
||||
@app.post("/watchlists", status_code=201)
async def create_watchlist(body: WatchlistCreate):
    # Insert a watchlist and return the stored row; a unique-constraint
    # violation is reported as HTTP 409.
    insert_sql = "INSERT INTO watchlists (name, description) VALUES ($1, $2) RETURNING id, name, description, active"
    try:
        created = await pool.fetchrow(insert_sql, body.name, body.description)
    except asyncpg.UniqueViolationError:
        raise HTTPException(409, f"Watchlist '{body.name}' already exists")
    return dict(created)
|
||||
|
||||
|
||||
@app.get("/watchlists")
async def list_watchlists():
    # Return all watchlists ordered by name.
    query = "SELECT id, name, description, active FROM watchlists ORDER BY name"
    records = await pool.fetch(query)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
@app.post("/watchlists/{watchlist_id}/members/{company_id}", status_code=201)
async def add_watchlist_member(watchlist_id: str, company_id: str):
    # Add a company to a watchlist.
    #
    # Responses:
    #   409 - the pair is already stored (unique violation)
    #   404 - the watchlist or company does not exist (foreign-key violation)
    insert_sql = "INSERT INTO watchlist_members (watchlist_id, company_id) VALUES ($1, $2)"
    try:
        await pool.execute(insert_sql, watchlist_id, company_id)
    except asyncpg.UniqueViolationError:
        raise HTTPException(409, "Already a member")
    except asyncpg.ForeignKeyViolationError:
        raise HTTPException(404, "Watchlist or company not found")
    return {"status": "added"}
|
||||
|
||||
|
||||
@app.get("/watchlists/{watchlist_id}/members")
async def list_watchlist_members(watchlist_id: str):
    # Return the companies that belong to the watchlist, ordered by
    # ticker; an unknown watchlist id yields an empty list.
    query = """SELECT c.id, c.ticker, c.legal_name, c.exchange, c.sector, c.industry, c.market_cap_bucket, c.active
           FROM companies c JOIN watchlist_members wm ON c.id = wm.company_id
           WHERE wm.watchlist_id = $1 ORDER BY c.ticker"""
    records = await pool.fetch(query, watchlist_id)
    return [dict(record) for record in records]
|
||||
|
||||
|
||||
# --- Source Endpoints ---
|
||||
|
||||
@app.post("/companies/{company_id}/sources", status_code=201)
async def add_source(company_id: str, body: SourceCreate):
    # Attach a data source to a company and return the stored row.
    #
    # Responses:
    #   400 - source_type is not one of VALID_SOURCE_TYPES
    #   404 - the insert violated a foreign key, i.e. the company does not
    #         exist (same mapping as add_watchlist_member)
    import json  # function-scope import keeps this change self-contained

    if body.source_type not in VALID_SOURCE_TYPES:
        # Sort before formatting: a set's iteration order is not stable
        # between runs, so the raw set gave a different message each time.
        allowed = ", ".join(sorted(VALID_SOURCE_TYPES))
        raise HTTPException(400, f"Invalid source_type. Must be one of: {allowed}")
    try:
        row = await pool.fetchrow(
            """INSERT INTO sources (company_id, source_type, source_name, config, credibility_score, retention_days, access_policy)
               VALUES ($1, $2, $3, $4, $5, $6, $7)
               RETURNING id, source_type, source_name, credibility_score, active""",
            company_id, body.source_type, body.source_name,
            # The pool is created without a JSON codec, so asyncpg expects
            # JSON arguments as text and rejects a raw dict.
            # NOTE(review): assumes sources.config is json/jsonb/text — confirm.
            json.dumps(body.config),
            body.credibility_score, body.retention_days, body.access_policy,
        )
    except asyncpg.ForeignKeyViolationError:
        raise HTTPException(404, "Company not found")
    return dict(row)
|
||||
|
||||
|
||||
@app.get("/companies/{company_id}/sources")
async def list_sources(company_id: str):
    # Return every source row stored for the company; an unknown id
    # yields an empty list.
    # NOTE(review): config is returned exactly as the driver yields it —
    # if the column is json/jsonb and no codec is registered that is a
    # JSON string, not an object; confirm what clients expect.
    query = "SELECT id, source_type, source_name, config, credibility_score, retention_days, access_policy, active FROM sources WHERE company_id = $1"
    records = await pool.fetch(query, company_id)
    return [dict(record) for record in records]
|
||||
Reference in New Issue
Block a user