88ad1e8d99
- Add scheduler and ingestion unit tests (test_scheduler_unit.py, test_ingestion_unit.py) - Add all 13 app services + dashboard to docker-compose.yml - Add full documentation suite: API reference, Helm reference, Docker deployment guide, 3 architecture diagrams (K8s, Docker Compose, data pipeline), AI agent guide, backup/restore guide, observability/metrics reference, per-service docs - Add intelligence pipeline deep-dive docs with Mermaid diagrams - Update README with documentation index and links - Add specs for comprehensive-quality-docs, intelligence-pipeline-deep-dive, sanitized-pipeline-docs
356 lines
18 KiB
Markdown
356 lines
18 KiB
Markdown
# Kubernetes Architecture — Stonks Oracle
|
|
|
|
This document describes the Kubernetes deployment topology for Stonks Oracle, derived from the Helm chart at `infra/helm/stonks-oracle/`.
|
|
|
|
All application workloads deploy to the `stonks-oracle` namespace. External cluster services (PostgreSQL, Redis, MinIO, Ollama) run in their own namespaces and are referenced via cross-namespace DNS.
|
|
|
|
## Deployment Diagram
|
|
|
|
```mermaid
|
|
graph TB
|
|
%% ── External traffic ──────────────────────────────────────────
|
|
internet((Internet))
|
|
|
|
subgraph traefik ["kube-system (Traefik Ingress Controller)"]
|
|
direction LR
|
|
ing_dash["stonks.celestium.life"]
|
|
ing_api["stonks-api.celestium.life"]
|
|
ing_reg["stonks-registry.celestium.life"]
|
|
ing_trade["stonks-trading.celestium.life"]
|
|
ing_superset["stonks-dash.celestium.life"]
|
|
ing_trino["stonks-trino.celestium.life"]
|
|
end
|
|
|
|
internet --> traefik
|
|
|
|
%% ── stonks-oracle namespace ───────────────────────────────────
|
|
subgraph ns ["stonks-oracle namespace"]
|
|
direction TB
|
|
|
|
%% ── API Tier (ingress-facing) ─────────────────────────────
|
|
subgraph api_tier ["API Tier"]
|
|
direction LR
|
|
query_api["query-api<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
|
symbol_registry["symbol-registry<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
|
end
|
|
|
|
%% ── Frontend Tier ─────────────────────────────────────────
|
|
subgraph frontend_tier ["Frontend Tier"]
|
|
dashboard["dashboard<br/><i>Deployment (1 replica)</i><br/>:8080<br/><i>nginx-unprivileged</i>"]
|
|
end
|
|
|
|
%% ── Trading Tier ──────────────────────────────────────────
|
|
subgraph trading_tier ["Trading Tier"]
|
|
direction LR
|
|
trading_engine["trading-engine<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
|
risk_engine["risk-engine<br/><i>Deployment (1 replica)</i><br/>:8000"]
|
|
broker_adapter["broker-adapter<br/><i>Deployment (1 replica)</i><br/><i>queue-driven worker</i>"]
|
|
end
|
|
|
|
%% ── Orchestration Tier ────────────────────────────────────
|
|
subgraph orchestration_tier ["Orchestration Tier"]
|
|
scheduler["scheduler<br/><i>Deployment (1 replica)</i><br/><i>runs migrations + seed</i>"]
|
|
end
|
|
|
|
%% ── Processing Tier (pipeline workers) ────────────────────
|
|
subgraph processing_tier ["Processing Tier (pipeline workers)"]
|
|
direction LR
|
|
ingestion["ingestion<br/><i>Deployment (2 replicas)</i>"]
|
|
parser["parser<br/><i>Deployment (2 replicas)</i>"]
|
|
extractor["extractor<br/><i>Deployment (1 replica)</i>"]
|
|
aggregation["aggregation<br/><i>Deployment (4 replicas)</i>"]
|
|
recommendation["recommendation<br/><i>Deployment (1 replica)</i>"]
|
|
end
|
|
|
|
%% ── Analytics Tier ────────────────────────────────────────
|
|
subgraph analytics_tier ["Analytics Tier"]
|
|
direction LR
|
|
lake_publisher["lake-publisher<br/><i>Deployment (1 replica)</i><br/><i>queue-driven worker</i>"]
|
|
hive_metastore["hive-metastore<br/><i>Deployment (1 replica)</i><br/>:9083<br/><i>apache/hive:4.0.0</i>"]
|
|
trino["trino<br/><i>Deployment (1 replica)</i><br/>:8080<br/><i>trinodb/trino:latest</i>"]
|
|
superset["superset<br/><i>Deployment (1 replica)</i><br/>:8088<br/><i>custom image</i>"]
|
|
end
|
|
|
|
%% ── Helm Secrets ──────────────────────────────────────────
|
|
subgraph secrets_block ["Helm-Managed Secrets"]
|
|
direction LR
|
|
sec_core["stonks-core-secrets<br/><i>POSTGRES_PASSWORD</i><br/><i>MINIO_ACCESS_KEY</i><br/><i>MINIO_SECRET_KEY</i><br/><i>REDIS_PASSWORD</i>"]
|
|
sec_broker["stonks-broker-secrets<br/><i>BROKER_API_KEY</i><br/><i>BROKER_API_SECRET</i><br/><i>BROKER_BASE_URL</i>"]
|
|
sec_market["stonks-market-secrets<br/><i>MARKET_DATA_API_KEY</i>"]
|
|
sec_gmail["stonks-gmail-secrets<br/><i>GMAIL_SENDER</i><br/><i>GMAIL_RECIPIENT</i><br/><i>GMAIL_APP_PASSWORD</i>"]
|
|
sec_dashboard["stonks-dashboard-secrets<br/><i>SUPERSET_SECRET_KEY</i><br/><i>SUPERSET_ADMIN_PASSWORD</i>"]
|
|
end
|
|
|
|
%% ── ConfigMap ─────────────────────────────────────────────
|
|
configmap["stonks-config<br/><i>ConfigMap</i><br/><i>All env vars from values.yaml config block</i>"]
|
|
end
|
|
|
|
%% ── External Cluster Services ─────────────────────────────────
|
|
subgraph pg_ns ["postgresql-service namespace"]
|
|
postgres[("PostgreSQL<br/>postgresql-rw:5432")]
|
|
end
|
|
|
|
subgraph redis_ns ["redis-service namespace"]
|
|
redis[("Redis<br/>redis-master:6379")]
|
|
end
|
|
|
|
subgraph minio_ns ["minio-service namespace"]
|
|
minio[("MinIO<br/>minio:80")]
|
|
end
|
|
|
|
subgraph ollama_ns ["ollama-service namespace"]
|
|
ollama[("Ollama<br/>ollama:11434<br/><i>GPU: 4070 Ti Super</i>")]
|
|
end
|
|
|
|
%% ── Ingress Routes ────────────────────────────────────────────
|
|
ing_dash -->|":8080"| dashboard
|
|
ing_api -->|":8000"| query_api
|
|
ing_reg -->|":8000"| symbol_registry
|
|
ing_trade -->|":8000"| trading_engine
|
|
ing_superset -->|":8088"| superset
|
|
ing_trino -->|":8080"| trino
|
|
|
|
%% ── Dashboard → Backend APIs ──────────────────────────────────
|
|
dashboard -.->|"/api/ proxy"| query_api
|
|
dashboard -.->|"/registry/ proxy"| symbol_registry
|
|
dashboard -.->|"/risk/ proxy"| risk_engine
|
|
|
|
%% ── Pipeline data flow (via Redis queues) ─────────────────────
|
|
scheduler -->|"enqueue jobs"| redis
|
|
ingestion -->|"stonks:queue:parsing"| redis
|
|
parser -->|"stonks:queue:extraction"| redis
|
|
extractor -->|"stonks:queue:aggregation"| redis
|
|
aggregation -->|"stonks:queue:recommendation"| redis
|
|
recommendation -->|"stonks:queue:trading_decisions"| redis
|
|
trading_engine -->|"stonks:queue:broker_orders"| redis
|
|
broker_adapter -->|"read orders"| redis
|
|
lake_publisher -->|"read stonks:queue:lake_publish"| redis
|
|
|
|
%% ── External service connections ──────────────────────────────
|
|
scheduler --> postgres
|
|
scheduler --> redis
|
|
ingestion --> postgres
|
|
ingestion --> redis
|
|
ingestion --> minio
|
|
parser --> postgres
|
|
parser --> redis
|
|
extractor --> postgres
|
|
extractor --> redis
|
|
extractor --> ollama
|
|
aggregation --> postgres
|
|
aggregation --> redis
|
|
recommendation --> postgres
|
|
recommendation --> redis
|
|
trading_engine --> postgres
|
|
trading_engine --> redis
|
|
risk_engine --> postgres
|
|
broker_adapter --> postgres
|
|
broker_adapter --> redis
|
|
lake_publisher --> postgres
|
|
lake_publisher --> minio
|
|
query_api --> postgres
|
|
query_api --> redis
|
|
query_api --> minio
|
|
symbol_registry --> postgres
|
|
|
|
%% ── Analytics plane connections ───────────────────────────────
|
|
lake_publisher -->|"Parquet → s3a://stonks-lakehouse"| minio
|
|
hive_metastore -->|"s3a:// catalog"| minio
|
|
trino -->|"thrift://hive-metastore:9083"| hive_metastore
|
|
superset -->|"trino:8080"| trino
|
|
query_api -->|"trino:8080"| trino
|
|
superset --> postgres
|
|
superset --> redis
|
|
|
|
%% ── External egress (trading tier + ingestion) ────────────────
|
|
trading_engine -->|"HTTPS :443<br/>Alpaca API"| internet
|
|
trading_engine -->|"SMTP :587<br/>Gmail notifications"| internet
|
|
broker_adapter -->|"HTTPS :443<br/>Alpaca API"| internet
|
|
ingestion -->|"HTTPS :443<br/>Polygon.io / News APIs"| internet
|
|
|
|
%% ── Secret consumption ────────────────────────────────────────
|
|
sec_core -.-> query_api
|
|
sec_core -.-> symbol_registry
|
|
sec_core -.-> scheduler
|
|
sec_core -.-> ingestion
|
|
sec_core -.-> parser
|
|
sec_core -.-> extractor
|
|
sec_core -.-> aggregation
|
|
sec_core -.-> recommendation
|
|
sec_core -.-> trading_engine
|
|
sec_core -.-> risk_engine
|
|
sec_core -.-> broker_adapter
|
|
sec_core -.-> lake_publisher
|
|
sec_core -.-> hive_metastore
|
|
sec_core -.-> trino
|
|
sec_core -.-> superset
|
|
|
|
sec_broker -.-> ingestion
|
|
sec_broker -.-> trading_engine
|
|
sec_broker -.-> risk_engine
|
|
sec_broker -.-> broker_adapter
|
|
|
|
sec_market -.-> ingestion
|
|
|
|
sec_gmail -.-> trading_engine
|
|
|
|
sec_dashboard -.-> superset
|
|
|
|
configmap -.-> query_api
|
|
configmap -.-> symbol_registry
|
|
configmap -.-> scheduler
|
|
configmap -.-> ingestion
|
|
configmap -.-> parser
|
|
configmap -.-> extractor
|
|
configmap -.-> aggregation
|
|
configmap -.-> recommendation
|
|
configmap -.-> trading_engine
|
|
configmap -.-> risk_engine
|
|
configmap -.-> broker_adapter
|
|
configmap -.-> lake_publisher
|
|
configmap -.-> superset
|
|
|
|
%% ── Styles ────────────────────────────────────────────────────
|
|
classDef apiSvc fill:#4a90d9,stroke:#2c5f8a,color:#fff
|
|
classDef frontendSvc fill:#50c878,stroke:#2e7d46,color:#fff
|
|
classDef tradingSvc fill:#e8a838,stroke:#b07d1a,color:#fff
|
|
classDef processSvc fill:#9b59b6,stroke:#6c3483,color:#fff
|
|
classDef orchSvc fill:#1abc9c,stroke:#148f77,color:#fff
|
|
classDef analyticsSvc fill:#e74c3c,stroke:#a93226,color:#fff
|
|
classDef extSvc fill:#95a5a6,stroke:#717d7e,color:#fff
|
|
classDef secretSvc fill:#f5f5dc,stroke:#999,color:#333
|
|
classDef configSvc fill:#dfe6e9,stroke:#999,color:#333
|
|
|
|
class query_api,symbol_registry apiSvc
|
|
class dashboard frontendSvc
|
|
class trading_engine,risk_engine,broker_adapter tradingSvc
|
|
class scheduler orchSvc
|
|
class ingestion,parser,extractor,aggregation,recommendation processSvc
|
|
class lake_publisher,hive_metastore,trino,superset analyticsSvc
|
|
class postgres,redis,minio,ollama extSvc
|
|
class sec_core,sec_broker,sec_market,sec_gmail,sec_dashboard secretSvc
|
|
class configmap configSvc
|
|
```
|
|
|
|
## Network Policy Boundaries
|
|
|
|
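The Helm chart deploys a **default-deny-ingress** policy that blocks all inbound traffic to pods in the `stonks-oracle` namespace. Each service that needs inbound connections has an explicit allow policy, and `broker-adapter` additionally carries an explicit deny policy (`deny-broker-adapter-ingress`) so that it accepts no inbound traffic: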
|
|
|
|
```mermaid
|
|
graph LR
|
|
subgraph netpol ["Network Policies — stonks-oracle namespace"]
|
|
direction TB
|
|
|
|
deny["🔒 default-deny-ingress<br/><i>Blocks ALL ingress to all pods</i>"]
|
|
|
|
subgraph allows ["Explicit Per-Service Rules"]
|
|
direction TB
|
|
|
|
np_dash["allow-dashboard-ingress<br/>dashboard :8080<br/>← kube-system (Traefik)"]
|
|
|
|
np_api["allow-query-api-ingress<br/>query-api :8000<br/>← kube-system (Traefik)<br/>← dashboard pod"]
|
|
|
|
np_reg["allow-symbol-registry-ingress<br/>symbol-registry :8000<br/>← kube-system (Traefik)<br/>← dashboard pod"]
|
|
|
|
np_trade["allow-trading-engine-ingress<br/>trading-engine :8000<br/>← kube-system (Traefik)<br/>← query-api pod<br/>← dashboard pod<br/><i>Egress: PostgreSQL :5432,</i><br/><i>Redis :6379, HTTPS :443, SMTP :587</i>"]
|
|
|
|
np_risk["allow-risk-engine-ingress<br/>risk-engine :8000<br/>← broker-adapter pod<br/>← query-api pod<br/>← dashboard pod"]
|
|
|
|
np_superset["allow-superset-ingress<br/>superset :8088<br/>← kube-system (Traefik)"]
|
|
|
|
np_trino["allow-trino-ingress<br/>trino :8080<br/>← superset pod<br/>← query-api pod<br/>← kube-system (Traefik)"]
|
|
|
|
np_hive["allow-hive-metastore-ingress<br/>hive-metastore :9083<br/>← trino pod<br/>← lake-publisher pod"]
|
|
|
|
np_broker["deny-broker-adapter-ingress<br/>broker-adapter<br/><i>No inbound traffic allowed</i>"]
|
|
end
|
|
end
|
|
|
|
style deny fill:#e74c3c,stroke:#c0392b,color:#fff
|
|
style np_broker fill:#e74c3c,stroke:#c0392b,color:#fff
|
|
style np_dash fill:#2ecc71,stroke:#27ae60,color:#fff
|
|
style np_api fill:#2ecc71,stroke:#27ae60,color:#fff
|
|
style np_reg fill:#2ecc71,stroke:#27ae60,color:#fff
|
|
style np_trade fill:#f39c12,stroke:#d68910,color:#fff
|
|
style np_risk fill:#f39c12,stroke:#d68910,color:#fff
|
|
style np_superset fill:#2ecc71,stroke:#27ae60,color:#fff
|
|
style np_trino fill:#2ecc71,stroke:#27ae60,color:#fff
|
|
style np_hive fill:#3498db,stroke:#2980b9,color:#fff
|
|
```
|
|
|
|
### Services Without Ingress Policies (Pipeline Workers)
|
|
|
|
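The following services have **no ingress allow policy** — they are background workers (the scheduler plus the queue-driven pipeline workers) that accept no inbound connections and only make outbound connections to PostgreSQL, Redis, MinIO, Ollama, and, in the case of ingestion, external market-data and news APIs. The default-deny-ingress policy therefore blocks any unsolicited inbound traffic to them: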
|
|
|
|
| Service | Tier | Behavior |
|
|
|---------|------|----------|
|
|
| scheduler | orchestration | Polls DB, enqueues to Redis |
|
|
| ingestion | processing | Reads from `stonks:queue:ingestion`, fetches from external market-data/news APIs, writes to DB/MinIO/Redis |
|
|
| parser | processing | Reads from `stonks:queue:parsing`, writes to DB/Redis |
|
|
| extractor | processing | Reads from `stonks:queue:extraction`, calls Ollama, writes to DB/Redis |
|
|
| aggregation | processing | Reads from `stonks:queue:aggregation`, writes to DB/Redis |
|
|
| recommendation | processing | Reads from `stonks:queue:recommendation`, writes to DB/Redis |
|
|
| lake-publisher | analytics | Reads from `stonks:queue:lake_publish`, writes Parquet to MinIO |
|
|
|
|
## Service Tier Summary
|
|
|
|
| Tier | Services | Ingress? | Replicas | Notes |
|
|
|------|----------|----------|----------|-------|
|
|
| **api** | query-api, symbol-registry | Yes (Traefik) | 1 each | FastAPI, readiness probes on `/docs` |
|
|
| **frontend** | dashboard | Yes (Traefik) | 1 | nginx-unprivileged on :8080, proxies to API services |
|
|
| **trading** | trading-engine, risk-engine, broker-adapter | trading-engine: Yes; risk-engine: internal only; broker-adapter: denied | 1 each | trading-engine has egress to Alpaca + Gmail; broker-adapter has egress to Alpaca and is pipeline-gated |
|
|
| **orchestration** | scheduler | No | 1 | Runs DB migrations + seed as init containers; pipeline-gated |
|
|
| **processing** | ingestion, parser, extractor, aggregation, recommendation | No | 2, 2, 1, 4, 1 | Pipeline-gated by `pipelineEnabled` toggle |
|
|
| **analytics** | lake-publisher, trino, hive-metastore, superset | trino + superset: Yes; hive-metastore: internal only; lake-publisher: No | 1 each | lake-publisher is pipeline-gated |
|
|
|
|
## Secret Consumption Map
|
|
|
|
| Secret | Keys | Consumers |
|
|
|--------|------|-----------|
|
|
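| `stonks-core-secrets` | POSTGRES_PASSWORD, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, REDIS_PASSWORD | All app services except dashboard (query-api, symbol-registry, scheduler, ingestion, parser, extractor, aggregation, recommendation, trading-engine, risk-engine, broker-adapter, lake-publisher) + hive-metastore, trino, superset |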
|
|
| `stonks-broker-secrets` | BROKER_API_KEY, BROKER_API_SECRET, BROKER_BASE_URL | ingestion, trading-engine, risk-engine, broker-adapter |
|
|
| `stonks-market-secrets` | MARKET_DATA_API_KEY | ingestion |
|
|
| `stonks-gmail-secrets` | GMAIL_SENDER, GMAIL_RECIPIENT, GMAIL_APP_PASSWORD | trading-engine |
|
|
| `stonks-dashboard-secrets` | SUPERSET_SECRET_KEY, SUPERSET_ADMIN_PASSWORD | superset |
|
|
|
|
## Pipeline Toggle
|
|
|
|
Setting `pipelineEnabled: false` in `values.yaml` scales all services with `pipeline: true` to 0 replicas. This affects:
|
|
|
|
- scheduler, ingestion, parser, extractor, aggregation, recommendation, broker-adapter, lake-publisher
|
|
|
|
API-tier services (query-api, symbol-registry), the remaining trading-tier services (trading-engine, risk-engine), the remaining analytics services (trino, hive-metastore, superset), and the dashboard always run regardless of this toggle.
|
|
|
|
## External Cluster Services
|
|
|
|
These services run outside the `stonks-oracle` namespace and are referenced via cross-namespace DNS:
|
|
|
|
| Service | Namespace | DNS | Port | Notes |
|
|
|---------|-----------|-----|------|-------|
|
|
| PostgreSQL | `postgresql-service` | `postgresql-rw.postgresql-service.svc.cluster.local` | 5432 | CloudNativePG managed |
|
|
| Redis | `redis-service` | `redis-master.redis-service.svc.cluster.local` | 6379 | Password in `stonks-core-secrets` |
|
|
| MinIO | `minio-service` | `minio.minio-service.svc.cluster.local` | 80 | S3-compatible object store |
|
|
| Ollama | `ollama-service` | `ollama.ollama-service.svc.cluster.local` | 11434 | LLM inference, GPU: 4070 Ti Super 16GB |
|
|
|
|
## Analytics Plane
|
|
|
|
The analytics stack runs within the `stonks-oracle` namespace:
|
|
|
|
1. **Lake Publisher** writes Parquet fact tables to MinIO at `s3a://stonks-lakehouse/warehouse`
|
|
2. **Hive Metastore** (Apache Hive 4.0.0) manages table metadata, backed by embedded Derby DB with a PVC for persistence. Connects to MinIO for S3A filesystem access.
|
|
3. **Trino** queries the lakehouse via Hive Metastore (`thrift://hive-metastore:9083`). Exposes two catalogs: `lakehouse` (Hive connector) and `iceberg` (Iceberg connector). Both connect to MinIO for data access.
|
|
4. **Superset** connects to Trino for lakehouse queries and to PostgreSQL for its metadata DB. Uses Redis for caching. Exposed externally via Traefik ingress.
|
|
|
|
## Ingress Routes
|
|
|
|
All ingress resources use the `traefik` IngressClass with TLS certificates issued by the `ca-issuer` ClusterIssuer:
|
|
|
|
| Domain | Backend Service | Port | TLS Secret |
|
|
|--------|----------------|------|------------|
|
|
| `stonks.celestium.life` | dashboard | 8080 | `stonks-dashboard-tls` |
|
|
| `stonks-api.celestium.life` | query-api | 8000 | `stonks-api-tls` |
|
|
| `stonks-registry.celestium.life` | symbol-registry | 8000 | `stonks-registry-tls` |
|
|
| `stonks-trading.celestium.life` | trading-engine | 8000 | `stonks-trading-tls` |
|
|
| `stonks-dash.celestium.life` | superset | 8088 | `stonks-dash-tls` |
|
|
| `stonks-trino.celestium.life` | trino | 8080 | `stonks-trino-tls` |
|