phase 0+1: project scaffold, k8s manifests, CI pipeline, steering, hooks, tests

- Repository structure for all services, infra, lakehouse, dashboards
- K8s manifests targeting stonks-oracle namespace with GHCR images
- Ingress via Traefik with ca-issuer TLS for internal services
- ConfigMap wired to existing cluster services (pg, redis, minio, ollama)
- GitHub Actions workflow for lint, test, multi-service container builds
- Dockerfile with build-arg CMD per service
- Makefile for local build/push/deploy
- Steering rules for TDD workflow, K8s conventions, project context
- Agent hooks for lint-on-save, test-on-save, k8s-validate, phase-commit
- Ruff linter config, all lint issues fixed
- 14 passing tests for schemas, config, redis keys
- PostgreSQL migrations, Trino catalogs, Superset config, MinIO lifecycle
This commit is contained in:
Celes Renata
2026-04-11 03:25:08 -07:00
parent 8cfc4f423b
commit ebea70573b
90 changed files with 3590 additions and 19 deletions
+16
View File
@@ -0,0 +1,16 @@
# Lakehouse Schemas
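Analytical fact table definitions for MinIO-backed datasets queried via Trino. Note: the accompanying `CREATE TABLE` statements define these tables in the `lakehouse.stonks` schema (for example, `lakehouse.stonks.market_bars`), so use that qualified name rather than the `lake.` prefix shown below when querying.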
## Fact Tables
- `lake.market_bars` — OHLCV bar data
- `lake.market_quotes` — quote snapshots
- `lake.company_events` — corporate actions and events
- `lake.documents` — ingested document metadata
- `lake.document_extractions` — AI extraction outputs
- `lake.trade_signals` — aggregated trend signals
- `lake.trade_orders` — order submission records
- `lake.trade_fills` — fill and execution records
- `lake.positions_daily` — end-of-day position snapshots
- `lake.pnl_daily` — daily PnL records
- `lake.prediction_vs_outcome` — prediction accuracy tracking
@@ -0,0 +1,21 @@
-- lakehouse.stonks.document_extractions
-- Analytical fact table holding AI extraction outputs, one row per extraction.
-- Stored as Parquet on MinIO and partitioned by dt, then model_version.
-- The two partition keys are declared last, in the same order as partitioned_by.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.document_extractions (
    -- extraction payload
    document_id     VARCHAR,
    ticker          VARCHAR,
    sentiment       VARCHAR,
    impact_score    DOUBLE,
    catalyst_type   VARCHAR,
    confidence      DOUBLE,
    novelty_score   DOUBLE,
    model_name      VARCHAR,
    prompt_version  VARCHAR,
    extraction_at   TIMESTAMP(6) WITH TIME ZONE,
    -- partition keys
    dt              DATE,
    model_version   VARCHAR
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt', 'model_version'],
    external_location = 's3a://stonks-lakehouse/warehouse/document_extractions/'
);
+20
View File
@@ -0,0 +1,20 @@
-- Analytical fact table: documents (ingested document metadata)
-- Partitioned by dt and source_type on MinIO
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/source_type={type}/part-*.parquet
--
-- source_type is a partition key, so it is declared after dt at the end of the
-- column list and listed in partitioned_by. This keeps the table definition in
-- line with the two-level directory layout documented above; previously only dt
-- was registered as a partition column.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
    -- document attributes
    document_id    VARCHAR,
    document_type  VARCHAR,
    ticker         VARCHAR,
    publisher      VARCHAR,
    title          VARCHAR,
    published_at   TIMESTAMP(6) WITH TIME ZONE,
    content_hash   VARCHAR,
    confidence     DOUBLE,
    -- partition keys (must stay last, in partitioned_by order)
    dt             DATE,
    source_type    VARCHAR
) WITH (
    format = 'PARQUET',
    partitioned_by = ARRAY['dt', 'source_type'],
    external_location = 's3a://stonks-lakehouse/warehouse/documents/'
);
+20
View File
@@ -0,0 +1,20 @@
-- lakehouse.stonks.market_bars
-- Analytical fact table of OHLCV bar data, stored as Parquet on MinIO.
-- Single partition key: dt (date).
-- Layout: s3://stonks-lakehouse/warehouse/market_bars/dt={yyyy-mm-dd}/part-*.parquet
CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
    ticker         VARCHAR,
    -- open / high / low / close prices and traded volume for the bar
    open_price     DOUBLE,
    high_price     DOUBLE,
    low_price      DOUBLE,
    close_price    DOUBLE,
    volume         BIGINT,
    vwap           DOUBLE,
    bar_timestamp  TIMESTAMP(6) WITH TIME ZONE,
    source         VARCHAR,
    -- partition key
    dt             DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/market_bars/'
);
+15
View File
@@ -0,0 +1,15 @@
-- lakehouse.stonks.pnl_daily
-- Analytical fact table of daily PnL records, stored as Parquet on MinIO.
-- Single partition key: dt.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.pnl_daily (
    ticker          VARCHAR,
    realized_pnl    DOUBLE,
    unrealized_pnl  DOUBLE,
    total_pnl       DOUBLE,
    broker_account  VARCHAR,
    -- partition key
    dt              DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/pnl_daily/'
);
+17
View File
@@ -0,0 +1,17 @@
-- lakehouse.stonks.positions_daily
-- Analytical fact table of end-of-day position snapshots, stored as Parquet on MinIO.
-- Single partition key: dt.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.positions_daily (
    ticker           VARCHAR,
    quantity         DOUBLE,
    avg_entry_price  DOUBLE,
    close_price      DOUBLE,
    unrealized_pnl   DOUBLE,
    broker_account   VARCHAR,
    snapshot_at      TIMESTAMP(6) WITH TIME ZONE,
    -- partition key
    dt               DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/positions_daily/'
);
@@ -0,0 +1,19 @@
-- lakehouse.stonks.prediction_vs_outcome
-- Analytical fact table for prediction accuracy tracking, stored as Parquet on MinIO.
-- Single partition key: dt.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.prediction_vs_outcome (
    recommendation_id     VARCHAR,
    ticker                VARCHAR,
    -- what was predicted
    predicted_action      VARCHAR,
    predicted_confidence  DOUBLE,
    -- what was observed
    actual_move_pct       DOUBLE,
    outcome               VARCHAR,
    horizon_days          INTEGER,
    predicted_at          TIMESTAMP(6) WITH TIME ZONE,
    evaluated_at          TIMESTAMP(6) WITH TIME ZONE,
    -- partition key
    dt                    DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/prediction_vs_outcome/'
);
+18
View File
@@ -0,0 +1,18 @@
-- lakehouse.stonks.trade_fills
-- Analytical fact table of fill and execution records, stored as Parquet on MinIO.
-- Single partition key: dt.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
    fill_id         VARCHAR,
    order_id        VARCHAR,
    ticker          VARCHAR,
    side            VARCHAR,
    fill_price      DOUBLE,
    fill_quantity   DOUBLE,
    broker_account  VARCHAR,
    filled_at       TIMESTAMP(6) WITH TIME ZONE,
    -- partition key
    dt              DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/trade_fills/'
);
+19
View File
@@ -0,0 +1,19 @@
-- lakehouse.stonks.trade_orders
-- Analytical fact table of order submission records, stored as Parquet on MinIO.
-- Single partition key: dt.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_orders (
    order_id        VARCHAR,
    ticker          VARCHAR,
    side            VARCHAR,
    order_type      VARCHAR,
    quantity        DOUBLE,
    limit_price     DOUBLE,
    status          VARCHAR,
    broker_account  VARCHAR,
    submitted_at    TIMESTAMP(6) WITH TIME ZONE,
    -- partition key
    dt              DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/trade_orders/'
);
+18
View File
@@ -0,0 +1,18 @@
-- lakehouse.stonks.trade_signals
-- Analytical fact table of aggregated trend signals, stored as Parquet on MinIO.
-- Single partition key: dt.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_signals (
    signal_id        VARCHAR,
    ticker           VARCHAR,
    trend_direction  VARCHAR,
    trend_strength   DOUBLE,
    confidence       DOUBLE,
    action           VARCHAR,
    time_horizon     VARCHAR,
    generated_at     TIMESTAMP(6) WITH TIME ZONE,
    -- partition key
    dt               DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/trade_signals/'
);