phase 14-15: docker build validation and helm deployment

This commit is contained in:
Celes Renata
2026-04-11 11:59:45 -07:00
parent 7394d241c9
commit ce10afa034
179 changed files with 32559 additions and 576 deletions
+25 -9
View File
@@ -2,15 +2,31 @@
Analytical fact table definitions for MinIO-backed datasets queried via Trino.
All tables use Hive-compatible partition layouts on MinIO (`s3a://stonks-lakehouse/warehouse/`)
and are defined in the `lakehouse.stonks` schema. Parquet is the storage format.
## Fact Tables
- `lake.market_bars` — OHLCV bar data
- `lake.market_quotes` — quote snapshots
- `lake.company_events` — corporate actions and events
- `lake.documents` — ingested document metadata
- `lake.document_extractions` — AI extraction outputs
- `lake.trade_signals` — aggregated trend signals
- `lake.trade_orders` — order submission records
- `lake.trade_fills` — fill and execution records
- `lake.market_bars` — OHLCV bar data per symbol per interval
- `lake.market_quotes` — bid/ask quote snapshots
- `lake.company_events` — corporate actions, earnings, filings, and issuer events
- `lake.documents` — ingested document metadata (articles, filings, transcripts)
- `lake.document_extractions` — AI extraction outputs per document per company
- `lake.trade_signals` — aggregated trend signals and recommendation actions
- `lake.trade_orders` — order submission records (paper and live)
- `lake.trade_fills` — fill and execution records from broker
- `lake.positions_daily` — end-of-day position snapshots
- `lake.pnl_daily` — daily PnL records
- `lake.pnl_daily` — daily PnL records per symbol per account
- `lake.prediction_vs_outcome` — prediction accuracy tracking
- `lake.model_performance` — extraction model performance metrics
## Partitioning
- Most tables partition by `dt` (date)
- `document_extractions`, `prediction_vs_outcome`, and `model_performance` also partition by `model_version`
## Trino Catalogs
- `lakehouse` catalog (Hive connector) for external Hive-compatible tables
- `iceberg` catalog (Iceberg connector) for managed Iceberg tables
## Views
Example SQL views for dashboards and ad hoc analysis are in `lakehouse/views/`.
See `lakehouse/views/README.md` for details.
+24
View File
@@ -0,0 +1,24 @@
-- Analytical fact table: company_events
-- Contents:     corporate actions, earnings, filings, and other issuer events.
-- Partitioning: dt (date), stored on MinIO.
-- Layout:       s3://stonks-lakehouse/warehouse/company_events/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 2.3, 9.4, 9.5, 10.1
-- Design ref:   Section 7 (lake.company_events)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.company_events (
    event_id       VARCHAR,
    ticker         VARCHAR,
    event_type     VARCHAR,
    event_subtype  VARCHAR,
    title          VARCHAR,
    description    VARCHAR,
    source         VARCHAR,
    source_url     VARCHAR,
    event_at       TIMESTAMP(6) WITH TIME ZONE,
    ingested_at    TIMESTAMP(6) WITH TIME ZONE,
    dt             DATE  -- partition column (see partitioned_by below)
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/company_events/'
);
+13 -1
View File
@@ -1,16 +1,28 @@
-- Analytical fact table: document_extractions
-- Partitioned by dt and model_version on MinIO
-- AI extraction outputs per document per company.
-- Partitioned by dt and model_version on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/document_extractions/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
-- Requirements: 5.3, 5.5, 9.4, 9.5, 10.1, 10.4
-- Design ref: Section 6.3, Section 7 (lake.document_extractions)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.document_extractions (
document_id VARCHAR,
ticker VARCHAR,
company_name VARCHAR,
relevance DOUBLE,
sentiment VARCHAR,
impact_score DOUBLE,
impact_horizon VARCHAR,
catalyst_type VARCHAR,
confidence DOUBLE,
novelty_score DOUBLE,
source_credibility DOUBLE,
key_facts VARCHAR,
risks VARCHAR,
macro_themes VARCHAR,
model_name VARCHAR,
prompt_version VARCHAR,
schema_version VARCHAR,
extraction_at TIMESTAMP(6) WITH TIME ZONE,
dt DATE,
model_version VARCHAR
+9 -2
View File
@@ -1,6 +1,9 @@
-- Analytical fact table: documents
-- Partitioned by dt and source_type on MinIO
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/source_type={type}/part-*.parquet
-- Ingested document metadata for articles, filings, transcripts, and press releases.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 3.1, 3.3, 9.4, 9.5, 10.1, 10.4
-- Design ref: Section 6.2, Section 7 (lake.documents)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
document_id VARCHAR,
@@ -9,7 +12,11 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
ticker VARCHAR,
publisher VARCHAR,
title VARCHAR,
url VARCHAR,
canonical_url VARCHAR,
language VARCHAR,
published_at TIMESTAMP(6) WITH TIME ZONE,
retrieved_at TIMESTAMP(6) WITH TIME ZONE,
content_hash VARCHAR,
confidence DOUBLE,
dt DATE
+6 -1
View File
@@ -1,6 +1,9 @@
-- Analytical fact table: market_bars
-- Partitioned by dt (date) on MinIO
-- OHLCV bar data for tracked symbols.
-- Partitioned by dt (date) on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/market_bars/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 2.1, 9.4, 9.5, 10.1
-- Design ref: Section 7 (lake.market_bars)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
ticker VARCHAR,
@@ -10,7 +13,9 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
close_price DOUBLE,
volume BIGINT,
vwap DOUBLE,
trade_count BIGINT,
bar_timestamp TIMESTAMP(6) WITH TIME ZONE,
bar_interval VARCHAR,
source VARCHAR,
dt DATE
) WITH (
+23
View File
@@ -0,0 +1,23 @@
-- Analytical fact table: market_quotes
-- Contents:     quote snapshots for tracked symbols.
-- Partitioning: dt (date), stored on MinIO.
-- Layout:       s3://stonks-lakehouse/warehouse/market_quotes/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 2.1, 9.4, 9.5, 10.1
-- Design ref:   Section 7 (lake.market_quotes)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_quotes (
    ticker      VARCHAR,
    bid_price   DOUBLE,
    ask_price   DOUBLE,
    bid_size    BIGINT,
    ask_size    BIGINT,
    last_price  DOUBLE,
    last_size   BIGINT,
    source      VARCHAR,
    quote_at    TIMESTAMP(6) WITH TIME ZONE,
    dt          DATE  -- partition column (see partitioned_by below)
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/market_quotes/'
);
+33
View File
@@ -0,0 +1,33 @@
-- Analytical fact table: model_performance
-- Tracks extraction model performance for Trino/Superset dashboards.
-- Partitioned by dt and model_version on MinIO (matches partitioned_by below;
-- model_name is a regular data column, not a partition key).
-- Path: s3://stonks-lakehouse/warehouse/model_performance/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
-- Requirements: 12.1, 12.2
CREATE TABLE IF NOT EXISTS lakehouse.stonks.model_performance (
    document_id                VARCHAR,
    ticker                     VARCHAR,
    model_name                 VARCHAR,
    prompt_version             VARCHAR,
    schema_version             VARCHAR,
    success                    BOOLEAN,
    attempt_count              INTEGER,
    total_duration_ms          INTEGER,
    first_attempt_duration_ms  INTEGER,
    final_attempt_duration_ms  INTEGER,
    confidence                 DOUBLE,
    validation_status          VARCHAR,
    validation_error_count     INTEGER,
    validation_warning_count   INTEGER,
    retry_count                INTEGER,
    input_token_estimate       INTEGER,
    output_token_estimate      INTEGER,
    company_count              INTEGER,
    recorded_at                TIMESTAMP(6) WITH TIME ZONE,
    -- Partition columns: kept last, in the same order as partitioned_by.
    dt                         DATE,
    model_version              VARCHAR
) WITH (
    format = 'PARQUET',
    partitioned_by = ARRAY['dt', 'model_version'],
    external_location = 's3a://stonks-lakehouse/warehouse/model_performance/'
);
+8 -1
View File
@@ -1,12 +1,19 @@
-- Analytical fact table: pnl_daily
-- Partitioned by dt on MinIO
-- Daily profit and loss records per symbol per account.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/pnl_daily/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 9.4, 9.5, 10.1, 10.3
-- Design ref: Section 7 (lake.pnl_daily)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.pnl_daily (
ticker VARCHAR,
realized_pnl DOUBLE,
unrealized_pnl DOUBLE,
total_pnl DOUBLE,
fees DOUBLE,
net_pnl DOUBLE,
broker_account VARCHAR,
execution_mode VARCHAR,
dt DATE
) WITH (
format = 'PARQUET',
+7 -1
View File
@@ -1,13 +1,19 @@
-- Analytical fact table: positions_daily
-- Partitioned by dt on MinIO
-- End-of-day position snapshots.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/positions_daily/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 9.4, 9.5, 10.1, 10.3
-- Design ref: Section 7 (lake.positions_daily)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.positions_daily (
ticker VARCHAR,
quantity DOUBLE,
avg_entry_price DOUBLE,
close_price DOUBLE,
market_value DOUBLE,
unrealized_pnl DOUBLE,
broker_account VARCHAR,
execution_mode VARCHAR,
snapshot_at TIMESTAMP(6) WITH TIME ZONE,
dt DATE
) WITH (
+16 -11
View File
@@ -1,19 +1,24 @@
-- Analytical fact table: prediction_vs_outcome
-- Partitioned by dt on MinIO
-- Prediction accuracy tracking: predicted signals vs realized market moves.
-- Partitioned by dt and model_version on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/prediction_vs_outcome/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
-- Requirements: 9.4, 9.5, 10.1, 10.3
-- Design ref: Section 7 (lake.prediction_vs_outcome)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.prediction_vs_outcome (
    recommendation_id     VARCHAR,
    ticker                VARCHAR,
    predicted_action      VARCHAR,
    predicted_confidence  DOUBLE,
    actual_move_pct       DOUBLE,
    outcome               VARCHAR,
    horizon_days          INTEGER,
    predicted_at          TIMESTAMP(6) WITH TIME ZONE,
    evaluated_at          TIMESTAMP(6) WITH TIME ZONE,
    -- Partition columns: the Trino Hive connector requires them to be the last
    -- columns of the table, in the same order as partitioned_by (dt first,
    -- then model_version), which also matches the dt=/model_version= layout.
    dt                    DATE,
    model_version         VARCHAR
) WITH (
    format = 'PARQUET',
    partitioned_by = ARRAY['dt', 'model_version'],
    external_location = 's3a://stonks-lakehouse/warehouse/prediction_vs_outcome/'
);
+6 -1
View File
@@ -1,5 +1,9 @@
-- Analytical fact table: trade_fills
-- Partitioned by dt on MinIO
-- Fill and execution records from broker.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/trade_fills/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 9.4, 9.5, 10.1, 10.3
-- Design ref: Section 7 (lake.trade_fills)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
fill_id VARCHAR,
@@ -8,6 +12,7 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
side VARCHAR,
fill_price DOUBLE,
fill_quantity DOUBLE,
commission DOUBLE,
broker_account VARCHAR,
filled_at TIMESTAMP(6) WITH TIME ZONE,
dt DATE
+7 -1
View File
@@ -1,14 +1,20 @@
-- Analytical fact table: trade_orders
-- Partitioned by dt on MinIO
-- Order submission records for paper and live trading.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/trade_orders/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 8.3, 9.4, 9.5, 10.1, 10.3
-- Design ref: Section 7 (lake.trade_orders)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_orders (
order_id VARCHAR,
recommendation_id VARCHAR,
ticker VARCHAR,
side VARCHAR,
order_type VARCHAR,
quantity DOUBLE,
limit_price DOUBLE,
status VARCHAR,
execution_mode VARCHAR,
broker_account VARCHAR,
submitted_at TIMESTAMP(6) WITH TIME ZONE,
dt DATE
+18 -10
View File
@@ -1,16 +1,24 @@
-- Analytical fact table: trade_signals
-- Partitioned by dt on MinIO
-- Aggregated trend signals and recommendation actions.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/trade_signals/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 6.1, 6.2, 6.4, 6.5, 7.1, 9.4, 9.5, 10.1
-- Design ref: Section 6.4, Section 6.5, Section 7 (lake.trade_signals)
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_signals (
signal_id VARCHAR,
ticker VARCHAR,
trend_direction VARCHAR,
trend_strength DOUBLE,
confidence DOUBLE,
action VARCHAR,
time_horizon VARCHAR,
generated_at TIMESTAMP(6) WITH TIME ZONE,
dt DATE
signal_id VARCHAR,
ticker VARCHAR,
trend_direction VARCHAR,
trend_strength DOUBLE,
confidence DOUBLE,
contradiction_score DOUBLE,
dominant_catalysts VARCHAR,
material_risks VARCHAR,
action VARCHAR,
time_horizon VARCHAR,
recommendation_id VARCHAR,
generated_at TIMESTAMP(6) WITH TIME ZONE,
dt DATE
) WITH (
format = 'PARQUET',
partitioned_by = ARRAY['dt'],