phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -2,15 +2,31 @@
|
||||
|
||||
Analytical fact table definitions for MinIO-backed datasets queried via Trino.
|
||||
|
||||
All tables use Hive-compatible partition layouts on MinIO (`s3a://stonks-lakehouse/warehouse/`)
|
||||
and are defined in the `lakehouse.stonks` schema. Parquet is the storage format.
|
||||
|
||||
## Fact Tables
|
||||
- `lake.market_bars` — OHLCV bar data
|
||||
- `lake.market_quotes` — quote snapshots
|
||||
- `lake.company_events` — corporate actions and events
|
||||
- `lake.documents` — ingested document metadata
|
||||
- `lake.document_extractions` — AI extraction outputs
|
||||
- `lake.trade_signals` — aggregated trend signals
|
||||
- `lake.trade_orders` — order submission records
|
||||
- `lake.trade_fills` — fill and execution records
|
||||
- `lake.market_bars` — OHLCV bar data per symbol per interval
|
||||
- `lake.market_quotes` — bid/ask quote snapshots
|
||||
- `lake.company_events` — corporate actions, earnings, filings, and issuer events
|
||||
- `lake.documents` — ingested document metadata (articles, filings, transcripts)
|
||||
- `lake.document_extractions` — AI extraction outputs per document per company
|
||||
- `lake.trade_signals` — aggregated trend signals and recommendation actions
|
||||
- `lake.trade_orders` — order submission records (paper and live)
|
||||
- `lake.trade_fills` — fill and execution records from broker
|
||||
- `lake.positions_daily` — end-of-day position snapshots
|
||||
- `lake.pnl_daily` — daily PnL records
|
||||
- `lake.pnl_daily` — daily PnL records per symbol per account
|
||||
- `lake.prediction_vs_outcome` — prediction accuracy tracking
|
||||
- `lake.model_performance` — extraction model performance metrics
|
||||
|
||||
## Partitioning
|
||||
- Most tables partition by `dt` (date)
|
||||
- `document_extractions`, `prediction_vs_outcome`, and `model_performance` also partition by `model_version`
|
||||
|
||||
## Trino Catalogs
|
||||
- `lakehouse` catalog (Hive connector) for external Hive-compatible tables
|
||||
- `iceberg` catalog (Iceberg connector) for managed Iceberg tables
|
||||
|
||||
## Views
|
||||
Example SQL views for dashboards and ad hoc analysis are in `lakehouse/views/`.
|
||||
See `lakehouse/views/README.md` for details.
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Analytical fact table: company_events
|
||||
-- Corporate actions, earnings, filings, and other issuer events.
|
||||
-- Partitioned by dt (date) on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/company_events/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 2.3, 9.4, 9.5, 10.1
|
||||
-- Design ref: Section 7 (lake.company_events)
|
||||
|
||||
-- Registers an external Parquet table over the company_events warehouse prefix.
-- NOTE(review): confirm the target connector accepts TIMESTAMP(6) WITH TIME ZONE
-- columns at create time; verify against the deployed Trino version.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.company_events (
    -- Identity and classification
    event_id      VARCHAR,
    ticker        VARCHAR,
    event_type    VARCHAR,
    event_subtype VARCHAR,

    -- Descriptive text and provenance
    title         VARCHAR,
    description   VARCHAR,
    source        VARCHAR,
    source_url    VARCHAR,

    -- Timestamps
    event_at      TIMESTAMP(6) WITH TIME ZONE,
    ingested_at   TIMESTAMP(6) WITH TIME ZONE,

    -- Partition key: declared last, matching partitioned_by below
    dt            DATE
)
WITH (
    external_location = 's3a://stonks-lakehouse/warehouse/company_events/',
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt']
);
|
||||
@@ -1,16 +1,28 @@
|
||||
-- Analytical fact table: document_extractions
|
||||
-- Partitioned by dt and model_version on MinIO
|
||||
-- AI extraction outputs per document per company.
|
||||
-- Partitioned by dt and model_version on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/document_extractions/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
|
||||
-- Requirements: 5.3, 5.5, 9.4, 9.5, 10.1, 10.4
|
||||
-- Design ref: Section 6.3, Section 7 (lake.document_extractions)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.document_extractions (
|
||||
document_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
company_name VARCHAR,
|
||||
relevance DOUBLE,
|
||||
sentiment VARCHAR,
|
||||
impact_score DOUBLE,
|
||||
impact_horizon VARCHAR,
|
||||
catalyst_type VARCHAR,
|
||||
confidence DOUBLE,
|
||||
novelty_score DOUBLE,
|
||||
source_credibility DOUBLE,
|
||||
key_facts VARCHAR,
|
||||
risks VARCHAR,
|
||||
macro_themes VARCHAR,
|
||||
model_name VARCHAR,
|
||||
prompt_version VARCHAR,
|
||||
schema_version VARCHAR,
|
||||
extraction_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE,
|
||||
model_version VARCHAR
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
-- Analytical fact table: documents
|
||||
-- Partitioned by dt and source_type on MinIO
|
||||
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/source_type={type}/part-*.parquet
|
||||
-- Ingested document metadata for articles, filings, transcripts, and press releases.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 3.1, 3.3, 9.4, 9.5, 10.1, 10.4
|
||||
-- Design ref: Section 6.2, Section 7 (lake.documents)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
|
||||
document_id VARCHAR,
|
||||
@@ -9,7 +12,11 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
|
||||
ticker VARCHAR,
|
||||
publisher VARCHAR,
|
||||
title VARCHAR,
|
||||
url VARCHAR,
|
||||
canonical_url VARCHAR,
|
||||
language VARCHAR,
|
||||
published_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
retrieved_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
content_hash VARCHAR,
|
||||
confidence DOUBLE,
|
||||
dt DATE
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
-- Analytical fact table: market_bars
|
||||
-- Partitioned by dt (date) on MinIO
|
||||
-- OHLCV bar data for tracked symbols.
|
||||
-- Partitioned by dt (date) on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/market_bars/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 2.1, 9.4, 9.5, 10.1
|
||||
-- Design ref: Section 7 (lake.market_bars)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
|
||||
ticker VARCHAR,
|
||||
@@ -10,7 +13,9 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
|
||||
close_price DOUBLE,
|
||||
volume BIGINT,
|
||||
vwap DOUBLE,
|
||||
trade_count BIGINT,
|
||||
bar_timestamp TIMESTAMP(6) WITH TIME ZONE,
|
||||
bar_interval VARCHAR,
|
||||
source VARCHAR,
|
||||
dt DATE
|
||||
) WITH (
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Analytical fact table: market_quotes
|
||||
-- Quote snapshots for tracked symbols.
|
||||
-- Partitioned by dt (date) on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/market_quotes/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 2.1, 9.4, 9.5, 10.1
|
||||
-- Design ref: Section 7 (lake.market_quotes)
|
||||
|
||||
-- Registers an external Parquet table over the market_quotes warehouse prefix.
-- NOTE(review): confirm the target connector accepts TIMESTAMP(6) WITH TIME ZONE
-- columns at create time; verify against the deployed Trino version.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_quotes (
    ticker     VARCHAR,

    -- Bid / ask side of the quote
    bid_price  DOUBLE,
    ask_price  DOUBLE,
    bid_size   BIGINT,
    ask_size   BIGINT,

    -- Last trade fields
    last_price DOUBLE,
    last_size  BIGINT,

    -- Provenance and timing
    source     VARCHAR,
    quote_at   TIMESTAMP(6) WITH TIME ZONE,

    -- Partition key: declared last, matching partitioned_by below
    dt         DATE
)
WITH (
    external_location = 's3a://stonks-lakehouse/warehouse/market_quotes/',
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt']
);
|
||||
@@ -0,0 +1,33 @@
|
||||
-- Analytical fact table: model_performance
|
||||
-- Tracks extraction model performance for Trino/Superset dashboards.
|
||||
-- Partitioned by dt and model_version on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/model_performance/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
|
||||
-- Requirements: 12.1, 12.2
|
||||
|
||||
-- Registers an external Parquet table over the model_performance warehouse prefix.
-- Partition keys are dt and model_version (not model_name, which is a regular column).
-- NOTE(review): confirm the target connector accepts TIMESTAMP(6) WITH TIME ZONE
-- columns at create time; verify against the deployed Trino version.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.model_performance (
    -- What was processed, and by which model configuration
    document_id                VARCHAR,
    ticker                     VARCHAR,
    model_name                 VARCHAR,
    prompt_version             VARCHAR,
    schema_version             VARCHAR,

    -- Outcome and timing of the run
    success                    BOOLEAN,
    attempt_count              INTEGER,
    total_duration_ms          INTEGER,
    first_attempt_duration_ms  INTEGER,
    final_attempt_duration_ms  INTEGER,

    -- Quality and validation results
    confidence                 DOUBLE,
    validation_status          VARCHAR,
    validation_error_count     INTEGER,
    validation_warning_count   INTEGER,
    retry_count                INTEGER,

    -- Size estimates
    input_token_estimate       INTEGER,
    output_token_estimate      INTEGER,
    company_count              INTEGER,

    recorded_at                TIMESTAMP(6) WITH TIME ZONE,

    -- Partition keys: declared last, in the same order as partitioned_by below
    dt                         DATE,
    model_version              VARCHAR
)
WITH (
    external_location = 's3a://stonks-lakehouse/warehouse/model_performance/',
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt', 'model_version']
);
|
||||
@@ -1,12 +1,19 @@
|
||||
-- Analytical fact table: pnl_daily
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Daily profit and loss records per symbol per account.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/pnl_daily/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.pnl_daily)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.pnl_daily (
|
||||
ticker VARCHAR,
|
||||
realized_pnl DOUBLE,
|
||||
unrealized_pnl DOUBLE,
|
||||
total_pnl DOUBLE,
|
||||
fees DOUBLE,
|
||||
net_pnl DOUBLE,
|
||||
broker_account VARCHAR,
|
||||
execution_mode VARCHAR,
|
||||
dt DATE
|
||||
) WITH (
|
||||
format = 'PARQUET',
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
-- Analytical fact table: positions_daily
|
||||
-- Partitioned by dt on MinIO
|
||||
-- End-of-day position snapshots.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/positions_daily/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.positions_daily)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.positions_daily (
|
||||
ticker VARCHAR,
|
||||
quantity DOUBLE,
|
||||
avg_entry_price DOUBLE,
|
||||
close_price DOUBLE,
|
||||
market_value DOUBLE,
|
||||
unrealized_pnl DOUBLE,
|
||||
broker_account VARCHAR,
|
||||
execution_mode VARCHAR,
|
||||
snapshot_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
) WITH (
|
||||
|
||||
@@ -1,19 +1,24 @@
|
||||
-- Analytical fact table: prediction_vs_outcome
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Prediction accuracy tracking: predicted signals vs realized market moves.
|
||||
-- Partitioned by dt and model_version on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/prediction_vs_outcome/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.prediction_vs_outcome)
|
||||
|
||||
-- Registers an external Parquet table over the prediction_vs_outcome warehouse
-- prefix, partitioned by dt and then model_version.
--
-- Each column is declared exactly once and there is a single partitioned_by
-- property. The partition columns are the final two columns and appear in the
-- same order as partitioned_by (dt, then model_version): the Hive connector
-- rejects a table whose partition keys are not last or are ordered differently
-- from the property.
-- NOTE(review): confirm the target connector accepts TIMESTAMP(6) WITH TIME ZONE
-- columns at create time; verify against the deployed Trino version.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.prediction_vs_outcome (
    -- Prediction being evaluated
    recommendation_id    VARCHAR,
    ticker               VARCHAR,
    predicted_action     VARCHAR,
    predicted_confidence DOUBLE,

    -- Realized result
    actual_move_pct      DOUBLE,
    outcome              VARCHAR,
    horizon_days         INTEGER,

    -- Timing
    predicted_at         TIMESTAMP(6) WITH TIME ZONE,
    evaluated_at         TIMESTAMP(6) WITH TIME ZONE,

    -- Partition keys: must stay last and in partitioned_by order
    dt                   DATE,
    model_version        VARCHAR
) WITH (
    format = 'PARQUET',
    partitioned_by = ARRAY['dt', 'model_version'],
    external_location = 's3a://stonks-lakehouse/warehouse/prediction_vs_outcome/'
);
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
-- Analytical fact table: trade_fills
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Fill and execution records from broker.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/trade_fills/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.trade_fills)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
|
||||
fill_id VARCHAR,
|
||||
@@ -8,6 +12,7 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
|
||||
side VARCHAR,
|
||||
fill_price DOUBLE,
|
||||
fill_quantity DOUBLE,
|
||||
commission DOUBLE,
|
||||
broker_account VARCHAR,
|
||||
filled_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
-- Analytical fact table: trade_orders
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Order submission records for paper and live trading.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/trade_orders/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 8.3, 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.trade_orders)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_orders (
|
||||
order_id VARCHAR,
|
||||
recommendation_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
side VARCHAR,
|
||||
order_type VARCHAR,
|
||||
quantity DOUBLE,
|
||||
limit_price DOUBLE,
|
||||
status VARCHAR,
|
||||
execution_mode VARCHAR,
|
||||
broker_account VARCHAR,
|
||||
submitted_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
|
||||
@@ -1,16 +1,24 @@
|
||||
-- Analytical fact table: trade_signals
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Aggregated trend signals and recommendation actions.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3a://stonks-lakehouse/warehouse/trade_signals/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 6.1, 6.2, 6.4, 6.5, 7.1, 9.4, 9.5, 10.1
|
||||
-- Design ref: Section 6.4, Section 6.5, Section 7 (lake.trade_signals)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_signals (
|
||||
signal_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
trend_direction VARCHAR,
|
||||
trend_strength DOUBLE,
|
||||
confidence DOUBLE,
|
||||
action VARCHAR,
|
||||
time_horizon VARCHAR,
|
||||
generated_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
signal_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
trend_direction VARCHAR,
|
||||
trend_strength DOUBLE,
|
||||
confidence DOUBLE,
|
||||
contradiction_score DOUBLE,
|
||||
dominant_catalysts VARCHAR,
|
||||
material_risks VARCHAR,
|
||||
action VARCHAR,
|
||||
time_horizon VARCHAR,
|
||||
recommendation_id VARCHAR,
|
||||
generated_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
) WITH (
|
||||
format = 'PARQUET',
|
||||
partitioned_by = ARRAY['dt'],
|
||||
|
||||
Reference in New Issue
Block a user