phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -2,15 +2,31 @@
|
||||
|
||||
Analytical fact table definitions for MinIO-backed datasets queried via Trino.
|
||||
|
||||
All tables use Hive-compatible partition layouts on MinIO (`s3a://stonks-lakehouse/warehouse/`)
|
||||
and are defined in the `lakehouse.stonks` schema. Parquet is the storage format.
|
||||
|
||||
## Fact Tables
|
||||
- `lake.market_bars` — OHLCV bar data
|
||||
- `lake.market_quotes` — quote snapshots
|
||||
- `lake.company_events` — corporate actions and events
|
||||
- `lake.documents` — ingested document metadata
|
||||
- `lake.document_extractions` — AI extraction outputs
|
||||
- `lake.trade_signals` — aggregated trend signals
|
||||
- `lake.trade_orders` — order submission records
|
||||
- `lake.trade_fills` — fill and execution records
|
||||
- `lake.market_bars` — OHLCV bar data per symbol per interval
|
||||
- `lake.market_quotes` — bid/ask quote snapshots
|
||||
- `lake.company_events` — corporate actions, earnings, filings, and issuer events
|
||||
- `lake.documents` — ingested document metadata (articles, filings, transcripts)
|
||||
- `lake.document_extractions` — AI extraction outputs per document per company
|
||||
- `lake.trade_signals` — aggregated trend signals and recommendation actions
|
||||
- `lake.trade_orders` — order submission records (paper and live)
|
||||
- `lake.trade_fills` — fill and execution records from broker
|
||||
- `lake.positions_daily` — end-of-day position snapshots
|
||||
- `lake.pnl_daily` — daily PnL records
|
||||
- `lake.pnl_daily` — daily PnL records per symbol per account
|
||||
- `lake.prediction_vs_outcome` — prediction accuracy tracking
|
||||
- `lake.model_performance` — extraction model performance metrics
|
||||
|
||||
## Partitioning
|
||||
- Most tables partition by `dt` (date)
|
||||
- `document_extractions`, `prediction_vs_outcome`, and `model_performance` also partition by `model_version`
|
||||
|
||||
## Trino Catalogs
|
||||
- `lakehouse` catalog (Hive connector) for external Hive-compatible tables
|
||||
- `iceberg` catalog (Iceberg connector) for managed Iceberg tables
|
||||
|
||||
## Views
|
||||
Example SQL views for dashboards and ad hoc analysis are in `lakehouse/views/`.
|
||||
See `lakehouse/views/README.md` for details.
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
-- Analytical fact table: company_events
-- Corporate actions, earnings, filings, and other issuer events.
--
-- Storage: external Parquet table on MinIO; data files live under external_location.
-- Layout:  one Hive-style partition directory per dt value, i.e.
--          s3a://stonks-lakehouse/warehouse/company_events/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 2.3, 9.4, 9.5, 10.1
-- Design ref:   Section 7 (lake.company_events)

CREATE TABLE IF NOT EXISTS lakehouse.stonks.company_events (
    event_id       VARCHAR,
    ticker         VARCHAR,
    event_type     VARCHAR,
    event_subtype  VARCHAR,
    title          VARCHAR,
    description    VARCHAR,
    source         VARCHAR,
    source_url     VARCHAR,
    event_at       TIMESTAMP(6) WITH TIME ZONE,
    ingested_at    TIMESTAMP(6) WITH TIME ZONE,
    -- Partition column: declared last because it is the partitioned_by key below.
    dt             DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/company_events/'
);
|
||||
@@ -1,16 +1,28 @@
|
||||
-- Analytical fact table: document_extractions
|
||||
-- Partitioned by dt and model_version on MinIO
|
||||
-- AI extraction outputs per document per company.
|
||||
-- Partitioned by dt and model_version on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/document_extractions/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
|
||||
-- Requirements: 5.3, 5.5, 9.4, 9.5, 10.1, 10.4
|
||||
-- Design ref: Section 6.3, Section 7 (lake.document_extractions)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.document_extractions (
|
||||
document_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
company_name VARCHAR,
|
||||
relevance DOUBLE,
|
||||
sentiment VARCHAR,
|
||||
impact_score DOUBLE,
|
||||
impact_horizon VARCHAR,
|
||||
catalyst_type VARCHAR,
|
||||
confidence DOUBLE,
|
||||
novelty_score DOUBLE,
|
||||
source_credibility DOUBLE,
|
||||
key_facts VARCHAR,
|
||||
risks VARCHAR,
|
||||
macro_themes VARCHAR,
|
||||
model_name VARCHAR,
|
||||
prompt_version VARCHAR,
|
||||
schema_version VARCHAR,
|
||||
extraction_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE,
|
||||
model_version VARCHAR
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
-- Analytical fact table: documents
|
||||
-- Partitioned by dt and source_type on MinIO
|
||||
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/source_type={type}/part-*.parquet
|
||||
-- Ingested document metadata for articles, filings, transcripts, and press releases.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 3.1, 3.3, 9.4, 9.5, 10.1, 10.4
|
||||
-- Design ref: Section 6.2, Section 7 (lake.documents)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
|
||||
document_id VARCHAR,
|
||||
@@ -9,7 +12,11 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
|
||||
ticker VARCHAR,
|
||||
publisher VARCHAR,
|
||||
title VARCHAR,
|
||||
url VARCHAR,
|
||||
canonical_url VARCHAR,
|
||||
language VARCHAR,
|
||||
published_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
retrieved_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
content_hash VARCHAR,
|
||||
confidence DOUBLE,
|
||||
dt DATE
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
-- Analytical fact table: market_bars
|
||||
-- Partitioned by dt (date) on MinIO
|
||||
-- OHLCV bar data for tracked symbols.
|
||||
-- Partitioned by dt (date) on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/market_bars/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 2.1, 9.4, 9.5, 10.1
|
||||
-- Design ref: Section 7 (lake.market_bars)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
|
||||
ticker VARCHAR,
|
||||
@@ -10,7 +13,9 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_bars (
|
||||
close_price DOUBLE,
|
||||
volume BIGINT,
|
||||
vwap DOUBLE,
|
||||
trade_count BIGINT,
|
||||
bar_timestamp TIMESTAMP(6) WITH TIME ZONE,
|
||||
bar_interval VARCHAR,
|
||||
source VARCHAR,
|
||||
dt DATE
|
||||
) WITH (
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Analytical fact table: market_quotes
-- Quote snapshots for tracked symbols.
--
-- Storage: external Parquet table on MinIO; data files live under external_location.
-- Layout:  one Hive-style partition directory per dt value, i.e.
--          s3a://stonks-lakehouse/warehouse/market_quotes/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 2.1, 9.4, 9.5, 10.1
-- Design ref:   Section 7 (lake.market_quotes)

CREATE TABLE IF NOT EXISTS lakehouse.stonks.market_quotes (
    ticker      VARCHAR,
    bid_price   DOUBLE,
    ask_price   DOUBLE,
    bid_size    BIGINT,
    ask_size    BIGINT,
    last_price  DOUBLE,
    last_size   BIGINT,
    source      VARCHAR,
    quote_at    TIMESTAMP(6) WITH TIME ZONE,
    -- Partition column: declared last because it is the partitioned_by key below.
    dt          DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    external_location = 's3a://stonks-lakehouse/warehouse/market_quotes/'
);
|
||||
@@ -0,0 +1,33 @@
|
||||
-- Analytical fact table: model_performance
-- Tracks extraction model performance for Trino/Superset dashboards.
--
-- Partitioned by dt and model_version on MinIO (model_name is a regular data
-- column, not a partition key).
-- Path: s3a://stonks-lakehouse/warehouse/model_performance/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
-- Requirements: 12.1, 12.2

CREATE TABLE IF NOT EXISTS lakehouse.stonks.model_performance (
    document_id                 VARCHAR,
    ticker                      VARCHAR,
    model_name                  VARCHAR,
    prompt_version              VARCHAR,
    schema_version              VARCHAR,
    success                     BOOLEAN,
    attempt_count               INTEGER,
    total_duration_ms           INTEGER,
    first_attempt_duration_ms   INTEGER,
    final_attempt_duration_ms   INTEGER,
    confidence                  DOUBLE,
    validation_status           VARCHAR,
    validation_error_count      INTEGER,
    validation_warning_count    INTEGER,
    retry_count                 INTEGER,
    input_token_estimate        INTEGER,
    output_token_estimate       INTEGER,
    company_count               INTEGER,
    recorded_at                 TIMESTAMP(6) WITH TIME ZONE,
    -- Partition columns: declared last and in the same order as partitioned_by.
    dt                          DATE,
    model_version               VARCHAR
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt', 'model_version'],
    external_location = 's3a://stonks-lakehouse/warehouse/model_performance/'
);
|
||||
@@ -1,12 +1,19 @@
|
||||
-- Analytical fact table: pnl_daily
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Daily profit and loss records per symbol per account.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/pnl_daily/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.pnl_daily)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.pnl_daily (
|
||||
ticker VARCHAR,
|
||||
realized_pnl DOUBLE,
|
||||
unrealized_pnl DOUBLE,
|
||||
total_pnl DOUBLE,
|
||||
fees DOUBLE,
|
||||
net_pnl DOUBLE,
|
||||
broker_account VARCHAR,
|
||||
execution_mode VARCHAR,
|
||||
dt DATE
|
||||
) WITH (
|
||||
format = 'PARQUET',
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
-- Analytical fact table: positions_daily
|
||||
-- Partitioned by dt on MinIO
|
||||
-- End-of-day position snapshots.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/positions_daily/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.positions_daily)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.positions_daily (
|
||||
ticker VARCHAR,
|
||||
quantity DOUBLE,
|
||||
avg_entry_price DOUBLE,
|
||||
close_price DOUBLE,
|
||||
market_value DOUBLE,
|
||||
unrealized_pnl DOUBLE,
|
||||
broker_account VARCHAR,
|
||||
execution_mode VARCHAR,
|
||||
snapshot_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
) WITH (
|
||||
|
||||
@@ -1,19 +1,24 @@
|
||||
-- Analytical fact table: prediction_vs_outcome
-- Prediction accuracy tracking: predicted signals vs realized market moves.
--
-- Partitioned by dt and model_version on MinIO.
-- Path: s3a://stonks-lakehouse/warehouse/prediction_vs_outcome/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
-- Requirements: 9.4, 9.5, 10.1, 10.3
-- Design ref: Section 7 (lake.prediction_vs_outcome)

CREATE TABLE IF NOT EXISTS lakehouse.stonks.prediction_vs_outcome (
    recommendation_id     VARCHAR,
    ticker                VARCHAR,
    predicted_action      VARCHAR,
    predicted_confidence  DOUBLE,
    actual_move_pct       DOUBLE,
    outcome               VARCHAR,
    horizon_days          INTEGER,
    predicted_at          TIMESTAMP(6) WITH TIME ZONE,
    evaluated_at          TIMESTAMP(6) WITH TIME ZONE,
    -- Partition columns. The Hive connector requires them to be the LAST columns
    -- and in the same order as partitioned_by; declaring model_version before dt
    -- makes the CREATE fail with a partition-key ordering error.
    dt                    DATE,
    model_version         VARCHAR
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt', 'model_version'],
    external_location = 's3a://stonks-lakehouse/warehouse/prediction_vs_outcome/'
);
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
-- Analytical fact table: trade_fills
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Fill and execution records from broker.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/trade_fills/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.trade_fills)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
|
||||
fill_id VARCHAR,
|
||||
@@ -8,6 +12,7 @@ CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_fills (
|
||||
side VARCHAR,
|
||||
fill_price DOUBLE,
|
||||
fill_quantity DOUBLE,
|
||||
commission DOUBLE,
|
||||
broker_account VARCHAR,
|
||||
filled_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
-- Analytical fact table: trade_orders
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Order submission records for paper and live trading.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/trade_orders/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 8.3, 9.4, 9.5, 10.1, 10.3
|
||||
-- Design ref: Section 7 (lake.trade_orders)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_orders (
|
||||
order_id VARCHAR,
|
||||
recommendation_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
side VARCHAR,
|
||||
order_type VARCHAR,
|
||||
quantity DOUBLE,
|
||||
limit_price DOUBLE,
|
||||
status VARCHAR,
|
||||
execution_mode VARCHAR,
|
||||
broker_account VARCHAR,
|
||||
submitted_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
|
||||
@@ -1,16 +1,24 @@
|
||||
-- Analytical fact table: trade_signals
|
||||
-- Partitioned by dt on MinIO
|
||||
-- Aggregated trend signals and recommendation actions.
|
||||
-- Partitioned by dt on MinIO.
|
||||
-- Path: s3://stonks-lakehouse/warehouse/trade_signals/dt={yyyy-mm-dd}/part-*.parquet
|
||||
-- Requirements: 6.1, 6.2, 6.4, 6.5, 7.1, 9.4, 9.5, 10.1
|
||||
-- Design ref: Section 6.4, Section 6.5, Section 7 (lake.trade_signals)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lakehouse.stonks.trade_signals (
|
||||
signal_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
trend_direction VARCHAR,
|
||||
trend_strength DOUBLE,
|
||||
confidence DOUBLE,
|
||||
action VARCHAR,
|
||||
time_horizon VARCHAR,
|
||||
generated_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
signal_id VARCHAR,
|
||||
ticker VARCHAR,
|
||||
trend_direction VARCHAR,
|
||||
trend_strength DOUBLE,
|
||||
confidence DOUBLE,
|
||||
contradiction_score DOUBLE,
|
||||
dominant_catalysts VARCHAR,
|
||||
material_risks VARCHAR,
|
||||
action VARCHAR,
|
||||
time_horizon VARCHAR,
|
||||
recommendation_id VARCHAR,
|
||||
generated_at TIMESTAMP(6) WITH TIME ZONE,
|
||||
dt DATE
|
||||
) WITH (
|
||||
format = 'PARQUET',
|
||||
partitioned_by = ARRAY['dt'],
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
# Lakehouse Views
|
||||
|
||||
Example SQL views for Trino over MinIO-backed analytical fact tables.
|
||||
|
||||
These views are designed to be created in the `lakehouse.stonks` schema and
|
||||
can be used directly in Superset dashboards or ad hoc Trino queries.
|
||||
|
||||
## Views
|
||||
|
||||
- `prediction_accuracy` — Joins predicted signals with realized market moves to score prediction quality
|
||||
- `paper_trade_scorecard` — Aggregates paper trading performance per symbol and broker account, with win rates and PnL
|
||||
- `paper_trade_detail` — Per-order paper trade detail with fill prices and realized outcomes
|
||||
- `signal_hit_rate` — Daily signal accuracy summary per model version, aggregated across all symbols
|
||||
|
||||
## Usage
|
||||
|
||||
Connect to Trino and run each `.sql` file to create the view:
|
||||
|
||||
```bash
|
||||
trino --catalog lakehouse --schema stonks < lakehouse/views/prediction_accuracy.sql
|
||||
```
|
||||
|
||||
Or paste into the Superset SQL Lab to explore interactively.
|
||||
@@ -0,0 +1,47 @@
|
||||
-- View: paper_trade_detail
-- Drill-down companion to the scorecard: every paper order, with any fills and
-- the prediction outcome of the recommendation that produced it.
--
-- Grain: one row per (order, matching fill, matching outcome) combination. An
-- order with no fill or no outcome row still appears, with NULLs in those columns.
-- Both joins also require the same dt as the order, so a fill or outcome stored
-- under a different dt partition is not attached.
-- Requirements: 10.1, 10.3, 10.4
-- Design ref: Section 9.2 (evidence-to-outcome drill-down)

CREATE OR REPLACE VIEW lakehouse.stonks.paper_trade_detail AS
WITH paper_orders AS (
    -- Restrict to paper execution before joining.
    SELECT
        order_id,
        recommendation_id,
        ticker,
        side,
        order_type,
        quantity,
        limit_price,
        status,
        submitted_at,
        broker_account,
        dt
    FROM
        lakehouse.stonks.trade_orders
    WHERE
        execution_mode = 'paper'
)
SELECT
    ord.order_id,
    ord.recommendation_id,
    ord.ticker,
    ord.side,
    ord.order_type,
    ord.quantity,
    ord.limit_price,
    ord.status                  AS order_status,
    ord.submitted_at,
    fil.fill_id,
    fil.fill_price,
    fil.fill_quantity,
    fil.commission,
    fil.filled_at,
    -- Slippage in percent of the limit price. Positive means the fill was above
    -- the limit (worse for buys). The value is not adjusted for side.
    -- A NULL or non-positive limit price yields NULL (no CASE branch matches).
    CASE
        WHEN ord.limit_price > 0
            THEN (fil.fill_price - ord.limit_price) / ord.limit_price * 100
    END                         AS slippage_pct,
    -- Outcome of the originating recommendation
    outc.predicted_action,
    outc.predicted_confidence,
    outc.actual_move_pct,
    outc.outcome                AS prediction_outcome,
    ord.broker_account,
    ord.dt
FROM
    paper_orders AS ord
LEFT JOIN
    lakehouse.stonks.trade_fills AS fil
    ON  fil.order_id = ord.order_id
    AND fil.dt = ord.dt
LEFT JOIN
    lakehouse.stonks.prediction_vs_outcome AS outc
    ON  outc.recommendation_id = ord.recommendation_id
    AND outc.dt = ord.dt;
|
||||
@@ -0,0 +1,42 @@
|
||||
-- View: paper_trade_scorecard
-- Aggregates paper trading performance per symbol and broker account: PnL
-- totals, daily win rate, best/worst day, and order counts. Filters to paper
-- execution mode only.
--
-- PnL and orders are aggregated separately and only then combined. Joining
-- trade_orders straight onto pnl_daily repeats each PnL row once per matching
-- order, which inflates every SUM/AVG and the win-rate counts whenever a symbol
-- has more than one order on the same day.
-- Requirements: 10.1, 10.2, 10.3
-- Design ref: Section 9.2 (paper trading PnL scorecard)

CREATE OR REPLACE VIEW lakehouse.stonks.paper_trade_scorecard AS
WITH paper_pnl AS (
    -- Paper-mode PnL rows, untouched by any join.
    SELECT
        ticker,
        broker_account,
        dt,
        realized_pnl,
        unrealized_pnl,
        net_pnl,
        fees
    FROM
        lakehouse.stonks.pnl_daily
    WHERE
        execution_mode = 'paper'
),
pnl_summary AS (
    -- One row per symbol/account, computed from PnL rows only.
    SELECT
        ticker,
        broker_account,
        COUNT(DISTINCT dt)      AS trading_days,
        SUM(realized_pnl)       AS total_realized_pnl,
        SUM(unrealized_pnl)     AS total_unrealized_pnl,
        SUM(net_pnl)            AS total_net_pnl,
        SUM(fees)               AS total_fees,
        AVG(net_pnl)            AS avg_daily_pnl,
        -- Win rate: fraction of PnL rows with positive net PnL
        CAST(
            COUNT(CASE WHEN net_pnl > 0 THEN 1 END) AS DOUBLE
        ) / NULLIF(COUNT(*), 0) AS win_rate,
        MIN(net_pnl)            AS worst_day_pnl,
        MAX(net_pnl)            AS best_day_pnl,
        MIN(dt)                 AS first_trade_date,
        MAX(dt)                 AS last_trade_date
    FROM
        paper_pnl
    GROUP BY
        ticker,
        broker_account
),
pnl_days AS (
    -- Distinct symbol/account/day keys; orders are only counted on days that
    -- have a PnL row, as in the original join.
    SELECT DISTINCT
        ticker,
        broker_account,
        dt
    FROM
        paper_pnl
),
order_summary AS (
    SELECT
        d.ticker,
        d.broker_account,
        COUNT(DISTINCT o.order_id) AS total_orders,
        COUNT(DISTINCT CASE WHEN o.status = 'filled' THEN o.order_id END)
            AS filled_orders
    FROM
        pnl_days AS d
    INNER JOIN
        lakehouse.stonks.trade_orders AS o
        ON  d.ticker = o.ticker
        AND d.broker_account = o.broker_account
        AND d.dt = o.dt
    WHERE
        o.execution_mode = 'paper'
    GROUP BY
        d.ticker,
        d.broker_account
)
SELECT
    p.ticker,
    p.broker_account,
    p.trading_days,
    p.total_realized_pnl,
    p.total_unrealized_pnl,
    p.total_net_pnl,
    p.total_fees,
    p.avg_daily_pnl,
    p.win_rate,
    p.worst_day_pnl,
    p.best_day_pnl,
    -- Symbols with PnL but no orders report 0, not NULL.
    COALESCE(os.total_orders, 0)  AS total_orders,
    COALESCE(os.filled_orders, 0) AS filled_orders,
    p.first_trade_date,
    p.last_trade_date
FROM
    pnl_summary AS p
LEFT JOIN
    order_summary AS os
    ON  p.ticker = os.ticker
    AND p.broker_account = os.broker_account;
|
||||
@@ -0,0 +1,44 @@
|
||||
-- View: prediction_accuracy
-- Per-prediction accuracy scorecard: each prediction_vs_outcome row, enriched
-- with the trend fields of the trade_signals row that shares its
-- recommendation_id and dt (NULL when no such signal exists).
-- Requirements: 10.1, 10.2, 10.3, 10.4
-- Design ref: Section 9.2 (prediction confidence vs realized move)

CREATE OR REPLACE VIEW lakehouse.stonks.prediction_accuracy AS
SELECT
    pvo.recommendation_id,
    pvo.ticker,
    pvo.predicted_action,
    pvo.predicted_confidence,
    pvo.actual_move_pct,
    pvo.outcome,
    pvo.horizon_days,
    pvo.predicted_at,
    pvo.evaluated_at,
    pvo.model_version,
    ts.trend_direction,
    ts.trend_strength,
    ts.contradiction_score,
    ts.dominant_catalysts,
    -- Confidence bucket for dashboard grouping.
    -- Note: a NULL confidence matches neither threshold and lands in 'low'.
    CASE
        WHEN pvo.predicted_confidence >= 0.8 THEN 'high'
        WHEN pvo.predicted_confidence >= 0.5 THEN 'medium'
        ELSE 'low'
    END AS confidence_bucket,
    -- Direction correctness: did the predicted action match the sign of the move?
    --   NULL  : no realized move yet, or a non-directional action (hold/watch)
    --   true  : buy with a positive move, or sell with a negative move
    --   false : everything else (including a 0% move on a buy/sell)
    -- The IS NULL guard comes first: without it a NULL move makes both
    -- comparisons UNKNOWN and the row falls through to ELSE false.
    CASE
        WHEN pvo.actual_move_pct IS NULL THEN NULL
        WHEN pvo.predicted_action IN ('hold', 'watch') THEN NULL
        WHEN pvo.predicted_action = 'buy' AND pvo.actual_move_pct > 0 THEN true
        WHEN pvo.predicted_action = 'sell' AND pvo.actual_move_pct < 0 THEN true
        ELSE false
    END AS direction_correct,
    -- Absolute size of the realized move, in the same units as actual_move_pct
    ABS(pvo.actual_move_pct) AS abs_move_pct,
    pvo.dt
FROM
    lakehouse.stonks.prediction_vs_outcome AS pvo
LEFT JOIN
    lakehouse.stonks.trade_signals AS ts
    ON  pvo.recommendation_id = ts.recommendation_id
    AND pvo.dt = ts.dt;
|
||||
@@ -0,0 +1,31 @@
|
||||
-- View: signal_hit_rate
-- One row per (dt, model_version) summarising prediction outcomes across all
-- symbols. Designed for the Superset prediction accuracy dashboard.
-- Requirements: 10.1, 10.2, 10.3
-- Design ref: Section 9.2 (prediction confidence vs realized move)

CREATE OR REPLACE VIEW lakehouse.stonks.signal_hit_rate AS
SELECT
    outc.dt,
    outc.model_version,
    COUNT(*)                                            AS total_predictions,
    COUNT(*) FILTER (WHERE outc.outcome = 'correct')    AS correct_predictions,
    COUNT(*) FILTER (WHERE outc.outcome = 'incorrect')  AS incorrect_predictions,
    COUNT(*) FILTER (WHERE outc.outcome = 'neutral')    AS neutral_predictions,
    -- Hit rate: correct predictions over ALL rows in the group, so neutral rows
    -- (and any other outcome value) count in the denominator.
    CAST(
        COUNT(*) FILTER (WHERE outc.outcome = 'correct') AS DOUBLE
    ) / NULLIF(COUNT(*), 0)                             AS hit_rate,
    -- Mean stated confidence, split by whether the prediction was right
    AVG(outc.predicted_confidence) FILTER (WHERE outc.outcome = 'correct')
                                                        AS avg_confidence_correct,
    AVG(outc.predicted_confidence) FILTER (WHERE outc.outcome = 'incorrect')
                                                        AS avg_confidence_incorrect,
    -- Realized move: mean magnitude and mean signed value
    AVG(ABS(outc.actual_move_pct))                      AS avg_abs_move_pct,
    AVG(outc.actual_move_pct)                           AS avg_move_pct
FROM
    lakehouse.stonks.prediction_vs_outcome AS outc
GROUP BY
    outc.dt,
    outc.model_version;
|
||||
Reference in New Issue
Block a user