-- File: stonks-oracle/lakehouse/schemas/document_extractions.sql

-- Analytical fact table: document_extractions
-- AI extraction outputs per document per company.
-- Partitioned by dt and model_version on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/document_extractions/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
-- Requirements: 5.3, 5.5, 9.4, 9.5, 10.1, 10.4
-- Design ref: Section 6.3, Section 7 (lake.document_extractions)
-- External Parquet table holding one AI extraction result per document per
-- company. IF NOT EXISTS makes this statement safe to re-run.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.document_extractions (
    -- Document and company the extraction refers to.
    document_id         VARCHAR,
    ticker              VARCHAR,
    company_name        VARCHAR,

    -- Scores and categorical labels produced by the extraction.
    relevance           DOUBLE,
    sentiment           VARCHAR,
    impact_score        DOUBLE,
    impact_horizon      VARCHAR,
    catalyst_type       VARCHAR,
    confidence          DOUBLE,
    novelty_score       DOUBLE,
    source_credibility  DOUBLE,

    -- Textual extraction outputs, stored as plain VARCHAR.
    -- NOTE(review): presumably serialized lists; confirm the encoding with the writer.
    key_facts           VARCHAR,
    risks               VARCHAR,
    macro_themes        VARCHAR,

    -- Provenance: which model / prompt / schema produced the row, and when.
    model_name          VARCHAR,
    prompt_version      VARCHAR,
    schema_version      VARCHAR,
    -- NOTE(review): confirm the target connector accepts TIMESTAMP WITH TIME ZONE
    -- for Parquet-backed external tables.
    extraction_at       TIMESTAMP(6) WITH TIME ZONE,

    -- Partition keys (see partitioned_by below). Kept as the final columns, in
    -- the same order as partitioned_by -- Hive-style connectors require this;
    -- do not add columns after them.
    dt                  DATE,
    model_version       VARCHAR
)
WITH (
    external_location = 's3a://stonks-lakehouse/warehouse/document_extractions/',
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt', 'model_version']
);