34 lines
1.2 KiB
SQL
34 lines
1.2 KiB
SQL
-- Analytical fact table: document_extractions
|
|
-- AI extraction outputs per document per company.
|
|
-- Partitioned by dt and model_version on MinIO.
|
|
-- Path: s3://stonks-lakehouse/warehouse/document_extractions/dt={yyyy-mm-dd}/model_version={ver}/part-*.parquet
|
|
-- Requirements: 5.3, 5.5, 9.4, 9.5, 10.1, 10.4
|
|
-- Design ref: Section 6.3, Section 7 (lake.document_extractions)
|
|
|
|
CREATE TABLE IF NOT EXISTS lakehouse.stonks.document_extractions (
    -- Document and company the extraction row refers to
    -- (one row per document per company, per the file header).
    document_id         VARCHAR,
    ticker              VARCHAR,
    company_name        VARCHAR,

    -- Extraction signals. Numeric signals are DOUBLE; categorical ones are
    -- unconstrained VARCHAR (allowed value sets are not defined in this DDL).
    relevance           DOUBLE,
    sentiment           VARCHAR,
    impact_score        DOUBLE,
    impact_horizon      VARCHAR,
    catalyst_type       VARCHAR,
    confidence          DOUBLE,
    novelty_score       DOUBLE,
    source_credibility  DOUBLE,

    -- Free-text outputs stored as plain VARCHAR.
    -- NOTE(review): presumably serialized lists (e.g. JSON) - confirm against the writer.
    key_facts           VARCHAR,
    risks               VARCHAR,
    macro_themes        VARCHAR,

    -- Provenance of the extraction run.
    -- NOTE(review): confirm the target connector accepts TIMESTAMP(6) WITH TIME ZONE.
    model_name          VARCHAR,
    prompt_version      VARCHAR,
    schema_version      VARCHAR,
    extraction_at       TIMESTAMP(6) WITH TIME ZONE,

    -- Partition columns: kept last and in the same order as partitioned_by
    -- below (Hive-connector-style DDL expects this) - do not reorder.
    dt                  DATE,
    model_version       VARCHAR
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt', 'model_version'],
    -- Root prefix under which the dt=.../model_version=... partition
    -- directories described in the file header live.
    external_location = 's3a://stonks-lakehouse/warehouse/document_extractions/'
);
|