-- File viewer metadata (not part of the DDL): 28 lines, 948 B, SQL

-- Analytical fact table: documents
-- Ingested document metadata for articles, filings, transcripts, and press releases.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 3.1, 3.3, 9.4, 9.5, 10.1, 10.4
-- Design ref: Section 6.2, Section 7 (lake.documents)
-- Registers the documents table in the lakehouse.stonks schema.
-- IF NOT EXISTS turns the statement into a no-op when the table is already
-- defined, so this file can be re-applied safely.
-- No column carries NOT NULL, so every column is nullable.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
    -- Free-form string attributes (unbounded VARCHAR).
    document_id    VARCHAR,
    document_type  VARCHAR,
    source_type    VARCHAR,
    ticker         VARCHAR,
    publisher      VARCHAR,
    title          VARCHAR,
    url            VARCHAR,
    canonical_url  VARCHAR,
    language       VARCHAR,

    -- Zone-aware instants with microsecond precision.
    -- NOTE(review): partitioned_by / external_location below are Hive-connector
    -- property names in Trino; confirm the catalog's connector accepts
    -- TIMESTAMP(6) WITH TIME ZONE columns.
    published_at   TIMESTAMP(6) WITH TIME ZONE,
    retrieved_at   TIMESTAMP(6) WITH TIME ZONE,

    content_hash   VARCHAR,
    -- NOTE(review): range and meaning of confidence are not defined in this
    -- file; confirm against the design doc.
    confidence     DOUBLE,

    -- Partition key referenced by partitioned_by; declared last
    -- (presumably required by the connector; verify).
    dt             DATE
)
WITH (
    format            = 'PARQUET',
    partitioned_by    = ARRAY['dt'],
    -- Existing data directory the table is defined over.
    external_location = 's3a://stonks-lakehouse/warehouse/documents/'
);