-- Analytical fact table: documents
-- Ingested document metadata for articles, filings, transcripts, and press releases.
-- Partitioned by dt on MinIO.
-- Path: s3://stonks-lakehouse/warehouse/documents/dt={yyyy-mm-dd}/part-*.parquet
-- Requirements: 3.1, 3.3, 9.4, 9.5, 10.1, 10.4
-- Design ref: Section 6.2, Section 7 (lake.documents)

-- Declares lakehouse.stonks.documents over the Parquet files stored under the
-- documents/ prefix. IF NOT EXISTS makes the script safe to re-run: an
-- existing table is left untouched (including any schema drift).
-- No column carries NOT NULL, so every field may be NULL in the data files.
CREATE TABLE IF NOT EXISTS lakehouse.stonks.documents (
    document_id     VARCHAR,
    document_type   VARCHAR,
    source_type     VARCHAR,
    ticker          VARCHAR,
    publisher       VARCHAR,
    title           VARCHAR,
    url             VARCHAR,
    canonical_url   VARCHAR,
    language        VARCHAR,
    -- NOTE(review): the Trino Hive connector has historically rejected
    -- TIMESTAMP WITH TIME ZONE columns at CREATE TABLE time; confirm the
    -- lakehouse catalog accepts this type before relying on the script.
    published_at    TIMESTAMP(6) WITH TIME ZONE,
    retrieved_at    TIMESTAMP(6) WITH TIME ZONE,
    content_hash    VARCHAR,
    confidence      DOUBLE,
    -- Partition key. It is deliberately the final column: partitioned_by
    -- columns must come last in the column list.
    dt              DATE
) WITH (
    format = 'PARQUET',
    -- One directory per dt value (dt=yyyy-mm-dd) under external_location.
    -- NOTE(review): partitions written straight to object storage are
    -- presumably not queryable until registered in the metastore (e.g. via
    -- system.sync_partition_metadata); confirm the ingestion job does this.
    partitioned_by = ARRAY['dt'],
    -- NOTE(review): this uses the s3a:// scheme while the file header
    -- documents the same bucket/prefix as s3://; confirm which scheme the
    -- catalog's filesystem configuration expects.
    external_location = 's3a://stonks-lakehouse/warehouse/documents/'
);