ebea70573b
- Repository structure for all services, infra, lakehouse, dashboards - K8s manifests targeting stonks-oracle namespace with GHCR images - Ingress via Traefik with ca-issuer TLS for internal services - ConfigMap wired to existing cluster services (pg, redis, minio, ollama) - GitHub Actions workflow for lint, test, multi-service container builds - Dockerfile with build-arg CMD per service - Makefile for local build/push/deploy - Steering rules for TDD workflow, K8s conventions, project context - Agent hooks for lint-on-save, test-on-save, k8s-validate, phase-commit - Ruff linter config, all lint issues fixed - 14 passing tests for schemas, config, redis keys - PostgreSQL migrations, Trino catalogs, Superset config, MinIO lifecycle
115 lines
4.4 KiB
SQL
115 lines
4.4 KiB
SQL
-- Stonks Oracle - Documents and Intelligence Schema
|
|
|
|
-- ============================================================
|
|
-- Market Snapshots
|
|
-- ============================================================
|
|
|
|
-- market_snapshots: market data payloads, each tied to a companies row.
-- The payload is kept inline in the JSONB `data` column; `storage_ref` and
-- `content_hash` are optional (nullable).
-- NOTE(review): uuid_generate_v4() is provided by the uuid-ossp extension and
-- `companies` is not defined in this file; both are presumably created by an
-- earlier migration -- confirm.
CREATE TABLE market_snapshots (
    id              UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    -- No ON DELETE action is declared, so the default (NO ACTION) applies:
    -- a company cannot be deleted while snapshots still reference it.
    company_id      UUID NOT NULL REFERENCES companies(id),
    ticker          VARCHAR(20) NOT NULL,
    snapshot_type   VARCHAR(50) NOT NULL,
    data            JSONB NOT NULL,
    source_provider VARCHAR(100),
    captured_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    storage_ref     VARCHAR(1000),
    content_hash    VARCHAR(128)
);

-- Supports "latest snapshots for a ticker" style lookups (newest first).
CREATE INDEX idx_market_snapshots_ticker ON market_snapshots(ticker, captured_at DESC);
-- Non-unique: several snapshots may share a content_hash.
CREATE INDEX idx_market_snapshots_hash ON market_snapshots(content_hash);
-- Index the foreign-key column, matching the company_id indexes on
-- document_company_mentions and document_impact_records; without it, joins by
-- company and the referential check on companies deletes scan the table.
CREATE INDEX idx_market_snapshots_company ON market_snapshots(company_id);
|
|
|
|
-- ============================================================
|
|
-- Documents
|
|
-- ============================================================
|
|
|
|
-- documents: one row per ingested document, with source metadata, storage
-- references and parse/processing state.
-- NOTE(review): uuid_generate_v4() is provided by the uuid-ossp extension,
-- presumably installed by an earlier migration -- confirm.
CREATE TABLE documents (
    id                     UUID          DEFAULT uuid_generate_v4(),
    document_type          VARCHAR(50)   NOT NULL,
    source_type            VARCHAR(50)   NOT NULL,
    publisher              VARCHAR(500),
    url                    TEXT,
    canonical_url          TEXT,
    title                  TEXT,
    published_at           TIMESTAMPTZ,
    retrieved_at           TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    language               VARCHAR(10)   DEFAULT 'en',
    content_hash           VARCHAR(128)  NOT NULL,
    raw_storage_ref        VARCHAR(1000),
    normalized_storage_ref VARCHAR(1000),
    parse_quality_score    FLOAT,
    parse_confidence       VARCHAR(20)   DEFAULT 'unknown',
    -- New rows start in the 'ingested' state unless the writer says otherwise.
    status                 VARCHAR(50)   NOT NULL DEFAULT 'ingested',
    created_at             TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_at             TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id)
);

-- Unique: at most one documents row may exist per content_hash.
CREATE UNIQUE INDEX idx_documents_hash
    ON documents (content_hash);

CREATE INDEX idx_documents_status
    ON documents (status);

-- Newest-first ordering by publication time.
CREATE INDEX idx_documents_published
    ON documents (published_at DESC);
|
|
|
|
-- document_versions: versioned content records belonging to a document.
-- Rows are removed automatically when the parent document is deleted
-- (ON DELETE CASCADE).
-- NOTE(review): (document_id, version) looks like it should be unique, but
-- `version` defaults to 1 and no constraint is declared -- confirm against the
-- writers before adding a UNIQUE constraint.
CREATE TABLE document_versions (
    id           UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id  UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    version      INTEGER NOT NULL DEFAULT 1,
    content_hash VARCHAR(128) NOT NULL,
    storage_ref  VARCHAR(1000),
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Index the referencing column so cascaded deletes from documents and
-- per-document version lookups do not scan the whole table; the other child
-- tables of documents in this file index document_id the same way.
CREATE INDEX idx_doc_versions_document ON document_versions(document_id, version);
|
|
|
|
-- document_company_mentions: links a document to a company it mentions.
-- Deleting the document removes its mention rows (ON DELETE CASCADE); the
-- companies reference declares no ON DELETE action, so the default applies.
CREATE TABLE document_company_mentions (
    id           UUID         DEFAULT uuid_generate_v4(),
    document_id  UUID         NOT NULL,
    company_id   UUID         NOT NULL,
    ticker       VARCHAR(20)  NOT NULL,
    mention_type VARCHAR(50)  DEFAULT 'direct',
    confidence   FLOAT        DEFAULT 0.5,
    created_at   TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE,
    FOREIGN KEY (company_id)  REFERENCES companies (id)
);

-- One index per foreign-key column, for lookups from either side.
CREATE INDEX idx_doc_mentions_doc
    ON document_company_mentions (document_id);

CREATE INDEX idx_doc_mentions_company
    ON document_company_mentions (company_id);
|
|
|
|
-- ============================================================
|
|
-- Document Intelligence (AI Extraction)
|
|
-- ============================================================
|
|
|
|
-- document_intelligence: AI extraction results attached to a document.
-- The document_id index is not unique, so a document may have several
-- intelligence rows. Rows are removed when the parent document is deleted.
CREATE TABLE document_intelligence (
    id                  UUID          DEFAULT uuid_generate_v4(),
    document_id         UUID          NOT NULL,
    summary             TEXT,
    -- JSONB list columns default to an empty JSON array.
    macro_themes        JSONB         DEFAULT '[]',
    novelty_score       FLOAT,
    source_credibility  FLOAT,
    extraction_warnings JSONB         DEFAULT '[]',
    confidence          FLOAT,
    -- Identifiers of the model, prompt and schema used for the extraction.
    model_provider      VARCHAR(50),
    model_name          VARCHAR(200),
    prompt_version      VARCHAR(100),
    schema_version      VARCHAR(50),
    raw_output_ref      VARCHAR(1000),
    prompt_ref          VARCHAR(1000),
    -- Validation starts as 'pending' with no recorded errors or retries.
    validation_status   VARCHAR(50)   DEFAULT 'pending',
    validation_errors   JSONB         DEFAULT '[]',
    retry_count         INTEGER       DEFAULT 0,
    created_at          TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    updated_at          TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
);

CREATE INDEX idx_doc_intel_document
    ON document_intelligence (document_id);

CREATE INDEX idx_doc_intel_validation
    ON document_intelligence (validation_status);
|
|
|
|
-- document_impact_records: per-company impact entries derived from a
-- document_intelligence row. Deleting the intelligence row removes its impact
-- records (ON DELETE CASCADE); the companies reference declares no ON DELETE
-- action, so the default applies.
CREATE TABLE document_impact_records (
    id              UUID         DEFAULT uuid_generate_v4(),
    intelligence_id UUID         NOT NULL,
    company_id      UUID         NOT NULL,
    ticker          VARCHAR(20)  NOT NULL,
    relevance       FLOAT,
    sentiment       VARCHAR(20),
    impact_score    FLOAT,
    impact_horizon  VARCHAR(50),
    catalyst_type   VARCHAR(50),
    -- JSONB list columns default to an empty JSON array.
    key_facts       JSONB        DEFAULT '[]',
    risks           JSONB        DEFAULT '[]',
    evidence_spans  JSONB        DEFAULT '[]',
    created_at      TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (intelligence_id) REFERENCES document_intelligence (id) ON DELETE CASCADE,
    FOREIGN KEY (company_id)      REFERENCES companies (id)
);

-- One index per foreign-key column.
CREATE INDEX idx_impact_intel
    ON document_impact_records (intelligence_id);

CREATE INDEX idx_impact_company
    ON document_impact_records (company_id);
|