Files
stonks-oracle/infra/migrations/002_documents_and_intelligence.sql
Celes Renata ebea70573b phase 0+1: project scaffold, k8s manifests, CI pipeline, steering, hooks, tests
- Repository structure for all services, infra, lakehouse, dashboards
- K8s manifests targeting stonks-oracle namespace with GHCR images
- Ingress via Traefik with ca-issuer TLS for internal services
- ConfigMap wired to existing cluster services (pg, redis, minio, ollama)
- GitHub Actions workflow for lint, test, multi-service container builds
- Dockerfile with build-arg CMD per service
- Makefile for local build/push/deploy
- Steering rules for TDD workflow, K8s conventions, project context
- Agent hooks for lint-on-save, test-on-save, k8s-validate, phase-commit
- Ruff linter config, all lint issues fixed
- 14 passing tests for schemas, config, redis keys
- PostgreSQL migrations, Trino catalogs, Superset config, MinIO lifecycle
2026-04-11 03:25:08 -07:00

115 lines
4.4 KiB
SQL

-- Stonks Oracle - Documents and Intelligence Schema
-- ============================================================
-- Market Snapshots
-- ============================================================
-- market_snapshots: one row per captured snapshot. Every row references a
-- company and carries its payload as JSONB in `data`; `captured_at` defaults
-- to the insert time. `storage_ref` and `content_hash` are nullable.
CREATE TABLE market_snapshots (
    id              UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    company_id      UUID NOT NULL REFERENCES companies(id),
    ticker          VARCHAR(20) NOT NULL,
    snapshot_type   VARCHAR(50) NOT NULL,
    data            JSONB NOT NULL,
    source_provider VARCHAR(100),
    captured_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    storage_ref     VARCHAR(1000),
    content_hash    VARCHAR(128)
);
-- Newest-first scans of the snapshots for a given ticker.
CREATE INDEX idx_market_snapshots_ticker ON market_snapshots(ticker, captured_at DESC);
-- Non-unique lookup by content hash.
-- NOTE(review): presumably used for duplicate detection; confirm with the writer.
CREATE INDEX idx_market_snapshots_hash ON market_snapshots(content_hash);
-- PostgreSQL does not create an index on the referencing side of a foreign
-- key. Without this one, deleting a company (FK check) and any per-company
-- lookup must scan the whole table. The other company_id foreign keys in this
-- migration are indexed the same way.
CREATE INDEX idx_market_snapshots_company ON market_snapshots(company_id);
-- ============================================================
-- Documents
-- ============================================================
-- documents: one row per ingested document, holding its provenance columns
-- (publisher, URLs, timestamps), storage references and parse/processing
-- state. New rows start with status 'ingested' and parse_confidence 'unknown'.
CREATE TABLE documents (
    id                      UUID            DEFAULT uuid_generate_v4(),
    document_type           VARCHAR(50)     NOT NULL,
    source_type             VARCHAR(50)     NOT NULL,
    publisher               VARCHAR(500),
    url                     TEXT,
    canonical_url           TEXT,
    title                   TEXT,
    published_at            TIMESTAMPTZ,
    retrieved_at            TIMESTAMPTZ     NOT NULL DEFAULT NOW(),
    language                VARCHAR(10)     DEFAULT 'en',
    content_hash            VARCHAR(128)    NOT NULL,
    raw_storage_ref         VARCHAR(1000),
    normalized_storage_ref  VARCHAR(1000),
    parse_quality_score     FLOAT,
    parse_confidence        VARCHAR(20)     DEFAULT 'unknown',
    status                  VARCHAR(50)     NOT NULL DEFAULT 'ingested',
    created_at              TIMESTAMPTZ     NOT NULL DEFAULT NOW(),
    updated_at              TIMESTAMPTZ     NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id)
);

-- A content hash may appear at most once across all documents.
CREATE UNIQUE INDEX idx_documents_hash
    ON documents (content_hash);

-- Filter documents by processing status.
CREATE INDEX idx_documents_status
    ON documents (status);

-- Newest-first ordering by publication time.
CREATE INDEX idx_documents_published
    ON documents (published_at DESC);
-- document_versions: versioned content records for a document. Each row
-- stores a version number (default 1), the content hash for that version and
-- an optional storage reference. Rows are removed when the parent document is
-- deleted (ON DELETE CASCADE).
-- NOTE(review): (document_id, version) is not declared unique; confirm whether
-- duplicate version numbers per document are intended before adding one.
CREATE TABLE document_versions (
    id           UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    document_id  UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    version      INTEGER NOT NULL DEFAULT 1,
    content_hash VARCHAR(128) NOT NULL,
    storage_ref  VARCHAR(1000),
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Index the parent key: PostgreSQL does not index referencing columns on its
-- own, so without this every cascading delete of a document, and every
-- "versions of document X" lookup, scans the whole table.
CREATE INDEX idx_doc_versions_document ON document_versions(document_id);
-- document_company_mentions: links a document to a company it mentions.
-- mention_type defaults to 'direct' and confidence to 0.5. Rows are removed
-- when the parent document is deleted; the company reference has no cascade.
CREATE TABLE document_company_mentions (
    id            UUID          DEFAULT uuid_generate_v4(),
    document_id   UUID          NOT NULL,
    company_id    UUID          NOT NULL,
    ticker        VARCHAR(20)   NOT NULL,
    mention_type  VARCHAR(50)   DEFAULT 'direct',
    confidence    FLOAT         DEFAULT 0.5,
    created_at    TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE,
    FOREIGN KEY (company_id) REFERENCES companies(id)
);

-- Mentions belonging to a given document.
CREATE INDEX idx_doc_mentions_doc
    ON document_company_mentions (document_id);

-- Mentions of a given company.
CREATE INDEX idx_doc_mentions_company
    ON document_company_mentions (company_id);
-- ============================================================
-- Document Intelligence (AI Extraction)
-- ============================================================
-- document_intelligence: extraction output stored per document, together with
-- the model/prompt/schema identifiers that produced it and its validation
-- state. JSONB list columns default to an empty array, validation_status to
-- 'pending' and retry_count to 0. Rows are removed with the parent document.
CREATE TABLE document_intelligence (
    id                   UUID            DEFAULT uuid_generate_v4(),
    document_id          UUID            NOT NULL,
    summary              TEXT,
    macro_themes         JSONB           DEFAULT '[]',
    novelty_score        FLOAT,
    source_credibility   FLOAT,
    extraction_warnings  JSONB           DEFAULT '[]',
    confidence           FLOAT,
    model_provider       VARCHAR(50),
    model_name           VARCHAR(200),
    prompt_version       VARCHAR(100),
    schema_version       VARCHAR(50),
    raw_output_ref       VARCHAR(1000),
    prompt_ref           VARCHAR(1000),
    validation_status    VARCHAR(50)     DEFAULT 'pending',
    validation_errors    JSONB           DEFAULT '[]',
    retry_count          INTEGER         DEFAULT 0,
    created_at           TIMESTAMPTZ     NOT NULL DEFAULT NOW(),
    updated_at           TIMESTAMPTZ     NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
);

-- Intelligence rows belonging to a given document.
CREATE INDEX idx_doc_intel_document
    ON document_intelligence (document_id);

-- Filter intelligence rows by validation status.
CREATE INDEX idx_doc_intel_validation
    ON document_intelligence (validation_status);
-- document_impact_records: per-company impact rows attached to a
-- document_intelligence row. JSONB list columns default to an empty array.
-- Rows are removed when the parent intelligence row is deleted; the company
-- reference has no cascade.
CREATE TABLE document_impact_records (
    id               UUID          DEFAULT uuid_generate_v4(),
    intelligence_id  UUID          NOT NULL,
    company_id       UUID          NOT NULL,
    ticker           VARCHAR(20)   NOT NULL,
    relevance        FLOAT,
    sentiment        VARCHAR(20),
    impact_score     FLOAT,
    impact_horizon   VARCHAR(50),
    catalyst_type    VARCHAR(50),
    key_facts        JSONB         DEFAULT '[]',
    risks            JSONB         DEFAULT '[]',
    evidence_spans   JSONB         DEFAULT '[]',
    created_at       TIMESTAMPTZ   NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (intelligence_id) REFERENCES document_intelligence(id) ON DELETE CASCADE,
    FOREIGN KEY (company_id) REFERENCES companies(id)
);

-- Impact rows belonging to a given intelligence row.
CREATE INDEX idx_impact_intel
    ON document_impact_records (intelligence_id);

-- Impact rows for a given company.
CREATE INDEX idx_impact_company
    ON document_impact_records (company_id);