diff --git a/infra/migrations/023_trend_windows_upsert_and_cleanup.sql b/infra/migrations/023_trend_windows_upsert_and_cleanup.sql
index 8f3d6e2..ae1a46b 100644
--- a/infra/migrations/023_trend_windows_upsert_and_cleanup.sql
+++ b/infra/migrations/023_trend_windows_upsert_and_cleanup.sql
@@ -1,7 +1,56 @@
 -- Fix trend_windows to upsert instead of accumulating rows.
--- Add unique constraint so ON CONFLICT works, then deduplicate existing data.
+-- First, preserve all historical data in trend_history before deduplicating.
 
--- Step 1: Keep only the most recent row per (entity_type, entity_id, window)
+-- Step 1: Create trend_history table if it doesn't exist yet
+-- (migration 024 also creates it, but we need it here first to preserve data).
+-- Keep this definition in sync with the one in 024: whichever migration runs
+-- first creates the table and the other CREATE ... IF NOT EXISTS is a no-op.
+CREATE TABLE IF NOT EXISTS trend_history (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    entity_type VARCHAR(50) NOT NULL DEFAULT 'company',
+    entity_id VARCHAR(100) NOT NULL,
+    "window" VARCHAR(20) NOT NULL,
+    trend_direction VARCHAR(20) NOT NULL DEFAULT 'neutral',
+    trend_strength FLOAT DEFAULT 0.5,
+    confidence FLOAT DEFAULT 0.5,
+    contradiction_score FLOAT DEFAULT 0.0,
+    dominant_catalysts JSONB DEFAULT '[]',
+    material_risks JSONB DEFAULT '[]',
+    generated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_trend_history_lookup
+    ON trend_history (entity_id, "window", generated_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_trend_history_generated
+    ON trend_history (generated_at DESC);
+
+-- Step 2: Copy ALL existing trend_windows rows into trend_history
+-- before we delete any. This preserves the full history for charting.
+-- The NOT EXISTS guard makes the copy re-runnable. It matches on the full
+-- snapshot key, including entity_type, so two entity types that share an
+-- entity_id, window and timestamp are both preserved.
+INSERT INTO trend_history (
+    entity_type, entity_id, "window", trend_direction,
+    trend_strength, confidence, contradiction_score,
+    dominant_catalysts, material_risks, generated_at
+)
+SELECT
+    tw.entity_type, tw.entity_id, tw."window", tw.trend_direction,
+    tw.trend_strength, tw.confidence, tw.contradiction_score,
+    tw.dominant_catalysts, tw.material_risks, tw.generated_at
+FROM trend_windows AS tw
+WHERE NOT EXISTS (
+    SELECT 1
+    FROM trend_history AS th
+    WHERE th.entity_type = tw.entity_type
+      AND th.entity_id = tw.entity_id
+      AND th."window" = tw."window"
+      AND th.generated_at = tw.generated_at
+);
+
+-- Step 3: Keep only the most recent row per (entity_type, entity_id, window)
+-- in trend_windows (it becomes the "latest snapshot" table)
 DELETE FROM trend_windows
 WHERE id NOT IN (
     SELECT DISTINCT ON (entity_type, entity_id, "window") id
@@ -9,15 +58,15 @@ WHERE id NOT IN (
     ORDER BY entity_type, entity_id, "window", generated_at DESC
 );
 
--- Step 2: Add unique constraint for upsert
+-- Step 4: Add unique constraint for upsert
 CREATE UNIQUE INDEX IF NOT EXISTS idx_trend_windows_entity_window
     ON trend_windows (entity_type, entity_id, "window");
 
--- Step 3: Clean up old competitive signal records (keep last 30 days)
+-- Step 5: Clean up old competitive signal records (keep last 30 days)
 DELETE FROM competitive_signal_records
 WHERE computed_at < NOW() - INTERVAL '30 days';
 
--- Step 4: Add a partial index to speed up the NOT EXISTS check in the
+-- Step 6: Add a composite index to speed up the NOT EXISTS check in the
 -- aggregation propagation query
 CREATE INDEX IF NOT EXISTS idx_competitive_signals_source_doc_ticker
     ON competitive_signal_records (source_document_id, source_ticker);
diff --git a/infra/migrations/024_trend_history.sql b/infra/migrations/024_trend_history.sql
index f5643d7..33bfda2 100644
--- a/infra/migrations/024_trend_history.sql
+++ b/infra/migrations/024_trend_history.sql
@@ -1,6 +1,11 @@
 -- Trend history table for time-series charting.
 -- trend_windows stores the latest snapshot per (entity, window) via upsert.
 -- trend_history stores every snapshot so the frontend can plot trend evolution.
+--
+-- Note: migration 023 already creates this table and seeds it from
+-- trend_windows. This migration ensures the table exists if 023 was
+-- run before this version, and backfills from recommendations for
+-- richer historical data.
 
 CREATE TABLE IF NOT EXISTS trend_history (
     id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
@@ -22,16 +27,70 @@ CREATE INDEX IF NOT EXISTS idx_trend_history_lookup
 CREATE INDEX IF NOT EXISTS idx_trend_history_generated
     ON trend_history (generated_at DESC);
 
--- Seed history from existing trend_windows so charts aren't empty
--- on first deploy. This gives at least one data point per ticker/window.
+-- Backfill from recommendations generated in the last 7 days.
+-- This reconstructs approximate trend history from the recommendation
+-- records, giving one data point per ticker per window per hour (the
+-- latest recommendation in each hour wins).
+-- time_horizon is mapped to a window before de-duplicating because several
+-- horizons can collapse into the same window (e.g. the '7d' fallback).
+-- Snapshots already present in trend_history are skipped, so the backfill
+-- can be re-run. trend_strength and contradiction_score are heuristics
+-- derived from confidence and risk_classification.
 INSERT INTO trend_history (
     entity_type, entity_id, "window", trend_direction,
     trend_strength, confidence, contradiction_score,
     dominant_catalysts, material_risks, generated_at
 )
 SELECT
-    entity_type, entity_id, "window", trend_direction,
-    trend_strength, confidence, contradiction_score,
-    dominant_catalysts, material_risks, generated_at
-FROM trend_windows
-ON CONFLICT DO NOTHING;
+    'company',
+    r.ticker,
+    r.trend_window,
+    CASE r.action
+        WHEN 'buy' THEN 'bullish'
+        WHEN 'sell' THEN 'bearish'
+        WHEN 'hold' THEN 'neutral'
+        ELSE 'mixed'
+    END,
+    r.confidence * 0.8,
+    r.confidence,
+    CASE r.risk_classification
+        WHEN 'moderate' THEN 0.2
+        WHEN 'high' THEN 0.4
+        WHEN 'very_high' THEN 0.6
+        ELSE 0.1
+    END,
+    '[]'::jsonb,
+    '[]'::jsonb,
+    r.generated_at
+FROM (
+    SELECT DISTINCT ON (m.ticker, m.trend_window, date_trunc('hour', m.generated_at))
+        m.ticker, m.trend_window, m.action, m.confidence,
+        m.risk_classification, m.generated_at
+    FROM (
+        SELECT
+            rec.ticker, rec.action, rec.confidence,
+            rec.risk_classification, rec.generated_at,
+            CASE
+                WHEN rec.time_horizon LIKE 'intraday%' THEN 'intraday'
+                WHEN rec.time_horizon LIKE 'swing_1d_3d%' THEN '1d'
+                WHEN rec.time_horizon LIKE 'swing_1d_10d%' THEN '7d'
+                WHEN rec.time_horizon LIKE 'position_10d_30d%' THEN '30d'
+                WHEN rec.time_horizon LIKE 'position_30d_90d%' THEN '90d'
+                ELSE '7d'
+            END AS trend_window
+        FROM recommendations AS rec
+        WHERE rec.generated_at >= NOW() - INTERVAL '7 days'
+          -- entity_id is NOT NULL in trend_history; a NULL ticker would abort the insert
+          AND rec.ticker IS NOT NULL
+    ) AS m
+    ORDER BY m.ticker, m.trend_window, date_trunc('hour', m.generated_at),
+             m.generated_at DESC
+) AS r
+WHERE NOT EXISTS (
+    SELECT 1
+    FROM trend_history AS th
+    WHERE th.entity_type = 'company'
+      AND th.entity_id = r.ticker
+      AND th."window" = r.trend_window
+      AND th.generated_at = r.generated_at
+);