fix: migrations preserve trend history across reinstalls

Migration 023 was deleting all but the latest trend_windows row per
entity before 024 could save them to trend_history. On reinstall,
this wiped the entire history every time.

Fixed by restructuring:
- 023 now creates trend_history FIRST and copies all trend_windows
  rows into it before deduplicating trend_windows down to latest-only.
  Uses NOT EXISTS to avoid duplicating rows on re-runs.
- 024 is now idempotent: ensures table/indexes exist and backfills
  from recommendations (last 7 days, 1 point per ticker/window/hour)
  to reconstruct approximate history even if trend_windows was sparse.

Both migrations are safe to re-run on existing databases.
This commit is contained in:
Celes Renata
2026-04-17 01:15:28 +00:00
parent 2360c501e4
commit 86b549e5e1
2 changed files with 94 additions and 12 deletions
@@ -1,7 +1,49 @@
-- Fix trend_windows to upsert instead of accumulating rows.
-- Add unique constraint so ON CONFLICT works, then deduplicate existing data.
-- First, preserve all historical data in trend_history before deduplicating.
-- Step 1: Keep only the most recent row per (entity_type, entity_id, window)
-- Step 1: Create trend_history table if it doesn't exist yet
-- (migration 024 also creates it, but we need it here first to preserve data)
-- trend_history keeps every trend snapshot (one row per entity, window and
-- generation time) so trend evolution can be charted over time.
-- Migration 024 declares the same table; keep both definitions identical.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS trend_history (
    id                  UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Identity of the snapshot: which entity, over which window.
    entity_type         VARCHAR(50)  NOT NULL DEFAULT 'company',
    entity_id           VARCHAR(100) NOT NULL,
    "window"            VARCHAR(20)  NOT NULL,  -- quoted: WINDOW is a reserved word
    -- Trend measurements captured at generation time.
    trend_direction     VARCHAR(20)  NOT NULL DEFAULT 'neutral',
    trend_strength      FLOAT        DEFAULT 0.5,
    confidence          FLOAT        DEFAULT 0.5,
    contradiction_score FLOAT        DEFAULT 0.0,
    -- JSON payloads; both default to an empty array.
    dominant_catalysts  JSONB        DEFAULT '[]',
    material_risks      JSONB        DEFAULT '[]',
    -- When the snapshot was produced.
    generated_at        TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);
-- Composite index keyed by entity, then window, with the newest snapshot
-- first within each (entity_id, "window") pair.
CREATE INDEX IF NOT EXISTS idx_trend_history_lookup
    ON trend_history (entity_id, "window", generated_at DESC);

-- Newest-first index over all snapshots regardless of entity.
CREATE INDEX IF NOT EXISTS idx_trend_history_generated
    ON trend_history (generated_at DESC);
-- Step 2: Copy ALL existing trend_windows rows into trend_history
-- before we delete any. This preserves the full history for charting.
-- Copy every trend_windows row into trend_history so nothing is lost when
-- trend_windows is later reduced to one row per entity/window.
--
-- Re-run safety: a row is skipped when trend_history already holds a row
-- with the same (entity_type, entity_id, "window", generated_at).
-- entity_type is part of the match because the trend_windows key is
-- (entity_type, entity_id, "window"); without it, a snapshot for one entity
-- type would be treated as already copied whenever a different entity type
-- shared the same id, window and timestamp.
INSERT INTO trend_history (
    entity_type,
    entity_id,
    "window",
    trend_direction,
    trend_strength,
    confidence,
    contradiction_score,
    dominant_catalysts,
    material_risks,
    generated_at
)
SELECT
    tw.entity_type,
    tw.entity_id,
    tw."window",
    tw.trend_direction,
    tw.trend_strength,
    tw.confidence,
    tw.contradiction_score,
    tw.dominant_catalysts,
    tw.material_risks,
    tw.generated_at
FROM trend_windows AS tw
WHERE NOT EXISTS (
    SELECT 1
    FROM trend_history AS th
    WHERE th.entity_type = tw.entity_type
      AND th.entity_id = tw.entity_id
      AND th."window" = tw."window"
      AND th.generated_at = tw.generated_at
);
-- Step 3: Keep only the most recent row per (entity_type, entity_id, window)
-- in trend_windows (it becomes the "latest snapshot" table)
DELETE FROM trend_windows
WHERE id NOT IN (
SELECT DISTINCT ON (entity_type, entity_id, "window") id
@@ -9,15 +51,15 @@ WHERE id NOT IN (
ORDER BY entity_type, entity_id, "window", generated_at DESC
);
-- Step 2: Add unique constraint for upsert
-- Step 4: Add unique constraint for upsert
-- Enforces a single row per (entity_type, entity_id, "window") and gives
-- INSERT ... ON CONFLICT a unique index to target on trend_windows.
-- Must run after duplicates are removed: building a unique index fails
-- while duplicate keys are still present.
CREATE UNIQUE INDEX IF NOT EXISTS idx_trend_windows_entity_window
    ON trend_windows (entity_type, entity_id, "window");
-- Step 3: Clean up old competitive signal records (keep last 30 days)
-- Step 5: Clean up old competitive signal records (keep last 30 days)
-- Retention sweep: remove competitive signal rows computed more than
-- 30 days ago. The cutoff is relative to NOW(), so every run of this
-- migration trims against the time it is executed.
DELETE FROM competitive_signal_records
    WHERE computed_at < NOW() - INTERVAL '30 days';
-- Step 4: Add a partial index to speed up the NOT EXISTS check in the
-- Step 6: Add a composite index to speed up the NOT EXISTS check in the
-- aggregation propagation query
-- Plain two-column index (no WHERE predicate) keyed by source document
-- first, then source ticker.
CREATE INDEX IF NOT EXISTS idx_competitive_signals_source_doc_ticker
    ON competitive_signal_records (source_document_id, source_ticker);
+47 -7
View File
@@ -1,6 +1,11 @@
-- Trend history table for time-series charting.
-- trend_windows stores the latest snapshot per (entity, window) via upsert.
-- trend_history stores every snapshot so the frontend can plot trend evolution.
--
-- Note: migration 023 now creates this table and seeds it from
-- trend_windows. This migration still creates the table itself in case
-- an earlier version of 023 (which did not create it) was already applied,
-- and it backfills from recommendations for richer historical data.
CREATE TABLE IF NOT EXISTS trend_history (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
@@ -22,16 +27,51 @@ CREATE INDEX IF NOT EXISTS idx_trend_history_lookup
-- Newest-first index over all trend_history snapshots.
CREATE INDEX IF NOT EXISTS idx_trend_history_generated
    ON trend_history (generated_at DESC);
-- Seed history from existing trend_windows so charts aren't empty
-- on first deploy. This gives at least one data point per ticker/window.
-- Backfill from recommendations, skipping points already in trend_history.
-- This reconstructs approximate trend history from the recommendation
-- records that were generated throughout the day, giving one data point
-- per ticker per window per hour.
-- Backfill trend_history with approximate points derived from the last
-- 7 days of recommendations: at most one point per ticker, per trend window,
-- per clock hour (the most recent recommendation in that hour wins).
--
-- Derived values are approximations, not measured trend data:
--   "window"            <- mapped from recommendations.time_horizon
--   trend_direction     <- mapped from recommendations.action
--   trend_strength      <- confidence * 0.8
--   contradiction_score <- mapped from recommendations.risk_classification
--   dominant_catalysts / material_risks <- empty JSON arrays
--
-- Re-run safety: a point is skipped when trend_history already has a
-- 'company' row for the same ticker, window and generated_at.
WITH mapped AS (
    -- Recent recommendations with time_horizon translated to a window label.
    -- Unrecognised horizons fall back to '7d'.
    SELECT
        rec.ticker,
        rec.action,
        rec.confidence,
        rec.risk_classification,
        rec.generated_at,
        CASE
            WHEN rec.time_horizon LIKE 'intraday%' THEN 'intraday'
            WHEN rec.time_horizon LIKE 'swing_1d_3d%' THEN '1d'
            WHEN rec.time_horizon LIKE 'swing_1d_10d%' THEN '7d'
            WHEN rec.time_horizon LIKE 'position_10d_30d%' THEN '30d'
            WHEN rec.time_horizon LIKE 'position_30d_90d%' THEN '90d'
            ELSE '7d'
        END AS trend_window
    FROM recommendations AS rec
    WHERE rec.generated_at >= NOW() - INTERVAL '7 days'
),
hourly AS (
    -- Deduplicate on the mapped window rather than the raw time_horizon:
    -- several horizons map to '7d', and keying on time_horizon would emit
    -- more than one '7d' point for the same ticker and hour.
    SELECT DISTINCT ON (m.ticker, m.trend_window, date_trunc('hour', m.generated_at))
        m.ticker,
        m.trend_window,
        m.action,
        m.confidence,
        m.risk_classification,
        m.generated_at
    FROM mapped AS m
    ORDER BY
        m.ticker,
        m.trend_window,
        date_trunc('hour', m.generated_at),
        m.generated_at DESC
)
INSERT INTO trend_history (
    entity_type,
    entity_id,
    "window",
    trend_direction,
    trend_strength,
    confidence,
    contradiction_score,
    dominant_catalysts,
    material_risks,
    generated_at
)
SELECT
    'company',
    h.ticker,
    h.trend_window,
    CASE
        WHEN h.action = 'buy' THEN 'bullish'
        WHEN h.action = 'sell' THEN 'bearish'
        WHEN h.action = 'hold' THEN 'neutral'
        ELSE 'mixed'
    END,
    h.confidence * 0.8,
    h.confidence,
    CASE
        WHEN h.risk_classification = 'high' THEN 0.4
        WHEN h.risk_classification = 'very_high' THEN 0.6
        WHEN h.risk_classification = 'moderate' THEN 0.2
        ELSE 0.1
    END,
    '[]'::jsonb,
    '[]'::jsonb,
    h.generated_at
FROM hourly AS h
WHERE NOT EXISTS (
    -- Match on the full identity of a history point. Omitting "window"
    -- would let an existing row for one window suppress the backfill of
    -- every other window at the same timestamp.
    SELECT 1
    FROM trend_history AS th
    WHERE th.entity_type = 'company'
      AND th.entity_id = h.ticker
      AND th."window" = h.trend_window
      AND th.generated_at = h.generated_at
);