-- Fix max_tokens default: 32768 is the full context window, not a reasonable
-- output limit. vLLM rejects requests where max_tokens >= context_window
-- because there's no room left for the input prompt.
--
-- Change the column default to 4096 (sufficient for structured JSON extraction
-- output) and update any existing rows still at the old default.

-- Step 1: new rows. SET DEFAULT only affects rows inserted from now on; it
-- does not rewrite values already stored, hence the backfill in step 2.
ALTER TABLE ai_agents
    ALTER COLUMN max_tokens SET DEFAULT 4096;

ALTER TABLE agent_variants
    ALTER COLUMN max_tokens SET DEFAULT 4096;

-- Step 2: existing rows. Only rows holding exactly the old default (32768)
-- are touched; any other configured value is left alone. Re-running this is a
-- no-op because no row matches the predicate after the first pass.
-- NOTE(review): a row deliberately set to 32768 is indistinguishable from one
-- that inherited the old default and will also be lowered to 4096.
UPDATE ai_agents
SET
    max_tokens = 4096,
    updated_at = NOW()
WHERE max_tokens = 32768;

-- Only max_tokens is changed here; no timestamp column is updated for
-- agent_variants (presumably it has no updated_at column -- TODO confirm).
UPDATE agent_variants
SET max_tokens = 4096
WHERE max_tokens = 32768;