From b38fb24f147d0a75710f3b542eca42c2e202471d Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Wed, 29 Apr 2026 04:33:21 +0000 Subject: [PATCH] fix: ensure production uses DB-configured model/provider from UI - Migration 026: update seed defaults from ollama to vllm/AxionML - Migration 031: fix existing rows still on old ollama defaults - Helm values: set OLLAMA_BASE_URL to cluster ollama endpoint (was empty) - Extractor: guard against switching to ollama when base_url is empty - OllamaClient: validate base_url on construction to fail fast --- infra/helm/stonks-oracle/values.yaml | 2 +- infra/migrations/026_ai_agents.sql | 12 ++-- infra/migrations/031_fix_agent_defaults.sql | 13 +++++ services/extractor/client.py | 5 ++ services/extractor/main.py | 62 +++++++++++++-------- 5 files changed, 64 insertions(+), 30 deletions(-) create mode 100644 infra/migrations/031_fix_agent_defaults.sql diff --git a/infra/helm/stonks-oracle/values.yaml b/infra/helm/stonks-oracle/values.yaml index 8b38a04..5258c91 100644 --- a/infra/helm/stonks-oracle/values.yaml +++ b/infra/helm/stonks-oracle/values.yaml @@ -174,7 +174,7 @@ config: REDIS_DB: "0" MINIO_ENDPOINT: "minio.minio-service.svc.cluster.local:80" MINIO_SECURE: "false" - OLLAMA_BASE_URL: "" + OLLAMA_BASE_URL: "http://ollama.ollama-service.svc.cluster.local:11434" OLLAMA_MODEL: "qwen3.5:9b-fast" OLLAMA_TIMEOUT: "240" OLLAMA_MAX_RETRIES: "2" diff --git a/infra/migrations/026_ai_agents.sql b/infra/migrations/026_ai_agents.sql index a84987e..9a89a14 100644 --- a/infra/migrations/026_ai_agents.sql +++ b/infra/migrations/026_ai_agents.sql @@ -37,8 +37,8 @@ SELECT * FROM (VALUES 'Document Intelligence Extractor', 'document-extractor', 'Extracts structured intelligence (sentiment, catalysts, impact scores, key facts, risks) from company news, SEC filings, earnings transcripts, and press releases.', - 'ollama', - 'qwen3.5:9b-fast', + 'vllm', + 'AxionML/Qwen3.5-9B-NVFP4', E'You are a financial document analyst. 
Extract structured data as JSON. Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. Every field in the schema is required. Use "other" for catalyst_type if unsure. Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max.', 'document-intel-v2', '2.0.0', @@ -48,8 +48,8 @@ SELECT * FROM (VALUES 'Global Event Classifier', 'event-classifier', 'Classifies global/geopolitical news into structured macro events with impact type, severity, affected regions/sectors/commodities, and estimated duration.', - 'ollama', - 'qwen3.5:9b-fast', + 'vllm', + 'AxionML/Qwen3.5-9B-NVFP4', E'You classify MACRO-LEVEL global news into structured event JSON. Return ONLY a single JSON object. No markdown, no explanation. Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences.\n\nCRITICAL: Only classify articles about MACRO events that affect entire markets, sectors, or economies. Examples: trade wars, interest rate changes, commodity supply disruptions, regulatory changes, geopolitical conflicts, natural disasters.\n\nDO NOT classify as macro events: individual company earnings, lawsuits against a single company, single-company management changes, individual stock analysis, company-specific debt or bankruptcy, product launches by one company. For these, set severity to "low", confidence below 0.3, and leave affected_regions, affected_sectors, and affected_commodities as empty arrays.', 'event-classification-v1', '1.0.0', @@ -59,8 +59,8 @@ SELECT * FROM (VALUES 'Thesis Rewriter', 'thesis-rewriter', 'Rewrites deterministic trade thesis summaries into clear, professional analyst prose. Optional layer — system falls back to deterministic thesis if this fails.', - 'ollama', - 'qwen3.5:9b-fast', + 'vllm', + 'AxionML/Qwen3.5-9B-NVFP4', E'You are a concise financial analyst. 
You rewrite structured trade thesis summaries into clear, professional prose suitable for an internal research note.\n\nSTRICT RULES:\n1. Do NOT add any information that is not present in the input.\n2. Do NOT fabricate numbers, dates, company names, or analyst opinions.\n3. Keep the rewrite under 150 words.\n4. Preserve all factual claims, risk notes, and evidence counts from the input.\n5. Use a neutral, professional tone. Avoid hype or marketing language.\n6. Return ONLY the rewritten thesis text. No JSON, no markdown, no commentary.', 'thesis-rewrite-v1', '1.0.0', diff --git a/infra/migrations/031_fix_agent_defaults.sql b/infra/migrations/031_fix_agent_defaults.sql new file mode 100644 index 0000000..ca84312 --- /dev/null +++ b/infra/migrations/031_fix_agent_defaults.sql @@ -0,0 +1,13 @@ +-- Fix agent default model_provider and model_name to match production config. +-- The original migration 026 seeded with 'ollama'/'qwen3.5:9b-fast' but production +-- uses vLLM. This migration updates agents that still have the old defaults, +-- preserving any user customizations (only updates if model_name matches the old default). Note: matched rows also have max_tokens overwritten with 4096 and updated_at refreshed. + +UPDATE ai_agents + SET model_provider = 'vllm', + model_name = 'AxionML/Qwen3.5-9B-NVFP4', + max_tokens = 4096, + updated_at = NOW() + WHERE slug IN ('document-extractor', 'event-classifier', 'thesis-rewriter') + AND source = 'system' + AND model_name = 'qwen3.5:9b-fast'; diff --git a/services/extractor/client.py b/services/extractor/client.py index 8b4852b..e474dcb 100644 --- a/services/extractor/client.py +++ b/services/extractor/client.py @@ -140,6 +140,11 @@ class OllamaClient: max_retries: int | None = None, http_client: httpx.AsyncClient | None = None, ) -> None: + if not config.base_url or not config.base_url.startswith(("http://", "https://")): + raise ValueError( + f"OllamaClient requires a valid base_url (got {config.base_url!r}). " + "Set OLLAMA_BASE_URL environment variable." 
+ ) self._config = config self._max_retries = max_retries if max_retries is not None else config.max_retries self._base_delay = config.retry_base_delay diff --git a/services/extractor/main.py b/services/extractor/main.py index 163899b..464e136 100644 --- a/services/extractor/main.py +++ b/services/extractor/main.py @@ -486,17 +486,25 @@ async def main() -> None: model_changed = new_cfg.model != extractor_client._config.model if provider_changed or model_changed: - logger.info( - "Extractor provider switch: old_provider=%s new_provider=%s " - "model=%s variant=%s", - old_provider, new_provider, - new_resolved.model_name, new_resolved.variant_id, - ) - await extractor_client.close() - extractor_client = build_llm_client( - new_resolved, config.ollama, config.vllm, - ) - extractor_provider = new_provider + # Guard: don't switch to ollama if base_url is empty + if new_provider == "ollama" and not config.ollama.base_url: + logger.warning( + "DB resolved provider=ollama but OLLAMA_BASE_URL is empty — " + "keeping current %s client. 
Fix the agent config in the UI.", + extractor_provider, + ) + else: + logger.info( + "Extractor provider switch: old_provider=%s new_provider=%s " + "model=%s variant=%s", + old_provider, new_provider, + new_resolved.model_name, new_resolved.variant_id, + ) + await extractor_client.close() + extractor_client = build_llm_client( + new_resolved, config.ollama, config.vllm, + ) + extractor_provider = new_provider else: # Same provider and model — just update config in-place extractor_client._config = new_cfg # type: ignore[assignment] @@ -517,18 +525,26 @@ async def main() -> None: cls_model_changed = new_cls_cfg.model != classifier_client._config.model if cls_provider_changed or cls_model_changed: - logger.info( - "Classifier provider switch: old_provider=%s new_provider=%s " - "model=%s variant=%s", - old_cls_provider, new_cls_provider, - new_cls_resolved.model_name, new_cls_resolved.variant_id, - ) - if classifier_client is not extractor_client: - await classifier_client.close() - classifier_client = build_llm_client( - new_cls_resolved, config.ollama, config.vllm, - ) - classifier_provider = new_cls_provider + # Guard: don't switch to ollama if base_url is empty + if new_cls_provider == "ollama" and not config.ollama.base_url: + logger.warning( + "DB resolved classifier provider=ollama but OLLAMA_BASE_URL is empty — " + "keeping current %s client. 
Fix the agent config in the UI.", + classifier_provider, + ) + else: + logger.info( + "Classifier provider switch: old_provider=%s new_provider=%s " + "model=%s variant=%s", + old_cls_provider, new_cls_provider, + new_cls_resolved.model_name, new_cls_resolved.variant_id, + ) + if classifier_client is not extractor_client: + await classifier_client.close() + classifier_client = build_llm_client( + new_cls_resolved, config.ollama, config.vllm, + ) + classifier_provider = new_cls_provider elif classifier_client is extractor_client and new_cls_cfg.model != extractor_client._config.model: classifier_client = build_llm_client( new_cls_resolved, config.ollama, config.vllm,