fix: default model to qwen3.5:9b + improve event classifier prompt

- Migration 026 and OllamaConfig now default to qwen3.5:9b instead of
  llama3.1:8b. Existing deployments keep their current model (qwen3.5:9b-fast)
  because the migration inserts with WHERE NOT EXISTS on slug, so a row that
  already exists for that slug is left untouched.

- Event classifier system prompt expanded with macro-vs-company filtering:
  it explicitly instructs the model NOT to classify single-company news
  (lawsuits, earnings, management changes, debt crises) as macro events.
  For company-specific articles the model is told to set severity=low and
  confidence<0.3. The 'critical' severity is reserved for multi-country or
  global market events. To prevent over-tagging, event_types may only be
  tagged when the article describes them directly.

- Updated test_system_prompt_is_concise threshold to accommodate the
  expanded prompt (300 → 1000 chars).
This commit is contained in:
Celes Renata
2026-04-17 02:53:38 +00:00
parent 90614dd7bb
commit c501ccea40
4 changed files with 123 additions and 12 deletions
+114 -4
View File
@@ -14,6 +14,7 @@ from __future__ import annotations
import asyncio
import json
import logging
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
@@ -22,6 +23,7 @@ from typing import Any
import asyncpg
from minio import Minio
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
from services.shared.schemas import (
EstimatedDuration,
ImpactType,
@@ -183,15 +185,28 @@ def get_event_json_schema() -> dict[str, Any]:
# ---------------------------------------------------------------------------
_SYSTEM_PROMPT = """\
You classify global news into structured macro event JSON. \
You classify MACRO-LEVEL global news into structured event JSON. \
Return ONLY a single JSON object. No markdown, no explanation. \
Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences."""
Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences.
CRITICAL: Only classify articles about MACRO events that affect entire markets, \
sectors, or economies. Examples: trade wars, interest rate changes, commodity \
supply disruptions, regulatory changes, geopolitical conflicts, natural disasters.
DO NOT classify as macro events: individual company earnings, lawsuits against \
a single company, single-company management changes, individual stock analysis, \
company-specific debt or bankruptcy, product launches by one company. \
For these, set severity to "low", confidence below 0.3, and leave \
affected_regions, affected_sectors, and affected_commodities as empty arrays."""
_ANTI_HALLUCINATION_RULES = """\
RULES:
- Only extract facts EXPLICITLY stated in the text. Do NOT fabricate.
- If vague or speculative, set confidence below 0.4.
- Distinguish announced policy from rumored policy."""
- Distinguish announced policy from rumored policy.
- If the article is about a SINGLE COMPANY (not a sector or market), set severity to "low" and confidence below 0.3.
- Only tag event_types that are DIRECTLY described in the article. Do NOT infer secondary effects.
- severity "critical" is reserved for events affecting multiple countries or entire global markets."""
def build_event_classification_prompt(text: str) -> dict[str, str]:
@@ -447,6 +462,11 @@ async def classify_global_event(
dedicated event classification prompt and JSON schema. Follows the
same retry policy as document extraction.
Resolves runtime config for the "event-classifier" agent slug from
the database, preferring an active variant's model_name and
system_prompt if one exists. Falls back to the OllamaClient's
existing config if resolution fails.
Persists prompt, raw output, and final event to MinIO and PostgreSQL
when the respective clients are provided.
@@ -464,10 +484,40 @@ async def classify_global_event(
ValueError: If classification fails after all retries.
"""
ts = datetime.now(timezone.utc)
start_time = time.monotonic()
# Resolve event-classifier config from DB for variant override
resolved: ResolvedAgentConfig | None = None
if pool is not None:
try:
resolver = AgentConfigResolver(pool, ttl_seconds=60)
resolved = await resolver.resolve("event-classifier")
except Exception:
logger.warning(
"Failed to resolve event-classifier config — using defaults",
exc_info=True,
)
prompts = build_event_classification_prompt(normalized_text)
json_schema = get_event_json_schema()
model_name = ollama_client._config.model
# Override model_name and system_prompt from resolved config
if resolved is not None:
model_name = resolved.model_name
if resolved.system_prompt:
prompts["system"] = resolved.system_prompt
# Input token limit truncation
if resolved is not None and resolved.input_token_limit > 0:
max_chars = resolved.input_token_limit * 4
if len(normalized_text) > max_chars:
normalized_text = normalized_text[:max_chars]
# Rebuild prompts with truncated text
prompts = build_event_classification_prompt(normalized_text)
if resolved.system_prompt:
prompts["system"] = resolved.system_prompt
# Persist prompt to MinIO
if minio_client:
try:
@@ -480,6 +530,8 @@ async def classify_global_event(
# Call Ollama using the client's internal _call_ollama method
# We reuse the retry logic pattern from OllamaClient.extract()
max_retries = ollama_client._max_retries
if resolved is not None:
max_retries = resolved.max_retries
last_error: str | None = None
raw_output = ""
@@ -515,6 +567,36 @@ async def classify_global_event(
"Failed to persist global event for doc %s", document_id,
)
# Log to agent_performance_log with variant attribution
if pool is not None and resolved is not None:
duration_ms = int((time.monotonic() - start_time) * 1000)
output_tokens = len(raw_output) // 4 if raw_output else 0
try:
await pool.execute(
"""INSERT INTO agent_performance_log
(agent_id, variant_id, document_id, ticker, success,
duration_ms, confidence, retry_count,
input_tokens, output_tokens, error_message)
VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5,
$6, $7, $8, $9, $10, $11)""",
resolved.agent_id,
resolved.variant_id,
document_id,
"",
True,
duration_ms,
event.confidence,
attempt_num,
len(normalized_text) // 4,
output_tokens,
None,
)
except Exception:
logger.warning(
"Failed to log event-classifier performance for doc %s",
document_id, exc_info=True,
)
return event
except (json.JSONDecodeError, KeyError, TypeError) as exc:
@@ -538,7 +620,35 @@ async def classify_global_event(
)
await asyncio.sleep(delay)
# All retries exhausted — persist failure and raise
# All retries exhausted — log failure performance and persist
if pool is not None and resolved is not None:
duration_ms = int((time.monotonic() - start_time) * 1000)
try:
await pool.execute(
"""INSERT INTO agent_performance_log
(agent_id, variant_id, document_id, ticker, success,
duration_ms, confidence, retry_count,
input_tokens, output_tokens, error_message)
VALUES ($1::uuid, $2::uuid, $3::uuid, $4, $5,
$6, $7, $8, $9, $10, $11)""",
resolved.agent_id,
resolved.variant_id,
document_id,
"",
False,
duration_ms,
0.0,
max_retries + 1,
len(normalized_text) // 4,
0,
last_error,
)
except Exception:
logger.warning(
"Failed to log event-classifier failure performance for doc %s",
document_id, exc_info=True,
)
if minio_client:
try:
_upload_classification_result(