feat: agent variants — migration, API, service integration, frontend, tests

- Migration 027: agent_variants table with single-active enforcement, variant_id column on agent_performance_log - API: full CRUD, clone from agent/variant, activate/deactivate, per-variant performance metrics and history endpoints - Services: extractor, event classifier, thesis rewriter all wired to AgentConfigResolver with variant override support - Frontend: variant list, comparison view, create/edit/clone forms, activate/delete actions on Agents page - Tests: API tests + 5 property-based tests (single-active invariant, clone preservation, config resolution, slug determinism, update idempotence) - Spec files for agent-variants feature
2026-04-17 05:15:42 +00:00
parent 734bf001a7
commit 7c23c044d7
14 changed files with 3118 additions and 120 deletions
@@ -16,8 +16,10 @@ from __future__ import annotations
 import logging
 import time

+import asyncpg
 import httpx

+from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
 from services.shared.config import OllamaConfig
 from services.shared.schemas import TrendSummary

@@ -86,6 +88,7 @@ async def rewrite_thesis_with_llm(
    summary: TrendSummary,
    config: OllamaConfig,
    http_client: httpx.AsyncClient | None = None,
+    pool: asyncpg.Pool | None = None,
 ) -> str:
    """Rewrite a deterministic thesis using a local Ollama model.

@@ -93,22 +96,90 @@ async def rewrite_thesis_with_llm(
    deterministic thesis unchanged. This ensures the LLM layer is
    purely additive and never blocks recommendation generation.

+    Resolves runtime config for the "thesis-rewriter" agent slug from
+    the database when a pool is provided, preferring an active variant's
+    model, timeout, and max_retries. Falls back to the passed
+    OllamaConfig if resolution fails.
+
    Args:
        deterministic_thesis: The rule-based thesis string.
        summary: The trend summary that produced the thesis.
        config: Ollama connection and model configuration.
        http_client: Optional shared HTTP client for connection reuse.
+        pool: Optional asyncpg pool for config resolution and performance logging.

    Returns:
        The LLM-rewritten thesis on success, or the original on failure.
    """
+    start_time = time.monotonic()
+
+    # Resolve thesis-rewriter config from DB for variant override
+    resolved: ResolvedAgentConfig | None = None
+    effective_config = config
+    if pool is not None:
+        try:
+            resolver = AgentConfigResolver(pool, ttl_seconds=60)
+            resolved = await resolver.resolve("thesis-rewriter")
+            if resolved is not None:
+                effective_config = OllamaConfig(
+                    base_url=config.base_url,
+                    model=resolved.model_name,
+                    timeout=resolved.timeout_seconds,
+                    max_retries=resolved.max_retries,
+                    retry_base_delay=config.retry_base_delay,
+                    retry_max_delay=config.retry_max_delay,
+                    retry_backoff_multiplier=config.retry_backoff_multiplier,
+                    max_tokens=resolved.max_tokens,
+                    context_window=resolved.context_window,
+                )
+                logger.info(
+                    "Thesis rewriter using resolved config: model=%s variant=%s",
+                    resolved.model_name, resolved.variant_id,
+                )
+        except Exception:
+            logger.warning(
+                "Failed to resolve thesis-rewriter config — using passed config",
+                exc_info=True,
+            )
+
+    # Token budget enforcement
+    if (
+        resolved is not None
+        and resolved.token_budget > 0
+        and resolved.variant_id is not None
+        and pool is not None
+    ):
+        try:
+            row = await pool.fetchrow(
+                """SELECT COALESCE(SUM(input_tokens + output_tokens), 0) AS total_tokens
+                   FROM agent_performance_log
+                   WHERE variant_id = $1
+                     AND recorded_at >= NOW() - INTERVAL '1 hour'""",
+                resolved.variant_id,
+            )
+            used = int(row["total_tokens"]) if row else 0
+            if used >= resolved.token_budget:
+                logger.warning(
+                    "Token budget exceeded for thesis-rewriter variant %s: used %d / budget %d",
+                    resolved.variant_id, used, resolved.token_budget,
+                )
+                return deterministic_thesis
+        except Exception:
+            logger.warning("Failed to check token budget for thesis-rewriter", exc_info=True)
+
    prompts = build_thesis_rewrite_prompt(deterministic_thesis, summary)

+    # Override system prompt from resolved config if available
+    if resolved is not None and resolved.system_prompt:
+        prompts["system"] = resolved.system_prompt
+
    owns_client = http_client is None
-    client = http_client or httpx.AsyncClient(timeout=config.timeout)
+    client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)

    try:
-        rewritten = await _call_ollama_thesis(client, config, prompts)
+        rewritten = await _call_ollama_thesis(client, effective_config, prompts)
+        duration_ms = int((time.monotonic() - start_time) * 1000)
+
        if rewritten:
            logger.info(
                "LLM thesis rewrite succeeded for %s (%d chars → %d chars)",
@@ -116,24 +187,94 @@ async def rewrite_thesis_with_llm(
                len(deterministic_thesis),
                len(rewritten),
            )
+            # Log success to agent_performance_log
+            if pool is not None and resolved is not None:
+                await _log_thesis_performance(
+                    pool,
+                    resolved=resolved,
+                    ticker=summary.entity_id,
+                    success=True,
+                    duration_ms=duration_ms,
+                    input_tokens=len(deterministic_thesis) // 4,
+                    output_tokens=len(rewritten) // 4,
+                )
            return rewritten

        logger.warning(
            "LLM thesis rewrite returned empty for %s — using deterministic thesis",
            summary.entity_id,
        )
+        # Log failure to agent_performance_log
+        if pool is not None and resolved is not None:
+            await _log_thesis_performance(
+                pool,
+                resolved=resolved,
+                ticker=summary.entity_id,
+                success=False,
+                duration_ms=duration_ms,
+                input_tokens=len(deterministic_thesis) // 4,
+                output_tokens=0,
+                error_message="empty_response",
+            )
        return deterministic_thesis
    except Exception:
+        duration_ms = int((time.monotonic() - start_time) * 1000)
        logger.exception(
            "LLM thesis rewrite failed for %s — using deterministic thesis",
            summary.entity_id,
        )
+        if pool is not None and resolved is not None:
+            await _log_thesis_performance(
+                pool,
+                resolved=resolved,
+                ticker=summary.entity_id,
+                success=False,
+                duration_ms=duration_ms,
+                input_tokens=len(deterministic_thesis) // 4,
+                output_tokens=0,
+                error_message="exception",
+            )
        return deterministic_thesis
    finally:
        if owns_client:
            await client.aclose()


+async def _log_thesis_performance(
+    pool: asyncpg.Pool,
+    *,
+    resolved: ResolvedAgentConfig,
+    ticker: str,
+    success: bool,
+    duration_ms: int,
+    input_tokens: int = 0,
+    output_tokens: int = 0,
+    error_message: str | None = None,
+) -> None:
+    """Insert a performance log entry for the thesis rewriter agent."""
+    try:
+        await pool.execute(
+            """INSERT INTO agent_performance_log
+                   (agent_id, variant_id, document_id, ticker, success,
+                    duration_ms, confidence, retry_count,
+                    input_tokens, output_tokens, error_message)
+               VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
+            resolved.agent_id,
+            resolved.variant_id,
+            None,  # no document_id for thesis rewrites
+            ticker,
+            success,
+            duration_ms,
+            0.0,  # no confidence score for rewrites
+            0,
+            input_tokens,
+            output_tokens,
+            error_message,
+        )
+    except Exception:
+        logger.warning("Failed to log thesis-rewriter performance", exc_info=True)
+
+
 async def _call_ollama_thesis(
    client: httpx.AsyncClient,
    config: OllamaConfig,
@@ -154,6 +295,10 @@ async def _call_ollama_thesis(
        "stream": False,
    }

+    # Support context_window override via num_ctx (Requirement 10.4)
+    if config.context_window > 0:
+        payload["options"] = {"num_ctx": config.context_window}
+
    resp = await client.post(
        f"{config.base_url}/api/chat",
        json=payload,
@@ -796,6 +796,7 @@ async def generate_recommendation(
            deterministic_thesis=deterministic_thesis,
            summary=summary,
            config=ollama_config,
+            pool=pool,
        )
        # If the LLM returned the same text as the deterministic thesis,
        # treat it as a no-op (fallback was used).