feat: agent variants — migration, API, service integration, frontend, tests

- Migration 027: agent_variants table with single-active enforcement,
  variant_id column on agent_performance_log
- API: full CRUD, clone from agent/variant, activate/deactivate,
  per-variant performance metrics and history endpoints
- Services: extractor, event classifier, thesis rewriter all wired
  to AgentConfigResolver with variant override support
- Frontend: variant list, comparison view, create/edit/clone forms,
  activate/delete actions on Agents page
- Tests: API tests + 5 property-based tests (single-active invariant,
  clone preservation, config resolution, slug determinism, update idempotence)
- Spec files for agent-variants feature
This commit is contained in:
Celes Renata
2026-04-17 05:15:42 +00:00
parent 734bf001a7
commit 7c23c044d7
14 changed files with 3118 additions and 120 deletions
+147 -2
View File
@@ -16,8 +16,10 @@ from __future__ import annotations
import logging
import time
import asyncpg
import httpx
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
from services.shared.config import OllamaConfig
from services.shared.schemas import TrendSummary
@@ -86,6 +88,7 @@ async def rewrite_thesis_with_llm(
summary: TrendSummary,
config: OllamaConfig,
http_client: httpx.AsyncClient | None = None,
pool: asyncpg.Pool | None = None,
) -> str:
"""Rewrite a deterministic thesis using a local Ollama model.
@@ -93,22 +96,90 @@ async def rewrite_thesis_with_llm(
deterministic thesis unchanged. This ensures the LLM layer is
purely additive and never blocks recommendation generation.
Resolves runtime config for the "thesis-rewriter" agent slug from
the database when a pool is provided, preferring an active variant's
model, timeout, and max_retries. Falls back to the passed
OllamaConfig if resolution fails.
Args:
deterministic_thesis: The rule-based thesis string.
summary: The trend summary that produced the thesis.
config: Ollama connection and model configuration.
http_client: Optional shared HTTP client for connection reuse.
pool: Optional asyncpg pool for config resolution and performance logging.
Returns:
The LLM-rewritten thesis on success, or the original on failure.
"""
start_time = time.monotonic()
# Resolve thesis-rewriter config from DB for variant override
resolved: ResolvedAgentConfig | None = None
effective_config = config
if pool is not None:
try:
resolver = AgentConfigResolver(pool, ttl_seconds=60)
resolved = await resolver.resolve("thesis-rewriter")
if resolved is not None:
effective_config = OllamaConfig(
base_url=config.base_url,
model=resolved.model_name,
timeout=resolved.timeout_seconds,
max_retries=resolved.max_retries,
retry_base_delay=config.retry_base_delay,
retry_max_delay=config.retry_max_delay,
retry_backoff_multiplier=config.retry_backoff_multiplier,
max_tokens=resolved.max_tokens,
context_window=resolved.context_window,
)
logger.info(
"Thesis rewriter using resolved config: model=%s variant=%s",
resolved.model_name, resolved.variant_id,
)
except Exception:
logger.warning(
"Failed to resolve thesis-rewriter config — using passed config",
exc_info=True,
)
# Token budget enforcement
if (
resolved is not None
and resolved.token_budget > 0
and resolved.variant_id is not None
and pool is not None
):
try:
row = await pool.fetchrow(
"""SELECT COALESCE(SUM(input_tokens + output_tokens), 0) AS total_tokens
FROM agent_performance_log
WHERE variant_id = $1
AND recorded_at >= NOW() - INTERVAL '1 hour'""",
resolved.variant_id,
)
used = int(row["total_tokens"]) if row else 0
if used >= resolved.token_budget:
logger.warning(
"Token budget exceeded for thesis-rewriter variant %s: used %d / budget %d",
resolved.variant_id, used, resolved.token_budget,
)
return deterministic_thesis
except Exception:
logger.warning("Failed to check token budget for thesis-rewriter", exc_info=True)
prompts = build_thesis_rewrite_prompt(deterministic_thesis, summary)
# Override system prompt from resolved config if available
if resolved is not None and resolved.system_prompt:
prompts["system"] = resolved.system_prompt
owns_client = http_client is None
client = http_client or httpx.AsyncClient(timeout=config.timeout)
client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)
try:
rewritten = await _call_ollama_thesis(client, config, prompts)
rewritten = await _call_ollama_thesis(client, effective_config, prompts)
duration_ms = int((time.monotonic() - start_time) * 1000)
if rewritten:
logger.info(
"LLM thesis rewrite succeeded for %s (%d chars → %d chars)",
@@ -116,24 +187,94 @@ async def rewrite_thesis_with_llm(
len(deterministic_thesis),
len(rewritten),
)
# Log success to agent_performance_log
if pool is not None and resolved is not None:
await _log_thesis_performance(
pool,
resolved=resolved,
ticker=summary.entity_id,
success=True,
duration_ms=duration_ms,
input_tokens=len(deterministic_thesis) // 4,
output_tokens=len(rewritten) // 4,
)
return rewritten
logger.warning(
"LLM thesis rewrite returned empty for %s — using deterministic thesis",
summary.entity_id,
)
# Log failure to agent_performance_log
if pool is not None and resolved is not None:
await _log_thesis_performance(
pool,
resolved=resolved,
ticker=summary.entity_id,
success=False,
duration_ms=duration_ms,
input_tokens=len(deterministic_thesis) // 4,
output_tokens=0,
error_message="empty_response",
)
return deterministic_thesis
except Exception:
duration_ms = int((time.monotonic() - start_time) * 1000)
logger.exception(
"LLM thesis rewrite failed for %s — using deterministic thesis",
summary.entity_id,
)
if pool is not None and resolved is not None:
await _log_thesis_performance(
pool,
resolved=resolved,
ticker=summary.entity_id,
success=False,
duration_ms=duration_ms,
input_tokens=len(deterministic_thesis) // 4,
output_tokens=0,
error_message="exception",
)
return deterministic_thesis
finally:
if owns_client:
await client.aclose()
async def _log_thesis_performance(
pool: asyncpg.Pool,
*,
resolved: ResolvedAgentConfig,
ticker: str,
success: bool,
duration_ms: int,
input_tokens: int = 0,
output_tokens: int = 0,
error_message: str | None = None,
) -> None:
"""Insert a performance log entry for the thesis rewriter agent."""
try:
await pool.execute(
"""INSERT INTO agent_performance_log
(agent_id, variant_id, document_id, ticker, success,
duration_ms, confidence, retry_count,
input_tokens, output_tokens, error_message)
VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
resolved.agent_id,
resolved.variant_id,
None, # no document_id for thesis rewrites
ticker,
success,
duration_ms,
0.0, # no confidence score for rewrites
0,
input_tokens,
output_tokens,
error_message,
)
except Exception:
logger.warning("Failed to log thesis-rewriter performance", exc_info=True)
async def _call_ollama_thesis(
client: httpx.AsyncClient,
config: OllamaConfig,
@@ -154,6 +295,10 @@ async def _call_ollama_thesis(
"stream": False,
}
# Support context_window override via num_ctx (Requirement 10.4)
if config.context_window > 0:
payload["options"] = {"num_ctx": config.context_window}
resp = await client.post(
f"{config.base_url}/api/chat",
json=payload,
+1
View File
@@ -796,6 +796,7 @@ async def generate_recommendation(
deterministic_thesis=deterministic_thesis,
summary=summary,
config=ollama_config,
pool=pool,
)
# If the LLM returned the same text as the deterministic thesis,
# treat it as a no-op (fallback was used).