feat: agent variants — migration, API, service integration, frontend, tests
- Migration 027: agent_variants table with single-active enforcement, variant_id column on agent_performance_log - API: full CRUD, clone from agent/variant, activate/deactivate, per-variant performance metrics and history endpoints - Services: extractor, event classifier, thesis rewriter all wired to AgentConfigResolver with variant override support - Frontend: variant list, comparison view, create/edit/clone forms, activate/delete actions on Agents page - Tests: API tests + 5 property-based tests (single-active invariant, clone preservation, config resolution, slug determinism, update idempotence) - Spec files for agent-variants feature
This commit is contained in:
@@ -16,8 +16,10 @@ from __future__ import annotations
|
||||
import logging
|
||||
import time
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
|
||||
from services.shared.agent_config import AgentConfigResolver, ResolvedAgentConfig
|
||||
from services.shared.config import OllamaConfig
|
||||
from services.shared.schemas import TrendSummary
|
||||
|
||||
@@ -86,6 +88,7 @@ async def rewrite_thesis_with_llm(
|
||||
summary: TrendSummary,
|
||||
config: OllamaConfig,
|
||||
http_client: httpx.AsyncClient | None = None,
|
||||
pool: asyncpg.Pool | None = None,
|
||||
) -> str:
|
||||
"""Rewrite a deterministic thesis using a local Ollama model.
|
||||
|
||||
@@ -93,22 +96,90 @@ async def rewrite_thesis_with_llm(
|
||||
deterministic thesis unchanged. This ensures the LLM layer is
|
||||
purely additive and never blocks recommendation generation.
|
||||
|
||||
Resolves runtime config for the "thesis-rewriter" agent slug from
|
||||
the database when a pool is provided, preferring an active variant's
|
||||
model, timeout, and max_retries. Falls back to the passed
|
||||
OllamaConfig if resolution fails.
|
||||
|
||||
Args:
|
||||
deterministic_thesis: The rule-based thesis string.
|
||||
summary: The trend summary that produced the thesis.
|
||||
config: Ollama connection and model configuration.
|
||||
http_client: Optional shared HTTP client for connection reuse.
|
||||
pool: Optional asyncpg pool for config resolution and performance logging.
|
||||
|
||||
Returns:
|
||||
The LLM-rewritten thesis on success, or the original on failure.
|
||||
"""
|
||||
start_time = time.monotonic()
|
||||
|
||||
# Resolve thesis-rewriter config from DB for variant override
|
||||
resolved: ResolvedAgentConfig | None = None
|
||||
effective_config = config
|
||||
if pool is not None:
|
||||
try:
|
||||
resolver = AgentConfigResolver(pool, ttl_seconds=60)
|
||||
resolved = await resolver.resolve("thesis-rewriter")
|
||||
if resolved is not None:
|
||||
effective_config = OllamaConfig(
|
||||
base_url=config.base_url,
|
||||
model=resolved.model_name,
|
||||
timeout=resolved.timeout_seconds,
|
||||
max_retries=resolved.max_retries,
|
||||
retry_base_delay=config.retry_base_delay,
|
||||
retry_max_delay=config.retry_max_delay,
|
||||
retry_backoff_multiplier=config.retry_backoff_multiplier,
|
||||
max_tokens=resolved.max_tokens,
|
||||
context_window=resolved.context_window,
|
||||
)
|
||||
logger.info(
|
||||
"Thesis rewriter using resolved config: model=%s variant=%s",
|
||||
resolved.model_name, resolved.variant_id,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to resolve thesis-rewriter config — using passed config",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Token budget enforcement
|
||||
if (
|
||||
resolved is not None
|
||||
and resolved.token_budget > 0
|
||||
and resolved.variant_id is not None
|
||||
and pool is not None
|
||||
):
|
||||
try:
|
||||
row = await pool.fetchrow(
|
||||
"""SELECT COALESCE(SUM(input_tokens + output_tokens), 0) AS total_tokens
|
||||
FROM agent_performance_log
|
||||
WHERE variant_id = $1
|
||||
AND recorded_at >= NOW() - INTERVAL '1 hour'""",
|
||||
resolved.variant_id,
|
||||
)
|
||||
used = int(row["total_tokens"]) if row else 0
|
||||
if used >= resolved.token_budget:
|
||||
logger.warning(
|
||||
"Token budget exceeded for thesis-rewriter variant %s: used %d / budget %d",
|
||||
resolved.variant_id, used, resolved.token_budget,
|
||||
)
|
||||
return deterministic_thesis
|
||||
except Exception:
|
||||
logger.warning("Failed to check token budget for thesis-rewriter", exc_info=True)
|
||||
|
||||
prompts = build_thesis_rewrite_prompt(deterministic_thesis, summary)
|
||||
|
||||
# Override system prompt from resolved config if available
|
||||
if resolved is not None and resolved.system_prompt:
|
||||
prompts["system"] = resolved.system_prompt
|
||||
|
||||
owns_client = http_client is None
|
||||
client = http_client or httpx.AsyncClient(timeout=config.timeout)
|
||||
client = http_client or httpx.AsyncClient(timeout=effective_config.timeout)
|
||||
|
||||
try:
|
||||
rewritten = await _call_ollama_thesis(client, config, prompts)
|
||||
rewritten = await _call_ollama_thesis(client, effective_config, prompts)
|
||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||
|
||||
if rewritten:
|
||||
logger.info(
|
||||
"LLM thesis rewrite succeeded for %s (%d chars → %d chars)",
|
||||
@@ -116,24 +187,94 @@ async def rewrite_thesis_with_llm(
|
||||
len(deterministic_thesis),
|
||||
len(rewritten),
|
||||
)
|
||||
# Log success to agent_performance_log
|
||||
if pool is not None and resolved is not None:
|
||||
await _log_thesis_performance(
|
||||
pool,
|
||||
resolved=resolved,
|
||||
ticker=summary.entity_id,
|
||||
success=True,
|
||||
duration_ms=duration_ms,
|
||||
input_tokens=len(deterministic_thesis) // 4,
|
||||
output_tokens=len(rewritten) // 4,
|
||||
)
|
||||
return rewritten
|
||||
|
||||
logger.warning(
|
||||
"LLM thesis rewrite returned empty for %s — using deterministic thesis",
|
||||
summary.entity_id,
|
||||
)
|
||||
# Log failure to agent_performance_log
|
||||
if pool is not None and resolved is not None:
|
||||
await _log_thesis_performance(
|
||||
pool,
|
||||
resolved=resolved,
|
||||
ticker=summary.entity_id,
|
||||
success=False,
|
||||
duration_ms=duration_ms,
|
||||
input_tokens=len(deterministic_thesis) // 4,
|
||||
output_tokens=0,
|
||||
error_message="empty_response",
|
||||
)
|
||||
return deterministic_thesis
|
||||
except Exception:
|
||||
duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||
logger.exception(
|
||||
"LLM thesis rewrite failed for %s — using deterministic thesis",
|
||||
summary.entity_id,
|
||||
)
|
||||
if pool is not None and resolved is not None:
|
||||
await _log_thesis_performance(
|
||||
pool,
|
||||
resolved=resolved,
|
||||
ticker=summary.entity_id,
|
||||
success=False,
|
||||
duration_ms=duration_ms,
|
||||
input_tokens=len(deterministic_thesis) // 4,
|
||||
output_tokens=0,
|
||||
error_message="exception",
|
||||
)
|
||||
return deterministic_thesis
|
||||
finally:
|
||||
if owns_client:
|
||||
await client.aclose()
|
||||
|
||||
|
||||
async def _log_thesis_performance(
|
||||
pool: asyncpg.Pool,
|
||||
*,
|
||||
resolved: ResolvedAgentConfig,
|
||||
ticker: str,
|
||||
success: bool,
|
||||
duration_ms: int,
|
||||
input_tokens: int = 0,
|
||||
output_tokens: int = 0,
|
||||
error_message: str | None = None,
|
||||
) -> None:
|
||||
"""Insert a performance log entry for the thesis rewriter agent."""
|
||||
try:
|
||||
await pool.execute(
|
||||
"""INSERT INTO agent_performance_log
|
||||
(agent_id, variant_id, document_id, ticker, success,
|
||||
duration_ms, confidence, retry_count,
|
||||
input_tokens, output_tokens, error_message)
|
||||
VALUES ($1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8, $9, $10, $11)""",
|
||||
resolved.agent_id,
|
||||
resolved.variant_id,
|
||||
None, # no document_id for thesis rewrites
|
||||
ticker,
|
||||
success,
|
||||
duration_ms,
|
||||
0.0, # no confidence score for rewrites
|
||||
0,
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
error_message,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to log thesis-rewriter performance", exc_info=True)
|
||||
|
||||
|
||||
async def _call_ollama_thesis(
|
||||
client: httpx.AsyncClient,
|
||||
config: OllamaConfig,
|
||||
@@ -154,6 +295,10 @@ async def _call_ollama_thesis(
|
||||
"stream": False,
|
||||
}
|
||||
|
||||
# Support context_window override via num_ctx (Requirement 10.4)
|
||||
if config.context_window > 0:
|
||||
payload["options"] = {"num_ctx": config.context_window}
|
||||
|
||||
resp = await client.post(
|
||||
f"{config.base_url}/api/chat",
|
||||
json=payload,
|
||||
|
||||
@@ -796,6 +796,7 @@ async def generate_recommendation(
|
||||
deterministic_thesis=deterministic_thesis,
|
||||
summary=summary,
|
||||
config=ollama_config,
|
||||
pool=pool,
|
||||
)
|
||||
# If the LLM returned the same text as the deterministic thesis,
|
||||
# treat it as a no-op (fallback was used).
|
||||
|
||||
Reference in New Issue
Block a user