feat: AI Agents management page with per-agent performance tracking

New Agents tab in the sidebar (Ops group) for viewing, editing, and
creating AI agent configurations:

Database (migration 026):
- ai_agents table: editable configs for each LLM agent (model, prompts,
  temperature, tokens, retries). source='system' for built-in,
  source='user' for custom. Seeds 3 system agents (Document Extractor,
  Event Classifier, Thesis Rewriter) using WHERE NOT EXISTS to never
  overwrite user edits across reinstalls.
- agent_performance_log table: per-invocation metrics (duration,
  confidence, retries, tokens, errors) linked to agent config.

API endpoints:
- GET/POST /api/agents — list and create agents
- GET/PUT/DELETE /api/agents/{id} — view, edit, delete (system agents
  can be edited but not deleted)
- GET /api/agents/{id}/performance — aggregated metrics (success rate,
  avg/p95 latency, confidence, token usage)
- GET /api/agents/{id}/performance/history — hourly time series

Frontend:
- AgentsPage with sidebar list + detail panel
- Agent detail: config display, system prompt viewer, performance
  dashboard with metrics cards and time-series chart
- Edit form: all config fields editable including system prompt,
  model, temperature, tokens, retries
- Create form: new user-defined agents with auto-slug generation
- System agents show blue badge, user agents show green badge
This commit is contained in:
Celes Renata
2026-04-17 01:24:35 +00:00
parent 86b549e5e1
commit 45752b9a29
5 changed files with 742 additions and 0 deletions
+187
View File
@@ -2640,3 +2640,190 @@ async def get_decision_history(
"decisions": decisions,
"count": len(decisions),
}
# ---------------------------------------------------------------------------
# AI Agents (Editable agent configurations + performance tracking)
# ---------------------------------------------------------------------------
class AgentUpdateBody(BaseModel):
    """Request body for a partial update of an agent configuration.

    Every field is optional and defaults to ``None``. The update endpoint
    dumps this model with ``exclude_none=True``, so only fields carrying a
    non-``None`` value are written. Field order is significant only in that
    it determines the order of the generated ``SET`` assignments.
    """

    # --- Identity / description -------------------------------------------
    name: Optional[str] = None
    purpose: Optional[str] = None

    # --- Model selection ---------------------------------------------------
    model_provider: Optional[str] = None
    model_name: Optional[str] = None

    # --- Prompts and their versions ----------------------------------------
    system_prompt: Optional[str] = None
    user_prompt_template: Optional[str] = None
    prompt_version: Optional[str] = None
    schema_version: Optional[str] = None

    # --- Generation / runtime limits ---------------------------------------
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    timeout_seconds: Optional[int] = None
    max_retries: Optional[int] = None

    # --- Enable / disable flag ---------------------------------------------
    active: Optional[bool] = None
class AgentCreateBody(BaseModel):
    """Request body for creating a new agent.

    ``name`` and ``slug`` are required; every other field falls back to the
    default declared here when the client omits it.
    """

    # --- Required identity -------------------------------------------------
    name: str
    slug: str

    # --- Description -------------------------------------------------------
    purpose: str = ""

    # --- Model selection ---------------------------------------------------
    model_provider: str = "ollama"
    model_name: str = "llama3.1:8b"

    # --- Prompts and their versions ----------------------------------------
    system_prompt: str = ""
    user_prompt_template: str = ""
    prompt_version: str = ""
    schema_version: str = "1.0.0"

    # --- Generation / runtime limits ---------------------------------------
    temperature: float = 0.0
    max_tokens: int = 32768
    timeout_seconds: int = 120
    max_retries: int = 2
@app.get("/api/agents")
async def list_agents(active_only: bool = False):
    """List AI agent configurations.

    Set ``active_only`` to restrict the result to rows whose ``active``
    column is TRUE. Rows are ordered by ``source`` descending, then by
    ``name`` ascending, and each row is converted with ``_row_to_dict``.
    """
    # The filter is one of two fixed fragments, never client-supplied text,
    # so interpolating it into the statement is safe.
    if active_only:
        condition = "WHERE active = TRUE"
    else:
        condition = ""

    query = (
        "SELECT id, name, slug, purpose, model_provider, model_name,\n"
        "system_prompt, user_prompt_template, prompt_version,\n"
        "schema_version, temperature, max_tokens, timeout_seconds,\n"
        "max_retries, active, source, created_at, updated_at\n"
        f"FROM ai_agents {condition}\n"
        "ORDER BY source DESC, name ASC"
    )
    records = await pool.fetch(query)
    return list(map(_row_to_dict, records))
@app.get("/api/agents/{agent_id}")
async def get_agent(agent_id: str):
    """Fetch one agent configuration by its id.

    Responds with 404 ("Agent not found") when no row matches.
    """
    query = (
        "SELECT id, name, slug, purpose, model_provider, model_name,\n"
        "system_prompt, user_prompt_template, prompt_version,\n"
        "schema_version, temperature, max_tokens, timeout_seconds,\n"
        "max_retries, active, source, created_at, updated_at\n"
        "FROM ai_agents WHERE id = $1"
    )
    record = await pool.fetchrow(query, agent_id)
    if record:
        return _row_to_dict(record)
    raise HTTPException(404, "Agent not found")
@app.post("/api/agents", status_code=201)
async def create_agent(body: AgentCreateBody):
    """Create a new user-defined agent.

    The row is always inserted with ``source = 'user'``. The response holds
    the new row's ``id``, ``name``, ``slug``, ``source`` and ``created_at``.
    """
    # NOTE(review): if ai_agents enforces a unique slug, a duplicate would
    # surface here as an unhandled database error — confirm and map to 409.
    insert_sql = (
        "INSERT INTO ai_agents (\n"
        "name, slug, purpose, model_provider, model_name,\n"
        "system_prompt, user_prompt_template, prompt_version,\n"
        "schema_version, temperature, max_tokens, timeout_seconds,\n"
        "max_retries, source\n"
        ") VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, 'user')\n"
        "RETURNING id, name, slug, source, created_at"
    )
    # Positional values, in the same order as the column list above.
    values = (
        body.name,
        body.slug,
        body.purpose,
        body.model_provider,
        body.model_name,
        body.system_prompt,
        body.user_prompt_template,
        body.prompt_version,
        body.schema_version,
        body.temperature,
        body.max_tokens,
        body.timeout_seconds,
        body.max_retries,
    )
    created = await pool.fetchrow(insert_sql, *values)
    return _row_to_dict(created)
@app.put("/api/agents/{agent_id}")
async def update_agent(agent_id: str, body: AgentUpdateBody):
    """Update an agent configuration.

    Both system and user agents can be edited. User changes are preserved
    across reinstalls because migration 026 only inserts system agents
    that don't already exist (by slug).

    Only fields supplied with a non-null value are written; ``updated_at``
    is refreshed on every successful update. Responds with 400 when the
    body carries no fields and 404 when the id matches no row.
    """
    changes = body.model_dump(exclude_none=True)
    if not changes:
        raise HTTPException(400, "No fields to update")

    # Column names are the model's own field names, never raw client text,
    # so they can be interpolated; the values travel as bound parameters.
    assignments = [
        f"{column} = ${position}"
        for position, column in enumerate(changes, start=1)
    ]
    assignments.append("updated_at = NOW()")

    # The id takes the placeholder right after the last field value.
    id_position = len(changes) + 1
    statement = (
        f"UPDATE ai_agents SET {', '.join(assignments)}\n"
        f"WHERE id = ${id_position}\n"
        "RETURNING id, name, slug, purpose, model_provider, model_name,\n"
        "system_prompt, user_prompt_template, prompt_version,\n"
        "schema_version, temperature, max_tokens, timeout_seconds,\n"
        "max_retries, active, source, created_at, updated_at"
    )

    updated = await pool.fetchrow(statement, *changes.values(), agent_id)
    if updated:
        return _row_to_dict(updated)
    raise HTTPException(404, "Agent not found")
@app.delete("/api/agents/{agent_id}")
async def delete_agent(agent_id: str):
    """Delete a user-created agent.

    Responds with 404 when the id matches no row and 403 when the row's
    ``source`` is ``'system'`` (system agents can only be deactivated).
    """
    existing = await pool.fetchrow(
        "SELECT source FROM ai_agents WHERE id = $1", agent_id,
    )
    if not existing:
        raise HTTPException(404, "Agent not found")

    # Built-in agents are protected from deletion.
    is_system_agent = existing["source"] == "system"
    if is_system_agent:
        raise HTTPException(403, "Cannot delete system agents — deactivate instead")

    await pool.execute("DELETE FROM ai_agents WHERE id = $1", agent_id)
    return {"deleted": True}
@app.get("/api/agents/{agent_id}/performance")
async def get_agent_performance(
    agent_id: str,
    hours: int = Query(default=24, ge=1, le=720),
):
    """Get aggregated performance metrics for an agent.

    ``hours`` is the look-back window (1-720, default 24). Values below 1
    are rejected by validation: a zero or negative window would select
    nothing (or only future rows) and silently report empty metrics.

    The response contains ``total_invocations``, ``successes``,
    ``failures``, ``avg_duration_ms``, ``p95_duration_ms``,
    ``avg_confidence``, ``avg_retries``, ``total_input_tokens``,
    ``total_output_tokens`` and a derived ``success_rate``
    (successes / total, rounded to 4 places; ``None`` when there were no
    invocations in the window).
    """
    query = (
        "SELECT\n"
        "COUNT(*) AS total_invocations,\n"
        "COUNT(*) FILTER (WHERE success) AS successes,\n"
        "COUNT(*) FILTER (WHERE NOT success) AS failures,\n"
        "ROUND(AVG(duration_ms)::numeric) AS avg_duration_ms,\n"
        "ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY duration_ms)::numeric) AS p95_duration_ms,\n"
        "ROUND(AVG(confidence)::numeric, 4) AS avg_confidence,\n"
        "ROUND(AVG(retry_count)::numeric, 2) AS avg_retries,\n"
        "SUM(input_tokens) AS total_input_tokens,\n"
        "SUM(output_tokens) AS total_output_tokens\n"
        "FROM agent_performance_log\n"
        "WHERE agent_id = $1\n"
        "AND recorded_at >= NOW() - make_interval(hours => $2)"
    )
    row = await pool.fetchrow(query, agent_id, hours)
    metrics = _row_to_dict(row) if row else {}

    # Counts may be missing or NULL; coerce to 0 before dividing.
    total = int(metrics.get("total_invocations", 0) or 0)
    successes = int(metrics.get("successes", 0) or 0)
    metrics["success_rate"] = round(successes / total, 4) if total > 0 else None
    return metrics
@app.get("/api/agents/{agent_id}/performance/history")
async def get_agent_performance_history(
    agent_id: str,
    hours: int = Query(default=24, ge=1, le=720),
):
    """Get hourly performance time-series for an agent.

    ``hours`` is the look-back window (1-720, default 24). Values below 1
    are rejected by validation: a zero or negative window would select
    nothing (or only future rows) and silently return an empty series.

    Each element of the returned list describes one hour bucket with the
    keys ``hour``, ``invocations``, ``successes``, ``avg_duration_ms`` and
    ``avg_confidence``, ordered by ``hour`` ascending. Hours with no logged
    invocations produce no element.
    """
    query = (
        "SELECT\n"
        "date_trunc('hour', recorded_at) AS hour,\n"
        "COUNT(*) AS invocations,\n"
        "COUNT(*) FILTER (WHERE success) AS successes,\n"
        "ROUND(AVG(duration_ms)::numeric) AS avg_duration_ms,\n"
        "ROUND(AVG(confidence)::numeric, 4) AS avg_confidence\n"
        "FROM agent_performance_log\n"
        "WHERE agent_id = $1\n"
        "AND recorded_at >= NOW() - make_interval(hours => $2)\n"
        "GROUP BY 1 ORDER BY 1"
    )
    rows = await pool.fetch(query, agent_id, hours)
    return [_row_to_dict(r) for r in rows]