feat: AI Agents management page with per-agent performance tracking

New Agents tab in the sidebar (Ops group) for viewing, editing, and creating AI agent configurations.

Database (migration 026):
- ai_agents table: editable configs for each LLM agent (model, prompts, temperature, tokens, retries). source='system' for built-in agents, source='user' for custom agents. Seeds 3 system agents (Document Extractor, Event Classifier, Thesis Rewriter) using WHERE NOT EXISTS so that user edits are never overwritten across reinstalls.
- agent_performance_log table: per-invocation metrics (duration, confidence, retries, tokens, errors) linked to the agent config.

API endpoints:
- GET/POST /api/agents — list and create agents
- GET/PUT/DELETE /api/agents/{id} — view, edit, delete (system agents can be edited but not deleted)
- GET /api/agents/{id}/performance — aggregated metrics (success rate, avg/p95 latency, confidence, token usage)
- GET /api/agents/{id}/performance/history — hourly time series

Frontend:
- AgentsPage with sidebar list + detail panel
- Agent detail: config display, system prompt viewer, performance dashboard with metrics cards and time-series chart
- Edit form: all config fields editable, including system prompt, model, temperature, tokens, retries
- Create form: new user-defined agents with auto-slug generation
- System agents show a blue badge, user agents show a green badge
2026-04-17 01:24:35 +00:00
parent 86b549e5e1
commit 45752b9a29
5 changed files with 742 additions and 0 deletions
@@ -2640,3 +2640,190 @@ async def get_decision_history(
        "decisions": decisions,
        "count": len(decisions),
    }
+
+# ---------------------------------------------------------------------------
+# AI Agents  (Editable agent configurations + performance tracking)
+# ---------------------------------------------------------------------------
+
+
+# Request body for PUT /api/agents/{agent_id}.
+# Every field is optional and defaults to None; update_agent dumps this model
+# with exclude_none=True, so a field left as None is simply not written.
+# The field names are used verbatim as ai_agents column names in the UPDATE.
+class AgentUpdateBody(BaseModel):
+    # Descriptive metadata.
+    name: Optional[str] = None
+    purpose: Optional[str] = None
+    # Model selection.
+    model_provider: Optional[str] = None
+    model_name: Optional[str] = None
+    # Prompt text and version labels.
+    system_prompt: Optional[str] = None
+    user_prompt_template: Optional[str] = None
+    prompt_version: Optional[str] = None
+    schema_version: Optional[str] = None
+    # Generation / invocation settings.
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    timeout_seconds: Optional[int] = None
+    max_retries: Optional[int] = None
+    # Same column that list_agents filters on when active_only is set.
+    active: Optional[bool] = None
+
+
+# Request body for POST /api/agents.
+# Only name and slug are required; every other field falls back to the
+# default declared here. There is no `source` field: create_agent always
+# inserts the row with source = 'user'.
+class AgentCreateBody(BaseModel):
+    # Required identity fields.
+    name: str
+    slug: str
+    purpose: str = ""
+    # Model selection defaults.
+    model_provider: str = "ollama"
+    model_name: str = "llama3.1:8b"
+    # Prompt text and version labels.
+    system_prompt: str = ""
+    user_prompt_template: str = ""
+    prompt_version: str = ""
+    schema_version: str = "1.0.0"
+    # Generation / invocation settings.
+    temperature: float = 0.0
+    max_tokens: int = 32768
+    timeout_seconds: int = 120
+    max_retries: int = 2
+
+
+@app.get("/api/agents")
+async def list_agents(active_only: bool = False):
+    """Return the stored AI agent configurations as a list of dicts.
+
+    Args:
+        active_only: When true, only rows with ``active = TRUE`` are
+            returned; otherwise every row in ``ai_agents`` is returned.
+
+    Returns:
+        One dict per row (built by ``_row_to_dict``), ordered by ``source``
+        descending and then ``name`` ascending.
+    """
+    # The filter text is one of two fixed literals, never caller-supplied
+    # text, so interpolating it into the statement is safe.
+    if active_only:
+        filter_sql = "WHERE active = TRUE"
+    else:
+        filter_sql = ""
+
+    query = f"""SELECT id, name, slug, purpose, model_provider, model_name,
+                   system_prompt, user_prompt_template, prompt_version,
+                   schema_version, temperature, max_tokens, timeout_seconds,
+                   max_retries, active, source, created_at, updated_at
+            FROM ai_agents {filter_sql}
+            ORDER BY source DESC, name ASC"""
+    records = await pool.fetch(query)
+    return list(map(_row_to_dict, records))
+
+
+@app.get("/api/agents/{agent_id}")
+async def get_agent(agent_id: str):
+    """Fetch one agent configuration by its id.
+
+    Args:
+        agent_id: Value compared against ``ai_agents.id``.
+
+    Returns:
+        The matching row converted with ``_row_to_dict``.
+
+    Raises:
+        HTTPException: 404 when no row has that id.
+    """
+    select_sql = """SELECT id, name, slug, purpose, model_provider, model_name,
+                  system_prompt, user_prompt_template, prompt_version,
+                  schema_version, temperature, max_tokens, timeout_seconds,
+                  max_retries, active, source, created_at, updated_at
+           FROM ai_agents WHERE id = $1"""
+    record = await pool.fetchrow(select_sql, agent_id)
+    if record:
+        return _row_to_dict(record)
+    raise HTTPException(404, "Agent not found")
+
+
+@app.post("/api/agents", status_code=201)
+async def create_agent(body: AgentCreateBody):
+    """Insert a new agent row marked as user-defined.
+
+    The ``source`` column is always written as ``'user'``; it cannot be set
+    through the request body.
+
+    Args:
+        body: Validated creation payload.
+
+    Returns:
+        A dict containing only the columns named in the RETURNING clause:
+        ``id``, ``name``, ``slug``, ``source`` and ``created_at``.
+    """
+    insert_sql = """INSERT INTO ai_agents (
+               name, slug, purpose, model_provider, model_name,
+               system_prompt, user_prompt_template, prompt_version,
+               schema_version, temperature, max_tokens, timeout_seconds,
+               max_retries, source
+           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, 'user')
+           RETURNING id, name, slug, source, created_at"""
+    # Positional values; their order must match $1..$13 in the statement.
+    values = (
+        body.name,
+        body.slug,
+        body.purpose,
+        body.model_provider,
+        body.model_name,
+        body.system_prompt,
+        body.user_prompt_template,
+        body.prompt_version,
+        body.schema_version,
+        body.temperature,
+        body.max_tokens,
+        body.timeout_seconds,
+        body.max_retries,
+    )
+    created = await pool.fetchrow(insert_sql, *values)
+    return _row_to_dict(created)
+
+
+@app.put("/api/agents/{agent_id}")
+async def update_agent(agent_id: str, body: AgentUpdateBody):
+    """Apply a partial update to an agent configuration.
+
+    Only fields present in the body with a non-None value are written;
+    ``updated_at`` is always set to ``NOW()``. No check on ``source`` is made
+    here, so system and user agents are both editable. Per the note that
+    accompanied this endpoint, edits survive reinstalls because migration 026
+    only inserts system agents that do not already exist (by slug).
+
+    Args:
+        agent_id: Value compared against ``ai_agents.id``.
+        body: Fields to change.
+
+    Returns:
+        The updated row converted with ``_row_to_dict``.
+
+    Raises:
+        HTTPException: 400 when the body carries no non-None field,
+            404 when no row has that id.
+    """
+    changes = body.model_dump(exclude_none=True)
+    if not changes:
+        raise HTTPException(400, "No fields to update")
+
+    # Column names come from the model dump; the values themselves are
+    # always bound as $1..$n rather than interpolated.
+    assignments = [
+        f"{column} = ${position}"
+        for position, column in enumerate(changes, start=1)
+    ]
+    assignments.append("updated_at = NOW()")
+    set_sql = ", ".join(assignments)
+
+    # The id is bound last, so its placeholder number equals the bind count.
+    bind_values: list[Any] = [*changes.values(), agent_id]
+    id_position = len(bind_values)
+
+    updated = await pool.fetchrow(
+        f"""UPDATE ai_agents SET {set_sql}
+            WHERE id = ${id_position}
+            RETURNING id, name, slug, purpose, model_provider, model_name,
+                      system_prompt, user_prompt_template, prompt_version,
+                      schema_version, temperature, max_tokens, timeout_seconds,
+                      max_retries, active, source, created_at, updated_at""",
+        *bind_values,
+    )
+    if updated:
+        return _row_to_dict(updated)
+    raise HTTPException(404, "Agent not found")
+
+
+@app.delete("/api/agents/{agent_id}")
+async def delete_agent(agent_id: str):
+    """Remove a user-created agent; system agents are refused.
+
+    Args:
+        agent_id: Value compared against ``ai_agents.id``.
+
+    Returns:
+        ``{"deleted": True}`` once the DELETE statement has been executed.
+
+    Raises:
+        HTTPException: 404 when no row has that id, 403 when the row's
+            ``source`` is ``"system"``.
+    """
+    lookup_sql = "SELECT source FROM ai_agents WHERE id = $1"
+    found = await pool.fetchrow(lookup_sql, agent_id)
+    if not found:
+        raise HTTPException(404, "Agent not found")
+
+    # Built-in agents may only be deactivated, never removed.
+    is_builtin = found["source"] == "system"
+    if is_builtin:
+        raise HTTPException(403, "Cannot delete system agents — deactivate instead")
+
+    await pool.execute("DELETE FROM ai_agents WHERE id = $1", agent_id)
+    return {"deleted": True}
+
+
+@app.get("/api/agents/{agent_id}/performance")
+async def get_agent_performance(
+    agent_id: str,
+    hours: int = Query(default=24, ge=1, le=720),
+):
+    """Get aggregated performance metrics for an agent.
+
+    Args:
+        agent_id: Value compared against ``agent_performance_log.agent_id``.
+            The agent's existence is not checked here; an id with no log
+            rows yields zero counts rather than an error.
+        hours: Size of the look-back window ending at the database's
+            ``NOW()``. Must be between 1 and 720. Zero or negative values
+            are rejected by request validation, because they would describe
+            an empty or future window and silently return no data.
+
+    Returns:
+        A dict of the aggregate columns selected below plus
+        ``success_rate`` (successes / total invocations, rounded to 4
+        places), which is ``None`` when the window holds no invocations.
+    """
+    row = await pool.fetchrow(
+        """SELECT
+               COUNT(*) AS total_invocations,
+               COUNT(*) FILTER (WHERE success) AS successes,
+               COUNT(*) FILTER (WHERE NOT success) AS failures,
+               ROUND(AVG(duration_ms)::numeric) AS avg_duration_ms,
+               ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY duration_ms)::numeric) AS p95_duration_ms,
+               ROUND(AVG(confidence)::numeric, 4) AS avg_confidence,
+               ROUND(AVG(retry_count)::numeric, 2) AS avg_retries,
+               SUM(input_tokens) AS total_input_tokens,
+               SUM(output_tokens) AS total_output_tokens
+           FROM agent_performance_log
+           WHERE agent_id = $1
+             AND recorded_at >= NOW() - make_interval(hours => $2)""",
+        agent_id, hours,
+    )
+    d = _row_to_dict(row) if row else {}
+    # `or 0` covers both a missing key and a NULL/None count.
+    total = int(d.get("total_invocations", 0) or 0)
+    successes = int(d.get("successes", 0) or 0)
+    # Avoid dividing by zero when nothing was logged in the window.
+    d["success_rate"] = round(successes / total, 4) if total > 0 else None
+    return d
+
+
+@app.get("/api/agents/{agent_id}/performance/history")
+async def get_agent_performance_history(
+    agent_id: str,
+    hours: int = Query(default=24, ge=1, le=720),
+):
+    """Get hourly performance time-series for an agent.
+
+    Args:
+        agent_id: Value compared against ``agent_performance_log.agent_id``.
+            The agent's existence is not checked here; an id with no log
+            rows yields an empty list.
+        hours: Size of the look-back window ending at the database's
+            ``NOW()``. Must be between 1 and 720. Zero or negative values
+            are rejected by request validation, because they would describe
+            an empty or future window and silently return no data.
+
+    Returns:
+        A list of dicts, one per hour bucket that has at least one log row,
+        in ascending hour order. Each carries ``hour``, ``invocations``,
+        ``successes``, ``avg_duration_ms`` and ``avg_confidence``. Hours
+        with no rows are absent, not zero-filled.
+    """
+    rows = await pool.fetch(
+        """SELECT
+               date_trunc('hour', recorded_at) AS hour,
+               COUNT(*) AS invocations,
+               COUNT(*) FILTER (WHERE success) AS successes,
+               ROUND(AVG(duration_ms)::numeric) AS avg_duration_ms,
+               ROUND(AVG(confidence)::numeric, 4) AS avg_confidence
+           FROM agent_performance_log
+           WHERE agent_id = $1
+             AND recorded_at >= NOW() - make_interval(hours => $2)
+           GROUP BY 1 ORDER BY 1""",
+        agent_id, hours,
+    )
+    return [_row_to_dict(r) for r in rows]