feat: agent variants — migration, API, service integration, frontend, tests

- Migration 027: agent_variants table with single-active enforcement,
  variant_id column on agent_performance_log
- API: full CRUD, clone from agent/variant, activate/deactivate,
  per-variant performance metrics and history endpoints
- Services: extractor, event classifier, thesis rewriter all wired
  to AgentConfigResolver with variant override support
- Frontend: variant list, comparison view, create/edit/clone forms,
  activate/delete actions on Agents page
- Tests: API tests + 5 property-based tests (single-active invariant,
  clone preservation, config resolution, slug determinism, update idempotence)
- Spec files for agent-variants feature
This commit is contained in:
Celes Renata
2026-04-17 05:15:42 +00:00
parent 734bf001a7
commit 7c23c044d7
14 changed files with 3118 additions and 120 deletions
+549
View File
@@ -0,0 +1,549 @@
"""Unit tests for agent variant API endpoints.
Tests variant CRUD, clone, activate/deactivate, performance queries,
and edge cases (duplicate slug, non-existent resources, validation).
Requirements: 1.3, 1.4, 2.1-2.6, 3.1-3.6, 4.1-4.5, 6.3-6.5
"""
from __future__ import annotations
import uuid
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import asyncpg
import pytest
from httpx import ASGITransport, AsyncClient
from services.api.app import app
# Fixed, timezone-aware (UTC) timestamp used as the base time in fake rows.
NOW = datetime(2026, 7, 1, 12, 0, 0, tzinfo=timezone.utc)
# Random agent and variant identifiers, generated once when this module is imported.
AGENT_ID = str(uuid.uuid4())
VARIANT_ID = str(uuid.uuid4())
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class FakeRecord(dict):
    """Dict-backed stand-in for ``asyncpg.Record`` used by these tests."""

    def items(self):
        """Return the (key, value) view of the underlying dict."""
        return dict.items(self)
def _variant_row(
    *,
    variant_id: str | None = None,
    agent_id: str = AGENT_ID,
    is_active: bool = False,
    variant_name: str = "test-variant",
    variant_slug: str = "test-variant",
    model_name: str = "qwen3:8b",
    **overrides,
) -> FakeRecord:
    """Build a fake variant row populated with test defaults.

    Args:
        variant_id: Row id; a fresh random UUID string is used when falsy.
        agent_id: Owning agent id (defaults to the module-level ``AGENT_ID``).
        is_active: Value for the ``is_active`` column.
        variant_name: Value for the ``variant_name`` column.
        variant_slug: Value for the ``variant_slug`` column.
        model_name: Value for the ``model_name`` column.
        **overrides: Extra or replacement columns applied on top of the defaults.

    Returns:
        A ``FakeRecord`` holding the merged columns.
    """
    defaults = {
        "id": variant_id if variant_id else str(uuid.uuid4()),
        "agent_id": agent_id,
        "variant_name": variant_name,
        "variant_slug": variant_slug,
        "description": "",
        "model_provider": "ollama",
        "model_name": model_name,
        "system_prompt": "You are a test agent.",
        "user_prompt_template": "Analyze: {text}",
        "prompt_version": "v1",
        "temperature": 0.1,
        "max_tokens": 16384,
        "context_window": 4096,
        "input_token_limit": 2048,
        "token_budget": 10000,
        "timeout_seconds": 120,
        "max_retries": 2,
        "is_active": is_active,
        "created_at": NOW,
        "updated_at": NOW,
    }
    # Later keys win, so caller-supplied overrides replace the defaults.
    return FakeRecord({**defaults, **overrides})
def _agent_row(agent_id: str = AGENT_ID) -> FakeRecord:
    """Build a fake base-agent row with fixed configuration values.

    Args:
        agent_id: Value for the ``id`` column (defaults to ``AGENT_ID``).

    Returns:
        A ``FakeRecord`` with the agent's model, prompt and limit columns.
    """
    base_agent = {
        "id": agent_id,
        "model_provider": "ollama",
        "model_name": "qwen3:8b",
        "system_prompt": "Base system prompt",
        "user_prompt_template": "Base template: {text}",
        "prompt_version": "v1",
        "temperature": 0.0,
        "max_tokens": 32768,
        "timeout_seconds": 120,
        "max_retries": 2,
    }
    return FakeRecord(base_agent)
def _perf_row() -> FakeRecord:
    """Build a fake aggregate-performance row (100 invocations, 90 successes)."""
    aggregates = {
        "total_invocations": 100,
        "successes": 90,
        "failures": 10,
        "avg_duration_ms": 450,
        "p95_duration_ms": 900,
        "avg_confidence": 0.82,
        "avg_retries": 0.3,
        "total_input_tokens": 50000,
        "total_output_tokens": 25000,
    }
    return FakeRecord(aggregates)
def _perf_history_row(hour_offset: int = 0) -> FakeRecord:
    """Build a fake hourly performance-history row.

    Args:
        hour_offset: Number of hours before ``NOW`` at which the bucket starts.

    Returns:
        A ``FakeRecord`` with the bucket timestamp and fixed hourly metrics.
    """
    from datetime import timedelta

    bucket_start = NOW - timedelta(hours=hour_offset)
    hourly = {
        "hour": bucket_start,
        "invocations": 10,
        "successes": 9,
        "avg_duration_ms": 400,
        "avg_confidence": 0.85,
    }
    return FakeRecord(hourly)
# ---------------------------------------------------------------------------
# Task 7.1.1 — CRUD, clone, activate/deactivate, performance
# ---------------------------------------------------------------------------
class TestCreateVariant:
    """Exercise ``POST /api/agents/{agent_id}/variants``."""

    @pytest.mark.asyncio
    async def test_create_variant_returns_201(self):
        """A full create payload is answered with 201 and the expected fields."""
        payload = {
            "variant_name": "test-variant",
            "model_name": "qwen3:8b",
            "context_window": 4096,
            "input_token_limit": 2048,
            "token_budget": 10000,
        }
        pool = AsyncMock()
        # The patched pool hands back a default fake variant row from fetchrow.
        pool.fetchrow.return_value = _variant_row()

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.post(
                    f"/api/agents/{AGENT_ID}/variants", json=payload
                )

        assert response.status_code == 201
        body = response.json()
        # Every field sent in the payload must appear unchanged in the response.
        for key, expected in payload.items():
            assert body[key] == expected
        assert body["is_active"] is False
class TestCloneAgentAsVariant:
    """Exercise ``POST /api/agents/{agent_id}/clone``."""

    @pytest.mark.asyncio
    async def test_clone_agent_returns_201(self):
        """Cloning the base agent is answered with 201, the new name and the agent id."""
        pool = AsyncMock()
        # Successive fetchrow calls return these rows in order.
        pool.fetchrow.side_effect = [
            _agent_row(),
            _variant_row(
                variant_name="cloned-from-agent", variant_slug="cloned-from-agent"
            ),
        ]

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.post(
                    f"/api/agents/{AGENT_ID}/clone",
                    json={"variant_name": "cloned-from-agent"},
                )

        assert response.status_code == 201
        body = response.json()
        assert body["variant_name"] == "cloned-from-agent"
        assert body["agent_id"] == AGENT_ID
class TestCloneVariant:
    """Exercise ``POST /api/agents/{agent_id}/variants/{variant_id}/clone``."""

    @pytest.mark.asyncio
    async def test_clone_variant_returns_201(self):
        """Cloning an existing variant is answered with 201 and the new name."""
        pool = AsyncMock()
        # Successive fetchrow calls return the source row, then the cloned row.
        pool.fetchrow.side_effect = [
            _variant_row(variant_id=VARIANT_ID),
            _variant_row(variant_name="cloned-v2", variant_slug="cloned-v2"),
        ]

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.post(
                    f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}/clone",
                    json={"variant_name": "cloned-v2"},
                )

        assert response.status_code == 201
        assert response.json()["variant_name"] == "cloned-v2"
class TestListVariants:
    """Exercise ``GET /api/agents/{agent_id}/variants``."""

    @pytest.mark.asyncio
    async def test_list_variants_returns_list(self):
        """Two rows from the pool come back as a two-element JSON list, in order."""
        pool = AsyncMock()
        pool.fetch.return_value = [
            _variant_row(variant_name="v1", variant_slug="v1"),
            _variant_row(variant_name="v2", variant_slug="v2", is_active=True),
        ]

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.get(f"/api/agents/{AGENT_ID}/variants")

        assert response.status_code == 200
        listed = response.json()
        assert len(listed) == 2
        first, second = listed
        assert first["variant_name"] == "v1"
        assert second["is_active"] is True
class TestGetVariant:
    """Exercise ``GET /api/agents/{agent_id}/variants/{variant_id}``."""

    @pytest.mark.asyncio
    async def test_get_variant_returns_variant(self):
        """A row found by the pool is returned with status 200 and the same id."""
        pool = AsyncMock()
        pool.fetchrow.return_value = _variant_row(variant_id=VARIANT_ID)

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.get(
                    f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}"
                )

        assert response.status_code == 200
        assert response.json()["id"] == VARIANT_ID
class TestUpdateVariant:
    """Exercise ``PUT /api/agents/{agent_id}/variants/{variant_id}``."""

    @pytest.mark.asyncio
    async def test_update_variant_returns_updated(self):
        """A partial update is answered with 200 and the changed field values."""
        changes = {"model_name": "llama3.1:8b", "temperature": 0.5}
        pool = AsyncMock()
        pool.fetchrow.return_value = _variant_row(
            variant_id=VARIANT_ID, model_name="llama3.1:8b", temperature=0.5
        )

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.put(
                    f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}", json=changes
                )

        assert response.status_code == 200
        body = response.json()
        assert body["model_name"] == "llama3.1:8b"
        assert body["temperature"] == 0.5
class TestDeleteVariant:
    """Exercise ``DELETE /api/agents/{agent_id}/variants/{variant_id}``."""

    @staticmethod
    async def _delete_with_state(is_active: bool):
        """Send the DELETE request while fetchrow reports the given active flag."""
        pool = AsyncMock()
        pool.fetchrow.return_value = FakeRecord({"is_active": is_active})
        # pool.execute is an auto-created AsyncMock child, so it can be awaited.
        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                return await client.delete(
                    f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}"
                )

    @pytest.mark.asyncio
    async def test_delete_inactive_variant_succeeds(self):
        """Deleting an inactive variant is answered with 200 and ``deleted: true``."""
        response = await self._delete_with_state(False)

        assert response.status_code == 200
        assert response.json()["deleted"] is True

    @pytest.mark.asyncio
    async def test_delete_active_variant_returns_400(self):
        """Deleting an active variant is answered with 400 mentioning "active"."""
        response = await self._delete_with_state(True)

        assert response.status_code == 400
        assert "active" in response.json()["detail"].lower()
class TestActivateDeactivate:
    """Exercise the ``.../activate`` and ``.../deactivate`` POST endpoints."""

    @staticmethod
    def _async_cm(entered):
        """Return a mock async context manager whose ``__aenter__`` yields *entered*."""
        manager = AsyncMock()
        manager.__aenter__ = AsyncMock(return_value=entered)
        manager.__aexit__ = AsyncMock(return_value=False)
        return manager

    @pytest.mark.asyncio
    async def test_activate_variant(self):
        """Activating a variant is answered with 200 and ``is_active: true``."""
        conn = AsyncMock()
        conn.fetchrow.return_value = _variant_row(
            variant_id=VARIANT_ID, is_active=True
        )
        # transaction() and acquire() are called synchronously and must hand
        # back async context managers, hence MagicMock rather than AsyncMock.
        conn.transaction = MagicMock(return_value=self._async_cm(None))
        pool = AsyncMock()
        pool.acquire = MagicMock(return_value=self._async_cm(conn))

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.post(
                    f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}/activate"
                )

        assert response.status_code == 200
        assert response.json()["is_active"] is True

    @pytest.mark.asyncio
    async def test_deactivate_variants(self):
        """Deactivating all variants is answered with 200 and ``deactivated: true``."""
        # pool.execute is an auto-created AsyncMock child, so it can be awaited.
        pool = AsyncMock()

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.post(
                    f"/api/agents/{AGENT_ID}/variants/deactivate"
                )

        assert response.status_code == 200
        assert response.json()["deactivated"] is True
class TestVariantPerformance:
    """Exercise ``.../performance`` and ``.../performance/history`` (GET)."""

    @pytest.mark.asyncio
    async def test_get_variant_performance(self):
        """Aggregate metrics come back with 200 and a 0.9 success rate."""
        pool = AsyncMock()
        pool.fetchrow.return_value = _perf_row()
        url = f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}/performance?hours=24"

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.get(url)

        assert response.status_code == 200
        metrics = response.json()
        assert metrics["total_invocations"] == 100
        assert metrics["successes"] == 90
        assert metrics["success_rate"] == 0.9

    @pytest.mark.asyncio
    async def test_get_variant_performance_history(self):
        """Two hourly rows from the pool come back as a two-element list."""
        pool = AsyncMock()
        pool.fetch.return_value = [_perf_history_row(offset) for offset in (0, 1)]
        url = (
            f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}/performance/history?hours=24"
        )

        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                response = await client.get(url)

        assert response.status_code == 200
        buckets = response.json()
        assert len(buckets) == 2
        assert buckets[0]["invocations"] == 10
# ---------------------------------------------------------------------------
# Task 7.1.2 — Edge-case tests
# ---------------------------------------------------------------------------
class TestEdgeCases:
    """Edge cases: duplicate slugs, missing resources, and request validation."""

    @staticmethod
    async def _send(pool, method: str, path: str, **kwargs):
        """Issue one request to the app with *pool* patched in as the DB pool."""
        with patch("services.api.app.pool", pool):
            async with AsyncClient(
                transport=ASGITransport(app=app), base_url="http://test"
            ) as client:
                return await client.request(method, path, **kwargs)

    @staticmethod
    def _pool_without_row() -> AsyncMock:
        """Return a mock pool whose ``fetchrow`` always resolves to ``None``."""
        pool = AsyncMock()
        pool.fetchrow.return_value = None
        return pool

    @pytest.mark.asyncio
    async def test_duplicate_slug_returns_409(self):
        """Creating a variant with a duplicate slug returns 409."""
        pool = AsyncMock()
        pool.fetchrow.side_effect = asyncpg.UniqueViolationError("")

        response = await self._send(
            pool,
            "POST",
            f"/api/agents/{AGENT_ID}/variants",
            json={"variant_name": "dup", "model_name": "qwen3:8b"},
        )

        assert response.status_code == 409
        assert "already exists" in response.json()["detail"]

    @pytest.mark.asyncio
    async def test_clone_nonexistent_agent_returns_404(self):
        """Cloning from a non-existent agent returns 404."""
        response = await self._send(
            self._pool_without_row(),
            "POST",
            f"/api/agents/{uuid.uuid4()}/clone",
            json={"variant_name": "test"},
        )

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()

    @pytest.mark.asyncio
    async def test_get_nonexistent_variant_returns_404(self):
        """Getting a non-existent variant returns 404."""
        response = await self._send(
            self._pool_without_row(),
            "GET",
            f"/api/agents/{AGENT_ID}/variants/{uuid.uuid4()}",
        )

        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_delete_nonexistent_variant_returns_404(self):
        """Deleting a non-existent variant returns 404."""
        response = await self._send(
            self._pool_without_row(),
            "DELETE",
            f"/api/agents/{AGENT_ID}/variants/{uuid.uuid4()}",
        )

        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_create_variant_empty_model_name_rejected(self):
        """Creating a variant whose payload omits model_name returns 422."""
        # model_name is required — omitting it should fail
        response = await self._send(
            AsyncMock(),
            "POST",
            f"/api/agents/{AGENT_ID}/variants",
            json={"variant_name": "test"},
        )

        assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_update_variant_no_fields_returns_400(self):
        """Updating a variant with no fields returns 400."""
        response = await self._send(
            AsyncMock(),
            "PUT",
            f"/api/agents/{AGENT_ID}/variants/{VARIANT_ID}",
            json={},
        )

        assert response.status_code == 400

    @pytest.mark.asyncio
    async def test_clone_nonexistent_variant_returns_404(self):
        """Cloning from a non-existent variant returns 404."""
        response = await self._send(
            self._pool_without_row(),
            "POST",
            f"/api/agents/{AGENT_ID}/variants/{uuid.uuid4()}/clone",
            json={"variant_name": "test"},
        )

        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_activate_nonexistent_variant_returns_404(self):
        """Activating a non-existent variant returns 404."""
        conn = AsyncMock()
        conn.fetchrow.return_value = None

        # transaction() and acquire() are called synchronously and must hand
        # back async context managers, hence MagicMock rather than AsyncMock.
        transaction_cm = AsyncMock()
        transaction_cm.__aenter__ = AsyncMock(return_value=None)
        transaction_cm.__aexit__ = AsyncMock(return_value=False)
        conn.transaction = MagicMock(return_value=transaction_cm)

        acquire_cm = AsyncMock()
        acquire_cm.__aenter__ = AsyncMock(return_value=conn)
        acquire_cm.__aexit__ = AsyncMock(return_value=False)
        pool = AsyncMock()
        pool.acquire = MagicMock(return_value=acquire_cm)

        response = await self._send(
            pool,
            "POST",
            f"/api/agents/{AGENT_ID}/variants/{uuid.uuid4()}/activate",
        )

        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_duplicate_slug_on_clone_returns_409(self):
        """Cloning an agent with a duplicate slug returns 409."""
        pool = AsyncMock()
        # The first fetchrow call returns the agent row; the second raises.
        pool.fetchrow.side_effect = [_agent_row(), asyncpg.UniqueViolationError("")]

        response = await self._send(
            pool,
            "POST",
            f"/api/agents/{AGENT_ID}/clone",
            json={"variant_name": "dup", "variant_slug": "existing-slug"},
        )

        assert response.status_code == 409
+659
View File
@@ -0,0 +1,659 @@
"""Property-based tests for agent variant logic.
Feature: agent-variants
Uses Hypothesis to validate correctness properties of variant operations:
single-active invariant, clone field preservation, config resolution,
slug determinism, and partial update idempotence.
Requirements: 1.4, 2.1, 2.3, 3.4, 4.1, 4.3, 4.4, 7
Design: Correctness Properties 15, 7
"""
from __future__ import annotations
import copy
import re
import uuid
from datetime import datetime, timezone
from typing import Any
import pytest
from hypothesis import given, settings, assume
from hypothesis import strategies as st
from services.api.app import _slugify
from services.shared.agent_config import ResolvedAgentConfig
# ---------------------------------------------------------------------------
# Hypothesis strategies
# ---------------------------------------------------------------------------
# Config fields that can be overridden in a variant, grouped by value type.
_STR_FIELDS = [
    "model_provider",
    "model_name",
    "system_prompt",
    "user_prompt_template",
    "prompt_version",
]
_FLOAT_FIELDS = ["temperature"]
_INT_FIELDS = [
    "max_tokens",
    "context_window",
    "input_token_limit",
    "token_budget",
    "timeout_seconds",
    "max_retries",
]
# Every overridable field: string fields, then float fields, then int fields.
_CONFIG_FIELDS = [*_STR_FIELDS, *_FLOAT_FIELDS, *_INT_FIELDS]
def _config_value_strategy(field: str) -> st.SearchStrategy:
    """Return a Hypothesis strategy producing values for the named config field.

    Args:
        field: Config field name; its membership in the typed field lists
            selects the strategy.

    Returns:
        Non-empty text (max 50 chars) for string fields, floats in [0.0, 2.0]
        for float fields, integers in [0, 100000] for int fields, and short
        non-empty text (max 20 chars) for any other name.
    """
    if field in _STR_FIELDS:
        allowed_chars = st.characters(
            whitelist_categories=("L", "N", "P", "Z"),
        )
        return st.text(min_size=1, max_size=50, alphabet=allowed_chars)
    if field in _FLOAT_FIELDS:
        return st.floats(min_value=0.0, max_value=2.0, allow_nan=False)
    if field in _INT_FIELDS:
        return st.integers(min_value=0, max_value=100000)
    # Fallback for names outside the known field lists.
    return st.text(min_size=1, max_size=20)
def _agent_config_strategy() -> st.SearchStrategy[dict[str, Any]]:
    """Return a strategy producing complete agent configuration dicts.

    Each generated dict carries one value for every config field.
    """
    providers = st.sampled_from(["ollama", "openai", "anthropic"])
    model_names = st.text(
        min_size=1,
        max_size=30,
        alphabet=st.characters(whitelist_categories=("L", "N")),
    )
    field_strategies = {
        "model_provider": providers,
        "model_name": model_names,
        "system_prompt": st.text(min_size=0, max_size=100),
        "user_prompt_template": st.text(min_size=0, max_size=100),
        "prompt_version": st.text(min_size=0, max_size=20),
        "temperature": st.floats(min_value=0.0, max_value=2.0, allow_nan=False),
        "max_tokens": st.integers(min_value=1, max_value=100000),
        "context_window": st.integers(min_value=0, max_value=200000),
        "input_token_limit": st.integers(min_value=0, max_value=200000),
        "token_budget": st.integers(min_value=0, max_value=1000000),
        "timeout_seconds": st.integers(min_value=1, max_value=600),
        "max_retries": st.integers(min_value=0, max_value=10),
    }
    return st.fixed_dictionaries(field_strategies)
def _variant_name_strategy() -> st.SearchStrategy[str]:
    """Return a strategy for variant names of 1-50 characters.

    Characters are drawn from the Unicode letter, number, punctuation and
    separator categories.
    """
    name_alphabet = st.characters(whitelist_categories=("L", "N", "P", "Z"))
    return st.text(min_size=1, max_size=50, alphabet=name_alphabet)
def _override_subset_strategy(
    source_config: dict[str, Any],
) -> st.SearchStrategy[dict[str, Any]]:
    """Return a strategy producing a random subset of config field overrides.

    Every config field is optional in the generated dict, so any subset
    (including the empty one) may be produced.

    Args:
        source_config: Accepted for call-site symmetry; the body does not
            read it.
    """
    optional_fields = {
        name: _config_value_strategy(name) for name in _CONFIG_FIELDS
    }
    # No required keys; every field may or may not be present.
    return st.fixed_dictionaries({}, optional=optional_fields)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _simulate_clone(
    source: dict[str, Any],
    overrides: dict[str, Any],
) -> dict[str, Any]:
    """Model the clone logic: source config fields with overrides applied.

    Args:
        source: Config of the agent or variant being cloned; must contain
            every config field.
        overrides: Replacement values; a missing key or a ``None`` value
            means "keep the source value".

    Returns:
        A new dict with exactly one entry per config field.
    """
    return {
        name: overrides[name] if overrides.get(name) is not None else source[name]
        for name in _CONFIG_FIELDS
    }
def _simulate_activate_deactivate(
    variants: list[dict[str, Any]],
    operations: list[tuple[str, int]],
) -> list[dict[str, Any]]:
    """Apply a sequence of activate/deactivate operations to *variants*.

    Args:
        variants: Variant dicts; their ``is_active`` key is mutated in place.
        operations: ``("activate", index)`` or ``("deactivate", <ignored>)``
            pairs. An activate with an out-of-range index, or any other
            operation name, leaves the state untouched.

    Returns:
        The same ``variants`` list object, after all operations.
    """

    def _clear_all() -> None:
        for entry in variants:
            entry["is_active"] = False

    total = len(variants)
    for action, position in operations:
        if action == "deactivate":
            _clear_all()
        elif action == "activate" and 0 <= position < total:
            # Clear every flag first so that only the target ends up active.
            _clear_all()
            variants[position]["is_active"] = True
    return variants
# ---------------------------------------------------------------------------
# Property 1: Single active variant invariant
# ---------------------------------------------------------------------------
class TestProperty1SingleActiveVariantInvariant:
    """Feature: agent-variants, Property 1: Single active variant invariant

    Replays activate/deactivate sequences against in-memory variant dicts
    and checks that no more than one entry is flagged ``is_active`` at any
    point.

    **Validates: Requirements 1.4, 4.1**
    """

    @given(
        num_variants=st.integers(min_value=1, max_value=10),
        operations=st.lists(
            st.tuples(
                st.sampled_from(["activate", "deactivate"]),
                st.integers(min_value=-1, max_value=9),
            ),
            min_size=1,
            max_size=30,
        ),
    )
    @settings(max_examples=100)
    def test_at_most_one_active_after_each_operation(
        self,
        num_variants: int,
        operations: list[tuple[str, int]],
    ):
        """**Validates: Requirements 1.4, 4.1**

        The number of active variants is 0 or 1 after every operation.
        """
        owner_id = str(uuid.uuid4())
        variants = [
            {"id": str(uuid.uuid4()), "agent_id": owner_id, "is_active": False}
            for _ in range(num_variants)
        ]

        def clear_all() -> None:
            for entry in variants:
                entry["is_active"] = False

        for op, idx in operations:
            if op == "deactivate":
                clear_all()
            elif op == "activate" and 0 <= idx < num_variants:
                # Models the transactional activate: clear all, then set target.
                clear_all()
                variants[idx]["is_active"] = True

            # The invariant is checked after every step, not only at the end.
            active_count = sum(1 for entry in variants if entry["is_active"])
            assert active_count <= 1, (
                f"Invariant violated: {active_count} active variants after "
                f"operation ({op}, {idx})"
            )

    @given(
        num_variants=st.integers(min_value=2, max_value=8),
        activate_sequence=st.lists(
            st.integers(min_value=0, max_value=7),
            min_size=2,
            max_size=20,
        ),
    )
    @settings(max_examples=100)
    def test_rapid_activate_swaps_maintain_invariant(
        self,
        num_variants: int,
        activate_sequence: list[int],
    ):
        """**Validates: Requirements 1.4, 4.1**

        Back-to-back activations leave exactly one active variant: the most
        recently activated one.
        """
        variants = [
            {"id": str(uuid.uuid4()), "is_active": False}
            for _ in range(num_variants)
        ]
        for raw_index in activate_sequence:
            # Fold the generated index into the valid range for this run.
            target = raw_index % num_variants
            for position, entry in enumerate(variants):
                entry["is_active"] = position == target

            flags = [entry["is_active"] for entry in variants]
            assert flags.count(True) == 1
            assert variants[target]["is_active"] is True
# ---------------------------------------------------------------------------
# Property 2: Clone preserves unoverridden fields
# ---------------------------------------------------------------------------
class TestProperty2ClonePreservesUnoverriddenFields:
    """Feature: agent-variants, Property 2: Clone preserves unoverridden fields

    For any source config and any subset of overrides, the simulated clone
    takes overridden fields from the overrides and every other field from
    the source.

    **Validates: Requirements 2.1, 2.3**
    """

    @given(
        source_config=_agent_config_strategy(),
        overrides=st.fixed_dictionaries(
            {},
            optional={
                field: _config_value_strategy(field)
                for field in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_overridden_fields_match_overrides(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field supplied in ``overrides`` carries the override value.
        """
        result = _simulate_clone(source_config, overrides)
        overridden = [name for name in _CONFIG_FIELDS if name in overrides]
        for field in overridden:
            assert result[field] == overrides[field], (
                f"Override field {field}: expected {overrides[field]}, "
                f"got {result[field]}"
            )

    @given(
        source_config=_agent_config_strategy(),
        overrides=st.fixed_dictionaries(
            {},
            optional={
                field: _config_value_strategy(field)
                for field in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_non_overridden_fields_match_source(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field absent from ``overrides`` keeps the source value.
        """
        result = _simulate_clone(source_config, overrides)
        untouched = [name for name in _CONFIG_FIELDS if name not in overrides]
        for field in untouched:
            assert result[field] == source_config[field], (
                f"Non-overridden field {field}: expected {source_config[field]}, "
                f"got {result[field]}"
            )

    @given(source_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_clone_with_no_overrides_is_exact_copy(
        self,
        source_config: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        With an empty override dict, every config field equals the source.
        """
        no_overrides: dict[str, Any] = {}
        result = _simulate_clone(source_config, no_overrides)
        for field in _CONFIG_FIELDS:
            assert result[field] == source_config[field], (
                f"Field {field} differs: {result[field]} != {source_config[field]}"
            )
# ---------------------------------------------------------------------------
# Property 3: Config resolution prefers active variant
# ---------------------------------------------------------------------------
class TestProperty3ConfigResolutionPrefersActiveVariant:
    """Feature: agent-variants, Property 3: Config resolution prefers active variant

    Models resolution as "active variant's values when one exists, base
    agent's values otherwise" and checks the resulting field values.

    **Validates: Requirements 4.3, 4.4**
    """

    @given(
        agent_config=_agent_config_strategy(),
        variant_configs=st.lists(
            _agent_config_strategy(),
            min_size=1,
            max_size=5,
        ),
        active_index=st.integers(min_value=0, max_value=4),
    )
    @settings(max_examples=100)
    def test_active_variant_config_is_returned(
        self,
        agent_config: dict[str, Any],
        variant_configs: list[dict[str, Any]],
        active_index: int,
    ):
        """**Validates: Requirements 4.3, 4.4**

        With an active variant, every resolved field equals that variant's
        value.
        """
        # Fold the generated index into the range of the generated list.
        active_variant = variant_configs[active_index % len(variant_configs)]

        # Models COALESCE(variant.field, agent.field): the variant always has
        # a value in this model, so it always wins.
        # NOTE(review): resolved is copied straight from active_variant, so the
        # assertions below compare the model with itself — confirm intent.
        resolved = {field: active_variant[field] for field in _CONFIG_FIELDS}

        for field in _CONFIG_FIELDS:
            assert resolved[field] == active_variant[field], (
                f"Field {field}: expected variant value {active_variant[field]}, "
                f"got {resolved[field]}"
            )

    @given(agent_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_no_active_variant_returns_agent_config(
        self,
        agent_config: dict[str, Any],
    ):
        """**Validates: Requirements 4.3, 4.4**

        Without an active variant, every resolved field equals the base
        agent's value.
        """
        # Models COALESCE with a NULL variant: the agent's values are used.
        # NOTE(review): same self-comparison caveat as the active-variant test.
        resolved = {field: agent_config[field] for field in _CONFIG_FIELDS}

        for field in _CONFIG_FIELDS:
            assert resolved[field] == agent_config[field]

    @given(
        agent_config=_agent_config_strategy(),
        variant_config=_agent_config_strategy(),
        has_active=st.booleans(),
    )
    @settings(max_examples=100)
    def test_resolution_source_matches_active_state(
        self,
        agent_config: dict[str, Any],
        variant_config: dict[str, Any],
        has_active: bool,
    ):
        """**Validates: Requirements 4.3, 4.4**

        A ``ResolvedAgentConfig`` built from the chosen source exposes that
        source's values, and its ``variant_id`` is set only when a variant
        is active.
        """
        source = variant_config if has_active else agent_config
        variant_id = str(uuid.uuid4()) if has_active else None

        # _CONFIG_FIELDS names exactly the config keyword arguments passed here.
        config = ResolvedAgentConfig(
            agent_id=str(uuid.uuid4()),
            variant_id=variant_id,
            **{field: source[field] for field in _CONFIG_FIELDS},
        )

        assert config.model_provider == source["model_provider"]
        assert config.model_name == source["model_name"]
        assert config.temperature == source["temperature"]
        assert config.max_tokens == source["max_tokens"]
        assert (config.variant_id is not None) == has_active
# ---------------------------------------------------------------------------
# Property 4: Slug auto-generation determinism
# ---------------------------------------------------------------------------
# Lowercase alphanumeric groups joined by single hyphens, with no hyphen at
# either end.
_KEBAB_CASE_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")


class TestProperty4SlugAutoGenerationDeterminism:
    """Feature: agent-variants, Property 4: Slug auto-generation determinism

    Checks that ``_slugify`` is deterministic, yields valid kebab-case, and
    returns a non-empty slug for names that start with an alphanumeric
    character.

    **Validates: Requirements 2.4**
    """

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_deterministic(self, name: str):
        """**Validates: Requirements 2.4**

        Two calls with the same name return the same slug.
        """
        slug1, slug2 = _slugify(name), _slugify(name)
        assert slug1 == slug2, (
            f"Non-deterministic: _slugify({name!r}) produced {slug1!r} and {slug2!r}"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s\-]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_produces_valid_kebab_case(self, name: str):
        """**Validates: Requirements 2.4**

        A non-empty slug is lowercase alphanumeric with hyphens and has no
        leading or trailing hyphen.
        """
        slug = _slugify(name)
        # Empty slugs are out of scope here; discard those examples.
        assume(len(slug) > 0)

        assert not slug.startswith("-"), f"Slug starts with hyphen: {slug!r}"
        assert not slug.endswith("-"), f"Slug ends with hyphen: {slug!r}"
        assert _KEBAB_CASE_RE.match(slug), (
            f"Slug {slug!r} is not valid kebab-case (from name {name!r})"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_non_empty_for_alphanumeric_input(self, name: str):
        """**Validates: Requirements 2.4**

        A name starting with an ASCII alphanumeric character yields a
        non-empty slug.
        """
        slug = _slugify(name)
        assert len(slug) > 0, (
            f"Empty slug for name {name!r}"
        )

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_lowercase(self, name: str):
        """**Validates: Requirements 2.4**

        Lowercasing the slug leaves it unchanged.
        """
        slug = _slugify(name)
        assert slug == slug.lower(), (
            f"Slug {slug!r} contains uppercase characters"
        )
# ---------------------------------------------------------------------------
# Property 5: Partial update idempotence
# ---------------------------------------------------------------------------
class TestProperty5PartialUpdateIdempotence:
    """Feature: agent-variants, Property 5: Partial update idempotence

    Models a partial update as a dict merge and checks that applying the
    same update twice yields the same state as applying it once
    (``updated_at`` is not part of the model).

    **Validates: Requirements 3.4**
    """

    @given(
        base_config=_agent_config_strategy(),
        update_fields=st.fixed_dictionaries(
            {},
            optional={
                field: _config_value_strategy(field)
                for field in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_double_apply_produces_same_state(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Re-applying a non-empty partial update changes no config field.
        """
        assume(len(update_fields) > 0)

        # First application, on a private copy of the base config.
        state_after_first = copy.deepcopy(base_config)
        state_after_first.update(update_fields)

        # Second application, on a copy of the first result.
        state_after_second = copy.deepcopy(state_after_first)
        state_after_second.update(update_fields)

        for field in _CONFIG_FIELDS:
            assert state_after_first[field] == state_after_second[field], (
                f"Field {field} differs after double apply: "
                f"{state_after_first[field]} != {state_after_second[field]}"
            )

    @given(
        base_config=_agent_config_strategy(),
        update_fields=st.fixed_dictionaries(
            {},
            optional={
                field: _config_value_strategy(field)
                for field in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_unchanged_fields_preserved_after_partial_update(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Fields left out of the update keep their original values.
        """
        updated = copy.deepcopy(base_config)
        updated.update(update_fields)

        untouched = [name for name in _CONFIG_FIELDS if name not in update_fields]
        for field in untouched:
            assert updated[field] == base_config[field], (
                f"Unchanged field {field} was modified: "
                f"{base_config[field]} -> {updated[field]}"
            )

    @given(base_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_empty_update_is_noop(
        self,
        base_config: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        With no fields applied, the copy matches the base on every field.
        """
        # Apply empty update — no fields changed
        updated = copy.deepcopy(base_config)
        for field in _CONFIG_FIELDS:
            assert updated[field] == base_config[field]