7c23c044d7
- Migration 027: agent_variants table with single-active enforcement, variant_id column on agent_performance_log - API: full CRUD, clone from agent/variant, activate/deactivate, per-variant performance metrics and history endpoints - Services: extractor, event classifier, thesis rewriter all wired to AgentConfigResolver with variant override support - Frontend: variant list, comparison view, create/edit/clone forms, activate/delete actions on Agents page - Tests: API tests + 5 property-based tests (single-active invariant, clone preservation, config resolution, slug determinism, update idempotence) - Spec files for agent-variants feature
660 lines
21 KiB
Python
660 lines
21 KiB
Python
"""Property-based tests for agent variant logic.
|
||
|
||
Feature: agent-variants
|
||
|
||
Uses Hypothesis to validate correctness properties of variant operations:
|
||
single-active invariant, clone field preservation, config resolution,
|
||
slug determinism, and partial update idempotence.
|
||
|
||
Requirements: 1.4, 2.1, 2.3, 2.4, 3.4, 4.1, 4.3, 4.4, 7
|
||
Design: Correctness Properties 1–5, 7
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import copy
|
||
import re
|
||
import uuid
|
||
from datetime import datetime, timezone
|
||
from typing import Any
|
||
|
||
import pytest
|
||
from hypothesis import given, settings, assume
|
||
from hypothesis import strategies as st
|
||
|
||
from services.api.app import _slugify
|
||
from services.shared.agent_config import ResolvedAgentConfig
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Hypothesis strategies
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Config fields that can be overridden in a variant, grouped by value type.
# The typed groups are declared first and the flat list is derived from them,
# so the flat list and the groups cannot drift apart.
_STR_FIELDS = [
    "model_provider",
    "model_name",
    "system_prompt",
    "user_prompt_template",
    "prompt_version",
]

_FLOAT_FIELDS = ["temperature"]

_INT_FIELDS = [
    "max_tokens",
    "context_window",
    "input_token_limit",
    "token_budget",
    "timeout_seconds",
    "max_retries",
]

# Every overridable field: string fields, then float fields, then int fields.
_CONFIG_FIELDS = [*_STR_FIELDS, *_FLOAT_FIELDS, *_INT_FIELDS]
|
||
|
||
|
||
def _config_value_strategy(field: str) -> st.SearchStrategy:
    """Return a strategy that generates override values for ``field``.

    String fields yield 1-50 characters drawn from the letter, number,
    punctuation and separator Unicode categories. Float fields yield a
    non-NaN value in [0.0, 2.0]. Integer fields yield a value in
    [0, 100000]. Any other field name falls back to 1-20 characters of
    unrestricted text.
    """
    if field in _STR_FIELDS:
        allowed_chars = st.characters(
            whitelist_categories=("L", "N", "P", "Z"),
        )
        return st.text(min_size=1, max_size=50, alphabet=allowed_chars)

    if field in _FLOAT_FIELDS:
        return st.floats(min_value=0.0, max_value=2.0, allow_nan=False)

    if field in _INT_FIELDS:
        return st.integers(min_value=0, max_value=100000)

    # Field is in none of the typed groups: generic short text.
    return st.text(min_size=1, max_size=20)
|
||
|
||
|
||
def _agent_config_strategy() -> st.SearchStrategy[dict[str, Any]]:
    """Return a strategy that generates complete agent configuration dicts.

    Every generated dict carries all twelve config keys; each key has its
    own value strategy, listed below.
    """
    # Model names are restricted to letters and digits.
    name_alphabet = st.characters(whitelist_categories=("L", "N"))

    per_field = {
        "model_provider": st.sampled_from(["ollama", "openai", "anthropic"]),
        "model_name": st.text(min_size=1, max_size=30, alphabet=name_alphabet),
        "system_prompt": st.text(min_size=0, max_size=100),
        "user_prompt_template": st.text(min_size=0, max_size=100),
        "prompt_version": st.text(min_size=0, max_size=20),
        "temperature": st.floats(min_value=0.0, max_value=2.0, allow_nan=False),
        "max_tokens": st.integers(min_value=1, max_value=100000),
        "context_window": st.integers(min_value=0, max_value=200000),
        "input_token_limit": st.integers(min_value=0, max_value=200000),
        "token_budget": st.integers(min_value=0, max_value=1000000),
        "timeout_seconds": st.integers(min_value=1, max_value=600),
        "max_retries": st.integers(min_value=0, max_value=10),
    }
    return st.fixed_dictionaries(per_field)
|
||
|
||
|
||
def _variant_name_strategy() -> st.SearchStrategy[str]:
    """Return a strategy for variant names of 1-50 characters.

    Characters come from the letter, number, punctuation and separator
    Unicode categories, so names may mix scripts, digits, symbols and
    whitespace.
    """
    name_chars = st.characters(whitelist_categories=("L", "N", "P", "Z"))
    return st.text(min_size=1, max_size=50, alphabet=name_chars)
|
||
|
||
|
||
def _override_subset_strategy(
    source_config: dict[str, Any],
) -> st.SearchStrategy[dict[str, Any]]:
    """Return a strategy producing dicts with a random subset of overrides.

    Every config field is optional in the generated dict; a field that is
    present maps to a value drawn from ``_config_value_strategy`` for that
    field.

    ``source_config`` is accepted but not read: the generated overrides do
    not depend on the source values.
    """
    optional_fields = dict(
        zip(_CONFIG_FIELDS, map(_config_value_strategy, _CONFIG_FIELDS))
    )
    # No required keys; any combination of the optional ones may appear.
    return st.fixed_dictionaries({}, optional=optional_fields)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _simulate_clone(
    source: dict[str, Any],
    overrides: dict[str, Any],
) -> dict[str, Any]:
    """Model the clone endpoint: start from ``source`` and layer ``overrides``.

    For each config field, the override value is taken when the field is
    present in ``overrides`` with a non-None value; otherwise the source
    value is kept. The result holds exactly the config fields, in their
    declared order.
    """
    return {
        field: (
            overrides[field]
            if overrides.get(field) is not None
            else source[field]
        )
        for field in _CONFIG_FIELDS
    }
|
||
|
||
|
||
def _simulate_activate_deactivate(
    variants: list[dict[str, Any]],
    operations: list[tuple[str, int]],
) -> list[dict[str, Any]]:
    """Replay activate/deactivate operations against ``variants`` in place.

    Each operation is ``("activate", variant_index)`` or
    ``("deactivate", <ignored index>)``. Activating clears every variant's
    ``is_active`` flag and then sets it on the target; an activate with an
    out-of-range index is ignored. Deactivating clears every flag. Any
    other operation name is ignored.

    Returns the same ``variants`` list object, mutated to its final state.
    """

    def _clear_all() -> None:
        for entry in variants:
            entry["is_active"] = False

    for action, position in operations:
        if action == "deactivate":
            _clear_all()
            continue

        if action != "activate" or not (0 <= position < len(variants)):
            # Unknown operation or index outside the list: nothing to do.
            continue

        # Clear first, then set the target, so at most one flag is ever set.
        _clear_all()
        variants[position]["is_active"] = True

    return variants
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Property 1: Single active variant invariant
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestProperty1SingleActiveVariantInvariant:
    """Feature: agent-variants, Property 1: Single active variant invariant

    Whatever sequence of activate/deactivate operations is applied to the
    variants of one agent, no more than one of them may have
    is_active = TRUE at any point.

    **Validates: Requirements 1.4, 4.1**
    """

    @given(
        num_variants=st.integers(min_value=1, max_value=10),
        operations=st.lists(
            st.tuples(
                st.sampled_from(["activate", "deactivate"]),
                st.integers(min_value=-1, max_value=9),
            ),
            min_size=1,
            max_size=30,
        ),
    )
    @settings(max_examples=100)
    def test_at_most_one_active_after_each_operation(
        self,
        num_variants: int,
        operations: list[tuple[str, int]],
    ):
        """**Validates: Requirements 1.4, 4.1**

        The number of active variants is checked after every single
        operation and must always be 0 or 1.
        """
        agent_id = str(uuid.uuid4())
        variants = []
        for _ in range(num_variants):
            variants.append(
                {
                    "id": str(uuid.uuid4()),
                    "agent_id": agent_id,
                    "is_active": False,
                }
            )

        for op, idx in operations:
            # Replay one operation at a time so the invariant can be
            # checked between steps rather than only at the end.
            _simulate_activate_deactivate(variants, [(op, idx)])

            active_count = sum(1 for v in variants if v["is_active"])
            assert active_count <= 1, (
                f"Invariant violated: {active_count} active variants after "
                f"operation ({op}, {idx})"
            )

    @given(
        num_variants=st.integers(min_value=2, max_value=8),
        activate_sequence=st.lists(
            st.integers(min_value=0, max_value=7),
            min_size=2,
            max_size=20,
        ),
    )
    @settings(max_examples=100)
    def test_rapid_activate_swaps_maintain_invariant(
        self,
        num_variants: int,
        activate_sequence: list[int],
    ):
        """**Validates: Requirements 1.4, 4.1**

        Back-to-back activations of different variants leave exactly one
        variant active after each swap, and it is the one just activated.
        """
        variants = [
            {"id": str(uuid.uuid4()), "is_active": False}
            for _ in range(num_variants)
        ]

        for raw_index in activate_sequence:
            # Fold the generated index into the valid range.
            target = raw_index % num_variants
            _simulate_activate_deactivate(variants, [("activate", target)])

            flags = [v["is_active"] for v in variants]
            assert flags.count(True) == 1
            assert variants[target]["is_active"] is True
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Property 2: Clone preserves unoverridden fields
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestProperty2ClonePreservesUnoverriddenFields:
    """Feature: agent-variants, Property 2: Clone preserves unoverridden fields

    Cloning any source config with any subset of override fields must give
    a variant whose overridden fields carry the override values and whose
    remaining fields carry the source values.

    **Validates: Requirements 2.1, 2.3**
    """

    # Shared by the tests below: a dict holding any subset of the config
    # fields, each mapped to a generated value for that field.
    _override_strategy = st.fixed_dictionaries(
        {},
        optional={
            field: _config_value_strategy(field) for field in _CONFIG_FIELDS
        },
    )

    @given(
        source_config=_agent_config_strategy(),
        overrides=_override_strategy,
    )
    @settings(max_examples=100)
    def test_overridden_fields_match_overrides(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field supplied in the overrides shows up in the clone with
        the override value.
        """
        result = _simulate_clone(source_config, overrides)

        for field in _CONFIG_FIELDS:
            if field not in overrides:
                continue
            assert result[field] == overrides[field], (
                f"Override field {field}: expected {overrides[field]}, "
                f"got {result[field]}"
            )

    @given(
        source_config=_agent_config_strategy(),
        overrides=_override_strategy,
    )
    @settings(max_examples=100)
    def test_non_overridden_fields_match_source(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field absent from the overrides keeps the source value.
        """
        result = _simulate_clone(source_config, overrides)

        untouched = [field for field in _CONFIG_FIELDS if field not in overrides]
        for field in untouched:
            assert result[field] == source_config[field], (
                f"Non-overridden field {field}: expected {source_config[field]}, "
                f"got {result[field]}"
            )

    @given(source_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_clone_with_no_overrides_is_exact_copy(
        self,
        source_config: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        With an empty override dict, the clone equals the source on every
        config field.
        """
        no_overrides: dict[str, Any] = {}
        result = _simulate_clone(source_config, no_overrides)

        for field in _CONFIG_FIELDS:
            assert result[field] == source_config[field], (
                f"Field {field} differs: {result[field]} != {source_config[field]}"
            )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Property 3: Config resolution prefers active variant
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestProperty3ConfigResolutionPrefersActiveVariant:
    """Feature: agent-variants, Property 3: Config resolution prefers active variant

    For any agent with N variants, config resolution returns the active
    variant's config when one exists, and the base agent config when none
    is active.

    The tests run against ``_coalesce_config``, a small in-memory model of
    the per-field ``COALESCE(variant.field, agent.field)`` rule. Both the
    variant and the agent config therefore take part in every resolution,
    instead of the expected values being copied straight into the result.

    **Validates: Requirements 4.3, 4.4**
    """

    @staticmethod
    def _coalesce_config(
        variant: dict[str, Any] | None,
        agent: dict[str, Any],
    ) -> dict[str, Any]:
        """Model per-field COALESCE(variant.field, agent.field).

        Args:
            variant: Config of the active variant, or None when no variant
                is active.
            agent: Config of the base agent.

        Returns:
            A dict over the config fields in which each value is the
            variant's when a variant is given and its value is not None,
            and the agent's otherwise.
        """
        if variant is None:
            return {field: agent[field] for field in _CONFIG_FIELDS}
        return {
            field: (
                variant[field]
                if variant.get(field) is not None
                else agent[field]
            )
            for field in _CONFIG_FIELDS
        }

    @given(
        agent_config=_agent_config_strategy(),
        variant_configs=st.lists(
            _agent_config_strategy(),
            min_size=1,
            max_size=5,
        ),
        active_index=st.integers(min_value=0, max_value=4),
    )
    @settings(max_examples=100)
    def test_active_variant_config_is_returned(
        self,
        agent_config: dict[str, Any],
        variant_configs: list[dict[str, Any]],
        active_index: int,
    ):
        """**Validates: Requirements 4.3, 4.4**

        When an active variant exists, resolved config fields must match
        the active variant's values.
        """
        # Fold the generated index into the range of the generated list.
        active_idx = active_index % len(variant_configs)
        active_variant = variant_configs[active_idx]

        # Generated variants always populate every field, so the variant
        # must win over the agent on every one of them.
        resolved = self._coalesce_config(active_variant, agent_config)

        for field in _CONFIG_FIELDS:
            assert resolved[field] == active_variant[field], (
                f"Field {field}: expected variant value {active_variant[field]}, "
                f"got {resolved[field]}"
            )

    @given(agent_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_no_active_variant_returns_agent_config(
        self,
        agent_config: dict[str, Any],
    ):
        """**Validates: Requirements 4.3, 4.4**

        When no active variant exists, resolved config fields must match
        the base agent's values.
        """
        # No variant row at all: every field falls through to the agent.
        resolved = self._coalesce_config(None, agent_config)

        for field in _CONFIG_FIELDS:
            assert resolved[field] == agent_config[field]

    @given(
        agent_config=_agent_config_strategy(),
        variant_config=_agent_config_strategy(),
        has_active=st.booleans(),
    )
    @settings(max_examples=100)
    def test_resolution_source_matches_active_state(
        self,
        agent_config: dict[str, Any],
        variant_config: dict[str, Any],
        has_active: bool,
    ):
        """**Validates: Requirements 4.3, 4.4**

        The resolved config comes from the variant when one is active and
        from the agent otherwise, and ``variant_id`` is set only in the
        former case.
        """
        if has_active:
            expected = variant_config
            variant_id = str(uuid.uuid4())
            source = self._coalesce_config(variant_config, agent_config)
        else:
            expected = agent_config
            variant_id = None
            source = self._coalesce_config(None, agent_config)

        # Build a ResolvedAgentConfig from the resolved values; ``source``
        # holds exactly the config fields, so it can be splatted directly.
        config = ResolvedAgentConfig(
            agent_id=str(uuid.uuid4()),
            variant_id=variant_id,
            **source,
        )

        assert config.model_provider == expected["model_provider"]
        assert config.model_name == expected["model_name"]
        assert config.temperature == expected["temperature"]
        assert config.max_tokens == expected["max_tokens"]

        if has_active:
            assert config.variant_id is not None
        else:
            assert config.variant_id is None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Property 4: Slug auto-generation determinism
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Kebab-case: runs of lowercase letters and digits joined by single hyphens.
# The pattern ends with \Z rather than $ because $ also matches just before
# a trailing newline, which would let a slug such as "abc\n" pass the check.
_KEBAB_CASE_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*\Z")
|
||
|
||
|
||
class TestProperty4SlugAutoGenerationDeterminism:
    """Feature: agent-variants, Property 4: Slug auto-generation determinism

    The slug generated from a variant_name must be deterministic, must be
    valid kebab-case, and must be non-empty whenever the name is non-empty
    and contains at least one alphanumeric character.

    **Validates: Requirements 2.4**
    """

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_deterministic(self, name: str):
        """**Validates: Requirements 2.4**

        Two calls to _slugify with the same name return equal slugs.
        """
        slug1, slug2 = _slugify(name), _slugify(name)
        assert slug1 == slug2, (
            f"Non-deterministic: _slugify({name!r}) produced {slug1!r} and {slug2!r}"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s\-]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_produces_valid_kebab_case(self, name: str):
        """**Validates: Requirements 2.4**

        A non-empty slug consists only of lowercase alphanumerics and
        hyphens, and neither starts nor ends with a hyphen.
        """
        slug = _slugify(name)
        # Empty slugs are out of scope here; discard those examples.
        assume(len(slug) > 0)

        # Hyphens may only appear between alphanumeric runs.
        assert not slug.startswith("-"), f"Slug starts with hyphen: {slug!r}"
        assert not slug.endswith("-"), f"Slug ends with hyphen: {slug!r}"

        # Whole-slug shape check.
        assert _KEBAB_CASE_RE.match(slug), (
            f"Slug {slug!r} is not valid kebab-case (from name {name!r})"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_non_empty_for_alphanumeric_input(self, name: str):
        """**Validates: Requirements 2.4**

        A name that contains at least one alphanumeric character never
        produces an empty slug.
        """
        slug = _slugify(name)
        assert len(slug) > 0, (
            f"Empty slug for name {name!r}"
        )

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_lowercase(self, name: str):
        """**Validates: Requirements 2.4**

        Lowercasing the slug leaves it unchanged.
        """
        slug = _slugify(name)
        lowered = slug.lower()
        assert slug == lowered, (
            f"Slug {slug!r} contains uppercase characters"
        )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Property 5: Partial update idempotence
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
class TestProperty5PartialUpdateIdempotence:
    """Feature: agent-variants, Property 5: Partial update idempotence

    Applying the same partial update to a variant twice leaves it in the
    same state as applying it once (updated_at excluded).

    **Validates: Requirements 3.4**
    """

    # Shared by the tests below: a dict holding any subset of the config
    # fields, each mapped to a generated value for that field.
    _update_strategy = st.fixed_dictionaries(
        {},
        optional={
            field: _config_value_strategy(field) for field in _CONFIG_FIELDS
        },
    )

    @given(
        base_config=_agent_config_strategy(),
        update_fields=_update_strategy,
    )
    @settings(max_examples=100)
    def test_double_apply_produces_same_state(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        The field values after the second application of an update equal
        those after the first (updated_at excluded).
        """
        # Only non-empty updates are considered here.
        assume(bool(update_fields))

        # Apply the update once to a copy of the base config...
        state_after_first = copy.deepcopy(base_config)
        state_after_first.update(update_fields)

        # ...then once more on top of that result.
        state_after_second = copy.deepcopy(state_after_first)
        state_after_second.update(update_fields)

        for field in _CONFIG_FIELDS:
            assert state_after_first[field] == state_after_second[field], (
                f"Field {field} differs after double apply: "
                f"{state_after_first[field]} != {state_after_second[field]}"
            )

    @given(
        base_config=_agent_config_strategy(),
        update_fields=_update_strategy,
    )
    @settings(max_examples=100)
    def test_unchanged_fields_preserved_after_partial_update(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        A field that the update does not mention keeps its original value.
        """
        updated = copy.deepcopy(base_config)
        updated.update(update_fields)

        for field in _CONFIG_FIELDS:
            if field in update_fields:
                continue
            assert updated[field] == base_config[field], (
                f"Unchanged field {field} was modified: "
                f"{base_config[field]} -> {updated[field]}"
            )

    @given(base_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_empty_update_is_noop(
        self,
        base_config: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Applying an update with no fields changes none of the config
        fields.
        """
        # The empty update touches nothing, so the copy is the result.
        updated = copy.deepcopy(base_config)

        for field in _CONFIG_FIELDS:
            before, after = base_config[field], updated[field]
            assert after == before
|