Files
stonks-oracle/tests/test_pbt_agent_variants.py
T
Celes Renata c85c0068a2 fix: clean up utcnow deprecation warnings, fix 12 failing tests, add CI/CD pipeline manifests
- Replace all datetime.utcnow() with datetime.now(tz=timezone.utc) across 8 files
- Fix 12 failing tests to match current implementation behavior
- Fix pytest_plugins in non-top-level conftest (moved to root conftest.py)
- Auto-fix 189 lint issues (import sorting, unused imports)
- Add CI/CD pipeline infrastructure (ARC, ArgoCD, Kargo manifests)
- Add values-beta.yaml and values-paper.yaml for staged deployments
- Update GitHub Actions workflow to use self-hosted-gremlin runners
- Add integration-test job to CI pipeline

Result: 1596 passed, 0 failed, 0 warnings
2026-04-18 03:59:28 +00:00

657 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Property-based tests for agent variant logic.
Feature: agent-variants
Uses Hypothesis to validate correctness properties of variant operations:
single-active invariant, clone field preservation, config resolution,
slug determinism, and partial update idempotence.
Requirements: 1.4, 2.1, 2.3, 2.4, 3.4, 4.1, 4.3, 4.4, 7
Design: Correctness Properties 1-5, 7
"""
from __future__ import annotations
import copy
import re
import uuid
from typing import Any
from hypothesis import assume, given, settings
from hypothesis import strategies as st
from services.api.app import _slugify
from services.shared.agent_config import ResolvedAgentConfig
# ---------------------------------------------------------------------------
# Hypothesis strategies
# ---------------------------------------------------------------------------
# Field names grouped by the Python type of their value.
_STR_FIELDS = [
    "model_provider",
    "model_name",
    "system_prompt",
    "user_prompt_template",
    "prompt_version",
]
_FLOAT_FIELDS = ["temperature"]
_INT_FIELDS = [
    "max_tokens",
    "context_window",
    "input_token_limit",
    "token_budget",
    "timeout_seconds",
    "max_retries",
]

# Config fields that can be overridden in a variant: every typed field,
# strings first, then the float, then the integers.
_CONFIG_FIELDS = [*_STR_FIELDS, *_FLOAT_FIELDS, *_INT_FIELDS]
def _config_value_strategy(field: str) -> st.SearchStrategy:
    """Return a Hypothesis strategy producing values for ``field``.

    String fields yield 1-50 characters from the letter, number,
    punctuation and separator Unicode categories; float fields yield
    non-NaN values in [0.0, 2.0]; integer fields yield values in
    [0, 100000].  Any other field name falls back to text of 1-20
    characters.
    """
    if field in _STR_FIELDS:
        text_alphabet = st.characters(
            whitelist_categories=("L", "N", "P", "Z"),
        )
        return st.text(min_size=1, max_size=50, alphabet=text_alphabet)
    if field in _FLOAT_FIELDS:
        return st.floats(min_value=0.0, max_value=2.0, allow_nan=False)
    if field in _INT_FIELDS:
        return st.integers(min_value=0, max_value=100000)
    # Unknown field name: generic short text.
    return st.text(min_size=1, max_size=20)
def _agent_config_strategy() -> st.SearchStrategy[dict[str, Any]]:
    """Build a strategy yielding complete agent configuration dicts.

    Every generated dict carries all twelve config keys; none is optional.
    """

    def free_text(limit: int):
        # Possibly-empty text of at most ``limit`` characters.
        return st.text(min_size=0, max_size=limit)

    def int_between(low: int, high: int):
        return st.integers(min_value=low, max_value=high)

    letters_and_digits = st.characters(whitelist_categories=("L", "N"))

    # Key order is kept as-is so Hypothesis draws fields in the same order.
    field_strategies = {
        "model_provider": st.sampled_from(["ollama", "openai", "anthropic"]),
        "model_name": st.text(
            min_size=1, max_size=30, alphabet=letters_and_digits,
        ),
        "system_prompt": free_text(100),
        "user_prompt_template": free_text(100),
        "prompt_version": free_text(20),
        "temperature": st.floats(
            min_value=0.0, max_value=2.0, allow_nan=False,
        ),
        "max_tokens": int_between(1, 100000),
        "context_window": int_between(0, 200000),
        "input_token_limit": int_between(0, 200000),
        "token_budget": int_between(0, 1000000),
        "timeout_seconds": int_between(1, 600),
        "max_retries": int_between(0, 10),
    }
    return st.fixed_dictionaries(field_strategies)
def _variant_name_strategy() -> st.SearchStrategy[str]:
    """Build a strategy for variant names of 1-50 characters.

    Characters come from the letter, number, punctuation and separator
    Unicode categories.
    """
    name_chars = st.characters(whitelist_categories=("L", "N", "P", "Z"))
    return st.text(alphabet=name_chars, min_size=1, max_size=50)
def _override_subset_strategy(
    source_config: dict[str, Any],
) -> st.SearchStrategy[dict[str, Any]]:
    """Build a strategy for dicts overriding an arbitrary subset of fields.

    Every name in ``_CONFIG_FIELDS`` is an optional key; when present, its
    value is drawn from ``_config_value_strategy`` for that field.
    ``source_config`` is accepted but not consulted.
    """
    optional_fields = {
        name: _config_value_strategy(name) for name in _CONFIG_FIELDS
    }
    # No required keys: the empty dict is a valid draw.
    return st.fixed_dictionaries({}, optional=optional_fields)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _simulate_clone(
    source: dict[str, Any],
    overrides: dict[str, Any],
) -> dict[str, Any]:
    """Model the clone endpoint: source config with overrides layered on top.

    For each name in ``_CONFIG_FIELDS`` the result holds the override value
    when one is supplied and is not ``None``; otherwise it holds the value
    from ``source``.  Keys outside ``_CONFIG_FIELDS`` are ignored.
    """

    def pick(name: str) -> Any:
        candidate = overrides.get(name)
        # A missing override and an explicit None both fall back to source.
        return source[name] if candidate is None else candidate

    return {name: pick(name) for name in _CONFIG_FIELDS}
def _simulate_activate_deactivate(
    variants: list[dict[str, Any]],
    operations: list[tuple[str, int]],
) -> list[dict[str, Any]]:
    """Replay activate/deactivate operations against ``variants`` in place.

    Each operation is ``("activate", variant_index)`` or
    ``("deactivate", <ignored>)``.  Activating clears every ``is_active``
    flag and then sets the target's; an out-of-range index is skipped.
    Deactivating clears every flag.  Any other operation name is ignored.

    Returns the same ``variants`` list, mutated.
    """

    def clear_all() -> None:
        for entry in variants:
            entry["is_active"] = False

    for action, target in operations:
        if action == "deactivate":
            clear_all()
            continue
        if action != "activate" or not (0 <= target < len(variants)):
            continue
        # Clear first so the target ends up as the only active variant.
        clear_all()
        variants[target]["is_active"] = True
    return variants
# ---------------------------------------------------------------------------
# Property 1: Single active variant invariant
# ---------------------------------------------------------------------------
class TestProperty1SingleActiveVariantInvariant:
    """Feature: agent-variants, Property 1: Single active variant invariant

    However many activate/deactivate operations are applied to the variants
    of one agent, no more than one variant is flagged ``is_active`` at any
    point in the sequence.

    **Validates: Requirements 1.4, 4.1**
    """

    @given(
        num_variants=st.integers(min_value=1, max_value=10),
        operations=st.lists(
            st.tuples(
                st.sampled_from(["activate", "deactivate"]),
                st.integers(min_value=-1, max_value=9),
            ),
            min_size=1,
            max_size=30,
        ),
    )
    @settings(max_examples=100)
    def test_at_most_one_active_after_each_operation(
        self,
        num_variants: int,
        operations: list[tuple[str, int]],
    ):
        """**Validates: Requirements 1.4, 4.1**

        The number of active variants is 0 or 1 after every single
        activate/deactivate step.
        """
        agent_id = str(uuid.uuid4())
        variants = [
            {"id": str(uuid.uuid4()), "agent_id": agent_id, "is_active": False}
            for _ in range(num_variants)
        ]

        for op, idx in operations:
            # Replay one step at a time so the invariant is checked between
            # operations rather than only at the end of the sequence.
            _simulate_activate_deactivate(variants, [(op, idx)])

            active_count = sum(bool(v["is_active"]) for v in variants)
            assert active_count <= 1, (
                f"Invariant violated: {active_count} active variants after "
                f"operation ({op}, {idx})"
            )

    @given(
        num_variants=st.integers(min_value=2, max_value=8),
        activate_sequence=st.lists(
            st.integers(min_value=0, max_value=7),
            min_size=2,
            max_size=20,
        ),
    )
    @settings(max_examples=100)
    def test_rapid_activate_swaps_maintain_invariant(
        self,
        num_variants: int,
        activate_sequence: list[int],
    ):
        """**Validates: Requirements 1.4, 4.1**

        Back-to-back activations of different variants always leave exactly
        one variant active: the most recent target.
        """
        variants = [
            {"id": str(uuid.uuid4()), "is_active": False}
            for _ in range(num_variants)
        ]

        for raw_index in activate_sequence:
            # Generated indices may exceed the pool size; wrap them around.
            target = raw_index % num_variants

            # Transactional swap: only the target ends up flagged.
            for position, variant in enumerate(variants):
                variant["is_active"] = position == target

            flags = [variant["is_active"] for variant in variants]
            assert flags.count(True) == 1
            assert variants[target]["is_active"] is True
# ---------------------------------------------------------------------------
# Property 2: Clone preserves unoverridden fields
# ---------------------------------------------------------------------------
class TestProperty2ClonePreservesUnoverriddenFields:
    """Feature: agent-variants, Property 2: Clone preserves unoverridden fields

    Cloning any agent config with any subset of override fields yields a
    variant whose overridden fields carry the override values and whose
    remaining fields carry the source values.

    **Validates: Requirements 2.1, 2.3**
    """

    @given(
        source_config=_agent_config_strategy(),
        overrides=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name) for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_overridden_fields_match_overrides(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field supplied in ``overrides`` appears in the clone with the
        override's value.
        """
        result = _simulate_clone(source_config, overrides)

        overridden = [name for name in _CONFIG_FIELDS if name in overrides]
        for field in overridden:
            assert result[field] == overrides[field], (
                f"Override field {field}: expected {overrides[field]}, "
                f"got {result[field]}"
            )

    @given(
        source_config=_agent_config_strategy(),
        overrides=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name) for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_non_overridden_fields_match_source(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field absent from ``overrides`` appears in the clone with the
        source config's value.
        """
        result = _simulate_clone(source_config, overrides)

        inherited = [name for name in _CONFIG_FIELDS if name not in overrides]
        for field in inherited:
            assert result[field] == source_config[field], (
                f"Non-overridden field {field}: expected {source_config[field]}, "
                f"got {result[field]}"
            )

    @given(source_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_clone_with_no_overrides_is_exact_copy(
        self,
        source_config: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        With an empty override dict, every config field of the clone equals
        the source's.
        """
        no_overrides: dict[str, Any] = {}
        result = _simulate_clone(source_config, no_overrides)

        for field in _CONFIG_FIELDS:
            assert result[field] == source_config[field], (
                f"Field {field} differs: {result[field]} != {source_config[field]}"
            )
# ---------------------------------------------------------------------------
# Property 3: Config resolution prefers active variant
# ---------------------------------------------------------------------------
class TestProperty3ConfigResolutionPrefersActiveVariant:
    """Feature: agent-variants, Property 3: Config resolution prefers active variant

    For an agent with N variants, resolution yields the active variant's
    config when one exists and the base agent config when none is active.

    **Validates: Requirements 4.3, 4.4**
    """

    @given(
        agent_config=_agent_config_strategy(),
        variant_configs=st.lists(
            _agent_config_strategy(),
            min_size=1,
            max_size=5,
        ),
        active_index=st.integers(min_value=0, max_value=4),
    )
    @settings(max_examples=100)
    def test_active_variant_config_is_returned(
        self,
        agent_config: dict[str, Any],
        variant_configs: list[dict[str, Any]],
        active_index: int,
    ):
        """**Validates: Requirements 4.3, 4.4**

        With an active variant present, each resolved field equals the
        active variant's value.
        """
        # The drawn index may exceed the list length; wrap it around.
        active_variant = variant_configs[active_index % len(variant_configs)]

        # Models COALESCE(variant.field, agent.field): generated variants
        # always carry a value for every field, so the variant side wins.
        resolved = {field: active_variant[field] for field in _CONFIG_FIELDS}

        for field in _CONFIG_FIELDS:
            assert resolved[field] == active_variant[field], (
                f"Field {field}: expected variant value {active_variant[field]}, "
                f"got {resolved[field]}"
            )

    @given(agent_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_no_active_variant_returns_agent_config(
        self,
        agent_config: dict[str, Any],
    ):
        """**Validates: Requirements 4.3, 4.4**

        With no active variant, each resolved field equals the base agent's
        value.
        """
        # Models COALESCE with a NULL variant side: agent values are used.
        resolved = {field: agent_config[field] for field in _CONFIG_FIELDS}

        for field in _CONFIG_FIELDS:
            assert resolved[field] == agent_config[field]

    @given(
        agent_config=_agent_config_strategy(),
        variant_config=_agent_config_strategy(),
        has_active=st.booleans(),
    )
    @settings(max_examples=100)
    def test_resolution_source_matches_active_state(
        self,
        agent_config: dict[str, Any],
        variant_config: dict[str, Any],
        has_active: bool,
    ):
        """**Validates: Requirements 4.3, 4.4**

        A ``ResolvedAgentConfig`` built from the chosen source (variant when
        one is active, agent otherwise) reports that source's values and a
        ``variant_id`` only when a variant is active.
        """
        source = variant_config if has_active else agent_config
        variant_id = str(uuid.uuid4()) if has_active else None

        # _CONFIG_FIELDS lists exactly the config keyword arguments, so the
        # source values can be forwarded in one expansion.
        config = ResolvedAgentConfig(
            agent_id=str(uuid.uuid4()),
            variant_id=variant_id,
            **{field: source[field] for field in _CONFIG_FIELDS},
        )

        for attr in ("model_provider", "model_name", "temperature", "max_tokens"):
            assert getattr(config, attr) == source[attr]

        assert (config.variant_id is not None) is has_active
# ---------------------------------------------------------------------------
# Property 4: Slug auto-generation determinism
# ---------------------------------------------------------------------------
# Lowercase alphanumeric runs joined by single hyphens.  The pattern ends
# with \Z rather than $ because $ also matches just before a trailing
# newline, which would let a slug such as "abc\n" pass as valid kebab-case.
_KEBAB_CASE_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*\Z")
class TestProperty4SlugAutoGenerationDeterminism:
    """Feature: agent-variants, Property 4: Slug auto-generation determinism

    For any variant_name, the auto-generated slug is deterministic,
    produces valid kebab-case, and is non-empty for non-empty input
    containing at least one alphanumeric character.

    **Validates: Requirements 2.4**
    """

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_deterministic(self, name: str):
        """**Validates: Requirements 2.4**

        Calling _slugify twice with the same name produces the same slug.
        """
        slug1 = _slugify(name)
        slug2 = _slugify(name)
        assert slug1 == slug2, (
            f"Non-deterministic: _slugify({name!r}) produced {slug1!r} and {slug2!r}"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s\-]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_produces_valid_kebab_case(self, name: str):
        """**Validates: Requirements 2.4**

        The slug must be lowercase alphanumeric with hyphens, no leading
        or trailing hyphens.
        """
        slug = _slugify(name)
        # Empty slugs are outside this property; discard those examples.
        assume(len(slug) > 0)

        # No leading or trailing hyphens
        assert not slug.startswith("-"), f"Slug starts with hyphen: {slug!r}"
        assert not slug.endswith("-"), f"Slug ends with hyphen: {slug!r}"

        # Only lowercase alphanumeric and hyphens.  fullmatch() is used
        # instead of match() because a "$" anchor also matches just before
        # a trailing newline, so match() could accept a slug ending in "\n".
        assert _KEBAB_CASE_RE.fullmatch(slug), (
            f"Slug {slug!r} is not valid kebab-case (from name {name!r})"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_non_empty_for_alphanumeric_input(self, name: str):
        """**Validates: Requirements 2.4**

        For any name containing at least one alphanumeric character,
        the slug is non-empty.
        """
        slug = _slugify(name)
        assert len(slug) > 0, (
            f"Empty slug for name {name!r}"
        )

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_lowercase(self, name: str):
        """**Validates: Requirements 2.4**

        The slug must be entirely lowercase.
        """
        slug = _slugify(name)
        assert slug == slug.lower(), (
            f"Slug {slug!r} contains uppercase characters"
        )
# ---------------------------------------------------------------------------
# Property 5: Partial update idempotence
# ---------------------------------------------------------------------------
class TestProperty5PartialUpdateIdempotence:
    """Feature: agent-variants, Property 5: Partial update idempotence

    Applying the same partial update to a variant twice leaves it in the
    same state as applying it once (``updated_at`` is not modelled here).

    **Validates: Requirements 3.4**
    """

    @given(
        base_config=_agent_config_strategy(),
        update_fields=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name) for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_double_apply_produces_same_state(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        The state after one application of a non-empty partial update equals
        the state after a second application of the same update.
        """
        # Only non-empty updates are of interest for this property.
        assume(bool(update_fields))

        # First application
        state_after_first = copy.deepcopy(base_config)
        state_after_first.update(update_fields)

        # Second application, on top of the result of the first
        state_after_second = copy.deepcopy(state_after_first)
        state_after_second.update(update_fields)

        for field in _CONFIG_FIELDS:
            assert state_after_first[field] == state_after_second[field], (
                f"Field {field} differs after double apply: "
                f"{state_after_first[field]} != {state_after_second[field]}"
            )

    @given(
        base_config=_agent_config_strategy(),
        update_fields=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name) for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_unchanged_fields_preserved_after_partial_update(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Fields left out of the update keep their original values.
        """
        updated = copy.deepcopy(base_config)
        updated.update(update_fields)

        untouched = [name for name in _CONFIG_FIELDS if name not in update_fields]
        for field in untouched:
            assert updated[field] == base_config[field], (
                f"Unchanged field {field} was modified: "
                f"{base_config[field]} -> {updated[field]}"
            )

    @given(base_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_empty_update_is_noop(
        self,
        base_config: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        An update carrying no fields leaves every config field unchanged.
        """
        # Nothing is applied to the copy: that is the empty update.
        updated = copy.deepcopy(base_config)

        for field in _CONFIG_FIELDS:
            assert updated[field] == base_config[field]