# stonks-oracle/tests/test_pbt_agent_variants.py

"""Property-based tests for agent variant logic.

Feature: agent-variants

Uses Hypothesis to validate correctness properties of variant operations:
single-active invariant, clone field preservation, config resolution,
slug determinism, and partial update idempotence.

Requirements: 1.4, 2.1, 2.3, 2.4, 3.4, 4.1, 4.3, 4.4, 7
Design: Correctness Properties 1–5, 7
"""
from __future__ import annotations

import copy
import re
import uuid
from typing import Any

from hypothesis import assume, given, settings
from hypothesis import strategies as st

from services.api.app import _slugify
from services.shared.agent_config import ResolvedAgentConfig

# ---------------------------------------------------------------------------
# Hypothesis strategies
# ---------------------------------------------------------------------------

# Variant-overridable config fields, grouped by the kind of value each holds.
_STR_FIELDS = [
    "model_provider",
    "model_name",
    "system_prompt",
    "user_prompt_template",
    "prompt_version",
]

_FLOAT_FIELDS = ["temperature"]

_INT_FIELDS = [
    "max_tokens",
    "context_window",
    "input_token_limit",
    "token_budget",
    "timeout_seconds",
    "max_retries",
]

# Config fields that can be overridden in a variant: the string fields first,
# then the float field, then the integer fields.
_CONFIG_FIELDS = [*_STR_FIELDS, *_FLOAT_FIELDS, *_INT_FIELDS]


def _config_value_strategy(field: str) -> st.SearchStrategy:
    """Return a strategy producing override values for one config field.

    String fields get 1-50 characters drawn from the Unicode letter, number,
    punctuation and separator categories; float fields get a non-NaN value
    in [0.0, 2.0]; integer fields get a value in [0, 100000].  A field name
    that is in none of the three typed lists falls back to short arbitrary
    text (1-20 characters).
    """
    if field in _STR_FIELDS:
        readable_chars = st.characters(
            whitelist_categories=("L", "N", "P", "Z"),
        )
        return st.text(min_size=1, max_size=50, alphabet=readable_chars)

    if field in _FLOAT_FIELDS:
        return st.floats(min_value=0.0, max_value=2.0, allow_nan=False)

    if field in _INT_FIELDS:
        return st.integers(min_value=0, max_value=100000)

    # Unknown field name: generic short text.
    return st.text(min_size=1, max_size=20)


def _agent_config_strategy() -> st.SearchStrategy[dict[str, Any]]:
    """Build a strategy yielding complete agent configuration dicts.

    Every generated dict carries all twelve config keys.  The three prompt
    related strings may be empty; ``model_name`` is a non-empty run of
    letters and digits; ``max_tokens`` and ``timeout_seconds`` are at least
    1, while the remaining integer fields may be 0.
    """
    alnum_name = st.text(
        min_size=1,
        max_size=30,
        alphabet=st.characters(whitelist_categories=("L", "N")),
    )
    short_free_text = st.text(min_size=0, max_size=100)

    field_strategies = {
        "model_provider": st.sampled_from(["ollama", "openai", "anthropic"]),
        "model_name": alnum_name,
        "system_prompt": short_free_text,
        "user_prompt_template": short_free_text,
        "prompt_version": st.text(min_size=0, max_size=20),
        "temperature": st.floats(min_value=0.0, max_value=2.0, allow_nan=False),
        "max_tokens": st.integers(min_value=1, max_value=100000),
        "context_window": st.integers(min_value=0, max_value=200000),
        "input_token_limit": st.integers(min_value=0, max_value=200000),
        "token_budget": st.integers(min_value=0, max_value=1000000),
        "timeout_seconds": st.integers(min_value=1, max_value=600),
        "max_retries": st.integers(min_value=0, max_value=10),
    }
    return st.fixed_dictionaries(field_strategies)


def _variant_name_strategy() -> st.SearchStrategy[str]:
    """Build a strategy for variant names of 1-50 mixed characters.

    Characters are drawn from the Unicode letter, number, punctuation and
    separator categories, so names may contain non-ASCII text, symbols and
    whitespace.
    """
    name_alphabet = st.characters(whitelist_categories=("L", "N", "P", "Z"))
    return st.text(min_size=1, max_size=50, alphabet=name_alphabet)


def _override_subset_strategy(
    source_config: dict[str, Any],
) -> st.SearchStrategy[dict[str, Any]]:
    """Build a strategy yielding overrides for an arbitrary subset of fields.

    Each config field is optional in the generated dict, so results range
    from an empty dict to one overriding every field.  ``source_config`` is
    accepted for the caller's convenience but is not consulted: override
    values come solely from ``_config_value_strategy``.
    """
    per_field_values = {
        name: _config_value_strategy(name) for name in _CONFIG_FIELDS
    }
    # No required keys; every field may independently be present or absent.
    return st.fixed_dictionaries({}, optional=per_field_values)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _simulate_clone(
    source: dict[str, Any],
    overrides: dict[str, Any],
) -> dict[str, Any]:
    """Model the clone operation: start from ``source`` and layer overrides.

    For each config field the override value is used when one is supplied
    and is not ``None``; otherwise the value is copied from ``source``.
    Keys outside ``_CONFIG_FIELDS`` are ignored, and the result always has
    exactly the config fields, in ``_CONFIG_FIELDS`` order.
    """

    def _value_for(name: str) -> Any:
        # A missing override and an explicit None both mean "inherit".
        candidate = overrides.get(name)
        return source[name] if candidate is None else candidate

    return {name: _value_for(name) for name in _CONFIG_FIELDS}


def _simulate_activate_deactivate(
    variants: list[dict[str, Any]],
    operations: list[tuple[str, int]],
) -> list[dict[str, Any]]:
    """Replay activate/deactivate operations against a list of variants.

    Each operation is an ``(action, index)`` pair.  ``"activate"`` with an
    index inside the list clears every ``is_active`` flag and then sets the
    flag on the addressed variant; ``"deactivate"`` clears every flag and
    ignores the index.  An ``"activate"`` whose index is out of range, and
    any other action name, leaves the list untouched.

    The variant dicts are mutated in place and the same list object is
    returned.
    """

    def _clear_all() -> None:
        for entry in variants:
            entry["is_active"] = False

    for action, position in operations:
        if action == "deactivate":
            _clear_all()
            continue

        if action != "activate" or not (0 <= position < len(variants)):
            # Unknown action or out-of-range target: nothing to do.
            continue

        # Clear first, then set, so exactly one variant ends up active.
        _clear_all()
        variants[position]["is_active"] = True

    return variants


# ---------------------------------------------------------------------------
# Property 1: Single active variant invariant
# ---------------------------------------------------------------------------


class TestProperty1SingleActiveVariantInvariant:
    """Feature: agent-variants, Property 1: Single active variant invariant

    However activate and deactivate requests are interleaved for the
    variants of one agent, the number of variants with is_active = TRUE
    never exceeds one.

    **Validates: Requirements 1.4, 4.1**
    """

    @given(
        num_variants=st.integers(min_value=1, max_value=10),
        operations=st.lists(
            st.tuples(
                st.sampled_from(["activate", "deactivate"]),
                st.integers(min_value=-1, max_value=9),
            ),
            min_size=1,
            max_size=30,
        ),
    )
    @settings(max_examples=100)
    def test_at_most_one_active_after_each_operation(
        self,
        num_variants: int,
        operations: list[tuple[str, int]],
    ):
        """**Validates: Requirements 1.4, 4.1**

        The active-variant count is checked after every single operation,
        not just at the end, and must always be 0 or 1.  Generated indices
        may fall outside the variant list; such activations are no-ops.
        """
        agent_id = str(uuid.uuid4())
        variants = []
        for _ in range(num_variants):
            variants.append(
                {
                    "id": str(uuid.uuid4()),
                    "agent_id": agent_id,
                    "is_active": False,
                }
            )

        for op, idx in operations:
            activating = op == "activate" and 0 <= idx < num_variants

            # Both a valid activate and any deactivate begin by clearing
            # every flag, mirroring a transactional "reset then set".
            if activating or op == "deactivate":
                for row in variants:
                    row["is_active"] = False
            if activating:
                variants[idx]["is_active"] = True

            # Invariant check after each operation
            active_count = len([row for row in variants if row["is_active"]])
            assert active_count <= 1, (
                f"Invariant violated: {active_count} active variants after "
                f"operation ({op}, {idx})"
            )

    @given(
        num_variants=st.integers(min_value=2, max_value=8),
        activate_sequence=st.lists(
            st.integers(min_value=0, max_value=7),
            min_size=2,
            max_size=20,
        ),
    )
    @settings(max_examples=100)
    def test_rapid_activate_swaps_maintain_invariant(
        self,
        num_variants: int,
        activate_sequence: list[int],
    ):
        """**Validates: Requirements 1.4, 4.1**

        A run of back-to-back activations (indices wrapped into range)
        leaves exactly one variant active after each swap, and it is the
        one most recently targeted.
        """
        variants = [
            dict(id=str(uuid.uuid4()), is_active=False)
            for _ in range(num_variants)
        ]

        for raw_index in activate_sequence:
            target = raw_index % num_variants

            # Transactional swap: only the target ends up flagged.
            for position, row in enumerate(variants):
                row["is_active"] = position == target

            active_count = sum(1 for row in variants if row["is_active"])
            assert active_count == 1
            assert variants[target]["is_active"] is True


# ---------------------------------------------------------------------------
# Property 2: Clone preserves unoverridden fields
# ---------------------------------------------------------------------------


class TestProperty2ClonePreservesUnoverriddenFields:
    """Feature: agent-variants, Property 2: Clone preserves unoverridden fields

    Cloning an agent config with any subset of override fields yields a
    variant whose overridden fields carry the override values and whose
    remaining fields are copied from the source.

    **Validates: Requirements 2.1, 2.3**
    """

    @given(
        source_config=_agent_config_strategy(),
        overrides=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name)
                for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_overridden_fields_match_overrides(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field supplied in the overrides appears in the clone with the
        override's value.
        """
        result = _simulate_clone(source_config, overrides)

        overridden = [name for name in _CONFIG_FIELDS if name in overrides]
        for field in overridden:
            assert result[field] == overrides[field], (
                f"Override field {field}: expected {overrides[field]}, "
                f"got {result[field]}"
            )

    @given(
        source_config=_agent_config_strategy(),
        overrides=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name)
                for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_non_overridden_fields_match_source(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field absent from the overrides keeps the source config's
        value in the clone.
        """
        result = _simulate_clone(source_config, overrides)

        for field in _CONFIG_FIELDS:
            if field in overrides:
                continue
            assert result[field] == source_config[field], (
                f"Non-overridden field {field}: expected {source_config[field]}, "
                f"got {result[field]}"
            )

    @given(source_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_clone_with_no_overrides_is_exact_copy(
        self,
        source_config: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        With an empty override dict the clone reproduces every config field
        of the source unchanged.
        """
        no_overrides: dict[str, Any] = {}
        result = _simulate_clone(source_config, no_overrides)

        for field in _CONFIG_FIELDS:
            assert result[field] == source_config[field], (
                f"Field {field} differs: {result[field]} != {source_config[field]}"
            )


# ---------------------------------------------------------------------------
# Property 3: Config resolution prefers active variant
# ---------------------------------------------------------------------------


class TestProperty3ConfigResolutionPrefersActiveVariant:
    """Feature: agent-variants, Property 3: Config resolution prefers active variant

    For any agent with N variants, config resolution returns the active
    variant's config when one exists, and the base agent config when none
    is active.

    Resolution is modelled by ``_resolve``, a per-field COALESCE of the
    active variant over the base agent.  Each test compares the resolved
    values with an independently chosen expected source, so the assertions
    exercise the selection logic rather than comparing a value to itself.

    NOTE(review): ``_resolve`` is a local simulation; presumably it mirrors
    the production resolver's COALESCE query -- confirm against that code.

    **Validates: Requirements 4.3, 4.4**
    """

    @staticmethod
    def _resolve(
        agent_config: dict[str, Any],
        active_variant: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Simulate ``COALESCE(variant.field, agent.field)`` for every field.

        Args:
            agent_config: Base agent configuration; must contain every
                config field.
            active_variant: Config of the active variant, or ``None`` when
                the agent has no active variant.

        Returns:
            A dict keyed by ``_CONFIG_FIELDS`` where each value comes from
            the variant if it supplies a non-``None`` value, otherwise from
            the agent.
        """
        resolved: dict[str, Any] = {}
        for field in _CONFIG_FIELDS:
            variant_value = (
                None if active_variant is None else active_variant.get(field)
            )
            resolved[field] = (
                agent_config[field] if variant_value is None else variant_value
            )
        return resolved

    @given(
        agent_config=_agent_config_strategy(),
        variant_configs=st.lists(
            _agent_config_strategy(),
            min_size=1,
            max_size=5,
        ),
        active_index=st.integers(min_value=0, max_value=4),
    )
    @settings(max_examples=100)
    def test_active_variant_config_is_returned(
        self,
        agent_config: dict[str, Any],
        variant_configs: list[dict[str, Any]],
        active_index: int,
    ):
        """**Validates: Requirements 4.3, 4.4**

        When an active variant exists, resolved config fields must match
        the active variant's values.  ``active_index`` is wrapped into the
        bounds of ``variant_configs`` to pick the active one.
        """
        active_variant = variant_configs[active_index % len(variant_configs)]

        # Generated variants always populate every field, so the variant
        # must win on each one regardless of what the agent holds.
        resolved = self._resolve(agent_config, active_variant)

        for field in _CONFIG_FIELDS:
            assert resolved[field] == active_variant[field], (
                f"Field {field}: expected variant value {active_variant[field]}, "
                f"got {resolved[field]}"
            )

    @given(agent_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_no_active_variant_returns_agent_config(
        self,
        agent_config: dict[str, Any],
    ):
        """**Validates: Requirements 4.3, 4.4**

        When no active variant exists, resolved config fields must match
        the base agent's values.
        """
        # A missing variant makes every COALESCE fall through to the agent.
        resolved = self._resolve(agent_config, None)

        for field in _CONFIG_FIELDS:
            assert resolved[field] == agent_config[field]

    @given(
        agent_config=_agent_config_strategy(),
        variant_config=_agent_config_strategy(),
        has_active=st.booleans(),
    )
    @settings(max_examples=100)
    def test_resolution_source_matches_active_state(
        self,
        agent_config: dict[str, Any],
        variant_config: dict[str, Any],
        has_active: bool,
    ):
        """**Validates: Requirements 4.3, 4.4**

        The resolver returns the correct source (variant or agent) based
        on whether an active variant exists, and the values survive being
        carried in a ``ResolvedAgentConfig``.
        """
        if has_active:
            expected = variant_config
            variant_id = str(uuid.uuid4())
            source = self._resolve(agent_config, variant_config)
        else:
            expected = agent_config
            variant_id = None
            source = self._resolve(agent_config, None)

        # Build a ResolvedAgentConfig to verify the dataclass works
        config = ResolvedAgentConfig(
            agent_id=str(uuid.uuid4()),
            variant_id=variant_id,
            model_provider=source["model_provider"],
            model_name=source["model_name"],
            system_prompt=source["system_prompt"],
            user_prompt_template=source["user_prompt_template"],
            prompt_version=source["prompt_version"],
            temperature=source["temperature"],
            max_tokens=source["max_tokens"],
            context_window=source["context_window"],
            input_token_limit=source["input_token_limit"],
            token_budget=source["token_budget"],
            timeout_seconds=source["timeout_seconds"],
            max_retries=source["max_retries"],
        )

        # Compare against the independently selected expected source, not
        # the dict the object was built from.
        assert config.model_provider == expected["model_provider"]
        assert config.model_name == expected["model_name"]
        assert config.temperature == expected["temperature"]
        assert config.max_tokens == expected["max_tokens"]

        if has_active:
            assert config.variant_id is not None
        else:
            assert config.variant_id is None


# ---------------------------------------------------------------------------
# Property 4: Slug auto-generation determinism
# ---------------------------------------------------------------------------

# Lowercase alphanumeric runs joined by single hyphens.  The pattern ends with
# ``\Z`` rather than ``$`` because ``$`` also matches just before a trailing
# newline, which would let a slug such as "abc\n" pass as valid kebab-case.
_KEBAB_CASE_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*\Z")


class TestProperty4SlugAutoGenerationDeterminism:
    """Feature: agent-variants, Property 4: Slug auto-generation determinism

    The slug derived from a variant_name is the same on every call, is
    valid kebab-case, and is non-empty whenever the name contains at least
    one alphanumeric character.

    **Validates: Requirements 2.4**
    """

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_deterministic(self, name: str):
        """**Validates: Requirements 2.4**

        Two independent _slugify calls on one name return equal slugs.
        """
        slug1, slug2 = _slugify(name), _slugify(name)
        assert slug1 == slug2, (
            f"Non-deterministic: _slugify({name!r}) produced {slug1!r} and {slug2!r}"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s\-]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_produces_valid_kebab_case(self, name: str):
        """**Validates: Requirements 2.4**

        A non-empty slug consists of lowercase alphanumerics and hyphens
        only, and neither starts nor ends with a hyphen.
        """
        slug = _slugify(name)
        slug_has_content = len(slug) > 0
        # Empty slugs are outside this property's scope; discard the example.
        assume(slug_has_content)

        # Hyphens are separators only, never at either edge.
        assert not slug.startswith("-"), f"Slug starts with hyphen: {slug!r}"
        assert not slug.endswith("-"), f"Slug ends with hyphen: {slug!r}"

        # Whole-slug shape check against the kebab-case pattern.
        shape_match = _KEBAB_CASE_RE.match(slug)
        assert shape_match, (
            f"Slug {slug!r} is not valid kebab-case (from name {name!r})"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_non_empty_for_alphanumeric_input(self, name: str):
        """**Validates: Requirements 2.4**

        Names here always start with an ASCII letter or digit, so the
        resulting slug must contain at least one character.
        """
        slug = _slugify(name)
        slug_length = len(slug)
        assert slug_length > 0, (
            f"Empty slug for name {name!r}"
        )

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_lowercase(self, name: str):
        """**Validates: Requirements 2.4**

        Lowercasing a slug leaves it unchanged, i.e. it has no uppercase
        characters.
        """
        slug = _slugify(name)
        lowered = slug.lower()
        assert slug == lowered, (
            f"Slug {slug!r} contains uppercase characters"
        )


# ---------------------------------------------------------------------------
# Property 5: Partial update idempotence
# ---------------------------------------------------------------------------


class TestProperty5PartialUpdateIdempotence:
    """Feature: agent-variants, Property 5: Partial update idempotence

    Applying one partial update to a variant a second time changes nothing:
    the state after two applications equals the state after one (updated_at
    is not modelled here).

    **Validates: Requirements 3.4**
    """

    @given(
        base_config=_agent_config_strategy(),
        update_fields=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name)
                for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_double_apply_produces_same_state(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        One application and two applications of the same non-empty partial
        update end in identical config field values.
        """
        assume(len(update_fields) > 0)

        # First application, on a private copy of the base config.
        state_after_first = copy.deepcopy(base_config)
        state_after_first.update(update_fields)

        # Second application, layered on the outcome of the first.
        state_after_second = copy.deepcopy(state_after_first)
        state_after_second.update(update_fields)

        # All config fields must match
        for field in _CONFIG_FIELDS:
            assert state_after_first[field] == state_after_second[field], (
                f"Field {field} differs after double apply: "
                f"{state_after_first[field]} != {state_after_second[field]}"
            )

    @given(
        base_config=_agent_config_strategy(),
        update_fields=st.fixed_dictionaries(
            {},
            optional={
                name: _config_value_strategy(name)
                for name in _CONFIG_FIELDS
            },
        ),
    )
    @settings(max_examples=100)
    def test_unchanged_fields_preserved_after_partial_update(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Any field left out of the update keeps the value it had before.
        """
        updated = copy.deepcopy(base_config)
        updated.update(update_fields)

        untouched = [name for name in _CONFIG_FIELDS if name not in update_fields]
        for field in untouched:
            assert updated[field] == base_config[field], (
                f"Unchanged field {field} was modified: "
                f"{base_config[field]} -> {updated[field]}"
            )

    @given(base_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_empty_update_is_noop(
        self,
        base_config: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Applying an update with no fields leaves every config field as it
        was.
        """
        # The "update" is empty, so the copy is the post-update state.
        updated = copy.deepcopy(base_config)

        for field in _CONFIG_FIELDS:
            assert updated[field] == base_config[field]