stonks-oracle/tests/test_pbt_agent_variants.py

"""Property-based tests for agent variant logic.

Feature: agent-variants

Uses Hypothesis to validate correctness properties of variant operations:
single-active invariant, clone field preservation, config resolution,
slug determinism, and partial update idempotence.

Requirements: 1.4, 2.1, 2.3, 2.4, 3.4, 4.1, 4.3, 4.4, 7
Design: Correctness Properties 1–5, 7
"""
from __future__ import annotations

import copy
import re
import uuid
from datetime import datetime, timezone
from typing import Any

import pytest
from hypothesis import given, settings, assume
from hypothesis import strategies as st

from services.api.app import _slugify
from services.shared.agent_config import ResolvedAgentConfig


# ---------------------------------------------------------------------------
# Hypothesis strategies
# ---------------------------------------------------------------------------

# Variant-overridable config fields, grouped by the kind of value they hold.
# The groups are declared first and the master list is derived from them.

# Fields holding free-form text.
_STR_FIELDS = [
    "model_provider",
    "model_name",
    "system_prompt",
    "user_prompt_template",
    "prompt_version",
]

# Fields holding floating-point values.
_FLOAT_FIELDS = ["temperature"]

# Fields holding integer values.
_INT_FIELDS = [
    "max_tokens",
    "context_window",
    "input_token_limit",
    "token_budget",
    "timeout_seconds",
    "max_retries",
]

# Every config field that can be overridden in a variant
# (text fields, then float fields, then integer fields).
_CONFIG_FIELDS = [*_STR_FIELDS, *_FLOAT_FIELDS, *_INT_FIELDS]


def _config_value_strategy(field: str) -> st.SearchStrategy:
    """Return a strategy producing a valid override value for ``field``.

    Float fields draw from [0.0, 2.0] (never NaN), integer fields from
    [0, 100000], and string fields yield 1-50 characters taken from the
    letter, number, punctuation and separator Unicode categories.  Any
    field name outside those groups falls back to 1-20 characters of
    arbitrary text.
    """
    if field in _FLOAT_FIELDS:
        return st.floats(min_value=0.0, max_value=2.0, allow_nan=False)

    if field in _INT_FIELDS:
        return st.integers(min_value=0, max_value=100000)

    if field in _STR_FIELDS:
        alphabet = st.characters(whitelist_categories=("L", "N", "P", "Z"))
        return st.text(min_size=1, max_size=50, alphabet=alphabet)

    # Unrecognised field name: generic short text.
    return st.text(min_size=1, max_size=20)


def _agent_config_strategy() -> st.SearchStrategy[dict[str, Any]]:
    """Build a strategy yielding complete agent configuration dicts.

    Every generated dict contains all twelve config keys; each value is
    drawn from the range declared for that key below.
    """

    def _text_up_to(limit: int) -> st.SearchStrategy[str]:
        # Possibly-empty arbitrary text of at most ``limit`` characters.
        return st.text(min_size=0, max_size=limit)

    def _int_between(low: int, high: int) -> st.SearchStrategy[int]:
        # Inclusive integer range.
        return st.integers(min_value=low, max_value=high)

    letters_and_digits = st.characters(whitelist_categories=("L", "N"))

    per_field = {
        "model_provider": st.sampled_from(["ollama", "openai", "anthropic"]),
        "model_name": st.text(
            min_size=1, max_size=30, alphabet=letters_and_digits,
        ),
        "system_prompt": _text_up_to(100),
        "user_prompt_template": _text_up_to(100),
        "prompt_version": _text_up_to(20),
        "temperature": st.floats(min_value=0.0, max_value=2.0, allow_nan=False),
        "max_tokens": _int_between(1, 100000),
        "context_window": _int_between(0, 200000),
        "input_token_limit": _int_between(0, 200000),
        "token_budget": _int_between(0, 1000000),
        "timeout_seconds": _int_between(1, 600),
        "max_retries": _int_between(0, 10),
    }
    return st.fixed_dictionaries(per_field)


def _variant_name_strategy() -> st.SearchStrategy[str]:
    """Build a strategy for variant names of 1-50 characters.

    Characters come from the letter, number, punctuation and separator
    Unicode categories.
    """
    name_alphabet = st.characters(whitelist_categories=("L", "N", "P", "Z"))
    return st.text(alphabet=name_alphabet, min_size=1, max_size=50)


def _override_subset_strategy(
    source_config: dict[str, Any],
) -> st.SearchStrategy[dict[str, Any]]:
    """Build a strategy yielding overrides for a random subset of fields.

    Each generated dict contains zero or more keys from ``_CONFIG_FIELDS``,
    every one mapped to a value from ``_config_value_strategy``.

    ``source_config`` is accepted but not consulted: the generated
    overrides do not depend on the source values.
    """
    optional_fields = {
        name: _config_value_strategy(name) for name in _CONFIG_FIELDS
    }
    # No mandatory keys; every config field is optional.
    return st.fixed_dictionaries({}, optional=optional_fields)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _simulate_clone(
    source: dict[str, Any],
    overrides: dict[str, Any],
) -> dict[str, Any]:
    """Model the clone operation: source values with overrides layered on top.

    For every name in ``_CONFIG_FIELDS`` the result holds the override value
    when one is supplied and is not ``None``; otherwise it holds the value
    from ``source``.  Keys outside ``_CONFIG_FIELDS`` are not copied.
    """

    def _chosen(name: str) -> Any:
        candidate = overrides.get(name)
        # A missing key and an explicit None both mean "keep the source value".
        return source[name] if candidate is None else candidate

    return {name: _chosen(name) for name in _CONFIG_FIELDS}


def _simulate_activate_deactivate(
    variants: list[dict[str, Any]],
    operations: list[tuple[str, int]],
) -> list[dict[str, Any]]:
    """Replay activate/deactivate operations against ``variants`` in place.

    Each operation is ``("activate", index)`` or ``("deactivate", <ignored>)``.
    Activating clears every ``is_active`` flag and then sets the flag on the
    variant at ``index``; an out-of-range index leaves the state untouched.
    Deactivating clears every flag.  Any other operation name is ignored.

    Returns the same ``variants`` list that was passed in, after mutation.
    """

    def _clear_all() -> None:
        for variant in variants:
            variant["is_active"] = False

    for action, position in operations:
        if action == "deactivate":
            _clear_all()
            continue

        is_valid_activation = (
            action == "activate" and 0 <= position < len(variants)
        )
        if not is_valid_activation:
            continue

        # Clear first so at most one flag is ever set.
        _clear_all()
        variants[position]["is_active"] = True

    return variants


# ---------------------------------------------------------------------------
# Property 1: Single active variant invariant
# ---------------------------------------------------------------------------


class TestProperty1SingleActiveVariantInvariant:
    """Feature: agent-variants, Property 1: Single active variant invariant

    For any sequence of activate/deactivate operations on variants of an
    agent, at most one variant per agent has is_active = TRUE at any point.

    Every state transition goes through ``_simulate_activate_deactivate`` so
    the tests exercise the module's single model of the activate/deactivate
    semantics rather than a second, inline copy of it.

    **Validates: Requirements 1.4, 4.1**
    """

    @given(
        num_variants=st.integers(min_value=1, max_value=10),
        operations=st.lists(
            st.tuples(
                st.sampled_from(["activate", "deactivate"]),
                st.integers(min_value=-1, max_value=9),
            ),
            min_size=1,
            max_size=30,
        ),
    )
    @settings(max_examples=100)
    def test_at_most_one_active_after_each_operation(
        self,
        num_variants: int,
        operations: list[tuple[str, int]],
    ):
        """**Validates: Requirements 1.4, 4.1**

        After each activate/deactivate operation, count of active variants
        must be 0 or 1.  Indices may fall outside the variant list; such
        activations are expected to leave the state unchanged.
        """
        agent_id = str(uuid.uuid4())
        variants = [
            {
                "id": str(uuid.uuid4()),
                "agent_id": agent_id,
                "is_active": False,
            }
            for _ in range(num_variants)
        ]

        for op, idx in operations:
            # Replay one operation at a time so the invariant is checked
            # after every step, not only on the final state.
            _simulate_activate_deactivate(variants, [(op, idx)])

            active_count = sum(1 for v in variants if v["is_active"])
            assert active_count <= 1, (
                f"Invariant violated: {active_count} active variants after "
                f"operation ({op}, {idx})"
            )

    @given(
        num_variants=st.integers(min_value=2, max_value=8),
        activate_sequence=st.lists(
            st.integers(min_value=0, max_value=7),
            min_size=2,
            max_size=20,
        ),
    )
    @settings(max_examples=100)
    def test_rapid_activate_swaps_maintain_invariant(
        self,
        num_variants: int,
        activate_sequence: list[int],
    ):
        """**Validates: Requirements 1.4, 4.1**

        Rapidly activating different variants in sequence still maintains
        exactly one active variant, and it is always the latest target.
        """
        variants = [
            {"id": str(uuid.uuid4()), "is_active": False}
            for _ in range(num_variants)
        ]

        for idx in activate_sequence:
            # Wrap the drawn index so every activation targets a real variant.
            target = idx % num_variants
            _simulate_activate_deactivate(variants, [("activate", target)])

            active_count = sum(1 for v in variants if v["is_active"])
            assert active_count == 1
            assert variants[target]["is_active"] is True


# ---------------------------------------------------------------------------
# Property 2: Clone preserves unoverridden fields
# ---------------------------------------------------------------------------


class TestProperty2ClonePreservesUnoverriddenFields:
    """Feature: agent-variants, Property 2: Clone preserves unoverridden fields

    Cloning an agent config with an arbitrary subset of overrides must yield
    a variant whose overridden fields carry the override values while every
    other field is taken unchanged from the source.

    **Validates: Requirements 2.1, 2.3**
    """

    # Any subset of config fields, each mapped to a valid value for that field.
    _overrides = st.fixed_dictionaries(
        {},
        optional={
            name: _config_value_strategy(name) for name in _CONFIG_FIELDS
        },
    )

    @given(source_config=_agent_config_strategy(), overrides=_overrides)
    @settings(max_examples=100)
    def test_overridden_fields_match_overrides(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field supplied in the overrides appears in the clone with the
        override value.
        """
        clone = _simulate_clone(source_config, overrides)

        for name in _CONFIG_FIELDS:
            if name not in overrides:
                continue
            expected, actual = overrides[name], clone[name]
            assert actual == expected, (
                f"Override field {name}: expected {expected}, "
                f"got {actual}"
            )

    @given(source_config=_agent_config_strategy(), overrides=_overrides)
    @settings(max_examples=100)
    def test_non_overridden_fields_match_source(
        self,
        source_config: dict[str, Any],
        overrides: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        Every field absent from the overrides appears in the clone with the
        source value.
        """
        clone = _simulate_clone(source_config, overrides)

        for name in _CONFIG_FIELDS:
            if name in overrides:
                continue
            expected, actual = source_config[name], clone[name]
            assert actual == expected, (
                f"Non-overridden field {name}: expected {expected}, "
                f"got {actual}"
            )

    @given(source_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_clone_with_no_overrides_is_exact_copy(
        self,
        source_config: dict[str, Any],
    ):
        """**Validates: Requirements 2.1, 2.3**

        With an empty override dict, the clone equals the source on every
        config field.
        """
        no_overrides: dict[str, Any] = {}
        clone = _simulate_clone(source_config, no_overrides)

        for name in _CONFIG_FIELDS:
            cloned, original = clone[name], source_config[name]
            assert cloned == original, (
                f"Field {name} differs: {cloned} != {original}"
            )


# ---------------------------------------------------------------------------
# Property 3: Config resolution prefers active variant
# ---------------------------------------------------------------------------


class TestProperty3ConfigResolutionPrefersActiveVariant:
    """Feature: agent-variants, Property 3: Config resolution prefers active variant

    For any agent with N variants, config resolution returns the active
    variant's config when one exists, and the base agent config when none
    is active.

    The first two tests run against ``_resolve``, a small in-test model of
    the ``COALESCE(variant.field, agent.field)`` rule; the production
    resolver itself is not exercised here.

    **Validates: Requirements 4.3, 4.4**
    """

    @staticmethod
    def _resolve(
        agent: dict[str, Any],
        variants: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """Model COALESCE-style resolution over an agent and its variants.

        Picks the first variant whose ``is_active`` flag is truthy (if any).
        For each config field the active variant's value wins unless it is
        ``None``; otherwise the agent's value is used.
        """
        active = next((v for v in variants if v["is_active"]), None)
        resolved: dict[str, Any] = {}
        for field in _CONFIG_FIELDS:
            variant_value = None if active is None else active[field]
            resolved[field] = (
                agent[field] if variant_value is None else variant_value
            )
        return resolved

    @given(
        agent_config=_agent_config_strategy(),
        variant_configs=st.lists(
            _agent_config_strategy(),
            min_size=1,
            max_size=5,
        ),
        active_index=st.integers(min_value=0, max_value=4),
    )
    @settings(max_examples=100)
    def test_active_variant_config_is_returned(
        self,
        agent_config: dict[str, Any],
        variant_configs: list[dict[str, Any]],
        active_index: int,
    ):
        """**Validates: Requirements 4.3, 4.4**

        When an active variant exists, resolved config fields must match
        the active variant's values.
        """
        # Wrap the drawn index so it always points at a generated variant.
        active_idx = active_index % len(variant_configs)
        active_variant = variant_configs[active_idx]

        # Exactly one variant is flagged active; the rest are decoys that
        # the resolver must skip.
        variants = [
            {**cfg, "is_active": position == active_idx}
            for position, cfg in enumerate(variant_configs)
        ]

        resolved = self._resolve(agent_config, variants)

        for field in _CONFIG_FIELDS:
            assert resolved[field] == active_variant[field], (
                f"Field {field}: expected variant value {active_variant[field]}, "
                f"got {resolved[field]}"
            )

    @given(
        agent_config=_agent_config_strategy(),
        inactive_configs=st.lists(_agent_config_strategy(), max_size=5),
    )
    @settings(max_examples=100)
    def test_no_active_variant_returns_agent_config(
        self,
        agent_config: dict[str, Any],
        inactive_configs: list[dict[str, Any]],
    ):
        """**Validates: Requirements 4.3, 4.4**

        When no active variant exists, resolved config fields must match
        the base agent's values, regardless of how many inactive variants
        are present.
        """
        variants = [{**cfg, "is_active": False} for cfg in inactive_configs]

        resolved = self._resolve(agent_config, variants)

        for field in _CONFIG_FIELDS:
            assert resolved[field] == agent_config[field]

    @given(
        agent_config=_agent_config_strategy(),
        variant_config=_agent_config_strategy(),
        has_active=st.booleans(),
    )
    @settings(max_examples=100)
    def test_resolution_source_matches_active_state(
        self,
        agent_config: dict[str, Any],
        variant_config: dict[str, Any],
        has_active: bool,
    ):
        """**Validates: Requirements 4.3, 4.4**

        A ``ResolvedAgentConfig`` built from the chosen source (variant when
        one is active, agent otherwise) exposes that source's values and
        carries a ``variant_id`` only in the active case.
        """
        if has_active:
            source = variant_config
            variant_id = str(uuid.uuid4())
        else:
            source = agent_config
            variant_id = None

        # Build a ResolvedAgentConfig to verify the dataclass works
        config = ResolvedAgentConfig(
            agent_id=str(uuid.uuid4()),
            variant_id=variant_id,
            model_provider=source["model_provider"],
            model_name=source["model_name"],
            system_prompt=source["system_prompt"],
            user_prompt_template=source["user_prompt_template"],
            prompt_version=source["prompt_version"],
            temperature=source["temperature"],
            max_tokens=source["max_tokens"],
            context_window=source["context_window"],
            input_token_limit=source["input_token_limit"],
            token_budget=source["token_budget"],
            timeout_seconds=source["timeout_seconds"],
            max_retries=source["max_retries"],
        )

        assert config.model_provider == source["model_provider"]
        assert config.model_name == source["model_name"]
        assert config.temperature == source["temperature"]
        assert config.max_tokens == source["max_tokens"]

        if has_active:
            assert config.variant_id is not None
        else:
            assert config.variant_id is None


# ---------------------------------------------------------------------------
# Property 4: Slug auto-generation determinism
# ---------------------------------------------------------------------------

# Strict kebab-case: lowercase alphanumeric runs joined by single hyphens.
# Anchored with ``\Z`` rather than ``$`` because ``$`` also matches just
# before a trailing newline, which would let a slug like "abc\n" through.
_KEBAB_CASE_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*\Z")


class TestProperty4SlugAutoGenerationDeterminism:
    """Feature: agent-variants, Property 4: Slug auto-generation determinism

    For any variant_name, the auto-generated slug is deterministic,
    produces valid kebab-case, and is non-empty for non-empty input
    containing at least one alphanumeric character.

    **Validates: Requirements 2.4**
    """

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_deterministic(self, name: str):
        """**Validates: Requirements 2.4**

        Calling _slugify twice with the same name produces the same slug.
        """
        slug1 = _slugify(name)
        slug2 = _slugify(name)
        assert slug1 == slug2, (
            f"Non-deterministic: _slugify({name!r}) produced {slug1!r} and {slug2!r}"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s\-]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_produces_valid_kebab_case(self, name: str):
        """**Validates: Requirements 2.4**

        The slug must be lowercase alphanumeric with hyphens, no leading
        or trailing hyphens.
        """
        slug = _slugify(name)
        assume(len(slug) > 0)

        # No leading or trailing hyphens
        assert not slug.startswith("-"), f"Slug starts with hyphen: {slug!r}"
        assert not slug.endswith("-"), f"Slug ends with hyphen: {slug!r}"

        # Only lowercase alphanumeric and hyphens.  fullmatch() is used
        # instead of match() so the whole slug must be consumed: with
        # match(), a ``$`` anchor also succeeds just before a trailing
        # newline, and the ``\s`` in the input strategy can produce one.
        assert _KEBAB_CASE_RE.fullmatch(slug), (
            f"Slug {slug!r} is not valid kebab-case (from name {name!r})"
        )

    @given(name=st.from_regex(r"[a-zA-Z0-9][\w\s]{0,49}", fullmatch=True))
    @settings(max_examples=100)
    def test_slugify_non_empty_for_alphanumeric_input(self, name: str):
        """**Validates: Requirements 2.4**

        For any name containing at least one alphanumeric character,
        the slug is non-empty.  Generated names always start with an
        ASCII letter or digit.
        """
        slug = _slugify(name)
        assert len(slug) > 0, (
            f"Empty slug for name {name!r}"
        )

    @given(name=_variant_name_strategy())
    @settings(max_examples=100)
    def test_slugify_is_lowercase(self, name: str):
        """**Validates: Requirements 2.4**

        The slug must be entirely lowercase.
        """
        slug = _slugify(name)
        assert slug == slug.lower(), (
            f"Slug {slug!r} contains uppercase characters"
        )


# ---------------------------------------------------------------------------
# Property 5: Partial update idempotence
# ---------------------------------------------------------------------------


class TestProperty5PartialUpdateIdempotence:
    """Feature: agent-variants, Property 5: Partial update idempotence

    Applying the same partial update to a variant a second time must not
    change the state reached after the first application (updated_at is
    outside the modelled state).

    **Validates: Requirements 3.4**
    """

    # Any subset of config fields, each mapped to a valid value for that field.
    _partial_update = st.fixed_dictionaries(
        {},
        optional={
            name: _config_value_strategy(name) for name in _CONFIG_FIELDS
        },
    )

    @given(base_config=_agent_config_strategy(), update_fields=_partial_update)
    @settings(max_examples=100)
    def test_double_apply_produces_same_state(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        The state after applying an update once equals the state after
        applying it twice, field by field.
        """
        # Only non-empty updates are interesting for this property.
        assume(len(update_fields) > 0)

        once = copy.deepcopy(base_config)
        once.update(update_fields)

        # Re-apply the very same update on top of the first result.
        twice = copy.deepcopy(once)
        twice.update(update_fields)

        for name in _CONFIG_FIELDS:
            first, second = once[name], twice[name]
            assert first == second, (
                f"Field {name} differs after double apply: "
                f"{first} != {second}"
            )

    @given(base_config=_agent_config_strategy(), update_fields=_partial_update)
    @settings(max_examples=100)
    def test_unchanged_fields_preserved_after_partial_update(
        self,
        base_config: dict[str, Any],
        update_fields: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        Any field left out of the update keeps the value it had before.
        """
        after = copy.deepcopy(base_config)
        after.update(update_fields)

        for name in _CONFIG_FIELDS:
            if name in update_fields:
                continue
            before_value, after_value = base_config[name], after[name]
            assert after_value == before_value, (
                f"Unchanged field {name} was modified: "
                f"{before_value} -> {after_value}"
            )

    @given(base_config=_agent_config_strategy())
    @settings(max_examples=100)
    def test_empty_update_is_noop(
        self,
        base_config: dict[str, Any],
    ):
        """**Validates: Requirements 3.4**

        With no fields in the update, every config field keeps its value.
        """
        # Nothing is applied to the copy: the update carries no fields.
        untouched = copy.deepcopy(base_config)

        for name in _CONFIG_FIELDS:
            assert untouched[name] == base_config[name]