# stonks-oracle/tests/test_signal_engine_normalizer.py

"""Unit tests for services.signal_engine.normalizer.

Tests the input normalizer's data assembly, sentinel handling, timestamp
validation, and derived field computation.

Requirements: 1.1, 1.2, 1.3, 1.4, 1.5
"""

from __future__ import annotations

from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock

import pytest

from services.signal_engine.config import SignalEngineConfig
from services.signal_engine.models import OHLCVBar
from services.signal_engine.normalizer import (
    TIMEFRAMES,
    _aggregate_bars_by_period,
    _polygon_bar_to_ohlcv,
    _validate_monotonic_timestamps,
    normalize_input,
)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _make_bar_row(ts_ms: int, o: float, h: float, l: float, c: float, v: float) -> MagicMock:
    """Create a mock asyncpg.Record with Polygon bar data."""
    row = MagicMock()
    row.__getitem__ = lambda self, key: {
        "data": {"t": ts_ms, "o": o, "h": h, "l": l, "c": c, "v": v},
    }[key]
    return row


def _make_bar(ts_ms: int, c: float = 100.0) -> OHLCVBar:
    """Return an OHLCVBar stamped at ``ts_ms`` (epoch milliseconds, UTC).

    Prices are derived from the close ``c``: open is ``c - 1``, high is
    ``c + 1`` and low is ``c - 2``. Volume is fixed at 1000.0.
    """
    stamp = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc)
    prices = {"open": c - 1, "high": c + 1, "low": c - 2, "close": c}
    return OHLCVBar(timestamp=stamp, volume=1000.0, **prices)


# ---------------------------------------------------------------------------
# _polygon_bar_to_ohlcv
# ---------------------------------------------------------------------------


class TestPolygonBarToOhlcv:
    """Conversion of raw Polygon bar rows into OHLCVBar objects."""

    def test_valid_bar(self):
        """A complete payload maps field-for-field onto the bar."""
        bar = _polygon_bar_to_ohlcv(
            _make_bar_row(1700000000000, 100.0, 105.0, 99.0, 103.0, 5000.0)
        )
        assert bar is not None
        observed = (bar.open, bar.high, bar.low, bar.close, bar.volume)
        assert observed == (100.0, 105.0, 99.0, 103.0, 5000.0)
        assert bar.timestamp.year == 2023

    def test_missing_timestamp_returns_none(self):
        """A payload lacking the ``t`` key yields None."""

        def _lookup(_row, column):
            return {"data": {"o": 1, "h": 2, "l": 0, "c": 1, "v": 10}}[column]

        record = MagicMock()
        record.__getitem__ = _lookup
        assert _polygon_bar_to_ohlcv(record) is None

    def test_non_dict_data_returns_none(self):
        """A ``data`` column that is not a dict yields None."""

        def _lookup(_row, column):
            return {"data": "not a dict"}[column]

        record = MagicMock()
        record.__getitem__ = _lookup
        assert _polygon_bar_to_ohlcv(record) is None


# ---------------------------------------------------------------------------
# _validate_monotonic_timestamps
# ---------------------------------------------------------------------------


class TestValidateMonotonicTimestamps:
    """Ordering checks performed by _validate_monotonic_timestamps."""

    def test_already_monotonic(self):
        """Ordered input comes back as the very same list object."""
        ordered = [_make_bar(ms) for ms in (1_000_000, 2_000_000, 3_000_000)]
        assert _validate_monotonic_timestamps(ordered, "D", "AAPL") is ordered

    def test_non_monotonic_gets_sorted(self):
        """Out-of-order input comes back in ascending timestamp order."""
        shuffled = [_make_bar(ms) for ms in (3_000_000, 1_000_000, 2_000_000)]
        stamps = [
            bar.timestamp
            for bar in _validate_monotonic_timestamps(shuffled, "D", "AAPL")
        ]
        assert all(earlier <= later for earlier, later in zip(stamps, stamps[1:]))

    def test_single_bar(self):
        """A one-element list keeps its single bar."""
        lone = [_make_bar(1000000)]
        assert len(_validate_monotonic_timestamps(lone, "D", "AAPL")) == 1

    def test_empty_list(self):
        """An empty list stays empty."""
        assert _validate_monotonic_timestamps([], "D", "AAPL") == []


# ---------------------------------------------------------------------------
# _aggregate_bars_by_period
# ---------------------------------------------------------------------------


class TestAggregate:
    """Roll-up of daily bars into coarser periods."""

    def test_weekly_aggregation(self):
        """Ten consecutive daily bars produce at least two weekly bars."""
        from datetime import timedelta

        start = datetime(2024, 1, 1, tzinfo=timezone.utc)
        daily = [
            OHLCVBar(
                timestamp=start + timedelta(days=offset),
                open=100.0 + offset,
                high=110.0 + offset,
                low=90.0 + offset,
                close=105.0 + offset,
                volume=1000.0,
            )
            for offset in range(10)
        ]

        weekly = _aggregate_bars_by_period(daily, "week")
        assert len(weekly) >= 2  # the span crosses an ISO-week boundary
        # Every bucket holds at least one day's worth of volume.
        assert all(bucket.volume >= 1000.0 for bucket in weekly)

    def test_monthly_aggregation(self):
        """Forty-five daily bars from mid-January produce at least two monthly bars."""
        from datetime import timedelta

        start = datetime(2024, 1, 15, tzinfo=timezone.utc)
        daily = [
            OHLCVBar(
                timestamp=start + timedelta(days=offset),
                open=100.0,
                high=110.0,
                low=90.0,
                close=105.0,
                volume=500.0,
            )
            for offset in range(45)  # runs from January into February
        ]

        monthly = _aggregate_bars_by_period(daily, "month")
        assert len(monthly) >= 2

    def test_empty_input(self):
        """No bars in, no bars out."""
        assert _aggregate_bars_by_period([], "week") == []


# ---------------------------------------------------------------------------
# normalize_input — integration with mocked DB
# ---------------------------------------------------------------------------


class TestNormalizeInput:
    """Test the full normalize_input function with mocked asyncpg pool.

    The pool's ``fetch``/``fetchrow`` coroutines are replaced with stubs that
    route on substrings of the SQL text, so these tests depend on the table
    names and snapshot-type literals that appear in the normalizer's queries.
    """

    @pytest.fixture
    def config(self):
        """Engine configuration built with its defaults."""
        return SignalEngineConfig()

    @pytest.fixture
    def mock_pool(self):
        """Bare AsyncMock pool; each test installs its own fetch stubs."""
        return AsyncMock()

    # -- shared helpers ------------------------------------------------------

    @staticmethod
    def _record(columns):
        """Return a mock row whose ``row[key]`` lookups are served from *columns*.

        Unknown keys raise ``KeyError``, as a plain dict lookup would.
        """
        row = MagicMock()
        row.__getitem__ = lambda self, key: columns[key]
        return row

    @staticmethod
    async def _no_rows(query, *args):
        """``pool.fetch`` stub for a query that matches nothing."""
        return []

    @staticmethod
    async def _no_row(query, *args):
        """``pool.fetchrow`` stub for a query that matches nothing."""
        return None

    def _setup_pool_with_data(self, pool):
        """Configure the mock pool to return realistic data.

        Installs five daily bars, a trend-window row, an earnings date 30 days
        ahead, three macro impact rows (two positive, one negative) and no
        open positions. Intraday bar queries return nothing.
        """
        from datetime import date, timedelta

        ts_base = 1700000000000  # Nov 2023

        # Daily bars, one per day, each drifting up by 1.0.
        daily_rows = [
            _make_bar_row(
                ts_base + i * 86400000,
                100.0 + i,
                105.0 + i,
                98.0 + i,
                103.0 + i,
                10000.0,
            )
            for i in range(5)
        ]

        # Trend window row
        trend_row = self._record({"confidence": 0.75})

        # Earnings row
        future_date = date.today() + timedelta(days=30)
        earnings_row = self._record({"earnings_date": future_date})

        # Macro impact rows
        macro_rows = [
            self._record(
                {
                    "impact_direction": direction,
                    "macro_impact_score": 0.5,
                    "confidence": 0.8,
                }
            )
            for direction in ("positive", "positive", "negative")
        ]

        # Position rows — empty
        position_rows = []

        async def mock_fetch(query, *args):
            q = query.strip().lower()
            if "market_snapshots" in q and "snapshot_type = 'bar'" in q:
                return daily_rows
            if "market_snapshots" in q and "intraday_bar" in q:
                return []
            if "macro_impact_records" in q:
                return macro_rows
            if "position_stop_levels" in q:
                return position_rows
            return []

        async def mock_fetchrow(query, *args):
            q = query.strip().lower()
            if "trend_windows" in q:
                return trend_row
            if "earnings_calendar" in q:
                return earnings_row
            return None

        pool.fetch = mock_fetch
        pool.fetchrow = mock_fetchrow

    # -- tests ---------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_full_normalization(self, mock_pool, config):
        """Every data source populated: all fields are assembled."""
        self._setup_pool_with_data(mock_pool)
        result = await normalize_input(mock_pool, "AAPL", config)

        assert result.ticker == "AAPL"
        assert result.evaluated_at is not None
        assert len(result.bars["D"]) == 5
        assert result.valuation_score == 0.75
        assert result.earnings_proximity_days is not None
        assert result.earnings_proximity_days > 0
        # Two positive vs. one negative record of equal weight must lean
        # positive, not merely be non-zero.
        assert result.macro_bias > 0.0
        assert result.open_positions == []
        assert len(result.closing_prices) == 5
        assert len(result.returns) == 4  # n-1 returns
        assert result.current_price is not None

    @pytest.mark.asyncio
    async def test_sentinel_values_on_empty_data(self, mock_pool, config):
        """When all data sources return empty, sentinels are used."""
        mock_pool.fetch = self._no_rows
        mock_pool.fetchrow = self._no_row

        result = await normalize_input(mock_pool, "UNKNOWN", config)

        assert result.ticker == "UNKNOWN"
        assert all(result.bars[tf] == [] for tf in TIMEFRAMES)
        assert result.valuation_score is None
        assert result.earnings_proximity_days is None
        assert result.macro_bias == 0.0
        assert result.open_positions == []
        assert result.closing_prices == []
        assert result.returns == []
        assert result.current_price is None

    @pytest.mark.asyncio
    async def test_db_errors_produce_sentinels(self, mock_pool, config):
        """When DB queries raise exceptions, sentinels are used."""

        async def failing_query(query, *args):
            raise Exception("DB connection lost")

        mock_pool.fetch = failing_query
        mock_pool.fetchrow = failing_query

        result = await normalize_input(mock_pool, "FAIL", config)

        assert result.ticker == "FAIL"
        assert all(result.bars[tf] == [] for tf in TIMEFRAMES)
        assert result.valuation_score is None
        assert result.earnings_proximity_days is None
        assert result.macro_bias == 0.0
        assert result.open_positions == []
        assert result.current_price is None

    @pytest.mark.asyncio
    async def test_weekly_monthly_derived_from_daily(self, mock_pool, config):
        """Weekly and monthly bars are derived from daily bars."""
        ts_base = 1700000000000
        # 30 days of identical daily bars.
        daily_rows = [
            _make_bar_row(ts_base + i * 86400000, 100.0, 110.0, 90.0, 105.0, 1000.0)
            for i in range(30)
        ]

        async def mock_fetch(query, *args):
            q = query.strip().lower()
            if "market_snapshots" in q and "snapshot_type = 'bar'" in q:
                return daily_rows
            return []

        mock_pool.fetch = mock_fetch
        mock_pool.fetchrow = self._no_row

        result = await normalize_input(mock_pool, "AAPL", config)

        assert len(result.bars["D"]) == 30
        assert len(result.bars["W"]) > 0  # weekly derived
        assert len(result.bars["M"]) > 0  # monthly derived

    @pytest.mark.asyncio
    async def test_current_price_from_shortest_timeframe(self, mock_pool, config):
        """current_price comes from the shortest available timeframe."""
        ts_base = 1700000000000

        # Only provide intraday bars at 30-minute intervals, no daily.
        # Closes run 150.0, 151.0, 152.0, so the last close is 152.0.
        intraday_rows = [
            _make_bar_row(ts_base + i * 1800000, 100.0, 110.0, 90.0, 150.0 + i, 500.0)
            for i in range(3)
        ]

        async def mock_fetch(query, *args):
            q = query.strip().lower()
            if "intraday_bar" in q:
                return intraday_rows
            return []

        mock_pool.fetch = mock_fetch
        mock_pool.fetchrow = self._no_row

        result = await normalize_input(mock_pool, "AAPL", config)

        # M30 is the shortest timeframe and has data
        assert result.current_price == 152.0

    @pytest.mark.asyncio
    async def test_macro_bias_computation(self, mock_pool, config):
        """macro_bias is a weighted average of direction scores."""
        # 2 positive, 1 negative — should lean positive
        macro_rows = [
            self._record(
                {
                    "impact_direction": direction,
                    "macro_impact_score": score,
                    "confidence": conf,
                }
            )
            for direction, score, conf in [
                ("positive", 0.8, 0.9),
                ("positive", 0.6, 0.7),
                ("negative", 0.3, 0.5),
            ]
        ]

        async def mock_fetch(query, *args):
            if "macro_impact_records" in query:
                return macro_rows
            return []

        mock_pool.fetch = mock_fetch
        mock_pool.fetchrow = self._no_row

        result = await normalize_input(mock_pool, "AAPL", config)

        # Weighted: pos(0.8*0.9=0.72) + pos(0.6*0.7=0.42) + neg(0.3*0.5=0.15)
        # = (1.0*0.72 + 1.0*0.42 + (-1.0)*0.15) / (0.72+0.42+0.15)
        # = (0.72 + 0.42 - 0.15) / 1.29 ≈ 0.767
        assert result.macro_bias > 0.0
        assert result.macro_bias <= 1.0