feat: add remote vLLM support with provider abstraction layer

- LLMClient Protocol for provider-agnostic inference
- VLLMClient for the OpenAI-compatible /v1/chat/completions API
- LLM client factory with provider routing (ollama/vllm)
- VLLMConfig with VLLM_* environment variable loading
- Updated extractor worker with health check and provider switching
- Updated event classifier to use the LLMClient protocol
- Helm values for vLLM configuration
- 18 unit tests + 6 property-based tests
- Full backward compatibility preserved
2026-04-23 08:17:23 +00:00
parent 63e4fb96ea
commit 117b693b19
15 changed files with 1876 additions and 77 deletions
@@ -274,19 +274,19 @@ class TestParseClassificationResponse:

 class TestClassifyGlobalEvent:
    def _make_mock_client(self, raw_output: str, error: str | None = None):
-        """Create a mock OllamaClient with configurable response."""
+        """Create a mock LLMClient with configurable response."""
        client = MagicMock()
        client._config = MagicMock()
        client._config.model = "llama3.1:8b"
-        client._max_retries = 2
-        client._base_delay = 0.01
-        client._max_delay = 0.1
-        client._backoff_multiplier = 2.0
+        client._config.max_retries = 2
+        client._config.retry_base_delay = 0.01
+        client._config.retry_max_delay = 0.1
+        client._config.retry_backoff_multiplier = 2.0

        attempt = MagicMock()
        attempt.raw_output = raw_output
        attempt.error = error
-        client._call_ollama = AsyncMock(return_value=attempt)
+        client.call_llm = AsyncMock(return_value=attempt)
        return client

    @pytest.mark.asyncio
@@ -314,7 +314,7 @@ class TestClassifyGlobalEvent:
        assert event.severity == "critical"
        assert event.confidence == 0.9
        assert event.source_document_id == "doc-123"
-        client._call_ollama.assert_called_once()
+        client.call_llm.assert_called_once()

    @pytest.mark.asyncio
    async def test_retries_on_error(self):
@@ -340,11 +340,11 @@ class TestClassifyGlobalEvent:
        success_attempt.error = None

        client = self._make_mock_client("")
-        client._call_ollama = AsyncMock(side_effect=[fail_attempt, success_attempt])
+        client.call_llm = AsyncMock(side_effect=[fail_attempt, success_attempt])

        event = await classify_global_event("text", "doc-456", client)
        assert event.severity == "high"
-        assert client._call_ollama.call_count == 2
+        assert client.call_llm.call_count == 2

    @pytest.mark.asyncio
    async def test_raises_after_exhausted_retries(self):
@@ -353,12 +353,12 @@ class TestClassifyGlobalEvent:
        fail_attempt.error = "timeout"

        client = self._make_mock_client("")
-        client._call_ollama = AsyncMock(return_value=fail_attempt)
+        client.call_llm = AsyncMock(return_value=fail_attempt)

        with pytest.raises(ValueError, match="Event classification failed"):
            await classify_global_event("text", "doc-789", client)

-        assert client._call_ollama.call_count == 3  # initial + 2 retries
+        assert client.call_llm.call_count == 3  # initial + 2 retries

    @pytest.mark.asyncio
    async def test_minio_persistence_called(self):