feat: add remote vLLM support with provider abstraction layer

- LLMClient Protocol for provider-agnostic inference
- VLLMClient for OpenAI-compatible /v1/chat/completions API
- LLM client factory with provider routing (ollama/vllm)
- VLLMConfig with VLLM_* environment variable loading
- Updated extractor worker with health check and provider switching
- Updated event classifier to use LLMClient protocol
- Helm values for vLLM configuration
- 18 unit tests + 6 property-based tests
- Full backward compatibility preserved
This commit is contained in:
Celes Renata
2026-04-23 08:17:23 +00:00
parent 63e4fb96ea
commit 117b693b19
15 changed files with 1876 additions and 77 deletions
+11 -11
View File
@@ -274,19 +274,19 @@ class TestParseClassificationResponse:
class TestClassifyGlobalEvent:
def _make_mock_client(self, raw_output: str, error: str | None = None):
"""Create a mock OllamaClient with configurable response."""
"""Create a mock LLMClient with configurable response."""
client = MagicMock()
client._config = MagicMock()
client._config.model = "llama3.1:8b"
client._max_retries = 2
client._base_delay = 0.01
client._max_delay = 0.1
client._backoff_multiplier = 2.0
client._config.max_retries = 2
client._config.retry_base_delay = 0.01
client._config.retry_max_delay = 0.1
client._config.retry_backoff_multiplier = 2.0
attempt = MagicMock()
attempt.raw_output = raw_output
attempt.error = error
client._call_ollama = AsyncMock(return_value=attempt)
client.call_llm = AsyncMock(return_value=attempt)
return client
@pytest.mark.asyncio
@@ -314,7 +314,7 @@ class TestClassifyGlobalEvent:
assert event.severity == "critical"
assert event.confidence == 0.9
assert event.source_document_id == "doc-123"
client._call_ollama.assert_called_once()
client.call_llm.assert_called_once()
@pytest.mark.asyncio
async def test_retries_on_error(self):
@@ -340,11 +340,11 @@ class TestClassifyGlobalEvent:
success_attempt.error = None
client = self._make_mock_client("")
client._call_ollama = AsyncMock(side_effect=[fail_attempt, success_attempt])
client.call_llm = AsyncMock(side_effect=[fail_attempt, success_attempt])
event = await classify_global_event("text", "doc-456", client)
assert event.severity == "high"
assert client._call_ollama.call_count == 2
assert client.call_llm.call_count == 2
@pytest.mark.asyncio
async def test_raises_after_exhausted_retries(self):
@@ -353,12 +353,12 @@ class TestClassifyGlobalEvent:
fail_attempt.error = "timeout"
client = self._make_mock_client("")
client._call_ollama = AsyncMock(return_value=fail_attempt)
client.call_llm = AsyncMock(return_value=fail_attempt)
with pytest.raises(ValueError, match="Event classification failed"):
await classify_global_event("text", "doc-789", client)
assert client._call_ollama.call_count == 3 # initial + 2 retries
assert client.call_llm.call_count == 3 # initial + 2 retries
@pytest.mark.asyncio
async def test_minio_persistence_called(self):