feat: add remote vLLM support with provider abstraction layer
- LLMClient Protocol for provider-agnostic inference
- VLLMClient for the OpenAI-compatible /v1/chat/completions API
- LLM client factory with provider routing (ollama/vllm)
- VLLMConfig with VLLM_* environment variable loading
- Updated extractor worker with health check and provider switching
- Updated event classifier to use the LLMClient protocol
- Helm values for vLLM configuration
- 18 unit tests + 6 property-based tests
- Full backward compatibility preserved
This commit is contained in:
@@ -274,19 +274,19 @@ class TestParseClassificationResponse:
|
||||
|
||||
class TestClassifyGlobalEvent:
|
||||
def _make_mock_client(self, raw_output: str, error: str | None = None):
|
||||
"""Create a mock OllamaClient with configurable response."""
|
||||
"""Create a mock LLMClient with configurable response."""
|
||||
client = MagicMock()
|
||||
client._config = MagicMock()
|
||||
client._config.model = "llama3.1:8b"
|
||||
client._max_retries = 2
|
||||
client._base_delay = 0.01
|
||||
client._max_delay = 0.1
|
||||
client._backoff_multiplier = 2.0
|
||||
client._config.max_retries = 2
|
||||
client._config.retry_base_delay = 0.01
|
||||
client._config.retry_max_delay = 0.1
|
||||
client._config.retry_backoff_multiplier = 2.0
|
||||
|
||||
attempt = MagicMock()
|
||||
attempt.raw_output = raw_output
|
||||
attempt.error = error
|
||||
client._call_ollama = AsyncMock(return_value=attempt)
|
||||
client.call_llm = AsyncMock(return_value=attempt)
|
||||
return client
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -314,7 +314,7 @@ class TestClassifyGlobalEvent:
|
||||
assert event.severity == "critical"
|
||||
assert event.confidence == 0.9
|
||||
assert event.source_document_id == "doc-123"
|
||||
client._call_ollama.assert_called_once()
|
||||
client.call_llm.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retries_on_error(self):
|
||||
@@ -340,11 +340,11 @@ class TestClassifyGlobalEvent:
|
||||
success_attempt.error = None
|
||||
|
||||
client = self._make_mock_client("")
|
||||
client._call_ollama = AsyncMock(side_effect=[fail_attempt, success_attempt])
|
||||
client.call_llm = AsyncMock(side_effect=[fail_attempt, success_attempt])
|
||||
|
||||
event = await classify_global_event("text", "doc-456", client)
|
||||
assert event.severity == "high"
|
||||
assert client._call_ollama.call_count == 2
|
||||
assert client.call_llm.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_raises_after_exhausted_retries(self):
|
||||
@@ -353,12 +353,12 @@ class TestClassifyGlobalEvent:
|
||||
fail_attempt.error = "timeout"
|
||||
|
||||
client = self._make_mock_client("")
|
||||
client._call_ollama = AsyncMock(return_value=fail_attempt)
|
||||
client.call_llm = AsyncMock(return_value=fail_attempt)
|
||||
|
||||
with pytest.raises(ValueError, match="Event classification failed"):
|
||||
await classify_global_event("text", "doc-789", client)
|
||||
|
||||
assert client._call_ollama.call_count == 3 # initial + 2 retries
|
||||
assert client.call_llm.call_count == 3 # initial + 2 retries
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_minio_persistence_called(self):
|
||||
|
||||
Reference in New Issue
Block a user