117b693b19
- LLMClient Protocol for provider-agnostic inference - VLLMClient for OpenAI-compatible /v1/chat/completions API - LLM client factory with provider routing (ollama/vllm) - VLLMConfig with VLLM_* environment variable loading - Updated extractor worker with health check and provider switching - Updated event classifier to use LLMClient protocol - Helm values for vLLM configuration - 18 unit tests + 6 property-based tests - Full backward compatibility preserved
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
"""LLM client protocol for provider abstraction.
|
|
|
|
Defines the structural interface that both OllamaClient and VLLMClient
|
|
must satisfy, using typing.Protocol for duck-typing compatibility.
|
|
|
|
Requirements: 1.1, 1.2
|
|
"""
|
|
from __future__ import annotations

from typing import TYPE_CHECKING, Protocol, runtime_checkable

if TYPE_CHECKING:
    # Imported for static type checkers only. Because annotation evaluation
    # is postponed (see the __future__ import above), the name is not needed
    # when the module is executed, so this import never runs at runtime.
    from services.extractor.client import ExtractionAttempt
|
|
|
|
|
|
@runtime_checkable
class LLMClient(Protocol):
    """Structural contract for LLM inference clients.

    Any object that provides ``call_llm`` and ``close`` as declared here
    satisfies the protocol through structural subtyping; inheriting from
    this class is not required. OllamaClient and VLLMClient are the
    intended implementations.

    The class is ``@runtime_checkable``, so ``isinstance(obj, LLMClient)``
    is allowed. That runtime check only confirms both method names exist
    on the object; it does not validate signatures or that the methods
    are coroutines.
    """

    async def call_llm(
        self,
        prompts: dict[str, str],
        json_schema: dict[str, object],
        document_text: str = "",
    ) -> ExtractionAttempt:
        """Send a chat completion request and return an extraction attempt.

        Args:
            prompts: Dict with 'system' and 'user' prompt strings.
            json_schema: JSON schema hint for structured output.
            document_text: Optional raw document text for context.
                Defaults to the empty string.

        Returns:
            An ExtractionAttempt with raw output, validation, and error info.
        """
        ...

    async def close(self) -> None:
        """Release underlying HTTP resources."""
        ...
|