"""LLM client factory for provider-based routing.

Returns the appropriate LLM client (OllamaClient or VLLMClient) based on
the resolved ``model_provider`` from the agent config. Falls back to
OllamaClient for unknown or missing providers.

Requirements: 3.4, 3.5, 3.6, 9.5
Design: LLM Client Factory
"""

from __future__ import annotations

import logging

import httpx

from services.extractor.client import OllamaClient
from services.extractor.vllm_client import VLLMClient
from services.shared.agent_config import ResolvedAgentConfig
from services.shared.config import OllamaConfig, VLLMConfig
from services.shared.llm_protocol import LLMClient

logger = logging.getLogger(__name__)

# Providers that map to OllamaClient (including empty / None).
# NOTE: provider strings are normalised via ``_normalize_provider`` before
# the membership test, so ``None`` has already become ``""`` by then; the
# ``None`` member is kept only so the constant's value is unchanged.
_OLLAMA_PROVIDERS = frozenset({"ollama", "", None})


def _normalize_provider(resolved: ResolvedAgentConfig) -> str:
    """Return ``resolved.model_provider`` stripped and lower-cased (``""`` if falsy)."""
    return (resolved.model_provider or "").strip().lower()


def _vllm_config_for(
    resolved: ResolvedAgentConfig,
    base_vllm: VLLMConfig,
) -> VLLMConfig:
    """Build a ``VLLMConfig`` from agent-level overrides plus base settings."""
    return VLLMConfig(
        base_url=base_vllm.base_url,
        model=resolved.model_name,
        timeout=resolved.timeout_seconds,
        max_retries=resolved.max_retries,
        retry_base_delay=base_vllm.retry_base_delay,
        retry_max_delay=base_vllm.retry_max_delay,
        retry_backoff_multiplier=base_vllm.retry_backoff_multiplier,
        max_tokens=resolved.max_tokens,
        temperature=base_vllm.temperature,
        api_key=base_vllm.api_key,
    )


def _ollama_config_for(
    resolved: ResolvedAgentConfig,
    base_ollama: OllamaConfig,
) -> OllamaConfig:
    """Build an ``OllamaConfig`` from agent-level overrides plus base settings."""
    return OllamaConfig(
        base_url=base_ollama.base_url,
        model=resolved.model_name,
        timeout=resolved.timeout_seconds,
        max_retries=resolved.max_retries,
        retry_base_delay=base_ollama.retry_base_delay,
        retry_max_delay=base_ollama.retry_max_delay,
        retry_backoff_multiplier=base_ollama.retry_backoff_multiplier,
        max_tokens=resolved.max_tokens,
        stall_timeout=base_ollama.stall_timeout,
        loop_window=base_ollama.loop_window,
        loop_threshold=base_ollama.loop_threshold,
        context_window=resolved.context_window,
    )


def build_config_from_resolved(
    resolved: ResolvedAgentConfig,
    base_ollama: OllamaConfig,
    base_vllm: VLLMConfig,
) -> OllamaConfig | VLLMConfig:
    """Build a provider-specific config from a resolved agent config.

    Merges the resolved agent-level overrides (model_name, timeout,
    retries, max_tokens, and -- for Ollama only -- context_window) with the
    base environment config (base_url, retry delays, provider-specific
    defaults).

    The provider string is compared after stripping whitespace and
    lower-casing. Any provider other than ``"vllm"`` yields an
    ``OllamaConfig``; a warning is logged when the provider is not one of
    the recognised Ollama spellings.

    Args:
        resolved: Runtime config resolved from the database.
        base_ollama: Base OllamaConfig loaded from environment variables.
        base_vllm: Base VLLMConfig loaded from environment variables.

    Returns:
        An ``OllamaConfig`` or ``VLLMConfig`` depending on the provider.
    """
    provider = _normalize_provider(resolved)

    if provider == "vllm":
        return _vllm_config_for(resolved, base_vllm)

    # Default: Ollama config (covers "ollama", "", None, and unknown)
    if provider not in _OLLAMA_PROVIDERS:
        logger.warning(
            "Unknown model_provider %r for agent %s — treating as ollama",
            resolved.model_provider,
            resolved.agent_id,
        )
    return _ollama_config_for(resolved, base_ollama)


def build_llm_client(
    resolved: ResolvedAgentConfig | None,
    ollama_config: OllamaConfig,
    vllm_config: VLLMConfig,
    http_client: httpx.AsyncClient | None = None,
) -> LLMClient:
    """Return the appropriate LLM client based on the resolved provider.

    Provider routing:

    - ``None`` / ``""`` / ``"ollama"`` → :class:`OllamaClient`
    - ``"vllm"`` → :class:`VLLMClient`
    - Unknown value → log warning, fall back to :class:`OllamaClient`

    When *resolved* is ``None`` (DB lookup failed), the base
    ``ollama_config`` is used directly.

    Args:
        resolved: Resolved agent config (may be ``None``).
        ollama_config: Base OllamaConfig from environment.
        vllm_config: Base VLLMConfig from environment.
        http_client: Optional shared httpx client for testing.

    Returns:
        An LLM client satisfying the :class:`LLMClient` protocol.
    """
    if resolved is None:
        logger.info("No resolved agent config — defaulting to OllamaClient")
        return OllamaClient(ollama_config, http_client=http_client)

    provider = _normalize_provider(resolved)

    if provider == "vllm":
        vllm_cfg = _vllm_config_for(resolved, vllm_config)
        logger.info(
            "Building VLLMClient for agent %s (model=%s)",
            resolved.agent_id,
            vllm_cfg.model,
        )
        return VLLMClient(vllm_cfg, http_client=http_client)

    if provider not in _OLLAMA_PROVIDERS:
        logger.warning(
            "Unknown model_provider %r for agent %s — falling back to OllamaClient",
            resolved.model_provider,
            resolved.agent_id,
        )

    # Build the config through the typed helper rather than
    # build_config_from_resolved(): that function logs its own
    # unknown-provider warning, which would duplicate the one just emitted.
    ollama_cfg = _ollama_config_for(resolved, ollama_config)
    return OllamaClient(ollama_cfg, http_client=http_client)