diff --git a/services/shared/config.py b/services/shared/config.py index 2ff747c..6fb95fc 100644 --- a/services/shared/config.py +++ b/services/shared/config.py @@ -61,13 +61,13 @@ class VLLMConfig: Requirements: 3.1, 3.2 """ base_url: str = "http://192.168.42.254:8000" - model: str = "RedHatAI/Qwen3.6-35B-A3B-NVFP4" + model: str = "AxionML/Qwen3.5-9B-NVFP4" timeout: int = 120 max_retries: int = 2 retry_base_delay: float = 1.0 retry_max_delay: float = 10.0 retry_backoff_multiplier: float = 2.0 - max_tokens: int = 32768 + max_tokens: int = 4096 temperature: float = 0.7 api_key: str = "" # Optional, for authenticated vLLM deployments @@ -287,7 +287,7 @@ def load_config() -> AppConfig: retry_base_delay=float(os.getenv("VLLM_RETRY_BASE_DELAY", "1.0")), retry_max_delay=float(os.getenv("VLLM_RETRY_MAX_DELAY", "10.0")), retry_backoff_multiplier=float(os.getenv("VLLM_RETRY_BACKOFF_MULTIPLIER", "2.0")), - max_tokens=int(os.getenv("VLLM_MAX_TOKENS", "32768")), + max_tokens=int(os.getenv("VLLM_MAX_TOKENS", "4096")), temperature=float(os.getenv("VLLM_TEMPERATURE", "0.7")), api_key=os.getenv("VLLM_API_KEY", ""), ), diff --git a/tests/test_vllm_client.py b/tests/test_vllm_client.py index 742fb21..1f52a4c 100644 --- a/tests/test_vllm_client.py +++ b/tests/test_vllm_client.py @@ -386,11 +386,11 @@ def test_appconfig_vllm_defaults(): assert hasattr(cfg, "vllm") assert isinstance(cfg.vllm, VLLMConfig) assert cfg.vllm.base_url == "http://192.168.42.254:8000" - assert cfg.vllm.model == "RedHatAI/Qwen3.6-35B-A3B-NVFP4" + assert cfg.vllm.model == "AxionML/Qwen3.5-9B-NVFP4" assert cfg.vllm.timeout == 120 assert cfg.vllm.max_retries == 2 assert cfg.vllm.temperature == 0.7 - assert cfg.vllm.max_tokens == 32768 + assert cfg.vllm.max_tokens == 4096 assert cfg.vllm.api_key == ""