diff --git a/services/recommendation/thesis_llm.py b/services/recommendation/thesis_llm.py index 218e32e..1ff0248 100644 --- a/services/recommendation/thesis_llm.py +++ b/services/recommendation/thesis_llm.py @@ -405,6 +405,8 @@ async def _call_vllm_thesis( "max_tokens": config.max_tokens, "temperature": config.temperature, "stream": False, + # Disable thinking/reasoning mode for Qwen3 models on vLLM + "chat_template_kwargs": {"enable_thinking": False}, } headers: dict[str, str] = {"Content-Type": "application/json"}