fix: remove think=false (Ollama bug #14645), bump max_tokens to 32k
This commit is contained in:
@@ -228,7 +228,12 @@ class OllamaClient:
     ],
     "format": json_schema,
     "stream": True,
-    "think": False,
+    # NOTE: Do NOT set "think": False here. Ollama has a known bug
+    # (issues #14645, #15260) where think=false silently disables
+    # the format constraint for qwen3.5 and gemma4 models, causing
+    # the model to output plain text instead of valid JSON.
+    # Omitting "think" lets the model use thinking mode (slightly
+    # slower but structured output actually works).
 }
 
 url = f"{self._config.base_url}/api/chat"
|
||||
|
||||
Reference in New Issue
Block a user