fix: remove think=false (Ollama bug #14645), bump max_tokens to 32k

This commit is contained in:
Celes Renata
2026-04-14 23:50:28 +00:00
parent b0fc207c1a
commit 01726af360
2 changed files with 7 additions and 2 deletions
+6 -1
View File
@@ -228,7 +228,12 @@ class OllamaClient:
],
"format": json_schema,
"stream": True,
"think": False,
# NOTE: Do NOT set "think": False here. Ollama has a known bug
# (issues #14645, #15260) where think=false silently disables
# the "format" constraint for qwen3.5 and gemma4 models, so the
# model emits plain text instead of valid JSON.
# Omitting "think" lets the model use thinking mode (slightly
# slower, but the structured-output constraint is honored).
}
url = f"{self._config.base_url}/api/chat"
+1 -1
View File
@@ -47,7 +47,7 @@ class OllamaConfig:
retry_base_delay: float = 1.0
retry_max_delay: float = 10.0
retry_backoff_multiplier: float = 2.0
max_tokens: int = 4096
max_tokens: int = 32768
stall_timeout: float = 30.0
loop_window: int = 64
loop_threshold: float = 0.5