diff --git a/services/extractor/client.py b/services/extractor/client.py
index 3d7737c..fb27cd4 100644
--- a/services/extractor/client.py
+++ b/services/extractor/client.py
@@ -228,7 +228,12 @@ class OllamaClient:
             ],
             "format": json_schema,
             "stream": True,
-            "think": False,
+            # NOTE: Do NOT set "think": False here. Ollama has a known bug
+            # (issues #14645, #15260) where think=false silently disables
+            # the format constraint for qwen3.5 and gemma4 models, causing
+            # the model to output plain text instead of valid JSON.
+            # Omitting "think" lets the model use thinking mode (slightly
+            # slower but structured output actually works).
         }
 
         url = f"{self._config.base_url}/api/chat"
diff --git a/services/shared/config.py b/services/shared/config.py
index 5de1620..480e3c3 100644
--- a/services/shared/config.py
+++ b/services/shared/config.py
@@ -47,7 +47,7 @@ class OllamaConfig:
     retry_base_delay: float = 1.0
     retry_max_delay: float = 10.0
     retry_backoff_multiplier: float = 2.0
-    max_tokens: int = 4096
+    max_tokens: int = 32768
     stall_timeout: float = 30.0
     loop_window: int = 64
     loop_threshold: float = 0.5