From 6e2f174b197e3a854b1d59d17e5b21edea5afb43 Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Sun, 12 Apr 2026 12:35:24 -0700 Subject: [PATCH] phase 17: disable qwen3.5 thinking mode (think:false) to reduce latency and improve structured output --- services/extractor/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/extractor/client.py b/services/extractor/client.py index 71ad34e..7d4d344 100644 --- a/services/extractor/client.py +++ b/services/extractor/client.py @@ -220,6 +220,7 @@ class OllamaClient: ], "format": json_schema, "stream": False, + "think": False, } url = f"{self._config.base_url}/api/chat"