fix: update stall timer during thinking phase to prevent premature stream abort

2026-04-15 00:06:49 +00:00
parent 01726af360
commit 8b5b692d3c
1 changed file with 6 additions and 0 deletions
@@ -280,6 +280,12 @@ class OllamaClient:

                msg = frame.get("message", {})
                token = msg.get("content", "") if isinstance(msg, dict) else ""
+                # During thinking mode, the model emits tokens in msg.thinking
+                # before msg.content. We don't accumulate thinking tokens but
+                # must update last_chunk_time so the stall guard doesn't fire.
+                thinking_token = msg.get("thinking", "") if isinstance(msg, dict) else ""
+                if thinking_token:
+                    last_chunk_time = time.monotonic()
                if not token:
                    continue