From 8b5b692d3c98a61d5cb55e8e9ee3d4a510da3c04 Mon Sep 17 00:00:00 2001
From: Celes Renata <celes@frameshift.net>
Date: Wed, 15 Apr 2026 00:06:49 +0000
Subject: [PATCH] fix: update stall timer during thinking phase to prevent
 premature stream abort

---
 services/extractor/client.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/services/extractor/client.py b/services/extractor/client.py
index fb27cd4..48c7f34 100644
--- a/services/extractor/client.py
+++ b/services/extractor/client.py
@@ -280,6 +280,12 @@ class OllamaClient:
 
                 msg = frame.get("message", {})
                 token = msg.get("content", "") if isinstance(msg, dict) else ""
+                # During thinking mode the model emits tokens in msg["thinking"]
+                # before msg["content"]. Thinking tokens are not accumulated, but
+                # they must refresh last_chunk_time so the stall guard doesn't fire.
+                thinking_token = msg.get("thinking", "") if isinstance(msg, dict) else ""
+                if thinking_token:
+                    last_chunk_time = time.monotonic()
                 if not token:
                     continue