From 8b5b692d3c98a61d5cb55e8e9ee3d4a510da3c04 Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Wed, 15 Apr 2026 00:06:49 +0000 Subject: [PATCH] fix: update stall timer during thinking phase to prevent premature stream abort --- services/extractor/client.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/extractor/client.py b/services/extractor/client.py index fb27cd4..48c7f34 100644 --- a/services/extractor/client.py +++ b/services/extractor/client.py @@ -280,6 +280,12 @@ class OllamaClient: msg = frame.get("message", {}) token = msg.get("content", "") if isinstance(msg, dict) else "" + # During thinking mode, the model emits tokens in msg.thinking + # before msg.content. We don't accumulate thinking tokens but + # must update last_chunk_time so the stall guard doesn't fire. + thinking_token = msg.get("thinking", "") if isinstance(msg, dict) else "" + if thinking_token: + last_chunk_time = time.monotonic() if not token: continue