diff --git a/services/extractor/client.py b/services/extractor/client.py index fb27cd4..48c7f34 100644 --- a/services/extractor/client.py +++ b/services/extractor/client.py @@ -280,6 +280,12 @@ class OllamaClient: msg = frame.get("message", {}) token = msg.get("content", "") if isinstance(msg, dict) else "" + # During thinking mode, the model emits tokens in msg.thinking + # before msg.content. We don't accumulate thinking tokens but + # must update last_chunk_time so the stall guard doesn't fire. + thinking_token = msg.get("thinking", "") if isinstance(msg, dict) else "" + if thinking_token: + last_chunk_time = time.monotonic() if not token: continue