fix: update stall timer during thinking phase to prevent premature stream abort
This commit is contained in:
@@ -280,6 +280,12 @@ class OllamaClient:
|
||||
|
||||
msg = frame.get("message", {})
|
||||
token = msg.get("content", "") if isinstance(msg, dict) else ""
|
||||
# During thinking mode, the model emits tokens in msg.thinking
|
||||
# before msg.content. We don't accumulate thinking tokens but
|
||||
# must update last_chunk_time so the stall guard doesn't abort the stream prematurely.
|
||||
thinking_token = msg.get("thinking", "") if isinstance(msg, dict) else ""
|
||||
if thinking_token:
|
||||
last_chunk_time = time.monotonic()
|
||||
if not token:
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user