fix: update stall timer during thinking phase to prevent premature stream abort
This commit is contained in:
@@ -280,6 +280,12 @@ class OllamaClient:
|
|||||||
|
|
||||||
msg = frame.get("message", {})
|
msg = frame.get("message", {})
|
||||||
token = msg.get("content", "") if isinstance(msg, dict) else ""
|
token = msg.get("content", "") if isinstance(msg, dict) else ""
|
||||||
|
# During thinking mode, the model emits tokens in msg.thinking
|
||||||
|
# before msg.content. We don't accumulate thinking tokens but
|
||||||
|
# must update last_chunk_time so the stall guard doesn't fire.
|
||||||
|
thinking_token = msg.get("thinking", "") if isinstance(msg, dict) else ""
|
||||||
|
if thinking_token:
|
||||||
|
last_chunk_time = time.monotonic()
|
||||||
if not token:
|
if not token:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user