fix: reduce LLM timeouts — truncate docs to 8k/6k chars, cut num_predict 16k→4k, tighten prompts, trim anti-hallucination rules
This commit is contained in:
@@ -259,7 +259,7 @@ class OllamaClient:
|
||||
"stream": False,
|
||||
"think": False,
|
||||
"options": {
|
||||
- "num_predict": 16384,
|
||||
+ "num_predict": 4096,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user