fix: reduce LLM timeout failures — truncate docs to 8k/6k chars, cut num_predict 16384→4096, tighten prompts, trim anti-hallucination rules

2026-04-16 18:56:11 +00:00
parent 3a856cf6ff
commit 693d9e0d60
3 changed files with 22 additions and 18 deletions
@@ -259,7 +259,7 @@ class OllamaClient:
            "stream": False,
            "think": False,
            "options": {
-                "num_predict": 16384,
+                "num_predict": 4096,
            },
        }