fix: reduce LLM timeouts — truncate input text (extraction documents to 8,000 chars, news articles to 6,000 chars), cut num_predict 16384→4096, tighten prompts, trim anti-hallucination rules

2026-04-16 18:56:11 +00:00
parent 3a856cf6ff
commit 693d9e0d60
3 changed files with 22 additions and 18 deletions
@@ -259,7 +259,7 @@ class OllamaClient:
            "stream": False,
            "think": False,
            "options": {
-                "num_predict": 16384,
+                "num_predict": 4096,
            },
        }

@@ -183,22 +183,15 @@ def get_event_json_schema() -> dict[str, Any]:
 # ---------------------------------------------------------------------------

 _SYSTEM_PROMPT = """\
-You classify global news articles into structured macro event intelligence. \
-Read the article carefully and extract the event classification. \
-Return ONLY valid JSON matching the schema. No commentary, no markdown, no explanation."""
+You classify global news into structured macro event JSON. \
+Return ONLY a single JSON object. No markdown, no explanation. \
+Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences."""

 _ANTI_HALLUCINATION_RULES = """\
-CRITICAL RULES — read carefully:
-1. Only extract information EXPLICITLY stated in the article text.
-2. Do NOT infer, speculate, or fabricate facts, regions, sectors, or commodities.
-3. If the article mentions multiple distinct impact types, include ALL of them in event_types.
-4. For affected_regions, only include regions explicitly mentioned or clearly implied by the event.
-5. For affected_sectors, only include sectors with a clear causal link to the event.
-6. For affected_commodities, only include commodities directly referenced or obviously impacted.
-7. For key_facts, each fact must be directly supported by a specific passage in the text.
-8. If the article is vague or speculative, set confidence LOW (below 0.4).
-9. Do NOT treat journalist speculation or opinion as confirmed fact.
-10. Distinguish between announced policy and proposed/rumored policy."""
+RULES:
+- Only extract facts EXPLICITLY stated in the text. Do NOT fabricate.
+- If vague or speculative, set confidence below 0.4.
+- Distinguish announced policy from rumored policy."""


 def build_event_classification_prompt(text: str) -> dict[str, str]:
@@ -210,6 +203,11 @@ def build_event_classification_prompt(text: str) -> dict[str, str]:
    Returns:
        Dict with 'system' and 'user' prompt strings.
    """
+    # Truncate long articles to reduce inference time
+    max_chars = 6000
+    if len(text) > max_chars:
+        text = text[:max_chars] + "\n[... truncated ...]"
+
    user_prompt = f"""\
 Classify this global news article as a macro event. Fill every field.

@@ -24,9 +24,10 @@ EXTRACTION_JSON_SCHEMA: dict[str, Any] = generate_json_schema()
 # --- Anti-hallucination system prompt ---

 SYSTEM_PROMPT = """\
-You extract structured financial intelligence from documents into JSON. \
-Read the document text carefully and fill every field. \
-Return ONLY valid JSON. No commentary, no markdown, no explanation."""
+You are a financial document analyst. Extract structured data as JSON. \
+Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. \
+Every field in the schema is required. Use "other" for catalyst_type if unsure. \
+Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max."""

 # --- Document-type-specific guidance ---

@@ -98,6 +99,11 @@ def build_extraction_prompt(

    doc_id_line = f"Document ID: {document_id}\n" if document_id else ""

+    # Truncate long documents to reduce prompt size and inference time
+    max_doc_chars = 8000
+    if len(document_text) > max_doc_chars:
+        document_text = document_text[:max_doc_chars] + "\n[... truncated for extraction ...]"
+
    user_prompt = f"""\
 Extract structured intelligence from this document. Fill every field.