fix: reduce LLM timeouts — truncate docs to 8k/6k chars, cut num_predict 16k→4k, tighten prompts, trim anti-hallucination rules

This commit is contained in:
Celes Renata
2026-04-16 18:56:11 +00:00
parent 3a856cf6ff
commit 693d9e0d60
3 changed files with 22 additions and 18 deletions
+1 -1
View File
@@ -259,7 +259,7 @@ class OllamaClient:
"stream": False,
"think": False,
"options": {
"num_predict": 16384,
"num_predict": 4096,
},
}
+12 -14
View File
@@ -183,22 +183,15 @@ def get_event_json_schema() -> dict[str, Any]:
# ---------------------------------------------------------------------------
_SYSTEM_PROMPT = """\
You classify global news articles into structured macro event intelligence. \
Read the article carefully and extract the event classification. \
Return ONLY valid JSON matching the schema. No commentary, no markdown, no explanation."""
You classify global news into structured macro event JSON. \
Return ONLY a single JSON object. No markdown, no explanation. \
Every field is required. Keep key_facts to 3-5 items. Keep summary under 3 sentences."""
_ANTI_HALLUCINATION_RULES = """\
CRITICAL RULES — read carefully:
1. Only extract information EXPLICITLY stated in the article text.
2. Do NOT infer, speculate, or fabricate facts, regions, sectors, or commodities.
3. If the article mentions multiple distinct impact types, include ALL of them in event_types.
4. For affected_regions, only include regions explicitly mentioned or clearly implied by the event.
5. For affected_sectors, only include sectors with a clear causal link to the event.
6. For affected_commodities, only include commodities directly referenced or obviously impacted.
7. For key_facts, each fact must be directly supported by a specific passage in the text.
8. If the article is vague or speculative, set confidence LOW (below 0.4).
9. Do NOT treat journalist speculation or opinion as confirmed fact.
10. Distinguish between announced policy and proposed/rumored policy."""
RULES:
- Only extract facts EXPLICITLY stated in the text. Do NOT fabricate.
- If vague or speculative, set confidence below 0.4.
- Distinguish announced policy from rumored policy."""
def build_event_classification_prompt(text: str) -> dict[str, str]:
@@ -210,6 +203,11 @@ def build_event_classification_prompt(text: str) -> dict[str, str]:
Returns:
Dict with 'system' and 'user' prompt strings.
"""
# Keep only the first max_chars characters of long articles (plus a truncation marker) to reduce inference time
max_chars = 6000
if len(text) > max_chars:
text = text[:max_chars] + "\n[... truncated ...]"
user_prompt = f"""\
Classify this global news article as a macro event. Fill every field.
+9 -3
View File
@@ -24,9 +24,10 @@ EXTRACTION_JSON_SCHEMA: dict[str, Any] = generate_json_schema()
# --- Anti-hallucination system prompt ---
SYSTEM_PROMPT = """\
You extract structured financial intelligence from documents into JSON. \
Read the document text carefully and fill every field. \
Return ONLY valid JSON. No commentary, no markdown, no explanation."""
You are a financial document analyst. Extract structured data as JSON. \
Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. \
Every field in the schema is required. Use "other" for catalyst_type if unsure. \
Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max."""
# --- Document-type-specific guidance ---
@@ -98,6 +99,11 @@ def build_extraction_prompt(
doc_id_line = f"Document ID: {document_id}\n" if document_id else ""
# Keep only the first max_doc_chars characters of long documents (plus a truncation marker) to reduce prompt size and inference time
max_doc_chars = 8000
if len(document_text) > max_doc_chars:
document_text = document_text[:max_doc_chars] + "\n[... truncated for extraction ...]"
user_prompt = f"""\
Extract structured intelligence from this document. Fill every field.