fix: reduce LLM timeouts — truncate docs to 8k/6k chars, cut num_predict 16k→4k, tighten prompts, trim anti-hallucination rules

This commit is contained in:
Celes Renata
2026-04-16 18:56:11 +00:00
parent 3a856cf6ff
commit 693d9e0d60
3 changed files with 22 additions and 18 deletions
+9 -3
View File
@@ -24,9 +24,10 @@ EXTRACTION_JSON_SCHEMA: dict[str, Any] = generate_json_schema()
# --- Anti-hallucination system prompt ---
SYSTEM_PROMPT = """\
You extract structured financial intelligence from documents into JSON. \
Read the document text carefully and fill every field. \
Return ONLY valid JSON. No commentary, no markdown, no explanation."""
You are a financial document analyst. Extract structured data as JSON. \
Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. \
Every field in the schema is required. Use "other" for catalyst_type if unsure. \
Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max."""
# --- Document-type-specific guidance ---
@@ -98,6 +99,11 @@ def build_extraction_prompt(
doc_id_line = f"Document ID: {document_id}\n" if document_id else ""
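# Truncate long documents to reduce prompt size and inference time: keep only
# the first max_doc_chars characters (characters, not tokens) and append a
# truncation marker.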
max_doc_chars = 8000
if len(document_text) > max_doc_chars:
document_text = document_text[:max_doc_chars] + "\n[... truncated for extraction ...]"
user_prompt = f"""\
Extract structured intelligence from this document. Fill every field.