fix: reduce LLM timeouts — truncate docs to 8k/6k chars, cut num_predict 16k→4k, tighten prompts, trim anti-hallucination rules

2026-04-16 18:56:11 +00:00
parent 3a856cf6ff
commit 693d9e0d60
3 changed files with 22 additions and 18 deletions
@@ -24,9 +24,10 @@ EXTRACTION_JSON_SCHEMA: dict[str, Any] = generate_json_schema()
 # --- Anti-hallucination system prompt ---

 SYSTEM_PROMPT = """\
-You extract structured financial intelligence from documents into JSON. \
-Read the document text carefully and fill every field. \
-Return ONLY valid JSON. No commentary, no markdown, no explanation."""
+You are a financial document analyst. Extract structured data as JSON. \
+Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. \
+Every field in the schema is required. Use "other" for catalyst_type if unsure. \
+Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max."""

 # --- Document-type-specific guidance ---

@@ -98,6 +99,11 @@ def build_extraction_prompt(

    doc_id_line = f"Document ID: {document_id}\n" if document_id else ""

+    # Truncate long documents to their first max_doc_chars characters to reduce prompt size and inference time
+    max_doc_chars = 8000
+    if len(document_text) > max_doc_chars:
+        document_text = document_text[:max_doc_chars] + "\n[... truncated for extraction ...]"
+
    user_prompt = f"""\
 Extract structured intelligence from this document. Fill every field.