fix: reduce LLM timeouts — truncate docs to 8k/6k chars, cut num_predict 16k→4k, tighten prompts, trim anti-hallucination rules
This commit is contained in:
@@ -24,9 +24,10 @@ EXTRACTION_JSON_SCHEMA: dict[str, Any] = generate_json_schema()
|
||||
# --- Anti-hallucination system prompt ---
|
||||
|
||||
SYSTEM_PROMPT = """\
|
||||
You extract structured financial intelligence from documents into JSON. \
|
||||
Read the document text carefully and fill every field. \
|
||||
Return ONLY valid JSON. No commentary, no markdown, no explanation."""
|
||||
You are a financial document analyst. Extract structured data as JSON. \
|
||||
Return ONLY a single JSON object. No markdown fences, no explanation, no text before or after the JSON. \
|
||||
Every field in the schema is required. Use "other" for catalyst_type if unsure. \
|
||||
Keep evidence_spans short (under 20 words each). Keep key_facts to 3-5 items max."""
|
||||
|
||||
# --- Document-type-specific guidance ---
|
||||
|
||||
@@ -98,6 +99,11 @@ def build_extraction_prompt(
|
||||
|
||||
doc_id_line = f"Document ID: {document_id}\n" if document_id else ""
|
||||
|
||||
# Truncate long documents to reduce prompt size and inference time
|
||||
max_doc_chars = 8000
|
||||
if len(document_text) > max_doc_chars:
|
||||
document_text = document_text[:max_doc_chars] + "\n[... truncated for extraction ...]"
|
||||
|
||||
user_prompt = f"""\
|
||||
Extract structured intelligence from this document. Fill every field.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user