phase 17: remove embedded JSON schema from user prompt (saves 4.7 KB); Ollama's format parameter handles the schema instead

This commit is contained in:
Celes Renata
2026-04-12 09:28:28 -07:00
parent 57d0fc7d33
commit 28b3361833
3 changed files with 75 additions and 5 deletions
+1 -5
View File
@@ -8,7 +8,6 @@ Requirements: 5.1, 5.2, 5.3, 5.4, 5.5
"""
from __future__ import annotations
import json
from typing import Any
from services.extractor.schemas import SCHEMA_VERSION, generate_json_schema
@@ -110,8 +109,6 @@ def build_extraction_prompt(
"Use your judgment — but only include companies where the connection is clear from the text."
)
schema_str = json.dumps(EXTRACTION_JSON_SCHEMA, indent=2)
doc_id_line = f"Document ID: {document_id}\n" if document_id else ""
user_prompt = f"""\
@@ -120,8 +117,7 @@ Extract structured intelligence from the following document.
{doc_id_line}Document type: {document_type}
{doctype_guidance}
{ticker_hint}
Your output MUST be a single JSON object conforming to this schema:
{schema_str}
Return a JSON object with: summary, companies (array with ticker, company_name, relevance, sentiment, impact_score, impact_horizon, catalyst_type, key_facts, risks, evidence_spans), macro_themes, novelty_score, confidence, extraction_warnings.
REMEMBER:
- Only extract what is explicitly in the text below.