fix: reject empty LLM classifications for global events

When the LLM returns an empty summary and no key facts, raise ValueError so the retry logic kicks in instead of persisting an empty event. Also strip whitespace from the summary and filter out empty or whitespace-only key_facts entries. Cleaned up 17 empty events from the database.
2026-04-15 19:46:31 +00:00
parent 326c409d63
commit 3ff910433f
1 changed files with 12 additions and 2 deletions
@@ -281,6 +281,16 @@ def _parse_classification_response(
    else:
        confidence = 0.5
    summary = str(data.get("summary", "")).strip()
    key_facts = [str(f) for f in data.get("key_facts", []) if str(f).strip()]
    # Reject empty classifications — the LLM produced no useful output
    if not summary and not key_facts:
        raise ValueError(
            f"Empty classification for document {document_id}: "
            "no summary and no key facts"
        )
    return GlobalEvent(
        event_id=str(uuid.uuid4()),
        event_types=_normalize_event_types(data.get("event_types", [])),
@@ -288,8 +298,8 @@ def _parse_classification_response(
        affected_regions=[str(r) for r in data.get("affected_regions", [])],
        affected_sectors=[str(s) for s in data.get("affected_sectors", [])],
        affected_commodities=[str(c) for c in data.get("affected_commodities", [])],
-        summary=str(data.get("summary", "")),
+        summary=summary,
-        key_facts=[str(f) for f in data.get("key_facts", [])],
+        key_facts=key_facts,
        estimated_duration=_normalize_duration(data.get("estimated_duration", "short_term")),
        confidence=confidence,
        source_document_id=document_id,