fix: reject empty LLM classifications for global events

When the LLM returns an empty summary and no key facts, raise ValueError
so the retry logic kicks in instead of persisting an empty event.
Also strip whitespace from the summary and drop empty or whitespace-only key_facts entries.

Cleaned up 17 empty events from the database.
This commit is contained in:
Celes Renata
2026-04-15 19:46:31 +00:00
parent 326c409d63
commit 3ff910433f
+12 -2
View File
@@ -281,6 +281,16 @@ def _parse_classification_response(
else: else:
confidence = 0.5 confidence = 0.5
summary = str(data.get("summary", "")).strip()
key_facts = [str(f) for f in data.get("key_facts", []) if str(f).strip()]
# Reject empty classifications — the LLM produced no useful output
if not summary and not key_facts:
raise ValueError(
f"Empty classification for document {document_id}: "
"no summary and no key facts"
)
return GlobalEvent( return GlobalEvent(
event_id=str(uuid.uuid4()), event_id=str(uuid.uuid4()),
event_types=_normalize_event_types(data.get("event_types", [])), event_types=_normalize_event_types(data.get("event_types", [])),
@@ -288,8 +298,8 @@ def _parse_classification_response(
affected_regions=[str(r) for r in data.get("affected_regions", [])], affected_regions=[str(r) for r in data.get("affected_regions", [])],
affected_sectors=[str(s) for s in data.get("affected_sectors", [])], affected_sectors=[str(s) for s in data.get("affected_sectors", [])],
affected_commodities=[str(c) for c in data.get("affected_commodities", [])], affected_commodities=[str(c) for c in data.get("affected_commodities", [])],
summary=str(data.get("summary", "")), summary=summary,
key_facts=[str(f) for f in data.get("key_facts", [])], key_facts=key_facts,
estimated_duration=_normalize_duration(data.get("estimated_duration", "short_term")), estimated_duration=_normalize_duration(data.get("estimated_duration", "short_term")),
confidence=confidence, confidence=confidence,
source_document_id=document_id, source_document_id=document_id,