fix: reject empty LLM classifications for global events
When the LLM returns an empty summary and no key facts, raise ValueError so the retry logic kicks in instead of persisting an empty event. Also strip whitespace from the summary and filter out empty or whitespace-only key_facts entries. Cleaned up 17 empty events from the database.
This commit is contained in:
@@ -281,6 +281,16 @@ def _parse_classification_response(
|
|||||||
else:
|
else:
|
||||||
confidence = 0.5
|
confidence = 0.5
|
||||||
|
|
||||||
|
summary = str(data.get("summary", "")).strip()
|
||||||
|
key_facts = [str(f) for f in data.get("key_facts", []) if str(f).strip()]
|
||||||
|
|
||||||
|
# Reject empty classifications — the LLM produced no useful output
|
||||||
|
if not summary and not key_facts:
|
||||||
|
raise ValueError(
|
||||||
|
f"Empty classification for document {document_id}: "
|
||||||
|
"no summary and no key facts"
|
||||||
|
)
|
||||||
|
|
||||||
return GlobalEvent(
|
return GlobalEvent(
|
||||||
event_id=str(uuid.uuid4()),
|
event_id=str(uuid.uuid4()),
|
||||||
event_types=_normalize_event_types(data.get("event_types", [])),
|
event_types=_normalize_event_types(data.get("event_types", [])),
|
||||||
@@ -288,8 +298,8 @@ def _parse_classification_response(
|
|||||||
affected_regions=[str(r) for r in data.get("affected_regions", [])],
|
affected_regions=[str(r) for r in data.get("affected_regions", [])],
|
||||||
affected_sectors=[str(s) for s in data.get("affected_sectors", [])],
|
affected_sectors=[str(s) for s in data.get("affected_sectors", [])],
|
||||||
affected_commodities=[str(c) for c in data.get("affected_commodities", [])],
|
affected_commodities=[str(c) for c in data.get("affected_commodities", [])],
|
||||||
summary=str(data.get("summary", "")),
|
summary=summary,
|
||||||
key_facts=[str(f) for f in data.get("key_facts", [])],
|
key_facts=key_facts,
|
||||||
estimated_duration=_normalize_duration(data.get("estimated_duration", "short_term")),
|
estimated_duration=_normalize_duration(data.get("estimated_duration", "short_term")),
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
source_document_id=document_id,
|
source_document_id=document_id,
|
||||||
|
|||||||
Reference in New Issue
Block a user