diff --git a/services/extractor/event_classifier.py b/services/extractor/event_classifier.py index b0db953..493bdd9 100644 --- a/services/extractor/event_classifier.py +++ b/services/extractor/event_classifier.py @@ -281,6 +281,16 @@ def _parse_classification_response( else: confidence = 0.5 + summary = str(data.get("summary", "")).strip() + key_facts = [str(f) for f in data.get("key_facts", []) if str(f).strip()] + + # Reject empty classifications — the LLM produced no useful output + if not summary and not key_facts: + raise ValueError( + f"Empty classification for document {document_id}: " + "no summary and no key facts" + ) + return GlobalEvent( event_id=str(uuid.uuid4()), event_types=_normalize_event_types(data.get("event_types", [])), @@ -288,8 +298,8 @@ def _parse_classification_response( affected_regions=[str(r) for r in data.get("affected_regions", [])], affected_sectors=[str(s) for s in data.get("affected_sectors", [])], affected_commodities=[str(c) for c in data.get("affected_commodities", [])], - summary=str(data.get("summary", "")), - key_facts=[str(f) for f in data.get("key_facts", [])], + summary=summary, + key_facts=key_facts, estimated_duration=_normalize_duration(data.get("estimated_duration", "short_term")), confidence=confidence, source_document_id=document_id,