fix: catch ValueError in classification retry loop + add debug logging for raw output

This commit is contained in:
Celes Renata
2026-04-17 17:00:32 +00:00
parent f054e97b5b
commit 523d3ea749
+21 -4
View File
@@ -291,13 +291,30 @@ def _parse_classification_response(
cleaned = _strip_markdown_fences(raw_json)
cleaned = _repair_json(cleaned)
# DEBUG: log the raw and cleaned output to diagnose why the model keeps returning a JSON list instead of an object
logger.info(
"Classification parse debug doc=%s raw_len=%d cleaned_len=%d raw_start=%s cleaned_start=%s",
document_id, len(raw_json), len(cleaned),
repr(raw_json[:300]), repr(cleaned[:300]),
)
data = json.loads(cleaned)
# Model sometimes wraps the object in a list — unwrap it
if isinstance(data, list) and len(data) == 1 and isinstance(data[0], dict):
# Model sometimes wraps the object in a single-element list — unwrap it
if isinstance(data, list):
if len(data) == 1 and isinstance(data[0], dict):
data = data[0]
elif len(data) == 0:
raise ValueError(
f"Empty list from model for document {document_id}. "
f"Raw output ({len(raw_json)} chars): {raw_json[:500]}"
)
if not isinstance(data, dict):
raise ValueError(f"Expected a JSON object, got {type(data).__name__}")
raise ValueError(
f"Expected a JSON object, got {type(data).__name__} for document {document_id}. "
f"Raw output ({len(raw_json)} chars): {raw_json[:500]}"
)
confidence = data.get("confidence", 0.5)
if isinstance(confidence, (int, float)):
@@ -613,7 +630,7 @@ async def classify_global_event(
return event
except (json.JSONDecodeError, KeyError, TypeError) as exc:
except (json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
last_error = f"parse_error: {exc}"
logger.warning(
"Classification parse error for doc %s attempt %d: %s",