fix: catch ValueError in classification retry loop + add debug logging for raw output
This commit is contained in:
@@ -291,13 +291,30 @@ def _parse_classification_response(
|
|||||||
|
|
||||||
cleaned = _strip_markdown_fences(raw_json)
|
cleaned = _strip_markdown_fences(raw_json)
|
||||||
cleaned = _repair_json(cleaned)
|
cleaned = _repair_json(cleaned)
|
||||||
|
|
||||||
|
# DEBUG: log raw vs. cleaned output (lengths and first 300 chars) to diagnose the persistent list-wrapped-response issue
|
||||||
|
logger.info(
|
||||||
|
"Classification parse debug doc=%s raw_len=%d cleaned_len=%d raw_start=%s cleaned_start=%s",
|
||||||
|
document_id, len(raw_json), len(cleaned),
|
||||||
|
repr(raw_json[:300]), repr(cleaned[:300]),
|
||||||
|
)
|
||||||
|
|
||||||
data = json.loads(cleaned)
|
data = json.loads(cleaned)
|
||||||
|
|
||||||
# Model sometimes wraps the object in a list — unwrap it
|
# Model sometimes wraps the object in a single-element list — unwrap it; an empty list is rejected with a descriptive ValueError
|
||||||
if isinstance(data, list) and len(data) == 1 and isinstance(data[0], dict):
|
if isinstance(data, list):
|
||||||
data = data[0]
|
if len(data) == 1 and isinstance(data[0], dict):
|
||||||
|
data = data[0]
|
||||||
|
elif len(data) == 0:
|
||||||
|
raise ValueError(
|
||||||
|
f"Empty list from model for document {document_id}. "
|
||||||
|
f"Raw output ({len(raw_json)} chars): {raw_json[:500]}"
|
||||||
|
)
|
||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
raise ValueError(f"Expected a JSON object, got {type(data).__name__}")
|
raise ValueError(
|
||||||
|
f"Expected a JSON object, got {type(data).__name__} for document {document_id}. "
|
||||||
|
f"Raw output ({len(raw_json)} chars): {raw_json[:500]}"
|
||||||
|
)
|
||||||
|
|
||||||
confidence = data.get("confidence", 0.5)
|
confidence = data.get("confidence", 0.5)
|
||||||
if isinstance(confidence, (int, float)):
|
if isinstance(confidence, (int, float)):
|
||||||
@@ -613,7 +630,7 @@ async def classify_global_event(
|
|||||||
|
|
||||||
return event
|
return event
|
||||||
|
|
||||||
except (json.JSONDecodeError, KeyError, TypeError) as exc:
|
except (json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
|
||||||
last_error = f"parse_error: {exc}"
|
last_error = f"parse_error: {exc}"
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Classification parse error for doc %s attempt %d: %s",
|
"Classification parse error for doc %s attempt %d: %s",
|
||||||
|
|||||||
Reference in New Issue
Block a user