feat: retry failed extractions button on pipeline page

- POST /api/ops/pipeline/retry-failed endpoint resets extraction_failed
  docs to parsed, deletes failed intelligence rows, and re-enqueues
  them (batch of 200)
- Scheduler now auto-retries extraction_failed docs every ~10 minutes
  (100 per cycle, 60-min cooldown per doc)
- Pipeline page shows 'Retry Failed (N)' button when extraction_failed
  count > 0, with pending/success/error states
This commit is contained in:
Celes Renata
2026-04-20 08:09:29 +00:00
parent 5289f0f195
commit de35279269
5 changed files with 152 additions and 2 deletions
+52
View File
@@ -1869,6 +1869,58 @@ async def pipeline_stream(request: Request):
)
@app.post("/api/ops/pipeline/retry-failed")
async def retry_failed_extractions_endpoint():
    """Re-enqueue documents stuck in extraction_failed for another attempt.

    Selects up to 200 rows of extraction_failed documents (oldest
    ``updated_at`` first), joined with their company mentions. Because the
    join can yield several rows for one document, the candidate ids are
    de-duplicated before any database write.

    The database reset happens first, inside a single transaction:

    * documents still in ``extraction_failed`` are flipped to ``parsed``
      (the status guard means a concurrent retry cannot claim the same
      document twice), and
    * their ``failed`` document_intelligence rows are deleted so the
      extractor starts fresh.

    Only after that commit are queue messages pushed, so a worker never
    receives a job for a document whose status has not been reset yet.
    One message is pushed per selected row (i.e. per document/ticker
    pair); ``macro_event`` documents go to the macro classification
    queue, everything else to the extraction queue.

    Returns:
        A dict with ``retried`` (number of distinct documents reset and
        re-enqueued) and a human-readable ``message``.
    """
    rows = await pool.fetch(
        """SELECT d.id, d.document_type, dcm.ticker
           FROM documents d
           LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id
           WHERE d.status = 'extraction_failed'
           ORDER BY d.updated_at ASC
           LIMIT 200""",
    )
    if not rows:
        return {"retried": 0, "message": "No extraction-failed documents to retry"}

    # The LEFT JOIN may repeat a document once per mention; keep each id once,
    # preserving the oldest-first order of the query.
    candidate_ids = list(dict.fromkeys(row["id"] for row in rows))

    async with pool.acquire() as conn:
        async with conn.transaction():
            # Claim the documents: the status guard makes this a no-op for any
            # document another request (or a worker) has already moved on.
            claimed = await conn.fetch(
                """UPDATE documents
                   SET status = 'parsed', updated_at = NOW()
                   WHERE id = ANY($1::uuid[])
                     AND status = 'extraction_failed'
                   RETURNING id""",
                candidate_ids,
            )
            reset_ids = {record["id"] for record in claimed}
            if reset_ids:
                # Delete failed intelligence rows so extractor starts fresh
                await conn.execute(
                    """DELETE FROM document_intelligence
                       WHERE document_id = ANY($1::uuid[])
                         AND validation_status = 'failed'""",
                    list(reset_ids),
                )

    if not reset_ids:
        # Everything selected was claimed by someone else in the meantime.
        return {"retried": 0, "message": "No extraction-failed documents to retry"}

    # Enqueue only after the reset is committed, and only for claimed documents.
    for row in rows:
        if row["id"] not in reset_ids:
            continue
        if row["document_type"] == "macro_event":
            target = "stonks:queue:macro_classification"
        else:
            target = "stonks:queue:extraction"
        await rds.rpush(target, json.dumps({
            "document_id": str(row["id"]),
            "ticker": row["ticker"] or "",
        }))

    retried = len(reset_ids)
    return {"retried": retried, "message": f"Re-enqueued {retried} documents for extraction"}
@app.get("/api/ops/sources/coverage-gaps")
async def get_source_coverage_gaps():
"""Identify symbols with missing or insufficient source coverage.