feat: retry failed extractions button on pipeline page
- POST /api/ops/pipeline/retry-failed endpoint resets extraction_failed docs to parsed, deletes failed intelligence rows, and re-enqueues them (batch of 200)
- Scheduler now auto-retries extraction_failed docs every ~10 minutes (100 per cycle, 60-min cooldown per doc)
- Pipeline page shows 'Retry Failed (N)' button when extraction_failed count > 0, with pending/success/error states
This commit is contained in:
@@ -1869,6 +1869,58 @@ async def pipeline_stream(request: Request):
|
||||
)
|
||||
|
||||
|
||||
@app.post("/api/ops/pipeline/retry-failed")
async def retry_failed_extractions_endpoint():
    """Re-enqueue documents stuck in extraction_failed for another attempt.

    Selects up to 200 rows of extraction_failed documents (oldest
    ``updated_at`` first) joined to their company mentions, deletes the
    failed intelligence rows for those documents, resets the documents to
    'parsed', and only then pushes one queue message per selected row.
    Documents of type ``macro_event`` go to the macro classification queue;
    everything else goes to the extraction queue.

    The database state is reset *before* anything is enqueued so that a
    worker can never pick up a message while the document is still marked
    extraction_failed (or have its fresh results clobbered by a late reset).

    Returns:
        A dict with ``retried`` (the number of distinct documents that were
        reset and re-enqueued) and a human-readable ``message``.
    """
    # NOTE(review): the LEFT JOIN can yield more than one row per document
    # when a document has several mentions, so LIMIT 200 caps joined rows
    # rather than documents -- confirm that this is intended.
    rows = await pool.fetch(
        """SELECT d.id, d.document_type, dcm.ticker
           FROM documents d
           LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id
           WHERE d.status = 'extraction_failed'
           ORDER BY d.updated_at ASC
           LIMIT 200""",
    )

    if not rows:
        return {"retried": 0, "message": "No extraction-failed documents to retry"}

    # De-duplicate while keeping the oldest-first order: the join may repeat
    # a document id, and the reported count must be per document.
    doc_ids = list(dict.fromkeys(row["id"] for row in rows))

    # Delete failed intelligence rows so the extractor starts fresh.
    await pool.execute(
        """DELETE FROM document_intelligence
           WHERE document_id = ANY($1::uuid[])
           AND validation_status = 'failed'""",
        doc_ids,
    )
    # Reset status to 'parsed' and touch updated_at.
    await pool.execute(
        """UPDATE documents
           SET status = 'parsed', updated_at = NOW()
           WHERE id = ANY($1::uuid[])""",
        doc_ids,
    )

    # Enqueue last, once the database already reflects the retry.
    for row in rows:
        if row["document_type"] == "macro_event":
            target = "stonks:queue:macro_classification"
        else:
            target = "stonks:queue:extraction"

        await rds.rpush(target, json.dumps({
            "document_id": str(row["id"]),
            "ticker": row["ticker"] or "",
        }))

    return {"retried": len(doc_ids), "message": f"Re-enqueued {len(doc_ids)} documents for extraction"}
|
||||
|
||||
|
||||
@app.get("/api/ops/sources/coverage-gaps")
|
||||
async def get_source_coverage_gaps():
|
||||
"""Identify symbols with missing or insufficient source coverage.
|
||||
|
||||
Reference in New Issue
Block a user