fix: prevent duplicate queue entries with Redis SET NX marker keys
Recovery sweeps and the retry endpoint now atomically set a per-document Redis marker key (SET NX, 1h TTL) before pushing to the queue. If the marker already exists, the doc was enqueued within the last hour and is skipped. This prevents the scheduler from re-enqueuing the same parsed docs every 5 minutes.
This commit is contained in:
+9
-4
@@ -1891,6 +1891,7 @@ async def retry_failed_extractions_endpoint():
|
||||
return {"retried": 0, "message": "No extraction-failed documents to retry"}
|
||||
|
||||
doc_ids = []
|
||||
enqueued_set_prefix = f"{QUEUE_PREFIX}:enqueued"
|
||||
for row in rows:
|
||||
doc_type = row["document_type"]
|
||||
if doc_type == "macro_event":
|
||||
@@ -1898,10 +1899,14 @@ async def retry_failed_extractions_endpoint():
|
||||
else:
|
||||
target = queue_key("extraction")
|
||||
|
||||
await rds.rpush(target, json.dumps({
|
||||
"document_id": str(row["id"]),
|
||||
"ticker": row["ticker"] or "",
|
||||
}))
|
||||
doc_id = str(row["id"])
|
||||
marker = f"{enqueued_set_prefix}:{doc_id}"
|
||||
added = await rds.set(marker, "1", nx=True, ex=3600)
|
||||
if added:
|
||||
await rds.rpush(target, json.dumps({
|
||||
"document_id": doc_id,
|
||||
"ticker": row["ticker"] or "",
|
||||
}))
|
||||
doc_ids.append(row["id"])
|
||||
|
||||
# Delete failed intelligence rows so extractor starts fresh
|
||||
|
||||
Reference in New Issue
Block a user