From 7071bba92dad8b6a73a0d109719259a01ab7ad8e Mon Sep 17 00:00:00 2001
From: Celes Renata <celes@frameshift.net>
Date: Mon, 20 Apr 2026 18:05:30 +0000
Subject: [PATCH] fix: increase stale threshold to 4h to prevent duplicate
 enqueuing

The 30-minute threshold was shorter than the queue drain time, causing
the recovery sweep to re-enqueue docs that were already queued but not
yet processed. Bumped the stale threshold to 4 hours (240 minutes) and
raised the enqueued-marker TTL from 1 hour to 4 hours (14400 seconds)
to match.
---
 services/scheduler/app.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/services/scheduler/app.py b/services/scheduler/app.py
index 67d3b79..906705e 100644
--- a/services/scheduler/app.py
+++ b/services/scheduler/app.py
@@ -534,7 +534,9 @@ async def main() -> None:
 
 
 # How long a document can sit in "parsed" before we consider it orphaned
-STALE_PARSED_THRESHOLD_MINUTES: int = 30
+# Must be longer than the expected queue drain time to avoid re-enqueuing
+# docs that are already queued but not yet processed.
+STALE_PARSED_THRESHOLD_MINUTES: int = 240
 
 # How long after an extraction failure before we retry
 EXTRACTION_FAILED_RETRY_MINUTES: int = 60
@@ -542,7 +544,7 @@ EXTRACTION_FAILED_RETRY_MINUTES: int = 60
 # Redis set key for tracking enqueued doc IDs (prevents duplicate enqueuing)
 _ENQUEUED_SET = f"{QUEUE_PREFIX}:enqueued"
 # How long an enqueued marker lives before it can be re-enqueued (seconds)
-_ENQUEUED_TTL = 3600
+_ENQUEUED_TTL = 14400  # 4 hours — matches STALE_PARSED_THRESHOLD_MINUTES
 
 
 async def _enqueue_if_new(