diff --git a/services/scheduler/app.py b/services/scheduler/app.py index 67d3b79..906705e 100644 --- a/services/scheduler/app.py +++ b/services/scheduler/app.py @@ -534,7 +534,9 @@ async def main() -> None: # How long a document can sit in "parsed" before we consider it orphaned -STALE_PARSED_THRESHOLD_MINUTES: int = 30 +# Must be longer than the expected queue drain time to avoid re-enqueuing +# docs that are already queued but not yet processed. +STALE_PARSED_THRESHOLD_MINUTES: int = 240 # How long after an extraction failure before we retry EXTRACTION_FAILED_RETRY_MINUTES: int = 60 @@ -542,7 +544,7 @@ EXTRACTION_FAILED_RETRY_MINUTES: int = 60 # Redis set key for tracking enqueued doc IDs (prevents duplicate enqueuing) _ENQUEUED_SET = f"{QUEUE_PREFIX}:enqueued" # How long an enqueued marker lives before it can be re-enqueued (seconds) -_ENQUEUED_TTL = 3600 +_ENQUEUED_TTL = STALE_PARSED_THRESHOLD_MINUTES * 60  # tracks the stale threshold (4 hours) async def _enqueue_if_new(