diff --git a/services/scheduler/app.py b/services/scheduler/app.py index 539d3f0..63c485b 100644 --- a/services/scheduler/app.py +++ b/services/scheduler/app.py @@ -522,6 +522,9 @@ async def recover_stale_documents(pool: asyncpg.Pool, rds: aioredis.Redis) -> in LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id WHERE d.status = 'parsed' AND d.updated_at < NOW() - INTERVAL '1 minute' * $1 + AND NOT EXISTS ( + SELECT 1 FROM global_events ge WHERE ge.source_document_id = d.id + ) ORDER BY d.created_at ASC LIMIT 100""", STALE_PARSED_THRESHOLD_MINUTES,