fix: track last_published_at per source to avoid re-fetching the same articles — applies to both news_api and macro_news

This commit is contained in:
Celes Renata
2026-04-16 18:12:12 +00:00
parent 513310abba
commit 1043710b6d
3 changed files with 29 additions and 0 deletions
+17
View File
@@ -195,6 +195,23 @@ async def process_job(
extra={"ticker": ticker, "source_type": source_type, "count": new_items},
)
# Track the latest published_utc so the next fetch only gets newer articles
if source_type in ("macro_news", "news_api") and result.items:
latest_pub = None
for item in result.items:
pub = item.get("published_utc")
if pub and (latest_pub is None or pub > latest_pub):
latest_pub = pub
if latest_pub:
try:
await pool.execute(
"UPDATE sources SET config = config || $1::jsonb WHERE id = $2",
json.dumps({"last_published_at": latest_pub}),
source_id,
)
except Exception:
pass # Non-critical
except Exception as e:
INGESTION_ERRORS.labels(source_type=source_type).inc()
INGESTION_JOBS_TOTAL.labels(source_type=source_type, status="error").inc()