fix: track last_published_at per source to avoid re-fetching same articles — applies to both news_api and macro_news
This commit is contained in:
@@ -46,6 +46,9 @@ class MacroNewsAdapter(BaseAdapter):
|
||||
The ticker parameter is ignored for macro sources — these are
|
||||
global/geopolitical news, not company-specific.
|
||||
|
||||
Uses published_utc.gt to fetch only articles published after the
|
||||
last_published_at value in config, avoiding re-fetching the same articles.
|
||||
|
||||
Args:
|
||||
ticker: Ignored for macro sources (may be empty string).
|
||||
config: Source-specific configuration with url, params, etc.
|
||||
@@ -67,6 +70,11 @@ class MacroNewsAdapter(BaseAdapter):
|
||||
limit = config.get("limit", 20)
|
||||
params["limit"] = str(min(int(limit), 1000))
|
||||
|
||||
# Use last_published_at from config to only fetch newer articles
|
||||
last_published = config.get("last_published_at")
|
||||
if last_published:
|
||||
params["published_utc.gt"] = last_published
|
||||
|
||||
async with httpx.AsyncClient(timeout=30) as client:
|
||||
t0 = time.monotonic()
|
||||
try:
|
||||
|
||||
@@ -128,6 +128,10 @@ class PolygonNewsAdapter(NewsDataAdapter):
|
||||
if config.get("published_utc_lte"):
|
||||
params["published_utc.lte"] = config["published_utc_lte"]
|
||||
|
||||
# Auto-filter to articles published after last_published_at, unless published_utc.gt is already set
|
||||
if config.get("last_published_at") and "published_utc.gt" not in params:
|
||||
params["published_utc.gt"] = config["last_published_at"]
|
||||
|
||||
url = f"{self.base_url}{self.NEWS_ENDPOINT}"
|
||||
return url, params
|
||||
|
||||
|
||||
Reference in New Issue
Block a user