phase 17: fix Polygon article_url and published_utc field mapping in metadata persistence

This commit is contained in:
Celes Renata
2026-04-12 02:58:30 -07:00
parent 0ac4493bd4
commit 264b83ea56
2 changed files with 5 additions and 5 deletions
+2 -2
View File
@@ -232,7 +232,7 @@ def _extract_publisher(item: dict[str, Any]) -> str:
def _parse_published_at(item: dict[str, Any]) -> datetime | None:
"""Parse published_at from various adapter item formats."""
raw = item.get("publishedAt") or item.get("published_at")
raw = item.get("publishedAt") or item.get("published_at") or item.get("published_utc")
if not raw:
return None
if isinstance(raw, datetime):
@@ -392,7 +392,7 @@ async def _persist_document_items(
json.dumps(item, sort_keys=True)
)
title = item.get("title", item.get("name", ""))
url = item.get("url", item.get("link", ""))
url = item.get("url", item.get("link", item.get("article_url", "")))
canonical_url = item.get("canonical_url") or (
normalize_url(url) if url else None
)