phase 17: fix Polygon article_url and published_utc field mapping in metadata persistence
This commit is contained in:
@@ -232,7 +232,7 @@ def _extract_publisher(item: dict[str, Any]) -> str:
|
||||
|
||||
def _parse_published_at(item: dict[str, Any]) -> datetime | None:
|
||||
"""Parse published_at from various adapter item formats."""
|
||||
raw = item.get("publishedAt") or item.get("published_at")
|
||||
raw = item.get("publishedAt") or item.get("published_at") or item.get("published_utc")
|
||||
if not raw:
|
||||
return None
|
||||
if isinstance(raw, datetime):
|
||||
@@ -392,7 +392,7 @@ async def _persist_document_items(
|
||||
json.dumps(item, sort_keys=True)
|
||||
)
|
||||
title = item.get("title", item.get("name", ""))
|
||||
url = item.get("url", item.get("link", ""))
|
||||
url = item.get("url", item.get("link", item.get("article_url", "")))
|
||||
canonical_url = item.get("canonical_url") or (
|
||||
normalize_url(url) if url else None
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user