From 264b83ea56f909b2665c78e057d613002ecd79ad Mon Sep 17 00:00:00 2001 From: Celes Renata Date: Sun, 12 Apr 2026 02:58:30 -0700 Subject: [PATCH] phase 17: fix Polygon article_url and published_utc field mapping in metadata persistence --- .kiro/specs/stonks-oracle/tasks.md | 6 +++--- services/shared/metadata.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.kiro/specs/stonks-oracle/tasks.md b/.kiro/specs/stonks-oracle/tasks.md index 9dafa18..2df35e1 100644 --- a/.kiro/specs/stonks-oracle/tasks.md +++ b/.kiro/specs/stonks-oracle/tasks.md @@ -182,7 +182,7 @@ - Verify companies and sources appear in the dashboard and via `curl https://stonks-registry.celestium.life/companies` - _Requirements: 1.1, 1.2, 1.3, 2.1_ -- [-] 17.2 Wire the scheduler to enqueue ingestion jobs for active sources +- [x] 17.2 Wire the scheduler to enqueue ingestion jobs for active sources - Verify the scheduler service reads active companies and sources from PostgreSQL - Verify it enqueues Redis jobs for each source on its polling interval - Check scheduler logs: `kubectl logs -n stonks-oracle deployment/scheduler --tail=50` @@ -190,7 +190,7 @@ - Fix any issues with the scheduler → source → Redis queue flow - _Requirements: 3.1, 3.2_ -- [ ] 17.3 Validate ingestion workers pull data from Polygon and persist to MinIO/PostgreSQL +- [x] 17.3 Validate ingestion workers pull data from Polygon and persist to MinIO/PostgreSQL - Check ingestion worker logs for successful API calls to Polygon - Verify raw market data artifacts land in MinIO `stonks-raw-market` bucket - Verify document metadata rows appear in PostgreSQL `documents` table @@ -198,7 +198,7 @@ - Debug and fix any adapter errors (auth, rate limits, response parsing) - _Requirements: 4.1, 4.2, 4.3_ -- [ ] 17.4 Validate parser normalizes documents and extractor produces intelligence +- [-] 17.4 Validate parser normalizes documents and extractor produces intelligence - Check parser worker logs for document processing - Verify normalized text appears in MinIO `stonks-normalized` bucket - Verify `parse_quality_score` and `parse_confidence` are set on documents diff --git a/services/shared/metadata.py b/services/shared/metadata.py index 564a418..79f9c78 100644 --- a/services/shared/metadata.py +++ b/services/shared/metadata.py @@ -232,7 +232,7 @@ def _extract_publisher(item: dict[str, Any]) -> str: def _parse_published_at(item: dict[str, Any]) -> datetime | None: """Parse published_at from various adapter item formats.""" - raw = item.get("publishedAt") or item.get("published_at") + raw = item.get("publishedAt") or item.get("published_at") or item.get("published_utc") if not raw: return None if isinstance(raw, datetime): @@ -392,7 +392,7 @@ async def _persist_document_items( json.dumps(item, sort_keys=True) ) title = item.get("title", item.get("name", "")) - url = item.get("url", item.get("link", "")) + url = item.get("url", item.get("link", item.get("article_url", ""))) canonical_url = item.get("canonical_url") or ( normalize_url(url) if url else None )