phase 17: fix Polygon article_url and published_utc field mapping in metadata persistence
This commit is contained in:
@@ -182,7 +182,7 @@
|
|||||||
- Verify companies and sources appear in the dashboard and via `curl https://stonks-registry.celestium.life/companies`
|
- Verify companies and sources appear in the dashboard and via `curl https://stonks-registry.celestium.life/companies`
|
||||||
- _Requirements: 1.1, 1.2, 1.3, 2.1_
|
- _Requirements: 1.1, 1.2, 1.3, 2.1_
|
||||||
|
|
||||||
- [-] 17.2 Wire the scheduler to enqueue ingestion jobs for active sources
|
- [x] 17.2 Wire the scheduler to enqueue ingestion jobs for active sources
|
||||||
- Verify the scheduler service reads active companies and sources from PostgreSQL
|
- Verify the scheduler service reads active companies and sources from PostgreSQL
|
||||||
- Verify it enqueues Redis jobs for each source on its polling interval
|
- Verify it enqueues Redis jobs for each source on its polling interval
|
||||||
- Check scheduler logs: `kubectl logs -n stonks-oracle deployment/scheduler --tail=50`
|
- Check scheduler logs: `kubectl logs -n stonks-oracle deployment/scheduler --tail=50`
|
||||||
@@ -190,7 +190,7 @@
|
|||||||
- Fix any issues with the scheduler → source → Redis queue flow
|
- Fix any issues with the scheduler → source → Redis queue flow
|
||||||
- _Requirements: 3.1, 3.2_
|
- _Requirements: 3.1, 3.2_
|
||||||
|
|
||||||
- [ ] 17.3 Validate ingestion workers pull data from Polygon and persist to MinIO/PostgreSQL
|
- [x] 17.3 Validate ingestion workers pull data from Polygon and persist to MinIO/PostgreSQL
|
||||||
- Check ingestion worker logs for successful API calls to Polygon
|
- Check ingestion worker logs for successful API calls to Polygon
|
||||||
- Verify raw market data artifacts land in MinIO `stonks-raw-market` bucket
|
- Verify raw market data artifacts land in MinIO `stonks-raw-market` bucket
|
||||||
- Verify document metadata rows appear in PostgreSQL `documents` table
|
- Verify document metadata rows appear in PostgreSQL `documents` table
|
||||||
@@ -198,7 +198,7 @@
|
|||||||
- Debug and fix any adapter errors (auth, rate limits, response parsing)
|
- Debug and fix any adapter errors (auth, rate limits, response parsing)
|
||||||
- _Requirements: 4.1, 4.2, 4.3_
|
- _Requirements: 4.1, 4.2, 4.3_
|
||||||
|
|
||||||
- [ ] 17.4 Validate parser normalizes documents and extractor produces intelligence
|
- [-] 17.4 Validate parser normalizes documents and extractor produces intelligence
|
||||||
- Check parser worker logs for document processing
|
- Check parser worker logs for document processing
|
||||||
- Verify normalized text appears in MinIO `stonks-normalized` bucket
|
- Verify normalized text appears in MinIO `stonks-normalized` bucket
|
||||||
- Verify `parse_quality_score` and `parse_confidence` are set on documents
|
- Verify `parse_quality_score` and `parse_confidence` are set on documents
|
||||||
|
|||||||
@@ -232,7 +232,7 @@ def _extract_publisher(item: dict[str, Any]) -> str:
|
|||||||
|
|
||||||
def _parse_published_at(item: dict[str, Any]) -> datetime | None:
|
def _parse_published_at(item: dict[str, Any]) -> datetime | None:
|
||||||
"""Parse published_at from various adapter item formats."""
|
"""Parse published_at from various adapter item formats."""
|
||||||
raw = item.get("publishedAt") or item.get("published_at")
|
raw = item.get("publishedAt") or item.get("published_at") or item.get("published_utc")
|
||||||
if not raw:
|
if not raw:
|
||||||
return None
|
return None
|
||||||
if isinstance(raw, datetime):
|
if isinstance(raw, datetime):
|
||||||
@@ -392,7 +392,7 @@ async def _persist_document_items(
|
|||||||
json.dumps(item, sort_keys=True)
|
json.dumps(item, sort_keys=True)
|
||||||
)
|
)
|
||||||
title = item.get("title", item.get("name", ""))
|
title = item.get("title", item.get("name", ""))
|
||||||
url = item.get("url", item.get("link", ""))
|
url = item.get("url", item.get("link", item.get("article_url", "")))
|
||||||
canonical_url = item.get("canonical_url") or (
|
canonical_url = item.get("canonical_url") or (
|
||||||
normalize_url(url) if url else None
|
normalize_url(url) if url else None
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user