phase 17: fix SEC EDGAR 403 — use descriptive User-Agent with contact email per fair access policy

This commit is contained in:
Celes Renata
2026-04-12 09:50:29 -07:00
parent 311d76dc0b
commit 2e42310f07
3 changed files with 110 additions and 1 deletions
+6 -1
View File
@@ -45,9 +45,14 @@ async def fetch_html(url: str) -> Optional[str]:
"""Fetch article HTML for scraping."""
if not url:
return None
# SEC EDGAR requires a descriptive User-Agent with contact email per fair access policy
if "sec.gov" in url:
ua = "StonksOracle/1.0 (stonks-oracle-bot; contact@celestium.life)"
else:
ua = "StonksOracle/1.0"
async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
try:
resp = await client.get(url, headers={"User-Agent": "StonksOracle/1.0"})
resp = await client.get(url, headers={"User-Agent": ua, "Accept-Encoding": "gzip, deflate"})
resp.raise_for_status()
return resp.text
except Exception as e: