phase 17: fix SEC EDGAR 403 — use descriptive User-Agent with contact email per fair access policy
This commit is contained in:
@@ -45,9 +45,14 @@ async def fetch_html(url: str) -> Optional[str]:
|
||||
"""Fetch article HTML for scraping."""
|
||||
if not url:
|
||||
return None
|
||||
# SEC EDGAR requires a descriptive User-Agent with contact email per fair access policy
|
||||
if "sec.gov" in url:
|
||||
ua = "StonksOracle/1.0 (stonks-oracle-bot; contact@celestium.life)"
|
||||
else:
|
||||
ua = "StonksOracle/1.0"
|
||||
async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
|
||||
try:
|
||||
resp = await client.get(url, headers={"User-Agent": "StonksOracle/1.0"})
|
||||
resp = await client.get(url, headers={"User-Agent": ua, "Accept-Encoding": "gzip, deflate"})
|
||||
resp.raise_for_status()
|
||||
return resp.text
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user