phase 17: fix parser URL lookup from DB and extractor text field name mismatch
This commit is contained in:
@@ -96,6 +96,14 @@ async def process_job(
|
||||
|
||||
set_trace_context(trace_id=job.get("_trace_id") or new_trace_id())
|
||||
|
||||
# If no URL in job, look it up from the documents table
|
||||
if not url:
|
||||
row = await pool.fetchrow(
|
||||
"SELECT url FROM documents WHERE id = $1::uuid", doc_id,
|
||||
)
|
||||
if row and row["url"]:
|
||||
url = row["url"]
|
||||
|
||||
# Fetch HTML if we have a URL
|
||||
html = await fetch_html(url) if url else None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user