From 0ac4493bd44dfca34d951a3773bf5bbe90021e9e Mon Sep 17 00:00:00 2001
From: Celes Renata <celes@frameshift.net>
Date: Sun, 12 Apr 2026 02:54:23 -0700
Subject: [PATCH] phase 17: fix parser URL lookup from DB and extractor text
 field name mismatch

---
 services/extractor/main.py | 2 +-
 services/parser/worker.py  | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/services/extractor/main.py b/services/extractor/main.py
index 20634d5..eb2d1b3 100644
--- a/services/extractor/main.py
+++ b/services/extractor/main.py
@@ -48,7 +48,7 @@ async def main() -> None:
             job = json.loads(payload)
             document_id = job.get("document_id", "")
             ticker = job.get("ticker", "")
-            text = job.get("text", "")
+            text = job.get("text", "") or job.get("normalized_text", "")
 
             logger.info("Processing extraction job for doc %s / %s", document_id, ticker)
 
diff --git a/services/parser/worker.py b/services/parser/worker.py
index 5547947..78083c3 100644
--- a/services/parser/worker.py
+++ b/services/parser/worker.py
@@ -96,6 +96,14 @@ async def process_job(
 
     set_trace_context(trace_id=job.get("_trace_id") or new_trace_id())
 
+    # If no URL in job, look it up from the documents table
+    if not url:
+        row = await pool.fetchrow(
+            "SELECT url FROM documents WHERE id = $1::uuid", doc_id,
+        )
+        if row and row["url"]:
+            url = row["url"]
+
     # Fetch HTML if we have a URL
     html = await fetch_html(url) if url else None