ci: fix lint errors across project, update ruff.toml per-file ignores

This commit is contained in:
Celes Renata
2026-04-18 21:02:28 +00:00
parent 4d1894c652
commit 5f6d23888a
34 changed files with 1441 additions and 188 deletions
+3 -1
View File
@@ -1,5 +1,7 @@
import json
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
objs = list(mc.list_objects("stonks-raw-filings", recursive=True))
+4 -2
View File
@@ -1,5 +1,7 @@
import json
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
@@ -26,7 +28,7 @@ for o in objs:
if raw:
print(f" output ({len(raw)} chars): {raw[:200]}")
else:
print(f" output: (empty)")
print(" output: (empty)")
if not objs:
print(f"No LLM result found for {target}")
+11 -7
View File
@@ -1,11 +1,15 @@
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
# Check the most recent extraction - what text did the model get?
# Look at the normalized text for a known doc
import asyncio, asyncpg
import asyncio
import asyncpg
async def main():
pool = await asyncpg.create_pool(
@@ -15,20 +19,20 @@ async def main():
user=os.environ["POSTGRES_USER"],
password=os.environ["POSTGRES_PASSWORD"],
)
# Get a recently extracted doc
row = await pool.fetchrow(
"SELECT id, title, normalized_storage_ref, parse_quality_score "
"FROM documents WHERE source_type = 'news_api' AND parse_quality_score > 0.8 "
"ORDER BY updated_at DESC LIMIT 1"
)
if row:
print(f"Doc: {row['id']}")
print(f"Title: {row['title']}")
print(f"Quality: {row['parse_quality_score']}")
print(f"Ref: {row['normalized_storage_ref']}")
ref = row["normalized_storage_ref"]
parts = ref.replace("s3://", "").split("/", 1)
if len(parts) == 2:
@@ -37,9 +41,9 @@ async def main():
obj.close()
obj.release_conn()
print(f"Text length: {len(text)} chars")
print(f"First 500 chars:")
print("First 500 chars:")
print(text[:500])
await pool.close()
asyncio.run(main())
+3 -1
View File
@@ -1,5 +1,7 @@
import json
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
raw_objs = list(mc.list_objects("stonks-llm-results", recursive=True))
+3 -1
View File
@@ -1,5 +1,7 @@
import json
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
raw_objs = list(mc.list_objects("stonks-llm-results", recursive=True))
+3 -1
View File
@@ -1,5 +1,7 @@
import json
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
raw_objs = list(mc.list_objects("stonks-llm-results", recursive=True))
+4 -1
View File
@@ -1,4 +1,7 @@
import redis, os
import os
import redis
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
for q in ["ingestion","parsing","extraction","aggregation","recommendation","lake_publish","broker_orders"]:
depth = r.llen(f"stonks:queue:{q}")
+5 -3
View File
@@ -1,5 +1,7 @@
import json
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
@@ -15,11 +17,11 @@ for o in raw_objs[:3]:
raw_out = last.get("raw_output", "")
ticker = o.object_name.split("/")[1]
doc_id = o.object_name.split("/")[-2]
print(f"=== {ticker} / {doc_id[:8]} ===")
print(f" success: {data.get('success')}")
print(f" duration: {data.get('total_duration_ms')}ms")
try:
parsed = json.loads(raw_out)
print(f" summary: {parsed.get('summary', '')[:120]}")
+4 -1
View File
@@ -1,4 +1,7 @@
import redis, os
import os
import redis
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
keys = list(r.scan_iter("stonks:dedupe:*"))
if keys:
+4 -1
View File
@@ -1,4 +1,7 @@
import redis, os
import os
import redis
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
for q in ["ingestion","parsing","extraction","aggregation","recommendation","lake_publish","broker_orders"]:
key = f"stonks:queue:{q}"
+10 -4
View File
@@ -1,4 +1,10 @@
import asyncio, asyncpg, json, os, redis
import asyncio
import json
import os
import asyncpg
import redis
async def main():
pool = await asyncpg.create_pool(
@@ -9,19 +15,19 @@ async def main():
password=os.environ["POSTGRES_PASSWORD"],
)
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
rows = await pool.fetch(
"SELECT d.id, dcm.ticker FROM documents d "
"LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id "
"WHERE d.status = 'parsed'"
)
for row in rows:
r.rpush("stonks:queue:extraction", json.dumps({
"document_id": str(row["id"]),
"ticker": row["ticker"] or "",
}))
print(f"Enqueued {len(rows)} parsed docs for extraction")
await pool.close()
+11 -5
View File
@@ -1,4 +1,10 @@
import asyncio, asyncpg, json, os, redis
import asyncio
import json
import os
import asyncpg
import redis
async def main():
pool = await asyncpg.create_pool(
@@ -9,26 +15,26 @@ async def main():
password=os.environ["POSTGRES_PASSWORD"],
)
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
# Reset filing docs to ingested
await pool.execute(
"UPDATE documents SET status = 'ingested', parse_quality_score = NULL, parse_confidence = NULL "
"WHERE source_type = 'filings_api' AND status = 'low_quality' AND url IS NOT NULL"
)
rows = await pool.fetch(
"SELECT d.id, dcm.ticker FROM documents d "
"LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id "
"WHERE d.source_type = 'filings_api' AND d.status = 'ingested' "
"LIMIT 20" # Start with 20 to test
)
for row in rows:
r.rpush("stonks:queue:parsing", json.dumps({
"document_id": str(row["id"]),
"ticker": row["ticker"] or "",
}))
print(f"Enqueued {len(rows)} filing docs for parsing (test batch)")
await pool.close()
+10 -4
View File
@@ -1,4 +1,10 @@
import asyncio, asyncpg, json, os, redis
import asyncio
import json
import os
import asyncpg
import redis
async def main():
pool = await asyncpg.create_pool(
@@ -9,20 +15,20 @@ async def main():
password=os.environ["POSTGRES_PASSWORD"],
)
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD', '')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
rows = await pool.fetch(
"SELECT d.id, dcm.ticker FROM documents d "
"LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id "
"WHERE d.source_type = 'news_api' AND d.parse_quality_score > 0.7 "
"ORDER BY d.parse_quality_score DESC LIMIT 5"
)
for row in rows:
r.rpush("stonks:queue:extraction", json.dumps({
"document_id": str(row["id"]),
"ticker": row["ticker"] or "",
}))
print(f"Enqueued {len(rows)} high-quality docs for re-extraction")
await pool.close()
+7 -1
View File
@@ -1,4 +1,10 @@
import asyncio, asyncpg, json, os, redis
import asyncio
import json
import os
import asyncpg
import redis
async def main():
pool = await asyncpg.create_pool(