ci: fix lint errors across project, update ruff.toml per-file ignores
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
objs = list(mc.list_objects("stonks-raw-filings", recursive=True))
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
|
||||
@@ -26,7 +28,7 @@ for o in objs:
|
||||
if raw:
|
||||
print(f" output ({len(raw)} chars): {raw[:200]}")
|
||||
else:
|
||||
print(f" output: (empty)")
|
||||
print(" output: (empty)")
|
||||
|
||||
if not objs:
|
||||
print(f"No LLM result found for {target}")
|
||||
|
||||
+11
-7
@@ -1,11 +1,15 @@
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
|
||||
# Check the most recent extraction - what text did the model get?
|
||||
# Look at the normalized text for a known doc
|
||||
import asyncio, asyncpg
|
||||
import asyncio
|
||||
|
||||
import asyncpg
|
||||
|
||||
|
||||
async def main():
|
||||
pool = await asyncpg.create_pool(
|
||||
@@ -15,20 +19,20 @@ async def main():
|
||||
user=os.environ["POSTGRES_USER"],
|
||||
password=os.environ["POSTGRES_PASSWORD"],
|
||||
)
|
||||
|
||||
|
||||
# Get a recently extracted doc
|
||||
row = await pool.fetchrow(
|
||||
"SELECT id, title, normalized_storage_ref, parse_quality_score "
|
||||
"FROM documents WHERE source_type = 'news_api' AND parse_quality_score > 0.8 "
|
||||
"ORDER BY updated_at DESC LIMIT 1"
|
||||
)
|
||||
|
||||
|
||||
if row:
|
||||
print(f"Doc: {row['id']}")
|
||||
print(f"Title: {row['title']}")
|
||||
print(f"Quality: {row['parse_quality_score']}")
|
||||
print(f"Ref: {row['normalized_storage_ref']}")
|
||||
|
||||
|
||||
ref = row["normalized_storage_ref"]
|
||||
parts = ref.replace("s3://", "").split("/", 1)
|
||||
if len(parts) == 2:
|
||||
@@ -37,9 +41,9 @@ async def main():
|
||||
obj.close()
|
||||
obj.release_conn()
|
||||
print(f"Text length: {len(text)} chars")
|
||||
print(f"First 500 chars:")
|
||||
print("First 500 chars:")
|
||||
print(text[:500])
|
||||
|
||||
|
||||
await pool.close()
|
||||
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
raw_objs = list(mc.list_objects("stonks-llm-results", recursive=True))
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
raw_objs = list(mc.list_objects("stonks-llm-results", recursive=True))
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
raw_objs = list(mc.list_objects("stonks-llm-results", recursive=True))
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import redis, os
|
||||
import os
|
||||
|
||||
import redis
|
||||
|
||||
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
|
||||
for q in ["ingestion","parsing","extraction","aggregation","recommendation","lake_publish","broker_orders"]:
|
||||
depth = r.llen(f"stonks:queue:{q}")
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from minio import Minio
|
||||
import os, json
|
||||
|
||||
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
|
||||
|
||||
@@ -15,11 +17,11 @@ for o in raw_objs[:3]:
|
||||
raw_out = last.get("raw_output", "")
|
||||
ticker = o.object_name.split("/")[1]
|
||||
doc_id = o.object_name.split("/")[-2]
|
||||
|
||||
|
||||
print(f"=== {ticker} / {doc_id[:8]} ===")
|
||||
print(f" success: {data.get('success')}")
|
||||
print(f" duration: {data.get('total_duration_ms')}ms")
|
||||
|
||||
|
||||
try:
|
||||
parsed = json.loads(raw_out)
|
||||
print(f" summary: {parsed.get('summary', '')[:120]}")
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import redis, os
|
||||
import os
|
||||
|
||||
import redis
|
||||
|
||||
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
|
||||
keys = list(r.scan_iter("stonks:dedupe:*"))
|
||||
if keys:
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import redis, os
|
||||
import os
|
||||
|
||||
import redis
|
||||
|
||||
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
|
||||
for q in ["ingestion","parsing","extraction","aggregation","recommendation","lake_publish","broker_orders"]:
|
||||
key = f"stonks:queue:{q}"
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import asyncio, asyncpg, json, os, redis
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import asyncpg
|
||||
import redis
|
||||
|
||||
|
||||
async def main():
|
||||
pool = await asyncpg.create_pool(
|
||||
@@ -9,19 +15,19 @@ async def main():
|
||||
password=os.environ["POSTGRES_PASSWORD"],
|
||||
)
|
||||
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
|
||||
|
||||
|
||||
rows = await pool.fetch(
|
||||
"SELECT d.id, dcm.ticker FROM documents d "
|
||||
"LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id "
|
||||
"WHERE d.status = 'parsed'"
|
||||
)
|
||||
|
||||
|
||||
for row in rows:
|
||||
r.rpush("stonks:queue:extraction", json.dumps({
|
||||
"document_id": str(row["id"]),
|
||||
"ticker": row["ticker"] or "",
|
||||
}))
|
||||
|
||||
|
||||
print(f"Enqueued {len(rows)} parsed docs for extraction")
|
||||
await pool.close()
|
||||
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import asyncio, asyncpg, json, os, redis
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import asyncpg
|
||||
import redis
|
||||
|
||||
|
||||
async def main():
|
||||
pool = await asyncpg.create_pool(
|
||||
@@ -9,26 +15,26 @@ async def main():
|
||||
password=os.environ["POSTGRES_PASSWORD"],
|
||||
)
|
||||
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD','')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
|
||||
|
||||
|
||||
# Reset filing docs to ingested
|
||||
await pool.execute(
|
||||
"UPDATE documents SET status = 'ingested', parse_quality_score = NULL, parse_confidence = NULL "
|
||||
"WHERE source_type = 'filings_api' AND status = 'low_quality' AND url IS NOT NULL"
|
||||
)
|
||||
|
||||
|
||||
rows = await pool.fetch(
|
||||
"SELECT d.id, dcm.ticker FROM documents d "
|
||||
"LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id "
|
||||
"WHERE d.source_type = 'filings_api' AND d.status = 'ingested' "
|
||||
"LIMIT 20" # Start with 20 to test
|
||||
)
|
||||
|
||||
|
||||
for row in rows:
|
||||
r.rpush("stonks:queue:parsing", json.dumps({
|
||||
"document_id": str(row["id"]),
|
||||
"ticker": row["ticker"] or "",
|
||||
}))
|
||||
|
||||
|
||||
print(f"Enqueued {len(rows)} filing docs for parsing (test batch)")
|
||||
await pool.close()
|
||||
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import asyncio, asyncpg, json, os, redis
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import asyncpg
|
||||
import redis
|
||||
|
||||
|
||||
async def main():
|
||||
pool = await asyncpg.create_pool(
|
||||
@@ -9,20 +15,20 @@ async def main():
|
||||
password=os.environ["POSTGRES_PASSWORD"],
|
||||
)
|
||||
r = redis.from_url(f"redis://:{os.environ.get('REDIS_PASSWORD', '')}@{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0")
|
||||
|
||||
|
||||
rows = await pool.fetch(
|
||||
"SELECT d.id, dcm.ticker FROM documents d "
|
||||
"LEFT JOIN document_company_mentions dcm ON d.id = dcm.document_id "
|
||||
"WHERE d.source_type = 'news_api' AND d.parse_quality_score > 0.7 "
|
||||
"ORDER BY d.parse_quality_score DESC LIMIT 5"
|
||||
)
|
||||
|
||||
|
||||
for row in rows:
|
||||
r.rpush("stonks:queue:extraction", json.dumps({
|
||||
"document_id": str(row["id"]),
|
||||
"ticker": row["ticker"] or "",
|
||||
}))
|
||||
|
||||
|
||||
print(f"Enqueued {len(rows)} high-quality docs for re-extraction")
|
||||
await pool.close()
|
||||
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import asyncio, asyncpg, json, os, redis
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
import asyncpg
|
||||
import redis
|
||||
|
||||
|
||||
async def main():
|
||||
pool = await asyncpg.create_pool(
|
||||
|
||||
Reference in New Issue
Block a user