ci: fix lint errors across project, update ruff.toml per-file ignores

This commit is contained in:
Celes Renata
2026-04-18 21:02:28 +00:00
parent 4d1894c652
commit 5f6d23888a
34 changed files with 1441 additions and 188 deletions
+11 -7
View File
@@ -1,11 +1,15 @@
import os
from minio import Minio
import os, json
mc = Minio(os.environ["MINIO_ENDPOINT"], access_key=os.environ["MINIO_ACCESS_KEY"], secret_key=os.environ["MINIO_SECRET_KEY"], secure=False)
# Inspect the most recent extraction: what text did the model actually receive?
# Fetch the normalized text of the latest high-quality news_api document and preview it.
import asyncio, asyncpg
import asyncio
import asyncpg
async def main():
pool = await asyncpg.create_pool(
@@ -15,20 +19,20 @@ async def main():
user=os.environ["POSTGRES_USER"],
password=os.environ["POSTGRES_PASSWORD"],
)
# Get the most recently updated news_api doc with parse_quality_score > 0.8
row = await pool.fetchrow(
"SELECT id, title, normalized_storage_ref, parse_quality_score "
"FROM documents WHERE source_type = 'news_api' AND parse_quality_score > 0.8 "
"ORDER BY updated_at DESC LIMIT 1"
)
if row:
print(f"Doc: {row['id']}")
print(f"Title: {row['title']}")
print(f"Quality: {row['parse_quality_score']}")
print(f"Ref: {row['normalized_storage_ref']}")
ref = row["normalized_storage_ref"]
parts = ref.replace("s3://", "").split("/", 1)
if len(parts) == 2:
@@ -37,9 +41,9 @@ async def main():
obj.close()
obj.release_conn()
print(f"Text length: {len(text)} chars")
print(f"First 500 chars:")
print("First 500 chars:")
print(text[:500])
await pool.close()
asyncio.run(main())