fix: data quality query and suppression fallback in recommendation worker

- Fix _DATA_QUALITY_QUERY: remove nonexistent d.source_id/s.source_class,
  use d.source_type directly
- Fix LIMIT 1 applied after jsonb expansion by restructuring as CTE
- Fix fallback build_quality_context_from_summary returning empty
  source_types which always triggered LOW_SOURCE_DIVERSITY suppression
- Update test to reflect corrected fallback behavior
This commit is contained in:
Celes Renata
2026-04-14 06:57:46 +00:00
parent 4fbddc307a
commit b478022ba3
3 changed files with 22 additions and 20 deletions
+16 -12
View File
@@ -58,26 +58,30 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
_DATA_QUALITY_QUERY = """
WITH latest_trend AS (
SELECT top_supporting_evidence, top_opposing_evidence
FROM trend_windows
WHERE entity_id = $1 AND "window" = $2
ORDER BY generated_at DESC
LIMIT 1
),
evidence_ids AS (
SELECT jsonb_array_elements_text(
COALESCE(lt.top_supporting_evidence, '[]'::jsonb)
|| COALESCE(lt.top_opposing_evidence, '[]'::jsonb)
) AS eid
FROM latest_trend lt
)
SELECT
COUNT(*) AS total_documents,
COUNT(*) FILTER (WHERE di.validation_status = 'valid') AS valid_documents,
COUNT(*) FILTER (WHERE di.validation_status = 'failed') AS failed_documents,
AVG(di.confidence) FILTER (WHERE di.validation_status = 'valid') AS avg_extraction_confidence,
MAX(d.published_at) AS newest_evidence_at,
ARRAY_AGG(DISTINCT s.source_class) FILTER (WHERE s.source_class IS NOT NULL) AS source_types
ARRAY_AGG(DISTINCT d.source_type) FILTER (WHERE d.source_type IS NOT NULL) AS source_types
FROM documents d
JOIN document_intelligence di ON di.document_id = d.id
LEFT JOIN sources s ON d.source_id = s.id
WHERE d.id = ANY(
SELECT UNNEST(
COALESCE(tw.top_supporting_evidence, '[]'::jsonb)
|| COALESCE(tw.top_opposing_evidence, '[]'::jsonb)
)::uuid
FROM trend_windows tw
WHERE tw.entity_id = $1 AND tw."window" = $2
ORDER BY tw.generated_at DESC
LIMIT 1
)
WHERE d.id::text IN (SELECT eid FROM evidence_ids)
"""