fix: data quality query and suppression fallback in recommendation worker
- Fix _DATA_QUALITY_QUERY: remove nonexistent d.source_id/s.source_class references, use d.source_type directly
- Fix LIMIT 1 being applied after jsonb expansion by restructuring the query as a CTE
- Fix fallback build_quality_context_from_summary returning empty source_types, which always triggered LOW_SOURCE_DIVERSITY suppression
- Update test to reflect the corrected fallback behavior
This commit is contained in:
@@ -107,7 +107,8 @@ def build_quality_context_from_summary(
|
||||
|
||||
This is a fallback when full document-level quality metrics aren't
|
||||
available. It uses the trend summary's evidence counts and confidence
|
||||
as proxies.
|
||||
as proxies. We assume at least one source type contributed so that
|
||||
the fallback does not automatically trigger LOW_SOURCE_DIVERSITY.
|
||||
"""
|
||||
total = len(summary.top_supporting_evidence) + len(summary.top_opposing_evidence)
|
||||
return DataQualityContext(
|
||||
@@ -116,7 +117,7 @@ def build_quality_context_from_summary(
|
||||
failed_documents=0,
|
||||
avg_extraction_confidence=summary.confidence,
|
||||
newest_evidence_at=summary.generated_at,
|
||||
source_types=set(),
|
||||
source_types={"unknown"},
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -58,26 +58,30 @@ logger = logging.getLogger(__name__)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DATA_QUALITY_QUERY = """
|
||||
WITH latest_trend AS (
|
||||
SELECT top_supporting_evidence, top_opposing_evidence
|
||||
FROM trend_windows
|
||||
WHERE entity_id = $1 AND "window" = $2
|
||||
ORDER BY generated_at DESC
|
||||
LIMIT 1
|
||||
),
|
||||
evidence_ids AS (
|
||||
SELECT jsonb_array_elements_text(
|
||||
COALESCE(lt.top_supporting_evidence, '[]'::jsonb)
|
||||
|| COALESCE(lt.top_opposing_evidence, '[]'::jsonb)
|
||||
) AS eid
|
||||
FROM latest_trend lt
|
||||
)
|
||||
SELECT
|
||||
COUNT(*) AS total_documents,
|
||||
COUNT(*) FILTER (WHERE di.validation_status = 'valid') AS valid_documents,
|
||||
COUNT(*) FILTER (WHERE di.validation_status = 'failed') AS failed_documents,
|
||||
AVG(di.confidence) FILTER (WHERE di.validation_status = 'valid') AS avg_extraction_confidence,
|
||||
MAX(d.published_at) AS newest_evidence_at,
|
||||
ARRAY_AGG(DISTINCT s.source_class) FILTER (WHERE s.source_class IS NOT NULL) AS source_types
|
||||
ARRAY_AGG(DISTINCT d.source_type) FILTER (WHERE d.source_type IS NOT NULL) AS source_types
|
||||
FROM documents d
|
||||
JOIN document_intelligence di ON di.document_id = d.id
|
||||
LEFT JOIN sources s ON d.source_id = s.id
|
||||
WHERE d.id = ANY(
|
||||
SELECT UNNEST(
|
||||
COALESCE(tw.top_supporting_evidence, '[]'::jsonb)
|
||||
|| COALESCE(tw.top_opposing_evidence, '[]'::jsonb)
|
||||
)::uuid
|
||||
FROM trend_windows tw
|
||||
WHERE tw.entity_id = $1 AND tw."window" = $2
|
||||
ORDER BY tw.generated_at DESC
|
||||
LIMIT 1
|
||||
)
|
||||
WHERE d.id::text IN (SELECT eid FROM evidence_ids)
|
||||
"""
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user