fix: stop tagging all macro_news articles as macro_event — default to article and let the extractor reclassify. Also reduce the fetch limit to 20 and the cadence to 30 min
This commit is contained in:
@@ -209,12 +209,17 @@ async def persist_broker_event(
|
||||
|
||||
|
||||
def _resolve_document_type(source_type: str) -> str:
|
||||
"""Map source_type to a document_type value."""
|
||||
"""Map source_type to a document_type value.
|
||||
|
||||
Note: macro_news articles default to 'article' — the extractor
|
||||
reclassifies them as 'macro_event' only if the content is truly
|
||||
about macro/global events (not company-specific news).
|
||||
"""
|
||||
mapping = {
|
||||
"news_api": "article",
|
||||
"filings_api": "filing",
|
||||
"web_scrape": "press_release",
|
||||
"macro_news": "macro_event",
|
||||
"macro_news": "article",
|
||||
}
|
||||
return mapping.get(source_type, "article")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user