fix: stop tagging all macro_news articles as macro_event; default them to article and let the extractor reclassify. Also reduce the fetch limit to 20 and the cadence to 30 min.
This commit is contained in:
@@ -209,12 +209,17 @@ async def persist_broker_event(
|
|||||||
|
|
||||||
|
|
||||||
def _resolve_document_type(source_type: str) -> str:
|
def _resolve_document_type(source_type: str) -> str:
|
||||||
"""Map source_type to a document_type value."""
|
"""Map source_type to a document_type value.
|
||||||
|
|
||||||
|
Note: macro_news articles default to 'article' — the extractor
|
||||||
|
reclassifies them as 'macro_event' only if the content is truly
|
||||||
|
about macro/global events (not company-specific news).
|
||||||
|
"""
|
||||||
mapping = {
|
mapping = {
|
||||||
"news_api": "article",
|
"news_api": "article",
|
||||||
"filings_api": "filing",
|
"filings_api": "filing",
|
||||||
"web_scrape": "press_release",
|
"web_scrape": "press_release",
|
||||||
"macro_news": "macro_event",
|
"macro_news": "article",
|
||||||
}
|
}
|
||||||
return mapping.get(source_type, "article")
|
return mapping.get(source_type, "article")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user