phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
# Replay Dataset for Deterministic Extraction Testing
|
||||
|
||||
Archived document fixtures used to verify that the extraction pipeline
|
||||
produces consistent, schema-valid results across code changes.
|
||||
|
||||
Each fixture is a JSON file containing:
|
||||
- `document_id`: stable identifier for the fixture
|
||||
- `document_type`: article, filing, transcript, or press_release
|
||||
- `document_text`: normalized text as it would arrive from the parser
|
||||
- `known_tickers`: ticker hints passed to the extraction prompt
|
||||
- `expected_extraction`: the expected extraction result (schema-valid)
|
||||
- `metadata`: fixture provenance info (created_at, description, schema_version, category)
|
||||
|
||||
The replay runner (`tests/test_replay_extraction.py`) loads these fixtures,
|
||||
validates the expected outputs against the current extraction schema, and
|
||||
optionally runs them through a live Ollama instance for end-to-end checks.
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"document_id": "replay-001-aapl-earnings",
|
||||
"document_type": "article",
|
||||
"document_text": "Apple Inc. reported fiscal Q1 2026 results that exceeded Wall Street expectations. Revenue came in at $124.3 billion, up 9% year-over-year, driven by strong iPhone 17 demand and a 22% surge in Services revenue to $26.1 billion. CEO Tim Cook highlighted record-setting performance in emerging markets, particularly India and Southeast Asia.\n\nEarnings per share of $2.41 beat the consensus estimate of $2.35. Gross margin expanded to 46.9%, up from 45.9% a year ago, reflecting favorable product mix and supply chain efficiencies.\n\nAnalysts at Morgan Stanley raised their price target to $245 from $230, citing the Services growth trajectory. However, Greater China revenue declined 4% amid increased competition from Huawei, which Cook acknowledged as a headwind.\n\nApple also announced a $110 billion share buyback program, the largest in corporate history, and raised its quarterly dividend by 5% to $0.26 per share.",
|
||||
"known_tickers": ["AAPL"],
|
||||
"expected_extraction": {
|
||||
"summary": "Apple reported Q1 2026 results beating expectations with $124.3B revenue up 9% YoY, driven by iPhone 17 demand and 22% Services growth, though China revenue declined 4%.",
|
||||
"companies": [
|
||||
{
|
||||
"ticker": "AAPL",
|
||||
"company_name": "Apple Inc.",
|
||||
"relevance": 0.95,
|
||||
"sentiment": "positive",
|
||||
"impact_score": 0.8,
|
||||
"impact_horizon": "1d_30d",
|
||||
"catalyst_type": "earnings",
|
||||
"key_facts": [
|
||||
"Revenue $124.3 billion, up 9% year-over-year",
|
||||
"EPS $2.41 beat consensus of $2.35",
|
||||
"Services revenue surged 22% to $26.1 billion",
|
||||
"Greater China revenue declined 4%",
|
||||
"$110 billion share buyback announced"
|
||||
],
|
||||
"risks": [
|
||||
"Greater China revenue declined 4% amid Huawei competition"
|
||||
],
|
||||
"evidence_spans": [
|
||||
"Revenue came in at $124.3 billion, up 9% year-over-year",
|
||||
"Earnings per share of $2.41 beat the consensus estimate of $2.35",
|
||||
"Greater China revenue declined 4% amid increased competition from Huawei"
|
||||
]
|
||||
}
|
||||
],
|
||||
"macro_themes": [],
|
||||
"novelty_score": 0.5,
|
||||
"confidence": 0.9,
|
||||
"extraction_warnings": []
|
||||
},
|
||||
"metadata": {
|
||||
"created_at": "2026-04-11",
|
||||
"description": "Synthetic Apple earnings article for replay testing",
|
||||
"schema_version": "2.0.0",
|
||||
"category": "earnings_beat"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"document_id": "replay-004-low-quality",
|
||||
"document_type": "article",
|
||||
"document_text": "Error 403 Forbidden. Access denied. Please subscribe to continue reading. Cookie preferences updated. Share on Twitter. Share on Facebook.",
|
||||
"known_tickers": ["AAPL"],
|
||||
"expected_extraction": {
|
||||
"summary": "",
|
||||
"companies": [],
|
||||
"macro_themes": [],
|
||||
"novelty_score": 0.1,
|
||||
"confidence": 0.1,
|
||||
"extraction_warnings": ["insufficient_content"]
|
||||
},
|
||||
"metadata": {
|
||||
"created_at": "2026-04-11",
|
||||
"description": "Garbled/paywall document that should produce empty extraction with low confidence (Req 4.3, 5.4)",
|
||||
"schema_version": "2.0.0",
|
||||
"category": "low_quality"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"document_id": "replay-005-msft-press-release",
|
||||
"document_type": "press_release",
|
||||
"document_text": "REDMOND, Wash. — April 8, 2026 — Microsoft Corp. today announced it has entered into a definitive agreement to acquire Nuance Communications, Inc. subsidiary DataSphere AI for approximately $4.2 billion in an all-cash transaction. The acquisition is expected to close in Q3 2026, subject to regulatory approval.\n\nDataSphere AI specializes in healthcare-specific large language models and clinical decision support systems deployed across 1,200 hospitals in the United States. The acquisition will strengthen Microsoft's Azure Health Cloud platform and expand its presence in the $280 billion global healthcare IT market.\n\nSatya Nadella, Chairman and CEO of Microsoft, said: \"DataSphere AI's clinical language models are the most advanced in the industry. This acquisition accelerates our mission to empower every healthcare organization with AI that improves patient outcomes.\"\n\nThe transaction is expected to be accretive to Microsoft's earnings per share within 18 months of closing. Microsoft plans to integrate DataSphere's technology into Azure AI services and the Microsoft Cloud for Healthcare platform.",
|
||||
"known_tickers": ["MSFT"],
|
||||
"expected_extraction": {
|
||||
"summary": "Microsoft announced a $4.2 billion all-cash acquisition of DataSphere AI, a healthcare LLM company deployed in 1,200 U.S. hospitals, to strengthen Azure Health Cloud.",
|
||||
"companies": [
|
||||
{
|
||||
"ticker": "MSFT",
|
||||
"company_name": "Microsoft Corp.",
|
||||
"relevance": 0.95,
|
||||
"sentiment": "positive",
|
||||
"impact_score": 0.7,
|
||||
"impact_horizon": "1d_30d",
|
||||
"catalyst_type": "m_and_a",
|
||||
"key_facts": [
|
||||
"Acquiring DataSphere AI for $4.2 billion in all-cash transaction",
|
||||
"Expected to close Q3 2026 subject to regulatory approval",
|
||||
"DataSphere deployed across 1,200 hospitals in the United States",
|
||||
"Expected to be accretive to EPS within 18 months"
|
||||
],
|
||||
"risks": [
|
||||
"Subject to regulatory approval"
|
||||
],
|
||||
"evidence_spans": [
|
||||
"entered into a definitive agreement to acquire Nuance Communications, Inc. subsidiary DataSphere AI for approximately $4.2 billion",
|
||||
"deployed across 1,200 hospitals in the United States",
|
||||
"expected to be accretive to Microsoft's earnings per share within 18 months of closing"
|
||||
]
|
||||
}
|
||||
],
|
||||
"macro_themes": ["ai_capex"],
|
||||
"novelty_score": 0.75,
|
||||
"confidence": 0.9,
|
||||
"extraction_warnings": []
|
||||
},
|
||||
"metadata": {
|
||||
"created_at": "2026-04-11",
|
||||
"description": "Synthetic Microsoft M&A press release for replay testing",
|
||||
"schema_version": "2.0.0",
|
||||
"category": "press_release_m_and_a"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
{
|
||||
"document_id": "replay-003-multi-company",
|
||||
"document_type": "article",
|
||||
"document_text": "The semiconductor sector faced a turbulent week as new U.S. export restrictions targeting advanced AI chips sent shockwaves through the industry. NVIDIA Corporation saw its shares drop 7% on Monday after the Commerce Department announced expanded controls on shipments of H200 and B100 GPUs to several Middle Eastern countries.\n\nAdvanced Micro Devices was also affected, declining 4.2%, though analysts noted AMD's exposure to the restricted markets is smaller than NVIDIA's. Bernstein analyst Stacy Rasgon estimated NVIDIA could lose $4-5 billion in annual revenue from the new restrictions, while AMD's impact would be closer to $800 million.\n\nMeanwhile, Taiwan Semiconductor Manufacturing Company reported that its advanced packaging capacity for AI chips remains fully booked through 2027, suggesting underlying demand remains robust despite the regulatory headwinds. TSMC shares rose 1.3% on the news.\n\nIntel Corporation, which has been positioning its Gaudi 3 accelerator as a domestic alternative, saw a modest 2.1% gain as investors speculated the restrictions could redirect demand toward U.S.-manufactured alternatives.",
|
||||
"known_tickers": ["NVDA", "AMD", "TSM", "INTC"],
|
||||
"expected_extraction": {
|
||||
"summary": "New U.S. export restrictions on advanced AI chips hit NVIDIA (-7%) and AMD (-4.2%), while TSMC reported full AI packaging capacity through 2027 and Intel gained on domestic alternative positioning.",
|
||||
"companies": [
|
||||
{
|
||||
"ticker": "NVDA",
|
||||
"company_name": "NVIDIA Corporation",
|
||||
"relevance": 0.9,
|
||||
"sentiment": "negative",
|
||||
"impact_score": 0.8,
|
||||
"impact_horizon": "1d_30d",
|
||||
"catalyst_type": "macro",
|
||||
"key_facts": [
|
||||
"Shares dropped 7% on expanded export controls",
|
||||
"H200 and B100 GPUs targeted by new restrictions",
|
||||
"Estimated $4-5 billion annual revenue loss from restrictions"
|
||||
],
|
||||
"risks": [
|
||||
"Expanded U.S. export controls on AI chip shipments to Middle Eastern countries"
|
||||
],
|
||||
"evidence_spans": [
|
||||
"NVIDIA Corporation saw its shares drop 7% on Monday after the Commerce Department announced expanded controls",
|
||||
"NVIDIA could lose $4-5 billion in annual revenue from the new restrictions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ticker": "AMD",
|
||||
"company_name": "Advanced Micro Devices",
|
||||
"relevance": 0.7,
|
||||
"sentiment": "negative",
|
||||
"impact_score": 0.55,
|
||||
"impact_horizon": "1d_30d",
|
||||
"catalyst_type": "macro",
|
||||
"key_facts": [
|
||||
"Shares declined 4.2%",
|
||||
"Estimated $800 million annual revenue impact"
|
||||
],
|
||||
"risks": [
|
||||
"Exposure to restricted export markets"
|
||||
],
|
||||
"evidence_spans": [
|
||||
"Advanced Micro Devices was also affected, declining 4.2%",
|
||||
"AMD's impact would be closer to $800 million"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ticker": "TSM",
|
||||
"company_name": "Taiwan Semiconductor Manufacturing Company",
|
||||
"relevance": 0.65,
|
||||
"sentiment": "positive",
|
||||
"impact_score": 0.5,
|
||||
"impact_horizon": "1d_7d",
|
||||
"catalyst_type": "product",
|
||||
"key_facts": [
|
||||
"Advanced packaging capacity for AI chips fully booked through 2027",
|
||||
"Shares rose 1.3%"
|
||||
],
|
||||
"risks": [],
|
||||
"evidence_spans": [
|
||||
"advanced packaging capacity for AI chips remains fully booked through 2027",
|
||||
"TSMC shares rose 1.3% on the news"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ticker": "INTC",
|
||||
"company_name": "Intel Corporation",
|
||||
"relevance": 0.5,
|
||||
"sentiment": "positive",
|
||||
"impact_score": 0.35,
|
||||
"impact_horizon": "1d_7d",
|
||||
"catalyst_type": "macro",
|
||||
"key_facts": [
|
||||
"Gaudi 3 accelerator positioned as domestic alternative",
|
||||
"Shares gained 2.1%"
|
||||
],
|
||||
"risks": [],
|
||||
"evidence_spans": [
|
||||
"Intel Corporation, which has been positioning its Gaudi 3 accelerator as a domestic alternative, saw a modest 2.1% gain"
|
||||
]
|
||||
}
|
||||
],
|
||||
"macro_themes": ["ai_capex"],
|
||||
"novelty_score": 0.7,
|
||||
"confidence": 0.85,
|
||||
"extraction_warnings": []
|
||||
},
|
||||
"metadata": {
|
||||
"created_at": "2026-04-11",
|
||||
"description": "Synthetic multi-company semiconductor article for replay testing (Req 5.5)",
|
||||
"schema_version": "2.0.0",
|
||||
"category": "multi_company"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"document_id": "replay-002-tsla-filing",
|
||||
"document_type": "filing",
|
||||
"document_text": "UNITED STATES SECURITIES AND EXCHANGE COMMISSION\nWashington, D.C. 20549\nFORM 8-K\n\nCURRENT REPORT\nPursuant to Section 13 or 15(d) of the Securities Exchange Act of 1934\n\nDate of Report: April 8, 2026\n\nTESLA, INC.\n(Exact name of registrant as specified in its charter)\n\nItem 2.02 Results of Operations and Financial Condition.\n\nOn April 8, 2026, Tesla, Inc. issued a press release announcing its financial results for the fiscal quarter ended March 31, 2026. Total revenue was $25.8 billion, compared to $23.3 billion in the prior year quarter. Automotive revenue was $20.1 billion. Energy generation and storage revenue increased 67% to $3.2 billion.\n\nGAAP net income was $2.1 billion, or $0.61 per diluted share. Non-GAAP net income was $2.5 billion, or $0.73 per diluted share.\n\nThe Company disclosed that vehicle deliveries totaled 478,000 units, below the consensus estimate of 495,000 units. Management attributed the shortfall to production line retooling for the refreshed Model Y at the Fremont and Shanghai factories.\n\nRisk Factors: The Company noted ongoing regulatory uncertainty in the European Union regarding autonomous driving software certification, which could delay Full Self-Driving rollout in key markets. Additionally, lithium carbonate prices have increased 18% quarter-over-quarter, pressuring battery cell costs.",
|
||||
"known_tickers": ["TSLA"],
|
||||
"expected_extraction": {
|
||||
"summary": "Tesla 8-K filing reports Q1 2026 results with $25.8B revenue, but vehicle deliveries of 478K missed consensus of 495K due to Model Y retooling. Energy segment grew 67%.",
|
||||
"companies": [
|
||||
{
|
||||
"ticker": "TSLA",
|
||||
"company_name": "Tesla, Inc.",
|
||||
"relevance": 0.95,
|
||||
"sentiment": "mixed",
|
||||
"impact_score": 0.75,
|
||||
"impact_horizon": "1d_30d",
|
||||
"catalyst_type": "earnings",
|
||||
"key_facts": [
|
||||
"Total revenue $25.8 billion vs $23.3 billion prior year",
|
||||
"Vehicle deliveries 478,000 units, below consensus of 495,000",
|
||||
"Energy generation and storage revenue increased 67% to $3.2 billion",
|
||||
"GAAP net income $2.1 billion or $0.61 per diluted share"
|
||||
],
|
||||
"risks": [
|
||||
"EU regulatory uncertainty regarding autonomous driving software certification",
|
||||
"Lithium carbonate prices increased 18% quarter-over-quarter"
|
||||
],
|
||||
"evidence_spans": [
|
||||
"Total revenue was $25.8 billion, compared to $23.3 billion in the prior year quarter",
|
||||
"vehicle deliveries totaled 478,000 units, below the consensus estimate of 495,000 units",
|
||||
"lithium carbonate prices have increased 18% quarter-over-quarter, pressuring battery cell costs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"macro_themes": [],
|
||||
"novelty_score": 0.45,
|
||||
"confidence": 0.88,
|
||||
"extraction_warnings": []
|
||||
},
|
||||
"metadata": {
|
||||
"created_at": "2026-04-11",
|
||||
"description": "Synthetic Tesla 8-K filing for replay testing",
|
||||
"schema_version": "2.0.0",
|
||||
"category": "sec_filing"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user