"""Integration tests for the full ingest-to-recommendation flow. Exercises the pipeline end-to-end through all stages: Ingestion → Parsing → Extraction → Aggregation → Recommendation Each stage uses the real logic functions from the service modules. External infrastructure (PostgreSQL, MinIO, Redis, Ollama) is replaced with lightweight fakes that preserve the data contracts between stages. Requirements: 3.1-3.4, 4.1-4.3, 5.1-5.5, 6.1-6.5, 7.1-7.4 """ from __future__ import annotations import json import uuid from datetime import datetime, timedelta, timezone from unittest.mock import AsyncMock, MagicMock, patch import pytest from services.aggregation.worker import ( ImpactRow, assemble_trend_with_evidence, build_weighted_signals, ) from services.extractor.client import ExtractionAttempt, ExtractionResponse from services.extractor.schemas import ExtractionResult, ValidationReport, validate_extraction from services.extractor.worker import persist_extraction from services.parser.html_parser import ParsedDocument, detect_company_mentions, parse_html from services.parser.worker import build_parser_output_json from services.recommendation.eligibility import EligibilityConfig, evaluate_eligibility from services.recommendation.suppression import ( DataQualityContext, SuppressionConfig, evaluate_suppression, ) from services.recommendation.worker import ( build_recommendation, build_thesis, classify_risk, ) from services.shared.schemas import ( ActionType, RecommendationMode, TrendDirection, TrendWindow, ) NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc) # --------------------------------------------------------------------------- # Shared test fixtures # --------------------------------------------------------------------------- SAMPLE_HTML = """ Apple Reports Record Q2 Earnings

Apple Reports Record Q2 Earnings

Apple Inc. (AAPL) reported record quarterly revenue of $120 billion, beating analyst expectations by 8%. CEO Tim Cook cited strong iPhone and services growth as key drivers.

The company also announced a $100 billion share buyback program, signaling confidence in future cash flows. Analysts at Goldman Sachs raised their price target to $250.

However, regulatory scrutiny in the EU remains a risk factor, with potential fines related to the Digital Markets Act.

""" SAMPLE_EXTRACTION_JSON = { "summary": "Apple reported record Q2 revenue of $120B, beating expectations by 8%. " "Announced $100B buyback. EU regulatory risk remains.", "companies": [ { "ticker": "AAPL", "company_name": "Apple Inc.", "relevance": 0.95, "sentiment": "positive", "impact_score": 0.75, "impact_horizon": "1d_30d", "catalyst_type": "earnings", "key_facts": [ "Record quarterly revenue of $120 billion", "$100 billion share buyback announced", "Goldman Sachs raised price target to $250", ], "risks": ["EU regulatory scrutiny under Digital Markets Act"], "evidence_spans": [ "Apple Inc. (AAPL) reported record quarterly revenue of $120 billion", "beating analyst expectations by 8%", "announced a $100 billion share buyback program", ], } ], "macro_themes": ["consumer_tech", "buybacks"], "novelty_score": 0.7, "confidence": 0.88, "extraction_warnings": [], } COMPANY_ALIASES = [ {"company_id": "comp-1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"}, {"company_id": "comp-1", "alias": "Apple Inc.", "alias_type": "legal_name", "ticker": "AAPL"}, ] # --------------------------------------------------------------------------- # Stage 1: Parsing # --------------------------------------------------------------------------- class TestParsingStage: """Verify the HTML parsing pipeline produces structured output.""" def test_parse_html_extracts_body_text(self): parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings") assert parsed.body_text is not None assert "record quarterly revenue" in parsed.body_text.lower() # Boilerplate should be stripped assert "Site Navigation" not in parsed.body_text assert "Copyright" not in parsed.body_text def test_parse_html_extracts_metadata(self): parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings") assert parsed.title == "Apple Reports Record Q2 Earnings" assert parsed.quality_score > 0.0 assert parsed.confidence != "low" def test_detect_company_mentions_finds_aapl(self): parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings") mentions = detect_company_mentions(parsed.body_text, COMPANY_ALIASES) tickers_found = {m["ticker"] for m in mentions} assert "AAPL" in tickers_found def test_parser_output_json_structure(self): parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings") mentions = detect_company_mentions(parsed.body_text, COMPANY_ALIASES) output = build_parser_output_json(parsed, mentions) assert "quality_score" in output assert "mentioned_companies" in output assert isinstance(output["mentioned_companies"], list) assert output["title"] == "Apple Reports Record Q2 Earnings" # --------------------------------------------------------------------------- # Stage 2: Extraction validation # --------------------------------------------------------------------------- class TestExtractionStage: """Verify extraction schema validation and result construction.""" def test_validate_extraction_accepts_valid_json(self): report = validate_extraction(SAMPLE_EXTRACTION_JSON) assert report.valid assert report.parsed is not None assert report.parsed.companies[0].ticker == "AAPL" def test_validate_extraction_rejects_invalid_json(self): report = validate_extraction("not json at all") assert not report.valid assert len(report.errors) > 0 def test_validate_extraction_rejects_bad_schema(self): bad = {"summary": "test"} # missing required fields report = validate_extraction(bad) assert not report.valid def test_extraction_result_matches_intelligence_schema(self): result = ExtractionResult.model_validate(SAMPLE_EXTRACTION_JSON) assert result.confidence == 0.88 assert len(result.companies) == 1 assert result.companies[0].catalyst_type.value == "earnings" assert result.novelty_score == 0.7 def test_validate_extraction_with_document_text_checks_evidence(self): """Evidence grounding check should warn if spans not found.""" report = validate_extraction( SAMPLE_EXTRACTION_JSON, document_text="Completely unrelated text about weather.", ) # Should still be valid (evidence grounding is a warning, not error) assert report.valid assert any("evidence_span_not_found" in w for w in report.warnings) # --------------------------------------------------------------------------- # Stage 3: Extraction persistence (mocked infra) # --------------------------------------------------------------------------- class TestExtractionPersistence: """Verify extraction artifacts are persisted correctly.""" @pytest.mark.asyncio async def test_persist_successful_extraction_creates_all_artifacts(self): result_obj = ExtractionResult.model_validate(SAMPLE_EXTRACTION_JSON) validation = ValidationReport(valid=True, errors=[], warnings=[], parsed=result_obj) attempt = ExtractionAttempt( raw_output=json.dumps(SAMPLE_EXTRACTION_JSON), validation=validation, error=None, duration_ms=450, model="test-model", ) response = ExtractionResponse( success=True, result=result_obj, attempts=[attempt], prompt_metadata={"prompt_version": "document-intel-v2", "schema_version": "2.0.0"}, model="test-model", total_duration_ms=450, ) pool = AsyncMock() pool.fetchval = AsyncMock(side_effect=["intel-1", "impact-1", "metrics-1"]) pool.execute = AsyncMock() minio = MagicMock() persist_result = await persist_extraction( pool=pool, minio_client=minio, document_id=str(uuid.uuid4()), ticker="AAPL", extraction_response=response, company_id_map={"AAPL": "comp-1"}, source_credibility=0.8, timestamp=NOW, ) assert persist_result.success assert persist_result.intelligence_id == "intel-1" assert persist_result.impact_ids == ["impact-1"] # 4 MinIO uploads: prompt, raw_output, validation, intelligence assert minio.put_object.call_count == 4 # --------------------------------------------------------------------------- # Stage 4: Aggregation # --------------------------------------------------------------------------- class TestAggregationStage: """Verify trend summary assembly from document impact records.""" def _make_impacts_from_extraction(self) -> list[ImpactRow]: """Build ImpactRows that mirror what the extraction stage would produce.""" return [ ImpactRow( document_id="doc-1", confidence=0.88, novelty_score=0.7, source_credibility=0.8, sentiment="positive", impact_score=0.75, catalyst_type="earnings", key_facts=["Record revenue $120B", "$100B buyback"], risks=["EU regulatory scrutiny"], published_at=NOW - timedelta(hours=2), ), ImpactRow( document_id="doc-2", confidence=0.72, novelty_score=0.5, source_credibility=0.7, sentiment="positive", impact_score=0.6, catalyst_type="rating_change", key_facts=["Goldman raised target to $250"], risks=[], published_at=NOW - timedelta(hours=4), ), ImpactRow( document_id="doc-3", confidence=0.65, novelty_score=0.4, source_credibility=0.6, sentiment="negative", impact_score=0.4, catalyst_type="legal", key_facts=["EU DMA investigation"], risks=["Potential fines"], published_at=NOW - timedelta(hours=6), ), ] def test_aggregation_produces_bullish_trend(self): impacts = self._make_impacts_from_extraction() signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "AAPL", "7d", signals, impacts, reference_time=NOW, ) summary = assembled.summary assert summary.entity_id == "AAPL" assert summary.window == TrendWindow.SEVEN_DAY # Two positive, one negative → should be bullish assert summary.trend_direction == TrendDirection.BULLISH assert summary.trend_strength > 0 assert summary.confidence > 0 assert len(summary.top_supporting_evidence) >= 1 assert len(summary.top_opposing_evidence) >= 1 assert summary.contradiction_score > 0 # has opposing signal def test_aggregation_evidence_rankings_are_populated(self): impacts = self._make_impacts_from_extraction() signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "AAPL", "7d", signals, impacts, reference_time=NOW, ) # Supporting evidence should include the positive docs supporting_ids = {e.document_id for e in assembled.supporting_evidence} assert "doc-1" in supporting_ids assert "doc-2" in supporting_ids # Opposing evidence should include the negative doc opposing_ids = {e.document_id for e in assembled.opposing_evidence} assert "doc-3" in opposing_ids def test_aggregation_extracts_catalysts_and_risks(self): impacts = self._make_impacts_from_extraction() signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "AAPL", "7d", signals, impacts, reference_time=NOW, ) summary = assembled.summary assert len(summary.dominant_catalysts) > 0 assert "earnings" in summary.dominant_catalysts assert len(summary.material_risks) > 0 # --------------------------------------------------------------------------- # Stage 5: Recommendation # --------------------------------------------------------------------------- class TestRecommendationStage: """Verify recommendation generation from trend summaries.""" def _make_trend_from_aggregation(self): """Build a TrendSummary that mirrors aggregation output.""" impacts = [ ImpactRow( document_id="doc-1", confidence=0.88, novelty_score=0.7, source_credibility=0.8, sentiment="positive", impact_score=0.75, catalyst_type="earnings", key_facts=["Record revenue"], risks=["EU regulatory"], published_at=NOW - timedelta(hours=2), ), ImpactRow( document_id="doc-2", confidence=0.72, novelty_score=0.5, source_credibility=0.7, sentiment="positive", impact_score=0.6, catalyst_type="rating_change", key_facts=["Target raised"], risks=[], published_at=NOW - timedelta(hours=4), ), ImpactRow( document_id="doc-3", confidence=0.65, novelty_score=0.4, source_credibility=0.6, sentiment="negative", impact_score=0.4, catalyst_type="legal", key_facts=["DMA investigation"], risks=["Potential fines"], published_at=NOW - timedelta(hours=6), ), ] signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "AAPL", "7d", signals, impacts, reference_time=NOW, ) return assembled.summary def test_eligibility_produces_buy_for_bullish_trend(self): summary = self._make_trend_from_aggregation() result = evaluate_eligibility(summary) assert result.action == ActionType.BUY assert result.eligible def test_recommendation_has_thesis_and_evidence(self): summary = self._make_trend_from_aggregation() result = evaluate_eligibility(summary) rec = build_recommendation(summary, result, reference_time=NOW) assert rec.ticker == "AAPL" assert rec.action == ActionType.BUY assert len(rec.thesis) > 0 assert "[risk:" in rec.thesis assert len(rec.evidence_refs) > 0 assert rec.time_horizon == "swing_1d_10d" def test_recommendation_position_sizing_is_bounded(self): summary = self._make_trend_from_aggregation() result = evaluate_eligibility(summary) rec = build_recommendation(summary, result, reference_time=NOW) assert 0 < rec.position_sizing.portfolio_pct <= 0.05 assert 0 < rec.position_sizing.max_loss_pct <= 0.01 def test_recommendation_mode_reflects_confidence(self): summary = self._make_trend_from_aggregation() result = evaluate_eligibility(summary) rec = build_recommendation(summary, result, reference_time=NOW) # With 3 impact records the aggregated confidence is moderate (~0.41), # which is below the paper_confidence_threshold (0.50). The eligibility # engine correctly assigns INFORMATIONAL mode for BUY actions with # sub-threshold confidence. This validates Requirement 7.4. if summary.confidence >= 0.50: assert rec.mode in ( RecommendationMode.PAPER_ELIGIBLE, RecommendationMode.LIVE_ELIGIBLE, ) else: assert rec.mode == RecommendationMode.INFORMATIONAL def test_suppression_blocks_low_quality_data(self): summary = self._make_trend_from_aggregation() low_quality_ctx = DataQualityContext( total_documents=5, valid_documents=1, failed_documents=4, avg_extraction_confidence=0.2, newest_evidence_at=NOW - timedelta(days=14), source_types=set(), ) suppression = evaluate_suppression( summary, quality_ctx=low_quality_ctx, reference_time=NOW, ) assert suppression.suppressed assert len(suppression.reasons) > 0 # --------------------------------------------------------------------------- # Full pipeline integration # --------------------------------------------------------------------------- class TestFullPipelineIntegration: """End-to-end test wiring all stages together with real logic.""" def test_html_to_recommendation_pipeline(self): """Walk a document through parse → validate extraction → aggregate → recommend.""" # --- Stage 1: Parse HTML --- parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-q2") assert parsed.body_text assert parsed.confidence != "low" mentions = detect_company_mentions(parsed.body_text, COMPANY_ALIASES) assert any(m["ticker"] == "AAPL" for m in mentions) # --- Stage 2: Validate extraction output --- report = validate_extraction( SAMPLE_EXTRACTION_JSON, document_text=parsed.body_text, ) assert report.valid extraction = report.parsed assert extraction is not None assert extraction.companies[0].ticker == "AAPL" # --- Stage 3: Build impact records from extraction --- company = extraction.companies[0] impact = ImpactRow( document_id="doc-pipeline-1", confidence=extraction.confidence, novelty_score=extraction.novelty_score, source_credibility=0.8, sentiment=company.sentiment.value, impact_score=company.impact_score, catalyst_type=company.catalyst_type.value, key_facts=company.key_facts, risks=company.risks, published_at=NOW - timedelta(hours=1), ) # Add a second supporting document for richer aggregation impact2 = ImpactRow( document_id="doc-pipeline-2", confidence=0.75, novelty_score=0.5, source_credibility=0.7, sentiment="positive", impact_score=0.6, catalyst_type="rating_change", key_facts=["Analyst upgrade"], risks=[], published_at=NOW - timedelta(hours=3), ) impacts = [impact, impact2] # --- Stage 4: Aggregate into trend summary --- signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "AAPL", "7d", signals, impacts, reference_time=NOW, ) summary = assembled.summary assert summary.trend_direction == TrendDirection.BULLISH assert summary.confidence > 0 assert len(summary.top_supporting_evidence) > 0 # --- Stage 5: Generate recommendation --- eligibility = evaluate_eligibility(summary) assert eligibility.action == ActionType.BUY assert eligibility.eligible rec = build_recommendation(summary, eligibility, reference_time=NOW) # Final assertions: the recommendation is coherent end-to-end assert rec.ticker == "AAPL" assert rec.action == ActionType.BUY assert rec.confidence == summary.confidence assert len(rec.evidence_refs) > 0 assert rec.thesis.startswith("[risk:") assert "AAPL" in rec.thesis assert "bullish" in rec.thesis assert rec.time_horizon == "swing_1d_10d" assert 0 < rec.position_sizing.portfolio_pct <= 0.05 def test_low_quality_document_is_blocked(self): """A low-quality parse should not produce a trade-eligible recommendation.""" # Minimal HTML that produces a low-quality parse bad_html = "

Ad. Subscribe now.

" parsed = parse_html(bad_html, "https://example.com/junk") # Low quality parse → should not advance to extraction # The parser worker checks confidence != "low" before enqueuing if parsed.confidence == "low" or parsed.quality_score < 0.3: # This is the expected path: document blocked at parse stage return # If somehow it passes parsing, suppression should catch it # Build a minimal trend with low data quality from services.shared.schemas import TrendSummary summary = TrendSummary( entity_type="company", entity_id="JUNK", window=TrendWindow.SEVEN_DAY, trend_direction=TrendDirection.BULLISH, trend_strength=0.3, confidence=0.3, top_supporting_evidence=["doc-1"], generated_at=NOW, ) suppression = evaluate_suppression(summary, reference_time=NOW) # With only 1 evidence doc and low confidence, should be suppressed assert suppression.suppressed def test_bearish_signal_produces_sell_recommendation(self): """Negative sentiment documents should produce a SELL recommendation.""" impacts = [ ImpactRow( document_id="doc-bear-1", confidence=0.82, novelty_score=0.6, source_credibility=0.8, sentiment="negative", impact_score=0.7, catalyst_type="legal", key_facts=["Major lawsuit filed"], risks=["Potential $5B fine"], published_at=NOW - timedelta(hours=1), ), ImpactRow( document_id="doc-bear-2", confidence=0.78, novelty_score=0.5, source_credibility=0.75, sentiment="negative", impact_score=0.65, catalyst_type="earnings", key_facts=["Revenue miss by 15%"], risks=["Guidance lowered"], published_at=NOW - timedelta(hours=3), ), ] signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "TSLA", "7d", signals, impacts, reference_time=NOW, ) summary = assembled.summary assert summary.trend_direction == TrendDirection.BEARISH eligibility = evaluate_eligibility(summary) assert eligibility.action == ActionType.SELL rec = build_recommendation(summary, eligibility, reference_time=NOW) assert rec.ticker == "TSLA" assert rec.action == ActionType.SELL assert "SELL" in rec.thesis def test_contradictory_signals_produce_mixed_or_watch(self): """Equal opposing signals should result in WATCH or MIXED direction.""" impacts = [ ImpactRow( document_id="doc-pos", confidence=0.8, novelty_score=0.5, source_credibility=0.8, sentiment="positive", impact_score=0.6, catalyst_type="earnings", key_facts=["Beat expectations"], risks=[], published_at=NOW - timedelta(hours=1), ), ImpactRow( document_id="doc-neg", confidence=0.8, novelty_score=0.5, source_credibility=0.8, sentiment="negative", impact_score=0.6, catalyst_type="legal", key_facts=["Lawsuit filed"], risks=["Regulatory risk"], published_at=NOW - timedelta(hours=1), ), ] signals = build_weighted_signals(impacts, NOW, "7d") assembled = assemble_trend_with_evidence( "MSFT", "7d", signals, impacts, reference_time=NOW, ) summary = assembled.summary assert summary.trend_direction in (TrendDirection.MIXED, TrendDirection.NEUTRAL) assert summary.contradiction_score > 0 eligibility = evaluate_eligibility(summary) rec = build_recommendation(summary, eligibility, reference_time=NOW) # Contradictory signals → WATCH or HOLD, mode should be informational assert rec.action in (ActionType.WATCH, ActionType.HOLD) assert rec.mode == RecommendationMode.INFORMATIONAL