phase 14-15: docker build validation and helm deployment

2026-04-11 11:59:45 -07:00
parent 7394d241c9
commit ce10afa034
179 changed files with 32559 additions and 576 deletions
@@ -0,0 +1,16 @@
+# Replay Dataset for Deterministic Extraction Testing
+
+Archived document fixtures used to verify that the extraction pipeline
+produces consistent, schema-valid results across code changes.
+
+Each fixture is a JSON file containing:
+- `document_id`: stable identifier for the fixture
+- `document_type`: article, filing, transcript, or press_release
+- `document_text`: normalized text as it would arrive from the parser
+- `known_tickers`: ticker hints passed to the extraction prompt
+- `expected_extraction`: the expected extraction result (schema-valid)
+- `metadata`: fixture provenance info (created_at, description, schema_version)
+
+The replay runner (`tests/test_replay_extraction.py`) loads these fixtures,
+validates the expected outputs against the current extraction schema, and
+optionally runs them through a live Ollama instance for end-to-end checks.
@@ -0,0 +1,45 @@
+{
+  "document_id": "replay-001-aapl-earnings",
+  "document_type": "article",
+  "document_text": "Apple Inc. reported fiscal Q1 2026 results that exceeded Wall Street expectations. Revenue came in at $124.3 billion, up 9% year-over-year, driven by strong iPhone 17 demand and a 22% surge in Services revenue to $26.1 billion. CEO Tim Cook highlighted record-setting performance in emerging markets, particularly India and Southeast Asia.\n\nEarnings per share of $2.41 beat the consensus estimate of $2.35. Gross margin expanded to 46.9%, up from 45.9% a year ago, reflecting favorable product mix and supply chain efficiencies.\n\nAnalysts at Morgan Stanley raised their price target to $245 from $230, citing the Services growth trajectory. However, Greater China revenue declined 4% amid increased competition from Huawei, which Cook acknowledged as a headwind.\n\nApple also announced a $110 billion share buyback program, the largest in corporate history, and raised its quarterly dividend by 5% to $0.26 per share.",
+  "known_tickers": ["AAPL"],
+  "expected_extraction": {
+    "summary": "Apple reported Q1 2026 results beating expectations with $124.3B revenue up 9% YoY, driven by iPhone 17 demand and 22% Services growth, though China revenue declined 4%.",
+    "companies": [
+      {
+        "ticker": "AAPL",
+        "company_name": "Apple Inc.",
+        "relevance": 0.95,
+        "sentiment": "positive",
+        "impact_score": 0.8,
+        "impact_horizon": "1d_30d",
+        "catalyst_type": "earnings",
+        "key_facts": [
+          "Revenue $124.3 billion, up 9% year-over-year",
+          "EPS $2.41 beat consensus of $2.35",
+          "Services revenue surged 22% to $26.1 billion",
+          "Greater China revenue declined 4%",
+          "$110 billion share buyback announced"
+        ],
+        "risks": [
+          "Greater China revenue declined 4% amid Huawei competition"
+        ],
+        "evidence_spans": [
+          "Revenue came in at $124.3 billion, up 9% year-over-year",
+          "Earnings per share of $2.41 beat the consensus estimate of $2.35",
+          "Greater China revenue declined 4% amid increased competition from Huawei"
+        ]
+      }
+    ],
+    "macro_themes": [],
+    "novelty_score": 0.5,
+    "confidence": 0.9,
+    "extraction_warnings": []
+  },
+  "metadata": {
+    "created_at": "2026-04-11",
+    "description": "Synthetic Apple earnings article for replay testing",
+    "schema_version": "2.0.0",
+    "category": "earnings_beat"
+  }
+}
@@ -0,0 +1,20 @@
+{
+  "document_id": "replay-004-low-quality",
+  "document_type": "article",
+  "document_text": "Error 403 Forbidden. Access denied. Please subscribe to continue reading. Cookie preferences updated. Share on Twitter. Share on Facebook.",
+  "known_tickers": ["AAPL"],
+  "expected_extraction": {
+    "summary": "",
+    "companies": [],
+    "macro_themes": [],
+    "novelty_score": 0.1,
+    "confidence": 0.1,
+    "extraction_warnings": ["insufficient_content"]
+  },
+  "metadata": {
+    "created_at": "2026-04-11",
+    "description": "Garbled/paywall document that should produce empty extraction with low confidence (Req 4.3, 5.4)",
+    "schema_version": "2.0.0",
+    "category": "low_quality"
+  }
+}
@@ -0,0 +1,44 @@
+{
+  "document_id": "replay-005-msft-press-release",
+  "document_type": "press_release",
+  "document_text": "REDMOND, Wash. — April 8, 2026 — Microsoft Corp. today announced it has entered into a definitive agreement to acquire Nuance Communications, Inc. subsidiary DataSphere AI for approximately $4.2 billion in an all-cash transaction. The acquisition is expected to close in Q3 2026, subject to regulatory approval.\n\nDataSphere AI specializes in healthcare-specific large language models and clinical decision support systems deployed across 1,200 hospitals in the United States. The acquisition will strengthen Microsoft's Azure Health Cloud platform and expand its presence in the $280 billion global healthcare IT market.\n\nSatya Nadella, Chairman and CEO of Microsoft, said: \"DataSphere AI's clinical language models are the most advanced in the industry. This acquisition accelerates our mission to empower every healthcare organization with AI that improves patient outcomes.\"\n\nThe transaction is expected to be accretive to Microsoft's earnings per share within 18 months of closing. Microsoft plans to integrate DataSphere's technology into Azure AI services and the Microsoft Cloud for Healthcare platform.",
+  "known_tickers": ["MSFT"],
+  "expected_extraction": {
+    "summary": "Microsoft announced a $4.2 billion all-cash acquisition of DataSphere AI, a healthcare LLM company deployed in 1,200 U.S. hospitals, to strengthen Azure Health Cloud.",
+    "companies": [
+      {
+        "ticker": "MSFT",
+        "company_name": "Microsoft Corp.",
+        "relevance": 0.95,
+        "sentiment": "positive",
+        "impact_score": 0.7,
+        "impact_horizon": "1d_30d",
+        "catalyst_type": "m_and_a",
+        "key_facts": [
+          "Acquiring DataSphere AI for $4.2 billion in all-cash transaction",
+          "Expected to close Q3 2026 subject to regulatory approval",
+          "DataSphere deployed across 1,200 hospitals in the United States",
+          "Expected to be accretive to EPS within 18 months"
+        ],
+        "risks": [
+          "Subject to regulatory approval"
+        ],
+        "evidence_spans": [
+          "entered into a definitive agreement to acquire Nuance Communications, Inc. subsidiary DataSphere AI for approximately $4.2 billion",
+          "deployed across 1,200 hospitals in the United States",
+          "expected to be accretive to Microsoft's earnings per share within 18 months of closing"
+        ]
+      }
+    ],
+    "macro_themes": ["ai_capex"],
+    "novelty_score": 0.75,
+    "confidence": 0.9,
+    "extraction_warnings": []
+  },
+  "metadata": {
+    "created_at": "2026-04-11",
+    "description": "Synthetic Microsoft M&A press release for replay testing",
+    "schema_version": "2.0.0",
+    "category": "press_release_m_and_a"
+  }
+}
@@ -0,0 +1,97 @@
+{
+  "document_id": "replay-003-multi-company",
+  "document_type": "article",
+  "document_text": "The semiconductor sector faced a turbulent week as new U.S. export restrictions targeting advanced AI chips sent shockwaves through the industry. NVIDIA Corporation saw its shares drop 7% on Monday after the Commerce Department announced expanded controls on shipments of H200 and B100 GPUs to several Middle Eastern countries.\n\nAdvanced Micro Devices was also affected, declining 4.2%, though analysts noted AMD's exposure to the restricted markets is smaller than NVIDIA's. Bernstein analyst Stacy Rasgon estimated NVIDIA could lose $4-5 billion in annual revenue from the new restrictions, while AMD's impact would be closer to $800 million.\n\nMeanwhile, Taiwan Semiconductor Manufacturing Company reported that its advanced packaging capacity for AI chips remains fully booked through 2027, suggesting underlying demand remains robust despite the regulatory headwinds. TSMC shares rose 1.3% on the news.\n\nIntel Corporation, which has been positioning its Gaudi 3 accelerator as a domestic alternative, saw a modest 2.1% gain as investors speculated the restrictions could redirect demand toward U.S.-manufactured alternatives.",
+  "known_tickers": ["NVDA", "AMD", "TSM", "INTC"],
+  "expected_extraction": {
+    "summary": "New U.S. export restrictions on advanced AI chips hit NVIDIA (-7%) and AMD (-4.2%), while TSMC reported full AI packaging capacity through 2027 and Intel gained on domestic alternative positioning.",
+    "companies": [
+      {
+        "ticker": "NVDA",
+        "company_name": "NVIDIA Corporation",
+        "relevance": 0.9,
+        "sentiment": "negative",
+        "impact_score": 0.8,
+        "impact_horizon": "1d_30d",
+        "catalyst_type": "macro",
+        "key_facts": [
+          "Shares dropped 7% on expanded export controls",
+          "H200 and B100 GPUs targeted by new restrictions",
+          "Estimated $4-5 billion annual revenue loss from restrictions"
+        ],
+        "risks": [
+          "Expanded U.S. export controls on AI chip shipments to Middle Eastern countries"
+        ],
+        "evidence_spans": [
+          "NVIDIA Corporation saw its shares drop 7% on Monday after the Commerce Department announced expanded controls",
+          "NVIDIA could lose $4-5 billion in annual revenue from the new restrictions"
+        ]
+      },
+      {
+        "ticker": "AMD",
+        "company_name": "Advanced Micro Devices",
+        "relevance": 0.7,
+        "sentiment": "negative",
+        "impact_score": 0.55,
+        "impact_horizon": "1d_30d",
+        "catalyst_type": "macro",
+        "key_facts": [
+          "Shares declined 4.2%",
+          "Estimated $800 million annual revenue impact"
+        ],
+        "risks": [
+          "Exposure to restricted export markets"
+        ],
+        "evidence_spans": [
+          "Advanced Micro Devices was also affected, declining 4.2%",
+          "AMD's impact would be closer to $800 million"
+        ]
+      },
+      {
+        "ticker": "TSM",
+        "company_name": "Taiwan Semiconductor Manufacturing Company",
+        "relevance": 0.65,
+        "sentiment": "positive",
+        "impact_score": 0.5,
+        "impact_horizon": "1d_7d",
+        "catalyst_type": "product",
+        "key_facts": [
+          "Advanced packaging capacity for AI chips fully booked through 2027",
+          "Shares rose 1.3%"
+        ],
+        "risks": [],
+        "evidence_spans": [
+          "advanced packaging capacity for AI chips remains fully booked through 2027",
+          "TSMC shares rose 1.3% on the news"
+        ]
+      },
+      {
+        "ticker": "INTC",
+        "company_name": "Intel Corporation",
+        "relevance": 0.5,
+        "sentiment": "positive",
+        "impact_score": 0.35,
+        "impact_horizon": "1d_7d",
+        "catalyst_type": "macro",
+        "key_facts": [
+          "Gaudi 3 accelerator positioned as domestic alternative",
+          "Shares gained 2.1%"
+        ],
+        "risks": [],
+        "evidence_spans": [
+          "Intel Corporation, which has been positioning its Gaudi 3 accelerator as a domestic alternative, saw a modest 2.1% gain"
+        ]
+      }
+    ],
+    "macro_themes": ["ai_capex"],
+    "novelty_score": 0.7,
+    "confidence": 0.85,
+    "extraction_warnings": []
+  },
+  "metadata": {
+    "created_at": "2026-04-11",
+    "description": "Synthetic multi-company semiconductor article for replay testing (Req 5.5)",
+    "schema_version": "2.0.0",
+    "category": "multi_company"
+  }
+}
@@ -0,0 +1,45 @@
+{
+  "document_id": "replay-002-tsla-filing",
+  "document_type": "filing",
+  "document_text": "UNITED STATES SECURITIES AND EXCHANGE COMMISSION\nWashington, D.C. 20549\nFORM 8-K\n\nCURRENT REPORT\nPursuant to Section 13 or 15(d) of the Securities Exchange Act of 1934\n\nDate of Report: March 28, 2026\n\nTESLA, INC.\n(Exact name of registrant as specified in its charter)\n\nItem 2.02 Results of Operations and Financial Condition.\n\nOn March 28, 2026, Tesla, Inc. issued a press release announcing its financial results for the fiscal quarter ended March 31, 2026. Total revenue was $25.8 billion, compared to $23.3 billion in the prior year quarter. Automotive revenue was $20.1 billion. Energy generation and storage revenue increased 67% to $3.2 billion.\n\nGAAP net income was $2.1 billion, or $0.61 per diluted share. Non-GAAP net income was $2.5 billion, or $0.73 per diluted share.\n\nThe Company disclosed that vehicle deliveries totaled 478,000 units, below the consensus estimate of 495,000 units. Management attributed the shortfall to production line retooling for the refreshed Model Y at the Fremont and Shanghai factories.\n\nRisk Factors: The Company noted ongoing regulatory uncertainty in the European Union regarding autonomous driving software certification, which could delay Full Self-Driving rollout in key markets. Additionally, lithium carbonate prices have increased 18% quarter-over-quarter, pressuring battery cell costs.",
+  "known_tickers": ["TSLA"],
+  "expected_extraction": {
+    "summary": "Tesla 8-K filing reports Q1 2026 results with $25.8B revenue, but vehicle deliveries of 478K missed consensus of 495K due to Model Y retooling. Energy segment grew 67%.",
+    "companies": [
+      {
+        "ticker": "TSLA",
+        "company_name": "Tesla, Inc.",
+        "relevance": 0.95,
+        "sentiment": "mixed",
+        "impact_score": 0.75,
+        "impact_horizon": "1d_30d",
+        "catalyst_type": "earnings",
+        "key_facts": [
+          "Total revenue $25.8 billion vs $23.3 billion prior year",
+          "Vehicle deliveries 478,000 units, below consensus of 495,000",
+          "Energy generation and storage revenue increased 67% to $3.2 billion",
+          "GAAP net income $2.1 billion or $0.61 per diluted share"
+        ],
+        "risks": [
+          "EU regulatory uncertainty regarding autonomous driving software certification",
+          "Lithium carbonate prices increased 18% quarter-over-quarter"
+        ],
+        "evidence_spans": [
+          "Total revenue was $25.8 billion, compared to $23.3 billion in the prior year quarter",
+          "vehicle deliveries totaled 478,000 units, below the consensus estimate of 495,000 units",
+          "lithium carbonate prices have increased 18% quarter-over-quarter, pressuring battery cell costs"
+        ]
+      }
+    ],
+    "macro_themes": [],
+    "novelty_score": 0.45,
+    "confidence": 0.88,
+    "extraction_warnings": []
+  },
+  "metadata": {
+    "created_at": "2026-04-11",
+    "description": "Synthetic Tesla 8-K filing for replay testing",
+    "schema_version": "2.0.0",
+    "category": "sec_filing"
+  }
+}
@@ -0,0 +1,100 @@
+"""Tests for adapter base interface and result types."""
+from datetime import datetime
+
+from services.adapters.base import AdapterResult, BaseAdapter
+
+
+class TestAdapterResult:
+    def test_ok_when_items_and_no_error(self):
+        r = AdapterResult(
+            source_type="market_api",
+            ticker="AAPL",
+            items=[{"price": 150}],
+            raw_payload=b'{"price":150}',
+            content_hash="abc123",
+            fetched_at=datetime(2026, 4, 11),
+        )
+        assert r.ok is True
+        assert r.item_count == 1
+
+    def test_not_ok_when_error(self):
+        r = AdapterResult(
+            source_type="market_api",
+            ticker="AAPL",
+            items=[],
+            raw_payload=b"",
+            content_hash="",
+            fetched_at=datetime(2026, 4, 11),
+            error="timeout",
+        )
+        assert r.ok is False
+
+    def test_not_ok_when_empty_items(self):
+        r = AdapterResult(
+            source_type="news_api",
+            ticker="MSFT",
+            items=[],
+            raw_payload=b"{}",
+            content_hash="def456",
+            fetched_at=datetime(2026, 4, 11),
+        )
+        assert r.ok is False
+
+    def test_http_metadata_defaults(self):
+        r = AdapterResult(
+            source_type="market_api",
+            ticker="AAPL",
+            items=[{"x": 1}],
+            raw_payload=b"x",
+            content_hash="h",
+            fetched_at=datetime(2026, 4, 11),
+        )
+        assert r.http_status is None
+        assert r.response_time_ms is None
+        assert r.metadata == {}
+
+
+class _StubAdapter(BaseAdapter):
+    async def fetch(self, ticker, config):
+        return AdapterResult(
+            source_type="market_api",
+            ticker=ticker,
+            items=[],
+            raw_payload=b"",
+            content_hash="",
+            fetched_at=datetime(2026, 4, 11),
+        )
+
+    def source_type(self):
+        return "market_api"
+
+
+class _FilingsStub(BaseAdapter):
+    async def fetch(self, ticker, config):
+        return AdapterResult(
+            source_type="filings_api",
+            ticker=ticker,
+            items=[],
+            raw_payload=b"",
+            content_hash="",
+            fetched_at=datetime(2026, 4, 11),
+        )
+
+    def source_type(self):
+        return "filings_api"
+
+
+class TestBaseAdapterHelpers:
+    def test_bucket_name_market(self):
+        adapter = _StubAdapter()
+        assert adapter.bucket_name() == "stonks-raw-market"
+
+    def test_bucket_name_filings(self):
+        adapter = _FilingsStub()
+        assert adapter.bucket_name() == "stonks-raw-filings"
+
+    def test_artifact_path_format(self):
+        adapter = _StubAdapter()
+        now = datetime(2026, 4, 11, 14, 30)
+        path = adapter.artifact_path("AAPL", "doc-123", now)
+        assert path == "market_api/AAPL/2026/04/11/doc-123/raw.json"
@@ -0,0 +1,248 @@
+"""Tests for aggregation scoring — recency decay, source credibility weighting,
+and market context integration."""
+from datetime import datetime, timedelta, timezone
+
+from services.aggregation.scoring import (
+    DEFAULT_CONFIG,
+    ScoringConfig,
+    WeightedSignal,
+    compute_signal_weight,
+    credibility_weight,
+    market_context_multiplier,
+    recency_weight,
+    sentiment_to_numeric,
+    weighted_sentiment_average,
+)
+from services.shared.schemas import MarketContext
+
+
+# ---------------------------------------------------------------------------
+# recency_weight
+# ---------------------------------------------------------------------------
+
+
+def test_recency_weight_at_zero_age():
+    """A document published exactly at reference time gets weight 1.0."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    assert recency_weight(now, now, "7d") == 1.0
+
+
+def test_recency_weight_future_document():
+    """A document published after reference time is clamped to 1.0."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    future = now + timedelta(hours=1)
+    assert recency_weight(future, now, "7d") == 1.0
+
+
+def test_recency_weight_at_one_half_life():
+    """After exactly one half-life the weight should be ~0.5."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    half_life_7d = DEFAULT_CONFIG.half_life_hours["7d"]  # 72 hours
+    published = now - timedelta(hours=half_life_7d)
+    w = recency_weight(published, now, "7d")
+    assert abs(w - 0.5) < 1e-9
+
+
+def test_recency_weight_very_old_clamps_to_min():
+    """A very old document should not go below min_recency_weight."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    ancient = now - timedelta(days=365)
+    w = recency_weight(ancient, now, "7d")
+    assert w == DEFAULT_CONFIG.min_recency_weight
+
+
+def test_recency_weight_different_windows():
+    """Shorter windows decay faster than longer ones."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    published = now - timedelta(hours=24)
+    w_intraday = recency_weight(published, now, "intraday")
+    w_90d = recency_weight(published, now, "90d")
+    assert w_intraday < w_90d
+
+
+def test_recency_weight_naive_datetimes():
+    """Naive datetimes are treated as UTC."""
+    now = datetime(2026, 4, 11, 12, 0, 0)
+    published = now - timedelta(hours=72)
+    w = recency_weight(published, now, "7d")
+    assert abs(w - 0.5) < 1e-9
+
+
+# ---------------------------------------------------------------------------
+# credibility_weight
+# ---------------------------------------------------------------------------
+
+
+def test_credibility_weight_high():
+    """High credibility source gets weight close to 1.0."""
+    assert abs(credibility_weight(1.0) - 1.0) < 1e-9
+
+
+def test_credibility_weight_low_clamped():
+    """Credibility below floor is clamped to floor."""
+    w = credibility_weight(0.0)
+    assert abs(w - DEFAULT_CONFIG.credibility_floor) < 1e-9
+
+
+def test_credibility_weight_mid():
+    """Mid-range credibility passes through with exponent=1."""
+    assert abs(credibility_weight(0.5) - 0.5) < 1e-9
+
+
+def test_credibility_weight_custom_exponent():
+    """Custom exponent penalises low credibility more."""
+    cfg = ScoringConfig(credibility_exponent=2.0)
+    w = credibility_weight(0.5, config=cfg)
+    assert abs(w - 0.25) < 1e-9
+
+
+# ---------------------------------------------------------------------------
+# compute_signal_weight
+# ---------------------------------------------------------------------------
+
+
+def test_signal_weight_gates_low_confidence():
+    """Documents below confidence floor get zero combined weight."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    sw = compute_signal_weight(
+        published_at=now,
+        reference_time=now,
+        window="7d",
+        source_credibility=0.8,
+        extraction_confidence=0.1,  # below default 0.2 floor
+    )
+    assert sw.combined == 0.0
+    assert sw.confidence_gate == 0.0
+
+
+def test_signal_weight_fresh_high_credibility():
+    """Fresh doc with high credibility and default novelty gets a strong weight."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    sw = compute_signal_weight(
+        published_at=now,
+        reference_time=now,
+        window="7d",
+        source_credibility=1.0,
+        novelty_score=0.5,
+        extraction_confidence=0.8,
+    )
+    # recency=1.0, credibility=1.0, bonus=0.125, gate=1.0
+    expected = 1.0 * 1.0 * (1.0 + 0.125)
+    assert abs(sw.combined - expected) < 1e-9
+
+
+def test_signal_weight_novelty_bonus():
+    """Higher novelty gives a proportionally higher combined weight."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    sw_low = compute_signal_weight(now, now, "7d", 0.8, novelty_score=0.0, extraction_confidence=0.8)
+    sw_high = compute_signal_weight(now, now, "7d", 0.8, novelty_score=1.0, extraction_confidence=0.8)
+    assert sw_high.combined > sw_low.combined
+
+
+# ---------------------------------------------------------------------------
+# sentiment helpers
+# ---------------------------------------------------------------------------
+
+
+def test_sentiment_to_numeric():
+    assert sentiment_to_numeric("positive") == 1.0
+    assert sentiment_to_numeric("negative") == -1.0
+    assert sentiment_to_numeric("neutral") == 0.0
+    assert sentiment_to_numeric("mixed") == 0.0
+    assert sentiment_to_numeric("unknown") == 0.0
+
+
+def test_weighted_sentiment_average_empty():
+    assert weighted_sentiment_average([]) == 0.0
+
+
+def test_weighted_sentiment_average_single():
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    sw = compute_signal_weight(now, now, "7d", 0.8, extraction_confidence=0.8)
+    signals = [WeightedSignal("doc1", sw, sentiment_value=1.0, impact_score=0.7)]
+    avg = weighted_sentiment_average(signals)
+    assert abs(avg - 1.0) < 1e-9  # single positive signal → 1.0
+
+
+def test_weighted_sentiment_average_opposing():
+    """Equal-weight opposing signals should cancel to ~0."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    sw = compute_signal_weight(now, now, "7d", 0.8, extraction_confidence=0.8)
+    signals = [
+        WeightedSignal("doc1", sw, sentiment_value=1.0, impact_score=0.5),
+        WeightedSignal("doc2", sw, sentiment_value=-1.0, impact_score=0.5),
+    ]
+    avg = weighted_sentiment_average(signals)
+    assert abs(avg) < 1e-9
+
+
+# ---------------------------------------------------------------------------
+# market_context_multiplier
+# ---------------------------------------------------------------------------
+
+
+def test_market_context_multiplier_none():
+    """No market context returns 1.0 (no adjustment)."""
+    assert market_context_multiplier(None) == 1.0
+
+
+def test_market_context_multiplier_no_data():
+    """MarketContext with no bars returns 1.0."""
+    ctx = MarketContext(ticker="AAPL", bars_available=0)
+    assert market_context_multiplier(ctx) == 1.0
+
+
+def test_market_context_multiplier_low_volatility():
+    """Below-threshold volatility produces no boost."""
+    ctx = MarketContext(ticker="AAPL", volatility=0.5, volume_change_pct=10.0, bars_available=5)
+    assert market_context_multiplier(ctx) == 1.0
+
+
+def test_market_context_multiplier_high_volatility():
+    """Above-threshold volatility produces a boost > 1.0."""
+    ctx = MarketContext(ticker="AAPL", volatility=3.0, volume_change_pct=10.0, bars_available=5)
+    m = market_context_multiplier(ctx)
+    assert m > 1.0
+    assert m <= 1.0 + DEFAULT_CONFIG.volatility_recency_boost_max + DEFAULT_CONFIG.volume_surge_boost
+
+
+def test_market_context_multiplier_volume_surge():
+    """Volume surge above threshold adds a boost."""
+    ctx = MarketContext(ticker="AAPL", volatility=0.5, volume_change_pct=80.0, bars_available=5)
+    m = market_context_multiplier(ctx)
+    assert abs(m - (1.0 + DEFAULT_CONFIG.volume_surge_boost)) < 1e-9
+
+
+def test_market_context_multiplier_both_triggers():
+    """Both volatility and volume surge stack."""
+    ctx = MarketContext(ticker="AAPL", volatility=3.0, volume_change_pct=80.0, bars_available=5)
+    m = market_context_multiplier(ctx)
+    # Should be > 1.0 + volume_surge_boost alone
+    assert m > 1.0 + DEFAULT_CONFIG.volume_surge_boost
+
+
+# ---------------------------------------------------------------------------
+# compute_signal_weight with market context
+# ---------------------------------------------------------------------------
+
+
+def test_signal_weight_with_market_context_boost():
+    """Market context with high volatility should increase combined weight."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    ctx = MarketContext(ticker="AAPL", volatility=3.0, volume_change_pct=80.0, bars_available=10)
+
+    sw_no_ctx = compute_signal_weight(now, now, "7d", 0.8, extraction_confidence=0.8)
+    sw_with_ctx = compute_signal_weight(now, now, "7d", 0.8, extraction_confidence=0.8, market_ctx=ctx)
+
+    assert sw_with_ctx.combined > sw_no_ctx.combined
+    assert sw_with_ctx.market_ctx_multiplier > 1.0
+    assert sw_no_ctx.market_ctx_multiplier == 1.0
+
+
+def test_signal_weight_market_context_gated_still_zero():
+    """Low confidence docs stay at zero even with market context boost."""
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    ctx = MarketContext(ticker="AAPL", volatility=5.0, volume_change_pct=100.0, bars_available=10)
+
+    sw = compute_signal_weight(now, now, "7d", 0.8, extraction_confidence=0.1, market_ctx=ctx)
+    assert sw.combined == 0.0
@@ -0,0 +1,318 @@
+"""Tests for aggregation worker — rolling window trend summary computation.
+
+Tests the pure logic functions (no DB required). The async DB functions
+are covered by integration tests.
+"""
+from datetime import datetime, timedelta, timezone
+
+from services.aggregation.scoring import (
+    ScoringConfig,
+    WeightedSignal,
+    compute_signal_weight,
+)
+from services.aggregation.worker import (
+    AggregationConfig,
+    AssembledTrend,
+    ImpactRow,
+    assemble_trend_summary,
+    assemble_trend_with_evidence,
+    build_weighted_signals,
+    compute_contradiction_score,
+    compute_trend_confidence,
+    derive_trend_direction,
+    extract_catalysts_and_risks,
+    rank_evidence,
+)
+from services.shared.schemas import MarketContext, TrendDirection, TrendWindow
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+
+def _make_impact(
+    doc_id: str = "doc-1",
+    sentiment: str = "positive",
+    impact_score: float = 0.7,
+    catalyst_type: str = "earnings",
+    confidence: float = 0.8,
+    source_credibility: float = 0.8,
+    novelty_score: float = 0.5,
+    published_at: datetime | None = None,
+    risks: list[str] | None = None,
+) -> ImpactRow:
+    return ImpactRow(
+        document_id=doc_id,
+        confidence=confidence,
+        novelty_score=novelty_score,
+        source_credibility=source_credibility,
+        sentiment=sentiment,
+        impact_score=impact_score,
+        catalyst_type=catalyst_type,
+        key_facts=["some fact"],
+        risks=risks or [],
+        published_at=published_at or NOW - timedelta(hours=1),
+    )
+
+
+# ---------------------------------------------------------------------------
+# build_weighted_signals
+# ---------------------------------------------------------------------------
+
+
+def test_build_weighted_signals_basic():
+    impacts = [_make_impact("d1"), _make_impact("d2", sentiment="negative")]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    assert len(signals) == 2
+    assert signals[0].document_id == "d1"
+    assert signals[0].sentiment_value == 1.0
+    assert signals[1].sentiment_value == -1.0
+    assert all(s.weight.combined > 0 for s in signals)
+
+
+def test_build_weighted_signals_low_confidence_gated():
+    impacts = [_make_impact("d1", confidence=0.1)]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    assert signals[0].weight.combined == 0.0
+
+
+# ---------------------------------------------------------------------------
+# derive_trend_direction
+# ---------------------------------------------------------------------------
+
+
+def test_direction_bullish():
+    assert derive_trend_direction(0.5) == TrendDirection.BULLISH
+
+
+def test_direction_bearish():
+    assert derive_trend_direction(-0.5) == TrendDirection.BEARISH
+
+
+def test_direction_neutral():
+    assert derive_trend_direction(0.05) == TrendDirection.NEUTRAL
+
+
+def test_direction_mixed_high_contradiction():
+    assert derive_trend_direction(0.1, contradiction_score=0.2) == TrendDirection.MIXED
+
+
+def test_direction_bullish_despite_contradiction():
+    """Strong sentiment overrides contradiction."""
+    assert derive_trend_direction(0.5, contradiction_score=0.3) == TrendDirection.BULLISH
+
+
+# ---------------------------------------------------------------------------
+# compute_contradiction_score
+# ---------------------------------------------------------------------------
+
+
+def test_contradiction_no_signals():
+    assert compute_contradiction_score([]) == 0.0
+
+
+def test_contradiction_all_positive():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [
+        WeightedSignal("d1", sw, sentiment_value=1.0, impact_score=0.5),
+        WeightedSignal("d2", sw, sentiment_value=1.0, impact_score=0.5),
+    ]
+    assert compute_contradiction_score(signals) == 0.0
+
+
+def test_contradiction_equal_opposing():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [
+        WeightedSignal("d1", sw, sentiment_value=1.0, impact_score=0.5),
+        WeightedSignal("d2", sw, sentiment_value=-1.0, impact_score=0.5),
+    ]
+    score = compute_contradiction_score(signals)
+    assert abs(score - 0.5) < 1e-4
+
+
+def test_contradiction_mostly_positive():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [
+        WeightedSignal("d1", sw, sentiment_value=1.0, impact_score=0.8),
+        WeightedSignal("d2", sw, sentiment_value=1.0, impact_score=0.8),
+        WeightedSignal("d3", sw, sentiment_value=-1.0, impact_score=0.3),
+    ]
+    score = compute_contradiction_score(signals)
+    assert 0.0 < score < 0.5
+
+
+# ---------------------------------------------------------------------------
+# rank_evidence
+# ---------------------------------------------------------------------------
+
+
+def test_rank_evidence_separates_supporting_opposing():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [
+        WeightedSignal("pos1", sw, sentiment_value=1.0, impact_score=0.9),
+        WeightedSignal("pos2", sw, sentiment_value=1.0, impact_score=0.3),
+        WeightedSignal("neg1", sw, sentiment_value=-1.0, impact_score=0.7),
+        WeightedSignal("neutral1", sw, sentiment_value=0.0, impact_score=0.5),
+    ]
+    supporting, opposing = rank_evidence(signals)
+    assert supporting == ["pos1", "pos2"]
+    assert opposing == ["neg1"]
+
+
+def test_rank_evidence_respects_max():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [
+        WeightedSignal(f"d{i}", sw, sentiment_value=1.0, impact_score=0.5)
+        for i in range(20)
+    ]
+    supporting, opposing = rank_evidence(signals, max_refs=3)
+    assert len(supporting) == 3
+    assert len(opposing) == 0
+
+
+# ---------------------------------------------------------------------------
+# extract_catalysts_and_risks
+# ---------------------------------------------------------------------------
+
+
+def test_extract_catalysts_and_risks():
+    impacts = [
+        _make_impact("d1", catalyst_type="earnings", risks=["regulatory risk"]),
+        _make_impact("d2", catalyst_type="earnings", risks=["supply chain"]),
+        _make_impact("d3", catalyst_type="product", risks=["regulatory risk"]),
+    ]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    catalysts, risks = extract_catalysts_and_risks(impacts, signals)
+    assert catalysts[0] == "earnings"  # highest cumulative weight
+    assert "product" in catalysts
+    # Risks should be deduplicated
+    risk_lower = [r.lower() for r in risks]
+    assert risk_lower.count("regulatory risk") == 1
+
+
+# ---------------------------------------------------------------------------
+# compute_trend_confidence
+# ---------------------------------------------------------------------------
+
+
+def test_confidence_no_signals():
+    assert compute_trend_confidence([], 0.0) == 0.0
+
+
+def test_confidence_increases_with_more_signals():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    few = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(2)]
+    many = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(15)]
+    c_few = compute_trend_confidence(few, 0.0)
+    c_many = compute_trend_confidence(many, 0.0)
+    assert c_many > c_few
+
+
+def test_confidence_penalized_by_contradiction():
+    sw = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    signals = [WeightedSignal(f"d{i}", sw, 1.0, 0.5) for i in range(5)]
+    c_low = compute_trend_confidence(signals, 0.0)
+    c_high = compute_trend_confidence(signals, 0.5)
+    assert c_high < c_low
+
+
+# ---------------------------------------------------------------------------
+# assemble_trend_summary
+# ---------------------------------------------------------------------------
+
+
+def test_assemble_trend_summary_bullish():
+    impacts = [
+        _make_impact("d1", sentiment="positive", impact_score=0.8),
+        _make_impact("d2", sentiment="positive", impact_score=0.6),
+    ]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    summary = assemble_trend_summary("AAPL", "7d", signals, impacts, reference_time=NOW)
+
+    assert summary.entity_id == "AAPL"
+    assert summary.window == TrendWindow.SEVEN_DAY
+    assert summary.trend_direction == TrendDirection.BULLISH
+    assert summary.trend_strength > 0
+    assert summary.confidence > 0
+    assert len(summary.top_supporting_evidence) > 0
+    assert summary.generated_at == NOW
+
+
+def test_assemble_trend_summary_mixed():
+    impacts = [
+        _make_impact("d1", sentiment="positive", impact_score=0.5),
+        _make_impact("d2", sentiment="negative", impact_score=0.5),
+    ]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    summary = assemble_trend_summary("TSLA", "7d", signals, impacts, reference_time=NOW)
+
+    # Equal opposing signals → neutral or mixed
+    assert summary.trend_direction in (TrendDirection.NEUTRAL, TrendDirection.MIXED)
+    assert summary.contradiction_score > 0
+
+
+def test_assemble_trend_summary_empty():
+    summary = assemble_trend_summary("AAPL", "7d", [], [], reference_time=NOW)
+    assert summary.trend_direction == TrendDirection.NEUTRAL
+    assert summary.trend_strength == 0.0
+    assert summary.confidence == 0.0
+
+
+def test_assemble_trend_summary_with_market_context():
+    impacts = [_make_impact("d1")]
+    ctx = MarketContext(ticker="AAPL", volatility=3.0, bars_available=5)
+    signals = build_weighted_signals(impacts, NOW, "7d", market_ctx=ctx)
+    summary = assemble_trend_summary("AAPL", "7d", signals, impacts, market_ctx=ctx, reference_time=NOW)
+    assert summary.market_context is not None
+    assert summary.market_context.ticker == "AAPL"
+
+
+# ---------------------------------------------------------------------------
+# AggregationConfig
+# ---------------------------------------------------------------------------
+
+
+def test_aggregation_config_defaults():
+    cfg = AggregationConfig()
+    assert len(cfg.effective_windows()) == len(TrendWindow)
+    assert isinstance(cfg.effective_scoring(), ScoringConfig)
+
+
+def test_aggregation_config_custom_windows():
+    cfg = AggregationConfig(windows=["7d", "30d"])
+    assert cfg.effective_windows() == ["7d", "30d"]
+
+
+# ---------------------------------------------------------------------------
+# assemble_trend_with_evidence
+# ---------------------------------------------------------------------------
+
+
+def test_assemble_trend_with_evidence_returns_ranked_details():
+    impacts = [
+        _make_impact("d1", sentiment="positive", impact_score=0.8),
+        _make_impact("d2", sentiment="negative", impact_score=0.6),
+        _make_impact("d3", sentiment="positive", impact_score=0.5),
+    ]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    result = assemble_trend_with_evidence("AAPL", "7d", signals, impacts, reference_time=NOW)
+
+    assert isinstance(result, AssembledTrend)
+    assert result.summary.entity_id == "AAPL"
+    # Supporting evidence should contain the positive docs
+    assert len(result.supporting_evidence) == 2
+    assert all(e.sentiment_value > 0 for e in result.supporting_evidence)
+    # Opposing evidence should contain the negative doc
+    assert len(result.opposing_evidence) == 1
+    assert result.opposing_evidence[0].document_id == "d2"
+    # Rank scores should be positive
+    assert all(e.rank_score > 0 for e in result.supporting_evidence)
+    assert all(e.rank_score > 0 for e in result.opposing_evidence)
+    # Summary evidence IDs should match
+    assert result.summary.top_supporting_evidence == [e.document_id for e in result.supporting_evidence]
+    assert result.summary.top_opposing_evidence == [e.document_id for e in result.opposing_evidence]
+
+
+def test_assemble_trend_with_evidence_empty_signals():
+    result = assemble_trend_with_evidence("AAPL", "7d", [], [], reference_time=NOW)
+    assert result.supporting_evidence == []
+    assert result.opposing_evidence == []
+    assert result.summary.trend_direction == TrendDirection.NEUTRAL
@@ -0,0 +1,306 @@
+"""Tests for operational alerting rules.
+
+Requirements: 12.3
+"""
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from services.shared.alerting import (
+    Alert,
+    AlertState,
+    check_analytical_lag,
+    check_broker_issues,
+    check_schema_failure_spike,
+    check_source_failures,
+    evaluate_alerts,
+)
+from services.shared.config import AlertingConfig
+
+
+@pytest.fixture
+def config():
+    return AlertingConfig(
+        source_failure_threshold=3,
+        source_failure_window_hours=6,
+        schema_failure_rate_threshold=0.3,
+        schema_failure_window_hours=1,
+        lake_lag_threshold_minutes=60,
+        broker_error_threshold=3,
+        broker_error_window_hours=1,
+        check_interval_seconds=120,
+    )
+
+
+@pytest.fixture
+def state():
+    return AlertState()
+
+
+# ---------------------------------------------------------------------------
+# AlertState unit tests
+# ---------------------------------------------------------------------------
+
+
+class TestAlertState:
+    def test_fire_new_alert_returns_true(self, state):
+        alert = Alert(rule="source_failures", severity="warning", summary="test",
+                      details={"key": "src1"})
+        assert state.fire(alert) is True
+
+    def test_fire_existing_alert_returns_false(self, state):
+        alert = Alert(rule="source_failures", severity="warning", summary="test",
+                      details={"key": "src1"})
+        state.fire(alert)
+        assert state.fire(alert) is False
+
+    def test_resolve_active_returns_true(self, state):
+        alert = Alert(rule="source_failures", severity="warning", summary="test",
+                      details={"key": "src1"})
+        state.fire(alert)
+        assert state.resolve("source_failures", "src1") is True
+
+    def test_resolve_inactive_returns_false(self, state):
+        assert state.resolve("source_failures", "src1") is False
+
+    def test_is_firing(self, state):
+        alert = Alert(rule="broker_issues", severity="critical", summary="test",
+                      details={"key": "global"})
+        assert state.is_firing("broker_issues", "global") is False
+        state.fire(alert)
+        assert state.is_firing("broker_issues", "global") is True
+
+    def test_multiple_alerts_same_rule_different_keys(self, state):
+        a1 = Alert(rule="source_failures", severity="warning", summary="s1",
+                   details={"key": "src1"})
+        a2 = Alert(rule="source_failures", severity="warning", summary="s2",
+                   details={"key": "src2"})
+        assert state.fire(a1) is True
+        assert state.fire(a2) is True
+        assert state.is_firing("source_failures", "src1") is True
+        assert state.is_firing("source_failures", "src2") is True
+        state.resolve("source_failures", "src1")
+        assert state.is_firing("source_failures", "src1") is False
+        assert state.is_firing("source_failures", "src2") is True
+
+
+# ---------------------------------------------------------------------------
+# check_source_failures
+# ---------------------------------------------------------------------------
+
+
+class TestCheckSourceFailures:
+    @pytest.mark.asyncio
+    async def test_returns_alerts_for_failing_sources(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetch.return_value = [
+            {
+                "source_id": "uuid-1",
+                "consecutive_failures": 3,
+                "source_type": "news_api",
+                "source_name": "reuters",
+                "ticker": "AAPL",
+            },
+        ]
+
+        alerts = await check_source_failures(mock_pool, config)
+        assert len(alerts) == 1
+        assert alerts[0].rule == "source_failures"
+        assert alerts[0].severity == "warning"
+        assert "AAPL" in alerts[0].summary
+        assert alerts[0].details["source_id"] == "uuid-1"
+
+    @pytest.mark.asyncio
+    async def test_returns_empty_when_no_failures(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetch.return_value = []
+
+        alerts = await check_source_failures(mock_pool, config)
+        assert alerts == []
+
+
+# ---------------------------------------------------------------------------
+# check_schema_failure_spike
+# ---------------------------------------------------------------------------
+
+
+class TestCheckSchemaFailureSpike:
+    @pytest.mark.asyncio
+    async def test_fires_when_rate_exceeds_threshold(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetchrow.return_value = {"total": 100, "failed": 40}
+
+        alerts = await check_schema_failure_spike(mock_pool, config)
+        assert len(alerts) == 1
+        assert alerts[0].rule == "schema_failure_spike"
+        assert alerts[0].details["failure_rate"] == 0.4
+
+    @pytest.mark.asyncio
+    async def test_critical_severity_above_50_percent(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetchrow.return_value = {"total": 100, "failed": 60}
+
+        alerts = await check_schema_failure_spike(mock_pool, config)
+        assert len(alerts) == 1
+        assert alerts[0].severity == "critical"
+
+    @pytest.mark.asyncio
+    async def test_no_alert_below_threshold(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetchrow.return_value = {"total": 100, "failed": 10}
+
+        alerts = await check_schema_failure_spike(mock_pool, config)
+        assert alerts == []
+
+    @pytest.mark.asyncio
+    async def test_no_alert_when_no_extractions(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetchrow.return_value = {"total": 0, "failed": 0}
+
+        alerts = await check_schema_failure_spike(mock_pool, config)
+        assert alerts == []
+
+
+# ---------------------------------------------------------------------------
+# check_analytical_lag
+# ---------------------------------------------------------------------------
+
+
+class TestCheckAnalyticalLag:
+    @pytest.mark.asyncio
+    async def test_fires_for_stale_tables(self, config):
+        mock_pool = AsyncMock()
+        stale_time = datetime(2026, 4, 10, 10, 0, 0, tzinfo=timezone.utc)
+        mock_pool.fetch.return_value = [
+            {"table_name": "market_bars", "last_publish": stale_time},
+        ]
+
+        alerts = await check_analytical_lag(mock_pool, config)
+        assert len(alerts) == 1
+        assert alerts[0].rule == "analytical_lag"
+        assert "market_bars" in alerts[0].summary
+
+    @pytest.mark.asyncio
+    async def test_no_alert_when_recent(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetch.return_value = []
+
+        alerts = await check_analytical_lag(mock_pool, config)
+        assert alerts == []
+
+
+# ---------------------------------------------------------------------------
+# check_broker_issues
+# ---------------------------------------------------------------------------
+
+
+class TestCheckBrokerIssues:
+    @pytest.mark.asyncio
+    async def test_fires_on_consecutive_errors(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetch.return_value = [{"error_count": 5}]
+
+        alerts = await check_broker_issues(mock_pool, config)
+        assert len(alerts) == 1
+        assert alerts[0].rule == "broker_issues"
+        assert alerts[0].severity == "critical"
+
+    @pytest.mark.asyncio
+    async def test_no_alert_below_threshold(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetch.return_value = [{"error_count": 1}]
+
+        alerts = await check_broker_issues(mock_pool, config)
+        assert alerts == []
+
+    @pytest.mark.asyncio
+    async def test_no_alert_when_no_events(self, config):
+        mock_pool = AsyncMock()
+        mock_pool.fetch.return_value = []
+
+        alerts = await check_broker_issues(mock_pool, config)
+        assert alerts == []
+
+
+# ---------------------------------------------------------------------------
+# evaluate_alerts integration
+# ---------------------------------------------------------------------------
+
+
+class TestEvaluateAlerts:
+    @pytest.mark.asyncio
+    async def test_newly_fired_alerts_returned(self, config, state):
+        mock_pool = AsyncMock()
+
+        with patch("services.shared.alerting.check_source_failures") as mock_src, \
+             patch("services.shared.alerting.check_schema_failure_spike") as mock_schema, \
+             patch("services.shared.alerting.check_analytical_lag") as mock_lag, \
+             patch("services.shared.alerting.check_broker_issues") as mock_broker:
+
+            mock_src.return_value = [
+                Alert(rule="source_failures", severity="warning",
+                      summary="src fail", details={"key": "s1"}),
+            ]
+            mock_schema.return_value = []
+            mock_lag.return_value = []
+            mock_broker.return_value = []
+
+            fired = await evaluate_alerts(mock_pool, config, state)
+            assert len(fired) == 1
+            assert fired[0].rule == "source_failures"
+            assert state.is_firing("source_failures", "s1")
+
+    @pytest.mark.asyncio
+    async def test_repeated_alert_not_returned_again(self, config, state):
+        mock_pool = AsyncMock()
+        alert = Alert(rule="broker_issues", severity="critical",
+                      summary="broker down", details={"key": "global"})
+
+        with patch("services.shared.alerting.check_source_failures", return_value=[]), \
+             patch("services.shared.alerting.check_schema_failure_spike", return_value=[]), \
+             patch("services.shared.alerting.check_analytical_lag", return_value=[]), \
+             patch("services.shared.alerting.check_broker_issues", return_value=[alert]):
+
+            fired1 = await evaluate_alerts(mock_pool, config, state)
+            assert len(fired1) == 1
+
+            fired2 = await evaluate_alerts(mock_pool, config, state)
+            assert len(fired2) == 0
+
+    @pytest.mark.asyncio
+    async def test_resolved_alert_clears_state(self, config, state):
+        mock_pool = AsyncMock()
+        alert = Alert(rule="broker_issues", severity="critical",
+                      summary="broker down", details={"key": "global"})
+
+        with patch("services.shared.alerting.check_source_failures", return_value=[]), \
+             patch("services.shared.alerting.check_schema_failure_spike", return_value=[]), \
+             patch("services.shared.alerting.check_analytical_lag", return_value=[]), \
+             patch("services.shared.alerting.check_broker_issues") as mock_broker:
+
+            # Fire
+            mock_broker.return_value = [alert]
+            await evaluate_alerts(mock_pool, config, state)
+            assert state.is_firing("broker_issues", "global")
+
+            # Resolve
+            mock_broker.return_value = []
+            await evaluate_alerts(mock_pool, config, state)
+            assert not state.is_firing("broker_issues", "global")
+
+    @pytest.mark.asyncio
+    async def test_rule_exception_does_not_crash(self, config, state):
+        mock_pool = AsyncMock()
+
+        with patch("services.shared.alerting.check_source_failures",
+                    side_effect=Exception("db down")), \
+             patch("services.shared.alerting.check_schema_failure_spike", return_value=[]), \
+             patch("services.shared.alerting.check_analytical_lag", return_value=[]), \
+             patch("services.shared.alerting.check_broker_issues", return_value=[]):
+
+            # Should not raise
+            fired = await evaluate_alerts(mock_pool, config, state)
+            assert fired == []
@@ -0,0 +1,160 @@
+"""Tests for the execution audit trail module.
+
+Validates audit event construction, event type constants, and the
+convenience helpers that record each stage of the execution pipeline.
+"""
+from services.shared.audit import (
+    AUDIT_ORDER_CANCELLED,
+    AUDIT_ORDER_DUPLICATE,
+    AUDIT_ORDER_FILLED,
+    AUDIT_ORDER_REJECTED,
+    AUDIT_ORDER_SUBMITTED,
+    AUDIT_POSITION_CLOSED,
+    AUDIT_POSITION_OPENED,
+    AUDIT_POSITION_UPDATED,
+    AUDIT_RECOMMENDATION_GENERATED,
+    AUDIT_RECOMMENDATION_SUPPRESSED,
+    AUDIT_RISK_EVALUATED,
+    AUDIT_RISK_REJECTED,
+    AUDIT_TRADING_MODE_CHANGED,
+)
+
+
+# ---------------------------------------------------------------------------
+# Event type constants
+# ---------------------------------------------------------------------------
+
+
+class TestAuditEventTypes:
+    """Verify event type constants are well-formed and distinct."""
+
+    def test_recommendation_events(self):
+        assert AUDIT_RECOMMENDATION_GENERATED == "recommendation.generated"
+        assert AUDIT_RECOMMENDATION_SUPPRESSED == "recommendation.suppressed"
+
+    def test_risk_events(self):
+        assert AUDIT_RISK_EVALUATED == "risk.evaluated"
+        assert AUDIT_RISK_REJECTED == "risk.rejected"
+
+    def test_order_events(self):
+        assert AUDIT_ORDER_SUBMITTED == "order.submitted"
+        assert AUDIT_ORDER_FILLED == "order.filled"
+        assert AUDIT_ORDER_REJECTED == "order.rejected"
+        assert AUDIT_ORDER_CANCELLED == "order.cancelled"
+        assert AUDIT_ORDER_DUPLICATE == "order.duplicate_prevented"
+
+    def test_position_events(self):
+        assert AUDIT_POSITION_OPENED == "position.opened"
+        assert AUDIT_POSITION_CLOSED == "position.closed"
+        assert AUDIT_POSITION_UPDATED == "position.updated"
+
+    def test_trading_mode_event(self):
+        assert AUDIT_TRADING_MODE_CHANGED == "trading.mode_changed"
+
+    def test_all_event_types_unique(self):
+        all_types = [
+            AUDIT_RECOMMENDATION_GENERATED,
+            AUDIT_RECOMMENDATION_SUPPRESSED,
+            AUDIT_RISK_EVALUATED,
+            AUDIT_RISK_REJECTED,
+            AUDIT_ORDER_SUBMITTED,
+            AUDIT_ORDER_FILLED,
+            AUDIT_ORDER_REJECTED,
+            AUDIT_ORDER_CANCELLED,
+            AUDIT_ORDER_DUPLICATE,
+            AUDIT_POSITION_OPENED,
+            AUDIT_POSITION_CLOSED,
+            AUDIT_POSITION_UPDATED,
+            AUDIT_TRADING_MODE_CHANGED,
+        ]
+        assert len(all_types) == len(set(all_types))
+
+    def test_event_types_follow_dot_notation(self):
+        """All event types should follow entity.action pattern."""
+        all_types = [
+            AUDIT_RECOMMENDATION_GENERATED,
+            AUDIT_RECOMMENDATION_SUPPRESSED,
+            AUDIT_RISK_EVALUATED,
+            AUDIT_RISK_REJECTED,
+            AUDIT_ORDER_SUBMITTED,
+            AUDIT_ORDER_FILLED,
+            AUDIT_ORDER_REJECTED,
+            AUDIT_ORDER_CANCELLED,
+            AUDIT_ORDER_DUPLICATE,
+            AUDIT_POSITION_OPENED,
+            AUDIT_POSITION_CLOSED,
+            AUDIT_POSITION_UPDATED,
+            AUDIT_TRADING_MODE_CHANGED,
+        ]
+        for t in all_types:
+            assert "." in t, f"Event type {t} should use dot notation"
+            parts = t.split(".")
+            assert len(parts) == 2, f"Event type {t} should have exactly one dot"
+            assert all(p for p in parts), f"Event type {t} has empty parts"
+
+
+# ---------------------------------------------------------------------------
+# Module imports and structure
+# ---------------------------------------------------------------------------
+
+
+class TestAuditModuleStructure:
+    """Verify the audit module exports the expected functions."""
+
+    def test_record_audit_event_exists(self):
+        from services.shared.audit import record_audit_event
+        assert callable(record_audit_event)
+
+    def test_convenience_helpers_exist(self):
+        from services.shared.audit import (
+            audit_recommendation_generated,
+            audit_risk_evaluated,
+            audit_order_submitted,
+            audit_order_filled,
+            audit_order_rejected,
+            audit_order_cancelled,
+            audit_duplicate_prevented,
+            audit_position_change,
+            audit_trading_mode_changed,
+        )
+        for fn in [
+            audit_recommendation_generated,
+            audit_risk_evaluated,
+            audit_order_submitted,
+            audit_order_filled,
+            audit_order_rejected,
+            audit_order_cancelled,
+            audit_duplicate_prevented,
+            audit_position_change,
+            audit_trading_mode_changed,
+        ]:
+            assert callable(fn)
+
+    def test_query_helpers_exist(self):
+        from services.shared.audit import (
+            get_order_audit_trail,
+            get_entity_audit_trail,
+        )
+        assert callable(get_order_audit_trail)
+        assert callable(get_entity_audit_trail)
+
+
+# ---------------------------------------------------------------------------
+# Broker service audit integration
+# ---------------------------------------------------------------------------
+
+
+class TestBrokerServiceAuditImports:
+    """Verify the broker service uses audit functions from the audit module."""
+
+    def test_broker_service_has_audit_calls(self):
+        """The broker service module should reference audit functions."""
+        import inspect
+        import services.adapters.broker_service as bs
+
+        source = inspect.getsource(bs)
+        assert "audit_order_submitted" in source
+        assert "audit_order_filled" in source
+        assert "audit_order_rejected" in source
+        assert "audit_risk_evaluated" in source
+        assert "audit_duplicate_prevented" in source
@@ -0,0 +1,417 @@
+"""Tests for the broker API adapter interface and Alpaca implementation.
+
+Validates data structures, request building, response parsing, and fail-closed behavior.
+"""
+from services.adapters.broker_adapter import (
+    AccountInfo,
+    AlpacaBrokerAdapter,
+    BrokerDataAdapter,
+    OrderEventType,
+    OrderRequest,
+    OrderResponse,
+    OrderSide,
+    OrderStatus,
+    OrderType,
+    PositionInfo,
+    TradingMode,
+)
+
+
+# --- Fake Alpaca responses ---
+
+ALPACA_ORDER_RESPONSE = {
+    "id": "order-abc-123",
+    "client_order_id": "client-001",
+    "status": "accepted",
+    "symbol": "AAPL",
+    "side": "buy",
+    "qty": "10",
+    "filled_qty": "0",
+    "filled_avg_price": None,
+    "type": "market",
+    "time_in_force": "day",
+    "created_at": "2026-04-11T14:00:00Z",
+}
+
+ALPACA_FILLED_ORDER = {
+    "id": "order-def-456",
+    "status": "filled",
+    "symbol": "AAPL",
+    "side": "buy",
+    "qty": "10",
+    "filled_qty": "10",
+    "filled_avg_price": "172.50",
+    "type": "market",
+    "time_in_force": "day",
+}
+
+ALPACA_REJECTED_ORDER = {
+    "id": "order-ghi-789",
+    "status": "rejected",
+    "symbol": "AAPL",
+    "side": "sell",
+    "qty": "100",
+    "filled_qty": "0",
+    "filled_avg_price": None,
+}
+
+ALPACA_POSITION = {
+    "symbol": "AAPL",
+    "qty": "10",
+    "avg_entry_price": "172.50",
+    "current_price": "175.00",
+    "unrealized_pl": "25.00",
+    "market_value": "1750.00",
+    "side": "long",
+}
+
+ALPACA_ACCOUNT = {
+    "id": "acct-001",
+    "buying_power": "50000.00",
+    "cash": "25000.00",
+    "portfolio_value": "75000.00",
+    "currency": "USD",
+}
+
+
+# --- Enum tests ---
+
+
+class TestBrokerEnums:
+    def test_order_side_values(self):
+        assert OrderSide.BUY.value == "buy"
+        assert OrderSide.SELL.value == "sell"
+
+    def test_order_type_values(self):
+        assert OrderType.MARKET.value == "market"
+        assert OrderType.LIMIT.value == "limit"
+        assert OrderType.STOP.value == "stop"
+        assert OrderType.STOP_LIMIT.value == "stop_limit"
+
+    def test_order_status_values(self):
+        assert OrderStatus.PENDING.value == "pending"
+        assert OrderStatus.FILLED.value == "filled"
+        assert OrderStatus.REJECTED.value == "rejected"
+
+    def test_trading_mode_values(self):
+        assert TradingMode.PAPER.value == "paper"
+        assert TradingMode.LIVE.value == "live"
+
+    def test_order_event_type_values(self):
+        assert OrderEventType.SUBMITTED.value == "submitted"
+        assert OrderEventType.FILL.value == "fill"
+        assert OrderEventType.CANCELLED.value == "cancelled"
+
+
+# --- OrderRequest tests ---
+
+
+class TestOrderRequest:
+    def test_basic_market_order(self):
+        req = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+        )
+        assert req.ticker == "AAPL"
+        assert req.side == OrderSide.BUY
+        assert req.quantity == 10
+        assert req.order_type == OrderType.MARKET
+        assert req.time_in_force == "day"
+        assert req.idempotency_key  # auto-generated
+
+    def test_limit_order(self):
+        req = OrderRequest(
+            ticker="MSFT",
+            side=OrderSide.SELL,
+            quantity=5,
+            order_type=OrderType.LIMIT,
+            limit_price=400.0,
+        )
+        assert req.order_type == OrderType.LIMIT
+        assert req.limit_price == 400.0
+
+    def test_custom_idempotency_key(self):
+        req = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=1,
+            idempotency_key="my-key-123",
+        )
+        assert req.idempotency_key == "my-key-123"
+
+    def test_to_dict(self):
+        req = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+            order_type=OrderType.LIMIT,
+            limit_price=170.0,
+            idempotency_key="key-1",
+        )
+        d = req.to_dict()
+        assert d["ticker"] == "AAPL"
+        assert d["side"] == "buy"
+        assert d["quantity"] == 10
+        assert d["order_type"] == "limit"
+        assert d["limit_price"] == 170.0
+        assert d["idempotency_key"] == "key-1"
+
+    def test_to_dict_omits_none_prices(self):
+        req = OrderRequest(ticker="AAPL", side=OrderSide.BUY, quantity=1)
+        d = req.to_dict()
+        assert "limit_price" not in d
+        assert "stop_price" not in d
+
+
+# --- OrderResponse tests ---
+
+
+class TestOrderResponse:
+    def test_ok_when_accepted(self):
+        resp = OrderResponse(
+            broker_order_id="abc",
+            status=OrderStatus.ACCEPTED,
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+        )
+        assert resp.ok is True
+
+    def test_not_ok_when_rejected(self):
+        resp = OrderResponse(
+            broker_order_id="abc",
+            status=OrderStatus.REJECTED,
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+            error="insufficient funds",
+        )
+        assert resp.ok is False
+
+    def test_not_ok_when_error(self):
+        resp = OrderResponse(
+            broker_order_id="abc",
+            status=OrderStatus.SUBMITTED,
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+            error="network failure",
+        )
+        assert resp.ok is False
+
+    def test_to_dict(self):
+        resp = OrderResponse(
+            broker_order_id="order-1",
+            status=OrderStatus.FILLED,
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+            filled_quantity=10,
+            filled_avg_price=172.5,
+        )
+        d = resp.to_dict()
+        assert d["broker_order_id"] == "order-1"
+        assert d["status"] == "filled"
+        assert d["filled_avg_price"] == 172.5
+
+
+# --- PositionInfo tests ---
+
+
+class TestPositionInfo:
+    def test_basic_position(self):
+        pos = PositionInfo(
+            ticker="AAPL",
+            quantity=10,
+            avg_entry_price=172.5,
+            current_price=175.0,
+            unrealized_pnl=25.0,
+            market_value=1750.0,
+        )
+        assert pos.ticker == "AAPL"
+        assert pos.side == "long"
+
+    def test_to_dict(self):
+        pos = PositionInfo(
+            ticker="AAPL",
+            quantity=10,
+            avg_entry_price=172.5,
+            current_price=175.0,
+            unrealized_pnl=25.0,
+            market_value=1750.0,
+            side="short",
+        )
+        d = pos.to_dict()
+        assert d["side"] == "short"
+        assert d["unrealized_pnl"] == 25.0
+
+
+# --- AccountInfo tests ---
+
+
+class TestAccountInfo:
+    def test_basic_account(self):
+        acct = AccountInfo(
+            account_id="acct-1",
+            buying_power=50000,
+            cash=25000,
+            portfolio_value=75000,
+        )
+        assert acct.mode == TradingMode.PAPER
+        assert acct.currency == "USD"
+
+    def test_to_dict(self):
+        acct = AccountInfo(
+            account_id="acct-1",
+            buying_power=50000,
+            cash=25000,
+            portfolio_value=75000,
+            mode=TradingMode.LIVE,
+        )
+        d = acct.to_dict()
+        assert d["mode"] == "live"
+        assert d["portfolio_value"] == 75000
+
+
+# --- AlpacaBrokerAdapter tests ---
+
+
+class TestAlpacaSourceType:
+    def test_source_type(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+        assert adapter.source_type() == "broker"
+
+    def test_inherits_broker_data_adapter(self):
+        assert issubclass(AlpacaBrokerAdapter, BrokerDataAdapter)
+
+    def test_bucket_name(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+        assert adapter.bucket_name() == "stonks-raw-broker"
+
+    def test_default_mode_is_paper(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+        assert adapter.mode == TradingMode.PAPER
+
+    def test_paper_base_url(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s", mode=TradingMode.PAPER)
+        assert "paper" in adapter.base_url
+
+    def test_live_base_url(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s", mode=TradingMode.LIVE)
+        assert "paper" not in adapter.base_url
+
+    def test_custom_base_url(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s", base_url="http://localhost:8080/")
+        assert adapter.base_url == "http://localhost:8080"
+
+
+class TestAlpacaHeaders:
+    def test_headers_contain_api_keys(self):
+        adapter = AlpacaBrokerAdapter(api_key="my-key", api_secret="my-secret")
+        headers = adapter._headers()
+        assert headers["APCA-API-KEY-ID"] == "my-key"
+        assert headers["APCA-API-SECRET-KEY"] == "my-secret"
+        assert headers["Content-Type"] == "application/json"
+
+
+class TestAlpacaBuildFetchUrl:
+    def setup_method(self):
+        self.adapter = AlpacaBrokerAdapter(
+            api_key="k", api_secret="s", base_url="https://paper-api.alpaca.markets"
+        )
+
+    def test_positions_url(self):
+        url = self.adapter._build_fetch_url("AAPL", "positions")
+        assert url == "https://paper-api.alpaca.markets/v2/positions/AAPL"
+
+    def test_orders_url(self):
+        url = self.adapter._build_fetch_url("AAPL", "orders")
+        assert "v2/orders" in url
+        assert "symbols=AAPL" in url
+
+    def test_account_url(self):
+        url = self.adapter._build_fetch_url("AAPL", "account")
+        assert url == "https://paper-api.alpaca.markets/v2/account"
+
+    def test_default_is_positions(self):
+        url = self.adapter._build_fetch_url("AAPL", "unknown")
+        assert "/v2/positions/AAPL" in url
+
+
+class TestAlpacaParseOrderResponse:
+    def setup_method(self):
+        self.adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+
+    def test_parse_accepted_order(self):
+        resp = self.adapter._parse_order_response(ALPACA_ORDER_RESPONSE)
+        assert resp.broker_order_id == "order-abc-123"
+        assert resp.status == OrderStatus.ACCEPTED
+        assert resp.ticker == "AAPL"
+        assert resp.side == OrderSide.BUY
+        assert resp.quantity == 10
+        assert resp.filled_quantity == 0
+        assert resp.filled_avg_price is None
+
+    def test_parse_filled_order(self):
+        resp = self.adapter._parse_order_response(ALPACA_FILLED_ORDER)
+        assert resp.status == OrderStatus.FILLED
+        assert resp.filled_quantity == 10
+        assert resp.filled_avg_price == 172.5
+
+    def test_parse_rejected_order(self):
+        resp = self.adapter._parse_order_response(ALPACA_REJECTED_ORDER)
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.ok is False
+
+    def test_parse_unknown_status_defaults_to_pending(self):
+        data = {**ALPACA_ORDER_RESPONSE, "status": "some_new_status"}
+        resp = self.adapter._parse_order_response(data)
+        assert resp.status == OrderStatus.PENDING
+
+    def test_parse_sell_side(self):
+        data = {**ALPACA_ORDER_RESPONSE, "side": "sell"}
+        resp = self.adapter._parse_order_response(data)
+        assert resp.side == OrderSide.SELL
+
+
+class TestAlpacaParsePosition:
+    def setup_method(self):
+        self.adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+
+    def test_parse_position(self):
+        pos = self.adapter._parse_position(ALPACA_POSITION)
+        assert pos.ticker == "AAPL"
+        assert pos.quantity == 10
+        assert pos.avg_entry_price == 172.5
+        assert pos.current_price == 175.0
+        assert pos.unrealized_pnl == 25.0
+        assert pos.market_value == 1750.0
+        assert pos.side == "long"
+
+    def test_parse_position_missing_fields(self):
+        pos = self.adapter._parse_position({"symbol": "TSLA"})
+        assert pos.ticker == "TSLA"
+        assert pos.quantity == 0
+        assert pos.avg_entry_price == 0
+
+
+class TestAlpacaErrorResult:
+    def test_error_result_fields(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+        result = adapter._error_result("AAPL", "rate limited", 150.0, http_status=429, raw=b"slow down")
+        assert not result.ok
+        assert result.error == "rate limited"
+        assert result.http_status == 429
+        assert result.response_time_ms == 150.0
+        assert result.raw_payload == b"slow down"
+        assert result.metadata["provider"] == "alpaca"
+        assert result.metadata["mode"] == "paper"
+        assert result.source_type == "broker"
+
+    def test_error_result_defaults(self):
+        adapter = AlpacaBrokerAdapter(api_key="k", api_secret="s")
+        result = adapter._error_result("MSFT", "timeout", 200.0)
+        assert result.http_status is None
+        assert result.raw_payload == b""
+        assert result.ticker == "MSFT"
@@ -0,0 +1,261 @@
+"""Tests for the broker service - sandbox integration wiring.
+
+Validates job parsing, risk evaluation integration, order building,
+and the overall process_order_job flow using a mock Alpaca adapter.
+"""
+import pytest
+
+from services.adapters.broker_adapter import (
+    AlpacaBrokerAdapter,
+    OrderRequest,
+    OrderResponse,
+    OrderSide,
+    OrderStatus,
+    OrderType,
+    TradingMode,
+)
+from services.adapters.broker_service import (
+    build_order_request,
+    build_proposed_order,
+    generate_idempotency_key,
+)
+from services.risk.engine import (
+    AccountRiskState,
+    PortfolioRiskConfig,
+    ProposedOrder,
+    TradingMode as RiskTradingMode,
+    evaluate_order,
+)
+from services.shared.redis_keys import QUEUE_BROKER
+
+
+# ---------------------------------------------------------------------------
+# build_order_request tests
+# ---------------------------------------------------------------------------
+
+
+class TestBuildOrderRequest:
+    def test_basic_buy_market(self):
+        job = {
+            "ticker": "AAPL",
+            "side": "buy",
+            "quantity": 10,
+            "order_type": "market",
+            "idempotency_key": "key-1",
+        }
+        req = build_order_request(job)
+        assert req.ticker == "AAPL"
+        assert req.side == OrderSide.BUY
+        assert req.quantity == 10
+        assert req.order_type == OrderType.MARKET
+        assert req.idempotency_key == "key-1"
+
+    def test_sell_limit_order(self):
+        job = {
+            "ticker": "MSFT",
+            "side": "sell",
+            "quantity": 5,
+            "order_type": "limit",
+            "limit_price": 400.0,
+        }
+        req = build_order_request(job)
+        assert req.side == OrderSide.SELL
+        assert req.order_type == OrderType.LIMIT
+        assert req.limit_price == 400.0
+
+    def test_stop_order(self):
+        job = {
+            "ticker": "TSLA",
+            "side": "sell",
+            "quantity": 3,
+            "order_type": "stop",
+            "stop_price": 200.0,
+        }
+        req = build_order_request(job)
+        assert req.order_type == OrderType.STOP
+        assert req.stop_price == 200.0
+
+    def test_defaults(self):
+        job = {"ticker": "GOOG"}
+        req = build_order_request(job)
+        assert req.side == OrderSide.BUY
+        assert req.quantity == 0
+        assert req.order_type == OrderType.MARKET
+        assert req.time_in_force == "day"
+        assert req.idempotency_key  # deterministic from job content
+
+    def test_deterministic_key_without_explicit(self):
+        """Without an explicit key, the same job produces the same key."""
+        job = {"ticker": "AAPL", "side": "buy", "quantity": 10}
+        req1 = build_order_request(job)
+        req2 = build_order_request(job)
+        assert req1.idempotency_key == req2.idempotency_key
+
+    def test_custom_time_in_force(self):
+        job = {"ticker": "AAPL", "time_in_force": "gtc"}
+        req = build_order_request(job)
+        assert req.time_in_force == "gtc"
+
+
+# ---------------------------------------------------------------------------
+# build_proposed_order tests
+# ---------------------------------------------------------------------------
+
+
+class TestBuildProposedOrder:
+    def test_basic_proposed_order(self):
+        job = {
+            "ticker": "AAPL",
+            "side": "buy",
+            "quantity": 10,
+            "estimated_value": 1500.0,
+            "confidence": 0.85,
+            "sector": "technology",
+            "recommendation_id": "rec-123",
+        }
+        proposed = build_proposed_order(job)
+        assert proposed.ticker == "AAPL"
+        assert proposed.action == "buy"
+        assert proposed.quantity == 10
+        assert proposed.estimated_value == 1500.0
+        assert proposed.confidence == 0.85
+        assert proposed.sector == "technology"
+        assert proposed.recommendation_id == "rec-123"
+
+    def test_defaults(self):
+        job = {"ticker": "GOOG"}
+        proposed = build_proposed_order(job)
+        assert proposed.action == "buy"
+        assert proposed.quantity == 0
+        assert proposed.estimated_value == 0
+        assert proposed.sector == ""
+        assert proposed.recommendation_id is None
+
+
+# ---------------------------------------------------------------------------
+# Risk evaluation integration with broker service flow
+# ---------------------------------------------------------------------------
+
+
+class TestRiskEvaluationIntegration:
+    """Verify that risk evaluation correctly gates order submission."""
+
+    def test_order_passes_risk_in_paper_mode(self):
+        config = PortfolioRiskConfig(trading_mode=RiskTradingMode.PAPER)
+        state = AccountRiskState(
+            portfolio_value=100_000.0,
+            cash=50_000.0,
+            buying_power=50_000.0,
+        )
+        proposed = ProposedOrder(
+            ticker="AAPL",
+            action="buy",
+            quantity=10,
+            estimated_value=1500.0,
+            sector="technology",
+        )
+        result = evaluate_order(proposed, config, state)
+        assert result.eligible
+        assert result.allowed_mode == RiskTradingMode.PAPER
+
+    def test_order_blocked_when_trading_disabled(self):
+        config = PortfolioRiskConfig(trading_mode=RiskTradingMode.DISABLED)
+        proposed = ProposedOrder(ticker="AAPL", quantity=10, estimated_value=1500.0)
+        result = evaluate_order(proposed, config)
+        assert not result.eligible
+        assert "disabled" in result.rejection_reasons[0].lower()
+
+    def test_order_blocked_by_position_size(self):
+        config = PortfolioRiskConfig(trading_mode=RiskTradingMode.PAPER)
+        config.position_limits.max_position_value = 1000.0
+        state = AccountRiskState(portfolio_value=100_000.0)
+        proposed = ProposedOrder(
+            ticker="AAPL",
+            quantity=100,
+            estimated_value=15_000.0,
+        )
+        result = evaluate_order(proposed, config, state)
+        assert not result.eligible
+
+
+# ---------------------------------------------------------------------------
+# Alpaca adapter sandbox mode verification
+# ---------------------------------------------------------------------------
+
+
+class TestAlpacaSandboxMode:
+    def test_paper_mode_uses_sandbox_url(self):
+        adapter = AlpacaBrokerAdapter(
+            api_key="test-key",
+            api_secret="test-secret",
+            mode=TradingMode.PAPER,
+        )
+        assert adapter.mode == TradingMode.PAPER
+        assert "paper" in adapter.base_url
+
+    def test_custom_sandbox_url(self):
+        adapter = AlpacaBrokerAdapter(
+            api_key="test-key",
+            api_secret="test-secret",
+            mode=TradingMode.PAPER,
+            base_url="https://paper-api.alpaca.markets",
+        )
+        assert adapter.base_url == "https://paper-api.alpaca.markets"
+
+    def test_headers_set_correctly(self):
+        adapter = AlpacaBrokerAdapter(
+            api_key="pk-test",
+            api_secret="sk-test",
+        )
+        headers = adapter._headers()
+        assert headers["APCA-API-KEY-ID"] == "pk-test"
+        assert headers["APCA-API-SECRET-KEY"] == "sk-test"
+
+
+# ---------------------------------------------------------------------------
+# Queue name constant
+# ---------------------------------------------------------------------------
+
+
+class TestQueueConstant:
+    def test_broker_queue_name(self):
+        assert QUEUE_BROKER == "broker_orders"
+
+
+# ---------------------------------------------------------------------------
+# Idempotency key generation tests
+# ---------------------------------------------------------------------------
+
+
+class TestGenerateIdempotencyKey:
+    def test_explicit_key_passthrough(self):
+        job = {"ticker": "AAPL", "idempotency_key": "my-explicit-key"}
+        assert generate_idempotency_key(job) == "my-explicit-key"
+
+    def test_deterministic_without_explicit_key(self):
+        job = {"ticker": "AAPL", "side": "buy", "quantity": 10, "order_type": "market"}
+        key1 = generate_idempotency_key(job)
+        key2 = generate_idempotency_key(job)
+        assert key1 == key2
+        assert len(key1) == 40  # sha256 truncated to 40 chars
+
+    def test_different_jobs_produce_different_keys(self):
+        job_a = {"ticker": "AAPL", "side": "buy", "quantity": 10}
+        job_b = {"ticker": "AAPL", "side": "sell", "quantity": 10}
+        assert generate_idempotency_key(job_a) != generate_idempotency_key(job_b)
+
+    def test_quantity_difference_changes_key(self):
+        job_a = {"ticker": "AAPL", "side": "buy", "quantity": 10}
+        job_b = {"ticker": "AAPL", "side": "buy", "quantity": 20}
+        assert generate_idempotency_key(job_a) != generate_idempotency_key(job_b)
+
+    def test_recommendation_id_included(self):
+        job_a = {"ticker": "AAPL", "recommendation_id": "rec-1"}
+        job_b = {"ticker": "AAPL", "recommendation_id": "rec-2"}
+        assert generate_idempotency_key(job_a) != generate_idempotency_key(job_b)
+
+    def test_minimal_job_still_produces_key(self):
+        job = {"ticker": "AAPL"}
+        key = generate_idempotency_key(job)
+        assert key
+        assert len(key) == 40
@@ -1,5 +1,5 @@
 """Basic tests for shared config loader."""
-from services.shared.config import load_config, AppConfig
+from services.shared.config import load_config, AppConfig, AlertingConfig


 def test_load_config_returns_app_config():
@@ -20,3 +20,13 @@ def test_redis_url_format():
 def test_default_broker_mode():
    config = load_config()
    assert config.broker.mode == "paper"
+
+
+def test_alerting_config_defaults():
+    config = load_config()
+    assert isinstance(config.alerting, AlertingConfig)
+    assert config.alerting.source_failure_threshold == 3
+    assert config.alerting.schema_failure_rate_threshold == 0.3
+    assert config.alerting.lake_lag_threshold_minutes == 60
+    assert config.alerting.broker_error_threshold == 3
+    assert config.alerting.check_interval_seconds == 120
@@ -0,0 +1,84 @@
+"""Tests for shared canonical URL normalization and content hashing.
+
+Validates normalize_url, content_hash, and content_hash_str from
+services.shared.content.
+
+Requirements: 3.2, 3.3
+"""
+import hashlib
+
+from services.shared.content import content_hash, content_hash_str, normalize_url
+
+
+class TestNormalizeUrl:
+    def test_lowercases_scheme_and_host(self):
+        assert normalize_url("HTTPS://Example.COM/path") == "https://example.com/path"
+
+    def test_strips_trailing_slash(self):
+        assert normalize_url("https://example.com/path/") == "https://example.com/path"
+
+    def test_strips_fragment(self):
+        result = normalize_url("https://example.com/path#section")
+        assert "#" not in result
+        assert result == "https://example.com/path"
+
+    def test_preserves_query(self):
+        assert normalize_url("https://example.com/path?q=test") == "https://example.com/path?q=test"
+
+    def test_sorts_query_params(self):
+        result = normalize_url("https://example.com/path?z=1&a=2")
+        assert result == "https://example.com/path?a=2&z=1"
+
+    def test_preserves_non_standard_port(self):
+        result = normalize_url("https://example.com:8443/path")
+        assert ":8443" in result
+
+    def test_strips_default_port_443(self):
+        result = normalize_url("https://example.com:443/path")
+        assert ":443" not in result
+
+    def test_strips_default_port_80(self):
+        result = normalize_url("http://example.com:80/path")
+        assert ":80" not in result
+
+    def test_root_path(self):
+        assert normalize_url("https://example.com") == "https://example.com/"
+
+    def test_defaults_scheme_to_https(self):
+        result = normalize_url("//example.com/path")
+        assert result.startswith("https://")
+
+    def test_deterministic_for_same_input(self):
+        url = "https://example.com/article?b=2&a=1#frag"
+        assert normalize_url(url) == normalize_url(url)
+
+
+class TestContentHash:
+    def test_returns_sha256_hex(self):
+        data = b"hello world"
+        expected = hashlib.sha256(data).hexdigest()
+        assert content_hash(data) == expected
+
+    def test_deterministic(self):
+        data = b"test content"
+        assert content_hash(data) == content_hash(data)
+
+    def test_different_content_different_hash(self):
+        assert content_hash(b"aaa") != content_hash(b"bbb")
+
+    def test_empty_bytes(self):
+        result = content_hash(b"")
+        assert len(result) == 64  # SHA-256 hex length
+
+
+class TestContentHashStr:
+    def test_matches_manual_sha256(self):
+        text = "hello world"
+        expected = hashlib.sha256(text.encode("utf-8")).hexdigest()
+        assert content_hash_str(text) == expected
+
+    def test_deterministic(self):
+        assert content_hash_str("test") == content_hash_str("test")
+
+    def test_different_text_different_hash(self):
+        assert content_hash_str("aaa") != content_hash_str("bbb")
@@ -0,0 +1,165 @@
+"""Tests for contradiction detection and disagreement representation.
+
+Requirements: 6.4, 6.5
+"""
+from datetime import datetime, timezone
+
+from services.aggregation.contradiction import (
+    CatalystEntry,
+    ContradictionResult,
+    detect_contradictions,
+)
+from services.aggregation.scoring import WeightedSignal, compute_signal_weight
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+
+def _sw(doc_id: str, sentiment: float, impact: float = 0.5) -> WeightedSignal:
+    """Helper to build a WeightedSignal with default scoring."""
+    w = compute_signal_weight(NOW, NOW, "7d", 0.8, extraction_confidence=0.8)
+    return WeightedSignal(doc_id, w, sentiment_value=sentiment, impact_score=impact)
+
+
+# ---------------------------------------------------------------------------
+# Overall score (backward compat with compute_contradiction_score)
+# ---------------------------------------------------------------------------
+
+
+def test_no_signals_returns_zero():
+    result = detect_contradictions([])
+    assert result.score == 0.0
+    assert result.details == []
+
+
+def test_all_positive_no_contradiction():
+    signals = [_sw("d1", 1.0), _sw("d2", 1.0)]
+    result = detect_contradictions(signals)
+    assert result.score == 0.0
+    assert len(result.details) == 0
+
+
+def test_equal_opposing_gives_half():
+    signals = [_sw("d1", 1.0, 0.5), _sw("d2", -1.0, 0.5)]
+    result = detect_contradictions(signals)
+    assert abs(result.score - 0.5) < 1e-4
+
+
+def test_neutral_signals_ignored():
+    signals = [_sw("d1", 0.0), _sw("d2", 0.0)]
+    result = detect_contradictions(signals)
+    assert result.score == 0.0
+    assert result.details == []
+
+
+# ---------------------------------------------------------------------------
+# Sentiment disagreement detail
+# ---------------------------------------------------------------------------
+
+
+def test_sentiment_disagreement_detail_present():
+    signals = [_sw("d1", 1.0, 0.6), _sw("d2", -1.0, 0.4)]
+    result = detect_contradictions(signals)
+    sentiments = [d for d in result.details if d.dimension == "sentiment"]
+    assert len(sentiments) == 1
+    detail = sentiments[0]
+    assert detail.positive_doc_ids == ["d1"]
+    assert detail.negative_doc_ids == ["d2"]
+    assert detail.positive_weight > 0
+    assert detail.negative_weight > 0
+    assert "positive" in detail.description.lower() or "sentiment" in detail.description.lower()
+
+
+def test_no_sentiment_detail_when_all_agree():
+    signals = [_sw("d1", 1.0), _sw("d2", 1.0)]
+    result = detect_contradictions(signals)
+    sentiments = [d for d in result.details if d.dimension == "sentiment"]
+    assert len(sentiments) == 0
+
+
+# ---------------------------------------------------------------------------
+# Catalyst disagreement detail
+# ---------------------------------------------------------------------------
+
+
+def test_catalyst_disagreement_detected():
+    signals = [_sw("d1", 1.0, 0.7), _sw("d2", -1.0, 0.5)]
+    entries = [
+        CatalystEntry("d1", "earnings"),
+        CatalystEntry("d2", "earnings"),
+    ]
+    result = detect_contradictions(signals, entries)
+    catalyst_details = [d for d in result.details if d.dimension.startswith("catalyst:")]
+    assert len(catalyst_details) == 1
+    assert catalyst_details[0].dimension == "catalyst:earnings"
+    assert catalyst_details[0].positive_doc_ids == ["d1"]
+    assert catalyst_details[0].negative_doc_ids == ["d2"]
+
+
+def test_no_catalyst_disagreement_when_same_sentiment():
+    signals = [_sw("d1", 1.0), _sw("d2", 1.0)]
+    entries = [
+        CatalystEntry("d1", "earnings"),
+        CatalystEntry("d2", "earnings"),
+    ]
+    result = detect_contradictions(signals, entries)
+    catalyst_details = [d for d in result.details if d.dimension.startswith("catalyst:")]
+    assert len(catalyst_details) == 0
+
+
+def test_catalyst_disagreement_across_types():
+    """Different catalyst types with internal disagreement each get a detail."""
+    signals = [
+        _sw("d1", 1.0, 0.5),
+        _sw("d2", -1.0, 0.5),
+        _sw("d3", 1.0, 0.5),
+        _sw("d4", -1.0, 0.5),
+    ]
+    entries = [
+        CatalystEntry("d1", "earnings"),
+        CatalystEntry("d2", "earnings"),
+        CatalystEntry("d3", "product"),
+        CatalystEntry("d4", "product"),
+    ]
+    result = detect_contradictions(signals, entries)
+    catalyst_details = [d for d in result.details if d.dimension.startswith("catalyst:")]
+    dims = {d.dimension for d in catalyst_details}
+    assert "catalyst:earnings" in dims
+    assert "catalyst:product" in dims
+
+
+# ---------------------------------------------------------------------------
+# Integration with assemble_trend_summary
+# ---------------------------------------------------------------------------
+
+
+def test_trend_summary_includes_disagreement_details():
+    """assemble_trend_summary should populate disagreement_details."""
+    from datetime import timedelta
+
+    from services.aggregation.worker import (
+        ImpactRow,
+        assemble_trend_summary,
+        build_weighted_signals,
+    )
+
+    impacts = [
+        ImpactRow(
+            document_id="d1", confidence=0.8, novelty_score=0.5,
+            source_credibility=0.8, sentiment="positive", impact_score=0.7,
+            catalyst_type="earnings", key_facts=[], risks=[],
+            published_at=NOW - timedelta(hours=1),
+        ),
+        ImpactRow(
+            document_id="d2", confidence=0.8, novelty_score=0.5,
+            source_credibility=0.8, sentiment="negative", impact_score=0.7,
+            catalyst_type="earnings", key_facts=[], risks=[],
+            published_at=NOW - timedelta(hours=2),
+        ),
+    ]
+    signals = build_weighted_signals(impacts, NOW, "7d")
+    summary = assemble_trend_summary("AAPL", "7d", signals, impacts, reference_time=NOW)
+
+    assert summary.contradiction_score > 0
+    assert len(summary.disagreement_details) > 0
+    dims = {d.dimension for d in summary.disagreement_details}
+    assert "sentiment" in dims
@@ -0,0 +1,208 @@
+"""Tests for dead-letter queue support and replay tooling."""
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from services.shared.dead_letter import (
+    DEFAULT_MAX_ATTEMPTS,
+    dlq_length,
+    dlq_summary,
+    peek_dlq,
+    purge_dlq,
+    replay_all,
+    replay_one,
+    send_to_dlq,
+    wrap_dlq_entry,
+)
+from services.shared.redis_keys import dlq_key, queue_key
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+class FakeRedis:
+    """Minimal async Redis fake backed by plain dicts."""
+
+    def __init__(self):
+        self._data: dict[str, list[str]] = {}
+
+    async def rpush(self, key: str, value: str) -> int:
+        self._data.setdefault(key, []).append(value)
+        return len(self._data[key])
+
+    async def lpop(self, key: str) -> str | None:
+        lst = self._data.get(key, [])
+        if not lst:
+            return None
+        return lst.pop(0)
+
+    async def llen(self, key: str) -> int:
+        return len(self._data.get(key, []))
+
+    async def lrange(self, key: str, start: int, end: int) -> list[str]:
+        lst = self._data.get(key, [])
+        return lst[start:end + 1]
+
+    async def delete(self, key: str) -> int:
+        if key in self._data:
+            del self._data[key]
+            return 1
+        return 0
+
+
+@pytest.fixture
+def rds():
+    return FakeRedis()
+
+
+SAMPLE_JOB = {"ticker": "AAPL", "source_type": "news_api", "source_id": "src-1"}
+
+
+# ---------------------------------------------------------------------------
+# wrap_dlq_entry
+# ---------------------------------------------------------------------------
+
+def test_wrap_dlq_entry_structure():
+    entry = wrap_dlq_entry(SAMPLE_JOB, "ingestion", "timeout", attempt=2, worker="ingestion_worker")
+    assert entry["original_payload"] == SAMPLE_JOB
+    assert entry["queue"] == "ingestion"
+    assert entry["error"] == "timeout"
+    assert entry["attempt"] == 2
+    assert entry["worker"] == "ingestion_worker"
+    assert "dead_lettered_at" in entry
+
+
+# ---------------------------------------------------------------------------
+# send_to_dlq / dlq_length
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_send_to_dlq_and_length(rds):
+    await send_to_dlq(rds, "parsing", SAMPLE_JOB, error="parse failure", attempt=3)
+    length = await dlq_length(rds, "parsing")
+    assert length == 1
+
+    # Verify the stored entry
+    raw = rds._data[dlq_key("parsing")][0]
+    entry = json.loads(raw)
+    assert entry["original_payload"] == SAMPLE_JOB
+    assert entry["error"] == "parse failure"
+    assert entry["attempt"] == 3
+
+
+# ---------------------------------------------------------------------------
+# peek_dlq
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_peek_dlq(rds):
+    for i in range(5):
+        await send_to_dlq(rds, "extraction", {"doc": i}, error=f"err-{i}")
+
+    items = await peek_dlq(rds, "extraction", start=0, count=3)
+    assert len(items) == 3
+    assert items[0]["original_payload"]["doc"] == 0
+    assert items[2]["original_payload"]["doc"] == 2
+
+    # DLQ should still have all 5 items (peek doesn't remove)
+    assert await dlq_length(rds, "extraction") == 5
+
+
+# ---------------------------------------------------------------------------
+# replay_one
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_replay_one(rds):
+    await send_to_dlq(rds, "ingestion", SAMPLE_JOB, error="timeout")
+    await send_to_dlq(rds, "ingestion", {"ticker": "MSFT"}, error="timeout")
+
+    entry = await replay_one(rds, "ingestion")
+    assert entry is not None
+    assert entry["original_payload"] == SAMPLE_JOB
+
+    # Original payload should now be in the source queue
+    source_queue = queue_key("ingestion")
+    raw = await rds.lpop(source_queue)
+    assert raw is not None
+    assert json.loads(raw) == SAMPLE_JOB
+
+    # DLQ should have 1 remaining
+    assert await dlq_length(rds, "ingestion") == 1
+
+
+@pytest.mark.asyncio
+async def test_replay_one_empty(rds):
+    result = await replay_one(rds, "ingestion")
+    assert result is None
+
+
+# ---------------------------------------------------------------------------
+# replay_all
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_replay_all(rds):
+    for i in range(4):
+        await send_to_dlq(rds, "aggregation", {"idx": i}, error="fail")
+
+    count = await replay_all(rds, "aggregation")
+    assert count == 4
+
+    # DLQ should be empty
+    assert await dlq_length(rds, "aggregation") == 0
+
+    # Source queue should have 4 items
+    source_queue = queue_key("aggregation")
+    assert await rds.llen(source_queue) == 4
+
+
+@pytest.mark.asyncio
+async def test_replay_all_empty(rds):
+    count = await replay_all(rds, "aggregation")
+    assert count == 0
+
+
+# ---------------------------------------------------------------------------
+# purge_dlq
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_purge_dlq(rds):
+    for i in range(3):
+        await send_to_dlq(rds, "parsing", {"idx": i}, error="fail")
+
+    removed = await purge_dlq(rds, "parsing")
+    assert removed == 3
+    assert await dlq_length(rds, "parsing") == 0
+
+
+@pytest.mark.asyncio
+async def test_purge_dlq_empty(rds):
+    removed = await purge_dlq(rds, "parsing")
+    assert removed == 0
+
+
+# ---------------------------------------------------------------------------
+# dlq_summary
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_dlq_summary(rds):
+    await send_to_dlq(rds, "ingestion", {"a": 1}, error="e")
+    await send_to_dlq(rds, "ingestion", {"b": 2}, error="e")
+    await send_to_dlq(rds, "parsing", {"c": 3}, error="e")
+
+    summary = await dlq_summary(rds, ["ingestion", "parsing", "extraction"])
+    assert summary == {"ingestion": 2, "parsing": 1, "extraction": 0}
+
+
+# ---------------------------------------------------------------------------
+# DEFAULT_MAX_ATTEMPTS constant
+# ---------------------------------------------------------------------------
+
+def test_default_max_attempts():
+    assert DEFAULT_MAX_ATTEMPTS == 3
@@ -0,0 +1,187 @@
+"""Tests for cross-source deduplication logic.
+
+Validates the pure functions and key-building helpers in services.shared.dedupe.
+Async functions that require Redis/PostgreSQL are tested with lightweight fakes.
+
+Requirements: 3.2, 3.3
+"""
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+import pytest
+
+from services.shared.dedupe import (
+    DedupeResult,
+    _hash_dedupe_key,
+    _url_dedupe_key,
+    check_duplicate,
+    dedupe_items,
+    mark_as_seen,
+)
+from services.shared.redis_keys import DEDUPE_PREFIX
+
+
+class TestDedupeKeyBuilders:
+    def test_hash_dedupe_key_format(self):
+        key = _hash_dedupe_key("abc123")
+        assert key == f"{DEDUPE_PREFIX}:abc123"
+
+    def test_url_dedupe_key_is_hashed(self):
+        key = _url_dedupe_key("https://example.com/article")
+        assert key.startswith(f"{DEDUPE_PREFIX}:url:")
+        # Should be deterministic
+        assert key == _url_dedupe_key("https://example.com/article")
+
+    def test_url_dedupe_key_differs_for_different_urls(self):
+        k1 = _url_dedupe_key("https://a.com/1")
+        k2 = _url_dedupe_key("https://b.com/2")
+        assert k1 != k2
+
+
+class TestDedupeResult:
+    def test_not_duplicate(self):
+        r = DedupeResult(is_duplicate=False)
+        assert not r.is_duplicate
+        assert r.existing_document_id is None
+        assert r.match_type is None
+
+    def test_duplicate_with_details(self):
+        r = DedupeResult(
+            is_duplicate=True,
+            existing_document_id="doc-123",
+            match_type="canonical_url",
+        )
+        assert r.is_duplicate
+        assert r.existing_document_id == "doc-123"
+
+
+class FakeRedis:
+    """Minimal async Redis fake for dedupe tests."""
+
+    def __init__(self, data: dict[str, str] | None = None):
+        self._data: dict[str, str] = data or {}
+
+    async def get(self, key: str) -> str | None:
+        return self._data.get(key)
+
+    async def set(self, key: str, value: str, ex: int | None = None) -> None:
+        self._data[key] = value
+
+
+class FakePool:
+    """Minimal async PG pool fake that returns None for all queries."""
+
+    def __init__(self, rows: dict[str, dict | None] | None = None):
+        self._rows = rows or {}
+
+    async def fetchrow(self, query: str, *args) -> dict | None:
+        # Match on the first arg (content_hash or canonical_url)
+        if args:
+            return self._rows.get(str(args[0]))
+        return None
+
+
+@pytest.mark.asyncio
+async def test_check_duplicate_no_match():
+    rds = FakeRedis()
+    pool = FakePool()
+    result = await check_duplicate(
+        pool, rds, content_hash="newhash", url="https://example.com/new"
+    )
+    assert not result.is_duplicate
+
+
+@pytest.mark.asyncio
+async def test_check_duplicate_redis_hash_hit():
+    hash_key = _hash_dedupe_key("existinghash")
+    rds = FakeRedis({hash_key: "doc-abc"})
+    pool = FakePool()
+    result = await check_duplicate(pool, rds, content_hash="existinghash")
+    assert result.is_duplicate
+    assert result.existing_document_id == "doc-abc"
+    assert result.match_type == "content_hash"
+
+
+@pytest.mark.asyncio
+async def test_check_duplicate_redis_url_hit():
+    canonical = "https://example.com/article"
+    url_key = _url_dedupe_key(canonical)
+    rds = FakeRedis({url_key: "doc-xyz"})
+    pool = FakePool()
+    result = await check_duplicate(
+        pool, rds, content_hash="newhash", canonical_url=canonical
+    )
+    assert result.is_duplicate
+    assert result.existing_document_id == "doc-xyz"
+    assert result.match_type == "canonical_url"
+
+
+@pytest.mark.asyncio
+async def test_check_duplicate_pg_hash_fallback():
+    rds = FakeRedis()
+    pool = FakePool({"pghash": {"id": "doc-pg1"}})
+    result = await check_duplicate(pool, rds, content_hash="pghash")
+    assert result.is_duplicate
+    assert result.existing_document_id == "doc-pg1"
+    assert result.match_type == "content_hash"
+    # Should have warmed Redis cache
+    assert rds._data.get(_hash_dedupe_key("pghash")) == "doc-pg1"
+
+
+@pytest.mark.asyncio
+async def test_check_duplicate_pg_url_fallback():
+    canonical = "https://example.com/filing"
+    rds = FakeRedis()
+    pool = FakePool({canonical: {"id": "doc-pg2"}})
+    result = await check_duplicate(
+        pool, rds, content_hash="nomatch", canonical_url=canonical
+    )
+    assert result.is_duplicate
+    assert result.existing_document_id == "doc-pg2"
+    assert result.match_type == "canonical_url"
+
+
+@pytest.mark.asyncio
+async def test_dedupe_items_partitions_correctly():
+    """dedupe_items should split items into new and duplicate groups."""
+    existing_hash = "existinghash"
+    hash_key = _hash_dedupe_key(existing_hash)
+    rds = FakeRedis({hash_key: "doc-old"})
+    pool = FakePool()
+
+    items = [
+        {"title": "New Article", "content_hash": "newhash", "url": "https://a.com/1"},
+        {"title": "Dup Article", "content_hash": existing_hash, "url": "https://b.com/2"},
+        {"title": "Another New", "content_hash": "anothernew", "url": "https://c.com/3"},
+    ]
+
+    new, dups = await dedupe_items(pool, rds, items)
+    assert len(new) == 2
+    assert len(dups) == 1
+    assert dups[0]["title"] == "Dup Article"
+    assert dups[0]["_dedupe_existing_id"] == "doc-old"
+
+
+@pytest.mark.asyncio
+async def test_mark_as_seen_sets_redis_keys():
+    rds = FakeRedis()
+    await mark_as_seen(
+        rds,
+        content_hash="hash123",
+        canonical_url="https://example.com/page",
+        document_id="doc-new",
+    )
+    assert rds._data[_hash_dedupe_key("hash123")] == "doc-new"
+    assert rds._data[_url_dedupe_key("https://example.com/page")] == "doc-new"
+
+
+@pytest.mark.asyncio
+async def test_mark_as_seen_handles_none_url():
+    rds = FakeRedis()
+    await mark_as_seen(
+        rds, content_hash="hash456", canonical_url=None, document_id="doc-x"
+    )
+    assert rds._data[_hash_dedupe_key("hash456")] == "doc-x"
+    # No URL key should be set
+    assert len(rds._data) == 1
@@ -0,0 +1,136 @@
+"""Tests for evidence ranking — composite scoring for supporting/opposing docs.
+
+Requirements: 6.5
+"""
+from datetime import datetime, timedelta, timezone
+
+from services.aggregation.evidence import (
+    EvidenceRankConfig,
+    compute_evidence_rank,
+    rank_evidence,
+    rank_evidence_detailed,
+)
+from services.aggregation.scoring import WeightedSignal, compute_signal_weight
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+
+def _sw(
+    doc_id: str = "doc-1",
+    sentiment: float = 1.0,
+    impact: float = 0.7,
+    credibility: float = 0.8,
+    confidence: float = 0.8,
+    age_hours: float = 1.0,
+) -> WeightedSignal:
+    published = NOW - timedelta(hours=age_hours)
+    weight = compute_signal_weight(
+        published_at=published,
+        reference_time=NOW,
+        window="7d",
+        source_credibility=credibility,
+        extraction_confidence=confidence,
+    )
+    return WeightedSignal(
+        document_id=doc_id,
+        weight=weight,
+        sentiment_value=sentiment,
+        impact_score=impact,
+    )
+
+
+# ---------------------------------------------------------------------------
+# compute_evidence_rank
+# ---------------------------------------------------------------------------
+
+
+def test_rank_score_positive():
+    sig = _sw("d1", sentiment=1.0, impact=0.9, credibility=1.0)
+    ranked = compute_evidence_rank(sig)
+    assert ranked.rank_score > 0
+    assert ranked.document_id == "d1"
+    assert ranked.sentiment_value == 1.0
+
+
+def test_higher_impact_ranks_higher():
+    low = _sw("low", impact=0.3)
+    high = _sw("high", impact=0.9)
+    r_low = compute_evidence_rank(low)
+    r_high = compute_evidence_rank(high)
+    assert r_high.rank_score > r_low.rank_score
+
+
+def test_fresher_doc_ranks_higher():
+    old = _sw("old", age_hours=100.0)
+    fresh = _sw("fresh", age_hours=1.0)
+    r_old = compute_evidence_rank(old)
+    r_fresh = compute_evidence_rank(fresh)
+    assert r_fresh.rank_score > r_old.rank_score
+
+
+def test_higher_credibility_ranks_higher():
+    low_cred = _sw("low", credibility=0.2)
+    high_cred = _sw("high", credibility=1.0)
+    r_low = compute_evidence_rank(low_cred)
+    r_high = compute_evidence_rank(high_cred)
+    assert r_high.rank_score > r_low.rank_score
+
+
+# ---------------------------------------------------------------------------
+# rank_evidence
+# ---------------------------------------------------------------------------
+
+
+def test_rank_evidence_separates_sides():
+    signals = [
+        _sw("pos1", sentiment=1.0, impact=0.9),
+        _sw("pos2", sentiment=1.0, impact=0.3),
+        _sw("neg1", sentiment=-1.0, impact=0.7),
+        _sw("neutral", sentiment=0.0, impact=0.5),
+    ]
+    supporting, opposing = rank_evidence(signals)
+    assert "pos1" in supporting
+    assert "pos2" in supporting
+    assert "neg1" in opposing
+    assert "neutral" not in supporting and "neutral" not in opposing
+
+
+def test_rank_evidence_ordered_by_composite():
+    signals = [
+        _sw("weak", sentiment=1.0, impact=0.2, credibility=0.3),
+        _sw("strong", sentiment=1.0, impact=0.9, credibility=1.0),
+    ]
+    supporting, _ = rank_evidence(signals)
+    assert supporting[0] == "strong"
+
+
+def test_rank_evidence_respects_max_refs():
+    signals = [_sw(f"d{i}", sentiment=1.0) for i in range(20)]
+    cfg = EvidenceRankConfig(max_refs=3)
+    supporting, opposing = rank_evidence(signals, config=cfg)
+    assert len(supporting) == 3
+    assert len(opposing) == 0
+
+
+def test_rank_evidence_empty():
+    supporting, opposing = rank_evidence([])
+    assert supporting == []
+    assert opposing == []
+
+
+# ---------------------------------------------------------------------------
+# rank_evidence_detailed
+# ---------------------------------------------------------------------------
+
+
+def test_detailed_returns_ranked_evidence_objects():
+    signals = [
+        _sw("pos1", sentiment=1.0, impact=0.9),
+        _sw("neg1", sentiment=-1.0, impact=0.7),
+    ]
+    sup, opp = rank_evidence_detailed(signals)
+    assert len(sup) == 1
+    assert sup[0].document_id == "pos1"
+    assert sup[0].rank_score > 0
+    assert len(opp) == 1
+    assert opp[0].document_id == "neg1"
@@ -0,0 +1,168 @@
+"""Tests for extraction model performance metrics collection.
+
+Validates that collect_metrics correctly computes metrics from
+ExtractionResponse objects for both successful and failed extractions.
+
+Requirements: 5.2, 5.4, 12.1, 12.2
+"""
+from __future__ import annotations
+
+from services.extractor.client import ExtractionAttempt, ExtractionResponse
+from services.extractor.metrics import collect_metrics
+from services.extractor.schemas import ExtractionResult, ValidationReport
+
+
+def _make_valid_result() -> ExtractionResult:
+    return ExtractionResult.model_validate({
+        "summary": "Apple beat earnings expectations.",
+        "companies": [
+            {
+                "ticker": "AAPL",
+                "company_name": "Apple Inc.",
+                "relevance": 0.95,
+                "sentiment": "positive",
+                "impact_score": 0.7,
+                "impact_horizon": "1d_30d",
+                "catalyst_type": "earnings",
+                "key_facts": ["Revenue up 12%"],
+                "risks": [],
+                "evidence_spans": ["Apple beat expectations"],
+            }
+        ],
+        "macro_themes": ["ai_capex"],
+        "novelty_score": 0.6,
+        "confidence": 0.85,
+        "extraction_warnings": [],
+    })
+
+
+def _make_success_response() -> ExtractionResponse:
+    result = _make_valid_result()
+    validation = ValidationReport(valid=True, errors=[], warnings=["low_novelty"], parsed=result)
+    attempt = ExtractionAttempt(
+        raw_output=result.model_dump_json(),
+        validation=validation,
+        error=None,
+        duration_ms=500,
+        model="test-model",
+    )
+    return ExtractionResponse(
+        success=True,
+        result=result,
+        attempts=[attempt],
+        prompt_metadata={"prompt_version": "document-intel-v1", "schema_version": "2.0.0"},
+        model="test-model",
+        total_duration_ms=500,
+    )
+
+
+def _make_failed_response_with_retries() -> ExtractionResponse:
+    attempt1 = ExtractionAttempt(
+        raw_output="bad json",
+        validation=None,
+        error="invalid_json",
+        duration_ms=200,
+        model="test-model",
+    )
+    attempt2 = ExtractionAttempt(
+        raw_output="still bad output here",
+        validation=ValidationReport(
+            valid=False,
+            errors=["schema_fail", "missing_companies"],
+            warnings=["truncated"],
+        ),
+        error="schema_fail; missing_companies",
+        duration_ms=300,
+        model="test-model",
+    )
+    return ExtractionResponse(
+        success=False,
+        result=None,
+        attempts=[attempt1, attempt2],
+        prompt_metadata={"prompt_version": "document-intel-v1", "schema_version": "2.0.0"},
+        model="test-model",
+        total_duration_ms=500,
+    )
+
+
+def test_collect_metrics_success():
+    """Successful extraction produces correct metrics."""
+    resp = _make_success_response()
+    m = collect_metrics(
+        resp,
+        document_id="doc-1",
+        ticker="AAPL",
+        document_text_length=4000,
+    )
+
+    assert m.document_id == "doc-1"
+    assert m.ticker == "AAPL"
+    assert m.model_name == "test-model"
+    assert m.prompt_version == "document-intel-v1"
+    assert m.schema_version == "2.0.0"
+    assert m.success is True
+    assert m.attempt_count == 1
+    assert m.total_duration_ms == 500
+    assert m.first_attempt_duration_ms == 500
+    assert m.final_attempt_duration_ms == 500
+    assert m.confidence == 0.85
+    assert m.validation_status == "valid"
+    assert m.validation_error_count == 0
+    assert m.validation_warning_count == 1
+    assert m.retry_count == 0
+    assert m.input_token_estimate == 1000  # 4000 / 4
+    assert m.output_token_estimate > 0
+    assert m.company_count == 1
+
+
+def test_collect_metrics_failed_with_retries():
+    """Failed extraction with retries produces correct metrics."""
+    resp = _make_failed_response_with_retries()
+    m = collect_metrics(
+        resp,
+        document_id="doc-2",
+        ticker="MSFT",
+        document_text_length=2000,
+    )
+
+    assert m.success is False
+    assert m.attempt_count == 2
+    assert m.retry_count == 1
+    assert m.first_attempt_duration_ms == 200
+    assert m.final_attempt_duration_ms == 300
+    assert m.total_duration_ms == 500
+    assert m.validation_status == "failed"
+    assert m.validation_error_count == 2
+    assert m.validation_warning_count == 1
+    assert "schema_fail" in m.validation_errors
+    assert m.confidence == 0.0
+    assert m.company_count == 0
+    assert m.input_token_estimate == 500  # 2000 / 4
+
+
+def test_collect_metrics_empty_attempts():
+    """Response with no attempts produces safe defaults."""
+    resp = ExtractionResponse(
+        success=False,
+        result=None,
+        attempts=[],
+        prompt_metadata={},
+        model="test-model",
+        total_duration_ms=0,
+    )
+    m = collect_metrics(resp, document_id="doc-3")
+
+    assert m.attempt_count == 0
+    assert m.retry_count == 0
+    assert m.first_attempt_duration_ms == 0
+    assert m.final_attempt_duration_ms == 0
+    assert m.validation_status == "unknown"
+    assert m.confidence == 0.0
+
+
+def test_collect_metrics_no_document_text_length():
+    """Zero document text length produces zero token estimate."""
+    resp = _make_success_response()
+    m = collect_metrics(resp, document_text_length=0)
+
+    assert m.input_token_estimate == 0
@@ -0,0 +1,120 @@
+"""Tests for extraction prompt templates."""
+import json
+
+from services.extractor.prompts import (
+    EXTRACTION_JSON_SCHEMA,
+    PROMPT_VERSION,
+    SCHEMA_VERSION,
+    SYSTEM_PROMPT,
+    build_extraction_prompt,
+    get_json_schema,
+    get_prompt_metadata,
+)
+from services.shared.schemas import CatalystType, DocumentType, Sentiment
+
+
+def test_build_extraction_prompt_basic():
+    """Prompt contains system and user keys with document text embedded."""
+    result = build_extraction_prompt(
+        document_text="Apple reported record Q4 earnings.",
+        document_type=DocumentType.ARTICLE,
+    )
+    assert "system" in result
+    assert "user" in result
+    assert "Apple reported record Q4 earnings." in result["user"]
+    assert "DOCUMENT TEXT" in result["user"]
+
+
+def test_system_prompt_has_anti_hallucination_rules():
+    """System prompt includes key anti-hallucination instructions."""
+    assert "NEVER fabricate" in SYSTEM_PROMPT
+    assert "NEVER infer" in SYSTEM_PROMPT
+    assert "verbatim quotes" in SYSTEM_PROMPT
+    assert "ONLY extract information explicitly stated" in SYSTEM_PROMPT
+    assert "insufficient_content" in SYSTEM_PROMPT
+
+
+def test_build_prompt_includes_json_schema():
+    """User prompt embeds the full JSON schema for structured output."""
+    result = build_extraction_prompt(document_text="test", document_type=DocumentType.ARTICLE)
+    # Schema should be serialized into the user prompt
+    assert '"summary"' in result["user"]
+    assert '"companies"' in result["user"]
+    assert '"evidence_spans"' in result["user"]
+
+
+def test_build_prompt_with_known_tickers():
+    """Known tickers are included as hints but with a warning not to force-include them."""
+    result = build_extraction_prompt(
+        document_text="Some text",
+        document_type=DocumentType.ARTICLE,
+        known_tickers=["AAPL", "MSFT"],
+    )
+    assert "AAPL" in result["user"]
+    assert "MSFT" in result["user"]
+    assert "Do NOT include a ticker just because" in result["user"]
+
+
+def test_build_prompt_without_tickers():
+    """When no tickers are provided, no ticker hint appears."""
+    result = build_extraction_prompt(document_text="Some text", document_type=DocumentType.ARTICLE)
+    assert "may be referenced" not in result["user"]
+
+
+def test_build_prompt_document_type_guidance():
+    """Each document type gets specific guidance in the prompt."""
+    for dtype in DocumentType:
+        result = build_extraction_prompt(document_text="text", document_type=dtype)
+        assert "Document type:" in result["user"]
+
+
+def test_build_prompt_filing_guidance():
+    """Filing documents get SEC-specific guidance."""
+    result = build_extraction_prompt(document_text="text", document_type=DocumentType.FILING)
+    assert "regulatory filing" in result["user"]
+
+
+def test_build_prompt_transcript_guidance():
+    """Transcript documents get earnings-call-specific guidance."""
+    result = build_extraction_prompt(document_text="text", document_type=DocumentType.TRANSCRIPT)
+    assert "forward-looking" in result["user"]
+
+
+def test_build_prompt_with_document_id():
+    """Document ID is included in the prompt when provided."""
+    result = build_extraction_prompt(
+        document_text="text",
+        document_type=DocumentType.ARTICLE,
+        document_id="abc-123",
+    )
+    assert "abc-123" in result["user"]
+
+
+def test_get_prompt_metadata():
+    """Metadata returns current prompt and schema versions."""
+    meta = get_prompt_metadata()
+    assert meta["prompt_version"] == PROMPT_VERSION
+    assert meta["schema_version"] == SCHEMA_VERSION
+
+
+def test_get_json_schema_is_valid():
+    """JSON schema has required top-level structure."""
+    schema = get_json_schema()
+    assert schema["type"] == "object"
+    assert "summary" in schema["required"]
+    assert "companies" in schema["required"]
+    assert "confidence" in schema["required"]
+
+
+def test_json_schema_enum_values_match_pydantic():
+    """Schema enum values match the Pydantic enum definitions."""
+    company_props = EXTRACTION_JSON_SCHEMA["properties"]["companies"]["items"]["properties"]
+    assert set(company_props["sentiment"]["enum"]) == {s.value for s in Sentiment}
+    assert set(company_props["catalyst_type"]["enum"]) == {c.value for c in CatalystType}
+
+
+def test_json_schema_is_serializable():
+    """Schema can be serialized to JSON without errors."""
+    serialized = json.dumps(EXTRACTION_JSON_SCHEMA)
+    parsed = json.loads(serialized)
+    assert parsed["type"] == "object"
@@ -0,0 +1,317 @@
+"""Tests for extractor JSON schema definitions and validation."""
+import json
+
+from services.extractor.schemas import (
+    SCHEMA_VERSION,
+    VALID_IMPACT_HORIZONS,
+    ExtractionResult,
+    generate_json_schema,
+    get_schema_version,
+    validate_extraction,
+)
+from services.shared.schemas import CatalystType, Sentiment
+
+
+def test_generate_json_schema_top_level_structure():
+    """Generated schema is a valid JSON Schema object with required fields."""
+    schema = generate_json_schema()
+    assert schema["type"] == "object"
+    assert "summary" in schema["required"]
+    assert "companies" in schema["required"]
+    assert "confidence" in schema["required"]
+    assert "extraction_warnings" in schema["required"]
+
+
+def test_generate_json_schema_no_refs():
+    """Generated schema has no $ref or $defs — fully inlined."""
+    schema = generate_json_schema()
+    serialized = json.dumps(schema)
+    assert "$ref" not in serialized
+    assert "$defs" not in serialized
+
+
+def test_generate_json_schema_serializable():
+    """Schema round-trips through JSON serialization."""
+    schema = generate_json_schema()
+    text = json.dumps(schema)
+    parsed = json.loads(text)
+    assert parsed["type"] == "object"
+
+
+def test_generate_json_schema_company_properties():
+    """Company items include all required extraction fields."""
+    schema = generate_json_schema()
+    company_schema = schema["properties"]["companies"]["items"]
+    required = company_schema["required"]
+    assert "ticker" in required
+    assert "sentiment" in required
+    assert "catalyst_type" in required
+    assert "evidence_spans" in required
+
+
+def test_generate_json_schema_enum_values():
+    """Enum values in the schema match the Pydantic enum definitions."""
+    schema = generate_json_schema()
+    company_props = schema["properties"]["companies"]["items"]["properties"]
+    sentiment_vals = _extract_enum_values(company_props["sentiment"])
+    catalyst_vals = _extract_enum_values(company_props["catalyst_type"])
+    assert set(sentiment_vals) == {s.value for s in Sentiment}
+    assert set(catalyst_vals) == {c.value for c in CatalystType}
+
+
+def test_get_schema_version():
+    assert get_schema_version() == SCHEMA_VERSION
+
+
+# --- Validation tests ---
+
+
+def _valid_extraction() -> dict:
+    """Minimal valid extraction result."""
+    return {
+        "summary": "Apple beat earnings expectations.",
+        "companies": [
+            {
+                "ticker": "AAPL",
+                "company_name": "Apple Inc.",
+                "relevance": 0.95,
+                "sentiment": "positive",
+                "impact_score": 0.7,
+                "impact_horizon": "1d_30d",
+                "catalyst_type": "earnings",
+                "key_facts": ["Revenue up 12%"],
+                "risks": [],
+                "evidence_spans": ["Apple beat expectations"],
+            }
+        ],
+        "macro_themes": ["ai_capex"],
+        "novelty_score": 0.6,
+        "confidence": 0.85,
+        "extraction_warnings": [],
+    }
+
+
+def test_validate_extraction_valid_dict():
+    report = validate_extraction(_valid_extraction())
+    assert report.valid
+    assert report.parsed is not None
+    assert report.parsed.companies[0].ticker == "AAPL"
+
+
+def test_validate_extraction_valid_json_string():
+    report = validate_extraction(json.dumps(_valid_extraction()))
+    assert report.valid
+    assert report.parsed is not None
+
+
+def test_validate_extraction_invalid_json():
+    report = validate_extraction("{bad json")
+    assert not report.valid
+    assert any("Invalid JSON" in e for e in report.errors)
+
+
+def test_validate_extraction_not_object():
+    report = validate_extraction("[1, 2, 3]")
+    assert not report.valid
+    assert any("object" in e.lower() for e in report.errors)
+
+
+def test_validate_extraction_missing_required_field():
+    data = _valid_extraction()
+    del data["summary"]
+    report = validate_extraction(data)
+    assert not report.valid
+
+
+def test_validate_extraction_invalid_enum():
+    data = _valid_extraction()
+    data["companies"][0]["sentiment"] = "super_bullish"
+    report = validate_extraction(data)
+    assert not report.valid
+
+
+def test_validate_extraction_out_of_range():
+    data = _valid_extraction()
+    data["confidence"] = 1.5
+    report = validate_extraction(data)
+    assert not report.valid
+
+
+def test_validate_semantic_empty_summary_warning():
+    data = _valid_extraction()
+    data["summary"] = ""
+    report = validate_extraction(data)
+    assert report.valid
+    assert "empty_summary" in report.warnings
+
+
+def test_validate_semantic_low_confidence_with_companies():
+    data = _valid_extraction()
+    data["confidence"] = 0.2
+    report = validate_extraction(data)
+    assert report.valid
+    assert "low_confidence_with_companies" in report.warnings
+
+
+def test_validate_semantic_no_evidence_spans():
+    data = _valid_extraction()
+    data["companies"][0]["evidence_spans"] = []
+    report = validate_extraction(data)
+    assert report.valid
+    assert any("no_evidence_spans" in w for w in report.warnings)
+
+
+def test_validate_semantic_high_impact_no_facts():
+    data = _valid_extraction()
+    data["companies"][0]["key_facts"] = []
+    data["companies"][0]["impact_score"] = 0.8
+    report = validate_extraction(data)
+    assert report.valid
+    assert any("high_impact_no_facts" in w for w in report.warnings)
+
+
+def test_extraction_result_model_roundtrip():
+    """ExtractionResult can be created and serialized back to dict."""
+    result = ExtractionResult(
+        summary="Test",
+        companies=[],
+        macro_themes=[],
+        novelty_score=0.5,
+        confidence=0.5,
+        extraction_warnings=[],
+    )
+    data = result.model_dump()
+    assert data["summary"] == "Test"
+    reparsed = ExtractionResult.model_validate(data)
+    assert reparsed.summary == "Test"
+
+
+def test_all_top_level_fields_required():
+    """All top-level fields appear in the schema's required list."""
+    schema = generate_json_schema()
+    required = set(schema["required"])
+    expected = {"summary", "companies", "macro_themes", "novelty_score", "confidence", "extraction_warnings"}
+    assert expected == required
+
+
+def test_all_company_fields_required():
+    """All company item fields appear in the schema's required list."""
+    schema = generate_json_schema()
+    company_required = set(schema["properties"]["companies"]["items"]["required"])
+    expected = {
+        "ticker", "company_name", "relevance", "sentiment",
+        "impact_score", "impact_horizon", "catalyst_type",
+        "key_facts", "risks", "evidence_spans",
+    }
+    assert expected == company_required
+
+
+# --- Semantic validation: error-level checks ---
+
+
+def test_validate_semantic_missing_ticker_is_error():
+    """A company with an empty ticker produces a semantic error, not just a warning."""
+    data = _valid_extraction()
+    data["companies"][0]["ticker"] = ""
+    report = validate_extraction(data)
+    assert not report.valid
+    assert any("company_missing_ticker" in e for e in report.errors)
+
+
+def test_validate_semantic_invalid_impact_horizon_is_error():
+    """An unrecognized impact_horizon produces a semantic error."""
+    data = _valid_extraction()
+    data["companies"][0]["impact_horizon"] = "forever"
+    report = validate_extraction(data)
+    assert not report.valid
+    assert any("invalid_impact_horizon" in e for e in report.errors)
+
+
+def test_validate_semantic_all_valid_horizons_accepted():
+    """Every value in VALID_IMPACT_HORIZONS passes validation."""
+    for horizon in VALID_IMPACT_HORIZONS:
+        data = _valid_extraction()
+        data["companies"][0]["impact_horizon"] = horizon
+        report = validate_extraction(data)
+        assert report.valid, f"Horizon {horizon!r} should be valid"
+
+
+def test_validate_semantic_duplicate_ticker_is_error():
+    """Two company entries with the same ticker produce a semantic error."""
+    data = _valid_extraction()
+    second = dict(data["companies"][0])
+    data["companies"].append(second)
+    report = validate_extraction(data)
+    assert not report.valid
+    assert any("duplicate_ticker_AAPL" in e for e in report.errors)
+
+
+# --- Semantic validation: warning-level checks ---
+
+
+def test_validate_semantic_invalid_ticker_format_warning():
+    """A lowercase or overly long ticker produces a warning."""
+    data = _valid_extraction()
+    data["companies"][0]["ticker"] = "aapl"
+    report = validate_extraction(data)
+    assert report.valid  # warning, not error
+    assert any("invalid_ticker_format" in w for w in report.warnings)
+
+
+def test_validate_semantic_evidence_span_too_short():
+    data = _valid_extraction()
+    data["companies"][0]["evidence_spans"] = ["short"]
+    report = validate_extraction(data)
+    assert report.valid
+    assert any("evidence_span_too_short" in w for w in report.warnings)
+
+
+def test_validate_semantic_evidence_span_too_long():
+    data = _valid_extraction()
+    data["companies"][0]["evidence_spans"] = ["x" * 501]
+    report = validate_extraction(data)
+    assert report.valid
+    assert any("evidence_span_too_long" in w for w in report.warnings)
+
+
+def test_validate_semantic_strong_sentiment_low_impact():
+    data = _valid_extraction()
+    data["companies"][0]["sentiment"] = "positive"
+    data["companies"][0]["impact_score"] = 0.05
+    report = validate_extraction(data)
+    assert report.valid
+    assert any("strong_sentiment_low_impact" in w for w in report.warnings)
+
+
+# --- Evidence grounding ---
+
+
+def test_validate_evidence_grounding_found():
+    """Evidence spans present in document_text produce no grounding warnings."""
+    data = _valid_extraction()
+    doc_text = "Apple beat expectations with record revenue."
+    report = validate_extraction(data, document_text=doc_text)
+    assert report.valid
+    assert not any("evidence_span_not_found" in w for w in report.warnings)
+
+
+def test_validate_evidence_grounding_not_found():
+    """Evidence spans NOT in document_text produce a grounding warning."""
+    data = _valid_extraction()
+    doc_text = "Completely unrelated document about weather."
+    report = validate_extraction(data, document_text=doc_text)
+    assert report.valid
+    assert any("evidence_span_not_found" in w for w in report.warnings)
+
+
+# --- Helpers ---
+
+
+def _extract_enum_values(prop: dict) -> list:
+    """Extract enum values from a JSON schema property, handling anyOf patterns."""
+    if "enum" in prop:
+        return prop["enum"]
+    for option in prop.get("anyOf", []):
+        if "enum" in option:
+            return option["enum"]
+    return []
@@ -0,0 +1,200 @@
+"""Tests for the extraction worker persistence logic.
+
+Validates that persist_extraction correctly uploads artifacts to MinIO
+and persists intelligence/impact records to PostgreSQL.
+
+Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 9.1, 9.2
+"""
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from services.extractor.client import ExtractionAttempt, ExtractionResponse
+from services.extractor.schemas import ExtractionResult, ValidationReport
+from services.extractor.worker import persist_extraction
+
+
+def _make_valid_result() -> ExtractionResult:
+    """Build a minimal valid ExtractionResult."""
+    return ExtractionResult.model_validate({
+        "summary": "Apple beat earnings expectations.",
+        "companies": [
+            {
+                "ticker": "AAPL",
+                "company_name": "Apple Inc.",
+                "relevance": 0.95,
+                "sentiment": "positive",
+                "impact_score": 0.7,
+                "impact_horizon": "1d_30d",
+                "catalyst_type": "earnings",
+                "key_facts": ["Revenue up 12%"],
+                "risks": [],
+                "evidence_spans": ["Apple beat expectations"],
+            }
+        ],
+        "macro_themes": ["ai_capex"],
+        "novelty_score": 0.6,
+        "confidence": 0.85,
+        "extraction_warnings": [],
+    })
+
+
+def _make_success_response() -> ExtractionResponse:
+    """Build a successful ExtractionResponse with one attempt."""
+    result = _make_valid_result()
+    validation = ValidationReport(valid=True, errors=[], warnings=[], parsed=result)
+    attempt = ExtractionAttempt(
+        raw_output=result.model_dump_json(),
+        validation=validation,
+        error=None,
+        duration_ms=500,
+        model="test-model",
+    )
+    return ExtractionResponse(
+        success=True,
+        result=result,
+        attempts=[attempt],
+        prompt_metadata={"prompt_version": "document-intel-v1", "schema_version": "2.0.0"},
+        model="test-model",
+        total_duration_ms=500,
+    )
+
+
+def _make_failed_response() -> ExtractionResponse:
+    """Build a failed ExtractionResponse with two attempts."""
+    attempt1 = ExtractionAttempt(
+        raw_output="bad json",
+        validation=None,
+        error="invalid_json",
+        duration_ms=200,
+        model="test-model",
+    )
+    attempt2 = ExtractionAttempt(
+        raw_output="still bad",
+        validation=ValidationReport(valid=False, errors=["schema_fail"], warnings=[]),
+        error="schema_fail",
+        duration_ms=300,
+        model="test-model",
+    )
+    return ExtractionResponse(
+        success=False,
+        result=None,
+        attempts=[attempt1, attempt2],
+        prompt_metadata={"prompt_version": "document-intel-v1", "schema_version": "2.0.0"},
+        model="test-model",
+        total_duration_ms=500,
+    )
+
+
+def _mock_pool(intel_id: str = "intel-uuid-1", impact_id: str = "impact-uuid-1") -> AsyncMock:
+    """Create a mock asyncpg pool that returns predictable UUIDs."""
+    pool = AsyncMock()
+    # Side effects: intelligence insert, impact insert, metrics insert
+    pool.fetchval = AsyncMock(side_effect=[intel_id, impact_id, "metrics-uuid-1"])
+    pool.execute = AsyncMock()
+    return pool
+
+
+def _mock_minio() -> MagicMock:
+    """Create a mock MinIO client."""
+    client = MagicMock()
+    return client
+
+
+@pytest.mark.asyncio
+async def test_persist_successful_extraction():
+    """Successful extraction persists all artifacts and intelligence records."""
+    pool = _mock_pool()
+    minio = _mock_minio()
+    response = _make_success_response()
+    ts = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+    result = await persist_extraction(
+        pool=pool,
+        minio_client=minio,
+        document_id="doc-123",
+        ticker="AAPL",
+        extraction_response=response,
+        company_id_map={"AAPL": "company-uuid-1"},
+        source_credibility=0.8,
+        timestamp=ts,
+    )
+
+    assert result.success
+    assert result.intelligence_id == "intel-uuid-1"
+    assert result.impact_ids == ["impact-uuid-1"]
+    assert result.prompt_ref is not None
+    assert "stonks-llm-prompts" in result.prompt_ref
+    assert result.raw_output_ref is not None
+    assert "stonks-llm-results" in result.raw_output_ref
+    assert result.validation_ref is not None
+    assert result.intelligence_ref is not None
+
+    # MinIO should have 4 uploads: prompt, raw output, validation, intelligence
+    assert minio.put_object.call_count == 4
+
+    # PostgreSQL: 1 intelligence insert + 1 impact insert + 1 metrics insert + 1 status update
+    assert pool.fetchval.call_count == 3
+    assert pool.execute.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_persist_failed_extraction():
+    """Failed extraction still persists attempt data and marks document as failed."""
+    pool = AsyncMock()
+    pool.fetchval = AsyncMock(return_value="intel-uuid-fail")
+    pool.execute = AsyncMock()
+    minio = _mock_minio()
+    response = _make_failed_response()
+    ts = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+    result = await persist_extraction(
+        pool=pool,
+        minio_client=minio,
+        document_id="doc-456",
+        ticker="AAPL",
+        extraction_response=response,
+        timestamp=ts,
+    )
+
+    assert not result.success
+    assert result.intelligence_id == "intel-uuid-fail"
+    assert result.intelligence_ref is None  # no final intelligence on failure
+    assert result.prompt_ref is not None
+    assert result.raw_output_ref is not None
+    assert result.validation_ref is not None
+
+    # MinIO: 3 uploads (prompt, raw output, validation — no intelligence)
+    assert minio.put_object.call_count == 3
+
+    # PostgreSQL: 1 intelligence insert + 1 metrics insert + 1 status update
+    assert pool.fetchval.call_count == 2
+    assert pool.execute.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_persist_skips_impact_without_company_id():
+    """Impact records are skipped when company_id_map doesn't have the ticker."""
+    pool = AsyncMock()
+    pool.fetchval = AsyncMock(return_value="intel-uuid-2")
+    pool.execute = AsyncMock()
+    minio = _mock_minio()
+    response = _make_success_response()
+
+    result = await persist_extraction(
+        pool=pool,
+        minio_client=minio,
+        document_id="doc-789",
+        ticker="AAPL",
+        extraction_response=response,
+        company_id_map={},  # no mapping for AAPL
+    )
+
+    assert result.success
+    assert result.impact_ids == []
+    # 1 fetchval for intelligence + 1 for metrics, no impact insert
+    assert pool.fetchval.call_count == 2
@@ -0,0 +1,332 @@
+"""Validate fail-closed behavior for broker outages and ambiguous order states.
+
+Tests that the system rejects orders rather than risking duplicates or
+ambiguous execution when the broker API is unavailable, returns errors,
+times out, or returns unexpected/ambiguous responses.
+
+Requirements: 8.4, 8.5, N5
+Design: Section 10 - Reliability and Safety
+"""
+from __future__ import annotations
+
+from datetime import datetime, timedelta, timezone
+from unittest.mock import AsyncMock, patch
+
+import httpx
+import pytest
+
+from services.adapters.broker_adapter import (
+    AlpacaBrokerAdapter,
+    OrderRequest,
+    OrderResponse,
+    OrderSide,
+    OrderStatus,
+    OrderType,
+    TradingMode,
+)
+from services.risk.engine import (
+    AccountRiskState,
+    DailyLossLimits,
+    PortfolioRiskConfig,
+    PositionLimits,
+    ProposedOrder,
+    RiskCheckResult,
+    TradingMode as RiskTradingMode,
+    evaluate_order,
+)
+
+NOW = datetime(2026, 4, 11, 14, 0, 0, tzinfo=timezone.utc)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_adapter(base_url: str = "https://paper-api.alpaca.markets") -> AlpacaBrokerAdapter:
+    return AlpacaBrokerAdapter(
+        api_key="test-key",
+        api_secret="test-secret",
+        mode=TradingMode.PAPER,
+        base_url=base_url,
+    )
+
+
+def _make_buy_order(ticker: str = "AAPL", qty: float = 10) -> OrderRequest:
+    return OrderRequest(
+        ticker=ticker,
+        side=OrderSide.BUY,
+        quantity=qty,
+        order_type=OrderType.MARKET,
+        idempotency_key=f"test-{ticker}-{qty}",
+    )
+
+
+# ---------------------------------------------------------------------------
+# 1. Broker network outage — submit_order fails closed
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestSubmitOrderFailsClosed:
+    """submit_order must return REJECTED on any network/transport error."""
+
+    async def test_connection_error_returns_rejected(self):
+        adapter = _make_adapter()
+        order = _make_buy_order()
+
+        with patch("httpx.AsyncClient.post", side_effect=httpx.ConnectError("connection refused")):
+            resp = await adapter.submit_order(order)
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.ok is False
+        assert "fail-closed" in resp.error
+
+    async def test_timeout_returns_rejected(self):
+        adapter = _make_adapter()
+        order = _make_buy_order()
+
+        with patch("httpx.AsyncClient.post", side_effect=httpx.ReadTimeout("read timed out")):
+            resp = await adapter.submit_order(order)
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.ok is False
+        assert "fail-closed" in resp.error
+
+    async def test_dns_error_returns_rejected(self):
+        adapter = _make_adapter()
+        order = _make_buy_order()
+
+        with patch("httpx.AsyncClient.post", side_effect=httpx.ConnectError("DNS resolution failed")):
+            resp = await adapter.submit_order(order)
+
+        assert resp.status == OrderStatus.REJECTED
+        assert "fail-closed" in resp.error
+
+    async def test_http_500_returns_rejected(self):
+        """Broker internal server error should result in rejection."""
+        adapter = _make_adapter()
+        order = _make_buy_order()
+
+        mock_resp = httpx.Response(500, text="Internal Server Error", request=httpx.Request("POST", "http://test"))
+        with patch("httpx.AsyncClient.post", side_effect=httpx.HTTPStatusError("500", response=mock_resp, request=mock_resp.request)):
+            resp = await adapter.submit_order(order)
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.ok is False
+        assert resp.broker_order_id == ""
+
+    async def test_http_503_returns_rejected(self):
+        """Broker service unavailable should result in rejection."""
+        adapter = _make_adapter()
+        order = _make_buy_order()
+
+        mock_resp = httpx.Response(503, text="Service Unavailable", request=httpx.Request("POST", "http://test"))
+        with patch("httpx.AsyncClient.post", side_effect=httpx.HTTPStatusError("503", response=mock_resp, request=mock_resp.request)):
+            resp = await adapter.submit_order(order)
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.ok is False
+
+    async def test_rejected_order_has_empty_broker_id(self):
+        """Fail-closed responses must not carry a broker order ID that could be confused with a real order."""
+        adapter = _make_adapter()
+        order = _make_buy_order()
+
+        with patch("httpx.AsyncClient.post", side_effect=Exception("unexpected")):
+            resp = await adapter.submit_order(order)
+
+        assert resp.broker_order_id == ""
+        assert resp.filled_quantity == 0
+        assert resp.filled_avg_price is None
+
+
+# ---------------------------------------------------------------------------
+# 2. Ambiguous order states — get_order_status fails closed
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestGetOrderStatusFailsClosed:
+    """get_order_status must return REJECTED on errors, not an ambiguous state."""
+
+    async def test_network_error_returns_rejected(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.get", side_effect=httpx.ConnectError("refused")):
+            resp = await adapter.get_order_status("order-123")
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.error is not None
+
+    async def test_timeout_returns_rejected(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.get", side_effect=httpx.ReadTimeout("timeout")):
+            resp = await adapter.get_order_status("order-123")
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.error is not None
+
+
+# ---------------------------------------------------------------------------
+# 3. Cancel order fails closed
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestCancelOrderFailsClosed:
+    """cancel_order must return REJECTED on errors rather than leaving order in unknown state."""
+
+    async def test_network_error_returns_rejected(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.delete", side_effect=httpx.ConnectError("refused")):
+            resp = await adapter.cancel_order("order-456")
+
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.error is not None
+
+    async def test_timeout_returns_rejected(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.delete", side_effect=httpx.ReadTimeout("timeout")):
+            resp = await adapter.cancel_order("order-456")
+
+        assert resp.status == OrderStatus.REJECTED
+
+
+# ---------------------------------------------------------------------------
+# 4. Position and account queries degrade safely
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestPositionAccountDegradation:
+    """Position/account queries must return safe defaults on broker outage."""
+
+    async def test_get_positions_returns_empty_on_outage(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.get", side_effect=httpx.ConnectError("refused")):
+            positions = await adapter.get_positions()
+
+        assert positions == []
+
+    async def test_get_account_returns_zeroed_on_outage(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.get", side_effect=httpx.ConnectError("refused")):
+            acct = await adapter.get_account()
+
+        assert acct.buying_power == 0
+        assert acct.cash == 0
+        assert acct.portfolio_value == 0
+        assert acct.account_id == ""
+
+
+# ---------------------------------------------------------------------------
+# 5. Risk engine fails closed with degraded state
+# ---------------------------------------------------------------------------
+
+
+class TestRiskEngineFailClosed:
+    """Risk engine must reject orders when account state is missing or degraded."""
+
+    def test_zero_portfolio_value_blocks_buy(self):
+        """If broker is down and portfolio_value is 0, position pct → 1.0 → fail."""
+        config = PortfolioRiskConfig()
+        state = AccountRiskState(portfolio_value=0.0, cash=0.0)
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=1000, quantity=10,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+        pct_check = next(c for c in result.checks if c.check_name == "max_position_pct")
+        assert pct_check.result == RiskCheckResult.FAIL
+        assert pct_check.actual == 1.0
+
+    def test_disabled_mode_blocks_all_orders(self):
+        config = PortfolioRiskConfig(trading_mode=RiskTradingMode.DISABLED)
+        state = AccountRiskState(portfolio_value=100_000.0, cash=50_000.0)
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=1000, quantity=10,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+        assert any("disabled" in r.lower() for r in result.rejection_reasons)
+
+    def test_degraded_state_with_zero_buying_power(self):
+        """When broker returns zeroed account, position value check should still block large orders."""
+        config = PortfolioRiskConfig(
+            position_limits=PositionLimits(max_position_value=5_000.0),
+        )
+        state = AccountRiskState(
+            portfolio_value=0.0, cash=0.0, buying_power=0.0,
+        )
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=10_000.0, quantity=50,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+
+    def test_multiple_risk_failures_all_captured_on_degraded_state(self):
+        """Degraded state should trigger multiple failures, all recorded for audit."""
+        config = PortfolioRiskConfig(
+            position_limits=PositionLimits(max_position_value=500),
+            daily_loss=DailyLossLimits(max_daily_loss_value=0),
+        )
+        state = AccountRiskState(portfolio_value=0.0, daily_pnl=-1.0)
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=1000, quantity=10,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+        assert len(result.rejection_reasons) >= 2
+        # Full decision trace is preserved
+        assert len(result.checks) > 0
+        assert result.config_snapshot is not None
+        assert result.state_snapshot is not None
+
+
+# ---------------------------------------------------------------------------
+# 6. Fetch (ingestion path) fails closed
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestFetchFailsClosed:
+    """Broker fetch() for ingestion must return error result, not raise."""
+
+    async def test_fetch_connection_error_returns_error_result(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.get", side_effect=httpx.ConnectError("refused")):
+            result = await adapter.fetch("AAPL", {"endpoint": "positions"})
+
+        assert not result.ok
+        assert result.error is not None
+        assert result.items == []
+
+    async def test_fetch_timeout_returns_error_result(self):
+        adapter = _make_adapter()
+
+        with patch("httpx.AsyncClient.get", side_effect=httpx.ReadTimeout("timeout")):
+            result = await adapter.fetch("AAPL", {"endpoint": "orders"})
+
+        assert not result.ok
+        assert result.error is not None
+
+    async def test_fetch_http_429_returns_error_result(self):
+        adapter = _make_adapter()
+
+        mock_resp = httpx.Response(429, text="Rate limited", request=httpx.Request("GET", "http://test"))
+        with patch("httpx.AsyncClient.get", side_effect=httpx.HTTPStatusError("429", response=mock_resp, request=mock_resp.request)):
+            result = await adapter.fetch("AAPL", {"endpoint": "positions"})
+
+        assert not result.ok
+        assert result.http_status == 429
@@ -0,0 +1,177 @@
+"""Tests for the SEC EDGAR filings adapter.
+
+Validates request building, response parsing, and error handling.
+"""
+from services.adapters.filings_adapter import FilingsDataAdapter, SECEdgarAdapter
+
+
+# --- Fake EDGAR EFTS responses ---
+
+EFTS_RESPONSE = {
+    "hits": {
+        "total": {"value": 3, "relation": "eq"},
+        "hits": [
+            {
+                "_id": "0001234567-26-000001",
+                "_source": {
+                    "file_date": "2026-04-01",
+                    "form_type": "8-K",
+                    "entity_name": "Apple Inc.",
+                    "file_num": "001-36743",
+                    "period_of_report": "2026-03-31",
+                },
+            },
+            {
+                "_id": "0001234567-26-000002",
+                "_source": {
+                    "file_date": "2026-03-15",
+                    "form_type": "10-Q",
+                    "entity_name": "Apple Inc.",
+                    "file_num": "001-36743",
+                    "period_of_report": "2026-03-15",
+                },
+            },
+            {
+                "_id": "0001234567-26-000003",
+                "_source": {
+                    "file_date": "2026-01-30",
+                    "form_type": "10-K",
+                    "entity_name": "Apple Inc.",
+                    "file_num": "001-36743",
+                    "period_of_report": "2025-12-31",
+                },
+            },
+        ],
+    }
+}
+
+EMPTY_EFTS_RESPONSE = {
+    "hits": {
+        "total": {"value": 0, "relation": "eq"},
+        "hits": [],
+    }
+}
+
+
+class TestSECEdgarSourceType:
+    def test_source_type(self):
+        adapter = SECEdgarAdapter()
+        assert adapter.source_type() == "filings_api"
+
+    def test_inherits_filings_data_adapter(self):
+        assert issubclass(SECEdgarAdapter, FilingsDataAdapter)
+
+    def test_bucket_name(self):
+        adapter = SECEdgarAdapter()
+        assert adapter.bucket_name() == "stonks-raw-filings"
+
+
+class TestSECEdgarBuildRequest:
+    def setup_method(self):
+        self.adapter = SECEdgarAdapter(
+            base_url="https://efts.sec.gov",
+            user_agent="TestAgent/1.0",
+        )
+
+    def test_default_request(self):
+        url, params, headers = self.adapter._build_request("AAPL", {})
+        assert url == "https://efts.sec.gov/LATEST/search-index"
+        assert params["q"] == '"AAPL"'
+        assert params["forms"] == "8-K,10-Q,10-K"
+        assert headers["User-Agent"] == "TestAgent/1.0"
+
+    def test_custom_forms(self):
+        _, params, _ = self.adapter._build_request("AAPL", {"forms": "8-K"})
+        assert params["forms"] == "8-K"
+
+    def test_date_range(self):
+        config = {"start_date": "2026-01-01", "end_date": "2026-04-10"}
+        _, params, _ = self.adapter._build_request("AAPL", config)
+        assert params["dateRange"] == "custom"
+        assert params["startdt"] == "2026-01-01"
+        assert params["enddt"] == "2026-04-10"
+
+    def test_cik_filter(self):
+        _, params, _ = self.adapter._build_request("AAPL", {"cik": "0000320193"})
+        assert "cik:0000320193" in params["q"]
+        assert '"AAPL"' in params["q"]
+
+    def test_custom_query_override(self):
+        _, params, _ = self.adapter._build_request("AAPL", {"query": "apple AND revenue"})
+        assert params["q"] == "apple AND revenue"
+
+    def test_trailing_slash_stripped(self):
+        adapter = SECEdgarAdapter(base_url="https://efts.sec.gov/")
+        url, _, _ = adapter._build_request("AAPL", {})
+        assert "//LATEST" not in url
+
+    def test_no_date_params_when_absent(self):
+        _, params, _ = self.adapter._build_request("AAPL", {})
+        assert "dateRange" not in params
+        assert "startdt" not in params
+        assert "enddt" not in params
+
+
+class TestSECEdgarExtractItems:
+    def setup_method(self):
+        self.adapter = SECEdgarAdapter()
+
+    def test_extract_filings(self):
+        items = self.adapter._extract_items(EFTS_RESPONSE)
+        assert len(items) == 3
+        assert items[0]["_id"] == "0001234567-26-000001"
+        assert items[0]["_source"]["form_type"] == "8-K"
+
+    def test_extract_empty_results(self):
+        items = self.adapter._extract_items(EMPTY_EFTS_RESPONSE)
+        assert items == []
+
+    def test_extract_missing_hits_key(self):
+        items = self.adapter._extract_items({"status": "OK"})
+        assert items == []
+
+    def test_extract_non_dict_hits(self):
+        items = self.adapter._extract_items({"hits": "unexpected"})
+        assert items == []
+
+    def test_extract_non_list_inner_hits(self):
+        items = self.adapter._extract_items({"hits": {"hits": "bad"}})
+        assert items == []
+
+
+class TestSECEdgarTotalHits:
+    def setup_method(self):
+        self.adapter = SECEdgarAdapter()
+
+    def test_total_hits_dict(self):
+        assert self.adapter._total_hits(EFTS_RESPONSE) == 3
+
+    def test_total_hits_int(self):
+        data = {"hits": {"total": 5, "hits": []}}
+        assert self.adapter._total_hits(data) == 5
+
+    def test_total_hits_missing(self):
+        assert self.adapter._total_hits({}) == 0
+
+    def test_total_hits_non_dict_hits(self):
+        assert self.adapter._total_hits({"hits": "bad"}) == 0
+
+
+class TestSECEdgarErrorResult:
+    def test_error_result_fields(self):
+        adapter = SECEdgarAdapter()
+        result = adapter._error_result("AAPL", "rate limited", 150.0, http_status=429, raw=b"slow down")
+        assert not result.ok
+        assert result.error == "rate limited"
+        assert result.http_status == 429
+        assert result.response_time_ms == 150.0
+        assert result.raw_payload == b"slow down"
+        assert result.metadata["provider"] == "sec_edgar"
+        assert result.source_type == "filings_api"
+
+    def test_error_result_defaults(self):
+        adapter = SECEdgarAdapter()
+        result = adapter._error_result("MSFT", "timeout", 200.0)
+        assert result.http_status is None
+        assert result.raw_payload == b""
+        assert result.ticker == "MSFT"
@@ -0,0 +1,582 @@
+"""Tests for the HTML-to-text parsing pipeline.
+
+Validates body extraction, metadata extraction, boilerplate removal,
+quality scoring, link extraction, document type inference, and company
+mention detection.
+
+Requirements: 4.1, 4.2, 4.3
+"""
+from services.parser.html_parser import (
+    CompanyMention,
+    ParsedDocument,
+    QualitySignals,
+    _block_score,
+    _collapse_whitespace,
+    _detect_repeated_blocks,
+    _link_density,
+    _remove_short_orphan_lines,
+    _text_density,
+    detect_company_mentions,
+    extract_body_text,
+    extract_metadata,
+    extract_outbound_links,
+    infer_document_type,
+    parse_html,
+    score_parse_quality,
+    score_quality,
+)
+
+RICH_HTML = """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <title>Apple Q2 Earnings Beat Expectations</title>
+    <meta property="og:title" content="Apple Q2 Earnings Beat" />
+    <meta property="og:site_name" content="TechNews" />
+    <meta property="og:description" content="Apple reported strong Q2 results." />
+    <meta name="author" content="Jane Reporter" />
+    <meta name="keywords" content="apple, earnings, tech" />
+    <meta property="article:published_time" content="2026-04-10T14:00:00Z" />
+    <link rel="canonical" href="https://technews.example.com/apple-q2-earnings" />
+</head>
+<body>
+    <nav>Navigation links here</nav>
+    <article>
+        <h1>Apple Q2 Earnings Beat Expectations</h1>
+        <p>Apple Inc. reported quarterly revenue of $95 billion, exceeding analyst estimates.
+        The company saw strong growth in its services division and iPhone sales across all
+        major markets worldwide. Revenue from the App Store and iCloud subscriptions
+        continued to climb, contributing significantly to the overall results.</p>
+        <p>CEO Tim Cook highlighted the company's commitment to innovation and expanding
+        its ecosystem. The services segment alone generated over $20 billion in revenue,
+        marking a new quarterly record for the division.</p>
+        <a href="https://other-site.com/analysis">External analysis</a>
+        <a href="https://technews.example.com/related">Related article</a>
+    </article>
+    <footer>Copyright 2026 TechNews. All rights reserved. Privacy policy applies.</footer>
+    <div class="sidebar">Sidebar content</div>
+    <div class="newsletter">Subscribe to our newsletter for updates</div>
+</body>
+</html>"""
+
+MINIMAL_HTML = "<html><body><p>Short.</p></body></html>"
+
+BOILERPLATE_HTML = """<html><body>
+<nav>Menu items</nav>
+<div class="article-body">
+    <p>The actual article content is here with enough words to pass quality checks.
+    This paragraph discusses important market developments and financial results
+    that are relevant to investors and analysts tracking the technology sector.</p>
+</div>
+<aside class="sidebar">Related links</aside>
+<div class="advertisement">Buy stuff</div>
+<footer>Copyright © 2026. All rights reserved. Terms of service apply.</footer>
+</body></html>"""
+
+
+class TestExtractBodyText:
+    def test_extracts_article_content(self):
+        text = extract_body_text(RICH_HTML)
+        assert "Apple Inc. reported quarterly revenue" in text
+        assert "strong growth" in text
+
+    def test_strips_nav_footer_sidebar(self):
+        text = extract_body_text(RICH_HTML)
+        assert "Navigation links here" not in text
+        assert "Sidebar content" not in text
+
+    def test_strips_boilerplate_text(self):
+        text = extract_body_text(BOILERPLATE_HTML)
+        assert "Subscribe to our newsletter" not in text
+        assert "Copyright ©" not in text
+
+    def test_finds_article_body_class(self):
+        text = extract_body_text(BOILERPLATE_HTML)
+        assert "actual article content" in text
+
+    def test_minimal_html_returns_text(self):
+        text = extract_body_text(MINIMAL_HTML)
+        assert "Short." in text
+
+    def test_strips_script_and_style(self):
+        html = "<html><body><script>alert('x')</script><style>.x{color:red}</style><p>Real content here</p></body></html>"
+        text = extract_body_text(html)
+        assert "alert" not in text
+        assert "color:red" not in text
+        assert "Real content here" in text
+
+    def test_empty_html(self):
+        text = extract_body_text("")
+        assert text == ""
+
+
+class TestExtractMetadata:
+    def test_extracts_title(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["title"] == "Apple Q2 Earnings Beat"
+
+    def test_extracts_author(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["author"] == "Jane Reporter"
+
+    def test_extracts_publisher(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["publisher"] == "TechNews"
+
+    def test_extracts_published_at(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["published_at"] == "2026-04-10T14:00:00Z"
+
+    def test_extracts_canonical_url(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["canonical_url"] == "https://technews.example.com/apple-q2-earnings"
+
+    def test_extracts_language(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["language"] == "en"
+
+    def test_extracts_keywords(self):
+        meta = extract_metadata(RICH_HTML, "https://technews.example.com/article")
+        assert meta["tags"] is not None
+        assert "apple" in str(meta["tags"])
+
+    def test_fallback_publisher_from_hostname(self):
+        meta = extract_metadata(MINIMAL_HTML, "https://example.com/page")
+        assert meta["publisher"] == "example.com"
+
+    def test_no_url_publisher_empty(self):
+        meta = extract_metadata(MINIMAL_HTML, "")
+        assert meta["publisher"] == ""
+
+
+class TestExtractOutboundLinks:
+    def test_finds_external_links(self):
+        links = extract_outbound_links(RICH_HTML, "https://technews.example.com/article")
+        assert "https://other-site.com/analysis" in links
+
+    def test_excludes_same_host_links(self):
+        links = extract_outbound_links(RICH_HTML, "https://technews.example.com/article")
+        assert all("technews.example.com" not in link for link in links)
+
+    def test_deduplicates_links(self):
+        html = '<html><body><a href="https://ext.com/a">1</a><a href="https://ext.com/a">2</a></body></html>'
+        links = extract_outbound_links(html, "https://example.com")
+        assert links.count("https://ext.com/a") == 1
+
+    def test_ignores_fragment_and_javascript(self):
+        html = '<html><body><a href="#top">top</a><a href="javascript:void(0)">js</a></body></html>'
+        links = extract_outbound_links(html, "https://example.com")
+        assert links == []
+
+
+class TestScoreQuality:
+    def test_very_short_text_low(self):
+        score, conf = score_quality("hello world")
+        assert score < 0.5
+        # With default body_found=True, very short text lands in medium
+        assert conf in ("low", "medium")
+
+    def test_medium_text(self):
+        words = [f"word{i}" for i in range(100)]
+        text = " ".join(words) + "."
+        score, conf = score_quality(text)
+        # 100 diverse words with sentence structure scores well
+        assert conf in ("medium", "high")
+
+    def test_long_diverse_text_high(self):
+        words = [f"word{i}" for i in range(300)]
+        text = ". ".join(" ".join(words[i:i+10]) for i in range(0, 300, 10)) + "."
+        score, conf = score_quality(text)
+        assert conf == "high"
+        assert score >= 0.65
+
+    def test_empty_text_low(self):
+        score, conf = score_quality("")
+        assert conf == "low"
+        assert score < 0.35
+
+
+class TestScoreParseQuality:
+    """Tests for the multi-signal quality scoring function."""
+
+    def test_returns_four_tuple(self):
+        score, conf, signals, warnings = score_parse_quality("hello world")
+        assert isinstance(score, float)
+        assert conf in ("low", "medium", "high")
+        assert isinstance(signals, QualitySignals)
+        assert isinstance(warnings, list)
+
+    def test_empty_text_is_low(self):
+        score, conf, signals, warnings = score_parse_quality("")
+        assert conf == "low"
+        assert "very_short_text" in warnings
+
+    def test_short_text_warns(self):
+        text = " ".join(["word"] * 30)
+        _score, _conf, _signals, warnings = score_parse_quality(text)
+        assert "short_text" in warnings
+
+    def test_body_not_found_warns(self):
+        text = " ".join([f"word{i}" for i in range(100)]) + "."
+        _score, _conf, signals, warnings = score_parse_quality(text, body_found=False)
+        assert "no_article_body_found" in warnings
+        assert signals.body_found_signal < 0.5
+
+    def test_metadata_boosts_score(self):
+        text = ". ".join(" ".join(f"word{i}" for i in range(j, j+10)) for j in range(0, 200, 10)) + "."
+        score_no_meta, _, _, _ = score_parse_quality(text)
+        score_with_meta, _, _, _ = score_parse_quality(
+            text, has_title=True, has_author=True, has_publisher=True, has_published_at=True,
+        )
+        assert score_with_meta > score_no_meta
+
+    def test_signals_as_dict(self):
+        _, _, signals, _ = score_parse_quality("hello world")
+        d = signals.as_dict()
+        assert "word_count" in d
+        assert "diversity" in d
+        assert "body_found" in d
+
+    def test_well_structured_article_scores_high(self):
+        paragraphs = []
+        for i in range(5):
+            sentences = ". ".join(f"Sentence {j} of paragraph {i} with diverse vocabulary" for j in range(4))
+            paragraphs.append(sentences + ".")
+        text = "\n\n".join(paragraphs)
+        score, conf, signals, warnings = score_parse_quality(
+            text, body_found=True, has_title=True, has_author=True,
+            has_publisher=True, has_published_at=True,
+        )
+        assert conf == "high"
+        assert score >= 0.7
+        assert signals.paragraph_signal == 1.0
+        assert signals.body_found_signal == 1.0
+
+
+class TestInferDocumentType:
+    def test_filing_from_url(self):
+        assert infer_document_type("", "https://sec.gov/filing/10-k") == "filing"
+
+    def test_transcript_from_url(self):
+        assert infer_document_type("", "https://example.com/earnings-call-transcript") == "transcript"
+
+    def test_press_release_from_url(self):
+        assert infer_document_type("", "https://example.com/press-release/q2") == "press_release"
+
+    def test_default_article(self):
+        assert infer_document_type("", "https://example.com/news/story") == "article"
+
+
+class TestDetectCompanyMentions:
+    def test_detects_ticker(self):
+        aliases = [{"company_id": "1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"}]
+        mentions = detect_company_mentions("Shares of AAPL rose 5% today", aliases)
+        assert len(mentions) == 1
+        assert mentions[0]["ticker"] == "AAPL"
+        assert mentions[0]["confidence"] == 0.9  # ticker confidence
+
+    def test_detects_company_name(self):
+        aliases = [{"company_id": "1", "alias": "Apple Inc.", "alias_type": "legal_name", "ticker": "AAPL"}]
+        mentions = detect_company_mentions("Apple Inc. reported strong earnings", aliases)
+        assert len(mentions) == 1
+        assert mentions[0]["confidence"] == 0.85  # legal_name confidence
+
+    def test_no_false_positive_short_ticker(self):
+        aliases = [{"company_id": "1", "alias": "A", "alias_type": "ticker", "ticker": "A"}]
+        mentions = detect_company_mentions("This is a sentence about nothing", aliases)
+        assert len(mentions) == 0
+
+    def test_short_ticker_case_sensitive(self):
+        aliases = [{"company_id": "1", "alias": "AI", "alias_type": "ticker", "ticker": "AI"}]
+        # "AI" as a word should match case-sensitively
+        mentions = detect_company_mentions("The AI revolution is here", aliases)
+        assert len(mentions) == 1
+        # Lowercase "ai" should not match
+        mentions2 = detect_company_mentions("the ai revolution is here", aliases)
+        assert len(mentions2) == 0
+
+    def test_deduplicates_by_company(self):
+        aliases = [
+            {"company_id": "1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"},
+            {"company_id": "1", "alias": "Apple Inc.", "alias_type": "legal_name", "ticker": "AAPL"},
+        ]
+        mentions = detect_company_mentions("AAPL Apple Inc. reported earnings", aliases)
+        assert len(mentions) == 1
+        # Should keep the higher confidence (ticker=0.9 > legal_name=0.85)
+        assert mentions[0]["confidence"] == 0.9
+
+    def test_match_count_accumulated(self):
+        aliases = [{"company_id": "1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"}]
+        mentions = detect_company_mentions("AAPL beat estimates. AAPL shares rose.", aliases)
+        assert len(mentions) == 1
+        assert mentions[0]["match_count"] == 2
+
+    def test_multiple_companies(self):
+        aliases = [
+            {"company_id": "1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"},
+            {"company_id": "2", "alias": "MSFT", "alias_type": "ticker", "ticker": "MSFT"},
+        ]
+        mentions = detect_company_mentions("AAPL and MSFT both reported earnings", aliases)
+        assert len(mentions) == 2
+        tickers = {m["ticker"] for m in mentions}
+        assert tickers == {"AAPL", "MSFT"}
+
+    def test_brand_alias(self):
+        aliases = [{"company_id": "1", "alias": "iPhone", "alias_type": "brand", "ticker": "AAPL"}]
+        mentions = detect_company_mentions("The new iPhone sales exceeded expectations", aliases)
+        assert len(mentions) == 1
+        assert mentions[0]["confidence"] == 0.6  # brand confidence
+
+    def test_empty_text(self):
+        aliases = [{"company_id": "1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"}]
+        assert detect_company_mentions("", aliases) == []
+
+    def test_empty_aliases(self):
+        assert detect_company_mentions("Some text about stocks", []) == []
+
+    def test_case_insensitive_name_match(self):
+        aliases = [{"company_id": "1", "alias": "Apple Inc.", "alias_type": "legal_name", "ticker": "AAPL"}]
+        mentions = detect_company_mentions("APPLE INC. reported earnings", aliases)
+        assert len(mentions) == 1
+
+
+class TestParseHtml:
+    def test_returns_parsed_document(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert isinstance(result, ParsedDocument)
+
+    def test_body_text_populated(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert "Apple Inc." in result.body_text
+        assert result.word_count > 0
+
+    def test_metadata_populated(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert result.title == "Apple Q2 Earnings Beat"
+        assert result.author == "Jane Reporter"
+        assert result.publisher == "TechNews"
+
+    def test_quality_scoring(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert result.quality_score > 0
+        assert result.confidence in ("low", "medium", "high")
+
+    def test_quality_signals_populated(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert isinstance(result.quality_signals, QualitySignals)
+        assert result.quality_signals.body_found_signal == 1.0
+        assert result.quality_signals.metadata_signal > 0
+
+    def test_low_quality_flag_on_minimal(self):
+        result = parse_html(MINIMAL_HTML, "")
+        assert result.low_quality_flag is True
+        assert result.confidence == "low"
+
+    def test_rich_html_not_low_quality(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert result.low_quality_flag is False
+
+    def test_quality_warnings_list(self):
+        result = parse_html(MINIMAL_HTML, "")
+        assert isinstance(result.quality_warnings, list)
+
+    def test_tags_extracted(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert "apple" in result.tags
+
+    def test_document_type_inferred(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert result.document_type == "article"
+
+    def test_outbound_links(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert any("other-site.com" in link for link in result.outbound_links)
+
+    def test_mentioned_companies_with_aliases(self):
+        aliases = [
+            {"company_id": "1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"},
+            {"company_id": "1", "alias": "Apple Inc.", "alias_type": "legal_name", "ticker": "AAPL"},
+        ]
+        result = parse_html(RICH_HTML, "https://technews.example.com/article", aliases=aliases)
+        assert len(result.mentioned_companies) == 1
+        assert result.mentioned_companies[0].ticker == "AAPL"
+        assert isinstance(result.mentioned_companies[0], CompanyMention)
+
+    def test_no_mentions_without_aliases(self):
+        result = parse_html(RICH_HTML, "https://technews.example.com/article")
+        assert result.mentioned_companies == []
+
+# --- HTML fixtures for boilerplate reduction tests ---
+
+NO_SEMANTIC_HTML = """<html><body>
+<div class="top-bar"><a href="/">Home</a> <a href="/about">About</a> <a href="/contact">Contact</a></div>
+<div class="main-content">
+    <p>The Federal Reserve announced a 25 basis point rate cut on Wednesday,
+    surprising markets that had expected rates to remain unchanged. Bond yields
+    fell sharply across the curve, with the 10-year Treasury dropping to 3.8 percent.
+    Equity markets rallied on the news, with the S&P 500 gaining 1.2 percent by close.</p>
+    <p>Analysts noted that the decision reflects growing concerns about slowing economic
+    growth and weakening labor market data. Several Fed governors had signaled openness
+    to easing in recent speeches, but the timing caught many off guard.</p>
+    <p>Market participants are now pricing in additional cuts at the next two meetings,
+    with futures indicating a 70 percent probability of another reduction in September.</p>
+</div>
+<div class="link-list"><a href="/1">Story 1</a><a href="/2">Story 2</a><a href="/3">Story 3</a><a href="/4">Story 4</a><a href="/5">Story 5</a></div>
+</body></html>"""
+
+HEAVY_BOILERPLATE_HTML = """<html><body>
+<div class="cookie-banner">We use cookies. Accept all cookies.</div>
+<div class="signup-form">Sign up for free alerts</div>
+<nav class="menu">Home | Markets | Tech | Opinion</nav>
+<article>
+    <p>Tesla reported record deliveries in Q1 2026, shipping over 500,000 vehicles
+    globally. The company attributed the strong performance to expanded production
+    capacity at its Berlin and Austin gigafactories, as well as growing demand for
+    the refreshed Model Y across European and Asian markets.</p>
+    <p>Revenue for the quarter came in at $28 billion, beating consensus estimates
+    by roughly 4 percent. Automotive gross margins improved to 19.5 percent,
+    reversing a trend of compression seen throughout 2025.</p>
+</article>
+<div class="social-share">Share this article on Twitter Facebook LinkedIn</div>
+<div class="related-posts">You may also like: Story A, Story B</div>
+<div class="ad-container">Sponsored content here</div>
+<footer>Copyright © 2026 FinanceDaily. All rights reserved. Terms of service. Privacy policy.</footer>
+</body></html>"""
+
+REPEATED_BLOCKS_HTML = """<html><body>
+<article>
+    <p>Apple announced a new partnership with Samsung to develop next-generation
+    display technology for future iPhone models. The collaboration is expected to
+    yield OLED panels with improved brightness and energy efficiency.</p>
+    <p>This is a developing story. Check back for updates as more information becomes available.</p>
+    <p>Industry analysts view the partnership as a strategic move to secure supply
+    chain advantages ahead of the 2027 product cycle. Display costs represent a
+    significant portion of iPhone bill of materials.</p>
+    <p>This is a developing story. Check back for updates as more information becomes available.</p>
+</article>
+</body></html>"""
+
+
+class TestTextDensityScoring:
+    """Tests for text-density-based block scoring heuristics."""
+
+    def test_content_rich_div_has_high_density(self):
+        from bs4 import BeautifulSoup
+        html = "<div><p>This is a substantial paragraph with real content about markets.</p></div>"
+        soup = BeautifulSoup(html, "html.parser")
+        tag = soup.find("div")
+        assert _text_density(tag) > _MIN_TEXT_DENSITY
+
+    def test_link_heavy_div_has_high_link_density(self):
+        from bs4 import BeautifulSoup
+        html = '<div><a href="/a">Link one</a> <a href="/b">Link two</a> <a href="/c">Link three</a></div>'
+        soup = BeautifulSoup(html, "html.parser")
+        tag = soup.find("div")
+        assert _link_density(tag) > 0.8
+
+    def test_article_div_has_low_link_density(self):
+        from bs4 import BeautifulSoup
+        html = "<div><p>A long paragraph of article text that discusses important financial results and market movements in detail.</p></div>"
+        soup = BeautifulSoup(html, "html.parser")
+        tag = soup.find("div")
+        assert _link_density(tag) < 0.1
+
+    def test_block_score_prefers_content_over_nav(self):
+        from bs4 import BeautifulSoup
+        content_html = "<div>" + "<p>Substantial article paragraph with real content about markets and earnings.</p>" * 3 + "</div>"
+        nav_html = '<div><a href="/a">Link</a><a href="/b">Link</a><a href="/c">Link</a><a href="/d">Link</a></div>'
+        soup_c = BeautifulSoup(content_html, "html.parser")
+        soup_n = BeautifulSoup(nav_html, "html.parser")
+        assert _block_score(soup_c.find("div")) > _block_score(soup_n.find("div"))
+
+
+class TestBoilerplateReduction:
+    """Tests for enhanced boilerplate reduction pipeline."""
+
+    def test_strips_cookie_banner(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "cookie" not in text.lower()
+
+    def test_strips_signup_form(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "Sign up for free" not in text
+
+    def test_strips_social_share(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "Share this article" not in text
+
+    def test_strips_ad_container(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "Sponsored content" not in text
+
+    def test_strips_related_posts(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "You may also like" not in text
+
+    def test_preserves_article_content(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "Tesla reported record deliveries" in text
+        assert "Revenue for the quarter" in text
+
+    def test_strips_copyright_footer(self):
+        text = extract_body_text(HEAVY_BOILERPLATE_HTML)
+        assert "Copyright ©" not in text
+
+
+class TestBodyExtractionFallback:
+    """Tests for text-density fallback when no semantic selector matches."""
+
+    def test_finds_content_without_article_tag(self):
+        text = extract_body_text(NO_SEMANTIC_HTML)
+        assert "Federal Reserve announced" in text
+        assert "25 basis point rate cut" in text
+
+    def test_prefers_content_over_nav_links(self):
+        text = extract_body_text(NO_SEMANTIC_HTML)
+        # The nav-like link list should not dominate the output
+        assert "Story 1" not in text or "Federal Reserve" in text
+
+
+class TestRepeatedBlockDetection:
+    """Tests for repeated/template text detection."""
+
+    def test_collapses_repeated_template_text(self):
+        text = extract_body_text(REPEATED_BLOCKS_HTML)
+        count = text.count("This is a developing story")
+        assert count <= 1
+
+    def test_preserves_unique_content(self):
+        text = extract_body_text(REPEATED_BLOCKS_HTML)
+        assert "Apple announced a new partnership" in text
+        assert "Industry analysts view" in text
+
+
+class TestOrphanLineRemoval:
+    """Tests for short orphan line removal."""
+
+    def test_removes_short_fragments(self):
+        text = _remove_short_orphan_lines("OK\nThis is a real sentence about markets.\nHi")
+        assert "OK" not in text
+        assert "Hi" not in text
+        assert "real sentence" in text
+
+    def test_keeps_short_lines_with_punctuation(self):
+        text = _remove_short_orphan_lines("Breaking news.\nDetails follow in the article.")
+        assert "Breaking news." in text
+
+
+class TestCollapseWhitespace:
+    """Tests for whitespace collapsing."""
+
+    def test_collapses_multiple_blank_lines(self):
+        text = _collapse_whitespace("Line one.\n\n\n\nLine two.")
+        assert "\n\n\n" not in text
+        assert "Line one." in text
+        assert "Line two." in text
+
+    def test_strips_leading_trailing(self):
+        text = _collapse_whitespace("\n\n  Hello world.  \n\n")
+        assert text == "Hello world."
+
+
+# Import the constant for use in density tests
+from services.parser.html_parser import _MIN_TEXT_DENSITY
@@ -0,0 +1,161 @@
+"""Tests for Iceberg table creation and metadata management."""
+from datetime import date
+
+import pyarrow as pa
+
+from services.lake_publisher.iceberg import (
+    ICEBERG_CATALOG,
+    ICEBERG_SCHEMA,
+    TABLE_SCHEMAS,
+    IcebergManager,
+    IcebergTableDef,
+    _arrow_type_to_trino,
+    get_all_table_defs,
+    get_table_def,
+)
+from services.lake_publisher.partitions import TABLE_PARTITIONS, PartitionSpec
+
+
+# ---------------------------------------------------------------------------
+# _arrow_type_to_trino
+# ---------------------------------------------------------------------------
+
+
+def test_arrow_to_trino_string():
+    assert _arrow_type_to_trino(pa.string()) == "VARCHAR"
+
+
+def test_arrow_to_trino_float64():
+    assert _arrow_type_to_trino(pa.float64()) == "DOUBLE"
+
+
+def test_arrow_to_trino_int64():
+    assert _arrow_type_to_trino(pa.int64()) == "BIGINT"
+
+
+def test_arrow_to_trino_int32():
+    assert _arrow_type_to_trino(pa.int32()) == "INTEGER"
+
+
+def test_arrow_to_trino_bool():
+    assert _arrow_type_to_trino(pa.bool_()) == "BOOLEAN"
+
+
+def test_arrow_to_trino_date32():
+    assert _arrow_type_to_trino(pa.date32()) == "DATE"
+
+
+def test_arrow_to_trino_timestamp_utc():
+    assert _arrow_type_to_trino(pa.timestamp("us", tz="UTC")) == "TIMESTAMP(6) WITH TIME ZONE"
+
+
+def test_arrow_to_trino_timestamp_no_tz():
+    assert _arrow_type_to_trino(pa.timestamp("us")) == "TIMESTAMP(6)"
+
+
+# ---------------------------------------------------------------------------
+# TABLE_SCHEMAS registry
+# ---------------------------------------------------------------------------
+
+
+def test_table_schemas_covers_all_partitions():
+    """Every table in TABLE_PARTITIONS should have a corresponding PyArrow schema."""
+    for table_name in TABLE_PARTITIONS:
+        assert table_name in TABLE_SCHEMAS, f"Missing schema for {table_name}"
+
+
+# ---------------------------------------------------------------------------
+# IcebergTableDef
+# ---------------------------------------------------------------------------
+
+
+def test_table_def_qualified_name():
+    td = get_table_def("trade_signals")
+    assert td.qualified_name == f"{ICEBERG_CATALOG}.{ICEBERG_SCHEMA}.trade_signals"
+
+
+def test_table_def_location():
+    td = get_table_def("trade_signals")
+    assert td.location == "s3a://stonks-lakehouse/warehouse/trade_signals/"
+
+
+def test_table_def_column_defs():
+    td = get_table_def("trade_signals")
+    cols = td.column_defs_sql()
+    col_names = [c.strip().split()[0] for c in cols]
+    assert "signal_id" in col_names
+    assert "ticker" in col_names
+    assert "dt" in col_names
+
+
+def test_table_def_partition_keys_dt_only():
+    td = get_table_def("trade_signals")
+    part = td.partition_keys_sql()
+    assert "'dt'" in part
+    assert "model_version" not in part
+
+
+def test_table_def_partition_keys_with_extra():
+    td = get_table_def("document_extractions")
+    part = td.partition_keys_sql()
+    assert "'dt'" in part
+    assert "'model_version'" in part
+
+
+def test_create_table_sql_structure():
+    td = get_table_def("market_bars")
+    sql = td.create_table_sql()
+    assert "CREATE TABLE IF NOT EXISTS" in sql
+    assert f"{ICEBERG_CATALOG}.{ICEBERG_SCHEMA}.market_bars" in sql
+    assert "format = 'PARQUET'" in sql
+    assert "s3a://stonks-lakehouse/warehouse/market_bars/" in sql
+    assert "partitioning" in sql
+
+
+def test_create_table_sql_columns_match_schema():
+    td = get_table_def("market_bars")
+    sql = td.create_table_sql()
+    # All columns from the PyArrow schema should appear
+    for i in range(len(td.schema)):
+        col_name = td.schema.field(i).name
+        assert col_name in sql, f"Column {col_name} missing from DDL"
+
+
+# ---------------------------------------------------------------------------
+# get_all_table_defs / get_table_def
+# ---------------------------------------------------------------------------
+
+
+def test_get_all_table_defs_count():
+    defs = get_all_table_defs()
+    assert len(defs) == len(TABLE_PARTITIONS)
+
+
+def test_get_table_def_unknown_raises():
+    try:
+        get_table_def("nonexistent_table")
+        assert False, "Should have raised ValueError"
+    except ValueError:
+        pass
+
+
+def test_get_all_table_defs_all_generate_valid_sql():
+    """Every table def should produce syntactically reasonable DDL."""
+    for td in get_all_table_defs():
+        sql = td.create_table_sql()
+        assert "CREATE TABLE IF NOT EXISTS" in sql
+        assert td.table_name in sql
+        assert "PARQUET" in sql
+
+
+# ---------------------------------------------------------------------------
+# IcebergManager (unit tests with no Trino connection)
+# ---------------------------------------------------------------------------
+
+
+def test_iceberg_manager_defaults():
+    mgr = IcebergManager()
+    assert mgr.host == "localhost"
+    assert mgr.port == 8080
+    assert mgr.catalog == ICEBERG_CATALOG
+    assert mgr.schema == ICEBERG_SCHEMA
@@ -0,0 +1,648 @@
+"""Integration tests for the full ingest-to-recommendation flow.
+
+Exercises the pipeline end-to-end through all stages:
+  Ingestion → Parsing → Extraction → Aggregation → Recommendation
+
+Each stage uses the real logic functions from the service modules.
+External infrastructure (PostgreSQL, MinIO, Redis, Ollama) is replaced
+with lightweight fakes that preserve the data contracts between stages.
+
+Requirements: 3.1-3.4, 4.1-4.3, 5.1-5.5, 6.1-6.5, 7.1-7.4
+"""
+from __future__ import annotations
+
+import json
+import uuid
+from datetime import datetime, timedelta, timezone
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from services.aggregation.worker import (
+    ImpactRow,
+    assemble_trend_with_evidence,
+    build_weighted_signals,
+)
+from services.extractor.client import ExtractionAttempt, ExtractionResponse
+from services.extractor.schemas import ExtractionResult, ValidationReport, validate_extraction
+from services.extractor.worker import persist_extraction
+from services.parser.html_parser import ParsedDocument, detect_company_mentions, parse_html
+from services.parser.worker import build_parser_output_json
+from services.recommendation.eligibility import EligibilityConfig, evaluate_eligibility
+from services.recommendation.suppression import (
+    DataQualityContext,
+    SuppressionConfig,
+    evaluate_suppression,
+)
+from services.recommendation.worker import (
+    build_recommendation,
+    build_thesis,
+    classify_risk,
+)
+from services.shared.schemas import (
+    ActionType,
+    RecommendationMode,
+    TrendDirection,
+    TrendWindow,
+)
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+# ---------------------------------------------------------------------------
+# Shared test fixtures
+# ---------------------------------------------------------------------------
+
+SAMPLE_HTML = """
+<html>
+<head><title>Apple Reports Record Q2 Earnings</title>
+<meta name="author" content="Jane Doe">
+<meta property="article:published_time" content="2026-04-10T08:00:00Z">
+</head>
+<body>
+<nav>Site Navigation</nav>
+<article>
+<h1>Apple Reports Record Q2 Earnings</h1>
+<p>Apple Inc. (AAPL) reported record quarterly revenue of $120 billion,
+beating analyst expectations by 8%. CEO Tim Cook cited strong iPhone and
+services growth as key drivers.</p>
+<p>The company also announced a $100 billion share buyback program,
+signaling confidence in future cash flows. Analysts at Goldman Sachs
+raised their price target to $250.</p>
+<p>However, regulatory scrutiny in the EU remains a risk factor,
+with potential fines related to the Digital Markets Act.</p>
+</article>
+<footer>Copyright 2026</footer>
+</body>
+</html>
+"""
+
+SAMPLE_EXTRACTION_JSON = {
+    "summary": "Apple reported record Q2 revenue of $120B, beating expectations by 8%. "
+    "Announced $100B buyback. EU regulatory risk remains.",
+    "companies": [
+        {
+            "ticker": "AAPL",
+            "company_name": "Apple Inc.",
+            "relevance": 0.95,
+            "sentiment": "positive",
+            "impact_score": 0.75,
+            "impact_horizon": "1d_30d",
+            "catalyst_type": "earnings",
+            "key_facts": [
+                "Record quarterly revenue of $120 billion",
+                "$100 billion share buyback announced",
+                "Goldman Sachs raised price target to $250",
+            ],
+            "risks": ["EU regulatory scrutiny under Digital Markets Act"],
+            "evidence_spans": [
+                "Apple Inc. (AAPL) reported record quarterly revenue of $120 billion",
+                "beating analyst expectations by 8%",
+                "announced a $100 billion share buyback program",
+            ],
+        }
+    ],
+    "macro_themes": ["consumer_tech", "buybacks"],
+    "novelty_score": 0.7,
+    "confidence": 0.88,
+    "extraction_warnings": [],
+}
+
+COMPANY_ALIASES = [
+    {"company_id": "comp-1", "alias": "AAPL", "alias_type": "ticker", "ticker": "AAPL"},
+    {"company_id": "comp-1", "alias": "Apple Inc.", "alias_type": "legal_name", "ticker": "AAPL"},
+]
+
+
+# ---------------------------------------------------------------------------
+# Stage 1: Parsing
+# ---------------------------------------------------------------------------
+
+
+class TestParsingStage:
+    """Verify the HTML parsing pipeline produces structured output."""
+
+    def test_parse_html_extracts_body_text(self):
+        parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings")
+        assert parsed.body_text is not None
+        assert "record quarterly revenue" in parsed.body_text.lower()
+        # Boilerplate should be stripped
+        assert "Site Navigation" not in parsed.body_text
+        assert "Copyright" not in parsed.body_text
+
+    def test_parse_html_extracts_metadata(self):
+        parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings")
+        assert parsed.title == "Apple Reports Record Q2 Earnings"
+        assert parsed.quality_score > 0.0
+        assert parsed.confidence != "low"
+
+    def test_detect_company_mentions_finds_aapl(self):
+        parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings")
+        mentions = detect_company_mentions(parsed.body_text, COMPANY_ALIASES)
+        tickers_found = {m["ticker"] for m in mentions}
+        assert "AAPL" in tickers_found
+
+    def test_parser_output_json_structure(self):
+        parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-earnings")
+        mentions = detect_company_mentions(parsed.body_text, COMPANY_ALIASES)
+        output = build_parser_output_json(parsed, mentions)
+        assert "quality_score" in output
+        assert "mentioned_companies" in output
+        assert isinstance(output["mentioned_companies"], list)
+        assert output["title"] == "Apple Reports Record Q2 Earnings"
+
+
+# ---------------------------------------------------------------------------
+# Stage 2: Extraction validation
+# ---------------------------------------------------------------------------
+
+
+class TestExtractionStage:
+    """Verify extraction schema validation and result construction."""
+
+    def test_validate_extraction_accepts_valid_json(self):
+        report = validate_extraction(SAMPLE_EXTRACTION_JSON)
+        assert report.valid
+        assert report.parsed is not None
+        assert report.parsed.companies[0].ticker == "AAPL"
+
+    def test_validate_extraction_rejects_invalid_json(self):
+        report = validate_extraction("not json at all")
+        assert not report.valid
+        assert len(report.errors) > 0
+
+    def test_validate_extraction_rejects_bad_schema(self):
+        bad = {"summary": "test"}  # missing required fields
+        report = validate_extraction(bad)
+        assert not report.valid
+
+    def test_extraction_result_matches_intelligence_schema(self):
+        result = ExtractionResult.model_validate(SAMPLE_EXTRACTION_JSON)
+        assert result.confidence == 0.88
+        assert len(result.companies) == 1
+        assert result.companies[0].catalyst_type.value == "earnings"
+        assert result.novelty_score == 0.7
+
+    def test_validate_extraction_with_document_text_checks_evidence(self):
+        """Evidence grounding check should warn if spans not found."""
+        report = validate_extraction(
+            SAMPLE_EXTRACTION_JSON,
+            document_text="Completely unrelated text about weather.",
+        )
+        # Should still be valid (evidence grounding is a warning, not error)
+        assert report.valid
+        assert any("evidence_span_not_found" in w for w in report.warnings)
+
+
+# ---------------------------------------------------------------------------
+# Stage 3: Extraction persistence (mocked infra)
+# ---------------------------------------------------------------------------
+
+
+class TestExtractionPersistence:
+    """Verify extraction artifacts are persisted correctly."""
+
+    @pytest.mark.asyncio
+    async def test_persist_successful_extraction_creates_all_artifacts(self):
+        result_obj = ExtractionResult.model_validate(SAMPLE_EXTRACTION_JSON)
+        validation = ValidationReport(valid=True, errors=[], warnings=[], parsed=result_obj)
+        attempt = ExtractionAttempt(
+            raw_output=json.dumps(SAMPLE_EXTRACTION_JSON),
+            validation=validation,
+            error=None,
+            duration_ms=450,
+            model="test-model",
+        )
+        response = ExtractionResponse(
+            success=True,
+            result=result_obj,
+            attempts=[attempt],
+            prompt_metadata={"prompt_version": "document-intel-v2", "schema_version": "2.0.0"},
+            model="test-model",
+            total_duration_ms=450,
+        )
+
+        pool = AsyncMock()
+        pool.fetchval = AsyncMock(side_effect=["intel-1", "impact-1", "metrics-1"])
+        pool.execute = AsyncMock()
+        minio = MagicMock()
+
+        persist_result = await persist_extraction(
+            pool=pool,
+            minio_client=minio,
+            document_id=str(uuid.uuid4()),
+            ticker="AAPL",
+            extraction_response=response,
+            company_id_map={"AAPL": "comp-1"},
+            source_credibility=0.8,
+            timestamp=NOW,
+        )
+
+        assert persist_result.success
+        assert persist_result.intelligence_id == "intel-1"
+        assert persist_result.impact_ids == ["impact-1"]
+        # 4 MinIO uploads: prompt, raw_output, validation, intelligence
+        assert minio.put_object.call_count == 4
+
+
+# ---------------------------------------------------------------------------
+# Stage 4: Aggregation
+# ---------------------------------------------------------------------------
+
+
+class TestAggregationStage:
+    """Verify trend summary assembly from document impact records."""
+
+    def _make_impacts_from_extraction(self) -> list[ImpactRow]:
+        """Build ImpactRows that mirror what the extraction stage would produce."""
+        return [
+            ImpactRow(
+                document_id="doc-1",
+                confidence=0.88,
+                novelty_score=0.7,
+                source_credibility=0.8,
+                sentiment="positive",
+                impact_score=0.75,
+                catalyst_type="earnings",
+                key_facts=["Record revenue $120B", "$100B buyback"],
+                risks=["EU regulatory scrutiny"],
+                published_at=NOW - timedelta(hours=2),
+            ),
+            ImpactRow(
+                document_id="doc-2",
+                confidence=0.72,
+                novelty_score=0.5,
+                source_credibility=0.7,
+                sentiment="positive",
+                impact_score=0.6,
+                catalyst_type="rating_change",
+                key_facts=["Goldman raised target to $250"],
+                risks=[],
+                published_at=NOW - timedelta(hours=4),
+            ),
+            ImpactRow(
+                document_id="doc-3",
+                confidence=0.65,
+                novelty_score=0.4,
+                source_credibility=0.6,
+                sentiment="negative",
+                impact_score=0.4,
+                catalyst_type="legal",
+                key_facts=["EU DMA investigation"],
+                risks=["Potential fines"],
+                published_at=NOW - timedelta(hours=6),
+            ),
+        ]
+
+    def test_aggregation_produces_bullish_trend(self):
+        impacts = self._make_impacts_from_extraction()
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "AAPL", "7d", signals, impacts, reference_time=NOW,
+        )
+        summary = assembled.summary
+
+        assert summary.entity_id == "AAPL"
+        assert summary.window == TrendWindow.SEVEN_DAY
+        # Two positive, one negative → should be bullish
+        assert summary.trend_direction == TrendDirection.BULLISH
+        assert summary.trend_strength > 0
+        assert summary.confidence > 0
+        assert len(summary.top_supporting_evidence) >= 1
+        assert len(summary.top_opposing_evidence) >= 1
+        assert summary.contradiction_score > 0  # has opposing signal
+
+    def test_aggregation_evidence_rankings_are_populated(self):
+        impacts = self._make_impacts_from_extraction()
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "AAPL", "7d", signals, impacts, reference_time=NOW,
+        )
+
+        # Supporting evidence should include the positive docs
+        supporting_ids = {e.document_id for e in assembled.supporting_evidence}
+        assert "doc-1" in supporting_ids
+        assert "doc-2" in supporting_ids
+
+        # Opposing evidence should include the negative doc
+        opposing_ids = {e.document_id for e in assembled.opposing_evidence}
+        assert "doc-3" in opposing_ids
+
+    def test_aggregation_extracts_catalysts_and_risks(self):
+        impacts = self._make_impacts_from_extraction()
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "AAPL", "7d", signals, impacts, reference_time=NOW,
+        )
+        summary = assembled.summary
+
+        assert len(summary.dominant_catalysts) > 0
+        assert "earnings" in summary.dominant_catalysts
+        assert len(summary.material_risks) > 0
+
+
+# ---------------------------------------------------------------------------
+# Stage 5: Recommendation
+# ---------------------------------------------------------------------------
+
+
+class TestRecommendationStage:
+    """Verify recommendation generation from trend summaries."""
+
+    def _make_trend_from_aggregation(self):
+        """Build a TrendSummary that mirrors aggregation output."""
+        impacts = [
+            ImpactRow(
+                document_id="doc-1", confidence=0.88, novelty_score=0.7,
+                source_credibility=0.8, sentiment="positive", impact_score=0.75,
+                catalyst_type="earnings", key_facts=["Record revenue"],
+                risks=["EU regulatory"], published_at=NOW - timedelta(hours=2),
+            ),
+            ImpactRow(
+                document_id="doc-2", confidence=0.72, novelty_score=0.5,
+                source_credibility=0.7, sentiment="positive", impact_score=0.6,
+                catalyst_type="rating_change", key_facts=["Target raised"],
+                risks=[], published_at=NOW - timedelta(hours=4),
+            ),
+            ImpactRow(
+                document_id="doc-3", confidence=0.65, novelty_score=0.4,
+                source_credibility=0.6, sentiment="negative", impact_score=0.4,
+                catalyst_type="legal", key_facts=["DMA investigation"],
+                risks=["Potential fines"], published_at=NOW - timedelta(hours=6),
+            ),
+        ]
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "AAPL", "7d", signals, impacts, reference_time=NOW,
+        )
+        return assembled.summary
+
+    def test_eligibility_produces_buy_for_bullish_trend(self):
+        summary = self._make_trend_from_aggregation()
+        result = evaluate_eligibility(summary)
+        assert result.action == ActionType.BUY
+        assert result.eligible
+
+    def test_recommendation_has_thesis_and_evidence(self):
+        summary = self._make_trend_from_aggregation()
+        result = evaluate_eligibility(summary)
+        rec = build_recommendation(summary, result, reference_time=NOW)
+
+        assert rec.ticker == "AAPL"
+        assert rec.action == ActionType.BUY
+        assert len(rec.thesis) > 0
+        assert "[risk:" in rec.thesis
+        assert len(rec.evidence_refs) > 0
+        assert rec.time_horizon == "swing_1d_10d"
+
+    def test_recommendation_position_sizing_is_bounded(self):
+        summary = self._make_trend_from_aggregation()
+        result = evaluate_eligibility(summary)
+        rec = build_recommendation(summary, result, reference_time=NOW)
+
+        assert 0 < rec.position_sizing.portfolio_pct <= 0.05
+        assert 0 < rec.position_sizing.max_loss_pct <= 0.01
+
+    def test_recommendation_mode_reflects_confidence(self):
+        summary = self._make_trend_from_aggregation()
+        result = evaluate_eligibility(summary)
+        rec = build_recommendation(summary, result, reference_time=NOW)
+
+        # With 3 impact records the aggregated confidence is moderate (~0.41),
+        # which is below the paper_confidence_threshold (0.50). The eligibility
+        # engine correctly assigns INFORMATIONAL mode for BUY actions with
+        # sub-threshold confidence. This validates Requirement 7.4.
+        if summary.confidence >= 0.50:
+            assert rec.mode in (
+                RecommendationMode.PAPER_ELIGIBLE,
+                RecommendationMode.LIVE_ELIGIBLE,
+            )
+        else:
+            assert rec.mode == RecommendationMode.INFORMATIONAL
+
+    def test_suppression_blocks_low_quality_data(self):
+        summary = self._make_trend_from_aggregation()
+        low_quality_ctx = DataQualityContext(
+            total_documents=5,
+            valid_documents=1,
+            failed_documents=4,
+            avg_extraction_confidence=0.2,
+            newest_evidence_at=NOW - timedelta(days=14),
+            source_types=set(),
+        )
+        suppression = evaluate_suppression(
+            summary, quality_ctx=low_quality_ctx, reference_time=NOW,
+        )
+        assert suppression.suppressed
+        assert len(suppression.reasons) > 0
+
+
+# ---------------------------------------------------------------------------
+# Full pipeline integration
+# ---------------------------------------------------------------------------
+
+
+class TestFullPipelineIntegration:
+    """End-to-end test wiring all stages together with real logic."""
+
+    def test_html_to_recommendation_pipeline(self):
+        """Walk a document through parse → validate extraction → aggregate → recommend."""
+
+        # --- Stage 1: Parse HTML ---
+        parsed = parse_html(SAMPLE_HTML, "https://example.com/apple-q2")
+        assert parsed.body_text
+        assert parsed.confidence != "low"
+
+        mentions = detect_company_mentions(parsed.body_text, COMPANY_ALIASES)
+        assert any(m["ticker"] == "AAPL" for m in mentions)
+
+        # --- Stage 2: Validate extraction output ---
+        report = validate_extraction(
+            SAMPLE_EXTRACTION_JSON,
+            document_text=parsed.body_text,
+        )
+        assert report.valid
+        extraction = report.parsed
+        assert extraction is not None
+        assert extraction.companies[0].ticker == "AAPL"
+
+        # --- Stage 3: Build impact records from extraction ---
+        company = extraction.companies[0]
+        impact = ImpactRow(
+            document_id="doc-pipeline-1",
+            confidence=extraction.confidence,
+            novelty_score=extraction.novelty_score,
+            source_credibility=0.8,
+            sentiment=company.sentiment.value,
+            impact_score=company.impact_score,
+            catalyst_type=company.catalyst_type.value,
+            key_facts=company.key_facts,
+            risks=company.risks,
+            published_at=NOW - timedelta(hours=1),
+        )
+
+        # Add a second supporting document for richer aggregation
+        impact2 = ImpactRow(
+            document_id="doc-pipeline-2",
+            confidence=0.75,
+            novelty_score=0.5,
+            source_credibility=0.7,
+            sentiment="positive",
+            impact_score=0.6,
+            catalyst_type="rating_change",
+            key_facts=["Analyst upgrade"],
+            risks=[],
+            published_at=NOW - timedelta(hours=3),
+        )
+
+        impacts = [impact, impact2]
+
+        # --- Stage 4: Aggregate into trend summary ---
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "AAPL", "7d", signals, impacts, reference_time=NOW,
+        )
+        summary = assembled.summary
+
+        assert summary.trend_direction == TrendDirection.BULLISH
+        assert summary.confidence > 0
+        assert len(summary.top_supporting_evidence) > 0
+
+        # --- Stage 5: Generate recommendation ---
+        eligibility = evaluate_eligibility(summary)
+        assert eligibility.action == ActionType.BUY
+        assert eligibility.eligible
+
+        rec = build_recommendation(summary, eligibility, reference_time=NOW)
+
+        # Final assertions: the recommendation is coherent end-to-end
+        assert rec.ticker == "AAPL"
+        assert rec.action == ActionType.BUY
+        assert rec.confidence == summary.confidence
+        assert len(rec.evidence_refs) > 0
+        assert rec.thesis.startswith("[risk:")
+        assert "AAPL" in rec.thesis
+        assert "bullish" in rec.thesis
+        assert rec.time_horizon == "swing_1d_10d"
+        assert 0 < rec.position_sizing.portfolio_pct <= 0.05
+
+    def test_low_quality_document_is_blocked(self):
+        """A low-quality parse should not produce a trade-eligible recommendation."""
+
+        # Minimal HTML that produces a low-quality parse
+        bad_html = "<html><body><p>Ad. Subscribe now.</p></body></html>"
+        parsed = parse_html(bad_html, "https://example.com/junk")
+
+        # Low quality parse → should not advance to extraction
+        # The parser worker checks confidence != "low" before enqueuing
+        if parsed.confidence == "low" or parsed.quality_score < 0.3:
+            # This is the expected path: document blocked at parse stage
+            return
+
+        # If somehow it passes parsing, suppression should catch it
+        # Build a minimal trend with low data quality
+        from services.shared.schemas import TrendSummary
+        summary = TrendSummary(
+            entity_type="company",
+            entity_id="JUNK",
+            window=TrendWindow.SEVEN_DAY,
+            trend_direction=TrendDirection.BULLISH,
+            trend_strength=0.3,
+            confidence=0.3,
+            top_supporting_evidence=["doc-1"],
+            generated_at=NOW,
+        )
+        suppression = evaluate_suppression(summary, reference_time=NOW)
+        # With only 1 evidence doc and low confidence, should be suppressed
+        assert suppression.suppressed
+
+    def test_bearish_signal_produces_sell_recommendation(self):
+        """Negative sentiment documents should produce a SELL recommendation."""
+
+        impacts = [
+            ImpactRow(
+                document_id="doc-bear-1",
+                confidence=0.82,
+                novelty_score=0.6,
+                source_credibility=0.8,
+                sentiment="negative",
+                impact_score=0.7,
+                catalyst_type="legal",
+                key_facts=["Major lawsuit filed"],
+                risks=["Potential $5B fine"],
+                published_at=NOW - timedelta(hours=1),
+            ),
+            ImpactRow(
+                document_id="doc-bear-2",
+                confidence=0.78,
+                novelty_score=0.5,
+                source_credibility=0.75,
+                sentiment="negative",
+                impact_score=0.65,
+                catalyst_type="earnings",
+                key_facts=["Revenue miss by 15%"],
+                risks=["Guidance lowered"],
+                published_at=NOW - timedelta(hours=3),
+            ),
+        ]
+
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "TSLA", "7d", signals, impacts, reference_time=NOW,
+        )
+        summary = assembled.summary
+
+        assert summary.trend_direction == TrendDirection.BEARISH
+
+        eligibility = evaluate_eligibility(summary)
+        assert eligibility.action == ActionType.SELL
+
+        rec = build_recommendation(summary, eligibility, reference_time=NOW)
+        assert rec.ticker == "TSLA"
+        assert rec.action == ActionType.SELL
+        assert "SELL" in rec.thesis
+
+    def test_contradictory_signals_produce_mixed_or_watch(self):
+        """Equal opposing signals should result in WATCH or MIXED direction."""
+
+        impacts = [
+            ImpactRow(
+                document_id="doc-pos",
+                confidence=0.8,
+                novelty_score=0.5,
+                source_credibility=0.8,
+                sentiment="positive",
+                impact_score=0.6,
+                catalyst_type="earnings",
+                key_facts=["Beat expectations"],
+                risks=[],
+                published_at=NOW - timedelta(hours=1),
+            ),
+            ImpactRow(
+                document_id="doc-neg",
+                confidence=0.8,
+                novelty_score=0.5,
+                source_credibility=0.8,
+                sentiment="negative",
+                impact_score=0.6,
+                catalyst_type="legal",
+                key_facts=["Lawsuit filed"],
+                risks=["Regulatory risk"],
+                published_at=NOW - timedelta(hours=1),
+            ),
+        ]
+
+        signals = build_weighted_signals(impacts, NOW, "7d")
+        assembled = assemble_trend_with_evidence(
+            "MSFT", "7d", signals, impacts, reference_time=NOW,
+        )
+        summary = assembled.summary
+
+        assert summary.trend_direction in (TrendDirection.MIXED, TrendDirection.NEUTRAL)
+        assert summary.contradiction_score > 0
+
+        eligibility = evaluate_eligibility(summary)
+        rec = build_recommendation(summary, eligibility, reference_time=NOW)
+
+        # Contradictory signals → WATCH or HOLD, mode should be informational
+        assert rec.action in (ActionType.WATCH, ActionType.HOLD)
+        assert rec.mode == RecommendationMode.INFORMATIONAL
@@ -0,0 +1,212 @@
+"""Tests for Kubernetes manifest security hardening.
+
+Validates that all deployments in infra/k8s/ follow security best practices:
+- Scoped secrets (no monolithic stonks-secrets)
+- Pod security contexts (runAsNonRoot, seccompProfile)
+- Container security contexts (no privilege escalation, drop ALL caps)
+- automountServiceAccountToken disabled
+- Broker secrets only on trading-tier pods
+"""
+from __future__ import annotations
+
+import glob
+from pathlib import Path
+
+import yaml
+
+
+K8S_DIR = Path("infra/k8s")
+
+# Services that legitimately need broker secrets
+BROKER_SECRET_ALLOWED = {"broker-adapter", "risk-engine"}
+
+# Services that legitimately need market-data secrets
+MARKET_SECRET_ALLOWED = {"ingestion-worker"}
+
+
+def _load_deployments() -> list[tuple[str, dict]]:
+    """Load all Deployment objects from infra/k8s/*.yaml."""
+    deployments = []
+    for path in sorted(K8S_DIR.glob("*.yaml")):
+        with open(path) as f:
+            for doc in yaml.safe_load_all(f):
+                if doc and doc.get("kind") == "Deployment":
+                    name = doc["metadata"]["name"]
+                    deployments.append((name, doc))
+    return deployments
+
+
+def _get_secret_refs(spec: dict) -> list[str]:
+    """Extract all secretRef names from a pod spec's envFrom."""
+    refs = []
+    for container in spec.get("containers", []):
+        for env_from in container.get("envFrom", []):
+            secret = env_from.get("secretRef", {})
+            if secret.get("name"):
+                refs.append(secret["name"])
+    return refs
+
+
+class TestSecretScoping:
+    """Verify that the monolithic stonks-secrets is no longer used."""
+
+    def test_no_monolithic_secret_ref(self):
+        """No deployment should reference the old stonks-secrets."""
+        for name, dep in _load_deployments():
+            pod_spec = dep["spec"]["template"]["spec"]
+            refs = _get_secret_refs(pod_spec)
+            assert "stonks-secrets" not in refs, (
+                f"Deployment {name} still references monolithic stonks-secrets"
+            )
+
+    def test_broker_secrets_only_on_trading_tier(self):
+        """Only broker-adapter and risk-engine should have broker secrets."""
+        for name, dep in _load_deployments():
+            pod_spec = dep["spec"]["template"]["spec"]
+            refs = _get_secret_refs(pod_spec)
+            if "stonks-broker-secrets" in refs:
+                assert name in BROKER_SECRET_ALLOWED, (
+                    f"Deployment {name} has broker secrets but is not in "
+                    f"allowed set {BROKER_SECRET_ALLOWED}"
+                )
+
+    def test_market_secrets_only_on_ingestion(self):
+        """Only ingestion-worker should have market-data secrets."""
+        for name, dep in _load_deployments():
+            pod_spec = dep["spec"]["template"]["spec"]
+            refs = _get_secret_refs(pod_spec)
+            if "stonks-market-secrets" in refs:
+                assert name in MARKET_SECRET_ALLOWED, (
+                    f"Deployment {name} has market secrets but is not in "
+                    f"allowed set {MARKET_SECRET_ALLOWED}"
+                )
+
+
+class TestPodSecurityContext:
+    """Verify pod-level security settings."""
+
+    def test_run_as_non_root(self):
+        for name, dep in _load_deployments():
+            pod_sec = dep["spec"]["template"]["spec"].get("securityContext", {})
+            assert pod_sec.get("runAsNonRoot") is True, (
+                f"Deployment {name} missing runAsNonRoot: true"
+            )
+
+    def test_seccomp_profile(self):
+        for name, dep in _load_deployments():
+            pod_sec = dep["spec"]["template"]["spec"].get("securityContext", {})
+            seccomp = pod_sec.get("seccompProfile", {})
+            assert seccomp.get("type") == "RuntimeDefault", (
+                f"Deployment {name} missing seccompProfile RuntimeDefault"
+            )
+
+    def test_automount_service_account_disabled(self):
+        for name, dep in _load_deployments():
+            pod_spec = dep["spec"]["template"]["spec"]
+            assert pod_spec.get("automountServiceAccountToken") is False, (
+                f"Deployment {name} should set automountServiceAccountToken: false"
+            )
+
+
+class TestContainerSecurityContext:
+    """Verify container-level security settings."""
+
+    def test_no_privilege_escalation(self):
+        for name, dep in _load_deployments():
+            for container in dep["spec"]["template"]["spec"]["containers"]:
+                sec = container.get("securityContext", {})
+                assert sec.get("allowPrivilegeEscalation") is False, (
+                    f"Deployment {name}, container {container['name']} "
+                    f"missing allowPrivilegeEscalation: false"
+                )
+
+    def test_drop_all_capabilities(self):
+        for name, dep in _load_deployments():
+            for container in dep["spec"]["template"]["spec"]["containers"]:
+                sec = container.get("securityContext", {})
+                caps = sec.get("capabilities", {})
+                assert "ALL" in caps.get("drop", []), (
+                    f"Deployment {name}, container {container['name']} "
+                    f"should drop ALL capabilities"
+                )
+
+
+class TestNetworkPolicies:
+    """Verify network policy manifests exist and cover key patterns."""
+
+    def _load_netpols(self) -> list[dict]:
+        policies = []
+        for path in K8S_DIR.glob("*.yaml"):
+            with open(path) as f:
+                for doc in yaml.safe_load_all(f):
+                    if doc and doc.get("kind") == "NetworkPolicy":
+                        policies.append(doc)
+        return policies
+
+    def test_default_deny_exists(self):
+        policies = self._load_netpols()
+        deny_policies = [
+            p for p in policies
+            if p["metadata"]["name"] == "default-deny-ingress"
+        ]
+        assert len(deny_policies) == 1, "Missing default-deny-ingress NetworkPolicy"
+
+    def test_broker_adapter_denied_ingress(self):
+        policies = self._load_netpols()
+        broker_policies = [
+            p for p in policies
+            if p["spec"].get("podSelector", {}).get("matchLabels", {}).get("app") == "broker-adapter"
+        ]
+        assert len(broker_policies) >= 1, "Missing NetworkPolicy for broker-adapter"
+        # Should have empty ingress (deny all inbound)
+        for p in broker_policies:
+            assert p["spec"].get("ingress") == [] or p["spec"].get("ingress") is None, (
+                "broker-adapter should deny all ingress"
+            )
+
+    def test_risk_engine_restricted_ingress(self):
+        policies = self._load_netpols()
+        risk_policies = [
+            p for p in policies
+            if p["spec"].get("podSelector", {}).get("matchLabels", {}).get("app") == "risk-engine"
+        ]
+        assert len(risk_policies) >= 1, "Missing NetworkPolicy for risk-engine"
+
+
+class TestSecretsManifest:
+    """Verify the secrets manifest uses scoped secrets."""
+
+    def _load_secrets(self) -> list[dict]:
+        secrets = []
+        path = K8S_DIR / "secrets.yaml"
+        with open(path) as f:
+            for doc in yaml.safe_load_all(f):
+                if doc and doc.get("kind") == "Secret":
+                    secrets.append(doc)
+        return secrets
+
+    def test_scoped_secrets_exist(self):
+        secrets = self._load_secrets()
+        names = {s["metadata"]["name"] for s in secrets}
+        assert "stonks-core-secrets" in names
+        assert "stonks-broker-secrets" in names
+        assert "stonks-market-secrets" in names
+        assert "stonks-dashboard-secrets" in names
+
+    def test_no_monolithic_secret(self):
+        secrets = self._load_secrets()
+        names = {s["metadata"]["name"] for s in secrets}
+        assert "stonks-secrets" not in names, (
+            "Monolithic stonks-secrets should be replaced by scoped secrets"
+        )
+
+    def test_no_plaintext_defaults(self):
+        """Secret values should be REPLACE_ME placeholders, not real defaults."""
+        secrets = self._load_secrets()
+        for secret in secrets:
+            for key, value in secret.get("stringData", {}).items():
+                if value:  # skip empty strings (e.g. REDIS_PASSWORD)
+                    assert value != "changeme", (
+                        f"Secret {secret['metadata']['name']}.{key} "
+                        f"still has 'changeme' default"
+                    )
@@ -0,0 +1,603 @@
+"""Validate lake publication and Trino query correctness over partitioned MinIO datasets.
+
+Ensures that:
+- PyArrow schemas in worker.py match the lakehouse DDL column definitions
+- Iceberg DDL generated from PyArrow schemas is consistent with lakehouse DDL
+- Partition layouts are Hive-compatible and discoverable by Trino
+- Published Parquet files embed partition columns in the data
+- Cross-table join keys used by views are present and type-consistent
+- All 12 analytical fact tables have aligned schema definitions across layers
+
+Requirements: 9.4, 9.5, 10.1, 10.3, N4, N6
+Design ref: Section 5.2, 5.3, 7, 8.4
+"""
+from __future__ import annotations
+
+import io
+import re
+from datetime import date, datetime, timezone
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+from services.lake_publisher.iceberg import (
+    ICEBERG_CATALOG,
+    ICEBERG_SCHEMA,
+    TABLE_SCHEMAS,
+    IcebergTableDef,
+    _arrow_type_to_trino,
+    get_all_table_defs,
+    get_table_def,
+)
+from services.lake_publisher.partitions import (
+    LAKEHOUSE_BUCKET,
+    TABLE_PARTITIONS,
+    WAREHOUSE_PREFIX,
+    partition_path,
+    partition_values,
+)
+from services.lake_publisher.worker import (
+    COMPANY_EVENTS_SCHEMA,
+    DOCUMENTS_SCHEMA,
+    DOCUMENT_EXTRACTIONS_SCHEMA,
+    MARKET_BARS_SCHEMA,
+    MARKET_QUOTES_SCHEMA,
+    MODEL_PERFORMANCE_SCHEMA,
+    PNL_DAILY_SCHEMA,
+    POSITIONS_DAILY_SCHEMA,
+    PREDICTION_VS_OUTCOME_SCHEMA,
+    TRADE_FILLS_SCHEMA,
+    TRADE_ORDERS_SCHEMA,
+    TRADE_SIGNALS_SCHEMA,
+    publish_market_bar,
+    publish_document_fact,
+    publish_document_extraction,
+    publish_trade_signal,
+    publish_trade_order,
+    publish_trade_fill,
+    publish_position_daily,
+    publish_pnl_daily,
+    publish_company_event,
+    publish_market_quote,
+    publish_prediction_fact,
+    publish_model_performance,
+)
+from services.shared.schemas import (
+    ActionType,
+    ModelMetadata,
+    PositionSizing,
+    Recommendation,
+    RecommendationMode,
+)
+
+NOW = datetime(2026, 4, 11, 14, 30, 0, tzinfo=timezone.utc)
+LAKEHOUSE_DDL_DIR = Path("lakehouse/schemas")
+
+# All 12 expected analytical fact tables
+ALL_TABLES = [
+    "market_bars",
+    "market_quotes",
+    "company_events",
+    "documents",
+    "document_extractions",
+    "trade_signals",
+    "trade_orders",
+    "trade_fills",
+    "positions_daily",
+    "pnl_daily",
+    "prediction_vs_outcome",
+    "model_performance",
+]
+
+# Map table names to their PyArrow schemas for direct reference
+PYARROW_SCHEMAS: dict[str, pa.Schema] = {
+    "market_bars": MARKET_BARS_SCHEMA,
+    "market_quotes": MARKET_QUOTES_SCHEMA,
+    "company_events": COMPANY_EVENTS_SCHEMA,
+    "documents": DOCUMENTS_SCHEMA,
+    "document_extractions": DOCUMENT_EXTRACTIONS_SCHEMA,
+    "trade_signals": TRADE_SIGNALS_SCHEMA,
+    "trade_orders": TRADE_ORDERS_SCHEMA,
+    "trade_fills": TRADE_FILLS_SCHEMA,
+    "positions_daily": POSITIONS_DAILY_SCHEMA,
+    "pnl_daily": PNL_DAILY_SCHEMA,
+    "prediction_vs_outcome": PREDICTION_VS_OUTCOME_SCHEMA,
+    "model_performance": MODEL_PERFORMANCE_SCHEMA,
+}
+
+
+# ---------------------------------------------------------------------------
+# Helpers: parse lakehouse DDL SQL files
+# ---------------------------------------------------------------------------
+
+def _parse_ddl_columns(sql_path: Path) -> list[tuple[str, str]]:
+    """Parse column definitions from a lakehouse DDL SQL file.
+
+    Returns list of (column_name, trino_type) tuples in declaration order.
+    Includes partition columns from the partitioned_by clause appended at the end,
+    since Hive DDL separates them but PyArrow/Iceberg schemas include them inline.
+    """
+    text = sql_path.read_text()
+    # Extract the column block — match balanced parens for the CREATE TABLE body.
+    # The column block ends at the closing ) before WITH.
+    match = re.search(
+        r"CREATE TABLE[^(]+\((.*)\)\s*WITH",
+        text, re.DOTALL | re.IGNORECASE,
+    )
+    if not match:
+        return []
+    col_block = match.group(1)
+    columns = []
+    for line in col_block.strip().split("\n"):
+        line = line.strip().rstrip(",")
+        if not line or line.startswith("--"):
+            continue
+        # Split only on first whitespace to keep multi-word types intact
+        parts = line.split(None, 1)
+        if len(parts) >= 2:
+            col_name = parts[0].lower()
+            col_type = parts[1].upper().strip()
+            columns.append((col_name, col_type))
+    return columns
+
+
+def _parse_ddl_partitions(sql_path: Path) -> list[str]:
+    """Parse partition keys from a lakehouse DDL SQL file."""
+    text = sql_path.read_text()
+    match = re.search(r"partitioned_by\s*=\s*ARRAY\[([^\]]+)\]", text, re.IGNORECASE)
+    if not match:
+        return []
+    raw = match.group(1)
+    return [k.strip().strip("'\"") for k in raw.split(",")]
+
+
+# ---------------------------------------------------------------------------
+# 1. All 12 tables are registered across all layers
+# ---------------------------------------------------------------------------
+
+
+def test_all_tables_in_partition_registry():
+    """Every expected analytical table is registered in TABLE_PARTITIONS."""
+    for table in ALL_TABLES:
+        assert table in TABLE_PARTITIONS, f"{table} missing from TABLE_PARTITIONS"
+
+
+def test_all_tables_in_schema_registry():
+    """Every expected analytical table has a PyArrow schema in TABLE_SCHEMAS."""
+    for table in ALL_TABLES:
+        assert table in TABLE_SCHEMAS, f"{table} missing from TABLE_SCHEMAS"
+
+
+def test_all_tables_have_ddl_files():
+    """Every expected analytical table has a lakehouse DDL SQL file."""
+    for table in ALL_TABLES:
+        ddl_path = LAKEHOUSE_DDL_DIR / f"{table}.sql"
+        assert ddl_path.exists(), f"Missing DDL file: {ddl_path}"
+
+
+def test_all_tables_have_iceberg_defs():
+    """Every table in TABLE_PARTITIONS produces a valid IcebergTableDef."""
+    defs = get_all_table_defs()
+    def_names = {d.table_name for d in defs}
+    for table in ALL_TABLES:
+        assert table in def_names, f"{table} missing from Iceberg table defs"
+
+
+# ---------------------------------------------------------------------------
+# 2. PyArrow schema ↔ Lakehouse DDL column alignment
+# ---------------------------------------------------------------------------
+
+
+def test_pyarrow_columns_match_ddl():
+    """PyArrow schema column names and order match the lakehouse DDL for every table."""
+    for table in ALL_TABLES:
+        ddl_path = LAKEHOUSE_DDL_DIR / f"{table}.sql"
+        if not ddl_path.exists():
+            continue
+        ddl_cols = _parse_ddl_columns(ddl_path)
+        ddl_col_names = [c[0] for c in ddl_cols]
+
+        arrow_schema = PYARROW_SCHEMAS[table]
+        arrow_col_names = [arrow_schema.field(i).name for i in range(len(arrow_schema))]
+
+        assert arrow_col_names == ddl_col_names, (
+            f"Column mismatch for {table}:\n"
+            f"  PyArrow: {arrow_col_names}\n"
+            f"  DDL:     {ddl_col_names}"
+        )
+
+
+def test_pyarrow_types_compatible_with_ddl():
+    """PyArrow types map to Trino types that match the lakehouse DDL."""
+    for table in ALL_TABLES:
+        ddl_path = LAKEHOUSE_DDL_DIR / f"{table}.sql"
+        if not ddl_path.exists():
+            continue
+        ddl_cols = _parse_ddl_columns(ddl_path)
+        ddl_type_map = {name: typ for name, typ in ddl_cols}
+
+        arrow_schema = PYARROW_SCHEMAS[table]
+        for i in range(len(arrow_schema)):
+            col_name = arrow_schema.field(i).name
+            arrow_type = arrow_schema.field(i).type
+            trino_type = _arrow_type_to_trino(arrow_type)
+
+            ddl_type = ddl_type_map.get(col_name, "")
+            assert trino_type == ddl_type, (
+                f"Type mismatch for {table}.{col_name}: "
+                f"PyArrow→Trino={trino_type}, DDL={ddl_type}"
+            )
+
+
+# ---------------------------------------------------------------------------
+# 3. Partition key alignment across layers
+# ---------------------------------------------------------------------------
+
+
+def test_partition_keys_match_ddl():
+    """Partition keys in TABLE_PARTITIONS match the DDL partitioned_by clause."""
+    for table in ALL_TABLES:
+        ddl_path = LAKEHOUSE_DDL_DIR / f"{table}.sql"
+        if not ddl_path.exists():
+            continue
+        ddl_parts = _parse_ddl_partitions(ddl_path)
+        spec = TABLE_PARTITIONS[table]
+        arrow_parts = list(spec.all_keys)
+
+        assert arrow_parts == ddl_parts, (
+            f"Partition key mismatch for {table}: "
+            f"TABLE_PARTITIONS={arrow_parts}, DDL={ddl_parts}"
+        )
+
+
+def test_iceberg_partition_keys_match():
+    """Iceberg DDL partition keys match TABLE_PARTITIONS for every table."""
+    for td in get_all_table_defs():
+        spec = TABLE_PARTITIONS[td.table_name]
+        expected_keys = list(spec.all_keys)
+        # Parse from the generated SQL
+        sql = td.create_table_sql()
+        match = re.search(r"partitioning = ARRAY\[([^\]]+)\]", sql)
+        if expected_keys:
+            assert match is not None, f"No partitioning clause for {td.table_name}"
+            parsed = [k.strip().strip("'") for k in match.group(1).split(",")]
+            assert parsed == expected_keys, (
+                f"Iceberg partition mismatch for {td.table_name}: "
+                f"expected={expected_keys}, got={parsed}"
+            )
+
+
+# ---------------------------------------------------------------------------
+# 4. Partition columns are embedded in PyArrow schemas
+# ---------------------------------------------------------------------------
+
+
+def test_partition_columns_in_pyarrow_schemas():
+    """Partition columns (dt, model_version, etc.) appear in the PyArrow schema
+    so they are written into Parquet files, not just inferred from paths."""
+    for table in ALL_TABLES:
+        schema = PYARROW_SCHEMAS[table]
+        spec = TABLE_PARTITIONS[table]
+        col_names = {schema.field(i).name for i in range(len(schema))}
+        for key in spec.all_keys:
+            assert key in col_names, (
+                f"Partition column '{key}' missing from PyArrow schema for {table}"
+            )
+
+
+# ---------------------------------------------------------------------------
+# 5. Hive-compatible partition path format
+# ---------------------------------------------------------------------------
+
+
+def test_partition_paths_are_hive_compatible():
+    """Partition paths follow Hive key=value directory convention."""
+    for table in ALL_TABLES:
+        spec = TABLE_PARTITIONS[table]
+        extras = {}
+        if spec.extra_keys:
+            extras = {k: "test_val" for k in spec.extra_keys}
+        path = partition_path(table, NOW, extras)
+
+        # Must start with warehouse prefix
+        assert path.startswith(f"{WAREHOUSE_PREFIX}/{table}/"), (
+            f"Path for {table} doesn't start with warehouse prefix: {path}"
+        )
+        # Must contain dt= partition
+        assert "dt=2026-04-11" in path, f"Missing dt partition in path for {table}: {path}"
+        # Must end with .parquet
+        assert path.endswith(".parquet"), f"Path for {table} doesn't end with .parquet: {path}"
+        # Extra partition keys must appear
+        for key in spec.extra_keys:
+            assert f"{key}=test_val" in path, (
+                f"Missing extra partition {key} in path for {table}: {path}"
+            )
+
+
+def test_partition_path_dt_from_date_object():
+    """partition_path works with both datetime and date objects."""
+    d = date(2026, 4, 11)
+    path = partition_path("market_bars", d)
+    assert "dt=2026-04-11" in path
+
+
+# ---------------------------------------------------------------------------
+# 6. Published Parquet files contain partition columns in data
+# ---------------------------------------------------------------------------
+
+
+def _capture_parquet(mock_client: MagicMock) -> pa.Table:
+    """Extract the Parquet table from a MagicMock MinIO client's put_object call."""
+    put_call = mock_client.put_object.call_args
+    buf = put_call[0][2]
+    buf.seek(0)
+    return pq.read_table(buf)
+
+
+def test_published_market_bar_has_dt_column():
+    client = MagicMock()
+    publish_market_bar(
+        client, ticker="AAPL", open_price=150.0, high_price=155.0,
+        low_price=149.0, close_price=153.0, volume=1000000,
+        bar_timestamp=NOW, source="test",
+    )
+    table = _capture_parquet(client)
+    assert "dt" in table.column_names
+    assert table.column("dt")[0].as_py() == date(2026, 4, 11)
+
+
+def test_published_document_extraction_has_partition_columns():
+    client = MagicMock()
+    publish_document_extraction(
+        client, document_id="doc-1", ticker="AAPL", sentiment="positive",
+        impact_score=0.7, catalyst_type="earnings", confidence=0.85,
+        extraction_at=NOW, model_name="test-model", prompt_version="v1",
+        schema_version="2.0.0",
+    )
+    table = _capture_parquet(client)
+    assert "dt" in table.column_names
+    assert "model_version" in table.column_names
+    assert table.column("dt")[0].as_py() == date(2026, 4, 11)
+    assert table.column("model_version")[0].as_py() == "2.0.0"
+
+
+def test_published_prediction_vs_outcome_has_partition_columns():
+    client = MagicMock()
+    rec = Recommendation(
+        recommendation_id="rec-001", ticker="AAPL", action=ActionType.BUY,
+        mode=RecommendationMode.PAPER_ELIGIBLE, confidence=0.72,
+        time_horizon="swing_1d_10d", thesis="test",
+        invalidation_conditions=["x"], position_sizing=PositionSizing(portfolio_pct=0.02, max_loss_pct=0.005),
+        evidence_refs=["doc1"], model_metadata=ModelMetadata(provider="ollama", model_name="test-v1"),
+        generated_at=NOW,
+    )
+    publish_prediction_fact(client, rec)
+    table = _capture_parquet(client)
+    assert "dt" in table.column_names
+    assert "model_version" in table.column_names
+
+
+def test_published_model_performance_has_partition_columns():
+    client = MagicMock()
+    publish_model_performance(
+        client, document_id="doc-1", model_name="gpt-oss:20b",
+        success=True, total_duration_ms=1500, recorded_at=NOW,
+        schema_version="2.0.0",
+    )
+    table = _capture_parquet(client)
+    assert "dt" in table.column_names
+    assert "model_version" in table.column_names
+    assert table.column("model_version")[0].as_py() == "2.0.0"
+
+
+# ---------------------------------------------------------------------------
+# 7. Parquet schema matches PyArrow schema for every publisher
+# ---------------------------------------------------------------------------
+
+
+def _publish_and_verify_schema(table_name: str, publish_fn, expected_schema: pa.Schema):
+    """Helper: call a publish function, read back the Parquet, verify column names match."""
+    client = MagicMock()
+    publish_fn(client)
+    table = _capture_parquet(client)
+    expected_names = [expected_schema.field(i).name for i in range(len(expected_schema))]
+    assert list(table.column_names) == expected_names, (
+        f"Parquet column mismatch for {table_name}: "
+        f"got={list(table.column_names)}, expected={expected_names}"
+    )
+
+
+def test_parquet_schema_market_bars():
+    _publish_and_verify_schema("market_bars", lambda c: publish_market_bar(
+        c, "AAPL", 150.0, 155.0, 149.0, 153.0, 1000000, NOW, "test",
+    ), MARKET_BARS_SCHEMA)
+
+
+def test_parquet_schema_market_quotes():
+    _publish_and_verify_schema("market_quotes", lambda c: publish_market_quote(
+        c, "AAPL", 150.0, 150.5, 150.25, NOW, "test",
+    ), MARKET_QUOTES_SCHEMA)
+
+
+def test_parquet_schema_company_events():
+    _publish_and_verify_schema("company_events", lambda c: publish_company_event(
+        c, "evt-1", "AAPL", "earnings", "Q1 Earnings", NOW, "test",
+    ), COMPANY_EVENTS_SCHEMA)
+
+
+def test_parquet_schema_documents():
+    _publish_and_verify_schema("documents", lambda c: publish_document_fact(
+        c, "doc-1", "article", "news_api", "AAPL", "Reuters", "Test", NOW, "hash123",
+    ), DOCUMENTS_SCHEMA)
+
+
+def test_parquet_schema_trade_orders():
+    _publish_and_verify_schema("trade_orders", lambda c: publish_trade_order(
+        c, "ord-1", "AAPL", "buy", "market", 10.0, None, "filled", "acct-1", NOW,
+    ), TRADE_ORDERS_SCHEMA)
+
+
+def test_parquet_schema_trade_fills():
+    _publish_and_verify_schema("trade_fills", lambda c: publish_trade_fill(
+        c, "fill-1", "ord-1", "AAPL", "buy", 150.25, 10.0, "acct-1", NOW,
+    ), TRADE_FILLS_SCHEMA)
+
+
+def test_parquet_schema_positions_daily():
+    _publish_and_verify_schema("positions_daily", lambda c: publish_position_daily(
+        c, "AAPL", 100.0, 145.0, 150.0, 500.0, "acct-1", NOW,
+    ), POSITIONS_DAILY_SCHEMA)
+
+
+def test_parquet_schema_pnl_daily():
+    _publish_and_verify_schema("pnl_daily", lambda c: publish_pnl_daily(
+        c, "AAPL", 200.0, 500.0, 700.0, "acct-1", NOW,
+    ), PNL_DAILY_SCHEMA)
+
+
+# ---------------------------------------------------------------------------
+# 8. Cross-table join keys for views
+# ---------------------------------------------------------------------------
+
+
+def test_prediction_accuracy_view_join_keys():
+    """prediction_accuracy view joins prediction_vs_outcome with trade_signals
+    on recommendation_id and dt — both tables must have these columns."""
+    pvo_cols = {PREDICTION_VS_OUTCOME_SCHEMA.field(i).name for i in range(len(PREDICTION_VS_OUTCOME_SCHEMA))}
+    ts_cols = {TRADE_SIGNALS_SCHEMA.field(i).name for i in range(len(TRADE_SIGNALS_SCHEMA))}
+    assert "recommendation_id" in pvo_cols
+    assert "recommendation_id" in ts_cols
+    assert "dt" in pvo_cols
+    assert "dt" in ts_cols
+
+
+def test_paper_trade_scorecard_view_join_keys():
+    """paper_trade_scorecard joins pnl_daily with trade_orders
+    on ticker, broker_account, and dt."""
+    pnl_cols = {PNL_DAILY_SCHEMA.field(i).name for i in range(len(PNL_DAILY_SCHEMA))}
+    ord_cols = {TRADE_ORDERS_SCHEMA.field(i).name for i in range(len(TRADE_ORDERS_SCHEMA))}
+    for key in ["ticker", "broker_account", "dt"]:
+        assert key in pnl_cols, f"pnl_daily missing join key: {key}"
+        assert key in ord_cols, f"trade_orders missing join key: {key}"
+
+
+def test_paper_trade_detail_view_join_keys():
+    """paper_trade_detail joins trade_orders, trade_fills, and prediction_vs_outcome."""
+    ord_cols = {TRADE_ORDERS_SCHEMA.field(i).name for i in range(len(TRADE_ORDERS_SCHEMA))}
+    fill_cols = {TRADE_FILLS_SCHEMA.field(i).name for i in range(len(TRADE_FILLS_SCHEMA))}
+    pvo_cols = {PREDICTION_VS_OUTCOME_SCHEMA.field(i).name for i in range(len(PREDICTION_VS_OUTCOME_SCHEMA))}
+
+    # orders ↔ fills on order_id, dt
+    assert "order_id" in ord_cols
+    assert "order_id" in fill_cols
+    assert "dt" in ord_cols
+    assert "dt" in fill_cols
+
+    # orders ↔ prediction_vs_outcome on recommendation_id, dt
+    assert "recommendation_id" in ord_cols
+    assert "recommendation_id" in pvo_cols
+
+
+def test_signal_hit_rate_view_columns():
+    """signal_hit_rate groups by dt and model_version from prediction_vs_outcome."""
+    pvo_cols = {PREDICTION_VS_OUTCOME_SCHEMA.field(i).name for i in range(len(PREDICTION_VS_OUTCOME_SCHEMA))}
+    assert "dt" in pvo_cols
+    assert "model_version" in pvo_cols
+    assert "outcome" in pvo_cols
+    assert "predicted_confidence" in pvo_cols
+    assert "actual_move_pct" in pvo_cols
+
+
+# ---------------------------------------------------------------------------
+# 9. Iceberg DDL consistency with lakehouse DDL
+# ---------------------------------------------------------------------------
+
+
+def test_iceberg_ddl_columns_match_lakehouse_ddl():
+    """Iceberg CREATE TABLE columns match the lakehouse DDL columns for every table."""
+    for td in get_all_table_defs():
+        ddl_path = LAKEHOUSE_DDL_DIR / f"{td.table_name}.sql"
+        if not ddl_path.exists():
+            continue
+        ddl_cols = _parse_ddl_columns(ddl_path)
+        ddl_col_names = [c[0] for c in ddl_cols]
+
+        iceberg_sql = td.create_table_sql()
+        # Extract column block from Iceberg DDL (greedy to handle nested parens)
+        match = re.search(r"CREATE TABLE[^(]+\((.*)\)\s*WITH", iceberg_sql, re.DOTALL)
+        assert match is not None, f"Could not parse Iceberg DDL for {td.table_name}"
+        iceberg_col_block = match.group(1)
+        iceberg_col_names = []
+        for line in iceberg_col_block.strip().split("\n"):
+            line = line.strip().rstrip(",")
+            if line:
+                parts = line.split()
+                if parts:
+                    iceberg_col_names.append(parts[0].lower())
+
+        assert iceberg_col_names == ddl_col_names, (
+            f"Iceberg DDL column mismatch for {td.table_name}:\n"
+            f"  Iceberg: {iceberg_col_names}\n"
+            f"  DDL:     {ddl_col_names}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# 10. MinIO bucket and path conventions
+# ---------------------------------------------------------------------------
+
+
+def test_lakehouse_bucket_name():
+    assert LAKEHOUSE_BUCKET == "stonks-lakehouse"
+
+
+def test_warehouse_prefix():
+    assert WAREHOUSE_PREFIX == "warehouse"
+
+
+def test_all_paths_use_warehouse_prefix():
+    """Every table's partition path starts with warehouse/{table_name}/."""
+    for table in ALL_TABLES:
+        spec = TABLE_PARTITIONS[table]
+        extras = {k: "v" for k in spec.extra_keys}
+        path = partition_path(table, NOW, extras)
+        assert path.startswith(f"warehouse/{table}/"), (
+            f"Path for {table} doesn't follow convention: {path}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# 11. Iceberg table locations point to correct MinIO paths
+# ---------------------------------------------------------------------------
+
+
+def test_iceberg_locations_match_ddl_external_locations():
+    """Iceberg table locations use s3a:// and match the lakehouse DDL external_location."""
+    for td in get_all_table_defs():
+        expected = f"s3a://{LAKEHOUSE_BUCKET}/{WAREHOUSE_PREFIX}/{td.table_name}/"
+        assert td.location == expected, (
+            f"Iceberg location mismatch for {td.table_name}: "
+            f"got={td.location}, expected={expected}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# 12. Partition values are injected correctly
+# ---------------------------------------------------------------------------
+
+
+def test_partition_values_dt_only():
+    pv = partition_values(NOW)
+    assert pv == {"dt": date(2026, 4, 11)}
+
+
+def test_partition_values_with_model_version():
+    pv = partition_values(NOW, {"model_version": "2.0.0"})
+    assert pv == {"dt": date(2026, 4, 11), "model_version": "2.0.0"}
+
+
+def test_partition_values_from_date():
+    pv = partition_values(date(2026, 4, 11))
+    assert pv == {"dt": date(2026, 4, 11)}
@@ -0,0 +1,596 @@
+"""Tests for lake publisher worker — writing prediction facts as Parquet to MinIO."""
+from datetime import date, datetime, timezone
+from unittest.mock import MagicMock
+
+import pyarrow.parquet as pq
+
+from services.lake_publisher.partitions import (
+    LAKEHOUSE_BUCKET,
+    TABLE_PARTITIONS,
+    PartitionSpec,
+    partition_path,
+    partition_values,
+    s3_uri,
+)
+from services.lake_publisher.worker import (
+    _parse_horizon_days,
+    _partition_path,
+    build_trade_signal_row,
+    publish_trade_signal,
+    publish_prediction_fact,
+    publish_recommendation_facts,
+    build_trade_order_row,
+    publish_trade_order,
+    build_trade_fill_row,
+    publish_trade_fill,
+    build_position_daily_row,
+    publish_position_daily,
+    publish_positions_daily_batch,
+    build_model_performance_row,
+    publish_model_performance,
+    publish_market_bars_batch,
+    publish_trade_signals_batch,
+    publish_model_performance_batch,
+)
+from services.shared.schemas import (
+    ActionType,
+    ModelMetadata,
+    PositionSizing,
+    Recommendation,
+    RecommendationMode,
+)
+
+NOW = datetime(2026, 4, 11, 14, 30, 0, tzinfo=timezone.utc)
+
+
+def _make_rec(
+    ticker: str = "AAPL",
+    action: ActionType = ActionType.BUY,
+    confidence: float = 0.72,
+    time_horizon: str = "swing_1d_10d",
+    rec_id: str = "rec-001",
+) -> Recommendation:
+    return Recommendation(
+        recommendation_id=rec_id,
+        ticker=ticker,
+        action=action,
+        mode=RecommendationMode.PAPER_ELIGIBLE,
+        confidence=confidence,
+        time_horizon=time_horizon,
+        thesis="[risk:low] Test thesis",
+        invalidation_conditions=["price drops below support"],
+        position_sizing=PositionSizing(portfolio_pct=0.02, max_loss_pct=0.005),
+        evidence_refs=["doc1", "doc2"],
+        model_metadata=ModelMetadata(provider="deterministic", model_name="eligibility-v1"),
+        generated_at=NOW,
+    )
+
+
+# ---------------------------------------------------------------------------
+# _parse_horizon_days
+# ---------------------------------------------------------------------------
+
+
+def test_parse_horizon_days_swing():
+    assert _parse_horizon_days("swing_1d_10d") == 10
+
+
+def test_parse_horizon_days_position():
+    assert _parse_horizon_days("position_10d_30d") == 30
+
+
+def test_parse_horizon_days_intraday():
+    assert _parse_horizon_days("scalp_intraday") == 1
+
+
+def test_parse_horizon_days_empty():
+    assert _parse_horizon_days("") == 0
+
+
+def test_parse_horizon_days_no_numbers():
+    assert _parse_horizon_days("unknown") == 0
+
+
+# ---------------------------------------------------------------------------
+# Partition module tests
+# ---------------------------------------------------------------------------
+
+
+def test_partition_spec_all_keys():
+    spec = PartitionSpec("test_table", extra_keys=("model_version",))
+    assert spec.all_keys == ("dt", "model_version")
+
+
+def test_partition_spec_dt_only():
+    spec = PartitionSpec("simple")
+    assert spec.all_keys == ("dt",)
+
+
+def test_table_partitions_registry():
+    assert "market_bars" in TABLE_PARTITIONS
+    assert "model_performance" in TABLE_PARTITIONS
+    assert TABLE_PARTITIONS["model_performance"].extra_keys == ("model_version",)
+    assert TABLE_PARTITIONS["prediction_vs_outcome"].extra_keys == ("model_version",)
+    assert TABLE_PARTITIONS["document_extractions"].extra_keys == ("model_version",)
+    assert TABLE_PARTITIONS["trade_signals"].extra_keys == ()
+
+
+def test_partition_values_dt_only():
+    pv = partition_values(NOW)
+    assert pv == {"dt": date(2026, 4, 11)}
+
+
+def test_partition_values_with_extras():
+    pv = partition_values(NOW, {"model_version": "v2"})
+    assert pv == {"dt": date(2026, 4, 11), "model_version": "v2"}
+
+
+def test_s3_uri():
+    assert s3_uri("warehouse/t/dt=2026-04-11/part-abc.parquet") == \
+        "s3://stonks-lakehouse/warehouse/t/dt=2026-04-11/part-abc.parquet"
+
+
+# ---------------------------------------------------------------------------
+# _partition_path (via partitions module)
+# ---------------------------------------------------------------------------
+
+
+def test_partition_path_basic():
+    path = partition_path("trade_signals", NOW)
+    assert path.startswith("warehouse/trade_signals/dt=2026-04-11/")
+    assert path.endswith(".parquet")
+
+
+def test_partition_path_with_extra_partitions():
+    path = partition_path("model_performance", NOW, {"model_version": "v1"})
+    assert "model_version=v1" in path
+
+
+def test_partition_path_custom_file_id():
+    path = partition_path("trade_signals", NOW, file_id="custom123")
+    assert "part-custom123.parquet" in path
+
+
+def test_partition_path_legacy_wrapper():
+    """The _partition_path wrapper in worker.py still works."""
+    path = _partition_path("trade_signals", NOW)
+    assert path.startswith("warehouse/trade_signals/dt=2026-04-11/")
+
+
+# ---------------------------------------------------------------------------
+# build_trade_signal_row
+# ---------------------------------------------------------------------------
+
+
+def test_build_trade_signal_row():
+    rec = _make_rec()
+    row = build_trade_signal_row(rec, trend_direction="bullish", trend_strength=0.68)
+    assert row["signal_id"] == "rec-001"
+    assert row["ticker"] == "AAPL"
+    assert row["trend_direction"] == "bullish"
+    assert row["trend_strength"] == 0.68
+    assert row["confidence"] == 0.72
+    assert row["action"] == "buy"
+    assert row["time_horizon"] == "swing_1d_10d"
+    assert row["generated_at"] == NOW
+    assert row["dt"] == date(2026, 4, 11)
+
+
+# ---------------------------------------------------------------------------
+# publish_trade_signal
+# ---------------------------------------------------------------------------
+
+
+def test_publish_trade_signal_writes_parquet():
+    client = MagicMock()
+    rec = _make_rec()
+
+    ref = publish_trade_signal(client, rec, trend_direction="bullish", trend_strength=0.68)
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/trade_signals/")
+    assert client.put_object.call_count == 1
+
+    # Verify the written bytes are valid Parquet
+    put_call = client.put_object.call_args
+    assert put_call[0][0] == LAKEHOUSE_BUCKET
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 1
+    assert table.column("ticker")[0].as_py() == "AAPL"
+    assert table.column("action")[0].as_py() == "buy"
+
+
+# ---------------------------------------------------------------------------
+# publish_prediction_fact
+# ---------------------------------------------------------------------------
+
+
+def test_publish_prediction_fact_writes_parquet():
+    client = MagicMock()
+    rec = _make_rec()
+
+    ref = publish_prediction_fact(client, rec)
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/prediction_vs_outcome/")
+    assert "model_version=" in ref
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 1
+    assert table.column("predicted_action")[0].as_py() == "buy"
+    assert table.column("outcome")[0].as_py() == "pending"
+    assert table.column("horizon_days")[0].as_py() == 10
+    assert table.column("dt")[0].as_py() == date(2026, 4, 11)
+
+
+# ---------------------------------------------------------------------------
+# publish_recommendation_facts
+# ---------------------------------------------------------------------------
+
+
+def test_publish_recommendation_facts_writes_both_tables():
+    client = MagicMock()
+    rec = _make_rec()
+
+    refs = publish_recommendation_facts(client, rec, "bullish", 0.68)
+
+    assert "trade_signals" in refs
+    assert "prediction_vs_outcome" in refs
+    assert client.put_object.call_count == 2
+
+
+# ---------------------------------------------------------------------------
+# build_trade_order_row
+# ---------------------------------------------------------------------------
+
+
+def test_build_trade_order_row():
+    row = build_trade_order_row(
+        order_id="ord-001",
+        ticker="AAPL",
+        side="buy",
+        order_type="market",
+        quantity=10.0,
+        limit_price=None,
+        status="filled",
+        broker_account="acct-001",
+        submitted_at=NOW,
+    )
+    assert row["order_id"] == "ord-001"
+    assert row["ticker"] == "AAPL"
+    assert row["side"] == "buy"
+    assert row["order_type"] == "market"
+    assert row["quantity"] == 10.0
+    assert row["limit_price"] is None
+    assert row["status"] == "filled"
+    assert row["broker_account"] == "acct-001"
+    assert row["submitted_at"] == NOW
+    assert row["dt"] == date(2026, 4, 11)
+
+
+# ---------------------------------------------------------------------------
+# publish_trade_order
+# ---------------------------------------------------------------------------
+
+
+def test_publish_trade_order_writes_parquet():
+    client = MagicMock()
+
+    ref = publish_trade_order(
+        client,
+        order_id="ord-001",
+        ticker="AAPL",
+        side="buy",
+        order_type="market",
+        quantity=10.0,
+        limit_price=None,
+        status="filled",
+        broker_account="acct-001",
+        submitted_at=NOW,
+    )
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/trade_orders/")
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    assert put_call[0][0] == LAKEHOUSE_BUCKET
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 1
+    assert table.column("ticker")[0].as_py() == "AAPL"
+    assert table.column("side")[0].as_py() == "buy"
+    assert table.column("status")[0].as_py() == "filled"
+
+
+# ---------------------------------------------------------------------------
+# build_trade_fill_row
+# ---------------------------------------------------------------------------
+
+
+def test_build_trade_fill_row():
+    row = build_trade_fill_row(
+        fill_id="fill-001",
+        order_id="ord-001",
+        ticker="AAPL",
+        side="buy",
+        fill_price=150.25,
+        fill_quantity=10.0,
+        broker_account="acct-001",
+        filled_at=NOW,
+    )
+    assert row["fill_id"] == "fill-001"
+    assert row["order_id"] == "ord-001"
+    assert row["fill_price"] == 150.25
+    assert row["fill_quantity"] == 10.0
+
+
+# ---------------------------------------------------------------------------
+# publish_trade_fill
+# ---------------------------------------------------------------------------
+
+
+def test_publish_trade_fill_writes_parquet():
+    client = MagicMock()
+
+    ref = publish_trade_fill(
+        client,
+        fill_id="fill-001",
+        order_id="ord-001",
+        ticker="AAPL",
+        side="buy",
+        fill_price=150.25,
+        fill_quantity=10.0,
+        broker_account="acct-001",
+        filled_at=NOW,
+    )
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/trade_fills/")
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 1
+    assert table.column("fill_price")[0].as_py() == 150.25
+    assert table.column("ticker")[0].as_py() == "AAPL"
+
+
+# ---------------------------------------------------------------------------
+# build_position_daily_row
+# ---------------------------------------------------------------------------
+
+
+def test_build_position_daily_row():
+    row = build_position_daily_row(
+        ticker="AAPL",
+        quantity=100.0,
+        avg_entry_price=145.00,
+        close_price=150.00,
+        unrealized_pnl=500.0,
+        broker_account="acct-001",
+        snapshot_at=NOW,
+    )
+    assert row["ticker"] == "AAPL"
+    assert row["quantity"] == 100.0
+    assert row["unrealized_pnl"] == 500.0
+
+
+# ---------------------------------------------------------------------------
+# publish_position_daily
+# ---------------------------------------------------------------------------
+
+
+def test_publish_position_daily_writes_parquet():
+    client = MagicMock()
+
+    ref = publish_position_daily(
+        client,
+        ticker="AAPL",
+        quantity=100.0,
+        avg_entry_price=145.00,
+        close_price=150.00,
+        unrealized_pnl=500.0,
+        broker_account="acct-001",
+        snapshot_at=NOW,
+    )
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/positions_daily/")
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 1
+    assert table.column("ticker")[0].as_py() == "AAPL"
+    assert table.column("close_price")[0].as_py() == 150.00
+
+
+# ---------------------------------------------------------------------------
+# publish_positions_daily_batch
+# ---------------------------------------------------------------------------
+
+
+def test_publish_positions_daily_batch_writes_parquet():
+    client = MagicMock()
+
+    positions = [
+        {"ticker": "AAPL", "quantity": 100.0, "avg_entry_price": 145.0, "close_price": 150.0, "unrealized_pnl": 500.0},
+        {"ticker": "MSFT", "quantity": 50.0, "avg_entry_price": 300.0, "close_price": 310.0, "unrealized_pnl": 500.0},
+    ]
+
+    ref = publish_positions_daily_batch(client, positions, "acct-001", NOW)
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/positions_daily/")
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 2
+
+
+def test_publish_positions_daily_batch_empty():
+    client = MagicMock()
+
+    ref = publish_positions_daily_batch(client, [], "acct-001", NOW)
+
+    assert ref == ""
+    assert client.put_object.call_count == 0
+
+
+# ---------------------------------------------------------------------------
+# build_model_performance_row
+# ---------------------------------------------------------------------------
+
+
+def test_build_model_performance_row():
+    row = build_model_performance_row(
+        document_id="doc-001",
+        model_name="gpt-oss:20b",
+        success=True,
+        total_duration_ms=1500,
+        recorded_at=NOW,
+        ticker="AAPL",
+        prompt_version="document-intel-v2",
+        schema_version="2.0.0",
+        attempt_count=2,
+        confidence=0.86,
+        validation_status="valid",
+        retry_count=1,
+        input_token_estimate=500,
+        output_token_estimate=200,
+        company_count=3,
+    )
+    assert row["document_id"] == "doc-001"
+    assert row["model_name"] == "gpt-oss:20b"
+    assert row["success"] is True
+    assert row["total_duration_ms"] == 1500
+    assert row["attempt_count"] == 2
+    assert row["confidence"] == 0.86
+    assert row["company_count"] == 3
+    assert row["dt"] == date(2026, 4, 11)
+    assert row["model_version"] == "2.0.0"
+
+
+# ---------------------------------------------------------------------------
+# publish_model_performance
+# ---------------------------------------------------------------------------
+
+
+def test_publish_model_performance_writes_parquet():
+    client = MagicMock()
+
+    ref = publish_model_performance(
+        client,
+        document_id="doc-001",
+        model_name="gpt-oss:20b",
+        success=True,
+        total_duration_ms=1500,
+        recorded_at=NOW,
+        ticker="AAPL",
+        prompt_version="document-intel-v2",
+        schema_version="2.0.0",
+        confidence=0.86,
+        validation_status="valid",
+    )
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/model_performance/")
+    assert "model_version=2.0.0" in ref
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 1
+    assert table.column("model_name")[0].as_py() == "gpt-oss:20b"
+    assert table.column("success")[0].as_py() is True
+    assert table.column("confidence")[0].as_py() == 0.86
+
+
+# ---------------------------------------------------------------------------
+# Batch publish helpers
+# ---------------------------------------------------------------------------
+
+
+def test_publish_market_bars_batch():
+    client = MagicMock()
+    bars: list[dict[str, object]] = [
+        {
+            "ticker": "AAPL", "open_price": 150.0, "high_price": 155.0,
+            "low_price": 149.0, "close_price": 153.0, "volume": 1000000,
+            "vwap": 152.0, "trade_count": 5000, "bar_timestamp": NOW,
+            "bar_interval": "1d", "source": "test",
+            "dt": date(2026, 4, 11),
+        },
+        {
+            "ticker": "MSFT", "open_price": 300.0, "high_price": 310.0,
+            "low_price": 298.0, "close_price": 305.0, "volume": 800000,
+            "vwap": 304.0, "trade_count": 4000, "bar_timestamp": NOW,
+            "bar_interval": "1d", "source": "test",
+            "dt": date(2026, 4, 11),
+        },
+    ]
+
+    ref = publish_market_bars_batch(client, bars, NOW)
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/market_bars/")
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 2
+
+
+def test_publish_batch_empty_returns_empty():
+    client = MagicMock()
+    ref = publish_market_bars_batch(client, [], NOW)
+    assert ref == ""
+    assert client.put_object.call_count == 0
+
+
+def test_publish_trade_signals_batch():
+    client = MagicMock()
+    rec = _make_rec()
+    rows = [build_trade_signal_row(rec, "bullish", 0.68)]
+
+    ref = publish_trade_signals_batch(client, rows, NOW)
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/trade_signals/")
+    assert client.put_object.call_count == 1
+
+
+def test_publish_model_performance_batch():
+    client = MagicMock()
+    rows = [
+        build_model_performance_row(
+            document_id="doc-001", model_name="gpt-oss:20b",
+            success=True, total_duration_ms=1500, recorded_at=NOW,
+        ),
+        build_model_performance_row(
+            document_id="doc-002", model_name="gpt-oss:20b",
+            success=False, total_duration_ms=3000, recorded_at=NOW,
+        ),
+    ]
+
+    ref = publish_model_performance_batch(client, rows, NOW, model_version="v2")
+
+    assert ref.startswith(f"s3://{LAKEHOUSE_BUCKET}/warehouse/model_performance/")
+    assert "model_version=v2" in ref
+    assert client.put_object.call_count == 1
+
+    put_call = client.put_object.call_args
+    written_buf = put_call[0][2]
+    written_buf.seek(0)
+    table = pq.read_table(written_buf)
+    assert table.num_rows == 2
@@ -0,0 +1,355 @@
+"""Tests for lake publisher job runner — dispatching operational data to analytical facts."""
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from services.lake_publisher.jobs import (
+    _jsonb_to_str,
+    dispatch_job,
+    publish_document_job,
+    publish_extraction_job,
+    publish_market_snapshot_job,
+    publish_order_job,
+    publish_fills_job,
+    publish_positions_job,
+    publish_pnl_job,
+    publish_bulk_documents_job,
+    publish_bulk_extractions_job,
+)
+
+NOW = datetime(2026, 4, 11, 14, 30, 0, tzinfo=timezone.utc)
+
+
+# ---------------------------------------------------------------------------
+# _jsonb_to_str
+# ---------------------------------------------------------------------------
+
+
+def test_jsonb_to_str_list():
+    assert _jsonb_to_str(["a", "b", "c"]) == "a, b, c"
+
+
+def test_jsonb_to_str_json_string():
+    assert _jsonb_to_str('["x", "y"]') == "x, y"
+
+
+def test_jsonb_to_str_plain_string():
+    assert _jsonb_to_str("hello") == "hello"
+
+
+def test_jsonb_to_str_none():
+    assert _jsonb_to_str(None) == ""
+
+
+# ---------------------------------------------------------------------------
+# publish_document_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_document_job_found():
+    pool = AsyncMock()
+    pool.fetchrow.return_value = {
+        "id": "doc-uuid-1",
+        "document_type": "article",
+        "source_type": "news_api",
+        "publisher": "Reuters",
+        "title": "Test Article",
+        "url": "https://example.com/article",
+        "canonical_url": "https://example.com/article",
+        "language": "en",
+        "published_at": NOW,
+        "retrieved_at": NOW,
+        "content_hash": "abc123",
+        "parse_quality_score": 0.85,
+        "ticker": "AAPL",
+    }
+    minio_client = MagicMock()
+
+    ref = await publish_document_job(pool, minio_client, "doc-uuid-1")
+
+    assert ref.startswith("s3://stonks-lakehouse/warehouse/documents/")
+    assert minio_client.put_object.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_publish_document_job_not_found():
+    pool = AsyncMock()
+    pool.fetchrow.return_value = None
+    minio_client = MagicMock()
+
+    ref = await publish_document_job(pool, minio_client, "missing-uuid")
+    assert ref == ""
+    assert minio_client.put_object.call_count == 0
+
+
+# ---------------------------------------------------------------------------
+# publish_extraction_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_extraction_job():
+    pool = AsyncMock()
+    pool.fetch.return_value = [
+        {
+            "document_id": "doc-uuid-1",
+            "ticker": "AAPL",
+            "relevance": 0.9,
+            "sentiment": "positive",
+            "impact_score": 0.7,
+            "impact_horizon": "1d_30d",
+            "catalyst_type": "earnings",
+            "confidence": 0.85,
+            "novelty_score": 0.6,
+            "source_credibility": 0.8,
+            "key_facts": ["strong earnings"],
+            "risks": ["regulatory"],
+            "macro_themes": ["ai_capex"],
+            "model_name": "gpt-oss:20b",
+            "prompt_version": "document-intel-v2",
+            "schema_version": "2.0.0",
+            "extraction_at": NOW,
+            "company_name": "Apple Inc.",
+        },
+    ]
+    minio_client = MagicMock()
+
+    refs = await publish_extraction_job(pool, minio_client, "doc-uuid-1")
+
+    assert len(refs) == 1
+    assert refs[0].startswith("s3://stonks-lakehouse/warehouse/document_extractions/")
+    assert minio_client.put_object.call_count == 1
+
+
+@pytest.mark.asyncio
+async def test_publish_extraction_job_empty():
+    pool = AsyncMock()
+    pool.fetch.return_value = []
+    minio_client = MagicMock()
+
+    refs = await publish_extraction_job(pool, minio_client, "doc-uuid-1")
+    assert refs == []
+
+
+# ---------------------------------------------------------------------------
+# publish_market_snapshot_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_market_snapshot_bar():
+    pool = AsyncMock()
+    pool.fetchrow.return_value = {
+        "ticker": "AAPL",
+        "snapshot_type": "bar",
+        "data": {"open": 150.0, "high": 155.0, "low": 149.0, "close": 153.0,
+                 "volume": 1000000, "vwap": 152.0, "trade_count": 5000},
+        "source_provider": "polygon",
+        "captured_at": NOW,
+    }
+    minio_client = MagicMock()
+
+    refs = await publish_market_snapshot_job(pool, minio_client, "snap-uuid-1")
+
+    assert len(refs) == 1
+    assert refs[0].startswith("s3://stonks-lakehouse/warehouse/market_bars/")
+
+
+@pytest.mark.asyncio
+async def test_publish_market_snapshot_quote():
+    pool = AsyncMock()
+    pool.fetchrow.return_value = {
+        "ticker": "AAPL",
+        "snapshot_type": "quote",
+        "data": {"bid_price": 150.0, "ask_price": 150.5, "last_price": 150.25},
+        "source_provider": "polygon",
+        "captured_at": NOW,
+    }
+    minio_client = MagicMock()
+
+    refs = await publish_market_snapshot_job(pool, minio_client, "snap-uuid-1")
+
+    assert len(refs) == 1
+    assert refs[0].startswith("s3://stonks-lakehouse/warehouse/market_quotes/")
+
+
+# ---------------------------------------------------------------------------
+# publish_order_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_order_job():
+    pool = AsyncMock()
+    pool.fetchrow.return_value = {
+        "id": "ord-uuid-1",
+        "recommendation_id": "rec-uuid-1",
+        "ticker": "AAPL",
+        "side": "buy",
+        "order_type": "market",
+        "quantity": 10,
+        "limit_price": None,
+        "status": "filled",
+        "submitted_at": NOW,
+        "fill_price": 150.25,
+        "fill_quantity": 10,
+        "filled_at": NOW,
+        "broker_account": "acct-001",
+        "execution_mode": "paper",
+    }
+    minio_client = MagicMock()
+
+    ref = await publish_order_job(pool, minio_client, "ord-uuid-1")
+
+    assert ref.startswith("s3://stonks-lakehouse/warehouse/trade_orders/")
+    assert minio_client.put_object.call_count == 1
+
+
+# ---------------------------------------------------------------------------
+# publish_fills_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_fills_job():
+    pool = AsyncMock()
+    pool.fetch.return_value = [
+        {
+            "fill_id": "fill-uuid-1",
+            "order_id": "ord-uuid-1",
+            "data": {"fill_price": 150.25, "fill_quantity": 10, "commission": 0.5},
+            "broker_timestamp": NOW,
+            "ticker": "AAPL",
+            "side": "buy",
+            "broker_account": "acct-001",
+        },
+    ]
+    minio_client = MagicMock()
+
+    refs = await publish_fills_job(pool, minio_client, "ord-uuid-1")
+
+    assert len(refs) == 1
+    assert refs[0].startswith("s3://stonks-lakehouse/warehouse/trade_fills/")
+
+
+# ---------------------------------------------------------------------------
+# publish_positions_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_positions_job():
+    pool = AsyncMock()
+    pool.fetch.return_value = [
+        {
+            "ticker": "AAPL",
+            "quantity": 100,
+            "avg_entry_price": 145.0,
+            "current_price": 150.0,
+            "unrealized_pnl": 500.0,
+            "realized_pnl": 0,
+            "broker_account": "acct-001",
+            "execution_mode": "paper",
+        },
+    ]
+    minio_client = MagicMock()
+
+    ref = await publish_positions_job(pool, minio_client, "acct-uuid-1")
+
+    assert ref.startswith("s3://stonks-lakehouse/warehouse/positions_daily/")
+    assert minio_client.put_object.call_count == 1
+
+
+# ---------------------------------------------------------------------------
+# publish_pnl_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_publish_pnl_job():
+    pool = AsyncMock()
+    pool.fetch.return_value = [
+        {
+            "ticker": "AAPL",
+            "quantity": 100,
+            "avg_entry_price": 145.0,
+            "current_price": 150.0,
+            "unrealized_pnl": 500.0,
+            "realized_pnl": 200.0,
+            "broker_account": "acct-001",
+            "execution_mode": "paper",
+        },
+    ]
+    minio_client = MagicMock()
+
+    refs = await publish_pnl_job(pool, minio_client, "acct-uuid-1")
+
+    assert len(refs) == 1
+    assert refs[0].startswith("s3://stonks-lakehouse/warehouse/pnl_daily/")
+
+
+# ---------------------------------------------------------------------------
+# dispatch_job
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dispatch_unknown_job_type():
+    pool = AsyncMock()
+    minio_client = MagicMock()
+
+    result = await dispatch_job(pool, minio_client, {"job_type": "unknown", "entity_id": "x"})
+
+    assert result["error"] is not None
+    assert "Unknown" in str(result["error"])
+
+
+@pytest.mark.asyncio
+async def test_dispatch_document_job():
+    pool = AsyncMock()
+    pool.fetchrow.return_value = {
+        "id": "doc-uuid-1",
+        "document_type": "article",
+        "source_type": "news_api",
+        "publisher": "Reuters",
+        "title": "Test",
+        "url": "",
+        "canonical_url": "",
+        "language": "en",
+        "published_at": NOW,
+        "retrieved_at": NOW,
+        "content_hash": "abc",
+        "parse_quality_score": 0.8,
+        "ticker": "AAPL",
+    }
+    minio_client = MagicMock()
+
+    result = await dispatch_job(
+        pool, minio_client,
+        {"job_type": "document", "entity_id": "doc-uuid-1"},
+    )
+
+    assert result["error"] is None
+    refs = result["refs"]
+    assert isinstance(refs, list)
+    assert len(refs) == 1
+
+
+@pytest.mark.asyncio
+async def test_dispatch_job_handles_exception():
+    pool = AsyncMock()
+    pool.fetchrow.side_effect = Exception("DB down")
+    minio_client = MagicMock()
+
+    result = await dispatch_job(
+        pool, minio_client,
+        {"job_type": "document", "entity_id": "doc-uuid-1"},
+    )
+
+    assert result["error"] is not None
+    assert "DB down" in str(result["error"])
@@ -0,0 +1,136 @@
+"""Tests for structured logging and distributed tracing."""
+import json
+import logging
+
+from services.shared.logging import (
+    JSONFormatter,
+    Span,
+    extract_trace_context,
+    get_service_name,
+    get_span_id,
+    get_trace_id,
+    inject_trace_context,
+    new_span_id,
+    new_trace_id,
+    set_trace_context,
+    setup_logging,
+)
+
+
+def test_new_trace_id_format():
+    tid = new_trace_id()
+    assert len(tid) == 16
+    assert tid.isalnum()
+
+
+def test_new_span_id_format():
+    sid = new_span_id()
+    assert len(sid) == 8
+    assert sid.isalnum()
+
+
+def test_set_and_get_trace_context():
+    set_trace_context(trace_id="abc123", span_id="sp01", service="test_svc")
+    assert get_trace_id() == "abc123"
+    assert get_span_id() == "sp01"
+    assert get_service_name() == "test_svc"
+
+
+def test_json_formatter_output():
+    set_trace_context(trace_id="trace42", span_id="span7", service="fmt_test")
+    formatter = JSONFormatter()
+    record = logging.LogRecord(
+        name="test_logger", level=logging.INFO, pathname="", lineno=0,
+        msg="hello world", args=(), exc_info=None,
+    )
+    output = formatter.format(record)
+    parsed = json.loads(output)
+    assert parsed["message"] == "hello world"
+    assert parsed["level"] == "INFO"
+    assert parsed["trace_id"] == "trace42"
+    assert parsed["span_id"] == "span7"
+    assert parsed["service"] == "fmt_test"
+    assert "timestamp" in parsed
+
+
+def test_json_formatter_extra_fields():
+    set_trace_context(trace_id="t1", service="extra_test")
+    formatter = JSONFormatter()
+    record = logging.LogRecord(
+        name="test", level=logging.WARNING, pathname="", lineno=0,
+        msg="doc processed", args=(), exc_info=None,
+    )
+    record.ticker = "AAPL"
+    record.document_id = "doc-123"
+    output = formatter.format(record)
+    parsed = json.loads(output)
+    assert parsed["ticker"] == "AAPL"
+    assert parsed["document_id"] == "doc-123"
+
+
+def test_span_sets_and_restores_context():
+    set_trace_context(trace_id="parent_trace", span_id="parent_span", service="span_test")
+    parent_span = get_span_id()
+
+    with Span("test_op", ticker="MSFT") as span:
+        assert get_trace_id() == "parent_trace"
+        assert get_span_id() == span.span_id
+        assert span.span_id != parent_span
+
+    # Context restored after span exits
+    assert get_span_id() == parent_span
+
+
+def test_span_records_duration():
+    set_trace_context(service="dur_test")
+    with Span("slow_op") as span:
+        pass  # instant
+    assert span.duration_ms >= 0
+
+
+def test_span_generates_trace_id_if_missing():
+    set_trace_context(trace_id="", service="gen_test")
+    with Span("auto_trace") as span:
+        assert len(span.trace_id) == 16
+
+
+def test_inject_trace_context():
+    set_trace_context(trace_id="inject_trace")
+    payload = inject_trace_context({"ticker": "GOOG"})
+    assert payload["_trace_id"] == "inject_trace"
+    assert payload["ticker"] == "GOOG"
+
+
+def test_extract_trace_context():
+    payload = {"ticker": "TSLA", "_trace_id": "extracted_trace"}
+    extract_trace_context(payload)
+    assert get_trace_id() == "extracted_trace"
+
+
+def test_extract_trace_context_generates_new_if_missing():
+    payload = {"ticker": "AMZN"}
+    extract_trace_context(payload)
+    assert len(get_trace_id()) == 16
+
+
+def test_setup_logging_json_mode():
+    setup_logging("test_service", level="DEBUG", json_output=True)
+    root = logging.getLogger()
+    assert len(root.handlers) == 1
+    assert isinstance(root.handlers[0].formatter, JSONFormatter)
+    assert root.level == logging.DEBUG
+    assert get_service_name() == "test_service"
+
+
+def test_setup_logging_text_mode():
+    setup_logging("text_service", level="WARNING", json_output=False)
+    root = logging.getLogger()
+    assert len(root.handlers) == 1
+    assert not isinstance(root.handlers[0].formatter, JSONFormatter)
+    assert root.level == logging.WARNING
+
+
+def test_config_json_logs_field():
+    from services.shared.config import load_config
+    config = load_config()
+    assert isinstance(config.json_logs, bool)
@@ -0,0 +1,165 @@
+"""Tests for the Polygon.io market data adapter.
+
+Validates request building, response parsing, and error handling.
+"""
+from services.adapters.market_adapter import MarketDataAdapter, PolygonMarketAdapter
+
+
+# --- Fake Polygon responses ---
+
+PREV_BARS_RESPONSE = {
+    "ticker": "AAPL",
+    "queryCount": 1,
+    "resultsCount": 1,
+    "adjusted": True,
+    "results": [
+        {
+            "T": "AAPL",
+            "v": 58_350_544,
+            "vw": 171.5322,
+            "o": 171.0,
+            "c": 172.28,
+            "h": 173.1,
+            "l": 170.5,
+            "t": 1712793600000,
+            "n": 620_123,
+        }
+    ],
+    "status": "OK",
+    "request_id": "req-abc-123",
+}
+
+TICKER_DETAILS_RESPONSE = {
+    "results": {
+        "ticker": "AAPL",
+        "name": "Apple Inc.",
+        "market": "stocks",
+        "locale": "us",
+        "primary_exchange": "XNAS",
+        "type": "CS",
+        "currency_name": "usd",
+        "market_cap": 2_700_000_000_000,
+    },
+    "status": "OK",
+    "request_id": "req-def-456",
+}
+
+RANGE_BARS_RESPONSE = {
+    "ticker": "AAPL",
+    "queryCount": 3,
+    "resultsCount": 3,
+    "adjusted": True,
+    "results": [
+        {"T": "AAPL", "o": 170.0, "c": 171.0, "h": 172.0, "l": 169.5, "v": 50_000_000, "t": 1712620800000},
+        {"T": "AAPL", "o": 171.0, "c": 172.0, "h": 173.0, "l": 170.0, "v": 55_000_000, "t": 1712707200000},
+        {"T": "AAPL", "o": 172.0, "c": 172.5, "h": 174.0, "l": 171.0, "v": 48_000_000, "t": 1712793600000},
+    ],
+    "status": "OK",
+    "request_id": "req-ghi-789",
+}
+
+
+class TestPolygonSourceType:
+    def test_source_type(self):
+        adapter = PolygonMarketAdapter(api_key="k")
+        assert adapter.source_type() == "market_api"
+
+    def test_inherits_market_data_adapter(self):
+        assert issubclass(PolygonMarketAdapter, MarketDataAdapter)
+
+    def test_bucket_name(self):
+        adapter = PolygonMarketAdapter(api_key="k")
+        assert adapter.bucket_name() == "stonks-raw-market"
+
+
+class TestPolygonBuildRequest:
+    def setup_method(self):
+        self.adapter = PolygonMarketAdapter(api_key="test-key", base_url="https://api.polygon.io")
+
+    def test_prev_bars_default(self):
+        url, params = self.adapter._build_request("AAPL", "prev_bars", {})
+        assert url == "https://api.polygon.io/v2/aggs/ticker/AAPL/prev"
+        assert params["apiKey"] == "test-key"
+
+    def test_prev_bars_with_adjusted(self):
+        url, params = self.adapter._build_request("AAPL", "prev_bars", {"adjusted": False})
+        assert params["adjusted"] == "false"
+
+    def test_range_bars(self):
+        config = {
+            "multiplier": 1,
+            "timespan": "day",
+            "from_date": "2026-04-01",
+            "to_date": "2026-04-10",
+            "adjusted": True,
+            "limit": 50,
+            "sort": "asc",
+        }
+        url, params = self.adapter._build_request("AAPL", "range_bars", config)
+        assert "/v2/aggs/ticker/AAPL/range/1/day/2026-04-01/2026-04-10" in url
+        assert params["adjusted"] == "true"
+        assert params["limit"] == "50"
+        assert params["sort"] == "asc"
+
+    def test_ticker_details(self):
+        url, params = self.adapter._build_request("MSFT", "ticker_details", {})
+        assert url == "https://api.polygon.io/v3/reference/tickers/MSFT"
+        assert params["apiKey"] == "test-key"
+
+    def test_unknown_endpoint_defaults_to_prev(self):
+        url, _ = self.adapter._build_request("AAPL", "unknown_thing", {})
+        assert "/v2/aggs/ticker/AAPL/prev" in url
+
+    def test_trailing_slash_stripped(self):
+        adapter = PolygonMarketAdapter(api_key="k", base_url="https://api.polygon.io/")
+        url, _ = adapter._build_request("AAPL", "prev_bars", {})
+        assert "//v2" not in url
+
+
+class TestPolygonExtractItems:
+    def setup_method(self):
+        self.adapter = PolygonMarketAdapter(api_key="k")
+
+    def test_extract_prev_bars(self):
+        items = self.adapter._extract_items(PREV_BARS_RESPONSE, "prev_bars")
+        assert len(items) == 1
+        assert items[0]["T"] == "AAPL"
+
+    def test_extract_range_bars(self):
+        items = self.adapter._extract_items(RANGE_BARS_RESPONSE, "range_bars")
+        assert len(items) == 3
+
+    def test_extract_ticker_details(self):
+        items = self.adapter._extract_items(TICKER_DETAILS_RESPONSE, "ticker_details")
+        assert len(items) == 1
+        assert items[0]["ticker"] == "AAPL"
+
+    def test_extract_empty_results_list(self):
+        items = self.adapter._extract_items({"results": [], "status": "OK"}, "prev_bars")
+        assert items == []
+
+    def test_extract_missing_results_key(self):
+        items = self.adapter._extract_items({"status": "OK"}, "prev_bars")
+        assert items == []
+
+    def test_extract_ticker_details_empty(self):
+        items = self.adapter._extract_items({"results": {}, "status": "OK"}, "ticker_details")
+        assert items == []
+
+
+class TestPolygonErrorResult:
+    def test_error_result_fields(self):
+        adapter = PolygonMarketAdapter(api_key="k")
+        result = adapter._error_result("AAPL", "something broke", 42.5, http_status=500, raw=b"err")
+        assert not result.ok
+        assert result.error == "something broke"
+        assert result.http_status == 500
+        assert result.response_time_ms == 42.5
+        assert result.raw_payload == b"err"
+        assert result.metadata["provider"] == "polygon"
+
+    def test_error_result_defaults(self):
+        adapter = PolygonMarketAdapter(api_key="k")
+        result = adapter._error_result("AAPL", "timeout", 100.0)
+        assert result.http_status is None
+        assert result.raw_payload == b""
@@ -0,0 +1,139 @@
+"""Tests for metadata persistence helpers.
+
+Validates the helper functions in services.shared.metadata that don't
+require a live database connection: type resolution, publisher extraction,
+date parsing, market snapshot type inference, and retry/failure tracking
+computations.
+
+Requirements: 3.3, 3.4, 9.2
+"""
+from datetime import datetime, timezone
+
+from services.shared.metadata import (
+    RETRY_BACKOFF_BASE,
+    RETRY_BACKOFF_MAX,
+    RETRY_MAX_COUNT,
+    _extract_publisher,
+    _infer_market_snapshot_type,
+    _parse_published_at,
+    _resolve_document_type,
+    compute_next_retry_at,
+)
+
+
+class TestResolveDocumentType:
+    def test_news_api(self):
+        assert _resolve_document_type("news_api") == "article"
+
+    def test_filings_api(self):
+        assert _resolve_document_type("filings_api") == "filing"
+
+    def test_web_scrape(self):
+        assert _resolve_document_type("web_scrape") == "press_release"
+
+    def test_unknown_defaults_to_article(self):
+        assert _resolve_document_type("something_else") == "article"
+
+
+class TestExtractPublisher:
+    def test_direct_publisher_field(self):
+        assert _extract_publisher({"publisher": "Reuters"}) == "Reuters"
+
+    def test_source_dict_with_name(self):
+        assert _extract_publisher({"source": {"name": "Bloomberg"}}) == "Bloomberg"
+
+    def test_source_string(self):
+        assert _extract_publisher({"source": "AP News"}) == "AP News"
+
+    def test_empty_item(self):
+        assert _extract_publisher({}) == ""
+
+    def test_publisher_takes_precedence(self):
+        item = {"publisher": "Reuters", "source": {"name": "Bloomberg"}}
+        assert _extract_publisher(item) == "Reuters"
+
+
+class TestParsePublishedAt:
+    def test_iso_format_with_z(self):
+        result = _parse_published_at({"publishedAt": "2026-04-10T12:00:00Z"})
+        assert result is not None
+        assert result.year == 2026
+        assert result.month == 4
+
+    def test_iso_format_with_offset(self):
+        result = _parse_published_at({"published_at": "2026-04-10T12:00:00+00:00"})
+        assert result is not None
+
+    def test_none_when_missing(self):
+        assert _parse_published_at({}) is None
+
+    def test_datetime_passthrough(self):
+        dt = datetime(2026, 1, 1, tzinfo=timezone.utc)
+        result = _parse_published_at({"publishedAt": dt})
+        assert result is dt
+
+    def test_invalid_string_returns_none(self):
+        assert _parse_published_at({"publishedAt": "not-a-date"}) is None
+
+
+class TestInferMarketSnapshotType:
+    def test_bar_from_ohlc(self):
+        item = {"o": 100, "h": 105, "l": 99, "c": 103, "v": 1000}
+        assert _infer_market_snapshot_type(item) == "bar"
+
+    def test_ticker_details_from_market_cap(self):
+        item = {"market_cap": 2_000_000_000, "name": "Apple"}
+        assert _infer_market_snapshot_type(item) == "ticker_details"
+
+    def test_ticker_details_from_sic_code(self):
+        item = {"sic_code": "3674", "name": "NVIDIA"}
+        assert _infer_market_snapshot_type(item) == "ticker_details"
+
+    def test_quote_from_bid_ask(self):
+        item = {"bid": 100.5, "ask": 101.0}
+        assert _infer_market_snapshot_type(item) == "quote"
+
+    def test_generic_snapshot_fallback(self):
+        item = {"some_field": "value"}
+        assert _infer_market_snapshot_type(item) == "snapshot"
+
+
+class TestComputeNextRetryAt:
+    def test_first_retry_uses_base_delay(self):
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+        result = compute_next_retry_at(0, now=now)
+        expected_seconds = RETRY_BACKOFF_BASE  # 60s
+        delta = (result - now).total_seconds()
+        assert delta == expected_seconds
+
+    def test_exponential_growth(self):
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+        d0 = (compute_next_retry_at(0, now=now) - now).total_seconds()
+        d1 = (compute_next_retry_at(1, now=now) - now).total_seconds()
+        d2 = (compute_next_retry_at(2, now=now) - now).total_seconds()
+        assert d1 == d0 * 2
+        assert d2 == d1 * 2
+
+    def test_capped_at_max(self):
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+        result = compute_next_retry_at(20, now=now)
+        delta = (result - now).total_seconds()
+        assert delta == RETRY_BACKOFF_MAX
+
+    def test_defaults_to_utc_now(self):
+        before = datetime.now(timezone.utc)
+        result = compute_next_retry_at(0)
+        after = datetime.now(timezone.utc)
+        assert before <= result
+        assert (result - after).total_seconds() <= RETRY_BACKOFF_BASE + 1
+
+
+class TestRetryConstants:
+    def test_max_count_is_reasonable(self):
+        assert RETRY_MAX_COUNT == 10
+
+    def test_backoff_base_is_one_minute(self):
+        assert RETRY_BACKOFF_BASE == 60
+
+    def test_backoff_max_is_one_hour(self):
+        assert RETRY_BACKOFF_MAX == 3600
@@ -0,0 +1,151 @@
+"""Tests for Prometheus metrics definitions and instrumentation."""
+from prometheus_client import Counter, Gauge, Histogram, Info
+
+from services.shared.metrics import (
+    ACTIVE_JOBS,
+    AGGREGATION_CONTRADICTION_SCORE,
+    AGGREGATION_DURATION,
+    AGGREGATION_SIGNALS_PROCESSED,
+    AGGREGATION_WINDOWS_COMPUTED,
+    ALERT_ACTIVE,
+    ALERT_CHECK_DURATION,
+    ALERTS_FIRED,
+    ALERTS_RESOLVED,
+    EXTRACTION_ATTEMPTS,
+    EXTRACTION_CONFIDENCE,
+    EXTRACTION_DURATION,
+    EXTRACTION_JOBS_TOTAL,
+    EXTRACTION_RETRIES,
+    EXTRACTION_TOKEN_ESTIMATE,
+    EXTRACTION_VALIDATION_ERRORS,
+    INGESTION_ADAPTER_DURATION,
+    INGESTION_ERRORS,
+    INGESTION_ITEMS_DEDUPED,
+    INGESTION_ITEMS_FETCHED,
+    INGESTION_ITEMS_NEW,
+    INGESTION_JOBS_TOTAL,
+    LAKE_FACTS_PUBLISHED,
+    LAKE_PUBLISH_BYTES,
+    LAKE_PUBLISH_DURATION,
+    LAKE_PUBLISH_ERRORS,
+    ORDERS_DUPLICATES_PREVENTED,
+    ORDERS_FILLED,
+    ORDERS_REJECTED,
+    ORDERS_SUBMITTED,
+    PARSE_DURATION,
+    PARSE_JOBS_TOTAL,
+    PARSE_LOW_QUALITY_TOTAL,
+    PARSE_QUALITY_SCORE,
+    POSITIONS_SYNCED,
+    RECOMMENDATION_CONFIDENCE,
+    RECOMMENDATION_GENERATED,
+    RECOMMENDATION_SUPPRESSED,
+    RISK_CHECK_FAILURES,
+    RISK_EVALUATIONS_TOTAL,
+    SERVICE_INFO,
+)
+
+
+def test_ingestion_metrics_are_correct_types():
+    assert isinstance(INGESTION_JOBS_TOTAL, Counter)
+    assert isinstance(INGESTION_ITEMS_FETCHED, Counter)
+    assert isinstance(INGESTION_ITEMS_NEW, Counter)
+    assert isinstance(INGESTION_ITEMS_DEDUPED, Counter)
+    assert isinstance(INGESTION_ERRORS, Counter)
+    assert isinstance(INGESTION_ADAPTER_DURATION, Histogram)
+
+
+def test_parse_metrics_are_correct_types():
+    assert isinstance(PARSE_JOBS_TOTAL, Counter)
+    assert isinstance(PARSE_QUALITY_SCORE, Histogram)
+    assert isinstance(PARSE_LOW_QUALITY_TOTAL, Counter)
+    assert isinstance(PARSE_DURATION, Histogram)
+
+
+def test_extraction_metrics_are_correct_types():
+    assert isinstance(EXTRACTION_JOBS_TOTAL, Counter)
+    assert isinstance(EXTRACTION_ATTEMPTS, Counter)
+    assert isinstance(EXTRACTION_RETRIES, Counter)
+    assert isinstance(EXTRACTION_DURATION, Histogram)
+    assert isinstance(EXTRACTION_CONFIDENCE, Histogram)
+    assert isinstance(EXTRACTION_VALIDATION_ERRORS, Counter)
+    assert isinstance(EXTRACTION_TOKEN_ESTIMATE, Counter)
+
+
+def test_aggregation_metrics_are_correct_types():
+    assert isinstance(AGGREGATION_WINDOWS_COMPUTED, Counter)
+    assert isinstance(AGGREGATION_SIGNALS_PROCESSED, Counter)
+    assert isinstance(AGGREGATION_CONTRADICTION_SCORE, Histogram)
+    assert isinstance(AGGREGATION_DURATION, Histogram)
+
+
+def test_recommendation_metrics_are_correct_types():
+    assert isinstance(RECOMMENDATION_GENERATED, Counter)
+    assert isinstance(RECOMMENDATION_SUPPRESSED, Counter)
+    assert isinstance(RECOMMENDATION_CONFIDENCE, Histogram)
+
+
+def test_lake_metrics_are_correct_types():
+    assert isinstance(LAKE_FACTS_PUBLISHED, Counter)
+    assert isinstance(LAKE_PUBLISH_DURATION, Histogram)
+    assert isinstance(LAKE_PUBLISH_ERRORS, Counter)
+    assert isinstance(LAKE_PUBLISH_BYTES, Counter)
+
+
+def test_trading_metrics_are_correct_types():
+    assert isinstance(ORDERS_SUBMITTED, Counter)
+    assert isinstance(ORDERS_REJECTED, Counter)
+    assert isinstance(ORDERS_FILLED, Counter)
+    assert isinstance(ORDERS_DUPLICATES_PREVENTED, Counter)
+    assert isinstance(RISK_EVALUATIONS_TOTAL, Counter)
+    assert isinstance(RISK_CHECK_FAILURES, Counter)
+    assert isinstance(POSITIONS_SYNCED, Counter)
+
+
+def test_active_jobs_gauge():
+    assert isinstance(ACTIVE_JOBS, Gauge)
+
+
+def test_alerting_metrics_are_correct_types():
+    assert isinstance(ALERTS_FIRED, Counter)
+    assert isinstance(ALERTS_RESOLVED, Counter)
+    assert isinstance(ALERT_CHECK_DURATION, Histogram)
+    assert isinstance(ALERT_ACTIVE, Gauge)
+
+
+def test_service_info():
+    assert isinstance(SERVICE_INFO, Info)
+
+
+def test_counter_labels_work():
+    """Verify labeled counters can be incremented without error."""
+    INGESTION_JOBS_TOTAL.labels(source_type="news_api", status="success").inc()
+    INGESTION_ITEMS_FETCHED.labels(source_type="market_api").inc(5)
+    EXTRACTION_JOBS_TOTAL.labels(status="success").inc()
+    AGGREGATION_WINDOWS_COMPUTED.labels(window="7d").inc()
+    RECOMMENDATION_GENERATED.labels(action="buy", mode="paper_eligible").inc()
+    LAKE_FACTS_PUBLISHED.labels(table_name="trade_signals").inc()
+    ORDERS_SUBMITTED.labels(side="buy", order_type="market", mode="paper").inc()
+    ORDERS_REJECTED.labels(reason_category="risk_engine").inc()
+    RISK_EVALUATIONS_TOTAL.labels(result="passed").inc()
+
+
+def test_histogram_observe_works():
+    """Verify histograms accept observations without error."""
+    INGESTION_ADAPTER_DURATION.labels(source_type="news_api").observe(1.5)
+    PARSE_QUALITY_SCORE.observe(0.85)
+    PARSE_DURATION.observe(0.3)
+    EXTRACTION_DURATION.observe(5.2)
+    EXTRACTION_CONFIDENCE.observe(0.9)
+    AGGREGATION_CONTRADICTION_SCORE.observe(0.15)
+    AGGREGATION_DURATION.labels(window="7d").observe(0.8)
+    RECOMMENDATION_CONFIDENCE.observe(0.72)
+    LAKE_PUBLISH_DURATION.labels(table_name="market_bars").observe(0.05)
+
+
+def test_metrics_endpoint_import():
+    """Verify the prometheus_client generate_latest works."""
+    from prometheus_client import generate_latest
+    output = generate_latest()
+    assert isinstance(output, bytes)
+    assert b"stonks_" in output
@@ -0,0 +1,143 @@
+"""Tests for the Polygon.io news adapter.
+
+Validates request building, response parsing, and error handling.
+"""
+from services.adapters.news_adapter import NewsDataAdapter, PolygonNewsAdapter
+
+
+# --- Fake Polygon news responses ---
+
+NEWS_RESPONSE = {
+    "results": [
+        {
+            "id": "abc123",
+            "publisher": {"name": "Reuters", "homepage_url": "https://reuters.com"},
+            "title": "Apple Reports Record Revenue",
+            "article_url": "https://reuters.com/apple-record",
+            "tickers": ["AAPL"],
+            "published_utc": "2026-04-10T14:30:00Z",
+            "description": "Apple Inc. reported record quarterly revenue.",
+            "keywords": ["earnings", "apple", "revenue"],
+        },
+        {
+            "id": "def456",
+            "publisher": {"name": "Bloomberg", "homepage_url": "https://bloomberg.com"},
+            "title": "Apple Supply Chain Update",
+            "article_url": "https://bloomberg.com/apple-supply",
+            "tickers": ["AAPL", "TSM"],
+            "published_utc": "2026-04-10T12:00:00Z",
+            "description": "Supply chain adjustments for upcoming product cycle.",
+            "keywords": ["supply_chain", "apple"],
+        },
+    ],
+    "status": "OK",
+    "request_id": "req-news-001",
+    "count": 2,
+    "next_url": "https://api.polygon.io/v2/reference/news?cursor=abc",
+}
+
+EMPTY_NEWS_RESPONSE = {
+    "results": [],
+    "status": "OK",
+    "request_id": "req-news-002",
+    "count": 0,
+}
+
+
+class TestPolygonNewsSourceType:
+    def test_source_type(self):
+        adapter = PolygonNewsAdapter(api_key="k")
+        assert adapter.source_type() == "news_api"
+
+    def test_inherits_news_data_adapter(self):
+        assert issubclass(PolygonNewsAdapter, NewsDataAdapter)
+
+    def test_bucket_name(self):
+        adapter = PolygonNewsAdapter(api_key="k")
+        assert adapter.bucket_name() == "stonks-raw-news"
+
+
+class TestPolygonNewsBuildRequest:
+    def setup_method(self):
+        self.adapter = PolygonNewsAdapter(api_key="test-key", base_url="https://api.polygon.io")
+
+    def test_default_request(self):
+        url, params = self.adapter._build_request("AAPL", {})
+        assert url == "https://api.polygon.io/v2/reference/news"
+        assert params["apiKey"] == "test-key"
+        assert params["ticker"] == "AAPL"
+        assert params["limit"] == "20"
+
+    def test_custom_limit(self):
+        _, params = self.adapter._build_request("AAPL", {"limit": 50})
+        assert params["limit"] == "50"
+
+    def test_limit_capped_at_1000(self):
+        _, params = self.adapter._build_request("AAPL", {"limit": 5000})
+        assert params["limit"] == "1000"
+
+    def test_order_param(self):
+        _, params = self.adapter._build_request("AAPL", {"order": "asc"})
+        assert params["order"] == "asc"
+
+    def test_date_filters(self):
+        config = {
+            "published_utc_gte": "2026-04-01",
+            "published_utc_lte": "2026-04-10",
+        }
+        _, params = self.adapter._build_request("AAPL", config)
+        assert params["published_utc.gte"] == "2026-04-01"
+        assert params["published_utc.lte"] == "2026-04-10"
+
+    def test_no_date_filters_when_absent(self):
+        _, params = self.adapter._build_request("AAPL", {})
+        assert "published_utc.gte" not in params
+        assert "published_utc.lte" not in params
+
+    def test_trailing_slash_stripped(self):
+        adapter = PolygonNewsAdapter(api_key="k", base_url="https://api.polygon.io/")
+        url, _ = adapter._build_request("AAPL", {})
+        assert "//v2" not in url
+
+
+class TestPolygonNewsExtractItems:
+    def setup_method(self):
+        self.adapter = PolygonNewsAdapter(api_key="k")
+
+    def test_extract_articles(self):
+        items = self.adapter._extract_items(NEWS_RESPONSE)
+        assert len(items) == 2
+        assert items[0]["title"] == "Apple Reports Record Revenue"
+        assert items[1]["tickers"] == ["AAPL", "TSM"]
+
+    def test_extract_empty_results(self):
+        items = self.adapter._extract_items(EMPTY_NEWS_RESPONSE)
+        assert items == []
+
+    def test_extract_missing_results_key(self):
+        items = self.adapter._extract_items({"status": "OK"})
+        assert items == []
+
+    def test_extract_non_list_results(self):
+        items = self.adapter._extract_items({"results": "unexpected"})
+        assert items == []
+
+
+class TestPolygonNewsErrorResult:
+    def test_error_result_fields(self):
+        adapter = PolygonNewsAdapter(api_key="k")
+        result = adapter._error_result("AAPL", "rate limited", 150.0, http_status=429, raw=b"slow down")
+        assert not result.ok
+        assert result.error == "rate limited"
+        assert result.http_status == 429
+        assert result.response_time_ms == 150.0
+        assert result.raw_payload == b"slow down"
+        assert result.metadata["provider"] == "polygon"
+        assert result.source_type == "news_api"
+
+    def test_error_result_defaults(self):
+        adapter = PolygonNewsAdapter(api_key="k")
+        result = adapter._error_result("MSFT", "timeout", 200.0)
+        assert result.http_status is None
+        assert result.raw_payload == b""
+        assert result.ticker == "MSFT"
@@ -0,0 +1,388 @@
+"""Tests for the Ollama client wrapper."""
+import json
+from unittest.mock import AsyncMock, patch
+
+import httpx
+import pytest
+
+from services.extractor.client import (
+    ExtractionResponse,
+    OllamaClient,
+    _compute_backoff,
+    _is_retryable,
+)
+from services.shared.config import OllamaConfig
+
+
+def _valid_extraction_json() -> str:
+    """Minimal valid extraction result as JSON string."""
+    return json.dumps({
+        "summary": "Apple beat earnings expectations.",
+        "companies": [
+            {
+                "ticker": "AAPL",
+                "company_name": "Apple Inc.",
+                "relevance": 0.95,
+                "sentiment": "positive",
+                "impact_score": 0.7,
+                "impact_horizon": "1d_30d",
+                "catalyst_type": "earnings",
+                "key_facts": ["Revenue up 12%"],
+                "risks": [],
+                "evidence_spans": ["Apple beat expectations"],
+            }
+        ],
+        "macro_themes": ["ai_capex"],
+        "novelty_score": 0.6,
+        "confidence": 0.85,
+        "extraction_warnings": [],
+    })
+
+
+def _ollama_response(content: str) -> httpx.Response:
+    """Build a fake Ollama /api/chat response."""
+    body = {"message": {"role": "assistant", "content": content}}
+    return httpx.Response(200, json=body)
+
+
+def _make_config() -> OllamaConfig:
+    return OllamaConfig(
+        base_url="http://test:11434",
+        model="test-model",
+        timeout=10,
+        retry_base_delay=0.0,
+        retry_max_delay=0.0,
+        retry_backoff_multiplier=2.0,
+    )
+
+
+@pytest.mark.asyncio
+async def test_extract_success():
+    """Successful extraction on first attempt."""
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response(_valid_extraction_json())
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), http_client=http)
+
+    resp = await client.extract(
+        document_text="Apple reported record Q4 earnings.",
+        document_type="article",
+        document_id="doc-1",
+    )
+
+    assert resp.success
+    assert resp.result is not None
+    assert resp.result.companies[0].ticker == "AAPL"
+    assert len(resp.attempts) == 1
+    assert resp.attempts[0].error is None
+    assert resp.model == "test-model"
+    assert resp.prompt_metadata["prompt_version"]
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_retry_on_invalid_json():
+    """Client retries when model returns invalid JSON, then succeeds."""
+    call_count = 0
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            return _ollama_response("not valid json {{{")
+        return _ollama_response(_valid_extraction_json())
+
+    transport = httpx.MockTransport(handler)
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=2, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert resp.success
+    assert len(resp.attempts) == 2
+    assert resp.attempts[0].error is not None
+    assert resp.attempts[1].error is None
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_all_retries_exhausted():
+    """All retries fail — response indicates failure with all attempts recorded."""
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response("bad output")
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=1, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert resp.result is None
+    assert len(resp.attempts) == 2  # initial + 1 retry
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_http_timeout():
+    """HTTP timeout is captured as an error."""
+    def handler(request: httpx.Request) -> httpx.Response:
+        raise httpx.ReadTimeout("timed out")
+
+    transport = httpx.MockTransport(handler)
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=0, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert resp.attempts[0].error == "timeout"
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_http_500():
+    """HTTP 500 is captured as an error."""
+    transport = httpx.MockTransport(
+        lambda req: httpx.Response(500, text="Internal Server Error")
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=0, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert "500" in (resp.attempts[0].error or "")
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_empty_model_response():
+    """Empty content from model is treated as an error."""
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response("")
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=0, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert resp.attempts[0].error == "empty_model_response"
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_schema_validation_failure():
+    """Model returns valid JSON but missing required fields."""
+    bad_extraction = json.dumps({"summary": "test"})  # missing companies, etc.
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response(bad_extraction)
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=0, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert resp.attempts[0].validation is not None
+    assert not resp.attempts[0].validation.valid
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_with_known_tickers():
+    """Known tickers are passed through to the prompt builder."""
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response(_valid_extraction_json())
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), http_client=http)
+
+    resp = await client.extract(
+        document_text="test",
+        document_type="article",
+        known_tickers=["AAPL", "MSFT"],
+    )
+
+    assert resp.success
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_sends_structured_format():
+    """The request payload includes the JSON schema in the format field."""
+    captured_payload: dict[str, object] = {}
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        captured_payload.update(json.loads(request.content))
+        return _ollama_response(_valid_extraction_json())
+
+    transport = httpx.MockTransport(handler)
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), http_client=http)
+
+    await client.extract(document_text="test", document_type="article")
+
+    assert "format" in captured_payload
+    assert isinstance(captured_payload["format"], dict)
+    assert captured_payload["stream"] is False
+    assert captured_payload["model"] == "test-model"
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_non_retryable_http_400_stops_immediately():
+    """HTTP 400 is non-retryable — client stops after first attempt."""
+    call_count = 0
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        nonlocal call_count
+        call_count += 1
+        return httpx.Response(400, text="Bad Request")
+
+    transport = httpx.MockTransport(handler)
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=3, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert len(resp.attempts) == 1  # no retries for 400
+    assert resp.attempts[0].error == "http_400"
+    assert not resp.attempts[0].retryable
+    assert call_count == 1
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_retryable_http_500_retries():
+    """HTTP 500 is retryable — client retries up to max_retries."""
+    call_count = 0
+
+    def handler(request: httpx.Request) -> httpx.Response:
+        nonlocal call_count
+        call_count += 1
+        if call_count <= 2:
+            return httpx.Response(500, text="Internal Server Error")
+        return _ollama_response(_valid_extraction_json())
+
+    transport = httpx.MockTransport(handler)
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), max_retries=3, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert resp.success
+    assert len(resp.attempts) == 3
+    assert resp.attempts[0].retryable is True
+    assert resp.attempts[1].retryable is True
+    assert call_count == 3
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_retryable_field_on_success():
+    """Successful attempt has retryable=True (default)."""
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response(_valid_extraction_json())
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(_make_config(), http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert resp.success
+    assert resp.attempts[0].retryable is True
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_backoff_is_called_between_retries():
+    """asyncio.sleep is called with increasing delays between retries."""
+    config = OllamaConfig(
+        base_url="http://test:11434",
+        model="test-model",
+        timeout=10,
+        retry_base_delay=1.0,
+        retry_max_delay=10.0,
+        retry_backoff_multiplier=2.0,
+    )
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response("bad output")
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(config, max_retries=2, http_client=http)
+
+    with patch("services.extractor.client.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
+        resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert len(resp.attempts) == 3  # initial + 2 retries
+    assert mock_sleep.call_count == 2
+    # First backoff: 1.0 * 2^0 = 1.0
+    assert mock_sleep.call_args_list[0].args[0] == pytest.approx(1.0)
+    # Second backoff: 1.0 * 2^1 = 2.0
+    assert mock_sleep.call_args_list[1].args[0] == pytest.approx(2.0)
+
+    await client.close()
+
+
+@pytest.mark.asyncio
+async def test_extract_uses_config_max_retries():
+    """Client uses max_retries from config when not overridden."""
+    config = OllamaConfig(
+        base_url="http://test:11434",
+        model="test-model",
+        timeout=10,
+        max_retries=1,
+        retry_base_delay=0.0,
+    )
+    transport = httpx.MockTransport(
+        lambda req: _ollama_response("bad output")
+    )
+    http = httpx.AsyncClient(transport=transport)
+    client = OllamaClient(config, http_client=http)
+
+    resp = await client.extract(document_text="test", document_type="article")
+
+    assert not resp.success
+    assert len(resp.attempts) == 2  # initial + 1 retry from config
+
+    await client.close()
+
+
+def test_compute_backoff():
+    """Backoff computation respects multiplier and max delay."""
+    assert _compute_backoff(0, 1.0, 10.0, 2.0) == 1.0
+    assert _compute_backoff(1, 1.0, 10.0, 2.0) == 2.0
+    assert _compute_backoff(2, 1.0, 10.0, 2.0) == 4.0
+    assert _compute_backoff(3, 1.0, 10.0, 2.0) == 8.0
+    assert _compute_backoff(4, 1.0, 10.0, 2.0) == 10.0  # capped at max
+
+
+def test_is_retryable():
+    """Error classification for retry decisions."""
+    assert _is_retryable("timeout") is True
+    assert _is_retryable("http_500") is True
+    assert _is_retryable("connection_error: refused") is True
+    assert _is_retryable("empty_model_response") is True
+    assert _is_retryable("invalid_response_json") is True
+    assert _is_retryable("http_400") is False
+    assert _is_retryable("http_401") is False
+    assert _is_retryable("http_403") is False
+    assert _is_retryable("http_404") is False
+    assert _is_retryable("http_422") is False
+    assert _is_retryable(None) is False
@@ -0,0 +1,142 @@
+"""Tests for the operator approval workflow for live trading mode.
+
+Validates:
+- requires_approval logic for paper/live/disabled modes
+- ApprovalRequest model behavior (pending, expired)
+- compute_expiry calculation
+- Integration with broker service process_order_job flow
+"""
+from datetime import datetime, timedelta, timezone
+
+from services.risk.approval import (
+    ApprovalRequest,
+    ApprovalStatus,
+    compute_expiry,
+    requires_approval,
+)
+from services.risk.engine import (
+    OperatorApproval,
+    PortfolioRiskConfig,
+    TradingMode,
+)
+
+
+# ---------------------------------------------------------------------------
+# requires_approval tests
+# ---------------------------------------------------------------------------
+
+
+class TestRequiresApproval:
+    def test_paper_mode_auto_approved(self):
+        """Paper orders are auto-approved by default."""
+        config = PortfolioRiskConfig(trading_mode=TradingMode.PAPER)
+        assert requires_approval(config) is False
+
+    def test_paper_mode_approval_required_when_auto_approve_off(self):
+        """Paper orders need approval when auto_approve_paper is False."""
+        config = PortfolioRiskConfig(
+            trading_mode=TradingMode.PAPER,
+            operator_approval=OperatorApproval(auto_approve_paper=False),
+        )
+        assert requires_approval(config) is True
+
+    def test_live_mode_requires_approval_by_default(self):
+        """Live orders require approval by default."""
+        config = PortfolioRiskConfig(trading_mode=TradingMode.LIVE)
+        assert requires_approval(config) is True
+
+    def test_live_mode_no_approval_when_disabled(self):
+        """Live orders skip approval when require_approval_for_live is False."""
+        config = PortfolioRiskConfig(
+            trading_mode=TradingMode.LIVE,
+            operator_approval=OperatorApproval(require_approval_for_live=False),
+        )
+        assert requires_approval(config) is False
+
+    def test_disabled_mode_never_requires_approval(self):
+        """Disabled mode never requires approval (blocked upstream)."""
+        config = PortfolioRiskConfig(trading_mode=TradingMode.DISABLED)
+        assert requires_approval(config) is False
+
+    def test_override_trading_mode_parameter(self):
+        """The trading_mode parameter overrides config.trading_mode."""
+        config = PortfolioRiskConfig(trading_mode=TradingMode.PAPER)
+        # Override to live — should require approval
+        assert requires_approval(config, trading_mode=TradingMode.LIVE) is True
+
+
+# ---------------------------------------------------------------------------
+# compute_expiry tests
+# ---------------------------------------------------------------------------
+
+
+class TestComputeExpiry:
+    def test_default_30_minutes(self):
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+        config = PortfolioRiskConfig()
+        expiry = compute_expiry(config, now=now)
+        assert expiry == now + timedelta(minutes=30)
+
+    def test_custom_timeout(self):
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+        config = PortfolioRiskConfig(
+            operator_approval=OperatorApproval(approval_timeout_minutes=60),
+        )
+        expiry = compute_expiry(config, now=now)
+        assert expiry == now + timedelta(minutes=60)
+
+
+# ---------------------------------------------------------------------------
+# ApprovalRequest model tests
+# ---------------------------------------------------------------------------
+
+
+class TestApprovalRequest:
+    def test_defaults(self):
+        req = ApprovalRequest(ticker="AAPL")
+        assert req.ticker == "AAPL"
+        assert req.status == ApprovalStatus.PENDING
+        assert req.is_pending is True
+        assert req.approval_id  # auto-generated UUID
+
+    def test_is_expired_when_past_expiry(self):
+        past = datetime.now(timezone.utc) - timedelta(minutes=5)
+        req = ApprovalRequest(ticker="AAPL", expires_at=past)
+        assert req.is_expired is True
+
+    def test_not_expired_when_future_expiry(self):
+        future = datetime.now(timezone.utc) + timedelta(minutes=30)
+        req = ApprovalRequest(ticker="AAPL", expires_at=future)
+        assert req.is_expired is False
+
+    def test_approved_is_not_expired(self):
+        past = datetime.now(timezone.utc) - timedelta(minutes=5)
+        req = ApprovalRequest(
+            ticker="AAPL",
+            status=ApprovalStatus.APPROVED,
+            expires_at=past,
+        )
+        assert req.is_expired is False
+
+    def test_to_dict_roundtrip(self):
+        req = ApprovalRequest(
+            ticker="MSFT",
+            side="sell",
+            quantity=50.0,
+            estimated_value=15000.0,
+            recommendation_id="rec-123",
+        )
+        d = req.to_dict()
+        assert d["ticker"] == "MSFT"
+        assert d["side"] == "sell"
+        assert d["quantity"] == 50.0
+        assert d["status"] == "pending"
+        assert d["recommendation_id"] == "rec-123"
+
+    def test_explicit_expired_status(self):
+        req = ApprovalRequest(
+            ticker="AAPL",
+            status=ApprovalStatus.EXPIRED,
+        )
+        assert req.is_expired is True
+        assert req.is_pending is False
@@ -0,0 +1,339 @@
+"""Tests for the paper trading adapter - local order simulation and state sync.
+
+Validates position tracking, order fills, idempotency, cash management,
+and the PaperAccount/PaperPosition data structures.
+"""
+import pytest
+
+from services.adapters.broker_adapter import (
+    OrderRequest,
+    OrderResponse,
+    OrderSide,
+    OrderStatus,
+    OrderType,
+    PositionInfo,
+    TradingMode,
+)
+from services.adapters.paper_trading import (
+    PaperAccount,
+    PaperPosition,
+    PaperTradingAdapter,
+)
+
+
+# ---------------------------------------------------------------------------
+# PaperPosition tests
+# ---------------------------------------------------------------------------
+
+
+class TestPaperPosition:
+    def test_new_position_is_not_open(self):
+        pos = PaperPosition(ticker="AAPL")
+        assert not pos.is_open
+        assert pos.quantity == 0.0
+
+    def test_buy_fill_opens_position(self):
+        pos = PaperPosition(ticker="AAPL")
+        pnl = pos.apply_fill(OrderSide.BUY, 10, 150.0)
+        assert pos.is_open
+        assert pos.quantity == 10
+        assert pos.avg_entry_price == 150.0
+        assert pnl == 0.0
+
+    def test_sell_fill_realizes_pnl(self):
+        pos = PaperPosition(ticker="AAPL", quantity=10, avg_entry_price=150.0)
+        pnl = pos.apply_fill(OrderSide.SELL, 5, 160.0)
+        assert pos.quantity == 5
+        assert pnl == 50.0  # 5 shares * $10 gain
+        assert pos.realized_pnl == 50.0
+
+    def test_sell_all_closes_position(self):
+        pos = PaperPosition(ticker="AAPL", quantity=10, avg_entry_price=150.0)
+        pos.apply_fill(OrderSide.SELL, 10, 140.0)
+        assert not pos.is_open
+        assert pos.quantity == 0
+        assert pos.avg_entry_price == 0.0
+        assert pos.realized_pnl == -100.0  # 10 * -$10
+
+    def test_buy_averages_up(self):
+        pos = PaperPosition(ticker="AAPL", quantity=10, avg_entry_price=100.0)
+        pos.apply_fill(OrderSide.BUY, 10, 200.0)
+        assert pos.quantity == 20
+        assert pos.avg_entry_price == 150.0  # (1000 + 2000) / 20
+
+    def test_to_position_info(self):
+        pos = PaperPosition(ticker="AAPL", quantity=10, avg_entry_price=150.0)
+        info = pos.to_position_info(current_price=160.0)
+        assert isinstance(info, PositionInfo)
+        assert info.ticker == "AAPL"
+        assert info.quantity == 10
+        assert info.unrealized_pnl == 100.0  # 10 * $10
+        assert info.market_value == 1600.0
+
+    def test_to_position_info_no_current_price(self):
+        pos = PaperPosition(ticker="AAPL", quantity=10, avg_entry_price=150.0)
+        info = pos.to_position_info()
+        assert info.current_price == 150.0
+        assert info.unrealized_pnl == 0.0
+
+
+# ---------------------------------------------------------------------------
+# PaperAccount tests
+# ---------------------------------------------------------------------------
+
+
+class TestPaperAccount:
+    def test_default_account(self):
+        acct = PaperAccount()
+        assert acct.cash == 100_000.0
+        assert acct.portfolio_value == 100_000.0
+        assert acct.buying_power == 100_000.0
+
+    def test_custom_initial_cash(self):
+        acct = PaperAccount(initial_cash=50_000.0)
+        assert acct.cash == 50_000.0
+
+    def test_get_position_creates_new(self):
+        acct = PaperAccount()
+        pos = acct.get_position("AAPL")
+        assert pos.ticker == "AAPL"
+        assert pos.quantity == 0
+
+    def test_get_position_returns_existing(self):
+        acct = PaperAccount()
+        pos1 = acct.get_position("AAPL")
+        pos1.quantity = 10
+        pos2 = acct.get_position("AAPL")
+        assert pos2.quantity == 10
+
+    def test_portfolio_value_includes_positions(self):
+        acct = PaperAccount(initial_cash=10_000.0)
+        acct.cash = 5_000.0
+        pos = acct.get_position("AAPL")
+        pos.quantity = 10
+        pos.avg_entry_price = 100.0
+        # portfolio = cash + position value = 5000 + 1000 = 6000
+        assert acct.portfolio_value == 6_000.0
+
+    def test_to_account_info(self):
+        acct = PaperAccount(account_id="test-acct")
+        info = acct.to_account_info()
+        assert info.account_id == "test-acct"
+        assert info.mode == TradingMode.PAPER
+        assert info.cash == 100_000.0
+
+
+# ---------------------------------------------------------------------------
+# PaperTradingAdapter tests
+# ---------------------------------------------------------------------------
+
+
+class TestPaperTradingAdapterBasics:
+    def test_mode_is_paper(self):
+        adapter = PaperTradingAdapter()
+        assert adapter.mode == TradingMode.PAPER
+
+    def test_source_type(self):
+        adapter = PaperTradingAdapter()
+        assert adapter.source_type() == "broker"
+
+    def test_custom_initial_cash(self):
+        adapter = PaperTradingAdapter(initial_cash=50_000.0)
+        assert adapter.account.cash == 50_000.0
+
+
+@pytest.mark.asyncio
+class TestPaperTradingSubmitOrder:
+    async def test_buy_market_order_fills(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+            order_type=OrderType.LIMIT,
+            limit_price=150.0,
+        )
+        resp = await adapter.submit_order(order)
+        assert resp.status == OrderStatus.FILLED
+        assert resp.filled_quantity == 10
+        assert resp.filled_avg_price == 150.0
+        assert resp.ok
+        # Cash should decrease
+        assert adapter.account.cash < 100_000.0
+
+    async def test_sell_order_realizes_pnl(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        # Buy first
+        buy = OrderRequest(ticker="AAPL", side=OrderSide.BUY, quantity=10,
+                           order_type=OrderType.LIMIT, limit_price=150.0)
+        await adapter.submit_order(buy)
+
+        # Sell at higher price
+        sell = OrderRequest(ticker="AAPL", side=OrderSide.SELL, quantity=10,
+                            order_type=OrderType.LIMIT, limit_price=160.0)
+        resp = await adapter.submit_order(sell)
+        assert resp.status == OrderStatus.FILLED
+        assert resp.raw_response["realized_pnl"] == 100.0  # 10 * $10
+
+    async def test_insufficient_cash_rejects(self):
+        adapter = PaperTradingAdapter(initial_cash=1_000.0)
+        order = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=100,
+            order_type=OrderType.LIMIT,
+            limit_price=150.0,
+        )
+        resp = await adapter.submit_order(order)
+        assert resp.status == OrderStatus.REJECTED
+        assert "Insufficient cash" in resp.error
+
+    async def test_insufficient_shares_rejects(self):
+        adapter = PaperTradingAdapter()
+        order = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.SELL,
+            quantity=10,
+            order_type=OrderType.LIMIT,
+            limit_price=150.0,
+        )
+        resp = await adapter.submit_order(order)
+        assert resp.status == OrderStatus.REJECTED
+        assert "Insufficient shares" in resp.error
+
+    async def test_idempotency_returns_same_response(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL",
+            side=OrderSide.BUY,
+            quantity=10,
+            order_type=OrderType.LIMIT,
+            limit_price=150.0,
+            idempotency_key="test-key-1",
+        )
+        resp1 = await adapter.submit_order(order)
+        resp2 = await adapter.submit_order(order)
+        assert resp1.broker_order_id == resp2.broker_order_id
+        assert resp1.status == resp2.status
+        # Cash should only be deducted once
+        assert adapter.account.cash == pytest.approx(100_000.0 - 1500.0)
+
+    async def test_order_events_recorded(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=5,
+            order_type=OrderType.LIMIT, limit_price=100.0,
+        )
+        await adapter.submit_order(order)
+        events = adapter.account.order_events
+        event_types = [e["event_type"] for e in events]
+        assert "submitted" in event_types
+        assert "accepted" in event_types
+        assert "fill" in event_types
+
+    async def test_stop_order_fills_at_stop_price(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.STOP, stop_price=145.0,
+        )
+        resp = await adapter.submit_order(order)
+        assert resp.filled_avg_price == 145.0
+
+
+@pytest.mark.asyncio
+class TestPaperTradingCancelAndStatus:
+    async def test_cancel_filled_order_rejected(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=5,
+            order_type=OrderType.LIMIT, limit_price=100.0,
+        )
+        resp = await adapter.submit_order(order)
+        cancel_resp = await adapter.cancel_order(resp.broker_order_id)
+        assert cancel_resp.status == OrderStatus.REJECTED
+        assert "filled" in cancel_resp.error.lower()
+
+    async def test_cancel_unknown_order(self):
+        adapter = PaperTradingAdapter()
+        resp = await adapter.cancel_order("nonexistent-id")
+        assert resp.status == OrderStatus.REJECTED
+        assert "not found" in resp.error
+
+    async def test_get_order_status(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=5,
+            order_type=OrderType.LIMIT, limit_price=100.0,
+        )
+        resp = await adapter.submit_order(order)
+        status = await adapter.get_order_status(resp.broker_order_id)
+        assert status.status == OrderStatus.FILLED
+
+    async def test_get_unknown_order_status(self):
+        adapter = PaperTradingAdapter()
+        resp = await adapter.get_order_status("nonexistent")
+        assert resp.status == OrderStatus.REJECTED
+
+
+@pytest.mark.asyncio
+class TestPaperTradingPositionsAndAccount:
+    async def test_get_positions_empty(self):
+        adapter = PaperTradingAdapter()
+        positions = await adapter.get_positions()
+        assert positions == []
+
+    async def test_get_positions_after_buy(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=150.0,
+        )
+        await adapter.submit_order(order)
+        positions = await adapter.get_positions()
+        assert len(positions) == 1
+        assert positions[0].ticker == "AAPL"
+        assert positions[0].quantity == 10
+
+    async def test_get_account(self):
+        adapter = PaperTradingAdapter(initial_cash=50_000.0, account_id="test")
+        info = await adapter.get_account()
+        assert info.account_id == "test"
+        assert info.cash == 50_000.0
+        assert info.mode == TradingMode.PAPER
+
+
+@pytest.mark.asyncio
+class TestPaperTradingFetch:
+    async def test_fetch_positions(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        buy = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=5,
+            order_type=OrderType.LIMIT, limit_price=100.0,
+        )
+        await adapter.submit_order(buy)
+        result = await adapter.fetch("AAPL", {"endpoint": "positions"})
+        assert result.ok
+        assert len(result.items) == 1
+        assert result.metadata["provider"] == "paper"
+
+    async def test_fetch_account(self):
+        adapter = PaperTradingAdapter()
+        result = await adapter.fetch("*", {"endpoint": "account"})
+        assert result.ok
+        assert result.items[0]["mode"] == "paper"
+
+    async def test_fetch_orders(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        buy = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=5,
+            order_type=OrderType.LIMIT, limit_price=100.0,
+        )
+        await adapter.submit_order(buy)
+        result = await adapter.fetch("AAPL", {"endpoint": "orders"})
+        assert len(result.items) == 1
+
+    async def test_fetch_empty_position(self):
+        adapter = PaperTradingAdapter()
+        result = await adapter.fetch("AAPL", {"endpoint": "positions"})
+        assert len(result.items) == 0
@@ -0,0 +1,627 @@
+"""Paper trading simulation scenarios.
+
+End-to-end scenarios that exercise the full recommendation-to-execution
+pipeline through the paper trading adapter, risk engine, and position
+tracking. Each scenario simulates a realistic trading session using
+real logic from all service modules — no mocked business logic.
+
+Covers:
+- Single-symbol buy-and-sell round trips with P&L verification
+- Multi-symbol portfolio construction and diversification
+- Risk engine rejection scenarios (position limits, daily loss, lockouts)
+- Idempotent order submission under replay conditions
+- Insufficient funds and insufficient shares edge cases
+- Recommendation-driven order flow (bullish → buy, bearish → sell)
+- Portfolio drawdown halting via daily loss limits
+- News-shock lockout preventing trades during high-impact events
+
+Requirements: 7.1-7.4, 8.1-8.5
+"""
+from __future__ import annotations
+
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+from services.adapters.broker_adapter import (
+    OrderRequest,
+    OrderSide,
+    OrderStatus,
+    OrderType,
+    TradingMode,
+)
+from services.adapters.paper_trading import PaperTradingAdapter
+from services.aggregation.worker import (
+    ImpactRow,
+    assemble_trend_with_evidence,
+    build_weighted_signals,
+)
+from services.recommendation.eligibility import evaluate_eligibility
+from services.recommendation.worker import build_recommendation
+from services.risk.engine import (
+    AccountRiskState,
+    DailyLossLimits,
+    NewsShockLockout,
+    PortfolioRiskConfig,
+    PositionLimits,
+    ProposedOrder,
+    RiskCheckResult,
+    SectorExposureLimits,
+    SymbolCooldown,
+    evaluate_order,
+)
+from services.shared.schemas import (
+    ActionType,
+    RecommendationMode,
+)
+
+NOW = datetime(2026, 4, 11, 14, 0, 0, tzinfo=timezone.utc)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _bullish_impacts(ticker: str, count: int = 3) -> list[ImpactRow]:
+    """Generate bullish impact rows for aggregation."""
+    return [
+        ImpactRow(
+            document_id=f"doc-bull-{ticker}-{i}",
+            confidence=0.80 + i * 0.02,
+            novelty_score=0.6,
+            source_credibility=0.8,
+            sentiment="positive",
+            impact_score=0.70 + i * 0.03,
+            catalyst_type="earnings",
+            key_facts=[f"Strong Q{i+1} results for {ticker}"],
+            risks=[],
+            published_at=NOW - timedelta(hours=i + 1),
+        )
+        for i in range(count)
+    ]
+
+
+def _bearish_impacts(ticker: str, count: int = 3) -> list[ImpactRow]:
+    """Generate bearish impact rows for aggregation."""
+    return [
+        ImpactRow(
+            document_id=f"doc-bear-{ticker}-{i}",
+            confidence=0.78 + i * 0.02,
+            novelty_score=0.55,
+            source_credibility=0.75,
+            sentiment="negative",
+            impact_score=0.65 + i * 0.03,
+            catalyst_type="legal",
+            key_facts=[f"Regulatory action against {ticker}"],
+            risks=[f"Potential fine for {ticker}"],
+            published_at=NOW - timedelta(hours=i + 1),
+        )
+        for i in range(count)
+    ]
+
+
+def _build_trend_and_recommendation(impacts, ticker, window="7d"):
+    """Run aggregation + eligibility + recommendation for a set of impacts."""
+    signals = build_weighted_signals(impacts, NOW, window)
+    assembled = assemble_trend_with_evidence(
+        ticker, window, signals, impacts, reference_time=NOW,
+    )
+    summary = assembled.summary
+    eligibility = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, eligibility, reference_time=NOW)
+    return summary, eligibility, rec
+
+
+def _risk_state_from_adapter(adapter: PaperTradingAdapter) -> AccountRiskState:
+    """Build an AccountRiskState snapshot from the paper adapter's in-memory state."""
+    acct = adapter.account
+    positions_by_symbol = {
+        t: p.quantity * p.avg_entry_price
+        for t, p in acct.positions.items()
+        if p.is_open
+    }
+    return AccountRiskState(
+        account_id=acct.account_id,
+        portfolio_value=acct.portfolio_value,
+        cash=acct.cash,
+        buying_power=acct.buying_power,
+        positions_by_symbol=positions_by_symbol,
+        open_position_count=sum(1 for p in acct.positions.values() if p.is_open),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Scenario 1: Single-symbol buy-sell round trip
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestSingleSymbolRoundTrip:
+    """Buy shares, sell at a profit, verify P&L and cash reconciliation."""
+
+    async def test_buy_hold_sell_profit(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+
+        # Generate bullish recommendation
+        impacts = _bullish_impacts("AAPL")
+        summary, eligibility, rec = _build_trend_and_recommendation(impacts, "AAPL")
+        assert rec.action == ActionType.BUY
+
+        # Execute buy
+        buy = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=50,
+            order_type=OrderType.LIMIT, limit_price=180.0,
+        )
+        buy_resp = await adapter.submit_order(buy)
+        assert buy_resp.status == OrderStatus.FILLED
+        assert adapter.account.cash == pytest.approx(100_000.0 - 50 * 180.0)
+
+        # Verify position
+        positions = await adapter.get_positions()
+        assert len(positions) == 1
+        assert positions[0].ticker == "AAPL"
+        assert positions[0].quantity == 50
+
+        # Sell at higher price
+        sell = OrderRequest(
+            ticker="AAPL", side=OrderSide.SELL, quantity=50,
+            order_type=OrderType.LIMIT, limit_price=195.0,
+        )
+        sell_resp = await adapter.submit_order(sell)
+        assert sell_resp.status == OrderStatus.FILLED
+        assert sell_resp.raw_response["realized_pnl"] == pytest.approx(50 * 15.0)
+
+        # Cash should be back to initial + profit
+        expected_cash = 100_000.0 + 50 * 15.0
+        assert adapter.account.cash == pytest.approx(expected_cash)
+
+        # Position should be closed
+        positions = await adapter.get_positions()
+        assert len(positions) == 0
+
+    async def test_buy_hold_sell_loss(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+
+        buy = OrderRequest(
+            ticker="TSLA", side=OrderSide.BUY, quantity=20,
+            order_type=OrderType.LIMIT, limit_price=250.0,
+        )
+        await adapter.submit_order(buy)
+
+        sell = OrderRequest(
+            ticker="TSLA", side=OrderSide.SELL, quantity=20,
+            order_type=OrderType.LIMIT, limit_price=230.0,
+        )
+        sell_resp = await adapter.submit_order(sell)
+        assert sell_resp.raw_response["realized_pnl"] == pytest.approx(-400.0)
+
+        expected_cash = 100_000.0 - 400.0
+        assert adapter.account.cash == pytest.approx(expected_cash)
+
+
+# ---------------------------------------------------------------------------
+# Scenario 2: Multi-symbol portfolio construction
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestMultiSymbolPortfolio:
+    """Build a diversified portfolio across multiple symbols."""
+
+    async def test_build_three_position_portfolio(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+
+        orders = [
+            ("AAPL", 20, 180.0),
+            ("MSFT", 15, 420.0),
+            ("GOOGL", 10, 175.0),
+        ]
+        total_cost = 0.0
+        for ticker, qty, price in orders:
+            req = OrderRequest(
+                ticker=ticker, side=OrderSide.BUY, quantity=qty,
+                order_type=OrderType.LIMIT, limit_price=price,
+            )
+            resp = await adapter.submit_order(req)
+            assert resp.status == OrderStatus.FILLED
+            total_cost += qty * price
+
+        assert adapter.account.cash == pytest.approx(100_000.0 - total_cost)
+
+        positions = await adapter.get_positions()
+        tickers = {p.ticker for p in positions}
+        assert tickers == {"AAPL", "MSFT", "GOOGL"}
+
+        # Portfolio value = cash + position value at entry
+        assert adapter.account.portfolio_value == pytest.approx(100_000.0)
+
+    async def test_partial_liquidation(self):
+        adapter = PaperTradingAdapter(initial_cash=50_000.0)
+
+        # Buy two positions
+        await adapter.submit_order(OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=30,
+            order_type=OrderType.LIMIT, limit_price=150.0,
+        ))
+        await adapter.submit_order(OrderRequest(
+            ticker="MSFT", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=400.0,
+        ))
+
+        # Sell only AAPL
+        await adapter.submit_order(OrderRequest(
+            ticker="AAPL", side=OrderSide.SELL, quantity=30,
+            order_type=OrderType.LIMIT, limit_price=155.0,
+        ))
+
+        positions = await adapter.get_positions()
+        assert len(positions) == 1
+        assert positions[0].ticker == "MSFT"
+
+
+# ---------------------------------------------------------------------------
+# Scenario 3: Risk engine blocks unsafe orders
+# ---------------------------------------------------------------------------
+
+
+class TestRiskEngineBlocking:
+    """Verify risk engine prevents orders that violate configured limits."""
+
+    def test_position_size_limit_blocks_large_order(self):
+        config = PortfolioRiskConfig(
+            position_limits=PositionLimits(max_position_value=5_000.0),
+        )
+        state = AccountRiskState(
+            portfolio_value=100_000.0, cash=100_000.0,
+        )
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=10_000.0, quantity=50,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+        assert any(
+            c.check_name == "max_position_value" and c.result == RiskCheckResult.FAIL
+            for c in result.checks
+        )
+
+    def test_sector_concentration_blocks_overweight(self):
+        config = PortfolioRiskConfig(
+            sector_exposure=SectorExposureLimits(max_sector_pct=0.20),
+        )
+        state = AccountRiskState(
+            portfolio_value=100_000.0,
+            positions_by_sector={"Technology": 18_000.0},
+        )
+        order = ProposedOrder(
+            ticker="NVDA", sector="Technology",
+            estimated_value=5_000.0, quantity=20,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+
+    def test_daily_loss_halt_blocks_further_trading(self):
+        config = PortfolioRiskConfig(
+            daily_loss=DailyLossLimits(
+                max_daily_loss_pct=0.02,
+                max_daily_loss_value=2_000.0,
+            ),
+        )
+        state = AccountRiskState(
+            portfolio_value=100_000.0,
+            daily_pnl=-2_500.0,
+        )
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=1_000.0, quantity=5,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+        loss_failures = [
+            c for c in result.checks
+            if c.check_name.startswith("daily_loss") and c.result == RiskCheckResult.FAIL
+        ]
+        assert len(loss_failures) >= 1
+
+    def test_news_shock_lockout_blocks_trade(self):
+        lockout_expiry = NOW + timedelta(minutes=45)
+        config = PortfolioRiskConfig(
+            news_shock=NewsShockLockout(enabled=True, lockout_minutes=60),
+        )
+        state = AccountRiskState(
+            portfolio_value=100_000.0,
+            locked_symbols={"AAPL": lockout_expiry},
+        )
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=1_000.0, quantity=5,
+        )
+        result = evaluate_order(order, config, state, now=NOW)
+        assert not result.passed
+        assert any(
+            c.check_name == "news_shock_lockout" and c.result == RiskCheckResult.FAIL
+            for c in result.checks
+        )
+
+    def test_symbol_cooldown_blocks_rapid_retrade(self):
+        last_trade = NOW - timedelta(minutes=5)
+        config = PortfolioRiskConfig(
+            symbol_cooldown=SymbolCooldown(cooldown_minutes=15),
+        )
+        state = AccountRiskState(
+            portfolio_value=100_000.0,
+            last_trade_times={"AAPL": last_trade},
+        )
+        order = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=1_000.0, quantity=5,
+        )
+        result = evaluate_order(order, config, state, now=NOW)
+        assert not result.passed
+
+
+# ---------------------------------------------------------------------------
+# Scenario 4: Recommendation-driven order flow
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestRecommendationDrivenOrders:
+    """Simulate the full path: signals → recommendation → risk check → paper fill."""
+
+    async def test_bullish_recommendation_to_paper_buy(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        impacts = _bullish_impacts("AAPL", count=4)
+        summary, eligibility, rec = _build_trend_and_recommendation(impacts, "AAPL")
+
+        assert rec.action == ActionType.BUY
+        assert rec.confidence > 0
+
+        # Risk check the proposed order
+        risk_state = _risk_state_from_adapter(adapter)
+        proposed = ProposedOrder(
+            ticker="AAPL", sector="Technology",
+            estimated_value=rec.position_sizing.portfolio_pct * risk_state.portfolio_value,
+            quantity=10,
+            confidence=rec.confidence,
+            recommendation_id=rec.recommendation_id,
+        )
+        risk_eval = evaluate_order(proposed, PortfolioRiskConfig(), risk_state)
+        assert risk_eval.passed
+
+        # Execute the paper order
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=180.0,
+        )
+        resp = await adapter.submit_order(order)
+        assert resp.status == OrderStatus.FILLED
+
+    async def test_bearish_recommendation_to_paper_sell(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+
+        # First buy a position to sell
+        await adapter.submit_order(OrderRequest(
+            ticker="TSLA", side=OrderSide.BUY, quantity=20,
+            order_type=OrderType.LIMIT, limit_price=250.0,
+        ))
+
+        # Generate bearish recommendation
+        impacts = _bearish_impacts("TSLA", count=3)
+        summary, eligibility, rec = _build_trend_and_recommendation(impacts, "TSLA")
+        assert rec.action == ActionType.SELL
+
+        # Execute the sell
+        sell = OrderRequest(
+            ticker="TSLA", side=OrderSide.SELL, quantity=20,
+            order_type=OrderType.LIMIT, limit_price=240.0,
+        )
+        resp = await adapter.submit_order(sell)
+        assert resp.status == OrderStatus.FILLED
+        assert resp.raw_response["realized_pnl"] == pytest.approx(-200.0)
+
+    async def test_low_confidence_recommendation_is_informational(self):
+        """Low-confidence signals should produce informational-only recommendations."""
+        impacts = [
+            ImpactRow(
+                document_id="doc-weak-1",
+                confidence=0.40,
+                novelty_score=0.3,
+                source_credibility=0.5,
+                sentiment="positive",
+                impact_score=0.3,
+                catalyst_type="other",
+                key_facts=["Minor update"],
+                risks=[],
+                published_at=NOW - timedelta(hours=1),
+            ),
+            ImpactRow(
+                document_id="doc-weak-2",
+                confidence=0.35,
+                novelty_score=0.2,
+                source_credibility=0.4,
+                sentiment="positive",
+                impact_score=0.25,
+                catalyst_type="other",
+                key_facts=["Routine filing"],
+                risks=[],
+                published_at=NOW - timedelta(hours=3),
+            ),
+        ]
+        _, _, rec = _build_trend_and_recommendation(impacts, "XYZ")
+        assert rec.mode == RecommendationMode.INFORMATIONAL
+
+
+# ---------------------------------------------------------------------------
+# Scenario 5: Idempotent order submission
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestIdempotentOrderSubmission:
+    """Verify duplicate orders with the same idempotency key are not double-executed."""
+
+    async def test_duplicate_buy_only_fills_once(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=150.0,
+            idempotency_key="idem-buy-1",
+        )
+
+        resp1 = await adapter.submit_order(order)
+        resp2 = await adapter.submit_order(order)
+
+        assert resp1.broker_order_id == resp2.broker_order_id
+        # Cash deducted only once
+        assert adapter.account.cash == pytest.approx(100_000.0 - 1_500.0)
+        # Only one position entry
+        pos = adapter.account.get_position("AAPL")
+        assert pos.quantity == 10
+
+
+# ---------------------------------------------------------------------------
+# Scenario 6: Insufficient funds and shares
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestInsufficientResources:
+    """Verify the adapter rejects orders when resources are insufficient."""
+
+    async def test_buy_exceeding_cash_rejected(self):
+        adapter = PaperTradingAdapter(initial_cash=5_000.0)
+        order = OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=100,
+            order_type=OrderType.LIMIT, limit_price=180.0,
+        )
+        resp = await adapter.submit_order(order)
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.error is not None and "Insufficient cash" in resp.error
+
+    async def test_sell_more_than_held_rejected(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        await adapter.submit_order(OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=150.0,
+        ))
+        sell = OrderRequest(
+            ticker="AAPL", side=OrderSide.SELL, quantity=20,
+            order_type=OrderType.LIMIT, limit_price=155.0,
+        )
+        resp = await adapter.submit_order(sell)
+        assert resp.status == OrderStatus.REJECTED
+        assert resp.error is not None and "Insufficient shares" in resp.error
+
+
+# ---------------------------------------------------------------------------
+# Scenario 7: Portfolio drawdown halts trading
+# ---------------------------------------------------------------------------
+
+
+class TestDrawdownHalt:
+    """Simulate a losing session that triggers the daily loss circuit breaker."""
+
+    def test_cumulative_losses_trigger_halt(self):
+        """After multiple losing trades, the risk engine should block new orders."""
+        config = PortfolioRiskConfig(
+            daily_loss=DailyLossLimits(
+                max_daily_loss_pct=0.03,
+                max_daily_loss_value=3_000.0,
+                max_daily_trades=50,
+            ),
+        )
+
+        # Simulate state after several losing trades
+        state = AccountRiskState(
+            portfolio_value=97_000.0,
+            cash=47_000.0,
+            daily_pnl=-3_200.0,
+            daily_trade_count=8,
+        )
+
+        order = ProposedOrder(
+            ticker="NVDA", sector="Technology",
+            estimated_value=2_000.0, quantity=5,
+        )
+        result = evaluate_order(order, config, state)
+        assert not result.passed
+        # Both pct and value limits should be breached
+        failed_checks = {
+            c.check_name for c in result.checks if c.result == RiskCheckResult.FAIL
+        }
+        assert "daily_loss_value" in failed_checks
+
+
+# ---------------------------------------------------------------------------
+# Scenario 8: Full session simulation
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestFullTradingSession:
+    """Simulate a realistic multi-trade session with mixed outcomes."""
+
+    async def test_morning_session_with_mixed_results(self):
+        adapter = PaperTradingAdapter(initial_cash=100_000.0)
+        initial_cash = 100_000.0
+
+        # Trade 1: Buy AAPL, sell at profit
+        await adapter.submit_order(OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=30,
+            order_type=OrderType.LIMIT, limit_price=180.0,
+        ))
+        resp1 = await adapter.submit_order(OrderRequest(
+            ticker="AAPL", side=OrderSide.SELL, quantity=30,
+            order_type=OrderType.LIMIT, limit_price=185.0,
+        ))
+        pnl_1 = resp1.raw_response["realized_pnl"]
+        assert pnl_1 == pytest.approx(150.0)
+
+        # Trade 2: Buy TSLA, sell at loss
+        await adapter.submit_order(OrderRequest(
+            ticker="TSLA", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=250.0,
+        ))
+        resp2 = await adapter.submit_order(OrderRequest(
+            ticker="TSLA", side=OrderSide.SELL, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=242.0,
+        ))
+        pnl_2 = resp2.raw_response["realized_pnl"]
+        assert pnl_2 == pytest.approx(-80.0)
+
+        # Trade 3: Buy MSFT, hold (don't sell)
+        await adapter.submit_order(OrderRequest(
+            ticker="MSFT", side=OrderSide.BUY, quantity=5,
+            order_type=OrderType.LIMIT, limit_price=420.0,
+        ))
+
+        # Verify final state
+        positions = await adapter.get_positions()
+        assert len(positions) == 1
+        assert positions[0].ticker == "MSFT"
+
+        # Cash = initial + AAPL profit + TSLA loss - MSFT cost
+        expected_cash = initial_cash + 150.0 - 80.0 - (5 * 420.0)
+        assert adapter.account.cash == pytest.approx(expected_cash)
+
+        # Audit trail should have events for all trades
+        event_count = len(adapter.account.order_events)
+        # 5 orders × 3 events each (submitted, accepted, fill) = 15
+        # (rejected orders get fewer events, but all 5 here are fills)
+        assert event_count == 15
+
+    async def test_account_info_reflects_session(self):
+        adapter = PaperTradingAdapter(initial_cash=50_000.0, account_id="sim-session")
+
+        await adapter.submit_order(OrderRequest(
+            ticker="AAPL", side=OrderSide.BUY, quantity=10,
+            order_type=OrderType.LIMIT, limit_price=180.0,
+        ))
+
+        acct = await adapter.get_account()
+        assert acct.account_id == "sim-session"
+        assert acct.mode == TradingMode.PAPER
+        assert acct.cash == pytest.approx(50_000.0 - 1_800.0)
+        assert acct.portfolio_value == pytest.approx(50_000.0)
@@ -0,0 +1,80 @@
+"""Tests for parser worker helper functions.
+
+Validates build_parser_output_json produces the expected structure
+from ParsedDocument and mention data.
+
+Requirements: 4.1, 4.2, 4.3, 9.1
+"""
+from services.parser.html_parser import ParsedDocument, QualitySignals
+from services.parser.worker import build_parser_output_json
+
+
+class TestBuildParserOutputJson:
+    def test_includes_all_metadata_fields(self):
+        parsed = ParsedDocument(
+            body_text="Apple reported strong earnings.",
+            title="Apple Earnings",
+            author="Jane Reporter",
+            publisher="TechNews",
+            published_at="2026-04-10T14:00:00Z",
+            canonical_url="https://technews.example.com/apple",
+            language="en",
+            description="Apple Q2 results.",
+            document_type="article",
+            word_count=5,
+            outbound_links=["https://other.com/analysis"],
+            tags=["apple", "earnings"],
+            quality_score=0.75,
+            confidence="high",
+            low_quality_flag=False,
+            quality_warnings=[],
+            quality_signals=QualitySignals(
+                word_count_signal=0.8,
+                diversity_signal=0.9,
+                sentence_signal=1.0,
+                paragraph_signal=0.5,
+                body_found_signal=1.0,
+                metadata_signal=1.0,
+            ),
+        )
+        mentions = [
+            {"company_id": "1", "ticker": "AAPL", "mention_type": "ticker", "confidence": 0.9, "match_count": 2},
+        ]
+        result = build_parser_output_json(parsed, mentions)
+
+        assert result["title"] == "Apple Earnings"
+        assert result["author"] == "Jane Reporter"
+        assert result["publisher"] == "TechNews"
+        assert result["published_at"] == "2026-04-10T14:00:00Z"
+        assert result["canonical_url"] == "https://technews.example.com/apple"
+        assert result["language"] == "en"
+        assert result["description"] == "Apple Q2 results."
+        assert result["document_type"] == "article"
+        assert result["word_count"] == 5
+        assert result["outbound_links"] == ["https://other.com/analysis"]
+        assert result["tags"] == ["apple", "earnings"]
+        assert result["quality_score"] == 0.75
+        assert result["confidence"] == "high"
+        assert result["low_quality_flag"] is False
+        assert result["quality_warnings"] == []
+        assert result["mentioned_companies"] == mentions
+
+    def test_quality_signals_serialized(self):
+        parsed = ParsedDocument(
+            quality_signals=QualitySignals(
+                word_count_signal=0.3,
+                diversity_signal=0.5,
+            ),
+        )
+        result = build_parser_output_json(parsed, [])
+        signals = result["quality_signals"]
+        assert signals["word_count"] == 0.3
+        assert signals["diversity"] == 0.5
+
+    def test_empty_parsed_document(self):
+        parsed = ParsedDocument()
+        result = build_parser_output_json(parsed, [])
+        assert result["title"] == ""
+        assert "body_text" not in result  # body text stored separately in MinIO
+        assert result["mentioned_companies"] == []
+        assert result["confidence"] == "low"
@@ -0,0 +1,105 @@
+"""Tests for the Query API app structure and helper functions."""
+import json
+from datetime import datetime, timezone
+
+import pytest
+
+from services.api.app import _parse_jsonb, _row_to_dict, app
+
+
+# --- _parse_jsonb ---
+
+def test_parse_jsonb_dict():
+    assert _parse_jsonb({"a": 1}) == {"a": 1}
+
+
+def test_parse_jsonb_list():
+    assert _parse_jsonb([1, 2]) == [1, 2]
+
+
+def test_parse_jsonb_string():
+    assert _parse_jsonb('{"x": 1}') == {"x": 1}
+
+
+def test_parse_jsonb_list_string():
+    assert _parse_jsonb('["a", "b"]') == ["a", "b"]
+
+
+def test_parse_jsonb_none():
+    assert _parse_jsonb(None) is None
+
+
+def test_parse_jsonb_invalid_string():
+    assert _parse_jsonb("not json") == "not json"
+
+
+# --- _row_to_dict ---
+
+class FakeRecord(dict):
+    """Mimics asyncpg.Record enough for _row_to_dict."""
+    def items(self):
+        return super().items()
+
+
+def test_row_to_dict_converts_datetime():
+    dt = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    row = FakeRecord({"created_at": dt, "name": "test"})
+    result = _row_to_dict(row)
+    assert result["created_at"] == dt.isoformat()
+    assert result["name"] == "test"
+
+
+def test_row_to_dict_passes_primitives():
+    row = FakeRecord({"count": 42, "active": True, "label": "ok", "val": None})
+    result = _row_to_dict(row)
+    assert result == {"count": 42, "active": True, "label": "ok", "val": None}
+
+
+# --- App structure ---
+
+def test_app_has_expected_routes():
+    paths = [route.path for route in app.routes]
+    assert "/health" in paths
+    assert "/api/companies" in paths
+    assert "/api/companies/{company_id}" in paths
+    assert "/api/documents" in paths
+    assert "/api/documents/{document_id}" in paths
+    assert "/api/trends" in paths
+    assert "/api/trends/{trend_id}" in paths
+    assert "/api/trends/{trend_id}/evidence" in paths
+    assert "/api/recommendations" in paths
+    assert "/api/recommendations/{recommendation_id}" in paths
+    assert "/api/recommendations/{recommendation_id}/evidence" in paths
+    assert "/api/orders" in paths
+    assert "/api/orders/{order_id}" in paths
+    assert "/api/positions" in paths
+    assert "/api/audit/{entity_type}/{entity_id}" in paths
+
+
+def test_app_has_admin_routes():
+    paths = [route.path for route in app.routes]
+    # Source health
+    assert "/api/admin/sources/health" in paths
+    assert "/api/admin/sources/{source_id}/runs" in paths
+    assert "/api/admin/sources/{source_id}/toggle" in paths
+    assert "/api/admin/sources/{source_id}/credibility" in paths
+    # Symbol configs
+    assert "/api/admin/companies/{company_id}/toggle" in paths
+    assert "/api/admin/companies/{company_id}/sector" in paths
+    assert "/api/admin/companies/coverage" in paths
+    # Trading mode
+    assert "/api/admin/trading/config" in paths
+    assert "/api/admin/trading/mode" in paths
+    assert "/api/admin/trading/approvals" in paths
+    assert "/api/admin/trading/approvals/{approval_id}" in paths
+    assert "/api/admin/trading/lockouts" in paths
+
+
+def test_app_has_ops_dashboard_routes():
+    paths = [route.path for route in app.routes]
+    assert "/api/ops/ingestion/throughput" in paths
+    assert "/api/ops/ingestion/summary" in paths
+    assert "/api/ops/model/failures" in paths
+    assert "/api/ops/model/performance" in paths
+    assert "/api/ops/pipeline/health" in paths
+    assert "/api/ops/sources/coverage-gaps" in paths
@@ -0,0 +1,283 @@
+"""Tests for deterministic recommendation eligibility logic."""
+from typing import Any
+
+from services.recommendation.eligibility import (
+    DEFAULT_ELIGIBILITY_CONFIG,
+    EligibilityConfig,
+    RejectionReason,
+    evaluate_eligibility,
+)
+from services.shared.schemas import (
+    ActionType,
+    RecommendationMode,
+    TrendDirection,
+    TrendSummary,
+    TrendWindow,
+)
+
+
+def _make_summary(**overrides: Any) -> TrendSummary:
+    """Build a TrendSummary with sensible defaults for testing."""
+    defaults = dict(
+        entity_type="company",
+        entity_id="AAPL",
+        window=TrendWindow.SEVEN_DAY,
+        trend_direction=TrendDirection.BULLISH,
+        trend_strength=0.5,
+        confidence=0.6,
+        top_supporting_evidence=["doc1", "doc2", "doc3"],
+        top_opposing_evidence=[],
+        dominant_catalysts=["earnings"],
+        material_risks=["regulatory scrutiny"],
+        contradiction_score=0.1,
+    )
+    defaults.update(overrides)
+    return TrendSummary(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# Gate checks
+# ---------------------------------------------------------------------------
+
+
+def test_eligible_strong_bullish():
+    """A strong bullish trend with good confidence passes all gates."""
+    summary = _make_summary(
+        trend_strength=0.5, confidence=0.6, contradiction_score=0.1,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.eligible is True
+    assert result.rejection_reasons == []
+    assert result.action == ActionType.BUY
+
+
+def test_rejected_low_confidence():
+    """Below min_confidence → rejected."""
+    summary = _make_summary(confidence=0.2)
+    result = evaluate_eligibility(summary)
+    assert result.eligible is False
+    assert RejectionReason.LOW_CONFIDENCE in result.rejection_reasons
+
+
+def test_rejected_low_strength():
+    """Below min_trend_strength → rejected."""
+    summary = _make_summary(trend_strength=0.05)
+    result = evaluate_eligibility(summary)
+    assert result.eligible is False
+    assert RejectionReason.LOW_TREND_STRENGTH in result.rejection_reasons
+
+
+def test_rejected_high_contradiction():
+    """Above max_contradiction_score → rejected."""
+    summary = _make_summary(contradiction_score=0.7)
+    result = evaluate_eligibility(summary)
+    assert result.eligible is False
+    assert RejectionReason.HIGH_CONTRADICTION in result.rejection_reasons
+
+
+def test_rejected_insufficient_evidence():
+    """Too few evidence documents → rejected."""
+    summary = _make_summary(
+        top_supporting_evidence=["doc1"],
+        top_opposing_evidence=[],
+    )
+    result = evaluate_eligibility(summary)
+    assert result.eligible is False
+    assert RejectionReason.INSUFFICIENT_EVIDENCE in result.rejection_reasons
+
+
+def test_rejected_neutral_direction():
+    """Neutral trend direction → rejected."""
+    summary = _make_summary(trend_direction=TrendDirection.NEUTRAL)
+    result = evaluate_eligibility(summary)
+    assert result.eligible is False
+    assert RejectionReason.NEUTRAL_DIRECTION in result.rejection_reasons
+
+
+def test_rejected_forces_informational_mode():
+    """Any rejection forces mode to informational (Req 7.4)."""
+    summary = _make_summary(confidence=0.2)
+    result = evaluate_eligibility(summary)
+    assert result.eligible is False
+    assert result.mode == RecommendationMode.INFORMATIONAL
+
+
+# ---------------------------------------------------------------------------
+# Action mapping
+# ---------------------------------------------------------------------------
+
+
+def test_action_buy_strong_bullish():
+    summary = _make_summary(
+        trend_direction=TrendDirection.BULLISH, trend_strength=0.4,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.BUY
+
+
+def test_action_sell_strong_bearish():
+    summary = _make_summary(
+        trend_direction=TrendDirection.BEARISH, trend_strength=0.4,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.SELL
+
+
+def test_action_hold_weak_bullish_decent_confidence():
+    """Weak bullish with decent confidence → HOLD."""
+    summary = _make_summary(
+        trend_direction=TrendDirection.BULLISH,
+        trend_strength=0.15,
+        confidence=0.55,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.HOLD
+
+
+def test_action_watch_weak_bullish_low_confidence():
+    """Weak bullish with low confidence → WATCH."""
+    summary = _make_summary(
+        trend_direction=TrendDirection.BULLISH,
+        trend_strength=0.15,
+        confidence=0.40,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.WATCH
+
+
+def test_action_watch_mixed():
+    summary = _make_summary(trend_direction=TrendDirection.MIXED)
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.WATCH
+
+
+# ---------------------------------------------------------------------------
+# Mode escalation
+# ---------------------------------------------------------------------------
+
+
+def test_mode_informational_for_hold():
+    """HOLD actions are always informational."""
+    summary = _make_summary(
+        trend_direction=TrendDirection.BULLISH,
+        trend_strength=0.15,
+        confidence=0.55,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.HOLD
+    assert result.mode == RecommendationMode.INFORMATIONAL
+
+
+def test_mode_paper_eligible():
+    """BUY with confidence >= paper threshold → paper_eligible."""
+    summary = _make_summary(
+        trend_strength=0.4, confidence=0.55, contradiction_score=0.1,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.BUY
+    assert result.mode == RecommendationMode.PAPER_ELIGIBLE
+
+
+def test_mode_live_eligible():
+    """BUY with high confidence, low contradiction, enough evidence → live_eligible."""
+    summary = _make_summary(
+        trend_strength=0.5,
+        confidence=0.75,
+        contradiction_score=0.1,
+        top_supporting_evidence=["d1", "d2", "d3", "d4"],
+        top_opposing_evidence=["d5"],
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.BUY
+    assert result.mode == RecommendationMode.LIVE_ELIGIBLE
+
+
+def test_mode_not_live_high_contradiction():
+    """High contradiction blocks live even with high confidence."""
+    summary = _make_summary(
+        trend_strength=0.5,
+        confidence=0.75,
+        contradiction_score=0.4,
+        top_supporting_evidence=["d1", "d2", "d3", "d4", "d5"],
+        top_opposing_evidence=[],
+    )
+    result = evaluate_eligibility(summary)
+    assert result.mode != RecommendationMode.LIVE_ELIGIBLE
+
+
+def test_mode_informational_low_confidence_buy():
+    """BUY with confidence below paper threshold → informational."""
+    summary = _make_summary(
+        trend_strength=0.4, confidence=0.40,
+    )
+    result = evaluate_eligibility(summary)
+    assert result.action == ActionType.BUY
+    assert result.mode == RecommendationMode.INFORMATIONAL
+
+
+# ---------------------------------------------------------------------------
+# Position sizing
+# ---------------------------------------------------------------------------
+
+
+def test_position_sizing_scales_with_confidence():
+    """Higher confidence → larger portfolio allocation."""
+    low = _make_summary(confidence=0.40, trend_strength=0.4)
+    high = _make_summary(confidence=0.80, trend_strength=0.4)
+    r_low = evaluate_eligibility(low)
+    r_high = evaluate_eligibility(high)
+    assert r_high.position_sizing.portfolio_pct > r_low.position_sizing.portfolio_pct
+
+
+def test_position_sizing_penalised_by_contradiction():
+    """Higher contradiction → smaller portfolio allocation."""
+    clean = _make_summary(contradiction_score=0.05, trend_strength=0.4)
+    messy = _make_summary(contradiction_score=0.50, trend_strength=0.4)
+    r_clean = evaluate_eligibility(clean)
+    r_messy = evaluate_eligibility(messy)
+    assert r_clean.position_sizing.portfolio_pct > r_messy.position_sizing.portfolio_pct
+
+
+def test_position_sizing_within_bounds():
+    """Sizing should always stay within configured bounds."""
+    cfg = DEFAULT_ELIGIBILITY_CONFIG
+    for conf in [0.35, 0.5, 0.7, 0.9]:
+        for contra in [0.0, 0.3, 0.55]:
+            summary = _make_summary(confidence=conf, contradiction_score=contra, trend_strength=0.4)
+            result = evaluate_eligibility(summary)
+            assert result.position_sizing.portfolio_pct >= cfg.base_portfolio_pct * 0.5
+            assert result.position_sizing.portfolio_pct <= cfg.max_portfolio_pct
+            assert result.position_sizing.max_loss_pct >= cfg.base_max_loss_pct * 0.5
+            assert result.position_sizing.max_loss_pct <= cfg.max_max_loss_pct
+
+
+# ---------------------------------------------------------------------------
+# Time horizon and invalidation
+# ---------------------------------------------------------------------------
+
+
+def test_time_horizon_mapped():
+    summary = _make_summary(window=TrendWindow.SEVEN_DAY)
+    result = evaluate_eligibility(summary)
+    assert result.time_horizon == "swing_1d_10d"
+
+
+def test_invalidation_conditions_present():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    assert len(result.invalidation_conditions) > 0
+    assert any("AAPL" in c for c in result.invalidation_conditions)
+
+
+# ---------------------------------------------------------------------------
+# Custom config
+# ---------------------------------------------------------------------------
+
+
+def test_custom_config_stricter_gates():
+    """A stricter config rejects what the default would accept."""
+    strict = EligibilityConfig(min_confidence=0.80)
+    summary = _make_summary(confidence=0.60)
+    result = evaluate_eligibility(summary, config=strict)
+    assert result.eligible is False
+    assert RejectionReason.LOW_CONFIDENCE in result.rejection_reasons
@@ -0,0 +1,283 @@
+"""Tests for recommendation worker — generating recommendations from trend data.
+
+Tests the pure logic functions (no DB required). Async DB functions
+are covered by integration tests.
+"""
+from datetime import datetime, timezone
+
+from services.recommendation.eligibility import evaluate_eligibility
+from services.recommendation.worker import (
+    _extract_risk_classification,
+    build_recommendation,
+    build_thesis,
+    classify_risk,
+)
+from services.shared.schemas import (
+    ActionType,
+    RecommendationMode,
+    TrendDirection,
+    TrendSummary,
+    TrendWindow,
+)
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+
+def _make_summary(
+    ticker: str = "AAPL",
+    direction: TrendDirection = TrendDirection.BULLISH,
+    strength: float = 0.5,
+    confidence: float = 0.65,
+    contradiction: float = 0.1,
+    supporting: list[str] | None = None,
+    opposing: list[str] | None = None,
+    catalysts: list[str] | None = None,
+    risks: list[str] | None = None,
+    window: TrendWindow = TrendWindow.SEVEN_DAY,
+) -> TrendSummary:
+    return TrendSummary(
+        entity_type="company",
+        entity_id=ticker,
+        window=window,
+        trend_direction=direction,
+        trend_strength=strength,
+        confidence=confidence,
+        top_supporting_evidence=supporting or ["doc1", "doc2", "doc3"],
+        top_opposing_evidence=opposing or [],
+        dominant_catalysts=catalysts or ["earnings"],
+        material_risks=risks or ["regulatory scrutiny"],
+        contradiction_score=contradiction,
+        generated_at=NOW,
+    )
+
+
+# ---------------------------------------------------------------------------
+# build_thesis
+# ---------------------------------------------------------------------------
+
+
+def test_thesis_contains_ticker_and_direction():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    thesis = build_thesis(summary, result)
+    assert "AAPL" in thesis
+    assert "bullish" in thesis
+
+
+def test_thesis_includes_catalysts():
+    summary = _make_summary(catalysts=["product", "m_and_a"])
+    result = evaluate_eligibility(summary)
+    thesis = build_thesis(summary, result)
+    assert "product" in thesis
+
+
+def test_thesis_includes_contradiction_note():
+    summary = _make_summary(contradiction=0.3)
+    result = evaluate_eligibility(summary)
+    thesis = build_thesis(summary, result)
+    assert "disagreement" in thesis
+
+
+def test_thesis_includes_risks():
+    summary = _make_summary(risks=["supply chain disruption"])
+    result = evaluate_eligibility(summary)
+    thesis = build_thesis(summary, result)
+    assert "supply chain disruption" in thesis
+
+
+def test_thesis_includes_evidence_counts():
+    summary = _make_summary(
+        supporting=["d1", "d2"],
+        opposing=["d3"],
+    )
+    result = evaluate_eligibility(summary)
+    thesis = build_thesis(summary, result)
+    assert "2 supporting" in thesis
+    assert "1 opposing" in thesis
+
+
+def test_thesis_includes_action():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    thesis = build_thesis(summary, result)
+    assert "BUY" in thesis
+
+
+# ---------------------------------------------------------------------------
+# classify_risk
+# ---------------------------------------------------------------------------
+
+
+def test_risk_low_for_strong_signal():
+    summary = _make_summary(
+        confidence=0.8,
+        contradiction=0.05,
+        supporting=["d1", "d2", "d3", "d4", "d5"],
+    )
+    result = evaluate_eligibility(summary)
+    assert classify_risk(summary, result) == "low"
+
+
+def test_risk_high_for_weak_signal():
+    summary = _make_summary(
+        confidence=0.36,
+        contradiction=0.55,
+        supporting=["d1"],
+        opposing=[],
+    )
+    result = evaluate_eligibility(summary)
+    risk = classify_risk(summary, result)
+    assert risk in ("high", "very_high")
+
+
+def test_risk_moderate_for_mixed():
+    summary = _make_summary(
+        confidence=0.5,
+        contradiction=0.2,
+        supporting=["d1", "d2"],
+        opposing=["d3"],
+    )
+    result = evaluate_eligibility(summary)
+    assert classify_risk(summary, result) == "moderate"
+
+
+# ---------------------------------------------------------------------------
+# build_recommendation
+# ---------------------------------------------------------------------------
+
+
+def test_build_recommendation_basic():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result, reference_time=NOW)
+
+    assert rec.ticker == "AAPL"
+    assert rec.action == ActionType.BUY
+    assert rec.confidence == summary.confidence
+    assert rec.time_horizon == "swing_1d_10d"
+    assert rec.generated_at == NOW
+    assert len(rec.evidence_refs) == 3  # 3 supporting + 0 opposing
+    assert rec.model_metadata.provider == "deterministic"
+
+
+def test_build_recommendation_includes_risk_in_thesis():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert rec.thesis.startswith("[risk:")
+
+
+def test_build_recommendation_with_llm_thesis():
+    """When llm_thesis is provided, it replaces the deterministic body."""
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    llm_text = "Apple exhibits a bullish posture driven by strong earnings."
+    rec = build_recommendation(summary, result, llm_thesis=llm_text)
+    assert llm_text in rec.thesis
+    assert rec.thesis.startswith("[risk:")
+    assert rec.model_metadata.provider == "ollama"
+    assert rec.model_metadata.model_name == "thesis-rewrite"
+
+
+def test_build_recommendation_without_llm_thesis_uses_deterministic():
+    """When llm_thesis is None, the deterministic thesis is used."""
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert rec.model_metadata.provider == "deterministic"
+    assert rec.model_metadata.model_name == "eligibility-v1"
+
+
+def test_build_recommendation_combines_evidence():
+    summary = _make_summary(
+        supporting=["s1", "s2"],
+        opposing=["o1"],
+    )
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert rec.evidence_refs == ["s1", "s2", "o1"]
+
+
+def test_build_recommendation_position_sizing():
+    summary = _make_summary(confidence=0.7)
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert rec.position_sizing.portfolio_pct == result.position_sizing.portfolio_pct
+    assert rec.position_sizing.max_loss_pct == result.position_sizing.max_loss_pct
+
+
+def test_build_recommendation_invalidation_conditions():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert len(rec.invalidation_conditions) > 0
+
+
+def test_build_recommendation_ineligible_is_informational():
+    """When eligibility fails, mode should be informational (Req 7.4)."""
+    summary = _make_summary(confidence=0.2)
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert rec.mode == RecommendationMode.INFORMATIONAL
+
+
+def test_build_recommendation_sell_action():
+    summary = _make_summary(direction=TrendDirection.BEARISH, strength=0.5)
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result)
+    assert rec.action == ActionType.SELL
+    assert "SELL" in rec.thesis
+
+
+# ---------------------------------------------------------------------------
+# _extract_risk_classification
+# ---------------------------------------------------------------------------
+
+
+def test_extract_risk_classification_from_thesis():
+    assert _extract_risk_classification("[risk:low] Some thesis text") == "low"
+    assert _extract_risk_classification("[risk:very_high] Bad signal") == "very_high"
+
+
+def test_extract_risk_classification_missing_prefix():
+    assert _extract_risk_classification("No risk prefix here") == "moderate"
+
+
+def test_extract_risk_classification_empty():
+    assert _extract_risk_classification("") == "moderate"
+
+
+# ---------------------------------------------------------------------------
+# build_recommendation stores full model metadata
+# ---------------------------------------------------------------------------
+
+
+def test_build_recommendation_model_metadata_deterministic():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result, reference_time=NOW)
+    assert rec.model_metadata.provider == "deterministic"
+    assert rec.model_metadata.model_name == "eligibility-v1"
+    assert rec.model_metadata.schema_version == "1.0.0"
+
+
+def test_build_recommendation_model_metadata_llm():
+    summary = _make_summary()
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(
+        summary, result, reference_time=NOW,
+        llm_thesis="Rewritten thesis text.",
+    )
+    assert rec.model_metadata.provider == "ollama"
+    assert rec.model_metadata.model_name == "thesis-rewrite"
+    assert rec.model_metadata.prompt_version != ""
+
+
+def test_build_recommendation_risk_classification_in_thesis():
+    """The risk classification should be embedded in the thesis prefix."""
+    summary = _make_summary(confidence=0.8, contradiction=0.05,
+                            supporting=["d1", "d2", "d3", "d4", "d5"])
+    result = evaluate_eligibility(summary)
+    rec = build_recommendation(summary, result, reference_time=NOW)
+    risk = _extract_risk_classification(rec.thesis)
+    assert risk == classify_risk(summary, result)
@@ -0,0 +1,208 @@
+"""Replay dataset tests for deterministic extraction validation.
+
+Loads archived document fixtures and validates that their expected
+extraction outputs still pass the current schema and semantic checks.
+This catches schema regressions, prompt contract changes, and
+validation rule drift without requiring a live Ollama instance.
+
+Requirements: 5.1, 5.2, 5.3, 5.4, 5.5
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from services.extractor.replay import (
+    FIXTURES_DIR,
+    compare_extraction,
+    load_all_fixtures,
+    load_fixture,
+    validate_all_fixtures,
+    validate_fixture,
+)
+from services.extractor.schemas import (
+    ExtractionResult,
+    get_schema_version,
+    validate_extraction,
+)
+
+
+# ---------------------------------------------------------------------------
+# Fixture loading
+# ---------------------------------------------------------------------------
+
+FIXTURE_DIR = FIXTURES_DIR
+
+
+def _fixture_paths() -> list[Path]:
+    """Collect all .json fixture files."""
+    if not FIXTURE_DIR.is_dir():
+        return []
+    return sorted(FIXTURE_DIR.glob("*.json"))
+
+
+def test_fixtures_directory_exists():
+    """The replay fixtures directory exists and contains JSON files."""
+    assert FIXTURE_DIR.is_dir(), f"Missing fixtures dir: {FIXTURE_DIR}"
+    paths = _fixture_paths()
+    assert len(paths) >= 3, f"Expected at least 3 fixtures, found {len(paths)}"
+
+
+def test_load_all_fixtures():
+    """All fixture files load without errors."""
+    fixtures = load_all_fixtures()
+    assert len(fixtures) >= 3
+    for f in fixtures:
+        assert f.document_id
+        assert f.document_text
+        assert f.expected_extraction
+
+
+def test_fixture_ids_unique():
+    """Every fixture has a unique document_id."""
+    fixtures = load_all_fixtures()
+    ids = [f.document_id for f in fixtures]
+    assert len(ids) == len(set(ids)), f"Duplicate fixture IDs: {ids}"
+
+
+# ---------------------------------------------------------------------------
+# Schema validation — the core deterministic test
+# ---------------------------------------------------------------------------
+
+def test_all_expected_extractions_pass_schema():
+    """Every fixture's expected_extraction passes current schema validation.
+
+    This is the primary regression gate. If a fixture fails here, either
+    the fixture needs updating or the schema change is breaking.
+    """
+    results = validate_all_fixtures()
+    assert len(results) >= 3
+
+    failures = [r for r in results if not r.schema_valid]
+    if failures:
+        msgs = []
+        for f in failures:
+            errs = f.validation_report.errors if f.validation_report else [f.error or "unknown"]
+            msgs.append(f"  {f.fixture_id}: {errs}")
+        pytest.fail(
+            f"{len(failures)} fixture(s) failed schema validation:\n" + "\n".join(msgs)
+        )
+
+
+@pytest.mark.parametrize("fixture_path", _fixture_paths(), ids=lambda p: p.stem)
+def test_individual_fixture_schema_valid(fixture_path: Path):
+    """Each fixture individually passes schema and semantic validation."""
+    fixture = load_fixture(fixture_path)
+    result = validate_fixture(fixture)
+    assert result.schema_valid, (
+        f"Fixture {fixture.document_id} failed: "
+        f"{result.validation_report.errors if result.validation_report else result.error}"
+    )
+    assert result.schema_version == get_schema_version()
+
+
+# ---------------------------------------------------------------------------
+# Expected extraction structural checks
+# ---------------------------------------------------------------------------
+
+@pytest.mark.parametrize("fixture_path", _fixture_paths(), ids=lambda p: p.stem)
+def test_expected_extraction_roundtrips(fixture_path: Path):
+    """Expected extraction can be parsed into ExtractionResult and back."""
+    fixture = load_fixture(fixture_path)
+    parsed = fixture.expected_result
+    dumped = parsed.model_dump(mode="json")
+    reparsed = ExtractionResult.model_validate(dumped)
+    assert reparsed.summary == parsed.summary
+    assert len(reparsed.companies) == len(parsed.companies)
+
+
+def test_low_quality_fixture_has_empty_companies():
+    """The low-quality garbled fixture should have no companies."""
+    fixtures = load_all_fixtures()
+    low_q = [f for f in fixtures if "low-quality" in f.document_id]
+    assert len(low_q) == 1
+    fixture = low_q[0]
+    assert len(fixture.expected_result.companies) == 0
+    assert fixture.expected_result.confidence <= 0.3
+
+
+def test_multi_company_fixture_has_multiple_tickers():
+    """The multi-company fixture should reference multiple companies."""
+    fixtures = load_all_fixtures()
+    multi = [f for f in fixtures if "multi-company" in f.document_id]
+    assert len(multi) == 1
+    fixture = multi[0]
+    tickers = [c.ticker for c in fixture.expected_result.companies]
+    assert len(tickers) >= 3
+
+
+# ---------------------------------------------------------------------------
+# Evidence grounding checks
+# ---------------------------------------------------------------------------
+
+@pytest.mark.parametrize("fixture_path", _fixture_paths(), ids=lambda p: p.stem)
+def test_evidence_spans_grounded_in_document(fixture_path: Path):
+    """Evidence spans in expected extractions appear in the document text."""
+    fixture = load_fixture(fixture_path)
+    report = validate_extraction(
+        fixture.expected_extraction,
+        document_text=fixture.document_text,
+    )
+    grounding_warnings = [
+        w for w in report.warnings if "evidence_span_not_found" in w
+    ]
+    assert not grounding_warnings, (
+        f"Fixture {fixture.document_id} has ungrounded evidence: {grounding_warnings}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Comparison logic tests (using synthetic data, no Ollama needed)
+# ---------------------------------------------------------------------------
+
+def test_compare_extraction_perfect_match():
+    """Comparison reports match when actual equals expected."""
+    fixtures = load_all_fixtures()
+    fixture = fixtures[0]
+    actual = fixture.expected_result  # identical
+    result = compare_extraction(fixture, actual)
+    assert result.companies_match
+    assert result.sentiment_match
+    assert result.catalyst_match
+    assert result.actual_schema_valid
+
+
+def test_compare_extraction_company_mismatch():
+    """Comparison detects when actual has different companies."""
+    fixtures = load_all_fixtures()
+    # Pick a fixture with companies
+    fixture = [f for f in fixtures if f.expected_result.companies][0]
+    # Build an actual result with no companies
+    actual = ExtractionResult(
+        summary="Different",
+        companies=[],
+        macro_themes=[],
+        novelty_score=0.5,
+        confidence=0.5,
+        extraction_warnings=[],
+    )
+    result = compare_extraction(fixture, actual)
+    assert not result.companies_match
+    assert any("company_mismatch" in w for w in result.warnings)
+
+
+def test_compare_extraction_sentiment_mismatch():
+    """Comparison detects sentiment drift."""
+    fixtures = load_all_fixtures()
+    fixture = [f for f in fixtures if f.expected_result.companies][0]
+    # Clone expected but flip sentiment
+    actual_data = fixture.expected_extraction.copy()
+    actual_data = {**actual_data}
+    companies = [dict(c) for c in actual_data["companies"]]
+    companies[0]["sentiment"] = "negative" if companies[0]["sentiment"] != "negative" else "positive"
+    actual_data["companies"] = companies
+    actual = ExtractionResult.model_validate(actual_data)
+    result = compare_extraction(fixture, actual)
+    assert result.companies_match  # same tickers
+    assert not result.sentiment_match  # different sentiment
@@ -0,0 +1,214 @@
+"""Tests for the resilient adapter wrapper.
+
+Validates retry logic, backoff computation, rate-limit coordination,
+and retryable error classification.
+"""
+from datetime import datetime, timezone
+from typing import Any
+
+import pytest
+
+from services.adapters.base import AdapterResult, BaseAdapter
+from services.adapters.resilient import (
+    ResilientAdapter,
+    RetryConfig,
+    compute_delay,
+)
+
+
+# --- Helpers ---
+
+
+def _make_result(
+    ok: bool = True,
+    error: str | None = None,
+    http_status: int | None = None,
+    metadata: dict[str, Any] | None = None,
+) -> AdapterResult:
+    return AdapterResult(
+        source_type="market_api",
+        ticker="AAPL",
+        items=[{"price": 150}] if ok else [],
+        raw_payload=b'{"ok":true}' if ok else b"",
+        content_hash="abc" if ok else "",
+        fetched_at=datetime.now(timezone.utc),
+        error=error,
+        http_status=http_status,
+        metadata=metadata or {},
+    )
+
+
+class FakeAdapter(BaseAdapter):
+    """Adapter that returns a sequence of pre-configured results."""
+
+    def __init__(self, results: list[AdapterResult]) -> None:
+        self._results = list(results)
+        self._call_count = 0
+
+    @property
+    def call_count(self) -> int:
+        return self._call_count
+
+    async def fetch(self, ticker: str, config: dict[str, Any]) -> AdapterResult:
+        idx = min(self._call_count, len(self._results) - 1)
+        self._call_count += 1
+        return self._results[idx]
+
+    def source_type(self) -> str:
+        return "market_api"
+
+
+# --- Tests ---
+
+
+class TestComputeDelay:
+    def test_first_attempt_is_base_delay_plus_jitter(self):
+        cfg = RetryConfig(base_delay=1.0, max_delay=60.0, jitter_factor=0.0)
+        delay = compute_delay(0, cfg)
+        assert delay == pytest.approx(1.0, abs=0.01)
+
+    def test_exponential_growth(self):
+        cfg = RetryConfig(base_delay=1.0, max_delay=60.0, jitter_factor=0.0)
+        d0 = compute_delay(0, cfg)
+        d1 = compute_delay(1, cfg)
+        d2 = compute_delay(2, cfg)
+        assert d1 == pytest.approx(2.0, abs=0.01)
+        assert d2 == pytest.approx(4.0, abs=0.01)
+        assert d2 > d1 > d0
+
+    def test_capped_at_max_delay(self):
+        cfg = RetryConfig(base_delay=1.0, max_delay=10.0, jitter_factor=0.0)
+        delay = compute_delay(10, cfg)
+        assert delay <= 10.0
+
+    def test_jitter_adds_randomness(self):
+        cfg = RetryConfig(base_delay=1.0, max_delay=60.0, jitter_factor=1.0)
+        delays = {compute_delay(0, cfg) for _ in range(20)}
+        # With jitter_factor=1.0, we should see some variation
+        assert len(delays) > 1
+
+
+class TestRetryableClassification:
+    def setup_method(self) -> None:
+        adapter = FakeAdapter([_make_result()])
+        self.resilient = ResilientAdapter(adapter)
+
+    def test_ok_result_not_retryable(self):
+        result = _make_result(ok=True)
+        assert self.resilient._is_retryable(result) is False
+
+    def test_429_is_retryable(self):
+        result = _make_result(ok=False, error="rate limited", http_status=429)
+        assert self.resilient._is_retryable(result) is True
+
+    def test_500_is_retryable(self):
+        result = _make_result(ok=False, error="server error", http_status=500)
+        assert self.resilient._is_retryable(result) is True
+
+    def test_503_is_retryable(self):
+        result = _make_result(ok=False, error="unavailable", http_status=503)
+        assert self.resilient._is_retryable(result) is True
+
+    def test_400_not_retryable(self):
+        result = _make_result(ok=False, error="bad request", http_status=400)
+        assert self.resilient._is_retryable(result) is False
+
+    def test_401_not_retryable(self):
+        result = _make_result(ok=False, error="unauthorized", http_status=401)
+        assert self.resilient._is_retryable(result) is False
+
+    def test_timeout_error_retryable(self):
+        result = _make_result(ok=False, error="timeout: read timed out")
+        assert self.resilient._is_retryable(result) is True
+
+    def test_connection_error_retryable(self):
+        result = _make_result(ok=False, error="Connection refused")
+        assert self.resilient._is_retryable(result) is True
+
+    def test_generic_error_not_retryable(self):
+        result = _make_result(ok=False, error="invalid JSON response")
+        assert self.resilient._is_retryable(result) is False
+
+
+
+@pytest.mark.asyncio
+class TestResilientFetch:
+    async def test_success_on_first_try(self):
+        adapter = FakeAdapter([_make_result(ok=True)])
+        resilient = ResilientAdapter(
+            adapter, retry_config=RetryConfig(max_retries=2, base_delay=0.01)
+        )
+        result = await resilient.fetch("AAPL", {})
+        assert result.ok
+        assert adapter.call_count == 1
+        assert result.metadata["retry_stats"]["attempts"] == 1
+
+    async def test_retries_on_retryable_then_succeeds(self):
+        results = [
+            _make_result(ok=False, error="server error", http_status=500),
+            _make_result(ok=False, error="server error", http_status=500),
+            _make_result(ok=True),
+        ]
+        adapter = FakeAdapter(results)
+        resilient = ResilientAdapter(
+            adapter, retry_config=RetryConfig(max_retries=3, base_delay=0.01)
+        )
+        result = await resilient.fetch("AAPL", {})
+        assert result.ok
+        assert adapter.call_count == 3
+        assert result.metadata["retry_stats"]["attempts"] == 3
+
+    async def test_exhausts_retries(self):
+        fail = _make_result(ok=False, error="server error", http_status=500)
+        adapter = FakeAdapter([fail, fail, fail, fail])
+        resilient = ResilientAdapter(
+            adapter, retry_config=RetryConfig(max_retries=2, base_delay=0.01)
+        )
+        result = await resilient.fetch("AAPL", {})
+        assert not result.ok
+        assert adapter.call_count == 3  # initial + 2 retries
+        assert result.metadata["retry_stats"]["exhausted"] is True
+
+    async def test_no_retry_on_non_retryable(self):
+        fail = _make_result(ok=False, error="bad request", http_status=400)
+        adapter = FakeAdapter([fail])
+        resilient = ResilientAdapter(
+            adapter, retry_config=RetryConfig(max_retries=3, base_delay=0.01)
+        )
+        result = await resilient.fetch("AAPL", {})
+        assert not result.ok
+        assert adapter.call_count == 1
+
+    async def test_retry_after_respected_for_429(self):
+        fail_429 = _make_result(
+            ok=False, error="rate limited", http_status=429,
+            metadata={"retry_after": 0.05},
+        )
+        results = [fail_429, _make_result(ok=True)]
+        adapter = FakeAdapter(results)
+        resilient = ResilientAdapter(
+            adapter, retry_config=RetryConfig(max_retries=2, base_delay=0.01)
+        )
+        result = await resilient.fetch("AAPL", {})
+        assert result.ok
+        assert adapter.call_count == 2
+        # Should have waited at least the retry_after amount
+        assert result.metadata["retry_stats"]["total_delay"] >= 0.04
+
+    async def test_source_type_passthrough(self):
+        adapter = FakeAdapter([_make_result()])
+        resilient = ResilientAdapter(adapter)
+        assert resilient.source_type() == "market_api"
+
+    async def test_default_config_for_known_source_type(self):
+        adapter = FakeAdapter([_make_result()])
+        resilient = ResilientAdapter(adapter)
+        # market_api default is 30 rate limit max
+        assert resilient.config.rate_limit_max == 30
+
+    async def test_custom_config_overrides_default(self):
+        adapter = FakeAdapter([_make_result()])
+        custom = RetryConfig(max_retries=5, rate_limit_max=100)
+        resilient = ResilientAdapter(adapter, retry_config=custom)
+        assert resilient.config.max_retries == 5
+        assert resilient.config.rate_limit_max == 100
@@ -0,0 +1,172 @@
+"""Tests for data retention and lifecycle controls.
+
+Validates retention policy resolution, expired object detection,
+cleanup logic, and DB record cleanup.
+
+Requirements: N3
+"""
+from datetime import datetime, timedelta, timezone
+from unittest.mock import MagicMock
+
+from services.shared.config import RetentionConfig
+from services.shared.retention import (
+    RetentionPolicy,
+    cleanup_bucket,
+    cutoff_date,
+    default_retention_days,
+    delete_expired_objects,
+    list_expired_objects,
+    merge_policies,
+    resolve_policies,
+)
+
+
+class TestDefaultRetentionDays:
+    def test_known_buckets(self):
+        config = RetentionConfig()
+        assert default_retention_days("stonks-raw-market", config) == 90
+        assert default_retention_days("stonks-raw-news", config) == 180
+        assert default_retention_days("stonks-raw-filings", config) == 365
+        assert default_retention_days("stonks-lakehouse", config) == 730
+        assert default_retention_days("stonks-audit", config) == 730
+
+    def test_unknown_bucket_defaults_to_365(self):
+        config = RetentionConfig()
+        assert default_retention_days("unknown-bucket", config) == 365
+
+    def test_custom_config_values(self):
+        config = RetentionConfig(raw_market_days=30, audit_days=1000)
+        assert default_retention_days("stonks-raw-market", config) == 30
+        assert default_retention_days("stonks-audit", config) == 1000
+
+
+class TestResolvePolicies:
+    def test_returns_policy_per_bucket(self):
+        config = RetentionConfig()
+        policies = resolve_policies(config)
+        bucket_names = [p.bucket_name for p in policies]
+        assert "stonks-raw-market" in bucket_names
+        assert "stonks-lakehouse" in bucket_names
+        assert len(policies) == 8
+
+    def test_uses_config_values(self):
+        config = RetentionConfig(raw_news_days=60)
+        policies = resolve_policies(config)
+        news_policy = next(p for p in policies if p.bucket_name == "stonks-raw-news")
+        assert news_policy.retention_days == 60
+
+
+class TestMergePolicies:
+    def test_db_overrides_config(self):
+        config_policies = [
+            RetentionPolicy("stonks-raw-market", 90),
+            RetentionPolicy("stonks-raw-news", 180),
+        ]
+        db_policies = {
+            "stonks-raw-market": RetentionPolicy("stonks-raw-market", 30),
+        }
+        merged = merge_policies(config_policies, db_policies)
+        market = next(p for p in merged if p.bucket_name == "stonks-raw-market")
+        news = next(p for p in merged if p.bucket_name == "stonks-raw-news")
+        assert market.retention_days == 30  # DB override
+        assert news.retention_days == 180  # config default
+
+    def test_empty_db_uses_all_config(self):
+        config_policies = [RetentionPolicy("stonks-audit", 730)]
+        merged = merge_policies(config_policies, {})
+        assert len(merged) == 1
+        assert merged[0].retention_days == 730
+
+
+class TestCutoffDate:
+    def test_calculates_cutoff(self):
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+        cutoff = cutoff_date(90, now)
+        expected = now - timedelta(days=90)
+        assert cutoff == expected
+
+    def test_uses_current_time_when_none(self):
+        cutoff = cutoff_date(30)
+        assert cutoff < datetime.now(timezone.utc)
+
+
+class TestListExpiredObjects:
+    def test_finds_expired_objects(self):
+        client = MagicMock()
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+        old_obj = MagicMock()
+        old_obj.object_name = "old/file.json"
+        old_obj.last_modified = now - timedelta(days=100)
+
+        new_obj = MagicMock()
+        new_obj.object_name = "new/file.json"
+        new_obj.last_modified = now - timedelta(days=10)
+
+        client.list_objects.return_value = [old_obj, new_obj]
+
+        expired = list_expired_objects(client, "stonks-raw-market", 90, now=now)
+        assert expired == ["old/file.json"]
+
+    def test_respects_batch_size(self):
+        client = MagicMock()
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+        objects = []
+        for i in range(10):
+            obj = MagicMock()
+            obj.object_name = f"file_{i}.json"
+            obj.last_modified = now - timedelta(days=200)
+            objects.append(obj)
+
+        client.list_objects.return_value = objects
+        expired = list_expired_objects(client, "test-bucket", 90, batch_size=3, now=now)
+        assert len(expired) == 3
+
+    def test_handles_list_error(self):
+        client = MagicMock()
+        client.list_objects.side_effect = Exception("connection error")
+        expired = list_expired_objects(client, "test-bucket", 90)
+        assert expired == []
+
+
+class TestDeleteExpiredObjects:
+    def test_deletes_all(self):
+        client = MagicMock()
+        count = delete_expired_objects(client, "test-bucket", ["a.json", "b.json"])
+        assert count == 2
+        assert client.remove_object.call_count == 2
+
+    def test_handles_partial_failure(self):
+        client = MagicMock()
+        client.remove_object.side_effect = [None, Exception("fail"), None]
+        count = delete_expired_objects(client, "test-bucket", ["a", "b", "c"])
+        assert count == 2
+
+
+class TestCleanupBucket:
+    def test_full_cleanup_flow(self):
+        client = MagicMock()
+        now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+        old_obj = MagicMock()
+        old_obj.object_name = "expired.json"
+        old_obj.last_modified = now - timedelta(days=200)
+        client.list_objects.return_value = [old_obj]
+
+        policy = RetentionPolicy("stonks-raw-market", 90)
+        result = cleanup_bucket(client, policy, now=now)
+
+        assert result.bucket_name == "stonks-raw-market"
+        assert result.objects_scanned == 1
+        assert result.objects_deleted == 1
+
+    def test_no_expired_objects(self):
+        client = MagicMock()
+        client.list_objects.return_value = []
+
+        policy = RetentionPolicy("stonks-raw-news", 180)
+        result = cleanup_bucket(client, policy)
+
+        assert result.objects_scanned == 0
+        assert result.objects_deleted == 0
@@ -0,0 +1,413 @@
+"""Tests for the portfolio and account risk configuration model and enforcement."""
+
+from datetime import datetime, timedelta, timezone
+
+from services.risk.engine import (
+    AccountRiskState,
+    DailyLossLimits,
+    DEFAULT_RISK_CONFIG,
+    NewsShockLockout,
+    OperatorApproval,
+    PortfolioRiskConfig,
+    PositionLimits,
+    ProposedOrder,
+    RiskCheckDetail,
+    RiskCheckResult,
+    RiskEvaluation,
+    SectorExposureLimits,
+    SymbolCooldown,
+    TradingMode,
+    evaluate_order,
+)
+
+
+def test_default_risk_config_is_paper_mode():
+    """Default config should be paper trading mode."""
+    cfg = PortfolioRiskConfig()
+    assert cfg.trading_mode == TradingMode.PAPER
+    assert cfg.active is True
+
+
+def test_position_limits_defaults():
+    limits = PositionLimits()
+    assert limits.max_position_pct == 0.05
+    assert limits.max_position_value == 10_000.0
+    assert limits.max_shares_per_order == 1000.0
+
+
+def test_sector_exposure_defaults():
+    limits = SectorExposureLimits()
+    assert limits.max_sector_pct == 0.25
+    assert limits.max_sectors == 10
+
+
+def test_daily_loss_defaults():
+    limits = DailyLossLimits()
+    assert limits.max_daily_loss_pct == 0.02
+    assert limits.max_daily_loss_value == 1_000.0
+    assert limits.max_daily_trades == 20
+
+
+def test_news_shock_lockout_defaults():
+    lockout = NewsShockLockout()
+    assert lockout.enabled is True
+    assert lockout.lockout_minutes == 60
+    assert lockout.impact_threshold == 0.80
+    assert "earnings" in lockout.catalyst_types
+
+
+def test_operator_approval_defaults():
+    approval = OperatorApproval()
+    assert approval.require_approval_for_live is True
+    assert approval.auto_approve_paper is True
+    assert approval.approval_timeout_minutes == 30
+
+
+def test_symbol_cooldown_defaults():
+    cooldown = SymbolCooldown()
+    assert cooldown.cooldown_minutes == 15
+    assert cooldown.max_open_positions_per_symbol == 1
+
+
+def test_portfolio_config_roundtrip_json():
+    """Config should survive serialization to JSON and back."""
+    cfg = PortfolioRiskConfig(
+        name="test-profile",
+        trading_mode=TradingMode.LIVE,
+        position_limits=PositionLimits(max_position_pct=0.10),
+        daily_loss=DailyLossLimits(max_daily_trades=5),
+    )
+    data = cfg.to_db_json()
+    restored = PortfolioRiskConfig.from_db_json(data)
+
+    assert restored.name == "test-profile"
+    assert restored.trading_mode == TradingMode.LIVE
+    assert restored.position_limits.max_position_pct == 0.10
+    assert restored.daily_loss.max_daily_trades == 5
+    # Nested defaults should survive
+    assert restored.sector_exposure.max_sector_pct == 0.25
+    assert restored.news_shock.enabled is True
+
+
+def test_account_risk_state_defaults():
+    state = AccountRiskState(account_id="test-acct")
+    assert state.portfolio_value == 0.0
+    assert state.daily_trade_count == 0
+    assert state.positions_by_symbol == {}
+    assert state.positions_by_sector == {}
+    assert state.locked_symbols == {}
+
+
+def test_account_risk_state_with_positions():
+    state = AccountRiskState(
+        account_id="acct-1",
+        portfolio_value=100_000.0,
+        cash=50_000.0,
+        daily_pnl=-500.0,
+        daily_trade_count=3,
+        positions_by_symbol={"AAPL": 10_000.0, "MSFT": 5_000.0},
+        positions_by_sector={"Technology": 15_000.0},
+    )
+    assert state.positions_by_symbol["AAPL"] == 10_000.0
+    assert state.positions_by_sector["Technology"] == 15_000.0
+    assert state.daily_pnl == -500.0
+
+
+def test_risk_evaluation_passed_property():
+    """passed should be True only when eligible and no rejections."""
+    passing = RiskEvaluation(
+        ticker="AAPL",
+        eligible=True,
+        allowed_mode=TradingMode.PAPER,
+        checks=[
+            RiskCheckDetail(check_name="position_size", result=RiskCheckResult.PASS),
+        ],
+    )
+    assert passing.passed is True
+
+    failing = RiskEvaluation(
+        ticker="AAPL",
+        eligible=False,
+        allowed_mode=TradingMode.DISABLED,
+        rejection_reasons=["max_daily_loss_exceeded"],
+        checks=[
+            RiskCheckDetail(
+                check_name="daily_loss",
+                result=RiskCheckResult.FAIL,
+                message="Daily loss limit exceeded",
+                threshold=0.02,
+                actual=0.03,
+            ),
+        ],
+    )
+    assert failing.passed is False
+
+
+def test_risk_evaluation_captures_config_snapshot():
+    """Evaluation should be able to store the config used for reproducibility."""
+    cfg = PortfolioRiskConfig(name="snapshot-test")
+    state = AccountRiskState(account_id="acct-1", portfolio_value=50_000.0)
+
+    evaluation = RiskEvaluation(
+        ticker="TSLA",
+        eligible=True,
+        allowed_mode=TradingMode.PAPER,
+        config_snapshot=cfg,
+        state_snapshot=state,
+    )
+    assert evaluation.config_snapshot is not None
+    assert evaluation.config_snapshot.name == "snapshot-test"
+    assert evaluation.state_snapshot is not None
+    assert evaluation.state_snapshot.portfolio_value == 50_000.0
+
+
+def test_trading_mode_disabled():
+    """DISABLED mode should be available for halting all trading."""
+    cfg = PortfolioRiskConfig(trading_mode=TradingMode.DISABLED)
+    assert cfg.trading_mode == TradingMode.DISABLED
+
+
+def test_default_risk_config_singleton():
+    """Module-level default should be a valid paper config."""
+    assert DEFAULT_RISK_CONFIG.trading_mode == TradingMode.PAPER
+    assert DEFAULT_RISK_CONFIG.name == "default"
+
+
+# ===================================================================
+# Enforcement logic tests (hard blocks)
+# ===================================================================
+
+
+def _make_config(**overrides) -> PortfolioRiskConfig:
+    return PortfolioRiskConfig(
+        trading_mode=overrides.get("trading_mode", TradingMode.PAPER),
+        position_limits=overrides.get("position_limits", PositionLimits()),
+        sector_exposure=overrides.get("sector_exposure", SectorExposureLimits()),
+        daily_loss=overrides.get("daily_loss", DailyLossLimits()),
+        news_shock=overrides.get("news_shock", NewsShockLockout()),
+        symbol_cooldown=overrides.get("symbol_cooldown", SymbolCooldown()),
+    )
+
+
+def _make_state(**overrides) -> AccountRiskState:
+    return AccountRiskState(
+        account_id=overrides.get("account_id", "test-acct"),
+        portfolio_value=overrides.get("portfolio_value", 100_000.0),
+        cash=overrides.get("cash", 50_000.0),
+        daily_pnl=overrides.get("daily_pnl", 0.0),
+        daily_trade_count=overrides.get("daily_trade_count", 0),
+        positions_by_symbol=overrides.get("positions_by_symbol", {}),
+        positions_by_sector=overrides.get("positions_by_sector", {}),
+        last_trade_times=overrides.get("last_trade_times", {}),
+        locked_symbols=overrides.get("locked_symbols", {}),
+    )
+
+
+# --- Trading mode gate ---
+
+
+def test_evaluate_order_disabled_mode_blocks():
+    """Orders are rejected when trading mode is DISABLED."""
+    config = _make_config(trading_mode=TradingMode.DISABLED)
+    order = ProposedOrder(ticker="AAPL", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, _make_state())
+    assert result.passed is False
+    assert any("disabled" in r.lower() for r in result.rejection_reasons)
+
+
+def test_evaluate_order_paper_mode_passes():
+    """A clean order in paper mode should pass all checks."""
+    config = _make_config()
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, _make_state())
+    assert result.passed is True
+    assert result.allowed_mode == TradingMode.PAPER
+
+
+# --- Max position size ---
+
+
+def test_position_value_exceeded():
+    config = _make_config(position_limits=PositionLimits(max_position_value=5000))
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=6000, quantity=10)
+    result = evaluate_order(order, config, _make_state())
+    assert result.passed is False
+    assert any(c.check_name == "max_position_value" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_position_value_includes_existing():
+    """Existing position value is added to the new order value."""
+    config = _make_config(position_limits=PositionLimits(max_position_value=5000))
+    state = _make_state(positions_by_symbol={"AAPL": 3000.0})
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=2500, quantity=5)
+    result = evaluate_order(order, config, state)
+    assert result.passed is False
+    fail_check = next(c for c in result.checks if c.check_name == "max_position_value")
+    assert fail_check.actual == 5500.0
+
+
+def test_position_pct_exceeded():
+    config = _make_config(position_limits=PositionLimits(max_position_pct=0.05))
+    state = _make_state(portfolio_value=100_000)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=6000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert any(c.check_name == "max_position_pct" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_max_shares_exceeded():
+    config = _make_config(position_limits=PositionLimits(max_shares_per_order=100))
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=200)
+    result = evaluate_order(order, config, _make_state())
+    assert any(c.check_name == "max_shares_per_order" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+# --- Sector exposure ---
+
+
+def test_sector_exposure_exceeded():
+    config = _make_config(sector_exposure=SectorExposureLimits(max_sector_pct=0.25))
+    state = _make_state(
+        portfolio_value=100_000,
+        positions_by_sector={"Technology": 20_000.0},
+    )
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=6000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert any(c.check_name == "sector_exposure" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_sector_exposure_no_sector_warns():
+    """Missing sector on order produces a warning, not a failure."""
+    config = _make_config()
+    order = ProposedOrder(ticker="AAPL", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, _make_state())
+    sector_check = next(c for c in result.checks if c.check_name == "sector_exposure")
+    assert sector_check.result == RiskCheckResult.WARN
+
+
+# --- Daily loss limits ---
+
+
+def test_daily_loss_pct_exceeded():
+    config = _make_config(daily_loss=DailyLossLimits(max_daily_loss_pct=0.02))
+    state = _make_state(portfolio_value=100_000, daily_pnl=-2500)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert any(c.check_name == "daily_loss_pct" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_daily_loss_value_exceeded():
+    config = _make_config(daily_loss=DailyLossLimits(max_daily_loss_value=500))
+    state = _make_state(daily_pnl=-600)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert any(c.check_name == "daily_loss_value" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_daily_trade_count_exceeded():
+    config = _make_config(daily_loss=DailyLossLimits(max_daily_trades=5))
+    state = _make_state(daily_trade_count=5)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert any(c.check_name == "daily_trade_count" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_positive_pnl_does_not_trigger_loss_limit():
+    """Positive P&L should not trigger daily loss checks."""
+    config = _make_config(daily_loss=DailyLossLimits(max_daily_loss_pct=0.02))
+    state = _make_state(portfolio_value=100_000, daily_pnl=5000)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    loss_checks = [c for c in result.checks if c.check_name.startswith("daily_loss")]
+    assert all(c.result == RiskCheckResult.PASS for c in loss_checks)
+
+
+# --- News-shock lockout ---
+
+
+def test_news_shock_lockout_blocks():
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    lockout_expiry = now + timedelta(minutes=30)
+    state = _make_state(locked_symbols={"AAPL": lockout_expiry})
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, _make_config(), state, now=now)
+    assert any(c.check_name == "news_shock_lockout" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_news_shock_lockout_expired_passes():
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    lockout_expiry = now - timedelta(minutes=5)  # already expired
+    state = _make_state(locked_symbols={"AAPL": lockout_expiry})
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, _make_config(), state, now=now)
+    lockout_check = next(c for c in result.checks if c.check_name == "news_shock_lockout")
+    assert lockout_check.result == RiskCheckResult.PASS
+
+
+def test_news_shock_lockout_disabled_passes():
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    lockout_expiry = now + timedelta(minutes=30)
+    config = _make_config(news_shock=NewsShockLockout(enabled=False))
+    state = _make_state(locked_symbols={"AAPL": lockout_expiry})
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state, now=now)
+    lockout_check = next(c for c in result.checks if c.check_name == "news_shock_lockout")
+    assert lockout_check.result == RiskCheckResult.PASS
+
+
+# --- Symbol cooldown ---
+
+
+def test_symbol_cooldown_blocks():
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    last_trade = now - timedelta(minutes=5)  # 5 min ago, default cooldown is 15
+    state = _make_state(last_trade_times={"AAPL": last_trade})
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, _make_config(), state, now=now)
+    assert any(c.check_name == "symbol_cooldown" and c.result == RiskCheckResult.FAIL for c in result.checks)
+
+
+def test_symbol_cooldown_expired_passes():
+    now = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+    last_trade = now - timedelta(minutes=20)  # 20 min ago, cooldown is 15
+    state = _make_state(last_trade_times={"AAPL": last_trade})
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, _make_config(), state, now=now)
+    cooldown_check = next(c for c in result.checks if c.check_name == "symbol_cooldown")
+    assert cooldown_check.result == RiskCheckResult.PASS
+
+
+# --- Combined scenarios ---
+
+
+def test_multiple_failures_all_captured():
+    """When multiple checks fail, all rejection reasons are captured."""
+    config = _make_config(
+        position_limits=PositionLimits(max_position_value=500),
+        daily_loss=DailyLossLimits(max_daily_loss_value=100),
+    )
+    state = _make_state(daily_pnl=-200)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert result.passed is False
+    assert len(result.rejection_reasons) >= 2
+
+
+def test_evaluation_captures_snapshots():
+    """Config and state snapshots are stored for reproducibility."""
+    config = _make_config()
+    state = _make_state(portfolio_value=75_000)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    assert result.config_snapshot is not None
+    assert result.state_snapshot is not None
+    assert result.state_snapshot.portfolio_value == 75_000
+
+
+def test_fail_closed_no_state():
+    """With zero portfolio value, position pct check should fail-closed for non-zero orders."""
+    config = _make_config()
+    state = _make_state(portfolio_value=0.0)
+    order = ProposedOrder(ticker="AAPL", sector="Technology", estimated_value=1000, quantity=10)
+    result = evaluate_order(order, config, state)
+    # position_pct = 1.0 when portfolio is 0 and order value > 0 → exceeds 0.05
+    assert any(c.check_name == "max_position_pct" and c.result == RiskCheckResult.FAIL for c in result.checks)
@@ -0,0 +1,173 @@
+"""Tests for sector and market rollup aggregation.
+
+Tests the pure rollup logic (no DB required).
+
+Requirements: 6.3, 6.4, 6.5
+"""
+from datetime import datetime, timezone
+
+from services.aggregation.rollups import (
+    CompanyTrendRow,
+    rollup_trends,
+    _build_rollup_disagreement,
+    _derive_rollup_direction,
+)
+from services.shared.schemas import TrendDirection, TrendWindow
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+
+def _make_trend(
+    ticker: str = "AAPL",
+    sector: str = "Technology",
+    window: str = "7d",
+    direction: str = "bullish",
+    strength: float = 0.6,
+    confidence: float = 0.8,
+    contradiction: float = 0.1,
+    catalysts: list[str] | None = None,
+    risks: list[str] | None = None,
+    supporting: list[str] | None = None,
+    opposing: list[str] | None = None,
+) -> CompanyTrendRow:
+    return CompanyTrendRow(
+        entity_id=ticker,
+        sector=sector,
+        window=window,
+        trend_direction=direction,
+        trend_strength=strength,
+        confidence=confidence,
+        contradiction_score=contradiction,
+        dominant_catalysts=catalysts or ["earnings"],
+        material_risks=risks or [],
+        top_supporting_evidence=supporting or ["doc-1"],
+        top_opposing_evidence=opposing or [],
+    )
+
+
+# ---------------------------------------------------------------------------
+# rollup_trends
+# ---------------------------------------------------------------------------
+
+
+def test_rollup_empty():
+    summary = rollup_trends([], "sector", "Technology", "7d", NOW)
+    assert summary.entity_type == "sector"
+    assert summary.entity_id == "Technology"
+    assert summary.trend_direction == TrendDirection.NEUTRAL
+    assert summary.trend_strength == 0.0
+    assert summary.confidence == 0.0
+
+
+def test_rollup_single_bullish():
+    trends = [_make_trend("AAPL", direction="bullish", strength=0.7, confidence=0.9)]
+    summary = rollup_trends(trends, "sector", "Technology", "7d", NOW)
+    assert summary.trend_direction == TrendDirection.BULLISH
+    assert summary.trend_strength > 0
+    assert summary.confidence > 0
+    assert summary.window == TrendWindow.SEVEN_DAY
+
+
+def test_rollup_mixed_directions():
+    trends = [
+        _make_trend("AAPL", direction="bullish", strength=0.6, confidence=0.8),
+        _make_trend("MSFT", direction="bearish", strength=0.6, confidence=0.8),
+    ]
+    summary = rollup_trends(trends, "sector", "Technology", "7d", NOW)
+    # Equal and opposite → neutral or mixed
+    assert summary.trend_direction in (TrendDirection.NEUTRAL, TrendDirection.MIXED)
+
+
+def test_rollup_confidence_weighted():
+    """Higher-confidence company should dominate the rollup direction."""
+    trends = [
+        _make_trend("AAPL", direction="bullish", strength=0.8, confidence=0.95),
+        _make_trend("MSFT", direction="bearish", strength=0.3, confidence=0.2),
+    ]
+    summary = rollup_trends(trends, "sector", "Technology", "7d", NOW)
+    assert summary.trend_direction == TrendDirection.BULLISH
+
+
+def test_rollup_catalysts_aggregated():
+    trends = [
+        _make_trend("AAPL", catalysts=["earnings", "product"], confidence=0.8),
+        _make_trend("MSFT", catalysts=["product", "macro"], confidence=0.6),
+    ]
+    summary = rollup_trends(trends, "sector", "Technology", "7d", NOW)
+    # "product" appears in both → should be top catalyst
+    assert "product" in summary.dominant_catalysts
+
+
+def test_rollup_risks_deduplicated():
+    trends = [
+        _make_trend("AAPL", risks=["regulatory risk", "supply chain"], confidence=0.8),
+        _make_trend("MSFT", risks=["Regulatory Risk", "tariffs"], confidence=0.6),
+    ]
+    summary = rollup_trends(trends, "sector", "Technology", "7d", NOW)
+    risk_lower = [r.lower() for r in summary.material_risks]
+    assert risk_lower.count("regulatory risk") == 1
+
+
+def test_rollup_evidence_collected():
+    trends = [
+        _make_trend("AAPL", supporting=["doc-1", "doc-2"], opposing=["doc-3"]),
+        _make_trend("MSFT", supporting=["doc-4"], opposing=["doc-5"]),
+    ]
+    summary = rollup_trends(trends, "market", "all", "7d", NOW)
+    assert "doc-1" in summary.top_supporting_evidence
+    assert "doc-4" in summary.top_supporting_evidence
+    assert "doc-3" in summary.top_opposing_evidence
+
+
+def test_rollup_market_entity_type():
+    trends = [_make_trend("AAPL"), _make_trend("JPM", sector="Financials")]
+    summary = rollup_trends(trends, "market", "all", "7d", NOW)
+    assert summary.entity_type == "market"
+    assert summary.entity_id == "all"
+
+
+# ---------------------------------------------------------------------------
+# _derive_rollup_direction
+# ---------------------------------------------------------------------------
+
+
+def test_derive_direction_bullish():
+    assert _derive_rollup_direction(0.5, 0.0) == TrendDirection.BULLISH
+
+
+def test_derive_direction_bearish():
+    assert _derive_rollup_direction(-0.5, 0.0) == TrendDirection.BEARISH
+
+
+def test_derive_direction_neutral():
+    assert _derive_rollup_direction(0.05, 0.0) == TrendDirection.NEUTRAL
+
+
+def test_derive_direction_mixed_high_contradiction():
+    assert _derive_rollup_direction(0.1, 0.2) == TrendDirection.MIXED
+
+
+# ---------------------------------------------------------------------------
+# _build_rollup_disagreement
+# ---------------------------------------------------------------------------
+
+
+def test_disagreement_no_conflict():
+    trends = [
+        _make_trend("AAPL", direction="bullish"),
+        _make_trend("MSFT", direction="bullish"),
+    ]
+    details = _build_rollup_disagreement(trends, "Technology")
+    assert details == []
+
+
+def test_disagreement_with_conflict():
+    trends = [
+        _make_trend("AAPL", direction="bullish", confidence=0.8),
+        _make_trend("MSFT", direction="bearish", confidence=0.7),
+    ]
+    details = _build_rollup_disagreement(trends, "Technology")
+    assert len(details) == 1
+    assert details[0].dimension == "company_direction"
+    assert "AAPL" in details[0].positive_doc_ids
+    assert "MSFT" in details[0].negative_doc_ids
@@ -0,0 +1,131 @@
+"""Tests for scheduler polling logic."""
+from datetime import datetime, timedelta
+
+from services.scheduler.app import (
+    DEFAULT_CADENCES,
+    MAX_RETRY_COUNT,
+    build_job_payload,
+    compute_backoff,
+    get_cadence_for_source,
+    is_source_due,
+)
+
+
+class TestGetCadenceForSource:
+    def test_default_cadence_market_api(self):
+        assert get_cadence_for_source("market_api", None) == 60
+
+    def test_default_cadence_news_api(self):
+        assert get_cadence_for_source("news_api", {}) == 300
+
+    def test_default_cadence_unknown_type(self):
+        assert get_cadence_for_source("unknown", None) == 600
+
+    def test_override_from_config(self):
+        config = {"polling_interval_seconds": 120}
+        assert get_cadence_for_source("market_api", config) == 120
+
+    def test_override_minimum_clamp(self):
+        config = {"polling_interval_seconds": 5}
+        assert get_cadence_for_source("market_api", config) == 10
+
+    def test_invalid_override_falls_back(self):
+        config = {"polling_interval_seconds": "not_a_number"}
+        assert get_cadence_for_source("news_api", config) == DEFAULT_CADENCES["news_api"]
+
+
+class TestComputeBackoff:
+    def test_first_retry(self):
+        assert compute_backoff(0) == 60
+
+    def test_second_retry(self):
+        assert compute_backoff(1) == 120
+
+    def test_capped_at_max(self):
+        assert compute_backoff(20) == 3600
+
+
+class TestIsSourceDue:
+    def _now(self):
+        return datetime(2026, 4, 11, 12, 0, 0)
+
+    def test_never_run_is_due(self):
+        assert is_source_due("market_api", None, None, None, 0, None, self._now())
+
+    def test_completed_within_cadence_not_due(self):
+        last = self._now() - timedelta(seconds=30)
+        assert not is_source_due("market_api", None, last, "completed", 0, None, self._now())
+
+    def test_completed_past_cadence_is_due(self):
+        last = self._now() - timedelta(seconds=120)
+        assert is_source_due("market_api", None, last, "completed", 0, None, self._now())
+
+    def test_running_not_due(self):
+        last = self._now() - timedelta(seconds=5)
+        assert not is_source_due("market_api", None, last, "running", 0, None, self._now())
+
+    def test_failed_within_backoff_not_due(self):
+        last = self._now() - timedelta(seconds=30)
+        next_retry = self._now() + timedelta(seconds=30)
+        assert not is_source_due("market_api", None, last, "failed", 1, next_retry, self._now())
+
+    def test_failed_past_backoff_is_due(self):
+        last = self._now() - timedelta(seconds=120)
+        next_retry = self._now() - timedelta(seconds=10)
+        assert is_source_due("market_api", None, last, "failed", 1, next_retry, self._now())
+
+    def test_failed_max_retries_not_due(self):
+        last = self._now() - timedelta(seconds=120)
+        assert not is_source_due(
+            "market_api", None, last, "failed", MAX_RETRY_COUNT, None, self._now()
+        )
+
+    def test_custom_cadence_respected(self):
+        config = {"polling_interval_seconds": 600}
+        last = self._now() - timedelta(seconds=300)
+        assert not is_source_due("market_api", config, last, "completed", 0, None, self._now())
+
+        last_old = self._now() - timedelta(seconds=700)
+        assert is_source_due("market_api", config, last_old, "completed", 0, None, self._now())
+
+
+class TestBuildJobPayload:
+    def test_payload_structure(self):
+        source = {
+            "source_id": "sid-1",
+            "company_id": "cid-1",
+            "ticker": "AAPL",
+            "legal_name": "Apple Inc.",
+            "source_type": "news_api",
+            "source_name": "NewsAPI",
+            "config": {"endpoint": "/v2/everything"},
+            "credibility_score": 0.8,
+        }
+        now = datetime(2026, 4, 11, 12, 0, 0)
+        job = build_job_payload(source, ["Apple", "iPhone"], now)
+
+        assert job["source_id"] == "sid-1"
+        assert job["company_id"] == "cid-1"
+        assert job["ticker"] == "AAPL"
+        assert job["legal_name"] == "Apple Inc."
+        assert job["aliases"] == ["Apple", "iPhone"]
+        assert job["source_type"] == "news_api"
+        assert job["config"] == {"endpoint": "/v2/everything"}
+        assert job["credibility_score"] == 0.8
+        assert job["scheduled_at"] == now.isoformat()
+
+    def test_payload_null_config(self):
+        source = {
+            "source_id": "sid-2",
+            "company_id": "cid-2",
+            "ticker": "MSFT",
+            "legal_name": "Microsoft Corp.",
+            "source_type": "market_api",
+            "source_name": "Polygon",
+            "config": None,
+            "credibility_score": None,
+        }
+        job = build_job_payload(source, [], datetime(2026, 4, 11, 12, 0, 0))
+        assert job["config"] == {}
+        assert job["credibility_score"] == 0.5
+        assert job["aliases"] == []
@@ -0,0 +1,212 @@
+"""Tests for shared MinIO storage utilities.
+
+Validates bucket mapping, path building, storage refs, bucket creation,
+artifact upload, and download from services.shared.storage.
+
+Requirements: 3.1, 3.2, 3.3, 9.1
+"""
+from datetime import datetime, timezone
+from unittest.mock import MagicMock
+
+from services.shared.storage import (
+    ALL_BUCKETS,
+    bucket_for_source,
+    build_artifact_path,
+    download_artifact,
+    ensure_buckets,
+    storage_ref,
+    upload_artifact,
+    upload_html_artifact,
+    upload_normalized_text,
+    upload_parser_output,
+    upload_raw_artifact,
+)
+
+
+class TestBucketForSource:
+    def test_market_api(self):
+        assert bucket_for_source("market_api") == "stonks-raw-market"
+
+    def test_news_api(self):
+        assert bucket_for_source("news_api") == "stonks-raw-news"
+
+    def test_filings_api(self):
+        assert bucket_for_source("filings_api") == "stonks-raw-filings"
+
+    def test_web_scrape(self):
+        assert bucket_for_source("web_scrape") == "stonks-raw-news"
+
+    def test_broker(self):
+        assert bucket_for_source("broker") == "stonks-raw-market"
+
+    def test_unknown_defaults_to_market(self):
+        assert bucket_for_source("unknown_type") == "stonks-raw-market"
+
+
+class TestBuildArtifactPath:
+    def test_default_path_format(self):
+        ts = datetime(2026, 4, 11, 14, 30, 0, tzinfo=timezone.utc)
+        path = build_artifact_path("news_api", "AAPL", "doc-123", timestamp=ts)
+        assert path == "news_api/AAPL/2026/04/11/doc-123/raw.json"
+
+    def test_custom_artifact_name_and_ext(self):
+        ts = datetime(2026, 1, 5, 0, 0, 0, tzinfo=timezone.utc)
+        path = build_artifact_path(
+            "web_scrape", "MSFT", "doc-456",
+            artifact_name="raw", ext="html", timestamp=ts,
+        )
+        assert path == "web_scrape/MSFT/2026/01/05/doc-456/raw.html"
+
+    def test_uses_utc_now_when_no_timestamp(self):
+        path = build_artifact_path("market_api", "GOOG", "run-1")
+        # Just verify it has the expected structure
+        parts = path.split("/")
+        assert parts[0] == "market_api"
+        assert parts[1] == "GOOG"
+        assert len(parts) == 7  # source/ticker/yyyy/mm/dd/doc_id/file
+
+
+class TestStorageRef:
+    def test_builds_s3_uri(self):
+        ref = storage_ref("stonks-raw-news", "news_api/AAPL/2026/04/11/doc-1/raw.json")
+        assert ref == "s3://stonks-raw-news/news_api/AAPL/2026/04/11/doc-1/raw.json"
+
+
+class TestEnsureBuckets:
+    def test_creates_missing_buckets(self):
+        client = MagicMock()
+        client.bucket_exists.return_value = False
+        created = ensure_buckets(client, ["bucket-a", "bucket-b"])
+        assert created == ["bucket-a", "bucket-b"]
+        assert client.make_bucket.call_count == 2
+
+    def test_skips_existing_buckets(self):
+        client = MagicMock()
+        client.bucket_exists.return_value = True
+        created = ensure_buckets(client, ["bucket-a"])
+        assert created == []
+        client.make_bucket.assert_not_called()
+
+    def test_defaults_to_all_buckets(self):
+        client = MagicMock()
+        client.bucket_exists.return_value = True
+        ensure_buckets(client)
+        assert client.bucket_exists.call_count == len(ALL_BUCKETS)
+
+
+class TestUploadArtifact:
+    def test_uploads_and_returns_ref(self):
+        client = MagicMock()
+        ref = upload_artifact(
+            client, "stonks-raw-news", "path/to/obj.json",
+            b'{"key": "value"}', content_type="application/json",
+        )
+        assert ref == "s3://stonks-raw-news/path/to/obj.json"
+        client.put_object.assert_called_once()
+        args, kwargs = client.put_object.call_args
+        assert args[0] == "stonks-raw-news"
+        assert args[1] == "path/to/obj.json"
+        assert kwargs["length"] == len(b'{"key": "value"}')
+        assert kwargs["content_type"] == "application/json"
+
+    def test_passes_metadata(self):
+        client = MagicMock()
+        upload_artifact(
+            client, "stonks-raw-market", "p.json",
+            b"data", metadata={"ticker": "AAPL"},
+        )
+        _, kwargs = client.put_object.call_args
+        assert kwargs["metadata"] == {"ticker": "AAPL"}
+
+
+class TestUploadRawArtifact:
+    def test_market_api_json(self):
+        client = MagicMock()
+        ts = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)
+        ref = upload_raw_artifact(
+            client, source_type="market_api", ticker="AAPL",
+            document_id="run-1", data=b'{"bars":[]}',
+            artifact_type="raw_json", timestamp=ts,
+        )
+        assert "stonks-raw-market" in ref
+        assert "market_api/AAPL/2026/04/11/run-1/raw.json" in ref
+        _, kwargs = client.put_object.call_args
+        assert kwargs["content_type"] == "application/json"
+
+    def test_web_scrape_html(self):
+        client = MagicMock()
+        ts = datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc)
+        ref = upload_raw_artifact(
+            client, source_type="web_scrape", ticker="TSLA",
+            document_id="doc-5", data=b"<html></html>",
+            artifact_type="raw_html", timestamp=ts,
+        )
+        assert "stonks-raw-news" in ref
+        assert "raw.html" in ref
+        _, kwargs = client.put_object.call_args
+        assert kwargs["content_type"] == "text/html"
+
+
+class TestUploadHtmlArtifact:
+    def test_stores_in_web_scrape_path(self):
+        client = MagicMock()
+        ts = datetime(2026, 6, 15, 0, 0, 0, tzinfo=timezone.utc)
+        ref = upload_html_artifact(
+            client, ticker="NVDA", document_id="page-1",
+            html_bytes=b"<html><body>test</body></html>", timestamp=ts,
+        )
+        assert "stonks-raw-news" in ref
+        assert "web_scrape/NVDA/2026/06/15/page-1/raw.html" in ref
+
+
+class TestDownloadArtifact:
+    def test_reads_and_returns_bytes(self):
+        client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.read.return_value = b"file contents"
+        client.get_object.return_value = mock_response
+
+        data = download_artifact(client, "stonks-raw-news", "path/to/obj.json")
+        assert data == b"file contents"
+        client.get_object.assert_called_once_with("stonks-raw-news", "path/to/obj.json")
+        mock_response.close.assert_called_once()
+        mock_response.release_conn.assert_called_once()
+
+
+class TestUploadNormalizedText:
+    def test_stores_in_normalized_bucket(self):
+        client = MagicMock()
+        ts = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)
+        ref = upload_normalized_text(
+            client, ticker="AAPL", document_id="doc-1",
+            text_bytes=b"Normalized article text here.",
+            timestamp=ts,
+        )
+        assert "stonks-normalized" in ref
+        assert "parsed/AAPL/2026/04/11/doc-1/normalized.txt" in ref
+        _, kwargs = client.put_object.call_args
+        assert kwargs["content_type"] == "text/plain"
+
+    def test_path_uses_current_time_when_no_timestamp(self):
+        client = MagicMock()
+        ref = upload_normalized_text(
+            client, ticker="MSFT", document_id="doc-2",
+            text_bytes=b"Some text.",
+        )
+        assert "stonks-normalized" in ref
+        assert "normalized.txt" in ref
+
+
+class TestUploadParserOutput:
+    def test_stores_json_in_normalized_bucket(self):
+        client = MagicMock()
+        ts = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)
+        ref = upload_parser_output(
+            client, ticker="AAPL", document_id="doc-1",
+            output_bytes=b'{"quality_score": 0.8}',
+            timestamp=ts,
+        )
+        assert "stonks-normalized" in ref
+        assert "parsed/AAPL/2026/04/11/doc-1/parser_output.json" in ref
+        _, kwargs = client.put_object.call_args
+        assert kwargs["content_type"] == "application/json"
@@ -0,0 +1,190 @@
+"""Tests for recommendation suppression logic (data quality checks).
+
+Requirements: 7.4
+"""
+from datetime import datetime, timedelta, timezone
+
+from services.recommendation.suppression import (
+    DataQualityContext,
+    SuppressionConfig,
+    SuppressionReason,
+    build_quality_context_from_summary,
+    evaluate_suppression,
+)
+from services.shared.schemas import TrendDirection, TrendSummary, TrendWindow
+
+NOW = datetime(2026, 4, 11, 12, 0, 0, tzinfo=timezone.utc)
+
+
+def _make_summary(**overrides) -> TrendSummary:
+    defaults = dict(
+        entity_type="company",
+        entity_id="AAPL",
+        window=TrendWindow.SEVEN_DAY,
+        trend_direction=TrendDirection.BULLISH,
+        trend_strength=0.5,
+        confidence=0.65,
+        top_supporting_evidence=["doc1", "doc2", "doc3"],
+        top_opposing_evidence=[],
+        dominant_catalysts=["earnings"],
+        material_risks=["regulatory scrutiny"],
+        contradiction_score=0.1,
+        generated_at=NOW,
+    )
+    defaults.update(overrides)
+    return TrendSummary(**defaults)
+
+
+def _make_quality_ctx(**overrides) -> DataQualityContext:
+    defaults = dict(
+        total_documents=5,
+        valid_documents=4,
+        failed_documents=1,
+        avg_extraction_confidence=0.7,
+        newest_evidence_at=NOW - timedelta(hours=6),
+        source_types={"news_api", "filings_api"},
+    )
+    defaults.update(overrides)
+    return DataQualityContext(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# No suppression for good quality data
+# ---------------------------------------------------------------------------
+
+
+def test_no_suppression_good_quality():
+    summary = _make_summary()
+    ctx = _make_quality_ctx()
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.suppressed is False
+    assert result.reasons == []
+    assert result.data_quality_score > 0.3
+
+
+# ---------------------------------------------------------------------------
+# Suppression triggers
+# ---------------------------------------------------------------------------
+
+
+def test_suppressed_low_extraction_confidence():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(avg_extraction_confidence=0.2)
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.suppressed is True
+    assert SuppressionReason.LOW_DATA_CONFIDENCE in result.reasons
+
+
+def test_suppressed_stale_evidence():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(newest_evidence_at=NOW - timedelta(days=10))
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.suppressed is True
+    assert SuppressionReason.STALE_EVIDENCE in result.reasons
+
+
+def test_suppressed_high_failure_rate():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(total_documents=10, failed_documents=6, valid_documents=4)
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.suppressed is True
+    assert SuppressionReason.HIGH_EXTRACTION_FAILURE_RATE in result.reasons
+
+
+def test_suppressed_insufficient_valid_documents():
+    summary = _make_summary(
+        top_supporting_evidence=["doc1"],
+        top_opposing_evidence=[],
+    )
+    ctx = _make_quality_ctx(total_documents=1, valid_documents=1, failed_documents=0)
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.suppressed is True
+    assert SuppressionReason.INSUFFICIENT_VALID_DOCUMENTS in result.reasons
+
+
+def test_suppressed_low_source_diversity():
+    """When min_source_types > available source types, suppression fires."""
+    summary = _make_summary()
+    ctx = _make_quality_ctx(source_types=set())
+    config = SuppressionConfig(min_source_types=2)
+    result = evaluate_suppression(summary, ctx, config=config, reference_time=NOW)
+    assert result.suppressed is True
+    assert SuppressionReason.LOW_SOURCE_DIVERSITY in result.reasons
+
+
+# ---------------------------------------------------------------------------
+# Fallback to summary-based context
+# ---------------------------------------------------------------------------
+
+
+def test_fallback_context_from_summary():
+    summary = _make_summary(confidence=0.7)
+    ctx = build_quality_context_from_summary(summary)
+    assert ctx.total_documents == 3  # 3 supporting + 0 opposing
+    assert ctx.valid_documents == 3
+    assert ctx.avg_extraction_confidence == 0.7
+
+
+def test_no_suppression_with_summary_fallback():
+    """When no quality context is provided, summary-based fallback is used."""
+    summary = _make_summary(confidence=0.7)
+    # Default config has min_source_types=1, but fallback has empty source_types.
+    # With min_source_types=1 and empty source_types, LOW_SOURCE_DIVERSITY fires
+    # only when total_documents > 0. But default min_source_types is 1 and
+    # len(set()) = 0 < 1, so it would fire. Let's use a config that relaxes this.
+    config = SuppressionConfig(min_source_types=0)
+    result = evaluate_suppression(summary, config=config, reference_time=NOW)
+    assert result.suppressed is False
+
+
+# ---------------------------------------------------------------------------
+# Data quality score
+# ---------------------------------------------------------------------------
+
+
+def test_quality_score_high_for_good_data():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(
+        avg_extraction_confidence=0.85,
+        newest_evidence_at=NOW - timedelta(hours=1),
+        total_documents=10,
+        valid_documents=10,
+        failed_documents=0,
+    )
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.data_quality_score > 0.7
+
+
+def test_quality_score_low_for_bad_data():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(
+        avg_extraction_confidence=0.1,
+        newest_evidence_at=NOW - timedelta(days=14),
+        total_documents=3,
+        valid_documents=1,
+        failed_documents=2,
+    )
+    result = evaluate_suppression(summary, ctx, reference_time=NOW)
+    assert result.data_quality_score < 0.3
+
+
+# ---------------------------------------------------------------------------
+# Custom config
+# ---------------------------------------------------------------------------
+
+
+def test_custom_config_stricter_thresholds():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(avg_extraction_confidence=0.5)
+    strict = SuppressionConfig(min_avg_extraction_confidence=0.6)
+    result = evaluate_suppression(summary, ctx, config=strict, reference_time=NOW)
+    assert result.suppressed is True
+    assert SuppressionReason.LOW_DATA_CONFIDENCE in result.reasons
+
+
+def test_custom_config_relaxed_thresholds():
+    summary = _make_summary()
+    ctx = _make_quality_ctx(avg_extraction_confidence=0.3)
+    relaxed = SuppressionConfig(min_avg_extraction_confidence=0.2)
+    result = evaluate_suppression(summary, ctx, config=relaxed, reference_time=NOW)
+    assert SuppressionReason.LOW_DATA_CONFIDENCE not in result.reasons
@@ -0,0 +1,113 @@
+"""Tests for the optional LLM thesis rewriting layer.
+
+Tests prompt construction and the rewrite function's fallback behavior.
+"""
+from __future__ import annotations
+
+import pytest
+
+from services.recommendation.thesis_llm import (
+    THESIS_SYSTEM_PROMPT,
+    build_thesis_rewrite_prompt,
+    rewrite_thesis_with_llm,
+)
+from services.shared.config import OllamaConfig
+from services.shared.schemas import (
+    TrendDirection,
+    TrendSummary,
+    TrendWindow,
+)
+
+
+def _make_summary(
+    ticker: str = "AAPL",
+    direction: TrendDirection = TrendDirection.BULLISH,
+    strength: float = 0.5,
+    confidence: float = 0.65,
+    contradiction: float = 0.1,
+    catalysts: list[str] | None = None,
+    risks: list[str] | None = None,
+) -> TrendSummary:
+    return TrendSummary(
+        entity_type="company",
+        entity_id=ticker,
+        window=TrendWindow.SEVEN_DAY,
+        trend_direction=direction,
+        trend_strength=strength,
+        confidence=confidence,
+        top_supporting_evidence=["doc1", "doc2"],
+        top_opposing_evidence=[],
+        dominant_catalysts=catalysts or ["earnings"],
+        material_risks=risks or ["regulatory scrutiny"],
+        contradiction_score=contradiction,
+    )
+
+
+DETERMINISTIC_THESIS = (
+    "AAPL shows a bullish trend over the 7d window with strength 0.50 "
+    "and confidence 0.65. Dominant catalysts: earnings. "
+    "Key risks: regulatory scrutiny. "
+    "Based on 2 supporting and 0 opposing evidence documents. "
+    "Recommendation: BUY (paper eligible)."
+)
+
+
+# ---------------------------------------------------------------------------
+# Prompt construction
+# ---------------------------------------------------------------------------
+
+
+def test_prompt_contains_deterministic_thesis():
+    summary = _make_summary()
+    prompts = build_thesis_rewrite_prompt(DETERMINISTIC_THESIS, summary)
+    assert DETERMINISTIC_THESIS in prompts["user"]
+
+
+def test_prompt_system_is_thesis_system_prompt():
+    summary = _make_summary()
+    prompts = build_thesis_rewrite_prompt(DETERMINISTIC_THESIS, summary)
+    assert prompts["system"] == THESIS_SYSTEM_PROMPT
+
+
+def test_prompt_includes_ticker_context():
+    summary = _make_summary(ticker="MSFT")
+    prompts = build_thesis_rewrite_prompt(DETERMINISTIC_THESIS, summary)
+    assert "MSFT" in prompts["user"]
+
+
+def test_prompt_includes_catalysts():
+    summary = _make_summary(catalysts=["product", "m_and_a"])
+    prompts = build_thesis_rewrite_prompt(DETERMINISTIC_THESIS, summary)
+    assert "product" in prompts["user"]
+
+
+def test_prompt_includes_risks():
+    summary = _make_summary(risks=["supply chain disruption"])
+    prompts = build_thesis_rewrite_prompt(DETERMINISTIC_THESIS, summary)
+    assert "supply chain disruption" in prompts["user"]
+
+
+def test_prompt_includes_trend_metrics():
+    summary = _make_summary(strength=0.72, confidence=0.88, contradiction=0.15)
+    prompts = build_thesis_rewrite_prompt(DETERMINISTIC_THESIS, summary)
+    assert "0.72" in prompts["user"]
+    assert "0.88" in prompts["user"]
+    assert "0.15" in prompts["user"]
+
+
+# ---------------------------------------------------------------------------
+# Fallback behavior — LLM failure returns deterministic thesis
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_rewrite_falls_back_on_connection_error():
+    """When Ollama is unreachable, the deterministic thesis is returned."""
+    summary = _make_summary()
+    config = OllamaConfig(base_url="http://localhost:99999", timeout=2)
+    result = await rewrite_thesis_with_llm(
+        deterministic_thesis=DETERMINISTIC_THESIS,
+        summary=summary,
+        config=config,
+    )
+    assert result == DETERMINISTIC_THESIS
@@ -0,0 +1,147 @@
+"""Tests for the web scrape adapter.
+
+Validates URL normalization, HTML metadata extraction, body text extraction,
+and adapter result construction.
+"""
+import pytest
+
+from services.adapters.web_scrape_adapter import (
+    WebScrapeAdapter,
+    extract_body_text,
+    extract_metadata_from_html,
+)
+from services.shared.content import normalize_url
+
+
+SAMPLE_HTML = """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <title>Apple Q2 Earnings Beat Expectations</title>
+    <meta property="og:title" content="Apple Q2 Earnings Beat Expectations" />
+    <meta property="og:site_name" content="TechNews" />
+    <meta property="og:description" content="Apple reported strong Q2 results." />
+    <meta name="author" content="Jane Reporter" />
+    <meta property="article:published_time" content="2026-04-10T14:00:00Z" />
+    <link rel="canonical" href="https://technews.example.com/apple-q2-earnings" />
+</head>
+<body>
+    <nav>Navigation links here</nav>
+    <article>
+        <h1>Apple Q2 Earnings Beat Expectations</h1>
+        <p>Apple Inc. reported quarterly revenue of $95 billion, exceeding analyst estimates.</p>
+        <p>The company saw strong growth in its services division and iPhone sales.</p>
+    </article>
+    <footer>Copyright 2026 TechNews</footer>
+</body>
+</html>"""
+
+MINIMAL_HTML = """<html><body><p>Short content.</p></body></html>"""
+
+
+class TestNormalizeUrl:
+    def test_basic_normalization(self):
+        assert normalize_url("HTTPS://Example.COM/path") == "https://example.com/path"
+
+    def test_strips_trailing_slash(self):
+        assert normalize_url("https://example.com/path/") == "https://example.com/path"
+
+    def test_strips_fragment(self):
+        result = normalize_url("https://example.com/path#section")
+        assert "#" not in result
+
+    def test_preserves_query(self):
+        result = normalize_url("https://example.com/path?q=test")
+        assert result == "https://example.com/path?q=test"
+
+    def test_preserves_non_standard_port(self):
+        result = normalize_url("https://example.com:8443/path")
+        assert ":8443" in result
+
+    def test_root_path(self):
+        result = normalize_url("https://example.com")
+        assert result == "https://example.com/"
+
+
+class TestExtractMetadataFromHtml:
+    def test_extracts_title(self):
+        meta = extract_metadata_from_html(SAMPLE_HTML, "https://technews.example.com/article")
+        assert meta["title"] == "Apple Q2 Earnings Beat Expectations"
+
+    def test_extracts_author(self):
+        meta = extract_metadata_from_html(SAMPLE_HTML, "https://technews.example.com/article")
+        assert meta["author"] == "Jane Reporter"
+
+    def test_extracts_publisher(self):
+        meta = extract_metadata_from_html(SAMPLE_HTML, "https://technews.example.com/article")
+        assert meta["publisher"] == "TechNews"
+
+    def test_extracts_published_at(self):
+        meta = extract_metadata_from_html(SAMPLE_HTML, "https://technews.example.com/article")
+        assert meta["published_at"] == "2026-04-10T14:00:00Z"
+
+    def test_extracts_canonical_url(self):
+        meta = extract_metadata_from_html(SAMPLE_HTML, "https://technews.example.com/article")
+        assert meta["canonical_url"] == "https://technews.example.com/apple-q2-earnings"
+
+    def test_extracts_language(self):
+        meta = extract_metadata_from_html(SAMPLE_HTML, "https://technews.example.com/article")
+        assert meta["language"] == "en"
+
+    def test_fallback_publisher_from_hostname(self):
+        meta = extract_metadata_from_html(MINIMAL_HTML, "https://example.com/page")
+        assert meta["publisher"] == "example.com"
+
+    def test_fallback_title_empty(self):
+        meta = extract_metadata_from_html(MINIMAL_HTML, "https://example.com/page")
+        assert meta["title"] == ""
+
+
+class TestExtractBodyText:
+    def test_extracts_article_content(self):
+        text = extract_body_text(SAMPLE_HTML)
+        assert "Apple Inc. reported quarterly revenue" in text
+        assert "strong growth" in text
+
+    def test_strips_nav_and_footer(self):
+        text = extract_body_text(SAMPLE_HTML)
+        assert "Navigation links here" not in text
+        assert "Copyright 2026" not in text
+
+    def test_strips_script_and_style(self):
+        html = "<html><body><script>alert('x')</script><style>.x{}</style><p>Content</p></body></html>"
+        text = extract_body_text(html)
+        assert "alert" not in text
+        assert "Content" in text
+
+    def test_minimal_html(self):
+        text = extract_body_text(MINIMAL_HTML)
+        assert "Short content." in text
+
+
+class TestWebScrapeAdapterSourceType:
+    def test_source_type(self):
+        adapter = WebScrapeAdapter()
+        assert adapter.source_type() == "web_scrape"
+
+    def test_bucket_name(self):
+        adapter = WebScrapeAdapter()
+        assert adapter.bucket_name() == "stonks-raw-news"
+
+
+class TestWebScrapeAdapterErrorResult:
+    def test_error_on_no_urls(self):
+        adapter = WebScrapeAdapter()
+        result = adapter._error_result("AAPL", "No URLs configured", 0)
+        assert not result.ok
+        assert result.error == "No URLs configured"
+        assert result.source_type == "web_scrape"
+        assert result.ticker == "AAPL"
+
+
+@pytest.mark.asyncio
+async def test_fetch_no_urls_configured():
+    adapter = WebScrapeAdapter()
+    result = await adapter.fetch("AAPL", {})
+    assert not result.ok
+    assert result.error is not None
+    assert "No URLs configured" in result.error