"""Tests for shared MinIO storage utilities.

Validates bucket mapping, path building, storage refs, bucket creation,
artifact upload, and download from services.shared.storage.

Requirements: 3.1, 3.2, 3.3, 9.1
"""

from datetime import datetime, timezone
from unittest.mock import MagicMock

from services.shared.storage import (
    ALL_BUCKETS,
    bucket_for_source,
    build_artifact_path,
    download_artifact,
    ensure_buckets,
    storage_ref,
    upload_artifact,
    upload_html_artifact,
    upload_normalized_text,
    upload_parser_output,
    upload_raw_artifact,
)


class TestBucketForSource:
    """Checks which bucket name ``bucket_for_source`` returns per source type."""

    def test_market_api(self):
        """``market_api`` resolves to the raw market bucket."""
        bucket = bucket_for_source("market_api")
        assert bucket == "stonks-raw-market"

    def test_news_api(self):
        """``news_api`` resolves to the raw news bucket."""
        bucket = bucket_for_source("news_api")
        assert bucket == "stonks-raw-news"

    def test_filings_api(self):
        """``filings_api`` resolves to the raw filings bucket."""
        bucket = bucket_for_source("filings_api")
        assert bucket == "stonks-raw-filings"

    def test_web_scrape(self):
        """``web_scrape`` shares the raw news bucket."""
        bucket = bucket_for_source("web_scrape")
        assert bucket == "stonks-raw-news"

    def test_broker(self):
        """``broker`` shares the raw market bucket."""
        bucket = bucket_for_source("broker")
        assert bucket == "stonks-raw-market"

    def test_unknown_defaults_to_market(self):
        """An unrecognised source type falls back to the raw market bucket."""
        bucket = bucket_for_source("unknown_type")
        assert bucket == "stonks-raw-market"


class TestBuildArtifactPath:
    """Checks the object path layout produced by ``build_artifact_path``."""

    def test_default_path_format(self):
        """Without name/ext overrides the file segment is ``raw.json``."""
        when = datetime(2026, 4, 11, 14, 30, 0, tzinfo=timezone.utc)
        built = build_artifact_path("news_api", "AAPL", "doc-123", timestamp=when)
        assert built == "news_api/AAPL/2026/04/11/doc-123/raw.json"

    def test_custom_artifact_name_and_ext(self):
        """Explicit ``artifact_name`` and ``ext`` shape the file segment."""
        when = datetime(2026, 1, 5, 0, 0, 0, tzinfo=timezone.utc)
        built = build_artifact_path(
            "web_scrape",
            "MSFT",
            "doc-456",
            artifact_name="raw",
            ext="html",
            timestamp=when,
        )
        assert built == "web_scrape/MSFT/2026/01/05/doc-456/raw.html"

    def test_uses_utc_now_when_no_timestamp(self):
        """Omitting ``timestamp`` still yields a seven-segment path."""
        built = build_artifact_path("market_api", "GOOG", "run-1")
        # The date segments depend on the clock, so only the shape is checked.
        segments = built.split("/")
        assert segments[0] == "market_api"
        assert segments[1] == "GOOG"
        # Layout: source/ticker/yyyy/mm/dd/doc_id/file
        assert len(segments) == 7


class TestStorageRef:
    """Checks the URI string produced by ``storage_ref``."""

    def test_builds_s3_uri(self):
        """Bucket and object path are joined into an ``s3://`` URI."""
        uri = storage_ref(
            "stonks-raw-news",
            "news_api/AAPL/2026/04/11/doc-1/raw.json",
        )
        assert uri == "s3://stonks-raw-news/news_api/AAPL/2026/04/11/doc-1/raw.json"


class TestEnsureBuckets:
    """Checks ``ensure_buckets`` against a mocked client."""

    def test_creates_missing_buckets(self):
        """Buckets reported as missing are created and returned."""
        minio = MagicMock()
        minio.bucket_exists.return_value = False

        made = ensure_buckets(minio, ["bucket-a", "bucket-b"])

        assert made == ["bucket-a", "bucket-b"]
        assert minio.make_bucket.call_count == 2

    def test_skips_existing_buckets(self):
        """Buckets reported as existing are neither created nor returned."""
        minio = MagicMock()
        minio.bucket_exists.return_value = True

        made = ensure_buckets(minio, ["bucket-a"])

        assert made == []
        minio.make_bucket.assert_not_called()

    def test_defaults_to_all_buckets(self):
        """With no bucket list, one existence check per ``ALL_BUCKETS`` entry."""
        minio = MagicMock()
        minio.bucket_exists.return_value = True

        ensure_buckets(minio)

        assert minio.bucket_exists.call_count == len(ALL_BUCKETS)


class TestUploadArtifact:
    """Checks ``upload_artifact`` and the ``put_object`` call it makes."""

    def test_uploads_and_returns_ref(self):
        """Returns the ``s3://`` ref and forwards bucket, path, length, type."""
        minio = MagicMock()
        payload = b'{"key": "value"}'

        uri = upload_artifact(
            minio,
            "stonks-raw-news",
            "path/to/obj.json",
            payload,
            content_type="application/json",
        )

        assert uri == "s3://stonks-raw-news/path/to/obj.json"
        minio.put_object.assert_called_once()
        positional, keyword = minio.put_object.call_args
        assert positional[0] == "stonks-raw-news"
        assert positional[1] == "path/to/obj.json"
        assert keyword["length"] == len(payload)
        assert keyword["content_type"] == "application/json"

    def test_passes_metadata(self):
        """The ``metadata`` mapping reaches ``put_object`` unchanged."""
        minio = MagicMock()

        upload_artifact(
            minio,
            "stonks-raw-market",
            "p.json",
            b"data",
            metadata={"ticker": "AAPL"},
        )

        _positional, keyword = minio.put_object.call_args
        assert keyword["metadata"] == {"ticker": "AAPL"}


class TestUploadRawArtifact:
    """Checks bucket, path, and content type chosen by ``upload_raw_artifact``."""

    def test_market_api_json(self):
        """A ``raw_json`` market artifact lands in the market bucket as JSON."""
        minio = MagicMock()
        when = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)

        uri = upload_raw_artifact(
            minio,
            source_type="market_api",
            ticker="AAPL",
            document_id="run-1",
            data=b'{"bars":[]}',
            artifact_type="raw_json",
            timestamp=when,
        )

        assert "stonks-raw-market" in uri
        assert "market_api/AAPL/2026/04/11/run-1/raw.json" in uri
        _positional, keyword = minio.put_object.call_args
        assert keyword["content_type"] == "application/json"

    def test_web_scrape_html(self):
        """A ``raw_html`` web-scrape artifact lands in the news bucket as HTML."""
        minio = MagicMock()
        when = datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc)

        uri = upload_raw_artifact(
            minio,
            source_type="web_scrape",
            ticker="TSLA",
            document_id="doc-5",
            data=b"",
            artifact_type="raw_html",
            timestamp=when,
        )

        assert "stonks-raw-news" in uri
        assert "raw.html" in uri
        _positional, keyword = minio.put_object.call_args
        assert keyword["content_type"] == "text/html"


class TestUploadHtmlArtifact:
    """Checks where ``upload_html_artifact`` stores its payload."""

    def test_stores_in_web_scrape_path(self):
        """The ref names the news bucket and a ``web_scrape/...`` HTML path."""
        minio = MagicMock()
        when = datetime(2026, 6, 15, 0, 0, 0, tzinfo=timezone.utc)

        uri = upload_html_artifact(
            minio,
            ticker="NVDA",
            document_id="page-1",
            html_bytes=b"test",
            timestamp=when,
        )

        assert "stonks-raw-news" in uri
        assert "web_scrape/NVDA/2026/06/15/page-1/raw.html" in uri


class TestDownloadArtifact:
    """Checks ``download_artifact`` against a mocked response object."""

    def test_reads_and_returns_bytes(self):
        """Returns the response body and closes/releases the response once."""
        minio = MagicMock()
        response = MagicMock()
        response.read.return_value = b"file contents"
        minio.get_object.return_value = response

        body = download_artifact(minio, "stonks-raw-news", "path/to/obj.json")

        assert body == b"file contents"
        minio.get_object.assert_called_once_with(
            "stonks-raw-news", "path/to/obj.json"
        )
        response.close.assert_called_once()
        response.release_conn.assert_called_once()


class TestUploadNormalizedText:
    """Checks where ``upload_normalized_text`` stores plain-text output."""

    def test_stores_in_normalized_bucket(self):
        """With a timestamp, the ref carries the dated ``parsed/...`` path."""
        minio = MagicMock()
        when = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)

        uri = upload_normalized_text(
            minio,
            ticker="AAPL",
            document_id="doc-1",
            text_bytes=b"Normalized article text here.",
            timestamp=when,
        )

        assert "stonks-normalized" in uri
        assert "parsed/AAPL/2026/04/11/doc-1/normalized.txt" in uri
        _positional, keyword = minio.put_object.call_args
        assert keyword["content_type"] == "text/plain"

    def test_path_uses_current_time_when_no_timestamp(self):
        """Without a timestamp, only bucket and file name are checked."""
        minio = MagicMock()

        uri = upload_normalized_text(
            minio,
            ticker="MSFT",
            document_id="doc-2",
            text_bytes=b"Some text.",
        )

        assert "stonks-normalized" in uri
        assert "normalized.txt" in uri


class TestUploadParserOutput:
    """Checks where ``upload_parser_output`` stores parser JSON."""

    def test_stores_json_in_normalized_bucket(self):
        """The ref carries the dated ``parsed/...`` path; upload is JSON."""
        minio = MagicMock()
        when = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)

        uri = upload_parser_output(
            minio,
            ticker="AAPL",
            document_id="doc-1",
            output_bytes=b'{"quality_score": 0.8}',
            timestamp=when,
        )

        assert "stonks-normalized" in uri
        assert "parsed/AAPL/2026/04/11/doc-1/parser_output.json" in uri
        _positional, keyword = minio.put_object.call_args
        assert keyword["content_type"] == "application/json"