phase 14-15: docker build validation and helm deployment
This commit is contained in:
@@ -0,0 +1,212 @@
|
||||
"""Tests for shared MinIO storage utilities.
|
||||
|
||||
Validates bucket mapping, path building, storage refs, bucket creation,
artifact upload, and download from services.shared.storage.
|
||||
|
||||
Requirements: 3.1, 3.2, 3.3, 9.1
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from services.shared.storage import (
|
||||
ALL_BUCKETS,
|
||||
bucket_for_source,
|
||||
build_artifact_path,
|
||||
download_artifact,
|
||||
ensure_buckets,
|
||||
storage_ref,
|
||||
upload_artifact,
|
||||
upload_html_artifact,
|
||||
upload_normalized_text,
|
||||
upload_parser_output,
|
||||
upload_raw_artifact,
|
||||
)
|
||||
|
||||
|
||||
class TestBucketForSource:
    """Checks the source-type to bucket-name mapping of ``bucket_for_source``."""

    def test_market_api(self):
        """``market_api`` resolves to the raw market bucket."""
        bucket = bucket_for_source("market_api")
        assert bucket == "stonks-raw-market"

    def test_news_api(self):
        """``news_api`` resolves to the raw news bucket."""
        bucket = bucket_for_source("news_api")
        assert bucket == "stonks-raw-news"

    def test_filings_api(self):
        """``filings_api`` resolves to the raw filings bucket."""
        bucket = bucket_for_source("filings_api")
        assert bucket == "stonks-raw-filings"

    def test_web_scrape(self):
        """``web_scrape`` shares the raw news bucket."""
        bucket = bucket_for_source("web_scrape")
        assert bucket == "stonks-raw-news"

    def test_broker(self):
        """``broker`` shares the raw market bucket."""
        bucket = bucket_for_source("broker")
        assert bucket == "stonks-raw-market"

    def test_unknown_defaults_to_market(self):
        """An unrecognised source type falls back to the raw market bucket."""
        bucket = bucket_for_source("unknown_type")
        assert bucket == "stonks-raw-market"
|
||||
|
||||
|
||||
class TestBuildArtifactPath:
    """Checks the object paths produced by ``build_artifact_path``."""

    def test_default_path_format(self):
        """With only a timestamp given, the file component is ``raw.json``."""
        when = datetime(2026, 4, 11, 14, 30, 0, tzinfo=timezone.utc)
        result = build_artifact_path("news_api", "AAPL", "doc-123", timestamp=when)
        assert result == "news_api/AAPL/2026/04/11/doc-123/raw.json"

    def test_custom_artifact_name_and_ext(self):
        """Explicit ``artifact_name`` and ``ext`` determine the file component."""
        when = datetime(2026, 1, 5, 0, 0, 0, tzinfo=timezone.utc)
        overrides = {"artifact_name": "raw", "ext": "html", "timestamp": when}
        result = build_artifact_path("web_scrape", "MSFT", "doc-456", **overrides)
        assert result == "web_scrape/MSFT/2026/01/05/doc-456/raw.html"

    def test_uses_utc_now_when_no_timestamp(self):
        """Without a timestamp the path still has the seven-part layout."""
        segments = build_artifact_path("market_api", "GOOG", "run-1").split("/")
        # Only the structure is checked here; the date parts depend on the clock.
        assert segments[:2] == ["market_api", "GOOG"]
        # source/ticker/yyyy/mm/dd/doc_id/file
        assert len(segments) == 7
|
||||
|
||||
|
||||
class TestStorageRef:
    """Checks the URI string returned by ``storage_ref``."""

    def test_builds_s3_uri(self):
        """Bucket and object path are combined into an ``s3://`` URI."""
        bucket = "stonks-raw-news"
        object_path = "news_api/AAPL/2026/04/11/doc-1/raw.json"
        uri = storage_ref(bucket, object_path)
        assert uri == "s3://stonks-raw-news/news_api/AAPL/2026/04/11/doc-1/raw.json"
|
||||
|
||||
|
||||
class TestEnsureBuckets:
    """Checks ``ensure_buckets`` against a mocked storage client."""

    def test_creates_missing_buckets(self):
        """Buckets reported as missing are created and returned."""
        fake_client = MagicMock(**{"bucket_exists.return_value": False})
        wanted = ["bucket-a", "bucket-b"]
        made = ensure_buckets(fake_client, wanted)
        assert made == ["bucket-a", "bucket-b"]
        assert fake_client.make_bucket.call_count == 2

    def test_skips_existing_buckets(self):
        """Buckets reported as existing are neither created nor returned."""
        fake_client = MagicMock(**{"bucket_exists.return_value": True})
        made = ensure_buckets(fake_client, ["bucket-a"])
        assert made == []
        fake_client.make_bucket.assert_not_called()

    def test_defaults_to_all_buckets(self):
        """With no explicit list, one existence check is made per ``ALL_BUCKETS`` entry."""
        fake_client = MagicMock(**{"bucket_exists.return_value": True})
        ensure_buckets(fake_client)
        expected_checks = len(ALL_BUCKETS)
        assert fake_client.bucket_exists.call_count == expected_checks
|
||||
|
||||
|
||||
class TestUploadArtifact:
    """Checks ``upload_artifact`` against a mocked storage client."""

    def test_uploads_and_returns_ref(self):
        """The payload goes to ``put_object`` and an ``s3://`` ref is returned."""
        payload = b'{"key": "value"}'
        fake_client = MagicMock()
        returned = upload_artifact(
            fake_client,
            "stonks-raw-news",
            "path/to/obj.json",
            payload,
            content_type="application/json",
        )
        assert returned == "s3://stonks-raw-news/path/to/obj.json"
        fake_client.put_object.assert_called_once()

        # Bucket and object name are positional; length and type are keywords.
        positional, keywords = fake_client.put_object.call_args
        assert positional[0] == "stonks-raw-news"
        assert positional[1] == "path/to/obj.json"
        assert keywords["length"] == len(payload)
        assert keywords["content_type"] == "application/json"

    def test_passes_metadata(self):
        """A ``metadata`` dict is forwarded to ``put_object`` as a keyword."""
        fake_client = MagicMock()
        tags = {"ticker": "AAPL"}
        upload_artifact(
            fake_client, "stonks-raw-market", "p.json", b"data", metadata=tags
        )
        _, keywords = fake_client.put_object.call_args
        assert keywords["metadata"] == {"ticker": "AAPL"}
|
||||
|
||||
|
||||
class TestUploadRawArtifact:
    """Checks ``upload_raw_artifact`` bucket, path, and content type."""

    def test_market_api_json(self):
        """A ``raw_json`` market artifact lands in the market bucket as JSON."""
        fake_client = MagicMock()
        when = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)
        returned = upload_raw_artifact(
            fake_client,
            source_type="market_api",
            ticker="AAPL",
            document_id="run-1",
            data=b'{"bars":[]}',
            artifact_type="raw_json",
            timestamp=when,
        )
        assert "stonks-raw-market" in returned
        assert "market_api/AAPL/2026/04/11/run-1/raw.json" in returned

        _, keywords = fake_client.put_object.call_args
        assert keywords["content_type"] == "application/json"

    def test_web_scrape_html(self):
        """A ``raw_html`` scrape artifact lands in the news bucket as HTML."""
        fake_client = MagicMock()
        when = datetime(2026, 3, 1, 0, 0, 0, tzinfo=timezone.utc)
        returned = upload_raw_artifact(
            fake_client,
            source_type="web_scrape",
            ticker="TSLA",
            document_id="doc-5",
            data=b"<html></html>",
            artifact_type="raw_html",
            timestamp=when,
        )
        assert "stonks-raw-news" in returned
        assert "raw.html" in returned

        _, keywords = fake_client.put_object.call_args
        assert keywords["content_type"] == "text/html"
|
||||
|
||||
|
||||
class TestUploadHtmlArtifact:
    """Checks where ``upload_html_artifact`` stores a page."""

    def test_stores_in_web_scrape_path(self):
        """The ref names the news bucket and a ``web_scrape/...`` HTML path."""
        fake_client = MagicMock()
        when = datetime(2026, 6, 15, 0, 0, 0, tzinfo=timezone.utc)
        page = b"<html><body>test</body></html>"
        returned = upload_html_artifact(
            fake_client,
            ticker="NVDA",
            document_id="page-1",
            html_bytes=page,
            timestamp=when,
        )
        assert "stonks-raw-news" in returned
        assert "web_scrape/NVDA/2026/06/15/page-1/raw.html" in returned
|
||||
|
||||
|
||||
class TestDownloadArtifact:
    """Checks ``download_artifact`` against a mocked storage client."""

    def test_reads_and_returns_bytes(self):
        """The response body is returned and the response is closed and released."""
        fake_client = MagicMock()
        # Whatever get_object hands back acts as the response object.
        response = fake_client.get_object.return_value
        response.read.return_value = b"file contents"

        fetched = download_artifact(fake_client, "stonks-raw-news", "path/to/obj.json")

        assert fetched == b"file contents"
        fake_client.get_object.assert_called_once_with(
            "stonks-raw-news", "path/to/obj.json"
        )
        response.close.assert_called_once()
        response.release_conn.assert_called_once()
|
||||
|
||||
|
||||
class TestUploadNormalizedText:
    """Checks ``upload_normalized_text`` bucket, path, and content type."""

    def test_stores_in_normalized_bucket(self):
        """Text goes to the normalized bucket under ``parsed/...`` as plain text."""
        fake_client = MagicMock()
        when = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)
        returned = upload_normalized_text(
            fake_client,
            ticker="AAPL",
            document_id="doc-1",
            text_bytes=b"Normalized article text here.",
            timestamp=when,
        )
        assert "stonks-normalized" in returned
        assert "parsed/AAPL/2026/04/11/doc-1/normalized.txt" in returned

        _, keywords = fake_client.put_object.call_args
        assert keywords["content_type"] == "text/plain"

    def test_path_uses_current_time_when_no_timestamp(self):
        """Omitting the timestamp still yields a normalized-bucket text ref."""
        fake_client = MagicMock()
        returned = upload_normalized_text(
            fake_client,
            ticker="MSFT",
            document_id="doc-2",
            text_bytes=b"Some text.",
        )
        # The date segments depend on the clock, so only fixed parts are checked.
        assert "stonks-normalized" in returned
        assert "normalized.txt" in returned
|
||||
|
||||
|
||||
class TestUploadParserOutput:
    """Checks ``upload_parser_output`` bucket, path, and content type."""

    def test_stores_json_in_normalized_bucket(self):
        """Parser output goes to the normalized bucket under ``parsed/...`` as JSON."""
        fake_client = MagicMock()
        when = datetime(2026, 4, 11, 0, 0, 0, tzinfo=timezone.utc)
        returned = upload_parser_output(
            fake_client,
            ticker="AAPL",
            document_id="doc-1",
            output_bytes=b'{"quality_score": 0.8}',
            timestamp=when,
        )
        assert "stonks-normalized" in returned
        assert "parsed/AAPL/2026/04/11/doc-1/parser_output.json" in returned

        _, keywords = fake_client.put_object.call_args
        assert keywords["content_type"] == "application/json"
|
||||
Reference in New Issue
Block a user