feat: stage-isolated infrastructure — separate Postgres DBs, Redis DBs, and MinIO bucket prefixes per stage

2026-04-19 22:20:03 +00:00
parent 2621b3c5c5
commit 5c63264393
10 changed files with 96 additions and 30 deletions
@@ -21,6 +21,7 @@ Requirements: 3.1, 3.2, 3.3, 9.1
 """
 import io
 import logging
+import os
 from datetime import datetime, timezone
 from typing import Mapping

@@ -29,8 +30,13 @@ from minio.error import S3Error

 logger = logging.getLogger("storage")

-# All known buckets the platform uses
-ALL_BUCKETS = [
+# Optional bucket-name prefix for stage isolation (e.g. "beta", "paper").
+# When set, every bucket name gains a "<stage>-" prefix, e.g. "beta-stonks-raw-market".
+# The value is read from the DEPLOY_STAGE env var, which is set by the Helm configmap.
+_BUCKET_PREFIX = os.getenv("DEPLOY_STAGE", "")
+
+# All known base bucket names the platform uses
+_BASE_BUCKETS = [
    "stonks-raw-market",
    "stonks-raw-news",
    "stonks-raw-filings",
@@ -41,14 +47,25 @@ ALL_BUCKETS = [
    "stonks-audit",
 ]

-# Map source_type to the correct raw bucket
+
+def _prefixed(bucket: str) -> str:
+    """Return ``bucket`` as "<prefix>-<bucket>" when _BUCKET_PREFIX is non-empty, else unchanged."""
+    if _BUCKET_PREFIX:  # empty string (DEPLOY_STAGE unset or blank) means no prefix is applied
+        return f"{_BUCKET_PREFIX}-{bucket}"
+    return bucket
+
+
+# Public list with prefix applied
+ALL_BUCKETS = [_prefixed(b) for b in _BASE_BUCKETS]
+
+# Map source_type to the correct raw bucket (with prefix)
 SOURCE_BUCKET_MAP: dict[str, str] = {
-    "market_api": "stonks-raw-market",
-    "news_api": "stonks-raw-news",
-    "filings_api": "stonks-raw-filings",
-    "web_scrape": "stonks-raw-news",
-    "broker": "stonks-raw-market",
-    "macro_news": "stonks-raw-news",
+    "market_api": _prefixed("stonks-raw-market"),
+    "news_api": _prefixed("stonks-raw-news"),
+    "filings_api": _prefixed("stonks-raw-filings"),
+    "web_scrape": _prefixed("stonks-raw-news"),
+    "broker": _prefixed("stonks-raw-market"),
+    "macro_news": _prefixed("stonks-raw-news"),
 }

 # Map artifact type to content type and file extension
@@ -62,7 +79,7 @@ ARTIFACT_CONTENT_TYPES: dict[str, tuple[str, str]] = {

 def bucket_for_source(source_type: str) -> str:
    """Return the MinIO bucket name for a given source type."""
-    return SOURCE_BUCKET_MAP.get(source_type, "stonks-raw-market")
+    return SOURCE_BUCKET_MAP.get(source_type, _prefixed("stonks-raw-market"))


 def build_artifact_path(
@@ -227,7 +244,7 @@ def upload_normalized_text(
        f"{document_id}/normalized.txt"
    )
    return upload_artifact(
-        client, "stonks-normalized", path, text_bytes,
+        client, _prefixed("stonks-normalized"), path, text_bytes,
        content_type="text/plain", metadata=metadata,
    )

@@ -251,7 +268,7 @@ def upload_parser_output(
        f"{document_id}/parser_output.json"
    )
    return upload_artifact(
-        client, "stonks-normalized", path, output_bytes,
+        client, _prefixed("stonks-normalized"), path, output_bytes,
        content_type="application/json", metadata=metadata,
    )

@@ -275,7 +292,7 @@ def upload_extraction_prompt(
        f"{document_id}/prompt.json"
    )
    return upload_artifact(
-        client, "stonks-llm-prompts", path, prompt_data,
+        client, _prefixed("stonks-llm-prompts"), path, prompt_data,
        content_type="application/json", metadata=metadata,
    )

@@ -300,7 +317,7 @@ def upload_extraction_raw_output(
        f"{document_id}/raw_output_{attempt_index}.json"
    )
    return upload_artifact(
-        client, "stonks-llm-results", path, output_data,
+        client, _prefixed("stonks-llm-results"), path, output_data,
        content_type="application/json", metadata=metadata,
    )

@@ -324,7 +341,7 @@ def upload_extraction_validation(
        f"{document_id}/validation.json"
    )
    return upload_artifact(
-        client, "stonks-llm-results", path, validation_data,
+        client, _prefixed("stonks-llm-results"), path, validation_data,
        content_type="application/json", metadata=metadata,
    )

@@ -348,7 +365,7 @@ def upload_extraction_intelligence(
        f"{document_id}/intelligence.json"
    )
    return upload_artifact(
-        client, "stonks-llm-results", path, intelligence_data,
+        client, _prefixed("stonks-llm-results"), path, intelligence_data,
        content_type="application/json", metadata=metadata,
    )