feat: pipeline on/off toggle with per-stage Helm control

- Added pipelineEnabled flag to Helm values (default: true)
- Worker services (scheduler, ingestion, parser, extractor, aggregation,
  recommendation, broker-adapter, lake-publisher) scale to 0 when disabled
- API services always run regardless of toggle
- Redis-based runtime toggle: POST /api/ops/pipeline/toggle
- Scheduler checks the flag before each cycle
- Frontend: green/red Pipeline ON/OFF button on the pipeline page
- Beta defaults to pipelineEnabled: false
- Base values.yaml: blanked external URLs (Ollama, Polygon, Alpaca)
  so stages only connect to what they explicitly configure
This commit is contained in:
Celes Renata
2026-04-21 00:21:53 +00:00
parent a19ed086fe
commit be526ae614
14 changed files with 923 additions and 104 deletions
@@ -11,7 +11,7 @@ metadata:
{{- include "stonks.labels" $root | nindent 4 }}
stonks-oracle/tier: {{ $svc.tier }}
spec:
replicas: {{ $svc.replicas }}
replicas: {{ if and (hasKey $svc "pipeline") $svc.pipeline (not $root.Values.pipelineEnabled) }}0{{ else }}{{ $svc.replicas }}{{ end }}
selector:
matchLabels:
app: {{ $svc.image }}
+4 -24
View File
@@ -6,31 +6,11 @@
image:
tag: latest
## Single replica for API services, disable pipeline workers
## Beta is for API testing only — no ingestion/extraction/aggregation
## Pipeline OFF by default — beta is for API testing only
pipelineEnabled: false
## Single replica for API services
services:
scheduler:
replicas: 0
symbolRegistry:
replicas: 1
ingestion:
replicas: 0
parser:
replicas: 0
extractor:
replicas: 0
aggregation:
replicas: 0
recommendation:
replicas: 0
tradingEngine:
replicas: 1
riskEngine:
replicas: 1
brokerAdapter:
replicas: 0
lakePublisher:
replicas: 0
queryApi:
replicas: 1
dashboard:
+13
View File
@@ -4,10 +4,16 @@ image:
pullPolicy: Always
tag: latest
## Pipeline toggle — when false, every worker service (scheduler, ingestion,
## parser, extractor, aggregation, recommendation, broker-adapter,
## lake-publisher) is scaled to 0 replicas. API services always run.
pipelineEnabled: true
## Service deployments — replicas and resource overrides
services:
scheduler:
replicas: 1
pipeline: true
image: scheduler
command: "python -m services.scheduler.app"
tier: orchestration
@@ -32,6 +38,7 @@ services:
ingestion:
replicas: 2
pipeline: true
image: ingestion
command: "python -m services.ingestion.worker"
tier: ingestion
@@ -42,6 +49,7 @@ services:
parser:
replicas: 2
pipeline: true
image: parser
command: "python -m services.parser.worker"
tier: processing
@@ -52,6 +60,7 @@ services:
extractor:
replicas: 1
pipeline: true
image: extractor
command: "python -m services.extractor.main"
tier: processing
@@ -62,6 +71,7 @@ services:
aggregation:
replicas: 4
pipeline: true
image: aggregation
command: "python -m services.aggregation.main"
tier: processing
@@ -72,6 +82,7 @@ services:
recommendation:
replicas: 1
pipeline: true
image: recommendation
command: "python -m services.recommendation.main"
tier: processing
@@ -107,6 +118,7 @@ services:
brokerAdapter:
replicas: 1
pipeline: true
image: broker-adapter
command: "python -m services.adapters.broker_service"
tier: trading
@@ -117,6 +129,7 @@ services:
lakePublisher:
replicas: 1
pipeline: true
image: lake-publisher
command: "python -m services.lake_publisher.jobs"
tier: analytics
+409
View File
@@ -0,0 +1,409 @@
#!/bin/bash
# Beta-to-Paper promotion gate
#
# Deploys the given image tag to the beta namespace, runs integration tests
# against the live beta services, and promotes to paper-trading if all pass.
#
# This script is the single source of truth for the promotion decision.
# CI calls it; humans can call it too.
#
# Usage: bash infra/inttest/promote.sh [OPTIONS]
#
# Options:
# --image-tag TAG Docker image tag to deploy (required)
# --skip-promote Run tests but don't promote even if green
# --skip-teardown Leave beta namespace running after tests
# --results-file PATH Path for JSON results output (default: beta-gate-results.json)
# --timeout SECONDS Max wait for services to become ready (default: 180)
# -h, --help Show usage
#
# Exit codes:
# 0 All tests passed, promotion succeeded (or --skip-promote)
# 1 Test failures — promotion blocked
# 2 Infrastructure/deployment failure
# 3 Promotion step failed (tests passed but helm upgrade failed)
set -euo pipefail
# ── Paths ────────────────────────────────────────────────────────────────────
# Absolute directory holding this script, and the directory two levels above it
# (used below as the base for the infra/ paths).
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "${SCRIPT_DIR}/../.." && pwd)
# ── Defaults ─────────────────────────────────────────────────────────────────
# Settings the command-line options may override.
IMAGE_TAG=''                              # --image-tag (required; no default)
SKIP_PROMOTE='false'                      # --skip-promote
SKIP_TEARDOWN='false'                     # --skip-teardown
RESULTS_FILE='beta-gate-results.json'     # --results-file
READY_TIMEOUT='180'                       # --timeout, in seconds
# Fixed namespaces: where the candidate is tested, and where it is promoted to.
BETA_NAMESPACE='stonks-oracle-beta'
PAPER_NAMESPACE='stonks-oracle'
# ── Helpers ──────────────────────────────────────────────────────────────────
#######################################
# Print the help text and terminate the script.
# Arguments:
#   $1 - optional exit status (default: 0). When it is non-zero the help
#        text goes to stderr, so a failed invocation neither looks like
#        success nor pollutes stdout.
# Outputs:
#   Help text on stdout (status 0) or stderr (any other status).
# Returns:
#   Never returns; exits with the requested status.
#######################################
usage() {
  local status="${1:-0}"
  if [[ "$status" != "0" ]]; then
    # The shell is about to exit, so permanently redirecting stdout is safe.
    exec 1>&2
  fi
  cat <<EOF
Usage: bash infra/inttest/promote.sh [OPTIONS]
Options:
--image-tag TAG Docker image tag to deploy (required)
--skip-promote Run tests but don't promote even if green
--skip-teardown Leave beta namespace running after tests
--results-file PATH Path for JSON results output (default: beta-gate-results.json)
--timeout SECONDS Max wait for services to become ready (default: 180)
-h, --help Show usage
Exit codes:
0 All tests passed, promotion succeeded (or --skip-promote)
1 Test failures — promotion blocked
2 Infrastructure/deployment failure
3 Promotion step failed (tests passed but helm upgrade failed)
EOF
  exit "$status"
}
#######################################
# Write one timestamped progress line.
# Arguments:
#   $@ - message words; joined into a single string via "$*".
# Outputs:
#   "[HH:MM:SS] [beta-gate] <message>" on stdout, timestamp in UTC.
#######################################
log() {
  printf '[%s] [beta-gate] %s\n' "$(date -u +"%H:%M:%S")" "$*"
}
#######################################
# Report a fatal error and abort the script.
# Arguments:
#   $@ - description of the failure.
# Outputs:
#   A "FATAL: ..." line (formatted by log) on stderr, so fatal diagnostics
#   stay separate from regular progress output on stdout.
# Returns:
#   Never returns; exits with status 2.
#######################################
die() {
  log "FATAL: $*" >&2
  exit 2
}
# ── Parse CLI args ───────────────────────────────────────────────────────────
#######################################
# Report a command-line error, show the help text, and exit with status 2.
# usage() terminates the shell it runs in, so it is invoked in a subshell;
# that lets the help text be printed (on stderr) while this function still
# chooses a failing exit status.
# Arguments:
#   $@ - error message.
#######################################
_usage_error() {
  echo "$*" >&2
  ( usage ) >&2 || true
  exit 2
}

#######################################
# Parse command-line options into the global settings.
# Globals (written):
#   IMAGE_TAG, SKIP_PROMOTE, SKIP_TEARDOWN, RESULTS_FILE, READY_TIMEOUT
# Arguments:
#   $@ - the script's command-line arguments.
# Returns:
#   0 on success. Exits 0 after -h/--help (via usage). Exits 2 on an
#   unknown option, an option missing its value, or a non-numeric
#   --timeout, so a mistyped invocation can never be read as a green gate.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --image-tag)
        [[ $# -ge 2 ]] || _usage_error "ERROR: $1 requires a value"
        IMAGE_TAG="$2"
        shift 2
        ;;
      --skip-promote)
        SKIP_PROMOTE=true
        shift
        ;;
      --skip-teardown)
        SKIP_TEARDOWN=true
        shift
        ;;
      --results-file)
        [[ $# -ge 2 ]] || _usage_error "ERROR: $1 requires a value"
        RESULTS_FILE="$2"
        shift 2
        ;;
      --timeout)
        [[ $# -ge 2 ]] || _usage_error "ERROR: $1 requires a value"
        # The value later gets an "s" suffix for helm, so it must be a bare
        # number: "3m" would silently turn into "3ms".
        [[ "$2" =~ ^[0-9]+$ ]] \
          || _usage_error "ERROR: --timeout expects a whole number of seconds, got: $2"
        READY_TIMEOUT="$2"
        shift 2
        ;;
      -h|--help)
        usage
        ;;
      *)
        _usage_error "Unknown option: $1"
        ;;
    esac
  done
}

parse_args "$@"
# --image-tag is mandatory. A missing tag is a failed invocation, so exit
# non-zero (2) rather than letting usage() end the script with status 0.
# usage() terminates the shell it runs in, hence the subshell; its help text
# is routed to stderr together with the error message.
if [[ -z "$IMAGE_TAG" ]]; then
  echo "ERROR: --image-tag is required" >&2
  ( usage ) >&2 || true
  exit 2
fi
# Record when the run began: an ISO-8601 UTC stamp for the results file and
# epoch seconds for the duration calculation at the end of the script.
STARTED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
PIPELINE_START="$(date +%s)"
# Announce the effective settings.
log "Beta gate starting"
for banner_line in \
  " Image tag: $IMAGE_TAG" \
  " Beta NS: $BETA_NAMESPACE" \
  " Paper NS: $PAPER_NAMESPACE" \
  " Skip promote: $SKIP_PROMOTE"; do
  log "$banner_line"
done
# ══════════════════════════════════════════════════════════════════════════════
# Stage 1: Deploy to beta namespace
# ══════════════════════════════════════════════════════════════════════════════
log "▶ Stage 1: Deploy to beta"

# Ensure beta namespace exists (creation fails when it already does; ignored)
kubectl create namespace "$BETA_NAMESPACE" 2>/dev/null || true

# Create beta database if it doesn't exist.
# The probe output is captured first and inspected afterwards, so the result
# does not depend on how pipefail treats a kubectl | grep -q pipeline.
log "Ensuring beta database exists ..."
beta_db_probe=$(kubectl exec -n postgresql-service postgresql-1 -c postgres -- \
  psql -U postgres -tc "SELECT 1 FROM pg_database WHERE datname = 'stonks_beta'") \
  || beta_db_probe=""
if ! grep -q 1 <<<"$beta_db_probe"; then
  # Best effort, as before: a failure here is reported but does not abort.
  kubectl exec -n postgresql-service postgresql-1 -c postgres -- \
    psql -U postgres -c "CREATE DATABASE stonks_beta OWNER stonks;" 2>/dev/null \
    || log "WARNING: could not create database stonks_beta (continuing)"
fi

# Apply migrations to beta database.
# The glob replaces parsing `ls`, so paths with spaces survive, and an
# unmatched pattern is skipped rather than passed on literally.
log "Applying migrations to beta database ..."
for migration in "$REPO_ROOT"/infra/migrations/*.sql; do
  [[ -e "$migration" ]] || continue
  # -i is required: without it kubectl exec does not forward stdin to the
  # container, and `psql -f -` would read nothing instead of the migration.
  # stderr stays suppressed as in the original.
  # NOTE(review): presumably that hides noise from re-applying migrations —
  # confirm every migration is safe to re-run.
  kubectl exec -i -n postgresql-service postgresql-1 -c postgres -- \
    psql -U stonks -d stonks_beta -f - < "$migration" 2>/dev/null \
    || log "WARNING: migration ${migration##*/} could not be applied (continuing)"
done
# Install/upgrade the beta release from the chart with the beta values file,
# waiting up to READY_TIMEOUT seconds for it to become ready.
log "Helm upgrade to beta namespace ..."
beta_release_args=(
  upgrade --install stonks-oracle-beta
  "$REPO_ROOT/infra/helm/stonks-oracle"
  -n "$BETA_NAMESPACE"
  -f "$REPO_ROOT/infra/helm/stonks-oracle/values-beta.yaml"
  --set "image.tag=$IMAGE_TAG"
  --wait
  --timeout "${READY_TIMEOUT}s"
)
if ! helm "${beta_release_args[@]}"; then
  log "Helm deploy to beta failed"
  DEPLOY_STATUS="failed"
  # Dump pod and recent event state for diagnosis, record the failed run in
  # the results file, then stop with the infrastructure-failure status.
  log "Beta deployment failed — checking pod status"
  kubectl get pods -n "$BETA_NAMESPACE" -o wide 2>&1 || true
  kubectl get events -n "$BETA_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
  cat > "$RESULTS_FILE" <<EOF
{
"run_id": "beta-gate-${IMAGE_TAG}",
"image_tag": "${IMAGE_TAG}",
"started_at": "${STARTED_AT}",
"completed_at": "$(date -u +"%Y-%m-%dT%H:%M:%SZ")",
"exit_code": 2,
"stage": "deploy",
"deploy_status": "failed",
"test_status": "skipped",
"promote_status": "blocked",
"tests": {"total": 0, "passed": 0, "failed": 0, "errors": 0}
}
EOF
  exit 2
fi
DEPLOY_STATUS="ok"
log "✓ Beta deployment ready"
# ══════════════════════════════════════════════════════════════════════════════
# Stage 2: Seed beta database
# ══════════════════════════════════════════════════════════════════════════════
log "▶ Stage 2: Seed beta data"

# Run seed against beta database via a temporary pod.
# SEED_IMAGE is also the image used for the test-runner pod in Stage 3.
SEED_IMAGE="registry.celestium.life/stonks-oracle/query-api:${IMAGE_TAG}"

# Clean up any previous seed pod
kubectl delete pod seed-beta -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# NOTE(review): the database and MinIO credentials below are hard-coded and
# travel on the kubectl command line. They are left untouched here so the
# pod environment stays exactly as before; moving them to a Kubernetes
# Secret (or at least to environment variables) is recommended.
seed_env=(
  "POSTGRES_HOST=postgresql-rw.postgresql-service.svc.cluster.local"
  "POSTGRES_PORT=5432"
  "POSTGRES_DB=stonks_beta"
  "POSTGRES_USER=stonks"
  "POSTGRES_PASSWORD=St0nks0racl3!"
  "MINIO_ENDPOINT=minio.minio-service.svc.cluster.local:80"
  "MINIO_SECURE=false"
  "MINIO_ACCESS_KEY=minioadmin"
  "MINIO_SECRET_KEY=minioadmin"
)

# Seeding stays best-effort (a failure does not abort the gate), but the
# success line is only printed when the seed command actually succeeded.
# "${seed_env[@]/#/--env=}" prefixes every entry with --env=.
if kubectl run seed-beta \
  --image="$SEED_IMAGE" \
  --restart=Never \
  --rm \
  --attach \
  --pod-running-timeout=3m \
  --namespace="$BETA_NAMESPACE" \
  --image-pull-policy=Always \
  "${seed_env[@]/#/--env=}" \
  --command -- python -m tests.integration.seed_sandbox 2>/dev/null; then
  log "✓ Beta data seeded"
else
  log "WARNING: Seed may have partially failed (could be idempotent re-run)"
fi
# ══════════════════════════════════════════════════════════════════════════════
# Stage 3: Run integration tests against beta
# ══════════════════════════════════════════════════════════════════════════════
log "▶ Stage 3: Run integration tests"

# In-cluster URLs of the beta services; all share the same DNS suffix and port.
beta_svc_suffix="${BETA_NAMESPACE}.svc.cluster.local:8000"
QUERY_API_URL="http://query-api.${beta_svc_suffix}"
REGISTRY_API_URL="http://symbol-registry.${beta_svc_suffix}"
RISK_API_URL="http://risk.${beta_svc_suffix}"
TRADING_API_URL="http://trading-engine.${beta_svc_suffix}"

# Remove a runner pod left over from an earlier run, if any.
kubectl delete pod beta-test-runner -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# Environment handed to the test-runner pod, in the order it is passed.
# NOTE(review): POSTGRES_PASSWORD is hard-coded and visible on the kubectl
# command line — consider sourcing it from a Secret.
runner_env=(
  "QUERY_API_URL=$QUERY_API_URL"
  "REGISTRY_API_URL=$REGISTRY_API_URL"
  "RISK_API_URL=$RISK_API_URL"
  "TRADING_API_URL=$TRADING_API_URL"
  "POSTGRES_HOST=postgresql-rw.postgresql-service.svc.cluster.local"
  "POSTGRES_PORT=5432"
  "POSTGRES_DB=stonks_beta"
  "POSTGRES_USER=stonks"
  "POSTGRES_PASSWORD=St0nks0racl3!"
  "REDIS_HOST=redis-master.redis-service.svc.cluster.local"
  "REDIS_PORT=6379"
  "REDIS_DB=1"
  "REDIS_PASSWORD="
  "BROKER_MODE=paper"
  "LOG_LEVEL=INFO"
)
runner_env_flags=()
for env_pair in "${runner_env[@]}"; do
  runner_env_flags+=("--env=${env_pair}")
done

# Start pytest as a pod inside the beta namespace; the command does not
# attach, and completion is awaited by the code that follows.
log "Starting test runner pod ..."
kubectl run beta-test-runner \
  --image="$SEED_IMAGE" \
  --restart=Never \
  --namespace="$BETA_NAMESPACE" \
  --image-pull-policy=Always \
  "${runner_env_flags[@]}" \
  --command -- python -m pytest tests/integration/ -v --tb=short -q
# Wait for the test runner to complete
log "Waiting for test runner (timeout: 600s) ..."
TEST_EXIT_CODE=0

# Best-effort blocking wait. Its outcome is deliberately ignored (the pod may
# already have completed); the phase polling below is what decides.
kubectl wait --for=condition=Ready=false pod/beta-test-runner \
  -n "$BETA_NAMESPACE" --timeout=600s 2>/dev/null || true

# Poll until the pod reaches a terminal phase: at most 120 polls, 5 s apart.
POD_PHASE="Unknown"
for (( poll = 0; poll < 120; poll++ )); do
  POD_PHASE=$(kubectl get pod beta-test-runner -n "$BETA_NAMESPACE" \
    -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
  case "$POD_PHASE" in
    Succeeded|Failed) break ;;
  esac
  sleep 5
done

# Collect results
TEST_OUTPUT=$(kubectl logs beta-test-runner -n "$BETA_NAMESPACE" 2>/dev/null || true)
POD_PHASE=$(kubectl get pod beta-test-runner -n "$BETA_NAMESPACE" \
  -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")

# Only a pod that finished in phase Succeeded counts as a green run. A runner
# that is still Pending/Running after the timeout, or whose phase cannot be
# read, must fail the gate — otherwise an image-pull error or a hung test
# session (no failures in the log yet) would be promoted.
if [[ "$POD_PHASE" != "Succeeded" ]]; then
  TEST_EXIT_CODE=1
  if [[ "$POD_PHASE" != "Failed" ]]; then
    log "Test runner did not reach a terminal state (pod phase: ${POD_PHASE:-empty}) — treating as failure"
  fi
fi
#######################################
# Extract a count from test output.
# Prints the number that precedes the LAST occurrence of "<number> <word>"
# in the text, or 0 when there is none. Uses grep -E rather than grep -P so
# it also works where grep lacks PCRE support; with -P unavailable every
# count would silently become 0.
# Arguments:
#   $1 - captured test output
#   $2 - outcome word to look for (matched as a prefix, so "error" also
#        matches "errors")
# Outputs:
#   The count on stdout.
#######################################
_pytest_count() {
  local text="$1" word="$2" count
  count=$(grep -oE "[0-9]+ ${word}" <<<"$text" | tail -n 1 | grep -oE '^[0-9]+') || count=0
  printf '%s\n' "${count:-0}"
}

# Parse test counts
TESTS_PASSED=$(_pytest_count "$TEST_OUTPUT" passed)
TESTS_FAILED=$(_pytest_count "$TEST_OUTPUT" failed)
TESTS_ERRORS=$(_pytest_count "$TEST_OUTPUT" error)
TESTS_TOTAL=$(( TESTS_PASSED + TESTS_FAILED + TESTS_ERRORS ))
log "Test results: ${TESTS_PASSED} passed, ${TESTS_FAILED} failed, ${TESTS_ERRORS} errors"

# Print test output for CI visibility (last 60 lines only)
if [[ -n "$TEST_OUTPUT" ]]; then
  echo "─── Test Output ───"
  tail -n 60 <<<"$TEST_OUTPUT"
  echo "─── End Test Output ───"
fi

# Clean up test runner
kubectl delete pod beta-test-runner -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true
# ══════════════════════════════════════════════════════════════════════════════
# Stage 4: Promotion decision
# ══════════════════════════════════════════════════════════════════════════════
# Decide the outcome. PROMOTE_STATUS ends as blocked / skipped / promoted /
# failed; FINAL_EXIT is the script's exit status (0, 1 or 3 from this stage).
PROMOTE_STATUS="blocked"
FINAL_EXIT=0

if (( TESTS_FAILED > 0 || TESTS_ERRORS > 0 || TEST_EXIT_CODE != 0 )); then
  log "✗ GATE FAILED — ${TESTS_FAILED} failures, ${TESTS_ERRORS} errors"
  log " Promotion to paper-trading BLOCKED"
  PROMOTE_STATUS="blocked"
  FINAL_EXIT=1
elif (( TESTS_PASSED == 0 )); then
  # Nothing failed, but nothing was observed to pass either (empty or
  # truncated runner log). A gate with no evidence of passing tests must
  # not promote.
  log "✗ GATE FAILED — no passing tests were reported by the test runner"
  log " Promotion to paper-trading BLOCKED"
  PROMOTE_STATUS="blocked"
  FINAL_EXIT=1
elif [[ "$SKIP_PROMOTE" == true ]]; then
  log "✓ Tests passed — promotion skipped (--skip-promote)"
  PROMOTE_STATUS="skipped"
  FINAL_EXIT=0
else
  log "▶ Stage 4: Promoting to paper-trading"
  log " Upgrading $PAPER_NAMESPACE with image tag $IMAGE_TAG ..."
  # No -f values file here: the paper release is upgraded with the chart's
  # own values plus the new image tag.
  if helm upgrade --install stonks-oracle \
    "$REPO_ROOT/infra/helm/stonks-oracle" \
    -n "$PAPER_NAMESPACE" \
    --set "image.tag=$IMAGE_TAG" \
    --wait \
    --timeout 300s; then
    log "✓ PROMOTED — paper-trading now running $IMAGE_TAG"
    PROMOTE_STATUS="promoted"
    FINAL_EXIT=0
    # Rolling restart to pick up new images. Best effort: errors (for
    # example a deployment name that does not exist) are ignored.
    log "Rolling restart of API services ..."
    kubectl rollout restart deployment/query-api deployment/symbol-registry \
      deployment/trading-engine deployment/risk-engine \
      deployment/aggregation deployment/recommendation \
      -n "$PAPER_NAMESPACE" 2>/dev/null || true
  else
    log "✗ Promotion failed — helm upgrade error"
    PROMOTE_STATUS="failed"
    FINAL_EXIT=3
  fi
fi
# ══════════════════════════════════════════════════════════════════════════════
# Stage 5: Cleanup beta (optional)
# ══════════════════════════════════════════════════════════════════════════════
# After a successful promotion (and unless --skip-teardown was given), set
# every beta service to zero replicas. The release and namespace are kept.
if [[ "$SKIP_TEARDOWN" == false && "$PROMOTE_STATUS" == "promoted" ]]; then
  log "Scaling down beta deployment (keeping namespace for next run) ..."
  # One "--set services.<name>.replicas=0" pair per service, in this order.
  zero_replica_flags=()
  for svc_key in \
    queryApi symbolRegistry tradingEngine riskEngine \
    scheduler ingestion parser extractor aggregation recommendation \
    brokerAdapter lakePublisher dashboard; do
    zero_replica_flags+=(--set "services.${svc_key}.replicas=0")
  done
  # Best effort: a failed scale-down is ignored and its stderr discarded.
  helm upgrade stonks-oracle-beta \
    "$REPO_ROOT/infra/helm/stonks-oracle" \
    -n "$BETA_NAMESPACE" \
    -f "$REPO_ROOT/infra/helm/stonks-oracle/values-beta.yaml" \
    --set "image.tag=$IMAGE_TAG" \
    "${zero_replica_flags[@]}" \
    2>/dev/null || true
  log "Beta scaled to zero"
fi
# ══════════════════════════════════════════════════════════════════════════════
# Write results
# ══════════════════════════════════════════════════════════════════════════════
# Timing for the report: completion stamp (UTC) and elapsed wall-clock seconds.
COMPLETED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
PIPELINE_END="$(date +%s)"
PIPELINE_DURATION=$(( PIPELINE_END - PIPELINE_START ))

# test_status is "completed" for exit codes 0 and 1, "error" for anything higher.
if [[ "$FINAL_EXIT" -le 1 ]]; then
  gate_test_status="completed"
else
  gate_test_status="error"
fi

# Write the JSON summary of this run to the results file.
cat > "$RESULTS_FILE" <<EOF
{
"run_id": "beta-gate-${IMAGE_TAG}",
"image_tag": "${IMAGE_TAG}",
"started_at": "${STARTED_AT}",
"completed_at": "${COMPLETED_AT}",
"duration_s": ${PIPELINE_DURATION},
"exit_code": ${FINAL_EXIT},
"deploy_status": "${DEPLOY_STATUS}",
"test_status": "${gate_test_status}",
"promote_status": "${PROMOTE_STATUS}",
"tests": {
"total": ${TESTS_TOTAL},
"passed": ${TESTS_PASSED},
"failed": ${TESTS_FAILED},
"errors": ${TESTS_ERRORS}
}
}
EOF
log "Results written to $RESULTS_FILE"
# Human-readable recap, framed by blank lines and a horizontal rule, followed
# by the exit with the status chosen during the promotion decision.
summary_rule="═══════════════════════════════════════════════════"
echo ""
log "$summary_rule"
log " Beta Gate Summary"
log "$summary_rule"
for summary_line in \
  " Image: $IMAGE_TAG" \
  " Duration: ${PIPELINE_DURATION}s" \
  " Tests: ${TESTS_PASSED}/${TESTS_TOTAL} passed" \
  " Promotion: ${PROMOTE_STATUS}" \
  " Exit code: $FINAL_EXIT"; do
  log "$summary_line"
done
log "$summary_rule"
echo ""
exit "$FINAL_EXIT"