be526ae614
- Added pipelineEnabled flag to Helm values (default: true) - Worker services (scheduler, ingestion, parser, extractor, aggregation, recommendation, broker-adapter, lake-publisher) scale to 0 when disabled - API services always run regardless of toggle - Redis-based runtime toggle: POST /api/ops/pipeline/toggle - Scheduler checks the flag before each cycle - Frontend: green/red Pipeline ON/OFF button on the pipeline page - Beta defaults to pipelineEnabled: false - Base values.yaml: blanked external URLs (Ollama, Polygon, Alpaca) so stages only connect to what they explicitly configure
410 lines
16 KiB
Bash
Executable File
410 lines
16 KiB
Bash
Executable File
#!/bin/bash
# Beta-to-Paper promotion gate
#
# Deploys the given image tag to the beta namespace, runs integration tests
# against the live beta services, and promotes to paper-trading if all pass.
#
# This script is the single source of truth for the promotion decision.
# CI calls it; humans can call it too.
#
# Usage: bash infra/inttest/promote.sh [OPTIONS]
#
# Options:
#   --image-tag TAG       Docker image tag to deploy (required)
#   --skip-promote        Run tests but don't promote even if green
#   --skip-teardown       Leave beta namespace running after tests
#   --results-file PATH   Path for JSON results output (default: beta-gate-results.json)
#   --timeout SECONDS     Max wait for services to become ready (default: 180)
#   -h, --help            Show usage
#
# Exit codes:
#   0  All tests passed, promotion succeeded (or --skip-promote)
#   1  Test failures — promotion blocked
#   2  Infrastructure/deployment failure
#   3  Promotion step failed (tests passed but helm upgrade failed)

# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -euo pipefail

# Absolute path of the directory holding this script, and of the directory two
# levels above it (used as the repository root for chart and migration paths).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
readonly SCRIPT_DIR REPO_ROOT

# ── Defaults ─────────────────────────────────────────────────────────────────
# These may be overridden by command-line options.
IMAGE_TAG=""
SKIP_PROMOTE=false
SKIP_TEARDOWN=false
RESULTS_FILE="beta-gate-results.json"
READY_TIMEOUT=180

# Fixed Kubernetes namespaces: where the candidate is tested, and where it is
# promoted to.
readonly BETA_NAMESPACE="stonks-oracle-beta"
readonly PAPER_NAMESPACE="stonks-oracle"

# ── Helpers ──────────────────────────────────────────────────────────────────

#######################################
# Print the command-line help text and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional; defaults to 0)
# Outputs:
#   The help text, on stdout when the status is 0 and on stderr otherwise,
#   so that error-path help does not pollute captured stdout.
# Returns:
#   Never returns; exits with the requested status.
#######################################
usage() {
  local status="${1:-0}"
  local fd=1
  if [ "$status" -ne 0 ]; then
    fd=2
  fi

  cat >&"$fd" <<EOF
Usage: bash infra/inttest/promote.sh [OPTIONS]

Options:
--image-tag TAG Docker image tag to deploy (required)
--skip-promote Run tests but don't promote even if green
--skip-teardown Leave beta namespace running after tests
--results-file PATH Path for JSON results output (default: beta-gate-results.json)
--timeout SECONDS Max wait for services to become ready (default: 180)
-h, --help Show usage

Exit codes:
0 All tests passed, promotion succeeded (or --skip-promote)
1 Test failures — promotion blocked
2 Infrastructure/deployment failure
3 Promotion step failed (tests passed but helm upgrade failed)
EOF
  exit "$status"
}

#######################################
# Write one progress line to stdout, prefixed with the current UTC time
# (HH:MM:SS) and the "[beta-gate]" tag.
# Arguments:
#   Message words; they are joined into a single line.
#######################################
log() {
  printf '[%s] [beta-gate] %s\n' "$(date -u +'%H:%M:%S')" "$*"
}

#######################################
# Log a fatal message through log() and terminate the script with status 2.
# Arguments:
#   Message words describing the failure.
# Returns:
#   Never returns; always exits 2.
#######################################
die() {
  local -r reason="$*"
  log "FATAL: ${reason}"
  exit 2
}

# ── Parse CLI args ───────────────────────────────────────────────────────────

# Report a command-line mistake on stderr, show the help text, and exit 2.
# The help is rendered inside a subshell because usage() terminates whoever
# calls it; without the subshell a bad invocation would end with usage()'s own
# status, which CI could mistake for a passed gate.
usage_error() {
  echo "$1" >&2
  ( usage ) >&2
  exit 2
}

# Abort with a usage error unless the option named in $1 is followed by a
# value. $2 is the number of arguments still unparsed (including the option).
require_value() {
  [[ "$2" -ge 2 ]] || usage_error "ERROR: $1 requires a value"
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --image-tag)
      require_value "$1" "$#"
      IMAGE_TAG="$2"
      shift 2
      ;;
    --skip-promote)
      SKIP_PROMOTE=true
      shift
      ;;
    --skip-teardown)
      SKIP_TEARDOWN=true
      shift
      ;;
    --results-file)
      require_value "$1" "$#"
      RESULTS_FILE="$2"
      shift 2
      ;;
    --timeout)
      require_value "$1" "$#"
      # The value is later suffixed with "s" and handed to helm, so reject
      # anything that is not a plain number of seconds up front.
      [[ "$2" =~ ^[0-9]+$ ]] \
        || usage_error "ERROR: --timeout expects a whole number of seconds, got: $2"
      READY_TIMEOUT="$2"
      shift 2
      ;;
    -h|--help)
      # Explicit help request: usage() prints the text and exits 0.
      usage
      ;;
    *)
      usage_error "Unknown option: $1"
      ;;
  esac
done

# The image tag has no sensible default; refuse to run without it.
if [[ -z "$IMAGE_TAG" ]]; then
  usage_error "ERROR: --image-tag is required"
fi

# Record the start of the run twice: as an ISO-8601 UTC stamp for the results
# file, and as epoch seconds for the duration computed at the end.
STARTED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
PIPELINE_START=$(date +%s)

# Echo the effective configuration so the CI log shows what was requested.
log "Beta gate starting"
log " Image tag: $IMAGE_TAG"
log " Beta NS: $BETA_NAMESPACE"
log " Paper NS: $PAPER_NAMESPACE"
log " Skip promote: $SKIP_PROMOTE"

# ══════════════════════════════════════════════════════════════════════════════
# Stage 1: Deploy to beta namespace
# ══════════════════════════════════════════════════════════════════════════════
log "▶ Stage 1: Deploy to beta"

# Ensure beta namespace exists. Best-effort: the error output is discarded and
# a non-zero status (e.g. the namespace already being there) is ignored.
kubectl create namespace "$BETA_NAMESPACE" 2>/dev/null || true

# Create beta database if it doesn't exist: first ask pg_database whether
# stonks_beta is present, and only issue CREATE DATABASE when the probe does
# not print a row. The create itself is best-effort as well.
log "Ensuring beta database exists ..."
if ! kubectl exec -n postgresql-service postgresql-1 -c postgres -- \
    psql -U postgres -tc "SELECT 1 FROM pg_database WHERE datname = 'stonks_beta'" \
    | grep -q 1; then
  kubectl exec -n postgresql-service postgresql-1 -c postgres -- \
    psql -U postgres -c "CREATE DATABASE stonks_beta OWNER stonks;" 2>/dev/null || true
fi

# Apply migrations to beta database.
# The glob expands in sorted order, so files are applied in name order without
# parsing `ls` output (which breaks on unusual file names).
log "Applying migrations to beta database ..."
for migration in "$REPO_ROOT"/infra/migrations/*.sql; do
  # When nothing matches, the pattern is left as a literal string; skip it.
  [[ -e "$migration" ]] || continue

  # -i is required: without it kubectl exec does not pass this shell's stdin
  # to the container, so `psql -f -` would read nothing and the migration
  # would silently be skipped.
  # Failures stay non-fatal (as before), but are now reported.
  # NOTE(review): presumably re-applying an existing migration can fail
  # harmlessly — confirm before making this fatal.
  kubectl exec -i -n postgresql-service postgresql-1 -c postgres -- \
    psql -U stonks -d stonks_beta -f - < "$migration" 2>/dev/null \
    || log "WARNING: migration ${migration##*/} reported an error (continuing)"
done

# Deploy via Helm with beta values. --wait makes helm block until the release
# is ready or the --timeout (READY_TIMEOUT seconds) expires.
log "Helm upgrade to beta namespace ..."
DEPLOY_STATUS="ok"
if ! helm upgrade --install stonks-oracle-beta \
    "$REPO_ROOT/infra/helm/stonks-oracle" \
    -n "$BETA_NAMESPACE" \
    -f "$REPO_ROOT/infra/helm/stonks-oracle/values-beta.yaml" \
    --set "image.tag=$IMAGE_TAG" \
    --wait \
    --timeout "${READY_TIMEOUT}s"; then
  log "Helm deploy to beta failed"
  DEPLOY_STATUS="failed"
fi

if [[ "$DEPLOY_STATUS" != "ok" ]]; then
  # Dump pod and recent event state for diagnosis; both are best-effort.
  log "Beta deployment failed — checking pod status"
  kubectl get pods -n "$BETA_NAMESPACE" -o wide 2>&1 || true
  kubectl get events -n "$BETA_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true

  # Write a results file describing the failed deploy, then stop with the
  # infrastructure-failure exit code. No tests were run.
  cat > "$RESULTS_FILE" <<EOF
{
"run_id": "beta-gate-${IMAGE_TAG}",
"image_tag": "${IMAGE_TAG}",
"started_at": "${STARTED_AT}",
"completed_at": "$(date -u +"%Y-%m-%dT%H:%M:%SZ")",
"exit_code": 2,
"stage": "deploy",
"deploy_status": "failed",
"test_status": "skipped",
"promote_status": "blocked",
"tests": {"total": 0, "passed": 0, "failed": 0, "errors": 0}
}
EOF
  exit 2
fi

log "✓ Beta deployment ready"

# ══════════════════════════════════════════════════════════════════════════════
# Stage 2: Seed beta database
# ══════════════════════════════════════════════════════════════════════════════
log "▶ Stage 2: Seed beta data"

# Run seed against beta database via a temporary pod built from the image
# under test.
SEED_IMAGE="registry.celestium.life/stonks-oracle/query-api:${IMAGE_TAG}"

# Clean up any previous seed pod
kubectl delete pod seed-beta -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# The pod is attached (--attach) and removed afterwards (--rm). A non-zero
# result only produces a warning; the stage is not treated as fatal.
#
# SECURITY NOTE(review): the Postgres password can be supplied through the
# BETA_POSTGRES_PASSWORD environment variable; the literal below is only the
# fallback and is a credential committed to source — it should be rotated and
# removed. The MinIO keys are likewise hard-coded.
if ! kubectl run seed-beta \
    --image="$SEED_IMAGE" \
    --restart=Never \
    --rm \
    --attach \
    --pod-running-timeout=3m \
    --namespace="$BETA_NAMESPACE" \
    --image-pull-policy=Always \
    --env="POSTGRES_HOST=postgresql-rw.postgresql-service.svc.cluster.local" \
    --env="POSTGRES_PORT=5432" \
    --env="POSTGRES_DB=stonks_beta" \
    --env="POSTGRES_USER=stonks" \
    --env="POSTGRES_PASSWORD=${BETA_POSTGRES_PASSWORD:-St0nks0racl3!}" \
    --env="MINIO_ENDPOINT=minio.minio-service.svc.cluster.local:80" \
    --env="MINIO_SECURE=false" \
    --env="MINIO_ACCESS_KEY=minioadmin" \
    --env="MINIO_SECRET_KEY=minioadmin" \
    --command -- python -m tests.integration.seed_sandbox 2>/dev/null; then
  log "WARNING: Seed may have partially failed (could be idempotent re-run)"
fi

log "✓ Beta data seeded"

# ══════════════════════════════════════════════════════════════════════════════
# Stage 3: Run integration tests against beta
# ══════════════════════════════════════════════════════════════════════════════
log "▶ Stage 3: Run integration tests"

# Determine service URLs within the beta namespace
QUERY_API_URL="http://query-api.${BETA_NAMESPACE}.svc.cluster.local:8000"
REGISTRY_API_URL="http://symbol-registry.${BETA_NAMESPACE}.svc.cluster.local:8000"
RISK_API_URL="http://risk.${BETA_NAMESPACE}.svc.cluster.local:8000"
TRADING_API_URL="http://trading-engine.${BETA_NAMESPACE}.svc.cluster.local:8000"

# Clean up any previous runner
kubectl delete pod beta-test-runner -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# Run tests as a pod in the beta namespace. The pod is created detached; its
# completion is awaited separately.
#
# If the pod cannot even be created, that is an infrastructure problem, not a
# test failure, so exit through die() (status 2) instead of letting `set -e`
# end the script with kubectl's own status.
#
# SECURITY NOTE(review): the Postgres password can be supplied through the
# BETA_POSTGRES_PASSWORD environment variable; the literal below is only the
# fallback and is a credential committed to source.
log "Starting test runner pod ..."
kubectl run beta-test-runner \
  --image="$SEED_IMAGE" \
  --restart=Never \
  --namespace="$BETA_NAMESPACE" \
  --image-pull-policy=Always \
  --env="QUERY_API_URL=$QUERY_API_URL" \
  --env="REGISTRY_API_URL=$REGISTRY_API_URL" \
  --env="RISK_API_URL=$RISK_API_URL" \
  --env="TRADING_API_URL=$TRADING_API_URL" \
  --env="POSTGRES_HOST=postgresql-rw.postgresql-service.svc.cluster.local" \
  --env="POSTGRES_PORT=5432" \
  --env="POSTGRES_DB=stonks_beta" \
  --env="POSTGRES_USER=stonks" \
  --env="POSTGRES_PASSWORD=${BETA_POSTGRES_PASSWORD:-St0nks0racl3!}" \
  --env="REDIS_HOST=redis-master.redis-service.svc.cluster.local" \
  --env="REDIS_PORT=6379" \
  --env="REDIS_DB=1" \
  --env="REDIS_PASSWORD=" \
  --env="BROKER_MODE=paper" \
  --env="LOG_LEVEL=INFO" \
  --command -- python -m pytest tests/integration/ -v --tb=short -q \
  || die "Could not create the beta-test-runner pod"

# Wait for the test runner to complete
log "Waiting for test runner (timeout: 600s) ..."
TEST_EXIT_CODE=0

# First wait on the pod's Ready condition. A non-zero status is ignored here
# (the pod may have already completed); the polling loop below decides the
# actual outcome.
kubectl wait --for=condition=Ready=false pod/beta-test-runner \
  -n "$BETA_NAMESPACE" --timeout=600s 2>/dev/null || true

# Wait for pod to reach terminal state: poll the phase every 5 seconds, up to
# 120 times, stopping as soon as it is Succeeded or Failed. A failed lookup is
# recorded as "Unknown" and polling continues.
POD_PHASE="Unknown"
for (( attempt = 1; attempt <= 120; attempt++ )); do
  POD_PHASE=$(kubectl get pod beta-test-runner -n "$BETA_NAMESPACE" \
    -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
  if [[ "$POD_PHASE" == "Succeeded" || "$POD_PHASE" == "Failed" ]]; then
    break
  fi
  sleep 5
done

# Collect results: the runner's log output and its final phase.
TEST_OUTPUT=$(kubectl logs beta-test-runner -n "$BETA_NAMESPACE" 2>/dev/null || true)
POD_PHASE=$(kubectl get pod beta-test-runner -n "$BETA_NAMESPACE" \
  -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")

# Only a runner that finished in phase Succeeded counts as a pass. Any other
# final phase — Failed, but also a pod still Pending/Running after the polling
# budget, or Unknown — must block the gate; otherwise a runner that never ran
# produces zero failures and would be promoted untested.
case "$POD_PHASE" in
  Succeeded)
    ;;
  Failed)
    TEST_EXIT_CODE=1
    ;;
  *)
    log "WARNING: test runner did not finish (phase: ${POD_PHASE}) — treating as failure"
    TEST_EXIT_CODE=1
    ;;
esac

# Parse test counts

# Print the number that precedes "<label>" (e.g. "12 passed") in the text read
# from stdin, using the last occurrence; prints 0 when there is none.
# Uses grep -E rather than -P so it also works where grep lacks PCRE support.
# Arguments: $1 - label to look for ("passed", "failed", "error").
summary_count() {
  local label="$1"
  local hit
  hit=$(grep -oE "[0-9]+ ${label}" | tail -n 1) || true
  hit="${hit%% *}"   # keep only the leading number
  printf '%s\n' "${hit:-0}"
}

TESTS_PASSED=$(summary_count passed <<<"$TEST_OUTPUT")
TESTS_FAILED=$(summary_count failed <<<"$TEST_OUTPUT")
# "error" also matches the plural "errors".
TESTS_ERRORS=$(summary_count error <<<"$TEST_OUTPUT")
TESTS_TOTAL=$(( TESTS_PASSED + TESTS_FAILED + TESTS_ERRORS ))

log "Test results: ${TESTS_PASSED} passed, ${TESTS_FAILED} failed, ${TESTS_ERRORS} errors"

# Print test output for CI visibility (last 60 lines only).
if [[ -n "$TEST_OUTPUT" ]]; then
  echo "─── Test Output ───"
  printf '%s\n' "$TEST_OUTPUT" | tail -n 60
  echo "─── End Test Output ───"
fi

# Clean up test runner
kubectl delete pod beta-test-runner -n "$BETA_NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

# ══════════════════════════════════════════════════════════════════════════════
# Stage 4: Promotion decision
# ══════════════════════════════════════════════════════════════════════════════
# Outcomes: tests not clean → blocked (exit 1); --skip-promote → skipped
# (exit 0); otherwise helm-upgrade the paper namespace → promoted (exit 0) or
# failed (exit 3).
PROMOTE_STATUS="blocked"
FINAL_EXIT=0

if [[ "$TESTS_FAILED" -gt 0 || "$TESTS_ERRORS" -gt 0 || "$TEST_EXIT_CODE" -ne 0 ]]; then
  log "✗ GATE FAILED — ${TESTS_FAILED} failures, ${TESTS_ERRORS} errors"
  log " Promotion to paper-trading BLOCKED"
  PROMOTE_STATUS="blocked"
  FINAL_EXIT=1
elif [[ "$SKIP_PROMOTE" == true ]]; then
  log "✓ Tests passed — promotion skipped (--skip-promote)"
  PROMOTE_STATUS="skipped"
  FINAL_EXIT=0
else
  log "▶ Stage 4: Promoting to paper-trading"
  log " Upgrading $PAPER_NAMESPACE with image tag $IMAGE_TAG ..."

  if helm upgrade --install stonks-oracle \
      "$REPO_ROOT/infra/helm/stonks-oracle" \
      -n "$PAPER_NAMESPACE" \
      --set "image.tag=$IMAGE_TAG" \
      --wait \
      --timeout 300s; then
    log "✓ PROMOTED — paper-trading now running $IMAGE_TAG"
    PROMOTE_STATUS="promoted"
    FINAL_EXIT=0

    # Rolling restart to pick up new images. Best-effort: a failure here does
    # not change the promotion result.
    log "Rolling restart of API services ..."
    kubectl rollout restart deployment/query-api deployment/symbol-registry \
      deployment/trading-engine deployment/risk-engine \
      deployment/aggregation deployment/recommendation \
      -n "$PAPER_NAMESPACE" 2>/dev/null || true
  else
    log "✗ Promotion failed — helm upgrade error"
    PROMOTE_STATUS="failed"
    FINAL_EXIT=3
  fi
fi

# ══════════════════════════════════════════════════════════════════════════════
# Stage 5: Cleanup beta (optional)
# ══════════════════════════════════════════════════════════════════════════════
# Runs only after a successful promotion and unless --skip-teardown was given.
# The beta release is kept but every service is set to zero replicas.
if [[ "$SKIP_TEARDOWN" == false && "$PROMOTE_STATUS" == "promoted" ]]; then
  log "Scaling down beta deployment (keeping namespace for next run) ..."

  # Build one "--set services.<name>.replicas=0" pair per service.
  scale_down_args=()
  for svc in queryApi symbolRegistry tradingEngine riskEngine \
             scheduler ingestion parser extractor aggregation \
             recommendation brokerAdapter lakePublisher dashboard; do
    scale_down_args+=(--set "services.${svc}.replicas=0")
  done

  # Best-effort: a failed scale-down must not change the gate's exit code,
  # but it is reported instead of being logged as a success.
  if helm upgrade stonks-oracle-beta \
      "$REPO_ROOT/infra/helm/stonks-oracle" \
      -n "$BETA_NAMESPACE" \
      -f "$REPO_ROOT/infra/helm/stonks-oracle/values-beta.yaml" \
      --set "image.tag=$IMAGE_TAG" \
      "${scale_down_args[@]}"; then
    log "Beta scaled to zero"
  else
    log "WARNING: Beta scale-down failed — beta workloads may still be running"
  fi
fi

# ══════════════════════════════════════════════════════════════════════════════
# Write results
# ══════════════════════════════════════════════════════════════════════════════
COMPLETED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
PIPELINE_END=$(date +%s)
PIPELINE_DURATION=$(( PIPELINE_END - PIPELINE_START ))

# test_status is "completed" for final exit codes 0 and 1, "error" otherwise.
if [[ "$FINAL_EXIT" -le 1 ]]; then
  TEST_STATUS="completed"
else
  TEST_STATUS="error"
fi

# Machine-readable summary of the run, written to the path from
# --results-file.
cat > "$RESULTS_FILE" <<EOF
{
"run_id": "beta-gate-${IMAGE_TAG}",
"image_tag": "${IMAGE_TAG}",
"started_at": "${STARTED_AT}",
"completed_at": "${COMPLETED_AT}",
"duration_s": ${PIPELINE_DURATION},
"exit_code": ${FINAL_EXIT},
"deploy_status": "${DEPLOY_STATUS}",
"test_status": "${TEST_STATUS}",
"promote_status": "${PROMOTE_STATUS}",
"tests": {
"total": ${TESTS_TOTAL},
"passed": ${TESTS_PASSED},
"failed": ${TESTS_FAILED},
"errors": ${TESTS_ERRORS}
}
}
EOF

# Human-readable summary for the console / CI log.
log "Results written to $RESULTS_FILE"
echo ""
log "═══════════════════════════════════════════════════"
log " Beta Gate Summary"
log "═══════════════════════════════════════════════════"
log " Image: $IMAGE_TAG"
log " Duration: ${PIPELINE_DURATION}s"
log " Tests: ${TESTS_PASSED}/${TESTS_TOTAL} passed"
log " Promotion: ${PROMOTE_STATUS}"
log " Exit code: $FINAL_EXIT"
log "═══════════════════════════════════════════════════"
echo ""

# The script's exit status is the gate decision.
exit "$FINAL_EXIT"