#!/bin/bash # Integration test pipeline — standalone orchestration script # # Deploys an ephemeral Kubernetes sandbox (postgres, redis, minio, services), # seeds deterministic data, runs the integration test suite, collects results, # and tears everything down. # # Designed to be invoked by any CI/CD system or a human developer. # # Usage: bash infra/inttest/run_pipeline.sh [OPTIONS] # # Options: # --image-tag TAG Docker image tag to deploy (default: latest) # --namespace NAME Override namespace name (default: stonks-inttest-) # --skip-teardown Leave namespace running after tests (for debugging) # --results-file PATH Path for JSON results output (default: inttest-results.json) # -h, --help Show usage # # Exit codes: # 0 All tests passed # 1 One or more test failures # 2 Infrastructure setup failure set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" # ── Defaults ───────────────────────────────────────────────────────────────── IMAGE_TAG="latest" NAMESPACE="stonks-inttest-$(date +%s)" SKIP_TEARDOWN=false RESULTS_FILE="inttest-results.json" # ── Stage tracking ─────────────────────────────────────────────────────────── declare -A STAGE_START declare -A STAGE_DURATION declare -A STAGE_STATUS PIPELINE_EXIT_CODE=0 PIPELINE_START=$(date +%s) STARTED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # ── Helpers ────────────────────────────────────────────────────────────────── usage() { cat <) --skip-teardown Leave namespace running after tests (for debugging) --results-file PATH Path for JSON results output (default: inttest-results.json) -h, --help Show usage Exit codes: 0 All tests passed 1 One or more test failures 2 Infrastructure setup failure EOF exit 0 } log() { echo "[$(date -u +"%H:%M:%S")] $*" } stage_start() { local name="$1" log "▶ Stage: $name" STAGE_START[$name]=$(date +%s) } stage_end() { local name="$1" local status="${2:-ok}" local end_ts end_ts=$(date +%s) STAGE_DURATION[$name]=$(( end_ts - 
${STAGE_START[$name]} )) STAGE_STATUS[$name]="$status" log "✓ Stage: $name completed in ${STAGE_DURATION[$name]}s (${status})" } stage_fail() { local name="$1" local end_ts end_ts=$(date +%s) STAGE_DURATION[$name]=$(( end_ts - ${STAGE_START[$name]} )) STAGE_STATUS[$name]="failed" log "✗ Stage: $name FAILED after ${STAGE_DURATION[$name]}s" } debug_pod_failure() { local pod_name="$1" local label="${2:-}" log "─── DEBUG: pod failure diagnostics ───" if [ -n "$label" ]; then # Find pod by label selector local found_pod found_pod=$(kubectl get pods -n "$NAMESPACE" -l "$label" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true) if [ -n "$found_pod" ]; then pod_name="$found_pod" fi fi log "Pod describe ($pod_name):" kubectl describe pod "$pod_name" -n "$NAMESPACE" 2>&1 | tail -40 || true log "Pod logs ($pod_name):" kubectl logs "$pod_name" -n "$NAMESPACE" --tail=60 2>&1 || true log "Pod status:" kubectl get pod "$pod_name" -n "$NAMESPACE" -o wide 2>&1 || true log "Recent events in namespace:" kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true log "─── END DEBUG ───" } # ── Parse CLI args ─────────────────────────────────────────────────────────── while [[ $# -gt 0 ]]; do case $1 in --image-tag) IMAGE_TAG="$2" shift 2 ;; --namespace) NAMESPACE="$2" shift 2 ;; --skip-teardown) SKIP_TEARDOWN=true shift ;; --results-file) RESULTS_FILE="$2" shift 2 ;; -h|--help) usage ;; *) echo "Unknown option: $1" echo "Run with --help for usage." 
exit 2 ;; esac done export NAMESPACE export IMAGE_TAG log "Pipeline starting" log " Namespace: $NAMESPACE" log " Image tag: $IMAGE_TAG" log " Results: $RESULTS_FILE" log " Teardown: $([ "$SKIP_TEARDOWN" = true ] && echo "SKIPPED" || echo "enabled")" # ── Test result tracking ───────────────────────────────────────────────────── TESTS_TOTAL=0 TESTS_PASSED=0 TESTS_FAILED=0 TESTS_ERRORS=0 PROFILING_JSON="" # ── Write JSON results ─────────────────────────────────────────────────────── write_results() { local completed_at completed_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # Build stages JSON local stages_json="{" local first=true for stage_name in infra_deploy seed_data service_deploy integration_tests teardown; do local dur="${STAGE_DURATION[$stage_name]:-0}" local st="${STAGE_STATUS[$stage_name]:-skipped}" if [ "$first" = true ]; then first=false else stages_json+="," fi stages_json+="\"${stage_name}\":{\"duration_s\":${dur},\"status\":\"${st}\"}" done stages_json+="}" # Build profiling section local profiling_section if [ -n "$PROFILING_JSON" ] && [ -f "$PROFILING_JSON" ]; then profiling_section=$(cat "$PROFILING_JSON") else profiling_section='{"endpoints":{},"slow_endpoints":[]}' fi cat > "$RESULTS_FILE" </dev/null || true stage_end "teardown" "ok" fi write_results log "Pipeline finished with exit code $PIPELINE_EXIT_CODE" } trap cleanup EXIT # ══════════════════════════════════════════════════════════════════════════════ # Stage: Create namespace # ══════════════════════════════════════════════════════════════════════════════ stage_start "infra_deploy" log "Creating namespace $NAMESPACE ..." if ! 
kubectl create namespace "$NAMESPACE"; then log "FATAL: Failed to create namespace $NAMESPACE" stage_fail "infra_deploy" PIPELINE_EXIT_CODE=2 exit 2 fi # ── Create GHCR image pull secret (if token available) ─────────────────────── # NOTE: Images now served from Harbor at registry.celestium.life (no auth needed for pulls) # This block is kept for backward compatibility but is no longer required if [ -n "${GHCR_TOKEN:-}" ]; then log "GHCR_TOKEN set but images are on local Harbor — skipping GHCR secret" else log "Images served from registry.celestium.life (no pull secret needed)" fi # ── Create Docker Hub pull secret (avoid rate limits) ──────────────────────── if [ -n "${DOCKERHUB_USER:-}" ] && [ -n "${DOCKERHUB_TOKEN:-}" ]; then log "Creating dockerhub-credentials secret ..." kubectl create secret docker-registry dockerhub-credentials \ --docker-server=https://index.docker.io/v1/ \ --docker-username="$DOCKERHUB_USER" \ --docker-password="$DOCKERHUB_TOKEN" \ -n "$NAMESPACE" || true else log "DOCKERHUB_USER/TOKEN not set — skipping Docker Hub pull secret" fi # ── Create proxy CA cert ConfigMap (for Squid SSL bump) ───────────────────── CA_CERT_URL="http://192.168.42.1/home.crt" if curl -sf "$CA_CERT_URL" -o /tmp/home.crt 2>/dev/null; then kubectl create configmap proxy-ca-cert --from-file=ca.crt=/tmp/home.crt -n "$NAMESPACE" 2>/dev/null || true log "proxy-ca-cert ConfigMap created" fi # ══════════════════════════════════════════════════════════════════════════════ # Stage: Deploy infra (postgres, redis, minio) # ══════════════════════════════════════════════════════════════════════════════ log "Creating postgres-migrations ConfigMap ..." if ! kubectl create configmap postgres-migrations \ --from-file="$REPO_ROOT/infra/migrations/" \ -n "$NAMESPACE"; then log "FATAL: Failed to create postgres-migrations ConfigMap" stage_fail "infra_deploy" PIPELINE_EXIT_CODE=2 exit 2 fi log "Applying postgres manifest ..." 
# ── Helpers for the infra stage ──────────────────────────────────────────────

#######################################
# Render a manifest with envsubst and apply it to the sandbox namespace.
# A failure here is an infrastructure failure: the stage is marked failed
# and the pipeline exits with code 2 (instead of dying with kubectl's own
# status under `set -e`, which would be reported as "test failures").
# Globals:   NAMESPACE (read), PIPELINE_EXIT_CODE (written)
# Arguments: $1 - path to the manifest template
#######################################
apply_infra_manifest() {
  local manifest="$1"
  if ! envsubst < "$manifest" | kubectl apply -n "$NAMESPACE" -f -; then
    log "FATAL: Failed to apply manifest $manifest"
    stage_fail "infra_deploy"
    PIPELINE_EXIT_CODE=2
    exit 2
  fi
}

#######################################
# Block until the pods of one infra component report Ready.
# On timeout: dump pod diagnostics, fail the stage, exit 2.
# Globals:   NAMESPACE (read), PIPELINE_EXIT_CODE (written)
# Arguments: $1 - value of the pod's `app` label (also used in log lines)
#            $2 - human-readable component name for the FATAL message
#            $3 - kubectl wait timeout (e.g. 120s)
#######################################
wait_for_infra_ready() {
  local app="$1"
  local display_name="$2"
  local timeout="$3"

  log "Waiting for ${app} readiness ..."
  if ! kubectl wait --for=condition=ready pod -l "app=${app}" \
    -n "$NAMESPACE" --timeout="$timeout"; then
    log "FATAL: ${display_name} did not become ready"
    debug_pod_failure "$app" "app=${app}"
    stage_fail "infra_deploy"
    PIPELINE_EXIT_CODE=2
    exit 2
  fi
}

# The "Applying postgres manifest ..." log line is emitted just above.
apply_infra_manifest "$REPO_ROOT/infra/inttest/postgres.yaml"

log "Applying redis manifest ..."
apply_infra_manifest "$REPO_ROOT/infra/inttest/redis.yaml"

log "Applying minio manifest ..."
apply_infra_manifest "$REPO_ROOT/infra/inttest/minio.yaml"

# Wait for pods to be created before checking readiness
# (kubectl wait errors out immediately when the selector matches nothing).
log "Waiting for infra pods to be created ..."
for (( attempt = 1; attempt <= 30; attempt++ )); do
  # `|| true`: with pipefail a transient kubectl error would otherwise abort
  # the whole script from inside this assignment; treat it as "0 pods yet".
  POD_COUNT=$(kubectl get pods -n "$NAMESPACE" \
    -l 'app in (postgres,redis,minio)' --no-headers 2>/dev/null | wc -l || true)
  if (( ${POD_COUNT:-0} >= 3 )); then
    break
  fi
  sleep 2
done

wait_for_infra_ready "postgres" "PostgreSQL" "120s"
wait_for_infra_ready "redis" "Redis" "60s"
wait_for_infra_ready "minio" "MinIO" "60s"

stage_end "infra_deploy" "ok"

# ══════════════════════════════════════════════════════════════════════════════
# Stage: Seed data
# ══════════════════════════════════════════════════════════════════════════════
stage_start "seed_data"

SEED_IMAGE="registry.celestium.life/stonks-oracle/query-api:${IMAGE_TAG}"

# Pod-level security context shared by both seed pods.
SEED_POD_OVERRIDES='{ "spec": { "securityContext": {"runAsNonRoot": true, "runAsUser": 1000, "runAsGroup": 1000} } }'

# kubectl run flags common to both seed pods. Neither pod uses --rm: the pod
# must still exist after a failure so its logs/events can be collected.
SEED_RUN_ARGS=(
  --image="$SEED_IMAGE"
  --restart=Never
  --attach
  --pod-running-timeout=5m
  --namespace="$NAMESPACE"
  --image-pull-policy=Always
  --overrides="$SEED_POD_OVERRIDES"
  --env="MINIO_ENDPOINT=minio:9000"
  --env="MINIO_SECURE=false"
  --env="MINIO_ACCESS_KEY=minioadmin"
  --env="MINIO_SECRET_KEY=minioadmin"
)

log "Seeding sandbox database ..."
if ! kubectl run seed-sandbox \
  "${SEED_RUN_ARGS[@]}" \
  --env="POSTGRES_HOST=postgres" \
  --env="POSTGRES_PORT=5432" \
  --env="POSTGRES_DB=stonks" \
  --env="POSTGRES_USER=stonks" \
  --env="POSTGRES_PASSWORD=inttest" \
  --command -- python -m tests.integration.seed_sandbox; then
  log "FATAL: Database seed failed"
  log "Seed pod logs:"
  kubectl logs seed-sandbox -n "$NAMESPACE" --tail=50 2>&1 || true
  kubectl delete pod seed-sandbox -n "$NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true
  stage_fail "seed_data"
  PIPELINE_EXIT_CODE=2
  exit 2
fi

log "Seeding MinIO buckets ..."
if ! kubectl run seed-minio \
  "${SEED_RUN_ARGS[@]}" \
  --command -- python -m tests.integration.seed_minio; then
  log "FATAL: MinIO seed failed"
  # The pod is still present here (no --rm), so the diagnostics are meaningful.
  debug_pod_failure "seed-minio" "run=seed-minio"
  kubectl delete pod seed-minio -n "$NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true
  stage_fail "seed_data"
  PIPELINE_EXIT_CODE=2
  exit 2
fi
# Keep the former --rm behaviour on success: remove the finished seed pod.
kubectl delete pod seed-minio -n "$NAMESPACE" --ignore-not-found > /dev/null 2>&1 || true

stage_end "seed_data" "ok"

# ══════════════════════════════════════════════════════════════════════════════
# Stage: Deploy services
# ══════════════════════════════════════════════════════════════════════════════
stage_start "service_deploy"

log "Applying services manifest (image tag: $IMAGE_TAG) ..."
# Render the services manifest, pin every ":latest" image reference to the
# requested tag, and apply it. A failure is an infrastructure failure (exit 2);
# without the explicit check `set -e` would abort with kubectl's status 1 and
# leave the stage unrecorded.
if ! envsubst < "$REPO_ROOT/infra/inttest/services.yaml" \
  | sed "s/:latest/:${IMAGE_TAG}/g" \
  | kubectl apply -n "$NAMESPACE" -f -; then
  log "FATAL: Failed to apply services manifest"
  stage_fail "service_deploy"
  PIPELINE_EXIT_CODE=2
  exit 2
fi

log "Waiting for all API services to become ready ..."
# Wait for pods to be created by the ReplicaSet controller before checking readiness.
# kubectl wait fails immediately with "no matching resources found" if no pods exist yet.
for (( attempt = 1; attempt <= 30; attempt++ )); do
  # `|| true`: under pipefail a transient kubectl error must not abort the
  # script from inside this assignment; it simply counts as "no pods yet".
  POD_COUNT=$(kubectl get pods -n "$NAMESPACE" -l tier=api --no-headers 2>/dev/null | wc -l || true)
  if (( ${POD_COUNT:-0} >= 4 )); then
    break
  fi
  sleep 2
done

if ! kubectl wait --for=condition=ready pod -l tier=api -n "$NAMESPACE" --timeout=120s; then
  log "FATAL: API services did not become ready"
  log "Pod statuses:"
  kubectl get pods -n "$NAMESPACE" -l tier=api -o wide 2>&1 || true

  # Collect the pod names line by line instead of word-splitting a command
  # substitution, then dump diagnostics for each one.
  API_PODS=()
  mapfile -t API_PODS < <(kubectl get pods -n "$NAMESPACE" -l tier=api \
    --no-headers -o custom-columns=':metadata.name' 2>/dev/null || true)
  for pod in "${API_PODS[@]}"; do
    [[ -n "$pod" ]] || continue
    debug_pod_failure "$pod"
  done

  stage_fail "service_deploy"
  PIPELINE_EXIT_CODE=2
  exit 2
fi

stage_end "service_deploy" "ok"

# ══════════════════════════════════════════════════════════════════════════════
# Stage: Run integration tests
# ══════════════════════════════════════════════════════════════════════════════
stage_start "integration_tests"

log "Applying test runner job (image tag: $IMAGE_TAG) ..."
# Not being able to create the runner job is a setup problem, not a test
# failure, so it is reported with exit code 2.
if ! envsubst < "$REPO_ROOT/infra/inttest/runner.yaml" \
  | sed "s/:latest/:${IMAGE_TAG}/g" \
  | kubectl apply -n "$NAMESPACE" -f -; then
  log "FATAL: Failed to apply test runner job"
  stage_fail "integration_tests"
  PIPELINE_EXIT_CODE=2
  exit 2
fi

log "Waiting for test runner to complete (timeout: 300s) ..."

# Use kubectl wait which handles condition matching reliably.
# Wait for Complete first; if that times out, check for Failed.
# The wait is done in short slices: each slice waits for the Complete
# condition, and when the slice times out the Failed condition is checked.
# A failing job is therefore noticed within one slice instead of only after
# the full timeout has elapsed.
JOB_DONE=false
JOB_TIMEOUT_S=300
JOB_WAIT_SLICE_S=10
JOB_DEADLINE=$(( SECONDS + JOB_TIMEOUT_S ))

while (( SECONDS < JOB_DEADLINE )); do
  JOB_REMAINING_S=$(( JOB_DEADLINE - SECONDS ))
  JOB_SLICE_S=$(( JOB_REMAINING_S < JOB_WAIT_SLICE_S ? JOB_REMAINING_S : JOB_WAIT_SLICE_S ))
  JOB_SLICE_STARTED=$SECONDS

  if kubectl wait --for=condition=complete job/inttest-runner \
    -n "$NAMESPACE" --timeout="${JOB_SLICE_S}s" 2>/dev/null; then
    log "Test runner completed successfully"
    stage_end "integration_tests" "ok"
    JOB_DONE=true
    break
  fi

  # Check if it failed (non-zero exit) vs. is simply still running
  FAILED=$(kubectl get job inttest-runner -n "$NAMESPACE" \
    -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null || true)
  if [[ "$FAILED" == "True" ]]; then
    log "Test runner job reported failure (tests failed)"
    stage_fail "integration_tests"
    PIPELINE_EXIT_CODE=1
    JOB_DONE=true
    break
  fi

  # kubectl wait returned without consuming its slice (API error, job not
  # visible yet): back off briefly so the loop does not hammer the API server.
  if (( SECONDS == JOB_SLICE_STARTED )); then
    sleep 2
  fi
done

if [[ "$JOB_DONE" == false ]]; then
  log "Test runner timed out after ${JOB_TIMEOUT_S}s"
  debug_pod_failure "inttest-runner" "app=inttest-runner"
  stage_fail "integration_tests"
  PIPELINE_EXIT_CODE=1
fi

# ══════════════════════════════════════════════════════════════════════════════
# Stage: Collect results
# ══════════════════════════════════════════════════════════════════════════════

#######################################
# Extract one counter ("passed", "failed", "error") from pytest summary text.
# Arguments: $1 - text to search, $2 - counter label
# Outputs:   the last matching count on stdout, or 0 when there is none
#######################################
pytest_summary_count() {
  local text="$1"
  local label="$2"
  local count
  count=$(grep -oE "[0-9]+ ${label}" <<<"$text" | tail -n 1 | grep -oE '^[0-9]+' || true)
  printf '%s\n' "${count:-0}"
}

log "Collecting test results ..."

# Get the runner pod name
RUNNER_POD=$(kubectl get pods -n "$NAMESPACE" -l app=inttest-runner \
  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)

if [[ -n "$RUNNER_POD" ]]; then
  # Fetch the logs once; the same text is echoed to the console and parsed
  # below, so the reported counts always match what was printed.
  log "Collecting test logs from $RUNNER_POD ..."
  TEST_OUTPUT=$(kubectl logs "$RUNNER_POD" -n "$NAMESPACE" 2>/dev/null || true)
  if [[ -n "$TEST_OUTPUT" ]]; then
    printf '%s\n' "$TEST_OUTPUT"
  fi

  # Try to copy profiling report into an unpredictable temp file.
  # NOTE(review): kubectl cp execs `tar` inside the container, so this is
  # expected to work only while the runner container is still running —
  # confirm against how runner.yaml keeps the pod alive.
  PROFILING_TMP=$(mktemp "${TMPDIR:-/tmp}/profiling-report.XXXXXX" 2>/dev/null || true)
  if [[ -n "$PROFILING_TMP" ]] \
    && kubectl cp "$NAMESPACE/$RUNNER_POD:/tmp/profiling-report.json" "$PROFILING_TMP" 2>/dev/null \
    && [[ -s "$PROFILING_TMP" ]]; then
    log "Profiling report collected"
    PROFILING_JSON="$PROFILING_TMP"
  else
    # Missing or empty report: an empty file must not be embedded in the results.
    log "No profiling report found (test may not have produced one)"
    if [[ -n "$PROFILING_TMP" ]]; then
      rm -f -- "$PROFILING_TMP"
    fi
  fi

  # Parse test counts from logs (pytest output format: "X passed, Y failed, Z errors")
  if [[ -n "$TEST_OUTPUT" ]]; then
    # Prefer pytest's final summary line ("... 172 passed, 6 failed in 12.3s")
    # so that ordinary log lines such as "HTTP 500 error" are not counted.
    # If no such line exists, fall back to scanning the whole log.
    TEST_SUMMARY=$(grep -E '[0-9]+ (passed|failed|errors?)(,.*)? in [0-9.]+s' <<<"$TEST_OUTPUT" \
      | tail -n 1 || true)
    if [[ -z "$TEST_SUMMARY" ]]; then
      TEST_SUMMARY="$TEST_OUTPUT"
    fi

    TESTS_PASSED=$(pytest_summary_count "$TEST_SUMMARY" "passed")
    TESTS_FAILED=$(pytest_summary_count "$TEST_SUMMARY" "failed")
    TESTS_ERRORS=$(pytest_summary_count "$TEST_SUMMARY" "error")
    # 10# forces base 10 so a count is never misread as octal.
    TESTS_TOTAL=$(( 10#$TESTS_PASSED + 10#$TESTS_FAILED + 10#$TESTS_ERRORS ))
  fi
else
  log "Could not find runner pod — results unavailable"
fi

# If tests had failures, ensure exit code reflects it
if (( 10#$TESTS_FAILED > 0 || 10#$TESTS_ERRORS > 0 )); then
  PIPELINE_EXIT_CODE=1
  # Keep the recorded stage status consistent with the exit code.
  if [[ "${STAGE_STATUS[integration_tests]:-}" == "ok" ]]; then
    STAGE_STATUS[integration_tests]="failed"
  fi
fi

# Mark integration_tests stage if not already done
if [[ -z "${STAGE_STATUS[integration_tests]:-}" ]]; then
  if (( PIPELINE_EXIT_CODE == 0 )); then
    stage_end "integration_tests" "ok"
  else
    stage_fail "integration_tests"
  fi
fi

# ══════════════════════════════════════════════════════════════════════════════
# Summary
# ══════════════════════════════════════════════════════════════════════════════
PIPELINE_END=$(date +%s)
PIPELINE_DURATION=$(( PIPELINE_END - PIPELINE_START ))

echo ""
log "═══════════════════════════════════════════════════"
log " Pipeline Summary"
log "═══════════════════════════════════════════════════"
log " Namespace: $NAMESPACE"
log " Image tag: $IMAGE_TAG"
log " Duration: ${PIPELINE_DURATION}s"
log " Tests: ${TESTS_PASSED} passed, ${TESTS_FAILED} failed, ${TESTS_ERRORS} errors"
log " Exit code: $PIPELINE_EXIT_CODE"
log "═══════════════════════════════════════════════════"
echo ""

# Teardown + results writing handled by the EXIT trap
exit "$PIPELINE_EXIT_CODE"