fix: bump seed pod timeout to 5m and add debug diagnostics on pipeline failures
This commit is contained in:
@@ -88,6 +88,29 @@ stage_fail() {
|
||||
log "✗ Stage: $name FAILED after ${STAGE_DURATION[$name]}s"
|
||||
}
|
||||
|
||||
#######################################
# Best-effort dump of Kubernetes diagnostics for a pod that failed a
# pipeline stage.
# Globals:
#   NAMESPACE (read) - namespace passed to every kubectl call
# Arguments:
#   $1 - pod name to inspect; used as-is unless the label lookup below
#        finds a pod
#   $2 - optional label selector (e.g. "app=postgres"); when it matches,
#        the name of the first matching pod replaces $1
# Outputs:
#   Section headers through log; kubectl output with stderr merged into
#   stdout, describe trimmed to the last 40 lines and events to the last 20
# Returns:
#   Status of the final log call; kubectl failures are deliberately ignored
#   so a diagnostics problem never masks the original pipeline failure.
#######################################
debug_pod_failure() {
  # ${1:-} keeps the function usable under `set -u` when no name is passed.
  local pod_name="${1:-}"
  local label="${2:-}"
  local found_pod=""

  log "─── DEBUG: pod failure diagnostics ───"

  if [[ -n "$label" ]]; then
    # Find pod by label selector. Lookup errors (no matching pod, API not
    # reachable) are swallowed on purpose: the caller-supplied name stays
    # in effect as the fallback.
    found_pod=$(kubectl get pods -n "$NAMESPACE" -l "$label" \
      -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
    if [[ -n "$found_pod" ]]; then
      pod_name="$found_pod"
    fi
  fi

  if [[ -n "$pod_name" ]]; then
    log "Pod describe ($pod_name):"
    kubectl describe pod "$pod_name" -n "$NAMESPACE" 2>&1 | tail -40 || true
    log "Pod logs ($pod_name):"
    kubectl logs "$pod_name" -n "$NAMESPACE" --tail=60 2>&1 || true
    log "Pod status:"
    kubectl get pod "$pod_name" -n "$NAMESPACE" -o wide 2>&1 || true
  else
    # Without a name the pod-scoped commands cannot target anything useful,
    # so only the namespace-wide events below are reported.
    log "No pod name resolved; skipping pod-level diagnostics"
  fi

  log "Recent events in namespace:"
  kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
  log "─── END DEBUG ───"
}
|
||||
|
||||
# ── Parse CLI args ───────────────────────────────────────────────────────────
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
@@ -268,6 +291,7 @@ envsubst < "$REPO_ROOT/infra/inttest/minio.yaml" | kubectl apply -n "$NAMESPACE"
|
||||
log "Waiting for postgres readiness ..."
|
||||
if ! kubectl wait --for=condition=ready pod -l app=postgres -n "$NAMESPACE" --timeout=120s; then
|
||||
log "FATAL: PostgreSQL did not become ready"
|
||||
debug_pod_failure "postgres" "app=postgres"
|
||||
stage_fail "infra_deploy"
|
||||
PIPELINE_EXIT_CODE=2
|
||||
exit 2
|
||||
@@ -276,6 +300,7 @@ fi
|
||||
log "Waiting for redis readiness ..."
|
||||
if ! kubectl wait --for=condition=ready pod -l app=redis -n "$NAMESPACE" --timeout=60s; then
|
||||
log "FATAL: Redis did not become ready"
|
||||
debug_pod_failure "redis" "app=redis"
|
||||
stage_fail "infra_deploy"
|
||||
PIPELINE_EXIT_CODE=2
|
||||
exit 2
|
||||
@@ -284,13 +309,19 @@ fi
|
||||
log "Waiting for minio readiness ..."
|
||||
if ! kubectl wait --for=condition=ready pod -l app=minio -n "$NAMESPACE" --timeout=60s; then
|
||||
log "FATAL: MinIO did not become ready"
|
||||
debug_pod_failure "minio" "app=minio"
|
||||
stage_fail "infra_deploy"
|
||||
PIPELINE_EXIT_CODE=2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
log "Waiting for minio-bucket-init job ..."
|
||||
kubectl wait --for=condition=complete job/minio-bucket-init -n "$NAMESPACE" --timeout=60s || true
|
||||
if ! kubectl wait --for=condition=complete job/minio-bucket-init -n "$NAMESPACE" --timeout=120s; then
|
||||
log "WARNING: minio-bucket-init job did not complete within 120s"
|
||||
log "Bucket-init pod logs:"
|
||||
kubectl logs -l app=minio-bucket-init -n "$NAMESPACE" --tail=30 2>&1 || true
|
||||
kubectl describe job/minio-bucket-init -n "$NAMESPACE" 2>&1 | tail -20 || true
|
||||
fi
|
||||
|
||||
stage_end "infra_deploy" "ok"
|
||||
|
||||
@@ -307,6 +338,7 @@ if ! kubectl run seed-sandbox \
|
||||
--restart=Never \
|
||||
--rm \
|
||||
--attach \
|
||||
--pod-running-timeout=5m \
|
||||
--namespace="$NAMESPACE" \
|
||||
--image-pull-policy=Always \
|
||||
--overrides='{
|
||||
@@ -326,6 +358,7 @@ if ! kubectl run seed-sandbox \
|
||||
--env="MINIO_SECRET_KEY=minioadmin" \
|
||||
--command -- python -m tests.integration.seed_sandbox; then
|
||||
log "FATAL: Database seed failed"
|
||||
debug_pod_failure "seed-sandbox" "run=seed-sandbox"
|
||||
stage_fail "seed_data"
|
||||
PIPELINE_EXIT_CODE=2
|
||||
exit 2
|
||||
@@ -337,6 +370,7 @@ if ! kubectl run seed-minio \
|
||||
--restart=Never \
|
||||
--rm \
|
||||
--attach \
|
||||
--pod-running-timeout=5m \
|
||||
--namespace="$NAMESPACE" \
|
||||
--image-pull-policy=Always \
|
||||
--overrides='{
|
||||
@@ -351,6 +385,7 @@ if ! kubectl run seed-minio \
|
||||
--env="MINIO_SECRET_KEY=minioadmin" \
|
||||
--command -- python -m tests.integration.seed_minio; then
|
||||
log "FATAL: MinIO seed failed"
|
||||
debug_pod_failure "seed-minio" "run=seed-minio"
|
||||
stage_fail "seed_data"
|
||||
PIPELINE_EXIT_CODE=2
|
||||
exit 2
|
||||
@@ -371,6 +406,11 @@ envsubst < "$REPO_ROOT/infra/inttest/services.yaml" \
|
||||
log "Waiting for all API services to become ready ..."
|
||||
if ! kubectl wait --for=condition=ready pod -l tier=api -n "$NAMESPACE" --timeout=120s; then
|
||||
log "FATAL: API services did not become ready"
|
||||
log "Pod statuses:"
|
||||
kubectl get pods -n "$NAMESPACE" -l tier=api -o wide 2>&1 || true
|
||||
for pod in $(kubectl get pods -n "$NAMESPACE" -l tier=api --no-headers -o custom-columns=':metadata.name' 2>/dev/null); do
|
||||
debug_pod_failure "$pod"
|
||||
done
|
||||
stage_fail "service_deploy"
|
||||
PIPELINE_EXIT_CODE=2
|
||||
exit 2
|
||||
@@ -398,6 +438,7 @@ else
|
||||
if kubectl wait --for=condition=failed job/inttest-runner -n "$NAMESPACE" --timeout=5s 2>/dev/null; then
|
||||
log "Test runner job reported failure"
|
||||
fi
|
||||
debug_pod_failure "inttest-runner" "app=inttest-runner"
|
||||
stage_fail "integration_tests"
|
||||
PIPELINE_EXIT_CODE=1
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user