fix: bump seed pod timeout to 5m and add debug diagnostics on pipeline failures

This commit is contained in:
Celes Renata
2026-04-19 06:34:58 +00:00
parent 19b63dd369
commit ebafe795c1
+42 -1
View File
@@ -88,6 +88,29 @@ stage_fail() {
log "✗ Stage: $name FAILED after ${STAGE_DURATION[$name]}s"
}
#######################################
# Print best-effort diagnostics for one or more failed pods.
#
# When a label selector is given, every pod it matches is diagnosed (a Job
# that retried, or a multi-replica workload, can own several pods and the
# failing one is not necessarily the first in the list). When no label is
# given, or the selector matches nothing, the literal pod name in $1 is used.
#
# Every kubectl call is guarded with `|| true` so that collecting diagnostics
# can never change the caller's failure handling.
#
# Globals:
#   NAMESPACE (read) - namespace passed to every kubectl invocation
# Arguments:
#   $1 - pod name used when no label is supplied or the label matches no pod
#   $2 - optional label selector, e.g. "app=postgres"
# Outputs:
#   Section headers through log; kubectl output (stderr merged) on stdout
#######################################
debug_pod_failure() {
  # Default $1 so a call without arguments does not abort under `set -u`.
  local pod_name="${1:-}"
  local label="${2:-}"
  local targets=""
  local pod prev_logs

  log "─── DEBUG: pod failure diagnostics ───"

  if [ -n "$label" ]; then
    # One pod name per line; an empty match yields empty output rather than
    # the "index out of bounds" error that '{.items[0]...}' produces.
    targets=$(kubectl get pods -n "$NAMESPACE" -l "$label" \
      -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true)
  fi
  if [ -z "$targets" ]; then
    targets="$pod_name"
  fi

  # Read the pod list on fd 3 so nothing inside the loop can consume it
  # through stdin.
  while IFS= read -r -u 3 pod; do
    [ -n "$pod" ] || continue

    log "Pod describe ($pod):"
    kubectl describe pod "$pod" -n "$NAMESPACE" 2>&1 | tail -40 || true

    log "Pod logs ($pod):"
    kubectl logs "$pod" -n "$NAMESPACE" --tail=60 2>&1 || true

    # After a restart the current container's log may be empty; the output
    # of the terminated instance is only reachable with --previous. kubectl
    # errors when there is no previous instance, so stay silent in that case.
    prev_logs=$(kubectl logs "$pod" -n "$NAMESPACE" --previous --tail=60 2>/dev/null || true)
    if [ -n "$prev_logs" ]; then
      log "Pod logs, previous container ($pod):"
      printf '%s\n' "$prev_logs"
    fi

    log "Pod status:"
    kubectl get pod "$pod" -n "$NAMESPACE" -o wide 2>&1 || true
  done 3<<<"$targets"

  # Namespace-wide events are printed once, not once per pod.
  log "Recent events in namespace:"
  kubectl get events -n "$NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
  log "─── END DEBUG ───"
}
# ── Parse CLI args ───────────────────────────────────────────────────────────
while [[ $# -gt 0 ]]; do
case $1 in
@@ -268,6 +291,7 @@ envsubst < "$REPO_ROOT/infra/inttest/minio.yaml" | kubectl apply -n "$NAMESPACE"
log "Waiting for postgres readiness ..."
if ! kubectl wait --for=condition=ready pod -l app=postgres -n "$NAMESPACE" --timeout=120s; then
log "FATAL: PostgreSQL did not become ready"
debug_pod_failure "postgres" "app=postgres"
stage_fail "infra_deploy"
PIPELINE_EXIT_CODE=2
exit 2
@@ -276,6 +300,7 @@ fi
log "Waiting for redis readiness ..."
if ! kubectl wait --for=condition=ready pod -l app=redis -n "$NAMESPACE" --timeout=60s; then
log "FATAL: Redis did not become ready"
debug_pod_failure "redis" "app=redis"
stage_fail "infra_deploy"
PIPELINE_EXIT_CODE=2
exit 2
@@ -284,13 +309,19 @@ fi
# Block until the MinIO pod reports Ready. On timeout, dump pod diagnostics,
# mark the infra_deploy stage as failed and abort the pipeline with exit code 2.
log "Waiting for minio readiness ..."
if ! kubectl wait --for=condition=ready pod -l app=minio -n "$NAMESPACE" --timeout=60s; then
  log "FATAL: MinIO did not become ready"
  debug_pod_failure "minio" "app=minio"
  stage_fail "infra_deploy"
  PIPELINE_EXIT_CODE=2
  exit 2
fi

# The bucket-init job is non-fatal: if it does not complete in time, only a
# warning plus the job's logs and description are printed, and the stage still
# ends "ok". The job is waited on exactly once, so the 120s in the warning is
# the real upper bound (a preceding discarded 60s wait would add to it).
log "Waiting for minio-bucket-init job ..."
if ! kubectl wait --for=condition=complete job/minio-bucket-init -n "$NAMESPACE" --timeout=120s; then
  log "WARNING: minio-bucket-init job did not complete within 120s"
  log "Bucket-init pod logs:"
  # NOTE(review): assumes the job's pod template carries app=minio-bucket-init;
  # confirm against infra/inttest/minio.yaml.
  kubectl logs -l app=minio-bucket-init -n "$NAMESPACE" --tail=30 2>&1 || true
  kubectl describe job/minio-bucket-init -n "$NAMESPACE" 2>&1 | tail -20 || true
fi

stage_end "infra_deploy" "ok"
@@ -307,6 +338,7 @@ if ! kubectl run seed-sandbox \
--restart=Never \
--rm \
--attach \
--pod-running-timeout=5m \
--namespace="$NAMESPACE" \
--image-pull-policy=Always \
--overrides='{
@@ -326,6 +358,7 @@ if ! kubectl run seed-sandbox \
--env="MINIO_SECRET_KEY=minioadmin" \
--command -- python -m tests.integration.seed_sandbox; then
log "FATAL: Database seed failed"
debug_pod_failure "seed-sandbox" "run=seed-sandbox"
stage_fail "seed_data"
PIPELINE_EXIT_CODE=2
exit 2
@@ -337,6 +370,7 @@ if ! kubectl run seed-minio \
--restart=Never \
--rm \
--attach \
--pod-running-timeout=5m \
--namespace="$NAMESPACE" \
--image-pull-policy=Always \
--overrides='{
@@ -351,6 +385,7 @@ if ! kubectl run seed-minio \
--env="MINIO_SECRET_KEY=minioadmin" \
--command -- python -m tests.integration.seed_minio; then
log "FATAL: MinIO seed failed"
debug_pod_failure "seed-minio" "run=seed-minio"
stage_fail "seed_data"
PIPELINE_EXIT_CODE=2
exit 2
@@ -371,6 +406,11 @@ envsubst < "$REPO_ROOT/infra/inttest/services.yaml" \
log "Waiting for all API services to become ready ..."
if ! kubectl wait --for=condition=ready pod -l tier=api -n "$NAMESPACE" --timeout=120s; then
log "FATAL: API services did not become ready"
log "Pod statuses:"
kubectl get pods -n "$NAMESPACE" -l tier=api -o wide 2>&1 || true
for pod in $(kubectl get pods -n "$NAMESPACE" -l tier=api --no-headers -o custom-columns=':metadata.name' 2>/dev/null); do
debug_pod_failure "$pod"
done
stage_fail "service_deploy"
PIPELINE_EXIT_CODE=2
exit 2
@@ -398,6 +438,7 @@ else
if kubectl wait --for=condition=failed job/inttest-runner -n "$NAMESPACE" --timeout=5s 2>/dev/null; then
log "Test runner job reported failure"
fi
debug_pod_failure "inttest-runner" "app=inttest-runner"
stage_fail "integration_tests"
PIPELINE_EXIT_CODE=1
fi