From 5289f0f19556627aa09f88540dd90ba0482a0c6b Mon Sep 17 00:00:00 2001
From: Celes Renata
Date: Mon, 20 Apr 2026 07:47:10 +0000
Subject: [PATCH] fix: use kubectl wait for job completion detection in inttest pipeline

The polling loop checked conditions[0].type, which missed the Complete
condition when it wasn't at index 0. Switch to
kubectl wait --for=condition=complete, which matches the condition by
type.

kubectl wait only returns early for the condition it is watching, so a
single 600s wait would block for the whole timeout whenever the job
fails. Wait in 5s slices instead and check the Failed condition after
each slice, so test failures are still reported within a few seconds.
---
Reviewer note: the context-line indentation below was reconstructed
(4 spaces assumed) and the index hashes were not regenerated. Re-create
this patch with git format-patch before applying it.

 infra/inttest/run_pipeline.sh | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/infra/inttest/run_pipeline.sh b/infra/inttest/run_pipeline.sh
index f08a8fd..bcad48f 100755
--- a/infra/inttest/run_pipeline.sh
+++ b/infra/inttest/run_pipeline.sh
@@ -428,24 +428,30 @@ envsubst < "$REPO_ROOT/infra/inttest/runner.yaml" \
     | kubectl apply -n "$NAMESPACE" -f -
 
 log "Waiting for test runner to complete (timeout: 600s) ..."
-# Wait for either complete or failed — whichever comes first
+# Use kubectl wait, which matches the Complete condition by type instead of
+# by index. kubectl wait only returns early for the condition it watches, so
+# wait in short slices and check for Failed after each one; a single 600s
+# wait would sit out the whole timeout whenever the tests fail.
 JOB_DONE=false
-for i in $(seq 1 120); do
-    STATUS=$(kubectl get job inttest-runner -n "$NAMESPACE" -o jsonpath='{.status.conditions[0].type}' 2>/dev/null || true)
-    if [ "$STATUS" = "Complete" ]; then
+WAIT_DEADLINE=$(( $(date +%s) + 600 ))
+while [ "$(date +%s)" -lt "$WAIT_DEADLINE" ]; do
+    if kubectl wait --for=condition=complete job/inttest-runner -n "$NAMESPACE" --timeout=5s 2>/dev/null; then
         log "Test runner completed successfully"
         stage_end "integration_tests" "ok"
         JOB_DONE=true
         break
-    elif [ "$STATUS" = "Failed" ]; then
+    fi
+    FAILED=$(kubectl get job inttest-runner -n "$NAMESPACE" -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null || true)
+    if [ "$FAILED" = "True" ]; then
         log "Test runner job reported failure (tests failed)"
         stage_fail "integration_tests"
         PIPELINE_EXIT_CODE=1
         JOB_DONE=true
         break
     fi
-    sleep 5
+    # Back off briefly so an immediate kubectl error cannot spin the loop.
+    sleep 1
 done
 if [ "$JOB_DONE" = false ]; then
     log "Test runner timed out after 600s"
     debug_pod_failure "inttest-runner" "app=inttest-runner"