fix(ci): add retry loop to health check, increase wait times

- Replace single-shot curl with 12-attempt retry (15s between attempts)
- Increase deploy sleep 15s -> 30s to give Swarm time to stabilize
- Increase healthcheck initial wait 40s -> 60s
- Fixes race condition where Traefik routes /api/health to frontend before backend registers
This commit is contained in:
MatheusAlves96 2026-04-22 23:57:20 -03:00
parent e1c2a33686
commit 7a53865408

View file

@ -135,7 +135,7 @@ jobs:
--prune \
saas-imobiliaria
sleep 15
sleep 30
docker stack services saas-imobiliaria"
# ────────────────────────────────────────────────────────────────────────────
@ -153,7 +153,7 @@ jobs:
run: apk add --no-cache curl openssl
# Fixed pause before the checks that follow (Frontend HTTPS, Backend /api/health,
# TLS certificate validity) are run.
# The two `run` lines are the before/after sides of this diff hunk: the wait
# was raised from 40s to 60s.
- name: Wait for stack to stabilize
run: sleep 40
run: sleep 60
- name: Frontend HTTPS
run: |
@ -169,11 +169,17 @@ jobs:
- name: Backend /api/health
run: |
R=$(curl -s --max-time 15 --resolve "${{ vars.DOMAIN }}:443:${{ vars.SSH_HOST }}" "https://${{ vars.DOMAIN }}/api/health")
S=$(curl -s -o /dev/null -w "%{http_code}" --max-time 15 --resolve "${{ vars.DOMAIN }}:443:${{ vars.SSH_HOST }}" "https://${{ vars.DOMAIN }}/api/health")
echo "Health: $S → $R"
[ "$S" = "200" ] || (echo "❌ Health falhou ($S)" && exit 1)
echo "$R" | grep -q '"db": "ok"' || (echo "❌ DB não conectado" && exit 1)
# Poll /api/health up to 12 times, 15s apart, until it answers HTTP 200 with a
# body containing "db": "ok". Exits 1 if the 12th attempt is still unhealthy.
for i in $(seq 1 12); do
  # A single request yields both body and status: -w appends the status code on
  # its own final line, so S and R always describe the same response.
  # `|| true` keeps a connection error or timeout (non-zero curl exit) from
  # aborting the whole step when the shell runs with errexit, which is the
  # default for Actions `run` steps; such an attempt is simply retried.
  # NOTE(review): -k skips certificate verification here; presumably that is
  # covered by the separate "TLS certificate validity" step - confirm.
  OUT=$(curl -sk -w '\n%{http_code}' --max-time 15 --resolve "${{ vars.DOMAIN }}:443:${{ vars.SSH_HOST }}" "https://${{ vars.DOMAIN }}/api/health") || true
  # Last line is the status code ("000" when curl got no HTTP response).
  S=$(printf '%s\n' "$OUT" | tail -n 1)
  # Everything before the last line is the response body.
  R=$(printf '%s\n' "$OUT" | sed '$d')
  echo "Health attempt $i: $S → $R"
  if [ "$S" = "200" ] && echo "$R" | grep -q '"db": "ok"'; then
    echo "✅ Backend healthy"
    break
  fi
  # Out of attempts: fail the step explicitly instead of relying on && chaining.
  if [ "$i" -eq 12 ]; then
    echo "❌ Health falhou após 12 tentativas"
    exit 1
  fi
  sleep 15
done
- name: TLS certificate validity
run: |