fix(ci): staged deploy with crash detection and full diagnostics
CI/CD / CI · API (dotnet build + test) (push) Successful in 1m4s
CI/CD / CI · Admin API (dotnet build) (push) Successful in 35s
CI/CD / CI · Dashboard (tsc) (push) Successful in 1m4s
CI/CD / CI · Admin Web (tsc) (push) Successful in 35s
CI/CD / CI · Website (tsc) (push) Successful in 44s
CI/CD / CI · Koja (tsc) (push) Successful in 54s
CI/CD / Deploy · all services (push) Successful in 22s

- Start api alone first; web/admin-api each wait for their own step
  so a health-check wait never blocks unrelated services
- Detect crash-loops via RestartCount > 1 (restart:unless-stopped hides
  the exited state behind rapid restarts — count is reliable)
- Dump up to 120 lines of api logs immediately on crash/timeout
- Log infra network state (json) in attach step + failure dump so we
  can see exactly which aliases are registered on meezi_default
- admin-api and admin-web are now started in separate steps, same pattern

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-01 13:44:52 +03:30
parent dac59cd180
commit c3ca39ed15
+74 -24
View File
@@ -333,52 +333,102 @@ jobs:
docker network disconnect meezi_default meezi-redis 2>/dev/null || true
docker network connect --alias postgres meezi_default meezi-db
docker network connect --alias redis meezi_default meezi-redis
echo "=== infra network state ==="
docker inspect meezi-db --format='meezi-db networks={{json .NetworkSettings.Networks}}' 2>&1 || true
docker inspect meezi-redis --format='meezi-redis networks={{json .NetworkSettings.Networks}}' 2>&1 || true
- name: Deploy main app services
- name: Start API
# --no-deps skips all depends_on checks so compose starts api immediately
# without trying to verify postgres/redis health (they're not compose-managed).
run: docker compose up -d --no-deps api
- name: Wait for API healthy
# Poll ourselves so we can detect crashes early and print logs before
# restart-policy smothers them. Mirrors healthcheck: start_period=40s,
# interval=10s, retries=12 → up to 3 min total.
# Also checks RestartCount: restart:unless-stopped hides crashes behind
# rapid restarts, so state=exited is fleeting — a rising count tells us.
run: |
docker compose up -d --no-deps api web website koja
# Poll meezi-api every 5 s for at most 36 attempts (3 min).
#   - success: the container's health status becomes "healthy"
#   - failure: state is exited/dead, RestartCount exceeds 1, or attempt 36
#     is reached without a healthy status
# Every failure path prints the last 120 log lines and then exits 1.
echo "Waiting for meezi-api (up to 3 min)..."
for i in $(seq 1 36); do
  # Each probe falls back to a placeholder when docker inspect fails
  # (for example while the container does not exist yet).
  HEALTH=$(docker inspect --format='{{.State.Health.Status}}' meezi-api 2>/dev/null || echo "missing")
  STATE=$(docker inspect --format='{{.State.Status}}' meezi-api 2>/dev/null || echo "missing")
  RESTARTS=$(docker inspect --format='{{.RestartCount}}' meezi-api 2>/dev/null || echo "0")
  echo " [$i/36] state=$STATE health=$HEALTH restarts=$RESTARTS"

  if [ "$HEALTH" = "healthy" ]; then
    echo "✅ meezi-api healthy"
    break
  fi

  if [ "$STATE" = "exited" ] || [ "$STATE" = "dead" ]; then
    echo "❌ meezi-api crashed (state=$STATE) — logs:"
    # "|| true": a failed log dump must never change how the step exits.
    docker logs meezi-api 2>&1 | tail -120 || true
    exit 1
  fi

  # The exited state is short-lived under a restart policy, so a restart
  # count above 1 is used as the crash-loop signal.
  if [ "${RESTARTS:-0}" -gt 1 ]; then
    echo "❌ meezi-api crash-loop (restarts=$RESTARTS) — logs:"
    docker logs meezi-api 2>&1 | tail -120 || true
    exit 1
  fi

  # Timeout: exit 1 is unconditional. It was previously the last link of an
  # && chain, so a failing log dump could skip it and let the step pass.
  if [ "$i" = "36" ]; then
    echo "❌ meezi-api timeout (3 min)"
    docker logs meezi-api 2>&1 | tail -120 || true
    exit 1
  fi

  sleep 5
done
- name: Deploy admin services
- name: Start web services
# API is healthy at this point; start the three Next.js frontends.
run: docker compose up -d --no-deps web website koja
- name: Start admin API
run: |
docker compose \
-f docker-compose.yml \
-f docker-compose.admin.yml \
up -d \
--no-deps \
admin-api admin-web
- name: Wait for main API healthy
run: |
for i in $(seq 1 24); do
STATUS=$(docker inspect --format='{{.State.Health.Status}}' meezi-api 2>/dev/null || echo "missing")
echo " [$i/24] $STATUS"
[ "$STATUS" = "healthy" ] && echo "✅ meezi-api healthy" && break
[ "$i" = "24" ] && echo "❌ meezi-api timeout" && docker compose logs --tail=40 api && exit 1
sleep 5
done
up -d --no-deps admin-api
- name: Wait for admin API healthy
run: |
for i in $(seq 1 24); do
STATUS=$(docker inspect --format='{{.State.Health.Status}}' meezi-admin-api 2>/dev/null || echo "missing")
echo " [$i/24] $STATUS"
[ "$STATUS" = "healthy" ] && echo "✅ meezi-admin-api healthy" && break
[ "$i" = "24" ] && echo "❌ meezi-admin-api timeout" && docker compose -f docker-compose.yml -f docker-compose.admin.yml logs --tail=40 admin-api && exit 1
# Poll meezi-admin-api every 5 s for at most 36 attempts (3 min).
#   - success: the container's health status becomes "healthy"
#   - failure: state is exited/dead, RestartCount exceeds 1, or attempt 36
#     is reached without a healthy status
# Every failure path prints the last 80 log lines and then exits 1.
echo "Waiting for meezi-admin-api (up to 3 min)..."
for i in $(seq 1 36); do
  # Each probe falls back to a placeholder when docker inspect fails
  # (for example while the container does not exist yet).
  HEALTH=$(docker inspect --format='{{.State.Health.Status}}' meezi-admin-api 2>/dev/null || echo "missing")
  STATE=$(docker inspect --format='{{.State.Status}}' meezi-admin-api 2>/dev/null || echo "missing")
  RESTARTS=$(docker inspect --format='{{.RestartCount}}' meezi-admin-api 2>/dev/null || echo "0")
  echo " [$i/36] state=$STATE health=$HEALTH restarts=$RESTARTS"

  if [ "$HEALTH" = "healthy" ]; then
    echo "✅ meezi-admin-api healthy"
    break
  fi

  if [ "$STATE" = "exited" ] || [ "$STATE" = "dead" ]; then
    echo "❌ meezi-admin-api crashed (state=$STATE) — logs:"
    # "|| true": a failed log dump must never change how the step exits.
    docker logs meezi-admin-api 2>&1 | tail -80 || true
    exit 1
  fi

  # The exited state is short-lived under a restart policy, so a restart
  # count above 1 is used as the crash-loop signal.
  if [ "${RESTARTS:-0}" -gt 1 ]; then
    echo "❌ meezi-admin-api crash-loop (restarts=$RESTARTS) — logs:"
    docker logs meezi-admin-api 2>&1 | tail -80 || true
    exit 1
  fi

  # Timeout: exit 1 is unconditional. It was previously the last link of an
  # && chain, so a failing log dump could skip it and let the step pass.
  if [ "$i" = "36" ]; then
    echo "❌ meezi-admin-api timeout (3 min)"
    docker logs meezi-admin-api 2>&1 | tail -80 || true
    exit 1
  fi

  sleep 5
done
- name: Start admin web
# Brings up only the admin-web service, using the base compose file plus the
# admin override file. --no-deps keeps compose from starting or checking the
# services admin-web depends on.
run: |
docker compose \
-f docker-compose.yml \
-f docker-compose.admin.yml \
up -d --no-deps admin-web
- name: Show all running containers
# always() makes this step run whether the earlier steps passed or failed,
# so the container list from both compose files is printed on every run.
if: always()
run: docker compose -f docker-compose.yml -f docker-compose.admin.yml ps
- name: Dump API logs on failure
- name: Dump logs on failure
if: failure()
run: |
echo "=== meezi-api logs ==="
docker logs meezi-api --tail=60 2>&1 || true
docker logs meezi-api --tail=120 2>&1 || true
echo "=== meezi-admin-api logs ==="
docker logs meezi-admin-api --tail=30 2>&1 || true
docker logs meezi-admin-api --tail=80 2>&1 || true
# Network diagnostics for the failure dump: first the meezi_default network
# itself, then the per-container network attachments (as JSON) of the two
# infra containers. Every command is best-effort and never fails the step.
echo "=== meezi_default network ==="
docker network inspect meezi_default 2>&1 || true
for infra in meezi-db meezi-redis; do
  echo "=== $infra network state ==="
  docker inspect "$infra" --format='{{json .NetworkSettings.Networks}}' 2>&1 || true
done
# Intentionally no image pruning — disk cleanup is done manually on the server.