mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-13 03:19:01 +00:00
ci: add exponential-backoff retry to digest-validity guard
The Phase 2 commit's CI run (2026-05-13T19:50 against 69a2b5c) failed
on digest-validity.sh with HTTP 429 from ghcr.io while resolving the
lscr.io/linuxserver/openssh-server digest. ghcr.io rate-limits
unauthenticated manifest HEAD requests aggressively; the existing
guard had no retry, so a single 429 failed the whole CI gate.
Fix: retry on 429 / 502 / 503 / 504 with exponential backoff. As
implemented, each ref gets at most 3 attempts (the initial request plus
2 retries), sleeping 2s and then 4s between them; no 8s sleep is ever
reached, because the loop exits after the third attempt. Non-retryable
errors (400, 401, 403, 404, 5xx that aren't gateway-class) still fail
fast — we only retry on the transient-rate-limit + gateway-blip class.
Each retry logs the attempt count so a future operator investigating an
outage can see how many attempts happened before the final verdict.
The local re-run after the fix shows all 15 verifiable digests
resolve cleanly (no retries were needed on this particular run — the
429 was transient, as expected).
Not a Phase-1/2/3 regression; this is a pre-existing fragility in a
guard that's been in place since ci-pipeline-cleanup Phase 7. The
fix lands as a small follow-on to Phase 3 because the prompt's
recommended ratchet is 'CI guards should be reliable enough to gate
the build, or they should be advisory.'
This commit is contained in:
@@ -122,30 +122,53 @@ for ref in "${REFS[@]}"; do
|
|||||||
auth_header="Authorization: Bearer $tok"
|
auth_header="Authorization: Bearer $tok"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# HEAD the manifest by digest.
|
# HEAD the manifest by digest, with exponential-backoff retry on
|
||||||
if [ -n "$auth_header" ]; then
|
# transient registry errors (HTTP 429 rate-limit, 502/503/504 gateway
|
||||||
|
# blips). ghcr.io aggressively rate-limits unauthenticated HEAD
|
||||||
|
# requests against the linuxserver/* namespace; pre-2026-05-13 this
|
||||||
|
# caused intermittent CI failures on the Phase 2 commit's run
|
||||||
|
# (workflow log lscr.io/linuxserver/openssh-server → ghcr.io 429).
|
||||||
|
# Backoff schedule: 2s → 4s, max 3 attempts (initial request + 2 retries) per ref.
|
||||||
|
build_curl_args() {
|
||||||
|
if [ -n "$auth_header" ]; then
|
||||||
|
echo "-H|$auth_header"
|
||||||
|
fi
|
||||||
|
echo "-H|Accept: application/vnd.oci.image.index.v1+json"
|
||||||
|
echo "-H|Accept: application/vnd.docker.distribution.manifest.list.v2+json"
|
||||||
|
echo "-H|Accept: application/vnd.oci.image.manifest.v1+json"
|
||||||
|
echo "-H|Accept: application/vnd.docker.distribution.manifest.v2+json"
|
||||||
|
}
|
||||||
|
attempt=0
|
||||||
|
max_attempts=3
|
||||||
|
code="000"
|
||||||
|
while [ "$attempt" -lt "$max_attempts" ]; do
|
||||||
|
mapfile -t cargs < <(build_curl_args)
|
||||||
|
cargs_expanded=()
|
||||||
|
for arg in "${cargs[@]}"; do
|
||||||
|
IFS='|' read -r flag val <<< "$arg"
|
||||||
|
cargs_expanded+=("$flag" "$val")
|
||||||
|
done
|
||||||
code=$(curl -sS -o /dev/null -w "%{http_code}" \
|
code=$(curl -sS -o /dev/null -w "%{http_code}" \
|
||||||
-H "$auth_header" \
|
"${cargs_expanded[@]}" \
|
||||||
-H "Accept: application/vnd.oci.image.index.v1+json" \
|
|
||||||
-H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
|
|
||||||
-H "Accept: application/vnd.oci.image.manifest.v1+json" \
|
|
||||||
-H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
|
|
||||||
"https://${registry}/v2/${img}/manifests/${digest}")
|
"https://${registry}/v2/${img}/manifests/${digest}")
|
||||||
else
|
# Success or non-retryable failure → stop.
|
||||||
code=$(curl -sS -o /dev/null -w "%{http_code}" \
|
if [ "$code" = "200" ] || ! [[ "$code" =~ ^(429|502|503|504)$ ]]; then
|
||||||
-H "Accept: application/vnd.oci.image.index.v1+json" \
|
break
|
||||||
-H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
|
fi
|
||||||
-H "Accept: application/vnd.oci.image.manifest.v1+json" \
|
attempt=$((attempt + 1))
|
||||||
-H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
|
if [ "$attempt" -lt "$max_attempts" ]; then
|
||||||
"https://${registry}/v2/${img}/manifests/${digest}")
|
sleep_secs=$((2 ** attempt))
|
||||||
fi
|
echo " retry $attempt/$max_attempts after HTTP $code on $ref (sleep ${sleep_secs}s)"
|
||||||
|
sleep "$sleep_secs"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
if [ "$code" != "200" ]; then
|
if [ "$code" != "200" ]; then
|
||||||
echo "::error::digest does not resolve: ${ref}"
|
echo "::error::digest does not resolve: ${ref}"
|
||||||
echo " registry: $registry"
|
echo " registry: $registry"
|
||||||
echo " image: $img"
|
echo " image: $img"
|
||||||
echo " digest: $digest"
|
echo " digest: $digest"
|
||||||
echo " HTTP: $code"
|
echo " HTTP: $code (retried $attempt time(s) for 429/502/503/504)"
|
||||||
fail=1
|
fail=1
|
||||||
else
|
else
|
||||||
echo "OK $ref"
|
echo "OK $ref"
|
||||||
|
|||||||
Reference in New Issue
Block a user