mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 21:51:30 +00:00
feat(ci): item-6 cold-DB compose smoke script (CI wiring in Phase 5)
scripts/ci-guards/cold-db-compose-smoke.sh — wipes the postgres
volume (docker compose down -v), brings the stack up cold, mints a
day-0 admin via /api/v1/auth/bootstrap, issues + renews + revokes a
test certificate, asserts the three audit rows exist, tears down.
Catches the bug class fixed by commit 6444e13 (the 2026-05-09
migration 000045 broken INSERT that the warm-DB integration suite
missed) and, more generally, the 2026-04-30 migration regression class.
Tunables via environment:
- COLD_DB_SMOKE_STARTUP_TIMEOUT (default 300s/svc)
- COLD_DB_SMOKE_PROBE_TIMEOUT (default 180s)
- COLD_DB_SMOKE_SERVER_URL (default https://localhost:8443)
- COLD_DB_SMOKE_CACERT (default deploy/test/certs/ca.crt)
On failure: dumps `docker compose logs --tail 200` for postgres,
certctl-server, certctl-agent, certctl-tls-init so the CI failure is
actionable without a re-run.
Sandbox VERIFICATION: bash syntax-check (bash -n) passes. Full smoke
run NOT executed in the sandbox — no Docker available here. The
operator runs it from their workstation as the Phase 6 negative-test
ladder (introducing a broken migration; confirming the script fails
with the migration error in the dumped logs).
CI wiring (.github/workflows/ci.yml::cold-db-compose-smoke job)
lands in the next commit (Phase 5).
Audit-Closes: post-v2.1.0-anti-rot/item-6
This commit is contained in:
Executable
+180
@@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env bash
|
||||
# scripts/ci-guards/cold-db-compose-smoke.sh
|
||||
#
|
||||
# Per post-v2.1.0 anti-rot item 6 (Auditable Codebase Bundle).
|
||||
#
|
||||
# The bug class this catches: a migration whose .up.sql is broken in a
|
||||
# way the unit tests / integration suite misses because they reuse a
|
||||
# warm DB across runs. The canonical case: 2026-05-09 migration
|
||||
# 000045's broken INSERT, surfaced only by a cold `docker compose up`
|
||||
# and fixed in commit 6444e13. This guard runs that very check on
|
||||
# every push.
|
||||
#
|
||||
# Workflow:
|
||||
# 1. docker compose down -v --remove-orphans (wipe volumes)
|
||||
# 2. docker compose up -d (cold boot)
|
||||
# 3. wait up to 5 min for healthchecks (postgres,
|
||||
# certctl-server,
|
||||
# certctl-agent)
|
||||
# 4. mint a day-0 admin via /api/v1/auth/bootstrap (Bundle 1 path)
|
||||
# 5. issue + renew + revoke a certificate (HTTP API)
|
||||
# 6. assert audit rows for each step
|
||||
# 7. docker compose down -v (clean up)
|
||||
#
|
||||
# Total runtime: ~3-5 min on warm Docker, ~5-10 min cold.
|
||||
#
|
||||
# Failure paths dump `docker compose logs` for every service to make
|
||||
# CI failures actionable without a re-run.
|
||||
#
|
||||
# This script is invoked by .github/workflows/ci.yml::cold-db-compose-smoke.
|
||||
# Runs locally for developers via `bash scripts/ci-guards/cold-db-compose-smoke.sh`.
|
||||
|
||||
# Abort on the first failing command, and treat a pipeline as failed when
# any stage of it fails (not just the last one).
set -eo pipefail

# This script lives two directories below the repository root; every
# `docker compose` invocation that follows runs from <repo>/deploy.
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
cd "${REPO_ROOT}/deploy"

# Tunables. Each one reads an optional COLD_DB_SMOKE_* environment override
# and falls back to the default shown when the override is unset or empty.
STARTUP_TIMEOUT_SECONDS="${COLD_DB_SMOKE_STARTUP_TIMEOUT:-300}"   # per-service wait, 5 min
PROBE_TIMEOUT_SECONDS="${COLD_DB_SMOKE_PROBE_TIMEOUT:-180}"       # 3 min
SERVER_URL="${COLD_DB_SMOKE_SERVER_URL:-https://localhost:8443}"
CACERT_PATH="${COLD_DB_SMOKE_CACERT:-${REPO_ROOT}/deploy/test/certs/ca.crt}"
|
||||
|
||||
# --- helpers ----------------------------------------------------------------
|
||||
|
||||
# log <words...> — write one line to stdout, prefixed with the script tag.
# All arguments are joined with single spaces.
log() {
  printf '[cold-db-smoke] %s\n' "$*"
}
|
||||
|
||||
# dump_logs_on_failure — print the compose service table followed by the
# last 200 log lines of each known service, so a failed run can be diagnosed
# from its output alone.
#
# Arguments: none.
# Outputs:   everything goes to stdout (stderr of `docker compose logs` is
#            folded into stdout).
# Returns:   always 0 — every docker call is best-effort (`|| true`) so a
#            missing service or a dead daemon cannot mask the original
#            failure.
dump_logs_on_failure() {
  # Keep the loop variable local so running this handler never clobbers a
  # caller's own `svc`.
  local svc

  log "FAILURE — dumping service logs:"
  docker compose ps || true
  for svc in postgres certctl-server certctl-agent certctl-tls-init; do
    echo
    echo "==== $svc ===="
    docker compose logs --no-color --tail 200 "$svc" 2>&1 || true
  done
}
|
||||
|
||||
# Dump service logs whenever a command fails outside a conditional context.
# Note: bash does not run the ERR trap for an explicit `exit`.
trap dump_logs_on_failure ERR
|
||||
|
||||
# wait_for_service_healthy <service>
#
# Poll `docker compose ps --format json <service>` every 2 seconds until the
# service reports "healthy" (or "running" when it has no health status), or
# until STARTUP_TIMEOUT_SECONDS have elapsed.
#
# Globals:   STARTUP_TIMEOUT_SECONDS (read)
# Arguments: $1 - compose service name
# Outputs:   one progress line via log()
# Returns:   0 once the service is healthy/running, 1 on timeout
wait_for_service_healthy() {
  local svc="$1"
  local deadline=$(( $(date +%s) + STARTUP_TIMEOUT_SECONDS ))
  # Initialised up front so the timeout message is meaningful even when the
  # loop body never executes (e.g. a zero timeout).
  local state="not-up"

  while [ "$(date +%s)" -lt "$deadline" ]; do
    # `|| true`: with `set -e` + `pipefail`, a transient non-zero exit from
    # `docker compose ps` would otherwise abort the whole script instead of
    # being retried. The Python stage still prints a state for empty input.
    state="$(docker compose ps --format json "$svc" 2>/dev/null | python3 -c '
import json, sys

try:
    raw = sys.stdin.read().strip()
    # docker compose ps emits one JSON object per line (NDJSON) on newer
    # versions; older versions emit a JSON array. Handle both.
    if raw.startswith("["):
        rows = json.loads(raw)
    else:
        rows = [json.loads(l) for l in raw.splitlines() if l.strip()]
    if not rows:
        print("not-up")
    else:
        row = rows[0]
        # Health is present but empty for a service without a healthcheck,
        # so fall back to State on any falsy value, not only a missing key.
        print(row.get("Health") or row.get("State") or "?")
except Exception as e:
    print(f"err: {e}")
')" || true

    if [ "$state" = "healthy" ] || [ "$state" = "running" ]; then
      log " $svc → $state"
      return 0
    fi
    sleep 2
  done

  log " $svc did NOT reach healthy within $STARTUP_TIMEOUT_SECONDS s (last state: $state)"
  return 1
}
|
||||
|
||||
# http_call <method> <path> [data_json]
#
# Issue one HTTP request against $SERVER_URL with curl and print the response
# body on stdout.
#
# Globals:   SERVER_URL (read), CACERT_PATH (read), KEY (read, optional)
# Arguments: $1 - HTTP method
#            $2 - request path, appended verbatim to SERVER_URL
#            $3 - optional JSON request body
# Returns:   curl's exit status
http_call() {
  local verb="$1" route="$2" payload="${3:-}"
  local -a curl_opts=(--silent --show-error --max-time 30 -X "$verb" "${SERVER_URL}${route}")

  # Verify TLS against the CA file when it exists; otherwise skip verification.
  if [[ ! -f "$CACERT_PATH" ]]; then
    curl_opts+=(--insecure)
  else
    curl_opts+=(--cacert "$CACERT_PATH")
  fi

  # Send the bearer key only when KEY is set and non-empty.
  [[ -z "${KEY:-}" ]] || curl_opts+=(-H "Authorization: Bearer $KEY")

  # Attach a JSON body only when one was supplied.
  [[ -z "$payload" ]] || curl_opts+=(-H "Content-Type: application/json" -d "$payload")

  curl "${curl_opts[@]}"
}
|
||||
|
||||
# --- the smoke ---------------------------------------------------------------
|
||||
|
||||
# smoke_fail <message> [body]
# Explicit-failure path: log the message, print the optional response body,
# dump the service logs, and exit 1. Needed because a bare `exit 1` does not
# fire the ERR trap, so these paths would otherwise produce no log dump.
smoke_fail() {
  log " $1"
  if [ -n "${2:-}" ]; then
    printf '%s\n' "$2"
  fi
  dump_logs_on_failure
  exit 1
}

log "1/7 down -v --remove-orphans (wiping postgres volume)"
# Best-effort: there may be nothing to tear down on a fresh machine.
docker compose down -v --remove-orphans 2>&1 | tail -3 || true

log "2/7 up -d (cold boot)"
docker compose up -d 2>&1 | tail -3

log "3/7 waiting for healthchecks (timeout ${STARTUP_TIMEOUT_SECONDS}s/svc)"
wait_for_service_healthy postgres
wait_for_service_healthy certctl-server
# certctl-agent depends on the demo seed having run; only wait when
# CERTCTL_DEMO_SEED=true is in effect (the bundled clean compose
# doesn't always seed). Best-effort.
wait_for_service_healthy certctl-agent || log " (agent healthcheck skipped — non-demo compose)"

log "4/7 minting day-0 admin via /api/v1/auth/bootstrap"
TOKEN="$(openssl rand -base64 32 | tr -d '\n')"
# Restart the server with the bootstrap token so the strategy is
# active. Compose --env-file is the lightest path.
# The token is a secret: write it to a private, unpredictable temp file and
# remove that file on every exit path, not only on success.
SMOKE_ENV_FILE="$(mktemp "${TMPDIR:-/tmp}/cold-db-smoke.XXXXXX")"
trap 'rm -f -- "$SMOKE_ENV_FILE"' EXIT
printf 'CERTCTL_BOOTSTRAP_TOKEN=%s\n' "$TOKEN" > "$SMOKE_ENV_FILE"
docker compose --env-file "$SMOKE_ENV_FILE" up -d --force-recreate certctl-server 2>&1 | tail -2
sleep 5
wait_for_service_healthy certctl-server

BOOTSTRAP_BODY="$(http_call POST /api/v1/auth/bootstrap "{\"token\":\"$TOKEN\",\"actor_name\":\"smoke-admin\"}")"
# Tolerate a missing field or a non-JSON body here (`|| true`) so the
# diagnostic branch below can print what the server actually returned.
KEY="$(printf '%s' "$BOOTSTRAP_BODY" \
  | python3 -c 'import json,sys; print(json.load(sys.stdin).get("key_value") or "")' 2>/dev/null \
  || true)"
if [ -z "$KEY" ]; then
  smoke_fail "bootstrap did NOT return a key_value — body was:" "$BOOTSTRAP_BODY"
fi
log " admin minted (actor=smoke-admin)"

log "5/7 issuing a test certificate"
# Use the default profile + demo CA. The exact shape may need a tweak
# depending on the compose's seeded issuers — fail loudly with the
# response body if the API rejects the request.
ISSUE_BODY='{"common_name":"smoke-test.local","profile_id":"profile-default","environment":"test","owner_id":"o-platform"}'
ISSUE_RESP="$(http_call POST /api/v1/certificates "$ISSUE_BODY")"
CERT_ID="$(printf '%s' "$ISSUE_RESP" \
  | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d.get("id") or d.get("certificate",{}).get("id",""))' 2>/dev/null \
  || true)"
if [ -z "$CERT_ID" ]; then
  smoke_fail "issue failed; response body:" "$ISSUE_RESP"
fi
log " cert issued: $CERT_ID"

log "6/7 renewing the certificate"
http_call POST "/api/v1/certificates/$CERT_ID/renew" >/dev/null
log " renewed"

log "7/7 revoking the certificate + asserting audit rows"
http_call POST "/api/v1/certificates/$CERT_ID/revoke" '{"reason":"smoke-test"}' >/dev/null
AUDIT_BODY="$(http_call GET "/api/v1/audit?limit=50")"
for action in cert.issued cert.renewed cert.revoked; do
  # The action name is passed as argv[1] rather than spliced into the Python
  # source text.
  if ! printf '%s' "$AUDIT_BODY" | python3 -c '
import json, sys

doc = json.load(sys.stdin)
events = doc.get("events") or doc.get("audit", {}).get("events") or []
sys.exit(0 if any(e.get("action") == sys.argv[1] for e in events) else 1)
' "$action"; then
    # `|| true`: head may close the pipe early; that must not change the
    # failure we report.
    smoke_fail "MISSING audit row: $action" "$(printf '%s\n' "$AUDIT_BODY" | head -200 || true)"
  fi
done
log " audit rows present: cert.issued, cert.renewed, cert.revoked"

log "DONE — tearing down"
# From here on a failure is a teardown problem, not a smoke failure, so stop
# dumping service logs on error.
trap - ERR
docker compose down -v 2>&1 | tail -2
# The token env file is removed by the EXIT trap installed in step 4.
log "PASS"
|
||||
Reference in New Issue
Block a user