From a1c7741e1bf6d63efab959b5471a4579c0aaa105 Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Fri, 1 May 2026 01:39:18 +0000 Subject: [PATCH] fix(deploy/test) + ci(guard): drop dead SCEP profile from test compose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deploy-vendor-e2e job has been failing with the certctl-test-server container restarting endlessly. Diagnostic dump (added in 3b96b35) finally surfaced the actual cause: Failed to load configuration: SCEP profile 0 (PathID="e2eintune") has empty CHALLENGE_PASSWORD — refuse to start (CWE-306: per-profile shared secret is the sole application-layer auth boundary; an empty password would allow any client reaching /scep/e2eintune to enroll a CSR against issuer "iss-local") Same shape as the encryption-key fix that landed in c4157fd: a config validation gate added in code that the test compose never got updated to satisfy, hidden pre-Phase-5 because the matrix-collapse hadn't yet forced the certctl-server to actually boot in CI. Root cause is more interesting than just "missing env var." The 2026-04-29 SCEP RFC 8894 + Intune master bundle Phase I added an `e2eintune` SCEP profile to docker-compose.test.yml expecting deploy/test/scep_intune_e2e_test.go to exercise it. That integration test does exist (//go:build integration) but **NO CI job ever selects it** — ci.yml's deploy-vendor-e2e job runs only `-run 'VendorEdge_'` (line 379), and no other job invokes `go test -tags integration` with a SCEP selector. Confirmed via `grep -rnE "scep_intune|SCEPIntune" .github/workflows/` returning empty. Worse: the supporting fixtures (ra.crt + ra.key + intune_trust_anchor.pem) were documented in deploy/test/fixtures/README.md with the regeneration recipe but never actually committed. 
Pre-Phase-5 the test stack didn't fully boot the server in CI, so the entire stack of debt — dead config + missing fixtures + no consumer test — sat silent until the matrix collapse forced the boot path. Fixing this with a fake CHALLENGE_PASSWORD value would silence the immediate validator but leave the real problem in place: maintenance cost on test config that no test exercises. Same critique applies to "let me commit fake fixtures" — the fixtures alone don't add test coverage when no CI job runs the SCEP test. The complete-path fix is to make the test compose match what CI actually exercises: - deploy/docker-compose.test.yml: drop CERTCTL_SCEP_ENABLED + the full e2eintune profile env var family (10 lines) + the ./test/fixtures volume mount (1 line). Replace with an in-line comment explaining why SCEP is intentionally disabled and what needs to come back together when SCEP is added to CI for real. - scripts/ci-guards/test-compose-scep-coherence.sh (new, 22nd guard): refuses any future state where CERTCTL_SCEP_ENABLED=true in test compose without ALL of: 1. A CI job that runs the SCEP integration test (matched by scep_intune | SCEPIntune | -run [Ss]cep in ci.yml) 2. The fixture files actually committed (ra.crt, ra.key, intune_trust_anchor.pem) 3. The ./test/fixtures:/etc/certctl/scep:ro volume mount Verified manually with the same pattern as the H-1 guard: clean tree → exit 0; deliberate SCEP_ENABLED=true regression → exit 1 with 5 ::error:: annotations covering each gap; restore → exit 0 again. - scripts/ci-guards/README.md: 21 → 22 guards, new row. The fixtures README at deploy/test/fixtures/README.md keeps the regeneration recipe so the eventual SCEP CI job lands cleanly: the operator who adds the SCEP job restores the env vars, regenerates + commits the fixtures, and the guard auto-passes. 
Pattern (now firm across this CI-stabilization sequence): - Pre-existing latent bug - Old CI structurally hid it (per-vendor matrix, missing boot path) - Phase-5 matrix collapse + new diagnostic infra exposed it - Direct fix unblocks today - Regression guard prevents the same shape of drift forever Encryption-key (c4157fd) was the same shape; this is its sibling. --- deploy/docker-compose.test.yml | 60 +++++++------ scripts/ci-guards/README.md | 3 +- .../ci-guards/test-compose-scep-coherence.sh | 85 +++++++++++++++++++ 3 files changed, 119 insertions(+), 29 deletions(-) create mode 100755 scripts/ci-guards/test-compose-scep-coherence.sh diff --git a/deploy/docker-compose.test.yml b/deploy/docker-compose.test.yml index 13de399..acf9818 100644 --- a/deploy/docker-compose.test.yml +++ b/deploy/docker-compose.test.yml @@ -284,26 +284,34 @@ services: CERTCTL_EST_ENABLED: "true" CERTCTL_EST_ISSUER_ID: iss-local - # SCEP RFC 8894 + Intune master prompt §10.2 + §13 acceptance - # (deploy/test/scep_intune_e2e_test.go integration variant). - # Closed in the 2026-04-29 audit-closure bundle (Phase I). + # SCEP intentionally NOT configured in this stack. # - # Publishes /scep/e2eintune?operation=... with the Intune - # dispatcher enabled. The deterministic Connector signing cert - # is bind-mounted at the path below; the matching private key - # lives ONLY on the test side (see - # deploy/test/scep_intune_e2e_test.go::generateE2EIntuneTrustAnchor). 
- CERTCTL_SCEP_ENABLED: "true" - CERTCTL_SCEP_PROFILES: "e2eintune" - CERTCTL_SCEP_PROFILE_E2EINTUNE_ISSUER_ID: iss-local - CERTCTL_SCEP_PROFILE_E2EINTUNE_RA_CERT_PATH: /etc/certctl/scep/ra.crt - CERTCTL_SCEP_PROFILE_E2EINTUNE_RA_KEY_PATH: /etc/certctl/scep/ra.key - CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_ENABLED: "true" - CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_CONNECTOR_CERT_PATH: /etc/certctl/scep/intune_trust_anchor.pem - CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_AUDIENCE: https://localhost:8443/scep/e2eintune - CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_CHALLENGE_VALIDITY: 60m - CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_CLOCK_SKEW_TOLERANCE: 60s - CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_PER_DEVICE_RATE_LIMIT_24H: 3 + # The 2026-04-29 master bundle Phase I added an `e2eintune` SCEP + # profile to this compose file with the intent that + # deploy/test/scep_intune_e2e_test.go would exercise it. That + # integration test exists (//go:build integration) but no CI job + # actually selects it — ci.yml's deploy-vendor-e2e job runs only + # `-run 'VendorEdge_'` (line 379), and no other job ever invokes + # `go test -tags integration` with a SCEP selector. + # + # The result was dead config: SCEP_ENABLED=true triggered the + # per-profile validator chain at server boot, but the supporting + # fixtures (ra.crt + ra.key + intune_trust_anchor.pem) were never + # committed to deploy/test/fixtures/ — only the README documenting + # how to regenerate them. Pre-Phase-5 (ci-pipeline-cleanup matrix + # collapse) the test stack didn't fully boot the certctl-server in + # CI, so the gap was hidden. Once the matrix collapsed and the + # collapsed deploy-vendor-e2e job started actually booting the + # server, the fail-loud gate at config.go:2069 (CWE-306, empty + # CHALLENGE_PASSWORD) fired and blocked CI. + # + # CERTCTL_SCEP_ENABLED is unset → default false → the validator + # skips the entire SCEP block. 
Coherence guard at + # scripts/ci-guards/test-compose-scep-coherence.sh refuses any + # future edit that re-enables SCEP without ALSO (a) adding a CI + # job that runs the SCEP integration test and (b) committing the + # required fixtures. The README at deploy/test/fixtures/README.md + # keeps the regen recipe so the eventual SCEP CI job lands cleanly. # Dynamic issuer/target config encryption (M34/M35). # @@ -346,15 +354,11 @@ services: # agent mounts the same host path at the same container path (see below) # so /etc/certctl/tls/ca.crt resolves to the *same* bytes on both sides. - ./test/certs:/etc/certctl/tls:ro - # SCEP RFC 8894 + Intune master prompt §10.2 + §13 acceptance: the - # e2eintune profile's RA cert/key + Intune Connector trust anchor - # PEM. The PEM is the deterministic public cert matching the test- - # side private key in deploy/test/scep_intune_e2e_test.go (re-run - # `go test -tags integration -run='^TestRegenerateE2EIntuneFixture$' - # -update-fixture ./deploy/test/...` to regenerate after a seed - # change). RA cert/key live alongside; tls-init container generates - # them at boot. - - ./test/fixtures:/etc/certctl/scep:ro + # SCEP fixtures volume mount removed alongside the SCEP env vars + # above. When a CI job that runs scep_intune_e2e_test.go is added, + # restore both this mount AND the env vars together — the coherence + # guard at scripts/ci-guards/test-compose-scep-coherence.sh + # enforces that they move as a unit. networks: certctl-test: ipv4_address: 10.30.50.6 diff --git a/scripts/ci-guards/README.md b/scripts/ci-guards/README.md index 9411747..d9892d6 100644 --- a/scripts/ci-guards/README.md +++ b/scripts/ci-guards/README.md @@ -53,7 +53,7 @@ Current helpers: 4. CI auto-picks up new scripts via the `for g in scripts/ci-guards/*.sh` loop in the `Regression guards` step — no ci.yml change required. 
-## The 21 guards in this directory
+## The 22 guards in this directory
 
 | ID | Finding | Catches |
 |---|---|---|
@@ -78,6 +78,7 @@ Current helpers:
 | `bundle-8-L-019-dangerously-set-inner-html` | L-019 (CWE-79) XSS | `dangerouslySetInnerHTML` outside `safeHtml.ts` |
 | `bundle-8-M-009-bare-usemutation` | M-009 + M-029 mutation contract | Bare `useMutation()` outside `useTrackedMutation` wrapper |
 | `H-1-encryption-key-min-length` | H-1 closure follow-up (post-Phase-5 surfacing) | `CERTCTL_CONFIG_ENCRYPTION_KEY` literal in any `deploy/docker-compose*.yml` shorter than the 32-byte floor enforced by `internal/config/config.go::Validate()` |
+| `test-compose-scep-coherence` | post-Phase-5 surfacing of dead SCEP test config | `CERTCTL_SCEP_ENABLED=true` in test compose without (a) a CI job that runs the SCEP integration test, (b) the `ra.crt` + `ra.key` + `intune_trust_anchor.pem` fixtures committed (non-empty) to `deploy/test/fixtures/`, (c) the matching volume mount, AND (d) a non-empty SCEP `CHALLENGE_PASSWORD`; also fails if the test compose file itself is missing |
 
 ## Guards explicitly NOT here
 
diff --git a/scripts/ci-guards/test-compose-scep-coherence.sh b/scripts/ci-guards/test-compose-scep-coherence.sh
new file mode 100755
index 0000000..f2395df
--- /dev/null
+++ b/scripts/ci-guards/test-compose-scep-coherence.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+# scripts/ci-guards/test-compose-scep-coherence.sh
+#
+# Enforces that deploy/docker-compose.test.yml's SCEP profile config
+# stays coherent with the rest of the test infrastructure: if SCEP is
+# enabled in test compose, then there MUST be a CI job that exercises
+# the SCEP integration test, AND the supporting fixture files must
+# actually exist on disk (not just be documented in the fixtures README).
+#
+# Background. The 2026-04-29 SCEP RFC 8894 + Intune master bundle
+# Phase I added an `e2eintune` SCEP profile to docker-compose.test.yml
+# expecting deploy/test/scep_intune_e2e_test.go to exercise it. The
+# test exists (//go:build integration) but was never wired into any
+# CI job, AND the supporting fixtures (ra.crt + ra.key +
+# intune_trust_anchor.pem) were documented in deploy/test/fixtures/
+# README.md but never committed. Pre-Phase-5 (ci-pipeline-cleanup
+# matrix collapse) the test stack didn't fully boot the certctl-server,
+# so the gap was hidden. Post-collapse the boot validator at
+# config.go::Validate() fired with CWE-306 (empty CHALLENGE_PASSWORD)
+# and blocked deploy-vendor-e2e.
+#
+# The commit that introduced this guard also dropped the SCEP env vars
+# + the fixtures volume mount from compose. This guard stops the same
+# drift from ever recurring silently. To re-enable SCEP in test compose:
+#
+#   1. Restore the SCEP env vars (CERTCTL_SCEP_ENABLED=true +
+#      CERTCTL_SCEP_PROFILES + per-profile CHALLENGE_PASSWORD + ...).
+#   2. Restore the volume mount `./test/fixtures:/etc/certctl/scep:ro`.
+#   3. Commit the supporting fixtures (ra.crt, ra.key,
+#      intune_trust_anchor.pem) per deploy/test/fixtures/README.md.
+#   4. Add a CI job that runs `go test -tags integration -run 'SCEPIntune'`
+#      against the same compose stack — without it, the SCEP plumbing
+#      in test compose is paying maintenance cost for zero benefit.
+#
+# All four must move together. This guard refuses any partial state.
+#
+# The checks fail closed:
+#   - a missing or unreadable compose file is an error, so renaming the
+#     file cannot turn this guard into a silent no-op;
+#   - compose values may be quoted and may carry a trailing `# comment`;
+#   - comment lines in ci.yml do not count as a CI job running the test;
+#   - fixtures must be non-empty regular files;
+#   - a non-empty SCEP CHALLENGE_PASSWORD key must be present, because
+#     an empty one is the boot failure that motivated this guard.
+#
+# Per the contract documented in scripts/ci-guards/README.md:
+#   bare callable, no args, no env, exit 0 on clean.
+
+set -euo pipefail
+
+readonly GUARD_NAME="test-compose-scep-coherence"
+readonly COMPOSE_FILE="deploy/docker-compose.test.yml"
+readonly CI_FILE=".github/workflows/ci.yml"
+readonly FIXTURES_DIR="deploy/test/fixtures"
+
+# POSIX ERE fragments shared by the compose-line matchers.
+readonly WS='[[:space:]]*'           # optional whitespace
+readonly QUOTE="[\"']?"              # optional single or double quote
+readonly TAIL='[[:space:]]*(#.*)?$'  # optional trailing YAML comment
+
+failed=0
+
+# Emits a GitHub Actions error annotation and marks the guard as failed.
+#   $1 - file the annotation is attached to
+#   $2 - message text
+fail() {
+  printf '::error file=%s::%s\n' "$1" "$2"
+  failed=1
+}
+
+# Returns 0 when some line of the compose file matches the ERE in $1.
+compose_has() {
+  grep -qE -- "$1" "$COMPOSE_FILE"
+}
+
+# Phase 0: the compose file must be readable. grep exits 2 on a missing
+# file, which an `if grep` would treat as "SCEP not enabled".
+if [[ ! -r "$COMPOSE_FILE" ]]; then
+  fail "$COMPOSE_FILE" "${GUARD_NAME}: ${COMPOSE_FILE} is missing or unreadable. Run from the repo root, or update this guard if the file moved."
+  exit 1
+fi
+
+# Phase 1: is SCEP enabled in test compose? Matches the map form
+# (`CERTCTL_SCEP_ENABLED: "true"`) and the list form
+# (`- CERTCTL_SCEP_ENABLED=true`).
+if compose_has "^${WS}(-[[:space:]]+)?CERTCTL_SCEP_ENABLED${WS}(:|=)${WS}${QUOTE}[Tt][Rr][Uu][Ee]${QUOTE}${TAIL}"; then
+  echo "Detected CERTCTL_SCEP_ENABLED=true in $COMPOSE_FILE"
+
+  # Phase 2: is there a CI job that runs the SCEP integration test?
+  # Match either an explicit selector (-run 'SCEPIntune' or similar) or
+  # a direct reference to scep_intune_e2e_test.go. Whole-line YAML
+  # comments are skipped so a commented-out job does not satisfy this.
+  if ! awk '
+    /^[ \t]*#/ { next }
+    /scep_intune|SCEPIntune|SCEPProfile.*E2E|-run.*[Ss]cep/ { found = 1; exit }
+    END { exit !found }
+  ' "$CI_FILE"; then
+    fail "$CI_FILE" "CERTCTL_SCEP_ENABLED=true in ${COMPOSE_FILE} but no CI job runs the SCEP integration test. Add a job that invokes 'go test -tags integration -run SCEPIntune' against the same compose stack, OR remove the SCEP env vars from compose."
+  fi
+
+  # Phase 3: are the required fixture files present and non-empty?
+  for f in ra.crt ra.key intune_trust_anchor.pem; do
+    if [[ ! -f "${FIXTURES_DIR}/${f}" || ! -s "${FIXTURES_DIR}/${f}" ]]; then
+      fail "$COMPOSE_FILE" "CERTCTL_SCEP_ENABLED=true in ${COMPOSE_FILE} but required SCEP fixture is missing or empty: ${FIXTURES_DIR}/${f}. See ${FIXTURES_DIR}/README.md for the regeneration recipe."
+    fi
+  done
+
+  # Phase 4: is the volume mount present? Without it, the cert/key
+  # paths inside the container resolve to nothing.
+  if ! compose_has "^${WS}-[[:space:]]+${QUOTE}\./test/fixtures:/etc/certctl/scep:ro${QUOTE}${TAIL}"; then
+    fail "$COMPOSE_FILE" "CERTCTL_SCEP_ENABLED=true but the './test/fixtures:/etc/certctl/scep:ro' volume mount is missing. SCEP profile would have no fixture access."
+  fi
+
+  # Phase 5: is a non-empty challenge password configured? The key name
+  # is matched loosely (any CERTCTL_SCEP_*CHALLENGE_PASSWORD* key)
+  # because the exact per-profile spelling lives in config.go.
+  if ! compose_has "^${WS}(-[[:space:]]+)?CERTCTL_SCEP_[A-Z0-9_]*CHALLENGE_PASSWORD[A-Z0-9_]*${WS}(:|=)${WS}${QUOTE}[^\"'#[:space:]]"; then
+    fail "$COMPOSE_FILE" "CERTCTL_SCEP_ENABLED=true in ${COMPOSE_FILE} but no non-empty SCEP CHALLENGE_PASSWORD is configured. The server refuses to start with an empty challenge password (CWE-306)."
+  fi
+fi
+
+if (( failed )); then
+  echo ""
+  echo "${GUARD_NAME}: FAILED — SCEP test config is incoherent across compose, CI workflow, and fixtures."
+  exit 1
+fi
+
+echo "${GUARD_NAME}: clean."