name: CI

# Continuous-integration workflow.
#
# Triggers: pushes to master / v2-dev and pull requests targeting master.
# Jobs defined below:
#   - go-build-and-test: Go build, vet, lint, vulnerability scan, a series of
#     grep-based regression guards (G-1, G-2, U-2, U-3, D-1/D-2, L-1, B-1,
#     S-2), race detection, and per-package coverage gates.
#   - frontend-build: TypeScript check, Vitest, Vite build, and the
#     docs / frontend regression guards (S-1, P-1, T-1, L-015, L-019).

on:
  push:
    branches:
      - master
      - v2-dev
  pull_request:
    branches:
      - master

jobs:
  go-build-and-test:
    name: Go Build & Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25.9'

      - name: Go Build
        run: |
          go build ./cmd/server/...
          go build ./cmd/agent/...
          go build ./cmd/mcp-server/...
          go build ./cmd/cli/...

      - name: Go Vet
        run: go vet ./...

      - name: Install golangci-lint
        # The install directory is quoted so a GOPATH containing whitespace
        # cannot be word-split into extra installer arguments.
        run: |
          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b "$(go env GOPATH)/bin" v2.11.4

      - name: Run golangci-lint
        run: golangci-lint run ./... --timeout 5m

      - name: Install govulncheck
        run: go install golang.org/x/vuln/cmd/govulncheck@latest

      - name: Run govulncheck
        # Bundle-7 / D-001 partial: govulncheck distinguishes called-vs-uncalled
        # advisories. Default exit code is non-zero only when YOUR code calls
        # the vulnerable function — deferred-call advisories show up in the
        # output but don't fail the gate. See .govulnignore for the
        # suppression contract if a triaged false-positive needs to be muted.
        run: govulncheck ./...

      - name: Install staticcheck (Bundle-7 / D-001)
        run: go install honnef.co/go/tools/cmd/staticcheck@latest

      - name: Run staticcheck
        # Bundle-7 / D-001: Go static analysis additive to vet. Suppressed
        # rules live in staticcheck.conf with documented justifications;
        # adding a new entry requires an explicit security review.
        #
        # SOFT gate (continue-on-error: true) until M-028 closes the 6
        # remaining SA1019 deprecated-API sites:
        #   - cmd/server/main_test.go × 3: middleware.NewAuth → NewAuthWithNamedKeys
        #   - internal/api/handler/scep.go: csr.Attributes → Extensions
        #   - internal/connector/issuer/local/local.go: elliptic.Marshal → crypto/ecdh
        # When M-028 ships, flip continue-on-error to false to make this
        # a hard gate. Until then, the step still annotates findings on PRs.
        continue-on-error: true
        run: staticcheck ./...

      - name: Forbidden auth-type literal regression guard (G-1)
        # G-1 closed the JWT silent auth downgrade by removing "jwt" from the
        # accepted CERTCTL_AUTH_TYPE values. This step grep-fails the build
        # if "jwt" reappears in any of the *additive* auth-type surfaces:
        # the validAuthTypes / ValidAuthTypes() set, the OpenAPI enum, the
        # helm chart's allowed-types list, or the .env.example default.
        # Comment lines and the dedicated rejection branch in config.go
        # (`c.Auth.Type == "jwt"`) are intentionally exempt — those are the
        # G-1 fix itself, not a regression.
        #
        # Connector packages (internal/connector/) are exempt because the
        # Google OAuth2 service-account JWT and step-ca provisioner one-
        # time-token JWT are external-protocol uses, unrelated to certctl's
        # own auth shape. Test files (_test.go) are exempt so negative
        # tests can pass the literal.
        #
        # See docs/upgrade-to-v2-jwt-removal.md for the closure rationale,
        # or internal/config/config.go::ValidAuthTypes for the allowed set.
        run: |
          set -e
          # Scoped patterns that indicate "jwt" being added back to an
          # allowed-set surface. Each catches a regression shape we've
          # actually seen in pre-G-1 code:
          #   - Go map/slice literal: "jwt": true or "jwt",
          #   - Go switch case:       case "jwt"
          #   - YAML enum:            enum: [..., jwt, ...] or - jwt
          #   - .env conditional:     AUTH_TYPE.*"jwt"|=jwt$
          BAD=$(grep -rnEH \
            -e '"jwt"\s*:\s*true' \
            -e '"jwt"\s*,' \
            -e 'case\s+"jwt"' \
            -e 'enum:.*\bjwt\b' \
            -e '^\s*-\s*jwt\s*$' \
            -e 'AUTH_TYPE\s*=\s*jwt\s*$' \
            -e 'AUTH_TYPE\s*=\s*jwt\s*#' \
            -e 'auth\.type\s*=\s*jwt\s*$' \
            -e 'AuthType\("jwt"\)' \
            internal/config/ \
            internal/api/ \
            cmd/ \
            api/openapi.yaml \
            .env.example \
            deploy/.env.example \
            deploy/helm/certctl/values.yaml \
            deploy/helm/certctl/templates/ \
            2>/dev/null \
            | grep -v '_test.go' \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \
            | grep -v 'is no longer accepted' \
            || true)
          if [ -n "$BAD" ]; then
            echo "G-1 regression: \"jwt\" reappeared in an allowed-set surface:"
            echo "$BAD"
            echo ""
            echo "Allowed surface for 'jwt' literals: comment lines, the"
            echo "dedicated rejection branch in internal/config/config.go,"
            echo "and connector packages (Google OAuth2, step-ca)."
            echo "See docs/upgrade-to-v2-jwt-removal.md and"
            echo "internal/config/config.go::ValidAuthTypes()."
            exit 1
          fi

      - name: Forbidden api_key_hash JSON-shape regression guard (G-2)
        # G-2 closed cat-s5-apikey_leak by tagging Agent.APIKeyHash
        # `json:"-"` and adding a defense-in-depth Agent.MarshalJSON that
        # zeroes the field on the marshal-time copy. This step grep-fails
        # the build if `api_key_hash` reappears in any of the *additive*
        # JSON-emitting surfaces: a Go struct json tag in internal/domain/,
        # an OpenAPI Agent schema property, a TypeScript field declaration
        # in web/src/, or an enum-list / discriminator in handler
        # production code.
        #
        # Repository, migration, seed, service, integration-test, and
        # unit-test files are exempt — those are server-internal use
        # sites (the DB column stays, the in-memory struct field stays,
        # the auth-lookup path stays). Comment lines are exempt so the
        # G-2 closure rationale can stay in the source.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s5-apikey_leak for the closure rationale, or
        # internal/domain/connector.go::Agent::MarshalJSON for the
        # redaction enforcement.
        run: |
          set -e
          # Scoped patterns that indicate api_key_hash being added back
          # to a JSON-emitting surface. Each catches a regression shape
          # that pre-G-2 actually shipped or that a future refactor
          # could plausibly introduce:
          #   - Go struct tag:           `json:"api_key_hash"`
          #   - Frontend interface:      api_key_hash[?]: string
          #   - OpenAPI schema property: api_key_hash: (column-aligned)
          #   - YAML enum / array:       - api_key_hash
          BAD=$(grep -rnEH \
            -e 'json:"api_key_hash[",]' \
            -e '^\s*api_key_hash\??\s*:' \
            -e '^\s*-\s*api_key_hash\s*$' \
            internal/domain/ \
            internal/api/ \
            cmd/ \
            api/openapi.yaml \
            web/src/ \
            2>/dev/null \
            | grep -v '_test.go' \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \
            || true)
          if [ -n "$BAD" ]; then
            echo "G-2 regression: api_key_hash reappeared in a JSON-emitting surface:"
            echo "$BAD"
            echo ""
            echo "Allowed surface for api_key_hash literals: comment lines,"
            echo "the database column (migrations/), the in-memory struct"
            echo "field tagged \`json:\"-\"\`, and the repository / service"
            echo "use sites. See internal/domain/connector.go::Agent and"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-s5-apikey_leak for the closure rationale."
            exit 1
          fi

      - name: Forbidden plaintext HEALTHCHECK regression guard (U-2)
        # U-2 closed cat-u-healthcheck_protocol_mismatch by switching the
        # published image's HEALTHCHECK from `curl -f http://localhost:
        # 8443/health` (always failed against the HTTPS-only listener) to
        # `curl -fsk https://localhost:8443/health`. This step grep-fails
        # the build if any Dockerfile in the repo carries the pre-U-2
        # plaintext shape — either explicitly (`http://localhost:8443/
        # health` in a HEALTHCHECK) or via the looser pattern of any
        # HEALTHCHECK that targets `http://` against the certctl server
        # port.
        #
        # Comment lines and the docs/upgrade-to-tls.md:182 expected-to-
        # fail invariant ("plaintext is gone, expect Connection refused")
        # are intentionally exempt — we DO want the upgrade-doc string
        # `http://localhost:8443/health` to remain there, since it
        # documents what operators should test for to confirm plaintext
        # is dead. The guardrail is scoped to Dockerfile* only, so docs
        # are out of its reach.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-u-healthcheck_protocol_mismatch for the closure rationale,
        # or deploy/test/healthcheck_test.go for the binary-image
        # contract the runtime test pins.
        run: |
          set -e
          # Patterns that catch the actual regression shapes:
          #   - HEALTHCHECK directive carrying any http:// (even if the
          #     port differs, no plaintext probe should ship).
          #   - The exact pre-U-2 string for grep-friendliness.
          BAD=$(grep -rnEH \
            -e 'HEALTHCHECK.*http://' \
            -e 'curl[^|&;]*-f[^|&;]*http://localhost:8443/health' \
            Dockerfile Dockerfile.agent Dockerfile.* 2>/dev/null \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*#' \
            || true)
          if [ -n "$BAD" ]; then
            echo "U-2 regression: plaintext HEALTHCHECK reappeared in a Dockerfile:"
            echo "$BAD"
            echo ""
            echo "Allowed: HTTPS HEALTHCHECK with -k (acceptable for"
            echo "localhost-to-localhost), or non-HTTP probe shapes"
            echo "(pgrep, /proc check). See Dockerfile / Dockerfile.agent"
            echo "for the post-U-2 reference shape and"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-u-healthcheck_protocol_mismatch for rationale."
            exit 1
          fi

      - name: Forbidden migration mount in compose initdb (U-3)
        # U-3 closed cat-u-seed_initdb_schema_drift (GitHub #10) by
        # eliminating the dual-source-of-truth between
        # `migrations/*.up.sql` mounted into postgres
        # `/docker-entrypoint-initdb.d/` and the same files re-applied at
        # runtime by `RunMigrations`. Pre-U-3 every new migration that
        # the seed depended on (000013 added `policy_rules.severity`,
        # 000017 renames `retry_interval_seconds`, etc.) had to be added
        # by hand to the compose mount list; missing the update crashed
        # initdb on first boot, postgres flagged unhealthy, and the
        # whole stack failed to start from a fresh clone. Post-U-3 the
        # server is the single source of truth — `RunMigrations` +
        # `RunSeed` apply everything at boot.
        #
        # This step grep-fails the build if any compose file under
        # `deploy/` re-introduces a `migrations/.*\.sql` mount into
        # `/docker-entrypoint-initdb.d`. Comments are exempt so the
        # post-fix rationale block in the compose files (which
        # documents WHY the mounts were removed) doesn't trip the guard.
        # The demo overlay's `seed_demo.sql` is the explicit exception:
        # it is tolerated only when it lives behind the
        # CERTCTL_DEMO_SEED env var (post-U-3 demo path) — bare initdb
        # mounts are NOT tolerated. The grep matches all compose
        # mount-list shapes (`-` indented, `volumes:` indented, both),
        # so any future drift surfaces here before the operator hits it
        # on a fresh clone.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-u-seed_initdb_schema_drift for the closure rationale, or
        # internal/repository/postgres/db.go::RunSeed for the runtime
        # contract.
        run: |
          set -e
          BAD=$(grep -rnEH \
            -e 'migrations/.*\.sql:.*docker-entrypoint-initdb' \
            -e 'seed.*\.sql:.*docker-entrypoint-initdb' \
            deploy/docker-compose.yml \
            deploy/docker-compose.test.yml \
            deploy/docker-compose.demo.yml \
            2>/dev/null \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*#' \
            || true)
          if [ -n "$BAD" ]; then
            echo "U-3 regression: migration/seed mount into postgres initdb reappeared:"
            echo "$BAD"
            echo ""
            echo "The post-U-3 contract is: postgres comes up with an empty"
            echo "schema and the server applies migrations + seed at boot via"
            echo "internal/repository/postgres.RunMigrations + RunSeed. Demo"
            echo "data lives behind CERTCTL_DEMO_SEED=true (RunDemoSeed),"
            echo "not an initdb mount. See"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-u-seed_initdb_schema_drift for the closure rationale."
            exit 1
          fi

      - name: Forbidden StatusBadge dead-key + TS phantom-field regression guard (D-1 + D-2)
        # D-1 master closed cat-d-359e92c20cbf (Agent: 'Stale' dead key,
        # 'Degraded' missing), cat-d-9f4c8e4a91f1 (Notification: 'dead'
        # missing), cat-d-1447e04732e7 (Cert: 'PendingIssuance' dead
        # key), cat-f-cert_detail_page_key_render_fallback (render-site
        # uses cert.X directly), and cat-f-ae0d06b6588f (Certificate
        # TS phantom fields). This step grep-fails the build if either
        # half of the closure is reverted:
        #
        #   1. The dead StatusBadge keys ('Stale' for Agent, 'PendingIssuance'
        #      for Cert) reappearing as map literals, OR
        #   2. The five phantom Certificate TS fields (serial_number,
        #      fingerprint_sha256, key_algorithm, key_size, issued_at)
        #      reappearing on the `Certificate` interface in types.ts
        #      (CertificateVersion legitimately carries them and is
        #      explicitly excluded by the awk pre-filter below).
        #
        # Comments are exempt so the closure prose in StatusBadge.tsx +
        # types.ts can stay. Test files are exempt so negative tests
        # asserting the dead keys fall through to neutral keep working.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-d-* / cat-f-* for the closure rationale, or
        # web/src/components/StatusBadge.test.tsx for the live
        # enum-coverage contract.
        run: |
          set -e
          # -H forces the "file:" prefix even though a single file is
          # searched; the comment-line filter below expects file:line:
          # and cannot match without it.
          BAD_BADGE=$(grep -nEH "^\s*(Stale|PendingIssuance)\s*:\s*'badge-" \
            web/src/components/StatusBadge.tsx 2>/dev/null \
            | grep -v '\.test\.' \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*//' \
            || true)
          if [ -n "$BAD_BADGE" ]; then
            echo "D-1 regression: dead StatusBadge key reappeared:"
            echo "$BAD_BADGE"
            echo ""
            echo "Allowed surface: comment lines naming the removed key in"
            echo "the file's preamble. The Go-side AgentStatus values are"
            echo "Online/Offline/Degraded (no Stale); CertificateStatus values"
            echo "are Pending/Active/... (no PendingIssuance). See"
            echo "web/src/components/StatusBadge.test.tsx for the contract."
            exit 1
          fi

          # Certificate TS phantom-field check. Scoped to the
          # `export interface Certificate {` block in web/src/api/types.ts
          # — CertificateVersion legitimately declares these fields and
          # must NOT trip the guardrail. The awk window opens on the
          # exact `Certificate {` header (not `CertificateVersion {`,
          # not `CertificateProfile {`) and closes at the first `}`,
          # then the grep matches a phantom-field declaration anywhere
          # in that window.
          BAD_TS=$(awk '
            /^export interface Certificate \{/ { flag=1; next }
            flag && /^\}/ { flag=0 }
            flag { print FILENAME":"NR":"$0 }
          ' web/src/api/types.ts \
            | grep -E '\b(serial_number|fingerprint_sha256|key_algorithm|key_size|issued_at)\??\s*:' \
            || true)
          if [ -n "$BAD_TS" ]; then
            echo "D-1 regression: Certificate TS interface re-added a phantom field:"
            echo "$BAD_TS"
            echo ""
            echo "These fields live on CertificateVersion, not ManagedCertificate."
            echo "The Go-side ManagedCertificate has never carried them; the"
            echo "TS optional declarations were silently undefined on every"
            echo "list response. Render-site consumers (e.g. CertificateDetailPage)"
            echo "use latestVersion?.field as the canonical access path."
            echo "See coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-f-ae0d06b6588f for the closure rationale."
            exit 1
          fi

          # D-2 master closed five diff-05x06-* type-drift findings:
          # Agent (5 phantoms), Issuer (1 phantom), Notification (1 phantom)
          # — TRIM half. The Target (2 missing fields) and DiscoveredCertificate
          # (1 missing field) — ADD half is pinned by the literal-construction
          # blocks in web/src/api/types.test.ts, not a CI grep. The phantom-
          # trim regression vector is an awk-windowed grep per interface
          # mirroring the D-1 Certificate check above.
          #
          # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
          # diff-05x06-7cdf4e78ae24 (Agent), diff-05x06-97fab8783a5c (Issuer),
          # diff-05x06-caba9eb3620e (Notification) for the closure rationale.

          # D-2 Agent phantom-field check. The grep matches `last_heartbeat`
          # but NOT `last_heartbeat_at` (the legitimate Go-emitted field) —
          # the `\b...\b` boundaries plus the `grep -v 'last_heartbeat_at'`
          # filter handle that.
          BAD_AGENT=$(awk '
            /^export interface Agent \{/ { flag=1; next }
            flag && /^\}/ { flag=0 }
            flag { print FILENAME":"NR":"$0 }
          ' web/src/api/types.ts \
            | grep -E '\b(last_heartbeat|capabilities|tags|created_at|updated_at)\??\s*:' \
            | grep -v 'last_heartbeat_at' \
            || true)
          if [ -n "$BAD_AGENT" ]; then
            echo "D-2 regression: Agent TS interface re-added a phantom field:"
            echo "$BAD_AGENT"
            echo ""
            echo "The Go-side internal/domain/connector.go::Agent emits exactly:"
            echo "id, name, hostname, status, last_heartbeat_at?, registered_at,"
            echo "os, architecture, ip_address, version, retired_at?, retired_reason?."
            echo "The five fields blocked by this guard (last_heartbeat,"
            echo "capabilities, tags, created_at, updated_at) were TS phantoms"
            echo "the Go struct never emitted. See unified-audit.md"
            echo "diff-05x06-7cdf4e78ae24 for closure rationale."
            exit 1
          fi

          # D-2 Issuer phantom-field check.
          BAD_ISSUER=$(awk '
            /^export interface Issuer \{/ { flag=1; next }
            flag && /^\}/ { flag=0 }
            flag { print FILENAME":"NR":"$0 }
          ' web/src/api/types.ts \
            | grep -E '\bstatus\??\s*:' \
            || true)
          if [ -n "$BAD_ISSUER" ]; then
            echo "D-2 regression: Issuer TS interface re-added a phantom 'status' field:"
            echo "$BAD_ISSUER"
            echo ""
            echo "The Go-side internal/domain/connector.go::Issuer has no 'status'"
            echo "field — only 'enabled' (bool). Render sites derive the displayed"
            echo "status from 'enabled' at the call site (see"
            echo "web/src/pages/IssuersPage.tsx::issuerStatus). See unified-audit.md"
            echo "diff-05x06-97fab8783a5c for closure rationale."
            exit 1
          fi

          # D-2 Notification phantom-field check.
          BAD_NOTIF=$(awk '
            /^export interface Notification \{/ { flag=1; next }
            flag && /^\}/ { flag=0 }
            flag { print FILENAME":"NR":"$0 }
          ' web/src/api/types.ts \
            | grep -E '\bsubject\??\s*:' \
            || true)
          if [ -n "$BAD_NOTIF" ]; then
            echo "D-2 regression: Notification TS interface re-added a phantom 'subject' field:"
            echo "$BAD_NOTIF"
            echo ""
            echo "The Go-side internal/domain/notification.go::NotificationEvent"
            echo "has no 'subject' field — only 'message'. Pre-D-2 the consumer"
            echo "at NotificationsPage.tsx had a dead '|| n.subject' fallback"
            echo "that always fell through. See unified-audit.md"
            echo "diff-05x06-caba9eb3620e for closure rationale."
            exit 1
          fi

      - name: Forbidden client-side bulk-action loop regression guard (L-1)
        # L-1 master closed cat-l-fa0c1ac07ab5 (bulk-renew loop) and
        # cat-l-8a1fb258a38a (bulk-reassign loop) by adding server-side
        # bulk endpoints (POST /api/v1/certificates/bulk-renew and
        # POST /api/v1/certificates/bulk-reassign) that the GUI calls
        # in a single round-trip. Pre-L-1 the GUI looped per-cert
        # HTTP calls — 100 selected certs = 100 round-trips × ~50–200ms
        # each = a 5–20-second wedge during which the operator stares
        # at a progress bar.
        #
        # This step grep-fails the build if either loop shape reappears
        # in CertificatesPage.tsx. Patterns catch the actual pre-L-1
        # shapes:
        #   - `for (const id of ids) { await triggerRenewal(id) }`
        #   - `for (const id of ids) { await updateCertificate(id, { owner_id }) }`
        #   - `for (let i = 0; i < ids.length; i++) { await triggerRenewal(ids[i]) }`
        #
        # Allowed: comment lines explaining the pre-L-1 pattern in the
        # docblock above each handler. Test files (_test.tsx) exempt
        # so negative-pattern tests can keep working.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-l-fa0c1ac07ab5 and cat-l-8a1fb258a38a for closure
        # rationale, or web/src/api/client.ts::bulkRenewCertificates
        # / bulkReassignCertificates for the canonical call path.
        run: |
          set -e
          # -H forces the "file:" prefix even though a single file is
          # searched. Without it grep prints "line:content", the
          # file:line: comment filter below never matches, and a docblock
          # comment quoting the pre-L-1 loop would fail the build despite
          # being an allowed surface.
          BAD_LOOP=$(grep -nEH 'for[[:space:]]*\(' web/src/pages/CertificatesPage.tsx 2>/dev/null \
            | grep -E 'await[[:space:]]+(triggerRenewal|updateCertificate)\(' \
            | grep -v '\.test\.' \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*//' \
            || true)
          if [ -n "$BAD_LOOP" ]; then
            echo "L-1 regression: client-side bulk-action loop reappeared in CertificatesPage.tsx:"
            echo "$BAD_LOOP"
            echo ""
            echo "Use bulkRenewCertificates({ certificate_ids: [...] }) or"
            echo "bulkReassignCertificates({ certificate_ids: [...], owner_id, team_id? })"
            echo "instead of looping per-item HTTP calls. See"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md cat-l-* for rationale."
            exit 1
          fi

      - name: Forbidden orphan-CRUD client function regression guard (B-1)
        # B-1 master closed four audit findings — three orphan-update fns
        # (cat-b-31ceb6aaa9f1, cat-b-7a34f893a8f9) and one orphan CRUD
        # surface (cat-b-4631ca092bee, RenewalPolicy) — by wiring per-page
        # Edit modals so every backend write endpoint has at least one
        # GUI consumer. The fourth finding (cat-b-9b97ffb35ef7) deleted
        # the dead `exportCertificatePEM` duplicate.
        #
        # Pre-B-1 the failure mode was: backend ships a CRUD handler,
        # client.ts ships the matching `update*` / `delete*` / `create*`
        # function, but no page imports it. Operators were forced to
        # `psql` directly to edit team names, owner emails, agent-group
        # match rules, issuer names, profile names, or any renewal-policy
        # field — turning a 30-second GUI task into a 30-minute database
        # excursion with audit-trail gaps.
        #
        # This step fails the build if any of the eight previously-orphan
        # client functions loses its page consumer (i.e. a future refactor
        # accidentally re-orphans them). Each fn must have ≥1 non-test
        # consumer under web/src/pages/. Tests (*.test.ts(x)) and the
        # client.ts definition file itself are exempt.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-b-31ceb6aaa9f1, cat-b-7a34f893a8f9, cat-b-4631ca092bee,
        # cat-b-9b97ffb35ef7 for closure rationale.
        run: |
          set -e
          ORPHAN_FNS="updateOwner updateTeam updateAgentGroup updateIssuer updateProfile createRenewalPolicy updateRenewalPolicy deleteRenewalPolicy"
          FAIL=0
          for fn in $ORPHAN_FNS; do
            HITS=$(grep -rE "\b${fn}\b" web/src/pages/ 2>/dev/null \
              | grep -vE '\.test\.(ts|tsx):' \
              | wc -l)
            if [ "$HITS" -eq 0 ]; then
              echo "::error::B-1 regression: client function '${fn}' has zero consumers under web/src/pages/."
              echo " Every backend CRUD endpoint must have a GUI consumer to avoid forcing operators to psql."
              echo " Either restore the page consumer or delete the client function in the same commit."
              FAIL=1
            fi
          done
          # cat-b-9b97ffb35ef7: exportCertificatePEM was deleted as a dead
          # duplicate of downloadCertificatePEM. Block resurrection.
          if grep -nE 'export\s+const\s+exportCertificatePEM' web/src/api/client.ts >/dev/null 2>&1; then
            echo "::error::B-1 regression: exportCertificatePEM was removed as a dead duplicate of downloadCertificatePEM."
            echo " If a JSON variant is needed, add an explicit page consumer in the same commit."
            FAIL=1
          fi
          if [ "$FAIL" -ne 0 ]; then
            exit 1
          fi
          echo "B-1 orphan-CRUD client function guardrail: all 8 functions have page consumers."

      - name: Forbidden strings.Contains(err.Error()) regression guard (S-2)
        # S-2 closure (cat-s6-efc7f6f6bd50): replaced 30 brittle
        # substring-match error-dispatch sites in internal/api/handler/
        # with errors.Is + typed sentinels (repository.ErrNotFound,
        # repository.ErrForeignKeyConstraint via the
        # repository.IsForeignKeyError helper). This step grep-fails
        # the build if any new strings.Contains(err.Error(), "not found")
        # or strings.Contains(err.Error(), "violates foreign key")
        # site appears under internal/api/handler/.
        #
        # Allowed: closure-comments documenting the convention (e.g.
        # bulk_reassignment.go's "post-M-1 errToStatus convention"
        # docblock); domain-specific substring patterns that are
        # legitimately one-off ("cannot approve", "cannot reject",
        # "cannot be parsed", "challenge password") — flagged as
        # deferred follow-ups in the S-2 commit message.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s6-efc7f6f6bd50 for closure rationale.
        run: |
          set -e
          BAD=$(grep -rnE 'strings\.Contains\(err\.Error\(\),\s*"(not found|violates foreign key|RESTRICT)"' internal/api/handler/ 2>/dev/null \
            | grep -vE '^\s*[^:]+:[0-9]+:\s*//' \
            || true)
          if [ -n "$BAD" ]; then
            echo "S-2 regression: brittle substring-match error-dispatch reappeared:"
            echo "$BAD"
            echo ""
            echo "Use errors.Is(err, repository.ErrNotFound) for not-found dispatch,"
            echo "or repository.IsForeignKeyError(err) for FK violations."
            echo "See coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-s6-efc7f6f6bd50 for closure rationale."
            exit 1
          fi
          echo "S-2 typed-sentinel error-dispatch guardrail: clean."

      - name: Race Detection
        run: |
          go test -race \
            ./internal/service/... \
            ./internal/api/handler/... \
            ./internal/api/middleware/... \
            ./internal/scheduler/... \
            ./internal/connector/... \
            ./internal/crypto/... \
            ./internal/domain/... \
            ./internal/validation/... \
            ./internal/tlsprobe/... \
            -count=1 -timeout 300s

      - name: Go Test with Coverage
        # ./internal/pkcs7/... is part of this run because the "Check
        # Coverage Thresholds" step reads the PKCS7 figure from
        # coverage.out and treats a package with no rows as 0%, which
        # would fail its 85% hard gate.
        run: |
          go test \
            ./internal/service/... \
            ./internal/api/handler/... \
            ./internal/api/middleware/... \
            ./internal/integration/... \
            ./internal/connector/issuer/... \
            ./internal/connector/target/... \
            ./internal/connector/notifier/... \
            ./internal/connector/discovery/... \
            ./internal/crypto/... \
            ./internal/pkcs7/... \
            ./internal/mcp/... \
            ./internal/cli/... \
            ./internal/domain/... \
            ./internal/validation/... \
            ./internal/tlsprobe/... \
            -count=1 -cover -coverprofile=coverage.out

      - name: Check Coverage Thresholds
        run: |
          # pkg_cov PATTERN — prints the mean of the per-function coverage
          # percentages that `go tool cover -func` reports on rows matching
          # PATTERN (one decimal place), or "0" when no row matches.
          pkg_cov() {
            go tool cover -func=coverage.out \
              | grep "$1" \
              | awk '{print $NF}' \
              | sed 's/%//' \
              | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}'
          }

          # Extract per-package coverage from test output
          echo "=== Coverage Report ==="
          go tool cover -func=coverage.out | tail -1

          # Check service layer coverage (target: 60%+; enforced floor below is 55%)
          SERVICE_COV=$(pkg_cov 'internal/service')
          echo "Service layer coverage: ${SERVICE_COV}%"

          # Check handler layer coverage (target: 60%+)
          HANDLER_COV=$(pkg_cov 'internal/api/handler')
          echo "Handler layer coverage: ${HANDLER_COV}%"

          # Check domain layer coverage (target: 40%+)
          DOMAIN_COV=$(pkg_cov 'internal/domain')
          echo "Domain layer coverage: ${DOMAIN_COV}%"

          # Check middleware layer coverage (target: 50%+; enforced floor below is 30%)
          MIDDLEWARE_COV=$(pkg_cov 'internal/api/middleware')
          echo "Middleware layer coverage: ${MIDDLEWARE_COV}%"

          # Check crypto package coverage (target: 85%+)
          # M-8 rationale: encryption primitives are a security-critical gate.
          # v2 format, key-derivation, fallback, and fail-closed sentinel paths
          # all need exhaustive coverage to avoid silent regressions (CWE-916 / CWE-329).
          CRYPTO_COV=$(pkg_cov 'internal/crypto')
          echo "Crypto package coverage: ${CRYPTO_COV}%"

          # Bundle-7 / Audit H-005 — extended crypto-cluster gates per CLAUDE.md.
          # internal/pkcs7/ is at 100% at HEAD (encoder-only, exhaustively tested
          # via Bundle-4 fuzz targets + unit tests). internal/connector/issuer/local/
          # is at 68.3% at HEAD; H-010 tracks the gap and will lift this floor
          # to 85% once the missing CSR-validation + CA-cert-loading tests land.
          PKCS7_COV=$(pkg_cov 'internal/pkcs7')
          echo "PKCS7 package coverage: ${PKCS7_COV}%"
          LOCAL_ISSUER_COV=$(pkg_cov 'internal/connector/issuer/local')
          echo "Local-issuer coverage: ${LOCAL_ISSUER_COV}%"

          # Fail if thresholds not met
          if [ "$(echo "$SERVICE_COV < 55" | bc -l)" -eq 1 ]; then
            echo "::error::Service layer coverage ${SERVICE_COV}% is below 55% threshold"
            exit 1
          fi
          if [ "$(echo "$HANDLER_COV < 60" | bc -l)" -eq 1 ]; then
            echo "::error::Handler layer coverage ${HANDLER_COV}% is below 60% threshold"
            exit 1
          fi
          if [ "$(echo "$DOMAIN_COV < 40" | bc -l)" -eq 1 ]; then
            echo "::error::Domain layer coverage ${DOMAIN_COV}% is below 40% threshold"
            exit 1
          fi
          if [ "$(echo "$MIDDLEWARE_COV < 30" | bc -l)" -eq 1 ]; then
            echo "::error::Middleware layer coverage ${MIDDLEWARE_COV}% is below 30% threshold"
            exit 1
          fi
          if [ "$(echo "$CRYPTO_COV < 85" | bc -l)" -eq 1 ]; then
            echo "::error::Crypto package coverage ${CRYPTO_COV}% is below 85% threshold"
            exit 1
          fi
          # Bundle-7 / H-005: pkcs7 hard gate (currently 100% — protects regressions).
          if [ "$(echo "$PKCS7_COV < 85" | bc -l)" -eq 1 ]; then
            echo "::error::PKCS7 package coverage ${PKCS7_COV}% is below 85% threshold"
            exit 1
          fi
          # Bundle-7 / H-005 / H-010: local-issuer SOFT gate at 65% — H-010
          # tracks the gap from 68.3% (HEAD) → 85% (CLAUDE.md target). Once
          # H-010's missing test cases land, raise this floor to 85.
          if [ "$(echo "$LOCAL_ISSUER_COV < 65" | bc -l)" -eq 1 ]; then
            echo "::error::Local-issuer coverage ${LOCAL_ISSUER_COV}% is below 65% transitional floor (H-010 will raise to 85%)"
            exit 1
          fi
          echo "Coverage thresholds passed!"

      - name: Upload Coverage Report
        uses: actions/upload-artifact@v4
        with:
          name: go-coverage
          path: coverage.out
          retention-days: 30

  frontend-build:
    name: Frontend Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Install Dependencies
        working-directory: web
        run: npm ci

      - name: TypeScript Check
        working-directory: web
        run: npx tsc --noEmit

      - name: Run Frontend Tests
        working-directory: web
        run: npx vitest run

      - name: Build Frontend
        working-directory: web
        run: npx vite build

      - name: Forbidden hardcoded source-count prose regression guard (S-1)
        # S-1 master closed cat-s1-9ce1cbe26876 (README + features.md
        # stale numeric counts; explicit CLAUDE.md violation per
        # "version-stamped numbers rot") and
        # cat-s1-features_md_issuer_count_contradiction (features.md
        # self-disagreed on issuer count: 9 vs 12 in the same doc).
        # The fix replaced source-derived numbers in prose with
        # "rebuild via <command>" patterns documented in CLAUDE.md::
        # "Current-state commands". This step grep-fails the build if
        # any of the previously-stale sites reintroduces a hardcoded
        # count.
        #
        # Allowed surfaces: demo-fixture prose in README ("32
        # certificates" — those are seed_demo.sql facts, not live
        # source counts), historical-milestone counts in
        # WORKSPACE-CHANGELOG.md, the testing-guide example phrasing
        # ("README claims 8 issuer connectors but only 6 exist"),
        # and any number that quotes the source command immediately
        # adjacent.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s1-9ce1cbe26876 + cat-s1-features_md_issuer_count_contradiction
        # for closure rationale.
        run: |
          set -e
          BAD=$(grep -rnE '\b[0-9]+\s+(issuer connectors?|target connectors?|notifier connectors?|discovery connectors?|MCP tools|OpenAPI operations|migrations|database tables|frontend pages|HTTP routes)\b' \
            README.md docs/ 2>/dev/null \
            | grep -vE 'WORKSPACE-CHANGELOG|seed_demo|demo override' \
            | grep -vE 'DRIFT HAZARD|Source: |Rebuild|rebuild via|grep -|wc -l|ls -d|find ' \
            | grep -vE 'README claims [0-9]+ issuer connectors but only [0-9]+ exist' \
            || true)
          if [ -n "$BAD" ]; then
            echo "S-1 regression: hardcoded source-count prose reappeared:"
            echo "$BAD"
            echo ""
            echo "CLAUDE.md rule: 'Numeric claims about current state rot.'"
            echo "Replace the count with the grep command from CLAUDE.md::"
            echo "'Current-state commands' (e.g. 'ls -d internal/connector/issuer/*/ | wc -l')"
            echo "or rephrase to reference the rebuild command on the same line."
            echo "See coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-s1-9ce1cbe26876 for closure rationale."
            exit 1
          fi
          echo "S-1 stale-counts guardrail: clean."

      - name: Documented orphan client fns sync guard (P-1)
        # P-1 master closed diff-04x03-d24864996ad4 + cat-b-dc46aadab98e
        # by documenting 17 detail-page-candidate orphan client.ts
        # functions in a docblock at the top of web/src/api/client.ts.
        # This step verifies the docblock list ↔ export list relationship:
        # every name listed in the docblock must still be declared as
        # an export below it (catches drift where someone deletes the
        # export but forgets the docblock, or vice versa).
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # diff-04x03-d24864996ad4 + cat-b-dc46aadab98e for closure rationale.
        run: |
          set -e
          DOCUMENTED='getAgentGroup getAgentGroupMembers getAuditEvent getCertificateDeployments getDiscoveredCertificate getHealthCheck getHealthCheckHistory getNetworkScanTarget getNotification getOCSPStatus getOwner getPolicy getPolicyViolations getRenewalPolicy getTeam registerAgent updateHealthCheck'
          MISSING=""
          for fn in $DOCUMENTED; do
            if ! grep -qE "^export const ${fn}\b" web/src/api/client.ts; then
              MISSING="${MISSING}${fn} "
            fi
          done
          if [ -n "$MISSING" ]; then
            echo "P-1 regression: documented orphan(s) missing from client.ts exports:"
            echo " $MISSING"
            echo ""
            echo "Either restore the export, or delete the corresponding line"
            echo "in the documented-orphans docblock at the top of client.ts."
            echo "See coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "diff-04x03-d24864996ad4 for closure rationale."
            exit 1
          fi
          echo "P-1 documented-orphans sync guard: clean ($(echo $DOCUMENTED | wc -w) fns verified)."

      - name: Frontend page-coverage regression guard (T-1)
        # T-1 closure (cat-s2-c24a548076c6): pre-T-1 only 3 of 28 pages
        # had Vitest coverage. T-1 lifted that to 11/28 by writing tests
        # for the 8 highest-leverage pages (CertificatesPage filter +
        # pagination state, the new B-1 Edit modals, the D-2 type-trim
        # render sites, etc.). The remaining pages are deferred to per-
        # page commits — when the next feature change touches them, the
        # test gets added in the same commit. This step blocks new
        # pages from landing without tests.
        #
        # Allowlist: pages that are explicitly deferred — listed below
        # with a one-line "why deferred" justification. Each entry must
        # be removed when the page gets its test.
        #   - LoginPage: static auth form, no business logic
        #   - AuditPage: read-only timeline; D-2 already trimmed
        #   - ShortLivedPage: derived view of certs already covered by CertificatesPage
        #   - DigestPage: server-rendered digest; minimal client logic
        #   - ObservabilityPage: exposes Prometheus / Grafana links only
        #   - HealthMonitorPage: wraps M-006 health check timeline; M-006 has its own tests
        #   - NetworkScanPage: wraps the network scanner UX; SSRF unit-tested in domain
        #   - JobsPage: covered transitively via AgentDetailPage
        #   - JobDetailPage: drill-down view; covered transitively via JobsPage
        #   - AgentFleetPage: bulk overview; covered transitively via AgentsPage
        #   - ProfilesPage: CRUD form; mirrors PoliciesPage shape (covered)
        #   - CertificateDetailPage: drill-down view; covered transitively via CertificatesPage
        #   - IssuerDetailPage: drill-down view; covered transitively via IssuersPage
        #   - TargetDetailPage: drill-down view; covered transitively via TargetsPage
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s2-c24a548076c6 for closure rationale.
        run: |
          set -e
          ALLOW='^(LoginPage|AuditPage|ShortLivedPage|DigestPage|ObservabilityPage|HealthMonitorPage|NetworkScanPage|JobsPage|JobDetailPage|AgentFleetPage|ProfilesPage|CertificateDetailPage|IssuerDetailPage|TargetDetailPage)$'
          UNTESTED=""
          for f in web/src/pages/*.tsx; do
            base=$(basename "$f" .tsx)
            case "$f" in
              *.test.tsx) continue ;;
            esac
            if [ -f "web/src/pages/${base}.test.tsx" ]; then continue; fi
            if echo "$base" | grep -qE "$ALLOW"; then continue; fi
            UNTESTED="${UNTESTED}${base} "
          done
          if [ -n "$UNTESTED" ]; then
            echo "T-1 regression: page(s) without sibling .test.tsx and not on the deferred allowlist:"
            echo " $UNTESTED"
            echo ""
            echo "Either add web/src/pages/<Page>.test.tsx (mirror NotificationsPage.test.tsx),"
            echo "or add the page to the ALLOW pattern in .github/workflows/ci.yml with a"
            echo "one-line 'why deferred' comment. See"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md cat-s2-c24a548076c6"
            echo "for closure rationale."
            exit 1
          fi
          ALLOWLIST_SIZE=$(echo "$ALLOW" | tr '|' '\n' | wc -l)
          echo "T-1 page-coverage guardrail: clean (allowlist size: $ALLOWLIST_SIZE pages deferred)."

      - name: Bundle-8 / L-015 target=_blank rel=noopener regression guard
        # Audit L-015 / CWE-1022 (reverse-tabnabbing): every <a target="_blank">
        # MUST carry rel="noopener noreferrer" so a malicious page at the
        # target URL cannot navigate the opener window via window.opener.
        # At Bundle-8 close (commit b566355→) all 3 sites in the codebase
        # already comply — this guard prevents regression. The
        # ExternalLink component (web/src/components/ExternalLink.tsx)
        # is the recommended way to add new external links.
        run: |
          set -e
          OFFENDERS=$(grep -rnE 'target=["'"'"']?_blank["'"'"']?' web/src/ 2>/dev/null \
            | grep -v 'noopener noreferrer' \
            | grep -v 'web/src/components/ExternalLink.tsx' \
            || true)
          if [ -n "$OFFENDERS" ]; then
            echo "L-015 regression: target=\"_blank\" without rel=\"noopener noreferrer\":"
            echo "$OFFENDERS"
            echo ""
            echo "Either add rel=\"noopener noreferrer\" inline,"
            echo "or migrate to <ExternalLink> from web/src/components/ExternalLink.tsx."
            exit 1
          fi
          echo "L-015 target=_blank guardrail: clean."

      - name: Bundle-8 / L-019 dangerouslySetInnerHTML regression guard
        # Audit L-019 / CWE-79 (XSS): no production code may use
        # dangerouslySetInnerHTML directly. At Bundle-8 close the codebase
        # has 0 sites; future genuine needs MUST route through
        # web/src/utils/safeHtml.ts::sanitizeHtml.
        run: |
          set -e
          OFFENDERS=$(grep -rnE 'dangerouslySetInnerHTML' web/src/ 2>/dev/null \
            | grep -v 'web/src/utils/safeHtml.ts' \
            || true)
          if [ -n "$OFFENDERS" ]; then
            echo "L-019 regression: dangerouslySetInnerHTML used outside safeHtml.ts:"
            echo "$OFFENDERS"
            echo ""
            echo "Route through web/src/utils/safeHtml.ts::sanitizeHtml — see file"
            echo "header for the activation procedure (DOMPurify dependency)."
exit 1 fi echo "L-019 dangerouslySetInnerHTML guardrail: clean." - name: Bundle-8 / M-009 mutation invalidation contract guard # Audit M-009: every useMutation must either invalidate the # queries it changes OR document why no invalidation is needed. # SOFT guard — counts useMutation sites and asserts the budget # doesn't grow without a corresponding invalidateQueries / setQueryData / # useTrackedMutation reference. Stricter per-site enforcement is # tracked as M-029 (covers the long-tail useListParams + useTrackedMutation # migration of the existing 56 useMutation sites). run: | set -e MUTATIONS=$(grep -rcE 'useMutation\(|useTrackedMutation\(' web/src/ 2>/dev/null \ | awk -F: '{s+=$2} END{print s}') INVALIDATIONS=$(grep -rcE 'invalidateQueries|setQueryData|removeQueries|invalidates:' web/src/ 2>/dev/null \ | awk -F: '{s+=$2} END{print s}') echo "M-009 budget — useMutation sites: $MUTATIONS / invalidation sites: $INVALIDATIONS" # At Bundle-8 close: 56 useMutation + 70 invalidation. We allow # +5 mutations growth before requiring invalidation parity. If # the gap widens, audit the new mutation sites for missing # invalidation pairs. BUDGET=$((INVALIDATIONS + 5)) if [ "$MUTATIONS" -gt "$BUDGET" ]; then echo "M-009 regression: $MUTATIONS useMutation sites exceeds invalidation budget ($BUDGET)." echo "New mutations should pair with invalidateQueries/setQueryData OR migrate to" echo "useTrackedMutation (web/src/hooks/useTrackedMutation.ts) with explicit invalidates." exit 1 fi - name: Forbidden env-var docs drift regression guard (G-3) # G-3 master closed cat-g-163dae19bc59 (docs-only env vars # phantom in features.md), cat-g-b8f8f8796159 (6 config-only # env vars never documented), and cat-g-renewal_check_interval_rename_drift # (features.md still advertised the pre-rename # CERTCTL_RENEWAL_CHECK_INTERVAL after it was renamed to # CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL). 
This step runs # `comm -23` both ways between the env vars defined in Go # source (config.go + cmd/agent + deploy/test fixtures + ACME # DNS-01 script env exports) and the env vars mentioned in # README + docs/ + deploy/helm/. # # Allowlist: env vars that are documented as integration- # surface contracts (script env exports for ACME DNS-01, # OpenSSL CA scripts, StepCA per-issuer-config-blob fields, # Webhook per-notifier-config-blob fields, ACME EAB, audit # exclusion, demo-stack overrides) but not consumed directly # by config.go. Each entry below has a one-line justification # — if you add a new entry, add the justification too. # # See coverage-gap-audit-2026-04-24-v5/unified-audit.md # cat-g-* for closure rationale. run: | set -e # Defined: config.go + agent + cli + mcp-server + server cmds + test fixtures + ACME DNS export { grep -nE '"CERTCTL_[A-Z_]+"' internal/config/config.go | sed -E 's/.*"(CERTCTL_[A-Z_]+)".*/\1/' grep -rhoE '"CERTCTL_[A-Z_]+"' cmd/agent/*.go cmd/cli/*.go cmd/mcp-server/*.go cmd/server/*.go 2>/dev/null | sed -E 's/"(CERTCTL_[A-Z_]+)"/\1/' grep -rhoE 'CERTCTL_[A-Z_]+' deploy/test/qa_test.go internal/connector/issuer/acme/dns.go 2>/dev/null } | grep -E '^CERTCTL_' | sort -u > /tmp/g3-defined.txt # Documented: README + docs + helm grep -rhoE '\bCERTCTL_[A-Z_]+\b' README.md docs/ deploy/helm/ 2>/dev/null | sort -u > /tmp/g3-docs.txt # Allowlist of env vars documented as external integration contracts. # Each entry justifies itself in one line; if you add to this list, # add the justification. 
ALLOWED='^( CERTCTL_OPENSSL_SIGN_SCRIPT| CERTCTL_OPENSSL_REVOKE_SCRIPT| CERTCTL_OPENSSL_CRL_SCRIPT| CERTCTL_OPENSSL_TIMEOUT_SECONDS| CERTCTL_STEPCA_URL| CERTCTL_STEPCA_FINGERPRINT| CERTCTL_STEPCA_PROVISIONER| CERTCTL_STEPCA_PROVISIONER_NAME| CERTCTL_STEPCA_PROVISIONER_KEY| CERTCTL_STEPCA_PROVISIONER_JWK| CERTCTL_STEPCA_PROVISIONER_PASSWORD| CERTCTL_STEPCA_PASSWORD| CERTCTL_STEPCA_KEY_PATH| CERTCTL_STEPCA_ROOT_CA| CERTCTL_WEBHOOK_URL| CERTCTL_WEBHOOK_SECRET| CERTCTL_ACME_EAB_KID| CERTCTL_ACME_EAB_HMAC| CERTCTL_ACME_DNS_PROPAGATION_WAIT| CERTCTL_AUDIT_EXCLUDE_PATHS| CERTCTL_TLS_| CERTCTL_TLS_INSECURE_SKIP_VERIFY| CERTCTL_SERVER_CA_BUNDLE_PATH| CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY| CERTCTL_QA_[A-Z_]+ )$' # ^ The CERTCTL_OPENSSL_* / CERTCTL_STEPCA_* / CERTCTL_WEBHOOK_* / # CERTCTL_ACME_EAB_* / CERTCTL_ACME_DNS_PROPAGATION_WAIT / # CERTCTL_AUDIT_EXCLUDE_PATHS / CERTCTL_TLS_* / CERTCTL_SERVER_* / # CERTCTL_QA_* sets are documented integration-surface contracts # (script invocations, per-issuer config-blob field names, # per-notifier config-blob field names, demo-stack overrides, # test fixtures) — not server-side env vars in config.go. # The audit's "37 docs-only" count over-flagged these; the # closure narrows the gate to the specific drift sites # (renewal-interval rename + 6 config-only) and allowlists # the documented external contracts here. ALLOWED_FLAT=$(echo "$ALLOWED" | tr -d '\n ') DOCS_ONLY=$(comm -13 /tmp/g3-defined.txt /tmp/g3-docs.txt | grep -vE "$ALLOWED_FLAT" || true) CONFIG_ONLY=$(comm -23 /tmp/g3-defined.txt /tmp/g3-docs.txt || true) if [ -n "$DOCS_ONLY" ]; then echo "G-3 regression: env var(s) mentioned in docs but not defined in Go source AND not in the documented integration-surface allowlist:" echo "$DOCS_ONLY" echo "" echo "Either delete from docs (phantom/typo) or add to config.go," echo "or add to the ALLOWED list with a one-line justification." 
exit 1 fi if [ -n "$CONFIG_ONLY" ]; then echo "G-3 regression: env var(s) defined in Go source but never documented:" echo "$CONFIG_ONLY" echo "" echo "Add an entry to docs/features.md (or another canonical doc) so operators can find it." exit 1 fi echo "G-3 env-var docs drift guardrail: clean." helm-lint: name: Helm Chart Validation runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Helm uses: azure/setup-helm@v4 with: version: '3.13.0' # HTTPS-Everywhere (v2.0.47): the chart fails render when no TLS source is # configured. Every lint/template invocation below must pick exactly one # provisioning mode — see deploy/helm/certctl/templates/_helpers.tpl # (certctl.tls.required) and docs/tls.md. - name: Lint Helm Chart run: | helm lint deploy/helm/certctl/ \ --set server.tls.existingSecret=certctl-tls-ci - name: Template Helm Chart (existingSecret mode) run: | helm template certctl deploy/helm/certctl/ \ --set server.tls.existingSecret=certctl-tls-ci \ > /dev/null - name: Template Helm Chart (cert-manager mode) run: | helm template certctl deploy/helm/certctl/ \ --set server.tls.certManager.enabled=true \ --set server.tls.certManager.issuerRef.name=letsencrypt-prod \ > /dev/null - name: Template Helm Chart (guard fails without TLS) run: | # Inverse test: the chart MUST refuse to render when no TLS source is # configured. If this ever renders successfully, the fail-loud guard # in certctl.tls.required has regressed. if helm template certctl deploy/helm/certctl/ > /dev/null 2>&1; then echo "::error::Helm chart rendered without a TLS source — fail-loud guard regressed" exit 1 fi