# certctl/.github/workflows/ci.yml

# Workflow display name shown in the GitHub Actions UI.
name: CI

# Triggers: pushes to master and v2-dev, plus pull requests that target master.
on:
  push:
    branches:
      - master
      - v2-dev
  pull_request:
    branches:
      - master

jobs:
  # Backend gate: builds, vets, lints, scans and tests the Go code in a
  # single job on a hosted Ubuntu runner.
  go-build-and-test:
    name: Go Build & Test
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents so later steps can read the source tree.
      - uses: actions/checkout@v4

      # Install the Go toolchain; the version is quoted so YAML keeps it a
      # string rather than inferring a number.
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.25.9'

      - name: Go Build
        # Compile each binary tree under cmd/ with its own `go build`
        # invocation, in order: server, agent, mcp-server, cli. One
        # invocation per tree keeps the per-tree build semantics; the
        # runner's `bash -e` aborts the loop on the first failing build.
        run: |
          for target in server agent mcp-server cli; do
            go build "./cmd/${target}/..."
          done

      # Run `go vet` over every package in the module.
      - name: Go Vet
        run: go vet ./...

      - name: Install golangci-lint
        # Install the pinned golangci-lint release (v2.11.4) into
        # GOPATH/bin via the upstream install script.
        #
        # The script is fetched through the HEAD ref, which resolves to the
        # repository's default branch, so the URL does not depend on that
        # branch being named `master`. The install directory is quoted so a
        # GOPATH containing whitespace cannot split the -b argument.
        run: |
          curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh | sh -s -- -b "$(go env GOPATH)/bin" v2.11.4

      # Lint every package in the module; the run is capped at five minutes.
      - name: Run golangci-lint
        run: golangci-lint run ./... --timeout 5m

      # NOTE(review): @latest is unpinned, so the scanner version can change
      # between runs — consider pinning if reproducibility matters.
      - name: Install govulncheck
        run: go install golang.org/x/vuln/cmd/govulncheck@latest

      # Run govulncheck over every package in the module.
      - name: Run govulncheck
        run: govulncheck ./...

      - name: Forbidden auth-type literal regression guard (G-1)
        # G-1 removed "jwt" from the accepted CERTCTL_AUTH_TYPE values to
        # close the silent JWT auth downgrade. This guard fails the build
        # when the literal comes back on an *additive* auth-type surface:
        # the validAuthTypes / ValidAuthTypes() set, the OpenAPI enum, the
        # helm chart's allowed-types list, or the .env.example default.
        #
        # Deliberately out of scope:
        #   - comment lines and the rejection branch in config.go
        #     (`c.Auth.Type == "jwt"`), which are the G-1 fix itself;
        #   - internal/connector/, where the Google OAuth2 service-account
        #     JWT and the step-ca provisioner one-time-token JWT are
        #     external-protocol uses unrelated to certctl's own auth shape;
        #   - _test.go files, so negative tests can pass the literal.
        #
        # References: docs/upgrade-to-v2-jwt-removal.md (closure rationale)
        # and internal/config/config.go::ValidAuthTypes (allowed set).
        run: |
          set -e

          # One -e per regression shape seen in pre-G-1 code:
          #   Go map/slice literal   "jwt": true   or   "jwt",
          #   Go switch case         case "jwt"
          #   YAML enum              enum: [..., jwt, ...]   or   - jwt
          #   .env / config default  AUTH_TYPE=jwt   auth.type=jwt
          #   Go conversion          AuthType("jwt")
          shapes=(
            -e '"jwt"\s*:\s*true'
            -e '"jwt"\s*,'
            -e 'case\s+"jwt"'
            -e 'enum:.*\bjwt\b'
            -e '^\s*-\s*jwt\s*$'
            -e 'AUTH_TYPE\s*=\s*jwt\s*$'
            -e 'AUTH_TYPE\s*=\s*jwt\s*#'
            -e 'auth\.type\s*=\s*jwt\s*$'
            -e 'AuthType\("jwt"\)'
          )

          # Files and directories that make up the allowed-set surface.
          surfaces=(
            internal/config/
            internal/api/
            cmd/
            api/openapi.yaml
            .env.example
            deploy/.env.example
            deploy/helm/certctl/values.yaml
            deploy/helm/certctl/templates/
          )

          # Scan, then drop test files, comment-only lines, and lines
          # containing 'is no longer accepted' (presumably the rejection
          # branch's error text). `|| true` keeps an empty result from
          # tripping `set -e`.
          offenders=$(grep -rnEH "${shapes[@]}" "${surfaces[@]}" 2>/dev/null \
              | grep -v '_test.go' \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \
              | grep -v 'is no longer accepted' \
              || true)

          # Clean tree: nothing to report.
          if [ -z "$offenders" ]; then
            exit 0
          fi

          printf '%s\n' \
            'G-1 regression: "jwt" reappeared in an allowed-set surface:' \
            "$offenders" \
            '' \
            "Allowed surface for 'jwt' literals: comment lines, the" \
            'dedicated rejection branch in internal/config/config.go,' \
            'and connector packages (Google OAuth2, step-ca).' \
            'See docs/upgrade-to-v2-jwt-removal.md and' \
            'internal/config/config.go::ValidAuthTypes().'
          exit 1

      - name: Forbidden api_key_hash JSON-shape regression guard (G-2)
        # G-2 closed cat-s5-apikey_leak: Agent.APIKeyHash is tagged
        # `json:"-"` and a defense-in-depth Agent.MarshalJSON zeroes the
        # field on the marshal-time copy. This guard fails the build when
        # `api_key_hash` comes back on an *additive* JSON-emitting surface:
        # a Go struct json tag in internal/domain/, an OpenAPI Agent schema
        # property, a TypeScript field declaration in web/src/, or an
        # enum-list / discriminator in handler production code.
        #
        # Out of scope: repository, migration, seed, service,
        # integration-test and unit-test files. Those are server-internal
        # use sites (the DB column, the in-memory struct field and the
        # auth-lookup path all stay). Comment lines are also exempt so the
        # closure rationale can live in the source.
        #
        # References: coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s5-apikey_leak (closure rationale) and
        # internal/domain/connector.go::Agent::MarshalJSON (redaction
        # enforcement).
        run: |
          set -e

          # One -e per regression shape that pre-G-2 shipped or that a
          # future refactor could plausibly introduce:
          #   Go struct tag             `json:"api_key_hash"`
          #   Frontend interface /
          #   OpenAPI schema property   api_key_hash[?]:
          #   YAML enum / array         - api_key_hash
          shapes=(
            -e 'json:"api_key_hash[",]'
            -e '^\s*api_key_hash\??\s*:'
            -e '^\s*-\s*api_key_hash\s*$'
          )

          # Directories and files that can emit the Agent JSON shape.
          surfaces=(
            internal/domain/
            internal/api/
            cmd/
            api/openapi.yaml
            web/src/
          )

          # Scan, then drop test files and comment-only lines. `|| true`
          # keeps an empty result from tripping `set -e`.
          offenders=$(grep -rnEH "${shapes[@]}" "${surfaces[@]}" 2>/dev/null \
              | grep -v '_test.go' \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \
              || true)

          # Clean tree: nothing to report.
          if [ -z "$offenders" ]; then
            exit 0
          fi

          printf '%s\n' \
            'G-2 regression: api_key_hash reappeared in a JSON-emitting surface:' \
            "$offenders" \
            '' \
            'Allowed surface for api_key_hash literals: comment lines,' \
            'the database column (migrations/), the in-memory struct' \
            'field tagged `json:"-"`, and the repository / service' \
            'use sites. See internal/domain/connector.go::Agent and' \
            'coverage-gap-audit-2026-04-24-v5/unified-audit.md' \
            'cat-s5-apikey_leak for the closure rationale.'
          exit 1

      - name: Forbidden plaintext HEALTHCHECK regression guard (U-2)
        # U-2 closed cat-u-healthcheck_protocol_mismatch by switching the
        # published image's HEALTHCHECK from
        # `curl -f http://localhost:8443/health` (always failed against the
        # HTTPS-only listener) to `curl -fsk https://localhost:8443/health`.
        # This step grep-fails the build if a root-level Dockerfile
        # (`Dockerfile` or any `Dockerfile.*`) carries the pre-U-2 plaintext
        # shape — either the exact `curl -f ... http://localhost:8443/health`
        # probe or, more loosely, any HEALTHCHECK line that targets
        # `http://`.
        #
        # Comment lines and the docs/upgrade-to-tls.md:182 expected-to-
        # fail invariant ("plaintext is gone, expect Connection refused")
        # are intentionally exempt — we DO want the upgrade-doc string
        # `http://localhost:8443/health` to remain there, since it
        # documents what operators should test for to confirm plaintext
        # is dead. The guardrail is scoped to Dockerfile* only, so docs
        # are out of its reach.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-u-healthcheck_protocol_mismatch for the closure rationale,
        # or deploy/test/healthcheck_test.go for the binary-image
        # contract the runtime test pins.
        run: |
          set -e

          # Patterns that catch the actual regression shapes:
          #   - HEALTHCHECK directive carrying any http:// (even if the
          #     port differs, no plaintext probe should ship).
          #   - The exact pre-U-2 string for grep-friendliness.
          #
          # `Dockerfile.*` already expands to Dockerfile.agent, so the file
          # is not listed a second time; naming it twice made grep scan it
          # twice and print every hit in it twice. An unmatched glob is
          # passed through literally and the resulting grep error is
          # discarded by 2>/dev/null.
          #
          # NOTE(review): grep is line-oriented, so a HEALTHCHECK whose
          # `http://` URL sits on a continuation line is only caught by
          # the second pattern.
          BAD=$(grep -rnEH \
              -e 'HEALTHCHECK.*http://' \
              -e 'curl[^|&;]*-f[^|&;]*http://localhost:8443/health' \
              Dockerfile Dockerfile.* 2>/dev/null \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*#' \
              || true)
          if [ -n "$BAD" ]; then
            echo "U-2 regression: plaintext HEALTHCHECK reappeared in a Dockerfile:"
            echo "$BAD"
            echo ""
            echo "Allowed: HTTPS HEALTHCHECK with -k (acceptable for"
            echo "localhost-to-localhost), or non-HTTP probe shapes"
            echo "(pgrep, /proc check). See Dockerfile / Dockerfile.agent"
            echo "for the post-U-2 reference shape and"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-u-healthcheck_protocol_mismatch for rationale."
            exit 1
          fi

      - name: Forbidden migration mount in compose initdb (U-3)
        # U-3 closed cat-u-seed_initdb_schema_drift (GitHub #10). Before
        # U-3 the schema had two sources of truth: `migrations/*.up.sql`
        # mounted into postgres `/docker-entrypoint-initdb.d/`, and the
        # same files re-applied at runtime by `RunMigrations`. Every new
        # migration the seed depended on (000013 added
        # `policy_rules.severity`, 000017 renames
        # `retry_interval_seconds`, etc.) had to be added by hand to the
        # compose mount list; forgetting one crashed initdb on first boot,
        # postgres was flagged unhealthy, and the stack failed to start
        # from a fresh clone. After U-3 the server is the single source of
        # truth: `RunMigrations` + `RunSeed` apply everything at boot.
        #
        # This guard fails the build when one of the compose files listed
        # below re-introduces a `migrations/.*\.sql` or `seed.*\.sql`
        # mount into `/docker-entrypoint-initdb.d`. Comment lines are
        # exempt so the rationale block in the compose files (explaining
        # WHY the mounts were removed) does not trip it. Demo data via
        # `seed_demo.sql` is tolerated only behind the CERTCTL_DEMO_SEED
        # env var (the post-U-3 demo path); a bare initdb mount is not.
        #
        # References: coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-u-seed_initdb_schema_drift (closure rationale) and
        # internal/repository/postgres/db.go::RunSeed (runtime contract).
        run: |
          set -e

          # `<host sql file>:<initdb path>` volume entries, for migration
          # files and for seed files.
          shapes=(
            -e 'migrations/.*\.sql:.*docker-entrypoint-initdb'
            -e 'seed.*\.sql:.*docker-entrypoint-initdb'
          )

          # Compose files covered by the guard.
          compose_files=(
            deploy/docker-compose.yml
            deploy/docker-compose.test.yml
            deploy/docker-compose.demo.yml
          )

          # Scan, then drop comment-only lines. `|| true` keeps an empty
          # result from tripping `set -e`.
          offenders=$(grep -rnEH "${shapes[@]}" "${compose_files[@]}" 2>/dev/null \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*#' \
              || true)

          # Clean tree: nothing to report.
          if [ -z "$offenders" ]; then
            exit 0
          fi

          printf '%s\n' \
            'U-3 regression: migration/seed mount into postgres initdb reappeared:' \
            "$offenders" \
            '' \
            'The post-U-3 contract is: postgres comes up with an empty' \
            'schema and the server applies migrations + seed at boot via' \
            'internal/repository/postgres.RunMigrations + RunSeed. Demo' \
            'data lives behind CERTCTL_DEMO_SEED=true (RunDemoSeed),' \
            'not an initdb mount. See' \
            'coverage-gap-audit-2026-04-24-v5/unified-audit.md' \
            'cat-u-seed_initdb_schema_drift for the closure rationale.'
          exit 1

      - name: Forbidden StatusBadge dead-key + TS phantom-field regression guard (D-1 + D-2)
        # D-1 master closed cat-d-359e92c20cbf (Agent: 'Stale' dead key,
        # 'Degraded' missing), cat-d-9f4c8e4a91f1 (Notification: 'dead'
        # missing), cat-d-1447e04732e7 (Cert: 'PendingIssuance' dead
        # key), cat-f-cert_detail_page_key_render_fallback (render-site
        # uses cert.X directly), and cat-f-ae0d06b6588f (Certificate
        # TS phantom fields). This step grep-fails the build if either
        # half of the closure is reverted:
        #
        #   1. The dead StatusBadge keys ('Stale' for Agent, 'PendingIssuance'
        #      for Cert) reappearing as map literals, OR
        #   2. The five phantom Certificate TS fields (serial_number,
        #      fingerprint_sha256, key_algorithm, key_size, issued_at)
        #      reappearing on the `Certificate` interface in types.ts
        #      (CertificateVersion legitimately carries them and is
        #      excluded because the scan is windowed to the exact
        #      `export interface Certificate {` block).
        #
        # It also carries the D-2 phantom-field checks for the Agent,
        # Issuer and Notification interfaces (see the D-2 note in the
        # script).
        #
        # Comments are exempt so the closure prose in StatusBadge.tsx +
        # types.ts can stay. Test files are exempt so negative tests
        # asserting the dead keys fall through to neutral keep working.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-d-* / cat-f-* for the closure rationale, or
        # web/src/components/StatusBadge.test.tsx for the live
        # enum-coverage contract.
        run: |
          set -e

          # interface_body NAME
          #   Prints, as "file:line:content", every line of
          #   web/src/api/types.ts strictly between the header
          #   `export interface NAME {` (matched as an exact line prefix,
          #   so `Certificate` never opens on `CertificateVersion {` or
          #   `CertificateProfile {`) and the first line that starts with
          #   `}`. Comment-only lines (`//`, `/*`, `*`) are removed so
          #   closure prose inside an interface cannot trip a check — the
          #   exemption this step promises. Never fails: an empty window
          #   just prints nothing.
          interface_body() {
            awk -v name="$1" '
              index($0, "export interface " name " {") == 1 { flag=1; next }
              flag && /^\}/                                  { flag=0 }
              flag                                           { print FILENAME":"NR":"$0 }
            ' web/src/api/types.ts \
              | grep -vE '^[^:]+:[0-9]+:\s*(//|/\*|\*)' \
              || true
          }

          # Dead StatusBadge keys. -H forces the "file:line:" prefix even
          # though a single file is scanned, so the comment filter below
          # (written against that prefix) sees the shape it expects.
          BAD_BADGE=$(grep -nHE "^\s*(Stale|PendingIssuance)\s*:\s*'badge-" \
              web/src/components/StatusBadge.tsx 2>/dev/null \
              | grep -v '\.test\.' \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*//' \
              || true)
          if [ -n "$BAD_BADGE" ]; then
            echo "D-1 regression: dead StatusBadge key reappeared:"
            echo "$BAD_BADGE"
            echo ""
            echo "Allowed surface: comment lines naming the removed key in"
            echo "the file's preamble. The Go-side AgentStatus values are"
            echo "Online/Offline/Degraded (no Stale); CertificateStatus values"
            echo "are Pending/Active/... (no PendingIssuance). See"
            echo "web/src/components/StatusBadge.test.tsx for the contract."
            exit 1
          fi

          # Certificate TS phantom-field check, scoped to the
          # `export interface Certificate {` block — CertificateVersion
          # legitimately declares these fields and must NOT trip the
          # guardrail. The grep matches a phantom-field declaration
          # anywhere in that window.
          BAD_TS=$(interface_body Certificate \
            | grep -E '\b(serial_number|fingerprint_sha256|key_algorithm|key_size|issued_at)\??\s*:' \
            || true)
          if [ -n "$BAD_TS" ]; then
            echo "D-1 regression: Certificate TS interface re-added a phantom field:"
            echo "$BAD_TS"
            echo ""
            echo "These fields live on CertificateVersion, not ManagedCertificate."
            echo "The Go-side ManagedCertificate has never carried them; the"
            echo "TS optional declarations were silently undefined on every"
            echo "list response. Render-site consumers (e.g. CertificateDetailPage)"
            echo "use latestVersion?.field as the canonical access path."
            echo "See coverage-gap-audit-2026-04-24-v5/unified-audit.md"
            echo "cat-f-ae0d06b6588f for the closure rationale."
            exit 1
          fi

          # D-2 master closed five diff-05x06-* type-drift findings:
          # Agent (5 phantoms), Issuer (1 phantom), Notification (1 phantom)
          # — TRIM half. The Target (2 missing fields) and DiscoveredCertificate
          # (1 missing field) — ADD half is pinned by the literal-construction
          # blocks in web/src/api/types.test.ts, not a CI grep. The phantom-
          # trim regression vector is a windowed grep per interface,
          # mirroring the D-1 Certificate check above.
          #
          # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
          # diff-05x06-7cdf4e78ae24 (Agent), diff-05x06-97fab8783a5c (Issuer),
          # diff-05x06-caba9eb3620e (Notification) for the closure rationale.

          # D-2 Agent phantom-field check. The grep matches `last_heartbeat`
          # but NOT `last_heartbeat_at` (the legitimate Go-emitted field) —
          # the `\b...\??\s*:` shape plus the `grep -v 'last_heartbeat_at'`
          # filter handle that.
          BAD_AGENT=$(interface_body Agent \
            | grep -E '\b(last_heartbeat|capabilities|tags|created_at|updated_at)\??\s*:' \
            | grep -v 'last_heartbeat_at' \
            || true)
          if [ -n "$BAD_AGENT" ]; then
            echo "D-2 regression: Agent TS interface re-added a phantom field:"
            echo "$BAD_AGENT"
            echo ""
            echo "The Go-side internal/domain/connector.go::Agent emits exactly:"
            echo "id, name, hostname, status, last_heartbeat_at?, registered_at,"
            echo "os, architecture, ip_address, version, retired_at?, retired_reason?."
            echo "The five fields blocked by this guard (last_heartbeat,"
            echo "capabilities, tags, created_at, updated_at) were TS phantoms"
            echo "the Go struct never emitted. See unified-audit.md"
            echo "diff-05x06-7cdf4e78ae24 for closure rationale."
            exit 1
          fi

          # D-2 Issuer phantom-field check.
          BAD_ISSUER=$(interface_body Issuer \
            | grep -E '\bstatus\??\s*:' \
            || true)
          if [ -n "$BAD_ISSUER" ]; then
            echo "D-2 regression: Issuer TS interface re-added a phantom 'status' field:"
            echo "$BAD_ISSUER"
            echo ""
            echo "The Go-side internal/domain/connector.go::Issuer has no 'status'"
            echo "field — only 'enabled' (bool). Render sites derive the displayed"
            echo "status from 'enabled' at the call site (see"
            echo "web/src/pages/IssuersPage.tsx::issuerStatus). See unified-audit.md"
            echo "diff-05x06-97fab8783a5c for closure rationale."
            exit 1
          fi

          # D-2 Notification phantom-field check.
          BAD_NOTIF=$(interface_body Notification \
            | grep -E '\bsubject\??\s*:' \
            || true)
          if [ -n "$BAD_NOTIF" ]; then
            echo "D-2 regression: Notification TS interface re-added a phantom 'subject' field:"
            echo "$BAD_NOTIF"
            echo ""
            echo "The Go-side internal/domain/notification.go::NotificationEvent"
            echo "has no 'subject' field — only 'message'. Pre-D-2 the consumer"
            echo "at NotificationsPage.tsx had a dead '|| n.subject' fallback"
            echo "that always fell through. See unified-audit.md"
            echo "diff-05x06-caba9eb3620e for closure rationale."
            exit 1
          fi

      - name: Forbidden client-side bulk-action loop regression guard (L-1)
        # L-1 master closed cat-l-fa0c1ac07ab5 (bulk-renew loop) and
        # cat-l-8a1fb258a38a (bulk-reassign loop) by adding server-side
        # bulk endpoints (POST /api/v1/certificates/bulk-renew and
        # POST /api/v1/certificates/bulk-reassign) that the GUI calls
        # in a single round-trip. Pre-L-1 the GUI looped per-cert
        # HTTP calls — 100 selected certs = 100 round-trips × ~50–200ms
        # each = a 5–20-second wedge during which the operator stares
        # at a progress bar.
        #
        # This step grep-fails the build if either loop shape reappears
        # in CertificatesPage.tsx. Patterns catch the actual pre-L-1
        # shapes:
        #   - `for (const id of ids) { await triggerRenewal(id) }`
        #   - `for (const id of ids) { await updateCertificate(id, { owner_id }) }`
        #   - `for (let i = 0; i < ids.length; i++) { await triggerRenewal(ids[i]) }`
        #
        # Allowed: comment lines explaining the pre-L-1 pattern in the
        # docblock above each handler. Test files (_test.tsx) exempt
        # so negative-pattern tests can keep working.
        #
        # NOTE(review): the check is line-oriented — it only fires when
        # the `for (` header and the awaited call share one source line.
        # A loop whose body sits on following lines is not detected.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-l-fa0c1ac07ab5 and cat-l-8a1fb258a38a for closure
        # rationale, or web/src/api/client.ts::bulkRenewCertificates
        # / bulkReassignCertificates for the canonical call path.
        run: |
          set -e

          # -H forces the "file:line:" prefix even though a single file is
          # scanned. Without it grep prints "line:content", which the
          # comment filter below can never match, so a docblock quoting
          # the old loop would fail the build. The filter also accepts
          # `/*` and `*` docblock lines, not just `//`.
          BAD_LOOP=$(grep -nHE 'for[[:space:]]*\(' web/src/pages/CertificatesPage.tsx 2>/dev/null \
              | grep -E 'await[[:space:]]+(triggerRenewal|updateCertificate)\(' \
              | grep -v '\.test\.' \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*(//|/\*|\*)' \
              || true)
          if [ -n "$BAD_LOOP" ]; then
            echo "L-1 regression: client-side bulk-action loop reappeared in CertificatesPage.tsx:"
            echo "$BAD_LOOP"
            echo ""
            echo "Use bulkRenewCertificates({ certificate_ids: [...] }) or"
            echo "bulkReassignCertificates({ certificate_ids: [...], owner_id, team_id? })"
            echo "instead of looping per-item HTTP calls. See"
            echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md cat-l-* for rationale."
            exit 1
          fi

      - name: Forbidden orphan-CRUD client function regression guard (B-1)
        # B-1 master closed four audit findings. Three were orphaned
        # client surfaces — the orphan-update functions
        # (cat-b-31ceb6aaa9f1, cat-b-7a34f893a8f9) and the RenewalPolicy
        # CRUD surface (cat-b-4631ca092bee) — fixed by wiring per-page
        # Edit modals so every backend write endpoint has at least one
        # GUI consumer. The fourth (cat-b-9b97ffb35ef7) removed the dead
        # `exportCertificatePEM` duplicate.
        #
        # The pre-B-1 failure mode: the backend shipped a CRUD handler
        # and client.ts shipped the matching `update*` / `delete*` /
        # `create*` function, but no page imported it. Operators had to
        # use `psql` directly to edit team names, owner emails,
        # agent-group match rules, issuer names, profile names, or any
        # renewal-policy field — a 30-second GUI task became a 30-minute
        # database excursion with audit-trail gaps.
        #
        # This guard fails the build when any of the eight previously
        # orphaned client functions has no non-test mention under
        # web/src/pages/ (i.e. a refactor re-orphaned it). Test files
        # (*.test.ts(x)) do not count; client.ts, the definition file,
        # lives outside the scanned directory.
        #
        # References: coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-b-31ceb6aaa9f1, cat-b-7a34f893a8f9, cat-b-4631ca092bee,
        # cat-b-9b97ffb35ef7.
        run: |
          set -e

          guarded_fns=(
            updateOwner
            updateTeam
            updateAgentGroup
            updateIssuer
            updateProfile
            createRenewalPolicy
            updateRenewalPolicy
            deleteRenewalPolicy
          )
          failed=0

          for fn in "${guarded_fns[@]}"; do
            # Count matching lines that do not come from a test file.
            # `grep -c` exits non-zero when the count is 0, hence `|| true`.
            consumers=$(grep -rE "\b${fn}\b" web/src/pages/ 2>/dev/null \
                | grep -cvE '\.test\.(ts|tsx):' \
                || true)
            if [ "$consumers" -gt 0 ]; then
              continue
            fi
            echo "::error::B-1 regression: client function '${fn}' has zero consumers under web/src/pages/."
            echo "  Every backend CRUD endpoint must have a GUI consumer to avoid forcing operators to psql."
            echo "  Either restore the page consumer or delete the client function in the same commit."
            failed=1
          done

          # cat-b-9b97ffb35ef7: exportCertificatePEM was removed as a dead
          # duplicate of downloadCertificatePEM; keep it from coming back.
          if grep -qE 'export\s+const\s+exportCertificatePEM' web/src/api/client.ts 2>/dev/null; then
            echo "::error::B-1 regression: exportCertificatePEM was removed as a dead duplicate of downloadCertificatePEM."
            echo "  If a JSON variant is needed, add an explicit page consumer in the same commit."
            failed=1
          fi

          # Report every violation above before failing the step.
          if [ "$failed" -ne 0 ]; then
            exit 1
          fi
          echo "B-1 orphan-CRUD client function guardrail: all 8 functions have page consumers."

      - name: Forbidden strings.Contains(err.Error()) regression guard (S-2)
        # S-2 closure (cat-s6-efc7f6f6bd50): 30 brittle substring-match
        # error-dispatch sites in internal/api/handler/ were replaced
        # with errors.Is + typed sentinels (repository.ErrNotFound, and
        # repository.ErrForeignKeyConstraint via the
        # repository.IsForeignKeyError helper). This guard fails the build
        # when a new strings.Contains(err.Error(), "not found"),
        # strings.Contains(err.Error(), "violates foreign key") or
        # strings.Contains(err.Error(), "RESTRICT") site appears under
        # internal/api/handler/.
        #
        # Still allowed: closure comments that document the convention
        # (e.g. bulk_reassignment.go's "post-M-1 errToStatus convention"
        # docblock), and legitimately one-off domain substrings ("cannot
        # approve", "cannot reject", "cannot be parsed", "challenge
        # password") — flagged as deferred follow-ups in the S-2 commit
        # message.
        #
        # Reference: coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s6-efc7f6f6bd50.
        run: |
          set -e

          # The banned call shape, with the three banned substrings.
          needle='strings\.Contains\(err\.Error\(\),\s*"(not found|violates foreign key|RESTRICT)"'

          # Scan the handler tree, then drop `//` comment-only lines.
          # `|| true` keeps an empty result from tripping `set -e`.
          hits=$(grep -rnE "$needle" internal/api/handler/ 2>/dev/null \
              | grep -vE '^\s*[^:]+:[0-9]+:\s*//' \
              || true)

          if [ -n "$hits" ]; then
            printf '%s\n' \
              'S-2 regression: brittle substring-match error-dispatch reappeared:' \
              "$hits" \
              '' \
              'Use errors.Is(err, repository.ErrNotFound) for not-found dispatch,' \
              'or repository.IsForeignKeyError(err) for FK violations.' \
              'See coverage-gap-audit-2026-04-24-v5/unified-audit.md' \
              'cat-s6-efc7f6f6bd50 for closure rationale.'
            exit 1
          fi
          echo "S-2 typed-sentinel error-dispatch guardrail: clean."

      - name: Race Detection
        # Run the listed internal package trees under the Go race
        # detector: no test caching (-count=1), 300-second timeout.
        run: |
          go test -race -count=1 -timeout 300s \
            ./internal/service/... \
            ./internal/api/handler/... \
            ./internal/api/middleware/... \
            ./internal/scheduler/... \
            ./internal/connector/... \
            ./internal/crypto/... \
            ./internal/domain/... \
            ./internal/validation/... \
            ./internal/tlsprobe/...

      - name: Go Test with Coverage
        # Run the listed package trees with coverage enabled and write the
        # merged profile to coverage.out, which the threshold and upload
        # steps read. -count=1 disables test caching.
        run: |
          go test -count=1 -cover -coverprofile=coverage.out \
            ./internal/service/... \
            ./internal/api/handler/... \
            ./internal/api/middleware/... \
            ./internal/integration/... \
            ./internal/connector/issuer/... \
            ./internal/connector/target/... \
            ./internal/connector/notifier/... \
            ./internal/connector/discovery/... \
            ./internal/crypto/... \
            ./internal/mcp/... \
            ./internal/cli/... \
            ./internal/domain/... \
            ./internal/validation/... \
            ./internal/tlsprobe/...

      - name: Check Coverage Thresholds
        # Reads coverage.out, prints a per-layer coverage figure for five
        # layers, and fails the step when any figure is below its enforced
        # floor. Each figure is the unweighted mean of the per-function
        # percentages that `go tool cover -func` reports for functions
        # whose path contains the layer's directory — it is not a
        # statement-weighted package total.
        run: |
          # Render the per-function table once; every figure below is
          # derived from this single report.
          FUNC_REPORT=$(go tool cover -func=coverage.out)

          echo "=== Coverage Report ==="
          echo "$FUNC_REPORT" | tail -1

          # layer_cov PATH_FRAGMENT
          #   Prints the mean of the last-column percentages of all report
          #   lines containing PATH_FRAGMENT, to one decimal place, or "0"
          #   when no line matches.
          layer_cov() {
            echo "$FUNC_REPORT" \
              | grep "$1" \
              | awk '{print $NF}' \
              | sed 's/%//' \
              | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}'
          }

          # require_cov LABEL MEASURED FLOOR
          #   Emits a GitHub error annotation and fails the step when
          #   MEASURED < FLOOR. bc does the comparison because MEASURED
          #   is a decimal, which `[ -lt ]` cannot compare.
          require_cov() {
            if [ "$(echo "$2 < $3" | bc -l)" -eq 1 ]; then
              echo "::error::$1 coverage $2% is below $3% threshold"
              exit 1
            fi
          }

          # Service layer (target: 60%+, enforced floor: 55%)
          SERVICE_COV=$(layer_cov 'internal/service')
          echo "Service layer coverage: ${SERVICE_COV}%"

          # Handler layer (target and enforced floor: 60%)
          HANDLER_COV=$(layer_cov 'internal/api/handler')
          echo "Handler layer coverage: ${HANDLER_COV}%"

          # Domain layer (target and enforced floor: 40%)
          DOMAIN_COV=$(layer_cov 'internal/domain')
          echo "Domain layer coverage: ${DOMAIN_COV}%"

          # Middleware layer (target: 50%+, enforced floor: 30%)
          MIDDLEWARE_COV=$(layer_cov 'internal/api/middleware')
          echo "Middleware layer coverage: ${MIDDLEWARE_COV}%"

          # Crypto package (target and enforced floor: 85%)
          # M-8 rationale: encryption primitives are a security-critical gate.
          # v2 format, key-derivation, fallback, and fail-closed sentinel paths
          # all need exhaustive coverage to avoid silent regressions (CWE-916 / CWE-329).
          CRYPTO_COV=$(layer_cov 'internal/crypto')
          echo "Crypto package coverage: ${CRYPTO_COV}%"

          # Enforce the floors; the first violation fails the step.
          require_cov "Service layer" "$SERVICE_COV" 55
          require_cov "Handler layer" "$HANDLER_COV" 60
          require_cov "Domain layer" "$DOMAIN_COV" 40
          require_cov "Middleware layer" "$MIDDLEWARE_COV" 30
          require_cov "Crypto package" "$CRYPTO_COV" 85
          echo "Coverage thresholds passed!"

      # Publish coverage.out as the `go-coverage` artifact, kept for 30 days.
      # NOTE(review): there is no `if:` on this step, so under the default
      # step condition it is skipped whenever an earlier step (including the
      # threshold check) fails — confirm that is intended.
      - name: Upload Coverage Report
        uses: actions/upload-artifact@v4
        with:
          name: go-coverage
          path: coverage.out
          retention-days: 30

  # Frontend gate: installs, type-checks, tests and builds the web/ app on
  # a hosted Ubuntu runner.
  frontend-build:
    name: Frontend Build
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents.
      - uses: actions/checkout@v4

      # Install Node.js; the version is quoted so YAML keeps it a string.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      # Clean install of dependencies (`npm ci`), run inside web/.
      - name: Install Dependencies
        working-directory: web
        run: npm ci

      # Type-check only; --noEmit produces no build output.
      - name: TypeScript Check
        working-directory: web
        run: npx tsc --noEmit

      # Single non-watch run of the Vitest suite.
      - name: Run Frontend Tests
        working-directory: web
        run: npx vitest run

      # Vite build of the web/ app.
      - name: Build Frontend
        working-directory: web
        run: npx vite build

      - name: Forbidden hardcoded source-count prose regression guard (S-1)
        # Regression guard for the S-1 closure of cat-s1-9ce1cbe26876
        # (stale numeric counts in README + features.md; an explicit
        # CLAUDE.md violation per "version-stamped numbers rot") and
        # cat-s1-features_md_issuer_count_contradiction (features.md
        # disagreed with itself on the issuer count: 9 vs 12 in one doc).
        # The closure swapped source-derived numbers in prose for the
        # "rebuild via <command>" patterns documented in CLAUDE.md::
        # "Current-state commands". The build fails here as soon as a
        # hardcoded count shows up again in README.md or docs/.
        #
        # Exempt surfaces: demo-fixture prose in README ("32
        # certificates" — seed_demo.sql facts, not live source counts),
        # historical-milestone counts in WORKSPACE-CHANGELOG.md, the
        # testing-guide example phrasing ("README claims 8 issuer
        # connectors but only 6 exist"), and any number quoted right
        # next to the command that produces it.
        #
        # Closure rationale: coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-s1-9ce1cbe26876 + cat-s1-features_md_issuer_count_contradiction.
        run: |
          set -e
          # "<number> <counted thing>" phrasing that must not appear as bare prose.
          COUNT_RE='\b[0-9]+\s+(issuer connectors?|target connectors?|notifier connectors?|discovery connectors?|MCP tools|OpenAPI operations|migrations|database tables|frontend pages|HTTP routes)\b'
          # Exempt: hits whose path or text names the changelog / demo fixtures.
          FIXTURE_RE='WORKSPACE-CHANGELOG|seed_demo|demo override'
          # Exempt: hits that carry a drift marker or their rebuild command.
          SOURCED_RE='DRIFT HAZARD|Source: |Rebuild|rebuild via|grep -|wc -l|ls -d|find '
          # Exempt: the testing-guide example sentence.
          EXAMPLE_RE='README claims [0-9]+ issuer connectors but only [0-9]+ exist'

          # `|| true` keeps `set -e` from aborting when the filters leave nothing.
          STALE=$(grep -rnE "$COUNT_RE" README.md docs/ 2>/dev/null \
              | grep -vE "$FIXTURE_RE" \
              | grep -vE "$SOURCED_RE" \
              | grep -vE "$EXAMPLE_RE" \
              || true)

          if [ -z "$STALE" ]; then
            echo "S-1 stale-counts guardrail: clean."
            exit 0
          fi

          printf '%s\n' \
            "S-1 regression: hardcoded source-count prose reappeared:" \
            "$STALE" \
            "" \
            "CLAUDE.md rule: 'Numeric claims about current state rot.'" \
            "Replace the count with the grep command from CLAUDE.md::" \
            "'Current-state commands' (e.g. 'ls -d internal/connector/issuer/*/ | wc -l')" \
            "or rephrase to reference the rebuild command on the same line." \
            "See coverage-gap-audit-2026-04-24-v5/unified-audit.md" \
            "cat-s1-9ce1cbe26876 for closure rationale."
          exit 1

      - name: Documented orphan client fns sync guard (P-1)
        # Regression guard for the P-1 closure of diff-04x03-d24864996ad4 +
        # cat-b-dc46aadab98e, which documented 17 detail-page-candidate
        # orphan client.ts functions in a docblock at the top of
        # web/src/api/client.ts. Every documented name must still be
        # declared as an `export const` in that file, so deleting an
        # export without updating the documented list (or the reverse)
        # fails the build.
        #
        # The names are listed in this step itself; the docblock is not
        # parsed, so keep the list below in step with it.
        #
        # Closure rationale: coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # diff-04x03-d24864996ad4 + cat-b-dc46aadab98e.
        run: |
          set -e
          CLIENT_TS=web/src/api/client.ts
          ORPHANS='getAgentGroup getAgentGroupMembers getAuditEvent getCertificateDeployments getDiscoveredCertificate getHealthCheck getHealthCheckHistory getNetworkScanTarget getNotification getOCSPStatus getOwner getPolicy getPolicyViolations getRenewalPolicy getTeam registerAgent updateHealthCheck'

          # Collect every listed name that lacks an `export const <name>` line.
          ABSENT=""
          for name in $ORPHANS; do
            grep -qE "^export const ${name}\b" "$CLIENT_TS" || ABSENT="${ABSENT}${name} "
          done

          if [ -z "$ABSENT" ]; then
            echo "P-1 documented-orphans sync guard: clean ($(echo $ORPHANS | wc -w) fns verified)."
            exit 0
          fi

          printf '%s\n' \
            "P-1 regression: documented orphan(s) missing from client.ts exports:" \
            "  $ABSENT" \
            "" \
            "Either restore the export, or delete the corresponding line" \
            "in the documented-orphans docblock at the top of client.ts." \
            "See coverage-gap-audit-2026-04-24-v5/unified-audit.md" \
            "diff-04x03-d24864996ad4 for closure rationale."
          exit 1

      - name: Forbidden env-var docs drift regression guard (G-3)
        # G-3 master closed cat-g-163dae19bc59 (docs-only env vars
        # phantom in features.md), cat-g-b8f8f8796159 (6 config-only
        # env vars never documented), and cat-g-renewal_check_interval_rename_drift
        # (features.md still advertised the pre-rename
        # CERTCTL_RENEWAL_CHECK_INTERVAL after it was renamed to
        # CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL). This step diffs two
        # sorted name sets with `comm` in both directions (`comm -13`
        # for docs-only, `comm -23` for config-only): the env vars
        # defined in Go source (config.go + cmd/* + deploy/test fixtures
        # + ACME DNS-01 script env exports) versus the env vars
        # mentioned in README + docs/ + deploy/helm/.
        #
        # Allowlist: env vars that are documented as integration-
        # surface contracts (script env exports for ACME DNS-01,
        # OpenSSL CA scripts, StepCA per-issuer-config-blob fields,
        # Webhook per-notifier-config-blob fields, ACME EAB, audit
        # exclusion, demo-stack overrides) but not consumed directly
        # by config.go. Each entry below has a one-line justification
        # — if you add a new entry, add the justification too.
        #
        # Both drift directions are evaluated and reported before the
        # step fails, so one CI run shows the complete picture.
        #
        # See coverage-gap-audit-2026-04-24-v5/unified-audit.md
        # cat-g-* for closure rationale.
        run: |
          set -e
          # Defined: config.go + agent + cli + mcp-server + server cmds + test fixtures + ACME DNS export
          {
            # -o emits every quoted literal, so a config.go line that names
            # several env vars contributes all of them (a line-based sed
            # with a greedy prefix would keep only the last one).
            grep -hoE '"CERTCTL_[A-Z_]+"' internal/config/config.go | sed -E 's/"(CERTCTL_[A-Z_]+)"/\1/'
            grep -rhoE '"CERTCTL_[A-Z_]+"' cmd/agent/*.go cmd/cli/*.go cmd/mcp-server/*.go cmd/server/*.go 2>/dev/null | sed -E 's/"(CERTCTL_[A-Z_]+)"/\1/'
            grep -rhoE 'CERTCTL_[A-Z_]+' deploy/test/qa_test.go internal/connector/issuer/acme/dns.go 2>/dev/null
          } | grep -E '^CERTCTL_' | sort -u > /tmp/g3-defined.txt
          # Documented: README + docs + helm
          grep -rhoE '\bCERTCTL_[A-Z_]+\b' README.md docs/ deploy/helm/ 2>/dev/null | sort -u > /tmp/g3-docs.txt
          # Allowlist of env vars documented as external integration contracts.
          # Each entry justifies itself in one line; if you add to this list,
          # add the justification.
          ALLOWED='^(
          CERTCTL_OPENSSL_SIGN_SCRIPT|
          CERTCTL_OPENSSL_REVOKE_SCRIPT|
          CERTCTL_OPENSSL_CRL_SCRIPT|
          CERTCTL_OPENSSL_TIMEOUT_SECONDS|
          CERTCTL_STEPCA_URL|
          CERTCTL_STEPCA_FINGERPRINT|
          CERTCTL_STEPCA_PROVISIONER|
          CERTCTL_STEPCA_PROVISIONER_NAME|
          CERTCTL_STEPCA_PROVISIONER_KEY|
          CERTCTL_STEPCA_PROVISIONER_JWK|
          CERTCTL_STEPCA_PROVISIONER_PASSWORD|
          CERTCTL_STEPCA_PASSWORD|
          CERTCTL_STEPCA_KEY_PATH|
          CERTCTL_STEPCA_ROOT_CA|
          CERTCTL_WEBHOOK_URL|
          CERTCTL_WEBHOOK_SECRET|
          CERTCTL_ACME_EAB_KID|
          CERTCTL_ACME_EAB_HMAC|
          CERTCTL_ACME_DNS_PROPAGATION_WAIT|
          CERTCTL_AUDIT_EXCLUDE_PATHS|
          CERTCTL_TLS_|
          CERTCTL_TLS_INSECURE_SKIP_VERIFY|
          CERTCTL_SERVER_CA_BUNDLE_PATH|
          CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY|
          CERTCTL_QA_[A-Z_]+
          )$'
          # ^ The CERTCTL_OPENSSL_* / CERTCTL_STEPCA_* / CERTCTL_WEBHOOK_* /
          # CERTCTL_ACME_EAB_* / CERTCTL_ACME_DNS_PROPAGATION_WAIT /
          # CERTCTL_AUDIT_EXCLUDE_PATHS / CERTCTL_TLS_* / CERTCTL_SERVER_* /
          # CERTCTL_QA_* sets are documented integration-surface contracts
          # (script invocations, per-issuer config-blob field names,
          # per-notifier config-blob field names, demo-stack overrides,
          # test fixtures) — not server-side env vars in config.go.
          # The audit's "37 docs-only" count over-flagged these; the
          # closure narrows the gate to the specific drift sites
          # (renewal-interval rename + 6 config-only) and allowlists
          # the documented external contracts here.
          # NOTE(review): the bare CERTCTL_TLS_ entry presumably covers the
          # prefix the docs regex extracts from glob prose such as
          # "CERTCTL_TLS_*" — confirm before removing it.
          #
          # The pattern is written one alternative per line for reviewability;
          # strip newlines and spaces to get a single-line ERE for grep.
          ALLOWED_FLAT=$(echo "$ALLOWED" | tr -d '\n ')
          # comm -13: names only in the docs list; comm -23: names only in the
          # defined list. `|| true` keeps `set -e` quiet on empty results.
          DOCS_ONLY=$(comm -13 /tmp/g3-defined.txt /tmp/g3-docs.txt | grep -vE "$ALLOWED_FLAT" || true)
          CONFIG_ONLY=$(comm -23 /tmp/g3-defined.txt /tmp/g3-docs.txt || true)
          # Report every drift direction before failing so a single run
          # surfaces all of the work needed.
          DRIFT=0
          if [ -n "$DOCS_ONLY" ]; then
            echo "G-3 regression: env var(s) mentioned in docs but not defined in Go source AND not in the documented integration-surface allowlist:"
            echo "$DOCS_ONLY"
            echo ""
            echo "Either delete from docs (phantom/typo) or add to config.go,"
            echo "or add to the ALLOWED list with a one-line justification."
            DRIFT=1
          fi
          if [ -n "$CONFIG_ONLY" ]; then
            # Visually separate the two reports when both directions drifted.
            if [ "$DRIFT" -ne 0 ]; then
              echo ""
            fi
            echo "G-3 regression: env var(s) defined in Go source but never documented:"
            echo "$CONFIG_ONLY"
            echo ""
            echo "Add an entry to docs/features.md (or another canonical doc) so operators can find it."
            DRIFT=1
          fi
          if [ "$DRIFT" -ne 0 ]; then
            exit 1
          fi
          echo "G-3 env-var docs drift guardrail: clean."

  helm-lint:
    name: Helm Chart Validation
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install Helm
        uses: azure/setup-helm@v4
        with:
          version: '3.13.0'

      # HTTPS-Everywhere (v2.0.47): rendering fails unless a TLS source is
      # configured, so each lint/template call below selects exactly one
      # provisioning mode. Reference: the certctl.tls.required helper in
      # deploy/helm/certctl/templates/_helpers.tpl, and docs/tls.md.
      - name: Lint Helm Chart
        run: helm lint deploy/helm/certctl/ --set server.tls.existingSecret=certctl-tls-ci

      # Rendered manifests are discarded; only the exit status matters.
      - name: Template Helm Chart (existingSecret mode)
        run: |
          helm template certctl deploy/helm/certctl/ > /dev/null \
            --set server.tls.existingSecret=certctl-tls-ci

      - name: Template Helm Chart (cert-manager mode)
        run: |
          helm template certctl deploy/helm/certctl/ > /dev/null \
            --set server.tls.certManager.enabled=true \
            --set server.tls.certManager.issuerRef.name=letsencrypt-prod

      - name: Template Helm Chart (guard fails without TLS)
        run: |
          # Negative test: with no TLS source the chart MUST refuse to render.
          # A failed render is the expected outcome and ends the step
          # successfully; a successful render means the fail-loud guard in
          # certctl.tls.required has regressed.
          helm template certctl deploy/helm/certctl/ > /dev/null 2>&1 || exit 0
          echo "::error::Helm chart rendered without a TLS source — fail-loud guard regressed"
          exit 1