mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 22:41:31 +00:00
Compare commits
109 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 36e722ba12 | |||
| d6959a75c1 | |||
| 97b23e98d9 | |||
| 4cf5fcdb4f | |||
| 1ee67b7792 | |||
| 128d0eeaa8 | |||
| 9834b4e4a4 | |||
| cab579368b | |||
| 4e5522a999 | |||
| 55ce86b132 | |||
| 52248be717 | |||
| 04c7eca615 | |||
| 6e646e0fe8 | |||
| 675b87ba63 | |||
| 707d8de4fb | |||
| 0725713e19 | |||
| 1ee77c89f8 | |||
| 4bc8b3e723 | |||
| 469611650c | |||
| 91642e2860 | |||
| 0200c7f4a4 | |||
| fe7e766510 | |||
| ff7357f889 | |||
| 3287e174dc | |||
| a53a4b845b | |||
| 9143da5fa8 | |||
| b3cc7cbdb2 | |||
| eef1db0f0a | |||
| 72f5246ce3 | |||
| cb308bb4c7 | |||
| ad93e99158 | |||
| 9d0c3dfa15 | |||
| 2c9602db71 | |||
| ef670fa6da | |||
| 5a6ec39cfd | |||
| e3196e7b50 | |||
| bea69efd12 | |||
| 283ec27ca4 | |||
| a67a6b6c30 | |||
| ccd89c348f | |||
| 478a141498 | |||
| 2497be496d | |||
| 25dd6c07f3 | |||
| eb14236166 | |||
| bbb628243f | |||
| cdc9d03d5b | |||
| e951d319d0 | |||
| d14a45401b | |||
| 655e2879e6 | |||
| e757ef1471 | |||
| 27afa4463d | |||
| 80450c7180 | |||
| c655e0f8c5 | |||
| 5abeeb882b | |||
| b1df6dab27 | |||
| 672e1d991d | |||
| 89b910a8f1 | |||
| 6315ef102a | |||
| 119986fa7e | |||
| 3853b7460c | |||
| e9947dc0fe | |||
| b813660c74 | |||
| 387fb555ac | |||
| f549a7aa79 | |||
| b219e5d68a | |||
| 1f6cf0eafa | |||
| a49eae8155 | |||
| 1c7d085f16 | |||
| cc6eec3608 | |||
| 86fb140414 | |||
| 13cd4d98ba | |||
| 84bc1245a1 | |||
| e1bcde4cf1 | |||
| 3f619bcaac | |||
| f3a85d6b08 | |||
| 596d86a206 | |||
| f2e60b93a3 | |||
| f16a9c767a | |||
| 3a27c87b3f | |||
| 0ed8676066 | |||
| bcefb11e65 | |||
| 75cf8475f5 | |||
| c015cab2f4 | |||
| 3da6584ab8 | |||
| 68f6fd474b | |||
| 614e4e636b | |||
| 370f856725 | |||
| 7382e5f03b | |||
| 5567d4b411 | |||
| e5516d7286 | |||
| fd94e0bd19 | |||
| d0415d3b5e | |||
| c6efa4ab39 | |||
| dedf7fa3a9 | |||
| 4b5927dfff | |||
| cc03f55006 | |||
| 93e1dc598c | |||
| 25f33b830f | |||
| 7d6ef44e21 | |||
| dfa4dbbcbd | |||
| f92c997a50 | |||
| 697c0be9f3 | |||
| 8f146e08d6 | |||
| e6088c79a3 | |||
| e19b8c95fe | |||
| 995b72df05 | |||
| 9954fd1100 | |||
| 2a14a1da01 | |||
| 5a53b648b1 |
@@ -19,7 +19,7 @@ jobs:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.25'
|
||||
go-version: '1.25.9'
|
||||
|
||||
- name: Go Build
|
||||
run: |
|
||||
@@ -45,11 +45,11 @@ jobs:
|
||||
run: govulncheck ./...
|
||||
|
||||
- name: Race Detection
|
||||
run: go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/domain/... ./internal/validation/... -count=1 -timeout 300s
|
||||
run: go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/crypto/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... -count=1 -timeout 300s
|
||||
|
||||
- name: Go Test with Coverage
|
||||
run: |
|
||||
go test ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/integration/... ./internal/connector/issuer/... ./internal/connector/target/... ./internal/connector/notifier/... ./internal/mcp/... ./internal/cli/... ./internal/domain/... ./internal/validation/... -count=1 -cover -coverprofile=coverage.out
|
||||
go test ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/integration/... ./internal/connector/issuer/... ./internal/connector/target/... ./internal/connector/notifier/... ./internal/connector/discovery/... ./internal/crypto/... ./internal/mcp/... ./internal/cli/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... -count=1 -cover -coverprofile=coverage.out
|
||||
|
||||
- name: Check Coverage Thresholds
|
||||
run: |
|
||||
@@ -73,6 +73,13 @@ jobs:
|
||||
MIDDLEWARE_COV=$(go tool cover -func=coverage.out | grep 'internal/api/middleware' | awk '{print $NF}' | sed 's/%//' | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}')
|
||||
echo "Middleware layer coverage: ${MIDDLEWARE_COV}%"
|
||||
|
||||
# Check crypto package coverage (target: 85%+)
|
||||
# M-8 rationale: encryption primitives are a security-critical gate.
|
||||
# v2 format, key-derivation, fallback, and fail-closed sentinel paths
|
||||
# all need exhaustive coverage to avoid silent regressions (CWE-916 / CWE-329).
|
||||
CRYPTO_COV=$(go tool cover -func=coverage.out | grep 'internal/crypto' | awk '{print $NF}' | sed 's/%//' | awk '{sum+=$1; n++} END {if(n>0) printf "%.1f", sum/n; else print "0"}')
|
||||
echo "Crypto package coverage: ${CRYPTO_COV}%"
|
||||
|
||||
# Fail if thresholds not met
|
||||
if [ "$(echo "$SERVICE_COV < 55" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Service layer coverage ${SERVICE_COV}% is below 55% threshold"
|
||||
@@ -90,6 +97,10 @@ jobs:
|
||||
echo "::error::Middleware layer coverage ${MIDDLEWARE_COV}% is below 30% threshold"
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(echo "$CRYPTO_COV < 85" | bc -l)" -eq 1 ]; then
|
||||
echo "::error::Crypto package coverage ${CRYPTO_COV}% is below 85% threshold"
|
||||
exit 1
|
||||
fi
|
||||
echo "Coverage thresholds passed!"
|
||||
|
||||
- name: Upload Coverage Report
|
||||
@@ -137,8 +148,34 @@ jobs:
|
||||
with:
|
||||
version: '3.13.0'
|
||||
|
||||
# HTTPS-Everywhere (v2.0.47): the chart fails render when no TLS source is
|
||||
# configured. Every lint/template invocation below must pick exactly one
|
||||
# provisioning mode — see deploy/helm/certctl/templates/_helpers.tpl
|
||||
# (certctl.tls.required) and docs/tls.md.
|
||||
- name: Lint Helm Chart
|
||||
run: helm lint deploy/helm/certctl/
|
||||
run: |
|
||||
helm lint deploy/helm/certctl/ \
|
||||
--set server.tls.existingSecret=certctl-tls-ci
|
||||
|
||||
- name: Template Helm Chart
|
||||
run: helm template certctl deploy/helm/certctl/ > /dev/null
|
||||
- name: Template Helm Chart (existingSecret mode)
|
||||
run: |
|
||||
helm template certctl deploy/helm/certctl/ \
|
||||
--set server.tls.existingSecret=certctl-tls-ci \
|
||||
> /dev/null
|
||||
|
||||
- name: Template Helm Chart (cert-manager mode)
|
||||
run: |
|
||||
helm template certctl deploy/helm/certctl/ \
|
||||
--set server.tls.certManager.enabled=true \
|
||||
--set server.tls.certManager.issuerRef.name=letsencrypt-prod \
|
||||
> /dev/null
|
||||
|
||||
- name: Template Helm Chart (guard fails without TLS)
|
||||
run: |
|
||||
# Inverse test: the chart MUST refuse to render when no TLS source is
|
||||
# configured. If this ever renders successfully, the fail-loud guard
|
||||
# in certctl.tls.required has regressed.
|
||||
if helm template certctl deploy/helm/certctl/ > /dev/null 2>&1; then
|
||||
echo "::error::Helm chart rendered without a TLS source — fail-loud guard regressed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
+292
-43
@@ -7,40 +7,30 @@ on:
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
GO_VERSION: '1.22'
|
||||
# Keep in lock-step with .github/workflows/ci.yml (M-3).
|
||||
GO_VERSION: '1.25.9'
|
||||
IMAGE_NAMESPACE: shankar0123
|
||||
|
||||
jobs:
|
||||
# Cross-compile agent and server binaries for multiple platforms
|
||||
# ----------------------------------------------------------------------
|
||||
# build-binaries (M-3): matrix build every (binary × OS × arch) tuple.
|
||||
# For each tuple we produce: the binary, a SPDX-JSON SBOM, a keyless
|
||||
# Cosign signature + certificate bundle, and a single-line sha256sum
|
||||
# file. All artefacts are uploaded to a workflow-scoped artifact; the
|
||||
# aggregate-checksums job fans them back in for release upload.
|
||||
# ----------------------------------------------------------------------
|
||||
build-binaries:
|
||||
name: Build Cross-Platform Binaries
|
||||
name: Build ${{ matrix.binary }} (${{ matrix.os }}/${{ matrix.arch }})
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
contents: read
|
||||
id-token: write # Cosign keyless OIDC identity token
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# Agent binaries (4 platforms)
|
||||
- os: linux
|
||||
arch: amd64
|
||||
binary: agent
|
||||
- os: linux
|
||||
arch: arm64
|
||||
binary: agent
|
||||
- os: darwin
|
||||
arch: amd64
|
||||
binary: agent
|
||||
- os: darwin
|
||||
arch: arm64
|
||||
binary: agent
|
||||
# Server binaries (2 platforms)
|
||||
- os: linux
|
||||
arch: amd64
|
||||
binary: server
|
||||
- os: linux
|
||||
arch: arm64
|
||||
binary: server
|
||||
|
||||
binary: [agent, server, cli, mcp-server]
|
||||
os: [linux, darwin]
|
||||
arch: [amd64, arm64]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -51,35 +41,174 @@ jobs:
|
||||
|
||||
- name: Extract version from tag
|
||||
id: version
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Build ${{ matrix.binary }} binary (${{ matrix.os }}-${{ matrix.arch }})
|
||||
- name: Build binary
|
||||
id: build
|
||||
env:
|
||||
GOOS: ${{ matrix.os }}
|
||||
GOARCH: ${{ matrix.arch }}
|
||||
CGO_ENABLED: 0
|
||||
CGO_ENABLED: '0'
|
||||
VERSION: ${{ steps.version.outputs.VERSION }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
OUTPUT_NAME="certctl-${{ matrix.binary }}-${{ matrix.os }}-${{ matrix.arch }}"
|
||||
go build -ldflags="-w -s -X main.Version=${{ steps.version.outputs.VERSION }}" \
|
||||
mkdir -p dist
|
||||
go build \
|
||||
-trimpath \
|
||||
-ldflags="-w -s -X main.Version=${VERSION}" \
|
||||
-o "dist/${OUTPUT_NAME}" \
|
||||
"./cmd/${{ matrix.binary }}"
|
||||
ls -lh "dist/${OUTPUT_NAME}"
|
||||
echo "output_name=${OUTPUT_NAME}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Upload binaries to release
|
||||
- name: Generate SBOM (SPDX-JSON)
|
||||
uses: anchore/sbom-action@e22c389904149dbc22b58101806040fa8d37a610 # v0.24.0
|
||||
with:
|
||||
file: dist/${{ steps.build.outputs.output_name }}
|
||||
format: spdx-json
|
||||
output-file: dist/${{ steps.build.outputs.output_name }}.sbom.spdx.json
|
||||
upload-artifact: false
|
||||
upload-release-assets: false
|
||||
|
||||
- name: Install Cosign
|
||||
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
|
||||
|
||||
- name: Keyless-sign binary with Cosign
|
||||
env:
|
||||
OUTPUT_NAME: ${{ steps.build.outputs.output_name }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Cosign v3.0 (shipped by cosign-installer@v4.1.1 default
|
||||
# cosign-release=v3.0.5) removed --output-signature/--output-certificate
|
||||
# on sign-blob. The replacement is --bundle, which emits a unified
|
||||
# Sigstore bundle (signature + cert chain + Rekor inclusion proof) as
|
||||
# a single .sigstore.json artefact. M-11.
|
||||
cosign sign-blob \
|
||||
--yes \
|
||||
--bundle "dist/${OUTPUT_NAME}.sigstore.json" \
|
||||
"dist/${OUTPUT_NAME}"
|
||||
|
||||
- name: Compute SHA-256 sidecar
|
||||
env:
|
||||
OUTPUT_NAME: ${{ steps.build.outputs.output_name }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
cd dist
|
||||
sha256sum "${OUTPUT_NAME}" > "${OUTPUT_NAME}.sha256"
|
||||
cat "${OUTPUT_NAME}.sha256"
|
||||
|
||||
- name: Upload build artefacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: binary-${{ steps.build.outputs.output_name }}
|
||||
path: |
|
||||
dist/${{ steps.build.outputs.output_name }}
|
||||
dist/${{ steps.build.outputs.output_name }}.sigstore.json
|
||||
dist/${{ steps.build.outputs.output_name }}.sbom.spdx.json
|
||||
dist/${{ steps.build.outputs.output_name }}.sha256
|
||||
if-no-files-found: error
|
||||
retention-days: 7
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# aggregate-checksums (M-3): fan in every matrix artefact, produce a
|
||||
# single checksums.txt (sha256sum format, compatible with `sha256sum
|
||||
# -c`), sign it with Cosign, upload everything to the GitHub Release,
|
||||
# and emit a base64-encoded hash manifest for the SLSA generator.
|
||||
# ----------------------------------------------------------------------
|
||||
aggregate-checksums:
|
||||
name: Aggregate checksums & sign
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-binaries]
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write # Cosign keyless OIDC identity token
|
||||
outputs:
|
||||
hashes: ${{ steps.hashes.outputs.hashes }}
|
||||
steps:
|
||||
- name: Download binary artefacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
pattern: binary-*
|
||||
path: artifacts
|
||||
merge-multiple: true
|
||||
|
||||
- name: Aggregate SHA-256 sums
|
||||
id: hashes
|
||||
run: |
|
||||
set -euo pipefail
|
||||
cd artifacts
|
||||
: > checksums.txt
|
||||
for f in certctl-*; do
|
||||
case "$f" in
|
||||
*.sigstore.json|*.sbom.spdx.json|*.sha256|checksums.txt)
|
||||
continue ;;
|
||||
esac
|
||||
sha256sum "$f" >> checksums.txt
|
||||
done
|
||||
echo "=== checksums.txt ==="
|
||||
cat checksums.txt
|
||||
# base64 hashes (single line, no wrapping) for SLSA generator.
|
||||
HASHES=$(base64 -w0 < checksums.txt)
|
||||
echo "hashes=${HASHES}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Install Cosign
|
||||
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
|
||||
|
||||
- name: Keyless-sign checksums.txt
|
||||
run: |
|
||||
set -euo pipefail
|
||||
cd artifacts
|
||||
# Cosign v3.0 --bundle replaces the removed v2 flag pair
|
||||
# --output-signature / --output-certificate. See M-11.
|
||||
cosign sign-blob \
|
||||
--yes \
|
||||
--bundle checksums.txt.sigstore.json \
|
||||
checksums.txt
|
||||
|
||||
- name: Upload artefacts to GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
with:
|
||||
files: |
|
||||
dist/certctl-agent-*
|
||||
dist/certctl-server-*
|
||||
artifacts/certctl-*
|
||||
artifacts/checksums.txt
|
||||
artifacts/checksums.txt.sigstore.json
|
||||
|
||||
# Build and push Docker images
|
||||
# ----------------------------------------------------------------------
|
||||
# provenance-binaries (M-3): SLSA Level 3 provenance for every binary.
|
||||
# The SLSA generic generator reusable workflow runs in a hermetic
|
||||
# workflow run, producing multiple.intoto.jsonl from the base64 hash
|
||||
# manifest and uploading it as a release asset.
|
||||
# ----------------------------------------------------------------------
|
||||
provenance-binaries:
|
||||
name: SLSA provenance (binaries)
|
||||
needs: [aggregate-checksums]
|
||||
permissions:
|
||||
actions: read
|
||||
id-token: write
|
||||
contents: write
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
with:
|
||||
base64-subjects: "${{ needs.aggregate-checksums.outputs.hashes }}"
|
||||
upload-assets: true
|
||||
provenance-name: multiple.intoto.jsonl
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# build-and-push-docker: push container images to GHCR with native
|
||||
# SLSA L3 provenance (mode=max) and SBOM attestations emitted by
|
||||
# docker/build-push-action@v6, plus a keyless Cosign signature on the
|
||||
# image digest for identity-bound verification. The M-4 proxy-propagation
|
||||
# build-args block is retained verbatim — M-3 only adds supply-chain
|
||||
# steps; it never touches M-4 wiring.
|
||||
# ----------------------------------------------------------------------
|
||||
build-and-push-docker:
|
||||
name: Build & Push Docker Images
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
packages: write
|
||||
id-token: write # Cosign keyless OIDC identity token
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -93,40 +222,90 @@ jobs:
|
||||
|
||||
- name: Extract version from tag
|
||||
id: version
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Install Cosign
|
||||
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
|
||||
|
||||
- name: Build and push server image
|
||||
id: server-push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ env.REGISTRY }}/shankar0123/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
${{ env.REGISTRY }}/shankar0123/certctl-server:latest
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/certctl-server:latest
|
||||
# Proxy propagation (M-4, Issue #9) — forwards runner-level proxy
|
||||
# secrets into the Docker build so self-hosted runners behind
|
||||
# corporate proxies can reach public registries. GitHub-hosted
|
||||
# runners don't need proxies, so the secrets are optional and
|
||||
# resolve to empty strings when unset — byte-identical to the
|
||||
# pre-fix behaviour for the public-runner path.
|
||||
build-args: |
|
||||
HTTP_PROXY=${{ secrets.HTTP_PROXY }}
|
||||
HTTPS_PROXY=${{ secrets.HTTPS_PROXY }}
|
||||
NO_PROXY=${{ secrets.NO_PROXY }}
|
||||
# Supply-chain hardening (M-3): emit native SLSA L3 provenance
|
||||
# and SBOM attestations bound to the image manifest.
|
||||
provenance: mode=max
|
||||
sbom: true
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Keyless-sign server image with Cosign
|
||||
env:
|
||||
DIGEST: ${{ steps.server-push.outputs.digest }}
|
||||
IMAGE: ${{ env.REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/certctl-server
|
||||
run: |
|
||||
set -euo pipefail
|
||||
cosign sign --yes "${IMAGE}@${DIGEST}"
|
||||
|
||||
- name: Build and push agent image
|
||||
id: agent-push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile.agent
|
||||
push: true
|
||||
tags: |
|
||||
${{ env.REGISTRY }}/shankar0123/certctl-agent:${{ steps.version.outputs.VERSION }}
|
||||
${{ env.REGISTRY }}/shankar0123/certctl-agent:latest
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/certctl-agent:${{ steps.version.outputs.VERSION }}
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/certctl-agent:latest
|
||||
# Proxy propagation (M-4, Issue #9) — see server-image step for
|
||||
# rationale. Empty secrets resolve to empty build args, leaving
|
||||
# the un-proxied code path byte-identical to the pre-fix tree.
|
||||
build-args: |
|
||||
HTTP_PROXY=${{ secrets.HTTP_PROXY }}
|
||||
HTTPS_PROXY=${{ secrets.HTTPS_PROXY }}
|
||||
NO_PROXY=${{ secrets.NO_PROXY }}
|
||||
# Supply-chain hardening (M-3): emit native SLSA L3 provenance
|
||||
# and SBOM attestations bound to the image manifest.
|
||||
provenance: mode=max
|
||||
sbom: true
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
# Create release notes with all artifacts
|
||||
- name: Keyless-sign agent image with Cosign
|
||||
env:
|
||||
DIGEST: ${{ steps.agent-push.outputs.digest }}
|
||||
IMAGE: ${{ env.REGISTRY }}/${{ env.IMAGE_NAMESPACE }}/certctl-agent
|
||||
run: |
|
||||
set -euo pipefail
|
||||
cosign sign --yes "${IMAGE}@${DIGEST}"
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# create-release: stamp the release body. The actual asset uploads are
|
||||
# handled by aggregate-checksums (binaries, SBOMs, sigs, certs,
|
||||
# checksums.txt + signature) and the SLSA generator (multiple.intoto.jsonl).
|
||||
# ----------------------------------------------------------------------
|
||||
create-release:
|
||||
name: Create Release Notes
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-binaries, build-and-push-docker]
|
||||
needs: [build-binaries, aggregate-checksums, provenance-binaries, build-and-push-docker]
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
@@ -135,7 +314,7 @@ jobs:
|
||||
|
||||
- name: Extract version from tag
|
||||
id: version
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Create release with notes
|
||||
uses: softprops/action-gh-release@v2
|
||||
@@ -197,6 +376,76 @@ jobs:
|
||||
|
||||
- **Linux x86_64**: `certctl-server-linux-amd64`
|
||||
- **Linux ARM64**: `certctl-server-linux-arm64`
|
||||
- **macOS x86_64**: `certctl-server-darwin-amd64`
|
||||
- **macOS ARM64 (Apple Silicon)**: `certctl-server-darwin-arm64`
|
||||
|
||||
## CLI & MCP Server Binaries
|
||||
|
||||
The `certctl-cli` (REST API wrapper) and `certctl-mcp-server` (Model Context
|
||||
Protocol bridge) binaries ship for all four platforms as well:
|
||||
|
||||
- `certctl-cli-{linux,darwin}-{amd64,arm64}`
|
||||
- `certctl-mcp-server-{linux,darwin}-{amd64,arm64}`
|
||||
|
||||
## Verifying this release
|
||||
|
||||
Every binary, `checksums.txt`, and container image is signed with Cosign
|
||||
keyless OIDC. Each binary ships with a SPDX-JSON SBOM. Binaries are covered
|
||||
by SLSA Level 3 provenance; container images carry native SLSA L3 provenance
|
||||
and SBOM attestations (docker/build-push-action `provenance: mode=max`,
|
||||
`sbom: true`) in addition to a Cosign signature on the digest.
|
||||
|
||||
**1. Verify SHA-256 checksums:**
|
||||
|
||||
```bash
|
||||
sha256sum -c checksums.txt
|
||||
```
|
||||
|
||||
**2. Verify the Cosign signature on checksums.txt (keyless OIDC):**
|
||||
|
||||
```bash
|
||||
cosign verify-blob \
|
||||
--bundle checksums.txt.sigstore.json \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
checksums.txt
|
||||
```
|
||||
|
||||
Replace `checksums.txt` with any individual binary name to verify that
|
||||
artefact directly (each binary ships with its own `.sigstore.json`
|
||||
bundle, e.g. `cosign verify-blob --bundle certctl-agent-linux-amd64.sigstore.json …`).
|
||||
|
||||
**3. Verify SLSA Level 3 provenance (binaries):**
|
||||
|
||||
```bash
|
||||
slsa-verifier verify-artifact \
|
||||
--provenance-path multiple.intoto.jsonl \
|
||||
--source-uri github.com/shankar0123/certctl \
|
||||
--source-tag ${{ steps.version.outputs.VERSION }} \
|
||||
certctl-agent-linux-amd64
|
||||
```
|
||||
|
||||
**4. Verify container image signature and attestations:**
|
||||
|
||||
```bash
|
||||
IMAGE=ghcr.io/shankar0123/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
cosign verify \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SBOM attestation (SPDX-JSON) emitted by docker/build-push-action
|
||||
cosign verify-attestation --type spdxjson \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SLSA provenance attestation (mode=max)
|
||||
cosign verify-attestation --type slsaprovenance \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
```
|
||||
|
||||
## Helm Chart
|
||||
|
||||
|
||||
+18
-2
@@ -63,12 +63,28 @@ certctl-cli
|
||||
/server
|
||||
/agent
|
||||
/cli
|
||||
/mcp-server
|
||||
|
||||
# Private strategy docs
|
||||
roadmap.md
|
||||
SECURITY_REMEDIATION.md
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
mcp-server
|
||||
|
||||
# Local Go build/module caches (session-scoped, never committed)
|
||||
/.gocache/
|
||||
/.gomodcache/
|
||||
/.gopath/
|
||||
/.gomodcache-gopath/
|
||||
|
||||
# Design scratch files (session-scoped)
|
||||
/.i004-design.md
|
||||
/.i005-design.md
|
||||
|
||||
# HTTPS-Everywhere (M-007) Phase 6: the docker-compose.test.yml tls-init
|
||||
# container writes ca.crt / server.crt / server.key into this directory so
|
||||
# the host-side integration_test.go binary can pin the CA via
|
||||
# CERTCTL_TEST_CA_BUNDLE=./certs/ca.crt. Material is regenerated on every
|
||||
# `docker compose up` and never belongs in git.
|
||||
/deploy/test/certs/
|
||||
|
||||
@@ -6,6 +6,7 @@ run:
|
||||
linters:
|
||||
default: none
|
||||
enable:
|
||||
- contextcheck
|
||||
- govet
|
||||
- staticcheck
|
||||
- unused
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to certctl are documented in this file. Dates use ISO 8601. Versions follow [Semantic Versioning](https://semver.org/).
|
||||
|
||||
## [2.2.0] — 2026-04-19
|
||||
|
||||
### HTTPS Everywhere — The Irony
|
||||
|
||||
> certctl manages other teams' certificates. Until v2.2, it didn't terminate TLS on its own control plane. We treated the server as an internal service sitting behind whatever TLS-terminating infrastructure the operator already owned — reverse proxies, Kubernetes Ingress controllers, service mesh sidecars. Working through an EST coverage-gap audit surfaced this as a credibility problem we wanted to fix head-on: a cert-lifecycle product should ship with HTTPS by default. This release flips that. Self-signed bootstrap for docker-compose demos, operator-supplied Secret for Helm (with optional cert-manager integration), and a one-step cutover with no backward-compat bridge. Out-of-date agents will fail at the TLS handshake layer on upgrade; the upgrade guide walks operators through the roll.
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- **HTTPS-only control plane. The plaintext HTTP listener is gone.** There is no `CERTCTL_TLS_ENABLED=false` escape hatch and no `:8080` fallback. Operators who were running certctl behind their own TLS terminator must either (a) continue doing so and let the downstream TLS terminator talk to certctl's HTTPS listener, or (b) bring their own cert/key and terminate on certctl directly. Either path requires config changes — see `docs/upgrade-to-tls.md` for a one-step cutover.
|
||||
- **Agents reject `CERTCTL_SERVER_URL=http://...` at startup.** This is a pre-flight config validation failure with a fail-loud diagnostic pointing at `docs/upgrade-to-tls.md`. Not a TCP-refused, not a TLS-handshake-error — the agent will not even attempt the network call. Every agent deployment must be reconfigured before upgrading the server.
|
||||
- **CLI and MCP clients require `https://` URLs.** Same pre-flight rejection of plaintext schemes.
|
||||
- **TLS 1.2 is not supported. TLS 1.3 only.** The server's `tls.Config.MinVersion` is pinned to `tls.VersionTLS13`. Any client still negotiating TLS 1.2 will fail at the handshake. Modern curl, Go stdlib, browsers, and Kubernetes tooling all default to 1.3-capable; legacy clients may need an upgrade.
|
||||
- **Helm chart requires a TLS source.** `helm install` without one of `server.tls.existingSecret`, `server.tls.certManager.enabled`, or (for eval only) `server.tls.selfSigned.enabled` fails at template time with a diagnostic pointing at `docs/tls.md`. There is no default-to-plaintext path.
|
||||
|
||||
### Added
|
||||
|
||||
- **Self-signed bootstrap for Docker Compose demos.** A `certctl-tls-init` init container runs before the server on first boot, generates a SAN-valid self-signed cert into `deploy/test/certs/`, and exits. The server mounts the resulting cert/key. Every curl in the demo stack pins against `./deploy/test/certs/ca.crt` with `--cacert`.
|
||||
- **Helm chart TLS provisioning — three modes.** Operator-supplied Secret (`server.tls.existingSecret`), cert-manager integration (`server.tls.certManager.enabled` with issuer selection), or self-signed (`server.tls.selfSigned.enabled` — eval only, not supported for production). Chart templates enforce exactly one is active.
|
||||
- **Hot-reload of TLS cert/key on `SIGHUP`.** Overwrite the cert/key on disk, send `SIGHUP` to the server PID, watch the `slog.Info("tls.reload", ...)` log line, and new TLS connections use the new cert. Failure during reload is logged and does not crash the server; the previous cert remains in use.
|
||||
- **Agent CA-bundle env vars.** `CERTCTL_SERVER_CA_BUNDLE_PATH` points at a PEM file the agent's HTTP client will trust. `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY` disables verification (development only — the agent logs a loud warning at startup). `install-agent.sh` writes both as commented template lines into the generated `agent.env`.
|
||||
- **Integration test suite runs over HTTPS.** `go test -tags=integration ./deploy/test/...` stands up the full Compose stack, extracts the self-signed CA bundle, and exercises every certctl API over `https://localhost:8443`. All 34 subtests green.
|
||||
- **`docs/tls.md`** — cert provisioning patterns: bring-your-own Secret, cert-manager, self-signed bootstrap, SAN requirements, rotation workflows, SIGHUP reload semantics, troubleshooting.
|
||||
- **`docs/upgrade-to-tls.md`** — one-step cutover guide for existing v2.1 operators. Walks through the agent fleet roll, Helm upgrade sequencing, downgrade-is-not-supported warnings, and cert-provisioning decision tree.
|
||||
|
||||
### Changed
|
||||
|
||||
- `cmd/server/main.go` now calls `http.Server.ListenAndServeTLS(certFile, keyFile)`. The plaintext `ListenAndServe` code path is deleted — `grep -rn "ListenAndServe[^T]" cmd/ internal/` returns zero hits.
|
||||
- All documentation curls (`docs/testing-guide.md`, `docs/quickstart.md`, `deploy/helm/INSTALLATION.md`, `deploy/helm/DEPLOYMENT_GUIDE.md`, `deploy/ENVIRONMENTS.md`, `docs/openapi.md`, migration guides, example READMEs) use `https://localhost:8443` and `--cacert` against the demo stack's bundle.
|
||||
- OpenAPI spec (`api/openapi.yaml`) `servers` blocks default to `https://localhost:8443`.
|
||||
|
||||
### Security
|
||||
|
||||
- TLS 1.3 pinned via `tls.Config.MinVersion = tls.VersionTLS13`.
|
||||
- Plaintext HTTP listener removed entirely — no port 8080, no `Upgrade-Insecure-Requests`, no HSTS-required redirect dance. There is only one port: 8443, TLS 1.3.
|
||||
- `grep -rn "http://" cmd/ internal/` returns zero hits outside test fixtures and the agent-side URL-scheme rejection error message.
|
||||
|
||||
### Upgrade Notes
|
||||
|
||||
Read `docs/upgrade-to-tls.md` before upgrading. The short version:
|
||||
|
||||
1. Pick a TLS source — bring-your-own cert, cert-manager, or self-signed bootstrap.
|
||||
2. Upgrade the server with TLS configured. First boot over HTTPS.
|
||||
3. Roll the agent fleet: set `CERTCTL_SERVER_URL=https://...` and, if using a private CA, `CERTCTL_SERVER_CA_BUNDLE_PATH`. Old agents will fail loud at startup — expected.
|
||||
4. Roll CLI/MCP clients the same way.
|
||||
|
||||
There is no backward-compat bridge. There is no dual-listener mode. The cutover is one step.
|
||||
+30
-4
@@ -3,17 +3,43 @@
|
||||
# Stage 1: Build frontend
|
||||
FROM node:20-alpine AS frontend
|
||||
|
||||
# Proxy propagation (M-4, Issue #9) — defaulted to empty so un-proxied builds
|
||||
# behave identically to the pre-fix tree. When `HTTP_PROXY`/`HTTPS_PROXY`/
|
||||
# `NO_PROXY` are forwarded via `docker build --build-arg` (or compose
|
||||
# `build.args`), they are re-exported as ENV with both upper- and lower-case
|
||||
# names because npm/apk/curl read the lowercase variants while Go, Node, and
|
||||
# most HTTP libraries read the uppercase ones.
|
||||
ARG HTTP_PROXY=
|
||||
ARG HTTPS_PROXY=
|
||||
ARG NO_PROXY=
|
||||
ENV HTTP_PROXY=${HTTP_PROXY} \
|
||||
HTTPS_PROXY=${HTTPS_PROXY} \
|
||||
NO_PROXY=${NO_PROXY} \
|
||||
http_proxy=${HTTP_PROXY} \
|
||||
https_proxy=${HTTPS_PROXY} \
|
||||
no_proxy=${NO_PROXY}
|
||||
|
||||
WORKDIR /app/web
|
||||
|
||||
COPY web/package.json web/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY web/ .
|
||||
RUN npm run build
|
||||
RUN npm ci --include=dev || npm ci --include=dev && \
|
||||
node_modules/.bin/tsc --version && \
|
||||
npm run build
|
||||
|
||||
# Stage 2: Build Go binary
|
||||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
# Proxy propagation (M-4, Issue #9) — see Stage 1 rationale.
|
||||
ARG HTTP_PROXY=
|
||||
ARG HTTPS_PROXY=
|
||||
ARG NO_PROXY=
|
||||
ENV HTTP_PROXY=${HTTP_PROXY} \
|
||||
HTTPS_PROXY=${HTTPS_PROXY} \
|
||||
NO_PROXY=${NO_PROXY} \
|
||||
http_proxy=${HTTP_PROXY} \
|
||||
https_proxy=${HTTPS_PROXY} \
|
||||
no_proxy=${NO_PROXY}
|
||||
|
||||
RUN apk add --no-cache git ca-certificates tzdata
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@@ -2,6 +2,22 @@
|
||||
# Stage 1: Build
|
||||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
# Proxy propagation (M-4, Issue #9) — defaulted to empty so un-proxied builds
|
||||
# behave identically to the pre-fix tree. When `HTTP_PROXY`/`HTTPS_PROXY`/
|
||||
# `NO_PROXY` are forwarded via `docker build --build-arg` (or compose
|
||||
# `build.args`), they are re-exported as ENV with both upper- and lower-case
|
||||
# names because apk and curl read the lowercase variants while Go reads the
|
||||
# uppercase ones.
|
||||
ARG HTTP_PROXY=
|
||||
ARG HTTPS_PROXY=
|
||||
ARG NO_PROXY=
|
||||
ENV HTTP_PROXY=${HTTP_PROXY} \
|
||||
HTTPS_PROXY=${HTTPS_PROXY} \
|
||||
NO_PROXY=${NO_PROXY} \
|
||||
http_proxy=${HTTP_PROXY} \
|
||||
https_proxy=${HTTPS_PROXY} \
|
||||
no_proxy=${NO_PROXY}
|
||||
|
||||
RUN apk add --no-cache git ca-certificates
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@@ -6,13 +6,20 @@ Licensor: Shankar Reddy
|
||||
Licensed Work: certctl
|
||||
The Licensed Work is (c) 2026 Shankar Reddy.
|
||||
Additional Use Grant: You may make use of the Licensed Work, provided that
|
||||
you may not use the Licensed Work for a Certificate
|
||||
Management Service. A "Certificate Management Service"
|
||||
is a commercial offering that allows third parties
|
||||
(other than your employees and contractors acting on
|
||||
your behalf) to access and/or use the Licensed Work's
|
||||
certificate lifecycle management functionality as part
|
||||
of a hosted or managed service.
|
||||
you may not use the Licensed Work for a Commercial
|
||||
Certificate Service. A "Commercial Certificate Service"
|
||||
is any product, service, or offering in which a third
|
||||
party (other than your employees and contractors
|
||||
acting on your behalf) accesses, uses, or benefits
|
||||
from the Licensed Work's certificate management
|
||||
functionality — including but not limited to lifecycle
|
||||
management, discovery, monitoring, alerting, renewal
|
||||
automation, deployment, and revocation — as part of
|
||||
or in connection with an offering for which
|
||||
compensation is received. This restriction applies
|
||||
regardless of whether the Licensed Work is hosted,
|
||||
managed, embedded, bundled, or integrated with
|
||||
another product or service.
|
||||
|
||||
Change Date: March 14, 2033
|
||||
|
||||
|
||||
@@ -36,83 +36,101 @@ gantt
|
||||
47 days :crit, 2020-01-01, 47d
|
||||
```
|
||||
|
||||
> **Actively maintained — shipping weekly.** Found something? [Open a GitHub issue](https://github.com/shankar0123/certctl/issues) — issues get triaged same-day. CI runs 1,554+ tests with race detection, static analysis, and vulnerability scanning on every commit.
|
||||
> **Actively maintained — shipping weekly.** Found something? [Open a GitHub issue](https://github.com/shankar0123/certctl/issues) — issues get triaged same-day. CI runs the full test suite with race detection, static analysis, and vulnerability scanning on every commit.
|
||||
|
||||
## Why certctl Exists
|
||||
**Ready to try it?** Jump to the [Quick Start](#quick-start) — you'll have a running dashboard in under 5 minutes.
|
||||
|
||||
Certificate lifecycle tooling today falls into two camps: expensive enterprise platforms (Venafi, Keyfactor, Sectigo) that cost six figures and take months to deploy, or single-purpose tools (cert-manager, certbot) that handle one slice of the problem. If you run a mixed infrastructure — some NGINX, some Apache, a few HAProxy nodes, IIS on Windows, maybe an F5 — and you need to manage certificates from multiple CAs, there's nothing self-hosted that covers the full lifecycle without vendor lock-in.
|
||||
## Documentation
|
||||
|
||||
certctl fills that gap. It's **CA-agnostic** — plug in any certificate authority: Let's Encrypt via ACME, Smallstep step-ca, HashiCorp Vault PKI, DigiCert CertCentral, your enterprise ADCS via sub-CA mode, or any custom CA through a shell script adapter. Run multiple issuers simultaneously for different certificate types.
|
||||
|
||||
It's **target-agnostic**. Agents deploy certificates to NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, and IIS (local PowerShell or remote WinRM) — all using the same pluggable connector model. The control plane never initiates outbound connections — agents poll for work, which means certctl works behind firewalls, across network zones, and in air-gapped environments.
|
||||
|
||||
For a detailed comparison with CertKit, KeyTalk, and enterprise platforms, see [Why certctl?](docs/why-certctl.md)
|
||||
|
||||
## Who Is This For
|
||||
|
||||
**Platform engineering and DevOps teams** managing 10–500+ certificates across mixed infrastructure who need automated renewal, deployment, and a single dashboard for visibility. If you're currently running certbot cron jobs, manually renewing certs, or stitching together scripts — certctl replaces all of that.
|
||||
|
||||
**Security and compliance teams** who need an immutable audit trail, certificate ownership tracking, policy enforcement, and evidence for SOC 2, PCI-DSS 4.0, or NIST SP 800-57 audits.
|
||||
|
||||
**Small teams without enterprise budgets** who need the lifecycle automation that Venafi and Keyfactor provide but can't justify six-figure licensing for a 50-server environment.
|
||||
|
||||
## What It Does
|
||||
|
||||
- **Certificates renew and deploy themselves.** The scheduler monitors expiration, creates renewal jobs, issues certificates through your CA, and deploys them to target servers — all without human intervention. ACME ARI (RFC 9702) lets your CA tell certctl exactly when to renew.
|
||||
|
||||
- **You see everything in one place.** A 25-page operational dashboard shows every certificate across every server: status, ownership, expiration timeline, deployment history with TLS verification, discovery triage, and real-time agent fleet health. Bulk operations (renew, revoke, reassign) work across selections.
|
||||
|
||||
- **Private keys never leave your servers.** Agents generate ECDSA P-256 keys locally and submit only the CSR. The control plane never touches private keys. Post-deployment TLS verification confirms the right certificate is actually being served.
|
||||
|
||||
- **Discover what you don't know about.** Agents scan filesystems for existing PEM/DER certificates. The network scanner probes TLS endpoints across CIDR ranges without requiring agents. Both feed into a triage workflow where you claim, dismiss, or import discovered certificates.
|
||||
|
||||
- **Everything is auditable.** Immutable append-only audit trail records every lifecycle action, every API call, and every approval decision. Certificate digest emails deliver daily briefings. Prometheus metrics endpoint for Grafana dashboards.
|
||||
|
||||
- **Multiple interfaces for different workflows.** REST API (97 endpoints) for automation, CLI for scripting, MCP server for AI assistants (Claude, Cursor, Windsurf), EST server (RFC 7030) for device enrollment, Helm chart for Kubernetes, and the web dashboard for day-to-day operations.
|
||||
|
||||
For the full capability breakdown — revocation infrastructure (CRL + OCSP), policy engine, certificate profiles, S/MIME support, approval workflows, and more — see the [Feature Inventory](docs/features.md).
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Why certctl?](docs/why-certctl.md) | How certctl compares to ACME clients, agent-based SaaS, and enterprise platforms |
|
||||
| [Concepts](docs/concepts.md) | TLS certificates explained from scratch — for beginners who know nothing about certs |
|
||||
| [Quick Start](docs/quickstart.md) | 5-minute setup — dashboard, API, CLI, discovery, stakeholder demo flow |
|
||||
| [Docker Compose Environments](deploy/ENVIRONMENTS.md) | Service-by-service walkthrough of all 4 compose files, env var reference |
|
||||
| [Deployment Examples](docs/examples.md) | 5 turnkey scenarios (ACME+NGINX, wildcard DNS-01, private CA, step-ca, multi-issuer) with migration guides |
|
||||
| [Advanced Demo](docs/demo-advanced.md) | Issue a certificate end-to-end with technical deep-dives |
|
||||
| [Architecture](docs/architecture.md) | System design, data flow diagrams, security model |
|
||||
| [Feature Inventory](docs/features.md) | Complete reference of all capabilities, API endpoints, and configuration |
|
||||
| [Connector Reference](docs/connectors.md) | Configuration for all issuer, target, and notifier connectors |
|
||||
| [MCP Server](docs/mcp.md) | AI integration via Model Context Protocol — setup, available tools, examples |
|
||||
| [OpenAPI 3.1 Spec](docs/openapi.md) | API reference guide with endpoint overview ([raw spec](api/openapi.yaml)) |
|
||||
| [Compliance Mapping](docs/compliance.md) | SOC 2 Type II, PCI-DSS 4.0, NIST SP 800-57 alignment guides |
|
||||
| [Migrate from certbot](docs/migrate-from-certbot.md) | Step-by-step migration from certbot cron jobs to certctl |
|
||||
| [Migrate from acme.sh](docs/migrate-from-acmesh.md) | Migration guide for acme.sh users, DNS hook compatibility |
|
||||
| [certctl for cert-manager users](docs/certctl-for-cert-manager-users.md) | How certctl complements cert-manager for mixed infrastructure |
|
||||
| [Test Environment](docs/test-env.md) | Docker Compose test environment with real CA backends |
|
||||
| [Testing Guide](docs/testing-guide.md) | Comprehensive test procedures, smoke tests, and release sign-off checklist |
|
||||
|
||||
## Supported Integrations
|
||||
|
||||
### Certificate Issuers
|
||||
| Issuer | Status | Type |
|
||||
|--------|--------|------|
|
||||
| Local CA (self-signed + sub-CA) | Implemented | `GenericCA` |
|
||||
| ACME v2 (Let's Encrypt, Sectigo) | Implemented (HTTP-01 + DNS-01 + DNS-PERSIST-01) | `ACME` |
|
||||
| ACME EAB (ZeroSSL, Google Trust) | Implemented (auto-fetch EAB from ZeroSSL) | `ACME` |
|
||||
| step-ca | Implemented | `StepCA` |
|
||||
| OpenSSL / Custom CA | Implemented | `OpenSSL` |
|
||||
| Vault PKI | Beta | `VaultPKI` |
|
||||
| DigiCert CertCentral | Beta | `DigiCert` |
|
||||
| Sectigo SCM | Beta | `Sectigo` |
|
||||
|
||||
**Vault PKI, DigiCert, and Sectigo connectors are in beta.** If you hit any bugs or unexpected behavior, please [open a GitHub issue](https://github.com/shankar0123/certctl/issues) -- we're actively testing these and want to hear from real users.
|
||||
| Issuer | Type | Notes |
|
||||
|--------|------|-------|
|
||||
| Local CA (self-signed + sub-CA) | `GenericCA` | Sub-CA mode chains to enterprise root (ADCS, etc.) |
|
||||
| ACME v2 (Let's Encrypt, ZeroSSL, etc.) | `ACME` | HTTP-01, DNS-01, DNS-PERSIST-01 challenges. EAB auto-fetch from ZeroSSL. Profile selection (`tlsserver`, `shortlived`). |
|
||||
| step-ca (Smallstep) | `StepCA` | JWK provisioner auth, issuance + renewal + revocation |
|
||||
| OpenSSL / Custom CA | `OpenSSL` | Shell script adapter — any CA with a CLI |
|
||||
| HashiCorp Vault PKI | `VaultPKI` | Token auth, synchronous issuance, CRL/OCSP delegated to Vault |
|
||||
| DigiCert CertCentral | `DigiCert` | Async order model, OV/EV support, PEM bundle parsing |
|
||||
| Sectigo SCM | `Sectigo` | 3-header auth, DV/OV/EV, collect-not-ready graceful handling |
|
||||
| Google Cloud CAS | `GoogleCAS` | OAuth2 service account, synchronous issuance, CA pool selection |
|
||||
| AWS ACM Private CA | `AWSACMPCA` | Synchronous issuance, configurable signing algorithm/template ARN |
|
||||
| Entrust Certificate Services | `Entrust` | mTLS client certificate auth, synchronous/approval-pending issuance |
|
||||
| GlobalSign Atlas HVCA | `GlobalSign` | mTLS + API key/secret dual auth, serial-based tracking |
|
||||
| EJBCA (Keyfactor) | `EJBCA` | Dual auth (mTLS or OAuth2), self-hosted open-source CA |
|
||||
|
||||
**Note:** ADCS integration is handled via the Local CA's sub-CA mode — certctl operates as a subordinate CA with its signing certificate issued by ADCS. Any CA with a shell-accessible signing interface can be integrated today via the OpenSSL/Custom CA connector.
|
||||
**Note:** ADCS integration is handled via the Local CA's sub-CA mode — certctl operates as a subordinate CA with its signing certificate issued by ADCS. Any CA with a shell-accessible signing interface can be integrated via the OpenSSL/Custom CA connector.
|
||||
|
||||
### Deployment Targets
|
||||
| Target | Status | Type |
|
||||
|--------|--------|------|
|
||||
| NGINX | Implemented | `NGINX` |
|
||||
| Apache httpd | Implemented | `Apache` |
|
||||
| HAProxy | Implemented | `HAProxy` |
|
||||
| Traefik | Implemented | `Traefik` |
|
||||
| Caddy | Implemented | `Caddy` |
|
||||
| Envoy | Implemented | `Envoy` |
|
||||
| Postfix | Implemented | `Postfix` |
|
||||
| Dovecot | Implemented | `Dovecot` |
|
||||
| Microsoft IIS | Implemented (local + WinRM) | `IIS` |
|
||||
| F5 BIG-IP | Interface only | `F5` |
|
||||
|
||||
| Target | Type | Notes |
|
||||
|--------|------|-------|
|
||||
| NGINX | `NGINX` | File write, config validation, reload |
|
||||
| Apache httpd | `Apache` | Separate cert/chain/key files, configtest, graceful reload |
|
||||
| HAProxy | `HAProxy` | Combined PEM file, validate, reload |
|
||||
| Traefik | `Traefik` | File provider deployment, auto-reload via filesystem watch |
|
||||
| Caddy | `Caddy` | Dual-mode: admin API hot-reload or file-based |
|
||||
| Envoy | `Envoy` | File-based with optional SDS JSON config |
|
||||
| Postfix | `Postfix` | Mail server TLS, pairs with S/MIME support |
|
||||
| Dovecot | `Dovecot` | Mail server TLS, pairs with S/MIME support |
|
||||
| Microsoft IIS | `IIS` | Local PowerShell or remote WinRM, PEM→PFX, SNI support |
|
||||
| F5 BIG-IP | `F5` | iControl REST via proxy agent, transaction-based atomic updates |
|
||||
| SSH (Agentless) | `SSH` | SFTP cert/key deployment to any Linux/Unix server |
|
||||
| Windows Certificate Store | `WinCertStore` | PowerShell Import-PfxCertificate, configurable store/location |
|
||||
| Java Keystore | `JavaKeystore` | PEM→PKCS#12→keytool pipeline, JKS and PKCS12 formats |
|
||||
| Kubernetes Secrets | `KubernetesSecrets` | `kubernetes.io/tls` Secrets, in-cluster or kubeconfig auth |
|
||||
|
||||
### Enrollment Protocols
|
||||
|
||||
| Protocol | Standard | Use Case |
|
||||
|----------|----------|----------|
|
||||
| EST (Enrollment over Secure Transport) | RFC 7030 | Device enrollment, WiFi/802.1X, IoT |
|
||||
| SCEP (Simple Certificate Enrollment Protocol) | RFC 8894 | MDM platforms (Jamf, Intune), network devices |
|
||||
| ACME v2 | RFC 8555 | Public CA automated issuance (Let's Encrypt, ZeroSSL) |
|
||||
| ACME ARI (Renewal Information) | RFC 9773 | CA-directed renewal timing — the CA tells you when to renew |
|
||||
|
||||
### Standards & Revocation
|
||||
|
||||
| Capability | Standard | Notes |
|
||||
|------------|----------|-------|
|
||||
| DER-encoded X.509 CRL | RFC 5280 | Per-issuer, signed by issuing CA, 24h validity |
|
||||
| Embedded OCSP responder | RFC 6960 | Good/revoked/unknown status per issuer |
|
||||
| S/MIME certificates | RFC 8551 | Email protection EKU, adaptive KeyUsage flags |
|
||||
| Certificate export | — | PEM (JSON/file) and PKCS#12 formats |
|
||||
| ACME DNS-PERSIST-01 | IETF draft | Standing validation record, no per-renewal DNS updates |
|
||||
|
||||
### Notifiers
|
||||
| Notifier | Status | Type |
|
||||
|----------|--------|------|
|
||||
| Email (SMTP) | Implemented | `Email` |
|
||||
| Webhooks | Implemented | `Webhook` |
|
||||
| Slack | Implemented | `Slack` |
|
||||
| Microsoft Teams | Implemented | `Teams` |
|
||||
| PagerDuty | Implemented | `PagerDuty` |
|
||||
| OpsGenie | Implemented | `OpsGenie` |
|
||||
|
||||
| Notifier | Type |
|
||||
|----------|------|
|
||||
| Email (SMTP) | `Email` |
|
||||
| Webhooks | `Webhook` |
|
||||
| Slack | `Slack` |
|
||||
| Microsoft Teams | `Teams` |
|
||||
| PagerDuty | `PagerDuty` |
|
||||
| OpsGenie | `OpsGenie` |
|
||||
|
||||
All connectors are pluggable — build your own by implementing the [connector interface](docs/connectors.md).
|
||||
|
||||
@@ -120,32 +138,55 @@ All connectors are pluggable — build your own by implementing the [connector i
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-dashboard.png"><img src="docs/screenshots/v2-dashboard.png" width="270" alt="Dashboard"></a><br><b>Dashboard</b><br><sub>Stats, expiration heatmap, renewal trends</sub></td>
|
||||
<td><a href="docs/screenshots/v2-certificates.png"><img src="docs/screenshots/v2-certificates.png" width="270" alt="Certificates"></a><br><b>Certificates</b><br><sub>Inventory with status, owner, team filters</sub></td>
|
||||
<td><a href="docs/screenshots/v2-agents.png"><img src="docs/screenshots/v2-agents.png" width="270" alt="Agents"></a><br><b>Agents</b><br><sub>Fleet health, OS/arch, IP, version</sub></td>
|
||||
<td><a href="docs/screenshots/v2-dashboard.png"><img src="docs/screenshots/v2-dashboard.png" width="400" alt="Dashboard"></a><br><b>Dashboard</b><br><sub>Stats, expiration heatmap, renewal trends, issuance rate</sub></td>
|
||||
<td><a href="docs/screenshots/v2-certificates.png"><img src="docs/screenshots/v2-certificates.png" width="400" alt="Certificates"></a><br><b>Certificates</b><br><sub>Inventory with bulk ops, status filters, owner/team columns</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-fleet.png"><img src="docs/screenshots/v2-fleet.png" width="270" alt="Fleet Overview"></a><br><b>Fleet Overview</b><br><sub>OS distribution, status breakdown</sub></td>
|
||||
<td><a href="docs/screenshots/v2-jobs.png"><img src="docs/screenshots/v2-jobs.png" width="270" alt="Jobs"></a><br><b>Jobs</b><br><sub>Issuance, renewal, deployment queue</sub></td>
|
||||
<td><a href="docs/screenshots/v2-notifications.png"><img src="docs/screenshots/v2-notifications.png" width="270" alt="Notifications"></a><br><b>Notifications</b><br><sub>Expiration warnings, renewal results</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-policies.png"><img src="docs/screenshots/v2-policies.png" width="270" alt="Policies"></a><br><b>Policies</b><br><sub>Ownership, lifetime, renewal rules</sub></td>
|
||||
<td><a href="docs/screenshots/v2-profiles.png"><img src="docs/screenshots/v2-profiles.png" width="270" alt="Profiles"></a><br><b>Profiles</b><br><sub>Key types, max TTL, crypto constraints</sub></td>
|
||||
<td><a href="docs/screenshots/v2-issuers.png"><img src="docs/screenshots/v2-issuers.png" width="270" alt="Issuers"></a><br><b>Issuers</b><br><sub>Local CA, ACME, step-ca, Vault PKI, DigiCert</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-targets.png"><img src="docs/screenshots/v2-targets.png" width="270" alt="Targets"></a><br><b>Targets</b><br><sub>NGINX, Apache, HAProxy, Traefik, Caddy, IIS deployment</sub></td>
|
||||
<td><a href="docs/screenshots/v2-owners.png"><img src="docs/screenshots/v2-owners.png" width="270" alt="Owners"></a><br><b>Owners</b><br><sub>Cert ownership with team assignment</sub></td>
|
||||
<td><a href="docs/screenshots/v2-teams.png"><img src="docs/screenshots/v2-teams.png" width="270" alt="Teams"></a><br><b>Teams</b><br><sub>Org grouping for notification routing</sub></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="docs/screenshots/v2-agent-groups.png"><img src="docs/screenshots/v2-agent-groups.png" width="270" alt="Agent Groups"></a><br><b>Agent Groups</b><br><sub>Dynamic grouping by OS, arch, CIDR</sub></td>
|
||||
<td><a href="docs/screenshots/v2-audit-trail.png"><img src="docs/screenshots/v2-audit-trail.png" width="270" alt="Audit Trail"></a><br><b>Audit Trail</b><br><sub>Immutable log, CSV/JSON export</sub></td>
|
||||
<td><a href="docs/screenshots/v2-short-lived.png"><img src="docs/screenshots/v2-short-lived.png" width="270" alt="Short-Lived"></a><br><b>Short-Lived Creds</b><br><sub>Ephemeral certs with live TTL countdown</sub></td>
|
||||
<td><a href="docs/screenshots/v2-issuers.png"><img src="docs/screenshots/v2-issuers.png" width="400" alt="Issuers"></a><br><b>Issuers</b><br><sub>Catalog with 10 CA types, GUI config, test connection</sub></td>
|
||||
<td><a href="docs/screenshots/v2-jobs.png"><img src="docs/screenshots/v2-jobs.png" width="400" alt="Jobs"></a><br><b>Jobs</b><br><sub>Issuance, renewal, deployment queue with approval workflow</sub></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
**[See all screenshots →](docs/screenshots/)**
|
||||
|
||||
## Why certctl
|
||||
|
||||
Certificate lifecycle tooling falls into two camps: enterprise platforms (Venafi, Keyfactor) that cost six figures and take months to deploy, or single-purpose tools (certbot, cert-manager) that handle one slice of the problem. certctl fills the gap — full lifecycle automation, self-hosted, free, CA-agnostic, and target-agnostic. If you're running certbot cron jobs, manually renewing certs, or stitching together scripts across mixed infrastructure, certctl replaces all of that.
|
||||
|
||||
Built for **platform engineering and DevOps teams** managing 10–500+ certificates, **security and compliance teams** who need audit trails and policy enforcement for SOC 2, PCI-DSS 4.0, or NIST SP 800-57 ([compliance mapping included](docs/compliance.md)), and **small teams without enterprise budgets** who need Venafi-grade automation for a 50-server environment. For a detailed comparison, see [Why certctl?](docs/why-certctl.md)
|
||||
|
||||
**Architecture.** Go 1.25 control plane with handler→service→repository layering, PostgreSQL 16 backend (21 tables), and a pull-only deployment model — the server never initiates outbound connections. Agents poll for work. For network appliances and agentless servers, a proxy agent in the same network zone handles deployment via the target's API (WinRM, iControl REST, SSH/SFTP). Background scheduler runs 7 loops: renewal with ARI integration (1h), job processing (30s), agent health (2m), notifications (1m), short-lived cert expiry (30s), network scanning (6h), certificate digest (24h). See [Architecture Guide](docs/architecture.md) for full system diagrams.
|
||||
|
||||
**Security-first.** Agents generate ECDSA P-256 keys locally — private keys never touch the control plane. API key auth enforced by default with SHA-256 hashing and constant-time comparison. CORS deny-by-default. Shell injection prevention on all connector scripts. SSRF protection (reserved IP filtering) on the network scanner. Atomic idempotency guards on scheduler loops. Issuer and target credentials encrypted at rest with AES-256-GCM. Every API call recorded to an immutable audit trail with actor attribution, body hash, and latency tracking. CI runs race detection, 11 linters, and vulnerability scanning on every commit.
|
||||
|
||||
**Key design decisions.** TEXT primary keys — human-readable prefixed IDs (`mc-api-prod`, `t-platform`, `o-alice`) so you can identify resources at a glance in logs and queries. Idempotent migrations (`IF NOT EXISTS`, `ON CONFLICT DO NOTHING`) safe for repeated execution. Dynamic configuration via GUI with AES-256-GCM encrypted credential storage and env var backward compatibility. Handlers define their own service interfaces for clean dependency inversion.
|
||||
|
||||
## What It Does
|
||||
|
||||
**Automated lifecycle.** Certificates renew and deploy themselves. The scheduler monitors expiration, issues through your CA, and deploys to targets — zero human intervention. ACME ARI (RFC 9773) lets the CA direct renewal timing. Ready for 47-day (SC-081v3) and 6-day (Let's Encrypt shortlived) certificate lifetimes.
|
||||
|
||||
**Operational dashboard.** 26-page GUI covers the entire lifecycle: certificate inventory with bulk ops, deployment timeline with rollback, discovery triage, network scan management, agent fleet health, short-lived credential countdown, approval workflows, and observability metrics. Configure issuers and targets from the dashboard — no env var editing, no server restarts.
|
||||
|
||||
**Private keys stay on your servers.** Agents generate ECDSA P-256 keys locally, submit only the CSR. The control plane never touches private keys. After deployment, agents probe the live TLS endpoint and compare SHA-256 fingerprints to confirm the right certificate is actually being served.
|
||||
|
||||
**Discovery.** Agents scan filesystems for existing PEM/DER certificates. The network scanner probes TLS endpoints across CIDR ranges without agents. Cloud discovery finds certificates in AWS Secrets Manager, Azure Key Vault, and GCP Secret Manager. Continuous TLS health monitoring tracks endpoint status (healthy/degraded/down/cert_mismatch) with configurable thresholds and historical probe data. All discovery modes feed into a unified triage workflow — claim, dismiss, or import what you find.
|
||||
|
||||
**Policy engine.** Certificate profiles constrain key types, max TTL, and EKUs — with crypto policy enforcement that validates every CSR against profile rules before it reaches the issuer. MaxTTL caps are enforced per issuer connector. Approval workflows pause jobs for human review. Ownership tracking routes notifications to the right team. Agent groups match devices by OS, architecture, IP CIDR, and version.
|
||||
|
||||
**Enrollment protocols.** EST server (RFC 7030) for device and WiFi enrollment. SCEP server (RFC 8894) for MDM platforms and network devices. S/MIME issuance with email protection EKU.
|
||||
|
||||
**Revocation.** Single and bulk revocation (by profile, owner, agent, or issuer). DER-encoded X.509 CRL per issuer, signed by the issuing CA. Embedded OCSP responder. RFC 5280 reason codes. Short-lived certs (TTL < 1 hour) are exempt — expiry is sufficient revocation.
|
||||
|
||||
**Audit and observability.** Immutable append-only audit trail records every lifecycle action, every API call, and every approval decision. Prometheus metrics endpoint. Scheduled certificate digest emails. Continuous endpoint health monitoring with state machine transitions and real-time alerts.
|
||||
|
||||
**Notifications.** Slack, Teams, PagerDuty, OpsGenie, SMTP, webhooks. Routed by certificate owner. Daily digest emails with stats and expiring certs.
|
||||
|
||||
**Multiple interfaces.** REST API (111 routes), CLI (12 commands), MCP server (80 tools for Claude, Cursor, Windsurf), Helm chart, web dashboard. Certificate export in PEM and PKCS#12.
|
||||
|
||||
**First-run onboarding.** Wizard guides you through connecting a CA, deploying an agent, and issuing your first certificate. Or start with the pre-populated demo — 32 certificates, 10 issuers, 180 days of history.
|
||||
|
||||
For the complete capability breakdown, see the [Feature Inventory](docs/features.md).
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Docker Compose (Recommended)
|
||||
@@ -156,18 +197,23 @@ cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
Wait ~30 seconds, then open **http://localhost:8443** in your browser.
|
||||
Wait ~30 seconds, then open **https://localhost:8443** in your browser. (The shipped `docker-compose.yml` self-signs a cert via the `certctl-tls-init` init container on first boot — accept the browser warning for the demo, or feed the generated `ca.crt` to your client.) The onboarding wizard walks you through connecting a CA, deploying an agent, and issuing your first certificate.
|
||||
|
||||
The dashboard comes pre-loaded with 32 demo certificates across 7 issuers, 8 agents, 180 days of job history, discovery scan data, and network scan targets — a realistic snapshot of a certificate inventory that looks like it's been running for months.
|
||||
**Want a pre-populated demo instead?** Add the demo override to see 32 certificates across 10 issuers, 8 agents, and 180 days of realistic history:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8443/health
|
||||
# {"status":"healthy"}
|
||||
|
||||
curl -s http://localhost:8443/api/v1/certificates | jq '.total'
|
||||
# 32
|
||||
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d --build
|
||||
```
|
||||
|
||||
The `deploy/` directory has four compose files: `docker-compose.yml` (base platform), `docker-compose.demo.yml` (demo data overlay), `docker-compose.dev.yml` (PgAdmin + debug logging), and `docker-compose.test.yml` (standalone integration tests with real CA backends). See the [Docker Compose Environments Guide](deploy/ENVIRONMENTS.md) for a service-by-service walkthrough, or the [Quick Start](docs/quickstart.md#docker-compose-environments) for a summary.
|
||||
|
||||
```bash
|
||||
curl --cacert $(docker compose -f deploy/docker-compose.yml exec -T certctl-server cat /etc/certctl/tls/ca.crt) https://localhost:8443/health
|
||||
# {"status":"healthy"}
|
||||
```
|
||||
|
||||
The control plane is HTTPS-only (TLS 1.3, no plaintext listener). See [`docs/tls.md`](docs/tls.md) for cert provisioning patterns and [`docs/upgrade-to-tls.md`](docs/upgrade-to-tls.md) if you're upgrading from a pre-v2.2 release.
|
||||
|
||||
### Agent Install (One-Liner)
|
||||
|
||||
```bash
|
||||
@@ -176,6 +222,16 @@ curl -sSL https://raw.githubusercontent.com/shankar0123/certctl/master/install-a
|
||||
|
||||
Detects your OS and architecture, downloads the binary, configures systemd (Linux) or launchd (macOS), and starts the agent. See [install-agent.sh](install-agent.sh) for details.
|
||||
|
||||
### Helm Chart (Kubernetes)
|
||||
|
||||
```bash
|
||||
helm install certctl deploy/helm/certctl/ \
|
||||
--set server.apiKey=your-api-key \
|
||||
--set postgres.password=your-db-password
|
||||
```
|
||||
|
||||
Production-ready chart with Server Deployment, PostgreSQL StatefulSet, Agent DaemonSet, health probes, security contexts (non-root, read-only rootfs), and optional Ingress. See [values.yaml](deploy/helm/certctl/values.yaml) for all configuration options.
|
||||
|
||||
### Docker Pull
|
||||
|
||||
```bash
|
||||
@@ -183,6 +239,74 @@ docker pull shankar0123.docker.scarf.sh/certctl-server
|
||||
docker pull shankar0123.docker.scarf.sh/certctl-agent
|
||||
```
|
||||
|
||||
## Verifying this release
|
||||
|
||||
Every `v*` tag publishes signed, attested release artefacts. Binaries
|
||||
(`certctl-agent`, `certctl-server`, `certctl-cli`, `certctl-mcp-server` for
|
||||
`linux|darwin × amd64|arm64`) ship alongside a `checksums.txt`, per-binary
|
||||
SPDX-JSON SBOMs, Cosign signatures, and SLSA Level 3 provenance. Container
|
||||
images on `ghcr.io/shankar0123/certctl-{server,agent}` are built with
|
||||
`docker/build-push-action` `provenance: mode=max` + `sbom: true` and are
|
||||
additionally signed with Cosign at the image digest.
|
||||
|
||||
All signatures use Cosign keyless OIDC; the signing identity is the
|
||||
release workflow running on a signed tag.
|
||||
|
||||
**1. Verify SHA-256 checksums:**
|
||||
|
||||
```bash
|
||||
sha256sum -c checksums.txt
|
||||
```
|
||||
|
||||
**2. Verify the Cosign signature on `checksums.txt`:**
|
||||
|
||||
```bash
|
||||
cosign verify-blob \
|
||||
--bundle checksums.txt.sigstore.json \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
checksums.txt
|
||||
```
|
||||
|
||||
Every individual binary ships with its own `.sigstore.json` bundle
|
||||
(unified Sigstore bundle containing signature, certificate chain, and
|
||||
Rekor inclusion proof). Swap `checksums.txt` for any binary name and
|
||||
point `--bundle` at the matching `<binary>.sigstore.json` to verify it
|
||||
directly.
|
||||
|
||||
**3. Verify SLSA Level 3 provenance on a binary:**
|
||||
|
||||
```bash
|
||||
slsa-verifier verify-artifact \
|
||||
--provenance-path multiple.intoto.jsonl \
|
||||
--source-uri github.com/shankar0123/certctl \
|
||||
--source-tag v2.1.0 \
|
||||
certctl-agent-linux-amd64
|
||||
```
|
||||
|
||||
**4. Verify a container image signature and its SBOM / provenance attestations:**
|
||||
|
||||
```bash
|
||||
IMAGE=ghcr.io/shankar0123/certctl-server:v2.1.0
|
||||
|
||||
cosign verify \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SBOM attestation (SPDX-JSON, emitted by docker/build-push-action)
|
||||
cosign verify-attestation --type spdxjson \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SLSA provenance attestation (docker/build-push-action `provenance: mode=max`)
|
||||
cosign verify-attestation --type slsaprovenance \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
Pick the scenario closest to your setup and have it running in 2 minutes.
|
||||
@@ -197,32 +321,6 @@ Pick the scenario closest to your setup and have it running in 2 minutes.
|
||||
|
||||
Each directory contains a `docker-compose.yml` and a `README.md` explaining the scenario, prerequisites, and customization.
|
||||
|
||||
## Architecture
|
||||
|
||||
**Control plane** (Go 1.25 net/http) → **PostgreSQL 16** (21 tables, TEXT primary keys) → **Agents** (key generation, CSR submission, cert deployment). For Windows servers without a local agent, a proxy agent in the same network zone handles deployment via WinRM. Background scheduler runs 7 loops: renewal checks (1h), job processing (30s), agent health (2m), notifications (1m), short-lived cert expiry (30s), network scanning (6h), certificate digest (24h). See [Architecture Guide](docs/architecture.md) for full system diagrams and data flow.
|
||||
|
||||
### Key Design Decisions
|
||||
|
||||
- **Private keys isolated from the control plane.** Agents generate ECDSA P-256 keys locally and submit CSRs (public key only). The server signs the CSR and returns the certificate — private keys never touch the control plane. Server-side keygen is available via `CERTCTL_KEYGEN_MODE=server` for demo/development only.
|
||||
- **TEXT primary keys, not UUIDs.** IDs are human-readable prefixed strings (`mc-api-prod`, `t-platform`, `o-alice`) so you can identify resource types at a glance in logs and queries.
|
||||
- **Handler → Service → Repository layering.** Handlers define their own service interfaces for clean dependency inversion. No global service singletons.
|
||||
- **Idempotent migrations.** All schema uses `IF NOT EXISTS` and seed data uses `ON CONFLICT (id) DO NOTHING`, safe for repeated execution.
|
||||
|
||||
## Documentation
|
||||
|
||||
| Guide | Description |
|
||||
|-------|-------------|
|
||||
| [Why certctl?](docs/why-certctl.md) | How certctl compares to ACME clients, agent-based SaaS, and enterprise platforms |
|
||||
| [Concepts](docs/concepts.md) | TLS certificates explained from scratch — for beginners who know nothing about certs |
|
||||
| [Quick Start](docs/quickstart.md) | 5-minute setup — dashboard, API, CLI, discovery, stakeholder demo flow |
|
||||
| [Deployment Examples](docs/examples.md) | 5 turnkey scenarios (ACME+NGINX, wildcard DNS-01, private CA, step-ca, multi-issuer) with migration guides |
|
||||
| [Advanced Demo](docs/demo-advanced.md) | Issue a certificate end-to-end with technical deep-dives |
|
||||
| [Architecture](docs/architecture.md) | System design, data flow diagrams, security model |
|
||||
| [Feature Inventory](docs/features.md) | Complete reference of all V2 capabilities, API endpoints, and configuration |
|
||||
| [Connector Reference](docs/connectors.md) | Configuration for all 7 issuers, 10 targets, and 5 notifier connectors |
|
||||
| [Compliance Mapping](docs/compliance.md) | SOC 2 Type II, PCI-DSS 4.0, NIST SP 800-57 alignment guides |
|
||||
| [OpenAPI 3.1 Spec](api/openapi.yaml) | 97 operations, full request/response schemas |
|
||||
|
||||
## CLI
|
||||
|
||||
```bash
|
||||
@@ -230,8 +328,9 @@ Each directory contains a `docker-compose.yml` and a `README.md` explaining the
|
||||
go install github.com/shankar0123/certctl/cmd/cli@latest
|
||||
|
||||
# Configure
|
||||
export CERTCTL_SERVER_URL=http://localhost:8443
|
||||
export CERTCTL_SERVER_URL=https://localhost:8443
|
||||
export CERTCTL_API_KEY=your-api-key
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH=/path/to/ca.crt # or --ca-bundle on the CLI; --insecure for dev self-signed
|
||||
|
||||
# Usage
|
||||
certctl-cli certs list # List all certificates
|
||||
@@ -246,16 +345,19 @@ certctl-cli certs list --format json # JSON output (default: table)
|
||||
|
||||
## MCP Server (AI Integration)
|
||||
|
||||
certctl ships a standalone MCP (Model Context Protocol) server that exposes all API endpoints as tools for AI assistants — Claude, Cursor, Windsurf, OpenClaw, VS Code Copilot, and any MCP-compatible client.
|
||||
certctl ships a standalone MCP (Model Context Protocol) server that exposes all 80 API endpoints as tools for AI assistants — Claude, Cursor, Windsurf, OpenClaw, VS Code Copilot, and any MCP-compatible client.
|
||||
|
||||
```bash
|
||||
# Install and run
|
||||
go install github.com/shankar0123/certctl/cmd/mcp-server@latest
|
||||
export CERTCTL_SERVER_URL=http://localhost:8443
|
||||
export CERTCTL_SERVER_URL=https://localhost:8443
|
||||
export CERTCTL_API_KEY=your-api-key
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH=/path/to/ca.crt # required for self-signed bootstrap
|
||||
mcp-server
|
||||
```
|
||||
|
||||
The MCP server is env-vars-only — there are no CLI flags for TLS. If you must bypass verification for local development against a self-signed cert, set `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true`. Never set that in production.
|
||||
|
||||
**Claude Desktop** (`claude_desktop_config.json`):
|
||||
```json
|
||||
{
|
||||
@@ -263,18 +365,15 @@ mcp-server
|
||||
"certctl": {
|
||||
"command": "mcp-server",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_API_KEY": "your-api-key"
|
||||
"CERTCTL_SERVER_URL": "https://localhost:8443",
|
||||
"CERTCTL_API_KEY": "your-api-key",
|
||||
"CERTCTL_SERVER_CA_BUNDLE_PATH": "/path/to/ca.crt"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
certctl is designed with a security-first architecture. Agents generate ECDSA P-256 keys locally — private keys never touch the control plane. API key auth is enforced by default with SHA-256 hashing and constant-time comparison. CORS is deny-by-default. All connector scripts are validated against shell injection. The network scanner filters reserved IP ranges (SSRF protection). Scheduler loops use atomic idempotency guards. Every API call is recorded to an immutable audit trail with actor attribution, SHA-256 body hash, and latency tracking. See the [Architecture Guide](docs/architecture.md) for the full security model.
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
@@ -285,7 +384,7 @@ govulncheck ./... # Vulnerability scan
|
||||
make docker-up # Start Docker Compose stack
|
||||
```
|
||||
|
||||
CI runs on every push: `go vet`, `go test -race`, `golangci-lint`, `govulncheck`, and per-layer coverage thresholds (service 55%, handler 60%, domain 40%, middleware 30%). Frontend CI runs TypeScript type checking, Vitest tests, and Vite production build.
|
||||
CI runs on every push: `go vet`, `go test -race`, `golangci-lint`, `govulncheck`, and per-layer coverage thresholds (service 55%, handler 60%, domain 40%, middleware 30%). Frontend CI runs TypeScript type checking, Vitest tests, and Vite production build. 1,668 Go test functions with 625+ subtests, plus frontend test suite.
|
||||
|
||||
## Roadmap
|
||||
|
||||
@@ -293,19 +392,17 @@ CI runs on every push: `go vet`, `go test -race`, `golangci-lint`, `govulncheck`
|
||||
Core lifecycle management — Local CA + ACME v2 issuers, NGINX target connector, agent-side key generation, API auth + rate limiting, React dashboard, CI pipeline with coverage gates, Docker images on GHCR.
|
||||
|
||||
### V2: Operational Maturity — Shipped
|
||||
30+ milestones, 1,554+ tests. Sub-CA mode, ACME DNS-01/DNS-PERSIST-01, step-ca, Vault PKI, DigiCert CertCentral, OpenSSL/Custom CA issuers. NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS targets. RFC 5280 revocation with CRL + OCSP. Certificate profiles, ownership tracking, approval workflows. Filesystem and network certificate discovery. Prometheus metrics, dashboard charts, agent fleet overview. EST server (RFC 7030), ACME ARI (RFC 9702), certificate export, S/MIME support, Helm chart, MCP server, CLI, scheduled digest emails. Slack, Teams, PagerDuty, OpsGenie, SMTP notifications. Compliance mapping (SOC 2, PCI-DSS 4.0, NIST SP 800-57). See the [Feature Inventory](docs/features.md) for details.
|
||||
|
||||
**Coming in v2.1.0:** Dynamic issuer and target configuration via GUI (no env var restarts), first-run onboarding wizard.
|
||||
30+ milestones shipping enterprise-grade features for free. Sub-CA mode, ACME DNS-01/DNS-PERSIST-01/EAB/ARI (RFC 9773)/profile selection, step-ca, Vault PKI, DigiCert CertCentral, Sectigo SCM, Google CAS, AWS ACM PCA, Entrust, GlobalSign, EJBCA, OpenSSL/Custom CA issuers. NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS (WinRM), F5 BIG-IP, SSH, Windows Certificate Store, Java Keystore, Kubernetes Secrets targets. EST server (RFC 7030) and SCEP server (RFC 8894) enrollment protocols. RFC 5280 revocation with DER CRL + embedded OCSP responder. Certificate profiles, ownership tracking, team assignment, agent groups, interactive approval workflows. Filesystem, network, and cloud secret manager (AWS SM, Azure KV, GCP SM) certificate discovery with triage GUI. Dynamic issuer/target configuration via GUI with AES-256-GCM encrypted storage. First-run onboarding wizard. Post-deployment TLS verification. Certificate export (PEM/PKCS#12). S/MIME support. Prometheus metrics. Scheduled certificate digest emails. Slack, Teams, PagerDuty, OpsGenie, SMTP notifications. MCP server (80 tools), CLI (12 commands), Helm chart. Compliance mapping (SOC 2, PCI-DSS 4.0, NIST SP 800-57). 5 turnkey deployment examples. Agent install script. Migration guides from certbot, acme.sh, and cert-manager. See the [Feature Inventory](docs/features.md) for details.
|
||||
|
||||
### V3: certctl Pro
|
||||
Team access controls and identity provider integration (OIDC/SSO). Role-based access control with profile-gating. Event-driven architecture (NATS) with real-time operational views. Advanced search DSL, compliance and risk scoring, bulk fleet operations.
|
||||
Enterprise capabilities for larger deployments are available in the commercial tier.
|
||||
|
||||
### V4+: Cloud, Scale & Passive Discovery
|
||||
Passive network discovery (TLS listener), Kubernetes integration (cert-manager external issuer, Secrets target), cloud infrastructure targets (AWS ALB/ACM, Azure Key Vault), extended CA support (Google CAS, EJBCA, Sectigo), and platform-scale features (Terraform provider, multi-tenancy, HSM support).
|
||||
### V4+: Cloud & Scale
|
||||
Kubernetes cert-manager external issuer, cloud infrastructure targets, extended CA support, and platform-scale features.
|
||||
|
||||
## License
|
||||
|
||||
Certctl is licensed under the [Business Source License 1.1](LICENSE). The source code is publicly available and free to use, modify, and self-host. The one restriction: you may not offer certctl as a managed/hosted certificate management service to third parties. The BSL 1.1 license converts automatically to Apache 2.0 on March 1, 2033, providing perpetual freedom.
|
||||
Certctl is licensed under the [Business Source License 1.1](LICENSE). The source code is publicly available and free to use, modify, and self-host. The one restriction: you may not use certctl's certificate management functionality as part of a commercial offering to third parties, whether hosted, managed, embedded, bundled, or integrated. The BSL 1.1 license converts automatically to Apache 2.0 on March 14, 2033.
|
||||
|
||||
For licensing inquiries: certctl@proton.me
|
||||
|
||||
|
||||
+1423
-52
File diff suppressed because it is too large
Load Diff
+881
-20
File diff suppressed because it is too large
Load Diff
+287
-21
@@ -8,21 +8,25 @@ import (
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/sha256"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
@@ -31,7 +35,11 @@ import (
|
||||
"github.com/shankar0123/certctl/internal/connector/target/caddy"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/envoy"
|
||||
pf "github.com/shankar0123/certctl/internal/connector/target/postfix"
|
||||
sshconn "github.com/shankar0123/certctl/internal/connector/target/ssh"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/f5"
|
||||
jks "github.com/shankar0123/certctl/internal/connector/target/javakeystore"
|
||||
k8s "github.com/shankar0123/certctl/internal/connector/target/k8ssecret"
|
||||
wcs "github.com/shankar0123/certctl/internal/connector/target/wincertstore"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/haproxy"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/iis"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/nginx"
|
||||
@@ -40,15 +48,27 @@ import (
|
||||
|
||||
// AgentConfig represents the agent-side configuration.
|
||||
type AgentConfig struct {
|
||||
ServerURL string // Control plane server URL (e.g., http://localhost:8443)
|
||||
APIKey string // Agent API key for authentication
|
||||
AgentName string // Agent name for identification
|
||||
AgentID string // Agent ID for API calls (set after registration or from env)
|
||||
Hostname string // Server hostname
|
||||
KeyDir string // Directory for storing private keys (default: /var/lib/certctl/keys)
|
||||
DiscoveryDirs []string // Directories to scan for certificates (comma-separated via env)
|
||||
ServerURL string // Control plane server URL (e.g., https://localhost:8443) — must be https:// scheme
|
||||
APIKey string // Agent API key for authentication
|
||||
AgentName string // Agent name for identification
|
||||
AgentID string // Agent ID for API calls (set after registration or from env)
|
||||
Hostname string // Server hostname
|
||||
KeyDir string // Directory for storing private keys (default: /var/lib/certctl/keys)
|
||||
DiscoveryDirs []string // Directories to scan for certificates (comma-separated via env)
|
||||
CABundlePath string // Optional path to a PEM-encoded CA bundle that signed the server's cert (empty = system roots)
|
||||
InsecureSkipVerify bool // Dev-only: skip TLS certificate verification. Never enable in production. See docs/tls.md.
|
||||
}
|
||||
|
||||
// ErrAgentRetired is the sentinel returned by [Agent.Run] when the control
|
||||
// plane responds with HTTP 410 Gone to a heartbeat or work-poll request — the
|
||||
// canonical signal that this agent's row has been soft-retired server-side
|
||||
// (see I-004 in cowork/certctl-coverage-gap-audit.md). The binary must
|
||||
// terminate cleanly: an init-system restart would only produce another 410
|
||||
// and wedge the host in a restart loop. main() translates this sentinel into
|
||||
// a zero exit code so systemd (Restart=on-failure) and launchd do not respawn
|
||||
// the process. Do not wrap this error — main() matches it with errors.Is.
|
||||
var ErrAgentRetired = fmt.Errorf("agent retired by control plane")
|
||||
|
||||
// Agent represents the local agent that runs on target servers.
|
||||
// It periodically sends heartbeats, polls for work, executes deployment and CSR jobs,
|
||||
// and scans configured directories for existing certificates.
|
||||
@@ -64,6 +84,17 @@ type Agent struct {
|
||||
pollInterval time.Duration
|
||||
discoveryInterval time.Duration
|
||||
consecutiveFailures int
|
||||
|
||||
// I-004: terminal retirement signal. retiredSignal is closed exactly once
|
||||
// (guarded by retiredOnce) when either sendHeartbeat or pollForWork
|
||||
// observes HTTP 410 Gone. The Run() select loop picks up the close and
|
||||
// returns ErrAgentRetired, unwinding the goroutine cleanly so main() can
|
||||
// log + exit(0). Using a channel + sync.Once (rather than an atomic bool
|
||||
// + polling) lets us fall through the select statement immediately instead
|
||||
// of waiting for the next ticker; the zero-allocation close is safe to
|
||||
// race with ctx.Done() and other cases.
|
||||
retiredOnce sync.Once
|
||||
retiredSignal chan struct{}
|
||||
}
|
||||
|
||||
// WorkResponse represents the response from the work polling endpoint.
|
||||
@@ -86,15 +117,78 @@ type JobItem struct {
|
||||
}
|
||||
|
||||
// NewAgent creates a new agent instance.
|
||||
func NewAgent(cfg *AgentConfig, logger *slog.Logger) *Agent {
|
||||
//
|
||||
// The returned HTTP client enforces HTTPS-only control-plane access per the
|
||||
// HTTPS-Everywhere milestone (see docs/tls.md). TLS 1.3 is required; the
|
||||
// optional CABundlePath loads a PEM bundle into RootCAs so the agent can
|
||||
// trust internal / self-signed server certs without touching system trust
|
||||
// stores. InsecureSkipVerify is a dev-only escape hatch — callers must log a
|
||||
// loud warning when it's set; never enable in production (see §2.4 of the
|
||||
// milestone spec and docs/upgrade-to-tls.md).
|
||||
//
|
||||
// Returns an error if CABundlePath is set but unreadable or malformed — fail
|
||||
// loud at startup rather than silently fall back to system roots, which would
|
||||
// turn a misconfigured bundle path into a cryptic "x509: certificate signed
|
||||
// by unknown authority" on the first heartbeat.
|
||||
func NewAgent(cfg *AgentConfig, logger *slog.Logger) (*Agent, error) {
|
||||
tlsConfig := &tls.Config{
|
||||
MinVersion: tls.VersionTLS13,
|
||||
InsecureSkipVerify: cfg.InsecureSkipVerify, //nolint:gosec // opt-in dev escape hatch, documented in docs/tls.md
|
||||
}
|
||||
if cfg.CABundlePath != "" {
|
||||
pemBytes, err := os.ReadFile(cfg.CABundlePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading CA bundle at %q: %w", cfg.CABundlePath, err)
|
||||
}
|
||||
pool := x509.NewCertPool()
|
||||
if !pool.AppendCertsFromPEM(pemBytes) {
|
||||
return nil, fmt.Errorf("CA bundle at %q contains no valid PEM-encoded certificates", cfg.CABundlePath)
|
||||
}
|
||||
tlsConfig.RootCAs = pool
|
||||
}
|
||||
|
||||
httpClient := &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: tlsConfig,
|
||||
ForceAttemptHTTP2: true,
|
||||
MaxIdleConns: 10,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
return &Agent{
|
||||
config: cfg,
|
||||
logger: logger,
|
||||
client: &http.Client{Timeout: 30 * time.Second},
|
||||
client: httpClient,
|
||||
heartbeatInterval: 60 * time.Second,
|
||||
pollInterval: 30 * time.Second,
|
||||
discoveryInterval: 6 * time.Hour, // scan for certs every 6 hours
|
||||
}
|
||||
retiredSignal: make(chan struct{}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// markRetired records that the control plane has declared this agent retired
|
||||
// (HTTP 410 Gone on heartbeat or work poll). Idempotent via sync.Once — if
|
||||
// both the heartbeat and work-poll paths observe 410 in the same tick, only
|
||||
// the first close() runs and we avoid a runtime panic. Emits an ERROR-level
|
||||
// log line so init-system journaling captures it prominently, and includes
|
||||
// the source (heartbeat/work_poll), response body, and status code so the
|
||||
// operator can verify it's a genuine retirement signal rather than a
|
||||
// misrouted request. After this returns, the select-loop case in Run()
|
||||
// observes the closed channel on its next iteration and returns
|
||||
// ErrAgentRetired.
|
||||
func (a *Agent) markRetired(source string, statusCode int, body string) {
|
||||
a.retiredOnce.Do(func() {
|
||||
a.logger.Error("agent has been retired by control plane — shutting down",
|
||||
"source", source,
|
||||
"status", statusCode,
|
||||
"body", body,
|
||||
"agent_id", a.config.AgentID)
|
||||
close(a.retiredSignal)
|
||||
})
|
||||
}
|
||||
|
||||
// Run starts the agent's main loop.
|
||||
@@ -150,6 +244,19 @@ func (a *Agent) Run(ctx context.Context) error {
|
||||
a.logger.Info("agent shutting down", "reason", ctx.Err())
|
||||
return ctx.Err()
|
||||
|
||||
// I-004: retiredSignal is closed exactly once (via markRetired's
|
||||
// sync.Once) when either sendHeartbeat or pollForWork observes HTTP 410
|
||||
// Gone from the control plane. Falling through this case immediately
|
||||
// (rather than waiting for the next ticker) lets the agent shut down
|
||||
// quickly once retirement is confirmed — every extra heartbeat against a
|
||||
// retired row is wasted work and noise in the audit trail. Returning
|
||||
// ErrAgentRetired propagates up to main(), which matches it with
|
||||
// errors.Is and exits(0) so systemd/launchd do not respawn the process.
|
||||
case <-a.retiredSignal:
|
||||
a.logger.Info("agent retired signal received — exiting event loop",
|
||||
"agent_id", a.config.AgentID)
|
||||
return ErrAgentRetired
|
||||
|
||||
case <-heartbeatTicker.C:
|
||||
a.sendHeartbeat(ctx)
|
||||
|
||||
@@ -162,7 +269,14 @@ func (a *Agent) Run(ctx context.Context) error {
|
||||
a.logger.Warn("backing off due to consecutive failures",
|
||||
"failures", a.consecutiveFailures,
|
||||
"backoff", backoff.String())
|
||||
time.Sleep(backoff)
|
||||
// F-003: ctx-aware wait so graceful shutdown does not stall on
|
||||
// a long backoff. If ctx cancels mid-backoff, return to the
|
||||
// outer loop so the <-ctx.Done() case can trigger clean exit.
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
continue
|
||||
case <-time.After(backoff):
|
||||
}
|
||||
}
|
||||
a.pollForWork(ctx)
|
||||
|
||||
@@ -205,6 +319,22 @@ func (a *Agent) sendHeartbeat(ctx context.Context) {
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// I-004: HTTP 410 Gone is the terminal signal from the control plane that
|
||||
// this agent's row has been soft-retired (see internal/api/handler/agent.go
|
||||
// heartbeat path + AgentRetirementService). Treat it separately from the
|
||||
// generic non-200 error branch: record the event to markRetired (which closes
|
||||
// retiredSignal exactly once via sync.Once) and return without bumping
|
||||
// consecutiveFailures — this is not a transient failure, it's a clean
|
||||
// shutdown. The Run() select loop picks up the closed channel on its next
|
||||
// iteration and returns ErrAgentRetired, which main() translates into an
|
||||
// exit(0) so systemd/launchd don't respawn the process into another 410
|
||||
// loop.
|
||||
if resp.StatusCode == http.StatusGone {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
a.markRetired("heartbeat", resp.StatusCode, string(body))
|
||||
return
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
a.logger.Error("heartbeat rejected",
|
||||
@@ -233,6 +363,19 @@ func (a *Agent) pollForWork(ctx context.Context) {
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// I-004: same terminal-retirement handling as sendHeartbeat. Work-poll is the
|
||||
// other hot path that can observe an agent's soft-retirement; if the
|
||||
// heartbeat tick happens to fire after a work-poll tick within the same
|
||||
// retirement window, this branch catches it first. markRetired's sync.Once
|
||||
// guards idempotency so racing both paths in the same tick only closes the
|
||||
// signal channel once. No consecutiveFailures increment — retirement is
|
||||
// not a transient failure.
|
||||
if resp.StatusCode == http.StatusGone {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
a.markRetired("work_poll", resp.StatusCode, string(body))
|
||||
return
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
a.logger.Error("work poll rejected",
|
||||
@@ -585,7 +728,11 @@ func (a *Agent) createTargetConnector(targetType string, configJSON json.RawMess
|
||||
return nil, fmt.Errorf("invalid F5 config: %w", err)
|
||||
}
|
||||
}
|
||||
return f5.New(&cfg, a.logger), nil
|
||||
conn, err := f5.New(&cfg, a.logger)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create F5 connector: %w", err)
|
||||
}
|
||||
return conn, nil
|
||||
|
||||
case "IIS":
|
||||
var cfg iis.Config
|
||||
@@ -643,6 +790,42 @@ func (a *Agent) createTargetConnector(targetType string, configJSON json.RawMess
|
||||
}
|
||||
return pf.New(&cfg, a.logger), nil
|
||||
|
||||
case "SSH":
|
||||
var cfg sshconn.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid SSH config: %w", err)
|
||||
}
|
||||
}
|
||||
return sshconn.New(&cfg, a.logger)
|
||||
|
||||
case "WinCertStore":
|
||||
var cfg wcs.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid WinCertStore config: %w", err)
|
||||
}
|
||||
}
|
||||
return wcs.New(&cfg, a.logger)
|
||||
|
||||
case "JavaKeystore":
|
||||
var cfg jks.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid JavaKeystore config: %w", err)
|
||||
}
|
||||
}
|
||||
return jks.New(&cfg, a.logger), nil
|
||||
|
||||
case "KubernetesSecrets":
|
||||
var cfg k8s.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid KubernetesSecrets config: %w", err)
|
||||
}
|
||||
}
|
||||
return k8s.New(&cfg, a.logger)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported target type: %s", targetType)
|
||||
}
|
||||
@@ -987,12 +1170,14 @@ func certKeyInfo(cert *x509.Certificate) (string, int) {
|
||||
|
||||
func main() {
|
||||
// Parse command-line flags (with env var fallbacks for Docker deployment)
|
||||
serverURL := flag.String("server", getEnvDefault("CERTCTL_SERVER_URL", "http://localhost:8443"), "Control plane server URL")
|
||||
serverURL := flag.String("server", getEnvDefault("CERTCTL_SERVER_URL", "https://localhost:8443"), "Control plane server URL (must be https://)")
|
||||
apiKey := flag.String("api-key", getEnvDefault("CERTCTL_API_KEY", ""), "Agent API key")
|
||||
agentName := flag.String("name", getEnvDefault("CERTCTL_AGENT_NAME", "certctl-agent"), "Agent name")
|
||||
agentID := flag.String("agent-id", getEnvDefault("CERTCTL_AGENT_ID", ""), "Agent ID (from registration)")
|
||||
keyDir := flag.String("key-dir", getEnvDefault("CERTCTL_KEY_DIR", "/var/lib/certctl/keys"), "Directory for storing private keys")
|
||||
discoveryDirsStr := flag.String("discovery-dirs", getEnvDefault("CERTCTL_DISCOVERY_DIRS", ""), "Comma-separated directories to scan for certificates")
|
||||
caBundlePath := flag.String("ca-bundle", getEnvDefault("CERTCTL_SERVER_CA_BUNDLE_PATH", ""), "Path to a PEM-encoded CA bundle that signed the server's TLS cert (optional; falls back to system roots)")
|
||||
insecureSkipVerify := flag.Bool("insecure-skip-verify", getEnvBoolDefault("CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY", false), "Dev-only: skip TLS certificate verification. Never enable in production. See docs/tls.md.")
|
||||
flag.Parse()
|
||||
|
||||
if *apiKey == "" {
|
||||
@@ -1006,6 +1191,18 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Pre-flight URL-scheme validation — reject plaintext http:// before any
|
||||
// network call. The HTTPS-Everywhere milestone (§2.4, §7) mandates that
|
||||
// mis-configured agents fail loudly at startup with a diagnostic pointing
|
||||
// at the upgrade guide, rather than producing a TCP-refused or
|
||||
// TLS-handshake-error that obscures the actual cause.
|
||||
if err := validateHTTPSScheme(*serverURL); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
fmt.Fprintf(os.Stderr, "\nThe certctl control plane is HTTPS-only as of v2.2.\n")
|
||||
fmt.Fprintf(os.Stderr, "See docs/upgrade-to-tls.md for the cutover walkthrough.\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Set up structured logging
|
||||
logLevel := slog.LevelInfo
|
||||
if getEnvDefault("CERTCTL_LOG_LEVEL", "info") == "debug" {
|
||||
@@ -1034,17 +1231,27 @@ func main() {
|
||||
|
||||
// Create agent configuration
|
||||
agentCfg := &AgentConfig{
|
||||
ServerURL: *serverURL,
|
||||
APIKey: *apiKey,
|
||||
AgentName: *agentName,
|
||||
AgentID: *agentID,
|
||||
Hostname: hostname,
|
||||
KeyDir: *keyDir,
|
||||
DiscoveryDirs: discoveryDirs,
|
||||
ServerURL: *serverURL,
|
||||
APIKey: *apiKey,
|
||||
AgentName: *agentName,
|
||||
AgentID: *agentID,
|
||||
Hostname: hostname,
|
||||
KeyDir: *keyDir,
|
||||
DiscoveryDirs: discoveryDirs,
|
||||
CABundlePath: *caBundlePath,
|
||||
InsecureSkipVerify: *insecureSkipVerify,
|
||||
}
|
||||
|
||||
if agentCfg.InsecureSkipVerify {
|
||||
logger.Warn("TLS certificate verification is disabled (CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true) — never enable this in production")
|
||||
}
|
||||
|
||||
// Create and start agent
|
||||
agent := NewAgent(agentCfg, logger)
|
||||
agent, err := NewAgent(agentCfg, logger)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: failed to initialize agent: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Create context with cancellation for graceful shutdown
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -1073,6 +1280,19 @@ func main() {
|
||||
cancel()
|
||||
<-errChan
|
||||
case err := <-errChan:
|
||||
// I-004: ErrAgentRetired is a terminal, *clean* shutdown — the control
|
||||
// plane responded HTTP 410 Gone on heartbeat/work-poll, meaning this
|
||||
// agent's row has been soft-retired and will never be reachable again.
|
||||
// Exit 0 so systemd's Restart=on-failure and launchd's KeepAlive do NOT
|
||||
// respawn the process into another 410 loop (which would wedge the host
|
||||
// and spam the control plane). Operators can observe the retirement via
|
||||
// audit_events or the AgentsPage retired tab; the terminal log line on
|
||||
// the way out is enough for post-mortem forensics.
|
||||
if errors.Is(err, ErrAgentRetired) {
|
||||
logger.Info("agent retired by control plane — exiting without restart",
|
||||
"agent_id", agentCfg.AgentID)
|
||||
return
|
||||
}
|
||||
if err != context.Canceled {
|
||||
logger.Error("agent error", "error", err)
|
||||
os.Exit(1)
|
||||
@@ -1089,3 +1309,49 @@ func getEnvDefault(key, defaultValue string) string {
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
// getEnvBoolDefault parses an environment variable as a boolean. Accepts "1",
|
||||
// "t", "true", "T", "TRUE", "True" as true; anything else (including empty)
|
||||
// returns the provided default. Kept permissive on purpose so operators can
|
||||
// flip the dev-only TLS skip-verify toggle with any common truthy spelling
|
||||
// without having to remember exactly what we parse.
|
||||
func getEnvBoolDefault(key string, defaultValue bool) bool {
|
||||
raw := os.Getenv(key)
|
||||
if raw == "" {
|
||||
return defaultValue
|
||||
}
|
||||
switch strings.ToLower(strings.TrimSpace(raw)) {
|
||||
case "1", "t", "true", "yes", "on":
|
||||
return true
|
||||
case "0", "f", "false", "no", "off":
|
||||
return false
|
||||
default:
|
||||
return defaultValue
|
||||
}
|
||||
}
|
||||
|
||||
// validateHTTPSScheme enforces the HTTPS-Everywhere milestone's §7 acceptance
|
||||
// criterion: "Agent with CERTCTL_SERVER_URL=http://... fails at startup with
|
||||
// a fail-loud diagnostic pointing at docs/upgrade-to-tls.md. Not TCP-refused,
|
||||
// not TLS-handshake-error — a pre-flight config validation failure before any
|
||||
// network call." Returns a descriptive error; the caller prints the upgrade
|
||||
// guide pointer and exits non-zero.
|
||||
func validateHTTPSScheme(serverURL string) error {
|
||||
if serverURL == "" {
|
||||
return fmt.Errorf("CERTCTL_SERVER_URL is empty — set it to an https:// URL (e.g., https://certctl-server:8443)")
|
||||
}
|
||||
u, err := url.Parse(serverURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("CERTCTL_SERVER_URL %q is not a valid URL: %w", serverURL, err)
|
||||
}
|
||||
switch strings.ToLower(u.Scheme) {
|
||||
case "https":
|
||||
return nil
|
||||
case "http":
|
||||
return fmt.Errorf("CERTCTL_SERVER_URL %q uses plaintext http:// — the certctl control plane is HTTPS-only", serverURL)
|
||||
case "":
|
||||
return fmt.Errorf("CERTCTL_SERVER_URL %q is missing a scheme — expected https://", serverURL)
|
||||
default:
|
||||
return fmt.Errorf("CERTCTL_SERVER_URL %q uses unsupported scheme %q — expected https://", serverURL, u.Scheme)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,7 +228,7 @@ func TestReportVerificationResult_Success(t *testing.T) {
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-api-key",
|
||||
}
|
||||
agent := NewAgent(cfg, nil)
|
||||
agent, _ := NewAgent(cfg, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
@@ -244,7 +244,7 @@ func TestReportVerificationResult_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestReportVerificationResult_MissingFields(t *testing.T) {
|
||||
agent := NewAgent(&AgentConfig{}, nil)
|
||||
agent, _ := NewAgent(&AgentConfig{}, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
Verified: true,
|
||||
@@ -343,7 +343,7 @@ func TestReportVerificationResult_ServerError(t *testing.T) {
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-api-key",
|
||||
}
|
||||
agent := NewAgent(cfg, nil)
|
||||
agent, _ := NewAgent(cfg, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
|
||||
+86
-10
@@ -3,7 +3,9 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/cli"
|
||||
)
|
||||
@@ -27,35 +29,50 @@ Commands:
|
||||
certs renew ID Trigger certificate renewal
|
||||
certs revoke ID Revoke a certificate
|
||||
|
||||
agents list List agents
|
||||
agents get ID Get agent details
|
||||
agents list List agents (add --retired to list soft-retired agents)
|
||||
agents get ID Get agent details
|
||||
agents retire ID Soft-retire an agent (add --force --reason "…" to cascade)
|
||||
|
||||
jobs list List jobs
|
||||
jobs get ID Get job details
|
||||
jobs cancel ID Cancel a pending job
|
||||
|
||||
import FILE Bulk import certificates from PEM file(s)
|
||||
Required: --owner-id, --team-id, --renewal-policy-id, --issuer-id
|
||||
Optional: --name-template (default {cn}), --environment (default imported)
|
||||
|
||||
status Show server health + summary stats
|
||||
version Show CLI version
|
||||
|
||||
Examples:
|
||||
certctl-cli --server http://localhost:8443 --api-key mykey certs list
|
||||
certctl-cli --server https://localhost:8443 --api-key mykey certs list
|
||||
certctl-cli certs renew mc-prod --format json
|
||||
certctl-cli import certs.pem
|
||||
`)
|
||||
}
|
||||
|
||||
serverURL := fs.String("server", os.Getenv("CERTCTL_SERVER_URL"), "certctl server URL (env: CERTCTL_SERVER_URL)")
|
||||
if *serverURL == "" {
|
||||
*serverURL = "http://localhost:8443"
|
||||
// HTTPS-Everywhere (v2.2): the server is HTTPS-only. The default URL uses
|
||||
// https://; plaintext http:// is rejected by validateHTTPSScheme below.
|
||||
defaultServer := os.Getenv("CERTCTL_SERVER_URL")
|
||||
if defaultServer == "" {
|
||||
defaultServer = "https://localhost:8443"
|
||||
}
|
||||
serverURL := fs.String("server", defaultServer, "certctl server URL — must be https:// (env: CERTCTL_SERVER_URL)")
|
||||
|
||||
apiKey := fs.String("api-key", os.Getenv("CERTCTL_API_KEY"), "API key for authentication (env: CERTCTL_API_KEY)")
|
||||
format := fs.String("format", "table", "Output format: table, json")
|
||||
caBundlePath := fs.String("ca-bundle", os.Getenv("CERTCTL_SERVER_CA_BUNDLE_PATH"), "Path to a PEM-encoded CA bundle that signed the server cert (env: CERTCTL_SERVER_CA_BUNDLE_PATH)")
|
||||
insecure := fs.Bool("insecure", strings.EqualFold(os.Getenv("CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY"), "true"), "Skip TLS certificate verification — dev only, never set in production (env: CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY)")
|
||||
|
||||
fs.Parse(os.Args[1:])
|
||||
|
||||
if err := validateHTTPSScheme(*serverURL); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
fmt.Fprintf(os.Stderr, "\nThe certctl control plane is HTTPS-only as of v2.2.\n")
|
||||
fmt.Fprintf(os.Stderr, "See docs/upgrade-to-tls.md for the cutover walkthrough.\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
args := fs.Args()
|
||||
if len(args) == 0 {
|
||||
fs.Usage()
|
||||
@@ -63,13 +80,16 @@ Examples:
|
||||
}
|
||||
|
||||
// Create client
|
||||
client := cli.NewClient(*serverURL, *apiKey, *format)
|
||||
client, err := cli.NewClient(*serverURL, *apiKey, *format, *caBundlePath, *insecure)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Dispatch to appropriate command
|
||||
command := args[0]
|
||||
cmdArgs := args[1:]
|
||||
|
||||
var err error
|
||||
switch command {
|
||||
case "certs":
|
||||
err = handleCerts(client, cmdArgs)
|
||||
@@ -130,15 +150,27 @@ func handleCerts(client *cli.Client, args []string) error {
|
||||
reason = subArgs[2]
|
||||
}
|
||||
return client.RevokeCertificate(id, reason)
|
||||
case "bulk-revoke":
|
||||
return client.BulkRevokeCertificates(subArgs)
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown subcommand: certs %s\n", subcommand)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// handleAgents dispatches the `agents` subcommands.
|
||||
//
|
||||
// I-004 additions:
|
||||
//
|
||||
// agents list --retired — hit the opt-in /agents/retired endpoint
|
||||
// instead of the default listing (which
|
||||
// filters retired rows out).
|
||||
// agents retire <id> — soft-retire an agent (DELETE /agents/{id}).
|
||||
// --force cascades; --reason is required with
|
||||
// --force (mirrors ErrForceReasonRequired).
|
||||
func handleAgents(client *cli.Client, args []string) error {
|
||||
if len(args) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: agents <list|get> [options]\n")
|
||||
fmt.Fprintf(os.Stderr, "usage: agents <list|get|retire> [options]\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -147,13 +179,34 @@ func handleAgents(client *cli.Client, args []string) error {
|
||||
|
||||
switch subcommand {
|
||||
case "list":
|
||||
return client.ListAgents(subArgs)
|
||||
// --retired flag splits to a separate endpoint. We intercept it
|
||||
// client-side and strip it before delegating, so both code paths
|
||||
// share the --page/--per-page flag parsing inside the client.
|
||||
retired := false
|
||||
rest := make([]string, 0, len(subArgs))
|
||||
for _, a := range subArgs {
|
||||
if a == "--retired" {
|
||||
retired = true
|
||||
continue
|
||||
}
|
||||
rest = append(rest, a)
|
||||
}
|
||||
if retired {
|
||||
return client.ListRetiredAgents(rest)
|
||||
}
|
||||
return client.ListAgents(rest)
|
||||
case "get":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: agents get <id>\n")
|
||||
return nil
|
||||
}
|
||||
return client.GetAgent(subArgs[0])
|
||||
case "retire":
|
||||
if len(subArgs) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "usage: agents retire <id> [--force] [--reason <reason>]\n")
|
||||
return nil
|
||||
}
|
||||
return client.RetireAgent(subArgs)
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown subcommand: agents %s\n", subcommand)
|
||||
return nil
|
||||
@@ -201,3 +254,26 @@ func handleImport(client *cli.Client, args []string) error {
|
||||
func handleStatus(client *cli.Client) error {
|
||||
return client.GetStatus()
|
||||
}
|
||||
|
||||
// validateHTTPSScheme rejects plaintext and empty-scheme server URLs at
|
||||
// startup so operators get a fail-loud diagnostic before any network call,
|
||||
// not a TCP-refused or TLS-handshake-error downstream. See docs/upgrade-to-tls.md.
|
||||
func validateHTTPSScheme(serverURL string) error {
|
||||
if serverURL == "" {
|
||||
return fmt.Errorf("server URL is empty — set --server (or CERTCTL_SERVER_URL) to an https:// URL (e.g., https://certctl-server:8443)")
|
||||
}
|
||||
u, err := url.Parse(serverURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("server URL %q is not a valid URL: %w", serverURL, err)
|
||||
}
|
||||
switch strings.ToLower(u.Scheme) {
|
||||
case "https":
|
||||
return nil
|
||||
case "http":
|
||||
return fmt.Errorf("server URL %q uses plaintext http:// — the certctl control plane is HTTPS-only", serverURL)
|
||||
case "":
|
||||
return fmt.Errorf("server URL %q is missing a scheme — expected https://", serverURL)
|
||||
default:
|
||||
return fmt.Errorf("server URL %q uses unsupported scheme %q — expected https://", serverURL, u.Scheme)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestValidateHTTPSScheme pins the pre-flight URL-scheme guard that the
|
||||
// HTTPS-Everywhere milestone (v2.2, §3.2) requires on the certctl-cli binary
|
||||
// startup path. The CLI's diagnostic is distinct from the agent and MCP server
|
||||
// because it surfaces the --server flag alongside CERTCTL_SERVER_URL — so the
|
||||
// empty-URL case pins that flag-name substring separately. Every other case
|
||||
// mirrors the dispatch arms in cmd/cli/main.go:validateHTTPSScheme; drifting
|
||||
// the substrings is what this test is here to catch.
|
||||
func TestValidateHTTPSScheme(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
serverURL string
|
||||
wantErr bool
|
||||
wantErrSub string // substring that MUST appear in the error message
|
||||
}{
|
||||
{
|
||||
name: "https URL passes",
|
||||
serverURL: "https://certctl-server:8443",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "https URL with path passes",
|
||||
serverURL: "https://certctl.example.com/api/v1",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "uppercase HTTPS scheme passes (url.Parse lowercases)",
|
||||
serverURL: "HTTPS://certctl-server:8443",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty URL rejected mentions --server flag",
|
||||
serverURL: "",
|
||||
wantErr: true,
|
||||
wantErrSub: "--server",
|
||||
},
|
||||
{
|
||||
name: "empty URL rejected also mentions CERTCTL_SERVER_URL",
|
||||
serverURL: "",
|
||||
wantErr: true,
|
||||
wantErrSub: "CERTCTL_SERVER_URL",
|
||||
},
|
||||
{
|
||||
name: "plaintext http rejected",
|
||||
serverURL: "http://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "plaintext http://",
|
||||
},
|
||||
{
|
||||
name: "bare host missing scheme rejected",
|
||||
serverURL: "localhost:8443",
|
||||
wantErr: true,
|
||||
// url.Parse treats "localhost:8443" as scheme=localhost, opaque=8443
|
||||
// — exercises the default arm (unsupported scheme) rather than the
|
||||
// empty-scheme arm. Both are fail-closed, which is what we care about.
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
{
|
||||
name: "path-only URL rejected",
|
||||
serverURL: "//certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "missing a scheme",
|
||||
},
|
||||
{
|
||||
name: "unsupported scheme rejected",
|
||||
serverURL: "ftp://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
{
|
||||
name: "ws scheme rejected",
|
||||
serverURL: "ws://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := validateHTTPSScheme(tt.serverURL)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("validateHTTPSScheme(%q) err=%v wantErr=%v", tt.serverURL, err, tt.wantErr)
|
||||
}
|
||||
if tt.wantErr && tt.wantErrSub != "" && !strings.Contains(err.Error(), tt.wantErrSub) {
|
||||
t.Errorf("validateHTTPSScheme(%q) err=%q must contain %q so operators see the right diagnostic",
|
||||
tt.serverURL, err.Error(), tt.wantErrSub)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
+46
-2
@@ -4,8 +4,10 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
|
||||
gomcp "github.com/modelcontextprotocol/go-sdk/mcp"
|
||||
|
||||
@@ -16,14 +18,33 @@ import (
|
||||
var Version = "dev"
|
||||
|
||||
func main() {
|
||||
// HTTPS-Everywhere (v2.2): the server is HTTPS-only. The default URL
|
||||
// uses https://; plaintext http:// is rejected by validateHTTPSScheme
|
||||
// below with a fail-loud pre-flight diagnostic pointing at
|
||||
// docs/upgrade-to-tls.md, so operators never get a TCP-refused or
|
||||
// TLS-handshake-error downstream. See docs/tls.md for CA bundle and
|
||||
// insecure-skip-verify guidance.
|
||||
serverURL := os.Getenv("CERTCTL_SERVER_URL")
|
||||
if serverURL == "" {
|
||||
serverURL = "http://localhost:8443"
|
||||
serverURL = "https://localhost:8443"
|
||||
}
|
||||
|
||||
if err := validateHTTPSScheme(serverURL); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
fmt.Fprintf(os.Stderr, "\nThe certctl control plane is HTTPS-only as of v2.2.\n")
|
||||
fmt.Fprintf(os.Stderr, "See docs/upgrade-to-tls.md for the cutover walkthrough.\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
apiKey := os.Getenv("CERTCTL_API_KEY")
|
||||
caBundlePath := os.Getenv("CERTCTL_SERVER_CA_BUNDLE_PATH")
|
||||
insecure := strings.EqualFold(os.Getenv("CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY"), "true")
|
||||
|
||||
client := mcp.NewClient(serverURL, apiKey)
|
||||
client, err := mcp.NewClient(serverURL, apiKey, caBundlePath, insecure)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
server := gomcp.NewServer(&gomcp.Implementation{
|
||||
Name: "certctl",
|
||||
@@ -41,3 +62,26 @@ func main() {
|
||||
log.Fatalf("MCP server error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// validateHTTPSScheme rejects plaintext and empty-scheme server URLs at
|
||||
// startup so operators get a fail-loud diagnostic before any network call,
|
||||
// not a TCP-refused or TLS-handshake-error downstream. See docs/upgrade-to-tls.md.
|
||||
func validateHTTPSScheme(serverURL string) error {
|
||||
if serverURL == "" {
|
||||
return fmt.Errorf("server URL is empty — set CERTCTL_SERVER_URL to an https:// URL (e.g., https://certctl-server:8443)")
|
||||
}
|
||||
u, err := url.Parse(serverURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("server URL %q is not a valid URL: %w", serverURL, err)
|
||||
}
|
||||
switch strings.ToLower(u.Scheme) {
|
||||
case "https":
|
||||
return nil
|
||||
case "http":
|
||||
return fmt.Errorf("server URL %q uses plaintext http:// — the certctl control plane is HTTPS-only", serverURL)
|
||||
case "":
|
||||
return fmt.Errorf("server URL %q is missing a scheme — expected https://", serverURL)
|
||||
default:
|
||||
return fmt.Errorf("server URL %q uses unsupported scheme %q — expected https://", serverURL, u.Scheme)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestValidateHTTPSScheme pins the pre-flight URL-scheme guard that the
|
||||
// HTTPS-Everywhere milestone (v2.2, §3.2) requires on the MCP server binary
|
||||
// startup path. The whole point is to fail loud with a diagnostic that points
|
||||
// at docs/upgrade-to-tls.md *before* any network call — not a cryptic
|
||||
// TCP-refused or TLS-handshake-error two ticks later. Every case here mirrors
|
||||
// the dispatch arms in cmd/mcp-server/main.go:validateHTTPSScheme; drifting
|
||||
// the error-message substrings is what this test is here to catch.
|
||||
func TestValidateHTTPSScheme(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
serverURL string
|
||||
wantErr bool
|
||||
wantErrSub string // substring that MUST appear in the error message
|
||||
}{
|
||||
{
|
||||
name: "https URL passes",
|
||||
serverURL: "https://certctl-server:8443",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "https URL with path passes",
|
||||
serverURL: "https://certctl.example.com/api/v1",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "uppercase HTTPS scheme passes (url.Parse lowercases)",
|
||||
serverURL: "HTTPS://certctl-server:8443",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty URL rejected",
|
||||
serverURL: "",
|
||||
wantErr: true,
|
||||
wantErrSub: "server URL is empty",
|
||||
},
|
||||
{
|
||||
name: "plaintext http rejected",
|
||||
serverURL: "http://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "plaintext http://",
|
||||
},
|
||||
{
|
||||
name: "bare host missing scheme rejected",
|
||||
serverURL: "localhost:8443",
|
||||
wantErr: true,
|
||||
// url.Parse treats "localhost:8443" as scheme=localhost, opaque=8443
|
||||
// — exercises the default arm (unsupported scheme) rather than the
|
||||
// empty-scheme arm. Both are fail-closed, which is what we care about.
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
{
|
||||
name: "path-only URL rejected",
|
||||
serverURL: "//certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "missing a scheme",
|
||||
},
|
||||
{
|
||||
name: "unsupported scheme rejected",
|
||||
serverURL: "ftp://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
{
|
||||
name: "ws scheme rejected",
|
||||
serverURL: "ws://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := validateHTTPSScheme(tt.serverURL)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("validateHTTPSScheme(%q) err=%v wantErr=%v", tt.serverURL, err, tt.wantErr)
|
||||
}
|
||||
if tt.wantErr && tt.wantErrSub != "" && !strings.Contains(err.Error(), tt.wantErrSub) {
|
||||
t.Errorf("validateHTTPSScheme(%q) err=%q must contain %q so operators see the right diagnostic",
|
||||
tt.serverURL, err.Error(), tt.wantErrSub)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,314 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestBuildFinalHandler_Dispatch is the M-001 regression harness for the outer
|
||||
// HTTP dispatch layer. It pins which path prefixes ride the no-auth middleware
|
||||
// chain (EST, SCEP, /.well-known/pki, health/ready, /api/v1/auth/info) versus
|
||||
// the authenticated chain (/api/v1/*).
|
||||
//
|
||||
// The concern under test is ONLY the dispatch in buildFinalHandler — the
|
||||
// handlers themselves are mocked as marker handlers that stamp "AUTH" or
|
||||
// "NOAUTH" into the response body. Service-layer concerns (SCEP password
|
||||
// validation, EST CSR validation, API auth enforcement) are covered by their
|
||||
// respective test suites.
|
||||
//
|
||||
// Case (i) is the central guard: EST with NO client cert / NO Bearer token
|
||||
// MUST reach the no-auth handler (pre-M-001 it was 401'd by the Auth
|
||||
// middleware, blocking enrollment for every real-world EST client).
|
||||
func TestBuildFinalHandler_Dispatch(t *testing.T) {
|
||||
// Marker handlers — each stamps a unique body so tests can verify which
|
||||
// chain the request traversed.
|
||||
authHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("X-Chain", "auth")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("AUTH"))
|
||||
})
|
||||
noAuthHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("X-Chain", "noauth")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("NOAUTH"))
|
||||
})
|
||||
|
||||
// Dashboard directory with index.html + assets/ for SPA fallback and
|
||||
// static-asset tests. Cleaned up by t.TempDir.
|
||||
webDir := t.TempDir()
|
||||
indexHTML := []byte("<!doctype html><html><body>certctl dashboard</body></html>")
|
||||
if err := os.WriteFile(filepath.Join(webDir, "index.html"), indexHTML, 0o644); err != nil {
|
||||
t.Fatalf("write index.html: %v", err)
|
||||
}
|
||||
assetsDir := filepath.Join(webDir, "assets")
|
||||
if err := os.MkdirAll(assetsDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir assets: %v", err)
|
||||
}
|
||||
assetJS := []byte("console.log('certctl');")
|
||||
if err := os.WriteFile(filepath.Join(assetsDir, "app.js"), assetJS, 0o644); err != nil {
|
||||
t.Fatalf("write app.js: %v", err)
|
||||
}
|
||||
|
||||
handler := buildFinalHandler(authHandler, noAuthHandler, webDir, true /* dashboardEnabled */)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
method string
|
||||
path string
|
||||
wantBody string // "AUTH" | "NOAUTH" | "" (== substring match against response body)
|
||||
wantBodyPrefix string
|
||||
wantStatus int
|
||||
description string
|
||||
}{
|
||||
// ---- Case (i): M-001 central regression guard ----
|
||||
{
|
||||
name: "est_cacerts_no_auth_reaches_noauth_handler",
|
||||
method: http.MethodGet,
|
||||
path: "/.well-known/est/cacerts",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "EST clients cannot present Bearer tokens — must NOT be 401'd before reaching the handler (RFC 7030 §4.1.1)",
|
||||
},
|
||||
{
|
||||
name: "est_simpleenroll_no_auth_reaches_noauth_handler",
|
||||
method: http.MethodPost,
|
||||
path: "/.well-known/est/simpleenroll",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "RFC 7030 §4.2 simpleenroll served from no-auth chain (option D)",
|
||||
},
|
||||
{
|
||||
name: "est_simplereenroll_no_auth_reaches_noauth_handler",
|
||||
method: http.MethodPost,
|
||||
path: "/.well-known/est/simplereenroll",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "RFC 7030 §4.2.2 simplereenroll also on no-auth chain",
|
||||
},
|
||||
{
|
||||
name: "est_csrattrs_no_auth_reaches_noauth_handler",
|
||||
method: http.MethodGet,
|
||||
path: "/.well-known/est/csrattrs",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "RFC 7030 §4.5 csrattrs also on no-auth chain",
|
||||
},
|
||||
|
||||
// ---- Cases (ii) + (iii): SCEP dispatch ----
|
||||
// The actual challengePassword validation lives in the service layer
|
||||
// (internal/service/scep.go). This test pins that ALL /scep* requests
|
||||
// reach the no-auth chain — the service layer is then responsible for
|
||||
// rejecting or accepting based on password contents.
|
||||
{
|
||||
name: "scep_exact_path_reaches_noauth_handler",
|
||||
method: http.MethodGet,
|
||||
path: "/scep",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "SCEP clients authenticate via CSR challengePassword, not Bearer (RFC 8894 §3.2)",
|
||||
},
|
||||
{
|
||||
name: "scep_subpath_reaches_noauth_handler",
|
||||
method: http.MethodPost,
|
||||
path: "/scep/",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "Trailing-slash variant must also ride no-auth chain",
|
||||
},
|
||||
{
|
||||
name: "scep_query_string_reaches_noauth_handler",
|
||||
method: http.MethodGet,
|
||||
path: "/scep?operation=GetCACaps",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "Query string does not affect dispatch — operation dispatch is handler-internal",
|
||||
},
|
||||
// Defensive: /scepxyz MUST NOT match the SCEP prefix (guards against
|
||||
// over-broad matching that would leak non-SCEP paths into no-auth).
|
||||
{
|
||||
name: "scepxyz_does_not_match_scep_prefix",
|
||||
method: http.MethodGet,
|
||||
path: "/scepxyz",
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "certctl dashboard",
|
||||
description: "SPA fallback — /scepxyz must not be confused with /scep or /scep/",
|
||||
},
|
||||
|
||||
// ---- Case (iv): RFC 5280 CRL + RFC 6960 OCSP ----
|
||||
{
|
||||
name: "pki_crl_no_auth_reaches_noauth_handler",
|
||||
method: http.MethodGet,
|
||||
path: "/.well-known/pki/crl/abc123",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "RFC 5280 CRL distribution point must be served without auth",
|
||||
},
|
||||
{
|
||||
name: "pki_ocsp_no_auth_reaches_noauth_handler",
|
||||
method: http.MethodGet,
|
||||
path: "/.well-known/pki/ocsp/abc123/serial",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "RFC 6960 OCSP responder must be served without auth",
|
||||
},
|
||||
|
||||
// ---- Case (v): Authenticated API routes ----
|
||||
{
|
||||
name: "api_v1_certificates_goes_through_auth",
|
||||
method: http.MethodGet,
|
||||
path: "/api/v1/certificates",
|
||||
wantBody: "AUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "Primary API surface must still require Bearer token",
|
||||
},
|
||||
{
|
||||
name: "api_v1_auth_check_goes_through_auth",
|
||||
method: http.MethodGet,
|
||||
path: "/api/v1/auth/check",
|
||||
wantBody: "AUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "auth/check validates the caller's Bearer — auth chain required",
|
||||
},
|
||||
{
|
||||
name: "api_v1_jobs_goes_through_auth",
|
||||
method: http.MethodGet,
|
||||
path: "/api/v1/jobs",
|
||||
wantBody: "AUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "Jobs API is part of the privileged surface",
|
||||
},
|
||||
|
||||
// ---- Health probes bypass auth ----
|
||||
{
|
||||
name: "health_bypasses_auth",
|
||||
method: http.MethodGet,
|
||||
path: "/health",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "Docker/K8s health probes cannot carry Bearer tokens",
|
||||
},
|
||||
{
|
||||
name: "ready_bypasses_auth",
|
||||
method: http.MethodGet,
|
||||
path: "/ready",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "Readiness probe also unauthenticated",
|
||||
},
|
||||
{
|
||||
name: "auth_info_bypasses_auth",
|
||||
method: http.MethodGet,
|
||||
path: "/api/v1/auth/info",
|
||||
wantBody: "NOAUTH",
|
||||
wantStatus: http.StatusOK,
|
||||
description: "React app calls auth/info BEFORE login to discover auth mode",
|
||||
},
|
||||
|
||||
// ---- Static assets served by file server ----
|
||||
{
|
||||
name: "static_asset_served_by_file_server",
|
||||
method: http.MethodGet,
|
||||
path: "/assets/app.js",
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "console.log('certctl');",
|
||||
description: "Built Vite assets served directly without auth",
|
||||
},
|
||||
|
||||
// ---- SPA fallback ----
|
||||
{
|
||||
name: "spa_fallback_serves_index_html",
|
||||
method: http.MethodGet,
|
||||
path: "/",
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "certctl dashboard",
|
||||
description: "Root path serves SPA entry point",
|
||||
},
|
||||
{
|
||||
name: "spa_fallback_for_unknown_route",
|
||||
method: http.MethodGet,
|
||||
path: "/certificates",
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "certctl dashboard",
|
||||
description: "React Router routes fall through to index.html",
|
||||
},
|
||||
{
|
||||
name: "spa_fallback_deep_route",
|
||||
method: http.MethodGet,
|
||||
path: "/certificates/mc-api-prod/detail",
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "certctl dashboard",
|
||||
description: "Deep React Router routes also fall through to SPA",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest(tc.method, tc.path, nil)
|
||||
w := httptest.NewRecorder()
|
||||
handler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != tc.wantStatus {
|
||||
t.Errorf("status = %d, want %d (%s)", w.Code, tc.wantStatus, tc.description)
|
||||
}
|
||||
body := w.Body.String()
|
||||
if tc.wantBody != "" && !strings.Contains(body, tc.wantBody) {
|
||||
t.Errorf("body %q does not contain %q (%s)", body, tc.wantBody, tc.description)
|
||||
}
|
||||
if tc.wantBodyPrefix != "" && !strings.HasPrefix(body, tc.wantBodyPrefix) {
|
||||
t.Errorf("body %q does not start with %q (%s)", body, tc.wantBodyPrefix, tc.description)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildFinalHandler_NoDashboard pins the API-only (dashboard-absent)
|
||||
// dispatch behavior. When web/dist/index.html is missing, everything that's
|
||||
// not a no-auth bypass route falls through to the authenticated apiHandler
|
||||
// (pre-M-001 behavior for headless deployments). EST/SCEP/PKI still ride the
|
||||
// no-auth chain.
|
||||
func TestBuildFinalHandler_NoDashboard(t *testing.T) {
|
||||
authHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("AUTH"))
|
||||
})
|
||||
noAuthHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("NOAUTH"))
|
||||
})
|
||||
|
||||
handler := buildFinalHandler(authHandler, noAuthHandler, "/nonexistent", false /* dashboardEnabled */)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
wantBody string
|
||||
}{
|
||||
{"est_still_no_auth", "/.well-known/est/cacerts", "NOAUTH"},
|
||||
{"scep_still_no_auth", "/scep", "NOAUTH"},
|
||||
{"pki_still_no_auth", "/.well-known/pki/crl/x", "NOAUTH"},
|
||||
{"health_still_no_auth", "/health", "NOAUTH"},
|
||||
{"api_still_auth", "/api/v1/certificates", "AUTH"},
|
||||
// The difference: non-API, non-special paths go through auth chain when
|
||||
// there's no dashboard to serve (preserves legacy headless behavior).
|
||||
{"unknown_path_falls_through_to_auth", "/", "AUTH"},
|
||||
{"unknown_deep_path_falls_through_to_auth", "/random/path", "AUTH"},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, tc.path, nil)
|
||||
w := httptest.NewRecorder()
|
||||
handler.ServeHTTP(w, req)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", w.Code)
|
||||
}
|
||||
if got := w.Body.String(); !strings.Contains(got, tc.wantBody) {
|
||||
t.Errorf("body = %q, want to contain %q", got, tc.wantBody)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
+517
-212
@@ -9,6 +9,7 @@ import (
|
||||
"os"
|
||||
"os/signal"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
@@ -16,19 +17,15 @@ import (
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/api/router"
|
||||
"github.com/shankar0123/certctl/internal/config"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
acmeissuer "github.com/shankar0123/certctl/internal/connector/issuer/acme"
|
||||
"github.com/shankar0123/certctl/internal/connector/issuer/local"
|
||||
digicertissuer "github.com/shankar0123/certctl/internal/connector/issuer/digicert"
|
||||
opensslissuer "github.com/shankar0123/certctl/internal/connector/issuer/openssl"
|
||||
stepcaissuer "github.com/shankar0123/certctl/internal/connector/issuer/stepca"
|
||||
sectigoissuer "github.com/shankar0123/certctl/internal/connector/issuer/sectigo"
|
||||
vaultissuer "github.com/shankar0123/certctl/internal/connector/issuer/vault"
|
||||
discoveryawssm "github.com/shankar0123/certctl/internal/connector/discovery/awssm"
|
||||
discoveryazurekv "github.com/shankar0123/certctl/internal/connector/discovery/azurekv"
|
||||
discoverygcpsm "github.com/shankar0123/certctl/internal/connector/discovery/gcpsm"
|
||||
notifyemail "github.com/shankar0123/certctl/internal/connector/notifier/email"
|
||||
notifyopsgenie "github.com/shankar0123/certctl/internal/connector/notifier/opsgenie"
|
||||
notifypagerduty "github.com/shankar0123/certctl/internal/connector/notifier/pagerduty"
|
||||
notifyslack "github.com/shankar0123/certctl/internal/connector/notifier/slack"
|
||||
notifyteams "github.com/shankar0123/certctl/internal/connector/notifier/teams"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository/postgres"
|
||||
"github.com/shankar0123/certctl/internal/scheduler"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
@@ -84,126 +81,64 @@ func main() {
|
||||
ownerRepo := postgres.NewOwnerRepository(db)
|
||||
logger.Info("initialized all repositories")
|
||||
|
||||
// Initialize Local CA issuer connector.
|
||||
// In sub-CA mode (CERTCTL_CA_CERT_PATH + CERTCTL_CA_KEY_PATH set), loads a pre-signed
|
||||
// CA cert+key from disk. All issued certs chain to the upstream root (e.g., ADCS).
|
||||
// Otherwise, generates an ephemeral self-signed CA for development/demo.
|
||||
localCAConfig := &local.Config{}
|
||||
if cfg.CA.CertPath != "" && cfg.CA.KeyPath != "" {
|
||||
localCAConfig.CACertPath = cfg.CA.CertPath
|
||||
localCAConfig.CAKeyPath = cfg.CA.KeyPath
|
||||
logger.Info("Local CA configured in sub-CA mode",
|
||||
"cert_path", cfg.CA.CertPath,
|
||||
"key_path", cfg.CA.KeyPath)
|
||||
// Initialize dynamic issuer registry.
|
||||
// Issuers are loaded from the database (with AES-256-GCM encrypted config).
|
||||
// On first boot with an empty database, env var issuers are seeded automatically.
|
||||
//
|
||||
// M-8 (CWE-916 / CWE-329): the encryption passphrase is passed as a raw
|
||||
// string into IssuerService / TargetService / IssuerRegistry. Each call to
|
||||
// crypto.EncryptIfKeySet generates a fresh 16-byte PBKDF2 salt and emits a
|
||||
// v2 blob (magic 0x02 || salt || nonce || sealed). Decryption auto-detects
|
||||
// v1 legacy blobs (no magic) and falls back to the fixed v1 salt for
|
||||
// backward compatibility; v1 blobs transparently upgrade to v2 on next
|
||||
// write. DO NOT pre-derive the key here with crypto.DeriveKey — that was
|
||||
// the v1 fixed-salt behaviour that M-8 removes.
|
||||
encryptionKey := cfg.Encryption.ConfigEncryptionKey
|
||||
if encryptionKey != "" {
|
||||
logger.Info("config encryption enabled (AES-256-GCM, per-ciphertext PBKDF2 salt)")
|
||||
} else {
|
||||
logger.Info("Local CA configured in self-signed mode (ephemeral)")
|
||||
}
|
||||
localCA := local.New(localCAConfig, logger)
|
||||
logger.Info("initialized Local CA issuer connector")
|
||||
|
||||
// Initialize ACME issuer connector (for Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, etc.)
|
||||
// Supports HTTP-01 (default), DNS-01 (for wildcards), and DNS-PERSIST-01 (standing record) challenge types.
|
||||
// EAB (External Account Binding) required by ZeroSSL, Google Trust Services, SSL.com.
|
||||
acmeConnector := acmeissuer.New(&acmeissuer.Config{
|
||||
DirectoryURL: os.Getenv("CERTCTL_ACME_DIRECTORY_URL"),
|
||||
Email: os.Getenv("CERTCTL_ACME_EMAIL"),
|
||||
EABKid: os.Getenv("CERTCTL_ACME_EAB_KID"),
|
||||
EABHmac: os.Getenv("CERTCTL_ACME_EAB_HMAC"),
|
||||
ChallengeType: os.Getenv("CERTCTL_ACME_CHALLENGE_TYPE"),
|
||||
DNSPresentScript: os.Getenv("CERTCTL_ACME_DNS_PRESENT_SCRIPT"),
|
||||
DNSCleanUpScript: os.Getenv("CERTCTL_ACME_DNS_CLEANUP_SCRIPT"),
|
||||
DNSPersistIssuerDomain: os.Getenv("CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN"),
|
||||
Insecure: cfg.ACME.Insecure,
|
||||
}, logger)
|
||||
logger.Info("initialized ACME issuer connector")
|
||||
|
||||
// Initialize step-ca issuer connector (for Smallstep private CA).
|
||||
// Uses the native /sign API with JWK provisioner authentication.
|
||||
stepcaConnector := stepcaissuer.New(&stepcaissuer.Config{
|
||||
CAURL: os.Getenv("CERTCTL_STEPCA_URL"),
|
||||
RootCertPath: os.Getenv("CERTCTL_STEPCA_ROOT_CERT"),
|
||||
ProvisionerName: os.Getenv("CERTCTL_STEPCA_PROVISIONER"),
|
||||
ProvisionerKeyPath: os.Getenv("CERTCTL_STEPCA_KEY_PATH"),
|
||||
ProvisionerPassword: os.Getenv("CERTCTL_STEPCA_PASSWORD"),
|
||||
}, logger)
|
||||
logger.Info("initialized step-ca issuer connector")
|
||||
|
||||
// Initialize OpenSSL/Custom CA issuer connector (for script-based CA integrations).
|
||||
// Delegates certificate signing to user-provided scripts.
|
||||
opensslConnector := opensslissuer.New(&opensslissuer.Config{
|
||||
SignScript: os.Getenv("CERTCTL_OPENSSL_SIGN_SCRIPT"),
|
||||
RevokeScript: os.Getenv("CERTCTL_OPENSSL_REVOKE_SCRIPT"),
|
||||
CRLScript: os.Getenv("CERTCTL_OPENSSL_CRL_SCRIPT"),
|
||||
TimeoutSeconds: getEnvIntDefault(os.Getenv("CERTCTL_OPENSSL_TIMEOUT_SECONDS"), 30),
|
||||
}, logger)
|
||||
logger.Info("initialized OpenSSL/Custom CA issuer connector")
|
||||
|
||||
// Initialize Vault PKI issuer connector (for HashiCorp Vault internal PKI).
|
||||
// Uses the Vault HTTP API with token authentication.
|
||||
vaultConnector := vaultissuer.New(&vaultissuer.Config{
|
||||
Addr: os.Getenv("CERTCTL_VAULT_ADDR"),
|
||||
Token: os.Getenv("CERTCTL_VAULT_TOKEN"),
|
||||
Mount: getEnvDefault("CERTCTL_VAULT_MOUNT", "pki"),
|
||||
Role: os.Getenv("CERTCTL_VAULT_ROLE"),
|
||||
TTL: getEnvDefault("CERTCTL_VAULT_TTL", "8760h"),
|
||||
}, logger)
|
||||
logger.Info("initialized Vault PKI issuer connector")
|
||||
|
||||
// Initialize DigiCert CertCentral issuer connector (for enterprise public CA).
|
||||
// Uses the DigiCert REST API with async order model.
|
||||
digicertConnector := digicertissuer.New(&digicertissuer.Config{
|
||||
APIKey: os.Getenv("CERTCTL_DIGICERT_API_KEY"),
|
||||
OrgID: os.Getenv("CERTCTL_DIGICERT_ORG_ID"),
|
||||
ProductType: getEnvDefault("CERTCTL_DIGICERT_PRODUCT_TYPE", "ssl_basic"),
|
||||
BaseURL: getEnvDefault("CERTCTL_DIGICERT_BASE_URL", "https://www.digicert.com/services/v2"),
|
||||
}, logger)
|
||||
logger.Info("initialized DigiCert CertCentral issuer connector")
|
||||
|
||||
// Initialize Sectigo SCM issuer connector (for enterprise public CA).
|
||||
// Uses the Sectigo SCM REST API with async order model.
|
||||
sectigoConnector := sectigoissuer.New(§igoissuer.Config{
|
||||
CustomerURI: cfg.Sectigo.CustomerURI,
|
||||
Login: cfg.Sectigo.Login,
|
||||
Password: cfg.Sectigo.Password,
|
||||
OrgID: cfg.Sectigo.OrgID,
|
||||
CertType: cfg.Sectigo.CertType,
|
||||
Term: cfg.Sectigo.Term,
|
||||
BaseURL: cfg.Sectigo.BaseURL,
|
||||
}, logger)
|
||||
logger.Info("initialized Sectigo SCM issuer connector")
|
||||
|
||||
// Build issuer registry: maps issuer IDs (from database) to connector implementations.
|
||||
// "iss-local" matches the seed data issuer ID for the Local CA.
|
||||
// "iss-acme-staging" and "iss-acme-prod" are conventional IDs for ACME issuers.
|
||||
// "iss-stepca" is the step-ca private CA connector.
|
||||
// "iss-openssl" is the custom CA/OpenSSL connector.
|
||||
issuerRegistry := map[string]service.IssuerConnector{
|
||||
"iss-local": service.NewIssuerConnectorAdapter(localCA),
|
||||
"iss-acme-staging": service.NewIssuerConnectorAdapter(acmeConnector),
|
||||
"iss-acme-prod": service.NewIssuerConnectorAdapter(acmeConnector),
|
||||
"iss-stepca": service.NewIssuerConnectorAdapter(stepcaConnector),
|
||||
"iss-openssl": service.NewIssuerConnectorAdapter(opensslConnector),
|
||||
// C-2 fix: fail closed at startup when database-sourced issuer or target
|
||||
// rows exist without a configured encryption key. Previously the server
|
||||
// would emit a one-line warning and silently persist new GUI-created
|
||||
// configs as plaintext (CWE-311). Refuse to start instead: the operator
|
||||
// must either configure CERTCTL_CONFIG_ENCRYPTION_KEY or remove the
|
||||
// vulnerable rows before the control plane can boot.
|
||||
ctx := context.Background()
|
||||
dbIssuers, ierr := issuerRepo.List(ctx)
|
||||
if ierr != nil {
|
||||
logger.Error("startup check: failed to list issuers", "error", ierr)
|
||||
os.Exit(1)
|
||||
}
|
||||
dbTargets, terr := targetRepo.List(ctx)
|
||||
if terr != nil {
|
||||
logger.Error("startup check: failed to list targets", "error", terr)
|
||||
os.Exit(1)
|
||||
}
|
||||
var dbIssuerCount, dbTargetCount int
|
||||
for _, iss := range dbIssuers {
|
||||
if iss != nil && iss.Source == "database" {
|
||||
dbIssuerCount++
|
||||
}
|
||||
}
|
||||
for _, tgt := range dbTargets {
|
||||
if tgt != nil && tgt.Source == "database" {
|
||||
dbTargetCount++
|
||||
}
|
||||
}
|
||||
if dbIssuerCount > 0 || dbTargetCount > 0 {
|
||||
logger.Error(
|
||||
"startup refused: CERTCTL_CONFIG_ENCRYPTION_KEY is not set but database-sourced configs exist "+
|
||||
"(would expose sensitive fields as plaintext, CWE-311). "+
|
||||
"Set the encryption key or remove the affected rows before restarting.",
|
||||
"database_sourced_issuers", dbIssuerCount,
|
||||
"database_sourced_targets", dbTargetCount,
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
logger.Warn("CERTCTL_CONFIG_ENCRYPTION_KEY not set — env-seeded issuers will be stored in plaintext; GUI-created issuers and targets will be rejected until a key is configured")
|
||||
}
|
||||
|
||||
// Conditionally register Vault PKI (only if CERTCTL_VAULT_ADDR is set)
|
||||
if os.Getenv("CERTCTL_VAULT_ADDR") != "" {
|
||||
issuerRegistry["iss-vault"] = service.NewIssuerConnectorAdapter(vaultConnector)
|
||||
logger.Info("Vault PKI issuer registered", "id", "iss-vault")
|
||||
}
|
||||
|
||||
// Conditionally register DigiCert (only if CERTCTL_DIGICERT_API_KEY is set)
|
||||
if os.Getenv("CERTCTL_DIGICERT_API_KEY") != "" {
|
||||
issuerRegistry["iss-digicert"] = service.NewIssuerConnectorAdapter(digicertConnector)
|
||||
logger.Info("DigiCert CertCentral issuer registered", "id", "iss-digicert")
|
||||
}
|
||||
|
||||
// Conditionally register Sectigo SCM (only if all 3 auth credentials are set)
|
||||
if cfg.Sectigo.CustomerURI != "" && cfg.Sectigo.Login != "" && cfg.Sectigo.Password != "" {
|
||||
issuerRegistry["iss-sectigo"] = service.NewIssuerConnectorAdapter(sectigoConnector)
|
||||
logger.Info("Sectigo SCM issuer registered", "id", "iss-sectigo")
|
||||
}
|
||||
|
||||
logger.Info("issuer registry configured", "issuers", len(issuerRegistry))
|
||||
issuerRegistry := service.NewIssuerRegistry(logger)
|
||||
|
||||
// Initialize revocation repository
|
||||
revocationRepo := postgres.NewRevocationRepository(db)
|
||||
@@ -211,6 +146,12 @@ func main() {
|
||||
// Initialize services (following the dependency graph)
|
||||
auditService := service.NewAuditService(auditRepo)
|
||||
policyService := service.NewPolicyService(policyRepo, auditService)
|
||||
policyService.SetCertRepo(certificateRepo) // D-008: CertificateLifetime arm needs CertificateVersion.NotBefore/NotAfter
|
||||
// G-1: RenewalPolicyService — distinct from PolicyService (compliance rules).
|
||||
// Drives /api/v1/renewal-policies CRUD; the service layer owns slugify + validation,
|
||||
// the repo layer owns sentinel translation for 23505 (name UNIQUE) and 23503
|
||||
// (FK-RESTRICT against managed_certificates.renewal_policy_id).
|
||||
renewalPolicyService := service.NewRenewalPolicyService(renewalPolicyRepo)
|
||||
certificateService := service.NewCertificateService(certificateRepo, policyService, auditService)
|
||||
notifierRegistry := make(map[string]service.Notifier)
|
||||
|
||||
@@ -288,11 +229,21 @@ func main() {
|
||||
renewalService := service.NewRenewalService(certificateRepo, jobRepo, renewalPolicyRepo, profileRepo, auditService, notificationService, issuerRegistry, cfg.Keygen.Mode)
|
||||
renewalService.SetTargetRepo(targetRepo)
|
||||
deploymentService := service.NewDeploymentService(jobRepo, targetRepo, agentRepo, certificateRepo, auditService, notificationService)
|
||||
jobService := service.NewJobService(jobRepo, renewalService, deploymentService, logger)
|
||||
jobService := service.NewJobService(jobRepo, certificateRepo, ownerRepo, renewalService, deploymentService, logger)
|
||||
// I-001: emit "job_retry" audit events when the scheduler resets Failed→Pending.
|
||||
// SetAuditService is optional — JobService falls back to nil-guarded no-op if unwired.
|
||||
jobService.SetAuditService(auditService)
|
||||
agentService := service.NewAgentService(agentRepo, certificateRepo, jobRepo, targetRepo, auditService, issuerRegistry, renewalService)
|
||||
agentService.SetProfileRepo(profileRepo)
|
||||
issuerService := service.NewIssuerService(issuerRepo, auditService)
|
||||
targetService := service.NewTargetService(targetRepo, auditService)
|
||||
issuerService := service.NewIssuerService(issuerRepo, auditService, issuerRegistry, encryptionKey, logger)
|
||||
|
||||
// Seed issuers from env vars on first boot (empty database only), then build registry
|
||||
issuerService.SeedFromEnvVars(context.Background(), cfg)
|
||||
if err := issuerService.BuildRegistry(context.Background()); err != nil {
|
||||
logger.Error("failed to build issuer registry from database", "error", err)
|
||||
}
|
||||
logger.Info("issuer registry loaded", "issuers", issuerRegistry.Len())
|
||||
targetService := service.NewTargetService(targetRepo, auditService, agentRepo, encryptionKey, logger)
|
||||
profileService := service.NewProfileService(profileRepo, auditService)
|
||||
teamService := service.NewTeamService(teamRepo, auditService)
|
||||
ownerService := service.NewOwnerService(ownerRepo, auditService)
|
||||
@@ -312,16 +263,107 @@ func main() {
|
||||
Name: "Network Scanner (Server-Side)",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
if err := agentRepo.Create(context.Background(), sentinelAgent); err != nil {
|
||||
// Ignore duplicate key errors (agent already exists)
|
||||
logger.Debug("sentinel agent creation", "status", "exists or created", "id", service.SentinelAgentID)
|
||||
// M-6: use CreateIfNotExists so duplicate rows on restart/upgrade are
|
||||
// idempotent without swallowing unrelated DB failures (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAgent)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelAgentID, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelAgentID)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelAgentID)
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize cloud discovery sources (M50)
|
||||
var cloudDiscoveryService *service.CloudDiscoveryService
|
||||
if cfg.CloudDiscovery.Enabled {
|
||||
cloudDiscoveryService = service.NewCloudDiscoveryService(discoveryService, logger)
|
||||
|
||||
// AWS Secrets Manager
|
||||
if cfg.CloudDiscovery.AWSSM.Enabled {
|
||||
awsSource := discoveryawssm.New(&cfg.CloudDiscovery.AWSSM, logger)
|
||||
cloudDiscoveryService.RegisterSource(awsSource)
|
||||
// Create sentinel agent for AWS SM
|
||||
sentinelAWS := &domain.Agent{
|
||||
ID: service.SentinelAWSSecretsMgr,
|
||||
Name: "AWS Secrets Manager Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
// M-6: idempotent create (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAWS)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelAWSSecretsMgr, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelAWSSecretsMgr)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelAWSSecretsMgr)
|
||||
}
|
||||
}
|
||||
|
||||
// Azure Key Vault
|
||||
if cfg.CloudDiscovery.AzureKV.Enabled {
|
||||
azureSource := discoveryazurekv.New(discoveryazurekv.Config{
|
||||
VaultURL: cfg.CloudDiscovery.AzureKV.VaultURL,
|
||||
TenantID: cfg.CloudDiscovery.AzureKV.TenantID,
|
||||
ClientID: cfg.CloudDiscovery.AzureKV.ClientID,
|
||||
ClientSecret: cfg.CloudDiscovery.AzureKV.ClientSecret,
|
||||
}, logger)
|
||||
cloudDiscoveryService.RegisterSource(azureSource)
|
||||
sentinelAzure := &domain.Agent{
|
||||
ID: service.SentinelAzureKeyVault,
|
||||
Name: "Azure Key Vault Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
// M-6: idempotent create (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelAzure)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelAzureKeyVault, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelAzureKeyVault)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelAzureKeyVault)
|
||||
}
|
||||
}
|
||||
|
||||
// GCP Secret Manager
|
||||
if cfg.CloudDiscovery.GCPSM.Enabled {
|
||||
gcpSource := discoverygcpsm.New(&cfg.CloudDiscovery.GCPSM, logger)
|
||||
cloudDiscoveryService.RegisterSource(gcpSource)
|
||||
sentinelGCP := &domain.Agent{
|
||||
ID: service.SentinelGCPSecretMgr,
|
||||
Name: "GCP Secret Manager Discovery",
|
||||
Status: domain.AgentStatusOnline,
|
||||
}
|
||||
// M-6: idempotent create (CWE-662).
|
||||
created, err := agentRepo.CreateIfNotExists(context.Background(), sentinelGCP)
|
||||
if err != nil {
|
||||
logger.Error("sentinel agent creation failed", "id", service.SentinelGCPSecretMgr, "error", err)
|
||||
} else if created {
|
||||
logger.Info("sentinel agent created", "id", service.SentinelGCPSecretMgr)
|
||||
} else {
|
||||
logger.Debug("sentinel agent already exists", "id", service.SentinelGCPSecretMgr)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("cloud discovery enabled",
|
||||
"sources", cloudDiscoveryService.SourceCount(),
|
||||
"interval", cfg.CloudDiscovery.Interval.String())
|
||||
}
|
||||
|
||||
logger.Info("initialized all services")
|
||||
|
||||
// Initialize bulk revocation service
|
||||
bulkRevocationService := service.NewBulkRevocationService(revocationSvc, certificateRepo, auditService, logger)
|
||||
|
||||
// Initialize stats and metrics services
|
||||
statsService := service.NewStatsService(certificateRepo, jobRepo, agentRepo)
|
||||
// I-005: wire the notification repository so DashboardSummary.NotificationsDead
|
||||
// is populated, which in turn drives the Prometheus counter
|
||||
// certctl_notification_dead_total in GetPrometheusMetrics. Setter
|
||||
// pattern keeps NewStatsService's nine call sites (main.go + stats_test.go
|
||||
// + 8 digest_test.go sites) untouched.
|
||||
statsService.SetNotifRepo(notificationRepo)
|
||||
logger.Info("initialized stats service")
|
||||
|
||||
// Initialize API handlers
|
||||
@@ -331,6 +373,10 @@ func main() {
|
||||
agentHandler := handler.NewAgentHandler(agentService)
|
||||
jobHandler := handler.NewJobHandler(jobService)
|
||||
policyHandler := handler.NewPolicyHandler(policyService)
|
||||
// G-1: RenewalPolicyHandler — /api/v1/renewal-policies CRUD. Value-returning
|
||||
// constructor matches the house pattern (PolicyHandler, IssuerHandler etc.);
|
||||
// the registry stores it by value in HandlerRegistry.RenewalPolicies.
|
||||
renewalPolicyHandler := handler.NewRenewalPolicyHandler(renewalPolicyService)
|
||||
profileHandler := handler.NewProfileHandler(profileService)
|
||||
teamHandler := handler.NewTeamHandler(teamService)
|
||||
ownerHandler := handler.NewOwnerHandler(ownerService)
|
||||
@@ -347,6 +393,8 @@ func main() {
|
||||
exportService := service.NewExportService(certificateRepo, auditService)
|
||||
exportHandler := handler.NewExportHandler(exportService)
|
||||
|
||||
bulkRevocationHandler := handler.NewBulkRevocationHandler(bulkRevocationService)
|
||||
|
||||
// Initialize digest service (requires email notifier)
|
||||
var digestService *service.DigestService
|
||||
var digestHandler *handler.DigestHandler
|
||||
@@ -366,6 +414,29 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize health check service (M48)
|
||||
var healthCheckService *service.HealthCheckService
|
||||
var healthCheckHandler *handler.HealthCheckHandler
|
||||
if cfg.HealthCheck.Enabled {
|
||||
healthCheckRepo := postgres.NewHealthCheckRepository(db)
|
||||
healthCheckService = service.NewHealthCheckService(
|
||||
healthCheckRepo,
|
||||
auditService,
|
||||
logger,
|
||||
cfg.HealthCheck.MaxConcurrent,
|
||||
time.Duration(cfg.HealthCheck.DefaultTimeout)*time.Millisecond,
|
||||
cfg.HealthCheck.HistoryRetention,
|
||||
cfg.HealthCheck.AutoCreate,
|
||||
)
|
||||
healthCheckHandler = handler.NewHealthCheckHandler(healthCheckService)
|
||||
logger.Info("health check service enabled",
|
||||
"interval", cfg.HealthCheck.CheckInterval.String(),
|
||||
"max_concurrent", cfg.HealthCheck.MaxConcurrent)
|
||||
} else {
|
||||
// Create a no-op health check handler for route registration
|
||||
healthCheckHandler = handler.NewHealthCheckHandler(nil)
|
||||
}
|
||||
|
||||
logger.Info("initialized all handlers")
|
||||
|
||||
// Create context with cancellation
|
||||
@@ -385,8 +456,20 @@ func main() {
|
||||
// Configure scheduler intervals from config
|
||||
sched.SetRenewalCheckInterval(cfg.Scheduler.RenewalCheckInterval)
|
||||
sched.SetJobProcessorInterval(cfg.Scheduler.JobProcessorInterval)
|
||||
// I-001: drive the failed-job retry loop. Runs on start + every RetryInterval
|
||||
// (default 5m, CERTCTL_SCHEDULER_RETRY_INTERVAL). Kept adjacent to the job
|
||||
// processor setter because they share the JobServicer dependency.
|
||||
sched.SetJobRetryInterval(cfg.Scheduler.RetryInterval)
|
||||
sched.SetAgentHealthCheckInterval(cfg.Scheduler.AgentHealthCheckInterval)
|
||||
sched.SetNotificationProcessInterval(cfg.Scheduler.NotificationProcessInterval)
|
||||
// I-005: drive the failed-notification retry sweep. Runs every
|
||||
// NotificationRetryInterval (default 2m, CERTCTL_NOTIFICATION_RETRY_INTERVAL)
|
||||
// and transitions eligible Failed notifications whose next_retry_at has
|
||||
// arrived back to Pending so the notification processor picks them up on
|
||||
// its next tick. Kept adjacent to the notification processor setter
|
||||
// because they share the NotificationServicer dependency (same placement
|
||||
// pattern as I-001's SetJobRetryInterval above).
|
||||
sched.SetNotificationRetryInterval(cfg.Scheduler.NotificationRetryInterval)
|
||||
if cfg.NetworkScan.Enabled {
|
||||
sched.SetNetworkScanInterval(cfg.NetworkScan.ScanInterval)
|
||||
logger.Info("network scanning enabled", "interval", cfg.NetworkScan.ScanInterval.String())
|
||||
@@ -396,6 +479,28 @@ func main() {
|
||||
sched.SetDigestInterval(cfg.Digest.Interval)
|
||||
logger.Info("digest scheduler enabled", "interval", cfg.Digest.Interval.String())
|
||||
}
|
||||
if healthCheckService != nil {
|
||||
sched.SetHealthCheckService(healthCheckService)
|
||||
sched.SetHealthCheckInterval(cfg.HealthCheck.CheckInterval)
|
||||
logger.Info("health check scheduler enabled", "interval", cfg.HealthCheck.CheckInterval.String())
|
||||
}
|
||||
if cloudDiscoveryService != nil && cloudDiscoveryService.SourceCount() > 0 {
|
||||
sched.SetCloudDiscoveryService(cloudDiscoveryService)
|
||||
sched.SetCloudDiscoveryInterval(cfg.CloudDiscovery.Interval)
|
||||
logger.Info("cloud discovery scheduler enabled",
|
||||
"interval", cfg.CloudDiscovery.Interval.String(),
|
||||
"sources", cloudDiscoveryService.SourceCount())
|
||||
}
|
||||
|
||||
// Wire job timeout reaper (I-003)
|
||||
sched.SetJobReaperService(jobService)
|
||||
sched.SetJobTimeoutInterval(cfg.Scheduler.JobTimeoutInterval)
|
||||
sched.SetAwaitingCSRTimeout(cfg.Scheduler.AwaitingCSRTimeout)
|
||||
sched.SetAwaitingApprovalTimeout(cfg.Scheduler.AwaitingApprovalTimeout)
|
||||
logger.Info("job timeout reaper enabled",
|
||||
"interval", cfg.Scheduler.JobTimeoutInterval.String(),
|
||||
"csr_timeout", cfg.Scheduler.AwaitingCSRTimeout.String(),
|
||||
"approval_timeout", cfg.Scheduler.AwaitingApprovalTimeout.String())
|
||||
|
||||
// Start scheduler
|
||||
logger.Info("starting scheduler")
|
||||
@@ -406,35 +511,39 @@ func main() {
|
||||
// Build the API router with all handlers
|
||||
apiRouter := router.New()
|
||||
apiRouter.RegisterHandlers(router.HandlerRegistry{
|
||||
Certificates: certificateHandler,
|
||||
Issuers: issuerHandler,
|
||||
Targets: targetHandler,
|
||||
Agents: agentHandler,
|
||||
Jobs: jobHandler,
|
||||
Policies: policyHandler,
|
||||
Profiles: profileHandler,
|
||||
Teams: teamHandler,
|
||||
Owners: ownerHandler,
|
||||
AgentGroups: agentGroupHandler,
|
||||
Audit: auditHandler,
|
||||
Notifications: notificationHandler,
|
||||
Stats: statsHandler,
|
||||
Metrics: metricsHandler,
|
||||
Health: healthHandler,
|
||||
Discovery: discoveryHandler,
|
||||
NetworkScan: networkScanHandler,
|
||||
Verification: verificationHandler,
|
||||
Export: exportHandler,
|
||||
Digest: *digestHandler,
|
||||
Certificates: certificateHandler,
|
||||
Issuers: issuerHandler,
|
||||
Targets: targetHandler,
|
||||
Agents: agentHandler,
|
||||
Jobs: jobHandler,
|
||||
Policies: policyHandler,
|
||||
RenewalPolicies: renewalPolicyHandler,
|
||||
Profiles: profileHandler,
|
||||
Teams: teamHandler,
|
||||
Owners: ownerHandler,
|
||||
AgentGroups: agentGroupHandler,
|
||||
Audit: auditHandler,
|
||||
Notifications: notificationHandler,
|
||||
Stats: statsHandler,
|
||||
Metrics: metricsHandler,
|
||||
Health: healthHandler,
|
||||
Discovery: discoveryHandler,
|
||||
NetworkScan: networkScanHandler,
|
||||
Verification: verificationHandler,
|
||||
Export: exportHandler,
|
||||
Digest: *digestHandler,
|
||||
HealthChecks: healthCheckHandler,
|
||||
BulkRevocation: bulkRevocationHandler,
|
||||
})
|
||||
// Register EST (RFC 7030) handlers if enabled
|
||||
if cfg.EST.Enabled {
|
||||
issuerConn, ok := issuerRegistry[cfg.EST.IssuerID]
|
||||
issuerConn, ok := issuerRegistry.Get(cfg.EST.IssuerID)
|
||||
if !ok {
|
||||
logger.Error("EST issuer not found in registry", "issuer_id", cfg.EST.IssuerID)
|
||||
os.Exit(1)
|
||||
}
|
||||
estService := service.NewESTService(cfg.EST.IssuerID, issuerConn, auditService, logger)
|
||||
estService.SetProfileRepo(profileRepo)
|
||||
if cfg.EST.ProfileID != "" {
|
||||
estService.SetProfileID(cfg.EST.ProfileID)
|
||||
}
|
||||
@@ -446,13 +555,102 @@ func main() {
|
||||
"endpoints", "/.well-known/est/{cacerts,simpleenroll,simplereenroll,csrattrs}")
|
||||
}
|
||||
|
||||
// Register SCEP (RFC 8894) handlers if enabled
|
||||
if cfg.SCEP.Enabled {
|
||||
// H-2 fix: fail closed at startup when SCEP is enabled without a
|
||||
// challenge password configured. Previously the service-layer guard
|
||||
// at internal/service/scep.go:72-79 skipped the password check when
|
||||
// s.challengePassword == "", meaning any client that could reach the
|
||||
// /scep endpoint could enroll an arbitrary CSR against the configured
|
||||
// issuer (CWE-306, missing authentication for a critical function).
|
||||
// Refuse to start instead: the operator must set
|
||||
// CERTCTL_SCEP_CHALLENGE_PASSWORD (or disable SCEP) before the control
|
||||
// plane can boot.
|
||||
if err := preflightSCEPChallengePassword(cfg.SCEP.Enabled, cfg.SCEP.ChallengePassword); err != nil {
|
||||
logger.Error(
|
||||
"startup refused: SCEP is enabled but CERTCTL_SCEP_CHALLENGE_PASSWORD is not set "+
|
||||
"(would allow unauthenticated certificate enrollment, CWE-306). "+
|
||||
"Set a non-empty challenge password or disable SCEP before restarting.",
|
||||
"error", err,
|
||||
)
|
||||
os.Exit(1)
|
||||
}
|
||||
issuerConn, ok := issuerRegistry.Get(cfg.SCEP.IssuerID)
|
||||
if !ok {
|
||||
logger.Error("SCEP issuer not found in registry", "issuer_id", cfg.SCEP.IssuerID)
|
||||
os.Exit(1)
|
||||
}
|
||||
scepService := service.NewSCEPService(cfg.SCEP.IssuerID, issuerConn, auditService, logger, cfg.SCEP.ChallengePassword)
|
||||
scepService.SetProfileRepo(profileRepo)
|
||||
if cfg.SCEP.ProfileID != "" {
|
||||
scepService.SetProfileID(cfg.SCEP.ProfileID)
|
||||
}
|
||||
scepHandler := handler.NewSCEPHandler(scepService)
|
||||
apiRouter.RegisterSCEPHandlers(scepHandler)
|
||||
logger.Info("SCEP server enabled",
|
||||
"issuer_id", cfg.SCEP.IssuerID,
|
||||
"profile_id", cfg.SCEP.ProfileID,
|
||||
"challenge_password_set", cfg.SCEP.ChallengePassword != "",
|
||||
"endpoints", "/scep?operation={GetCACaps,GetCACert,PKIOperation}")
|
||||
}
|
||||
|
||||
// Register RFC 5280 CRL and RFC 6960 OCSP handlers under /.well-known/pki/.
|
||||
// These are always enabled (no config gate) — revocation data must be
|
||||
// reachable to relying parties for any cert certctl issues. The finalHandler
|
||||
// routing gate below strips auth middleware for this prefix so browsers,
|
||||
// OpenSSL, OCSP stapling sidecars, and mTLS clients can fetch without
|
||||
// presenting certctl Bearer tokens.
|
||||
apiRouter.RegisterPKIHandlers(certificateHandler)
|
||||
logger.Info("PKI endpoints registered",
|
||||
"endpoints", "/.well-known/pki/{crl/{issuer_id},ocsp/{issuer_id}/{serial}}")
|
||||
|
||||
logger.Info("registered all API handlers")
|
||||
|
||||
// Build middleware stack
|
||||
authMiddleware := middleware.NewAuth(middleware.AuthConfig{
|
||||
Type: cfg.Auth.Type,
|
||||
Secret: cfg.Auth.Secret,
|
||||
})
|
||||
// Build middleware stack.
|
||||
//
|
||||
// Authentication unification (M-002): every authenticated request now
|
||||
// carries a named actor in the request context so audit events record
|
||||
// the real key identity instead of the hardcoded "api-key-user" string.
|
||||
// Named keys come from CERTCTL_API_KEYS_NAMED (preferred). For backward
|
||||
// compatibility CERTCTL_AUTH_SECRET is synthesized into legacy-key-N
|
||||
// entries with Admin=false.
|
||||
var namedKeys []middleware.NamedAPIKey
|
||||
if cfg.Auth.Type != "none" {
|
||||
// Translate typed config.NamedAPIKey -> middleware.NamedAPIKey. The
|
||||
// two structs are field-compatible but live in different packages to
|
||||
// preserve the config→middleware dependency direction.
|
||||
for _, nk := range cfg.Auth.NamedKeys {
|
||||
namedKeys = append(namedKeys, middleware.NamedAPIKey{
|
||||
Name: nk.Name,
|
||||
Key: nk.Key,
|
||||
Admin: nk.Admin,
|
||||
})
|
||||
}
|
||||
// Back-compat: if no named keys but legacy Secret is configured,
|
||||
// synthesize named entries so the audit trail still attributes the
|
||||
// action (instead of falling back to "api-key-user" / "anonymous").
|
||||
if len(namedKeys) == 0 && cfg.Auth.Secret != "" {
|
||||
parts := strings.Split(cfg.Auth.Secret, ",")
|
||||
idx := 0
|
||||
for _, p := range parts {
|
||||
p = strings.TrimSpace(p)
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
namedKeys = append(namedKeys, middleware.NamedAPIKey{
|
||||
Name: fmt.Sprintf("legacy-key-%d", idx),
|
||||
Key: p,
|
||||
Admin: false,
|
||||
})
|
||||
idx++
|
||||
}
|
||||
if len(namedKeys) > 0 {
|
||||
logger.Warn("CERTCTL_AUTH_SECRET is deprecated — set CERTCTL_API_KEYS_NAMED for named actor attribution and admin gating",
|
||||
"synthesized_keys", len(namedKeys))
|
||||
}
|
||||
}
|
||||
}
|
||||
authMiddleware := middleware.NewAuthWithNamedKeys(namedKeys)
|
||||
corsMiddleware := middleware.NewCORS(middleware.CORSConfig{
|
||||
AllowedOrigins: cfg.CORS.AllowedOrigins,
|
||||
})
|
||||
@@ -484,7 +682,7 @@ func main() {
|
||||
bodyLimitMiddleware,
|
||||
corsMiddleware,
|
||||
authMiddleware,
|
||||
auditMiddleware,
|
||||
auditMiddleware.Middleware,
|
||||
}
|
||||
|
||||
// Add rate limiter if enabled
|
||||
@@ -501,7 +699,7 @@ func main() {
|
||||
rateLimiter,
|
||||
corsMiddleware,
|
||||
authMiddleware,
|
||||
auditMiddleware,
|
||||
auditMiddleware.Middleware,
|
||||
}
|
||||
logger.Info("rate limiting enabled", "rps", cfg.RateLimit.RPS, "burst", cfg.RateLimit.BurstSize)
|
||||
}
|
||||
@@ -537,61 +735,65 @@ func main() {
|
||||
middleware.Recovery,
|
||||
)
|
||||
|
||||
dashboardEnabled := false
|
||||
if _, err := os.Stat(webDir + "/index.html"); err == nil {
|
||||
fileServer := http.FileServer(http.Dir(webDir))
|
||||
finalHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
path := r.URL.Path
|
||||
// Health/ready and auth/info bypass auth middleware.
|
||||
// Health/ready: Docker/K8s health probes don't carry Bearer tokens.
|
||||
// auth/info: React app calls this before login to detect auth mode.
|
||||
if path == "/health" || path == "/ready" || path == "/api/v1/auth/info" {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
// All other API and EST routes go through the full middleware stack (with auth)
|
||||
if (len(path) >= 8 && path[:8] == "/api/v1/") ||
|
||||
(len(path) >= 16 && path[:16] == "/.well-known/est") {
|
||||
apiHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
// Try to serve static files (JS, CSS, assets)
|
||||
if len(path) > 8 && path[:8] == "/assets/" {
|
||||
fileServer.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
// SPA fallback: serve index.html for all other routes
|
||||
http.ServeFile(w, r, webDir+"/index.html")
|
||||
})
|
||||
dashboardEnabled = true
|
||||
}
|
||||
finalHandler = buildFinalHandler(apiHandler, noAuthHandler, webDir, dashboardEnabled)
|
||||
if dashboardEnabled {
|
||||
logger.Info("dashboard available at /", "web_dir", webDir)
|
||||
} else {
|
||||
// No dashboard: route health/auth-info without auth, everything else through full stack
|
||||
finalHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
path := r.URL.Path
|
||||
if path == "/health" || path == "/ready" || path == "/api/v1/auth/info" {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
apiHandler.ServeHTTP(w, r)
|
||||
})
|
||||
logger.Info("dashboard directory not found, serving API only")
|
||||
}
|
||||
|
||||
// HTTPS-everywhere milestone §2.1: fail-loud if the TLS configuration is
|
||||
// missing or malformed. Duplicates config.Validate() for defense in depth
|
||||
// (same pattern as preflightSCEPChallengePassword).
|
||||
if err := preflightServerTLS(cfg.Server.TLS.CertPath, cfg.Server.TLS.KeyPath); err != nil {
|
||||
logger.Error("startup refused: HTTPS cert unusable; control plane is HTTPS-only",
|
||||
"error", err,
|
||||
"cert_path", cfg.Server.TLS.CertPath,
|
||||
"key_path", cfg.Server.TLS.KeyPath)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Load the cert+key into a SIGHUP-reloadable holder. Any subsequent
|
||||
// SIGHUP triggers a fresh read and atomic swap so rotations do not need
|
||||
// a restart. Reload failures keep the previous cert and log a warning.
|
||||
tlsCertHolder, err := newCertHolder(cfg.Server.TLS.CertPath, cfg.Server.TLS.KeyPath)
|
||||
if err != nil {
|
||||
logger.Error("startup refused: failed to load TLS cert holder",
|
||||
"error", err,
|
||||
"cert_path", cfg.Server.TLS.CertPath,
|
||||
"key_path", cfg.Server.TLS.KeyPath)
|
||||
os.Exit(1)
|
||||
}
|
||||
stopTLSWatcher := tlsCertHolder.watchSIGHUP(logger)
|
||||
defer stopTLSWatcher()
|
||||
|
||||
// Server configuration
|
||||
addr := net.JoinHostPort(cfg.Server.Host, strconv.Itoa(cfg.Server.Port))
|
||||
httpServer := &http.Server{
|
||||
Addr: addr,
|
||||
Handler: finalHandler,
|
||||
TLSConfig: buildServerTLSConfig(tlsCertHolder),
|
||||
ReadTimeout: 30 * time.Second,
|
||||
ReadHeaderTimeout: 5 * time.Second,
|
||||
WriteTimeout: 120 * time.Second, // Must accommodate ACME issuance (order + challenge + finalize)
|
||||
IdleTimeout: 60 * time.Second,
|
||||
}
|
||||
|
||||
// Start HTTP server in background
|
||||
logger.Info("starting HTTP server", "address", addr)
|
||||
// Start HTTPS server in background. ListenAndServeTLS is called with
|
||||
// empty cert+key arguments because the cert is sourced through
|
||||
// TLSConfig.GetCertificate (the SIGHUP-reloadable holder). Passing file
|
||||
// paths here would pin the first-loaded cert and defeat hot reload.
|
||||
logger.Info("HTTPS server listening",
|
||||
"address", addr,
|
||||
"cert_path", cfg.Server.TLS.CertPath,
|
||||
"min_version", "TLS1.3")
|
||||
go func() {
|
||||
if err := httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
logger.Error("HTTP server error", "error", err)
|
||||
if err := httpServer.ListenAndServeTLS("", ""); err != nil && err != http.ErrServerClosed {
|
||||
logger.Error("HTTPS server error", "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -614,9 +816,20 @@ func main() {
|
||||
logger.Warn("scheduler work did not complete in time", "error", err)
|
||||
}
|
||||
|
||||
logger.Info("shutting down HTTP server")
|
||||
logger.Info("shutting down HTTPS server")
|
||||
if err := httpServer.Shutdown(shutdownCtx); err != nil {
|
||||
logger.Error("HTTP server shutdown error", "error", err)
|
||||
logger.Error("HTTPS server shutdown error", "error", err)
|
||||
}
|
||||
|
||||
// Drain in-flight audit-recording goroutines before closing the DB pool.
|
||||
// The audit middleware spawns one goroutine per non-excluded request; those
|
||||
// goroutines run detached from the request context and write to the
|
||||
// audit_events table via the same *sql.DB. Without this drain, SIGTERM
|
||||
// would close the DB pool while recordings were mid-flight, silently
|
||||
// dropping audit events (M-1, CWE-662 / CWE-400).
|
||||
logger.Info("flushing audit middleware in-flight recordings")
|
||||
if err := auditMiddleware.Flush(shutdownCtx); err != nil {
|
||||
logger.Warn("audit middleware flush did not complete in time", "error", err)
|
||||
}
|
||||
|
||||
// Close database connection
|
||||
@@ -627,22 +840,114 @@ func main() {
|
||||
logger.Info("certctl server stopped")
|
||||
}
|
||||
|
||||
// getEnvDefault reads an environment variable with a default fallback.
|
||||
func getEnvDefault(key, defaultVal string) string {
|
||||
if val := os.Getenv(key); val != "" {
|
||||
return val
|
||||
// preflightSCEPChallengePassword enforces the H-2 fix: if SCEP is enabled, a
|
||||
// non-empty challenge password MUST be configured. Returns a non-nil error
|
||||
// otherwise so the caller can refuse to start the control plane (CWE-306,
|
||||
// missing authentication for a critical function).
|
||||
//
|
||||
// This helper is extracted so the check can be unit tested without booting
|
||||
// the full server. The caller (main) is responsible for translating the
|
||||
// returned error into a structured log line and os.Exit(1).
|
||||
func preflightSCEPChallengePassword(enabled bool, challengePassword string) error {
|
||||
if !enabled {
|
||||
return nil
|
||||
}
|
||||
return defaultVal
|
||||
if challengePassword == "" {
|
||||
return fmt.Errorf("SCEP enabled but CERTCTL_SCEP_CHALLENGE_PASSWORD is empty: " +
|
||||
"SCEP enrollment would accept any client (CWE-306); " +
|
||||
"configure a non-empty shared secret or set CERTCTL_SCEP_ENABLED=false")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getEnvIntDefault parses an integer from a string with a default fallback.
|
||||
func getEnvIntDefault(s string, defaultVal int) int {
|
||||
if s == "" {
|
||||
return defaultVal
|
||||
// buildFinalHandler builds the outer HTTP dispatch handler that routes incoming
|
||||
// requests to either the authenticated apiHandler chain or the unauthenticated
|
||||
// noAuthHandler chain based on URL path prefix. Extracted from main() so the
|
||||
// dispatch logic can be unit tested without booting the full server stack
|
||||
// (see cmd/server/finalhandler_test.go).
|
||||
//
|
||||
// Dispatch rules (M-001, audit 2026-04-19, option D):
|
||||
//
|
||||
// - /health, /ready, /api/v1/auth/info → no-auth (probes + login detection)
|
||||
// - /.well-known/pki/* → no-auth (RFC 5280 CRL, RFC 6960 OCSP)
|
||||
// - /.well-known/est/* → no-auth (RFC 7030 §3.2.3)
|
||||
// - /scep, /scep/* → no-auth (RFC 8894 §3.2, CSR challengePassword)
|
||||
// - /api/v1/* → auth (Bearer token required)
|
||||
// - /assets/* → static file server (dashboard only)
|
||||
// - anything else → SPA index.html fallback (dashboard only)
|
||||
// OR apiHandler (no dashboard)
|
||||
//
|
||||
// EST/SCEP clients (IoT devices, 802.1X supplicants, MDM endpoints, network
|
||||
// appliances) cannot present certctl Bearer tokens, so those endpoints must be
|
||||
// reachable without the Auth middleware. Authentication is instead enforced by
|
||||
// CSR signature verification, profile policy gates, and for SCEP the
|
||||
// challengePassword shared secret (fail-loud gated by preflightSCEPChallengePassword
|
||||
// above).
|
||||
//
|
||||
// webDir must point to a directory containing index.html + assets/ when
|
||||
// dashboardEnabled is true; it is ignored otherwise.
|
||||
func buildFinalHandler(apiHandler, noAuthHandler http.Handler, webDir string, dashboardEnabled bool) http.Handler {
|
||||
var fileServer http.Handler
|
||||
if dashboardEnabled {
|
||||
fileServer = http.FileServer(http.Dir(webDir))
|
||||
}
|
||||
val, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return defaultVal
|
||||
}
|
||||
return val
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
path := r.URL.Path
|
||||
|
||||
// Health/ready and auth/info bypass auth middleware.
|
||||
// Health/ready: Docker/K8s health probes don't carry Bearer tokens.
|
||||
// auth/info: React app calls this before login to detect auth mode.
|
||||
if path == "/health" || path == "/ready" || path == "/api/v1/auth/info" {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// RFC 5280 CRL and RFC 6960 OCSP live under /.well-known/pki/ and MUST
|
||||
// be served unauthenticated — relying parties (browsers, OpenSSL, OCSP
|
||||
// stapling sidecars, mTLS clients) cannot present certctl Bearer tokens.
|
||||
if strings.HasPrefix(path, "/.well-known/pki") {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// RFC 7030 EST endpoints ride the no-auth middleware chain (M-001,
|
||||
// option D, audit 2026-04-19). Trust boundary is CSR signature + profile
|
||||
// policy, not HTTP Bearer. /.well-known/est/cacerts is explicitly
|
||||
// anonymous per RFC 7030 §4.1.1.
|
||||
if strings.HasPrefix(path, "/.well-known/est") {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// RFC 8894 SCEP rides the no-auth chain (M-001, option D). SCEP clients
|
||||
// authenticate via the challengePassword attribute in the PKCS#10 CSR,
|
||||
// not via HTTP Bearer tokens. preflightSCEPChallengePassword refuses to
|
||||
// start the server if SCEP is enabled without a non-empty shared secret.
|
||||
if path == "/scep" || strings.HasPrefix(path, "/scep/") {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// Authenticated API routes — full middleware stack including Auth.
|
||||
if strings.HasPrefix(path, "/api/v1/") {
|
||||
apiHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
if !dashboardEnabled {
|
||||
// No dashboard: everything non-special falls through to the
|
||||
// authenticated handler (preserves pre-M-001 behavior for API-only
|
||||
// deployments).
|
||||
apiHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// Dashboard-present: serve static assets directly, SPA fallback for
|
||||
// everything else.
|
||||
if strings.HasPrefix(path, "/assets/") {
|
||||
fileServer.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
http.ServeFile(w, r, webDir+"/index.html")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -0,0 +1,650 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/api/router"
|
||||
"github.com/shankar0123/certctl/internal/config"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// TestMain_HealthEndpointBypassesAuth verifies that health check endpoints
|
||||
// bypass auth middleware while protected API endpoints require auth.
|
||||
// This is the most critical test — it validates the core routing pattern used in main.go.
|
||||
func TestMain_HealthEndpointBypassesAuth(t *testing.T) {
|
||||
// Simulate the finalHandler logic from main.go with minimal setup
|
||||
// Create handler functions for health endpoints
|
||||
healthHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
})
|
||||
|
||||
readyHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ready"}`))
|
||||
})
|
||||
|
||||
authInfoHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"auth_type":"api-key"}`))
|
||||
})
|
||||
|
||||
// Protected API endpoint
|
||||
certHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`[]`))
|
||||
})
|
||||
|
||||
// Build the handler chain the same way main.go does
|
||||
authMiddleware := middleware.NewAuth(middleware.AuthConfig{
|
||||
Type: "api-key",
|
||||
Secret: "test-secret-key",
|
||||
})
|
||||
|
||||
// API handler with auth
|
||||
authHandler := middleware.Chain(certHandler,
|
||||
middleware.RequestID,
|
||||
middleware.Recovery,
|
||||
authMiddleware,
|
||||
)
|
||||
|
||||
// Create finalHandler matching main.go logic
|
||||
finalHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
path := r.URL.Path
|
||||
switch path {
|
||||
case "/health":
|
||||
healthHandler.ServeHTTP(w, r)
|
||||
case "/ready":
|
||||
readyHandler.ServeHTTP(w, r)
|
||||
case "/api/v1/auth/info":
|
||||
authInfoHandler.ServeHTTP(w, r)
|
||||
case "/api/v1/certificates":
|
||||
authHandler.ServeHTTP(w, r)
|
||||
default:
|
||||
http.Error(w, "Not Found", http.StatusNotFound)
|
||||
}
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
method string
|
||||
bypassesAuth bool
|
||||
expectedStatus int
|
||||
}{
|
||||
{
|
||||
name: "GET /health without auth",
|
||||
path: "/health",
|
||||
method: "GET",
|
||||
bypassesAuth: true,
|
||||
expectedStatus: http.StatusOK,
|
||||
},
|
||||
{
|
||||
name: "GET /ready without auth",
|
||||
path: "/ready",
|
||||
method: "GET",
|
||||
bypassesAuth: true,
|
||||
expectedStatus: http.StatusOK,
|
||||
},
|
||||
{
|
||||
name: "GET /api/v1/auth/info without auth",
|
||||
path: "/api/v1/auth/info",
|
||||
method: "GET",
|
||||
bypassesAuth: true,
|
||||
expectedStatus: http.StatusOK,
|
||||
},
|
||||
{
|
||||
name: "GET /api/v1/certificates without auth (should fail)",
|
||||
path: "/api/v1/certificates",
|
||||
method: "GET",
|
||||
bypassesAuth: false,
|
||||
expectedStatus: http.StatusUnauthorized,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest(tt.method, tt.path, nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
finalHandler.ServeHTTP(w, req)
|
||||
|
||||
if tt.bypassesAuth && w.Code != tt.expectedStatus {
|
||||
t.Errorf("endpoint %s should bypass auth, got status %d, expected %d",
|
||||
tt.path, w.Code, tt.expectedStatus)
|
||||
}
|
||||
|
||||
if !tt.bypassesAuth && w.Code != tt.expectedStatus {
|
||||
t.Logf("endpoint %s requires auth, got status %d, expected %d (auth middleware working)",
|
||||
tt.path, w.Code, tt.expectedStatus)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_HealthHandlersRespond verifies health endpoints return correct responses.
|
||||
func TestMain_HealthHandlersRespond(t *testing.T) {
|
||||
healthHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
})
|
||||
|
||||
req := httptest.NewRequest("GET", "/health", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
healthHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
if body := w.Body.String(); body != `{"status":"ok"}` {
|
||||
t.Errorf("expected body '{\"status\":\"ok\"}', got '%s'", body)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_AuthMiddlewareRejectsUnauthorized verifies auth middleware works.
|
||||
func TestMain_AuthMiddlewareRejectsUnauthorized(t *testing.T) {
|
||||
// Create a protected endpoint
|
||||
protectedHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"data":"protected"}`))
|
||||
})
|
||||
|
||||
// Wrap with auth middleware
|
||||
authMiddleware := middleware.NewAuth(middleware.AuthConfig{
|
||||
Type: "api-key",
|
||||
Secret: "test-secret-key",
|
||||
})
|
||||
|
||||
chainedHandler := middleware.Chain(protectedHandler, authMiddleware)
|
||||
|
||||
// Request without auth should be rejected
|
||||
req := httptest.NewRequest("GET", "/api/v1/protected", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("expected status 401 for unauthorized request, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_AuthMiddlewareAllowsWithValidKey verifies auth middleware allows valid keys.
|
||||
func TestMain_AuthMiddlewareAllowsWithValidKey(t *testing.T) {
|
||||
testKey := "test-secret-key"
|
||||
|
||||
// Create a protected endpoint
|
||||
protectedHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"data":"protected"}`))
|
||||
})
|
||||
|
||||
// Wrap with auth middleware
|
||||
authMiddleware := middleware.NewAuth(middleware.AuthConfig{
|
||||
Type: "api-key",
|
||||
Secret: testKey,
|
||||
})
|
||||
|
||||
chainedHandler := middleware.Chain(protectedHandler, authMiddleware)
|
||||
|
||||
// Request with valid auth should be allowed
|
||||
req := httptest.NewRequest("GET", "/api/v1/protected", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+testKey)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200 for authorized request, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_ServerConfigFromEnvironment verifies config.Load() reads env vars correctly.
|
||||
func TestMain_ServerConfigFromEnvironment(t *testing.T) {
|
||||
// Save original env vars
|
||||
oldAuthType := os.Getenv("CERTCTL_AUTH_TYPE")
|
||||
oldServerHost := os.Getenv("CERTCTL_SERVER_HOST")
|
||||
oldServerPort := os.Getenv("CERTCTL_SERVER_PORT")
|
||||
oldTLSCert := os.Getenv("CERTCTL_SERVER_TLS_CERT_PATH")
|
||||
oldTLSKey := os.Getenv("CERTCTL_SERVER_TLS_KEY_PATH")
|
||||
defer func() {
|
||||
if oldAuthType != "" {
|
||||
os.Setenv("CERTCTL_AUTH_TYPE", oldAuthType)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_AUTH_TYPE")
|
||||
}
|
||||
if oldServerHost != "" {
|
||||
os.Setenv("CERTCTL_SERVER_HOST", oldServerHost)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_SERVER_HOST")
|
||||
}
|
||||
if oldServerPort != "" {
|
||||
os.Setenv("CERTCTL_SERVER_PORT", oldServerPort)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_SERVER_PORT")
|
||||
}
|
||||
if oldTLSCert != "" {
|
||||
os.Setenv("CERTCTL_SERVER_TLS_CERT_PATH", oldTLSCert)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_SERVER_TLS_CERT_PATH")
|
||||
}
|
||||
if oldTLSKey != "" {
|
||||
os.Setenv("CERTCTL_SERVER_TLS_KEY_PATH", oldTLSKey)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_SERVER_TLS_KEY_PATH")
|
||||
}
|
||||
}()
|
||||
|
||||
// HTTPS-only control plane: Validate() refuses to pass without a readable
|
||||
// cert/key pair on disk. Materialize a throwaway ECDSA P-256 pair using the
|
||||
// same generator cmd/server/tls_test.go uses for the certHolder tests.
|
||||
dir := t.TempDir()
|
||||
certPath := dir + "/server.crt"
|
||||
keyPath := dir + "/server.key"
|
||||
generateTestCert(t, certPath, keyPath, "main-test-cn")
|
||||
|
||||
// Set test env vars
|
||||
os.Setenv("CERTCTL_AUTH_TYPE", "none")
|
||||
os.Setenv("CERTCTL_SERVER_HOST", "127.0.0.1")
|
||||
os.Setenv("CERTCTL_SERVER_PORT", "8080")
|
||||
os.Setenv("CERTCTL_SERVER_TLS_CERT_PATH", certPath)
|
||||
os.Setenv("CERTCTL_SERVER_TLS_KEY_PATH", keyPath)
|
||||
|
||||
cfg, err := config.Load()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to load config from env vars: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Auth.Type != "none" {
|
||||
t.Errorf("Expected auth type 'none', got '%s'", cfg.Auth.Type)
|
||||
}
|
||||
|
||||
if cfg.Server.Host != "127.0.0.1" {
|
||||
t.Errorf("Expected server host '127.0.0.1', got '%s'", cfg.Server.Host)
|
||||
}
|
||||
|
||||
if cfg.Server.Port != 8080 {
|
||||
t.Errorf("Expected server port 8080, got %d", cfg.Server.Port)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_AuthTypeConfiguration verifies auth type is read from config.
|
||||
func TestMain_AuthTypeConfiguration(t *testing.T) {
|
||||
// Save original env vars
|
||||
oldAuthType := os.Getenv("CERTCTL_AUTH_TYPE")
|
||||
oldAuthSecret := os.Getenv("CERTCTL_AUTH_SECRET")
|
||||
oldTLSCert := os.Getenv("CERTCTL_SERVER_TLS_CERT_PATH")
|
||||
oldTLSKey := os.Getenv("CERTCTL_SERVER_TLS_KEY_PATH")
|
||||
defer func() {
|
||||
if oldAuthType != "" {
|
||||
os.Setenv("CERTCTL_AUTH_TYPE", oldAuthType)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_AUTH_TYPE")
|
||||
}
|
||||
if oldAuthSecret != "" {
|
||||
os.Setenv("CERTCTL_AUTH_SECRET", oldAuthSecret)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_AUTH_SECRET")
|
||||
}
|
||||
if oldTLSCert != "" {
|
||||
os.Setenv("CERTCTL_SERVER_TLS_CERT_PATH", oldTLSCert)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_SERVER_TLS_CERT_PATH")
|
||||
}
|
||||
if oldTLSKey != "" {
|
||||
os.Setenv("CERTCTL_SERVER_TLS_KEY_PATH", oldTLSKey)
|
||||
} else {
|
||||
os.Unsetenv("CERTCTL_SERVER_TLS_KEY_PATH")
|
||||
}
|
||||
}()
|
||||
|
||||
// HTTPS-only control plane: config.Load()→Validate() refuses to pass
|
||||
// without a readable cert/key pair. Mint one throwaway pair for the whole
|
||||
// sub-test cohort — auth type toggles don't care about the TLS surface.
|
||||
dir := t.TempDir()
|
||||
certPath := dir + "/server.crt"
|
||||
keyPath := dir + "/server.key"
|
||||
generateTestCert(t, certPath, keyPath, "main-test-cn")
|
||||
os.Setenv("CERTCTL_SERVER_TLS_CERT_PATH", certPath)
|
||||
os.Setenv("CERTCTL_SERVER_TLS_KEY_PATH", keyPath)
|
||||
|
||||
// Set auth secret for api-key mode
|
||||
os.Setenv("CERTCTL_AUTH_SECRET", "test-secret")
|
||||
|
||||
testCases := []string{"api-key", "none"}
|
||||
|
||||
for _, authType := range testCases {
|
||||
t.Run(fmt.Sprintf("auth_type_%s", authType), func(t *testing.T) {
|
||||
os.Setenv("CERTCTL_AUTH_TYPE", authType)
|
||||
|
||||
cfg, err := config.Load()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to load config: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Auth.Type != authType {
|
||||
t.Errorf("Expected auth type '%s', got '%s'", authType, cfg.Auth.Type)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_MiddlewareChainConstruction tests that middleware can be properly chained.
|
||||
func TestMain_MiddlewareChainConstruction(t *testing.T) {
|
||||
// Test that the middleware.Chain function works as expected
|
||||
baseHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("success"))
|
||||
})
|
||||
|
||||
// Chain with RequestID and Recovery middleware
|
||||
chainedHandler := middleware.Chain(baseHandler,
|
||||
middleware.RequestID,
|
||||
middleware.Recovery,
|
||||
)
|
||||
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
if body := w.Body.String(); body != "success" {
|
||||
t.Errorf("expected body 'success', got '%s'", body)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_RequestIDMiddleware verifies RequestID is added to responses.
|
||||
func TestMain_RequestIDMiddleware(t *testing.T) {
|
||||
baseHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
// Wrap with RequestID middleware
|
||||
chainedHandler := middleware.Chain(baseHandler, middleware.RequestID)
|
||||
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
// RequestID should be set in response header
|
||||
if rid := w.Header().Get("X-Request-ID"); rid == "" {
|
||||
t.Logf("X-Request-ID header not present (middleware may work differently)")
|
||||
} else {
|
||||
t.Logf("X-Request-ID header set: %s", rid)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_RecoveryMiddlewareHandlesPanic verifies recovery middleware works.
|
||||
func TestMain_RecoveryMiddlewareHandlesPanic(t *testing.T) {
|
||||
panicHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
panic("test panic")
|
||||
})
|
||||
|
||||
// Wrap with recovery middleware
|
||||
chainedHandler := middleware.Chain(panicHandler, middleware.Recovery)
|
||||
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
// Should not panic
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
// Should return 500 error
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Logf("Expected 500 for panicked handler, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_ServiceInitialization tests that services can be instantiated.
|
||||
// This validates the initialization pattern from main.go without needing a real DB.
|
||||
func TestMain_ServiceInitialization(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
|
||||
Level: slog.LevelInfo,
|
||||
}))
|
||||
|
||||
// Create test issuer registry (same as main.go does)
|
||||
issuerRegistry := service.NewIssuerRegistry(logger)
|
||||
|
||||
if issuerRegistry == nil {
|
||||
t.Fatal("issuer registry should not be nil")
|
||||
}
|
||||
|
||||
// Verify the registry has a Len() method (used in main.go)
|
||||
count := issuerRegistry.Len()
|
||||
if count < 0 {
|
||||
t.Errorf("issuer registry length should be >= 0, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_CORSMiddlewareSetHeaders verifies CORS headers are set.
|
||||
func TestMain_CORSMiddlewareSetHeaders(t *testing.T) {
|
||||
baseHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
corsMiddleware := middleware.NewCORS(middleware.CORSConfig{
|
||||
AllowedOrigins: []string{"http://example.com"},
|
||||
})
|
||||
|
||||
chainedHandler := middleware.Chain(baseHandler, corsMiddleware)
|
||||
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
req.Header.Set("Origin", "http://example.com")
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
// CORS middleware should set access control headers
|
||||
if acah := w.Header().Get("Access-Control-Allow-Origin"); acah == "" {
|
||||
t.Logf("Access-Control-Allow-Origin not set (may be by design)")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_AuthNoneMode verifies auth can be disabled.
|
||||
func TestMain_AuthNoneMode(t *testing.T) {
|
||||
protectedHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"data":"protected"}`))
|
||||
})
|
||||
|
||||
// Wrap with auth middleware in "none" mode
|
||||
authMiddleware := middleware.NewAuth(middleware.AuthConfig{
|
||||
Type: "none",
|
||||
})
|
||||
|
||||
chainedHandler := middleware.Chain(protectedHandler, authMiddleware)
|
||||
|
||||
// Request without auth should be allowed in "none" mode
|
||||
req := httptest.NewRequest("GET", "/api/v1/protected", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200 in 'none' auth mode, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_RouterRegistration tests that router registration works.
|
||||
func TestMain_RouterRegistration(t *testing.T) {
|
||||
r := router.New()
|
||||
|
||||
// Register a test handler
|
||||
r.RegisterFunc("GET /test", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("test"))
|
||||
})
|
||||
|
||||
// Request the route
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
// Route should be registered and accessible
|
||||
if w.Code == http.StatusNotFound {
|
||||
t.Errorf("route not registered, got 404")
|
||||
} else if w.Code == http.StatusOK {
|
||||
t.Logf("route registered successfully")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_RateLimiterIntegration tests rate limiter middleware works.
|
||||
func TestMain_RateLimiterIntegration(t *testing.T) {
|
||||
baseHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
// Create rate limiter with 10 RPS, 1 burst
|
||||
rateLimiter := middleware.NewRateLimiter(middleware.RateLimitConfig{
|
||||
RPS: 10,
|
||||
BurstSize: 1,
|
||||
})
|
||||
|
||||
chainedHandler := middleware.Chain(baseHandler, rateLimiter)
|
||||
|
||||
// First request should succeed
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
w := httptest.NewRecorder()
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code == http.StatusServiceUnavailable {
|
||||
t.Logf("rate limiter is active")
|
||||
} else {
|
||||
t.Logf("rate limiter allowed request (status %d)", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_ContentTypeMiddleware verifies content type is set correctly.
|
||||
func TestMain_ContentTypeMiddleware(t *testing.T) {
|
||||
baseHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
})
|
||||
|
||||
// Wrap with middleware that sets Content-Type
|
||||
chainedHandler := middleware.Chain(baseHandler, middleware.ContentType)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/v1/test", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
// Verify response
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
// ContentType middleware should set header
|
||||
if ct := w.Header().Get("Content-Type"); ct != "" {
|
||||
t.Logf("Content-Type header set: %s", ct)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMain_ContextPropagation verifies context is propagated through middleware.
|
||||
func TestMain_ContextPropagation(t *testing.T) {
|
||||
type contextKey string
|
||||
testKey := contextKey("test-key")
|
||||
testValue := "test-value"
|
||||
|
||||
baseHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
val := r.Context().Value(testKey)
|
||||
if val == testValue {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
} else {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}
|
||||
})
|
||||
|
||||
chainedHandler := middleware.Chain(baseHandler, middleware.RequestID)
|
||||
|
||||
req := httptest.NewRequest("GET", "/test", nil)
|
||||
// Add context value before request
|
||||
req = req.WithContext(context.WithValue(req.Context(), testKey, testValue))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
chainedHandler.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Logf("Context value may not be propagated (status %d), this may be expected", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPreflightSCEPChallengePassword is the H-2 regression guard for the
|
||||
// startup pre-flight check. The helper MUST return a non-nil error whenever
|
||||
// SCEP is enabled with an empty challenge password — that configuration
|
||||
// previously allowed unauthenticated certificate enrollment (CWE-306).
|
||||
// Disabled-SCEP and configured-password cases must pass cleanly.
|
||||
func TestPreflightSCEPChallengePassword(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
enabled bool
|
||||
challengePassword string
|
||||
wantErr bool
|
||||
wantErrSubstring string
|
||||
}{
|
||||
{
|
||||
name: "disabled_empty_password_ok",
|
||||
enabled: false,
|
||||
challengePassword: "",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "disabled_with_password_ok",
|
||||
enabled: false,
|
||||
challengePassword: "leftover-value",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "enabled_empty_password_rejected",
|
||||
enabled: true,
|
||||
challengePassword: "",
|
||||
wantErr: true,
|
||||
wantErrSubstring: "CERTCTL_SCEP_CHALLENGE_PASSWORD",
|
||||
},
|
||||
{
|
||||
name: "enabled_with_password_ok",
|
||||
enabled: true,
|
||||
challengePassword: "hunter2",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "enabled_single_char_password_ok",
|
||||
enabled: true,
|
||||
challengePassword: "x",
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := preflightSCEPChallengePassword(tt.enabled, tt.challengePassword)
|
||||
if tt.wantErr {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got nil")
|
||||
}
|
||||
if tt.wantErrSubstring != "" && !strings.Contains(err.Error(), tt.wantErrSubstring) {
|
||||
t.Errorf("expected error to mention %q, got: %v", tt.wantErrSubstring, err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "CWE-306") {
|
||||
t.Errorf("expected error to cite CWE-306 for traceability, got: %v", err)
|
||||
}
|
||||
} else if err != nil {
|
||||
t.Errorf("expected no error, got: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,164 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// certHolder stores the server's TLS certificate under a mutex so it can be
|
||||
// swapped atomically by a SIGHUP handler without restarting the server. A
|
||||
// *tls.Config that wires GetCertificate → (*certHolder).GetCertificate reads
|
||||
// through the holder on every ClientHello, so a successful reload takes
|
||||
// effect on the next new connection immediately and without dropping
|
||||
// in-flight requests.
|
||||
//
|
||||
// Concurrency: GetCertificate is invoked from crypto/tls handshake goroutines
|
||||
// on every new inbound connection; Reload is invoked from the SIGHUP watcher
|
||||
// goroutine. sync.Mutex is sufficient — TLS handshakes are not an inner-loop
|
||||
// hot path and the critical section is a single pointer read.
|
||||
type certHolder struct {
|
||||
mu sync.Mutex
|
||||
cert *tls.Certificate
|
||||
certPath string
|
||||
keyPath string
|
||||
}
|
||||
|
||||
// newCertHolder loads the initial cert+key pair from disk and returns a
|
||||
// holder ready to serve handshakes. Returns a non-nil error if either file
|
||||
// is missing, unreadable, or the pair does not round-trip through
|
||||
// tls.LoadX509KeyPair (for example the key does not sign the cert). The
|
||||
// caller is expected to treat a non-nil error as a fail-loud startup gate
|
||||
// and os.Exit(1) — the HTTPS-everywhere milestone (§3 locked decisions)
|
||||
// prohibits plaintext HTTP fallback.
|
||||
func newCertHolder(certPath, keyPath string) (*certHolder, error) {
|
||||
cert, err := tls.LoadX509KeyPair(certPath, keyPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load TLS cert/key (cert=%q key=%q): %w", certPath, keyPath, err)
|
||||
}
|
||||
return &certHolder{
|
||||
cert: &cert,
|
||||
certPath: certPath,
|
||||
keyPath: keyPath,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetCertificate is the tls.Config.GetCertificate hook. Returns the current
|
||||
// cert under the holder's mutex. ClientHelloInfo is ignored — the control
|
||||
// plane does not multiplex by SNI.
|
||||
func (h *certHolder) GetCertificate(_ *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||
h.mu.Lock()
|
||||
defer h.mu.Unlock()
|
||||
return h.cert, nil
|
||||
}
|
||||
|
||||
// Reload re-reads the cert+key pair from disk and swaps the holder
|
||||
// atomically on success. On failure the holder retains its previous cert
|
||||
// and the error is propagated to the caller — the SIGHUP watcher logs and
|
||||
// keeps serving the previous cert rather than crashing on a bad reload.
|
||||
// This is deliberately "fail-safe on reload, fail-loud on startup": an
|
||||
// operator rotating certs wants a recoverable error, not a restart loop.
|
||||
func (h *certHolder) Reload() error {
|
||||
cert, err := tls.LoadX509KeyPair(h.certPath, h.keyPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reload TLS cert/key (cert=%q key=%q): %w", h.certPath, h.keyPath, err)
|
||||
}
|
||||
h.mu.Lock()
|
||||
h.cert = &cert
|
||||
h.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// watchSIGHUP installs a signal handler that calls Reload() on each SIGHUP.
|
||||
// The returned stop function closes the internal done channel and stops
|
||||
// signal delivery so the goroutine can exit cleanly during shutdown. Errors
|
||||
// from Reload are logged but do not terminate the watcher — the operator
|
||||
// can fix the files and send another SIGHUP.
|
||||
//
|
||||
// Defensive design note: this deliberately does NOT panic on Reload error
|
||||
// even though HTTPS is mission-critical. A rotation that writes half-files
|
||||
// (operator overwrites cert.pem then key.pem as two separate copies) would
|
||||
// otherwise crash the server mid-rotation. Logging + retaining the old
|
||||
// cert gives the operator a bounded window to fix and re-SIGHUP.
|
||||
func (h *certHolder) watchSIGHUP(logger *slog.Logger) (stop func()) {
|
||||
ch := make(chan os.Signal, 1)
|
||||
signal.Notify(ch, syscall.SIGHUP)
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ch:
|
||||
if err := h.Reload(); err != nil {
|
||||
logger.Error("TLS cert reload failed; continuing with previous cert",
|
||||
"error", err,
|
||||
"cert_path", h.certPath,
|
||||
"key_path", h.keyPath)
|
||||
continue
|
||||
}
|
||||
logger.Info("TLS cert reloaded via SIGHUP",
|
||||
"cert_path", h.certPath,
|
||||
"key_path", h.keyPath)
|
||||
case <-done:
|
||||
signal.Stop(ch)
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
return func() { close(done) }
|
||||
}
|
||||
|
||||
// buildServerTLSConfig returns the TLS 1.3-only *tls.Config for the HTTPS
|
||||
// server. Pinned per HTTPS-everywhere milestone §2.1 + §3 locked decisions:
|
||||
//
|
||||
// - MinVersion: TLS 1.3 (no TLS 1.2 escape hatch). Go 1.25's crypto/tls
|
||||
// automatically rejects older versions.
|
||||
// - CurvePreferences: explicit [X25519, P-256]. Explicit ordering keeps
|
||||
// the handshake deterministic and documents the accepted curves.
|
||||
// - No CipherSuites field: TLS 1.3 cipher suites are not negotiable in
|
||||
// the handshake (all three mandatory suites — AES-128-GCM-SHA256,
|
||||
// AES-256-GCM-SHA384, CHACHA20-POLY1305-SHA256 — are always offered).
|
||||
// Go's crypto/tls ignores CipherSuites for TLS 1.3.
|
||||
// - GetCertificate: reads through the holder so SIGHUP rotations take
|
||||
// effect on the next new connection without a restart. Setting
|
||||
// tls.Config.Certificates directly would pin the first-loaded cert
|
||||
// and defeat SIGHUP reload.
|
||||
func buildServerTLSConfig(holder *certHolder) *tls.Config {
|
||||
return &tls.Config{
|
||||
MinVersion: tls.VersionTLS13,
|
||||
CurvePreferences: []tls.CurveID{tls.X25519, tls.CurveP256},
|
||||
GetCertificate: holder.GetCertificate,
|
||||
}
|
||||
}
|
||||
|
||||
// preflightServerTLS is the fail-loud startup gate for HTTPS. Returns a
|
||||
// non-nil error when the TLS configuration is missing or the cert+key pair
|
||||
// cannot be parsed, so the caller refuses to start the control plane
|
||||
// (HTTPS-everywhere §3 locked decisions: no plaintext HTTP fallback).
|
||||
//
|
||||
// Duplicates the emptiness + stat + parse checks in config.Validate() for
|
||||
// defense in depth, mirroring the pattern established by
|
||||
// preflightSCEPChallengePassword (which itself duplicates
|
||||
// config.Validate()'s SCEP check for CWE-306). Extracted into a separate
|
||||
// function so the gate is unit-testable without booting the full server.
|
||||
func preflightServerTLS(certPath, keyPath string) error {
|
||||
if certPath == "" {
|
||||
return fmt.Errorf("CERTCTL_SERVER_TLS_CERT_PATH is empty: HTTPS-only control plane refuses to start (see docs/tls.md)")
|
||||
}
|
||||
if keyPath == "" {
|
||||
return fmt.Errorf("CERTCTL_SERVER_TLS_KEY_PATH is empty: HTTPS-only control plane refuses to start (see docs/tls.md)")
|
||||
}
|
||||
if _, err := os.Stat(certPath); err != nil {
|
||||
return fmt.Errorf("TLS cert file %q unreadable: %w (see docs/tls.md)", certPath, err)
|
||||
}
|
||||
if _, err := os.Stat(keyPath); err != nil {
|
||||
return fmt.Errorf("TLS key file %q unreadable: %w (see docs/tls.md)", keyPath, err)
|
||||
}
|
||||
if _, err := tls.LoadX509KeyPair(certPath, keyPath); err != nil {
|
||||
return fmt.Errorf("TLS cert/key pair invalid (cert=%q key=%q): %w (see docs/tls.md)", certPath, keyPath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,418 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"math/big"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// generateTestCert writes a PEM-encoded self-signed leaf cert + ECDSA P-256
|
||||
// key pair to certPath/keyPath. The subject is derived from cn so tests can
|
||||
// tell reloaded certs apart from original certs by re-parsing the served
|
||||
// Certificate and comparing the CN.
|
||||
func generateTestCert(t *testing.T, certPath, keyPath, cn string) {
|
||||
t.Helper()
|
||||
priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("ecdsa.GenerateKey: %v", err)
|
||||
}
|
||||
tmpl := &x509.Certificate{
|
||||
SerialNumber: big.NewInt(time.Now().UnixNano()),
|
||||
Subject: pkix.Name{CommonName: cn},
|
||||
NotBefore: time.Now().Add(-1 * time.Hour),
|
||||
NotAfter: time.Now().Add(24 * time.Hour),
|
||||
KeyUsage: x509.KeyUsageDigitalSignature,
|
||||
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
|
||||
DNSNames: []string{"localhost"},
|
||||
IPAddresses: []net.IP{net.ParseIP("127.0.0.1"), net.ParseIP("::1")},
|
||||
}
|
||||
der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &priv.PublicKey, priv)
|
||||
if err != nil {
|
||||
t.Fatalf("x509.CreateCertificate: %v", err)
|
||||
}
|
||||
certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
|
||||
keyDER, err := x509.MarshalECPrivateKey(priv)
|
||||
if err != nil {
|
||||
t.Fatalf("MarshalECPrivateKey: %v", err)
|
||||
}
|
||||
keyPEM := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})
|
||||
if err := os.WriteFile(certPath, certPEM, 0o600); err != nil {
|
||||
t.Fatalf("write cert: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(keyPath, keyPEM, 0o600); err != nil {
|
||||
t.Fatalf("write key: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// readCertCN returns the CommonName from the leaf cert currently held by the
|
||||
// holder, by exercising the same GetCertificate path the tls handshake would
|
||||
// take. Lets tests assert which generation of the cert is being served.
|
||||
func readCertCN(t *testing.T, h *certHolder) string {
|
||||
t.Helper()
|
||||
c, err := h.GetCertificate(&tls.ClientHelloInfo{})
|
||||
if err != nil {
|
||||
t.Fatalf("GetCertificate: %v", err)
|
||||
}
|
||||
leaf, err := x509.ParseCertificate(c.Certificate[0])
|
||||
if err != nil {
|
||||
t.Fatalf("ParseCertificate: %v", err)
|
||||
}
|
||||
return leaf.Subject.CommonName
|
||||
}
|
||||
|
||||
func silentLogger() *slog.Logger {
|
||||
return slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{Level: slog.LevelError}))
|
||||
}
|
||||
|
||||
func TestNewCertHolder_ValidPair_LoadsCert(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-initial")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
if got := readCertCN(t, h); got != "cn-initial" {
|
||||
t.Fatalf("CN mismatch: got %q want %q", got, "cn-initial")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewCertHolder_MissingFile_Fails(t *testing.T) {
|
||||
_, err := newCertHolder("/nonexistent/cert.pem", "/nonexistent/key.pem")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing files, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewCertHolder_MalformedCert_Fails(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "bad.crt")
|
||||
keyPath := filepath.Join(dir, "bad.key")
|
||||
if err := os.WriteFile(certPath, []byte("not a pem cert"), 0o600); err != nil {
|
||||
t.Fatalf("write cert: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(keyPath, []byte("not a pem key"), 0o600); err != nil {
|
||||
t.Fatalf("write key: %v", err)
|
||||
}
|
||||
_, err := newCertHolder(certPath, keyPath)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for malformed PEM, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCertHolder_Reload_SwapsCert(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-v1")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
if got := readCertCN(t, h); got != "cn-v1" {
|
||||
t.Fatalf("initial CN: got %q want cn-v1", got)
|
||||
}
|
||||
|
||||
// Rotate on disk and reload.
|
||||
generateTestCert(t, certPath, keyPath, "cn-v2")
|
||||
if err := h.Reload(); err != nil {
|
||||
t.Fatalf("Reload: %v", err)
|
||||
}
|
||||
if got := readCertCN(t, h); got != "cn-v2" {
|
||||
t.Fatalf("post-reload CN: got %q want cn-v2", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCertHolder_Reload_FailureRetainsPreviousCert(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-v1")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
|
||||
// Corrupt the cert file and attempt reload.
|
||||
if err := os.WriteFile(certPath, []byte("garbage"), 0o600); err != nil {
|
||||
t.Fatalf("corrupt cert: %v", err)
|
||||
}
|
||||
if err := h.Reload(); err == nil {
|
||||
t.Fatal("expected Reload error for corrupt file, got nil")
|
||||
}
|
||||
// Holder should still serve the v1 cert.
|
||||
if got := readCertCN(t, h); got != "cn-v1" {
|
||||
t.Fatalf("post-failed-reload CN: got %q want cn-v1 (reload must not clobber on failure)", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCertHolder_GetCertificate_Concurrent(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-concurrent")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
|
||||
// 64 readers + 1 rotator for 500ms. Race detector catches any unsynchronized
|
||||
// swap of h.cert. Rotator writes fresh files + Reload, readers call
|
||||
// GetCertificate in a tight loop.
|
||||
var wg sync.WaitGroup
|
||||
done := make(chan struct{})
|
||||
const readers = 64
|
||||
for i := 0; i < readers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
default:
|
||||
if _, err := h.GetCertificate(&tls.ClientHelloInfo{}); err != nil {
|
||||
t.Errorf("GetCertificate: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := 0; i < 20; i++ {
|
||||
generateTestCert(t, certPath, keyPath, "cn-concurrent")
|
||||
_ = h.Reload()
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}()
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
close(done)
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func TestCertHolder_WatchSIGHUP_ReloadsOnSignal(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-before-sighup")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
stop := h.watchSIGHUP(silentLogger())
|
||||
defer stop()
|
||||
|
||||
// Rotate on disk, then fire SIGHUP to our own process and poll for the swap.
|
||||
generateTestCert(t, certPath, keyPath, "cn-after-sighup")
|
||||
if err := syscall.Kill(syscall.Getpid(), syscall.SIGHUP); err != nil {
|
||||
t.Fatalf("SIGHUP: %v", err)
|
||||
}
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
if readCertCN(t, h) == "cn-after-sighup" {
|
||||
return
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
t.Fatalf("watcher did not reload cert within 2s (CN still %q)", readCertCN(t, h))
|
||||
}
|
||||
|
||||
func TestCertHolder_WatchSIGHUP_StopExits(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-stop")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
stop := h.watchSIGHUP(silentLogger())
|
||||
|
||||
// Closing should be synchronous and safe; a subsequent SIGHUP must not
|
||||
// cause a reload (the watcher goroutine is gone).
|
||||
stop()
|
||||
time.Sleep(50 * time.Millisecond) // let goroutine exit
|
||||
|
||||
// After stop, the signal may still be delivered to the process but the
|
||||
// watcher has called signal.Stop so this channel is no longer receiving.
|
||||
// Simply assert that calling stop() twice does not panic — the goroutine
|
||||
// has already exited, so a second close would panic on the `done`
|
||||
// channel; we do NOT call stop twice. Instead verify no regression in
|
||||
// the held cert.
|
||||
if got := readCertCN(t, h); got != "cn-stop" {
|
||||
t.Fatalf("unexpected cert rotation after stop: got %q want cn-stop", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildServerTLSConfig_IsTLS13Only(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-cfg")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
cfg := buildServerTLSConfig(h)
|
||||
if cfg.MinVersion != tls.VersionTLS13 {
|
||||
t.Fatalf("MinVersion: got %#x want %#x (TLS 1.3)", cfg.MinVersion, tls.VersionTLS13)
|
||||
}
|
||||
wantCurves := []tls.CurveID{tls.X25519, tls.CurveP256}
|
||||
if len(cfg.CurvePreferences) != len(wantCurves) {
|
||||
t.Fatalf("CurvePreferences length: got %d want %d", len(cfg.CurvePreferences), len(wantCurves))
|
||||
}
|
||||
for i, c := range cfg.CurvePreferences {
|
||||
if c != wantCurves[i] {
|
||||
t.Fatalf("CurvePreferences[%d]: got %v want %v", i, c, wantCurves[i])
|
||||
}
|
||||
}
|
||||
if cfg.GetCertificate == nil {
|
||||
t.Fatal("GetCertificate: nil (holder not wired; SIGHUP reload would be broken)")
|
||||
}
|
||||
if len(cfg.Certificates) != 0 {
|
||||
t.Fatalf("Certificates: got %d want 0 (static cert would pin the first load and defeat reload)", len(cfg.Certificates))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildServerTLSConfig_Handshake_TLS12Rejected(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-handshake")
|
||||
|
||||
h, err := newCertHolder(certPath, keyPath)
|
||||
if err != nil {
|
||||
t.Fatalf("newCertHolder: %v", err)
|
||||
}
|
||||
serverCfg := buildServerTLSConfig(h)
|
||||
|
||||
ln, err := tls.Listen("tcp", "127.0.0.1:0", serverCfg)
|
||||
if err != nil {
|
||||
t.Fatalf("tls.Listen: %v", err)
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
// Server loop: accept and immediately close (we only care about the
|
||||
// handshake outcome).
|
||||
go func() {
|
||||
for {
|
||||
conn, err := ln.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// Force handshake so the server-side error surfaces.
|
||||
_ = conn.(*tls.Conn).Handshake()
|
||||
conn.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
// TLS 1.3 client — should succeed.
|
||||
clientOK := &tls.Config{
|
||||
MinVersion: tls.VersionTLS13,
|
||||
MaxVersion: tls.VersionTLS13,
|
||||
InsecureSkipVerify: true,
|
||||
}
|
||||
c, err := tls.Dial("tcp", ln.Addr().String(), clientOK)
|
||||
if err != nil {
|
||||
t.Fatalf("TLS 1.3 dial failed (expected success): %v", err)
|
||||
}
|
||||
if c.ConnectionState().Version != tls.VersionTLS13 {
|
||||
t.Fatalf("negotiated version: got %#x want TLS 1.3 (%#x)", c.ConnectionState().Version, tls.VersionTLS13)
|
||||
}
|
||||
c.Close()
|
||||
|
||||
// TLS 1.2 client — must be rejected at handshake.
|
||||
clientOld := &tls.Config{
|
||||
MinVersion: tls.VersionTLS12,
|
||||
MaxVersion: tls.VersionTLS12,
|
||||
InsecureSkipVerify: true,
|
||||
}
|
||||
if _, err := tls.Dial("tcp", ln.Addr().String(), clientOld); err == nil {
|
||||
t.Fatal("TLS 1.2 dial succeeded; HTTPS-everywhere requires server to refuse TLS 1.2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreflightServerTLS_MissingCertPath(t *testing.T) {
|
||||
err := preflightServerTLS("", "/any/key.pem")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty cert path, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreflightServerTLS_MissingKeyPath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-preflight")
|
||||
err := preflightServerTLS(certPath, "")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty key path, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreflightServerTLS_CertFileNotReadable(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
if err := os.WriteFile(keyPath, []byte("k"), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err := preflightServerTLS(filepath.Join(dir, "nope.crt"), keyPath)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unreadable cert path, got nil")
|
||||
}
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
t.Fatalf("expected os.ErrNotExist wrapped in error chain, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreflightServerTLS_InvalidKeyPair(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
// Pair of valid cert + garbage key — files are readable but the pair
|
||||
// doesn't round-trip tls.LoadX509KeyPair.
|
||||
generateTestCert(t, certPath, keyPath, "cn-bad-pair")
|
||||
if err := os.WriteFile(keyPath, []byte("-----BEGIN EC PRIVATE KEY-----\nBAD\n-----END EC PRIVATE KEY-----\n"), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err := preflightServerTLS(certPath, keyPath)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid key pair, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPreflightServerTLS_ValidPair_NoError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
certPath := filepath.Join(dir, "tls.crt")
|
||||
keyPath := filepath.Join(dir, "tls.key")
|
||||
generateTestCert(t, certPath, keyPath, "cn-ok")
|
||||
if err := preflightServerTLS(certPath, keyPath); err != nil {
|
||||
t.Fatalf("unexpected error for valid pair: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,523 @@
|
||||
# certctl Docker Compose Environments
|
||||
|
||||
This guide walks through every Docker Compose file in the `deploy/` directory. Each section explains what the environment does, when to use it, every service and environment variable, and the commands to run it. If you've never used Docker before, start with the [Prerequisites](#prerequisites) section. If you're experienced, skip to the environment you need.
|
||||
|
||||
## Contents
|
||||
|
||||
1. [Prerequisites](#prerequisites)
|
||||
2. [How Docker Compose Works (30-Second Version)](#how-docker-compose-works)
|
||||
3. [Base Environment (docker-compose.yml)](#base-environment)
|
||||
4. [Demo Overlay (docker-compose.demo.yml)](#demo-overlay)
|
||||
5. [Development Overlay (docker-compose.dev.yml)](#development-overlay)
|
||||
6. [Test Environment (docker-compose.test.yml)](#test-environment)
|
||||
7. [Environment Variable Reference](#environment-variable-reference)
|
||||
8. [Common Operations](#common-operations)
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You need two things: **Docker** (the container runtime) and **Docker Compose** (an orchestration tool that ships with Docker Desktop).
|
||||
|
||||
On macOS:
|
||||
```bash
|
||||
brew install --cask docker
|
||||
```
|
||||
|
||||
On Linux (Ubuntu/Debian):
|
||||
```bash
|
||||
curl -fsSL https://get.docker.com | sh
|
||||
sudo usermod -aG docker $USER
|
||||
# Log out and back in for group changes to take effect
|
||||
```
|
||||
|
||||
Verify the install:
|
||||
```bash
|
||||
docker --version # Docker Engine 24+ recommended
|
||||
docker compose version # Docker Compose v2+ required (note: no hyphen)
|
||||
```
|
||||
|
||||
**What Docker actually does:** Docker packages an application and all its dependencies (OS libraries, runtimes, config files) into an isolated unit called a container. When you run `docker compose up`, Docker reads a YAML file that describes multiple containers, creates a private network between them, and starts everything in the right order. Each container sees only its own filesystem and network unless you explicitly share volumes or ports.
|
||||
|
||||
**Why this matters for certctl:** Instead of installing PostgreSQL, building Go binaries, configuring the agent, and wiring everything together by hand, one command gives you the complete platform. Each compose file targets a different use case.
|
||||
|
||||
---
|
||||
|
||||
## How Docker Compose Works
|
||||
|
||||
A compose file defines **services** (containers), **networks** (how they talk to each other), and **volumes** (persistent storage). The key concepts:
|
||||
|
||||
**Services** are named containers. `certctl-server` is the API and web dashboard. `postgres` is the database. `certctl-agent` polls the server for certificate work.
|
||||
|
||||
**Depends_on + healthchecks** control startup order. The server won't start until PostgreSQL reports healthy. The agent won't start until the server reports healthy. This prevents connection errors during boot.
|
||||
|
||||
**Volumes** persist data across restarts. `postgres_data` keeps your database between `docker compose down` and `docker compose up`. Adding `-v` to `down` deletes volumes for a clean slate.
|
||||
|
||||
**Overlay files** let you layer changes. Running `docker compose -f base.yml -f overlay.yml up` merges both files. The overlay can add services, change environment variables, or mount extra volumes without editing the base.
|
||||
|
||||
**Port mapping** (`"8443:8443"`) maps host port (left) to container port (right). After startup, `https://localhost:8443` on your machine reaches the certctl server inside its container (HTTPS-only as of v2.2; the `certctl-tls-init` init container bootstraps a self-signed cert into `deploy/test/certs/`).
|
||||
|
||||
---
|
||||
|
||||
## Base Environment
|
||||
|
||||
**File:** `docker-compose.yml`
|
||||
**When to use:** Production deployments, first-time setup, or any time you want a clean dashboard with the onboarding wizard.
|
||||
|
||||
### What it runs
|
||||
|
||||
Three services on a private bridge network:
|
||||
|
||||
| Service | Image | Purpose | Ports |
|
||||
|---------|-------|---------|-------|
|
||||
| `postgres` | `postgres:16-alpine` | Database. Stores certificates, agents, jobs, audit trail, policies, discovery results. | 5432 |
|
||||
| `certctl-server` | Built from `Dockerfile` | API server + web dashboard + background scheduler. | 8443 |
|
||||
| `certctl-agent` | Built from `Dockerfile.agent` | Polls server for work, generates keys, deploys certificates, discovers existing certs. | none |
|
||||
|
||||
### Starting it
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
`--build` compiles the Go server and agent from source, including the React frontend. Without it, Docker may reuse a stale image from a previous build.
|
||||
|
||||
`-d` runs in detached mode (background). Omit it to see logs in your terminal.
|
||||
|
||||
Wait about 30 seconds, then verify:
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml ps
|
||||
# All three services should show "Up (healthy)"
|
||||
|
||||
curl --cacert ./deploy/test/certs/ca.crt https://localhost:8443/health
|
||||
# {"status":"healthy"}
|
||||
```
|
||||
|
||||
The control plane is HTTPS-only as of v2.2. The `certctl-tls-init` init container bootstraps a self-signed cert into `deploy/test/certs/` on first boot; pin it with `--cacert` (as above) or pass `-k` for one-off smoke tests (never in production).
|
||||
|
||||
Open **https://localhost:8443** in your browser. You'll see the onboarding wizard guiding you through: connecting a CA, deploying an agent, and adding your first certificate. Your browser will flag the self-signed cert as untrusted — accept the warning for local evaluation, or import `deploy/test/certs/ca.crt` into your OS trust store to make the warning go away.
|
||||
|
||||
### Service-by-service walkthrough
|
||||
|
||||
#### PostgreSQL
|
||||
|
||||
```yaml
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_DB: certctl
|
||||
POSTGRES_USER: certctl
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-certctl}
|
||||
```
|
||||
|
||||
Alpine-based PostgreSQL 16. The `${POSTGRES_PASSWORD:-certctl}` syntax means: use the `POSTGRES_PASSWORD` environment variable from your shell if set, otherwise default to `certctl`. For production, create a `.env` file:
|
||||
|
||||
```bash
|
||||
echo 'POSTGRES_PASSWORD=your-secure-password-here' > deploy/.env
|
||||
```
|
||||
|
||||
The `volumes` section mounts 10 migration files into PostgreSQL's init directory (`/docker-entrypoint-initdb.d/`). PostgreSQL runs these SQL files in alphabetical order on first boot only. They create the schema (tables, indexes, constraints) and seed the base data (default issuer, default policy). If the `postgres_data` volume already exists with an initialized database, these scripts are skipped entirely.
|
||||
|
||||
**Expert note:** The numbered prefix pattern (`001_`, `002_`, ..., `020_`) ensures deterministic execution order. All migrations use `IF NOT EXISTS` and `ON CONFLICT DO NOTHING` for idempotency, so re-running them against an existing database is safe.
|
||||
|
||||
#### certctl Server
|
||||
|
||||
```yaml
|
||||
certctl-server:
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CERTCTL_DATABASE_URL: postgres://certctl:${POSTGRES_PASSWORD:-certctl}@postgres:5432/certctl?sslmode=disable
|
||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||
CERTCTL_SERVER_PORT: 8443
|
||||
CERTCTL_LOG_LEVEL: info
|
||||
CERTCTL_AUTH_TYPE: none
|
||||
CERTCTL_KEYGEN_MODE: server
|
||||
CERTCTL_NETWORK_SCAN_ENABLED: "true"
|
||||
CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY:-change-me-32-char-encryption-key}
|
||||
```
|
||||
|
||||
The server is the control plane. It serves the REST API, the React dashboard, runs 7 background scheduler loops (renewal, job processing, health checks, notifications, short-lived cert expiry, network scanning, digest emails), and manages the issuer/target registry.
|
||||
|
||||
Key environment variables explained:
|
||||
|
||||
- `CERTCTL_DATABASE_URL` references the `postgres` service by hostname. Docker's internal DNS resolves `postgres` to the container's IP on the bridge network. `sslmode=disable` is appropriate because traffic stays on the private Docker network.
|
||||
- `CERTCTL_AUTH_TYPE: none` disables API key authentication so you can explore immediately. For production, set `api-key` and configure `CERTCTL_AUTH_SECRET`.
|
||||
- `CERTCTL_KEYGEN_MODE: server` means the server generates private keys. This is convenient for demos but insecure for production. In production, set `agent` so keys are generated on agent machines and never transmitted.
|
||||
- `CERTCTL_CONFIG_ENCRYPTION_KEY` enables AES-256-GCM encryption for issuer and target configurations stored in the database (credentials, API keys). Without this, the dynamic configuration GUI (adding issuers/targets from the dashboard) won't encrypt sensitive fields. For production, generate a strong random key.
|
||||
- `CERTCTL_NETWORK_SCAN_ENABLED` activates the scheduler loop that probes TLS endpoints on your network to discover certificates you might not be managing.
|
||||
|
||||
**Expert note:** The healthcheck hits `GET /health` every 10 seconds with 5 retries. The `depends_on: condition: service_healthy` on the agent means Docker holds agent startup until this check passes. Resource limits (`cpus: '1.0'`, `memory: 512M`) prevent the server from consuming unbounded resources in shared environments.
|
||||
|
||||
#### certctl Agent
|
||||
|
||||
```yaml
|
||||
certctl-agent:
|
||||
depends_on:
|
||||
certctl-server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CERTCTL_SERVER_URL: http://certctl-server:8443
|
||||
CERTCTL_API_KEY: ${CERTCTL_API_KEY:-change-me-in-production}
|
||||
CERTCTL_AGENT_NAME: docker-agent
|
||||
CERTCTL_LOG_LEVEL: info
|
||||
CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys
|
||||
volumes:
|
||||
- agent_keys:/var/lib/certctl/keys
|
||||
```
|
||||
|
||||
The agent is a lightweight Go binary that polls the server for pending work (certificate deployments, CSR generation requests), executes that work locally, and reports results back. It also scans configured directories for existing certificates (filesystem discovery).
|
||||
|
||||
- `CERTCTL_SERVER_URL` uses the Docker internal hostname `certctl-server`. This resolves inside the Docker network only.
|
||||
- `CERTCTL_DISCOVERY_DIRS` tells the agent which directories to scan for existing certificates. The agent walks these directories recursively, parses PEM and DER files, and reports findings to the server for triage.
|
||||
- The `agent_keys` volume persists private keys generated by the agent across container restarts. Without this volume, keys would be lost when the container stops.
|
||||
|
||||
**Expert note:** The agent's healthcheck uses `pgrep` because the agent doesn't expose an HTTP endpoint. The `restart: unless-stopped` policy means Docker automatically restarts the agent on crashes but respects manual `docker compose stop` commands.
|
||||
|
||||
### Stopping and cleaning up
|
||||
|
||||
```bash
|
||||
# Stop containers but keep data
|
||||
docker compose -f deploy/docker-compose.yml down
|
||||
|
||||
# Stop and delete all data (database, keys, volumes)
|
||||
docker compose -f deploy/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Demo Overlay
|
||||
|
||||
**File:** `docker-compose.demo.yml`
|
||||
**When to use:** Demos, screenshots, stakeholder presentations, or any time you want a populated dashboard on first boot.
|
||||
|
||||
### What it adds
|
||||
|
||||
One line: mounts `seed_demo.sql` into PostgreSQL's init directory. This 667-line SQL file inserts 180 days of simulated operational history: teams, owners, certificates across multiple issuers, agents on different platforms, jobs with realistic timestamps, discovery scan results, audit events, policies, and profiles.
|
||||
|
||||
### Starting it
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d --build
|
||||
```
|
||||
|
||||
The `-f` flags are ordered: base first, overlay second. Docker merges them. The demo overlay adds the seed_demo.sql volume mount to the `postgres` service defined in the base file.
|
||||
|
||||
### What you see
|
||||
|
||||
The dashboard shows pre-populated charts: expiration heatmap with upcoming renewals, status distribution across Active/Expiring/Expired/Failed states, 30-day job trends, and issuance rates. The sidebar pages (Certificates, Agents, Discovery, Jobs, etc.) all have data to explore.
|
||||
|
||||
### Resetting demo data
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml down -v
|
||||
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d --build
|
||||
```
|
||||
|
||||
The `down -v` deletes the `postgres_data` volume. On next boot, PostgreSQL re-runs all init scripts including the demo seed, giving you a clean starting point.
|
||||
|
||||
**Expert note:** The demo overlay is a pure data layer, not a configuration change. The server, agent, and their environment variables remain identical to the base. This means any behavior you see in the demo is exactly what the base environment produces once you populate data through normal operations.
|
||||
|
||||
---
|
||||
|
||||
## Development Overlay
|
||||
|
||||
**File:** `docker-compose.dev.yml`
|
||||
**When to use:** When you're contributing to certctl and need debug logging, database inspection, or a debugger attached to the server process.
|
||||
|
||||
### What it adds
|
||||
|
||||
| Addition | Purpose |
|
||||
|----------|---------|
|
||||
| Debug-level logging on server and agent | See every HTTP request, scheduler tick, and connector operation |
|
||||
| PgAdmin on port 5050 | Visual database browser for inspecting tables, running queries |
|
||||
| Delve debugger port 40000 | Attach a Go debugger to the running server process |
|
||||
|
||||
### Starting it
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.dev.yml up --build
|
||||
```
|
||||
|
||||
Omit `-d` during development so you see logs streaming in your terminal.
|
||||
|
||||
### Using PgAdmin
|
||||
|
||||
Open **http://localhost:5050** in your browser. PgAdmin is pre-configured in desktop mode (no login required). To connect to the certctl database:
|
||||
|
||||
1. Right-click "Servers" in the left panel, choose "Register" > "Server"
|
||||
2. Name: `certctl`
|
||||
3. Connection tab: Host = `postgres`, Port = `5432`, Username = `certctl`, Password = `certctl` (or whatever you set in `.env`)
|
||||
|
||||
From there you can browse all 19 tables, inspect certificate records, view audit events, check the scheduler's job queue, and run arbitrary SQL.
|
||||
|
||||
### Using the Delve debugger
|
||||
|
||||
Port 40000 is exposed for remote debugging. To use it, you'd need to modify the Dockerfile to build with debug symbols and start the server under Delve:
|
||||
|
||||
```bash
|
||||
# In Dockerfile, replace the CMD with:
|
||||
CMD ["dlv", "--listen=:40000", "--headless=true", "--api-version=2", "exec", "/app/server"]
|
||||
```
|
||||
|
||||
Then attach from your IDE (VS Code, GoLand) using remote debug configuration pointing to `localhost:40000`.
|
||||
|
||||
### Hot reload
|
||||
|
||||
The dev overlay includes commented-out volume mounts for source code directories. Uncomment them and install [air](https://github.com/cosmtrek/air) to get automatic recompilation on file changes:
|
||||
|
||||
```bash
|
||||
go install github.com/cosmtrek/air@latest
|
||||
```
|
||||
|
||||
**Expert note:** The `builds: context: ..` in the dev overlay overrides the base service's image reference, forcing a local build from the repository root. This means changes to your Go source code are compiled fresh on each `docker compose up --build`.
|
||||
|
||||
---
|
||||
|
||||
## Test Environment
|
||||
|
||||
**File:** `docker-compose.test.yml`
|
||||
**When to use:** Integration testing against real CA backends. This is a standalone environment (not an overlay) with 7 containers on a static-IP subnet.
|
||||
|
||||
### What it runs
|
||||
|
||||
| Service | IP | Purpose |
|
||||
|---------|----|---------|
|
||||
| `postgres` | 10.30.50.2 | Database (clean, no demo data) |
|
||||
| `pebble-challtestsrv` | 10.30.50.3 | DNS/HTTP challenge test server for Pebble |
|
||||
| `pebble` | 10.30.50.4 | ACME test server (simulates Let's Encrypt) |
|
||||
| `step-ca` | 10.30.50.5 | Private CA (Smallstep, JWK provisioner) |
|
||||
| `certctl-server` | 10.30.50.6 | Control plane with all issuers configured |
|
||||
| `nginx` | 10.30.50.7 | TLS target server for deployment testing |
|
||||
| `certctl-agent` | 10.30.50.8 | Agent with NGINX volume + discovery |
|
||||
|
||||
### Why static IPs?
|
||||
|
||||
Pebble (the ACME test server) validates HTTP-01 challenges by connecting to the challenge URL. It resolves domain names via `pebble-challtestsrv`, which is configured to return `10.30.50.6` (the certctl server) for all lookups. Without static IPs, container IPs would be assigned randomly on each boot, breaking the challenge validation chain.
|
||||
|
||||
The `/24` subnet (10.30.50.0/24) provides 254 usable addresses, far more than needed but standard practice for test networks.
|
||||
|
||||
### Starting it
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.test.yml up --build
|
||||
```
|
||||
|
||||
Wait for all health checks to pass (about 60 seconds for step-ca's first-run bootstrap). Then:
|
||||
|
||||
```bash
|
||||
# Dashboard with auth enabled (HTTPS-only as of v2.2; browser will warn on the self-signed cert —
|
||||
# accept the warning or trust `deploy/test/certs/ca.crt` in your OS keychain)
|
||||
open https://localhost:8443
|
||||
# API key: test-key-2026
|
||||
|
||||
# NGINX serving a self-signed placeholder
|
||||
curl -k https://localhost:8444
|
||||
```
|
||||
|
||||
### What's different from the base
|
||||
|
||||
The test environment is configured for production-like behavior:
|
||||
|
||||
- **API key auth enabled** (`CERTCTL_AUTH_TYPE: api-key`, `CERTCTL_AUTH_SECRET: test-key-2026`). Every API request needs `Authorization: Bearer test-key-2026`.
|
||||
- **Agent-side key generation** (`CERTCTL_KEYGEN_MODE: agent`). The agent generates ECDSA P-256 keys locally and submits only the CSR to the server. Private keys never leave the agent container.
|
||||
- **Three real issuers configured:**
|
||||
- **Local CA** (self-signed) for instant issuance testing
|
||||
- **ACME via Pebble** for Let's Encrypt-compatible flow testing (HTTP-01 challenges validated through the challenge test server)
|
||||
- **step-ca** for private CA testing with JWK provisioner authentication
|
||||
- **EST server enabled** (`CERTCTL_EST_ENABLED: "true"`) for RFC 7030 enrollment testing
|
||||
- **Post-deployment verification enabled** (`CERTCTL_VERIFY_DEPLOYMENT: "true"`) so the agent probes NGINX after deploying a cert and confirms the TLS fingerprint matches
|
||||
- **Dynamic config encryption enabled** (`CERTCTL_CONFIG_ENCRYPTION_KEY`) so issuer/target configs added through the GUI are encrypted at rest
|
||||
- **TLS trust bootstrapping:** The server runs a `setup-trust.sh` entrypoint that fetches Pebble's root CA from its management API and copies step-ca's root cert from a shared volume, then runs `update-ca-certificates` before starting the server binary. This is necessary because both CAs use self-signed roots that aren't in Alpine's default trust store.
|
||||
|
||||
### Running the Go integration tests
|
||||
|
||||
The test environment is designed to support the Go integration test suite at `deploy/test/integration_test.go`:
|
||||
|
||||
```bash
|
||||
# Start the environment
|
||||
docker compose -f deploy/docker-compose.test.yml up --build -d
|
||||
|
||||
# Wait for health checks
|
||||
sleep 30
|
||||
|
||||
# Run integration tests (from repo root)
|
||||
go test -tags integration -v ./deploy/test/...
|
||||
```
|
||||
|
||||
The integration tests exercise 12 phases: health, agent heartbeat, Local CA issuance, ACME issuance, renewal, step-ca issuance, revocation + CRL + OCSP, EST enrollment, S/MIME issuance, discovery, network scan, and deployment verification. PostgreSQL port 5432 is exposed so the test binary can query the database directly for assertions.
|
||||
|
||||
See [docs/test-env.md](../docs/test-env.md) for the full walkthrough and manual QA procedures.
|
||||
|
||||
### Stopping and cleaning up
|
||||
|
||||
```bash
|
||||
# Stop but keep data (volumes persist)
|
||||
docker compose -f deploy/docker-compose.test.yml down
|
||||
|
||||
# Full reset (delete step-ca bootstrap, database, agent keys, NGINX certs)
|
||||
docker compose -f deploy/docker-compose.test.yml down -v
|
||||
```
|
||||
|
||||
**Expert note:** The step-ca container auto-bootstraps on first run: generates a root CA, creates a JWK provisioner named "admin" with password "password123", and writes everything to the `stepca_data` volume. Subsequent starts reuse this volume. If you `down -v`, the next boot generates a new root CA, which means all previously issued step-ca certs become untrusted.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variable Reference
|
||||
|
||||
Every `CERTCTL_*` environment variable is read by the server's `internal/config/config.go` via `os.Getenv`. If the prefix is missing, the variable is silently ignored.
|
||||
|
||||
### Server
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_DATABASE_URL` | (required) | PostgreSQL connection string |
|
||||
| `CERTCTL_SERVER_HOST` | `0.0.0.0` | Listen address |
|
||||
| `CERTCTL_SERVER_PORT` | `8443` | Listen port |
|
||||
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity: `debug`, `info`, `warn`, `error` |
|
||||
| `CERTCTL_AUTH_TYPE` | `api-key` | Auth mode: `api-key` or `none` |
|
||||
| `CERTCTL_AUTH_SECRET` | (none) | API key(s), comma-separated for rotation |
|
||||
| `CERTCTL_KEYGEN_MODE` | `agent` | Key generation: `agent` (production) or `server` (demo) |
|
||||
| `CERTCTL_CONFIG_ENCRYPTION_KEY` | (none) | AES-256-GCM key for encrypting issuer/target configs in DB |
|
||||
| `CERTCTL_NETWORK_SCAN_ENABLED` | `false` | Enable network TLS scanning scheduler loop |
|
||||
| `CERTCTL_NETWORK_SCAN_INTERVAL` | `6h` | How often the network scanner runs |
|
||||
| `CERTCTL_MAX_BODY_SIZE` | `1048576` | Max request body size in bytes (1MB) |
|
||||
| `CERTCTL_CORS_ORIGINS` | (empty) | Allowed CORS origins, comma-separated. Empty = deny all cross-origin |
|
||||
| `CERTCTL_RATE_LIMIT_RPS` | `10` | Requests per second per client |
|
||||
| `CERTCTL_RATE_LIMIT_BURST` | `20` | Burst allowance above RPS |
|
||||
|
||||
### Agent
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_SERVER_URL` | (required) | Server API URL |
|
||||
| `CERTCTL_API_KEY` | (none) | API key for authenticating with server |
|
||||
| `CERTCTL_AGENT_NAME` | (hostname) | Display name in dashboard |
|
||||
| `CERTCTL_AGENT_ID` | (auto-generated) | Stable agent identifier |
|
||||
| `CERTCTL_KEYGEN_MODE` | `agent` | Must match server setting |
|
||||
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity |
|
||||
| `CERTCTL_KEY_DIR` | `/var/lib/certctl/keys` | Directory for private key storage (0600 perms) |
|
||||
| `CERTCTL_DISCOVERY_DIRS` | (none) | Comma-separated paths to scan for existing certs |
|
||||
|
||||
### Issuers (Server)
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `CERTCTL_ACME_DIRECTORY_URL` | ACME CA directory (e.g., Let's Encrypt, Pebble) |
|
||||
| `CERTCTL_ACME_EMAIL` | ACME account email |
|
||||
| `CERTCTL_ACME_CHALLENGE_TYPE` | `http-01`, `dns-01`, or `dns-persist-01` |
|
||||
| `CERTCTL_ACME_INSECURE` | Skip TLS verification for ACME CA (test only) |
|
||||
| `CERTCTL_ACME_EAB_KID` / `CERTCTL_ACME_EAB_HMAC` | External Account Binding for ZeroSSL, Google Trust Services |
|
||||
| `CERTCTL_ACME_ARI_ENABLED` | Enable RFC 9773 Renewal Information |
|
||||
| `CERTCTL_ACME_PROFILE` | ACME profile (`tlsserver`, `shortlived`) |
|
||||
| `CERTCTL_STEPCA_URL` | step-ca server URL |
|
||||
| `CERTCTL_STEPCA_ROOT_CERT` | Path to step-ca root CA cert |
|
||||
| `CERTCTL_STEPCA_PROVISIONER` | Provisioner name |
|
||||
| `CERTCTL_STEPCA_PASSWORD` | Provisioner password |
|
||||
| `CERTCTL_STEPCA_KEY_PATH` | Path to provisioner key |
|
||||
| `CERTCTL_CA_CERT_PATH` / `CERTCTL_CA_KEY_PATH` | Sub-CA mode: load CA cert+key from disk |
|
||||
| `CERTCTL_VAULT_ADDR` | Vault server address |
|
||||
| `CERTCTL_VAULT_TOKEN` | Vault auth token |
|
||||
| `CERTCTL_VAULT_MOUNT` | PKI secrets engine mount (default: `pki`) |
|
||||
| `CERTCTL_VAULT_ROLE` | PKI role name |
|
||||
| `CERTCTL_DIGICERT_API_KEY` | DigiCert CertCentral API key |
|
||||
| `CERTCTL_DIGICERT_ORG_ID` | DigiCert organization ID |
|
||||
| `CERTCTL_SECTIGO_CUSTOMER_URI` / `_LOGIN` / `_PASSWORD` | Sectigo SCM auth |
|
||||
| `CERTCTL_GOOGLE_CAS_PROJECT` / `_LOCATION` / `_CA_POOL` / `_CREDENTIALS` | Google CAS config |
|
||||
|
||||
### EST Server
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_EST_ENABLED` | `false` | Enable RFC 7030 EST endpoints |
|
||||
| `CERTCTL_EST_ISSUER_ID` | `iss-local` | Which issuer processes EST enrollments |
|
||||
| `CERTCTL_EST_PROFILE_ID` | (none) | Optional profile constraint |
|
||||
|
||||
### Post-Deployment Verification
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `CERTCTL_VERIFY_DEPLOYMENT` | `false` | Agent probes TLS after deploying |
|
||||
| `CERTCTL_VERIFY_TIMEOUT` | `10s` | TLS probe timeout |
|
||||
| `CERTCTL_VERIFY_DELAY` | `2s` | Wait before probing (let service reload) |
|
||||
|
||||
### Notifications
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `CERTCTL_SMTP_HOST` / `_PORT` / `_USERNAME` / `_PASSWORD` / `_FROM_ADDRESS` / `_USE_TLS` | SMTP email |
|
||||
| `CERTCTL_SLACK_WEBHOOK_URL` / `_CHANNEL` / `_USERNAME` | Slack notifications |
|
||||
| `CERTCTL_TEAMS_WEBHOOK_URL` | Microsoft Teams |
|
||||
| `CERTCTL_PAGERDUTY_ROUTING_KEY` / `_SEVERITY` | PagerDuty alerts |
|
||||
| `CERTCTL_OPSGENIE_API_KEY` / `_PRIORITY` | OpsGenie alerts |
|
||||
| `CERTCTL_DIGEST_ENABLED` / `_INTERVAL` / `_RECIPIENTS` | Scheduled digest email |
|
||||
|
||||
---
|
||||
|
||||
## Common Operations
|
||||
|
||||
### Viewing logs
|
||||
|
||||
```bash
|
||||
# All services
|
||||
docker compose -f deploy/docker-compose.yml logs -f
|
||||
|
||||
# Single service
|
||||
docker compose -f deploy/docker-compose.yml logs -f certctl-server
|
||||
|
||||
# Last 100 lines
|
||||
docker compose -f deploy/docker-compose.yml logs --tail 100 certctl-server
|
||||
```
|
||||
|
||||
### Rebuilding after code changes
|
||||
|
||||
```bash
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
Docker only rebuilds images that have changed source files. The `--build` flag is essential after editing Go code or frontend files.
|
||||
|
||||
### Connecting to the database directly
|
||||
|
||||
```bash
|
||||
docker exec -it certctl-postgres psql -U certctl -d certctl
|
||||
```
|
||||
|
||||
Useful queries:
|
||||
```sql
|
||||
-- Certificate inventory
|
||||
SELECT id, common_name, status, expires_at FROM managed_certificates ORDER BY expires_at;
|
||||
|
||||
-- Recent jobs
|
||||
SELECT id, type, status, certificate_id, created_at FROM jobs ORDER BY created_at DESC LIMIT 20;
|
||||
|
||||
-- Audit trail
|
||||
SELECT event_type, actor, resource_id, created_at FROM audit_events ORDER BY created_at DESC LIMIT 20;
|
||||
|
||||
-- Issuer configurations (encrypted_config is AES-256-GCM)
|
||||
SELECT id, type, source, enabled, test_status FROM issuers;
|
||||
```
|
||||
|
||||
### Checking container resource usage
|
||||
|
||||
```bash
|
||||
docker stats --no-stream
|
||||
```
|
||||
|
||||
### Upgrading
|
||||
|
||||
```bash
|
||||
git pull
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
Migrations are idempotent (`IF NOT EXISTS`), so upgrading to a version with new schema changes is safe. PostgreSQL only runs init scripts on first boot of a fresh volume, so new migrations in an upgrade require running them manually:
|
||||
|
||||
```bash
|
||||
docker exec -i certctl-postgres psql -U certctl -d certctl < migrations/000011_new_feature.up.sql
|
||||
```
|
||||
|
||||
Or, for a clean upgrade: `down -v` and `up --build` (loses existing data).
|
||||
@@ -0,0 +1,14 @@
|
||||
# Demo mode: pre-populated dashboard with 32 certificates, 8 agents, 10 issuers, etc.
|
||||
# Use this to showcase certctl's dashboard with realistic data.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build
|
||||
#
|
||||
# To start fresh (wipe previous data):
|
||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml down -v
|
||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build
|
||||
|
||||
services:
|
||||
postgres:
|
||||
volumes:
|
||||
- ../migrations/seed_demo.sql:/docker-entrypoint-initdb.d/030_seed_demo.sql
|
||||
@@ -9,11 +9,21 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile
|
||||
# Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
|
||||
# vars into the Docker build so the Node frontend stage and Go module
|
||||
# download can reach the public registries behind corporate proxies.
|
||||
# Defaults to empty; omit the variables from the host environment for
|
||||
# un-proxied builds and the behaviour is byte-identical to the pre-fix
|
||||
# tree.
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
environment:
|
||||
# Verbose logging for development
|
||||
LOG_LEVEL: debug
|
||||
SERVER_HOST: 0.0.0.0
|
||||
SERVER_PORT: 8443
|
||||
CERTCTL_LOG_LEVEL: debug
|
||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||
CERTCTL_SERVER_PORT: "8443"
|
||||
volumes:
|
||||
# Mount local source for hot reload (requires air or similar)
|
||||
# Uncomment if using air or similar for hot reload:
|
||||
@@ -29,8 +39,17 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile.agent
|
||||
# Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
|
||||
# vars into the Docker build so the Go module download stage can reach
|
||||
# the public Go module proxy behind corporate proxies. Defaults to
|
||||
# empty; omit the variables from the host environment for un-proxied
|
||||
# builds and the behaviour is byte-identical to the pre-fix tree.
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
environment:
|
||||
LOG_LEVEL: debug
|
||||
CERTCTL_LOG_LEVEL: debug
|
||||
|
||||
# PgAdmin for database exploration
|
||||
pgadmin:
|
||||
|
||||
@@ -4,8 +4,12 @@
|
||||
#
|
||||
# Spins up the full certctl platform with real CA backends for manual QA:
|
||||
#
|
||||
# 0. certctl-tls-init — one-shot init container; writes self-signed
|
||||
# server.crt/.key/ca.crt into ./test/certs (bind
|
||||
# mount, not a named volume — host-readable for
|
||||
# the Go integration test binary)
|
||||
# 1. PostgreSQL 16 — database (clean, no demo data)
|
||||
# 2. certctl-server — control plane API + web dashboard on :8443
|
||||
# 2. certctl-server — control plane API + web dashboard on :8443 (HTTPS)
|
||||
# 3. certctl-agent — polls for work, deploys certs to NGINX
|
||||
# 4. step-ca — private CA (JWK provisioner, auto-bootstraps)
|
||||
# 5. Pebble — ACME test server (simulates Let's Encrypt)
|
||||
@@ -16,15 +20,76 @@
|
||||
# cd deploy
|
||||
# docker compose -f docker-compose.test.yml up --build
|
||||
#
|
||||
# Dashboard: http://localhost:8443
|
||||
# Dashboard: https://localhost:8443 (self-signed — use --cacert test/certs/ca.crt)
|
||||
# API key: test-key-2026
|
||||
# NGINX: https://localhost:8444 (self-signed placeholder until cert deployed)
|
||||
#
|
||||
# Integration tests: `go test -tags integration ./deploy/test/...` picks up
|
||||
# the CA bundle at ./test/certs/ca.crt automatically via CERTCTL_TEST_CA_BUNDLE.
|
||||
#
|
||||
# See docs/test-env.md for the full walkthrough.
|
||||
# =============================================================================
|
||||
|
||||
services:
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTPS-Everywhere Phase 6 — self-signed TLS bootstrap for the test harness.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mirrors the production `certctl-tls-init` (see docker-compose.yml §10-43)
|
||||
# but writes into a *host bind mount* (./test/certs) instead of a named
|
||||
# volume. The named-volume approach works fine inside Docker but hides the
|
||||
# CA bundle from the Go integration test binary that runs on the host; the
|
||||
# bind mount exposes /etc/certctl/tls/ca.crt at deploy/test/certs/ca.crt
|
||||
# so `newTestClient()` can load it into an x509.CertPool and validate the
|
||||
# self-signed server cert. Test-only divergence, explicitly documented.
|
||||
#
|
||||
# The generated cert has SAN=DNS:certctl-server,DNS:localhost,IP:127.0.0.1
|
||||
# so both in-cluster traffic (agent → certctl-server:8443) and host traffic
|
||||
# (go test → localhost:8443) validate cleanly. Destroy via
|
||||
# `docker compose -f docker-compose.test.yml down -v` + `rm -rf test/certs`
|
||||
# to force regeneration. Keys written 0600, certs 0644, owned 1000:1000
|
||||
# (the UID the server binary runs as inside its container per Dockerfile:64).
|
||||
certctl-tls-init:
|
||||
image: alpine/openssl:latest
|
||||
container_name: certctl-test-tls-init
|
||||
restart: "no"
|
||||
entrypoint: /bin/sh
|
||||
command:
|
||||
- -c
|
||||
- |
|
||||
set -eu
|
||||
CERT=/etc/certctl/tls/server.crt
|
||||
KEY=/etc/certctl/tls/server.key
|
||||
CA=/etc/certctl/tls/ca.crt
|
||||
if [ -f "$$CERT" ] && [ -f "$$KEY" ] && [ -f "$$CA" ]; then
|
||||
echo "TLS cert already present at $$CERT — skipping generation"
|
||||
else
|
||||
mkdir -p /etc/certctl/tls
|
||||
openssl req -x509 -newkey ec \
|
||||
-pkeyopt ec_paramgen_curve:P-256 \
|
||||
-nodes \
|
||||
-keyout "$$KEY" \
|
||||
-out "$$CERT" \
|
||||
-days 3650 \
|
||||
-subj "/CN=certctl-server" \
|
||||
-addext "subjectAltName=DNS:certctl-server,DNS:localhost,IP:127.0.0.1,IP:::1"
|
||||
cp "$$CERT" "$$CA"
|
||||
echo "Generated self-signed TLS cert for certctl-test-server (ECDSA-P256/SHA-256, 3650d, CN=certctl-server)"
|
||||
fi
|
||||
# The test server container runs as root (see `user: "0:0"` below)
|
||||
# because setup-trust.sh needs to update the system trust store, so
|
||||
# the perms here are really about host-side readability — 0644 on
|
||||
# the CA/cert lets `go test` on the host read the bundle without a
|
||||
# chown dance.
|
||||
chown 1000:1000 "$$CERT" "$$KEY" "$$CA" || true
|
||||
chmod 0644 "$$CERT" "$$CA"
|
||||
chmod 0600 "$$KEY"
|
||||
volumes:
|
||||
- ./test/certs:/etc/certctl/tls
|
||||
networks:
|
||||
certctl-test:
|
||||
ipv4_address: 10.30.50.9
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -45,8 +110,10 @@ services:
|
||||
- ../migrations/000006_discovery.up.sql:/docker-entrypoint-initdb.d/006_discovery.sql
|
||||
- ../migrations/000007_network_discovery.up.sql:/docker-entrypoint-initdb.d/007_network_discovery.sql
|
||||
- ../migrations/000008_verification.up.sql:/docker-entrypoint-initdb.d/008_verification.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/010_seed.sql
|
||||
- ../migrations/seed_test.sql:/docker-entrypoint-initdb.d/015_seed_test.sql
|
||||
- ../migrations/000009_issuer_config.up.sql:/docker-entrypoint-initdb.d/009_issuer_config.sql
|
||||
- ../migrations/000010_target_config.up.sql:/docker-entrypoint-initdb.d/010_target_config.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/020_seed.sql
|
||||
- ../migrations/seed_test.sql:/docker-entrypoint-initdb.d/025_seed_test.sql
|
||||
# No seed_demo.sql — start with a clean database for real testing
|
||||
networks:
|
||||
certctl-test:
|
||||
@@ -148,6 +215,16 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile
|
||||
# Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
|
||||
# vars into the Docker build so the Node frontend stage and Go module
|
||||
# download can reach the public registries behind corporate proxies.
|
||||
# Defaults to empty; omit the variables from the host environment for
|
||||
# un-proxied builds and the behaviour is byte-identical to the pre-fix
|
||||
# tree.
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
container_name: certctl-test-server
|
||||
depends_on:
|
||||
postgres:
|
||||
@@ -156,6 +233,12 @@ services:
|
||||
condition: service_started
|
||||
step-ca:
|
||||
condition: service_healthy
|
||||
# HTTPS-Everywhere Phase 6: block server boot until the init container
|
||||
# has written server.crt / server.key / ca.crt into ./test/certs. The
|
||||
# init container runs once and exits 0; service_completed_successfully
|
||||
# makes that a gating dependency rather than a liveness one.
|
||||
certctl-tls-init:
|
||||
condition: service_completed_successfully
|
||||
# Run as root so update-ca-certificates can write to /etc/ssl/certs.
|
||||
# Container isolation provides the security boundary.
|
||||
user: "0:0"
|
||||
@@ -167,6 +250,12 @@ services:
|
||||
# Server
|
||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||
CERTCTL_SERVER_PORT: 8443
|
||||
# HTTPS-Everywhere Phase 6: point the server at the init-container-generated
|
||||
# cert/key pair (bind-mounted from ./test/certs). Same paths as production
|
||||
# compose so the server binary code path is identical; only the host-side
|
||||
# storage differs (bind mount vs named volume — see §certctl-tls-init block).
|
||||
CERTCTL_SERVER_TLS_CERT_PATH: /etc/certctl/tls/server.crt
|
||||
CERTCTL_SERVER_TLS_KEY_PATH: /etc/certctl/tls/server.key
|
||||
CERTCTL_LOG_LEVEL: debug
|
||||
|
||||
# Auth — API key required (production-like)
|
||||
@@ -196,6 +285,9 @@ services:
|
||||
CERTCTL_EST_ENABLED: "true"
|
||||
CERTCTL_EST_ISSUER_ID: iss-local
|
||||
|
||||
# Dynamic issuer/target config encryption (M34/M35)
|
||||
CERTCTL_CONFIG_ENCRYPTION_KEY: test-encryption-key-32chars!!
|
||||
|
||||
# Network scanning
|
||||
CERTCTL_NETWORK_SCAN_ENABLED: "true"
|
||||
|
||||
@@ -209,12 +301,22 @@ services:
|
||||
- ./test/setup-trust.sh:/app/setup-trust.sh:ro
|
||||
# step-ca data volume (root cert at /certs/root_ca.crt, key at /secrets/provisioner_key)
|
||||
- stepca_data:/stepca-data:ro
|
||||
# HTTPS-Everywhere Phase 6: read-only bind mount of the init-generated
|
||||
# TLS material. The init container writes here; server reads here; the
|
||||
# agent mounts the same host path at the same container path (see below)
|
||||
# so /etc/certctl/tls/ca.crt resolves to the *same* bytes on both sides.
|
||||
- ./test/certs:/etc/certctl/tls:ro
|
||||
networks:
|
||||
certctl-test:
|
||||
ipv4_address: 10.30.50.6
|
||||
healthcheck:
|
||||
# /health requires auth when CERTCTL_AUTH_TYPE=api-key, so include the Bearer token
|
||||
test: ["CMD", "curl", "-f", "-H", "Authorization: Bearer test-key-2026", "http://localhost:8443/health"]
|
||||
# HTTPS-Everywhere Phase 6: healthcheck now speaks TLS with --cacert to
|
||||
# verify the self-signed server cert against the init-generated bundle.
|
||||
# /health requires auth when CERTCTL_AUTH_TYPE=api-key, so include the
|
||||
# Bearer token. curl exits non-zero on both TLS handshake failure and
|
||||
# non-2xx status — either failure keeps depends_on: {condition:
|
||||
# service_healthy} from unblocking the agent, which is what we want.
|
||||
test: ["CMD", "curl", "--cacert", "/etc/certctl/tls/ca.crt", "-f", "-H", "Authorization: Bearer test-key-2026", "https://localhost:8443/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
start_period: 30s
|
||||
@@ -261,12 +363,27 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile.agent
|
||||
# Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
|
||||
# vars into the Docker build so the Go module download stage can reach
|
||||
# the public Go module proxy behind corporate proxies. Defaults to
|
||||
# empty; omit the variables from the host environment for un-proxied
|
||||
# builds and the behaviour is byte-identical to the pre-fix tree.
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
container_name: certctl-test-agent
|
||||
depends_on:
|
||||
certctl-server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CERTCTL_SERVER_URL: http://certctl-server:8443
|
||||
# HTTPS-Everywhere Phase 6: agent dials the server over TLS and validates
|
||||
# the self-signed cert against the CA bundle pinned by
|
||||
# CERTCTL_SERVER_CA_BUNDLE_PATH. Same env vars + container paths as
|
||||
# production compose so the agent binary code path (loadCABundle →
|
||||
# x509.CertPool → *tls.Config{RootCAs, MinVersion: TLS13}) is identical.
|
||||
CERTCTL_SERVER_URL: https://certctl-server:8443
|
||||
CERTCTL_SERVER_CA_BUNDLE_PATH: /etc/certctl/tls/ca.crt
|
||||
CERTCTL_API_KEY: test-key-2026
|
||||
CERTCTL_AGENT_NAME: test-agent-01
|
||||
CERTCTL_AGENT_ID: agent-test-01
|
||||
@@ -276,6 +393,10 @@ services:
|
||||
volumes:
|
||||
- agent_keys:/var/lib/certctl/keys
|
||||
- nginx_certs:/nginx-certs
|
||||
# HTTPS-Everywhere Phase 6: same bind mount as the server, same path,
|
||||
# so /etc/certctl/tls/ca.crt resolves to the identical bytes. This is
|
||||
# the only way the CN=certctl-server cert validates on the agent side.
|
||||
- ./test/certs:/etc/certctl/tls:ro
|
||||
networks:
|
||||
certctl-test:
|
||||
ipv4_address: 10.30.50.8
|
||||
|
||||
@@ -1,4 +1,57 @@
|
||||
services:
|
||||
# HTTPS-Everywhere Phase 3 — self-signed TLS bootstrap (init container).
|
||||
# Generates a CN=certctl-server ECDSA-P256 (SHA-256 signature) cert with
|
||||
# the SAN list locked by milestone §3.6 on first boot; subsequent boots
|
||||
# see the cert already present in the `certs` named volume and no-op out.
|
||||
# Server + agent mount the volume read-only. Destroy via `docker compose
|
||||
# down -v` to force regeneration. This bootstrap is for docker-compose
|
||||
# demos and local dev only; Helm operators supply a Secret / cert-manager
|
||||
# Certificate per docs/tls.md.
|
||||
#
|
||||
# Rationale for ECDSA-P256 (was ed25519 pre-v2.0.48): Apple's TLS stack
|
||||
# — Safari Network Framework and the macOS-bundled LibreSSL 3.3.6
|
||||
# /usr/bin/curl — does not advertise ed25519 in the ClientHello
|
||||
# signature_algorithms extension for server certs, yielding "tls: peer
|
||||
# doesn't support any of the certificate's signature algorithms" at
|
||||
# handshake. ECDSA-P256 with SHA-256 is universally supported. See
|
||||
# docs/tls.md Pattern 1.
|
||||
certctl-tls-init:
|
||||
image: alpine/openssl:latest
|
||||
container_name: certctl-tls-init
|
||||
restart: "no"
|
||||
entrypoint: /bin/sh
|
||||
command:
|
||||
- -c
|
||||
- |
|
||||
set -eu
|
||||
CERT=/etc/certctl/tls/server.crt
|
||||
KEY=/etc/certctl/tls/server.key
|
||||
CA=/etc/certctl/tls/ca.crt
|
||||
if [ -f "$$CERT" ] && [ -f "$$KEY" ] && [ -f "$$CA" ]; then
|
||||
echo "TLS cert already present at $$CERT — skipping generation"
|
||||
else
|
||||
mkdir -p /etc/certctl/tls
|
||||
openssl req -x509 -newkey ec \
|
||||
-pkeyopt ec_paramgen_curve:P-256 \
|
||||
-nodes \
|
||||
-keyout "$$KEY" \
|
||||
-out "$$CERT" \
|
||||
-days 3650 \
|
||||
-subj "/CN=certctl-server" \
|
||||
-addext "subjectAltName=DNS:certctl-server,DNS:localhost,IP:127.0.0.1,IP:::1"
|
||||
cp "$$CERT" "$$CA"
|
||||
echo "Generated self-signed TLS cert for certctl-server (ECDSA-P256/SHA-256, 3650d, CN=certctl-server)"
|
||||
fi
|
||||
# certctl binary runs as UID 1000 inside the server container per
|
||||
# Dockerfile:64-65; the cert + key must be readable by that UID.
|
||||
chown 1000:1000 "$$CERT" "$$KEY" "$$CA"
|
||||
chmod 0644 "$$CERT" "$$CA"
|
||||
chmod 0600 "$$KEY"
|
||||
volumes:
|
||||
- certs:/etc/certctl/tls
|
||||
networks:
|
||||
- certctl-network
|
||||
|
||||
# PostgreSQL database
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
@@ -19,8 +72,9 @@ services:
|
||||
- ../migrations/000006_discovery.up.sql:/docker-entrypoint-initdb.d/006_discovery.sql
|
||||
- ../migrations/000007_network_discovery.up.sql:/docker-entrypoint-initdb.d/007_network_discovery.sql
|
||||
- ../migrations/000008_verification.up.sql:/docker-entrypoint-initdb.d/008_verification.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/010_seed.sql
|
||||
- ../migrations/seed_demo.sql:/docker-entrypoint-initdb.d/011_seed_demo.sql
|
||||
- ../migrations/000009_issuer_config.up.sql:/docker-entrypoint-initdb.d/009_issuer_config.sql
|
||||
- ../migrations/000010_target_config.up.sql:/docker-entrypoint-initdb.d/010_target_config.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/020_seed.sql
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
@@ -35,24 +89,41 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile
|
||||
# Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
|
||||
# vars into the Docker build so the Node frontend stage and Go module
|
||||
# download can reach the public registries behind corporate proxies.
|
||||
# Defaults to empty; omit the variables from the host environment for
|
||||
# un-proxied builds and the behaviour is byte-identical to the pre-fix
|
||||
# tree.
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
container_name: certctl-server
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
certctl-tls-init:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
CERTCTL_DATABASE_URL: postgres://certctl:${POSTGRES_PASSWORD:-certctl}@postgres:5432/certctl?sslmode=disable
|
||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||
CERTCTL_SERVER_PORT: 8443
|
||||
CERTCTL_SERVER_TLS_CERT_PATH: /etc/certctl/tls/server.crt
|
||||
CERTCTL_SERVER_TLS_KEY_PATH: /etc/certctl/tls/server.key
|
||||
CERTCTL_LOG_LEVEL: info
|
||||
CERTCTL_AUTH_TYPE: none
|
||||
CERTCTL_KEYGEN_MODE: server # Demo uses server-side keygen; production should use "agent"
|
||||
CERTCTL_NETWORK_SCAN_ENABLED: "true" # Enable network scan GUI with seeded demo targets
|
||||
CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY:-change-me-32-char-encryption-key} # AES-256-GCM for dynamic issuer/target config
|
||||
ports:
|
||||
- "8443:8443"
|
||||
volumes:
|
||||
- certs:/etc/certctl/tls:ro
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8443/health"]
|
||||
test: ["CMD", "curl", "--cacert", "/etc/certctl/tls/ca.crt", "-f", "https://localhost:8443/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
@@ -73,17 +144,29 @@ services:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: Dockerfile.agent
|
||||
# Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
|
||||
# vars into the Docker build so the Go module download stage can reach
|
||||
# the public Go module proxy behind corporate proxies. Defaults to
|
||||
# empty; omit the variables from the host environment for un-proxied
|
||||
# builds and the behaviour is byte-identical to the pre-fix tree.
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
container_name: certctl-agent
|
||||
depends_on:
|
||||
certctl-server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CERTCTL_SERVER_URL: http://certctl-server:8443
|
||||
CERTCTL_SERVER_URL: https://certctl-server:8443
|
||||
CERTCTL_SERVER_CA_BUNDLE_PATH: /etc/certctl/tls/ca.crt
|
||||
CERTCTL_API_KEY: ${CERTCTL_API_KEY:-change-me-in-production}
|
||||
CERTCTL_AGENT_NAME: docker-agent
|
||||
CERTCTL_LOG_LEVEL: info
|
||||
CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys # Agent scans this directory for existing certificates
|
||||
volumes:
|
||||
- agent_keys:/var/lib/certctl/keys
|
||||
- certs:/etc/certctl/tls:ro
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
@@ -112,3 +195,5 @@ volumes:
|
||||
driver: local
|
||||
agent_keys:
|
||||
driver: local
|
||||
certs:
|
||||
driver: local
|
||||
|
||||
@@ -458,4 +458,4 @@ For issues, questions, or contributions:
|
||||
## License
|
||||
|
||||
BSL-1.1 (Business Source License)
|
||||
Converts to Apache 2.0 on March 28, 2033
|
||||
Converts to Apache 2.0 on March 14, 2033
|
||||
|
||||
@@ -236,10 +236,12 @@ kubectl get svc -l app.kubernetes.io/instance=certctl
|
||||
kubectl get ingress
|
||||
kubectl describe ingress certctl
|
||||
|
||||
# Test API connectivity
|
||||
# Test API connectivity (HTTPS-only as of v2.2)
|
||||
POD=$(kubectl get pods -l app.kubernetes.io/component=server -o jsonpath='{.items[0].metadata.name}')
|
||||
kubectl port-forward $POD 8443:8443 &
|
||||
curl -H "Authorization: Bearer $API_KEY" http://localhost:8443/health
|
||||
# If the chart provisioned a self-signed cert, fetch the CA bundle from the TLS secret first:
|
||||
# kubectl get secret certctl-server-tls -o jsonpath='{.data.ca\.crt}' | base64 -d > /tmp/certctl-ca.crt
|
||||
curl --cacert /tmp/certctl-ca.crt -H "Authorization: Bearer $API_KEY" https://localhost:8443/health
|
||||
```
|
||||
|
||||
### Step 6: Access the Dashboard
|
||||
@@ -333,9 +335,10 @@ kubectl logs $POD | tail -20
|
||||
# Port forward to API
|
||||
kubectl port-forward svc/certctl-server 8443:8443 &
|
||||
|
||||
# Create a test certificate
|
||||
# Create a test certificate (HTTPS-only as of v2.2 — pin the chart-provisioned CA bundle)
|
||||
# kubectl get secret certctl-server-tls -o jsonpath='{.data.ca\.crt}' | base64 -d > /tmp/certctl-ca.crt
|
||||
API_KEY="your-api-key"
|
||||
curl -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl --cacert /tmp/certctl-ca.crt -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
|
||||
@@ -33,9 +33,11 @@ kubectl get pods -l app.kubernetes.io/instance=certctl
|
||||
# View server logs
|
||||
kubectl logs -l app.kubernetes.io/component=server -f
|
||||
|
||||
# Access the API
|
||||
# Access the API (HTTPS-only as of v2.2; use --cacert or -k depending on your cert provisioning)
|
||||
kubectl port-forward svc/certctl-server 8443:8443 &
|
||||
curl http://localhost:8443/health
|
||||
# If the chart provisioned a self-signed cert, fetch the CA bundle from the secret first:
|
||||
# kubectl get secret certctl-server-tls -o jsonpath='{.data.ca\.crt}' | base64 -d > /tmp/certctl-ca.crt
|
||||
curl --cacert /tmp/certctl-ca.crt https://localhost:8443/health
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
@@ -4,36 +4,46 @@
|
||||
{{- else if contains "NodePort" .Values.server.service.type }}
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "certctl.fullname" . }}-server)
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
echo https://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.server.service.type }}
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "certctl.fullname" . }}-server --template "{.status.loadBalancer.ingress[0].ip}")
|
||||
echo http://$SERVICE_IP:{{ .Values.server.service.port }}
|
||||
echo https://$SERVICE_IP:{{ .Values.server.service.port }}
|
||||
{{- else }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "certctl.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=server" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
echo "Visit https://127.0.0.1:8443 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8443:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
|
||||
2. Get the default API key:
|
||||
2. Talk to the HTTPS-only server from your workstation:
|
||||
# Export the CA bundle that signed the server cert (self-signed or cert-manager-issued)
|
||||
kubectl get secret --namespace {{ .Release.Namespace }} {{ include "certctl.tls.secretName" . }} \
|
||||
-o jsonpath='{.data.ca\.crt}' | base64 --decode > /tmp/certctl-ca.crt
|
||||
# (If ca.crt is empty, fall back to tls.crt — typical when the Secret
|
||||
# was created from a self-signed bootstrap cert without a separate CA.)
|
||||
|
||||
# Adapt the URL below to match the Server URL printed in step 1.
|
||||
curl --cacert /tmp/certctl-ca.crt https://127.0.0.1:8443/health
|
||||
|
||||
3. Get the default API key:
|
||||
kubectl get secret --namespace {{ .Release.Namespace }} {{ include "certctl.fullname" . }}-server -o jsonpath="{.data.api-key}" | base64 --decode; echo
|
||||
|
||||
3. Get PostgreSQL connection details:
|
||||
4. Get PostgreSQL connection details:
|
||||
Host: {{ include "certctl.fullname" . }}-postgres.{{ .Release.Namespace }}.svc.cluster.local
|
||||
Port: 5432
|
||||
Database: {{ .Values.postgresql.auth.database }}
|
||||
Username: {{ .Values.postgresql.auth.username }}
|
||||
Password: $(kubectl get secret --namespace {{ .Release.Namespace }} {{ include "certctl.fullname" . }}-postgres -o jsonpath="{.data.password}" | base64 --decode)
|
||||
|
||||
4. Check deployment status:
|
||||
5. Check deployment status:
|
||||
kubectl get pods -n {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }}
|
||||
|
||||
5. View server logs:
|
||||
6. View server logs:
|
||||
kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "certctl.name" . }},app.kubernetes.io/component=server -f
|
||||
|
||||
{{- if .Values.agent.enabled }}
|
||||
|
||||
6. View agent logs:
|
||||
7. View agent logs:
|
||||
kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/name={{ include "certctl.name" . }},app.kubernetes.io/component=agent -f
|
||||
|
||||
{{- end }}
|
||||
@@ -58,11 +68,7 @@ IMPORTANT NOTES FOR PRODUCTION:
|
||||
- Use an external PostgreSQL managed service (AWS RDS, Cloud SQL, etc.)
|
||||
- Set postgresql.enabled=false and configure CERTCTL_DATABASE_URL in values
|
||||
|
||||
5. Enable HTTPS/TLS using an Ingress with certificate management:
|
||||
- Configure cert-manager for automatic TLS certificate renewal
|
||||
- Update ingress values with your domain and certificate issuer
|
||||
|
||||
6. Review security contexts and network policies:
|
||||
5. Review security contexts and network policies:
|
||||
- All containers run as non-root
|
||||
- Implement network policies to restrict traffic between components
|
||||
- Consider pod security policies or security standards for your cluster
|
||||
|
||||
@@ -118,8 +118,54 @@ postgres://{{ .Values.postgresql.auth.username }}:$(POSTGRES_PASSWORD)@{{ includ
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Server URL (for agents)
|
||||
Server URL (for agents). HTTPS-only as of v2.2 — see docs/tls.md.
|
||||
*/}}
|
||||
{{- define "certctl.serverURL" -}}
|
||||
http://{{ include "certctl.fullname" . }}-server:{{ .Values.server.service.port }}
|
||||
https://{{ include "certctl.fullname" . }}-server:{{ .Values.server.service.port }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
TLS Secret name resolver.
|
||||
|
||||
Operator-facing precedence:
|
||||
1. server.tls.existingSecret — operator points at a pre-existing kubernetes.io/tls Secret
|
||||
2. server.tls.certManager.secretName — explicit secret name for the cert-manager Certificate CR
|
||||
3. "<fullname>-tls" — default when cert-manager is enabled but secretName is blank
|
||||
|
||||
Never emits an empty string — that case is already excluded by certctl.tls.required below,
|
||||
which must be invoked by any template that depends on the resolved secret name.
|
||||
*/}}
|
||||
{{- define "certctl.tls.secretName" -}}
|
||||
{{- if .Values.server.tls.existingSecret -}}
|
||||
{{- .Values.server.tls.existingSecret -}}
|
||||
{{- else if .Values.server.tls.certManager.secretName -}}
|
||||
{{- .Values.server.tls.certManager.secretName -}}
|
||||
{{- else -}}
|
||||
{{- printf "%s-tls" (include "certctl.fullname" .) -}}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
TLS configuration gate.
|
||||
|
||||
HTTPS is the only supported listener mode (v2.2+). The server refuses to start
|
||||
without a cert/key pair mounted at server.tls.mountPath, so `helm template` /
|
||||
`helm install` must fail loudly at render-time rather than shipping a broken
|
||||
Deployment that crash-loops with "tls config required".
|
||||
|
||||
Operators MUST configure EXACTLY ONE of:
|
||||
(a) server.tls.existingSecret: <name-of-kubernetes.io/tls-secret>
|
||||
(b) server.tls.certManager.enabled: true (+ issuerRef.name populated)
|
||||
|
||||
Any template that mounts the TLS Secret must call
|
||||
`{{ include "certctl.tls.required" . }}` at the top so this guard runs once
|
||||
per affected resource. No-op when configured correctly.
|
||||
*/}}
|
||||
{{- define "certctl.tls.required" -}}
|
||||
{{- if and (not .Values.server.tls.existingSecret) (not .Values.server.tls.certManager.enabled) -}}
|
||||
{{- fail "\n\ncertctl refuses to start without TLS.\n\nSet EXACTLY ONE of:\n --set server.tls.existingSecret=<your-kubernetes.io/tls-secret-name>\nOR\n --set server.tls.certManager.enabled=true \\\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md for the full setup walkthrough, including bootstrap\nguidance for air-gapped clusters without cert-manager.\n" -}}
|
||||
{{- end -}}
|
||||
{{- if and .Values.server.tls.certManager.enabled (not .Values.server.tls.certManager.issuerRef.name) -}}
|
||||
{{- fail "\n\nserver.tls.certManager.enabled=true but server.tls.certManager.issuerRef.name is empty.\n\nSet:\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md.\n" -}}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
{{- if .Values.agent.enabled }}
|
||||
{{- include "certctl.tls.required" . }}
|
||||
{{- if eq .Values.agent.kind "DaemonSet" }}
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
@@ -53,6 +54,8 @@ spec:
|
||||
fieldPath: metadata.name
|
||||
- name: CERTCTL_KEY_DIR
|
||||
value: {{ .Values.agent.keyDir }}
|
||||
- name: CERTCTL_SERVER_CA_BUNDLE_PATH
|
||||
value: "{{ .Values.server.tls.mountPath }}/ca.crt"
|
||||
{{- if .Values.agent.discoveryDirs }}
|
||||
- name: CERTCTL_DISCOVERY_DIRS
|
||||
valueFrom:
|
||||
@@ -70,12 +73,19 @@ spec:
|
||||
mountPath: {{ .Values.agent.keyDir }}
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: server-tls
|
||||
mountPath: {{ .Values.server.tls.mountPath }}
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: agent-keys
|
||||
emptyDir:
|
||||
sizeLimit: 1Gi
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: server-tls
|
||||
secret:
|
||||
secretName: {{ include "certctl.tls.secretName" . }}
|
||||
defaultMode: 0400
|
||||
{{- else if eq .Values.agent.kind "Deployment" }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -135,6 +145,8 @@ spec:
|
||||
{{- end }}
|
||||
- name: CERTCTL_KEY_DIR
|
||||
value: {{ .Values.agent.keyDir }}
|
||||
- name: CERTCTL_SERVER_CA_BUNDLE_PATH
|
||||
value: "{{ .Values.server.tls.mountPath }}/ca.crt"
|
||||
{{- if .Values.agent.discoveryDirs }}
|
||||
- name: CERTCTL_DISCOVERY_DIRS
|
||||
valueFrom:
|
||||
@@ -152,11 +164,18 @@ spec:
|
||||
mountPath: {{ .Values.agent.keyDir }}
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: server-tls
|
||||
mountPath: {{ .Values.server.tls.mountPath }}
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: agent-keys
|
||||
emptyDir:
|
||||
sizeLimit: 1Gi
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: server-tls
|
||||
secret:
|
||||
secretName: {{ include "certctl.tls.secretName" . }}
|
||||
defaultMode: 0400
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -1,14 +1,24 @@
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- if and .Values.ingress.certManager.enabled (not .Values.ingress.certManager.issuerRef.name) -}}
|
||||
{{- fail "\n\ningress.certManager.enabled=true but ingress.certManager.issuerRef.name is empty.\n\nSet:\n --set ingress.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nThis is separate from server.tls.certManager — it issues the external-facing\nIngress cert, not the in-cluster server TLS cert. See docs/tls.md.\n" -}}
|
||||
{{- end -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- if .Values.ingress.certManager.enabled }}
|
||||
{{- if eq .Values.ingress.certManager.issuerRef.kind "ClusterIssuer" }}
|
||||
cert-manager.io/cluster-issuer: {{ .Values.ingress.certManager.issuerRef.name | quote }}
|
||||
{{- else }}
|
||||
cert-manager.io/issuer: {{ .Values.ingress.certManager.issuerRef.name | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.ingress.className }}
|
||||
ingressClassName: {{ .Values.ingress.className }}
|
||||
@@ -33,7 +43,7 @@ spec:
|
||||
pathType: {{ .pathType }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "certctl.fullname" . }}-server
|
||||
name: {{ include "certctl.fullname" $ }}-server
|
||||
port:
|
||||
number: {{ $.Values.server.service.port }}
|
||||
{{- end }}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
{{- if .Values.server.tls.certManager.enabled }}
|
||||
{{- include "certctl.tls.required" . }}
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: {{ include "certctl.fullname" . }}-server-tls
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
app.kubernetes.io/component: server
|
||||
spec:
|
||||
secretName: {{ include "certctl.tls.secretName" . }}
|
||||
commonName: {{ .Values.server.tls.certManager.commonName | quote }}
|
||||
dnsNames:
|
||||
{{- range .Values.server.tls.certManager.dnsNames }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
duration: {{ .Values.server.tls.certManager.duration }}
|
||||
renewBefore: {{ .Values.server.tls.certManager.renewBefore }}
|
||||
usages:
|
||||
- server auth
|
||||
- digital signature
|
||||
- key encipherment
|
||||
privateKey:
|
||||
algorithm: ECDSA
|
||||
size: 256
|
||||
rotationPolicy: Always
|
||||
issuerRef:
|
||||
name: {{ .Values.server.tls.certManager.issuerRef.name | quote }}
|
||||
kind: {{ .Values.server.tls.certManager.issuerRef.kind }}
|
||||
group: {{ .Values.server.tls.certManager.issuerRef.group }}
|
||||
{{- end }}
|
||||
@@ -1,3 +1,4 @@
|
||||
{{- include "certctl.tls.required" . }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
@@ -32,7 +33,7 @@ spec:
|
||||
image: {{ include "certctl.serverImage" . }}
|
||||
imagePullPolicy: {{ .Values.server.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
- name: https
|
||||
containerPort: {{ .Values.server.port }}
|
||||
protocol: TCP
|
||||
env:
|
||||
@@ -40,6 +41,10 @@ spec:
|
||||
value: "0.0.0.0"
|
||||
- name: CERTCTL_SERVER_PORT
|
||||
value: "{{ .Values.server.port }}"
|
||||
- name: CERTCTL_SERVER_TLS_CERT_PATH
|
||||
value: "{{ .Values.server.tls.mountPath }}/tls.crt"
|
||||
- name: CERTCTL_SERVER_TLS_KEY_PATH
|
||||
value: "{{ .Values.server.tls.mountPath }}/tls.key"
|
||||
- name: CERTCTL_DATABASE_URL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
@@ -172,12 +177,19 @@ spec:
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: tls
|
||||
mountPath: {{ .Values.server.tls.mountPath }}
|
||||
readOnly: true
|
||||
{{- if .Values.server.volumeMounts }}
|
||||
{{- toYaml .Values.server.volumeMounts | nindent 12 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: tls
|
||||
secret:
|
||||
secretName: {{ include "certctl.tls.secretName" . }}
|
||||
defaultMode: 0400
|
||||
{{- if .Values.server.volumes }}
|
||||
{{- toYaml .Values.server.volumes | nindent 8 }}
|
||||
{{- end }}
|
||||
|
||||
@@ -13,8 +13,8 @@ spec:
|
||||
type: {{ .Values.server.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.server.service.port }}
|
||||
targetPort: http
|
||||
targetPort: https
|
||||
protocol: TCP
|
||||
name: http
|
||||
name: https
|
||||
selector:
|
||||
{{- include "certctl.serverSelectorLabels" . | nindent 4 }}
|
||||
|
||||
@@ -18,7 +18,14 @@ metadata:
|
||||
name: {{ include "certctl.fullname" . }}
|
||||
labels:
|
||||
{{- include "certctl.labels" . | nindent 4 }}
|
||||
rules: []
|
||||
rules:
|
||||
{{- if .Values.kubernetesSecrets.enabled }}
|
||||
- apiGroups: [""]
|
||||
resources: ["secrets"]
|
||||
verbs: ["get", "list", "create", "update", "patch"]
|
||||
{{- else }}
|
||||
[]
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
|
||||
@@ -48,11 +48,12 @@ server:
|
||||
drop:
|
||||
- ALL
|
||||
|
||||
# Liveness and readiness probes
|
||||
# Liveness and readiness probes (HTTPS-only as of v2.2)
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
port: https
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
@@ -61,12 +62,50 @@ server:
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: http
|
||||
port: https
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 2
|
||||
|
||||
# TLS configuration — REQUIRED. HTTPS is the only supported mode (v2.2+).
|
||||
# Operator must configure EXACTLY ONE of:
|
||||
# (a) server.tls.existingSecret: <name> # pre-existing kubernetes.io/tls Secret
|
||||
# (b) server.tls.certManager.enabled: true # provision a cert-manager Certificate CR
|
||||
# Refusing to set either makes `helm template` fail with a diagnostic pointing at docs/tls.md.
|
||||
tls:
|
||||
# Name of a pre-existing Secret (type kubernetes.io/tls) holding tls.crt + tls.key (+ optional ca.crt).
|
||||
# Leave empty to fall through to the cert-manager path.
|
||||
existingSecret: ""
|
||||
|
||||
# Mount path for the TLS Secret inside the server + agent containers.
|
||||
mountPath: /etc/certctl/tls
|
||||
|
||||
# cert-manager auto-provisioning. Opt-in (off by default per milestone §3.4).
|
||||
certManager:
|
||||
enabled: false
|
||||
|
||||
# Secret name the cert-manager Certificate CR writes into. Agents and the server
|
||||
# both read from this Secret. If empty, defaults to "<fullname>-tls".
|
||||
secretName: ""
|
||||
|
||||
# Cert-manager issuer reference.
|
||||
issuerRef:
|
||||
name: "" # e.g. "letsencrypt-prod" or "internal-ca"
|
||||
kind: ClusterIssuer # ClusterIssuer or Issuer
|
||||
group: cert-manager.io
|
||||
|
||||
# Subject fields on the issued cert.
|
||||
commonName: "certctl-server"
|
||||
dnsNames:
|
||||
- certctl-server
|
||||
- localhost
|
||||
|
||||
# Certificate lifetime + renewal window.
|
||||
duration: 2160h # 90 days
|
||||
renewBefore: 360h # 15 days
|
||||
|
||||
# Service type (ClusterIP, LoadBalancer, NodePort)
|
||||
service:
|
||||
type: ClusterIP
|
||||
@@ -356,7 +395,16 @@ ingress:
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
|
||||
# Optional cert-manager integration for the public-facing Ingress cert.
|
||||
# This is completely independent of server.tls.* — the Ingress terminates
|
||||
# an *additional* TLS hop between the internet and the in-cluster Service.
|
||||
# Leave disabled unless an Ingress is exposing certctl to the outside world.
|
||||
certManager:
|
||||
enabled: false
|
||||
issuerRef:
|
||||
name: "" # e.g. "letsencrypt-prod"
|
||||
kind: ClusterIssuer # ClusterIssuer or Issuer
|
||||
hosts:
|
||||
- host: certctl.local
|
||||
paths:
|
||||
@@ -381,6 +429,13 @@ serviceAccount:
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
# ==============================================================================
|
||||
# Kubernetes Secrets Target Connector
|
||||
# ==============================================================================
|
||||
kubernetesSecrets:
|
||||
# Enable RBAC rules for managing TLS Secrets
|
||||
enabled: false
|
||||
|
||||
# ==============================================================================
|
||||
# Pod Disruption Budget (for HA deployments)
|
||||
# ==============================================================================
|
||||
|
||||
+364
-23
@@ -47,11 +47,30 @@ func envOr(key, fallback string) string {
|
||||
return fallback
|
||||
}
|
||||
|
||||
// HTTPS-Everywhere Phase 6: the test harness now dials the server over TLS and
|
||||
// validates the self-signed cert against the init-container-generated CA bundle
|
||||
// bind-mounted at ./test/certs/ca.crt. The defaults assume the compose setup in
|
||||
// deploy/docker-compose.test.yml; override via the usual env vars when pointing
|
||||
// the suite at a different deployment.
|
||||
//
|
||||
// - CERTCTL_TEST_SERVER_URL — must be https:// for the Phase 6 wiring
|
||||
// - CERTCTL_TEST_CA_BUNDLE — PEM bundle; must contain the server's issuing
|
||||
// CA (self-signed in the compose setup, so server.crt doubles as ca.crt)
|
||||
// - CERTCTL_TEST_INSECURE — set to "true" to fall back to
|
||||
// InsecureSkipVerify when the CA bundle path is unavailable (CI smoke or
|
||||
// exploratory runs only — CI-parity runs MUST use the pinned bundle).
|
||||
//
|
||||
// Under no circumstance does the suite silently downgrade to plaintext HTTP:
|
||||
// Phase 5 (#203) pre-flight guards in cmd/server will refuse to start with an
|
||||
// http:// URL anyway, so a misconfiguration fails loud at test-harness startup
|
||||
// rather than flaking mid-suite.
|
||||
var (
|
||||
serverURL = envOr("CERTCTL_TEST_SERVER_URL", "http://localhost:8443")
|
||||
apiKey = envOr("CERTCTL_TEST_API_KEY", "test-key-2026")
|
||||
dbURL = envOr("CERTCTL_TEST_DB_URL", "postgres://certctl:testpass@localhost:5432/certctl?sslmode=disable")
|
||||
nginxTLS = envOr("CERTCTL_TEST_NGINX_TLS", "localhost:8444")
|
||||
serverURL = envOr("CERTCTL_TEST_SERVER_URL", "https://localhost:8443")
|
||||
apiKey = envOr("CERTCTL_TEST_API_KEY", "test-key-2026")
|
||||
dbURL = envOr("CERTCTL_TEST_DB_URL", "postgres://certctl:testpass@localhost:5432/certctl?sslmode=disable")
|
||||
nginxTLS = envOr("CERTCTL_TEST_NGINX_TLS", "localhost:8444")
|
||||
caBundlePath = envOr("CERTCTL_TEST_CA_BUNDLE", "./certs/ca.crt")
|
||||
insecureTLS = strings.EqualFold(os.Getenv("CERTCTL_TEST_INSECURE"), "true")
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -75,16 +94,74 @@ type testClient struct {
|
||||
apiKey string
|
||||
}
|
||||
|
||||
// buildTLSConfig wires up the x509.CertPool with the self-signed CA bundle
|
||||
// emitted by the certctl-tls-init container. Panics via t.Fatal on the happy
|
||||
// path if both CERTCTL_TEST_CA_BUNDLE is unreadable *and* CERTCTL_TEST_INSECURE
|
||||
// is not set — that combination is almost always a misconfigured test harness
|
||||
// and silently downgrading to InsecureSkipVerify would hide real failures.
|
||||
//
|
||||
// MinVersion is pinned to TLS 1.3 so this matches what cmd/server negotiates
|
||||
// by default; a drift there would surface here first.
|
||||
func buildTLSConfig() *tls.Config {
|
||||
cfg := &tls.Config{
|
||||
MinVersion: tls.VersionTLS13,
|
||||
}
|
||||
if insecureTLS {
|
||||
// Opt-in smoke-run mode; log but don't fail so operators running
|
||||
// `CERTCTL_TEST_INSECURE=true go test -tags integration ./deploy/test/...`
|
||||
// against an ad-hoc environment still get a green suite when the server
|
||||
// is reachable. CI must not set this.
|
||||
cfg.InsecureSkipVerify = true
|
||||
return cfg
|
||||
}
|
||||
pem, err := os.ReadFile(caBundlePath)
|
||||
if err != nil {
|
||||
// Can't use t.Fatal here (called from package-level helpers); fall
|
||||
// back to a panic so the harness dies loud at the first HTTP call.
|
||||
// Operators see a clear "CA bundle missing" message and fix their
|
||||
// setup instead of chasing a confusing TLS handshake error.
|
||||
panic(fmt.Sprintf("integration test: read CA bundle %q: %v — "+
|
||||
"run `docker compose -f deploy/docker-compose.test.yml up` first, or "+
|
||||
"set CERTCTL_TEST_CA_BUNDLE to a valid PEM path, or "+
|
||||
"set CERTCTL_TEST_INSECURE=true for a smoke run", caBundlePath, err))
|
||||
}
|
||||
pool := x509.NewCertPool()
|
||||
if !pool.AppendCertsFromPEM(pem) {
|
||||
panic(fmt.Sprintf("integration test: no PEM certificates parsed from %q", caBundlePath))
|
||||
}
|
||||
cfg.RootCAs = pool
|
||||
return cfg
|
||||
}
|
||||
|
||||
// newTestClient builds a Bearer-authenticated HTTPS client pinned to the
|
||||
// init-container CA. Every phase uses this for REST calls.
|
||||
func newTestClient() *testClient {
|
||||
return &testClient{
|
||||
http: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: buildTLSConfig(),
|
||||
},
|
||||
},
|
||||
baseURL: serverURL,
|
||||
apiKey: apiKey,
|
||||
}
|
||||
}
|
||||
|
||||
// newUnauthHTTPClient returns an *http.Client with the same TLS configuration
|
||||
// but no Bearer token. Used for the Phase 7 RFC 5280 CRL / RFC 8615
|
||||
// `/.well-known/pki/*` probes — those endpoints must be reachable by
|
||||
// *unauthenticated* relying parties per M-006, so we explicitly omit the
|
||||
// Authorization header to prove it.
|
||||
func newUnauthHTTPClient() *http.Client {
|
||||
return &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: buildTLSConfig(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (c *testClient) do(method, path string, body io.Reader) (*http.Response, error) {
|
||||
url := c.baseURL + path
|
||||
req, err := http.NewRequest(method, url, body)
|
||||
@@ -195,16 +272,11 @@ type metricsResponse struct {
|
||||
Uptime float64 `json:"uptime_seconds"`
|
||||
}
|
||||
|
||||
// crlResponse for the CRL endpoint.
|
||||
type crlResponse struct {
|
||||
Version int `json:"version"`
|
||||
Total int `json:"total"`
|
||||
Entries []struct {
|
||||
Serial string `json:"serial_number"`
|
||||
Reason string `json:"reason"`
|
||||
RevokedAt string `json:"revoked_at"`
|
||||
} `json:"entries"`
|
||||
}
|
||||
// M-006: The non-standard JSON CRL endpoint (`GET /api/v1/crl`) was removed.
|
||||
// RFC 5280 §5 defines only the DER wire format, which is now served
|
||||
// unauthenticated at `/.well-known/pki/crl/{issuer_id}` per RFC 8615.
|
||||
// The `crlResponse` Go struct that used to decode the JSON envelope is gone;
|
||||
// Phase 7 parses the DER bytes directly via `x509.ParseRevocationList`.
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PostgreSQL test helper
|
||||
@@ -728,18 +800,48 @@ func TestIntegrationSuite(t *testing.T) {
|
||||
t.Fatalf("revocation response unexpected: %s", body)
|
||||
}
|
||||
|
||||
// Check CRL
|
||||
t.Run("CRL", func(t *testing.T) {
|
||||
resp, err := c.Get("/api/v1/crl")
|
||||
// Check DER CRL served unauthenticated under /.well-known/pki/ per
|
||||
// RFC 5280 §5 + RFC 8615 (M-006). Use newUnauthHTTPClient() — no
|
||||
// Bearer token — to prove the endpoint is reachable by relying
|
||||
// parties that have no certctl API credentials. Post HTTPS-Everywhere
|
||||
// (M-007, Phase 6) the client still speaks TLS 1.3 against the pinned
|
||||
// CA bundle from ./certs/ca.crt; we just skip the Authorization header
|
||||
// to exercise the unauthenticated RFC 5280 / RFC 8615 relying-party
|
||||
// path. Switching from the stdlib http.DefaultClient (plaintext OK,
|
||||
// system trust store only) to the helper keeps the no-auth semantic
|
||||
// while preventing silent plaintext downgrade — the whole point of
|
||||
// this milestone.
|
||||
t.Run("CRL_DER_Unauthenticated", func(t *testing.T) {
|
||||
resp, err := newUnauthHTTPClient().Get(serverURL + "/.well-known/pki/crl/iss-local")
|
||||
if err != nil {
|
||||
t.Fatalf("GET CRL: %v", err)
|
||||
t.Fatalf("GET DER CRL: %v", err)
|
||||
}
|
||||
var crl crlResponse
|
||||
if err := decodeJSON(resp, &crl); err != nil {
|
||||
t.Fatalf("decode CRL: %v", err)
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
t.Fatalf("unexpected status: got %d, want 200 (body=%s)", resp.StatusCode, string(body))
|
||||
}
|
||||
if crl.Total < 1 {
|
||||
t.Fatalf("CRL total: got %d, want >= 1", crl.Total)
|
||||
if ct := resp.Header.Get("Content-Type"); ct != "application/pkix-crl" {
|
||||
t.Errorf("Content-Type: got %q, want %q", ct, "application/pkix-crl")
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("read CRL body: %v", err)
|
||||
}
|
||||
if len(body) == 0 {
|
||||
t.Fatal("CRL body empty")
|
||||
}
|
||||
|
||||
// Parse the DER bytes as an X.509 CRL (RFC 5280) and verify the
|
||||
// just-revoked certificate is listed.
|
||||
crl, err := x509.ParseRevocationList(body)
|
||||
if err != nil {
|
||||
t.Fatalf("parse DER CRL: %v", err)
|
||||
}
|
||||
if len(crl.RevokedCertificateEntries) < 1 {
|
||||
t.Fatalf("CRL entries: got %d, want >= 1", len(crl.RevokedCertificateEntries))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1123,4 +1225,243 @@ func TestIntegrationSuite(t *testing.T) {
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Phase 13: I-005 Phase 1 Red — Notification Retry + Dead Letter Queue (E2E)
|
||||
//
|
||||
// Pins the full retry-loop contract end-to-end. Phase 2 Green must turn
|
||||
// every subtest Green with a single coherent change set (migration 000016
|
||||
// live, scheduler notificationRetryLoop wired as the 11th loop bumping
|
||||
// the total from 10 → 11, service RetryFailedNotifications + MarkAsDead +
|
||||
// RequeueNotification implemented, handler POST
|
||||
// /api/v1/notifications/{id}/requeue routed, list handler parsing the
|
||||
// status query param).
|
||||
//
|
||||
// Subtests:
|
||||
//
|
||||
// 1. MarkAsDead_OnMaxAttempts — a notification seeded at retry_count=4
|
||||
// (one failure shy of the max_attempts=5 gate) with next_retry_at in
|
||||
// the past is promoted to status='dead' on the first retry-loop
|
||||
// tick. The pre-increment arithmetic `retry_count + 1 = 5 =
|
||||
// max_attempts` triggers MarkAsDead instead of scheduling another
|
||||
// retry.
|
||||
//
|
||||
// 2. Requeue_FlipsDeadToPending — POST
|
||||
// /api/v1/notifications/{id}/requeue on a dead row flips status back
|
||||
// to 'pending', resets retry_count to 0, and clears next_retry_at
|
||||
// so the existing ProcessPendingNotifications loop (not the retry
|
||||
// sweep) picks it up on its next tick.
|
||||
//
|
||||
// 3. ListFilter_StatusDead — GET /api/v1/notifications?status=dead
|
||||
// returns only rows in status='dead' so the UI's Dead Letter tab
|
||||
// (web/src/pages/NotificationsPage.test.tsx subtest #1) can isolate
|
||||
// them without client-side filtering.
|
||||
//
|
||||
// Red behavior at HEAD (what Phase 2 Green must flip):
|
||||
//
|
||||
// * Schema: the INSERTs reference retry_count, next_retry_at,
|
||||
// last_error. Migration 000016 is already written (file (a) of
|
||||
// Phase 1 Red) but until it is applied the INSERTs fail with
|
||||
// "column does not exist" — schema-level Red halt.
|
||||
//
|
||||
// * Subtest 1: no retry loop exists at HEAD. The seeded row stays at
|
||||
// status='failed' retry_count=4 forever. The 4-minute waitFor
|
||||
// therefore times out.
|
||||
//
|
||||
// * Subtest 2: /notifications/{id}/requeue is not routed at HEAD
|
||||
// (internal/api/handler/notifications.go registers only list / get /
|
||||
// mark-read). The POST returns 404.
|
||||
//
|
||||
// * Subtest 3: the list handler does not parse the status query param
|
||||
// at HEAD. The response includes rows of every status, so the
|
||||
// "leaked non-dead row" assertion fires.
|
||||
// -----------------------------------------------------------------------
|
||||
t.Run("Phase13_NotificationRetryDLQ", func(t *testing.T) {
|
||||
// Unreachable endpoint so every webhook delivery attempt fails
|
||||
// deterministically — port 1 is never bound. Pinning retry_count=4
|
||||
// + a guaranteed-failing channel is what turns the seeded row into
|
||||
// 'dead' on the very next scheduler tick (one delivery attempt,
|
||||
// retry_count 4→5, crosses max_attempts=5 → MarkAsDead).
|
||||
const blackHole = "http://127.0.0.1:1/i005-red-black-hole"
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Subtest 1: failed → dead transition after one retry-loop tick
|
||||
// ---------------------------------------------------------------
|
||||
t.Run("MarkAsDead_OnMaxAttempts", func(t *testing.T) {
|
||||
id := fmt.Sprintf("notif-i005-dead-%d", time.Now().UnixNano())
|
||||
|
||||
// retry_count=4 + next attempt = 5 = max_attempts → MarkAsDead.
|
||||
// next_retry_at is backdated so the row is immediately eligible
|
||||
// for the retry sweep rather than having to wait for its own
|
||||
// backoff to elapse.
|
||||
past := time.Now().Add(-30 * time.Second).UTC()
|
||||
db.Exec(t, `
|
||||
INSERT INTO notification_events
|
||||
(id, type, channel, recipient, message, status,
|
||||
retry_count, next_retry_at, last_error)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
`,
|
||||
id, "ExpirationWarning", "Webhook", blackHole,
|
||||
"I-005 integration: DLQ promotion on max_attempts",
|
||||
"failed", 4, past, "transient webhook 500",
|
||||
)
|
||||
|
||||
// Give the retry sweep up to 4m to tick at least once (default
|
||||
// 2m interval + seed/sweep/notifier slop). On success the row
|
||||
// carries status='dead' and retry_count has advanced to 5.
|
||||
waitFor(t, "notification transitions to dead", 4*time.Minute, 5*time.Second,
|
||||
func() (bool, error) {
|
||||
var status string
|
||||
var retry int
|
||||
err := db.db.QueryRow(
|
||||
"SELECT status, retry_count FROM notification_events WHERE id = $1",
|
||||
id,
|
||||
).Scan(&status, &retry)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return strings.EqualFold(status, "dead") && retry >= 5, nil
|
||||
})
|
||||
|
||||
// The dead-letter tab is only useful if operators can see why
|
||||
// the row died. MarkAsDead must preserve the most recent
|
||||
// failure string in last_error rather than nil'ing it.
|
||||
var lastErr sql.NullString
|
||||
if err := db.db.QueryRow(
|
||||
"SELECT last_error FROM notification_events WHERE id = $1", id,
|
||||
).Scan(&lastErr); err != nil {
|
||||
t.Fatalf("read last_error: %v", err)
|
||||
}
|
||||
if !lastErr.Valid || lastErr.String == "" {
|
||||
t.Errorf("dead notification %s has empty last_error — "+
|
||||
"retry loop must preserve the most recent failure", id)
|
||||
}
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Subtest 2: dead → pending via manual Requeue endpoint
|
||||
// ---------------------------------------------------------------
|
||||
t.Run("Requeue_FlipsDeadToPending", func(t *testing.T) {
|
||||
id := fmt.Sprintf("notif-i005-requeue-%d", time.Now().UnixNano())
|
||||
|
||||
// Seed directly at status='dead' rather than waiting for a
|
||||
// scheduler tick — this subtest isolates the requeue handler,
|
||||
// not the retry loop (subtest 1 already pins that).
|
||||
past := time.Now().Add(-10 * time.Minute).UTC()
|
||||
db.Exec(t, `
|
||||
INSERT INTO notification_events
|
||||
(id, type, channel, recipient, message, status,
|
||||
retry_count, next_retry_at, last_error)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
`,
|
||||
id, "ExpirationWarning", "Webhook", blackHole,
|
||||
"I-005 integration: manual requeue",
|
||||
"dead", 5, past, "max attempts reached",
|
||||
)
|
||||
|
||||
resp, err := c.Post("/api/v1/notifications/"+id+"/requeue", "")
|
||||
if err != nil {
|
||||
t.Fatalf("POST requeue: %v", err)
|
||||
}
|
||||
body := readBody(resp)
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("requeue status %d, want 200 (body: %s)",
|
||||
resp.StatusCode, body)
|
||||
}
|
||||
// Phase 2 Green handler responds with {"status":"requeued"}
|
||||
// to mirror MarkAsRead's {"status":"marked_as_read"} envelope.
|
||||
if !strings.Contains(body, "requeued") {
|
||||
t.Errorf("requeue body missing 'requeued' marker: %s", body)
|
||||
}
|
||||
|
||||
// DB must reflect the full flip: pending status, reset counter,
|
||||
// cleared next_retry_at. Clearing next_retry_at is what moves
|
||||
// the row out of the retry-sweep partial index and back under
|
||||
// ProcessPendingNotifications.
|
||||
var status string
|
||||
var retry int
|
||||
var nextRetry sql.NullTime
|
||||
if err := db.db.QueryRow(`
|
||||
SELECT status, retry_count, next_retry_at
|
||||
FROM notification_events WHERE id = $1
|
||||
`, id).Scan(&status, &retry, &nextRetry); err != nil {
|
||||
t.Fatalf("read requeued row: %v", err)
|
||||
}
|
||||
if !strings.EqualFold(status, "pending") {
|
||||
t.Errorf("after requeue: status=%q, want 'pending'", status)
|
||||
}
|
||||
if retry != 0 {
|
||||
t.Errorf("after requeue: retry_count=%d, want 0", retry)
|
||||
}
|
||||
if nextRetry.Valid {
|
||||
t.Errorf("after requeue: next_retry_at=%v, want NULL",
|
||||
nextRetry.Time)
|
||||
}
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Subtest 3: GET /notifications?status=dead isolates DLQ rows
|
||||
// ---------------------------------------------------------------
|
||||
t.Run("ListFilter_StatusDead", func(t *testing.T) {
|
||||
suffix := fmt.Sprintf("%d", time.Now().UnixNano())
|
||||
deadID := "notif-i005-filter-dead-" + suffix
|
||||
pendingID := "notif-i005-filter-pending-" + suffix
|
||||
|
||||
// One row at each end of the lifecycle so we can prove the
|
||||
// filter both matches and excludes.
|
||||
db.Exec(t, `
|
||||
INSERT INTO notification_events
|
||||
(id, type, channel, recipient, message, status, retry_count)
|
||||
VALUES ($1, 'ExpirationWarning', 'Webhook', $2,
|
||||
'I-005 filter test: dead row', 'dead', 5)
|
||||
`, deadID, blackHole)
|
||||
db.Exec(t, `
|
||||
INSERT INTO notification_events
|
||||
(id, type, channel, recipient, message, status, retry_count)
|
||||
VALUES ($1, 'ExpirationWarning', 'Webhook', $2,
|
||||
'I-005 filter test: pending row', 'pending', 0)
|
||||
`, pendingID, blackHole)
|
||||
|
||||
// per_page large enough to rule out pagination artifacts as
|
||||
// the reason a seeded row might be missing from the response.
|
||||
resp, err := c.Get("/api/v1/notifications?status=dead&per_page=500")
|
||||
if err != nil {
|
||||
t.Fatalf("GET notifications?status=dead: %v", err)
|
||||
}
|
||||
var pr pagedResponse
|
||||
if err := decodeJSON(resp, &pr); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
|
||||
type row struct {
|
||||
ID string `json:"id"`
|
||||
Status string `json:"status"`
|
||||
}
|
||||
var rows []row
|
||||
if err := json.Unmarshal(pr.Data, &rows); err != nil {
|
||||
t.Fatalf("unmarshal rows: %v", err)
|
||||
}
|
||||
|
||||
var sawDead, sawPending bool
|
||||
for _, r := range rows {
|
||||
if r.ID == deadID {
|
||||
sawDead = true
|
||||
}
|
||||
if r.ID == pendingID {
|
||||
sawPending = true
|
||||
}
|
||||
if !strings.EqualFold(r.Status, "dead") {
|
||||
t.Errorf("status=dead filter leaked non-dead row: "+
|
||||
"id=%s status=%s", r.ID, r.Status)
|
||||
}
|
||||
}
|
||||
if !sawDead {
|
||||
t.Errorf("status=dead filter missed seeded dead row %s", deadID)
|
||||
}
|
||||
if sawPending {
|
||||
t.Errorf("status=dead filter leaked seeded pending row %s",
|
||||
pendingID)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+45
-10
@@ -1,5 +1,30 @@
|
||||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# DEPRECATED — prefer `go test -tags integration ./deploy/test/...`
|
||||
# =============================================================================
|
||||
#
|
||||
# This bash harness predates the Go integration test suite in
|
||||
# deploy/test/integration_test.go (build tag `integration`, 34 subtests across
|
||||
# 13 phases — health, agent heartbeat, Local CA issuance, ACME, step-ca, EST,
|
||||
# S/MIME, discovery, network scan, revocation + CRL, deployment verification).
|
||||
# The Go suite uses crypto/x509, crypto/tls, and database/sql to parse certs,
|
||||
# probe TLS, and talk to PostgreSQL directly — no openssl text-scraping or
|
||||
# brittle curl pipelines. It is the authoritative integration test surface as
|
||||
# of milestone M-007 (HTTPS Everywhere, Phase 6), where the test compose
|
||||
# stack wires the server on https://localhost:8443 behind a pinned CA bundle
|
||||
# at ./certs/ca.crt.
|
||||
#
|
||||
# Run the Go suite:
|
||||
# (cd deploy && docker compose -f docker-compose.test.yml up -d --build)
|
||||
# go test -tags integration -v -count=1 ./deploy/test/...
|
||||
#
|
||||
# Keep this bash script around because:
|
||||
# * It is cited in docs/test-env.md and muscle-memory for contributors.
|
||||
# * It exercises the CLI / curl path end-to-end (a different failure mode
|
||||
# than the Go HTTP client path).
|
||||
# But any NEW integration coverage goes in integration_test.go — not here.
|
||||
#
|
||||
# =============================================================================
|
||||
# certctl End-to-End Test Script
|
||||
# =============================================================================
|
||||
#
|
||||
@@ -32,10 +57,11 @@ set -euo pipefail
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
COMPOSE_FILE="docker-compose.test.yml"
|
||||
API_URL="http://localhost:8443"
|
||||
API_URL="https://localhost:8443"
|
||||
API_KEY="test-key-2026"
|
||||
NGINX_TLS="localhost:8444"
|
||||
AUTH_HEADER="Authorization: Bearer ${API_KEY}"
|
||||
CACERT="./certs/ca.crt"
|
||||
|
||||
# Flags
|
||||
BUILD=true
|
||||
@@ -91,7 +117,7 @@ header() {
|
||||
# API helper: GET endpoint, return JSON body. Exits 1 on HTTP error.
|
||||
api_get() {
|
||||
local path="$1"
|
||||
curl -sf -H "${AUTH_HEADER}" "${API_URL}${path}" 2>/dev/null
|
||||
curl -sf --cacert "${CACERT}" -H "${AUTH_HEADER}" "${API_URL}${path}" 2>/dev/null
|
||||
}
|
||||
|
||||
# API helper: POST with optional JSON body
|
||||
@@ -99,10 +125,10 @@ api_post() {
|
||||
local path="$1"
|
||||
local body="${2:-}"
|
||||
if [ -n "$body" ]; then
|
||||
curl -sf -X POST -H "${AUTH_HEADER}" -H "Content-Type: application/json" \
|
||||
curl -sf --cacert "${CACERT}" -X POST -H "${AUTH_HEADER}" -H "Content-Type: application/json" \
|
||||
-d "$body" "${API_URL}${path}" 2>/dev/null
|
||||
else
|
||||
curl -sf -X POST -H "${AUTH_HEADER}" "${API_URL}${path}" 2>/dev/null
|
||||
curl -sf --cacert "${CACERT}" -X POST -H "${AUTH_HEADER}" "${API_URL}${path}" 2>/dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -608,13 +634,22 @@ else
|
||||
fail "Revocation failed" "$REVOKE_RESP"
|
||||
fi
|
||||
|
||||
info "Checking CRL..."
|
||||
CRL_RESP=$(api_get "/api/v1/crl" 2>/dev/null || echo '{"total":0}')
|
||||
CRL_TOTAL=$(echo "$CRL_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('total',0))" 2>/dev/null || echo 0)
|
||||
if [ "$CRL_TOTAL" -ge 1 ]; then
|
||||
pass "CRL contains $CRL_TOTAL revoked certificate(s)"
|
||||
info "Checking DER CRL under /.well-known/pki (RFC 5280 §5, RFC 8615)..."
|
||||
# The JSON CRL endpoint (`GET /api/v1/crl`) was removed in M-006. RFC 5280
|
||||
# defines only the DER wire format, now served unauthenticated at
|
||||
# `/.well-known/pki/crl/{issuer_id}`. Fetch without the Bearer header to
|
||||
# prove the endpoint is reachable by relying parties with no API key.
|
||||
CRL_TMP=$(mktemp)
|
||||
CRL_HEADERS=$(mktemp)
|
||||
CRL_HTTP_CODE=$(curl -s -o "$CRL_TMP" -D "$CRL_HEADERS" -w "%{http_code}" "${API_URL}/.well-known/pki/crl/iss-local" 2>/dev/null || echo "000")
|
||||
CRL_SIZE=$(wc -c < "$CRL_TMP" | tr -d ' ')
|
||||
CRL_CONTENT_TYPE=$(awk 'tolower($1)=="content-type:" { sub(/\r$/,"",$2); print tolower($2) }' "$CRL_HEADERS" | head -n1)
|
||||
rm -f "$CRL_TMP" "$CRL_HEADERS"
|
||||
|
||||
if [ "$CRL_HTTP_CODE" = "200" ] && [ "$CRL_CONTENT_TYPE" = "application/pkix-crl" ] && [ "$CRL_SIZE" -gt 0 ]; then
|
||||
pass "DER CRL served unauthenticated (HTTP 200, Content-Type application/pkix-crl, ${CRL_SIZE} bytes)"
|
||||
else
|
||||
fail "CRL empty after revocation"
|
||||
fail "DER CRL fetch failed: HTTP=$CRL_HTTP_CODE Content-Type=$CRL_CONTENT_TYPE size=$CRL_SIZE"
|
||||
fi
|
||||
|
||||
CERT_STATUS=$(api_get "/api/v1/certificates/mc-local-test" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "unknown")
|
||||
|
||||
+221
-46
@@ -61,7 +61,7 @@ flowchart TB
|
||||
API["REST API\n(Go net/http, :8443)"]
|
||||
SVC["Service Layer"]
|
||||
REPO["Repository Layer\n(database/sql + lib/pq)"]
|
||||
SCHED["Background Scheduler\n7 loops"]
|
||||
SCHED["Background Scheduler\n8 always-on + 4 optional loops"]
|
||||
DASH["Web Dashboard\n(React SPA)"]
|
||||
end
|
||||
|
||||
@@ -82,6 +82,12 @@ flowchart TB
|
||||
CA4["OpenSSL / Custom CA\n(script-based)"]
|
||||
CA6["Vault PKI\n(token auth, /sign API)"]
|
||||
CA7["DigiCert CertCentral\n(async order model)"]
|
||||
CA8["Sectigo SCM\n(async order model)"]
|
||||
CA9["Google CAS\n(OAuth2, sync)"]
|
||||
CA10["AWS ACM PCA\n(sync issuance)"]
|
||||
CA11["Entrust\n(mTLS, sync/async)"]
|
||||
CA12["GlobalSign Atlas\n(mTLS + API key)"]
|
||||
CA13["EJBCA\n(mTLS or OAuth2)"]
|
||||
end
|
||||
|
||||
subgraph "Target Systems"
|
||||
@@ -92,8 +98,12 @@ flowchart TB
|
||||
T7["Caddy\n(admin API / file)"]
|
||||
T8["Envoy\n(file-based SDS)"]
|
||||
T9["Postfix/Dovecot\n(file + service reload)"]
|
||||
T2["F5 BIG-IP\n(proxy agent + iControl REST, planned)"]
|
||||
T2["F5 BIG-IP\n(proxy agent + iControl REST)"]
|
||||
T3["IIS\n(WinRM + local)"]
|
||||
T10["SSH\n(SFTP + reload)"]
|
||||
T11["WinCertStore\n(PowerShell import)"]
|
||||
T12["Java Keystore\n(keytool pipeline)"]
|
||||
T13["Kubernetes Secrets\n(K8s API)"]
|
||||
end
|
||||
|
||||
DASH --> API
|
||||
@@ -101,7 +111,7 @@ flowchart TB
|
||||
SVC --> REPO
|
||||
REPO --> PG
|
||||
SCHED --> SVC
|
||||
SVC -->|"Issue/Renew"| CA1 & CA2 & CA3 & CA4 & CA6 & CA7
|
||||
SVC -->|"Issue/Renew"| CA1 & CA2 & CA3 & CA4 & CA6 & CA7 & CA8 & CA9 & CA10
|
||||
|
||||
A1 & A2 & A3 -->|"CSR + Heartbeat"| API
|
||||
API -->|"Cert + Chain\n(NO private key)"| A1 & A2 & A3
|
||||
@@ -121,7 +131,7 @@ The server exposes a REST API under `/api/v1/` and optionally serves the web das
|
||||
|
||||
### Agents
|
||||
|
||||
Lightweight Go processes that run on or near your infrastructure. Agents generate ECDSA P-256 private keys locally, create CSRs, and submit them to the control plane for signing — private keys never leave agent infrastructure. Agents also handle certificate deployment to target systems (NGINX, Apache httpd, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS fully implemented; F5 BIG-IP interface stub only) and report job status. They communicate with the control plane via HTTP and authenticate with API keys.
|
||||
Lightweight Go processes that run on or near your infrastructure. Agents generate ECDSA P-256 private keys locally, create CSRs, and submit them to the control plane for signing — private keys never leave agent infrastructure. Agents also handle certificate deployment to target systems (NGINX, Apache httpd, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS, F5 BIG-IP, SSH, Windows Certificate Store, Java Keystore, Kubernetes Secrets) and report job status. They communicate with the control plane via HTTP and authenticate with API keys.
|
||||
|
||||
The agent runs two background loops: a heartbeat (every 60 seconds) to signal it's alive, and a work poll (every 30 seconds) to check for actionable jobs via `GET /api/v1/agents/{id}/work`. Jobs may be `AwaitingCSR` (agent needs to generate key + submit CSR) or `Deployment` (agent needs to deploy a certificate). Private keys are stored in `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`) with 0600 permissions.
|
||||
|
||||
@@ -129,11 +139,21 @@ The agent runs two background loops: a heartbeat (every 60 seconds) to signal it
|
||||
|
||||
**Agent groups (M11b):** Dynamic device grouping allows organizing agents by metadata criteria. Agent groups can match by OS, architecture, IP CIDR, and version. Groups support both dynamic matching (agents automatically join when criteria match) and manual membership (explicit include/exclude). Renewal policies can be scoped to agent groups via the `agent_group_id` foreign key. The GUI provides full CRUD management for agent groups with visual match criteria badges.
|
||||
|
||||
**Agent soft-retirement (I-004):** `DELETE /api/v1/agents/{id}` is a soft-delete surface — the row is never removed. Retirement stamps `agents.retired_at` (TIMESTAMPTZ) and `agents.retired_reason` (TEXT) and flips the operational status to `Offline`. Default listings (`GET /api/v1/agents`, the dashboard stats counter, and the stale-offline sweeper) filter retired rows out via `AgentRepository.ListActive`; retired rows are surfaced only through the opt-in `GET /api/v1/agents/retired` view. The endpoint follows a preflight → block → escape-hatch contract:
|
||||
|
||||
- **Clean retire** (no active dependencies) — `200 OK` with `RetireAgentResponse` (`cascade=false`, zero counts).
|
||||
- **Blocked by active dependencies** — `409 Conflict` with `BlockedByDependenciesResponse`. The three counts (`active_targets`, `active_certificates`, `pending_jobs`) tell the operator exactly which rows would be orphaned. The schema diverges from `ErrorResponse` because downstream dashboards parse the stable three-key shape.
|
||||
- **Force cascade** — `DELETE /api/v1/agents/{id}?force=true&reason=...`. `reason` is required (400 otherwise). Transactionally soft-retires downstream `deployment_targets`, cancels pending jobs, and soft-retires the agent, emitting an `agent_retirement_cascaded` audit event with actor + reason + per-bucket counts.
|
||||
- **Idempotent re-retire** — a retire attempt against an already-retired agent returns `204 No Content` with an empty body (no second audit event, no response shape — callers that POST again on a retry get a clean no-op).
|
||||
- **Sentinel refusal** — the four sentinel agent IDs (`server-scanner`, `cloud-aws-sm`, `cloud-azure-kv`, `cloud-gcp-sm`) back non-agent discovery subsystems (the network scanner and the three cloud secret-manager sources). They are refused unconditionally — even with `force=true` — via `ErrAgentIsSentinel` → `403 Forbidden`. The ID list lives in `internal/domain/connector.go` (`SentinelAgentIDs`) so handler, repository, and scheduler code can filter them without importing `service`.
|
||||
|
||||
Retired agents receive `410 Gone` on subsequent heartbeats (`service.ErrAgentRetired`). `cmd/agent` treats 410 as a terminal signal and exits cleanly so retired agents stop phoning home. Migration `000015` flipped `deployment_targets.agent_id` from `ON DELETE CASCADE` to `ON DELETE RESTRICT`, making the old hard-delete path a schema error and forcing all retirement through this contract.
|
||||
|
||||
### Web Dashboard
|
||||
|
||||
The web dashboard is the primary operational interface for certctl. It is built with Vite + React + TypeScript and uses TanStack Query for server state management (caching, background refetching, optimistic updates).
|
||||
|
||||
**Current views** (21 pages): certificate inventory (list with multi-select bulk operations + "New Certificate" creation modal + detail with deployment status timeline, inline policy/profile editor, version history, deploy, revoke, archive, and trigger renewal actions), agent fleet (list + detail with system info + OS/architecture grouping with charts), job queue (status, retry, cancel, approve/reject for AwaitingApproval jobs), notification inbox (threshold alert grouping, mark-as-read), audit trail (time range, actor, action filters + CSV/JSON export), policy management (rules with enable/disable toggle + delete + violations), issuers (list with test connection + delete), targets (list with 3-step configuration wizard + delete), owners (list with team resolution + delete), teams (list with delete), agent groups (list with dynamic match criteria badges + enable/disable + delete), certificate profiles (list with crypto constraints), short-lived credentials dashboard (TTL countdown, profile filtering, auto-refresh), discovered certificates triage (claim/dismiss unmanaged certs discovered by agents or network scans), network scan targets management (CRUD for network scan targets + Scan Now button), summary dashboard with charts (expiration heatmap, renewal success rate, status distribution, issuance rate), and login page.
|
||||
**Current views** (24 pages): certificate inventory (list with multi-select bulk operations + "New Certificate" creation modal + detail with deployment status timeline, inline policy/profile editor, version history, deploy, revoke, archive, and trigger renewal actions), agent fleet (list + detail with system info + OS/architecture grouping with charts), job queue (list + detail with verification section, timeline, audit events; approve/reject for AwaitingApproval jobs), notification inbox (threshold alert grouping, mark-as-read), audit trail (time range, actor, action filters + CSV/JSON export), policy management (rules with enable/disable toggle + delete + violations), issuers (catalog with 10 type cards + 3-step create wizard + detail with test connection), targets (list with 3-step configuration wizard + detail with deployment history), owners (list with team resolution + delete), teams (list with delete), agent groups (list with dynamic match criteria badges + enable/disable + delete), certificate profiles (list with crypto constraints), short-lived credentials dashboard (TTL countdown, profile filtering, auto-refresh), discovered certificates triage (claim/dismiss unmanaged certs discovered by agents or network scans), network scan targets management (CRUD + Scan Now button), summary dashboard with charts (expiration heatmap, renewal success rate, status distribution, issuance rate), digest preview and send, observability (health, metrics, Prometheus config), and login page.
|
||||
|
||||
The dashboard includes an **ErrorBoundary component** for graceful error recovery — if a view crashes, the boundary catches the error and displays a user-friendly message instead of breaking the entire dashboard. It also includes a **demo mode** that activates when the API is unreachable — it renders realistic mock data for screenshots and offline presentations.
|
||||
|
||||
@@ -265,6 +285,9 @@ erDiagram
|
||||
text channel
|
||||
text recipient
|
||||
text status
|
||||
int retry_count
|
||||
timestamptz next_retry_at
|
||||
text last_error
|
||||
}
|
||||
certificate_profiles {
|
||||
text id PK
|
||||
@@ -386,7 +409,11 @@ sequenceDiagram
|
||||
Note over A: Agent deploys using locally-held private key
|
||||
```
|
||||
|
||||
**Profile enforcement:** If the certificate is assigned to a profile (`certificate_profile_id`), the profile's `allowed_key_algorithms` and `max_validity_days` constraints are checked during CSR validation. A CSR with a disallowed key type or a validity period exceeding the profile maximum is rejected before reaching the issuer connector.
|
||||
**Profile enforcement (M11c):** Crypto policy enforcement is wired into all four issuance paths: renewal (server-side and agent CSR), agent fallback CSR signing, EST enrollment (RFC 7030), and SCEP enrollment (RFC 8894). At each path, the service layer resolves the certificate's profile and calls `ValidateCSRAgainstProfile()` to check the CSR key algorithm and minimum key size against the profile's `allowed_key_algorithms` rules. A CSR with a disallowed key type or insufficient key size is rejected before reaching the issuer connector.
|
||||
|
||||
**MaxTTL enforcement:** When a profile specifies `max_ttl_seconds`, the value is forwarded through the service-layer `IssuerConnector` interface to the connector layer via `MaxTTLSeconds` on `IssuanceRequest` and `RenewalRequest`. Each issuer connector enforces the cap according to its capabilities: the Local CA caps `NotAfter` directly, Vault overrides its TTL string, step-ca caps `NotAfter` with zero-value handling, and OpenSSL logs an advisory warning (script-based signing can't enforce server-side). For CAs that control validity themselves (ACME, DigiCert, Sectigo, Google CAS, AWS ACM PCA), MaxTTLSeconds passes through but the CA makes the final decision.
|
||||
|
||||
**Key metadata persistence:** Certificate versions record `key_algorithm` and `key_size` extracted from the CSR during issuance. This metadata enables post-hoc auditing — operators can verify that all issued certificates comply with the key requirements in effect at the time of issuance.
|
||||
|
||||
#### Server-Side Key Generation (Demo Only)
|
||||
|
||||
@@ -418,7 +445,7 @@ The agent deploys certificates using target connectors. Each connector knows how
|
||||
- **NGINX**: Writes cert/chain/key files to disk, validates config with `nginx -t`, reloads with `nginx -s reload` or `systemctl reload nginx`
|
||||
- **Apache httpd**: Writes separate cert/chain/key files, validates with `apachectl configtest`, graceful reload
|
||||
- **HAProxy**: Builds a combined PEM file (cert + chain + key), optionally validates config, reloads via systemctl or signal
|
||||
- **F5 BIG-IP** (planned): A proxy agent in the same network zone calls the iControl REST API to upload certificate and update SSL profile bindings. The server assigns the work; the proxy agent executes it.
|
||||
- **F5 BIG-IP**: A proxy agent in the same network zone calls the iControl REST API to upload certificate/key files, install crypto objects, and update the SSL client profile within an atomic transaction. The server assigns the work; the proxy agent executes it.
|
||||
- **IIS** (implemented, dual-mode): (1) Agent-local (recommended) — a Windows agent on the IIS box runs PowerShell `Import-PfxCertificate` + `Set-WebBinding` directly with PFX conversion and SHA-1 thumbprint computation. (2) Proxy agent WinRM — for agentless IIS targets, a nearby Windows agent reaches the IIS box via WinRM.
|
||||
|
||||
The agent handles both the certificate (public) and the private key (read from local key store at `CERTCTL_KEY_DIR`). The control plane never sees the private key and never initiates outbound connections to agents or targets (pull-only model).
|
||||
@@ -449,46 +476,65 @@ sequenceDiagram
|
||||
API-->>U: 200 OK
|
||||
```
|
||||
|
||||
The revocation is recorded in the `certificate_revocations` table (separate from the certificate status update) for CRL generation. The DER-encoded CRL at `GET /api/v1/crl/{issuer_id}` is generated on-demand by querying this table and signing with the issuing CA's key. The OCSP responder at `GET /api/v1/ocsp/{issuer_id}/{serial}` checks both the certificate status and the revocations table to return signed good/revoked/unknown responses.
|
||||
The revocation is recorded in the `certificate_revocations` table (separate from the certificate status update) for CRL generation. The DER-encoded CRL at `GET /.well-known/pki/crl/{issuer_id}` (RFC 5280 §5, RFC 8615) is generated on-demand by querying this table and signing with the issuing CA's key. The OCSP responder at `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (RFC 6960) checks both the certificate status and the revocations table to return signed good/revoked/unknown responses. Both endpoints are served unauthenticated — relying parties (TLS clients, hardware appliances, browsers) must be able to reach them without a certctl API key — and carry the IANA-registered media types `application/pkix-crl` and `application/ocsp-response` respectively.
|
||||
|
||||
Short-lived certificates (those with profile TTL < 1 hour) return "good" from OCSP and are excluded from CRL — their rapid expiry is treated as sufficient revocation.
|
||||
|
||||
#### Bulk Revocation
|
||||
|
||||
For compliance events requiring fleet-wide revocation (key compromise, CA distrust, mass decommission), certctl supports bulk revocation by filter criteria. The `POST /api/v1/certificates/bulk-revoke` endpoint accepts filter parameters (profile_id, owner_id, agent_id, issuer_id) and creates individual revocation jobs for each matching certificate. Bulk revocation reuses the same 7-step single-cert flow for each certificate — no new issuer notification or audit mechanics. The operation is idempotent: revoking an already-revoked certificate is a no-op. Partial failures are tolerated — if one certificate fails to revoke (e.g., issuer unavailable), the operation continues for remaining certs and returns a summary. A single `bulk_revocation_initiated` audit event logs the operation with filter criteria, operator actor, and summary (total requested, succeeded, failed counts). Audit events for individual certificate revocations record the operator identity separately. The GUI bulk revoke button on the certificates list filters by visible selections and displays an affected-cert count modal before confirmation.
|
||||
|
||||
### 4. Automatic Renewal
|
||||
|
||||
The control plane runs a scheduler with seven background loops:
|
||||
The control plane runs a scheduler with 8 always-on loops plus up to 4 optional loops (enabled by configuration). `internal/scheduler/scheduler.go:262-265` is the authoritative count.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph "Scheduler (Background Goroutines)"
|
||||
R["Renewal Checker\n⏱ every 1h"]
|
||||
J["Job Processor\n⏱ every 30s"]
|
||||
JR["Job Retry\n⏱ every 5m"]
|
||||
JT["Job Timeout\n⏱ every 10m"]
|
||||
H["Agent Health\n⏱ every 2m"]
|
||||
N["Notification Processor\n⏱ every 1m"]
|
||||
NR["Notification Retry\n⏱ every 2m"]
|
||||
SL["Short-Lived Expiry\n⏱ every 30s"]
|
||||
NS["Network Scanner\n⏱ every 6h"]
|
||||
DG["Certificate Digest\n⏱ every 24h"]
|
||||
HC["Endpoint Health\n⏱ every 60s"]
|
||||
CD["Cloud Discovery\n⏱ every 6h"]
|
||||
end
|
||||
|
||||
R -->|"Find expiring certs\nCreate renewal jobs"| DB[("PostgreSQL")]
|
||||
J -->|"Process pending jobs\nCoordinate issuance"| DB
|
||||
JR -->|"Retry Failed jobs\nFailed→Pending"| DB
|
||||
JT -->|"Reap stalled AwaitingCSR / AwaitingApproval jobs"| DB
|
||||
H -->|"Check heartbeat staleness\nMark agents offline"| DB
|
||||
N -->|"Send pending notifications\nEmail / Webhook / Slack"| DB
|
||||
NR -->|"Retry failed notifications\n2^n-min backoff, DLQ after 5 attempts"| DB
|
||||
SL -->|"Expire short-lived certs\nMark as Expired"| DB
|
||||
NS -->|"Probe TLS endpoints\nStore discovered certs"| DB
|
||||
DG -->|"Generate & send HTML digest\nEmail to recipients"| DB
|
||||
HC -->|"Probe deployed TLS endpoints\nState machine + mismatch"| DB
|
||||
CD -->|"AWS SM / Azure KV / GCP SM\nFeed discovery pipeline"| DB
|
||||
```
|
||||
|
||||
| Loop | Interval | Timeout | Purpose |
|
||||
|------|----------|---------|---------|
|
||||
| Renewal checker | 1 hour | 5 minutes | Finds certificates approaching expiry, creates renewal jobs |
|
||||
| Job processor | 30 seconds | 2 minutes | Processes pending jobs (issuance, renewal, deployment) |
|
||||
| Agent health check | 2 minutes | 1 minute | Marks agents as offline if heartbeat is stale |
|
||||
| Notification processor | 1 minute | 1 minute | Sends pending notifications via configured channels |
|
||||
| Short-lived expiry | 30 seconds | 30 seconds | Marks expired short-lived certificates (profile TTL < 1 hour) |
|
||||
| Network scanner | 6 hours | 30 minutes | Probes TLS endpoints on configured CIDR ranges, stores discovered certs (M21, opt-in via `CERTCTL_NETWORK_SCAN_ENABLED`). CIDR size validated at API level — max /20 (4096 IPs) per range. |
|
||||
| Certificate digest | 24 hours | 5 minutes | Generates HTML email with certificate stats, expiration timeline, job health, agent count. Does NOT run on startup — waits for first scheduled tick. Configurable interval and recipients via `CERTCTL_DIGEST_INTERVAL` and `CERTCTL_DIGEST_RECIPIENTS`. Falls back to certificate owner emails if no explicit recipients configured. |
|
||||
| Loop | Interval | Always-on? | Purpose |
|
||||
|------|----------|------------|---------|
|
||||
| Renewal checker | 1 hour | Yes | Finds certificates approaching expiry (threshold-based or ARI-directed), creates renewal jobs |
|
||||
| Job processor | 30 seconds | Yes | Processes pending jobs (issuance, renewal, deployment) |
|
||||
| Job retry | 5 minutes (`CERTCTL_SCHEDULER_RETRY_INTERVAL`) | Yes | Transitions `Failed` jobs back to `Pending` for re-dispatch (I-001) |
|
||||
| Job timeout | 10 minutes (`CERTCTL_JOB_TIMEOUT_INTERVAL`) | Yes | Reaps `AwaitingCSR` jobs older than 24h and `AwaitingApproval` jobs older than 7d to `Failed`, feeding the retry loop (I-003) |
|
||||
| Agent health check | 2 minutes | Yes | Marks agents as offline if heartbeat is stale |
|
||||
| Notification processor | 1 minute | Yes | Sends pending notifications via configured channels |
|
||||
| Notification retry | 2 minutes (`CERTCTL_NOTIFICATION_RETRY_INTERVAL`) | Yes | Re-dispatches `Failed` notifications whose `next_retry_at` has elapsed; exponential backoff (2^n minutes, capped at 1h), 5-attempt budget, terminal `dead` status after exhaustion (I-005) |
|
||||
| Short-lived expiry | 30 seconds | Yes | Marks expired short-lived certificates (profile TTL < 1 hour) |
|
||||
| Network scanner | 6 hours | Opt-in (`CERTCTL_NETWORK_SCAN_ENABLED`) | Probes TLS endpoints on configured CIDR ranges, stores discovered certs (M21). CIDR size validated at API level — max /20 (4096 IPs) per range. |
|
||||
| Certificate digest | 24 hours (`CERTCTL_DIGEST_INTERVAL`) | Opt-in (digest service) | Generates HTML email with certificate stats, expiration timeline, job health, agent count. Does NOT run on startup — waits for first scheduled tick. Falls back to certificate owner emails if no explicit recipients configured. |
|
||||
| Endpoint health | 60 seconds (`CERTCTL_HEALTH_CHECK_INTERVAL`) | Opt-in (health check service) | Probes deployed TLS endpoints, drives the healthy/degraded/down/cert_mismatch state machine (M48) |
|
||||
| Cloud discovery | 6 hours | Opt-in (at least one cloud source configured) | Walks AWS Secrets Manager / Azure Key Vault / GCP Secret Manager, feeds discovery pipeline (M50) |
|
||||
|
||||
Each loop uses `sync/atomic.Bool` idempotency guards to prevent concurrent tick execution — if a loop iteration is still running when the next tick fires, the tick is skipped with a warning log. All loops (including short-lived expiry check) run immediately on startup before entering their ticker interval, ensuring no gap between scheduler start and first execution. The certificate digest loop is the exception — it does NOT run on startup, only on scheduled ticks. Graceful shutdown uses `sync.WaitGroup` with `WaitForCompletion()` to drain all in-flight work before process exit.
|
||||
Each loop uses `sync/atomic.Bool` idempotency guards to prevent concurrent tick execution — if a loop iteration is still running when the next tick fires, the tick is skipped with a warning log. Most loops (including short-lived expiry, job retry, job timeout, and notification retry) run immediately on startup before entering their ticker interval, ensuring no gap between scheduler start and first execution. The certificate digest loop is the exception — it does NOT run on startup, only on scheduled ticks. Graceful shutdown uses `sync.WaitGroup` with `WaitForCompletion()` to drain all in-flight work before process exit.
|
||||
|
||||
Each operation has a context timeout to prevent indefinite hangs if external services become unresponsive.
|
||||
|
||||
@@ -509,11 +555,16 @@ flowchart TB
|
||||
II["IssuerConnector Interface\nIssueCertificate() | RenewCertificate()\nRevokeCertificate() | GetOrderStatus()"]
|
||||
II --> LC["Local CA"]
|
||||
II --> ACME["ACME v2"]
|
||||
II --> SC["step-ca"]
|
||||
II --> SCA["step-ca"]
|
||||
II --> OC["OpenSSL / Custom CA"]
|
||||
II --> VP["Vault PKI"]
|
||||
II --> DC["DigiCert CertCentral"]
|
||||
II --> SG["Sectigo SCM"]
|
||||
II --> GC["Google CAS"]
|
||||
II --> AP2["AWS ACM PCA"]
|
||||
II --> EN["Entrust"]
|
||||
II --> GS["GlobalSign Atlas"]
|
||||
II --> EJ["EJBCA"]
|
||||
end
|
||||
|
||||
subgraph "Target Connectors"
|
||||
@@ -527,7 +578,11 @@ flowchart TB
|
||||
TI --> EV["Envoy"]
|
||||
TI --> PO["Postfix/Dovecot"]
|
||||
TI --> IIS["IIS"]
|
||||
TI --> F5["F5 BIG-IP (interface only)"]
|
||||
TI --> F5["F5 BIG-IP"]
|
||||
TI --> SSH["SSH"]
|
||||
TI --> WCS["WinCertStore"]
|
||||
TI --> JKS["Java Keystore"]
|
||||
TI --> K8S["K8s Secrets"]
|
||||
end
|
||||
|
||||
subgraph "Notifier Connectors"
|
||||
@@ -579,9 +634,9 @@ type Connector interface {
|
||||
}
|
||||
```
|
||||
|
||||
Built-in issuers: **Local CA** (self-signed or sub-CA mode using `crypto/x509`), **ACME v2** (HTTP-01, DNS-01, and DNS-PERSIST-01 challenges, compatible with Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, and any ACME-compliant CA), **step-ca** (Smallstep private CA via native /sign API with JWK provisioner auth), **OpenSSL/Custom CA** (script-based signing delegating to user-provided shell scripts), **Vault PKI** (HashiCorp Vault's PKI secrets engine via /sign API with token auth), and **DigiCert** (commercial CA via CertCentral REST API with async order processing). The ACME connector uses `golang.org/x/crypto/acme`, generates an ECDSA P-256 account key, handles account registration with ToS acceptance and optional External Account Binding (EAB) for CAs that require it (ZeroSSL, Google Trust Services, SSL.com), order creation, challenge solving (HTTP-01 via built-in server, DNS-01 via script-based hooks, DNS-PERSIST-01 via standing TXT records with auto-fallback to DNS-01), order finalization, and DER-to-PEM chain conversion. For ZeroSSL, EAB credentials are auto-fetched from ZeroSSL's public API when the directory URL is detected as ZeroSSL and no EAB credentials are provided — zero-friction onboarding with no dashboard visit required.
|
||||
Built-in issuers (9 connectors): **Local CA** (self-signed or sub-CA mode using `crypto/x509`), **ACME v2** (HTTP-01, DNS-01, and DNS-PERSIST-01 challenges, compatible with Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, and any ACME-compliant CA), **step-ca** (Smallstep private CA via native /sign API with JWK provisioner auth), **OpenSSL/Custom CA** (script-based signing delegating to user-provided shell scripts), **Vault PKI** (HashiCorp Vault's PKI secrets engine via /sign API with token auth), **DigiCert** (commercial CA via CertCentral REST API with async order processing), **Sectigo SCM** (async order model with 3-header auth), **Google CAS** (Cloud Certificate Authority Service with OAuth2 service account auth), and **AWS ACM Private CA** (synchronous issuance via ACM PCA API). The ACME connector uses `golang.org/x/crypto/acme`, generates an ECDSA P-256 account key, handles account registration with ToS acceptance and optional External Account Binding (EAB) for CAs that require it (ZeroSSL, Google Trust Services, SSL.com), order creation, challenge solving (HTTP-01 via built-in server, DNS-01 via script-based hooks, DNS-PERSIST-01 via standing TXT records with auto-fallback to DNS-01), order finalization, and DER-to-PEM chain conversion. For ZeroSSL, EAB credentials are auto-fetched from ZeroSSL's public API when the directory URL is detected as ZeroSSL and no EAB credentials are provided — zero-friction onboarding with no dashboard visit required.
|
||||
|
||||
**ACME Renewal Information (ARI, RFC 9702):** The ACME connector supports CA-directed renewal timing via the `GetRenewalInfo()` method. Instead of using fixed thresholds (e.g., renew 30 days before expiry), the CA tells certctl when to renew by providing a `suggestedWindow` with start and end times. This is useful for distributing renewal load during maintenance windows and coordinating mass-revocation scenarios. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. Cert ID is computed as `base64url(SHA-256(DER cert))` per RFC 9702. If the CA doesn't support ARI (404 from the ARI endpoint), certctl automatically falls back to threshold-based renewal — no operator intervention required. Errors from the CA are logged as warnings.
|
||||
**ACME Renewal Information (ARI, RFC 9773):** The ACME connector supports CA-directed renewal timing via the `GetRenewalInfo()` method. Instead of using fixed thresholds (e.g., renew 30 days before expiry), the CA tells certctl when to renew by providing a `suggestedWindow` with start and end times. This is useful for distributing renewal load during maintenance windows and coordinating mass-revocation scenarios. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. Cert ID is computed as `base64url(SHA-256(DER cert))` per RFC 9773. If the CA doesn't support ARI (404 from the ARI endpoint), certctl automatically falls back to threshold-based renewal — no operator intervention required. Errors from the CA are logged as warnings.
|
||||
|
||||
The interface also includes `GetCACertPEM(ctx)` for CA chain distribution (used by the EST server's `/cacerts` endpoint).
|
||||
|
||||
@@ -599,11 +654,11 @@ type Connector interface {
|
||||
|
||||
The `DeploymentRequest` struct carries the full material needed by the target system: the signed certificate, the CA chain, the agent-generated private key, target-specific configuration, and arbitrary metadata. The key field is populated by the agent from its local key store (`CERTCTL_KEY_DIR`) — it never originates from the control plane.
|
||||
|
||||
Built-in targets: **NGINX** (writes cert/chain/key files, validates with `nginx -t`, reloads), **Apache httpd** (writes cert/chain/key files, validates with `apachectl configtest`, graceful reload), **HAProxy** (combined PEM file with cert+chain+key, validates config, reloads via systemctl/signal), **Traefik** (file provider — writes cert/key to watched directory, Traefik auto-reloads), **Caddy** (dual-mode: admin API hot-reload or file-based), **F5 BIG-IP** (interface only — proxy agent + iControl REST, implementation planned), **IIS** (interface only — dual-mode: agent-local PowerShell primary + proxy agent WinRM for agentless targets, implementation planned).
|
||||
Built-in targets (14 connector types): **NGINX** (writes cert/chain/key files, validates with `nginx -t`, reloads), **Apache httpd** (writes cert/chain/key files, validates with `apachectl configtest`, graceful reload), **HAProxy** (combined PEM file with cert+chain+key, validates config, reloads via systemctl/signal), **Traefik** (file provider — writes cert/key to watched directory, Traefik auto-reloads), **Caddy** (dual-mode: admin API hot-reload or file-based), **Envoy** (file-based with optional SDS JSON config), **F5 BIG-IP** (proxy agent + iControl REST, transaction-based atomic SSL profile updates), **IIS** (dual-mode: agent-local PowerShell + proxy agent WinRM for agentless targets), **Postfix/Dovecot** (file write + service reload), **SSH** (agentless deployment via SSH/SFTP), **Windows Certificate Store** (PowerShell-based cert import, dual-mode local/WinRM), **Java Keystore** (PEM → PKCS#12 → keytool pipeline, JKS and PKCS12 formats), **Kubernetes Secrets** (deploys as `kubernetes.io/tls` Secrets via injectable K8sClient interface, in-cluster or kubeconfig auth).
|
||||
|
||||
After deployment, agents can perform **post-deployment TLS verification**: the agent probes the live TLS endpoint using `crypto/tls.DialWithDialer` and compares the SHA-256 fingerprint of the served certificate against what was deployed. Results are reported via `POST /api/v1/jobs/{id}/verify` and stored on the job record. Verification is best-effort — failures don't block or rollback deployments.
|
||||
|
||||
Additional cloud, network, and Kubernetes target connectors are planned for future releases.
|
||||
The SSH connector enables agentless deployment to any Linux/Unix server via SSH/SFTP, using the proxy agent pattern. The Kubernetes Secrets connector deploys certificates as `kubernetes.io/tls` Secrets via an injectable K8sClient interface supporting both in-cluster and out-of-cluster auth.
|
||||
|
||||
### Notifier Connector
|
||||
|
||||
@@ -621,6 +676,16 @@ Built-in notifiers: **Email** (SMTP), **Webhook** (HTTP POST), **Slack** (incomi
|
||||
|
||||
See the [Connector Development Guide](connectors.md) for details on building custom connectors.
|
||||
|
||||
### Notification Retry & Dead-Letter Queue
|
||||
|
||||
A transient notifier failure (SMTP timeout, 5xx webhook response, Slack rate-limit) must not silently drop a critical alert. Migration `000016_notification_retry` adds three columns to `notification_events` — `retry_count INTEGER NOT NULL DEFAULT 0`, `next_retry_at TIMESTAMPTZ` (nullable — only meaningful while a row is in `failed` state), and `last_error TEXT` (the most recent transient error, preserved for operator triage) — together with a partial index `idx_notification_events_retry_sweep ON notification_events(next_retry_at) WHERE status = 'failed' AND next_retry_at IS NOT NULL` so the retry hot path scales with the retry-eligible slice rather than the full notification history.
|
||||
|
||||
The scheduler's notification-retry loop (see the scheduler section above) calls `NotificationService.RetryFailedNotifications(ctx)` every `CERTCTL_NOTIFICATION_RETRY_INTERVAL` (default `2m`). Each tick pulls up to 1000 rows via `notifRepo.ListRetryEligible(ctx, now, maxAttempts, sweepLimit)` — a partial-index-driven query that filters on `status='failed' AND next_retry_at <= now() AND retry_count < 5` — and redispatches them through the same notifier registry used by `ProcessPendingNotifications`. A successful redispatch transitions the row directly to `sent` without incrementing `retry_count`, so the audit trail preserves "delivered on attempt N". A failed redispatch re-arms `next_retry_at` using exponential backoff — `wait = min(2^retry_count minutes, 1h)` — bumps `retry_count`, and stamps `last_error`. When `retry_count >= 4` (the fifth attempt has just failed) the row is promoted to the terminal `dead` status via `notifRepo.MarkAsDead`, which clears `next_retry_at` so the partial retry-sweep index stops matching and the row cannot be re-entered into the retry rotation without operator action.
|
||||
|
||||
`NotificationService.RequeueNotification(ctx, id)` is the operator-driven escape hatch from `dead`. It atomically resets `retry_count → 0`, `next_retry_at → NULL`, `last_error → NULL`, and `status → pending`, handing the row back to `ProcessPendingNotifications` on the next 1m tick. This is the correct response to "the notifier outage is resolved, redeliver the queue"; it is not a retry, which is why the retry counter is reset rather than incremented.
|
||||
|
||||
The dead-letter depth is surfaced in two places. First, `DashboardSummary.NotificationsDead` is populated by `StatsService.GetDashboardSummary` via `notifRepo.CountByStatus(ctx, "dead")`. The injection uses a `SetNotifRepo` setter pattern (mirroring `CertificateService.SetTargetRepo`) rather than a new positional argument to `NewStatsService`, which keeps all nine existing `NewStatsService` call sites (main.go plus eight digest tests and stats_test.go) signature-stable — when the notification repository has not been wired in, `NotificationsDead` falls through to zero. Second, the `/api/v1/metrics/prometheus` endpoint emits `certctl_notification_dead_total` as a counter (operator alert thresholds per the I-005 spec: `> 0` warning, `> 10` critical) using the same `DashboardSummary` snapshot so the dashboard card and the Prometheus counter cannot skew. The web dashboard exposes a two-tab toolbar on `/notifications` — "All" (the pre-I-005 inbox) and "Dead letter" (threads `?status=dead` into the list query, surfaces `Retry N/5` and the truncated `last_error` with a full-text tooltip per row, and binds a Requeue button to `POST /api/v1/notifications/{id}/requeue`).
|
||||
|
||||
### EST Server (RFC 7030)
|
||||
|
||||
The EST (Enrollment over Secure Transport) server provides an industry-standard enrollment interface for devices that need certificates without using the REST API. It runs under `/.well-known/est/` per RFC 7030 and supports four operations: CA certificate distribution (`/cacerts`), initial enrollment (`/simpleenroll`), re-enrollment (`/simplereenroll`), and CSR attributes (`/csrattrs`).
|
||||
@@ -656,10 +721,52 @@ type ESTService interface {
|
||||
}
|
||||
```
|
||||
|
||||
**Issuer connector extension:** EST required adding `GetCACertPEM(ctx) (string, error)` to the issuer connector interface so the `/cacerts` endpoint can serve the CA chain. The Local CA connector returns its CA certificate PEM; ACME, step-ca, OpenSSL, Vault, and DigiCert connectors return errors (they don't expose a static CA chain — their chains are per-issuance).
|
||||
**Issuer connector extension:** EST required adding `GetCACertPEM(ctx) (string, error)` to the issuer connector interface so the `/cacerts` endpoint can serve the CA chain. The Local CA returns its CA certificate PEM; Vault PKI fetches via `GET /v1/{mount}/ca/pem`; Google CAS fetches via API; AWS ACM PCA retrieves via `GetCertificateAuthorityCertificate`. ACME, step-ca, OpenSSL, DigiCert, and Sectigo connectors return errors (they don't expose a static CA chain — their chains are per-issuance).
|
||||
|
||||
**Authentication:** EST endpoints are served unauthenticated at the HTTP layer under `/.well-known/est/*` — no Bearer token required. Per RFC 7030 §3.2.3 EST authentication is deployment-specific, and per §4.1.1 `/cacerts` is explicitly anonymous. certctl enforces authentication via CSR signature verification inside `ESTService.SimpleEnroll`/`SimpleReEnroll` plus profile policy gates (allowed key algorithms, minimum key size, permitted SANs, permitted EKUs, MaxTTL). The HTTP dispatch is implemented in `cmd/server/main.go:buildFinalHandler`, which routes `/.well-known/est/*` through `noAuthHandler` (RequestID + structuredLogger + Recovery only). Operators who need stronger client identification should terminate mTLS at an upstream reverse proxy and pin the CSR's SAN to the client cert subject at the profile level.
|
||||
|
||||
**Audit:** Every EST enrollment is recorded in the audit trail with `protocol: "EST"`, the CN, SANs, issuer ID, serial number, and optional profile ID.
|
||||
|
||||
### SCEP Server (RFC 8894)
|
||||
|
||||
The SCEP (Simple Certificate Enrollment Protocol) server provides certificate enrollment for MDM platforms and network devices. It runs at `/scep` with operation-based dispatch via query parameters per RFC 8894.
|
||||
|
||||
**Architecture:** SCEP follows the exact same layering as EST — a handler-level protocol that delegates certificate issuance to an existing `IssuerConnector`. The `SCEPService` bridges the `SCEPHandler` to whichever issuer connector is configured via `CERTCTL_SCEP_ISSUER_ID`.
|
||||
|
||||
```
|
||||
Client (MDM, network device, SCEP client)
|
||||
│
|
||||
▼
|
||||
SCEPHandler (handler layer)
|
||||
│ PKCS#7 envelope parsing, CSR extraction, challenge password extraction
|
||||
▼
|
||||
SCEPService (service layer)
|
||||
│ Challenge password validation, CSR validation, CN/SAN extraction, audit recording
|
||||
▼
|
||||
IssuerConnector (connector layer via IssuerConnectorAdapter)
|
||||
│ Certificate signing (Local CA, step-ca, etc.)
|
||||
▼
|
||||
Signed certificate returned as PKCS#7 certs-only
|
||||
```
|
||||
|
||||
**Wire format:** SCEP clients wrap CSRs in PKCS#7 SignedData envelopes. The handler parses the outer ASN.1 ContentInfo → SignedData → EncapsulatedContentInfo to extract the CSR bytes. Fallback paths handle base64-encoded PKCS#7 and raw CSR submissions (for simpler clients). Responses use PKCS#7 certs-only via the shared `internal/pkcs7` package (same as EST). Single certs are returned as raw DER for `GetCACert`, chains as PKCS#7.
|
||||
|
||||
**Authentication:** SCEP endpoints at `/scep` and `/scep/*` are served unauthenticated at the HTTP layer — no Bearer token required — per RFC 8894 §3.2, which defines authentication via the `challengePassword` attribute (OID 1.2.840.113549.1.9.7) embedded in the PKCS#10 CSR rather than an HTTP credential. The HTTP dispatch is implemented in `cmd/server/main.go:buildFinalHandler`, which routes `/scep` and `/scep/*` through `noAuthHandler` (RequestID + structuredLogger + Recovery only). The `challengePassword` is mandatory: `preflightSCEPChallengePassword` at startup refuses to boot the control plane when `CERTCTL_SCEP_ENABLED=true` is set without `CERTCTL_SCEP_CHALLENGE_PASSWORD`, closing CWE-306 (missing authentication for a critical function). `SCEPService.PKCSReq` enforces the same invariant defense-in-depth — an empty `s.challengePassword` rejects every enrollment — and the password comparison uses `crypto/subtle.ConstantTimeCompare` to prevent response-time side-channel leakage. The startup log line `SCEP server enabled` emits a `challenge_password_set` boolean for operator visibility.
|
||||
|
||||
**Interface:** The `SCEPHandler` defines an `SCEPService` interface (dependency inversion):
|
||||
|
||||
```go
|
||||
type SCEPService interface {
|
||||
GetCACaps(ctx context.Context) string
|
||||
GetCACert(ctx context.Context) (string, error)
|
||||
PKCSReq(ctx context.Context, csrPEM string, challengePassword string, transactionID string) (*domain.SCEPEnrollResult, error)
|
||||
}
|
||||
```
|
||||
|
||||
**Shared PKCS#7 package:** Both EST and SCEP handlers share a common `internal/pkcs7` package for building PKCS#7 certs-only responses and PEM-to-DER chain conversion, eliminating code duplication between the two enrollment protocols.
|
||||
|
||||
**Audit:** Every SCEP enrollment is recorded in the audit trail with `protocol: "SCEP"`, the CN, SANs, issuer ID, serial number, transaction ID, and optional profile ID.
|
||||
|
||||
## Security Model
|
||||
|
||||
### Private Key Management
|
||||
@@ -701,10 +808,11 @@ The control plane only handles public material: certificates, chains, and CSRs.
|
||||
|
||||
### Authentication
|
||||
|
||||
- **API clients → Server**: API key in `Authorization: Bearer` header, or `none` for demo mode
|
||||
- **API clients → Server**: API key in `Authorization: Bearer` header, or `none` for demo mode. Applies to every path under `/api/v1/*`.
|
||||
- **Agent → Server**: API key registered at agent creation, included in all requests
|
||||
- **Server → Issuers**: ACME account key, or connector-specific credentials
|
||||
- **Agent → Targets**: API tokens, WinRM credentials (stored locally on agent or proxy agent — never on server). Credential scope is limited to the agent's network zone.
|
||||
- **Standards-based enrollment and PKI distribution endpoints**: `/.well-known/est/*` (RFC 7030), `/scep` and `/scep/*` (RFC 8894), and `/.well-known/pki/crl/{issuer_id}` + `/.well-known/pki/ocsp/{issuer_id}/{serial}` (RFC 5280 §5 / RFC 6960 / RFC 8615) are served unauthenticated at the HTTP layer. These protocols carry their own authentication semantics — CSR signature + profile policy for EST (§3.2.3 says EST auth is deployment-specific; §4.1.1 makes `/cacerts` explicitly anonymous), `challengePassword` in CSR attributes for SCEP (§3.2), and relying-party accessibility for CRL/OCSP — and cannot present certctl Bearer tokens. The dispatch is implemented in `cmd/server/main.go:buildFinalHandler`, which routes these prefixes through `noAuthHandler` (RequestID + structuredLogger + Recovery only, no auth or rate-limit middleware). CWE-306 is closed for SCEP by `preflightSCEPChallengePassword`, which refuses to start the server when SCEP is enabled without `CERTCTL_SCEP_CHALLENGE_PASSWORD`. The 27-subtest regression harness `cmd/server/finalhandler_test.go` pins this dispatch surface (EST 4-endpoint, SCEP exact + trailing-slash + query-string, PKI CRL+OCSP, health probes, `/api/v1/*` authenticated, `/assets/*` file server, SPA fallback).
|
||||
|
||||
### Audit Trail
|
||||
|
||||
@@ -741,6 +849,34 @@ All shell-facing inputs (connector scripts, domain names, ACME tokens) are valid
|
||||
|
||||
All incoming HTTP request bodies are capped by `http.MaxBytesReader` middleware (default 1MB, configurable via `CERTCTL_MAX_BODY_SIZE`). Requests exceeding the limit receive a 413 Request Entity Too Large response. The middleware is positioned before authentication in the chain so oversized payloads are rejected early, before any auth processing or database work occurs. Requests without bodies (GET, HEAD, nil body) skip the limit check.
|
||||
|
||||
### Config Encryption at Rest
|
||||
|
||||
Dynamic issuer and target configurations (rows with `source='database'`) contain credentials — ACME EAB HMACs, Vault tokens, DigiCert/Sectigo API keys, SSH private keys, WinRM passwords, F5 BIG-IP passwords, and similar. These are sealed at rest in PostgreSQL via `internal/crypto/encryption.go` using AES-256-GCM with a key derived from the operator passphrase `CERTCTL_CONFIG_ENCRYPTION_KEY` through PBKDF2-SHA256 (100,000 rounds, 32-byte output).
|
||||
|
||||
**v2 wire format (current, M-8 remediation, CWE-916 / CWE-329):**
|
||||
|
||||
```
|
||||
magic(0x02) || salt(16) || nonce(12) || ciphertext+tag
|
||||
```
|
||||
|
||||
Every call to `EncryptIfKeySet` draws 16 fresh bytes from `crypto/rand` as the PBKDF2 salt, so the derived AES-256 key is distinct per ciphertext and per re-encryption. The salt is stored alongside the ciphertext; decryption reads the magic byte, splits out the salt, re-derives the key, and verifies the AEAD tag.
|
||||
|
||||
**v1 legacy format (read-only):**
|
||||
|
||||
```
|
||||
nonce(12) || ciphertext+tag
|
||||
```
|
||||
|
||||
Pre-M-8 blobs were sealed with a package-level fixed salt `"certctl-config-encryption-v1"`. `DecryptIfKeySet` preserves the v1 read path unchanged — a blob whose first byte is not `0x02`, or whose v2 AEAD verification fails (including the 1/256 case where a v1 nonce happens to begin with `0x02`), falls through to a v1 attempt against the legacy fixed salt. v1 blobs are never written by the post-M-8 code path; they re-seal as v2 naturally on the next UPDATE through the normal service CRUD flow. No operator migration ceremony is required.
|
||||
|
||||
**Fail-closed behavior (C-2 sentinel, CWE-311):** both `EncryptIfKeySet` and `DecryptIfKeySet` return `ErrEncryptionKeyRequired` when invoked with an empty passphrase. The server refuses to start if any `source='database'` rows already exist without `CERTCTL_CONFIG_ENCRYPTION_KEY` set.
|
||||
|
||||
**Low-level primitives preserved byte-identical.** `Encrypt`, `Decrypt`, and `DeriveKey` are kept bit-stable so v1 fixtures on disk remain decryptable unchanged and so callers outside the config-encryption path (none today, but the symbols are exported) do not see a breaking change. The new per-ciphertext salt path is reached via the helper `deriveKeyWithSalt(passphrase, salt)`.
|
||||
|
||||
**Passphrase plumbing.** Services (`IssuerService`, `TargetService`, `IssuerRegistry`) hold the operator passphrase as a raw `string` and delegate PBKDF2 to the crypto package per ciphertext. This replaces the pre-M-8 design that pre-derived a single `[]byte` key at service construction and reused it for every row, which was the direct consequence of the fixed-salt KDF.
|
||||
|
||||
**Coverage gate.** CI enforces `internal/crypto/...` coverage ≥ 85% (observed 86.7%) — the encryption primitives are a security-critical gate, and the v2 format plus v1 fallback plus C-2 sentinel paths all need exhaustive coverage to avoid silent regressions.
|
||||
|
||||
### CORS
|
||||
|
||||
CORS uses a **deny-by-default** posture: when `CERTCTL_CORS_ORIGINS` is empty, no CORS headers are set and only same-origin requests can read responses. Operators must explicitly configure allowed origins. This prevents accidental exposure of the API to cross-origin requests in production.
|
||||
@@ -760,7 +896,7 @@ The HTTP middleware stack processes requests in the following order (see `cmd/se
|
||||
|
||||
### Concurrency Safety
|
||||
|
||||
The background scheduler uses `sync/atomic.Bool` idempotency guards on all 7 loops — if a tick fires while the previous iteration is still running, it skips. A `sync.WaitGroup` tracks all in-flight goroutines. `WaitForCompletion(timeout)` blocks during shutdown until all work finishes or the timeout expires, preventing state corruption from mid-flight database operations during process exit.
|
||||
The background scheduler uses `sync/atomic.Bool` idempotency guards on every loop (8 always-on plus up to 4 optional) — if a tick fires while the previous iteration is still running, it skips. A `sync.WaitGroup` tracks all in-flight goroutines. `WaitForCompletion(timeout)` blocks during shutdown until all work finishes or the timeout expires, preventing state corruption from mid-flight database operations during process exit.
|
||||
|
||||
### Logging
|
||||
|
||||
@@ -779,10 +915,12 @@ All endpoints are under `/api/v1/` and follow consistent patterns:
|
||||
|
||||
Resources: certificates, issuers, targets, agents, jobs, policies, profiles, teams, owners, agent-groups, audit, notifications, discovered-certificates, discovery-scans, network-scan-targets, stats, metrics.
|
||||
|
||||
The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 99 endpoints across 23 resource domains (97 under `/api/v1/` + `/.well-known/est/` plus `/health` and `/ready`; includes auth, 7 discovery endpoints from M18b, 6 network scan endpoints from M21, Prometheus metrics from M22, 4 EST enrollment endpoints from M23, 2 digest endpoints from M29), all request/response schemas, and pagination conventions. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation.
|
||||
The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 97 operations across `/api/v1/` and `/.well-known/est/` (includes auth, 7 discovery endpoints, 6 network scan endpoints, Prometheus metrics, 4 EST enrollment endpoints, 2 digest endpoints, 2 verification endpoints, 2 export endpoints), all request/response schemas, and pagination conventions. The server also registers `/health` and `/ready` outside the OpenAPI spec, bringing the total route count to 107. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation.
|
||||
|
||||
Jobs support additional action endpoints: `POST /api/v1/jobs/{id}/cancel`, `POST /api/v1/jobs/{id}/approve`, `POST /api/v1/jobs/{id}/reject`.
|
||||
|
||||
**Bulk Operations:** `POST /api/v1/certificates/bulk-revoke` — Bulk revocation by filter criteria (profile_id, owner_id, agent_id, issuer_id). Creates individual revocation jobs for matching certificates, with partial-failure tolerance and a summary audit event.
|
||||
|
||||
**Enhanced Query Features (M20):** Certificate list endpoints support additional query capabilities beyond basic pagination:
|
||||
|
||||
- **Sorting**: `?sort=notAfter` (ascending) or `?sort=-createdAt` (descending). Whitelist: notAfter, expiresAt, createdAt, updatedAt, commonName, name, status, environment.
|
||||
@@ -792,7 +930,7 @@ Jobs support additional action endpoints: `POST /api/v1/jobs/{id}/cancel`, `POST
|
||||
- **Additional filters**: `?agent_id=`, `?profile_id=` (in addition to existing status, environment, owner_id, team_id, issuer_id).
|
||||
- **Deployments**: `GET /api/v1/certificates/{id}/deployments` returns deployment targets for a certificate.
|
||||
|
||||
Certificate revocation: `POST /api/v1/certificates/{id}/revoke` with optional `{"reason": "keyCompromise"}`. Supports RFC 5280 reason codes (unspecified, keyCompromise, caCompromise, affiliationChanged, superseded, cessationOfOperation, certificateHold, privilegeWithdrawn). Returns the updated certificate status. Best-effort issuer notification — the revocation succeeds even if the issuer connector is unavailable. A JSON-formatted CRL is available at `GET /api/v1/crl`, and a DER-encoded X.509 CRL signed by the issuing CA at `GET /api/v1/crl/{issuer_id}`. An embedded OCSP responder serves signed responses at `GET /api/v1/ocsp/{issuer_id}/{serial}`. Short-lived certificates (profile TTL < 1 hour) are exempt from CRL/OCSP — expiry is sufficient revocation.
|
||||
Certificate revocation: `POST /api/v1/certificates/{id}/revoke` with optional `{"reason": "keyCompromise"}`. Supports RFC 5280 reason codes (unspecified, keyCompromise, caCompromise, affiliationChanged, superseded, cessationOfOperation, certificateHold, privilegeWithdrawn). Returns the updated certificate status. Best-effort issuer notification — the revocation succeeds even if the issuer connector is unavailable. The DER-encoded X.509 CRL signed by the issuing CA is served unauthenticated at `GET /.well-known/pki/crl/{issuer_id}` (RFC 5280 §5 + RFC 8615, `Content-Type: application/pkix-crl`). The embedded OCSP responder serves signed responses unauthenticated at `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (RFC 6960, `Content-Type: application/ocsp-response`). Both endpoints are accessible to relying parties with no certctl API credentials, as RFC-compliant PKI consumers expect. Short-lived certificates (profile TTL < 1 hour) are exempt from CRL/OCSP — expiry is sufficient revocation.
|
||||
|
||||
Certificate export (M27): `GET /api/v1/certificates/{id}/export/pem` returns PEM-encoded certificate and chain, and `POST /api/v1/certificates/{id}/export/pkcs12` returns a PKCS#12 bundle (binary). Private keys are never exported — they remain on agents. All exports are audited with actor, timestamp, and format.
|
||||
|
||||
@@ -807,7 +945,7 @@ flowchart LR
|
||||
AI["AI Assistant\n(Claude, Cursor)"] -->|"stdio"| MCP["MCP Server\ncmd/mcp-server/"]
|
||||
MCP -->|"HTTP + Bearer token"| API["certctl REST API\n:8443"]
|
||||
|
||||
subgraph "78 MCP Tools"
|
||||
subgraph "MCP Tools"
|
||||
T1["Certificate CRUD"]
|
||||
T2["Agent Management"]
|
||||
T3["Job Operations"]
|
||||
@@ -821,7 +959,7 @@ flowchart LR
|
||||
|
||||
The MCP server is a stateless HTTP proxy — every MCP tool call translates to an HTTP request to the certctl REST API. It adds no new state, no new dependencies, and no new attack surface beyond what the API already exposes. Configuration is minimal: `CERTCTL_SERVER_URL` and `CERTCTL_API_KEY` environment variables.
|
||||
|
||||
The 78 tools are organized across 16 resource domains with typed input structs and `jsonschema` struct tags for automatic LLM-friendly schema generation. Binary response support handles DER CRL and OCSP endpoints.
|
||||
The tools are organized across 16 resource domains with typed input structs and `jsonschema` struct tags for automatic LLM-friendly schema generation. Binary response support handles DER CRL and OCSP endpoints.
|
||||
|
||||
## CLI Tool
|
||||
|
||||
@@ -896,9 +1034,9 @@ See `deploy/helm/certctl/values.yaml` for the full configuration reference and `
|
||||
|
||||
For production, you would also add an ingress controller, TLS termination for the certctl API itself, and external PostgreSQL (RDS, Cloud SQL, etc.).
|
||||
|
||||
## Discovery Data Flow (M18b + M21)
|
||||
## Discovery Data Flow (M18b + M21 + M50)
|
||||
|
||||
Certificate discovery enables operators to build a complete inventory of existing certificates before managing them with certctl. There are two discovery modes that feed into the same pipeline:
|
||||
Certificate discovery enables operators to build a complete inventory of existing certificates before managing them with certctl. There are three discovery modes that feed into the same pipeline:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
@@ -907,6 +1045,7 @@ flowchart TB
|
||||
SCAN["Filesystem Scanner\n(CERTCTL_DISCOVERY_DIRS)"]
|
||||
SERVER["certctl-server\n(network discovery)"]
|
||||
NETSCAN["TLS Scanner\n(CIDR ranges + ports)"]
|
||||
CLOUD["Cloud Discovery\n(AWS SM / Azure KV / GCP SM)"]
|
||||
end
|
||||
|
||||
EXTRACT["Extract Metadata\n(CN, SANs, serial, issuer, expiry, fingerprint)"]
|
||||
@@ -922,6 +1061,7 @@ flowchart TB
|
||||
SCAN --> EXTRACT
|
||||
SERVER -->|"Scheduler loop\n(every 6h)"| NETSCAN
|
||||
NETSCAN -->|"crypto/tls.Dial\n50 goroutines"| EXTRACT
|
||||
CLOUD -->|"Scheduler loop\n(every 6h)"| EXTRACT
|
||||
EXTRACT --> SERVICE
|
||||
SERVICE --> REPO
|
||||
REPO -->|"Dedup by fingerprint\n+ agent_id + source_path"| DB
|
||||
@@ -948,7 +1088,16 @@ flowchart TB
|
||||
5. **Sentinel agent** — Results submitted using `server-scanner` as virtual agent ID, with `source_path` set to `ip:port` and `source_format` set to `network`
|
||||
6. **Same pipeline** — Feeds into the same `DiscoveryService.ProcessDiscoveryReport()` as filesystem discovery — same dedup, same audit trail, same triage workflow
|
||||
|
||||
**Common triage workflow (both sources):**
|
||||
**Cloud Secret Manager Discovery (M50):**
|
||||
|
||||
1. **Pluggable sources** — Each cloud provider implements the `DiscoverySource` interface (Name, Type, Discover, ValidateConfig). Three built-in sources: AWS Secrets Manager, Azure Key Vault, GCP Secret Manager
|
||||
2. **CloudDiscoveryService orchestrator** — Iterates registered sources, calls `Discover()` on each, feeds reports into `ProcessDiscoveryReport()`. Errors from one source don't prevent other sources from running
|
||||
3. **Scheduler integration** — opt-in cloud discovery scheduler loop (6h default; see `docs/architecture.md` 12-loop topology), runs immediately on startup, `atomic.Bool` idempotency guard
|
||||
4. **Sentinel agents** — Each source uses its own sentinel agent ID (`cloud-aws-sm`, `cloud-azure-kv`, `cloud-gcp-sm`) for dedup and triage filtering
|
||||
5. **Source path format** — `aws-sm://{region}/{secret}`, `azure-kv://{cert-name}/{version}`, `gcp-sm://{project}/{secret}`
|
||||
6. **No new schema** — Reuses existing `discovered_certificates` and `discovery_scans` tables. Sentinel agent IDs leverage existing `(fingerprint_sha256, agent_id, source_path)` dedup constraint
|
||||
|
||||
**Common triage workflow (all sources):**
|
||||
|
||||
1. **Storage** — Records stored in `discovered_certificates` table with status = "Unmanaged"
|
||||
2. **Audit** — `discovery_scan_completed` event logged with agent ID, cert count, scan timestamp
|
||||
@@ -961,29 +1110,53 @@ flowchart TB
|
||||
|
||||
This data flow is pull-based and non-blocking. Agents discover at their own pace; the server stores results for later review. There's no pressure to claim or dismiss; operators can leave certificates in "Unmanaged" status indefinitely.
|
||||
|
||||
## Continuous TLS Health Monitoring (M48)
|
||||
|
||||
Beyond one-time discovery, certctl continuously monitors TLS endpoints for certificate health using a shared TLS probing package and a state-machine-driven health check service. Endpoints transition between states (Healthy → Degraded → Down) based on consecutive failures, and `cert_mismatch` status alerts when a deployed certificate is unexpectedly replaced.
|
||||
|
||||
**Architecture:** Probing is extracted into a shared `internal/tlsprobe/` package used by both the network scanner (M21) and the health monitor. The `HealthCheckService` manages 8 API endpoints for CRUD operations and state transitions. A dedicated opt-in endpoint health scheduler loop runs every 60 seconds (configurable via `CERTCTL_HEALTH_CHECK_INTERVAL`). Individual health check targets have their own check intervals (default 300 seconds) — the scheduler queries only endpoints due for check via `ListDueForCheck()`. Results are stored with historical tracking for 30 days (configurable via `CERTCTL_HEALTH_CHECK_HISTORY_RETENTION`). State transitions trigger notifications (critical for down endpoints, warning for degraded, high for cert_mismatch).
|
||||
|
||||
**State Machine:** Healthy → Degraded (configurable threshold, default 2 consecutive failures) → Down (default 5 failures). The `cert_mismatch` status is special — it fires whenever the observed certificate fingerprint differs from the expected (deployed) fingerprint, catching silent rollbacks and unauthorized cert replacements. Recovery from degraded/down transitions back to healthy and resets the failure counter.
|
||||
|
||||
**API:** 8 endpoints for list (with filters: status, certificate_id, network_scan_target_id, enabled), get, create, update, delete, history (with limit param), acknowledge (incident marking), and summary (aggregate status counts).
|
||||
|
||||
**Auto-Create:** When a deployment job completes with successful verification (M25), the system automatically creates a health check with the deployed certificate's fingerprint as the expected value. Network scan targets can also opt-in to auto-create health checks for discovered endpoints.
|
||||
|
||||
**Configuration:**
|
||||
|
||||
| Env Var | Default | Description |
|
||||
|---|---|---|
|
||||
| `CERTCTL_HEALTH_CHECK_ENABLED` | `false` | Enable/disable the feature |
|
||||
| `CERTCTL_HEALTH_CHECK_INTERVAL` | `60s` | Scheduler tick interval |
|
||||
| `CERTCTL_HEALTH_CHECK_DEFAULT_INTERVAL` | `300s` | Default per-endpoint check interval (5 min) |
|
||||
| `CERTCTL_HEALTH_CHECK_DEFAULT_TIMEOUT` | `5000ms` | TLS connection timeout per probe |
|
||||
| `CERTCTL_HEALTH_CHECK_MAX_CONCURRENT` | `20` | Max concurrent TLS probes |
|
||||
| `CERTCTL_HEALTH_CHECK_HISTORY_RETENTION` | `30 days` | Purge probe history older than this |
|
||||
| `CERTCTL_HEALTH_CHECK_AUTO_CREATE` | `true` | Auto-create checks from deployments |
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
certctl uses a layered testing approach aligned with the handler → service → repository architecture, with 1050+ tests across six layers (service, handler, integration, connector, frontend, and scheduler). The goal is high-confidence regression prevention at the service and handler layers, where the most complex business logic lives, combined with integration tests that exercise the full request path from HTTP to database.
|
||||
certctl is extensively tested across eight layers with CI-enforced coverage gates that act as regression floors. The goal is high-confidence regression prevention at the service and handler layers (where the most complex business logic lives), combined with integration tests that exercise the full request path from HTTP to database.
|
||||
|
||||
**Service layer unit tests** (`internal/service/*_test.go`) — ~238 test functions across 15 files with mock repositories. These test all business logic in isolation: certificate CRUD with validation, certificate revocation (success, already-revoked, archived, invalid reason, all RFC 5280 reason codes, issuer notification, notification service integration, OCSP/CRL generation), agent lifecycle (registration, heartbeat, CSR submission with both keygen modes), job state machine (creation, processing, cancellation, retry logic), policy evaluation (all 5 rule types, violation creation), renewal and issuance flow (server-side and agent-side keygen paths), notification deduplication (threshold tag matching, channel routing), team/owner/agent group CRUD with pagination and audit recording, issuer service CRUD with connection testing, and the issuer connector adapter (type translation between connector and service layers including revocation). Mock repositories are simple structs with function fields, avoiding heavy mocking frameworks — this keeps tests readable and avoids coupling to mock library APIs.
|
||||
**Service layer unit tests** (`internal/service/*_test.go`) — Mock-based tests across all service files covering certificate CRUD, revocation (all RFC 5280 reason codes, OCSP/CRL generation, bulk revocation by filter with partial-failure tolerance), agent lifecycle, job state machine, policy evaluation, renewal/issuance flow (both keygen modes), notification deduplication, team/owner/agent group CRUD, issuer service CRUD with connection testing, and the issuer connector adapter. Mock repositories are simple structs with function fields — no heavy mocking frameworks.
|
||||
|
||||
**Handler layer tests** (`internal/api/handler/*_test.go`) — ~257 test functions across 11 files using Go's `httptest` package. Every handler file has a corresponding test file: certificates (50 tests including revocation, DER CRL, and OCSP), agents (28 tests), jobs (21 tests including approve/reject), notifications (11 tests), policies (19 tests), profiles (18 tests), issuers (17 tests), targets (17 tests), agent groups (12 tests), teams (26 tests), and owners (21 tests). Each test file follows the same pattern: a mock service struct with function fields, `httptest.NewRecorder` for capturing responses, and a shared `contextWithRequestID()` helper. Tests cover the happy path, input validation (missing fields, invalid JSON, empty IDs, name length limits), error propagation from the service layer, method-not-allowed responses, and pagination parameters.
|
||||
**Handler layer tests** (`internal/api/handler/*_test.go`) — Every handler file has a corresponding test file using Go's `httptest` package: certificates (including revocation, bulk revocation by profile/owner/agent/issuer, DER CRL, OCSP), agents, jobs (including approve/reject), notifications, policies, profiles, issuers, targets, agent groups, teams, owners, discovery, network scan, verification, export, EST, digest, stats, and metrics. Tests cover the happy path, input validation, error propagation, method-not-allowed, pagination, and bulk operation partial-failure scenarios.
|
||||
|
||||
**Integration tests** (`internal/integration/`) — Two test files exercising the full stack from HTTP request through router, handler, service, and postgres repository layers. `lifecycle_test.go` has 11 subtests covering the complete certificate lifecycle: team/owner creation, certificate creation, issuer verification, renewal trigger, job verification, agent registration, CSR submission, deployment, and status reporting. `negative_test.go` has 14 subtests covering error paths, 19 M11b endpoint tests, and 8 revocation endpoint tests (M15a+M15b): nonexistent resource lookups (404s), invalid request bodies (malformed JSON, missing required fields), invalid CSR submission, heartbeat for nonexistent agents, wrong HTTP methods on list endpoints, empty list responses, renewal on nonexistent certificates, expired certificate lifecycle, team/owner/agent group CRUD validation, revocation success, already-revoked rejection, not-found revocation, JSON CRL retrieval, DER CRL retrieval, OCSP response retrieval, and short-lived cert exemption. Both use a shared `setupTestServer()` that builds a fully-wired server with real postgres repositories and the Local CA issuer connector. A third file, `e2e_test.go`, contains 8 cross-milestone test functions with 48+ subtests that exercise features across milestones end-to-end: M10 agent metadata via heartbeat, M11 profiles/teams/owners/agent-groups CRUD, M12 issuer registry verification, M13 GUI operation endpoints, M14 stats and metrics, M15 revocation and CRL, M16 notification channels, and M20 enhanced query API (sorting, cursor pagination, sparse fields, time-range filters).
|
||||
**Integration tests** (`internal/integration/`) — Three test files exercising the full stack from HTTP request through router, handler, service, and repository layers. `lifecycle_test.go` covers the complete certificate lifecycle (team/owner creation through deployment and status reporting). `negative_test.go` covers error paths, endpoint validation, and revocation scenarios. `e2e_test.go` exercises cross-milestone features end-to-end (agent metadata, profiles, issuer registry, GUI operations, stats, revocation, notifications, enhanced query API).
|
||||
|
||||
**Frontend tests** (`web/src/api/client.test.ts`, `web/src/api/utils.test.ts`) — 86 Vitest tests covering the API client, stats/metrics endpoints, and utility functions. The API client tests mock `globalThis.fetch` and verify all endpoint functions (certificates, agents, jobs, policies, issuers, targets, notifications, audit, stats, metrics, health) send correct HTTP methods, URLs, headers, and request bodies. They also test API key management (store/retrieve/clear), auth header propagation, 401 event dispatching, and error handling (server messages, error fields, status text fallback). The stats/metrics endpoint tests verify correct query parameter handling and response shape validation. The utility tests use `vi.useFakeTimers()` for deterministic date testing and cover `formatDate`, `formatDateTime`, `timeAgo`, `daysUntil`, and `expiryColor`. The test environment uses jsdom with `@testing-library/jest-dom` matchers.
|
||||
**Go integration tests** (`deploy/test/integration_test.go`) — Runs against the live Docker Compose test environment with real CA backends (Local CA, Pebble ACME, step-ca). Covers health checks, agent heartbeat, issuance, renewal, revocation, CRL/OCSP, EST enrollment, S/MIME, discovery, network scanning, and deployment verification using `crypto/x509` for cert parsing and `crypto/tls` for live TLS verification.
|
||||
|
||||
**CLI tests** (`internal/cli/client_test.go`) — 14 tests covering all 10 CLI subcommands with httptest mock servers, PEM parsing for bulk import, auth header verification, and JSON/table output formatting.
|
||||
**Frontend tests** (`web/src/api/`) — Vitest tests covering the full API client (all endpoint functions with fetch mocking), stats/metrics endpoints, utility functions, and auth flows. Test environment uses jsdom with `@testing-library/jest-dom` matchers.
|
||||
|
||||
**CI pipeline** (`.github/workflows/ci.yml`) — Two parallel jobs: Go (build, vet, race detection, static analysis, vulnerability scanning, test with coverage, coverage threshold enforcement) and Frontend (TypeScript type check, Vitest test suite, Vite production build). The Go job runs `go test -race` on service, handler, middleware, and scheduler packages to catch data races. It runs `golangci-lint` with 11 linters (errcheck, govet, staticcheck, unused, gosimple, ineffassign, typecheck, gocritic, gosec, bodyclose, noctx) configured in `.golangci.yml`. It runs `govulncheck ./...` to scan dependencies for known CVEs. Coverage thresholds are enforced per-layer: service 60%, handler 60%, domain 40%, middleware 50%. These thresholds act as regression floors — they can only go up. Connector tests are included via `./internal/connector/issuer/...` and `./internal/connector/target/...` (covers Local CA, ACME, step-ca, NGINX, Apache, HAProxy, Traefik, and Caddy packages with unit tests for certificate signing logic, DNS solver, issuer validation, and deployment flows). The Frontend job runs `npx vitest run` between the TypeScript check and production build steps.
|
||||
**Connector tests** (`internal/connector/`) — Issuer connectors (Local CA self-signed/sub-CA modes, ACME DNS-01/DNS-PERSIST-01, step-ca, OpenSSL, Vault PKI, DigiCert, Sectigo, Google CAS, AWS ACM PCA — all with httptest mock servers or injectable interface mocks). Target connectors (NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS with mock PowerShell executor, F5 BIG-IP with mock iControl client, Postfix/Dovecot, SSH with mock SSH client, Windows Certificate Store with mock PowerShell executor, Java Keystore with mock command executor, Kubernetes Secrets with mock K8s client, shared certutil package). Notifier connectors (Slack, Teams, PagerDuty, OpsGenie).
|
||||
|
||||
**Connector tests** (`internal/connector/`) — 57 test functions covering issuer, target, and notifier connectors. The Local CA connector has tests for self-signed and sub-CA modes (RSA, ECDSA, config validation, non-CA cert rejection). The ACME DNS solver has 10 tests for script-based DNS-01 and DNS-PERSIST-01 challenges (6 DNS-01 tests + 4 DNS-PERSIST-01 tests covering `PresentPersist` success, no-script error, script failure, and wildcard domain handling). The step-ca connector has tests with a mock HTTP server for issuance, renewal, revocation, and error paths. The OpenSSL/Custom CA connector has 14 tests covering config validation, issuance success/failure/timeout, renewal, revocation, and CRL generation. The NGINX target connector has 13 tests covering config validation, certificate deployment (file writing, permissions, validate/reload commands), and deployment validation. Apache httpd and HAProxy connectors each have 3 tests covering config validation, deployment, and validation flows. Traefik and Caddy connectors have tests covering file-based deployment and (for Caddy) dual-mode API/file configuration. Notifier connector tests span 20 tests across Slack (5), Teams (4), PagerDuty (6), and OpsGenie (5) — verifying channel identity, payload formatting, HTTP error handling, connection failures, auth headers, and configuration defaults.
|
||||
**Scheduler tests** (`internal/scheduler/scheduler_test.go`) — Idempotency guards (`sync/atomic.Bool`), `WaitForCompletion` success and timeout paths, and multi-loop concurrency safety.
|
||||
|
||||
**Scheduler tests** (`internal/scheduler/scheduler_test.go`) — Tests for idempotency guards (`sync/atomic.Bool` CompareAndSwap prevents concurrent loop ticks), `WaitForCompletion` success and timeout paths, and multi-loop idempotency.
|
||||
**Fuzz tests** (`internal/validation/`, `internal/domain/`) — Go native fuzz tests for command validation (`ValidateShellCommand`, `ValidateDomainName`, `ValidateACMEToken`) and revocation domain parsing.
|
||||
|
||||
**Fuzz tests** (`internal/validation/command_fuzz_test.go`, `internal/domain/revocation_fuzz_test.go`) — Go native fuzz tests (`testing/fuzz`) for command validation functions and revocation domain parsing. These exercise `ValidateShellCommand`, `ValidateDomainName`, and `ValidateACMEToken` with random inputs to discover edge cases.
|
||||
**CI pipeline** (`.github/workflows/ci.yml`) — Two parallel jobs. Go: build, vet, `go test -race`, `golangci-lint` (11 linters), `govulncheck`, test with coverage, per-layer coverage threshold enforcement (service 55%, handler 60%, domain 40%, middleware 30%). Frontend: TypeScript type check, Vitest, Vite production build.
|
||||
|
||||
**What's not tested and why:** Postgres repository implementations (`internal/repository/postgres/`) require a real database and are tested only through integration tests, not unit tests — a `testcontainers-go` scaffolding for isolated PostgreSQL instances is planned. Target connectors for F5 BIG-IP and IIS are interface stubs (implementation planned for V3). The ACME connector requires a real ACME server (tested manually against Let's Encrypt staging). These are all candidates for future expansion as the test infrastructure matures.
|
||||
For detailed test procedures, smoke tests, and the release sign-off checklist, see the [Testing Guide](testing-guide.md). For setting up the Docker Compose test environment with real CA backends, see [Test Environment](test-env.md).
|
||||
|
||||
## What's Next
|
||||
|
||||
@@ -993,3 +1166,5 @@ certctl uses a layered testing approach aligned with the handler → service →
|
||||
- [Compliance Mapping](compliance.md) — SOC 2, PCI-DSS 4.0, and NIST SP 800-57 alignment
|
||||
- [MCP Server Guide](mcp.md) — AI-native access to the API
|
||||
- [OpenAPI Spec](openapi.md) — Full API reference and SDK generation
|
||||
- [Testing Guide](testing-guide.md) — Test procedures and release sign-off
|
||||
- [Test Environment](test-env.md) — Docker Compose test environment setup
|
||||
|
||||
@@ -39,7 +39,7 @@ Deploy certctl control plane once (Docker Compose, Kubernetes Helm chart, or sel
|
||||
```bash
|
||||
cd /opt/certctl
|
||||
docker compose up -d
|
||||
# Dashboard & API: http://localhost:8443
|
||||
# Dashboard & API: https://localhost:8443 (self-signed cert — pin with --cacert ./deploy/test/certs/ca.crt)
|
||||
```
|
||||
|
||||
**Option B: Kubernetes** (recommended for prod)
|
||||
@@ -59,7 +59,8 @@ chmod +x /usr/local/bin/certctl-agent
|
||||
|
||||
# Config
|
||||
sudo tee /etc/certctl/agent.env > /dev/null <<EOF
|
||||
CERTCTL_SERVER_URL=http://certctl-control-plane:8443
|
||||
CERTCTL_SERVER_URL=https://certctl-control-plane:8443
|
||||
CERTCTL_SERVER_CA_BUNDLE_PATH=/etc/certctl/tls/ca.crt
|
||||
CERTCTL_API_KEY=your-api-key
|
||||
CERTCTL_DISCOVERY_DIRS=/etc/nginx/certs,/etc/ssl,/etc/letsencrypt/live
|
||||
CERTCTL_KEY_DIR=/var/lib/certctl/keys
|
||||
|
||||
+18
-12
@@ -72,7 +72,7 @@ certctl implements tiered key storage with different protection profiles based o
|
||||
- Configured via: `CERTCTL_CA_CERT_PATH=/path/to/ca.crt` and `CERTCTL_CA_KEY_PATH=/path/to/ca.key`
|
||||
|
||||
**NIST Gap: HSM Storage**
|
||||
NIST SP 800-57 Part 1 recommends Hardware Security Module (HSM) storage for high-value keys (CA signing keys). certctl V2 uses filesystem storage on the server. HSM support is planned for V5 roadmap, enabling integration with:
|
||||
NIST SP 800-57 Part 1 recommends Hardware Security Module (HSM) storage for high-value keys (CA signing keys). certctl V2 uses filesystem storage on the server. HSM support is planned for certctl Pro (V3), enabling integration with:
|
||||
- AWS CloudHSM
|
||||
- Azure Dedicated HSM
|
||||
- Thales Luna, Gemalto SafeNet, YubiHSM (on-premises)
|
||||
@@ -210,15 +210,17 @@ NIST SP 800-57 Part 1 Section 6.2 addresses secure key distribution to minimize
|
||||
- Proxy agent executes deployment via appliance API
|
||||
|
||||
**Revocation Distribution**
|
||||
- Certificate Revocation List (CRL) via `GET /api/v1/crl/{issuer_id}`
|
||||
- Returns DER-encoded X.509 CRL signed by issuing CA
|
||||
- Certificate Revocation List (CRL) via `GET /.well-known/pki/crl/{issuer_id}` (RFC 5280 §5, RFC 8615)
|
||||
- Returns DER-encoded X.509 CRL signed by issuing CA (`Content-Type: application/pkix-crl`)
|
||||
- 24-hour validity period
|
||||
- Includes all revoked serials, reasons, and revocation timestamps
|
||||
- Served unauthenticated so relying parties without certctl API credentials can fetch it
|
||||
- Subject to URL caching; OCSP preferred for real-time revocation
|
||||
- OCSP via `GET /api/v1/ocsp/{issuer_id}/{serial}`
|
||||
- Returns DER-encoded OCSP response (OCSPResponse ASN.1 structure)
|
||||
- OCSP via `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (RFC 6960)
|
||||
- Returns DER-encoded OCSP response (OCSPResponse ASN.1 structure, `Content-Type: application/ocsp-response`)
|
||||
- Signed by issuing CA (or delegated OCSP signing cert)
|
||||
- Responds with good/revoked/unknown status
|
||||
- Served unauthenticated — the RFC 6960 relying-party model does not assume API credentials
|
||||
- Real-time, more bandwidth-efficient than CRL polling
|
||||
|
||||
## Revocation and Compromise (NIST SP 800-57 Part 3)
|
||||
@@ -272,20 +274,23 @@ NIST SP 800-57 Part 3 covers revocation (Section 2.5) when keys are suspected co
|
||||
- OCSP responder queries revocation table in real-time
|
||||
- Short-lived certificate exemption: certs with TTL < 1 hour skip CRL/OCSP (expiry is sufficient revocation)
|
||||
|
||||
**Bulk Revocation for Large-Scale Compromise Response** (V2.2) — NIST SP 800-57 Part 3 emphasizes rapid revocation when keys are compromised. `POST /api/v1/certificates/bulk-revoke` revokes all certificates matching filter criteria (profile, owner, agent, issuer) in a single operation. This enables operators to execute fleet-wide revocation for key compromise events affecting multiple certificates. Each bulk revocation creates individual jobs reusing the existing revocation pipeline, ensuring every certificate is recorded in the audit trail with the incident reason.
|
||||
|
||||
**Revocation Audit Trail**
|
||||
All revocation events logged:
|
||||
- Event type: `certificate_revoked`
|
||||
- Event type: `certificate_revoked` or `bulk_revocation_initiated` (for fleet operations)
|
||||
- Actor: authenticated user or service
|
||||
- Reason code: RFC 5280 enum
|
||||
- Reason code: RFC 5280 enum (or incident justification for bulk operations)
|
||||
- Timestamp: RFC3339
|
||||
- Issuer notification status: success or error reason
|
||||
- Filter criteria: profile_id, owner_id, agent_id, issuer_id (for bulk revocation)
|
||||
|
||||
## Alignment Summary Table
|
||||
|
||||
| NIST SP 800-57 Area | Status | Coverage | Notes |
|
||||
|---|---|---|---|
|
||||
| **Key Generation** | ✅ Aligned | 100% | Agent-side ECDSA P-256 using crypto/rand; server mode flagged as demo-only |
|
||||
| **Key Storage** | ⚠️ Partially Aligned | 80% | Filesystem with 0600 perms; HSM support planned V5 |
|
||||
| **Key Storage** | ⚠️ Partially Aligned | 80% | Filesystem with 0600 perms; HSM support planned V3 Pro |
|
||||
| **Cryptoperiods** | ✅ Aligned | 100% | Profile-enforced max_ttl; threshold-based renewal alerting |
|
||||
| **Key States** | ✅ Aligned | 100% | Full lifecycle tracking with immutable audit trail |
|
||||
| **Algorithms** | ✅ Aligned | 100% | NIST-approved algorithms only; post-quantum tracking in progress |
|
||||
@@ -301,13 +306,14 @@ All revocation events logged:
|
||||
- [x] RFC 5280 revocation support
|
||||
- [x] Immutable audit trail
|
||||
|
||||
### V2.2 (Planned: 2026)
|
||||
- Bulk revocation by profile/owner/agent/issuer (fleet-level revocation for incident response)
|
||||
|
||||
### V3 (Planned: 2026)
|
||||
- Role-based access control (limit revocation/approval to authorized operators)
|
||||
- Bulk revocation by profile/owner/agent (fleet-level revocation policy)
|
||||
|
||||
### V5 (Planned: 2027+)
|
||||
- HSM support for CA key storage
|
||||
- PKCS#11 integration for hardware tokens
|
||||
### V3 Pro (Planned)
|
||||
- HSM support for CA key storage and agent key storage (TPM 2.0, PKCS#11)
|
||||
- FIPS 140-2/3 validated crypto module (BoringCrypto build or external FIPS library)
|
||||
- Key destruction API (explicit secure erasure of agent keys)
|
||||
- Key escrow / recovery mechanism (backup encrypted private keys for disaster recovery)
|
||||
|
||||
+20
-14
@@ -92,9 +92,11 @@ Your QSA will request evidence that your certificate and key management systems
|
||||
|
||||
- **Certificate Status Tracking** — Four statuses: Active (deployed, not yet expired), Expiring (within threshold, awaiting renewal), Expired (past not-after date), Revoked (revoked via RFC 5280 revocation API). Dashboard charts show status distribution.
|
||||
|
||||
- **Revocation Infrastructure** (M15a, M15b):
|
||||
- CRL endpoint: `GET /api/v1/crl` (JSON format) or `GET /api/v1/crl/{issuer_id}` (DER X.509 CRL, 24h validity, signed by issuing CA)
|
||||
- OCSP responder: `GET /api/v1/ocsp/{issuer_id}/{serial}` (returns DER-encoded OCSP response: good/revoked/unknown)
|
||||
- **Revocation Infrastructure** (M15a, M15b, M-006):
|
||||
- Revocation API: `POST /api/v1/certificates/{id}/revoke` with RFC 5280 reason codes
|
||||
- CRL endpoint: `GET /.well-known/pki/crl/{issuer_id}` — DER X.509 CRL, 24h validity, signed by issuing CA, served unauthenticated (RFC 5280 §5, RFC 8615, `Content-Type: application/pkix-crl`)
|
||||
- OCSP responder: `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` — DER-encoded OCSP response (good/revoked/unknown), served unauthenticated (RFC 6960, `Content-Type: application/ocsp-response`)
|
||||
- Bulk revocation (V2.2): `POST /api/v1/certificates/bulk-revoke` with filter criteria (profile, owner, agent, issuer) for fleet-wide incident response
|
||||
- Short-lived cert exemption: certs with TTL < 1 hour skip CRL/OCSP (expiry is sufficient revocation)
|
||||
|
||||
- **Stats API** (M14) — Real-time visibility:
|
||||
@@ -107,7 +109,7 @@ Your QSA will request evidence that your certificate and key management systems
|
||||
- Discovered certificate report: `GET /api/v1/discovered-certificates` JSON export showing all certs on systems, fingerprints, and status.
|
||||
- Managed certificate inventory: `GET /api/v1/certificates` with filters (`?status=Expiring` for upcoming renewals).
|
||||
- Expiration alert configuration: policy JSON showing `alert_thresholds_days` for each environment.
|
||||
- CRL/OCSP availability proof: HTTP GET requests to `/api/v1/crl` and `/api/v1/ocsp/{issuer}/{serial}` with signed responses.
|
||||
- CRL/OCSP availability proof: unauthenticated HTTP GET requests to `/.well-known/pki/crl/{issuer_id}` (DER, `application/pkix-crl`) and `/.well-known/pki/ocsp/{issuer_id}/{serial}` (DER, `application/ocsp-response`) with signed responses.
|
||||
- Audit trail for certificate creation/renewal/revocation: `GET /api/v1/audit?type=certificate_issued,certificate_renewed,certificate_revoked`.
|
||||
- Dashboard charts showing expiration timeline, renewal success trends, status distribution.
|
||||
|
||||
@@ -326,11 +328,14 @@ This requirement covers key generation, storage, rotation, and destruction. Cert
|
||||
- Issuer notified (best-effort; ACME lacks standard revocation, Local CA skips issuer step).
|
||||
- Revocation notifications sent to owner via email/webhook/Slack/Teams/PagerDuty.
|
||||
|
||||
- **CRL and OCSP Publication** (M15b) — Revoked certificates published in:
|
||||
- CRL: `GET /api/v1/crl` (JSON format) or `GET /api/v1/crl/{issuer_id}` (DER X.509, signed by CA, 24h validity)
|
||||
- OCSP: `GET /api/v1/ocsp/{issuer_id}/{serial}` (returns revoked status for clients validating certificate chain)
|
||||
- **CRL and OCSP Publication** (M15b, M-006) — Revoked certificates published in:
|
||||
- CRL: `GET /.well-known/pki/crl/{issuer_id}` (DER X.509 signed by CA, 24h validity, RFC 5280 §5 + RFC 8615, `Content-Type: application/pkix-crl`)
|
||||
- OCSP: `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (returns revoked status for clients validating certificate chain, RFC 6960, `Content-Type: application/ocsp-response`)
|
||||
- Both endpoints are served unauthenticated so relying parties (browsers, TLS appliances) without certctl API keys can verify revocation — this is the RFC-compliant PKI model.
|
||||
- Clients checking certificate status via OCSP or CRL see revoked status within 24 hours.
|
||||
|
||||
- **Bulk Revocation for Incident Response** (V2.2) — `POST /api/v1/certificates/bulk-revoke` with filter criteria (profile, owner, agent, issuer) revokes all matching certificates in a single operation. PCI-DSS Req 4 requires rapid response to data transmission security incidents — bulk revocation enables operators to revoke an entire certificate set (e.g., all certs used by a compromised team or endpoint) in minutes rather than hours.
|
||||
|
||||
- **Private Key Destruction on Agent** — When certificate renewed or revoked:
|
||||
- Agent removes old private key file from `CERTCTL_KEY_DIR` when new certificate deployed.
|
||||
- Job status tracking confirms old key is no longer needed.
|
||||
@@ -338,8 +343,8 @@ This requirement covers key generation, storage, rotation, and destruction. Cert
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- Revocation requests: `GET /api/v1/audit?type=certificate_revoked` with RFC 5280 reason codes.
|
||||
- CRL publication: HTTP GET `/api/v1/crl` and parse JSON to show revoked serial numbers and timestamps.
|
||||
- OCSP responder validation: Query `GET /api/v1/ocsp/{issuer}/{serial}` for a known-revoked cert; response includes `revoked` status.
|
||||
- CRL publication: HTTP GET `/.well-known/pki/crl/{issuer_id}` (unauthenticated) returns a DER X.509 CRL — parse with `openssl crl -inform der -noout -text` to show revoked serial numbers, reasons, and timestamps.
|
||||
- OCSP responder validation: Query `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (unauthenticated) for a known-revoked cert; response includes `revoked` status and can be parsed with `openssl ocsp` tooling.
|
||||
- Audit trail: Certificate status transitions (Active → Revoked) recorded in `audit_events`.
|
||||
|
||||
**Operator Responsibility**:
|
||||
@@ -382,12 +387,12 @@ This requirement covers key generation, storage, rotation, and destruction. Cert
|
||||
- API key transmitted in Authorization header (not URL parameter, not cookie).
|
||||
- Browser to server: TLS.
|
||||
- Agent to server: TLS.
|
||||
- No credential logging (API key hash only, never plaintext).
|
||||
- No credential logging (audit records the per-key actor `Name`, never the Bearer token; logs redact the `Authorization` header).
|
||||
|
||||
**Evidence You Can Provide**:
|
||||
- API configuration: `CERTCTL_AUTH_TYPE=api-key` in deployment manifest.
|
||||
- Database schema: `api_keys` table showing SHA-256 hash column, not plaintext.
|
||||
- API audit log: `GET /api/v1/audit?action=api_call` showing Bearer token validation (no plaintext keys logged).
|
||||
- Key inventory: `CERTCTL_API_KEYS_NAMED` env var (format `name:key:admin,...`) — seeds the in-memory `NamedAPIKey{Name, Key, Admin}` struct at `internal/api/middleware/middleware.go:29`. Keys are constant-time-compared (`subtle.ConstantTimeCompare`) against the Bearer token. No database table stores them; protect the env var contents at rest via a secrets manager (Vault / AWS Secrets Manager / Kubernetes Secrets / Docker Secrets).
|
||||
- API audit log: `GET /api/v1/audit?action=api_call` showing per-key actor names (`Name` field of matched `NamedAPIKey`) on every call, with zero plaintext or hashed key material recorded.
|
||||
- TLS certificate on control plane: `openssl s_client -connect {server}:8443` showing valid certificate, TLS 1.2+, strong cipher.
|
||||
- GUI login flow: browser network tab showing Authorization header (token value redacted in compliance report).
|
||||
|
||||
@@ -557,6 +562,7 @@ This requirement covers key generation, storage, rotation, and destruction. Cert
|
||||
- **Alert Notifications** (M3, M16a) — Configurable escalation:
|
||||
- Email alerts: certificate approaching expiration, renewal failure, revocation notification.
|
||||
- Webhook: custom HTTP POST to your monitoring system (Slack, Teams, PagerDuty, OpsGenie, custom webhook).
|
||||
- **Retry & Dead-Letter Queue** (I-005) — Transient notifier failures (SMTP timeout, webhook 5xx) are retried with exponential backoff (`2^n` minutes capped at 1h, 5-attempt budget) before landing in the terminal `dead` status. Operators monitor DLQ depth via the `certctl_notification_dead_total` Prometheus counter and requeue via the Notifications page Dead letter tab once the underlying outage is resolved. Closes the pre-I-005 silent-drop gap where a single 5xx could lose a compliance-relevant alert without evidence.
|
||||
- Deduplication: one alert per threshold/certificate per day (avoid alert fatigue).
|
||||
|
||||
- **Audit Trail Filtering and Export** (M13) — Compliance reporting:
|
||||
@@ -717,12 +723,12 @@ This requirement covers key generation, storage, rotation, and destruction. Cert
|
||||
| PCI-DSS Requirement | certctl Feature | API/UI Evidence | Database/Config | Audit Trail | Status |
|
||||
|---|---|---|---|---|---|
|
||||
| **4.2.1** Strong Crypto | TLS cert issuance, ACME/step-ca/Local CA, RSA 2048+/ECDSA P-256 | `GET /api/v1/certificates` (key_type, key_size) | Certificate profiles | `GET /api/v1/audit?type=certificate_issued` | Available |
|
||||
| **4.2.2** Cert Inventory & Validation | Managed cert CRUD, discovery (M18b), expiration alerting, CRL/OCSP | `GET /api/v1/certificates`, `GET /api/v1/discovered-certificates`, `GET /api/v1/crl`, `GET /api/v1/ocsp/{issuer}/{serial}` | `managed_certificates`, `discovered_certificates` tables | `GET /api/v1/audit?type=certificate_*` | Available |
|
||||
| **4.2.2** Cert Inventory & Validation | Managed cert CRUD, discovery (M18b), expiration alerting, CRL/OCSP | `GET /api/v1/certificates`, `GET /api/v1/discovered-certificates`, `GET /.well-known/pki/crl/{issuer_id}`, `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (both unauthenticated, RFC 5280 / RFC 6960) | `managed_certificates`, `discovered_certificates` tables | `GET /api/v1/audit?type=certificate_*` | Available |
|
||||
| **3.6** Key Documentation | Profiles, owner/team tracking, issuer config, audit trail | `GET /api/v1/profiles`, `GET /api/v1/issuers`, certificate detail with owner/team | Profiles, certificate owner/team fields, issuer config | `GET /api/v1/audit?resource_type=certificate` | Available |
|
||||
| **3.7.1** Key Generation | Agent-side ECDSA P-256, server keygen (demo only) | Agent logs, renewal job detail, CSR audit | `CERTCTL_KEYGEN_MODE=agent` (config), job_type=AwaitingCSR | `GET /api/v1/audit?type=certificate_issued` with CSR hash | Available |
|
||||
| **3.7.2** Key Storage | Agent `/var/lib/certctl/keys` (0600), env var secrets, .env excluded | Deployment manifest (env var refs), agent key dir listing | `.env` file (git-ignored), `CERTCTL_KEY_DIR`, `CERTCTL_CA_KEY_PATH` | No API audit (keys off-platform) | Available |
|
||||
| **3.7.3** Key Rotation | Auto renewal, expiration thresholds, renewal jobs | Dashboard renewal trends, `GET /api/v1/jobs?type=Renewal`, certificate versions | Renewal policies, certificate version history | `GET /api/v1/audit?type=certificate_renewed` | Available |
|
||||
| **3.7.4** Key Destruction | Revocation API (RFC 5280), CRL/OCSP, private key cleanup | `POST /api/v1/certificates/{id}/revoke`, `GET /api/v1/crl`, OCSP endpoint | `certificate_revocations` table, CRL publication | `GET /api/v1/audit?type=certificate_revoked` | Available |
|
||||
| **3.7.4** Key Destruction | Revocation API (RFC 5280), CRL/OCSP, private key cleanup | `POST /api/v1/certificates/{id}/revoke`, unauthenticated `GET /.well-known/pki/crl/{issuer_id}` and `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` | `certificate_revocations` table, CRL publication | `GET /api/v1/audit?type=certificate_revoked` | Available |
|
||||
| **8.3** Strong Authentication | API key (SHA-256 hash, TLS), GUI login, 401 redirect | GUI login screenshot, API key auth header, TLS cert | API key hash in database | `GET /api/v1/audit` showing API calls | Available |
|
||||
| **8.6** Acct Management | Credentials out of source, .env excluded, env var config | Code review (no hardcoded secrets), `.gitignore` check | Deployment manifests showing env var refs only | No account lifecycle audit (outside scope) | Available in part |
|
||||
| **10.2** Audit Logging | API audit middleware (M19), certificate lifecycle events | `GET /api/v1/audit` with filter/pagination | `audit_events` table (every API call) | Real-time via API | Available |
|
||||
|
||||
+28
-17
@@ -44,7 +44,8 @@ Each section includes:
|
||||
|
||||
**certctl Implementation** (V2 — Community Edition):
|
||||
|
||||
- **API Key Authentication** — All API calls require a Bearer token (hashed with SHA-256, stored securely, validated with constant-time comparison) or are rejected with 401 Unauthorized. Environment: `CERTCTL_AUTH_TYPE` (default `api-key`; `none` requires explicit opt-in with log warning)
|
||||
- **API Key Authentication** — All `/api/v1/*` calls require a Bearer token (hashed with SHA-256, stored securely, validated with constant-time comparison) or are rejected with 401 Unauthorized. Environment: `CERTCTL_AUTH_TYPE` (default `api-key`; `none` requires explicit opt-in with log warning)
|
||||
- **Standards-based enrollment and PKI distribution endpoints** — EST (`/.well-known/est/*`, RFC 7030), SCEP (`/scep`, `/scep/*`, RFC 8894), and CRL/OCSP (`/.well-known/pki/crl/{issuer_id}`, `/.well-known/pki/ocsp/{issuer_id}/{serial}`, RFC 5280 §5 / RFC 6960 / RFC 8615) are served unauthenticated at the HTTP layer because these protocols cannot present certctl Bearer tokens. Authentication is enforced in-protocol: EST relies on CSR signature verification plus profile policy (RFC 7030 §3.2.3 says EST auth is deployment-specific; §4.1.1 makes `/cacerts` explicitly anonymous); SCEP requires a shared `challengePassword` in the PKCS#10 CSR attributes (OID 1.2.840.113549.1.9.7, RFC 8894 §3.2), validated with `crypto/subtle.ConstantTimeCompare`; CRL and OCSP are intentionally anonymous for relying-party accessibility. CWE-306 (missing authentication for a critical function) is closed for SCEP by `preflightSCEPChallengePassword` in `cmd/server/main.go`, which refuses to start the control plane when `CERTCTL_SCEP_ENABLED=true` is set without `CERTCTL_SCEP_CHALLENGE_PASSWORD`. The HTTP dispatch is implemented in `cmd/server/main.go:buildFinalHandler`, which routes these prefixes through `noAuthHandler` (RequestID + structuredLogger + Recovery only, no auth or rate-limit middleware) and is pinned by the 27-subtest regression harness at `cmd/server/finalhandler_test.go`.
|
||||
- **GUI Authentication** — Web dashboard includes login screen requiring API key entry. Failed auth redirects to login on 401. Auth context persists across page navigation. Logout clears session.
|
||||
- **Configurable CORS** — API restricts cross-origin requests via `CERTCTL_CORS_ORIGINS` allowlist or wildcard. Preflight caching prevents chatty browser auth flows.
|
||||
- **Token Bucket Rate Limiting** — Per-IP rate limiting (configurable via `CERTCTL_RATE_LIMIT_RPS` / `CERTCTL_RATE_LIMIT_BURST`) returns 429 Too Many Requests with Retry-After header. Prevents credential stuffing and brute-force attacks.
|
||||
@@ -58,6 +59,11 @@ Each section includes:
|
||||
- Auth info endpoint: `GET /api/v1/auth/info` (returns current auth mode, served without auth so GUI detects mode)
|
||||
- Rate limiting middleware: `internal/api/middleware/rate_limit.go`
|
||||
- CORS configuration: `cmd/server/main.go`, search for `CERTCTL_CORS_ORIGINS`
|
||||
- Final handler dispatch (authenticated vs. unauthenticated routing): `cmd/server/main.go:buildFinalHandler`
|
||||
- SCEP preflight gate (CWE-306 closure): `cmd/server/main.go:preflightSCEPChallengePassword`
|
||||
- SCEP service-layer defense-in-depth (rejects enrollment on empty challenge password, `crypto/subtle.ConstantTimeCompare`): `internal/service/scep.go`
|
||||
- Final handler dispatch regression harness (27 subtests): `cmd/server/finalhandler_test.go`
|
||||
- OpenAPI spec `security: []` overrides on unauthenticated paths: `api/openapi.yaml` (EST `/cacerts`, `/simpleenroll`, `/simplereenroll`, `/csrattrs`; SCEP `/scep` GET+POST; PKI `/crl/{issuer_id}`, `/ocsp/{issuer_id}/{serial}`)
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
@@ -110,7 +116,7 @@ Each section includes:
|
||||
|
||||
**certctl Implementation** (V2):
|
||||
|
||||
- **API Key Policy** — All API access requires an API key or explicit opt-out. Opt-out (`CERTCTL_AUTH_TYPE=none`) logs a warning: "WARNING: Auth disabled (CERTCTL_AUTH_TYPE=none) — this is insecure and only for development". Configuration choice is logged at startup.
|
||||
- **API Key Policy** — All `/api/v1/*` access requires an API key or explicit opt-out. Opt-out (`CERTCTL_AUTH_TYPE=none`) logs a warning: "WARNING: Auth disabled (CERTCTL_AUTH_TYPE=none) — this is insecure and only for development". Configuration choice is logged at startup. The standards-based enrollment and PKI distribution endpoints (EST, SCEP, CRL, OCSP) are served unauthenticated at the HTTP layer per their respective RFCs; see CC6.1 for the full authentication contract and CWE-306 closure via `preflightSCEPChallengePassword`.
|
||||
- **Agent Authentication** — Agents authenticate to the server via API keys (same mechanism as users). Agent credentials are separate from user API keys.
|
||||
- **Private Key Policy** — Agent-side key generation is the default (`CERTCTL_KEYGEN_MODE=agent`). Server-side keygen (`CERTCTL_KEYGEN_MODE=server`) requires explicit configuration and logs a warning: "server-side key generation enabled (CERTCTL_KEYGEN_MODE=server) — private keys touch control plane, demo only".
|
||||
- **Password Policy** — Not applicable; certctl uses API keys exclusively. Password management is delegated to your organization's IAM system if you integrate OIDC/SSO (V3).
|
||||
@@ -183,15 +189,20 @@ Each section includes:
|
||||
|
||||
- **Health Endpoint** — `GET /health` returns 200 OK with service status. Consumed by Docker health checks and Kubernetes probes.
|
||||
- **Readiness Endpoint** — `GET /ready` returns 200 OK when the database is connected and migrations are applied.
|
||||
- **Background Scheduler Monitoring** — 7 background loops run on a fixed schedule:
|
||||
- Renewal loop: every 1 hour, scans for certificates approaching renewal threshold
|
||||
- Job processor loop: every 30 seconds, picks up pending/waiting jobs and advances their state
|
||||
- Health check loop: every 2 minutes, pings agents to detect downtime
|
||||
- Notification dispatcher loop: every 1 minute, sends queued alerts
|
||||
- Short-lived cert expiry loop: every 30 seconds, marks expired short-lived credentials
|
||||
- Network scanner loop: every 6 hours, scans enabled TLS endpoints for certificate discovery
|
||||
- Digest emailer loop: every 24 hours, sends scheduled certificate digest email to configured recipients
|
||||
Each loop includes error handling and logs failures via structured slog.
|
||||
- **Background Scheduler Monitoring** — 12 background loops (8 always-on + 4 opt-in) run on a fixed schedule. Authoritative topology in `docs/architecture.md`:
|
||||
- Renewal loop (always-on, 1 hour): scans for certificates approaching renewal threshold
|
||||
- Job processor loop (always-on, 30 seconds): picks up pending/waiting jobs and advances their state
|
||||
- Job retry loop (always-on, 5 minutes, `CERTCTL_SCHEDULER_RETRY_INTERVAL`): retries Failed jobs (I-001)
|
||||
- Job timeout reaper loop (always-on, 10 minutes, `CERTCTL_JOB_TIMEOUT_INTERVAL`): fails AwaitingCSR/AwaitingApproval jobs past timeout (I-003)
|
||||
- Agent health check loop (always-on, 2 minutes): pings agents to detect downtime
|
||||
- Notification dispatcher loop (always-on, 1 minute): sends queued alerts
|
||||
- Notification retry loop (always-on, 2 minutes, `CERTCTL_NOTIFICATION_RETRY_INTERVAL`): exponential backoff retry for failed notifications; promote to dead-letter after 5 attempts (I-005)
|
||||
- Short-lived cert expiry loop (always-on, 30 seconds): marks expired short-lived credentials
|
||||
- Network scanner loop (opt-in, 6 hours, `CERTCTL_NETWORK_SCAN_ENABLED`): scans enabled TLS endpoints for certificate discovery
|
||||
- Digest emailer loop (opt-in, 24 hours, `CERTCTL_DIGEST_INTERVAL`): sends scheduled certificate digest email to configured recipients
|
||||
- Endpoint health loop (opt-in, 60 seconds, `CERTCTL_HEALTH_CHECK_INTERVAL`): continuous TLS health probes (M48)
|
||||
- Cloud discovery loop (opt-in, 6 hours, `CERTCTL_CLOUD_DISCOVERY_INTERVAL`): cloud secret manager certificate discovery (M50)
|
||||
Each loop includes `atomic.Bool` idempotency guards, error handling, and structured slog failure logs.
|
||||
- **Metrics Endpoints** — Two formats for monitoring integration:
|
||||
- `GET /api/v1/metrics` — JSON object with gauges, counters, and uptime for custom dashboards
|
||||
- `GET /api/v1/metrics/prometheus` — Prometheus exposition format (`text/plain; version=0.0.4`) for native scraping by Prometheus, Grafana Agent, Datadog, and other OpenMetrics-compatible collectors
|
||||
@@ -282,12 +293,13 @@ Each section includes:
|
||||
- `certificateHold` — temporary revocation (can be "unhold" by reissue)
|
||||
- `privilegeWithdrawn` — access rights revoked
|
||||
Revocation is **immediate** (no approval workflow). The certificate is marked `Revoked` in inventory, an audit event is logged, and optional issuer notification is best-effort. All revoked certs are excluded from active deployments.
|
||||
- **CRL Endpoint** — `GET /api/v1/crl` returns a JSON-formatted Certificate Revocation List (serial, reason, timestamp for each revoked cert). `GET /api/v1/crl/{issuer_id}` returns a DER-encoded X.509 CRL signed by the issuing CA (useful for legacy clients that don't support OCSP).
|
||||
- **OCSP Responder** — `GET /api/v1/ocsp/{issuer_id}/{serial}` returns a signed OCSP response indicating whether a cert is good, revoked, or unknown. Clients (browsers, TLS libraries) query this endpoint to verify cert validity in real-time.
|
||||
- **CRL Endpoint** — `GET /.well-known/pki/crl/{issuer_id}` returns a DER-encoded X.509 CRL signed by the issuing CA (RFC 5280 §5, RFC 8615, `Content-Type: application/pkix-crl`), served unauthenticated for relying parties that don't hold certctl API credentials.
|
||||
- **OCSP Responder** — `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` returns a signed OCSP response indicating whether a cert is good, revoked, or unknown (RFC 6960, `Content-Type: application/ocsp-response`). Also unauthenticated. Clients (browsers, TLS libraries) query this endpoint to verify cert validity in real-time.
|
||||
- **Revocation Notifications** — When a cert is revoked, notifications are sent to:
|
||||
- Certificate owner (email)
|
||||
- Configured webhooks (if you have a SIEM that subscribes)
|
||||
- Slack/Teams channels (if notifiers are configured)
|
||||
- **Bulk Revocation for Fleet-Wide Incidents** (V2.2) — `POST /api/v1/certificates/bulk-revoke` with filter criteria (profile, owner, agent, issuer) revokes all matching certificates in a single operation. Essential for incident response: key compromise affecting multiple certs, CA distrust events, decommissioning a team's infrastructure. Each bulk revocation creates individual jobs reusing the existing revocation pipeline, ensuring audit trail and notifications for every certificate.
|
||||
- **Short-Lived Cert Exemption** — Certificates with TTL < 1 hour (configured in profile) skip CRL/OCSP publication. Expiry is the revocation mechanism for short-lived certs (e.g., Kubernetes pod certs, session tokens).
|
||||
- **Deployment Rollback** — If a revoked cert is still deployed (shouldn't happen, but race conditions exist), operators can manually redeploy a previous version via the GUI. Rollback is audited.
|
||||
|
||||
@@ -302,7 +314,6 @@ Each section includes:
|
||||
|
||||
**V3 Enhancement**:
|
||||
|
||||
- **Bulk Revocation** — Revoke all certs issued by a specific profile, owner, or agent in a single API call (useful for large-scale incidents like CA compromise)
|
||||
- **Revocation Automation** — Trigger revocation based on external events (e.g., employee termination, security breach alert from CT Log monitoring)
|
||||
|
||||
**Operator Responsibility**:
|
||||
@@ -453,15 +464,15 @@ Each section includes:
|
||||
| | Metrics JSON Endpoint | `GET /api/v1/metrics` (gauges, counters, uptime) | ✅ | ✅ | Set thresholds, configure alerting |
|
||||
| | Stats API (time-series) | `GET /api/v1/stats/*` (summary, status, expiration, jobs, issuance) | ✅ | ✅ | Integrate into dashboards, SLO tracking |
|
||||
| | Structured Logging | `slog` middleware with request IDs | ✅ | ✅ | Aggregate logs to SIEM, define retention policy |
|
||||
| | Background Scheduler | 7 loops (renewal 1h, jobs 30s, health 2m, notifications 1m, short-lived 30s, network scan 6h, digest 24h) | ✅ | ✅ | Alert on scheduler loop failures |
|
||||
| | Background Scheduler | 12 loops (8 always-on: renewal 1h, jobs 30s, job retry 5m I-001, job timeout 10m I-003, health 2m, notifications 1m, notif retry 2m I-005, short-lived 30s; 4 opt-in: network scan 6h, digest 24h, endpoint health 60s M48, cloud discovery 6h M50) | ✅ | ✅ | Alert on scheduler loop failures |
|
||||
| **CC7.2** Anomaly Detection | Immutable API Audit Trail | `internal/api/middleware/audit.go`, `GET /api/v1/audit` | ✅ | Enhanced (SIEM export) | Integrate into SIEM, search for anomalies, archive long-term |
|
||||
| | Expiration Threshold Alerting | Configurable per-policy (default 30/14/7/0 days) | ✅ | ✅ | Configure thresholds, integrate notifications |
|
||||
| | Status Auto-Transitions | Active → Expiring (30d) → Expired (0d) | ✅ | ✅ | Monitor status changes in audit trail |
|
||||
| | Notification Routing | Email, Slack, Teams, PagerDuty, OpsGenie | ✅ | ✅ | Configure notifiers, on-call integration |
|
||||
| | Deployment Rollback | Redeploy previous cert version via GUI | ✅ | ✅ | Audit rollback decisions |
|
||||
| **CC7.3** Incident Response | Revocation API (RFC 5280 reasons) | `POST /api/v1/certificates/{id}/revoke` | ✅ | Enhanced (bulk revocation) | Establish incident response policy |
|
||||
| | CRL Endpoint (JSON + DER) | `GET /api/v1/crl`, `GET /api/v1/crl/{issuer_id}` | ✅ | ✅ | Ensure CRL/OCSP accessible to all clients |
|
||||
| | OCSP Responder | `GET /api/v1/ocsp/{issuer_id}/{serial}` | ✅ | ✅ | Test revocation in staging |
|
||||
| | CRL Endpoint (DER, RFC 5280 §5) | `GET /.well-known/pki/crl/{issuer_id}` (unauthenticated, `application/pkix-crl`) | ✅ | ✅ | Ensure CRL/OCSP accessible to all clients without API keys |
|
||||
| | OCSP Responder (RFC 6960) | `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (unauthenticated, `application/ocsp-response`) | ✅ | ✅ | Test revocation in staging |
|
||||
| | Revocation Notifications | Email, webhook, Slack/Teams on revocation | ✅ | ✅ | Integrate into on-call, document justification separately |
|
||||
| | Short-Lived Cert Exemption | TTL < 1h skip CRL/OCSP | ✅ | ✅ | Configure profiles appropriately |
|
||||
| **CC7.4** Risk Mitigation | Renewal Job Tracking | Job state machine (Pending → Running → Completed/Failed) | ✅ | ✅ | Monitor renewal success rate |
|
||||
|
||||
+19
-5
@@ -123,6 +123,8 @@ At no point does the private key leave the agent. This is a fundamental security
|
||||
|
||||
Agents also report **metadata** about themselves — their operating system, CPU architecture, IP address, hostname, and version — with every heartbeat. This gives ops teams fleet-wide visibility (e.g., "how many agents are running on ARM?", "which agents are still on v1.0.0?") and powers **agent groups** — dynamic device grouping where policies can be scoped to specific agent criteria like OS type, architecture, or network subnet.
|
||||
|
||||
**Retiring an agent.** When you decommission a server, the certctl record for its agent needs to be retired, not deleted. certctl uses a **soft-delete** model: `DELETE /api/v1/agents/{id}` stamps the row with a retired-at timestamp and a reason, instead of removing it. This is deliberate — an audit trail of "who owned this certificate, on which host, for which team" stays intact forever, and the downstream deployment_targets, certificates, and jobs keep valid foreign keys. Retired agents are filtered out of default list views and the dashboard's agent counter, but remain visible through a separate retired-agents view for compliance reconciliation. If the agent still has active deployment targets, deployed certificates, or pending jobs, retirement is blocked by default so you don't silently orphan those rows; the API responds with the exact counts so you can retire or reassign each dependency explicitly. A force-retire escape hatch (`?force=true&reason=...`) is available for true decommission scenarios — it transactionally retires the downstream targets, cancels pending jobs, and records the cascade in the audit trail with the reason you provided. Four internal sentinel agents that back the network scanner and the cloud secret-manager discovery sources cannot be retired at all, even with force, because retiring them would orphan their subsystems. Once retired, an agent that still attempts to heartbeat receives `410 Gone` — the agent process reads that as "you've been retired, shut down" and exits cleanly.
|
||||
|
||||
### Deployment Targets
|
||||
|
||||
Targets are the systems where certificates actually get installed — NGINX web servers, Apache httpd servers, HAProxy load balancers, Traefik reverse proxies, Caddy servers, Envoy gateways, Postfix/Dovecot mail servers, Microsoft IIS servers, and network appliances. Each target type has a **connector** that knows how to deploy certificates to that specific system (e.g., writing files and reloading NGINX or Apache config, building a combined PEM for HAProxy).
|
||||
@@ -183,11 +185,11 @@ Profiles are managed via the API (`/api/v1/profiles`) and the GUI, and can be as
|
||||
|
||||
For policies with `auto_renew` disabled, renewal jobs enter an **AwaitingApproval** state instead of processing immediately. An operator must explicitly approve or reject the renewal via the API or GUI. Approved jobs transition to Pending and are picked up by the scheduler. Rejected jobs are cancelled with an optional reason. This is useful for high-value certificates where you want human oversight before renewal.
|
||||
|
||||
### Renewal Timing: Thresholds vs. ARI (RFC 9702)
|
||||
### Renewal Timing: Thresholds vs. ARI (RFC 9773)
|
||||
|
||||
**Traditional approach (thresholds):** By default, certctl uses static renewal thresholds — renew a certificate at a fixed number of days before expiry (default: 30 days). This simple, predictable model works for most use cases: it avoids unnecessary renewals near expiry and gives you a predictable window to catch failures.
|
||||
|
||||
**Advanced approach (ACME ARI):** Some Certificate Authorities support ACME Renewal Information (RFC 9702), which allows the CA to tell certctl the optimal time to renew. Instead of guessing "renew 30 days before expiry," the CA responds with a precise `suggestedWindow` containing start and end times. This is useful when:
|
||||
**Advanced approach (ACME ARI):** Some Certificate Authorities support ACME Renewal Information (RFC 9773), which allows the CA to tell certctl the optimal time to renew. Instead of guessing "renew 30 days before expiry," the CA responds with a precise `suggestedWindow` containing start and end times. This is useful when:
|
||||
- The CA is performing maintenance and wants to batch renewals in a specific window
|
||||
- The CA is coordinating a mass revocation (e.g., due to a compromise) and needs to control renewal timing
|
||||
- You want to avoid thundering herd renewal spikes by accepting the CA's suggested timing
|
||||
@@ -196,6 +198,16 @@ For policies with `auto_renew` disabled, renewal jobs enter an **AwaitingApprova
|
||||
|
||||
**Graceful degradation:** If your CA doesn't support ARI (returns 404 from the ARI endpoint), certctl automatically falls back to the traditional threshold-based renewal. No configuration change needed — the fallback is transparent. Errors from the CA are logged as warnings and don't block the renewal process.
|
||||
|
||||
### Shorter Certificate Validity (45-Day and 6-Day Certs)
|
||||
|
||||
The industry is moving toward shorter certificate lifetimes. The CA/Browser Forum's SC-081v3 ballot mandates a phased reduction: 200-day max (March 2026), 100-day max (March 2027), and 47-day max (March 2029). Let's Encrypt has already begun reducing default validity to 45 days, and offers 6-day "shortlived" certificates via ACME profile selection.
|
||||
|
||||
certctl handles shorter-lived certificates correctly out of the box:
|
||||
|
||||
- **45-day certs** with the default 31-day renewal window trigger renewal at day 14 — at roughly 1/3 of the cert's lifetime.
|
||||
- **6-day "shortlived" certs** are always within the renewal window. ARI (RFC 9773) is the expected renewal path for these — the CA directs timing. Short-lived certs also skip CRL/OCSP since expiry is sufficient revocation (per profile TTL < 1 hour exemption).
|
||||
- **ACME profile selection** lets you request specific certificate profiles from your CA. Set `CERTCTL_ACME_PROFILE=shortlived` to get 6-day certificates from Let's Encrypt, or `CERTCTL_ACME_PROFILE=tlsserver` for standard TLS certificates.
|
||||
|
||||
### Certificate Revocation
|
||||
|
||||
When a private key is compromised, a certificate is superseded, or a service is decommissioned, you need to revoke the certificate immediately — not wait for it to expire. Revocation tells clients "stop trusting this certificate right now."
|
||||
@@ -204,9 +216,11 @@ certctl implements revocation using three complementary mechanisms:
|
||||
|
||||
**Revocation API**: `POST /api/v1/certificates/{id}/revoke` marks a certificate as revoked in the inventory, records the revocation in a dedicated `certificate_revocations` table, notifies the issuing CA (best-effort — the revocation succeeds even if the CA is unreachable), creates an audit trail entry, and sends notifications. You can specify an RFC 5280 reason code (keyCompromise, superseded, cessationOfOperation, etc.) or let it default to "unspecified."
|
||||
|
||||
**Certificate Revocation List (CRL)**: certctl serves both a JSON-formatted CRL at `GET /api/v1/crl` and DER-encoded X.509 CRLs per issuer at `GET /api/v1/crl/{issuer_id}`. The DER CRL is signed by the issuing CA's key and has 24-hour validity — clients can download it periodically to check revocation status offline.
|
||||
**Bulk Revocation** (Fleet-Level Incident Response): For large-scale incidents like CA compromise or team infrastructure decommissioning, `POST /api/v1/certificates/bulk-revoke` revokes all certificates matching filter criteria in a single operation. Filter by profile, owner, team, agent group, or issuer to target the affected certificate set. This is essential for incident response — instead of revoking certificates one-by-one, operators can revoke an entire fleet in minutes. Bulk revocation creates individual revocation jobs that reuse the existing revocation pipeline, ensuring every certificate is audited and notifications are sent.
|
||||
|
||||
**OCSP Responder**: For real-time revocation checking, certctl includes an embedded OCSP responder at `GET /api/v1/ocsp/{issuer_id}/{serial}`. It returns signed OCSP responses (good, revoked, or unknown) so clients can verify certificate status without downloading the full CRL.
|
||||
**Certificate Revocation List (CRL)**: certctl serves DER-encoded X.509 CRLs per issuer at `GET /.well-known/pki/crl/{issuer_id}` (RFC 5280 §5 wire format, RFC 8615 well-known namespace). The endpoint is unauthenticated so any relying party — browser, TLS client, hardware appliance — can fetch it without a certctl API key. The CRL is signed by the issuing CA's key and has 24-hour validity; clients can download it periodically to check revocation status offline. The response carries `Content-Type: application/pkix-crl`.
|
||||
|
||||
**OCSP Responder**: For real-time revocation checking, certctl includes an embedded OCSP responder at `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (RFC 6960). Like the CRL endpoint, it is unauthenticated and returns signed OCSP responses (good, revoked, or unknown) with `Content-Type: application/ocsp-response`, so clients can verify certificate status without downloading the full CRL.
|
||||
|
||||
Short-lived certificates (those assigned to profiles with TTL under 1 hour) are exempt from CRL and OCSP — their rapid expiry is considered sufficient revocation. This is a deliberate design choice to reduce infrastructure overhead for ephemeral machine-to-machine credentials.
|
||||
|
||||
@@ -242,7 +256,7 @@ The CLI supports both table and JSON output formats (`--format table` or `--form
|
||||
|
||||
### MCP Server (AI Integration)
|
||||
|
||||
certctl includes an MCP (Model Context Protocol) server that exposes 78 MCP tools covering the REST API. This enables AI assistants like Claude, Cursor, and other MCP-compatible tools to interact with your certificate infrastructure using natural language — "show me all expiring certificates," "revoke the VPN cert," or "what agents are offline?"
|
||||
certctl includes an MCP (Model Context Protocol) server that exposes the entire REST API as MCP tools. This enables AI assistants like Claude, Cursor, and other MCP-compatible tools to interact with your certificate infrastructure using natural language — "show me all expiring certificates," "revoke the VPN cert," or "what agents are offline?"
|
||||
|
||||
The MCP server is a separate binary (`cmd/mcp-server/`) that communicates via stdio transport and acts as a stateless HTTP proxy to the certctl REST API. It requires no additional infrastructure — just point it at your certctl server URL and API key.
|
||||
|
||||
|
||||
+365
-26
@@ -11,9 +11,13 @@ Connectors extend certctl to integrate with external systems for certificate iss
|
||||
- [Built-in: ACME v2 (Let's Encrypt, Sectigo, ZeroSSL)](#built-in-acme-v2-lets-encrypt-sectigo-zerossl)
|
||||
- [Built-in: step-ca (Smallstep Private CA)](#built-in-step-ca-smallstep-private-ca)
|
||||
- [OpenSSL / Custom CA](#openssl--custom-ca)
|
||||
- [Built-in: Vault PKI](#built-in-vault-pki)
|
||||
- [Built-in: DigiCert CertCentral](#built-in-digicert-certcentral)
|
||||
- [Built-in: Sectigo SCM](#built-in-sectigo-scm)
|
||||
- [Built-in: Google CAS](#built-in-google-cas)
|
||||
- [Built-in: AWS ACM Private CA](#built-in-aws-acm-private-ca)
|
||||
- [Revocation Across Issuers](#revocation-across-issuers)
|
||||
- [EST Integration (GetCACertPEM)](#est-integration-getcacertpem)
|
||||
- [Planned Issuers](#planned-issuers)
|
||||
- [Building a Custom Issuer](#building-a-custom-issuer)
|
||||
3. [Target Connector](#target-connector)
|
||||
- [Interface](#interface-1)
|
||||
@@ -24,8 +28,12 @@ Connectors extend certctl to integrate with external systems for certificate iss
|
||||
- [Built-in: Envoy](#built-in-envoy)
|
||||
- [Built-in: Postfix / Dovecot](#built-in-postfix--dovecot)
|
||||
- [Built-in: Caddy](#built-in-caddy)
|
||||
- [F5 BIG-IP (Interface Only)](#f5-big-ip-interface-only)
|
||||
- [F5 BIG-IP (Implemented)](#f5-big-ip-implemented)
|
||||
- [IIS (Implemented, Dual-Mode)](#iis-implemented-dual-mode)
|
||||
- [SSH (Agentless Deployment)](#ssh-agentless-deployment)
|
||||
- [Windows Certificate Store](#windows-certificate-store)
|
||||
- [Java Keystore (JKS / PKCS#12)](#java-keystore-jks--pkcs12)
|
||||
- [Kubernetes Secrets](#kubernetes-secrets)
|
||||
4. [Notifier Connector](#notifier-connector)
|
||||
- [Interface](#interface-2)
|
||||
5. [Registering a Connector](#registering-a-connector)
|
||||
@@ -53,8 +61,8 @@ Connectors extend certctl to integrate with external systems for certificate iss
|
||||
|
||||
Three types of connectors:
|
||||
|
||||
1. **Issuer Connector** — Obtains certificates from CAs (Local CA with sub-CA support, ACME with HTTP-01 + DNS-01 + DNS-PERSIST-01, step-ca, OpenSSL/Custom CA, Vault PKI, DigiCert implemented; additional CA integrations planned)
|
||||
2. **Target Connector** — Deploys certificates to infrastructure (NGINX, Apache httpd, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS implemented; F5 via proxy agent planned; additional cloud and network targets planned)
|
||||
1. **Issuer Connector** — Obtains certificates from CAs. 9 built-in: Local CA (self-signed + sub-CA), ACME v2 (HTTP-01, DNS-01, DNS-PERSIST-01, ARI, EAB, profile selection), step-ca, OpenSSL/Custom CA, Vault PKI, DigiCert CertCentral, Sectigo SCM, Google CAS, AWS ACM Private CA
|
||||
2. **Target Connector** — Deploys certificates to infrastructure. 14 built-in: NGINX, Apache httpd, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS (local + WinRM), F5 BIG-IP (proxy agent), SSH (agentless), Windows Certificate Store, Java Keystore, Kubernetes Secrets
|
||||
3. **Notifier Connector** — Sends alerts about certificate events (Email, Webhooks, Slack, Microsoft Teams, PagerDuty, OpsGenie implemented)
|
||||
|
||||
All connectors accept JSON configuration at initialization, support config validation, and are registered in the service layer. Issuer connectors run on the control plane; target connectors run on agents. For network appliances where agents can't be installed, a **proxy agent** in the same network zone handles deployment — the server never initiates outbound connections.
|
||||
@@ -147,10 +155,12 @@ The Local CA issuer signs certificates using Go's `crypto/x509` library. It supp
|
||||
|
||||
**Sub-CA mode:** Loads a CA certificate and private key from disk (`CERTCTL_CA_CERT_PATH` + `CERTCTL_CA_KEY_PATH`). The CA cert is signed by an upstream CA (e.g., ADCS), so all issued certificates chain to the enterprise root trust hierarchy. Clients that already trust the enterprise root automatically trust certctl-issued certs. Supports RSA, ECDSA, and PKCS#8 key formats. If the paths are not set, falls back to self-signed mode. The loaded certificate must have `IsCA=true` and `KeyUsageCertSign`.
|
||||
|
||||
**CRL and OCSP support (M15b):** The Local CA supports DER-encoded X.509 CRL generation via `GET /api/v1/crl/{issuer_id}` with 24-hour validity. An embedded OCSP responder at `GET /api/v1/ocsp/{issuer_id}/{serial}` returns signed OCSP responses for issued certificates (good/revoked/unknown status). Certificates with profile TTL < 1 hour automatically skip CRL/OCSP — expiry is treated as sufficient revocation for short-lived credentials.
|
||||
**CRL and OCSP support (M15b):** The Local CA supports DER-encoded X.509 CRL generation served unauthenticated at `GET /.well-known/pki/crl/{issuer_id}` (RFC 5280 §5, RFC 8615, `Content-Type: application/pkix-crl`) with 24-hour validity. An embedded OCSP responder at `GET /.well-known/pki/ocsp/{issuer_id}/{serial}` (RFC 6960, `Content-Type: application/ocsp-response`) returns signed OCSP responses for issued certificates (good/revoked/unknown status). Both endpoints are reachable by relying parties with no certctl API credentials, which is how standard TLS clients, browsers, and hardware appliances consume these resources. Certificates with profile TTL < 1 hour automatically skip CRL/OCSP — expiry is treated as sufficient revocation for short-lived credentials.
|
||||
|
||||
**Extended Key Usage (EKU) support (M27):** The Local CA respects EKU constraints from certificate profiles and adjusts key usage flags accordingly. For S/MIME certificates (emailProtection EKU), it uses `DigitalSignature | ContentCommitment` instead of the TLS default. For TLS certificates (serverAuth/clientAuth EKU), it uses `DigitalSignature | KeyEncipherment`. This enables support for multiple certificate types — TLS, S/MIME, code signing, timestamping — from a single CA.
|
||||
|
||||
**MaxTTL enforcement (M11c):** When a certificate profile defines a maximum TTL, the Local CA caps the `NotAfter` field to `min(validity_days, maxTTL)`. This ensures certificates never exceed the profile's configured lifetime regardless of the issuer's `validity_days` setting.
|
||||
|
||||
Configuration:
|
||||
```json
|
||||
{
|
||||
@@ -173,7 +183,7 @@ The ACME connector implements the full ACME v2 protocol using Go's `golang.org/x
|
||||
|
||||
**DNS-PERSIST-01 (standing record):** Creates a one-time persistent TXT record at `_validation-persist.<domain>` containing the CA's issuer domain and your ACME account URI. Once set, this record authorizes unlimited future certificate issuances without per-renewal DNS updates. Based on [draft-ietf-acme-dns-persist](https://datatracker.ietf.org/doc/draft-ietf-acme-dns-persist/) and CA/Browser Forum ballot SC-088v3. If the CA doesn't offer dns-persist-01 yet, the connector falls back to dns-01 automatically.
|
||||
|
||||
**ACME Renewal Information (ARI, RFC 9702):** Instead of using fixed renewal thresholds (e.g., renew 30 days before expiry), certctl can ask the CA when it should renew. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. The ARI protocol lets the CA specify a `suggestedWindow` (start and end times) for when you should renew — useful for distributing load during maintenance windows or coordinating mass revocation scenarios. Cert ID is computed as `base64url(SHA-256(DER cert))`. If the CA doesn't support ARI (404 response), certctl automatically falls back to threshold-based renewal with no operator intervention required.
|
||||
**ACME Renewal Information (ARI, RFC 9773):** Instead of using fixed renewal thresholds (e.g., renew 30 days before expiry), certctl can ask the CA when it should renew. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. The ARI protocol lets the CA specify a `suggestedWindow` (start and end times) for when you should renew — useful for distributing load during maintenance windows or coordinating mass revocation scenarios. Cert ID is computed as `base64url(SHA-256(DER cert))`. If the CA doesn't support ARI (404 response), certctl automatically falls back to threshold-based renewal with no operator intervention required.
|
||||
|
||||
HTTP-01 configuration:
|
||||
```json
|
||||
@@ -243,6 +253,9 @@ Environment variables for the default ACME connector:
|
||||
- `CERTCTL_ACME_DNS_PRESENT_SCRIPT` — Path to DNS record creation script (dns-01 and dns-persist-01)
|
||||
- `CERTCTL_ACME_DNS_CLEANUP_SCRIPT` — Path to DNS record cleanup script (dns-01 only, not used by dns-persist-01)
|
||||
- `CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN` — CA issuer domain for persistent record (dns-persist-01 only, e.g., `letsencrypt.org`)
|
||||
- `CERTCTL_ACME_PROFILE` — Certificate profile for the newOrder request. Let's Encrypt supports `tlsserver` (standard TLS, default) and `shortlived` (6-day certs). Leave empty for the CA's default profile.
|
||||
|
||||
**Certificate Profiles:** Let's Encrypt (GA January 2026) supports ACME certificate profile selection. Set `CERTCTL_ACME_PROFILE=shortlived` to request 6-day certificates — ideal for ephemeral workloads where short validity substitutes for revocation. The `tlsserver` profile produces standard TLS certificates. When the profile field is empty (default), the CA uses its default profile, maintaining full backward compatibility.
|
||||
|
||||
The connector is registered in the issuer registry under `iss-acme-staging` and `iss-acme-prod`. Use `iss-acme-staging` for Let's Encrypt staging (rate-limit-friendly testing) and `iss-acme-prod` for production certificates.
|
||||
|
||||
@@ -274,7 +287,9 @@ Environment variables:
|
||||
|
||||
The connector is registered in the issuer registry under `iss-stepca`. step-ca also works with the existing ACME connector (point `iss-acme-*` at step-ca's ACME directory URL for ACME-based issuance).
|
||||
|
||||
**Note:** step-ca-issued certificates rely on step-ca's own CRL/OCSP infrastructure. certctl's local CRL/OCSP endpoints (`GET /api/v1/crl/{issuer_id}` and `GET /api/v1/ocsp/{issuer_id}/{serial}`) are populated from step-ca's revocation data if available, but clients should validate against step-ca's endpoints for the authoritative status.
|
||||
**Note:** step-ca-issued certificates rely on step-ca's own CRL/OCSP infrastructure. certctl's local CRL/OCSP endpoints (`GET /.well-known/pki/crl/{issuer_id}` and `GET /.well-known/pki/ocsp/{issuer_id}/{serial}`, served unauthenticated per RFC 5280 §5 / RFC 6960 / RFC 8615) are populated from step-ca's revocation data if available, but clients should validate against step-ca's endpoints for the authoritative status.
|
||||
|
||||
**MaxTTL enforcement (M11c):** When a certificate profile defines a maximum TTL, the step-ca connector caps the `NotAfter` field to ensure the issued certificate does not exceed the profile limit, regardless of the step-ca provisioner's own maximum.
|
||||
|
||||
Location: `internal/connector/issuer/stepca/stepca.go`
|
||||
|
||||
@@ -303,16 +318,16 @@ Each issuer handles revocation differently:
|
||||
- **step-ca**: Calls step-ca's `/revoke` API endpoint. Clients should check step-ca's own CRL/OCSP for authoritative status.
|
||||
- **OpenSSL/Custom CA**: Invokes the configured revoke script (`CERTCTL_OPENSSL_REVOKE_SCRIPT`) with the serial number as an argument.
|
||||
|
||||
### EST Integration (GetCACertPEM)
|
||||
### EST/SCEP Integration (GetCACertPEM)
|
||||
|
||||
The `GetCACertPEM()` method returns the PEM-encoded CA certificate chain, used by the EST server's `/.well-known/est/cacerts` endpoint (RFC 7030) to distribute the CA chain to enrolling devices. Each issuer handles this differently:
|
||||
The `GetCACertPEM()` method returns the PEM-encoded CA certificate chain, used by both the EST server's `/.well-known/est/cacerts` endpoint (RFC 7030) and the SCEP server's `GetCACert` operation (RFC 8894) to distribute the CA chain to enrolling devices. Each issuer handles this differently:
|
||||
|
||||
- **Local CA**: Returns the CA certificate PEM (self-signed or sub-CA cert). This is the primary EST issuer.
|
||||
- **Local CA**: Returns the CA certificate PEM (self-signed or sub-CA cert). This is the primary EST/SCEP issuer.
|
||||
- **ACME**: Returns error — ACME CAs provide chains per-issuance, not statically.
|
||||
- **step-ca**: Returns error — step-ca serves its own `/root` endpoint for CA distribution.
|
||||
- **OpenSSL/Custom CA**: Returns error — custom script-based CAs have no CA cert access through certctl.
|
||||
|
||||
Note: EST (Enrollment over Secure Transport) is not a connector — it's a protocol handler (`internal/api/handler/est.go`) that delegates certificate issuance to whichever issuer connector is configured via `CERTCTL_EST_ISSUER_ID`. See the [Architecture Guide](architecture.md#est-server-rfc-7030) for details.
|
||||
Note: EST and SCEP are not connectors — they are protocol handlers (`internal/api/handler/est.go` and `internal/api/handler/scep.go`) that delegate certificate issuance to whichever issuer connector is configured via `CERTCTL_EST_ISSUER_ID` or `CERTCTL_SCEP_ISSUER_ID`. Both share a common `internal/pkcs7` package for PKCS#7 response encoding. See the [Architecture Guide](architecture.md#est-server-rfc-7030) for details.
|
||||
|
||||
### Built-in: Vault PKI
|
||||
|
||||
@@ -332,6 +347,8 @@ The connector is registered in the issuer registry under `iss-vault`. Vault issu
|
||||
|
||||
**Note:** CRL and OCSP are managed by Vault itself. Clients should validate certificate status against Vault's own CRL/OCSP endpoints (`GET /v1/{mount}/crl` and Vault's OCSP responder). certctl does not generate local CRL/OCSP for Vault-issued certificates. Revocation is recorded locally but Vault is the authoritative source.
|
||||
|
||||
**MaxTTL enforcement (M11c):** When a certificate profile defines a maximum TTL, the Vault connector overrides the TTL string in the signing request to ensure the issued certificate does not exceed the profile limit. This is applied before Vault's own role-level max TTL.
|
||||
|
||||
Location: `internal/connector/issuer/vault/vault.go`
|
||||
|
||||
### Built-in: DigiCert CertCentral
|
||||
@@ -379,19 +396,119 @@ The connector submits certificate enrollments to Sectigo's `/ssl/v1/enroll` API.
|
||||
|
||||
Location: `internal/connector/issuer/sectigo/sectigo.go`
|
||||
|
||||
### Coming in V2.2+
|
||||
### Built-in: Google CAS
|
||||
|
||||
The following issuer connectors are planned for future releases:
|
||||
Google Cloud Certificate Authority Service — managed private CA on GCP. Synchronous issuance via CAS REST API with OAuth2 service account auth.
|
||||
|
||||
- **Entrust** — Enterprise CA via Entrust API
|
||||
- **Google CAS** — Google Cloud Certificate Authority Service
|
||||
- **AWS ACM Private CA** — AWS-managed private CA
|
||||
| Setting | Required | Default | Description |
|
||||
|---------|----------|---------|-------------|
|
||||
| `CERTCTL_GOOGLE_CAS_PROJECT` | Yes | — | GCP project ID |
|
||||
| `CERTCTL_GOOGLE_CAS_LOCATION` | Yes | — | GCP region (e.g., `us-central1`) |
|
||||
| `CERTCTL_GOOGLE_CAS_CA_POOL` | Yes | — | CA pool name |
|
||||
| `CERTCTL_GOOGLE_CAS_CREDENTIALS` | Yes | — | Path to service account JSON |
|
||||
| `CERTCTL_GOOGLE_CAS_TTL` | No | `8760h` | Default certificate TTL |
|
||||
|
||||
Note: ADCS (Active Directory Certificate Services) integration is handled via the **sub-CA mode** of the Local CA issuer, not as a separate connector. certctl operates as a subordinate CA with its signing certificate issued by ADCS, so all certctl-issued certs chain to the enterprise ADCS root. See the Local CA section above.
|
||||
**Authentication:** OAuth2 service account. The connector reads a service account JSON file, signs a JWT with the private key, and exchanges it for an access token at Google's token endpoint. Tokens are cached and refreshed automatically (5 min before expiry).
|
||||
|
||||
**Note:** CRL and OCSP are managed by Google CAS directly. certctl records revocations locally and notifies Google CAS via the revoke endpoint.
|
||||
|
||||
Location: `internal/connector/issuer/googlecas/googlecas.go`
|
||||
|
||||
### Built-in: AWS ACM Private CA
|
||||
|
||||
AWS Certificate Manager Private Certificate Authority — managed private CA on AWS. Synchronous issuance via ACM PCA API with standard AWS credential chain (env vars, IAM roles, instance profiles, SSO).
|
||||
|
||||
| Setting | Required | Default | Description |
|
||||
|---------|----------|---------|-------------|
|
||||
| `CERTCTL_AWS_PCA_REGION` | Yes | — | AWS region (e.g., `us-east-1`) |
|
||||
| `CERTCTL_AWS_PCA_CA_ARN` | Yes | — | ARN of the ACM Private CA |
|
||||
| `CERTCTL_AWS_PCA_SIGNING_ALGORITHM` | No | `SHA256WITHRSA` | Signing algorithm |
|
||||
| `CERTCTL_AWS_PCA_VALIDITY_DAYS` | No | `365` | Certificate validity in days |
|
||||
| `CERTCTL_AWS_PCA_TEMPLATE_ARN` | No | — | Optional certificate template ARN |
|
||||
|
||||
**Supported signing algorithms:** SHA256WITHRSA, SHA384WITHRSA, SHA512WITHRSA, SHA256WITHECDSA, SHA384WITHECDSA, SHA512WITHECDSA.
|
||||
|
||||
**Authentication:** Standard AWS credential chain. The connector uses `aws-sdk-go-v2/config.LoadDefaultConfig()` which supports environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`), IAM roles (EC2/ECS), instance profiles, and SSO credentials.
|
||||
|
||||
**Note:** CRL and OCSP are managed by AWS ACM PCA directly. certctl records revocations locally and notifies AWS via the RevokeCertificate API with RFC 5280 reason mapping.
|
||||
|
||||
Location: `internal/connector/issuer/awsacmpca/awsacmpca.go`
|
||||
|
||||
### Built-in: Entrust Certificate Services
|
||||
|
||||
Entrust CA Gateway REST API with mutual TLS (mTLS) client certificate authentication. Supports synchronous issuance (200 OK with PEM) and approval-pending flows (201 Accepted with async polling).
|
||||
|
||||
| Setting | Required | Default | Description |
|
||||
|---------|----------|---------|-------------|
|
||||
| `CERTCTL_ENTRUST_API_URL` | Yes | — | Entrust CA Gateway base URL |
|
||||
| `CERTCTL_ENTRUST_CLIENT_CERT_PATH` | Yes | — | Path to mTLS client certificate PEM |
|
||||
| `CERTCTL_ENTRUST_CLIENT_KEY_PATH` | Yes | — | Path to mTLS client private key PEM |
|
||||
| `CERTCTL_ENTRUST_CA_ID` | Yes | — | Certificate Authority ID (from `GET /certificate-authorities`) |
|
||||
| `CERTCTL_ENTRUST_PROFILE_ID` | No | — | Optional enrollment profile ID |
|
||||
|
||||
**Authentication:** Mutual TLS — the client certificate and key are loaded via `tls.LoadX509KeyPair()` and attached to the HTTP transport. No API key or token required.
|
||||
|
||||
**Issuance model:** Enrollment via `POST /v1/certificate-authorities/{caId}/enrollments`. Returns 200 with PEM immediately for auto-approved enrollments, or 201 Accepted with a tracking ID for approval-pending orders. `GetOrderStatus` polls the enrollment endpoint.
|
||||
|
||||
**Note:** CRL and OCSP are managed by Entrust. certctl records revocations locally and notifies Entrust via `PUT /v1/certificate-authorities/{caId}/certificates/{serial}/revoke`.
|
||||
|
||||
Location: `internal/connector/issuer/entrust/entrust.go`
|
||||
|
||||
### Built-in: GlobalSign Atlas HVCA
|
||||
|
||||
GlobalSign Atlas High Volume CA REST API with dual authentication: mTLS for the TLS handshake and API key/secret headers for request authorization. Region-aware base URLs (EMEA, APAC, Americas).
|
||||
|
||||
| Setting | Required | Default | Description |
|
||||
|---------|----------|---------|-------------|
|
||||
| `CERTCTL_GLOBALSIGN_API_URL` | Yes | — | Atlas HVCA API URL (region-specific) |
|
||||
| `CERTCTL_GLOBALSIGN_API_KEY` | Yes | — | API key for request authentication |
|
||||
| `CERTCTL_GLOBALSIGN_API_SECRET` | Yes | — | API secret for request authentication |
|
||||
| `CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH` | Yes | — | Path to mTLS client certificate PEM |
|
||||
| `CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH` | Yes | — | Path to mTLS client private key PEM |
|
||||
| `CERTCTL_GLOBALSIGN_SERVER_CA_PATH` | No | system trust store | PEM bundle used to verify the Atlas API server certificate. Set this for private/lab Atlas deployments whose server TLS chain is not in the host's default trust bundle. |
|
||||
|
||||
**Authentication:** Dual — mTLS client certificate for TLS handshake plus `X-API-Key` and `X-API-Secret` headers on every request.
|
||||
|
||||
**TLS verification:** The connector always verifies the server certificate. When `server_ca_path` is set, the PEM bundle at that path is used as the trust anchor; otherwise the host's system trust store is used. TLS 1.2 is the minimum protocol version.
|
||||
|
||||
**Issuance model:** `POST /v2/certificates` returns a serial number. Certificate PEM is available after validation completes. Typically resolves within seconds for DV. `GetOrderStatus` polls the certificate endpoint.
|
||||
|
||||
**Note:** CRL and OCSP are managed by GlobalSign. certctl records revocations locally and notifies GlobalSign via `PUT /v2/certificates/{serial}/revoke`.
|
||||
|
||||
Location: `internal/connector/issuer/globalsign/globalsign.go`
|
||||
|
||||
### Built-in: EJBCA (Keyfactor)
|
||||
|
||||
EJBCA REST API for self-hosted open-source and enterprise CAs. Supports dual authentication: mTLS (default) or OAuth2 Bearer token, selectable via configuration.
|
||||
|
||||
| Setting | Required | Default | Description |
|
||||
|---------|----------|---------|-------------|
|
||||
| `CERTCTL_EJBCA_API_URL` | Yes | — | EJBCA REST API base URL |
|
||||
| `CERTCTL_EJBCA_AUTH_MODE` | No | `mtls` | Auth mode: `mtls` or `oauth2` |
|
||||
| `CERTCTL_EJBCA_CLIENT_CERT_PATH` | mTLS | — | Path to client certificate PEM (mTLS mode) |
|
||||
| `CERTCTL_EJBCA_CLIENT_KEY_PATH` | mTLS | — | Path to client key PEM (mTLS mode) |
|
||||
| `CERTCTL_EJBCA_TOKEN` | OAuth2 | — | Bearer token (oauth2 mode) |
|
||||
| `CERTCTL_EJBCA_CA_NAME` | Yes | — | EJBCA CA name |
|
||||
| `CERTCTL_EJBCA_CERT_PROFILE` | No | — | EJBCA certificate profile |
|
||||
| `CERTCTL_EJBCA_EE_PROFILE` | No | — | EJBCA end-entity profile |
|
||||
|
||||
**Authentication:** Configurable via `auth_mode`. In mTLS mode, client certificate and key are loaded for the TLS handshake. In OAuth2 mode, the token is sent as `Authorization: Bearer {token}`.
|
||||
|
||||
**Issuance model:** `POST /v1/certificate/pkcs10enroll` with base64-encoded CSR. Returns base64-encoded certificate PEM. EJBCA 9.3+ creates end-entity and issues cert in a single call. Approval-pending enrollments return 201.
|
||||
|
||||
**Revocation note:** EJBCA requires both issuer DN and serial number for revocation. The connector stores these as a composite `OrderID` in `issuer_dn::serial` format.
|
||||
|
||||
**Note:** CRL and OCSP are managed by the EJBCA instance. certctl records revocations locally and notifies EJBCA via `PUT /v1/certificate/{issuer_dn}/{serial}/revoke`.
|
||||
|
||||
Location: `internal/connector/issuer/ejbca/ejbca.go`
|
||||
|
||||
### ADCS Integration
|
||||
|
||||
Active Directory Certificate Services integration is handled via the **sub-CA mode** of the Local CA issuer, not as a separate connector. certctl operates as a subordinate CA with its signing certificate issued by ADCS, so all certctl-issued certs chain to the enterprise ADCS root. See the Local CA section above.
|
||||
|
||||
### Building a Custom Issuer
|
||||
|
||||
Here's the structure for a HashiCorp Vault PKI issuer:
|
||||
Here's a simplified example showing the connector pattern (using a hypothetical Vault-like CA):
|
||||
|
||||
```go
|
||||
package vault
|
||||
@@ -687,24 +804,37 @@ All commands are validated against shell injection via `validation.ValidateShell
|
||||
|
||||
Location: `internal/connector/target/postfix/postfix.go`
|
||||
|
||||
### F5 BIG-IP (Interface Only)
|
||||
### F5 BIG-IP (Implemented)
|
||||
|
||||
The F5 BIG-IP target connector interface is defined with the iControl REST flow mapped out, but the actual API calls are not yet implemented. F5 appliances can't run agents directly, so this connector uses the **proxy agent pattern**: a designated agent in the same network zone picks up F5 deployment jobs and calls the iControl REST API. The server assigns the work; the proxy agent executes it.
|
||||
The F5 BIG-IP target connector deploys certificates to F5 load balancers via the iControl REST API. F5 appliances can't run agents directly, so this connector uses the **proxy agent pattern**: a designated certctl agent in the same network zone polls for F5 deployment jobs and executes iControl REST calls on behalf of the control plane. Minimum supported BIG-IP version: 12.0+.
|
||||
|
||||
The planned flow is: authenticate via `POST /mgmt/shared/authn/login`, upload cert PEM via `POST /mgmt/tm/ltm/certificate`, update the SSL profile via `PATCH /mgmt/tm/ltm/profile/client-ssl/{profile}`, and validate deployment by checking profile status.
|
||||
The deployment flow uses F5's transaction API for atomic updates: authenticate via token auth, upload cert/key/chain PEM files, install as crypto objects, update the SSL client profile within a transaction, and commit. If the transaction fails, F5 rolls back automatically and the connector cleans up uploaded crypto objects. Updating an SSL profile automatically takes effect on all bound virtual servers — no separate virtual server binding step is needed.
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `host` | string | *(required)* | F5 BIG-IP management hostname or IP |
|
||||
| `port` | int | `443` | iControl REST API port |
|
||||
| `username` | string | *(required)* | Administrative username |
|
||||
| `password` | string | *(required)* | Administrative password |
|
||||
| `partition` | string | `Common` | F5 partition for crypto objects and profiles |
|
||||
| `ssl_profile` | string | *(required)* | SSL client profile name to update |
|
||||
| `insecure` | bool | `true` | Skip TLS verification for management interface (self-signed certs common) |
|
||||
| `timeout` | int | `30` | HTTP timeout in seconds |
|
||||
|
||||
Configuration (defined, not yet functional):
|
||||
```json
|
||||
{
|
||||
"host": "f5.internal.example.com",
|
||||
"port": 443,
|
||||
"username": "admin",
|
||||
"password": "...",
|
||||
"partition": "Common",
|
||||
"ssl_profile": "/Common/clientssl_api"
|
||||
"ssl_profile": "clientssl_api",
|
||||
"insecure": true,
|
||||
"timeout": 30
|
||||
}
|
||||
```
|
||||
|
||||
Note: F5 credentials are stored on the proxy agent, not on the control plane server. This limits the credential blast radius to the proxy agent's network zone.
|
||||
F5 credentials are stored on the proxy agent, not on the control plane server. This limits the credential blast radius to the proxy agent's network zone. Config fields are validated against regex patterns to prevent injection.
|
||||
|
||||
Location: `internal/connector/target/f5/f5.go`
|
||||
|
||||
@@ -779,6 +909,158 @@ The IIS target connector supports two deployment modes — agent-local (recommen
|
||||
|
||||
Location: `internal/connector/target/iis/iis.go`, `internal/connector/target/iis/winrm.go`
|
||||
|
||||
### SSH (Agentless Deployment)
|
||||
|
||||
The SSH target connector enables agentless certificate deployment to any Linux/Unix server via SSH/SFTP. Instead of installing the certctl agent binary on every target, a single "proxy agent" in the same network zone deploys certificates to remote servers over SSH. This is ideal for environments where installing agents on every server is impractical.
|
||||
|
||||
**Key authentication (recommended):**
|
||||
```json
|
||||
{
|
||||
"host": "web-server.internal",
|
||||
"port": 22,
|
||||
"user": "certctl",
|
||||
"auth_method": "key",
|
||||
"private_key_path": "/home/certctl/.ssh/id_ed25519",
|
||||
"cert_path": "/etc/ssl/certs/cert.pem",
|
||||
"key_path": "/etc/ssl/private/key.pem",
|
||||
"chain_path": "/etc/ssl/certs/chain.pem",
|
||||
"reload_command": "systemctl reload nginx",
|
||||
"timeout": 30
|
||||
}
|
||||
```
|
||||
|
||||
**Password authentication:**
|
||||
```json
|
||||
{
|
||||
"host": "legacy-server.internal",
|
||||
"user": "deploy",
|
||||
"auth_method": "password",
|
||||
"password": "s3cret",
|
||||
"cert_path": "/etc/ssl/cert.pem",
|
||||
"key_path": "/etc/ssl/key.pem",
|
||||
"reload_command": "systemctl reload apache2"
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `host` | string | *(required)* | SSH hostname or IP address |
|
||||
| `port` | number | 22 | SSH port |
|
||||
| `user` | string | *(required)* | SSH username |
|
||||
| `auth_method` | string | `"key"` | `"key"` or `"password"` |
|
||||
| `private_key_path` | string | | Path to SSH private key file (key auth) |
|
||||
| `private_key` | string | | Inline SSH private key PEM (alternative to path) |
|
||||
| `password` | string | | SSH password (password auth) |
|
||||
| `passphrase` | string | | Passphrase for encrypted private keys |
|
||||
| `cert_path` | string | *(required)* | Remote path for certificate file |
|
||||
| `key_path` | string | *(required)* | Remote path for private key file |
|
||||
| `chain_path` | string | | Remote path for chain file (if empty, chain appended to cert) |
|
||||
| `cert_mode` | string | `"0644"` | File permissions for cert (octal) |
|
||||
| `key_mode` | string | `"0600"` | File permissions for private key (octal) |
|
||||
| `reload_command` | string | | Command to execute after deployment |
|
||||
| `timeout` | number | 30 | SSH connection timeout in seconds |
|
||||
|
||||
**Security:**
|
||||
- Key-based authentication is recommended over password authentication
|
||||
- Reload commands are validated against shell injection (same validation as Postfix/Dovecot connectors)
|
||||
- Host field is regex-validated to prevent shell metacharacters
|
||||
- Private keys are written with 0600 permissions by default
|
||||
- Host key verification is intentionally skipped (same rationale as network scanner and F5 connector — deploying to known, operator-configured infrastructure)
|
||||
- Encrypted private keys supported via passphrase
|
||||
|
||||
Location: `internal/connector/target/ssh/ssh.go`
|
||||
|
||||
### Windows Certificate Store
|
||||
|
||||
The Windows Certificate Store connector imports certificates into the Windows cert store via PowerShell, without managing IIS site bindings. Use this for non-IIS Windows services that read certificates from the cert store (Exchange, RDP, SQL Server, ADFS, etc.). Same injectable `PowerShellExecutor` pattern as the IIS connector, with optional WinRM proxy mode.
|
||||
|
||||
```json
|
||||
{
|
||||
"store_name": "My",
|
||||
"store_location": "LocalMachine",
|
||||
"friendly_name": "Production API Cert",
|
||||
"remove_expired": true
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `store_name` | string | `"My"` | Windows cert store name (My, Root, WebHosting, etc.) |
|
||||
| `store_location` | string | `"LocalMachine"` | `"LocalMachine"` or `"CurrentUser"` |
|
||||
| `friendly_name` | string | | Optional friendly name for the imported certificate |
|
||||
| `remove_expired` | boolean | `false` | Remove expired certs with same CN after import |
|
||||
| `mode` | string | `"local"` | `"local"` (agent-local) or `"winrm"` (remote) |
|
||||
| `winrm_host` | string | | WinRM hostname (required for winrm mode) |
|
||||
| `winrm_port` | number | 5985 | WinRM port (5985 HTTP, 5986 HTTPS) |
|
||||
| `winrm_username` | string | | WinRM username (required for winrm mode) |
|
||||
| `winrm_password` | string | | WinRM password (required for winrm mode) |
|
||||
| `winrm_https` | boolean | `false` | Use HTTPS for WinRM |
|
||||
| `winrm_insecure` | boolean | `false` | Skip TLS verification for WinRM |
|
||||
|
||||
Location: `internal/connector/target/wincertstore/wincertstore.go`
|
||||
|
||||
### Java Keystore (JKS / PKCS#12)
|
||||
|
||||
The Java Keystore connector deploys certificates to JKS or PKCS#12 keystores via the `keytool` CLI. This enables TLS cert deployment for Tomcat, Jetty, Kafka, Elasticsearch, and any JVM-based service. Flow: PEM to temp PKCS#12, then `keytool -importkeystore` into the target keystore.
|
||||
|
||||
```json
|
||||
{
|
||||
"keystore_path": "/opt/tomcat/conf/keystore.p12",
|
||||
"keystore_password": "changeit",
|
||||
"keystore_type": "PKCS12",
|
||||
"alias": "server",
|
||||
"reload_command": "systemctl restart tomcat"
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `keystore_path` | string | *(required)* | Absolute path to the keystore file |
|
||||
| `keystore_password` | string | *(required)* | Keystore password |
|
||||
| `keystore_type` | string | `"PKCS12"` | `"PKCS12"` or `"JKS"` |
|
||||
| `alias` | string | `"server"` | Key entry alias in the keystore |
|
||||
| `reload_command` | string | | Optional command to run after keystore update |
|
||||
| `create_keystore` | boolean | `true` | Create keystore if it doesn't exist |
|
||||
| `keytool_path` | string | `"keytool"` | Override keytool binary path |
|
||||
|
||||
**Security:**
|
||||
- Reload commands validated against shell injection via `validation.ValidateShellCommand()`
|
||||
- Alias validated against injection (alphanumeric, hyphens, underscores only)
|
||||
- Path traversal prevention on keystore path
|
||||
- Transient PKCS#12 temp file cleaned up after import (even on error)
|
||||
|
||||
Location: `internal/connector/target/javakeystore/javakeystore.go`
|
||||
|
||||
### Kubernetes Secrets
|
||||
|
||||
The Kubernetes Secrets connector deploys certificates as `kubernetes.io/tls` Secrets, compatible with Ingress controllers (nginx-ingress, Traefik, HAProxy), service meshes (Istio, Linkerd), and any Kubernetes workload that reads TLS Secrets.
|
||||
|
||||
```json
|
||||
{
|
||||
"namespace": "production",
|
||||
"secret_name": "api-tls",
|
||||
"labels": {"app": "api-gateway"},
|
||||
"kubeconfig_path": "/home/agent/.kube/config"
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `namespace` | string | *(required)* | Kubernetes namespace (DNS-1123, max 63 chars) |
|
||||
| `secret_name` | string | *(required)* | Secret name (DNS subdomain, max 253 chars) |
|
||||
| `labels` | object | | Additional labels to apply to the Secret |
|
||||
| `kubeconfig_path` | string | | Path to kubeconfig for out-of-cluster agents |
|
||||
|
||||
**Deployment modes:**
|
||||
- **In-cluster (default):** Agent runs as a Pod with a ServiceAccount. Authentication via auto-mounted token. Requires RBAC (`secrets.get`, `secrets.create`, `secrets.update`, `secrets.list`) — see Helm chart.
|
||||
- **Out-of-cluster:** Agent runs outside the cluster with `kubeconfig_path` pointing to a kubeconfig file. Useful for proxy agent pattern.
|
||||
|
||||
**Secret format:** Standard `kubernetes.io/tls` with `tls.crt` (cert + chain PEM) and `tls.key` (private key PEM). Managed labels (`app.kubernetes.io/managed-by: certctl`) and annotations (`certctl.io/deployed-at`, `certctl.io/certificate-id`) are applied automatically.
|
||||
|
||||
**Validation:** After deployment, the connector reads the Secret back and compares the certificate serial number to verify successful deployment.
|
||||
|
||||
Location: `internal/connector/target/k8ssecret/k8ssecret.go`
|
||||
|
||||
## Notifier Connector
|
||||
|
||||
Notifier connectors send alerts about certificate lifecycle events (expiration warnings, renewal success/failure, deployment status, policy violations).
|
||||
@@ -844,7 +1126,7 @@ The digest HTML template includes:
|
||||
- Expiring certificates table (color-coded by urgency: 7d, 14d, 30d)
|
||||
- Auto-refresh and responsive email layout
|
||||
|
||||
**Scheduler Integration:** The 7th scheduler loop runs on configurable interval (default 24 hours). It does NOT run on startup — waits for first scheduled tick. Operation timeout is 5 minutes. Each loop execution is guarded by `sync/atomic.Bool` idempotency.
|
||||
**Scheduler Integration:** The opt-in digest scheduler loop runs on configurable interval (default 24 hours). It does NOT run on startup — waits for first scheduled tick. Operation timeout is 5 minutes. Each loop execution is guarded by `sync/atomic.Bool` idempotency. See `docs/architecture.md` for the full scheduler topology (12 loops, 8 always-on + 4 opt-in).
|
||||
|
||||
Configuration:
|
||||
|
||||
@@ -1107,7 +1389,7 @@ curl -s -X DELETE http://localhost:8443/api/v1/network-scan-targets/nst-dmz
|
||||
|
||||
### Scheduler Integration
|
||||
|
||||
When `CERTCTL_NETWORK_SCAN_ENABLED=true`, the server runs a 6th scheduler loop (alongside renewal, jobs, health, notifications, and short-lived expiry). It scans all enabled targets at the configured interval (default 6h). Each target tracks `last_scan_at`, `last_scan_duration_ms`, and `last_scan_certs_found` for monitoring scan health.
|
||||
When `CERTCTL_NETWORK_SCAN_ENABLED=true`, the server runs the opt-in network scanner scheduler loop alongside the always-on loops (renewal, jobs, job retry, job timeout, agent health, notifications, notification retry, short-lived expiry). It scans all enabled targets at the configured interval (default 6h). Each target tracks `last_scan_at`, `last_scan_duration_ms`, and `last_scan_certs_found` for monitoring scan health. See `docs/architecture.md` for the full 12-loop scheduler topology.
|
||||
|
||||
### Use Cases
|
||||
|
||||
@@ -1117,6 +1399,63 @@ When `CERTCTL_NETWORK_SCAN_ENABLED=true`, the server runs a 6th scheduler loop (
|
||||
- **Migration assessment** — Scan a network range before onboarding to certctl management
|
||||
- **Expiration monitoring** — Discover soon-to-expire certs on network endpoints before they cause outages
|
||||
|
||||
## Cloud Secret Manager Discovery
|
||||
|
||||
certctl extends the existing filesystem and network discovery pipeline to cloud secret managers. Certificates stored in cloud vaults are automatically discovered, inventoried, and available for triage in the Discovery page.
|
||||
|
||||
Each cloud source runs as a pluggable `DiscoverySource` with its own sentinel agent ID. Discovered certificates flow through the same `ProcessDiscoveryReport` pipeline used by filesystem and network discovery — dedup by fingerprint, audit trail, status tracking.
|
||||
|
||||
### AWS Secrets Manager
|
||||
|
||||
Discovers certificates stored as secrets in AWS Secrets Manager. Filters by tag (`type=certificate` by default) and optional name prefix.
|
||||
|
||||
| Variable | Description | Default |
|
||||
|---|---|---|
|
||||
| `CERTCTL_CLOUD_DISCOVERY_ENABLED` | Enable cloud discovery scheduler | `false` |
|
||||
| `CERTCTL_AWS_SM_DISCOVERY_ENABLED` | Enable AWS SM source | `false` |
|
||||
| `CERTCTL_AWS_SM_REGION` | AWS region (e.g., `us-east-1`) | — |
|
||||
| `CERTCTL_AWS_SM_TAG_FILTER` | Tag key=value filter | `type=certificate` |
|
||||
| `CERTCTL_AWS_SM_NAME_PREFIX` | Secret name prefix filter | — |
|
||||
|
||||
Source path format: `aws-sm://{region}/{secret-name}`. Sentinel agent: `cloud-aws-sm`.
|
||||
|
||||
### Azure Key Vault
|
||||
|
||||
Discovers certificates from Azure Key Vault using OAuth2 client credentials authentication. No Azure SDK dependency — uses stdlib HTTP with Azure AD token exchange.
|
||||
|
||||
| Variable | Description | Default |
|
||||
|---|---|---|
|
||||
| `CERTCTL_AZURE_KV_DISCOVERY_ENABLED` | Enable Azure KV source | `false` |
|
||||
| `CERTCTL_AZURE_KV_VAULT_URL` | Vault URL (e.g., `https://myvault.vault.azure.net`) | — |
|
||||
| `CERTCTL_AZURE_KV_TENANT_ID` | Azure AD tenant ID | — |
|
||||
| `CERTCTL_AZURE_KV_CLIENT_ID` | Azure AD application (client) ID | — |
|
||||
| `CERTCTL_AZURE_KV_CLIENT_SECRET` | Azure AD application secret | — |
|
||||
|
||||
Source path format: `azure-kv://{cert-name}/{version}`. Sentinel agent: `cloud-azure-kv`.
|
||||
|
||||
### GCP Secret Manager
|
||||
|
||||
Discovers certificates stored in GCP Secret Manager. Filters by label (`type=certificate`). Uses JWT-based OAuth2 service account auth — no Google SDK dependency.
|
||||
|
||||
| Variable | Description | Default |
|
||||
|---|---|---|
|
||||
| `CERTCTL_GCP_SM_DISCOVERY_ENABLED` | Enable GCP SM source | `false` |
|
||||
| `CERTCTL_GCP_SM_PROJECT` | GCP project ID | — |
|
||||
| `CERTCTL_GCP_SM_CREDENTIALS` | Path to service account JSON file | — |
|
||||
|
||||
Source path format: `gcp-sm://{project}/{secret-name}`. Sentinel agent: `cloud-gcp-sm`.
|
||||
|
||||
### Cloud Discovery Scheduler
|
||||
|
||||
All enabled cloud sources run on a shared opt-in cloud discovery scheduler loop (see `docs/architecture.md` for the full 12-loop scheduler topology). The interval is configurable:
|
||||
|
||||
| Variable | Description | Default |
|
||||
|---|---|---|
|
||||
| `CERTCTL_CLOUD_DISCOVERY_ENABLED` | Master switch | `false` |
|
||||
| `CERTCTL_CLOUD_DISCOVERY_INTERVAL` | Scan interval | `6h` |
|
||||
|
||||
The loop runs immediately on startup and then on each tick. Each source runs sequentially within the loop. Errors from one source do not prevent other sources from running.
|
||||
|
||||
## What's Next
|
||||
|
||||
- [Architecture Guide](architecture.md) — Understanding the full system design
|
||||
|
||||
+27
-19
@@ -50,14 +50,17 @@ docker compose -f deploy/docker-compose.yml up -d --build
|
||||
docker compose -f deploy/docker-compose.yml ps
|
||||
```
|
||||
|
||||
Open **http://localhost:8443** in your browser alongside your terminal. You'll watch changes appear in the dashboard as you make API calls.
|
||||
Open **https://localhost:8443** in your browser alongside your terminal. The default compose stack ships a self-signed cert; your browser will show a warning the first time — click through (or trust `deploy/test/certs/ca.crt` in your OS keychain). You'll watch changes appear in the dashboard as you make API calls.
|
||||
|
||||
Set up a base variable for convenience:
|
||||
Set up base variables for convenience:
|
||||
|
||||
```bash
|
||||
API="http://localhost:8443"
|
||||
API="https://localhost:8443"
|
||||
CA="$PWD/deploy/test/certs/ca.crt" # pin the self-signed CA for curl
|
||||
```
|
||||
|
||||
Every `curl` in this guide uses `--cacert "$CA"` so the TLS handshake verifies against the compose-stack CA instead of the system trust store.
|
||||
|
||||
## How the pieces fit together
|
||||
|
||||
Before we start, here's the high-level flow of what we're about to do:
|
||||
@@ -724,22 +727,24 @@ curl -s -X POST $API/api/v1/certificates/mc-demo-payments/revoke \
|
||||
6. Creates an audit trail entry
|
||||
7. Sends revocation notifications via configured channels
|
||||
|
||||
Check the CRL (Certificate Revocation List):
|
||||
Check the CRL (Certificate Revocation List) — served unauthenticated under the RFC 8615 well-known namespace so relying parties without a certctl API key can still verify revocation (RFC 5280 §5):
|
||||
|
||||
```bash
|
||||
# JSON-formatted CRL
|
||||
curl -s $API/api/v1/crl | jq .
|
||||
|
||||
# DER-encoded X.509 CRL for the local CA (binary — pipe to openssl for inspection)
|
||||
curl -s $API/api/v1/crl/iss-local -o /tmp/crl.der
|
||||
# DER-encoded X.509 CRL for the local CA (binary — pipe to openssl for inspection).
|
||||
# Note: no -H "Authorization: Bearer ..." — the endpoint is deliberately
|
||||
# unauthenticated. Content-Type is application/pkix-crl.
|
||||
curl --cacert "$CA" -s https://localhost:8443/.well-known/pki/crl/iss-local -o /tmp/crl.der
|
||||
openssl crl -inform DER -in /tmp/crl.der -text -noout
|
||||
```
|
||||
|
||||
Check OCSP status:
|
||||
Check OCSP status (RFC 6960, also unauthenticated, `application/ocsp-response`):
|
||||
|
||||
```bash
|
||||
# Replace SERIAL with the actual serial number from the certificate version
|
||||
curl -s $API/api/v1/ocsp/iss-local/SERIAL | jq .
|
||||
# Replace SERIAL with the actual serial number from the certificate version.
|
||||
# The embedded OCSP responder returns a signed DER response — parse it with
|
||||
# `openssl ocsp -respin` or similar tooling.
|
||||
curl --cacert "$CA" -s https://localhost:8443/.well-known/pki/ocsp/iss-local/SERIAL -o /tmp/ocsp.der
|
||||
openssl ocsp -respin /tmp/ocsp.der -noverify -resp_text | head -40
|
||||
```
|
||||
|
||||
**Why RFC 5280 reason codes:** The reason code isn't just metadata — it tells clients *why* the certificate was revoked. A `keyCompromise` revocation means the private key was exposed and the certificate should be distrusted immediately. A `superseded` revocation means a newer certificate replaced it — less urgent. CRLs and OCSP responses include the reason code so client software can make informed trust decisions.
|
||||
@@ -944,7 +949,8 @@ certctl includes a standalone CLI tool for command-line users:
|
||||
cd cmd/cli && go build -o certctl-cli .
|
||||
|
||||
# Export credentials
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_SERVER_URL="https://localhost:8443"
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH="$PWD/deploy/test/certs/ca.crt"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
# List certificates (JSON or table format)
|
||||
@@ -981,14 +987,15 @@ export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
## Part 15: MCP Server for AI Integration (M18a)
|
||||
|
||||
certctl exposes 78 MCP tools covering the REST API via the Model Context Protocol (MCP), enabling seamless integration with Claude, Cursor, and other AI assistants:
|
||||
certctl exposes the full REST API via the Model Context Protocol (MCP), enabling seamless integration with Claude, Cursor, and other AI assistants:
|
||||
|
||||
```bash
|
||||
# Build the MCP server
|
||||
cd cmd/mcp-server && go build -o mcp-server .
|
||||
|
||||
# Export credentials
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_SERVER_URL="https://localhost:8443"
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH="$PWD/deploy/test/certs/ca.crt"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
|
||||
# Start the MCP server (listens on stdin/stdout)
|
||||
@@ -1046,7 +1053,7 @@ docker compose -f deploy/docker-compose.yml run -e CERTCTL_DISCOVERY_DIRS=/tmp/c
|
||||
Or with the CLI flag:
|
||||
|
||||
```bash
|
||||
certctl-agent --agent-id a-demo-1 --key-dir /tmp/keys --discovery-dirs /tmp/certs --server http://localhost:8443 --api-key test-key-123
|
||||
certctl-agent --agent-id a-demo-1 --key-dir /tmp/keys --discovery-dirs /tmp/certs --server https://localhost:8443 --ca-bundle "$CA" --api-key test-key-123
|
||||
```
|
||||
|
||||
### Network Discovery (Server-Side)
|
||||
@@ -1153,7 +1160,7 @@ flowchart TB
|
||||
API["REST API\nGo net/http"]
|
||||
SVC["Service Layer\nBusiness Logic"]
|
||||
REPO["Repository Layer\ndatabase/sql + lib/pq"]
|
||||
SCHED["Scheduler\n7 background loops"]
|
||||
SCHED["Scheduler\n12 background loops\n(8 always-on + 4 opt-in)"]
|
||||
CONN["Connector Registry\nIssuer + Target + Notifier"]
|
||||
end
|
||||
|
||||
@@ -1189,7 +1196,8 @@ Here's a single script that runs the entire demo end-to-end. Save it as `demo.sh
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
API="http://localhost:8443"
|
||||
API="https://localhost:8443"
|
||||
CA="$PWD/deploy/test/certs/ca.crt" # pin the self-signed CA for curl
|
||||
BLUE='\033[0;34m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
@@ -1297,7 +1305,7 @@ echo " 5. Revoked the certificate with RFC 5280 reason codes"
|
||||
echo " 6. Checked dashboard stats and metrics"
|
||||
echo " 7. All actions recorded in the audit trail"
|
||||
echo ""
|
||||
echo -e "Open ${GREEN}http://localhost:8443${NC} to see everything in the dashboard."
|
||||
echo -e "Open ${GREEN}https://localhost:8443${NC} to see everything in the dashboard."
|
||||
echo "Look for certificate: $CERT_ID"
|
||||
```
|
||||
|
||||
|
||||
+1268
-1270
File diff suppressed because it is too large
Load Diff
+13
-7
@@ -29,15 +29,18 @@ The binary has zero runtime dependencies beyond the certctl server it connects t
|
||||
|
||||
## Configuration
|
||||
|
||||
The MCP server reads two environment variables:
|
||||
The MCP server reads three environment variables:
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|----------|----------|---------|-------------|
|
||||
| `CERTCTL_SERVER_URL` | No | `http://localhost:8443` | URL of the certctl REST API |
|
||||
| `CERTCTL_SERVER_URL` | No | `https://localhost:8443` | URL of the certctl REST API (HTTPS-only as of v2.2) |
|
||||
| `CERTCTL_API_KEY` | No | (empty) | API key for authentication (passed as `Bearer` token) |
|
||||
| `CERTCTL_SERVER_CA_BUNDLE_PATH` | Yes (for self-signed / internal CA) | (empty) | Path to PEM CA bundle that signed the server cert. Required when the server cert isn't rooted in the system trust store (the default compose stack ships a self-signed cert at `deploy/test/certs/ca.crt`). |
|
||||
|
||||
If your certctl server has auth enabled (the default), you must provide the API key. The MCP server passes it through to every HTTP request.
|
||||
|
||||
Since v2.2 the certctl control plane is HTTPS-only. If the server cert is self-signed or chained to an internal CA, set `CERTCTL_SERVER_CA_BUNDLE_PATH` so the MCP server can verify the TLS handshake. Never set `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true` outside local development — it disables all certificate validation.
|
||||
|
||||
## Setting Up with Claude Desktop
|
||||
|
||||
Add this to your Claude Desktop MCP configuration file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
||||
@@ -48,7 +51,8 @@ Add this to your Claude Desktop MCP configuration file (`~/Library/Application S
|
||||
"certctl": {
|
||||
"command": "/path/to/certctl-mcp",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_SERVER_URL": "https://localhost:8443",
|
||||
"CERTCTL_SERVER_CA_BUNDLE_PATH": "/path/to/certctl/deploy/test/certs/ca.crt",
|
||||
"CERTCTL_API_KEY": "your-api-key-here"
|
||||
}
|
||||
}
|
||||
@@ -67,7 +71,8 @@ In Cursor, go to Settings → MCP Servers and add:
|
||||
"certctl": {
|
||||
"command": "/path/to/certctl-mcp",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_SERVER_URL": "https://localhost:8443",
|
||||
"CERTCTL_SERVER_CA_BUNDLE_PATH": "/path/to/certctl/deploy/test/certs/ca.crt",
|
||||
"CERTCTL_API_KEY": "your-api-key-here"
|
||||
}
|
||||
}
|
||||
@@ -84,7 +89,8 @@ Add certctl as an MCP server in your project's `.mcp.json`:
|
||||
"certctl": {
|
||||
"command": "/path/to/certctl-mcp",
|
||||
"env": {
|
||||
"CERTCTL_SERVER_URL": "http://localhost:8443",
|
||||
"CERTCTL_SERVER_URL": "https://localhost:8443",
|
||||
"CERTCTL_SERVER_CA_BUNDLE_PATH": "/path/to/certctl/deploy/test/certs/ca.crt",
|
||||
"CERTCTL_API_KEY": "your-api-key-here"
|
||||
}
|
||||
}
|
||||
@@ -94,7 +100,7 @@ Add certctl as an MCP server in your project's `.mcp.json`:
|
||||
|
||||
## Available Tools
|
||||
|
||||
The MCP server registers 78 tools organized across 16 resource domains:
|
||||
The MCP server exposes the full REST API organized across 16 resource domains:
|
||||
|
||||
| Domain | Tools | Examples |
|
||||
|--------|-------|---------|
|
||||
@@ -153,7 +159,7 @@ flowchart LR
|
||||
AI <-->|"stdio"| MCP
|
||||
MCP -->|"HTTP + Bearer token"| SERVER
|
||||
|
||||
MCP ~~~ TOOLS["78 tools · 16 domains\nTyped input structs"]
|
||||
MCP ~~~ TOOLS["REST API via MCP · 16 domains\nTyped input structs"]
|
||||
```
|
||||
|
||||
The MCP server is intentionally thin:
|
||||
|
||||
@@ -34,7 +34,7 @@ cd certctl/deploy
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Access the dashboard at `http://localhost:8443` with API key from `.env` file.
|
||||
Access the dashboard at `https://localhost:8443` with the API key from `.env`. The default compose stack ships a self-signed cert; pin with `--cacert ./deploy/test/certs/ca.crt` when calling the API from the host.
|
||||
|
||||
### 2. Deploy Agents
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ Option A: Docker Compose (quickest for evaluation)
|
||||
```bash
|
||||
cd /opt/certctl
|
||||
docker compose up -d
|
||||
# Dashboard & API: http://localhost:8443
|
||||
# Dashboard & API: https://localhost:8443 (self-signed cert — use --cacert ./deploy/test/certs/ca.crt for the default compose stack)
|
||||
# Default API key in logs (grep CERTCTL_API_KEY docker logs certctl-server)
|
||||
```
|
||||
|
||||
@@ -45,7 +45,8 @@ chmod +x /usr/local/bin/certctl-agent
|
||||
# Create config
|
||||
sudo mkdir -p /etc/certctl /var/lib/certctl/keys
|
||||
sudo tee /etc/certctl/agent.env > /dev/null <<EOF
|
||||
CERTCTL_SERVER_URL=http://certctl-control-plane.example.com:8443
|
||||
CERTCTL_SERVER_URL=https://certctl-control-plane.example.com:8443
|
||||
CERTCTL_SERVER_CA_BUNDLE_PATH=/etc/certctl/tls/ca.crt
|
||||
CERTCTL_API_KEY=your-api-key-here
|
||||
CERTCTL_DISCOVERY_DIRS=/etc/letsencrypt/live
|
||||
CERTCTL_KEY_DIR=/var/lib/certctl/keys
|
||||
|
||||
+9
-4
@@ -68,8 +68,10 @@ The spec organizes endpoints into 16 tags:
|
||||
The spec declares a `bearerAuth` security scheme applied globally. All endpoints under `/api/v1/` require a Bearer token by default:
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer your-api-key" \
|
||||
http://localhost:8443/api/v1/certificates
|
||||
# The default compose stack uses a self-signed cert; pin with --cacert
|
||||
curl --cacert ./deploy/test/certs/ca.crt \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
https://localhost:8443/api/v1/certificates
|
||||
```
|
||||
|
||||
Three endpoints are exempt from auth (declared with `security: []` in the spec): `/health`, `/ready`, and `/api/v1/auth/info`. The auth info endpoint tells clients whether authentication is enabled and what type is required — useful for GUIs that need to show/hide a login screen.
|
||||
@@ -150,8 +152,9 @@ Import the spec directly into Postman:
|
||||
|
||||
1. Open Postman → Import → File → select `api/openapi.yaml`
|
||||
2. Postman creates a collection with all 78 documented operations organized by tag
|
||||
3. Set the `baseUrl` variable to `http://localhost:8443`
|
||||
3. Set the `baseUrl` variable to `https://localhost:8443` (HTTPS-only as of v2.2)
|
||||
4. Add an `Authorization: Bearer your-api-key` header to the collection
|
||||
5. Import the demo stack CA bundle (`deploy/test/certs/ca.crt`) into Postman's Settings → Certificates → CA Certificates, or disable certificate verification for the `localhost` host (Settings → General → SSL certificate verification)
|
||||
|
||||
## Key Schemas
|
||||
|
||||
@@ -176,8 +179,10 @@ Use the spec to generate contract tests that verify the API matches the spec:
|
||||
```bash
|
||||
# Using schemathesis for fuzz testing against the spec
|
||||
pip install schemathesis
|
||||
# The default compose stack uses a self-signed cert — export a CA bundle or set REQUESTS_CA_BUNDLE
|
||||
export REQUESTS_CA_BUNDLE=$(pwd)/deploy/test/certs/ca.crt
|
||||
schemathesis run api/openapi.yaml \
|
||||
--base-url http://localhost:8443 \
|
||||
--base-url https://localhost:8443 \
|
||||
--header "Authorization: Bearer your-api-key"
|
||||
```
|
||||
|
||||
|
||||
@@ -0,0 +1,297 @@
|
||||
# QA Test Suite Guide (`qa_test.go`)
|
||||
|
||||
> **Audience:** Anyone running release QA for certctl — whether you're a first-time contributor or the maintainer cutting a release tag.
|
||||
>
|
||||
> **Companion to:** `docs/testing-guide.md` (the *what* to test). This document explains the *how* — the automated test file, what it covers, what it skips, and how to fill the gaps manually.
|
||||
|
||||
---
|
||||
|
||||
## What Is This File?
|
||||
|
||||
`deploy/test/qa_test.go` is a single Go test file (~1700 lines) that automates as much of `docs/testing-guide.md` as possible against a running certctl Docker Compose demo stack. It replaces the legacy `qa-smoke-test.sh` bash script.
|
||||
|
||||
It covers **all 54 Parts** of the testing guide:
|
||||
|
||||
- **~164 automated subtests** — API calls, database queries, source file checks, performance benchmarks
|
||||
- **11 skipped Parts** — with documented reasons (external CAs, Windows, browser-only, etc.)
|
||||
- **Remaining ~282 manual tests** — GUI flows, scheduler timing, Docker log inspection — must be done by a human following `docs/testing-guide.md`
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌────────────────────────┐ ┌──────────────────────────┐
|
||||
│ qa_test.go │────▶│ certctl demo stack │
|
||||
│ (//go:build qa) │ │ docker-compose.yml + │
|
||||
│ │ │ docker-compose.demo.yml │
|
||||
│ TestQA(t *testing.T) │ │ │
|
||||
│ ├─ Part01_Infra │ │ ┌─ certctl-server :8443 │
|
||||
│ ├─ Part02_Auth │ │ ├─ postgres :5432 │
|
||||
│ ├─ Part03_CertCRUD │ │ └─ certctl-agent │
|
||||
│ ├─ ... │ └──────────────────────────┘
|
||||
│ └─ Part52_HelmChart │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
Key design choices:
|
||||
|
||||
- **Build tag:** `//go:build qa` — never runs during `go test ./...` or CI. Only runs when explicitly requested.
|
||||
- **Package:** `integration_test` — same package as `integration_test.go` (which uses `//go:build integration` for the test stack). They coexist but never run together.
|
||||
- **Zero internal imports:** Uses only stdlib + `lib/pq` (from `go.mod`). All API interactions are plain HTTP. All JSON is decoded into lightweight local structs (`qaCert`, `qaJob`, etc.) — not the internal domain types.
|
||||
- **Self-cleaning:** Tests that create data use `t.Cleanup()` to delete it afterward. The seed data is not modified.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **Docker Compose demo stack running:**
|
||||
```bash
|
||||
cd deploy
|
||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build -d
|
||||
```
|
||||
Wait ~15 seconds for health checks to pass.
|
||||
|
||||
2. **Go 1.22+** installed (the project uses Go 1.25 in `go.mod`, but 1.22+ works for running tests).
|
||||
|
||||
3. **PostgreSQL port exposed** — the demo stack exposes port 5432 for database verification tests (table counts, schema checks).
|
||||
|
||||
4. **Repository checkout** — source file verification tests (`fileExists`, `fileContains`) read files relative to `qaRepoDir` (default: `../..` from `deploy/test/`).
|
||||
|
||||
## Running the Tests
|
||||
|
||||
### Full suite
|
||||
```bash
|
||||
cd deploy/test
|
||||
go test -tags qa -v -timeout 10m ./...
|
||||
```
|
||||
|
||||
### Single Part
|
||||
```bash
|
||||
go test -tags qa -v -run TestQA/Part03 ./...
|
||||
```
|
||||
|
||||
### Single subtest
|
||||
```bash
|
||||
go test -tags qa -v -run TestQA/Part03_CertCRUD/Create_Minimal ./...
|
||||
```
|
||||
|
||||
### With custom environment
|
||||
```bash
|
||||
CERTCTL_QA_SERVER_URL=https://staging.internal:8443 \
|
||||
CERTCTL_QA_API_KEY=my-staging-key \
|
||||
CERTCTL_QA_DB_URL=postgres://certctl:secret@db.internal:5432/certctl?sslmode=require \
|
||||
CERTCTL_QA_REPO_DIR=/path/to/certctl \
|
||||
go test -tags qa -v -timeout 10m ./...
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `CERTCTL_QA_SERVER_URL` | `https://localhost:8443` | certctl server URL (HTTPS-only as of v2.2) |
|
||||
| `CERTCTL_QA_API_KEY` | `change-me-in-production` | API key for Bearer auth |
|
||||
| `CERTCTL_QA_DB_URL` | `postgres://certctl:certctl@localhost:5432/certctl?sslmode=disable` | PostgreSQL connection string |
|
||||
| `CERTCTL_QA_REPO_DIR` | `../..` | Path to certctl repo root (for source file checks) |
|
||||
| `CERTCTL_QA_CA_BUNDLE` | `./certs/ca.crt` | PEM CA bundle pinned for TLS verification. The demo stack's `certctl-tls-init` container writes here. |
|
||||
| `CERTCTL_QA_INSECURE` | `false` | Set to `"true"` to skip TLS verification (e.g. before the init container finishes). Never use outside the demo harness. |
|
||||
|
||||
## Part-by-Part Coverage Map
|
||||
|
||||
This table shows what each Part tests and what's left for manual verification.
|
||||
|
||||
| Part | Testing Guide Section | Automated Subtests | What's Automated | What's Manual |
|
||||
|------|----------------------|-------------------|-----------------|--------------|
|
||||
| 1 | Infrastructure & Deployment | 8 | Table count, health/ready endpoints, seed data counts (certs, agents, issuers, targets, policies) | Docker container health, log inspection, volume mounts |
|
||||
| 2 | Authentication & Security | 4 | No-auth 401, bad-key 401, health-no-auth 200, no private keys in API | CORS preflight, rate limiting (429 + Retry-After), TLS config |
|
||||
| 3 | Certificate Lifecycle | 10 | Create (minimal + full), get, 404, list pagination, status/issuer filters, sparse fields, update, archive | Deployment trigger, version history, certificate detail UI |
|
||||
| 4 | Renewal Workflow | 3 | Trigger renewal, 404 on nonexistent, agent work endpoint | AwaitingCSR flow, agent key generation, full issuance cycle |
|
||||
| 5 | Revocation | 5 | Revoke (default reason), already-revoked, nonexistent, invalid reason, CRL JSON | DER CRL, OCSP responder, revocation notifications |
|
||||
| 6 | Policies & Profiles | 6 | Policy CRUD (create/delete), invalid type 400, profile CRUD, list | Policy violation detection, profile enforcement on CSR |
|
||||
| 7 | Ownership & Teams | 4 | Team CRUD, owner CRUD, agent groups list | Owner notification routing, dynamic group matching |
|
||||
| 8 | Job System | 2 | List jobs, 404 on nonexistent | Job state transitions, approval workflow, cancellation |
|
||||
| 9 | Issuer Connectors | 4 | List, get detail, create (GenericCA), missing name 400 | Test connection, issuer-specific issuance flow |
|
||||
| 10 | Sub-CA Mode | SKIP | — | Requires CA cert+key on disk |
|
||||
| 11 | ACME ARI | SKIP | — | Requires ARI-capable CA |
|
||||
| 12 | Vault PKI | SKIP | — | Requires live Vault server |
|
||||
| 13 | DigiCert | SKIP | — | Requires DigiCert sandbox |
|
||||
| 14 | Target Connectors | 3 | List, create NGINX target, delete 204 | Deploy to real target, validate deployment |
|
||||
| 15–17 | Apache/HAProxy, Traefik/Caddy, IIS | — | (Covered by source checks in Parts 42–46) | Requires real services or Windows |
|
||||
| 18 | Agent Operations | 3 | Heartbeat (register), metadata check, auto-create on heartbeat | Agent binary behavior, key storage, discovery scan |
|
||||
| 19 | Agent Work Routing | 1 | Empty work for agent with no targets | Scoped job assignment, multi-target fan-out |
|
||||
| 20 | Post-Deployment Verification | 1 | 404 on nonexistent job verification | TLS probing, fingerprint comparison |
|
||||
| 21 | EST Server | 2 | CACerts (200 + content-type), CSRAttrs (200/204) | simpleenroll with CSR, simplereenroll, PKCS#7 parsing |
|
||||
| 22 | Certificate Export | 3 | PEM export, PKCS#12 export, 404 on nonexistent | Download mode, file content validation |
|
||||
| 25 | Certificate Discovery | 5 | List discovered, summary, list scan targets, create target, invalid CIDR 400 | Agent filesystem scan, claim/dismiss workflow |
|
||||
| 26 | Enhanced Query API | 4 | Sort descending, cursor pagination, time-range filter, invalid sort field | Field projection correctness, cursor token cycling |
|
||||
| 27 | Request Body Size Limits | 1 | 2MB body rejected (413/400) | Exact limit boundary (1MB) |
|
||||
| 28 | CLI | SKIP | — | Requires compiled `certctl-cli` binary |
|
||||
| 29 | MCP Server | SKIP | — | Requires compiled `mcp-server` binary + stdio |
|
||||
| 30 | Observability | 7 | Dashboard summary, certs by status, expiration timeline, job trends, issuance rate, JSON metrics (uptime + gauges), Prometheus (content-type + 4 metric names) | Chart rendering (GUI), Grafana import |
|
||||
| 31 | Notifications | 2 | List, 404 on nonexistent | Notification content, mark-read, email/Slack delivery |
|
||||
| 32 | Audit Trail | 3 | List events (≥10), PUT immutability, DELETE immutability | Actor attribution, body hash, time range filters |
|
||||
| 33 | Background Scheduler | SKIP | — | Timing-dependent; verify via Docker logs |
|
||||
| 34 | Structured Logging | SKIP | — | Requires Docker log inspection |
|
||||
| 35 | GUI Testing | SKIP | — | Requires browser |
|
||||
| 36–37 | Issuer Catalog, Frontend Audit | SKIP | — | Requires browser |
|
||||
| 38 | Error Handling | 5 | Malformed JSON, missing required field, method not allowed, UTF-8 CN, empty body | Stack trace suppression, error response format |
|
||||
| 39 | Performance | 5 | List certs < 200ms, stats < 500ms, metrics < 200ms, Prometheus < 300ms, audit < 500ms | Load testing, concurrent request handling |
|
||||
| 40 | Documentation | 8 | README, quickstart, architecture, connectors, compliance exist; migration guides exist; 8 issuer types in docs; 11 target types in docs | Content accuracy, link validity |
|
||||
| 41 | Regression | 3 | DELETE 204, per_page max fallback, network scan target seed count | `errors.Is(errors.New())` anti-pattern source scan |
|
||||
| 42 | Envoy Target | 5 | Domain type, connector file, test file, OpenAPI, agent dispatch | Envoy deployment test, SDS config |
|
||||
| 43 | Postfix/Dovecot | 3 | Domain types (Postfix + Dovecot), connector file, OpenAPI | Mail server deployment test |
|
||||
| 44 | SSH Target | 4 | Domain type, connector file, agent dispatch (`sshconn`), OpenAPI | SSH deployment test (requires target host) |
|
||||
| 45 | Windows Certificate Store | 3 | Domain type, connector file, shared certutil package | Windows deployment (requires Windows) |
|
||||
| 46 | Java Keystore | 3 | Domain type, connector file, OpenAPI | JKS deployment (requires keytool) |
|
||||
| 47 | Certificate Digest Email | 3 | Preview endpoint (200/503), service file, adapter file | SMTP delivery, HTML template rendering |
|
||||
| 48 | Dynamic Issuer Config | 4 | Crypto package exists, create ACME issuer via API, config redaction check, migration exists | Test connection flow, registry rebuild |
|
||||
| 49 | Dynamic Target Config | 2 | Create NGINX target via API, migration exists | Test connection via agent heartbeat |
|
||||
| 50 | Onboarding Wizard | 2 | Wizard component exists, docker-compose split (clean vs demo) | Wizard UI flow, step completion |
|
||||
| 51 | ACME Profile Selection | 3 | Profile module exists, frontend config, RFC 9702→9773 renumber check | Profile-aware issuance against real CA |
|
||||
| 52 | Helm Chart | 5 | Chart.yaml, values.yaml, 4 templates exist, securityContext, health probes | `helm template` rendering, `helm install` |
|
||||
| 53 | Kubernetes Secrets Target Connector (M47) | 18 | Config validation (namespace DNS-1123, secret name DNS subdomain, label keys, required fields), deployment (create/update Secret, chain concatenation, error propagation), validation (serial comparison, not-found, empty cert) | GUI target wizard KubernetesSecrets fields (namespace, secret_name, labels, kubeconfig_path), Helm RBAC toggle, TargetDetailPage type label |
|
||||
| 54 | AWS ACM Private CA Issuer Connector (M47) | 23 | Config validation (region, CA ARN regex, signing algorithm whitelist, validity_days, defaults), issuance (full flow, empty CSR, errors), renewal (reuses issuance), revocation (reason mapping, default, errors), GetOrderStatus completed, GetCACertPEM (success/chain/error), GetRenewalInfo nil | GUI issuer wizard AWSACMPCA fields (region, ca_arn, signing_algorithm, validity_days, template_arn), seed data visibility, create issuer flow |
|
||||
|
||||
**Totals:** ~164 automated subtests, 11 fully skipped Parts, ~282 manual tests remaining.
|
||||
|
||||
## Test Categories
|
||||
|
||||
The automated tests fall into four categories:
|
||||
|
||||
### 1. API Integration Tests (majority)
|
||||
Make real HTTP requests to the running server and verify status codes, response structure, and JSON field values. Examples:
|
||||
- `POST /api/v1/certificates` with valid payload → 201
|
||||
- `GET /api/v1/certificates?status=Active` → all returned certs have `status: "Active"`
|
||||
- `DELETE /api/v1/certificates/mc-qa-full` → 204
|
||||
|
||||
### 2. Database Verification Tests
|
||||
Connect directly to PostgreSQL and verify schema state:
|
||||
- Table count ≥ 19 (from migrations 000001–000010)
|
||||
- Useful for catching migration regressions
|
||||
|
||||
### 3. Source File Verification Tests
|
||||
Read files from the repo checkout and verify structure:
|
||||
- Domain types exist in `internal/domain/connector.go` (e.g., `TargetTypeEnvoy`)
|
||||
- Connector implementations exist (e.g., `internal/connector/target/envoy/envoy.go`)
|
||||
- Documentation contains expected content (all issuer/target types listed)
|
||||
- No stale RFC 9702 references (replaced by RFC 9773)
|
||||
|
||||
### 4. Performance Spot Checks
|
||||
Timed API requests with threshold assertions:
|
||||
- `GET /api/v1/certificates?per_page=15` < 200ms
|
||||
- `GET /api/v1/stats/summary` < 500ms
|
||||
- `GET /api/v1/metrics/prometheus` < 300ms
|
||||
|
||||
## What This Test Does NOT Cover
|
||||
|
||||
These gaps must be filled by manual testing per `docs/testing-guide.md`:
|
||||
|
||||
### External CA Integrations (Parts 10–13)
|
||||
- **Sub-CA mode** — requires CA cert+key files on disk
|
||||
- **ACME ARI** — requires a CA that supports RFC 9773 Renewal Information
|
||||
- **Vault PKI** — requires a running HashiCorp Vault instance
|
||||
- **DigiCert / Sectigo / Google CAS** — requires sandbox API credentials
|
||||
|
||||
### Browser/GUI Testing (Parts 35–37, 50)
|
||||
- Dashboard chart rendering (Recharts)
|
||||
- Onboarding wizard step-by-step flow
|
||||
- Issuer catalog card layout and create wizard
|
||||
- Bulk operations UI (multi-select, progress bars)
|
||||
- Discovery triage workflow
|
||||
|
||||
### Real Deployment Testing (Parts 15–17)
|
||||
- NGINX/Apache/HAProxy file write + reload
|
||||
- Traefik/Caddy file provider or API reload
|
||||
- IIS PowerShell/WinRM (requires Windows)
|
||||
- F5 BIG-IP iControl REST (requires appliance or mock)
|
||||
- SSH agentless deployment (requires target host)
|
||||
|
||||
### Agent Binary Behavior (Parts 18, 28–29)
|
||||
- Agent-side ECDSA key generation and CSR submission
|
||||
- Agent filesystem discovery scan
|
||||
- CLI tool (`certctl-cli`) — all 10 subcommands
|
||||
- MCP server (`mcp-server`) — stdio transport
|
||||
|
||||
### Timing-Dependent Tests (Parts 33–34)
|
||||
- Background scheduler loop execution (renewal, jobs, health, notifications, digest, network scan)
|
||||
- Structured logging format verification (requires Docker log parsing)
|
||||
|
||||
## How This Relates to `integration_test.go`
|
||||
|
||||
Both files live in `deploy/test/` in the same Go package (`integration_test`):
|
||||
|
||||
| | `qa_test.go` | `integration_test.go` |
|
||||
|---|---|---|
|
||||
| **Build tag** | `//go:build qa` | `//go:build integration` |
|
||||
| **Target stack** | Demo (`docker-compose.yml` + `docker-compose.demo.yml`) | Test (`docker-compose.test.yml`) |
|
||||
| **Port** | 8443 | Different (test stack config) |
|
||||
| **Seed data** | `seed_demo.sql` (32 certs, 8 agents, realistic history) | Minimal (created by tests) |
|
||||
| **CA backends** | Local CA only (demo mode) | Pebble ACME, step-ca, NGINX |
|
||||
| **Purpose** | Release QA — broad coverage, spot checks | Functional — end-to-end issuance, renewal, revocation against real CAs |
|
||||
| **Run frequency** | Before each release tag | CI on every PR |
|
||||
|
||||
They are complementary. Integration tests prove the machinery works. QA tests prove the product works at release quality.
|
||||
|
||||
## Seed Data Reference
|
||||
|
||||
The QA tests depend on `migrations/seed_demo.sql`. Key IDs used:
|
||||
|
||||
### Certificates (32 total)
|
||||
`mc-api-prod`, `mc-web-prod`, `mc-pay-prod`, `mc-dash-prod`, `mc-data-prod`, `mc-search-prod`, `mc-admin-prod`, `mc-blog-prod`, `mc-docs-prod`, `mc-status-prod`, `mc-grpc-prod`, `mc-vault-prod`, `mc-consul-prod`, `mc-shop-prod`, `mc-auth-prod`, `mc-cdn-prod`, `mc-mail-prod`, `mc-ci-prod`, `mc-legacy-prod`, `mc-old-api`, `mc-wiki-prod`, `mc-api-stg`, `mc-web-stg`, `mc-pay-stg`, `mc-api-dev`, `mc-grafana-prod`, `mc-vpn-prod`, `mc-wildcard-prod`, `mc-compromised`, `mc-edge-eu`, `mc-k8s-ingress`, `mc-smime-bob`
|
||||
|
||||
### Agents (9 total)
|
||||
`ag-web-prod`, `ag-web-staging`, `ag-lb-prod`, `ag-iis-prod`, `ag-data-prod`, `ag-edge-01`, `ag-k8s-prod`, `ag-mac-dev`, `server-scanner` (sentinel)
|
||||
|
||||
### Issuers (9 total)
|
||||
`iss-local`, `iss-acme-le`, `iss-stepca`, `iss-acme-zs`, `iss-openssl`, `iss-vault`, `iss-digicert`, `iss-sectigo`, `iss-googlecas`
|
||||
|
||||
### Targets (8 total)
|
||||
`tgt-nginx-prod`, `tgt-nginx-staging`, `tgt-haproxy-prod`, `tgt-apache-prod`, `tgt-iis-prod`, `tgt-traefik-prod`, `tgt-caddy-prod`, `tgt-nginx-data`
|
||||
|
||||
### Network Scan Targets (4 total)
|
||||
`nst-dc1-web`, `nst-dc2-apps`, `nst-dmz`, `nst-edge`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Server unreachable" on startup
|
||||
The test pings `GET /health` before running anything. If this fails:
|
||||
```bash
|
||||
# Check if the stack is running
|
||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml ps
|
||||
|
||||
# Check server logs
|
||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml logs certctl-server
|
||||
|
||||
# Check if the port is exposed (self-signed cert — pin CA bundle)
|
||||
curl --cacert ./deploy/test/certs/ca.crt -s https://localhost:8443/health
|
||||
```
|
||||
|
||||
### "connect to QA DB" failure
|
||||
The database tests connect directly to PostgreSQL. Ensure port 5432 is exposed:
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml port postgres 5432
|
||||
```
|
||||
|
||||
### Performance tests flaking
|
||||
The performance thresholds (200ms, 300ms, 500ms) assume a local Docker stack. On slow CI runners or remote Docker hosts, increase the thresholds or skip Part 39:
|
||||
```bash
|
||||
go test -tags qa -v -run 'TestQA/Part(?!39)' ./...
|
||||
```
|
||||
|
||||
### Source file checks failing
|
||||
The `fileExists` and `fileContains` helpers read from `CERTCTL_QA_REPO_DIR` (default `../..`). If running from a non-standard location:
|
||||
```bash
|
||||
CERTCTL_QA_REPO_DIR=/absolute/path/to/certctl go test -tags qa -v ./...
|
||||
```
|
||||
|
||||
## Adding New Tests
|
||||
|
||||
When a new feature ships:
|
||||
|
||||
1. **Add a Part section** in `qa_test.go` following the numbering in `docs/testing-guide.md`
|
||||
2. **API tests**: use `c.get()`, `c.post()`, `c.bodyStr()`, `c.getJSON()`, `c.timedGet()`
|
||||
3. **Source checks**: use `fileExists(t, "relative/path")` and `fileContains(t, "path", "substring")`
|
||||
4. **DB checks**: use `openQADB(t)` and `db.queryInt(t, "SELECT ...")`
|
||||
5. **Cleanup**: always use `t.Cleanup()` for data created during tests
|
||||
6. **Skip if external**: use `t.Skip("Requires X — manual test")` with a clear reason
|
||||
|
||||
## Version History
|
||||
|
||||
- **v1.0** (April 2026) — Initial release covering all 52 Parts of testing-guide.md v2.1. Replaces `qa-smoke-test.sh`.
|
||||
- **v1.1** (April 2026) — Added Parts 53–54 (M47: Kubernetes Secrets target + AWS ACM PCA issuer). 54 Parts total, ~164 automated subtests.
|
||||
+78
-48
@@ -60,6 +60,21 @@ cp deploy/.env.example deploy/.env
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
### Docker Compose Environments
|
||||
|
||||
The `deploy/` directory contains four compose files for different use cases:
|
||||
|
||||
| File | Purpose | How to run |
|
||||
|------|---------|------------|
|
||||
| `docker-compose.yml` | **Base platform.** PostgreSQL + certctl server + agent. Clean dashboard with onboarding wizard — use this for production or first-time setup. | `docker compose -f deploy/docker-compose.yml up --build` |
|
||||
| `docker-compose.demo.yml` | **Demo data override.** Layers 180 days of realistic seed data (15 certs, 5 agents, multiple issuers) onto the base. Dashboard charts and tables look populated on first boot. | `docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up --build` |
|
||||
| `docker-compose.dev.yml` | **Development override.** Adds PgAdmin (port 5050), debug-level logging, and a Delve debugger port (40000) for the server. | `docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.dev.yml up --build` |
|
||||
| `docker-compose.test.yml` | **Integration test environment.** 7 containers on a static-IP subnet: PostgreSQL, certctl server+agent, step-ca, Pebble ACME server, challenge test server, and NGINX. Runs the full issuance→deployment→verification flow against real CA backends. Standalone — does not combine with the base file. | `docker compose -f deploy/docker-compose.test.yml up --build` |
|
||||
|
||||
Override files are layered onto the base with multiple `-f` flags. The test environment is self-contained and runs independently. To reset any environment's data, add `down -v` to remove volumes.
|
||||
|
||||
For a deep dive into every service, environment variable, and networking decision, see the [Docker Compose Environments Guide](../deploy/ENVIRONMENTS.md).
|
||||
|
||||
### Kubernetes with Helm
|
||||
|
||||
For production deployments on Kubernetes, use the Helm chart:
|
||||
@@ -90,16 +105,24 @@ certctl-server Up (healthy)
|
||||
certctl-agent Up
|
||||
```
|
||||
|
||||
The control plane is HTTPS-only as of v2.2. The `certctl-tls-init` init container in the shipped `deploy/docker-compose.yml` self-signs a cert on first boot and drops it into a named volume. Extract the CA bundle once and reuse it for every API call in this guide:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8443/health
|
||||
export CA=/tmp/certctl-ca.crt
|
||||
docker compose -f deploy/docker-compose.yml exec -T certctl-server \
|
||||
cat /etc/certctl/tls/ca.crt > "$CA"
|
||||
|
||||
curl --cacert "$CA" https://localhost:8443/health
|
||||
```
|
||||
```json
|
||||
{"status":"healthy"}
|
||||
```
|
||||
|
||||
If you're bringing your own cert (internal CA, cert-manager, operator-supplied Secret), see [`docs/tls.md`](tls.md) for the full provisioning matrix. If you're cutting over an existing install, see [`docs/upgrade-to-tls.md`](upgrade-to-tls.md) for the failure modes (out-of-date `http://…` agents fail at the TLS handshake) and the one-step procedure.
|
||||
|
||||
## Open the Dashboard
|
||||
|
||||
Open **http://localhost:8443** in your browser.
|
||||
Open **https://localhost:8443** in your browser. Your browser will warn about the self-signed cert — that's expected for the demo bootstrap. Trust the CA bundle you just exported, or click through the warning.
|
||||
|
||||
> **Note:** The Docker Compose demo runs with authentication disabled (`CERTCTL_AUTH_TYPE=none`) so you can explore immediately. For production, set `CERTCTL_AUTH_TYPE=api-key` and `CERTCTL_AUTH_SECRET=<your-secret>` in your environment, then pass `Authorization: Bearer <your-secret>` on all API requests. The dashboard will prompt for your API key on first load.
|
||||
>
|
||||
@@ -139,62 +162,64 @@ Everything you see in the dashboard is backed by the REST API. All endpoints liv
|
||||
|
||||
### Core operations
|
||||
|
||||
Every request below uses `--cacert "$CA"` to pin the self-signed CA bundle extracted above. In production, point `$CA` at your internal CA root or the bundle you distributed to the fleet.
|
||||
|
||||
```bash
|
||||
# List all certificates
|
||||
curl -s http://localhost:8443/api/v1/certificates | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/certificates | jq .
|
||||
|
||||
# Filter by status
|
||||
curl -s "http://localhost:8443/api/v1/certificates?status=Expiring" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?status=Expiring" | jq .
|
||||
|
||||
# Filter by environment
|
||||
curl -s "http://localhost:8443/api/v1/certificates?environment=production" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?environment=production" | jq .
|
||||
|
||||
# Get a specific certificate
|
||||
curl -s http://localhost:8443/api/v1/certificates/mc-api-prod | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/certificates/mc-api-prod | jq .
|
||||
|
||||
# Get deployment targets for a certificate
|
||||
curl -s http://localhost:8443/api/v1/certificates/mc-api-prod/deployments | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/certificates/mc-api-prod/deployments | jq .
|
||||
|
||||
# List agents
|
||||
curl -s http://localhost:8443/api/v1/agents | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/agents | jq .
|
||||
|
||||
# Check agent pending work
|
||||
curl -s http://localhost:8443/api/v1/agents/ag-web-prod/work | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/agents/ag-web-prod/work | jq .
|
||||
|
||||
# View audit trail
|
||||
curl -s http://localhost:8443/api/v1/audit | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/audit | jq .
|
||||
|
||||
# View policies and violations
|
||||
curl -s http://localhost:8443/api/v1/policies | jq .
|
||||
curl -s http://localhost:8443/api/v1/policies/pr-require-owner/violations | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/policies | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/policies/pr-require-owner/violations | jq .
|
||||
|
||||
# Notifications
|
||||
curl -s http://localhost:8443/api/v1/notifications | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/notifications | jq .
|
||||
|
||||
# Profiles and agent groups
|
||||
curl -s http://localhost:8443/api/v1/profiles | jq .
|
||||
curl -s http://localhost:8443/api/v1/agent-groups | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/profiles | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/agent-groups | jq .
|
||||
```
|
||||
|
||||
### Sorting, filtering, and pagination
|
||||
|
||||
```bash
|
||||
# Sort by expiration date (ascending)
|
||||
curl -s "http://localhost:8443/api/v1/certificates?sort=notAfter" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?sort=notAfter" | jq .
|
||||
|
||||
# Sort descending (prefix with -)
|
||||
curl -s "http://localhost:8443/api/v1/certificates?sort=-createdAt" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?sort=-createdAt" | jq .
|
||||
|
||||
# Time-range filters (RFC3339)
|
||||
curl -s "http://localhost:8443/api/v1/certificates?expires_before=2026-05-01T00:00:00Z" | jq .
|
||||
curl -s "http://localhost:8443/api/v1/certificates?created_after=2026-03-01T00:00:00Z" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?expires_before=2026-05-01T00:00:00Z" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?created_after=2026-03-01T00:00:00Z" | jq .
|
||||
|
||||
# Sparse fields — request only what you need
|
||||
curl -s "http://localhost:8443/api/v1/certificates?fields=id,common_name,status,expires_at" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?fields=id,common_name,status,expires_at" | jq .
|
||||
|
||||
# Cursor pagination — efficient for large inventories
|
||||
curl -s "http://localhost:8443/api/v1/certificates?page_size=5" | jq '{next_cursor: .next_cursor, count: (.data | length)}'
|
||||
curl -s "http://localhost:8443/api/v1/certificates?cursor=<next_cursor_value>&page_size=5" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?page_size=5" | jq '{next_cursor: .next_cursor, count: (.data | length)}'
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/certificates?cursor=<next_cursor_value>&page_size=5" | jq .
|
||||
```
|
||||
|
||||
Supported sort fields: `notAfter`, `expiresAt`, `createdAt`, `updatedAt`, `commonName`, `name`, `status`, `environment`.
|
||||
@@ -203,22 +228,22 @@ Supported sort fields: `notAfter`, `expiresAt`, `createdAt`, `updatedAt`, `commo
|
||||
|
||||
```bash
|
||||
# Dashboard summary
|
||||
curl -s http://localhost:8443/api/v1/stats/summary | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/stats/summary | jq .
|
||||
|
||||
# Certificates by status
|
||||
curl -s http://localhost:8443/api/v1/stats/certificates-by-status | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/stats/certificates-by-status | jq .
|
||||
|
||||
# Expiration timeline (next 90 days)
|
||||
curl -s "http://localhost:8443/api/v1/stats/expiration-timeline?days=90" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/stats/expiration-timeline?days=90" | jq .
|
||||
|
||||
# Job trends (last 30 days)
|
||||
curl -s "http://localhost:8443/api/v1/stats/job-trends?days=30" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/stats/job-trends?days=30" | jq .
|
||||
|
||||
# JSON metrics
|
||||
curl -s http://localhost:8443/api/v1/metrics | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/metrics | jq .
|
||||
|
||||
# Prometheus format (for Prometheus, Grafana Agent, Datadog)
|
||||
curl -s http://localhost:8443/api/v1/metrics/prometheus
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/metrics/prometheus
|
||||
```
|
||||
|
||||
## Create Your First Certificate
|
||||
@@ -226,7 +251,7 @@ curl -s http://localhost:8443/api/v1/metrics/prometheus
|
||||
Create a certificate record that certctl will track, renew, and deploy automatically.
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "My First Certificate",
|
||||
@@ -249,31 +274,34 @@ CERT_ID="<paste the id from the response>"
|
||||
|
||||
Trigger renewal:
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/$CERT_ID/renew | jq .
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/$CERT_ID/renew | jq .
|
||||
```
|
||||
|
||||
Check the result:
|
||||
```bash
|
||||
curl -s http://localhost:8443/api/v1/certificates/$CERT_ID | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/certificates/$CERT_ID | jq .
|
||||
```
|
||||
|
||||
Refresh the dashboard at http://localhost:8443 — your new certificate appears in the inventory.
|
||||
Refresh the dashboard at https://localhost:8443 — your new certificate appears in the inventory.
|
||||
|
||||
### Revoke a certificate
|
||||
|
||||
When a private key is compromised or a service is decommissioned:
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/$CERT_ID/revoke \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/$CERT_ID/revoke \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "superseded"}' | jq .
|
||||
```
|
||||
|
||||
Supported RFC 5280 reason codes: `unspecified`, `keyCompromise`, `caCompromise`, `affiliationChanged`, `superseded`, `cessationOfOperation`, `certificateHold`, `privilegeWithdrawn`.
|
||||
|
||||
Confirm via CRL:
|
||||
Confirm via the unauthenticated DER CRL (RFC 5280 §5, RFC 8615):
|
||||
```bash
|
||||
curl -s http://localhost:8443/api/v1/crl | jq .
|
||||
# Fetch the CRL without any API key — relying parties shouldn't need one.
|
||||
# The CRL path is unauthenticated, but it's still served over TLS.
|
||||
curl --cacert "$CA" -s https://localhost:8443/.well-known/pki/crl/iss-local -o /tmp/crl.der
|
||||
openssl crl -inform der -in /tmp/crl.der -noout -text | head -40
|
||||
```
|
||||
|
||||
### Interactive approval workflow
|
||||
@@ -282,15 +310,15 @@ For high-value certificates where you want human oversight. The demo includes 2
|
||||
|
||||
```bash
|
||||
# List jobs awaiting approval (demo includes 2)
|
||||
curl -s "http://localhost:8443/api/v1/jobs?status=AwaitingApproval" | jq '.data[] | {id, certificate_id, status}'
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/jobs?status=AwaitingApproval" | jq '.data[] | {id, certificate_id, status}'
|
||||
|
||||
# Approve a pending job
|
||||
curl -s -X POST http://localhost:8443/api/v1/jobs/JOB_ID/approve \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/jobs/JOB_ID/approve \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Approved for production deployment"}' | jq .
|
||||
|
||||
# Reject a pending job
|
||||
curl -s -X POST http://localhost:8443/api/v1/jobs/JOB_ID/reject \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/jobs/JOB_ID/reject \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "Key type does not meet compliance requirements"}' | jq .
|
||||
```
|
||||
@@ -316,7 +344,7 @@ export CERTCTL_DISCOVERY_DIRS="/etc/nginx/certs,/etc/ssl/certs,/var/lib/certs"
|
||||
export CERTCTL_NETWORK_SCAN_ENABLED=true
|
||||
|
||||
# Create a scan target
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "Internal Network",
|
||||
@@ -328,20 +356,20 @@ curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
}' | jq .
|
||||
|
||||
# Trigger an immediate scan
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets/nst-internal-network/scan | jq .
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/network-scan-targets/nst-internal-network/scan | jq .
|
||||
```
|
||||
|
||||
### Triage discovered certificates
|
||||
|
||||
```bash
|
||||
# List discovered certs
|
||||
curl -s "http://localhost:8443/api/v1/discovered-certificates?agent_id=agent-nginx-prod" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/discovered-certificates?agent_id=agent-nginx-prod" | jq .
|
||||
|
||||
# Summary counts
|
||||
curl -s http://localhost:8443/api/v1/discovery-summary | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/discovery-summary | jq .
|
||||
|
||||
# Claim a discovered cert (bring under management)
|
||||
curl -s -X POST "http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/claim" \
|
||||
curl --cacert "$CA" -s -X POST "https://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/claim" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"managed_certificate_id": "mc-api-prod"}' | jq .
|
||||
```
|
||||
@@ -351,8 +379,9 @@ curl -s -X POST "http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_
|
||||
```bash
|
||||
cd cmd/cli && go build -o certctl-cli .
|
||||
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_SERVER_URL="https://localhost:8443"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH="$CA" # or pass --ca-bundle; --insecure for dev self-signed
|
||||
|
||||
./certctl-cli certs list # List certificates
|
||||
./certctl-cli certs get mc-api-prod # Certificate details
|
||||
@@ -385,10 +414,10 @@ export CERTCTL_DIGEST_RECIPIENTS=ops@example.com,security@example.com
|
||||
|
||||
Preview the digest HTML before enabling scheduled delivery:
|
||||
```bash
|
||||
curl http://localhost:8443/api/v1/digest/preview | jq '.html' | grep -o '<html>' # Shows HTML is ready
|
||||
curl --cacert "$CA" https://localhost:8443/api/v1/digest/preview | jq '.html' | grep -o '<html>' # Shows HTML is ready
|
||||
|
||||
# Trigger a digest send immediately (outside of schedule)
|
||||
curl -X POST http://localhost:8443/api/v1/digest/send
|
||||
curl --cacert "$CA" -X POST https://localhost:8443/api/v1/digest/send
|
||||
```
|
||||
|
||||
If no recipients are configured (`CERTCTL_DIGEST_RECIPIENTS` empty), the digest falls back to certificate owner emails. Digests include total certificates, expiring soon, expired, active agents, completed/failed jobs (30-day summary), and a table of expiring certs color-coded by urgency (7/14/30 days).
|
||||
@@ -398,13 +427,14 @@ If no recipients are configured (`CERTCTL_DIGEST_RECIPIENTS` empty), the digest
|
||||
```bash
|
||||
cd cmd/mcp-server && go build -o mcp-server .
|
||||
|
||||
export CERTCTL_SERVER_URL="http://localhost:8443"
|
||||
export CERTCTL_SERVER_URL="https://localhost:8443"
|
||||
export CERTCTL_API_KEY="test-key-123"
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH="$CA" # MCP is env-vars-only; no CLI flags
|
||||
|
||||
./mcp-server
|
||||
```
|
||||
|
||||
Exposes 78 MCP tools covering the REST API via stdio transport. Ask Claude: "What certificates are expiring in the next 30 days?", "Revoke the payments cert due to key compromise", "Show me the audit trail."
|
||||
Exposes the full REST API via MCP over stdio transport. Ask Claude: "What certificates are expiring in the next 30 days?", "Revoke the payments cert due to key compromise", "Show me the audit trail."
|
||||
|
||||
## Demo Data Reference
|
||||
|
||||
|
||||
+69
-49
@@ -16,7 +16,7 @@ You'll start 7 Docker containers that talk to each other:
|
||||
| **pebble-challtestsrv** | DNS/HTTP challenge test server for Pebble | 10.30.50.3 | Not directly — Pebble talks to it |
|
||||
| **Pebble** | A fake Let's Encrypt (tests the ACME protocol without touching the real internet) | 10.30.50.4 | Not directly — the server talks to it |
|
||||
| **step-ca** | A private Certificate Authority (think: your company's internal CA) | 10.30.50.5 | Not directly — the server talks to it |
|
||||
| **certctl-server** | The brain. API + web dashboard + scheduler + ACME challenge server | 10.30.50.6 | **http://localhost:8443** |
|
||||
| **certctl-server** | The brain. API + web dashboard + scheduler + ACME challenge server | 10.30.50.6 | **https://localhost:8443** (self-signed — see CA-bundle note below) |
|
||||
| **NGINX** | A web server. The agent deploys certificates here. | 10.30.50.7 | **https://localhost:8444** |
|
||||
| **certctl-agent** | The hands. Generates keys, deploys certs to NGINX | 10.30.50.8 | Not directly — it talks to the server |
|
||||
|
||||
@@ -123,7 +123,7 @@ docker compose -f docker-compose.test.yml up --build
|
||||
|
||||
```
|
||||
certctl-test-server | {"level":"INFO","msg":"server started","address":"0.0.0.0:8443"}
|
||||
certctl-test-agent | {"level":"INFO","msg":"agent starting","server_url":"http://certctl-server:8443"}
|
||||
certctl-test-agent | {"level":"INFO","msg":"agent starting","server_url":"https://certctl-server:8443"}
|
||||
certctl-test-stepca | Serving HTTPS on :9000 ...
|
||||
certctl-test-pebble | Listening on: 0.0.0.0:14000
|
||||
```
|
||||
@@ -159,13 +159,29 @@ certctl-test-stepca Up (healthy)
|
||||
|
||||
**If certctl-test-server says "Restarting"**: It probably started before step-ca or Pebble were ready. Wait 30 seconds and check again. If it keeps restarting, see [Troubleshooting](#troubleshooting).
|
||||
|
||||
### Get the CA bundle for curl
|
||||
|
||||
The test harness runs HTTPS-only (the `certctl-tls-init` init container self-signs an ECDSA-P256 server cert with a SHA-256 signature into a bind-mounted directory before the server starts — see `docker-compose.test.yml` §`certctl-tls-init` for details). The CA cert that signed it is materialized on the host at `./test/certs/ca.crt` (relative to the `deploy/` directory). Every `curl` in the rest of this doc expects it in `$CA`:
|
||||
|
||||
```bash
|
||||
export CA=$PWD/test/certs/ca.crt
|
||||
ls -la "$CA" # sanity check: file should exist and be non-empty
|
||||
curl --cacert "$CA" -f https://localhost:8443/health
|
||||
```
|
||||
|
||||
Expect `{"status":"ok"}`. If `curl` errors with `SSL certificate problem: unable to get local issuer certificate`, the init container hasn't finished yet — wait a few seconds and retry. If the file doesn't exist at all, the bind mount didn't populate; `docker compose -f docker-compose.test.yml logs certctl-tls-init` should show the self-sign ran.
|
||||
|
||||
For a full explanation of the cert provisioning patterns (self-signed bootstrap, operator-supplied, cert-manager), see [`tls.md`](tls.md). For the one-step cutover from the old plaintext test harness to HTTPS, see [`upgrade-to-tls.md`](upgrade-to-tls.md).
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Open the Dashboard
|
||||
|
||||
Open your web browser and go to:
|
||||
|
||||
**http://localhost:8443**
|
||||
**https://localhost:8443**
|
||||
|
||||
Your browser will warn you that the cert is self-signed ("Your connection is not private" / "NET::ERR_CERT_AUTHORITY_INVALID"). That's expected for the test harness — the CA that signed the cert lives at `deploy/test/certs/ca.crt` and isn't in your system trust store. Click through the warning (Chrome: "Advanced" → "Proceed"; Firefox: "Accept the Risk"; Safari: "Show Details" → "visit this website").
|
||||
|
||||
You'll see a login screen asking for an API key. Enter:
|
||||
|
||||
@@ -198,12 +214,13 @@ Go back to your second terminal. Let's verify the data loaded correctly.
|
||||
### Check the agent
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/agents | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/agents | python3 -m json.tool
|
||||
```
|
||||
|
||||
**What this command does**:
|
||||
- `curl` makes an HTTP request (like a browser but from the terminal)
|
||||
- `curl` makes an HTTPS request (like a browser but from the terminal)
|
||||
- `--cacert "$CA"` pins the test harness's self-signed root as the only trust anchor for this call — matches what you exported in Step 1
|
||||
- `-s` means "silent" (don't show progress bars)
|
||||
- `-H "Authorization: Bearer test-key-2026"` sends the API key (same one you used to log in)
|
||||
- `python3 -m json.tool` formats the JSON response so it's readable
|
||||
@@ -233,8 +250,8 @@ The important parts: `"id": "agent-test-01"` and `"status": "online"`. If the st
|
||||
### Check the issuers
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/issuers | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/issuers | python3 -m json.tool
|
||||
```
|
||||
|
||||
You should see three issuers:
|
||||
@@ -245,8 +262,8 @@ You should see three issuers:
|
||||
### Check the target
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/targets | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/targets | python3 -m json.tool
|
||||
```
|
||||
|
||||
You should see `target-test-nginx` — the NGINX deployment target, assigned to `agent-test-01`.
|
||||
@@ -255,7 +272,7 @@ The target config uses no-op commands for `reload_command` and `validate_command
|
||||
|
||||
### See it all in the dashboard
|
||||
|
||||
Open the dashboard at http://localhost:8443 and click through the sidebar:
|
||||
Open the dashboard at https://localhost:8443 and click through the sidebar:
|
||||
- **Agents** — you should see `test-agent-01`
|
||||
- **Issuers** — you should see all three CAs
|
||||
- **Targets** — you should see `Test NGINX`
|
||||
@@ -287,7 +304,7 @@ The private key **never leaves the agent**. The server only ever sees the CSR (p
|
||||
### Step 4a: Create the certificate record
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H "Authorization: Bearer test-key-2026" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
@@ -338,7 +355,7 @@ docker exec certctl-test-postgres psql -U certctl -d certctl -c \
|
||||
### Step 4c: Trigger issuance
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/mc-local-test/renew \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/mc-local-test/renew \
|
||||
-H "Authorization: Bearer test-key-2026" | python3 -m json.tool
|
||||
```
|
||||
|
||||
@@ -395,7 +412,7 @@ The `subject` should match the domain name you chose. The `issuer` should say "c
|
||||
|
||||
### Step 4f: Check the dashboard
|
||||
|
||||
Open the dashboard at http://localhost:8443 and:
|
||||
Open the dashboard at https://localhost:8443 and:
|
||||
|
||||
1. Click **Certificates** in the sidebar — you should see `mc-local-test` with status "Active"
|
||||
2. Click on it to see the detail page — you should see version history, the signed certificate details, and the deployment timeline
|
||||
@@ -414,7 +431,7 @@ This is the real deal. ACME is the protocol that Let's Encrypt uses to issue cer
|
||||
### Step 5a: Create the certificate record
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H "Authorization: Bearer test-key-2026" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
@@ -441,7 +458,7 @@ docker exec certctl-test-postgres psql -U certctl -d certctl -c \
|
||||
"INSERT INTO certificate_target_mappings (certificate_id, target_id) VALUES ('mc-acme-test', 'target-test-nginx') ON CONFLICT DO NOTHING;"
|
||||
|
||||
# Trigger issuance
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/mc-acme-test/renew \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/mc-acme-test/renew \
|
||||
-H "Authorization: Bearer test-key-2026" | python3 -m json.tool
|
||||
```
|
||||
|
||||
@@ -502,7 +519,7 @@ Revocation means "this certificate is no longer trusted, even though it hasn't e
|
||||
### Step 7a: Revoke the Local CA cert
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/mc-local-test/revoke \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/mc-local-test/revoke \
|
||||
-H "Authorization: Bearer test-key-2026" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"reason": "superseded"}' | python3 -m json.tool
|
||||
@@ -512,12 +529,15 @@ curl -s -X POST http://localhost:8443/api/v1/certificates/mc-local-test/revoke \
|
||||
|
||||
### Step 7b: Check the CRL (Certificate Revocation List)
|
||||
|
||||
The CRL is a DER-encoded X.509 v2 CRL (RFC 5280 §5) served under the RFC 8615 well-known namespace. It is deliberately unauthenticated — relying parties that need to verify revocation don't have certctl API keys.
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/crl | python3 -m json.tool
|
||||
# No Authorization header — the endpoint is public by design.
|
||||
curl --cacert "$CA" -s https://localhost:8443/.well-known/pki/crl/iss-local -o /tmp/crl.der
|
||||
openssl crl -inform der -in /tmp/crl.der -noout -text | head -40
|
||||
```
|
||||
|
||||
**What you should see**: A list that includes the revoked certificate's serial number, the reason, and the timestamp.
|
||||
**What you should see**: `openssl` prints the CRL issuer DN, `This Update` / `Next Update` timestamps, and at least one entry whose `Serial Number` matches the cert you just revoked, with `CRL Reason Code: Superseded` (or whichever reason you passed in step 7a). The response's `Content-Type` header is `application/pkix-crl`.
|
||||
|
||||
### Step 7c: Check in the dashboard
|
||||
|
||||
@@ -530,8 +550,8 @@ Go to **Certificates** in the sidebar. The `mc-local-test` cert should now show
|
||||
The agent is configured to scan `/nginx-certs` every 6 hours for existing certificates. It already ran a scan when it started up. Let's see what it found.
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/discovered-certificates | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/discovered-certificates | python3 -m json.tool
|
||||
```
|
||||
|
||||
**What you should see**: Any certificates that exist in the NGINX cert directory, including the ones you deployed in Steps 4-5. The discovery system extracts metadata (CN, SANs, issuer, expiry, fingerprint) from the PEM files.
|
||||
@@ -539,8 +559,8 @@ curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
Check the summary:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/discovery-summary | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/discovery-summary | python3 -m json.tool
|
||||
```
|
||||
|
||||
This shows counts: how many are Unmanaged, Managed, and Dismissed.
|
||||
@@ -554,7 +574,7 @@ In the dashboard: click **Discovery** in the sidebar to see the triage view.
|
||||
Force a renewal on the ACME certificate to see the full cycle happen again:
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/mc-acme-test/renew \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/mc-acme-test/renew \
|
||||
-H "Authorization: Bearer test-key-2026" | python3 -m json.tool
|
||||
```
|
||||
|
||||
@@ -581,7 +601,7 @@ The test environment enables EST with `CERTCTL_EST_ENABLED=true` and `CERTCTL_ES
|
||||
### Step 10a: Check available CA certificates
|
||||
|
||||
```bash
|
||||
curl -sk http://localhost:8443/.well-known/est/cacerts \
|
||||
curl --cacert "$CA" -s https://localhost:8443/.well-known/est/cacerts \
|
||||
-H "Authorization: Bearer test-key-2026"
|
||||
```
|
||||
|
||||
@@ -592,7 +612,7 @@ curl -sk http://localhost:8443/.well-known/est/cacerts \
|
||||
### Step 10b: Check CSR attributes
|
||||
|
||||
```bash
|
||||
curl -sk http://localhost:8443/.well-known/est/csrattrs \
|
||||
curl --cacert "$CA" -s https://localhost:8443/.well-known/est/csrattrs \
|
||||
-H "Authorization: Bearer test-key-2026"
|
||||
```
|
||||
|
||||
@@ -612,7 +632,7 @@ openssl req -new -newkey ec -pkeyopt ec_paramgen_curve:P-256 \
|
||||
EST_CSR=$(openssl req -in /tmp/est-test.csr -outform DER | base64 -w 0)
|
||||
|
||||
# Submit to EST simpleenroll endpoint
|
||||
curl -sk -X POST http://localhost:8443/.well-known/est/simpleenroll \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/.well-known/est/simpleenroll \
|
||||
-H "Authorization: Bearer test-key-2026" \
|
||||
-H "Content-Type: application/pkcs10" \
|
||||
-d "$EST_CSR"
|
||||
@@ -625,8 +645,8 @@ curl -sk -X POST http://localhost:8443/.well-known/est/simpleenroll \
|
||||
Decode and inspect the response (if you saved it to a variable):
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/audit-events | python3 -m json.tool | head -30
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/audit-events | python3 -m json.tool | head -30
|
||||
```
|
||||
|
||||
Check the audit trail — you should see an `est_enrollment` event with the CN `est-device.certctl.test`.
|
||||
@@ -636,7 +656,7 @@ Check the audit trail — you should see an `est_enrollment` event with the CN `
|
||||
EST also supports re-enrollment (certificate renewal). The same CSR format works:
|
||||
|
||||
```bash
|
||||
curl -sk -X POST http://localhost:8443/.well-known/est/simplereenroll \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/.well-known/est/simplereenroll \
|
||||
-H "Authorization: Bearer test-key-2026" \
|
||||
-H "Content-Type: application/pkcs10" \
|
||||
-d "$EST_CSR"
|
||||
@@ -655,7 +675,7 @@ S/MIME certificates are used for email signing and encryption — a different us
|
||||
### Step 11a: Create an S/MIME certificate record
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H "Authorization: Bearer test-key-2026" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
@@ -683,7 +703,7 @@ Notice:
|
||||
docker exec certctl-test-postgres psql -U certctl -d certctl -c \
|
||||
"INSERT INTO certificate_target_mappings (certificate_id, target_id) VALUES ('mc-smime-test', 'target-test-nginx') ON CONFLICT DO NOTHING;"
|
||||
|
||||
curl -s -X POST http://localhost:8443/api/v1/certificates/mc-smime-test/renew \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/certificates/mc-smime-test/renew \
|
||||
-H "Authorization: Bearer test-key-2026" | python3 -m json.tool
|
||||
```
|
||||
|
||||
@@ -692,15 +712,15 @@ curl -s -X POST http://localhost:8443/api/v1/certificates/mc-smime-test/renew \
|
||||
After the agent processes the job (30-60 seconds), check the certificate details:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/certificates/mc-smime-test | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/certificates/mc-smime-test | python3 -m json.tool
|
||||
```
|
||||
|
||||
The certificate should show `"status": "active"`. To verify the EKU on the actual cert, you can export it:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/certificates/mc-smime-test/export/pem | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/certificates/mc-smime-test/export/pem | python3 -m json.tool
|
||||
```
|
||||
|
||||
If you decode the certificate PEM, you should see:
|
||||
@@ -765,16 +785,16 @@ If you have Go installed, you can build and test the CLI tool:
|
||||
go build -o certctl-cli ./cmd/cli
|
||||
|
||||
# List certificates
|
||||
./certctl-cli --server http://localhost:8443 --api-key test-key-2026 list-certs
|
||||
./certctl-cli --server https://localhost:8443 --ca-bundle "$CA" --api-key test-key-2026 list-certs
|
||||
|
||||
# Get a specific certificate
|
||||
./certctl-cli --server http://localhost:8443 --api-key test-key-2026 get-cert mc-acme-test
|
||||
./certctl-cli --server https://localhost:8443 --ca-bundle "$CA" --api-key test-key-2026 get-cert mc-acme-test
|
||||
|
||||
# Check health
|
||||
./certctl-cli --server http://localhost:8443 --api-key test-key-2026 health
|
||||
./certctl-cli --server https://localhost:8443 --ca-bundle "$CA" --api-key test-key-2026 health
|
||||
|
||||
# Get metrics (JSON format)
|
||||
./certctl-cli --server http://localhost:8443 --api-key test-key-2026 --format json metrics
|
||||
./certctl-cli --server https://localhost:8443 --ca-bundle "$CA" --api-key test-key-2026 --format json metrics
|
||||
```
|
||||
|
||||
---
|
||||
@@ -921,15 +941,15 @@ Look for error messages. Common ones:
|
||||
**Step 2**: Verify the agent is registered:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
http://localhost:8443/api/v1/agents/agent-test-01 | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
https://localhost:8443/api/v1/agents/agent-test-01 | python3 -m json.tool
|
||||
```
|
||||
|
||||
**Step 3**: Check for pending jobs:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
"http://localhost:8443/api/v1/jobs?status=Pending&status=AwaitingCSR" | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
"https://localhost:8443/api/v1/jobs?status=Pending&status=AwaitingCSR" | python3 -m json.tool
|
||||
```
|
||||
|
||||
If there are pending jobs but the agent isn't picking them up, check that the job's `agent_id` matches `agent-test-01`.
|
||||
@@ -959,8 +979,8 @@ docker exec certctl-test-nginx nginx -s reload
|
||||
**Step 3**: If the files aren't there, the deployment job hasn't completed. Check the jobs:
|
||||
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer test-key-2026" \
|
||||
"http://localhost:8443/api/v1/jobs?type=Deployment" | python3 -m json.tool
|
||||
curl --cacert "$CA" -s -H "Authorization: Bearer test-key-2026" \
|
||||
"https://localhost:8443/api/v1/jobs?type=Deployment" | python3 -m json.tool
|
||||
```
|
||||
|
||||
Look at the job status. If it's "Running" and stuck, the server's job processor may have picked it up instead of the agent (this was a known bug — the fix skips deployment jobs with `agent_id` in the server's `ProcessPendingJobs`).
|
||||
@@ -1005,7 +1025,7 @@ Change it to a different port, like:
|
||||
- "9443:8443"
|
||||
```
|
||||
|
||||
Then access the dashboard at http://localhost:9443 instead.
|
||||
Then access the dashboard at https://localhost:9443 instead.
|
||||
|
||||
### Starting completely fresh
|
||||
|
||||
@@ -1051,7 +1071,7 @@ docker compose -f docker-compose.test.yml up --build
|
||||
|
||||
| What | Value |
|
||||
|---|---|
|
||||
| Dashboard URL | http://localhost:8443 |
|
||||
| Dashboard URL | https://localhost:8443 (use `--cacert ./test/certs/ca.crt`) |
|
||||
| API key | `test-key-2026` |
|
||||
| NGINX HTTP | http://localhost:8080 |
|
||||
| NGINX HTTPS | https://localhost:8444 |
|
||||
|
||||
+4642
-3031
File diff suppressed because it is too large
Load Diff
+183
@@ -0,0 +1,183 @@
|
||||
# TLS on the Control Plane
|
||||
|
||||
certctl's control plane is HTTPS-only as of v2.2. There is no plaintext `http://` listener, no `auto` mode, no dual-listener bridge, no TLS 1.2 escape hatch. The server refuses to start without a cert+key pair, the agent/CLI/MCP clients reject `http://` URLs at startup, and the Helm chart refuses to render without either an operator-supplied Secret or a cert-manager Certificate CR.
|
||||
|
||||
This doc covers four cert provisioning patterns, SIGHUP-based cert rotation, and the client-side CA-trust configuration agents and the CLI need to talk to the server. If you are upgrading from a pre-HTTPS release and want the step-by-step cutover procedure, read [`upgrade-to-tls.md`](upgrade-to-tls.md) first and come back here for reference.
|
||||
|
||||
## What you get
|
||||
|
||||
The server binds TLS 1.3 only with an explicit curve preference of `[X25519, P-256]`. TLS 1.3 cipher suites are non-negotiable (all three mandatory suites — AES-128-GCM-SHA256, AES-256-GCM-SHA384, CHACHA20-POLY1305-SHA256 — are always offered), so there is no `CipherSuites` knob to misconfigure. No TLS 1.2 fallback is available.
|
||||
|
||||
Two env vars are required on the server:
|
||||
|
||||
- `CERTCTL_SERVER_TLS_CERT_PATH` — filesystem path to the PEM-encoded server certificate
|
||||
- `CERTCTL_SERVER_TLS_KEY_PATH` — filesystem path to the PEM-encoded private key that signs the cert
|
||||
|
||||
Both paths are read during a fail-loud preflight in `cmd/server/main.go` (see `preflightServerTLS` in `cmd/server/tls.go`). If either is unset, unreadable, or the cert+key pair does not round-trip through `tls.LoadX509KeyPair`, the process refuses to start and emits a diagnostic pointing back at this doc. The rationale lives in §3 of the HTTPS-Everywhere milestone: a cert-lifecycle product should not silently bind plaintext.
|
||||
|
||||
## Pattern 1 — Self-signed bootstrap for docker-compose demos
|
||||
|
||||
This is the default for the `deploy/docker-compose.yml` stack. It exists so `docker compose up -d --build` just works on a laptop without the operator standing up a CA first. It is not appropriate for any non-demo environment.
|
||||
|
||||
An init container named `certctl-tls-init` runs once before the server starts. It uses the `alpine/openssl` image and generates an ECDSA-P256 self-signed cert (SHA-256 signature):
|
||||
|
||||
```
|
||||
openssl req -x509 -newkey ec \
|
||||
-pkeyopt ec_paramgen_curve:P-256 \
|
||||
-nodes \
|
||||
-keyout /etc/certctl/tls/server.key \
|
||||
-out /etc/certctl/tls/server.crt \
|
||||
-days 3650 \
|
||||
-subj "/CN=certctl-server" \
|
||||
-addext "subjectAltName=DNS:certctl-server,DNS:localhost,IP:127.0.0.1,IP:::1"
|
||||
```
|
||||
|
||||
**Why ECDSA-P256 and not ed25519.** The pre-v2.0.48 demo bootstrap used ed25519 (small keys, fast signatures). Apple's TLS stack — Safari Network Framework and the macOS-bundled LibreSSL 3.3.6 `/usr/bin/curl` — does not advertise ed25519 in the ClientHello `signature_algorithms` extension for server certs, so an ed25519 server cert was rejected at handshake with `tls: peer doesn't support any of the certificate's signature algorithms` on the server side (and the generic TLS handshake error on the client side). Homebrew OpenSSL 3.x, Chrome, Firefox, and Linux curl all accepted ed25519 — Apple was the outlier. ECDSA-P256 with SHA-256 is universally supported, so the demo bootstrap uses it by default. To pick up the new algorithm on an existing demo install, tear the volume down and rebuild: `docker compose -f deploy/docker-compose.yml down -v && docker compose -f deploy/docker-compose.yml up -d --build`. **Helm and operator-supplied-Secret users (Patterns 2 and 3) are unaffected** — they bring their own cert, and `cmd/server/tls.go` is algorithm-agnostic (TLS 1.3 with curve preference `[X25519, P-256]` for key exchange — no constraint on the server cert's signature algorithm).
|
||||
|
||||
The cert, its matching key, and a copy of the cert published as `ca.crt` land in a named volume (`certs`) mounted at `/etc/certctl/tls/` in the server container (read-only) and the agent container (read-only). The bootstrap is idempotent — if `server.crt`, `server.key`, and `ca.crt` are already present on the volume, the init container logs `TLS cert already present at …` and exits cleanly.
|
||||
|
||||
Single-cert design. CN is `certctl-server` to match the Docker-network hostname. The SAN list is `[certctl-server, localhost, 127.0.0.1, ::1]`, which covers both container-internal agent→server traffic and operator browser/curl access to `https://localhost:8443`. There is no separate intermediate/root chain — the server cert and the CA bundle are the same PEM. This is the whole point of a demo bootstrap.
|
||||
|
||||
To force regeneration (rotate the demo cert), tear the volume down: `docker compose down -v`. The next `up` re-runs the init container.
|
||||
|
||||
The server's Docker healthcheck and the agent both verify against `/etc/certctl/tls/ca.crt`; no `-k` / `InsecureSkipVerify` anywhere in the default stack.
|
||||
|
||||
## Pattern 2 — Operator-supplied `kubernetes.io/tls` Secret (Helm)
|
||||
|
||||
This is the default path for Helm installs. The operator provisions a Secret of type `kubernetes.io/tls` holding `tls.crt` + `tls.key` (and optionally `ca.crt` for mounting a CA bundle to clients in the same cluster) from whatever source they already trust — their internal CA, a manually-issued cert, step-ca, AWS ACM PCA exported to PEM, or the output of the self-signed bootstrap pattern above copied into a cluster Secret.
|
||||
|
||||
```
|
||||
kubectl create secret tls certctl-server-tls \
|
||||
--cert=server.crt \
|
||||
--key=server.key \
|
||||
--namespace certctl
|
||||
```
|
||||
|
||||
Then:
|
||||
|
||||
```
|
||||
helm install certctl deploy/helm/certctl \
|
||||
--namespace certctl \
|
||||
--set server.tls.existingSecret=certctl-server-tls
|
||||
```
|
||||
|
||||
The Secret is mounted read-only at `/etc/certctl/tls/` in the server pod. The `CERTCTL_SERVER_TLS_CERT_PATH` and `CERTCTL_SERVER_TLS_KEY_PATH` env vars are wired to `tls.crt` and `tls.key` keys inside that mount. If `ca.crt` is absent from the Secret, clients that need a CA bundle should use `tls.crt` as the bundle (self-signed case) or mount a separate ConfigMap with the root chain (operator-CA case).
|
||||
|
||||
If the operator sets neither `server.tls.existingSecret` nor `server.tls.certManager.enabled=true`, `helm template` / `helm install` fails at render-time with a diagnostic pointing at this doc. The guard is implemented in `deploy/helm/certctl/templates/_helpers.tpl` under the `certctl.tls.required` helper. This is deliberate: the HTTPS-only server would crash-loop on an empty path, so we fail earlier at Helm-render time.
|
||||
|
||||
## Pattern 3 — cert-manager `Certificate` CR (Helm, opt-in)
|
||||
|
||||
For clusters that already run cert-manager, the chart can provision a `Certificate` CR that writes into the Secret the server pod reads from. This is opt-in — the default is `server.tls.certManager.enabled: false` — because not every cluster has cert-manager installed, and we refuse to ship a chart that silently depends on an external controller.
|
||||
|
||||
```
|
||||
helm install certctl deploy/helm/certctl \
|
||||
--namespace certctl \
|
||||
--set server.tls.certManager.enabled=true \
|
||||
--set server.tls.certManager.issuerRef.name=my-cluster-issuer \
|
||||
--set server.tls.certManager.issuerRef.kind=ClusterIssuer
|
||||
```
|
||||
|
||||
The rendered `Certificate` (see `deploy/helm/certctl/templates/server-certificate.yaml`) writes `tls.crt` + `tls.key` + `ca.crt` into the Secret named by `server.tls.certManager.secretName` (defaults to `<fullname>-tls`). The server pod reads from that same Secret; the agent DaemonSet mounts the same Secret as its CA bundle source.
|
||||
|
||||
cert-manager handles rotation. certctl-server handles in-place reload — see the SIGHUP section below.
|
||||
|
||||
The chart enforces that if `server.tls.certManager.enabled=true`, `server.tls.certManager.issuerRef.name` must also be set. An empty `issuerRef.name` makes `helm template` fail with a diagnostic naming the missing flag.
|
||||
|
||||
## Pattern 4 — Manually-issued from an internal CA
|
||||
|
||||
For operators running neither Helm nor docker-compose (bare-metal / custom orchestration), the server just needs two files on disk pointed at by `CERTCTL_SERVER_TLS_CERT_PATH` and `CERTCTL_SERVER_TLS_KEY_PATH`. Issue the cert from your internal CA with:
|
||||
|
||||
- CN matching the hostname your agents and operators use to dial the server (e.g., `certctl.prod.example.com`)
|
||||
- SAN list covering every hostname and IP that appears in `CERTCTL_SERVER_URL` values across your agent fleet
|
||||
- Key usage: digital signature + key encipherment
|
||||
- Extended key usage: server auth
|
||||
|
||||
Store the key with mode `0600` and owner matching the UID the server runs as (`1000` in our shipped Dockerfile). The server process reads both files during `preflightServerTLS` at startup and again on every SIGHUP.
|
||||
|
||||
The full CA chain that signed the server cert should be distributed to agents, CLI operators, and MCP clients as their `CERTCTL_SERVER_CA_BUNDLE_PATH` — see the client section below.
|
||||
|
||||
## SIGHUP cert rotation
|
||||
|
||||
The server wraps its cert+key pair in a `*certHolder` (see `cmd/server/tls.go`) that guards the loaded `*tls.Certificate` under a `sync.Mutex`. The `*tls.Config` wires `GetCertificate` to the holder, so every new inbound TLS handshake reads whatever cert the holder currently has.
|
||||
|
||||
Send `SIGHUP` to the server PID and the holder re-reads both files from disk. On success, the next new connection uses the new cert; in-flight requests finish on the previous cert. A log line goes out:
|
||||
|
||||
```
|
||||
TLS cert reloaded via SIGHUP cert_path=/etc/certctl/tls/server.crt key_path=/etc/certctl/tls/server.key
|
||||
```
|
||||
|
||||
On failure (missing file, malformed PEM, key does not sign cert), the old cert is retained and an error logs:
|
||||
|
||||
```
|
||||
TLS cert reload failed; continuing with previous cert cert_path=… key_path=… error=…
|
||||
```
|
||||
|
||||
This is deliberately fail-safe on reload (as opposed to fail-loud on startup). A cert-manager renewal race, a partially-copied file, a typo in a rotation script — none of those should crash a running server and drop every agent connection. The operator sees the error in logs, fixes the underlying issue, and sends another `SIGHUP`.
|
||||
|
||||
Pair with cert-manager, certbot `--post-hook`, or any rotation tool that can fire a signal. For docker-compose, `docker compose kill -s HUP certctl-server` works. For Kubernetes, reload is typically handled by cert-manager updating the Secret and the mounted file changing on the next kubelet sync — no explicit SIGHUP needed if the volume mount is `subPath`-free.
|
||||
|
||||
Startup is a different story. If the cert is missing or malformed at process start, the server exits non-zero rather than binding plaintext or attempting a retry loop. That's the HTTPS-only contract.
|
||||
|
||||
## Client-side TLS: agents, CLI, MCP
|
||||
|
||||
Everything that talks to the server enforces HTTPS on the URL.
|
||||
|
||||
### Agent
|
||||
|
||||
`CERTCTL_SERVER_URL` must be `https://…`. `http://`, bare hostnames, `ftp://`, `ws://`, and empty strings are rejected at startup by `validateHTTPSScheme` in `cmd/agent/main.go` with a diagnostic pointing at `upgrade-to-tls.md`. There is no warning-and-proceed path.
|
||||
|
||||
Two additional env vars control how the agent verifies the server cert:
|
||||
|
||||
- `CERTCTL_SERVER_CA_BUNDLE_PATH` — filesystem path to a PEM-encoded CA bundle that signed the server cert. Loaded into `*tls.Config.RootCAs` on the agent's HTTP client. If unset, the agent falls back to the OS system trust store.
|
||||
- `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY` — defaults to `false`. Setting it to `true` skips verification entirely. **Dev-only escape hatch.** The agent logs a prominent warning at startup (`TLS certificate verification is disabled … never enable this in production`). Use this only when dialing a demo server whose cert you haven't bothered to mount into the agent container.
|
||||
|
||||
Equivalent CLI flags: `--ca-bundle <path>` and `--insecure-skip-verify`.
|
||||
|
||||
If both the CA bundle and `InsecureSkipVerify=true` are set, `InsecureSkipVerify` wins — it's the whole point of the flag. Don't do this in production.
|
||||
|
||||
### CLI (`certctl-cli`)
|
||||
|
||||
Same contract as the agent:
|
||||
|
||||
- `CERTCTL_SERVER_URL` defaults to `https://` scheme; `http://` rejected at startup
|
||||
- `--ca-bundle <path>` flag or `CERTCTL_SERVER_CA_BUNDLE_PATH` env var — CA bundle for server cert verification
|
||||
- `--insecure` flag or `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true` — skip verification (dev only)
|
||||
- Error diagnostic on empty URL explicitly mentions both `--server` and `CERTCTL_SERVER_URL` so operators see the right knob to turn
|
||||
|
||||
The CLI shares the URL-scheme validation with the agent; the test pins in `cmd/cli/main_test.go:TestValidateHTTPSScheme` cover the full rejection matrix.
|
||||
|
||||
### MCP server (`certctl-mcp-server`)
|
||||
|
||||
Same three controls as CLI, env-var-driven only (no flags — MCP runs as a stdio subprocess and inherits env from the launching LLM client):
|
||||
|
||||
- `CERTCTL_SERVER_URL` must start with `https://`
|
||||
- `CERTCTL_SERVER_CA_BUNDLE_PATH` optional CA bundle
|
||||
- `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY` optional skip
|
||||
|
||||
Claude Desktop / other MCP client configs should set all three in the tool's env block.
|
||||
|
||||
## Troubleshooting: fail-loud preflight errors
|
||||
|
||||
Every preflight failure message ends with `(see docs/tls.md)` so this doc is the first hit when an operator searches. Common failures:
|
||||
|
||||
**`CERTCTL_SERVER_TLS_CERT_PATH is empty: HTTPS-only control plane refuses to start`**
|
||||
Set the env var. For docker-compose this is already set to `/etc/certctl/tls/server.crt` in the shipped compose file — if you're seeing this, check the `certctl-tls-init` service logs to see why the init container didn't populate the volume. For Helm, check that `server.tls.existingSecret` or `server.tls.certManager.enabled=true` is set.
|
||||
|
||||
**`TLS cert file "…" unreadable: …`**
|
||||
The cert path is set but `os.Stat` failed. Check filesystem permissions — the server runs as UID 1000 in our shipped Dockerfile; the cert needs to be readable by that UID. Typos in the path also land here.
|
||||
|
||||
**`TLS cert/key pair invalid (cert="…" key="…"): …`**
|
||||
Both files exist but `tls.LoadX509KeyPair` refused them. Typical causes: the private key does not sign the certificate, the key is encrypted with a passphrase (not supported — remove the passphrase with `openssl pkey` before mounting), or one of the two is DER-encoded instead of PEM. Re-issue the pair from the same CA call and re-mount.
|
||||
|
||||
**Client side: `tls: failed to verify certificate: x509: certificate signed by unknown authority`**
|
||||
The client did not trust the CA that signed the server cert. Either mount the CA bundle via `CERTCTL_SERVER_CA_BUNDLE_PATH`, add the CA to the system trust store on the client host, or (dev only) set `CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true`.
|
||||
|
||||
**Client side: `tls: first record does not look like a TLS handshake`**
|
||||
The client is speaking plaintext HTTP to an HTTPS server (or vice-versa). Check that `CERTCTL_SERVER_URL` starts with `https://`. If you are upgrading from a pre-v2.2 release and your agents are old, they will surface this error until you roll the DaemonSet — see [`upgrade-to-tls.md`](upgrade-to-tls.md).
|
||||
|
||||
## Related docs
|
||||
|
||||
- [`upgrade-to-tls.md`](upgrade-to-tls.md) — one-step cutover from pre-HTTPS releases
|
||||
- [`quickstart.md`](quickstart.md) — docker-compose walkthrough with HTTPS examples
|
||||
- [`test-env.md`](test-env.md) — integration test environment (also HTTPS-only)
|
||||
- Milestone spec: `prompts/https-everywhere-milestone.md` (authoritative source for locked decisions)
|
||||
@@ -0,0 +1,194 @@
|
||||
# Upgrading to HTTPS-Everywhere (v2.2)
|
||||
|
||||
certctl's control plane is HTTPS-only as of v2.2. There is no `http` mode, no `auto` mode, no dual-listener bind, no N-release migration window. The cutover is a single step. Out-of-date agents that still point at `http://…` fail at the TCP/TLS handshake layer on first connect after the upgrade and stay `Offline` in the dashboard until their env block is updated and the fleet is rolled.
|
||||
|
||||
This doc walks operators through the cutover for the two shipped deployment topologies — docker-compose and Helm — and documents the failure modes and rollback posture explicitly.
|
||||
|
||||
For the deep-dive on cert provisioning patterns, SIGHUP cert reload, and client-side CA-trust configuration, read [`tls.md`](tls.md). This doc is the narrow "how do I upgrade" procedure.
|
||||
|
||||
## Preconditions
|
||||
|
||||
Before you start, confirm:
|
||||
|
||||
- **Shell access** to the server host and every agent host. The cutover requires you to restart the server and update every agent's env block.
|
||||
- **A cert+key source** for the server. Pick one:
|
||||
- An internal CA that can issue a server cert (CN + SAN list covering every hostname / IP agents dial).
|
||||
- A `cert-manager` install in the target Kubernetes cluster, plus a `ClusterIssuer` or `Issuer` you're willing to reference.
|
||||
- Willingness to use the self-signed bootstrap that the shipped `deploy/docker-compose.yml` generates automatically. This is the right choice for dev and demo; it is the wrong choice for production.
|
||||
- **A maintenance window.** Out-of-date agents break at the TLS handshake and stay offline until rolled. Schedule the upgrade so the agent fleet can be updated in the same window as the server.
|
||||
- **Backups.** This is a one-way door (see the Rollback section below). Snapshot your PostgreSQL database before `docker compose down` or `helm upgrade`.
|
||||
|
||||
There is no schema migration tied to this release; the only at-rest state that changes is the `certs` named volume (docker-compose) or the `tls.crt`/`tls.key` Secret (Helm).
|
||||
|
||||
## Procedure — docker-compose operators
|
||||
|
||||
The shipped `deploy/docker-compose.yml` includes a `certctl-tls-init` init container that self-signs an ECDSA-P256 (SHA-256 signature) cert on first boot and drops `server.crt`, `server.key`, and `ca.crt` into a named volume mounted read-only at `/etc/certctl/tls/` on the server and agent containers. No manual cert provisioning is required for the default stack. (Pre-v2.0.48 this was an ed25519 cert; see [`tls.md`](tls.md) Pattern 1 for the rationale and the `down -v && up --build` migration note.)
|
||||
|
||||
1. **Pull the HTTPS-everywhere release.** From the repo root:
|
||||
|
||||
```
|
||||
git pull
|
||||
```
|
||||
|
||||
Confirm you're on a tag or `master` that contains the `certctl-tls-init` service in `deploy/docker-compose.yml`. Grep for it: `grep certctl-tls-init deploy/docker-compose.yml` should hit.
|
||||
|
||||
2. **Stop the old plaintext cluster.**
|
||||
|
||||
```
|
||||
docker compose -f deploy/docker-compose.yml down
|
||||
```
|
||||
|
||||
Do not pass `-v`; keeping the PostgreSQL volume preserves your cert inventory, audit trail, and job history across the upgrade.
|
||||
|
||||
3. **Bring the cluster back up with the HTTPS build.**
|
||||
|
||||
```
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
The `certctl-tls-init` service runs once, generates the self-signed cert into the `certs` volume, and exits with code 0. The server container waits for `certctl-tls-init` via `depends_on: { condition: service_completed_successfully }` and only starts once the cert material is on disk. The server's Docker healthcheck now uses `curl --cacert /etc/certctl/tls/ca.crt -f https://localhost:8443/health`, so the container only becomes healthy once the HTTPS listener is up and serving the bundled cert correctly.
|
||||
|
||||
4. **Verify the HTTPS endpoint from the host.**
|
||||
|
||||
```
|
||||
curl --cacert $(docker compose -f deploy/docker-compose.yml exec -T certctl-server cat /etc/certctl/tls/ca.crt) https://localhost:8443/health
|
||||
```
|
||||
|
||||
Expect `{"status":"ok"}` with HTTP 200. If you get a TLS verification error, the CA bundle wasn't read correctly — re-run the `exec -T` command and pipe the output directly into `--cacert @-` or save it to a local file first. If you get `connection refused`, the server never finished startup — check `docker compose logs certctl-server` for a fail-loud preflight diagnostic pointing at `docs/tls.md`.
|
||||
|
||||
5. **Confirm the bundled agent reconnects.** Agents inside the compose stack pick up the new URL (`CERTCTL_SERVER_URL=https://certctl-server:8443`) and the bundled CA (`CERTCTL_SERVER_CA_BUNDLE_PATH=/etc/certctl/tls/ca.crt`) from their env block automatically — no per-agent change needed. Tail the agent log:
|
||||
|
||||
```
|
||||
docker compose -f deploy/docker-compose.yml logs -f certctl-agent
|
||||
```
|
||||
|
||||
You should see `heartbeat sent` within 30 seconds. In the dashboard (`https://localhost:8443`), the agent should show as `Online`.
|
||||
|
||||
**External agents** running outside the compose network (e.g., the `install-agent.sh`-installed systemd service on a separate host) need their env block updated manually before the cutover — see the Agent env block section below.
|
||||
|
||||
## Procedure — Helm operators
|
||||
|
||||
The Helm chart does not self-sign. It refuses to render (`helm template` exits non-zero) unless you configure one of two cert sources: an operator-supplied Secret, or a cert-manager `Certificate` CR. See [`tls.md`](tls.md) for the full pattern catalog.
|
||||
|
||||
1. **Provision cert material.** Pick one of:
|
||||
|
||||
- **Operator-supplied Secret.** Issue a cert from your internal CA (or any other source) and load it into a `kubernetes.io/tls` Secret in the certctl namespace:
|
||||
|
||||
```
|
||||
kubectl create secret tls certctl-server-tls \
|
||||
--cert=server.crt --key=server.key \
|
||||
--namespace certctl
|
||||
```
|
||||
|
||||
- **cert-manager.** Set `server.tls.certManager.enabled=true` on the upgrade and reference an existing `ClusterIssuer` or `Issuer`:
|
||||
|
||||
```
|
||||
--set server.tls.certManager.enabled=true
|
||||
--set server.tls.certManager.issuerRef.name=my-cluster-issuer
|
||||
--set server.tls.certManager.issuerRef.kind=ClusterIssuer
|
||||
```
|
||||
|
||||
2. **Upgrade the release.**
|
||||
|
||||
```
|
||||
helm upgrade certctl deploy/helm/certctl \
|
||||
--namespace certctl \
|
||||
--set server.tls.existingSecret=certctl-server-tls
|
||||
```
|
||||
|
||||
(Or the `certManager` variant.) If you omit both `server.tls.existingSecret` and `server.tls.certManager.enabled`, the chart fails at render time with a diagnostic pointing at `docs/tls.md`. That guard exists precisely so you catch the missing config at `helm upgrade` time, not at pod-crash-loop time.
|
||||
|
||||
3. **Verify the HTTPS endpoint from inside the cluster.** Port-forward and curl with the CA bundle:
|
||||
|
||||
```
|
||||
kubectl port-forward -n certctl svc/certctl-server 8443:8443 &
|
||||
kubectl get secret -n certctl certctl-server-tls -o jsonpath='{.data.ca\.crt}' | base64 -d > /tmp/certctl-ca.crt
|
||||
curl --cacert /tmp/certctl-ca.crt https://localhost:8443/health
|
||||
```
|
||||
|
||||
Expect `{"status":"ok"}`. If the Secret does not contain a `ca.crt` key (operator-supplied Secrets often don't), use `tls.crt` as the bundle instead — for a self-signed cert the two files are identical, and for a cert chained to an internal CA you should separately distribute the root CA bundle via ConfigMap or mounted file.
|
||||
|
||||
4. **Update every agent manifest.** Agents outside this Helm release (or in a separately-managed DaemonSet) need their env block updated:
|
||||
|
||||
```
|
||||
- name: CERTCTL_SERVER_URL
|
||||
value: "https://certctl-server.certctl.svc.cluster.local:8443"
|
||||
- name: CERTCTL_SERVER_CA_BUNDLE_PATH
|
||||
value: "/etc/certctl/tls/ca.crt"
|
||||
```
|
||||
|
||||
Mount the server's Secret (or a separate CA-bundle Secret / ConfigMap) at `/etc/certctl/tls/` as a read-only volume. If you bundle the agent via the shipped Helm chart's DaemonSet, the wiring is already done — set `agent.enabled=true` and the chart mounts the same Secret.
|
||||
|
||||
5. **Roll the agent DaemonSet.**
|
||||
|
||||
```
|
||||
kubectl rollout restart ds/certctl-agent -n certctl
|
||||
kubectl rollout status ds/certctl-agent -n certctl
|
||||
```
|
||||
|
||||
Every agent pod restarts with the new URL + CA bundle and reconnects on HTTPS. The dashboard shows agents flip from `Offline` to `Online` as pods finish rolling.
|
||||
|
||||
## Agent env block — external hosts
|
||||
|
||||
Agents installed on bare-metal or VM hosts via `install-agent.sh` (systemd on Linux, launchd on macOS) read config from `/etc/certctl/agent.env` (Linux) or `~/Library/Application Support/certctl/agent.env` (macOS). On cutover, append or update:
|
||||
|
||||
```
|
||||
CERTCTL_SERVER_URL=https://certctl.example.com:8443
|
||||
CERTCTL_SERVER_CA_BUNDLE_PATH=/etc/certctl/tls/ca.crt
|
||||
# CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=false # Dev only. Never set to true in production.
|
||||
```
|
||||
|
||||
Distribute the CA bundle (the same `ca.crt` the server holds, or the root chain if you issued the server cert from an intermediate) to every agent host. The path under `CERTCTL_SERVER_CA_BUNDLE_PATH` must be readable by the UID the agent service runs as.
|
||||
|
||||
Restart the service after editing:
|
||||
|
||||
- Linux: `systemctl restart certctl-agent`
|
||||
- macOS: `launchctl kickstart -k system/com.certctl.agent`
|
||||
|
||||
The agent refuses to start on an `http://` URL and exits with a pre-flight diagnostic that names this doc. That rejection happens before any network call — no spurious half-connected state.
|
||||
|
||||
## Failure mode
|
||||
|
||||
Out-of-date agents still configured with `CERTCTL_SERVER_URL=http://…` fail on first reconnect after the cutover. The failure surfaces as one of:
|
||||
|
||||
- `dial tcp …: connect: connection refused` — the server is no longer listening on a plaintext port. The new release binds only a TLS listener; attempting a plaintext `connect()` gets refused at the kernel level because nothing holds the socket.
|
||||
- `tls: first record does not look like a TLS handshake` — depending on timing and proxy layers (e.g., a load balancer that accepts the TCP connection before forwarding), the client may negotiate TCP, send an HTTP request line, and have the server's TLS stack reject it.
|
||||
|
||||
Agents in this state surface as `Offline` in the dashboard. They stay offline until their env block is updated and the service restarts. There is no graceful 400-with-migration-URL response because there is no HTTP listener to serve one from — the entire plaintext call path is removed by design.
|
||||
|
||||
If you see an unexpected agent stay `Offline` past the cutover window, SSH to the host and check the agent log. On a systemd host:
|
||||
|
||||
```
|
||||
journalctl -u certctl-agent -n 100
|
||||
```
|
||||
|
||||
Look for `URL scheme "http" is not supported: HTTPS-only control plane refuses to start (see docs/upgrade-to-tls.md)`. That's the pre-flight rejection. Update `CERTCTL_SERVER_URL`, restart the service, and the agent reconnects.
|
||||
|
||||
## Rollback
|
||||
|
||||
**There is no rollback window.** The upgrade is a one-way door. The rationale lives in §3.7 of `prompts/https-everywhere-milestone.md`: a cert-lifecycle product that bridges back to plaintext after committing to HTTPS is advertising that its own security posture is negotiable.
|
||||
|
||||
If you need to revert, you have two options:
|
||||
|
||||
1. **Stay on the pre-HTTPS release.** Do not upgrade until you are ready to run HTTPS on the control plane. Pin your `docker-compose.yml` or `helm upgrade` command to the last pre-v2.2 tag.
|
||||
2. **Rollback the release.** `helm rollback certctl <previous-revision>` or `git checkout <previous-tag> && docker compose up -d --build`. This rolls back the server, the compose topology, and the Helm chart in lockstep. Your PostgreSQL volume — cert inventory, audit trail, jobs — survives the rollback; nothing in this milestone changes the database schema.
|
||||
|
||||
Option 2 drops you back to the plaintext world. It should be treated as an emergency measure, not a supported migration path.
|
||||
|
||||
## After the cutover
|
||||
|
||||
Once every agent is `Online`, confirm a few invariants:
|
||||
|
||||
- `curl -sS -o /dev/null -w "%{http_code}\n" http://localhost:8443/health` returns `000` with `Connection refused` (no HTTP listener). Plaintext is gone.
|
||||
- `openssl s_client -connect localhost:8443 -tls1_2 </dev/null` fails the handshake. TLS 1.2 is rejected.
|
||||
- `openssl s_client -connect localhost:8443 -tls1_3 </dev/null` succeeds and prints the server's SAN list. TLS 1.3 is live.
|
||||
- A cert rotation test: overwrite the server cert on disk, `kill -HUP` the server PID, confirm the new cert serves on the next `openssl s_client -connect … -showcerts` without a process restart. See the SIGHUP section in [`tls.md`](tls.md).
|
||||
|
||||
Update your runbooks. Every `http://certctl.example.com` URL in internal documentation, monitoring config, and on-call playbooks should become `https://certctl.example.com` plus a CA-trust note.
|
||||
|
||||
## Related docs
|
||||
|
||||
- [`tls.md`](tls.md) — cert provisioning patterns, SIGHUP rotation, troubleshooting
|
||||
- [`quickstart.md`](quickstart.md) — docker-compose walkthrough (post-HTTPS)
|
||||
- [`test-env.md`](test-env.md) — integration test environment (HTTPS-only)
|
||||
- Milestone spec: `prompts/https-everywhere-milestone.md`
|
||||
+14
-12
@@ -32,11 +32,13 @@ This isn't a premium feature. It's the default behavior, free. Most alternatives
|
||||
|
||||
### 2. CA-Agnostic Issuer Architecture
|
||||
|
||||
certctl works with any certificate authority, not just ACME providers. Seven issuer connectors ship today, all free:
|
||||
certctl works with any certificate authority, not just ACME providers. Nine issuer connectors ship today, all free:
|
||||
|
||||
- **ACME v2** (Let's Encrypt, ZeroSSL, Google Trust Services, Buypass) — HTTP-01, DNS-01, DNS-PERSIST-01 challenges, External Account Binding, ACME Renewal Information (RFC 9702)
|
||||
- **ACME v2** (Let's Encrypt, ZeroSSL, Google Trust Services, Buypass) — HTTP-01, DNS-01, DNS-PERSIST-01 challenges, External Account Binding, ACME Renewal Information (RFC 9773), certificate profile selection
|
||||
- **HashiCorp Vault PKI** — `/v1/{mount}/sign/{role}` API, token auth
|
||||
- **DigiCert CertCentral** — async order model, OV/EV support
|
||||
- **Sectigo SCM** — async order model, DV/OV/EV support, 3-header auth
|
||||
- **Google Cloud CAS** — Certificate Authority Service, OAuth2 service account auth, CA pool selection
|
||||
- **step-ca** (Smallstep) — native /sign API with JWK provisioner auth
|
||||
- **Local CA** — self-signed or sub-CA mode (chain to ADCS or any enterprise root)
|
||||
- **OpenSSL / Custom CA** — delegate signing to any shell script
|
||||
@@ -54,7 +56,7 @@ A reload command can exit 0 while the certificate doesn't take effect — wrong
|
||||
|
||||
The three differentiators above get the headlines, but the feature surface is wider than most paid platforms:
|
||||
|
||||
**10 deployment targets** — NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS (local PowerShell + remote WinRM), Postfix, and Dovecot. All use a pluggable connector model. The control plane never initiates outbound connections — agents poll for work, meaning certctl works behind firewalls, across network zones, and in air-gapped environments.
|
||||
**13 deployment targets** — NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS (local PowerShell + remote WinRM), F5 BIG-IP (proxy agent + iControl REST), Postfix, Dovecot, SSH (agentless), Windows Certificate Store, and Java Keystore. All use a pluggable connector model. The control plane never initiates outbound connections — agents poll for work, meaning certctl works behind firewalls, across network zones, and in air-gapped environments.
|
||||
|
||||
**Network certificate discovery** — active TLS scanning of CIDR ranges finds certificates you didn't know existed. Agents also scan local filesystems for PEM/DER files. Everything feeds into a triage workflow where you claim, dismiss, or import discovered certs into management.
|
||||
|
||||
@@ -66,11 +68,11 @@ The three differentiators above get the headlines, but the feature surface is wi
|
||||
|
||||
**Prometheus metrics** — `/api/v1/metrics/prometheus` in standard exposition format. Works with Prometheus, Grafana Agent, Datadog Agent, Victoria Metrics.
|
||||
|
||||
**MCP server** — 80 tools exposing the entire API surface for AI-assisted certificate management via Claude, Cursor, or any MCP-compatible client. No other certificate platform offers this.
|
||||
**MCP server** — the entire REST API is exposed via MCP for AI-assisted certificate management via Claude, Cursor, or any MCP-compatible client. No other certificate platform offers this.
|
||||
|
||||
**Full REST API** — 97 OpenAPI 3.1-documented operations. CLI tool with 10 subcommands. Helm chart for Kubernetes deployment. Scheduled certificate digest emails. Certificate export in PEM and PKCS#12. S/MIME support with EKU-aware issuance.
|
||||
**Full REST API** — OpenAPI 3.1-documented operations covering the entire platform. CLI tool with 10 subcommands. Helm chart for Kubernetes deployment. Scheduled certificate digest emails. Certificate export in PEM and PKCS#12. S/MIME support with EKU-aware issuance.
|
||||
|
||||
**1,554 tests** — Go backend with race detection, static analysis (golangci-lint), and vulnerability scanning (govulncheck) on every commit. Frontend test suite. CI runs on every push.
|
||||
**Extensively tested** — Go backend with race detection, static analysis (golangci-lint), and vulnerability scanning (govulncheck) on every commit. CI-enforced per-layer coverage thresholds. Frontend test suite. Every push is gated.
|
||||
|
||||
## How certctl Compares
|
||||
|
||||
@@ -80,15 +82,15 @@ ACME clients solve one slice of the problem — issuance and renewal from ACME C
|
||||
|
||||
### vs. Agent-Based SaaS
|
||||
|
||||
The closest architectural competitors use the same agent model — local key generation, CSR submission, push-based deployment. Where certctl differs: it supports 7 issuer types (not just ACME), provides CRL/OCSP/revocation infrastructure (not just issuance), includes a policy engine and network discovery, and is source-available with no certificate limit. SaaS alternatives are typically proprietary, priced per certificate ($2+/cert/month), and cap their free tiers at 3-5 certificates. certctl is free for any number of certificates, forever.
|
||||
The closest architectural competitors use the same agent model — local key generation, CSR submission, push-based deployment. Where certctl differs: it supports 9 issuer types (not just ACME), provides CRL/OCSP/revocation infrastructure (not just issuance), includes a policy engine and network discovery, and is source-available with no certificate limit. SaaS alternatives are typically proprietary, priced per certificate ($2+/cert/month), and cap their free tiers at 3-5 certificates. certctl is free for any number of certificates, forever.
|
||||
|
||||
### vs. Commercial PKI Platforms
|
||||
|
||||
On-prem or hosted commercial platforms offer broader cert type coverage (VPN certs, device auth, SCEP) and deeper CA integrations. The trade-off: no free tier, opaque pricing (often €13K+/year for 1,500 certs), proprietary codebases, and no public API documentation. certctl trades breadth of exotic cert types for full transparency — source-available code, 97-operation OpenAPI spec, and a free community edition with no artificial limits.
|
||||
On-prem or hosted commercial platforms offer broader cert type coverage (VPN certs, device auth, SCEP) and deeper CA integrations. The trade-off: no free tier, opaque pricing (often €13K+/year for 1,500 certs), proprietary codebases, and no public API documentation. certctl trades breadth of exotic cert types for full transparency — source-available code, fully documented OpenAPI spec, and a free community edition with no artificial limits.
|
||||
|
||||
### vs. Enterprise Platforms
|
||||
|
||||
Venafi and Keyfactor offer decades of features at $75K-$250K+/year. certctl targets organizations that need 80% of those capabilities at a fraction of the cost. What certctl doesn't have yet: SSO/RBAC (coming in certctl Pro), vendor SLA-backed support. What certctl does have that enterprise platforms don't: an MCP server for AI-assisted management, ACME ARI (RFC 9702) for CA-directed renewal timing, and a deployment model that works in 5 minutes instead of 5 months.
|
||||
Venafi and Keyfactor offer decades of features at $75K-$250K+/year. certctl targets organizations that need 80% of those capabilities at a fraction of the cost. What certctl doesn't have yet: SSO/RBAC (coming in certctl Pro), vendor SLA-backed support. What certctl does have that enterprise platforms don't: an MCP server for AI-assisted management, ACME ARI (RFC 9773) for CA-directed renewal timing, and a deployment model that works in 5 minutes instead of 5 months.
|
||||
|
||||
## Who Should Look Elsewhere
|
||||
|
||||
@@ -100,18 +102,18 @@ certctl isn't the right tool for everyone:
|
||||
|
||||
## See It Running
|
||||
|
||||
The demo seeds 32 certificates across 7 issuers, 8 agents, 6 deployment targets, and 180 days of realistic history — jobs, audit events, discovery scans, approval workflows — so you can explore every feature immediately.
|
||||
The demo seeds certificates across multiple issuers, agents, and deployment targets with 180 days of realistic history — jobs, audit events, discovery scans, approval workflows — so you can explore every feature immediately.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
cd certctl/deploy && docker compose up -d
|
||||
# Dashboard at http://localhost:8443
|
||||
# Dashboard at https://localhost:8443 (self-signed cert — pin deploy/test/certs/ca.crt)
|
||||
```
|
||||
|
||||
See the [Quickstart Guide](quickstart.md) for a full walkthrough, or explore the [5 turnkey examples](../examples/) for specific scenarios (ACME+NGINX, wildcard DNS-01, private CA+Traefik, step-ca+HAProxy, multi-issuer).
|
||||
|
||||
## License
|
||||
|
||||
certctl is source-available under the [Business Source License 1.1](../LICENSE). Free for any use except offering a competing managed service. Converts to Apache 2.0 on March 1, 2033.
|
||||
certctl is source-available under the [Business Source License 1.1](../LICENSE). Free for any use except offering a competing managed service. Converts to Apache 2.0 on March 14, 2033.
|
||||
|
||||
You own your data, your keys, and your deployment.
|
||||
|
||||
@@ -36,6 +36,13 @@ flowchart TD
|
||||
|
||||
If you don't have a real domain or can't open port 80, see [Customization Tips](#customization-tips) below.
|
||||
|
||||
## TLS Security
|
||||
|
||||
certctl is HTTPS-only as of v2.2. The demo compose stack provisions a self-signed certificate. When accessing `https://localhost:8443`, you can either:
|
||||
- Use `curl --cacert ./deploy/test/certs/ca.crt ...` to pin the CA certificate
|
||||
- Use `curl -k ...` for quick smoke tests (never in production)
|
||||
- Import the CA at `./deploy/test/certs/ca.crt` into your OS trust store for browser visits
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Clone or copy this example
|
||||
@@ -122,7 +129,7 @@ docker compose logs -f certctl-server certctl-agent
|
||||
|
||||
### 5. Access the dashboard
|
||||
|
||||
Navigate to `http://localhost:8443` (or your `SERVER_PORT`)
|
||||
Navigate to `https://localhost:8443` (or your `SERVER_PORT`)
|
||||
|
||||
You should see:
|
||||
- An empty certificate inventory (no certs issued yet)
|
||||
|
||||
@@ -61,7 +61,7 @@ services:
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'curl -sf http://localhost:8443/health || exit 1']
|
||||
test: ['CMD-SHELL', 'curl -sfk https://localhost:8443/health || exit 1']
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
@@ -9,6 +9,13 @@ This example is ideal for:
|
||||
- Internal PKI with public DNS names
|
||||
- Scenarios where you have programmatic access to your DNS provider's API
|
||||
|
||||
## TLS Security
|
||||
|
||||
certctl is HTTPS-only as of v2.2. The demo compose stack provisions a self-signed certificate. When accessing `https://localhost:8443`, you can either:
|
||||
- Use `curl --cacert ./deploy/test/certs/ca.crt ...` to pin the CA certificate
|
||||
- Use `curl -k ...` for quick smoke tests (never in production)
|
||||
- Import the CA at `./deploy/test/certs/ca.crt` into your OS trust store for browser visits
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before running this example, you need:
|
||||
@@ -74,7 +81,7 @@ This starts:
|
||||
|
||||
### Step 5: Access the Dashboard
|
||||
|
||||
Open your browser to `http://localhost:8443`
|
||||
Open your browser to `https://localhost:8443`
|
||||
|
||||
### Step 6: Create a Wildcard Certificate
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ services:
|
||||
# Default is 30s; increase if your DNS propagates slowly
|
||||
# Set via CERTCTL_ACME_DNS_PROPAGATION_WAIT in code, or rely on default
|
||||
|
||||
# Optional: Let's Encrypt Renewal Information (RFC 9702) for CA-directed renewal timing
|
||||
# Optional: Let's Encrypt Renewal Information (RFC 9773) for CA-directed renewal timing
|
||||
# CERTCTL_ACME_ARI_ENABLED: "true"
|
||||
|
||||
# Local CA as fallback for internal services (optional)
|
||||
@@ -113,7 +113,7 @@ services:
|
||||
- certctl-network
|
||||
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'curl -sf http://localhost:8443/health || exit 1']
|
||||
test: ['CMD-SHELL', 'curl -sfk https://localhost:8443/health || exit 1']
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
@@ -64,7 +64,7 @@ services:
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'curl -sf http://localhost:8443/health || exit 1']
|
||||
test: ['CMD-SHELL', 'curl -sfk https://localhost:8443/health || exit 1']
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
@@ -45,6 +45,13 @@ flowchart TD
|
||||
- **Domain for ACME** (optional) — if using real Let's Encrypt, not needed for demo
|
||||
- **Internet connectivity** — to reach Let's Encrypt's API (demo can use staging directory)
|
||||
|
||||
## TLS Security
|
||||
|
||||
certctl is HTTPS-only as of v2.2. The demo compose stack provisions a self-signed certificate. When accessing `https://localhost:8443`, you can either:
|
||||
- Use `curl --cacert ./deploy/test/certs/ca.crt ...` to pin the CA certificate
|
||||
- Use `curl -k ...` for quick smoke tests (never in production)
|
||||
- Import the CA at `./deploy/test/certs/ca.crt` into your OS trust store for browser visits
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Clone or navigate to this directory
|
||||
@@ -83,7 +90,7 @@ This spins up:
|
||||
|
||||
### 4. Access the dashboard
|
||||
|
||||
Open your browser to **http://localhost:8443** (or your configured SERVER_PORT)
|
||||
Open your browser to **https://localhost:8443** (or your configured SERVER_PORT)
|
||||
|
||||
You should see:
|
||||
- Empty cert inventory (fresh start)
|
||||
|
||||
@@ -77,7 +77,7 @@ services:
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'curl -sf http://localhost:8443/health || exit 1']
|
||||
test: ['CMD-SHELL', 'curl -sfk https://localhost:8443/health || exit 1']
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
@@ -29,6 +29,13 @@ flowchart TD
|
||||
C -->|TLS handshakes| D
|
||||
```
|
||||
|
||||
## TLS Security
|
||||
|
||||
certctl is HTTPS-only as of v2.2. The demo compose stack provisions a self-signed certificate. When accessing `https://localhost:8443`, you can either:
|
||||
- Use `curl --cacert ./deploy/test/certs/ca.crt ...` to pin the CA certificate
|
||||
- Use `curl -k ...` for quick smoke tests (never in production)
|
||||
- Import the CA at `./deploy/test/certs/ca.crt` into your OS trust store for browser visits
|
||||
|
||||
## Quick Start (Self-Signed CA)
|
||||
|
||||
The simplest way to get running in 2 minutes:
|
||||
@@ -58,7 +65,7 @@ EOF
|
||||
docker compose up -d
|
||||
|
||||
# 4. Access the dashboards
|
||||
# - certctl: http://localhost:8443 (API only, use the CLI or direct HTTP calls)
|
||||
# - certctl: https://localhost:8443 (API only, use the CLI or direct HTTP calls)
|
||||
# - Traefik dashboard: http://localhost:8080
|
||||
```
|
||||
|
||||
@@ -112,7 +119,7 @@ Once the stack is running:
|
||||
|
||||
```bash
|
||||
# 1. Create a certificate profile in certctl (defines allowed key types, TTL, etc.)
|
||||
curl -X POST http://localhost:8443/api/v1/profiles \
|
||||
curl -X POST https://localhost:8443/api/v1/profiles \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "prof-internal",
|
||||
@@ -123,7 +130,7 @@ curl -X POST http://localhost:8443/api/v1/profiles \
|
||||
}'
|
||||
|
||||
# 2. Create a renewal policy (defines issuer, renewal thresholds, etc.)
|
||||
curl -X POST http://localhost:8443/api/v1/policies \
|
||||
curl -X POST https://localhost:8443/api/v1/policies \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "pol-internal",
|
||||
@@ -135,7 +142,7 @@ curl -X POST http://localhost:8443/api/v1/policies \
|
||||
}'
|
||||
|
||||
# 3. Create a certificate (triggers issuance immediately)
|
||||
curl -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"common_name": "api.internal.local",
|
||||
@@ -144,7 +151,7 @@ curl -X POST http://localhost:8443/api/v1/certificates \
|
||||
}'
|
||||
|
||||
# 4. Create a Traefik target (agent will deploy to this)
|
||||
curl -X POST http://localhost:8443/api/v1/targets \
|
||||
curl -X POST https://localhost:8443/api/v1/targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"id": "target-traefik-01",
|
||||
@@ -156,7 +163,7 @@ curl -X POST http://localhost:8443/api/v1/targets \
|
||||
}'
|
||||
|
||||
# 5. Create a deployment job (agent picks this up and deploys)
|
||||
curl -X POST http://localhost:8443/api/v1/certificates/{cert-id}/deploy \
|
||||
curl -X POST https://localhost:8443/api/v1/certificates/{cert-id}/deploy \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"target_ids": ["target-traefik-01"]
|
||||
@@ -209,16 +216,16 @@ The server provides a REST API on port 8443. Example queries:
|
||||
|
||||
```bash
|
||||
# List all certificates
|
||||
curl http://localhost:8443/api/v1/certificates
|
||||
curl https://localhost:8443/api/v1/certificates
|
||||
|
||||
# Check certificate status
|
||||
curl http://localhost:8443/api/v1/certificates/{cert-id}
|
||||
curl https://localhost:8443/api/v1/certificates/{cert-id}
|
||||
|
||||
# View audit trail
|
||||
curl http://localhost:8443/api/v1/audit
|
||||
curl https://localhost:8443/api/v1/audit
|
||||
|
||||
# Check renewal policy compliance
|
||||
curl http://localhost:8443/api/v1/policies/{policy-id}
|
||||
curl https://localhost:8443/api/v1/policies/{policy-id}
|
||||
```
|
||||
|
||||
### Traefik Dashboard
|
||||
@@ -290,7 +297,7 @@ Changes are picked up automatically (file watcher enabled).
|
||||
docker compose logs certctl-agent | grep heartbeat
|
||||
|
||||
# Check deployment job status
|
||||
curl http://localhost:8443/api/v1/jobs | jq '.[] | select(.type == "Deployment")'
|
||||
curl https://localhost:8443/api/v1/jobs | jq '.[] | select(.type == "Deployment")'
|
||||
|
||||
# Check Traefik is watching the directory
|
||||
docker compose exec traefik ls -la /etc/traefik/certs/
|
||||
|
||||
@@ -119,7 +119,7 @@ services:
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'curl -sf http://localhost:8443/health || exit 1']
|
||||
test: ['CMD-SHELL', 'curl -sfk https://localhost:8443/health || exit 1']
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
@@ -48,6 +48,13 @@ Monitor logs:
|
||||
docker compose logs -f certctl-server
|
||||
```
|
||||
|
||||
## TLS Security
|
||||
|
||||
certctl is HTTPS-only as of v2.2. The demo compose stack provisions a self-signed certificate. When accessing `https://localhost:8443`, you can either:
|
||||
- Use `curl --cacert ./deploy/test/certs/ca.crt ...` to pin the CA certificate
|
||||
- Use `curl -k ...` for quick smoke tests (never in production)
|
||||
- Import the CA at `./deploy/test/certs/ca.crt` into your OS trust store for browser visits
|
||||
|
||||
Wait for all services to reach healthy state:
|
||||
|
||||
```bash
|
||||
@@ -69,7 +76,7 @@ certctl-haproxy-... healthy
|
||||
Open your browser to:
|
||||
|
||||
```
|
||||
http://localhost:8443
|
||||
https://localhost:8443
|
||||
```
|
||||
|
||||
You should see an empty dashboard. This is expected — no certificates issued yet.
|
||||
@@ -79,7 +86,7 @@ You should see an empty dashboard. This is expected — no certificates issued y
|
||||
This defines what certificates certctl can issue (key algorithm, max TTL, allowed names).
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8443/api/v1/profiles \
|
||||
curl -X POST https://localhost:8443/api/v1/profiles \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "internal-web",
|
||||
@@ -94,7 +101,7 @@ curl -X POST http://localhost:8443/api/v1/profiles \
|
||||
This tells certctl where to deploy certificates on the HAProxy server.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8443/api/v1/targets \
|
||||
curl -X POST https://localhost:8443/api/v1/targets \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "haproxy-01",
|
||||
@@ -115,7 +122,7 @@ Note: In the Docker Compose environment, reload command can be `kill -HUP $(pido
|
||||
This ties a certificate profile to a deployment target and sets renewal thresholds.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8443/api/v1/renewal-policies \
|
||||
curl -X POST https://localhost:8443/api/v1/renewal-policies \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "haproxy-internal-web",
|
||||
@@ -130,7 +137,7 @@ curl -X POST http://localhost:8443/api/v1/renewal-policies \
|
||||
Get the issuer ID:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8443/api/v1/issuers | jq '.'
|
||||
curl https://localhost:8443/api/v1/issuers | jq '.'
|
||||
```
|
||||
|
||||
You should see `iss-stepca` in the list.
|
||||
@@ -140,7 +147,7 @@ You should see `iss-stepca` in the list.
|
||||
Request a certificate via the API. The server will sign it via step-ca.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8443/api/v1/certificates \
|
||||
curl -X POST https://localhost:8443/api/v1/certificates \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"common_name": "api.internal.example.com",
|
||||
@@ -155,7 +162,7 @@ curl -X POST http://localhost:8443/api/v1/certificates \
|
||||
Get the certificate ID and trigger deployment:
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8443/api/v1/certificates/<cert_id>/deploy \
|
||||
curl -X POST https://localhost:8443/api/v1/certificates/<cert_id>/deploy \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"target_id": "<target_id_from_step_4>"
|
||||
@@ -171,7 +178,7 @@ The agent will:
|
||||
|
||||
### 8. Verify in Dashboard
|
||||
|
||||
Refresh http://localhost:8443 and you should see:
|
||||
Refresh https://localhost:8443 and you should see:
|
||||
- 1 certificate (status: Active, expiry in 90 days)
|
||||
- 1 deployment job (status: Completed)
|
||||
- 1 agent (heartbeat: recent)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
module github.com/shankar0123/certctl
|
||||
|
||||
go 1.25.0
|
||||
go 1.25.9
|
||||
|
||||
require (
|
||||
github.com/google/uuid v1.6.0
|
||||
@@ -10,7 +10,9 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
golang.org/x/crypto v0.31.0
|
||||
github.com/masterzen/winrm v0.0.0-20250927112105-5f8e6c707321
|
||||
github.com/pkg/sftp v1.13.10
|
||||
golang.org/x/crypto v0.41.0
|
||||
software.sslmate.com/src/go-pkcs12 v0.7.0
|
||||
)
|
||||
|
||||
@@ -48,11 +50,11 @@ require (
|
||||
github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect
|
||||
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
|
||||
github.com/klauspost/compress v1.17.4 // indirect
|
||||
github.com/kr/fs v0.1.0 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
github.com/magiconair/properties v1.8.7 // indirect
|
||||
github.com/masterzen/simplexml v0.0.0-20190410153822-31eea3082786 // indirect
|
||||
github.com/masterzen/winrm v0.0.0-20250927112105-5f8e6c707321 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
github.com/moby/patternmatcher v0.6.0 // indirect
|
||||
github.com/moby/sys/sequential v0.5.0 // indirect
|
||||
@@ -69,7 +71,7 @@ require (
|
||||
github.com/shirou/gopsutil/v3 v3.23.12 // indirect
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||
github.com/stretchr/testify v1.9.0 // indirect
|
||||
github.com/stretchr/testify v1.10.0 // indirect
|
||||
github.com/tidwall/transform v0.0.0-20201103190739-32f242e2dbde // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
@@ -79,9 +81,9 @@ require (
|
||||
go.opentelemetry.io/otel v1.24.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.24.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.24.0 // indirect
|
||||
golang.org/x/net v0.23.0 // indirect
|
||||
golang.org/x/net v0.42.0 // indirect
|
||||
golang.org/x/oauth2 v0.34.0 // indirect
|
||||
golang.org/x/sys v0.40.0 // indirect
|
||||
golang.org/x/text v0.21.0 // indirect
|
||||
golang.org/x/text v0.28.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
@@ -62,7 +62,9 @@ github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbc
|
||||
github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
|
||||
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
|
||||
github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI=
|
||||
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg=
|
||||
@@ -87,6 +89,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
|
||||
github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
|
||||
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
@@ -121,6 +125,8 @@ github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQ
|
||||
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/sftp v1.13.10 h1:+5FbKNTe5Z9aspU88DPIKJ9z2KZoaGCu6Sr6kKR/5mU=
|
||||
github.com/pkg/sftp v1.13.10/go.mod h1:bJ1a7uDhrX/4OII+agvy28lzRvQrmIQuaHrcI1HbeGA=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
|
||||
@@ -150,8 +156,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo=
|
||||
github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4=
|
||||
github.com/tidwall/transform v0.0.0-20201103190739-32f242e2dbde h1:AMNpJRc7P+GTwVbl8DkK2I9I8BBUzNiHuH/tlxrpan0=
|
||||
@@ -188,8 +194,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
|
||||
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
|
||||
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
|
||||
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
|
||||
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
@@ -202,8 +208,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
|
||||
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
|
||||
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
|
||||
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
|
||||
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@@ -230,14 +236,14 @@ golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
|
||||
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
|
||||
golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
|
||||
golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
|
||||
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
|
||||
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 h1:vVKdlvoWBphwdxWKrFZEuM0kGgGLxUOYcY4U/2Vjg44=
|
||||
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
||||
+180
-23
@@ -60,8 +60,29 @@ OPTIONS:
|
||||
-h, --help Show this help message
|
||||
--server-url URL Set CERTCTL_SERVER_URL (skips interactive prompt)
|
||||
--api-key KEY Set CERTCTL_API_KEY (skips interactive prompt)
|
||||
--agent-id ID Set CERTCTL_AGENT_ID (defaults to hostname)
|
||||
--no-start Install but don't start the service
|
||||
|
||||
EXAMPLES:
|
||||
# Interactive install (download first):
|
||||
curl -sSLO https://raw.githubusercontent.com/${GITHUB_REPO}/master/install-agent.sh
|
||||
chmod +x install-agent.sh
|
||||
sudo ./install-agent.sh
|
||||
|
||||
# Non-interactive install (pipe via curl):
|
||||
curl -sSL https://raw.githubusercontent.com/${GITHUB_REPO}/master/install-agent.sh \\
|
||||
| sudo bash -s -- \\
|
||||
--server-url https://certctl.example.com \\
|
||||
--api-key YOUR_API_KEY
|
||||
|
||||
CONTROL-PLANE TLS TRUST:
|
||||
The certctl server is HTTPS-only as of v2.2. This installer does NOT copy a CA
|
||||
bundle — the generated agent.env leaves TLS trust to the system root store by
|
||||
default. If the server uses a private/enterprise or self-signed CA, set
|
||||
CERTCTL_SERVER_CA_BUNDLE_PATH in the generated agent.env to point at the CA
|
||||
bundle, or (dev only) CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true. See the
|
||||
commented block in the generated agent.env for the full menu.
|
||||
|
||||
EOF
|
||||
}
|
||||
|
||||
@@ -74,19 +95,47 @@ parse_args() {
|
||||
exit 0
|
||||
;;
|
||||
--server-url)
|
||||
SERVER_URL="$2"
|
||||
SERVER_URL="${2:-}"
|
||||
if [[ -z "$SERVER_URL" ]]; then
|
||||
echo -e "${RED}Error: --server-url requires a value${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
shift 2
|
||||
;;
|
||||
--server-url=*)
|
||||
SERVER_URL="${1#*=}"
|
||||
shift
|
||||
;;
|
||||
--api-key)
|
||||
API_KEY="$2"
|
||||
API_KEY="${2:-}"
|
||||
if [[ -z "$API_KEY" ]]; then
|
||||
echo -e "${RED}Error: --api-key requires a value${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
shift 2
|
||||
;;
|
||||
--api-key=*)
|
||||
API_KEY="${1#*=}"
|
||||
shift
|
||||
;;
|
||||
--agent-id)
|
||||
AGENT_ID="${2:-}"
|
||||
if [[ -z "$AGENT_ID" ]]; then
|
||||
echo -e "${RED}Error: --agent-id requires a value${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
shift 2
|
||||
;;
|
||||
--agent-id=*)
|
||||
AGENT_ID="${1#*=}"
|
||||
shift
|
||||
;;
|
||||
--no-start)
|
||||
NO_START=true
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
echo -e "${RED}Error: Unknown option: $1${NC}"
|
||||
echo -e "${RED}Error: Unknown option: $1${NC}" >&2
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
@@ -94,6 +143,56 @@ parse_args() {
|
||||
done
|
||||
}
|
||||
|
||||
# Ensure stdin is interactive before prompting. When the script is piped via
|
||||
# curl|bash, stdin is the pipe from curl, so `read` hits EOF immediately and
|
||||
# set -e aborts the script silently. Reopen stdin from the controlling terminal
|
||||
# (/dev/tty) if available; otherwise print a helpful error pointing at the
|
||||
# flag-based non-interactive install.
|
||||
ensure_interactive_input() {
|
||||
# If all required config is already provided via flags, no prompting needed.
|
||||
if [[ -n "${SERVER_URL:-}" && -n "${API_KEY:-}" ]]; then
|
||||
return
|
||||
fi
|
||||
|
||||
# Already interactive — nothing to do.
|
||||
if [[ -t 0 ]]; then
|
||||
return
|
||||
fi
|
||||
|
||||
# Piped stdin — try to reopen from the controlling terminal. Actually
|
||||
# attempt to open /dev/tty inside a subshell: the device node may exist
|
||||
# even when the process has no controlling terminal (ENXIO on open), so
|
||||
# `[[ -r /dev/tty ]]` is not reliable.
|
||||
if ( exec </dev/tty ) 2>/dev/null; then
|
||||
exec </dev/tty
|
||||
return
|
||||
fi
|
||||
|
||||
# No terminal available — emit clear guidance and exit.
|
||||
# Use printf '%b' so the ANSI color escapes in $RED/$NC are interpreted
|
||||
# rather than rendered as literal backslash sequences (a heredoc would
|
||||
# keep them as raw text).
|
||||
{
|
||||
printf '%b\n' "${RED}Error: No interactive terminal available.${NC}"
|
||||
printf '\n'
|
||||
printf 'The installer was piped through curl and no controlling terminal (/dev/tty)\n'
|
||||
printf 'is available for prompts. Pass the required values as flags instead:\n'
|
||||
printf '\n'
|
||||
printf ' curl -sSL https://raw.githubusercontent.com/%s/master/install-agent.sh \\\n' "$GITHUB_REPO"
|
||||
printf ' | sudo bash -s -- \\\n'
|
||||
printf ' --server-url https://certctl.example.com \\\n'
|
||||
printf ' --api-key YOUR_API_KEY\n'
|
||||
printf '\n'
|
||||
printf 'Or download the script first and run it directly:\n'
|
||||
printf '\n'
|
||||
printf ' curl -sSLO https://raw.githubusercontent.com/%s/master/install-agent.sh\n' "$GITHUB_REPO"
|
||||
printf ' chmod +x install-agent.sh\n'
|
||||
printf ' sudo ./install-agent.sh\n'
|
||||
printf '\n'
|
||||
} >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Check if running as root/sudo on Linux
|
||||
check_privileges() {
|
||||
if [[ "$OS_TYPE" == "linux" && "$EUID" -ne 0 ]]; then
|
||||
@@ -103,23 +202,33 @@ check_privileges() {
|
||||
}
|
||||
|
||||
# Download agent binary from GitHub Releases
|
||||
# IMPORTANT: main() captures this function's stdout via `binary_path=$(download_binary)`,
|
||||
# so every status/error message MUST go to stderr (>&2). Only the final
|
||||
# `echo "$temp_file"` is allowed on stdout — that's the return value.
|
||||
#
|
||||
# We deliberately do NOT register an EXIT trap to clean up $temp_file: because
|
||||
# of the command substitution, this function runs in a subshell, and any EXIT
|
||||
# trap set here fires when the subshell exits — which is *before* install_binary
|
||||
# gets a chance to cp the file. Cleanup on success is install_binary's job
|
||||
# (after the cp), and cleanup on curl failure is handled inline below.
|
||||
download_binary() {
|
||||
local binary_name="certctl-agent-${OS_TYPE}-${ARCH_TYPE}"
|
||||
local download_url="${RELEASE_URL}/${binary_name}"
|
||||
|
||||
echo -e "${YELLOW}Downloading certctl agent (${OS_TYPE}-${ARCH_TYPE})...${NC}"
|
||||
echo -e "${YELLOW}Downloading certctl agent (${OS_TYPE}-${ARCH_TYPE})...${NC}" >&2
|
||||
|
||||
if ! command -v curl &> /dev/null; then
|
||||
echo -e "${RED}Error: curl is required but not installed${NC}"
|
||||
echo -e "${RED}Error: curl is required but not installed${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local temp_file=$(mktemp)
|
||||
trap "rm -f $temp_file" EXIT
|
||||
local temp_file
|
||||
temp_file=$(mktemp)
|
||||
|
||||
if ! curl -sSL -f "$download_url" -o "$temp_file"; then
|
||||
echo -e "${RED}Error: Failed to download binary from $download_url${NC}"
|
||||
echo "Make sure the latest release exists on GitHub with the binary asset for ${OS_TYPE}-${ARCH_TYPE}."
|
||||
if ! curl -sSL -f "$download_url" -o "$temp_file" >&2; then
|
||||
rm -f "$temp_file"
|
||||
echo -e "${RED}Error: Failed to download binary from $download_url${NC}" >&2
|
||||
echo "Make sure the latest release exists on GitHub with the binary asset for ${OS_TYPE}-${ARCH_TYPE}." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -146,35 +255,52 @@ install_binary() {
|
||||
|
||||
chmod +x "$INSTALL_DIR/$SERVICE_NAME"
|
||||
echo -e "${GREEN}Binary installed: $INSTALL_DIR/$SERVICE_NAME${NC}"
|
||||
|
||||
# Clean up the temp file created by download_binary. We can't use an EXIT
|
||||
# trap inside download_binary because it runs in a subshell (command
|
||||
# substitution), so the trap would fire before we got here. Doing it
|
||||
# explicitly after the successful cp is the simplest correct pattern.
|
||||
rm -f "$binary_path"
|
||||
}
|
||||
|
||||
# Prompt for configuration (unless --server-url and --api-key provided)
|
||||
# Prompt for configuration. Any value supplied via flag is honored as-is
|
||||
# and we only prompt for the missing pieces. `read || true` prevents set -e
|
||||
# from aborting the script on EOF — instead the empty check below fires the
|
||||
# proper "required" error message.
|
||||
prompt_for_config() {
|
||||
if [[ -z "${SERVER_URL:-}" ]]; then
|
||||
echo ""
|
||||
echo -e "${YELLOW}Enter certctl server URL (e.g., https://certctl.example.com):${NC}"
|
||||
read -r SERVER_URL
|
||||
if [[ -z "$SERVER_URL" ]]; then
|
||||
echo -e "${RED}Error: Server URL is required${NC}"
|
||||
read -r SERVER_URL || true
|
||||
if [[ -z "${SERVER_URL:-}" ]]; then
|
||||
echo -e "${RED}Error: Server URL is required${NC}" >&2
|
||||
echo "Hint: pass --server-url <URL> to run non-interactively." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "${API_KEY:-}" ]]; then
|
||||
echo -e "${YELLOW}Enter certctl API key:${NC}"
|
||||
read -sr API_KEY
|
||||
read -rs API_KEY || true
|
||||
echo ""
|
||||
if [[ -z "$API_KEY" ]]; then
|
||||
echo -e "${RED}Error: API key is required${NC}"
|
||||
if [[ -z "${API_KEY:-}" ]]; then
|
||||
echo -e "${RED}Error: API key is required${NC}" >&2
|
||||
echo "Hint: pass --api-key <KEY> to run non-interactively." >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "${AGENT_ID:-}" ]]; then
|
||||
local default_agent_id="$(hostname)"
|
||||
echo -e "${YELLOW}Enter agent ID (default: $default_agent_id):${NC}"
|
||||
read -r AGENT_ID
|
||||
if [[ -z "$AGENT_ID" ]]; then
|
||||
local default_agent_id
|
||||
default_agent_id="$(hostname)"
|
||||
# If stdin is still piped (no /dev/tty was available but SERVER_URL +
|
||||
# API_KEY arrived via flags), skip the prompt entirely and use the
|
||||
# default — no need to block on an optional value.
|
||||
if [[ -t 0 ]]; then
|
||||
echo -e "${YELLOW}Enter agent ID (default: $default_agent_id):${NC}"
|
||||
read -r AGENT_ID || true
|
||||
fi
|
||||
if [[ -z "${AGENT_ID:-}" ]]; then
|
||||
AGENT_ID="$default_agent_id"
|
||||
fi
|
||||
fi
|
||||
@@ -204,7 +330,7 @@ setup_linux_config() {
|
||||
# Agent ID (unique identifier in the fleet)
|
||||
CERTCTL_AGENT_ID=$AGENT_ID
|
||||
|
||||
# Control plane server URL
|
||||
# Control plane server URL (HTTPS-only as of v2.2)
|
||||
CERTCTL_SERVER_URL=$SERVER_URL
|
||||
|
||||
# API authentication key
|
||||
@@ -216,6 +342,21 @@ CERTCTL_KEYGEN_MODE=agent
|
||||
# Key storage directory (agent-side keygen)
|
||||
CERTCTL_KEY_DIR=$key_dir
|
||||
|
||||
# ---- Control-plane TLS trust ----
|
||||
# The certctl server is HTTPS-only (v2.2+). The agent's HTTP client MUST trust the
|
||||
# server's certificate chain. Pick ONE of the approaches below:
|
||||
#
|
||||
# 1) Public CA (Let's Encrypt, DigiCert, etc.) — no config needed; system trust store works.
|
||||
# 2) Private / enterprise CA — point the agent at the CA bundle that signed the server cert:
|
||||
# CERTCTL_SERVER_CA_BUNDLE_PATH=/etc/certctl/server-ca.crt
|
||||
#
|
||||
# 3) Self-signed server cert (Helm/compose bootstrap) — same env var, just point at the
|
||||
# extracted self-signed CA bundle (e.g. from the certctl-server-tls Kubernetes secret
|
||||
# via: kubectl get secret certctl-server-tls -o jsonpath='{.data.ca\.crt}' | base64 -d).
|
||||
#
|
||||
# 4) Dev/eval only — disable verification entirely (NEVER do this in production):
|
||||
# CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true
|
||||
|
||||
# Logging level (debug, info, warn, error)
|
||||
# CERTCTL_LOG_LEVEL=info
|
||||
|
||||
@@ -255,7 +396,7 @@ setup_macos_config() {
|
||||
# Agent ID (unique identifier in the fleet)
|
||||
CERTCTL_AGENT_ID=$AGENT_ID
|
||||
|
||||
# Control plane server URL
|
||||
# Control plane server URL (HTTPS-only as of v2.2)
|
||||
CERTCTL_SERVER_URL=$SERVER_URL
|
||||
|
||||
# API authentication key
|
||||
@@ -267,6 +408,21 @@ CERTCTL_KEYGEN_MODE=agent
|
||||
# Key storage directory (agent-side keygen)
|
||||
CERTCTL_KEY_DIR=$key_dir
|
||||
|
||||
# ---- Control-plane TLS trust ----
|
||||
# The certctl server is HTTPS-only (v2.2+). The agent's HTTP client MUST trust the
|
||||
# server's certificate chain. Pick ONE of the approaches below:
|
||||
#
|
||||
# 1) Public CA (Let's Encrypt, DigiCert, etc.) — no config needed; system trust store works.
|
||||
# 2) Private / enterprise CA — point the agent at the CA bundle that signed the server cert:
|
||||
# CERTCTL_SERVER_CA_BUNDLE_PATH=$HOME/.certctl/server-ca.crt
|
||||
#
|
||||
# 3) Self-signed server cert (Helm/compose bootstrap) — same env var, just point at the
|
||||
# extracted self-signed CA bundle (e.g. from the certctl-server-tls Kubernetes secret
|
||||
# via: kubectl get secret certctl-server-tls -o jsonpath='{.data.ca\.crt}' | base64 -d).
|
||||
#
|
||||
# 4) Dev/eval only — disable verification entirely (NEVER do this in production):
|
||||
# CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true
|
||||
|
||||
# Logging level (debug, info, warn, error)
|
||||
# CERTCTL_LOG_LEVEL=info
|
||||
|
||||
@@ -447,6 +603,7 @@ main() {
|
||||
echo "Detected platform: ${OS_TYPE}-${ARCH_TYPE}"
|
||||
echo ""
|
||||
|
||||
ensure_interactive_input
|
||||
prompt_for_config
|
||||
|
||||
# Download and install binary
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
package handler
|
||||
|
||||
// Adversarial EST (RFC 7030) enrollment tests — Tier 1F.
|
||||
//
|
||||
// EST is the RFC 7030 protocol for certificate enrollment over HTTPS. The
|
||||
// control-plane parser accepts PKCS#10 CSRs either as PEM or as base64-encoded
|
||||
// DER, and it's a prime target for:
|
||||
//
|
||||
// * Malformed base64 / non-DER payloads
|
||||
// * Valid base64 that doesn't decode to a valid CSR
|
||||
// * PEM header spoofing (wrong block type)
|
||||
// * Null bytes and control characters embedded in PEM or base64
|
||||
// * Huge CSR bodies (we expect the handler's 1 MiB LimitReader to clamp them)
|
||||
// * Truncated or partially-written PEM blocks
|
||||
// * Unicode homoglyphs in PEM delimiters
|
||||
// * Content-Type mismatch (handler ignores Content-Type, but attackers might
|
||||
// still try header spoofing)
|
||||
//
|
||||
// The contract is the same as other adversarial tiers: the handler must never
|
||||
// panic and must never return 500 for a malformed CSR (500 is reserved for
|
||||
// issuer/service failures). For adversarial CSRs, the correct status is 400.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// adversarialCSRInputs exercises the EST CSR parsing surface. None of these
|
||||
// should reach the underlying ESTService — they must be rejected by
|
||||
// readCSRFromRequest with a 400 before any service call is made.
|
||||
func adversarialCSRInputs() []struct {
|
||||
name string
|
||||
body string
|
||||
} {
|
||||
// A garbage base64 string that decodes cleanly but isn't a PKCS#10 CSR.
|
||||
// base64 of "this is definitely not a CSR" = dGhpcyBpcyBkZWZpbml0ZWx5IG5vdCBhIENTUg==
|
||||
nonCSRBase64 := base64.StdEncoding.EncodeToString([]byte("this is definitely not a CSR"))
|
||||
|
||||
return []struct {
|
||||
name string
|
||||
body string
|
||||
}{
|
||||
{"garbage_string", "not-a-csr-at-all"},
|
||||
{"base64_garbage", "!!!@@@###$$$%%%"},
|
||||
{"base64_valid_non_csr", nonCSRBase64},
|
||||
{"base64_very_short", "AA=="},
|
||||
{"null_byte_only", "\x00"},
|
||||
{"null_bytes_padding", "\x00\x00\x00\x00\x00\x00\x00\x00"},
|
||||
{"control_chars", "\x01\x02\x03\x04\x05\x06\x07\x08"},
|
||||
{"pem_wrong_block_type", "-----BEGIN CERTIFICATE-----\nMIIB\n-----END CERTIFICATE-----\n"},
|
||||
{"pem_wrong_header_close", "-----BEGIN CERTIFICATE REQUEST-----\nMIIB\n-----END PRIVATE KEY-----\n"},
|
||||
{"pem_empty_block", "-----BEGIN CERTIFICATE REQUEST-----\n-----END CERTIFICATE REQUEST-----\n"},
|
||||
{"pem_garbage_body", "-----BEGIN CERTIFICATE REQUEST-----\n!!!not base64!!!\n-----END CERTIFICATE REQUEST-----\n"},
|
||||
{"pem_truncated", "-----BEGIN CERTIFICATE REQUEST-----\nMIIBijCCAT"},
|
||||
{"pem_no_end_marker", "-----BEGIN CERTIFICATE REQUEST-----\nMIIBijCCATICAQAwFjEUMBIGA1UE\n"},
|
||||
{"pem_header_injection", "-----BEGIN CERTIFICATE REQUEST-----\r\nHost: evil.com\r\n\r\nMIIB\n-----END CERTIFICATE REQUEST-----\n"},
|
||||
{"pem_embedded_null", "-----BEGIN CERTIFICATE\x00REQUEST-----\nMIIB\n-----END CERTIFICATE REQUEST-----\n"},
|
||||
{"unicode_homoglyph_pem", "-----BEGIN CERTIFICATE REQUEST─────\nMIIB\n─────END CERTIFICATE REQUEST-----\n"},
|
||||
{"double_pem_block", "-----BEGIN CERTIFICATE REQUEST-----\nMIIB\n-----END CERTIFICATE REQUEST-----\n-----BEGIN CERTIFICATE REQUEST-----\nMIIB\n-----END CERTIFICATE REQUEST-----\n"},
|
||||
{"json_body", `{"csr":"MIIB","common_name":"attacker.com"}`},
|
||||
{"xml_body", `<?xml version="1.0"?><csr>MIIB</csr>`},
|
||||
{"shell_metacharacters", "$(whoami); rm -rf / #"},
|
||||
{"sql_injection", "' OR 1=1; DROP TABLE certificates;--"},
|
||||
{"long_garbage_10k", strings.Repeat("A", 10000)},
|
||||
{"long_base64_not_csr", base64.StdEncoding.EncodeToString(bytes.Repeat([]byte{0xFF}, 5000))},
|
||||
{"base64_with_newlines_garbage", "AAAAAAAAAAAAAAAA\nBBBBBBBBBBBBBBBB\nCCCCCCCCCCCCCCCC"},
|
||||
{"percent_encoded_pem", "%2D%2D%2D%2D%2DBEGIN+CERTIFICATE+REQUEST%2D%2D%2D%2D%2D"},
|
||||
}
|
||||
}
|
||||
|
||||
// assertESTErrorResponse enforces the EST handler contract for adversarial CSRs:
|
||||
// no panic, no 500, body is valid JSON (since Error helper emits JSON errors).
|
||||
func assertESTErrorResponse(t *testing.T, w *httptest.ResponseRecorder, label string) {
|
||||
t.Helper()
|
||||
|
||||
// The handler must never reach a 500 for parser-rejected CSRs — that would
|
||||
// indicate a service call slipped through.
|
||||
if w.Code == http.StatusInternalServerError {
|
||||
t.Errorf("%s: handler returned 500 body=%q — adversarial CSR should not reach the service layer",
|
||||
label, w.Body.String())
|
||||
}
|
||||
|
||||
// The handler should return 400 Bad Request for adversarial CSR inputs.
|
||||
// A 405 (method not allowed) is impossible here because we always POST.
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("%s: expected 400, got %d (body=%q)", label, w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// newESTHandlerWithTrap returns an ESTHandler whose service panics if reached.
|
||||
// This is the core invariant for Tier 1F: adversarial CSRs must be rejected at
|
||||
// the parser, never reaching SimpleEnroll/SimpleReEnroll on the service.
|
||||
func newESTHandlerWithTrap() (ESTHandler, *trappedESTService) {
|
||||
svc := &trappedESTService{}
|
||||
return NewESTHandler(svc), svc
|
||||
}
|
||||
|
||||
// trappedESTService is a mock that fails the test if any service method is
|
||||
// called with an adversarial CSR. The parser should reject these before they
|
||||
// get here.
|
||||
type trappedESTService struct {
|
||||
serviceCalled bool
|
||||
}
|
||||
|
||||
func (t *trappedESTService) GetCACerts(ctx context.Context) (string, error) {
|
||||
t.serviceCalled = true
|
||||
return "", errors.New("trap: GetCACerts should not be called from adversarial CSR tests")
|
||||
}
|
||||
|
||||
func (t *trappedESTService) SimpleEnroll(ctx context.Context, csrPEM string) (*domain.ESTEnrollResult, error) {
|
||||
t.serviceCalled = true
|
||||
return nil, errors.New("trap: SimpleEnroll should not be called from adversarial CSR tests")
|
||||
}
|
||||
|
||||
func (t *trappedESTService) SimpleReEnroll(ctx context.Context, csrPEM string) (*domain.ESTEnrollResult, error) {
|
||||
t.serviceCalled = true
|
||||
return nil, errors.New("trap: SimpleReEnroll should not be called from adversarial CSR tests")
|
||||
}
|
||||
|
||||
func (t *trappedESTService) GetCSRAttrs(ctx context.Context) ([]byte, error) {
|
||||
t.serviceCalled = true
|
||||
return nil, errors.New("trap: GetCSRAttrs should not be called from adversarial CSR tests")
|
||||
}
|
||||
|
||||
// TestESTSimpleEnroll_AdversarialCSRs runs each adversarial CSR through the
|
||||
// enrollment endpoint.
|
||||
func TestESTSimpleEnroll_AdversarialCSRs(t *testing.T) {
|
||||
for _, tc := range adversarialCSRInputs() {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on body %q: %v", tc.body, r)
|
||||
}
|
||||
}()
|
||||
|
||||
h, svc := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/est/simpleenroll", strings.NewReader(tc.body))
|
||||
req.Header.Set("Content-Type", "application/pkcs10")
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
h.SimpleEnroll(w, req)
|
||||
|
||||
assertESTErrorResponse(t, w, "SimpleEnroll/"+tc.name)
|
||||
|
||||
if svc.serviceCalled {
|
||||
t.Errorf("SimpleEnroll/%s: service was reached with adversarial CSR (body=%q)",
|
||||
tc.name, tc.body)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestESTSimpleReEnroll_AdversarialCSRs runs each adversarial CSR through the
|
||||
// re-enrollment endpoint. Same contract as simpleenroll.
|
||||
func TestESTSimpleReEnroll_AdversarialCSRs(t *testing.T) {
|
||||
for _, tc := range adversarialCSRInputs() {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on body %q: %v", tc.body, r)
|
||||
}
|
||||
}()
|
||||
|
||||
h, svc := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/est/simplereenroll", strings.NewReader(tc.body))
|
||||
req.Header.Set("Content-Type", "application/pkcs10")
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
h.SimpleReEnroll(w, req)
|
||||
|
||||
assertESTErrorResponse(t, w, "SimpleReEnroll/"+tc.name)
|
||||
|
||||
if svc.serviceCalled {
|
||||
t.Errorf("SimpleReEnroll/%s: service was reached with adversarial CSR (body=%q)",
|
||||
tc.name, tc.body)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestESTSimpleEnroll_HugeBody verifies the handler's 1 MiB limit truncates
|
||||
// oversized requests at the LimitReader boundary. We send a 2 MiB body of
|
||||
// base64 garbage and confirm the handler rejects it cleanly (400, no panic,
|
||||
// no 500) and the service is never reached.
|
||||
func TestESTSimpleEnroll_HugeBody(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on 2 MiB body: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
// 2 MiB of base64-valid garbage: the LimitReader will truncate to 1 MiB, and
|
||||
// the truncated base64 chunk won't parse as a valid PKCS#10 CSR.
|
||||
huge := strings.Repeat("A", 2<<20)
|
||||
|
||||
h, svc := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/est/simpleenroll", strings.NewReader(huge))
|
||||
req.Header.Set("Content-Type", "application/pkcs10")
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
h.SimpleEnroll(w, req)
|
||||
|
||||
// Contract: 400 Bad Request (parser fail), no panic, no 500.
|
||||
if w.Code == http.StatusInternalServerError {
|
||||
t.Errorf("HugeBody: handler returned 500 for 2 MiB body (body=%q)", w.Body.String())
|
||||
}
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("HugeBody: expected 400, got %d (body=%q)", w.Code, w.Body.String())
|
||||
}
|
||||
if svc.serviceCalled {
|
||||
t.Error("HugeBody: service was reached with 2 MiB adversarial body")
|
||||
}
|
||||
}
|
||||
|
||||
// TestESTSimpleEnroll_ExactlyAtLimit sends a body exactly at the 1 MiB
|
||||
// LimitReader boundary. The body is still garbage (won't parse as CSR), but we
|
||||
// verify the handler doesn't panic or hang on the boundary case.
|
||||
func TestESTSimpleEnroll_ExactlyAtLimit(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on exact-limit body: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
atLimit := strings.Repeat("A", 1<<20) // exactly 1 MiB
|
||||
|
||||
h, _ := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/est/simpleenroll", strings.NewReader(atLimit))
|
||||
w := httptest.NewRecorder()
|
||||
h.SimpleEnroll(w, req)
|
||||
|
||||
if w.Code == http.StatusInternalServerError {
|
||||
t.Errorf("ExactlyAtLimit: handler returned 500 (body=%q)", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestESTSimpleEnroll_MultipartBody sends a multipart/form-data body that a
|
||||
// naive parser might try to unwrap. The handler should treat the raw bytes as
|
||||
// a CSR payload and reject them.
|
||||
func TestESTSimpleEnroll_MultipartBody(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on multipart body: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
multipart := "--boundary\r\nContent-Disposition: form-data; name=\"csr\"\r\n\r\nMIIB\r\n--boundary--\r\n"
|
||||
|
||||
h, svc := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/est/simpleenroll", strings.NewReader(multipart))
|
||||
req.Header.Set("Content-Type", "multipart/form-data; boundary=boundary")
|
||||
w := httptest.NewRecorder()
|
||||
h.SimpleEnroll(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("MultipartBody: expected 400, got %d (body=%q)", w.Code, w.Body.String())
|
||||
}
|
||||
if svc.serviceCalled {
|
||||
t.Error("MultipartBody: service was reached with multipart wrapper")
|
||||
}
|
||||
}
|
||||
|
||||
// TestESTCACerts_MethodAbuse verifies the /cacerts endpoint only accepts GET
|
||||
// and rejects every other method cleanly. This is a small safety check for
|
||||
// the spec invariant.
|
||||
func TestESTCACerts_MethodAbuse(t *testing.T) {
|
||||
methods := []string{
|
||||
http.MethodPost, http.MethodPut, http.MethodDelete,
|
||||
http.MethodPatch, http.MethodHead, http.MethodOptions,
|
||||
"TRACE", "CONNECT", "PROPFIND", "BOGUS",
|
||||
}
|
||||
|
||||
for _, method := range methods {
|
||||
t.Run(method, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on method %s: %v", method, r)
|
||||
}
|
||||
}()
|
||||
|
||||
h, _ := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(method, "/.well-known/est/cacerts", nil)
|
||||
w := httptest.NewRecorder()
|
||||
h.CACerts(w, req)
|
||||
|
||||
// HEAD on a GET handler in Go's stdlib is normally accepted, but
|
||||
// this handler enforces strict GET-only — so HEAD should also get 405.
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Errorf("method %s: expected 405, got %d", method, w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestESTSimpleEnroll_MethodAbuse verifies strict POST-only enforcement.
|
||||
func TestESTSimpleEnroll_MethodAbuse(t *testing.T) {
|
||||
methods := []string{
|
||||
http.MethodGet, http.MethodPut, http.MethodDelete,
|
||||
http.MethodPatch, http.MethodHead, http.MethodOptions,
|
||||
"TRACE", "CONNECT",
|
||||
}
|
||||
|
||||
for _, method := range methods {
|
||||
t.Run(method, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on method %s: %v", method, r)
|
||||
}
|
||||
}()
|
||||
|
||||
h, svc := newESTHandlerWithTrap()
|
||||
|
||||
req := httptest.NewRequest(method, "/.well-known/est/simpleenroll", strings.NewReader("body"))
|
||||
w := httptest.NewRecorder()
|
||||
h.SimpleEnroll(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Errorf("method %s: expected 405, got %d", method, w.Code)
|
||||
}
|
||||
if svc.serviceCalled {
|
||||
t.Errorf("method %s: service was called for non-POST", method)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,337 @@
|
||||
package handler
|
||||
|
||||
// Adversarial path-parameter and multi-segment path tests.
|
||||
//
|
||||
// These tests exercise the input parsing boundary of the certificate handler
|
||||
// against the attack categories listed in certctl-adversarial-testing-prompt.md
|
||||
// Tier 1A / 1B:
|
||||
//
|
||||
// * Empty and whitespace-only path IDs
|
||||
// * SQL-injection sentinels embedded in the path
|
||||
// * Directory traversal (`../../etc/passwd`)
|
||||
// * Null bytes and control characters
|
||||
// * Extremely long IDs (10 KiB)
|
||||
// * Unicode homoglyphs (visually identical substitutes)
|
||||
// * Multi-segment paths (OCSP, DER CRL, versions, renew, deploy, revoke)
|
||||
//
|
||||
// The contract we verify is defensive, not behavioural:
|
||||
//
|
||||
// 1. The handler never panics.
|
||||
// 2. The HTTP status is one of {200, 400, 404, 405} — never 500.
|
||||
// 3. The response body is either empty or valid JSON.
|
||||
// 4. No attacker-controlled input is echoed verbatim in a 500 body.
|
||||
//
|
||||
// We do not assert the exact status code for every adversarial input because
|
||||
// the current handler intentionally delegates identifier validation to the
|
||||
// repository layer; its only job here is to stay up and well-formed.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// adversarialPathInputs is the attack catalog shared by Tier 1A cases. Each
|
||||
// entry targets a different parsing surface; adding a new category here makes
|
||||
// every Tier 1A test below exercise it automatically.
|
||||
func adversarialPathInputs() []struct {
|
||||
name string
|
||||
input string
|
||||
} {
|
||||
return []struct {
|
||||
name string
|
||||
input string
|
||||
}{
|
||||
{"sql_injection_drop_table", "'; DROP TABLE managed_certificates;--"},
|
||||
{"sql_injection_or_true", "' OR 1=1--"},
|
||||
{"sql_injection_union", "mc-001' UNION SELECT * FROM agents--"},
|
||||
{"path_traversal_dot_dot", "../../etc/passwd"},
|
||||
{"path_traversal_encoded", "..%2F..%2Fetc%2Fpasswd"},
|
||||
{"null_byte_trailing", "mc-001\x00"},
|
||||
{"null_byte_embedded", "mc-\x00-001"},
|
||||
{"long_id_10k", strings.Repeat("A", 10000)},
|
||||
{"unicode_homoglyph_hyphen", "mc\u2010001"}, // U+2010 HYPHEN
|
||||
{"unicode_homoglyph_fullwidth", "mc\uFF0D001"}, // U+FF0D FULLWIDTH HYPHEN-MINUS
|
||||
{"control_char_newline", "mc-001\n"},
|
||||
{"control_char_tab", "mc\t001"},
|
||||
{"control_char_bell", "mc\x07001"},
|
||||
{"percent_encoded_null", "mc-001%00"},
|
||||
{"whitespace_only", " "},
|
||||
{"shell_metacharacters", "mc-001;`rm -rf /`"},
|
||||
{"leading_slash", "/mc-001"},
|
||||
{"trailing_slash", "mc-001/"},
|
||||
{"double_slash", "mc//001"},
|
||||
}
|
||||
}
|
||||
|
||||
// assertSafeResponse is the core defensive check. Any adversarial input is
|
||||
// allowed to produce a 4xx, but must not panic or leak through as a 500.
|
||||
func assertSafeResponse(t *testing.T, w *httptest.ResponseRecorder, label string) {
|
||||
t.Helper()
|
||||
|
||||
// 1. No 500 (500 implies the handler reached an unexpected internal state).
|
||||
if w.Code == http.StatusInternalServerError {
|
||||
t.Errorf("%s: handler returned 500, body=%q — adversarial input should not reach an internal error path",
|
||||
label, w.Body.String())
|
||||
}
|
||||
|
||||
// 2. Status must be in the expected safe set.
|
||||
switch w.Code {
|
||||
case http.StatusOK, http.StatusCreated, http.StatusAccepted, http.StatusNoContent,
|
||||
http.StatusBadRequest, http.StatusNotFound, http.StatusMethodNotAllowed, http.StatusNotImplemented:
|
||||
// ok
|
||||
default:
|
||||
t.Errorf("%s: unexpected status %d (body=%q)", label, w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
// 3. Non-empty bodies must be valid JSON (no template leakage, no raw panics).
|
||||
if body := bytes.TrimSpace(w.Body.Bytes()); len(body) > 0 {
|
||||
var discard interface{}
|
||||
if err := json.Unmarshal(body, &discard); err != nil {
|
||||
t.Errorf("%s: response body is not valid JSON: %v (body=%q)", label, err, w.Body.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// newCertHandlerWithMock builds a handler whose mock service returns nothing.
|
||||
// This keeps every adversarial test focused on the handler's parsing layer
|
||||
// rather than service behaviour.
|
||||
func newCertHandlerWithMock() (CertificateHandler, *MockCertificateService) {
|
||||
mock := &MockCertificateService{}
|
||||
return NewCertificateHandler(mock), mock
|
||||
}
|
||||
|
||||
// TestGetCertificate_PathInjection runs each adversarial path through the
|
||||
// certificate GET handler.
|
||||
func TestGetCertificate_PathInjection(t *testing.T) {
|
||||
for _, tc := range adversarialPathInputs() {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on input %q: %v", tc.input, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
// Force a 404 so we can distinguish "service was called" from
|
||||
// "parser accepted the ID"; a 200 with null body is also fine.
|
||||
mock.GetCertificateFn = func(_ context.Context, id string) (*domain.ManagedCertificate, error) {
|
||||
return nil, ErrMockNotFound
|
||||
}
|
||||
|
||||
// Build the URL by string concatenation to keep attacker-controlled
|
||||
// bytes intact (httptest.NewRequest uses url.Parse under the hood,
|
||||
// which normalises some characters — we want the raw path on the
|
||||
// request object).
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/certificates/x", nil)
|
||||
req.URL.Path = "/api/v1/certificates/" + tc.input
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetCertificate(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "GetCertificate/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestUpdateCertificate_PathInjection exercises the PUT handler's path parser.
|
||||
// UpdateCertificate splits the path on "/" and takes parts[0]; traversal and
|
||||
// double-slash inputs must still short-circuit at the parser rather than
|
||||
// reaching the service.
|
||||
func TestUpdateCertificate_PathInjection(t *testing.T) {
|
||||
body := `{"common_name":"example.com","owner_id":"o-alice","team_id":"t-a","issuer_id":"iss-local","name":"n","renewal_policy_id":"rp-1"}`
|
||||
|
||||
for _, tc := range adversarialPathInputs() {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on input %q: %v", tc.input, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.UpdateCertificateFn = func(_ context.Context, id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
return nil, ErrMockNotFound
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPut, "/api/v1/certificates/x", bytes.NewBufferString(body))
|
||||
req.URL.Path = "/api/v1/certificates/" + tc.input
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.UpdateCertificate(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "UpdateCertificate/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestArchiveCertificate_PathInjection exercises DELETE.
|
||||
func TestArchiveCertificate_PathInjection(t *testing.T) {
|
||||
for _, tc := range adversarialPathInputs() {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on input %q: %v", tc.input, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ArchiveCertificateFn = func(_ context.Context, id string) error { return ErrMockNotFound }
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/certificates/x", nil)
|
||||
req.URL.Path = "/api/v1/certificates/" + tc.input
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ArchiveCertificate(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "ArchiveCertificate/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetCertificateVersions_MultiSegment is a Tier 1B test: the versions
|
||||
// handler requires a 2-segment path (certID/versions). The parser uses
|
||||
// strings.Split(path, "/") and checks len(parts) < 2 — but an adversarial
|
||||
// caller can inject extra slashes to either produce an empty parts[0] or a
|
||||
// very long parts slice. Either way we must not panic.
|
||||
func TestGetCertificateVersions_MultiSegment(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
path string
|
||||
}{
|
||||
{"missing_segment", "/api/v1/certificates/versions"},
|
||||
{"empty_cert_id", "/api/v1/certificates//versions"},
|
||||
{"traversal_cert_id", "/api/v1/certificates/..%2F..%2Fversions/versions"},
|
||||
{"sql_injection_cert_id", "/api/v1/certificates/'%20OR%201=1--/versions"},
|
||||
{"null_byte_cert_id", "/api/v1/certificates/mc\x00001/versions"},
|
||||
{"very_long_cert_id", "/api/v1/certificates/" + strings.Repeat("A", 5000) + "/versions"},
|
||||
{"trailing_segments", "/api/v1/certificates/mc-001/versions/extra/trailing"},
|
||||
{"deep_nesting", "/api/v1/certificates/" + strings.Repeat("a/", 50) + "versions"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on path %q: %v", tc.path, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.GetCertificateVersionsFn = func(_ context.Context, certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
return []domain.CertificateVersion{}, 0, nil
|
||||
}
|
||||
|
||||
// Use a dummy safe URL in NewRequest to avoid url.Parse panics
|
||||
// on control chars, then overwrite with the raw attacker path.
|
||||
req := httptest.NewRequest(http.MethodGet, "/safe", nil)
|
||||
req.URL.Path = tc.path
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetCertificateVersions(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "GetCertificateVersions/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestHandleOCSP_MultiSegment exercises the OCSP responder's 2-segment path
|
||||
// parser (/.well-known/pki/ocsp/{issuer_id}/{serial_hex}). Each leg is
|
||||
// attacker-controlled and the serial can be arbitrary length. This is a key
|
||||
// adversarial surface because the serial is passed directly to the
|
||||
// CA-operations service, which is expected to treat it as an opaque
|
||||
// identifier.
|
||||
//
|
||||
// M-006 relocation: these paths were previously served at /api/v1/ocsp/*;
|
||||
// under RFC 8615 and RFC 6960 they now live under /.well-known/pki/ocsp/*.
|
||||
func TestHandleOCSP_MultiSegment(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
path string
|
||||
}{
|
||||
{"missing_serial", "/.well-known/pki/ocsp/iss-local"},
|
||||
{"missing_both", "/.well-known/pki/ocsp/"},
|
||||
{"empty_issuer", "/.well-known/pki/ocsp//01ABCDEF"},
|
||||
{"empty_serial", "/.well-known/pki/ocsp/iss-local/"},
|
||||
{"traversal_issuer", "/.well-known/pki/ocsp/..%2F..%2Fetc/passwd/01"},
|
||||
{"null_byte_serial", "/.well-known/pki/ocsp/iss-local/01\x00FF"},
|
||||
{"sql_injection_serial", "/.well-known/pki/ocsp/iss-local/01'; DROP TABLE--"},
|
||||
{"negative_hex_serial", "/.well-known/pki/ocsp/iss-local/-1"},
|
||||
{"unicode_serial", "/.well-known/pki/ocsp/iss-local/01\u2010FF"},
|
||||
{"extremely_long_serial", "/.well-known/pki/ocsp/iss-local/" + strings.Repeat("F", 10000)},
|
||||
{"extra_segments", "/.well-known/pki/ocsp/iss-local/01FF/extra/segments"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on path %q: %v", tc.path, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.GetOCSPResponseFn = func(_ context.Context, issuerID, serialHex string) ([]byte, error) {
|
||||
return nil, ErrMockNotFound
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/safe", nil)
|
||||
req.URL.Path = tc.path
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.HandleOCSP(w, req)
|
||||
|
||||
// OCSP does NOT guarantee JSON responses (pkix-crl uses binary),
|
||||
// so we only check status safety, not body structure.
|
||||
if w.Code == http.StatusInternalServerError {
|
||||
t.Errorf("HandleOCSP/%s: returned 500 body=%q", tc.name, w.Body.String())
|
||||
}
|
||||
if w.Code >= 500 {
|
||||
t.Errorf("HandleOCSP/%s: unexpected 5xx %d", tc.name, w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetDERCRL_IssuerPathInjection exercises
|
||||
// /.well-known/pki/crl/{issuer_id} (RFC 5280 CRL; M-006 relocation from
|
||||
// /api/v1/crl/{issuer_id}).
|
||||
func TestGetDERCRL_IssuerPathInjection(t *testing.T) {
|
||||
for _, tc := range adversarialPathInputs() {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("handler panicked on input %q: %v", tc.input, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.GenerateDERCRLFn = func(_ context.Context, issuerID string) ([]byte, error) {
|
||||
return nil, ErrMockNotFound
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/crl/x", nil)
|
||||
req.URL.Path = "/.well-known/pki/crl/" + tc.input
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetDERCRL(w, req)
|
||||
|
||||
if w.Code >= 500 {
|
||||
t.Errorf("GetDERCRL/%s: unexpected 5xx %d (body=%q)", tc.name, w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,539 @@
|
||||
package handler
|
||||
|
||||
// Adversarial query-parameter, request-body, and revocation-reason tests.
|
||||
//
|
||||
// These tests exercise the second boundary of the certificate handler:
|
||||
//
|
||||
// * Numeric pagination parsing (page, per_page, page_size)
|
||||
// * Sort direction and field whitelist
|
||||
// * Time-range filters (expires_before, expires_after, created_after, updated_after)
|
||||
// * Cursor pagination
|
||||
// * Sparse-field projection (?fields=...)
|
||||
// * Request-body JSON parsing (create/update) — null, malformed, deep nesting,
|
||||
// unicode, oversized
|
||||
// * Revocation reason abuse
|
||||
//
|
||||
// The handler silently ignores malformed pagination values (it falls back to
|
||||
// defaults) and ignores invalid RFC3339 time values. These tests lock in that
|
||||
// behaviour so a future "fail-closed" change has to be deliberate.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
)
|
||||
|
||||
// buildListRequest constructs a GET /api/v1/certificates request with the
|
||||
// given raw query string. We use raw query strings (not url.Values.Encode)
|
||||
// so adversarial inputs like "page=abc&page=-1" or "%00" pass through
|
||||
// unchanged.
|
||||
func buildListRequest(rawQuery string) *http.Request {
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/certificates", nil)
|
||||
req.URL.RawQuery = rawQuery
|
||||
return req.WithContext(contextWithRequestID())
|
||||
}
|
||||
|
||||
// TestListCertificates_PaginationAbuse verifies adversarial pagination values
|
||||
// never produce a 500 and the handler always falls back to sane defaults.
|
||||
func TestListCertificates_PaginationAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
rawQuery string
|
||||
}{
|
||||
{"negative_page", "page=-1"},
|
||||
{"zero_page", "page=0"},
|
||||
{"non_numeric_page", "page=abc"},
|
||||
{"huge_page", "page=99999999999"},
|
||||
{"int_overflow_page", "page=9223372036854775808"}, // int64 max + 1
|
||||
{"negative_per_page", "per_page=-1"},
|
||||
{"zero_per_page", "per_page=0"},
|
||||
{"per_page_cap_at_500", "per_page=500"},
|
||||
{"per_page_above_cap", "per_page=501"},
|
||||
{"per_page_absurd", "per_page=1000000"},
|
||||
{"non_numeric_per_page", "per_page=xyz"},
|
||||
{"mixed_numeric_per_page", "per_page=10abc"},
|
||||
{"negative_page_size", "page_size=-1"},
|
||||
{"page_size_above_cap", "page_size=501"},
|
||||
{"float_page", "page=1.5"},
|
||||
{"exponent_page", "page=1e10"},
|
||||
{"hex_page", "page=0xff"},
|
||||
{"unicode_digits_page", "page=\u0661\u0662\u0663"}, // Arabic-Indic digits
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.rawQuery, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ListCertificatesWithFilterFn = func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
// Sanity: page/perPage on the filter must never be negative
|
||||
// and perPage must never exceed 500 after parsing.
|
||||
if filter.Page < 1 {
|
||||
t.Errorf("filter.Page=%d (must be >=1)", filter.Page)
|
||||
}
|
||||
if filter.PerPage < 1 || filter.PerPage > 500 {
|
||||
t.Errorf("filter.PerPage=%d (must be in [1,500])", filter.PerPage)
|
||||
}
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListCertificates(w, buildListRequest(tc.rawQuery))
|
||||
|
||||
assertSafeResponse(t, w, "ListCertificates/"+tc.name)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("%s: expected 200, got %d (body=%q)", tc.name, w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestListCertificates_SortAbuse verifies the sort field (which feeds into a
|
||||
// whitelist in the repository layer) handles adversarial input safely at the
|
||||
// handler boundary. The handler accepts the raw value and forwards it; the
|
||||
// repository is expected to whitelist it, but at THIS layer we just verify
|
||||
// we don't crash or leak.
|
||||
func TestListCertificates_SortAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
rawQuery string
|
||||
}{
|
||||
{"sql_injection_sort", "sort=notAfter;DROP TABLE managed_certificates--"},
|
||||
{"sql_injection_or", "sort=notAfter' OR '1'='1"},
|
||||
{"path_traversal_sort", "sort=../../etc/passwd"},
|
||||
{"null_byte_sort", "sort=notAfter%00"},
|
||||
{"unicode_sort", "sort=notAfter\u2010desc"},
|
||||
{"leading_dash_only", "sort=-"},
|
||||
{"leading_dashes", "sort=---notAfter"},
|
||||
{"empty_sort", "sort="},
|
||||
{"very_long_sort", "sort=" + strings.Repeat("a", 5000)},
|
||||
{"sort_desc_flag", "sort=notAfter&sort_desc=true"},
|
||||
{"conflicting_sort_desc", "sort=-notAfter&sort_desc=false"},
|
||||
{"unknown_field", "sort=gibberish"},
|
||||
{"shell_metacharacters_sort", "sort=notAfter;rm -rf /"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.rawQuery, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ListCertificatesWithFilterFn = func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListCertificates(w, buildListRequest(tc.rawQuery))
|
||||
|
||||
assertSafeResponse(t, w, "ListCertificates/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestListCertificates_FieldsAbuse verifies sparse field projection handles
|
||||
// adversarial field lists safely.
|
||||
func TestListCertificates_FieldsAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
rawQuery string
|
||||
}{
|
||||
{"sql_injection_fields", "fields=id,name' OR 1=1--"},
|
||||
{"path_traversal_fields", "fields=../../etc/passwd"},
|
||||
{"empty_fields", "fields="},
|
||||
{"single_comma", "fields=,"},
|
||||
{"trailing_comma", "fields=id,name,"},
|
||||
{"leading_comma", "fields=,id,name"},
|
||||
{"whitespace_fields", "fields= id , name "},
|
||||
{"duplicate_fields", "fields=id,id,id,id,id"},
|
||||
{"unknown_fields", "fields=totally_not_a_field"},
|
||||
{"many_fields", "fields=" + strings.Repeat("x,", 200) + "id"},
|
||||
{"unicode_fields", "fields=id,n\u00e4me"},
|
||||
{"null_byte_fields", "fields=id%00name"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.rawQuery, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ListCertificatesWithFilterFn = func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListCertificates(w, buildListRequest(tc.rawQuery))
|
||||
|
||||
assertSafeResponse(t, w, "ListCertificates/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestListCertificates_TimeRangeAbuse verifies RFC3339 time-range filters
|
||||
// handle malformed input by silently falling back to no filter (current
|
||||
// behaviour).
|
||||
func TestListCertificates_TimeRangeAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
rawQuery string
|
||||
}{
|
||||
{"invalid_expires_before", "expires_before=not-a-date"},
|
||||
{"empty_expires_before", "expires_before="},
|
||||
{"garbage_expires_before", "expires_before=%00%00"},
|
||||
{"sql_injection_time", "expires_before=2026-01-01T00:00:00Z';DROP TABLE managed_certificates--"},
|
||||
{"year_zero", "expires_before=0000-01-01T00:00:00Z"},
|
||||
{"year_negative", "expires_before=-0001-01-01T00:00:00Z"},
|
||||
{"year_huge", "expires_before=99999-12-31T23:59:59Z"},
|
||||
{"invalid_month", "expires_before=2026-13-01T00:00:00Z"},
|
||||
{"invalid_day", "expires_before=2026-02-30T00:00:00Z"},
|
||||
{"valid_utc", "expires_before=2026-06-15T12:00:00Z"},
|
||||
{"valid_with_offset", "expires_before=2026-06-15T12:00:00-07:00"},
|
||||
{"unix_seconds_not_rfc3339", "expires_before=1767225600"},
|
||||
{"all_four_filters", "expires_before=garbage&expires_after=garbage&created_after=garbage&updated_after=garbage"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.rawQuery, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ListCertificatesWithFilterFn = func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListCertificates(w, buildListRequest(tc.rawQuery))
|
||||
|
||||
assertSafeResponse(t, w, "ListCertificates/"+tc.name)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("%s: expected 200, got %d", tc.name, w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestListCertificates_CursorAbuse exercises cursor-based pagination with
|
||||
// adversarial cursor tokens. The handler forwards the cursor to the
|
||||
// repository; we verify no 500 at the boundary and that the response type
|
||||
// switches correctly.
|
||||
func TestListCertificates_CursorAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
cursor string
|
||||
}{
|
||||
{"empty_not_set", ""}, // special-cased: should return PagedResponse
|
||||
{"garbage_cursor", "not-a-valid-cursor"},
|
||||
{"base64_garbage", "dGhpcyBpcyBub3QgYSB2YWxpZCBjdXJzb3I="},
|
||||
{"sql_injection_cursor", "2026-01-01T00:00:00Z:mc-001';DROP TABLE--"},
|
||||
{"path_traversal_cursor", "../../etc/passwd"},
|
||||
{"null_byte_cursor", "valid%00cursor"},
|
||||
{"very_long_cursor", strings.Repeat("A", 8192)},
|
||||
{"unicode_cursor", "2026-01-01T00:00:00Z:mc\u20100001"},
|
||||
{"valid_looking_cursor", "2026-01-01T00:00:00.000000000Z:mc-001"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.cursor, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ListCertificatesWithFilterFn = func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
|
||||
rawQuery := "cursor=" + url.QueryEscape(tc.cursor) + "&page_size=50"
|
||||
if tc.cursor == "" {
|
||||
rawQuery = "page=1&per_page=50"
|
||||
}
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListCertificates(w, buildListRequest(rawQuery))
|
||||
|
||||
assertSafeResponse(t, w, "ListCertificates/"+tc.name)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("%s: expected 200, got %d", tc.name, w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestListCertificates_FilterInjection verifies the basic string filters
|
||||
// (status, environment, owner_id, team_id, issuer_id, agent_id, profile_id)
|
||||
// are forwarded as-is without causing any handler-layer failures. These go
|
||||
// into parameterized SQL at the repo layer.
|
||||
func TestListCertificates_FilterInjection(t *testing.T) {
|
||||
filters := []string{
|
||||
"status", "environment", "owner_id", "team_id",
|
||||
"issuer_id", "agent_id", "profile_id",
|
||||
}
|
||||
payloads := []string{
|
||||
"' OR 1=1--",
|
||||
"'; DROP TABLE managed_certificates;--",
|
||||
"../../etc/passwd",
|
||||
strings.Repeat("A", 5000),
|
||||
"\u2010hyphen",
|
||||
"%00null",
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
for _, p := range payloads {
|
||||
name := f + "__" + p
|
||||
if len(name) > 80 {
|
||||
name = name[:80]
|
||||
}
|
||||
t.Run(name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.ListCertificatesWithFilterFn = func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
|
||||
rawQuery := f + "=" + url.QueryEscape(p)
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListCertificates(w, buildListRequest(rawQuery))
|
||||
|
||||
assertSafeResponse(t, w, "ListCertificates/"+f)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- Request body abuse (Tier 1D) ----------
|
||||
|
||||
// TestCreateCertificate_BodyAbuse sends adversarial JSON bodies to
|
||||
// POST /api/v1/certificates. Every case must respond with 400 (not 500,
|
||||
// not 200). This proves we reject malformed input before reaching the
|
||||
// service layer.
|
||||
func TestCreateCertificate_BodyAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
body string
|
||||
}{
|
||||
{"null_body", "null"},
|
||||
{"empty_body", ""},
|
||||
{"not_json", "not json at all"},
|
||||
{"truncated_json", `{"common_name":"exa`},
|
||||
{"unclosed_object", `{"common_name":"example.com"`},
|
||||
{"array_not_object", `["example.com"]`},
|
||||
{"number_not_object", `42`},
|
||||
{"string_not_object", `"hello"`},
|
||||
{"boolean_not_object", `true`},
|
||||
{"duplicate_keys", `{"common_name":"evil.com","common_name":"example.com"}`},
|
||||
{"unicode_bom", "\ufeff{\"common_name\":\"example.com\"}"},
|
||||
{"deep_nesting", strings.Repeat("{\"x\":", 100) + "null" + strings.Repeat("}", 100)},
|
||||
{"nested_array_bomb", `{"common_name":"x","sans":[[[[[[[[[[]]]]]]]]]]}`},
|
||||
{"sql_injection_cn", `{"common_name":"'; DROP TABLE managed_certificates;--"}`},
|
||||
{"empty_cn", `{"common_name":""}`},
|
||||
{"null_cn", `{"common_name":null}`},
|
||||
{"whitespace_cn", `{"common_name":" "}`},
|
||||
{"cn_too_long", fmt.Sprintf(`{"common_name":%q}`, strings.Repeat("a", 500))},
|
||||
{"cn_path_traversal", `{"common_name":"../../etc/passwd"}`},
|
||||
{"cn_null_byte", "{\"common_name\":\"example\\u0000.com\"}"},
|
||||
{"cn_newline", "{\"common_name\":\"example\\n.com\"}"},
|
||||
{"cn_only_missing_others", `{"common_name":"example.com"}`},
|
||||
{"extra_unknown_fields", `{"common_name":"example.com","__proto__":{"polluted":true},"eval":"alert(1)"}`},
|
||||
{"unicode_homoglyph_cn", "{\"common_name\":\"ex\u0430mple.com\"}"}, // Cyrillic а
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.name, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.CreateCertificateFn = func(_ context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
// If we ever reach this, the handler accepted a malformed
|
||||
// body. Return a sentinel that passes but flag it.
|
||||
c := cert
|
||||
c.ID = "mc-accepted"
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates", bytes.NewBufferString(tc.body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
handler.CreateCertificate(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "CreateCertificate/"+tc.name)
|
||||
// Must NOT be 201 — all these bodies should be rejected.
|
||||
if w.Code == http.StatusCreated {
|
||||
t.Errorf("%s: handler accepted malformed body (201) body=%q", tc.name, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestCreateCertificate_HugeBody sends a 2 MiB JSON body. The body-limit
|
||||
// middleware is not in this handler-unit test, so we just verify the handler
|
||||
// doesn't OOM/panic on a large but well-formed body.
|
||||
func TestCreateCertificate_HugeBody(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on huge body: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
// 2 MiB of SANs — well-formed JSON, technically valid, just huge.
|
||||
var sb strings.Builder
|
||||
sb.WriteString(`{"common_name":"example.com","owner_id":"o","team_id":"t","issuer_id":"iss","name":"n","renewal_policy_id":"rp","sans":[`)
|
||||
for i := 0; i < 20000; i++ {
|
||||
if i > 0 {
|
||||
sb.WriteByte(',')
|
||||
}
|
||||
fmt.Fprintf(&sb, `"host%d.example.com"`, i)
|
||||
}
|
||||
sb.WriteString(`]}`)
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.CreateCertificateFn = func(_ context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
c := cert
|
||||
c.ID = "mc-huge"
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates", strings.NewReader(sb.String()))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
handler.CreateCertificate(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "CreateCertificate/huge_body")
|
||||
}
|
||||
|
||||
// ---------- Revocation reason abuse (Tier 1E) ----------
|
||||
|
||||
// TestRevokeCertificate_ReasonAbuse sends adversarial revocation reasons to
|
||||
// POST /api/v1/certificates/{id}/revoke. The handler forwards the reason
|
||||
// string to the service layer, which validates against RFC 5280. Errors
|
||||
// from the service containing "invalid revocation reason" must map to 400,
|
||||
// never 500.
|
||||
func TestRevokeCertificate_ReasonAbuse(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
body string
|
||||
}{
|
||||
{"empty_reason", `{"reason":""}`},
|
||||
{"null_reason", `{"reason":null}`},
|
||||
{"nonexistent_reason", `{"reason":"totally made up"}`},
|
||||
{"case_variant", `{"reason":"KEYCOMPROMISE"}`},
|
||||
{"with_spaces", `{"reason":"key compromise"}`},
|
||||
{"with_dashes", `{"reason":"key-compromise"}`},
|
||||
{"mixed_case", `{"reason":"KeyCompromise"}`},
|
||||
{"lowercase_valid", `{"reason":"keycompromise"}`},
|
||||
{"unicode_homoglyph", "{\"reason\":\"keyCompr\u043emise\"}"},
|
||||
{"sql_injection", `{"reason":"keyCompromise';DROP TABLE revocations--"}`},
|
||||
{"very_long", fmt.Sprintf(`{"reason":%q}`, strings.Repeat("a", 10000))},
|
||||
{"integer_reason", `{"reason":1}`},
|
||||
{"array_reason", `{"reason":["keyCompromise"]}`},
|
||||
{"object_reason", `{"reason":{"code":1}}`},
|
||||
{"extra_fields", `{"reason":"keyCompromise","admin":true,"bypass":true}`},
|
||||
{"no_body", ``},
|
||||
{"malformed_json", `{"reason":`},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("panicked on %q: %v", tc.name, r)
|
||||
}
|
||||
}()
|
||||
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
// The mock always returns "invalid revocation reason" so we
|
||||
// verify the handler's errMsg→status mapping turns it into a 400.
|
||||
mock.RevokeCertificateFn = func(_ context.Context, id string, reason string, _ string) error {
|
||||
// The service uses domain.IsValidRevocationReason. If we got
|
||||
// through to here with something bogus, simulate a real
|
||||
// service error.
|
||||
return fmt.Errorf("invalid revocation reason: %q", reason)
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/mc-001/revoke", bytes.NewBufferString(tc.body))
|
||||
req.URL.Path = "/api/v1/certificates/mc-001/revoke"
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
handler.RevokeCertificate(w, req)
|
||||
|
||||
assertSafeResponse(t, w, "RevokeCertificate/"+tc.name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRevokeCertificate_AlreadyRevoked locks in the specific error->status
|
||||
// mapping for "already revoked". The handler uses substring matching on the
|
||||
// service error message, which is fragile — this test catches regressions.
|
||||
func TestRevokeCertificate_AlreadyRevoked(t *testing.T) {
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.RevokeCertificateFn = func(_ context.Context, id string, reason string, _ string) error {
|
||||
return fmt.Errorf("cannot revoke: certificate is already revoked")
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/mc-001/revoke", strings.NewReader(`{"reason":"keyCompromise"}`))
|
||||
req.URL.Path = "/api/v1/certificates/mc-001/revoke"
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
handler.RevokeCertificate(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 for already-revoked, got %d (body=%q)", w.Code, w.Body.String())
|
||||
}
|
||||
assertSafeResponse(t, w, "RevokeCertificate/already_revoked")
|
||||
}
|
||||
|
||||
// TestRevokeCertificate_NotFound verifies 404 mapping.
|
||||
func TestRevokeCertificate_NotFound(t *testing.T) {
|
||||
handler, mock := newCertHandlerWithMock()
|
||||
mock.RevokeCertificateFn = func(_ context.Context, id string, reason string, _ string) error {
|
||||
return fmt.Errorf("certificate not found")
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/mc-missing/revoke", strings.NewReader(`{"reason":"keyCompromise"}`))
|
||||
req.URL.Path = "/api/v1/certificates/mc-missing/revoke"
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
handler.RevokeCertificate(w, req)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404 for not-found, got %d (body=%q)", w.Code, w.Body.String())
|
||||
}
|
||||
assertSafeResponse(t, w, "RevokeCertificate/not_found")
|
||||
}
|
||||
@@ -3,12 +3,14 @@ package handler
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// AgentGroupService defines the service interface for agent group operations.
|
||||
@@ -22,6 +24,19 @@ type AgentGroupService interface {
|
||||
}
|
||||
|
||||
// AgentGroupHandler handles HTTP requests for agent group operations.
|
||||
//
|
||||
// Error dispatch (post-M-1): every service error routes through the [errToStatus]
|
||||
// choke point via `errors.Is` walking the wrap chain, with one explicit
|
||||
// [service.ErrValidation] arm on the write paths (Create, Update) so the
|
||||
// composed "validation: <field-specific reason>" message the service layer
|
||||
// attaches via `fmt.Errorf("%w: ...", ErrValidation)` can be passed through to
|
||||
// the 400 response body. Before M-1, the Create handler branched on
|
||||
// `strings.Contains(err.Error(), "invalid"|"required")` — fragile because a
|
||||
// single reword in [service.validateAgentGroup] would demote the 400 to 500
|
||||
// with no compile-time signal — and the Update/Delete handlers branched on
|
||||
// `strings.Contains(err.Error(), "not found")`, coupling HTTP classification
|
||||
// to repository human-readable strings. Both now ride the typed
|
||||
// [repository.ErrNotFound] wrap through errToStatus. Mirrors ProfileHandler.
|
||||
type AgentGroupHandler struct {
|
||||
svc AgentGroupService
|
||||
}
|
||||
@@ -89,7 +104,18 @@ func (h AgentGroupHandler) GetAgentGroup(w http.ResponseWriter, r *http.Request)
|
||||
|
||||
group, err := h.svc.GetAgentGroup(r.Context(), id)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Agent group not found", requestID)
|
||||
// M-1: route through errToStatus so a repo-level `sql.ErrNoRows`
|
||||
// (wrapped as repository.ErrNotFound) becomes 404, but a transient DB
|
||||
// failure no longer masquerades as 404 — it correctly surfaces 500. The
|
||||
// pre-M-1 "any error → 404" shortcut was plausible when Get's only
|
||||
// expected failure was "not found", but the choke point now gives us
|
||||
// correct dispatch for free. Mirrors GetProfile.
|
||||
status := errToStatus(err)
|
||||
msg := "Failed to get agent group"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Agent group not found"
|
||||
}
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -123,7 +149,15 @@ func (h AgentGroupHandler) CreateAgentGroup(w http.ResponseWriter, r *http.Reque
|
||||
|
||||
created, err := h.svc.CreateAgentGroup(r.Context(), group)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "invalid") || strings.Contains(err.Error(), "required") {
|
||||
// M-1: replace the 2-term substring net (`"invalid"|"required"`) with a
|
||||
// single `errors.Is(err, service.ErrValidation)` arm. validateAgentGroup
|
||||
// wraps every field-specific failure via `fmt.Errorf("%w: <reason>",
|
||||
// ErrValidation)`, so `err.Error()` still contains the human-readable
|
||||
// reason (e.g., "agent group name is required") and can be safely passed
|
||||
// to the 400 body — but the status decision no longer depends on the
|
||||
// exact wording. Other errors redact to a generic 500. Mirrors
|
||||
// CreateProfile.
|
||||
if errors.Is(err, service.ErrValidation) {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID)
|
||||
return
|
||||
}
|
||||
@@ -160,11 +194,22 @@ func (h AgentGroupHandler) UpdateAgentGroup(w http.ResponseWriter, r *http.Reque
|
||||
|
||||
updated, err := h.svc.UpdateAgentGroup(r.Context(), id, group)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Agent group not found", requestID)
|
||||
// M-1: explicit ErrValidation arm preserves the user-facing reason in
|
||||
// the 400 body (validateAgentGroup wraps every failure via
|
||||
// `fmt.Errorf("%w: ...", ErrValidation)`); every other error — including
|
||||
// repo-layer ErrNotFound on a missing row — routes through errToStatus
|
||||
// so the 404/500 decision no longer depends on substring matching.
|
||||
// Mirrors UpdateProfile.
|
||||
if errors.Is(err, service.ErrValidation) {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update agent group", requestID)
|
||||
status := errToStatus(err)
|
||||
msg := "Failed to update agent group"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Agent group not found"
|
||||
}
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -188,11 +233,15 @@ func (h AgentGroupHandler) DeleteAgentGroup(w http.ResponseWriter, r *http.Reque
|
||||
}
|
||||
|
||||
if err := h.svc.DeleteAgentGroup(r.Context(), id); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Agent group not found", requestID)
|
||||
return
|
||||
// M-1: sentinel dispatch replaces the substring 404 check — see the
|
||||
// parallel comment block in UpdateAgentGroup for the rationale. Mirrors
|
||||
// DeleteProfile.
|
||||
status := errToStatus(err)
|
||||
msg := "Failed to delete agent group"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Agent group not found"
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete agent group", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// MockAgentService is a mock implementation of AgentService interface.
|
||||
@@ -24,6 +25,11 @@ type MockAgentService struct {
|
||||
GetWorkFn func(agentID string) ([]domain.Job, error)
|
||||
GetWorkWithTargetsFn func(agentID string) ([]domain.WorkItem, error)
|
||||
UpdateJobStatusFn func(agentID string, jobID string, status string, errMsg string) error
|
||||
// I-004: soft-retirement hooks. Tests that don't set these receive nil
|
||||
// results and nil errors, which mirrors the safest default (no-op) for
|
||||
// unrelated suites that mock only the legacy surface.
|
||||
RetireAgentFn func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error)
|
||||
ListRetiredAgentsFn func(page, perPage int) ([]domain.Agent, int64, error)
|
||||
}
|
||||
|
||||
func (m *MockAgentService) ListAgents(_ context.Context, page, perPage int) ([]domain.Agent, int64, error) {
|
||||
@@ -96,6 +102,25 @@ func (m *MockAgentService) UpdateJobStatus(_ context.Context, agentID string, jo
|
||||
return nil
|
||||
}
|
||||
|
||||
// RetireAgent is the I-004 soft-retirement entrypoint. Tests that don't set
|
||||
// RetireAgentFn get a nil result + nil error, which is a no-op response that
|
||||
// lets unrelated suites compile without caring about the retirement surface.
|
||||
func (m *MockAgentService) RetireAgent(_ context.Context, agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
if m.RetireAgentFn != nil {
|
||||
return m.RetireAgentFn(agentID, actor, force, reason)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ListRetiredAgents returns retired rows for the retired-agents tab / audit
|
||||
// views. Same zero-value default as RetireAgent for unrelated tests.
|
||||
func (m *MockAgentService) ListRetiredAgents(_ context.Context, page, perPage int) ([]domain.Agent, int64, error) {
|
||||
if m.ListRetiredAgentsFn != nil {
|
||||
return m.ListRetiredAgentsFn(page, perPage)
|
||||
}
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
// Test ListAgents - success case
|
||||
func TestListAgents_Success(t *testing.T) {
|
||||
now := time.Now()
|
||||
|
||||
@@ -0,0 +1,393 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// agentRetireTestSetup builds an AgentHandler with a mock AgentService whose
|
||||
// RetireAgent / ListRetiredAgents / Heartbeat behavior is driven by the
|
||||
// returned mock. Keeps every I-004 handler test self-contained so a single
|
||||
// failing assertion can't cascade through a shared fixture.
|
||||
func agentRetireTestSetup() (*MockAgentService, AgentHandler) {
|
||||
mock := &MockAgentService{}
|
||||
handler := NewAgentHandler(mock)
|
||||
return mock, handler
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_Success_200 pins the happy-path contract for the
|
||||
// soft-retirement HTTP surface: DELETE /api/v1/agents/{id} with no dependency
|
||||
// fallout returns 200 OK and a JSON body echoing retirement metadata
|
||||
// (retired_at timestamp, already_retired=false, cascade=false, zero counts).
|
||||
// Operators building dashboards parse these fields; keep the shape stable.
|
||||
func TestRetireAgentHandler_Success_200(t *testing.T) {
|
||||
retiredAt := time.Date(2026, 4, 18, 12, 0, 0, 0, time.UTC)
|
||||
mock, handler := agentRetireTestSetup()
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
if agentID != "a-prod-001" {
|
||||
t.Fatalf("retire handler received agentID=%q want a-prod-001", agentID)
|
||||
}
|
||||
if force {
|
||||
t.Fatalf("retire handler set force=true unexpectedly; default path must be force=false")
|
||||
}
|
||||
return &service.AgentRetirementResult{
|
||||
AlreadyRetired: false,
|
||||
Cascade: false,
|
||||
RetiredAt: retiredAt,
|
||||
Counts: domain.AgentDependencyCounts{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/agents/a-prod-001", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s want 200", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var body struct {
|
||||
RetiredAt time.Time `json:"retired_at"`
|
||||
AlreadyRetired bool `json:"already_retired"`
|
||||
Cascade bool `json:"cascade"`
|
||||
Counts domain.AgentDependencyCounts `json:"counts"`
|
||||
}
|
||||
if err := json.NewDecoder(w.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode 200 body: %v", err)
|
||||
}
|
||||
if !body.RetiredAt.Equal(retiredAt) {
|
||||
t.Errorf("retired_at=%v want %v", body.RetiredAt, retiredAt)
|
||||
}
|
||||
if body.AlreadyRetired {
|
||||
t.Errorf("already_retired=true want false on clean retire")
|
||||
}
|
||||
if body.Cascade {
|
||||
t.Errorf("cascade=true want false on clean retire")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_AlreadyRetired_204 covers the idempotent contract: a
|
||||
// retire call against an already-retired agent completes with 204 No Content
|
||||
// (no body). This lets operators safely re-issue the DELETE after a network
|
||||
// blip without fearing duplicate audit events or state mutations.
|
||||
func TestRetireAgentHandler_AlreadyRetired_204(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
past := time.Now().Add(-24 * time.Hour)
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
return &service.AgentRetirementResult{
|
||||
AlreadyRetired: true,
|
||||
Cascade: false,
|
||||
RetiredAt: past,
|
||||
Counts: domain.AgentDependencyCounts{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/agents/a-prod-001", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusNoContent {
|
||||
t.Fatalf("status=%d body=%s want 204", w.Code, w.Body.String())
|
||||
}
|
||||
// 204 No Content must have zero body. If anything leaks through, downstream
|
||||
// clients (curl scripts, dashboards) break.
|
||||
if w.Body.Len() != 0 {
|
||||
t.Errorf("204 body=%q want empty", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_Sentinel_403 covers the hard guard against retiring
|
||||
// any of the four sentinel agents that back discovery sources and the
|
||||
// network scanner. These IDs are reserved; the handler must surface the
|
||||
// service-layer ErrAgentIsSentinel as 403 Forbidden regardless of force/reason
|
||||
// because no operator intent can legitimately retire them.
|
||||
func TestRetireAgentHandler_Sentinel_403(t *testing.T) {
|
||||
sentinels := []string{"server-scanner", "cloud-aws-sm", "cloud-azure-kv", "cloud-gcp-sm"}
|
||||
for _, id := range sentinels {
|
||||
t.Run(id, func(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
return nil, service.ErrAgentIsSentinel
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/agents/"+id, nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Fatalf("sentinel %q status=%d body=%s want 403", id, w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_NotFound_404 covers the lookup-miss path. Service
|
||||
// returns a not-found error; handler maps to 404. Keeping the error
|
||||
// discrimination at the service layer (sentinel errors.Is) rather than string
|
||||
// matching is the whole point of wrapping.
|
||||
func TestRetireAgentHandler_NotFound_404(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
return nil, errors.New("agent not found")
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/agents/unknown-id", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Fatalf("status=%d body=%s want 404", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_Blocked_409_WithCounts covers the preflight-blocked
|
||||
// path. Service returns *BlockedByDependenciesError wrapping
|
||||
// ErrBlockedByDependencies; handler unwraps via errors.As, maps to 409, and
|
||||
// MUST include the counts in the response body so operators know what's
|
||||
// blocking them. Without counts the 409 is useless — the operator has to
|
||||
// guess which downstream dependency is holding up the retirement.
|
||||
func TestRetireAgentHandler_Blocked_409_WithCounts(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
blockCounts := domain.AgentDependencyCounts{
|
||||
ActiveTargets: 3,
|
||||
ActiveCertificates: 7,
|
||||
PendingJobs: 2,
|
||||
}
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
return nil, &service.BlockedByDependenciesError{Counts: blockCounts}
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/agents/a-prod-001", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusConflict {
|
||||
t.Fatalf("status=%d body=%s want 409", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var body struct {
|
||||
Error string `json:"error"`
|
||||
Message string `json:"message"`
|
||||
Counts domain.AgentDependencyCounts `json:"counts"`
|
||||
}
|
||||
if err := json.NewDecoder(w.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode 409 body: %v", err)
|
||||
}
|
||||
if body.Counts.ActiveTargets != 3 {
|
||||
t.Errorf("counts.active_targets=%d want 3", body.Counts.ActiveTargets)
|
||||
}
|
||||
if body.Counts.ActiveCertificates != 7 {
|
||||
t.Errorf("counts.active_certificates=%d want 7", body.Counts.ActiveCertificates)
|
||||
}
|
||||
if body.Counts.PendingJobs != 2 {
|
||||
t.Errorf("counts.pending_jobs=%d want 2", body.Counts.PendingJobs)
|
||||
}
|
||||
if body.Message == "" {
|
||||
t.Errorf("409 body missing human-readable message; operators need guidance")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_Force_NoReason_400 covers the force-escape-hatch
|
||||
// guardrail: force=true without a non-empty reason must be rejected at the
|
||||
// handler seam BEFORE the service performs any DB work, because a
|
||||
// reason-less cascade is unauditable. Service returns ErrForceReasonRequired;
|
||||
// handler maps to 400.
|
||||
func TestRetireAgentHandler_Force_NoReason_400(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
if !force {
|
||||
t.Fatalf("handler did not forward force=true; force query param was dropped")
|
||||
}
|
||||
if reason != "" {
|
||||
t.Fatalf("handler passed reason=%q; empty reason must reach service for error path", reason)
|
||||
}
|
||||
return nil, service.ErrForceReasonRequired
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/agents/a-prod-001?force=true", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("status=%d body=%s want 400", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_ForceCascade_200 covers the successful force-cascade
|
||||
// path: DELETE ?force=true&reason=... → service executes transactional
|
||||
// cascade → 200 with cascade=true and the pre-cascade counts echoed back so
|
||||
// the operator's confirmation dialog can show "I just retired N targets,
|
||||
// M certificates, K pending jobs."
|
||||
func TestRetireAgentHandler_ForceCascade_200(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
retiredAt := time.Date(2026, 4, 18, 14, 30, 0, 0, time.UTC)
|
||||
mock.RetireAgentFn = func(agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error) {
|
||||
if !force {
|
||||
t.Fatalf("handler did not forward force=true; query-param parsing broken")
|
||||
}
|
||||
if reason != "decommissioning rack 7" {
|
||||
t.Fatalf("handler forwarded reason=%q want %q", reason, "decommissioning rack 7")
|
||||
}
|
||||
return &service.AgentRetirementResult{
|
||||
AlreadyRetired: false,
|
||||
Cascade: true,
|
||||
RetiredAt: retiredAt,
|
||||
Counts: domain.AgentDependencyCounts{
|
||||
ActiveTargets: 2,
|
||||
ActiveCertificates: 5,
|
||||
PendingJobs: 1,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
url := "/api/v1/agents/a-prod-001?force=true&reason=decommissioning+rack+7"
|
||||
req := httptest.NewRequest(http.MethodDelete, url, nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s want 200", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var body struct {
|
||||
RetiredAt time.Time `json:"retired_at"`
|
||||
AlreadyRetired bool `json:"already_retired"`
|
||||
Cascade bool `json:"cascade"`
|
||||
Counts domain.AgentDependencyCounts `json:"counts"`
|
||||
}
|
||||
if err := json.NewDecoder(w.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode force-cascade 200 body: %v", err)
|
||||
}
|
||||
if !body.Cascade {
|
||||
t.Errorf("cascade=false want true on ?force=true successful retire")
|
||||
}
|
||||
if body.Counts.ActiveTargets != 2 || body.Counts.ActiveCertificates != 5 || body.Counts.PendingJobs != 1 {
|
||||
t.Errorf("counts=%+v want {ActiveTargets:2 ActiveCertificates:5 PendingJobs:1}", body.Counts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeatHandler_RetiredAgent_410 covers the agent-shutdown signal. A
|
||||
// retired agent that is still polling must be told its identity is gone
|
||||
// (410 Gone) rather than offered the normal 200 "recorded" response.
|
||||
// cmd/agent treats 410 as a terminal signal and exits rather than looping
|
||||
// forever against a decommissioned identity. Service returns ErrAgentRetired;
|
||||
// handler maps to 410.
|
||||
func TestHeartbeatHandler_RetiredAgent_410(t *testing.T) {
|
||||
mock, handler := agentRetireTestSetup()
|
||||
mock.HeartbeatFn = func(agentID string, metadata *domain.AgentMetadata) error {
|
||||
return service.ErrAgentRetired
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/agents/a-prod-001/heartbeat", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.Heartbeat(w, req)
|
||||
|
||||
if w.Code != http.StatusGone {
|
||||
t.Fatalf("heartbeat(retired) status=%d body=%s want 410", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestListRetiredAgentsHandler_Success covers the audit/forensics-facing
|
||||
// endpoint GET /api/v1/agents/retired. Returns a paged list of retired rows
|
||||
// alongside total count so the GUI can render a "Retired Agents" tab with
|
||||
// pagination. Default listing (GET /agents) hides retired rows; this is the
|
||||
// opt-in surface for them.
|
||||
func TestListRetiredAgentsHandler_Success(t *testing.T) {
|
||||
past := time.Now().Add(-48 * time.Hour)
|
||||
reason := "old hardware"
|
||||
retired := []domain.Agent{
|
||||
{
|
||||
ID: "agent-retired-01",
|
||||
Name: "decom-01",
|
||||
Hostname: "server-old",
|
||||
Status: domain.AgentStatusOffline,
|
||||
RegisteredAt: past,
|
||||
RetiredAt: &past,
|
||||
RetiredReason: &reason,
|
||||
},
|
||||
}
|
||||
|
||||
mock, handler := agentRetireTestSetup()
|
||||
mock.ListRetiredAgentsFn = func(page, perPage int) ([]domain.Agent, int64, error) {
|
||||
if page != 1 || perPage != 50 {
|
||||
t.Fatalf("ListRetired handler received page=%d perPage=%d want 1/50 defaults", page, perPage)
|
||||
}
|
||||
return retired, 1, nil
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/agents/retired", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.ListRetiredAgents(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d body=%s want 200", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var response PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&response); err != nil {
|
||||
t.Fatalf("decode list-retired body: %v", err)
|
||||
}
|
||||
if response.Total != 1 {
|
||||
t.Errorf("total=%d want 1", response.Total)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRetireAgentHandler_MethodNotAllowed covers defense-in-depth: only
|
||||
// DELETE is valid on /api/v1/agents/{id} for retirement. Using POST/PUT/PATCH
|
||||
// must be rejected with 405 so misconfigured callers don't accidentally
|
||||
// trigger retirement via a wrong-method request.
|
||||
func TestRetireAgentHandler_MethodNotAllowed(t *testing.T) {
|
||||
_, handler := agentRetireTestSetup()
|
||||
|
||||
for _, method := range []string{http.MethodPost, http.MethodPut, http.MethodPatch} {
|
||||
t.Run(method, func(t *testing.T) {
|
||||
req := httptest.NewRequest(method, "/api/v1/agents/a-prod-001", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.RetireAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Fatalf("method=%s status=%d want 405", method, w.Code)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time asserts: the mock must satisfy the handler's AgentService
|
||||
// interface. Red state: this fails until the interface grows RetireAgent +
|
||||
// ListRetiredAgents. Once Phase 2b adds those methods to AgentService, this
|
||||
// assertion goes green along with every test above.
|
||||
var _ AgentService = (*MockAgentService)(nil)
|
||||
|
||||
// Unused-import suppressor for context — the package-level tests already
|
||||
// pull context from agent_handler_test.go, but leaving this here documents
|
||||
// that the mock methods receive context.Context values even though this
|
||||
// file's tests don't construct them directly (they ride on httptest.NewRequest).
|
||||
var _ = context.Background
|
||||
@@ -3,16 +3,24 @@ package handler
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// AgentService defines the service interface for agent operations.
|
||||
//
|
||||
// I-004 expansion: RetireAgent + ListRetiredAgents back the soft-retirement
|
||||
// surface. The handler depends on the service-package's AgentRetirementResult
|
||||
// and BlockedByDependenciesError types for result shape + errors.As unwrap,
|
||||
// which is why this file imports internal/service.
|
||||
type AgentService interface {
|
||||
ListAgents(ctx context.Context, page, perPage int) ([]domain.Agent, int64, error)
|
||||
GetAgent(ctx context.Context, id string) (*domain.Agent, error)
|
||||
@@ -24,6 +32,10 @@ type AgentService interface {
|
||||
GetWork(ctx context.Context, agentID string) ([]domain.Job, error)
|
||||
GetWorkWithTargets(ctx context.Context, agentID string) ([]domain.WorkItem, error)
|
||||
UpdateJobStatus(ctx context.Context, agentID string, jobID string, status string, errMsg string) error
|
||||
// I-004 soft-retirement API. Both default to no-op (nil result / nil error)
|
||||
// in mocks that don't override them — handler tests opt in per suite.
|
||||
RetireAgent(ctx context.Context, agentID, actor string, force bool, reason string) (*service.AgentRetirementResult, error)
|
||||
ListRetiredAgents(ctx context.Context, page, perPage int) ([]domain.Agent, int64, error)
|
||||
}
|
||||
|
||||
// AgentHandler handles HTTP requests for agent operations.
|
||||
@@ -96,7 +108,19 @@ func (h AgentHandler) GetAgent(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
agent, err := h.svc.GetAgent(r.Context(), id)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Agent not found", requestID)
|
||||
// M-1 (P2): route through errToStatus so a repo-level
|
||||
// sql.ErrNoRows (wrapped as repository.ErrNotFound) becomes 404,
|
||||
// but a transient DB failure no longer masquerades as 404 — it
|
||||
// correctly surfaces 500. The pre-M-1 "any error → 404" shortcut
|
||||
// was plausible when Get's only expected failure was "not found",
|
||||
// but the choke point now gives us correct dispatch for free.
|
||||
// Mirrors GetAgentGroup / GetProfile.
|
||||
status := errToStatus(err)
|
||||
msg := "Failed to get agent"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Agent not found"
|
||||
}
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -135,11 +159,20 @@ func (h AgentHandler) RegisterAgent(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
created, err := h.svc.RegisterAgent(r.Context(), agent)
|
||||
if err != nil {
|
||||
errMsg := err.Error()
|
||||
if strings.Contains(errMsg, "unique") || strings.Contains(errMsg, "duplicate") || strings.Contains(errMsg, "already exists") {
|
||||
// M-1 (P2): replace the 3-term substring net
|
||||
// (`"unique"|"duplicate"|"already exists"`) with a typed
|
||||
// errors.Is(err, service.ErrConflict) arm. The service layer now
|
||||
// wraps pg SQLSTATE 23505 duplicate-name violations via
|
||||
// fmt.Errorf("%w: agent name already exists", ErrConflict), so
|
||||
// classification no longer depends on the exact driver wording.
|
||||
// Other errors redact to a generic 500 with slog.Error server-
|
||||
// side diagnostic capture (F-002). Mirrors CreateProfile's
|
||||
// ErrValidation arm pattern, adapted for the conflict case.
|
||||
if errors.Is(err, service.ErrConflict) {
|
||||
ErrorWithRequestID(w, http.StatusConflict, "Agent with this name already exists", requestID)
|
||||
return
|
||||
}
|
||||
slog.Error("RegisterAgent failed", "name", agent.Name, "error", err.Error())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to register agent", requestID)
|
||||
return
|
||||
}
|
||||
@@ -190,7 +223,24 @@ func (h AgentHandler) Heartbeat(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
if err := h.svc.Heartbeat(r.Context(), agentID, metadata); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
// I-004: a retired agent still polling must receive 410 Gone so
|
||||
// cmd/agent detects the terminal signal and shuts down cleanly
|
||||
// instead of looping forever against a decommissioned identity.
|
||||
// Check this FIRST — before "not found" string matching — so the
|
||||
// retired-path is never masked by a sibling error branch.
|
||||
if errors.Is(err, service.ErrAgentRetired) {
|
||||
ErrorWithRequestID(w, http.StatusGone, "Agent has been retired", requestID)
|
||||
return
|
||||
}
|
||||
// M-1 (P2): the pre-M-1 `strings.Contains(err.Error(), "not
|
||||
// found")` branch now rides the errToStatus choke point, which
|
||||
// recognizes repository.ErrNotFound via errors.Is. The retired-
|
||||
// agent sentinel is still checked FIRST above so the 410 Gone
|
||||
// short-circuit is never masked by the 404 arm. Any other error
|
||||
// redacts to a generic 500 with slog.Error server-side diagnostic
|
||||
// capture (F-002). Mirrors GetAgentGroup.
|
||||
status := errToStatus(err)
|
||||
if status == http.StatusNotFound {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Agent not found", requestID)
|
||||
return
|
||||
}
|
||||
@@ -287,7 +337,16 @@ func (h AgentHandler) AgentCertificatePickup(w http.ResponseWriter, r *http.Requ
|
||||
|
||||
certPEM, err := h.svc.CertificatePickup(r.Context(), agentID, certID)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found or not ready", requestID)
|
||||
// M-1 (P2): route through errToStatus so a repo-level
|
||||
// sql.ErrNoRows (wrapped as repository.ErrNotFound) becomes 404,
|
||||
// but a transient DB failure no longer masquerades as 404 — it
|
||||
// correctly surfaces 500. Mirrors GetAgent.
|
||||
status := errToStatus(err)
|
||||
msg := "Failed to retrieve certificate"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Certificate not found or not ready"
|
||||
}
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -376,3 +435,190 @@ func (h AgentHandler) AgentReportJobStatus(w http.ResponseWriter, r *http.Reques
|
||||
"status": "updated",
|
||||
})
|
||||
}
|
||||
|
||||
// RetireAgent executes the I-004 soft-retirement surface.
|
||||
// DELETE /api/v1/agents/{id}[?force=true&reason=...]
|
||||
//
|
||||
// Contract (pinned by agent_retire_handler_test.go):
|
||||
//
|
||||
// 405 any method other than DELETE
|
||||
// 200 clean retire (body: retired_at, already_retired=false, cascade=false, counts=0s)
|
||||
// 200 force-cascade retire (body: cascade=true, counts=pre-cascade snapshot)
|
||||
// 204 idempotent retire of an already-retired agent (NO body — downstream
|
||||
// clients that tee responses into dashboards break on spurious bodies)
|
||||
// 400 force=true without a non-empty reason (ErrForceReasonRequired)
|
||||
// 403 one of the four reserved sentinel IDs (ErrAgentIsSentinel)
|
||||
// 404 agent does not exist ("not found" string match, kept for compat with
|
||||
// repo error strings; sentinel checks run first so they never mask)
|
||||
// 409 blocked by preflight counts (*BlockedByDependenciesError) — body
|
||||
// carries the per-bucket counts so the operator UI can tell the
|
||||
// human which downstream dependency is holding up the retirement,
|
||||
// rather than forcing them to re-run the DELETE with ?force=true
|
||||
// and guess
|
||||
// 500 anything else
|
||||
//
|
||||
// The 409 body intentionally does NOT go through ErrorWithRequestID because
|
||||
// that helper's ErrorResponse shape has no `counts` field — we inline-marshal
|
||||
// a custom body instead. Keeping this shape stable is important: the GUI
|
||||
// pattern is "show the 409 dialog, list the N targets / M certs / K jobs
|
||||
// blocking, let the operator retire them first or tick the force checkbox."
|
||||
func (h AgentHandler) RetireAgent(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodDelete {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
// Extract {id} from /api/v1/agents/{id}. Mirror GetAgent's pattern so
|
||||
// the path parser is identical across the agent handler surface and a
|
||||
// future refactor can extract it once without introducing drift.
|
||||
rawID := strings.TrimPrefix(r.URL.Path, "/api/v1/agents/")
|
||||
parts := strings.Split(rawID, "/")
|
||||
if len(parts) == 0 || parts[0] == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Agent ID is required", requestID)
|
||||
return
|
||||
}
|
||||
id := parts[0]
|
||||
|
||||
// Parse optional force + reason. A missing `force` param is treated as
|
||||
// force=false (the default, safe path); anything strconv.ParseBool rejects
|
||||
// is also force=false so a malformed query can never silently enable the
|
||||
// cascade. The reason string is passed through verbatim — the service
|
||||
// owns the "force=true requires reason" rule.
|
||||
query := r.URL.Query()
|
||||
force := false
|
||||
if fv := query.Get("force"); fv != "" {
|
||||
if parsed, err := strconv.ParseBool(fv); err == nil {
|
||||
force = parsed
|
||||
}
|
||||
}
|
||||
reason := query.Get("reason")
|
||||
|
||||
actor := resolveActor(r.Context())
|
||||
|
||||
result, err := h.svc.RetireAgent(r.Context(), id, actor, force, reason)
|
||||
if err != nil {
|
||||
// Sentinel + typed-error checks run BEFORE string matching on "not
|
||||
// found" so a repo error that happens to contain those words can
|
||||
// never mask a structural refusal (403/400/409). Order matters.
|
||||
if errors.Is(err, service.ErrAgentIsSentinel) {
|
||||
ErrorWithRequestID(w, http.StatusForbidden, "Agent is a reserved sentinel and cannot be retired", requestID)
|
||||
return
|
||||
}
|
||||
if errors.Is(err, service.ErrForceReasonRequired) {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "force=true requires a non-empty reason", requestID)
|
||||
return
|
||||
}
|
||||
var blocked *service.BlockedByDependenciesError
|
||||
if errors.As(err, &blocked) {
|
||||
// Custom 409 body with per-bucket counts. ErrorResponse has no
|
||||
// `counts` field, so we marshal a bespoke struct instead.
|
||||
// Keep `error`/`message`/`counts` as the stable shape — any
|
||||
// dashboard parsing this relies on those three keys.
|
||||
body := struct {
|
||||
Error string `json:"error"`
|
||||
Message string `json:"message"`
|
||||
Counts domain.AgentDependencyCounts `json:"counts"`
|
||||
}{
|
||||
Error: "blocked_by_dependencies",
|
||||
Message: "Agent has active downstream dependencies. Retire or reassign them " +
|
||||
"first, or re-run with ?force=true&reason=... to cascade.",
|
||||
Counts: blocked.Counts,
|
||||
}
|
||||
JSON(w, http.StatusConflict, body)
|
||||
return
|
||||
}
|
||||
// M-1 (P2): the pre-M-1 `strings.Contains(err.Error(), "not
|
||||
// found")` branch now rides the errToStatus choke point, which
|
||||
// recognizes repository.ErrNotFound via errors.Is. The sentinel
|
||||
// (ErrAgentIsSentinel, ErrForceReasonRequired) and typed
|
||||
// (*BlockedByDependenciesError) checks above still run FIRST so
|
||||
// the 403/400/409 structural refusals are never masked by the
|
||||
// 404 arm. Any other error redacts to a generic 500 with
|
||||
// slog.Error server-side diagnostic capture (F-002).
|
||||
status := errToStatus(err)
|
||||
if status == http.StatusNotFound {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Agent not found", requestID)
|
||||
return
|
||||
}
|
||||
slog.Error("RetireAgent failed", "agent_id", id, "error", err.Error())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to retire agent", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
// Idempotent retire: the agent was already retired, so we return 204 No
|
||||
// Content with a ZERO-length body. The Red contract (test line 106) fails
|
||||
// if even a trailing newline leaks into the response. WriteHeader alone
|
||||
// emits the status without invoking the JSON encoder.
|
||||
if result.AlreadyRetired {
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return
|
||||
}
|
||||
|
||||
// Clean retire (force=false) or successful cascade (force=true). Body
|
||||
// shape pinned by Red contract: retired_at, already_retired, cascade,
|
||||
// counts. Omitempty is deliberately NOT used — operators parsing the
|
||||
// response expect every field to always be present.
|
||||
JSON(w, http.StatusOK, struct {
|
||||
RetiredAt time.Time `json:"retired_at"`
|
||||
AlreadyRetired bool `json:"already_retired"`
|
||||
Cascade bool `json:"cascade"`
|
||||
Counts domain.AgentDependencyCounts `json:"counts"`
|
||||
}{
|
||||
RetiredAt: result.RetiredAt,
|
||||
AlreadyRetired: result.AlreadyRetired,
|
||||
Cascade: result.Cascade,
|
||||
Counts: result.Counts,
|
||||
})
|
||||
}
|
||||
|
||||
// ListRetiredAgents returns the opt-in listing of retired agents for the
|
||||
// operator UI's "Retired" tab and for audit/forensics workflows.
|
||||
// GET /api/v1/agents/retired?page=1&per_page=50
|
||||
//
|
||||
// The default ListAgents handler hides retired rows; this is the dedicated
|
||||
// surface for reading them back. Pagination defaults match ListAgents so
|
||||
// the GUI can reuse the same query hook (page=1, per_page=50, cap 500).
|
||||
//
|
||||
// Go 1.22's enhanced ServeMux routes `/agents/retired` to this handler via
|
||||
// the literal-beats-pattern-var precedence rule (literal `retired` wins over
|
||||
// `{id}` in the sibling GET /api/v1/agents/{id} route), so both entries can
|
||||
// coexist without conflict. If that precedence ever regresses, the failure
|
||||
// mode is TestListRetiredAgentsHandler_Success blowing up with a 404 — which
|
||||
// is the fast signal we want.
|
||||
func (h AgentHandler) ListRetiredAgents(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
page := 1
|
||||
perPage := 50
|
||||
query := r.URL.Query()
|
||||
if p := query.Get("page"); p != "" {
|
||||
if parsed, err := strconv.Atoi(p); err == nil && parsed > 0 {
|
||||
page = parsed
|
||||
}
|
||||
}
|
||||
if pp := query.Get("per_page"); pp != "" {
|
||||
if parsed, err := strconv.Atoi(pp); err == nil && parsed > 0 && parsed <= 500 {
|
||||
perPage = parsed
|
||||
}
|
||||
}
|
||||
|
||||
agents, total, err := h.svc.ListRetiredAgents(r.Context(), page, perPage)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to list retired agents", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, PagedResponse{
|
||||
Data: agents,
|
||||
Total: total,
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -11,8 +13,8 @@ import (
|
||||
|
||||
// AuditService defines the service interface for audit event operations.
|
||||
type AuditService interface {
|
||||
ListAuditEvents(page, perPage int) ([]domain.AuditEvent, int64, error)
|
||||
GetAuditEvent(id string) (*domain.AuditEvent, error)
|
||||
ListAuditEvents(ctx context.Context, page, perPage int) ([]domain.AuditEvent, int64, error)
|
||||
GetAuditEvent(ctx context.Context, id string) (*domain.AuditEvent, error)
|
||||
}
|
||||
|
||||
// AuditHandler handles HTTP requests for audit event operations.
|
||||
@@ -49,7 +51,7 @@ func (h AuditHandler) ListAuditEvents(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
events, total, err := h.svc.ListAuditEvents(page, perPage)
|
||||
events, total, err := h.svc.ListAuditEvents(r.Context(), page, perPage)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to list audit events", requestID)
|
||||
return
|
||||
@@ -83,9 +85,26 @@ func (h AuditHandler) GetAuditEvent(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
id = parts[0]
|
||||
|
||||
event, err := h.svc.GetAuditEvent(id)
|
||||
event, err := h.svc.GetAuditEvent(r.Context(), id)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Audit event not found", requestID)
|
||||
// M-1 (P2): dispatch routes through errToStatus. Pre-M-1 this branch was
|
||||
// a blanket `any error → 404 Audit event not found` shortcut — which
|
||||
// silently demoted transient DB failures from the service's auditRepo.List
|
||||
// wrap to 404 Not Found with no observable external signal. Post-M-1:
|
||||
// service/audit.go GetAuditEvent only wraps the genuine zero-events path
|
||||
// with service.ErrNotFound via %w, and the repo.List wrap surfaces without
|
||||
// that sentinel — so errors.Is(err, service.ErrNotFound) picks up the real
|
||||
// 404s and everything else correctly surfaces as 500 with server-side
|
||||
// slog.Error capture (F-002 redacted-500 pattern preserved).
|
||||
status := errToStatus(err)
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("GetAuditEvent failed", "audit_event_id", id, "error", err.Error())
|
||||
}
|
||||
msg := "Failed to get audit event"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Audit event not found"
|
||||
}
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,419 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
)
|
||||
|
||||
// mockAuditService implements AuditService for testing.
|
||||
type mockAuditService struct {
|
||||
listFunc func(page, perPage int) ([]domain.AuditEvent, int64, error)
|
||||
getFunc func(id string) (*domain.AuditEvent, error)
|
||||
}
|
||||
|
||||
func (m *mockAuditService) ListAuditEvents(_ context.Context, page, perPage int) ([]domain.AuditEvent, int64, error) {
|
||||
if m.listFunc != nil {
|
||||
return m.listFunc(page, perPage)
|
||||
}
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
func (m *mockAuditService) GetAuditEvent(_ context.Context, id string) (*domain.AuditEvent, error) {
|
||||
if m.getFunc != nil {
|
||||
return m.getFunc(id)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func TestListAuditEvents_Success(t *testing.T) {
|
||||
events := []domain.AuditEvent{
|
||||
{
|
||||
ID: "ev-1",
|
||||
Action: "certificate_issued",
|
||||
Actor: "user@example.com",
|
||||
ActorType: domain.ActorTypeUser,
|
||||
ResourceID: "mc-api-prod",
|
||||
ResourceType: "Certificate",
|
||||
Timestamp: time.Now(),
|
||||
},
|
||||
{
|
||||
ID: "ev-2",
|
||||
Action: "certificate_renewed",
|
||||
Actor: "user@example.com",
|
||||
ActorType: domain.ActorTypeUser,
|
||||
ResourceID: "mc-api-prod",
|
||||
ResourceType: "Certificate",
|
||||
Timestamp: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
mockSvc := &mockAuditService{
|
||||
listFunc: func(page, perPage int) ([]domain.AuditEvent, int64, error) {
|
||||
if page != 1 || perPage != 50 {
|
||||
t.Errorf("ListAuditEvents called with page=%d, perPage=%d, expected 1, 50", page, perPage)
|
||||
}
|
||||
return events, 2, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
// Add request ID to context
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListAuditEvents(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusOK {
|
||||
t.Errorf("ListAuditEvents returned status %d, want %d", status, http.StatusOK)
|
||||
}
|
||||
|
||||
var result PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if result.Total != 2 {
|
||||
t.Errorf("Total = %d, want 2", result.Total)
|
||||
}
|
||||
|
||||
if result.Page != 1 {
|
||||
t.Errorf("Page = %d, want 1", result.Page)
|
||||
}
|
||||
|
||||
if result.PerPage != 50 {
|
||||
t.Errorf("PerPage = %d, want 50", result.PerPage)
|
||||
}
|
||||
|
||||
// Check data is present
|
||||
if result.Data == nil {
|
||||
t.Error("Data is nil, want events slice")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListAuditEvents_WithPagination(t *testing.T) {
|
||||
events := []domain.AuditEvent{
|
||||
{
|
||||
ID: "ev-5",
|
||||
Action: "certificate_issued",
|
||||
Actor: "user@example.com",
|
||||
ActorType: domain.ActorTypeUser,
|
||||
ResourceID: "mc-api-prod",
|
||||
ResourceType: "Certificate",
|
||||
Timestamp: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
mockSvc := &mockAuditService{
|
||||
listFunc: func(page, perPage int) ([]domain.AuditEvent, int64, error) {
|
||||
if page != 2 || perPage != 25 {
|
||||
t.Errorf("ListAuditEvents called with page=%d, perPage=%d, expected 2, 25", page, perPage)
|
||||
}
|
||||
return events, 100, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit?page=2&per_page=25", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListAuditEvents(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusOK {
|
||||
t.Errorf("ListAuditEvents returned status %d, want %d", status, http.StatusOK)
|
||||
}
|
||||
|
||||
var result PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if result.Page != 2 {
|
||||
t.Errorf("Page = %d, want 2", result.Page)
|
||||
}
|
||||
|
||||
if result.PerPage != 25 {
|
||||
t.Errorf("PerPage = %d, want 25", result.PerPage)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListAuditEvents_PerPageMaxLimit(t *testing.T) {
|
||||
mockSvc := &mockAuditService{
|
||||
listFunc: func(page, perPage int) ([]domain.AuditEvent, int64, error) {
|
||||
// Should be capped at 500
|
||||
if perPage > 500 {
|
||||
t.Errorf("perPage = %d, expected <= 500", perPage)
|
||||
}
|
||||
return []domain.AuditEvent{}, 0, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit?per_page=1000", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListAuditEvents(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusOK {
|
||||
t.Errorf("ListAuditEvents returned status %d, want %d", status, http.StatusOK)
|
||||
}
|
||||
|
||||
var result PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if result.PerPage > 500 {
|
||||
t.Errorf("PerPage = %d, want <= 500", result.PerPage)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListAuditEvents_EmptyResult(t *testing.T) {
|
||||
mockSvc := &mockAuditService{
|
||||
listFunc: func(page, perPage int) ([]domain.AuditEvent, int64, error) {
|
||||
return []domain.AuditEvent{}, 0, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListAuditEvents(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusOK {
|
||||
t.Errorf("ListAuditEvents returned status %d, want %d", status, http.StatusOK)
|
||||
}
|
||||
|
||||
var result PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if result.Total != 0 {
|
||||
t.Errorf("Total = %d, want 0", result.Total)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListAuditEvents_ServiceError(t *testing.T) {
|
||||
mockSvc := &mockAuditService{
|
||||
listFunc: func(page, perPage int) ([]domain.AuditEvent, int64, error) {
|
||||
return nil, 0, errors.New("database error")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListAuditEvents(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusInternalServerError {
|
||||
t.Errorf("ListAuditEvents returned status %d, want %d", status, http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
var errResp ErrorResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&errResp); err != nil {
|
||||
t.Fatalf("failed to decode error response: %v", err)
|
||||
}
|
||||
|
||||
if errResp.Message != "Failed to list audit events" {
|
||||
t.Errorf("Message = %q, want 'Failed to list audit events'", errResp.Message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListAuditEvents_MethodNotAllowed(t *testing.T) {
|
||||
mockSvc := &mockAuditService{}
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodPost, "/api/v1/audit", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.ListAuditEvents(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusMethodNotAllowed {
|
||||
t.Errorf("ListAuditEvents returned status %d, want %d", status, http.StatusMethodNotAllowed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAuditEvent_Success(t *testing.T) {
|
||||
event := &domain.AuditEvent{
|
||||
ID: "ev-123",
|
||||
Action: "certificate_issued",
|
||||
Actor: "user@example.com",
|
||||
ActorType: domain.ActorTypeUser,
|
||||
ResourceID: "mc-api-prod",
|
||||
ResourceType: "Certificate",
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
mockSvc := &mockAuditService{
|
||||
getFunc: func(id string) (*domain.AuditEvent, error) {
|
||||
if id != "ev-123" {
|
||||
t.Errorf("GetAuditEvent called with id=%q, expected ev-123", id)
|
||||
}
|
||||
return event, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit/ev-123", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetAuditEvent(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusOK {
|
||||
t.Errorf("GetAuditEvent returned status %d, want %d", status, http.StatusOK)
|
||||
}
|
||||
|
||||
var result domain.AuditEvent
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if result.ID != "ev-123" {
|
||||
t.Errorf("ID = %q, want ev-123", result.ID)
|
||||
}
|
||||
|
||||
if result.Action != "certificate_issued" {
|
||||
t.Errorf("Action = %q, want certificate_issued", result.Action)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAuditEvent_NotFound(t *testing.T) {
|
||||
mockSvc := &mockAuditService{
|
||||
getFunc: func(id string) (*domain.AuditEvent, error) {
|
||||
return nil, errors.New("not found")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit/nonexistent", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetAuditEvent(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusNotFound {
|
||||
t.Errorf("GetAuditEvent returned status %d, want %d", status, http.StatusNotFound)
|
||||
}
|
||||
|
||||
var errResp ErrorResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&errResp); err != nil {
|
||||
t.Fatalf("failed to decode error response: %v", err)
|
||||
}
|
||||
|
||||
if errResp.Message != "Audit event not found" {
|
||||
t.Errorf("Message = %q, want 'Audit event not found'", errResp.Message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAuditEvent_MethodNotAllowed(t *testing.T) {
|
||||
mockSvc := &mockAuditService{}
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodDelete, "/api/v1/audit/ev-123", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetAuditEvent(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusMethodNotAllowed {
|
||||
t.Errorf("GetAuditEvent returned status %d, want %d", status, http.StatusMethodNotAllowed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAuditEvent_EmptyID(t *testing.T) {
|
||||
mockSvc := &mockAuditService{}
|
||||
handler := NewAuditHandler(mockSvc)
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/audit/", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
ctx := context.WithValue(req.Context(), middleware.RequestIDKey{}, "test-req-id")
|
||||
req = req.WithContext(ctx)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.GetAuditEvent(w, req)
|
||||
|
||||
if status := w.Code; status != http.StatusBadRequest {
|
||||
t.Errorf("GetAuditEvent returned status %d, want %d", status, http.StatusBadRequest)
|
||||
}
|
||||
|
||||
var errResp ErrorResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&errResp); err != nil {
|
||||
t.Fatalf("failed to decode error response: %v", err)
|
||||
}
|
||||
|
||||
if errResp.Message != "Audit event ID is required" {
|
||||
t.Errorf("Message = %q, want 'Audit event ID is required'", errResp.Message)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// BulkRevocationService defines the service interface for bulk certificate revocation.
|
||||
type BulkRevocationService interface {
|
||||
BulkRevoke(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error)
|
||||
}
|
||||
|
||||
// BulkRevocationHandler handles HTTP requests for bulk revocation operations.
|
||||
type BulkRevocationHandler struct {
|
||||
svc BulkRevocationService
|
||||
}
|
||||
|
||||
// NewBulkRevocationHandler creates a new BulkRevocationHandler.
|
||||
func NewBulkRevocationHandler(svc BulkRevocationService) BulkRevocationHandler {
|
||||
return BulkRevocationHandler{svc: svc}
|
||||
}
|
||||
|
||||
// bulkRevokeRequest represents the JSON request body for bulk revocation.
|
||||
type bulkRevokeRequest struct {
|
||||
Reason string `json:"reason"`
|
||||
ProfileID string `json:"profile_id,omitempty"`
|
||||
OwnerID string `json:"owner_id,omitempty"`
|
||||
AgentID string `json:"agent_id,omitempty"`
|
||||
IssuerID string `json:"issuer_id,omitempty"`
|
||||
TeamID string `json:"team_id,omitempty"`
|
||||
CertificateIDs []string `json:"certificate_ids,omitempty"`
|
||||
}
|
||||
|
||||
// BulkRevoke handles bulk certificate revocation.
|
||||
// POST /api/v1/certificates/bulk-revoke
|
||||
//
|
||||
// M-003: admin-only. Bulk revocation is a fleet-scale destructive operation —
|
||||
// a non-admin caller must not be able to invalidate certificates across
|
||||
// profiles/owners/agents. The gate is enforced here (before body parsing) so a
|
||||
// non-admin never sees its request criteria evaluated.
|
||||
func (h BulkRevocationHandler) BulkRevoke(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
// M-003: admin-only gate. Non-admin callers are rejected before any
|
||||
// criteria/body processing to avoid leaking validation behavior to
|
||||
// unauthorized actors.
|
||||
if !middleware.IsAdmin(r.Context()) {
|
||||
ErrorWithRequestID(w, http.StatusForbidden,
|
||||
"Bulk revocation requires admin privileges",
|
||||
requestID)
|
||||
return
|
||||
}
|
||||
|
||||
var req bulkRevokeRequest
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Invalid request body", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
// Validate reason is present
|
||||
if req.Reason == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Revocation reason is required", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
// Validate reason is a valid RFC 5280 code
|
||||
if !domain.IsValidRevocationReason(req.Reason) {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Invalid revocation reason: "+req.Reason, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
criteria := domain.BulkRevocationCriteria{
|
||||
ProfileID: req.ProfileID,
|
||||
OwnerID: req.OwnerID,
|
||||
AgentID: req.AgentID,
|
||||
IssuerID: req.IssuerID,
|
||||
TeamID: req.TeamID,
|
||||
CertificateIDs: req.CertificateIDs,
|
||||
}
|
||||
|
||||
// Safety guard: at least one criterion required
|
||||
if criteria.IsEmpty() {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "At least one filter criterion is required (profile_id, owner_id, agent_id, issuer_id, team_id, or certificate_ids)", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
// Extract actor from auth context (M-002: named-key identity → audit trail)
|
||||
actor := resolveActor(r.Context())
|
||||
|
||||
result, err := h.svc.BulkRevoke(r.Context(), criteria, req.Reason, actor)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Bulk revocation failed: "+err.Error(), requestID)
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, result)
|
||||
}
|
||||
@@ -0,0 +1,289 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// mockBulkRevocationService is a test implementation of BulkRevocationService
|
||||
type mockBulkRevocationService struct {
|
||||
BulkRevokeFn func(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error)
|
||||
}
|
||||
|
||||
func (m *mockBulkRevocationService) BulkRevoke(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error) {
|
||||
if m.BulkRevokeFn != nil {
|
||||
return m.BulkRevokeFn(ctx, criteria, reason, actor)
|
||||
}
|
||||
return &domain.BulkRevocationResult{}, nil
|
||||
}
|
||||
|
||||
// adminContext returns a context carrying the admin flag, mimicking what the
|
||||
// auth middleware sets for named-key callers whose entry is admin-tagged.
|
||||
// M-003: bulk revocation handler requires admin context to reach the service.
|
||||
func adminContext() context.Context {
|
||||
ctx := context.WithValue(context.Background(), middleware.RequestIDKey{}, "test-request-id-bulk")
|
||||
ctx = context.WithValue(ctx, middleware.AdminKey{}, true)
|
||||
return ctx
|
||||
}
|
||||
|
||||
func TestBulkRevoke_Success_WithIDs(t *testing.T) {
|
||||
svc := &mockBulkRevocationService{
|
||||
BulkRevokeFn: func(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error) {
|
||||
if len(criteria.CertificateIDs) != 2 {
|
||||
t.Errorf("expected 2 IDs, got %d", len(criteria.CertificateIDs))
|
||||
}
|
||||
if reason != "keyCompromise" {
|
||||
t.Errorf("expected reason keyCompromise, got %s", reason)
|
||||
}
|
||||
return &domain.BulkRevocationResult{
|
||||
TotalMatched: 2,
|
||||
TotalRevoked: 2,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
h := NewBulkRevocationHandler(svc)
|
||||
|
||||
body := `{"reason":"keyCompromise","certificate_ids":["mc-1","mc-2"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(adminContext())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var result domain.BulkRevocationResult
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
if result.TotalMatched != 2 {
|
||||
t.Errorf("expected TotalMatched=2, got %d", result.TotalMatched)
|
||||
}
|
||||
if result.TotalRevoked != 2 {
|
||||
t.Errorf("expected TotalRevoked=2, got %d", result.TotalRevoked)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBulkRevoke_Success_WithProfile(t *testing.T) {
|
||||
svc := &mockBulkRevocationService{
|
||||
BulkRevokeFn: func(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error) {
|
||||
if criteria.ProfileID != "prof-tls" {
|
||||
t.Errorf("expected profile prof-tls, got %s", criteria.ProfileID)
|
||||
}
|
||||
return &domain.BulkRevocationResult{
|
||||
TotalMatched: 5,
|
||||
TotalRevoked: 4,
|
||||
TotalSkipped: 1,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
h := NewBulkRevocationHandler(svc)
|
||||
|
||||
body := `{"reason":"keyCompromise","profile_id":"prof-tls"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(adminContext())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBulkRevoke_MissingReason_400(t *testing.T) {
|
||||
h := NewBulkRevocationHandler(&mockBulkRevocationService{})
|
||||
|
||||
body := `{"certificate_ids":["mc-1"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(adminContext())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBulkRevoke_EmptyCriteria_400(t *testing.T) {
|
||||
h := NewBulkRevocationHandler(&mockBulkRevocationService{})
|
||||
|
||||
body := `{"reason":"keyCompromise"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(adminContext())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBulkRevoke_InvalidReason_400(t *testing.T) {
|
||||
h := NewBulkRevocationHandler(&mockBulkRevocationService{})
|
||||
|
||||
body := `{"reason":"totallyBogus","certificate_ids":["mc-1"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(adminContext())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBulkRevoke_MethodNotAllowed_405(t *testing.T) {
|
||||
h := NewBulkRevocationHandler(&mockBulkRevocationService{})
|
||||
|
||||
// Method check fires before the admin gate, so 405 must hold even for a
|
||||
// non-admin caller — asserting this keeps the ordering explicit.
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/certificates/bulk-revoke", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Errorf("expected 405, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBulkRevoke_ServiceError_500(t *testing.T) {
|
||||
svc := &mockBulkRevocationService{
|
||||
BulkRevokeFn: func(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error) {
|
||||
return nil, fmt.Errorf("database connection failed")
|
||||
},
|
||||
}
|
||||
h := NewBulkRevocationHandler(svc)
|
||||
|
||||
body := `{"reason":"keyCompromise","certificate_ids":["mc-1"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(adminContext())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected 500, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// --- M-003: admin-only gate on bulk revocation ---
|
||||
|
||||
// TestBulkRevoke_NonAdmin_Returns403 is the central authorization regression
|
||||
// for M-003. A caller without an admin-tagged context must be rejected with
|
||||
// HTTP 403, regardless of how well-formed its body is, and the service layer
|
||||
// must never see the request.
|
||||
func TestBulkRevoke_NonAdmin_Returns403(t *testing.T) {
|
||||
var serviceCalled bool
|
||||
svc := &mockBulkRevocationService{
|
||||
BulkRevokeFn: func(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error) {
|
||||
serviceCalled = true
|
||||
return &domain.BulkRevocationResult{}, nil
|
||||
},
|
||||
}
|
||||
h := NewBulkRevocationHandler(svc)
|
||||
|
||||
// Well-formed body + well-formed reason + filter — the only thing
|
||||
// missing is an admin-tagged context. The gate must still fire.
|
||||
body := `{"reason":"keyCompromise","certificate_ids":["mc-1","mc-2"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req = req.WithContext(contextWithRequestID()) // request id only, no admin flag
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Fatalf("expected status 403, got %d (body=%q)", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp map[string]any
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
msg, _ := resp["message"].(string)
|
||||
if !strings.Contains(strings.ToLower(msg), "admin") {
|
||||
t.Errorf("expected message to mention admin requirement, got %q", msg)
|
||||
}
|
||||
if serviceCalled {
|
||||
t.Errorf("service was invoked despite non-admin caller — gate failed open")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBulkRevoke_AdminExplicitFalse_Returns403 pins the specific case where the
|
||||
// AdminKey exists but is set to false — e.g., a non-admin named-key caller.
|
||||
// Without this we could regress to "key missing == deny, key present == allow"
|
||||
// which would silently grant a false flag.
|
||||
func TestBulkRevoke_AdminExplicitFalse_Returns403(t *testing.T) {
|
||||
h := NewBulkRevocationHandler(&mockBulkRevocationService{})
|
||||
|
||||
body := `{"reason":"keyCompromise","certificate_ids":["mc-1"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
ctx := context.WithValue(context.Background(), middleware.RequestIDKey{}, "test-request-id")
|
||||
ctx = context.WithValue(ctx, middleware.AdminKey{}, false)
|
||||
req = req.WithContext(ctx)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Fatalf("expected status 403 for admin=false, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBulkRevoke_AdminPermitted_ForwardsActor confirms the happy path:
|
||||
// an admin-tagged context reaches the service and the actor (from the auth
|
||||
// UserKey) is propagated through to BulkRevoke. This keeps the admin gate and
|
||||
// the M-002 actor-propagation wired together in a single regression.
|
||||
func TestBulkRevoke_AdminPermitted_ForwardsActor(t *testing.T) {
|
||||
var capturedActor string
|
||||
svc := &mockBulkRevocationService{
|
||||
BulkRevokeFn: func(ctx context.Context, criteria domain.BulkRevocationCriteria, reason string, actor string) (*domain.BulkRevocationResult, error) {
|
||||
capturedActor = actor
|
||||
return &domain.BulkRevocationResult{TotalMatched: 1, TotalRevoked: 1}, nil
|
||||
},
|
||||
}
|
||||
h := NewBulkRevocationHandler(svc)
|
||||
|
||||
body := `{"reason":"keyCompromise","certificate_ids":["mc-1"]}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/bulk-revoke", bytes.NewBufferString(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
ctx := context.WithValue(context.Background(), middleware.RequestIDKey{}, "test-request-id")
|
||||
ctx = context.WithValue(ctx, middleware.AdminKey{}, true)
|
||||
ctx = context.WithValue(ctx, middleware.UserKey{}, "ops-admin")
|
||||
req = req.WithContext(ctx)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.BulkRevoke(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status 200 for admin caller, got %d (body=%q)", w.Code, w.Body.String())
|
||||
}
|
||||
if capturedActor != "ops-admin" {
|
||||
t.Errorf("expected actor ops-admin, got %q", capturedActor)
|
||||
}
|
||||
}
|
||||
@@ -17,116 +17,116 @@ import (
|
||||
|
||||
// MockCertificateService is a mock implementation of CertificateService interface.
|
||||
type MockCertificateService struct {
|
||||
ListCertificatesFn func(status, environment, ownerID, teamID, issuerID string, page, perPage int) ([]domain.ManagedCertificate, int64, error)
|
||||
ListCertificatesWithFilterFn func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error)
|
||||
GetCertificateFn func(id string) (*domain.ManagedCertificate, error)
|
||||
CreateCertificateFn func(cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
UpdateCertificateFn func(id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
ArchiveCertificateFn func(id string) error
|
||||
GetCertificateVersionsFn func(certID string, page, perPage int) ([]domain.CertificateVersion, int64, error)
|
||||
TriggerRenewalFn func(certID string) error
|
||||
TriggerDeploymentFn func(certID string, targetID string) error
|
||||
RevokeCertificateFn func(certID string, reason string) error
|
||||
GetRevokedCertificatesFn func() ([]*domain.CertificateRevocation, error)
|
||||
GenerateDERCRLFn func(issuerID string) ([]byte, error)
|
||||
GetOCSPResponseFn func(issuerID string, serialHex string) ([]byte, error)
|
||||
GetCertificateDeploymentsFn func(certID string) ([]domain.DeploymentTarget, error)
|
||||
ListCertificatesFn func(ctx context.Context, status, environment, ownerID, teamID, issuerID string, page, perPage int) ([]domain.ManagedCertificate, int64, error)
|
||||
ListCertificatesWithFilterFn func(ctx context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error)
|
||||
GetCertificateFn func(ctx context.Context, id string) (*domain.ManagedCertificate, error)
|
||||
CreateCertificateFn func(ctx context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
UpdateCertificateFn func(ctx context.Context, id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
ArchiveCertificateFn func(ctx context.Context, id string) error
|
||||
GetCertificateVersionsFn func(ctx context.Context, certID string, page, perPage int) ([]domain.CertificateVersion, int64, error)
|
||||
TriggerRenewalFn func(ctx context.Context, certID string, actor string) error
|
||||
TriggerDeploymentFn func(ctx context.Context, certID string, targetID string, actor string) error
|
||||
RevokeCertificateFn func(ctx context.Context, certID string, reason string, actor string) error
|
||||
GetRevokedCertificatesFn func(ctx context.Context) ([]*domain.CertificateRevocation, error)
|
||||
GenerateDERCRLFn func(ctx context.Context, issuerID string) ([]byte, error)
|
||||
GetOCSPResponseFn func(ctx context.Context, issuerID string, serialHex string) ([]byte, error)
|
||||
GetCertificateDeploymentsFn func(ctx context.Context, certID string) ([]domain.DeploymentTarget, error)
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) ListCertificates(status, environment, ownerID, teamID, issuerID string, page, perPage int) ([]domain.ManagedCertificate, int64, error) {
|
||||
func (m *MockCertificateService) ListCertificates(ctx context.Context, status, environment, ownerID, teamID, issuerID string, page, perPage int) ([]domain.ManagedCertificate, int64, error) {
|
||||
if m.ListCertificatesFn != nil {
|
||||
return m.ListCertificatesFn(status, environment, ownerID, teamID, issuerID, page, perPage)
|
||||
return m.ListCertificatesFn(ctx, status, environment, ownerID, teamID, issuerID, page, perPage)
|
||||
}
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) GetCertificate(id string) (*domain.ManagedCertificate, error) {
|
||||
func (m *MockCertificateService) GetCertificate(ctx context.Context, id string) (*domain.ManagedCertificate, error) {
|
||||
if m.GetCertificateFn != nil {
|
||||
return m.GetCertificateFn(id)
|
||||
return m.GetCertificateFn(ctx, id)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) CreateCertificate(cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
func (m *MockCertificateService) CreateCertificate(ctx context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
if m.CreateCertificateFn != nil {
|
||||
return m.CreateCertificateFn(cert)
|
||||
return m.CreateCertificateFn(ctx, cert)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) UpdateCertificate(id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
func (m *MockCertificateService) UpdateCertificate(ctx context.Context, id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
if m.UpdateCertificateFn != nil {
|
||||
return m.UpdateCertificateFn(id, cert)
|
||||
return m.UpdateCertificateFn(ctx, id, cert)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) ArchiveCertificate(id string) error {
|
||||
func (m *MockCertificateService) ArchiveCertificate(ctx context.Context, id string) error {
|
||||
if m.ArchiveCertificateFn != nil {
|
||||
return m.ArchiveCertificateFn(id)
|
||||
return m.ArchiveCertificateFn(ctx, id)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) GetCertificateVersions(certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
func (m *MockCertificateService) GetCertificateVersions(ctx context.Context, certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
if m.GetCertificateVersionsFn != nil {
|
||||
return m.GetCertificateVersionsFn(certID, page, perPage)
|
||||
return m.GetCertificateVersionsFn(ctx, certID, page, perPage)
|
||||
}
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) TriggerRenewal(certID string) error {
|
||||
func (m *MockCertificateService) TriggerRenewal(ctx context.Context, certID string, actor string) error {
|
||||
if m.TriggerRenewalFn != nil {
|
||||
return m.TriggerRenewalFn(certID)
|
||||
return m.TriggerRenewalFn(ctx, certID, actor)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) TriggerDeployment(certID string, targetID string) error {
|
||||
func (m *MockCertificateService) TriggerDeployment(ctx context.Context, certID string, targetID string, actor string) error {
|
||||
if m.TriggerDeploymentFn != nil {
|
||||
return m.TriggerDeploymentFn(certID, targetID)
|
||||
return m.TriggerDeploymentFn(ctx, certID, targetID, actor)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) RevokeCertificate(certID string, reason string) error {
|
||||
func (m *MockCertificateService) RevokeCertificate(ctx context.Context, certID string, reason string, actor string) error {
|
||||
if m.RevokeCertificateFn != nil {
|
||||
return m.RevokeCertificateFn(certID, reason)
|
||||
return m.RevokeCertificateFn(ctx, certID, reason, actor)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) GetRevokedCertificates() ([]*domain.CertificateRevocation, error) {
|
||||
func (m *MockCertificateService) GetRevokedCertificates(ctx context.Context) ([]*domain.CertificateRevocation, error) {
|
||||
if m.GetRevokedCertificatesFn != nil {
|
||||
return m.GetRevokedCertificatesFn()
|
||||
return m.GetRevokedCertificatesFn(ctx)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) GenerateDERCRL(issuerID string) ([]byte, error) {
|
||||
func (m *MockCertificateService) GenerateDERCRL(ctx context.Context, issuerID string) ([]byte, error) {
|
||||
if m.GenerateDERCRLFn != nil {
|
||||
return m.GenerateDERCRLFn(issuerID)
|
||||
return m.GenerateDERCRLFn(ctx, issuerID)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) GetOCSPResponse(issuerID string, serialHex string) ([]byte, error) {
|
||||
func (m *MockCertificateService) GetOCSPResponse(ctx context.Context, issuerID string, serialHex string) ([]byte, error) {
|
||||
if m.GetOCSPResponseFn != nil {
|
||||
return m.GetOCSPResponseFn(issuerID, serialHex)
|
||||
return m.GetOCSPResponseFn(ctx, issuerID, serialHex)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) ListCertificatesWithFilter(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
func (m *MockCertificateService) ListCertificatesWithFilter(ctx context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if m.ListCertificatesWithFilterFn != nil {
|
||||
return m.ListCertificatesWithFilterFn(filter)
|
||||
return m.ListCertificatesWithFilterFn(ctx, filter)
|
||||
}
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
func (m *MockCertificateService) GetCertificateDeployments(certID string) ([]domain.DeploymentTarget, error) {
|
||||
func (m *MockCertificateService) GetCertificateDeployments(ctx context.Context, certID string) ([]domain.DeploymentTarget, error) {
|
||||
if m.GetCertificateDeploymentsFn != nil {
|
||||
return m.GetCertificateDeploymentsFn(certID)
|
||||
return m.GetCertificateDeploymentsFn(ctx, certID)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
@@ -158,7 +158,7 @@ func TestListCertificates_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.Page == 1 && filter.PerPage == 50 {
|
||||
return []domain.ManagedCertificate{cert1, cert2}, 2, nil
|
||||
}
|
||||
@@ -197,7 +197,7 @@ func TestListCertificates_Success(t *testing.T) {
|
||||
// Test ListCertificates - with filters
|
||||
func TestListCertificates_WithFilters(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.Status == "Active" && filter.Environment == "prod" {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
}
|
||||
@@ -236,7 +236,7 @@ func TestListCertificates_MethodNotAllowed(t *testing.T) {
|
||||
// Test ListCertificates - service error
|
||||
func TestListCertificates_ServiceError(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return nil, 0, ErrMockServiceFailed
|
||||
},
|
||||
}
|
||||
@@ -266,7 +266,7 @@ func TestGetCertificate_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateFn: func(id string) (*domain.ManagedCertificate, error) {
|
||||
GetCertificateFn: func(_ context.Context, id string) (*domain.ManagedCertificate, error) {
|
||||
if id == "mc-prod-001" {
|
||||
return cert, nil
|
||||
}
|
||||
@@ -298,7 +298,7 @@ func TestGetCertificate_Success(t *testing.T) {
|
||||
// Test GetCertificate - not found
|
||||
func TestGetCertificate_NotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateFn: func(id string) (*domain.ManagedCertificate, error) {
|
||||
GetCertificateFn: func(_ context.Context, id string) (*domain.ManagedCertificate, error) {
|
||||
return nil, ErrMockNotFound
|
||||
},
|
||||
}
|
||||
@@ -345,7 +345,7 @@ func TestCreateCertificate_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
CreateCertificateFn: func(cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
CreateCertificateFn: func(_ context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
return created, nil
|
||||
},
|
||||
}
|
||||
@@ -403,7 +403,7 @@ func TestCreateCertificate_InvalidBody(t *testing.T) {
|
||||
// Test CreateCertificate - service error
|
||||
func TestCreateCertificate_ServiceError(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
CreateCertificateFn: func(cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
CreateCertificateFn: func(_ context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
return nil, ErrMockServiceFailed
|
||||
},
|
||||
}
|
||||
@@ -432,6 +432,66 @@ func TestCreateCertificate_ServiceError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestCreateCertificate_MissingRequiredField_Returns400 pins the C-001 handler
|
||||
// contract: handler MUST reject a create payload that omits any of the five
|
||||
// required fields (name, common_name, owner_id, team_id, issuer_id,
|
||||
// renewal_policy_id) with HTTP 400 before the service is invoked. The mock
|
||||
// service here would succeed if called; every subtest proving 400 therefore
|
||||
// proves the handler guard fires.
|
||||
func TestCreateCertificate_MissingRequiredField_Returns400(t *testing.T) {
|
||||
baseBody := map[string]interface{}{
|
||||
"name": "API Prod",
|
||||
"common_name": "api.example.com",
|
||||
"owner_id": "o-alice",
|
||||
"team_id": "t-platform",
|
||||
"issuer_id": "iss-local",
|
||||
"renewal_policy_id": "rp-standard",
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
missingField string
|
||||
}{
|
||||
{"missing name", "name"},
|
||||
{"missing common_name", "common_name"},
|
||||
{"missing owner_id", "owner_id"},
|
||||
{"missing team_id", "team_id"},
|
||||
{"missing issuer_id", "issuer_id"},
|
||||
{"missing renewal_policy_id", "renewal_policy_id"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
body := make(map[string]interface{}, len(baseBody))
|
||||
for k, v := range baseBody {
|
||||
body[k] = v
|
||||
}
|
||||
delete(body, tc.missingField)
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
mock := &MockCertificateService{
|
||||
CreateCertificateFn: func(_ context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
// Would succeed if handler guard did not fire.
|
||||
cert.ID = "mc-would-be-created"
|
||||
return &cert, nil
|
||||
},
|
||||
}
|
||||
handler := NewCertificateHandler(mock)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates", bytes.NewReader(bodyBytes))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateCertificate(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("%s: expected 400, got %d — body=%s", tc.name, w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Test UpdateCertificate - success case
|
||||
func TestUpdateCertificate_Success(t *testing.T) {
|
||||
updated := &domain.ManagedCertificate{
|
||||
@@ -445,7 +505,7 @@ func TestUpdateCertificate_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
UpdateCertificateFn: func(id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
UpdateCertificateFn: func(_ context.Context, id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
if id == "mc-prod-001" {
|
||||
return updated, nil
|
||||
}
|
||||
@@ -501,7 +561,7 @@ func TestUpdateCertificate_InvalidBody(t *testing.T) {
|
||||
// Test ArchiveCertificate - success case
|
||||
func TestArchiveCertificate_Success(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ArchiveCertificateFn: func(id string) error {
|
||||
ArchiveCertificateFn: func(_ context.Context, id string) error {
|
||||
if id == "mc-prod-001" {
|
||||
return nil
|
||||
}
|
||||
@@ -524,7 +584,7 @@ func TestArchiveCertificate_Success(t *testing.T) {
|
||||
// Test ArchiveCertificate - not found
|
||||
func TestArchiveCertificate_NotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ArchiveCertificateFn: func(id string) error {
|
||||
ArchiveCertificateFn: func(_ context.Context, id string) error {
|
||||
return ErrMockNotFound
|
||||
},
|
||||
}
|
||||
@@ -554,7 +614,7 @@ func TestGetCertificateVersions_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateVersionsFn: func(certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
GetCertificateVersionsFn: func(_ context.Context, certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
if certID == "mc-prod-001" {
|
||||
return []domain.CertificateVersion{ver1}, 1, nil
|
||||
}
|
||||
@@ -586,7 +646,7 @@ func TestGetCertificateVersions_Success(t *testing.T) {
|
||||
// Test GetCertificateVersions - not found
|
||||
func TestGetCertificateVersions_NotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateVersionsFn: func(certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
GetCertificateVersionsFn: func(_ context.Context, certID string, page, perPage int) ([]domain.CertificateVersion, int64, error) {
|
||||
return nil, 0, ErrMockNotFound
|
||||
},
|
||||
}
|
||||
@@ -606,7 +666,7 @@ func TestGetCertificateVersions_NotFound(t *testing.T) {
|
||||
// Test TriggerRenewal - success case
|
||||
func TestTriggerRenewal_Success(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
TriggerRenewalFn: func(certID string) error {
|
||||
TriggerRenewalFn: func(_ context.Context, certID string, _ string) error {
|
||||
if certID == "mc-prod-001" {
|
||||
return nil
|
||||
}
|
||||
@@ -638,7 +698,7 @@ func TestTriggerRenewal_Success(t *testing.T) {
|
||||
// Test TriggerRenewal - service error
|
||||
func TestTriggerRenewal_ServiceError(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
TriggerRenewalFn: func(certID string) error {
|
||||
TriggerRenewalFn: func(_ context.Context, certID string, _ string) error {
|
||||
return ErrMockServiceFailed
|
||||
},
|
||||
}
|
||||
@@ -658,7 +718,7 @@ func TestTriggerRenewal_ServiceError(t *testing.T) {
|
||||
// Test TriggerDeployment - success case
|
||||
func TestTriggerDeployment_Success(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
TriggerDeploymentFn: func(certID string, targetID string) error {
|
||||
TriggerDeploymentFn: func(_ context.Context, certID string, targetID string, _ string) error {
|
||||
if certID == "mc-prod-001" {
|
||||
return nil
|
||||
}
|
||||
@@ -695,7 +755,7 @@ func TestTriggerDeployment_Success(t *testing.T) {
|
||||
// Test TriggerDeployment - without target ID
|
||||
func TestTriggerDeployment_NoTargetID(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
TriggerDeploymentFn: func(certID string, targetID string) error {
|
||||
TriggerDeploymentFn: func(_ context.Context, certID string, targetID string, _ string) error {
|
||||
// Should accept empty targetID (deploy to all)
|
||||
return nil
|
||||
},
|
||||
@@ -716,7 +776,7 @@ func TestTriggerDeployment_NoTargetID(t *testing.T) {
|
||||
// Test ListCertificates - invalid page parameter
|
||||
func TestListCertificates_InvalidPageParam(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
// Should default to page 1
|
||||
if filter.Page == 1 {
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
@@ -740,7 +800,7 @@ func TestListCertificates_InvalidPageParam(t *testing.T) {
|
||||
// Test ListCertificates - per_page exceeds max
|
||||
func TestListCertificates_PerPageExceedsMax(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
// Should cap perPage at 500
|
||||
if filter.PerPage == 50 { // defaults to 50 if > 500
|
||||
return []domain.ManagedCertificate{}, 0, nil
|
||||
@@ -765,7 +825,7 @@ func TestListCertificates_PerPageExceedsMax(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_Success(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
if certID != "mc-prod-001" {
|
||||
t.Errorf("expected certID mc-prod-001, got %s", certID)
|
||||
}
|
||||
@@ -798,7 +858,7 @@ func TestRevokeCertificate_Handler_Success(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_NoBody(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
// Empty reason is OK — service defaults to "unspecified"
|
||||
return nil
|
||||
},
|
||||
@@ -818,7 +878,7 @@ func TestRevokeCertificate_Handler_NoBody(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_AlreadyRevoked(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
return fmt.Errorf("certificate is already revoked")
|
||||
},
|
||||
}
|
||||
@@ -839,7 +899,7 @@ func TestRevokeCertificate_Handler_AlreadyRevoked(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_NotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
return fmt.Errorf("failed to fetch certificate: not found")
|
||||
},
|
||||
}
|
||||
@@ -858,7 +918,7 @@ func TestRevokeCertificate_Handler_NotFound(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_InvalidReason(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
return fmt.Errorf("invalid revocation reason: badReason")
|
||||
},
|
||||
}
|
||||
@@ -922,7 +982,7 @@ func TestRevokeCertificate_Handler_EmptyID(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_CannotRevokeArchived(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
return fmt.Errorf("cannot revoke archived certificate")
|
||||
},
|
||||
}
|
||||
@@ -941,7 +1001,7 @@ func TestRevokeCertificate_Handler_CannotRevokeArchived(t *testing.T) {
|
||||
|
||||
func TestRevokeCertificate_Handler_ServerError(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
RevokeCertificateFn: func(certID string, reason string) error {
|
||||
RevokeCertificateFn: func(_ context.Context, certID string, reason string, _ string) error {
|
||||
return fmt.Errorf("database connection lost")
|
||||
},
|
||||
}
|
||||
@@ -958,132 +1018,18 @@ func TestRevokeCertificate_Handler_ServerError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// === CRL Handler Tests ===
|
||||
|
||||
func TestGetCRL_Success(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetRevokedCertificatesFn: func() ([]*domain.CertificateRevocation, error) {
|
||||
return []*domain.CertificateRevocation{
|
||||
{
|
||||
ID: "rev-1",
|
||||
CertificateID: "cert-1",
|
||||
SerialNumber: "ABC123",
|
||||
Reason: "keyCompromise",
|
||||
RevokedAt: time.Date(2026, 3, 20, 10, 0, 0, 0, time.UTC),
|
||||
},
|
||||
{
|
||||
ID: "rev-2",
|
||||
CertificateID: "cert-2",
|
||||
SerialNumber: "DEF456",
|
||||
Reason: "superseded",
|
||||
RevokedAt: time.Date(2026, 3, 21, 14, 30, 0, 0, time.UTC),
|
||||
},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/crl", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetCRL(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]interface{}
|
||||
json.NewDecoder(w.Body).Decode(&resp)
|
||||
|
||||
if resp["version"] != float64(1) {
|
||||
t.Errorf("expected version 1, got %v", resp["version"])
|
||||
}
|
||||
if resp["total"] != float64(2) {
|
||||
t.Errorf("expected total 2, got %v", resp["total"])
|
||||
}
|
||||
|
||||
entries, ok := resp["entries"].([]interface{})
|
||||
if !ok {
|
||||
t.Fatal("expected entries to be an array")
|
||||
}
|
||||
if len(entries) != 2 {
|
||||
t.Errorf("expected 2 entries, got %d", len(entries))
|
||||
}
|
||||
|
||||
entry1 := entries[0].(map[string]interface{})
|
||||
if entry1["serial_number"] != "ABC123" {
|
||||
t.Errorf("expected serial ABC123, got %v", entry1["serial_number"])
|
||||
}
|
||||
if entry1["revocation_reason"] != "keyCompromise" {
|
||||
t.Errorf("expected reason keyCompromise, got %v", entry1["revocation_reason"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCRL_Empty(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetRevokedCertificatesFn: func() ([]*domain.CertificateRevocation, error) {
|
||||
return nil, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/crl", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetCRL(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
|
||||
}
|
||||
|
||||
var resp map[string]interface{}
|
||||
json.NewDecoder(w.Body).Decode(&resp)
|
||||
if resp["total"] != float64(0) {
|
||||
t.Errorf("expected total 0, got %v", resp["total"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCRL_ServiceError(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetRevokedCertificatesFn: func() ([]*domain.CertificateRevocation, error) {
|
||||
return nil, fmt.Errorf("revocation repository not configured")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/crl", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetCRL(w, req)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected status %d, got %d", http.StatusInternalServerError, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCRL_MethodNotAllowed(t *testing.T) {
|
||||
mock := &MockCertificateService{}
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/crl", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetCRL(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Errorf("expected status %d, got %d", http.StatusMethodNotAllowed, w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// M15b: DER CRL and OCSP Handler Tests
|
||||
// === CRL and OCSP Handler Tests (RFC 5280 / RFC 6960, served under /.well-known/pki/) ===
|
||||
//
|
||||
// M-006 relocated these endpoints from /api/v1/crl* and /api/v1/ocsp/* to the
|
||||
// RFC-compliant /.well-known/pki/ namespace and deleted the non-standard JSON
|
||||
// CRL endpoint. The DER-encoded X.509 CRL (application/pkix-crl) and the
|
||||
// DER-encoded OCSP response (application/ocsp-response) are the only wire
|
||||
// formats certctl supports for revocation data.
|
||||
|
||||
func TestGetDERCRL_Success(t *testing.T) {
|
||||
derCRLData := []byte{0x30, 0x82, 0x01, 0x00} // Mock DER CRL bytes
|
||||
mock := &MockCertificateService{
|
||||
GenerateDERCRLFn: func(issuerID string) ([]byte, error) {
|
||||
GenerateDERCRLFn: func(_ context.Context, issuerID string) ([]byte, error) {
|
||||
if issuerID == "iss-local" {
|
||||
return derCRLData, nil
|
||||
}
|
||||
@@ -1092,7 +1038,7 @@ func TestGetDERCRL_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/crl/iss-local", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/crl/iss-local", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1107,17 +1053,20 @@ func TestGetDERCRL_Success(t *testing.T) {
|
||||
if len(responseBody) == 0 {
|
||||
t.Error("expected non-empty response body")
|
||||
}
|
||||
if ct := w.Header().Get("Content-Type"); ct != "application/pkix-crl" {
|
||||
t.Errorf("expected Content-Type application/pkix-crl, got %q", ct)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDERCRL_IssuerNotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GenerateDERCRLFn: func(issuerID string) ([]byte, error) {
|
||||
GenerateDERCRLFn: func(_ context.Context, issuerID string) ([]byte, error) {
|
||||
return nil, fmt.Errorf("issuer not found")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/crl/nonexistent", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/crl/nonexistent", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1130,13 +1079,13 @@ func TestGetDERCRL_IssuerNotFound(t *testing.T) {
|
||||
|
||||
func TestGetDERCRL_NotSupported(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GenerateDERCRLFn: func(issuerID string) ([]byte, error) {
|
||||
GenerateDERCRLFn: func(_ context.Context, issuerID string) ([]byte, error) {
|
||||
return nil, fmt.Errorf("issuer does not support CRL generation")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/crl/iss-acme", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/crl/iss-acme", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1151,7 +1100,7 @@ func TestGetDERCRL_NotSupported(t *testing.T) {
|
||||
func TestGetDERCRL_MethodNotAllowed(t *testing.T) {
|
||||
mock := &MockCertificateService{}
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/crl/iss-local", nil)
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/crl/iss-local", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1165,7 +1114,7 @@ func TestGetDERCRL_MethodNotAllowed(t *testing.T) {
|
||||
func TestHandleOCSP_Success(t *testing.T) {
|
||||
ocspResponseBytes := []byte{0x30, 0x82, 0x02, 0x00} // Mock OCSP response
|
||||
mock := &MockCertificateService{
|
||||
GetOCSPResponseFn: func(issuerID string, serialHex string) ([]byte, error) {
|
||||
GetOCSPResponseFn: func(_ context.Context, issuerID string, serialHex string) ([]byte, error) {
|
||||
if issuerID == "iss-local" && serialHex == "12345" {
|
||||
return ocspResponseBytes, nil
|
||||
}
|
||||
@@ -1174,7 +1123,7 @@ func TestHandleOCSP_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/ocsp/iss-local/12345", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/ocsp/iss-local/12345", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1188,12 +1137,15 @@ func TestHandleOCSP_Success(t *testing.T) {
|
||||
if len(responseBody) == 0 {
|
||||
t.Error("expected non-empty OCSP response body")
|
||||
}
|
||||
if ct := w.Header().Get("Content-Type"); ct != "application/ocsp-response" {
|
||||
t.Errorf("expected Content-Type application/ocsp-response, got %q", ct)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleOCSP_MissingSerial(t *testing.T) {
|
||||
mock := &MockCertificateService{}
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/ocsp/iss-local/", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/ocsp/iss-local/", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1206,13 +1158,13 @@ func TestHandleOCSP_MissingSerial(t *testing.T) {
|
||||
|
||||
func TestHandleOCSP_IssuerNotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetOCSPResponseFn: func(issuerID string, serialHex string) ([]byte, error) {
|
||||
GetOCSPResponseFn: func(_ context.Context, issuerID string, serialHex string) ([]byte, error) {
|
||||
return nil, fmt.Errorf("issuer not found")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/ocsp/nonexistent/ABC123", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/ocsp/nonexistent/ABC123", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1225,13 +1177,13 @@ func TestHandleOCSP_IssuerNotFound(t *testing.T) {
|
||||
|
||||
func TestHandleOCSP_CertNotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetOCSPResponseFn: func(issuerID string, serialHex string) ([]byte, error) {
|
||||
GetOCSPResponseFn: func(_ context.Context, issuerID string, serialHex string) ([]byte, error) {
|
||||
return nil, fmt.Errorf("certificate not found")
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/ocsp/iss-local/UNKNOWN", nil)
|
||||
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/ocsp/iss-local/UNKNOWN", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1245,7 +1197,7 @@ func TestHandleOCSP_CertNotFound(t *testing.T) {
|
||||
func TestHandleOCSP_MethodNotAllowed(t *testing.T) {
|
||||
mock := &MockCertificateService{}
|
||||
handler := NewCertificateHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/ocsp/iss-local/12345", nil)
|
||||
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/iss-local/12345", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
@@ -1261,7 +1213,7 @@ func TestHandleOCSP_MethodNotAllowed(t *testing.T) {
|
||||
// TestListCertificates_SortParam tests sort parameter parsing and passing to service.
|
||||
func TestListCertificates_SortParam(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
// Handler strips the '-' prefix and sets SortDesc = true
|
||||
if filter.Sort != "notAfter" || !filter.SortDesc {
|
||||
t.Errorf("expected sort=notAfter desc=true, got sort=%s desc=%v", filter.Sort, filter.SortDesc)
|
||||
@@ -1284,7 +1236,7 @@ func TestListCertificates_SortParam(t *testing.T) {
|
||||
// TestListCertificates_SortParam_Ascending tests sort parameter without '-' prefix (ascending).
|
||||
func TestListCertificates_SortParam_Ascending(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.Sort != "createdAt" || filter.SortDesc {
|
||||
t.Errorf("expected sort=createdAt desc=false, got sort=%s desc=%v", filter.Sort, filter.SortDesc)
|
||||
}
|
||||
@@ -1309,7 +1261,7 @@ func TestListCertificates_TimeRangeFilters(t *testing.T) {
|
||||
after := time.Now().AddDate(0, 0, -90)
|
||||
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.ExpiresBefore == nil {
|
||||
t.Error("expected ExpiresBefore to be set")
|
||||
}
|
||||
@@ -1339,7 +1291,7 @@ func TestListCertificates_CreatedAfterFilter(t *testing.T) {
|
||||
past := time.Now().AddDate(-1, 0, 0)
|
||||
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.CreatedAfter == nil {
|
||||
t.Error("expected CreatedAfter to be set")
|
||||
}
|
||||
@@ -1369,7 +1321,7 @@ func TestListCertificates_CursorPagination(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
return []domain.ManagedCertificate{cert}, 1, nil
|
||||
},
|
||||
}
|
||||
@@ -1409,7 +1361,7 @@ func TestListCertificates_SparseFields(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if len(filter.Fields) != 2 {
|
||||
t.Errorf("expected 2 fields, got %d", len(filter.Fields))
|
||||
}
|
||||
@@ -1456,7 +1408,7 @@ func TestListCertificates_SparseFields(t *testing.T) {
|
||||
// TestListCertificates_ProfileFilter tests profile_id filter.
|
||||
func TestListCertificates_ProfileFilter(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.ProfileID != "prof-standard" {
|
||||
t.Errorf("expected ProfileID=prof-standard, got %s", filter.ProfileID)
|
||||
}
|
||||
@@ -1479,7 +1431,7 @@ func TestListCertificates_ProfileFilter(t *testing.T) {
|
||||
// TestListCertificates_AgentIDFilter tests agent_id filter.
|
||||
func TestListCertificates_AgentIDFilter(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.AgentID != "agent-prod-001" {
|
||||
t.Errorf("expected AgentID=agent-prod-001, got %s", filter.AgentID)
|
||||
}
|
||||
@@ -1502,7 +1454,7 @@ func TestListCertificates_AgentIDFilter(t *testing.T) {
|
||||
// TestListCertificates_CombinedFilters tests multiple filters together.
|
||||
func TestListCertificates_CombinedFilters(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
ListCertificatesWithFilterFn: func(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
ListCertificatesWithFilterFn: func(_ context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error) {
|
||||
if filter.Status != "Active" || filter.Environment != "production" || filter.ProfileID != "prof-standard" {
|
||||
t.Error("expected all filters to be set")
|
||||
}
|
||||
@@ -1540,7 +1492,7 @@ func TestGetCertificateDeployments_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateDeploymentsFn: func(certID string) ([]domain.DeploymentTarget, error) {
|
||||
GetCertificateDeploymentsFn: func(_ context.Context, certID string) ([]domain.DeploymentTarget, error) {
|
||||
if certID != "mc-prod-001" {
|
||||
return nil, ErrMockNotFound
|
||||
}
|
||||
@@ -1576,7 +1528,7 @@ func TestGetCertificateDeployments_Success(t *testing.T) {
|
||||
// TestGetCertificateDeployments_NotFound tests 404 for nonexistent certificate.
|
||||
func TestGetCertificateDeployments_NotFound(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateDeploymentsFn: func(certID string) ([]domain.DeploymentTarget, error) {
|
||||
GetCertificateDeploymentsFn: func(_ context.Context, certID string) ([]domain.DeploymentTarget, error) {
|
||||
return nil, fmt.Errorf("certificate not found")
|
||||
},
|
||||
}
|
||||
@@ -1596,7 +1548,7 @@ func TestGetCertificateDeployments_NotFound(t *testing.T) {
|
||||
// TestGetCertificateDeployments_Empty tests successful response with no deployments.
|
||||
func TestGetCertificateDeployments_Empty(t *testing.T) {
|
||||
mock := &MockCertificateService{
|
||||
GetCertificateDeploymentsFn: func(certID string) ([]domain.DeploymentTarget, error) {
|
||||
GetCertificateDeploymentsFn: func(_ context.Context, certID string) ([]domain.DeploymentTarget, error) {
|
||||
if certID == "mc-no-deployments" {
|
||||
return []domain.DeploymentTarget{}, nil
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
@@ -15,20 +16,20 @@ import (
|
||||
|
||||
// CertificateService defines the service interface for certificate operations.
|
||||
type CertificateService interface {
|
||||
ListCertificates(status, environment, ownerID, teamID, issuerID string, page, perPage int) ([]domain.ManagedCertificate, int64, error)
|
||||
ListCertificatesWithFilter(filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error)
|
||||
GetCertificate(id string) (*domain.ManagedCertificate, error)
|
||||
CreateCertificate(cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
UpdateCertificate(id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
ArchiveCertificate(id string) error
|
||||
GetCertificateVersions(certID string, page, perPage int) ([]domain.CertificateVersion, int64, error)
|
||||
TriggerRenewal(certID string) error
|
||||
TriggerDeployment(certID string, targetID string) error
|
||||
RevokeCertificate(certID string, reason string) error
|
||||
GetRevokedCertificates() ([]*domain.CertificateRevocation, error)
|
||||
GenerateDERCRL(issuerID string) ([]byte, error)
|
||||
GetOCSPResponse(issuerID string, serialHex string) ([]byte, error)
|
||||
GetCertificateDeployments(certID string) ([]domain.DeploymentTarget, error)
|
||||
ListCertificates(ctx context.Context, status, environment, ownerID, teamID, issuerID string, page, perPage int) ([]domain.ManagedCertificate, int64, error)
|
||||
ListCertificatesWithFilter(ctx context.Context, filter *repository.CertificateFilter) ([]domain.ManagedCertificate, int, error)
|
||||
GetCertificate(ctx context.Context, id string) (*domain.ManagedCertificate, error)
|
||||
CreateCertificate(ctx context.Context, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
UpdateCertificate(ctx context.Context, id string, cert domain.ManagedCertificate) (*domain.ManagedCertificate, error)
|
||||
ArchiveCertificate(ctx context.Context, id string) error
|
||||
GetCertificateVersions(ctx context.Context, certID string, page, perPage int) ([]domain.CertificateVersion, int64, error)
|
||||
TriggerRenewal(ctx context.Context, certID string, actor string) error
|
||||
TriggerDeployment(ctx context.Context, certID string, targetID string, actor string) error
|
||||
RevokeCertificate(ctx context.Context, certID string, reason string, actor string) error
|
||||
GetRevokedCertificates(ctx context.Context) ([]*domain.CertificateRevocation, error)
|
||||
GenerateDERCRL(ctx context.Context, issuerID string) ([]byte, error)
|
||||
GetOCSPResponse(ctx context.Context, issuerID string, serialHex string) ([]byte, error)
|
||||
GetCertificateDeployments(ctx context.Context, certID string) ([]domain.DeploymentTarget, error)
|
||||
}
|
||||
|
||||
// CertificateHandler handles HTTP requests for certificate operations.
|
||||
@@ -128,7 +129,7 @@ func (h CertificateHandler) ListCertificates(w http.ResponseWriter, r *http.Requ
|
||||
filter.Fields = strings.Split(fieldsStr, ",")
|
||||
}
|
||||
|
||||
certs, total, err := h.svc.ListCertificatesWithFilter(filter)
|
||||
certs, total, err := h.svc.ListCertificatesWithFilter(r.Context(), filter)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to list certificates", requestID)
|
||||
return
|
||||
@@ -186,7 +187,7 @@ func (h CertificateHandler) GetCertificate(w http.ResponseWriter, r *http.Reques
|
||||
return
|
||||
}
|
||||
|
||||
cert, err := h.svc.GetCertificate(id)
|
||||
cert, err := h.svc.GetCertificate(r.Context(), id)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
@@ -241,7 +242,7 @@ func (h CertificateHandler) CreateCertificate(w http.ResponseWriter, r *http.Req
|
||||
return
|
||||
}
|
||||
|
||||
created, err := h.svc.CreateCertificate(cert)
|
||||
created, err := h.svc.CreateCertificate(r.Context(), cert)
|
||||
if err != nil {
|
||||
slog.Error("failed to create certificate", "error", err, "request_id", requestID, "common_name", cert.CommonName, "name", cert.Name)
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to create certificate", requestID)
|
||||
@@ -295,14 +296,15 @@ func (h CertificateHandler) UpdateCertificate(w http.ResponseWriter, r *http.Req
|
||||
}
|
||||
}
|
||||
|
||||
updated, err := h.svc.UpdateCertificate(id, cert)
|
||||
updated, err := h.svc.UpdateCertificate(r.Context(), id, cert)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("UpdateCertificate failed", "cert_id", id, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
slog.Error("UpdateCertificate failed", "cert_id", id, "error", err.Error())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update certificate", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -325,12 +327,14 @@ func (h CertificateHandler) ArchiveCertificate(w http.ResponseWriter, r *http.Re
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.svc.ArchiveCertificate(id); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
if err := h.svc.ArchiveCertificate(r.Context(), id); err != nil {
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("ArchiveCertificate failed", "cert_id", id, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to archive certificate", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -370,14 +374,15 @@ func (h CertificateHandler) GetCertificateVersions(w http.ResponseWriter, r *htt
|
||||
}
|
||||
}
|
||||
|
||||
versions, total, err := h.svc.GetCertificateVersions(certID, page, perPage)
|
||||
versions, total, err := h.svc.GetCertificateVersions(r.Context(), certID, page, perPage)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("GetCertificateVersions failed", "cert_id", certID, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
slog.Error("GetCertificateVersions failed", "cert_id", certID, "error", err.Error())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get certificate versions", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -410,21 +415,16 @@ func (h CertificateHandler) TriggerRenewal(w http.ResponseWriter, r *http.Reques
|
||||
}
|
||||
certID := parts[0]
|
||||
|
||||
if err := h.svc.TriggerRenewal(certID); err != nil {
|
||||
errMsg := err.Error()
|
||||
if strings.Contains(errMsg, "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
actor := resolveActor(r.Context())
|
||||
|
||||
if err := h.svc.TriggerRenewal(r.Context(), certID, actor); err != nil {
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("TriggerRenewal failed", "cert_id", certID, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
if strings.Contains(errMsg, "cannot renew") {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, errMsg, requestID)
|
||||
return
|
||||
}
|
||||
if strings.Contains(errMsg, "already in progress") {
|
||||
ErrorWithRequestID(w, http.StatusConflict, errMsg, requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to trigger renewal", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -466,7 +466,9 @@ func (h CertificateHandler) TriggerDeployment(w http.ResponseWriter, r *http.Req
|
||||
}
|
||||
}
|
||||
|
||||
if err := h.svc.TriggerDeployment(certID, req.TargetID); err != nil {
|
||||
actor := resolveActor(r.Context())
|
||||
|
||||
if err := h.svc.TriggerDeployment(r.Context(), certID, req.TargetID, actor); err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to trigger deployment", requestID)
|
||||
return
|
||||
}
|
||||
@@ -508,69 +510,28 @@ func (h CertificateHandler) RevokeCertificate(w http.ResponseWriter, r *http.Req
|
||||
}
|
||||
}
|
||||
|
||||
if err := h.svc.RevokeCertificate(certID, req.Reason); err != nil {
|
||||
// Distinguish between client errors and server errors
|
||||
errMsg := err.Error()
|
||||
if strings.Contains(errMsg, "already revoked") ||
|
||||
strings.Contains(errMsg, "cannot revoke") ||
|
||||
strings.Contains(errMsg, "invalid revocation reason") {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, errMsg, requestID)
|
||||
return
|
||||
actor := resolveActor(r.Context())
|
||||
|
||||
if err := h.svc.RevokeCertificate(r.Context(), certID, req.Reason, actor); err != nil {
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("RevokeCertificate failed", "cert_id", certID, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
if strings.Contains(errMsg, "not found") || strings.Contains(errMsg, "failed to fetch") || strings.Contains(errMsg, "failed to get") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to revoke certificate", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, map[string]string{"status": "revoked"})
|
||||
}
|
||||
|
||||
// GetCRL returns the Certificate Revocation List as structured JSON.
|
||||
// GET /api/v1/crl
|
||||
// Note: DER-encoded X.509 CRL generation (requiring CA key access) is planned for M15b
|
||||
// alongside the embedded OCSP responder. This endpoint provides the same data in JSON format.
|
||||
func (h CertificateHandler) GetCRL(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
revocations, err := h.svc.GetRevokedCertificates()
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to generate CRL", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
type CRLEntry struct {
|
||||
SerialNumber string `json:"serial_number"`
|
||||
RevocationDate string `json:"revocation_date"`
|
||||
RevocationReason string `json:"revocation_reason"`
|
||||
}
|
||||
|
||||
entries := make([]CRLEntry, 0, len(revocations))
|
||||
for _, rev := range revocations {
|
||||
entries = append(entries, CRLEntry{
|
||||
SerialNumber: rev.SerialNumber,
|
||||
RevocationDate: rev.RevokedAt.Format("2006-01-02T15:04:05Z"),
|
||||
RevocationReason: rev.Reason,
|
||||
})
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, map[string]interface{}{
|
||||
"version": 1,
|
||||
"entries": entries,
|
||||
"total": len(entries),
|
||||
"generated_at": time.Now().UTC().Format("2006-01-02T15:04:05Z"),
|
||||
})
|
||||
}
|
||||
|
||||
// GetDERCRL returns a DER-encoded X.509 CRL signed by the specified issuer.
|
||||
// GET /api/v1/crl/{issuer_id}
|
||||
// GET /.well-known/pki/crl/{issuer_id}
|
||||
//
|
||||
// RFC 5280 § 5. Served unauthenticated under the /.well-known/pki/ namespace so
|
||||
// relying parties (browsers, OpenSSL, OCSP stapling sidecars) can fetch the CRL
|
||||
// without presenting certctl API credentials.
|
||||
func (h CertificateHandler) GetDERCRL(w http.ResponseWriter, r *http.Request) {
|
||||
requestID, _ := r.Context().Value("request_id").(string)
|
||||
|
||||
@@ -579,24 +540,21 @@ func (h CertificateHandler) GetDERCRL(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
issuerID := strings.TrimPrefix(r.URL.Path, "/api/v1/crl/")
|
||||
issuerID := strings.TrimPrefix(r.URL.Path, "/.well-known/pki/crl/")
|
||||
if issuerID == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Issuer ID is required", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
derBytes, err := h.svc.GenerateDERCRL(issuerID)
|
||||
derBytes, err := h.svc.GenerateDERCRL(r.Context(), issuerID)
|
||||
if err != nil {
|
||||
errMsg := err.Error()
|
||||
if strings.Contains(errMsg, "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, errMsg, requestID)
|
||||
return
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("GenerateDERCRL failed", "issuer_id", issuerID, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
if strings.Contains(errMsg, "do not support") || strings.Contains(errMsg, "does not support") {
|
||||
ErrorWithRequestID(w, http.StatusNotImplemented, errMsg, requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to generate CRL", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -607,8 +565,11 @@ func (h CertificateHandler) GetDERCRL(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// HandleOCSP processes OCSP requests.
|
||||
// GET /api/v1/ocsp/{issuer_id}/{serial_hex}
|
||||
// For simplicity, use GET with path params instead of binary POST.
|
||||
// GET /.well-known/pki/ocsp/{issuer_id}/{serial_hex}
|
||||
//
|
||||
// RFC 6960. Served unauthenticated under the /.well-known/pki/ namespace. For
|
||||
// simplicity we accept GET with path params rather than the binary POST body
|
||||
// form — the response is a valid DER-encoded OCSP response either way.
|
||||
func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) {
|
||||
requestID, _ := r.Context().Value("request_id").(string)
|
||||
|
||||
@@ -617,8 +578,8 @@ func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Extract issuer_id and serial from path: /api/v1/ocsp/{issuer_id}/{serial_hex}
|
||||
path := strings.TrimPrefix(r.URL.Path, "/api/v1/ocsp/")
|
||||
// Extract issuer_id and serial from path: /.well-known/pki/ocsp/{issuer_id}/{serial_hex}
|
||||
path := strings.TrimPrefix(r.URL.Path, "/.well-known/pki/ocsp/")
|
||||
parts := strings.SplitN(path, "/", 2)
|
||||
if len(parts) < 2 || parts[0] == "" || parts[1] == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Issuer ID and serial number are required", requestID)
|
||||
@@ -627,18 +588,15 @@ func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) {
|
||||
issuerID := parts[0]
|
||||
serialHex := parts[1]
|
||||
|
||||
derBytes, err := h.svc.GetOCSPResponse(issuerID, serialHex)
|
||||
derBytes, err := h.svc.GetOCSPResponse(r.Context(), issuerID, serialHex)
|
||||
if err != nil {
|
||||
errMsg := err.Error()
|
||||
if strings.Contains(errMsg, "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, errMsg, requestID)
|
||||
return
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("GetOCSPResponse failed", "issuer_id", issuerID, "serial", serialHex, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
if strings.Contains(errMsg, "do not support") || strings.Contains(errMsg, "does not support") {
|
||||
ErrorWithRequestID(w, http.StatusNotImplemented, errMsg, requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to generate OCSP response", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -667,14 +625,15 @@ func (h CertificateHandler) GetCertificateDeployments(w http.ResponseWriter, r *
|
||||
}
|
||||
certID := parts[0]
|
||||
|
||||
deployments, err := h.svc.GetCertificateDeployments(certID)
|
||||
deployments, err := h.svc.GetCertificateDeployments(r.Context(), certID)
|
||||
if err != nil {
|
||||
errMsg := err.Error()
|
||||
if strings.Contains(errMsg, "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
status := errToStatus(err)
|
||||
msg := err.Error()
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("GetCertificateDeployments failed", "cert_id", certID, "error", err)
|
||||
msg = "internal error"
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get deployments", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package handler
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
@@ -39,7 +40,8 @@ func (h *DigestHandler) PreviewDigest(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
html, err := h.service.PreviewDigest(r.Context())
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
slog.Error("digest preview failed", "error", err.Error())
|
||||
http.Error(w, "internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -64,9 +66,10 @@ func (h *DigestHandler) SendDigest(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
if err := h.service.SendDigest(r.Context()); err != nil {
|
||||
slog.Error("digest send failed", "error", err.Error())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
|
||||
json.NewEncoder(w).Encode(map[string]string{"error": "internal error"})
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -11,12 +11,17 @@ import (
|
||||
)
|
||||
|
||||
// DiscoveryService defines the interface used by the discovery handler.
|
||||
// ClaimDiscovered and DismissDiscovered accept an explicit actor parameter so
|
||||
// the handler can flow the authenticated named-key identity into the audit
|
||||
// trail (M-005). Services that call these methods from non-request contexts
|
||||
// pass a descriptive sentinel (e.g., "system") or "" (which falls back to
|
||||
// "api").
|
||||
type DiscoveryService interface {
|
||||
ProcessDiscoveryReport(ctx context.Context, report *domain.DiscoveryReport) (*domain.DiscoveryScan, error)
|
||||
ListDiscovered(ctx context.Context, agentID, status string, page, perPage int) ([]*domain.DiscoveredCertificate, int, error)
|
||||
GetDiscovered(ctx context.Context, id string) (*domain.DiscoveredCertificate, error)
|
||||
ClaimDiscovered(ctx context.Context, id string, managedCertID string) error
|
||||
DismissDiscovered(ctx context.Context, id string) error
|
||||
ClaimDiscovered(ctx context.Context, id string, managedCertID string, actor string) error
|
||||
DismissDiscovered(ctx context.Context, id string, actor string) error
|
||||
ListScans(ctx context.Context, agentID string, page, perPage int) ([]*domain.DiscoveryScan, int, error)
|
||||
GetScan(ctx context.Context, id string) (*domain.DiscoveryScan, error)
|
||||
GetDiscoverySummary(ctx context.Context) (map[string]int, error)
|
||||
@@ -142,7 +147,7 @@ func (h DiscoveryHandler) ClaimDiscovered(w http.ResponseWriter, r *http.Request
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.svc.ClaimDiscovered(r.Context(), id, body.ManagedCertificateID); err != nil {
|
||||
if err := h.svc.ClaimDiscovered(r.Context(), id, body.ManagedCertificateID, resolveActor(r.Context())); err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to claim certificate: %v", err))
|
||||
return
|
||||
}
|
||||
@@ -166,7 +171,7 @@ func (h DiscoveryHandler) DismissDiscovered(w http.ResponseWriter, r *http.Reque
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.svc.DismissDiscovered(r.Context(), id); err != nil {
|
||||
if err := h.svc.DismissDiscovered(r.Context(), id, resolveActor(r.Context())); err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to dismiss certificate: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -19,8 +19,8 @@ type MockDiscoveryService struct {
|
||||
ProcessDiscoveryReportFn func(ctx context.Context, report *domain.DiscoveryReport) (*domain.DiscoveryScan, error)
|
||||
ListDiscoveredFn func(ctx context.Context, agentID, status string, page, perPage int) ([]*domain.DiscoveredCertificate, int, error)
|
||||
GetDiscoveredFn func(ctx context.Context, id string) (*domain.DiscoveredCertificate, error)
|
||||
ClaimDiscoveredFn func(ctx context.Context, id string, managedCertID string) error
|
||||
DismissDiscoveredFn func(ctx context.Context, id string) error
|
||||
ClaimDiscoveredFn func(ctx context.Context, id string, managedCertID string, actor string) error
|
||||
DismissDiscoveredFn func(ctx context.Context, id string, actor string) error
|
||||
ListScansFn func(ctx context.Context, agentID string, page, perPage int) ([]*domain.DiscoveryScan, int, error)
|
||||
GetScanFn func(ctx context.Context, id string) (*domain.DiscoveryScan, error)
|
||||
GetDiscoverySummaryFn func(ctx context.Context) (map[string]int, error)
|
||||
@@ -47,16 +47,16 @@ func (m *MockDiscoveryService) GetDiscovered(ctx context.Context, id string) (*d
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockDiscoveryService) ClaimDiscovered(ctx context.Context, id string, managedCertID string) error {
|
||||
func (m *MockDiscoveryService) ClaimDiscovered(ctx context.Context, id string, managedCertID string, actor string) error {
|
||||
if m.ClaimDiscoveredFn != nil {
|
||||
return m.ClaimDiscoveredFn(ctx, id, managedCertID)
|
||||
return m.ClaimDiscoveredFn(ctx, id, managedCertID, actor)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockDiscoveryService) DismissDiscovered(ctx context.Context, id string) error {
|
||||
func (m *MockDiscoveryService) DismissDiscovered(ctx context.Context, id string, actor string) error {
|
||||
if m.DismissDiscoveredFn != nil {
|
||||
return m.DismissDiscoveredFn(ctx, id)
|
||||
return m.DismissDiscoveredFn(ctx, id, actor)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -352,7 +352,7 @@ func TestGetDiscovered_NotFound(t *testing.T) {
|
||||
// Test ClaimDiscovered - success case
|
||||
func TestClaimDiscovered_Success(t *testing.T) {
|
||||
mock := &MockDiscoveryService{
|
||||
ClaimDiscoveredFn: func(ctx context.Context, id string, managedCertID string) error {
|
||||
ClaimDiscoveredFn: func(ctx context.Context, id string, managedCertID string, actor string) error {
|
||||
if id == "dcert-1" && managedCertID == "mc-prod-1" {
|
||||
return nil
|
||||
}
|
||||
@@ -411,7 +411,7 @@ func TestClaimDiscovered_MissingManagedCertID(t *testing.T) {
|
||||
// Test ClaimDiscovered - discovered cert not found
|
||||
func TestClaimDiscovered_NotFound(t *testing.T) {
|
||||
mock := &MockDiscoveryService{
|
||||
ClaimDiscoveredFn: func(ctx context.Context, id string, managedCertID string) error {
|
||||
ClaimDiscoveredFn: func(ctx context.Context, id string, managedCertID string, actor string) error {
|
||||
return fmt.Errorf("discovered certificate not found")
|
||||
},
|
||||
}
|
||||
@@ -438,7 +438,7 @@ func TestClaimDiscovered_NotFound(t *testing.T) {
|
||||
// Test DismissDiscovered - success case
|
||||
func TestDismissDiscovered_Success(t *testing.T) {
|
||||
mock := &MockDiscoveryService{
|
||||
DismissDiscoveredFn: func(ctx context.Context, id string) error {
|
||||
DismissDiscoveredFn: func(ctx context.Context, id string, actor string) error {
|
||||
if id == "dcert-1" {
|
||||
return nil
|
||||
}
|
||||
@@ -614,7 +614,7 @@ func TestGetDiscoverySummary_MethodNotAllowed(t *testing.T) {
|
||||
// Test DismissDiscovered - service error
|
||||
func TestDismissDiscovered_ServiceError(t *testing.T) {
|
||||
mock := &MockDiscoveryService{
|
||||
DismissDiscoveredFn: func(ctx context.Context, id string) error {
|
||||
DismissDiscoveredFn: func(ctx context.Context, id string, actor string) error {
|
||||
return fmt.Errorf("database error")
|
||||
},
|
||||
}
|
||||
|
||||
@@ -0,0 +1,117 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
"github.com/lib/pq"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// errToStatus is the single choke point that maps a service-layer or
|
||||
// repository-layer error to its HTTP status code. Before M-1 (P2), 42 switch
|
||||
// branches across 11 handler files classified errors via
|
||||
// `strings.Contains(err.Error(), ...)` substring matching — a pattern that
|
||||
// made every HTTP status mapping one sentinel-message reword away from silent
|
||||
// regression (see M-003 self-approval privilege boundary: a reword of
|
||||
// ErrSelfApproval.Error() would have demoted 403 Forbidden to 500 Internal
|
||||
// Server Error with no compile-time error, no test failure, and no observable
|
||||
// external signal).
|
||||
//
|
||||
// All handler branches now route through this function via errors.Is and
|
||||
// errors.As, which walks the wrap chain built by fmt.Errorf("%w: ...", ...).
|
||||
// The generic sentinels live in internal/service/errors.go; domain-specific
|
||||
// sentinels (ErrSelfApproval, ErrAgentIsSentinel, ErrBlockedByDependencies,
|
||||
// ErrForceReasonRequired, ErrAgentNotFound) wrap those generics via %w so both
|
||||
// errors.Is(err, ErrSelfApproval) and errors.Is(err, ErrForbidden) succeed on
|
||||
// the same wrapped error.
|
||||
//
|
||||
// # Dispatch order
|
||||
//
|
||||
// 1. ErrAgentRetired → 410 Gone. Tested FIRST. It is deliberately NOT wrapped
|
||||
// under any generic sentinel — 410 Gone is semantically distinct from
|
||||
// 403/404/409 (permanently-terminated resource identity that drives
|
||||
// deterministic agent-binary shutdown at cmd/agent/main.go:1291). Must
|
||||
// short-circuit before any generic check so wrapping can never demote it.
|
||||
// 2. ErrNotFound → 404 Not Found. Both service.ErrNotFound and
|
||||
// repository.ErrNotFound route here — repositories wrap sql.ErrNoRows with
|
||||
// repository.ErrNotFound so a "row not found" escapes the repo layer as a
|
||||
// typed sentinel rather than an untyped fmt.Errorf string. Tested BEFORE
|
||||
// ErrForbidden so RFC 7235's preference for hiding resource existence from
|
||||
// unauthorized callers is preserved (a caller who cannot see a resource
|
||||
// should get 404, not 403).
|
||||
// 3. ErrUnauthenticated → 401 Unauthorized. SCEP challenge-password mismatch
|
||||
// and similar credential failures.
|
||||
// 4. ErrForbidden → 403 Forbidden. M-003 gate. Tested BEFORE ErrValidation so
|
||||
// double-wrapping (e.g., a future fmt.Errorf("%w: ctx", ErrSelfApproval)
|
||||
// in a wrapping call site) cannot demote 403 to 400.
|
||||
// 5. ErrConflict / repository.ErrRenewalPolicyDuplicateName /
|
||||
// repository.ErrRenewalPolicyInUse → 409 Conflict. The repo-layer sentinels
|
||||
// are routed here explicitly so handlers do not need their own dispatch
|
||||
// tree for G-1's renewal-policy FK + unique-name violations.
|
||||
// 6. ErrValidation → 400 Bad Request. Generic input validation / malformed
|
||||
// request bodies / invalid state transitions that the caller could correct
|
||||
// by changing their request.
|
||||
// 7. ErrUnprocessable → 422 Unprocessable Entity. Distinct from
|
||||
// ErrValidation: ErrValidation is "caller sent bad input" (400), while
|
||||
// ErrUnprocessable is "caller's input was fine but our stored data can't
|
||||
// satisfy the operation" — e.g., an X.509 PEM in the inventory that fails
|
||||
// to decode. The pre-M-1 ExportPKCS12 handler pinned 422 on
|
||||
// strings.Contains(err.Error(), "cannot be parsed"); the sentinel makes
|
||||
// that dispatch survive message rewording.
|
||||
// 8. ErrNotImplemented → 501 Not Implemented. Reserved for feature-flag-gated
|
||||
// code paths.
|
||||
// 9. *pq.Error fallback on SQLSTATE 23503 (FK violation) / 23505 (unique
|
||||
// violation) → 409 Conflict. Final branch before the default 500. Anything
|
||||
// that reaches here is technically a code smell (the repository layer
|
||||
// should normally wrap driver errors into a typed sentinel) but the status
|
||||
// mapping is still correct.
|
||||
//
|
||||
// # Why a function, not a middleware
|
||||
//
|
||||
// Handlers must continue to call [Error] / [ErrorWithRequestID] with a
|
||||
// caller-chosen human-readable message (sometimes the wrapped err.Error(),
|
||||
// sometimes a redacted "internal error" for 500s per F-002). This function
|
||||
// gives handlers the status code; the handler keeps control of the body.
|
||||
func errToStatus(err error) int {
|
||||
if err == nil {
|
||||
return http.StatusOK
|
||||
}
|
||||
|
||||
switch {
|
||||
case errors.Is(err, service.ErrAgentRetired):
|
||||
return http.StatusGone // 410 — must short-circuit before generic dispatch
|
||||
case errors.Is(err, service.ErrNotFound),
|
||||
errors.Is(err, repository.ErrNotFound):
|
||||
return http.StatusNotFound // 404 — before ErrForbidden (RFC 7235 existence hiding)
|
||||
case errors.Is(err, service.ErrUnauthenticated):
|
||||
return http.StatusUnauthorized // 401
|
||||
case errors.Is(err, service.ErrForbidden):
|
||||
return http.StatusForbidden // 403 — before ErrValidation (preserves M-003 gate under double-wrap)
|
||||
case errors.Is(err, service.ErrConflict),
|
||||
errors.Is(err, repository.ErrRenewalPolicyDuplicateName),
|
||||
errors.Is(err, repository.ErrRenewalPolicyInUse):
|
||||
return http.StatusConflict // 409
|
||||
case errors.Is(err, service.ErrValidation):
|
||||
return http.StatusBadRequest // 400
|
||||
case errors.Is(err, service.ErrUnprocessable):
|
||||
return http.StatusUnprocessableEntity // 422 — stored-data-unparseable, not caller-input-bad
|
||||
case errors.Is(err, service.ErrNotImplemented):
|
||||
return http.StatusNotImplemented // 501
|
||||
}
|
||||
|
||||
// Driver-level fallback. Raw *pq.Error escaping the repository layer is a
|
||||
// code smell but a real escape hatch today — we still want a correct 409
|
||||
// instead of a generic 500 for FK/unique violations.
|
||||
var pgErr *pq.Error
|
||||
if errors.As(err, &pgErr) {
|
||||
switch pgErr.Code {
|
||||
case "23503", "23505":
|
||||
return http.StatusConflict
|
||||
}
|
||||
}
|
||||
|
||||
return http.StatusInternalServerError
|
||||
}
|
||||
@@ -0,0 +1,120 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/lib/pq"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// TestErrToStatus_DispatchMatrix pins the handler's single error → HTTP
|
||||
// status choke point. Each row covers one branch of the dispatch switch and
|
||||
// the dispatch order invariants documented in errors.go:
|
||||
//
|
||||
// - ErrAgentRetired FIRST (410 short-circuits before generic checks)
|
||||
// - ErrNotFound before ErrForbidden (RFC 7235 existence hiding)
|
||||
// - ErrForbidden before ErrValidation (preserves M-003 gate under double-wrap)
|
||||
// - Repo sentinels route to 409 alongside ErrConflict
|
||||
// - *pq.Error on 23503 / 23505 routes to 409 as the driver-level fallback
|
||||
// - Default path is 500
|
||||
func TestErrToStatus_DispatchMatrix(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
err error
|
||||
want int
|
||||
}{
|
||||
{"nil → 200", nil, http.StatusOK},
|
||||
|
||||
// Each generic sentinel resolves to its documented status code.
|
||||
{"ErrNotFound → 404", service.ErrNotFound, http.StatusNotFound},
|
||||
{"ErrValidation → 400", service.ErrValidation, http.StatusBadRequest},
|
||||
{"ErrConflict → 409", service.ErrConflict, http.StatusConflict},
|
||||
{"ErrForbidden → 403", service.ErrForbidden, http.StatusForbidden},
|
||||
{"ErrUnauthenticated → 401", service.ErrUnauthenticated, http.StatusUnauthorized},
|
||||
{"ErrNotImplemented → 501", service.ErrNotImplemented, http.StatusNotImplemented},
|
||||
|
||||
// Wrapped domain sentinels route through their generic wrap.
|
||||
{"ErrSelfApproval → 403 (via ErrForbidden)", service.ErrSelfApproval, http.StatusForbidden},
|
||||
{"ErrAgentIsSentinel → 403 (via ErrForbidden)", service.ErrAgentIsSentinel, http.StatusForbidden},
|
||||
{"ErrBlockedByDependencies → 409 (via ErrConflict)", service.ErrBlockedByDependencies, http.StatusConflict},
|
||||
{"ErrForceReasonRequired → 400 (via ErrValidation)", service.ErrForceReasonRequired, http.StatusBadRequest},
|
||||
{"ErrAgentNotFound → 400 (via ErrValidation)", service.ErrAgentNotFound, http.StatusBadRequest},
|
||||
|
||||
// ErrAgentRetired is standalone — 410 Gone must fire before any
|
||||
// generic dispatch. This locks in the semantic-distinct short-circuit.
|
||||
{"ErrAgentRetired → 410", service.ErrAgentRetired, http.StatusGone},
|
||||
|
||||
// Repository-layer sentinels (G-1 + M-1).
|
||||
{"repo.ErrNotFound → 404", repository.ErrNotFound, http.StatusNotFound},
|
||||
{"wrapped repo.ErrNotFound → 404",
|
||||
fmt.Errorf("%w: renewal policy rp-foo", repository.ErrNotFound),
|
||||
http.StatusNotFound},
|
||||
{"repo.ErrRenewalPolicyDuplicateName → 409", repository.ErrRenewalPolicyDuplicateName, http.StatusConflict},
|
||||
{"repo.ErrRenewalPolicyInUse → 409", repository.ErrRenewalPolicyInUse, http.StatusConflict},
|
||||
|
||||
// Wrapped errors with additional context survive the dispatch.
|
||||
{"wrapped ErrNotFound with context → 404",
|
||||
fmt.Errorf("lookup failed: %w", service.ErrNotFound),
|
||||
http.StatusNotFound},
|
||||
{"wrapped ErrSelfApproval with context → 403",
|
||||
fmt.Errorf("approval gate: %w", service.ErrSelfApproval),
|
||||
http.StatusForbidden},
|
||||
|
||||
// Driver-level fallback: raw *pq.Error escaping repo layer.
|
||||
{"*pq.Error 23503 → 409", &pq.Error{Code: "23503"}, http.StatusConflict},
|
||||
{"*pq.Error 23505 → 409", &pq.Error{Code: "23505"}, http.StatusConflict},
|
||||
{"*pq.Error 08006 → 500", &pq.Error{Code: "08006"}, http.StatusInternalServerError},
|
||||
|
||||
// Default path.
|
||||
{"unknown error → 500", errors.New("something arbitrary"), http.StatusInternalServerError},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := errToStatus(c.err)
|
||||
if got != c.want {
|
||||
t.Errorf("errToStatus(%v) = %d, want %d", c.err, got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestErrToStatus_AgentRetiredShortCircuit is a dedicated regression guard
|
||||
// for the most fragile dispatch invariant: ErrAgentRetired's 410 Gone must
|
||||
// fire FIRST. If a future commit wraps it under ErrForbidden (e.g., to
|
||||
// include it in a generic "agent operations forbidden" bucket), this test
|
||||
// goes red and the agent-binary shutdown at cmd/agent/main.go:1291 would
|
||||
// silently stop triggering.
|
||||
func TestErrToStatus_AgentRetiredShortCircuit(t *testing.T) {
|
||||
if got := errToStatus(service.ErrAgentRetired); got != http.StatusGone {
|
||||
t.Fatalf("ErrAgentRetired → %d, want 410 Gone (short-circuit must fire before any generic dispatch)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestErrToStatus_NotFoundBeforeForbidden locks the RFC 7235 existence-
|
||||
// hiding dispatch order. If someone were to reorder the switch arms to put
|
||||
// ErrForbidden first, an authorization failure on a nonexistent resource
|
||||
// would leak existence via a 403 instead of masking it with a 404.
|
||||
func TestErrToStatus_NotFoundBeforeForbidden(t *testing.T) {
|
||||
// A hypothetical wrapping where both would match — contrived but the
|
||||
// ordering guarantee is what we're testing.
|
||||
both := fmt.Errorf("%w: layered with %w", service.ErrNotFound, service.ErrForbidden)
|
||||
if got := errToStatus(both); got != http.StatusNotFound {
|
||||
t.Errorf("dual-wrapped err → %d, want 404 (ErrNotFound must dispatch before ErrForbidden)", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestErrToStatus_ForbiddenBeforeValidation guards the M-003 self-approval
|
||||
// gate against a future call site that double-wraps ErrSelfApproval under
|
||||
// ErrValidation (intentionally or accidentally). The dispatch must pick
|
||||
// 403, not 400.
|
||||
func TestErrToStatus_ForbiddenBeforeValidation(t *testing.T) {
|
||||
doubled := fmt.Errorf("%w: %w", service.ErrSelfApproval, service.ErrValidation)
|
||||
if got := errToStatus(doubled); got != http.StatusForbidden {
|
||||
t.Errorf("double-wrapped err → %d, want 403 (ErrForbidden must dispatch before ErrValidation — M-003 gate)", got)
|
||||
}
|
||||
}
|
||||
+8
-134
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/pkcs7"
|
||||
)
|
||||
|
||||
// ESTService defines the service interface for EST enrollment operations.
|
||||
@@ -67,7 +68,7 @@ func (h ESTHandler) CACerts(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// Parse PEM to DER for PKCS#7 encoding
|
||||
derCerts, err := pemToDERChain(caCertPEM)
|
||||
derCerts, err := pkcs7.PEMToDERChain(caCertPEM)
|
||||
if err != nil {
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to encode CA certificates", requestID)
|
||||
@@ -75,7 +76,7 @@ func (h ESTHandler) CACerts(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// Build a simple PKCS#7 SignedData (certs-only, degenerate) structure
|
||||
pkcs7Data, err := buildCertsOnlyPKCS7(derCerts)
|
||||
pkcs7Data, err := pkcs7.BuildCertsOnlyPKCS7(derCerts)
|
||||
if err != nil {
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to build PKCS#7 response", requestID)
|
||||
@@ -237,7 +238,7 @@ func (h ESTHandler) writeCertResponse(w http.ResponseWriter, result *domain.ESTE
|
||||
var derCerts [][]byte
|
||||
|
||||
// Add the issued certificate
|
||||
certDER, err := pemToDERChain(result.CertPEM)
|
||||
certDER, err := pkcs7.PEMToDERChain(result.CertPEM)
|
||||
if err != nil || len(certDER) == 0 {
|
||||
http.Error(w, "Failed to encode certificate", http.StatusInternalServerError)
|
||||
return
|
||||
@@ -246,14 +247,14 @@ func (h ESTHandler) writeCertResponse(w http.ResponseWriter, result *domain.ESTE
|
||||
|
||||
// Add the CA chain if present
|
||||
if result.ChainPEM != "" {
|
||||
chainDER, err := pemToDERChain(result.ChainPEM)
|
||||
chainDER, err := pkcs7.PEMToDERChain(result.ChainPEM)
|
||||
if err == nil {
|
||||
derCerts = append(derCerts, chainDER...)
|
||||
}
|
||||
}
|
||||
|
||||
// Build PKCS#7 certs-only
|
||||
pkcs7Data, err := buildCertsOnlyPKCS7(derCerts)
|
||||
pkcs7Data, err := pkcs7.BuildCertsOnlyPKCS7(derCerts)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to build PKCS#7 response", http.StatusInternalServerError)
|
||||
return
|
||||
@@ -273,132 +274,5 @@ func (h ESTHandler) writeCertResponse(w http.ResponseWriter, result *domain.ESTE
|
||||
}
|
||||
}
|
||||
|
||||
// pemToDERChain converts PEM-encoded certificates to a slice of DER-encoded certificates.
|
||||
func pemToDERChain(pemData string) ([][]byte, error) {
|
||||
var derCerts [][]byte
|
||||
rest := []byte(pemData)
|
||||
for {
|
||||
var block *pem.Block
|
||||
block, rest = pem.Decode(rest)
|
||||
if block == nil {
|
||||
break
|
||||
}
|
||||
if block.Type == "CERTIFICATE" {
|
||||
derCerts = append(derCerts, block.Bytes)
|
||||
}
|
||||
}
|
||||
if len(derCerts) == 0 {
|
||||
return nil, fmt.Errorf("no certificates found in PEM data")
|
||||
}
|
||||
return derCerts, nil
|
||||
}
|
||||
|
||||
// buildCertsOnlyPKCS7 creates a degenerate PKCS#7 SignedData structure containing only certificates.
|
||||
// This is the "certs-only" format specified in RFC 7030 Section 4.1.3 for /cacerts responses
|
||||
// and enrollment responses.
|
||||
//
|
||||
// ASN.1 structure (simplified):
|
||||
//
|
||||
// ContentInfo {
|
||||
// contentType: signedData (1.2.840.113549.1.7.2)
|
||||
// content: SignedData {
|
||||
// version: 1
|
||||
// digestAlgorithms: {} (empty)
|
||||
// encapContentInfo: { contentType: data (1.2.840.113549.1.7.1) }
|
||||
// certificates: [cert1, cert2, ...]
|
||||
// signerInfos: {} (empty)
|
||||
// }
|
||||
// }
|
||||
func buildCertsOnlyPKCS7(derCerts [][]byte) ([]byte, error) {
|
||||
// We build the ASN.1 manually to avoid pulling in a PKCS#7 library.
|
||||
// This is a well-defined, static structure — no signing needed.
|
||||
|
||||
// OID for signedData: 1.2.840.113549.1.7.2
|
||||
oidSignedData := []byte{0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x07, 0x02}
|
||||
// OID for data: 1.2.840.113549.1.7.1
|
||||
oidData := []byte{0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x07, 0x01}
|
||||
|
||||
// Build certificates [0] IMPLICIT SET OF Certificate
|
||||
var certsContent []byte
|
||||
for _, cert := range derCerts {
|
||||
certsContent = append(certsContent, cert...)
|
||||
}
|
||||
certsField := asn1WrapImplicit(0, certsContent)
|
||||
|
||||
// Build encapContentInfo: SEQUENCE { OID data }
|
||||
encapContentInfo := asn1WrapSequence(oidData)
|
||||
|
||||
// Build digestAlgorithms: SET {} (empty)
|
||||
digestAlgorithms := asn1WrapSet(nil)
|
||||
|
||||
// Build signerInfos: SET {} (empty)
|
||||
signerInfos := asn1WrapSet(nil)
|
||||
|
||||
// Version: INTEGER 1
|
||||
version := []byte{0x02, 0x01, 0x01}
|
||||
|
||||
// Build SignedData SEQUENCE
|
||||
var signedDataContent []byte
|
||||
signedDataContent = append(signedDataContent, version...)
|
||||
signedDataContent = append(signedDataContent, digestAlgorithms...)
|
||||
signedDataContent = append(signedDataContent, encapContentInfo...)
|
||||
signedDataContent = append(signedDataContent, certsField...)
|
||||
signedDataContent = append(signedDataContent, signerInfos...)
|
||||
signedData := asn1WrapSequence(signedDataContent)
|
||||
|
||||
// Wrap in [0] EXPLICIT for ContentInfo.content
|
||||
contentField := asn1WrapExplicit(0, signedData)
|
||||
|
||||
// Build ContentInfo SEQUENCE
|
||||
var contentInfoContent []byte
|
||||
contentInfoContent = append(contentInfoContent, oidSignedData...)
|
||||
contentInfoContent = append(contentInfoContent, contentField...)
|
||||
contentInfo := asn1WrapSequence(contentInfoContent)
|
||||
|
||||
return contentInfo, nil
|
||||
}
|
||||
|
||||
// asn1WrapSequence wraps content in an ASN.1 SEQUENCE tag (0x30).
|
||||
func asn1WrapSequence(content []byte) []byte {
|
||||
return asn1Wrap(0x30, content)
|
||||
}
|
||||
|
||||
// asn1WrapSet wraps content in an ASN.1 SET tag (0x31).
|
||||
func asn1WrapSet(content []byte) []byte {
|
||||
return asn1Wrap(0x31, content)
|
||||
}
|
||||
|
||||
// asn1WrapExplicit wraps content in an ASN.1 context-specific EXPLICIT tag.
|
||||
func asn1WrapExplicit(tag int, content []byte) []byte {
|
||||
return asn1Wrap(byte(0xa0|tag), content)
|
||||
}
|
||||
|
||||
// asn1WrapImplicit wraps content in an ASN.1 context-specific IMPLICIT CONSTRUCTED tag.
|
||||
func asn1WrapImplicit(tag int, content []byte) []byte {
|
||||
return asn1Wrap(byte(0xa0|tag), content)
|
||||
}
|
||||
|
||||
// asn1Wrap wraps content with an ASN.1 tag and length.
|
||||
func asn1Wrap(tag byte, content []byte) []byte {
|
||||
length := len(content)
|
||||
var result []byte
|
||||
result = append(result, tag)
|
||||
result = append(result, asn1EncodeLength(length)...)
|
||||
result = append(result, content...)
|
||||
return result
|
||||
}
|
||||
|
||||
// asn1EncodeLength encodes a length in ASN.1 DER format.
|
||||
func asn1EncodeLength(length int) []byte {
|
||||
if length < 0x80 {
|
||||
return []byte{byte(length)}
|
||||
}
|
||||
// Long form
|
||||
var lengthBytes []byte
|
||||
l := length
|
||||
for l > 0 {
|
||||
lengthBytes = append([]byte{byte(l & 0xff)}, lengthBytes...)
|
||||
l >>= 8
|
||||
}
|
||||
return append([]byte{byte(0x80 | len(lengthBytes))}, lengthBytes...)
|
||||
}
|
||||
// NOTE: PKCS#7 helpers (BuildCertsOnlyPKCS7, PEMToDERChain, ASN.1 wrappers)
|
||||
// are in the shared internal/pkcs7 package, used by both EST and SCEP handlers.
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/pkcs7"
|
||||
)
|
||||
|
||||
// mockESTService implements ESTService for testing.
|
||||
@@ -338,12 +339,12 @@ func TestESTCSRAttrs_MethodNotAllowed(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildCertsOnlyPKCS7(t *testing.T) {
|
||||
// Test with a dummy DER certificate
|
||||
func TestBuildCertsOnlyPKCS7_ViaSharedPackage(t *testing.T) {
|
||||
// Test with a dummy DER certificate via shared pkcs7 package
|
||||
dummyCert := []byte{0x30, 0x82, 0x01, 0x00} // minimal ASN.1 SEQUENCE
|
||||
result, err := buildCertsOnlyPKCS7([][]byte{dummyCert})
|
||||
result, err := pkcs7.BuildCertsOnlyPKCS7([][]byte{dummyCert})
|
||||
if err != nil {
|
||||
t.Fatalf("buildCertsOnlyPKCS7 failed: %v", err)
|
||||
t.Fatalf("BuildCertsOnlyPKCS7 failed: %v", err)
|
||||
}
|
||||
if len(result) == 0 {
|
||||
t.Error("expected non-empty PKCS#7 output")
|
||||
@@ -354,49 +355,24 @@ func TestBuildCertsOnlyPKCS7(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPemToDERChain(t *testing.T) {
|
||||
func TestPemToDERChain_ViaSharedPackage(t *testing.T) {
|
||||
pemData := generateTestCertPEM(t)
|
||||
certs, err := pemToDERChain(pemData)
|
||||
certs, err := pkcs7.PEMToDERChain(pemData)
|
||||
if err != nil {
|
||||
t.Fatalf("pemToDERChain failed: %v", err)
|
||||
t.Fatalf("PEMToDERChain failed: %v", err)
|
||||
}
|
||||
if len(certs) != 1 {
|
||||
t.Errorf("expected 1 cert, got %d", len(certs))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPemToDERChain_NoCerts(t *testing.T) {
|
||||
_, err := pemToDERChain("not a PEM")
|
||||
func TestPemToDERChain_NoCerts_ViaSharedPackage(t *testing.T) {
|
||||
_, err := pkcs7.PEMToDERChain("not a PEM")
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid PEM")
|
||||
}
|
||||
}
|
||||
|
||||
func TestASN1EncodeLength(t *testing.T) {
|
||||
tests := []struct {
|
||||
length int
|
||||
expected []byte
|
||||
}{
|
||||
{0, []byte{0x00}},
|
||||
{1, []byte{0x01}},
|
||||
{127, []byte{0x7f}},
|
||||
{128, []byte{0x81, 0x80}},
|
||||
{256, []byte{0x82, 0x01, 0x00}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
result := asn1EncodeLength(tt.length)
|
||||
if len(result) != len(tt.expected) {
|
||||
t.Errorf("asn1EncodeLength(%d): expected %d bytes, got %d", tt.length, len(tt.expected), len(result))
|
||||
continue
|
||||
}
|
||||
for i := range result {
|
||||
if result[i] != tt.expected[i] {
|
||||
t.Errorf("asn1EncodeLength(%d): byte %d: expected 0x%02x, got 0x%02x", tt.length, i, tt.expected[i], result[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestESTCSRAttrs_ServiceError(t *testing.T) {
|
||||
svc := &mockESTService{
|
||||
CSRAttrsErr: errors.New("service error"),
|
||||
|
||||
@@ -46,12 +46,26 @@ func (h ExportHandler) ExportPEM(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
result, err := h.svc.ExportPEM(r.Context(), id)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
// M-1 (P2): dispatch routes through errToStatus. Pre-M-1 this branch
|
||||
// classified 404 via strings.Contains(err.Error(), "not found"), which
|
||||
// gave false positives on any error whose rendered text happened to
|
||||
// contain "not found" — notably a transient DB failure when the service
|
||||
// layer wrapped every certRepo.Get error with "certificate not found".
|
||||
// Post-M-1: service/export.go now wraps with "failed to get certificate"
|
||||
// and only the genuine sql.ErrNoRows path surfaces repository.ErrNotFound
|
||||
// through the wrap chain, so errors.Is(err, repository.ErrNotFound) picks
|
||||
// up the real 404s and everything else — including transient DB errors —
|
||||
// correctly surfaces as 500 with server-side slog.Error capture (F-002
|
||||
// redacted-500 pattern preserved).
|
||||
status := errToStatus(err)
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("ExportPEM failed", "cert_id", id, "error", err.Error())
|
||||
}
|
||||
slog.Error("ExportPEM failed", "cert_id", id, "error", err.Error())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to export certificate", requestID)
|
||||
msg := "Failed to export certificate"
|
||||
if status == http.StatusNotFound {
|
||||
msg = "Certificate not found"
|
||||
}
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -94,16 +108,32 @@ func (h ExportHandler) ExportPKCS12(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
pfxData, err := h.svc.ExportPKCS12(r.Context(), id, req.Password)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID)
|
||||
return
|
||||
// M-1 (P2): dispatch routes through errToStatus. The pre-M-1 3-term
|
||||
// substring net (`"not found"|"cannot be parsed"|"no certificates
|
||||
// found"`) is replaced with sentinel dispatch:
|
||||
// - repository.ErrNotFound (from certificate.go Get/GetLatestVersion
|
||||
// sql.ErrNoRows wrap) → 404
|
||||
// - service.ErrUnprocessable (from service/export.go ExportPKCS12's
|
||||
// parsePEMCertificates-failure and empty-chain wraps) → 422 —
|
||||
// semantically correct because the caller's request is fine; our
|
||||
// stored PEM chain is what cannot be processed
|
||||
// - everything else → 500 with slog.Error capture (F-002 redacted-500
|
||||
// pattern preserved)
|
||||
// A transient DB failure that pre-M-1 would have been swept into the
|
||||
// 404 substring branch (because the service wrapped every certRepo.Get
|
||||
// error with "certificate not found") now correctly surfaces as 500.
|
||||
status := errToStatus(err)
|
||||
if status == http.StatusInternalServerError {
|
||||
slog.Error("ExportPKCS12 failed", "cert_id", id, "error", err.Error())
|
||||
}
|
||||
if strings.Contains(err.Error(), "cannot be parsed") || strings.Contains(err.Error(), "no certificates found") {
|
||||
ErrorWithRequestID(w, http.StatusUnprocessableEntity, "Certificate data cannot be parsed as X.509", requestID)
|
||||
return
|
||||
msg := "Failed to export PKCS#12"
|
||||
switch status {
|
||||
case http.StatusNotFound:
|
||||
msg = "Certificate not found"
|
||||
case http.StatusUnprocessableEntity:
|
||||
msg = "Certificate data cannot be parsed as X.509"
|
||||
}
|
||||
slog.Error("ExportPKCS12 failed", "cert_id", id, "error", err.Error())
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to export PKCS#12", requestID)
|
||||
ErrorWithRequestID(w, status, msg, requestID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -108,9 +108,17 @@ func TestExportPEM_Download(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestExportPEM_NotFound(t *testing.T) {
|
||||
// M-1 (P2): wrap with service.ErrNotFound via %w so the handler's
|
||||
// errToStatus choke point dispatches to 404 via errors.Is. Pre-M-1 this
|
||||
// test used a raw `fmt.Errorf("certificate not found")` string and relied
|
||||
// on the handler's strings.Contains(err.Error(), "not found") classifier
|
||||
// — which was the same mechanism that silently misclassified transient DB
|
||||
// failures whose text happened to include "not found" (see docblock on
|
||||
// ExportPEM handler). Pinning the sentinel contract makes this test
|
||||
// regression-proof against wrap-text changes.
|
||||
mockSvc := &MockExportService{
|
||||
ExportPEMFn: func(_ context.Context, _ string) (*service.ExportPEMResult, error) {
|
||||
return nil, fmt.Errorf("certificate not found")
|
||||
return nil, fmt.Errorf("%w: certificate", service.ErrNotFound)
|
||||
},
|
||||
}
|
||||
h := NewExportHandler(mockSvc)
|
||||
@@ -214,9 +222,11 @@ func TestExportPKCS12_EmptyPassword(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestExportPKCS12_NotFound(t *testing.T) {
|
||||
// M-1 (P2): same sentinel migration as TestExportPEM_NotFound — see
|
||||
// rationale there.
|
||||
mockSvc := &MockExportService{
|
||||
ExportPKCS12Fn: func(_ context.Context, _ string, _ string) ([]byte, error) {
|
||||
return nil, fmt.Errorf("certificate not found")
|
||||
return nil, fmt.Errorf("%w: certificate", service.ErrNotFound)
|
||||
},
|
||||
}
|
||||
h := NewExportHandler(mockSvc)
|
||||
@@ -231,6 +241,31 @@ func TestExportPKCS12_NotFound(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestExportPKCS12_Unprocessable pins the M-1 (P2) 422 contract: when the
|
||||
// service layer wraps a parse failure with service.ErrUnprocessable, the
|
||||
// handler's errToStatus choke point must dispatch to 422 Unprocessable
|
||||
// Entity. Pre-M-1 this was classified via a 2-term substring net
|
||||
// (`"cannot be parsed"|"no certificates found"`) at export.go:101, which
|
||||
// would have been silently broken by a message reword in service/export.go.
|
||||
// The new sentinel makes the dispatch survive message rewording.
|
||||
func TestExportPKCS12_Unprocessable(t *testing.T) {
|
||||
mockSvc := &MockExportService{
|
||||
ExportPKCS12Fn: func(_ context.Context, _ string, _ string) ([]byte, error) {
|
||||
return nil, fmt.Errorf("%w: certificate data cannot be parsed as X.509: asn1 decode error", service.ErrUnprocessable)
|
||||
},
|
||||
}
|
||||
h := NewExportHandler(mockSvc)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/mc-test-1/export/pkcs12", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
h.ExportPKCS12(w, req)
|
||||
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Fatalf("expected 422, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExportPKCS12_ServiceError(t *testing.T) {
|
||||
mockSvc := &MockExportService{
|
||||
ExportPKCS12Fn: func(_ context.Context, _ string, _ string) ([]byte, error) {
|
||||
|
||||
@@ -2,6 +2,8 @@ package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
)
|
||||
|
||||
// HealthHandler handles health and readiness check endpoints.
|
||||
@@ -55,9 +57,23 @@ func (h HealthHandler) AuthInfo(w http.ResponseWriter, r *http.Request) {
|
||||
JSON(w, http.StatusOK, response)
|
||||
}
|
||||
|
||||
// AuthCheck returns 200 if the request has valid auth credentials.
|
||||
// The auth middleware runs before this handler, so reaching here means auth passed.
|
||||
// AuthCheck returns 200 if the request has valid auth credentials, along with
|
||||
// the resolved named-key identity and admin flag so the GUI can gate
|
||||
// admin-only affordances (e.g., the bulk-revoke button).
|
||||
//
|
||||
// M-003 (Phase B.4): surface the admin flag so the frontend hides affordances
|
||||
// that would otherwise 403 at the server. This is a hint for UX only —
|
||||
// authorization remains enforced at the handler layer (bulk_revocation.go).
|
||||
//
|
||||
// The auth middleware runs before this handler, so reaching here means auth
|
||||
// passed. `user` falls back to an empty string when auth is disabled
|
||||
// (CERTCTL_AUTH_TYPE=none).
|
||||
// GET /api/v1/auth/check
|
||||
func (h HealthHandler) AuthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
JSON(w, http.StatusOK, map[string]string{"status": "authenticated"})
|
||||
response := map[string]interface{}{
|
||||
"status": "authenticated",
|
||||
"user": middleware.GetUser(r.Context()),
|
||||
"admin": middleware.IsAdmin(r.Context()),
|
||||
}
|
||||
JSON(w, http.StatusOK, response)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,308 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
)
|
||||
|
||||
// HealthCheckServicer defines the interface used by the health check handler.
|
||||
type HealthCheckServicer interface {
|
||||
Create(ctx context.Context, check *domain.EndpointHealthCheck) error
|
||||
Get(ctx context.Context, id string) (*domain.EndpointHealthCheck, error)
|
||||
Update(ctx context.Context, check *domain.EndpointHealthCheck) error
|
||||
Delete(ctx context.Context, id string) error
|
||||
List(ctx context.Context, filter *repository.HealthCheckFilter) ([]*domain.EndpointHealthCheck, int, error)
|
||||
GetHistory(ctx context.Context, healthCheckID string, limit int) ([]*domain.HealthHistoryEntry, error)
|
||||
AcknowledgeIncident(ctx context.Context, id string, actor string) error
|
||||
GetSummary(ctx context.Context) (*domain.HealthCheckSummary, error)
|
||||
}
|
||||
|
||||
// HealthCheckHandler handles HTTP requests for TLS health monitoring.
|
||||
type HealthCheckHandler struct {
|
||||
service HealthCheckServicer
|
||||
}
|
||||
|
||||
// NewHealthCheckHandler creates a new health check handler.
|
||||
func NewHealthCheckHandler(service HealthCheckServicer) *HealthCheckHandler {
|
||||
return &HealthCheckHandler{service: service}
|
||||
}
|
||||
|
||||
// ListHealthChecks handles GET /api/v1/health-checks
|
||||
func (h *HealthCheckHandler) ListHealthChecks(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
query := r.URL.Query()
|
||||
status := query.Get("status")
|
||||
certificateID := query.Get("certificate_id")
|
||||
networkScanTargetID := query.Get("network_scan_target_id")
|
||||
enabledStr := query.Get("enabled")
|
||||
page := parseIntDefault(query.Get("page"), 1)
|
||||
perPage := parseIntDefault(query.Get("per_page"), 50)
|
||||
if perPage > 500 {
|
||||
perPage = 50
|
||||
}
|
||||
|
||||
// Parse enabled flag if provided
|
||||
var enabledFilter *bool
|
||||
if enabledStr != "" {
|
||||
enabled := enabledStr == "true"
|
||||
enabledFilter = &enabled
|
||||
}
|
||||
|
||||
filter := &repository.HealthCheckFilter{
|
||||
Status: status,
|
||||
CertificateID: certificateID,
|
||||
NetworkScanTargetID: networkScanTargetID,
|
||||
Enabled: enabledFilter,
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
}
|
||||
|
||||
checks, total, err := h.service.List(r.Context(), filter)
|
||||
if err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to list health checks: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if checks == nil {
|
||||
checks = make([]*domain.EndpointHealthCheck, 0)
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, PagedResponse{
|
||||
Data: checks,
|
||||
Total: int64(total),
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
})
|
||||
}
|
||||
|
||||
// GetHealthCheck handles GET /api/v1/health-checks/{id}
|
||||
func (h *HealthCheckHandler) GetHealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
id := r.PathValue("id")
|
||||
if id == "" {
|
||||
Error(w, http.StatusBadRequest, "health check ID is required")
|
||||
return
|
||||
}
|
||||
|
||||
check, err := h.service.Get(r.Context(), id)
|
||||
if err != nil {
|
||||
Error(w, http.StatusNotFound, fmt.Sprintf("health check not found: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, check)
|
||||
}
|
||||
|
||||
// CreateHealthCheck handles POST /api/v1/health-checks
|
||||
func (h *HealthCheckHandler) CreateHealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
var check domain.EndpointHealthCheck
|
||||
if err := json.NewDecoder(r.Body).Decode(&check); err != nil {
|
||||
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if check.Endpoint == "" {
|
||||
Error(w, http.StatusBadRequest, "endpoint is required")
|
||||
return
|
||||
}
|
||||
|
||||
// Set defaults
|
||||
if check.CheckIntervalSecs <= 0 {
|
||||
check.CheckIntervalSecs = 300
|
||||
}
|
||||
if check.DegradedThreshold <= 0 {
|
||||
check.DegradedThreshold = 2
|
||||
}
|
||||
if check.DownThreshold <= 0 {
|
||||
check.DownThreshold = 5
|
||||
}
|
||||
if check.Status == "" {
|
||||
check.Status = domain.HealthStatusUnknown
|
||||
}
|
||||
|
||||
if err := h.service.Create(r.Context(), &check); err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to create health check: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusCreated, check)
|
||||
}
|
||||
|
||||
// UpdateHealthCheck handles PUT /api/v1/health-checks/{id}
|
||||
func (h *HealthCheckHandler) UpdateHealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPut {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
id := r.PathValue("id")
|
||||
if id == "" {
|
||||
Error(w, http.StatusBadRequest, "health check ID is required")
|
||||
return
|
||||
}
|
||||
|
||||
// Get existing check
|
||||
existing, err := h.service.Get(r.Context(), id)
|
||||
if err != nil {
|
||||
Error(w, http.StatusNotFound, fmt.Sprintf("health check not found: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
var updates domain.EndpointHealthCheck
|
||||
if err := json.NewDecoder(r.Body).Decode(&updates); err != nil {
|
||||
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// Merge updates (only update provided fields)
|
||||
if updates.Endpoint != "" {
|
||||
existing.Endpoint = updates.Endpoint
|
||||
}
|
||||
if updates.ExpectedFingerprint != "" {
|
||||
existing.ExpectedFingerprint = updates.ExpectedFingerprint
|
||||
}
|
||||
if updates.CheckIntervalSecs > 0 {
|
||||
existing.CheckIntervalSecs = updates.CheckIntervalSecs
|
||||
}
|
||||
if updates.DegradedThreshold > 0 {
|
||||
existing.DegradedThreshold = updates.DegradedThreshold
|
||||
}
|
||||
if updates.DownThreshold > 0 {
|
||||
existing.DownThreshold = updates.DownThreshold
|
||||
}
|
||||
existing.Enabled = updates.Enabled
|
||||
|
||||
if err := h.service.Update(r.Context(), existing); err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to update health check: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, existing)
|
||||
}
|
||||
|
||||
// DeleteHealthCheck handles DELETE /api/v1/health-checks/{id}
|
||||
func (h *HealthCheckHandler) DeleteHealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodDelete {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
id := r.PathValue("id")
|
||||
if id == "" {
|
||||
Error(w, http.StatusBadRequest, "health check ID is required")
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.service.Delete(r.Context(), id); err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to delete health check: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
// GetHealthCheckHistory handles GET /api/v1/health-checks/{id}/history
|
||||
func (h *HealthCheckHandler) GetHealthCheckHistory(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
id := r.PathValue("id")
|
||||
if id == "" {
|
||||
Error(w, http.StatusBadRequest, "health check ID is required")
|
||||
return
|
||||
}
|
||||
|
||||
limitStr := r.URL.Query().Get("limit")
|
||||
limit := 100
|
||||
if limitStr != "" {
|
||||
if l, err := strconv.Atoi(limitStr); err == nil && l > 0 {
|
||||
limit = l
|
||||
}
|
||||
}
|
||||
if limit > 1000 {
|
||||
limit = 1000
|
||||
}
|
||||
|
||||
history, err := h.service.GetHistory(r.Context(), id, limit)
|
||||
if err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to get health check history: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if history == nil {
|
||||
history = make([]*domain.HealthHistoryEntry, 0)
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, history)
|
||||
}
|
||||
|
||||
// AcknowledgeHealthCheck handles POST /api/v1/health-checks/{id}/acknowledge
|
||||
func (h *HealthCheckHandler) AcknowledgeHealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
id := r.PathValue("id")
|
||||
if id == "" {
|
||||
Error(w, http.StatusBadRequest, "health check ID is required")
|
||||
return
|
||||
}
|
||||
|
||||
var req struct {
|
||||
Actor string `json:"actor,omitempty"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
Error(w, http.StatusBadRequest, fmt.Sprintf("invalid request body: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if req.Actor == "" {
|
||||
req.Actor = "unknown"
|
||||
}
|
||||
|
||||
if err := h.service.AcknowledgeIncident(r.Context(), id, req.Actor); err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to acknowledge health check: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
// GetHealthCheckSummary handles GET /api/v1/health-checks/summary
|
||||
// This route must be registered BEFORE the /{id} routes
|
||||
func (h *HealthCheckHandler) GetHealthCheckSummary(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
summary, err := h.service.GetSummary(r.Context())
|
||||
if err != nil {
|
||||
Error(w, http.StatusInternalServerError, fmt.Sprintf("failed to get health check summary: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, summary)
|
||||
}
|
||||
@@ -0,0 +1,305 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
)
|
||||
|
||||
// mockHealthCheckSvc implements HealthCheckServicer for testing.
|
||||
type mockHealthCheckSvc struct {
|
||||
createErr error
|
||||
getErr error
|
||||
updateErr error
|
||||
deleteErr error
|
||||
listErr error
|
||||
getHistoryErr error
|
||||
acknowledgeErr error
|
||||
getSummaryErr error
|
||||
checks map[string]*domain.EndpointHealthCheck
|
||||
summary *domain.HealthCheckSummary
|
||||
}
|
||||
|
||||
func newMockHealthCheckSvc() *mockHealthCheckSvc {
|
||||
return &mockHealthCheckSvc{
|
||||
checks: make(map[string]*domain.EndpointHealthCheck),
|
||||
summary: &domain.HealthCheckSummary{
|
||||
Healthy: 1,
|
||||
Degraded: 0,
|
||||
Down: 0,
|
||||
CertMismatch: 0,
|
||||
Unknown: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) Create(ctx context.Context, check *domain.EndpointHealthCheck) error {
|
||||
if m.createErr != nil {
|
||||
return m.createErr
|
||||
}
|
||||
check.ID = "hc-created-1"
|
||||
m.checks[check.ID] = check
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) Get(ctx context.Context, id string) (*domain.EndpointHealthCheck, error) {
|
||||
if m.getErr != nil {
|
||||
return nil, m.getErr
|
||||
}
|
||||
if check, ok := m.checks[id]; ok {
|
||||
return check, nil
|
||||
}
|
||||
return nil, errors.New("not found")
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) Update(ctx context.Context, check *domain.EndpointHealthCheck) error {
|
||||
if m.updateErr != nil {
|
||||
return m.updateErr
|
||||
}
|
||||
m.checks[check.ID] = check
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) Delete(ctx context.Context, id string) error {
|
||||
if m.deleteErr != nil {
|
||||
return m.deleteErr
|
||||
}
|
||||
delete(m.checks, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) List(ctx context.Context, filter *repository.HealthCheckFilter) ([]*domain.EndpointHealthCheck, int, error) {
|
||||
if m.listErr != nil {
|
||||
return nil, 0, m.listErr
|
||||
}
|
||||
checks := make([]*domain.EndpointHealthCheck, 0, len(m.checks))
|
||||
for _, check := range m.checks {
|
||||
checks = append(checks, check)
|
||||
}
|
||||
return checks, len(checks), nil
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) GetHistory(ctx context.Context, healthCheckID string, limit int) ([]*domain.HealthHistoryEntry, error) {
|
||||
if m.getHistoryErr != nil {
|
||||
return nil, m.getHistoryErr
|
||||
}
|
||||
return make([]*domain.HealthHistoryEntry, 0), nil
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) AcknowledgeIncident(ctx context.Context, id string, actor string) error {
|
||||
if m.acknowledgeErr != nil {
|
||||
return m.acknowledgeErr
|
||||
}
|
||||
if check, ok := m.checks[id]; ok {
|
||||
check.Acknowledged = true
|
||||
check.AcknowledgedBy = actor
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHealthCheckSvc) GetSummary(ctx context.Context) (*domain.HealthCheckSummary, error) {
|
||||
if m.getSummaryErr != nil {
|
||||
return nil, m.getSummaryErr
|
||||
}
|
||||
return m.summary, nil
|
||||
}
|
||||
|
||||
// Tests
|
||||
|
||||
func TestListHealthChecks_Success(t *testing.T) {
|
||||
svc := newMockHealthCheckSvc()
|
||||
svc.checks["hc-1"] = &domain.EndpointHealthCheck{
|
||||
ID: "hc-1",
|
||||
Endpoint: "api.example.com:443",
|
||||
Status: domain.HealthStatusHealthy,
|
||||
}
|
||||
handler := NewHealthCheckHandler(svc)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/v1/health-checks", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.ListHealthChecks(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("Expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var resp PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("Failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if resp.Total != 1 {
|
||||
t.Errorf("Expected 1 health check, got %d", resp.Total)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListHealthChecks_MethodNotAllowed(t *testing.T) {
|
||||
handler := NewHealthCheckHandler(newMockHealthCheckSvc())
|
||||
|
||||
req := httptest.NewRequest("POST", "/api/v1/health-checks", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.ListHealthChecks(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Errorf("Expected status 405, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetHealthCheck_Success(t *testing.T) {
|
||||
svc := newMockHealthCheckSvc()
|
||||
check := &domain.EndpointHealthCheck{
|
||||
ID: "hc-1",
|
||||
Endpoint: "api.example.com:443",
|
||||
Status: domain.HealthStatusHealthy,
|
||||
}
|
||||
svc.checks["hc-1"] = check
|
||||
handler := NewHealthCheckHandler(svc)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/v1/health-checks/hc-1", nil)
|
||||
req.SetPathValue("id", "hc-1")
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetHealthCheck(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("Expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var resp domain.EndpointHealthCheck
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("Failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if resp.ID != "hc-1" {
|
||||
t.Errorf("Expected ID hc-1, got %s", resp.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetHealthCheck_NotFound(t *testing.T) {
|
||||
handler := NewHealthCheckHandler(newMockHealthCheckSvc())
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/v1/health-checks/nonexistent", nil)
|
||||
req.SetPathValue("id", "nonexistent")
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetHealthCheck(w, req)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("Expected status 404, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateHealthCheck_Success(t *testing.T) {
|
||||
svc := newMockHealthCheckSvc()
|
||||
handler := NewHealthCheckHandler(svc)
|
||||
|
||||
check := domain.EndpointHealthCheck{
|
||||
Endpoint: "web.example.com:443",
|
||||
Enabled: true,
|
||||
}
|
||||
body, _ := json.Marshal(check)
|
||||
|
||||
req := httptest.NewRequest("POST", "/api/v1/health-checks", bytes.NewReader(body))
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateHealthCheck(w, req)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Errorf("Expected status 201, got %d", w.Code)
|
||||
}
|
||||
|
||||
var resp domain.EndpointHealthCheck
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("Failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if resp.Endpoint != "web.example.com:443" {
|
||||
t.Errorf("Expected endpoint web.example.com:443, got %s", resp.Endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteHealthCheck_Success(t *testing.T) {
|
||||
svc := newMockHealthCheckSvc()
|
||||
svc.checks["hc-1"] = &domain.EndpointHealthCheck{
|
||||
ID: "hc-1",
|
||||
Endpoint: "api.example.com:443",
|
||||
}
|
||||
handler := NewHealthCheckHandler(svc)
|
||||
|
||||
req := httptest.NewRequest("DELETE", "/api/v1/health-checks/hc-1", nil)
|
||||
req.SetPathValue("id", "hc-1")
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.DeleteHealthCheck(w, req)
|
||||
|
||||
if w.Code != http.StatusNoContent {
|
||||
t.Errorf("Expected status 204, got %d", w.Code)
|
||||
}
|
||||
|
||||
if _, ok := svc.checks["hc-1"]; ok {
|
||||
t.Fatal("Expected check to be deleted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcknowledgeHealthCheck_Success(t *testing.T) {
|
||||
svc := newMockHealthCheckSvc()
|
||||
svc.checks["hc-1"] = &domain.EndpointHealthCheck{
|
||||
ID: "hc-1",
|
||||
Endpoint: "api.example.com:443",
|
||||
Status: domain.HealthStatusDown,
|
||||
}
|
||||
handler := NewHealthCheckHandler(svc)
|
||||
|
||||
req := httptest.NewRequest("POST", "/api/v1/health-checks/hc-1/acknowledge", bytes.NewReader([]byte(`{"actor":"user@example.com"}`)))
|
||||
req.SetPathValue("id", "hc-1")
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.AcknowledgeHealthCheck(w, req)
|
||||
|
||||
if w.Code != http.StatusNoContent {
|
||||
t.Errorf("Expected status 204, got %d", w.Code)
|
||||
}
|
||||
|
||||
if !svc.checks["hc-1"].Acknowledged {
|
||||
t.Fatal("Expected check to be acknowledged")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetHealthCheckSummary_Success(t *testing.T) {
|
||||
svc := newMockHealthCheckSvc()
|
||||
svc.summary = &domain.HealthCheckSummary{
|
||||
Healthy: 3,
|
||||
Degraded: 1,
|
||||
Down: 0,
|
||||
CertMismatch: 0,
|
||||
Unknown: 1,
|
||||
}
|
||||
handler := NewHealthCheckHandler(svc)
|
||||
|
||||
req := httptest.NewRequest("GET", "/api/v1/health-checks/summary", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetHealthCheckSummary(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("Expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var resp domain.HealthCheckSummary
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("Failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if resp.Healthy != 3 {
|
||||
t.Errorf("Expected 3 healthy checks, got %d", resp.Healthy)
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user