mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-08 12:38:52 +00:00
Compare commits
49 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 09c29b9f40 | |||
| d364ace02a | |||
| 921dac7e6b | |||
| 21aeed4f4e | |||
| 8c0c8aa69d | |||
| 5411c12841 | |||
| 9f14894868 | |||
| 25996f86fa | |||
| c6602bcbe8 | |||
| 888e10cba0 | |||
| 3c81531398 | |||
| 1383fe419b | |||
| 02438ad9e1 | |||
| 69a2b5c55a | |||
| 95cb002905 | |||
| de8fac24a3 | |||
| 0161bb201c | |||
| 57b539c378 | |||
| 072e2af198 | |||
| 476022ca59 | |||
| 5b151e74da | |||
| 4e8fb16fc2 | |||
| 264015059d | |||
| 596e675ec7 | |||
| 750478a6fe | |||
| 7fcdc73e20 | |||
| 47da13e7a1 | |||
| a849c8b8cf | |||
| d60a0ac297 | |||
| 96d4b1e623 | |||
| 58b14412a1 | |||
| 910097eb30 | |||
| 6d0f7747df | |||
| b4378942fc | |||
| aedf19d128 | |||
| 41706cc0fb | |||
| 9f7b5d89a5 | |||
| 255f61e6c5 | |||
| 3ede1b726f | |||
| 3fe511189f | |||
| e3a9317693 | |||
| 0ab6bc4a73 | |||
| a31cef34c5 | |||
| ee2d6d3a7c | |||
| 7b3a57dfdf | |||
| a103ccfe5c | |||
| c029875196 | |||
| ed833e80f6 | |||
| 0eb3d0310c |
+30
-7
@@ -7,7 +7,7 @@
|
|||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
POSTGRES_DB=certctl
|
POSTGRES_DB=certctl
|
||||||
POSTGRES_USER=certctl
|
POSTGRES_USER=certctl
|
||||||
POSTGRES_PASSWORD=change-me-in-production
|
POSTGRES_PASSWORD=replace-with-openssl-rand-hex-32
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Certctl Server
|
# Certctl Server
|
||||||
@@ -24,7 +24,7 @@ POSTGRES_PASSWORD=change-me-in-production
|
|||||||
# seeds pg_authid on first boot of an empty volume. See docs/quickstart.md
|
# seeds pg_authid on first boot of an empty volume. See docs/quickstart.md
|
||||||
# "Warning" callout and `internal/repository/postgres/db.go::wrapPingError`
|
# "Warning" callout and `internal/repository/postgres/db.go::wrapPingError`
|
||||||
# for the SQLSTATE 28P01 diagnostic that fires when the two drift.
|
# for the SQLSTATE 28P01 diagnostic that fires when the two drift.
|
||||||
CERTCTL_DATABASE_URL=postgres://certctl:change-me-in-production@postgres:5432/certctl?sslmode=disable
|
CERTCTL_DATABASE_URL=postgres://certctl:replace-with-openssl-rand-hex-32@postgres:5432/certctl?sslmode=disable
|
||||||
CERTCTL_SERVER_HOST=0.0.0.0
|
CERTCTL_SERVER_HOST=0.0.0.0
|
||||||
CERTCTL_SERVER_PORT=8443
|
CERTCTL_SERVER_PORT=8443
|
||||||
CERTCTL_LOG_LEVEL=info
|
CERTCTL_LOG_LEVEL=info
|
||||||
@@ -42,10 +42,27 @@ CERTCTL_LOG_FORMAT=json
|
|||||||
# option (no JWT middleware shipped - silent auth downgrade); see
|
# option (no JWT middleware shipped - silent auth downgrade); see
|
||||||
# docs/upgrade-to-v2-jwt-removal.md if you previously set
|
# docs/upgrade-to-v2-jwt-removal.md if you previously set
|
||||||
# CERTCTL_AUTH_TYPE=jwt.
|
# CERTCTL_AUTH_TYPE=jwt.
|
||||||
CERTCTL_AUTH_TYPE=none
|
#
|
||||||
# Required when CERTCTL_AUTH_TYPE is "api-key".
|
# Bundle 2 closure (2026-05-12): the docker-compose base file no longer
|
||||||
# Generate with: openssl rand -base64 32
|
# defaults to AUTH_TYPE=none. The base ships production-shaped; the demo
|
||||||
# CERTCTL_AUTH_SECRET=change-me-in-production
|
# overlay (deploy/docker-compose.demo.yml) flips this baseline into the
|
||||||
|
# populated-dashboard demo path.
|
||||||
|
CERTCTL_AUTH_TYPE=api-key
|
||||||
|
# Required when CERTCTL_AUTH_TYPE is "api-key". Generate with:
|
||||||
|
# openssl rand -base64 32
|
||||||
|
# The Bundle 2 fail-closed Validate() REFUSES TO START if this value
|
||||||
|
# equals the placeholder string "change-me-in-production" outside of
|
||||||
|
# demo mode (CERTCTL_DEMO_MODE_ACK=true).
|
||||||
|
CERTCTL_AUTH_SECRET=replace-with-openssl-rand-base64-32
|
||||||
|
|
||||||
|
# Bundle 2 closure: AES-256-GCM key for encrypting issuer/target config
|
||||||
|
# secrets at rest. Required for any deployment that uses the dynamic
|
||||||
|
# config GUI to store issuer credentials. Generate with:
|
||||||
|
# openssl rand -base64 32
|
||||||
|
# Minimum 32 bytes. The Bundle 2 fail-closed Validate() REFUSES TO
|
||||||
|
# START if this value equals the placeholder string
|
||||||
|
# "change-me-32-char-encryption-key" outside of demo mode.
|
||||||
|
CERTCTL_CONFIG_ENCRYPTION_KEY=replace-with-openssl-rand-base64-32
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Certctl Agent
|
# Certctl Agent
|
||||||
@@ -54,8 +71,14 @@ CERTCTL_AUTH_TYPE=none
|
|||||||
# startup. Use the docker-compose self-signed bootstrap CA bundle from
|
# startup. Use the docker-compose self-signed bootstrap CA bundle from
|
||||||
# `deploy/test/certs/ca.crt` or supply your own via CERTCTL_SERVER_CA_BUNDLE_PATH.
|
# `deploy/test/certs/ca.crt` or supply your own via CERTCTL_SERVER_CA_BUNDLE_PATH.
|
||||||
CERTCTL_SERVER_URL=https://localhost:8443
|
CERTCTL_SERVER_URL=https://localhost:8443
|
||||||
CERTCTL_API_KEY=change-me-in-production
|
# Matches one of the server's CERTCTL_AUTH_SECRET rotation values. The
|
||||||
|
# placeholder is rejected outside demo mode (Bundle 2 fail-closed guard).
|
||||||
|
CERTCTL_API_KEY=replace-with-openssl-rand-base64-32
|
||||||
CERTCTL_AGENT_NAME=local-agent
|
CERTCTL_AGENT_NAME=local-agent
|
||||||
|
# Returned from `POST /api/v1/agents` during agent enrollment. The agent
|
||||||
|
# fail-fasts at startup with "agent-id flag or CERTCTL_AGENT_ID env var
|
||||||
|
# is required" if this is unset.
|
||||||
|
# CERTCTL_AGENT_ID=agent-from-registration-response
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Optional: Scheduler Tuning (defaults are usually fine)
|
# Optional: Scheduler Tuning (defaults are usually fine)
|
||||||
|
|||||||
+337
-63
@@ -14,12 +14,17 @@ jobs:
|
|||||||
name: Go Build & Test
|
name: Go Build & Test
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.25.10'
|
go-version: '1.25.10'
|
||||||
|
# Phase 3 TEST-L1 closure (2026-05-13): enable Go's module +
|
||||||
|
# build cache so re-runs hit the cache instead of recompiling
|
||||||
|
# the world. setup-go v5 defaults to cache: true; making it
|
||||||
|
# explicit so a future setup-go upgrade can't silently flip it.
|
||||||
|
cache: true
|
||||||
|
|
||||||
- name: Go Build
|
- name: Go Build
|
||||||
run: |
|
run: |
|
||||||
@@ -103,11 +108,29 @@ jobs:
|
|||||||
run: staticcheck ./...
|
run: staticcheck ./...
|
||||||
|
|
||||||
- name: Race Detection
|
- name: Race Detection
|
||||||
run: go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/crypto/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... -count=1 -timeout 300s
|
# Phase 3 TEST-H1 closure (2026-05-13): the pre-Phase-3 invocation
|
||||||
|
# listed 9 explicit package roots, excluding internal/auth/*,
|
||||||
|
# internal/repository/*, internal/mcp, internal/scep, internal/pkcs7,
|
||||||
|
# internal/api/router, internal/api/acme, internal/cli, internal/cms,
|
||||||
|
# internal/config, internal/deploy, internal/integration,
|
||||||
|
# internal/ratelimit, internal/secret, internal/trustanchor, plus
|
||||||
|
# all of cmd/. Audit finding TEST-H1 flagged this as silent
|
||||||
|
# race-detection drift — packages added after the original list
|
||||||
|
# was authored were never covered.
|
||||||
|
#
|
||||||
|
# Post-Phase-3: ./... with -short. The 76 testing.Short() guards
|
||||||
|
# already in the integration-test surface (testcontainers, live-DB,
|
||||||
|
# multi-process) gate behind this flag, so race detection runs
|
||||||
|
# across every package without dragging in long-running suites.
|
||||||
|
# Timeout doubled from 300s to 600s because ./... is broader; the
|
||||||
|
# broader scope is what makes race coverage trustworthy.
|
||||||
|
run: go test -race -short ./... -count=1 -timeout 600s
|
||||||
|
|
||||||
- name: Go Test with Coverage
|
- name: Go Test with Coverage
|
||||||
|
# internal/ciparity/... — post-v2.1.0 anti-rot item 2 surface-
|
||||||
|
# parity tests; stdlib-only so they always pass in this job.
|
||||||
run: |
|
run: |
|
||||||
go test ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/api/router/... ./internal/auth/... ./internal/integration/... ./internal/connector/issuer/... ./internal/connector/target/... ./internal/connector/notifier/... ./internal/connector/discovery/... ./internal/crypto/... ./internal/mcp/... ./internal/cli/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... -count=1 -cover -coverprofile=coverage.out
|
go test ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/api/router/... ./internal/auth/... ./internal/integration/... ./internal/connector/issuer/... ./internal/connector/target/... ./internal/connector/notifier/... ./internal/connector/discovery/... ./internal/crypto/... ./internal/mcp/... ./internal/cli/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... ./internal/ciparity/... -count=1 -cover -coverprofile=coverage.out
|
||||||
|
|
||||||
- name: Check Coverage Thresholds
|
- name: Check Coverage Thresholds
|
||||||
# ci-pipeline-cleanup Phase 2: per-package floors moved to
|
# ci-pipeline-cleanup Phase 2: per-package floors moved to
|
||||||
@@ -118,7 +141,7 @@ jobs:
|
|||||||
run: bash scripts/check-coverage-thresholds.sh
|
run: bash scripts/check-coverage-thresholds.sh
|
||||||
|
|
||||||
- name: Upload Coverage Report
|
- name: Upload Coverage Report
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||||
with:
|
with:
|
||||||
name: go-coverage
|
name: go-coverage
|
||||||
path: coverage.out
|
path: coverage.out
|
||||||
@@ -135,52 +158,6 @@ jobs:
|
|||||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||||
run: bash scripts/coverage-pr-comment.sh
|
run: bash scripts/coverage-pr-comment.sh
|
||||||
|
|
||||||
# Bundle P / Strengthening #6 — QA-doc seed-count drift guard. Forces
|
|
||||||
# every PR that adds a seed row to migrations/seed_demo.sql to keep
|
|
||||||
# docs/contributor/qa-test-suite.md::Seed Data Reference in sync.
|
|
||||||
#
|
|
||||||
# Phase 5 of the 2026-05-04 docs overhaul (commit c64777f) deleted
|
|
||||||
# docs/testing-guide.md (its content dispersed across the new
|
|
||||||
# audience-organized doc tree); the previous QA-doc Part-count drift
|
|
||||||
# guard tracked Part counts between testing-guide.md and the old
|
|
||||||
# qa-test-guide.md headline. With testing-guide.md gone, that guard's
|
|
||||||
# premise is dead and it has been removed. The seed-count drift class
|
|
||||||
# is still live: qa-test-suite.md::Seed Data Reference enumerates
|
|
||||||
# certs/issuers and seed_demo.sql is the source of truth.
|
|
||||||
- name: QA-doc seed-count drift guard
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
DOC=docs/contributor/qa-test-suite.md
|
|
||||||
# Seed-cert count: agnostic to documented header format. The current
|
|
||||||
# documented count lives in `### Certificates (32 total in ...` —
|
|
||||||
# extract the first integer in that header.
|
|
||||||
DOC_CERTS=$(grep -oE '### Certificates \([0-9]+' "$DOC" | grep -oE '[0-9]+' | head -1)
|
|
||||||
# Authoritative count: unique mc-* IDs in seed_demo.sql.
|
|
||||||
SEED_CERTS=$(grep -oE 'mc-[a-z0-9_-]+' migrations/seed_demo.sql | sort -u | wc -l | tr -d ' ')
|
|
||||||
if [ -z "$DOC_CERTS" ]; then
|
|
||||||
echo "::warning::Could not extract documented cert count from $DOC."
|
|
||||||
echo " Skipping cert-count drift check (header format may have changed)."
|
|
||||||
elif [ "$DOC_CERTS" != "$SEED_CERTS" ]; then
|
|
||||||
echo "::error::DRIFT — $DOC says $DOC_CERTS certs; seed_demo.sql has $SEED_CERTS unique mc-* IDs."
|
|
||||||
echo " Update $DOC::Seed Data Reference to match."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
# Issuers: seed-table count vs doc claim.
|
|
||||||
DOC_ISS=$(grep -oE '### Issuers \([0-9]+' "$DOC" | grep -oE '[0-9]+' | head -1)
|
|
||||||
# Authoritative: unique iss-* IDs (close enough proxy; the issuers
|
|
||||||
# table count IS the unique-ID count for this prefix).
|
|
||||||
SEED_ISS=$(grep -oE 'iss-[a-z0-9_-]+' migrations/seed_demo.sql | sort -u | wc -l | tr -d ' ')
|
|
||||||
if [ -z "$DOC_ISS" ]; then
|
|
||||||
echo "::warning::Could not extract documented issuer count."
|
|
||||||
elif [ "$DOC_ISS" != "$SEED_ISS" ] && [ "$((SEED_ISS - DOC_ISS))" -gt 5 ]; then
|
|
||||||
# Allow up to 5pp slack — iss-* IDs appear in audit_events and
|
|
||||||
# other reference tables that aren't issuer-table rows. Drift
|
|
||||||
# only flags when the spread grows large.
|
|
||||||
echo "::error::DRIFT — $DOC says $DOC_ISS issuers; seed_demo.sql has $SEED_ISS unique iss-* IDs (spread > 5)."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "QA-doc seed-count drift guard: clean."
|
|
||||||
|
|
||||||
# Bundle Q / I-001 closure — test-naming convention guard (informational).
|
# Bundle Q / I-001 closure — test-naming convention guard (informational).
|
||||||
# The convention is `Test<Func>_<Scenario>_<ExpectedResult>`. This step
|
# The convention is `Test<Func>_<Scenario>_<ExpectedResult>`. This step
|
||||||
# prints any non-conformant tests but does NOT fail the build until the
|
# prints any non-conformant tests but does NOT fail the build until the
|
||||||
@@ -197,9 +174,8 @@ jobs:
|
|||||||
# internal scenarios expressed via `t.Run` subtests. Requiring the
|
# internal scenarios expressed via `t.Run` subtests. Requiring the
|
||||||
# underscore-Scenario-Result triple repo-wide would mean renaming
|
# underscore-Scenario-Result triple repo-wide would mean renaming
|
||||||
# 167 legitimate tests for no observable behavior change. The
|
# 167 legitimate tests for no observable behavior change. The
|
||||||
# Test<Func>_<Scenario>_<ExpectedResult> form remains documented as
|
# Test<Func>_<Scenario>_<ExpectedResult> form remains the
|
||||||
# the recommended pattern for parameterized scenarios in
|
# recommended pattern for parameterized scenarios, but is not gated.
|
||||||
# docs/contributor/qa-test-suite.md, but is not gated.
|
|
||||||
- name: Regression guards (extracted to scripts/ci-guards/)
|
- name: Regression guards (extracted to scripts/ci-guards/)
|
||||||
# All named regression guards live at scripts/ci-guards/<id>.sh per
|
# All named regression guards live at scripts/ci-guards/<id>.sh per
|
||||||
# ci-pipeline-cleanup bundle Phase 1. Each guard is callable locally:
|
# ci-pipeline-cleanup bundle Phase 1. Each guard is callable locally:
|
||||||
@@ -207,6 +183,7 @@ jobs:
|
|||||||
# Adding a new guard: drop a new <id>.sh; this loop auto-picks it up.
|
# Adding a new guard: drop a new <id>.sh; this loop auto-picks it up.
|
||||||
# Contract: each guard MUST exit 0 on clean repo, non-zero with
|
# Contract: each guard MUST exit 0 on clean repo, non-zero with
|
||||||
# ::error:: prefix on regression. See scripts/ci-guards/README.md.
|
# ::error:: prefix on regression. See scripts/ci-guards/README.md.
|
||||||
|
#
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
fail=0
|
fail=0
|
||||||
@@ -219,14 +196,216 @@ jobs:
|
|||||||
done
|
done
|
||||||
exit $fail
|
exit $fail
|
||||||
|
|
||||||
|
cross-platform-build:
|
||||||
|
# Phase 3 TEST-H2 closure (2026-05-13): the pre-Phase-3 CI ran
|
||||||
|
# exclusively on ubuntu-latest, leaving Windows-specific bugs
|
||||||
|
# (path separators, file permissions, exec.Command semantics)
|
||||||
|
# undetected. The agent + CLI binaries ship for Windows + macOS
|
||||||
|
# users; this matrix asserts they at least BUILD on every OS we
|
||||||
|
# claim to support.
|
||||||
|
#
|
||||||
|
# Build-only — no test run. Full test parity across OSes is a
|
||||||
|
# larger investment (testcontainers effectively requires Linux CI
|
||||||
|
# runners, file-permission tests differ, etc.). The build gate
|
||||||
|
# is the minimum that catches the cross-platform regressions
|
||||||
|
# we've seen in practice.
|
||||||
|
name: Cross-platform build (ubuntu / windows / macos)
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
|
with:
|
||||||
|
go-version: '1.25.10'
|
||||||
|
cache: true
|
||||||
|
|
||||||
|
- name: Build server + agent + CLI + mcp-server
|
||||||
|
run: |
|
||||||
|
go build ./cmd/server
|
||||||
|
go build ./cmd/agent
|
||||||
|
go build ./cmd/cli
|
||||||
|
go build ./cmd/mcp-server
|
||||||
|
|
||||||
|
cold-db-compose-smoke:
|
||||||
|
# Per post-v2.1.0 anti-rot item 6 (Auditable Codebase Bundle).
|
||||||
|
#
|
||||||
|
# Catches migration-on-cold-DB regressions: wipe the postgres
|
||||||
|
# volume, bring the stack up cold, force-recreate the server, mint a
|
||||||
|
# day-0 admin via the bootstrap endpoint, and tear down.
|
||||||
|
# Targets the bug class that the warm-DB integration suite misses
|
||||||
|
# (canonical case: 2026-05-09 migration 000045 broken INSERT,
|
||||||
|
# fixed in commit 6444e13).
|
||||||
|
name: Cold-DB compose smoke
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: go-build-and-test
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
|
- name: Show Docker versions
|
||||||
|
run: |
|
||||||
|
docker --version
|
||||||
|
docker compose version
|
||||||
|
|
||||||
|
- name: Cold-DB compose smoke
|
||||||
|
# The smoke deliberately focuses on the bug class that ONLY a
|
||||||
|
# cold boot can catch: stack-startup correctness against a
|
||||||
|
# blank database. It is intentionally NOT a functional API
|
||||||
|
# walkthrough — the integration test suite under
|
||||||
|
# 'Go Test with Coverage' already covers issue / renew /
|
||||||
|
# revoke / audit-row plumbing against a warm DB.
|
||||||
|
#
|
||||||
|
# The bugs this gate is uniquely positioned to catch:
|
||||||
|
# - Missing required env vars that fail Config.Validate()
|
||||||
|
# at startup (e.g. CERTCTL_DEMO_MODE_ACK gap, 2026-05-12).
|
||||||
|
# - Non-idempotent migrations that crash on the second boot
|
||||||
|
# (e.g. migration 000043 CHECK constraint, 2026-05-12).
|
||||||
|
# - Documented manual flows that don't work end-to-end on
|
||||||
|
# a clean compose (e.g. CERTCTL_BOOTSTRAP_TOKEN
|
||||||
|
# interpolation gap, 2026-05-12).
|
||||||
|
#
|
||||||
|
# Bugs OUTSIDE the scope of this smoke (covered elsewhere):
|
||||||
|
# - API request/response contract changes (integration suite).
|
||||||
|
# - Cert lifecycle correctness (integration suite + handler
|
||||||
|
# tests).
|
||||||
|
# - Audit row plumbing (handler tests).
|
||||||
|
#
|
||||||
|
# 10-min wall-clock cap covers cold image pull + compose-up +
|
||||||
|
# force-recreate + admin bootstrap + teardown. Increase only
|
||||||
|
# if the underlying steps legitimately grow.
|
||||||
|
#
|
||||||
|
# The smoke is inlined here on purpose — it is NOT a script in
|
||||||
|
# scripts/ci-guards/, because there is no value in a developer
|
||||||
|
# running this locally. The whole point of the gate is that CI
|
||||||
|
# owns the cold-DB state; the operator never has to remember to
|
||||||
|
# run it.
|
||||||
|
timeout-minutes: 10
|
||||||
|
working-directory: deploy
|
||||||
|
env:
|
||||||
|
STARTUP_TIMEOUT_SECONDS: 300
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
SERVER_URL="https://localhost:8443"
|
||||||
|
CACERT_PATH="${GITHUB_WORKSPACE}/deploy/test/certs/ca.crt"
|
||||||
|
|
||||||
|
log() { echo "[cold-db-smoke] $*"; }
|
||||||
|
|
||||||
|
wait_for_service_healthy() {
|
||||||
|
local svc="$1" deadline=$(( $(date +%s) + STARTUP_TIMEOUT_SECONDS ))
|
||||||
|
while [ "$(date +%s)" -lt "$deadline" ]; do
|
||||||
|
local state
|
||||||
|
state="$(docker compose ps --format json "$svc" 2>/dev/null | python3 -c '
|
||||||
|
import json, sys
|
||||||
|
try:
|
||||||
|
line = sys.stdin.read().strip()
|
||||||
|
if not line:
|
||||||
|
print("not-up"); sys.exit(0)
|
||||||
|
rows = json.loads(line) if line.startswith("[") else [json.loads(l) for l in line.splitlines() if l.strip()]
|
||||||
|
if not rows:
|
||||||
|
print("not-up")
|
||||||
|
else:
|
||||||
|
print(rows[0].get("Health", rows[0].get("State", "?")))
|
||||||
|
except Exception as e:
|
||||||
|
print(f"err: {e}")
|
||||||
|
')"
|
||||||
|
if [ "$state" = "healthy" ] || [ "$state" = "running" ]; then
|
||||||
|
log " $svc → $state"; return 0
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
log " $svc did NOT reach healthy within ${STARTUP_TIMEOUT_SECONDS}s (last: $state)"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
http_call() {
|
||||||
|
local method="$1" path="$2" data="${3:-}"
|
||||||
|
local args=(--silent --show-error --max-time 30 -X "$method" "$SERVER_URL$path")
|
||||||
|
[ -f "$CACERT_PATH" ] && args+=(--cacert "$CACERT_PATH") || args+=(--insecure)
|
||||||
|
[ -n "$data" ] && args+=(-H "Content-Type: application/json" -d "$data")
|
||||||
|
curl "${args[@]}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Bundle 2 closure (2026-05-12): the base compose is now
|
||||||
|
# production-shaped — auth=api-key + agent-keygen + fail-closed
|
||||||
|
# placeholder guards. The cold-DB smoke layers in the demo
|
||||||
|
# overlay so the boot path remains zero-config: the overlay
|
||||||
|
# supplies AUTH_TYPE=none + DEMO_MODE_ACK=true + the matching
|
||||||
|
# placeholder creds the fail-closed guards accept under
|
||||||
|
# DEMO_MODE_ACK. The agent service in the overlay also
|
||||||
|
# pre-seeds CERTCTL_AGENT_ID=agent-demo-1 so the bundled
|
||||||
|
# agent doesn't restart-loop. The smoke's purpose (catch
|
||||||
|
# migration-on-cold-DB regressions + verify bootstrap-token
|
||||||
|
# endpoint mints a day-0 admin against a freshly migrated
|
||||||
|
# schema) is orthogonal to whether the auth posture is
|
||||||
|
# demo-mode or api-key, so the overlay is acceptable here.
|
||||||
|
COMPOSE_FILES=(-f docker-compose.yml -f docker-compose.demo.yml)
|
||||||
|
|
||||||
|
# Phase 2 SEC-H3 (2026-05-13): the demo overlay sets
|
||||||
|
# CERTCTL_DEMO_MODE_ACK=true; the SEC-H3 fail-closed guard
|
||||||
|
# requires a paired CERTCTL_DEMO_MODE_ACK_TS within the last
|
||||||
|
# 24h (a static YAML value would rot). The overlay reads
|
||||||
|
# ${CERTCTL_DEMO_MODE_ACK_TS:-} from the shell, so we mint a
|
||||||
|
# fresh timestamp here and export it for every compose
|
||||||
|
# invocation in this job (initial `up -d` AND the force-recreate
|
||||||
|
# at step 4).
|
||||||
|
export CERTCTL_DEMO_MODE_ACK_TS="$(date +%s)"
|
||||||
|
|
||||||
|
log "1/4 down -v --remove-orphans"
|
||||||
|
docker compose "${COMPOSE_FILES[@]}" down -v --remove-orphans 2>&1 | tail -3 || true
|
||||||
|
|
||||||
|
log "2/4 up -d (cold boot)"
|
||||||
|
docker compose "${COMPOSE_FILES[@]}" up -d 2>&1 | tail -3
|
||||||
|
|
||||||
|
log "3/4 wait for healthchecks"
|
||||||
|
wait_for_service_healthy postgres
|
||||||
|
wait_for_service_healthy certctl-server
|
||||||
|
wait_for_service_healthy certctl-agent || log " (agent skipped)"
|
||||||
|
|
||||||
|
log "4/4 minting day-0 admin (proves migration ladder + bootstrap path)"
|
||||||
|
TOKEN="$(openssl rand -base64 32 | tr -d '\n')"
|
||||||
|
{
|
||||||
|
echo "CERTCTL_BOOTSTRAP_TOKEN=$TOKEN"
|
||||||
|
# Re-emit the demo-mode ACK TS into the --env-file so the
|
||||||
|
# force-recreate at step 4 inherits it. `--env-file` REPLACES
|
||||||
|
# the shell-env source for variable interpolation on compose
|
||||||
|
# operations that use it, so omitting this line would re-trip
|
||||||
|
# the SEC-H3 guard.
|
||||||
|
echo "CERTCTL_DEMO_MODE_ACK_TS=$CERTCTL_DEMO_MODE_ACK_TS"
|
||||||
|
} > /tmp/_smoke.env
|
||||||
|
docker compose "${COMPOSE_FILES[@]}" --env-file /tmp/_smoke.env up -d --force-recreate certctl-server 2>&1 | tail -2
|
||||||
|
sleep 5
|
||||||
|
wait_for_service_healthy certctl-server
|
||||||
|
BODY="$(http_call POST /api/v1/auth/bootstrap "{\"token\":\"$TOKEN\",\"actor_name\":\"smoke-admin\"}")"
|
||||||
|
KEY="$(echo "$BODY" | python3 -c 'import json,sys; print(json.load(sys.stdin)["key_value"])')"
|
||||||
|
[ -n "$KEY" ] || { log "bootstrap failed: $BODY"; exit 1; }
|
||||||
|
|
||||||
|
log "PASS — cold boot + force-recreate + admin bootstrap all green"
|
||||||
|
log "tearing down"
|
||||||
|
docker compose "${COMPOSE_FILES[@]}" down -v 2>&1 | tail -2
|
||||||
|
|
||||||
|
- name: Dump compose logs on failure
|
||||||
|
if: failure()
|
||||||
|
working-directory: deploy
|
||||||
|
run: |
|
||||||
|
for svc in postgres certctl-server certctl-agent certctl-tls-init; do
|
||||||
|
echo "==== $svc ===="
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.demo.yml logs --no-color --tail 200 "$svc" || true
|
||||||
|
done
|
||||||
|
|
||||||
frontend-build:
|
frontend-build:
|
||||||
name: Frontend Build
|
name: Frontend Build
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||||
with:
|
with:
|
||||||
node-version: '22'
|
node-version: '22'
|
||||||
|
|
||||||
@@ -234,6 +413,17 @@ jobs:
|
|||||||
working-directory: web
|
working-directory: web
|
||||||
run: npm ci
|
run: npm ci
|
||||||
|
|
||||||
|
- name: npm audit (production deps, high+critical)
|
||||||
|
# Phase 1 TEST-L2 closure (2026-05-13):
|
||||||
|
# Production frontend dependencies must not carry high or
|
||||||
|
# critical CVEs. Dev-only deps (vitest, vite, eslint, etc.)
|
||||||
|
# are excluded via --omit=dev since they never ship to
|
||||||
|
# operators. If this gate fires, triage each finding via npm
|
||||||
|
# overrides, dep upgrade, or a tracked --ignore with an issue
|
||||||
|
# link. Do not mass-silence findings.
|
||||||
|
working-directory: web
|
||||||
|
run: npm audit --omit=dev --audit-level=high
|
||||||
|
|
||||||
- name: TypeScript Check
|
- name: TypeScript Check
|
||||||
working-directory: web
|
working-directory: web
|
||||||
run: npx tsc --noEmit
|
run: npx tsc --noEmit
|
||||||
@@ -269,10 +459,10 @@ jobs:
|
|||||||
name: Helm Chart Validation
|
name: Helm Chart Validation
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Install Helm
|
- name: Install Helm
|
||||||
uses: azure/setup-helm@v4
|
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4
|
||||||
with:
|
with:
|
||||||
version: '3.13.0'
|
version: '3.13.0'
|
||||||
|
|
||||||
@@ -280,15 +470,25 @@ jobs:
|
|||||||
# configured. Every lint/template invocation below must pick exactly one
|
# configured. Every lint/template invocation below must pick exactly one
|
||||||
# provisioning mode — see deploy/helm/certctl/templates/_helpers.tpl
|
# provisioning mode — see deploy/helm/certctl/templates/_helpers.tpl
|
||||||
# (certctl.tls.required) and docs/operator/tls.md.
|
# (certctl.tls.required) and docs/operator/tls.md.
|
||||||
|
#
|
||||||
|
# Bundle 3 closure (2026-05-12, commit f1fa311): the chart now ALSO
|
||||||
|
# fails render when (a) server.auth.type=api-key + apiKey empty, or
|
||||||
|
# (b) postgresql.enabled=true + postgresql.auth.password empty.
|
||||||
|
# Every positive render below MUST pass both secrets; inverse tests
|
||||||
|
# at the bottom of this job pin the fail-fast guards in place.
|
||||||
- name: Lint Helm Chart
|
- name: Lint Helm Chart
|
||||||
run: |
|
run: |
|
||||||
helm lint deploy/helm/certctl/ \
|
helm lint deploy/helm/certctl/ \
|
||||||
--set server.tls.existingSecret=certctl-tls-ci
|
--set server.tls.existingSecret=certctl-tls-ci \
|
||||||
|
--set server.auth.apiKey=ci-api-key-placeholder \
|
||||||
|
--set postgresql.auth.password=ci-postgres-placeholder
|
||||||
|
|
||||||
- name: Template Helm Chart (existingSecret mode)
|
- name: Template Helm Chart (existingSecret mode)
|
||||||
run: |
|
run: |
|
||||||
helm template certctl deploy/helm/certctl/ \
|
helm template certctl deploy/helm/certctl/ \
|
||||||
--set server.tls.existingSecret=certctl-tls-ci \
|
--set server.tls.existingSecret=certctl-tls-ci \
|
||||||
|
--set server.auth.apiKey=ci-api-key-placeholder \
|
||||||
|
--set postgresql.auth.password=ci-postgres-placeholder \
|
||||||
> /dev/null
|
> /dev/null
|
||||||
|
|
||||||
- name: Template Helm Chart (cert-manager mode)
|
- name: Template Helm Chart (cert-manager mode)
|
||||||
@@ -296,8 +496,30 @@ jobs:
|
|||||||
helm template certctl deploy/helm/certctl/ \
|
helm template certctl deploy/helm/certctl/ \
|
||||||
--set server.tls.certManager.enabled=true \
|
--set server.tls.certManager.enabled=true \
|
||||||
--set server.tls.certManager.issuerRef.name=letsencrypt-prod \
|
--set server.tls.certManager.issuerRef.name=letsencrypt-prod \
|
||||||
|
--set server.auth.apiKey=ci-api-key-placeholder \
|
||||||
|
--set postgresql.auth.password=ci-postgres-placeholder \
|
||||||
> /dev/null
|
> /dev/null
|
||||||
|
|
||||||
|
- name: Template Helm Chart (external Postgres mode — Bundle 3 D2)
|
||||||
|
run: |
|
||||||
|
# Closes Bundle 3 D2: postgresql.enabled=false must (a) render
|
||||||
|
# cleanly with externalDatabase.url and (b) emit ZERO postgres-*
|
||||||
|
# templates. The render output is grep-checked below.
|
||||||
|
out=$(helm template certctl deploy/helm/certctl/ \
|
||||||
|
--set server.tls.existingSecret=certctl-tls-ci \
|
||||||
|
--set postgresql.enabled=false \
|
||||||
|
--set externalDatabase.url='postgres://u:p@db.example.com:5432/certctl?sslmode=require' \
|
||||||
|
--set server.auth.apiKey=ci-api-key-placeholder)
|
||||||
|
# Bundled-Postgres resources must not appear when postgresql.enabled=false.
|
||||||
|
if echo "$out" | grep -qE "^kind: StatefulSet$"; then
|
||||||
|
echo "::error::Bundle 3 D2 regression: postgres StatefulSet rendered with postgresql.enabled=false"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if echo "$out" | grep -q "postgres-secret.yaml"; then
|
||||||
|
echo "::error::Bundle 3 D2 regression: postgres-secret rendered with postgresql.enabled=false"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Template Helm Chart (guard fails without TLS)
|
- name: Template Helm Chart (guard fails without TLS)
|
||||||
run: |
|
run: |
|
||||||
# Inverse test: the chart MUST refuse to render when no TLS source is
|
# Inverse test: the chart MUST refuse to render when no TLS source is
|
||||||
@@ -308,6 +530,58 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Template Helm Chart (guard fails — Bundle 3 D7 TLS both-set)
|
||||||
|
run: |
|
||||||
|
# Bundle 3 D7: setting BOTH existingSecret AND certManager.enabled
|
||||||
|
# creates two conflicting TLS sources of truth. Chart must refuse.
|
||||||
|
if helm template certctl deploy/helm/certctl/ \
|
||||||
|
--set server.tls.existingSecret=ci \
|
||||||
|
--set server.tls.certManager.enabled=true \
|
||||||
|
--set server.tls.certManager.issuerRef.name=foo \
|
||||||
|
--set server.auth.apiKey=k \
|
||||||
|
--set postgresql.auth.password=p \
|
||||||
|
> /dev/null 2>&1; then
|
||||||
|
echo "::error::Bundle 3 D7 regression: chart rendered with BOTH TLS sources configured"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Template Helm Chart (guard fails — Bundle 3 D1 missing apiKey)
|
||||||
|
run: |
|
||||||
|
# Bundle 3 D1: missing server.auth.apiKey when auth.type=api-key
|
||||||
|
# must fail at template time, not silently render an empty Secret.
|
||||||
|
if helm template certctl deploy/helm/certctl/ \
|
||||||
|
--set server.tls.existingSecret=ci \
|
||||||
|
--set postgresql.auth.password=p \
|
||||||
|
> /dev/null 2>&1; then
|
||||||
|
echo "::error::Bundle 3 D1 regression: chart rendered with empty server.auth.apiKey"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Template Helm Chart (guard fails — Bundle 3 D1 missing pg password)
|
||||||
|
run: |
|
||||||
|
# Bundle 3 D1: missing postgresql.auth.password when postgresql.enabled=true
|
||||||
|
# must fail at template time, not silently use a fallback default.
|
||||||
|
if helm template certctl deploy/helm/certctl/ \
|
||||||
|
--set server.tls.existingSecret=ci \
|
||||||
|
--set server.auth.apiKey=k \
|
||||||
|
> /dev/null 2>&1; then
|
||||||
|
echo "::error::Bundle 3 D1 regression: chart rendered with empty postgresql.auth.password"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Template Helm Chart (guard fails — Bundle 3 D1 missing external DB URL)
|
||||||
|
run: |
|
||||||
|
# Bundle 3 D1: missing externalDatabase.url when postgresql.enabled=false
|
||||||
|
# must fail at template time.
|
||||||
|
if helm template certctl deploy/helm/certctl/ \
|
||||||
|
--set server.tls.existingSecret=ci \
|
||||||
|
--set postgresql.enabled=false \
|
||||||
|
--set server.auth.apiKey=k \
|
||||||
|
> /dev/null 2>&1; then
|
||||||
|
echo "::error::Bundle 3 D1 regression: chart rendered with postgresql.enabled=false + empty externalDatabase.url"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# deploy-vendor-e2e — single-job (collapsed from 12-job matrix)
|
# deploy-vendor-e2e — single-job (collapsed from 12-job matrix)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -338,10 +612,10 @@ jobs:
|
|||||||
needs: [go-build-and-test]
|
needs: [go-build-and-test]
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.25.10'
|
go-version: '1.25.10'
|
||||||
cache: true
|
cache: true
|
||||||
@@ -435,10 +709,10 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
timeout-minutes: 15
|
timeout-minutes: 15
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.25.10'
|
go-version: '1.25.10'
|
||||||
cache: true
|
cache: true
|
||||||
|
|||||||
@@ -53,17 +53,17 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
if: matrix.language == 'go'
|
if: matrix.language == 'go'
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
with:
|
with:
|
||||||
# Match ci.yml + release.yml + security-deep-scan.yml.
|
# Match ci.yml + release.yml + security-deep-scan.yml.
|
||||||
go-version: '1.25.10'
|
go-version: '1.25.10'
|
||||||
|
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v3
|
uses: github/codeql-action/init@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3
|
||||||
with:
|
with:
|
||||||
languages: ${{ matrix.language }}
|
languages: ${{ matrix.language }}
|
||||||
# Use the security-and-quality query suite — security finds plus
|
# Use the security-and-quality query suite — security finds plus
|
||||||
@@ -72,10 +72,10 @@ jobs:
|
|||||||
queries: security-and-quality
|
queries: security-and-quality
|
||||||
|
|
||||||
- name: Autobuild
|
- name: Autobuild
|
||||||
uses: github/codeql-action/autobuild@v3
|
uses: github/codeql-action/autobuild@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3
|
||||||
|
|
||||||
- name: Perform CodeQL Analysis
|
- name: Perform CodeQL Analysis
|
||||||
uses: github/codeql-action/analyze@v3
|
uses: github/codeql-action/analyze@7fd177fa680c9881b53cdab4d346d32574c9f7f4 # v3
|
||||||
with:
|
with:
|
||||||
category: "/language:${{ matrix.language }}"
|
category: "/language:${{ matrix.language }}"
|
||||||
# SARIF upload is implicit (and is what populates the Security tab).
|
# SARIF upload is implicit (and is what populates the Security tab).
|
||||||
|
|||||||
@@ -49,13 +49,13 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
# The compose stack builds the certctl image from the repo
|
# The compose stack builds the certctl image from the repo
|
||||||
# root Dockerfile. Buildx gives the build a usable cache and
|
# root Dockerfile. Buildx gives the build a usable cache and
|
||||||
# works with newer compose versions.
|
# works with newer compose versions.
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||||
|
|
||||||
- name: Run loadtest
|
- name: Run loadtest
|
||||||
run: make loadtest
|
run: make loadtest
|
||||||
@@ -70,7 +70,7 @@ jobs:
|
|||||||
# authoritative machine-readable form; summary.txt is the
|
# authoritative machine-readable form; summary.txt is the
|
||||||
# human-readable text the README baseline tracks.
|
# human-readable text the README baseline tracks.
|
||||||
if: always()
|
if: always()
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||||
with:
|
with:
|
||||||
name: k6-summary-${{ github.run_id }}
|
name: k6-summary-${{ github.run_id }}
|
||||||
path: deploy/test/loadtest/results/
|
path: deploy/test/loadtest/results/
|
||||||
|
|||||||
@@ -39,10 +39,10 @@ jobs:
|
|||||||
os: [linux, darwin]
|
os: [linux, darwin]
|
||||||
arch: [amd64, arm64]
|
arch: [amd64, arm64]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
with:
|
with:
|
||||||
go-version: ${{ env.GO_VERSION }}
|
go-version: ${{ env.GO_VERSION }}
|
||||||
|
|
||||||
@@ -123,7 +123,7 @@ jobs:
|
|||||||
cat "${OUTPUT_NAME}.sha256"
|
cat "${OUTPUT_NAME}.sha256"
|
||||||
|
|
||||||
- name: Upload build artefacts
|
- name: Upload build artefacts
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||||
with:
|
with:
|
||||||
name: binary-${{ steps.build.outputs.output_name }}
|
name: binary-${{ steps.build.outputs.output_name }}
|
||||||
path: |
|
path: |
|
||||||
@@ -151,7 +151,7 @@ jobs:
|
|||||||
hashes: ${{ steps.hashes.outputs.hashes }}
|
hashes: ${{ steps.hashes.outputs.hashes }}
|
||||||
steps:
|
steps:
|
||||||
- name: Download binary artefacts
|
- name: Download binary artefacts
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||||
with:
|
with:
|
||||||
pattern: binary-*
|
pattern: binary-*
|
||||||
path: artifacts
|
path: artifacts
|
||||||
@@ -191,7 +191,7 @@ jobs:
|
|||||||
checksums.txt
|
checksums.txt
|
||||||
|
|
||||||
- name: Upload artefacts to GitHub Release
|
- name: Upload artefacts to GitHub Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
@@ -212,7 +212,7 @@ jobs:
|
|||||||
actions: read
|
actions: read
|
||||||
id-token: write
|
id-token: write
|
||||||
contents: write
|
contents: write
|
||||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@f7dd8c54c2067bafc12ca7a55595d5ee9b75204a # v2.1.0
|
||||||
with:
|
with:
|
||||||
base64-subjects: "${{ needs.aggregate-checksums.outputs.hashes }}"
|
base64-subjects: "${{ needs.aggregate-checksums.outputs.hashes }}"
|
||||||
upload-assets: true
|
upload-assets: true
|
||||||
@@ -235,10 +235,10 @@ jobs:
|
|||||||
id-token: write # Cosign keyless OIDC identity token
|
id-token: write # Cosign keyless OIDC identity token
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Log in to GitHub Container Registry
|
- name: Log in to GitHub Container Registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||||
with:
|
with:
|
||||||
registry: ${{ env.REGISTRY }}
|
registry: ${{ env.REGISTRY }}
|
||||||
username: ${{ github.actor }}
|
username: ${{ github.actor }}
|
||||||
@@ -249,14 +249,14 @@ jobs:
|
|||||||
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||||
|
|
||||||
- name: Install Cosign
|
- name: Install Cosign
|
||||||
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
|
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
|
||||||
|
|
||||||
- name: Build and push server image
|
- name: Build and push server image
|
||||||
id: server-push
|
id: server-push
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
@@ -291,7 +291,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Build and push agent image
|
- name: Build and push agent image
|
||||||
id: agent-push
|
id: agent-push
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile.agent
|
file: ./Dockerfile.agent
|
||||||
@@ -334,7 +334,7 @@ jobs:
|
|||||||
contents: write
|
contents: write
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- name: Extract version from tag
|
- name: Extract version from tag
|
||||||
id: version
|
id: version
|
||||||
@@ -351,7 +351,7 @@ jobs:
|
|||||||
# README is the source of truth for those, and inlining them in every
|
# README is the source of truth for those, and inlining them in every
|
||||||
# release page produces the kind of "every release looks identical"
|
# release page produces the kind of "every release looks identical"
|
||||||
# noise that gives operators no signal about what actually changed.
|
# noise that gives operators no signal about what actually changed.
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2
|
||||||
with:
|
with:
|
||||||
# Pin the release title to the tag name. softprops/action-gh-release@v2
|
# Pin the release title to the tag name. softprops/action-gh-release@v2
|
||||||
# falls back to the most recent commit subject when `name:` is omitted,
|
# falls back to the most recent commit subject when `name:` is omitted,
|
||||||
|
|||||||
@@ -36,9 +36,9 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
timeout-minutes: 60
|
timeout-minutes: 60
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.25'
|
go-version: '1.25'
|
||||||
|
|
||||||
@@ -48,15 +48,26 @@ jobs:
|
|||||||
|
|
||||||
# --- Static analysis (slow paths) ---
|
# --- Static analysis (slow paths) ---
|
||||||
|
|
||||||
- name: gosec
|
- name: gosec (G201/G202/G304/G108 subset — Phase 3 TEST-M2 hard gate)
|
||||||
run: |
|
# Phase 3 TEST-M2 closure (2026-05-13): gosec promoted from
|
||||||
$(go env GOPATH)/bin/gosec -fmt sarif -out gosec.sarif ./... || true
|
# continue-on-error (advisory) to blocking on the 4 high-signal
|
||||||
continue-on-error: true
|
# rule subset that targets real prod-bug classes:
|
||||||
|
# G201 = SQL string formatting (SQL injection)
|
||||||
|
# G202 = SQL string concatenation (SQL injection)
|
||||||
|
# G304 = file-path traversal via tainted input
|
||||||
|
# G108 = profiling endpoint exposed
|
||||||
|
# Other gosec rules (G1xx-G7xx broadly) are excluded from this
|
||||||
|
# run by -include, so they neither gate the build nor appear in
|
||||||
|
# gosec.sarif — they have higher false-positive rates than these 4.
|
||||||
|
run: $(go env GOPATH)/bin/gosec -fmt sarif -out gosec.sarif -include=G201,G202,G304,G108 ./...
|
||||||
|
|
||||||
- name: osv-scanner (multi-ecosystem CVE)
|
- name: osv-scanner (multi-ecosystem CVE — Phase 3 TEST-M2 hard gate)
|
||||||
run: |
|
# Phase 3 TEST-M2 closure (2026-05-13): osv-scanner promoted from
|
||||||
$(go env GOPATH)/bin/osv-scanner -r --format json --output osv-scanner.json . || true
|
# advisory to blocking. Complements govulncheck (already blocking
|
||||||
continue-on-error: true
|
# in ci.yml) by covering non-Go dependencies (npm under web/,
|
||||||
|
# any docker base image deps). Findings fail the build; the
|
||||||
|
# exact CVE list lands in osv-scanner.json as a receipt either way.
|
||||||
|
run: $(go env GOPATH)/bin/osv-scanner -r --format json --output osv-scanner.json .
|
||||||
|
|
||||||
# --- Race detector at -count=10 (D-002) ---
|
# --- Race detector at -count=10 (D-002) ---
|
||||||
|
|
||||||
@@ -90,14 +101,39 @@ jobs:
|
|||||||
run: go install github.com/zimmski/go-mutesting/cmd/go-mutesting@latest
|
run: go install github.com/zimmski/go-mutesting/cmd/go-mutesting@latest
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: go-mutesting (crypto cluster)
|
- name: go-mutesting (crypto cluster — Phase 3 TEST-M1 hard gate at 55%)
|
||||||
|
# Phase 3 TEST-M1 closure (2026-05-13): go-mutesting promoted
|
||||||
|
# from advisory (continue-on-error + per-package `|| true`) to
|
||||||
|
# blocking with an explicit mutation-score floor of 55%.
|
||||||
|
# Per-package summary lines emit `The mutation score is X.YZ`;
|
||||||
|
# the awk filter extracts each, and the post-loop check fails
|
||||||
|
# the step if any package drops below 0.55.
|
||||||
|
#
|
||||||
|
# Floor rationale: 55% is the starter ratio that catches major
|
||||||
|
# regressions without rejecting the audit's "this is OK" steady
|
||||||
|
# state. Raise quarterly as the test suite hardens; the floor
|
||||||
|
# change ships in the same commit that adds the strengthening
|
||||||
|
# tests so the ratchet is documented.
|
||||||
run: |
|
run: |
|
||||||
|
set -e
|
||||||
: > go-mutesting.txt
|
: > go-mutesting.txt
|
||||||
for pkg in ./internal/crypto/... ./internal/pkcs7/... ./internal/connector/issuer/local/...; do
|
for pkg in ./internal/crypto/... ./internal/pkcs7/... ./internal/connector/issuer/local/...; do
|
||||||
echo "=== $pkg ===" | tee -a go-mutesting.txt
|
echo "=== $pkg ===" | tee -a go-mutesting.txt
|
||||||
$(go env GOPATH)/bin/go-mutesting "$pkg" 2>&1 | tee -a go-mutesting.txt || true
|
$(go env GOPATH)/bin/go-mutesting "$pkg" 2>&1 | tee -a go-mutesting.txt
|
||||||
done
|
done
|
||||||
continue-on-error: true
|
# Extract every "The mutation score is X.YZ" line; fail on any
|
||||||
|
# score below 0.55. The check works against floats via awk so
|
||||||
|
# 0.55 is the literal threshold (not a percentage).
|
||||||
|
floor=0.55
|
||||||
|
fail=0
|
||||||
|
while IFS= read -r score; do
|
||||||
|
ok=$(awk -v s="$score" -v f="$floor" 'BEGIN{print (s>=f) ? 1 : 0}')
|
||||||
|
if [ "$ok" -ne 1 ]; then
|
||||||
|
echo "::error::mutation score $score below floor $floor"
|
||||||
|
fail=1
|
||||||
|
fi
|
||||||
|
done < <(grep -oE "The mutation score is [0-9.]+" go-mutesting.txt | awk '{print $NF}')
|
||||||
|
exit $fail
|
||||||
|
|
||||||
# --- Container + supply chain (D-001 partial, D-006 partial) ---
|
# --- Container + supply chain (D-001 partial, D-006 partial) ---
|
||||||
|
|
||||||
@@ -105,11 +141,21 @@ jobs:
|
|||||||
run: docker build -t certctl:deep-scan .
|
run: docker build -t certctl:deep-scan .
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: trivy image scan
|
- name: trivy image scan (HIGH+CRITICAL — Phase 3 TEST-M2 hard gate)
|
||||||
|
# Phase 3 TEST-M2 closure (2026-05-13): trivy promoted from
|
||||||
|
# advisory to blocking. --severity filter keeps the gate
|
||||||
|
# noise-free (LOW + MEDIUM findings stay in the JSON receipt
|
||||||
|
# but don't fail the build); --exit-code 1 makes HIGH+CRITICAL
|
||||||
|
# findings the actual gate. Trivy is the third hard deep-scan
|
||||||
|
# gate (alongside gosec + osv-scanner); ZAP / schemathesis /
|
||||||
|
# nuclei / testssl stay advisory because their false-positive
|
||||||
|
# rates on https://localhost:8443-targeted DAST runs are high.
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v "$PWD":/src aquasec/trivy:latest image \
|
docker run --rm -v "$PWD":/src aquasec/trivy:latest image \
|
||||||
--format json --output /src/trivy.json certctl:deep-scan || true
|
--format json --output /src/trivy.json \
|
||||||
continue-on-error: true
|
--severity HIGH,CRITICAL \
|
||||||
|
--exit-code 1 \
|
||||||
|
certctl:deep-scan
|
||||||
|
|
||||||
- name: syft SBOM
|
- name: syft SBOM
|
||||||
run: |
|
run: |
|
||||||
@@ -126,7 +172,7 @@ jobs:
|
|||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|
||||||
- name: ZAP baseline
|
- name: ZAP baseline
|
||||||
uses: zaproxy/action-baseline@v0.10.0
|
uses: zaproxy/action-baseline@1e1871e84428617b969d4a1f981a8255630d54b0 # v0.10.0
|
||||||
with:
|
with:
|
||||||
target: 'https://localhost:8443'
|
target: 'https://localhost:8443'
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
@@ -175,7 +221,7 @@ jobs:
|
|||||||
# --- Upload everything as artefacts ---
|
# --- Upload everything as artefacts ---
|
||||||
|
|
||||||
- name: Upload deep-scan receipts
|
- name: Upload deep-scan receipts
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||||
if: always()
|
if: always()
|
||||||
with:
|
with:
|
||||||
name: security-deep-scan-${{ github.run_id }}
|
name: security-deep-scan-${{ github.run_id }}
|
||||||
|
|||||||
+14
@@ -88,3 +88,17 @@ Thumbs.db
|
|||||||
# CERTCTL_TEST_CA_BUNDLE=./certs/ca.crt. Material is regenerated on every
|
# CERTCTL_TEST_CA_BUNDLE=./certs/ca.crt. Material is regenerated on every
|
||||||
# `docker compose up` and never belongs in git.
|
# `docker compose up` and never belongs in git.
|
||||||
/deploy/test/certs/
|
/deploy/test/certs/
|
||||||
|
|
||||||
|
# Phase 1 RED-1 closure (2026-05-13): the f5-mock-icontrol Dockerfile
|
||||||
|
# rebuilds from source via multi-stage build (deploy/test/f5-mock-icontrol/
|
||||||
|
# Dockerfile line 13). The compiled ELF must not be tracked.
|
||||||
|
deploy/test/f5-mock-icontrol/f5-mock-icontrol
|
||||||
|
|
||||||
|
# Phase 0 closure (2026-05-13): cowork/ holds the operator's internal
|
||||||
|
# legal / audit / strategy artifacts (counsel-signed AI-authorship
|
||||||
|
# declaration, filter-repo callback, pre-rewrite bundle, audit HTML
|
||||||
|
# scratch). It is private operator scratch space and must never
|
||||||
|
# accidentally land in the public repo. See
|
||||||
|
# docs/history-normalization.md for the public-facing description of
|
||||||
|
# the Phase 0 git-history rewrite.
|
||||||
|
cowork/
|
||||||
|
|||||||
@@ -2,6 +2,50 @@
|
|||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
|
||||||
|
### Breaking changes (in this release and scheduled for v2.2.0)
|
||||||
|
|
||||||
|
- **SEC-H1 staged: `CERTCTL_AGENT_BOOTSTRAP_TOKEN_DENY_EMPTY` opt-in flag.**
|
||||||
|
Phase 2 of the architecture diligence remediation (2026-05-13) introduces
|
||||||
|
a new env var that, when set to `true`, makes the server refuse to start
|
||||||
|
unless `CERTCTL_AGENT_BOOTSTRAP_TOKEN` is also set to a real value.
|
||||||
|
Default in this release: `false` (preserves the v2.1.x warn-mode
|
||||||
|
pass-through behavior for backward compatibility). Default flip to
|
||||||
|
`true` is scheduled for v2.2.0 per `WORKSPACE-ROADMAP.md`.
|
||||||
|
|
||||||
|
**Operator action before the v2.2.0 upgrade:** generate a real
|
||||||
|
bootstrap token (`openssl rand -base64 32`) and set
|
||||||
|
`CERTCTL_AGENT_BOOTSTRAP_TOKEN` in your env. When v2.2.0 ships, the
|
||||||
|
deny-empty default flips to `true` and a missing or empty token will
|
||||||
|
fail closed at boot. Operators with the token already set: no action
|
||||||
|
required.
|
||||||
|
|
||||||
|
- **SEC-M4: `CERTCTL_ACME_INSECURE` now requires explicit ACK.**
|
||||||
|
Pre-Phase-2, `CERTCTL_ACME_INSECURE=true` produced only a boot-time
|
||||||
|
WARN log. Post-Phase-2 (THIS release), the server refuses to start
|
||||||
|
unless `CERTCTL_ACME_INSECURE_ACK=true` is set alongside it. ACME
|
||||||
|
directory TLS verification is the load-bearing defense against a
|
||||||
|
network attacker intercepting ACME enrollment; the existing flag was
|
||||||
|
too easy to flip via a copy-pasted Pebble runbook.
|
||||||
|
|
||||||
|
**Operator action:** if you intentionally run against a self-signed
|
||||||
|
ACME server (Pebble, step-ca, internal dev), add
|
||||||
|
`CERTCTL_ACME_INSECURE_ACK=true` to your env. Production deploys
|
||||||
|
MUST never set either flag.
|
||||||
|
|
||||||
|
- **SEC-H3: `CERTCTL_DEMO_MODE_ACK` is no longer sticky — 24h re-ack required.**
|
||||||
|
Pre-Phase-2, setting `CERTCTL_DEMO_MODE_ACK=true` was sticky for the
|
||||||
|
lifetime of the container. Post-Phase-2, operators must ALSO set
|
||||||
|
`CERTCTL_DEMO_MODE_ACK_TS=$(date +%s)` to a unix epoch within the
|
||||||
|
last 24h. Any container restart more than 24h after that timestamp refuses to start
|
||||||
|
unless a fresh TS is supplied. Catches the "forgotten demo deployment
|
||||||
|
promoted to production" failure mode.
|
||||||
|
|
||||||
|
**Operator action:** demo deploys must set `CERTCTL_DEMO_MODE_ACK_TS`
|
||||||
|
at every `docker compose up`. The demo Compose helper script handles
|
||||||
|
this automatically when wired; standalone demo deploys add it
|
||||||
|
manually. Production deploys: this guard is irrelevant
|
||||||
|
(`CERTCTL_DEMO_MODE_ACK` should not be set in production).
|
||||||
|
|
||||||
### Security
|
### Security
|
||||||
|
|
||||||
- **Alg-downgrade defense relaxed for Keycloak-shape IdPs (v2.1.0 pre-tag fix).**
|
- **Alg-downgrade defense relaxed for Keycloak-shape IdPs (v2.1.0 pre-tag fix).**
|
||||||
|
|||||||
@@ -2,9 +2,9 @@ Business Source License 1.1
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
|
|
||||||
Licensor: Shankar Kambam
|
Licensor: certctl LLC
|
||||||
Licensed Work: certctl
|
Licensed Work: certctl
|
||||||
The Licensed Work is © 2026 Shankar Kambam.
|
The Licensed Work is © 2026 certctl LLC.
|
||||||
|
|
||||||
Additional Use Grant: You may make use of the Licensed Work, including in
|
Additional Use Grant: You may make use of the Licensed Work, including in
|
||||||
production for your internal business operations and
|
production for your internal business operations and
|
||||||
@@ -12,15 +12,23 @@ Additional Use Grant: You may make use of the Licensed Work, including in
|
|||||||
your own customers, provided that you may not offer
|
your own customers, provided that you may not offer
|
||||||
the Licensed Work as a Commercial Certificate Service.
|
the Licensed Work as a Commercial Certificate Service.
|
||||||
|
|
||||||
A "Commercial Certificate Service" is a product or
|
A "Commercial Certificate Service" is any product
|
||||||
service whose principal value to a third party is the
|
or service that provides third parties with access
|
||||||
|
to or control of any substantial set of the
|
||||||
certificate management functionality of the Licensed
|
certificate management functionality of the Licensed
|
||||||
Work — including but not limited to lifecycle
|
Work — including but not limited to lifecycle
|
||||||
management, discovery, monitoring, alerting, renewal
|
management, discovery, monitoring, alerting, renewal
|
||||||
automation, deployment, and revocation — where the
|
automation, deployment, revocation, certificate
|
||||||
third party accesses or controls that functionality
|
authority operation, certificate issuance,
|
||||||
and compensation is received for that access or
|
certificate signing, or any combination thereof —
|
||||||
control.
|
where compensation, in any form, is received in
|
||||||
|
connection with such access or control. This
|
||||||
|
restriction applies irrespective of whether such
|
||||||
|
functionality is the principal, ancillary,
|
||||||
|
supporting, or one of several values provided by the
|
||||||
|
product or service, and irrespective of whether the
|
||||||
|
Licensed Work is presented under its original name,
|
||||||
|
a modified name, or no name at all.
|
||||||
|
|
||||||
For the avoidance of doubt:
|
For the avoidance of doubt:
|
||||||
|
|
||||||
@@ -36,12 +44,17 @@ Additional Use Grant: You may make use of the Licensed Work, including in
|
|||||||
|
|
||||||
(b) for the purposes of this Additional Use Grant,
|
(b) for the purposes of this Additional Use Grant,
|
||||||
"third party" excludes (i) your employees, (ii)
|
"third party" excludes (i) your employees, (ii)
|
||||||
your contractors acting on your behalf, and (iii)
|
your contractors acting on your behalf, and
|
||||||
your Affiliates. "Affiliate" means any entity
|
(iii) your Affiliates. "Affiliate" means any
|
||||||
that controls, is controlled by, or is under
|
entity that (1) directly or indirectly controls
|
||||||
common control with, you, where "control" means
|
you, (2) is directly or indirectly controlled by
|
||||||
ownership of more than fifty percent (50%) of
|
you, or (3) is directly or indirectly under
|
||||||
the voting interests of the entity;
|
common control with you, where "control" means
|
||||||
|
either (A) ownership of more than fifty percent
|
||||||
|
(50%) of the voting interests of the entity, or
|
||||||
|
(B) the power to direct the management and
|
||||||
|
policies of the entity, whether through voting
|
||||||
|
securities, contract, or otherwise;
|
||||||
|
|
||||||
(c) the restriction on offering a Commercial
|
(c) the restriction on offering a Commercial
|
||||||
Certificate Service applies regardless of whether
|
Certificate Service applies regardless of whether
|
||||||
@@ -67,16 +80,34 @@ works, redistribute, and make non-production use of the Licensed Work. The
|
|||||||
Licensor may make an Additional Use Grant, above, permitting limited production
|
Licensor may make an Additional Use Grant, above, permitting limited production
|
||||||
use.
|
use.
|
||||||
|
|
||||||
Effective on the Change Date, or the fourth anniversary of the first publicly
|
Effective on the Change Date, the Licensor hereby grants you rights under
|
||||||
available distribution of a specific version of the Licensed Work under this
|
|
||||||
License, whichever comes first, the Licensor hereby grants you rights under
|
|
||||||
the terms of the Change License, and the rights granted in the paragraph
|
the terms of the Change License, and the rights granted in the paragraph
|
||||||
above terminate.
|
above terminate.
|
||||||
|
|
||||||
If your use of the Licensed Work does not comply with the requirements
|
If your use of the Licensed Work does not comply with the requirements
|
||||||
currently in effect as described in this License, you must purchase a
|
currently in effect as described in this License, you must purchase a
|
||||||
commercial license from the Licensor, its affiliated entities, or authorized
|
commercial license from the Licensor, its affiliated entities, or authorized
|
||||||
resellers, or you must refrain from using the Licensed Work.
|
resellers, or you must refrain from using the Licensed Work. Rights granted
|
||||||
|
under any commercial license from the Licensor are personal to the licensee
|
||||||
|
and may not be sublicensed, transferred, assigned, or resold to any third
|
||||||
|
party without the Licensor's prior written consent. Any attempted sublicense,
|
||||||
|
transfer, assignment, or resale in violation of this provision is void.
|
||||||
|
|
||||||
|
Restricted Activities. Notwithstanding any other provision of this License,
|
||||||
|
you may not:
|
||||||
|
|
||||||
|
(i) provide the Licensed Work or substantially similar functionality
|
||||||
|
to third parties as a hosted, managed, embedded, bundled, or
|
||||||
|
integrated service, except as expressly permitted in the
|
||||||
|
Additional Use Grant;
|
||||||
|
|
||||||
|
(ii) move, change, disable, circumvent, or work around any license,
|
||||||
|
security, attribution, audit-trail, or feature-gating
|
||||||
|
functionality contained in the Licensed Work; or
|
||||||
|
|
||||||
|
(iii) alter or remove any license, copyright, attribution, trademark,
|
||||||
|
or other notice from the Licensed Work, its derivatives, or any
|
||||||
|
substantial portion thereof.
|
||||||
|
|
||||||
All copies of the original and modified Licensed Work, and derivative works
|
All copies of the original and modified Licensed Work, and derivative works
|
||||||
of the Licensed Work, are subject to this License. This License applies
|
of the Licensed Work, are subject to this License. This License applies
|
||||||
@@ -110,8 +141,12 @@ the Licensor or to any repository hosting the Licensed Work is provided at
|
|||||||
the submitter's sole risk, confers no rights or obligations on the
|
the submitter's sole risk, confers no rights or obligations on the
|
||||||
Licensor, and is not incorporated into the Licensed Work.
|
Licensor, and is not incorporated into the Licensed Work.
|
||||||
|
|
||||||
This License does not grant you any right in any trademark or logo of the
|
Trademark and naming. This License does not grant you any right in any
|
||||||
Licensor or its Affiliates.
|
trademark, service mark, trade name, or logo of the Licensor or its
|
||||||
|
Affiliates. Forks, derivative works, and modifications of the Licensed Work
|
||||||
|
must not use the name "certctl," any name confusingly similar to "certctl,"
|
||||||
|
or any Licensor trademark in their distributed form, marketing materials,
|
||||||
|
package metadata, or service offerings.
|
||||||
|
|
||||||
Governing law and venue. This License shall be governed by and construed in
|
Governing law and venue. This License shall be governed by and construed in
|
||||||
accordance with the laws of the State of Florida, USA, without giving
|
accordance with the laws of the State of Florida, USA, without giving
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
.PHONY: help build run test lint verify verify-docs verify-deploy loadtest acme-cert-manager-test acme-rfc-conformance-test keycloak-integration-test okta-smoke-test benchmark-auth benchmark-auth-coldcache clean docker-up docker-down migrate-up migrate-down generate test-cover frontend-build qa-stats
|
.PHONY: help build run test lint verify verify-deploy loadtest acme-cert-manager-test acme-rfc-conformance-test keycloak-integration-test okta-smoke-test benchmark-auth benchmark-auth-coldcache clean docker-up docker-down migrate-up migrate-down generate test-cover frontend-build e2e-test qa-stats
|
||||||
|
|
||||||
# Default target - show help
|
# Default target - show help
|
||||||
help:
|
help:
|
||||||
@@ -16,7 +16,6 @@ help:
|
|||||||
@echo " make lint Run linter (golangci-lint)"
|
@echo " make lint Run linter (golangci-lint)"
|
||||||
@echo " make fmt Format code with gofmt"
|
@echo " make fmt Format code with gofmt"
|
||||||
@echo " make verify Pre-commit gate: fmt + vet + lint + test (CI-parity)"
|
@echo " make verify Pre-commit gate: fmt + vet + lint + test (CI-parity)"
|
||||||
@echo " make verify-docs Pre-tag gate: QA-doc drift checks (operator-facing docs)"
|
|
||||||
@echo " make verify-deploy Pre-push gate: digest validity + OpenAPI parity + docker build smoke"
|
@echo " make verify-deploy Pre-push gate: digest validity + OpenAPI parity + docker build smoke"
|
||||||
@echo " make loadtest k6 throughput run against postgres + certctl (NOT in verify; manual + cron only)"
|
@echo " make loadtest k6 throughput run against postgres + certctl (NOT in verify; manual + cron only)"
|
||||||
@echo ""
|
@echo ""
|
||||||
@@ -119,23 +118,6 @@ verify:
|
|||||||
@echo ""
|
@echo ""
|
||||||
@echo "verify: PASS — safe to commit"
|
@echo "verify: PASS — safe to commit"
|
||||||
|
|
||||||
# verify-docs: pre-tag gate. Runs the QA-doc seed-count drift guard
|
|
||||||
# that ci-pipeline-cleanup Phase 11 / frozen decision 0.13 moved out
|
|
||||||
# of CI (was per-push blocking; now operator-runs pre-tag). Protects
|
|
||||||
# docs/contributor/qa-test-suite.md::Seed Data Reference from
|
|
||||||
# drifting vs migrations/seed_demo.sql. Operator-facing docs only —
|
|
||||||
# not product-affecting.
|
|
||||||
#
|
|
||||||
# The QA-doc Part-count drift guard retired in the 2026-05-04 docs
|
|
||||||
# overhaul Phase 5 when docs/testing-guide.md was pruned (its content
|
|
||||||
# dispersed across the audience-organized doc tree); the Part-count
|
|
||||||
# class no longer exists outside the qa_test.go file itself.
|
|
||||||
verify-docs:
|
|
||||||
@echo "==> QA-doc seed-count drift"
|
|
||||||
@bash scripts/qa-doc-seed-count.sh
|
|
||||||
@echo ""
|
|
||||||
@echo "verify-docs: PASS — safe to tag"
|
|
||||||
|
|
||||||
# verify-deploy: optional pre-push gate. Runs the digest-validity check,
|
# verify-deploy: optional pre-push gate. Runs the digest-validity check,
|
||||||
# the OpenAPI ↔ handler parity check, and a Docker build smoke for the
|
# the OpenAPI ↔ handler parity check, and a Docker build smoke for the
|
||||||
# production images (server + agent only — fast subset for local; CI
|
# production images (server + agent only — fast subset for local; CI
|
||||||
@@ -313,13 +295,23 @@ frontend-build:
|
|||||||
cd web && npm ci && npx vite build
|
cd web && npm ci && npx vite build
|
||||||
@echo "Frontend build complete"
|
@echo "Frontend build complete"
|
||||||
|
|
||||||
# QA Suite Stats — Bundle P / Strengthening #8.
|
# Phase 3 TEST-M3 closure (2026-05-13): browser-driven E2E smoke
|
||||||
# Single source-of-truth for every count claim in
|
# target. The full 15-flow suite from web/src/__tests__/e2e/README.md
|
||||||
# docs/contributor/qa-test-suite.md. The Strengthening #6 CI drift guards
|
# ships in frontend-design-audit Phase 8; this target is the harness
|
||||||
# (now scoped to the seed-count class only — the Part-count class retired
|
# wiring that lets `make e2e-test` work today.
|
||||||
# in the 2026-05-04 docs overhaul Phase 5 when testing-guide.md was
|
#
|
||||||
# pruned) consume the same numbers, eliminating the doc-drift class
|
# First-time setup: `cd web && npm install && npx playwright install --with-deps chromium`.
|
||||||
# structurally.
|
# The webServer block in web/playwright.config.ts boots `npm run dev`
|
||||||
|
# automatically; no separate `make docker-up` needed.
|
||||||
|
e2e-test:
|
||||||
|
@echo "Running Playwright E2E (smoke + any *.spec.ts under web/src/__tests__/e2e/)..."
|
||||||
|
cd web && npx playwright test
|
||||||
|
@echo "E2E run complete"
|
||||||
|
|
||||||
|
# qa-stats: snapshot of the test-suite size at the current commit.
|
||||||
|
# Backend Go tests + subtests + fuzz targets + skipped sites, plus the
|
||||||
|
# seed-data counts in migrations/seed_demo.sql. Useful before a release
|
||||||
|
# to spot-check that no whole layer dropped off.
|
||||||
qa-stats:
|
qa-stats:
|
||||||
@echo "=== certctl QA Suite Stats ==="
|
@echo "=== certctl QA Suite Stats ==="
|
||||||
@echo "Date: $$(date +%Y-%m-%d)"
|
@echo "Date: $$(date +%Y-%m-%d)"
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
certctl
|
||||||
|
Copyright 2026 certctl LLC.
|
||||||
|
|
||||||
|
This product is distributed under the Business Source License 1.1.
|
||||||
|
See LICENSE at the repository root for the full license text and
|
||||||
|
the Additional Use Grant carve-outs.
|
||||||
|
|
||||||
|
This product links third-party Go modules and JavaScript packages
|
||||||
|
whose own license terms apply to those components. The full
|
||||||
|
inventory of third-party dependencies and their respective licenses
|
||||||
|
is enumerated in THIRD_PARTY_NOTICES.md at the repository root.
|
||||||
|
|
||||||
|
Effective March 14, 2076, the BSL 1.1 license converts to the
|
||||||
|
Apache License 2.0 per the Change Date in LICENSE.
|
||||||
|
|
||||||
|
For inquiries about commercial licensing terms outside the
|
||||||
|
Additional Use Grant — including the Commercial Certificate
|
||||||
|
Service restriction — contact certctl@proton.me.
|
||||||
@@ -9,13 +9,17 @@
|
|||||||
[](https://github.com/certctl-io/certctl/releases)
|
[](https://github.com/certctl-io/certctl/releases)
|
||||||
[](https://github.com/certctl-io/certctl/stargazers)
|
[](https://github.com/certctl-io/certctl/stargazers)
|
||||||
|
|
||||||
certctl is a self-hosted platform that automates the entire TLS certificate lifecycle, from issuance through renewal to deployment, with zero human intervention. It works with any certificate authority, deploys to any server, and keeps private keys on your infrastructure where they belong. Free, source-available under BSL 1.1, covers the same lifecycle that enterprise platforms charge $100K+/year for.
|
certctl is a self-hosted platform that automates the entire TLS certificate lifecycle, from issuance through renewal to deployment, with zero human intervention. Twelve native CA connectors plus an OpenSSL / shell-script adapter for custom CAs; fifteen native deployment-target connectors plus a proxy-agent pattern for network appliances and agentless targets. Private keys stay on your infrastructure where they belong. Free, source-available under BSL 1.1, covers the same lifecycle that enterprise platforms charge $100K+/year for.
|
||||||
|
|
||||||
The CA/Browser Forum's [Ballot SC-081v3](https://cabforum.org/2025/04/11/ballot-sc081v3-introduce-schedule-of-reducing-validity-and-data-reuse-periods/) caps public TLS certificates at **200 days by March 2026**, **100 days by 2027**, and **47 days by 2029**. At 47-day lifespans, a team managing 100 certificates is processing 7+ renewals per week, every week, forever. Manual workflows stop being a choice.
|
The CA/Browser Forum's [Ballot SC-081v3](https://cabforum.org/2025/04/11/ballot-sc081v3-introduce-schedule-of-reducing-validity-and-data-reuse-periods/) caps public TLS certificates at **200 days by March 2026**, **100 days by 2027**, and **47 days by 2029**. At 47-day lifespans, a team managing 100 certificates is processing 7+ renewals per week, every week, forever. Manual workflows stop being a choice.
|
||||||
|
|
||||||
> **Status: Early-access.** Production-quality core — Local CA, ACME, agent deployment, CRUD, audit, role-based authz (auditor split + day-0 bootstrap + four-eyes approval). Broader surface — intermediate CA hierarchy, ACME/SCEP/EST servers, network appliances — still maturing.
|
> **Status: Early-access — actively looking for design partners.**
|
||||||
|
|
||||||
> v2.1.0 ships federated identity in early-access: OIDC SSO across Keycloak, Authentik, Okta, Auth0, Entra ID, and Google Workspace; HMAC-signed server-side sessions with `__Host-` cookies and CSRF rotation; OIDC Back-Channel Logout; Argon2id break-glass admin. Lab and dev deployments encouraged; production welcomed with the understanding that customer-scale battle-testing is in progress — please [file issues](https://github.com/certctl-io/certctl/issues) on the federated-identity surface, where real-world IdP shapes surface fast.
|
> The certificate lifecycle core is production-quality today: Local CA, ACME, agent deployment, audit, [role-based access control](docs/operator/rbac.md) with auditor split and four-eyes approval. v2.1.0 adds federated identity on top — [OIDC SSO](docs/operator/oidc-runbooks/index.md), server-side sessions, back-channel logout, and a break-glass admin path for SSO-outage recovery.
|
||||||
|
|
||||||
|
> If your team runs PKI infrastructure that could use real automation, we'd love to have you on certctl. Lab and dev deployments are great. Production is welcome too — especially on the federated-identity surface, where real-world IdP shapes are exactly the exposure we can't manufacture in CI. Battle-testing certctl in your environment is genuinely valuable to us.
|
||||||
|
|
||||||
|
> [File issues](https://github.com/certctl-io/certctl/issues) liberally. Every IdP quirk, every connector edge, every doc gap you hit — that's how the platform earns the right to drop the "early-access" label. The faster the loop, the faster everyone benefits.
|
||||||
|
|
||||||
> **Actively maintained, shipping weekly.** [Open an issue](https://github.com/certctl-io/certctl/issues) if something breaks. CI runs the full test suite with race detection, static analysis, and vulnerability scanning on every commit.
|
> **Actively maintained, shipping weekly.** [Open an issue](https://github.com/certctl-io/certctl/issues) if something breaks. CI runs the full test suite with race detection, static analysis, and vulnerability scanning on every commit.
|
||||||
|
|
||||||
@@ -31,7 +35,6 @@ The full audience-organized index lives at [`docs/README.md`](docs/README.md). T
|
|||||||
| Production operator | [Architecture](docs/reference/architecture.md) → [Security posture](docs/operator/security.md) → [Disaster recovery runbook](docs/operator/runbooks/disaster-recovery.md) |
|
| Production operator | [Architecture](docs/reference/architecture.md) → [Security posture](docs/operator/security.md) → [Disaster recovery runbook](docs/operator/runbooks/disaster-recovery.md) |
|
||||||
| PKI engineer | [ACME server](docs/reference/protocols/acme-server.md) → [SCEP server](docs/reference/protocols/scep-server.md) → [EST server](docs/reference/protocols/est.md) → [CA hierarchy](docs/reference/intermediate-ca-hierarchy.md) |
|
| PKI engineer | [ACME server](docs/reference/protocols/acme-server.md) → [SCEP server](docs/reference/protocols/scep-server.md) → [EST server](docs/reference/protocols/est.md) → [CA hierarchy](docs/reference/intermediate-ca-hierarchy.md) |
|
||||||
| Migrating from another tool | [from certbot](docs/migration/from-certbot.md) / [from acme.sh](docs/migration/from-acmesh.md) / [cert-manager coexistence](docs/migration/cert-manager-coexistence.md) |
|
| Migrating from another tool | [from certbot](docs/migration/from-certbot.md) / [from acme.sh](docs/migration/from-acmesh.md) / [cert-manager coexistence](docs/migration/cert-manager-coexistence.md) |
|
||||||
| Contributor | [Architecture](docs/reference/architecture.md) → [Testing strategy](docs/contributor/testing-strategy.md) → [CI pipeline](docs/contributor/ci-pipeline.md) |
|
|
||||||
|
|
||||||
For the connector reference (12 issuers, 15 targets, 6 notifiers) see [`docs/reference/connectors/index.md`](docs/reference/connectors/index.md).
|
For the connector reference (12 issuers, 15 targets, 6 notifiers) see [`docs/reference/connectors/index.md`](docs/reference/connectors/index.md).
|
||||||
|
|
||||||
@@ -61,7 +64,7 @@ Built for **platform engineering and DevOps teams** managing 10 to 500+ certific
|
|||||||
certctl handles the full certificate lifecycle in one self-hosted control plane:
|
certctl handles the full certificate lifecycle in one self-hosted control plane:
|
||||||
|
|
||||||
- **Issue and renew** from any CA. Let's Encrypt and any ACME provider, an embedded ACME server you can point cert-manager / certbot / lego at directly, a built-in local CA with sub-CA mode (chains under your enterprise root like ADCS), step-ca, Vault PKI, EJBCA, AWS ACM PCA, Google CAS, DigiCert, Sectigo, GlobalSign, Entrust, plus an OpenSSL / shell-script adapter for anything custom. Twelve native issuer connectors. See the [connector reference](docs/reference/connectors/index.md).
|
- **Issue and renew** from any CA. Let's Encrypt and any ACME provider, an embedded ACME server you can point cert-manager / certbot / lego at directly, a built-in local CA with sub-CA mode (chains under your enterprise root like ADCS), step-ca, Vault PKI, EJBCA, AWS ACM PCA, Google CAS, DigiCert, Sectigo, GlobalSign, Entrust, plus an OpenSSL / shell-script adapter for anything custom. Twelve native issuer connectors. See the [connector reference](docs/reference/connectors/index.md).
|
||||||
- **Deploy automatically** to NGINX, Apache, HAProxy, Caddy, Traefik, Envoy, IIS, Windows Cert Store, Java keystore, Kubernetes Secrets, AWS ACM, Azure Key Vault, SSH known-hosts, Postfix + Dovecot, F5 BIG-IP. Fifteen native target connectors. Every deploy goes through atomic-write + ownership-preservation + SHA-256 idempotency + per-target Prometheus counters + pre-deploy snapshot + on-failure rollback. See [`docs/reference/deployment-model.md`](docs/reference/deployment-model.md).
|
- **Deploy automatically** to NGINX, Apache, HAProxy, Caddy, Traefik, Envoy, IIS, Windows Cert Store, Java keystore, Kubernetes Secrets, AWS ACM, Azure Key Vault, SSH known-hosts, Postfix + Dovecot, F5 BIG-IP. Fifteen native target connectors. File-based targets share an atomic-write + SHA-256 idempotency + on-failure rollback + per-target Prometheus counters primitive (the `deploy.Apply` path covers 12 of 13 file-based connectors). Cloud / API targets (AWS ACM, Azure Key Vault) use vendor-SDK semantics rather than the file primitive; F5 uses iControl REST transactions; Kubernetes Secrets is preview. For the per-target guarantee matrix, see [`docs/reference/deployment-model.md`](docs/reference/deployment-model.md). The reload / validate commands operators configure for shell-using targets (NGINX, Apache, HAProxy, Postfix, JavaKeystore, SSH) are validated server-side AND agent-side against shell-metacharacter injection before execution (see [`internal/connector/target/configcheck`](internal/connector/target/configcheck)).
|
||||||
- **Run as an ACME server** so existing client tooling plugs in directly. RFC 8555 + RFC 9773 ARI, two per-profile auth modes (public-trust-style validation or trust_authenticated for internal PKI), doubly-signed key rollover, revoke-cert on both kid path and jwk path, per-account rate limiting. Cert-manager / certbot / lego all work pointed at it. See [`docs/reference/protocols/acme-server.md`](docs/reference/protocols/acme-server.md).
|
- **Run as an ACME server** so existing client tooling plugs in directly. RFC 8555 + RFC 9773 ARI, two per-profile auth modes (public-trust-style validation or trust_authenticated for internal PKI), doubly-signed key rollover, revoke-cert on both kid path and jwk path, per-account rate limiting. Cert-manager / certbot / lego all work pointed at it. See [`docs/reference/protocols/acme-server.md`](docs/reference/protocols/acme-server.md).
|
||||||
- **Run as a SCEP server** for Microsoft Intune-managed phones, ChromeOS devices, network appliances. RFC 8894 native with full PKIMessage wire format, native Intune challenge dispatch with replay protection, per-profile dispatch with separate RA cert per profile. See [`docs/reference/protocols/scep-server.md`](docs/reference/protocols/scep-server.md).
|
- **Run as a SCEP server** for Microsoft Intune-managed phones, ChromeOS devices, network appliances. RFC 8894 native with full PKIMessage wire format, native Intune challenge dispatch with replay protection, per-profile dispatch with separate RA cert per profile. See [`docs/reference/protocols/scep-server.md`](docs/reference/protocols/scep-server.md).
|
||||||
- **Run as an EST server** for HTTPS-based PKCS#10 enrollment. 802.1X / Wi-Fi authentication, IoT device enrollment, RFC 9266 channel binding. See [`docs/reference/protocols/est.md`](docs/reference/protocols/est.md).
|
- **Run as an EST server** for HTTPS-based PKCS#10 enrollment. 802.1X / Wi-Fi authentication, IoT device enrollment, RFC 9266 channel binding. See [`docs/reference/protocols/est.md`](docs/reference/protocols/est.md).
|
||||||
@@ -84,15 +87,28 @@ Security: three authentication paths — API keys (SHA-256 hashed + constant-tim
|
|||||||
|
|
||||||
### Docker Compose (recommended)
|
### Docker Compose (recommended)
|
||||||
|
|
||||||
|
**Demo path — zero config, populated dashboard:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/certctl-io/certctl.git
|
git clone https://github.com/certctl-io/certctl.git
|
||||||
cd certctl
|
cd certctl
|
||||||
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d --build
|
docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d --build
|
||||||
```
|
```
|
||||||
|
|
||||||
Wait ~30 seconds, then open **https://localhost:8443** in your browser. The shipped demo overlay seeds 180 days of realistic history across 13 issuers, 8 agents, managed + discovered certs, jobs, deploys, audit, and notification events. The `certctl-tls-init` init container self-signs an ECDSA-P256 cert on first boot — accept the browser warning for the demo, or feed the generated `ca.crt` to your client.
|
Wait ~30 seconds, then open **https://localhost:8443** in your browser. The demo overlay flips the base into demo-mode auth (every request served as the synthetic admin actor `actor-demo-anon` — the server emits a prominent ⚠ DEMO MODE banner at boot reminding you this posture is for evaluation only) and seeds 180 days of realistic history across 13 issuers, 8 agents, managed + discovered certs, jobs, deploys, audit, and notification events. The `certctl-tls-init` init container self-signs an ECDSA-P256 cert on first boot — accept the browser warning for the demo, or feed the generated `ca.crt` to your client.
|
||||||
|
|
||||||
For a clean install without demo data, drop the `-f deploy/docker-compose.demo.yml` flag and run `docker compose -f deploy/docker-compose.yml up -d --build`. The four compose files (`docker-compose.yml` base, `docker-compose.demo.yml` overlay, `docker-compose.dev.yml` for PgAdmin + debug logging, `docker-compose.test.yml` for integration tests) are documented at [`deploy/ENVIRONMENTS.md`](deploy/ENVIRONMENTS.md).
|
**Production path — `.env` required, fail-closed on placeholders:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example deploy/.env # or root .env if running outside compose
|
||||||
|
"${EDITOR:-nano}" deploy/.env # set POSTGRES_PASSWORD, CERTCTL_AUTH_SECRET,
|
||||||
|
# CERTCTL_API_KEY, CERTCTL_CONFIG_ENCRYPTION_KEY,
|
||||||
|
# CERTCTL_AGENT_ID — all via openssl rand
|
||||||
|
# (replace nano with your preferred editor)
|
||||||
|
docker compose -f deploy/docker-compose.yml up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
The base compose alone (no demo overlay) ships production-shaped: default `auth-type=api-key`, default `keygen-mode=agent`, no demo seed, no demo-mode synthetic admin. The fail-closed startup guards in `internal/config/config.go::Validate` refuse to boot when any of the change-me-... placeholder credentials reach config outside of demo mode (Bundle 2 closure, 2026-05-12). The four compose files (`docker-compose.yml` base, `docker-compose.demo.yml` overlay, `docker-compose.dev.yml` for PgAdmin + debug logging, `docker-compose.test.yml` for integration tests) are documented at [`deploy/ENVIRONMENTS.md`](deploy/ENVIRONMENTS.md).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl --cacert $(docker compose -f deploy/docker-compose.yml exec -T certctl-server cat /etc/certctl/tls/ca.crt) https://localhost:8443/health
|
curl --cacert $(docker compose -f deploy/docker-compose.yml exec -T certctl-server cat /etc/certctl/tls/ca.crt) https://localhost:8443/health
|
||||||
@@ -112,12 +128,15 @@ Detects your OS and architecture, downloads the binary, configures systemd (Linu
|
|||||||
### Helm chart (Kubernetes)
|
### Helm chart (Kubernetes)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Required: TLS (pick one), server API key, and Postgres password.
|
||||||
|
# The chart fails fast at template time if any required value is missing.
|
||||||
helm install certctl deploy/helm/certctl/ \
|
helm install certctl deploy/helm/certctl/ \
|
||||||
--set server.auth.apiKey=your-api-key \
|
--set server.tls.existingSecret=<your-kubernetes.io/tls-secret-name> \
|
||||||
--set postgresql.password=your-db-password
|
--set server.auth.apiKey=$(openssl rand -base64 32) \
|
||||||
|
--set postgresql.auth.password=$(openssl rand -base64 32)
|
||||||
```
|
```
|
||||||
|
|
||||||
Production-ready chart with Server Deployment, PostgreSQL StatefulSet, Agent DaemonSet, health probes, security contexts (non-root, read-only rootfs), and optional Ingress. See [values.yaml](deploy/helm/certctl/values.yaml).
|
Production-ready chart with Server Deployment, PostgreSQL StatefulSet (or external Postgres), Agent DaemonSet, health probes, container-scope security hardening (read-only rootfs, drop-all capabilities, non-root UID), optional PodDisruptionBudget, NetworkPolicy, Prometheus ServiceMonitor, and Ingress. See [values.yaml](deploy/helm/certctl/values.yaml) and the [external-Postgres example](deploy/helm/examples/values-external-db.yaml).
|
||||||
|
|
||||||
### Container images
|
### Container images
|
||||||
|
|
||||||
@@ -156,8 +175,6 @@ make docker-up # Start Docker Compose stack
|
|||||||
|
|
||||||
CI runs `go vet`, `go test -race`, `golangci-lint`, `govulncheck`, and per-package coverage thresholds (service 70%, handler 75%, crypto 88%, auth packages 85-95%) on every push. The thresholds-as-data file is `.github/coverage-thresholds.yml`; lowering a floor requires corresponding test work, not a config flip. Frontend CI runs TypeScript type checking, Vitest tests, and Vite production build.
|
CI runs `go vet`, `go test -race`, `golangci-lint`, `govulncheck`, and per-package coverage thresholds (service 70%, handler 75%, crypto 88%, auth packages 85-95%) on every push. The thresholds-as-data file is `.github/coverage-thresholds.yml`; lowering a floor requires corresponding test work, not a config flip. Frontend CI runs TypeScript type checking, Vitest tests, and Vite production build.
|
||||||
|
|
||||||
For the full contributor guide see [`docs/contributor/`](docs/contributor/) — testing strategy, test environment, CI pipeline, QA prerequisites.
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
Licensed under the [Business Source License 1.1](LICENSE). The source code is publicly available and free to use, modify, and self-host. The one restriction: you may not use certctl's certificate management functionality as part of a commercial certificate-management offering to third parties. See the LICENSE file for the full Additional Use Grant.
|
Licensed under the [Business Source License 1.1](LICENSE). The source code is publicly available and free to use, modify, and self-host. The one restriction: you may not use certctl's certificate management functionality as part of a commercial certificate-management offering to third parties. See the LICENSE file for the full Additional Use Grant.
|
||||||
|
|||||||
@@ -0,0 +1,161 @@
|
|||||||
|
# Third-Party Notices
|
||||||
|
|
||||||
|
certctl is distributed under the Business Source License 1.1
|
||||||
|
(see [LICENSE](LICENSE)). The binaries built from this source link
|
||||||
|
third-party Go and JavaScript libraries listed below; certctl LLC
|
||||||
|
acknowledges each library's authors and reproduces their copyright
|
||||||
|
and license terms here in compliance with each library's license.
|
||||||
|
|
||||||
|
Full license text for each library lives in that library's upstream
|
||||||
|
repository. The license type is provided per-row; for the canonical
|
||||||
|
notice, refer to the upstream source.
|
||||||
|
|
||||||
|
- **Last reviewed:** 2026-05-13
|
||||||
|
- **Holder:** certctl LLC
|
||||||
|
- **License:** BSL 1.1 (Apache 2.0 effective March 14, 2076)
|
||||||
|
|
||||||
|
## Go Modules (binary-link dependencies)
|
||||||
|
|
||||||
|
Generated by walking `go list -deps ./...` against the certctl
|
||||||
|
server, agent, CLI, and MCP-server build paths. Excludes the Go
|
||||||
|
standard library and the certctl-io/certctl module itself.
|
||||||
|
|
||||||
|
**Count:** see commit; generate via `go list -deps -f '{{if .Module}}{{.Module.Path}} {{.Module.Version}}{{end}}' ./...`
|
||||||
|
|
||||||
|
| Module | Version | License |
|
||||||
|
|---|---|---|
|
||||||
|
| `github.com/Azure/azure-sdk-for-go/sdk/azcore` | v1.20.0 | MIT |
|
||||||
|
| `github.com/Azure/azure-sdk-for-go/sdk/azidentity` | v1.13.1 | MIT |
|
||||||
|
| `github.com/Azure/azure-sdk-for-go/sdk/internal` | v1.11.2 | MIT |
|
||||||
|
| `github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azcertificates` | v1.4.0 | MIT |
|
||||||
|
| `github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal` | v1.2.0 | MIT |
|
||||||
|
| `github.com/Azure/go-ntlmssp` | v0.1.1 | MIT |
|
||||||
|
| `github.com/AzureAD/microsoft-authentication-library-for-go` | v1.6.0 | MIT |
|
||||||
|
| `github.com/ChrisTrenkamp/goxpath` | v0.0.0-20210404020558-97928f7e12b6 | MIT |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2` | v1.41.7 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/config` | v1.32.17 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/credentials` | v1.19.16 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/feature/ec2/imds` | v1.18.23 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/internal/configsources` | v1.4.23 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/internal/endpoints/v2` | v2.7.23 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/internal/v4a` | v1.4.24 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/acm` | v1.38.3 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/acmpca` | v1.46.14 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding` | v1.13.9 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/internal/presigned-url` | v1.13.23 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/signin` | v1.0.11 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/sso` | v1.30.17 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/ssooidc` | v1.35.21 | Apache-2.0 |
|
||||||
|
| `github.com/aws/aws-sdk-go-v2/service/sts` | v1.42.1 | Apache-2.0 |
|
||||||
|
| `github.com/aws/smithy-go` | v1.25.1 | Apache-2.0 |
|
||||||
|
| `github.com/bodgit/ntlmssp` | v0.0.0-20240506230425-31973bb52d9b | BSD-2/3-Clause |
|
||||||
|
| `github.com/bodgit/windows` | v1.0.1 | BSD-2/3-Clause |
|
||||||
|
| `github.com/coreos/go-oidc/v3` | v3.18.0 | Apache-2.0 |
|
||||||
|
| `github.com/go-jose/go-jose/v4` | v4.1.4 | Apache-2.0 |
|
||||||
|
| `github.com/go-logr/logr` | v1.4.3 | Apache-2.0 |
|
||||||
|
| `github.com/gofrs/uuid` | v4.4.0+incompatible | MIT |
|
||||||
|
| `github.com/golang-jwt/jwt/v5` | v5.3.0 | MIT |
|
||||||
|
| `github.com/google/jsonschema-go` | v0.4.2 | MIT |
|
||||||
|
| `github.com/google/uuid` | v1.6.0 | BSD-2/3-Clause |
|
||||||
|
| `github.com/hashicorp/go-cleanhttp` | v0.5.2 | MPL-2.0 |
|
||||||
|
| `github.com/hashicorp/go-uuid` | v1.0.3 | MPL-2.0 |
|
||||||
|
| `github.com/jcmturner/aescts/v2` | v2.0.0 | Apache-2.0 |
|
||||||
|
| `github.com/jcmturner/dnsutils/v2` | v2.0.0 | Apache-2.0 |
|
||||||
|
| `github.com/jcmturner/gofork` | v1.7.6 | BSD-2/3-Clause |
|
||||||
|
| `github.com/jcmturner/goidentity/v6` | v6.0.1 | Apache-2.0 |
|
||||||
|
| `github.com/jcmturner/gokrb5/v8` | v8.4.4 | Apache-2.0 |
|
||||||
|
| `github.com/jcmturner/rpc/v2` | v2.0.3 | Apache-2.0 |
|
||||||
|
| `github.com/kr/fs` | v0.1.0 | BSD-2/3-Clause |
|
||||||
|
| `github.com/kylelemons/godebug` | v1.1.0 | Apache-2.0 |
|
||||||
|
| `github.com/lib/pq` | v1.10.9 | MIT |
|
||||||
|
| `github.com/masterzen/simplexml` | v0.0.0-20190410153822-31eea3082786 | Apache-2.0 |
|
||||||
|
| `github.com/masterzen/winrm` | v0.0.0-20250927112105-5f8e6c707321 | Apache-2.0 |
|
||||||
|
| `github.com/modelcontextprotocol/go-sdk` | v1.4.1 | Apache-2.0 |
|
||||||
|
| `github.com/pkg/browser` | v0.0.0-20240102092130-5ac0b6a4141c | BSD-2/3-Clause |
|
||||||
|
| `github.com/pkg/sftp` | v1.13.10 | BSD-2/3-Clause |
|
||||||
|
| `github.com/segmentio/asm` | v1.1.3 | MIT |
|
||||||
|
| `github.com/segmentio/encoding` | v0.5.4 | MIT |
|
||||||
|
| `github.com/tidwall/transform` | v0.0.0-20201103190739-32f242e2dbde | ISC |
|
||||||
|
| `github.com/yosida95/uritemplate/v3` | v3.0.2 | BSD-2/3-Clause |
|
||||||
|
| `golang.org/x/crypto` | v0.50.0 | BSD-2/3-Clause |
|
||||||
|
| `golang.org/x/net` | v0.53.0 | BSD-2/3-Clause |
|
||||||
|
| `golang.org/x/oauth2` | v0.36.0 | BSD-2/3-Clause |
|
||||||
|
| `golang.org/x/sync` | v0.20.0 | BSD-2/3-Clause |
|
||||||
|
| `golang.org/x/sys` | v0.43.0 | BSD-2/3-Clause |
|
||||||
|
| `golang.org/x/text` | v0.36.0 | BSD-2/3-Clause |
|
||||||
|
| `software.sslmate.com/src/go-pkcs12` | v0.7.0 | BSD-2/3-Clause |
|
||||||
|
|
||||||
|
## JavaScript Packages (production transitive closure)
|
||||||
|
|
||||||
|
Generated by walking the `dependencies` graph from `web/package.json`
|
||||||
|
through `node_modules/`. Excludes devDependencies (Vitest, Playwright,
|
||||||
|
Vite, etc.) since they don't ship in the distributed frontend bundle.
|
||||||
|
|
||||||
|
| Package | Version | License |
|
||||||
|
|---|---|---|
|
||||||
|
| `@reduxjs/toolkit` | 2.11.2 | MIT |
|
||||||
|
| `@remix-run/router` | 1.23.2 | MIT |
|
||||||
|
| `@standard-schema/spec` | 1.1.0 | MIT |
|
||||||
|
| `@standard-schema/utils` | 0.3.0 | MIT |
|
||||||
|
| `@tanstack/query-core` | 5.90.20 | MIT |
|
||||||
|
| `@tanstack/react-query` | 5.90.21 | MIT |
|
||||||
|
| `@types/d3-array` | 3.2.2 | MIT |
|
||||||
|
| `@types/d3-color` | 3.1.3 | MIT |
|
||||||
|
| `@types/d3-ease` | 3.0.2 | MIT |
|
||||||
|
| `@types/d3-interpolate` | 3.0.4 | MIT |
|
||||||
|
| `@types/d3-path` | 3.1.1 | MIT |
|
||||||
|
| `@types/d3-scale` | 4.0.9 | MIT |
|
||||||
|
| `@types/d3-shape` | 3.1.8 | MIT |
|
||||||
|
| `@types/d3-time` | 3.0.4 | MIT |
|
||||||
|
| `@types/d3-timer` | 3.0.2 | MIT |
|
||||||
|
| `@types/use-sync-external-store` | 0.0.6 | MIT |
|
||||||
|
| `clsx` | 2.1.1 | MIT |
|
||||||
|
| `d3-array` | 3.2.4 | ISC |
|
||||||
|
| `d3-color` | 3.1.0 | ISC |
|
||||||
|
| `d3-ease` | 3.0.1 | BSD-3-Clause |
|
||||||
|
| `d3-format` | 3.1.2 | ISC |
|
||||||
|
| `d3-interpolate` | 3.0.1 | ISC |
|
||||||
|
| `d3-path` | 3.1.0 | ISC |
|
||||||
|
| `d3-scale` | 4.0.2 | ISC |
|
||||||
|
| `d3-shape` | 3.2.0 | ISC |
|
||||||
|
| `d3-time` | 3.1.0 | ISC |
|
||||||
|
| `d3-time-format` | 4.1.0 | ISC |
|
||||||
|
| `d3-timer` | 3.0.1 | ISC |
|
||||||
|
| `decimal.js-light` | 2.5.1 | MIT |
|
||||||
|
| `es-toolkit` | 1.45.1 | MIT |
|
||||||
|
| `eventemitter3` | 5.0.4 | MIT |
|
||||||
|
| `immer` | 10.2.0 | MIT |
|
||||||
|
| `internmap` | 2.0.3 | ISC |
|
||||||
|
| `js-tokens` | 4.0.0 | MIT |
|
||||||
|
| `loose-envify` | 1.4.0 | MIT |
|
||||||
|
| `react` | 18.3.1 | MIT |
|
||||||
|
| `react-dom` | 18.3.1 | MIT |
|
||||||
|
| `react-redux` | 9.2.0 | MIT |
|
||||||
|
| `react-router` | 6.30.3 | MIT |
|
||||||
|
| `react-router-dom` | 6.30.3 | MIT |
|
||||||
|
| `recharts` | 3.8.0 | MIT |
|
||||||
|
| `redux` | 5.0.1 | MIT |
|
||||||
|
| `redux-thunk` | 3.1.0 | MIT |
|
||||||
|
| `reselect` | 5.1.1 | MIT |
|
||||||
|
| `scheduler` | 0.23.2 | MIT |
|
||||||
|
| `tiny-invariant` | 1.3.3 | MIT |
|
||||||
|
| `use-sync-external-store` | 1.6.0 | MIT |
|
||||||
|
| `victory-vendor` | 37.3.6 | MIT AND ISC |
|
||||||
|
|
||||||
|
## Test-fixture-only dependencies
|
||||||
|
|
||||||
|
**Cisco libest.** The certctl integration test suite exercises the EST
|
||||||
|
(RFC 7030) endpoints against Cisco's libest reference client. libest
|
||||||
|
runs as a sidecar container (`certctl-test-libest`) only when the
|
||||||
|
`est-e2e` Docker Compose profile is active — it is **not** vendored
|
||||||
|
into the certctl source tree and **not** linked into any distributed
|
||||||
|
release artifact (server, agent, CLI, MCP-server, container images,
|
||||||
|
or release tarballs). For libest's own license terms, see
|
||||||
|
<https://github.com/cisco/libest>.
|
||||||
|
|
||||||
|
**f5-mock-icontrol.** The F5 deployment-target integration test
|
||||||
|
ships a small Go program at `deploy/test/f5-mock-icontrol/main.go`
|
||||||
|
under the same BSL 1.1 license as the rest of certctl. The compiled
|
||||||
|
ELF was removed from the tracked tree in Phase 1 closure (commit
|
||||||
|
eda3b48, 2026-05-13); it now rebuilds via the Dockerfile's
|
||||||
|
multi-stage build on demand.
|
||||||
@@ -7,6 +7,24 @@
|
|||||||
# (health, metrics, pprof) routes only.
|
# (health, metrics, pprof) routes only.
|
||||||
#
|
#
|
||||||
# Per ci-pipeline-cleanup bundle Phase 9 / frozen decision 0.11.
|
# Per ci-pipeline-cleanup bundle Phase 9 / frozen decision 0.11.
|
||||||
|
#
|
||||||
|
# Phase 5 reconciliation (2026-05-13, architecture diligence audit
|
||||||
|
# ARCH-H1): of the 64 entries below, 35 are legitimate wire-protocol
|
||||||
|
# carve-outs (SCEP RFC 8894 = 8 entries, ACME RFC 8555 default + per-
|
||||||
|
# profile = 27 entries) that MUST stay. The remaining 29 are REST-
|
||||||
|
# shaped routes whose OpenAPI ops were deferred during their original
|
||||||
|
# Bundle 2 / audit-2026-05-10 / 2026-05-11 work. Burn-down plan:
|
||||||
|
#
|
||||||
|
# Sprint A (per-cluster, ~8-12 ops each):
|
||||||
|
# Cluster 1: auth/sessions + auth/oidc (12 ops)
|
||||||
|
# Cluster 2: auth/breakglass + auth/users + auth/runtime-config (8 ops)
|
||||||
|
# Cluster 3: audit/export + demo-residual/cleanup + auth/logout +
|
||||||
|
# auth/breakglass/login + auth/oidc/{login,callback,bcl} (9 ops)
|
||||||
|
#
|
||||||
|
# Each authored OpenAPI op needs request/response schemas (not
|
||||||
|
# placeholders) so the generated client at web/orval.config.ts emits
|
||||||
|
# typed signatures. When an op lands, delete the corresponding entry
|
||||||
|
# below + bump the openapi-handler-parity.sh expected counts.
|
||||||
|
|
||||||
documented_exceptions:
|
documented_exceptions:
|
||||||
- route: "GET /scep"
|
- route: "GET /scep"
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -699,6 +702,26 @@ func (a *Agent) executeDeploymentJob(ctx context.Context, job JobItem) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bundle 1 / RT-C1 closure (2026-05-12): defense in depth. The server
|
||||||
|
// runs internal/connector/target/configcheck.Validate on the way IN
|
||||||
|
// (Create/Update), and rejects shell metacharacters in command-bearing
|
||||||
|
// fields. Re-run the connector's full ValidateConfig here on the way
|
||||||
|
// OUT, before any DeployCertificate call. This catches (a) configs
|
||||||
|
// that pre-date the server-side guard, (b) corruption/tampering of
|
||||||
|
// the encrypted config blob, and (c) per-connector filesystem
|
||||||
|
// invariants (cert dir exists, paths writable) that the server can't
|
||||||
|
// check because the filesystem is on the agent host.
|
||||||
|
if err := connector.ValidateConfig(ctx, job.TargetConfig); err != nil {
|
||||||
|
a.logger.Error("connector config validation failed",
|
||||||
|
"job_id", job.ID,
|
||||||
|
"target_type", job.TargetType,
|
||||||
|
"error", err)
|
||||||
|
if reportErr := a.reportJobStatus(ctx, job.ID, "Failed", fmt.Sprintf("%s config validation failed: %v", job.TargetType, err)); reportErr != nil {
|
||||||
|
a.logger.Error("failed to report job status to server", "job_id", job.ID, "status", "Failed", "error", reportErr)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
deployReq := target.DeploymentRequest{
|
deployReq := target.DeploymentRequest{
|
||||||
CertPEM: certOnly,
|
CertPEM: certOnly,
|
||||||
KeyPEM: keyPEM,
|
KeyPEM: keyPEM,
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
+47
-2
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -102,6 +105,19 @@ func main() {
|
|||||||
"server_host", cfg.Server.Host,
|
"server_host", cfg.Server.Host,
|
||||||
"server_port", cfg.Server.Port)
|
"server_port", cfg.Server.Port)
|
||||||
|
|
||||||
|
// Bundle 2 (2026-05-12) — visible demo-mode banner at boot.
|
||||||
|
//
|
||||||
|
// When CERTCTL_DEMO_MODE_ACK=true the HIGH-12 startup guard already
|
||||||
|
// passed and the server is about to serve every request as the
|
||||||
|
// synthetic admin actor `actor-demo-anon`. Operators have lost
|
||||||
|
// production deploys to this posture more than once (last incident:
|
||||||
|
// 2026-04-19, a screenshot run that kept running for three days);
|
||||||
|
// the per-startup banner makes the posture unmissable in any log
|
||||||
|
// scraper, dashboard, or `journalctl --since boot` review.
|
||||||
|
if cfg.Auth.DemoModeAck {
|
||||||
|
logger.Warn("⚠ DEMO MODE ACTIVE — CERTCTL_DEMO_MODE_ACK=true is set; every request is served as the synthetic admin actor `actor-demo-anon` (no authentication enforced). This deployment MUST NOT hold production keys, certificates, or audit history. To promote to production: (1) unset CERTCTL_DEMO_MODE_ACK; (2) set CERTCTL_AUTH_TYPE=api-key or oidc; (3) set CERTCTL_AUTH_SECRET to a fresh `openssl rand -base64 32`; (4) set CERTCTL_KEYGEN_MODE=agent; (5) rotate CERTCTL_CONFIG_ENCRYPTION_KEY to a fresh `openssl rand -base64 32` (≥ 32 bytes, not the change-me placeholder); (6) restart the server. See docs/operator/security.md for the full posture.")
|
||||||
|
}
|
||||||
|
|
||||||
// Bundle-5 / Audit H-007: deprecation WARN when the agent bootstrap
|
// Bundle-5 / Audit H-007: deprecation WARN when the agent bootstrap
|
||||||
// token is unset. Pre-Bundle-5 there was no token at all; the v2.0.x
|
// token is unset. Pre-Bundle-5 there was no token at all; the v2.0.x
|
||||||
// default keeps the warn-mode pass-through so existing demo deploys
|
// default keeps the warn-mode pass-through so existing demo deploys
|
||||||
@@ -115,8 +131,14 @@ func main() {
|
|||||||
logger.Info("agent bootstrap token configured (length redacted; constant-time compare on POST /api/v1/agents)")
|
logger.Info("agent bootstrap token configured (length redacted; constant-time compare on POST /api/v1/agents)")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize database connection pool
|
// Initialize database connection pool.
|
||||||
db, err := postgres.NewDB(cfg.Database.URL)
|
//
|
||||||
|
// Bundle 3 closure (D12): pre-Bundle-3 the operator-facing
|
||||||
|
// CERTCTL_DATABASE_MAX_CONNS was a lying-field — config loaded the
|
||||||
|
// value and Validate() checked the floor, but the pool was hard-
|
||||||
|
// coded to SetMaxOpenConns(25). Post-Bundle-3 NewDBWithMaxConns
|
||||||
|
// threads the operator setting through to the connection pool.
|
||||||
|
db, err := postgres.NewDBWithMaxConns(cfg.Database.URL, cfg.Database.MaxConnections)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("failed to connect to database", "error", err)
|
logger.Error("failed to connect to database", "error", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
@@ -564,12 +586,35 @@ func main() {
|
|||||||
SameSite: sameSiteMode,
|
SameSite: sameSiteMode,
|
||||||
Secure: true,
|
Secure: true,
|
||||||
})
|
})
|
||||||
|
// Bundle 5 closure (audit S1): wire the per-source-IP rate limiter
|
||||||
|
// for POST /auth/breakglass/login. 5 attempts / minute / IP, 50 000
|
||||||
|
// key cap. Pre-Bundle-5 the handler docstring claimed this rate
|
||||||
|
// limit but no limiter was installed; the route bypasses the global
|
||||||
|
// RPS middleware because it's mounted via r.mux.Handle in the
|
||||||
|
// AuthExemptRouterRoutes path. The service-layer Argon2id lockout
|
||||||
|
// state machine remains the second line of defense.
|
||||||
|
breakglassHandler.SetLoginRateLimiter(
|
||||||
|
ratelimit.NewSlidingWindowLimiter(5, time.Minute, 50_000),
|
||||||
|
)
|
||||||
if cfg.Auth.Breakglass.Enabled {
|
if cfg.Auth.Breakglass.Enabled {
|
||||||
logger.Warn("CERTCTL_BREAKGLASS_ENABLED=true — break-glass admin path is ACTIVE; this bypasses SSO. Disable in steady-state.",
|
logger.Warn("CERTCTL_BREAKGLASS_ENABLED=true — break-glass admin path is ACTIVE; this bypasses SSO. Disable in steady-state.",
|
||||||
"lockout_threshold", cfg.Auth.Breakglass.LockoutThreshold,
|
"lockout_threshold", cfg.Auth.Breakglass.LockoutThreshold,
|
||||||
"lockout_duration", cfg.Auth.Breakglass.LockoutDuration.String())
|
"lockout_duration", cfg.Auth.Breakglass.LockoutDuration.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bundle 5 closure (audit RT-L2): operator-visible startup warning
|
||||||
|
// when CERTCTL_ACME_INSECURE=true disables ACME directory TLS
|
||||||
|
// verification. Pre-Bundle-5 this knob silently disabled TLS
|
||||||
|
// verification for every ACME issuance call without surfacing any
|
||||||
|
// signal at boot; the only mention lived in a values.yaml comment.
|
||||||
|
// Pebble / step-ca / dev ACME proxies use self-signed certs so the
|
||||||
|
// knob has legitimate dev uses, but a production deploy that flips
|
||||||
|
// it (typically copy-pasting from a Pebble integration runbook)
|
||||||
|
// gets MITM exposure on every CA round-trip. Loud at boot now.
|
||||||
|
if cfg.ACME.Insecure {
|
||||||
|
logger.Warn("CERTCTL_ACME_INSECURE=true — ACME directory TLS verification is DISABLED. Every ACME round-trip skips certificate chain validation; production deploys MUST unset this. Acceptable only for dev / Pebble / step-ca with operator-supplied self-signed roots.")
|
||||||
|
}
|
||||||
|
|
||||||
policyService := service.NewPolicyService(policyRepo, auditService)
|
policyService := service.NewPolicyService(policyRepo, auditService)
|
||||||
policyService.SetCertRepo(certificateRepo) // D-008: CertificateLifetime arm needs CertificateVersion.NotBefore/NotAfter
|
policyService.SetCertRepo(certificateRepo) // D-008: CertificateLifetime arm needs CertificateVersion.NotBefore/NotAfter
|
||||||
// G-1: RenewalPolicyService — distinct from PolicyService (compliance rules).
|
// G-1: RenewalPolicyService — distinct from PolicyService (compliance rules).
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
// Copyright (c) certctl-io contributors.
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
//
|
//
|
||||||
// Audit 2026-05-11 A-8 — demo-mode residual-grants detector. Closes the
|
// Audit 2026-05-11 A-8 — demo-mode residual-grants detector. Closes the
|
||||||
// deferred Phase 2 leg of HIGH-12 (cowork/auth-bundles-fixes-2026-05-10/
|
// deferred Phase 2 leg of HIGH-12 (cowork/auth-bundles-fixes-2026-05-10/
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
+37
-6
@@ -1,8 +1,39 @@
|
|||||||
# certctl Docker Compose environment variables
|
# certctl Docker Compose environment variables (Bundle 2 — 2026-05-12)
|
||||||
# Copy this file to .env and customize for your deployment
|
#
|
||||||
|
# Copy this file to deploy/.env and customize. The production-shaped base
|
||||||
|
# compose (docker-compose.yml) requires every variable below to be set;
|
||||||
|
# the Bundle 2 fail-closed startup guards REFUSE TO BOOT if any value
|
||||||
|
# remains at a "change-me-..." or "replace-with-..." placeholder outside
|
||||||
|
# demo mode (CERTCTL_DEMO_MODE_ACK=true).
|
||||||
|
#
|
||||||
|
# DEMO PATH (zero-config, populated dashboard, demo-mode auth):
|
||||||
|
# docker compose -f deploy/docker-compose.yml \
|
||||||
|
# -f deploy/docker-compose.demo.yml up -d --build
|
||||||
|
# The demo overlay supplies its own placeholder values plus DEMO_MODE_ACK
|
||||||
|
# so this .env is NOT needed.
|
||||||
|
#
|
||||||
|
# PRODUCTION PATH (this .env is required):
|
||||||
|
# docker compose -f deploy/docker-compose.yml up -d
|
||||||
|
|
||||||
# PostgreSQL password (change in production!)
|
# PostgreSQL password — openssl rand -hex 32
|
||||||
POSTGRES_PASSWORD=certctl
|
POSTGRES_PASSWORD=replace-with-openssl-rand-hex-32
|
||||||
|
|
||||||
# Agent API key (change in production! Generate with: openssl rand -hex 32)
|
# Server API-key secret — openssl rand -base64 32
|
||||||
CERTCTL_API_KEY=change-me-in-production
|
CERTCTL_AUTH_SECRET=replace-with-openssl-rand-base64-32
|
||||||
|
|
||||||
|
# Bundled-agent API key (matches one of the server's AUTH_SECRET rotation
|
||||||
|
# values). Generate with: openssl rand -base64 32
|
||||||
|
CERTCTL_API_KEY=replace-with-openssl-rand-base64-32
|
||||||
|
|
||||||
|
# AES-256-GCM key for encrypting issuer/target config secrets at rest.
|
||||||
|
# Minimum 32 bytes. Generate with: openssl rand -base64 32
|
||||||
|
CERTCTL_CONFIG_ENCRYPTION_KEY=replace-with-openssl-rand-base64-32
|
||||||
|
|
||||||
|
# Agent ID returned from `POST /api/v1/agents` during agent enrollment.
|
||||||
|
# Without this the bundled certctl-agent service fail-fasts at startup.
|
||||||
|
# CERTCTL_AGENT_ID=agent-from-registration-response
|
||||||
|
|
||||||
|
# Day-0 admin bootstrap token (optional — generate with: openssl rand -hex 32).
|
||||||
|
# When set, POST /api/v1/auth/bootstrap mints the first admin actor + API
|
||||||
|
# key. When unset (default), that endpoint returns 410 Gone.
|
||||||
|
# CERTCTL_BOOTSTRAP_TOKEN=
|
||||||
|
|||||||
+46
-15
@@ -62,7 +62,9 @@ A compose file defines **services** (containers), **networks** (how they talk to
|
|||||||
## Base Environment
|
## Base Environment
|
||||||
|
|
||||||
**File:** `docker-compose.yml`
|
**File:** `docker-compose.yml`
|
||||||
**When to use:** Production deployments, first-time setup, or any time you want a clean dashboard with the onboarding wizard.
|
**When to use:** Production deployments and any time you want a clean, production-shaped stack with real authentication enforced.
|
||||||
|
|
||||||
|
**Bundle 2 closure (2026-05-12):** the base compose was split from the demo overlay. Pre-Bundle-2 this file WAS the demo path (auth=none, keygen=server, demo-seed=true, change-me placeholder credentials baked in). Operators reading "drop the demo overlay for a clean install" were not getting a clean install — they were getting a demo stack with the overlay's data layer stripped off. Post-Bundle-2 the base ships production-shaped: `CERTCTL_AUTH_TYPE` defaults to `api-key`, `CERTCTL_KEYGEN_MODE` defaults to `agent`, demo-mode + demo-seed default to false, and every credential placeholder is rejected at startup. The demo path is now a single overlay flag away (`-f deploy/docker-compose.demo.yml`).
|
||||||
|
|
||||||
### What it runs
|
### What it runs
|
||||||
|
|
||||||
@@ -79,9 +81,20 @@ Three services on a private bridge network:
|
|||||||
```bash
|
```bash
|
||||||
git clone https://github.com/certctl-io/certctl.git
|
git clone https://github.com/certctl-io/certctl.git
|
||||||
cd certctl
|
cd certctl
|
||||||
|
|
||||||
|
# Required: provide real credentials. Without this step the server fail-fasts
|
||||||
|
# at startup on the Bundle 2 placeholder-credential guards.
|
||||||
|
cp .env.example deploy/.env
|
||||||
|
$EDITOR deploy/.env
|
||||||
|
# Set: POSTGRES_PASSWORD (via `openssl rand -hex 32`), plus CERTCTL_AUTH_SECRET, CERTCTL_API_KEY,
|
||||||
|
# CERTCTL_CONFIG_ENCRYPTION_KEY (each via `openssl rand -base64 32`),
|
||||||
|
# CERTCTL_AGENT_ID (returned from `POST /api/v1/agents`).
|
||||||
|
|
||||||
docker compose -f deploy/docker-compose.yml up -d --build
|
docker compose -f deploy/docker-compose.yml up -d --build
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you just want to kick the tires without writing a `.env`, use the demo overlay instead — see [Demo Overlay](#demo-overlay) below.
|
||||||
|
|
||||||
`--build` compiles the Go server and agent from source, including the React frontend. Without it, Docker may reuse a stale image from a previous build.
|
`--build` compiles the Go server and agent from source, including the React frontend. Without it, Docker may reuse a stale image from a previous build.
|
||||||
|
|
||||||
`-d` runs in detached mode (background). Omit it to see logs in your terminal.
|
`-d` runs in detached mode (background). Omit it to see logs in your terminal.
|
||||||
@@ -132,14 +145,16 @@ certctl-server:
|
|||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
environment:
|
environment:
|
||||||
CERTCTL_DATABASE_URL: postgres://certctl:${POSTGRES_PASSWORD:-certctl}@postgres:5432/certctl?sslmode=disable
|
CERTCTL_DATABASE_URL: postgres://certctl:${POSTGRES_PASSWORD}@postgres:5432/certctl?sslmode=disable
|
||||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||||
CERTCTL_SERVER_PORT: 8443
|
CERTCTL_SERVER_PORT: 8443
|
||||||
CERTCTL_LOG_LEVEL: info
|
CERTCTL_LOG_LEVEL: info
|
||||||
CERTCTL_AUTH_TYPE: none
|
# Bundle 2 (2026-05-12): no auth-type / keygen-mode override here.
|
||||||
CERTCTL_KEYGEN_MODE: server
|
# Code defaults (api-key + agent) take effect; the demo overlay flips
|
||||||
|
# both to demo-mode (none + server).
|
||||||
|
CERTCTL_AUTH_SECRET: ${CERTCTL_AUTH_SECRET}
|
||||||
CERTCTL_NETWORK_SCAN_ENABLED: "true"
|
CERTCTL_NETWORK_SCAN_ENABLED: "true"
|
||||||
CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY:-change-me-32-char-encryption-key}
|
CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY}
|
||||||
```
|
```
|
||||||
|
|
||||||
The server is the control plane. It serves the REST API, the React dashboard, runs 7 background scheduler loops (renewal, job processing, health checks, notifications, short-lived cert expiry, network scanning, digest emails), and manages the issuer/target registry.
|
The server is the control plane. It serves the REST API, the React dashboard, runs 7 background scheduler loops (renewal, job processing, health checks, notifications, short-lived cert expiry, network scanning, digest emails), and manages the issuer/target registry.
|
||||||
@@ -147,9 +162,10 @@ The server is the control plane. It serves the REST API, the React dashboard, ru
|
|||||||
Key environment variables explained:
|
Key environment variables explained:
|
||||||
|
|
||||||
- `CERTCTL_DATABASE_URL` references the `postgres` service by hostname. Docker's internal DNS resolves `postgres` to the container's IP on the bridge network. `sslmode=disable` is appropriate because traffic stays on the private Docker network.
|
- `CERTCTL_DATABASE_URL` references the `postgres` service by hostname. Docker's internal DNS resolves `postgres` to the container's IP on the bridge network. `sslmode=disable` is appropriate because traffic stays on the private Docker network.
|
||||||
- `CERTCTL_AUTH_TYPE: none` disables API key authentication so you can explore immediately. For production, set `api-key` and configure `CERTCTL_AUTH_SECRET`.
|
- `CERTCTL_AUTH_TYPE` defaults to `api-key` in the code (`internal/config/config.go`); the base compose does NOT override it. To run demo-mode auth (every request served as the synthetic admin actor), layer the demo overlay on top.
|
||||||
- `CERTCTL_KEYGEN_MODE: server` means the server generates private keys. This is convenient for demos but insecure for production. In production, set `agent` so keys are generated on agent machines and never transmitted.
|
- `CERTCTL_AUTH_SECRET` is the API-key value the server accepts. The Bundle 2 fail-closed guard rejects the literal placeholder `change-me-in-production` outside demo mode. Generate with `openssl rand -base64 32`.
|
||||||
- `CERTCTL_CONFIG_ENCRYPTION_KEY` enables AES-256-GCM encryption for issuer and target configurations stored in the database (credentials, API keys). Without this, the dynamic configuration GUI (adding issuers/targets from the dashboard) won't encrypt sensitive fields. For production, generate a strong random key.
|
- `CERTCTL_KEYGEN_MODE` defaults to `agent` in the code (the base compose does NOT override it). Production deploys leave it there so private keys stay on agent infrastructure; the demo overlay flips it to `server` so the demo can issue + hold the key on the server box without an agent dance.
|
||||||
|
- `CERTCTL_CONFIG_ENCRYPTION_KEY` enables AES-256-GCM encryption for issuer and target configurations stored in the database (credentials, API keys). Required for any deploy that adds issuers via the GUI. The Bundle 2 fail-closed guard rejects the literal placeholder `change-me-32-char-encryption-key` outside demo mode. Generate with `openssl rand -base64 32` (≥ 32 bytes).
|
||||||
- `CERTCTL_NETWORK_SCAN_ENABLED` activates the scheduler loop that probes TLS endpoints on your network to discover certificates you might not be managing.
|
- `CERTCTL_NETWORK_SCAN_ENABLED` activates the scheduler loop that probes TLS endpoints on your network to discover certificates you might not be managing.
|
||||||
|
|
||||||
**Expert note:** The healthcheck hits `GET /health` every 10 seconds with 5 retries. The `depends_on: condition: service_healthy` on the agent means Docker holds agent startup until this check passes. Resource limits (`cpus: '1.0'`, `memory: 512M`) prevent the server from consuming unbounded resources in shared environments.
|
**Expert note:** The healthcheck hits `GET /health` every 10 seconds with 5 retries. The `depends_on: condition: service_healthy` on the agent means Docker holds agent startup until this check passes. Resource limits (`cpus: '1.0'`, `memory: 512M`) prevent the server from consuming unbounded resources in shared environments.
|
||||||
@@ -162,8 +178,12 @@ certctl-agent:
|
|||||||
certctl-server:
|
certctl-server:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
environment:
|
environment:
|
||||||
CERTCTL_SERVER_URL: http://certctl-server:8443
|
CERTCTL_SERVER_URL: https://certctl-server:8443
|
||||||
CERTCTL_API_KEY: ${CERTCTL_API_KEY:-change-me-in-production}
|
# Bundle 2 (2026-05-12): no placeholder fallbacks. Operators MUST
|
||||||
|
# set CERTCTL_API_KEY + CERTCTL_AGENT_ID in deploy/.env. The agent
|
||||||
|
# binary fail-fasts at startup when CERTCTL_AGENT_ID is unset.
|
||||||
|
CERTCTL_API_KEY: ${CERTCTL_API_KEY}
|
||||||
|
CERTCTL_AGENT_ID: ${CERTCTL_AGENT_ID}
|
||||||
CERTCTL_AGENT_NAME: docker-agent
|
CERTCTL_AGENT_NAME: docker-agent
|
||||||
CERTCTL_LOG_LEVEL: info
|
CERTCTL_LOG_LEVEL: info
|
||||||
CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys
|
CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys
|
||||||
@@ -194,13 +214,18 @@ docker compose -f deploy/docker-compose.yml down -v
|
|||||||
## Demo Overlay
|
## Demo Overlay
|
||||||
|
|
||||||
**File:** `docker-compose.demo.yml`
|
**File:** `docker-compose.demo.yml`
|
||||||
**When to use:** Demos, screenshots, stakeholder presentations, or any time you want a populated dashboard on first boot.
|
**When to use:** Demos, screenshots, stakeholder presentations, or any time you want a one-command zero-config evaluation stack with a populated dashboard.
|
||||||
|
|
||||||
### What it adds
|
### What it adds
|
||||||
|
|
||||||
One env var: `CERTCTL_DEMO_SEED=true` on the `certctl-server` service. The server applies `migrations/seed_demo.sql` at boot via `postgres.RunDemoSeed` AFTER the baseline migrations + `seed.sql` are in place. The demo seed file inserts 180 days of simulated operational history: teams, owners, certificates across multiple issuers, agents on different platforms, jobs with realistic timestamps, discovery scan results, audit events, policies, and profiles.
|
Bundle 2 closure (2026-05-12) moved every demo-mode env var out of the base compose into this overlay. The overlay now carries:
|
||||||
|
|
||||||
Pre-U-3 the overlay used to mount `seed_demo.sql` into PostgreSQL's `/docker-entrypoint-initdb.d/` and rely on initdb-time application. That worked only because the production stack also mounted the migrations there, so the schema existed when initdb ran. Once U-3 dropped the production initdb mounts (single source of truth: server runs `RunMigrations` + `RunSeed` at boot), the demo seed could no longer be applied at initdb time — the tables it references wouldn't exist yet. Post-U-3 the overlay is a 27-line override file with no `image:` / `build:` of its own; it MUST be passed alongside the base, or compose errors with `service "certctl-server" has neither an image nor a build context specified`.
|
- `CERTCTL_AUTH_TYPE=none` + `CERTCTL_DEMO_MODE_ACK=true` — demo-mode synthetic admin actor (`actor-demo-anon`). The server emits a prominent ⚠ DEMO MODE WARN banner at boot with a production-promotion checklist (`cmd/server/main.go`).
|
||||||
|
- `CERTCTL_KEYGEN_MODE=server` — demo-only server-side keygen.
|
||||||
|
- `CERTCTL_DEMO_SEED=true` — the server applies `migrations/seed_demo.sql` at boot via `postgres.RunDemoSeed`, inserting 180 days of simulated operational history (teams, owners, certificates, agents, jobs, discovery results, audit events, policies, profiles).
|
||||||
|
- Fixed weak `POSTGRES_PASSWORD=certctl`, `CERTCTL_AUTH_SECRET=change-me-in-production`, `CERTCTL_CONFIG_ENCRYPTION_KEY=change-me-32-char-encryption-key`, `CERTCTL_API_KEY=change-me-in-production`, `CERTCTL_AGENT_ID=agent-demo-1` — placeholder credentials the Bundle 2 fail-closed `Validate()` rejects outside demo mode, but the demo overlay's `DEMO_MODE_ACK=true` unlocks them.
|
||||||
|
|
||||||
|
Pre-U-3 the overlay used to mount `seed_demo.sql` into PostgreSQL's `/docker-entrypoint-initdb.d/` and rely on initdb-time application. That worked only because the production stack also mounted the migrations there, so the schema existed when initdb ran. Once U-3 dropped the production initdb mounts (single source of truth: server runs `RunMigrations` + `RunSeed` at boot), the demo seed could no longer be applied at initdb time — the tables it references wouldn't exist yet. Post-U-3 the overlay is an override file with no `image:` / `build:` of its own; it MUST be passed alongside the base, or compose errors with `service "certctl-server" has neither an image nor a build context specified`.
|
||||||
|
|
||||||
### Starting it
|
### Starting it
|
||||||
|
|
||||||
@@ -382,7 +407,7 @@ Every `CERTCTL_*` environment variable is read by the server's `internal/config/
|
|||||||
| `CERTCTL_SERVER_HOST` | `0.0.0.0` | Listen address |
|
| `CERTCTL_SERVER_HOST` | `0.0.0.0` | Listen address |
|
||||||
| `CERTCTL_SERVER_PORT` | `8443` | Listen port |
|
| `CERTCTL_SERVER_PORT` | `8443` | Listen port |
|
||||||
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity: `debug`, `info`, `warn`, `error` |
|
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity: `debug`, `info`, `warn`, `error` |
|
||||||
| `CERTCTL_AUTH_TYPE` | `api-key` | Auth mode: `api-key` or `none` |
|
| `CERTCTL_AUTH_TYPE` | `api-key` | Auth mode: `api-key`, `none`, or `oidc` (Auth Bundle 2). |
|
||||||
| `CERTCTL_AUTH_SECRET` | (none) | API key(s), comma-separated for rotation |
|
| `CERTCTL_AUTH_SECRET` | (none) | API key(s), comma-separated for rotation |
|
||||||
| `CERTCTL_KEYGEN_MODE` | `agent` | Key generation: `agent` (production) or `server` (demo) |
|
| `CERTCTL_KEYGEN_MODE` | `agent` | Key generation: `agent` (production) or `server` (demo) |
|
||||||
| `CERTCTL_CONFIG_ENCRYPTION_KEY` | (none) | AES-256-GCM key for encrypting issuer/target configs in DB |
|
| `CERTCTL_CONFIG_ENCRYPTION_KEY` | (none) | AES-256-GCM key for encrypting issuer/target configs in DB |
|
||||||
@@ -392,6 +417,11 @@ Every `CERTCTL_*` environment variable is read by the server's `internal/config/
|
|||||||
| `CERTCTL_CORS_ORIGINS` | (empty) | Allowed CORS origins, comma-separated. Empty = deny all cross-origin |
|
| `CERTCTL_CORS_ORIGINS` | (empty) | Allowed CORS origins, comma-separated. Empty = deny all cross-origin |
|
||||||
| `CERTCTL_RATE_LIMIT_RPS` | `10` | Requests per second per client |
|
| `CERTCTL_RATE_LIMIT_RPS` | `10` | Requests per second per client |
|
||||||
| `CERTCTL_RATE_LIMIT_BURST` | `20` | Burst allowance above RPS |
|
| `CERTCTL_RATE_LIMIT_BURST` | `20` | Burst allowance above RPS |
|
||||||
|
| `CERTCTL_AGENT_BOOTSTRAP_TOKEN` | (empty) | Agent-registration bootstrap secret. Empty = v2.1.x warn-mode pass-through. Set to a real value (`openssl rand -base64 32`); the deny-empty flag's default flip in v2.2.0 will require it. |
|
||||||
|
| `CERTCTL_AGENT_BOOTSTRAP_TOKEN_DENY_EMPTY` | `false` | Phase 2 SEC-H1 staged flag. When `true`, the server refuses to start unless `CERTCTL_AGENT_BOOTSTRAP_TOKEN` is non-empty. Default flip to `true` scheduled for v2.2.0. |
|
||||||
|
| `CERTCTL_DEMO_MODE_ACK` | `false` | Acknowledges demo-mode synthetic admin posture (required when `CERTCTL_AUTH_TYPE=none` binds to a non-loopback host). Must be paired with `CERTCTL_DEMO_MODE_ACK_TS` per Phase 2 SEC-H3. |
|
||||||
|
| `CERTCTL_DEMO_MODE_ACK_TS` | (empty) | Phase 2 SEC-H3: unix-epoch timestamp at which DemoModeAck was last acknowledged. When `CERTCTL_DEMO_MODE_ACK=true`, this must parse as a unix epoch within the last 24h. Set via `CERTCTL_DEMO_MODE_ACK_TS=$(date +%s)` at every `docker compose up`. |
|
||||||
|
| `CERTCTL_ACME_INSECURE_ACK` | `false` | Phase 2 SEC-M4: explicit ACK required to boot with `CERTCTL_ACME_INSECURE=true`. Production deploys MUST never set either flag. |
|
||||||
|
|
||||||
### Agent
|
### Agent
|
||||||
|
|
||||||
@@ -400,7 +430,7 @@ Every `CERTCTL_*` environment variable is read by the server's `internal/config/
|
|||||||
| `CERTCTL_SERVER_URL` | (required) | Server API URL |
|
| `CERTCTL_SERVER_URL` | (required) | Server API URL |
|
||||||
| `CERTCTL_API_KEY` | (none) | API key for authenticating with server |
|
| `CERTCTL_API_KEY` | (none) | API key for authenticating with server |
|
||||||
| `CERTCTL_AGENT_NAME` | (hostname) | Display name in dashboard |
|
| `CERTCTL_AGENT_NAME` | (hostname) | Display name in dashboard |
|
||||||
| `CERTCTL_AGENT_ID` | (auto-generated) | Stable agent identifier |
|
| `CERTCTL_AGENT_ID` | (none — required) | Stable agent identifier returned from `POST /api/v1/agents`. The agent binary fail-fasts at startup if unset. |
|
||||||
| `CERTCTL_KEYGEN_MODE` | `agent` | Must match server setting |
|
| `CERTCTL_KEYGEN_MODE` | `agent` | Must match server setting |
|
||||||
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity |
|
| `CERTCTL_LOG_LEVEL` | `info` | Log verbosity |
|
||||||
| `CERTCTL_KEY_DIR` | `/var/lib/certctl/keys` | Directory for private key storage (0600 perms) |
|
| `CERTCTL_KEY_DIR` | `/var/lib/certctl/keys` | Directory for private key storage (0600 perms) |
|
||||||
@@ -415,6 +445,7 @@ Every `CERTCTL_*` environment variable is read by the server's `internal/config/
|
|||||||
| `CERTCTL_ACME_CHALLENGE_TYPE` | `http-01`, `dns-01`, or `dns-persist-01` |
|
| `CERTCTL_ACME_CHALLENGE_TYPE` | `http-01`, `dns-01`, or `dns-persist-01` |
|
||||||
| `CERTCTL_ACME_INSECURE` | Skip TLS verification for ACME CA (test only) |
|
| `CERTCTL_ACME_INSECURE` | Skip TLS verification for ACME CA (test only) |
|
||||||
| `CERTCTL_ACME_EAB_KID` / `CERTCTL_ACME_EAB_HMAC` | External Account Binding for ZeroSSL, Google Trust Services |
|
| `CERTCTL_ACME_EAB_KID` / `CERTCTL_ACME_EAB_HMAC` | External Account Binding for ZeroSSL, Google Trust Services |
|
||||||
|
| `CERTCTL_ZEROSSL_EAB_URL` | Override the ZeroSSL EAB-credentials endpoint (defaults to the public ZeroSSL URL; only set for ZeroSSL staging or a private mirror) |
|
||||||
| `CERTCTL_ACME_ARI_ENABLED` | Enable RFC 9773 Renewal Information |
|
| `CERTCTL_ACME_ARI_ENABLED` | Enable RFC 9773 Renewal Information |
|
||||||
| `CERTCTL_ACME_PROFILE` | ACME profile (`tlsserver`, `shortlived`) |
|
| `CERTCTL_ACME_PROFILE` | ACME profile (`tlsserver`, `shortlived`) |
|
||||||
| `CERTCTL_STEPCA_URL` | step-ca server URL |
|
| `CERTCTL_STEPCA_URL` | step-ca server URL |
|
||||||
|
|||||||
Executable
+38
@@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# deploy/demo-up.sh — boot the certctl demo stack with the fresh
|
||||||
|
# CERTCTL_DEMO_MODE_ACK_TS the Phase 2 SEC-H3 guard requires.
|
||||||
|
#
|
||||||
|
# The demo overlay sets CERTCTL_DEMO_MODE_ACK=true. Phase 2 SEC-H3
|
||||||
|
# (2026-05-13) pairs that with a fail-closed requirement: the server
|
||||||
|
# refuses to start unless CERTCTL_DEMO_MODE_ACK_TS=<unix-epoch> is set
|
||||||
|
# and is within the last 24h (with 1-minute future clock-skew tolerance).
|
||||||
|
#
|
||||||
|
# A static value in docker-compose.demo.yml would rot the next day, so
|
||||||
|
# the overlay passes the value through from the shell environment. This
|
||||||
|
# helper mints a fresh TS at run time and forwards any extra args to
|
||||||
|
# `docker compose up`, so operators can use it as a drop-in replacement
|
||||||
|
# for the bare command. Example:
|
||||||
|
#
|
||||||
|
# ./demo-up.sh -d # cold boot in detached mode
|
||||||
|
# ./demo-up.sh -d --pull always # forward any flags through
|
||||||
|
#
|
||||||
|
# The cold-DB compose smoke in .github/workflows/ci.yml does the same
|
||||||
|
# thing inline; this script exists so local operators don't have to
|
||||||
|
# remember the export.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# cd to the deploy/ dir so the relative `-f` paths resolve regardless
|
||||||
|
# of where the operator invokes this from. The script lives next to
|
||||||
|
# the compose files it references.
|
||||||
|
cd "$(dirname "$0")"
|
||||||
|
|
||||||
|
# Assign first, then export: `export VAR="$(cmd)"` returns export's own
# status, which would hide a failing `date` from `set -e`.
CERTCTL_DEMO_MODE_ACK_TS="$(date +%s)"
export CERTCTL_DEMO_MODE_ACK_TS
|
||||||
|
|
||||||
|
echo "[demo-up] minting CERTCTL_DEMO_MODE_ACK_TS=$CERTCTL_DEMO_MODE_ACK_TS"
|
||||||
|
echo "[demo-up] running: docker compose -f docker-compose.yml -f docker-compose.demo.yml up $*"
|
||||||
|
|
||||||
|
exec docker compose \
|
||||||
|
-f docker-compose.yml \
|
||||||
|
-f docker-compose.demo.yml \
|
||||||
|
up "$@"
|
||||||
+115
-16
@@ -1,26 +1,125 @@
|
|||||||
# Demo mode: pre-populated dashboard with 32 certificates, 8 agents, 10 issuers, etc.
|
# =============================================================================
|
||||||
# Use this to showcase certctl's dashboard with realistic data.
|
# certctl DEMO overlay — Bundle 2 (2026-05-12)
|
||||||
|
# =============================================================================
|
||||||
#
|
#
|
||||||
# Usage:
|
# Layered on top of the production-shaped base (docker-compose.yml) to give
|
||||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build
|
# operators a one-command, zero-config demo path:
|
||||||
#
|
#
|
||||||
# To start fresh (wipe previous data):
|
# deploy/demo-up.sh -d --build
|
||||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml down -v
|
|
||||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build
|
|
||||||
#
|
#
|
||||||
# U-3 (P1, cat-u-seed_initdb_schema_drift): pre-U-3 this overlay mounted
|
# (which forwards args to `docker compose up` after exporting the fresh
|
||||||
# `seed_demo.sql` into postgres `/docker-entrypoint-initdb.d/`. That worked
|
# CERTCTL_DEMO_MODE_ACK_TS that Phase 2 SEC-H3 requires). Equivalent
|
||||||
# only because the production stack also mounted the migrations there, so
|
# manual invocation:
|
||||||
# the schema existed at initdb time. Once U-3 dropped the production
|
#
|
||||||
|
# CERTCTL_DEMO_MODE_ACK_TS=$(date +%s) docker compose \
|
||||||
|
# -f deploy/docker-compose.yml \
|
||||||
|
# -f deploy/docker-compose.demo.yml up -d --build
|
||||||
|
#
|
||||||
|
# What this overlay does:
|
||||||
|
#
|
||||||
|
# 1. Flips CERTCTL_AUTH_TYPE=none + CERTCTL_DEMO_MODE_ACK=true. Every
|
||||||
|
# request is served as the synthetic admin actor `actor-demo-anon`;
|
||||||
|
# the server emits a prominent ⚠ DEMO MODE WARN banner at boot with
|
||||||
|
# a production-promotion checklist (cmd/server/main.go::emitDemoBanner).
|
||||||
|
# Phase 2 SEC-H3 (2026-05-13) pairs DEMO_MODE_ACK with a required
|
||||||
|
# DEMO_MODE_ACK_TS within the last 24h. The overlay reads
|
||||||
|
# ${CERTCTL_DEMO_MODE_ACK_TS:-} from the shell — use deploy/demo-up.sh
|
||||||
|
# (which exports a fresh TS) instead of bare `docker compose up`.
|
||||||
|
#
|
||||||
|
# 2. Flips CERTCTL_KEYGEN_MODE=server (the demo issues + holds the key on
|
||||||
|
# the server to keep the dashboard populated; production deploys must
|
||||||
|
# use the default `agent` mode where keys never leave the agent box).
|
||||||
|
#
|
||||||
|
# 3. Flips CERTCTL_DEMO_SEED=true. The server applies migrations/seed_demo.sql
|
||||||
|
# at boot via postgres.RunDemoSeed AFTER baseline migrations + seed.sql,
|
||||||
|
# pre-seeding 180 days of simulated history across 13 issuers + 8 agents.
|
||||||
|
#
|
||||||
|
# 4. Supplies fixed demo values for POSTGRES_PASSWORD, CERTCTL_AUTH_SECRET,
|
||||||
|
# CERTCTL_API_KEY, CERTCTL_CONFIG_ENCRYPTION_KEY, and CERTCTL_AGENT_ID
|
||||||
|
# so the demo runs without a deploy/.env file. The Bundle 2 fail-closed
|
||||||
|
# Validate() rejects these placeholders outside demo mode, so this only
|
||||||
|
# works alongside DEMO_MODE_ACK=true.
|
||||||
|
#
|
||||||
|
# U-3 history: pre-U-3 this overlay mounted seed_demo.sql into postgres
|
||||||
|
# `/docker-entrypoint-initdb.d/`. That worked only because the production
|
||||||
|
# stack also mounted the migrations there. Once U-3 dropped the production
|
||||||
# initdb mounts (single source of truth: server runs RunMigrations + RunSeed
|
# initdb mounts (single source of truth: server runs RunMigrations + RunSeed
|
||||||
# at boot), the demo seed could no longer be applied at initdb time — the
|
# at boot), the demo seed could no longer be applied at initdb time — the
|
||||||
# tables it references wouldn't exist yet.
|
# tables it references wouldn't exist yet. Post-U-3 the overlay just sets
|
||||||
|
# CERTCTL_DEMO_SEED=true; the server applies seed_demo.sql at boot via
|
||||||
|
# postgres.RunDemoSeed AFTER baseline migrations + seed.sql.
|
||||||
#
|
#
|
||||||
# Post-U-3 the demo overlay just sets CERTCTL_DEMO_SEED=true; the server
|
# Bundle 2 history: pre-Bundle-2 the base compose WAS the demo path; this
|
||||||
# applies seed_demo.sql at boot via postgres.RunDemoSeed AFTER baseline
|
# overlay was a single-flag thin shim. Bundle 2 split the demo env vars
|
||||||
# migrations + seed.sql are in place. Same single source of truth, no
|
# out of the base so `docker compose -f deploy/docker-compose.yml up`
|
||||||
# initdb mounts, no schema-vs-seed drift.
|
# (no overlay) boots production-shaped — which is what every operator
|
||||||
|
# reading the README quickstart line "drop the demo overlay for a clean
|
||||||
|
# install" expected. The overlay carries the full demo posture now.
|
||||||
|
#
|
||||||
|
# To start fresh (wipe previous data):
|
||||||
|
# docker compose -f deploy/docker-compose.yml \
|
||||||
|
# -f deploy/docker-compose.demo.yml down -v
|
||||||
|
# deploy/demo-up.sh -d --build
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
postgres:
|
||||||
|
# Fixed weak password is intentional for the no-setup demo path.
|
||||||
|
# See docker-compose.yml for the production override pattern.
|
||||||
|
environment:
|
||||||
|
POSTGRES_PASSWORD: certctl
|
||||||
|
|
||||||
certctl-server:
|
certctl-server:
|
||||||
environment:
|
environment:
|
||||||
|
# Demo-mode auth: every request served as the synthetic
|
||||||
|
# `actor-demo-anon` admin. The server's HIGH-12 startup guard
|
||||||
|
# requires DEMO_MODE_ACK=true to allow this combination on a
|
||||||
|
# non-loopback bind; the boot-time WARN banner (cmd/server/main.go)
|
||||||
|
# reminds the operator on every start.
|
||||||
|
CERTCTL_AUTH_TYPE: none
|
||||||
|
CERTCTL_DEMO_MODE_ACK: "true"
|
||||||
|
# Phase 2 SEC-H3 (2026-05-13): DEMO_MODE_ACK=true requires a fresh
|
||||||
|
# DEMO_MODE_ACK_TS within the last 24h. The overlay can't hardcode
|
||||||
|
# a timestamp (it would rot the next day), so we pass it through from
|
||||||
|
# the shell. Operators set this via:
|
||||||
|
# CERTCTL_DEMO_MODE_ACK_TS=$(date +%s) docker compose \
|
||||||
|
# -f docker-compose.yml -f docker-compose.demo.yml up -d
|
||||||
|
# The cold-DB smoke and the deploy/demo-up.sh helper script both
|
||||||
|
# export this before invoking compose. Empty value
|
||||||
|
# fails the SEC-H3 guard with a clear operator-facing error
|
||||||
|
# message pointing at this line.
|
||||||
|
CERTCTL_DEMO_MODE_ACK_TS: "${CERTCTL_DEMO_MODE_ACK_TS:-}"
|
||||||
|
# Server-side keygen so the demo can populate the dashboard with
|
||||||
|
# full lifecycle history. Production deploys leave this at the
|
||||||
|
# code default `agent` (CertctlAgent generates ECDSA P-256 keys
|
||||||
|
# locally and submits CSRs only).
|
||||||
|
CERTCTL_KEYGEN_MODE: server
|
||||||
|
# Demo creds — the Bundle 2 fail-closed Validate() rejects these
|
||||||
|
# sentinels outside demo mode, but DEMO_MODE_ACK=true unlocks them.
|
||||||
|
CERTCTL_CONFIG_ENCRYPTION_KEY: change-me-32-char-encryption-key
|
||||||
|
CERTCTL_AUTH_SECRET: change-me-in-production
|
||||||
|
# Cold-DB smoke fix (2026-05-13): the base compose builds the
|
||||||
|
# database URL via compose-level `${POSTGRES_PASSWORD}` interpolation
|
||||||
|
# (deploy/docker-compose.yml line ~177), which reads the SHELL env —
|
||||||
|
# NOT the postgres service's `environment:` block above (that one
|
||||||
|
# feeds the postgres container's initdb only). In a zero-env-var
|
||||||
|
# CI run the shell var is blank, producing
|
||||||
|
# `postgres://certctl:@postgres:5432/...` and a SCRAM rejection
|
||||||
|
# against a database that initdb seeded with password `certctl`.
|
||||||
|
# Pinning the full URL here closes the gap: the demo overlay is
|
||||||
|
# now fully self-sufficient (matches the file's docstring claim)
|
||||||
|
# and the cold-DB smoke passes against a fresh GitHub-runner clone
|
||||||
|
# with no .env file or exported shell vars. Production deploys
|
||||||
|
# override CERTCTL_DATABASE_URL via the base compose's
|
||||||
|
# `${CERTCTL_DATABASE_URL:-...}` default, so this literal is
|
||||||
|
# overlay-scoped and never leaks into a production posture.
|
||||||
|
CERTCTL_DATABASE_URL: postgres://certctl:certctl@postgres:5432/certctl?sslmode=disable
|
||||||
|
# 180-day simulated history seed applied at boot.
|
||||||
CERTCTL_DEMO_SEED: "true"
|
CERTCTL_DEMO_SEED: "true"
|
||||||
|
|
||||||
|
certctl-agent:
|
||||||
|
environment:
|
||||||
|
# Pre-seeded by migrations/seed_demo.sql; the bundled agent
|
||||||
|
# connects with these creds and the demo-mode synthetic admin
|
||||||
|
# accepts every request regardless of API key.
|
||||||
|
CERTCTL_API_KEY: change-me-in-production
|
||||||
|
CERTCTL_AGENT_ID: agent-demo-1
|
||||||
|
|||||||
@@ -272,6 +272,14 @@ services:
|
|||||||
CERTCTL_ACME_EMAIL: test@certctl.dev
|
CERTCTL_ACME_EMAIL: test@certctl.dev
|
||||||
CERTCTL_ACME_CHALLENGE_TYPE: http-01
|
CERTCTL_ACME_CHALLENGE_TYPE: http-01
|
||||||
CERTCTL_ACME_INSECURE: "true"
|
CERTCTL_ACME_INSECURE: "true"
|
||||||
|
# Phase 2 SEC-M4 (2026-05-13): CERTCTL_ACME_INSECURE=true requires
|
||||||
|
# the paired CERTCTL_ACME_INSECURE_ACK=true; without the ACK the
|
||||||
|
# server's Config.Validate() refuses to start. This integration
|
||||||
|
# stack uses Pebble's self-signed ACME directory, so disabling
|
||||||
|
# TLS verification is correct — but the ACK env var has to be
|
||||||
|
# set explicitly so the test posture matches what production
|
||||||
|
# operators are blocked from doing accidentally.
|
||||||
|
CERTCTL_ACME_INSECURE_ACK: "true"
|
||||||
|
|
||||||
# step-ca issuer (iss-stepca)
|
# step-ca issuer (iss-stepca)
|
||||||
CERTCTL_STEPCA_URL: https://step-ca:9000
|
CERTCTL_STEPCA_URL: https://step-ca:9000
|
||||||
|
|||||||
+98
-27
@@ -1,3 +1,49 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# certctl base compose — PRODUCTION-SHAPED (Bundle 2, 2026-05-12)
|
||||||
|
# =============================================================================
|
||||||
|
#
|
||||||
|
# This base file ships a SAFE-BY-DEFAULT control plane:
|
||||||
|
#
|
||||||
|
# - CERTCTL_AUTH_TYPE defaults to api-key (the code default; not overridden
|
||||||
|
# here). The server REFUSES to start with auth=none on a non-loopback
|
||||||
|
# bind unless CERTCTL_DEMO_MODE_ACK=true (Audit 2026-05-10 HIGH-12 +
|
||||||
|
# Bundle 2 closure: see internal/config/config.go::Validate).
|
||||||
|
# - CERTCTL_KEYGEN_MODE defaults to agent (the code default).
|
||||||
|
# - CERTCTL_DEMO_SEED defaults to false (the code default; the 180-day
|
||||||
|
# simulated history seed only runs under the demo overlay).
|
||||||
|
# - Default placeholder credentials (`change-me-...` sentinels) are NOT
|
||||||
|
# interpolated by this compose. The server REFUSES to start when those
|
||||||
|
# placeholder strings reach config (Bundle 2 fail-closed guards) unless
|
||||||
|
# DEMO_MODE_ACK=true. Operators MUST set:
|
||||||
|
# POSTGRES_PASSWORD (openssl rand -hex 32)
|
||||||
|
# CERTCTL_AUTH_SECRET (openssl rand -hex 32)
|
||||||
|
# CERTCTL_CONFIG_ENCRYPTION_KEY (openssl rand -base64 32)
|
||||||
|
# CERTCTL_API_KEY (matches CERTCTL_AUTH_SECRET or one
|
||||||
|
# of its rotation siblings)
|
||||||
|
# CERTCTL_AGENT_ID (returned from POST /api/v1/agents)
|
||||||
|
# in deploy/.env or the shell environment. See deploy/.env.example.
|
||||||
|
#
|
||||||
|
# USAGE
|
||||||
|
# -----
|
||||||
|
#
|
||||||
|
# Production-shaped (this base alone):
|
||||||
|
# docker compose -f deploy/docker-compose.yml up -d
|
||||||
|
#
|
||||||
|
# Bundled demo (zero-config, populated dashboard, demo-mode auth):
|
||||||
|
# CERTCTL_DEMO_MODE_ACK_TS=$(date +%s) docker compose \
|
||||||
|
# -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d
|
||||||
|
#
|
||||||
|
# The demo overlay (docker-compose.demo.yml) layers in the demo-mode env
|
||||||
|
# vars (AUTH_TYPE=none + DEMO_MODE_ACK=true + KEYGEN_MODE=server +
|
||||||
|
# DEMO_SEED=true + the change-me placeholder creds). It exists so the
|
||||||
|
# `docker compose up` smoke + screenshot path stays one command — but it
|
||||||
|
# ALSO carries the operator-visible warning banner the server emits at
|
||||||
|
# boot when DEMO_MODE_ACK=true.
|
||||||
|
#
|
||||||
|
# Pre-Bundle-2 this base file WAS the demo path. The split happened in
|
||||||
|
# 2026-05-12; the README quickstart, deploy/ENVIRONMENTS.md, and the
|
||||||
|
# cold-DB compose smoke in .github/workflows/ci.yml were updated in the
|
||||||
|
# same commit to point at the new layout.
|
||||||
services:
|
services:
|
||||||
# HTTPS-Everywhere Phase 3 — self-signed TLS bootstrap (init container).
|
# HTTPS-Everywhere Phase 3 — self-signed TLS bootstrap (init container).
|
||||||
# Generates a CN=certctl-server ECDSA-P256 (SHA-256 signature) cert with
|
# Generates a CN=certctl-server ECDSA-P256 (SHA-256 signature) cert with
|
||||||
@@ -82,7 +128,12 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
POSTGRES_DB: certctl
|
POSTGRES_DB: certctl
|
||||||
POSTGRES_USER: certctl
|
POSTGRES_USER: certctl
|
||||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-certctl}
|
# Bundle 2 closure: no `:-certctl` fallback. Operators MUST set
|
||||||
|
# POSTGRES_PASSWORD in deploy/.env or the shell environment. The
|
||||||
|
# demo overlay (docker-compose.demo.yml) supplies a fixed weak
|
||||||
|
# default for screenshot/demo use; production deploys never
|
||||||
|
# depend on that fallback.
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
ports:
|
ports:
|
||||||
- "5432:5432"
|
- "5432:5432"
|
||||||
volumes:
|
volumes:
|
||||||
@@ -123,25 +174,44 @@ services:
|
|||||||
# on the docker bridge network keeps sslmode=disable acceptable; for
|
# on the docker bridge network keeps sslmode=disable acceptable; for
|
||||||
# external/managed Postgres operators MUST override CERTCTL_DATABASE_URL
|
# external/managed Postgres operators MUST override CERTCTL_DATABASE_URL
|
||||||
# with sslmode=verify-full and provide the CA bundle. See docs/database-tls.md.
|
# with sslmode=verify-full and provide the CA bundle. See docs/database-tls.md.
|
||||||
CERTCTL_DATABASE_URL: ${CERTCTL_DATABASE_URL:-postgres://certctl:${POSTGRES_PASSWORD:-certctl}@postgres:5432/certctl?sslmode=disable}
|
CERTCTL_DATABASE_URL: ${CERTCTL_DATABASE_URL:-postgres://certctl:${POSTGRES_PASSWORD}@postgres:5432/certctl?sslmode=disable}
|
||||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||||
CERTCTL_SERVER_PORT: 8443
|
CERTCTL_SERVER_PORT: 8443
|
||||||
CERTCTL_SERVER_TLS_CERT_PATH: /etc/certctl/tls/server.crt
|
CERTCTL_SERVER_TLS_CERT_PATH: /etc/certctl/tls/server.crt
|
||||||
CERTCTL_SERVER_TLS_KEY_PATH: /etc/certctl/tls/server.key
|
CERTCTL_SERVER_TLS_KEY_PATH: /etc/certctl/tls/server.key
|
||||||
CERTCTL_LOG_LEVEL: info
|
CERTCTL_LOG_LEVEL: info
|
||||||
CERTCTL_AUTH_TYPE: none
|
# Bundle 2 closure (compose split). The base compose no longer
|
||||||
CERTCTL_KEYGEN_MODE: server # Demo uses server-side keygen; production should use "agent"
|
# sets CERTCTL_AUTH_TYPE / CERTCTL_KEYGEN_MODE / DEMO_MODE_ACK /
|
||||||
CERTCTL_NETWORK_SCAN_ENABLED: "true" # Enable network scan GUI with seeded demo targets
|
# DEMO_SEED — the code defaults take over (auth-type api-key,
|
||||||
CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY:-change-me-32-char-encryption-key} # AES-256-GCM for dynamic issuer/target config
|
# keygen agent, demo-mode false, demo-seed false). The demo
|
||||||
# Bundle 1 follow-on: this compose IS the bundled demo path
|
# overlay (docker-compose.demo.yml) is what flips this baseline
|
||||||
# (CERTCTL_AUTH_TYPE=none + KEYGEN_MODE=server above), so the
|
# into the populated-dashboard demo path; without that overlay
|
||||||
# demo seed runs by default. seed_demo.sql pre-seeds the
|
# the server boots production-shaped and refuses to start unless
|
||||||
# agent-demo-1 row that the bundled certctl-agent below needs
|
# the operator has supplied CERTCTL_AUTH_SECRET +
|
||||||
# to authenticate. The docker-compose.demo.yml overlay still
|
# CERTCTL_CONFIG_ENCRYPTION_KEY.
|
||||||
# works (it sets the same flag) and remains for backward
|
#
|
||||||
# compat. Production deploys override CERTCTL_AUTH_TYPE +
|
# Audit 2026-05-10 HIGH-12: when DEMO_MODE_ACK=true (set by the
|
||||||
# KEYGEN_MODE + DEMO_SEED via their own compose.
|
# demo overlay) AND the listener binds to a non-loopback address,
|
||||||
CERTCTL_DEMO_SEED: "true"
|
# every request is served as the synthetic admin actor
|
||||||
|
# `actor-demo-anon`. The server emits a prominent boot-time WARN
|
||||||
|
# banner with a production-promotion checklist in that case.
|
||||||
|
CERTCTL_AUTH_SECRET: ${CERTCTL_AUTH_SECRET}
|
||||||
|
CERTCTL_NETWORK_SCAN_ENABLED: "true" # Enable network scan GUI
|
||||||
|
CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY} # AES-256-GCM for dynamic issuer/target config
|
||||||
|
# Bootstrap token interpolation surface (Auditable Codebase Bundle
|
||||||
|
# cold-DB smoke closure, 2026-05-12). Pre-fix, the `env-file +
|
||||||
|
# --force-recreate certctl-server` pattern documented in
|
||||||
|
# cowork/manual-testing-bundle-2.html (and used by the cold-DB
|
||||||
|
# smoke job in .github/workflows/ci.yml::cold-db-compose-smoke)
|
||||||
|
# set CERTCTL_BOOTSTRAP_TOKEN in compose's own interpolation
|
||||||
|
# environment but the container never received it because this
|
||||||
|
# block didn't reference the variable. Wiring it as an explicit
|
||||||
|
# interpolation (default empty) makes the documented manual flow
|
||||||
|
# actually work end-to-end. Empty value = bootstrap strategy
|
||||||
|
# disabled (server returns 410 Gone on POST /api/v1/auth/bootstrap),
|
||||||
|
# which is the safe default — only set the var when you intend to
|
||||||
|
# mint a day-0 admin via the bootstrap path.
|
||||||
|
CERTCTL_BOOTSTRAP_TOKEN: ${CERTCTL_BOOTSTRAP_TOKEN:-}
|
||||||
ports:
|
ports:
|
||||||
- "8443:8443"
|
- "8443:8443"
|
||||||
volumes:
|
volumes:
|
||||||
@@ -191,18 +261,19 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
CERTCTL_SERVER_URL: https://certctl-server:8443
|
CERTCTL_SERVER_URL: https://certctl-server:8443
|
||||||
CERTCTL_SERVER_CA_BUNDLE_PATH: /etc/certctl/tls/ca.crt
|
CERTCTL_SERVER_CA_BUNDLE_PATH: /etc/certctl/tls/ca.crt
|
||||||
CERTCTL_API_KEY: ${CERTCTL_API_KEY:-change-me-in-production}
|
# Bundle 2 closure (compose split). No placeholder fallbacks.
|
||||||
# Bundle 1 follow-on: pre-Bundle-1 the bundled agent had no
|
# Operators MUST set CERTCTL_API_KEY (matching one of the server's
|
||||||
# CERTCTL_AGENT_ID set, hit cmd/agent/main.go's fail-fast guard
|
# CERTCTL_AUTH_SECRET rotation values) and CERTCTL_AGENT_ID
|
||||||
# ("agent-id flag or CERTCTL_AGENT_ID env var is required"), and
|
# (returned from `POST /api/v1/agents` during agent enrollment).
|
||||||
# restart-looped silently on every fresh `docker compose up`.
|
# Without an agent ID, cmd/agent/main.go fails fast at startup
|
||||||
# Latent since 2026-03-14 (commit d395776). seed_demo.sql now
|
# with "agent-id flag or CERTCTL_AGENT_ID env var is required" —
|
||||||
# pre-seeds the matching agents row; the demo runs with
|
# the cold-DB compose smoke in .github/workflows/ci.yml tolerates
|
||||||
# CERTCTL_AUTH_TYPE=none on the server so the api_key Bearer
|
# the agent restart loop because the smoke targets server boot
|
||||||
# token is irrelevant here. Production deploys override
|
# only. The demo overlay (docker-compose.demo.yml) supplies a
|
||||||
# CERTCTL_AGENT_ID with the value returned from
|
# pre-seeded agent-demo-1 row + matching env vars so the demo
|
||||||
# POST /api/v1/agents during registration.
|
# path stays one-command.
|
||||||
CERTCTL_AGENT_ID: ${CERTCTL_AGENT_ID:-agent-demo-1}
|
CERTCTL_API_KEY: ${CERTCTL_API_KEY}
|
||||||
|
CERTCTL_AGENT_ID: ${CERTCTL_AGENT_ID}
|
||||||
CERTCTL_AGENT_NAME: docker-agent
|
CERTCTL_AGENT_NAME: docker-agent
|
||||||
CERTCTL_LOG_LEVEL: info
|
CERTCTL_LOG_LEVEL: info
|
||||||
CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys # Agent scans this directory for existing certificates
|
CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys # Agent scans this directory for existing certificates
|
||||||
|
|||||||
@@ -2,7 +2,15 @@ apiVersion: v2
|
|||||||
name: certctl
|
name: certctl
|
||||||
description: Self-hosted certificate lifecycle management platform
|
description: Self-hosted certificate lifecycle management platform
|
||||||
type: application
|
type: application
|
||||||
version: 0.1.0
|
# Bundle 3 closure (OPS-L1): bumped from 0.1.0 → 1.0.0. The pre-1.0
|
||||||
|
# version implied "unstable chart, breaking changes on every minor"
|
||||||
|
# which prospective enterprise operators read as "not ready for
|
||||||
|
# production". The chart has been deployed against real clusters since
|
||||||
|
# 2026-02 and shipped through 8 audit closures (M-018, U-1, U-2, U-3,
|
||||||
|
# H-1, G-1, B1 connector validation, B2 first-run guards); 1.0.0
|
||||||
|
# matches that maturity. The chart still adheres to semver going
|
||||||
|
# forward — any breaking value-schema change bumps to 2.0.0.
|
||||||
|
version: 1.0.0
|
||||||
appVersion: "2.1.0"
|
appVersion: "2.1.0"
|
||||||
keywords:
|
keywords:
|
||||||
- certificate
|
- certificate
|
||||||
|
|||||||
@@ -128,8 +128,27 @@ Bundle B / Audit M-018 (PCI-DSS Req 4 / CWE-319):
|
|||||||
postgresql.tls.mode without further translation.
|
postgresql.tls.mode without further translation.
|
||||||
*/}}
|
*/}}
|
||||||
{{- define "certctl.databaseURL" -}}
|
{{- define "certctl.databaseURL" -}}
|
||||||
|
{{- if .Values.postgresql.enabled -}}
|
||||||
{{- $sslMode := default "disable" .Values.postgresql.tls.mode -}}
|
{{- $sslMode := default "disable" .Values.postgresql.tls.mode -}}
|
||||||
postgres://{{ .Values.postgresql.auth.username }}:$(POSTGRES_PASSWORD)@{{ include "certctl.fullname" . }}-postgres:5432/{{ .Values.postgresql.auth.database }}?sslmode={{ $sslMode }}
|
postgres://{{ .Values.postgresql.auth.username }}:$(POSTGRES_PASSWORD)@{{ include "certctl.fullname" . }}-postgres:5432/{{ .Values.postgresql.auth.database }}?sslmode={{ $sslMode }}
|
||||||
|
{{- else -}}
|
||||||
|
{{- /*
|
||||||
|
Bundle 3 closure (D2 + OPS-L2): external-Postgres first-class path.
|
||||||
|
When postgresql.enabled=false, the chart NEVER renders the
|
||||||
|
bundled StatefulSet, postgres-secret, or postgres-service —
|
||||||
|
templates/postgres-*.yaml gate themselves on .Values.postgresql.enabled.
|
||||||
|
The connection string comes from externalDatabase.url (the canonical
|
||||||
|
form) or, for backward-compat with pre-Bundle-3 deploys, from
|
||||||
|
server.env.CERTCTL_DATABASE_URL (which overrides this helper at the
|
||||||
|
pod-spec level — see server-deployment.yaml).
|
||||||
|
|
||||||
|
externalDatabase.url is consumed VERBATIM by the server's
|
||||||
|
CERTCTL_DATABASE_URL env var. Operators are responsible for choosing
|
||||||
|
the right sslmode (`verify-full` recommended for managed Postgres
|
||||||
|
per PCI-DSS Req 4 §2.2.5; see docs/database-tls.md).
|
||||||
|
*/ -}}
|
||||||
|
{{- required "externalDatabase.url is required when postgresql.enabled=false" .Values.externalDatabase.url -}}
|
||||||
|
{{- end -}}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{/*
|
{{/*
|
||||||
@@ -180,11 +199,110 @@ per affected resource. No-op when configured correctly.
|
|||||||
{{- if and (not .Values.server.tls.existingSecret) (not .Values.server.tls.certManager.enabled) -}}
|
{{- if and (not .Values.server.tls.existingSecret) (not .Values.server.tls.certManager.enabled) -}}
|
||||||
{{- fail "\n\ncertctl refuses to start without TLS.\n\nSet EXACTLY ONE of:\n --set server.tls.existingSecret=<your-kubernetes.io/tls-secret-name>\nOR\n --set server.tls.certManager.enabled=true \\\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md for the full setup walkthrough, including bootstrap\nguidance for air-gapped clusters without cert-manager.\n" -}}
|
{{- fail "\n\ncertctl refuses to start without TLS.\n\nSet EXACTLY ONE of:\n --set server.tls.existingSecret=<your-kubernetes.io/tls-secret-name>\nOR\n --set server.tls.certManager.enabled=true \\\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md for the full setup walkthrough, including bootstrap\nguidance for air-gapped clusters without cert-manager.\n" -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
|
{{- if and .Values.server.tls.existingSecret .Values.server.tls.certManager.enabled -}}
|
||||||
|
{{- /*
|
||||||
|
Bundle 3 closure (D7): pre-Bundle-3 the helper only rejected the
|
||||||
|
NEITHER-set case. Setting BOTH (`existingSecret` AND `certManager.enabled=true`)
|
||||||
|
produced two TLS sources of truth — the existing Secret got mounted but
|
||||||
|
cert-manager simultaneously provisioned a Certificate CR pointing at a
|
||||||
|
conflicting Secret. Operators ended up with a dangling cert-manager
|
||||||
|
Certificate or a wrong-source TLS bundle. The chart now refuses at
|
||||||
|
render-time so the misconfiguration cannot ship.
|
||||||
|
*/ -}}
|
||||||
|
{{- fail "\n\nserver.tls.existingSecret AND server.tls.certManager.enabled are BOTH set.\n\nThe chart requires EXACTLY ONE TLS ownership path (Bundle 3 closure / audit D7):\n - existingSecret: operator owns the TLS Secret; cert-manager must NOT provision one.\n - certManager.enabled: cert-manager owns the TLS Secret; existingSecret must be empty.\n\nUnset one of:\n --set server.tls.existingSecret=\"\" (let cert-manager own it)\nOR\n --set server.tls.certManager.enabled=false (let the existing Secret stand)\n\nSee docs/tls.md.\n" -}}
|
||||||
|
{{- end -}}
|
||||||
{{- if and .Values.server.tls.certManager.enabled (not .Values.server.tls.certManager.issuerRef.name) -}}
|
{{- if and .Values.server.tls.certManager.enabled (not .Values.server.tls.certManager.issuerRef.name) -}}
|
||||||
{{- fail "\n\nserver.tls.certManager.enabled=true but server.tls.certManager.issuerRef.name is empty.\n\nSet:\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md.\n" -}}
|
{{- fail "\n\nserver.tls.certManager.enabled=true but server.tls.certManager.issuerRef.name is empty.\n\nSet:\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md.\n" -}}
|
||||||
{{- end -}}
|
{{- end -}}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Pod- vs container-scope security context split (Bundle 3 closure / audit D3).
|
||||||
|
|
||||||
|
The Kubernetes API splits SecurityContext into two non-overlapping
|
||||||
|
field sets, and silently DROPS fields that land at the wrong scope —
|
||||||
|
which is exactly the audit D3 finding pre-Bundle-3.
|
||||||
|
|
||||||
|
Pod-scope fields (applied via spec.securityContext):
|
||||||
|
runAsNonRoot, runAsUser, runAsGroup, fsGroup, fsGroupChangePolicy,
|
||||||
|
supplementalGroups, seLinuxOptions, seccompProfile, sysctls.
|
||||||
|
|
||||||
|
Container-scope fields (applied via spec.containers[].securityContext):
|
||||||
|
readOnlyRootFilesystem, allowPrivilegeEscalation, capabilities,
|
||||||
|
privileged, procMount, runAsNonRoot/runAsUser/runAsGroup (override),
|
||||||
|
seLinuxOptions/seccompProfile (override).
|
||||||
|
|
||||||
|
These helpers split a single operator-facing `securityContext` map
|
||||||
|
into the two sub-maps so the chart renders each field at the scope
|
||||||
|
where Kubernetes actually honors it. The split is conservative — a
|
||||||
|
field that COULD live at either scope is rendered at pod scope only
|
||||||
|
(no override at container scope) so behavior matches the pre-Bundle-3
|
||||||
|
operator intent: pod-level setting is the source of truth.
|
||||||
|
|
||||||
|
Operators don't need to change values.yaml; the existing
|
||||||
|
`server.securityContext` and `agent.securityContext` blocks keep
|
||||||
|
working byte-for-byte. The Helm template just routes each field to
|
||||||
|
the correct YAML node now.
|
||||||
|
*/}}
|
||||||
|
{{- define "certctl.podSecurityContext" -}}
|
||||||
|
{{- $sc := . -}}
|
||||||
|
{{- $podKeys := list "runAsNonRoot" "runAsUser" "runAsGroup" "fsGroup" "fsGroupChangePolicy" "supplementalGroups" "seLinuxOptions" "seccompProfile" "sysctls" -}}
|
||||||
|
{{- $out := dict -}}
|
||||||
|
{{- range $k := $podKeys -}}
|
||||||
|
{{- if hasKey $sc $k -}}
|
||||||
|
{{- $_ := set $out $k (index $sc $k) -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- toYaml $out -}}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{- define "certctl.containerSecurityContext" -}}
|
||||||
|
{{- $sc := . -}}
|
||||||
|
{{- $containerKeys := list "readOnlyRootFilesystem" "allowPrivilegeEscalation" "capabilities" "privileged" "procMount" -}}
|
||||||
|
{{- $out := dict -}}
|
||||||
|
{{- range $k := $containerKeys -}}
|
||||||
|
{{- if hasKey $sc $k -}}
|
||||||
|
{{- $_ := set $out $k (index $sc $k) -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- toYaml $out -}}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Required-secret gate (Bundle 3 closure / audit D1).
|
||||||
|
|
||||||
|
Pre-Bundle-3 the chart accepted empty `server.auth.apiKey` and empty
|
||||||
|
`postgresql.auth.password` and rendered Secrets with empty values; the
|
||||||
|
certctl-server container then crash-looped at startup with the auth
|
||||||
|
configuration error or with `pq: password authentication failed for
|
||||||
|
user "certctl"`. Worse, an operator who forgot to set the api-key
|
||||||
|
ended up with auth.type=api-key + empty CERTCTL_AUTH_SECRET in the
|
||||||
|
Secret, which Validate() rejects at startup — but the diagnostic
|
||||||
|
surfaces inside a CrashLoopBackOff, not at `helm install` time where
|
||||||
|
it would be caught immediately.
|
||||||
|
|
||||||
|
Post-Bundle-3 the chart fails at template time with operator-actionable
|
||||||
|
guidance. The bundled-Postgres path (`postgresql.enabled=true`)
|
||||||
|
requires `postgresql.auth.password`; the external-Postgres path
|
||||||
|
(`postgresql.enabled=false`) skips that check because credentials are
|
||||||
|
embedded in `externalDatabase.url` instead.
|
||||||
|
|
||||||
|
Any template that depends on either secret value should call
|
||||||
|
`{{ include "certctl.requiredSecrets" . }}` at the top so this guard
|
||||||
|
runs once per affected resource. No-op when configured correctly.
|
||||||
|
*/}}
|
||||||
|
{{- define "certctl.requiredSecrets" -}}
|
||||||
|
{{- if and (eq .Values.server.auth.type "api-key") (not .Values.server.auth.apiKey) -}}
|
||||||
|
{{- fail "\n\nserver.auth.type=\"api-key\" but server.auth.apiKey is empty.\n\nSet:\n --set server.auth.apiKey=$(openssl rand -base64 32)\n\nor put the value in a values override. The certctl-server container\nrefuses to start without an API key when auth.type=api-key.\n\nFor demo deploys without authentication, use:\n --set server.auth.type=none\n(only safe behind an authenticating gateway — see docs/operator/security.md).\n" -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- /* The suggested Postgres password is hex, not base64: certctl.databaseURL splices $(POSTGRES_PASSWORD) into the connection URL unescaped, and base64 output can contain "/" and "+". */ -}}
|
||||||
|
{{- if and .Values.postgresql.enabled (not .Values.postgresql.auth.password) -}}
|
||||||
|
{{- fail "\n\npostgresql.enabled=true but postgresql.auth.password is empty.\n\nSet:\n --set postgresql.auth.password=$(openssl rand -hex 32)\n\nor put the value in a values override. The bundled Postgres\nStatefulSet refuses to bootstrap initdb without POSTGRES_PASSWORD.\n\nFor external Postgres deployments, set:\n --set postgresql.enabled=false\n --set externalDatabase.url=postgres://user:pass@host:5432/db?sslmode=require\nSee deploy/helm/examples/values-external-db.yaml.\n" -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- if and (not .Values.postgresql.enabled) (not .Values.externalDatabase.url) (not .Values.server.env.CERTCTL_DATABASE_URL) -}}
|
||||||
|
{{- fail "\n\npostgresql.enabled=false but no external database URL is configured.\n\nSet ONE of:\n --set externalDatabase.url=postgres://user:pass@host:5432/db?sslmode=require\nOR (legacy)\n --set server.env.CERTCTL_DATABASE_URL=postgres://user:pass@host:5432/db?sslmode=require\n\nSee deploy/helm/examples/values-external-db.yaml.\n" -}}
|
||||||
|
{{- end -}}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{/*
|
{{/*
|
||||||
Auth-type validation gate.
|
Auth-type validation gate.
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
||||||
securityContext:
|
securityContext:
|
||||||
{{- toYaml .Values.agent.securityContext | nindent 8 }}
|
{{- include "certctl.podSecurityContext" .Values.agent.securityContext | nindent 8 }}
|
||||||
{{- with .Values.imagePullSecrets }}
|
{{- with .Values.imagePullSecrets }}
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
@@ -40,6 +40,8 @@ spec:
|
|||||||
- name: agent
|
- name: agent
|
||||||
image: {{ include "certctl.agentImage" . }}
|
image: {{ include "certctl.agentImage" . }}
|
||||||
imagePullPolicy: {{ .Values.agent.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.agent.image.pullPolicy }}
|
||||||
|
securityContext:
|
||||||
|
{{- include "certctl.containerSecurityContext" .Values.agent.securityContext | nindent 12 }}
|
||||||
env:
|
env:
|
||||||
- name: CERTCTL_SERVER_URL
|
- name: CERTCTL_SERVER_URL
|
||||||
value: {{ include "certctl.serverURL" . }}
|
value: {{ include "certctl.serverURL" . }}
|
||||||
@@ -106,7 +108,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
||||||
securityContext:
|
securityContext:
|
||||||
{{- toYaml .Values.agent.securityContext | nindent 8 }}
|
{{- include "certctl.podSecurityContext" .Values.agent.securityContext | nindent 8 }}
|
||||||
{{- with .Values.imagePullSecrets }}
|
{{- with .Values.imagePullSecrets }}
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
@@ -127,6 +129,8 @@ spec:
|
|||||||
- name: agent
|
- name: agent
|
||||||
image: {{ include "certctl.agentImage" . }}
|
image: {{ include "certctl.agentImage" . }}
|
||||||
imagePullPolicy: {{ .Values.agent.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.agent.image.pullPolicy }}
|
||||||
|
securityContext:
|
||||||
|
{{- include "certctl.containerSecurityContext" .Values.agent.securityContext | nindent 12 }}
|
||||||
env:
|
env:
|
||||||
- name: CERTCTL_SERVER_URL
|
- name: CERTCTL_SERVER_URL
|
||||||
value: {{ include "certctl.serverURL" . }}
|
value: {{ include "certctl.serverURL" . }}
|
||||||
|
|||||||
@@ -0,0 +1,75 @@
|
|||||||
|
{{- /*
|
||||||
|
Bundle 3 closure (D11): NetworkPolicy for the server Deployment.
|
||||||
|
|
||||||
|
Pre-Bundle-3 the chart had no NetworkPolicy template at all — the
|
||||||
|
audit-D11 "documented placeholder" finding referred to docs claiming
|
||||||
|
deny-by-default network isolation that the rendered chart did not
|
||||||
|
provide. Closed.
|
||||||
|
|
||||||
|
This template emits a single NetworkPolicy that, when enabled,
|
||||||
|
restricts the certctl-server Pod to:
|
||||||
|
- Ingress : from any agent Pod in the same namespace (selector
|
||||||
|
match on app.kubernetes.io/component=agent) on the
|
||||||
|
server port, plus optional operator-supplied
|
||||||
|
additional from clauses (.networkPolicy.extraIngress).
|
||||||
|
- Egress : to the postgres Pod (when postgresql.enabled=true),
|
||||||
|
53/UDP+TCP for kube-dns, and operator-supplied
|
||||||
|
additional to clauses for outbound CA / OIDC / SMTP
|
||||||
|
(.networkPolicy.extraEgress).
|
||||||
|
|
||||||
|
Default off so existing deploys don't suddenly lose network reach.
|
||||||
|
Operators opt in once they've mapped their actual egress surface.
|
||||||
|
*/ -}}
|
||||||
|
{{- if .Values.networkPolicy.enabled }}
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: {{ include "certctl.fullname" . }}-server
|
||||||
|
labels:
|
||||||
|
{{- include "certctl.labels" . | nindent 4 }}
|
||||||
|
app.kubernetes.io/component: server
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
{{- include "certctl.serverSelectorLabels" . | nindent 6 }}
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
# Allow in-cluster agent Pods to reach the server's HTTPS port.
|
||||||
|
- from:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: {{ include "certctl.name" . }}
|
||||||
|
app.kubernetes.io/component: agent
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: {{ .Values.server.port }}
|
||||||
|
{{- with .Values.networkPolicy.extraIngress }}
|
||||||
|
{{- toYaml . | nindent 4 }}
|
||||||
|
{{- end }}
|
||||||
|
egress:
|
||||||
|
# Kube-DNS (53/UDP + 53/TCP). Required for any in-cluster name
|
||||||
|
# resolution (postgres-service, OIDC issuer hostnames, ACME).
|
||||||
|
- to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
ports:
|
||||||
|
- protocol: UDP
|
||||||
|
port: 53
|
||||||
|
- protocol: TCP
|
||||||
|
port: 53
|
||||||
|
{{- if .Values.postgresql.enabled }}
|
||||||
|
# Bundled-Postgres egress.
|
||||||
|
- to:
|
||||||
|
- podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: {{ include "certctl.name" . }}
|
||||||
|
app.kubernetes.io/component: postgres
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 5432
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.networkPolicy.extraEgress }}
|
||||||
|
{{- toYaml . | nindent 4 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
{{- /*
|
||||||
|
Bundle 3 closure (D11): PodDisruptionBudget for the server Deployment.
|
||||||
|
|
||||||
|
Pre-Bundle-3 values.yaml carried `podDisruptionBudget.enabled` +
|
||||||
|
`minAvailable` + `maxUnavailable` knobs but no template consumed
|
||||||
|
them. Audit D11 closed.
|
||||||
|
|
||||||
|
The PDB only renders when server.replicas > 1 — a single-replica
|
||||||
|
deployment can't satisfy minAvailable=1 during voluntary disruption
|
||||||
|
anyway (the eviction API would refuse to evict the Pod, stalling any node drain). Operators
|
||||||
|
running 2+ replicas get the PDB; operators running a single replica
|
||||||
|
get a templated-out NOTES line reminding them to bump replicas first.
|
||||||
|
*/ -}}
|
||||||
|
{{- if and .Values.podDisruptionBudget.enabled (gt (int .Values.server.replicas) 1) }}
|
||||||
|
apiVersion: policy/v1
|
||||||
|
kind: PodDisruptionBudget
|
||||||
|
metadata:
|
||||||
|
name: {{ include "certctl.fullname" . }}-server
|
||||||
|
labels:
|
||||||
|
{{- include "certctl.labels" . | nindent 4 }}
|
||||||
|
app.kubernetes.io/component: server
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
{{- include "certctl.serverSelectorLabels" . | nindent 6 }}
|
||||||
|
{{- if .Values.podDisruptionBudget.minAvailable }}
|
||||||
|
minAvailable: {{ .Values.podDisruptionBudget.minAvailable }}
|
||||||
|
{{- else if .Values.podDisruptionBudget.maxUnavailable }}
|
||||||
|
maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
@@ -1,3 +1,14 @@
|
|||||||
|
{{- if .Values.postgresql.enabled }}
|
||||||
|
{{- /*
|
||||||
|
Bundle 3 closure (D1 + D2): the bundled-Postgres Secret only renders
|
||||||
|
when postgresql.enabled=true. Pre-Bundle-3 this template rendered
|
||||||
|
unconditionally with `password: "changeme"` as the fallback default —
|
||||||
|
which is exactly what the change-me-... cluster of audit findings
|
||||||
|
was about (a deployment that uses the rendered chart with default
|
||||||
|
values ships a known weak password). The Bundle-3 helper at
|
||||||
|
certctl.requiredSecrets fail-closes empty password at template time
|
||||||
|
before this template ever runs.
|
||||||
|
*/ -}}
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
@@ -7,6 +18,7 @@ metadata:
|
|||||||
app.kubernetes.io/component: postgres
|
app.kubernetes.io/component: postgres
|
||||||
type: Opaque
|
type: Opaque
|
||||||
stringData:
|
stringData:
|
||||||
password: {{ .Values.postgresql.auth.password | default "changeme" | quote }}
|
password: {{ required "postgresql.auth.password is required when postgresql.enabled=true (Bundle 3: no fallback default)" .Values.postgresql.auth.password | quote }}
|
||||||
username: {{ .Values.postgresql.auth.username | quote }}
|
username: {{ .Values.postgresql.auth.username | quote }}
|
||||||
database: {{ .Values.postgresql.auth.database | quote }}
|
database: {{ .Values.postgresql.auth.database | quote }}
|
||||||
|
{{- end }}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
{{- include "certctl.tls.required" . }}
|
{{- include "certctl.tls.required" . }}
|
||||||
{{- include "certctl.validateAuthType" . }}
|
{{- include "certctl.validateAuthType" . }}
|
||||||
|
{{- include "certctl.requiredSecrets" . }}
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
@@ -23,8 +24,13 @@ spec:
|
|||||||
checksum/secret: {{ include (print $.Template.BasePath "/server-secret.yaml") . | sha256sum }}
|
checksum/secret: {{ include (print $.Template.BasePath "/server-secret.yaml") . | sha256sum }}
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
serviceAccountName: {{ include "certctl.serviceAccountName" . }}
|
||||||
|
# Bundle 3 closure (D3): pod-level fields only. The container-only
|
||||||
|
# fields (readOnlyRootFilesystem, allowPrivilegeEscalation,
|
||||||
|
# capabilities, privileged) render at container scope below —
|
||||||
|
# pre-Bundle-3 they all sat here at pod scope and the K8s API
|
||||||
|
# silently dropped them.
|
||||||
securityContext:
|
securityContext:
|
||||||
{{- toYaml .Values.server.securityContext | nindent 8 }}
|
{{- include "certctl.podSecurityContext" .Values.server.securityContext | nindent 8 }}
|
||||||
{{- with .Values.imagePullSecrets }}
|
{{- with .Values.imagePullSecrets }}
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
@@ -33,6 +39,13 @@ spec:
|
|||||||
- name: server
|
- name: server
|
||||||
image: {{ include "certctl.serverImage" . }}
|
image: {{ include "certctl.serverImage" . }}
|
||||||
imagePullPolicy: {{ .Values.server.image.pullPolicy }}
|
imagePullPolicy: {{ .Values.server.image.pullPolicy }}
|
||||||
|
# Bundle 3 closure (D3): container-scope security hardening.
|
||||||
|
# readOnlyRootFilesystem + allowPrivilegeEscalation +
|
||||||
|
# capabilities are container-only fields per the K8s API; the
|
||||||
|
# helper splits them out of the operator-facing
|
||||||
|
# server.securityContext map so existing values keep working.
|
||||||
|
securityContext:
|
||||||
|
{{- include "certctl.containerSecurityContext" .Values.server.securityContext | nindent 12 }}
|
||||||
ports:
|
ports:
|
||||||
- name: https
|
- name: https
|
||||||
containerPort: {{ .Values.server.port }}
|
containerPort: {{ .Values.server.port }}
|
||||||
@@ -51,11 +64,16 @@ spec:
|
|||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "certctl.fullname" . }}-server
|
name: {{ include "certctl.fullname" . }}-server
|
||||||
key: database-url
|
key: database-url
|
||||||
|
# Bundle 3 closure (D2): POSTGRES_PASSWORD is only needed
|
||||||
|
# for the bundled-Postgres mode. External Postgres mode
|
||||||
|
# embeds the password directly in externalDatabase.url.
|
||||||
|
{{- if .Values.postgresql.enabled }}
|
||||||
- name: POSTGRES_PASSWORD
|
- name: POSTGRES_PASSWORD
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "certctl.fullname" . }}-postgres
|
name: {{ include "certctl.fullname" . }}-postgres
|
||||||
key: password
|
key: password
|
||||||
|
{{- end }}
|
||||||
- name: CERTCTL_LOG_LEVEL
|
- name: CERTCTL_LOG_LEVEL
|
||||||
valueFrom:
|
valueFrom:
|
||||||
configMapKeyRef:
|
configMapKeyRef:
|
||||||
|
|||||||
@@ -0,0 +1,63 @@
|
|||||||
|
{{- /*
|
||||||
|
Bundle 3 closure (D5 + OPS-M1 docs): Prometheus Operator ServiceMonitor.
|
||||||
|
|
||||||
|
Pre-Bundle-3 the chart had `monitoring.serviceMonitor.enabled` in
|
||||||
|
values.yaml but no template consumed it — toggling it on rendered
|
||||||
|
nothing. Audit D5 closed.
|
||||||
|
|
||||||
|
The endpoint scrapes /api/v1/metrics/prometheus which the certctl
|
||||||
|
server already exposes in Prometheus exposition format (see
|
||||||
|
internal/api/handler/metrics.go::GetPrometheusMetrics). Note: the
|
||||||
|
endpoint is rbac-gated on `metrics.read`, so the ServiceMonitor needs
|
||||||
|
a bearer token. Operators with Prometheus Operator MUST set
|
||||||
|
`monitoring.serviceMonitor.bearerTokenSecret` pointing at a Secret
|
||||||
|
that holds an API key with the `metrics.read` permission. Without
|
||||||
|
that, scrapes return 401.
|
||||||
|
|
||||||
|
OPS-M1 caveat: the current /metrics/prometheus handler is a hand-rolled
|
||||||
|
exposition-format emitter, not prometheus/client_golang-instrumented
|
||||||
|
code. Histograms, exemplars, and target labels are limited to what the
|
||||||
|
handler computes statically. Migration to client_golang tracked in
|
||||||
|
WORKSPACE-ROADMAP.md.
|
||||||
|
*/ -}}
|
||||||
|
{{- if and .Values.monitoring.enabled .Values.monitoring.serviceMonitor.enabled }}
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: {{ include "certctl.fullname" . }}-server
|
||||||
|
labels:
|
||||||
|
{{- include "certctl.labels" . | nindent 4 }}
|
||||||
|
app.kubernetes.io/component: server
|
||||||
|
{{- with .Values.monitoring.serviceMonitor.labels }}
|
||||||
|
{{- toYaml . | nindent 4 }}
|
||||||
|
{{- end }}
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
{{- include "certctl.serverSelectorLabels" . | nindent 6 }}
|
||||||
|
endpoints:
|
||||||
|
- port: https
|
||||||
|
scheme: https
|
||||||
|
path: /api/v1/metrics/prometheus
|
||||||
|
interval: {{ .Values.monitoring.serviceMonitor.interval | default "30s" }}
|
||||||
|
scrapeTimeout: {{ .Values.monitoring.serviceMonitor.scrapeTimeout | default "10s" }}
|
||||||
|
tlsConfig:
|
||||||
|
# The certctl server uses self-signed bootstrap TLS or operator-
|
||||||
|
# provided cert-manager TLS — the ServiceMonitor consumes the
|
||||||
|
# same CA bundle the server presents. When server.tls.existingSecret
|
||||||
|
# is set, operators usually want to pull the matching ca.crt key
|
||||||
|
# out of that Secret. Adjust if your CA chain lives elsewhere.
|
||||||
|
{{- if .Values.monitoring.serviceMonitor.tlsConfig }}
|
||||||
|
{{- toYaml .Values.monitoring.serviceMonitor.tlsConfig | nindent 8 }}
|
||||||
|
{{- else }}
|
||||||
|
insecureSkipVerify: true
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.monitoring.serviceMonitor.bearerTokenSecret }}
|
||||||
|
bearerTokenSecret:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.monitoring.serviceMonitor.relabelings }}
|
||||||
|
relabelings:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
@@ -15,7 +15,10 @@ fullnameOverride: ""
|
|||||||
# Certctl Server Configuration
|
# Certctl Server Configuration
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
server:
|
server:
|
||||||
# Number of replicas (for HA deployments)
|
# Number of replicas (for HA deployments).
|
||||||
|
# Phase 2 DEPL-H1: production HA is operator-opt-in across this field
|
||||||
|
# + podDisruptionBudget.enabled + server.service.sessionAffinity.
|
||||||
|
# See docs/operator/runbooks/ha.md for the smallest-possible HA overlay.
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
|
||||||
# Image configuration
|
# Image configuration
|
||||||
@@ -272,6 +275,34 @@ server:
|
|||||||
# secret:
|
# secret:
|
||||||
# secretName: ca-cert
|
# secretName: ca-cert
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# External Database Configuration (Bundle 3 closure / D2 + OPS-L2)
|
||||||
|
# ==============================================================================
|
||||||
|
# When postgresql.enabled=false, the chart skips the bundled StatefulSet +
|
||||||
|
# Secret + Service and instead consumes the URL below verbatim as the
|
||||||
|
# server's CERTCTL_DATABASE_URL. The URL embeds username, password,
|
||||||
|
# host, port, database, and sslmode — operators are responsible for
|
||||||
|
# rotating credentials in this string out-of-band (Kubernetes Secret +
|
||||||
|
# helm upgrade is the supported pattern).
|
||||||
|
#
|
||||||
|
# Recommended sslmode for managed Postgres (RDS, Cloud SQL, Azure DB):
|
||||||
|
# verify-full — PCI-DSS Req 4 v4.0 §2.2.5 compliant; requires CA bundle.
|
||||||
|
# Mount the CA via server.volumes / server.volumeMounts and
|
||||||
|
# set sslrootcert=/path/in/pod/ca.crt in the URL.
|
||||||
|
#
|
||||||
|
# Example values overrides:
|
||||||
|
# postgresql.enabled: false
|
||||||
|
# externalDatabase.url: "postgres://certctl:HUNTER2@db.example.com:5432/certctl?sslmode=verify-full"
|
||||||
|
#
|
||||||
|
# Migration from the legacy `server.env.CERTCTL_DATABASE_URL` workaround:
|
||||||
|
# both still work (env block overrides the helper-emitted Secret value at
|
||||||
|
# pod-spec level), but the new path renders cleaner manifests with no
|
||||||
|
# stranded postgres-* templates.
|
||||||
|
externalDatabase:
|
||||||
|
# Connection string used when postgresql.enabled=false.
|
||||||
|
# Required in that mode — see certctl.requiredSecrets helper.
|
||||||
|
url: ""
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# PostgreSQL Configuration
|
# PostgreSQL Configuration
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
@@ -510,14 +541,34 @@ rbac:
|
|||||||
create: true
|
create: true
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Kubernetes Secrets Target Connector
|
# Kubernetes Secrets Target Connector (PREVIEW — Bundle 3 closure / C3)
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
# Bundle 3 audit closure (C3): the connector framework at
|
||||||
|
# internal/connector/target/k8ssecret/ ships the Config + interface +
|
||||||
|
# 14 unit tests, but the production K8s client at
|
||||||
|
# k8ssecret.go::realK8sClient is documented as "a stub placeholder for
|
||||||
|
# the real k8s.io/client-go implementation". The repo does not import
|
||||||
|
# k8s.io/client-go (verified via `grep -n "client-go" go.mod`), so the
|
||||||
|
# connector cannot deploy to a real cluster today.
|
||||||
|
#
|
||||||
|
# Setting kubernetesSecrets.enabled=true wires up the RBAC verbs the
|
||||||
|
# real client will need (get/create/update/patch/delete on Secrets)
|
||||||
|
# without making the connector functional — operators trying to use it
|
||||||
|
# get the stub's error and a pointer to this note.
|
||||||
|
#
|
||||||
|
# Status: PREVIEW. Production client lands when the cluster-management
|
||||||
|
# bundle ships (tracked in WORKSPACE-ROADMAP.md). Until then,
|
||||||
|
# in-cluster deploys use the file-based connectors (NGINX, Apache,
|
||||||
|
# HAProxy, etc.) via a Pod-mounted Secret + DaemonSet agent.
|
||||||
kubernetesSecrets:
|
kubernetesSecrets:
|
||||||
# Enable RBAC rules for managing TLS Secrets
|
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Pod Disruption Budget (for HA deployments)
|
# Pod Disruption Budget (for HA deployments).
|
||||||
|
# Phase 2 DEPL-H1: defaults to enabled=false because a PDB template
|
||||||
|
# rendered at `replicas: 1` blocks every voluntary eviction (node drain) on a
|
||||||
|
# single-node cluster. Production HA flips this to true alongside
|
||||||
|
# server.replicas ≥ 2. See docs/operator/runbooks/ha.md.
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
podDisruptionBudget:
|
podDisruptionBudget:
|
||||||
enabled: false
|
enabled: false
|
||||||
@@ -527,6 +578,13 @@ podDisruptionBudget:
|
|||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Monitoring Configuration
|
# Monitoring Configuration
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
# Bundle 3 closure (D5): the ServiceMonitor template at
|
||||||
|
# templates/servicemonitor.yaml renders when both monitoring.enabled=true
|
||||||
|
# AND monitoring.serviceMonitor.enabled=true. The endpoint scrapes
|
||||||
|
# /api/v1/metrics/prometheus, which is rbac-gated on `metrics.read` —
|
||||||
|
# operators MUST provide a bearer token via
|
||||||
|
# monitoring.serviceMonitor.bearerTokenSecret pointing at a Secret with
|
||||||
|
# an API key holding that permission. Without the token, scrapes 401.
|
||||||
monitoring:
|
monitoring:
|
||||||
enabled: false
|
enabled: false
|
||||||
# Prometheus ServiceMonitor
|
# Prometheus ServiceMonitor
|
||||||
@@ -534,8 +592,53 @@ monitoring:
|
|||||||
enabled: false
|
enabled: false
|
||||||
interval: 30s
|
interval: 30s
|
||||||
scrapeTimeout: 10s
|
scrapeTimeout: 10s
|
||||||
|
# Additional labels applied to the ServiceMonitor metadata.
|
||||||
# labels: {}
|
# labels: {}
|
||||||
# selector: {}
|
# Bearer-token Secret reference (required when the certctl server's
|
||||||
|
# /api/v1/metrics/prometheus endpoint is gated by api-key auth).
|
||||||
|
# Example:
|
||||||
|
# bearerTokenSecret:
|
||||||
|
# name: certctl-prometheus-key
|
||||||
|
# key: api-key
|
||||||
|
# bearerTokenSecret: {}
|
||||||
|
# TLS config for the scrape endpoint. The certctl server presents
|
||||||
|
# the same TLS cert the rest of the chart uses; insecureSkipVerify
|
||||||
|
# defaults to true so demos work out of the box. Production deploys
|
||||||
|
# should pin the CA via caFile or ca.secret.
|
||||||
|
# tlsConfig:
|
||||||
|
# caFile: /etc/prometheus/secrets/certctl-ca/ca.crt
|
||||||
|
# serverName: certctl-server
|
||||||
|
# tlsConfig: {}
|
||||||
|
# Optional relabeling for the scrape job.
|
||||||
|
# relabelings: []
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# Network Policy (Bundle 3 closure / D11)
|
||||||
|
# ==============================================================================
|
||||||
|
# Default off so existing deploys don't suddenly lose network reach.
|
||||||
|
# When enabled, restricts the server pod to:
|
||||||
|
# - Ingress: from in-namespace agent pods only.
|
||||||
|
# - Egress: kube-dns + bundled Postgres (if enabled).
|
||||||
|
# Operators add CA / OIDC / SMTP egress via extraEgress.
|
||||||
|
networkPolicy:
|
||||||
|
enabled: false
|
||||||
|
# Additional Ingress rules merged into the policy. Each entry is a
|
||||||
|
# raw networking.k8s.io/v1 NetworkPolicyIngressRule.
|
||||||
|
extraIngress: []
|
||||||
|
# Additional Egress rules merged into the policy. Common operator
|
||||||
|
# need: 443/TCP to an OIDC issuer, 443/TCP to a public CA endpoint,
|
||||||
|
# 25/TCP to an SMTP relay.
|
||||||
|
# Example:
|
||||||
|
# extraEgress:
|
||||||
|
# - to:
|
||||||
|
# - ipBlock:
|
||||||
|
# cidr: 0.0.0.0/0
|
||||||
|
# except:
|
||||||
|
# - 10.0.0.0/8
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 443
|
||||||
|
extraEgress: []
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Advanced Configuration
|
# Advanced Configuration
|
||||||
|
|||||||
Binary file not shown.
+6
-1
@@ -1,6 +1,6 @@
|
|||||||
# certctl Documentation
|
# certctl Documentation
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
> Last reviewed: 2026-05-12
|
||||||
|
|
||||||
The full docs index, organized by audience. Pick the section that matches what you need to do; each link below opens a focused doc rather than a wall of text.
|
The full docs index, organized by audience. Pick the section that matches what you need to do; each link below opens a focused doc rather than a wall of text.
|
||||||
|
|
||||||
@@ -65,6 +65,8 @@ You're running certctl in production and need operational guidance.
|
|||||||
| Doc | What it covers |
|
| Doc | What it covers |
|
||||||
|---|---|
|
|---|---|
|
||||||
| [Security posture](operator/security.md) | Auth, rate limits, encryption at rest, key rotation, RBAC + OIDC + sessions + break-glass, bootstrap |
|
| [Security posture](operator/security.md) | Auth, rate limits, encryption at rest, key rotation, RBAC + OIDC + sessions + break-glass, bootstrap |
|
||||||
|
| [Secret custody](operator/secret-custody.md) | Where private keys live; FileDriver vs HSM/KMS; encryption wire format; env-seeded vs DB-seeded plaintext policy |
|
||||||
|
| [Observability](operator/observability.md) | Metrics surface, Prometheus exposition vs client_golang, tracing scope, log structure, rate-limit semantics across restarts/replicas |
|
||||||
| [RBAC operator reference](operator/rbac.md) | Roles, permissions, scopes, scope-down + day-0 bootstrap |
|
| [RBAC operator reference](operator/rbac.md) | Roles, permissions, scopes, scope-down + day-0 bootstrap |
|
||||||
| [Auth threat model](operator/auth-threat-model.md) | API-key + RBAC + OIDC + sessions + break-glass — token forgery, session hijacking, IdP compromise, role-grant abuse, bootstrap-token leak, audit-mutation |
|
| [Auth threat model](operator/auth-threat-model.md) | API-key + RBAC + OIDC + sessions + break-glass — token forgery, session hijacking, IdP compromise, role-grant abuse, bootstrap-token leak, audit-mutation |
|
||||||
| [OIDC / SSO runbooks](operator/oidc-runbooks/index.md) | Per-IdP setup guides — Keycloak, Authentik, Okta, Auth0, Entra ID, Google Workspace |
|
| [OIDC / SSO runbooks](operator/oidc-runbooks/index.md) | Per-IdP setup guides — Keycloak, Authentik, Okta, Auth0, Entra ID, Google Workspace |
|
||||||
@@ -83,6 +85,8 @@ You're running certctl in production and need operational guidance.
|
|||||||
| [Cloud targets](operator/runbooks/cloud-targets.md) | AWS ACM + Azure Key Vault deployment, debugging, rollback |
|
| [Cloud targets](operator/runbooks/cloud-targets.md) | AWS ACM + Azure Key Vault deployment, debugging, rollback |
|
||||||
| [Expiry alerts](operator/runbooks/expiry-alerts.md) | Per-policy multi-channel routing matrix, severity tiers |
|
| [Expiry alerts](operator/runbooks/expiry-alerts.md) | Per-policy multi-channel routing matrix, severity tiers |
|
||||||
| [Disaster recovery](operator/runbooks/disaster-recovery.md) | CRL cache, OCSP responder cert, CA private-key rotation, Postgres restore |
|
| [Disaster recovery](operator/runbooks/disaster-recovery.md) | CRL cache, OCSP responder cert, CA private-key rotation, Postgres restore |
|
||||||
|
| [Config-encryption upgrade](operator/runbooks/config-encryption-upgrade.md) | Force v1/v2 → v3 re-seal across the database; passphrase rotation procedure |
|
||||||
|
| [PostgreSQL backup](operator/runbooks/postgres-backup.md) | Operator-run backup recipe (docker-compose + Kubernetes); recommended cadence; quarterly DR dry-run |
|
||||||
|
|
||||||
## Migration
|
## Migration
|
||||||
|
|
||||||
@@ -112,6 +116,7 @@ You're contributing to certctl, running tests locally, or trying to understand t
|
|||||||
| [GUI QA checklist](contributor/gui-qa-checklist.md) | Manual GUI verification pass for release |
|
| [GUI QA checklist](contributor/gui-qa-checklist.md) | Manual GUI verification pass for release |
|
||||||
| [Release sign-off](contributor/release-sign-off.md) | Release-day checklist — code state, automated gates, manual QA, artefact verification |
|
| [Release sign-off](contributor/release-sign-off.md) | Release-day checklist — code state, automated gates, manual QA, artefact verification |
|
||||||
| [CI pipeline](contributor/ci-pipeline.md) | CI shape, regression guards, adding new checks |
|
| [CI pipeline](contributor/ci-pipeline.md) | CI shape, regression guards, adding new checks |
|
||||||
|
| [CI guards](contributor/ci-guards.md) | Per-class CI guards (code-shape, contract-parity, build/dep, operational); how to add one |
|
||||||
|
|
||||||
## Archive
|
## Archive
|
||||||
|
|
||||||
|
|||||||
@@ -1,232 +0,0 @@
|
|||||||
# CI Pipeline — Operator Guide
|
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
|
||||||
|
|
||||||
> Authoritative guide to certctl's CI pipeline shape.
|
|
||||||
> Per the ci-pipeline-cleanup spec, Phase 12.
|
|
||||||
|
|
||||||
## Trigger model
|
|
||||||
|
|
||||||
Three triggers, each with its own scope. Don't mix.
|
|
||||||
|
|
||||||
| Trigger | Workflow | Scope | Wall-clock target |
|
|
||||||
|---|---|---|---|
|
|
||||||
| Push to master, PR to master | `.github/workflows/ci.yml` + `.github/workflows/codeql.yml` | Blocking — every check earns its keep | <10 min |
|
|
||||||
| Daily 06:00 UTC + `workflow_dispatch` | `.github/workflows/security-deep-scan.yml` | Slow scans (gosec, osv, trivy, ZAP, schemathesis, nuclei, testssl, semgrep, mutation, `-race -count=10`); best-effort, never blocks | 60 min budget |
|
|
||||||
| Tag push (`v*`) | `.github/workflows/release.yml` | Cross-platform binaries, ghcr.io push, SLSA provenance, GitHub release | n/a |
|
|
||||||
|
|
||||||
This guide covers the **on-push pipeline** only.
|
|
||||||
|
|
||||||
## On-push pipeline (7 status checks)
|
|
||||||
|
|
||||||
```mermaid
|
|
||||||
flowchart TD
|
|
||||||
Push["push to master"]
|
|
||||||
CI["CI workflow (5 jobs)"]
|
|
||||||
CodeQL["CodeQL workflow (2 jobs)"]
|
|
||||||
GoBuild["go-build-and-test<br/>~6-7 min"]
|
|
||||||
Frontend["frontend-build<br/>~1 min"]
|
|
||||||
HelmLint["helm-lint<br/>~10 sec"]
|
|
||||||
Vendor["deploy-vendor-e2e<br/>~5 min, depends on go-build-and-test"]
|
|
||||||
Image["image-and-supply-chain<br/>~3 min, parallel"]
|
|
||||||
AnalyzeGo["Analyze (go)<br/>~5 min, parallel"]
|
|
||||||
AnalyzeJS["Analyze (javascript-typescript)<br/>~5 min, parallel"]
|
|
||||||
Push --> CI
|
|
||||||
Push --> CodeQL
|
|
||||||
CI --> GoBuild
|
|
||||||
CI --> Frontend
|
|
||||||
CI --> HelmLint
|
|
||||||
CI --> Vendor
|
|
||||||
CI --> Image
|
|
||||||
CodeQL --> AnalyzeGo
|
|
||||||
CodeQL --> AnalyzeJS
|
|
||||||
GoBuild -.must pass before.-> Vendor
|
|
||||||
```
|
|
||||||
|
|
||||||
End-to-end wall-clock: dominated by the sequential `go-build-and-test` → `deploy-vendor-e2e` chain (~12 min), which runs in parallel with CodeQL (~5 min). Target ~10 min.
|
|
||||||
|
|
||||||
## Per-job deep-dive
|
|
||||||
|
|
||||||
### `go-build-and-test` (Ubuntu, ~6-7 min)
|
|
||||||
|
|
||||||
Runs the Go build/test suite + 18 of 20 regression guards.
|
|
||||||
|
|
||||||
Steps:
|
|
||||||
1. `actions/checkout@v4`
|
|
||||||
2. `actions/setup-go@v5` (Go 1.25.10)
|
|
||||||
3. `go build ./cmd/...` (server, agent, mcp-server, cli)
|
|
||||||
4. **gofmt drift** — `gofmt -l .` must be empty (Makefile::verify parity)
|
|
||||||
5. **go mod tidy drift** — `go mod tidy && git diff --exit-code go.mod go.sum`
|
|
||||||
6. `go vet ./...`
|
|
||||||
7. Install + run **golangci-lint** v2.11.4 (`--timeout 5m`)
|
|
||||||
8. Install + run **govulncheck** (hard gate)
|
|
||||||
9. Install + run **staticcheck** (hard gate; `continue-on-error: false`)
|
|
||||||
10. **Race Detection** — `go test -race -count=1 ./internal/...` (9-package list, 5min timeout)
|
|
||||||
11. **Go Test with Coverage** — full coverage profile to `coverage.out`
|
|
||||||
12. **Check Coverage Thresholds** — `bash scripts/check-coverage-thresholds.sh` (reads `.github/coverage-thresholds.yml`)
|
|
||||||
13. **Upload Coverage Report** — artifact (`go-coverage`, 30-day retention)
|
|
||||||
14. **Coverage PR comment** — posts/updates per-PR coverage table (PR builds only)
|
|
||||||
15. **Regression guards** — loop runs all `scripts/ci-guards/*.sh` (18 of 20 guards)
|
|
||||||
|
|
||||||
Local equivalent: `make verify` covers steps 4, 6, 7, 11 (with `-short`).
|
|
||||||
|
|
||||||
### `frontend-build` (Ubuntu, ~1 min)
|
|
||||||
|
|
||||||
Vitest tests + tsc check + vite build + 2 of 20 regression guards (already covered by the ci-guards loop in `go-build-and-test`).
|
|
||||||
|
|
||||||
Steps:
|
|
||||||
1. `actions/checkout@v4`
|
|
||||||
2. `actions/setup-node@v4` (Node 22)
|
|
||||||
3. `npm ci`
|
|
||||||
4. `npx tsc --noEmit`
|
|
||||||
5. `npx vitest run`
|
|
||||||
6. `npx vite build`
|
|
||||||
7. **Regression guards** — same `scripts/ci-guards/*.sh` loop as `go-build-and-test` (catches frontend-side guards: S-1, P-1, T-1, L-015, L-019, M-009, G-3)
|
|
||||||
|
|
||||||
### `helm-lint` (Ubuntu, ~10 sec)
|
|
||||||
|
|
||||||
Helm chart validation in 3 modes + inverse fail-loud test:
|
|
||||||
1. `helm lint` with existingSecret
|
|
||||||
2. `helm template` (existingSecret mode)
|
|
||||||
3. `helm template` (cert-manager mode)
|
|
||||||
4. `helm template` (no TLS source — MUST fail per fail-loud guard)
|
|
||||||
|
|
||||||
### `deploy-vendor-e2e` (Ubuntu, ~5 min, depends on `go-build-and-test`)
|
|
||||||
|
|
||||||
Single-job collapse of the prior 12-job matrix (per ci-pipeline-cleanup Phase 5 / frozen decision 0.4 — revises Bundle II decision 0.9).
|
|
||||||
|
|
||||||
Steps:
|
|
||||||
1. `actions/checkout@v5`
|
|
||||||
2. `actions/setup-go@v5` (Go 1.25.10, cache: true)
|
|
||||||
3. **Build f5-mock-icontrol sidecar** — only sidecar without published image
|
|
||||||
4. **Bring up all vendor sidecars** — `docker compose --profile deploy-e2e up -d` (11 sidecars)
|
|
||||||
5. **Run all vendor-edge e2e** — `go test -tags integration -race -count=1 -run 'VendorEdge_'`; output captured to `test-output.log`
|
|
||||||
6. **Skip-count enforcement** — `bash scripts/ci-guards/vendor-e2e-skip-check.sh test-output.log` (catches sidecar boot failures via skip-count vs allowlist)
|
|
||||||
7. **Tear down sidecars** — `docker compose down -v` (always runs)
|
|
||||||
|
|
||||||
The `deploy-vendor-e2e-windows` matrix was deleted entirely (per ci-pipeline-cleanup Phase 6 / frozen decision 0.5 — revises Bundle II decision 0.4). IIS + WinCertStore validation moved to [`docs/connector-iis.md::Operator validation playbook`](connector-iis.md#operator-validation-playbook-windows-host).
|
|
||||||
|
|
||||||
### `image-and-supply-chain` (Ubuntu, ~3 min, parallel)
|
|
||||||
|
|
||||||
Three checks bundled (per ci-pipeline-cleanup Phases 7-9 / frozen decision 0.8):
|
|
||||||
1. **Digest validity** — `bash scripts/ci-guards/digest-validity.sh`. Resolves every `@sha256:<digest>` ref in `deploy/**/*.{yml,Dockerfile*}` against its registry. Closes the H-001 lying-field gap.
|
|
||||||
2. **Docker build smoke** — builds all 4 Dockerfiles (`Dockerfile`, `Dockerfile.agent`, `deploy/test/f5-mock-icontrol/Dockerfile`, `deploy/test/libest/Dockerfile`).
|
|
||||||
3. **OpenAPI ↔ handler operationId parity** — `bash scripts/ci-guards/openapi-handler-parity.sh`. Every router route must have a matching `operationId` in `api/openapi.yaml` or be documented in `api/openapi-handler-exceptions.yaml`.
|
|
||||||
|
|
||||||
### CodeQL (Ubuntu × 2 languages, ~5 min)
|
|
||||||
|
|
||||||
`.github/workflows/codeql.yml` — interprocedural taint tracking. Two matrix jobs: `go` and `javascript-typescript`. Triggers on push, PR, and weekly Sunday cron.
|
|
||||||
|
|
||||||
## The 20 regression guards
|
|
||||||
|
|
||||||
Located at `scripts/ci-guards/<id>.sh`. Each script is callable locally:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bash scripts/ci-guards/G-3-env-docs-drift.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
Or run all of them:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
for g in scripts/ci-guards/*.sh; do
|
|
||||||
echo "=== $(basename "$g") ==="
|
|
||||||
bash "$g" || echo " FAILED"
|
|
||||||
done
|
|
||||||
```
|
|
||||||
|
|
||||||
| ID | Catches |
|
|
||||||
|---|---|
|
|
||||||
| `G-1-jwt-auth-literal` | JWT silent auth downgrade reappearing |
|
|
||||||
| `L-001-insecure-skip-verify` | Bare `InsecureSkipVerify: true` without `//nolint:gosec` |
|
|
||||||
| `H-001-bare-from` | Bare Dockerfile `FROM` without `@sha256:` digest pin |
|
|
||||||
| `M-012-no-root-user` | Dockerfile missing terminal `USER <non-root>` |
|
|
||||||
| `H-009-readme-jwt` | README re-introducing JWT-as-supported claim |
|
|
||||||
| `G-2-api-key-hash-json` | `api_key_hash` in JSON-emitting surface |
|
|
||||||
| `U-2-plaintext-healthcheck` | Plaintext `http://` in HEALTHCHECK |
|
|
||||||
| `U-3-migration-mount` | Migration file mounted into postgres initdb |
|
|
||||||
| `D-1-D-2-statusbadge-phantom` | Dead StatusBadge keys + 8 TS phantom fields across 4 interfaces |
|
|
||||||
| `L-1-bulk-action-loop` | Client-side `for ... await` bulk action loops |
|
|
||||||
| `B-1-orphan-crud` | 8 update/create/delete fns lose page consumers |
|
|
||||||
| `S-2-strings-contains-err` | `strings.Contains(err.Error(), ...)` brittle dispatch |
|
|
||||||
| `G-3-env-docs-drift` | `CERTCTL_*` env var defined OR documented but not both |
|
|
||||||
| `test-naming-convention` | `func Testxxx` — lowercase letter immediately after the `Test` prefix (Go silently skips such functions) |
|
|
||||||
| `S-1-hardcoded-source-counts` | Hardcoded "N issuer connectors" prose |
|
|
||||||
| `P-1-documented-orphan-fns` | 16 read-fn names removed from client.ts exports |
|
|
||||||
| `T-1-frontend-page-coverage` | New page in `web/src/pages/` without sibling `.test.tsx` |
|
|
||||||
| `bundle-8-L-015-target-blank-rel-noopener` | `target="_blank"` without `rel="noopener noreferrer"` |
|
|
||||||
| `bundle-8-L-019-dangerously-set-inner-html` | `dangerouslySetInnerHTML` outside `safeHtml.ts` |
|
|
||||||
| `bundle-8-M-009-bare-usemutation` | Bare `useMutation()` outside the `useTrackedMutation` wrapper |
|
|
||||||
|
|
||||||
Plus five additional scripts for non-guard operator workflows:
|
|
||||||
- `scripts/ci-guards/vendor-e2e-skip-check.sh` — vendor-e2e skip-count enforcement (used by `deploy-vendor-e2e` job)
|
|
||||||
- `scripts/ci-guards/digest-validity.sh` — used by `image-and-supply-chain` job
|
|
||||||
- `scripts/ci-guards/openapi-handler-parity.sh` — used by `image-and-supply-chain` job
|
|
||||||
- `scripts/ci-guards/coverage-pr-comment.sh` — used by `go-build-and-test` job
|
|
||||||
- `scripts/check-coverage-thresholds.sh` — used by `go-build-and-test` job
|
|
||||||
|
|
||||||
## Coverage thresholds
|
|
||||||
|
|
||||||
Manifest at `.github/coverage-thresholds.yml`. Each entry has `floor:` (integer percentage) + `why:` (load-bearing context). Lowering a floor REQUIRES corresponding code-side test work — never lower the gate to make CI green.
|
|
||||||
|
|
||||||
To add a new gated package: add an entry to the YAML; no script changes needed.
|
|
||||||
|
|
||||||
## Make targets — three-tier convention
|
|
||||||
|
|
||||||
| Target | When | What |
|
|
||||||
|---|---|---|
|
|
||||||
| `make verify` | **Required pre-commit** | gofmt + vet + golangci-lint + go test -short |
|
|
||||||
| `make verify-deploy` | Optional pre-push | digest-validity + OpenAPI parity + Docker build smoke (server + agent only — fast subset) |
|
|
||||||
| `make verify-docs` | **Required pre-tag** | QA-doc Part-count + seed-count drift checks |
|
|
||||||
|
|
||||||
## Adding a new check
|
|
||||||
|
|
||||||
| Check type | Where it goes | Auto-picked-up by CI? |
|
|
||||||
|---|---|---|
|
|
||||||
| Regression guard (grep / shape pattern) | New `scripts/ci-guards/<id>.sh` script | Yes — loop step iterates `*.sh` |
|
|
||||||
| Coverage threshold (per-package) | New entry in `.github/coverage-thresholds.yml` | Yes — bash loop reads YAML |
|
|
||||||
| OpenAPI route exception | New entry in `api/openapi-handler-exceptions.yaml` | Yes — parity script reads YAML |
|
|
||||||
| Vendor-e2e expected skip | New line in `scripts/ci-guards/vendor-e2e-skip-allowlist.txt` | Yes — skip-check script reads file |
|
|
||||||
| New CI job | Edit `.github/workflows/ci.yml` directly | n/a (job definition is the source) |
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
| CI step fails | Likely cause | Fix |
|
|
||||||
|---|---|---|
|
|
||||||
| `gofmt drift` | source needs `gofmt -w` | `make fmt` locally + commit |
|
|
||||||
| `go mod tidy drift` | imported a package without committing go.mod | `go mod tidy` + commit |
|
|
||||||
| `Run staticcheck` | new SA1019 deprecated-API site | migrate the API OR add `//lint:ignore SA1019 <reason>` |
|
|
||||||
| `Check Coverage Thresholds` | per-package coverage dropped below floor | add tests; do NOT lower the floor |
|
|
||||||
| `Regression guards` (any `<id>.sh`) | the audit-finding the guard pinned reappeared | read the guard's head-comment block for the closure rationale + fix the regression |
|
|
||||||
| `Skip-count enforcement` | a vendor sidecar failed to start | check docker logs; fix sidecar; OR if a new Windows-only test was added, add to `scripts/ci-guards/vendor-e2e-skip-allowlist.txt` |
|
|
||||||
| `Digest validity` | a `@sha256` digest doesn't resolve | re-resolve from registry, replace in compose / Dockerfile |
|
|
||||||
| `OpenAPI ↔ handler parity` | new router route without operationId | add to `api/openapi.yaml` (preferred) OR `api/openapi-handler-exceptions.yaml` |
|
|
||||||
| `Docker build smoke` | Dockerfile syntax error or COPY path drift | fix the Dockerfile |
|
|
||||||
| `CodeQL Analyze` | interprocedural dataflow finding | review the SARIF in Security → Code scanning tab |
|
|
||||||
|
|
||||||
## Status check accounting
|
|
||||||
|
|
||||||
**Current (post-cleanup):** 7 status checks per push.
|
|
||||||
- 1 × `Go Build & Test`
|
|
||||||
- 1 × `Frontend Build`
|
|
||||||
- 1 × `Helm Chart Validation`
|
|
||||||
- 1 × `deploy-vendor-e2e`
|
|
||||||
- 1 × `image-and-supply-chain`
|
|
||||||
- 2 × `CodeQL Analyze (<lang>)` (go + javascript-typescript)
|
|
||||||
|
|
||||||
**Pre-cleanup (HEAD `1de61e91`):** 19 status checks. The 12-vendor matrix + 2-vendor Windows matrix collapsed to 1 + 0 respectively; the 3 Go/Frontend/Helm jobs unchanged; 2 CodeQL unchanged; 1 new `image-and-supply-chain` added.
|
|
||||||
|
|
||||||
## Required GitHub branch protection list
|
|
||||||
|
|
||||||
When updating the `master` branch protection rule (Settings → Branches), the "Require status checks to pass" list should be exactly:
|
|
||||||
|
|
||||||
```
|
|
||||||
Go Build & Test
|
|
||||||
Frontend Build
|
|
||||||
Helm Chart Validation
|
|
||||||
deploy-vendor-e2e
|
|
||||||
image-and-supply-chain
|
|
||||||
Analyze (go)
|
|
||||||
Analyze (javascript-typescript)
|
|
||||||
```
|
|
||||||
|
|
||||||
Old-name checks (`deploy-vendor-e2e (<vendor>)` × 12, `deploy-vendor-e2e-windows (<vendor>)` × 2) won't appear on new PRs after the workflow change. Operator removes them from the required list.
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
# GUI QA Checklist
|
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
|
||||||
|
|
||||||
Manual GUI verification pass for release sign-off. Vitest covers component-level behavior; this checklist covers end-to-end flows that only land correctly when the React SPA, the REST API, and the database are all wired together.
|
|
||||||
|
|
||||||
## Prereqs
|
|
||||||
|
|
||||||
The full stack must be running and healthy per [`qa-prerequisites.md`](qa-prerequisites.md). Open `https://localhost:8443` in a fresh browser session (Incognito / Private mode is fine — avoids cached state from previous QA passes).
|
|
||||||
|
|
||||||
## Pages to verify
|
|
||||||
|
|
||||||
For each page, the verification is "open it, confirm it renders without console errors, exercise the documented action, confirm the action lands as expected."
|
|
||||||
|
|
||||||
| Page | Action to verify | Expected result |
|
|
||||||
|---|---|---|
|
|
||||||
| `/dashboard` | Page loads, all 4 stat cards populate | Total / Active / Expiring / Expired counts match `GET /api/v1/stats/summary` |
|
|
||||||
| `/certificates` | Inventory list paginates | "Next page" button works; URL updates with cursor; row count consistent |
|
|
||||||
| `/certificates/<id>` | Detail page opens for any cert | Cert chain renders, deployment status shows, audit timeline visible |
|
|
||||||
| `/issuers` | Catalog renders all configured issuers | Each issuer card shows last-used / status; clicking opens detail |
|
|
||||||
| `/issuers/<id>` | Issuer config form | Edit + Save round-trips through `PATCH /api/v1/issuers/<id>` |
|
|
||||||
| `/issuers/hierarchy` | CA tree view | Multi-level hierarchy renders; admin-gated CRUD buttons present for admins only |
|
|
||||||
| `/agents` | Fleet view | Online/offline status accurate; OS/arch grouping correct |
|
|
||||||
| `/agents/<id>` | Agent detail | Last heartbeat, registered date, deployment job history |
|
|
||||||
| `/agents/groups` | Agent groups CRUD | Create + edit + delete a test group; verify dynamic membership matching |
|
|
||||||
| `/jobs` | Job queue | Filter by status / type works; click into a job opens detail |
|
|
||||||
| `/jobs/<id>` | Job detail | Status, retries, logs, owner attribution |
|
|
||||||
| `/policies` | Renewal policies CRUD | Edit AlertChannels matrix, save, verify backend reflects change |
|
|
||||||
| `/profiles` | Certificate profiles | EKU constraints + max TTL editable; profile binding works |
|
|
||||||
| `/notifications` | Notifier config | Test connection button against each configured notifier |
|
|
||||||
| `/discovery` | Discovery triage | Claim / Dismiss buttons round-trip to backend |
|
|
||||||
| `/network-scans` | Scan target CRUD | Create scan target, trigger immediate scan, results appear |
|
|
||||||
| `/audit` | Audit trail | Filter by actor / action / time range; CSV export works |
|
|
||||||
| `/short-lived` | Short-lived credential dashboard | Live TTL countdown updates; auto-refresh every 10s |
|
|
||||||
| `/observability` | Observability dashboard | Charts render: expiration heatmap, renewal trends, issuance rate |
|
|
||||||
| `/health` | Health monitor | TLS endpoint health: healthy / degraded / down states accurate |
|
|
||||||
| `/digest` | Digest preview | Email preview renders; "Send digest" button dispatches |
|
|
||||||
| `/owners` | Owners CRUD | Create owner with team, edit, delete (after reassigning certs) |
|
|
||||||
| `/teams` | Teams CRUD | Create + delete; verify cascade removes orphan owners |
|
|
||||||
| `/scep` | SCEP admin tabs | Profiles / Intune Monitoring / Recent Activity all populate |
|
|
||||||
| `/est` | EST admin tabs | Profiles / Recent Activity / Trust Bundle all populate |
|
|
||||||
| `/login` | Login flow | API key entry persists for the session; bad key rejected |
|
|
||||||
|
|
||||||
## Console hygiene
|
|
||||||
|
|
||||||
Open browser DevTools and confirm:
|
|
||||||
|
|
||||||
- No uncaught exceptions on any page
|
|
||||||
- No 404 / 500 responses in the Network tab from API calls
|
|
||||||
- No CORS errors
|
|
||||||
- No CSP violations
|
|
||||||
|
|
||||||
## Mobile / narrow-viewport
|
|
||||||
|
|
||||||
The dashboard is desktop-first but should not break catastrophically on narrow viewports. Resize the browser to 380px width; confirm:
|
|
||||||
|
|
||||||
- Sidebar collapses to a hamburger menu
|
|
||||||
- Tables either scroll horizontally or stack on mobile
|
|
||||||
- Forms remain usable
|
|
||||||
|
|
||||||
## Accessibility spot-check
|
|
||||||
|
|
||||||
- Tab through any single page using only the keyboard. Every interactive element must be reachable, and the focus indicator must be visible.
|
|
||||||
- Lighthouse accessibility audit on `/dashboard`: target ≥ 90.
|
|
||||||
|
|
||||||
## Sign-off
|
|
||||||
|
|
||||||
Document any deviations in the release sign-off matrix at [`release-sign-off.md`](release-sign-off.md).
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
# QA Prerequisites
|
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
|
||||||
|
|
||||||
Operational prereqs for running release QA against certctl. Before any of the contributor-facing testing surfaces (test-environment.md, gui-qa-checklist.md, release-sign-off.md) are useful, the local stack needs to be in a known-good state.
|
|
||||||
|
|
||||||
## Why manual QA on top of automated tests?
|
|
||||||
|
|
||||||
Automated tests mock dependencies and run in isolation. Manual QA validates the full integrated stack: real PostgreSQL, real HTTP, real agent binary, real file I/O, real scheduler timing. It catches issues that unit tests can't: migration ordering, Docker networking, env var parsing, browser rendering, and timing-dependent scheduler behavior.
|
|
||||||
|
|
||||||
## Environment setup
|
|
||||||
|
|
||||||
**Step 1: Start the full stack.**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd deploy && docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build -d
|
|
||||||
```
|
|
||||||
|
|
||||||
This builds three containers (postgres, certctl-server, certctl-agent) and runs them on a bridge network. The `--build` flag ensures you're testing the current code, not a stale image. The `demo` overlay is an override file (no `image:` or `build:` of its own) that layers `CERTCTL_DEMO_SEED=true` onto the base — both files must be passed in that order or compose errors with `service "certctl-server" has neither an image nor a build context specified`. The seed populates the database with realistic fixtures.
|
|
||||||
|
|
||||||
**Step 2: Wait for healthy state.**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
for i in $(seq 1 30); do
|
|
||||||
STATUS=$(docker compose ps --format json 2>/dev/null | jq -r 'select(.Health != null) | "\(.Name): \(.Health)"' 2>/dev/null)
|
|
||||||
echo "$STATUS"
|
|
||||||
echo "$STATUS" | grep -q "unhealthy\|starting" || break
|
|
||||||
sleep 2
|
|
||||||
done
|
|
||||||
```
|
|
||||||
|
|
||||||
Why: Docker Compose starts containers in dependency order (postgres → server → agent), but "started" doesn't mean "ready." Health checks confirm postgres accepts connections, the server responds on `/health`, and the agent process is running.
|
|
||||||
|
|
||||||
**Step 3: Set shell variables used throughout the QA flow.**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export SERVER=https://localhost:8443
|
|
||||||
export API_KEY="change-me-in-production"
|
|
||||||
export AUTH="Authorization: Bearer $API_KEY"
|
|
||||||
export CT="Content-Type: application/json"
|
|
||||||
export CACERT="--cacert ./deploy/test/certs/ca.crt"
|
|
||||||
```
|
|
||||||
|
|
||||||
Every curl command in QA docs uses these variables. Setting them once avoids typos and keeps the docs copy-pasteable.
|
|
||||||
|
|
||||||
> **Note:** The default Docker Compose sets `CERTCTL_AUTH_TYPE: none` for the demo overlay, meaning auth is disabled. Tests that exercise auth require flipping this to `api-key`; instructions are in the relevant test docs.
|
|
||||||
|
|
||||||
**Step 4: Build CLI and MCP server binaries on the host.**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
go build -o certctl-cli ./cmd/cli/...
|
|
||||||
go build -o certctl-mcp ./cmd/mcp-server/...
|
|
||||||
```
|
|
||||||
|
|
||||||
The CLI and MCP server are separate binaries that talk to the server over HTTP. Building them verifies the code compiles and produces the executables you'll test later.
|
|
||||||
|
|
||||||
## Demo data baseline
|
|
||||||
|
|
||||||
The seed data (`migrations/seed.sql` + `migrations/seed_demo.sql`) pre-populates the database with realistic fixtures. Confirm it loaded:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -s $CACERT -H "$AUTH" $SERVER/api/v1/stats/summary | jq .
|
|
||||||
```
|
|
||||||
|
|
||||||
**Expected shape:**
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"total_certificates": 15,
|
|
||||||
"active_certificates": ...,
|
|
||||||
"expiring_certificates": ...,
|
|
||||||
"expired_certificates": ...,
|
|
||||||
"pending_renewals": ...
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Reference IDs in the demo data** (used across QA docs):
|
|
||||||
|
|
||||||
| Resource | IDs | Count |
|
|
||||||
|---|---|---|
|
|
||||||
| Teams | `t-platform`, `t-security`, `t-payments`, `t-frontend`, `t-data` | 5 |
|
|
||||||
| Owners | `o-alice`, `o-bob`, `o-carol`, `o-dave`, `o-eve` | 5 |
|
|
||||||
| Policies | `rp-standard`, `rp-urgent`, `rp-manual` | 3 |
|
|
||||||
| Issuers | `iss-local`, `iss-acme-le`, `iss-stepca`, `iss-digicert` | 4 |
|
|
||||||
| Agents | `ag-web-prod`, `ag-web-staging`, `ag-lb-prod`, `ag-iis-prod`, `ag-data-prod` | 5 |
|
|
||||||
| Targets | `tgt-nginx-prod`, `tgt-nginx-staging`, `tgt-f5-prod`, `tgt-iis-prod`, `tgt-nginx-data` | 5 |
|
|
||||||
| Profiles | `prof-standard-tls`, `prof-internal-mtls`, `prof-short-lived`, `prof-high-security` | 4 |
|
|
||||||
| Certificates | `mc-api-prod`, `mc-web-prod`, `mc-pay-prod`, etc. | 15 |
|
|
||||||
| Agent Groups | `ag-linux-prod`, `ag-linux-amd64`, `ag-windows`, `ag-datacenter-a`, `ag-manual` | 5 |
|
|
||||||
| Network Scan Targets | `nst-dc1-web`, `nst-dc2-apps`, `nst-dmz` | 3 |
|
|
||||||
|
|
||||||
## Once these are green
|
|
||||||
|
|
||||||
Move to the appropriate downstream surface:
|
|
||||||
|
|
||||||
- [`test-environment.md`](test-environment.md) — full local environment tutorial with real CAs (Pebble, step-ca, etc.)
|
|
||||||
- [`gui-qa-checklist.md`](gui-qa-checklist.md) — manual GUI test pass
|
|
||||||
- [`release-sign-off.md`](release-sign-off.md) — release-day checklist
|
|
||||||
- [`testing-strategy.md`](testing-strategy.md) — what we test in CI vs daily deep-scan vs manual QA
|
|
||||||
@@ -1,445 +0,0 @@
|
|||||||
# QA Test Suite Guide (`qa_test.go`)
|
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
|
||||||
|
|
||||||
> **Audience:** Anyone running release QA for certctl — whether you're a first-time contributor or the maintainer cutting a release tag.
|
|
||||||
>
|
|
||||||
> **Self-contained.** Through 2026-05-04 this doc was a companion to a separate `docs/testing-guide.md` (the *what* to test) — that companion was pruned during the Phase 5 docs overhaul (its content dispersed across the audience-organized doc tree). The Part-by-Part Coverage Map below is now the canonical inventory of QA Parts.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Test Suite Health (regenerate via `make qa-stats`)
|
|
||||||
|
|
||||||
> Snapshot at HEAD. Re-run `make qa-stats` to refresh; the QA-doc seed-count drift guard (`.github/workflows/ci.yml::QA-doc seed-count drift guard`) catches out-of-date cert / issuer counts on every PR. The Part-count drift guard retired in the 2026-05-04 docs overhaul Phase 5 (testing-guide.md was pruned; Part counts are now tracked inside `qa_test.go` itself, not against an external doc). **Last regenerated: 2026-04-27 (Bundle P).**
|
|
||||||
|
|
||||||
| Metric | Value | Target | Status |
|
|
||||||
|---|---|---|---|
|
|
||||||
| Backend test files | 221 | n/a | ℹ |
|
|
||||||
| Backend `Test*` functions | 2,454 | n/a | ℹ |
|
|
||||||
| Backend `t.Run` subtests | 778 | n/a | ℹ |
|
|
||||||
| Frontend test files | 38 | n/a | ℹ |
|
|
||||||
| Fuzz targets | 11 | ≥10 (one per hand-rolled parser) | ✓ |
|
|
||||||
| `t.Skip` sites | 60 | each carries valid rationale (Bundle O audit) | ✓ |
|
|
||||||
| `qa_test.go` Part_* subtests | 53 | covers 49 of 56 historical QA Parts directly + Parts 15–17 indirectly via Parts 42–46 | ✓ |
|
|
||||||
| Existential cluster line cov (post-Bundle-J + L.B + Bundle 0.7) | acme 55.6%, stepca 90.4%, local-issuer ≥86%, crypto ≥85% | ≥95% | △ ACME below; tracked in `coverage-matrix.md` |
|
|
||||||
| Mutation kill rate (Existential) | unmeasured (operator-runnable per Strengthening #5) | ≥90% | ⚠ |
|
|
||||||
| Race detector clean (`-count=10`) | partial (`-count=3` clean per Phase 0) | 0 races | ⚠ |
|
|
||||||
|
|
||||||
## What Is This File?
|
|
||||||
|
|
||||||
`deploy/test/qa_test.go` is a single Go test file (~1700 lines) that automates the historical QA Part inventory (preserved in the Part-by-Part Coverage Map below) against a running certctl Docker Compose demo stack. It replaces the legacy `qa-smoke-test.sh` bash script.
|
|
||||||
|
|
||||||
It covers **49 of 56 Parts** of the testing guide as automation; the remaining 7 are
|
|
||||||
either manual-only by design or pending QA-suite coverage:
|
|
||||||
|
|
||||||
- **49 `Part_*` automation wrappers**, **~159 leaf subtests** — API calls, database queries, source file checks, performance benchmarks
|
|
||||||
- **11 fully skipped Parts** — with documented reasons (external CAs, Windows, browser-only, etc.) — see "What This Test Does NOT Cover" below
|
|
||||||
- **4 Parts NOT YET AUTOMATED** — Parts 23 (S/MIME & EKU), 24 (OCSP/CRL), 55 (Agent Soft-Retirement), 56 (Notification Retry & Dead-Letter) — must be tested manually until QA-suite automation lands; the Part-by-Part Coverage Map below describes the surface area each Part covers
|
|
||||||
- **Manual-only flows** in addition: GUI flows, scheduler timing, Docker log inspection — must be done by a human (Coverage Map below describes each)
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
```mermaid
|
|
||||||
flowchart LR
|
|
||||||
QA["qa_test.go (//go:build qa)<br/><br/>TestQA(t *testing.T)<br/>├─ Part01_Infra<br/>├─ Part02_Auth<br/>├─ Part03_CertCRUD<br/>├─ ...<br/>└─ Part52_HelmChart"]
|
|
||||||
subgraph Stack["certctl demo stack<br/>docker-compose.yml + docker-compose.demo.yml"]
|
|
||||||
Server["certctl-server :8443"]
|
|
||||||
Postgres["postgres :5432"]
|
|
||||||
Agents["certctl-agent (×N)<br/>↑ seed_demo.sql provisions 12 agent rows<br/>(1 active, 2 retired, 9 reserved/sentinel)<br/>for the soft-retire / FSM coverage Parts 55–56 exercise"]
|
|
||||||
end
|
|
||||||
QA --> Stack
|
|
||||||
```
|
|
||||||
|
|
||||||
> **Multi-agent demo stack (Bundle Q / L-004 closure).** The demo
|
|
||||||
> stack runs a single live `certctl-agent` container by default but
|
|
||||||
> the database is seeded with 12 agent rows (`migrations/seed_demo.sql`,
|
|
||||||
> grep `mc-* | ag-*` IDs). The "(×N)" notation reflects the seed-data
|
|
||||||
> reality: Parts 04 (Agents Listing), 05 (Agent Heartbeats), 55
|
|
||||||
> (Agent Soft-Retirement), and FSM coverage tables in
|
|
||||||
> `coverage-audit-2026-04-27/tables/fsm-coverage.md` exercise the full
|
|
||||||
> multi-agent population, not the one live container. Operators
|
|
||||||
> running the QA suite in a parallel-agent topology should set
|
|
||||||
> `AGENT_COUNT=N` in compose-override and re-derive the seed counts
|
|
||||||
> via `make qa-stats`.
|
|
||||||
|
|
||||||
Key design choices:
|
|
||||||
|
|
||||||
- **Build tag:** `//go:build qa` — never runs during `go test ./...` or CI. Only runs when explicitly requested.
|
|
||||||
- **Package:** `integration_test` — same package as `integration_test.go` (which uses `//go:build integration` for the test stack). They coexist but never run together.
|
|
||||||
- **Zero internal imports:** Uses only stdlib + `lib/pq` (from `go.mod`). All API interactions are plain HTTP. All JSON is decoded into lightweight local structs (`qaCert`, `qaJob`, etc.) — not the internal domain types.
|
|
||||||
- **Self-cleaning:** Tests that create data use `t.Cleanup()` to delete it afterward. The seed data is not modified.
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
1. **Docker Compose demo stack running:**
|
|
||||||
```bash
|
|
||||||
cd deploy
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build -d
|
|
||||||
```
|
|
||||||
Wait ~15 seconds for health checks to pass.
|
|
||||||
|
|
||||||
2. **Go 1.22+** installed (the project uses Go 1.25 in `go.mod`, but 1.22+ works for running tests).
|
|
||||||
|
|
||||||
3. **PostgreSQL port exposed** — the demo stack exposes port 5432 for database verification tests (table counts, schema checks).
|
|
||||||
|
|
||||||
4. **Repository checkout** — source file verification tests (`fileExists`, `fileContains`) read files relative to `qaRepoDir` (default: `../..` from `deploy/test/`).
|
|
||||||
|
|
||||||
## Running the Tests
|
|
||||||
|
|
||||||
### Full suite
|
|
||||||
```bash
|
|
||||||
cd deploy/test
|
|
||||||
go test -tags qa -v -timeout 10m ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Single Part
|
|
||||||
```bash
|
|
||||||
go test -tags qa -v -run TestQA/Part03 ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Single subtest
|
|
||||||
```bash
|
|
||||||
go test -tags qa -v -run TestQA/Part03_CertCRUD/Create_Minimal ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
### With custom environment
|
|
||||||
```bash
|
|
||||||
CERTCTL_QA_SERVER_URL=https://staging.internal:8443 \
|
|
||||||
CERTCTL_QA_API_KEY=my-staging-key \
|
|
||||||
CERTCTL_QA_DB_URL=postgres://certctl:secret@db.internal:5432/certctl?sslmode=require \
|
|
||||||
CERTCTL_QA_REPO_DIR=/path/to/certctl \
|
|
||||||
go test -tags qa -v -timeout 10m ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
|
|
||||||
| Variable | Default | Description |
|
|
||||||
|---|---|---|
|
|
||||||
| `CERTCTL_QA_SERVER_URL` | `https://localhost:8443` | certctl server URL (HTTPS-only as of v2.2) |
|
|
||||||
| `CERTCTL_QA_API_KEY` | `change-me-in-production` | API key for Bearer auth |
|
|
||||||
| `CERTCTL_QA_DB_URL` | `postgres://certctl:certctl@localhost:5432/certctl?sslmode=disable` | PostgreSQL connection string |
|
|
||||||
| `CERTCTL_QA_REPO_DIR` | `../..` | Path to certctl repo root (for source file checks) |
|
|
||||||
| `CERTCTL_QA_CA_BUNDLE` | `./certs/ca.crt` | PEM CA bundle pinned for TLS verification. The demo stack's `certctl-tls-init` container writes here. |
|
|
||||||
| `CERTCTL_QA_INSECURE` | `false` | Set to `"true"` to skip TLS verification (e.g. before the init container finishes). Never use outside the demo harness. |
|
|
||||||
|
|
||||||
## Part-by-Part Coverage Map
|
|
||||||
|
|
||||||
This table shows what each Part tests and what's left for manual verification.
|
|
||||||
|
|
||||||
| Part | Testing Guide Section | Automated Subtests | What's Automated | What's Manual |
|
|
||||||
|------|----------------------|-------------------|-----------------|--------------|
|
|
||||||
| 1 | Infrastructure & Deployment | 8 | Table count, health/ready endpoints, seed data counts (certs, agents, issuers, targets, policies) | Docker container health, log inspection, volume mounts |
|
|
||||||
| 2 | Authentication & Security | 4 | No-auth 401, bad-key 401, health-no-auth 200, no private keys in API | CORS preflight, rate limiting (429 + Retry-After), TLS config |
|
|
||||||
| 3 | Certificate Lifecycle | 10 | Create (minimal + full), get, 404, list pagination, status/issuer filters, sparse fields, update, archive | Deployment trigger, version history, certificate detail UI |
|
|
||||||
| 4 | Renewal Workflow | 3 | Trigger renewal, 404 on nonexistent, agent work endpoint | AwaitingCSR flow, agent key generation, full issuance cycle |
|
|
||||||
| 5 | Revocation | 5 | Revoke (default reason), already-revoked, nonexistent, invalid reason, CRL JSON | DER CRL, OCSP responder, revocation notifications |
|
|
||||||
| 6 | Policies & Profiles | 6 | Policy CRUD (create/delete), invalid type 400, profile CRUD, list | Policy violation detection, profile enforcement on CSR |
|
|
||||||
| 7 | Ownership & Teams | 4 | Team CRUD, owner CRUD, agent groups list | Owner notification routing, dynamic group matching |
|
|
||||||
| 8 | Job System | 2 | List jobs, 404 on nonexistent | Job state transitions, approval workflow, cancellation |
|
|
||||||
| 9 | Issuer Connectors | 4 | List, get detail, create (GenericCA), missing name 400 | Test connection, issuer-specific issuance flow |
|
|
||||||
| 10 | Sub-CA Mode | SKIP | — | Requires CA cert+key on disk |
|
|
||||||
| 11 | ACME ARI | SKIP | — | Requires ARI-capable CA |
|
|
||||||
| 12 | Vault PKI | SKIP | — | Requires live Vault server |
|
|
||||||
| 13 | DigiCert | SKIP | — | Requires DigiCert sandbox |
|
|
||||||
| 14 | Target Connectors | 3 | List, create NGINX target, delete 204 | Deploy to real target, validate deployment |
|
|
||||||
| 15–17 | Apache/HAProxy, Traefik/Caddy, IIS | — | (Covered by source checks in Parts 42–46) | Requires real services or Windows |
|
|
||||||
| 18 | Agent Operations | 3 | Heartbeat (register), metadata check, auto-create on heartbeat | Agent binary behavior, key storage, discovery scan |
|
|
||||||
| 19 | Agent Work Routing | 1 | Empty work for agent with no targets | Scoped job assignment, multi-target fan-out |
|
|
||||||
| 20 | Post-Deployment Verification | 1 | 404 on nonexistent job verification | TLS probing, fingerprint comparison |
|
|
||||||
| 21 | EST Server | 2 | CACerts (200 + content-type), CSRAttrs (200/204) | simpleenroll with CSR, simplereenroll, PKCS#7 parsing |
|
|
||||||
| 22 | Certificate Export | 3 | PEM export, PKCS#12 export, 404 on nonexistent | Download mode, file content validation |
|
|
||||||
| 23 | S/MIME & EKU Support | 0 (NOT AUTOMATED) | — | S/MIME profile creation; EKU enforcement on issuance; SMIMECapabilities extension presence in issued cert; rejection of profile-violating EKU on CSR. Test manually — see Part 23 of `testing-guide.md` |
|
|
||||||
| 24 | OCSP Responder & DER CRL | 0 (NOT AUTOMATED) | — | OCSP request/response (RFC 6960), DER CRL generation, status (Good/Revoked/Unknown), Must-Staple coordination. Test manually — see Part 24 of `testing-guide.md` |
|
|
||||||
| 25 | Certificate Discovery | 5 | List discovered, summary, list scan targets, create target, invalid CIDR 400 | Agent filesystem scan, claim/dismiss workflow |
|
|
||||||
| 26 | Enhanced Query API | 4 | Sort descending, cursor pagination, time-range filter, invalid sort field | Field projection correctness, cursor token cycling |
|
|
||||||
| 27 | Request Body Size Limits | 1 | 2MB body rejected (413/400) | Exact limit boundary (1MB) |
|
|
||||||
| 28 | CLI | SKIP | — | Requires compiled `certctl-cli` binary |
|
|
||||||
| 29 | MCP Server | SKIP | — | Requires compiled `mcp-server` binary + stdio |
|
|
||||||
| 30 | Observability | 7 | Dashboard summary, certs by status, expiration timeline, job trends, issuance rate, JSON metrics (uptime + gauges), Prometheus (content-type + 4 metric names) | Chart rendering (GUI), Grafana import |
|
|
||||||
| 31 | Notifications | 2 | List, 404 on nonexistent | Notification content, mark-read, email/Slack delivery |
|
|
||||||
| 32 | Audit Trail | 3 | List events (≥10), PUT immutability, DELETE immutability | Actor attribution, body hash, time range filters |
|
|
||||||
| 33 | Background Scheduler | SKIP | — | Timing-dependent; verify via Docker logs |
|
|
||||||
| 34 | Structured Logging | SKIP | — | Requires Docker log inspection |
|
|
||||||
| 35 | GUI Testing | SKIP | — | Requires browser |
|
|
||||||
| 36–37 | Issuer Catalog, Frontend Audit | SKIP | — | Requires browser |
|
|
||||||
| 38 | Error Handling | 5 | Malformed JSON, missing required field, method not allowed, UTF-8 CN, empty body | Stack trace suppression, error response format |
|
|
||||||
| 39 | Performance | 5 | List certs < 200ms, stats < 500ms, metrics < 200ms, Prometheus < 300ms, audit < 500ms | Load testing, concurrent request handling |
|
|
||||||
| 40 | Documentation | 8 | README, quickstart, architecture, connectors exist; migration guides exist; 8 issuer types in docs; 11 target types in docs | Content accuracy, link validity |
|
|
||||||
| 41 | Regression | 3 | DELETE 204, per_page max fallback, network scan target seed count | `errors.Is(errors.New())` anti-pattern source scan |
|
|
||||||
| 42 | Envoy Target | 5 | Domain type, connector file, test file, OpenAPI, agent dispatch | Envoy deployment test, SDS config |
|
|
||||||
| 43 | Postfix/Dovecot | 3 | Domain types (Postfix + Dovecot), connector file, OpenAPI | Mail server deployment test |
|
|
||||||
| 44 | SSH Target | 4 | Domain type, connector file, agent dispatch (`sshconn`), OpenAPI | SSH deployment test (requires target host) |
|
|
||||||
| 45 | Windows Certificate Store | 3 | Domain type, connector file, shared certutil package | Windows deployment (requires Windows) |
|
|
||||||
| 46 | Java Keystore | 3 | Domain type, connector file, OpenAPI | JKS deployment (requires keytool) |
|
|
||||||
| 47 | Certificate Digest Email | 3 | Preview endpoint (200/503), service file, adapter file | SMTP delivery, HTML template rendering |
|
|
||||||
| 48 | Dynamic Issuer Config | 4 | Crypto package exists, create ACME issuer via API, config redaction check, migration exists | Test connection flow, registry rebuild |
|
|
||||||
| 49 | Dynamic Target Config | 2 | Create NGINX target via API, migration exists | Test connection via agent heartbeat |
|
|
||||||
| 50 | Onboarding Wizard | 2 | Wizard component exists, docker-compose split (clean vs demo) | Wizard UI flow, step completion |
|
|
||||||
| 51 | ACME Profile Selection | 3 | Profile module exists, frontend config, RFC 9702→9773 renumber check | Profile-aware issuance against real CA |
|
|
||||||
| 52 | Helm Chart | 5 | Chart.yaml, values.yaml, 4 templates exist, securityContext, health probes | `helm template` rendering, `helm install` |
|
|
||||||
| 53 | Kubernetes Secrets Target Connector (M47) | 18 | Config validation (namespace DNS-1123, secret name DNS subdomain, label keys, required fields), deployment (create/update Secret, chain concatenation, error propagation), validation (serial comparison, not-found, empty cert) | GUI target wizard KubernetesSecrets fields (namespace, secret_name, labels, kubeconfig_path), Helm RBAC toggle, TargetDetailPage type label |
|
|
||||||
| 54 | AWS ACM Private CA Issuer Connector (M47) | 23 | Config validation (region, CA ARN regex, signing algorithm whitelist, validity_days, defaults), issuance (full flow, empty CSR, errors), renewal (reuses issuance), revocation (reason mapping, default, errors), GetOrderStatus completed, GetCACertPEM (success/chain/error), GetRenewalInfo nil | GUI issuer wizard AWSACMPCA fields (region, ca_arn, signing_algorithm, validity_days, template_arn), seed data visibility, create issuer flow |
|
|
||||||
| 55 | Agent Soft-Retirement (I-004) | 0 (NOT AUTOMATED) | — | Soft-retire vs hard-retire; force flag; reason capture; foreign-key cascade behavior on retired-agent cert ownership; reactivation. Test manually — see Part 55 of `testing-guide.md` |
|
|
||||||
| 56 | Notification Retry & Dead-Letter Queue (I-005) | 0 (NOT AUTOMATED) | — | Retry loop with exponential backoff, dead-letter transition after N retries, requeue endpoint (`POST /api/v1/notifications/{id}/requeue`), idempotency on retry. Test manually — see Part 56 of `testing-guide.md` |
|
|
||||||
|
|
||||||
**Totals (verified 2026-04-27):** 49 `Part_*` automation wrappers, ~159 leaf subtests, 11 fully
|
|
||||||
skipped Parts, 4 Parts not yet automated (23, 24, 55, 56), and an unspecified count of manual-only
|
|
||||||
flows (GUI, scheduler timing, Docker log inspection). Run `grep -cE 't\.Run\("Part[0-9]+_' deploy/test/qa_test.go` to count Part_* automation wrappers
|
|
||||||
and compare the result against the wrapper count above to re-verify.
|
|
||||||
|
|
||||||
## Coverage by Risk Class
|
|
||||||
|
|
||||||
A buyer's QA lead reading this doc wants "where are the existential bugs caught?" — Bundle P / Strengthening #1 surfaces that view directly. The table below classifies each Part by risk class so reviewers can answer the existential-coverage question in one glance.
|
|
||||||
|
|
||||||
| Risk class | Description | Parts in scope | Automation status |
|
|
||||||
|---|---|---|---|
|
|
||||||
| **Existential** (Critical paths — bugs would compromise CA, leak keys, mis-issue, bypass revocation) | Crypto, PKCS#7, local-issuer, OCSP/CRL, agent keygen, CSR validation | 5 (Revocation), 21 (EST), 23 (S/MIME EKU), 24 (OCSP/CRL), 47 (Digest with cert content), 53 (K8s Secrets), 54 (AWS PCA) | 5/7 automated; Parts 23 + 24 pending (Bundle I Skip stubs in `qa_test.go`; manual playbook in the Coverage Map above) |
|
|
||||||
| **High** (FSM corruption, credential leak, authn/z weakening) | Renewal, jobs, agents, issuers, deployment, scheduler | 4, 7, 8, 9, 18, 19, 20, 22, 25, 28, 29, 32, 33, 48, 49, 55, 56 | 12/17 automated; CLI / MCP / scheduler-loop (Parts 28, 29, 33) are inherently SKIP (require compiled binaries / Docker logs); Parts 55 + 56 pending |
|
|
||||||
| **Medium** (Operational pain or silent data drift) | Targets, notifiers, observability, error handling, performance, regression | 14, 15-17, 30, 31, 38, 39, 41, 42, 43, 44, 45, 46 | 14/14 automated (15-17 indirect via Parts 42–46) |
|
|
||||||
| **Low** (Hygiene) | Documentation, docs verification | 40 (Documentation), 50 (Onboarding) | 2/2 automated |
|
|
||||||
| **Frontend** (XSS, render correctness, mutation contracts) | GUI testing | 35, 36-37 | 0/3 automated in this suite (Vitest covers separately under `web/`); this doc punts to manual + Vitest |
|
|
||||||
| **Audit-relevant** | Audit trail, body-size limits, request limits, Helm chart deploy posture | 27, 32, 51, 52 | 4/4 automated |
|
|
||||||
|
|
||||||
This is the table acquisition reviewers screenshot for their report. When a new Part_* subtest lands in `qa_test.go`, classify it here.
|
|
||||||
|
|
||||||
## Test Categories
|
|
||||||
|
|
||||||
The automated tests fall into four categories:
|
|
||||||
|
|
||||||
### 1. API Integration Tests (majority)
|
|
||||||
Make real HTTP requests to the running server and verify status codes, response structure, and JSON field values. Examples:
|
|
||||||
- `POST /api/v1/certificates` with valid payload → 201
|
|
||||||
- `GET /api/v1/certificates?status=Active` → all returned certs have `status: "Active"`
|
|
||||||
- `DELETE /api/v1/certificates/mc-qa-full` → 204
|
|
||||||
|
|
||||||
### 2. Database Verification Tests
|
|
||||||
Connect directly to PostgreSQL and verify schema state:
|
|
||||||
- Table count ≥ 19 (from migrations 000001–000010)
|
|
||||||
- Useful for catching migration regressions
|
|
||||||
|
|
||||||
### 3. Source File Verification Tests
|
|
||||||
Read files from the repo checkout and verify structure:
|
|
||||||
- Domain types exist in `internal/domain/connector.go` (e.g., `TargetTypeEnvoy`)
|
|
||||||
- Connector implementations exist (e.g., `internal/connector/target/envoy/envoy.go`)
|
|
||||||
- Documentation contains expected content (all issuer/target types listed)
|
|
||||||
- No stale RFC 9702 references (replaced by RFC 9773)
|
|
||||||
|
|
||||||
### 4. Performance Spot Checks
|
|
||||||
Timed API requests with threshold assertions:
|
|
||||||
- `GET /api/v1/certificates?per_page=15` < 200ms
|
|
||||||
- `GET /api/v1/stats/summary` < 500ms
|
|
||||||
- `GET /api/v1/metrics/prometheus` < 300ms
|
|
||||||
|
|
||||||
## What This Test Does NOT Cover
|
|
||||||
|
|
||||||
These gaps must be filled by manual testing — see each Coverage Map row for surface-area description:
|
|
||||||
|
|
||||||
### Not Yet Automated (Parts 23, 24, 55, 56)
|
|
||||||
|
|
||||||
These historical QA Parts are listed in the Coverage Map above but have no `Part_*` automation
|
|
||||||
in `qa_test.go` yet. They are operator-runnable from the manual playbook; QA-suite
|
|
||||||
automation should land before the next acquisition-grade release.
|
|
||||||
|
|
||||||
- **Part 23: S/MIME & EKU Support** — profile-driven EKU enforcement; SMIMECapabilities extension
|
|
||||||
- **Part 24: OCSP Responder & DER CRL** — OCSP request/response correctness, CRL generation, Must-Staple coordination
|
|
||||||
- **Part 55: Agent Soft-Retirement (I-004)** — soft vs hard retire, FK cascade, reactivation
|
|
||||||
- **Part 56: Notification Retry & Dead-Letter Queue (I-005)** — retry semantics, dead-letter transition, requeue
|
|
||||||
|
|
||||||
### External CA Integrations (Parts 10–13)
|
|
||||||
- **Sub-CA mode** — requires CA cert+key files on disk
|
|
||||||
- **ACME ARI** — requires a CA that supports RFC 9773 Renewal Information
|
|
||||||
- **Vault PKI** — requires a running HashiCorp Vault instance
|
|
||||||
- **DigiCert / Sectigo / Google CAS** — requires sandbox API credentials
|
|
||||||
|
|
||||||
### Browser/GUI Testing (Parts 35–37, 50)
|
|
||||||
- Dashboard chart rendering (Recharts)
|
|
||||||
- Onboarding wizard step-by-step flow
|
|
||||||
- Issuer catalog card layout and create wizard
|
|
||||||
- Bulk operations UI (multi-select, progress bars)
|
|
||||||
- Discovery triage workflow
|
|
||||||
|
|
||||||
### Real Deployment Testing (Parts 15–17)
|
|
||||||
- NGINX/Apache/HAProxy file write + reload
|
|
||||||
- Traefik/Caddy file provider or API reload
|
|
||||||
- IIS PowerShell/WinRM (requires Windows)
|
|
||||||
- F5 BIG-IP iControl REST (requires appliance or mock)
|
|
||||||
- SSH agentless deployment (requires target host)
|
|
||||||
|
|
||||||
### Agent Binary Behavior (Parts 18, 28–29)
|
|
||||||
- Agent-side ECDSA key generation and CSR submission
|
|
||||||
- Agent filesystem discovery scan
|
|
||||||
- CLI tool (`certctl-cli`) — all 10 subcommands
|
|
||||||
- MCP server (`mcp-server`) — stdio transport
|
|
||||||
|
|
||||||
### Timing-Dependent Tests (Parts 33–34)
|
|
||||||
- Background scheduler loop execution (renewal, jobs, health, notifications, digest, network scan)
|
|
||||||
- Structured logging format verification (requires Docker log parsing)
|
|
||||||
|
|
||||||
## How This Relates to `integration_test.go`
|
|
||||||
|
|
||||||
Both files live in `deploy/test/` in the same Go package (`integration_test`):
|
|
||||||
|
|
||||||
| | `qa_test.go` | `integration_test.go` |
|
|
||||||
|---|---|---|
|
|
||||||
| **Build tag** | `//go:build qa` | `//go:build integration` |
|
|
||||||
| **Target stack** | Demo (`docker-compose.yml` + `docker-compose.demo.yml`) | Test (`docker-compose.test.yml`) |
|
|
||||||
| **Port** | 8443 | Different (test stack config) |
|
|
||||||
| **Seed data** | `seed_demo.sql` (32 certs, 12 agents, 13 issuers, 8 targets, realistic history) | Minimal (created by tests) |
|
|
||||||
| **CA backends** | Local CA only (demo mode) | Pebble ACME, step-ca, NGINX |
|
|
||||||
| **Purpose** | Release QA — broad coverage, spot checks | Functional — end-to-end issuance, renewal, revocation against real CAs |
|
|
||||||
| **Run frequency** | Before each release tag | CI on every PR |
|
|
||||||
|
|
||||||
They are complementary. Integration tests prove the machinery works. QA tests prove the product works at release quality.
|
|
||||||
|
|
||||||
## Seed Data Reference
|
|
||||||
|
|
||||||
The QA tests depend on `migrations/seed_demo.sql`. Key IDs used:
|
|
||||||
|
|
||||||
### Certificates (32 total in `managed_certificates`)
|
|
||||||
|
|
||||||
The full canonical list is generated by:
|
|
||||||
```
|
|
||||||
sed -n '/^INSERT INTO managed_certificates/,/^;/p' migrations/seed_demo.sql \
|
|
||||||
| grep -oE "^\s*\('mc-[a-z0-9_-]+" | sed -E "s/^\s*\('//" | sort -u
|
|
||||||
```
|
|
||||||
|
|
||||||
Hand-listing is unsustainable as the seed grows; tests reference IDs by lookup, not by enumeration.
|
|
||||||
Sample IDs: `mc-api-prod`, `mc-web-prod`, `mc-pay-prod`, `mc-compromised`, `mc-smime-bob`, `mc-edge-eu`, `mc-k8s-ingress`, `mc-wildcard-prod`. See `migrations/seed_demo.sql:147` onward.
|
|
||||||
|
|
||||||
### Agents (12 total in `agents` table)
|
|
||||||
|
|
||||||
8 named workload agents + 1 server-side sentinel + 3 cloud-discovery sentinels:
|
|
||||||
|
|
||||||
- **Workload agents:** `ag-web-prod`, `ag-web-staging`, `ag-lb-prod`, `ag-iis-prod`, `ag-data-prod`, `ag-edge-01`, `ag-k8s-prod`, `ag-mac-dev`
|
|
||||||
- **Server-side sentinel:** `server-scanner`
|
|
||||||
- **Cloud-discovery sentinels:** `cloud-aws-sm`, `cloud-azure-kv`, `cloud-gcp-sm`
|
|
||||||
|
|
||||||
Full list via:
|
|
||||||
```
|
|
||||||
sed -n '/^INSERT INTO agents/,/^;/p' migrations/seed_demo.sql \
|
|
||||||
| grep -oE "^\s*\('[a-z][a-z0-9_-]+" | sed -E "s/^\s*\('//"
|
|
||||||
```
|
|
||||||
|
|
||||||
(The `agent_groups` table also contains entries with `ag-*` IDs — `ag-linux-prod`, `ag-windows`, `ag-datacenter-a`, `ag-arm64`, `ag-manual` — but those are *group* IDs, not agents. Don't confuse the two.)
|
|
||||||
|
|
||||||
### Issuers (13 total)
|
|
||||||
|
|
||||||
`iss-local`, `iss-acme-le`, `iss-stepca`, `iss-acme-zs`, `iss-openssl`, `iss-vault`, `iss-digicert`, `iss-sectigo`, `iss-googlecas`, `iss-awsacmpca`, `iss-entrust`, `iss-globalsign`, `iss-ejbca`.
|
|
||||||
|
|
||||||
Full list via:
|
|
||||||
```
|
|
||||||
sed -n '/^INSERT INTO issuers/,/^;/p' migrations/seed_demo.sql \
|
|
||||||
| grep -oE "^\s*\('iss-[a-z0-9_-]+" | sed -E "s/^\s*\('//"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Targets (8 total in `deployment_targets`)
|
|
||||||
`tgt-nginx-prod`, `tgt-nginx-staging`, `tgt-haproxy-prod`, `tgt-apache-prod`, `tgt-iis-prod`, `tgt-traefik-prod`, `tgt-caddy-prod`, `tgt-nginx-data`
|
|
||||||
|
|
||||||
### Network Scan Targets (4 total in `network_scan_targets`)
|
|
||||||
`nst-dc1-web`, `nst-dc2-apps`, `nst-dmz`, `nst-edge`
|
|
||||||
|
|
||||||
**Maintenance note:** when adding new seed rows, also update this section, OR remove the
|
|
||||||
per-table counts and rely on the `sed | grep` commands so the doc stops drifting on every
|
|
||||||
seed-data change. A CI guard that fails when the doc count diverges from the seed file is
|
|
||||||
proposed in `coverage-audit-2026-04-27/tables/qa-doc-strengthening.md` (Strengthening #6).
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### "Server unreachable" on startup
|
|
||||||
The test pings `GET /health` before running anything. If this fails:
|
|
||||||
```bash
|
|
||||||
# Check if the stack is running
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml ps
|
|
||||||
|
|
||||||
# Check server logs
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml logs certctl-server
|
|
||||||
|
|
||||||
# Check if the port is exposed (self-signed cert — pin CA bundle; the path below is relative to the repo root)
|
|
||||||
curl --cacert ./deploy/test/certs/ca.crt -s https://localhost:8443/health
|
|
||||||
```
|
|
||||||
|
|
||||||
### "connect to QA DB" failure
|
|
||||||
The database tests connect directly to PostgreSQL. Ensure port 5432 is exposed:
|
|
||||||
```bash
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.demo.yml port postgres 5432
|
|
||||||
```
|
|
||||||
|
|
||||||
### Performance tests flaking
|
|
||||||
The performance thresholds (200ms, 300ms, 500ms) assume a local Docker stack. On slow CI runners or remote Docker hosts, increase the thresholds or skip Part 39:
|
|
||||||
```bash
|
|
||||||
go test -tags qa -v -skip 'TestQA/Part39' ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Source file checks failing
|
|
||||||
The `fileExists` and `fileContains` helpers read from `CERTCTL_QA_REPO_DIR` (default `../..`). If running from a non-standard location:
|
|
||||||
```bash
|
|
||||||
CERTCTL_QA_REPO_DIR=/absolute/path/to/certctl go test -tags qa -v ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
## Release Day Sign-Off Matrix
|
|
||||||
|
|
||||||
Before tagging a release, the QA-on-call engineer signs off on each row. This matrix replaces the previous ad-hoc release checklist and ties test execution directly to release approval. Acquisition-grade releases have this kind of matrix; the doc previously didn't.
|
|
||||||
|
|
||||||
| Sign-off | Evidence | Owner | Result | Date |
|
|
||||||
|---|---|---|---|---|
|
|
||||||
| `make verify` clean on master | CI run URL | Eng-on-call | ☐ | |
|
|
||||||
| `go test -tags qa ./deploy/test/...` ≥ 95% pass rate (skips counted as pass) | Test output | QA-on-call | ☐ | |
|
|
||||||
| `go test -race -count=10 ./internal/...` 0 races | `tool-output/race-x10.txt` | QA-on-call | ☐ | |
|
|
||||||
| Coverage ≥ thresholds in `ci.yml` (service / handler / crypto / local-issuer / acme / stepca / mcp) | `tool-output/cover-summary.txt` | QA-on-call | ☐ | |
|
|
||||||
| Helm chart `helm lint && helm template` clean | `tool-output/helm.txt` | DevOps-on-call | ☐ | |
|
|
||||||
| All `t.Skip` sites have current rationales (see Bundle O audit; CI guard catches new orphans) | `make qa-stats` t.Skip count | QA-on-call | ☐ | |
|
|
||||||
| Frontend: Vitest run clean; per-page coverage ≥ 70% | `web/tool-output/vitest.txt` | Frontend-on-call | ☐ | |
|
|
||||||
| Manual Parts 23, 24, 55, 56 executed (or explicit defer with rationale) | This sheet | QA-on-call | ☐ | |
|
|
||||||
| Demo stack `docker compose up -d --build` smoke (`/health` 200, `/ready` 200) | curl receipt | QA-on-call | ☐ | |
|
|
||||||
| `govulncheck ./...` clean (or deferred-call advisories tracked in `gap-backlog`) | `tool-output/govulncheck.json` | Security-on-call | ☐ | |
|
|
||||||
| QA-doc drift guards green (Part-count + cert-count) | CI run URL | QA-on-call | ☐ | |
|
|
||||||
| FSM transition coverage tables (`coverage-audit-2026-04-27/tables/fsm-coverage.md`) — Existential FSMs ≥80% legal + 100% illegal | This sheet | QA-on-call | ☐ | |
|
|
||||||
|
|
||||||
**Sign-off owner:** ______________________ **Date:** ______ **Tag:** v__.__.__
|
|
||||||
|
|
||||||
## Mutation Testing Targets & Kill Rate
|
|
||||||
|
|
||||||
Mutation testing exposes which assertions are actually load-bearing — tests can pass against broken code if mutations survive, which is a coverage trap. The audit's Phase 0 attempted to run `go-mutesting` on the Existential cluster but was blocked by a Go 1.25 / arm64 incompatibility in `osutil@v1.6.1` (uses `syscall.Dup2` which is undefined on linux/arm64). The operator-runnable workaround uses a fork that targets `unix.Dup3` instead.
|
|
||||||
|
|
||||||
| Package | Risk class | Target kill rate | Last measured | Tool |
|
|
||||||
|---|---|---|---|---|
|
|
||||||
| `internal/crypto` | Existential | ≥90% | unmeasured (sandbox-blocked, operator-runnable) | go-mutesting |
|
|
||||||
| `internal/pkcs7` | Existential | ≥90% | unmeasured | go-mutesting |
|
|
||||||
| `internal/connector/issuer/local` | Existential | ≥90% | unmeasured | go-mutesting |
|
|
||||||
| `internal/connector/issuer/acme` | Existential | ≥80% (catch-up; failure-mode coverage 55.6% per Bundle J) | unmeasured | go-mutesting |
|
|
||||||
| `internal/connector/issuer/stepca` | Existential | ≥85% (post-Bundle-L.B coverage at 90.4%) | unmeasured | go-mutesting |
|
|
||||||
| `internal/api/middleware` | High | ≥80% | unmeasured | go-mutesting |
|
|
||||||
| `internal/validation` | Existential (CWE-78 / CWE-113 boundary) | ≥90% | unmeasured | go-mutesting |
|
|
||||||
| `web/src/utils/safeHtml.ts` | Frontend (XSS gate) | ≥90% | unmeasured | Stryker |
|
|
||||||
|
|
||||||
### Operator command (per package)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Use the avito-tech fork that supports linux/arm64 + Go 1.25.
|
|
||||||
go install github.com/avito-tech/go-mutesting/cmd/go-mutesting@latest
|
|
||||||
|
|
||||||
mkdir -p tool-output
|
|
||||||
$(go env GOPATH)/bin/go-mutesting --debug ./internal/crypto/... \
|
|
||||||
> tool-output/mutation-crypto.txt 2>&1
|
|
||||||
grep -oE 'mutation score is [0-9.]+' tool-output/mutation-crypto.txt | tail -1
|
|
||||||
```
|
|
||||||
|
|
||||||
**Acceptance:** ≥80% (Existential) / ≥70% (High). Anything below is a Medium finding; triage entries go in `coverage-audit-2026-04-27/gap-backlog.md`. This subsection moves mutation testing from "future work" to "documented release gate."
|
|
||||||
|
|
||||||
## Adding New Tests
|
|
||||||
|
|
||||||
When a new feature ships:
|
|
||||||
|
|
||||||
1. **Add a Part section** in `qa_test.go` following the numbering convention in the Coverage Map above
|
|
||||||
2. **API tests**: use `c.get()`, `c.post()`, `c.bodyStr()`, `c.getJSON()`, `c.timedGet()`
|
|
||||||
3. **Source checks**: use `fileExists(t, "relative/path")` and `fileContains(t, "path", "substring")`
|
|
||||||
4. **DB checks**: use `openQADB(t)` and `db.queryInt(t, "SELECT ...")`
|
|
||||||
5. **Cleanup**: always use `t.Cleanup()` for data created during tests
|
|
||||||
6. **Skip if external**: use `t.Skip("Requires X — manual test")` with a clear reason
|
|
||||||
|
|
||||||
## Version History
|
|
||||||
|
|
||||||
- **v1.3** (April 2026, post-Bundle-P) — QA Doc Strengthening shipped. New top-of-doc Test Suite Health dashboard (regenerated via `make qa-stats`). New Coverage by Risk Class table after the Coverage Map. New Release Day Sign-Off Matrix and Mutation Testing Targets sections. CI seed-count + Part-count drift guards land in `.github/workflows/ci.yml` so future doc drift fails CI. Bundle P closes M-007 / M-010 / M-011 / M-012 (structural strengthening) + M-008 (Mutation Testing Targets).
|
|
||||||
- **v1.2** (April 2026, post-coverage-audit) — Documented Parts 55–56 (I-004 Agent Soft-Retirement, I-005 Notification Retry & Dead-Letter) and surfaced Parts 23–24 (S/MIME & EKU; OCSP/CRL) as not-yet-automated. 56 Parts total in `testing-guide.md`; 49 live `Part_*` automation wrappers in `qa_test.go` + 4 new `Skip` stubs for Parts 23/24/55/56 = 53 wrappers (Parts 15–17 remain covered by source-checks in Parts 42–46). Reconciled seed-data section to actual `seed_demo.sql` counts (12 agents, 13 issuers; certs were already accurate at 32). Bundle I of the 2026-04-27 coverage-audit closure plan.
|
|
||||||
- **v1.1** (April 2026) — Added Parts 53–54 (M47: Kubernetes Secrets target + AWS ACM PCA issuer). 54 Parts total, ~164 automated subtests.
|
|
||||||
- **v1.0** (April 2026) — Initial release covering all 52 Parts of testing-guide.md v2.1. Replaces `qa-smoke-test.sh`.
|
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
# Release Sign-Off
|
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
|
||||||
|
|
||||||
Release-day checklist for tagging a new certctl release. Walks through the gates that must be green before pushing the tag, in the order they should be verified.
|
|
||||||
|
|
||||||
## Pre-release: code state
|
|
||||||
|
|
||||||
| Gate | How to check | Pass |
|
|
||||||
|---|---|---|
|
|
||||||
| `master` is at the commit you intend to tag | `git log -1 --format='%H %s'` | ☐ |
|
|
||||||
| Working tree clean | `git status -sb` | ☐ |
|
|
||||||
| Local matches GitHub | `curl -sS https://api.github.com/repos/certctl-io/certctl/commits/master \| grep -oE '"sha": "[a-f0-9]+"' \| head -1` matches local | ☐ |
|
|
||||||
| `WORKSPACE-CHANGELOG.md` updated with the release's milestones | manual review | ☐ |
|
|
||||||
| `certctl/CHANGELOG.md` updated (release-facing) | manual review | ☐ |
|
|
||||||
| Migration ladder ends cleanly | `ls migrations/*.up.sql \| sort \| tail -3` shows the right last migration | ☐ |
|
|
||||||
|
|
||||||
## Pre-release: automated gates (CI)
|
|
||||||
|
|
||||||
| Gate | How to check | Pass |
|
|
||||||
|---|---|---|
|
|
||||||
| CI pipeline green on the tag-target commit | GitHub Actions web UI | ☐ |
|
|
||||||
| `make verify` clean locally | run from repo root | ☐ |
|
|
||||||
| `go test -race -count=1 ./...` clean | full race check | ☐ |
|
|
||||||
| `golangci-lint run ./...` clean | local lint | ☐ |
|
|
||||||
| `govulncheck ./...` clean | vulnerability scan | ☐ |
|
|
||||||
| Coverage thresholds met (service ≥55%, handler ≥60%, domain ≥40%, middleware ≥30%) | `go test -coverprofile=cover.out ./... && go tool cover -func=cover.out` | ☐ |
|
|
||||||
| Frontend type-check + Vitest + Vite build clean | `cd web && npm run typecheck && npm run test && npm run build` | ☐ |
|
|
||||||
|
|
||||||
## Pre-release: manual QA passes
|
|
||||||
|
|
||||||
| Surface | Checklist | Pass |
|
|
||||||
|---|---|---|
|
|
||||||
| Local stack boots clean from scratch | `qa-prerequisites.md` Steps 1-4 green | ☐ |
|
|
||||||
| GUI QA checklist | `gui-qa-checklist.md` end to end | ☐ |
|
|
||||||
| End-to-end test environment | `test-environment.md` Steps 1-14 green | ☐ |
|
|
||||||
| Performance baselines | `performance-baselines.md` four spot checks within bounds | ☐ |
|
|
||||||
| Helm chart deploys clean | `helm-deployment.md` install + verify | ☐ |
|
|
||||||
| ACME server interop (cert-manager) | `make acme-cert-manager-test` green | ☐ |
|
|
||||||
| ACME server RFC conformance (lego) | `make acme-rfc-conformance-test` green | ☐ |
|
|
||||||
|
|
||||||
## Release artefact verification
|
|
||||||
|
|
||||||
After the release workflow runs (triggered by tag push), verify the published artefacts:
|
|
||||||
|
|
||||||
| Artefact | How to verify | Pass |
|
|
||||||
|---|---|---|
|
|
||||||
| Cosign keyless OIDC signature on `checksums.txt` | per `docs/reference/release-verification.md` step 2 | ☐ |
|
|
||||||
| SLSA Level 3 provenance on each binary | step 3 | ☐ |
|
|
||||||
| Container image signature + SBOM + provenance | step 4 | ☐ |
|
|
||||||
| Release notes published on GitHub Releases page | manual review | ☐ |
|
|
||||||
| ghcr.io images at `ghcr.io/certctl-io/certctl-{server,agent}:<tag>` pullable | `docker pull` round-trips | ☐ |
|
|
||||||
|
|
||||||
## Branch protection + tag push
|
|
||||||
|
|
||||||
| Gate | How to check | Pass |
|
|
||||||
|---|---|---|
|
|
||||||
| `master` branch protection rule allows the tag push | Repository Settings → Branches | ☐ |
|
|
||||||
| Tag pushed | `git tag -s v<version> -m 'Release v<version>' && git push origin v<version>` | ☐ |
|
|
||||||
| Release workflow kicked off in GitHub Actions | watch the Actions tab | ☐ |
|
|
||||||
|
|
||||||
## Post-release
|
|
||||||
|
|
||||||
| Gate | How to check | Pass |
|
|
||||||
|---|---|---|
|
|
||||||
| Release workflow completed without errors | GitHub Actions | ☐ |
|
|
||||||
| Sample binary downloaded and Cosign-verified by an operator who is not the release author | another team member | ☐ |
|
|
||||||
| `WORKSPACE-CHANGELOG.md` notes the tag commit SHA | manual edit | ☐ |
|
|
||||||
| workspace-tracking "Active Focus" → "Current tag" updated | manual edit | ☐ |
|
|
||||||
| `certctl.io/index.html` star count + `data-gh-version` rendering picks up the new tag | open the landing page in 6+ hours (cache TTL) | ☐ |
|
|
||||||
| Reddit / Hacker News / LinkedIn announcement drafted (if a major release) | per the operator's promotion playbook | ☐ |
|
|
||||||
|
|
||||||
## If a gate fails
|
|
||||||
|
|
||||||
Revert the tag push immediately:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git push --delete origin v<version>
|
|
||||||
git tag -d v<version>
|
|
||||||
```
|
|
||||||
|
|
||||||
Investigate, fix, re-tag.
|
|
||||||
|
|
||||||
## Related docs
|
|
||||||
|
|
||||||
- [`docs/contributor/qa-prerequisites.md`](qa-prerequisites.md) — local stack prereqs
|
|
||||||
- [`docs/contributor/test-environment.md`](test-environment.md) — full local environment tutorial
|
|
||||||
- [`docs/contributor/gui-qa-checklist.md`](gui-qa-checklist.md) — GUI manual QA pass
|
|
||||||
- [`docs/contributor/testing-strategy.md`](testing-strategy.md) — what we test in CI vs deep-scan vs manual QA
|
|
||||||
- [`docs/contributor/ci-pipeline.md`](ci-pipeline.md) — CI shape and regression guards
|
|
||||||
- [`docs/operator/performance-baselines.md`](../operator/performance-baselines.md) — performance regression spot checks
|
|
||||||
- [`docs/operator/helm-deployment.md`](../operator/helm-deployment.md) — Helm install + verify
|
|
||||||
- [`docs/reference/release-verification.md`](../reference/release-verification.md) — Cosign / SLSA / SBOM verification procedure
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,200 +0,0 @@
|
|||||||
# certctl Testing Strategy & Deep-Scan Operator Runbook
|
|
||||||
|
|
||||||
> Last reviewed: 2026-05-05
|
|
||||||
|
|
||||||
This doc covers the **testing topology** (per-PR fast gates vs. daily deep-scan
|
|
||||||
gates), and the **operator runbook** for re-running each deep-scan tool locally
|
|
||||||
when the CI receipt is ambiguous or when an operator wants to validate a fix
|
|
||||||
before the next scheduled scan.
|
|
||||||
|
|
||||||
For the manual end-to-end QA playbook, see [`testing-guide.md`](../testing-guide.md).
|
|
||||||
For the security posture / per-finding closure log, see [`security.md`](../operator/security.md).
|
|
||||||
|
|
||||||
## CI workflow split
|
|
||||||
|
|
||||||
certctl runs two GitHub Actions workflows:
|
|
||||||
|
|
||||||
- **`.github/workflows/ci.yml`** — runs on every push/PR. Fast feedback only.
|
|
||||||
Includes `gofmt`, `go vet`, `golangci-lint`, `go test -short -count=1`,
|
|
||||||
`govulncheck`, the per-layer coverage gates, and the regression-grep guards
|
|
||||||
(the M-009 mutation budget, the L-001 InsecureSkipVerify guard, the H-001
|
|
||||||
Dockerfile SHA-pin guard, the M-012 USER-directive guard, etc.).
|
|
||||||
- **`.github/workflows/security-deep-scan.yml`** — runs daily 06:00 UTC and on
|
|
||||||
manual dispatch. Heavyweight tools that need docker, network egress to
|
|
||||||
scanner registries, or wall-clock budgets the per-PR check can't tolerate.
|
|
||||||
Includes `gosec`, `osv-scanner`, the `-race -count=10` full-suite run,
|
|
||||||
`trivy` image scan, `syft` SBOM, ZAP baseline DAST, `nuclei`,
|
|
||||||
`schemathesis` OpenAPI fuzz, `testssl.sh`, `go-mutesting` mutation testing,
|
|
||||||
and `semgrep p/react-security`.
|
|
||||||
|
|
||||||
Receipts from each scheduled run are uploaded as a 30-day-retention artefact
|
|
||||||
named `security-deep-scan-<run-id>`. Audit them via the GitHub Actions UI;
|
|
||||||
download the artefact zip for any scan that surfaces a finding.
|
|
||||||
|
|
||||||
## Operator runbook — local re-run procedures
|
|
||||||
|
|
||||||
These are the same commands the workflow runs, intended for an operator with
|
|
||||||
a workstation that has docker + the Go toolchain installed. The local-run
|
|
||||||
shape is identical to CI; the difference is wall-clock and the artefact
|
|
||||||
location (CI uploads; local writes to `$PWD`).
|
|
||||||
|
|
||||||
### Mutation testing (D-003)
|
|
||||||
|
|
||||||
**Tool:** [`go-mutesting`](https://github.com/zimmski/go-mutesting). Mutates
|
|
||||||
each AST node in turn (flips comparisons, swaps return values, removes
|
|
||||||
statements) and re-runs the package's tests. A mutant is **killed** if any
|
|
||||||
test fails; **surviving** mutants indicate a coverage gap (no test caught
|
|
||||||
the bug the mutant introduced).
|
|
||||||
|
|
||||||
**Targets:** the three security-critical packages whose coverage gate is
|
|
||||||
**85%** in `ci.yml`:
|
|
||||||
|
|
||||||
- `internal/crypto/`
|
|
||||||
- `internal/pkcs7/`
|
|
||||||
- `internal/connector/issuer/local/`
|
|
||||||
|
|
||||||
**Acceptance threshold:** ≥80% mutation kill ratio per package. Surviving
|
|
||||||
mutants below that threshold get triaged in
|
|
||||||
the project's 2026-04-25 mutation-results notes — either
|
|
||||||
ship a targeted unit test that kills the mutant, or document an
|
|
||||||
equivalent-mutation justification.
|
|
||||||
|
|
||||||
**Local run:**
|
|
||||||
|
|
||||||
```
|
|
||||||
go install github.com/zimmski/go-mutesting/cmd/go-mutesting@latest
|
|
||||||
for pkg in ./internal/crypto/... ./internal/pkcs7/... ./internal/connector/issuer/local/...; do
|
|
||||||
echo "=== $pkg ==="
|
|
||||||
$(go env GOPATH)/bin/go-mutesting "$pkg"
|
|
||||||
done
|
|
||||||
```
|
|
||||||
|
|
||||||
The tool prints one line per mutant (`PASS` = killed, `FAIL` = surviving)
|
|
||||||
plus a per-package summary `The mutation score is X.YZ`. CPU-bound, single
|
|
||||||
core, takes ~10 minutes on a 2024-era laptop for the three packages combined.
|
|
||||||
|
|
||||||
**Sandbox note:** `go-mutesting` writes a mutant copy of the source tree to
|
|
||||||
`/tmp/go-mutesting/` per run; needs ≥2 GB free disk. Sandboxed CI runners
|
|
||||||
are sized for this; constrained dev sandboxes are not.
|
|
||||||
|
|
||||||
### DAST baseline (D-004)
|
|
||||||
|
|
||||||
**Tool:** [OWASP ZAP `baseline`](https://www.zaproxy.org/docs/docker/baseline-scan/).
|
|
||||||
Spiders the running server's URL surface and runs the OWASP ZAP passive-scan
|
|
||||||
rule pack. **Baseline** mode sends no active-scan attack traffic, so it's safe
|
|
||||||
against a non-throwaway environment.
|
|
||||||
|
|
||||||
**Target:** the live `deploy/docker-compose.yml` stack on `https://localhost:8443`.
|
|
||||||
|
|
||||||
**Acceptance:** zero HIGH/CRITICAL alerts. WARN/INFO alerts get triaged in the
|
|
||||||
ZAP report; some are unavoidable (e.g., HSTS preload-list nag is a deployment
|
|
||||||
recommendation, not a server defect).
|
|
||||||
|
|
||||||
**Local run:**
|
|
||||||
|
|
||||||
```
|
|
||||||
docker compose -f deploy/docker-compose.yml up -d
|
|
||||||
sleep 20 # wait for /ready to flip OK; check `curl --cacert deploy/test/certs/ca.crt https://localhost:8443/ready`
|
|
||||||
docker run --rm --network host \
|
|
||||||
-v "$PWD":/zap/wrk \
|
|
||||||
ghcr.io/zaproxy/zaproxy:stable \
|
|
||||||
zap-baseline.py -t https://localhost:8443 \
|
|
||||||
-r zap-report.html -J zap-report.json
|
|
||||||
docker compose -f deploy/docker-compose.yml down
|
|
||||||
```
|
|
||||||
|
|
||||||
The HTML report opens in a browser; the JSON is machine-readable for triage.
|
|
||||||
|
|
||||||
### TLS audit (D-005)
|
|
||||||
|
|
||||||
**Tool:** [`testssl.sh`](https://testssl.sh/). Probes the TLS handshake and
|
|
||||||
each enabled cipher suite; reports protocol-version weaknesses, cipher
|
|
||||||
weaknesses, certificate-chain issues, and known CVE patterns (Heartbleed,
|
|
||||||
ROBOT, BEAST, etc.).
|
|
||||||
|
|
||||||
**Target:** the live stack on `https://localhost:8443`.
|
|
||||||
|
|
||||||
**Acceptance:** zero HIGH/CRITICAL findings. certctl pins
|
|
||||||
`tls.Config.MinVersion = tls.VersionTLS13` (`cmd/server/tls.go`), so anything
|
|
||||||
that surfaces is either (a) a real defect, (b) a testssl false positive, or
|
|
||||||
(c) a deployment-config issue worth documenting in the operator runbook.
|
|
||||||
|
|
||||||
**Local run:**
|
|
||||||
|
|
||||||
```
|
|
||||||
docker compose -f deploy/docker-compose.yml up -d
|
|
||||||
sleep 20
|
|
||||||
docker run --rm --network host \
|
|
||||||
-v "$PWD":/data \
|
|
||||||
drwetter/testssl.sh:latest \
|
|
||||||
--jsonfile /data/testssl.json https://localhost:8443
|
|
||||||
docker compose -f deploy/docker-compose.yml down
|
|
||||||
|
|
||||||
# Filter to actionable severities
|
|
||||||
jq '[.[] | select(.severity == "HIGH" or .severity == "CRITICAL")]' testssl.json
|
|
||||||
```
|
|
||||||
|
|
||||||
### Frontend semgrep (D-007)
|
|
||||||
|
|
||||||
**Tool:** [`semgrep`](https://semgrep.dev/) with the maintained
|
|
||||||
[`p/react-security` ruleset](https://semgrep.dev/p/react-security). Catches
|
|
||||||
React-specific XSS / injection patterns: `dangerouslySetInnerHTML` without
|
|
||||||
sanitization, `target="_blank"` without `rel="noopener noreferrer"`,
|
|
||||||
`href={userInput}`, `eval`, `document.write`, etc.
|
|
||||||
|
|
||||||
**Target:** the frontend source tree at `web/src/`.
|
|
||||||
|
|
||||||
**Acceptance:** zero findings. Bundle 8 already verified
|
|
||||||
`dangerouslySetInnerHTML` count at zero and the `target="_blank"`
|
|
||||||
rel-noopener pin via simple grep guards in `ci.yml`; semgrep adds defence
|
|
||||||
in depth — it catches escape patterns the greps don't see (e.g.,
|
|
||||||
`href={user_input}`, runtime `eval`, `document.write`).
|
|
||||||
|
|
||||||
**Local run:**
|
|
||||||
|
|
||||||
```
|
|
||||||
docker run --rm -v "$PWD":/src returntocorp/semgrep:latest \
|
|
||||||
semgrep --config=p/react-security --json /src/web/src \
|
|
||||||
> semgrep-react.json
|
|
||||||
|
|
||||||
# Count findings
|
|
||||||
jq '.results | length' semgrep-react.json
|
|
||||||
|
|
||||||
# Pretty-print findings
|
|
||||||
jq '.results[] | {rule_id: .check_id, path, line: .start.line, message: .extra.message}' semgrep-react.json
|
|
||||||
```
|
|
||||||
|
|
||||||
If the count is non-zero, every result has a `check_id` (e.g.
|
|
||||||
`react.dangerouslySetInnerHTML`) and a `message` describing the escape
|
|
||||||
pattern. Triage each: either fix the call site, or — for legitimate edge
|
|
||||||
cases — add a `// nosem: <check_id> — <reason>` directive on the
|
|
||||||
preceding line.
|
|
||||||
|
|
||||||
## Cadence
|
|
||||||
|
|
||||||
| Tool | Trigger | Wall-clock | Owner |
|
|
||||||
|----------------------|------------------------------------|------------|----------------|
|
|
||||||
| go-mutesting | daily deep-scan + manual dispatch | ~10 min | maintainers |
|
|
||||||
| ZAP baseline (DAST) | daily deep-scan + manual dispatch | ~5 min | maintainers |
|
|
||||||
| testssl.sh | daily deep-scan + manual dispatch | ~3 min | maintainers |
|
|
||||||
| semgrep react | daily deep-scan + manual dispatch | ~1 min | maintainers |
|
|
||||||
| `make verify` | every commit (pre-push) | ~1 min | every developer |
|
|
||||||
| ci.yml fast gates | every push/PR | ~3 min | every developer |
|
|
||||||
|
|
||||||
Re-run any of the deep-scan tools locally when:
|
|
||||||
|
|
||||||
- A CI receipt surfaces an unexpected finding and you want to bisect against
|
|
||||||
a local change before pushing.
|
|
||||||
- You're cutting a release tag and want belt-and-suspenders evidence beyond
|
|
||||||
the most recent scheduled scan.
|
|
||||||
- You're adding a new feature in the relevant surface (crypto code →
|
|
||||||
re-run mutation testing; new HTTP handler → re-run schemathesis + ZAP;
|
|
||||||
new TLS-config knob → re-run testssl).
|
|
||||||
|
|
||||||
## Related docs
|
|
||||||
|
|
||||||
- [`docs/operator/security.md`](../operator/security.md) — security posture, per-finding closure log.
|
|
||||||
- [`docs/testing-guide.md`](../testing-guide.md) — manual end-to-end QA playbook.
|
|
||||||
- [`.github/workflows/ci.yml`](../../.github/workflows/ci.yml) — per-PR fast gates.
|
|
||||||
- [`.github/workflows/security-deep-scan.yml`](../../.github/workflows/security-deep-scan.yml) — daily deep-scan gates.
|
|
||||||
- [`scripts/install-security-tools.sh`](../../scripts/install-security-tools.sh) — Go-host-installed tools (the docker-based tools are not in this script).
|
|
||||||
@@ -0,0 +1,97 @@
|
|||||||
|
# Git history normalization — 2026-05-13
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-13
|
||||||
|
|
||||||
|
This page documents a one-time normalization of certctl's git history
|
||||||
|
that landed on `master` on 2026-05-13. If you are reading this because
|
||||||
|
your clone failed to fast-forward, or because a commit SHA you bookmarked
|
||||||
|
no longer resolves, this is the explanation.
|
||||||
|
|
||||||
|
## What changed
|
||||||
|
|
||||||
|
Every commit's `author` and `committer` metadata was rewritten to a
|
||||||
|
single canonical identity (`shankar0123 <skreddy040@gmail.com>`). The
|
||||||
|
14 pre-rewrite author identities — operator name variants plus
|
||||||
|
AI/automation identities (Claude, Copilot, cowork agent, certctl-bot,
|
||||||
|
etc.) — collapsed to that one canonical author.
|
||||||
|
|
||||||
|
No source-code content was changed by the rewrite. Every line of code
|
||||||
|
in every commit is byte-for-byte identical to its pre-rewrite version.
|
||||||
|
Only the `author` and `committer` metadata fields were touched; commit
|
||||||
|
messages, subject lines, milestone IDs (M49, L-1, etc.), and every
|
||||||
|
other line of every commit's body are preserved verbatim.
|
||||||
|
|
||||||
|
## Why
|
||||||
|
|
||||||
|
Two reasons:
|
||||||
|
|
||||||
|
1. **LLC ownership transfer.** The codebase is now legally owned by
|
||||||
|
**certctl LLC**, which the operator incorporated to hold rights in
|
||||||
|
the project. The BSL 1.1 Licensor field in `LICENSE` flipped from a
|
||||||
|
natural-person name to `certctl LLC` in the same change set. Uniform
|
||||||
|
per-commit authorship under one canonical operator identity makes
|
||||||
|
the chain of title between the codebase and the LLC unambiguous.
|
||||||
|
|
||||||
|
2. **Pre-traction cleanup.** The rewrite cost of git-history
|
||||||
|
normalization scales with how many external clones and references
|
||||||
|
have calcified against specific commit SHAs. Doing it now, before
|
||||||
|
the project has a large external surface, minimizes disruption to
|
||||||
|
downstream consumers.
|
||||||
|
|
||||||
|
## What is preserved
|
||||||
|
|
||||||
|
A complete off-platform bundle backup of the pre-rewrite tree is held
|
||||||
|
by the operator (off-repo, not pushed). It contains every original
|
||||||
|
commit SHA, every original author identity, and the full ref graph as
|
||||||
|
it existed before the rewrite. The bundle is the immutable
|
||||||
|
preservation record and is recoverable forever.
|
||||||
|
|
||||||
|
An `archive/pre-author-normalization-2026-05-13` tag briefly existed
|
||||||
|
on origin pointing at the pre-rewrite tip but was removed when the
|
||||||
|
operator opted to clean the contributor graph of pre-rewrite
|
||||||
|
authorship signal. The bundle remains as the canonical archive — any
|
||||||
|
forensic question about pre-rewrite state can be answered by loading
|
||||||
|
the bundle into a fresh clone (`git clone pre-rewrite-2026-05-13.bundle`).
|
||||||
|
|
||||||
|
## Recovering after the rewrite
|
||||||
|
|
||||||
|
If you had a clone of certctl from before 2026-05-13, your local
|
||||||
|
history diverged from origin's at the rewrite. Easiest recovery:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd certctl
|
||||||
|
git fetch origin
|
||||||
|
git fetch origin --tags --force
|
||||||
|
git reset --hard origin/master
|
||||||
|
```
|
||||||
|
|
||||||
|
This force-aligns your local tree with the new origin. Any local
|
||||||
|
branches you had based on pre-rewrite history will need rebasing onto
|
||||||
|
the new master.
|
||||||
|
|
||||||
|
If you need to inspect the pre-rewrite state for a forensic or
|
||||||
|
diligence question, contact the operator directly — the off-platform
|
||||||
|
bundle is the canonical archive and is available on request.
|
||||||
|
|
||||||
|
## Container images and release tarballs
|
||||||
|
|
||||||
|
ghcr.io container images that were published before the rewrite
|
||||||
|
(`ghcr.io/certctl-io/certctl-{server,agent}:<old-tag>`) remain pullable
|
||||||
|
indefinitely. Their OCI source-SHA labels reference commit SHAs that
|
||||||
|
no longer resolve in the public origin — the images themselves still
|
||||||
|
work; only the source-SHA back-reference is now orphaned. New release
|
||||||
|
images published after the rewrite reference current SHAs normally.
|
||||||
|
|
||||||
|
If you downloaded a release tarball before the rewrite, the tarball's
|
||||||
|
contents are unchanged; only its associated `git` SHA differs from the
|
||||||
|
current `v2.x.y` tag (which has been re-pointed to the rewritten
|
||||||
|
commit at the same logical point in history).
|
||||||
|
|
||||||
|
## Operational note for contributors
|
||||||
|
|
||||||
|
Future contributions to certctl should be authored under the
|
||||||
|
operator's canonical git identity. Pull requests from external
|
||||||
|
contributors will need a Contributor License Agreement (CLA) workflow,
|
||||||
|
which the project will set up before accepting external PRs. Until
|
||||||
|
then, the project does not solicit or accept external code
|
||||||
|
contributions.
|
||||||
@@ -0,0 +1,214 @@
|
|||||||
|
# Observability — what certctl emits, what it doesn't, and what survives a restart
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-13
|
||||||
|
|
||||||
|
Use this when:
|
||||||
|
- You're sizing certctl's observability surface against your existing
|
||||||
|
metrics + tracing + logging stack and want to know exactly what
|
||||||
|
drops in cleanly and what gaps you'll need to bridge.
|
||||||
|
- You're investigating a "weird metric" or planning a Grafana
|
||||||
|
dashboard and need the canonical list of what's exposed.
|
||||||
|
- You're running multi-replica or restarting frequently and need to
|
||||||
|
understand which counters reset.
|
||||||
|
|
||||||
|
certctl's observability posture is deliberately minimal-but-honest:
|
||||||
|
ship the surfaces an operator actually needs to wire into a Prometheus
|
||||||
|
+ Grafana + Loki stack, and don't make claims the implementation
|
||||||
|
can't back. This document is the canonical statement of what's
|
||||||
|
emitted, what's deferred, and why.
|
||||||
|
|
||||||
|
## Metrics — what's emitted
|
||||||
|
|
||||||
|
certctl exposes metrics through two endpoints on the control plane:
|
||||||
|
|
||||||
|
| Endpoint | Content-Type | Audience |
|
||||||
|
|---|---|---|
|
||||||
|
| `GET /api/v1/metrics` | `application/json` | Dashboards that prefer JSON, ad-hoc curl |
|
||||||
|
| `GET /api/v1/metrics/prometheus` | `text/plain; version=0.0.4; charset=utf-8` (Prometheus exposition) | Prometheus, Grafana Agent, Datadog Agent, Victoria Metrics, any OpenMetrics-compatible scraper |
|
||||||
|
|
||||||
|
The Prometheus endpoint emits standard `# HELP` / `# TYPE` / metric
|
||||||
|
lines following the conventions at
|
||||||
|
[prometheus.io/docs/instrumenting/exposition_formats](https://prometheus.io/docs/instrumenting/exposition_formats/).
|
||||||
|
Metric names are lowercase, snake_case, and prefixed with `certctl_`.
|
||||||
|
|
||||||
|
The implementation is at
|
||||||
|
[`internal/api/handler/metrics.go`](../../internal/api/handler/metrics.go).
|
||||||
|
|
||||||
|
### What's covered
|
||||||
|
|
||||||
|
Run the endpoint against a live deployment for the authoritative list
|
||||||
|
(it expands as the service ships more metrics). At time of writing the
|
||||||
|
exposition includes:
|
||||||
|
|
||||||
|
- Certificate-inventory gauges: `certctl_certificate_total`,
|
||||||
|
`certctl_certificate_active`, `certctl_certificate_expiring_soon`,
|
||||||
|
`certctl_certificate_expired`, `certctl_certificate_revoked`.
|
||||||
|
- Per-issuer-type issuance histograms:
|
||||||
|
`certctl_issuance_duration_seconds{issuer_type=…}` (the 2026-05-01
|
||||||
|
issuer-coverage audit closure #4 — this is the load-bearing metric
|
||||||
|
for per-issuer SLOs).
|
||||||
|
- Server uptime: `certctl_uptime_seconds`.
|
||||||
|
|
||||||
|
### Prometheus library vs hand-rolled exposition (acquisition diligence)
|
||||||
|
|
||||||
|
certctl writes Prometheus exposition format with `fmt.Fprintf` from
|
||||||
|
the metrics handler, not via the `github.com/prometheus/client_golang`
|
||||||
|
library. This is intentional for v2.x:
|
||||||
|
|
||||||
|
- The metric surface is shallow (gauges + a handful of histograms with
|
||||||
|
static labels). The client library's value is on the registration +
|
||||||
|
thread-safe accumulation side, neither of which is load-bearing for
|
||||||
|
the current surface.
|
||||||
|
- The exposition output is pinned to the spec version explicitly
|
||||||
|
(`version=0.0.4`) and is unit-tested against expected output at
|
||||||
|
`internal/api/handler/stats_handler_test.go`.
|
||||||
|
- Swapping in `client_golang` is a mechanical migration when the
|
||||||
|
metric surface grows (per-connector counters + RED-method histograms
|
||||||
|
on every handler are the natural next surface), but it has no
|
||||||
|
operator-visible behavior change today.
|
||||||
|
|
||||||
|
The migration is on the
|
||||||
|
[WORKSPACE-ROADMAP.md](../../WORKSPACE-ROADMAP.md) as a v3 item. If
|
||||||
|
you're an acquirer reading this: the question to ask is "does the
|
||||||
|
metric surface meet our SLO needs today" — not "is the right library
|
||||||
|
under the hood." If the answer to the first question is yes, the
|
||||||
|
second is a refactor, not a feature gap.
|
||||||
|
|
||||||
|
## Tracing — explicitly not yet shipped
|
||||||
|
|
||||||
|
certctl does **not** ship distributed tracing instrumentation today:
|
||||||
|
|
||||||
|
- No OpenTelemetry SDK setup in `cmd/server/main.go`.
|
||||||
|
- No OTLP exporter wired into outbound calls (issuer connectors,
|
||||||
|
agent enrollment, etc.).
|
||||||
|
- The `go.opentelemetry.io/otel` packages that appear in
|
||||||
|
[`go.mod`](../../go.mod) are indirect-only — they're transitive
|
||||||
|
dependencies of `coreos/go-oidc` and similar.
|
||||||
|
|
||||||
|
This is honest: there is no in-process tracing surface to monitor,
|
||||||
|
correlate, or sample. If your environment requires end-to-end traces
|
||||||
|
across the certctl control plane + agents + issuer backends, this is
|
||||||
|
a gap you would close on the certctl side as part of a v3 work item.
|
||||||
|
Until then:
|
||||||
|
|
||||||
|
- Structured logs include a `request_id` you can correlate across
|
||||||
|
the server log stream. See
|
||||||
|
[`internal/api/middleware/request_id.go`](../../internal/api/middleware/request_id.go).
|
||||||
|
- The Prometheus histogram
|
||||||
|
`certctl_issuance_duration_seconds{issuer_type=…}` carries the
|
||||||
|
same per-issuer latency signal a trace span would, just without
|
||||||
|
the per-request fan-out.
|
||||||
|
|
||||||
|
OpenTelemetry instrumentation is tracked in
|
||||||
|
[WORKSPACE-ROADMAP.md](../../WORKSPACE-ROADMAP.md) as a v3 item.
|
||||||
|
|
||||||
|
## Logging
|
||||||
|
|
||||||
|
certctl emits structured JSON logs to stdout via the stdlib
|
||||||
|
`log/slog` package. Every line carries `time`, `level`, `msg`, and —
|
||||||
|
where relevant — `request_id`, `actor_id`, and a contextual subject
|
||||||
|
(`certificate_id`, `issuer_id`, `agent_id`, etc.).
|
||||||
|
|
||||||
|
Log level is controlled by `CERTCTL_LOG_LEVEL` (`debug` / `info` /
|
||||||
|
`warn` / `error`); defaults to `info`. There is no in-process log
|
||||||
|
ingest — operators are expected to collect from container stdout
|
||||||
|
into their existing log pipeline (Loki, CloudWatch Logs, Datadog,
|
||||||
|
ELK, Splunk, etc.).
|
||||||
|
|
||||||
|
No log line contains private-key material, bearer tokens, OIDC
|
||||||
|
client secrets, or session cookies. The break-glass login path
|
||||||
|
explicitly scrubs the password before it reaches the audit subsystem
|
||||||
|
(see [`docs/operator/auth-threat-model.md`](auth-threat-model.md) §
|
||||||
|
"Break-glass token leak").
|
||||||
|
|
||||||
|
## Rate-limit behavior under restarts and replicas
|
||||||
|
|
||||||
|
Where rate limits exist, they are **per-process, in-memory,
|
||||||
|
reset-on-restart, and not shared across replicas**. This matters for
|
||||||
|
multi-replica deployments and for any compliance posture that asks
|
||||||
|
"what limits apply globally vs per-pod."
|
||||||
|
|
||||||
|
### Inventory
|
||||||
|
|
||||||
|
| Limiter | Implementation | Window | Cap | Survives restart? | Shared across replicas? |
|
||||||
|
|---|---|---|---|---|---|
|
||||||
|
| Break-glass login (per source-IP) | `internal/api/handler/auth_breakglass.go` | 60s | 5 attempts | No | No |
|
||||||
|
| SCEP/Intune per-device challenge | `internal/scep/intune/` | 60s | configurable (`*_PER_MINUTE`) | No | No |
|
||||||
|
| EST per-principal CSR enrollment | `internal/est/` | 60s | configurable | No | No |
|
||||||
|
| EST HTTP-Basic source-IP failed-auth | `internal/est/` | 60s | configurable | No | No |
|
||||||
|
| ACME per-account orders / key-change / challenge-respond | `internal/service/acme.go` | 1h | configurable | No | No |
|
||||||
|
|
||||||
|
All five use the shared `internal/ratelimit/sliding_window.go`
|
||||||
|
primitive. Buckets live in a single per-process map guarded by a
|
||||||
|
mutex; the package-level cap prevents unbounded growth under
|
||||||
|
adversarial key cardinality (default 100,000 keys; oldest-by-newest-
|
||||||
|
timestamp evicted under pressure).
|
||||||
|
|
||||||
|
### Implications for multi-replica deployments
|
||||||
|
|
||||||
|
- **Effective per-replica cap is the documented cap.** A 2-replica
|
||||||
|
deployment lets through up to 2× the per-key window cap before
|
||||||
|
either replica rejects.
|
||||||
|
- **Restart resets the bucket.** A `kubectl rollout restart` empties
|
||||||
|
the in-memory windows on every replica. An attacker who notices
|
||||||
|
this could in principle re-issue burst attempts after every roll;
|
||||||
|
the threat model accepts this because rollouts are operator-driven
|
||||||
|
and the relevant endpoints already require credentials.
|
||||||
|
- **No cross-replica fan-out.** Rate-limit decisions on replica A
|
||||||
|
are not visible to replica B. Sticky-session ingress routing (with
|
||||||
|
`service.spec.sessionAffinity: ClientIP` on Kubernetes or the
|
||||||
|
equivalent on your load balancer) tightens the effective cap to
|
||||||
|
the documented per-source-IP cap, because every request from a given
|
||||||
|
client IP lands on the same replica instead of being spread across pods.
|
||||||
|
|
||||||
|
If your threat model requires globally-enforced rate limits across
|
||||||
|
replicas, the implementation surface is roughly: swap the per-process
|
||||||
|
map for a database-backed sliding window (or a Redis-backed equivalent
|
||||||
|
if you already run Redis). This is on the
|
||||||
|
[WORKSPACE-ROADMAP.md](../../WORKSPACE-ROADMAP.md) as a v3 item;
|
||||||
|
nothing in the certctl threat model today requires it.
|
||||||
|
|
||||||
|
### Where these numbers live
|
||||||
|
|
||||||
|
The configurable caps are exposed as `CERTCTL_*_PER_MINUTE` /
|
||||||
|
`CERTCTL_ACME_*_PER_HOUR` env vars — see the
|
||||||
|
[security posture](security.md) doc for the operator-facing
|
||||||
|
configuration surface. The hard-coded ones (break-glass 5/min) are
|
||||||
|
intentionally non-configurable as a defense-in-depth measure; the
|
||||||
|
auth subsystem owns that policy decision.
|
||||||
|
|
||||||
|
## Performance harness scope
|
||||||
|
|
||||||
|
The load-test harness at [`deploy/test/loadtest/`](../../deploy/test/loadtest/)
|
||||||
|
covers the API-tier hot paths (issuance acceptance + cert list). It
|
||||||
|
does NOT load-test issuer-connector round-trips (you'd be load-
|
||||||
|
testing someone else's API), full multi-RTT ACME enrollment flows,
|
||||||
|
bulk-revoke / bulk-renew admin paths, or scheduler concurrency under
|
||||||
|
bulk renewal. Each exclusion is justified in
|
||||||
|
[`deploy/test/loadtest/README.md`](../../deploy/test/loadtest/README.md)
|
||||||
|
under "What it explicitly does NOT measure." If your evaluation
|
||||||
|
requires a benchmark on one of those exclusions, the right next step
|
||||||
|
is a follow-up scenario in that directory.
|
||||||
|
|
||||||
|
The per-component benchmarks ship in-tree as Go `Benchmark*`
|
||||||
|
functions:
|
||||||
|
- `internal/auth/session/bench_test.go` — session signing + validation
|
||||||
|
steady state and cold-process timing.
|
||||||
|
- `internal/auth/oidc/bench_test.go` — OIDC verify steady state.
|
||||||
|
- `internal/auth/oidc/bench_keycloak_test.go` — OIDC cold-cache timing
|
||||||
|
(gated `//go:build integration`).
|
||||||
|
|
||||||
|
Authoritative benchmark numbers + threshold contracts:
|
||||||
|
[`docs/operator/auth-benchmarks.md`](auth-benchmarks.md) (auth
|
||||||
|
subsystem) and [`docs/operator/performance-baselines.md`](performance-baselines.md)
|
||||||
|
(general API tier).
|
||||||
|
|
||||||
|
## Related reading
|
||||||
|
|
||||||
|
- [`docs/operator/security.md`](security.md) — the broader hardening
|
||||||
|
posture; this document is its observability subset.
|
||||||
|
- [`docs/operator/performance-baselines.md`](performance-baselines.md) — operator-runnable benchmarks against the API tier
|
||||||
|
- [`docs/operator/auth-benchmarks.md`](auth-benchmarks.md) — session
|
||||||
|
+ OIDC validation timings + threshold contracts
|
||||||
|
- [`deploy/test/loadtest/README.md`](../../deploy/test/loadtest/README.md) — k6 load-test harness scope + threshold contract
|
||||||
|
- [`docs/operator/runbooks/postgres-backup.md`](runbooks/postgres-backup.md) — operator-run backup recipe (separate file because it's a procedural runbook, not an observability claim)
|
||||||
@@ -101,6 +101,5 @@ Capture timing in your own loadtest-baselines log so future regressions surface
|
|||||||
|
|
||||||
## Related docs
|
## Related docs
|
||||||
|
|
||||||
- [`docs/contributor/ci-pipeline.md`](../contributor/ci-pipeline.md) — CI guard for performance regression
|
|
||||||
- [`docs/operator/security.md`](security.md) — rate limit tuning
|
- [`docs/operator/security.md`](security.md) — rate limit tuning
|
||||||
- [`docs/reference/architecture.md`](../reference/architecture.md) — request path through handler → service → repository
|
- [`docs/reference/architecture.md`](../reference/architecture.md) — request path through handler → service → repository
|
||||||
|
|||||||
@@ -0,0 +1,165 @@
|
|||||||
|
# Runbook: forcing config-encryption blob upgrades (v1/v2 → v3)
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-12
|
||||||
|
|
||||||
|
Use this when:
|
||||||
|
- You've rotated `CERTCTL_CONFIG_ENCRYPTION_KEY` and want every row in
|
||||||
|
the database to be re-sealed under the new passphrase, not just the
|
||||||
|
next ones to be touched.
|
||||||
|
- A v1- or v2-era encrypted blob existed in your database before you
|
||||||
|
upgraded to a post-M-8 release and you want to retire the legacy
|
||||||
|
read path's PBKDF2 work factor (100,000 rounds) in favor of the v3
|
||||||
|
factor (600,000 rounds, OWASP 2024).
|
||||||
|
- You're preparing for an audit and want every at-rest encrypted blob
|
||||||
|
to be on the same wire format.
|
||||||
|
|
||||||
|
Audience: a platform sysadmin who can run SQL against certctl's
|
||||||
|
PostgreSQL instance and exercise the GUI/REST API write paths.
|
||||||
|
|
||||||
|
For background on the v3 / v2 / v1 wire formats and the FileDriver vs
|
||||||
|
HSM threat model, read
|
||||||
|
[`docs/operator/secret-custody.md`](../secret-custody.md) first.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Background: how the read fallback works
|
||||||
|
|
||||||
|
`internal/crypto/encryption.go::DecryptIfKeySet` reads three on-disk
|
||||||
|
formats in this order:
|
||||||
|
|
||||||
|
```
|
||||||
|
v3 (magic 0x03, per-ciphertext 16-byte salt, PBKDF2 600k) →
|
||||||
|
v2 (magic 0x02, per-ciphertext 16-byte salt, PBKDF2 100k) →
|
||||||
|
v1 (no magic, fixed 28-byte salt, PBKDF2 100k)
|
||||||
|
```
|
||||||
|
|
||||||
|
The fallback is AEAD-driven: if v3 decryption fails authentication, the
|
||||||
|
function tries v2; if v2 fails, v1. This is what keeps pre-M-8 v1 blobs
|
||||||
|
readable without an explicit migration.
|
||||||
|
|
||||||
|
`EncryptIfKeySet` always writes v3. As a result, any row that is
|
||||||
|
**re-written** through the normal application code path is silently
|
||||||
|
upgraded to v3 the moment it's persisted.
|
||||||
|
|
||||||
|
The implication: you do not need to "migrate" v1/v2 blobs for them to
|
||||||
|
keep working — migration is only necessary if you want the v1/v2 wire format physically gone
|
||||||
|
from your database.
|
||||||
|
|
||||||
|
## Procedure
|
||||||
|
|
||||||
|
### Step 1 — confirm the encryption key is set
|
||||||
|
|
||||||
|
Re-encryption obviously cannot run without a passphrase. Verify:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
if [ -n "${CERTCTL_CONFIG_ENCRYPTION_KEY:-}" ]; then echo "SET (${#CERTCTL_CONFIG_ENCRYPTION_KEY} chars)"; else echo "NOT SET"; fi
|
||||||
|
```
|
||||||
|
|
||||||
|
If the variable prints `NOT SET`, do not proceed — set the key in your
|
||||||
|
deployment manifest and restart the control plane first.
|
||||||
|
|
||||||
|
### Step 2 — identify which tables hold encrypted blobs
|
||||||
|
|
||||||
|
Encrypted columns in the v2.1.0 schema:
|
||||||
|
|
||||||
|
| Table | Column | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| `issuers` | `encrypted_config` | Only populated for `source='database'` rows (env-seeded rows are not encrypted) |
|
||||||
|
| `targets` | `encrypted_config` | Same source-based gating as issuers |
|
||||||
|
| `oidc_providers` | `client_secret_enc` | OIDC client_secret |
|
||||||
|
| `auth_session_signing_keys` | `key_material_enc` | HMAC-SHA256 session-cookie signing key |
|
||||||
|
|
||||||
|
If your schema differs, derive the column list from the migration
|
||||||
|
folder:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
grep -hE '_enc[ ,]|encrypted_config' migrations/*.up.sql | sort -u
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3 — identify rows still on v1/v2
|
||||||
|
|
||||||
|
The magic byte of the blob distinguishes versions; v1 blobs start with
|
||||||
|
the random AES-GCM nonce (anything but `0x02` or `0x03` is definitely
|
||||||
|
v1), and v2 vs v3 is determined by the first byte:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Per-table version distribution (run against your live database; repeat for each table/column pair listed in Step 2)
|
||||||
|
SELECT
|
||||||
|
SUBSTRING(encrypted_config FROM 1 FOR 1)::bytea AS magic,
|
||||||
|
COUNT(*) AS rows
|
||||||
|
FROM issuers
|
||||||
|
WHERE encrypted_config IS NOT NULL
|
||||||
|
GROUP BY magic;
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected steady-state output is a single row with `magic = \x03`.
|
||||||
|
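Any rows with `\x02` are v2; any rows with anything else are v1. Caveat: a v1 blob begins with a random nonce byte, so about 1 in 128 v1 rows will show up as `\x02` or `\x03` here.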
|
||||||
|
|
||||||
|
### Step 4 — force re-sealing
|
||||||
|
|
||||||
|
`UPDATE` the rows back to themselves through the normal application
|
||||||
|
write path. The cleanest way to do this is via the REST API or GUI,
|
||||||
|
not raw SQL — re-issuing the same `PUT /api/v1/issuers/:id` reads the
|
||||||
|
row, decrypts, then re-encrypts under v3 on the write back.
|
||||||
|
|
||||||
|
For an issuer named `iss-letsencrypt-prod`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Fetch then re-PUT the same body (CSRF + bearer token elided).
|
||||||
|
curl -sS https://certctl.example.com/api/v1/issuers/iss-letsencrypt-prod \
|
||||||
|
-H "Authorization: Bearer $CERTCTL_API_KEY" \
|
||||||
|
| jq '.' \
|
||||||
|
| curl -sS -X PUT https://certctl.example.com/api/v1/issuers/iss-letsencrypt-prod \
|
||||||
|
-H "Authorization: Bearer $CERTCTL_API_KEY" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
--data-binary @-
|
||||||
|
```
|
||||||
|
|
||||||
|
Repeat for each row that the Step 3 query flagged as non-v3.
|
||||||
|
|
||||||
|
### Step 5 — verify
|
||||||
|
|
||||||
|
Re-run the Step 3 query. The output should now show only `magic =
|
||||||
|
\x03` rows.
|
||||||
|
|
||||||
|
## Special case: rotating the encryption-key passphrase
|
||||||
|
|
||||||
|
If your goal is to retire a possibly-compromised passphrase rather
|
||||||
|
than retire a legacy wire format, the order is:
|
||||||
|
|
||||||
|
1. Generate a new passphrase. Document it via your secret-management
|
||||||
|
tool (HashiCorp Vault, AWS Secrets Manager, etc.).
|
||||||
|
2. Stop the control plane briefly so no rows are written under the
|
||||||
|
stale passphrase during the transition window.
|
||||||
|
3. Run a one-shot decrypt-with-old / re-encrypt-with-new pass.
|
||||||
|
certctl ships no built-in tool for this — see the open
|
||||||
|
roadmap item below. The cleanest current approach is:
|
||||||
|
- Start certctl with the OLD passphrase.
|
||||||
|
- Read every encrypted column out to a JSON dump via the REST API.
|
||||||
|
- Stop certctl. Update its env to the NEW passphrase. Restart.
|
||||||
|
- PUT every row back from the JSON dump (the writes re-seal under
|
||||||
|
the new passphrase).
|
||||||
|
4. Document the old passphrase as retired in your secret-management
|
||||||
|
tool. Anyone with read access to a pre-rotation backup still needs
|
||||||
|
it to decrypt that backup; the live database no longer needs it.
|
||||||
|
|
||||||
|
For most operators, simply rotating the passphrase and letting the
|
||||||
|
re-seal happen organically as rows are touched is acceptable — the
|
||||||
|
v3 wire format with PBKDF2 600k rounds makes offline brute-force
|
||||||
|
against the old passphrase computationally expensive.
|
||||||
|
|
||||||
|
## Open roadmap items
|
||||||
|
|
||||||
|
- Ship a built-in `certctl admin reseal --all` command that does Steps
|
||||||
|
3 and 4 in one shot, with structured progress + audit logging.
|
||||||
|
Tracked in [WORKSPACE-ROADMAP.md](../../../WORKSPACE-ROADMAP.md).
|
||||||
|
- Surface per-table v1/v2/v3 distribution as a Prometheus gauge so
|
||||||
|
alerting can fire on "rows on legacy format" drift.
|
||||||
|
|
||||||
|
## Related reading
|
||||||
|
|
||||||
|
- [`docs/operator/secret-custody.md`](../secret-custody.md) — the
|
||||||
|
broader where-do-private-keys-live reference; this runbook is the
|
||||||
|
procedural arm of that document.
|
||||||
|
- [`internal/crypto/encryption.go`](../../../internal/crypto/encryption.go)
|
||||||
|
package comment — wire format authoritative reference.
|
||||||
@@ -0,0 +1,113 @@
|
|||||||
|
# High-Availability Deployment Runbook
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-13
|
||||||
|
|
||||||
|
<!-- Phase 2 DEPL-H1 closure -->
|
||||||
|
|
||||||
|
|
||||||
|
certctl's Helm chart ships with conservative single-replica defaults
|
||||||
|
that produce a working `helm install` against any Kubernetes cluster.
|
||||||
|
Production HA is operator-opt-in across three values surfaces — none
|
||||||
|
of which the chart flips on your behalf.
|
||||||
|
|
||||||
|
This runbook documents the three changes, why they default off, and
|
||||||
|
the smallest-possible HA values overlay.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why HA is opt-in (not default)
|
||||||
|
|
||||||
|
Three load-bearing reasons the chart defaults are `replicas: 1` and
|
||||||
|
`podDisruptionBudget.enabled: false`:
|
||||||
|
|
||||||
|
1. **A 1-replica deployment works on every cluster.** A multi-replica
|
||||||
|
default with `minAvailable: 2` would render a PDB at install time;
|
||||||
|
if the cluster has fewer than 2 nodes available (single-node
|
||||||
|
`kind` / `minikube` / fresh `k3s` clusters), Helm renders fine but
|
||||||
|
the first `kubectl rollout` blocks indefinitely waiting for the
|
||||||
|
second replica that can never schedule. Defaulting off keeps the
|
||||||
|
demo path one-command.
|
||||||
|
|
||||||
|
2. **Postgres is a singleton in the bundled chart.** The chart's
|
||||||
|
`postgres-statefulset.yaml` runs ONE Postgres pod. Scaling the
|
||||||
|
server tier past 1 replica without an externalized Postgres + a
|
||||||
|
pgbouncer-style proxy doesn't actually buy HA at the DB tier — the
|
||||||
|
single Postgres pod is the failure domain. Operators who want true
|
||||||
|
HA route Postgres to a managed service (RDS, Cloud SQL, AlloyDB,
|
||||||
|
AKS-managed-Postgres, Aiven) or run their own cluster (Patroni,
|
||||||
|
CloudNativePG, Zalando postgres-operator). See the
|
||||||
|
[external-Postgres values example](../../deploy/helm/examples/values-external-db.yaml).
|
||||||
|
|
||||||
|
3. **Session affinity is HTTPS-only.** The control plane is HTTPS-only
|
||||||
|
(TLS 1.3 pinned). Adding `sessionAffinity: ClientIP` to the
|
||||||
|
server Service mid-deployment when a sticky front-end LB is in
|
||||||
|
play (NGINX Ingress, Cloud LB with backend service) is the right
|
||||||
|
default for OIDC + RBAC session cookies. But operators who terminate
|
||||||
|
TLS at a different layer (Envoy mesh, Cloudflare in front of the
|
||||||
|
cluster) may have already solved affinity upstream — flipping it
|
||||||
|
on by default would over-constrain those paths.
|
||||||
|
|
||||||
|
## The smallest production-HA overlay
|
||||||
|
|
||||||
|
Three Helm values to flip:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# values-ha.yaml — copy into your overlay and edit to taste.
|
||||||
|
|
||||||
|
server:
|
||||||
|
# ≥ 2 replicas is the minimum for the PDB to render. 3 gives you
|
||||||
|
# a true rolling-restart tolerance window (1 down for upgrade,
|
||||||
|
# 2 still serving) without dropping below minAvailable.
|
||||||
|
replicas: 3
|
||||||
|
|
||||||
|
service:
|
||||||
|
# Required when the front-end LB doesn't already enforce
|
||||||
|
# session affinity. OIDC + RBAC session cookies need to land
|
||||||
|
# on the same backend pod for the session lifetime.
|
||||||
|
sessionAffinity: ClientIP
|
||||||
|
|
||||||
|
podDisruptionBudget:
|
||||||
|
# Renders the PDB template; controller-side voluntary disruptions
|
||||||
|
# (node-drain for k8s upgrade, cluster-autoscaler scale-down)
|
||||||
|
# respect this floor.
|
||||||
|
enabled: true
|
||||||
|
# With server.replicas: 3, minAvailable: 2 leaves headroom for one
|
||||||
|
# rolling restart at a time.
|
||||||
|
minAvailable: 2
|
||||||
|
# maxUnavailable is mutually exclusive with minAvailable; pick one.
|
||||||
|
# maxUnavailable: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Apply with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
helm upgrade certctl deploy/helm/certctl/ -f values-ha.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## What you still own as the operator
|
||||||
|
|
||||||
|
Three things the chart does not solve, even at `replicas: 3`:
|
||||||
|
|
||||||
|
1. **Postgres HA.** Route to an externalized Postgres (managed cloud
|
||||||
|
or operator-managed cluster). The chart's bundled StatefulSet
|
||||||
|
pod is a development/single-AZ pattern, not a production HA path.
|
||||||
|
2. **TLS material lifecycle.** The chart accepts an `existingSecret`
|
||||||
|
for the server cert; rotating it is operator-side automation.
|
||||||
|
The dashboard + agent can issue their own certs via the local CA
|
||||||
|
(eat-your-own-dogfood); the operator can wire `cert-manager` if
|
||||||
|
they prefer that path.
|
||||||
|
3. **Backup CronJob.** Phase 4 of the architecture diligence
|
||||||
|
remediation plan (DEPL-H2) will ship a `backup-cronjob.yaml` template;
|
||||||
|
until that lands, backups are operator-run per the existing
|
||||||
|
`docs/operator/runbooks/postgres-backup.md` runbook.
|
||||||
|
|
||||||
|
## Cross-references
|
||||||
|
|
||||||
|
- `deploy/helm/certctl/values.yaml` lines 19, 446, 566 — the three
|
||||||
|
defaults this runbook documents.
|
||||||
|
- `docs/operator/runbooks/postgres-backup.md` — Postgres backup
|
||||||
|
runbook (today, operator-run).
|
||||||
|
- `docs/operator/runbooks/disaster-recovery.md` — DR procedure.
|
||||||
|
- Phase 4 (Helm Chart, DR, And Ops Surface) of the architecture
|
||||||
|
diligence remediation plan tracks the chart-level work
|
||||||
|
(backup CronJob, PrometheusRule starter, migration hook, etc.).
|
||||||
@@ -0,0 +1,169 @@
|
|||||||
|
# Runbook: PostgreSQL backup for certctl
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-13
|
||||||
|
|
||||||
|
Use this when:
|
||||||
|
- You're setting up a new certctl deployment and need a backup policy
|
||||||
|
before going to production.
|
||||||
|
- A buyer or auditor asks "where's the backup automation?" and you need
|
||||||
|
to point at the recommended cadence + procedure.
|
||||||
|
- You're rotating the encryption key, swapping CAs, or doing any other
|
||||||
|
destructive maintenance and want a snapshot to roll back to.
|
||||||
|
|
||||||
|
certctl does not ship a built-in backup daemon. Postgres is the system
|
||||||
|
of record for every piece of certctl state that isn't on the
|
||||||
|
operator's filesystem (CA keys, OCSP responder keys, SCEP/EST trust
|
||||||
|
bundles — see "Operator-managed (NOT in DB)" in the
|
||||||
|
[disaster-recovery runbook](disaster-recovery.md#postgres-restore));
|
||||||
|
backing it up is treated as a standard PostgreSQL operations task
|
||||||
|
that the operator owns end-to-end with their existing tooling.
|
||||||
|
|
||||||
|
This page is the recommended recipe.
|
||||||
|
|
||||||
|
## What to back up
|
||||||
|
|
||||||
|
| Layer | Tool | Cadence |
|
||||||
|
|---|---|---|
|
||||||
|
| `certctl` database (the row data) | `pg_dump` (logical) **or** `pg_basebackup` + WAL archive (physical PIT) | ≥ daily, retention ≥ 30d |
|
||||||
|
| CA cert + key (`CERTCTL_CA_CERT_PATH`, `CERTCTL_CA_KEY_PATH`) | Out-of-band file backup (operator's existing secret-management tool) | On change |
|
||||||
|
| SCEP RA cert + key (per profile) | Out-of-band file backup | On change |
|
||||||
|
| OCSP responder keys | Out-of-band file backup (`CERTCTL_OCSP_RESPONDER_KEY_DIR`) | On change |
|
||||||
|
| Trust-anchor PEM bundles | Out-of-band file backup | On change |
|
||||||
|
| Env vars (auth secret, etc.) | Operator's secret-management tool (Vault, AWS Secrets Manager, etc.) | On rotation |
|
||||||
|
|
||||||
|
A backup of only the Postgres database without the operator-managed
|
||||||
|
file material is **not a complete restore artifact** — see the
|
||||||
|
[disaster-recovery runbook's Postgres-restore section](disaster-recovery.md#postgres-restore)
|
||||||
|
for the full inventory. The DR runbook owns the restore procedure;
|
||||||
|
this page owns the capture procedure.
|
||||||
|
|
||||||
|
## Logical backup (recommended for most deployments)
|
||||||
|
|
||||||
|
`pg_dump -Fc` produces a portable compressed dump that's easy to
|
||||||
|
restore into a fresh Postgres instance at any version ≥ the dump's
|
||||||
|
source version. Best for deployments where the DB is small enough
|
||||||
|
that a full logical dump fits the backup window (rough rule of thumb:
|
||||||
|
under a million `managed_certificates` rows + corresponding history).
|
||||||
|
|
||||||
|
### docker-compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Snapshot. Run from any host that can reach the postgres container.
|
||||||
|
TIMESTAMP=$(date -u +%Y%m%dT%H%M%SZ)
|
||||||
|
docker compose -f deploy/docker-compose.yml exec -T postgres \
|
||||||
|
pg_dump --username=certctl --format=custom --no-owner --no-acl --dbname=certctl \
|
||||||
|
> "certctl-${TIMESTAMP}.dump"
|
||||||
|
|
||||||
|
# 2. Verify integrity (catch transport / truncation bugs early).
|
||||||
|
docker run --rm -v "$PWD:/dumps" -w /dumps postgres:16-alpine \
|
||||||
|
pg_restore --list "certctl-${TIMESTAMP}.dump" > /dev/null \
|
||||||
|
&& echo "OK: pg_restore --list parses the dump cleanly" \
|
||||||
|
|| { echo "CORRUPT DUMP"; exit 1; }
|
||||||
|
|
||||||
|
# 3. Move to durable storage (S3, GCS, NFS, encrypted-at-rest blob
|
||||||
|
# storage of your choice). DO NOT leave the dump on the certctl host
|
||||||
|
# alone — that defeats the purpose of having a backup.
|
||||||
|
aws s3 cp "certctl-${TIMESTAMP}.dump" "s3://your-bucket/certctl/"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kubernetes (with the bundled Helm chart)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Snapshot via kubectl exec into the postgres StatefulSet pod.
|
||||||
|
TIMESTAMP=$(date -u +%Y%m%dT%H%M%SZ)
|
||||||
|
NAMESPACE=certctl
|
||||||
|
kubectl exec -n "$NAMESPACE" statefulset/postgres -- \
|
||||||
|
pg_dump --username=certctl --format=custom --no-owner --no-acl --dbname=certctl \
|
||||||
|
> "certctl-${TIMESTAMP}.dump"
|
||||||
|
|
||||||
|
# 2. Same verification step as above.
|
||||||
|
# 3. Same off-host storage step as above.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore (cross-reference)
|
||||||
|
|
||||||
|
The restore procedure lives in
|
||||||
|
[disaster-recovery.md § Postgres restore](disaster-recovery.md#postgres-restore).
|
||||||
|
The key reminders: stop certctl first, restore the DB, run any
|
||||||
|
migrations newer than the snapshot, truncate the CRL + OCSP caches,
|
||||||
|
then restart.
|
||||||
|
|
||||||
|
## Physical / PITR backup (large fleets, RPO < 1h)
|
||||||
|
|
||||||
|
Logical dumps have a coarse RPO (the last successful dump). For
|
||||||
|
deployments where ≥ 1h of cert-issuance history loss is unacceptable,
|
||||||
|
pair Postgres physical backups with continuous WAL archiving:
|
||||||
|
|
||||||
|
- `pg_basebackup` for the initial seed
|
||||||
|
- `archive_command = '<your-WAL-archiver>'` in `postgresql.conf` to
|
||||||
|
ship every WAL segment off the host as it closes
|
||||||
|
- `pgbackrest` or `wal-g` for the operational layer (both are
|
||||||
|
battle-tested, support encryption, and integrate cleanly with S3 /
|
||||||
|
GCS / Azure Blob)
|
||||||
|
|
||||||
|
certctl ships nothing in this layer — it's standard PostgreSQL DBA
|
||||||
|
work, and shipping a bespoke recipe would just be a worse version of
|
||||||
|
what `pgbackrest` already does. The
|
||||||
|
[pgbackrest configuration guide](https://pgbackrest.org/configuration.html)
|
||||||
|
is the authoritative reference.
|
||||||
|
|
||||||
|
## Automation paths
|
||||||
|
|
||||||
|
This is the gap an acquisition reviewer typically wants to see filled.
|
||||||
|
certctl ships no backup CronJob template in the Helm chart — the
|
||||||
|
operator owns this layer because:
|
||||||
|
|
||||||
|
1. The right tool depends on the deployment topology (in-cluster
|
||||||
|
Postgres vs. managed Postgres vs. self-hosted on a VM).
|
||||||
|
2. The right secret-management integration depends on the operator's
|
||||||
|
existing stack (Vault, AWS Secrets Manager, GCP Secret Manager,
|
||||||
|
sealed-secrets, External Secrets).
|
||||||
|
3. The right storage backend depends on the operator's existing
|
||||||
|
off-host blob storage.
|
||||||
|
|
||||||
|
A bundled CronJob would be a half-answer for any operator with an
|
||||||
|
established backup posture, and would have to be torn out before
|
||||||
|
production. Three sample recipes that cover the common cases:
|
||||||
|
|
||||||
|
- **In-cluster Postgres → S3:** a CronJob running an alpine image with
|
||||||
|
`aws-cli` + the `pg_dump` command above, output piped to
|
||||||
|
`aws s3 cp`. Cosign-signed if your supply-chain policy requires it.
|
||||||
|
- **Managed Postgres (AWS RDS / GCP Cloud SQL / Azure DB):** rely on
|
||||||
|
the cloud provider's built-in PITR backup; configure retention
|
||||||
|
≥ 30 days; the certctl deployment surface is the connection string
|
||||||
|
alone.
|
||||||
|
- **Self-hosted VM:** systemd timer + `pg_dump` + `restic` (or
|
||||||
|
`borgbackup`) to encrypted off-host storage.
|
||||||
|
|
||||||
|
Tracked in [WORKSPACE-ROADMAP.md](../../../WORKSPACE-ROADMAP.md) as a
|
||||||
|
post-v2.1.0 nice-to-have: an opt-in Helm CronJob template for the
|
||||||
|
in-cluster-Postgres-to-S3 case as a starter. The right time to ship
|
||||||
|
it is when a real operator asks for it; speculatively shipping it
|
||||||
|
without that signal would just produce a template every deployment
|
||||||
|
ends up rewriting.
|
||||||
|
|
||||||
|
## Verification — what to dry-run quarterly
|
||||||
|
|
||||||
|
A backup you've never restored is a backup you don't have. Add this
|
||||||
|
to your quarterly on-call rotation:
|
||||||
|
|
||||||
|
1. Pick the most recent dump from the previous quarter.
|
||||||
|
2. Stand up a throwaway Postgres instance (Docker, kind, anything).
|
||||||
|
3. `pg_restore -d certctl <the dump>`.
|
||||||
|
4. Bring up a certctl-server container pointed at the throwaway DB
|
||||||
|
(`CERTCTL_DATABASE_URL=postgres://certctl:...@throwaway/...`).
|
||||||
|
5. Confirm `/api/v1/version` returns 200, `/api/v1/certificates`
|
||||||
|
lists the expected rows, and the scheduler logs show no
|
||||||
|
migration-version mismatch.
|
||||||
|
6. Tear down. Note the timing in your DR registry.
|
||||||
|
|
||||||
|
The [disaster-recovery runbook](disaster-recovery.md) covers what to
|
||||||
|
do when this dry-run reveals a gap.
|
||||||
|
|
||||||
|
## Related reading
|
||||||
|
|
||||||
|
- [`docs/operator/runbooks/disaster-recovery.md`](disaster-recovery.md) — the restore companion
|
||||||
|
- [`docs/operator/secret-custody.md`](../secret-custody.md) — what
|
||||||
|
the operator-managed file material (CA keys, RA keys, trust
|
||||||
|
anchors) contains, why it lives outside the DB, and what it costs
|
||||||
|
to lose
|
||||||
@@ -0,0 +1,166 @@
|
|||||||
|
# Secret custody — where private keys live in certctl
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-12
|
||||||
|
|
||||||
|
Use this when:
|
||||||
|
- You're sizing certctl against an internal security review or third-party
|
||||||
|
diligence ("where do private keys live, and how are they protected at
|
||||||
|
rest?").
|
||||||
|
- You're evaluating the file-on-disk vs HSM-vs-cloud-KMS roadmap before
|
||||||
|
committing to a deployment topology.
|
||||||
|
- You need a single page that names every secret material on the control
|
||||||
|
plane and on agents, plus the at-rest protection for each.
|
||||||
|
|
||||||
|
This document covers WHAT secrets exist, HOW they are stored, and the
|
||||||
|
THREAT MODEL we accept for each — it is not a hardening checklist. The
|
||||||
|
hardening levers (env-vars, file modes, encryption-key configuration) are
|
||||||
|
cross-referenced as you read through.
|
||||||
|
|
||||||
|
## The secrets that exist
|
||||||
|
|
||||||
|
| Material | Where it lives | Protection at rest | Closes when… |
|
||||||
|
|---|---|---|---|
|
||||||
|
| Local CA private key | File on the control-plane host (`CERTCTL_CA_KEY_PATH`) | Filesystem ACLs (operator-supplied path; mode 0600 recommended) | A `signer.PKCS11Driver` or `signer.CloudKMSDriver` ships (post-v2.1.0) |
|
||||||
|
| Agent ECDSA P-256 private keys | File on each agent host (default `/var/lib/certctl-agent/keys/`) | Filesystem ACLs on the agent host. Never transmitted to the control plane. | TPM / Secure Enclave drivers ship (no current roadmap entry) |
|
||||||
|
| OIDC client secret | `oidc_providers.client_secret_enc` column (PostgreSQL) | AES-256-GCM v3 wire format, derived from `CERTCTL_CONFIG_ENCRYPTION_KEY` via PBKDF2-SHA256 600k rounds | The encryption key is rotated via `internal/crypto` re-seal (see runbook below) |
|
||||||
|
| Session signing key | `auth_session_signing_keys` table (PostgreSQL) | AES-256-GCM v3, same encryption-key passphrase as above | HSM/FIPS-validated signing-key driver lands (deferred to v3) |
|
||||||
|
| Break-glass credential | `breakglass_credentials.password_hash` column (PostgreSQL) | Argon2id (m=64MiB, t=1, p=4) hash; never encrypted because we need constant-time comparison | Out of scope — Argon2id resists offline attack already |
|
||||||
|
| API-key bearer tokens | `auth_api_keys.token_hash` column (PostgreSQL) | SHA-256(token) only — the plaintext is shown to the operator once at create time and never persisted | Out of scope |
|
||||||
|
| CSR private keys mid-issuance | Agent memory only, ephemeral | Never written to disk; never transmitted to the server (CSRs only) | Already closed |
|
||||||
|
| Issuer-connector backend secrets | `issuers.encrypted_config` column (PostgreSQL) for `source='database'` rows | AES-256-GCM v3; FAIL-CLOSED if `CERTCTL_CONFIG_ENCRYPTION_KEY` is unset (see "Env-seeded vs DB-seeded" below) | Already closed for `source='database'`; `source='env'` carries an explicit carve-out |
|
||||||
|
|
||||||
|
The breakdown by row source matters and is the subject of the next
|
||||||
|
section. Read it before concluding that a plaintext column is a bug.
|
||||||
|
|
||||||
|
## Env-seeded vs DB-seeded configs
|
||||||
|
|
||||||
|
certctl supports two sources for issuer and target configurations:
|
||||||
|
|
||||||
|
- **`source='env'`** — built from process environment variables on every
|
||||||
|
boot (`CERTCTL_CA_CERT_PATH`, `CERTCTL_CA_KEY_PATH`, `CERTCTL_ACME_DIRECTORY_URL`,
|
||||||
|
`CERTCTL_STEPCA_URL`, etc. — see `internal/service/issuer.go::buildEnvVarSeeds`
|
||||||
|
for the exact list). These rows are deterministically reconstructable from environment and
|
||||||
|
exist primarily so the GUI has something to display and so audit logs
|
||||||
|
can reference an issuer ID. The `config` column is intentionally
|
||||||
|
plaintext for `source='env'` rows: the exact same bytes already live
|
||||||
|
in the operator's Compose file / Helm values / systemd unit, so
|
||||||
|
persisting them again to PostgreSQL adds no new disclosure surface.
|
||||||
|
|
||||||
|
- **`source='database'`** — created via the GUI or REST API write paths
|
||||||
|
(`POST /api/v1/issuers`, etc.). These rows fail closed when
|
||||||
|
`CERTCTL_CONFIG_ENCRYPTION_KEY` is not configured:
|
||||||
|
- The HTTP handlers refuse the write with
|
||||||
|
`crypto.ErrEncryptionKeyRequired`.
|
||||||
|
- The server **refuses to start** if any `source='database'` row
|
||||||
|
exists without the encryption key, to prevent retroactive
|
||||||
|
plaintext exposure.
|
||||||
|
|
||||||
|
The startup guard is in `cmd/server/main.go` around the
|
||||||
|
`encryptionKey != ""` branch — it lists `source='database'` rows on every
|
||||||
|
boot and aborts if any are present without the key.
|
||||||
|
|
||||||
|
If you want every issuer/target row to be encrypted at rest unconditionally,
|
||||||
|
set `CERTCTL_CONFIG_ENCRYPTION_KEY` and use database-sourced
|
||||||
|
configurations exclusively (re-create env-seeded rows through the GUI
|
||||||
|
once the key is present).
|
||||||
|
|
||||||
|
## The signer abstraction
|
||||||
|
|
||||||
|
All CA private-key signing flows through
|
||||||
|
`internal/crypto/signer.Signer`, which embeds the stdlib `crypto.Signer`
|
||||||
|
and adds `Algorithm()`. Two drivers ship today:
|
||||||
|
|
||||||
|
- `signer.FileDriver` — the production default. Wraps the historical
|
||||||
|
file-on-disk PEM flow without behavior change. **Heap-resident**:
|
||||||
|
while certctl is running, the key bytes sit in the process's address
|
||||||
|
space.
|
||||||
|
- `signer.MemoryDriver` — used in tests; never reaches production code
|
||||||
|
paths.
|
||||||
|
|
||||||
|
The disk-exposure leg of the threat model is documented inline at the
|
||||||
|
top of `internal/connector/issuer/local/local.go` (the L-014 carve-out).
|
||||||
|
The mitigations on the FileDriver leg include:
|
||||||
|
- mode 0600 enforced on the key file at startup,
|
||||||
|
- the key directory is not served by any handler,
|
||||||
|
- the bytes are never logged or echoed in audit events,
|
||||||
|
- the server fails closed if it cannot read the key.
|
||||||
|
|
||||||
|
`FileDriver` does NOT mitigate "an attacker with read access to the
|
||||||
|
control-plane filesystem can recover the CA key." That mitigation lives
|
||||||
|
in a future `signer.PKCS11Driver` (hardware token) or
|
||||||
|
`signer.CloudKMSDriver` (AWS/GCP/Azure KMS). The interface exists; the
|
||||||
|
drivers do not ship yet. Both are post-v2.1.0 roadmap items — see
|
||||||
|
[`docs/reference/architecture.md`](../reference/architecture.md) for the
|
||||||
|
target topology.
|
||||||
|
|
||||||
|
If you need HSM-grade key custody today, you have two options:
|
||||||
|
1. Run certctl behind an enterprise issuer (Microsoft ADCS, EJBCA,
|
||||||
|
Smallstep, ACME-public) and configure certctl's local CA as
|
||||||
|
intermediate-only or disable it entirely. The issuer connector then
|
||||||
|
sends every signing request to your existing hardware-rooted PKI.
|
||||||
|
2. Wait for the PKCS#11 driver. Track its status in
|
||||||
|
[WORKSPACE-ROADMAP.md](../../WORKSPACE-ROADMAP.md).
|
||||||
|
|
||||||
|
## Config-encryption wire format
|
||||||
|
|
||||||
|
`internal/crypto/encryption.go` produces and reads three on-disk
|
||||||
|
formats. The read path accepts all three; the write path emits only
|
||||||
|
the newest:
|
||||||
|
|
||||||
|
| Version | Magic byte | Salt | PBKDF2-SHA256 work factor | Status |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| v3 | `0x03` | per-ciphertext 16B | 600,000 | **Default for all writes** (OWASP 2024) |
|
||||||
|
| v2 | `0x02` | per-ciphertext 16B | 100,000 | Legacy read-only; superseded by v3 |
|
||||||
|
| v1 | none | fixed 28B | 100,000 | Pre-M-8 legacy read-only; written before per-ciphertext-salt fix |
|
||||||
|
|
||||||
|
The wire-format documentation is also in the `internal/crypto/encryption.go`
|
||||||
|
package comment.
|
||||||
|
|
||||||
|
### Forcing legacy blob upgrades
|
||||||
|
|
||||||
|
Re-sealing happens passively: any `UPDATE` against a row that contains a
|
||||||
|
v1 or v2 blob triggers a v3 rewrite the next time the field is set.
|
||||||
|
There is no in-place migration tool because re-sealing requires reading
|
||||||
|
the row through the same code path that performs the write, and any
|
||||||
|
operational path that touches the row (renaming an issuer in the GUI,
|
||||||
|
updating a target's endpoint, refreshing an OIDC provider's
|
||||||
|
client-secret) achieves this naturally.
|
||||||
|
|
||||||
|
If you want to FORCE re-sealing across the entire database, use the
|
||||||
|
runbook at
|
||||||
|
[`docs/operator/runbooks/config-encryption-upgrade.md`](runbooks/config-encryption-upgrade.md).
|
||||||
|
Recommended only if you suspect the encryption-key passphrase has
|
||||||
|
been exposed and have already rotated it (the runbook covers the
|
||||||
|
rotation order: set the new key, force re-seal, retire the old key
|
||||||
|
from the rotation pool).
|
||||||
|
|
||||||
|
## Roadmap (what is not yet closed)
|
||||||
|
|
||||||
|
Tracked in [`WORKSPACE-ROADMAP.md`](../../WORKSPACE-ROADMAP.md), not
|
||||||
|
maintained here to prevent drift:
|
||||||
|
|
||||||
|
- `signer.PKCS11Driver` for HSM-token-backed CA key custody.
|
||||||
|
- `signer.CloudKMSDriver` for AWS/GCP/Azure KMS-backed CA key custody.
|
||||||
|
- FIPS 140-3 mode for the entire control plane.
|
||||||
|
- HSM-backed session signing key (currently HMAC-SHA256 software keys).
|
||||||
|
|
||||||
|
If a buyer or auditor asks for "HSM support," the honest answer is:
|
||||||
|
the interface is there, the drivers are not, and an enterprise issuer
|
||||||
|
connector is the bridge until the drivers ship.
|
||||||
|
|
||||||
|
## Related reading
|
||||||
|
|
||||||
|
- [`docs/operator/security.md`](security.md) — the broader hardening
|
||||||
|
checklist; covers TLS, RBAC, audit logging, network policy.
|
||||||
|
- [`docs/operator/auth-threat-model.md`](auth-threat-model.md) — the
|
||||||
|
authentication-subsystem threat model. Item 5 ("HSM / FIPS-validated
|
||||||
|
signing key for sessions") is the session-signing-key analog of this
|
||||||
|
document's CA-key story.
|
||||||
|
- [`docs/reference/architecture.md`](../reference/architecture.md) §
|
||||||
|
"Signer abstraction" — the diagram form of the FileDriver / future
|
||||||
|
PKCS11Driver / CloudKMSDriver topology.
|
||||||
|
- [`internal/crypto/encryption.go`](../../internal/crypto/encryption.go)
|
||||||
|
package comment — wire format authoritative reference.
|
||||||
|
- [`internal/connector/issuer/local/local.go`](../../internal/connector/issuer/local/local.go)
|
||||||
|
L-014 carve-out — the load-bearing threat-model section for the
|
||||||
|
FileDriver case.
|
||||||
@@ -403,6 +403,124 @@ the end of step 4, extend the window before step 5.
|
|||||||
from the env var and restart. That's appropriate for a small env-var
|
from the env var and restart. That's appropriate for a small env-var
|
||||||
inventory; it would not scale to a per-user-key-issued model.
|
inventory; it would not scale to a per-user-key-issued model.
|
||||||
|
|
||||||
|
## Security carve-outs & operator-tunable defaults
|
||||||
|
|
||||||
|
Phase 2 of the architecture diligence remediation (2026-05-13)
|
||||||
|
consolidated the following carve-outs into one canonical section so
|
||||||
|
operators reviewing security posture have a single search target. Each
|
||||||
|
entry cites the exact file:line of the carve-out, why it exists, and
|
||||||
|
what the operator should do.
|
||||||
|
|
||||||
|
### TLS verification — dev escape hatches
|
||||||
|
|
||||||
|
certctl has three `InsecureSkipVerify=true` sites that are dev/probe
|
||||||
|
escape hatches — one opt-in debug flag and two read-only probes — none of which weakens a production trust decision by default:
|
||||||
|
|
||||||
|
- **Agent dev escape** — `cmd/agent/main.go:179` (wired from
|
||||||
|
`cmd/agent/main.go:61` config field + `cmd/agent/main.go:1371` CLI
|
||||||
|
flag). Operators flip this only when debugging an agent against a
|
||||||
|
self-signed control plane that hasn't been added to the agent's
|
||||||
|
trust store. Document as `--insecure-skip-verify` in the agent's
|
||||||
|
install runbook; the agent logs a startup WARN any time the flag
|
||||||
|
is set. SEC-M3 pins that the carve-out is intentional.
|
||||||
|
- **Agent verification probe** — `cmd/agent/verify.go:78`. The probe
|
||||||
|
intentionally opens a TLS connection with verification disabled so
|
||||||
|
it can inspect any certificate the endpoint serves (including
|
||||||
|
self-signed or expired ones — that's the whole point of a probe).
|
||||||
|
The probe never returns trust state to a security-relevant code
|
||||||
|
path; it only reads cert metadata. SEC-M3 pins this.
|
||||||
|
- **tlsprobe (network scanner)** — `internal/tlsprobe/probe.go:54`.
|
||||||
|
Same rationale as the agent verify probe — network discovery must
|
||||||
|
introspect any certificate it finds, including the ones with the
|
||||||
|
problems we're scanning for. SEC-M3 pins this.
|
||||||
|
|
||||||
|
### F5 target connector — `InsecureSkipVerify` per-config
|
||||||
|
|
||||||
|
The F5 target connector exposes an `Insecure: bool` field on its
|
||||||
|
per-target config blob (default `false`). When set,
|
||||||
|
`internal/connector/target/f5/f5.go:134` builds the HTTP client with
|
||||||
|
`InsecureSkipVerify: config.Insecure`. SEC-M5 closure: operator
|
||||||
|
opt-in for self-signed F5 BIG-IP device certs; mitigation is to run
|
||||||
|
the F5 + the proxy-agent on a network-segmented internal subnet.
|
||||||
|
Document in the F5 connector's per-target setup guide.
|
||||||
|
|
||||||
|
### ACME issuer — `CERTCTL_ACME_INSECURE` (now gated on ACK)
|
||||||
|
|
||||||
|
`internal/connector/issuer/acme/acme.go:201` builds the ACME HTTP
|
||||||
|
client with `InsecureSkipVerify: true` for the Pebble integration
|
||||||
|
test path. The per-issuer runtime setting comes from
|
||||||
|
`CERTCTL_ACME_INSECURE` (`internal/config/config.go:2116`); Phase 2
|
||||||
|
SEC-M4 closure (2026-05-13) added the fail-closed gate so the operator
|
||||||
|
must ALSO set `CERTCTL_ACME_INSECURE_ACK=true` for the server to boot.
|
||||||
|
Production deploys must never set either flag. The boot-time WARN log
|
||||||
|
at `cmd/server/main.go:611` continues to fire for the ACK'd case so
|
||||||
|
every restart logs the reminder.
|
||||||
|
|
||||||
|
### CSP `'unsafe-inline'` on `style-src`
|
||||||
|
|
||||||
|
`internal/api/middleware/securityheaders.go:58` ships the dashboard
|
||||||
|
CSP with `style-src 'self' 'unsafe-inline'`. This is required because
|
||||||
|
Tailwind compiles utility classes into a single stylesheet at build
|
||||||
|
time, but inline-style attributes appear in the dashboard via inline
|
||||||
|
`<svg>` elements + Recharts' `<ResponsiveContainer>` injecting inline
|
||||||
|
width/height. SEC-L1 closure: the carve-out is necessary today; the
|
||||||
|
planned tightening flow is the frontend audit's FE-H2 (icon library)
|
||||||
|
plus a decorative-SVG sweep, which then unlocks the CSP hardening (drops
|
||||||
|
`'unsafe-inline'`).
|
||||||
|
|
||||||
|
### Break-glass admin — Argon2id rest-defense reminder
|
||||||
|
|
||||||
|
The break-glass admin path (`docs/operator/runbooks/disaster-recovery.md`)
|
||||||
|
hashes the operator-supplied password with Argon2id and stores the
|
||||||
|
hash in the `breakglass_credentials` table. SEC-L2 reminder: the
|
||||||
|
strength of the rest-defense is operator-supplied — pick a password
|
||||||
|
with sufficient entropy (≥ 64 random bits via `openssl rand -base64
|
||||||
|
12`) and rotate after every use. Argon2id resists offline cracking
|
||||||
|
of strong passwords, but it cannot rescue a guessable one such as "Password123".
|
||||||
|
|
||||||
|
### Body-size limit (1 MB default) — operator-tunable
|
||||||
|
|
||||||
|
The `http.MaxBytesReader` wrap caps inbound request bodies at 1 MB
|
||||||
|
by default. The cap is a necessary defense against unbounded-body DoS
|
||||||
|
but it also rejects some legitimate operator workflows:
|
||||||
|
|
||||||
|
- Bulk truststore PEM bundle uploads (CA bundles for federated trust
|
||||||
|
stores can be > 1 MB).
|
||||||
|
- Multi-MB CRL pushes via the CRL-cache endpoint.
|
||||||
|
- Bulk-import of certificates with embedded chains.
|
||||||
|
|
||||||
|
SEC-L3 closure: operators raise the cap via `CERTCTL_MAX_BODY_SIZE`
|
||||||
|
(units: bytes; e.g. `CERTCTL_MAX_BODY_SIZE=10485760` for 10 MB).
|
||||||
|
Document in `deploy/ENVIRONMENTS.md`.
|
||||||
|
|
||||||
|
### Demo Compose placeholder credentials
|
||||||
|
|
||||||
|
`deploy/docker-compose.demo.yml` ships `CERTCTL_AUTH_SECRET=change-me-in-production`,
|
||||||
|
`CERTCTL_CONFIG_ENCRYPTION_KEY=change-me-32-char-encryption-key`, and
|
||||||
|
`CERTCTL_API_KEY=change-me-in-production` as documented demo
|
||||||
|
defaults. The runtime `Validate()` fail-closed guards
|
||||||
|
(`internal/config/config.go::Validate`, Bundle 2 2026-05-12) refuse
|
||||||
|
to start if those literal strings reach a non-demo config. Phase 2
|
||||||
|
DEPL-M2 closure adds a CI guard
|
||||||
|
(`scripts/ci-guards/no-change-me-in-prod-compose.sh`) that fails the
|
||||||
|
build at PR time if a `change-me-*` literal leaks into a non-demo
|
||||||
|
compose file — catching the regression one layer before the runtime
|
||||||
|
guard fires.
|
||||||
|
|
||||||
|
### Kubernetes NetworkPolicy — operator-opt-in
|
||||||
|
|
||||||
|
`deploy/helm/certctl/templates/networkpolicy.yaml` ships the template
|
||||||
|
but `deploy/helm/certctl/values.yaml` defaults `networkPolicy.enabled:
|
||||||
|
false`. DEPL-M3 rationale: most Kubernetes clusters don't have a
|
||||||
|
NetworkPolicy controller installed (kind / minikube / fresh k3s); a
|
||||||
|
default-enabled NetworkPolicy renders fine but produces no
|
||||||
|
enforcement, and bare-metal `kube-router`-style controllers may
|
||||||
|
interpret a permissive default differently. Production deploys with a
|
||||||
|
real NetworkPolicy controller (Calico, Cilium, Antrea) flip the
|
||||||
|
values key to `true` and tune the policy in their values overlay.
|
||||||
|
Document the production-enable in
|
||||||
|
`docs/operator/runbooks/ha.md` (added Phase 2 DEPL-H1).
|
||||||
|
|
||||||
## Reporting a vulnerability
|
## Reporting a vulnerability
|
||||||
|
|
||||||
Email `certctl@proton.me`. Coordinated disclosure preferred; we will
|
Email `certctl@proton.me`. Coordinated disclosure preferred; we will
|
||||||
|
|||||||
@@ -151,7 +151,12 @@ The agent runs two background loops: a heartbeat (every 60 seconds) to signal it
|
|||||||
|
|
||||||
Retired agents receive `410 Gone` on subsequent heartbeats (`service.ErrAgentRetired`). `cmd/agent` treats 410 as a terminal signal and exits cleanly so retired agents stop phoning home. Migration `000015` flipped `deployment_targets.agent_id` from `ON DELETE CASCADE` to `ON DELETE RESTRICT`, making the old hard-delete path a schema error and forcing all retirement through this contract.
|
Retired agents receive `410 Gone` on subsequent heartbeats (`service.ErrAgentRetired`). `cmd/agent` treats 410 as a terminal signal and exits cleanly so retired agents stop phoning home. Migration `000015` flipped `deployment_targets.agent_id` from `ON DELETE CASCADE` to `ON DELETE RESTRICT`, making the old hard-delete path a schema error and forcing all retirement through this contract.
|
||||||
|
|
||||||
**Registration is by-design pull-only (C-1 closure, cat-b-6177f36636fb).** Agents register themselves at first heartbeat via `install-agent.sh` + `cmd/agent/main.go` — never via the GUI. The `web/src/api/client.ts::registerAgent` client function is intentionally orphan in the dashboard for this reason. It's preserved in `client.ts` (rather than deleted) so future features that want to drive registration from the GUI — for example, a one-click "register proxy agent" panel for network-appliance topologies where the agent runs in a different network zone from the device it manages — can reach the endpoint without a `client.ts` edit. Operators looking to scale agent enrollment use `install-agent.sh` against a config-management system (Ansible, Salt, Puppet) or a baked-in cloud-init script, not the dashboard.
|
**Registration is a two-step operator-driven flow (C-1 closure, cat-b-6177f36636fb).** Agent enrollment is intentionally NOT auto-driven by the agent binary — the agent fails fast at startup if `CERTCTL_AGENT_ID` is unset (`cmd/agent/main.go`: "agent-id flag or CERTCTL_AGENT_ID env var is required"). Operators register an agent in one of two ways before starting it:
|
||||||
|
|
||||||
|
1. **Programmatic** — `POST /api/v1/agents` with the agent's metadata payload and (when configured) an `Authorization: Bearer <CERTCTL_AGENT_BOOTSTRAP_TOKEN>` header. The response carries the `id` field; that string goes into `CERTCTL_AGENT_ID` for the agent process. Suitable for config-management (Ansible, Salt, Puppet) or cloud-init flows.
|
||||||
|
2. **GUI** — not available yet: the dashboard's API client already wraps the same endpoint as `web/src/api/client.ts::registerAgent`, but no page calls it today. The function is kept rather than deleted so the eventual "register proxy agent" panel for network-appliance topologies can land without a `client.ts` edit.
|
||||||
|
|
||||||
|
Once registered, the operator passes the returned ID to `install-agent.sh` via `--agent-id` (or sets the env var directly) and starts the agent. This asymmetric flow is by design: in the pull-only deployment model the server never initiates outbound connections to agents, so only the agent's network reach matters, and every connection after registration is opened outbound from the agent's side once it boots with a valid ID.
|
||||||
|
|
||||||
### Web Dashboard
|
### Web Dashboard
|
||||||
|
|
||||||
@@ -1033,14 +1038,31 @@ The HTTP middleware stack processes requests in the following order (see `cmd/se
|
|||||||
4. **BodyLimit** - request body size cap via `http.MaxBytesReader`
|
4. **BodyLimit** - request body size cap via `http.MaxBytesReader`
|
||||||
5. **RateLimiter** - token bucket rate limiting (optional, when enabled)
|
5. **RateLimiter** - token bucket rate limiting (optional, when enabled)
|
||||||
6. **CORS** - cross-origin request handling (deny-by-default)
|
6. **CORS** - cross-origin request handling (deny-by-default)
|
||||||
7. **Auth** - API key validation (or none in development; JWT/OIDC via authenticating gateway, see below — not in-process)
|
7. **Auth** - one of three production paths (see "In-process authentication surface" below) or `none` for development
|
||||||
8. **AuditLog** - records every API call to the audit trail (requires auth context for actor)
|
8. **AuditLog** - records every API call to the audit trail (requires auth context for actor)
|
||||||
|
|
||||||
### Authenticating-gateway pattern (JWT, OIDC, mTLS)
|
### In-process authentication surface
|
||||||
|
|
||||||
certctl's in-process authentication surface is intentionally narrow: `api-key` for production deployments and `none` for development. There is no in-process JWT, OIDC, mTLS, or SAML middleware. (`CERTCTL_AUTH_TYPE=jwt` was accepted pre-G-1 but silently routed through the api-key bearer middleware — a security finding masquerading as a config option, removed at the v2.x boundary; see [`upgrade-to-v2-jwt-removal.md`](upgrade-to-v2-jwt-removal.md) if you previously set it.)
|
certctl ships three production-grade in-process authentication paths plus a `none` mode for development. Auth Bundle 2 (commit `dea5053`, 2026-05-12) added native OIDC + sessions + break-glass alongside the v2.0.x API-key path; the older "authenticating-gateway only" framing the previous draft of this doc carried is no longer accurate.
|
||||||
|
|
||||||
For deployments that need JWT/OIDC/mTLS, the standard pattern is to put an authenticating gateway in front of certctl and configure `CERTCTL_AUTH_TYPE=none` on the upstream certctl process. The gateway terminates the federated identity protocol, validates tokens / certificates / SAML assertions, and proxies the authenticated request to certctl as a same-origin call on a private network. This separation gives operators the full breadth of the modern identity ecosystem (oauth2-proxy, Envoy `ext_authz`, Traefik `ForwardAuth`, Pomerium, Authelia, Caddy `forward_auth`, Apache `mod_auth_openidc`, nginx `auth_request`) without certctl itself having to track signing-key rotation, claim mapping, audience validation, and the rest of the JWT/OIDC surface area. Operators wanting per-request actor attribution past the gateway boundary forward the gateway-resolved identity (e.g., `X-Auth-Request-User` from oauth2-proxy) and run a small authorization layer at the gateway that enforces the bearer-key contract certctl actually uses.
|
| `CERTCTL_AUTH_TYPE` | What it authenticates | When to use |
|
||||||
|
|---|---|---|
|
||||||
|
| `api-key` (default) | `Authorization: Bearer <key>` matched against SHA-256-hashed `CERTCTL_AUTH_SECRET` / `CERTCTL_API_KEYS_NAMED` rows. | Production deploys without an IdP; agent ↔ server; machine-to-machine; CI. |
|
||||||
|
| `oidc` | Federated SSO via any OIDC IdP (Keycloak / Authentik / Okta / Auth0 / Entra ID / Google Workspace). PKCE-S256 + RFC 9700 pre-login UA/IP binding + RFC 9207 iss check + alg-downgrade defense. Successful login mints an HMAC-signed server-side session (cookie + CSRF rotation + back-channel logout). | Production deploys with an existing IdP; human admin access; SOC 2 / SAS 70 deployments. |
|
||||||
|
| `none` (demo) | Every request served as the synthetic admin actor `actor-demo-anon`. | Demo / evaluation only. The fail-closed `CERTCTL_DEMO_MODE_ACK=true` requirement (Audit 2026-05-10 HIGH-12) prevents accidental production use; the boot-time WARN banner (Bundle 2) makes the posture unmissable. |
|
||||||
|
|
||||||
|
Side surfaces:
|
||||||
|
- **Day-0 bootstrap** via `CERTCTL_BOOTSTRAP_TOKEN` + `POST /api/v1/auth/bootstrap` mints the first admin actor + API key one-shot; the endpoint closes itself the moment any admin exists.
|
||||||
|
- **Break-glass admin** (Auth Bundle 2 Phase 7.5) — Argon2id-hashed local-password recovery for SSO outages. Default-OFF (`CERTCTL_BREAKGLASS_ENABLED=false`); the endpoint returns 404 when disabled, so scanners cannot tell it exists. Rate-limited at 5/min per source IP at the route (Bundle 5 closure).
|
||||||
|
- **RBAC enforcement** on every gated handler via `auth.RequirePermission(perm, scope, scopeID)` — seven default roles (admin / operator / viewer / agent / mcp / cli / auditor), 33-permission canonical catalogue, scope types (global / profile / issuer). Auditor split is load-bearing: `r-auditor` holds only `audit.read` + `audit.export`.
|
||||||
|
|
||||||
|
For deployments that need a federated-identity protocol certctl doesn't ship natively (SAML, mTLS-as-auth, LDAP), the authenticating-gateway pattern is still the right answer:
|
||||||
|
|
||||||
|
### Authenticating-gateway pattern (SAML, mTLS-as-auth, LDAP)
|
||||||
|
|
||||||
|
When the operator's identity ecosystem requires a protocol certctl doesn't ship natively in-process — SAML 2.0, mTLS-as-authentication (TLS client cert binding to actor), LDAP-direct, Kerberos — the standard pattern is to put an authenticating gateway in front of certctl and configure `CERTCTL_AUTH_TYPE=none` on the upstream (`none` is gated behind `CERTCTL_DEMO_MODE_ACK=true`, so the ACK must be set deliberately and certctl must be reachable only through the gateway). The gateway terminates the federated identity protocol, validates tokens / certificates / SAML assertions, and proxies the authenticated request to certctl as a same-origin call on a private network. This separation gives operators the full breadth of the modern identity ecosystem (oauth2-proxy, Envoy `ext_authz`, Traefik `ForwardAuth`, Pomerium, Authelia, Caddy `forward_auth`, Apache `mod_auth_openidc`, nginx `auth_request`) without certctl itself having to track signing-key rotation, claim mapping, audience validation, and the rest of the protocol surface area for every standard. Operators wanting per-request actor attribution past the gateway boundary forward the gateway-resolved identity (e.g., `X-Auth-Request-User` from oauth2-proxy) and run a small authorization layer at the gateway that enforces the bearer-key contract certctl actually uses.
|
||||||
|
|
||||||
|
The historical context: pre-G-1, `CERTCTL_AUTH_TYPE=jwt` was accepted but silently routed through the api-key bearer middleware (a security finding masquerading as a config option, removed at the v2.x boundary; see [`upgrade-to-v2-jwt-removal.md`](upgrade-to-v2-jwt-removal.md) if you previously set it). Native OIDC arrived later via Auth Bundle 2 — operators on the pre-Bundle-2 "gateway-only for OIDC" pattern can keep it (it still works) or migrate to native OIDC per [`docs/migration/oidc-enable.md`](../migration/oidc-enable.md).
|
||||||
|
|
||||||
### Concurrency Safety
|
### Concurrency Safety
|
||||||
|
|
||||||
|
|||||||
@@ -153,4 +153,4 @@ The `--wait` flag blocks until the job reaches a terminal state (Completed / Fai
|
|||||||
|
|
||||||
- [`docs/reference/api.md`](api.md) — the OpenAPI 3.1 spec the CLI wraps
|
- [`docs/reference/api.md`](api.md) — the OpenAPI 3.1 spec the CLI wraps
|
||||||
- [`docs/reference/mcp.md`](mcp.md) — the MCP server that exposes the same surface to AI assistants
|
- [`docs/reference/mcp.md`](mcp.md) — the MCP server that exposes the same surface to AI assistants
|
||||||
- [`docs/contributor/qa-prerequisites.md`](../contributor/qa-prerequisites.md) — local environment setup before the CLI can talk to a server
|
- [`docs/getting-started/quickstart.md`](../getting-started/quickstart.md) — local environment setup before the CLI can talk to a server
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ For the full deploy contract see
|
|||||||
|
|
||||||
| Variable | Default | Description |
|
| Variable | Default | Description |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `CERTCTL_AGENT_ID` | (none — required) | The agent's unique ID, issued by `POST /api/v1/agents/register` and bundled into the agent's registration response. Pass via this env var when the agent runs as a systemd unit / container without the `-agent-id` CLI flag. |
|
| `CERTCTL_AGENT_ID` | (none — required) | The agent's unique ID, issued by `POST /api/v1/agents` (requires `CERTCTL_AGENT_BOOTSTRAP_TOKEN` when configured) and returned in the registration response body. Pass via this env var when the agent runs as a systemd unit / container without the `-agent-id` CLI flag. The bundled `install-agent.sh` does NOT auto-register — operators pre-register an agent via the REST endpoint (or the dashboard), then pass the returned ID to the script via `--agent-id`. |
|
||||||
|
|
||||||
## Auth (RBAC + OIDC + sessions + break-glass)
|
## Auth (RBAC + OIDC + sessions + break-glass)
|
||||||
|
|
||||||
|
|||||||
@@ -28,6 +28,46 @@ a single shared primitive:
|
|||||||
This document describes the operator-visible surface. The Go-level
|
This document describes the operator-visible surface. The Go-level
|
||||||
contract lives at `internal/deploy/doc.go`.
|
contract lives at `internal/deploy/doc.go`.
|
||||||
|
|
||||||
|
## 1.6. Per-target guarantee matrix
|
||||||
|
|
||||||
|
Added 2026-05-12 (Bundle 1 / CLAIM-M2 closure). The README previously
|
||||||
|
claimed "every deploy goes through atomic-write + ownership-preservation
|
||||||
|
\+ SHA-256 idempotency + per-target Prometheus counters + pre-deploy
|
||||||
|
snapshot + on-failure rollback." That claim is true for the file-based
|
||||||
|
deploy primitive only. Cloud / API targets use vendor-SDK semantics and
|
||||||
|
do not share the same primitive. This matrix is the authoritative
|
||||||
|
per-target answer.
|
||||||
|
|
||||||
|
Legend: ✓ = supported / always on. ✗ = not applicable to this target
|
||||||
|
family. ◐ = partial / vendor-specific equivalent. preview = ships but
|
||||||
|
the production code path is a stub (see CLAIM-H4).
|
||||||
|
|
||||||
|
| Target | Atomic write | Owner/perms preserved | SHA-256 idempotency | Pre-deploy snapshot | On-failure rollback | Post-deploy TLS verify | Prometheus counters | Server+agent shell-injection validation |
|
||||||
|
|---|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
|
||||||
|
| NGINX | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||||
|
| Apache | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||||
|
| HAProxy | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||||
|
| Caddy | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ (no operator commands) |
|
||||||
|
| Traefik | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ |
|
||||||
|
| Envoy | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ |
|
||||||
|
| Postfix / Dovecot| ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||||
|
| SSH known-hosts | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ (no TLS endpoint) | ✓ | ✓ |
|
||||||
|
| JavaKeystore | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ (file format, no socket) | ✓ | ✓ |
|
||||||
|
| IIS | ◐ (Windows cert store API) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ |
|
||||||
|
| WinCertStore | ◐ (Windows cert store API) | ✓ | ✓ | ✓ | ✓ | ✗ | ✓ | ✗ |
|
||||||
|
| F5 BIG-IP | ✓ (iControl REST transaction) | ✗ (no FS) | ◐ (cert object name) | ◐ (transaction rollback) | ✓ (transaction rollback) | ✓ (mgmt API GET) | ✓ | ✗ |
|
||||||
|
| AWS ACM | ✗ (SDK call) | ✗ (no FS) | ◐ (ACM-side replace) | ✗ | ◐ (re-import old ARN) | ✗ | ✓ | ✗ |
|
||||||
|
| Azure Key Vault | ✗ (SDK call) | ✗ (no FS) | ◐ (KV-side versioning) | ✗ | ◐ (KV versioning) | ✗ | ✓ | ✗ |
|
||||||
|
| Kubernetes Secrets | preview | preview | preview | preview | preview | preview | preview | ✗ |
|
||||||
|
|
||||||
|
**Notes on the matrix:**
|
||||||
|
|
||||||
|
- **Atomic write / owner-perms / SHA-256 idempotency / snapshot / rollback** are properties of the shared `deploy.Apply` primitive in `internal/deploy/`. They apply to file-based targets where certctl writes to disk.
|
||||||
|
- **Cloud / API targets** (AWS ACM, Azure Key Vault) use the vendor SDK's import / replace operation. The vendor handles versioning and atomicity at their layer. certctl tracks the operation outcome via Prometheus counters; in these rows, "rollback" means re-importing the previous certificate (ACM) or reverting to the prior Key Vault version, rather than the file primitive's `os.Rename` rollback.
|
||||||
|
- **F5** uses iControl REST transactions for atomicity (deploy-hardening I docs above). It does not touch a filesystem; the snapshot/rollback semantics live in the F5 transaction protocol.
|
||||||
|
- **Kubernetes Secrets** ships but the production client (`realK8sClient`) returns `"real Kubernetes client not implemented"` for all methods (see `internal/connector/target/k8ssecret/k8ssecret.go:395+`). Operators evaluating against a real cluster should treat this connector as preview until the production client lands.
|
||||||
|
- **Server+agent shell-injection validation** (Bundle 1 / RT-C1 closure 2026-05-12) is on for every connector that accepts operator-supplied command strings: `reload_command`, `validate_command`, `restart_command`. Validation runs at API ingestion (`internal/service/target.go::Create` + `::Update` + `::CreateTarget` + `::UpdateTarget` via `internal/connector/target/configcheck`) AND on the agent before deploy (`cmd/agent/main.go` post-`createTargetConnector`, calling each connector's full `ValidateConfig` method). Connectors that do not accept operator shell strings (Caddy / Traefik / Envoy / cloud targets) skip this check by design.
|
||||||
|
|
||||||
## 1.5. Audit closure status (2026-05-02 deployment-target audit)
|
## 1.5. Audit closure status (2026-05-02 deployment-target audit)
|
||||||
|
|
||||||
The 2026-05-02 deployment-target coverage audit
|
The 2026-05-02 deployment-target coverage audit
|
||||||
|
|||||||
@@ -0,0 +1,234 @@
|
|||||||
|
# Test Skip Inventory
|
||||||
|
|
||||||
|
<!-- Auto-generated by scripts/skip-inventory.sh — do not edit by hand. -->
|
||||||
|
<!-- Re-run after adding or removing any t.Skip(). CI guard: -->
|
||||||
|
<!-- scripts/ci-guards/skip-inventory-drift.sh -->
|
||||||
|
|
||||||
|
> Last reviewed: 2026-05-13
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- Total t.Skip sites: **142**
|
||||||
|
- testing.Short() guards: **76** (these gate behind `go test -short`)
|
||||||
|
|
||||||
|
Re-run inventory with: `./scripts/skip-inventory.sh`.
|
||||||
|
|
||||||
|
## Sites (grouped by package)
|
||||||
|
|
||||||
|
### `cmd/agent`
|
||||||
|
|
||||||
|
- `cmd/agent/keymem_test.go:209` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:425` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:451` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:491` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:523` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:526` — t.Skip("running as root; cannot revoke parent dir write permission")
|
||||||
|
- `cmd/agent/keymem_test.go:553` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:556` — t.Skip("running as root; cannot revoke parent dir read+exec permission")
|
||||||
|
- `cmd/agent/keymem_test.go:623` — t.Skip("chmod-error branch is only reliably triggerable on linux via /sys (read-only fs)")
|
||||||
|
- `cmd/agent/keymem_test.go:631` — t.Skipf("/sys/kernel not stat-able as a dir on this host; skipping (%v)", err)
|
||||||
|
- `cmd/agent/keymem_test.go:637` — t.Skipf("/sys/kernel mode %#o already satisfies no-chmod branch", mode)
|
||||||
|
- `cmd/agent/keymem_test.go:652` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/keymem_test.go:655` — t.Skip("running as root; cannot revoke parent dir write permission")
|
||||||
|
- `cmd/agent/keymem_test.go:686` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `cmd/agent/verify_test.go:402` — t.Skip("no TLS certificates configured on test server")
|
||||||
|
|
||||||
|
### `cmd/server`
|
||||||
|
|
||||||
|
- `cmd/server/preflight_demo_residual_test.go:41` — t.Skip("preflight A-8 test requires Postgres (testcontainers); skipping under -short")
|
||||||
|
- `cmd/server/preflight_demo_residual_test.go:97` — t.Skip("A-8 testcontainers unavailable; skipping")
|
||||||
|
|
||||||
|
### `deploy/test/acme-integration`
|
||||||
|
|
||||||
|
- `deploy/test/acme-integration/certmanager_test.go:54` — t.Skip("KIND_AVAILABLE unset — kind-driven cert-manager integration test skipped")
|
||||||
|
|
||||||
|
### `deploy/test`
|
||||||
|
|
||||||
|
- `deploy/test/crl_ocsp_e2e_test.go:134` — t.Skip("integration only")
|
||||||
|
- `deploy/test/crl_ocsp_e2e_test.go:65` — t.Skip("integration only")
|
||||||
|
- `deploy/test/est_e2e_test.go:124` — t.Skip("integration tests require INTEGRATION=1; skipping libest e2e suite")
|
||||||
|
- `deploy/test/est_e2e_test.go:129` — t.Skipf("libest sidecar (container %q) not running (status=%q). Run `cd deploy && docker compose -f docker-compose.test.yml --profile est-e2e up -d libest-client` to bring it up.", libestContainer, status)
|
||||||
|
- `deploy/test/est_e2e_test.go:213` — t.Skip("/config/certs/bootstrap.pem not present in libest sidecar — skipping mTLS path. To enable: mint a bootstrap cert against the per-profile mTLS trust anchor and copy into deploy/test/certs/.")
|
||||||
|
- `deploy/test/est_e2e_test.go:252` — t.Skip("server-keygen disabled on the e2e EST profile (HTTP 404). Enable via CERTCTL_EST_PROFILE_E2E_SERVER_KEYGEN_ENABLED=true in docker-compose.test.yml.")
|
||||||
|
- `deploy/test/est_e2e_test.go:333` — t.Skipf("libest build lacks --tls-exporter support: %v", err)
|
||||||
|
- `deploy/test/healthcheck_test.go:102` — t.Skip("docker not available — skipping image-level HEALTHCHECK test")
|
||||||
|
- `deploy/test/healthcheck_test.go:163` — t.Skip("docker not available — skipping image-level HEALTHCHECK test")
|
||||||
|
- `deploy/test/healthcheck_test.go:224` — t.Skip("docker not available — skipping runtime HEALTHCHECK test")
|
||||||
|
- `deploy/test/healthcheck_test.go:227` — t.Skip("runtime HEALTHCHECK test takes ~45s; skipping under -short")
|
||||||
|
- `deploy/test/healthcheck_test.go:229` — t.Skip("runtime probe contract not yet wired to a sidecar postgres; " +
|
||||||
|
- `deploy/test/healthcheck_test.go:28` — // The tests skip cleanly with t.Skip when docker is not available
|
||||||
|
- `deploy/test/healthcheck_test.go:32` — // Q-1 closure (cat-s3-58ce7e9840be): this file's 5 t.Skip sites are
|
||||||
|
- `deploy/test/healthcheck_test.go:41` — // - Line 212: hard t.Skip for the runtime probe contract — image-spec
|
||||||
|
- `deploy/test/integration_test.go:1129` — t.Skip("no PEM data in certificate version")
|
||||||
|
- `deploy/test/integration_test.go:513` — t.Skip("agent not yet online (may be slow to heartbeat)")
|
||||||
|
- `deploy/test/integration_test.go:805` — t.Skip("depends on Phase04 (Local CA cert not created)")
|
||||||
|
- `deploy/test/integration_test.go:901` — t.Skip("no discovered certificates yet (agent scan may not have run)")
|
||||||
|
- `deploy/test/integration_test.go:942` — t.Skip("no certificate in Active state for renewal test")
|
||||||
|
- `deploy/test/integration_test.go:954` — t.Skipf("renewal trigger returned: %s", body)
|
||||||
|
- `deploy/test/nginx_vendor_e2e_test.go:108` — t.Skip()
|
||||||
|
- `deploy/test/qa_test.go:1055` — t.Skip("Part 23 (S/MIME & EKU) is documented in docs/testing-guide.md::Part 23 " +
|
||||||
|
- `deploy/test/qa_test.go:1065` — t.Skip("Part 24 (OCSP/CRL) is documented in docs/testing-guide.md::Part 24 " +
|
||||||
|
- `deploy/test/qa_test.go:1175` — t.Skip("Requires compiled certctl-cli binary — manual test")
|
||||||
|
- `deploy/test/qa_test.go:1179` — t.Skip("Requires compiled mcp-server binary + stdio — manual test")
|
||||||
|
- `deploy/test/qa_test.go:1313` — t.Skip("Scheduler tests are timing-dependent — verify via Docker logs manually")
|
||||||
|
- `deploy/test/qa_test.go:1320` — t.Skip("Requires Docker log inspection — manual test")
|
||||||
|
- `deploy/test/qa_test.go:1327` — t.Skip("Requires browser — manual test")
|
||||||
|
- `deploy/test/qa_test.go:1334` — t.Skip("Requires browser — manual test")
|
||||||
|
- `deploy/test/qa_test.go:1338` — t.Skip("Requires browser — manual test")
|
||||||
|
- `deploy/test/qa_test.go:1914` — t.Skip("Part 55 (Agent Soft-Retirement) is documented in docs/testing-guide.md::Part 55 " +
|
||||||
|
- `deploy/test/qa_test.go:1924` — t.Skip("Part 56 (Notification Retry/Dead-Letter) is documented in docs/testing-guide.md::Part 56 " +
|
||||||
|
- `deploy/test/qa_test.go:38` — // Q-1 closure (cat-s3-58ce7e9840be): this file contains 11 `t.Skip("Requires
|
||||||
|
- `deploy/test/qa_test.go:46` — // the runtime t.Skip is the second-line guard for operators who run
|
||||||
|
- `deploy/test/qa_test.go:50` — // is correct, and the t.Skip messages already name the missing
|
||||||
|
- `deploy/test/qa_test.go:870` — t.Skip("Requires CA cert+key setup — manual test")
|
||||||
|
- `deploy/test/qa_test.go:874` — t.Skip("Requires ACME CA with ARI support — manual test")
|
||||||
|
- `deploy/test/qa_test.go:881` — t.Skip("Requires live Vault server — manual test")
|
||||||
|
- `deploy/test/qa_test.go:885` — t.Skip("Requires DigiCert sandbox — manual test")
|
||||||
|
- `deploy/test/scep_intune_e2e_test.go:159` — t.Skipf("integration stack not reachable at %s: %v — start docker-compose.test.yml first", serverURL, err)
|
||||||
|
- `deploy/test/scep_intune_e2e_test.go:163` — t.Skipf("/scep/%s not configured — see deploy/docker-compose.test.yml for the e2eintune profile env vars", e2eintunePathID)
|
||||||
|
- `deploy/test/scep_intune_e2e_test.go:166` — t.Skipf("/scep/%s GetCACaps returned %d — Intune profile may not be enabled in compose env", e2eintunePathID, resp.StatusCode)
|
||||||
|
- `deploy/test/scep_intune_e2e_test.go:170` — t.Skipf("/scep/%s GetCACaps body=%q does NOT advertise SCEPStandard — Intune profile may be misconfigured", e2eintunePathID, string(body))
|
||||||
|
- `deploy/test/vendor_e2e_helpers_smoke_test.go:31` — t.Skip("requires network egress to api.github.com (or similar known TLS endpoint); run manually")
|
||||||
|
- `deploy/test/vendor_e2e_helpers_smoke_test.go:36` — t.Skip("requires network egress; run manually")
|
||||||
|
- `deploy/test/vendor_e2e_helpers_smoke_test.go:41` — // When hostPath is empty the helper t.Skip's. Re-run-from-
|
||||||
|
|
||||||
|
### `internal/api/handler`
|
||||||
|
|
||||||
|
- `internal/api/handler/health_test.go:481` — t.Skip("integration-style test; covered by deploy/test/integration_test.go (//go:build integration). " +
|
||||||
|
- `internal/api/handler/health_test.go:499` — t.Skipf("postgres driver unavailable in this build: %v", err)
|
||||||
|
|
||||||
|
### `internal/auth/breakglass`
|
||||||
|
|
||||||
|
- `internal/auth/breakglass/service_test.go:417` — t.Skip("timing test skipped in -short mode (Argon2id is expensive)")
|
||||||
|
|
||||||
|
### `internal/auth/oidc/domain`
|
||||||
|
|
||||||
|
- `internal/auth/oidc/domain/types_test.go:186` — t.Skip()
|
||||||
|
|
||||||
|
### `internal/auth/oidc`
|
||||||
|
|
||||||
|
- `internal/auth/oidc/bench_keycloak_test.go:103` — // signature matters because it calls t.Skip / t.Fatal / t.Cleanup.
|
||||||
|
- `internal/auth/oidc/integration_keycloak_test.go:53` — // initialized in keycloakFor() so individual tests can `t.Skip` under
|
||||||
|
- `internal/auth/oidc/integration_okta_smoke_test.go:64` — // If any required env var is missing, the test t.Skip's with a clear
|
||||||
|
- `internal/auth/oidc/integration_okta_smoke_test.go:84` — t.Skipf("Okta smoke test requires env vars: %s — skipping", strings.Join(missing, ", "))
|
||||||
|
|
||||||
|
### `internal/ciparity`
|
||||||
|
|
||||||
|
- `internal/ciparity/surface_parity_test.go:97` — // readFileOrSkip reads a file; on ENOENT, calls t.Skipf rather than
|
||||||
|
|
||||||
|
### `internal/connector/issuer/acme`
|
||||||
|
|
||||||
|
- `internal/connector/issuer/acme/acme_failure_test.go:687` — t.Skipf("could not bind challenge server (env may not allow): %v", err)
|
||||||
|
|
||||||
|
### `internal/connector/issuer/local`
|
||||||
|
|
||||||
|
- `internal/connector/issuer/local/bundle9_coverage_test.go:467` — t.Skip("unexpectedly short DER")
|
||||||
|
- `internal/connector/issuer/local/bundle9_coverage_test.go:592` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `internal/connector/issuer/local/bundle9_coverage_test.go:609` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `internal/connector/issuer/local/bundle9_coverage_test.go:621` — t.Skip("permission semantics differ on windows")
|
||||||
|
- `internal/connector/issuer/local/bundle9_coverage_test.go:653` — t.Skip("permission semantics differ on windows")
|
||||||
|
|
||||||
|
### `internal/connector/issuer/openssl`
|
||||||
|
|
||||||
|
- `internal/connector/issuer/openssl/openssl_failure_test.go:124` — t.Skip("running as root; chmod 0o600 doesn't gate execution for uid 0")
|
||||||
|
- `internal/connector/issuer/openssl/openssl_failure_test.go:71` — t.Skip("openssl adapter shell-out tests assume POSIX bash; skipping on Windows")
|
||||||
|
|
||||||
|
### `internal/connector/notifier/email`
|
||||||
|
|
||||||
|
- `internal/connector/notifier/email/email_test.go:425` — t.Skip("test requires no service on smtp.example.com:587")
|
||||||
|
- `internal/connector/notifier/email/email_test.go:503` — t.Skip("test assumes no service on 127.0.0.1:54321")
|
||||||
|
|
||||||
|
### `internal/connector/target/iis`
|
||||||
|
|
||||||
|
- `internal/connector/target/iis/iis_test.go:225` — t.Skip("Skipping: powershell.exe not available (non-Windows)")
|
||||||
|
- `internal/connector/target/iis/iis_test.go:92` — t.Skip("Skipping: powershell.exe not available (non-Windows)")
|
||||||
|
|
||||||
|
### `internal/crypto`
|
||||||
|
|
||||||
|
- `internal/crypto/encryption_property_test.go:35` — t.Skip("skipping property-based test in -short mode (PBKDF2 600k rounds × 50 iters > short budget)")
|
||||||
|
- `internal/crypto/encryption_property_test.go:75` — t.Skip("skipping property-based test in -short mode (PBKDF2 cost)")
|
||||||
|
|
||||||
|
### `internal/deploy`
|
||||||
|
|
||||||
|
- `internal/deploy/coverage_test.go:403` — t.Skip("read-only chmod doesn't restrict root")
|
||||||
|
- `internal/deploy/coverage_test.go:467` — t.Skip("non-unix")
|
||||||
|
- `internal/deploy/deploy_test.go:611` — t.Skip("non-unix platform")
|
||||||
|
|
||||||
|
### `internal/ratelimit`
|
||||||
|
|
||||||
|
- `internal/ratelimit/sliding_window_test.go:146` — t.Skip("race-style test under -short")
|
||||||
|
|
||||||
|
### `internal/repository/postgres`
|
||||||
|
|
||||||
|
- `internal/repository/postgres/audit_worm_test.go:29` — t.Skip("skipping integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_revoke_scope_test.go:118` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_revoke_scope_test.go:149` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_revoke_scope_test.go:179` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_revoke_scope_test.go:208` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_revoke_scope_test.go:56` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_revoke_scope_test.go:87` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:123` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:153` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:181` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:207` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:229` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:252` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:281` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/auth_scope_test.go:95` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_encryption_invariant_test.go:160` — t.Skip("Phase 13 encryption invariant: integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_encryption_invariant_test.go:225` — t.Skip("Phase 13 encryption invariant: integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_encryption_invariant_test.go:62` — t.Skip("Phase 13 encryption invariant: integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_prelogin_encryption_test.go:163` — t.Skip("HIGH-5 legacy fallback: integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_prelogin_encryption_test.go:42` — t.Skip("HIGH-5 encryption invariant: integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:117` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:140` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:171` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:185` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:209` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:239` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:301` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:331` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:45` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:82` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/oidc_test.go:96` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/repo_test.go:1944` — t.Skip("integration test requires PostgreSQL")
|
||||||
|
- `internal/repository/postgres/repo_test.go:2003` — t.Skip("integration test requires PostgreSQL")
|
||||||
|
- `internal/repository/postgres/repo_test.go:2114` — t.Skip("integration test requires PostgreSQL")
|
||||||
|
- `internal/repository/postgres/seed_test.go:91` — t.Skip("skipping integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:100` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:120` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:167` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:197` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:211` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:246` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:259` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:29` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:307` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:340` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:407` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:54` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/session_test.go:86` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/testutil_test.go:39` — t.Skip("skipping integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:106` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:131` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:170` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:210` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:29` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:302` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:339` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:374` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:59` — t.Skip("integration test in short mode")
|
||||||
|
- `internal/repository/postgres/user_test.go:73` — t.Skip("integration test in short mode")
|
||||||
|
|
||||||
|
### `internal/scep/intune`
|
||||||
|
|
||||||
|
- `internal/scep/intune/challenge_golden_test.go:47` — t.Skip("regenerate fixtures only when -update-golden is passed")
|
||||||
|
- `internal/scep/intune/challenge_test.go:213` — t.Skip("encoder didn't produce padding for this fixture; skipping")
|
||||||
|
- `internal/scep/intune/rate_limit_test.go:139` — t.Skip("race-style test under -short")
|
||||||
|
- `internal/scep/intune/replay_test.go:131` — t.Skip("race-style test under -short; run full suite for coverage")
|
||||||
|
|
||||||
|
### `internal/service`
|
||||||
|
|
||||||
|
- `internal/service/coverage_extras_test.go:374` — t.Skipf("RSA keygen unavailable: %v", err)
|
||||||
|
- `internal/service/coverage_extras_test.go:394` — t.Skipf("ECDSA keygen unavailable: %v", err)
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
// Package acme implements the ACME server-side protocol surface (RFC 8555
|
// Package acme implements the ACME server-side protocol surface (RFC 8555
|
||||||
// + RFC 9773 ARI). It is deliberately separate from
|
// + RFC 9773 ARI). It is deliberately separate from
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package acme
|
package acme
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (c) certctl
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
// SPDX-License-Identifier: BSL-1.1
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
// Package handler — Auth Bundle 2 Phase 7.5 / break-glass admin HTTP surface.
|
// Package handler — Auth Bundle 2 Phase 7.5 / break-glass admin HTTP surface.
|
||||||
//
|
//
|
||||||
// 4 endpoints across two access levels:
|
// 4 endpoints across two access levels:
|
||||||
@@ -32,6 +35,7 @@ import (
|
|||||||
"github.com/certctl-io/certctl/internal/auth/breakglass"
|
"github.com/certctl-io/certctl/internal/auth/breakglass"
|
||||||
bgdomain "github.com/certctl-io/certctl/internal/auth/breakglass/domain"
|
bgdomain "github.com/certctl-io/certctl/internal/auth/breakglass/domain"
|
||||||
sessiondomain "github.com/certctl-io/certctl/internal/auth/session/domain"
|
sessiondomain "github.com/certctl-io/certctl/internal/auth/session/domain"
|
||||||
|
"github.com/certctl-io/certctl/internal/ratelimit"
|
||||||
)
|
)
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -51,9 +55,30 @@ type BreakglassService interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// AuthBreakglassHandler ships the Phase 7.5 surface.
|
// AuthBreakglassHandler ships the Phase 7.5 surface.
|
||||||
|
//
|
||||||
|
// Bundle 5 closure (S1): the docstring at the top of this file claimed
|
||||||
|
// the login endpoint was "Rate-limited at 5/minute per source IP via
|
||||||
|
// the existing rate limiter middleware" but no per-route limiter was
|
||||||
|
// wired — `/auth/breakglass/login` is registered via `r.mux.Handle`
|
||||||
|
// in router.go::AuthExemptRouterRoutes and bypasses the global RPS
|
||||||
|
// middleware that wraps `r.Register`-mounted routes. The login handler
|
||||||
|
// now owns its own SlidingWindowLimiter (5 attempts / minute / source
|
||||||
|
// IP, 50 000 key cap) so the documented behavior actually ships.
|
||||||
|
//
|
||||||
|
// Wired at startup via SetLoginRateLimiter (called from cmd/server/main.go
|
||||||
|
// alongside the other per-handler rate limiters that close audit
|
||||||
|
// findings H-9 / H-12 / Bundle 3 D7 / etc.). Defense-in-depth: even
|
||||||
|
// when the limiter is nil (legacy / test), the service-layer Argon2id
|
||||||
|
// lockout state machine still protects against brute force — but a
|
||||||
|
// nil limiter is a misconfiguration the integration test catches.
|
||||||
type AuthBreakglassHandler struct {
|
type AuthBreakglassHandler struct {
|
||||||
svc BreakglassService
|
svc BreakglassService
|
||||||
cookieAttrs SessionCookieAttrs
|
cookieAttrs SessionCookieAttrs
|
||||||
|
// loginLimiter rate-limits POST /auth/breakglass/login by source IP.
|
||||||
|
// nil-safe: when unset, the handler skips the limiter check and
|
||||||
|
// relies on the service-layer Argon2id lockout. Production deploys
|
||||||
|
// MUST set this via SetLoginRateLimiter.
|
||||||
|
loginLimiter *ratelimit.SlidingWindowLimiter
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewAuthBreakglassHandler constructs the handler.
|
// NewAuthBreakglassHandler constructs the handler.
|
||||||
@@ -61,6 +86,13 @@ func NewAuthBreakglassHandler(svc BreakglassService, cookieAttrs SessionCookieAt
|
|||||||
return &AuthBreakglassHandler{svc: svc, cookieAttrs: cookieAttrs}
|
return &AuthBreakglassHandler{svc: svc, cookieAttrs: cookieAttrs}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetLoginRateLimiter wires the per-source-IP rate limiter the Login
|
||||||
|
// handler enforces. Bundle 5 closure (S1) — see the AuthBreakglassHandler
|
||||||
|
// type docstring for the full rationale.
|
||||||
|
func (h *AuthBreakglassHandler) SetLoginRateLimiter(l *ratelimit.SlidingWindowLimiter) {
|
||||||
|
h.loginLimiter = l
|
||||||
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// 1. Public login endpoint.
|
// 1. Public login endpoint.
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -98,6 +130,22 @@ func (h *AuthBreakglassHandler) Login(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ip := clientIPFromRequest(r)
|
ip := clientIPFromRequest(r)
|
||||||
|
|
||||||
|
// Bundle 5 closure (S1): per-source-IP rate limit. 5 attempts /
|
||||||
|
// minute / IP (default; configurable where the limiter is constructed in
|
||||||
|
// cmd/server/main.go). Returns 429 with a generic "too many requests" message so the response
|
||||||
|
// shape matches the rest of the auth surface (scanner-unfriendly).
|
||||||
|
// The service layer audits the next attempt that gets through — we don't
|
||||||
|
// audit the rate-limit hit itself here because that would let an
|
||||||
|
// attacker flood the audit table with rate-limit rows from a
|
||||||
|
// single IP.
|
||||||
|
if h.loginLimiter != nil {
|
||||||
|
if err := h.loginLimiter.Allow(ip, time.Now()); err != nil {
|
||||||
|
Error(w, http.StatusTooManyRequests, "too many requests")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
res, err := h.svc.Authenticate(r.Context(), req.ActorID, req.Password, ip, r.UserAgent())
|
res, err := h.svc.Authenticate(r.Context(), req.ActorID, req.Password, ip, r.UserAgent())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// All authenticate errors map to the SAME 401 + same body.
|
// All authenticate errors map to the SAME 401 + same body.
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
// Package handler — Auth Bundle 2 Phase 5 / OIDC + session HTTP surface.
|
// Package handler — Auth Bundle 2 Phase 5 / OIDC + session HTTP surface.
|
||||||
//
|
//
|
||||||
// 13 endpoints split into three logical groups:
|
// 13 endpoints split into three logical groups:
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
// Audit 2026-05-10 MED-11 closure — federated-user admin surface.
|
// Audit 2026-05-10 MED-11 closure — federated-user admin surface.
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import "time"
|
import "time"
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
// Copyright 2026 certctl LLC. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: BUSL-1.1
|
||||||
|
|
||||||
package handler
|
package handler
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user