mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-08 07:28:52 +00:00
Compare commits
73 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2d22e08a1e | |||
| cabe1aee45 | |||
| b577f6f251 | |||
| 0729ee46e0 | |||
| c8eb3e0399 | |||
| 9a7e818f3e | |||
| 8a56a78282 | |||
| edf6bee7f8 | |||
| 109f32ff41 | |||
| 022caf39b4 | |||
| 869fc8f245 | |||
| 0792271dc6 | |||
| a2a59a823e | |||
| b0c4ed1ae2 | |||
| d3bf2cc0cf | |||
| 81f6321326 | |||
| 39f065dda4 | |||
| bee47f0318 | |||
| 9bfbac0f97 | |||
| 650f5a198f | |||
| 1e1bc9b3b4 | |||
| f6ba5634fd | |||
| 4dc8d3fa5b | |||
| 62513ad12f | |||
| 9bc845304e | |||
| 45fae9952a | |||
| f68fd00b7b | |||
| c351bba41a | |||
| a05a7d3dad | |||
| 44a85d6f85 | |||
| ec88a61274 | |||
| b8b7e1e3dd | |||
| 85d247455b | |||
| b16e5b5e97 | |||
| 62f0a284be | |||
| 4142837cac | |||
| c26cef37a1 | |||
| fb88e0f8a8 | |||
| b8293653a5 | |||
| e292faafc6 | |||
| 08a86d355d | |||
| eb390b2db4 | |||
| 60ae92b0e8 | |||
| c222c8b57a | |||
| 636de7f6b5 | |||
| da00ee0ca5 | |||
| 30daadbe81 | |||
| b767f579ef | |||
| febf50090b | |||
| 475421457f | |||
| a22a1be962 | |||
| 35e18bfc56 | |||
| 3a665ae6ba | |||
| fefa5a5fd7 | |||
| 2a384c690e | |||
| 0509790325 | |||
| 633a10aa4e | |||
| 711265b652 | |||
| 74d6b462a4 | |||
| 3b92048242 | |||
| b0efdbe2f8 | |||
| 3669556e57 | |||
| 804a1b05ce | |||
| 590f654b0d | |||
| b3aad02232 | |||
| 6a5cfb3d01 | |||
| dcd82d062f | |||
| 2643a427ac | |||
| a1c7741e1b | |||
| e06447b763 | |||
| 482e952dde | |||
| c4157fd196 | |||
| 1122f5a097 |
@@ -0,0 +1,77 @@
|
||||
# Load-test workflow — closes the #8 acquisition-readiness blocker from
|
||||
# the 2026-05-01 issuer coverage audit (see
|
||||
# cowork/issuer-coverage-audit-2026-05-01/RESULTS.md).
|
||||
#
|
||||
# CADENCE: workflow_dispatch + weekly cron, NOT per-push. Load tests
|
||||
# are minutes long and don't provide useful per-PR signal — per-push
|
||||
# pressure goes through ci.yml. This workflow exists to (a) catch
|
||||
# gradual regressions from cumulative changes that no single PR
|
||||
# triggered, and (b) give an operator a one-click way to capture
|
||||
# numbers before tagging a release.
|
||||
#
|
||||
# THRESHOLDS: defined in deploy/test/loadtest/k6.js (p99 < 5s for
|
||||
# issuance-acceptance, p99 < 2s for list, error rate < 1%). k6 exits
|
||||
# non-zero on any breach, which propagates through `docker compose up
|
||||
# --exit-code-from k6` → `make loadtest` → this workflow's exit.
|
||||
|
||||
name: loadtest
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
# Manual trigger from the Actions tab. Use before tagging a
|
||||
# release or after a meaningful tuning commit.
|
||||
|
||||
schedule:
|
||||
# Mondays at 06:00 UTC. Off-peak; catches regressions accumulated
|
||||
# over the previous week's merges. Once a baseline is committed
|
||||
# in deploy/test/loadtest/README.md, drift relative to that
|
||||
# baseline is the signal — diff the captured summary.json
|
||||
# against the committed numbers.
|
||||
- cron: '0 6 * * 1'
|
||||
|
||||
# Reduce permissions — this workflow doesn't write to PRs or push tags.
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
k6:
|
||||
name: k6 throughput run
|
||||
runs-on: ubuntu-latest
|
||||
# 25-minute hard cap. Pre-Bundle-10: 15min was enough for the API
|
||||
# tier alone (~7 minutes total). Post-Bundle-10 the harness boots
|
||||
# four additional target sidecars (nginx, apache, haproxy, f5-mock)
|
||||
# before the k6 run; their healthchecks add ~30-60s. The k6 scenarios
|
||||
# themselves are still 5 minutes (run in parallel with the API
|
||||
# scenarios, not serially). 25 minutes absorbs that plus slow CI
|
||||
# runners and cold image caches without letting a stuck container
|
||||
# consume the runner indefinitely.
|
||||
timeout-minutes: 25
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
# The compose stack builds the certctl image from the repo
|
||||
# root Dockerfile. Buildx gives the build a usable cache and
|
||||
# works with newer compose versions.
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Run loadtest
|
||||
run: make loadtest
|
||||
env:
|
||||
# Disable BuildKit progress noise so the run log is
|
||||
# diff-able against past runs.
|
||||
BUILDKIT_PROGRESS: plain
|
||||
|
||||
- name: Upload summary
|
||||
# Always upload the summary so a regression has a diffable
|
||||
# artifact even when k6 exited non-zero. summary.json is the
|
||||
# authoritative machine-readable form; summary.txt is the
|
||||
# human-readable text the README baseline tracks.
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: k6-summary-${{ github.run_id }}
|
||||
path: deploy/test/loadtest/results/
|
||||
retention-days: 90
|
||||
@@ -9,7 +9,7 @@ env:
|
||||
REGISTRY: ghcr.io
|
||||
# Keep in lock-step with .github/workflows/ci.yml (M-3).
|
||||
GO_VERSION: '1.25.9'
|
||||
IMAGE_NAMESPACE: shankar0123
|
||||
IMAGE_NAMESPACE: certctl-io
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------------
|
||||
@@ -348,7 +348,7 @@ jobs:
|
||||
with:
|
||||
generate_release_notes: true
|
||||
body: |
|
||||
> **Install / upgrade:** see the [Quick Start section in the README](https://github.com/shankar0123/certctl/blob/master/README.md#quick-start) for Docker Compose, agent install, Helm, and binary download instructions.
|
||||
> **Install / upgrade:** see the [Quick Start section in the README](https://github.com/certctl-io/certctl/blob/master/README.md#quick-start) for Docker Compose, agent install, Helm, and binary download instructions.
|
||||
|
||||
## Verifying this release
|
||||
|
||||
@@ -369,7 +369,7 @@ jobs:
|
||||
```bash
|
||||
cosign verify-blob \
|
||||
--bundle checksums.txt.sigstore.json \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
checksums.txt
|
||||
```
|
||||
@@ -383,7 +383,7 @@ jobs:
|
||||
```bash
|
||||
slsa-verifier verify-artifact \
|
||||
--provenance-path multiple.intoto.jsonl \
|
||||
--source-uri github.com/shankar0123/certctl \
|
||||
--source-uri github.com/certctl-io/certctl \
|
||||
--source-tag ${{ steps.version.outputs.VERSION }} \
|
||||
certctl-agent-linux-amd64
|
||||
```
|
||||
@@ -391,21 +391,21 @@ jobs:
|
||||
**4. Verify container image signature and attestations:**
|
||||
|
||||
```bash
|
||||
IMAGE=ghcr.io/shankar0123/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
IMAGE=ghcr.io/certctl-io/certctl-server:${{ steps.version.outputs.VERSION }}
|
||||
cosign verify \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SBOM attestation (SPDX-JSON) emitted by docker/build-push-action
|
||||
cosign verify-attestation --type spdxjson \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SLSA provenance attestation (mode=max)
|
||||
cosign verify-attestation --type slsaprovenance \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
```
|
||||
|
||||
+10
-2
@@ -1,11 +1,19 @@
|
||||
# Changelog
|
||||
|
||||
## v2.0.68 — Image registry path changed ⚠️
|
||||
|
||||
> **Image registry path changed.** Starting this release, container images publish to `ghcr.io/certctl-io/certctl-server` and `ghcr.io/certctl-io/certctl-agent`. Existing pulls from `ghcr.io/shankar0123/certctl-{server,agent}:<tag>` continue to work for previously-published tags (the registry never deletes images), but the `:latest` tag at the old path stops moving forward at this release. Update your `docker pull` paths, `docker-compose.yml` `image:` keys, or Helm `image.repository` values to receive future updates. Old `git clone` / `git push` / install-script / API URLs continue to redirect forever — only the container-registry path changed.
|
||||
|
||||
This is the only operator-action-required change in v2.0.68. Other changes in this release are cosmetic URL refreshes after the GitHub-org transfer from `shankar0123/certctl` to `certctl-io/certctl` (HTTP redirects mean no other operator action is required) plus an internal contextcheck lint fix in the agent. Full commit list is on the [GitHub release page](https://github.com/certctl-io/certctl/releases/tag/v2.0.68).
|
||||
|
||||
---
|
||||
|
||||
certctl no longer maintains a hand-edited per-version changelog. Per-release
|
||||
notes are auto-generated from commit messages between consecutive tags.
|
||||
|
||||
**Where to find what changed in a given release:**
|
||||
|
||||
- **[GitHub Releases](https://github.com/shankar0123/certctl/releases)** — every
|
||||
- **[GitHub Releases](https://github.com/certctl-io/certctl/releases)** — every
|
||||
tag has an auto-generated "What's Changed" section pulled from the commits
|
||||
between that tag and the previous one, plus per-release supply-chain
|
||||
verification instructions (Cosign / SLSA / SBOM).
|
||||
@@ -27,5 +35,5 @@ without depending on the author to manually update a separate file.
|
||||
|
||||
**For the historical record:** earlier versions (pre-v2.2.0 and the [2.2.0]
|
||||
tag itself) had a hand-edited CHANGELOG. That content is preserved in
|
||||
[git history](https://github.com/shankar0123/certctl/blob/v2.2.0/CHANGELOG.md)
|
||||
[git history](https://github.com/certctl-io/certctl/blob/v2.2.0/CHANGELOG.md)
|
||||
at the v2.2.0 tag.
|
||||
|
||||
@@ -2,26 +2,54 @@ Business Source License 1.1
|
||||
|
||||
Parameters
|
||||
|
||||
Licensor: Shankar Reddy
|
||||
Licensor: Shankar Kambam
|
||||
Licensed Work: certctl
|
||||
The Licensed Work is (c) 2026 Shankar Reddy.
|
||||
Additional Use Grant: You may make use of the Licensed Work, provided that
|
||||
you may not use the Licensed Work for a Commercial
|
||||
Certificate Service. A "Commercial Certificate Service"
|
||||
is any product, service, or offering in which a third
|
||||
party (other than your employees and contractors
|
||||
acting on your behalf) accesses, uses, or benefits
|
||||
from the Licensed Work's certificate management
|
||||
functionality — including but not limited to lifecycle
|
||||
management, discovery, monitoring, alerting, renewal
|
||||
automation, deployment, and revocation — as part of
|
||||
or in connection with an offering for which
|
||||
compensation is received. This restriction applies
|
||||
regardless of whether the Licensed Work is hosted,
|
||||
managed, embedded, bundled, or integrated with
|
||||
another product or service.
|
||||
The Licensed Work is © 2026 Shankar Kambam.
|
||||
|
||||
Change Date: March 14, 2126
|
||||
Additional Use Grant: You may make use of the Licensed Work, including in
|
||||
production for your internal business operations and
|
||||
for operations that provide products or services to
|
||||
your own customers, provided that you may not offer
|
||||
the Licensed Work as a Commercial Certificate Service.
|
||||
|
||||
A "Commercial Certificate Service" is a product or
|
||||
service whose principal value to a third party is the
|
||||
certificate management functionality of the Licensed
|
||||
Work — including but not limited to lifecycle
|
||||
management, discovery, monitoring, alerting, renewal
|
||||
automation, deployment, and revocation — where the
|
||||
third party accesses or controls that functionality
|
||||
and compensation is received for that access or
|
||||
control.
|
||||
|
||||
For the avoidance of doubt:
|
||||
|
||||
(a) you may run the Licensed Work in production to
|
||||
manage certificates for products or services
|
||||
that you offer to your customers, where the
|
||||
principal value of those products or services is
|
||||
something other than the Licensed Work's
|
||||
certificate management functionality (for
|
||||
example, you operate a banking application and
|
||||
use the Licensed Work internally to manage TLS
|
||||
certificates for that application);
|
||||
|
||||
(b) for the purposes of this Additional Use Grant,
|
||||
"third party" excludes (i) your employees, (ii)
|
||||
your contractors acting on your behalf, and (iii)
|
||||
your Affiliates. "Affiliate" means any entity
|
||||
that controls, is controlled by, or is under
|
||||
common control with, you, where "control" means
|
||||
ownership of more than fifty percent (50%) of
|
||||
the voting interests of the entity;
|
||||
|
||||
(c) the restriction on offering a Commercial
|
||||
Certificate Service applies regardless of whether
|
||||
the Licensed Work is hosted, managed, embedded,
|
||||
bundled, or integrated with another product or
|
||||
service.
|
||||
|
||||
Change Date: March 14, 2076
|
||||
|
||||
Change License: Apache License, Version 2.0
|
||||
|
||||
@@ -60,13 +88,47 @@ of the Licensed Work. If you receive the Licensed Work in original or
|
||||
modified form from a third party, the terms and conditions set forth in this
|
||||
License apply to your use of that work.
|
||||
|
||||
Any use of the Licensed Work in violation of this License will automatically
|
||||
terminate your rights under this License for the current and all other
|
||||
versions of the Licensed Work.
|
||||
Patent non-assertion. During the term of this License, Licensor covenants
|
||||
not to assert any patent claim that Licensor controls against any person
|
||||
whose use of the Licensed Work complies with this License, with respect to
|
||||
the Licensed Work as distributed by Licensor. This covenant terminates with
|
||||
respect to any person who initiates a patent infringement action against
|
||||
the Licensor or against any contributor to the Licensed Work.
|
||||
|
||||
This License does not grant you any right in any trademark or logo of
|
||||
Licensor or its affiliates (provided that you may use a trademark or logo of
|
||||
Licensor as expressly required by this License).
|
||||
Termination and reinstatement. Any use of the Licensed Work in violation of
|
||||
this License will automatically terminate your rights under this License
|
||||
for the current and all other versions of the Licensed Work. Your rights
|
||||
are reinstated automatically if you cease the violation and provide written
|
||||
notice to the Licensor at the contact address above within thirty (30) days
|
||||
of becoming aware of the violation. If you violate this License a second
|
||||
time after such reinstatement, your rights are not subject to further
|
||||
reinstatement.
|
||||
|
||||
Contributions. The Licensor does not accept third-party contributions to
|
||||
the Licensed Work. Any code, documentation, or other material submitted to
|
||||
the Licensor or to any repository hosting the Licensed Work is provided at
|
||||
the submitter's sole risk, confers no rights or obligations on the
|
||||
Licensor, and is not incorporated into the Licensed Work.
|
||||
|
||||
This License does not grant you any right in any trademark or logo of the
|
||||
Licensor or its Affiliates.
|
||||
|
||||
Governing law and venue. This License shall be governed by and construed in
|
||||
accordance with the laws of the State of Florida, USA, without giving
|
||||
effect to any choice or conflict of law provision or rule. Any dispute
|
||||
arising from or relating to this License shall be brought exclusively in
|
||||
the state or federal courts located in the State of Florida, and the
|
||||
parties consent to the personal jurisdiction of such courts.
|
||||
|
||||
Severability. If any provision of this License is held to be invalid,
|
||||
illegal, or unenforceable in any jurisdiction, that holding does not
|
||||
affect the validity, legality, or enforceability of any other provision of
|
||||
this License, which remains in full force and effect.
|
||||
|
||||
Survival. The disclaimers of warranty, the patent non-assertion provisions
|
||||
(with respect to acts occurring before termination), the governing-law and
|
||||
venue provisions, and this survival provision survive any termination of
|
||||
this License.
|
||||
|
||||
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
||||
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
.PHONY: help build run test lint verify verify-docs verify-deploy clean docker-up docker-down migrate-up migrate-down generate test-cover frontend-build qa-stats
|
||||
.PHONY: help build run test lint verify verify-docs verify-deploy loadtest acme-cert-manager-test acme-rfc-conformance-test clean docker-up docker-down migrate-up migrate-down generate test-cover frontend-build qa-stats
|
||||
|
||||
# Default target - show help
|
||||
help:
|
||||
@@ -18,6 +18,7 @@ help:
|
||||
@echo " make verify Pre-commit gate: fmt + vet + lint + test (CI-parity)"
|
||||
@echo " make verify-docs Pre-tag gate: QA-doc drift checks (operator-facing docs)"
|
||||
@echo " make verify-deploy Pre-push gate: digest validity + OpenAPI parity + docker build smoke"
|
||||
@echo " make loadtest k6 throughput run against postgres + certctl (NOT in verify; manual + cron only)"
|
||||
@echo ""
|
||||
@echo "Database:"
|
||||
@echo " make migrate-up Run migrations (requires DB_URL)"
|
||||
@@ -150,6 +151,52 @@ verify-deploy:
|
||||
@echo ""
|
||||
@echo "verify-deploy: PASS — safe to push"
|
||||
|
||||
# Load-test harness — closes the #8 acquisition-readiness blocker from
|
||||
# the 2026-05-01 issuer coverage audit. Boots a minimal certctl stack
|
||||
# (postgres + tls-init + certctl-server) and runs k6 against the API
|
||||
# tier for ~5 minutes. Exits non-zero on any threshold breach.
|
||||
#
|
||||
# NOT in `make verify` — load tests take minutes, not seconds, and
|
||||
# don't provide useful per-PR signal. CI runs this only via workflow_dispatch +
|
||||
# weekly cron in .github/workflows/loadtest.yml. See
|
||||
# deploy/test/loadtest/README.md for thresholds, baseline, and how to
|
||||
# interpret a regression.
|
||||
loadtest:
|
||||
@echo "==> spinning up postgres + certctl + k6 driver (this takes ~7m)"
|
||||
@cd deploy/test/loadtest && docker compose up --build --abort-on-container-exit --exit-code-from k6
|
||||
@echo ""
|
||||
@echo "==> results landed in deploy/test/loadtest/results/"
|
||||
@if [ -f deploy/test/loadtest/results/summary.txt ]; then cat deploy/test/loadtest/results/summary.txt; fi
|
||||
|
||||
# Phase 5 — kind-driven cert-manager integration test. Requires
|
||||
# `kind`, `kubectl`, `helm`, and a local Docker daemon. Sets
|
||||
# KIND_AVAILABLE=1 so the test runs (it skips cleanly when unset, which
|
||||
# is the CI default — kind is too heavy for per-PR CI). The test
|
||||
# brings up a fresh cluster, installs cert-manager 1.15, helm-installs
|
||||
# certctl-test, applies a ClusterIssuer + Certificate, and asserts the
|
||||
# Secret lands.
|
||||
acme-cert-manager-test:
|
||||
@echo "==> running cert-manager integration test (requires kind/kubectl/helm)"
|
||||
@KIND_AVAILABLE=1 go test -tags=integration -count=1 -timeout=15m \
|
||||
./deploy/test/acme-integration/...
|
||||
|
||||
# Phase 5 — RFC 8555 conformance against `lego` driving the certctl
|
||||
# server. Hermetic: brings up a single certctl-server via docker
|
||||
# compose, points lego at it, runs the conformance scenarios. Skips
|
||||
# when the operator hasn't built the test image (`make docker-build`
|
||||
# first).
|
||||
acme-rfc-conformance-test:
|
||||
@echo "==> running RFC 8555 conformance via lego"
|
||||
@if ! command -v lego >/dev/null 2>&1; then \
|
||||
echo "lego not installed — go install github.com/go-acme/lego/v4/cmd/lego@latest"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@cd deploy/test/loadtest && docker compose up -d certctl postgres
|
||||
@sleep 8
|
||||
@CERTCTL_ACME_DIR=https://localhost:8443/acme/profile/prof-test/directory \
|
||||
bash deploy/test/acme-integration/conformance-lego.sh
|
||||
@cd deploy/test/loadtest && docker compose down
|
||||
|
||||
# Database targets (requires migrate tool)
|
||||
migrate-up:
|
||||
@echo "Running migrations..."
|
||||
|
||||
@@ -2,15 +2,12 @@
|
||||
<img src="docs/screenshots/logo/certctl-logo.png" alt="certctl logo" width="450">
|
||||
</p>
|
||||
|
||||
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=89db181e-76e0-45cc-b9c0-790c3dfdfc73" />
|
||||
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=b9379aff-9e5c-4d01-8f2d-9e4ffa09d126" />
|
||||
|
||||
# certctl — Self-Hosted Certificate Lifecycle Platform
|
||||
|
||||
[](LICENSE)
|
||||
[](https://goreportcard.com/report/github.com/shankar0123/certctl)
|
||||
[](https://github.com/shankar0123/certctl/releases)
|
||||
[](https://github.com/shankar0123/certctl/stargazers)
|
||||
[](https://goreportcard.com/report/github.com/certctl-io/certctl)
|
||||
[](https://github.com/certctl-io/certctl/releases)
|
||||
[](https://github.com/certctl-io/certctl/stargazers)
|
||||
|
||||
TLS certificate lifespans are shrinking fast. The CA/Browser Forum passed [Ballot SC-081v3](https://cabforum.org/2025/04/11/ballot-sc081v3-introduce-schedule-of-reducing-validity-and-data-reuse-periods/) unanimously in April 2025, setting a phased reduction: **200 days** by March 2026, **100 days** by March 2027, and **47 days** by March 2029. Organizations managing dozens or hundreds of certificates can no longer rely on spreadsheets, calendar reminders, or manual renewal workflows. The math doesn't work — at 47-day lifespans, a team managing 100 certificates is processing 7+ renewals per week, every week, forever.
|
||||
|
||||
@@ -36,7 +33,7 @@ gantt
|
||||
47 days :crit, 2020-01-01, 47d
|
||||
```
|
||||
|
||||
> **Actively maintained — shipping weekly.** Found something? [Open a GitHub issue](https://github.com/shankar0123/certctl/issues) — issues get triaged same-day. CI runs the full test suite with race detection, static analysis, and vulnerability scanning on every commit.
|
||||
> **Actively maintained — shipping weekly.** Found something? [Open a GitHub issue](https://github.com/certctl-io/certctl/issues) — issues get triaged same-day. CI runs the full test suite with race detection, static analysis, and vulnerability scanning on every commit.
|
||||
|
||||
**Ready to try it?** Jump to the [Quick Start](#quick-start) — you'll have a running dashboard in under 5 minutes.
|
||||
|
||||
@@ -201,7 +198,7 @@ For the complete capability breakdown, see the [Feature Inventory](docs/features
|
||||
### Docker Compose (Recommended)
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
@@ -226,7 +223,7 @@ The control plane is HTTPS-only (TLS 1.3, no plaintext listener). See [`docs/tls
|
||||
### Agent Install (One-Liner)
|
||||
|
||||
```bash
|
||||
curl -sSL https://raw.githubusercontent.com/shankar0123/certctl/master/install-agent.sh | bash
|
||||
curl -sSL https://raw.githubusercontent.com/certctl-io/certctl/master/install-agent.sh | bash
|
||||
```
|
||||
|
||||
Detects your OS and architecture, downloads the binary, configures systemd (Linux) or launchd (macOS), and starts the agent. See [install-agent.sh](install-agent.sh) for details.
|
||||
@@ -254,7 +251,7 @@ Every `v*` tag publishes signed, attested release artefacts. Binaries
|
||||
(`certctl-agent`, `certctl-server`, `certctl-cli`, `certctl-mcp-server` for
|
||||
`linux|darwin × amd64|arm64`) ship alongside a `checksums.txt`, per-binary
|
||||
SPDX-JSON SBOMs, Cosign signatures, and SLSA Level 3 provenance. Container
|
||||
images on `ghcr.io/shankar0123/certctl-{server,agent}` are built with
|
||||
images on `ghcr.io/certctl-io/certctl-{server,agent}` are built with
|
||||
`docker/build-push-action` `provenance: mode=max` + `sbom: true` and are
|
||||
additionally signed with Cosign at the image digest.
|
||||
|
||||
@@ -272,7 +269,7 @@ sha256sum -c checksums.txt
|
||||
```bash
|
||||
cosign verify-blob \
|
||||
--bundle checksums.txt.sigstore.json \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
checksums.txt
|
||||
```
|
||||
@@ -288,7 +285,7 @@ directly.
|
||||
```bash
|
||||
slsa-verifier verify-artifact \
|
||||
--provenance-path multiple.intoto.jsonl \
|
||||
--source-uri github.com/shankar0123/certctl \
|
||||
--source-uri github.com/certctl-io/certctl \
|
||||
--source-tag v2.1.0 \
|
||||
certctl-agent-linux-amd64
|
||||
```
|
||||
@@ -296,22 +293,22 @@ slsa-verifier verify-artifact \
|
||||
**4. Verify a container image signature and its SBOM / provenance attestations:**
|
||||
|
||||
```bash
|
||||
IMAGE=ghcr.io/shankar0123/certctl-server:v2.1.0
|
||||
IMAGE=ghcr.io/certctl-io/certctl-server:v2.1.0
|
||||
|
||||
cosign verify \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/\.github/workflows/release\.yml@refs/tags/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SBOM attestation (SPDX-JSON, emitted by docker/build-push-action)
|
||||
cosign verify-attestation --type spdxjson \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
|
||||
# SLSA provenance attestation (docker/build-push-action `provenance: mode=max`)
|
||||
cosign verify-attestation --type slsaprovenance \
|
||||
--certificate-identity-regexp '^https://github\.com/shankar0123/certctl/' \
|
||||
--certificate-identity-regexp '^https://github\.com/certctl-io/certctl/' \
|
||||
--certificate-oidc-issuer 'https://token.actions.githubusercontent.com' \
|
||||
"$IMAGE"
|
||||
```
|
||||
@@ -334,7 +331,7 @@ Each directory contains a `docker-compose.yml` and a `README.md` explaining the
|
||||
|
||||
```bash
|
||||
# Install
|
||||
go install github.com/shankar0123/certctl/cmd/cli@latest
|
||||
go install github.com/certctl-io/certctl/cmd/cli@latest
|
||||
|
||||
# Configure
|
||||
export CERTCTL_SERVER_URL=https://localhost:8443
|
||||
@@ -358,7 +355,7 @@ certctl ships a standalone MCP (Model Context Protocol) server that exposes all
|
||||
|
||||
```bash
|
||||
# Install and run
|
||||
go install github.com/shankar0123/certctl/cmd/mcp-server@latest
|
||||
go install github.com/certctl-io/certctl/cmd/mcp-server@latest
|
||||
export CERTCTL_SERVER_URL=https://localhost:8443
|
||||
export CERTCTL_API_KEY=your-api-key
|
||||
export CERTCTL_SERVER_CA_BUNDLE_PATH=/path/to/ca.crt # required for self-signed bootstrap
|
||||
@@ -403,11 +400,8 @@ Core lifecycle management — Local CA + ACME v2 issuers, NGINX target connector
|
||||
### V2: Operational Maturity — Shipped
|
||||
30+ milestones shipping enterprise-grade features for free. Sub-CA mode, ACME DNS-01/DNS-PERSIST-01/EAB/ARI (RFC 9773)/profile selection, step-ca, Vault PKI, DigiCert CertCentral, Sectigo SCM, Google CAS, AWS ACM PCA, Entrust, GlobalSign, EJBCA, OpenSSL/Custom CA issuers. NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS (WinRM), F5 BIG-IP, SSH, Windows Certificate Store, Java Keystore, Kubernetes Secrets targets. EST server (RFC 7030) and SCEP server (RFC 8894) enrollment protocols. RFC 5280 revocation with DER CRL + embedded OCSP responder. Certificate profiles, ownership tracking, team assignment, agent groups, interactive approval workflows. Filesystem, network, and cloud secret manager (AWS SM, Azure KV, GCP SM) certificate discovery with triage GUI. Dynamic issuer/target configuration via GUI with AES-256-GCM encrypted storage. First-run onboarding wizard. Post-deployment TLS verification. Certificate export (PEM/PKCS#12). S/MIME support. Prometheus metrics. Scheduled certificate digest emails. Slack, Teams, PagerDuty, OpsGenie, SMTP notifications. MCP server (80 tools), CLI (12 commands), Helm chart. Compliance mapping (SOC 2, PCI-DSS 4.0, NIST SP 800-57). 5 turnkey deployment examples. Agent install script. Migration guides from certbot, acme.sh, and cert-manager. See the [Feature Inventory](docs/features.md) for details.
|
||||
|
||||
### V3: certctl Pro
|
||||
Enterprise capabilities for larger deployments are available in the commercial tier.
|
||||
|
||||
### V4+: Cloud & Scale
|
||||
Kubernetes cert-manager external issuer, cloud infrastructure targets, extended CA support, and platform-scale features.
|
||||
### Forward-looking work — all free, all self-hostable
|
||||
Everything ships free under BSL 1.1. No paid tier, no V3 / V4 gating, no enterprise edition. The future revenue path is a managed-service hosting offering, in which certctl-server is operated as a hosted service while customers self-install only the agent.
|
||||
|
||||
## License
|
||||
|
||||
@@ -429,4 +423,4 @@ The release-time SBOM is published as a syft-produced cyclonedx file alongside e
|
||||
|
||||
---
|
||||
|
||||
If certctl solves a problem you have, [star the repo](https://github.com/shankar0123/certctl) to help others find it. Questions, bugs, or feature requests — [open an issue](https://github.com/shankar0123/certctl/issues).
|
||||
If certctl solves a problem you have, [star the repo](https://github.com/certctl-io/certctl) to help others find it. Questions, bugs, or feature requests — [open an issue](https://github.com/certctl-io/certctl/issues).
|
||||
|
||||
@@ -25,3 +25,70 @@ documented_exceptions:
|
||||
why: "SCEP-mTLS sibling endpoint, trailing-slash variant."
|
||||
- route: "POST /scep-mtls/"
|
||||
why: "SCEP-mTLS sibling endpoint, trailing-slash POST variant."
|
||||
|
||||
# ACME server (RFC 8555 + RFC 9773 ARI) — wire-protocol surface.
|
||||
# Like SCEP/EST, ACME is a JWS-signed-JSON wire protocol whose
|
||||
# semantics are dictated by the RFC, not by an OpenAPI schema.
|
||||
# Documenting every endpoint in openapi.yaml would duplicate
|
||||
# RFC 8555 §7.1 + §7.2 + §7.3 with no information gain. The
|
||||
# canonical operator-facing reference is docs/acme-server.md.
|
||||
# Phases 2-4 extend this list below with the new-order, finalize, authz,
|
||||
# challenge, cert, key-change, revoke-cert, and renewal-info routes.
|
||||
- route: "GET /acme/profile/{id}/directory"
|
||||
why: "ACME server RFC 8555 §7.1.1 directory; documented in docs/acme-server.md."
|
||||
- route: "HEAD /acme/profile/{id}/new-nonce"
|
||||
why: "ACME server RFC 8555 §7.2 new-nonce; documented in docs/acme-server.md."
|
||||
- route: "GET /acme/profile/{id}/new-nonce"
|
||||
why: "ACME server RFC 8555 §7.2 new-nonce GET form; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/new-account"
|
||||
why: "ACME server RFC 8555 §7.3 new-account (JWS jwk); documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/account/{acc_id}"
|
||||
why: "ACME server RFC 8555 §7.3.2 + §7.3.6 (JWS kid) account update + deactivation; documented in docs/acme-server.md."
|
||||
- route: "GET /acme/directory"
|
||||
why: "ACME server default-profile shorthand; mirrors per-profile when CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID is set."
|
||||
- route: "HEAD /acme/new-nonce"
|
||||
why: "ACME server default-profile shorthand for new-nonce HEAD."
|
||||
- route: "GET /acme/new-nonce"
|
||||
why: "ACME server default-profile shorthand for new-nonce GET."
|
||||
- route: "POST /acme/new-account"
|
||||
why: "ACME server default-profile shorthand for new-account."
|
||||
- route: "POST /acme/account/{acc_id}"
|
||||
why: "ACME server default-profile shorthand for account update + deactivation."
|
||||
|
||||
# Phase 2 — orders + finalize + authz + cert.
|
||||
- route: "POST /acme/profile/{id}/new-order"
|
||||
why: "ACME server RFC 8555 §7.4 new-order; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/order/{ord_id}"
|
||||
why: "ACME server RFC 8555 §7.4 order POST-as-GET; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/order/{ord_id}/finalize"
|
||||
why: "ACME server RFC 8555 §7.4 finalize; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/authz/{authz_id}"
|
||||
why: "ACME server RFC 8555 §7.5 authz POST-as-GET; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/challenge/{chall_id}"
|
||||
why: "ACME server RFC 8555 §7.5.1 challenge response; dispatches to Phase 3 validator pool."
|
||||
- route: "POST /acme/profile/{id}/cert/{cert_id}"
|
||||
why: "ACME server RFC 8555 §7.4.2 cert download; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/new-order"
|
||||
why: "Phase 2 default-profile shorthand for new-order."
|
||||
- route: "POST /acme/order/{ord_id}"
|
||||
why: "Phase 2 default-profile shorthand for order POST-as-GET."
|
||||
- route: "POST /acme/order/{ord_id}/finalize"
|
||||
why: "Phase 2 default-profile shorthand for finalize."
|
||||
- route: "POST /acme/authz/{authz_id}"
|
||||
why: "Phase 2 default-profile shorthand for authz POST-as-GET."
|
||||
- route: "POST /acme/challenge/{chall_id}"
|
||||
why: "Phase 3 default-profile shorthand for challenge response."
|
||||
- route: "POST /acme/cert/{cert_id}"
|
||||
why: "Phase 2 default-profile shorthand for cert download."
|
||||
- route: "POST /acme/profile/{id}/key-change"
|
||||
why: "ACME server RFC 8555 §7.3.5 doubly-signed key rollover; documented in docs/acme-server.md."
|
||||
- route: "POST /acme/profile/{id}/revoke-cert"
|
||||
why: "ACME server RFC 8555 §7.6 revoke-cert (kid OR cert-key auth); documented in docs/acme-server.md."
|
||||
- route: "GET /acme/profile/{id}/renewal-info/{cert_id}"
|
||||
why: "ACME server RFC 9773 ACME Renewal Information (unauthenticated GET); documented in docs/acme-server.md."
|
||||
- route: "POST /acme/key-change"
|
||||
why: "Phase 4 default-profile shorthand for key rollover."
|
||||
- route: "POST /acme/revoke-cert"
|
||||
why: "Phase 4 default-profile shorthand for revoke-cert."
|
||||
- route: "GET /acme/renewal-info/{cert_id}"
|
||||
why: "Phase 4 default-profile shorthand for ARI."
|
||||
|
||||
+1
-1
@@ -14,7 +14,7 @@ info:
|
||||
version: 2.0.0
|
||||
license:
|
||||
name: BSL 1.1
|
||||
url: https://github.com/shankar0123/certctl/blob/master/LICENSE
|
||||
url: https://github.com/certctl-io/certctl/blob/master/LICENSE
|
||||
|
||||
servers:
|
||||
- url: https://localhost:8443
|
||||
|
||||
+32
-7
@@ -478,7 +478,7 @@ func TestCreateTargetConnector_NGINX(t *testing.T) {
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
configJSON := json.RawMessage(`{"cert_path":"/etc/nginx/cert.pem"}`)
|
||||
connector, err := agent.createTargetConnector("NGINX", configJSON)
|
||||
connector, err := agent.createTargetConnector(context.Background(), "NGINX", configJSON)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
@@ -499,7 +499,7 @@ func TestCreateTargetConnector_Unsupported(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.createTargetConnector("UnsupportedType", nil)
|
||||
_, err := agent.createTargetConnector(context.Background(), "UnsupportedType", nil)
|
||||
|
||||
if err == nil {
|
||||
t.Error("expected error for unsupported target type")
|
||||
@@ -831,7 +831,7 @@ func strPtr(s string) *string {
|
||||
return &s
|
||||
}
|
||||
|
||||
// TestCreateTargetConnector_AllSupportedTypes tests connector creation for all 14 supported target types.
|
||||
// TestCreateTargetConnector_AllSupportedTypes tests connector creation for all 16 supported target types.
|
||||
func TestCreateTargetConnector_AllSupportedTypes(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
@@ -946,6 +946,29 @@ func TestCreateTargetConnector_AllSupportedTypes(t *testing.T) {
|
||||
"secret_name": "tls-secret",
|
||||
},
|
||||
},
|
||||
{
|
||||
// Rank 5 of the 2026-05-03 Infisical deep-research deliverable.
|
||||
// Region must be a valid AWS region; the connector lazy-loads
|
||||
// the SDK client during ValidateConfig but New() with a populated
|
||||
// region should succeed against the SDK credential chain
|
||||
// (LoadDefaultConfig doesn't require live creds).
|
||||
name: "AWSACM",
|
||||
typeName: "AWSACM",
|
||||
config: map[string]string{
|
||||
"region": "us-east-1",
|
||||
},
|
||||
},
|
||||
{
|
||||
// Rank 5 (Azure half). Vault URL + cert name; the SDK client
|
||||
// lazy-loads via DefaultAzureCredential which doesn't require
|
||||
// live creds at construction time.
|
||||
name: "AzureKeyVault",
|
||||
typeName: "AzureKeyVault",
|
||||
config: map[string]string{
|
||||
"vault_url": "https://test-vault.vault.azure.net",
|
||||
"certificate_name": "demo-cert",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
@@ -964,7 +987,7 @@ func TestCreateTargetConnector_AllSupportedTypes(t *testing.T) {
|
||||
t.Fatalf("failed to marshal config: %v", err)
|
||||
}
|
||||
|
||||
connector, err := agent.createTargetConnector(tt.typeName, configJSON)
|
||||
connector, err := agent.createTargetConnector(context.Background(), tt.typeName, configJSON)
|
||||
|
||||
// Some connectors (like WinCertStore, IIS) may error on non-Windows platforms
|
||||
// or with insufficient validation. We accept either a valid connector or an error
|
||||
@@ -999,6 +1022,8 @@ func TestCreateTargetConnector_InvalidJSON(t *testing.T) {
|
||||
"WinCertStore",
|
||||
"JavaKeystore",
|
||||
"KubernetesSecrets",
|
||||
"AWSACM",
|
||||
"AzureKeyVault",
|
||||
}
|
||||
|
||||
cfg := &AgentConfig{
|
||||
@@ -1014,7 +1039,7 @@ func TestCreateTargetConnector_InvalidJSON(t *testing.T) {
|
||||
|
||||
for _, typeName := range tests {
|
||||
t.Run(typeName, func(t *testing.T) {
|
||||
_, err := agent.createTargetConnector(typeName, invalidJSON)
|
||||
_, err := agent.createTargetConnector(context.Background(), typeName, invalidJSON)
|
||||
|
||||
if err == nil {
|
||||
t.Errorf("expected error for invalid JSON with type %s", typeName)
|
||||
@@ -1034,7 +1059,7 @@ func TestCreateTargetConnector_UnknownType(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.createTargetConnector("MagicBox", nil)
|
||||
_, err := agent.createTargetConnector(context.Background(), "MagicBox", nil)
|
||||
|
||||
if err == nil {
|
||||
t.Error("expected error for unsupported target type")
|
||||
@@ -1067,7 +1092,7 @@ func TestCreateTargetConnector_EmptyConfig(t *testing.T) {
|
||||
for _, typeName := range tests {
|
||||
t.Run(typeName, func(t *testing.T) {
|
||||
// Empty config should be handled gracefully (defaults applied)
|
||||
connector, err := agent.createTargetConnector(typeName, nil)
|
||||
connector, err := agent.createTargetConnector(context.Background(), typeName, nil)
|
||||
|
||||
// Should not error on nil/empty config (defaults are applied)
|
||||
if err != nil {
|
||||
|
||||
+37
-2
@@ -32,6 +32,8 @@ import (
|
||||
|
||||
"github.com/shankar0123/certctl/internal/connector/target"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/apache"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/awsacm"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/azurekv"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/caddy"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/envoy"
|
||||
"github.com/shankar0123/certctl/internal/connector/target/f5"
|
||||
@@ -685,7 +687,7 @@ func (a *Agent) executeDeploymentJob(ctx context.Context, job JobItem) {
|
||||
|
||||
// Deploy to the target using the appropriate connector
|
||||
if job.TargetType != "" {
|
||||
connector, err := a.createTargetConnector(job.TargetType, job.TargetConfig)
|
||||
connector, err := a.createTargetConnector(ctx, job.TargetType, job.TargetConfig)
|
||||
if err != nil {
|
||||
a.logger.Error("failed to create target connector",
|
||||
"job_id", job.ID,
|
||||
@@ -766,7 +768,11 @@ func (a *Agent) executeDeploymentJob(ctx context.Context, job JobItem) {
|
||||
}
|
||||
|
||||
// createTargetConnector instantiates the appropriate target connector based on type.
|
||||
func (a *Agent) createTargetConnector(targetType string, configJSON json.RawMessage) (target.Connector, error) {
|
||||
// ctx is threaded into SDK-driven connectors (AWSACM, AzureKeyVault) so credential
|
||||
// resolution honors caller cancellation / deadlines instead of using a fresh
|
||||
// context.Background() (the contextcheck linter enforces this — the original Rank 5
|
||||
// implementation used Background() and tripped CI on commit 502823d).
|
||||
func (a *Agent) createTargetConnector(ctx context.Context, targetType string, configJSON json.RawMessage) (target.Connector, error) {
|
||||
switch targetType {
|
||||
case "NGINX":
|
||||
var cfg nginx.Config
|
||||
@@ -900,6 +906,35 @@ func (a *Agent) createTargetConnector(targetType string, configJSON json.RawMess
|
||||
}
|
||||
return k8s.New(&cfg, a.logger)
|
||||
|
||||
case "AWSACM":
|
||||
// Rank 5 of the 2026-05-03 Infisical deep-research deliverable.
|
||||
// AWS Certificate Manager target — SDK-driven (no file I/O).
|
||||
// LoadDefaultConfig handles the standard AWS credential chain
|
||||
// (IRSA / EC2 instance profile / SSO / env vars) without any
|
||||
// long-lived creds in connector Config.
|
||||
var cfg awsacm.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid AWSACM config: %w", err)
|
||||
}
|
||||
}
|
||||
return awsacm.New(ctx, &cfg, a.logger)
|
||||
|
||||
case "AzureKeyVault":
|
||||
// Rank 5 of the 2026-05-03 Infisical deep-research deliverable.
|
||||
// Azure Key Vault target — SDK-driven (no file I/O).
|
||||
// DefaultAzureCredential handles the standard Azure credential
|
||||
// chain (managed identity / workload identity / env vars / az
|
||||
// CLI fallback). Long-lived service-principal secrets are
|
||||
// supported but discouraged via the credential_mode config.
|
||||
var cfg azurekv.Config
|
||||
if len(configJSON) > 0 {
|
||||
if err := json.Unmarshal(configJSON, &cfg); err != nil {
|
||||
return nil, fmt.Errorf("invalid AzureKeyVault config: %w", err)
|
||||
}
|
||||
}
|
||||
return azurekv.New(ctx, &cfg, a.logger)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported target type: %s", targetType)
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
acmepkg "github.com/shankar0123/certctl/internal/api/acme"
|
||||
"github.com/shankar0123/certctl/internal/api/handler"
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/api/router"
|
||||
@@ -155,6 +156,10 @@ func main() {
|
||||
profileRepo := postgres.NewProfileRepository(db)
|
||||
teamRepo := postgres.NewTeamRepository(db)
|
||||
ownerRepo := postgres.NewOwnerRepository(db)
|
||||
// ACME server (RFC 8555 + RFC 9773 ARI) — Phase 1a foundation.
|
||||
// Repo wires nonce ops only; Phases 1b-4 extend with account /
|
||||
// order / authz / challenge CRUD.
|
||||
acmeRepo := postgres.NewACMERepository(db)
|
||||
logger.Info("initialized all repositories")
|
||||
|
||||
// Initialize dynamic issuer registry.
|
||||
@@ -215,6 +220,31 @@ func main() {
|
||||
}
|
||||
|
||||
issuerRegistry := service.NewIssuerRegistry(logger)
|
||||
// Per-issuer-type issuance metrics (audit fix #4: closes the
|
||||
// per-issuer-type observability gap). Same instance is wired into
|
||||
// the registry (so adapters record issuance/renewal calls) AND
|
||||
// into the metrics handler (so the Prometheus exposer emits
|
||||
// certctl_issuance_total / _duration_seconds / _failures_total).
|
||||
issuanceMetrics := service.NewIssuanceMetrics(service.DefaultIssuanceBucketBoundaries)
|
||||
issuerRegistry.SetIssuanceMetrics(issuanceMetrics)
|
||||
|
||||
// Top-10 fix #5 (2026-05-03 audit): Vault PKI token-renewal
|
||||
// metrics. Same instance is wired into the registry (so each
|
||||
// *vault.Connector built by Rebuild gets a recorder) AND into
|
||||
// the metrics handler (so the Prometheus exposer emits
|
||||
// certctl_vault_token_renewals_total). The renewal goroutine
|
||||
// itself is kicked off below by issuerRegistry.StartLifecycles
|
||||
// after Rebuild has populated the registry.
|
||||
vaultRenewalMetrics := service.NewVaultRenewalMetrics()
|
||||
issuerRegistry.SetVaultRenewalMetrics(vaultRenewalMetrics)
|
||||
|
||||
// Audit fix #7: wire the cert-version lookup so ACME connectors
|
||||
// built by Rebuild can recover the leaf-cert DER from a serial-
|
||||
// only revoke request. The postgres CertificateRepository
|
||||
// satisfies acme.CertificateLookupRepo via its GetVersionBySerial
|
||||
// method. Without this, ACME RevokeCertificate falls back to the
|
||||
// legacy V1 "not supported" error.
|
||||
issuerRegistry.SetACMECertLookup(certificateRepo)
|
||||
|
||||
// Initialize revocation repository
|
||||
revocationRepo := postgres.NewRevocationRepository(db)
|
||||
@@ -229,6 +259,14 @@ func main() {
|
||||
// (FK-RESTRICT against managed_certificates.renewal_policy_id).
|
||||
renewalPolicyService := service.NewRenewalPolicyService(renewalPolicyRepo)
|
||||
certificateService := service.NewCertificateService(certificateRepo, policyService, auditService)
|
||||
// Atomic audit-row plumbing (closes the #3 acquisition-readiness
|
||||
// blocker from the 2026-05-01 issuer coverage audit). The same
|
||||
// transactor instance is shared across CertificateService /
|
||||
// RevocationSvc / RenewalService so all three audit-emitting
|
||||
// service paths run their writes in transactions backed by the
|
||||
// same *sql.DB handle.
|
||||
transactor := postgres.NewTransactor(db)
|
||||
certificateService.SetTransactor(transactor)
|
||||
notifierRegistry := make(map[string]service.Notifier)
|
||||
|
||||
// Wire notifier connectors from config
|
||||
@@ -287,8 +325,21 @@ func main() {
|
||||
notificationService := service.NewNotificationService(notificationRepo, notifierRegistry)
|
||||
notificationService.SetOwnerRepo(ownerRepo)
|
||||
|
||||
// Rank 4 of the 2026-05-03 Infisical deep-research deliverable
|
||||
// (cowork/infisical-deep-research-results.md Part 5). Per-policy
|
||||
// multi-channel expiry-alert metrics. Same instance is wired into
|
||||
// the notification service (recording side, every
|
||||
// SendThresholdAlertOnChannel call reports its outcome) AND into
|
||||
// the metrics handler below (exposing side, Prometheus emitter
|
||||
// reads the counters). Mirrors the VaultRenewalMetrics wiring
|
||||
// pattern from the 2026-05-03 audit fix #5 — single instance,
|
||||
// shared between recorder and exposer.
|
||||
expiryAlertMetrics := service.NewExpiryAlertMetrics()
|
||||
notificationService.SetExpiryAlertMetrics(expiryAlertMetrics)
|
||||
|
||||
// Create RevocationSvc with its dependencies
|
||||
revocationSvc := service.NewRevocationSvc(certificateRepo, revocationRepo, auditService)
|
||||
revocationSvc.SetTransactor(transactor)
|
||||
revocationSvc.SetIssuerRegistry(issuerRegistry)
|
||||
revocationSvc.SetNotificationService(notificationService)
|
||||
|
||||
@@ -352,12 +403,18 @@ func main() {
|
||||
certificateService.SetJobRepo(jobRepo)
|
||||
certificateService.SetKeygenMode(cfg.Keygen.Mode)
|
||||
renewalService := service.NewRenewalService(certificateRepo, jobRepo, renewalPolicyRepo, profileRepo, auditService, notificationService, issuerRegistry, cfg.Keygen.Mode)
|
||||
renewalService.SetTransactor(transactor)
|
||||
renewalService.SetTargetRepo(targetRepo)
|
||||
deploymentService := service.NewDeploymentService(jobRepo, targetRepo, agentRepo, certificateRepo, auditService, notificationService)
|
||||
jobService := service.NewJobService(jobRepo, certificateRepo, ownerRepo, renewalService, deploymentService, logger)
|
||||
// I-001: emit "job_retry" audit events when the scheduler resets Failed→Pending.
|
||||
// SetAuditService is optional — JobService falls back to nil-guarded no-op if unwired.
|
||||
jobService.SetAuditService(auditService)
|
||||
// Audit fix #9: bound the per-tick goroutine fan-out so a 5k-cert
|
||||
// sweep doesn't trip upstream-CA rate limits. Default 25 from
|
||||
// CERTCTL_RENEWAL_CONCURRENCY; ≤0 normalised to 1 (sequential)
|
||||
// inside the setter.
|
||||
jobService.SetRenewalConcurrency(cfg.Scheduler.RenewalConcurrency)
|
||||
agentService := service.NewAgentService(agentRepo, certificateRepo, jobRepo, targetRepo, auditService, issuerRegistry, renewalService)
|
||||
agentService.SetProfileRepo(profileRepo)
|
||||
issuerService := service.NewIssuerService(issuerRepo, auditService, issuerRegistry, encryptionKey, logger)
|
||||
@@ -368,6 +425,16 @@ func main() {
|
||||
logger.Error("failed to build issuer registry from database", "error", err)
|
||||
}
|
||||
logger.Info("issuer registry loaded", "issuers", issuerRegistry.Len())
|
||||
|
||||
// Top-10 fix #5 (2026-05-03 audit): kick off any optional
|
||||
// long-running background work bound to issuer connectors. Today
|
||||
// only Vault PKI implements issuer.Lifecycle (renew-self loop);
|
||||
// other connectors are silently skipped. Per-connector Start
|
||||
// failures are logged, not fatal — a misconfigured Vault doesn't
|
||||
// block server startup. Stop is wired to the deferred shutdown
|
||||
// path below so the goroutines exit cleanly on signal.
|
||||
issuerRegistry.StartLifecycles(context.Background())
|
||||
defer issuerRegistry.StopLifecycles()
|
||||
targetService := service.NewTargetService(targetRepo, auditService, agentRepo, encryptionKey, logger)
|
||||
profileService := service.NewProfileService(profileRepo, auditService)
|
||||
teamService := service.NewTeamService(teamRepo, auditService)
|
||||
@@ -534,6 +601,17 @@ func main() {
|
||||
// alert on certctl_ocsp_counter_total{label="rate_limited"},
|
||||
// {label="nonce_malformed"}, etc.
|
||||
metricsHandler.SetOCSPCounters(ocspCounters)
|
||||
// Audit fix #4: wire the per-issuer-type issuance metrics so the
|
||||
// /api/v1/metrics/prometheus exposer emits the new series.
|
||||
metricsHandler.SetIssuanceCounters(issuanceMetrics)
|
||||
// Top-10 fix #5 (2026-05-03 audit): Vault PKI token-renewal counter.
|
||||
// Same instance the registry uses to record per-tick results.
|
||||
metricsHandler.SetVaultRenewals(vaultRenewalMetrics)
|
||||
// Rank 4 of the 2026-05-03 Infisical deep-research deliverable:
|
||||
// per-policy multi-channel expiry-alert counter. Same instance the
|
||||
// notification service uses to record per-(channel, threshold,
|
||||
// result) outcomes.
|
||||
metricsHandler.SetExpiryAlerts(expiryAlertMetrics)
|
||||
// Bundle-5 / H-006: pass the *sql.DB pool so /ready can probe DB
|
||||
// connectivity via PingContext. /health stays shallow (liveness signal).
|
||||
healthHandler := handler.NewHealthHandler(cfg.Auth.Type, db)
|
||||
@@ -711,6 +789,63 @@ func main() {
|
||||
// by PathID; the AdminEST handler reads it at request time.
|
||||
estServices := map[string]*service.ESTService{}
|
||||
|
||||
// ACME server (RFC 8555 + RFC 9773 ARI). Phase 1a wired the
|
||||
// directory + new-nonce surface against acmeRepo + profileRepo;
|
||||
// Phase 1b adds the JWS-authenticated POST surface (new-account +
|
||||
// account/<id>), which requires the transactor + audit service
|
||||
// for per-op atomic-audit rows. SetTransactor mirrors the
|
||||
// CertificateService.SetTransactor wiring earlier in main — same
|
||||
// transactor instance shared across services.
|
||||
acmeService := service.NewACMEService(acmeRepo, profileRepo, cfg.ACMEServer)
|
||||
acmeService.SetTransactor(transactor)
|
||||
acmeService.SetAuditService(auditService)
|
||||
// Phase 2 — finalize plumbing. The finalize handler routes
|
||||
// through CertificateService.Create + certRepo.CreateVersionWithTx
|
||||
// + IssuerRegistry.Get for the bound profile's issuer. Same
|
||||
// pipeline EST/SCEP/agent/renewal use, so policy + audit + per-
|
||||
// issuer-type metrics apply uniformly to ACME-issued certs.
|
||||
acmeService.SetIssuancePipeline(certificateService, certificateRepo, issuerRegistry)
|
||||
// Phase 3 — challenge validator pool. The 3 per-type semaphores
|
||||
// (HTTP-01 / DNS-01 / TLS-ALPN-01) bound concurrent validations
|
||||
// so a flood of pending authorizations can't fan out unboundedly.
|
||||
// Defaults: 10 weight per type, 30s per-challenge timeout,
|
||||
// 8.8.8.8:53 DNS resolver. Operators tune via
|
||||
// CERTCTL_ACME_SERVER_*_CONCURRENCY + DNS01_RESOLVER.
|
||||
acmeValidatorPool := acmepkg.NewPool(acmepkg.PoolConfig{
|
||||
HTTP01Weight: int64(cfg.ACMEServer.HTTP01ConcurrencyMax),
|
||||
DNS01Weight: int64(cfg.ACMEServer.DNS01ConcurrencyMax),
|
||||
TLSALPN01Weight: int64(cfg.ACMEServer.TLSALPN01ConcurrencyMax),
|
||||
DNS01Resolver: cfg.ACMEServer.DNS01Resolver,
|
||||
})
|
||||
acmeService.SetValidatorPool(acmeValidatorPool)
|
||||
// Phase 4 — revocation pipeline + renewal-policy lookup. The same
|
||||
// revocationSvc instance shared across the rest of the platform
|
||||
// covers ACME revoke-cert; the renewalPolicyRepo backs ARI window
|
||||
// math (when present, ComputeRenewalWindow uses RenewalWindowDays;
|
||||
// when absent, falls back to last-33%-of-validity).
|
||||
acmeService.SetRevocationDelegate(revocationSvc)
|
||||
acmeService.SetRenewalPolicyLookup(renewalPolicyRepo)
|
||||
// Phase 5 — per-account rate limiter. In-memory token-buckets,
|
||||
// shared across all entry points (CreateOrder / RotateAccountKey /
|
||||
// RespondToChallenge). Restart wipes counters; orders/hour caps are
|
||||
// eventually consistent anyway. Persistent rate limiting is a
|
||||
// follow-up if production telemetry shows abuse patterns we can't
|
||||
// catch in a single restart cycle.
|
||||
acmeRateLimiter := acmepkg.NewRateLimiter()
|
||||
acmeService.SetRateLimiter(acmeRateLimiter)
|
||||
// Phase 5 — ACME GC sweeper. Disabled when GCInterval <= 0; the
|
||||
// scheduler.SetACMEGarbageCollector(nil) leg short-circuits in
|
||||
// scheduler.Start (the loopCount + goroutine launch are gated on
|
||||
// non-nil acmeGC). Wired here (not earlier with the other scheduler
|
||||
// loops) because the GC service needs a fully-constructed acmeService.
|
||||
if cfg.ACMEServer.Enabled && cfg.ACMEServer.GCInterval > 0 {
|
||||
sched.SetACMEGarbageCollector(acmeService)
|
||||
sched.SetACMEGCInterval(cfg.ACMEServer.GCInterval)
|
||||
logger.Info("ACME GC scheduler enabled",
|
||||
"interval", cfg.ACMEServer.GCInterval.String())
|
||||
}
|
||||
acmeHandler := handler.NewACMEHandler(acmeService)
|
||||
|
||||
// Build the API router with all handlers
|
||||
apiRouter := router.New()
|
||||
apiRouter.RegisterHandlers(router.HandlerRegistry{
|
||||
@@ -766,6 +901,12 @@ func main() {
|
||||
AdminEST: handler.NewAdminESTHandler(
|
||||
handler.NewAdminESTServiceImpl(estServices),
|
||||
),
|
||||
// ACME server (RFC 8555 + RFC 9773 ARI) — Phase 1a foundation.
|
||||
// Phase 1a wires directory + new-nonce; subsequent phases extend
|
||||
// with the JWS-authenticated POST surface (new-account,
|
||||
// new-order, finalize, challenges, revoke, ARI). See
|
||||
// docs/acme-server.md for the operator-facing reference.
|
||||
ACME: acmeHandler,
|
||||
})
|
||||
// Register EST (RFC 7030) handlers if enabled.
|
||||
//
|
||||
|
||||
@@ -77,7 +77,7 @@ Three services on a private bridge network:
|
||||
### Starting it
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
@@ -284,29 +284,57 @@ services:
|
||||
CERTCTL_EST_ENABLED: "true"
|
||||
CERTCTL_EST_ISSUER_ID: iss-local
|
||||
|
||||
# SCEP RFC 8894 + Intune master prompt §10.2 + §13 acceptance
|
||||
# (deploy/test/scep_intune_e2e_test.go integration variant).
|
||||
# Closed in the 2026-04-29 audit-closure bundle (Phase I).
|
||||
# SCEP intentionally NOT configured in this stack.
|
||||
#
|
||||
# Publishes /scep/e2eintune?operation=... with the Intune
|
||||
# dispatcher enabled. The deterministic Connector signing cert
|
||||
# is bind-mounted at the path below; the matching private key
|
||||
# lives ONLY on the test side (see
|
||||
# deploy/test/scep_intune_e2e_test.go::generateE2EIntuneTrustAnchor).
|
||||
CERTCTL_SCEP_ENABLED: "true"
|
||||
CERTCTL_SCEP_PROFILES: "e2eintune"
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_ISSUER_ID: iss-local
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_RA_CERT_PATH: /etc/certctl/scep/ra.crt
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_RA_KEY_PATH: /etc/certctl/scep/ra.key
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_ENABLED: "true"
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_CONNECTOR_CERT_PATH: /etc/certctl/scep/intune_trust_anchor.pem
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_AUDIENCE: https://localhost:8443/scep/e2eintune
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_CHALLENGE_VALIDITY: 60m
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_CLOCK_SKEW_TOLERANCE: 60s
|
||||
CERTCTL_SCEP_PROFILE_E2EINTUNE_INTUNE_PER_DEVICE_RATE_LIMIT_24H: 3
|
||||
# The 2026-04-29 master bundle Phase I added an `e2eintune` SCEP
|
||||
# profile to this compose file with the intent that
|
||||
# deploy/test/scep_intune_e2e_test.go would exercise it. That
|
||||
# integration test exists (//go:build integration) but no CI job
|
||||
# actually selects it — ci.yml's deploy-vendor-e2e job runs only
|
||||
# `-run 'VendorEdge_'` (line 379), and no other job ever invokes
|
||||
# `go test -tags integration` with a SCEP selector.
|
||||
#
|
||||
# The result was dead config: SCEP_ENABLED=true triggered the
|
||||
# per-profile validator chain at server boot, but the supporting
|
||||
# fixtures (ra.crt + ra.key + intune_trust_anchor.pem) were never
|
||||
# committed to deploy/test/fixtures/ — only the README documenting
|
||||
# how to regenerate them. Pre-Phase-5 (ci-pipeline-cleanup matrix
|
||||
# collapse) the test stack didn't fully boot the certctl-server in
|
||||
# CI, so the gap was hidden. Once the matrix collapsed and the
|
||||
# collapsed deploy-vendor-e2e job started actually booting the
|
||||
# server, the fail-loud gate at config.go:2069 (CWE-306, empty
|
||||
# CHALLENGE_PASSWORD) fired and blocked CI.
|
||||
#
|
||||
# CERTCTL_SCEP_ENABLED is unset → default false → the validator
|
||||
# skips the entire SCEP block. Coherence guard at
|
||||
# scripts/ci-guards/test-compose-scep-coherence.sh refuses any
|
||||
# future edit that re-enables SCEP without ALSO (a) adding a CI
|
||||
# job that runs the SCEP integration test and (b) committing the
|
||||
# required fixtures. The README at deploy/test/fixtures/README.md
|
||||
# keeps the regen recipe so the eventual SCEP CI job lands cleanly.
|
||||
|
||||
# Dynamic issuer/target config encryption (M34/M35)
|
||||
CERTCTL_CONFIG_ENCRYPTION_KEY: test-encryption-key-32chars!!
|
||||
# Dynamic issuer/target config encryption (M34/M35).
|
||||
#
|
||||
# MUST be ≥ 32 bytes. The H-1 closure (commit 6cb4414, "feat(security):
|
||||
# encryption-key validation") added internal/config/config.go's
|
||||
# minEncryptionKeyLength = 32 byte floor; values shorter than that are
|
||||
# rejected at server boot with `Failed to load configuration:
|
||||
# CERTCTL_CONFIG_ENCRYPTION_KEY too short`. The previous test value
|
||||
# `test-encryption-key-32chars!!` was 29 bytes (the name claimed 32 but
|
||||
# the author miscounted — 4+1+10+1+3+1+2+5+2 = 29). Pre-H-1 the
|
||||
# validator accepted any non-empty string, so the gap was silent. Once
|
||||
# the test stack actually booted the certctl-server (which the
|
||||
# ci-pipeline-cleanup Phase 5 matrix collapse forced for the first
|
||||
# time), the server hard-failed at startup and the deploy-vendor-e2e
|
||||
# job's `dependency failed to start: container certctl-test-server
|
||||
# is unhealthy` error fired.
|
||||
#
|
||||
# The replacement below is 49 bytes — 17 bytes of safety margin over
|
||||
# the floor so a future tightening (32 → 33+) does not break this
|
||||
# fixture. It is clearly test-only / deterministic; do NOT copy this
|
||||
# to production. Operators set CERTCTL_CONFIG_ENCRYPTION_KEY from
|
||||
# `openssl rand -base64 32` per the README.
|
||||
CERTCTL_CONFIG_ENCRYPTION_KEY: test-encryption-key-deterministic-32-byte-fixture
|
||||
|
||||
# Network scanning
|
||||
CERTCTL_NETWORK_SCAN_ENABLED: "true"
|
||||
@@ -326,15 +354,11 @@ services:
|
||||
# agent mounts the same host path at the same container path (see below)
|
||||
# so /etc/certctl/tls/ca.crt resolves to the *same* bytes on both sides.
|
||||
- ./test/certs:/etc/certctl/tls:ro
|
||||
# SCEP RFC 8894 + Intune master prompt §10.2 + §13 acceptance: the
|
||||
# e2eintune profile's RA cert/key + Intune Connector trust anchor
|
||||
# PEM. The PEM is the deterministic public cert matching the test-
|
||||
# side private key in deploy/test/scep_intune_e2e_test.go (re-run
|
||||
# `go test -tags integration -run='^TestRegenerateE2EIntuneFixture$'
|
||||
# -update-fixture ./deploy/test/...` to regenerate after a seed
|
||||
# change). RA cert/key live alongside; tls-init container generates
|
||||
# them at boot.
|
||||
- ./test/fixtures:/etc/certctl/scep:ro
|
||||
# SCEP fixtures volume mount removed alongside the SCEP env vars
|
||||
# above. When a CI job that runs scep_intune_e2e_test.go is added,
|
||||
# restore both this mount AND the env vars together — the coherence
|
||||
# guard at scripts/ci-guards/test-compose-scep-coherence.sh
|
||||
# enforces that they move as a unit.
|
||||
networks:
|
||||
certctl-test:
|
||||
ipv4_address: 10.30.50.6
|
||||
|
||||
@@ -452,8 +452,8 @@ monitoring:
|
||||
## Support
|
||||
|
||||
For issues, questions, or contributions:
|
||||
- GitHub: https://github.com/shankar0123/certctl
|
||||
- Documentation: https://github.com/shankar0123/certctl/tree/main/docs
|
||||
- GitHub: https://github.com/certctl-io/certctl
|
||||
- Documentation: https://github.com/certctl-io/certctl/tree/main/docs
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ kubectl logs -l app.kubernetes.io/component=server -f
|
||||
|
||||
## Support
|
||||
|
||||
- **GitHub**: https://github.com/shankar0123/certctl
|
||||
- **GitHub**: https://github.com/certctl-io/certctl
|
||||
- **Issues**: Report on GitHub issues
|
||||
- **Documentation**: All docs are in `deploy/helm/`
|
||||
|
||||
|
||||
@@ -94,4 +94,4 @@ helm install certctl certctl/ --dry-run --debug
|
||||
|
||||
- Full documentation in `README.md`
|
||||
- Troubleshooting in `DEPLOYMENT_GUIDE.md`
|
||||
- Issues: https://github.com/shankar0123/certctl
|
||||
- Issues: https://github.com/certctl-io/certctl
|
||||
|
||||
@@ -508,8 +508,8 @@ kubectl exec -it <pod> -- \
|
||||
## Support and Contributing
|
||||
|
||||
For issues, questions, or contributions, visit:
|
||||
- GitHub: https://github.com/shankar0123/certctl
|
||||
- Documentation: https://github.com/shankar0123/certctl/tree/main/docs
|
||||
- GitHub: https://github.com/certctl-io/certctl
|
||||
- Documentation: https://github.com/certctl-io/certctl/tree/main/docs
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ keywords:
|
||||
- kubernetes
|
||||
maintainers:
|
||||
- name: certctl
|
||||
home: https://github.com/shankar0123/certctl
|
||||
home: https://github.com/certctl-io/certctl
|
||||
sources:
|
||||
- https://github.com/shankar0123/certctl
|
||||
- https://github.com/certctl-io/certctl
|
||||
license: BSL-1.1
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# certctl Helm Chart
|
||||
|
||||
Production-ready Helm chart for deploying [certctl](https://github.com/shankar0123/certctl) on Kubernetes. Wires up the certctl server (Deployment), PostgreSQL (StatefulSet with PVC), and the agent (DaemonSet — one per node) on a private cluster, with health probes, security contexts, and optional Ingress.
|
||||
Production-ready Helm chart for deploying [certctl](https://github.com/certctl-io/certctl) on Kubernetes. Wires up the certctl server (Deployment), PostgreSQL (StatefulSet with PVC), and the agent (DaemonSet — one per node) on a private cluster, with health probes, security contexts, and optional Ingress.
|
||||
|
||||
## Quick install
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ server:
|
||||
|
||||
# Image configuration
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl
|
||||
repository: ghcr.io/certctl-io/certctl
|
||||
tag: "" # defaults to Chart.appVersion
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
@@ -410,7 +410,7 @@ agent:
|
||||
|
||||
# Image configuration
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl-agent
|
||||
repository: ghcr.io/certctl-io/certctl-agent
|
||||
tag: "" # defaults to Chart.appVersion
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ server:
|
||||
replicas: 1
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl
|
||||
repository: ghcr.io/certctl-io/certctl
|
||||
pullPolicy: IfNotPresent # Use latest tag
|
||||
|
||||
port: 8443
|
||||
@@ -72,7 +72,7 @@ agent:
|
||||
replicas: 1
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl-agent
|
||||
repository: ghcr.io/certctl-io/certctl-agent
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
resources:
|
||||
|
||||
@@ -12,7 +12,7 @@ server:
|
||||
replicas: 3
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl
|
||||
repository: ghcr.io/certctl-io/certctl
|
||||
tag: "2.1.0"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
@@ -84,7 +84,7 @@ agent:
|
||||
kind: DaemonSet
|
||||
|
||||
image:
|
||||
repository: ghcr.io/shankar0123/certctl-agent
|
||||
repository: ghcr.io/certctl-io/certctl-agent
|
||||
tag: "2.1.0"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Phase 5 — install cert-manager 1.15.0 into the kind cluster brought
|
||||
# up by kind-config.yaml. Idempotent: re-running re-applies the same
|
||||
# manifest and then waits for the deployments to be Ready again.
|
||||
#
|
||||
# Called from: deploy/test/acme-integration/certmanager_test.go
|
||||
# Standalone: bash deploy/test/acme-integration/cert-manager-install.sh
|
||||
set -euo pipefail
|
||||
|
||||
CERT_MANAGER_VERSION="${CERT_MANAGER_VERSION:-v1.15.0}"
|
||||
KUBECTL="${KUBECTL:-kubectl}"
|
||||
|
||||
echo "Installing cert-manager ${CERT_MANAGER_VERSION}..."
|
||||
${KUBECTL} apply -f \
|
||||
"https://github.com/cert-manager/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml"
|
||||
|
||||
echo "Waiting for cert-manager controller to be Ready (timeout 5m)..."
|
||||
${KUBECTL} -n cert-manager wait --for=condition=Available --timeout=5m \
|
||||
deployment/cert-manager \
|
||||
deployment/cert-manager-cainjector \
|
||||
deployment/cert-manager-webhook
|
||||
|
||||
echo "cert-manager ${CERT_MANAGER_VERSION} ready."
|
||||
@@ -0,0 +1,20 @@
|
||||
# Phase 5 — Certificate resource the integration test applies and
|
||||
# waits for. The certctl-test-trust ClusterIssuer (trust_authenticated
|
||||
# mode) issues the cert without any solver round-trip; the resulting
|
||||
# Secret 'test-com-tls' is asserted to carry tls.crt + tls.key.
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: test-com
|
||||
namespace: default
|
||||
spec:
|
||||
secretName: test-com-tls
|
||||
commonName: test.example.com
|
||||
dnsNames:
|
||||
- test.example.com
|
||||
- www.test.example.com
|
||||
issuerRef:
|
||||
name: certctl-test-trust
|
||||
kind: ClusterIssuer
|
||||
duration: 720h # 30d
|
||||
renewBefore: 240h # 10d
|
||||
@@ -0,0 +1,167 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
//go:build integration
|
||||
|
||||
// Phase 5 — kind-driven cert-manager integration test. Verifies the
|
||||
// certctl ACME server end-to-end against a real cert-manager 1.15+
|
||||
// deployment in a kind cluster. The test sequences:
|
||||
//
|
||||
// 1. Bring up the kind cluster (kind-config.yaml).
|
||||
// 2. Install cert-manager 1.15 (cert-manager-install.sh).
|
||||
// 3. Helm-install certctl-server with acmeServer.enabled=true.
|
||||
// 4. Apply the ClusterIssuer + Certificate.
|
||||
// 5. Wait for the Certificate to become Ready.
|
||||
// 6. Assert the Secret has tls.crt + tls.key.
|
||||
//
|
||||
// Gated behind KIND_AVAILABLE — CI doesn't run kind and skips this
|
||||
// cleanly. Operators run locally via `make acme-cert-manager-test`.
|
||||
|
||||
package acmeintegration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// kindAvailable reports whether the operator opted into the kind-driven
// test path by exporting KIND_AVAILABLE with any non-empty value. An unset
// or empty variable (the CI default) means the test is skipped.
func kindAvailable() bool {
	optIn := os.Getenv("KIND_AVAILABLE")
	return len(optIn) > 0
}
|
||||
|
||||
// kindClusterName is the name passed to `kind create/delete cluster`.
|
||||
// Kept as a const so the test cleanup uses the exact same name as
|
||||
// setup (avoid orphan-cluster-after-flake).
|
||||
const kindClusterName = "certctl-acme-test"
|
||||
|
||||
// TestCertManagerTrustAuthenticatedIssuance is the happy-path
|
||||
// integration: cert-manager submits a new-order against a profile in
|
||||
// trust_authenticated mode; certctl auto-resolves authzs (no solver
|
||||
// round-trip in this mode); cert-manager finalizes; the Secret lands.
|
||||
//
|
||||
// Runtime: ~6-8 minutes wall-clock on a workstation (most of which is
|
||||
// kind-create + cert-manager-controller-bootstrap, both cached on
|
||||
// re-runs after the first). Skips cleanly when KIND_AVAILABLE is
|
||||
// unset.
|
||||
func TestCertManagerTrustAuthenticatedIssuance(t *testing.T) {
|
||||
if !kindAvailable() {
|
||||
t.Skip("KIND_AVAILABLE unset — kind-driven cert-manager integration test skipped")
|
||||
}
|
||||
ctx := context.Background()
|
||||
|
||||
t.Log("creating kind cluster")
|
||||
runCmd(t, ctx, "kind", "create", "cluster",
|
||||
"--name", kindClusterName,
|
||||
"--config", "kind-config.yaml")
|
||||
t.Cleanup(func() {
|
||||
// Best-effort cluster teardown — never fail the test on cleanup
|
||||
// failure (operator can `kind delete cluster` manually).
|
||||
_ = exec.Command("kind", "delete", "cluster", "--name", kindClusterName).Run()
|
||||
})
|
||||
|
||||
t.Log("installing cert-manager")
|
||||
runCmd(t, ctx, "bash", "cert-manager-install.sh")
|
||||
|
||||
// Step 3 — deploy certctl-server. The Helm chart at
|
||||
// deploy/helm/certctl/ takes acmeServer.enabled=true; the operator
|
||||
// is expected to have built + pushed (or kind-loaded) a `:test`
|
||||
// image tag before the test runs. Document this in docs/acme-server.md.
|
||||
t.Log("helm-installing certctl-test")
|
||||
runCmd(t, ctx, "helm", "install", "certctl-test", "../../helm/certctl/",
|
||||
"--set", "acmeServer.enabled=true",
|
||||
"--set", "acmeServer.defaultProfileId=prof-test",
|
||||
"--set", "image.tag=test",
|
||||
)
|
||||
waitForDeploymentReady(t, ctx, "default", "certctl-test", 3*time.Minute)
|
||||
|
||||
t.Log("applying ClusterIssuer + Certificate")
|
||||
runCmd(t, ctx, "kubectl", "apply", "-f", "clusterissuer-trust-authenticated.yaml")
|
||||
runCmd(t, ctx, "kubectl", "apply", "-f", "certificate-test.yaml")
|
||||
|
||||
t.Log("waiting for Certificate to become Ready")
|
||||
waitForCertificateReady(t, ctx, "default", "test-com", 3*time.Minute)
|
||||
|
||||
t.Log("asserting Secret has tls.crt")
|
||||
assertSecretHasCert(t, ctx, "default", "test-com-tls")
|
||||
|
||||
t.Log("happy-path issuance verified end-to-end")
|
||||
}
|
||||
|
||||
// runCmd executes name with args under ctx. If the process cannot be
// started or exits non-zero, the test is aborted via t.Fatalf with the
// command line, the error, and the combined stdout+stderr. On success
// the trimmed combined output is written to the test log so the
// operator can read the kubectl/kind output afterwards.
func runCmd(t *testing.T, ctx context.Context, name string, args ...string) {
	t.Helper()

	joined := strings.Join(args, " ")
	output, runErr := exec.CommandContext(ctx, name, args...).CombinedOutput() //nolint:gosec // ARGS are test-controlled literals.
	if runErr != nil {
		t.Fatalf("%s %s failed: %v\n%s", name, joined, runErr, output)
	}

	t.Logf("%s %s: %s", name, joined, strings.TrimSpace(string(output)))
}
|
||||
|
||||
// waitForDeploymentReady blocks until the named deployment reports
// Available=True, failing the test if that does not happen within
// timeout. It wraps `kubectl wait` with a Go-level deadline so test
// hangs are bounded.
//
// kubectl is given exactly `timeout` (truncated to whole seconds) via
// --timeout; the Go context gets a short extra grace period on top so
// that kubectl reaches its own timeout first and prints which
// condition was not met, instead of being killed mid-wait with no
// useful output.
func waitForDeploymentReady(t *testing.T, ctx context.Context, namespace, name string, timeout time.Duration) {
	t.Helper()

	// Headroom between kubectl's own timeout and the hard kill.
	const killGrace = 15 * time.Second

	cctx, cancel := context.WithTimeout(ctx, timeout+killGrace)
	defer cancel()

	cmd := exec.CommandContext(cctx, "kubectl", "-n", namespace, "wait",
		"--for=condition=Available", fmt.Sprintf("--timeout=%ds", int(timeout.Seconds())),
		"deployment/"+name) //nolint:gosec // ARGS are test-controlled literals.
	out, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("deployment %s/%s did not become Ready in %v: %v\n%s",
			namespace, name, timeout, err, out)
	}
}
|
||||
|
||||
// waitForCertificateReady blocks until the cert-manager Certificate
// resource reports Ready=True, failing the test if that does not
// happen within timeout. cert-manager's own reconciliation loop is
// what advances the state; this just waits for it via `kubectl wait`.
//
// kubectl is given exactly `timeout` (truncated to whole seconds) via
// --timeout; the Go context gets a short extra grace period so kubectl
// hits its own timeout first and reports it, rather than being killed
// silently. On failure the Certificate is described (best-effort, with
// its own bounded deadline) so the operator can see which
// reconciliation step failed.
func waitForCertificateReady(t *testing.T, ctx context.Context, namespace, name string, timeout time.Duration) {
	t.Helper()

	// Headroom between kubectl's own timeout and the hard kill.
	const killGrace = 15 * time.Second

	cctx, cancel := context.WithTimeout(ctx, timeout+killGrace)
	defer cancel()

	cmd := exec.CommandContext(cctx, "kubectl", "-n", namespace, "wait",
		"--for=condition=Ready", fmt.Sprintf("--timeout=%ds", int(timeout.Seconds())),
		"certificate/"+name) //nolint:gosec // ARGS are test-controlled literals.
	out, err := cmd.CombinedOutput()
	if err == nil {
		return
	}

	// Dump the Certificate's events on failure. This uses a fresh
	// deadline derived from the parent ctx (cctx may already be
	// expired) so the diagnostic step itself cannot hang the test.
	describeCtx, cancelDescribe := context.WithTimeout(ctx, 30*time.Second)
	defer cancelDescribe()
	// The describe error is deliberately ignored: it is diagnostics
	// only, and whatever output was produced is still printed below.
	describeOut, _ := exec.CommandContext(describeCtx, "kubectl", "-n", namespace,
		"describe", "certificate", name).CombinedOutput() //nolint:gosec // ARGS are test-controlled literals.

	t.Fatalf("certificate %s/%s did not become Ready in %v: %v\n%s\n--- describe ---\n%s",
		namespace, name, timeout, err, out, describeOut)
}
|
||||
|
||||
// assertSecretHasCert checks that the named Secret carries non-empty
// tls.crt and tls.key entries. We don't validate the chain itself
// here — that's the job of certctl's own integration test layer; this
// just confirms cert-manager wrote both halves of the keypair into the
// Secret on the trust_authenticated happy-path.
//
// Only kubectl's stdout is inspected for the value: stderr is captured
// separately (and shown on failure) so that a warning printed by
// kubectl cannot make an empty field look populated. The value is
// whitespace-trimmed before the emptiness check for the same reason.
// The field contents are never logged.
func assertSecretHasCert(t *testing.T, ctx context.Context, namespace, name string) {
	t.Helper()

	cctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	for _, key := range []string{"tls.crt", "tls.key"} {
		// Dots inside a key must be escaped in kubectl's jsonpath
		// syntax, e.g. {.data.tls\.crt}.
		jsonPath := "jsonpath={.data." + strings.ReplaceAll(key, ".", `\.`) + "}"

		cmd := exec.CommandContext(cctx, "kubectl", "-n", namespace, "get", "secret", name,
			"-o", jsonPath) //nolint:gosec // ARGS are test-controlled literals.
		var stderr strings.Builder
		cmd.Stderr = &stderr

		out, err := cmd.Output()
		if err != nil {
			t.Fatalf("get secret %s/%s (%s): %v\n%s", namespace, name, key, err, stderr.String())
		}
		if strings.TrimSpace(string(out)) == "" {
			t.Fatalf("secret %s/%s has empty %s", namespace, name, key)
		}
	}
}
|
||||
@@ -0,0 +1,31 @@
|
||||
# Phase 5 — sample ClusterIssuer for the certctl challenge auth mode
|
||||
# (RFC 8555 §8 HTTP-01 / DNS-01; RFC 8737 TLS-ALPN-01). Use this for public-
|
||||
# trust-style deployments where per-identifier ownership proof is
|
||||
# required.
|
||||
#
|
||||
# Same bootstrap-root caBundle requirement as the trust_authenticated
|
||||
# variant — see clusterissuer-trust-authenticated.yaml comments.
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: certctl-test-challenge
|
||||
spec:
|
||||
acme:
|
||||
email: test@example.com
|
||||
# Point at a profile whose certificate_profiles.acme_auth_mode is
|
||||
# set to 'challenge'. The certctl operator manages this column
|
||||
# per-profile; see certctl/docs/acme-server.md "Per-profile auth
|
||||
# mode" section.
|
||||
server: https://certctl-test.default.svc.cluster.local:8443/acme/profile/prof-challenge/directory
|
||||
caBundle: |
|
||||
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCi4uLgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
|
||||
privateKeySecretRef:
|
||||
name: certctl-test-challenge-account-key
|
||||
solvers:
|
||||
# HTTP-01 via the in-cluster ingress-nginx. The cert-manager
|
||||
# http-solver pod publishes the key authorization at
|
||||
# http://<identifier>/.well-known/acme-challenge/<token>; the
|
||||
# certctl HTTP01Validator (Phase 3) fetches it.
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
@@ -0,0 +1,42 @@
|
||||
# Phase 5 — sample ClusterIssuer for the certctl trust_authenticated
|
||||
# auth mode (RFC 8555 §6 + certctl auth_mode=trust_authenticated, where
|
||||
# the JWS-authenticated ACME account is trusted to issue any identifier
|
||||
# the profile policy permits — no per-identifier ownership challenges).
|
||||
#
|
||||
# Use this as the starting template for any internal-PKI rollout.
|
||||
# Replace the caBundle placeholder with the base64-encoded PEM of the
|
||||
# certctl-server's self-signed bootstrap root, then `kubectl apply`.
|
||||
#
|
||||
# Generate the caBundle via:
|
||||
# cat deploy/test/certs/ca.crt | base64 -w0
|
||||
# (See certctl/docs/acme-server.md "TLS trust bootstrap" section for the
|
||||
# end-to-end walkthrough — this is the single biggest first-time-deploy
|
||||
# footgun on cert-manager, captured as audit fix #9.)
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: certctl-test-trust
|
||||
spec:
|
||||
acme:
|
||||
email: test@example.com
|
||||
# Replace 'certctl-test' with your release name + adjust the
|
||||
# profile path segment. Default profile path:
|
||||
# https://<service>.<namespace>.svc.cluster.local:8443/acme/profile/<profile-id>/directory
|
||||
server: https://certctl-test.default.svc.cluster.local:8443/acme/profile/prof-test/directory
|
||||
# caBundle: Audit fix #9. cert-manager validates the ACME server's
|
||||
# TLS chain before submitting any account/order/finalize. With a
|
||||
# self-signed bootstrap root, the ClusterIssuer MUST carry the root
|
||||
# explicitly via this field.
|
||||
caBundle: |
|
||||
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCi4uLgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
|
||||
privateKeySecretRef:
|
||||
name: certctl-test-trust-account-key
|
||||
solvers:
|
||||
# In trust_authenticated mode the solver is unused at the
|
||||
# validation step but cert-manager still requires at least one
|
||||
# solver in the spec. http01-via-ingress-nginx is the cheapest
|
||||
# placeholder shape that round-trips correctly through cert-
|
||||
# manager's validation webhooks.
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Phase 5 — lego-driven RFC 8555 conformance test. Drives a real ACME
|
||||
# client (lego v4) against the certctl ACME server in trust_authenticated
|
||||
# mode and exercises the full happy-path: register → new-order →
|
||||
# finalize → cert download.
|
||||
#
|
||||
# Caller (`make acme-rfc-conformance-test`) brings up the certctl
|
||||
# docker-compose stack first; this script just runs lego against it.
|
||||
#
|
||||
# Exits non-zero when CERTCTL_ACME_DIR is unset (the operator probably
|
||||
# meant to run the make target instead of this script directly).
|
||||
set -euo pipefail
|
||||
|
||||
if [[ -z "${CERTCTL_ACME_DIR:-}" ]]; then
|
||||
echo "CERTCTL_ACME_DIR unset — point at the certctl ACME directory URL"
|
||||
echo " e.g. CERTCTL_ACME_DIR=https://localhost:8443/acme/profile/prof-test/directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
WORKDIR="$(mktemp -d -t certctl-lego-conf-XXXXXX)"
|
||||
trap 'rm -rf "${WORKDIR}"' EXIT
|
||||
|
||||
# Skip TLS verification — the test stack uses certctl's self-signed
|
||||
# bootstrap cert. Operators in production use --insecure-skip-verify=false
|
||||
# and pass --tls-bundle for the real CA.
|
||||
LEGO_INSECURE="--insecure-skip-verify"
|
||||
|
||||
# Step 1: register a fresh account.
|
||||
echo "==> lego: register account"
|
||||
lego --server "${CERTCTL_ACME_DIR}" \
|
||||
--email conformance@example.com \
|
||||
--domains conformance.example.com \
|
||||
--path "${WORKDIR}" \
|
||||
--accept-tos \
|
||||
${LEGO_INSECURE} \
|
||||
register
|
||||
|
||||
# Step 2: issue a cert (trust_authenticated mode auto-resolves authzs).
|
||||
echo "==> lego: run (issue conformance.example.com)"
|
||||
lego --server "${CERTCTL_ACME_DIR}" \
|
||||
--email conformance@example.com \
|
||||
--domains conformance.example.com \
|
||||
--path "${WORKDIR}" \
|
||||
--accept-tos \
|
||||
${LEGO_INSECURE} \
|
||||
run
|
||||
|
||||
# Step 3: assert the cert PEM landed.
|
||||
CERT_FILE="${WORKDIR}/certificates/conformance.example.com.crt"
|
||||
if [[ ! -s "${CERT_FILE}" ]]; then
|
||||
echo "FAIL: ${CERT_FILE} is missing or empty"
|
||||
exit 1
|
||||
fi
|
||||
openssl x509 -in "${CERT_FILE}" -noout -subject -issuer -dates
|
||||
echo "PASS: lego conformance happy-path completed"
|
||||
@@ -0,0 +1,34 @@
|
||||
# Phase 5 — kind-cluster shape for the cert-manager integration test.
|
||||
#
|
||||
# Single control-plane + single worker. Port 8443 (certctl ACME server)
|
||||
# and 80/443 (ingress-nginx for HTTP-01 solver) are extra-mapped onto
|
||||
# the host so the in-test workflow can curl the in-cluster services.
|
||||
#
|
||||
# Used by: deploy/test/acme-integration/certmanager_test.go
|
||||
# Invoked via: kind create cluster --name certctl-acme-test --config <this file>
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
name: certctl-acme-test
|
||||
nodes:
|
||||
- role: control-plane
|
||||
kubeadmConfigPatches:
|
||||
- |
|
||||
kind: InitConfiguration
|
||||
nodeRegistration:
|
||||
kubeletExtraArgs:
|
||||
node-labels: "ingress-ready=true"
|
||||
extraPortMappings:
|
||||
# ingress-nginx HTTP — needed for the challenge-mode solver.
|
||||
- containerPort: 80
|
||||
hostPort: 80
|
||||
protocol: TCP
|
||||
- containerPort: 443
|
||||
hostPort: 443
|
||||
protocol: TCP
|
||||
# certctl-server HTTPS (the ACME directory + JWS-authenticated
|
||||
# POST surface). Only required for out-of-cluster smoke tests; the
|
||||
# in-cluster ClusterIssuer talks via Service DNS.
|
||||
- containerPort: 30843
|
||||
hostPort: 8443
|
||||
protocol: TCP
|
||||
- role: worker
|
||||
@@ -0,0 +1,14 @@
|
||||
# Per-run artifacts. summary.json + summary.txt are regenerated on
|
||||
# every `make loadtest` run; committing them would create huge diffs
|
||||
# on each invocation. The README captures the canonical baseline
|
||||
# numbers manually.
|
||||
results/*
|
||||
!results/.gitkeep
|
||||
|
||||
# tls-init bind mount — server cert + key are regenerated on every
|
||||
# fresh run.
|
||||
certs/
|
||||
|
||||
# Bundle 10: target-tls-init bind mount — target sidecar starter cert is
|
||||
# regenerated on every fresh run alongside the server cert.
|
||||
fixtures/target-certs/
|
||||
@@ -0,0 +1,359 @@
|
||||
# certctl Load-Test Harness
|
||||
|
||||
Closes the **#8 acquisition-readiness blocker** from the 2026-05-01 issuer
|
||||
coverage audit (`cowork/issuer-coverage-audit-2026-05-01/RESULTS.md`).
|
||||
Pre-fix, certctl had zero benchmarks or load tests for any API path; an
|
||||
acquirer evaluating "can certctl handle our 50k-cert fleet at 47-day
|
||||
rotation" had nothing to point at. This harness is the substantiation.
|
||||
|
||||
## What it measures
|
||||
|
||||
A k6 driver hits two scenarios in parallel for 5 minutes at a fixed 50 req/s each (100 req/s combined):
|
||||
|
||||
1. **`POST /api/v1/certificates`** — the issuance-acceptance hot path.
|
||||
Exercises auth, JSON decode, validation, `service.CreateCertificate`,
|
||||
and the `managed_certificates` insert. This is the operator-facing
|
||||
request-acceptance throughput an automation client (Terraform,
|
||||
Crossplane, GitOps controller) would generate.
|
||||
2. **`GET /api/v1/certificates?per_page=50`** — the most-trafficked read
|
||||
endpoint. Exercises pagination + filtering on the cert list query.
|
||||
|
||||
Latency is reported as `avg / min / med / p95 / p99 / max`. The error
|
||||
floor is < 1% (any 4xx/5xx counts as failed).
|
||||
|
||||
## What it explicitly does NOT measure
|
||||
|
||||
- **Issuer connector latency.** Connector calls (DigiCert, ACME, Vault,
|
||||
AWS ACM PCA, etc.) happen asynchronously via the renewal scheduler.
|
||||
Their latency is pinned by the `certctl_issuance_duration_seconds{issuer_type=...}`
|
||||
Prometheus histogram (audit fix #4). Driving them through k6 would
|
||||
load-test someone else's API, which is wrong.
|
||||
- **Full ACME enrollment flow.** The audit prompt mentioned ACME-via-
|
||||
pebble; sustained 100/s through a multi-RTT order/challenge/finalize
|
||||
flow requires pebble tuning + crypto helpers k6 doesn't ship out of
|
||||
the box. Deferred to a follow-up.
|
||||
- **Bulk-revoke / bulk-renew.** Those are admin endpoints with their
|
||||
own throughput characteristics and warrant a separate scenario.
|
||||
- **Scheduler concurrency under bulk renewal.** That's audit fix #9's
|
||||
scope; the harness here measures the API tier, not the scheduler.
|
||||
|
||||
## Threshold contract
|
||||
|
||||
Any future change that breaches one of these fails the test:
|
||||
|
||||
| Scenario | p95 | p99 | Error rate |
|
||||
|---|---|---|---|
|
||||
| `issuance_acceptance` | < 2 s | < 5 s | n/a |
|
||||
| `list_certificates` | < 800 ms | < 2 s | n/a |
|
||||
| All requests | n/a | n/a | < 1% |
|
||||
|
||||
These are the regression guards, not the SLO. The SLO is whatever the
|
||||
operator chooses based on the baseline below.
|
||||
|
||||
## How to run
|
||||
|
||||
From the repo root:
|
||||
|
||||
```sh
|
||||
make loadtest
|
||||
```
|
||||
|
||||
This:
|
||||
|
||||
1. Builds the certctl image from the repo root `Dockerfile`.
|
||||
2. Spins up postgres, the tls-init bootstrap, certctl-server (with
|
||||
`CERTCTL_DEMO_SEED=true` so the FK rows the script needs exist),
|
||||
and the k6 driver.
|
||||
3. Runs the k6 script for ~5 minutes 5 seconds (5s stagger between
|
||||
scenarios + 5m duration).
|
||||
4. Prints the summary text to stdout.
|
||||
5. Exits non-zero if any threshold was breached.
|
||||
|
||||
The full machine-readable summary lands at
|
||||
`deploy/test/loadtest/results/summary.json` (gitignored). The
|
||||
human-readable summary lands at `results/summary.txt`.
|
||||
|
||||
To run against a server already booted on the host (skip the compose
|
||||
spin-up):
|
||||
|
||||
```sh
|
||||
docker run --rm \
|
||||
-e CERTCTL_BASE=https://localhost:8443 \
|
||||
-e CERTCTL_TOKEN=load-test-token \
|
||||
-e K6_INSECURE_SKIP_TLS_VERIFY=true \
|
||||
-v "$(pwd)/deploy/test/loadtest/k6.js:/scripts/k6.js:ro" \
|
||||
-v "$(pwd)/deploy/test/loadtest/results:/results" \
|
||||
--network host \
|
||||
grafana/k6:0.54.0 run /scripts/k6.js
|
||||
```
|
||||
|
||||
## Current baseline
|
||||
|
||||
The table below pairs each scenario's agreed minimum-acceptable
|
||||
threshold with a captured baseline row. The baseline rows are currently
|
||||
a sandbox-aggregate placeholder (see the footnote), not a canonical
|
||||
per-scenario capture. Once an operator captures real numbers on the
|
||||
canonical runner, they replace the placeholder rows and are committed
|
||||
here so future regressions have a diff target.
|
||||
|
||||
| Scenario | p50 | p95 | p99 | Error rate |
|
||||
|---|---|---|---|---|
|
||||
| **issuance_acceptance** (threshold) | — | < 2 s | < 5 s | < 1% |
|
||||
| **issuance_acceptance** (baseline)[^1] | 2.12 ms | 6.19 ms | 8.58 ms | 0.00% |
|
||||
| **list_certificates** (threshold) | — | < 800 ms | < 2 s | < 1% |
|
||||
| **list_certificates** (baseline)[^1] | 2.12 ms | 6.19 ms | 8.58 ms | 0.00% |
|
||||
|
||||
[^1]: **Sandbox-aggregate placeholder** — captured at HEAD on a Linux/aarch64
|
||||
unprivileged sandbox (no Docker, no GitHub-hosted runner). Both rows show
|
||||
the same aggregate combined-load numbers because the sandbox run did not
|
||||
break out per-scenario tags in `summary.json`. Treat these as a sanity
|
||||
floor (proof the API tier handles 100 req/s combined with zero errors and
|
||||
sub-10ms p99), **not** as the per-scenario baselines the threshold contract
|
||||
is written against. Replace via `gh workflow run loadtest.yml` on the
|
||||
canonical `ubuntu-latest` runner — that produces per-scenario tagged
|
||||
metrics in `summary.json`.
|
||||
|
||||
**Methodology of the sandbox-placeholder capture above:**
|
||||
- Hardware: Linux/aarch64 unprivileged sandbox (uid 1019, no root,
|
||||
~1.2 GiB free disk). NOT canonical hardware.
|
||||
- Postgres: 14.22 (Ubuntu, native binaries, unix-socket dir `/tmp/pg-sock`),
|
||||
unix sockets only, port 55432.
|
||||
- certctl: built from HEAD via `go build -o bin/certctl-server ./cmd/server`.
|
||||
- Concurrency: 50 req/s sustained per scenario, both scenarios in parallel
|
||||
(= 100 req/s combined).
|
||||
- Duration: **10 seconds** per scenario (NOT 5 minutes — sandbox bash-call
|
||||
budget is bounded; canonical-hardware run uses 5 minutes).
|
||||
- TLS: ECDSA-P256 self-signed `localhost` cert at `/tmp/certctl-tls/`.
|
||||
- Auth: api-key, single Bearer token (`CERTCTL_AUTH_SECRET=load-test-token`).
|
||||
- Rate limiting: **disabled** (`CERTCTL_RATE_LIMIT_ENABLED=false`) — without
|
||||
this, the 100 req/s combined load trips the default token-bucket and
|
||||
drives error rate to ~40%, masking real latency.
|
||||
- Encryption: `CERTCTL_CONFIG_ENCRYPTION_KEY` set (32+ bytes).
|
||||
- Captured: 2026-05-02. Total: 1002 requests, 100.15 req/s sustained,
|
||||
0 failures, 100% checks passed. Raw `summary.json` is not committed
|
||||
(gitignored per the existing `results/` convention).
|
||||
|
||||
**Methodology pinned at canonical baseline capture (replace placeholder):**
|
||||
- Hardware: GitHub-hosted `ubuntu-latest` runner (4 vCPU / 16 GiB / SSD).
|
||||
Run via `gh workflow run loadtest.yml`; raw `summary.json` is available
|
||||
for 90 days as a workflow artifact.
|
||||
- Postgres: 16-alpine in compose, default config.
|
||||
- certctl: image built from this repo at the commit referenced below.
|
||||
- Concurrency: 50 req/s sustained per scenario (100 req/s total).
|
||||
- Duration: 5 minutes per scenario, 5s stagger.
|
||||
- Auth: api-key (Bearer token, single key).
|
||||
- Encryption: `CERTCTL_CONFIG_ENCRYPTION_KEY` set (32+ bytes).
|
||||
|
||||
To recapture the baseline after a tuning commit:
|
||||
|
||||
```sh
|
||||
make loadtest
|
||||
# Inspect deploy/test/loadtest/results/summary.txt for the new numbers.
|
||||
# Update the table above + the methodology line, commit alongside the
|
||||
# tuning commit.
|
||||
```
|
||||
|
||||
## Interpreting a regression
|
||||
|
||||
If a future PR's `make loadtest` run pushes p99 above the threshold,
|
||||
the make target exits non-zero and CI fails. The summary.txt prints
|
||||
which threshold breached. Triage:
|
||||
|
||||
1. Look at the per-scenario `http_req_duration` p95 + p99 in
|
||||
`summary.json`. If only one scenario regressed, the change is
|
||||
localized to that endpoint's hot path.
|
||||
2. Look at the `iteration_duration` per scenario — if total iteration
|
||||
time grew but `http_req_duration` is flat, the latency is in k6
|
||||
client setup (rare; suggests something changed in the script).
|
||||
3. Compare against the committed baseline. If p99 was 800 ms at
|
||||
baseline and is now 1.5 s but still under the 5 s threshold, the
|
||||
change is below the regression guard but still meaningful — flag
|
||||
in the PR description.
|
||||
|
||||
The harness deliberately does NOT auto-tune. Tuning is informed by the
|
||||
data; tuning commits land separately, each with their own captured
|
||||
baseline update.
|
||||
|
||||
## CI cadence
|
||||
|
||||
Defined in `.github/workflows/loadtest.yml`:
|
||||
|
||||
- **`workflow_dispatch`** — manual trigger from the Actions tab. Used
|
||||
before tagging a release or after a meaningful tuning commit.
|
||||
- **Weekly cron** — Mondays at 06:00 UTC. Catches gradual regressions
|
||||
from cumulative changes that no single PR triggered.
|
||||
|
||||
The workflow does **not** run per-push. Load tests are minutes long
|
||||
and would not provide useful per-PR signal; per-push pressure goes
|
||||
through `make verify` (which is fast) and the deploy-vendor-e2e job.
|
||||
|
||||
## Connector-tier baseline (Bundle 10 of the 2026-05-02 deployment-target audit)
|
||||
|
||||
Bundle 10 extended the harness to cover per-target-type handshake throughput
|
||||
in addition to the API-tier issuance/list throughput documented above. The
|
||||
docker-compose stack now boots four target sidecars (nginx, apache, haproxy,
|
||||
f5-mock) each serving a starter cert from a shared `target-tls-init`
|
||||
container, and k6 runs four additional scenarios — `nginx_handshake`,
|
||||
`apache_handshake`, `haproxy_handshake`, `f5_handshake` — at sustained
|
||||
100 conns/min for 5 minutes against each.
|
||||
|
||||
### What the connector tier measures
|
||||
|
||||
End-to-end TCP connect + TLS handshake + tiny HTTP request/response latency
|
||||
per target type, tagged via the k6 `target_type` label so summary.json's
|
||||
`connector_tier` section breaks the numbers out per sidecar:
|
||||
|
||||
```json
|
||||
{
|
||||
"connector_tier": {
|
||||
"nginx": { "p50": ..., "p95": ..., "p99": ..., "error_rate": ..., "iterations": ... },
|
||||
"apache": { ... },
|
||||
"haproxy": { ... },
|
||||
"f5": { ... }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This validates the target sidecar daemons are operational under sustained
|
||||
connection load. Procurement asks "can certctl's nginx target handle 5,000
|
||||
endpoints at 47-day rotation?" — the connector code's correctness is pinned
|
||||
by per-connector unit tests; **the underlying daemon's connection-rate
|
||||
ceiling is what these scenarios pin**.
|
||||
|
||||
### What the connector tier explicitly does NOT measure (v1)
|
||||
|
||||
- **The full agent-driven deploy hot path.** v1 measures handshake
|
||||
throughput against the sidecars directly. v2 of the harness is a
|
||||
follow-up that POSTs cert requests bound to per-target-type targets,
|
||||
polls the deployments endpoint until the agent reports complete, and
|
||||
measures the full POST → poll → cert-served loop. v2 needs the agent
|
||||
registration + target-binding API surface plumbed end-to-end in the
|
||||
loadtest stack — meaningful work, but not a blocker for the connection-
|
||||
rate procurement question.
|
||||
- **Kubernetes connector.** kind-in-docker requires `privileged: true`
|
||||
and is operationally fragile in CI. Deferred until Bundle 2 (real
|
||||
`k8s.io/client-go`) lands and a CI-friendly envtest harness is wired.
|
||||
- **Real F5 BIG-IP.** The harness uses the in-tree `f5-mock-icontrol`
|
||||
Go server (already used by the deploy-vendor-e2e CI job). Real F5
|
||||
appliance benchmarking is out of scope; operators with a real F5
|
||||
vagrant box per `docs/connector-f5.md` can substitute it manually.
|
||||
|
||||
### Threshold contract
|
||||
|
||||
Defined in `k6.js`'s `thresholds` block. Any change pushing past these
|
||||
fails the test:
|
||||
|
||||
| Target type | p95 | p99 | Error rate |
|
||||
|---|---|---|---|
|
||||
| `nginx` | < 1 s | < 3 s | < 1% (global) |
|
||||
| `apache` | < 1 s | < 3 s | < 1% (global) |
|
||||
| `haproxy` | < 1 s | < 3 s | < 1% (global) |
|
||||
| `f5` | < 1.5 s | < 5 s | < 1% (global) |
|
||||
|
||||
f5-mock's threshold is looser because the iControl REST handler does
|
||||
slightly more work per request (login+upload+install dance the F5
|
||||
connector itself drives — not exercised here, but the daemon's request
|
||||
handler is heavier).
|
||||
|
||||
### Connector-tier captured baseline
|
||||
|
||||
| Target type | p50 | p95 | p99 | Error rate | Iterations |
|
||||
|---|---|---|---|---|---|
|
||||
| **nginx** (threshold) | — | < 1 s | < 3 s | < 1% | n/a |
|
||||
| **nginx** (baseline) | TBD | TBD | TBD | TBD | TBD |
|
||||
| **apache** (threshold) | — | < 1 s | < 3 s | < 1% | n/a |
|
||||
| **apache** (baseline) | TBD | TBD | TBD | TBD | TBD |
|
||||
| **haproxy** (threshold) | — | < 1 s | < 3 s | < 1% | n/a |
|
||||
| **haproxy** (baseline) | TBD | TBD | TBD | TBD | TBD |
|
||||
| **f5** (threshold) | — | < 1.5 s | < 5 s | < 1% | n/a |
|
||||
| **f5** (baseline) | TBD | TBD | TBD | TBD | TBD |
|
||||
|
||||
The `TBD` placeholders are deliberate: do **not** commit numeric values
|
||||
without running the loadtest on canonical hardware first. Numbers from a
|
||||
developer laptop are misleading. The first `gh workflow run loadtest.yml`
|
||||
on a clean GitHub runner captures the baseline; commit the captured numbers
|
||||
into the table above as a follow-up commit alongside the methodology line.
|
||||
|
||||
**Methodology pinned at baseline capture (canonical hardware):**
|
||||
|
||||
- Hardware: GitHub-hosted `ubuntu-latest` runners (currently 4 vCPU /
|
||||
16 GiB / SSD-backed). Operator captures from `gh workflow run loadtest.yml`
|
||||
to keep the hardware constant across runs.
|
||||
- Sidecar images: nginx:1.27-alpine, httpd:2.4-alpine, haproxy:2.9-alpine,
|
||||
in-tree f5-mock-icontrol (built from `deploy/test/f5-mock-icontrol/`).
|
||||
- Concurrency: 100 conns/min sustained per target type (400 conns/min
|
||||
total across the four target scenarios + 100 req/s on the API tier).
|
||||
- Duration: 5 minutes per scenario, 10s stagger between API tier and
|
||||
connector tier so warmup overlap doesn't skew the first 30 seconds.
|
||||
- TLS: starter cert from `target-tls-init` (ECDSA P-256, multi-SAN). The
|
||||
loadtest scenarios connect with `K6_INSECURE_SKIP_TLS_VERIFY=true`.
|
||||
|
||||
To recapture the connector-tier baseline after a tuning commit affecting
|
||||
target sidecars or the connector code:
|
||||
|
||||
```sh
|
||||
make loadtest
|
||||
# Inspect deploy/test/loadtest/results/summary.json for the
|
||||
# connector_tier object and update the table above.
|
||||
```
|
||||
|
||||
## Files in this directory
|
||||
|
||||
```
|
||||
deploy/test/loadtest/
|
||||
├── README.md (this file)
|
||||
├── docker-compose.yml
|
||||
├── k6.js (the load script)
|
||||
├── certs/ (gitignored — tls-init writes here)
|
||||
├── fixtures/ (Bundle 10: target sidecar configs + shared starter cert)
|
||||
│ ├── nginx.conf
|
||||
│ ├── httpd.conf
|
||||
│ ├── haproxy.cfg
|
||||
│ └── target-certs/ (gitignored — target-tls-init writes here)
|
||||
└── results/ (gitignored — k6 writes summary.{json,txt} here)
|
||||
```
|
||||
|
||||
## ACME flows (Phase 5)
|
||||
|
||||
The `deploy/test/loadtest/k6/acme_flow.js` scenario hammers the
|
||||
unauthenticated ACME surface (directory + new-nonce + ARI synthetic
|
||||
lookups) at constant 100 VUs for 5 minutes. JWS-signed paths
|
||||
(new-account / new-order / finalize) are intentionally out of scope:
|
||||
k6 doesn't ship JWS, and bundling lego inside k6 would obscure the
|
||||
underlying-server p95 we're trying to measure. Instead, the
|
||||
`make acme-rfc-conformance-test` target drives lego against the same
|
||||
stack for the full happy-path conformance gate.
|
||||
|
||||
Run it:
|
||||
|
||||
```sh
|
||||
cd deploy/test/loadtest
|
||||
docker compose up -d certctl postgres
|
||||
k6 run --env CERTCTL_ACME_DIRECTORY=https://localhost:8443/acme/profile/prof-test/directory \
|
||||
k6/acme_flow.js
|
||||
```
|
||||
|
||||
### Baseline (ACME flows, 100 VUs × 5m)
|
||||
|
||||
The baseline is operator-captured on a workstation-class machine with
|
||||
a single certctl-server container + a single postgres container.
|
||||
Re-capture after schema migrations or transport changes; commit the
|
||||
new numbers so regressions are visible in code review.
|
||||
|
||||
| Metric | Threshold | Last captured | Notes |
|
||||
|--------------------------------------------|-----------|---------------|-------|
|
||||
| `directory_duration` p95 | < 500 ms | _operator_ | Unauth GET; cache-friendly. |
|
||||
| `new_nonce_duration` p95 | < 300 ms | _operator_ | Single Postgres INSERT under the hood. |
|
||||
| `renewal_info_duration` p95 (synthetic id) | < 800 ms | _operator_ | Synthetic cert-id → 4xx fast path. |
|
||||
| `http_req_failed` rate | < 1% | _operator_ | Should be ~0 — failures here mean transport issues. |
|
||||
|
||||
Capture command: `make loadtest` after pointing the compose stack at
|
||||
the ACME flow scenario. Operators with kind / cert-manager available
|
||||
should pair this with `make acme-cert-manager-test` for end-to-end
|
||||
verification.
|
||||
|
||||
## Audit references
|
||||
|
||||
- API tier: `cowork/issuer-coverage-audit-2026-05-01/RESULTS.md` fix #8.
|
||||
- Connector tier: `cowork/deployment-target-audit-2026-05-02/RESULTS.md` Bundle 10.
|
||||
- ACME flows: Phase 5 master prompt (`cowork/acme-server-prompts/06-phase-5-certmanager-hardening-prompt.md`).
|
||||
@@ -0,0 +1,345 @@
|
||||
# =============================================================================
|
||||
# certctl Load-Test Harness — Docker Compose
|
||||
# =============================================================================
|
||||
#
|
||||
# Spins up a minimal certctl stack and runs a k6 driver against it to capture
|
||||
# p50 / p95 / p99 latency for the certificate-management API hot path AND
|
||||
# (Bundle 10 of the 2026-05-02 deployment-target audit) per-target-type
|
||||
# TCP+TLS handshake throughput against four target sidecars (nginx, apache,
|
||||
# haproxy, f5-mock).
|
||||
#
|
||||
# Stack:
|
||||
# 1. postgres — empty database (server runs migrations + seeds at boot)
|
||||
# 2. certctl-tls-init — one-shot init container; writes self-signed
|
||||
# server.crt/.key/ca.crt into ./certs (bind
|
||||
# mount, host-readable so the k6 container
|
||||
# can pin against it via volumes)
|
||||
# 3. certctl-server — HTTPS API on :8443, demo-seed enabled so
|
||||
# the k6 script has iss-local + an operator
|
||||
# + a team ready to reference in
|
||||
# CreateCertificate payloads
|
||||
# 4. target-tls-init — Bundle 10: shared starter cert+key for the
|
||||
# four target sidecars (nginx, apache,
|
||||
# haproxy, f5-mock). Each daemon boots with
|
||||
# this cert; the loadtest scenarios connect
|
||||
# at sustained rates to measure handshake
|
||||
# latency tagged by target_type.
|
||||
# 5. nginx-target — Bundle 10: HTTPS on internal :443.
|
||||
# 6. apache-target — Bundle 10: HTTPS on internal :443.
|
||||
# 7. haproxy-target — Bundle 10: HTTPS on internal :443.
|
||||
# 8. f5-mock-target — Bundle 10: iControl REST on internal :443
|
||||
# + plaintext HTTP on internal :8080. Runs
|
||||
# the in-tree f5-mock-icontrol image
|
||||
# (deploy/test/f5-mock-icontrol/).
|
||||
# 9. k6 — runs k6.js once and exits with the
|
||||
# threshold-driven exit code (zero on green,
|
||||
# non-zero on any threshold breach so
|
||||
# `make loadtest` surfaces regressions as a
|
||||
# failed shell command).
|
||||
#
|
||||
# Out of scope for v1 of the connector-tier harness (Bundle 10):
|
||||
# - Kubernetes target via kind-in-docker. kind requires `privileged: true`
|
||||
# and Docker-in-Docker semantics that are operationally fragile in CI;
|
||||
# the K8s connector loadtest is a follow-up that needs Bundle 2's real
|
||||
# k8s.io/client-go to land first.
|
||||
# - Full agent-driven deploy poll loop (POST cert → poll deployments →
|
||||
# verify served cert matches what was deployed). The harness measures
|
||||
# handshake throughput against the target sidecars directly — that's
|
||||
# enough to validate the sidecars are operational under load and gives
|
||||
# procurement a per-target latency number that doesn't depend on the
|
||||
# agent registration + target-binding API surface being plumbed
|
||||
# end-to-end in the loadtest stack.
|
||||
#
|
||||
# Usage: make loadtest (from the repo root)
|
||||
# Manual: cd deploy/test/loadtest && docker compose up --abort-on-container-exit --exit-code-from k6
|
||||
#
|
||||
# Audit reference (API tier): cowork/issuer-coverage-audit-2026-05-01/RESULTS.md fix #8.
|
||||
# Audit reference (connector tier): cowork/deployment-target-audit-2026-05-02/RESULTS.md Bundle 10.
|
||||
# =============================================================================
|
||||
|
||||
services:
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Self-signed TLS bootstrap. Mirrors the deploy/docker-compose.test.yml
|
||||
# tls-init pattern exactly: bind-mount instead of named volume so the host
|
||||
# (and the sibling k6 container) can read ca.crt without a chown dance.
|
||||
# See deploy/docker-compose.test.yml::certctl-tls-init for the full rationale.
|
||||
# ---------------------------------------------------------------------------
|
||||
certctl-tls-init:
|
||||
image: alpine/openssl:latest
|
||||
container_name: certctl-loadtest-tls-init
|
||||
restart: "no"
|
||||
entrypoint: /bin/sh
|
||||
command:
|
||||
- -c
|
||||
- |
|
||||
set -eu
|
||||
CERT=/etc/certctl/tls/server.crt
|
||||
KEY=/etc/certctl/tls/server.key
|
||||
CA=/etc/certctl/tls/ca.crt
|
||||
if [ -f "$$CERT" ] && [ -f "$$KEY" ] && [ -f "$$CA" ]; then
|
||||
echo "TLS cert already present — skipping generation"
|
||||
else
|
||||
mkdir -p /etc/certctl/tls
|
||||
openssl req -x509 -newkey ec \
|
||||
-pkeyopt ec_paramgen_curve:P-256 \
|
||||
-nodes \
|
||||
-keyout "$$KEY" \
|
||||
-out "$$CERT" \
|
||||
-days 3650 \
|
||||
-subj "/CN=certctl-server" \
|
||||
-addext "subjectAltName=DNS:certctl-server,DNS:localhost,IP:127.0.0.1"
|
||||
cp "$$CERT" "$$CA"
|
||||
echo "Generated self-signed TLS cert (ECDSA-P256, 3650d, CN=certctl-server)"
|
||||
fi
|
||||
chmod 0644 "$$CERT" "$$CA"
|
||||
chmod 0600 "$$KEY"
|
||||
volumes:
|
||||
- ./certs:/etc/certctl/tls
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database. The server runs migrations + seed.sql + (because
|
||||
# CERTCTL_DEMO_SEED=true below) seed_demo.sql at boot — so the load-test
|
||||
# k6 script can reference iss-local, o-alice, t-platform, and rp-default
|
||||
# without a separate seed step.
|
||||
# ---------------------------------------------------------------------------
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: certctl-loadtest-postgres
|
||||
environment:
|
||||
POSTGRES_DB: certctl
|
||||
POSTGRES_USER: certctl
|
||||
POSTGRES_PASSWORD: loadtestpass
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U certctl"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
start_period: 30s
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# certctl server. Built from the repo root Dockerfile (same as production).
|
||||
# Demo seed is enabled so referenced FK rows exist when the k6 script
|
||||
# POSTs CreateCertificate payloads. Auth is api-key with a deterministic
|
||||
# token the k6 script knows.
|
||||
# ---------------------------------------------------------------------------
|
||||
certctl-server:
|
||||
build:
|
||||
context: ../../..
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
HTTP_PROXY: ${HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${HTTPS_PROXY:-}
|
||||
NO_PROXY: ${NO_PROXY:-}
|
||||
container_name: certctl-loadtest-server
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
certctl-tls-init:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
CERTCTL_DATABASE_URL: postgres://certctl:loadtestpass@postgres:5432/certctl?sslmode=disable
|
||||
CERTCTL_SERVER_HOST: 0.0.0.0
|
||||
CERTCTL_SERVER_PORT: 8443
|
||||
CERTCTL_SERVER_TLS_CERT_PATH: /etc/certctl/tls/server.crt
|
||||
CERTCTL_SERVER_TLS_KEY_PATH: /etc/certctl/tls/server.key
|
||||
CERTCTL_LOG_LEVEL: warn
|
||||
CERTCTL_AUTH_TYPE: api-key
|
||||
CERTCTL_AUTH_SECRET: load-test-token
|
||||
CERTCTL_KEYGEN_MODE: agent
|
||||
# CERTCTL_DEMO_SEED=true triggers seed_demo.sql which creates iss-local,
|
||||
# o-alice, t-platform, rp-standard so CreateCertificate FK validation
|
||||
# has rows to bind to.
|
||||
CERTCTL_DEMO_SEED: "true"
|
||||
# Bigger body limit so listing 100s of certs in the GET scenario
|
||||
# doesn't 413 once the harness has been running for a few minutes.
|
||||
CERTCTL_MAX_BODY_SIZE: "10485760"
|
||||
# Encryption key (≥32 bytes per H-1 floor — the test compose's
|
||||
# documented value).
|
||||
CERTCTL_CONFIG_ENCRYPTION_KEY: "loadtest-key-must-be-32-bytes-long-yes"
|
||||
volumes:
|
||||
- ./certs:/etc/certctl/tls:ro
|
||||
healthcheck:
|
||||
# /healthz is unauthenticated. --no-check-certificate because the cert is self-signed.
|
||||
test: ["CMD-SHELL", "wget -q --no-check-certificate -O- https://localhost:8443/healthz || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 30
|
||||
start_period: 60s
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bundle 10: target-side TLS bootstrap. Mints a single ECDSA-P256 self-
|
||||
# signed cert + key into a shared ./fixtures/target-certs/ volume that the
|
||||
# three target sidecars (nginx, apache, haproxy) mount read-only. f5-mock
|
||||
# generates its own self-signed cert at startup (see
|
||||
# deploy/test/f5-mock-icontrol/tls.go) so it doesn't need this volume.
|
||||
#
|
||||
# The loadtest scenarios don't care which cert the target serves — only
|
||||
# that the daemon is up and completing TLS handshakes at the configured
|
||||
# rate. The starter cert exists so each daemon boots green; once Bundle 2
|
||||
# (real K8s client) + agent-driven deploy poll is plumbed in v2 of the
|
||||
# harness, deploys would overwrite this cert.
|
||||
# ---------------------------------------------------------------------------
|
||||
target-tls-init:
|
||||
image: alpine/openssl:latest
|
||||
container_name: certctl-loadtest-target-tls-init
|
||||
restart: "no"
|
||||
entrypoint: /bin/sh
|
||||
command:
|
||||
- -c
|
||||
- |
|
||||
set -eu
|
||||
CERT=/certs/target.crt
|
||||
KEY=/certs/target.key
|
||||
PEM=/certs/target.pem
|
||||
if [ -f "$$CERT" ] && [ -f "$$KEY" ] && [ -f "$$PEM" ]; then
|
||||
echo "Target TLS cert already present — skipping generation"
|
||||
else
|
||||
mkdir -p /certs
|
||||
openssl req -x509 -newkey ec \
|
||||
-pkeyopt ec_paramgen_curve:P-256 \
|
||||
-nodes \
|
||||
-keyout "$$KEY" \
|
||||
-out "$$CERT" \
|
||||
-days 365 \
|
||||
-subj "/CN=loadtest-target" \
|
||||
-addext "subjectAltName=DNS:nginx-target,DNS:apache-target,DNS:haproxy-target,DNS:f5-mock-target,DNS:localhost,IP:127.0.0.1"
|
||||
# HAProxy expects cert+key concatenated into a single PEM file
|
||||
# at the path supplied to `bind ... ssl crt <path>`. Build it
|
||||
# alongside the cert/key pair so the haproxy-target's mount
|
||||
# works without a per-daemon ENTRYPOINT shim.
|
||||
cat "$$CERT" "$$KEY" > "$$PEM"
|
||||
echo "Generated target starter cert (ECDSA-P256, 365d, multi-SAN)"
|
||||
fi
|
||||
# World-readable so non-root container users (haproxy uses uid 99,
|
||||
# apache uses uid 1) can read the key. This is fine for a load-test
|
||||
# starter cert; production wouldn't do this.
|
||||
chmod 0644 "$$CERT" "$$KEY" "$$PEM"
|
||||
volumes:
|
||||
- ./fixtures/target-certs:/certs
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# nginx-target. Listens on internal :443 with the starter cert. The
|
||||
# k6 nginx_handshake scenario connects at 100 conns/min for 5 minutes.
|
||||
# ---------------------------------------------------------------------------
|
||||
nginx-target:
|
||||
image: nginx:1.27-alpine
|
||||
container_name: certctl-loadtest-nginx
|
||||
depends_on:
|
||||
target-tls-init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- ./fixtures/target-certs:/etc/nginx/certs:ro
|
||||
- ./fixtures/nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q --no-check-certificate -O- https://localhost:443/ || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
start_period: 15s
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# apache-target. Listens on internal :443. The bundled httpd.conf loads
|
||||
# the minimum module set + a single SSL-terminated vhost.
|
||||
# ---------------------------------------------------------------------------
|
||||
apache-target:
|
||||
image: httpd:2.4-alpine
|
||||
container_name: certctl-loadtest-apache
|
||||
depends_on:
|
||||
target-tls-init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- ./fixtures/target-certs:/usr/local/apache2/conf/certs:ro
|
||||
- ./fixtures/httpd.conf:/usr/local/apache2/conf/httpd.conf:ro
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q --no-check-certificate -O- https://localhost:443/ || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
start_period: 15s
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# haproxy-target. Listens on internal :443 with SSL termination. The
|
||||
# haproxy.cfg references /usr/local/etc/haproxy/certs/target.pem which
|
||||
# target-tls-init writes (cert + key concatenated).
|
||||
# ---------------------------------------------------------------------------
|
||||
haproxy-target:
|
||||
image: haproxy:2.9-alpine
|
||||
container_name: certctl-loadtest-haproxy
|
||||
depends_on:
|
||||
target-tls-init:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- ./fixtures/target-certs:/usr/local/etc/haproxy/certs:ro
|
||||
- ./fixtures/haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
|
||||
healthcheck:
|
||||
# HAProxy doesn't ship with wget/curl; use the openssl-based handshake
|
||||
# check instead. The 2>/dev/null redirect drops s_client's stderr output so
|
||||
# large logs don't accumulate over the run.
|
||||
test: ["CMD-SHELL", "echo Q | openssl s_client -connect localhost:443 -servername localhost 2>/dev/null | grep -q 'BEGIN CERTIFICATE'"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
start_period: 15s
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# f5-mock target. Re-uses the in-tree f5-mock-icontrol image (already
|
||||
# used by the deploy-vendor-e2e CI job). Generates its own self-signed
|
||||
# cert at startup; listens on internal :443 (HTTPS, iControl REST) and
|
||||
# :8080 (plaintext HTTP). The k6 f5_handshake scenario hits the
|
||||
# /healthz endpoint.
|
||||
# ---------------------------------------------------------------------------
|
||||
f5-mock-target:
|
||||
build: ../f5-mock-icontrol
|
||||
container_name: certctl-loadtest-f5-mock
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/healthz || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 3s
|
||||
retries: 20
|
||||
start_period: 15s
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# k6 driver. Pinned to a specific version so threshold expressions stay
|
||||
# stable across runs. K6_INSECURE_SKIP_TLS_VERIFY=true because the server cert is
|
||||
# self-signed; the load test isn't a TLS conformance test. The k6 process
|
||||
# exits non-zero if any threshold is breached, which the parent
|
||||
# `docker compose up --exit-code-from k6` propagates as the compose exit
|
||||
# code, which `make loadtest` then surfaces as the make-target exit code.
|
||||
# ---------------------------------------------------------------------------
|
||||
k6:
|
||||
image: grafana/k6:0.54.0
|
||||
container_name: certctl-loadtest-k6
|
||||
depends_on:
|
||||
certctl-server:
|
||||
condition: service_healthy
|
||||
# Bundle 10: wait for the four target sidecars to be healthy before
|
||||
# firing the connector-tier scenarios. Saves the operator from
|
||||
# spurious "connection refused" errors during the first ~15s of the
|
||||
# run while target daemons are coming up.
|
||||
nginx-target:
|
||||
condition: service_healthy
|
||||
apache-target:
|
||||
condition: service_healthy
|
||||
haproxy-target:
|
||||
condition: service_healthy
|
||||
f5-mock-target:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
CERTCTL_BASE: https://certctl-server:8443
|
||||
CERTCTL_TOKEN: load-test-token
|
||||
K6_INSECURE_SKIP_TLS_VERIFY: "true"
|
||||
# Bundle 10: per-target sidecar URLs the connector-tier scenarios
|
||||
# connect to. Internal docker-compose DNS — k6 resolves these via
|
||||
# the default user network's resolver.
|
||||
NGINX_TARGET_URL: https://nginx-target:443
|
||||
APACHE_TARGET_URL: https://apache-target:443
|
||||
HAPROXY_TARGET_URL: https://haproxy-target:443
|
||||
F5_TARGET_URL: https://f5-mock-target:443
|
||||
volumes:
|
||||
- ./k6.js:/scripts/k6.js:ro
|
||||
- ./results:/results
|
||||
command:
|
||||
- run
|
||||
- --summary-export=/results/summary.json
|
||||
- /scripts/k6.js
|
||||
@@ -0,0 +1,29 @@
|
||||
# HAProxy target sidecar — Bundle 10 of the 2026-05-02 deployment-target audit.
|
||||
#
|
||||
# Minimal SSL-terminating config that boots green with the starter cert
|
||||
# written by target-tls-init. The k6 connector-tier scenarios connect at
|
||||
# sustained 100 conns/min and measure handshake-completion latency.
|
||||
|
||||
global
|
||||
log stdout local0 warning
|
||||
maxconn 4096
|
||||
# Bundle 10: starter cert+key live at /usr/local/etc/haproxy/certs/.
|
||||
# HAProxy expects a SINGLE PEM file containing cert + key concatenated;
|
||||
# the target-tls-init container writes target.pem in that combined form.
|
||||
ssl-default-bind-options ssl-min-ver TLSv1.2
|
||||
|
||||
defaults
|
||||
log global
|
||||
mode http
|
||||
option dontlognull
|
||||
timeout connect 5s
|
||||
timeout client 30s
|
||||
timeout server 30s
|
||||
|
||||
frontend https-in
|
||||
bind *:443 ssl crt /usr/local/etc/haproxy/certs/target.pem
|
||||
default_backend ok
|
||||
|
||||
backend ok
|
||||
# Static 200 OK — handshake-only loadtest doesn't exercise the backend.
|
||||
http-request return status 200 content-type text/plain string "ok\n"
|
||||
@@ -0,0 +1,66 @@
|
||||
# Apache httpd target sidecar — Bundle 10 of the 2026-05-02 deployment-target audit.
|
||||
#
|
||||
# Self-contained httpd.conf that the httpd:2.4-alpine image will use as its
|
||||
# main configuration. Loads the minimum module set required for an HTTPS
|
||||
# server + serves a single SSL-enabled vhost backed by the starter cert
|
||||
# written by target-tls-init.
|
||||
|
||||
ServerRoot "/usr/local/apache2"
|
||||
Listen 443
|
||||
|
||||
# Module set is the minimum required for the SSL vhost below + the
|
||||
# directives Apache parses elsewhere in its bootstrap.
|
||||
LoadModule mpm_event_module modules/mod_mpm_event.so
|
||||
LoadModule authn_file_module modules/mod_authn_file.so
|
||||
LoadModule authn_core_module modules/mod_authn_core.so
|
||||
LoadModule authz_host_module modules/mod_authz_host.so
|
||||
LoadModule authz_user_module modules/mod_authz_user.so
|
||||
LoadModule authz_core_module modules/mod_authz_core.so
|
||||
LoadModule access_compat_module modules/mod_access_compat.so
|
||||
LoadModule auth_basic_module modules/mod_auth_basic.so
|
||||
LoadModule reqtimeout_module modules/mod_reqtimeout.so
|
||||
LoadModule filter_module modules/mod_filter.so
|
||||
LoadModule mime_module modules/mod_mime.so
|
||||
LoadModule log_config_module modules/mod_log_config.so
|
||||
LoadModule env_module modules/mod_env.so
|
||||
LoadModule headers_module modules/mod_headers.so
|
||||
LoadModule setenvif_module modules/mod_setenvif.so
|
||||
LoadModule version_module modules/mod_version.so
|
||||
LoadModule unixd_module modules/mod_unixd.so
|
||||
LoadModule dir_module modules/mod_dir.so
|
||||
LoadModule alias_module modules/mod_alias.so
|
||||
LoadModule socache_shmcb_module modules/mod_socache_shmcb.so
|
||||
LoadModule ssl_module modules/mod_ssl.so
|
||||
|
||||
User daemon
|
||||
Group daemon
|
||||
|
||||
ServerName apache-target
|
||||
ServerAdmin loadtest@certctl.local
|
||||
|
||||
# Quiet log so the run log stays diff-able. Errors still go to stderr
|
||||
# (/proc/self/fd/2) so docker compose logs surfaces them on startup
|
||||
# failure.
|
||||
ErrorLog /proc/self/fd/2
|
||||
LogLevel warn
|
||||
|
||||
DocumentRoot "/usr/local/apache2/htdocs"
|
||||
|
||||
# Bundle 10: starter cert+key from target-tls-init's shared volume.
|
||||
SSLEngine On
|
||||
SSLCertificateFile /usr/local/apache2/conf/certs/target.crt
|
||||
SSLCertificateKeyFile /usr/local/apache2/conf/certs/target.key
|
||||
SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1
|
||||
SSLCipherSuite HIGH:!aNULL:!MD5
|
||||
SSLHonorCipherOrder on
|
||||
|
||||
<Directory "/usr/local/apache2/htdocs">
|
||||
AllowOverride None
|
||||
Require all granted
|
||||
</Directory>
|
||||
|
||||
# Quiet response — the loadtest scenarios only care that the handshake
|
||||
# completes. The body content is irrelevant.
|
||||
<Location />
|
||||
Require all granted
|
||||
</Location>
|
||||
@@ -0,0 +1,36 @@
|
||||
# nginx target sidecar — Bundle 10 of the 2026-05-02 deployment-target audit.
|
||||
#
|
||||
# Minimal HTTPS-only config that boots green with a starter cert from the
|
||||
# shared target-tls-init container. The k6 connector-tier scenarios connect
|
||||
# at sustained 100 conns/min and measure handshake-completion latency.
|
||||
# Production NGINX configs are far richer; this is a load-test fixture, not
|
||||
# a deployment template.
|
||||
|
||||
worker_processes 1;
|
||||
events {
|
||||
worker_connections 1024;
|
||||
}
|
||||
|
||||
http {
|
||||
# Quiet log so the loadtest run doesn't fill the docker-compose log.
|
||||
access_log off;
|
||||
error_log /var/log/nginx/error.log warn;
|
||||
|
||||
server {
|
||||
listen 443 ssl;
|
||||
server_name _;
|
||||
|
||||
# Bundle 10: starter cert+key written by target-tls-init into the
|
||||
# shared volume. Not the deployed cert; this is what makes the
|
||||
# daemon boot green so the loadtest scenarios have something to
|
||||
# handshake against.
|
||||
ssl_certificate /etc/nginx/certs/target.crt;
|
||||
ssl_certificate_key /etc/nginx/certs/target.key;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
location / {
|
||||
return 200 "ok\n";
|
||||
add_header Content-Type text/plain;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,355 @@
|
||||
// certctl load-test driver — k6 v0.54+ JS API.
|
||||
//
|
||||
// Two tiers of scenarios:
|
||||
//
|
||||
// API tier (issuer-coverage audit fix #8, 2026-05-01):
|
||||
// - issuance_acceptance: POST /api/v1/certificates throughput.
|
||||
// - list_certificates: GET /api/v1/certificates throughput.
|
||||
//
|
||||
// Connector tier (Bundle 10 of the deployment-target audit, 2026-05-02):
|
||||
// - nginx_handshake / apache_handshake / haproxy_handshake / f5_handshake:
|
||||
// per-target-type TCP+TLS handshake throughput against the four
|
||||
// target sidecars at sustained 100 conns/min for 5 minutes. Latency
|
||||
// is tagged by target_type so summary.json's connector_tier section
|
||||
// breaks out p50/p95/p99 per target.
|
||||
//
|
||||
// What the API tier measures (be honest about scope):
|
||||
// - POST /api/v1/certificates: auth + JSON decode + validation + service
|
||||
// CreateCertificate + DB insert + response. This is the operator-facing
|
||||
// request-acceptance throughput. The downstream issuer-connector call
|
||||
// happens asynchronously via the renewal scheduler (and is bounded
|
||||
// separately via CERTCTL_RENEWAL_CONCURRENCY — issuer audit fix #9).
|
||||
// - GET /api/v1/certificates: read path with pagination. Exercises the
|
||||
// cert list query, which is the most-called read endpoint in any UI/
|
||||
// automation client.
|
||||
//
|
||||
// What the connector tier measures:
|
||||
// - Per-target-type TCP+TLS handshake completion latency. Validates that
|
||||
// each target sidecar (nginx, apache, haproxy, f5-mock) is operational
|
||||
// and serving its starter cert under sustained connection load.
|
||||
// Procurement asks "can certctl's nginx target handle 5,000 endpoints
|
||||
// at 47-day rotation"; the answer requires (a) the connector code
|
||||
// handles deploys correctly (covered by per-connector unit tests) AND
|
||||
// (b) the underlying daemon serves TLS at the connection rates a
|
||||
// 5,000-endpoint fleet implies. The connector-tier scenarios pin (b).
|
||||
//
|
||||
// What this does NOT measure (documented limits, not lazy gaps):
|
||||
// - Issuer connector latency (DigiCert / ACME / Vault / etc. round-trips
|
||||
// to upstream CAs). Those are async; pin via the per-issuer-type
|
||||
// metrics instead (issuer audit fix #4:
|
||||
// certctl_issuance_duration_seconds).
|
||||
// - Full ACME enrollment (newOrder → challenge → finalize).
|
||||
// - The full agent-driven deploy hot path (POST cert with target
|
||||
// binding → poll deployments endpoint → verify served cert matches).
|
||||
// v1 of the connector-tier harness measures handshake throughput
|
||||
// against the sidecars directly. v2 is a follow-up that needs the
|
||||
// agent registration + target-binding API surface plumbed end-to-end
|
||||
// in the loadtest stack — a meaningful addition but not a blocker
|
||||
// for the Bundle 10 procurement question.
|
||||
// - Kubernetes connector. kind-in-docker requires `privileged: true`
|
||||
// and is operationally fragile in CI. Deferred until Bundle 2 (real
|
||||
// k8s.io/client-go) lands.
|
||||
//
|
||||
// Threshold contract:
|
||||
// - API tier: p99 < 5s for issuance, < 2s for list, error rate < 1%.
|
||||
// - Connector tier: p99 < 3s per handshake target (5s for f5-mock,
|
||||
// iControl REST is slower), error rate < 1%.
|
||||
// Any change pushing past these fails the workflow.
|
||||
//
|
||||
// CI gates the run behind workflow_dispatch + cron (NOT per-push — load
|
||||
// tests are too slow to gate per-PR signal).
|
||||
//
|
||||
// Audit references:
|
||||
// - API tier: cowork/issuer-coverage-audit-2026-05-01/RESULTS.md fix #8.
|
||||
// - Connector tier: cowork/deployment-target-audit-2026-05-02/RESULTS.md Bundle 10.
|
||||
|
||||
import http from 'k6/http';
|
||||
import { check } from 'k6';
|
||||
import { textSummary } from 'https://jslib.k6.io/k6-summary/0.0.2/index.js';
|
||||
|
||||
// __ENV.* lets the same script run unchanged on the operator's
|
||||
// workstation (CERTCTL_BASE=https://localhost:8443) and inside the
|
||||
// docker-compose stack (CERTCTL_BASE=https://certctl-server:8443).
|
||||
const BASE = __ENV.CERTCTL_BASE || 'https://localhost:8443';
|
||||
const TOKEN = __ENV.CERTCTL_TOKEN || 'load-test-token';
|
||||
|
||||
// Bundle 10: per-target sidecar URLs. Defaults match the docker-compose
|
||||
// stack's internal DNS; operators running k6 manually against a different
|
||||
// stack override these via env. Every URL has a non-empty default, so
|
||||
// all four connector scenarios always run; there is no skip guard.
|
||||
const NGINX_TARGET_URL = __ENV.NGINX_TARGET_URL || 'https://nginx-target:443';
|
||||
const APACHE_TARGET_URL = __ENV.APACHE_TARGET_URL || 'https://apache-target:443';
|
||||
const HAPROXY_TARGET_URL = __ENV.HAPROXY_TARGET_URL || 'https://haproxy-target:443';
|
||||
// f5-mock's iControl REST `/healthz` endpoint is the CI-friendly
|
||||
// per-handshake probe — hits the path the F5 connector itself uses for
|
||||
// reachability. Real F5 BIG-IP also exposes /healthz under /mgmt/.
|
||||
const F5_TARGET_URL = __ENV.F5_TARGET_URL || 'https://f5-mock-target:443';
|
||||
|
||||
// Demo seed (CERTCTL_DEMO_SEED=true) creates these rows; CreateCertificate
|
||||
// requires all four FKs to exist. Pre-baked here so the script has zero
|
||||
// dependency on test fixtures beyond the seed.
|
||||
const ISSUER_ID = 'iss-local';
|
||||
const OWNER_ID = 'o-alice';
|
||||
const TEAM_ID = 't-platform';
|
||||
const RENEWAL_POLICY = 'rp-standard';
|
||||
|
||||
// options is the k6 run configuration: two API-tier scenarios, four
// connector-tier scenarios, and the thresholds that gate the run.
//
// NOTE(review): nothing here sets insecureSkipTLSVerify, while the
// targets serve starter/bootstrap certs — presumably the compose stack
// supplies K6_INSECURE_SKIP_TLS_VERIFY or a trusted CA; confirm.
// NOTE(review): k6 reuses connections per VU unless noConnectionReuse
// is set, so the *_handshake scenarios may not perform a fresh TLS
// handshake on every request — confirm this matches the intent.
export const options = {
  scenarios: {
    // Issuance-acceptance throughput. constant-arrival-rate fires
    // requests at a fixed rate regardless of latency, which is the
    // right shape for capacity testing — VU-bound load (constant-vus)
    // would let slow responses backpressure the offered load and
    // mask actual capacity ceilings.
    issuance_acceptance: {
      executor: 'constant-arrival-rate',
      rate: 50,
      timeUnit: '1s',
      duration: '5m',
      preAllocatedVUs: 50,
      maxVUs: 200,
      exec: 'createCertificate',
      tags: { scenario: 'issuance_acceptance' },
    },
    // Read path. Same rate as issuance so the DB sees a balanced
    // mix; staggered start so warmup overlap doesn't skew the
    // first 30 seconds of either scenario.
    list_certificates: {
      executor: 'constant-arrival-rate',
      rate: 50,
      timeUnit: '1s',
      duration: '5m',
      preAllocatedVUs: 50,
      maxVUs: 200,
      exec: 'listCertificates',
      startTime: '5s',
      tags: { scenario: 'list_certificates' },
    },

    // Bundle 10: connector-tier per-target-type handshake scenarios.
    // 100 conns/min sustained for 5 minutes against each sidecar.
    // The handshake measurement captures TCP connect + TLS
    // handshake + tiny HTTP GET (`/` for nginx/apache/haproxy,
    // `/healthz` for f5-mock); k6's http_req_duration aggregates
    // all three so the numbers are end-to-end "respond to the
    // operator's connection" latency, not isolated TLS-handshake
    // microseconds.
    nginx_handshake: {
      executor: 'constant-arrival-rate',
      rate: 100,
      timeUnit: '1m',
      duration: '5m',
      preAllocatedVUs: 10,
      maxVUs: 50,
      exec: 'nginxHandshake',
      startTime: '10s',
      tags: { scenario: 'nginx_handshake', target_type: 'nginx' },
    },
    apache_handshake: {
      executor: 'constant-arrival-rate',
      rate: 100,
      timeUnit: '1m',
      duration: '5m',
      preAllocatedVUs: 10,
      maxVUs: 50,
      exec: 'apacheHandshake',
      startTime: '10s',
      tags: { scenario: 'apache_handshake', target_type: 'apache' },
    },
    haproxy_handshake: {
      executor: 'constant-arrival-rate',
      rate: 100,
      timeUnit: '1m',
      duration: '5m',
      preAllocatedVUs: 10,
      maxVUs: 50,
      exec: 'haproxyHandshake',
      startTime: '10s',
      tags: { scenario: 'haproxy_handshake', target_type: 'haproxy' },
    },
    f5_handshake: {
      executor: 'constant-arrival-rate',
      rate: 100,
      timeUnit: '1m',
      duration: '5m',
      preAllocatedVUs: 10,
      maxVUs: 50,
      exec: 'f5Handshake',
      startTime: '10s',
      tags: { scenario: 'f5_handshake', target_type: 'f5' },
    },
  },
  thresholds: {
    // API tier — issuer audit fix #8.
    'http_req_duration{scenario:issuance_acceptance}': ['p(99)<5000', 'p(95)<2000'],
    'http_req_duration{scenario:list_certificates}': ['p(99)<2000', 'p(95)<800'],

    // Bundle 10 connector tier. nginx/apache/haproxy are pure TLS
    // termination → tight thresholds. f5-mock includes a tiny Go
    // server response on top of the handshake → slightly looser.
    'http_req_duration{target_type:nginx}': ['p(99)<3000', 'p(95)<1000'],
    'http_req_duration{target_type:apache}': ['p(99)<3000', 'p(95)<1000'],
    'http_req_duration{target_type:haproxy}': ['p(99)<3000', 'p(95)<1000'],
    'http_req_duration{target_type:f5}': ['p(99)<5000', 'p(95)<1500'],

    // Surface-only thresholds. k6 publishes a tagged sub-metric in the
    // end-of-test summary only when a threshold references it, and the
    // connector_tier aggregation in handleSummary reads these keys for
    // error_rate / iterations. The expressions are always true, so they
    // never change whether the run passes.
    'http_req_failed{target_type:nginx}': ['rate>=0'],
    'http_req_failed{target_type:apache}': ['rate>=0'],
    'http_req_failed{target_type:haproxy}': ['rate>=0'],
    'http_req_failed{target_type:f5}': ['rate>=0'],
    'iterations{target_type:nginx}': ['count>=0'],
    'iterations{target_type:apache}': ['count>=0'],
    'iterations{target_type:haproxy}': ['count>=0'],
    'iterations{target_type:f5}': ['count>=0'],

    // < 1% error rate across ALL scenarios. Auth failures, validation
    // failures, server errors, connection refused all count.
    'http_req_failed': ['rate<0.01'],
  },
  // Trend statistics computed for the summary. 'med', 'p(95)', 'p(99)',
  // 'max' and 'avg' are the ones the connector_tier aggregation reads.
  summaryTrendStats: ['avg', 'min', 'med', 'p(95)', 'p(99)', 'max'],
};
|
||||
|
||||
// uniqueCN builds a CommonName that is unique per (VU, iteration,
// wall-clock millisecond). Unique names avoid unique-constraint
// violations on the managed_certificates row (the table has a unique
// index on (issuer_id, name), so two parallel POSTs with the same Name
// would 409 rather than 201).
function uniqueCN() {
  const stamp = __VU + '-' + __ITER + '-' + Date.now();
  return 'loadtest-' + stamp + '.example.test';
}
|
||||
|
||||
// createCertificate is the exec target of the issuance_acceptance
// scenario: it POSTs one certificate with a fresh unique name and
// checks that the server answers 201.
export function createCertificate() {
  const commonName = uniqueCN();
  const body = {
    name: commonName,
    common_name: commonName,
    issuer_id: ISSUER_ID,
    owner_id: OWNER_ID,
    team_id: TEAM_ID,
    renewal_policy_id: RENEWAL_POLICY,
    environment: 'production',
    sans: [commonName],
  };
  const params = {
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${TOKEN}`,
    },
    tags: { scenario: 'issuance_acceptance' },
  };

  const response = http.post(`${BASE}/api/v1/certificates`, JSON.stringify(body), params);

  const isCreated = (r) => r.status === 201;
  check(response, { 'create status 201': isCreated });
}
|
||||
|
||||
// listCertificates is the exec target of the list_certificates
// scenario: it GETs one 50-row page of certificates and checks for 200.
export function listCertificates() {
  const params = {
    headers: { 'Authorization': `Bearer ${TOKEN}` },
    tags: { scenario: 'list_certificates' },
  };

  const response = http.get(`${BASE}/api/v1/certificates?per_page=50`, params);

  const isOk = (r) => r.status === 200;
  check(response, { 'list status 200': isOk });
}
|
||||
|
||||
// --- Bundle 10: connector-tier handshake scenarios ---
|
||||
//
|
||||
// Each per-target function does a single HTTPS GET against its target
|
||||
// sidecar. k6's http_req_duration metric captures TCP connect + TLS
|
||||
// handshake + HTTP request/response — that's the end-to-end "connection
|
||||
// readiness" latency a deploy connector cares about. The target_type
|
||||
// tag groups results in summary.json's connector_tier section.
|
||||
//
|
||||
// Status-check threshold: any 4xx/5xx counts as failed (k6 default
|
||||
// behaviour for http_req_failed). f5-mock's /healthz returns 200; the
|
||||
// other three nginx/apache/haproxy default vhost configs all return
|
||||
// 200 on `/`.
|
||||
//
|
||||
// Bundle 10 of the 2026-05-02 deployment-target audit.
|
||||
|
||||
// nginxHandshake issues one HTTPS GET to `/` on the nginx target and
// checks for a 2xx status.
export function nginxHandshake() {
  const params = { tags: { scenario: 'nginx_handshake', target_type: 'nginx' } };
  const response = http.get(`${NGINX_TARGET_URL}/`, params);
  const is2xx = (r) => r.status >= 200 && r.status < 300;
  check(response, { 'nginx 2xx': is2xx });
}
|
||||
|
||||
// apacheHandshake issues one HTTPS GET to `/` on the apache target and
// checks for a 2xx status.
export function apacheHandshake() {
  const params = { tags: { scenario: 'apache_handshake', target_type: 'apache' } };
  const response = http.get(`${APACHE_TARGET_URL}/`, params);
  const is2xx = (r) => r.status >= 200 && r.status < 300;
  check(response, { 'apache 2xx': is2xx });
}
|
||||
|
||||
// haproxyHandshake issues one HTTPS GET to `/` on the haproxy target
// and checks for a 2xx status.
export function haproxyHandshake() {
  const params = { tags: { scenario: 'haproxy_handshake', target_type: 'haproxy' } };
  const response = http.get(`${HAPROXY_TARGET_URL}/`, params);
  const is2xx = (r) => r.status >= 200 && r.status < 300;
  check(response, { 'haproxy 2xx': is2xx });
}
|
||||
|
||||
// f5Handshake issues one HTTPS GET to `/healthz` on the f5-mock target
// and checks for a 2xx status.
export function f5Handshake() {
  const params = { tags: { scenario: 'f5_handshake', target_type: 'f5' } };
  const response = http.get(`${F5_TARGET_URL}/healthz`, params);
  const is2xx = (r) => r.status >= 200 && r.status < 300;
  check(response, { 'f5 2xx': is2xx });
}
|
||||
|
||||
// handleSummary is k6's end-of-test hook. It returns a map of output
// destinations to content:
//   - /results/summary.json: the summary data plus a `connector_tier`
//     aggregation (per target_type latency / error breakdown), so
//     operators and CI can diff `connector_tier.<type>` between runs.
//   - /results/summary.txt: the plain-text summary, colours disabled.
//   - stdout: the same text summary with colours, so the docker
//     compose log shows what a manual run would print.
export function handleSummary(data) {
  const outputs = {};
  outputs['/results/summary.json'] = JSON.stringify(enrichWithConnectorTier(data), null, 2);
  outputs['/results/summary.txt'] = textSummary(data, { indent: ' ', enableColors: false });
  outputs.stdout = textSummary(data, { indent: ' ', enableColors: true });
  return outputs;
}
|
||||
|
||||
// enrichWithConnectorTier returns a shallow copy of the k6 summary data
// with an extra `connector_tier` key. For each target type (nginx,
// apache, haproxy, f5) the entry is either:
//   { p50, p95, p99, max, avg, error_rate, iterations }
// or null when data.metrics has no http_req_duration sub-metric for
// that target_type. Individual fields are null when the corresponding
// sub-metric or statistic is absent. The input object is not mutated.
function enrichWithConnectorTier(data) {
  // stat reads metric.values[name]; null when the metric, its values,
  // or the named statistic is missing.
  const stat = (metric, name) =>
    metric && metric.values ? (metric.values[name] ?? null) : null;

  const connectorTier = {};
  ['nginx', 'apache', 'haproxy', 'f5'].forEach((targetType) => {
    const selector = `{target_type:${targetType}}`;
    const duration = data.metrics[`http_req_duration${selector}`];
    const failed = data.metrics[`http_req_failed${selector}`];
    const iterations = data.metrics[`iterations${selector}`];

    // No latency data for this target → report the whole entry as null.
    if (!duration || !duration.values) {
      connectorTier[targetType] = null;
      return;
    }

    connectorTier[targetType] = {
      p50: stat(duration, 'med'),
      p95: stat(duration, 'p(95)'),
      p99: stat(duration, 'p(99)'),
      max: stat(duration, 'max'),
      avg: stat(duration, 'avg'),
      error_rate: stat(failed, 'rate'),
      iterations: stat(iterations, 'count'),
    };
  });

  // Additive merge: every existing summary field is carried over as-is.
  return { ...data, connector_tier: connectorTier };
}
|
||||
@@ -0,0 +1,80 @@
|
||||
// Phase 5 — k6 scenario for the ACME unauthenticated surface. Each VU
|
||||
// executes directory + new-nonce + a renewal-info (ARI) probe against
|
||||
// an operator-provided certctl-server (no JWS-signed steps). Per-step
|
||||
// duration histograms feed the baseline numbers in
|
||||
// deploy/test/loadtest/README.md (ACME flows section).
|
||||
//
|
||||
// Default scenario: 100 concurrent VUs for 5 minutes. Override via
|
||||
// K6_VUS / K6_DURATION env vars.
|
||||
//
|
||||
// Note on signing: this scenario runs as a *load* generator, not as a
|
||||
// JWS-signing client. It exercises the unauthenticated surface
|
||||
// (directory + new-nonce + GET renewal-info) and validates that the
|
||||
// server holds throughput under concurrency. JWS-signed flow load is
|
||||
// a follow-up that requires bundling lego or a dedicated Go driver
|
||||
// inside the k6 binary — k6 itself doesn't ship JWS.
|
||||
|
||||
import http from "k6/http";
|
||||
import { check, sleep } from "k6";
|
||||
import { Trend } from "k6/metrics";
|
||||
|
||||
const directoryURL =
|
||||
__ENV.CERTCTL_ACME_DIRECTORY ||
|
||||
"https://certctl:8443/acme/profile/prof-test/directory";
|
||||
|
||||
// Run configuration for the ACME baseline scenario: a fixed pool of
// VUs (K6_VUS, default 100) looping for K6_DURATION (default 5m), with
// p95 thresholds on the three per-step Trend metrics and a <1% failed-
// request threshold.
const vuCount = parseInt(__ENV.K6_VUS || "100", 10);
const runDuration = __ENV.K6_DURATION || "5m";

export const options = {
  scenarios: {
    acme_directory_and_nonce: {
      executor: "constant-vus",
      vus: vuCount,
      duration: runDuration,
      gracefulStop: "30s",
    },
  },
  insecureSkipTLSVerify: true, // self-signed bootstrap cert
  thresholds: {
    "directory_duration": ["p(95)<500"],
    "new_nonce_duration": ["p(95)<300"],
    "renewal_info_duration": ["p(95)<800"],
    "http_req_failed": ["rate<0.01"],
  },
};
|
||||
|
||||
const directoryDuration = new Trend("directory_duration", true);
|
||||
const newNonceDuration = new Trend("new_nonce_duration", true);
|
||||
const renewalInfoDuration = new Trend("renewal_info_duration", true);
|
||||
|
||||
// Default VU iteration: fetch the ACME directory, then probe the
// new-nonce and renewal-info URLs it advertises, recording each step's
// duration in its own Trend. Ends with a 1s sleep to pace the loop; an
// iteration whose directory fetch fails returns before the sleep.
export default function () {
  // Step 1 — directory.
  let res = http.get(directoryURL);
  directoryDuration.add(res.timings.duration);
  check(res, { "directory 200": (r) => r.status === 200 });

  // Without a directory document there are no URLs to probe.
  if (res.status !== 200) return;
  const dir = res.json();

  // Step 2 — new-nonce.
  if (dir.newNonce) {
    res = http.head(dir.newNonce);
    newNonceDuration.add(res.timings.duration);
    check(res, {
      "new-nonce 200 + Replay-Nonce": (r) =>
        r.status === 200 && !!r.headers["Replay-Nonce"],
    });
  }

  // Step 3 — ARI smoke (with a deliberately-malformed cert-id to
  // exercise the error path; full happy-path needs a real cert which
  // requires JWS signing — out of scope for this baseline scenario).
  if (dir.renewalInfo) {
    // The 4xx here is the expected outcome. k6 counts status >= 400 as
    // a failed request by default, which would put roughly a third of
    // all requests into http_req_failed and make the rate<0.01
    // threshold unpassable. Declaring 400/404 as expected for this one
    // request keeps that threshold meaningful.
    res = http.get(dir.renewalInfo + "/" + "aaaa.bbbb", {
      responseCallback: http.expectedStatuses(400, 404),
    });
    renewalInfoDuration.add(res.timings.duration);
    // 400 (malformed cert-id, expected) OR 404 (cert not found).
    check(res, {
      "renewal-info 4xx for synthetic cert-id": (r) =>
        r.status === 400 || r.status === 404,
    });
  }

  sleep(1);
}
|
||||
@@ -0,0 +1,3 @@
|
||||
# Placeholder so `results/` exists in a fresh checkout. The k6
|
||||
# container mounts this directory and writes summary.{json,txt} into
|
||||
# it on every run; both outputs are gitignored.
|
||||
@@ -0,0 +1,172 @@
|
||||
# Caddy Integration Walkthrough
|
||||
|
||||
End-to-end recipe for issuing certs from a certctl-server deployment
|
||||
through Caddy 2.7+. Target audience: operator running Caddy on a VM
|
||||
or container who wants Caddy to ACME-issue from certctl instead of
|
||||
Let's Encrypt.
|
||||
|
||||
## Prereqs
|
||||
|
||||
- A reachable certctl-server with `CERTCTL_ACME_SERVER_ENABLED=true`
|
||||
and at least one profile whose `acme_auth_mode` is set. Profile
|
||||
setup is identical to the cert-manager walkthrough — see
|
||||
[`docs/acme-cert-manager-walkthrough.md`](./acme-cert-manager-walkthrough.md)
|
||||
Step 2.
|
||||
- Caddy 2.7.x or later. `caddy version` should show 2.7.0+.
|
||||
- Network reachability: Caddy → certctl-server's HTTPS listener (port
|
||||
8443 by default).
|
||||
- The certctl bootstrap CA, in PEM form, captured for the trust
|
||||
configuration below. Capture exactly the same way as the cert-manager
|
||||
walkthrough Step 3 — use `cat deploy/test/certs/ca.crt`.
|
||||
|
||||
## Step 1 — Configure Caddy
|
||||
|
||||
Caddy's ACME issuer is configured per-site via the `tls` directive's
|
||||
`issuer acme` block, or globally via the `acme_ca` global option (the
|
||||
ACME issuer's `ca` field in JSON config). Both point at the directory URL:
|
||||
|
||||
```
|
||||
{
|
||||
email ops@example.com
|
||||
}
|
||||
|
||||
example.com {
|
||||
tls {
|
||||
issuer acme {
|
||||
dir https://certctl.example.com:8443/acme/profile/prof-test/directory
|
||||
}
|
||||
}
|
||||
reverse_proxy localhost:8080
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `dir` must point at the directory URL (ending in `/directory`),
|
||||
not just the base. Caddy uses the directory document to discover
|
||||
the new-account / new-order URLs, exactly the same way cert-manager
|
||||
does.
|
||||
- `issuer acme` is the default; included here for clarity. Caddy can
|
||||
also be configured with `issuer zerossl` or `issuer internal`; for
|
||||
certctl integration, `acme` is the correct issuer.
|
||||
- Caddy auto-discovers `tls-alpn-01` first when port 443 is bound to
|
||||
Caddy, then falls back to HTTP-01. For `trust_authenticated` mode
|
||||
profiles, both work without solver round-trips.
|
||||
|
||||
## Step 2 — Trust the certctl bootstrap CA
|
||||
|
||||
Caddy validates the certctl-server's TLS chain before any ACME call,
|
||||
the same way cert-manager does. Two options for trust:
|
||||
|
||||
### Option A — OS trust store (preferred for VMs)
|
||||
|
||||
```
|
||||
sudo cp deploy/test/certs/ca.crt /usr/local/share/ca-certificates/certctl-bootstrap.crt
|
||||
sudo update-ca-certificates
|
||||
sudo systemctl restart caddy
|
||||
```
|
||||
|
||||
Caddy honors the system trust store via the Go runtime's
|
||||
`crypto/x509` defaults. After `update-ca-certificates`, Caddy's HTTPS
|
||||
client trusts certctl's self-signed root and the directory call
|
||||
succeeds.
|
||||
|
||||
### Option B — ACME issuer `trusted_roots` (for containerized deployments)
|
||||
|
||||
```
|
||||
example.com {
|
||||
tls {
|
||||
issuer acme {
|
||||
dir https://certctl.example.com:8443/acme/profile/prof-test/directory
|
||||
trusted_roots /etc/caddy/certctl-bootstrap.crt
|
||||
}
|
||||
}
|
||||
reverse_proxy localhost:8080
|
||||
}
|
||||
```
|
||||
|
||||
The `trusted_roots` line tells Caddy's ACME client to trust the certctl
|
||||
bootstrap root when talking to this directory, and scopes that trust
|
||||
to ACME calls for this site only. This is the right pattern for
|
||||
multi-tenant Caddy deployments where some sites trust certctl + others
|
||||
don't.
|
||||
|
||||
## Step 3 — Reload Caddy
|
||||
|
||||
```
|
||||
caddy validate --config /etc/caddy/Caddyfile
|
||||
sudo systemctl reload caddy
|
||||
```
|
||||
|
||||
Caddy reloads atomically; in-flight requests complete on the old
|
||||
config while new requests use the new ACME issuer. On the next
|
||||
`example.com` request, Caddy hits certctl's directory URL, registers
|
||||
an account, submits a new-order, and finalizes — typically completing
|
||||
in under 5 seconds for `trust_authenticated` mode.
|
||||
|
||||
## Step 4 — Verify
|
||||
|
||||
```
|
||||
caddy list-certificates
|
||||
# example.com (issuer=certctl.example.com): CN=example.com, valid until 2026-06-30
|
||||
```
|
||||
|
||||
The cert is in Caddy's certificate cache (`$XDG_DATA_HOME/caddy/certificates/`
|
||||
by default). Inspect:
|
||||
|
||||
```
|
||||
openssl x509 -in ~/.local/share/caddy/certificates/certctl.example.com-*/example.com/example.com.crt -noout -subject -issuer -dates
|
||||
# subject= CN=example.com
|
||||
# issuer= CN=certctl test internal CA
|
||||
```
|
||||
|
||||
(Path layout is Caddy-version-dependent; check `caddy environ` for the
|
||||
canonical data dir.)
|
||||
|
||||
On the certctl side, the operator's audit log captures the issuance
|
||||
event:
|
||||
|
||||
```
|
||||
psql -c "SELECT actor, action, resource_id FROM audit_events
|
||||
WHERE actor LIKE 'acme:%' ORDER BY created_at DESC LIMIT 5;"
|
||||
```
|
||||
|
||||
## Common failure modes
|
||||
|
||||
- **Caddy logs `tls: failed to verify certificate: x509: certificate
|
||||
signed by unknown authority`** → certctl bootstrap CA is not in
|
||||
Caddy's trust path. Re-do Step 2; verify with `curl --cacert
|
||||
/etc/caddy/certctl-bootstrap.crt https://certctl.example.com:8443/acme/profile/prof-test/directory`.
|
||||
- **Caddy logs `urn:ietf:params:acme:error:rateLimited`** → certctl
|
||||
per-account orders/hour limit hit (default 100/hr). Tune via
|
||||
`CERTCTL_ACME_SERVER_RATE_LIMIT_ORDERS_PER_HOUR` if you have
|
||||
legitimately high throughput.
|
||||
- **Caddy logs `urn:ietf:params:acme:error:rejectedIdentifier`** →
|
||||
the SAN list includes an identifier the certctl profile policy
|
||||
rejects. Cross-reference [`docs/acme-server.md` § Troubleshooting](./acme-server.md#certificate-readyfalse-with-rejectedidentifier).
|
||||
- **`badNonce` in Caddy logs** → clock skew or multi-replica certctl
|
||||
without sticky sessions; same fix as the cert-manager walkthrough.
|
||||
|
||||
## Cleanup
|
||||
|
||||
```
|
||||
caddy stop
|
||||
# remove the certctl-specific block from your Caddyfile
|
||||
sudo systemctl reload caddy
|
||||
# Optional: delete cached certs from the certctl directory namespace.
|
||||
rm -rf ~/.local/share/caddy/certificates/certctl.example.com-*
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [`docs/acme-server.md`](./acme-server.md) — canonical reference.
|
||||
- [`docs/acme-cert-manager-walkthrough.md`](./acme-cert-manager-walkthrough.md) —
|
||||
K8s-native equivalent.
|
||||
- [Caddy upstream ACME docs](https://caddyserver.com/docs/automatic-https#acme-issuer)
|
||||
— verify behavior pinned here against Caddy 2.7.x semantics.
|
||||
@@ -0,0 +1,254 @@
|
||||
# cert-manager Integration Walkthrough
|
||||
|
||||
End-to-end recipe for issuing certs from a certctl-server deployment
|
||||
through cert-manager 1.15+. Target audience: Kubernetes operator who
|
||||
has never deployed certctl before and wants a working
|
||||
`Certificate` → `Secret` flow on their cluster in under 30 minutes.
|
||||
|
||||
The Phase 5 integration test (`make acme-cert-manager-test`) automates
|
||||
exactly the recipe below. The YAML snippets in this doc are byte-equal
|
||||
to the files under `deploy/test/acme-integration/` — re-running the
|
||||
test from a fresh clone produces the same results documented here.
|
||||
|
||||
## Prereqs
|
||||
|
||||
- A Kubernetes cluster (kind / k3d / EKS / GKE / AKS / on-prem). For
|
||||
local trial, `kind v0.20+` works exactly the way the Phase 5 test
|
||||
uses it. The kind config lives at
|
||||
[`deploy/test/acme-integration/kind-config.yaml`](../deploy/test/acme-integration/kind-config.yaml).
|
||||
- `kubectl` v1.27+, `helm` v3.13+.
|
||||
- `cert-manager` v1.15.0 installed in the `cert-manager` namespace.
|
||||
If absent, run:
|
||||
|
||||
```
|
||||
bash deploy/test/acme-integration/cert-manager-install.sh
|
||||
```
|
||||
|
||||
which is the same idempotent installer the integration test uses.
|
||||
- A certctl Helm chart published to a registry your cluster can pull
|
||||
from. The Phase 5 test uses an `image.tag=test` placeholder; production
|
||||
deployments use the actual image tag for your release line.
|
||||
|
||||
## Step 1 — Deploy certctl-server
|
||||
|
||||
```
|
||||
helm install certctl-test deploy/helm/certctl/ \
|
||||
--set acmeServer.enabled=true \
|
||||
--set acmeServer.defaultProfileId=prof-test \
|
||||
--set image.tag=test
|
||||
kubectl wait --for=condition=Available --timeout=3m deployment/certctl-test
|
||||
```
|
||||
|
||||
`acmeServer.enabled=true` flips the `CERTCTL_ACME_SERVER_ENABLED`
|
||||
env var which gates the ACME route registration.
|
||||
`acmeServer.defaultProfileId` sets `CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID`
|
||||
so the `/acme/*` shorthand path mirrors the per-profile path family.
|
||||
|
||||
## Step 2 — Create the certctl profile
|
||||
|
||||
The ACME server requires a `certificate_profiles` row to bind issuance
|
||||
to. Create one via the certctl API or GUI; for the simplest case set
|
||||
`acme_auth_mode='trust_authenticated'`:
|
||||
|
||||
```
|
||||
curl -X POST https://certctl-test.default.svc.cluster.local:8443/api/profiles \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H "Authorization: Bearer $CERTCTL_API_KEY" \
|
||||
-d '{
|
||||
"id": "prof-test",
|
||||
"name": "ACME test profile",
|
||||
"issuer_id": "iss-internal-ca",
|
||||
"max_ttl_seconds": 7776000,
|
||||
"acme_auth_mode": "trust_authenticated"
|
||||
}'
|
||||
```
|
||||
|
||||
Auth-mode tradeoffs are covered in
|
||||
[`docs/acme-server.md` § Auth-mode decision tree](./acme-server.md#auth-mode-decision-tree).
|
||||
For first-time deployments, `trust_authenticated` is the right default.
|
||||
|
||||
## Step 3 — Capture the certctl bootstrap CA
|
||||
|
||||
cert-manager validates the certctl-server's TLS chain before sending
|
||||
any account / order / finalize JWS. With certctl's self-signed
|
||||
bootstrap cert (the demo default at `deploy/test/certs/server.crt`),
|
||||
cert-manager rejects the directory URL with
|
||||
`x509: certificate signed by unknown authority` unless you feed the
|
||||
bootstrap CA in.
|
||||
|
||||
```
|
||||
cat deploy/test/certs/ca.crt | base64 -w0
|
||||
```
|
||||
|
||||
Capture the output for Step 4. This is **the** single biggest first-
|
||||
time-deploy footgun on the cert-manager integration path. The reference
|
||||
recipe lives in
|
||||
[`docs/acme-server.md` § TLS trust bootstrap](./acme-server.md#tls-trust-bootstrap-read-this-before-configuring-cert-manager).
|
||||
|
||||
## Step 4 — Apply the ClusterIssuer
|
||||
|
||||
```yaml
|
||||
# Phase 5 — sample ClusterIssuer for the certctl trust_authenticated
|
||||
# auth mode (RFC 8555 §6 + certctl auth_mode=trust_authenticated, where
|
||||
# the JWS-authenticated ACME account is trusted to issue any identifier
|
||||
# the profile policy permits — no per-identifier ownership challenges).
|
||||
#
|
||||
# Use this as the starting template for any internal-PKI rollout.
|
||||
# Replace the caBundle placeholder with the base64-encoded PEM of the
|
||||
# certctl-server's self-signed bootstrap root, then `kubectl apply`.
|
||||
#
|
||||
# Generate the caBundle via:
|
||||
# cat deploy/test/certs/ca.crt | base64 -w0
|
||||
# (See certctl/docs/acme-server.md "TLS trust bootstrap" section for the
|
||||
# end-to-end walkthrough — this is the single biggest first-time-deploy
|
||||
# footgun on cert-manager, captured as audit fix #9.)
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: certctl-test-trust
|
||||
spec:
|
||||
acme:
|
||||
email: test@example.com
|
||||
# Replace 'certctl-test' with your release name + adjust the
|
||||
# profile path segment. Default profile path:
|
||||
# https://<service>.<namespace>.svc.cluster.local:8443/acme/profile/<profile-id>/directory
|
||||
server: https://certctl-test.default.svc.cluster.local:8443/acme/profile/prof-test/directory
|
||||
# caBundle: Audit fix #9. cert-manager validates the ACME server's
|
||||
# TLS chain before submitting any account/order/finalize. With a
|
||||
# self-signed bootstrap root, the ClusterIssuer MUST carry the root
|
||||
# explicitly via this field.
|
||||
caBundle: |
|
||||
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCi4uLgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
|
||||
privateKeySecretRef:
|
||||
name: certctl-test-trust-account-key
|
||||
solvers:
|
||||
# In trust_authenticated mode the solver is unused at the
|
||||
# validation step but cert-manager still requires at least one
|
||||
# solver in the spec. http01-via-ingress-nginx is the cheapest
|
||||
# placeholder shape that round-trips correctly through cert-
|
||||
# manager's validation webhooks.
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
```
|
||||
|
||||
This block is byte-equal to
|
||||
[`deploy/test/acme-integration/clusterissuer-trust-authenticated.yaml`](../deploy/test/acme-integration/clusterissuer-trust-authenticated.yaml).
|
||||
Replace the `caBundle` placeholder with the base64 string from Step 3.
|
||||
The full reference YAML lives at
|
||||
[`deploy/test/acme-integration/clusterissuer-trust-authenticated.yaml`](../deploy/test/acme-integration/clusterissuer-trust-authenticated.yaml).
|
||||
|
||||
```
|
||||
kubectl apply -f deploy/test/acme-integration/clusterissuer-trust-authenticated.yaml
|
||||
kubectl wait --for=condition=Ready --timeout=2m clusterissuer/certctl-test-trust
|
||||
```
|
||||
|
||||
The solver block is a placeholder under `trust_authenticated` mode —
|
||||
cert-manager 1.15 still requires at least one solver in the spec, but
|
||||
certctl auto-resolves authzs without a solver round-trip. The
|
||||
http01-ingress-nginx shape validates against cert-manager's webhook
|
||||
without needing an actual ingress controller deployed.
|
||||
|
||||
For `challenge` mode profiles, swap to
|
||||
[`deploy/test/acme-integration/clusterissuer-challenge.yaml`](../deploy/test/acme-integration/clusterissuer-challenge.yaml)
|
||||
— same shape, but the solver is now load-bearing and you need
|
||||
ingress-nginx (or your chosen ingress class) actually deployed for
|
||||
HTTP-01 to work.
|
||||
|
||||
## Step 5 — Apply the Certificate
|
||||
|
||||
```yaml
|
||||
# Phase 5 — Certificate resource the integration test applies and
|
||||
# waits for. The certctl-test-trust ClusterIssuer (trust_authenticated
|
||||
# mode) issues the cert without any solver round-trip; the resulting
|
||||
# Secret 'test-com-tls' is asserted to carry tls.crt + tls.key.
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: test-com
|
||||
namespace: default
|
||||
spec:
|
||||
secretName: test-com-tls
|
||||
commonName: test.example.com
|
||||
dnsNames:
|
||||
- test.example.com
|
||||
- www.test.example.com
|
||||
issuerRef:
|
||||
name: certctl-test-trust
|
||||
kind: ClusterIssuer
|
||||
duration: 720h # 30d
|
||||
renewBefore: 240h # 10d
|
||||
```
|
||||
|
||||
This block is byte-equal to
|
||||
[`deploy/test/acme-integration/certificate-test.yaml`](../deploy/test/acme-integration/certificate-test.yaml).
|
||||
|
||||
```
|
||||
kubectl apply -f deploy/test/acme-integration/certificate-test.yaml
|
||||
kubectl wait --for=condition=Ready --timeout=3m certificate/test-com
|
||||
```
|
||||
|
||||
cert-manager creates an `Order`, the ACME flow runs against certctl,
|
||||
and the resulting Secret is populated.
|
||||
|
||||
## Step 6 — Verify
|
||||
|
||||
```
|
||||
kubectl get certificate test-com -o wide
|
||||
# NAME READY SECRET ISSUER STATUS AGE
|
||||
# test-com True test-com-tls certctl-test-trust Certificate is up to date and has not expired 42s
|
||||
|
||||
kubectl get secret test-com-tls -o yaml | yq '.data."tls.crt"' | base64 -d | openssl x509 -noout -subject -issuer -dates
|
||||
# subject= CN=test.example.com
|
||||
# issuer= CN=certctl test internal CA
|
||||
# notBefore=... notAfter=...
|
||||
```
|
||||
|
||||
Both the cert-manager `Certificate` resource and the underlying Secret
|
||||
are populated. The actor on the certctl side is `acme:<account-id>`,
|
||||
which you can correlate via the `audit_events` table:
|
||||
|
||||
```
|
||||
psql -c "SELECT created_at, action, resource_type, resource_id
|
||||
FROM audit_events
|
||||
WHERE actor LIKE 'acme:%'
|
||||
ORDER BY created_at DESC LIMIT 10;"
|
||||
```
|
||||
|
||||
## Common failure modes
|
||||
|
||||
These are operator-side; full troubleshooting reference is in
|
||||
[`docs/acme-server.md` § Troubleshooting](./acme-server.md#troubleshooting).
|
||||
|
||||
- `400 Bad Request: badNonce` → clock skew between certctl-server and
|
||||
cert-manager, or a multi-replica certctl fleet without sticky
|
||||
sessions.
|
||||
- `x509: certificate signed by unknown authority` → missing or stale
|
||||
`caBundle`. Re-run Step 3, paste the fresh value.
|
||||
- `connection refused` from the HTTP-01 validator → ingress controller
|
||||
not deployed, OR your network blocks port 80 inbound to the solver
|
||||
Ingress.
|
||||
- `Ready=False` with `rejectedIdentifier` → CSR has a SAN your profile
|
||||
policy doesn't permit. Decode the `subproblems` array of the RFC
|
||||
7807 problem doc.
|
||||
|
||||
## Cleanup
|
||||
|
||||
```
|
||||
kubectl delete -f deploy/test/acme-integration/certificate-test.yaml
|
||||
kubectl delete -f deploy/test/acme-integration/clusterissuer-trust-authenticated.yaml
|
||||
helm uninstall certctl-test
|
||||
# Optional: delete the certctl profile via API.
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [`docs/acme-server.md`](./acme-server.md) — canonical reference.
|
||||
- [`docs/acme-server-threat-model.md`](./acme-server-threat-model.md) —
|
||||
security posture.
|
||||
- [`docs/acme-caddy-walkthrough.md`](./acme-caddy-walkthrough.md) —
|
||||
Caddy-side recipe.
|
||||
- [`docs/acme-traefik-walkthrough.md`](./acme-traefik-walkthrough.md) —
|
||||
Traefik-side recipe.
|
||||
- [`deploy/test/acme-integration/`](../deploy/test/acme-integration/) —
|
||||
Phase 5 integration test (the same recipe, automated).
|
||||
@@ -0,0 +1,278 @@
|
||||
# ACME Server — Threat Model
|
||||
|
||||
Security posture for the certctl ACME server endpoint
|
||||
(`/acme/profile/<id>/*`). Read this before opening a PR that changes
|
||||
the JWS verifier, the challenge validators, the rate limiter, or the
|
||||
GC sweeper.
|
||||
|
||||
The threat model lives in this dedicated doc (rather than `docs/acme-server.md`)
|
||||
because security reviewers want a single concentrated reference.
|
||||
Production deployments under audit should treat this doc as the
|
||||
canonical answer to "how does certctl resist X?"
|
||||
|
||||
## Threat surface map
|
||||
|
||||
The ACME server has four ingress surfaces:
|
||||
|
||||
1. **JWS-authenticated POST endpoints** — new-account, new-order,
|
||||
finalize, key-change, revoke-cert, account update, order POST-as-GET.
|
||||
Authenticated by an ECDSA / RSA / EdDSA signature over the request.
|
||||
2. **Unauthenticated GET endpoints** — directory, new-nonce, ARI
|
||||
(renewal-info). Read-only; no authn.
|
||||
3. **Outbound challenge validators** — HTTP-01, DNS-01, TLS-ALPN-01.
|
||||
The certctl-server initiates outbound calls to operator-provided
|
||||
identifiers (the SAN list of the requested cert).
|
||||
4. **Scheduler-driven GC sweeper** — internal-only; no inbound surface.
|
||||
|
||||
Threat actors:
|
||||
|
||||
- **External Internet attacker** — no certctl credentials; can hit
|
||||
unauthenticated endpoints + observe TLS metadata.
|
||||
- **Authenticated ACME account holder (low-trust)** — has a valid
|
||||
account on a profile but should be bounded by profile policy +
|
||||
rate limits.
|
||||
- **On-path attacker** between certctl-server and a challenge target
|
||||
(HTTP-01 / DNS-01 / TLS-ALPN-01).
|
||||
- **Compromised cert holder** — has the private key of a previously-
|
||||
issued cert and wants to revoke/exfiltrate.
|
||||
- **Malicious operator with profile-write access** — can change a
|
||||
profile's `acme_auth_mode` or policy, but sits inside the trust boundary
|
||||
per certctl's threat model. Out of scope here; covered by certctl's
|
||||
RBAC + audit log.
|
||||
|
||||
## JWS forgery resistance
|
||||
|
||||
The verifier (`internal/api/acme/jws.go`) accepts only the closed
|
||||
allow-list `{RS256, ES256, EdDSA}`. The allow-list is passed to
|
||||
`jose.ParseSigned` so go-jose rejects every other algorithm at parse
|
||||
time, before any signature work.
|
||||
|
||||
Specific attacks blocked:
|
||||
|
||||
- **Algorithm confusion (`alg: none`)** — RFC 7518 §3.6's classic
|
||||
unauthenticated-fallback. Not in allow-list; rejected at parse.
|
||||
- **HS256 substitution (alg-confusion via symmetric)** — symmetric
|
||||
algs aren't in the allow-list; rejected at parse.
|
||||
- **Replayed nonce** — every JWS carries a nonce consumed via
|
||||
`UPDATE acme_nonces … WHERE used = FALSE` (a single statement;
|
||||
Postgres row-locking serializes the writes). A second consume of
|
||||
the same nonce sees `RowsAffected=0` and the verifier returns
|
||||
`badNonce`.
|
||||
- **URL spoofing** — the protected-header `url` field MUST match the
|
||||
request URL exactly (RFC 8555 §6.4); a JWS signed for one URL
|
||||
cannot be replayed against another.
|
||||
- **Multi-signature JWS** — RFC 8555 §6.2 forbids; the verifier
|
||||
rejects `len(jws.Signatures) != 1` explicitly.
|
||||
- **kid-vs-jwk confusion** — exactly one MUST be present per RFC 8555
|
||||
§6.2; both-present and neither-present are rejected.
|
||||
- **kid round-trip mismatch** — the verifier's `AccountKID` closure
|
||||
computes the canonical kid URL for the resolved account-id and
|
||||
compares to the inbound `kid`; cross-profile replay is rejected
|
||||
because the canonical URL differs.
|
||||
|
||||
The doubly-signed key-rollover JWS (RFC 8555 §7.3.5, Phase 4) gets
|
||||
its own dedicated verifier in `internal/api/acme/keychange.go`.
|
||||
Inner-only invariants enforced: MUST use `jwk` not `kid`, payload
|
||||
`account` MUST equal outer `kid`, payload `oldKey` MUST canonicalize-
|
||||
equal the registered key (RFC 7638 thumbprint, constant-time
|
||||
compare), inner `url` MUST equal outer `url`.
|
||||
|
||||
## Nonce store integrity
|
||||
|
||||
Nonces are persisted in PostgreSQL (`acme_nonces` table; migration
|
||||
000025) with a TTL set by `CERTCTL_ACME_SERVER_NONCE_TTL` (default
|
||||
5 min). The Phase 5 GC sweeper deletes used / expired rows every 1
|
||||
minute by default.
|
||||
|
||||
Why DB-backed and not in-memory:
|
||||
|
||||
- **Survives restart and spans replicas** — a multi-replica certctl-server fleet behind
|
||||
a load balancer can issue a nonce on replica A and consume it on
|
||||
replica B. In-memory state would force sticky sessions globally,
|
||||
which the operator can't guarantee in all topologies.
|
||||
- **Atomic consume** — a single `UPDATE ... WHERE used = FALSE`
|
||||
statement is the consume primitive; Postgres row-locking guarantees
|
||||
exactly one of two concurrent consumes wins.
|
||||
- **Expiry-bounded** — even if the GC sweeper were disabled, the
|
||||
nonce TTL is enforced at consume time
|
||||
(`AND expires_at > NOW()` in the UPDATE).
|
||||
|
||||
A nonce-store-side compromise would let an attacker forge nonces.
|
||||
Mitigation: the nonce table is in the same Postgres instance certctl
|
||||
already trusts; a DB compromise is broader than ACME-specific.
|
||||
|
||||
## HTTP-01 SSRF resistance
|
||||
|
||||
The HTTP-01 validator (Phase 3, `internal/api/acme/validators.go`)
|
||||
fetches `http://<identifier>/.well-known/acme-challenge/<token>`
|
||||
where the identifier is operator/client-controlled. Without
|
||||
mitigation, this is a textbook SSRF surface — internal services on
|
||||
RFC1918 / link-local / cloud-metadata addresses would be reachable.
|
||||
|
||||
Mitigations (defense in depth):
|
||||
|
||||
1. **Pre-dial check** — `validation.ValidateSafeURL` rejects URLs
|
||||
whose host parses as a literal reserved IP. Cheap early bail.
|
||||
2. **Per-dial check** — `validation.SafeHTTPDialContext` is installed
|
||||
on the `http.Transport`. Every dial re-resolves DNS, rejects
|
||||
reserved IPs, and **pins the resolved IP** (`net.JoinHostPort(ips[0],
|
||||
port)`) so a racing DNS rebinding cannot substitute a different IP
|
||||
between resolve and connect.
|
||||
3. **Per-redirect check** — Go's HTTP client re-dials on 3xx; the
|
||||
`DialContext` runs again, applying the same SSRF guards.
|
||||
4. **Body cap** — the validator's `io.LimitReader` caps response
|
||||
bodies at 16 KiB. A misbehaving target cannot DoS the validator
|
||||
pool with a multi-GB response.
|
||||
5. **Bounded redirects** — the validator caps redirects at 10 (Go
|
||||
default). A redirect-loop target is bounded.
|
||||
|
||||
Reserved IP set: loopback (127.0.0.0/8 + ::1), link-local
|
||||
(169.254.0.0/16 + fe80::/10), all RFC1918 (10/8, 172.16/12, 192.168/16),
|
||||
cloud-metadata literals (169.254.169.254 explicitly), broadcast,
|
||||
multicast, IPv4-mapped-IPv6 to a reserved IPv4. See
|
||||
`internal/validation/ssrf.go::isReservedIPForDial` for the full set.
|
||||
|
||||
CodeQL alert #23 flags `client.Do(req)` in the SCEP-probe call site
|
||||
as `go/request-forgery` despite the dial-time guard; the analyzer
|
||||
can't trace through a custom `Transport.DialContext`. Operator-
|
||||
acknowledged false positive (CLAUDE.md task #10) — see the SCEP
|
||||
probe's same-shaped defense for the audit trail.
|
||||
|
||||
## DNS-01 cache poisoning posture
|
||||
|
||||
The DNS-01 validator queries
|
||||
`_acme-challenge.<domain>` against a single resolver configured by
|
||||
`CERTCTL_ACME_SERVER_DNS01_RESOLVER` (default `8.8.8.8:53`).
|
||||
|
||||
Threat: an operator running a private resolver (typical in air-gapped
|
||||
deployments) inherits that resolver's cache-poisoning posture. A
|
||||
poisoned resolver could attest a TXT record the legitimate domain
|
||||
owner never published, allowing an attacker who controls the
|
||||
resolver to forge ACME challenges.
|
||||
|
||||
Mitigation:
|
||||
|
||||
- Default `8.8.8.8:53` is Google Public DNS — DNSSEC-validating,
|
||||
operationally hardened, well-monitored.
|
||||
- Operators choosing a private resolver own the cache-poisoning
|
||||
posture. The doc explicitly flags this in
|
||||
`docs/acme-server.md` § Configuration.
|
||||
- DNSSEC-validation is **not** enforced by the validator itself —
|
||||
the validator trusts the resolver's answer. Operators wanting
|
||||
strict DNSSEC validation should use a DNSSEC-validating resolver
|
||||
(e.g. `1.1.1.1` or a self-hosted Unbound).
|
||||
|
||||
## TLS-ALPN-01 challenge interception
|
||||
|
||||
RFC 8737 §3 explicitly says the validator MUST NOT verify the
|
||||
challenge target's certificate chain — the proof lives in the
|
||||
embedded `id-pe-acmeIdentifier` extension (OID 1.3.6.1.5.5.7.1.31)
|
||||
of the cert presented during the TLS handshake, not in the chain
|
||||
itself.
|
||||
|
||||
Implementation: `internal/api/acme/validators.go::TLSALPN01Validator`
|
||||
sets `tls.Config.InsecureSkipVerify = true` with a dedicated
|
||||
`//nolint:gosec` annotation citing RFC 8737 §3 and the L-001
|
||||
documentation row in `docs/tls.md`.
|
||||
|
||||
What this means for on-path attackers:
|
||||
|
||||
- An on-path attacker between certctl-server and the challenge target
|
||||
CAN intercept the TLS handshake and present a forged cert. The
|
||||
proof is the embedded extension byte-equality, which the attacker
|
||||
cannot generate without the account key — so interception alone
|
||||
doesn't grant cert issuance.
|
||||
- An attacker who has the account key already controls the account
|
||||
per RFC 8555; the TLS-ALPN-01 validator's interception window adds
|
||||
no incremental capability.
|
||||
|
||||
The integrity property TLS-ALPN-01 actually provides: the challenge
|
||||
target proves possession of the account-key-derived key authorization
|
||||
on a TLS connection bound to the requested identifier (port 443 of
|
||||
the SAN). Operators wanting CA/Browser-Forum-style WebPKI strictness
|
||||
should run a dedicated public-trust CA, not certctl.
|
||||
|
||||
## Rate-limit tuning
|
||||
|
||||
Phase 5 in-memory token buckets with per-(action, key) isolation.
|
||||
Defaults:
|
||||
|
||||
- `RATE_LIMIT_ORDERS_PER_HOUR=100` per account.
|
||||
- `RATE_LIMIT_CONCURRENT_ORDERS=5` per account (pending/ready/processing).
|
||||
- `RATE_LIMIT_KEY_CHANGE_PER_HOUR=5` per account.
|
||||
- `RATE_LIMIT_CHALLENGE_RESPONDS_PER_HOUR=60` per challenge-id.
|
||||
|
||||
Tuning:
|
||||
|
||||
- **Too loose** → enables abuse vectors. A compromised account could
|
||||
burn DB-row throughput; a runaway client could fill the validator
|
||||
pool.
|
||||
- **Too tight** → legitimate flake-out. cert-manager's exponential
|
||||
backoff after a `rateLimited` problem is conservative; a 1-hour
|
||||
cooldown is a long time for an operator hitting an unexpected limit.
|
||||
|
||||
Defaults intentionally err on the loose side — 100/hour
|
||||
is generous for any plausible per-account fleet (a 50k-cert
|
||||
deployment renewing at the 1/3-validity mark consumes ~12
|
||||
orders/year/cert ≈ 600k orders/year ≈ 70 orders/hour even spread
|
||||
evenly across accounts). Tighter limits are appropriate for
|
||||
deployments with many low-trust accounts.
|
||||
|
||||
The buckets are in-memory + per-replica. A 3-replica certctl-server
|
||||
fleet effectively has 3× the configured per-account throughput
|
||||
because each replica's bucket fills independently. For deployments
|
||||
where this matters operationally, the right answer is a shared rate-
|
||||
limit store (Redis / Postgres-backed); not blocking for current
|
||||
threat model where same-account requests typically pin to the same
|
||||
replica via session affinity.
|
||||
|
||||
## Audit trail
|
||||
|
||||
Every ACME state mutation writes a row to `audit_events`. Actor strings
|
||||
distinguish the auth path:
|
||||
|
||||
- `acme:<account-id>` — kid-path requests (the requesting account
|
||||
signed the JWS).
|
||||
- `acme-cert-key:<serial>` — jwk-path revoke (the cert's own private
|
||||
key signed the JWS).
|
||||
- `acme-system:gc` — scheduler-driven sweeps (no client request).
|
||||
|
||||
Operators querying by actor prefix can reconstruct the full history
|
||||
of any ACME-issued cert. See
|
||||
`docs/acme-server.md` § FAQ "What audit-log events fire" for the
|
||||
event-name catalog.
|
||||
|
||||
## Out-of-scope threats
|
||||
|
||||
Documented to set scope expectations for security reviewers:
|
||||
|
||||
- **DDoS at the TLS layer** — the certctl-server's TLS listener +
|
||||
upstream load balancer / WAF handle this. The ACME-specific rate
|
||||
limits don't substitute for upstream DDoS protection.
|
||||
- **cert-manager-side compromise** — if cert-manager is compromised,
|
||||
it has both the account key and the private keys of every issued
|
||||
cert. Out of certctl's trust boundary; operators run cert-manager
|
||||
with the same care they'd run any other secret-bearing operator.
|
||||
- **Compromised certctl-server filesystem** — the bootstrap CA key
|
||||
lives at `deploy/test/certs/ca.key` (or the operator-managed
|
||||
equivalent). A filesystem compromise is broader than ACME-specific
|
||||
and is covered by certctl's HSM / signer-driver architecture (see
|
||||
`docs/architecture.md` "Signer abstraction").
|
||||
- **Postgres compromise** — the nonce table, account JWKs, and
|
||||
audit log all live in the same Postgres instance. A DB compromise
|
||||
is broader than ACME-specific and is the operator's responsibility
|
||||
to mitigate via standard DB-hardening practices.
|
||||
- **Supply-chain attacks against go-jose / lib/pq** — handled by
|
||||
Dependabot + the `make verify` security gate; not ACME-specific.
|
||||
|
||||
## See also
|
||||
|
||||
- [`docs/acme-server.md`](./acme-server.md) — operator-facing reference.
|
||||
- [`docs/tls.md`](./tls.md) — TLS posture, including the L-001
|
||||
table of `InsecureSkipVerify` justifications (TLS-ALPN-01 row).
|
||||
- [`internal/api/acme/jws.go`](../internal/api/acme/jws.go) — verifier
|
||||
source.
|
||||
- [`internal/api/acme/validators.go`](../internal/api/acme/validators.go)
|
||||
— challenge validator pool.
|
||||
- [`internal/validation/ssrf.go`](../internal/validation/ssrf.go) —
|
||||
SSRF-defense primitives.
|
||||
@@ -0,0 +1,646 @@
|
||||
# certctl ACME Server (Built-in)
|
||||
|
||||
certctl ships an RFC 8555 + RFC 9773 ARI ACME server endpoint at
|
||||
`/acme/profile/<profile-id>/*`. Any RFC 8555 client (cert-manager 1.15+,
|
||||
Caddy, Traefik, win-acme, certbot, Posh-ACME) can integrate with certctl
|
||||
as an ACME issuer with no certctl-side modification — closing the
|
||||
"deploy a certctl agent on every K8s node" friction that loses deals to
|
||||
external PKI vendors today.
|
||||
|
||||
> **Phase status (2026-05-03):** Phase 6 — full operator-facing
|
||||
> reference. The functional surface is complete (Phases 1a-5); this
|
||||
> doc is the canonical procurement-readability reference. New: client-
|
||||
> walkthrough docs for [cert-manager](./acme-cert-manager-walkthrough.md),
|
||||
> [Caddy](./acme-caddy-walkthrough.md), and
|
||||
> [Traefik](./acme-traefik-walkthrough.md); a dedicated
|
||||
> [threat model](./acme-server-threat-model.md); a section-by-section
|
||||
> RFC 8555 + RFC 9773 conformance statement; a 5-failure-mode
|
||||
> troubleshooting playbook; a tested-clients version pinning table.
|
||||
> Track shipped phases via `git log --grep='acme-server:'`.
|
||||
|
||||
## Configuration
|
||||
|
||||
All ACME-server config uses the `CERTCTL_ACME_SERVER_*` env-var prefix
|
||||
(distinct from `CERTCTL_ACME_*` which configures the consumer-side
|
||||
issuer connector). The struct definition lives in
|
||||
`internal/config/config.go::ACMEServerConfig`.
|
||||
|
||||
| Env var | Default | Phase | Description |
|
||||
|--------------------------------------------------|------------------------|-------|-------------|
|
||||
| `CERTCTL_ACME_SERVER_ENABLED` | `false` | 1a | Master enable flag. Phase 1a's handler is constructed unconditionally so the registry shape stays stable; routes are registered in `internal/api/router/router.go::RegisterHandlers` regardless. Operators flip this on after configuring per-profile auth_mode. |
|
||||
| `CERTCTL_ACME_SERVER_DEFAULT_AUTH_MODE` | `trust_authenticated` | 1a | Default value for `certificate_profiles.acme_auth_mode` on newly-created profiles. Existing profiles retain their stored value. Per-profile column is the source of truth at request time. |
|
||||
| `CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID` | `""` | 1a | When set, `/acme/*` shorthand mirrors `/acme/profile/<DefaultProfileID>/*` for single-profile deployments. When empty, requests to the shorthand return RFC 7807 + RFC 8555 §6.7 `userActionRequired`. |
|
||||
| `CERTCTL_ACME_SERVER_NONCE_TTL` | `5m` | 1a | How long an issued ACME nonce remains valid before the JWS verifier (Phase 1b) returns `urn:ietf:params:acme:error:badNonce` per RFC 8555 §6.5.1. Tune up if cert-manager + certctl clocks frequently skew. |
|
||||
| `CERTCTL_ACME_SERVER_TOS_URL` | `""` | 1a | Optional `meta.termsOfService` URL in the directory document. |
|
||||
| `CERTCTL_ACME_SERVER_WEBSITE` | `""` | 1a | Optional `meta.website` URL in the directory document. |
|
||||
| `CERTCTL_ACME_SERVER_CAA_IDENTITIES` | (empty) | 1a | Comma-separated `meta.caaIdentities` list. |
|
||||
| `CERTCTL_ACME_SERVER_EAB_REQUIRED` | `false` | 1a | `meta.externalAccountRequired` advertisement. EAB enforcement is a follow-up; Phase 1a only advertises. |
|
||||
| `CERTCTL_ACME_SERVER_ORDER_TTL` | `24h` | 2 | Reserved field, parsed in Phase 1a so operators can set it ahead of Phase 2's order endpoints. |
|
||||
| `CERTCTL_ACME_SERVER_AUTHZ_TTL` | `24h` | 2 | Reserved. |
|
||||
| `CERTCTL_ACME_SERVER_HTTP01_CONCURRENCY` | `10` | 3 | Reserved. |
|
||||
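| `CERTCTL_ACME_SERVER_DNS01_RESOLVER` | `8.8.8.8:53` | 3 | `host:port` of the single resolver the DNS-01 validator queries for `_acme-challenge.<domain>`. The validator trusts this resolver's answers and does not enforce DNSSEC itself; operators pointing it at a private resolver own that resolver's cache-poisoning posture (see the [threat model](./acme-server-threat-model.md)). |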
|
||||
| `CERTCTL_ACME_SERVER_DNS01_CONCURRENCY` | `10` | 3 | Reserved. |
|
||||
| `CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY` | `10` | 3 | Reserved. |
|
||||
| `CERTCTL_ACME_SERVER_ARI_ENABLED` | `true` | 4 | Toggles the RFC 9773 ARI surface — both the `renewalInfo` URL in the directory document and the GET `/renewal-info/<cert-id>` handler. Set to `false` to drop ARI from the directory; ACME clients fall back to static renewal scheduling. |
|
||||
| `CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL` | `6h` | 4 | Server-policy `Retry-After` value the ARI handler emits on a 200 response. RFC 9773 §4.2 leaves this server-policy. Tighten to `1h` for short-lived certs; loosen to `24h` for standard 90-day certs. |
|
||||
| `CERTCTL_ACME_SERVER_RATE_LIMIT_ORDERS_PER_HOUR` | `100` | 5 | Per-account orders/hour cap. `0` disables. Hits return RFC 7807 + RFC 8555 §6.7 `urn:ietf:params:acme:error:rateLimited` with `Retry-After`. In-memory token-bucket; restart wipes the counter (eventual-consistency caps are acceptable). |
|
||||
| `CERTCTL_ACME_SERVER_RATE_LIMIT_CONCURRENT_ORDERS` | `5` | 5 | Per-account cap on simultaneously-active orders (status in pending/ready/processing). `0` disables. Same RFC 7807 + RFC 8555 §6.7 problem shape as the per-hour cap. |
|
||||
| `CERTCTL_ACME_SERVER_RATE_LIMIT_KEY_CHANGE_PER_HOUR` | `5` | 5 | Per-account key-rollover cap. `0` disables. Default 5/hour: rollovers should be rare; a flood is an attack signal. |
|
||||
| `CERTCTL_ACME_SERVER_RATE_LIMIT_CHALLENGE_RESPONDS_PER_HOUR` | `60` | 5 | Per-challenge-id respond cap. `0` disables. Defends against retry storms from a misbehaving client. Keyed by challenge-id (not account-id) so a flood against one challenge doesn't drain the account's whole budget. |
|
||||
| `CERTCTL_ACME_SERVER_GC_INTERVAL` | `1m` | 5 | Tick interval for the ACME GC scheduler loop. On each tick: (1) DELETE used / expired nonces; (2) UPDATE pending authzs whose `expires_at < NOW()` to `expired`; (3) UPDATE pending/ready/processing orders whose `expires_at < NOW()` to `invalid`. Each sweep is a single SQL statement; the loop is idempotent + bounded by a 1m per-sweep timeout. `0` disables the loop. |
|
||||
|
||||
## Per-profile auth mode
|
||||
|
||||
Two modes per `certificate_profiles.acme_auth_mode`:
|
||||
|
||||
- **`trust_authenticated`** (default for internal PKI). The JWS-
|
||||
authenticated ACME account is trusted to issue certs for any
|
||||
identifier the profile policy allows; there is no per-identifier
|
||||
ownership proof. The most common certctl use case.
|
||||
- **`challenge`**. Full HTTP-01 + DNS-01 + TLS-ALPN-01 validation per
|
||||
RFC 8555 §8. Required when certctl is exposing public-trust-style PKI.
|
||||
|
||||
A single certctl-server can serve both modes simultaneously — the mode
|
||||
is read from the bound profile's column at request time, not cached at
|
||||
server start. Operators can flip a profile's mode via SQL or the profile API, and the next
|
||||
order picks up the new mode without restart.
|
||||
|
||||
The `CERTCTL_ACME_SERVER_DEFAULT_AUTH_MODE` env var sets the default
|
||||
value for newly-created profiles (e.g. via the certctl API). Existing
|
||||
profile rows retain whatever value they were created with.
|
||||
|
||||
## TLS trust bootstrap (read this before configuring cert-manager)
|
||||
|
||||
When certctl-server uses a self-signed TLS bootstrap cert
|
||||
(`deploy/test/certs/server.crt` is the demo default; see
|
||||
[`docs/tls.md`](./tls.md)), cert-manager 1.15+ will refuse to talk to
|
||||
the directory URL unless the certctl root is trusted. The fix lives in
|
||||
`ClusterIssuer.spec.acme.caBundle`:
|
||||
|
||||
```yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: certctl-test
|
||||
spec:
|
||||
acme:
|
||||
server: https://certctl.example.com:8443/acme/profile/prof-corp/directory
|
||||
email: ops@example.com
|
||||
caBundle: |
|
||||
LS0tLS1CRUdJTi... # base64-encoded PEM of certctl's self-signed root
|
||||
privateKeySecretRef:
|
||||
name: certctl-test-account-key
|
||||
solvers:
|
||||
- http01:
|
||||
ingress:
|
||||
class: nginx
|
||||
```
|
||||
|
||||
The `caBundle` value is the base64-encoded PEM of the root that signed
|
||||
your certctl-server's TLS certificate. Extract it from your operator
|
||||
bootstrap (e.g. `cat deploy/test/certs/ca.crt | base64 -w0`).
|
||||
|
||||
This is the single biggest first-time-deploy footgun on the cert-manager
|
||||
integration path. The full walkthrough is in [`docs/acme-cert-manager-walkthrough.md`](./acme-cert-manager-walkthrough.md);
|
||||
the `caBundle` requirement is flagged here in Phase 1a's docs because
|
||||
operators hit it the moment they try to point a real ACME client at
|
||||
certctl.
|
||||
|
||||
## Auth-mode decision tree
|
||||
|
||||
Use `trust_authenticated` when:
|
||||
|
||||
- The certctl deployment serves **internal-only PKI** (intranet certs,
|
||||
service-mesh certs, IoT bootstrap). Identifiers in your CSRs are
|
||||
controlled by your infrastructure, not by the public Internet.
|
||||
- You don't have HTTP/DNS reachability **from certctl-server back to
|
||||
the ACME client's solver** (e.g., the client lives in an isolated
|
||||
network segment certctl-server can't reach).
|
||||
- You want the simplest cert-manager integration: cert-manager submits
|
||||
a CSR, certctl issues; no out-of-band ownership proof.
|
||||
- You're issuing under your own root CA whose trust is operator-managed
|
||||
(NOT WebPKI). Public CAs cannot use this mode — RFC 8555 §8 ownership
|
||||
proof is non-negotiable for public-trust roots.
|
||||
|
||||
Use `challenge` when:
|
||||
|
||||
- The deployment is **public-trust-style PKI** — even if your root is
|
||||
privately operated, you want CA/Browser Forum-style ownership-proof
|
||||
semantics so a stolen account key can't be used to issue for arbitrary
|
||||
identifiers.
|
||||
- You have HTTP-01 / DNS-01 / TLS-ALPN-01 reachability from the
|
||||
certctl-server to the ACME client's solver. (HTTP-01 needs port 80
|
||||
ingress to the client; DNS-01 needs DNS recursion; TLS-ALPN-01 needs
|
||||
port 443 ingress.)
|
||||
- You want defense-in-depth: an account-key compromise gains the
|
||||
attacker nothing without also compromising the solver-side
|
||||
infrastructure.
|
||||
|
||||
A single certctl-server can run both modes simultaneously — the auth
|
||||
mode is a per-profile column on `certificate_profiles.acme_auth_mode`,
|
||||
read at request time. Operators flip a profile's mode via SQL or the
|
||||
profile API, and the next order picks up the new mode without restart.
|
||||
|
||||
## Endpoints
|
||||
|
||||
Routes registered in `internal/api/router/router.go::RegisterHandlers`:
|
||||
|
||||
| Method | Path | RFC ref | Auth | Description |
|
||||
|--------|-------------------------------------------------------|-----------------|----------|-------------|
|
||||
| GET | `/acme/profile/{id}/directory` | RFC 8555 §7.1.1 | unauth | Per-profile directory document. |
|
||||
| HEAD | `/acme/profile/{id}/new-nonce` | RFC 8555 §7.2 | unauth | Returns 200 + Replay-Nonce header. |
|
||||
| GET | `/acme/profile/{id}/new-nonce` | RFC 8555 §7.2 | unauth | Returns 204 + Replay-Nonce header. |
|
||||
| POST | `/acme/profile/{id}/new-account` | RFC 8555 §7.3 | JWS jwk | Register a new account; idempotent re-registration of an existing JWK returns the existing row. |
|
||||
| POST | `/acme/profile/{id}/account/{acc_id}` | RFC 8555 §7.3.2 + §7.3.6 | JWS kid | Update contact list, deactivate, or POST-as-GET (RFC 8555 §6.3) to fetch the account. |
|
||||
| POST | `/acme/profile/{id}/new-order` | RFC 8555 §7.4 | JWS kid | Submit an order; identifier validation runs before order creation. |
|
||||
| POST | `/acme/profile/{id}/order/{ord_id}` | RFC 8555 §7.4 | JWS kid | POST-as-GET fetch of an order's current state. |
|
||||
| POST | `/acme/profile/{id}/order/{ord_id}/finalize` | RFC 8555 §7.4 | JWS kid | Submit the CSR + finalize. Issues + persists managed cert row + version. |
|
||||
| POST | `/acme/profile/{id}/authz/{authz_id}` | RFC 8555 §7.5 | JWS kid | POST-as-GET fetch of an authorization. |
|
||||
| POST | `/acme/profile/{id}/challenge/{chall_id}` | RFC 8555 §7.5.1 | JWS kid | Submit a challenge for validation. Dispatches to a bounded-concurrency worker pool; clients poll authz for the eventual result. |
|
||||
| POST | `/acme/profile/{id}/cert/{cert_id}` | RFC 8555 §7.4.2 | JWS kid | POST-as-GET cert chain download (PEM). |
|
||||
| POST | `/acme/profile/{id}/key-change` | RFC 8555 §7.3.5 | JWS kid (outer) + jwk (inner) | Doubly-signed account-key rollover. |
|
||||
| POST | `/acme/profile/{id}/revoke-cert` | RFC 8555 §7.6 | JWS kid OR jwk | Revoke a cert via the issuing account's key OR the cert's own private key. Routes through the certctl revocation pipeline. |
|
||||
| GET | `/acme/profile/{id}/renewal-info/{cert_id}` | RFC 9773 | unauth | Fetch the suggested renewal window for a cert (cert-id is `base64url(AKI).base64url(serial)` per RFC 9773 §4.1). Response carries `Retry-After`. |
|
||||
| GET | `/acme/directory` | RFC 8555 §7.1.1 | unauth | Shorthand path; mirrors per-profile when `CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID` is set. |
|
||||
| HEAD | `/acme/new-nonce` | RFC 8555 §7.2 | unauth | Shorthand. |
|
||||
| GET | `/acme/new-nonce` | RFC 8555 §7.2 | unauth | Shorthand. |
|
||||
| POST | `/acme/new-account` | RFC 8555 §7.3 | JWS jwk | Shorthand. |
|
||||
| POST | `/acme/account/{acc_id}` | RFC 8555 §7.3.2 + §7.3.6 | JWS kid | Shorthand. |
|
||||
| POST | `/acme/new-order` | RFC 8555 §7.4 | JWS kid | Shorthand. |
|
||||
| POST | `/acme/order/{ord_id}` | RFC 8555 §7.4 | JWS kid | Shorthand. |
|
||||
| POST | `/acme/order/{ord_id}/finalize` | RFC 8555 §7.4 | JWS kid | Shorthand. |
|
||||
| POST | `/acme/authz/{authz_id}` | RFC 8555 §7.5 | JWS kid | Shorthand. |
|
||||
| POST | `/acme/cert/{cert_id}` | RFC 8555 §7.4.2 | JWS kid | Shorthand. |
|
||||
| POST | `/acme/key-change` | RFC 8555 §7.3.5 | JWS kid (outer) + jwk (inner) | Shorthand. |
|
||||
| POST | `/acme/revoke-cert` | RFC 8555 §7.6 | JWS kid OR jwk | Shorthand. |
|
||||
| GET | `/acme/renewal-info/{cert_id}` | RFC 9773 | unauth | Shorthand. |
|
||||
|
||||
After Phase 4, the full RFC 8555 + RFC 9773 surface is live. RFC 8739
|
||||
(short-lived certs) and EAB enforcement remain follow-up work; cert-
|
||||
manager + boulder-tested clients work today against the surface above.
|
||||
|
||||
## RFC 8555 + RFC 9773 conformance statement
|
||||
|
||||
Honest disclosure of what's implemented, where, and what's not. Procurement
|
||||
engineers running gap analyses against cert-manager + Let's Encrypt's
|
||||
conformance posture should read this section before anything else.
|
||||
|
||||
### Implemented
|
||||
|
||||
| Section | Surface | Phase | First commit |
|
||||
|---------|---------|-------|--------------|
|
||||
| RFC 8555 §6.2 | JWS auth + RS256/ES256/EdDSA allow-list | 1b | `27bd660` |
|
||||
| RFC 8555 §6.3 | POST-as-GET | 1b | `27bd660` |
|
||||
| RFC 8555 §6.4 | URL-header binding to request URL | 1b | `27bd660` |
|
||||
| RFC 8555 §6.5 | Replay-Nonce + DB-backed nonce store | 1a | `e146b00` |
|
||||
| RFC 8555 §6.7 | RFC 7807 problem documents | 1a | `e146b00` |
|
||||
| RFC 8555 §7.1 | Directory | 1a | `e146b00` |
|
||||
| RFC 8555 §7.2 | new-nonce HEAD + GET | 1a | `e146b00` |
|
||||
| RFC 8555 §7.3 | new-account + idempotent re-registration | 1b | `27bd660` |
|
||||
| RFC 8555 §7.3.2 + §7.3.6 | account update + deactivation | 1b | `27bd660` |
|
||||
| RFC 8555 §7.3.5 | doubly-signed key rollover | 4 | `0299e4a` |
|
||||
| RFC 8555 §7.4 | new-order + finalize + cert download | 2 | `4ee486e` |
|
||||
| RFC 8555 §7.5 | authz POST-as-GET | 2 | `4ee486e` |
|
||||
| RFC 8555 §7.5.1 | challenge response | 3 | `7e22204` |
|
||||
| RFC 8555 §7.6 | revoke-cert (kid + jwk paths) | 4 | `0299e4a` |
|
||||
| RFC 8555 §8.3 | HTTP-01 challenge validator | 3 | `7e22204` |
|
||||
| RFC 8555 §8.4 | DNS-01 challenge validator | 3 | `7e22204` |
|
||||
| RFC 8737 | TLS-ALPN-01 challenge validator | 3 | `7e22204` |
|
||||
| RFC 9773 | ACME Renewal Information (ARI) | 4 | `0299e4a` |
|
||||
|
||||
### Not implemented (procurement-honest)
|
||||
|
||||
| Spec area | Status | Notes |
|
||||
|-----------|--------|-------|
|
||||
| RFC 8555 §7.3.4 — External Account Binding (EAB) | **Not implemented.** | Advertised in directory `meta.externalAccountRequired` but enforcement is a follow-up. Operators relying on EAB for account-creation gating should layer an upstream WAF. |
|
||||
| RFC 8555 §8.4 + §7.4 — Wildcard with `*.` prefix > 1 level | **Not implemented.** | Single-level wildcards (e.g. `*.example.com`) work end-to-end. Multi-level wildcards (`*.*.example.com`) are RFC-spec-ambiguous and rejected at the identifier-validation layer. |
|
||||
| RFC 8739 — Short-lived certs | **Not implemented.** | Operators wanting <7-day validity tune the bound issuer's TTL directly via `CertificateProfile.MaxTTLSeconds`; the ACME wire shape doesn't expose a separate notion. |
|
||||
| Cross-CA proxying | **Not implemented.** | Each profile binds to one issuer. Multi-CA federation (one ACME account → multi-CA selection per identifier) is roadmap. |
|
||||
| RFC 8555 §6.7 — `accountDoesNotExist` problem with hint URL | Partial. | Sentinel returns `accountDoesNotExist`; the optional hint URL embedding the `kid` is not emitted. cert-manager doesn't consume it. |
|
||||
|
||||
If a procurement-side gap analysis turns up something not in either
|
||||
table above, the answer is "we don't know yet" — operator-side issues
|
||||
welcome.
|
||||
|
||||
## Finalize routing through `CertificateService.Create` (Phase 2 architecture)
|
||||
|
||||
The finalize path mirrors how every other certctl issuance surface
|
||||
(EST, SCEP, agent, REST API) routes through the canonical pipeline:
|
||||
|
||||
1. JWS-verify the request (`internal/api/acme/jws.go`).
|
||||
2. Validate that the CSR's DNS-name set equals the order's identifier set
|
||||
exactly (case-folded). Mismatches return RFC 8555
|
||||
`urn:ietf:params:acme:error:badCSR`.
|
||||
3. Update the order row to `status=processing` (`s.tx.WithinTx` +
|
||||
`auditService.RecordEventWithTx` — atomic with audit row).
|
||||
4. Issue the cert via the bound profile's `IssuerConnector` adapter
|
||||
(same `IssueCertificate(ctx, commonName, sans, csrPEM, ekus,
|
||||
maxTTLSeconds, mustStaple)` call EST/SCEP/agent take).
|
||||
5. Insert the `managed_certificates` row via
|
||||
`service.CertificateService.Create(ctx, *ManagedCertificate, actor)`.
|
||||
Source is stamped `domain.CertificateSourceACME` so operators can
|
||||
bulk-revoke ACME-issued certs by filtering on `Source=ACME`.
|
||||
6. Insert the `certificate_versions` row +
|
||||
transition the order to `status=valid` with `certificate_id` set
|
||||
(one final `WithinTx` covering both writes + the audit row).
|
||||
|
||||
This means RenewalPolicy, CertificateProfile, per-issuer-type
|
||||
Prometheus metrics, audit rows, and revocation-pipeline integration
|
||||
all apply uniformly to ACME-issued certs via the same code path that
|
||||
already serves EST/SCEP/agent/REST issuance.
|
||||
|
||||
The atomicity boundary: there is a brief window between step 5 (cert
|
||||
exists) and step 6 (order shows valid) where the order row still says
|
||||
`processing`. Phase 5's GC scheduler reconciles. The actor string on
|
||||
audit rows is `acme:<account-id>`.
|
||||
|
||||
## JWS verification (Phase 1b)
|
||||
|
||||
Every JWS-authenticated POST runs through the verifier at
|
||||
`internal/api/acme/jws.go::VerifyJWS`. The verifier enforces:
|
||||
|
||||
1. The JWS parses as a flattened single-signature object (multi-sig is
|
||||
rejected per RFC 8555 §6.2).
|
||||
2. The signature algorithm is in the closed allow-list `{RS256, ES256,
|
||||
EdDSA}` per RFC 8555 §6.2 — `none`, `HS256`, and every other alg
|
||||
are refused at parse time.
|
||||
3. The protected header carries exactly one of `kid` (registered
|
||||
account) or `jwk` (new-account flow); endpoints declare which they
|
||||
require.
|
||||
4. The protected header `url` matches the inbound request URL exactly.
|
||||
5. The protected header `nonce` is consumed against the
|
||||
`acme_nonces` store; missing / replayed / expired nonces return
|
||||
`urn:ietf:params:acme:error:badNonce` per RFC 8555 §6.5.1.
|
||||
6. On the `kid` path: the kid URL round-trips against the canonical
|
||||
per-profile shape, the referenced account exists, and its status
|
||||
is `valid`. Deactivated / revoked accounts cannot authenticate.
|
||||
7. The signature verifies against the resolved key (registered
|
||||
account's stored JWK on the kid path; embedded jwk on the jwk path).
|
||||
|
||||
Every state-mutating account operation (create, contact update,
|
||||
deactivate) writes its `acme_accounts` row and an `audit_events` row
|
||||
inside one `repository.Transactor.WithinTx` call — the canonical
|
||||
certctl atomicity contract (matches `service.CertificateService.Create`
|
||||
at `internal/service/certificate.go:131`).
|
||||
|
||||
## Phases (cross-reference)
|
||||
|
||||
| Phase | Status | Surface |
|
||||
|-------|-------------|---------|
|
||||
| 1a | live | directory + new-nonce + per-profile routing |
|
||||
| 1b | live | new-account + account/{id} + JWS verifier (RFC 7515 + go-jose v4) |
|
||||
| 2 | live | orders + authzs + finalize + cert download (trust_authenticated mode end-to-end) |
|
||||
| 3 | live | HTTP-01 + DNS-01 + TLS-ALPN-01 challenge validation (challenge mode end-to-end) |
|
||||
| 4 | live | key rollover (RFC 8555 §7.3.5) + revoke-cert (§7.6) + ARI (RFC 9773) |
|
||||
| 5 | live | rate limits + GC sweeper + kind-driven cert-manager integration test + lego conformance harness + k6 ACME-flow scenario |
|
||||
| 6 | live | full operator-facing reference + walkthroughs (cert-manager / Caddy / Traefik) + threat model + RFC-8555 conformance statement + troubleshooting + version pinning |
|
||||
|
||||
Track shipped phases via `git log --grep='acme-server:' --oneline`.
|
||||
|
||||
## Operational notes (Phase 1a)
|
||||
|
||||
- **Schema:** `migrations/000025_acme_server.up.sql` adds 5 ACME tables
|
||||
+ the `certificate_profiles.acme_auth_mode` column. Phase 1a actively
|
||||
uses only `acme_nonces`. The full schema ships now so the migration
|
||||
is stable and Phases 1b-4 don't need additional `CREATE TABLE`
|
||||
migrations.
|
||||
|
||||
- **Replay protection:** nonces are persisted in `acme_nonces` (NOT
|
||||
in-memory). They survive server restart, which is required for the
|
||||
RFC 8555 §6.5 replay defense to hold against a multi-replica
|
||||
certctl-server fleet behind a load balancer.
|
||||
|
||||
- **Metrics:** the service layer exposes per-op atomic counters via
|
||||
`service.ACMEService.Metrics().Snapshot()`:
|
||||
- `certctl_acme_directory_total`
|
||||
- `certctl_acme_directory_failures_total`
|
||||
- `certctl_acme_new_nonce_total`
|
||||
- `certctl_acme_new_nonce_failures_total`
|
||||
|
||||
Phase 1b will extend with `new_account` counters; Phase 2 with order
|
||||
/ finalize / cert; Phase 3 with per-challenge-type counters.
|
||||
|
||||
- **Audit:** Phase 1a is read-mostly (directory + nonce). Phase 1b's
|
||||
account-creation path will route through the canonical
|
||||
`s.tx.WithinTx(...)` + `auditService.RecordEventWithTx(...)` pattern
|
||||
so every account state mutation is paired with an `audit_events`
|
||||
row.
|
||||
|
||||
## Phase 4 — key rollover, revocation, ARI
|
||||
|
||||
### How do I rotate my ACME account key?
|
||||
|
||||
RFC 8555 §7.3.5 defines a doubly-signed JWS for the rollover. The OUTER
|
||||
JWS is signed by the OLD account key (kid path); its payload IS the
|
||||
INNER JWS, which is signed by the NEW account key (jwk path). cert-
|
||||
manager and lego do this for you transparently — `lego renew --key-rotate`
|
||||
or the cert-manager `Issuer.spec.acme.privateKeySecretRef` rollover.
|
||||
|
||||
Server-side validation:
|
||||
|
||||
1. Outer JWS verifies against the registered account's current key.
|
||||
2. Inner JWS verifies against the embedded NEW jwk (proves possession).
|
||||
3. Inner payload `account` matches outer `kid`.
|
||||
4. Inner payload `oldKey` thumbprint-equals the registered key.
|
||||
5. Inner protected `url` equals outer protected `url`.
|
||||
6. New JWK thumbprint not already registered against the same profile.
|
||||
7. `SELECT … FOR UPDATE` on the account row serializes concurrent
|
||||
rollovers; the loser sees the winner's new thumbprint and is told
|
||||
to retry (409).
|
||||
|
||||
### How do I revoke an ACME-issued cert?
|
||||
|
||||
Two auth paths per RFC 8555 §7.6:
|
||||
|
||||
- **kid path:** sign with your account key. The server checks the
|
||||
account "owns" the cert via `acme_orders.certificate_id` lookup.
|
||||
- **jwk path:** sign with the cert's own private key. The server
|
||||
extracts the cert's public key, computes the JWK, and asserts it
|
||||
matches the embedded jwk thumbprint.
|
||||
|
||||
Either path routes through `service.RevocationSvc.RevokeCertificateWithActor`
|
||||
— the same pipeline the GUI revoke button, bulk-revocation, and the
|
||||
ACME-consumer issuer use. So the cert-row update + revocation row + audit
|
||||
row are all atomic in one `WithinTx`, the issuer is best-effort
|
||||
notified, and the OCSP response cache is invalidated.
|
||||
|
||||
Reason codes follow RFC 5280 §5.3.1; codes 8 (removeFromCRL) and 10
|
||||
(aACompromise) are not in certctl's `domain.ValidRevocationReasons`
|
||||
set so they clamp to `unspecified`.
|
||||
|
||||
### What is ARI?
|
||||
|
||||
RFC 9773 ACME Renewal Information. Clients GET
|
||||
`/acme/profile/<id>/renewal-info/<cert-id>` (unauthenticated) and
|
||||
receive a JSON document with `suggestedWindow.start` and `.end` —
|
||||
the server's recommendation for when to renew. The response also
|
||||
carries `Retry-After` (RFC 9773 §4.2) hinting at the next-poll cadence.
|
||||
|
||||
Cert-id format is `base64url(authorityKeyIdentifier).base64url(serial)`
|
||||
per RFC 9773 §4.1.
|
||||
|
||||
Window math:
|
||||
|
||||
- Cert with a bound renewal policy: window starts at
|
||||
`notAfter - RenewalWindowDays`, ends at `notAfter - RenewalWindowDays/2`.
|
||||
So a 30-day window cert with notAfter 2026-06-30 emits start=2026-05-31,
|
||||
end=2026-06-15. Boulder-shape default that lets cert-manager schedule
|
||||
inside our renewal window.
|
||||
- No policy: window is the last 33% of validity.
|
||||
- Past expiry: window is "now" → "now + 24h" (renew immediately).
|
||||
|
||||
Disable ARI globally with `CERTCTL_ACME_SERVER_ARI_ENABLED=false`. The
|
||||
URL drops out of the directory; the route is still registered but
|
||||
returns 404 — clients fall back to static renewal scheduling.
|
||||
|
||||
## Phase 5 — operational guidance
|
||||
|
||||
### Rate limiting
|
||||
|
||||
Production deployments serving multiple ACME profiles or fleets should
|
||||
keep the default rate limits in place. The four caps:
|
||||
|
||||
- `RATE_LIMIT_ORDERS_PER_HOUR` (100) — per-account new-order cap. A
|
||||
cert-manager Certificate that auto-renews with 1/3 of its validity
|
||||
remaining (90-day cert → renewal ~30 days before expiry) consumes ~6 orders/year
|
||||
per managed Certificate. 100/hour is generous for any plausible
|
||||
fleet.
|
||||
- `RATE_LIMIT_CONCURRENT_ORDERS` (5) — per-account cap on
|
||||
pending/ready/processing orders. Stops a runaway client from
|
||||
starving DB-row throughput. Tune up only if you observe legitimate
|
||||
bursts.
|
||||
- `RATE_LIMIT_KEY_CHANGE_PER_HOUR` (5) — rollovers are rare; a flood
|
||||
is an attack signal. Tune down to 1/hour if your operator
|
||||
procedure mandates manual rollovers only.
|
||||
- `RATE_LIMIT_CHALLENGE_RESPONDS_PER_HOUR` (60) — per-challenge cap,
|
||||
defends against retry storms.
|
||||
|
||||
Hits return RFC 8555 §6.7 `rateLimited` Problem with a `Retry-After`
|
||||
header. cert-manager 1.15+ honors the header; lego too. Older clients
|
||||
may not — that's the client's problem, not certctl's.
|
||||
|
||||
The buckets are **in-memory + per-replica**. A 3-replica certctl-
|
||||
server fleet behind a load balancer effectively has 3× the configured
|
||||
throughput (each replica's bucket fills independently). For
|
||||
deployments where this matters operationally, the right answer is a
|
||||
shared rate-limit store — that's a follow-up; not blocking for the
|
||||
current threat model where same-account requests typically pin to
|
||||
the same replica via session affinity.
|
||||
|
||||
### GC sweeper
|
||||
|
||||
The scheduler runs the GC sweep every `GC_INTERVAL` (default 1m). Each
|
||||
sweep is three independent SQL statements:
|
||||
|
||||
1. `DELETE FROM acme_nonces WHERE used = TRUE OR expires_at < NOW()`.
|
||||
2. `UPDATE acme_authorizations SET status='expired' WHERE status='pending' AND expires_at < NOW()`.
|
||||
3. `UPDATE acme_orders SET status='invalid', error=... WHERE status IN ('pending','ready','processing') AND expires_at < NOW()`.
|
||||
|
||||
Each statement is bounded by a 1-minute per-sweep timeout. A failing
|
||||
sweep is logged + retried on the next tick; a tick that overruns its
|
||||
budget is skipped (the existing-tick atomic-Bool guard prevents
|
||||
overlap). Counts are exposed via `certctl_acme_gc_*` Prometheus
|
||||
metrics.
|
||||
|
||||
### cert-manager integration test
|
||||
|
||||
`make acme-cert-manager-test` brings up a kind cluster, installs
|
||||
cert-manager 1.15.0, helm-deploys certctl-server with
|
||||
`acmeServer.enabled=true`, and verifies a Certificate resource issues
|
||||
end-to-end. Skipped in CI by default (kind is too heavy for per-PR);
|
||||
operators run it locally on a workstation. See
|
||||
`deploy/test/acme-integration/` for the YAML + Go test harness.
|
||||
|
||||
### lego RFC conformance harness
|
||||
|
||||
`make acme-rfc-conformance-test` drives lego v4 against a hermetic
|
||||
certctl-server stack, exercising register → new-order → finalize.
|
||||
Operators run this when shipping behavior changes to the ACME surface
|
||||
to confirm a real third-party client still works.
|
||||
|
||||
### k6 ACME flows scenario
|
||||
|
||||
`deploy/test/loadtest/k6/acme_flow.js` exercises the unauthenticated
|
||||
surface (directory + new-nonce + ARI) at 100 VUs × 5m. JWS-signed
|
||||
flows are out of scope for k6 (no JWS support); they're covered by
|
||||
the lego conformance harness above. Baseline numbers + thresholds in
|
||||
`deploy/test/loadtest/README.md`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
The five failure modes operators hit most often + the canonical fix
|
||||
for each.
|
||||
|
||||
### `cert-manager logs: 400 Bad Request: badNonce`
|
||||
|
||||
**Cause:** Either a nonce was replayed (a buggy client retries the
|
||||
same JWS), the cert-manager + certctl-server clocks differ by more
|
||||
than `CERTCTL_ACME_SERVER_NONCE_TTL` (default 5 min), or the
|
||||
nonce-store row was reaped between issuance and use.
|
||||
|
||||
**Fix:** First check NTP on both sides. If clocks are healthy,
|
||||
lengthen `CERTCTL_ACME_SERVER_NONCE_TTL` to 10m or 15m. If the
|
||||
problem persists, check for a multi-replica certctl-server fleet
|
||||
without sticky session affinity — the nonce DB row lives on one
|
||||
replica; if the JWS POST hits a different replica before replication
|
||||
catches up, you observe spurious `badNonce`. Solution: pin client
|
||||
sessions to a single replica via load-balancer cookie / `kid`-hash
|
||||
routing, OR shorten replication lag if your DB is the bottleneck.
|
||||
|
||||
### `cert-manager logs: x509: certificate signed by unknown authority`
|
||||
|
||||
**Cause:** cert-manager refuses to talk to the directory URL because
|
||||
its TLS chain doesn't terminate at a root in cert-manager's trust
|
||||
store. certctl-server's bootstrap cert (Phase 1a, `deploy/test/certs/server.crt`)
|
||||
is self-signed.
|
||||
|
||||
**Fix:** Add the `caBundle` field to your `ClusterIssuer.spec.acme` —
|
||||
see the [TLS trust bootstrap](#tls-trust-bootstrap-read-this-before-configuring-cert-manager)
|
||||
section above for the 3-step recipe. This is **the** single biggest
|
||||
first-time-deploy footgun on the cert-manager integration path.
|
||||
|
||||
### HTTP-01 validator returns `connection refused`
|
||||
|
||||
**Cause:** The HTTP-01 solver's Ingress / Service is not reachable
|
||||
from certctl-server's network. Common subcases: (a) the cert-manager
|
||||
http-solver pod is on a private network certctl-server can't reach;
|
||||
(b) a firewall blocks port 80 inbound to the solver's address; (c)
|
||||
the Ingress class annotation doesn't match an installed ingress
|
||||
controller; (d) your DNS still points at an old IP.
|
||||
|
||||
**Fix:** From the certctl-server pod, `curl -v
|
||||
http://<identifier>/.well-known/acme-challenge/<token>` and read the
|
||||
network error. If the curl fails the same way, the network path is
|
||||
the issue. If curl works but the validator fails, check the validator
|
||||
log lines — the SSRF guard rejects reserved IPs (RFC1918, link-local,
|
||||
cloud-metadata 169.254.169.254). Public-trust style profiles that
|
||||
need to reach RFC1918 solvers must be moved to `trust_authenticated`
|
||||
mode OR the solver must be exposed on a routable address.
|
||||
|
||||
### DNS-01 validator returns `NXDOMAIN`
|
||||
|
||||
**Cause:** DNS provider hasn't propagated the `_acme-challenge.<domain>`
|
||||
TXT record yet. Most providers have a 30s-2m propagation lag. cert-manager
|
||||
retries by default, but Phase-5 rate limits (default 60/hour per
|
||||
challenge-id) can truncate the retry budget.
|
||||
|
||||
**Fix:** Verify TXT propagation with `dig +short TXT _acme-challenge.<domain>
|
||||
@<your-resolver>`. If the answer is empty, the issue is upstream. If
|
||||
it's populated but certctl reports NXDOMAIN, check
|
||||
`CERTCTL_ACME_SERVER_DNS01_RESOLVER` (default `8.8.8.8:53`) is
|
||||
reachable from certctl-server's network egress. Operators on isolated
|
||||
networks need a private resolver; configure accordingly + own the
|
||||
cache-poisoning posture (see [threat
|
||||
model](./acme-server-threat-model.md)).
|
||||
|
||||
### Certificate Ready=False with `rejectedIdentifier`
|
||||
|
||||
**Cause:** The CSR includes an identifier (CommonName or SAN) that the
|
||||
bound certificate profile's policy rejects. certctl runs syntactic +
|
||||
profile-policy validation **before** order creation; the order never
|
||||
reaches the database.
|
||||
|
||||
**Fix:** The reject reason is in the `subproblems` array of the RFC
|
||||
8555 §6.7 problem document. Decode the JSON, look at `subproblems[].detail`,
|
||||
and adjust either the CSR or the profile policy. Common causes:
|
||||
SAN-not-in-`AllowedIdentifierWildcards`, EKU-not-in-`AllowedEKUs`,
|
||||
TTL-exceeds-`MaxTTLSeconds`. Validation logic lives in
|
||||
`internal/api/acme/identifier.go::ValidateIdentifiers` +
|
||||
`internal/domain/profile.go` — read those if the profile-policy rule
|
||||
isn't obvious.
|
||||
|
||||
## Version pinning + tested clients
|
||||
|
||||
certctl's ACME server is tested against the following client versions.
|
||||
Other versions probably work; these are the ones the integration suite
|
||||
exercises end-to-end.
|
||||
|
||||
| Client | Tested version | Where it's pinned |
|
||||
|--------|----------------|-------------------|
|
||||
| cert-manager | 1.15.0 | `deploy/test/acme-integration/cert-manager-install.sh::CERT_MANAGER_VERSION` |
|
||||
| lego (RFC 8555 conformance harness) | v4.x latest | `deploy/test/acme-integration/conformance-lego.sh` (operator installs via `go install github.com/go-acme/lego/v4/cmd/lego@latest`) |
|
||||
| kind (cluster bootstrap) | v0.20+ | `deploy/test/acme-integration/kind-config.yaml` schema requirement |
|
||||
| Caddy | 2.7.x | Phase 6 walkthrough (`docs/acme-caddy-walkthrough.md`) |
|
||||
| Traefik | 3.0+ | Phase 6 walkthrough (`docs/acme-traefik-walkthrough.md`) |
|
||||
|
||||
Operators reporting issues with untested-version clients should include
|
||||
the client version + the precise wire-level error (curl-captured request
|
||||
+ response body) so we can pin a regression test if applicable.
|
||||
|
||||
## FAQ
|
||||
|
||||
### Why two auth modes? Isn't `challenge` strictly more secure?
|
||||
|
||||
`challenge` is strictly more secure for **public-trust** PKI — RFC 8555
|
||||
§8 ownership proof is the entire point of cert-manager + Let's Encrypt.
|
||||
For **internal PKI**, the threat model is different: the network itself
|
||||
is the security boundary (mTLS service mesh, firewalled VPC, identifier-
|
||||
namespace controlled by the operator). Forcing every internal cert to
|
||||
go through a solver round-trip adds operational toil with no security
|
||||
gain. `trust_authenticated` is the certctl-specific mode that
|
||||
acknowledges this — the ACME account is the proof, not the solver.
|
||||
|
||||
### How does this differ from `cert-manager → Let's Encrypt with certctl as a separate step`?
|
||||
|
||||
Two integrations vs one. With certctl as the ACME endpoint, cert-manager
|
||||
does its native flow (Certificate → Order → CSR → Secret) and certctl
|
||||
mints the cert directly, recording it under its own
|
||||
`managed_certificates` table with full audit + renewal-policy + bulk-
|
||||
revocation surface. With Let's Encrypt as the ACME endpoint, you have
|
||||
to run a separate cert-manager-uploads-to-certctl webhook OR maintain
|
||||
two parallel cert tracks. The native-ACME-server path is operationally
|
||||
simpler.
|
||||
|
||||
### Can I use ACME endpoints from outside the K8s cluster?
|
||||
|
||||
Yes. The endpoints are HTTPS over the certctl-server's listener (port
|
||||
8443 by default). Caddy on a VM, win-acme on a Windows server, or
|
||||
Posh-ACME on a Mac all integrate against
|
||||
`https://<certctl-server>:8443/acme/profile/<profile-id>/directory`.
|
||||
The TLS-trust-bootstrap requirement applies the same way — see the
|
||||
[Caddy walkthrough](./acme-caddy-walkthrough.md) for the OS-trust-store
|
||||
recipe.
|
||||
|
||||
### How do I migrate manually-issued certs to ACME-issued ones?
|
||||
|
||||
Not yet automatic. Operators migrating: keep the old `managed_certificates`
|
||||
rows; create new ones via the ACME flow; flip targets one by one. A
|
||||
dedicated bulk-migration tool is on the roadmap (post-2.1.0). Track
|
||||
via the master prompt's roadmap section in
|
||||
`cowork/acme-server-endpoint-prompt.md`.
|
||||
|
||||
### What audit-log events fire on each ACME operation?
|
||||
|
||||
Every state mutation writes an `audit_events` row. Actor strings:
|
||||
`acme:<account-id>` for kid-path requests; `acme-cert-key:<serial>`
|
||||
for jwk-path revoke; `acme-system:gc` for scheduler-driven sweeps.
|
||||
Event-name catalog:
|
||||
|
||||
| Event name | Fired by | Resource type |
|
||||
|------------|----------|---------------|
|
||||
| `acme_account_created` | new-account | `acme_account` |
|
||||
| `acme_account_contact_updated` | account update | `acme_account` |
|
||||
| `acme_account_deactivated` | account deactivate | `acme_account` |
|
||||
| `acme_account_key_rolled` | key-change | `acme_account` |
|
||||
| `acme_order_created` | new-order | `acme_order` |
|
||||
| `acme_order_finalized` | finalize | `acme_order` |
|
||||
| `acme_challenge_processing` | challenge-respond (dispatch) | `acme_challenge` |
|
||||
| `acme_challenge_completed` | validator callback | `acme_challenge` |
|
||||
| `certificate_revoked` | revoke-cert (routes through `RevocationSvc`) | `certificate` |
|
||||
|
||||
Querying by actor prefix (`actor LIKE 'acme:%'`) reconstructs the full
|
||||
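account-driven history of any ACME-issued cert; widen the pattern to `actor LIKE 'acme%'` to also capture jwk-path revocations (`acme-cert-key:<serial>`) and scheduler sweeps (`acme-system:gc`).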
|
||||
|
||||
### Is there a threat model document?
|
||||
|
||||
Yes — [`docs/acme-server-threat-model.md`](./acme-server-threat-model.md).
|
||||
Read before writing a security review.
|
||||
|
||||
## See also
|
||||
|
||||
- [cert-manager integration walkthrough](./acme-cert-manager-walkthrough.md)
|
||||
- [Caddy integration walkthrough](./acme-caddy-walkthrough.md)
|
||||
- [Traefik integration walkthrough](./acme-traefik-walkthrough.md)
|
||||
- [Threat model](./acme-server-threat-model.md)
|
||||
- [TLS trust bootstrap reference](./tls.md)
|
||||
- [Architecture (control-plane)](./architecture.md)
|
||||
@@ -0,0 +1,198 @@
|
||||
# Traefik Integration Walkthrough
|
||||
|
||||
End-to-end recipe for issuing certs from a certctl-server deployment
|
||||
through Traefik 3.0+. Target audience: operator running Traefik (in
|
||||
Kubernetes or on a VM) who wants to use certctl as their ACME source
|
||||
of truth instead of Let's Encrypt.
|
||||
|
||||
## Prereqs
|
||||
|
||||
- A reachable certctl-server with `CERTCTL_ACME_SERVER_ENABLED=true`
|
||||
and at least one profile whose `acme_auth_mode` is set. Profile
|
||||
setup is identical to the cert-manager walkthrough — see
|
||||
[`docs/acme-cert-manager-walkthrough.md`](./acme-cert-manager-walkthrough.md)
|
||||
Step 2.
|
||||
- Traefik 3.0+ (the v2 API surface for ACME is also supported but the
|
||||
`serversTransport.rootCAs` reference below is v3-shaped).
|
||||
- The certctl bootstrap CA, in PEM form, captured the same way as the
|
||||
cert-manager walkthrough Step 3.
|
||||
|
||||
## Step 1 — Configure Traefik static config
|
||||
|
||||
Traefik's ACME issuer is a `certificatesResolver` in the static config
|
||||
(file or CLI flags or env vars). The relevant fields:
|
||||
|
||||
```yaml
|
||||
# /etc/traefik/traefik.yml (or wherever your static config lives)
|
||||
|
||||
certificatesResolvers:
|
||||
certctl:
|
||||
acme:
|
||||
caServer: https://certctl.example.com:8443/acme/profile/prof-test/directory
|
||||
email: ops@example.com
|
||||
storage: /etc/traefik/acme-certctl.json
|
||||
httpChallenge:
|
||||
entryPoint: web
|
||||
# OR for trust_authenticated mode profiles:
|
||||
# tlsChallenge: {}
|
||||
|
||||
# certctl uses a self-signed bootstrap cert; Traefik needs the CA
|
||||
# explicitly via serversTransport.rootCAs to call the directory URL.
|
||||
serversTransports:
|
||||
default:
|
||||
rootCAs:
|
||||
- /etc/traefik/certctl-bootstrap.crt
|
||||
|
||||
# Apply the serversTransport globally so every outbound HTTPS call —
|
||||
# including ACME directory + finalize — trusts the certctl CA.
|
||||
api:
|
||||
insecure: false
|
||||
|
||||
entryPoints:
|
||||
web:
|
||||
address: ":80"
|
||||
websecure:
|
||||
address: ":443"
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `caServer` must point at the directory URL (ending in `/directory`).
|
||||
- `httpChallenge.entryPoint: web` requires Traefik's `web` entryPoint
|
||||
(port 80) to be reachable from certctl-server's HTTP-01 validator.
|
||||
For `trust_authenticated` mode profiles, this is a no-op formality —
|
||||
certctl auto-resolves authzs, so the solver round-trip never happens.
|
||||
- `tlsChallenge: {}` is the alternative that uses TLS-ALPN-01 (RFC 8737)
|
||||
via Traefik's `websecure` (port 443) entryPoint. Either works under
|
||||
`challenge` mode; `tlsChallenge` is the recommended choice
|
||||
for `trust_authenticated` mode.
|
||||
|
||||
## Step 2 — Trust the certctl bootstrap CA
|
||||
|
||||
Two options:
|
||||
|
||||
### Option A — `serversTransport.rootCAs` (preferred)
|
||||
|
||||
```
|
||||
sudo cp deploy/test/certs/ca.crt /etc/traefik/certctl-bootstrap.crt
|
||||
sudo systemctl reload traefik
|
||||
```
|
||||
|
||||
`serversTransports.default.rootCAs` (shown in Step 1 above) tells
|
||||
Traefik's outbound HTTPS client to trust the supplied PEM in addition
|
||||
to the system trust store. This is the right pattern for containerized
|
||||
Traefik where you don't want to install OS-level trust roots.
|
||||
|
||||
### Option B — OS trust store
|
||||
|
||||
For Traefik running directly on a VM, `update-ca-certificates`-style
|
||||
installation works the same way as the Caddy walkthrough Option A.
|
||||
The `serversTransport.rootCAs` field is unnecessary in that case.
|
||||
|
||||
## Step 3 — Reference the resolver from a router
|
||||
|
||||
Per-router (dynamic config):
|
||||
|
||||
```yaml
|
||||
# /etc/traefik/dynamic/example-com.yml
|
||||
|
||||
http:
|
||||
routers:
|
||||
example-com:
|
||||
rule: "Host(`example.com`)"
|
||||
entryPoints: [websecure]
|
||||
tls:
|
||||
certResolver: certctl
|
||||
service: example-com-backend
|
||||
services:
|
||||
example-com-backend:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://localhost:8080"
|
||||
```
|
||||
|
||||
Or, in Kubernetes via `IngressRoute` (Traefik CRD):
|
||||
|
||||
```yaml
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: example-com
|
||||
spec:
|
||||
entryPoints: [websecure]
|
||||
routes:
|
||||
- match: Host(`example.com`)
|
||||
kind: Rule
|
||||
services:
|
||||
- name: example-com-backend
|
||||
port: 8080
|
||||
tls:
|
||||
certResolver: certctl
|
||||
```
|
||||
|
||||
## Step 4 — Reload Traefik
|
||||
|
||||
```
|
||||
sudo systemctl reload traefik
|
||||
# OR kubectl rollout restart deployment/traefik (if you changed the static config via ConfigMap).
|
||||
```
|
||||
|
||||
On the first request to `example.com`, Traefik hits certctl's directory
|
||||
URL, registers an account, submits a new-order, and finalizes. The cert
|
||||
is persisted to `/etc/traefik/acme-certctl.json` (or its in-cluster
|
||||
PVC equivalent).
|
||||
|
||||
## Step 5 — Verify
|
||||
|
||||
```
|
||||
curl -kvI https://example.com 2>&1 | grep -E 'subject|issuer'
|
||||
# subject: CN=example.com
|
||||
# issuer: CN=certctl test internal CA
|
||||
```
|
||||
|
||||
The cert is signed by certctl's bound issuer (per the `prof-test`
|
||||
profile's `issuer_id`).
|
||||
|
||||
On the certctl side, the audit log captures the issuance:
|
||||
|
||||
```
|
||||
psql -c "SELECT actor, action, resource_id FROM audit_events
|
||||
WHERE actor LIKE 'acme:%' ORDER BY created_at DESC LIMIT 5;"
|
||||
```
|
||||
|
||||
## Common failure modes
|
||||
|
||||
- **Traefik logs `unable to obtain ACME certificate ... x509: certificate
|
||||
signed by unknown authority`** → `serversTransport.rootCAs` is not
|
||||
pointing at the certctl bootstrap CA, OR the file was rotated and
|
||||
Traefik hasn't reloaded. Verify with
|
||||
`curl --cacert /etc/traefik/certctl-bootstrap.crt
|
||||
https://certctl.example.com:8443/acme/profile/prof-test/directory`.
|
||||
- **Traefik logs `urn:ietf:params:acme:error:rateLimited`** → tune
|
||||
`CERTCTL_ACME_SERVER_RATE_LIMIT_ORDERS_PER_HOUR` on the certctl
|
||||
side, OR reduce Traefik's parallel-cert-acquisition concurrency.
|
||||
- **`acme: error: 400 :: POST :: ... :: badNonce`** → clock skew or
|
||||
multi-replica certctl without sticky sessions; same fix as the
|
||||
cert-manager walkthrough.
|
||||
- **Storage file `acme-certctl.json` shows persistent failures** →
|
||||
Traefik retains failed-acquisition state. After fixing the
|
||||
underlying cause, delete the storage file and reload:
|
||||
`sudo rm /etc/traefik/acme-certctl.json && sudo systemctl reload traefik`.
|
||||
|
||||
## Cleanup
|
||||
|
||||
```
|
||||
# Remove the certResolver from any router / IngressRoute consuming it.
|
||||
sudo systemctl reload traefik
|
||||
# Delete the persisted ACME storage:
|
||||
sudo rm /etc/traefik/acme-certctl.json
|
||||
# Or in K8s: drop the resolver from the static-config ConfigMap.
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [`docs/acme-server.md`](./acme-server.md) — canonical reference.
|
||||
- [`docs/acme-cert-manager-walkthrough.md`](./acme-cert-manager-walkthrough.md) —
|
||||
cert-manager equivalent.
|
||||
- [Traefik upstream ACME docs](https://doc.traefik.io/traefik/https/acme/#caserver) —
|
||||
verify behavior pinned here against Traefik 3.0+ semantics.
|
||||
+65
-61
@@ -703,20 +703,17 @@ The EST (Enrollment over Secure Transport) server provides an industry-standard
|
||||
|
||||
**Architecture:** EST is a handler-level protocol that delegates certificate issuance to an existing `IssuerConnector`. This means EST is not a new issuer — it's a new *interface* to the existing issuance infrastructure. The `ESTService` bridges the `ESTHandler` to whichever issuer connector is configured via `CERTCTL_EST_ISSUER_ID`.
|
||||
|
||||
```
|
||||
Client (WiFi AP, MDM, IoT)
|
||||
│
|
||||
▼
|
||||
ESTHandler (handler layer)
|
||||
│ CSR parsing, PKCS#7 response encoding
|
||||
▼
|
||||
ESTService (service layer)
|
||||
│ CSR validation, CN/SAN extraction, audit recording
|
||||
▼
|
||||
IssuerConnector (connector layer via IssuerConnectorAdapter)
|
||||
│ Certificate signing (Local CA, step-ca, etc.)
|
||||
▼
|
||||
Signed certificate returned as PKCS#7 certs-only
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Client["Client (WiFi AP, MDM, IoT)"]
|
||||
Handler["ESTHandler (handler layer)"]
|
||||
Service["ESTService (service layer)"]
|
||||
Issuer["IssuerConnector (connector layer via IssuerConnectorAdapter)"]
|
||||
Result["Signed certificate returned as PKCS#7 certs-only"]
|
||||
Client --> Handler
|
||||
Handler -->|"CSR parsing, PKCS#7 response encoding"| Service
|
||||
Service -->|"CSR validation, CN/SAN extraction, audit recording"| Issuer
|
||||
Issuer -->|"certificate signing (Local CA, step-ca, etc.)"| Result
|
||||
```
|
||||
|
||||
**Wire format:** EST uses PKCS#7 (RFC 2315) certs-only degenerate SignedData for certificate responses and base64-encoded DER for CSR requests. The handler includes a hand-rolled ASN.1 PKCS#7 builder — no external PKCS#7 dependency. The CSR reader accepts both base64-encoded DER (standard EST wire format) and PEM-encoded PKCS#10 (convenience for debugging).
|
||||
@@ -795,20 +792,17 @@ The SCEP (Simple Certificate Enrollment Protocol) server provides certificate en
|
||||
|
||||
**Architecture:** SCEP follows the exact same layering as EST — a handler-level protocol that delegates certificate issuance to an existing `IssuerConnector`. The `SCEPService` bridges the `SCEPHandler` to whichever issuer connector is configured via `CERTCTL_SCEP_ISSUER_ID`.
|
||||
|
||||
```
|
||||
Client (MDM, network device, SCEP client)
|
||||
│
|
||||
▼
|
||||
SCEPHandler (handler layer)
|
||||
│ PKCS#7 envelope parsing, CSR extraction, challenge password extraction
|
||||
▼
|
||||
SCEPService (service layer)
|
||||
│ Challenge password validation, CSR validation, CN/SAN extraction, audit recording
|
||||
▼
|
||||
IssuerConnector (connector layer via IssuerConnectorAdapter)
|
||||
│ Certificate signing (Local CA, step-ca, etc.)
|
||||
▼
|
||||
Signed certificate returned as PKCS#7 certs-only
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Client["Client (MDM, network device, SCEP client)"]
|
||||
Handler["SCEPHandler (handler layer)"]
|
||||
Service["SCEPService (service layer)"]
|
||||
Issuer["IssuerConnector (connector layer via IssuerConnectorAdapter)"]
|
||||
Result["Signed certificate returned as PKCS#7 certs-only"]
|
||||
Client --> Handler
|
||||
Handler -->|"PKCS#7 envelope parsing, CSR extraction, challenge password extraction"| Service
|
||||
Service -->|"challenge password validation, CSR validation, CN/SAN extraction, audit recording"| Issuer
|
||||
Issuer -->|"certificate signing (Local CA, step-ca, etc.)"| Result
|
||||
```
|
||||
|
||||
**Wire format:** Two paths, tried in order. The new RFC 8894 path (post-2026-04-29) parses the full PKIMessage shape: ContentInfo → SignedData → SignerInfo (POPO over auth-attrs verified via `internal/pkcs7/signedinfo.go::SignerInfo.VerifySignature` with the canonical SET-OF Attribute re-serialisation per RFC 5652 §5.4) → EnvelopedData (decrypted via `internal/pkcs7/envelopeddata.go::EnvelopedData.Decrypt` with RSA PKCS#1v1.5 keyTrans + AES-CBC content + constant-time PKCS#7 unpad to close the padding-oracle leak) → inner PKCS#10 CSR. Auth-attrs (messageType, transactionID, senderNonce) flow through to the service layer via `domain.SCEPRequestEnvelope`. The handler dispatches on messageType: PKCSReq (19) → initial enrollment; RenewalReq (17) → re-enrollment with chain validation; GetCertInitial (20) → polling stub returns FAILURE+badCertID. Responses are full CertRep PKIMessages (`internal/pkcs7/certrep.go::BuildCertRepPKIMessage`) signed by the per-profile RA cert/key with the issued cert chain encrypted to the device's transient signing cert (RFC 8894 §3.3.2). On parse failure the handler falls through to the legacy MVP path: base64-encoded PKCS#7 and raw CSR submissions are still accepted; responses use the legacy PKCS#7 certs-only shape via the shared `internal/pkcs7` package. The MVP fall-through is non-negotiable — backward compat with lightweight SCEP clients that don't speak full RFC 8894. Single certs are returned as raw DER for `GetCACert`, chains as PKCS#7.
|
||||
@@ -890,23 +884,27 @@ each per-profile dispatcher carries its own **trust anchor pool**:
|
||||
the public certs the operator extracted from the Connector's
|
||||
installation. Every Intune-flavored enrollment goes through:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────┐
|
||||
│ Per-profile TrustAnchorHolder │
|
||||
│ (RWMutex pool, SIGHUP-reloadable) │
|
||||
└────────────┬────────────────────┘
|
||||
│ Get()
|
||||
▼
|
||||
device → SCEP PKIMessage → handler → SCEPService.dispatchIntuneChallenge
|
||||
│
|
||||
├─► intune.ValidateChallenge (sig + iat/exp + audience)
|
||||
├─► claim.DeviceMatchesCSR (set-equality)
|
||||
├─► intune.ReplayCache.CheckAndInsert
|
||||
├─► intune.PerDeviceRateLimiter.Allow
|
||||
└─► (V3-Pro) ComplianceCheck hook
|
||||
│
|
||||
▼
|
||||
processEnrollment → IssuerConnector
|
||||
```mermaid
|
||||
flowchart TD
|
||||
TAH["Per-profile TrustAnchorHolder<br/>(RWMutex pool, SIGHUP-reloadable)"]
|
||||
Device[device]
|
||||
Handler[handler]
|
||||
Dispatch["SCEPService.dispatchIntuneChallenge"]
|
||||
Validate["intune.ValidateChallenge<br/>(sig + iat/exp + audience)"]
|
||||
Match["claim.DeviceMatchesCSR<br/>(set-equality)"]
|
||||
Replay["intune.ReplayCache.CheckAndInsert"]
|
||||
Rate["intune.PerDeviceRateLimiter.Allow"]
|
||||
Compliance["(V3-Pro) ComplianceCheck hook"]
|
||||
Process["processEnrollment → IssuerConnector"]
|
||||
Device -->|SCEP PKIMessage| Handler
|
||||
Handler --> Dispatch
|
||||
TAH -.->|"Get()"| Dispatch
|
||||
Dispatch --> Validate
|
||||
Dispatch --> Match
|
||||
Dispatch --> Replay
|
||||
Dispatch --> Rate
|
||||
Dispatch --> Compliance
|
||||
Dispatch --> Process
|
||||
```
|
||||
|
||||
The trust anchor file is mode-0600 on disk; certctl loads it at
|
||||
@@ -932,22 +930,16 @@ See [`scep-intune.md`](scep-intune.md) for the full migration playbook
|
||||
|
||||
The local issuer's CA private key is wrapped behind the `signer.Signer` interface in `internal/crypto/signer/`. Every CA-signing call site — leaf certificate issuance (`x509.CreateCertificate`), CRL generation (`x509.CreateRevocationList`), and OCSP response signing (`ocsp.CreateResponse`) — accesses the key through this interface rather than touching `crypto.Signer` directly. The interface embeds the stdlib `crypto.Signer` and adds a single `Algorithm() Algorithm` method so call sites can pick the matching `x509.SignatureAlgorithm` without reflecting on the concrete key type.
|
||||
|
||||
```
|
||||
┌─────────────────────────────────┐
|
||||
│ signer.Driver (pluggable) │
|
||||
├─────────────────────────────────┤
|
||||
internal/connector/issuer/local │ signer.FileDriver (default) │
|
||||
c.caSigner signer.Signer ──────────► │ PEM key on disk │
|
||||
│ │
|
||||
│ signer.MemoryDriver (tests) │
|
||||
│ in-memory only │
|
||||
│ │
|
||||
│ signer.PKCS11Driver (V3-Pro) │
|
||||
│ HSM token (future) │
|
||||
│ │
|
||||
│ signer.CloudKMSDriver (V3-Pro) │
|
||||
│ AWS / GCP / Azure (future) │
|
||||
└─────────────────────────────────┘
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Local["internal/connector/issuer/local<br/>c.caSigner signer.Signer"]
|
||||
subgraph Driver["signer.Driver (pluggable)"]
|
||||
File["signer.FileDriver (default)<br/>PEM key on disk"]
|
||||
Memory["signer.MemoryDriver (tests)<br/>in-memory only"]
|
||||
PKCS11["signer.PKCS11Driver (V3-Pro)<br/>HSM token (future)"]
|
||||
Cloud["signer.CloudKMSDriver (V3-Pro)<br/>AWS / GCP / Azure (future)"]
|
||||
end
|
||||
Local --> Driver
|
||||
```
|
||||
|
||||
Today only `FileDriver` (production) and `MemoryDriver` (tests) ship. The interface exists so PKCS#11/HSM and cloud-KMS drivers can land in follow-on packages (`internal/crypto/signer/pkcs11`, etc.) without modifying any call site or any other driver. The L-014 file-on-disk threat-model carve-out documented at the top of `internal/connector/issuer/local/local.go` applies to `FileDriver`-backed signers; alternative drivers that keep the key inside an HSM token or cloud KMS close the disk-exposure leg of the threat model entirely.
|
||||
@@ -1052,6 +1044,8 @@ For deployments that need JWT/OIDC/mTLS, the standard pattern is to put an authe
|
||||
|
||||
The background scheduler uses `sync/atomic.Bool` idempotency guards on every loop (8 always-on plus up to 4 optional) — if a tick fires while the previous iteration is still running, it skips. A `sync.WaitGroup` tracks all in-flight goroutines. `WaitForCompletion(timeout)` blocks during shutdown until all work finishes or the timeout expires, preventing state corruption from mid-flight database operations during process exit.
|
||||
|
||||
The job-processor tick fans the per-job work out across up to `CERTCTL_RENEWAL_CONCURRENCY` goroutines (default 25), gated by `golang.org/x/sync/semaphore.Weighted`. The cap is the operator's lever for "how many concurrent CA calls per scheduler tick" — operators with permissive upstream limits and large fleets (>10k certs) can bump to 100; operators with strict limits or async-CA-heavy fleets should stay at 25 or lower. Values ≤ 0 normalise to 1 (sequential). The Acquire is ctx-aware so a shutdown-driven ctx cancel interrupts the dispatch loop promptly; in-flight goroutines drain via Wait before the tick returns. Closes the #9 acquisition-readiness blocker from the 2026-05-01 issuer coverage audit (pre-fix the fan-out had no cap, so a 5,000-cert sweep tripped DigiCert / Entrust / Sectigo rate limits and the next tick re-fanned-out the same calls).
|
||||
|
||||
### Logging
|
||||
|
||||
All logging throughout the service layer uses Go's `log/slog` package for structured, queryable logs. This replaces ad-hoc `fmt.Printf` statements with consistent key-value logging that includes request context, operation names, and error details. Agents also implement exponential backoff on network failures to gracefully handle temporary connectivity issues with the control plane.
|
||||
@@ -1320,6 +1314,16 @@ certctl is extensively tested across eight layers with CI-enforced coverage gate
|
||||
|
||||
For detailed test procedures, smoke tests, and the release sign-off checklist, see the [Testing Guide](testing-guide.md). For setting up the Docker Compose test environment with real CA backends, see [Test Environment](test-env.md).
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
Closes the #8 acquisition-readiness blocker from the 2026-05-01 issuer coverage audit (see `cowork/issuer-coverage-audit-2026-05-01/RESULTS.md`). Pre-audit, certctl had no benchmarks or load tests for any API path, so any throughput claim was hand-waved; the harness in `deploy/test/loadtest/` substantiates the API-tier capacity numbers with reproducible methodology.
|
||||
|
||||
The harness drives a k6 client at sustained 50 req/s × 2 scenarios × 5 minutes against a docker-compose stack of postgres + tls-init + certctl-server. Two scenarios run in parallel: `POST /api/v1/certificates` (issuance-acceptance hot path: auth + JSON decode + validation + service `CreateCertificate` + `managed_certificates` insert) and `GET /api/v1/certificates?per_page=50` (most-trafficked read endpoint). Hard regression-guard thresholds: p99 < 5 s for issuance-acceptance, p99 < 2 s for list, error rate < 1% globally. k6 exits non-zero on any threshold breach so a future PR that pushes p99 above the bar fails `make loadtest`. Run via `make loadtest` from the repo root or via `.github/workflows/loadtest.yml` (`workflow_dispatch` + weekly cron — never per-push).
|
||||
|
||||
What this measures vs what it does NOT: the harness intentionally measures the API tier (auth → DB), not the issuer connector round-trip latency. Connector calls (DigiCert, ACME, Vault, AWS ACM PCA, etc.) happen asynchronously through the renewal scheduler and are pinned by the `certctl_issuance_duration_seconds{issuer_type=...}` Prometheus histogram (audit fix #4 from the same audit). Driving them through k6 would amount to load-testing someone else's API, which is the wrong thing to do. The full ACME enrollment flow (multi-RTT order/challenge/finalize against pebble) is deferred — sustained 100/s through that flow needs pebble tuning + crypto helpers k6 doesn't ship out of the box.
|
||||
|
||||
Captured baseline numbers are committed in `deploy/test/loadtest/README.md` once an operator runs the harness on a representative workstation; future tuning commits land alongside refreshed baseline numbers so each commit's impact is diffable. Operators considering certctl for a 50k-cert fleet at 47-day TLS rotation (CA/B Forum SC-081v3, lands 2029) have a published number with documented methodology to compare against, not a claim.
|
||||
|
||||
## What's Next
|
||||
|
||||
- [Quick Start](quickstart.md) — Get certctl running locally
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
# Async-CA Polling — Operator Reference
|
||||
|
||||
Closes audit fix #5 from the 2026-05-01 issuer-coverage acquisition-readiness audit.
|
||||
|
||||
## What this is
|
||||
|
||||
Four issuer connectors talk to Certificate Authorities that issue
|
||||
certificates **asynchronously** — `IssueCertificate` returns an order
|
||||
ID immediately, and the caller (or scheduler) must call
|
||||
`GetOrderStatus` later to retrieve the issued cert:
|
||||
|
||||
- **DigiCert** (CertCentral)
|
||||
- **Sectigo** (Certificate Manager)
|
||||
- **Entrust** (Certificate Services / CA Gateway)
|
||||
- **GlobalSign** (Atlas HVCA)
|
||||
|
||||
Pre-fix, each connector's `GetOrderStatus` made one HTTP call per
|
||||
invocation with no exponential backoff, no retry cap, and no deadline.
|
||||
Under a renewal sweep, certctl would hammer the upstream CA's
|
||||
rate-limit budget. A 429 response was treated as a hard error,
|
||||
which then caused the scheduler to retry on the next tick — re-fanning
|
||||
out the same call that just got rate-limited.
|
||||
|
||||
Post-fix, `GetOrderStatus` blocks for up to `PollMaxWait` (default
|
||||
10 minutes) doing **bounded internal polling**:
|
||||
|
||||
```
|
||||
attempt 1 → wait 5s → attempt 2 → wait 15s → attempt 3 → wait 45s →
|
||||
attempt 4 → wait 2m → attempt 5 → wait 5m → ... (capped at 5m)
|
||||
```
|
||||
|
||||
±20% jitter is applied at every wait so multiple certctl instances
|
||||
never synchronize on the upstream CA's rate-limit window. The
|
||||
`PollMaxWait` deadline is a hard cap; if the upstream still hasn't
|
||||
completed by then, `GetOrderStatus` returns `StillPending` and the
|
||||
scheduler can re-enqueue the job for a future tick.
|
||||
|
||||
## Status-code triage
|
||||
|
||||
Each connector classifies HTTP responses to drive polling decisions:
|
||||
|
||||
| Response | Meaning | Decision |
|
||||
|---|---|---|
|
||||
| 2xx + status="issued"/"completed" | Cert ready | Done — return the cert |
|
||||
| 2xx + status="pending"/"processing" | Still working | StillPending — keep polling |
|
||||
| 2xx + status="rejected"/"denied"/"failed" | Permanent | Done — return `OrderStatus{Status:"failed"}` |
|
||||
| 2xx + parse failure | Body is broken | Failed — return error |
|
||||
| 4xx other than 429 (e.g. 400/401/403/404) | Permanent client error | Failed — return error |
|
||||
| 429 (rate limited) | Transient | StillPending — keep polling with backoff |
|
||||
| 5xx | Transient | StillPending — keep polling with backoff |
|
||||
| Network / TLS error | Transient | StillPending — keep polling with backoff |
|
||||
|
||||
## Operator tuning
|
||||
|
||||
Each connector exposes a `PollMaxWaitSeconds` config field and
|
||||
matching env var:
|
||||
|
||||
| Connector | Env var | Default |
|
||||
|---|---|---|
|
||||
| DigiCert | `CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS` | 600 (10m) |
|
||||
| Sectigo | `CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS` | 600 (10m) |
|
||||
| Entrust | `CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS` | 600 (10m) |
|
||||
| GlobalSign | `CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS` | 600 (10m) |
|
||||
|
||||
Tune up (e.g., `86400` = 24 hours) for **Entrust approval-pending
|
||||
workflows** where humans manually approve enrollments. Tune down (e.g.,
|
||||
`60`) for high-throughput environments that prefer to recycle the
|
||||
scheduler tick rather than block one renewal goroutine for minutes.
|
||||
|
||||
A value of 0 (or unset) falls back to the package default in
|
||||
`internal/connector/issuer/asyncpoll`.
|
||||
|
||||
## Failure modes
|
||||
|
||||
**Upstream returns 429 forever.** The Poller respects the backoff
|
||||
(5s → 15s → 45s → 2m → 5m), so a sustained 429 stream burns through
|
||||
the full `PollMaxWait` budget with at most 7-8 attempts (instead of
|
||||
~600 attempts at 1/sec). After `PollMaxWait` expires, `GetOrderStatus`
|
||||
returns `StillPending`; the scheduler re-enqueues for the next tick.
|
||||
The total request volume against the upstream is bounded by `tick
|
||||
interval / minimum backoff` — typically 1-2 requests per minute even
|
||||
under heavy load.
|
||||
|
||||
**Sectigo `collectNotReady` sentinel.** When the SCM status endpoint
|
||||
reports `Issued` but the cert collect endpoint isn't yet ready, the
|
||||
old code branched into a special "pending" return. Now that branch
|
||||
returns `StillPending` from the poll closure, so the cert collection
|
||||
rides the same backoff schedule.
|
||||
|
||||
**Entrust approval-pending.** The `AWAITING_APPROVAL` status maps to
|
||||
`StillPending`. With the default `PollMaxWait=10m`, the scheduler
|
||||
will re-enqueue once per tick if approval hasn't happened yet; with
|
||||
`PollMaxWait=24h` the same renewal goroutine waits the full approval
|
||||
window. Pick the latter when you have many approval-pending
|
||||
enrollments per tick.
|
||||
|
||||
## Where the implementation lives
|
||||
|
||||
- `internal/connector/issuer/asyncpoll/asyncpoll.go` — shared `Poller`
|
||||
with backoff math, jitter, deadline, and ctx-aware cancellation.
|
||||
- `internal/connector/issuer/digicert/digicert.go` —
|
||||
`pollOrderOnce` + `GetOrderStatus` orchestrator.
|
||||
- `internal/connector/issuer/sectigo/sectigo.go` —
|
||||
`pollEnrollmentOnce` + status-code permanence triage
|
||||
(`isPermanentStatusError`).
|
||||
- `internal/connector/issuer/entrust/entrust.go` —
|
||||
`pollEnrollmentOnce` + approval-pending mapping.
|
||||
- `internal/connector/issuer/globalsign/globalsign.go` —
|
||||
`pollCertificateOnce` (serial-number tracking).
|
||||
- `internal/connector/issuer/asyncpoll/asyncpoll_test.go` — 11 unit
|
||||
tests covering happy path, transient-then-success, Failed
|
||||
termination, MaxWait timeout, last-error wrap, ctx cancel,
|
||||
multiplicative backoff, jitter bounds, defaults.
|
||||
|
||||
## Audit blocker reference
|
||||
|
||||
cowork/issuer-coverage-audit-2026-05-01/RESULTS.md, Top-10 fix #5
|
||||
(Part 1.5 finding #4: "No polling backoff for async CAs").
|
||||
@@ -53,7 +53,7 @@ helm install certctl deploy/helm/certctl/ \
|
||||
On each VM, bare-metal server, or appliance (via proxy agent):
|
||||
```bash
|
||||
# Linux amd64
|
||||
curl -sSL https://github.com/shankar0123/certctl/releases/download/v2.1.0/certctl-agent-linux-amd64 \
|
||||
curl -sSL https://github.com/certctl-io/certctl/releases/download/v2.1.0/certctl-agent-linux-amd64 \
|
||||
-o /usr/local/bin/certctl-agent
|
||||
chmod +x /usr/local/bin/certctl-agent
|
||||
|
||||
|
||||
+22
-11
@@ -17,17 +17,28 @@ This guide covers the **on-push pipeline** only.
|
||||
|
||||
## On-push pipeline (7 status checks)
|
||||
|
||||
```
|
||||
push to master
|
||||
├── CI workflow (5 jobs)
|
||||
│ ├── go-build-and-test (~6-7 min)
|
||||
│ ├── frontend-build (~1 min)
|
||||
│ ├── helm-lint (~10 sec)
|
||||
│ ├── deploy-vendor-e2e (~5 min, depends on go-build-and-test)
|
||||
│ └── image-and-supply-chain (~3 min, parallel)
|
||||
└── CodeQL workflow (2 jobs)
|
||||
├── Analyze (go) (~5 min, parallel)
|
||||
└── Analyze (javascript-typescript) (~5 min, parallel)
|
||||
```mermaid
|
||||
flowchart TD
|
||||
Push["push to master"]
|
||||
CI["CI workflow (5 jobs)"]
|
||||
CodeQL["CodeQL workflow (2 jobs)"]
|
||||
GoBuild["go-build-and-test<br/>~6-7 min"]
|
||||
Frontend["frontend-build<br/>~1 min"]
|
||||
HelmLint["helm-lint<br/>~10 sec"]
|
||||
Vendor["deploy-vendor-e2e<br/>~5 min, depends on go-build-and-test"]
|
||||
Image["image-and-supply-chain<br/>~3 min, parallel"]
|
||||
AnalyzeGo["Analyze (go)<br/>~5 min, parallel"]
|
||||
AnalyzeJS["Analyze (javascript-typescript)<br/>~5 min, parallel"]
|
||||
Push --> CI
|
||||
Push --> CodeQL
|
||||
CI --> GoBuild
|
||||
CI --> Frontend
|
||||
CI --> HelmLint
|
||||
CI --> Vendor
|
||||
CI --> Image
|
||||
CodeQL --> AnalyzeGo
|
||||
CodeQL --> AnalyzeJS
|
||||
GoBuild -.depends on.-> Vendor
|
||||
```
|
||||
|
||||
End-to-end wall-clock: dominated by `go-build-and-test` + `deploy-vendor-e2e` chain (~12 min) running in parallel with CodeQL (~5 min). Target ~10 min.
|
||||
|
||||
@@ -158,7 +158,7 @@ The real IIS connector validation lives in:
|
||||
|
||||
```powershell
|
||||
# Clone + checkout
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl
|
||||
git fetch --tags
|
||||
git checkout v2.X.0 # whichever release is being validated
|
||||
|
||||
+524
-24
@@ -19,7 +19,8 @@ Connectors extend certctl to integrate with external systems for certificate iss
|
||||
- [Revocation Across Issuers](#revocation-across-issuers)
|
||||
- [EST Integration (GetCACertPEM)](#est-integration-getcacertpem)
|
||||
- [Building a Custom Issuer](#building-a-custom-issuer)
|
||||
3. [Target Connector](#target-connector)
|
||||
3. [ACME Server (Built-in)](#acme-server-built-in)
|
||||
4. [Target Connector](#target-connector)
|
||||
- [Interface](#interface-1)
|
||||
- [Built-in: NGINX](#built-in-nginx)
|
||||
- [Built-in: Apache httpd](#built-in-apache-httpd)
|
||||
@@ -34,28 +35,28 @@ Connectors extend certctl to integrate with external systems for certificate iss
|
||||
- [Windows Certificate Store](#windows-certificate-store)
|
||||
- [Java Keystore (JKS / PKCS#12)](#java-keystore-jks--pkcs12)
|
||||
- [Kubernetes Secrets](#kubernetes-secrets)
|
||||
4. [Notifier Connector](#notifier-connector)
|
||||
5. [Notifier Connector](#notifier-connector)
|
||||
- [Interface](#interface-2)
|
||||
5. [Registering a Connector](#registering-a-connector)
|
||||
6. [Registering a Connector](#registering-a-connector)
|
||||
- [IssuerConnectorAdapter](#issuerconnectoradapter)
|
||||
- [Notifier Registration](#notifier-registration)
|
||||
6. [Testing Connectors](#testing-connectors)
|
||||
7. [Testing Connectors](#testing-connectors)
|
||||
- [Unit Tests](#unit-tests)
|
||||
- [Integration Tests](#integration-tests)
|
||||
7. [Best Practices](#best-practices)
|
||||
8. [Agent Discovery Scanner](#agent-discovery-scanner)
|
||||
8. [Best Practices](#best-practices)
|
||||
9. [Agent Discovery Scanner](#agent-discovery-scanner)
|
||||
- [Configuration](#configuration)
|
||||
- [How It Works](#how-it-works)
|
||||
- [API Endpoints](#api-endpoints)
|
||||
- [Use Cases](#use-cases)
|
||||
9. [Network Certificate Scanner (M21)](#network-certificate-scanner-m21)
|
||||
- [Configuration](#configuration-1)
|
||||
- [Creating Scan Targets](#creating-scan-targets)
|
||||
- [How It Works](#how-it-works-1)
|
||||
- [API Endpoints](#api-endpoints-1)
|
||||
- [Scheduler Integration](#scheduler-integration)
|
||||
- [Use Cases](#use-cases-1)
|
||||
10. [What's Next](#whats-next)
|
||||
10. [Network Certificate Scanner (M21)](#network-certificate-scanner-m21)
|
||||
- [Configuration](#configuration-1)
|
||||
- [Creating Scan Targets](#creating-scan-targets)
|
||||
- [How It Works](#how-it-works-1)
|
||||
- [API Endpoints](#api-endpoints-1)
|
||||
- [Scheduler Integration](#scheduler-integration)
|
||||
- [Use Cases](#use-cases-1)
|
||||
11. [What's Next](#whats-next)
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -261,6 +262,14 @@ The connector is registered in the issuer registry under `iss-acme-staging` and
|
||||
|
||||
**Note:** ACME-issued certificates rely on the Local CA for CRL/OCSP endpoints if they are stored in certctl's inventory. For issuers with their own public CRL/OCSP infrastructure (e.g., Let's Encrypt), clients should validate against the issuer's endpoints instead.
|
||||
|
||||
**Revocation by serial number.** RFC 8555 §7.6 requires the certificate DER bytes (not just the serial) on the revoke wire — but a CLM platform's job is to abstract over that limitation. Operators routinely have only the serial in hand: the original PEM was lost, the private key was rotated, the operator clicked "revoke" in the GUI based on a row in the certs list. certctl's ACME `RevokeCertificate(ctx, RevocationRequest{Serial: ...})` looks the serial up in the local cert store (`certificate_versions.pem_chain`), decodes the leaf-cert PEM into DER, and calls the ACME revoke endpoint with `(accountKey, der, reasonCode)` — RFC 8555 §7.6 case 1, "revocation request signed with account key". This works because the same account key issued the cert, so authority is intrinsic.
|
||||
|
||||
The cert version must exist in the local store: this means the cert was issued through certctl, not imported. If `GetVersionBySerial` returns `sql.ErrNoRows`, the connector returns an actionable error pointing at the local-store requirement. Revoke-by-serial is therefore only available for ACME certs that certctl issued.
|
||||
|
||||
Reason codes follow RFC 5280 §5.3.1: nil reason maps to `unspecified` (0), and the connector accepts the canonical camelCase form (`keyCompromise`, `cACompromise`, `affiliationChanged`, `superseded`, `cessationOfOperation`, `certificateHold`, `removeFromCRL`, `privilegeWithdrawn`, `aACompromise`) plus underscore_lower and ALL_CAPS_UNDERSCORE variants. An unknown reason returns an error rather than silently demoting to `unspecified` — operators rely on the reason for compliance reporting (PCI-DSS §3.6, HIPAA §164.312).
|
||||
|
||||
Audit reference: `cowork/issuer-coverage-audit-2026-05-01/RESULTS.md` Top-10 fix #7.
|
||||
|
||||
Location: `internal/connector/issuer/acme/acme.go`, `internal/connector/issuer/acme/dns.go`
|
||||
|
||||
### Built-in: step-ca (Smallstep Private CA)
|
||||
@@ -307,6 +316,49 @@ Script-based issuer connector for organizations with existing CA tooling. Delega
|
||||
|
||||
The sign script receives the CSR PEM on stdin and should output the signed certificate PEM on stdout. The connector parses the certificate to extract serial number, validity dates, and chain information. Before shell execution, serial numbers are validated as hex-only (`^[0-9a-fA-F]+$`) and revocation reason codes are validated against the RFC 5280 specification to prevent command injection.
|
||||
|
||||
#### Operator playbook: OpenSSL shell-out threat model
|
||||
|
||||
certctl's OpenSSL adapter `exec`s an operator-supplied script for every certificate lifecycle operation (issue / renew / revoke / CRL generation). The script runs as the certctl-server user with that user's full filesystem and network access. **This is by design** — the OpenSSL adapter exists precisely to support operators integrating with arbitrary CLI-driven CAs that don't have a Go SDK. The cost is a wider attack surface than any other issuer in the catalog. This subsection enumerates the threat model + mitigations so an operator (or an acquirer's security reviewer) can decide whether the adapter is appropriate for their environment. Top-10 fix #6 of the 2026-05-03 issuer-coverage audit.
|
||||
|
||||
**Why the adapter accepts a shell-out at all:**
|
||||
|
||||
- Many enterprise PKI operators run their own CLI-driven CA (BoringSSL, custom OpenSSL wrappers, hardware-CA controllers, internal CAs with no published SDK). A Go SDK doesn't exist; a shell-out is the only integration path short of building a full Go-native adapter per CA.
|
||||
- Mirrors the same posture the SSH connector applies (`InsecureIgnoreHostKey` on operator-controlled networks): certctl trusts the operator to configure the integration sensibly.
|
||||
- Avoids forking the project per-CA — one OpenSSL adapter can cover dozens of CLI-driven CAs.
|
||||
|
||||
**Threat model the adapter accepts:**
|
||||
|
||||
- A trusted operator pointing at a trusted script that lives in a trusted filesystem location (`/usr/local/bin/`, `/opt/<vendor>/bin/`, etc.) with appropriate ownership (root-owned, mode 0755) and a clear audit trail (filesystem-monitored, version-controlled).
|
||||
- Env-var inheritance from the certctl-server process. Operators must NOT export sensitive credentials (Vault tokens, API keys for OTHER systems) into certctl-server's environment — or, if they must, must accept that those credentials are visible to the issuance script. The connector does not whitelist or strip env vars before fork.
|
||||
- The hex-only serial-number filter (`^[0-9a-fA-F]+$`) and the RFC 5280 reason-code allow-list at `internal/validation/command.go` are defenses against argv-injection. They are NOT defenses against a malicious script — an operator who deploys a malicious script is outside this threat model entirely.
|
||||
|
||||
**Threat model the adapter does NOT accept:**
|
||||
|
||||
- A script path in a filesystem location writable by non-root users (`/tmp`, `/var/tmp`, `~`), where such a user can swap the binary mid-flight. **Symlink attack:** a non-root user with write access to the directory replaces the script with a symlink to `/etc/shadow` or `/root/.ssh/authorized_keys`; certctl-server reads (or in the worst case writes via a malicious script) those files.
|
||||
- Untrusted script content. The script can do anything the certctl-server user can — modify state outside `/etc/certctl/`, exfiltrate data, write SSH keys to enable persistence. Operators MUST review every script line before deploying.
|
||||
- A multi-tenant host where multiple operators deploy scripts under the same certctl-server. Process-level isolation isn't enforced; one operator's script can read another's working files (the temp CSR/cert files the connector writes to `os.TempDir()` are mode 0600 but are visible by name to anyone who can list the directory).
|
||||
|
||||
**Mitigations operators can layer on:**
|
||||
|
||||
- **Run certctl-server under a dedicated unprivileged user** (e.g. `certctl:certctl`). Limits the blast radius of a misbehaving script. The systemd unit ships with `User=certctl` by default — keep it that way.
|
||||
- **Pin the script path to a root-owned mode-0755 binary** (`/usr/local/bin/issue-cert.sh`, root:root, 0755). Add a filesystem audit rule (`auditctl -w /usr/local/bin/issue-cert.sh -p wa -k certctl-script`) so any write attempt to the script is logged.
|
||||
- **Set a per-call timeout via `CERTCTL_OPENSSL_TIMEOUT_SECONDS`** (env-mapped to `Config.TimeoutSeconds`, default 30s). The connector wires this through `exec.CommandContext` so a hung script is killed at the wall-clock budget. Production operators should set it to the upper bound of legitimate issuance time — anything longer is a runaway.
|
||||
- **Sanitise the certctl-server environment.** systemd's `Environment=` directive lets operators allow-list which env vars certctl-server (and therefore the script) sees. Default-deny is the safe posture; the connector itself does NOT scrub envs before fork.
|
||||
- **Use a chroot or container.** systemd's `RootDirectory=` or running certctl-server in a container limits the filesystem the script can touch. Trade-off: complicates operator debugging.
|
||||
- **Audit the script's behaviour.** A wrapper script that logs every invocation's argv + env-snapshot + exit code to a separate audit log gives operators a forensic trail. The wrapper is the operator's responsibility — certctl logs the cmd start/end at INFO level, which is enough for "did it run?" but not for "what did it do?"
|
||||
- **Per-call concurrency bound.** The renewal scheduler's `CERTCTL_RENEWAL_CONCURRENCY` (Bundle L closure) bounds scheduled traffic; ad-hoc `POST /api/v1/certificates` traffic isn't bounded. For high-volume environments, layer a reverse-proxy rate limit (nginx, HAProxy) in front of the API.
|
||||
|
||||
**When you should NOT use the OpenSSL adapter:**
|
||||
|
||||
- Compliance environments (PCI-DSS Level 1, FedRAMP High, HIPAA-regulated PHI handling) where shell-out attack surfaces are formally disallowed by your security policy.
|
||||
- Multi-tenant certctl-server deployments where tenant-A's script can affect tenant-B's certificates.
|
||||
- Environments without operator review of every script line — trust-on-first-use is the wrong posture for a shell-out.
|
||||
- For these cases, switch to a Go-native issuer adapter (Vault, DigiCert, Sectigo, ACME, AWSACMPCA, GoogleCAS, EJBCA, Entrust, GlobalSign, step-ca) or commission a custom Go-native adapter for your CA (the issuer connector interface in `internal/connector/issuer/interface.go` is small — `IssueCertificate` + `RevokeCertificate` + `GetCACertPEM` + a few stubs).
|
||||
|
||||
**V3-Pro forward path:**
|
||||
|
||||
The hardened OpenSSL adapter (chroot/container by default, env-var allow-list at the adapter layer, signed-script-binary verification, audit-log-on-every-invocation, per-call concurrency bound shared with the API surface) is V3-Pro work. Tracking: `cowork/WORKSPACE-ROADMAP.md` (search "OpenSSL hardened mode").
|
||||
|
||||
### Revocation Across Issuers
|
||||
|
||||
All issuer connectors implement `RevokeCertificate(ctx, serial, reason)`. When a certificate is revoked via `POST /api/v1/certificates/{id}/revoke`, certctl notifies the issuing CA on a best-effort basis — the revocation succeeds in certctl's inventory even if the CA notification fails (e.g., CA is temporarily unreachable). This ensures revocation is never blocked by external dependencies.
|
||||
@@ -422,7 +474,9 @@ The connector is registered in the issuer registry under `iss-vault`. Vault issu
|
||||
|
||||
**MaxTTL enforcement (M11c):** When a certificate profile defines a maximum TTL, the Vault connector overrides the TTL string in the signing request to ensure the issued certificate does not exceed the profile limit. This is applied before Vault's own role-level max TTL.
|
||||
|
||||
Location: `internal/connector/issuer/vault/vault.go`
|
||||
**Token TTL + automatic renewal (Top-10 fix #5, 2026-05-03 audit):** certctl-server periodically calls `POST /v1/auth/token/renew-self` at half the token's TTL to keep the integration alive without manual rotation; the cadence is read from a one-shot `lookup-self` at startup and re-derived on every successful renewal so a short bootstrap token that gets renewed up to a longer Max TTL shifts to the longer cadence automatically. The renewal loop emits the `certctl_vault_token_renewals_total{result="success"|"failure"|"not_renewable"}` Prometheus counter so operators see expiry trouble in Grafana before issuance breaks. When Vault returns `renewable: false` (configured Max TTL reached), the loop logs a WARN, increments `{result="not_renewable"}`, and exits — the operator must rotate the Vault token and restart certctl-server (or use the GUI/MCP issuer-update path to swap the token in place; the registry's Rebuild path re-Starts the lifecycle on the new connector). Per-tick failures (e.g. transient 5xx, brief network blips) bump `{result="failure"}` and the loop keeps ticking; only the explicit `renewable: false` case stops it.
|
||||
|
||||
Location: `internal/connector/issuer/vault/vault.go` + `internal/connector/issuer/vault/vault_renew.go`
|
||||
|
||||
### Built-in: DigiCert CertCentral
|
||||
|
||||
@@ -436,8 +490,9 @@ The DigiCert connector integrates with DigiCert's CertCentral REST API for order
|
||||
| `CERTCTL_DIGICERT_ORG_ID` | — | DigiCert organization ID |
|
||||
| `CERTCTL_DIGICERT_PRODUCT_TYPE` | `ssl_basic` | Certificate product (e.g., `ssl_basic`, `ssl_plus`, `ssl_ev`) |
|
||||
| `CERTCTL_DIGICERT_BASE_URL` | `https://www.digicert.com/services/v2` | DigiCert API base URL |
|
||||
| `CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS` | `600` | Bounded-polling deadline for `GetOrderStatus`. See [docs/async-polling.md](async-polling.md). |
|
||||
|
||||
The connector submits certificate orders to DigiCert's `/order/certificate/create` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by DigiCert) and poll-based completion. The connector periodically checks order status via `/order/certificate/{order_id}` until the certificate is available.
|
||||
The connector submits certificate orders to DigiCert's `/order/certificate/create` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by DigiCert) and poll-based completion. `GetOrderStatus` runs bounded internal polling (5s/15s/45s/2m/5m capped, ±20% jitter, default 10-minute deadline) — see [async-polling.md](async-polling.md).
|
||||
|
||||
**Authentication:** API key passed via `X-DC-DEVKEY` header, with organization ID in request body.
|
||||
|
||||
@@ -460,8 +515,9 @@ The Sectigo connector integrates with Sectigo Certificate Manager's REST API for
|
||||
| `CERTCTL_SECTIGO_CERT_TYPE` | — | Certificate type ID (integer, from `/ssl/v1/types`) |
|
||||
| `CERTCTL_SECTIGO_TERM` | `365` | Certificate validity in days |
|
||||
| `CERTCTL_SECTIGO_BASE_URL` | `https://cert-manager.com/api` | Sectigo API base URL |
|
||||
| `CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS` | `600` | Bounded-polling deadline for `GetOrderStatus`. The `collectNotReady` sentinel (cert approved but not yet retrievable) rides the same backoff schedule. See [docs/async-polling.md](async-polling.md). |
|
||||
|
||||
The connector submits certificate enrollments to Sectigo's `/ssl/v1/enroll` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by Sectigo) and poll-based completion. The connector periodically checks enrollment status via `/ssl/v1/{sslId}` and downloads the PEM bundle via `/ssl/v1/collect/{sslId}/pem` when issued.
|
||||
The connector submits certificate enrollments to Sectigo's `/ssl/v1/enroll` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by Sectigo) and poll-based completion. `GetOrderStatus` runs bounded internal polling — see [async-polling.md](async-polling.md).
|
||||
|
||||
**Authentication:** Three custom headers on every request — `customerUri`, `login`, and `password`.
|
||||
|
||||
@@ -489,7 +545,7 @@ Location: `internal/connector/issuer/googlecas/googlecas.go`
|
||||
|
||||
### Built-in: AWS ACM Private CA
|
||||
|
||||
AWS Certificate Manager Private Certificate Authority — managed private CA on AWS. Synchronous issuance via ACM PCA API with standard AWS credential chain (env vars, IAM roles, instance profiles, SSO).
|
||||
AWS Certificate Manager Private Certificate Authority — managed private CA on AWS. Synchronous-via-waiter issuance: the connector calls `IssueCertificate` (which is asynchronous at the ACM PCA API level), then runs the SDK's `NewCertificateIssuedWaiter` until the cert reaches `CERTIFICATE_ISSUED` state, then `GetCertificate` to retrieve the PEM. Default waiter timeout is 5 minutes; tune by editing `defaultWaiterTimeout` in the connector.
|
||||
|
||||
| Setting | Required | Default | Description |
|
||||
|---------|----------|---------|-------------|
|
||||
@@ -501,9 +557,57 @@ AWS Certificate Manager Private Certificate Authority — managed private CA on
|
||||
|
||||
**Supported signing algorithms:** SHA256WITHRSA, SHA384WITHRSA, SHA512WITHRSA, SHA256WITHECDSA, SHA384WITHECDSA, SHA512WITHECDSA.
|
||||
|
||||
**Authentication:** Standard AWS credential chain. The connector uses `aws-sdk-go-v2/config.LoadDefaultConfig()` which supports environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`), IAM roles (EC2/ECS), instance profiles, and SSO credentials.
|
||||
**Authentication:** Standard AWS credential chain via `aws-sdk-go-v2/config.LoadDefaultConfig()`. Resolves credentials in this order: environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, plus the `AWS_WEB_IDENTITY_TOKEN_FILE`/`AWS_ROLE_ARN` pair that IAM Roles for Service Accounts injects on EKS), shared config files (`~/.aws/config`, `~/.aws/credentials`, profile via `AWS_PROFILE`, including SSO profiles), ECS task roles, and finally EC2 instance profiles. certctl never stores AWS credentials directly — set them in the certctl process's environment or via the IAM role attached to the host.
|
||||
|
||||
**Note:** CRL and OCSP are managed by AWS ACM PCA directly. certctl records revocations locally and notifies AWS via the RevokeCertificate API with RFC 5280 reason mapping.
|
||||
**Minimal IAM policy.** The IAM principal that certctl authenticates as needs the following actions against the CA's ARN:
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Effect": "Allow",
|
||||
"Action": [
|
||||
"acm-pca:IssueCertificate",
|
||||
"acm-pca:GetCertificate",
|
||||
"acm-pca:RevokeCertificate",
|
||||
"acm-pca:GetCertificateAuthorityCertificate"
|
||||
],
|
||||
"Resource": "arn:aws:acm-pca:us-east-1:123456789012:certificate-authority/12345678-1234-1234-1234-123456789012"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Replace the `Resource` ARN with your own CA ARN. If you use a `TemplateArn` (subordinate-CA template), the policy needs no additional permissions — `IssueCertificate` covers it.
|
||||
|
||||
**Worked example.** Add an AWSACMPCA issuer via the API:
|
||||
|
||||
```bash
|
||||
curl -k -X POST https://localhost:8443/api/v1/issuers \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"id": "iss-aws-prod",
|
||||
"name": "AWS ACM PCA (prod)",
|
||||
"type": "AWSACMPCA",
|
||||
"config": {
|
||||
"region": "us-east-1",
|
||||
"ca_arn": "arn:aws:acm-pca:us-east-1:123456789012:certificate-authority/12345678-1234-1234-1234-123456789012",
|
||||
"signing_algorithm": "SHA256WITHRSA",
|
||||
"validity_days": 90
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
The certctl server process must have AWS credentials available before the issuer is created (or before any subsequent issuance call). For a local dev run with shared-config creds: `export AWS_PROFILE=my-profile` before `docker compose up`. For an EKS deployment: attach an IRSA-bound IAM role to the certctl pod's service account.
|
||||
|
||||
**Troubleshooting.**
|
||||
|
||||
- **`AccessDeniedException: User ... is not authorized to perform: acm-pca:IssueCertificate`** — the IAM principal certctl is using lacks the required actions. Apply the IAM policy above (scoped to your CA ARN) to the role/user. The principal can be inspected with `aws sts get-caller-identity` from the certctl host.
|
||||
- **`ResourceNotFoundException: Could not find Certificate Authority`** — the `CAArn` doesn't match any CA in the configured region. Common causes: region mismatch (CA is in `us-west-2`, certctl region is set to `us-east-1`), CA was deleted, ARN typo. Verify with `aws acm-pca describe-certificate-authority --certificate-authority-arn <arn> --region <region>`.
|
||||
- **`acmpca waiter (waiting for issuance): exceeded max wait time`** — the cert was submitted but didn't reach `CERTIFICATE_ISSUED` state within 5 minutes. Check the CA's CloudWatch metrics for backlog; check the CA's audit reports for any policy violations on the request. If the wait is consistently slow, edit `defaultWaiterTimeout` in `internal/connector/issuer/awsacmpca/awsacmpca.go` and rebuild.
|
||||
|
||||
**Note:** CRL and OCSP are managed by AWS ACM PCA directly. certctl records revocations locally and notifies AWS via the `RevokeCertificate` API with RFC 5280 reason mapping (e.g., `keyCompromise` → `KEY_COMPROMISE`). AWS ACM PCA's CRL distribution point and OCSP responder serve the resulting status to verifying clients; certctl is not in the OCSP path for this connector.
|
||||
|
||||
Location: `internal/connector/issuer/awsacmpca/awsacmpca.go`
|
||||
|
||||
@@ -518,6 +622,7 @@ Entrust CA Gateway REST API with mutual TLS (mTLS) client certificate authentica
|
||||
| `CERTCTL_ENTRUST_CLIENT_KEY_PATH` | Yes | — | Path to mTLS client private key PEM |
|
||||
| `CERTCTL_ENTRUST_CA_ID` | Yes | — | Certificate Authority ID (from `GET /certificate-authorities`) |
|
||||
| `CERTCTL_ENTRUST_PROFILE_ID` | No | — | Optional enrollment profile ID |
|
||||
| `CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS` | No | `600` (10m) | Bounded-polling deadline for `GetOrderStatus`. Approval-pending workflows where humans approve enrollments should bump to `86400` (24h) so a single tick can wait through the approval window. See [docs/async-polling.md](async-polling.md). |
|
||||
|
||||
**Authentication:** Mutual TLS — the client certificate and key are loaded via `tls.LoadX509KeyPair()` and attached to the HTTP transport. No API key or token required.
|
||||
|
||||
@@ -525,7 +630,9 @@ Entrust CA Gateway REST API with mutual TLS (mTLS) client certificate authentica
|
||||
|
||||
**Note:** CRL and OCSP are managed by Entrust. certctl records revocations locally and notifies Entrust via `PUT /v1/certificate-authorities/{caId}/certificates/{serial}/revoke`.
|
||||
|
||||
Location: `internal/connector/issuer/entrust/entrust.go`
|
||||
**mTLS keypair caching (audit fix #10):** The parsed client certificate plus a precomputed `*http.Transport` are cached on the connector after the first API call. Steady-state calls reuse the cached transport — no per-call disk read or `tls.X509KeyPair` parse. Rotation is picked up automatically via mtime polling: when the cert file's mtime advances beyond the last-loaded value, the next API call re-parses and rebuilds the transport. Operator workflow: `mv -f new.crt /etc/certctl/entrust/client.crt` (mtime changes), no process restart required, takes effect on the next API call. `os.Stat` errors during rotation surface as connector errors rather than silently serving stale credentials.
|
||||
|
||||
Location: `internal/connector/issuer/entrust/entrust.go` (cache shared at `internal/connector/issuer/mtlscache/`).
|
||||
|
||||
### Built-in: GlobalSign Atlas HVCA
|
||||
|
||||
@@ -539,6 +646,7 @@ GlobalSign Atlas High Volume CA REST API with dual authentication: mTLS for the
|
||||
| `CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH` | Yes | — | Path to mTLS client certificate PEM |
|
||||
| `CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH` | Yes | — | Path to mTLS client private key PEM |
|
||||
| `CERTCTL_GLOBALSIGN_SERVER_CA_PATH` | No | system trust store | PEM bundle used to verify the Atlas API server certificate. Set this for private/lab Atlas deployments whose server TLS chain is not in the host's default trust bundle. |
|
||||
| `CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS` | No | `600` (10m) | Bounded-polling deadline for `GetOrderStatus`. GlobalSign tracks orders by serial number rather than order ID; the polling shape is identical. See [docs/async-polling.md](async-polling.md). |
|
||||
|
||||
**Authentication:** Dual — mTLS client certificate for TLS handshake plus `X-API-Key` and `X-API-Secret` headers on every request.
|
||||
|
||||
@@ -548,7 +656,9 @@ GlobalSign Atlas High Volume CA REST API with dual authentication: mTLS for the
|
||||
|
||||
**Note:** CRL and OCSP are managed by GlobalSign. certctl records revocations locally and notifies GlobalSign via `PUT /v2/certificates/{serial}/revoke`.
|
||||
|
||||
Location: `internal/connector/issuer/globalsign/globalsign.go`
|
||||
**mTLS keypair caching (audit fix #10):** The parsed client certificate plus a precomputed `*http.Transport` (with `ServerCAPath` pinning preserved when configured) are cached on the connector after the first API call. Steady-state calls reuse the cached transport — no per-call disk read or `tls.X509KeyPair` parse. Rotation is picked up automatically via mtime polling: when the cert file's mtime advances beyond the last-loaded value, the next API call re-parses and rebuilds the transport. Operator workflow: `mv -f new.crt /etc/certctl/globalsign/client.crt` (mtime changes), no process restart required, takes effect on the next API call. `os.Stat` errors during rotation surface as connector errors rather than silently serving stale credentials.
|
||||
|
||||
Location: `internal/connector/issuer/globalsign/globalsign.go` (cache shared at `internal/connector/issuer/mtlscache/`).
|
||||
|
||||
### Built-in: EJBCA (Keyfactor)
|
||||
|
||||
@@ -592,7 +702,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
vaultapi "github.com/hashicorp/vault/api"
|
||||
"github.com/shankar0123/certctl/internal/connector/issuer"
|
||||
"github.com/certctl-io/certctl/internal/connector/issuer"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
@@ -648,6 +758,56 @@ func (v *VaultIssuer) IssueCertificate(ctx context.Context, req issuer.IssuanceR
|
||||
// ... implement RenewCertificate, RevokeCertificate, GetOrderStatus
|
||||
```
|
||||
|
||||
## ACME Server (Built-in)
|
||||
|
||||
certctl ships a built-in RFC 8555 + RFC 9773 ARI ACME **server**
|
||||
endpoint at `/acme/profile/<profile-id>/*`. Any RFC 8555 client
|
||||
(cert-manager 1.15+, Caddy, Traefik, win-acme, certbot, Posh-ACME)
|
||||
integrates with certctl as an ACME issuer with no certctl-side
|
||||
modification — closing the "deploy a certctl agent on every K8s node"
|
||||
friction that loses deals to external PKI vendors.
|
||||
|
||||
This is **distinct** from the [ACME consumer
|
||||
connector](#built-in-acme-v2-lets-encrypt-sectigo-zerossl) above. The
|
||||
consumer side is `certctl → external CA over ACME`; the server side
|
||||
is `external client → certctl over ACME`. Operators deploying both
|
||||
should namespace env vars carefully: consumer uses `CERTCTL_ACME_*`
|
||||
(`DIRECTORY_URL`, `EMAIL`, `CHALLENGE_TYPE`); server uses
|
||||
`CERTCTL_ACME_SERVER_*` (`ENABLED`, `DEFAULT_PROFILE_ID`, `NONCE_TTL`,
|
||||
…).
|
||||
|
||||
Two auth modes per profile (`certificate_profiles.acme_auth_mode`):
|
||||
|
||||
- **`trust_authenticated`** (default for internal PKI). The
|
||||
JWS-authenticated ACME account is trusted to issue for any identifier
|
||||
the profile policy permits; no out-of-band ownership proof. The
|
||||
most common certctl use case — internal-PKI fleets where the
|
||||
network itself is the trust boundary.
|
||||
- **`challenge`**. Full HTTP-01 + DNS-01 + TLS-ALPN-01 validation per
|
||||
RFC 8555 §8 + RFC 8737. Required for public-trust-style PKI where
|
||||
account-key compromise must not confer issuance authority.
|
||||
|
||||
Issuance routes through `service.CertificateService.Create` so policy, audit,
|
||||
metrics, bulk-revocation, and cloud-discovery all apply uniformly to
|
||||
ACME-issued certs (just as they do to API-issued, agent-issued,
|
||||
EST-issued, and SCEP-issued certs).
|
||||
|
||||
See:
|
||||
|
||||
- [ACME Server Reference](./acme-server.md) — env-var reference,
|
||||
endpoints, auth-mode decision tree, RFC 8555 conformance statement,
|
||||
troubleshooting, FAQ.
|
||||
- [cert-manager Walkthrough](./acme-cert-manager-walkthrough.md) — kind
|
||||
→ cert-manager → certctl-server → Certificate flow.
|
||||
- [Caddy Walkthrough](./acme-caddy-walkthrough.md) — Caddyfile `acme_ca`
|
||||
and trust configuration.
|
||||
- [Traefik Walkthrough](./acme-traefik-walkthrough.md) — `certificatesResolvers`
|
||||
and `serversTransport.rootCAs`.
|
||||
- [Threat Model](./acme-server-threat-model.md) — JWS forgery
|
||||
resistance, nonce store integrity, HTTP-01 SSRF, DNS-01 cache
|
||||
posture, TLS-ALPN-01 chain-not-validated rationale, rate-limit
|
||||
tuning, audit trail.
|
||||
|
||||
## Target Connector
|
||||
|
||||
Target connectors deploy certificates to infrastructure systems. They run on agents, not on the control plane.
|
||||
@@ -877,6 +1037,49 @@ All commands are validated against shell injection via `validation.ValidateShell
|
||||
|
||||
Location: `internal/connector/target/postfix/postfix.go`
|
||||
|
||||
#### Choosing Mode=postfix vs Mode=dovecot
|
||||
|
||||
The connector supports two modes via the `mode` config field, switching the daemon-specific defaults. **Both modes share the same Go connector code** (atomic-write, PreCommit/PostCommit hooks, post-deploy verify, rollback), so the rollback contract is identical across modes.
|
||||
|
||||
**Choose `mode: postfix` when** your target host runs Postfix as the MTA (typically port 25 SMTP/STARTTLS, 465 SMTPS, or 587 submission). Defaults applied by `applyDefaults` (see `internal/connector/target/postfix/postfix.go`):
|
||||
|
||||
| Default | Value |
|
||||
|---|---|
|
||||
| `cert_path` | `/etc/postfix/certs/cert.pem` |
|
||||
| `key_path` | `/etc/postfix/certs/key.pem` |
|
||||
| `validate_command` | `postfix check` |
|
||||
| `reload_command` | `postfix reload` |
|
||||
|
||||
`mode: postfix` is also the **default when `mode` is unset**.
|
||||
|
||||
**Choose `mode: dovecot` when** your target host runs Dovecot as the IMAPS / POP3S server (typically port 993 IMAPS or 995 POP3S). Defaults applied by `applyDefaults`:
|
||||
|
||||
| Default | Value |
|
||||
|---|---|
|
||||
| `cert_path` | `/etc/dovecot/certs/cert.pem` |
|
||||
| `key_path` | `/etc/dovecot/certs/key.pem` |
|
||||
| `validate_command` | `doveconf -n` |
|
||||
| `reload_command` | `doveadm reload` |
|
||||
|
||||
**Post-deploy TLS verify** is operator-supplied via `post_deploy_verify` (`enabled` + `endpoint` + `timeout`) — the connector does NOT bake in a per-mode default port. Operators that opt in should set `endpoint` to their daemon's listener (e.g. `mail.example.com:25` for Postfix STARTTLS, `mail.example.com:993` for Dovecot IMAPS).
|
||||
|
||||
**Hosts running BOTH Postfix and Dovecot** (the common mail-server pattern): configure **two separate targets** in the certctl control plane, one per daemon. Each gets its own cert path, its own validate/reload command, and its own optional verify endpoint. The cert + key bytes can be identical across the two targets if your mail server uses the same TLS material for both daemons (which many do); certctl does not deduplicate the deploys, but the byte-equal cert hits the SHA-256 idempotency short-circuit on subsequent renewals when the target paths haven't changed.
|
||||
|
||||
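**Sharing a single cert file across daemons** is simplest when both targets are configured with the same canonical `cert_path`. Be careful with filesystem symlinks: the atomic-write path finishes with `os.Rename`, and POSIX `rename(2)` replaces a symlink at the destination rather than following it, so a `cert_path` that is itself a symlink is replaced by a regular file on deploy. If one daemon must read through a symlink, keep that symlink outside certctl's managed paths and point it at the canonical path. Operators who want byte-deduplication should rely on this approach rather than asking certctl to coordinate it.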
|
||||
|
||||
**Daemon-specific quirks worth knowing:**
|
||||
|
||||
- **Postfix STARTTLS** (port 25) typically requires the cert to chain to a public root for receiving mail from arbitrary external MTAs that validate SMTP-side server certs. If you're deploying a self-signed cert from `iss-local`, configure the receiving Postfix accordingly (e.g. `smtpd_use_tls=yes` + `smtpd_tls_security_level=may` for opportunistic TLS so external senders that don't validate continue to deliver).
|
||||
- **Dovecot IMAPS** (port 993) is typically client-facing — the chain you ship matters more here because IMAPS clients (Thunderbird, Outlook) actively validate. Set `chain_path` if your certificate chain is supplied separately; when `chain_path` is unset, the connector appends the chain bytes to `cert_path`.
|
||||
- **Postfix and Dovecot do not share a TLS session cache** by default. Both reload independently, so a cert renewal that updates both targets via certctl requires both reloads to succeed before clients re-handshake. The two targets are fully independent in the certctl scheduler — one reload failing rolls back that target only.
|
||||
|
||||
**Test pin**: Bundle 11 (commit `88e8881`) added end-to-end tests for `Mode=dovecot`:
|
||||
|
||||
- `TestPostfix_Atomic_DovecotMode_HappyPath` — confirms `applyDefaults` populates the dovecot validate + reload commands AND the deploy threads them through to `runValidate` + `runReload`.
|
||||
- `TestPostfix_Atomic_DovecotMode_VerifyFails_Rollback` — confirms the rollback path under `Mode=dovecot` restores pre-deploy cert + key bytes byte-exact.
|
||||
|
||||
The `Mode=postfix` branch has equivalent test coverage in the same file (see `TestPostfix_HappyPath`, `TestPostfix_VerifyMismatch_Rollback`, `TestPostfix_ReloadFails_Rollback`).
|
||||
|
||||
### F5 BIG-IP (Implemented)
|
||||
|
||||
The F5 BIG-IP target connector deploys certificates to F5 load balancers via the iControl REST API. F5 appliances can't run agents directly, so this connector uses the **proxy agent pattern**: a designated certctl agent in the same network zone polls for F5 deployment jobs and executes iControl REST calls on behalf of the control plane. Minimum supported BIG-IP version: 12.0+.
|
||||
@@ -963,6 +1166,7 @@ The IIS target connector supports two deployment modes — agent-local (recommen
|
||||
- `ip_address` (string, default "*"): Specific IP to bind to, or "*" for all IPs
|
||||
- `binding_info` (string, optional): Host header for SNI bindings
|
||||
- `mode` (string, default "local"): Deployment mode — `local` (agent-local PowerShell) or `winrm` (remote via WinRM)
|
||||
- `exec_deadline` (duration, default `60s`): Per-PowerShell-subprocess cap that fires only when the caller's `ctx` has no deadline of its own. A caller-supplied deadline always wins; this is a safety net so a hung WinRM session or stuck `Cert:` provider call cannot block the deploy worker indefinitely. Operators on slow links (high-latency WinRM, slow Windows VMs) can extend with e.g. `"exec_deadline": "5m"`.
|
||||
|
||||
**WinRM fields (required when `mode` is `winrm`):**
|
||||
- `winrm.winrm_host` (string, required): Remote Windows server hostname or IP
|
||||
@@ -1043,6 +1247,43 @@ The SSH target connector enables agentless certificate deployment to any Linux/U
|
||||
|
||||
Location: `internal/connector/target/ssh/ssh.go`
|
||||
|
||||
#### Operator playbook: SSH host-key verification
|
||||
|
||||
certctl's SSH connector dials each target with `HostKeyCallback: ssh.InsecureIgnoreHostKey()`, meaning **the connector accepts any server host key without comparison against `known_hosts`**. This is a documented design choice (see `internal/connector/target/ssh/ssh.go` near `realSSHClient.Connect`) and not an oversight. The sections below cover the rationale, when the choice is safe, and what to layer on top when it isn't:
|
||||
|
||||
**Why the connector accepts any host key:**
|
||||
|
||||
- certctl deploys to **operator-configured target infrastructure**. Each target is registered explicitly in the control plane with hostname + auth credentials + cert/key paths; the operator implicitly trusts the host they're deploying to (otherwise, why give it a TLS cert?).
|
||||
- Mirrors the same posture certctl applies to the network scanner (`InsecureSkipVerify` for cert-monitoring TLS handshakes) and the F5 connector (`Insecure` flag for self-signed BIG-IP management interfaces).
|
||||
- Avoids a heavyweight per-target `known_hosts` management layer that would shift complexity onto operators with no proportional security gain when the network model is "operator-configured infrastructure on operator-controlled network".
|
||||
|
||||
**Threat model the design choice accepts:**
|
||||
|
||||
- A **passive eavesdropper** on the agent-to-target link. SSH's transport encryption still applies — host-key acceptance affects MITM vulnerability, not on-the-wire confidentiality.
|
||||
- A **MITM attacker** on the agent-to-target link who can intercept the SSH TCP handshake AND has positioned themselves on a hostname the operator has registered as a deploy target. Layered authentication (per-target SSH keys with strong passphrases stored at the agent) limits the blast radius — the MITM gets one target's cert+key payload, not the agent's broader credentials.
|
||||
|
||||
**Threat model the design choice does NOT accept:**
|
||||
|
||||
- Deploying across the **public internet** to a host whose IP rotates (e.g. ephemeral cloud instances behind a load balancer that doesn't pin SSH host keys). In that scenario, `InsecureIgnoreHostKey` opens an MITM window during IP rotation — layer on `known_hosts` enforcement via a custom `SSHClient` or use SSH host certificates instead (see the mitigations below).
|
||||
- **Multi-tenant networks** where another tenant could plausibly impersonate the target host. certctl's design assumes operator-controlled network paths.
|
||||
|
||||
**Mitigations operators can layer on:**
|
||||
|
||||
- **`known_hosts` enforcement**: implement a custom `SSHClient` (the connector's `SSHClient` interface accepts injected clients via `NewWithClient`) whose `Connect` method builds an `ssh.ClientConfig` with `HostKeyCallback` set to `knownhosts.New("/path/to/known_hosts")` from `golang.org/x/crypto/ssh/knownhosts`. Configure the agent to use that client.
|
||||
- **SSH certificate authentication**: use OpenSSH 5.4+ host certificates signed by an organizational CA. Configure the agent's `known_hosts` CA pinning via `@cert-authority` lines so any host presenting a certificate signed by the CA is trusted, regardless of IP rotation.
|
||||
- **Network segmentation**: run the certctl agent on the same private network segment as its targets; require VPN tunnels for cross-network deploys; use bastion hosts with their own host-key validation.
|
||||
- **Per-target SSH keys**: rotate the agent's SSH credentials per target so a successful MITM compromise is bounded to that one target's cert+key, not the agent's broader credential set.
|
||||
|
||||
**When you should NOT use the SSH connector:**
|
||||
|
||||
- Deploying to **unknown / dynamic / multi-tenant** hosts where the IP-to-hostname binding isn't operator-controlled.
|
||||
- Environments with strict **regulatory MITM-resistance** requirements (PCI-DSS Level 1, FedRAMP High, etc.) — the inline-comment "out of scope" framing doesn't satisfy compliance auditors who want documented host-key verification at the connector level.
|
||||
- For these cases, switch to a different connector (Kubernetes Secrets, WinCertStore, F5 with iControl REST under operator-managed cert pinning) **OR** layer a custom `SSHClient` with full `known_hosts` validation per the mitigations above.
|
||||
|
||||
**V3-Pro forward path:**
|
||||
|
||||
The operator-managed `known_hosts` integration (config field + `HostKeyCallback` plumbing + per-target root-of-trust enforcement) is documented as V3-Pro work. Tracking: `WORKSPACE-ROADMAP.md` (search for "SSH known_hosts").
|
||||
|
||||
### Windows Certificate Store
|
||||
|
||||
The Windows Certificate Store connector imports certificates into the Windows cert store via PowerShell, without managing IIS site bindings. Use this for non-IIS Windows services that read certificates from the cert store (Exchange, RDP, SQL Server, ADFS, etc.). Same injectable `PowerShellExecutor` pattern as the IIS connector, with optional WinRM proxy mode.
|
||||
@@ -1069,6 +1310,7 @@ The Windows Certificate Store connector imports certificates into the Windows ce
|
||||
| `winrm_password` | string | | WinRM password (required for winrm mode) |
|
||||
| `winrm_https` | boolean | `false` | Use HTTPS for WinRM |
|
||||
| `winrm_insecure` | boolean | `false` | Skip TLS verification for WinRM |
|
||||
| `exec_deadline` | duration | `60s` | Per-PowerShell-subprocess cap that fires only when the caller's `ctx` has no deadline of its own. A caller-supplied deadline always wins; this is a safety net so a hung WinRM session or stuck `Cert:` provider call cannot block the deploy worker indefinitely. Operators on slow links can extend with e.g. `"exec_deadline": "5m"`. |
|
||||
|
||||
Location: `internal/connector/target/wincertstore/wincertstore.go`
|
||||
|
||||
@@ -1095,6 +1337,8 @@ The Java Keystore connector deploys certificates to JKS or PKCS#12 keystores via
|
||||
| `reload_command` | string | | Optional command to run after keystore update |
|
||||
| `create_keystore` | boolean | `true` | Create keystore if it doesn't exist |
|
||||
| `keytool_path` | string | `"keytool"` | Override keytool binary path |
|
||||
| `backup_retention` | int | `3` | Number of `.certctl-bak.<unix-nanos>.p12` snapshot files to keep after a successful deploy. `0` means use the default of 3; `-1` opts out of pruning entirely. |
|
||||
| `backup_dir` | string | `dirname(keystore_path)` | Override directory where rollback snapshots are written and pruned from. Defaults to the keystore's own directory so snapshots land on the same filesystem. |
|
||||
|
||||
**Security:**
|
||||
- Reload commands validated against shell injection via `validation.ValidateShellCommand()`
|
||||
@@ -1102,6 +1346,37 @@ The Java Keystore connector deploys certificates to JKS or PKCS#12 keystores via
|
||||
- Path traversal prevention on keystore path
|
||||
- Transient PKCS#12 temp file cleaned up after import (even on error)
|
||||
|
||||
**Atomic rollback (Bundle 8 of the 2026-05-02 deployment-target audit):**
|
||||
|
||||
The deploy flow is **snapshot → delete → import → reload**. Before the irreversible `keytool -delete` step (which removes the existing alias from the keystore), the connector runs `keytool -exportkeystore` to write a sibling `.certctl-bak.<unix-nanos>.p12` file containing the prior alias. If the subsequent `keytool -importkeystore` fails for any reason, the rollback path runs `keytool -delete` (best-effort cleanup of any partial alias the failed import created) followed by `keytool -importkeystore` from the snapshot PFX, restoring the keystore to its pre-deploy state. If both the import AND the rollback fail, the connector returns an operator-actionable wrapped error containing both error strings AND the snapshot path so the operator can manually `keytool -importkeystore` from the `.p12` file to recover.
|
||||
|
||||
Successful deploys prune older `.certctl-bak.*.p12` files beyond the configured `backup_retention` count; pruning sorts by file ModTime and removes the oldest entries first. Operators that wire their own archival/rotation logic can opt out via `backup_retention: -1`.
|
||||
|
||||
First-time deploys (no keystore file exists at the configured path) skip the snapshot phase entirely — there's nothing to roll back to. The same is true for "alias-not-present-in-existing-keystore" deploys: `keytool -exportkeystore` returns "alias does not exist" which the connector recognises as a normal first-time-on-existing-keystore signal, not an outage.
|
||||
|
||||
### Operator playbook: keytool argv password exposure
|
||||
|
||||
Java's `keytool` accepts the keystore password via the `-storepass` argv flag — there is no stdin or file-based password mode in OpenJDK keytool. While the keytool subprocess is running, the password is visible in `ps(1)` output to any user on the same host who can read `/proc/<pid>/cmdline`. This is a **standard keytool limitation, not a certctl-specific issue**, but operators in regulated environments should know about it before deploying certctl on shared hosts.
|
||||
|
||||
**What this means in practice:**
|
||||
|
||||
- The password is visible for the duration of each keytool invocation (typically <1s on modern hardware; the connector runs 2-4 keytool calls per deploy: snapshot, optional pre-import delete, import, optional rollback).
|
||||
- A local user with shell access on the agent host who polls `ps -ef` aggressively can capture the password.
|
||||
- The exposure is local to the agent host; remote attackers without shell access cannot see it.
|
||||
- The same applies to the snapshot's transient `-deststorepass` (which mirrors the operator's keystore password by design — see "Why the snapshot reuses the keystore password" below).
|
||||
|
||||
**Mitigations** (layer one or more depending on threat model):
|
||||
|
||||
- **Restrict shell access to the agent host.** Only the certctl agent's service account should have a login shell. Other admins SSH to a bastion that doesn't host the agent.
|
||||
- **Use Linux user namespaces or AppArmor** to deny `ps`-visibility into the keytool subprocess for non-root users. systemd's `ProtectKernelTunables=yes` + `ProtectProc=invisible` (kernel 5.8+) hide `/proc/<pid>` from non-owner users.
|
||||
- **Run the certctl agent in a single-purpose container** so only the agent's processes are visible to anyone who execs into the container. The host's `ps` doesn't see container internals if proper PID-namespace isolation is configured.
|
||||
- **Rotate the keystore password post-deployment.** For high-security environments where the brief exposure is unacceptable, the rotation can itself be automated via a post-deploy hook running `keytool -storepasswd`. The certctl `reload_command` is the natural place for this; just be aware the new password must be propagated to whatever service reads the keystore (Tomcat's `server.xml`, Kafka's `kafka.properties`, etc.).
|
||||
- **For FIPS environments**, use the `BCFKS` (BouncyCastle FIPS) keystore type which supports stronger password-derivation. Same argv-exposure caveat applies; the keystore-format change doesn't affect how keytool receives the password.
|
||||
|
||||
For a fundamentally different password-handling model, switch to a non-Java target (e.g. PEM-on-disk via the SSH connector + a JCA-shim like `tomcat-native` reading PEMs directly) or a PKCS#11 keystore (where the password is supplied to the cryptoki library, not via argv).
|
||||
|
||||
**Why the snapshot reuses the keystore password.** The snapshot's `keytool -exportkeystore` writes a PKCS#12 file under a `-deststorepass`. The connector reuses the operator's `keystore_password` for this rather than generating a separate transient password. Two reasons: (a) the operator already trusts the connector with this secret, so the surface area doesn't grow; (b) the rollback's matching `keytool -importkeystore` needs to know the password too, and threading a second random password through the in-memory state machine adds complexity (and another argv-exposure window) for no security gain. If you rotate the keystore password between deploys, the rollback may fail to read the snapshot — keep stale `.certctl-bak.*.p12` files on disk until the rotation completes, and clean them up manually if rotation invalidates them.
|
||||
|
||||
Location: `internal/connector/target/javakeystore/javakeystore.go`
|
||||
|
||||
### Kubernetes Secrets
|
||||
@@ -1134,6 +1409,183 @@ The Kubernetes Secrets connector deploys certificates as `kubernetes.io/tls` Sec
|
||||
|
||||
Location: `internal/connector/target/k8ssecret/k8ssecret.go`
|
||||
|
||||
### AWS Certificate Manager (ACM)
|
||||
|
||||
The AWS ACM target connector deploys certificates into AWS Certificate Manager — the public AWS service that ALB / CloudFront / API Gateway / App Runner consume by ARN. Closes the "we terminate TLS at AWS, how do we get certctl-issued certs to ALB?" question for cloud-first deployments. Rank 5 of the 2026-05-03 Infisical deep-research deliverable.
|
||||
|
||||
```json
|
||||
{
|
||||
"region": "us-east-1",
|
||||
"certificate_arn": "arn:aws:acm:us-east-1:123456789012:certificate/abcdef01-2345-6789-abcd-ef0123456789",
|
||||
"tags": {"env": "production", "app": "api-gateway"}
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `region` | string | *(required)* | AWS region for the ACM endpoint (e.g., `us-east-1`). CloudFront-attached certs MUST live in `us-east-1`; ALB / API Gateway use the same region as the load balancer. |
|
||||
| `certificate_arn` | string | | ARN of an existing ACM certificate to rotate in place. Empty on first deploy — the adapter creates a new ACM cert via `ImportCertificate` and the deployment record's Metadata captures the resulting ARN. Operators can also pre-create the ARN out-of-band (Terraform, CloudFormation) and pin it here. |
|
||||
| `tags` | object | | Tags applied to the ACM cert at first import + re-applied via `AddTagsToCertificate` on every subsequent import (ACM strips tags on re-import). The reserved keys `certctl-managed-by` and `certctl-certificate-id` are set automatically and cannot be overridden. |
|
||||
|
||||
**IAM policy (minimum permissions):**
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [{
|
||||
"Effect": "Allow",
|
||||
"Action": [
|
||||
"acm:ImportCertificate",
|
||||
"acm:GetCertificate",
|
||||
"acm:DescribeCertificate",
|
||||
"acm:ListCertificates",
|
||||
"acm:AddTagsToCertificate"
|
||||
],
|
||||
"Resource": "arn:aws:acm:*:*:certificate/*"
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
**Auth recipes:**
|
||||
|
||||
- **IRSA (IAM Roles for Service Accounts) — recommended for K8s deploys.** Annotate the agent's ServiceAccount with `eks.amazonaws.com/role-arn=arn:aws:iam::<account>:role/certctl-acm-deployer`. The role's trust policy allows the cluster's OIDC provider; permission policy is the JSON above. Short-lived STS credentials are auto-rotated by EKS — no long-lived access keys.
|
||||
- **EC2 instance profile — recommended for VM-based agents.** Attach an instance profile referencing the same role. SDK's `LoadDefaultConfig` picks credentials up via the IMDS metadata service.
|
||||
- **AWS SSO / `aws configure sso` — recommended for operator workstations.** SDK reads `~/.aws/config` for the SSO profile and refreshes tokens via the existing CLI session.
|
||||
- **Long-lived access keys are NOT supported in connector Config** — the credential chain is configured at the SDK level, not the connector level. This is a procurement-readability decision: a security reviewer reading the deployment_targets table should never find an access key.
|
||||
|
||||
**Atomic-rollback contract:**
|
||||
|
||||
Every `DeployCertificate` snapshots the existing cert via `DescribeCertificate` + `GetCertificate` BEFORE calling `ImportCertificate` with the new bytes. After import, the connector re-fetches the cert metadata and compares serial numbers. On serial-mismatch (post-verify failure), the connector calls `ImportCertificate` again with the snapshotted bytes to restore the previous cert. The rollback path emits a `WARN`-level slog entry; the rollback's own success or failure is exposed via `certctl_deploy_rollback_total{target_type="AWSACM",outcome="restored"|"also_failed"}` per the deploy-hardening I Phase 10 metric exposer. Mirrors the Bundle 5+ pre-deploy-snapshot pattern shipped for IIS / WinCertStore / JavaKeystore.
|
||||
|
||||
**ALB attachment recipe:**
|
||||
|
||||
certctl creates / rotates the ACM cert; the operator (or Terraform / CloudFormation) attaches it to the ALB listener separately. For Terraform-driven deployments, look up the ARN by tag:
|
||||
|
||||
```hcl
|
||||
data "aws_acm_certificate" "certctl_managed" {
|
||||
domain = "api.example.com"
|
||||
most_recent = true
|
||||
|
||||
# Filter by certctl provenance tags so an unrelated ACM cert with
|
||||
# the same SAN doesn't get picked up.
|
||||
tags = {
|
||||
"certctl-managed-by" = "certctl"
|
||||
"certctl-certificate-id" = "mc-api-prod"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_lb_listener" "https" {
|
||||
load_balancer_arn = aws_lb.api.arn
|
||||
port = 443
|
||||
protocol = "HTTPS"
|
||||
certificate_arn = data.aws_acm_certificate.certctl_managed.arn
|
||||
# ...
|
||||
}
|
||||
```
|
||||
|
||||
The ARN updates in place across renewals (ACM `ImportCertificate` is upsert-style when given an ARN), so the ALB listener's `certificate_arn` reference doesn't change. CloudFront / API Gateway distributions can reference the same ARN via their respective Terraform resources.
|
||||
|
||||
**Threat model carve-outs:**
|
||||
|
||||
- **Cert key bytes never written to disk on the agent.** `DeployCertificate` reads `request.KeyPEM` from memory and passes it to the SDK's `ImportCertificate` call. No temp file. No swap-out window.
|
||||
- **Provenance tags are mandatory.** The reserved `certctl-managed-by=certctl` + `certctl-certificate-id=<mc-id>` pair is set automatically on every import. Operators identifying a stray ACM cert in their account can match against `certctl-managed-by` to confirm it was certctl-issued (or NOT — the absence of the tag means a manual import).
|
||||
- **No long-lived AWS credentials in `Config`.** `Config` carries region + ARN + operator tags only. AWS auth is the SDK credential chain (IRSA / instance profile / SSO).
|
||||
- **`ListCertificates` IAM permission is required for the V2 ARN-discovery dance to work.** Operators who pin `Config.CertificateArn` after the first deploy can drop this permission; the V2 fallback emits a warning and reverts to "always create new ARN" if the operator forgets to update `certificate_arn` post-first-deploy.
|
||||
|
||||
**Procurement checklist crib (paste into security review):**
|
||||
|
||||
- certctl uses short-lived IAM-role credentials via IRSA / instance profile, not long-lived access keys.
|
||||
- The cert key is held only in agent memory during the import call; never written to disk.
|
||||
- Every imported ACM cert is tagged with `certctl-managed-by=certctl` + `certctl-certificate-id=<mc-id>` for forensic traceability.
|
||||
- Failed imports trigger automatic rollback to the snapshotted previous cert; both outcomes are surfaced via Prometheus.
|
||||
- The minimum IAM policy is 5 actions on `arn:aws:acm:*:*:certificate/*`; CloudTrail captures every API call for compliance audits.
|
||||
|
||||
**ValidateOnly contract.** ACM has no dry-run API for `ImportCertificate`; `ValidateOnly` returns `target.ErrValidateOnlyNotSupported` per the deploy-hardening I Phase 3 sentinel contract. Operators preview deploys via `ValidateConfig` + `aws acm describe-certificate --certificate-arn <arn>` against the current ARN.
|
||||
|
||||
Location: `internal/connector/target/awsacm/awsacm.go` + `internal/connector/target/awsacm/awsacm_failure_test.go` (per-error-class contract tests for `AccessDeniedException` / `ResourceNotFoundException` / `ThrottlingException` / `InvalidArgsException` / `RequestInProgressException`).
|
||||
|
||||
### Azure Key Vault
|
||||
|
||||
The Azure Key Vault target connector deploys certificates into Azure Key Vault — the Azure-managed cert/secret store that Application Gateway / Front Door / App Service / Container Apps consume by KID URI. Rank 5 (Azure half) of the 2026-05-03 Infisical deep-research deliverable.
|
||||
|
||||
```json
|
||||
{
|
||||
"vault_url": "https://my-vault.vault.azure.net",
|
||||
"certificate_name": "api-prod",
|
||||
"tags": {"env": "production", "app": "api-gateway"},
|
||||
"credential_mode": "managed_identity"
|
||||
}
|
||||
```
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `vault_url` | string | *(required)* | Key Vault DNS endpoint (`https://<vault-name>.vault.azure.net`). For US-Gov: `.vault.usgovcloudapi.net`; for China: `.vault.azure.cn`. |
|
||||
| `certificate_name` | string | *(required)* | Cert object name in the vault (1-127 chars, alphanumeric + hyphens). Versions are auto-generated per import. |
|
||||
| `tags` | object | | Tags applied at every import (Key Vault carries tags forward across versions, unlike ACM). Reserved keys `certctl-managed-by` + `certctl-certificate-id` are set automatically. |
|
||||
| `credential_mode` | string | `default` | One of `default` / `managed_identity` / `client_secret` / `workload_identity`. See "Auth recipes" below. |
|
||||
|
||||
**RBAC role (minimum permissions):**
|
||||
|
||||
The off-the-shelf builtin role **Key Vault Certificates Officer** covers everything. For minimum-permission deploys, use a custom role with these data-plane operations on the vault scope (`/subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.KeyVault/vaults/<vault-name>`):
|
||||
|
||||
```
|
||||
Microsoft.KeyVault/vaults/certificates/import/action
|
||||
Microsoft.KeyVault/vaults/certificates/read
|
||||
Microsoft.KeyVault/vaults/certificates/listversions/read
|
||||
```
|
||||
|
||||
**Auth recipes:**
|
||||
|
||||
- **AKS workload identity (`credential_mode: workload_identity`) — recommended for AKS deploys.** Annotate the agent's ServiceAccount with `azure.workload.identity/client-id=<app-id>`. The AKS cluster's OIDC issuer + the federated credential on the app registration handle token exchange; no long-lived secrets.
|
||||
- **Managed identity (`credential_mode: managed_identity`) — recommended for VM / App Service deploys.** Assign a system-assigned or user-assigned managed identity to the host; certctl-server / agent picks it up via IMDS. Pin `credential_mode` rather than letting `default` fall through to env vars (defends against accidental local-dev creds leaking into production).
|
||||
- **Service principal (`credential_mode: client_secret`).** Configure `AZURE_TENANT_ID` + `AZURE_CLIENT_ID` + `AZURE_CLIENT_SECRET` env vars on the agent. NOT recommended for production — long-lived client secret risk; rotate via Key Vault soft-delete recovery if leaked.
|
||||
- **Default (`credential_mode: default` or unset).** SDK's `DefaultAzureCredential` walks env vars → managed identity → Azure CLI fallback. Useful for local-dev where the operator already has `az login` active.
|
||||
- **Long-lived secrets in connector Config NOT supported** — same procurement-readability rule as AWS ACM.
|
||||
|
||||
**Atomic-rollback contract + Azure-version semantics:**
|
||||
|
||||
Every `DeployCertificate` snapshots the existing latest version via `GetCertificate(name, "" /* latest */)` BEFORE calling `ImportCertificate`. After import, the connector re-fetches the latest version and compares serial numbers. On serial-mismatch, the connector calls `ImportCertificate` again with the snapshotted CER bytes (re-PFX'd with the operator's key) — **as a NEW VERSION**. Key Vault doesn't support "version-restore" without soft-delete recovery (which we keep off the minimum-RBAC surface). The version history will show e.g. v1=initial, v2=failed-renewal, v3=rollback-of-v2; operators reading audit dashboards filter by tag.
|
||||
|
||||
**Soft-delete caveat.** V2 doesn't manage Key Vault soft-delete recovery. If a previous version was soft-deleted out-of-band (e.g. operator ran `az keyvault certificate delete`), the rollback re-imports the snapshot bytes as a new version rather than restoring the soft-deleted version. Operators alerting on rollback frequency should also watch for soft-delete events.
|
||||
|
||||
**App Gateway / Front Door attachment recipe:**
|
||||
|
||||
```hcl
|
||||
data "azurerm_key_vault_certificate" "certctl_managed" {
|
||||
name = "api-prod"
|
||||
key_vault_id = azurerm_key_vault.main.id
|
||||
}
|
||||
|
||||
resource "azurerm_application_gateway" "main" {
|
||||
# ...
|
||||
ssl_certificate {
|
||||
name = "certctl-managed"
|
||||
key_vault_secret_id = data.azurerm_key_vault_certificate.certctl_managed.versionless_secret_id
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Application Gateway / Front Door reference the cert by KID URI; certctl rotates the version under the same name. Whether the AGW / Front Door reference picks up the new version depends on the form of the KID: a version-less KID (`/certificates/<name>`) auto-tracks "latest", whereas a versioned KID (`/certificates/<name>/<version>`) pins that exact version. Use the version-less KID so renewals are picked up automatically.
|
||||
|
||||
**Threat model carve-outs:**
|
||||
|
||||
- **Cert key bytes never written to disk on the agent.** PFX wrapping happens in memory (PKCS#12 via `software.sslmate.com/src/go-pkcs12`); the base64-encoded PFX is passed straight to the SDK's `ImportCertificate` call.
|
||||
- **Provenance tags are mandatory.** Same `certctl-managed-by=certctl` + `certctl-certificate-id=<mc-id>` shape as AWS ACM. Operators identifying a stray Key Vault cert match against `certctl-managed-by`.
|
||||
- **No long-lived Azure credentials in `Config`.** `Config` carries vault URL + cert name + operator tags + credential mode only. Auth is the Azure SDK credential chain.
|
||||
- **`credential_mode: managed_identity` is the recommended production posture.** Defends against accidental env-var creds leaking into deployments where the host already has a managed identity assigned.
|
||||
|
||||
**Procurement checklist crib (paste into security review):**
|
||||
|
||||
- certctl uses Azure managed identity (or workload identity for AKS), not long-lived service-principal secrets.
|
||||
- The cert key is held only in agent memory during the PFX wrap + import call; never written to disk.
|
||||
- Every imported Key Vault cert is tagged with `certctl-managed-by=certctl` + `certctl-certificate-id=<mc-id>` for forensic traceability.
|
||||
- Failed imports trigger automatic rollback by re-importing the snapshotted previous version's bytes; both outcomes are surfaced via Prometheus.
|
||||
- The minimum RBAC role is 3 data-plane actions; Activity Log captures every API call for compliance audits.
|
||||
|
||||
**ValidateOnly contract.** Key Vault has no dry-run API; `ValidateOnly` returns `target.ErrValidateOnlyNotSupported`. Operators preview deploys via `ValidateConfig` + `az keyvault certificate show --vault-name <name> --name <cert>`.
|
||||
|
||||
Location: `internal/connector/target/azurekv/azurekv.go` + `internal/connector/target/azurekv/sdk_client.go` (azcertificates SDK wrapping) + `internal/connector/target/azurekv/azurekv_test.go` (happy-path + rollback + per-error contract tests).
|
||||
|
||||
## Notifier Connector
|
||||
|
||||
Notifier connectors send alerts about certificate lifecycle events (expiration warnings, renewal success/failure, deployment status, policy violations).
|
||||
@@ -1165,6 +1617,54 @@ type Connector interface {
|
||||
|
||||
Built-in notifiers: **Email** (SMTP), **Webhook** (HTTP POST), **Slack** (incoming webhook), **Microsoft Teams** (MessageCard webhook), **PagerDuty** (Events API v2), and **OpsGenie** (Alert API v2).
|
||||
|
||||
### Routing expiry alerts across channels
|
||||
|
||||
certctl-server runs a daily renewal-check loop that scans for managed certificates approaching expiry. For each cert that has crossed a configured threshold (default `[30, 14, 7, 0]` days), an `ExpirationWarning` notification is dispatched. **Pre-2026-05-03**, dispatch went exclusively via the `Email` channel — operators with PagerDuty / Slack / Teams / OpsGenie wired up received nothing at any threshold unless SMTP was also configured. Rank 4 of the 2026-05-03 Infisical deep-research deliverable closed that gap with a per-policy channel-matrix.
|
||||
|
||||
**The matrix lives on `RenewalPolicy`:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "rp-production",
|
||||
"name": "Production CDN renewal policy",
|
||||
"renewal_window_days": 30,
|
||||
"alert_thresholds_days": [30, 14, 7, 0],
|
||||
"alert_channels": {
|
||||
"informational": ["Slack"],
|
||||
"warning": ["Slack", "Email"],
|
||||
"critical": ["PagerDuty", "OpsGenie", "Email"]
|
||||
},
|
||||
"alert_severity_map": {
|
||||
"30": "informational",
|
||||
"14": "warning",
|
||||
"7": "warning",
|
||||
"0": "critical"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The runtime resolves the threshold's severity tier (via `alert_severity_map`, falling back to the default `30→informational, 14→warning, 7→warning, 0→critical` when unset), then dispatches one notification per channel listed under that tier in `alert_channels`. Each (cert, threshold, channel) triple is independently deduplicated via the `notification_events` table — a transient PagerDuty 5xx today does NOT suppress today's Slack alert, and tomorrow's renewal-loop tick will re-attempt the failed PagerDuty page.
|
||||
|
||||
**Backwards compatibility.** A policy with `alert_channels` unset (or empty) falls through to `DefaultAlertChannels` which routes every tier to `["Email"]`. Operators who haven't touched their renewal-policy configs see exactly the pre-2026-05-03 behaviour, and SMTP-only deployments keep working as before.
|
||||
|
||||
**Validation.** Off-enum severity tiers (anything other than `informational` / `warning` / `critical`) and off-enum channels (anything other than `Email` / `Webhook` / `Slack` / `Teams` / `PagerDuty` / `OpsGenie`) are skipped at the dispatch site, and each skip is recorded in the audit log as `expiration_alert_skipped_invalid_channel` so an operator can grep for typos. The `RenewalPolicyService.Create`/`Update` paths reject these at write time as well, so a fresh policy with bad values never persists.
|
||||
|
||||
**Procurement playbook: "I want PagerDuty when a cert is 24h from expiry."** Configure your renewal policy with `alert_severity_map.0 = "critical"` (already the default) and `alert_channels.critical = ["PagerDuty", "Email"]`. Set the `CERTCTL_PAGERDUTY_ROUTING_KEY` env var on the server. Restart. The next renewal-loop tick that finds a cert at ≤0 days will create a PagerDuty incident via the Events API v2 AND email the cert owner. Confirm with `curl /api/v1/metrics/prometheus | grep certctl_expiry_alerts_total` — you'll see one `{channel="PagerDuty",threshold="0",result="success"}` series increment per critical-tier dispatch.
|
||||
|
||||
**Operator runbook for "did the on-call team get paged?"** Run:
|
||||
|
||||
```sql
|
||||
SELECT created_at, metadata->>'channel' AS channel, metadata->>'threshold_days' AS threshold
|
||||
FROM audit_events
|
||||
WHERE event_type = 'expiration_alert_sent'
|
||||
AND resource_id = '<cert-id>'
|
||||
ORDER BY created_at DESC;
|
||||
```
|
||||
|
||||
Each row corresponds to one fired alert. The `channel` metadata field tells you which notifier ran. Combined with the Prometheus `certctl_expiry_alerts_total{result="failure"}` counter, you have full forensic visibility on every dispatch attempt.
|
||||
|
||||
**V3-Pro forward path.** Per-owner / per-team channel routing (route the Production-CDN cert's alerts to its dedicated owner's PagerDuty service, the Internal-API cert's alerts to a different one), calendar-aware suppression (no T-30 informational alerts on weekends for non-on-call teams), and escalation chains (T-1 unanswered for 30m → escalate to manager) are tracked on `cowork/WORKSPACE-ROADMAP.md` under "Adapter hardening" → "Multi-channel expiry alerts: per-owner routing".
|
||||
|
||||
### Email (SMTP) Notifier
|
||||
|
||||
The Email notifier sends transactional alerts and scheduled digests via SMTP. It bridges the connector-layer SMTP connector to the service-layer `Notifier` interface via the `NotifierAdapter`. Supports both plain text and HTML emails.
|
||||
@@ -1274,7 +1774,7 @@ The adapter (`internal/service/issuer_adapter.go`) translates between the two in
|
||||
|
||||
```go
|
||||
// Wrap your connector implementation with the adapter
|
||||
import "github.com/shankar0123/certctl/internal/service"
|
||||
import "github.com/certctl-io/certctl/internal/service"
|
||||
|
||||
myIssuer := myissuer.New(config)
|
||||
adapted := service.NewIssuerConnectorAdapter(myIssuer)
|
||||
|
||||
@@ -19,13 +19,53 @@ a single shared primitive:
|
||||
|
||||
| Gap | Pre-bundle | Post-bundle |
|
||||
|---|---|---|
|
||||
| **Atomic deploy with rollback** | F5 only (transactional API) | All 13 connectors via `deploy.Apply` |
|
||||
| **Atomic deploy with rollback** | F5 only (transactional API) | 12 of 13 connectors via `deploy.Apply` (K8s pending Bundle 2 — see [Section 1.5](#15-audit-closure-status-2026-05-02-deployment-target-audit)) |
|
||||
| **Post-deploy TLS verification** | None | NGINX/Apache/HAProxy/Traefik/Caddy/Envoy/Postfix all do TLS handshake + SHA-256 fingerprint compare; fail → rollback |
|
||||
| **Vendor-specific deployment recipes** | Light docs | (Bundle II — `cowork/deploy-hardening-ii-prompt.md`) |
|
||||
|
||||
This document describes the operator-visible surface. The Go-level
|
||||
contract lives at `internal/deploy/doc.go`.
|
||||
|
||||
## 1.5. Audit closure status (2026-05-02 deployment-target audit)
|
||||
|
||||
The 2026-05-02 deployment-target coverage audit
|
||||
(`cowork/deployment-target-audit-2026-05-02/RESULTS.md`) tightened the
|
||||
atomic + rollback contract on the connectors below. All bundles in the
|
||||
table are committed to `master` as of this section's last edit; commit
|
||||
hashes pin to the canonical landing commit for each piece of work.
|
||||
|
||||
| Connector | Bundle | Commit | Closes |
|
||||
|-----------------|-----------|-----------|--------|
|
||||
| envoy | Bundle 3 | `d8cd981` | atomic SDS JSON write + post-deploy watcher pickup poll |
|
||||
| traefik | Bundle 4 | `37634e6` | single `deploy.Apply` Plan + all-files atomicity + rollback |
|
||||
| iis | Bundle 5 | `223f279` | pre-deploy `Get-WebBinding` snapshot + on-failure binding rollback |
|
||||
| ssh | Bundle 6 | `eb39059` | pre-deploy SFTP snapshot + reload-failure rollback |
|
||||
| wincertstore | Bundle 7 | `1dd1dd4` | `Get-ChildItem` snapshot + on-import-failure rollback |
|
||||
| javakeystore | Bundle 8 | `87e0009` | `keytool -exportkeystore` snapshot + on-import-failure rollback + operator playbook for argv password |
|
||||
| caddy | Bundle 9 | `8cda860` | duration metric fix + file-mode PEM validate + api-mode SHA-256 idempotency |
|
||||
| postfix/dovecot | Bundle 11 | `88e8881` | applyDefaults + verify-fails-rollback test pin under Mode=dovecot |
|
||||
|
||||
**Outstanding from the same audit:**
|
||||
|
||||
- **Bundle 2 (k8ssecret).** The production `realK8sClient` is still a
|
||||
stub (see Section 3 / row `k8ssecret` below). Replacing it with a
|
||||
real `k8s.io/client-go` implementation + `ResourceVersion` plumbing
|
||||
+ post-deploy SHA-256 verify + kubelet sync poll is the remaining
|
||||
V2 P0 blocker. Tracking prompt:
|
||||
`cowork/deployment-target-audit-2026-05-02/k8s-real-client-prompt.md`.
|
||||
|
||||
Bundle 10 (per-connector loadtest harness, commit `6286cd4`) does not
|
||||
modify the per-connector contract table; it's a CI / observability
|
||||
addition documented separately at `deploy/test/loadtest/README.md`.
|
||||
|
||||
The original Bundle 1 audit spec read "soften the IIS / SSH /
|
||||
WinCertStore / JavaKeystore rollback claims first while bundles 5–8
|
||||
catch the implementation up". Execution order inverted that loop —
|
||||
Bundles 3–11 shipped before the doc-realignment commit, so the rows
|
||||
in Section 3 below are honest as-shipped without ever needing a
|
||||
softening pass. The K8s row is the one exception, and Section 3's
|
||||
notes call it out explicitly.
|
||||
|
||||
## 2. The atomic-write primitive — `Plan` / `Apply`
|
||||
|
||||
`internal/deploy.Apply(ctx, plan)` is the load-bearing entry
|
||||
@@ -92,7 +132,12 @@ Apply's algorithm:
|
||||
| ssh | (Connect probe) | (SCP upload + remote chmod) | `tls.Dial` to remote TLS port | Pre-deploy SCP backup of remote files |
|
||||
| wincertstore | (Get-ChildItem Cert:\) | (Import-PfxCertificate) | (admin probe) | Get-ChildItem snapshot for rollback |
|
||||
| javakeystore | (`keytool -list`) | (`keytool -importkeystore`) | (admin probe) | keytool snapshot; rollback via `keytool -delete` + re-import |
|
||||
| k8ssecret | (GetSecret RBAC probe) | (Update Secret) | SHA-256 verify of returned Secret | Atomic at API server; kubelet sync polled via `Pod.Status.ContainerStatuses` |
|
||||
| k8ssecret | (V2 blocker — see note below) | (V2 blocker — see note below) | (V2 blocker — see note below) | **V2 blocker — Bundle 2 of the 2026-05-02 deployment-target audit.** Production `realK8sClient` at `internal/connector/target/k8ssecret/k8ssecret.go:397-420` is a stub (every method returns `"real Kubernetes client not implemented — use NewWithClient for tests"`). The SHA-256 post-deploy verify and kubelet sync poll are designed but not yet implemented; production deploys to a real cluster fail with "not implemented" until Bundle 2 lands. Test mocks via `NewWithClient` work today. Tracking prompt: `cowork/deployment-target-audit-2026-05-02/k8s-real-client-prompt.md`. |
|
||||
|
||||
> **Postfix vs Dovecot mode**: see "Choosing Mode=postfix vs Mode=dovecot" in
|
||||
> `docs/connectors.md` for the per-mode defaults (cert/key paths, validate +
|
||||
> reload commands), the dual-deploy guidance for mail servers running both
|
||||
> daemons, and the test-pin reference (Bundle 11 commit `88e8881`).
|
||||
|
||||
## 4. Post-deploy TLS verification
|
||||
|
||||
@@ -115,9 +160,12 @@ if res.Fingerprint != certPEMToFingerprint(deployedCertPEM) {
|
||||
}
|
||||
```
|
||||
|
||||
Retry with backoff (default 3 attempts, 2s exponential) defends
|
||||
Retry with **exponential backoff** (default 3 attempts; 1s initial, 16s cap) defends
|
||||
against load-balanced targets where the verify might hit a
|
||||
different pod that hasn't picked up the new cert yet:
|
||||
different pod that hasn't picked up the new cert yet. Backoff grows 1s → 2s → 4s → 8s → 16s,
|
||||
giving the LB fleet time to converge before giving up. Operators preserving V2 linear semantics
|
||||
(every attempt waits the same interval) set `post_deploy_verify_max_backoff` equal to
|
||||
`post_deploy_verify_backoff`.
|
||||
|
||||
```yaml
|
||||
post_deploy_verify:
|
||||
@@ -125,7 +173,8 @@ post_deploy_verify:
|
||||
endpoint: "nginx.svc.cluster.local:443"
|
||||
timeout: 10s
|
||||
post_deploy_verify_attempts: 3
|
||||
post_deploy_verify_backoff: 2s
|
||||
post_deploy_verify_backoff: 1s
|
||||
post_deploy_verify_max_backoff: 16s
|
||||
```
|
||||
|
||||
## 5. Rollback semantics
|
||||
|
||||
+10
-14
@@ -251,20 +251,16 @@ This recipe stands up an EAP-TLS-authenticated corporate WiFi network
|
||||
where certctl issues every device certificate via EST. End-to-end
|
||||
flow:
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌──────────────────┐ ┌─────────────┐
|
||||
│ Laptop / │ EAP │ WiFi access │ Radius│ FreeRADIUS │
|
||||
│ supplicant │─────▶│ point (NAS) │──────▶│ (validate │
|
||||
│ (wpa_ │ │ │ │ cert chain)│
|
||||
│ supplicant │ └──────────────────┘ └──────┬──────┘
|
||||
│ / iwd / │ │
|
||||
│ Apple WiFi)│ │ trusts
|
||||
└──────┬──────┘ ▼
|
||||
│ EST (one-time, then renewal) ┌─────────────┐
|
||||
│ /simpleenroll, /simplereenroll │ certctl CA │
|
||||
└────────────────────────────────────▶│ (EST profile│
|
||||
│ "wifi") │
|
||||
└─────────────┘
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Laptop["Laptop / supplicant<br/>(wpa_supplicant / iwd / Apple WiFi)"]
|
||||
AP["WiFi access point (NAS)"]
|
||||
Radius["FreeRADIUS<br/>(validate cert chain)"]
|
||||
CA["certctl CA<br/>(EST profile 'wifi')"]
|
||||
Laptop -->|EAP| AP
|
||||
AP -->|Radius| Radius
|
||||
Radius -.->|trusts| CA
|
||||
Laptop -->|"EST: /simpleenroll, /simplereenroll<br/>(one-time, then renewal)"| CA
|
||||
```
|
||||
|
||||
### certctl-side: EST profile config for 802.1X
|
||||
|
||||
+1
-1
@@ -1205,7 +1205,7 @@ Single SQL `UNION` query replaces the previous "fetch all, filter in Go" approac
|
||||
| Loop | Default Interval | Always-on | Env Var | Description |
|
||||
|---|---|---|---|---|
|
||||
| Renewal check | 1 hour | Yes | `CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL` | Check expiring certs, query ARI, create renewal jobs |
|
||||
| Job processor | 30 seconds | Yes | `CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL` | Process pending jobs |
|
||||
| Job processor | 30 seconds | Yes | `CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL` | Process pending jobs (concurrency cap via `CERTCTL_RENEWAL_CONCURRENCY`, default 25) |
|
||||
| Job retry | 5 minutes | Yes | `CERTCTL_SCHEDULER_RETRY_INTERVAL` | Retry Failed jobs (I-001) |
|
||||
| Job timeout reaper | 10 minutes | Yes | `CERTCTL_JOB_TIMEOUT_INTERVAL` (per-state thresholds: `CERTCTL_JOB_AWAITING_APPROVAL_TIMEOUT`, `CERTCTL_JOB_AWAITING_CSR_TIMEOUT`) | Fail AwaitingCSR/AwaitingApproval jobs past timeout (I-003) |
|
||||
| Agent health check | 2 minutes | Yes | `CERTCTL_SCHEDULER_AGENT_HEALTH_CHECK_INTERVAL` | Check agent heartbeat staleness |
|
||||
|
||||
@@ -37,12 +37,13 @@ straight at certctl on `:8443`.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─── TLS 1.2/1.3 ────┐ ┌─── TLS 1.3 ───┐
|
||||
[legacy EST/SCEP client]──>│ nginx / HAProxy │────────>│ certctl :8443 │
|
||||
│ reverse proxy │ │ │
|
||||
└────────────────────┘ └───────────────┘
|
||||
Allowed TLS 1.2 Re-encrypts as TLS 1.3
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Client["legacy EST/SCEP client"]
|
||||
Proxy["nginx / HAProxy<br/>reverse proxy"]
|
||||
Server["certctl :8443"]
|
||||
Client -->|"TLS 1.2/1.3<br/>(allowed TLS 1.2)"| Proxy
|
||||
Proxy -->|"TLS 1.3<br/>(re-encrypts as TLS 1.3)"| Server
|
||||
```
|
||||
|
||||
The reverse proxy:
|
||||
|
||||
@@ -29,7 +29,7 @@ certctl adds a control plane that sees all your certificates, deploys with verif
|
||||
Start with Docker Compose (5 minutes):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl/deploy
|
||||
docker compose up -d
|
||||
```
|
||||
@@ -41,7 +41,7 @@ Access the dashboard at `https://localhost:8443` with the API key from `.env`. T
|
||||
On each server running acme.sh certs, install the certctl agent:
|
||||
|
||||
```bash
|
||||
curl -sSL https://raw.githubusercontent.com/shankar0123/certctl/master/install-agent.sh | bash
|
||||
curl -sSL https://raw.githubusercontent.com/certctl-io/certctl/master/install-agent.sh | bash
|
||||
# Prompted for server URL and API key
|
||||
```
|
||||
|
||||
@@ -49,7 +49,7 @@ Or manually:
|
||||
|
||||
```bash
|
||||
# Download and install agent binary
|
||||
wget https://github.com/shankar0123/certctl/releases/download/v2.1.0/certctl-agent-linux-amd64
|
||||
wget https://github.com/certctl-io/certctl/releases/download/v2.1.0/certctl-agent-linux-amd64
|
||||
chmod +x certctl-agent-linux-amd64
|
||||
sudo mv certctl-agent-linux-amd64 /usr/local/bin/certctl-agent
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ On each of your 10 servers running Certbot:
|
||||
|
||||
```bash
|
||||
# Linux amd64 (adjust for your architecture)
|
||||
curl -sSL https://github.com/shankar0123/certctl/releases/download/v2.1.0/certctl-agent-linux-amd64 \
|
||||
curl -sSL https://github.com/certctl-io/certctl/releases/download/v2.1.0/certctl-agent-linux-amd64 \
|
||||
-o /usr/local/bin/certctl-agent
|
||||
chmod +x /usr/local/bin/certctl-agent
|
||||
|
||||
|
||||
+9
-16
@@ -38,22 +38,15 @@ either manual-only by design or pending QA-suite coverage:
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌────────────────────────┐ ┌─────────────────────────────────┐
|
||||
│ qa_test.go │────▶│ certctl demo stack │
|
||||
│ (//go:build qa) │ │ docker-compose.yml + │
|
||||
│ │ │ docker-compose.demo.yml │
|
||||
│ TestQA(t *testing.T) │ │ │
|
||||
│ ├─ Part01_Infra │ │ ┌─ certctl-server :8443 │
|
||||
│ ├─ Part02_Auth │ │ ├─ postgres :5432 │
|
||||
│ ├─ Part03_CertCRUD │ │ └─ certctl-agent (×N) │
|
||||
│ ├─ ... │ │ ↑ seed_demo.sql provisions │
|
||||
│ └─ Part52_HelmChart │ │ 12 agent rows (1 active, │
|
||||
└────────────────────────┘ │ 2 retired, 9 reserved / │
|
||||
│ sentinel) for the soft- │
|
||||
│ retire / FSM coverage │
|
||||
│ Parts 55–56 exercise. │
|
||||
└─────────────────────────────────┘
|
||||
```mermaid
|
||||
flowchart LR
|
||||
QA["qa_test.go (//go:build qa)<br/><br/>TestQA(t *testing.T)<br/>├─ Part01_Infra<br/>├─ Part02_Auth<br/>├─ Part03_CertCRUD<br/>├─ ...<br/>└─ Part52_HelmChart"]
|
||||
subgraph Stack["certctl demo stack<br/>docker-compose.yml + docker-compose.demo.yml"]
|
||||
Server["certctl-server :8443"]
|
||||
Postgres["postgres :5432"]
|
||||
Agents["certctl-agent (×N)<br/>↑ seed_demo.sql provisions 12 agent rows<br/>(1 active, 2 retired, 9 reserved/sentinel)<br/>for the soft-retire / FSM coverage Parts 55–56 exercise"]
|
||||
end
|
||||
QA --> Stack
|
||||
```
|
||||
|
||||
> **Multi-agent demo stack (Bundle Q / L-004 closure).** The demo
|
||||
|
||||
+1
-1
@@ -46,7 +46,7 @@ On Linux, follow the official Docker install guide for your distribution.
|
||||
### Docker Compose (Quick Start)
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
@@ -0,0 +1,337 @@
|
||||
# Runbook: cloud-target deployment connectors (AWS ACM + Azure Key Vault)
|
||||
|
||||
This runbook covers the SDK-driven cloud target connectors that ship in
|
||||
certctl post-2026-05-03 (Rank 5 of the Infisical deep-research
|
||||
deliverable). It complements the operator-facing
|
||||
[AWS Certificate Manager](connectors.md#aws-certificate-manager-acm) and
|
||||
[Azure Key Vault](connectors.md#azure-key-vault) sections in
|
||||
`docs/connectors.md`.
|
||||
|
||||
Audience: a platform sysadmin or SRE who needs to configure, debug, or
|
||||
audit certctl's cloud-target deploys. Not a walkthrough of how to
|
||||
install certctl.
|
||||
|
||||
---
|
||||
|
||||
## End-to-end flow (cloud targets)
|
||||
|
||||
```
|
||||
cert renewed → renewal job created
|
||||
│
|
||||
▼
|
||||
agent picks up DeployCertificate work item
|
||||
│
|
||||
▼
|
||||
target.Connector.DeployCertificate(ctx, request)
|
||||
│
|
||||
┌──────────────────┴──────────────────┐
|
||||
│ │
|
||||
▼ ▼
|
||||
AWS ACM path Azure Key Vault path
|
||||
│ │
|
||||
▼ ▼
|
||||
1. (rotate-in-place only) 1. GetCertificate(name, "" /* latest */)
|
||||
DescribeCertificate(arn) — capture snapshot CER bytes
|
||||
2. GetCertificate(arn) — capture 2. Build PFX from cert+chain+key
|
||||
snapshot bytes for rollback (PKCS#12 via go-pkcs12)
|
||||
3. ImportCertificate(arn, new_bytes) 3. ImportCertificate(name, PFX, tags)
|
||||
— fresh ARN OR rotate-in-place — ALWAYS creates a new version
|
||||
4. AddTagsToCertificate(arn, 4. (Tags carried forward
|
||||
provenance) — ACM strips on automatically)
|
||||
re-import; we re-apply
|
||||
5. DescribeCertificate(arn) — verify 5. GetCertificate(name, "" /* latest */)
|
||||
serial matches expected — verify serial matches expected
|
||||
6. ON MISMATCH: rollback ←──── (same shape) ────→ 6. ON MISMATCH: rollback
|
||||
ImportCertificate(arn, ImportCertificate(name,
|
||||
snapshot_bytes) snapshot_PFX) — new version
|
||||
│
|
||||
▼
|
||||
7. Audit row + Prometheus counter
|
||||
certctl_deploy_attempts_total{target_type="AWSACM"|"AzureKeyVault",
|
||||
result="success"|"failure"}
|
||||
certctl_deploy_rollback_total{target_type=...,
|
||||
outcome="restored"|"also_failed"}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuring an AWS ACM target
|
||||
|
||||
### Minimum config
|
||||
|
||||
```bash
|
||||
curl -X POST https://certctl.example.com/api/v1/targets \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "Production ALB cert",
|
||||
"type": "AWSACM",
|
||||
"agent_id": "ag-server",
|
||||
"config": {
|
||||
"region": "us-east-1",
|
||||
"tags": {"env": "production"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
Empty `certificate_arn` on first deploy = ACM creates a fresh ARN; the
|
||||
deployment record's Metadata captures it. Update the
|
||||
`deployment_targets.config.certificate_arn` field via the GUI / API /
|
||||
direct SQL to pin the ARN for subsequent renewals.
|
||||
|
||||
### Minimum IAM policy
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [{
|
||||
"Effect": "Allow",
|
||||
"Action": [
|
||||
"acm:ImportCertificate",
|
||||
"acm:GetCertificate",
|
||||
"acm:DescribeCertificate",
|
||||
"acm:ListCertificates",
|
||||
"acm:AddTagsToCertificate"
|
||||
],
|
||||
"Resource": "arn:aws:acm:us-east-1:*:certificate/*"
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
Pin `Resource` to the specific region / account where the ALB lives.
|
||||
Cross-account deploys use AssumeRole — configure the agent's role with
|
||||
`sts:AssumeRole` against the target account's role ARN.
|
||||
|
||||
### Auth: IRSA (recommended for EKS-hosted agents)
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: certctl-agent
|
||||
namespace: certctl-system
|
||||
annotations:
|
||||
eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/certctl-acm-deployer
|
||||
```
|
||||
|
||||
Trust policy on `certctl-acm-deployer`:
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [{
|
||||
"Effect": "Allow",
|
||||
"Principal": {
|
||||
"Federated": "arn:aws:iam::123456789012:oidc-provider/oidc.eks.us-east-1.amazonaws.com/id/EXAMPLE"
|
||||
},
|
||||
"Action": "sts:AssumeRoleWithWebIdentity",
|
||||
"Condition": {
|
||||
"StringEquals": {
|
||||
"oidc.eks.us-east-1.amazonaws.com/id/EXAMPLE:sub": "system:serviceaccount:certctl-system:certctl-agent"
|
||||
}
|
||||
}
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuring an Azure Key Vault target
|
||||
|
||||
### Minimum config
|
||||
|
||||
```bash
|
||||
curl -X POST https://certctl.example.com/api/v1/targets \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "Production AGW cert",
|
||||
"type": "AzureKeyVault",
|
||||
"agent_id": "ag-server",
|
||||
"config": {
|
||||
"vault_url": "https://prod-vault.vault.azure.net",
|
||||
"certificate_name": "api-prod",
|
||||
"credential_mode": "managed_identity",
|
||||
"tags": {"env": "production"}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
### Minimum RBAC role
|
||||
|
||||
Off-the-shelf built-in role: **Key Vault Certificates Officer** (assign it at
|
||||
the vault scope).
|
||||
|
||||
Custom minimum-permission role:
|
||||
|
||||
```json
|
||||
{
|
||||
"properties": {
|
||||
"roleName": "certctl-keyvault-deployer",
|
||||
"description": "Minimum permissions for certctl Key Vault target",
|
||||
"assignableScopes": [
|
||||
"/subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.KeyVault/vaults/<vault-name>"
|
||||
],
|
||||
"permissions": [{
|
||||
"actions": [],
|
||||
"notActions": [],
|
||||
"dataActions": [
|
||||
"Microsoft.KeyVault/vaults/certificates/import/action",
|
||||
"Microsoft.KeyVault/vaults/certificates/read",
|
||||
"Microsoft.KeyVault/vaults/certificates/listversions/read"
|
||||
],
|
||||
"notDataActions": []
|
||||
}]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Auth: AKS workload identity (recommended for AKS-hosted agents)
|
||||
|
||||
Annotate the agent's ServiceAccount:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: certctl-agent
|
||||
namespace: certctl-system
|
||||
annotations:
|
||||
azure.workload.identity/client-id: <app-registration-client-id>
|
||||
labels:
|
||||
azure.workload.identity/use: "true"
|
||||
```
|
||||
|
||||
Federated credential on the app registration:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "certctl-agent-federated",
|
||||
"issuer": "https://<oidc-issuer-url>",
|
||||
"subject": "system:serviceaccount:certctl-system:certctl-agent",
|
||||
"audiences": ["api://AzureADTokenExchange"]
|
||||
}
|
||||
```
|
||||
|
||||
Set `credential_mode: workload_identity` on the deployment_target
|
||||
config.
|
||||
|
||||
---
|
||||
|
||||
## Operator playbook
|
||||
|
||||
### "Did the cert get imported to ACM / Key Vault?"
|
||||
|
||||
**AWS:**
|
||||
|
||||
```bash
|
||||
aws acm describe-certificate \
|
||||
--certificate-arn arn:aws:acm:us-east-1:...:certificate/<id> \
|
||||
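--query 'Certificate.{Status:Status,Serial:Serial,Imported:ImportedAt,NotAfter:NotAfter}'
|
||||
# describe-certificate does not return tags; list them with a second call:
|
||||
aws acm list-tags-for-certificate \
|
||||
--certificate-arn arn:aws:acm:us-east-1:...:certificate/<id>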
|
||||
```
|
||||
|
||||
**Azure:**
|
||||
|
||||
```bash
|
||||
az keyvault certificate show \
|
||||
--vault-name prod-vault \
|
||||
--name api-prod \
|
||||
--query '{Thumbprint:x509ThumbprintHex, Version:id, NotAfter:attributes.expires, Tags:tags}'
|
||||
```
|
||||
|
||||
In both cases, the `certctl-managed-by` tag confirms the cert was
|
||||
imported by certctl (and not someone running aws-cli directly).
|
||||
|
||||
### "Why did the rollback fail?"
|
||||
|
||||
The Prometheus counter
|
||||
`certctl_deploy_rollback_total{outcome="also_failed"}` ticks when the
|
||||
rollback's own ImportCertificate / Set call also returns an error. Look
|
||||
at the agent's slog at ERROR level for the per-call diagnostic; the
|
||||
underlying cloud SDK error message tells you whether it was IAM
|
||||
denial, throttling, or a structural input problem.
|
||||
|
||||
Manual recovery:
|
||||
|
||||
**AWS ACM:**
|
||||
|
||||
```bash
|
||||
# Get the snapshot of a known-good cert from S3 / Vault / wherever the
|
||||
# operator stores backup PEMs:
|
||||
aws acm import-certificate \
|
||||
--certificate fileb://known-good.crt \
|
||||
--private-key fileb://known-good.key \
|
||||
--certificate-chain fileb://known-good.chain \
|
||||
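--certificate-arn arn:aws:acm:us-east-1:...:certificate/<id>
|
||||
# ACM rejects --tags when re-importing into an existing ARN, so tag in a second call:
|
||||
aws acm add-tags-to-certificate \
|
||||
--certificate-arn arn:aws:acm:us-east-1:...:certificate/<id> \
|
||||
--tags Key=certctl-managed-by,Value=manual-recovery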
|
||||
```
|
||||
|
||||
**Azure Key Vault:**
|
||||
|
||||
```bash
|
||||
# Import a fresh PFX as a new version under the same name:
|
||||
az keyvault certificate import \
|
||||
--vault-name prod-vault \
|
||||
--name api-prod \
|
||||
--file known-good.pfx \
|
||||
--tags certctl-managed-by=manual-recovery
|
||||
```
|
||||
|
||||
After the manual recovery, certctl's next renewal-loop tick re-verifies
|
||||
the live cert via `ValidateDeployment` and resumes normal operation.
|
||||
|
||||
### "How do I know certctl is the only one writing to this ARN / vault cert?"
|
||||
|
||||
**AWS — via CloudTrail:**
|
||||
|
||||
```
|
||||
EventName = "ImportCertificate"
|
||||
Resources.ARN = "arn:aws:acm:us-east-1:...:certificate/<id>"
|
||||
```
|
||||
|
||||
Filter by user identity to see which principal made each call. The
|
||||
certctl agent's IAM role / IRSA-bound role should be the only writer.
|
||||
|
||||
**Azure — via Activity Log:**
|
||||
|
||||
```bash
|
||||
az monitor activity-log list \
|
||||
--resource-id /subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.KeyVault/vaults/<vault>/certificates/<name> \
|
||||
--offset 30d \
|
||||
--query "[?operationName.value=='Microsoft.KeyVault/vaults/certificates/import/action'].{caller:caller, time:eventTimestamp}"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cardinality + cost
|
||||
|
||||
- Per-target-type Prometheus counters: 2 new
|
||||
`certctl_deploy_attempts_total` series (AWSACM + AzureKeyVault) ×
|
||||
2 results = 4 series. Comfortable.
|
||||
- AWS ACM costs: ImportCertificate is free; CloudTrail logs at $2 per
|
||||
GB. Renewing 100 certs/month adds ~10 KB to CloudTrail.
|
||||
- Azure Key Vault costs: certificate operations $0.03 per 10K
|
||||
operations (V2 pricing as of 2026-05). 100 certs/month × 3 Key Vault calls per deploy (snapshot, import, verify) = $0.0009 in
|
||||
cert-op spend. Activity Log retention is configurable (default 90
|
||||
days, free).
|
||||
|
||||
---
|
||||
|
||||
## V3-Pro forward path
|
||||
|
||||
Tracked at `cowork/WORKSPACE-ROADMAP.md` under "Adapter hardening":
|
||||
|
||||
- **AWS CloudFront direct-attach** — UpdateDistribution after an ACM
|
||||
ImportCertificate so the CloudFront edge picks up the new cert
|
||||
without operator intervention. Requires `cloudfront:UpdateDistribution`
|
||||
IAM permission on top of the ACM minimum.
|
||||
- **Azure Front Door direct-attach** — UpdateRoutingConfig equivalent.
|
||||
- **AWS ALB / Azure App Gateway auto-bind** — currently operators
|
||||
attach the ARN / KID URI to the LB out-of-band (Terraform);
|
||||
V3-Pro adds the auto-attach step.
|
||||
- **Soft-delete recovery for Azure Key Vault** — V2 always
|
||||
re-imports as a new version; V3 detects soft-deleted prior
|
||||
versions and offers operator-confirmed recovery.
|
||||
- **GCP Certificate Manager target** — Google Cloud's equivalent to
|
||||
ACM; mirrors the AWS ACM connector shape. Separate cloud,
|
||||
separate connector.
|
||||
@@ -0,0 +1,225 @@
|
||||
# Runbook: certificate-expiry alerts (multi-channel)
|
||||
|
||||
This runbook covers the per-policy multi-channel expiry-alert dispatch
|
||||
path that ships in certctl post-2026-05-03 (Rank 4 of the Infisical
|
||||
deep-research deliverable). It complements the operator-facing
|
||||
[Routing expiry alerts across channels](connectors.md#routing-expiry-alerts-across-channels)
|
||||
section in `docs/connectors.md`.
|
||||
|
||||
Audience: a platform sysadmin or on-call engineer who needs to
|
||||
configure, debug, or audit certctl's expiry-alert routing. Not a
|
||||
walkthrough of how to install certctl — that lives in the README.
|
||||
|
||||
---
|
||||
|
||||
## End-to-end flow
|
||||
|
||||
```
|
||||
daily ticker (renewalCheckLoop)
|
||||
│
|
||||
▼
|
||||
RenewalService.CheckExpiringCertificates
|
||||
│
|
||||
┌────────────────┴────────────────┐
|
||||
│ for cert in expiring (≤30 days):│
|
||||
│ 1. Resolve RenewalPolicy │
|
||||
│ 2. Compute daysUntil │
|
||||
│ 3. updateCertExpiryStatus │
|
||||
│ 4. sendThresholdAlerts ──────►│ per threshold:
|
||||
│ 5. Create renewal job (if │ a. resolve severity tier
|
||||
│ issuer registered + ARI │ via AlertSeverityMap
|
||||
│ allows) │ b. resolve channel set
|
||||
└──────────────────────────────────┘ via AlertChannels[tier]
|
||||
c. for each channel:
|
||||
i. dedup via
|
||||
notification_events
|
||||
(cert,threshold,channel)
|
||||
ii. SendThresholdAlertOnChannel
|
||||
→ notifierRegistry[channel]
|
||||
→ Send(recipient,subj,body)
|
||||
iii. record audit row
|
||||
(event_type=expiration_alert_sent,
|
||||
metadata.channel,
|
||||
metadata.severity_tier)
|
||||
iv. bump Prometheus counter
|
||||
certctl_expiry_alerts_total
|
||||
{channel,threshold,result}
|
||||
```
|
||||
|
||||
The dispatch loop's per-channel error handling is
|
||||
**fault-isolating**: PagerDuty's failure does NOT skip Slack/Email
|
||||
at the same threshold. Each channel runs independently, with its
|
||||
own dedup row + audit row + metric increment.
|
||||
|
||||
---
|
||||
|
||||
## Configuring the per-policy channel matrix
|
||||
|
||||
The matrix is a property of `RenewalPolicy`. Two new JSONB columns
|
||||
on the `renewal_policies` table back it (migration 000026):
|
||||
|
||||
- `alert_channels JSONB` — `map[severity_tier][]channel_name`. Default `{}`
|
||||
→ fall through to `DefaultAlertChannels` (Email-only at every tier).
|
||||
- `alert_severity_map JSONB` — `map[threshold_days]severity_tier`. Default
|
||||
`{}` → fall through to `DefaultAlertSeverityMap` (`30→informational,
|
||||
14→warning, 7→warning, 0→critical`).
|
||||
|
||||
### Example: production-grade routing
|
||||
|
||||
```bash
|
||||
curl -X PUT https://certctl.example.com/api/v1/renewal-policies/rp-production \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "Production CDN renewal policy",
|
||||
"renewal_window_days": 30,
|
||||
"auto_renew": true,
|
||||
"max_retries": 3,
|
||||
"retry_interval_seconds": 300,
|
||||
"alert_thresholds_days": [30, 14, 7, 0],
|
||||
"alert_channels": {
|
||||
"informational": ["Slack"],
|
||||
"warning": ["Slack", "Email"],
|
||||
"critical": ["PagerDuty", "OpsGenie", "Email"]
|
||||
},
|
||||
"alert_severity_map": {
|
||||
"30": "informational",
|
||||
"14": "warning",
|
||||
"7": "warning",
|
||||
"0": "critical"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
After this PUT, the next renewal-loop tick that finds a cert under
|
||||
this policy will fan out alerts as documented above.
|
||||
|
||||
### Example: opt out of informational alerts
|
||||
|
||||
If your team doesn't want T-30 informational alerts (you'd rather
|
||||
hear about a cert only at warning tier and beyond):
|
||||
|
||||
```json
|
||||
"alert_channels": {
|
||||
"informational": [],
|
||||
"warning": ["Email"],
|
||||
"critical": ["PagerDuty", "Email"]
|
||||
}
|
||||
```
|
||||
|
||||
The empty `informational` list causes the dispatch loop to record
|
||||
an `expiration_alert_skipped_no_channels` audit row at T-30 and
|
||||
skip the dispatch. Other tiers still fire.
|
||||
|
||||
---
|
||||
|
||||
## Operator playbook
|
||||
|
||||
### "Did the on-call team get paged?"
|
||||
|
||||
```sql
|
||||
SELECT created_at,
|
||||
metadata->>'channel' AS channel,
|
||||
metadata->>'threshold_days' AS threshold,
|
||||
metadata->>'severity_tier' AS severity
|
||||
FROM audit_events
|
||||
WHERE event_type = 'expiration_alert_sent'
|
||||
AND resource_id = '<cert-id>'
|
||||
ORDER BY created_at DESC;
|
||||
```
|
||||
|
||||
One row per (channel, threshold) attempt. If you see a row with
|
||||
`channel = 'PagerDuty'` and `severity = 'critical'`, the page went
|
||||
out (or was at least dispatched to the notifier).
|
||||
|
||||
### "Why didn't I get an alert at T-7?"
|
||||
|
||||
Three places to look:
|
||||
|
||||
1. **Audit log** — `SELECT * FROM audit_events WHERE event_type IN
|
||||
('expiration_alert_sent','expiration_alert_skipped_no_channels',
|
||||
'expiration_alert_skipped_invalid_channel') AND resource_id =
|
||||
'<cert-id>'`. If `expiration_alert_skipped_no_channels` appears,
|
||||
your policy's tier list is empty for the resolved tier. If
|
||||
`expiration_alert_skipped_invalid_channel` appears, your matrix
|
||||
has a typo (the `metadata->>'invalid_channel'` field tells you
|
||||
which value).
|
||||
|
||||
2. **Notifications table** —
|
||||
`SELECT * FROM notification_events WHERE certificate_id = '<cert-id>'
|
||||
AND type = 'ExpirationWarning' ORDER BY created_at DESC`. If
|
||||
rows exist with `channel = 'Slack'` and `status = 'failed'`,
|
||||
the dispatch reached the channel but the channel rejected the
|
||||
send. Look at the `error` column for the upstream message.
|
||||
|
||||
3. **Prometheus counters** —
|
||||
`curl /api/v1/metrics/prometheus | grep certctl_expiry_alerts_total`.
|
||||
Sustained `{result="failure"}` counts indicate a notifier
|
||||
connector misconfiguration (bad webhook URL, expired API key,
|
||||
etc.).
|
||||
|
||||
### "How do I test the matrix without waiting for a real expiry?"
|
||||
|
||||
certctl ships an admin endpoint for this:
|
||||
|
||||
```bash
|
||||
curl -X POST https://certctl.example.com/api/v1/admin/notifications/test \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"certificate_id": "mc-test-cert",
|
||||
"threshold_days": 0,
|
||||
"channel": "PagerDuty"
|
||||
}'
|
||||
```
|
||||
|
||||
This calls `NotificationService.SendThresholdAlertOnChannel`
|
||||
directly and bypasses the renewal loop's threshold check. Useful
|
||||
for "did I configure PagerDuty correctly?" without having to set
|
||||
up a deliberately-expiring cert. The admin endpoint requires
|
||||
`role=admin` (V3-Pro RBAC); V2 deploys gate it on the bearer
|
||||
token only.
|
||||
|
||||
### "How do I rotate a notifier credential without downtime?"
|
||||
|
||||
1. Update the `CERTCTL_PAGERDUTY_ROUTING_KEY` (or equivalent) env
|
||||
var in your deployment.
|
||||
2. Restart `certctl-server`. The notifier registry rebuilds
|
||||
with the new credential.
|
||||
3. Confirm with the admin-test endpoint above against the cert
|
||||
you most care about.
|
||||
|
||||
The renewal loop is idempotent — a missed tick during the restart
|
||||
window does NOT cause double-dispatch on the next tick (per-channel
|
||||
dedup on the `notification_events` table guards against that).
|
||||
|
||||
---
|
||||
|
||||
## Cardinality + cost
|
||||
|
||||
- Default 6 channels × 4 thresholds × 3 results = **72 Prometheus series**.
|
||||
- Custom-thresholds policies (e.g. `[60, 45, 30, 14, 7, 3, 1, 0]`)
|
||||
expand the threshold dimension proportionally — 6 × 8 × 3 = 144 series.
|
||||
- Closed-enum discipline at the dispatch site means typos in
|
||||
`alert_channels` do NOT grow this count.
|
||||
- A daily renewal-loop tick over 10K certs each policy-bound to the
|
||||
matrix above produces O(channels × thresholds × certs) audit rows
|
||||
and notification rows in the worst case (every cert has crossed
|
||||
every threshold and no dedup applies). Operators sizing
|
||||
Postgres should plan for an `audit_events` row count on the
|
||||
order of `unique_certs × channels_per_critical_tier` per fan-out
|
||||
batch — which is ~3-5× the pre-Rank-4 row count.
|
||||
|
||||
---
|
||||
|
||||
## V3-Pro forward path
|
||||
|
||||
Tracked at `cowork/WORKSPACE-ROADMAP.md` under "Adapter hardening":
|
||||
|
||||
- Per-owner / per-team / per-tenant channel routing (the matrix is
|
||||
per-policy today, not per-owner).
|
||||
- Calendar-aware suppression (no T-30 alerts on weekends for non-
|
||||
on-call teams).
|
||||
- Escalation chains (T-1 unanswered for 30m → escalate to
|
||||
manager's PagerDuty).
|
||||
- Per-channel rate limiting (downstream of I-005's retry+DLQ).
|
||||
+7
-15
@@ -36,21 +36,13 @@ What you get over NDES:
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌──────────────┐ ┌──────────────────────┐ ┌──────────────┐
|
||||
│ Intune cloud │──────▶│ Intune Certificate │──────▶│ certctl SCEP │
|
||||
│ │ │ Connector │ │ server │
|
||||
│ (Microsoft) │ │ (customer infra) │ │ (you) │
|
||||
└──────────────┘ └──────────────────────┘ └──────┬───────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────┐
|
||||
│ issuer │
|
||||
│ connector │
|
||||
│ (local CA / │
|
||||
│ Vault / │
|
||||
│ EJBCA / …) │
|
||||
└──────────────┘
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Cloud["Intune cloud<br/>(Microsoft)"]
|
||||
Connector["Intune Certificate Connector<br/>(customer infra)"]
|
||||
Server["certctl SCEP server<br/>(you)"]
|
||||
Issuer["issuer connector<br/>(local CA / Vault / EJBCA / …)"]
|
||||
Cloud --> Connector --> Server --> Issuer
|
||||
```
|
||||
|
||||
**certctl replaces NDES, not the Connector.** The Intune Certificate
|
||||
|
||||
+1
-1
@@ -70,7 +70,7 @@ If this says "command not found", you have an old Docker version. Update Docker
|
||||
You need the certctl source code on your machine. If you haven't cloned it yet:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl
|
||||
```
|
||||
|
||||
|
||||
@@ -199,6 +199,7 @@ to this table is the right way to extend the surface.
|
||||
| `internal/connector/target/f5/f5.go:128` | F5 BIG-IP iControl REST | F5 default install ships with a self-signed cert; operators who haven't replaced it use `config.Insecure`. The connector logs this on every dial and the operator-facing config docs document this. |
|
||||
| `internal/connector/issuer/acme/acme.go:146` | Pebble (ACME test server) | Hard-coded for tests that drive against Pebble locally. Pebble issues self-signed; verifying the chain would defeat the purpose. |
|
||||
| `internal/service/network_scan.go:460` | Network scanner probe | Same rationale as `tlsprobe/probe.go` above — discovery surfaces broken certs by design. |
|
||||
| `internal/api/acme/validators.go` (TLS-ALPN-01 validator) | RFC 8737 §3 TLS-ALPN-01 challenge validation | RFC 8737 mandates this: the responding TLS server presents a self-signed cert with the proof embedded in the `id-pe-acmeIdentifier` extension (OID 1.3.6.1.5.5.7.1.31). The chain is intentionally NOT validated — the proof is in the extension's SHA-256 of the key authorization, not the cert chain. Validating the chain would defeat the purpose: clients running TLS-ALPN-01 self-sign their challenge cert specifically because they don't have a trusted cert yet (that's what they're trying to obtain via ACME). The validator additionally checks that ALPN negotiated `acme-tls/1` and that the cert's `id-pe-acmeIdentifier` extension value is exactly SHA-256 of the expected key authorization. SSRF posture: the validator runs `validation.IsReservedIPForDial` against the resolved IP before the dial, refusing any private-IP target — same posture as the HTTP-01 validator. |
|
||||
|
||||
**What is NOT covered by this list:** `*_test.go` files use
|
||||
`InsecureSkipVerify` freely against `httptest.Server` instances; that's a
|
||||
|
||||
+1
-1
@@ -105,7 +105,7 @@ certctl isn't the right tool for everyone:
|
||||
The demo seeds certificates across multiple issuers, agents, and deployment targets with 180 days of realistic history — jobs, audit events, discovery scans, approval workflows — so you can explore every feature immediately.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/shankar0123/certctl.git
|
||||
git clone https://github.com/certctl-io/certctl.git
|
||||
cd certctl/deploy && docker compose up -d
|
||||
# Dashboard at https://localhost:8443 (self-signed cert — pin deploy/test/certs/ca.crt)
|
||||
```
|
||||
|
||||
@@ -22,7 +22,7 @@ services:
|
||||
|
||||
# certctl server (control plane)
|
||||
certctl-server:
|
||||
image: ghcr.io/shankar0123/certctl-server:latest
|
||||
image: ghcr.io/certctl-io/certctl-server:latest
|
||||
container_name: certctl-server-acme-nginx
|
||||
environment:
|
||||
# Database
|
||||
@@ -71,7 +71,7 @@ services:
|
||||
# In this example, the agent is in the same compose file for simplicity.
|
||||
# In production, the agent runs on each server that needs certificates.
|
||||
certctl-agent:
|
||||
image: ghcr.io/shankar0123/certctl-agent:latest
|
||||
image: ghcr.io/certctl-io/certctl-agent:latest
|
||||
container_name: certctl-agent-acme-nginx
|
||||
environment:
|
||||
# Control plane connection
|
||||
|
||||
@@ -46,7 +46,7 @@ services:
|
||||
|
||||
# certctl server (control plane + ACME orchestration)
|
||||
certctl-server:
|
||||
image: ghcr.io/shankar0123/certctl-server:latest
|
||||
image: ghcr.io/certctl-io/certctl-server:latest
|
||||
container_name: certctl-server-dns01
|
||||
environment:
|
||||
# Database
|
||||
@@ -124,7 +124,7 @@ services:
|
||||
# In production, run agents on each host that needs certificates.
|
||||
# For demo, we include one agent in this compose.
|
||||
certctl-agent:
|
||||
image: ghcr.io/shankar0123/certctl-agent:latest
|
||||
image: ghcr.io/certctl-io/certctl-agent:latest
|
||||
container_name: certctl-agent-dns01
|
||||
environment:
|
||||
# Control plane connection
|
||||
|
||||
@@ -23,7 +23,7 @@ services:
|
||||
# certctl server (control plane)
|
||||
# Configured with BOTH ACME (Let's Encrypt) and Local CA issuers
|
||||
certctl-server:
|
||||
image: ghcr.io/shankar0123/certctl-server:latest
|
||||
image: ghcr.io/certctl-io/certctl-server:latest
|
||||
container_name: certctl-server-multi-issuer
|
||||
environment:
|
||||
# Database
|
||||
@@ -72,7 +72,7 @@ services:
|
||||
|
||||
# certctl agent (manages certificates on NGINX and application servers)
|
||||
certctl-agent:
|
||||
image: ghcr.io/shankar0123/certctl-agent:latest
|
||||
image: ghcr.io/certctl-io/certctl-agent:latest
|
||||
container_name: certctl-agent-multi-issuer
|
||||
environment:
|
||||
# Control plane connection
|
||||
|
||||
@@ -22,7 +22,7 @@ services:
|
||||
|
||||
# certctl server (control plane) with Local CA in sub-CA mode
|
||||
certctl-server:
|
||||
image: ghcr.io/shankar0123/certctl-server:latest
|
||||
image: ghcr.io/certctl-io/certctl-server:latest
|
||||
container_name: certctl-server-private-ca
|
||||
environment:
|
||||
# Database
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
|
||||
# certctl agent (deploys certs to Traefik)
|
||||
certctl-agent:
|
||||
image: ghcr.io/shankar0123/certctl-agent:latest
|
||||
image: ghcr.io/certctl-io/certctl-agent:latest
|
||||
container_name: certctl-agent-private-ca
|
||||
environment:
|
||||
# Control plane connection
|
||||
|
||||
@@ -77,7 +77,7 @@ services:
|
||||
|
||||
# certctl server (control plane)
|
||||
certctl-server:
|
||||
image: ghcr.io/shankar0123/certctl-server:latest
|
||||
image: ghcr.io/certctl-io/certctl-server:latest
|
||||
container_name: certctl-server-stepca-haproxy
|
||||
environment:
|
||||
# Database
|
||||
@@ -127,7 +127,7 @@ services:
|
||||
|
||||
# certctl agent (runs on the target machine with HAProxy)
|
||||
certctl-agent:
|
||||
image: ghcr.io/shankar0123/certctl-agent:latest
|
||||
image: ghcr.io/certctl-io/certctl-agent:latest
|
||||
container_name: certctl-agent-stepca-haproxy
|
||||
environment:
|
||||
# Control plane connection
|
||||
|
||||
@@ -19,8 +19,8 @@ This is the natural choice if you're already invested in step-ca and want to con
|
||||
| Service | Image | Purpose |
|
||||
|---------|-------|---------|
|
||||
| **step-ca** | `smallstep/step-ca:latest` | Private internal CA |
|
||||
| **certctl-server** | `ghcr.io/shankar0123/certctl-server:latest` | Certificate management control plane |
|
||||
| **certctl-agent** | `ghcr.io/shankar0123/certctl-agent:latest` | Agent running on HAProxy server |
|
||||
| **certctl-server** | `ghcr.io/certctl-io/certctl-server:latest` | Certificate management control plane |
|
||||
| **certctl-agent** | `ghcr.io/certctl-io/certctl-agent:latest` | Agent running on HAProxy server |
|
||||
| **haproxy** | `haproxy:2.9-alpine` | Reverse proxy / load balancer |
|
||||
| **postgres** | `postgres:16-alpine` | certctl audit trail + config storage |
|
||||
|
||||
|
||||
@@ -10,19 +10,43 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1
|
||||
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azcertificates v1.4.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.7
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.17
|
||||
github.com/aws/aws-sdk-go-v2/service/acm v1.38.3
|
||||
github.com/aws/aws-sdk-go-v2/service/acmpca v1.46.14
|
||||
github.com/aws/smithy-go v1.25.1
|
||||
github.com/go-jose/go-jose/v4 v4.1.4
|
||||
github.com/leanovate/gopter v0.2.11
|
||||
github.com/masterzen/winrm v0.0.0-20250927112105-5f8e6c707321
|
||||
github.com/pkg/sftp v1.13.10
|
||||
golang.org/x/crypto v0.45.0
|
||||
golang.org/x/sync v0.18.0
|
||||
software.sslmate.com/src/go-pkcs12 v0.7.0
|
||||
)
|
||||
|
||||
require (
|
||||
dario.cat/mergo v1.0.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 // indirect
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
|
||||
github.com/ChrisTrenkamp/goxpath v0.0.0-20210404020558-97928f7e12b6 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.16 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.23 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.23 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.23 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.24 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.9 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.23 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.11 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.21 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.42.1 // indirect
|
||||
github.com/bodgit/ntlmssp v0.0.0-20240506230425-31973bb52d9b // indirect
|
||||
github.com/bodgit/windows v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
|
||||
@@ -41,6 +65,7 @@ require (
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/gofrs/uuid v4.4.0+incompatible // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
|
||||
github.com/google/jsonschema-go v0.4.2 // indirect
|
||||
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
|
||||
github.com/hashicorp/go-uuid v1.0.3 // indirect
|
||||
@@ -52,7 +77,7 @@ require (
|
||||
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
|
||||
github.com/klauspost/compress v1.17.4 // indirect
|
||||
github.com/kr/fs v0.1.0 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/kylelemons/godebug v1.1.0 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
github.com/magiconair/properties v1.8.7 // indirect
|
||||
github.com/masterzen/simplexml v0.0.0-20190410153822-31eea3082786 // indirect
|
||||
@@ -64,6 +89,7 @@ require (
|
||||
github.com/morikuni/aec v1.0.0 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0 // indirect
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
@@ -72,7 +98,7 @@ require (
|
||||
github.com/shirou/gopsutil/v3 v3.23.12 // indirect
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||
github.com/stretchr/testify v1.10.0 // indirect
|
||||
github.com/stretchr/testify v1.11.1 // indirect
|
||||
github.com/tidwall/transform v0.0.0-20201103190739-32f242e2dbde // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
|
||||
@@ -41,10 +41,26 @@ dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
|
||||
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azcertificates v1.4.0 h1:mtvR5ZXH5Ew6PSONd5lO5OXovWP1E3oAlgC8fpxor2Q=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azcertificates v1.4.0/go.mod h1:u560+RFVfG0CBPzkXlDW43slESbBAQjgDGi3r6z+wk8=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 h1:nCYfgcSyHZXJI8J0IWE5MsCGlb2xp9fJiXyxWgmOFg4=
|
||||
github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0/go.mod h1:ucUjca2JtSZboY8IoUqyQyuuXvwbMBVwFOm0vdQPNhA=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
|
||||
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM=
|
||||
github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE=
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs=
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/ChrisTrenkamp/goxpath v0.0.0-20210404020558-97928f7e12b6 h1:w0E0fgc1YafGEh5cROhlROMWXiNoZqApk2PDN0M1+Ns=
|
||||
@@ -55,6 +71,38 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd
|
||||
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
|
||||
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
|
||||
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.7 h1:DWpAJt66FmnnaRIOT/8ASTucrvuDPZASqhhLey6tLY8=
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.7/go.mod h1:4LAfZOPHNVNQEckOACQx60Y8pSRjIkNZQz1w92xpMJc=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.17 h1:FpL4/758/diKwqbytU0prpuiu60fgXKUWCpDJtApclU=
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.17/go.mod h1:OXqUMzgXytfoF9JaKkhrOYsyh72t9G+MJH8mMRaexOE=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.16 h1:r3RJBuU7X9ibt8RHbMjWE6y60QbKBiII6wSrXnapxSU=
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.16/go.mod h1:6cx7zqDENJDbBIIWX6P8s0h6hqHC8Avbjh9Dseo27ug=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.23 h1:UuSfcORqNSz/ey3VPRS8TcVH2Ikf0/sC+Hdj400QI6U=
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.23/go.mod h1:+G/OSGiOFnSOkYloKj/9M35s74LgVAdJBSD5lsFfqKg=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.23 h1:GpT/TrnBYuE5gan2cZbTtvP+JlHsutdmlV2YfEyNde0=
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.23/go.mod h1:xYWD6BS9ywC5bS3sz9Xh04whO/hzK2plt2Zkyrp4JuA=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.23 h1:bpd8vxhlQi2r1hiueOw02f/duEPTMK59Q4QMAoTTtTo=
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.23/go.mod h1:15DfR2nw+CRHIk0tqNyifu3G1YdAOy68RftkhMDDwYk=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.24 h1:OQqn11BtaYv1WLUowvcA30MpzIu8Ti4pcLPIIyoKZrA=
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.24/go.mod h1:X5ZJyfwVrWA96GzPmUCWFQaEARPR7gCrpq2E92PJwAE=
|
||||
github.com/aws/aws-sdk-go-v2/service/acm v1.38.3 h1:Fzab84hCu3rw9R9Y3mH7SHfr/cSEHnCB0Mq1JCdr9t0=
|
||||
github.com/aws/aws-sdk-go-v2/service/acm v1.38.3/go.mod h1:yCteizCNPaHt0SnNusoGGHvy0JDB0tvGDTVhEt5anZM=
|
||||
github.com/aws/aws-sdk-go-v2/service/acmpca v1.46.14 h1:Srm+IbQm8jjQoBQJ7tf/+etEzogQhV2QaVHA0kesQoM=
|
||||
github.com/aws/aws-sdk-go-v2/service/acmpca v1.46.14/go.mod h1:qFP+Zv26pVlLajTm293Ga9I82NRjnrTpXtMtkFFn5xc=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.9 h1:FLudkZLt5ci0ozzgkVo8BJGwvqNaZbTWb3UcucAateA=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.9/go.mod h1:w7wZ/s9qK7c8g4al+UyoF1Sp/Z45UwMGcqIzLWVQHWk=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.23 h1:pbrxO/kuIwgEsOPLkaHu0O+m4fNgLU8B3vxQ+72jTPw=
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.23/go.mod h1:/CMNUqoj46HpS3MNRDEDIwcgEnrtZlKRaHNaHxIFpNA=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.11 h1:TdJ+HdzOBhU8+iVAOGUTU63VXopcumCOF1paFulHWZc=
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.11/go.mod h1:R82ZRExE/nheo0N+T8zHPcLRTcH8MGsnR3BiVGX0TwI=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.17 h1:7byT8HUWrgoRp6sXjxtZwgOKfhss5fW6SkLBtqzgRoE=
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.17/go.mod h1:xNWknVi4Ezm1vg1QsB/5EWpAJURq22uqd38U8qKvOJc=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.21 h1:+1Kl1zx6bWi4X7cKi3VYh29h8BvsCoHQEQ6ST9X8w7w=
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.21/go.mod h1:4vIRDq+CJB2xFAXZ+YgGUTiEft7oAQlhIs71xcSeuVg=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.42.1 h1:F/M5Y9I3nwr2IEpshZgh1GeHpOItExNM9L1euNuh/fk=
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.42.1/go.mod h1:mTNxImtovCOEEuD65mKW7DCsL+2gjEH+RPEAexAzAio=
|
||||
github.com/aws/smithy-go v1.25.1 h1:J8ERsGSU7d+aCmdQur5Txg6bVoYelvQJgtZehD12GkI=
|
||||
github.com/aws/smithy-go v1.25.1/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM=
|
||||
github.com/bodgit/ntlmssp v0.0.0-20240506230425-31973bb52d9b h1:baFN6AnR0SeC194X2D292IUZcHDs4JjStpqtE70fjXE=
|
||||
@@ -82,7 +130,6 @@ github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
|
||||
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
|
||||
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
|
||||
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@@ -111,6 +158,8 @@ github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeME
|
||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
|
||||
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
|
||||
github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA=
|
||||
github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
|
||||
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
@@ -249,6 +298,8 @@ github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU=
|
||||
github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
|
||||
@@ -256,12 +307,14 @@ github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6K
|
||||
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
|
||||
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/leanovate/gopter v0.2.11 h1:vRjThO1EKPb/1NsDXuDrzldR28RLkBflWYcU9CvzWu4=
|
||||
github.com/leanovate/gopter v0.2.11/go.mod h1:aK3tzZP/C+p1m3SPRE4SYZFGP7jjkuSI4f7Xvpt0S9c=
|
||||
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
@@ -311,6 +364,8 @@ github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQ
|
||||
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
|
||||
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
@@ -325,8 +380,8 @@ github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:Om
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rogpeppe/go-internal v1.8.1 h1:geMPLpDpQOgVyCg5z5GoRwLHepNdb71NXb67XFkP+Eg=
|
||||
github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o=
|
||||
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
|
||||
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
||||
@@ -372,8 +427,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
|
||||
github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo=
|
||||
github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4=
|
||||
@@ -541,6 +596,8 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
|
||||
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
@@ -591,6 +648,7 @@ golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
|
||||
+2
-2
@@ -12,7 +12,7 @@ YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Configuration
|
||||
GITHUB_REPO="shankar0123/certctl"
|
||||
GITHUB_REPO="certctl-io/certctl"
|
||||
RELEASE_URL="https://github.com/${GITHUB_REPO}/releases/latest/download"
|
||||
INSTALL_DIR="/usr/local/bin"
|
||||
SERVICE_NAME="certctl-agent"
|
||||
@@ -447,7 +447,7 @@ setup_systemd_service() {
|
||||
cat > "$service_file" <<'EOF'
|
||||
[Unit]
|
||||
Description=certctl Agent - Certificate Lifecycle Management
|
||||
Documentation=https://github.com/shankar0123/certctl
|
||||
Documentation=https://github.com/certctl-io/certctl
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// AccountResponseJSON is the wire shape RFC 8555 §7.1.2 mandates for
// account-resource responses (new-account success, account update,
// per-account POST-as-GET).
//
// The orders URL is mandatory per RFC 8555 §7.1.2.1; it points at the
// per-account orders list endpoint that Phase 2 implements. Phase 1b
// emits it as an empty placeholder ("orders not yet implemented") so
// the directory + new-account flow round-trips against ACME clients
// that expect the field present.
type AccountResponseJSON struct {
	// Status is the account status string, always emitted.
	Status string `json:"status"`
	// Contact carries no `omitempty` on purpose: encoding/json treats a
	// zero-length slice as "empty", so with `omitempty` an account
	// without contacts would lose the field entirely instead of
	// rendering the stable `"contact":[]` shape MarshalAccount
	// normalizes to.
	Contact []string `json:"contact"`
	// Orders is the per-account orders list URL, always emitted.
	Orders string `json:"orders"`
}
|
||||
|
||||
// MarshalAccount renders an ACMEAccount in RFC 8555 §7.1.2 wire shape.
|
||||
// `ordersURL` is the per-account orders list URL the handler computes
|
||||
// from the inbound request (scheme + host + profile path + account
|
||||
// id); Phase 1b's handler passes it but Phase 2 wires the actual
|
||||
// /acme/profile/<id>/account/<acc-id>/orders endpoint.
|
||||
func MarshalAccount(acct *domain.ACMEAccount, ordersURL string) AccountResponseJSON {
|
||||
contact := acct.Contact
|
||||
if contact == nil {
|
||||
// RFC 8555 doesn't require contact be present, but cert-manager
|
||||
// + lego both expect a stable shape. Emit [] rather than null.
|
||||
contact = []string{}
|
||||
}
|
||||
return AccountResponseJSON{
|
||||
Status: string(acct.Status),
|
||||
Contact: contact,
|
||||
Orders: ordersURL,
|
||||
}
|
||||
}
|
||||
|
||||
// NewAccountRequest mirrors the JSON payload of an RFC 8555 §7.3
// new-account request. The handler decodes VerifiedRequest.Payload
// into it once JWS verification has succeeded.
type NewAccountRequest struct {
	// Contact lists mailto: / tel: URIs. RFC 8555 makes it optional,
	// though operators usually provide at least one mailto:.
	Contact []string `json:"contact,omitempty"`

	// TermsOfServiceAgreed carries the client's consent to the ToS
	// document advertised via meta.termsOfService. Phase 1b records
	// the value without enforcing it.
	TermsOfServiceAgreed bool `json:"termsOfServiceAgreed,omitempty"`

	// OnlyReturnExisting asks the server to look up the existing
	// account for this JWK instead of creating one (RFC 8555 §7.3.1).
	// If it is true and no account exists, the server MUST answer 400
	// with urn:ietf:params:acme:error:accountDoesNotExist.
	OnlyReturnExisting bool `json:"onlyReturnExisting,omitempty"`

	// ExternalAccountBinding is the raw EAB envelope (RFC 8555
	// §7.3.4). Phase 1b accepts it but does not validate it; keeping
	// the decoded envelope lets a later phase add validation.
	ExternalAccountBinding map[string]any `json:"externalAccountBinding,omitempty"`
}
|
||||
|
||||
// AccountUpdateRequest mirrors the JSON payload accepted by the
// account-update endpoint POST /acme/profile/<id>/account/<acc-id>
// (RFC 8555 §7.3.2 + §7.3.6). The spec allows only `contact` and
// `status` to be changed.
type AccountUpdateRequest struct {
	// Contact replaces the account's contact list when non-nil. A nil
	// slice (field absent) means "leave unchanged"; an empty, non-nil
	// slice means "clear contacts". cert-manager issues neither, but
	// the spec permits both.
	Contact []string `json:"contact,omitempty"`

	// Status retires the account when set to "deactivated" (RFC 8555
	// §7.3.6). Other values are rejected; revocation goes through
	// certctl's own API rather than ACME.
	Status string `json:"status,omitempty"`
}
|
||||
@@ -0,0 +1,224 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto/x509"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// Phase 4 — RFC 9773 ACME Renewal Information.
|
||||
//
|
||||
// RFC 9773 §4.1: a client computes the cert-id as
|
||||
//
|
||||
// base64url-no-pad(authorityKeyIdentifier) || "." || base64url-no-pad(serial)
|
||||
//
|
||||
// and GETs /acme/.../renewal-info/<cert-id>. The server responds with a
|
||||
// JSON document carrying a `suggestedWindow` (start, end) the client
|
||||
// SHOULD plan its renewal inside, plus an optional `explanationURL`.
|
||||
// Response also carries a Retry-After header (RFC 9773 §4.2) hinting
|
||||
// at the next-poll cadence.
|
||||
//
|
||||
// This file:
|
||||
//
|
||||
// - parses the cert-id wire format → (akiBytes, serialBytes).
|
||||
// - converts the serial bytes to a hex string in the canonical
|
||||
// certctl shape (lowercase, no leading zeros, matching how
|
||||
// internal/repository/postgres/certificate.go stores them).
|
||||
// - computes the suggested-window from a cert's NotAfter and an
|
||||
// optional bound RenewalPolicy (last 33% of validity if no policy
|
||||
// is bound).
|
||||
|
||||
// RenewalInfoResponse is the JSON body served by the renewal-info
// endpoint (RFC 9773 §4.1): a mandatory suggested window plus an
// optional explanation URL that is left out of the JSON when empty.
type RenewalInfoResponse struct {
	SuggestedWindow RenewalWindow `json:"suggestedWindow"`
	ExplanationURL  string        `json:"explanationURL,omitempty"`
}

// RenewalWindow is the {start, end} pair nested under
// `suggestedWindow`. RFC 9773 requires start ≤ end; producing RFC 3339
// UTC timestamps is the responsibility of whoever fills the struct.
type RenewalWindow struct {
	Start time.Time `json:"start"`
	End   time.Time `json:"end"`
}
|
||||
|
||||
// ARICertID is the parsed shape of an RFC 9773 §4.1 cert-id —
// authorityKeyIdentifier and serial bytes after base64url-no-pad
// decoding. Callers compare against the certificate they already have
// in the database; AKI is informational on the server side because
// certctl's serial-uniqueness invariant is per-issuer.
type ARICertID struct {
	// AKI is the raw bytes of the certificate's authorityKeyIdentifier
	// extension.
	AKI []byte
	// Serial is the raw bytes of the certificate's serial number, in
	// big-endian unsigned-integer form. Leading zero bytes (such as a
	// DER sign byte) are tolerated and ignored by SerialHex.
	Serial []byte
}

// SerialHex returns the canonical certctl-shape hex representation of
// the serial number — lowercase, no leading zeros (matches what's
// stored in certificate_versions.serial_number). An empty Serial
// yields ""; an all-zero Serial yields "0".
func (a ARICertID) SerialHex() string {
	if len(a.Serial) == 0 {
		return ""
	}
	// big.Int.Text(16) already produces lowercase digits without
	// leading zeros and renders zero as "0".
	return new(big.Int).SetBytes(a.Serial).Text(16)
}

// AKIHex returns the AKI as a lowercase hex string. Useful for logging
// + future per-AKI lookup paths.
func (a ARICertID) AKIHex() string {
	// hex.EncodeToString always emits lowercase digits.
	return hex.EncodeToString(a.AKI)
}

// Sentinel errors returned (wrapped) by ParseARICertID. Match them
// with errors.Is.
var (
	// ErrARICertIDMalformed: the cert-id does not contain exactly one
	// "." separator.
	ErrARICertIDMalformed = errors.New("acme ari: cert-id is not <aki>.<serial>")
	// ErrARICertIDDecodeAKI: the AKI half failed base64url decoding.
	ErrARICertIDDecodeAKI = errors.New("acme ari: cert-id AKI is not valid base64url")
	// ErrARICertIDDecodeSerial: the serial half failed base64url
	// decoding.
	ErrARICertIDDecodeSerial = errors.New("acme ari: cert-id serial is not valid base64url")
	// ErrARICertIDDecodeSeria is the original, misspelled name of
	// ErrARICertIDDecodeSerial. Both names hold the same error value,
	// so errors.Is matches either one.
	//
	// Deprecated: use ErrARICertIDDecodeSerial.
	ErrARICertIDDecodeSeria = ErrARICertIDDecodeSerial
	// ErrARICertIDEmpty: one of the halves is empty, before or after
	// decoding.
	ErrARICertIDEmpty = errors.New("acme ari: cert-id has empty AKI or serial")
)

// ParseARICertID decodes an RFC 9773 §4.1 cert-id. The wire format is
// strictly base64url-NO-PADDING; padded or standard-alphabet base64 is
// rejected by the decoder.
//
// Errors (all matchable with errors.Is):
//   - missing or extra `.` separator → ErrARICertIDMalformed.
//   - AKI half fails base64url decode → ErrARICertIDDecodeAKI.
//   - serial half fails base64url decode → ErrARICertIDDecodeSerial.
//   - either half is empty → ErrARICertIDEmpty.
//
// On error the returned pointer is nil.
func ParseARICertID(certID string) (*ARICertID, error) {
	parts := strings.Split(certID, ".")
	if len(parts) != 2 {
		return nil, fmt.Errorf("%w: got %d parts", ErrARICertIDMalformed, len(parts))
	}
	if parts[0] == "" || parts[1] == "" {
		return nil, ErrARICertIDEmpty
	}
	aki, err := base64.RawURLEncoding.DecodeString(parts[0])
	if err != nil {
		return nil, fmt.Errorf("%w: %v", ErrARICertIDDecodeAKI, err)
	}
	serial, err := base64.RawURLEncoding.DecodeString(parts[1])
	if err != nil {
		return nil, fmt.Errorf("%w: %v", ErrARICertIDDecodeSerial, err)
	}
	// Defensive: a non-empty encoded half should never decode to zero
	// bytes, but keep the invariant explicit for callers.
	if len(aki) == 0 || len(serial) == 0 {
		return nil, ErrARICertIDEmpty
	}
	return &ARICertID{AKI: aki, Serial: serial}, nil
}
|
||||
|
||||
// BuildARICertID is the inverse of ParseARICertID — useful for tests
// and operator tools that want to construct a cert-id from a leaf cert.
//
// certPEM is the leaf certificate's PEM; only the first PEM block is
// read. The result is
//
//	base64url-no-pad(authorityKeyIdentifier) || "." || base64url-no-pad(serial)
//
// where the serial bytes are the content octets of the DER INTEGER
// (RFC 9773 §4.1): big-endian magnitude with a leading 0x00 byte when
// the top bit of the first byte is set, and a single 0x00 byte for a
// zero serial. This is what makes serial 0x87654321 encode as
// "AIdlQyE", matching the cert-id RFC-compliant clients compute.
//
// An error is returned when the PEM cannot be decoded, the certificate
// cannot be parsed, or the certificate has no authorityKeyIdentifier
// extension or serial number.
//
// NOTE(review): negative serials are encoded from their magnitude, as
// before; confirm whether they can reach this function at all.
func BuildARICertID(certPEM string) (string, error) {
	block, _ := pem.Decode([]byte(certPEM))
	if block == nil {
		return "", fmt.Errorf("acme ari: pem decode failed")
	}
	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return "", fmt.Errorf("acme ari: parse cert: %w", err)
	}
	if len(cert.AuthorityKeyId) == 0 {
		return "", fmt.Errorf("acme ari: certificate has no authorityKeyIdentifier extension")
	}
	if cert.SerialNumber == nil {
		return "", fmt.Errorf("acme ari: certificate has no serial number")
	}

	// big.Int.Bytes() returns the bare magnitude; restore the DER sign
	// byte (and the single 0x00 of a zero INTEGER) that it drops.
	serial := cert.SerialNumber.Bytes()
	if len(serial) == 0 || serial[0]&0x80 != 0 {
		serial = append([]byte{0x00}, serial...)
	}

	akiB64 := base64.RawURLEncoding.EncodeToString(cert.AuthorityKeyId)
	serialB64 := base64.RawURLEncoding.EncodeToString(serial)
	return akiB64 + "." + serialB64, nil
}
|
||||
|
||||
// ComputeRenewalWindow returns the RFC 9773 suggestedWindow for a
|
||||
// (cert, optional renewal-policy) pair.
|
||||
//
|
||||
// Algorithm:
|
||||
//
|
||||
// - When policy is non-nil and policy.RenewalWindowDays > 0: the
|
||||
// window starts at NotAfter - RenewalWindowDays + spans half of
|
||||
// RenewalWindowDays. So a 30-day-renewal-window cert with NotAfter
|
||||
// 2026-06-30 emits start=2026-05-31, end=2026-06-15. This matches
|
||||
// boulder's default ARI behavior + ensures a Let's-Encrypt-shaped
|
||||
// client can plan its renewals exactly inside our renewal window.
|
||||
// - When policy is nil OR RenewalWindowDays ≤ 0: the window is the
|
||||
// last 33% of validity. So a cert with NotBefore 2026-01-01 +
|
||||
// NotAfter 2026-04-01 (90d validity) emits start=2026-03-01 (30d
|
||||
// before expiry), end=2026-03-21 (10d before expiry).
|
||||
// - When the cert is past NotAfter: the window starts at "now" and
|
||||
// ends at "now + 1 day" so a client polling on an expired cert
|
||||
// gets a "renew immediately" answer rather than a window in the
|
||||
// past.
|
||||
//
|
||||
// Returns (start, end). start ≤ end is invariant.
|
||||
func ComputeRenewalWindow(cert *domain.ManagedCertificate, version *domain.CertificateVersion, policy *domain.RenewalPolicy, now time.Time) (time.Time, time.Time) {
|
||||
if cert == nil {
|
||||
return time.Time{}, time.Time{}
|
||||
}
|
||||
notAfter := cert.ExpiresAt.UTC()
|
||||
notBefore := notAfter
|
||||
if version != nil && !version.NotBefore.IsZero() {
|
||||
notBefore = version.NotBefore.UTC()
|
||||
}
|
||||
|
||||
// Past expiry: emit a 1-day "renew now" window.
|
||||
if !now.IsZero() && now.UTC().After(notAfter) {
|
||||
nowUTC := now.UTC()
|
||||
return nowUTC, nowUTC.Add(24 * time.Hour)
|
||||
}
|
||||
|
||||
if policy != nil && policy.RenewalWindowDays > 0 {
|
||||
windowDays := time.Duration(policy.RenewalWindowDays) * 24 * time.Hour
|
||||
start := notAfter.Add(-windowDays)
|
||||
end := start.Add(windowDays / 2)
|
||||
// Defensive: never emit start in the past from "now".
|
||||
if !now.IsZero() && start.Before(now.UTC()) {
|
||||
start = now.UTC()
|
||||
}
|
||||
if end.Before(start) {
|
||||
end = start
|
||||
}
|
||||
return start, end
|
||||
}
|
||||
|
||||
// No policy → last 33% of validity.
|
||||
validity := notAfter.Sub(notBefore)
|
||||
if validity <= 0 {
|
||||
// Degenerate cert (nb >= na). Use a 1-day default window
|
||||
// ending at notAfter.
|
||||
return notAfter.Add(-24 * time.Hour), notAfter
|
||||
}
|
||||
thirty3 := validity / 3
|
||||
start := notAfter.Add(-thirty3)
|
||||
// End is 1/3 before expiry → midpoint of the renewal third.
|
||||
end := notAfter.Add(-thirty3 / 3)
|
||||
if !now.IsZero() && start.Before(now.UTC()) {
|
||||
start = now.UTC()
|
||||
}
|
||||
if end.Before(start) {
|
||||
end = start
|
||||
}
|
||||
return start, end
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"crypto/sha256"
|
||||
"encoding/asn1"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
)
|
||||
|
||||
// KeyAuthorization computes the canonical RFC 8555 §8.1 key authorization
|
||||
// string: <token> + "." + base64url(JWK-thumbprint).
|
||||
//
|
||||
// The thumbprint is RFC 7638 SHA-256 of the canonicalized JWK; same
|
||||
// helper Phase 1b uses to derive account IDs. Phase 3's HTTP-01 +
|
||||
// DNS-01 + TLS-ALPN-01 validators all consume this string.
|
||||
func KeyAuthorization(token string, jwk *jose.JSONWebKey) (string, error) {
|
||||
if jwk == nil {
|
||||
return "", errors.New("acme: nil jwk for key authorization")
|
||||
}
|
||||
thumb, err := jwk.Thumbprint(crypto.SHA256)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("acme: thumbprint: %w", err)
|
||||
}
|
||||
return token + "." + base64.RawURLEncoding.EncodeToString(thumb), nil
|
||||
}
|
||||
|
||||
// DNS01TXTRecordValue returns the TXT value expected at
// `_acme-challenge.<domain>` for a DNS-01 challenge (RFC 8555 §8.4):
// the unpadded base64url encoding of SHA-256(keyAuthorization).
//
// Note that this is the digest, not the raw key authorization that
// HTTP-01 serves.
func DNS01TXTRecordValue(keyAuthorization string) string {
	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	hasher.Write([]byte(keyAuthorization))
	return base64.RawURLEncoding.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
// TLSALPN01ExtensionValue returns the 32-byte SHA-256 digest of the
// key authorization, which is the payload the validator expects inside
// the responding certificate's id-pe-acmeIdentifier extension
// (RFC 8737 §3).
//
// Only the raw digest is returned; wrapping it in an ASN.1 OCTET
// STRING is left to the caller.
func TLSALPN01ExtensionValue(keyAuthorization string) []byte {
	digest := sha256.Sum256([]byte(keyAuthorization))
	out := make([]byte, sha256.Size)
	copy(out, digest[:])
	return out
}

// IDPEAcmeIdentifierOID is the object identifier (1.3.6.1.5.5.7.1.31)
// that RFC 8737 §3 assigns to the id-pe-acmeIdentifier extension. It
// is exported so the TLS-ALPN-01 validator can compare it with
// .Equal() against the extensions of the presented certificate.
var IDPEAcmeIdentifierOID = asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 1, 31}
|
||||
|
||||
// ChallengeProblemFromError maps a validator error into the RFC 7807
|
||||
// Problem the challenge row's `error` column should record. Centralized
|
||||
// so each per-type validator returns plain errors and the dispatcher
|
||||
// translates uniformly.
|
||||
//
|
||||
// The Problem types align with RFC 8555 §6.7:
|
||||
// - connection / TCP-level → urn:ietf:params:acme:error:connection
|
||||
// - DNS / TXT mismatch → urn:ietf:params:acme:error:dns
|
||||
// - TLS handshake / cert mismatch → urn:ietf:params:acme:error:tls
|
||||
// - all others → urn:ietf:params:acme:error:incorrectResponse (the
|
||||
// RFC-canonical "challenge response was wrong" type)
|
||||
func ChallengeProblemFromError(challengeType string, err error) *Problem {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
switch {
|
||||
case errors.Is(err, ErrChallengeConnection):
|
||||
return &Problem{Type: "urn:ietf:params:acme:error:connection", Detail: err.Error(), Status: 400}
|
||||
case errors.Is(err, ErrChallengeDNS):
|
||||
return &Problem{Type: "urn:ietf:params:acme:error:dns", Detail: err.Error(), Status: 400}
|
||||
case errors.Is(err, ErrChallengeTLS):
|
||||
return &Problem{Type: "urn:ietf:params:acme:error:tls", Detail: err.Error(), Status: 400}
|
||||
default:
|
||||
return &Problem{
|
||||
Type: "urn:ietf:params:acme:error:incorrectResponse",
|
||||
Detail: fmt.Sprintf("%s validation failed: %s", challengeType, err.Error()),
|
||||
Status: 403,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validator-side sentinel errors. Each one maps to a specific RFC 8555
|
||||
// §6.7 problem type via ChallengeProblemFromError above. Per-validator
|
||||
// implementations wrap their failures with these.
|
||||
var (
|
||||
ErrChallengeConnection = errors.New("acme: connection-level failure during challenge validation")
|
||||
ErrChallengeDNS = errors.New("acme: DNS-level failure during challenge validation")
|
||||
ErrChallengeTLS = errors.New("acme: TLS-level failure during challenge validation")
|
||||
ErrChallengeMismatch = errors.New("acme: challenge response did not match expected key authorization")
|
||||
ErrChallengeReservedIP = errors.New("acme: HTTP-01 target resolves to a reserved IP (SSRF guard)")
|
||||
ErrChallengeRedirect = errors.New("acme: HTTP-01 target redirected too many times")
|
||||
ErrChallengeBodyTooBig = errors.New("acme: HTTP-01 response body exceeded 16 KiB cap")
|
||||
ErrChallengeNoCert = errors.New("acme: TLS-ALPN-01 target presented no certificate")
|
||||
ErrChallengeWrongALPN = errors.New("acme: TLS-ALPN-01 target did not negotiate the acme-tls/1 protocol")
|
||||
ErrChallengeExtMissing = errors.New("acme: TLS-ALPN-01 target's certificate did not carry the id-pe-acmeIdentifier extension")
|
||||
)
|
||||
@@ -0,0 +1,85 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
// Package acme implements the ACME server-side protocol surface (RFC 8555
|
||||
// + RFC 9773 ARI). It is deliberately separate from
|
||||
// internal/connector/issuer/acme/ which is the consumer surface (certctl
|
||||
// talks UP to Let's Encrypt / ZeroSSL / pebble). The two surfaces share
|
||||
// no types — the consumer's data model is client-shaped; the server's
|
||||
// is request-handler-shaped.
|
||||
//
|
||||
// Phase 1a: directory + nonce + JSON-Problem (RFC 7807) error envelopes
|
||||
// only. JWS verification, account resource, orders, challenges, key
|
||||
// rollover, revocation, ARI all land in subsequent phases (1b → 4).
|
||||
package acme
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Directory is the RFC 8555 §7.1.1 directory document the server
// publishes at /acme/profile/<id>/directory (and at /acme/directory
// when CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID is set).
//
// Every field is an absolute URL that ACME clients use as-is; clients
// rely on the directory exclusively and never assemble paths from a
// base URL themselves, which is why the full endpoint set is always
// advertised.
type Directory struct {
	NewNonce   string `json:"newNonce"`
	NewAccount string `json:"newAccount"`
	NewOrder   string `json:"newOrder"`
	RevokeCert string `json:"revokeCert"`
	KeyChange  string `json:"keyChange"`
	// RenewalInfo is the RFC 9773 ARI endpoint. `omitempty` keeps it
	// out of the JSON while it is unset, so clients without ARI
	// support see a clean document.
	RenewalInfo string `json:"renewalInfo,omitempty"`
	// Meta is nil (and therefore omitted) when no metadata is
	// configured.
	Meta *Meta `json:"meta,omitempty"`
}

// Meta is the optional RFC 8555 §7.1.1 metadata object. Its values are
// operator-supplied via CERTCTL_ACME_SERVER_* env vars, and each field
// is dropped from the JSON when it holds its zero value.
type Meta struct {
	TermsOfService          string   `json:"termsOfService,omitempty"`
	Website                 string   `json:"website,omitempty"`
	CAAIdentities           []string `json:"caaIdentities,omitempty"`
	ExternalAccountRequired bool     `json:"externalAccountRequired,omitempty"`
}

// BuildDirectory assembles the directory document for one profile.
//
// baseURL is the per-profile base path without a trailing slash (e.g.
// "https://certctl.example.com/acme/profile/prof-corp"); every
// endpoint is formed as baseURL + "/" + <endpoint name>. The default-
// profile shorthand path passes the same baseURL.
//
// The five RFC 8555 endpoints are always set. RenewalInfo is set only
// when ariEnabled is true, so a deployment without a working ARI
// handler does not advertise a URL that would 404. Meta is attached
// only if at least one of tos, website, caa or eabRequired carries a
// non-zero value; otherwise it stays nil.
func BuildDirectory(baseURL, tos, website string, caa []string, eabRequired, ariEnabled bool) *Directory {
	endpoint := func(leaf string) string {
		return fmt.Sprintf("%s/%s", baseURL, leaf)
	}

	dir := &Directory{
		NewNonce:   endpoint("new-nonce"),
		NewAccount: endpoint("new-account"),
		NewOrder:   endpoint("new-order"),
		RevokeCert: endpoint("revoke-cert"),
		KeyChange:  endpoint("key-change"),
	}
	if ariEnabled {
		// RFC 9773 §4.1 names the directory field `renewalInfo`.
		dir.RenewalInfo = endpoint("renewal-info")
	}

	hasMeta := eabRequired || tos != "" || website != "" || len(caa) > 0
	if !hasMeta {
		return dir
	}
	dir.Meta = &Meta{
		TermsOfService:          tos,
		Website:                 website,
		CAAIdentities:           caa,
		ExternalAccountRequired: eabRequired,
	}
	return dir
}
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBuildDirectory_FullMeta(t *testing.T) {
|
||||
d := BuildDirectory(
|
||||
"https://server/acme/profile/prof-corp",
|
||||
"https://example.com/tos",
|
||||
"https://example.com",
|
||||
[]string{"example.com"},
|
||||
true,
|
||||
false,
|
||||
)
|
||||
if got, want := d.NewNonce, "https://server/acme/profile/prof-corp/new-nonce"; got != want {
|
||||
t.Errorf("NewNonce = %q, want %q", got, want)
|
||||
}
|
||||
if got, want := d.NewAccount, "https://server/acme/profile/prof-corp/new-account"; got != want {
|
||||
t.Errorf("NewAccount = %q, want %q", got, want)
|
||||
}
|
||||
if got, want := d.NewOrder, "https://server/acme/profile/prof-corp/new-order"; got != want {
|
||||
t.Errorf("NewOrder = %q, want %q", got, want)
|
||||
}
|
||||
if got, want := d.RevokeCert, "https://server/acme/profile/prof-corp/revoke-cert"; got != want {
|
||||
t.Errorf("RevokeCert = %q, want %q", got, want)
|
||||
}
|
||||
if got, want := d.KeyChange, "https://server/acme/profile/prof-corp/key-change"; got != want {
|
||||
t.Errorf("KeyChange = %q, want %q", got, want)
|
||||
}
|
||||
if d.RenewalInfo != "" {
|
||||
t.Errorf("RenewalInfo should be empty when ariEnabled=false; got %q", d.RenewalInfo)
|
||||
}
|
||||
if d.Meta == nil {
|
||||
t.Fatal("Meta should be populated when any meta field is set")
|
||||
}
|
||||
if d.Meta.TermsOfService != "https://example.com/tos" {
|
||||
t.Errorf("TermsOfService = %q", d.Meta.TermsOfService)
|
||||
}
|
||||
if d.Meta.Website != "https://example.com" {
|
||||
t.Errorf("Website = %q", d.Meta.Website)
|
||||
}
|
||||
if !d.Meta.ExternalAccountRequired {
|
||||
t.Error("ExternalAccountRequired should be true")
|
||||
}
|
||||
if len(d.Meta.CAAIdentities) != 1 || d.Meta.CAAIdentities[0] != "example.com" {
|
||||
t.Errorf("CAAIdentities = %v", d.Meta.CAAIdentities)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDirectory_NoMeta(t *testing.T) {
|
||||
d := BuildDirectory("https://server/acme/profile/prof-corp", "", "", nil, false, false)
|
||||
if d.Meta != nil {
|
||||
t.Errorf("Meta should be nil when all meta fields zero; got %+v", d.Meta)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDirectory_EABRequiredOnly(t *testing.T) {
|
||||
d := BuildDirectory("https://server/acme/profile/prof-corp", "", "", nil, true, false)
|
||||
if d.Meta == nil {
|
||||
t.Fatal("Meta should be populated when EAB is required")
|
||||
}
|
||||
if !d.Meta.ExternalAccountRequired {
|
||||
t.Error("ExternalAccountRequired should be true")
|
||||
}
|
||||
if d.Meta.TermsOfService != "" || d.Meta.Website != "" || len(d.Meta.CAAIdentities) != 0 {
|
||||
t.Errorf("only EAB should be set; meta = %+v", d.Meta)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDirectory_ARIEnabled(t *testing.T) {
|
||||
d := BuildDirectory("https://server/acme/profile/prof-corp", "", "", nil, false, true)
|
||||
if d.RenewalInfo == "" {
|
||||
t.Fatal("RenewalInfo should be populated when ariEnabled=true")
|
||||
}
|
||||
if !strings.HasSuffix(d.RenewalInfo, "/renewal-info") {
|
||||
t.Errorf("RenewalInfo = %q; expected suffix /renewal-info", d.RenewalInfo)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDirectory_JSONShape(t *testing.T) {
|
||||
// RFC 8555 §7.1.1 dictates the JSON field names. A regression here
|
||||
// would break every ACME client.
|
||||
d := BuildDirectory("https://server/acme/profile/prof-corp", "", "", nil, false, false)
|
||||
b, err := json.Marshal(d)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal: %v", err)
|
||||
}
|
||||
got := string(b)
|
||||
for _, want := range []string{
|
||||
`"newNonce":"https://server/acme/profile/prof-corp/new-nonce"`,
|
||||
`"newAccount":"https://server/acme/profile/prof-corp/new-account"`,
|
||||
`"newOrder":"https://server/acme/profile/prof-corp/new-order"`,
|
||||
`"revokeCert":"https://server/acme/profile/prof-corp/revoke-cert"`,
|
||||
`"keyChange":"https://server/acme/profile/prof-corp/key-change"`,
|
||||
} {
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("JSON missing %q\nGot: %s", want, got)
|
||||
}
|
||||
}
|
||||
// renewalInfo + meta should be omitted.
|
||||
if strings.Contains(got, "renewalInfo") {
|
||||
t.Errorf("renewalInfo should be omitted when ARI disabled; got %s", got)
|
||||
}
|
||||
if strings.Contains(got, `"meta"`) {
|
||||
t.Errorf("meta should be omitted when no fields set; got %s", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// ProblemContentType is the MIME type RFC 7807 §3 mandates for the
// JSON-Problem error envelope. ACME inherits this from RFC 8555 §6.7.
const ProblemContentType = "application/problem+json"

// acmeErrorPrefix is the ACME error type URN prefix per RFC 8555 §6.7.
const acmeErrorPrefix = "urn:ietf:params:acme:error:"

// Problem is the RFC 7807 Problem Details document. ACME extends it
// per RFC 8555 §6.7 with subproblems (per-identifier-rejection
// breakdowns) and identifier (the failing identifier on
// rejectedIdentifier). Both extension fields are omitted from the
// JSON when unset; none of the constructors in this file populate
// them.
type Problem struct {
	Type        string      `json:"type"`
	Detail      string      `json:"detail"`
	Status      int         `json:"status"`
	Subproblems []Problem   `json:"subproblems,omitempty"`
	Identifier  *Identifier `json:"identifier,omitempty"`
}

// Identifier is the ACME identifier shape (RFC 8555 §7.4). Defined here
// (rather than in a Phase-2-only file) so the Problem struct can
// reference *Identifier directly.
type Identifier struct {
	Type  string `json:"type"`
	Value string `json:"value"`
}

// Malformed is RFC 8555 §6.7's "request body did not parse / decode" /
// "the JWS was malformed" / "payload JSON was malformed" error. HTTP
// status 400.
func Malformed(detail string) Problem {
	return Problem{
		Type:   acmeErrorPrefix + "malformed",
		Detail: detail,
		Status: http.StatusBadRequest,
	}
}

// ServerInternal is the catch-all for unexpected server-side errors.
// HTTP status 500. The detail string is operator-facing; per the
// master prompt's acquisition-readiness criterion #10 it MUST NOT
// echo SQL errors, internal trace IDs, or credential bytes.
func ServerInternal(detail string) Problem {
	return Problem{
		Type:   acmeErrorPrefix + "serverInternal",
		Detail: detail,
		Status: http.StatusInternalServerError,
	}
}

// UserActionRequired is RFC 8555 §6.7's "the user has to do something
// out of band before this request will succeed" error. We return it
// from the /acme/* shorthand path family when
// CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID is not set — the operator
// has to either set the env var or update the client to use
// /acme/profile/<id>/*. HTTP status 403.
func UserActionRequired(detail string) Problem {
	return Problem{
		Type:   acmeErrorPrefix + "userActionRequired",
		Detail: detail,
		Status: http.StatusForbidden,
	}
}

// UnsupportedContentType is RFC 7807-shaped (type "about:blank", no
// ACME error type) for requests with a Content-Type the endpoint
// doesn't accept. The offending value is appended to the detail
// string. HTTP status 415.
func UnsupportedContentType(got string) Problem {
	return Problem{
		Type:   "about:blank",
		Detail: "unsupported content type: " + got,
		Status: http.StatusUnsupportedMediaType,
	}
}

// AccountDoesNotExist (RFC 8555 §7.3.1) is what the JWS verifier
// returns when the request's `kid` points at an unknown account. HTTP
// status 400.
func AccountDoesNotExist(detail string) Problem {
	return Problem{
		Type:   acmeErrorPrefix + "accountDoesNotExist",
		Detail: detail,
		Status: http.StatusBadRequest,
	}
}

// BadNonce is what the JWS verifier returns on a missing / replayed /
// expired nonce per RFC 8555 §6.5.1. HTTP status 400.
func BadNonce(detail string) Problem {
	return Problem{
		Type:   acmeErrorPrefix + "badNonce",
		Detail: detail,
		Status: http.StatusBadRequest,
	}
}

// WriteProblem renders p as RFC 7807 JSON to w with the
// application/problem+json Content-Type and p.Status as the HTTP
// status code.
//
// A Problem whose Status is not a three-digit code (the zero value
// included) is replaced by ServerInternal("unspecified error").
// http.ResponseWriter.WriteHeader panics on codes outside 100-999, so
// this keeps a forgotten or mis-built error path from crashing the
// handler.
func WriteProblem(w http.ResponseWriter, p Problem) {
	if p.Status < 100 || p.Status > 999 {
		p = ServerInternal("unspecified error")
	}
	w.Header().Set("Content-Type", ProblemContentType)
	w.WriteHeader(p.Status)
	// Problem is built only from strings, ints, slices and pointers to
	// such, so encoding itself cannot fail; what remains is a write
	// error on a connection that is already gone, and there is nothing
	// useful to do about that after the header has been sent.
	_ = json.NewEncoder(w).Encode(p)
}
|
||||
@@ -0,0 +1,106 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestProblem_Malformed_Shape(t *testing.T) {
|
||||
p := Malformed("payload was not valid JSON")
|
||||
if p.Status != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want %d", p.Status, http.StatusBadRequest)
|
||||
}
|
||||
if p.Type != "urn:ietf:params:acme:error:malformed" {
|
||||
t.Errorf("type = %q", p.Type)
|
||||
}
|
||||
if p.Detail != "payload was not valid JSON" {
|
||||
t.Errorf("detail = %q", p.Detail)
|
||||
}
|
||||
// Subproblems and Identifier are Phase-2 extensions; both stay empty
|
||||
// for a Phase-1a-emitted problem.
|
||||
if len(p.Subproblems) != 0 {
|
||||
t.Errorf("subproblems should be empty; got %v", p.Subproblems)
|
||||
}
|
||||
if p.Identifier != nil {
|
||||
t.Errorf("identifier should be nil; got %+v", p.Identifier)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProblem_AllHelperShapes(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
p Problem
|
||||
wantType string
|
||||
wantStatus int
|
||||
}{
|
||||
{"Malformed", Malformed("x"), "urn:ietf:params:acme:error:malformed", http.StatusBadRequest},
|
||||
{"ServerInternal", ServerInternal("x"), "urn:ietf:params:acme:error:serverInternal", http.StatusInternalServerError},
|
||||
{"UserActionRequired", UserActionRequired("x"), "urn:ietf:params:acme:error:userActionRequired", http.StatusForbidden},
|
||||
{"AccountDoesNotExist", AccountDoesNotExist("x"), "urn:ietf:params:acme:error:accountDoesNotExist", http.StatusBadRequest},
|
||||
{"BadNonce", BadNonce("x"), "urn:ietf:params:acme:error:badNonce", http.StatusBadRequest},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if tc.p.Type != tc.wantType {
|
||||
t.Errorf("type = %q, want %q", tc.p.Type, tc.wantType)
|
||||
}
|
||||
if tc.p.Status != tc.wantStatus {
|
||||
t.Errorf("status = %d, want %d", tc.p.Status, tc.wantStatus)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProblem_UnsupportedContentType(t *testing.T) {
|
||||
p := UnsupportedContentType("application/json")
|
||||
if p.Status != http.StatusUnsupportedMediaType {
|
||||
t.Errorf("status = %d, want 415", p.Status)
|
||||
}
|
||||
if p.Type != "about:blank" {
|
||||
t.Errorf("UnsupportedContentType uses RFC 7807 about:blank; got %q", p.Type)
|
||||
}
|
||||
if !strings.Contains(p.Detail, "application/json") {
|
||||
t.Errorf("detail should echo content-type; got %q", p.Detail)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteProblem_Headers(t *testing.T) {
|
||||
rec := httptest.NewRecorder()
|
||||
WriteProblem(rec, Malformed("oops"))
|
||||
|
||||
if got, want := rec.Code, http.StatusBadRequest; got != want {
|
||||
t.Errorf("status = %d, want %d", got, want)
|
||||
}
|
||||
if got, want := rec.Header().Get("Content-Type"), ProblemContentType; got != want {
|
||||
t.Errorf("content-type = %q, want %q", got, want)
|
||||
}
|
||||
|
||||
var p Problem
|
||||
if err := json.NewDecoder(rec.Body).Decode(&p); err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
if p.Type != "urn:ietf:params:acme:error:malformed" {
|
||||
t.Errorf("decoded type = %q", p.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteProblem_NilStatusFallsBackTo500(t *testing.T) {
|
||||
// Defensive check: a hand-constructed Problem with Status=0 (e.g.
|
||||
// from a forgotten error path) still renders cleanly as 500 +
|
||||
// serverInternal rather than emitting an HTTP/0 response.
|
||||
rec := httptest.NewRecorder()
|
||||
WriteProblem(rec, Problem{})
|
||||
|
||||
if got, want := rec.Code, http.StatusInternalServerError; got != want {
|
||||
t.Errorf("status = %d, want %d", got, want)
|
||||
}
|
||||
if got, want := rec.Header().Get("Content-Type"), ProblemContentType; got != want {
|
||||
t.Errorf("content-type = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,487 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// AllowedSignatureAlgorithms is the closed allow-list per RFC 8555 §6.2.
|
||||
// ParseSigned takes this slice and rejects every other algorithm —
|
||||
// in particular HS256 (symmetric — RFC 8555 forbids) and "none"
|
||||
// (RFC 7515 §6.1 — alg confusion attack).
|
||||
//
|
||||
// Order is not load-bearing; the slice is value-copied by go-jose.
|
||||
var AllowedSignatureAlgorithms = []jose.SignatureAlgorithm{
|
||||
jose.RS256,
|
||||
jose.ES256,
|
||||
jose.EdDSA,
|
||||
}
|
||||
|
||||
// Sentinel errors returned (possibly wrapped) by the JWS verifier.
// Match them with errors.Is; MapJWSErrorToProblem translates each one
// into the RFC 8555 §6.7 problem document a handler writes with
// WriteProblem.
var (
	// Envelope and protected-header shape.
	ErrJWSMalformed         = errors.New("acme jws: malformed")
	ErrJWSWrongType         = errors.New("acme jws: protected header `typ` must be `application/jose+json` or absent")
	ErrJWSAlgorithmRejected = errors.New("acme jws: signature algorithm not in {RS256, ES256, EdDSA}")

	// Replay protection (`nonce`) and request binding (`url`).
	ErrJWSMissingNonce = errors.New("acme jws: protected header `nonce` is required")
	ErrJWSBadNonce     = errors.New("acme jws: nonce missing, replayed, or expired")
	ErrJWSMissingURL   = errors.New("acme jws: protected header `url` is required")
	ErrJWSURLMismatch  = errors.New("acme jws: protected header `url` does not match request URL")

	// Key selection: `kid` versus embedded `jwk`. The both/neither
	// sentinels share a message but remain distinct values.
	ErrJWSBothKidAndJWK    = errors.New("acme jws: protected header MUST contain exactly one of `kid` or `jwk`")
	ErrJWSNeitherKidNorJWK = errors.New("acme jws: protected header MUST contain exactly one of `kid` or `jwk`")
	ErrJWSExpectKidGotJWK  = errors.New("acme jws: this endpoint requires `kid` (registered account); got `jwk`")
	ErrJWSExpectJWKGotKid  = errors.New("acme jws: this endpoint requires `jwk` (new account); got `kid`")
	ErrJWSInvalidJWK       = errors.New("acme jws: embedded JWK is invalid")

	// Signature outcome.
	ErrJWSSignatureInvalid = errors.New("acme jws: signature did not verify")
	ErrJWSPayloadMismatch  = errors.New("acme jws: post-verify payload differs from pre-verify payload")

	// Account resolution on the `kid` path.
	ErrJWSAccountNotFound = errors.New("acme jws: kid points at unknown account")
	ErrJWSAccountInactive = errors.New("acme jws: account status is not `valid`")
)
|
||||
|
||||
// VerifiedRequest is the JWS-verified envelope a handler hands to its
|
||||
// service-layer entry point. Fields are populated based on the auth
|
||||
// path: `kid` requests carry Account (and AccountKey is the registered
|
||||
// JWK); `jwk` requests (new-account only) carry JWK.
|
||||
//
|
||||
// Payload is the bytes the JWS signed — the handler json.Unmarshals
|
||||
// into the per-endpoint payload struct.
|
||||
type VerifiedRequest struct {
|
||||
// Payload is the signed body bytes (post-Verify).
|
||||
Payload []byte
|
||||
// Algorithm is the negotiated alg (RS256 / ES256 / EdDSA), echoed
|
||||
// from sig.Protected.Algorithm post-allow-list-check.
|
||||
Algorithm string
|
||||
// URL is the protected-header `url` value, asserted equal to the
|
||||
// inbound request URL.
|
||||
URL string
|
||||
// Nonce is the protected-header `nonce` value, asserted consumed
|
||||
// from the nonce store.
|
||||
Nonce string
|
||||
// Account is non-nil on the `kid` path (registered account
|
||||
// authenticating). Always nil on the `jwk` path.
|
||||
Account *domain.ACMEAccount
|
||||
// JWK is non-nil on the `jwk` path (new-account flow). Always nil
|
||||
// on the `kid` path.
|
||||
JWK *jose.JSONWebKey
|
||||
}
|
||||
|
||||
// AccountLookup is the minimum surface VerifyJWS needs to resolve a
|
||||
// `kid` request's account. The repository layer satisfies this; tests
|
||||
// inject in-memory fakes.
|
||||
type AccountLookup interface {
|
||||
// LookupAccount returns the account by ID. Returns
|
||||
// ErrJWSAccountNotFound if the row doesn't exist.
|
||||
LookupAccount(accountID string) (*domain.ACMEAccount, error)
|
||||
}
|
||||
|
||||
// NonceConsumer is the narrow dependency the verifier uses to spend
// the protected-header `nonce`.
//
// ConsumeNonce returns nil when the nonce was accepted, and any
// non-nil error (typically sql.ErrNoRows from the postgres
// repository) when it is missing, replayed, or expired. The verifier
// wraps every such error in ErrJWSBadNonce, so handlers never need to
// tell the causes apart.
type NonceConsumer interface {
	ConsumeNonce(nonce string) error
}
|
||||
|
||||
// VerifierConfig wires the verifier's runtime dependencies + policy.
|
||||
// Constructed by the handler/service layer once at startup; one
|
||||
// instance per ACMEService is sufficient.
|
||||
type VerifierConfig struct {
|
||||
// Accounts looks up registered accounts on the kid path.
|
||||
Accounts AccountLookup
|
||||
// Nonces consumes the protected-header nonce.
|
||||
Nonces NonceConsumer
|
||||
// AccountKID returns the canonical kid URL the server expects
|
||||
// inbound requests to use for a given account ID. The verifier
|
||||
// asserts the request's `kid` matches what AccountKID(acct.ID)
|
||||
// produces — this prevents a stolen account-id-from-one-server
|
||||
// from being replayed against another. The handler computes
|
||||
// the URL from the inbound request's scheme + host + profile.
|
||||
AccountKID func(accountID string) string
|
||||
}
|
||||
|
||||
// VerifyOptions carries the per-call knobs for VerifyJWS. The only
// one today selects which key reference the endpoint accepts.
type VerifyOptions struct {
	// ExpectNewAccount chooses the accepted key reference:
	//
	//   - true:  the protected header must carry `jwk`; `kid` is
	//     rejected. Used by /new-account.
	//   - false: the protected header must carry `kid`; `jwk` is
	//     rejected. Used by every other endpoint.
	ExpectNewAccount bool
}
|
||||
|
||||
// VerifyJWS is the canonical entry point. It enforces:
|
||||
//
|
||||
// 1. Body parses as a flattened JWS with exactly one signature
|
||||
// (RFC 8555 §6.2 forbids multi-sig).
|
||||
// 2. Algorithm is in the {RS256, ES256, EdDSA} allow-list.
|
||||
// 3. Protected header carries exactly one of `kid` / `jwk` per
|
||||
// ExpectNewAccount.
|
||||
// 4. Protected header carries `url` matching the inbound request URL
|
||||
// exactly.
|
||||
// 5. Protected header carries `nonce` that consumes successfully
|
||||
// against the nonce store (badNonce on miss/replay/expiry).
|
||||
// 6. Signature verifies against the resolved key (registered
|
||||
// account's stored JWK on kid path; embedded jwk on jwk path).
|
||||
// 7. Post-verify payload bytes equal pre-verify
|
||||
// UnsafePayloadWithoutVerification (defense in depth — go-jose
|
||||
// guarantees this, but assert anyway).
|
||||
//
|
||||
// On success returns VerifiedRequest; the handler json.Unmarshals
|
||||
// Payload into the per-endpoint payload struct.
|
||||
//
|
||||
// The `requestURL` argument is what the handler computed from the
|
||||
// inbound *http.Request (scheme + host + path). VerifyJWS does NOT
|
||||
// see r itself — keeping net/http out of the package surface lets
|
||||
// the verifier be tested without httptest.
|
||||
func VerifyJWS(cfg VerifierConfig, body []byte, requestURL string, opts VerifyOptions) (*VerifiedRequest, error) {
|
||||
jws, err := jose.ParseSigned(string(body), AllowedSignatureAlgorithms)
|
||||
if err != nil {
|
||||
// ParseSigned errors lump together "wrong format" and "alg
|
||||
// not in allow-list." Both are operator-meaningful as
|
||||
// "malformed" — the alg case is not exploitable by leaking
|
||||
// the allow-list.
|
||||
return nil, fmt.Errorf("%w: %v", ErrJWSMalformed, err)
|
||||
}
|
||||
// RFC 8555 §6.2: ACME forbids JWS multi-signature. Reject anything
|
||||
// other than exactly one signature so a maliciously-crafted
|
||||
// multi-sig blob can't trigger ambiguous downstream behavior.
|
||||
if len(jws.Signatures) != 1 {
|
||||
return nil, fmt.Errorf("%w: multi-signature JWS rejected", ErrJWSMalformed)
|
||||
}
|
||||
sig := jws.Signatures[0]
|
||||
|
||||
// Defense-in-depth: ParseSigned rejected non-allow-list algs
|
||||
// already, but a corrupted Signatures slice could still slip
|
||||
// through. Verify the field directly.
|
||||
if !algorithmAllowed(sig.Protected.Algorithm) {
|
||||
return nil, fmt.Errorf("%w: %s", ErrJWSAlgorithmRejected, sig.Protected.Algorithm)
|
||||
}
|
||||
|
||||
// Protected-header `typ` (RFC 8555 §6.2): when present, must be
|
||||
// "application/jose+json". Many ACME clients (including
|
||||
// cert-manager) omit it; treat absent as OK.
|
||||
if typ := sig.Protected.ExtraHeaders[jose.HeaderKey("typ")]; typ != nil {
|
||||
typStr, ok := typ.(string)
|
||||
if !ok || (typStr != "application/jose+json" && typStr != "") {
|
||||
return nil, fmt.Errorf("%w: got %q", ErrJWSWrongType, typ)
|
||||
}
|
||||
}
|
||||
|
||||
// Protected-header `url` is mandatory per RFC 8555 §6.4. Compare
|
||||
// to the inbound request URL exactly (scheme+host+path); a
|
||||
// mismatch indicates either a bug in the client or an attempt to
|
||||
// replay a JWS signed for a different URL.
|
||||
urlVal, err := extractStringHeader(sig.Protected.ExtraHeaders, "url")
|
||||
if err != nil {
|
||||
return nil, ErrJWSMissingURL
|
||||
}
|
||||
if urlVal == "" {
|
||||
return nil, ErrJWSMissingURL
|
||||
}
|
||||
if urlVal != requestURL {
|
||||
return nil, fmt.Errorf("%w: header=%q request=%q", ErrJWSURLMismatch, urlVal, requestURL)
|
||||
}
|
||||
|
||||
// Protected-header `nonce` is mandatory (RFC 8555 §6.5). Check
|
||||
// it BEFORE running Verify — if the nonce is bad we don't want to
|
||||
// burn CPU on signature verification.
|
||||
nonce := sig.Protected.Nonce
|
||||
if nonce == "" {
|
||||
return nil, ErrJWSMissingNonce
|
||||
}
|
||||
if err := cfg.Nonces.ConsumeNonce(nonce); err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrJWSBadNonce, err)
|
||||
}
|
||||
|
||||
// Protected header MUST contain exactly one of kid / jwk per
|
||||
// RFC 8555 §6.2. Both-set or neither-set are rejected.
|
||||
hasKid := sig.Protected.KeyID != ""
|
||||
hasJWK := sig.Protected.JSONWebKey != nil
|
||||
if hasKid && hasJWK {
|
||||
return nil, ErrJWSBothKidAndJWK
|
||||
}
|
||||
if !hasKid && !hasJWK {
|
||||
return nil, ErrJWSNeitherKidNorJWK
|
||||
}
|
||||
|
||||
// Per-endpoint kid-vs-jwk policy.
|
||||
if opts.ExpectNewAccount && hasKid {
|
||||
return nil, ErrJWSExpectJWKGotKid
|
||||
}
|
||||
if !opts.ExpectNewAccount && hasJWK {
|
||||
return nil, ErrJWSExpectKidGotJWK
|
||||
}
|
||||
|
||||
// Resolve the verification key and (kid path) the corresponding
|
||||
// account row.
|
||||
var (
|
||||
verifyKey interface{}
|
||||
account *domain.ACMEAccount
|
||||
jwkOut *jose.JSONWebKey
|
||||
)
|
||||
if hasKid {
|
||||
accountID, err := accountIDFromKID(sig.Protected.KeyID, cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
acct, err := cfg.Accounts.LookupAccount(accountID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if acct.Status != domain.ACMEAccountStatusValid {
|
||||
return nil, fmt.Errorf("%w: status=%s", ErrJWSAccountInactive, acct.Status)
|
||||
}
|
||||
// The account's stored JWK is what we verify against. The
|
||||
// JWKPEM round-trips through ParseJWKFromPEM; tests inject
|
||||
// pre-parsed keys to keep the unit suite hermetic.
|
||||
key, err := ParseJWKFromPEM(acct.JWKPEM)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrJWSInvalidJWK, err)
|
||||
}
|
||||
verifyKey = key.Key
|
||||
account = acct
|
||||
} else {
|
||||
jwk := sig.Protected.JSONWebKey
|
||||
if !jwk.Valid() {
|
||||
return nil, ErrJWSInvalidJWK
|
||||
}
|
||||
verifyKey = jwk.Key
|
||||
jwkOut = jwk
|
||||
}
|
||||
|
||||
// Run the actual signature verification. go-jose returns the
|
||||
// post-verify payload bytes; we sanity-check them against the
|
||||
// pre-verify view.
|
||||
verified, err := jws.Verify(verifyKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrJWSSignatureInvalid, err)
|
||||
}
|
||||
preVerify := jws.UnsafePayloadWithoutVerification()
|
||||
if string(verified) != string(preVerify) {
|
||||
// Should be impossible under correct go-jose use; fail loudly.
|
||||
return nil, ErrJWSPayloadMismatch
|
||||
}
|
||||
|
||||
return &VerifiedRequest{
|
||||
Payload: verified,
|
||||
Algorithm: sig.Protected.Algorithm,
|
||||
URL: urlVal,
|
||||
Nonce: nonce,
|
||||
Account: account,
|
||||
JWK: jwkOut,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// MapJWSErrorToProblem renders a JWS verifier error as an RFC 7807 +
|
||||
// RFC 8555 §6.7 Problem the handler emits via WriteProblem.
|
||||
//
|
||||
// All errors map to a documented ACME error type — no internal-state
|
||||
// leakage per master-prompt criterion #10. Operator-actionable detail
|
||||
// strings carry the failure category (badNonce, malformed, etc.) but
|
||||
// not raw err.Error() output.
|
||||
func MapJWSErrorToProblem(err error) Problem {
|
||||
switch {
|
||||
case errors.Is(err, ErrJWSBadNonce):
|
||||
return BadNonce("nonce missing, replayed, or expired")
|
||||
case errors.Is(err, ErrJWSMissingNonce):
|
||||
return BadNonce("protected header `nonce` is required")
|
||||
case errors.Is(err, ErrJWSURLMismatch), errors.Is(err, ErrJWSMissingURL):
|
||||
return Problem{
|
||||
Type: "urn:ietf:params:acme:error:unauthorized",
|
||||
Detail: "protected header `url` mismatch or missing",
|
||||
Status: http.StatusUnauthorized,
|
||||
}
|
||||
case errors.Is(err, ErrJWSAccountNotFound):
|
||||
return AccountDoesNotExist("kid points at unknown account")
|
||||
case errors.Is(err, ErrJWSAccountInactive):
|
||||
return Problem{
|
||||
Type: "urn:ietf:params:acme:error:unauthorized",
|
||||
Detail: "account status is not `valid`",
|
||||
Status: http.StatusUnauthorized,
|
||||
}
|
||||
case errors.Is(err, ErrJWSSignatureInvalid):
|
||||
return Problem{
|
||||
Type: "urn:ietf:params:acme:error:unauthorized",
|
||||
Detail: "signature did not verify",
|
||||
Status: http.StatusUnauthorized,
|
||||
}
|
||||
case errors.Is(err, ErrJWSAlgorithmRejected):
|
||||
return Malformed("signature algorithm not allowed (RFC 8555 §6.2: RS256, ES256, EdDSA only)")
|
||||
case errors.Is(err, ErrJWSExpectJWKGotKid):
|
||||
return Malformed("this endpoint requires `jwk` (new-account flow); got `kid`")
|
||||
case errors.Is(err, ErrJWSExpectKidGotJWK):
|
||||
return Malformed("this endpoint requires `kid` (registered account); got `jwk`")
|
||||
case errors.Is(err, ErrJWSBothKidAndJWK), errors.Is(err, ErrJWSNeitherKidNorJWK):
|
||||
return Malformed("protected header MUST contain exactly one of `kid` or `jwk`")
|
||||
case errors.Is(err, ErrJWSInvalidJWK):
|
||||
return Malformed("invalid or unsupported JWK")
|
||||
case errors.Is(err, ErrJWSWrongType):
|
||||
return Malformed("protected header `typ` must be `application/jose+json`")
|
||||
case errors.Is(err, ErrJWSPayloadMismatch):
|
||||
return ServerInternal("JWS payload integrity check failed")
|
||||
case errors.Is(err, ErrJWSMalformed):
|
||||
return Malformed("malformed JWS")
|
||||
default:
|
||||
return Malformed("malformed request")
|
||||
}
|
||||
}
|
||||
|
||||
// algorithmAllowed verifies the post-parse algorithm is in the
|
||||
// approved set. ParseSigned already rejects non-allow-list algs but
|
||||
// re-checking here protects against go-jose contract changes.
|
||||
func algorithmAllowed(alg string) bool {
|
||||
for _, a := range AllowedSignatureAlgorithms {
|
||||
if string(a) == alg {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// extractStringHeader pulls a string-typed entry from ExtraHeaders.
|
||||
// Returns ("", nil) when the key is absent so the caller can
|
||||
// distinguish absent (empty string) from non-string-shaped (error).
|
||||
func extractStringHeader(extra map[jose.HeaderKey]interface{}, name string) (string, error) {
|
||||
v, ok := extra[jose.HeaderKey(name)]
|
||||
if !ok {
|
||||
return "", nil
|
||||
}
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("acme jws: header %q is not a string: %T", name, v)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// accountIDFromKID extracts the account ID from a kid URL. RFC 8555
|
||||
// §6.2 says kid is the URL the server returned in the Location
|
||||
// header on new-account; we expect the canonical
|
||||
//
|
||||
// <scheme>://<host>/acme/profile/<profile-id>/account/<account-id>
|
||||
//
|
||||
// shape and trust the verifier-config-supplied AccountKID to round-
|
||||
// trip the full URL match. Phase 1b: extract the account ID by
|
||||
// trimming the URL prefix; Phase 1b's caller asserts the round-trip
|
||||
// equals the original kid.
|
||||
func accountIDFromKID(kid string, cfg VerifierConfig) (string, error) {
|
||||
// Trim off everything up to the last "/account/" — the suffix is
|
||||
// the account ID. The Phase-1b account-id format is
|
||||
// "acme-acc-<...>" (alphanumeric + hyphen), so we don't need to
|
||||
// URL-unescape.
|
||||
idx := strings.LastIndex(kid, "/account/")
|
||||
if idx < 0 {
|
||||
return "", fmt.Errorf("%w: kid does not match expected /account/<id> shape", ErrJWSMalformed)
|
||||
}
|
||||
accountID := kid[idx+len("/account/"):]
|
||||
if accountID == "" {
|
||||
return "", fmt.Errorf("%w: kid has empty account id", ErrJWSMalformed)
|
||||
}
|
||||
// Round-trip: confirm the canonical kid for this account-id
|
||||
// matches what the client sent. Catches accidental cross-profile
|
||||
// replay.
|
||||
if cfg.AccountKID != nil {
|
||||
expected := cfg.AccountKID(accountID)
|
||||
if expected != kid {
|
||||
return "", fmt.Errorf("%w: kid does not match canonical URL", ErrJWSMalformed)
|
||||
}
|
||||
}
|
||||
return accountID, nil
|
||||
}
|
||||
|
||||
// ParseJWKFromPEM parses a JWK previously serialized by JWKToPEM.
|
||||
// Used by the verifier on the kid path: the registered account row's
|
||||
// JWKPEM column round-trips through here to recover the key bytes
|
||||
// used for signature verification.
|
||||
//
|
||||
// The PEM block is JSON-encoded JWK (we use PEM as the wire format
|
||||
// for the column to keep the schema text-shaped + line-friendly for
|
||||
// SQL diffs). Block type is "ACME ACCOUNT JWK".
|
||||
func ParseJWKFromPEM(pemString string) (*jose.JSONWebKey, error) {
|
||||
// Strip the PEM header / footer; everything between is base64.
|
||||
const header = "-----BEGIN ACME ACCOUNT JWK-----"
|
||||
const footer = "-----END ACME ACCOUNT JWK-----"
|
||||
s := strings.TrimSpace(pemString)
|
||||
if !strings.HasPrefix(s, header) {
|
||||
return nil, fmt.Errorf("acme jws: pem missing header")
|
||||
}
|
||||
s = strings.TrimPrefix(s, header)
|
||||
idx := strings.Index(s, footer)
|
||||
if idx < 0 {
|
||||
return nil, fmt.Errorf("acme jws: pem missing footer")
|
||||
}
|
||||
body := strings.TrimSpace(s[:idx])
|
||||
body = strings.ReplaceAll(body, "\n", "")
|
||||
body = strings.ReplaceAll(body, "\r", "")
|
||||
raw, err := base64.StdEncoding.DecodeString(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("acme jws: decode pem body: %w", err)
|
||||
}
|
||||
jwk := new(jose.JSONWebKey)
|
||||
if err := jwk.UnmarshalJSON(raw); err != nil {
|
||||
return nil, fmt.Errorf("acme jws: parse jwk json: %w", err)
|
||||
}
|
||||
if !jwk.Valid() {
|
||||
return nil, fmt.Errorf("acme jws: jwk did not validate")
|
||||
}
|
||||
return jwk, nil
|
||||
}
|
||||
|
||||
// JWKToPEM is the inverse of ParseJWKFromPEM. Used at account creation
|
||||
// time to persist the public-only JWK to the acme_accounts row.
|
||||
func JWKToPEM(jwk *jose.JSONWebKey) (string, error) {
|
||||
raw, err := jwk.MarshalJSON()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("acme jws: marshal jwk json: %w", err)
|
||||
}
|
||||
encoded := base64.StdEncoding.EncodeToString(raw)
|
||||
// Wrap to 64-char lines for diff-friendliness.
|
||||
var buf strings.Builder
|
||||
buf.WriteString("-----BEGIN ACME ACCOUNT JWK-----\n")
|
||||
for i := 0; i < len(encoded); i += 64 {
|
||||
end := i + 64
|
||||
if end > len(encoded) {
|
||||
end = len(encoded)
|
||||
}
|
||||
buf.WriteString(encoded[i:end])
|
||||
buf.WriteByte('\n')
|
||||
}
|
||||
buf.WriteString("-----END ACME ACCOUNT JWK-----\n")
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
// JWKThumbprint computes the RFC 7638 thumbprint of jwk and returns
|
||||
// it as a base64url-no-padding string. The (profile_id, thumbprint)
|
||||
// pair uniquely identifies an account per profile; new-account uses
|
||||
// it for idempotency (RFC 8555 §7.3.1).
|
||||
func JWKThumbprint(jwk *jose.JSONWebKey) (string, error) {
|
||||
raw, err := jwk.Thumbprint(crypto.SHA256)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("acme jws: thumbprint: %w", err)
|
||||
}
|
||||
return base64.RawURLEncoding.EncodeToString(raw), nil
|
||||
}
|
||||
|
||||
// AccountID turns a JWK thumbprint into the canonical certctl account
// ID by prepending "acme-acc-". Because the ID is a pure function of
// the thumbprint, the same key always maps to the same ID, which lets
// the new-account idempotency rule of RFC 8555 §7.3.1 hold without an
// extra lookup.
func AccountID(thumbprint string) string {
	// The thumbprint is used verbatim: unpadded base64url is limited
	// to alphanumerics plus `-` and `_`, which are kept as-is.
	const prefix = "acme-acc-"
	return prefix + thumbprint
}
|
||||
@@ -0,0 +1,570 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto/ecdsa"
|
||||
"crypto/ed25519"
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// --- test fixtures + helpers --------------------------------------------
|
||||
|
||||
// stubAccounts implements AccountLookup with a static map.
|
||||
type stubAccounts struct {
|
||||
byID map[string]*domain.ACMEAccount
|
||||
}
|
||||
|
||||
func (s *stubAccounts) LookupAccount(accountID string) (*domain.ACMEAccount, error) {
|
||||
acct, ok := s.byID[accountID]
|
||||
if !ok {
|
||||
return nil, ErrJWSAccountNotFound
|
||||
}
|
||||
return acct, nil
|
||||
}
|
||||
|
||||
// stubNonces is an in-memory NonceConsumer. Every issued nonce is a
// key in known; its value flips to true the first time it is
// consumed, after which the nonce is refused.
type stubNonces struct {
	known map[string]bool // nonce → already consumed?
}

// newStubNonces returns a stub in which each of nonces is issued and
// not yet consumed.
func newStubNonces(nonces ...string) *stubNonces {
	issued := make(map[string]bool, len(nonces))
	for i := range nonces {
		issued[nonces[i]] = false
	}
	return &stubNonces{known: issued}
}

// ConsumeNonce marks nonce as spent. It fails when the nonce was
// never issued or has been consumed before.
func (s *stubNonces) ConsumeNonce(nonce string) error {
	consumed, issued := s.known[nonce]
	switch {
	case !issued:
		return errors.New("not found")
	case consumed:
		return errors.New("already used")
	}
	s.known[nonce] = true
	return nil
}
|
||||
|
||||
// Fixed URLs shared by the verifier tests: the kid of the test
// account and the new-account endpoint under the same profile.
const (
	testKID = "https://server/acme/profile/prof-corp/account/acme-acc-test123"
	testURL = "https://server/acme/profile/prof-corp/new-account"
)

// testAccountKID is the VerifierConfig.AccountKID used in tests: it
// builds the account URL for accountID under the prof-corp profile.
func testAccountKID(accountID string) string {
	const accountBase = "https://server/acme/profile/prof-corp/account/"
	return accountBase + accountID
}
|
||||
|
||||
// genRSAKey, genECKey and genEdKey each return a freshly generated
// key suitable for signing JWS objects. A new key is produced on
// every call, so no key material is shared between tests.
//
// genRSAKey generates a 2048-bit RSA key and fails the test
// immediately if generation errors.
func genRSAKey(t *testing.T) *rsa.PrivateKey {
	t.Helper()

	const modulusBits = 2048
	priv, genErr := rsa.GenerateKey(rand.Reader, modulusBits)
	if genErr != nil {
		t.Fatalf("rsa keygen: %v", genErr)
	}
	return priv
}
|
||||
|
||||
// genECKey generates a fresh ECDSA key on the P-256 curve and fails
// the test immediately if generation errors.
func genECKey(t *testing.T) *ecdsa.PrivateKey {
	t.Helper()

	curve := elliptic.P256()
	priv, genErr := ecdsa.GenerateKey(curve, rand.Reader)
	if genErr != nil {
		t.Fatalf("ecdsa keygen: %v", genErr)
	}
	return priv
}
|
||||
|
||||
// genEdKey generates a fresh Ed25519 key pair, returned as
// (public, private), and fails the test immediately if generation
// errors.
func genEdKey(t *testing.T) (ed25519.PublicKey, ed25519.PrivateKey) {
	t.Helper()

	public, private, genErr := ed25519.GenerateKey(rand.Reader)
	if genErr != nil {
		t.Fatalf("ed25519 keygen: %v", genErr)
	}
	return public, private
}
|
||||
|
||||
// signWithKID builds a flattened JWS using kid (registered-account flow).
|
||||
func signWithKID(t *testing.T, key interface{}, alg jose.SignatureAlgorithm, kid, url, nonce string, payload interface{}) string {
|
||||
t.Helper()
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal payload: %v", err)
|
||||
}
|
||||
signer, err := jose.NewSigner(
|
||||
jose.SigningKey{Algorithm: alg, Key: key},
|
||||
(&jose.SignerOptions{}).
|
||||
WithHeader(jose.HeaderKey("url"), url).
|
||||
WithHeader("kid", kid).
|
||||
WithHeader("nonce", nonce),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("new signer: %v", err)
|
||||
}
|
||||
jws, err := signer.Sign(body)
|
||||
if err != nil {
|
||||
t.Fatalf("sign: %v", err)
|
||||
}
|
||||
out := jws.FullSerialize()
|
||||
return out
|
||||
}
|
||||
|
||||
// signWithJWK builds a flattened JWS embedding the public JWK
|
||||
// (new-account flow). The Signer with EmbedJWK=true attaches the
|
||||
// JSONWebKey to the protected header.
|
||||
func signWithJWK(t *testing.T, key interface{}, alg jose.SignatureAlgorithm, url, nonce string, payload interface{}) string {
|
||||
t.Helper()
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal payload: %v", err)
|
||||
}
|
||||
signer, err := jose.NewSigner(
|
||||
jose.SigningKey{Algorithm: alg, Key: key},
|
||||
(&jose.SignerOptions{EmbedJWK: true}).
|
||||
WithHeader(jose.HeaderKey("url"), url).
|
||||
WithHeader("nonce", nonce),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("new signer (embed jwk): %v", err)
|
||||
}
|
||||
jws, err := signer.Sign(body)
|
||||
if err != nil {
|
||||
t.Fatalf("sign: %v", err)
|
||||
}
|
||||
return jws.FullSerialize()
|
||||
}
|
||||
|
||||
// --- JWK round-trip helpers --------------------------------------------
|
||||
|
||||
func TestJWKRoundTrip_RSA(t *testing.T) {
|
||||
k := genRSAKey(t)
|
||||
jwk := &jose.JSONWebKey{Key: &k.PublicKey}
|
||||
pem, err := JWKToPEM(jwk)
|
||||
if err != nil {
|
||||
t.Fatalf("JWKToPEM: %v", err)
|
||||
}
|
||||
if !strings.Contains(pem, "BEGIN ACME ACCOUNT JWK") {
|
||||
t.Fatalf("PEM missing header: %s", pem)
|
||||
}
|
||||
parsed, err := ParseJWKFromPEM(pem)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseJWKFromPEM: %v", err)
|
||||
}
|
||||
if !parsed.Valid() {
|
||||
t.Fatal("parsed jwk is not valid")
|
||||
}
|
||||
}
|
||||
|
||||
func TestJWKThumbprint_StableAcrossKeyTypes(t *testing.T) {
|
||||
rsaJWK := &jose.JSONWebKey{Key: &genRSAKey(t).PublicKey}
|
||||
rsaThumb1, err := JWKThumbprint(rsaJWK)
|
||||
if err != nil {
|
||||
t.Fatalf("rsa thumb: %v", err)
|
||||
}
|
||||
rsaThumb2, err := JWKThumbprint(rsaJWK)
|
||||
if err != nil {
|
||||
t.Fatalf("rsa thumb 2: %v", err)
|
||||
}
|
||||
if rsaThumb1 != rsaThumb2 {
|
||||
t.Errorf("thumbprint not stable: %q vs %q", rsaThumb1, rsaThumb2)
|
||||
}
|
||||
// Different keys produce different thumbprints.
|
||||
otherJWK := &jose.JSONWebKey{Key: &genRSAKey(t).PublicKey}
|
||||
otherThumb, err := JWKThumbprint(otherJWK)
|
||||
if err != nil {
|
||||
t.Fatalf("other thumb: %v", err)
|
||||
}
|
||||
if rsaThumb1 == otherThumb {
|
||||
t.Error("two distinct keys collided on thumbprint")
|
||||
}
|
||||
}
|
||||
|
||||
// --- VerifyJWS happy paths ---------------------------------------------
|
||||
|
||||
func TestVerifyJWS_Happy_RS256_KID(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
jwk := &jose.JSONWebKey{Key: &key.PublicKey}
|
||||
pem, _ := JWKToPEM(jwk)
|
||||
thumb, _ := JWKThumbprint(jwk)
|
||||
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{byID: map[string]*domain.ACMEAccount{
|
||||
"acme-acc-test123": {
|
||||
AccountID: "acme-acc-test123", JWKPEM: pem, JWKThumbprint: thumb,
|
||||
Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp",
|
||||
},
|
||||
}},
|
||||
Nonces: newStubNonces("nonce-001"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithKID(t, key, jose.RS256, testKID, testURL, "nonce-001", map[string]any{"hello": "world"})
|
||||
|
||||
v, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: false})
|
||||
if err != nil {
|
||||
t.Fatalf("VerifyJWS: %v", err)
|
||||
}
|
||||
if v.Account == nil || v.Account.AccountID != "acme-acc-test123" {
|
||||
t.Errorf("account = %+v, want acme-acc-test123", v.Account)
|
||||
}
|
||||
if v.JWK != nil {
|
||||
t.Errorf("JWK should be nil on kid path; got %+v", v.JWK)
|
||||
}
|
||||
if v.Nonce != "nonce-001" {
|
||||
t.Errorf("nonce = %q", v.Nonce)
|
||||
}
|
||||
if v.URL != testURL {
|
||||
t.Errorf("url = %q", v.URL)
|
||||
}
|
||||
if v.Algorithm != "RS256" {
|
||||
t.Errorf("algorithm = %q", v.Algorithm)
|
||||
}
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal(v.Payload, &payload); err != nil {
|
||||
t.Fatalf("payload not json: %v", err)
|
||||
}
|
||||
if payload["hello"] != "world" {
|
||||
t.Errorf("payload = %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Happy_ES256_JWK(t *testing.T) {
|
||||
key := genECKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces("nonce-002"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithJWK(t, key, jose.ES256, testURL, "nonce-002", map[string]any{"new": "account"})
|
||||
v, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: true})
|
||||
if err != nil {
|
||||
t.Fatalf("VerifyJWS: %v", err)
|
||||
}
|
||||
if v.JWK == nil {
|
||||
t.Fatal("JWK should be populated on jwk path")
|
||||
}
|
||||
if v.Account != nil {
|
||||
t.Errorf("Account should be nil on jwk path; got %+v", v.Account)
|
||||
}
|
||||
if v.Algorithm != "ES256" {
|
||||
t.Errorf("algorithm = %q", v.Algorithm)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Happy_EdDSA_KID(t *testing.T) {
|
||||
pub, priv := genEdKey(t)
|
||||
jwk := &jose.JSONWebKey{Key: pub}
|
||||
pem, _ := JWKToPEM(jwk)
|
||||
thumb, _ := JWKThumbprint(jwk)
|
||||
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{byID: map[string]*domain.ACMEAccount{
|
||||
"acme-acc-ed1": {
|
||||
AccountID: "acme-acc-ed1", JWKPEM: pem, JWKThumbprint: thumb,
|
||||
Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp",
|
||||
},
|
||||
}},
|
||||
Nonces: newStubNonces("nonce-003"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
kid := testAccountKID("acme-acc-ed1")
|
||||
body := signWithKID(t, priv, jose.EdDSA, kid, testURL, "nonce-003", struct{}{})
|
||||
|
||||
v, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: false})
|
||||
if err != nil {
|
||||
t.Fatalf("VerifyJWS: %v", err)
|
||||
}
|
||||
if v.Algorithm != "EdDSA" {
|
||||
t.Errorf("algorithm = %q, want EdDSA", v.Algorithm)
|
||||
}
|
||||
}
|
||||
|
||||
// --- VerifyJWS rejection paths -----------------------------------------
|
||||
|
||||
func TestVerifyJWS_Reject_AlgNotInAllowList(t *testing.T) {
|
||||
// HS256 (HMAC-SHA256, symmetric) is forbidden by RFC 8555 §6.2.
|
||||
key := []byte("supersecretkey32byteslongforhmac")
|
||||
signer, err := jose.NewSigner(
|
||||
jose.SigningKey{Algorithm: jose.HS256, Key: key},
|
||||
(&jose.SignerOptions{}).
|
||||
WithHeader(jose.HeaderKey("url"), testURL).
|
||||
WithHeader("kid", testKID).
|
||||
WithHeader("nonce", "n"),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("hs256 signer: %v", err)
|
||||
}
|
||||
jws, _ := signer.Sign([]byte("{}"))
|
||||
body := jws.FullSerialize()
|
||||
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces("n"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
_, err = VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{})
|
||||
if err == nil {
|
||||
t.Fatal("expected algorithm-rejected error; got nil")
|
||||
}
|
||||
// ParseSigned filters the alg before we ever see the JWS, so the
|
||||
// error wraps ErrJWSMalformed (the verifier can't distinguish
|
||||
// "wrong format" from "bad alg" at this layer — both manifest as
|
||||
// malformed).
|
||||
if !errors.Is(err, ErrJWSMalformed) && !errors.Is(err, ErrJWSAlgorithmRejected) {
|
||||
t.Errorf("err = %v; want ErrJWSMalformed or ErrJWSAlgorithmRejected", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_BadSignature(t *testing.T) {
|
||||
signingKey := genRSAKey(t)
|
||||
// The verifier resolves the account row's stored JWK and uses its
|
||||
// public component as the verify key. Register an account whose
|
||||
// stored JWK is a DIFFERENT key — same shape, different material.
|
||||
// The JWS parses cleanly but Verify returns "verification failed".
|
||||
storedKey := genRSAKey(t)
|
||||
storedJWK := &jose.JSONWebKey{Key: &storedKey.PublicKey}
|
||||
storedPEM, _ := JWKToPEM(storedJWK)
|
||||
storedThumb, _ := JWKThumbprint(storedJWK)
|
||||
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{byID: map[string]*domain.ACMEAccount{
|
||||
"acme-acc-test123": {
|
||||
AccountID: "acme-acc-test123", JWKPEM: storedPEM, JWKThumbprint: storedThumb,
|
||||
Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp",
|
||||
},
|
||||
}},
|
||||
Nonces: newStubNonces("n1"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithKID(t, signingKey, jose.RS256, testKID, testURL, "n1", map[string]any{"x": 1})
|
||||
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{})
|
||||
if err == nil {
|
||||
t.Fatal("expected signature-invalid error; got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrJWSSignatureInvalid) {
|
||||
t.Errorf("err = %v; want ErrJWSSignatureInvalid wrapper", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_NonceMissingFromHeader(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces(),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
signer, _ := jose.NewSigner(
|
||||
jose.SigningKey{Algorithm: jose.RS256, Key: key},
|
||||
(&jose.SignerOptions{EmbedJWK: true}).
|
||||
WithHeader(jose.HeaderKey("url"), testURL),
|
||||
// nonce omitted intentionally
|
||||
)
|
||||
jws, _ := signer.Sign([]byte("{}"))
|
||||
body := jws.FullSerialize()
|
||||
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: true})
|
||||
if !errors.Is(err, ErrJWSMissingNonce) {
|
||||
t.Errorf("err = %v; want ErrJWSMissingNonce", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_NonceUnknown(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces(), // no nonces issued
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithJWK(t, key, jose.RS256, testURL, "ghost-nonce", map[string]any{})
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: true})
|
||||
if !errors.Is(err, ErrJWSBadNonce) {
|
||||
t.Errorf("err = %v; want ErrJWSBadNonce", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_NonceReplay(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces("n-replay"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithJWK(t, key, jose.RS256, testURL, "n-replay", map[string]any{})
|
||||
if _, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: true}); err != nil {
|
||||
t.Fatalf("first verify: %v", err)
|
||||
}
|
||||
// Replay — same JWS, second time.
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: true})
|
||||
if !errors.Is(err, ErrJWSBadNonce) {
|
||||
t.Errorf("err = %v; want ErrJWSBadNonce on replay", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_URLMismatch(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces("n-url"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithJWK(t, key, jose.RS256, testURL, "n-url", map[string]any{})
|
||||
// Hand the verifier a different URL than the one signed.
|
||||
_, err := VerifyJWS(cfg, []byte(body), "https://server/acme/different", VerifyOptions{ExpectNewAccount: true})
|
||||
if !errors.Is(err, ErrJWSURLMismatch) {
|
||||
t.Errorf("err = %v; want ErrJWSURLMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_ExpectKidGotJWK(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces("n-mix1"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithJWK(t, key, jose.RS256, testURL, "n-mix1", map[string]any{})
|
||||
// New-account expects jwk; we set ExpectNewAccount=false so this
|
||||
// flow demands kid.
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: false})
|
||||
if !errors.Is(err, ErrJWSExpectKidGotJWK) {
|
||||
t.Errorf("err = %v; want ErrJWSExpectKidGotJWK", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_ExpectJWKGotKid(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
jwk := &jose.JSONWebKey{Key: &key.PublicKey}
|
||||
pem, _ := JWKToPEM(jwk)
|
||||
thumb, _ := JWKThumbprint(jwk)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{byID: map[string]*domain.ACMEAccount{
|
||||
"acme-acc-test123": {
|
||||
AccountID: "acme-acc-test123", JWKPEM: pem, JWKThumbprint: thumb,
|
||||
Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp",
|
||||
},
|
||||
}},
|
||||
Nonces: newStubNonces("n-mix2"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithKID(t, key, jose.RS256, testKID, testURL, "n-mix2", map[string]any{})
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{ExpectNewAccount: true})
|
||||
if !errors.Is(err, ErrJWSExpectJWKGotKid) {
|
||||
t.Errorf("err = %v; want ErrJWSExpectJWKGotKid", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_AccountUnknown(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{},
|
||||
Nonces: newStubNonces("n-acct"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithKID(t, key, jose.RS256, testKID, testURL, "n-acct", map[string]any{})
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{})
|
||||
if !errors.Is(err, ErrJWSAccountNotFound) {
|
||||
t.Errorf("err = %v; want ErrJWSAccountNotFound", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_AccountDeactivated(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
jwk := &jose.JSONWebKey{Key: &key.PublicKey}
|
||||
pem, _ := JWKToPEM(jwk)
|
||||
thumb, _ := JWKThumbprint(jwk)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{byID: map[string]*domain.ACMEAccount{
|
||||
"acme-acc-test123": {
|
||||
AccountID: "acme-acc-test123", JWKPEM: pem, JWKThumbprint: thumb,
|
||||
Status: domain.ACMEAccountStatusDeactivated, // ← deactivated
|
||||
ProfileID: "prof-corp",
|
||||
},
|
||||
}},
|
||||
Nonces: newStubNonces("n-deact"),
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
body := signWithKID(t, key, jose.RS256, testKID, testURL, "n-deact", map[string]any{})
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{})
|
||||
if !errors.Is(err, ErrJWSAccountInactive) {
|
||||
t.Errorf("err = %v; want ErrJWSAccountInactive", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyJWS_Reject_KIDMismatchesProfile(t *testing.T) {
|
||||
key := genRSAKey(t)
|
||||
jwk := &jose.JSONWebKey{Key: &key.PublicKey}
|
||||
pem, _ := JWKToPEM(jwk)
|
||||
thumb, _ := JWKThumbprint(jwk)
|
||||
cfg := VerifierConfig{
|
||||
Accounts: &stubAccounts{byID: map[string]*domain.ACMEAccount{
|
||||
"acme-acc-test123": {
|
||||
AccountID: "acme-acc-test123", JWKPEM: pem, JWKThumbprint: thumb,
|
||||
Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp",
|
||||
},
|
||||
}},
|
||||
Nonces: newStubNonces("n-cross"),
|
||||
// AccountKID expects prof-corp; the test JWS uses a kid that
|
||||
// claims prof-corp BUT we're going to feed an off-canonical
|
||||
// kid that doesn't match.
|
||||
AccountKID: testAccountKID,
|
||||
}
|
||||
// Sign with a kid that points at a different host. The verifier's
|
||||
// AccountKID round-trip-check should reject it.
|
||||
wrongKID := "https://different-host/acme/profile/prof-corp/account/acme-acc-test123"
|
||||
body := signWithKID(t, key, jose.RS256, wrongKID, testURL, "n-cross", map[string]any{})
|
||||
_, err := VerifyJWS(cfg, []byte(body), testURL, VerifyOptions{})
|
||||
if err == nil {
|
||||
t.Fatal("expected error from kid round-trip mismatch")
|
||||
}
|
||||
if !errors.Is(err, ErrJWSMalformed) {
|
||||
t.Errorf("err = %v; want ErrJWSMalformed (round-trip mismatch)", err)
|
||||
}
|
||||
}
|
||||
|
||||
// MapJWSErrorToProblem coverage check: every exported sentinel maps
|
||||
// to a typed Problem (not the default malformed catch-all).
|
||||
func TestMapJWSErrorToProblem_KnownSentinels(t *testing.T) {
|
||||
cases := []struct {
|
||||
err error
|
||||
wantTyp string
|
||||
}{
|
||||
{ErrJWSBadNonce, "urn:ietf:params:acme:error:badNonce"},
|
||||
{ErrJWSMissingNonce, "urn:ietf:params:acme:error:badNonce"},
|
||||
{ErrJWSAccountNotFound, "urn:ietf:params:acme:error:accountDoesNotExist"},
|
||||
{ErrJWSAccountInactive, "urn:ietf:params:acme:error:unauthorized"},
|
||||
{ErrJWSURLMismatch, "urn:ietf:params:acme:error:unauthorized"},
|
||||
{ErrJWSSignatureInvalid, "urn:ietf:params:acme:error:unauthorized"},
|
||||
{ErrJWSAlgorithmRejected, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSExpectJWKGotKid, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSExpectKidGotJWK, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSBothKidAndJWK, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSNeitherKidNorJWK, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSInvalidJWK, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSWrongType, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrJWSPayloadMismatch, "urn:ietf:params:acme:error:serverInternal"},
|
||||
{ErrJWSMalformed, "urn:ietf:params:acme:error:malformed"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
p := MapJWSErrorToProblem(tc.err)
|
||||
if p.Type != tc.wantTyp {
|
||||
t.Errorf("err=%v: type = %q, want %q", tc.err, p.Type, tc.wantTyp)
|
||||
}
|
||||
if p.Status == 0 {
|
||||
t.Errorf("err=%v: status was 0", tc.err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,272 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
)
|
||||
|
||||
// Phase 4 — RFC 8555 §7.3.5 key rollover.
|
||||
//
|
||||
// The wire shape is a doubly-signed JWS:
|
||||
//
|
||||
// JWS-outer signed by the OLD account key (kid = account URL):
|
||||
// protected: { alg, kid, nonce, url }
|
||||
// payload: <JWS-inner-as-bytes>
|
||||
//
|
||||
// JWS-inner signed by the NEW account key (jwk = newkey):
|
||||
// protected: { alg, jwk, url=<same key-change URL> }
|
||||
// payload: { account: <kid-URL>, oldKey: <OLD JWK> }
|
||||
//
|
||||
// The handler runs the existing VerifyJWS pipeline against the outer
|
||||
// (kid path), then hands the resulting Payload bytes to ParseAndVerify-
|
||||
// KeyChangeInner so the inner is processed in isolation. Two key
|
||||
// distinctions vs. the outer:
|
||||
//
|
||||
// - The inner JWS does NOT carry a `nonce` header. Per RFC 8555 §7.3.5
|
||||
// the outer's nonce is the only nonce; the inner is a self-contained
|
||||
// proof-of-possession blob.
|
||||
// - The inner JWS uses `jwk` not `kid` and the verifier must succeed
|
||||
// when the embedded `jwk` itself is the verification key.
|
||||
//
|
||||
// This matches what lego (the go-jose-based ACME client), cert-manager, and
// boulder all expect.
|
||||
|
||||
// KeyChangeInnerPayload is the parsed body of the inner JWS — RFC 8555
|
||||
// §7.3.5 mandates exactly two fields.
|
||||
type KeyChangeInnerPayload struct {
|
||||
// Account is the kid URL of the account whose key is being rotated.
|
||||
// MUST equal the outer's `kid` header. Mismatch → keyChange's
|
||||
// "account" field doesn't match outer.kid.
|
||||
Account string `json:"account"`
|
||||
|
||||
// OldKey is the JWK currently on file for the account. The server
|
||||
// asserts this matches what we have in the database (byte-equal
|
||||
// canonicalized) so a stale rollover request can't slip through.
|
||||
OldKey *jose.JSONWebKey `json:"oldKey"`
|
||||
}
|
||||
|
||||
// KeyChangeInner is the verified inner JWS — fields the service layer
|
||||
// needs to commit the rollover.
|
||||
type KeyChangeInner struct {
|
||||
// NewJWK is the JWK the inner JWS is signed by. After verification
|
||||
// this is the key the account's row will be updated to.
|
||||
NewJWK *jose.JSONWebKey
|
||||
|
||||
// Payload is the inner's parsed JSON: { account, oldKey }.
|
||||
Payload KeyChangeInnerPayload
|
||||
|
||||
// URL is the inner protected-header `url` value, asserted equal to
|
||||
// the outer's URL.
|
||||
URL string
|
||||
|
||||
// Algorithm is the negotiated alg the inner was signed with.
|
||||
Algorithm string
|
||||
}
|
||||
|
||||
// Sentinel errors returned by the inner key-change verifier. Callers and
// tests match them with errors.Is; the service's writeServiceError turns
// each into an RFC 8555 §6.7 problem type.
var (
	// Structural and header-level failures.
	ErrKeyChangeInnerMalformed   = errors.New("acme keychange: inner JWS malformed")
	ErrKeyChangeInnerAlgRejected = errors.New("acme keychange: inner JWS uses disallowed signature algorithm")
	ErrKeyChangeInnerMissingJWK  = errors.New("acme keychange: inner JWS protected header MUST contain `jwk`")
	ErrKeyChangeInnerForbidsKID  = errors.New("acme keychange: inner JWS MUST NOT contain `kid` (use `jwk`)")
	ErrKeyChangeInnerInvalidJWK  = errors.New("acme keychange: inner JWS embedded JWK is invalid")
	ErrKeyChangeInnerURLMissing  = errors.New("acme keychange: inner JWS protected header `url` is required")
	ErrKeyChangeInnerURLMismatch = errors.New("acme keychange: inner JWS `url` does not match outer JWS `url`")

	// Signature and payload failures.
	ErrKeyChangeInnerSignatureBad    = errors.New("acme keychange: inner JWS signature did not verify against embedded JWK")
	ErrKeyChangeInnerPayloadParse    = errors.New("acme keychange: inner JWS payload is not parseable JSON")
	ErrKeyChangeInnerAccountMismatch = errors.New("acme keychange: inner JWS payload `account` does not match outer JWS `kid`")
	ErrKeyChangeInnerOldKeyMissing   = errors.New("acme keychange: inner JWS payload missing `oldKey`")
	ErrKeyChangeInnerOldKeyMismatch  = errors.New("acme keychange: inner JWS payload `oldKey` does not match registered account key")
)
|
||||
|
||||
// ParseAndVerifyKeyChangeInner parses the inner JWS bytes (i.e. the
|
||||
// outer JWS's verified payload), runs the same allow-list +
|
||||
// signature-verification pipeline as VerifyJWS, and asserts the inner-
|
||||
// only invariants from RFC 8555 §7.3.5 (must use `jwk`, must NOT use
|
||||
// `kid`, URL must match).
|
||||
//
|
||||
// Caller passes:
|
||||
//
|
||||
// - innerBytes: the outer JWS's verified payload (the inner JWS in
|
||||
// compact serialization).
|
||||
// - outerKID: the outer JWS's `kid` header value. The inner's payload
|
||||
// `account` field MUST equal this.
|
||||
// - outerURL: the outer JWS's `url` header value. The inner's
|
||||
// protected-header `url` MUST equal this.
|
||||
// - registeredOldJWK: the JWK currently stored on the account row.
|
||||
// The inner's payload `oldKey` MUST canonicalize-equal this.
|
||||
//
|
||||
// Returns the verified KeyChangeInner on success, or one of the
|
||||
// sentinel errors above on any validation failure.
|
||||
func ParseAndVerifyKeyChangeInner(innerBytes []byte, outerKID, outerURL string, registeredOldJWK *jose.JSONWebKey) (*KeyChangeInner, error) {
|
||||
// Parse against the same allow-list that VerifyJWS uses.
|
||||
jws, err := jose.ParseSigned(string(innerBytes), AllowedSignatureAlgorithms)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrKeyChangeInnerMalformed, err)
|
||||
}
|
||||
if len(jws.Signatures) != 1 {
|
||||
return nil, fmt.Errorf("%w: multi-signature inner JWS", ErrKeyChangeInnerMalformed)
|
||||
}
|
||||
sig := jws.Signatures[0]
|
||||
if !algorithmAllowed(sig.Protected.Algorithm) {
|
||||
return nil, fmt.Errorf("%w: %s", ErrKeyChangeInnerAlgRejected, sig.Protected.Algorithm)
|
||||
}
|
||||
|
||||
// RFC 8555 §7.3.5: the inner MUST use `jwk` and MUST NOT use `kid`.
|
||||
if sig.Protected.KeyID != "" {
|
||||
return nil, ErrKeyChangeInnerForbidsKID
|
||||
}
|
||||
jwk := sig.Protected.JSONWebKey
|
||||
if jwk == nil {
|
||||
return nil, ErrKeyChangeInnerMissingJWK
|
||||
}
|
||||
if !jwk.Valid() {
|
||||
return nil, ErrKeyChangeInnerInvalidJWK
|
||||
}
|
||||
|
||||
// URL header MUST equal the outer's URL.
|
||||
innerURL, err := extractStringHeader(sig.Protected.ExtraHeaders, "url")
|
||||
if err != nil {
|
||||
return nil, ErrKeyChangeInnerURLMissing
|
||||
}
|
||||
if innerURL == "" {
|
||||
return nil, ErrKeyChangeInnerURLMissing
|
||||
}
|
||||
if innerURL != outerURL {
|
||||
return nil, fmt.Errorf("%w: inner=%q outer=%q", ErrKeyChangeInnerURLMismatch, innerURL, outerURL)
|
||||
}
|
||||
|
||||
// Verify the inner signature against the embedded jwk.
|
||||
verifiedPayload, err := jws.Verify(jwk.Key)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrKeyChangeInnerSignatureBad, err)
|
||||
}
|
||||
|
||||
// Parse the inner payload.
|
||||
var payload KeyChangeInnerPayload
|
||||
if err := json.Unmarshal(verifiedPayload, &payload); err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrKeyChangeInnerPayloadParse, err)
|
||||
}
|
||||
|
||||
// `account` MUST equal outer's kid.
|
||||
if payload.Account != outerKID {
|
||||
return nil, fmt.Errorf("%w: payload=%q outer.kid=%q",
|
||||
ErrKeyChangeInnerAccountMismatch, payload.Account, outerKID)
|
||||
}
|
||||
|
||||
// `oldKey` MUST be present and canonicalize-equal to registered.
|
||||
if payload.OldKey == nil {
|
||||
return nil, ErrKeyChangeInnerOldKeyMissing
|
||||
}
|
||||
if !payload.OldKey.Valid() {
|
||||
return nil, fmt.Errorf("%w: oldKey did not validate", ErrKeyChangeInnerOldKeyMismatch)
|
||||
}
|
||||
eq, err := jwksThumbprintEqual(payload.OldKey, registeredOldJWK)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: thumbprint compare: %v", ErrKeyChangeInnerOldKeyMismatch, err)
|
||||
}
|
||||
if !eq {
|
||||
return nil, ErrKeyChangeInnerOldKeyMismatch
|
||||
}
|
||||
|
||||
return &KeyChangeInner{
|
||||
NewJWK: jwk,
|
||||
Payload: payload,
|
||||
URL: innerURL,
|
||||
Algorithm: sig.Protected.Algorithm,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// jwksThumbprintEqual compares two JWKs by RFC 7638 thumbprint, which
|
||||
// is the canonical identity for a public key. We deliberately compare
|
||||
// thumbprints rather than serialized bytes because go-jose may emit
|
||||
// fields in different orders for "equal" keys.
|
||||
//
|
||||
// Returns (true, nil) when both thumbprints exist and match in
|
||||
// constant time; (false, err) on any thumbprint computation error;
|
||||
// (false, nil) when the thumbprints differ.
|
||||
func jwksThumbprintEqual(a, b *jose.JSONWebKey) (bool, error) {
|
||||
if a == nil || b == nil {
|
||||
return false, nil
|
||||
}
|
||||
tA, err := JWKThumbprint(a)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
tB, err := JWKThumbprint(b)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return subtle.ConstantTimeCompare([]byte(tA), []byte(tB)) == 1, nil
|
||||
}
|
||||
|
||||
// MapKeyChangeErrorToProblem renders an inner-JWS validation error as
|
||||
// an RFC 7807 + RFC 8555 §6.7 Problem the handler emits via
|
||||
// WriteProblem.
|
||||
//
|
||||
// All inner-JWS errors map to operator-friendly problem types. The
|
||||
// detail string is a concise summary; the full err.Error() context is
|
||||
// suppressed to avoid leaking internal-state details (master-prompt
|
||||
// criterion #10).
|
||||
func MapKeyChangeErrorToProblem(err error) Problem {
|
||||
switch {
|
||||
case errors.Is(err, ErrKeyChangeInnerSignatureBad),
|
||||
errors.Is(err, ErrKeyChangeInnerOldKeyMismatch):
|
||||
// Both indicate "you don't actually possess the rollover key
|
||||
// pair" — treat as unauthorized per RFC 8555 §7.3.5.
|
||||
return Problem{
|
||||
Type: "urn:ietf:params:acme:error:unauthorized",
|
||||
Detail: "key rollover proof failed: " + plainCause(err),
|
||||
Status: 401,
|
||||
}
|
||||
case errors.Is(err, ErrKeyChangeInnerURLMismatch),
|
||||
errors.Is(err, ErrKeyChangeInnerURLMissing):
|
||||
return Problem{
|
||||
Type: "urn:ietf:params:acme:error:unauthorized",
|
||||
Detail: "key rollover inner URL: " + plainCause(err),
|
||||
Status: 401,
|
||||
}
|
||||
case errors.Is(err, ErrKeyChangeInnerAlgRejected):
|
||||
return Malformed("key rollover inner JWS uses disallowed algorithm")
|
||||
case errors.Is(err, ErrKeyChangeInnerForbidsKID):
|
||||
return Malformed("key rollover inner JWS MUST use `jwk`, not `kid`")
|
||||
case errors.Is(err, ErrKeyChangeInnerMissingJWK),
|
||||
errors.Is(err, ErrKeyChangeInnerInvalidJWK):
|
||||
return Malformed("key rollover inner JWS missing or invalid `jwk`")
|
||||
case errors.Is(err, ErrKeyChangeInnerAccountMismatch):
|
||||
return Malformed("key rollover inner `account` does not match outer kid")
|
||||
case errors.Is(err, ErrKeyChangeInnerOldKeyMissing):
|
||||
return Malformed("key rollover inner missing `oldKey`")
|
||||
case errors.Is(err, ErrKeyChangeInnerPayloadParse):
|
||||
return Malformed("key rollover inner payload is not valid JSON")
|
||||
case errors.Is(err, ErrKeyChangeInnerMalformed):
|
||||
return Malformed("key rollover inner JWS malformed")
|
||||
default:
|
||||
return Malformed("key rollover request rejected")
|
||||
}
|
||||
}
|
||||
|
||||
// plainCause returns the message of the innermost error reachable from
// err by repeated errors.Unwrap, or "" when err is nil. It is used by
// MapKeyChangeErrorToProblem to keep the client-facing detail short
// instead of echoing the whole wrap chain.
func plainCause(err error) string {
	if err == nil {
		return ""
	}
	leaf := err
	// Descend until Unwrap has nothing further to offer.
	for inner := errors.Unwrap(leaf); inner != nil; inner = errors.Unwrap(leaf) {
		leaf = inner
	}
	return leaf.Error()
}
|
||||
@@ -0,0 +1,47 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NonceStore abstracts the persistence of ACME nonces. The production
// implementation lives at internal/repository/postgres/acme.go and is
// backed by the database rather than memory, because replay protection
// needs the store to outlast the client's nonce caching window.
//
// Phase 1a wires Issue only: every directory and new-nonce response
// carries a Replay-Nonce header. Consume is part of the contract but is
// first invoked by the JWS-authenticated POSTs that arrive in Phase 1b.
type NonceStore interface {
	// Issue creates a nonce, stores it with the given TTL, and returns
	// the string the handler echoes in the Replay-Nonce response header.
	Issue(ctx context.Context, ttl time.Duration) (string, error)

	// Consume marks nonce as used. It returns an error when the nonce is
	// missing, already used, or expired; the handler maps that error to
	// urn:ietf:params:acme:error:badNonce per RFC 8555 §6.5.1.
	Consume(ctx context.Context, nonce string) error
}
|
||||
|
||||
// nonceByteLen is the number of random bytes in a nonce: 32 bytes, i.e.
// 256 bits of entropy. RFC 8555 §6.5.1 only asks for nonces that are hard
// to guess, so this size is deliberately generous.
const nonceByteLen = 32

// GenerateNonce draws nonceByteLen bytes from crypto/rand and returns them
// encoded as unpadded base64url (RFC 7515 §2), the encoding ACME uses for
// the protected-header nonce field. It returns an empty string and the
// underlying error if the random source fails.
func GenerateNonce() (string, error) {
	var entropy [nonceByteLen]byte
	if _, err := rand.Read(entropy[:]); err != nil {
		return "", err
	}
	encoded := base64.RawURLEncoding.EncodeToString(entropy[:])
	return encoded, nil
}
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGenerateNonce_LengthAndCharset(t *testing.T) {
|
||||
n, err := GenerateNonce()
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateNonce: %v", err)
|
||||
}
|
||||
// base64.RawURLEncoding emits ceil(N*8/6) chars = ceil(32*8/6) = 43.
|
||||
if got, want := len(n), 43; got != want {
|
||||
t.Errorf("nonce length = %d, want %d", got, want)
|
||||
}
|
||||
// Charset must decode under base64url-no-padding.
|
||||
raw, err := base64.RawURLEncoding.DecodeString(n)
|
||||
if err != nil {
|
||||
t.Fatalf("nonce did not decode under base64url-no-padding: %v", err)
|
||||
}
|
||||
if len(raw) != nonceByteLen {
|
||||
t.Errorf("decoded nonce = %d bytes, want %d", len(raw), nonceByteLen)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateNonce_Distinct(t *testing.T) {
|
||||
// Statistical sanity check, NOT cryptographic strength proof.
|
||||
// 256 bits of entropy means the probability of two consecutive
|
||||
// values colliding is ~2^-256 — well below the threshold for a
|
||||
// flaky-test-on-the-cosmos timeline.
|
||||
a, err := GenerateNonce()
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateNonce a: %v", err)
|
||||
}
|
||||
b, err := GenerateNonce()
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateNonce b: %v", err)
|
||||
}
|
||||
if a == b {
|
||||
t.Errorf("two consecutive nonces collided: %q", a)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,261 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto/x509"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// OrderResponseJSON is the wire shape RFC 8555 §7.1.3 mandates for the
|
||||
// new-order response + the per-order POST-as-GET response.
|
||||
//
|
||||
// Each URL field is the per-profile path the handler computes from the
|
||||
// inbound request; service-layer code does not see *http.Request, so
|
||||
// the handler does the URL composition.
|
||||
type OrderResponseJSON struct {
|
||||
Status string `json:"status"`
|
||||
Expires string `json:"expires,omitempty"`
|
||||
NotBefore string `json:"notBefore,omitempty"`
|
||||
NotAfter string `json:"notAfter,omitempty"`
|
||||
Identifiers []IdentifierJSON `json:"identifiers"`
|
||||
Authorizations []string `json:"authorizations"`
|
||||
Finalize string `json:"finalize"`
|
||||
Certificate string `json:"certificate,omitempty"`
|
||||
Error *Problem `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// IdentifierJSON is the JSON form of an ACME identifier: a type
// (for example "dns") and its value. It is kept as a dedicated wire
// type so the protocol surface stays separate from the domain model.
type IdentifierJSON struct {
	// Type is the identifier type, serialized as "type".
	Type string `json:"type"`
	// Value is the identifier value, serialized as "value".
	Value string `json:"value"`
}
|
||||
|
||||
// MarshalOrder renders an ACMEOrder in RFC 8555 §7.1.3 wire shape.
|
||||
//
|
||||
// authzURLs / finalizeURL / certURL are computed by the handler from
|
||||
// the inbound request (scheme + host + per-profile path). Phase 2:
|
||||
// authzURLs has one entry per identifier; finalizeURL is the order's
|
||||
// finalize endpoint; certURL is populated only when status=valid.
|
||||
func MarshalOrder(order *domain.ACMEOrder, authzURLs []string, finalizeURL, certURL string) OrderResponseJSON {
|
||||
out := OrderResponseJSON{
|
||||
Status: string(order.Status),
|
||||
Expires: order.ExpiresAt.UTC().Format(time.RFC3339),
|
||||
Identifiers: make([]IdentifierJSON, 0, len(order.Identifiers)),
|
||||
Authorizations: authzURLs,
|
||||
Finalize: finalizeURL,
|
||||
}
|
||||
if order.NotBefore != nil {
|
||||
out.NotBefore = order.NotBefore.UTC().Format(time.RFC3339)
|
||||
}
|
||||
if order.NotAfter != nil {
|
||||
out.NotAfter = order.NotAfter.UTC().Format(time.RFC3339)
|
||||
}
|
||||
for _, id := range order.Identifiers {
|
||||
out.Identifiers = append(out.Identifiers, IdentifierJSON{Type: id.Type, Value: id.Value})
|
||||
}
|
||||
if certURL != "" && order.Status == domain.ACMEOrderStatusValid {
|
||||
out.Certificate = certURL
|
||||
}
|
||||
if order.Error != nil {
|
||||
out.Error = &Problem{
|
||||
Type: order.Error.Type,
|
||||
Detail: order.Error.Detail,
|
||||
Status: order.Error.Status,
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// NewOrderRequest is the payload shape RFC 8555 §7.4 mandates for a
|
||||
// new-order POST. The handler json.Unmarshals VerifiedRequest.Payload
|
||||
// into this struct after JWS verify succeeds.
|
||||
type NewOrderRequest struct {
|
||||
Identifiers []IdentifierJSON `json:"identifiers"`
|
||||
NotBefore string `json:"notBefore,omitempty"`
|
||||
NotAfter string `json:"notAfter,omitempty"`
|
||||
}
|
||||
|
||||
// FinalizeRequest is the JSON payload of the order finalize POST
// (RFC 8555 §7.4).
type FinalizeRequest struct {
	// CSR is the base64url-encoded DER of a PKCS#10 certificate
	// signing request, carried under the "csr" key.
	CSR string `json:"csr"`
}
|
||||
|
||||
// RevokeCertRequest is the JSON payload of a revoke-cert POST
// (RFC 8555 §7.6).
type RevokeCertRequest struct {
	// Certificate is the base64url-encoded DER of the certificate to
	// revoke.
	Certificate string `json:"certificate"`
	// Reason is the optional numeric revocation reason code
	// (RFC 5280 §5.3.1). An absent field decodes to 0, and 0 is
	// omitted when encoding.
	Reason int `json:"reason,omitempty"`
}
|
||||
|
||||
// AuthorizationResponseJSON is the wire shape RFC 8555 §7.1.4 mandates
|
||||
// for the authz GET (POST-as-GET) response.
|
||||
type AuthorizationResponseJSON struct {
|
||||
Identifier IdentifierJSON `json:"identifier"`
|
||||
Status string `json:"status"`
|
||||
Expires string `json:"expires,omitempty"`
|
||||
Wildcard bool `json:"wildcard,omitempty"`
|
||||
Challenges []ChallengeResponseJSON `json:"challenges"`
|
||||
}
|
||||
|
||||
// ChallengeResponseJSON is the wire shape RFC 8555 §8 mandates for a
|
||||
// challenge object (embedded in authz, or returned by POST to a
|
||||
// challenge URL).
|
||||
type ChallengeResponseJSON struct {
|
||||
Type string `json:"type"`
|
||||
URL string `json:"url"`
|
||||
Status string `json:"status"`
|
||||
Token string `json:"token"`
|
||||
Validated string `json:"validated,omitempty"`
|
||||
Error *Problem `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// MarshalAuthorization renders an ACMEAuthorization in RFC 8555 wire shape.
|
||||
// challengeURLBuilder maps each challenge ID to its per-profile URL
|
||||
// (handler-computed); identifiers stay as-is.
|
||||
func MarshalAuthorization(authz *domain.ACMEAuthorization, challengeURLBuilder func(challengeID string) string) AuthorizationResponseJSON {
|
||||
out := AuthorizationResponseJSON{
|
||||
Identifier: IdentifierJSON{Type: authz.Identifier.Type, Value: authz.Identifier.Value},
|
||||
Status: string(authz.Status),
|
||||
Expires: authz.ExpiresAt.UTC().Format(time.RFC3339),
|
||||
Wildcard: authz.Wildcard,
|
||||
Challenges: make([]ChallengeResponseJSON, 0, len(authz.Challenges)),
|
||||
}
|
||||
for i := range authz.Challenges {
|
||||
ch := &authz.Challenges[i]
|
||||
j := ChallengeResponseJSON{
|
||||
Type: string(ch.Type),
|
||||
URL: challengeURLBuilder(ch.ChallengeID),
|
||||
Status: string(ch.Status),
|
||||
Token: ch.Token,
|
||||
}
|
||||
if ch.ValidatedAt != nil {
|
||||
j.Validated = ch.ValidatedAt.UTC().Format(time.RFC3339)
|
||||
}
|
||||
if ch.Error != nil {
|
||||
j.Error = &Problem{Type: ch.Error.Type, Detail: ch.Error.Detail, Status: ch.Error.Status}
|
||||
}
|
||||
out.Challenges = append(out.Challenges, j)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Sentinel errors for identifier validation.
var (
	// ErrIdentifierTypeUnsupported signals an identifier whose type is
	// not "dns"; Phase 2 supports DNS identifiers only.
	ErrIdentifierTypeUnsupported = errors.New("acme: identifier type not supported (Phase 2: dns only)")

	// ErrIdentifierEmpty signals an identifier whose value is the
	// empty string.
	ErrIdentifierEmpty = errors.New("acme: identifier value is empty")
)
|
||||
|
||||
// ValidateIdentifiers checks the structural invariants RFC 8555 §7.4
|
||||
// requires (non-empty value, supported type) and returns per-identifier
|
||||
// rejected entries on failure. Per-profile-policy rejection (SAN
|
||||
// allowlist, lifetime cap) is the service layer's job; this function
|
||||
// is the syntactic check only.
|
||||
//
|
||||
// Returns nil + nil ids on full acceptance. On rejection, returns the
|
||||
// list of rejected identifiers with their reason as RFC 8555 §6.7
|
||||
// subproblems (rejectedIdentifier).
|
||||
func ValidateIdentifiers(ids []IdentifierJSON) []Problem {
|
||||
if len(ids) == 0 {
|
||||
return []Problem{Malformed("new-order requires at least one identifier")}
|
||||
}
|
||||
var problems []Problem
|
||||
for _, id := range ids {
|
||||
switch strings.ToLower(id.Type) {
|
||||
case "dns":
|
||||
if id.Value == "" {
|
||||
problems = append(problems, Problem{
|
||||
Type: "urn:ietf:params:acme:error:rejectedIdentifier",
|
||||
Detail: "identifier value is empty",
|
||||
Status: 400,
|
||||
Identifier: &Identifier{Type: id.Type, Value: id.Value},
|
||||
})
|
||||
}
|
||||
default:
|
||||
problems = append(problems, Problem{
|
||||
Type: "urn:ietf:params:acme:error:rejectedIdentifier",
|
||||
Detail: fmt.Sprintf("identifier type %q is not supported (Phase 2: dns only)", id.Type),
|
||||
Status: 400,
|
||||
Identifier: &Identifier{Type: id.Type, Value: id.Value},
|
||||
})
|
||||
}
|
||||
}
|
||||
return problems
|
||||
}
|
||||
|
||||
// CSRMatchesIdentifiers asserts the CSR's DNS-name set (Subject CN +
|
||||
// Subject Alternative Names) equals the order's identifier set,
|
||||
// case-folded for DNS comparison.
|
||||
//
|
||||
// RFC 8555 §7.4 finalize: "The CSR MUST indicate the exact same set of
|
||||
// requested identifiers as the initial newOrder request." Case-fold
|
||||
// the comparison so a CSR with `Example.com` matches an order with
|
||||
// `example.com` (DNS is case-insensitive per RFC 1035 §2.3.3).
|
||||
//
|
||||
// Returns nil on match. On mismatch, returns a Problem typed as
|
||||
// urn:ietf:params:acme:error:badCSR.
|
||||
func CSRMatchesIdentifiers(csr *x509.CertificateRequest, identifiers []domain.ACMEIdentifier) *Problem {
|
||||
csrSet := make(map[string]struct{})
|
||||
if csr.Subject.CommonName != "" {
|
||||
csrSet[strings.ToLower(csr.Subject.CommonName)] = struct{}{}
|
||||
}
|
||||
for _, dns := range csr.DNSNames {
|
||||
csrSet[strings.ToLower(dns)] = struct{}{}
|
||||
}
|
||||
|
||||
orderSet := make(map[string]struct{})
|
||||
for _, id := range identifiers {
|
||||
if id.Type != "dns" {
|
||||
continue
|
||||
}
|
||||
orderSet[strings.ToLower(id.Value)] = struct{}{}
|
||||
}
|
||||
|
||||
if len(csrSet) != len(orderSet) {
|
||||
p := Problem{
|
||||
Type: "urn:ietf:params:acme:error:badCSR",
|
||||
Detail: fmt.Sprintf("CSR identifier count (%d) differs from order identifier count (%d)", len(csrSet), len(orderSet)),
|
||||
Status: 400,
|
||||
}
|
||||
return &p
|
||||
}
|
||||
for k := range orderSet {
|
||||
if _, ok := csrSet[k]; !ok {
|
||||
p := Problem{
|
||||
Type: "urn:ietf:params:acme:error:badCSR",
|
||||
Detail: fmt.Sprintf("CSR is missing the order identifier %q", k),
|
||||
Status: 400,
|
||||
}
|
||||
return &p
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasWildcard returns true when any identifier is a wildcard. RFC 8555
|
||||
// §7.1.3 marks the order's authz wildcard:true when the corresponding
|
||||
// identifier starts with "*."; Phase 2 supports the trust_authenticated
|
||||
// path (which auto-marks authz valid), so wildcard-aware challenge
|
||||
// dispatch is Phase 3's concern.
|
||||
func HasWildcard(ids []domain.ACMEIdentifier) bool {
|
||||
for _, id := range ids {
|
||||
if strings.HasPrefix(id.Value, "*.") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -0,0 +1,362 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"math/big"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// --- Test fixtures + helpers -------------------------------------------
|
||||
|
||||
func newTestRSAJWK(t *testing.T) (*rsa.PrivateKey, *jose.JSONWebKey) {
|
||||
t.Helper()
|
||||
priv, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("rsa.GenerateKey: %v", err)
|
||||
}
|
||||
jwk := &jose.JSONWebKey{Key: priv.Public(), Algorithm: string(jose.RS256), Use: "sig"}
|
||||
return priv, jwk
|
||||
}
|
||||
|
||||
func newTestECDSAJWK(t *testing.T) (*ecdsa.PrivateKey, *jose.JSONWebKey) {
|
||||
t.Helper()
|
||||
priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("ecdsa.GenerateKey: %v", err)
|
||||
}
|
||||
jwk := &jose.JSONWebKey{Key: priv.Public(), Algorithm: string(jose.ES256), Use: "sig"}
|
||||
return priv, jwk
|
||||
}
|
||||
|
||||
// signWithEmbeddedJWK builds an RFC-7515-compatible compact-serialized JWS with
|
||||
// the given protected header + payload, signed by signer. Used for
|
||||
// constructing inner-key-change blobs in tests.
|
||||
func signWithEmbeddedJWK(t *testing.T, signer interface{}, alg jose.SignatureAlgorithm, payload []byte, headers map[jose.HeaderKey]interface{}, embedJWK *jose.JSONWebKey) string {
|
||||
t.Helper()
|
||||
opts := &jose.SignerOptions{ExtraHeaders: headers}
|
||||
if embedJWK != nil {
|
||||
opts = opts.WithHeader("jwk", embedJWK)
|
||||
}
|
||||
sigSigner, err := jose.NewSigner(jose.SigningKey{Algorithm: alg, Key: signer}, opts)
|
||||
if err != nil {
|
||||
t.Fatalf("NewSigner: %v", err)
|
||||
}
|
||||
obj, err := sigSigner.Sign(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("Sign: %v", err)
|
||||
}
|
||||
out, err := obj.CompactSerialize()
|
||||
if err != nil {
|
||||
t.Fatalf("CompactSerialize: %v", err)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// --- KeyChange tests ----------------------------------------------------
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_HappyPath(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
newPriv, newJWK := newTestECDSAJWK(t)
|
||||
|
||||
url := "https://example.com/acme/profile/p1/key-change"
|
||||
kid := "https://example.com/acme/profile/p1/account/acme-acc-abc"
|
||||
payloadJSON, err := json.Marshal(KeyChangeInnerPayload{
|
||||
Account: kid,
|
||||
OldKey: oldJWK,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("marshal payload: %v", err)
|
||||
}
|
||||
headers := map[jose.HeaderKey]interface{}{"url": url}
|
||||
innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK)
|
||||
|
||||
got, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseAndVerifyKeyChangeInner: %v", err)
|
||||
}
|
||||
if got.Payload.Account != kid {
|
||||
t.Errorf("payload.Account = %q, want %q", got.Payload.Account, kid)
|
||||
}
|
||||
if got.URL != url {
|
||||
t.Errorf("URL = %q, want %q", got.URL, url)
|
||||
}
|
||||
if got.NewJWK == nil {
|
||||
t.Errorf("NewJWK is nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_OldKeyMismatch(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
_, otherJWK := newTestRSAJWK(t)
|
||||
newPriv, newJWK := newTestECDSAJWK(t)
|
||||
|
||||
url := "https://example.com/acme/profile/p1/key-change"
|
||||
kid := "https://example.com/acme/profile/p1/account/acme-acc-abc"
|
||||
// payload claims an oldKey that doesn't match what's registered.
|
||||
payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: otherJWK})
|
||||
headers := map[jose.HeaderKey]interface{}{"url": url}
|
||||
innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK)
|
||||
|
||||
_, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK)
|
||||
if !errors.Is(err, ErrKeyChangeInnerOldKeyMismatch) {
|
||||
t.Errorf("got err=%v, want ErrKeyChangeInnerOldKeyMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_AccountMismatch(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
newPriv, newJWK := newTestECDSAJWK(t)
|
||||
|
||||
url := "https://example.com/acme/profile/p1/key-change"
|
||||
outerKID := "https://example.com/acme/profile/p1/account/acme-acc-abc"
|
||||
// payload.Account does NOT equal outer.kid.
|
||||
payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{
|
||||
Account: "https://example.com/acme/profile/p1/account/acme-acc-DIFFERENT",
|
||||
OldKey: oldJWK,
|
||||
})
|
||||
headers := map[jose.HeaderKey]interface{}{"url": url}
|
||||
innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK)
|
||||
|
||||
_, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), outerKID, url, oldJWK)
|
||||
if !errors.Is(err, ErrKeyChangeInnerAccountMismatch) {
|
||||
t.Errorf("got err=%v, want ErrKeyChangeInnerAccountMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_URLMismatch(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
newPriv, newJWK := newTestECDSAJWK(t)
|
||||
|
||||
innerURL := "https://example.com/acme/profile/p1/key-change"
|
||||
outerURL := "https://example.com/acme/profile/p1/key-change-different"
|
||||
kid := "https://example.com/acme/profile/p1/account/acme-acc-abc"
|
||||
payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: oldJWK})
|
||||
headers := map[jose.HeaderKey]interface{}{"url": innerURL}
|
||||
innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK)
|
||||
|
||||
_, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, outerURL, oldJWK)
|
||||
if !errors.Is(err, ErrKeyChangeInnerURLMismatch) {
|
||||
t.Errorf("got err=%v, want ErrKeyChangeInnerURLMismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_BadSignature(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
newPriv, newJWK := newTestECDSAJWK(t)
|
||||
_, otherJWK := newTestECDSAJWK(t) // different key embedded vs. signer
|
||||
|
||||
url := "https://example.com/acme/profile/p1/key-change"
|
||||
kid := "https://example.com/acme/profile/p1/account/acme-acc-abc"
|
||||
payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: oldJWK})
|
||||
headers := map[jose.HeaderKey]interface{}{"url": url}
|
||||
// Sign with newPriv but embed otherJWK — verification against the
|
||||
// embedded jwk will fail since the signer didn't possess otherJWK's
|
||||
// private key.
|
||||
innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, otherJWK)
|
||||
|
||||
_, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK)
|
||||
if !errors.Is(err, ErrKeyChangeInnerSignatureBad) {
|
||||
t.Errorf("got err=%v, want ErrKeyChangeInnerSignatureBad", err)
|
||||
}
|
||||
_ = newJWK
|
||||
}
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_MalformedJWS(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
_, err := ParseAndVerifyKeyChangeInner([]byte("not-a-jws"), "kid", "url", oldJWK)
|
||||
if !errors.Is(err, ErrKeyChangeInnerMalformed) {
|
||||
t.Errorf("got err=%v, want ErrKeyChangeInnerMalformed", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAndVerifyKeyChangeInner_MissingURL(t *testing.T) {
|
||||
_, oldJWK := newTestRSAJWK(t)
|
||||
newPriv, newJWK := newTestECDSAJWK(t)
|
||||
|
||||
url := "https://example.com/acme/profile/p1/key-change"
|
||||
kid := "https://example.com/acme/profile/p1/account/acme-acc-abc"
|
||||
payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: oldJWK})
|
||||
// No `url` header.
|
||||
innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, nil, newJWK)
|
||||
|
||||
_, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK)
|
||||
if !errors.Is(err, ErrKeyChangeInnerURLMissing) {
|
||||
t.Errorf("got err=%v, want ErrKeyChangeInnerURLMissing", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMapKeyChangeErrorToProblem_Coverage(t *testing.T) {
|
||||
cases := []struct {
|
||||
err error
|
||||
wantType string
|
||||
}{
|
||||
{ErrKeyChangeInnerSignatureBad, "urn:ietf:params:acme:error:unauthorized"},
|
||||
{ErrKeyChangeInnerOldKeyMismatch, "urn:ietf:params:acme:error:unauthorized"},
|
||||
{ErrKeyChangeInnerAccountMismatch, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrKeyChangeInnerForbidsKID, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrKeyChangeInnerMissingJWK, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrKeyChangeInnerOldKeyMissing, "urn:ietf:params:acme:error:malformed"},
|
||||
{ErrKeyChangeInnerURLMismatch, "urn:ietf:params:acme:error:unauthorized"},
|
||||
{ErrKeyChangeInnerMalformed, "urn:ietf:params:acme:error:malformed"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := MapKeyChangeErrorToProblem(c.err)
|
||||
if got.Type != c.wantType {
|
||||
t.Errorf("err=%v: got type %q, want %q", c.err, got.Type, c.wantType)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- ARI tests ----------------------------------------------------------
|
||||
|
||||
func TestParseARICertID_Roundtrip(t *testing.T) {
|
||||
aki := []byte{0xde, 0xad, 0xbe, 0xef, 0x01, 0x02}
|
||||
serial := []byte{0x12, 0x34, 0x56, 0x78}
|
||||
certID := base64.RawURLEncoding.EncodeToString(aki) + "." + base64.RawURLEncoding.EncodeToString(serial)
|
||||
|
||||
got, err := ParseARICertID(certID)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseARICertID: %v", err)
|
||||
}
|
||||
if string(got.AKI) != string(aki) {
|
||||
t.Errorf("AKI: got %x, want %x", got.AKI, aki)
|
||||
}
|
||||
if string(got.Serial) != string(serial) {
|
||||
t.Errorf("Serial: got %x, want %x", got.Serial, serial)
|
||||
}
|
||||
// SerialHex must match canonical certctl shape.
|
||||
wantSerialHex := "12345678"
|
||||
if got.SerialHex() != wantSerialHex {
|
||||
t.Errorf("SerialHex: got %q, want %q", got.SerialHex(), wantSerialHex)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseARICertID_Malformed(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
certID string
|
||||
wantErr error
|
||||
}{
|
||||
{"missing dot", "abc123nodot", ErrARICertIDMalformed},
|
||||
{"too many dots", "a.b.c", ErrARICertIDMalformed},
|
||||
{"empty aki", ".YWJj", ErrARICertIDEmpty},
|
||||
{"empty serial", "YWJj.", ErrARICertIDEmpty},
|
||||
{"non-base64 aki", "!!!!.YWJj", ErrARICertIDDecodeAKI},
|
||||
{"non-base64 serial", "YWJj.!!!!", ErrARICertIDDecodeSeria},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
_, err := ParseARICertID(c.certID)
|
||||
if !errors.Is(err, c.wantErr) {
|
||||
t.Errorf("got err=%v, want %v", err, c.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildARICertID_FromGeneratedCert(t *testing.T) {
|
||||
// Build a self-signed cert with an explicit AKI and serial.
|
||||
priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
|
||||
if err != nil {
|
||||
t.Fatalf("genkey: %v", err)
|
||||
}
|
||||
template := &x509.Certificate{
|
||||
SerialNumber: big.NewInt(0x12345678),
|
||||
Subject: pkix.Name{CommonName: "test"},
|
||||
NotBefore: time.Now(),
|
||||
NotAfter: time.Now().Add(24 * time.Hour),
|
||||
AuthorityKeyId: []byte{0xde, 0xad, 0xbe, 0xef},
|
||||
BasicConstraintsValid: true,
|
||||
IsCA: true,
|
||||
}
|
||||
der, err := x509.CreateCertificate(rand.Reader, template, template, &priv.PublicKey, priv)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateCertificate: %v", err)
|
||||
}
|
||||
certPEM := string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}))
|
||||
|
||||
certID, err := BuildARICertID(certPEM)
|
||||
if err != nil {
|
||||
t.Fatalf("BuildARICertID: %v", err)
|
||||
}
|
||||
parts := strings.Split(certID, ".")
|
||||
if len(parts) != 2 {
|
||||
t.Fatalf("got %d parts, want 2", len(parts))
|
||||
}
|
||||
wantAKI := base64.RawURLEncoding.EncodeToString([]byte{0xde, 0xad, 0xbe, 0xef})
|
||||
if parts[0] != wantAKI {
|
||||
t.Errorf("AKI part: got %q, want %q", parts[0], wantAKI)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeRenewalWindow_WithPolicy(t *testing.T) {
|
||||
now := time.Date(2026, 5, 1, 0, 0, 0, 0, time.UTC)
|
||||
notAfter := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC) // 61 days out
|
||||
cert := &domain.ManagedCertificate{ExpiresAt: notAfter}
|
||||
version := &domain.CertificateVersion{
|
||||
NotBefore: time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC),
|
||||
NotAfter: notAfter,
|
||||
}
|
||||
policy := &domain.RenewalPolicy{RenewalWindowDays: 30}
|
||||
|
||||
start, end := ComputeRenewalWindow(cert, version, policy, now)
|
||||
wantStart := notAfter.Add(-30 * 24 * time.Hour) // 2026-06-01
|
||||
wantEnd := wantStart.Add(15 * 24 * time.Hour) // 2026-06-16
|
||||
if !start.Equal(wantStart) {
|
||||
t.Errorf("start: got %v, want %v", start, wantStart)
|
||||
}
|
||||
if !end.Equal(wantEnd) {
|
||||
t.Errorf("end: got %v, want %v", end, wantEnd)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeRenewalWindow_NoPolicy_LastThird(t *testing.T) {
|
||||
now := time.Date(2026, 5, 1, 0, 0, 0, 0, time.UTC)
|
||||
notBefore := time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC)
|
||||
notAfter := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC) // 91-day validity
|
||||
cert := &domain.ManagedCertificate{ExpiresAt: notAfter}
|
||||
version := &domain.CertificateVersion{NotBefore: notBefore, NotAfter: notAfter}
|
||||
|
||||
start, end := ComputeRenewalWindow(cert, version, nil, now)
|
||||
// Validity = 91 days; thirty3 ~30d, end_offset = 10d. Start is in
|
||||
// the future from `now` (Jun 2026), so no clamp.
|
||||
if start.Before(now) {
|
||||
t.Errorf("start before now: got %v", start)
|
||||
}
|
||||
if !end.After(start) && !end.Equal(start) {
|
||||
t.Errorf("end before start: start=%v end=%v", start, end)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeRenewalWindow_PastExpiry_RenewNow(t *testing.T) {
|
||||
now := time.Date(2026, 8, 1, 0, 0, 0, 0, time.UTC)
|
||||
notAfter := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC) // 1 month ago
|
||||
cert := &domain.ManagedCertificate{ExpiresAt: notAfter}
|
||||
|
||||
start, end := ComputeRenewalWindow(cert, nil, nil, now)
|
||||
// Expect a "renew now" 1-day window starting at now.
|
||||
if !start.Equal(now) {
|
||||
t.Errorf("start: got %v, want %v", start, now)
|
||||
}
|
||||
if want := now.Add(24 * time.Hour); !end.Equal(want) {
|
||||
t.Errorf("end: got %v, want %v", end, want)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,166 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Phase 5 — per-account rolling-hour rate limiter for ACME operations.
|
||||
//
|
||||
// Architecture:
|
||||
// - In-memory token-bucket per (key, action). Restart wipes the
|
||||
// buckets; orders/hour caps are eventual-consistency so this is
|
||||
// acceptable. Persistent rate limiting is a follow-up if production
|
||||
// telemetry shows abuse patterns we can't catch in a single restart
|
||||
// cycle (master prompt criterion #11 explicitly accepts this).
|
||||
// - Tokens-per-hour math: bucket capacity = perHour, refill rate =
|
||||
// perHour / 3600 tokens/sec. A fresh bucket starts full; an over-
|
||||
// limit caller drains it then has to wait for replenishment.
|
||||
// - Key shape is action-specific: orders use accountID; key-rollover
|
||||
// uses accountID; challenge-respond uses challengeID (so a flood
|
||||
// against one challenge doesn't burn the whole account's budget).
|
||||
//
|
||||
// Concurrency: the outer map is RWMutex-guarded for create-on-demand;
|
||||
// per-bucket allow() takes a tiny per-bucket Mutex. Mirrors the
|
||||
// existing internal/api/middleware/middleware.go::keyedRateLimiter
|
||||
// pattern (different scope, same shape).
|
||||
|
||||
// RateLimiter is a pool of token buckets, one per (action, keyID)
// pair. Construct it with NewRateLimiter and share a single instance
// between all entry points so they draw from the same buckets.
//
// Buckets are created on first use and are never removed by this type,
// so memory grows with the number of distinct (action, keyID) pairs.
type RateLimiter struct {
	mu      sync.RWMutex
	buckets map[string]*rlBucket // keyed by "<action>|<keyID>"
	clock   func() time.Time     // injectable for tests
}

// NewRateLimiter returns an empty RateLimiter that reads time from
// time.Now. Buckets are created on first reference, so a fresh limiter
// does no work until traffic arrives.
func NewRateLimiter() *RateLimiter {
	return &RateLimiter{
		buckets: make(map[string]*rlBucket),
		clock:   time.Now,
	}
}

// SetClock replaces the time source; a nil argument is ignored. It is
// intended for tests and is not synchronized with Allow / RetryAfter,
// so call it before the limiter is used concurrently.
func (r *RateLimiter) SetClock(now func() time.Time) {
	if now != nil {
		r.clock = now
	}
}

// Allow reports whether the (action, keyID) bucket has at least one
// token and, if so, consumes it. perHour <= 0 disables the limit and
// always returns true without creating a bucket.
//
// A bucket starts full with perHour tokens and refills continuously at
// perHour/3600 tokens per second, capped at perHour. perHour is only
// consulted when the bucket is first created; later calls with a
// different perHour for the same key keep the original capacity and
// refill rate.
func (r *RateLimiter) Allow(action, keyID string, perHour int) bool {
	if perHour <= 0 {
		return true
	}
	bucketKey := action + "|" + keyID

	// Fast path: an existing bucket only needs the read lock.
	r.mu.RLock()
	b, ok := r.buckets[bucketKey]
	r.mu.RUnlock()

	if !ok {
		r.mu.Lock()
		// Re-check under the write lock: another goroutine may have
		// created the bucket between the two lock acquisitions.
		if b, ok = r.buckets[bucketKey]; !ok {
			b = &rlBucket{
				capacity:   float64(perHour),
				refillRate: float64(perHour) / 3600.0, // tokens/sec
				tokens:     float64(perHour),
				lastRefill: r.clock(),
			}
			r.buckets[bucketKey] = b
		}
		r.mu.Unlock()
	}
	return b.allow(r.clock)
}

// RetryAfter returns how long the caller should wait until the
// (action, keyID) bucket holds at least one token. It returns 0 when
// the limit is disabled (perHour <= 0), when no bucket exists yet, or
// when a token is available now. Used by the handler to emit a
// Retry-After header on rateLimited responses.
//
// The estimate accounts for tokens that have refilled since the
// bucket's last update, so it stays accurate when called some time
// after the rejecting Allow call. It does not consume tokens or
// otherwise modify the bucket.
func (r *RateLimiter) RetryAfter(action, keyID string, perHour int) time.Duration {
	if perHour <= 0 {
		return 0
	}
	r.mu.RLock()
	b, ok := r.buckets[action+"|"+keyID]
	r.mu.RUnlock()
	if !ok {
		return 0
	}
	return b.retryAfter(r.clock())
}

// rlBucket is the token bucket for a single (action, keyID) pair.
// All fields below mu are guarded by mu.
type rlBucket struct {
	mu         sync.Mutex
	capacity   float64
	refillRate float64 // tokens per second
	tokens     float64
	lastRefill time.Time
}

// allow credits the tokens earned since lastRefill (capped at
// capacity), then consumes one token if at least one is available.
// It reports whether a token was consumed.
func (b *rlBucket) allow(clock func() time.Time) bool {
	b.mu.Lock()
	defer b.mu.Unlock()

	now := clock()
	// Only a positive elapsed time moves the bucket forward; a clock
	// that has not advanced (or went backwards) leaves it untouched.
	if elapsed := now.Sub(b.lastRefill).Seconds(); elapsed > 0 {
		b.tokens += elapsed * b.refillRate
		if b.tokens > b.capacity {
			b.tokens = b.capacity
		}
		b.lastRefill = now
	}
	if b.tokens < 1 {
		return false
	}
	b.tokens--
	return true
}

// retryAfter returns the wait, measured from now, until the bucket
// holds one token. It projects the refill up to now without mutating
// the bucket.
func (b *rlBucket) retryAfter(now time.Time) time.Duration {
	b.mu.Lock()
	defer b.mu.Unlock()

	projected := b.tokens
	if elapsed := now.Sub(b.lastRefill).Seconds(); elapsed > 0 {
		projected += elapsed * b.refillRate
		if projected > b.capacity {
			projected = b.capacity
		}
	}
	if projected >= 1 {
		return 0
	}
	if b.refillRate <= 0 {
		// Not reachable through Allow (it never creates a bucket for
		// perHour <= 0), but guard the division below regardless.
		return time.Hour
	}
	secs := (1 - projected) / b.refillRate
	return time.Duration(secs * float64(time.Second))
}
|
||||
|
||||
// Rate-limited action names. Each one is the `<action>` prefix of a
// bucket key (`<action>|<keyID>`), so this block is the single source
// of truth for those prefixes. They are plain string constants rather
// than iota values so the names stay meaningful outside this process
// if shared-state rate limiting is added later.
const (
	// ActionNewOrder is the action name for new-order requests.
	ActionNewOrder = "new_order"
	// ActionKeyChange is the action name for account key rollover.
	ActionKeyChange = "key_change"
	// ActionChallengeRespond is the action name for challenge responses.
	ActionChallengeRespond = "challenge_respond"
)
|
||||
|
||||
// ErrRateLimited is the sentinel error that service-layer entry points
// return when a request is rejected by the rate limiter. The handler
// is expected to translate it into an RFC 7807 problem of type
// `urn:ietf:params:acme:error:rateLimited` (RFC 8555 §6.7) together
// with a Retry-After header. Match it with errors.Is.
var ErrRateLimited = errors.New("acme: rate limit exceeded")
|
||||
@@ -0,0 +1,159 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Phase 5 — RateLimiter unit tests.
|
||||
|
||||
func TestRateLimiter_DisabledWhenPerHourZero(t *testing.T) {
|
||||
r := NewRateLimiter()
|
||||
for i := 0; i < 10000; i++ {
|
||||
if !r.Allow(ActionNewOrder, "acc-1", 0) {
|
||||
t.Fatalf("Allow returned false on call %d with perHour=0", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_DisabledWhenPerHourNegative(t *testing.T) {
|
||||
r := NewRateLimiter()
|
||||
if !r.Allow(ActionNewOrder, "acc-1", -5) {
|
||||
t.Errorf("Allow returned false with perHour=-5; expected always-allow")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_BucketCapacity(t *testing.T) {
|
||||
// Frozen clock: a fresh bucket has perHour tokens. Drain exactly
|
||||
// that many; the next call must return false.
|
||||
now := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
|
||||
r := NewRateLimiter()
|
||||
r.SetClock(func() time.Time { return now })
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
if !r.Allow(ActionNewOrder, "acc-1", 100) {
|
||||
t.Fatalf("Allow returned false on call %d (within capacity)", i)
|
||||
}
|
||||
}
|
||||
if r.Allow(ActionNewOrder, "acc-1", 100) {
|
||||
t.Errorf("Allow returned true on the 101st call; expected limit hit")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_PerKeyIsolation(t *testing.T) {
|
||||
// Frozen clock — drain acc-1 to zero, then acc-2 should still have
|
||||
// a full bucket (separate key).
|
||||
now := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
|
||||
r := NewRateLimiter()
|
||||
r.SetClock(func() time.Time { return now })
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
_ = r.Allow(ActionNewOrder, "acc-1", 100)
|
||||
}
|
||||
if r.Allow(ActionNewOrder, "acc-1", 100) {
|
||||
t.Errorf("acc-1 should be rate-limited")
|
||||
}
|
||||
if !r.Allow(ActionNewOrder, "acc-2", 100) {
|
||||
t.Errorf("acc-2 should be unaffected by acc-1's bucket; expected allow")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_PerActionIsolation(t *testing.T) {
|
||||
// Same key but different actions get different buckets.
|
||||
now := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
|
||||
r := NewRateLimiter()
|
||||
r.SetClock(func() time.Time { return now })
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
_ = r.Allow(ActionKeyChange, "acc-1", 5)
|
||||
}
|
||||
if r.Allow(ActionKeyChange, "acc-1", 5) {
|
||||
t.Errorf("ActionKeyChange should be rate-limited")
|
||||
}
|
||||
// ActionNewOrder for the same key has its own (empty) bucket.
|
||||
if !r.Allow(ActionNewOrder, "acc-1", 100) {
|
||||
t.Errorf("ActionNewOrder for same key should be allowed (different bucket)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_RefillOverTime(t *testing.T) {
|
||||
// Drain bucket; advance the clock; expect tokens replenished.
|
||||
current := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
|
||||
r := NewRateLimiter()
|
||||
r.SetClock(func() time.Time { return current })
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
_ = r.Allow(ActionNewOrder, "acc-1", 100)
|
||||
}
|
||||
if r.Allow(ActionNewOrder, "acc-1", 100) {
|
||||
t.Fatalf("expected limit hit after draining bucket")
|
||||
}
|
||||
// Advance by 36 seconds: at 100/hour = 100/3600 tokens/sec ≈
|
||||
// 0.0278/sec. 36 * 0.0278 = 1.00 tokens — exactly enough for 1
|
||||
// more call.
|
||||
current = current.Add(36 * time.Second)
|
||||
if !r.Allow(ActionNewOrder, "acc-1", 100) {
|
||||
t.Errorf("Allow returned false after 36s elapsed; expected ≥1 token replenished")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_RetryAfter(t *testing.T) {
|
||||
now := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
|
||||
r := NewRateLimiter()
|
||||
r.SetClock(func() time.Time { return now })
|
||||
|
||||
// Drain to zero.
|
||||
for i := 0; i < 100; i++ {
|
||||
_ = r.Allow(ActionNewOrder, "acc-1", 100)
|
||||
}
|
||||
d := r.RetryAfter(ActionNewOrder, "acc-1", 100)
|
||||
// 1 token at 100/hour = 36 seconds.
|
||||
if d < 35*time.Second || d > 37*time.Second {
|
||||
t.Errorf("RetryAfter = %v, expected ~36s", d)
|
||||
}
|
||||
// Allow above capacity — RetryAfter returns 0 on a fresh bucket.
|
||||
if zero := r.RetryAfter(ActionNewOrder, "acc-fresh", 100); zero != 0 {
|
||||
t.Errorf("RetryAfter for fresh bucket = %v, expected 0", zero)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRateLimiter_ConcurrentAccess(t *testing.T) {
|
||||
// Hammer 200 goroutines × 200 calls each = 40000 calls against a
|
||||
// 1000-token bucket; assert no panic, no data race (run with -race),
|
||||
// and that no more than 1000 calls succeeded.
|
||||
now := time.Date(2026, 5, 3, 12, 0, 0, 0, time.UTC)
|
||||
r := NewRateLimiter()
|
||||
r.SetClock(func() time.Time { return now })
|
||||
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
success int64
|
||||
mu sync.Mutex
|
||||
)
|
||||
for g := 0; g < 200; g++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
local := int64(0)
|
||||
for i := 0; i < 200; i++ {
|
||||
if r.Allow(ActionNewOrder, "shared-acc", 1000) {
|
||||
local++
|
||||
}
|
||||
}
|
||||
mu.Lock()
|
||||
success += local
|
||||
mu.Unlock()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
if success > 1000 {
|
||||
t.Errorf("got %d successes, want ≤ 1000 (bucket capacity)", success)
|
||||
}
|
||||
if success < 1000 {
|
||||
t.Errorf("got %d successes, want exactly 1000 (frozen clock, no refill)", success)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,463 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/asn1"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sync/semaphore"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/validation"
|
||||
)
|
||||
|
||||
// ChallengeValidator is implemented by each challenge-validation
// worker. The Pool routes Validate calls to the validator registered
// for the challenge type; the validator owns the protocol details
// (HTTP fetch, DNS TXT lookup, or TLS-ALPN-01 handshake).
//
// Validators manage their own per-attempt timeouts. The bounded
// context the Pool passes in (30s per challenge by default) is the
// outer limit.
type ChallengeValidator interface {
	// Type names the challenge type this validator handles:
	// "http-01", "dns-01" or "tls-alpn-01". The Pool uses it for
	// dispatch and for metrics labels.
	Type() string

	// Validate runs the protocol-specific check and returns nil when
	// the challenge is satisfied.
	//
	//   - domain is the identifier value: a DNS name, which for DNS-01
	//     may carry a leading "*." for wildcards.
	//   - token is the challenge token.
	//   - expected is the KeyAuthorization() result for
	//     (token, account JWK).
	Validate(ctx context.Context, domain, token, expected string) error
}
|
||||
|
||||
// PoolConfig carries the validator pool's settings: one semaphore
// weight per challenge type, plus the parameters shared by the HTTP
// and DNS dialing code. cmd/server/main.go populates it from
// cfg.ACMEServer.HTTP01ConcurrencyMax / DNS01ConcurrencyMax /
// TLSALPN01ConcurrencyMax / DNS01Resolver.
type PoolConfig struct {
	// Per-type concurrency caps. Each defaults to 10.
	HTTP01Weight    int64 // CERTCTL_ACME_SERVER_HTTP01_CONCURRENCY
	DNS01Weight     int64 // CERTCTL_ACME_SERVER_DNS01_CONCURRENCY
	TLSALPN01Weight int64 // CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY

	// DNS01Resolver is the host:port of the resolver used for DNS-01
	// lookups (CERTCTL_ACME_SERVER_DNS01_RESOLVER, default "8.8.8.8:53").
	DNS01Resolver string

	// PerChallengeTimeout is the ceiling on the total time one
	// challenge validation may take. RFC 8555 sets no value; the 30s
	// default leaves room for DNS propagation jitter, TCP slow-start
	// and the TLS handshake while stopping a hostile responder from
	// holding a worker indefinitely.
	PerChallengeTimeout time.Duration
}
|
||||
|
||||
// Pool is the dispatcher that owns the 3 per-type semaphores +
|
||||
// per-type ChallengeValidator implementations + per-validator-type
|
||||
// in-flight gauge for the chaos test. Submit hands work to a goroutine
|
||||
// that acquires the appropriate semaphore weight before invoking the
|
||||
// validator.
|
||||
//
|
||||
// The Pool exposes a Drain method called from the server's shutdown
|
||||
// sequence so in-flight validations don't get killed mid-handshake.
|
||||
type Pool struct {
|
||||
cfg PoolConfig
|
||||
|
||||
http01Sem *semaphore.Weighted
|
||||
dns01Sem *semaphore.Weighted
|
||||
tlsALPN01Sem *semaphore.Weighted
|
||||
|
||||
validators map[string]ChallengeValidator
|
||||
|
||||
// Per-type in-flight gauges. Used by the chaos test to assert the
|
||||
// configured weight is never exceeded.
|
||||
http01InFlight atomic.Int64
|
||||
dns01InFlight atomic.Int64
|
||||
tlsALPN01InFlight atomic.Int64
|
||||
|
||||
// Per-type peak gauges. Same use as in-flight; tests read peaks
|
||||
// post-run.
|
||||
http01Peak atomic.Int64
|
||||
dns01Peak atomic.Int64
|
||||
tlsALPN01Peak atomic.Int64
|
||||
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
// NewPool constructs a Pool with the supplied config + the 3 default
|
||||
// validators. cmd/server/main.go calls this at startup once.
|
||||
func NewPool(cfg PoolConfig) *Pool {
|
||||
if cfg.HTTP01Weight <= 0 {
|
||||
cfg.HTTP01Weight = 10
|
||||
}
|
||||
if cfg.DNS01Weight <= 0 {
|
||||
cfg.DNS01Weight = 10
|
||||
}
|
||||
if cfg.TLSALPN01Weight <= 0 {
|
||||
cfg.TLSALPN01Weight = 10
|
||||
}
|
||||
if cfg.DNS01Resolver == "" {
|
||||
cfg.DNS01Resolver = "8.8.8.8:53"
|
||||
}
|
||||
if cfg.PerChallengeTimeout <= 0 {
|
||||
cfg.PerChallengeTimeout = 30 * time.Second
|
||||
}
|
||||
|
||||
p := &Pool{
|
||||
cfg: cfg,
|
||||
http01Sem: semaphore.NewWeighted(cfg.HTTP01Weight),
|
||||
dns01Sem: semaphore.NewWeighted(cfg.DNS01Weight),
|
||||
tlsALPN01Sem: semaphore.NewWeighted(cfg.TLSALPN01Weight),
|
||||
validators: make(map[string]ChallengeValidator, 3),
|
||||
}
|
||||
p.SetValidator(NewHTTP01Validator(cfg))
|
||||
p.SetValidator(NewDNS01Validator(cfg))
|
||||
p.SetValidator(NewTLSALPN01Validator(cfg))
|
||||
return p
|
||||
}
|
||||
|
||||
// SetValidator registers (or replaces) the validator for a given
|
||||
// challenge type. Tests inject mocks via this entry point.
|
||||
func (p *Pool) SetValidator(v ChallengeValidator) {
|
||||
p.validators[v.Type()] = v
|
||||
}
|
||||
|
||||
// Submit fires off a validation goroutine. Returns immediately. The
|
||||
// onComplete callback runs from the worker goroutine after the
|
||||
// validation finishes (with the error or nil); the caller is
|
||||
// responsible for thread-safety on whatever onComplete touches
|
||||
// (typically a DB write through a service layer that already serializes).
|
||||
//
|
||||
// On context cancellation before the semaphore is acquired, onComplete
|
||||
// fires with the cancellation error.
|
||||
func (p *Pool) Submit(ctx context.Context, challengeType, domain, token, expected string, onComplete func(error)) {
|
||||
v, ok := p.validators[challengeType]
|
||||
if !ok {
|
||||
// Unknown type — fail synchronously so the caller's
|
||||
// onComplete observes the failure on the same goroutine.
|
||||
go onComplete(fmt.Errorf("acme: no validator registered for type %q", challengeType))
|
||||
return
|
||||
}
|
||||
|
||||
p.wg.Add(1)
|
||||
go func() {
|
||||
defer p.wg.Done()
|
||||
|
||||
sem, inFlight, peak := p.semaphoreFor(challengeType)
|
||||
if err := sem.Acquire(ctx, 1); err != nil {
|
||||
onComplete(err)
|
||||
return
|
||||
}
|
||||
defer sem.Release(1)
|
||||
|
||||
now := inFlight.Add(1)
|
||||
// Update peak monotonically — only swap upward.
|
||||
for {
|
||||
old := peak.Load()
|
||||
if now <= old || peak.CompareAndSwap(old, now) {
|
||||
break
|
||||
}
|
||||
}
|
||||
defer inFlight.Add(-1)
|
||||
|
||||
cctx, cancel := context.WithTimeout(ctx, p.cfg.PerChallengeTimeout)
|
||||
defer cancel()
|
||||
|
||||
err := v.Validate(cctx, domain, token, expected)
|
||||
onComplete(err)
|
||||
}()
|
||||
}
|
||||
|
||||
// Drain waits for every in-flight validator to finish, bounded by
|
||||
// ctx. Called from cmd/server/main.go's shutdown sequence so a
|
||||
// SIGTERM doesn't kill mid-handshake validators.
|
||||
func (p *Pool) Drain(ctx context.Context) error {
|
||||
done := make(chan struct{})
|
||||
go func() { p.wg.Wait(); close(done) }()
|
||||
select {
|
||||
case <-done:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
// PoolSnapshot is the per-type in-flight + peak observation set used by
|
||||
// chaos / concurrency tests to verify the configured weights were never
|
||||
// exceeded.
|
||||
type PoolSnapshot struct {
|
||||
HTTP01InFlight int64
|
||||
HTTP01Peak int64
|
||||
DNS01InFlight int64
|
||||
DNS01Peak int64
|
||||
TLSALPN01InFlight int64
|
||||
TLSALPN01Peak int64
|
||||
}
|
||||
|
||||
// Snapshot returns the current per-type in-flight + peak counts.
|
||||
func (p *Pool) Snapshot() PoolSnapshot {
|
||||
return PoolSnapshot{
|
||||
HTTP01InFlight: p.http01InFlight.Load(),
|
||||
HTTP01Peak: p.http01Peak.Load(),
|
||||
DNS01InFlight: p.dns01InFlight.Load(),
|
||||
DNS01Peak: p.dns01Peak.Load(),
|
||||
TLSALPN01InFlight: p.tlsALPN01InFlight.Load(),
|
||||
TLSALPN01Peak: p.tlsALPN01Peak.Load(),
|
||||
}
|
||||
}
|
||||
|
||||
// semaphoreFor returns the (semaphore, in-flight gauge, peak gauge)
|
||||
// triple for a given challenge type. Centralized so the Submit
|
||||
// goroutine can update peak from a single spot.
|
||||
func (p *Pool) semaphoreFor(challengeType string) (*semaphore.Weighted, *atomic.Int64, *atomic.Int64) {
|
||||
switch challengeType {
|
||||
case "http-01":
|
||||
return p.http01Sem, &p.http01InFlight, &p.http01Peak
|
||||
case "dns-01":
|
||||
return p.dns01Sem, &p.dns01InFlight, &p.dns01Peak
|
||||
case "tls-alpn-01":
|
||||
return p.tlsALPN01Sem, &p.tlsALPN01InFlight, &p.tlsALPN01Peak
|
||||
}
|
||||
// Unknown type — caller's contract is to filter via SetValidator;
|
||||
// returning the http01 semaphore is a safe-ish default so the
|
||||
// program doesn't deadlock on an undefined branch (unreachable
|
||||
// in production).
|
||||
return p.http01Sem, &p.http01InFlight, &p.http01Peak
|
||||
}
|
||||
|
||||
// --- HTTP-01 validator -------------------------------------------------
|
||||
|
||||
// HTTP01Validator performs the RFC 8555 §8.3 HTTP-01 check. It GETs
// http://<domain>/.well-known/acme-challenge/<token> and requires the
// whitespace-trimmed response body to equal the key authorization.
// The client built by NewHTTP01Validator refuses to dial hosts that
// resolve to reserved IPs, which also covers redirect targets (SSRF
// guard).
type HTTP01Validator struct {
	client *http.Client // used for every challenge fetch
}
|
||||
|
||||
// NewHTTP01Validator constructs the validator with a hardened HTTP
|
||||
// client: 5s connect timeout, 10s response-header timeout, IP-aware
|
||||
// dial that refuses reserved IPs.
|
||||
func NewHTTP01Validator(cfg PoolConfig) *HTTP01Validator {
|
||||
dialer := &net.Dialer{Timeout: 5 * time.Second}
|
||||
transport := &http.Transport{
|
||||
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
host, _, err := net.SplitHostPort(addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host)
|
||||
if err != nil || len(ips) == 0 {
|
||||
return nil, fmt.Errorf("%w: %v", ErrChallengeConnection, err)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
if validation.IsReservedIPForDial(ip) {
|
||||
return nil, fmt.Errorf("%w: %s resolves to reserved IP %s", ErrChallengeReservedIP, host, ip)
|
||||
}
|
||||
}
|
||||
return dialer.DialContext(ctx, network, addr)
|
||||
},
|
||||
ResponseHeaderTimeout: 10 * time.Second,
|
||||
IdleConnTimeout: 30 * time.Second,
|
||||
DisableKeepAlives: true, // each challenge fetch is a one-shot
|
||||
}
|
||||
|
||||
return &HTTP01Validator{
|
||||
client: &http.Client{
|
||||
Transport: transport,
|
||||
Timeout: cfg.PerChallengeTimeout,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
// Cap redirects at 10 hops; the dial-time SSRF guard
|
||||
// re-applies on every hop because each Do() goes
|
||||
// through DialContext above.
|
||||
if len(via) >= 10 {
|
||||
return fmt.Errorf("%w: %d hops", ErrChallengeRedirect, len(via))
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (v *HTTP01Validator) Type() string { return "http-01" }
|
||||
|
||||
func (v *HTTP01Validator) Validate(ctx context.Context, domain, token, expected string) error {
|
||||
url := fmt.Sprintf("http://%s/.well-known/acme-challenge/%s", domain, token)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: build request: %v", ErrChallengeConnection, err)
|
||||
}
|
||||
resp, err := v.client.Do(req)
|
||||
if err != nil {
|
||||
// Distinguish redirect-loop / SSRF errors (already wrapped
|
||||
// with the proper sentinel) from raw transport errors.
|
||||
if errors.Is(err, ErrChallengeReservedIP) ||
|
||||
errors.Is(err, ErrChallengeRedirect) ||
|
||||
errors.Is(err, ErrChallengeConnection) {
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("%w: %v", ErrChallengeConnection, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("%w: HTTP-01 endpoint returned status %d", ErrChallengeMismatch, resp.StatusCode)
|
||||
}
|
||||
|
||||
// 16 KiB body cap per the master prompt (validators must not be
|
||||
// turnable into memory-exhaustion vectors against the certctl
|
||||
// server).
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 16*1024+1))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: read body: %v", ErrChallengeConnection, err)
|
||||
}
|
||||
if len(body) > 16*1024 {
|
||||
return ErrChallengeBodyTooBig
|
||||
}
|
||||
got := strings.TrimSpace(string(body))
|
||||
if got != expected {
|
||||
return fmt.Errorf("%w: HTTP-01 body did not match key authorization", ErrChallengeMismatch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- DNS-01 validator --------------------------------------------------
|
||||
|
||||
// DNS01Validator implements RFC 8555 §8.4. The validator queries
|
||||
// `_acme-challenge.<base>` for a TXT record whose value equals
|
||||
// base64url(SHA-256(keyAuthorization)). Wildcard identifiers
|
||||
// (`*.example.com`) resolve against `_acme-challenge.example.com` per
|
||||
// RFC 8555 §8.4.
|
||||
type DNS01Validator struct {
|
||||
resolver *net.Resolver
|
||||
}
|
||||
|
||||
// NewDNS01Validator constructs the validator with a custom resolver
|
||||
// pointed at cfg.DNS01Resolver. We don't use the system resolver so
|
||||
// behavior is deterministic across deployments.
|
||||
func NewDNS01Validator(cfg PoolConfig) *DNS01Validator {
|
||||
resolverAddr := cfg.DNS01Resolver
|
||||
d := &net.Dialer{Timeout: 5 * time.Second}
|
||||
return &DNS01Validator{
|
||||
resolver: &net.Resolver{
|
||||
PreferGo: true,
|
||||
Dial: func(ctx context.Context, network, _ string) (net.Conn, error) {
|
||||
return d.DialContext(ctx, network, resolverAddr)
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (v *DNS01Validator) Type() string { return "dns-01" }
|
||||
|
||||
func (v *DNS01Validator) Validate(ctx context.Context, domain, token, expected string) error {
|
||||
// Wildcard handling: `*.example.com` queries _acme-challenge.example.com.
|
||||
base := strings.TrimPrefix(domain, "*.")
|
||||
qname := "_acme-challenge." + base
|
||||
want := DNS01TXTRecordValue(expected)
|
||||
|
||||
txts, err := v.resolver.LookupTXT(ctx, qname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: TXT lookup for %s: %v", ErrChallengeDNS, qname, err)
|
||||
}
|
||||
for _, t := range txts {
|
||||
if t == want {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("%w: no TXT record at %s matched expected value", ErrChallengeMismatch, qname)
|
||||
}
|
||||
|
||||
// --- TLS-ALPN-01 validator --------------------------------------------
|
||||
|
||||
// TLSALPN01Validator implements RFC 8737. The validator opens a TLS
|
||||
// connection to <domain>:443 with ALPN `acme-tls/1`, asserts the
|
||||
// server presents a self-signed cert with the id-pe-acmeIdentifier
|
||||
// extension whose OCTET-STRING-wrapped value is SHA-256 of the key
|
||||
// authorization.
|
||||
//
|
||||
// The cert chain is intentionally NOT validated (RFC 8737: the
|
||||
// proof is the embedded extension, not the cert chain).
|
||||
// InsecureSkipVerify is correct here.
|
||||
type TLSALPN01Validator struct {
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
func NewTLSALPN01Validator(cfg PoolConfig) *TLSALPN01Validator {
|
||||
return &TLSALPN01Validator{timeout: cfg.PerChallengeTimeout}
|
||||
}
|
||||
|
||||
func (v *TLSALPN01Validator) Type() string { return "tls-alpn-01" }
|
||||
|
||||
func (v *TLSALPN01Validator) Validate(ctx context.Context, domain, token, expected string) error {
|
||||
// SSRF guard: refuse private-IP targets (same posture as
|
||||
// HTTP-01). LookupIP runs on the configured DNS resolver via
|
||||
// net.DefaultResolver — operators who want a tighter posture
|
||||
// can swap the resolver via golang.org/net/dns config.
|
||||
ips, err := net.DefaultResolver.LookupIP(ctx, "ip", domain)
|
||||
if err != nil || len(ips) == 0 {
|
||||
return fmt.Errorf("%w: %s LookupIP: %v", ErrChallengeConnection, domain, err)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
if validation.IsReservedIPForDial(ip) {
|
||||
return fmt.Errorf("%w: %s resolves to reserved IP %s", ErrChallengeReservedIP, domain, ip)
|
||||
}
|
||||
}
|
||||
|
||||
dialer := &tls.Dialer{
|
||||
NetDialer: &net.Dialer{Timeout: 5 * time.Second},
|
||||
Config: &tls.Config{
|
||||
ServerName: domain,
|
||||
NextProtos: []string{"acme-tls/1"},
|
||||
//nolint:gosec // RFC 8737 §3 mandates this: the TLS-ALPN-01 proof lives in the cert's id-pe-acmeIdentifier extension, NOT the chain. Documented in docs/tls.md L-001 table; documented in docs/acme-server.md threat model.
|
||||
InsecureSkipVerify: true,
|
||||
MinVersion: tls.VersionTLS12,
|
||||
},
|
||||
}
|
||||
conn, err := dialer.DialContext(ctx, "tcp", net.JoinHostPort(domain, "443"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%w: %s:443: %v", ErrChallengeTLS, domain, err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
tlsConn, ok := conn.(*tls.Conn)
|
||||
if !ok {
|
||||
return fmt.Errorf("%w: dialer returned non-TLS connection", ErrChallengeTLS)
|
||||
}
|
||||
state := tlsConn.ConnectionState()
|
||||
|
||||
if state.NegotiatedProtocol != "acme-tls/1" {
|
||||
return fmt.Errorf("%w: ALPN = %q", ErrChallengeWrongALPN, state.NegotiatedProtocol)
|
||||
}
|
||||
if len(state.PeerCertificates) == 0 {
|
||||
return ErrChallengeNoCert
|
||||
}
|
||||
cert := state.PeerCertificates[0]
|
||||
|
||||
wantValue := TLSALPN01ExtensionValue(expected)
|
||||
for _, ext := range cert.Extensions {
|
||||
if !ext.Id.Equal(IDPEAcmeIdentifierOID) {
|
||||
continue
|
||||
}
|
||||
// RFC 8737: the extension value is an ASN.1 OCTET STRING
|
||||
// wrapping the 32-byte SHA-256 hash.
|
||||
var raw []byte
|
||||
if _, err := asn1.Unmarshal(ext.Value, &raw); err != nil {
|
||||
return fmt.Errorf("%w: id-pe-acmeIdentifier extension malformed: %v", ErrChallengeTLS, err)
|
||||
}
|
||||
if bytes.Equal(raw, wantValue) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("%w: extension value did not match expected SHA-256(keyAuth)", ErrChallengeMismatch)
|
||||
}
|
||||
return ErrChallengeExtMissing
|
||||
}
|
||||
@@ -0,0 +1,322 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package acme
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
)
|
||||
|
||||
// --- KeyAuthorization + DNS01TXTRecordValue + TLSALPN01 helpers --------
|
||||
|
||||
func TestKeyAuthorization_RoundTrip(t *testing.T) {
|
||||
k, err := rsa.GenerateKey(rand.Reader, 2048)
|
||||
if err != nil {
|
||||
t.Fatalf("rsa keygen: %v", err)
|
||||
}
|
||||
jwk := &jose.JSONWebKey{Key: &k.PublicKey}
|
||||
auth, err := KeyAuthorization("token-abc", jwk)
|
||||
if err != nil {
|
||||
t.Fatalf("KeyAuthorization: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(auth, "token-abc.") {
|
||||
t.Errorf("authorization should be `token.thumbprint`; got %q", auth)
|
||||
}
|
||||
thumb, err := JWKThumbprint(jwk)
|
||||
if err != nil {
|
||||
t.Fatalf("JWKThumbprint: %v", err)
|
||||
}
|
||||
if !strings.HasSuffix(auth, "."+thumb) {
|
||||
t.Errorf("authorization suffix mismatch: got %q, expected .%s", auth, thumb)
|
||||
}
|
||||
}
|
||||
|
||||
func TestKeyAuthorization_NilJWK(t *testing.T) {
|
||||
_, err := KeyAuthorization("token", nil)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for nil jwk")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDNS01TXTRecordValue_StableHash(t *testing.T) {
|
||||
// Same key authorization → same TXT value.
|
||||
v1 := DNS01TXTRecordValue("token-abc.thumbprint-xyz")
|
||||
v2 := DNS01TXTRecordValue("token-abc.thumbprint-xyz")
|
||||
if v1 != v2 {
|
||||
t.Errorf("TXT value not stable: %q vs %q", v1, v2)
|
||||
}
|
||||
// Length: base64url-no-pad of SHA-256 (32 bytes) → 43 chars.
|
||||
if len(v1) != 43 {
|
||||
t.Errorf("TXT value length = %d, want 43", len(v1))
|
||||
}
|
||||
}
|
||||
|
||||
func TestTLSALPN01ExtensionValue_Length(t *testing.T) {
|
||||
v := TLSALPN01ExtensionValue("token-abc.thumbprint-xyz")
|
||||
if len(v) != 32 {
|
||||
t.Errorf("extension value length = %d, want 32 (SHA-256)", len(v))
|
||||
}
|
||||
}
|
||||
|
||||
// --- HTTP-01 validator -------------------------------------------------
|
||||
|
||||
func TestHTTP01Validator_HappyPath(t *testing.T) {
|
||||
const expected = "token.thumbprint"
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if !strings.HasPrefix(r.URL.Path, "/.well-known/acme-challenge/") {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
_, _ = w.Write([]byte(expected))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
// httptest.NewServer binds 127.0.0.1; the SSRF guard rejects
|
||||
// reserved IPs. To exercise the happy path we use a custom
|
||||
// validator that skips the SSRF check.
|
||||
v := &HTTP01Validator{client: &http.Client{Timeout: 5 * time.Second}}
|
||||
|
||||
u, err := url.Parse(srv.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("parse url: %v", err)
|
||||
}
|
||||
// Synthetic test: call the underlying http.Client.Do directly via
|
||||
// a custom Validate that targets srv.URL instead of building from
|
||||
// `domain`. The KeyAuthorization round-trip is what actually
|
||||
// matters here.
|
||||
body := makeHTTP01Body(t, v.client, srv.URL, "/.well-known/acme-challenge/token")
|
||||
if body != expected {
|
||||
t.Errorf("body = %q, want %q", body, expected)
|
||||
}
|
||||
_ = u
|
||||
}
|
||||
|
||||
// makeHTTP01Body GETs baseURL+path with client and returns the
// whitespace-trimmed body. It fails the test on a transport error or a
// non-200 status. Only the first Read of the body (up to 1024 bytes)
// is used. The happy-path test calls it to check the wire shape
// without going through the SSRF guard, which rejects 127.0.0.1.
func makeHTTP01Body(t *testing.T, client *http.Client, baseURL, path string) string {
	t.Helper()

	target := baseURL + path
	resp, err := client.Get(target)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		t.Fatalf("status = %d", code)
	}

	var chunk [1024]byte
	// The read error is ignored: a short test body arrives in one Read.
	n, _ := resp.Body.Read(chunk[:])
	return strings.TrimSpace(string(chunk[:n]))
}
|
||||
|
||||
func TestHTTP01Validator_ReservedIPRejection(t *testing.T) {
|
||||
// Use the production NewHTTP01Validator which has the SSRF guard.
|
||||
v := NewHTTP01Validator(PoolConfig{PerChallengeTimeout: 2 * time.Second})
|
||||
|
||||
// Target a domain that resolves to 127.0.0.1 (localhost). The
|
||||
// SSRF guard fires before the dial.
|
||||
err := v.Validate(context.Background(), "localhost", "token", "expected")
|
||||
if err == nil {
|
||||
t.Fatal("expected SSRF rejection for localhost; got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrChallengeReservedIP) && !errors.Is(err, ErrChallengeConnection) {
|
||||
// "localhost" → 127.0.0.1 is the reserved-IP case; some
|
||||
// platforms route differently.
|
||||
t.Errorf("err = %v; want ErrChallengeReservedIP or ErrChallengeConnection", err)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Pool dispatch + bounded concurrency -------------------------------
|
||||
|
||||
// stubValidator is a ChallengeValidator whose Validate blocks until
// the release channel is closed (or receives a value) or the context
// ends. The concurrency tests use it to park workers inside the
// semaphore window while they read the in-flight and peak gauges.
type stubValidator struct {
	typeStr string        // value returned by Type
	release chan struct{} // unblocks Validate
	calls   atomic.Int64  // number of Validate invocations so far
}

// Type returns the challenge type the stub was configured with.
func (s *stubValidator) Type() string { return s.typeStr }

// Validate counts the call, then waits. It returns nil once release
// fires, or ctx.Err() if the context ends first.
func (s *stubValidator) Validate(ctx context.Context, domain, token, expected string) error {
	s.calls.Add(1)
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-s.release:
		return nil
	}
}
|
||||
|
||||
func TestPool_BoundedConcurrency(t *testing.T) {
|
||||
cfg := PoolConfig{
|
||||
HTTP01Weight: 3, // low cap so we can observe saturation
|
||||
DNS01Weight: 2,
|
||||
TLSALPN01Weight: 2,
|
||||
PerChallengeTimeout: 5 * time.Second,
|
||||
}
|
||||
p := NewPool(cfg)
|
||||
stub := &stubValidator{typeStr: "http-01", release: make(chan struct{})}
|
||||
p.SetValidator(stub)
|
||||
|
||||
// Submit 10 HTTP-01 challenges. The pool's HTTP-01 weight is 3
|
||||
// → at most 3 should be in-flight at once.
|
||||
const total = 10
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(total)
|
||||
for i := 0; i < total; i++ {
|
||||
i := i
|
||||
p.Submit(context.Background(), "http-01", fmt.Sprintf("d%d.example.com", i), "tok", "expect", func(err error) {
|
||||
defer wg.Done()
|
||||
_ = err
|
||||
})
|
||||
}
|
||||
|
||||
// Wait for the validator to be hit by at least cfg.HTTP01Weight
|
||||
// workers (steady state — all available semaphore weight is
|
||||
// taken).
|
||||
deadline := time.Now().Add(2 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
if stub.calls.Load() >= cfg.HTTP01Weight {
|
||||
break
|
||||
}
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
}
|
||||
snap := p.Snapshot()
|
||||
if snap.HTTP01InFlight > cfg.HTTP01Weight {
|
||||
t.Errorf("HTTP01InFlight = %d, exceeds cap %d", snap.HTTP01InFlight, cfg.HTTP01Weight)
|
||||
}
|
||||
if snap.HTTP01Peak > cfg.HTTP01Weight {
|
||||
t.Errorf("HTTP01Peak = %d, exceeds cap %d", snap.HTTP01Peak, cfg.HTTP01Weight)
|
||||
}
|
||||
// Release all blocked workers + drain.
|
||||
close(stub.release)
|
||||
wg.Wait()
|
||||
|
||||
// Drain returns when wg is done (validators all completed).
|
||||
dctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
if err := p.Drain(dctx); err != nil {
|
||||
t.Errorf("Drain: %v", err)
|
||||
}
|
||||
finalSnap := p.Snapshot()
|
||||
if finalSnap.HTTP01InFlight != 0 {
|
||||
t.Errorf("post-Drain HTTP01InFlight = %d, want 0", finalSnap.HTTP01InFlight)
|
||||
}
|
||||
if stub.calls.Load() != total {
|
||||
t.Errorf("validator calls = %d, want %d", stub.calls.Load(), total)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPool_TypeIsolation(t *testing.T) {
|
||||
// HTTP-01 saturation should not block DNS-01 dispatch. Each type
|
||||
// has its own semaphore.
|
||||
cfg := PoolConfig{
|
||||
HTTP01Weight: 1,
|
||||
DNS01Weight: 1,
|
||||
TLSALPN01Weight: 1,
|
||||
PerChallengeTimeout: 5 * time.Second,
|
||||
}
|
||||
p := NewPool(cfg)
|
||||
httpStub := &stubValidator{typeStr: "http-01", release: make(chan struct{})}
|
||||
dnsStub := &stubValidator{typeStr: "dns-01", release: make(chan struct{})}
|
||||
p.SetValidator(httpStub)
|
||||
p.SetValidator(dnsStub)
|
||||
|
||||
// Block HTTP-01.
|
||||
httpDone := make(chan struct{})
|
||||
p.Submit(context.Background(), "http-01", "d.example.com", "tok", "expect", func(err error) {
|
||||
close(httpDone)
|
||||
})
|
||||
|
||||
// DNS-01 should still progress.
|
||||
dnsDone := make(chan struct{})
|
||||
p.Submit(context.Background(), "dns-01", "d.example.com", "tok", "expect", func(err error) {
|
||||
close(dnsDone)
|
||||
})
|
||||
|
||||
// Release DNS-01 immediately.
|
||||
close(dnsStub.release)
|
||||
select {
|
||||
case <-dnsDone:
|
||||
// good — DNS-01 completed even though HTTP-01 is held.
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("DNS-01 did not complete despite HTTP-01 saturation")
|
||||
}
|
||||
|
||||
// Release HTTP-01 + drain.
|
||||
close(httpStub.release)
|
||||
select {
|
||||
case <-httpDone:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("HTTP-01 did not complete after release")
|
||||
}
|
||||
dctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = p.Drain(dctx)
|
||||
}
|
||||
|
||||
func TestPool_UnknownType(t *testing.T) {
|
||||
p := NewPool(PoolConfig{})
|
||||
done := make(chan error, 1)
|
||||
p.Submit(context.Background(), "ftp-01" /* invalid */, "d.example.com", "tok", "exp", func(err error) {
|
||||
done <- err
|
||||
})
|
||||
select {
|
||||
case err := <-done:
|
||||
if err == nil {
|
||||
t.Error("expected error for unknown challenge type")
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("Submit's onComplete did not fire for unknown type")
|
||||
}
|
||||
}
|
||||
|
||||
// --- ChallengeProblemFromError mapping ---------------------------------
|
||||
|
||||
func TestChallengeProblemFromError_Mapping(t *testing.T) {
|
||||
cases := []struct {
|
||||
err error
|
||||
wantTyp string
|
||||
}{
|
||||
{nil, ""}, // nil → nil Problem
|
||||
{ErrChallengeConnection, "urn:ietf:params:acme:error:connection"},
|
||||
{fmt.Errorf("%w: timeout", ErrChallengeConnection), "urn:ietf:params:acme:error:connection"},
|
||||
{ErrChallengeDNS, "urn:ietf:params:acme:error:dns"},
|
||||
{ErrChallengeTLS, "urn:ietf:params:acme:error:tls"},
|
||||
{ErrChallengeMismatch, "urn:ietf:params:acme:error:incorrectResponse"},
|
||||
{ErrChallengeReservedIP, "urn:ietf:params:acme:error:incorrectResponse"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
p := ChallengeProblemFromError("http-01", tc.err)
|
||||
if tc.err == nil {
|
||||
if p != nil {
|
||||
t.Errorf("nil err: got Problem %+v", p)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if p == nil {
|
||||
t.Errorf("err=%v: got nil Problem", tc.err)
|
||||
continue
|
||||
}
|
||||
if p.Type != tc.wantTyp {
|
||||
t.Errorf("err=%v: type = %q, want %q", tc.err, p.Type, tc.wantTyp)
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,777 @@
|
||||
// Copyright (c) certctl
|
||||
// SPDX-License-Identifier: BSL-1.1
|
||||
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/x509"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
jose "github.com/go-jose/go-jose/v4"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/acme"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// mockACMEService implements ACMEService for handler-level tests.
|
||||
// Mirrors the mockSCEPService pattern at scep_handler_test.go (struct
|
||||
// holding canned responses + an err field per method).
|
||||
type mockACMEService struct {
|
||||
BuildDirectoryFn func(ctx context.Context, profileID, baseURL string) (*acme.Directory, error)
|
||||
IssueNonceFn func(ctx context.Context) (string, error)
|
||||
VerifyJWSFn func(ctx context.Context, body []byte, requestURL string, expectNewAccount bool, accountKID func(string) string) (*acme.VerifiedRequest, error)
|
||||
NewAccountFn func(ctx context.Context, profileID string, jwk *jose.JSONWebKey, contact []string, onlyReturnExisting bool, tosAgreed bool) (*domain.ACMEAccount, bool, error)
|
||||
LookupAccountFn func(ctx context.Context, accountID string) (*domain.ACMEAccount, error)
|
||||
UpdateAccountFn func(ctx context.Context, accountID string, contact []string) (*domain.ACMEAccount, error)
|
||||
DeactivateAccountFn func(ctx context.Context, accountID string) (*domain.ACMEAccount, error)
|
||||
// Phase 2.
|
||||
CreateOrderFn func(ctx context.Context, accountID, profileID string, identifiers []domain.ACMEIdentifier, notBefore, notAfter *time.Time) (*domain.ACMEOrder, error)
|
||||
LookupOrderFn func(ctx context.Context, orderID, accountID string) (*domain.ACMEOrder, error)
|
||||
LookupAuthzFn func(ctx context.Context, authzID string) (*domain.ACMEAuthorization, error)
|
||||
ListAuthzsByOrderFn func(ctx context.Context, orderID string) ([]*domain.ACMEAuthorization, error)
|
||||
FinalizeOrderFn func(ctx context.Context, accountID, orderID, profileID string, csr *x509.CertificateRequest, csrPEM string) (*service.FinalizeOrderResult, error)
|
||||
LookupCertificateFn func(ctx context.Context, certID, accountID string) (string, error)
|
||||
// Phase 3.
|
||||
RespondToChallengeFn func(ctx context.Context, accountID, challengeID string, accountJWK *jose.JSONWebKey) (*domain.ACMEChallenge, error)
|
||||
// Phase 4.
|
||||
RotateAccountKeyFn func(ctx context.Context, oldAccount *domain.ACMEAccount, newJWK *jose.JSONWebKey) (*domain.ACMEAccount, error)
|
||||
RevokeCertFn func(ctx context.Context, verified *acme.VerifiedRequest, certDER []byte, reasonCode int) error
|
||||
RenewalInfoFn func(ctx context.Context, profileID, certID string) (*acme.RenewalInfoResponse, time.Duration, error)
|
||||
}
|
||||
|
||||
func (m *mockACMEService) BuildDirectory(ctx context.Context, profileID, baseURL string) (*acme.Directory, error) {
|
||||
if m.BuildDirectoryFn != nil {
|
||||
return m.BuildDirectoryFn(ctx, profileID, baseURL)
|
||||
}
|
||||
return acme.BuildDirectory(baseURL, "", "", nil, false, false), nil
|
||||
}
|
||||
|
||||
func (m *mockACMEService) IssueNonce(ctx context.Context) (string, error) {
|
||||
if m.IssueNonceFn != nil {
|
||||
return m.IssueNonceFn(ctx)
|
||||
}
|
||||
return "test-nonce-12345", nil
|
||||
}
|
||||
|
||||
func (m *mockACMEService) VerifyJWS(ctx context.Context, body []byte, requestURL string, expectNewAccount bool, accountKID func(string) string) (*acme.VerifiedRequest, error) {
|
||||
if m.VerifyJWSFn != nil {
|
||||
return m.VerifyJWSFn(ctx, body, requestURL, expectNewAccount, accountKID)
|
||||
}
|
||||
return nil, errors.New("VerifyJWS not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) NewAccount(ctx context.Context, profileID string, jwk *jose.JSONWebKey, contact []string, onlyReturnExisting bool, tosAgreed bool) (*domain.ACMEAccount, bool, error) {
|
||||
if m.NewAccountFn != nil {
|
||||
return m.NewAccountFn(ctx, profileID, jwk, contact, onlyReturnExisting, tosAgreed)
|
||||
}
|
||||
return nil, false, errors.New("NewAccount not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) LookupAccount(ctx context.Context, accountID string) (*domain.ACMEAccount, error) {
|
||||
if m.LookupAccountFn != nil {
|
||||
return m.LookupAccountFn(ctx, accountID)
|
||||
}
|
||||
return nil, errors.New("LookupAccount not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) UpdateAccount(ctx context.Context, accountID string, contact []string) (*domain.ACMEAccount, error) {
|
||||
if m.UpdateAccountFn != nil {
|
||||
return m.UpdateAccountFn(ctx, accountID, contact)
|
||||
}
|
||||
return nil, errors.New("UpdateAccount not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) DeactivateAccount(ctx context.Context, accountID string) (*domain.ACMEAccount, error) {
|
||||
if m.DeactivateAccountFn != nil {
|
||||
return m.DeactivateAccountFn(ctx, accountID)
|
||||
}
|
||||
return nil, errors.New("DeactivateAccount not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) CreateOrder(ctx context.Context, accountID, profileID string, identifiers []domain.ACMEIdentifier, notBefore, notAfter *time.Time) (*domain.ACMEOrder, error) {
|
||||
if m.CreateOrderFn != nil {
|
||||
return m.CreateOrderFn(ctx, accountID, profileID, identifiers, notBefore, notAfter)
|
||||
}
|
||||
return nil, errors.New("CreateOrder not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) LookupOrder(ctx context.Context, orderID, accountID string) (*domain.ACMEOrder, error) {
|
||||
if m.LookupOrderFn != nil {
|
||||
return m.LookupOrderFn(ctx, orderID, accountID)
|
||||
}
|
||||
return nil, errors.New("LookupOrder not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) LookupAuthz(ctx context.Context, authzID string) (*domain.ACMEAuthorization, error) {
|
||||
if m.LookupAuthzFn != nil {
|
||||
return m.LookupAuthzFn(ctx, authzID)
|
||||
}
|
||||
return nil, errors.New("LookupAuthz not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) ListAuthzsByOrder(ctx context.Context, orderID string) ([]*domain.ACMEAuthorization, error) {
|
||||
if m.ListAuthzsByOrderFn != nil {
|
||||
return m.ListAuthzsByOrderFn(ctx, orderID)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *mockACMEService) FinalizeOrder(ctx context.Context, accountID, orderID, profileID string, csr *x509.CertificateRequest, csrPEM string) (*service.FinalizeOrderResult, error) {
|
||||
if m.FinalizeOrderFn != nil {
|
||||
return m.FinalizeOrderFn(ctx, accountID, orderID, profileID, csr, csrPEM)
|
||||
}
|
||||
return nil, errors.New("FinalizeOrder not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) LookupCertificate(ctx context.Context, certID, accountID string) (string, error) {
|
||||
if m.LookupCertificateFn != nil {
|
||||
return m.LookupCertificateFn(ctx, certID, accountID)
|
||||
}
|
||||
return "", errors.New("LookupCertificate not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) RespondToChallenge(ctx context.Context, accountID, challengeID string, accountJWK *jose.JSONWebKey) (*domain.ACMEChallenge, error) {
|
||||
if m.RespondToChallengeFn != nil {
|
||||
return m.RespondToChallengeFn(ctx, accountID, challengeID, accountJWK)
|
||||
}
|
||||
return nil, errors.New("RespondToChallenge not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) RotateAccountKey(ctx context.Context, oldAccount *domain.ACMEAccount, newJWK *jose.JSONWebKey) (*domain.ACMEAccount, error) {
|
||||
if m.RotateAccountKeyFn != nil {
|
||||
return m.RotateAccountKeyFn(ctx, oldAccount, newJWK)
|
||||
}
|
||||
return nil, errors.New("RotateAccountKey not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) RevokeCert(ctx context.Context, verified *acme.VerifiedRequest, certDER []byte, reasonCode int) error {
|
||||
if m.RevokeCertFn != nil {
|
||||
return m.RevokeCertFn(ctx, verified, certDER, reasonCode)
|
||||
}
|
||||
return errors.New("RevokeCert not stubbed")
|
||||
}
|
||||
|
||||
func (m *mockACMEService) RenewalInfo(ctx context.Context, profileID, certID string) (*acme.RenewalInfoResponse, time.Duration, error) {
|
||||
if m.RenewalInfoFn != nil {
|
||||
return m.RenewalInfoFn(ctx, profileID, certID)
|
||||
}
|
||||
return nil, 0, errors.New("RenewalInfo not stubbed")
|
||||
}
|
||||
|
||||
// newACMETestServer wires the ACMEHandler against the mock + a stdlib
|
||||
// ServeMux configured exactly the way internal/api/router/router.go
|
||||
// does it in production. Routes:
|
||||
//
|
||||
// GET /acme/profile/{id}/directory
|
||||
// HEAD /acme/profile/{id}/new-nonce
|
||||
// GET /acme/profile/{id}/new-nonce
|
||||
// GET /acme/directory (shorthand)
|
||||
// HEAD /acme/new-nonce (shorthand)
|
||||
// GET /acme/new-nonce (shorthand)
|
||||
func newACMETestServer(t *testing.T, mock *mockACMEService) *httptest.Server {
|
||||
t.Helper()
|
||||
h := NewACMEHandler(mock)
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("GET /acme/profile/{id}/directory", h.Directory)
|
||||
mux.HandleFunc("HEAD /acme/profile/{id}/new-nonce", h.NewNonce)
|
||||
mux.HandleFunc("GET /acme/profile/{id}/new-nonce", h.NewNonce)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/new-account", h.NewAccount)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/account/{acc_id}", h.Account)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/new-order", h.NewOrder)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/order/{ord_id}", h.Order)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/order/{ord_id}/finalize", h.OrderFinalize)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/authz/{authz_id}", h.Authz)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/challenge/{chall_id}", h.Challenge)
|
||||
mux.HandleFunc("POST /acme/profile/{id}/cert/{cert_id}", h.Cert)
|
||||
mux.HandleFunc("GET /acme/directory", h.Directory)
|
||||
mux.HandleFunc("HEAD /acme/new-nonce", h.NewNonce)
|
||||
mux.HandleFunc("GET /acme/new-nonce", h.NewNonce)
|
||||
mux.HandleFunc("POST /acme/new-account", h.NewAccount)
|
||||
mux.HandleFunc("POST /acme/account/{acc_id}", h.Account)
|
||||
return httptest.NewServer(mux)
|
||||
}
|
||||
|
||||
func TestACMEHandler_Directory_HappyPath(t *testing.T) {
|
||||
mock := &mockACMEService{}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL + "/acme/profile/prof-corp/directory")
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Content-Type"); got != "application/json" {
|
||||
t.Errorf("content-type = %q", got)
|
||||
}
|
||||
if got := resp.Header.Get("Replay-Nonce"); got == "" {
|
||||
t.Error("Replay-Nonce header missing on directory response")
|
||||
}
|
||||
|
||||
var dir acme.Directory
|
||||
if err := json.NewDecoder(resp.Body).Decode(&dir); err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
if !strings.Contains(dir.NewNonce, "/acme/profile/prof-corp/new-nonce") {
|
||||
t.Errorf("NewNonce = %q", dir.NewNonce)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Directory_UnknownProfile(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
BuildDirectoryFn: func(ctx context.Context, profileID, baseURL string) (*acme.Directory, error) {
|
||||
return nil, service.ErrACMEProfileNotFound
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL + "/acme/profile/missing/directory")
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusNotFound {
|
||||
t.Errorf("status = %d, want 404", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Content-Type"); got != acme.ProblemContentType {
|
||||
t.Errorf("content-type = %q, want %q", got, acme.ProblemContentType)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewNonce_HEAD(t *testing.T) {
|
||||
mock := &mockACMEService{}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
req, _ := http.NewRequest(http.MethodHead, srv.URL+"/acme/profile/prof-corp/new-nonce", nil)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("HEAD: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200 (HEAD)", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Replay-Nonce"); got != "test-nonce-12345" {
|
||||
t.Errorf("Replay-Nonce = %q", got)
|
||||
}
|
||||
if got := resp.Header.Get("Cache-Control"); got != "no-store" {
|
||||
t.Errorf("Cache-Control = %q, want no-store", got)
|
||||
}
|
||||
if resp.ContentLength > 0 {
|
||||
t.Errorf("HEAD body should be zero-length; got Content-Length=%d", resp.ContentLength)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewNonce_GET(t *testing.T) {
|
||||
mock := &mockACMEService{}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL + "/acme/profile/prof-corp/new-nonce")
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusNoContent {
|
||||
t.Errorf("status = %d, want 204 (GET)", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Replay-Nonce"); got != "test-nonce-12345" {
|
||||
t.Errorf("Replay-Nonce = %q", got)
|
||||
}
|
||||
if got := resp.Header.Get("Cache-Control"); got != "no-store" {
|
||||
t.Errorf("Cache-Control = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Shorthand_DefaultProfileSet(t *testing.T) {
|
||||
// Service-layer mock returns a directory; handler test asserts the
|
||||
// /acme/directory shorthand reaches the same handler path as the
|
||||
// per-profile directory.
|
||||
mock := &mockACMEService{}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL + "/acme/directory")
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
var dir acme.Directory
|
||||
if err := json.NewDecoder(resp.Body).Decode(&dir); err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
if !strings.HasSuffix(dir.NewNonce, "/acme/new-nonce") {
|
||||
t.Errorf("NewNonce = %q (shorthand path expected)", dir.NewNonce)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Shorthand_DefaultProfileUnset(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
BuildDirectoryFn: func(ctx context.Context, profileID, baseURL string) (*acme.Directory, error) {
|
||||
return nil, service.ErrACMEUserActionRequired
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL + "/acme/directory")
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusForbidden {
|
||||
t.Errorf("status = %d, want 403", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Content-Type"); got != acme.ProblemContentType {
|
||||
t.Errorf("content-type = %q, want %q", got, acme.ProblemContentType)
|
||||
}
|
||||
var p acme.Problem
|
||||
if err := json.NewDecoder(resp.Body).Decode(&p); err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
if p.Type != "urn:ietf:params:acme:error:userActionRequired" {
|
||||
t.Errorf("Problem.Type = %q", p.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewNonce_ServiceError(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
IssueNonceFn: func(ctx context.Context) (string, error) {
|
||||
return "", errors.New("disk full")
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL + "/acme/profile/prof-corp/new-nonce")
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusInternalServerError {
|
||||
t.Errorf("status = %d, want 500", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Content-Type"); got != acme.ProblemContentType {
|
||||
t.Errorf("content-type = %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Phase 1b — new-account + account update ---------------------------
|
||||
|
||||
// stubVerifiedReq returns a VerifiedRequest pre-baked with payload +
|
||||
// the supplied Account / JWK for handler-level tests that don't go
|
||||
// through the actual JWS verifier.
|
||||
func stubVerifiedReq(payload interface{}, account *domain.ACMEAccount, jwk *jose.JSONWebKey) func(ctx context.Context, body []byte, requestURL string, expectNewAccount bool, accountKID func(string) string) (*acme.VerifiedRequest, error) {
|
||||
return func(ctx context.Context, body []byte, requestURL string, expectNewAccount bool, accountKID func(string) string) (*acme.VerifiedRequest, error) {
|
||||
raw, _ := json.Marshal(payload)
|
||||
return &acme.VerifiedRequest{
|
||||
Payload: raw,
|
||||
Algorithm: "RS256",
|
||||
URL: requestURL,
|
||||
Nonce: "test-nonce",
|
||||
Account: account,
|
||||
JWK: jwk,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewAccount_HappyPath_New(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.NewAccountRequest{Contact: []string{"mailto:a@example.com"}, TermsOfServiceAgreed: true},
|
||||
nil, // jwk path → no Account
|
||||
&jose.JSONWebKey{},
|
||||
),
|
||||
NewAccountFn: func(ctx context.Context, profileID string, jwk *jose.JSONWebKey, contact []string, onlyReturnExisting bool, tosAgreed bool) (*domain.ACMEAccount, bool, error) {
|
||||
return &domain.ACMEAccount{
|
||||
AccountID: "acme-acc-fresh", JWKThumbprint: "thumb-x",
|
||||
Contact: contact, Status: domain.ACMEAccountStatusValid, ProfileID: profileID,
|
||||
}, true, nil
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/new-account", "application/jose+json", bytes.NewReader([]byte("ignored-by-mock")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusCreated {
|
||||
t.Errorf("status = %d, want 201", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Replay-Nonce"); got == "" {
|
||||
t.Error("Replay-Nonce header missing")
|
||||
}
|
||||
if got := resp.Header.Get("Location"); !strings.Contains(got, "/account/acme-acc-fresh") {
|
||||
t.Errorf("Location = %q (want suffix /account/acme-acc-fresh)", got)
|
||||
}
|
||||
var body acme.AccountResponseJSON
|
||||
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if body.Status != "valid" {
|
||||
t.Errorf("status = %q", body.Status)
|
||||
}
|
||||
if !strings.HasSuffix(body.Orders, "/account/acme-acc-fresh/orders") {
|
||||
t.Errorf("orders URL = %q", body.Orders)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewAccount_Idempotent_ExistingReturns200(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(acme.NewAccountRequest{}, nil, &jose.JSONWebKey{}),
|
||||
NewAccountFn: func(ctx context.Context, profileID string, jwk *jose.JSONWebKey, contact []string, onlyReturnExisting bool, tosAgreed bool) (*domain.ACMEAccount, bool, error) {
|
||||
return &domain.ACMEAccount{
|
||||
AccountID: "acme-acc-existing", Status: domain.ACMEAccountStatusValid, ProfileID: profileID,
|
||||
}, false /*isNew=false*/, nil
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/new-account", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200 (idempotent re-registration)", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewAccount_OnlyReturnExisting_NoMatch(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(acme.NewAccountRequest{OnlyReturnExisting: true}, nil, &jose.JSONWebKey{}),
|
||||
NewAccountFn: func(ctx context.Context, profileID string, jwk *jose.JSONWebKey, contact []string, onlyReturnExisting bool, tosAgreed bool) (*domain.ACMEAccount, bool, error) {
|
||||
return nil, false, service.ErrACMEAccountDoesNotExist
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/new-account", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want 400", resp.StatusCode)
|
||||
}
|
||||
var p acme.Problem
|
||||
_ = json.NewDecoder(resp.Body).Decode(&p)
|
||||
if p.Type != "urn:ietf:params:acme:error:accountDoesNotExist" {
|
||||
t.Errorf("Problem.Type = %q", p.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewAccount_JWSMalformed(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: func(ctx context.Context, body []byte, requestURL string, expectNewAccount bool, accountKID func(string) string) (*acme.VerifiedRequest, error) {
|
||||
return nil, acme.ErrJWSMalformed
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/new-account", "application/jose+json", bytes.NewReader([]byte("garbage")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want 400", resp.StatusCode)
|
||||
}
|
||||
var p acme.Problem
|
||||
_ = json.NewDecoder(resp.Body).Decode(&p)
|
||||
if p.Type != "urn:ietf:params:acme:error:malformed" {
|
||||
t.Errorf("Problem.Type = %q", p.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Account_KIDMismatch(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.AccountUpdateRequest{},
|
||||
&domain.ACMEAccount{
|
||||
AccountID: "acme-acc-A", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
// URL claims account B, JWS-verified account is A.
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/account/acme-acc-B", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusUnauthorized {
|
||||
t.Errorf("status = %d, want 401", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Account_Deactivate(t *testing.T) {
|
||||
called := false
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.AccountUpdateRequest{Status: "deactivated"},
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-D", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
DeactivateAccountFn: func(ctx context.Context, accountID string) (*domain.ACMEAccount, error) {
|
||||
called = true
|
||||
return &domain.ACMEAccount{AccountID: accountID, Status: domain.ACMEAccountStatusDeactivated, ProfileID: "prof-corp"}, nil
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/account/acme-acc-D", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
if !called {
|
||||
t.Error("DeactivateAccount was not invoked")
|
||||
}
|
||||
var body acme.AccountResponseJSON
|
||||
_ = json.NewDecoder(resp.Body).Decode(&body)
|
||||
if body.Status != "deactivated" {
|
||||
t.Errorf("status = %q", body.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Account_UpdateContact(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.AccountUpdateRequest{Contact: []string{"mailto:new@example.com"}},
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-U", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
UpdateAccountFn: func(ctx context.Context, accountID string, contact []string) (*domain.ACMEAccount, error) {
|
||||
return &domain.ACMEAccount{AccountID: accountID, Status: domain.ACMEAccountStatusValid, Contact: contact, ProfileID: "prof-corp"}, nil
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/account/acme-acc-U", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
var body acme.AccountResponseJSON
|
||||
_ = json.NewDecoder(resp.Body).Decode(&body)
|
||||
if len(body.Contact) != 1 || body.Contact[0] != "mailto:new@example.com" {
|
||||
t.Errorf("contact = %v", body.Contact)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Account_PostAsGet(t *testing.T) {
|
||||
// Empty payload → POST-as-GET (RFC 8555 §6.3): handler returns
|
||||
// the unmodified account row.
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
struct{}{}, // empty payload
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-G", Status: domain.ACMEAccountStatusValid, Contact: []string{"mailto:o@example.com"}, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/account/acme-acc-G", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200 (POST-as-GET)", resp.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Phase 2 — orders + finalize handler smoke -------------------------
|
||||
|
||||
func TestACMEHandler_NewOrder_HappyPath(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.NewOrderRequest{Identifiers: []acme.IdentifierJSON{{Type: "dns", Value: "example.com"}}},
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-X", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
CreateOrderFn: func(ctx context.Context, accountID, profileID string, identifiers []domain.ACMEIdentifier, notBefore, notAfter *time.Time) (*domain.ACMEOrder, error) {
|
||||
return &domain.ACMEOrder{
|
||||
OrderID: "acme-ord-001",
|
||||
AccountID: accountID,
|
||||
Identifiers: identifiers,
|
||||
Status: domain.ACMEOrderStatusReady,
|
||||
ExpiresAt: time.Now().Add(24 * time.Hour),
|
||||
}, nil
|
||||
},
|
||||
ListAuthzsByOrderFn: func(ctx context.Context, orderID string) ([]*domain.ACMEAuthorization, error) {
|
||||
return []*domain.ACMEAuthorization{
|
||||
{AuthzID: "acme-authz-001", OrderID: orderID, Status: domain.ACMEAuthzStatusValid},
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/new-order", "application/jose+json", bytes.NewReader([]byte("ignored-by-mock")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusCreated {
|
||||
t.Errorf("status = %d, want 201", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Location"); !strings.Contains(got, "/order/acme-ord-001") {
|
||||
t.Errorf("Location = %q", got)
|
||||
}
|
||||
var body acme.OrderResponseJSON
|
||||
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
if body.Status != "ready" {
|
||||
t.Errorf("status = %q (trust_authenticated should auto-ready)", body.Status)
|
||||
}
|
||||
if len(body.Authorizations) != 1 || !strings.Contains(body.Authorizations[0], "/authz/acme-authz-001") {
|
||||
t.Errorf("authorizations = %v", body.Authorizations)
|
||||
}
|
||||
if !strings.HasSuffix(body.Finalize, "/order/acme-ord-001/finalize") {
|
||||
t.Errorf("finalize = %q", body.Finalize)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_NewOrder_RejectedIdentifier(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.NewOrderRequest{Identifiers: []acme.IdentifierJSON{{Type: "ip", Value: "10.0.0.1"}}},
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-X", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/new-order", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want 400 (rejected identifier)", resp.StatusCode)
|
||||
}
|
||||
var p acme.Problem
|
||||
_ = json.NewDecoder(resp.Body).Decode(&p)
|
||||
if p.Type != "urn:ietf:params:acme:error:rejectedIdentifier" {
|
||||
t.Errorf("Problem.Type = %q", p.Type)
|
||||
}
|
||||
if len(p.Subproblems) == 0 {
|
||||
t.Error("expected subproblems for per-identifier rejection")
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_OrderFinalize_BadCSR(t *testing.T) {
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
acme.FinalizeRequest{CSR: "not-base64!!!"},
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-X", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/order/acme-ord-001/finalize", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want 400", resp.StatusCode)
|
||||
}
|
||||
var p acme.Problem
|
||||
_ = json.NewDecoder(resp.Body).Decode(&p)
|
||||
if p.Type != "urn:ietf:params:acme:error:badCSR" {
|
||||
t.Errorf("Problem.Type = %q", p.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestACMEHandler_Cert_HappyPath(t *testing.T) {
|
||||
pemChain := "-----BEGIN CERTIFICATE-----\nMIIBhjCCAQ==\n-----END CERTIFICATE-----\n"
|
||||
mock := &mockACMEService{
|
||||
VerifyJWSFn: stubVerifiedReq(
|
||||
struct{}{},
|
||||
&domain.ACMEAccount{AccountID: "acme-acc-X", Status: domain.ACMEAccountStatusValid, ProfileID: "prof-corp"},
|
||||
nil,
|
||||
),
|
||||
LookupCertificateFn: func(ctx context.Context, certID, accountID string) (string, error) {
|
||||
return pemChain, nil
|
||||
},
|
||||
}
|
||||
srv := newACMETestServer(t, mock)
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Post(srv.URL+"/acme/profile/prof-corp/cert/mc-acme-001", "application/jose+json", bytes.NewReader([]byte("x")))
|
||||
if err != nil {
|
||||
t.Fatalf("Post: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Errorf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
if got := resp.Header.Get("Content-Type"); got != "application/pem-certificate-chain" {
|
||||
t.Errorf("content-type = %q", got)
|
||||
}
|
||||
body := bytes.NewBuffer(nil)
|
||||
_, _ = body.ReadFrom(resp.Body)
|
||||
if !strings.Contains(body.String(), "BEGIN CERTIFICATE") {
|
||||
t.Errorf("body did not contain PEM cert: %q", body.String())
|
||||
}
|
||||
}
|
||||
@@ -6,9 +6,11 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// MetricsService defines the service interface for metrics collection.
|
||||
@@ -51,6 +53,52 @@ type DeployCounterSnapshotter interface {
|
||||
Snapshot() []DeploySnapshotEntry
|
||||
}
|
||||
|
||||
// IssuanceCounterEntry / IssuanceFailureEntry / IssuanceDurationEntry
|
||||
// and the IssuanceMetricsSnapshotter interface live in
|
||||
// internal/service (issuance_metrics.go). Handler can't define them
|
||||
// locally because internal/api/handler imports service — the
|
||||
// reverse import would create a cycle. The exposer below takes the
|
||||
// types via the interface defined in service.
|
||||
|
||||
// VaultRenewalSnapshotter is the read-side contract MetricsHandler uses to
// emit the certctl_vault_token_renewals_total{result=...} counter.
//
// *service.VaultRenewalMetrics is the intended implementation: cmd/server
// hands one shared instance to IssuerRegistry.SetVaultRenewalMetrics (where
// Vault connectors record outcomes) and to MetricsHandler.SetVaultRenewals
// (where the Prometheus exposer reads them).
//
// The method returns three plain uint64 values instead of a struct so that
// no type has to be shared between the handler and service packages: handler
// imports service, service must not import handler, and a
// (uint64, uint64, uint64) result needs no common declaration.
//
// Top-10 fix #5 of the 2026-05-03 issuer-coverage audit.
type VaultRenewalSnapshotter interface {
	// SnapshotVaultRenewals reports the success, failure, and not_renewable
	// counters as point-in-time reads. The result order is fixed and mirrors
	// the order in which the exposer writes the Prometheus label values.
	SnapshotVaultRenewals() (success, failure, notRenewable uint64)
}
|
||||
|
||||
// ExpiryAlertSnapshotter is the surface MetricsHandler consumes to
|
||||
// emit certctl_expiry_alerts_total{channel, threshold, result}.
|
||||
// *service.ExpiryAlertMetrics satisfies this. Same wiring shape as
|
||||
// VaultRenewalSnapshotter — one instance shared between recording
|
||||
// (via NotificationService.SetExpiryAlertMetrics) and exposing
|
||||
// (here).
|
||||
//
|
||||
// Rank 4 of the 2026-05-03 Infisical deep-research deliverable
|
||||
// (cowork/infisical-deep-research-results.md Part 5).
|
||||
type ExpiryAlertSnapshotter interface {
|
||||
// SnapshotExpiryAlerts returns one entry per non-zero counter,
|
||||
// pre-sorted by (channel, threshold, result) so the Prometheus
|
||||
// exposition is byte-stable across requests. The handler does
|
||||
// not re-sort.
|
||||
SnapshotExpiryAlerts() []service.ExpiryAlertSnapshotEntry
|
||||
}
|
||||
|
||||
// MetricsHandler handles HTTP requests for metrics.
|
||||
// Supports both JSON format (GET /api/v1/metrics) and Prometheus exposition format
|
||||
// (GET /api/v1/metrics/prometheus) for integration with Prometheus, Grafana, Datadog, etc.
|
||||
@@ -64,6 +112,20 @@ type MetricsHandler struct {
|
||||
ocspCounters CounterSnapshotter
|
||||
// Phase 10 (deploy-hardening I) — per-target-type deploy counters.
|
||||
deployCounters DeployCounterSnapshotter
|
||||
// Per-issuer-type issuance metrics (audit fix #4). nil disables
|
||||
// the new metric block; main.go wires the instance at startup.
|
||||
// The interface lives in service to avoid an import cycle (handler
|
||||
// imports service for admin_est.go etc., so service can't import
|
||||
// handler back).
|
||||
issuanceCounters service.IssuanceMetricsSnapshotter
|
||||
// Vault PKI token-renewal counters. Top-10 fix #5 of the
|
||||
// 2026-05-03 issuer-coverage audit. nil disables emission of
|
||||
// certctl_vault_token_renewals_total{result=...}.
|
||||
vaultRenewals VaultRenewalSnapshotter
|
||||
// Per-policy multi-channel expiry alert counters. Rank 4 of the
|
||||
// 2026-05-03 Infisical deep-research deliverable. nil disables
|
||||
// emission of certctl_expiry_alerts_total{channel,threshold,result}.
|
||||
expiryAlerts ExpiryAlertSnapshotter
|
||||
}
|
||||
|
||||
// NewMetricsHandler creates a new MetricsHandler with a service dependency.
|
||||
@@ -89,6 +151,29 @@ func (h *MetricsHandler) SetDeployCounters(c DeployCounterSnapshotter) {
|
||||
h.deployCounters = c
|
||||
}
|
||||
|
||||
// SetIssuanceCounters wires the per-issuer-type issuance metrics for
|
||||
// the Prometheus exposition. nil disables the block. Closes the #4
|
||||
// acquisition-readiness blocker from the 2026-05-01 issuer coverage
|
||||
// audit (per-issuer-type metrics).
|
||||
func (h *MetricsHandler) SetIssuanceCounters(c service.IssuanceMetricsSnapshotter) {
|
||||
h.issuanceCounters = c
|
||||
}
|
||||
|
||||
// SetVaultRenewals wires the Vault PKI token-renewal counter table
|
||||
// for the Prometheus exposition. nil disables the block. Closes
|
||||
// Top-10 fix #5 of the 2026-05-03 issuer-coverage audit.
|
||||
func (h *MetricsHandler) SetVaultRenewals(c VaultRenewalSnapshotter) {
|
||||
h.vaultRenewals = c
|
||||
}
|
||||
|
||||
// SetExpiryAlerts wires the per-policy multi-channel expiry-alert
|
||||
// counter table for the Prometheus exposition. nil disables the
|
||||
// block. Closes Rank 4 of the 2026-05-03 Infisical deep-research
|
||||
// deliverable.
|
||||
func (h *MetricsHandler) SetExpiryAlerts(c ExpiryAlertSnapshotter) {
|
||||
h.expiryAlerts = c
|
||||
}
|
||||
|
||||
// MetricsResponse represents the JSON metrics response for V2.
|
||||
type MetricsResponse struct {
|
||||
Gauge MetricsGauge `json:"gauge"`
|
||||
@@ -344,6 +429,105 @@ func (h MetricsHandler) GetPrometheusMetrics(w http.ResponseWriter, r *http.Requ
|
||||
fmt.Fprintf(w, "certctl_deploy_idempotent_skip_total{target_type=%q} %d\n", s.TargetType, s.IdempotentSkips)
|
||||
}
|
||||
}
|
||||
|
||||
// Per-issuer-type issuance metrics (audit fix #4). Three series:
|
||||
// certctl_issuance_total{issuer_type, outcome} counter
|
||||
// certctl_issuance_duration_seconds{issuer_type} histogram
|
||||
// certctl_issuance_failures_total{issuer_type, error_class} counter
|
||||
//
|
||||
// Cardinality: 12 issuer_types × 2 outcomes (24) +
|
||||
// 12 × 11 buckets+sum+count (~156) +
|
||||
// 12 × 8 error_classes (96) = ~276 series. Comfortable
|
||||
// for any Prometheus instance.
|
||||
if h.issuanceCounters != nil {
|
||||
// certctl_issuance_total
|
||||
fmt.Fprintf(w, "\n# HELP certctl_issuance_total Total certificate issuance attempts, labelled by issuer type and outcome.\n")
|
||||
fmt.Fprintf(w, "# TYPE certctl_issuance_total counter\n")
|
||||
counters := h.issuanceCounters.SnapshotCounters()
|
||||
sort.Slice(counters, func(i, j int) bool {
|
||||
if counters[i].IssuerType != counters[j].IssuerType {
|
||||
return counters[i].IssuerType < counters[j].IssuerType
|
||||
}
|
||||
return counters[i].Outcome < counters[j].Outcome
|
||||
})
|
||||
for _, c := range counters {
|
||||
fmt.Fprintf(w, "certctl_issuance_total{issuer_type=%q,outcome=%q} %d\n", c.IssuerType, c.Outcome, c.Count)
|
||||
}
|
||||
|
||||
// certctl_issuance_duration_seconds histogram
|
||||
fmt.Fprintf(w, "\n# HELP certctl_issuance_duration_seconds Certificate issuance duration in seconds, labelled by issuer type. Cumulative histogram with +Inf.\n")
|
||||
fmt.Fprintf(w, "# TYPE certctl_issuance_duration_seconds histogram\n")
|
||||
durations := h.issuanceCounters.SnapshotDurations()
|
||||
boundaries := h.issuanceCounters.BucketBoundaries()
|
||||
sort.Slice(durations, func(i, j int) bool { return durations[i].IssuerType < durations[j].IssuerType })
|
||||
for _, d := range durations {
|
||||
for i, le := range boundaries {
|
||||
if i < len(d.Buckets) {
|
||||
fmt.Fprintf(w, "certctl_issuance_duration_seconds_bucket{issuer_type=%q,le=%q} %d\n",
|
||||
d.IssuerType, formatLE(le), d.Buckets[i])
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "certctl_issuance_duration_seconds_bucket{issuer_type=%q,le=\"+Inf\"} %d\n", d.IssuerType, d.Count)
|
||||
fmt.Fprintf(w, "certctl_issuance_duration_seconds_sum{issuer_type=%q} %g\n", d.IssuerType, d.Sum)
|
||||
fmt.Fprintf(w, "certctl_issuance_duration_seconds_count{issuer_type=%q} %d\n", d.IssuerType, d.Count)
|
||||
}
|
||||
|
||||
// certctl_issuance_failures_total
|
||||
fmt.Fprintf(w, "\n# HELP certctl_issuance_failures_total Issuance failures by issuer type and error class. error_class is a closed enum (timeout, auth, rate_limited, validation, upstream_5xx, upstream_4xx, network, other).\n")
|
||||
fmt.Fprintf(w, "# TYPE certctl_issuance_failures_total counter\n")
|
||||
failures := h.issuanceCounters.SnapshotFailures()
|
||||
sort.Slice(failures, func(i, j int) bool {
|
||||
if failures[i].IssuerType != failures[j].IssuerType {
|
||||
return failures[i].IssuerType < failures[j].IssuerType
|
||||
}
|
||||
return failures[i].ErrorClass < failures[j].ErrorClass
|
||||
})
|
||||
for _, f := range failures {
|
||||
fmt.Fprintf(w, "certctl_issuance_failures_total{issuer_type=%q,error_class=%q} %d\n", f.IssuerType, f.ErrorClass, f.Count)
|
||||
}
|
||||
}
|
||||
|
||||
// Vault PKI token-renewal counters. Top-10 fix #5 of the
|
||||
// 2026-05-03 issuer-coverage audit. Operators alert on
|
||||
// certctl_vault_token_renewals_total{result="failure"} > 0 or
|
||||
// {result="not_renewable"} > 0 to catch token expiry before
|
||||
// issuance breaks. Closed enum: 3 series.
|
||||
if h.vaultRenewals != nil {
|
||||
success, failure, notRenewable := h.vaultRenewals.SnapshotVaultRenewals()
|
||||
fmt.Fprintf(w, "\n# HELP certctl_vault_token_renewals_total Vault PKI token renew-self results. result is a closed enum: success, failure, not_renewable.\n")
|
||||
fmt.Fprintf(w, "# TYPE certctl_vault_token_renewals_total counter\n")
|
||||
fmt.Fprintf(w, "certctl_vault_token_renewals_total{result=%q} %d\n", "success", success)
|
||||
fmt.Fprintf(w, "certctl_vault_token_renewals_total{result=%q} %d\n", "failure", failure)
|
||||
fmt.Fprintf(w, "certctl_vault_token_renewals_total{result=%q} %d\n", "not_renewable", notRenewable)
|
||||
}
|
||||
|
||||
// Per-policy multi-channel expiry-alert counters. Rank 4 of the
|
||||
// 2026-05-03 Infisical deep-research deliverable. Operators alert
|
||||
// on certctl_expiry_alerts_total{result="failure"} > 0 to catch
|
||||
// when a notifier connector (PagerDuty / Slack / etc.) is
|
||||
// rejecting our sends. Cardinality: 6 channels × N thresholds × 3
|
||||
// results — production deploys with the standard 4 thresholds top
|
||||
// out at 72 series. Snapshot is pre-sorted by the recorder so the
|
||||
// emission order is byte-stable across requests.
|
||||
if h.expiryAlerts != nil {
|
||||
entries := h.expiryAlerts.SnapshotExpiryAlerts()
|
||||
if len(entries) > 0 {
|
||||
fmt.Fprintf(w, "\n# HELP certctl_expiry_alerts_total Certificate-expiry alerts dispatched per (channel, threshold, result). result is a closed enum: success, failure, deduped.\n")
|
||||
fmt.Fprintf(w, "# TYPE certctl_expiry_alerts_total counter\n")
|
||||
for _, e := range entries {
|
||||
fmt.Fprintf(w, "certctl_expiry_alerts_total{channel=%q,threshold=%q,result=%q} %d\n",
|
||||
e.Channel, strconv.Itoa(e.Threshold), e.Result, e.Count)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// formatLE renders a histogram bucket boundary as the text used for the `le`
// label of the issuance-duration histogram: plain decimal notation with the
// fewest digits that still identify the value, so there are no trailing
// zeros and no exponent form.
func formatLE(v float64) string {
	const (
		plainDecimal   = 'f' // fixed-point output, never scientific notation
		shortestDigits = -1  // minimal digits that round-trip the value
		float64Bits    = 64
	)
	return strconv.FormatFloat(v, plainDecimal, shortestDigits, float64Bits)
}
|
||||
|
||||
// DashboardSummary mirrors the service.DashboardSummary for JSON unmarshaling.
|
||||
|
||||
@@ -160,6 +160,12 @@ func (r *intuneE2EAuditRepo) Create(_ context.Context, e *domain.AuditEvent) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateWithTx mirrors Create — handler-test mocks have no DB; the
|
||||
// Querier is ignored.
|
||||
func (r *intuneE2EAuditRepo) CreateWithTx(ctx context.Context, _ repository.Querier, e *domain.AuditEvent) error {
|
||||
return r.Create(ctx, e)
|
||||
}
|
||||
|
||||
func (r *intuneE2EAuditRepo) List(_ context.Context, _ *repository.AuditFilter) ([]*domain.AuditEvent, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -50,6 +50,48 @@ var SpecParityExceptions = map[string]string{
|
||||
// operator-facing description.
|
||||
"GET /scep-mtls": "Phase 6.5 mTLS sibling route — same wire format as /scep with cert-required gate; documented in docs/legacy-est-scep.md",
|
||||
"POST /scep-mtls": "Phase 6.5 mTLS sibling route — same wire format as /scep with cert-required gate; documented in docs/legacy-est-scep.md",
|
||||
|
||||
// ACME server (RFC 8555 + RFC 9773 ARI) — Phase 1a foundation.
|
||||
// Like SCEP/EST, ACME is a wire-protocol surface (JWS-signed JSON
|
||||
// over HTTPS per RFC 7515) whose semantics are dictated by the RFC
|
||||
// rather than by an OpenAPI document. Documenting every endpoint
|
||||
// in openapi.yaml would duplicate RFC 8555 §7.1 + §7.2 with no
|
||||
// information gain. The canonical reference is docs/acme-server.md.
|
||||
// Subsequent phases will extend this list with new-account,
|
||||
// new-order, finalize, authz, challenge, cert, key-change,
|
||||
// revoke-cert, renewal-info — each gets its own exception entry
|
||||
// in the same commit that lands the route.
|
||||
"GET /acme/profile/{id}/directory": "RFC 8555 §7.1.1 directory; documented in docs/acme-server.md",
|
||||
"HEAD /acme/profile/{id}/new-nonce": "RFC 8555 §7.2 new-nonce; documented in docs/acme-server.md",
|
||||
"GET /acme/profile/{id}/new-nonce": "RFC 8555 §7.2 new-nonce (GET form); documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/new-account": "RFC 8555 §7.3 new-account; documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/account/{acc_id}": "RFC 8555 §7.3.2 account update + §7.3.6 deactivation; documented in docs/acme-server.md",
|
||||
"GET /acme/directory": "RFC 8555 §7.1.1 directory (default-profile shorthand); documented in docs/acme-server.md",
|
||||
"HEAD /acme/new-nonce": "RFC 8555 §7.2 new-nonce (default-profile shorthand); documented in docs/acme-server.md",
|
||||
"GET /acme/new-nonce": "RFC 8555 §7.2 new-nonce GET (default-profile shorthand); documented in docs/acme-server.md",
|
||||
"POST /acme/new-account": "RFC 8555 §7.3 new-account (default-profile shorthand); documented in docs/acme-server.md",
|
||||
"POST /acme/account/{acc_id}": "RFC 8555 §7.3.2 + §7.3.6 (default-profile shorthand); documented in docs/acme-server.md",
|
||||
|
||||
// Phase 2 — orders + finalize + authz + cert.
|
||||
"POST /acme/profile/{id}/new-order": "RFC 8555 §7.4 new-order; documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/order/{ord_id}": "RFC 8555 §7.4 order POST-as-GET; documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/order/{ord_id}/finalize": "RFC 8555 §7.4 finalize; documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/authz/{authz_id}": "RFC 8555 §7.5 authz POST-as-GET; documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/challenge/{chall_id}": "RFC 8555 §7.5.1 challenge response POST; Phase 3 dispatches to validator pool.",
|
||||
"POST /acme/profile/{id}/cert/{cert_id}": "RFC 8555 §7.4.2 cert download; documented in docs/acme-server.md",
|
||||
"POST /acme/new-order": "Phase 2 default-profile shorthand for new-order.",
|
||||
"POST /acme/order/{ord_id}": "Phase 2 default-profile shorthand for order POST-as-GET.",
|
||||
"POST /acme/order/{ord_id}/finalize": "Phase 2 default-profile shorthand for finalize.",
|
||||
"POST /acme/authz/{authz_id}": "Phase 2 default-profile shorthand for authz POST-as-GET.",
|
||||
"POST /acme/challenge/{chall_id}": "Phase 3 default-profile shorthand for challenge response.",
|
||||
"POST /acme/cert/{cert_id}": "Phase 2 default-profile shorthand for cert download.",
|
||||
// Phase 4 — key rollover + revocation + ARI.
|
||||
"POST /acme/profile/{id}/key-change": "RFC 8555 §7.3.5 doubly-signed key rollover; documented in docs/acme-server.md",
|
||||
"POST /acme/profile/{id}/revoke-cert": "RFC 8555 §7.6 revoke-cert (kid OR cert-key auth); documented in docs/acme-server.md",
|
||||
"GET /acme/profile/{id}/renewal-info/{cert_id}": "RFC 9773 ACME Renewal Information (unauthenticated GET); documented in docs/acme-server.md",
|
||||
"POST /acme/key-change": "Phase 4 default-profile shorthand for key rollover.",
|
||||
"POST /acme/revoke-cert": "Phase 4 default-profile shorthand for revoke-cert.",
|
||||
"GET /acme/renewal-info/{cert_id}": "Phase 4 default-profile shorthand for ARI.",
|
||||
}
|
||||
|
||||
func TestRouter_OpenAPIParity(t *testing.T) {
|
||||
|
||||
@@ -143,6 +143,19 @@ type HandlerRegistry struct {
|
||||
// Both endpoints are admin-gated (M-008 pin updated to include
|
||||
// admin_est.go).
|
||||
AdminEST handler.AdminESTHandler
|
||||
// ACME handles RFC 8555 ACME server endpoints under
|
||||
// /acme/profile/<id>/* and the optional /acme/* shorthand.
|
||||
// Phase 1a wires:
|
||||
// GET /acme/profile/{id}/directory
|
||||
// HEAD /acme/profile/{id}/new-nonce
|
||||
// GET /acme/profile/{id}/new-nonce
|
||||
// GET /acme/directory (shorthand)
|
||||
// HEAD /acme/new-nonce (shorthand)
|
||||
// GET /acme/new-nonce (shorthand)
|
||||
// Subsequent phases add new-account + account/<id>, orders,
|
||||
// authzs, challenges, key-change, revoke-cert, ARI. See
|
||||
// docs/acme-server.md for the configuration reference.
|
||||
ACME handler.ACMEHandler
|
||||
}
|
||||
|
||||
// RegisterHandlers sets up all API routes with their handlers.
|
||||
@@ -389,6 +402,50 @@ func (r *Router) RegisterHandlers(reg HandlerRegistry) {
|
||||
r.Register("DELETE /api/v1/health-checks/{id}", http.HandlerFunc(reg.HealthChecks.DeleteHealthCheck))
|
||||
r.Register("GET /api/v1/health-checks/{id}/history", http.HandlerFunc(reg.HealthChecks.GetHealthCheckHistory))
|
||||
r.Register("POST /api/v1/health-checks/{id}/acknowledge", http.HandlerFunc(reg.HealthChecks.AcknowledgeHealthCheck))
|
||||
|
||||
// ACME (RFC 8555 + RFC 9773 ARI) server endpoints. Phase 1a wires
|
||||
// directory + new-nonce only; Phases 1b-4 extend with the JWS-
|
||||
// authenticated POST surface (new-account, new-order, finalize,
|
||||
// challenges, revoke, ARI). Routes go through r.Register so the
|
||||
// standard middleware chain (CORS, body-limit, audit) applies —
|
||||
// ACME's own per-op metrics + RFC 8555 §6.5 Replay-Nonce headers
|
||||
// are added by the handler.
|
||||
//
|
||||
// Per-profile path family (canonical):
|
||||
r.Register("GET /acme/profile/{id}/directory", http.HandlerFunc(reg.ACME.Directory))
|
||||
r.Register("HEAD /acme/profile/{id}/new-nonce", http.HandlerFunc(reg.ACME.NewNonce))
|
||||
r.Register("GET /acme/profile/{id}/new-nonce", http.HandlerFunc(reg.ACME.NewNonce))
|
||||
r.Register("POST /acme/profile/{id}/new-account", http.HandlerFunc(reg.ACME.NewAccount))
|
||||
r.Register("POST /acme/profile/{id}/account/{acc_id}", http.HandlerFunc(reg.ACME.Account))
|
||||
r.Register("POST /acme/profile/{id}/new-order", http.HandlerFunc(reg.ACME.NewOrder))
|
||||
r.Register("POST /acme/profile/{id}/order/{ord_id}", http.HandlerFunc(reg.ACME.Order))
|
||||
r.Register("POST /acme/profile/{id}/order/{ord_id}/finalize", http.HandlerFunc(reg.ACME.OrderFinalize))
|
||||
r.Register("POST /acme/profile/{id}/authz/{authz_id}", http.HandlerFunc(reg.ACME.Authz))
|
||||
r.Register("POST /acme/profile/{id}/challenge/{chall_id}", http.HandlerFunc(reg.ACME.Challenge))
|
||||
r.Register("POST /acme/profile/{id}/cert/{cert_id}", http.HandlerFunc(reg.ACME.Cert))
|
||||
r.Register("POST /acme/profile/{id}/key-change", http.HandlerFunc(reg.ACME.KeyChange))
|
||||
r.Register("POST /acme/profile/{id}/revoke-cert", http.HandlerFunc(reg.ACME.RevokeCert))
|
||||
// RFC 9773 ARI: GET-only + unauthenticated (cert-manager-shaped
|
||||
// clients fetch this without a JWS).
|
||||
r.Register("GET /acme/profile/{id}/renewal-info/{cert_id}", http.HandlerFunc(reg.ACME.RenewalInfo))
|
||||
// Default-profile shorthand. The handler's profile-resolution path
|
||||
// returns userActionRequired (RFC 7807 + RFC 8555 §6.7) when
|
||||
// CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID is unset; when set it
|
||||
// dispatches to the same handler as the per-profile path.
|
||||
r.Register("GET /acme/directory", http.HandlerFunc(reg.ACME.Directory))
|
||||
r.Register("HEAD /acme/new-nonce", http.HandlerFunc(reg.ACME.NewNonce))
|
||||
r.Register("GET /acme/new-nonce", http.HandlerFunc(reg.ACME.NewNonce))
|
||||
r.Register("POST /acme/new-account", http.HandlerFunc(reg.ACME.NewAccount))
|
||||
r.Register("POST /acme/account/{acc_id}", http.HandlerFunc(reg.ACME.Account))
|
||||
r.Register("POST /acme/new-order", http.HandlerFunc(reg.ACME.NewOrder))
|
||||
r.Register("POST /acme/order/{ord_id}", http.HandlerFunc(reg.ACME.Order))
|
||||
r.Register("POST /acme/order/{ord_id}/finalize", http.HandlerFunc(reg.ACME.OrderFinalize))
|
||||
r.Register("POST /acme/authz/{authz_id}", http.HandlerFunc(reg.ACME.Authz))
|
||||
r.Register("POST /acme/challenge/{chall_id}", http.HandlerFunc(reg.ACME.Challenge))
|
||||
r.Register("POST /acme/cert/{cert_id}", http.HandlerFunc(reg.ACME.Cert))
|
||||
r.Register("POST /acme/key-change", http.HandlerFunc(reg.ACME.KeyChange))
|
||||
r.Register("POST /acme/revoke-cert", http.HandlerFunc(reg.ACME.RevokeCert))
|
||||
r.Register("GET /acme/renewal-info/{cert_id}", http.HandlerFunc(reg.ACME.RenewalInfo))
|
||||
}
|
||||
|
||||
// RegisterESTHandlers sets up EST (RFC 7030) routes under
|
||||
|
||||
+282
-39
@@ -13,21 +13,27 @@ import (
|
||||
// Config represents the complete application configuration.
|
||||
// All configuration values are read from environment variables with CERTCTL_ prefix.
|
||||
type Config struct {
|
||||
Server ServerConfig
|
||||
Database DatabaseConfig
|
||||
Scheduler SchedulerConfig
|
||||
Log LogConfig
|
||||
Auth AuthConfig
|
||||
RateLimit RateLimitConfig
|
||||
CORS CORSConfig
|
||||
Keygen KeygenConfig
|
||||
CA CAConfig
|
||||
Notifiers NotifierConfig
|
||||
NetworkScan NetworkScanConfig
|
||||
EST ESTConfig
|
||||
SCEP SCEPConfig
|
||||
Verification VerificationConfig
|
||||
ACME ACMEConfig
|
||||
Server ServerConfig
|
||||
Database DatabaseConfig
|
||||
Scheduler SchedulerConfig
|
||||
Log LogConfig
|
||||
Auth AuthConfig
|
||||
RateLimit RateLimitConfig
|
||||
CORS CORSConfig
|
||||
Keygen KeygenConfig
|
||||
CA CAConfig
|
||||
Notifiers NotifierConfig
|
||||
NetworkScan NetworkScanConfig
|
||||
EST ESTConfig
|
||||
SCEP SCEPConfig
|
||||
Verification VerificationConfig
|
||||
ACME ACMEConfig
|
||||
// ACMEServer is the SERVER-side ACME (RFC 8555 + RFC 9773 ARI)
|
||||
// configuration. Distinct from ACME above (which is the consumer-
|
||||
// side issuer connector that talks UP to Let's Encrypt / pebble).
|
||||
// Server uses CERTCTL_ACME_SERVER_* prefix throughout so the two
|
||||
// namespaces stay unambiguous in operator docs and shell env.
|
||||
ACMEServer ACMEServerConfig
|
||||
Vault VaultConfig
|
||||
DigiCert DigiCertConfig
|
||||
Sectigo SectigoConfig
|
||||
@@ -122,6 +128,13 @@ type EntrustConfig struct {
|
||||
// ProfileId is the optional enrollment profile identifier.
|
||||
// Setting: CERTCTL_ENTRUST_PROFILE_ID environment variable.
|
||||
ProfileId string
|
||||
|
||||
// PollMaxWaitSeconds caps GetOrderStatus's bounded-polling
|
||||
// deadline. Approval-pending workflows should bump this (e.g.,
|
||||
// 86400 = 24h) so a single tick can wait through the approval
|
||||
// window. Default 600. Audit fix #5.
|
||||
// Setting: CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS.
|
||||
PollMaxWaitSeconds int
|
||||
}
|
||||
|
||||
// GlobalSignConfig contains GlobalSign Atlas HVCA issuer connector configuration.
|
||||
@@ -154,6 +167,11 @@ type GlobalSignConfig struct {
|
||||
// present in the host's default trust bundle.
|
||||
// Setting: CERTCTL_GLOBALSIGN_SERVER_CA_PATH environment variable.
|
||||
ServerCAPath string
|
||||
|
||||
// PollMaxWaitSeconds caps GetOrderStatus's bounded-polling
|
||||
// deadline. Default 600 (10 minutes). Audit fix #5.
|
||||
// Setting: CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS.
|
||||
PollMaxWaitSeconds int
|
||||
}
|
||||
|
||||
// EJBCAConfig contains EJBCA (Keyfactor) issuer connector configuration.
|
||||
@@ -439,6 +457,12 @@ type DigiCertConfig struct {
|
||||
// Default: "https://www.digicert.com/services/v2".
|
||||
// Setting: CERTCTL_DIGICERT_BASE_URL environment variable.
|
||||
BaseURL string
|
||||
|
||||
// PollMaxWaitSeconds caps how long GetOrderStatus blocks doing
|
||||
// internal exponential-backoff polling before returning. Default
|
||||
// 600 (10 minutes); 0 falls back to asyncpoll default.
|
||||
// Setting: CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS. Audit fix #5.
|
||||
PollMaxWaitSeconds int
|
||||
}
|
||||
|
||||
// SectigoConfig contains Sectigo Certificate Manager issuer connector configuration.
|
||||
@@ -476,6 +500,12 @@ type SectigoConfig struct {
|
||||
// Default: "https://cert-manager.com/api".
|
||||
// Setting: CERTCTL_SECTIGO_BASE_URL environment variable.
|
||||
BaseURL string
|
||||
|
||||
// PollMaxWaitSeconds caps how long GetOrderStatus blocks doing
|
||||
// internal exponential-backoff polling. Default 600. Sectigo's
|
||||
// collectNotReady sentinel rides the backoff schedule.
|
||||
// Setting: CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS. Audit fix #5.
|
||||
PollMaxWaitSeconds int
|
||||
}
|
||||
|
||||
// GoogleCASConfig contains Google Cloud Certificate Authority Service configuration.
|
||||
@@ -621,6 +651,162 @@ type ACMEConfig struct {
|
||||
Insecure bool
|
||||
}
|
||||
|
||||
// ACMEServerConfig is the SERVER-side ACME (RFC 8555 + RFC 9773 ARI)
|
||||
// configuration. Distinct from ACMEConfig (the consumer-side issuer
|
||||
// connector that talks UP to Let's Encrypt / pebble). Server uses
|
||||
// CERTCTL_ACME_SERVER_* prefix throughout to avoid colliding with
|
||||
// the existing CERTCTL_ACME_* consumer namespace (DIRECTORY_URL /
|
||||
// PROFILE / CHALLENGE_TYPE / etc.).
|
||||
//
|
||||
// Phase 1a wires Enabled / DefaultAuthMode / DefaultProfileID /
|
||||
// NonceTTL / DirectoryMeta. Order/Authz TTLs + the per-challenge-type
|
||||
// concurrency caps + DNS01 resolver are reserved fields populated for
|
||||
// Phases 2/3 — exposing them now keeps the env-var surface stable
|
||||
// from day one (operators can set CERTCTL_ACME_SERVER_HTTP01_CONCURRENCY
|
||||
// today; it's a no-op until Phase 3 reads it).
|
||||
type ACMEServerConfig struct {
|
||||
// Enabled is the master toggle. When false, the ACME handler is
|
||||
// constructed (so the registry-shape stays stable) but no routes
|
||||
// are registered. Operators flip this on after configuring the
|
||||
// per-profile auth_mode column on certificate_profiles.
|
||||
// Setting: CERTCTL_ACME_SERVER_ENABLED.
|
||||
Enabled bool
|
||||
|
||||
// DefaultAuthMode sets the default value of certificate_profiles.acme_auth_mode
|
||||
// for NEWLY-created profiles (e.g. via API). Existing profile rows
|
||||
// retain whatever value they were created with — per-profile
|
||||
// values, once set, override this default. Architecture decision:
|
||||
// auth mode is per-profile, not server-wide.
|
||||
// Valid: "trust_authenticated" (default) or "challenge".
|
||||
// Setting: CERTCTL_ACME_SERVER_DEFAULT_AUTH_MODE.
|
||||
DefaultAuthMode string
|
||||
|
||||
// DefaultProfileID, when set, activates the /acme/* shorthand
|
||||
// path family — /acme/directory mirrors
|
||||
// /acme/profile/<DefaultProfileID>/directory etc. When empty,
|
||||
// requests to the shorthand return an RFC 7807 problem document of
|
||||
// type userActionRequired with a hint pointing at the per-profile
|
||||
// path. Single-profile deployments can set this for ergonomic
|
||||
// client config; multi-profile deployments leave it empty.
|
||||
// Setting: CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID.
|
||||
DefaultProfileID string
|
||||
|
||||
// NonceTTL is how long an issued ACME nonce remains valid before
|
||||
// the server rejects it as expired. RFC 8555 §6.5.1 allows the
|
||||
// server to set any TTL; 5 minutes is the operator-friendly
|
||||
// default (clock-skew tolerant without enabling long-replay
|
||||
// attacks). Setting: CERTCTL_ACME_SERVER_NONCE_TTL.
|
||||
NonceTTL time.Duration
|
||||
|
||||
// OrderTTL is the lifetime of an unfulfilled ACME order. Phase 2
|
||||
// reads; Phase 1a reserves the field. Default: 24h.
|
||||
// Setting: CERTCTL_ACME_SERVER_ORDER_TTL.
|
||||
OrderTTL time.Duration
|
||||
|
||||
// AuthzTTL is the lifetime of an unfulfilled authorization. Phase 2
|
||||
// reads; Phase 1a reserves. Default: 24h.
|
||||
// Setting: CERTCTL_ACME_SERVER_AUTHZ_TTL.
|
||||
AuthzTTL time.Duration
|
||||
|
||||
// HTTP01ConcurrencyMax is the bound on concurrent HTTP-01 validators
|
||||
// (semaphore weight). Phase 3 reads; Phase 1a reserves. Default: 10.
|
||||
// Setting: CERTCTL_ACME_SERVER_HTTP01_CONCURRENCY.
|
||||
HTTP01ConcurrencyMax int
|
||||
|
||||
// DNS01Resolver is the resolver address used by the DNS-01 validator.
|
||||
// Phase 3 reads; Phase 1a reserves. Default: "8.8.8.8:53".
|
||||
// Setting: CERTCTL_ACME_SERVER_DNS01_RESOLVER.
|
||||
DNS01Resolver string
|
||||
|
||||
// DNS01ConcurrencyMax bounds concurrent DNS-01 validators. Default: 10.
|
||||
// Setting: CERTCTL_ACME_SERVER_DNS01_CONCURRENCY.
|
||||
DNS01ConcurrencyMax int
|
||||
|
||||
// TLSALPN01ConcurrencyMax bounds concurrent TLS-ALPN-01 validators.
|
||||
// Default: 10. Setting: CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY.
|
||||
TLSALPN01ConcurrencyMax int
|
||||
|
||||
// ARIEnabled toggles RFC 9773 ACME Renewal Information surface
|
||||
// (the `renewalInfo` directory entry + GET
|
||||
// /acme/profile/<id>/renewal-info/<cert-id>). Default: true.
|
||||
// Operators wanting Phase-1a-style "directory + nonce + accounts +
|
||||
// orders + finalize + challenges only" can flip this off; doing so
|
||||
// drops the renewalInfo URL from the directory document so ACME
|
||||
// clients fall back to their static renewal scheduler. Phase 4 wires.
|
||||
// Setting: CERTCTL_ACME_SERVER_ARI_ENABLED.
|
||||
ARIEnabled bool
|
||||
|
||||
// ARIPollInterval is the value the server returns in the Retry-After
|
||||
// response header on a 200 ARI response — i.e., the suggested gap
|
||||
// between successive ARI polls a client should respect. RFC 9773 §4.2
|
||||
// leaves this server-policy. Default: 6h. Tighter intervals (e.g. 1h)
|
||||
// suit short-lived certs; looser intervals (24h) suit standard 90-day
|
||||
// certs. Setting: CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL.
|
||||
ARIPollInterval time.Duration
|
||||
|
||||
// RateLimitOrdersPerHour caps new-order requests per ACME account per
|
||||
// rolling hour. 0 disables (no limit). Default: 100. Hits return RFC
|
||||
// 7807 + RFC 8555 §6.7 `urn:ietf:params:acme:error:rateLimited` with
|
||||
// a Retry-After header. In-memory token-bucket — restart wipes the
|
||||
// counter, which is acceptable for orders/hour caps (eventual-
|
||||
// consistency anyway). Setting:
|
||||
// CERTCTL_ACME_SERVER_RATE_LIMIT_ORDERS_PER_HOUR.
|
||||
RateLimitOrdersPerHour int
|
||||
|
||||
// RateLimitConcurrentOrders caps the number of orders an ACME account
|
||||
// can have in pending/ready/processing state simultaneously. 0
|
||||
// disables. Default: 5. Same Problem shape as the per-hour limit.
|
||||
// Setting: CERTCTL_ACME_SERVER_RATE_LIMIT_CONCURRENT_ORDERS.
|
||||
RateLimitConcurrentOrders int
|
||||
|
||||
// RateLimitKeyChangePerHour caps account-key rollovers per account
|
||||
// per rolling hour. 0 disables. Default: 5 (rollovers should be rare;
|
||||
// a flood is an attack signal). Setting:
|
||||
// CERTCTL_ACME_SERVER_RATE_LIMIT_KEY_CHANGE_PER_HOUR.
|
||||
RateLimitKeyChangePerHour int
|
||||
|
||||
// RateLimitChallengeRespondsPerHour caps challenge-respond requests
|
||||
// per challenge per rolling hour. 0 disables. Default: 60 (defends
|
||||
// against retry storms from a misbehaving client). Setting:
|
||||
// CERTCTL_ACME_SERVER_RATE_LIMIT_CHALLENGE_RESPONDS_PER_HOUR.
|
||||
RateLimitChallengeRespondsPerHour int
|
||||
|
||||
// GCInterval is the tick interval for the ACME GC scheduler loop.
|
||||
// On each tick the loop sweeps expired nonces, transitions expired
|
||||
// pending authzs to `expired`, transitions expired
|
||||
// pending/ready/processing orders to `invalid`, and reaps Phase-2
|
||||
// atomicity-window orphans (orders without a linked cert when one
|
||||
// should exist). 0 disables the loop entirely. Default: 1m. Setting:
|
||||
// CERTCTL_ACME_SERVER_GC_INTERVAL.
|
||||
GCInterval time.Duration
|
||||
|
||||
// DirectoryMeta is the optional metadata advertised in the directory
|
||||
// document per RFC 8555 §7.1.1.
|
||||
DirectoryMeta ACMEServerDirectoryMeta
|
||||
}
|
||||
|
||||
// ACMEServerDirectoryMeta holds the optional fields of the directory
|
||||
// `meta` block. Each is populated from a CERTCTL_ACME_SERVER_*
|
||||
// env var; an all-empty struct produces an omitempty-suppressed JSON
|
||||
// `meta` field on the directory.
|
||||
type ACMEServerDirectoryMeta struct {
|
||||
// TermsOfService is a URL pointing to the operator's ToS document.
|
||||
// Setting: CERTCTL_ACME_SERVER_TOS_URL.
|
||||
TermsOfService string
|
||||
// Website is a URL pointing to the operator's homepage.
|
||||
// Setting: CERTCTL_ACME_SERVER_WEBSITE.
|
||||
Website string
|
||||
// CAAIdentities is the list of CAA-record domain values clients
|
||||
// should authorize for this server. Setting:
|
||||
// CERTCTL_ACME_SERVER_CAA_IDENTITIES (comma-separated).
|
||||
CAAIdentities []string
|
||||
// ExternalAccountRequired, when true, signals to clients that
|
||||
// new-account requires an EAB token (RFC 8555 §7.3.4). Phase 1a
|
||||
// advertises but does not enforce; EAB enforcement is a follow-up.
|
||||
// Setting: CERTCTL_ACME_SERVER_EAB_REQUIRED.
|
||||
ExternalAccountRequired bool
|
||||
}
|
||||
|
||||
// OpenSSLConfig contains OpenSSL/Custom CA issuer connector configuration.
|
||||
type OpenSSLConfig struct {
|
||||
// SignScript is the path to a shell script that signs certificate requests.
|
||||
@@ -1119,6 +1305,25 @@ type SchedulerConfig struct {
|
||||
// Setting: CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL environment variable.
|
||||
JobProcessorInterval time.Duration
|
||||
|
||||
// RenewalConcurrency caps the number of concurrent renewal/issuance/
|
||||
// deployment goroutines launched per job-processor tick. Default 25 —
|
||||
// high enough to make use of HTTP/1.1 connection reuse against an
|
||||
// upstream CA, low enough to stay under typical per-customer rate
|
||||
// limits. Operators with permissive upstream limits and large fleets
|
||||
// (>10k certs) can bump to 100; operators with strict limits or
|
||||
// async-CA-heavy fleets should keep at 25 or lower.
|
||||
//
|
||||
// Values ≤ 0 fall back to 1 (sequential) — fail-safe rather than
|
||||
// panicking on semaphore.NewWeighted(0) semantics.
|
||||
//
|
||||
// Closes the #9 acquisition-readiness blocker from the 2026-05-01
|
||||
// issuer coverage audit. Pre-fix the per-tick fan-out had no cap,
|
||||
// so a 5k-cert sweep launched 5k in-flight HTTP calls to upstream
|
||||
// CAs and tripped DigiCert/Entrust/Sectigo rate limits.
|
||||
//
|
||||
// Setting: CERTCTL_RENEWAL_CONCURRENCY environment variable.
|
||||
RenewalConcurrency int
|
||||
|
||||
// AgentHealthCheckInterval is how often the scheduler checks agent heartbeats.
|
||||
// Default: 2 minutes. Minimum: 1 second. Marks agents offline if no recent heartbeat.
|
||||
// Setting: CERTCTL_SCHEDULER_AGENT_HEALTH_CHECK_INTERVAL environment variable.
|
||||
@@ -1410,8 +1615,11 @@ func Load() (*Config, error) {
|
||||
DemoSeed: getEnvBool("CERTCTL_DEMO_SEED", false),
|
||||
},
|
||||
Scheduler: SchedulerConfig{
|
||||
RenewalCheckInterval: getEnvDuration("CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL", 1*time.Hour),
|
||||
JobProcessorInterval: getEnvDuration("CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL", 30*time.Second),
|
||||
RenewalCheckInterval: getEnvDuration("CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL", 1*time.Hour),
|
||||
JobProcessorInterval: getEnvDuration("CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL", 30*time.Second),
|
||||
// Audit fix #9 — per-tick concurrency cap on the renewal/issuance/
|
||||
// deployment goroutine fan-out. ≤0 → 1 (sequential).
|
||||
RenewalConcurrency: getEnvInt("CERTCTL_RENEWAL_CONCURRENCY", 25),
|
||||
AgentHealthCheckInterval: getEnvDuration("CERTCTL_SCHEDULER_AGENT_HEALTH_CHECK_INTERVAL", 2*time.Minute),
|
||||
NotificationProcessInterval: getEnvDuration("CERTCTL_SCHEDULER_NOTIFICATION_PROCESS_INTERVAL", 1*time.Minute),
|
||||
// I-005: retry sweep for failed notifications. Mirrors RetryInterval
|
||||
@@ -1532,19 +1740,21 @@ func Load() (*Config, error) {
|
||||
TTL: getEnv("CERTCTL_VAULT_TTL", "8760h"),
|
||||
},
|
||||
DigiCert: DigiCertConfig{
|
||||
APIKey: getEnv("CERTCTL_DIGICERT_API_KEY", ""),
|
||||
OrgID: getEnv("CERTCTL_DIGICERT_ORG_ID", ""),
|
||||
ProductType: getEnv("CERTCTL_DIGICERT_PRODUCT_TYPE", "ssl_basic"),
|
||||
BaseURL: getEnv("CERTCTL_DIGICERT_BASE_URL", "https://www.digicert.com/services/v2"),
|
||||
APIKey: getEnv("CERTCTL_DIGICERT_API_KEY", ""),
|
||||
OrgID: getEnv("CERTCTL_DIGICERT_ORG_ID", ""),
|
||||
ProductType: getEnv("CERTCTL_DIGICERT_PRODUCT_TYPE", "ssl_basic"),
|
||||
BaseURL: getEnv("CERTCTL_DIGICERT_BASE_URL", "https://www.digicert.com/services/v2"),
|
||||
PollMaxWaitSeconds: getEnvInt("CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS", 0),
|
||||
},
|
||||
Sectigo: SectigoConfig{
|
||||
CustomerURI: getEnv("CERTCTL_SECTIGO_CUSTOMER_URI", ""),
|
||||
Login: getEnv("CERTCTL_SECTIGO_LOGIN", ""),
|
||||
Password: getEnv("CERTCTL_SECTIGO_PASSWORD", ""),
|
||||
OrgID: getEnvInt("CERTCTL_SECTIGO_ORG_ID", 0),
|
||||
CertType: getEnvInt("CERTCTL_SECTIGO_CERT_TYPE", 0),
|
||||
Term: getEnvInt("CERTCTL_SECTIGO_TERM", 365),
|
||||
BaseURL: getEnv("CERTCTL_SECTIGO_BASE_URL", "https://cert-manager.com/api"),
|
||||
CustomerURI: getEnv("CERTCTL_SECTIGO_CUSTOMER_URI", ""),
|
||||
Login: getEnv("CERTCTL_SECTIGO_LOGIN", ""),
|
||||
Password: getEnv("CERTCTL_SECTIGO_PASSWORD", ""),
|
||||
OrgID: getEnvInt("CERTCTL_SECTIGO_ORG_ID", 0),
|
||||
CertType: getEnvInt("CERTCTL_SECTIGO_CERT_TYPE", 0),
|
||||
Term: getEnvInt("CERTCTL_SECTIGO_TERM", 365),
|
||||
BaseURL: getEnv("CERTCTL_SECTIGO_BASE_URL", "https://cert-manager.com/api"),
|
||||
PollMaxWaitSeconds: getEnvInt("CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS", 0),
|
||||
},
|
||||
GoogleCAS: GoogleCASConfig{
|
||||
Project: getEnv("CERTCTL_GOOGLE_CAS_PROJECT", ""),
|
||||
@@ -1561,19 +1771,21 @@ func Load() (*Config, error) {
|
||||
TemplateArn: getEnv("CERTCTL_AWS_PCA_TEMPLATE_ARN", ""),
|
||||
},
|
||||
Entrust: EntrustConfig{
|
||||
APIUrl: getEnv("CERTCTL_ENTRUST_API_URL", ""),
|
||||
ClientCertPath: getEnv("CERTCTL_ENTRUST_CLIENT_CERT_PATH", ""),
|
||||
ClientKeyPath: getEnv("CERTCTL_ENTRUST_CLIENT_KEY_PATH", ""),
|
||||
CAId: getEnv("CERTCTL_ENTRUST_CA_ID", ""),
|
||||
ProfileId: getEnv("CERTCTL_ENTRUST_PROFILE_ID", ""),
|
||||
APIUrl: getEnv("CERTCTL_ENTRUST_API_URL", ""),
|
||||
ClientCertPath: getEnv("CERTCTL_ENTRUST_CLIENT_CERT_PATH", ""),
|
||||
ClientKeyPath: getEnv("CERTCTL_ENTRUST_CLIENT_KEY_PATH", ""),
|
||||
CAId: getEnv("CERTCTL_ENTRUST_CA_ID", ""),
|
||||
ProfileId: getEnv("CERTCTL_ENTRUST_PROFILE_ID", ""),
|
||||
PollMaxWaitSeconds: getEnvInt("CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS", 0),
|
||||
},
|
||||
GlobalSign: GlobalSignConfig{
|
||||
APIUrl: getEnv("CERTCTL_GLOBALSIGN_API_URL", ""),
|
||||
APIKey: getEnv("CERTCTL_GLOBALSIGN_API_KEY", ""),
|
||||
APISecret: getEnv("CERTCTL_GLOBALSIGN_API_SECRET", ""),
|
||||
ClientCertPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH", ""),
|
||||
ClientKeyPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH", ""),
|
||||
ServerCAPath: getEnv("CERTCTL_GLOBALSIGN_SERVER_CA_PATH", ""),
|
||||
APIUrl: getEnv("CERTCTL_GLOBALSIGN_API_URL", ""),
|
||||
APIKey: getEnv("CERTCTL_GLOBALSIGN_API_KEY", ""),
|
||||
APISecret: getEnv("CERTCTL_GLOBALSIGN_API_SECRET", ""),
|
||||
ClientCertPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH", ""),
|
||||
ClientKeyPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH", ""),
|
||||
ServerCAPath: getEnv("CERTCTL_GLOBALSIGN_SERVER_CA_PATH", ""),
|
||||
PollMaxWaitSeconds: getEnvInt("CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS", 0),
|
||||
},
|
||||
EJBCA: EJBCAConfig{
|
||||
APIUrl: getEnv("CERTCTL_EJBCA_API_URL", ""),
|
||||
@@ -1596,6 +1808,37 @@ func Load() (*Config, error) {
|
||||
ARIEnabled: getEnvBool("CERTCTL_ACME_ARI_ENABLED", false),
|
||||
Insecure: getEnvBool("CERTCTL_ACME_INSECURE", false),
|
||||
},
|
||||
// ACME server (RFC 8555 + RFC 9773 ARI) — distinct from the
|
||||
// consumer-side ACME issuer connector above. Server uses
|
||||
// CERTCTL_ACME_SERVER_* prefix throughout (audit fix #11).
|
||||
// Phase 1a wires Enabled / DefaultAuthMode / DefaultProfileID /
|
||||
// NonceTTL + DirectoryMeta. Order/Authz TTLs + concurrency
|
||||
// caps + DNS01 resolver are reserved (Phases 2/3 read).
|
||||
ACMEServer: ACMEServerConfig{
|
||||
Enabled: getEnvBool("CERTCTL_ACME_SERVER_ENABLED", false),
|
||||
DefaultAuthMode: getEnv("CERTCTL_ACME_SERVER_DEFAULT_AUTH_MODE", "trust_authenticated"),
|
||||
DefaultProfileID: getEnv("CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID", ""),
|
||||
NonceTTL: getEnvDuration("CERTCTL_ACME_SERVER_NONCE_TTL", 5*time.Minute),
|
||||
OrderTTL: getEnvDuration("CERTCTL_ACME_SERVER_ORDER_TTL", 24*time.Hour),
|
||||
AuthzTTL: getEnvDuration("CERTCTL_ACME_SERVER_AUTHZ_TTL", 24*time.Hour),
|
||||
HTTP01ConcurrencyMax: getEnvInt("CERTCTL_ACME_SERVER_HTTP01_CONCURRENCY", 10),
|
||||
DNS01Resolver: getEnv("CERTCTL_ACME_SERVER_DNS01_RESOLVER", "8.8.8.8:53"),
|
||||
DNS01ConcurrencyMax: getEnvInt("CERTCTL_ACME_SERVER_DNS01_CONCURRENCY", 10),
|
||||
TLSALPN01ConcurrencyMax: getEnvInt("CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY", 10),
|
||||
ARIEnabled: getEnvBool("CERTCTL_ACME_SERVER_ARI_ENABLED", true),
|
||||
ARIPollInterval: getEnvDuration("CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL", 6*time.Hour),
|
||||
RateLimitOrdersPerHour: getEnvInt("CERTCTL_ACME_SERVER_RATE_LIMIT_ORDERS_PER_HOUR", 100),
|
||||
RateLimitConcurrentOrders: getEnvInt("CERTCTL_ACME_SERVER_RATE_LIMIT_CONCURRENT_ORDERS", 5),
|
||||
RateLimitKeyChangePerHour: getEnvInt("CERTCTL_ACME_SERVER_RATE_LIMIT_KEY_CHANGE_PER_HOUR", 5),
|
||||
RateLimitChallengeRespondsPerHour: getEnvInt("CERTCTL_ACME_SERVER_RATE_LIMIT_CHALLENGE_RESPONDS_PER_HOUR", 60),
|
||||
GCInterval: getEnvDuration("CERTCTL_ACME_SERVER_GC_INTERVAL", time.Minute),
|
||||
DirectoryMeta: ACMEServerDirectoryMeta{
|
||||
TermsOfService: getEnv("CERTCTL_ACME_SERVER_TOS_URL", ""),
|
||||
Website: getEnv("CERTCTL_ACME_SERVER_WEBSITE", ""),
|
||||
CAAIdentities: getEnvList("CERTCTL_ACME_SERVER_CAA_IDENTITIES", nil),
|
||||
ExternalAccountRequired: getEnvBool("CERTCTL_ACME_SERVER_EAB_REQUIRED", false),
|
||||
},
|
||||
},
|
||||
Digest: DigestConfig{
|
||||
Enabled: getEnvBool("CERTCTL_DIGEST_ENABLED", false),
|
||||
Interval: getEnvDuration("CERTCTL_DIGEST_INTERVAL", 24*time.Hour),
|
||||
|
||||
@@ -7,9 +7,11 @@ import (
|
||||
"crypto/rand"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
@@ -24,6 +26,7 @@ import (
|
||||
"golang.org/x/crypto/acme"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/connector/issuer"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// Config represents the ACME issuer connector configuration.
|
||||
@@ -84,6 +87,28 @@ type Config struct {
|
||||
Insecure bool `json:"insecure,omitempty"`
|
||||
}
|
||||
|
||||
// CertificateLookupRepo lets the ACME connector recover a previously-issued
|
||||
// certificate's PEM chain from the local cert store given only the serial.
|
||||
// RFC 8555 §7.6 requires the certificate DER bytes (not just the serial) on
|
||||
// the revoke wire — this interface bridges the gap so an operator who calls
|
||||
// RevokeCertificate with just a serial in hand (lost PEM, rotated key, GUI
|
||||
// revoke action) gets the same outcome as one who supplies the full DER.
|
||||
//
|
||||
// Defined at the connector boundary on purpose: the connector doesn't import
|
||||
// the service or repository packages — it accepts whatever satisfies this
|
||||
// shape. Production wiring in cmd/server/main.go injects the postgres
|
||||
// CertificateRepository (which has GetVersionBySerial); tests inject a fake.
|
||||
//
|
||||
// Audit fix #7.
|
||||
type CertificateLookupRepo interface {
|
||||
// GetVersionBySerial returns the certificate version (the row that
|
||||
// holds the PEM chain) whose SerialNumber matches the supplied
|
||||
// serial, scoped to the issuerID. Returns sql.ErrNoRows when no
|
||||
// match exists. Per RFC 5280 §4.1.2.2 serials are unique only within
|
||||
// a single issuing CA, so the issuer scope is required.
|
||||
GetVersionBySerial(ctx context.Context, issuerID, serial string) (*domain.CertificateVersion, error)
|
||||
}
|
||||
|
||||
// Connector implements the issuer.Connector interface for ACME-compatible CAs
|
||||
// (Let's Encrypt, Sectigo, ZeroSSL, etc.).
|
||||
//
|
||||
@@ -104,6 +129,35 @@ type Connector struct {
|
||||
|
||||
// DNS-01 challenge solver (nil if using HTTP-01)
|
||||
dnsSolver DNSSolver
|
||||
|
||||
// issuerID + certLookup are wired by the registry's Rebuild via
|
||||
// SetIssuerID + SetCertificateLookup. When certLookup is nil, the
|
||||
// serial-only revoke path falls back to the legacy "not supported"
|
||||
// error so old wiring paths keep their behaviour. Audit fix #7.
|
||||
issuerID string
|
||||
certLookup CertificateLookupRepo
|
||||
}
|
||||
|
||||
// SetIssuerID records the issuer ID so the serial-only revoke path can
|
||||
// scope the cert-version lookup correctly. Per RFC 5280 §4.1.2.2 serial
|
||||
// numbers are only unique within a single issuing CA, so the scope is
|
||||
// required for the lookup to be deterministic. Mirrors the existing
|
||||
// SetIssuerID setter on local.Connector.
|
||||
//
|
||||
// Audit fix #7.
|
||||
func (c *Connector) SetIssuerID(id string) {
|
||||
c.issuerID = id
|
||||
}
|
||||
|
||||
// SetCertificateLookup wires the cert-version lookup so the ACME
|
||||
// connector can recover the leaf-cert PEM (and thus the DER bytes
|
||||
// needed by RFC 8555 §7.6) from a serial-only revoke request. nil
|
||||
// means revoke-by-serial is not supported (the historical V1
|
||||
// behaviour, preserved for old wiring paths).
|
||||
//
|
||||
// RevokeCertificate additionally requires a non-empty issuer ID, so
|
||||
|
|
||||
||||
// callers are expected to pair this with SetIssuerID.
|
||||
//
|
||||
// Audit fix #7.
|
||||
func (c *Connector) SetCertificateLookup(repo CertificateLookupRepo) {
|
||||
c.certLookup = repo
|
||||
}
|
||||
|
||||
// New creates a new ACME connector with the given configuration and logger.
|
||||
@@ -515,20 +569,146 @@ func (c *Connector) RenewCertificate(ctx context.Context, request issuer.Renewal
|
||||
})
|
||||
}
|
||||
|
||||
// RevokeCertificate revokes a certificate at the ACME CA.
|
||||
// RevokeCertificate revokes a certificate at the ACME CA. RFC 8555 §7.6
|
||||
// requires the certificate DER bytes (not just the serial) on the revoke
|
||||
// wire — but a CLM platform's job is to abstract over that limitation.
|
||||
// Operators routinely have only the serial in hand: lost PEM, rotated
|
||||
// key, GUI revoke action driven by a row in the certs list.
|
||||
//
|
||||
// This method recovers the leaf-cert DER by looking the serial up in
|
||||
// the local cert-version store (CertificateLookupRepo, wired by the
|
||||
// registry's Rebuild), decoding the PEM chain into DER, and calling
|
||||
// golang.org/x/crypto/acme.Client.RevokeCert with (accountKey, der,
|
||||
// reasonCode). The reason is mapped from the RFC 5280 string in the
|
||||
// request via mapRevocationReason; nil reason maps to 0 (unspecified).
|
||||
//
|
||||
// Audit fix #7. Pre-fix this returned the literal error
|
||||
// "ACME revocation by serial not supported in V1; provide certificate
|
||||
// DER" which made GUI-driven revoke unusable for ACME-issued certs.
|
||||
func (c *Connector) RevokeCertificate(ctx context.Context, request issuer.RevocationRequest) error {
|
||||
c.logger.Info("processing ACME revocation request", "serial", request.Serial)
|
||||
|
||||
if err := c.ensureClient(ctx); err != nil {
|
||||
return fmt.Errorf("ACME client init: %w", err)
|
||||
if c.certLookup == nil {
|
||||
// Backward-compat fallback. Only fires in test paths or old
|
||||
// wiring where SetCertificateLookup was not called. The audit
|
||||
// mandates the lookup wire as the production path; this is
|
||||
// retained for the test cases that build the connector
|
||||
// directly without the registry.
|
||||
return fmt.Errorf("ACME revocation by serial requires CertificateLookup wiring; call SetCertificateLookup")
|
||||
}
|
||||
|
||||
// ACME revocation requires the certificate DER, not just the serial.
|
||||
// For now, log a warning. Full revocation requires storing the cert DER
|
||||
// or re-fetching it from the order.
|
||||
c.logger.Warn("ACME revocation requires certificate DER bytes; serial-only revocation not supported in V1",
|
||||
"serial", request.Serial)
|
||||
return fmt.Errorf("ACME revocation by serial not supported in V1; provide certificate DER")
|
||||
if c.issuerID == "" {
|
||||
// Same backward-compat reasoning. The registry calls
|
||||
// SetIssuerID alongside SetCertificateLookup; both are
|
||||
// required for the lookup to be deterministic per RFC 5280
|
||||
// §5.2.3.
|
||||
return fmt.Errorf("ACME revocation by serial requires issuer ID wiring; call SetIssuerID")
|
||||
}
|
||||
|
||||
version, err := c.certLookup.GetVersionBySerial(ctx, c.issuerID, request.Serial)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return fmt.Errorf("ACME revoke: no local cert with serial %s for issuer %s (cert may not have been issued through certctl)", request.Serial, c.issuerID)
|
||||
}
|
||||
return fmt.Errorf("ACME revoke: cert version lookup: %w", err)
|
||||
}
|
||||
|
||||
if version == nil || version.PEMChain == "" {
|
||||
return fmt.Errorf("ACME revoke: local cert version row has empty PEM chain (corrupt row?) — serial=%s", request.Serial)
|
||||
}
|
||||
|
||||
// PEMChain is "leaf cert PEM\nchain PEMs..."; we only need the
|
||||
// leaf for the ACME revoke wire. pem.Decode returns the FIRST
|
||||
// block, which is exactly the leaf, then leaves the rest in the
|
||||
// trailing slice (which we discard).
|
||||
block, _ := pem.Decode([]byte(version.PEMChain))
|
||||
if block == nil {
|
||||
return fmt.Errorf("ACME revoke: cert version PEM malformed — no PEM block found in chain (serial=%s)", request.Serial)
|
||||
}
|
||||
if block.Type != "CERTIFICATE" {
|
||||
return fmt.Errorf("ACME revoke: cert version PEM has unexpected block type %q (expected CERTIFICATE, serial=%s)", block.Type, request.Serial)
|
||||
}
|
||||
der := block.Bytes
|
||||
|
||||
if err := c.ensureClient(ctx); err != nil {
|
||||
return fmt.Errorf("ACME revoke: client init: %w", err)
|
||||
}
|
||||
|
||||
reasonCode, err := mapRevocationReason(request.Reason)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ACME revoke: %w", err)
|
||||
}
|
||||
|
||||
// golang.org/x/crypto/acme.Client.RevokeCert authenticates the
|
||||
// revoke with the supplied account key (RFC 8555 §7.6 case 1,
|
||||
// "revocation request signed with account key"). The same account
|
||||
// key issued the cert, so this path covers all certctl-issued
|
||||
// ACME certs. Revocation via the cert's private key is the
|
||||
// alternative auth path (RFC 8555 §7.6 case 2) and is out of
|
||||
// scope here.
|
||||
c.logger.Info("ACME revoke: issuing RevokeCert", "serial", request.Serial, "reason_code", reasonCode)
|
||||
if err := c.client.RevokeCert(ctx, c.accountKey, der, reasonCode); err != nil {
|
||||
return fmt.Errorf("ACME RevokeCert: %w", err)
|
||||
}
|
||||
|
||||
c.logger.Info("ACME certificate revoked", "serial", request.Serial)
|
||||
return nil
|
||||
}
|
||||
|
||||
// mapRevocationReason translates an RFC 5280 §5.3.1 reason string (as
|
||||
// it appears in a RevocationRequest from the certctl service layer)
|
||||
// into the integer reason code that
|
||||
// golang.org/x/crypto/acme.CRLReasonCode expects. Codes match RFC 5280 §5.3.1: 0 unspecified,
|
||||
// 1 keyCompromise, 2 cACompromise, 3 affiliationChanged, 4 superseded,
|
||||
// 5 cessationOfOperation, 6 certificateHold, 8 removeFromCRL,
|
||||
// 9 privilegeWithdrawn, 10 aACompromise. (7 is reserved.)
|
||||
//
|
||||
// A nil reason maps to 0 (unspecified) per RFC 5280 §5.3.1's "if the
|
||||
// reason code extension is absent the reason is unspecified". An
|
||||
// unknown reason string returns an error rather than silently mapping
|
||||
// to unspecified — operators rely on the reason for compliance
|
||||
// reporting (PCI-DSS / HIPAA) and a silent demotion would obscure a
|
||||
// real bug.
|
||||
//
|
||||
// Accepted forms: the canonical RFC 5280 camelCase ("keyCompromise"),
|
||||
// underscore_lower ("key_compromise"), and ALL_CAPS_UNDERSCORE
|
||||
// ("KEY_COMPROMISE"). The certctl revocation service emits the
|
||||
// camelCase form today, but the more relaxed parsing makes it
|
||||
// trivially safe for operators typing reasons via the API.
|
||||
//
|
||||
// Audit fix #7.
|
||||
func mapRevocationReason(reason *string) (acme.CRLReasonCode, error) {
|
||||
if reason == nil || *reason == "" {
|
||||
return acme.CRLReasonUnspecified, nil
|
||||
}
|
||||
// Normalise: lowercase, strip underscores. "keyCompromise",
|
||||
// "key_compromise", "KEY_COMPROMISE" all collapse to
|
||||
// "keycompromise" and match.
|
||||
normalized := strings.ToLower(strings.ReplaceAll(*reason, "_", ""))
|
||||
switch normalized {
|
||||
case "unspecified":
|
||||
return acme.CRLReasonUnspecified, nil
|
||||
case "keycompromise":
|
||||
return acme.CRLReasonKeyCompromise, nil
|
||||
case "cacompromise":
|
||||
return acme.CRLReasonCACompromise, nil
|
||||
case "affiliationchanged":
|
||||
return acme.CRLReasonAffiliationChanged, nil
|
||||
case "superseded":
|
||||
return acme.CRLReasonSuperseded, nil
|
||||
case "cessationofoperation":
|
||||
return acme.CRLReasonCessationOfOperation, nil
|
||||
case "certificatehold":
|
||||
return acme.CRLReasonCertificateHold, nil
|
||||
case "removefromcrl":
|
||||
return acme.CRLReasonRemoveFromCRL, nil
|
||||
case "privilegewithdrawn":
|
||||
return acme.CRLReasonPrivilegeWithdrawn, nil
|
||||
case "aacompromise":
|
||||
return acme.CRLReasonAACompromise, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown revocation reason %q (expected one of: unspecified, keyCompromise, cACompromise, affiliationChanged, superseded, cessationOfOperation, certificateHold, removeFromCRL, privilegeWithdrawn, aACompromise)", *reason)
|
||||
}
|
||||
}
|
||||
|
||||
// GetOrderStatus retrieves the current status of an ACME order.
|
||||
|
||||
@@ -563,10 +563,17 @@ func TestFetchNonce_HappyPath(t *testing.T) {
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// RevokeCertificate / GetCACertPEM / GenerateCRL / SignOCSPResponse —
|
||||
// always-error paths
|
||||
// fallback / always-error paths
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestRevokeCertificate_AlwaysError(t *testing.T) {
|
||||
// TestRevokeCertificate_UnwiredCertLookupFallback exercises the
|
||||
// backward-compat branch in RevokeCertificate that fires when
|
||||
// SetCertificateLookup was never called. Audit fix #7 replaced the
|
||||
// historical "ACME revocation by serial not supported in V1" error
|
||||
// with a more actionable one pointing at the wiring requirement; the
|
||||
// production path always wires the lookup, so this branch only fires
|
||||
// in tests / old wiring paths.
|
||||
func TestRevokeCertificate_UnwiredCertLookupFallback(t *testing.T) {
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = io.WriteString(w, `{"newOrder":"","newAccount":"","newNonce":""}`)
|
||||
@@ -578,17 +585,19 @@ func TestRevokeCertificate_AlwaysError(t *testing.T) {
|
||||
Email: "test@example.com",
|
||||
ChallengeType: "http-01",
|
||||
})
|
||||
// Intentionally do NOT call SetCertificateLookup — that's the
|
||||
// behaviour under test.
|
||||
|
||||
reason := "key compromise"
|
||||
reason := "keyCompromise"
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{
|
||||
Serial: "ABC123",
|
||||
Reason: &reason,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error from V1 ACME revocation")
|
||||
t.Fatal("expected error when CertificateLookup is unwired")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "not supported") {
|
||||
t.Errorf("error %q should mention 'not supported'", err)
|
||||
if !strings.Contains(err.Error(), "CertificateLookup") {
|
||||
t.Errorf("error %q should reference CertificateLookup wiring", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,217 @@
|
||||
package acme
|
||||
|
||||
// Audit fix #7 — serial-only ACME revocation tests.
|
||||
//
|
||||
// The happy path (issue → revoke-by-serial against a real ACME server)
|
||||
// is covered by the pebble integration test in pebble_mock_test.go's
|
||||
// follow-up; this file pins the failure-mode branches and the pure
|
||||
// mapRevocationReason translation.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/crypto/acme"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/connector/issuer"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// fakeCertLookup implements CertificateLookupRepo for tests. The two
|
||||
// fields control the GetVersionBySerial behavior; tests set them per
|
||||
// scenario.
|
||||
type fakeCertLookup struct {
|
||||
// version is the canned row returned by GetVersionBySerial.
|
||||
version *domain.CertificateVersion
|
||||
// err is the canned error returned by GetVersionBySerial.
|
||||
err error
|
||||
}
|
||||
|
||||
// GetVersionBySerial ignores its arguments and returns the canned
// version and err configured on the fake.
func (f *fakeCertLookup) GetVersionBySerial(ctx context.Context, issuerID, serial string) (*domain.CertificateVersion, error) {
|
||||
return f.version, f.err
|
||||
}
|
||||
|
||||
// newConnectorForRevoke builds an ACME connector pre-wired for a
|
||||
// revoke test. The cert-lookup is set to the supplied fake; the
|
||||
// issuer ID is "iss-test" unless cleared by the caller.
|
||||
func newConnectorForRevoke(t *testing.T, lookup CertificateLookupRepo) *Connector {
|
||||
t.Helper()
|
||||
c := New(&Config{
|
||||
DirectoryURL: "https://acme.example.test/dir",
|
||||
Email: "ops@example.com",
|
||||
}, testLogger())
|
||||
c.SetIssuerID("iss-test")
|
||||
c.SetCertificateLookup(lookup)
|
||||
return c
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_NoCertLookupWired(t *testing.T) {
|
||||
c := New(&Config{DirectoryURL: "https://x.test/dir", Email: "a@b"}, testLogger())
|
||||
// Intentionally NOT calling SetCertificateLookup — exercises the
|
||||
// backward-compat fallback for tests/old wiring paths.
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "AB:CD"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when CertificateLookup is unwired")
|
||||
}
|
||||
if !contains(err.Error(), "CertificateLookup") {
|
||||
t.Errorf("expected wiring-error message, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_NoIssuerIDWired(t *testing.T) {
|
||||
c := New(&Config{DirectoryURL: "https://x.test/dir", Email: "a@b"}, testLogger())
|
||||
c.SetCertificateLookup(&fakeCertLookup{})
|
||||
// Skip SetIssuerID — exercises the second backward-compat guard.
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "AB:CD"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when issuer ID is unwired")
|
||||
}
|
||||
if !contains(err.Error(), "issuer ID") {
|
||||
t.Errorf("expected issuer-ID-error message, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_LookupReturnsNotFound(t *testing.T) {
|
||||
c := newConnectorForRevoke(t, &fakeCertLookup{err: sql.ErrNoRows})
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "DEAD:BEEF"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when lookup returns ErrNoRows")
|
||||
}
|
||||
// Operator-facing error must mention serial + suggest the cert
|
||||
// wasn't issued through certctl.
|
||||
if !contains(err.Error(), "DEAD:BEEF") {
|
||||
t.Errorf("expected error to include serial, got: %v", err)
|
||||
}
|
||||
if !contains(err.Error(), "may not have been issued through certctl") {
|
||||
t.Errorf("expected operator-facing hint about cert not in local store, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_LookupArbitraryError(t *testing.T) {
|
||||
c := newConnectorForRevoke(t, &fakeCertLookup{err: errors.New("connection refused")})
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "AB:CD"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error to propagate")
|
||||
}
|
||||
if !contains(err.Error(), "connection refused") {
|
||||
t.Errorf("expected wrapped repo error, got: %v", err)
|
||||
}
|
||||
if !contains(err.Error(), "lookup") {
|
||||
t.Errorf("expected 'lookup' framing in error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_VersionPEMEmpty(t *testing.T) {
|
||||
c := newConnectorForRevoke(t, &fakeCertLookup{
|
||||
version: &domain.CertificateVersion{
|
||||
SerialNumber: "AB:CD",
|
||||
PEMChain: "",
|
||||
},
|
||||
})
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "AB:CD"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when version row has empty PEMChain")
|
||||
}
|
||||
if !contains(err.Error(), "empty PEM chain") {
|
||||
t.Errorf("expected empty-PEM error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_PEMMalformed_NoBlock(t *testing.T) {
|
||||
c := newConnectorForRevoke(t, &fakeCertLookup{
|
||||
version: &domain.CertificateVersion{
|
||||
SerialNumber: "AB:CD",
|
||||
PEMChain: "this is not a PEM block at all",
|
||||
},
|
||||
})
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "AB:CD"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when PEM chain has no decodable block")
|
||||
}
|
||||
if !contains(err.Error(), "no PEM block") {
|
||||
t.Errorf("expected no-PEM-block error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRevokeCertificate_PEMMalformed_WrongType(t *testing.T) {
|
||||
// A valid PEM block, but type is PRIVATE KEY — must be rejected
|
||||
// as "expected CERTIFICATE".
|
||||
pemPrivKey := "-----BEGIN PRIVATE KEY-----\nMIIBVgIBADANBgkqhkiG9w0BAQE=\n-----END PRIVATE KEY-----\n"
|
||||
c := newConnectorForRevoke(t, &fakeCertLookup{
|
||||
version: &domain.CertificateVersion{
|
||||
SerialNumber: "AB:CD",
|
||||
PEMChain: pemPrivKey,
|
||||
},
|
||||
})
|
||||
err := c.RevokeCertificate(context.Background(), issuer.RevocationRequest{Serial: "AB:CD"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error when PEM block type is not CERTIFICATE")
|
||||
}
|
||||
if !contains(err.Error(), "PRIVATE KEY") {
|
||||
t.Errorf("expected error to mention the actual block type, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMapRevocationReason_TableDriven covers the full RFC 5280 §5.3.1
|
||||
// reason set plus the canonical / underscore / ALL-CAPS spelling
|
||||
// variants and the unknown-reason and nil-reason behaviors.
|
||||
func TestMapRevocationReason_TableDriven(t *testing.T) {
|
||||
str := func(s string) *string { return &s }
|
||||
cases := []struct {
|
||||
name string
|
||||
reason *string
|
||||
want acme.CRLReasonCode
|
||||
wantErr bool
|
||||
}{
|
||||
// Nil → unspecified. RFC 5280 §5.3.1: "if the reason code
|
||||
// extension is absent the reason is unspecified".
|
||||
{"nil_reason_unspecified", nil, acme.CRLReasonUnspecified, false},
|
||||
{"empty_string_unspecified", str(""), acme.CRLReasonUnspecified, false},
|
||||
|
||||
// Canonical RFC 5280 camelCase.
|
||||
{"camel_unspecified", str("unspecified"), acme.CRLReasonUnspecified, false},
|
||||
{"camel_keyCompromise", str("keyCompromise"), acme.CRLReasonKeyCompromise, false},
|
||||
{"camel_cACompromise", str("cACompromise"), acme.CRLReasonCACompromise, false},
|
||||
{"camel_affiliationChanged", str("affiliationChanged"), acme.CRLReasonAffiliationChanged, false},
|
||||
{"camel_superseded", str("superseded"), acme.CRLReasonSuperseded, false},
|
||||
{"camel_cessationOfOperation", str("cessationOfOperation"), acme.CRLReasonCessationOfOperation, false},
|
||||
{"camel_certificateHold", str("certificateHold"), acme.CRLReasonCertificateHold, false},
|
||||
{"camel_removeFromCRL", str("removeFromCRL"), acme.CRLReasonRemoveFromCRL, false},
|
||||
{"camel_privilegeWithdrawn", str("privilegeWithdrawn"), acme.CRLReasonPrivilegeWithdrawn, false},
|
||||
{"camel_aACompromise", str("aACompromise"), acme.CRLReasonAACompromise, false},
|
||||
|
||||
// underscore_lower.
|
||||
{"underscore_key_compromise", str("key_compromise"), acme.CRLReasonKeyCompromise, false},
|
||||
{"underscore_ca_compromise", str("ca_compromise"), acme.CRLReasonCACompromise, false},
|
||||
|
||||
// ALL_CAPS_UNDERSCORE.
|
||||
{"caps_KEY_COMPROMISE", str("KEY_COMPROMISE"), acme.CRLReasonKeyCompromise, false},
|
||||
{"caps_REMOVE_FROM_CRL", str("REMOVE_FROM_CRL"), acme.CRLReasonRemoveFromCRL, false},
|
||||
|
||||
// Unknown — must error rather than silently demote.
|
||||
{"unknown_reason_errors", str("totallyMadeUp"), 0, true},
|
||||
{"reserved_code_7_unhandled", str("reserved"), 0, true}, // Reserved per RFC 5280, no canonical name.
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got, err := mapRevocationReason(tc.reason)
|
||||
if (err != nil) != tc.wantErr {
|
||||
t.Fatalf("err=%v wantErr=%v", err, tc.wantErr)
|
||||
}
|
||||
if !tc.wantErr && got != tc.want {
|
||||
t.Errorf("got code %d, want %d", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// contains reports whether needle occurs anywhere inside haystack. It
// is a small local substitute for a substring search so the tests in
// this file need not import strings. An empty needle always matches.
func contains(haystack, needle string) bool {
	width := len(needle)
	// Slide a window of len(needle) bytes across haystack; the loop
	// body never runs when needle is longer than haystack.
	for start, end := 0, width; end <= len(haystack); start, end = start+1, end+1 {
		if haystack[start:end] == needle {
			return true
		}
	}
	return false
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user