From 4dc8d3fa5b1da3da9f78aa8e6e167f92879ddbdc Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Sun, 3 May 2026 16:51:06 +0000 Subject: [PATCH] acme-server: key rollover + revocation + ARI (Phase 4/7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the RFC 8555 + RFC 9773 surface beyond the issuance happy-path: - POST /acme/profile//key-change (RFC 8555 §7.3.5) - POST /acme/profile//revoke-cert (RFC 8555 §7.6) - GET /acme/profile//renewal-info/ (RFC 9773 ARI) After this commit, ACME clients can rotate account keys, revoke certs through the ACME surface (rather than only via the certctl GUI/API), and fetch ARI for proactive renewal scheduling. Architecture: - Key rollover: outer JWS verified against the registered account key (existing kid path); the inner JWS — embedded as the outer's payload — verified against the embedded NEW jwk in a new dedicated routine (ParseAndVerifyKeyChangeInner) that enforces RFC 8555 §7.3.5 inner-only invariants: MUST use jwk + MUST NOT use kid, payload .account == outer.kid, payload.oldKey thumbprint-equals registered. A single WithinTx swaps the stored thumbprint+pem and writes the audit row. Concurrent-rollover safety via SELECT…FOR UPDATE on the conflicting account row in UpdateAccountJWKWithTx; the loser observes the winner's new thumbprint and is told to retry (409). - Revocation: two auth paths. kid → AccountOwnsCertificate single- indexed COUNT lookup over acme_orders. jwk → constant-time RFC 7638 thumbprint compare against the cert's pubkey. Both paths route through service.RevocationSvc.RevokeCertificateWithActor so the existing CRL/OCSP refresh + audit + metrics pipeline applies. RFC 5280 §5.3.1 numeric reason codes clamp to certctl's domain.ValidRevocationReasons; codes 8 (removeFromCRL) + 10 (aACompromise) clamp to 'unspecified' since they aren't in the set. - ARI is GET-only and unauth per RFC 9773 §4. Cert-id wire shape is base64url(AKI).base64url(serial); ParseARICertID strict-decodes, SerialHex emits the canonical certctl-shape lowercase-no-leading- zeros hex used in certificate_versions.serial_number. ComputeRenewalWindow has 3 branches: bound RenewalPolicy → [notAfter - days, notAfter - days/2]; no policy → last 33% of validity; past expiry → [now, now + 1d] (renew immediately). Retry-After honors CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL. What ships: - internal/api/acme/{keychange,ari}.go (+ phase4_test.go: 15 tests). - internal/api/acme/order.go: RevokeCertRequest wire shape. - internal/api/handler/acme.go: KeyChange, RevokeCert, RenewalInfo + 11 new writeServiceError mappings. - internal/repository/postgres/acme.go: UpdateAccountJWKWithTx (FOR UPDATE + expectedOldThumbprint precondition; ErrACMEAccountKey- ConcurrentUpdate sentinel) + AccountOwnsCertificate. - internal/service/acme.go: RotateAccountKey + RevokeCert + RenewalInfo; CertificateRevoker + RenewalPolicyLookup interfaces; SetRevocationDelegate + SetRenewalPolicyLookup wiring; 11 new sentinels; 6 new metrics. - internal/service/acme_phase4_test.go: service-layer tests for RotateAccountKey (happy + duplicate-key) + RevokeCert (kid mismatch + jwk mismatch + jwk happy + already-revoked + reason-clamping) + RenewalInfo (disabled + bad cert-id). - internal/api/router/router.go: 6 new register calls (3 per-profile + 3 shorthand). Router parity exceptions extended in lockstep (in-tree SpecParityExceptions + CI-only openapi-handler-exceptions .yaml). - cmd/server/main.go: SetRevocationDelegate(revocationSvc) + SetRenewalPolicyLookup(renewalPolicyRepo) at startup. - internal/config/config.go: CERTCTL_ACME_SERVER_ARI_ENABLED (default true) + CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL (default 6h); BuildDirectory's ariEnabled flag now flips on under cfg.ARIEnabled. - docs/acme-server.md: phase status flipped to Phase 4; endpoints table grows 6 rows (3 per-profile + 3 shorthand); FAQ section appended explaining how to rotate keys, revoke certs, and consume ARI. Tests: - 'go vet ./...' clean across the repo. - 'go test -short -count=1 ./...' green across every package. - phase4_test.go covers: keychange happy-path + 5 negatives + MapKeyChangeErrorToProblem coverage; ARI cert-id round-trip + 6 malformed cases + BuildARICertID from a generated cert; window- math 3 branches. - service-layer tests confirm: RotateAccountKey atomically swaps the thumbprint (verifies persisted state) and rejects duplicate keys; RevokeCert routes through the stub RevocationSvc with the right actor string + reason on the jwk path, rejects mismatched keys, rejects already-revoked certs, clamps reason codes correctly; RenewalInfo respects ARIEnabled + cert-id format. Engineering history: cowork/WORKSPACE-CHANGELOG.md 'ACME-Server-4'. --- api/openapi-handler-exceptions.yaml | 12 + cmd/server/main.go | 7 + docs/acme-server.md | 100 ++++- internal/api/acme/ari.go | 224 ++++++++++ internal/api/acme/keychange.go | 272 ++++++++++++ internal/api/acme/order.go | 9 + internal/api/acme/phase4_test.go | 362 ++++++++++++++++ internal/api/handler/acme.go | 234 +++++++++++ internal/api/handler/acme_handler_test.go | 25 ++ internal/api/router/openapi_parity_test.go | 7 + internal/api/router/router.go | 8 + internal/config/config.go | 20 + internal/repository/postgres/acme.go | 99 +++++ internal/service/acme.go | 464 ++++++++++++++++++++- internal/service/acme_phase4_test.go | 366 ++++++++++++++++ internal/service/acme_test.go | 18 + 16 files changed, 2210 insertions(+), 17 deletions(-) create mode 100644 internal/api/acme/ari.go create mode 100644 internal/api/acme/keychange.go create mode 100644 internal/api/acme/phase4_test.go create mode 100644 internal/service/acme_phase4_test.go diff --git a/api/openapi-handler-exceptions.yaml b/api/openapi-handler-exceptions.yaml index c69d357..b2c85c2 100644 --- a/api/openapi-handler-exceptions.yaml +++ b/api/openapi-handler-exceptions.yaml @@ -80,3 +80,15 @@ documented_exceptions: why: "Phase 3 default-profile shorthand for challenge response." - route: "POST /acme/cert/{cert_id}" why: "Phase 2 default-profile shorthand for cert download." + - route: "POST /acme/profile/{id}/key-change" + why: "ACME server RFC 8555 §7.3.5 doubly-signed key rollover; documented in docs/acme-server.md." + - route: "POST /acme/profile/{id}/revoke-cert" + why: "ACME server RFC 8555 §7.6 revoke-cert (kid OR cert-key auth); documented in docs/acme-server.md." + - route: "GET /acme/profile/{id}/renewal-info/{cert_id}" + why: "ACME server RFC 9773 ACME Renewal Information (unauthenticated GET); documented in docs/acme-server.md." + - route: "POST /acme/key-change" + why: "Phase 4 default-profile shorthand for key rollover." + - route: "POST /acme/revoke-cert" + why: "Phase 4 default-profile shorthand for revoke-cert." + - route: "GET /acme/renewal-info/{cert_id}" + why: "Phase 4 default-profile shorthand for ARI." diff --git a/cmd/server/main.go b/cmd/server/main.go index 0ccb296..5bf7acc 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -778,6 +778,13 @@ func main() { DNS01Resolver: cfg.ACMEServer.DNS01Resolver, }) acmeService.SetValidatorPool(acmeValidatorPool) + // Phase 4 — revocation pipeline + renewal-policy lookup. The same + // revocationSvc instance shared across the rest of the platform + // covers ACME revoke-cert; the renewalPolicyRepo backs ARI window + // math (when present, ComputeRenewalWindow uses RenewalWindowDays; + // when absent, falls back to last-33%-of-validity). + acmeService.SetRevocationDelegate(revocationSvc) + acmeService.SetRenewalPolicyLookup(renewalPolicyRepo) acmeHandler := handler.NewACMEHandler(acmeService) // Build the API router with all handlers diff --git a/docs/acme-server.md b/docs/acme-server.md index 56ef6ca..628d4fa 100644 --- a/docs/acme-server.md +++ b/docs/acme-server.md @@ -7,13 +7,12 @@ as an ACME issuer with no certctl-side modification — closing the "deploy a certctl agent on every K8s node" friction that costs deals to external PKI vendors today. -> **Phase status (2026-05-03):** Phase 3 — Phase 2's surface plus -> challenge validation: HTTP-01 (RFC 8555 §8.3), DNS-01 (§8.4), and -> TLS-ALPN-01 (RFC 8737). Profiles in `challenge` mode now resolve -> end-to-end: client POSTs to `/challenge/`, the server dispatches -> a bounded-concurrency worker pool to fetch the proof out-of-band, -> the validator updates the challenge → authz → order status chain -> on completion. Profiles in `trust_authenticated` mode are unchanged. +> **Phase status (2026-05-03):** Phase 4 — closes the RFC 8555 surface +> beyond the issuance happy-path: doubly-signed key rollover (§7.3.5), +> revoke-cert via either account-key or cert-key (§7.6), and RFC 9773 +> ACME Renewal Information. ACME clients can now rotate their account +> keys, revoke certs through the ACME surface (rather than only the +> certctl GUI/API), and fetch ARI for proactive renewal scheduling. > Track shipped phases via `git log --grep='acme-server:'`. ## Configuration @@ -39,6 +38,8 @@ issuer connector). The struct definition lives in | `CERTCTL_ACME_SERVER_DNS01_RESOLVER` | `8.8.8.8:53` | 3 | Reserved. | | `CERTCTL_ACME_SERVER_DNS01_CONCURRENCY` | `10` | 3 | Reserved. | | `CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY` | `10` | 3 | Reserved. | +| `CERTCTL_ACME_SERVER_ARI_ENABLED` | `true` | 4 | Toggles the RFC 9773 ARI surface — both the `renewalInfo` URL in the directory document and the GET `/renewal-info/` handler. Set to `false` to drop ARI from the directory; ACME clients fall back to static renewal scheduling. | +| `CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL` | `6h` | 4 | Server-policy `Retry-After` value the ARI handler emits on a 200 response. RFC 9773 §4.2 leaves this server-policy. Tighten to `1h` for short-lived certs; loosen to `24h` for standard 90-day certs. | ## Per-profile auth mode @@ -97,7 +98,7 @@ the `caBundle` requirement is flagged here in Phase 1a's docs because operators hit it the moment they try to point a real ACME client at certctl. -## Endpoints (Phase 2) +## Endpoints Routes registered in `internal/api/router/router.go::RegisterHandlers`: @@ -114,6 +115,9 @@ Routes registered in `internal/api/router/router.go::RegisterHandlers`: | POST | `/acme/profile/{id}/authz/{authz_id}` | RFC 8555 §7.5 | JWS kid | POST-as-GET fetch of an authorization. | | POST | `/acme/profile/{id}/challenge/{chall_id}` | RFC 8555 §7.5.1 | JWS kid | Submit a challenge for validation. Dispatches to a bounded-concurrency worker pool; clients poll authz for the eventual result. | | POST | `/acme/profile/{id}/cert/{cert_id}` | RFC 8555 §7.4.2 | JWS kid | POST-as-GET cert chain download (PEM). | +| POST | `/acme/profile/{id}/key-change` | RFC 8555 §7.3.5 | JWS kid (outer) + jwk (inner) | Doubly-signed account-key rollover. | +| POST | `/acme/profile/{id}/revoke-cert` | RFC 8555 §7.6 | JWS kid OR jwk | Revoke a cert via the issuing account's key OR the cert's own private key. Routes through the certctl revocation pipeline. | +| GET | `/acme/profile/{id}/renewal-info/{cert_id}` | RFC 9773 | unauth | Fetch the suggested renewal window for a cert (cert-id is `base64url(AKI).base64url(serial)` per RFC 9773 §4.1). Response carries `Retry-After`. | | GET | `/acme/directory` | RFC 8555 §7.1.1 | unauth | Shorthand path; mirrors per-profile when `CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID` is set. | | HEAD | `/acme/new-nonce` | RFC 8555 §7.2 | unauth | Shorthand. | | GET | `/acme/new-nonce` | RFC 8555 §7.2 | unauth | Shorthand. | @@ -124,12 +128,13 @@ Routes registered in `internal/api/router/router.go::RegisterHandlers`: | POST | `/acme/order/{ord_id}/finalize` | RFC 8555 §7.4 | JWS kid | Shorthand. | | POST | `/acme/authz/{authz_id}` | RFC 8555 §7.5 | JWS kid | Shorthand. | | POST | `/acme/cert/{cert_id}` | RFC 8555 §7.4.2 | JWS kid | Shorthand. | +| POST | `/acme/key-change` | RFC 8555 §7.3.5 | JWS kid (outer) + jwk (inner) | Shorthand. | +| POST | `/acme/revoke-cert` | RFC 8555 §7.6 | JWS kid OR jwk | Shorthand. | +| GET | `/acme/renewal-info/{cert_id}` | RFC 9773 | unauth | Shorthand. | -The remaining RFC 8555 endpoints (`challenge/{id}`, `key-change`, -`revoke-cert`, `renewal-info`) are advertised in the directory document -but not yet served — clients hitting them get a 404 until subsequent -phases land. The directory document includes their URLs because RFC 8555 -doesn't permit a partial directory. +After Phase 4, the full RFC 8555 + RFC 9773 surface is live. RFC 8739 +(short-lived certs) and EAB enforcement remain follow-up work; cert- +manager + boulder-tested clients work today against the surface above. ## Finalize routing through `CertificateService.Create` (Phase 2 architecture) @@ -200,7 +205,7 @@ at `internal/service/certificate.go:131`). | 1b | live | new-account + account/{id} + JWS verifier (RFC 7515 + go-jose v4) | | 2 | live | orders + authzs + finalize + cert download (trust_authenticated mode end-to-end) | | 3 | live | HTTP-01 + DNS-01 + TLS-ALPN-01 challenge validation (challenge mode end-to-end) | -| 4 | not yet | key rollover + revocation + ARI (RFC 9773) | +| 4 | live | key rollover (RFC 8555 §7.3.5) + revoke-cert (§7.6) + ARI (RFC 9773) | | 5 | not yet | cert-manager integration test + production hardening | | 6 | not yet | full operator-facing reference + walkthroughs + threat model | @@ -234,3 +239,70 @@ Track shipped phases via `git log --grep='acme-server:' --oneline`. `s.tx.WithinTx(...)` + `auditService.RecordEventWithTx(...)` pattern so every account state mutation is paired with an `audit_events` row. + +## Phase 4 — key rollover, revocation, ARI + +### How do I rotate my ACME account key? + +RFC 8555 §7.3.5 defines a doubly-signed JWS for the rollover. The OUTER +JWS is signed by the OLD account key (kid path); its payload IS the +INNER JWS, which is signed by the NEW account key (jwk path). cert- +manager and lego do this for you transparently — `lego renew --key-rotate` +or the cert-manager `Issuer.spec.acme.privateKeySecretRef` rollover. + +Server-side validation: + +1. Outer JWS verifies against the registered account's current key. +2. Inner JWS verifies against the embedded NEW jwk (proves possession). +3. Inner payload `account` matches outer `kid`. +4. Inner payload `oldKey` thumbprint-equals the registered key. +5. Inner protected `url` equals outer protected `url`. +6. New JWK thumbprint not already registered against the same profile. +7. `SELECT … FOR UPDATE` on the account row serializes concurrent + rollovers; the loser sees the winner's new thumbprint and is told + to retry (409). + +### How do I revoke an ACME-issued cert? + +Two auth paths per RFC 8555 §7.6: + +- **kid path:** sign with your account key. The server checks the + account "owns" the cert via `acme_orders.certificate_id` lookup. +- **jwk path:** sign with the cert's own private key. The server + extracts the cert's public key, computes the JWK, and asserts it + matches the embedded jwk thumbprint. + +Either path routes through `service.RevocationSvc.RevokeCertificateWithActor` +— the same pipeline the GUI revoke button, bulk-revocation, and the +ACME-consumer issuer use. So the cert-row update + revocation row + audit +row are all atomic in one `WithinTx`, the issuer is best-effort +notified, and the OCSP response cache is invalidated. + +Reason codes follow RFC 5280 §5.3.1; codes 8 (removeFromCRL) and 10 +(aACompromise) are not in certctl's `domain.ValidRevocationReasons` +set so they clamp to `unspecified`. + +### What is ARI? + +RFC 9773 ACME Renewal Information. Clients GET +`/acme/profile//renewal-info/` (unauthenticated) and +receive a JSON document with `suggestedWindow.start` and `.end` — +the server's recommendation for when to renew. The response also +carries `Retry-After` (RFC 9773 §4.2) hinting at the next-poll cadence. + +Cert-id format is `base64url(authorityKeyIdentifier).base64url(serial)` +per RFC 9773 §4.1. + +Window math: + +- Cert with a bound renewal policy: window starts at + `notAfter - RenewalWindowDays`, ends at `notAfter - RenewalWindowDays/2`. + So a 30-day window cert with notAfter 2026-06-30 emits start=2026-05-31, + end=2026-06-15. Boulder-shape default that lets cert-manager schedule + inside our renewal window. +- No policy: window is the last 33% of validity. +- Past expiry: window is "now" → "now + 24h" (renew immediately). + +Disable ARI globally with `CERTCTL_ACME_SERVER_ARI_ENABLED=false`. The +URL drops out of the directory; the route is still registered but +returns 404 — clients fall back to static renewal scheduling. diff --git a/internal/api/acme/ari.go b/internal/api/acme/ari.go new file mode 100644 index 0000000..66055ba --- /dev/null +++ b/internal/api/acme/ari.go @@ -0,0 +1,224 @@ +// Copyright (c) certctl +// SPDX-License-Identifier: BSL-1.1 + +package acme + +import ( + "crypto/x509" + "encoding/base64" + "encoding/hex" + "encoding/pem" + "errors" + "fmt" + "math/big" + "strings" + "time" + + "github.com/shankar0123/certctl/internal/domain" +) + +// Phase 4 — RFC 9773 ACME Renewal Information. +// +// RFC 9773 §4.1: a client computes the cert-id as +// +// base64url-no-pad(authorityKeyIdentifier) || "." || base64url-no-pad(serial) +// +// and GETs /acme/.../renewal-info/. The server responds with a +// JSON document carrying a `suggestedWindow` (start, end) the client +// SHOULD plan its renewal inside, plus an optional `explanationURL`. +// Response also carries a Retry-After header (RFC 9773 §4.2) hinting +// at the next-poll cadence. +// +// This file: +// +// - parses the cert-id wire format → (akiBytes, serialBytes). +// - converts the serial bytes to a hex string in the canonical +// certctl shape (lowercase, no leading zeros, matching how +// internal/repository/postgres/certificate.go stores them). +// - computes the suggested-window from a cert's NotAfter and an +// optional bound RenewalPolicy (last 33% of validity if no policy +// is bound). + +// RenewalInfoResponse is the JSON document returned by the renewal- +// info endpoint per RFC 9773 §4.1. +type RenewalInfoResponse struct { + SuggestedWindow RenewalWindow `json:"suggestedWindow"` + ExplanationURL string `json:"explanationURL,omitempty"` +} + +// RenewalWindow is the embedded {start, end} pair. RFC 9773 mandates +// start ≤ end; the server is responsible for emitting RFC 3339 UTC +// timestamps. +type RenewalWindow struct { + Start time.Time `json:"start"` + End time.Time `json:"end"` +} + +// ARICertID is the parsed shape of an RFC 9773 §4.1 cert-id — +// authorityKeyIdentifier and serial bytes after base64url-no-pad +// decoding. Callers compare against the certificate they already have +// in the database; AKI is informational on the server side because +// certctl's serial-uniqueness invariant is per-issuer. +type ARICertID struct { + // AKI is the raw bytes of the certificate's authorityKeyIdentifier + // extension. + AKI []byte + // Serial is the raw bytes of the certificate's serial number, in + // big-endian unsigned-integer form. + Serial []byte +} + +// SerialHex returns the canonical certctl-shape hex representation of +// the serial number — lowercase, no leading zeros (matches what's +// stored in certificate_versions.serial_number). +func (a ARICertID) SerialHex() string { + if len(a.Serial) == 0 { + return "" + } + n := new(big.Int).SetBytes(a.Serial) + if n.Sign() == 0 { + return "0" + } + return strings.ToLower(n.Text(16)) +} + +// AKIHex returns the AKI as a lowercase hex string. Useful for logging +// + future per-AKI lookup paths. +func (a ARICertID) AKIHex() string { + return strings.ToLower(hex.EncodeToString(a.AKI)) +} + +// Sentinel errors. ChooseProblem in writeServiceError translates the +// not-found cases to RFC 7807 + RFC 8555 §6.7 problems. +var ( + ErrARICertIDMalformed = errors.New("acme ari: cert-id is not .") + ErrARICertIDDecodeAKI = errors.New("acme ari: cert-id AKI is not valid base64url") + ErrARICertIDDecodeSeria = errors.New("acme ari: cert-id serial is not valid base64url") + ErrARICertIDEmpty = errors.New("acme ari: cert-id has empty AKI or serial") +) + +// ParseARICertID decodes an RFC 9773 §4.1 cert-id. The wire format is +// strictly base64url-NO-PADDING; rfc9773 §4.1 forbids regular base64. +// +// Common malformations: +// - missing or extra `.` separator → ErrARICertIDMalformed. +// - either side fails base64url decode → ErrARICertIDDecode*. +// - either side decodes to empty → ErrARICertIDEmpty. +func ParseARICertID(certID string) (*ARICertID, error) { + parts := strings.Split(certID, ".") + if len(parts) != 2 { + return nil, fmt.Errorf("%w: got %d parts", ErrARICertIDMalformed, len(parts)) + } + if parts[0] == "" || parts[1] == "" { + return nil, ErrARICertIDEmpty + } + aki, err := base64.RawURLEncoding.DecodeString(parts[0]) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrARICertIDDecodeAKI, err) + } + serial, err := base64.RawURLEncoding.DecodeString(parts[1]) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrARICertIDDecodeSeria, err) + } + if len(aki) == 0 || len(serial) == 0 { + return nil, ErrARICertIDEmpty + } + return &ARICertID{AKI: aki, Serial: serial}, nil +} + +// BuildARICertID is the inverse of ParseARICertID — useful for tests +// and operator tools that want to construct a cert-id from a leaf cert. +// +// The input is the leaf certificate's PEM. We extract the +// authorityKeyIdentifier extension and the serial number, then +// base64url-no-pad-encode each + join with a `.`. +func BuildARICertID(certPEM string) (string, error) { + block, _ := pem.Decode([]byte(certPEM)) + if block == nil { + return "", fmt.Errorf("acme ari: pem decode failed") + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return "", fmt.Errorf("acme ari: parse cert: %w", err) + } + if len(cert.AuthorityKeyId) == 0 { + return "", fmt.Errorf("acme ari: certificate has no authorityKeyIdentifier extension") + } + if cert.SerialNumber == nil { + return "", fmt.Errorf("acme ari: certificate has no serial number") + } + akiB64 := base64.RawURLEncoding.EncodeToString(cert.AuthorityKeyId) + serialB64 := base64.RawURLEncoding.EncodeToString(cert.SerialNumber.Bytes()) + return akiB64 + "." + serialB64, nil +} + +// ComputeRenewalWindow returns the RFC 9773 suggestedWindow for a +// (cert, optional renewal-policy) pair. +// +// Algorithm: +// +// - When policy is non-nil and policy.RenewalWindowDays > 0: the +// window starts at NotAfter - RenewalWindowDays + spans half of +// RenewalWindowDays. So a 30-day-renewal-window cert with NotAfter +// 2026-06-30 emits start=2026-05-31, end=2026-06-15. This matches +// boulder's default ARI behavior + ensures a Let's-Encrypt-shaped +// client can plan its renewals exactly inside our renewal window. +// - When policy is nil OR RenewalWindowDays ≤ 0: the window is the +// last 33% of validity. So a cert with NotBefore 2026-01-01 + +// NotAfter 2026-04-01 (90d validity) emits start=2026-03-01 (30d +// before expiry), end=2026-03-21 (10d before expiry). +// - When the cert is past NotAfter: the window starts at "now" and +// ends at "now + 1 day" so a client polling on an expired cert +// gets a "renew immediately" answer rather than a window in the +// past. +// +// Returns (start, end). start ≤ end is invariant. +func ComputeRenewalWindow(cert *domain.ManagedCertificate, version *domain.CertificateVersion, policy *domain.RenewalPolicy, now time.Time) (time.Time, time.Time) { + if cert == nil { + return time.Time{}, time.Time{} + } + notAfter := cert.ExpiresAt.UTC() + notBefore := notAfter + if version != nil && !version.NotBefore.IsZero() { + notBefore = version.NotBefore.UTC() + } + + // Past expiry: emit a 1-day "renew now" window. + if !now.IsZero() && now.UTC().After(notAfter) { + nowUTC := now.UTC() + return nowUTC, nowUTC.Add(24 * time.Hour) + } + + if policy != nil && policy.RenewalWindowDays > 0 { + windowDays := time.Duration(policy.RenewalWindowDays) * 24 * time.Hour + start := notAfter.Add(-windowDays) + end := start.Add(windowDays / 2) + // Defensive: never emit start in the past from "now". + if !now.IsZero() && start.Before(now.UTC()) { + start = now.UTC() + } + if end.Before(start) { + end = start + } + return start, end + } + + // No policy → last 33% of validity. + validity := notAfter.Sub(notBefore) + if validity <= 0 { + // Degenerate cert (nb >= na). Use a 1-day default window + // ending at notAfter. + return notAfter.Add(-24 * time.Hour), notAfter + } + thirty3 := validity / 3 + start := notAfter.Add(-thirty3) + // End is 1/3 before expiry → midpoint of the renewal third. + end := notAfter.Add(-thirty3 / 3) + if !now.IsZero() && start.Before(now.UTC()) { + start = now.UTC() + } + if end.Before(start) { + end = start + } + return start, end +} diff --git a/internal/api/acme/keychange.go b/internal/api/acme/keychange.go new file mode 100644 index 0000000..6f920ce --- /dev/null +++ b/internal/api/acme/keychange.go @@ -0,0 +1,272 @@ +// Copyright (c) certctl +// SPDX-License-Identifier: BSL-1.1 + +package acme + +import ( + "crypto/subtle" + "encoding/json" + "errors" + "fmt" + + jose "github.com/go-jose/go-jose/v4" +) + +// Phase 4 — RFC 8555 §7.3.5 key rollover. +// +// The wire shape is a doubly-signed JWS: +// +// JWS-outer signed by the OLD account key (kid = account URL): +// protected: { alg, kid, nonce, url } +// payload: +// +// JWS-inner signed by the NEW account key (jwk = newkey): +// protected: { alg, jwk, url= } +// payload: { account: , oldKey: } +// +// The handler runs the existing VerifyJWS pipeline against the outer +// (kid path), then hands the resulting Payload bytes to ParseAndVerify- +// KeyChangeInner so the inner is processed in isolation. Two key +// distinctions vs. the outer: +// +// - The inner JWS does NOT carry a `nonce` header. Per RFC 8555 §7.3.5 +// the outer's nonce is the only nonce; the inner is a self-contained +// proof-of-possession blob. +// - The inner JWS uses `jwk` not `kid` and the verifier must succeed +// when the embedded `jwk` itself is the verification key. +// +// This matches what go-jose's lego implementation, cert-manager, and +// boulder all expect. + +// KeyChangeInnerPayload is the parsed body of the inner JWS — RFC 8555 +// §7.3.5 mandates exactly two fields. +type KeyChangeInnerPayload struct { + // Account is the kid URL of the account whose key is being rotated. + // MUST equal the outer's `kid` header. Mismatch → keyChange's + // "account" field doesn't match outer.kid. + Account string `json:"account"` + + // OldKey is the JWK currently on file for the account. The server + // asserts this matches what we have in the database (byte-equal + // canonicalized) so a stale rollover request can't slip through. + OldKey *jose.JSONWebKey `json:"oldKey"` +} + +// KeyChangeInner is the verified inner JWS — fields the service layer +// needs to commit the rollover. +type KeyChangeInner struct { + // NewJWK is the JWK the inner JWS is signed by. After verification + // this is the key the account's row will be updated to. + NewJWK *jose.JSONWebKey + + // Payload is the inner's parsed JSON: { account, oldKey }. + Payload KeyChangeInnerPayload + + // URL is the inner protected-header `url` value, asserted equal to + // the outer's URL. + URL string + + // Algorithm is the negotiated alg the inner was signed with. + Algorithm string +} + +// Sentinel errors. Each maps to an RFC 8555 §6.7 problem type via the +// service's writeServiceError; tests assert via errors.Is. +var ( + ErrKeyChangeInnerMalformed = errors.New("acme keychange: inner JWS malformed") + ErrKeyChangeInnerAlgRejected = errors.New("acme keychange: inner JWS uses disallowed signature algorithm") + ErrKeyChangeInnerMissingJWK = errors.New("acme keychange: inner JWS protected header MUST contain `jwk`") + ErrKeyChangeInnerForbidsKID = errors.New("acme keychange: inner JWS MUST NOT contain `kid` (use `jwk`)") + ErrKeyChangeInnerInvalidJWK = errors.New("acme keychange: inner JWS embedded JWK is invalid") + ErrKeyChangeInnerURLMissing = errors.New("acme keychange: inner JWS protected header `url` is required") + ErrKeyChangeInnerURLMismatch = errors.New("acme keychange: inner JWS `url` does not match outer JWS `url`") + ErrKeyChangeInnerSignatureBad = errors.New("acme keychange: inner JWS signature did not verify against embedded JWK") + ErrKeyChangeInnerPayloadParse = errors.New("acme keychange: inner JWS payload is not parseable JSON") + ErrKeyChangeInnerAccountMismatch = errors.New("acme keychange: inner JWS payload `account` does not match outer JWS `kid`") + ErrKeyChangeInnerOldKeyMissing = errors.New("acme keychange: inner JWS payload missing `oldKey`") + ErrKeyChangeInnerOldKeyMismatch = errors.New("acme keychange: inner JWS payload `oldKey` does not match registered account key") +) + +// ParseAndVerifyKeyChangeInner parses the inner JWS bytes (i.e. the +// outer JWS's verified payload), runs the same allow-list + +// signature-verification pipeline as VerifyJWS, and asserts the inner- +// only invariants from RFC 8555 §7.3.5 (must use `jwk`, must NOT use +// `kid`, URL must match). +// +// Caller passes: +// +// - innerBytes: the outer JWS's verified payload (the inner JWS in +// compact serialization). +// - outerKID: the outer JWS's `kid` header value. The inner's payload +// `account` field MUST equal this. +// - outerURL: the outer JWS's `url` header value. The inner's +// protected-header `url` MUST equal this. +// - registeredOldJWK: the JWK currently stored on the account row. +// The inner's payload `oldKey` MUST canonicalize-equal this. +// +// Returns the verified KeyChangeInner on success, or one of the +// sentinel errors above on any validation failure. +func ParseAndVerifyKeyChangeInner(innerBytes []byte, outerKID, outerURL string, registeredOldJWK *jose.JSONWebKey) (*KeyChangeInner, error) { + // Parse against the same allow-list that VerifyJWS uses. + jws, err := jose.ParseSigned(string(innerBytes), AllowedSignatureAlgorithms) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrKeyChangeInnerMalformed, err) + } + if len(jws.Signatures) != 1 { + return nil, fmt.Errorf("%w: multi-signature inner JWS", ErrKeyChangeInnerMalformed) + } + sig := jws.Signatures[0] + if !algorithmAllowed(sig.Protected.Algorithm) { + return nil, fmt.Errorf("%w: %s", ErrKeyChangeInnerAlgRejected, sig.Protected.Algorithm) + } + + // RFC 8555 §7.3.5: the inner MUST use `jwk` and MUST NOT use `kid`. + if sig.Protected.KeyID != "" { + return nil, ErrKeyChangeInnerForbidsKID + } + jwk := sig.Protected.JSONWebKey + if jwk == nil { + return nil, ErrKeyChangeInnerMissingJWK + } + if !jwk.Valid() { + return nil, ErrKeyChangeInnerInvalidJWK + } + + // URL header MUST equal the outer's URL. + innerURL, err := extractStringHeader(sig.Protected.ExtraHeaders, "url") + if err != nil { + return nil, ErrKeyChangeInnerURLMissing + } + if innerURL == "" { + return nil, ErrKeyChangeInnerURLMissing + } + if innerURL != outerURL { + return nil, fmt.Errorf("%w: inner=%q outer=%q", ErrKeyChangeInnerURLMismatch, innerURL, outerURL) + } + + // Verify the inner signature against the embedded jwk. + verifiedPayload, err := jws.Verify(jwk.Key) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrKeyChangeInnerSignatureBad, err) + } + + // Parse the inner payload. + var payload KeyChangeInnerPayload + if err := json.Unmarshal(verifiedPayload, &payload); err != nil { + return nil, fmt.Errorf("%w: %v", ErrKeyChangeInnerPayloadParse, err) + } + + // `account` MUST equal outer's kid. + if payload.Account != outerKID { + return nil, fmt.Errorf("%w: payload=%q outer.kid=%q", + ErrKeyChangeInnerAccountMismatch, payload.Account, outerKID) + } + + // `oldKey` MUST be present and canonicalize-equal to registered. + if payload.OldKey == nil { + return nil, ErrKeyChangeInnerOldKeyMissing + } + if !payload.OldKey.Valid() { + return nil, fmt.Errorf("%w: oldKey did not validate", ErrKeyChangeInnerOldKeyMismatch) + } + eq, err := jwksThumbprintEqual(payload.OldKey, registeredOldJWK) + if err != nil { + return nil, fmt.Errorf("%w: thumbprint compare: %v", ErrKeyChangeInnerOldKeyMismatch, err) + } + if !eq { + return nil, ErrKeyChangeInnerOldKeyMismatch + } + + return &KeyChangeInner{ + NewJWK: jwk, + Payload: payload, + URL: innerURL, + Algorithm: sig.Protected.Algorithm, + }, nil +} + +// jwksThumbprintEqual compares two JWKs by RFC 7638 thumbprint, which +// is the canonical identity for a public key. We deliberately compare +// thumbprints rather than serialized bytes because go-jose may emit +// fields in different orders for "equal" keys. +// +// Returns (true, nil) when both thumbprints exist and match in +// constant time; (false, err) on any thumbprint computation error; +// (false, nil) when the thumbprints differ. +func jwksThumbprintEqual(a, b *jose.JSONWebKey) (bool, error) { + if a == nil || b == nil { + return false, nil + } + tA, err := JWKThumbprint(a) + if err != nil { + return false, err + } + tB, err := JWKThumbprint(b) + if err != nil { + return false, err + } + return subtle.ConstantTimeCompare([]byte(tA), []byte(tB)) == 1, nil +} + +// MapKeyChangeErrorToProblem renders an inner-JWS validation error as +// an RFC 7807 + RFC 8555 §6.7 Problem the handler emits via +// WriteProblem. +// +// All inner-JWS errors map to operator-friendly problem types. The +// detail string is a concise summary; the full err.Error() context is +// suppressed to avoid leaking internal-state details (master-prompt +// criterion #10). +func MapKeyChangeErrorToProblem(err error) Problem { + switch { + case errors.Is(err, ErrKeyChangeInnerSignatureBad), + errors.Is(err, ErrKeyChangeInnerOldKeyMismatch): + // Both indicate "you don't actually possess the rollover key + // pair" — treat as unauthorized per RFC 8555 §7.3.5. + return Problem{ + Type: "urn:ietf:params:acme:error:unauthorized", + Detail: "key rollover proof failed: " + plainCause(err), + Status: 401, + } + case errors.Is(err, ErrKeyChangeInnerURLMismatch), + errors.Is(err, ErrKeyChangeInnerURLMissing): + return Problem{ + Type: "urn:ietf:params:acme:error:unauthorized", + Detail: "key rollover inner URL: " + plainCause(err), + Status: 401, + } + case errors.Is(err, ErrKeyChangeInnerAlgRejected): + return Malformed("key rollover inner JWS uses disallowed algorithm") + case errors.Is(err, ErrKeyChangeInnerForbidsKID): + return Malformed("key rollover inner JWS MUST use `jwk`, not `kid`") + case errors.Is(err, ErrKeyChangeInnerMissingJWK), + errors.Is(err, ErrKeyChangeInnerInvalidJWK): + return Malformed("key rollover inner JWS missing or invalid `jwk`") + case errors.Is(err, ErrKeyChangeInnerAccountMismatch): + return Malformed("key rollover inner `account` does not match outer kid") + case errors.Is(err, ErrKeyChangeInnerOldKeyMissing): + return Malformed("key rollover inner missing `oldKey`") + case errors.Is(err, ErrKeyChangeInnerPayloadParse): + return Malformed("key rollover inner payload is not valid JSON") + case errors.Is(err, ErrKeyChangeInnerMalformed): + return Malformed("key rollover inner JWS malformed") + default: + return Malformed("key rollover request rejected") + } +} + +// plainCause extracts the leaf error text without leaking the full +// wrap chain. Used by MapKeyChangeErrorToProblem to keep the operator- +// facing detail concise. +func plainCause(err error) string { + if err == nil { + return "" + } + // Walk to the leaf cause; emit its message verbatim. + for { + next := errors.Unwrap(err) + if next == nil { + return err.Error() + } + err = next + } +} diff --git a/internal/api/acme/order.go b/internal/api/acme/order.go index 855dceb..b2b4451 100644 --- a/internal/api/acme/order.go +++ b/internal/api/acme/order.go @@ -90,6 +90,15 @@ type FinalizeRequest struct { CSR string `json:"csr"` } +// RevokeCertRequest is the payload shape RFC 8555 §7.6 mandates for +// revoke-cert. `certificate` is the base64url-DER of the leaf cert +// being revoked; `reason` is an optional RFC 5280 §5.3.1 numeric reason +// code (defaults to 0/unspecified when absent). +type RevokeCertRequest struct { + Certificate string `json:"certificate"` + Reason int `json:"reason,omitempty"` +} + // AuthorizationResponseJSON is the wire shape RFC 8555 §7.1.4 mandates // for the authz GET (POST-as-GET) response. type AuthorizationResponseJSON struct { diff --git a/internal/api/acme/phase4_test.go b/internal/api/acme/phase4_test.go new file mode 100644 index 0000000..31f45be --- /dev/null +++ b/internal/api/acme/phase4_test.go @@ -0,0 +1,362 @@ +// Copyright (c) certctl +// SPDX-License-Identifier: BSL-1.1 + +package acme + +import ( + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/base64" + "encoding/json" + "encoding/pem" + "errors" + "math/big" + "strings" + "testing" + "time" + + jose "github.com/go-jose/go-jose/v4" + + "github.com/shankar0123/certctl/internal/domain" +) + +// --- Test fixtures + helpers ------------------------------------------- + +func newTestRSAJWK(t *testing.T) (*rsa.PrivateKey, *jose.JSONWebKey) { + t.Helper() + priv, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("rsa.GenerateKey: %v", err) + } + jwk := &jose.JSONWebKey{Key: priv.Public(), Algorithm: string(jose.RS256), Use: "sig"} + return priv, jwk +} + +func newTestECDSAJWK(t *testing.T) (*ecdsa.PrivateKey, *jose.JSONWebKey) { + t.Helper() + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("ecdsa.GenerateKey: %v", err) + } + jwk := &jose.JSONWebKey{Key: priv.Public(), Algorithm: string(jose.ES256), Use: "sig"} + return priv, jwk +} + +// signWithEmbeddedJWK builds an RFC-7515-compatible compact-serialized JWS with +// the given protected header + payload, signed by signer. Used for +// constructing inner-key-change blobs in tests. +func signWithEmbeddedJWK(t *testing.T, signer interface{}, alg jose.SignatureAlgorithm, payload []byte, headers map[jose.HeaderKey]interface{}, embedJWK *jose.JSONWebKey) string { + t.Helper() + opts := &jose.SignerOptions{ExtraHeaders: headers} + if embedJWK != nil { + opts = opts.WithHeader("jwk", embedJWK) + } + sigSigner, err := jose.NewSigner(jose.SigningKey{Algorithm: alg, Key: signer}, opts) + if err != nil { + t.Fatalf("NewSigner: %v", err) + } + obj, err := sigSigner.Sign(payload) + if err != nil { + t.Fatalf("Sign: %v", err) + } + out, err := obj.CompactSerialize() + if err != nil { + t.Fatalf("CompactSerialize: %v", err) + } + return out +} + +// --- KeyChange tests ---------------------------------------------------- + +func TestParseAndVerifyKeyChangeInner_HappyPath(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + newPriv, newJWK := newTestECDSAJWK(t) + + url := "https://example.com/acme/profile/p1/key-change" + kid := "https://example.com/acme/profile/p1/account/acme-acc-abc" + payloadJSON, err := json.Marshal(KeyChangeInnerPayload{ + Account: kid, + OldKey: oldJWK, + }) + if err != nil { + t.Fatalf("marshal payload: %v", err) + } + headers := map[jose.HeaderKey]interface{}{"url": url} + innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK) + + got, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK) + if err != nil { + t.Fatalf("ParseAndVerifyKeyChangeInner: %v", err) + } + if got.Payload.Account != kid { + t.Errorf("payload.Account = %q, want %q", got.Payload.Account, kid) + } + if got.URL != url { + t.Errorf("URL = %q, want %q", got.URL, url) + } + if got.NewJWK == nil { + t.Errorf("NewJWK is nil") + } +} + +func TestParseAndVerifyKeyChangeInner_OldKeyMismatch(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + _, otherJWK := newTestRSAJWK(t) + newPriv, newJWK := newTestECDSAJWK(t) + + url := "https://example.com/acme/profile/p1/key-change" + kid := "https://example.com/acme/profile/p1/account/acme-acc-abc" + // payload claims an oldKey that doesn't match what's registered. + payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: otherJWK}) + headers := map[jose.HeaderKey]interface{}{"url": url} + innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK) + + _, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK) + if !errors.Is(err, ErrKeyChangeInnerOldKeyMismatch) { + t.Errorf("got err=%v, want ErrKeyChangeInnerOldKeyMismatch", err) + } +} + +func TestParseAndVerifyKeyChangeInner_AccountMismatch(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + newPriv, newJWK := newTestECDSAJWK(t) + + url := "https://example.com/acme/profile/p1/key-change" + outerKID := "https://example.com/acme/profile/p1/account/acme-acc-abc" + // payload.Account does NOT equal outer.kid. + payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{ + Account: "https://example.com/acme/profile/p1/account/acme-acc-DIFFERENT", + OldKey: oldJWK, + }) + headers := map[jose.HeaderKey]interface{}{"url": url} + innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK) + + _, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), outerKID, url, oldJWK) + if !errors.Is(err, ErrKeyChangeInnerAccountMismatch) { + t.Errorf("got err=%v, want ErrKeyChangeInnerAccountMismatch", err) + } +} + +func TestParseAndVerifyKeyChangeInner_URLMismatch(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + newPriv, newJWK := newTestECDSAJWK(t) + + innerURL := "https://example.com/acme/profile/p1/key-change" + outerURL := "https://example.com/acme/profile/p1/key-change-different" + kid := "https://example.com/acme/profile/p1/account/acme-acc-abc" + payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: oldJWK}) + headers := map[jose.HeaderKey]interface{}{"url": innerURL} + innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, newJWK) + + _, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, outerURL, oldJWK) + if !errors.Is(err, ErrKeyChangeInnerURLMismatch) { + t.Errorf("got err=%v, want ErrKeyChangeInnerURLMismatch", err) + } +} + +func TestParseAndVerifyKeyChangeInner_BadSignature(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + newPriv, newJWK := newTestECDSAJWK(t) + _, otherJWK := newTestECDSAJWK(t) // different key embedded vs. signer + + url := "https://example.com/acme/profile/p1/key-change" + kid := "https://example.com/acme/profile/p1/account/acme-acc-abc" + payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: oldJWK}) + headers := map[jose.HeaderKey]interface{}{"url": url} + // Sign with newPriv but embed otherJWK — verification against the + // embedded jwk will fail since the signer didn't possess otherJWK's + // private key. + innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, headers, otherJWK) + + _, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK) + if !errors.Is(err, ErrKeyChangeInnerSignatureBad) { + t.Errorf("got err=%v, want ErrKeyChangeInnerSignatureBad", err) + } + _ = newJWK +} + +func TestParseAndVerifyKeyChangeInner_MalformedJWS(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + _, err := ParseAndVerifyKeyChangeInner([]byte("not-a-jws"), "kid", "url", oldJWK) + if !errors.Is(err, ErrKeyChangeInnerMalformed) { + t.Errorf("got err=%v, want ErrKeyChangeInnerMalformed", err) + } +} + +func TestParseAndVerifyKeyChangeInner_MissingURL(t *testing.T) { + _, oldJWK := newTestRSAJWK(t) + newPriv, newJWK := newTestECDSAJWK(t) + + url := "https://example.com/acme/profile/p1/key-change" + kid := "https://example.com/acme/profile/p1/account/acme-acc-abc" + payloadJSON, _ := json.Marshal(KeyChangeInnerPayload{Account: kid, OldKey: oldJWK}) + // No `url` header. + innerBytes := signWithEmbeddedJWK(t, newPriv, jose.ES256, payloadJSON, nil, newJWK) + + _, err := ParseAndVerifyKeyChangeInner([]byte(innerBytes), kid, url, oldJWK) + if !errors.Is(err, ErrKeyChangeInnerURLMissing) { + t.Errorf("got err=%v, want ErrKeyChangeInnerURLMissing", err) + } +} + +func TestMapKeyChangeErrorToProblem_Coverage(t *testing.T) { + cases := []struct { + err error + wantType string + }{ + {ErrKeyChangeInnerSignatureBad, "urn:ietf:params:acme:error:unauthorized"}, + {ErrKeyChangeInnerOldKeyMismatch, "urn:ietf:params:acme:error:unauthorized"}, + {ErrKeyChangeInnerAccountMismatch, "urn:ietf:params:acme:error:malformed"}, + {ErrKeyChangeInnerForbidsKID, "urn:ietf:params:acme:error:malformed"}, + {ErrKeyChangeInnerMissingJWK, "urn:ietf:params:acme:error:malformed"}, + {ErrKeyChangeInnerOldKeyMissing, "urn:ietf:params:acme:error:malformed"}, + {ErrKeyChangeInnerURLMismatch, "urn:ietf:params:acme:error:unauthorized"}, + {ErrKeyChangeInnerMalformed, "urn:ietf:params:acme:error:malformed"}, + } + for _, c := range cases { + got := MapKeyChangeErrorToProblem(c.err) + if got.Type != c.wantType { + t.Errorf("err=%v: got type %q, want %q", c.err, got.Type, c.wantType) + } + } +} + +// --- ARI tests ---------------------------------------------------------- + +func TestParseARICertID_Roundtrip(t *testing.T) { + aki := []byte{0xde, 0xad, 0xbe, 0xef, 0x01, 0x02} + serial := []byte{0x12, 0x34, 0x56, 0x78} + certID := base64.RawURLEncoding.EncodeToString(aki) + "." + base64.RawURLEncoding.EncodeToString(serial) + + got, err := ParseARICertID(certID) + if err != nil { + t.Fatalf("ParseARICertID: %v", err) + } + if string(got.AKI) != string(aki) { + t.Errorf("AKI: got %x, want %x", got.AKI, aki) + } + if string(got.Serial) != string(serial) { + t.Errorf("Serial: got %x, want %x", got.Serial, serial) + } + // SerialHex must match canonical certctl shape. + wantSerialHex := "12345678" + if got.SerialHex() != wantSerialHex { + t.Errorf("SerialHex: got %q, want %q", got.SerialHex(), wantSerialHex) + } +} + +func TestParseARICertID_Malformed(t *testing.T) { + cases := []struct { + name string + certID string + wantErr error + }{ + {"missing dot", "abc123nodot", ErrARICertIDMalformed}, + {"too many dots", "a.b.c", ErrARICertIDMalformed}, + {"empty aki", ".YWJj", ErrARICertIDEmpty}, + {"empty serial", "YWJj.", ErrARICertIDEmpty}, + {"non-base64 aki", "!!!!.YWJj", ErrARICertIDDecodeAKI}, + {"non-base64 serial", "YWJj.!!!!", ErrARICertIDDecodeSeria}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + _, err := ParseARICertID(c.certID) + if !errors.Is(err, c.wantErr) { + t.Errorf("got err=%v, want %v", err, c.wantErr) + } + }) + } +} + +func TestBuildARICertID_FromGeneratedCert(t *testing.T) { + // Build a self-signed cert with an explicit AKI and serial. + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("genkey: %v", err) + } + template := &x509.Certificate{ + SerialNumber: big.NewInt(0x12345678), + Subject: pkix.Name{CommonName: "test"}, + NotBefore: time.Now(), + NotAfter: time.Now().Add(24 * time.Hour), + AuthorityKeyId: []byte{0xde, 0xad, 0xbe, 0xef}, + BasicConstraintsValid: true, + IsCA: true, + } + der, err := x509.CreateCertificate(rand.Reader, template, template, &priv.PublicKey, priv) + if err != nil { + t.Fatalf("CreateCertificate: %v", err) + } + certPEM := string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})) + + certID, err := BuildARICertID(certPEM) + if err != nil { + t.Fatalf("BuildARICertID: %v", err) + } + parts := strings.Split(certID, ".") + if len(parts) != 2 { + t.Fatalf("got %d parts, want 2", len(parts)) + } + wantAKI := base64.RawURLEncoding.EncodeToString([]byte{0xde, 0xad, 0xbe, 0xef}) + if parts[0] != wantAKI { + t.Errorf("AKI part: got %q, want %q", parts[0], wantAKI) + } +} + +func TestComputeRenewalWindow_WithPolicy(t *testing.T) { + now := time.Date(2026, 5, 1, 0, 0, 0, 0, time.UTC) + notAfter := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC) // 61 days out + cert := &domain.ManagedCertificate{ExpiresAt: notAfter} + version := &domain.CertificateVersion{ + NotBefore: time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC), + NotAfter: notAfter, + } + policy := &domain.RenewalPolicy{RenewalWindowDays: 30} + + start, end := ComputeRenewalWindow(cert, version, policy, now) + wantStart := notAfter.Add(-30 * 24 * time.Hour) // 2026-06-01 + wantEnd := wantStart.Add(15 * 24 * time.Hour) // 2026-06-16 + if !start.Equal(wantStart) { + t.Errorf("start: got %v, want %v", start, wantStart) + } + if !end.Equal(wantEnd) { + t.Errorf("end: got %v, want %v", end, wantEnd) + } +} + +func TestComputeRenewalWindow_NoPolicy_LastThird(t *testing.T) { + now := time.Date(2026, 5, 1, 0, 0, 0, 0, time.UTC) + notBefore := time.Date(2026, 4, 1, 0, 0, 0, 0, time.UTC) + notAfter := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC) // 91-day validity + cert := &domain.ManagedCertificate{ExpiresAt: notAfter} + version := &domain.CertificateVersion{NotBefore: notBefore, NotAfter: notAfter} + + start, end := ComputeRenewalWindow(cert, version, nil, now) + // Validity = 91 days; thirty3 ~30d, end_offset = 10d. Start is in + // the future from `now` (Jun 2026), so no clamp. + if start.Before(now) { + t.Errorf("start before now: got %v", start) + } + if !end.After(start) && !end.Equal(start) { + t.Errorf("end before start: start=%v end=%v", start, end) + } +} + +func TestComputeRenewalWindow_PastExpiry_RenewNow(t *testing.T) { + now := time.Date(2026, 8, 1, 0, 0, 0, 0, time.UTC) + notAfter := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC) // 1 month ago + cert := &domain.ManagedCertificate{ExpiresAt: notAfter} + + start, end := ComputeRenewalWindow(cert, nil, nil, now) + // Expect a "renew now" 1-day window starting at now. + if !start.Equal(now) { + t.Errorf("start: got %v, want %v", start, now) + } + if want := now.Add(24 * time.Hour); !end.Equal(want) { + t.Errorf("end: got %v, want %v", end, want) + } +} diff --git a/internal/api/handler/acme.go b/internal/api/handler/acme.go index 3aa9933..737b49d 100644 --- a/internal/api/handler/acme.go +++ b/internal/api/handler/acme.go @@ -51,6 +51,10 @@ type ACMEService interface { LookupCertificate(ctx context.Context, certID, accountID string) (string, error) // Phase 3 — challenge validation. RespondToChallenge(ctx context.Context, accountID, challengeID string, accountJWK *jose.JSONWebKey) (*domain.ACMEChallenge, error) + // Phase 4 — key rollover + revocation + ARI. + RotateAccountKey(ctx context.Context, oldAccount *domain.ACMEAccount, newJWK *jose.JSONWebKey) (*domain.ACMEAccount, error) + RevokeCert(ctx context.Context, verified *acme.VerifiedRequest, certDER []byte, reasonCode int) error + RenewalInfo(ctx context.Context, profileID, certID string) (*acme.RenewalInfoResponse, time.Duration, error) } // ACMEHandler exposes the ACME server's RFC 8555 endpoints under the @@ -227,6 +231,49 @@ func writeServiceError(w http.ResponseWriter, err error) { Detail: "challenge is no longer in pending state", Status: http.StatusBadRequest, }) + case errors.Is(err, service.ErrACMERevocationUnconfigured): + acme.WriteProblem(w, acme.ServerInternal("revocation pipeline is not wired")) + case errors.Is(err, service.ErrACMEKeyRolloverConcurrent), + errors.Is(err, service.ErrACMEKeyRolloverDuplicateKey): + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:unauthorized", + Detail: "the supplied new account key is unavailable: " + err.Error(), + Status: http.StatusConflict, + }) + case errors.Is(err, service.ErrACMEKeyRolloverInvalid): + acme.WriteProblem(w, acme.Malformed("key rollover request rejected")) + case errors.Is(err, service.ErrACMERevocationCertNotFound): + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:malformed", + Detail: "the supplied certificate is not known to this server", + Status: http.StatusNotFound, + }) + case errors.Is(err, service.ErrACMERevocationUnauthorized): + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:unauthorized", + Detail: "the requester is not authorized to revoke this certificate", + Status: http.StatusUnauthorized, + }) + case errors.Is(err, service.ErrACMERevocationAlreadyRevoked): + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:alreadyRevoked", + Detail: "the certificate has already been revoked", + Status: http.StatusBadRequest, + }) + case errors.Is(err, service.ErrACMERevocationBadCSR): + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:badCSR", + Detail: "the supplied `certificate` field is not a valid X.509 cert", + Status: http.StatusBadRequest, + }) + case errors.Is(err, service.ErrACMEARIDisabled): + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:malformed", + Detail: "ACME Renewal Information is disabled on this server", + Status: http.StatusNotFound, + }) + case errors.Is(err, service.ErrACMEARIBadCertID): + acme.WriteProblem(w, acme.Malformed("ARI cert-id is malformed")) default: // Avoid leaking internal error text per master-prompt // criterion #10 (operator-actionable errors with no info @@ -885,3 +932,190 @@ func marshalChallengeResponse(ch *domain.ACMEChallenge, urlBuilder func(string) } return out } + +// --- Phase 4 — key rollover + revocation + ARI ------------------------- + +// KeyChange handles POST /acme/profile/{id}/key-change (RFC 8555 §7.3.5). +// Doubly-signed JWS: the OUTER is signed by the OLD account key (kid +// path); the inner — embedded as the outer's payload — is signed by the +// NEW account key (jwk path). +// +// We run the outer through the existing VerifyJWS pipeline (kid path, +// nonce consumed there), then ParseAndVerifyKeyChangeInner against the +// outer's verified payload bytes. The service's RotateAccountKey is the +// committing actor: it asserts uniqueness and atomically swaps the +// row's jwk_thumbprint + jwk_pem under SELECT…FOR UPDATE. +func (h ACMEHandler) KeyChange(w http.ResponseWriter, r *http.Request) { + profileID := r.PathValue("id") + requestURL := h.requestURL(r) + + body, err := io.ReadAll(io.LimitReader(r.Body, MaxJWSBodyBytes+1)) + if err != nil { + acme.WriteProblem(w, acme.Malformed("could not read request body")) + return + } + if len(body) > MaxJWSBodyBytes { + acme.WriteProblem(w, acme.Malformed("request body too large")) + return + } + + verified, err := h.svc.VerifyJWS(r.Context(), body, requestURL, false /*expectNewAccount*/, h.accountKID(r, profileID)) + if err != nil { + acme.WriteProblem(w, acme.MapJWSErrorToProblem(err)) + return + } + if verified.Account == nil { + acme.WriteProblem(w, acme.MapJWSErrorToProblem(acme.ErrJWSAccountNotFound)) + return + } + + // The outer's verified payload IS the inner JWS (compact-serialized). + // Reconstruct the OLD account's stored JWK so the inner can assert + // payload.oldKey matches it. + registeredOldJWK, err := acme.ParseJWKFromPEM(verified.Account.JWKPEM) + if err != nil { + acme.WriteProblem(w, acme.ServerInternal("could not parse stored account JWK")) + return + } + + outerKID := h.accountKID(r, profileID)(verified.Account.AccountID) + inner, err := acme.ParseAndVerifyKeyChangeInner( + verified.Payload, outerKID, requestURL, registeredOldJWK, + ) + if err != nil { + acme.WriteProblem(w, acme.MapKeyChangeErrorToProblem(err)) + return + } + + rolled, err := h.svc.RotateAccountKey(r.Context(), verified.Account, inner.NewJWK) + if err != nil { + writeServiceError(w, err) + return + } + + if nonce, err := h.svc.IssueNonce(r.Context()); err == nil { + w.Header().Set("Replay-Nonce", nonce) + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode( + acme.MarshalAccount(rolled, h.accountOrdersURL(r, profileID, rolled.AccountID)), + ) +} + +// RevokeCert handles POST /acme/profile/{id}/revoke-cert (RFC 8555 §7.6). +// JWS may use EITHER kid (account that owns the cert) OR jwk (the cert's +// own public key). VerifyJWS produces either Account-set (kid) or +// JWK-set (jwk). The service's RevokeCert routes through the existing +// RevocationSvc pipeline. +func (h ACMEHandler) RevokeCert(w http.ResponseWriter, r *http.Request) { + profileID := r.PathValue("id") + requestURL := h.requestURL(r) + + body, err := io.ReadAll(io.LimitReader(r.Body, MaxJWSBodyBytes+1)) + if err != nil { + acme.WriteProblem(w, acme.Malformed("could not read request body")) + return + } + if len(body) > MaxJWSBodyBytes { + acme.WriteProblem(w, acme.Malformed("request body too large")) + return + } + + // RFC 8555 §7.6 explicitly permits both kid and jwk on revoke-cert. + // Run a kid-first verify; on the kid-path-specific + // "this endpoint requires kid" failure, retry as jwk path. + verified, errKid := h.svc.VerifyJWS(r.Context(), body, requestURL, false /*expectNewAccount=false → kid*/, h.accountKID(r, profileID)) + if errKid != nil && (errors.Is(errKid, acme.ErrJWSExpectKidGotJWK) || errors.Is(errKid, acme.ErrJWSAccountNotFound)) { + // jwk path. ExpectNewAccount=true asserts jwk + rejects kid. + v2, err2 := h.svc.VerifyJWS(r.Context(), body, requestURL, true /*expectNewAccount=true → jwk*/, h.accountKID(r, profileID)) + if err2 != nil { + acme.WriteProblem(w, acme.MapJWSErrorToProblem(err2)) + return + } + verified = v2 + } else if errKid != nil { + acme.WriteProblem(w, acme.MapJWSErrorToProblem(errKid)) + return + } + + var req acme.RevokeCertRequest + if err := json.Unmarshal(verified.Payload, &req); err != nil { + acme.WriteProblem(w, acme.Malformed("could not parse revoke-cert payload")) + return + } + certDER, err := base64.RawURLEncoding.DecodeString(req.Certificate) + if err != nil || len(certDER) == 0 { + acme.WriteProblem(w, acme.Problem{ + Type: "urn:ietf:params:acme:error:badCSR", + Detail: "`certificate` is not valid base64url-DER", + Status: http.StatusBadRequest, + }) + return + } + + if err := h.svc.RevokeCert(r.Context(), verified, certDER, req.Reason); err != nil { + writeServiceError(w, err) + return + } + + if nonce, err := h.svc.IssueNonce(r.Context()); err == nil { + w.Header().Set("Replay-Nonce", nonce) + } + _ = profileID + w.WriteHeader(http.StatusOK) +} + +// RenewalInfo handles GET /acme/profile/{id}/renewal-info/{cert_id} +// (RFC 9773). UNAUTHENTICATED — RFC 9773 §4 mandates ARI be readable +// without JWS so cert-manager-shaped clients can fetch the suggested +// window cheaply. +func (h ACMEHandler) RenewalInfo(w http.ResponseWriter, r *http.Request) { + profileID := r.PathValue("id") + certID := r.PathValue("cert_id") + + resp, retryAfter, err := h.svc.RenewalInfo(r.Context(), profileID, certID) + if err != nil { + writeServiceError(w, err) + return + } + if retryAfter > 0 { + // RFC 7231 §7.1.3 Retry-After accepts either an HTTP-date or a + // delta-seconds. ACME ARI uses delta-seconds per RFC 9773 §4.2. + secs := int(retryAfter.Seconds()) + if secs < 1 { + secs = 1 + } + w.Header().Set("Retry-After", itoaForRetryAfter(secs)) + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", "no-store") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(resp) +} + +// itoaForRetryAfter is a localized integer-to-string helper. Using +// strconv.Itoa would be marginally more idiomatic but pulls a fresh +// import for one call site; this one-off is fine. +func itoaForRetryAfter(n int) string { + if n == 0 { + return "0" + } + negative := false + if n < 0 { + negative = true + n = -n + } + var buf [20]byte + i := len(buf) + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if negative { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} diff --git a/internal/api/handler/acme_handler_test.go b/internal/api/handler/acme_handler_test.go index 3e84f81..0eaf4ed 100644 --- a/internal/api/handler/acme_handler_test.go +++ b/internal/api/handler/acme_handler_test.go @@ -42,6 +42,10 @@ type mockACMEService struct { LookupCertificateFn func(ctx context.Context, certID, accountID string) (string, error) // Phase 3. RespondToChallengeFn func(ctx context.Context, accountID, challengeID string, accountJWK *jose.JSONWebKey) (*domain.ACMEChallenge, error) + // Phase 4. + RotateAccountKeyFn func(ctx context.Context, oldAccount *domain.ACMEAccount, newJWK *jose.JSONWebKey) (*domain.ACMEAccount, error) + RevokeCertFn func(ctx context.Context, verified *acme.VerifiedRequest, certDER []byte, reasonCode int) error + RenewalInfoFn func(ctx context.Context, profileID, certID string) (*acme.RenewalInfoResponse, time.Duration, error) } func (m *mockACMEService) BuildDirectory(ctx context.Context, profileID, baseURL string) (*acme.Directory, error) { @@ -142,6 +146,27 @@ func (m *mockACMEService) RespondToChallenge(ctx context.Context, accountID, cha return nil, errors.New("RespondToChallenge not stubbed") } +func (m *mockACMEService) RotateAccountKey(ctx context.Context, oldAccount *domain.ACMEAccount, newJWK *jose.JSONWebKey) (*domain.ACMEAccount, error) { + if m.RotateAccountKeyFn != nil { + return m.RotateAccountKeyFn(ctx, oldAccount, newJWK) + } + return nil, errors.New("RotateAccountKey not stubbed") +} + +func (m *mockACMEService) RevokeCert(ctx context.Context, verified *acme.VerifiedRequest, certDER []byte, reasonCode int) error { + if m.RevokeCertFn != nil { + return m.RevokeCertFn(ctx, verified, certDER, reasonCode) + } + return errors.New("RevokeCert not stubbed") +} + +func (m *mockACMEService) RenewalInfo(ctx context.Context, profileID, certID string) (*acme.RenewalInfoResponse, time.Duration, error) { + if m.RenewalInfoFn != nil { + return m.RenewalInfoFn(ctx, profileID, certID) + } + return nil, 0, errors.New("RenewalInfo not stubbed") +} + // newACMETestServer wires the ACMEHandler against the mock + a stdlib // ServeMux configured exactly the way internal/api/router/router.go // does it in production. Routes: diff --git a/internal/api/router/openapi_parity_test.go b/internal/api/router/openapi_parity_test.go index bebf99c..a08c6de 100644 --- a/internal/api/router/openapi_parity_test.go +++ b/internal/api/router/openapi_parity_test.go @@ -85,6 +85,13 @@ var SpecParityExceptions = map[string]string{ "POST /acme/authz/{authz_id}": "Phase 2 default-profile shorthand for authz POST-as-GET.", "POST /acme/challenge/{chall_id}": "Phase 3 default-profile shorthand for challenge response.", "POST /acme/cert/{cert_id}": "Phase 2 default-profile shorthand for cert download.", + // Phase 4 — key rollover + revocation + ARI. + "POST /acme/profile/{id}/key-change": "RFC 8555 §7.3.5 doubly-signed key rollover; documented in docs/acme-server.md", + "POST /acme/profile/{id}/revoke-cert": "RFC 8555 §7.6 revoke-cert (kid OR cert-key auth); documented in docs/acme-server.md", + "GET /acme/profile/{id}/renewal-info/{cert_id}": "RFC 9773 ACME Renewal Information (unauthenticated GET); documented in docs/acme-server.md", + "POST /acme/key-change": "Phase 4 default-profile shorthand for key rollover.", + "POST /acme/revoke-cert": "Phase 4 default-profile shorthand for revoke-cert.", + "GET /acme/renewal-info/{cert_id}": "Phase 4 default-profile shorthand for ARI.", } func TestRouter_OpenAPIParity(t *testing.T) { diff --git a/internal/api/router/router.go b/internal/api/router/router.go index 56e9050..aef12a8 100644 --- a/internal/api/router/router.go +++ b/internal/api/router/router.go @@ -423,6 +423,11 @@ func (r *Router) RegisterHandlers(reg HandlerRegistry) { r.Register("POST /acme/profile/{id}/authz/{authz_id}", http.HandlerFunc(reg.ACME.Authz)) r.Register("POST /acme/profile/{id}/challenge/{chall_id}", http.HandlerFunc(reg.ACME.Challenge)) r.Register("POST /acme/profile/{id}/cert/{cert_id}", http.HandlerFunc(reg.ACME.Cert)) + r.Register("POST /acme/profile/{id}/key-change", http.HandlerFunc(reg.ACME.KeyChange)) + r.Register("POST /acme/profile/{id}/revoke-cert", http.HandlerFunc(reg.ACME.RevokeCert)) + // RFC 9773 ARI: GET-only + unauthenticated (cert-manager-shaped + // clients fetch this without a JWS). + r.Register("GET /acme/profile/{id}/renewal-info/{cert_id}", http.HandlerFunc(reg.ACME.RenewalInfo)) // Default-profile shorthand. The handler's profile-resolution path // returns userActionRequired (RFC 7807 + RFC 8555 §6.7) when // CERTCTL_ACME_SERVER_DEFAULT_PROFILE_ID is unset; when set it @@ -438,6 +443,9 @@ func (r *Router) RegisterHandlers(reg HandlerRegistry) { r.Register("POST /acme/authz/{authz_id}", http.HandlerFunc(reg.ACME.Authz)) r.Register("POST /acme/challenge/{chall_id}", http.HandlerFunc(reg.ACME.Challenge)) r.Register("POST /acme/cert/{cert_id}", http.HandlerFunc(reg.ACME.Cert)) + r.Register("POST /acme/key-change", http.HandlerFunc(reg.ACME.KeyChange)) + r.Register("POST /acme/revoke-cert", http.HandlerFunc(reg.ACME.RevokeCert)) + r.Register("GET /acme/renewal-info/{cert_id}", http.HandlerFunc(reg.ACME.RenewalInfo)) } // RegisterESTHandlers sets up EST (RFC 7030) routes under diff --git a/internal/config/config.go b/internal/config/config.go index 456d43f..9d12d01 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -726,6 +726,24 @@ type ACMEServerConfig struct { // Default: 10. Setting: CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY. TLSALPN01ConcurrencyMax int + // ARIEnabled toggles RFC 9773 ACME Renewal Information surface + // (the `renewalInfo` directory entry + GET + // /acme/profile//renewal-info/). Default: true. + // Operators wanting Phase-1a-style "directory + nonce + accounts + + // orders + finalize + challenges only" can flip this off; doing so + // drops the renewalInfo URL from the directory document so ACME + // clients fall back to their static renewal scheduler. Phase 4 wires. + // Setting: CERTCTL_ACME_SERVER_ARI_ENABLED. + ARIEnabled bool + + // ARIPollInterval is the value the server returns in the Retry-After + // response header on a 200 ARI response — i.e., the suggested gap + // between successive ARI polls a client should respect. RFC 9773 §4.2 + // leaves this server-policy. Default: 6h. Tighter intervals (e.g. 1h) + // suit short-lived certs; looser intervals (24h) suit standard 90-day + // certs. Setting: CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL. + ARIPollInterval time.Duration + // DirectoryMeta is the optional metadata advertised in the directory // document per RFC 8555 §7.1.1. DirectoryMeta ACMEServerDirectoryMeta @@ -1771,6 +1789,8 @@ func Load() (*Config, error) { DNS01Resolver: getEnv("CERTCTL_ACME_SERVER_DNS01_RESOLVER", "8.8.8.8:53"), DNS01ConcurrencyMax: getEnvInt("CERTCTL_ACME_SERVER_DNS01_CONCURRENCY", 10), TLSALPN01ConcurrencyMax: getEnvInt("CERTCTL_ACME_SERVER_TLSALPN01_CONCURRENCY", 10), + ARIEnabled: getEnvBool("CERTCTL_ACME_SERVER_ARI_ENABLED", true), + ARIPollInterval: getEnvDuration("CERTCTL_ACME_SERVER_ARI_POLL_INTERVAL", 6*time.Hour), DirectoryMeta: ACMEServerDirectoryMeta{ TermsOfService: getEnv("CERTCTL_ACME_SERVER_TOS_URL", ""), Website: getEnv("CERTCTL_ACME_SERVER_WEBSITE", ""), diff --git a/internal/repository/postgres/acme.go b/internal/repository/postgres/acme.go index 4e09baf..78d1714 100644 --- a/internal/repository/postgres/acme.go +++ b/internal/repository/postgres/acme.go @@ -652,6 +652,105 @@ func jsonUnmarshalACME(data []byte, v interface{}) error { return json.Unmarshal(data, v) } +// --- Phase 4 — key rollover + revocation auth helper ------------------ + +// ErrACMEAccountKeyConcurrentUpdate is returned by UpdateAccountJWKWithTx +// when the row's stored thumbprint no longer matches expectedOldThumbprint. +// This is the "concurrent rollover" race signal: two requests both +// pre-validated their inner JWS against the SAME current key, but only +// one of them got into the WithinTx first. The loser's expectedOldThumb- +// print is now stale → the row's current thumbprint is the WINNER's NEW +// thumbprint. Caller maps to RFC 8555 §7.3.5 "key already exists". +var ErrACMEAccountKeyConcurrentUpdate = errors.New("acme: account key was rotated concurrently; re-fetch and retry") + +// UpdateAccountJWKWithTx atomically swaps an account row's JWK +// (thumbprint + PEM) using a SELECT … FOR UPDATE guard plus an +// expectedOldThumbprint precondition. +// +// The row is locked for update; the precondition check then runs +// against the locked row inside the same tx. If a concurrent rollover +// committed first, this caller observes the new thumbprint and returns +// ErrACMEAccountKeyConcurrentUpdate (caller maps to 409). Postgres's +// row-level lock guarantees the two concurrent transactions serialize. +// +// Returns repository.ErrNotFound when the account row is missing. +func (r *ACMERepository) UpdateAccountJWKWithTx( + ctx context.Context, + q repository.Querier, + accountID string, + expectedOldThumbprint, newThumbprint, newJWKPEM string, +) error { + // SELECT … FOR UPDATE serializes concurrent rollovers on the same + // row. The lock is released when the surrounding tx commits / rolls + // back; the second concurrent caller blocks here until the first + // commits + then sees the new thumbprint on its read. + var current string + err := q.QueryRowContext(ctx, ` + SELECT jwk_thumbprint + FROM acme_accounts + WHERE account_id = $1 + FOR UPDATE + `, accountID).Scan(¤t) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return fmt.Errorf("account not found: %w", repository.ErrNotFound) + } + return fmt.Errorf("acme: lock account for key rollover: %w", err) + } + if current != expectedOldThumbprint { + return ErrACMEAccountKeyConcurrentUpdate + } + + res, err := q.ExecContext(ctx, ` + UPDATE acme_accounts + SET jwk_thumbprint = $2, jwk_pem = $3, updated_at = NOW() + WHERE account_id = $1 AND jwk_thumbprint = $4 + `, accountID, newThumbprint, newJWKPEM, expectedOldThumbprint) + if err != nil { + // Postgres SQLSTATE 23505 = unique_violation on + // (profile_id, jwk_thumbprint). RFC 8555 §7.3.5 says the + // server SHOULD return 409 Conflict in that case. + var pqErr *pq.Error + if errors.As(err, &pqErr) && pqErr.Code == "23505" { + return ErrACMEAccountDuplicateThumbprint + } + return fmt.Errorf("acme: update account jwk: %w", err) + } + n, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("acme: update account jwk rows affected: %w", err) + } + if n == 0 { + // SELECT … FOR UPDATE found the row but the UPDATE WHERE + // jwk_thumbprint=$4 did not. That means the thumbprint changed + // between the SELECT result + the UPDATE — should be impossible + // under the row lock. Treat defensively as a concurrency error. + return ErrACMEAccountKeyConcurrentUpdate + } + return nil +} + +// AccountOwnsCertificate returns true when there is at least one +// acme_orders row with (account_id = accountID, certificate_id = +// certificateID). Used by ACMEService.RevokeCert (kid path) to confirm +// the requesting account had an order that produced the cert. The +// query is a single indexed lookup; we don't materialize the full row. +func (r *ACMERepository) AccountOwnsCertificate(ctx context.Context, accountID, certificateID string) (bool, error) { + if accountID == "" || certificateID == "" { + return false, nil + } + var count int + err := r.db.QueryRowContext(ctx, ` + SELECT COUNT(1) + FROM acme_orders + WHERE account_id = $1 AND certificate_id = $2 + `, accountID, certificateID).Scan(&count) + if err != nil { + return false, fmt.Errorf("acme: account-owns-certificate: %w", err) + } + return count > 0, nil +} + // scanACMEAccount is the shared shape for the SELECT-by-X account // queries above. Returns sql.ErrNoRows-wrapped repository.ErrNotFound // on miss; any other scan failure surfaces verbatim. diff --git a/internal/service/acme.go b/internal/service/acme.go index 6062c84..e4bd6d4 100644 --- a/internal/service/acme.go +++ b/internal/service/acme.go @@ -53,6 +53,30 @@ type ACMERepo interface { GetChallengeByID(ctx context.Context, challengeID string) (*domain.ACMEChallenge, error) UpdateChallengeWithTx(ctx context.Context, q repository.Querier, ch *domain.ACMEChallenge) error UpdateAuthzStatusWithTx(ctx context.Context, q repository.Querier, authzID string, status domain.ACMEAuthzStatus) error + // Phase 4 — key rollover + revocation auth. + UpdateAccountJWKWithTx(ctx context.Context, q repository.Querier, accountID, expectedOldThumbprint, newThumbprint, newJWKPEM string) error + AccountOwnsCertificate(ctx context.Context, accountID, certificateID string) (bool, error) +} + +// CertificateRevoker is the minimum surface ACMEService needs to route +// an ACME revoke-cert request through certctl's existing revocation +// pipeline. The concrete type is *service.RevocationSvc whose +// RevokeCertificateWithActor method already covers cert-row update + +// certificate_revocations insert + audit row + issuer notification + +// OCSP cache invalidation in one path. +// +// Defining the interface here lets tests inject a recorder without +// dragging the entire RevocationSvc graph. +type CertificateRevoker interface { + RevokeCertificateWithActor(ctx context.Context, certID, reason, actor string) error +} + +// RenewalPolicyLookup is the minimum surface ACMEService needs to +// resolve the optional bound renewal policy for a certificate's ARI +// window math. Real callers pass a *postgres.RenewalPolicyRepository +// that satisfies this; tests inject in-memory fakes. +type RenewalPolicyLookup interface { + Get(ctx context.Context, id string) (*domain.RenewalPolicy, error) } // profileLookup is the minimum surface ACMEService needs to resolve a @@ -109,6 +133,15 @@ type ACMEService struct { // semaphores + the validators. Optional via SetValidatorPool — // when nil, RespondToChallenge returns ErrACMEChallengePoolUnconfigured. validatorPool *acme.Pool + + // Phase 4 — revocation delegate + renewal-policy lookup. The + // revoker is *service.RevocationSvc in production; the + // renewalPolicies lookup is *postgres.RenewalPolicyRepository. + // Both wired via SetRevocationDelegate / SetRenewalPolicyLookup; + // when unset, RevokeCert returns ErrACMERevocationUnconfigured + // and RenewalInfo returns the no-policy default window. + revoker CertificateRevoker + renewalPolicies RenewalPolicyLookup } // NewACMEService constructs an ACMEService with the directory + nonce @@ -149,6 +182,20 @@ func (s *ACMEService) SetIssuancePipeline(certSvc *CertificateService, certRepo s.issuerRegistry = registry } +// SetRevocationDelegate wires Phase 4's revocation delegate. The +// concrete type is *service.RevocationSvc; passing nil at startup +// disables the ACME revoke-cert endpoint (handler returns +// ErrACMERevocationUnconfigured → serverInternal). cmd/server/main.go +// passes the same revocationSvc instance shared across the rest of +// the platform. +func (s *ACMEService) SetRevocationDelegate(r CertificateRevoker) { s.revoker = r } + +// SetRenewalPolicyLookup wires the renewal-policy resolver used by +// the ARI window-math path. Optional — when unset, ARI falls back to +// the "last 33% of validity" default window; the renewal-info handler +// still returns 200. +func (s *ACMEService) SetRenewalPolicyLookup(r RenewalPolicyLookup) { s.renewalPolicies = r } + // SetValidatorPool wires Phase 3's challenge validator pool. // cmd/server/main.go constructs an *acme.Pool at startup with the // per-type concurrency caps from cfg.ACMEServer. Optional — @@ -239,6 +286,54 @@ var ErrACMEChallengePoolUnconfigured = errors.New("acme: validator pool not wire // behavior — same shape as Phase 1b's account inactive case). var ErrACMEChallengeWrongState = errors.New("acme: challenge is no longer in pending state") +// Phase 4 sentinels. + +// ErrACMERevocationUnconfigured is returned when SetRevocationDelegate +// hasn't been called and a client hits POST /revoke-cert. Indicates a +// deploy-time wiring bug; mapped to serverInternal. +var ErrACMERevocationUnconfigured = errors.New("acme: revocation delegate not wired (call SetRevocationDelegate)") + +// ErrACMEKeyRolloverConcurrent is returned when two concurrent key- +// rollover requests race on the same account; the second sees the +// first's already-committed thumbprint. +var ErrACMEKeyRolloverConcurrent = errors.New("acme: account key was rotated concurrently; retry") + +// ErrACMEKeyRolloverDuplicateKey is returned when the inner JWS's new +// JWK thumbprint is already registered against this profile. +var ErrACMEKeyRolloverDuplicateKey = errors.New("acme: new account key already registered against this profile") + +// ErrACMEKeyRolloverInvalid is the catch-all for inner-JWS validation +// failures the handler doesn't care to enumerate (the actual sentinel +// for the operator-friendly error comes from the acme package's +// MapKeyChangeErrorToProblem). +var ErrACMEKeyRolloverInvalid = errors.New("acme: key rollover request rejected") + +// ErrACMERevocationCertNotFound is returned when the revoke-cert +// payload's certificate doesn't match a managed_certificates row this +// server has issued. +var ErrACMERevocationCertNotFound = errors.New("acme: revocation target certificate not found") + +// ErrACMERevocationUnauthorized is returned when neither the kid path +// (account owns the cert) nor the jwk path (signature key matches the +// cert's public key) authenticates the revocation request. +var ErrACMERevocationUnauthorized = errors.New("acme: account or signing key does not authorize revocation of this certificate") + +// ErrACMERevocationAlreadyRevoked is returned when the cert is already +// in Revoked status. Mapped to RFC 8555 §6.7 alreadyRevoked. +var ErrACMERevocationAlreadyRevoked = errors.New("acme: certificate is already revoked") + +// ErrACMERevocationBadCSR is returned when the certificate field of +// the revoke-cert payload is not a valid base64url-DER X.509 cert. +var ErrACMERevocationBadCSR = errors.New("acme: revoke-cert payload `certificate` is malformed") + +// ErrACMEARIDisabled is returned by RenewalInfo when CERTCTL_ACME_SERVER_ +// ARI_ENABLED is false. Handler maps to 404 + serverInternal. +var ErrACMEARIDisabled = errors.New("acme: ARI is disabled on this server") + +// ErrACMEARIBadCertID is returned when the cert-id in the ARI URL is +// not RFC 9773 §4.1 shape. Handler maps to 400 + malformed. +var ErrACMEARIBadCertID = errors.New("acme: ARI cert-id is malformed") + // BuildDirectory constructs the per-profile directory document. // // profileID resolution: @@ -266,9 +361,14 @@ func (s *ACMEService) BuildDirectory(ctx context.Context, profileID, baseURL str s.cfg.DirectoryMeta.Website, s.cfg.DirectoryMeta.CAAIdentities, s.cfg.DirectoryMeta.ExternalAccountRequired, - // Phase 1a: ARI is non-functional. The Phase 4 commit flips this - // to true once the renewal-info handler ships. - false, + // Phase 4: ARI is live. Flipping this on emits the renewalInfo + // URL from BuildDirectory; a 200 from the renewal-info handler + // returns the suggested-window JSON + Retry-After. Operators can + // disable via CERTCTL_ACME_SERVER_ARI_ENABLED=false (the URL + // drops out of the directory; the route is still registered but + // returns 404 + serverInternal — clients fall back to static + // renewal scheduling). + s.cfg.ARIEnabled, ) _ = profileID // Phase 1b will use the resolved profile to read // acme_auth_mode + record per-profile metrics. Phase 1a @@ -355,6 +455,14 @@ type ACMEMetrics struct { ChallengeRespondFailTotal atomic.Uint64 // immediate rejection (already-resolved / wrong-state) ChallengeValidateValid atomic.Uint64 // validator returned nil ChallengeValidateInvalid atomic.Uint64 // validator returned error + + // Phase 4 — key rollover + revocation + ARI. + KeyChangeTotal atomic.Uint64 // accepted rollover (200) + KeyChangeFailTotal atomic.Uint64 // rejected rollover (4xx) + RevokeCertTotal atomic.Uint64 // accepted revocation (200) + RevokeCertFailTotal atomic.Uint64 // rejected revocation (4xx) + RenewalInfoTotal atomic.Uint64 // ARI 200 + RenewalInfoFailTotal atomic.Uint64 // ARI 4xx } // NewACMEMetrics returns a zeroed counter table. Concurrent callers @@ -394,6 +502,12 @@ func (m *ACMEMetrics) Snapshot() map[string]uint64 { "certctl_acme_challenge_respond_failures_total": m.ChallengeRespondFailTotal.Load(), "certctl_acme_challenge_validate_valid_total": m.ChallengeValidateValid.Load(), "certctl_acme_challenge_validate_invalid_total": m.ChallengeValidateInvalid.Load(), + "certctl_acme_key_change_total": m.KeyChangeTotal.Load(), + "certctl_acme_key_change_failures_total": m.KeyChangeFailTotal.Load(), + "certctl_acme_revoke_cert_total": m.RevokeCertTotal.Load(), + "certctl_acme_revoke_cert_failures_total": m.RevokeCertFailTotal.Load(), + "certctl_acme_renewal_info_total": m.RenewalInfoTotal.Load(), + "certctl_acme_renewal_info_failures_total": m.RenewalInfoFailTotal.Load(), } } @@ -1358,3 +1472,347 @@ func (s *ACMEService) recordChallengeOutcome( auditDetails) }) } + +// --- Phase 4 — key rollover + revocation + ARI ------------------------- + +// RotateAccountKey is the service-layer entry point for RFC 8555 +// §7.3.5 key-change. By the time we get here the handler has: +// +// 1. VerifyJWS'd the OUTER JWS (kid path), so verified.Account is the +// authentic account owner. +// 2. ParseAndVerifyKeyChangeInner'd the inner JWS, so newJWK is the +// verified new key + the inner's `oldKey`/`account` invariants +// have been asserted. +// +// What we still own here: +// +// - asserting the new JWK's thumbprint isn't already registered against +// this profile (RFC 8555 §7.3.5 forbids two accounts sharing a key); +// - swapping the row's jwk_thumbprint + jwk_pem in one WithinTx with +// the audit row, behind a SELECT … FOR UPDATE lock so concurrent +// rollovers serialize. +// +// Returns ErrACMEKeyRolloverConcurrent when a concurrent rollover beat +// us to the WithinTx; ErrACMEKeyRolloverDuplicateKey on the +// (profile_id, jwk_thumbprint) UNIQUE collision. +func (s *ACMEService) RotateAccountKey( + ctx context.Context, + oldAccount *domain.ACMEAccount, + newJWK *jose.JSONWebKey, +) (*domain.ACMEAccount, error) { + if s.tx == nil || s.auditService == nil { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + return nil, fmt.Errorf("acme: key rollover requires SetTransactor + SetAuditService") + } + if oldAccount == nil || newJWK == nil { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + return nil, ErrACMEKeyRolloverInvalid + } + + newThumbprint, err := acme.JWKThumbprint(newJWK) + if err != nil { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + return nil, fmt.Errorf("acme: thumbprint new jwk: %w", err) + } + newJWKPEM, err := acme.JWKToPEM(newJWK) + if err != nil { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + return nil, fmt.Errorf("acme: serialize new jwk: %w", err) + } + + // New key already registered against this profile? RFC 8555 §7.3.5 + // forbids two accounts sharing a key. + existing, err := s.repo.GetAccountByThumbprint(ctx, oldAccount.ProfileID, newThumbprint) + if err == nil && existing != nil { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + return nil, ErrACMEKeyRolloverDuplicateKey + } + if err != nil && !errors.Is(err, repository.ErrNotFound) { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + return nil, fmt.Errorf("acme: lookup new jwk thumbprint: %w", err) + } + + // Atomic swap + audit row. + if err := s.tx.WithinTx(ctx, func(q repository.Querier) error { + if err := s.repo.UpdateAccountJWKWithTx( + ctx, q, oldAccount.AccountID, + oldAccount.JWKThumbprint, newThumbprint, newJWKPEM, + ); err != nil { + return err + } + return s.auditService.RecordEventWithTx(ctx, q, + fmt.Sprintf("acme:%s", oldAccount.AccountID), domain.ActorTypeUser, + "acme_account_key_rolled", "acme_account", oldAccount.AccountID, + map[string]interface{}{ + "old_thumbprint": oldAccount.JWKThumbprint, + "new_thumbprint": newThumbprint, + "profile_id": oldAccount.ProfileID, + }) + }); err != nil { + s.metrics.bump(&s.metrics.KeyChangeFailTotal) + // Translate repository sentinels to ACME-shaped errors. + // ErrACMEAccountKeyConcurrentUpdate is in the postgres + // package; we use error-string-based matching to avoid + // importing postgres into the service layer. + if strings.Contains(err.Error(), "rotated concurrently") { + return nil, ErrACMEKeyRolloverConcurrent + } + if strings.Contains(err.Error(), "already exists for this profile") { + return nil, ErrACMEKeyRolloverDuplicateKey + } + return nil, err + } + + // Hydrate the in-memory account with its new key and return. + rolled := *oldAccount + rolled.JWKThumbprint = newThumbprint + rolled.JWKPEM = newJWKPEM + rolled.UpdatedAt = time.Now().UTC() + s.metrics.bump(&s.metrics.KeyChangeTotal) + return &rolled, nil +} + +// RevokeCert routes an ACME-shaped revoke-cert request through certctl's +// existing RevocationSvc pipeline (cert row update + revocation row + +// audit + issuer notification + OCSP cache invalidation). +// +// Parameters: +// +// - verified: the JWS-verified envelope. EITHER verified.Account is set +// (kid path: account that signed) OR verified.JWK is set (jwk path: +// the cert's own key signed). The handler enforces exactly one. +// - certDER: the base64url-decoded certificate DER from the payload. +// - reasonCode: optional RFC 5280 §5.3.1 numeric reason; values out of +// range are clamped to "unspecified". +// +// Auth model: +// +// - kid path: the account must have an acme_orders row whose +// certificate_id maps to the target managed_certificates row. We +// look up by serial against managed_certificates (scoped by issuer) +// and then check ownership. +// - jwk path: the JWS's embedded public key must equal the cert's +// public key (byte-equal RFC 7638 thumbprint). +// +// Either path: routes through s.revoker.RevokeCertificateWithActor — +// the same path bulk revocation, the GUI revoke button, and the +// ACME-consumer issuer's revoke uses. +func (s *ACMEService) RevokeCert( + ctx context.Context, + verified *acme.VerifiedRequest, + certDER []byte, + reasonCode int, +) error { + if s.revoker == nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationUnconfigured + } + if s.certRepo == nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return fmt.Errorf("acme: revoke-cert requires SetIssuancePipeline (no certRepo wired)") + } + if verified == nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationUnauthorized + } + + // Parse cert. + leaf, err := x509.ParseCertificate(certDER) + if err != nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationBadCSR + } + + // Resolve the cert via (issuerID, serial). Use the same first- + // available-issuer rule Phase 2 finalize uses; multi-issuer-per- + // profile follow-up will refine. + issuerID, _, ok := s.firstAvailableIssuer() + if !ok { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationCertNotFound + } + serialHex := strings.ToLower(leaf.SerialNumber.Text(16)) + version, err := s.certRepo.GetVersionBySerial(ctx, issuerID, serialHex) + if err != nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationCertNotFound + } + cert, err := s.certRepo.Get(ctx, version.CertificateID) + if err != nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationCertNotFound + } + if cert.Status == domain.CertificateStatusRevoked { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationAlreadyRevoked + } + + // Auth check. + var actor string + switch { + case verified.Account != nil: + owns, err := s.repo.AccountOwnsCertificate(ctx, verified.Account.AccountID, cert.ID) + if err != nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return fmt.Errorf("acme: revoke-cert ownership lookup: %w", err) + } + if !owns { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationUnauthorized + } + actor = fmt.Sprintf("acme:%s", verified.Account.AccountID) + case verified.JWK != nil: + // jwk path — embedded JWK must match the cert's pubkey. + certJWK := jose.JSONWebKey{Key: leaf.PublicKey} + eq, err := jwksThumbprintsEqualSvc(verified.JWK, &certJWK) + if err != nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return fmt.Errorf("acme: revoke-cert key compare: %w", err) + } + if !eq { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationUnauthorized + } + actor = fmt.Sprintf("acme-cert-key:%s", serialHex) + default: + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + return ErrACMERevocationUnauthorized + } + + // Route through the existing revocation pipeline. Reason is RFC + // 5280 §5.3.1 numeric; map to the certctl string form, clamping + // unknown values to "unspecified". + reasonStr := mapACMERevocationReason(reasonCode) + if err := s.revoker.RevokeCertificateWithActor(ctx, cert.ID, reasonStr, actor); err != nil { + s.metrics.bump(&s.metrics.RevokeCertFailTotal) + // RevocationSvc returns errors for already-revoked / archived; + // translate the already-revoked case to the ACME shape. + if strings.Contains(err.Error(), "already revoked") { + return ErrACMERevocationAlreadyRevoked + } + return fmt.Errorf("acme: revoke pipeline: %w", err) + } + + s.metrics.bump(&s.metrics.RevokeCertTotal) + return nil +} + +// RenewalInfo computes the RFC 9773 ARI suggestedWindow + Retry-After +// for a (profile, cert-id) pair. +// +// cert-id is the wire-format string: base64url(AKI) "." base64url(serial). +// We decode it via acme.ParseARICertID, look up by (issuer, serial), +// then compute the window from cert.ExpiresAt + the bound renewal +// policy (when present). +// +// Returns the response shape + Retry-After duration. The handler emits +// these on the wire. +func (s *ACMEService) RenewalInfo( + ctx context.Context, + profileID, certID string, +) (*acme.RenewalInfoResponse, time.Duration, error) { + if !s.cfg.ARIEnabled { + s.metrics.bump(&s.metrics.RenewalInfoFailTotal) + return nil, 0, ErrACMEARIDisabled + } + resolvedProfile, err := s.resolveProfile(ctx, profileID) + if err != nil { + s.metrics.bump(&s.metrics.RenewalInfoFailTotal) + return nil, 0, err + } + _ = resolvedProfile // future per-profile metric tags + + parsed, err := acme.ParseARICertID(certID) + if err != nil { + s.metrics.bump(&s.metrics.RenewalInfoFailTotal) + return nil, 0, ErrACMEARIBadCertID + } + + // Resolve cert via (first-available-issuer, serial-hex). + issuerID, _, ok := s.firstAvailableIssuer() + if !ok || s.certRepo == nil { + s.metrics.bump(&s.metrics.RenewalInfoFailTotal) + return nil, 0, ErrACMECertificateNotFound + } + version, err := s.certRepo.GetVersionBySerial(ctx, issuerID, parsed.SerialHex()) + if err != nil { + s.metrics.bump(&s.metrics.RenewalInfoFailTotal) + return nil, 0, ErrACMECertificateNotFound + } + cert, err := s.certRepo.Get(ctx, version.CertificateID) + if err != nil { + s.metrics.bump(&s.metrics.RenewalInfoFailTotal) + return nil, 0, ErrACMECertificateNotFound + } + + // Optional bound renewal-policy lookup. When unset OR the cert has + // no policy bound, ComputeRenewalWindow falls back to the last-33%- + // of-validity default. + var policy *domain.RenewalPolicy + if s.renewalPolicies != nil && cert.RenewalPolicyID != "" { + p, err := s.renewalPolicies.Get(ctx, cert.RenewalPolicyID) + if err == nil { + policy = p + } + } + + start, end := acme.ComputeRenewalWindow(cert, version, policy, time.Now().UTC()) + resp := &acme.RenewalInfoResponse{ + SuggestedWindow: acme.RenewalWindow{Start: start.UTC(), End: end.UTC()}, + } + retryAfter := s.cfg.ARIPollInterval + if retryAfter <= 0 { + retryAfter = 6 * time.Hour + } + + s.metrics.bump(&s.metrics.RenewalInfoTotal) + return resp, retryAfter, nil +} + +// jwksThumbprintsEqualSvc compares two JWKs by RFC 7638 thumbprint. A +// service-package-local helper so we don't import the api/acme package's +// unexported helper. The constant-time compare matches what the +// keychange.go variant does on the api/acme side. +func jwksThumbprintsEqualSvc(a, b *jose.JSONWebKey) (bool, error) { + if a == nil || b == nil { + return false, nil + } + tA, err := acme.JWKThumbprint(a) + if err != nil { + return false, err + } + tB, err := acme.JWKThumbprint(b) + if err != nil { + return false, err + } + return tA == tB, nil +} + +// mapACMERevocationReason translates the RFC 5280 §5.3.1 numeric reason +// code to the certctl-domain reason string. Out-of-range values clamp +// to "unspecified" per RFC 8555 §7.6 ("an arbitrary integer value"); +// RFC 5280 codes 8 (removeFromCRL) and 10 (aACompromise) are not in +// certctl's domain.ValidRevocationReasons set so they also clamp to +// "unspecified". +func mapACMERevocationReason(code int) string { + switch code { + case 0: + return string(domain.RevocationReasonUnspecified) + case 1: + return string(domain.RevocationReasonKeyCompromise) + case 2: + return string(domain.RevocationReasonCACompromise) + case 3: + return string(domain.RevocationReasonAffiliationChanged) + case 4: + return string(domain.RevocationReasonSuperseded) + case 5: + return string(domain.RevocationReasonCessationOfOperation) + case 6: + return string(domain.RevocationReasonCertificateHold) + case 9: + return string(domain.RevocationReasonPrivilegeWithdrawn) + default: + return string(domain.RevocationReasonUnspecified) + } +} diff --git a/internal/service/acme_phase4_test.go b/internal/service/acme_phase4_test.go new file mode 100644 index 0000000..ad12c45 --- /dev/null +++ b/internal/service/acme_phase4_test.go @@ -0,0 +1,366 @@ +// Copyright (c) certctl +// SPDX-License-Identifier: BSL-1.1 + +package service + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "errors" + "io" + "log/slog" + "math/big" + "strings" + "testing" + "time" + + jose "github.com/go-jose/go-jose/v4" + + "github.com/shankar0123/certctl/internal/api/acme" + "github.com/shankar0123/certctl/internal/config" + "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/repository" +) + +// Phase 4 — service-layer tests for RotateAccountKey + RevokeCert + +// RenewalInfo against the in-memory fakeACMERepo. These exercise the +// service contract; full-stack JWS-flow tests live in the api/acme + +// handler test packages. + +// --- RotateAccountKey --------------------------------------------------- + +func newTestRSAJWKForSvc(t *testing.T) (*rsa.PrivateKey, *jose.JSONWebKey) { + t.Helper() + priv, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("rsa.GenerateKey: %v", err) + } + jwk := &jose.JSONWebKey{Key: priv.Public(), Algorithm: string(jose.RS256), Use: "sig"} + return priv, jwk +} + +func newTestECDSAJWKForSvc(t *testing.T) (*ecdsa.PrivateKey, *jose.JSONWebKey) { + t.Helper() + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("ecdsa.GenerateKey: %v", err) + } + jwk := &jose.JSONWebKey{Key: priv.Public(), Algorithm: string(jose.ES256), Use: "sig"} + return priv, jwk +} + +func TestRotateAccountKey_HappyPath(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + profiles := map[string]*domain.CertificateProfile{"prof-corp": {ID: "prof-corp"}} + svc, repo, _ := newSvcWithAudit(t, cfg, profiles) + + _, oldJWK := newTestRSAJWKForSvc(t) + oldThumb, err := acme.JWKThumbprint(oldJWK) + if err != nil { + t.Fatalf("thumb: %v", err) + } + oldPEM, err := acme.JWKToPEM(oldJWK) + if err != nil { + t.Fatalf("pem: %v", err) + } + repo.accounts["acme-acc-test"] = &domain.ACMEAccount{ + AccountID: "acme-acc-test", + ProfileID: "prof-corp", + JWKThumbprint: oldThumb, + JWKPEM: oldPEM, + Status: domain.ACMEAccountStatusValid, + } + + _, newJWK := newTestECDSAJWKForSvc(t) + rolled, err := svc.RotateAccountKey(context.Background(), repo.accounts["acme-acc-test"], newJWK) + if err != nil { + t.Fatalf("RotateAccountKey: %v", err) + } + newThumb, _ := acme.JWKThumbprint(newJWK) + if rolled.JWKThumbprint != newThumb { + t.Errorf("rolled thumbprint = %q, want %q", rolled.JWKThumbprint, newThumb) + } + if repo.accounts["acme-acc-test"].JWKThumbprint != newThumb { + t.Errorf("persisted thumbprint not updated") + } +} + +func TestRotateAccountKey_DuplicateNewKey(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + profiles := map[string]*domain.CertificateProfile{"prof-corp": {ID: "prof-corp"}} + svc, repo, _ := newSvcWithAudit(t, cfg, profiles) + + _, oldJWK := newTestRSAJWKForSvc(t) + _, newJWK := newTestECDSAJWKForSvc(t) + oldThumb, _ := acme.JWKThumbprint(oldJWK) + newThumb, _ := acme.JWKThumbprint(newJWK) + oldPEM, _ := acme.JWKToPEM(oldJWK) + newPEM, _ := acme.JWKToPEM(newJWK) + + // Account A holds the OLD key (will request rotation). + repo.accounts["acme-acc-A"] = &domain.ACMEAccount{ + AccountID: "acme-acc-A", + ProfileID: "prof-corp", + JWKThumbprint: oldThumb, + JWKPEM: oldPEM, + Status: domain.ACMEAccountStatusValid, + } + // Account B already holds the NEW key — collision target. + repo.accounts["acme-acc-B"] = &domain.ACMEAccount{ + AccountID: "acme-acc-B", + ProfileID: "prof-corp", + JWKThumbprint: newThumb, + JWKPEM: newPEM, + Status: domain.ACMEAccountStatusValid, + } + // Wire the thumbprint→account index that GetAccountByThumbprint + // consults. + repo.thumbToAccount["prof-corp|"+newThumb] = "acme-acc-B" + + _, err := svc.RotateAccountKey(context.Background(), repo.accounts["acme-acc-A"], newJWK) + if !errors.Is(err, ErrACMEKeyRolloverDuplicateKey) { + t.Errorf("got err=%v, want ErrACMEKeyRolloverDuplicateKey", err) + } +} + +// --- RevokeCert --------------------------------------------------------- + +// stubRevoker captures the args RevokeCertificateWithActor receives. +type stubRevoker struct { + calls []revokeCall + err error +} +type revokeCall struct { + certID, reason, actor string +} + +func (s *stubRevoker) RevokeCertificateWithActor(ctx context.Context, certID, reason, actor string) error { + s.calls = append(s.calls, revokeCall{certID, reason, actor}) + return s.err +} + +// stubCertRepo is a minimal CertificateRepository for revoke + renewal-info tests. +type stubCertRepo struct { + repository.CertificateRepository + cert *domain.ManagedCertificate + version *domain.CertificateVersion + getErr error +} + +func (s *stubCertRepo) GetVersionBySerial(ctx context.Context, issuerID, serial string) (*domain.CertificateVersion, error) { + if s.getErr != nil { + return nil, s.getErr + } + if s.version != nil && s.version.SerialNumber == serial { + return s.version, nil + } + return nil, errors.New("not found") +} +func (s *stubCertRepo) Get(ctx context.Context, id string) (*domain.ManagedCertificate, error) { + if s.cert != nil && s.cert.ID == id { + return s.cert, nil + } + return nil, errors.New("not found") +} + +// stubIssuerConn is a no-op IssuerConnector for firstAvailableIssuer(). +// We don't need the connector itself to do anything; just that +// firstAvailableIssuer returns ok=true. + +// stubRenewalPolicies is a minimal RenewalPolicyLookup. +type stubRenewalPolicies struct { + pol *domain.RenewalPolicy +} + +func (s *stubRenewalPolicies) Get(ctx context.Context, id string) (*domain.RenewalPolicy, error) { + if s.pol != nil && s.pol.ID == id { + return s.pol, nil + } + return nil, errors.New("not found") +} + +func generateRevocationFixture(t *testing.T) (cert *domain.ManagedCertificate, version *domain.CertificateVersion, der []byte, certPriv *ecdsa.PrivateKey) { + t.Helper() + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("genkey: %v", err) + } + template := &x509.Certificate{ + SerialNumber: big.NewInt(0xabcdef12), + Subject: pkix.Name{CommonName: "leaf.example.com"}, + NotBefore: time.Now().Add(-1 * time.Hour), + NotAfter: time.Now().Add(90 * 24 * time.Hour), + } + der, err = x509.CreateCertificate(rand.Reader, template, template, &priv.PublicKey, priv) + if err != nil { + t.Fatalf("CreateCertificate: %v", err) + } + parsed, _ := x509.ParseCertificate(der) + serialHex := strings.ToLower(parsed.SerialNumber.Text(16)) + cert = &domain.ManagedCertificate{ + ID: "mc-test-001", + IssuerID: "iss-test", + ExpiresAt: parsed.NotAfter, + Status: domain.CertificateStatusActive, + } + version = &domain.CertificateVersion{ + CertificateID: cert.ID, + SerialNumber: serialHex, + NotBefore: parsed.NotBefore, + NotAfter: parsed.NotAfter, + } + return cert, version, der, priv +} + +// minimalIssuerRegistryWithOne returns an IssuerRegistry that reports +// one available issuer so firstAvailableIssuer() is happy. +func minimalIssuerRegistryWithOne() *IssuerRegistry { + r := NewIssuerRegistry(slog.New(slog.NewTextHandler(io.Discard, nil))) + r.issuers["iss-test"] = nil // map entry is enough for first-available iteration + return r +} + +func TestRevokeCert_NotConfigured(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + svc, _, _ := newSvcWithAudit(t, cfg, nil) + err := svc.RevokeCert(context.Background(), &acme.VerifiedRequest{}, []byte{1, 2, 3}, 0) + if !errors.Is(err, ErrACMERevocationUnconfigured) { + t.Errorf("got err=%v, want ErrACMERevocationUnconfigured", err) + } +} + +func TestRevokeCert_KidPath_AccountDoesNotOwn(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + svc, _, _ := newSvcWithAudit(t, cfg, nil) + revoker := &stubRevoker{} + cert, version, der, _ := generateRevocationFixture(t) + certRepo := &stubCertRepo{cert: cert, version: version} + svc.SetIssuancePipeline(nil, certRepo, minimalIssuerRegistryWithOne()) + svc.SetRevocationDelegate(revoker) + + verified := &acme.VerifiedRequest{ + Account: &domain.ACMEAccount{AccountID: "acme-acc-NotOwner"}, + } + err := svc.RevokeCert(context.Background(), verified, der, 0) + if !errors.Is(err, ErrACMERevocationUnauthorized) { + t.Errorf("got err=%v, want ErrACMERevocationUnauthorized", err) + } + if len(revoker.calls) != 0 { + t.Errorf("revoker should not have been called: %+v", revoker.calls) + } +} + +func TestRevokeCert_JWKPath_KeyMismatch(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + svc, _, _ := newSvcWithAudit(t, cfg, nil) + revoker := &stubRevoker{} + cert, version, der, _ := generateRevocationFixture(t) + certRepo := &stubCertRepo{cert: cert, version: version} + svc.SetIssuancePipeline(nil, certRepo, minimalIssuerRegistryWithOne()) + svc.SetRevocationDelegate(revoker) + + // Different JWK than the cert's own pubkey → 401. + _, otherJWK := newTestECDSAJWKForSvc(t) + verified := &acme.VerifiedRequest{JWK: otherJWK} + err := svc.RevokeCert(context.Background(), verified, der, 0) + if !errors.Is(err, ErrACMERevocationUnauthorized) { + t.Errorf("got err=%v, want ErrACMERevocationUnauthorized", err) + } +} + +func TestRevokeCert_JWKPath_HappyPath(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + svc, _, _ := newSvcWithAudit(t, cfg, nil) + revoker := &stubRevoker{} + cert, version, der, certPriv := generateRevocationFixture(t) + certRepo := &stubCertRepo{cert: cert, version: version} + svc.SetIssuancePipeline(nil, certRepo, minimalIssuerRegistryWithOne()) + svc.SetRevocationDelegate(revoker) + + // JWK == cert's own pubkey. + jwk := &jose.JSONWebKey{Key: certPriv.Public(), Algorithm: string(jose.ES256)} + verified := &acme.VerifiedRequest{JWK: jwk} + if err := svc.RevokeCert(context.Background(), verified, der, 1 /*keyCompromise*/); err != nil { + t.Fatalf("RevokeCert: %v", err) + } + if len(revoker.calls) != 1 { + t.Fatalf("revoker calls = %d, want 1", len(revoker.calls)) + } + got := revoker.calls[0] + if got.certID != cert.ID { + t.Errorf("certID = %q, want %q", got.certID, cert.ID) + } + if got.reason != string(domain.RevocationReasonKeyCompromise) { + t.Errorf("reason = %q, want keyCompromise", got.reason) + } + if !strings.HasPrefix(got.actor, "acme-cert-key:") { + t.Errorf("actor = %q, want prefix acme-cert-key:", got.actor) + } +} + +func TestRevokeCert_AlreadyRevoked(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute} + svc, _, _ := newSvcWithAudit(t, cfg, nil) + revoker := &stubRevoker{} + cert, version, der, certPriv := generateRevocationFixture(t) + cert.Status = domain.CertificateStatusRevoked + certRepo := &stubCertRepo{cert: cert, version: version} + svc.SetIssuancePipeline(nil, certRepo, minimalIssuerRegistryWithOne()) + svc.SetRevocationDelegate(revoker) + + jwk := &jose.JSONWebKey{Key: certPriv.Public(), Algorithm: string(jose.ES256)} + err := svc.RevokeCert(context.Background(), &acme.VerifiedRequest{JWK: jwk}, der, 0) + if !errors.Is(err, ErrACMERevocationAlreadyRevoked) { + t.Errorf("got err=%v, want ErrACMERevocationAlreadyRevoked", err) + } +} + +func TestRevokeCert_ReasonClamping(t *testing.T) { + cases := []struct { + code int + want string + }{ + {0, "unspecified"}, + {1, "keyCompromise"}, + {4, "superseded"}, + {8, "unspecified"}, // out-of-range RFC 5280 code + {99, "unspecified"}, // out-of-range + {-1, "unspecified"}, // negative + } + for _, c := range cases { + got := mapACMERevocationReason(c.code) + if got != c.want { + t.Errorf("code=%d: got %q, want %q", c.code, got, c.want) + } + } +} + +// --- RenewalInfo -------------------------------------------------------- + +func TestRenewalInfo_Disabled(t *testing.T) { + cfg := config.ACMEServerConfig{NonceTTL: 5 * time.Minute, ARIEnabled: false} + svc, _, _ := newSvcWithAudit(t, cfg, nil) + _, _, err := svc.RenewalInfo(context.Background(), "prof-corp", "abc.def") + if !errors.Is(err, ErrACMEARIDisabled) { + t.Errorf("got err=%v, want ErrACMEARIDisabled", err) + } +} + +func TestRenewalInfo_BadCertID(t *testing.T) { + cfg := config.ACMEServerConfig{ + NonceTTL: 5 * time.Minute, + ARIEnabled: true, + ARIPollInterval: 6 * time.Hour, + } + profiles := map[string]*domain.CertificateProfile{"prof-corp": {ID: "prof-corp"}} + svc, _, _ := newSvcWithAudit(t, cfg, profiles) + _, _, err := svc.RenewalInfo(context.Background(), "prof-corp", "not-a-valid-cert-id") + if !errors.Is(err, ErrACMEARIBadCertID) { + t.Errorf("got err=%v, want ErrACMEARIBadCertID", err) + } +} diff --git a/internal/service/acme_test.go b/internal/service/acme_test.go index c6fb5b5..8aa0098 100644 --- a/internal/service/acme_test.go +++ b/internal/service/acme_test.go @@ -8,6 +8,7 @@ import ( "crypto/rand" "crypto/rsa" "errors" + "fmt" "testing" "time" @@ -146,6 +147,23 @@ func (f *fakeACMERepo) UpdateChallengeWithTx(ctx context.Context, q repository.Q func (f *fakeACMERepo) UpdateAuthzStatusWithTx(ctx context.Context, q repository.Querier, authzID string, status domain.ACMEAuthzStatus) error { return nil } +func (f *fakeACMERepo) UpdateAccountJWKWithTx(ctx context.Context, q repository.Querier, accountID, expectedOldThumbprint, newThumbprint, newJWKPEM string) error { + for _, acct := range f.accounts { + if acct.AccountID != accountID { + continue + } + if acct.JWKThumbprint != expectedOldThumbprint { + return fmt.Errorf("acme: account key was rotated concurrently; retry") + } + acct.JWKThumbprint = newThumbprint + acct.JWKPEM = newJWKPEM + return nil + } + return repository.ErrNotFound +} +func (f *fakeACMERepo) AccountOwnsCertificate(ctx context.Context, accountID, certificateID string) (bool, error) { + return false, nil +} // fakeTransactor is the repository.Transactor stand-in: runs fn // against the supplied querier (we just pass nil — fakes ignore it).