fix: end-to-end certificate lifecycle bugs + integration test environment

Fixes 12 production bugs preventing the full issuance→deployment flow
from working with ACME (Pebble/Let's Encrypt) and step-ca issuers:

ACME connector (acme.go):
- Save orderURI before WaitOrder overwrites it (Go crypto/acme bug)
- Add CreateOrderCert fallback via WaitOrder+FetchCert
- Remove defer-reset in ValidateConfig that caused nil pointer panic
- Add Insecure TLS option for self-signed ACME servers (Pebble)

step-ca connector (stepca.go, jwe.go):
- Real JWE provisioner key loading + decryption (was using ephemeral keys)
- Fix JWT audience (/1.0/sign), sha claim (key fingerprint), kid header
- Custom root CA trust via RootCertPath config
- Remove hardcoded 90-day validity default (let step-ca decide)

NGINX target connector (nginx.go):
- Use sh -c for validate/reload commands (shell interpretation)
- Use filepath.Dir instead of fragile string slicing
- Add private key file writing (agent-mode keys were never deployed)
- Make chain_path write conditional

Server/service layer:
- TriggerRenewalWithActor now creates actual Job records (was no-op)
- createDeploymentJobs falls back to DB query when cert.TargetIDs empty
- ProcessPendingJobs skips agent-routed deployment jobs
- Agent cert pickup path parsing: len(parts)<4 → len(parts)<3
- Health/ready/auth-info endpoints bypass auth middleware
- Write timeout 15s→120s for ACME issuance
- Cert fingerprint computed on CSR submission

Integration test environment (deploy/test/):
- 10-phase test script covering Local CA, ACME, step-ca, revocation,
  discovery, renewal, and API spot checks
- Docker Compose with 7 containers (server, agent, postgres, nginx,
  pebble, challtestsrv, step-ca) on isolated network
- TLS verification checks SAN (not just Subject CN) for modern CA compat

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shankar0123
2026-04-02 17:02:20 -04:00
parent 2238f28610
commit b059ec930f
19 changed files with 2102 additions and 84 deletions
+3 -1
View File
@@ -252,6 +252,7 @@ func (h AgentHandler) AgentCSRSubmit(w http.ResponseWriter, r *http.Request) {
}
if err != nil {
slog.Error("CSR submission failed", "agent_id", agentID, "certificate_id", req.CertificateID, "error", err.Error())
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to submit CSR", requestID)
return
}
@@ -274,9 +275,10 @@ func (h AgentHandler) AgentCertificatePickup(w http.ResponseWriter, r *http.Requ
requestID := middleware.GetRequestID(r.Context())
// Extract agent ID and certificate ID from path /api/v1/agents/{id}/certificates/{cert_id}
// After TrimPrefix, path is "{id}/certificates/{cert_id}" → split gives [id, "certificates", cert_id]
path := strings.TrimPrefix(r.URL.Path, "/api/v1/agents/")
parts := strings.Split(path, "/")
if len(parts) < 4 || parts[0] == "" || parts[2] == "" {
if len(parts) < 3 || parts[0] == "" || parts[2] == "" {
ErrorWithRequestID(w, http.StatusBadRequest, "Agent ID and Certificate ID are required", requestID)
return
}
+6
View File
@@ -256,6 +256,11 @@ type ACMEConfig struct {
// Default: false. Requires a CA that supports ARI (e.g., Let's Encrypt).
// Setting: CERTCTL_ACME_ARI_ENABLED environment variable.
ARIEnabled bool
// Insecure skips TLS certificate verification when connecting to the ACME directory.
// Only use for testing with self-signed ACME servers like Pebble. Never in production.
// Setting: CERTCTL_ACME_INSECURE environment variable.
Insecure bool
}
// OpenSSLConfig contains OpenSSL/Custom CA issuer connector configuration.
@@ -503,6 +508,7 @@ func Load() (*Config, error) {
DNSCleanUpScript: getEnv("CERTCTL_ACME_DNS_CLEANUP_SCRIPT", ""),
DNSPersistIssuerDomain: getEnv("CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN", ""),
ARIEnabled: getEnvBool("CERTCTL_ACME_ARI_ENABLED", false),
Insecure: getEnvBool("CERTCTL_ACME_INSECURE", false),
},
Digest: DigestConfig{
Enabled: getEnvBool("CERTCTL_DIGEST_ENABLED", false),
+74 -6
View File
@@ -5,6 +5,7 @@ import (
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"crypto/tls"
"crypto/x509"
"encoding/base64"
"encoding/json"
@@ -58,6 +59,10 @@ type Config struct {
// ARIEnabled enables ACME Renewal Information (ARI, RFC 9773) support per CERTCTL_ACME_ARI_ENABLED.
// When enabled, the connector queries the CA's ARI endpoint to get CA-directed renewal timing.
ARIEnabled bool `json:"ari_enabled,omitempty"`
// Insecure skips TLS certificate verification when connecting to the ACME directory.
// Only use for testing with self-signed ACME servers like Pebble.
Insecure bool `json:"insecure,omitempty"`
}
// Connector implements the issuer.Connector interface for ACME-compatible CAs
@@ -114,6 +119,18 @@ func New(config *Config, logger *slog.Logger) *Connector {
return c
}
// httpClient returns a new HTTP client for talking to the ACME server.
//
// The client always carries a 30-second overall timeout. When the connector's
// config has Insecure set (e.g., for Pebble test servers), TLS certificate
// verification is skipped. In that case the transport is a clone of
// http.DefaultTransport with only the TLS config replaced, so proxy settings
// from the environment, dial/handshake timeouts, and connection pooling limits
// behave the same as in the secure case.
//
// A nil config is treated as "not insecure".
func (c *Connector) httpClient() *http.Client {
	client := &http.Client{Timeout: 30 * time.Second}
	if c.config == nil || !c.config.Insecure {
		return client
	}

	tlsConfig := &tls.Config{InsecureSkipVerify: true} //nolint:gosec // Intentional for test ACME servers (Pebble)

	// http.DefaultTransport is normally an *http.Transport, but another package
	// may have replaced it; fall back to a bare transport in that case.
	if base, ok := http.DefaultTransport.(*http.Transport); ok {
		transport := base.Clone()
		transport.TLSClientConfig = tlsConfig
		client.Transport = transport
	} else {
		client.Transport = &http.Transport{TLSClientConfig: tlsConfig}
	}
	return client
}
// ValidateConfig checks that the ACME directory URL is reachable and valid.
func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessage) error {
var cfg Config
@@ -129,10 +146,16 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag
return fmt.Errorf("ACME email is required")
}
c.logger.Info("validating ACME configuration", "directory_url", cfg.DirectoryURL)
c.logger.Info("validating ACME configuration", "directory_url", cfg.DirectoryURL, "insecure", cfg.Insecure)
// Apply config so httpClient() can use it for the directory probe.
// This persists across the function — if validation fails early, the config
// will still be set, but that's fine since a failed ValidateConfig means
// the connector won't be used.
c.config = &cfg
// Verify that the directory URL is reachable
httpClient := &http.Client{Timeout: 10 * time.Second}
httpClient := c.httpClient()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, cfg.DirectoryURL, nil)
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
@@ -203,6 +226,7 @@ func (c *Connector) ensureClient(ctx context.Context) error {
c.client = &acme.Client{
Key: key,
DirectoryURL: c.config.DirectoryURL,
HTTPClient: c.httpClient(),
}
// Register or retrieve the ACME account
@@ -338,6 +362,12 @@ func (c *Connector) IssueCertificate(ctx context.Context, request issuer.Issuanc
}
c.logger.Info("ACME order created", "order_url", order.URI, "status", order.Status)
// Save FinalizeURL and URI before WaitOrder — WaitOrder returns a new Order
// object that may have empty FinalizeURL and URI fields (Go's crypto/acme
// WaitOrder doesn't populate Order.URI on the returned struct).
finalizeURL := order.FinalizeURL
orderURI := order.URI
// Step 2: Solve authorizations (HTTP-01 challenges)
if order.Status == acme.StatusPending {
if err := c.solveAuthorizations(ctx, order.AuthzURLs); err != nil {
@@ -345,10 +375,18 @@ func (c *Connector) IssueCertificate(ctx context.Context, request issuer.Issuanc
}
// Wait for the order to be ready
order, err = c.client.WaitOrder(ctx, order.URI)
order, err = c.client.WaitOrder(ctx, orderURI)
if err != nil {
return nil, fmt.Errorf("order failed after challenge: %w", err)
}
// Update finalizeURL from the waited order if it has one
if order.FinalizeURL != "" {
finalizeURL = order.FinalizeURL
}
// Preserve orderURI — WaitOrder doesn't populate Order.URI
if order.URI != "" {
orderURI = order.URI
}
}
if order.Status != acme.StatusReady {
@@ -361,9 +399,39 @@ func (c *Connector) IssueCertificate(ctx context.Context, request issuer.Issuanc
return nil, fmt.Errorf("failed to parse CSR: %w", err)
}
derChain, _, err := c.client.CreateOrderCert(ctx, order.FinalizeURL, csrDER, true)
if finalizeURL == "" {
return nil, fmt.Errorf("ACME order has no finalize URL (order URI: %s, status: %s)", order.URI, order.Status)
}
// Step 3b: Finalize the order and fetch the certificate.
// CreateOrderCert POSTs the CSR to the finalize URL and attempts to retrieve
// the certificate. Some ACME servers (notably Pebble) return the order object
// per RFC 8555 rather than redirecting to the cert, which can cause
// CreateOrderCert's internal cert URL resolution to fail. In that case, we
// fall back to WaitOrder (to get the CertURL) + FetchCert.
derChain, _, err := c.client.CreateOrderCert(ctx, finalizeURL, csrDER, true)
if err != nil {
return nil, fmt.Errorf("failed to finalize order: %w", err)
c.logger.Warn("CreateOrderCert failed, attempting manual certificate fetch",
"error", err, "order_uri", orderURI)
// The finalize POST likely succeeded (the CA issued the cert) but cert
// retrieval failed. WaitOrder returns the order in "valid" state with
// CertURL populated.
validOrder, waitErr := c.client.WaitOrder(ctx, orderURI)
if waitErr != nil {
return nil, fmt.Errorf("failed to finalize order: %w (wait fallback: %v)", err, waitErr)
}
if validOrder.CertURL == "" {
return nil, fmt.Errorf("order finalized but no certificate URL returned (original error: %w)", err)
}
c.logger.Info("fetching certificate via fallback", "cert_url", validOrder.CertURL)
fetchedChain, fetchErr := c.client.FetchCert(ctx, validOrder.CertURL, true)
if fetchErr != nil {
return nil, fmt.Errorf("failed to fetch certificate: %w (original finalize error: %v)", fetchErr, err)
}
derChain = fetchedChain
}
if len(derChain) == 0 {
@@ -387,7 +455,7 @@ func (c *Connector) IssueCertificate(ctx context.Context, request issuer.Issuanc
Serial: serial,
NotBefore: notBefore,
NotAfter: notAfter,
OrderID: order.URI,
OrderID: orderURI,
}, nil
}
+300
View File
@@ -0,0 +1,300 @@
// Package stepca — JWE decryption for step-ca provisioner keys.
//
// step-ca stores provisioner private keys as JWE-encrypted JSON files using:
// - Algorithm: PBES2-HS256+A128KW (PBKDF2 key derivation + AES-128 Key Wrap)
// - Encryption: A128GCM (AES-128 in GCM mode)
//
// This file implements just enough JWE to decrypt these files without requiring
// an external JOSE library. Uses only stdlib + golang.org/x/crypto/pbkdf2.
package stepca
import (
"crypto/aes"
"crypto/cipher"
"crypto/ecdsa"
"crypto/elliptic"
"crypto/sha256"
"encoding/base64"
"encoding/binary"
"encoding/json"
"fmt"
"math/big"
"golang.org/x/crypto/pbkdf2"
)
// jweJSON is the JWE JSON Serialization format used by step-ca provisioner keys.
// decryptProvisionerKey decodes every field with base64.RawURLEncoding
// (unpadded base64url).
type jweJSON struct {
// Protected is the encoded protected header. Its still-encoded text is also
// used as the AES-GCM additional authenticated data.
Protected string `json:"protected"`
// EncryptedKey is the AES Key Wrap output that holds the content encryption key.
EncryptedKey string `json:"encrypted_key"`
// IV is the AES-GCM nonce.
IV string `json:"iv"`
// Ciphertext is the encrypted payload (a JWK once decrypted).
Ciphertext string `json:"ciphertext"`
// Tag is the AES-GCM authentication tag, stored separately from Ciphertext.
Tag string `json:"tag"`
}
// jweHeader is the protected header inside a step-ca provisioner key JWE.
// decryptProvisionerKey rejects any Alg other than "PBES2-HS256+A128KW" and any
// Enc other than "A128GCM" or "A256GCM". P2s and P2c feed the PBKDF2 key
// derivation. Cty is parsed but not inspected by the decryption code.
type jweHeader struct {
Alg string `json:"alg"` // "PBES2-HS256+A128KW"
Enc string `json:"enc"` // "A128GCM"
Cty string `json:"cty"` // "jwk+json"
P2s string `json:"p2s"` // PBKDF2 salt (base64url)
P2c int `json:"p2c"` // PBKDF2 iteration count
}
// jwkEC is a minimal JWK representation for EC private keys.
// X, Y and D are unpadded base64url-encoded big-endian integers.
type jwkEC struct {
	Kty string `json:"kty"`
	Crv string `json:"crv"`
	X   string `json:"x"`
	Y   string `json:"y"`
	D   string `json:"d"`
	Kid string `json:"kid"`
}

// decryptProvisionerKey decrypts a step-ca JWE-encrypted provisioner key file.
// Returns the parsed ECDSA private key and the key ID (kid).
func decryptProvisionerKey(jweData []byte, password string) (*ecdsa.PrivateKey, string, error) {
	// Parse JWE JSON
	var jwe jweJSON
	if err := json.Unmarshal(jweData, &jwe); err != nil {
		return nil, "", fmt.Errorf("failed to parse JWE JSON: %w", err)
	}

	// Decode protected header
	headerBytes, err := base64.RawURLEncoding.DecodeString(jwe.Protected)
	if err != nil {
		return nil, "", fmt.Errorf("failed to decode JWE protected header: %w", err)
	}

	var header jweHeader
	if err := json.Unmarshal(headerBytes, &header); err != nil {
		return nil, "", fmt.Errorf("failed to parse JWE header: %w", err)
	}

	if header.Alg != "PBES2-HS256+A128KW" {
		return nil, "", fmt.Errorf("unsupported JWE algorithm: %s (expected PBES2-HS256+A128KW)", header.Alg)
	}
	if header.Enc != "A128GCM" && header.Enc != "A256GCM" {
		return nil, "", fmt.Errorf("unsupported JWE encryption: %s (expected A128GCM or A256GCM)", header.Enc)
	}

	// Decode PBKDF2 salt
	p2sSalt, err := base64.RawURLEncoding.DecodeString(header.P2s)
	if err != nil {
		return nil, "", fmt.Errorf("failed to decode PBKDF2 salt: %w", err)
	}

	// Decode encrypted key, IV, ciphertext, tag
	encryptedKey, err := base64.RawURLEncoding.DecodeString(jwe.EncryptedKey)
	if err != nil {
		return nil, "", fmt.Errorf("failed to decode encrypted key: %w", err)
	}
	iv, err := base64.RawURLEncoding.DecodeString(jwe.IV)
	if err != nil {
		return nil, "", fmt.Errorf("failed to decode IV: %w", err)
	}
	ciphertext, err := base64.RawURLEncoding.DecodeString(jwe.Ciphertext)
	if err != nil {
		return nil, "", fmt.Errorf("failed to decode ciphertext: %w", err)
	}
	tag, err := base64.RawURLEncoding.DecodeString(jwe.Tag)
	if err != nil {
		return nil, "", fmt.Errorf("failed to decode tag: %w", err)
	}

	// Step 1: Derive Key Encryption Key (KEK) using PBKDF2
	// PBES2-HS256+A128KW: PBKDF2-SHA256, 16-byte derived key for AES-128 Key Wrap
	// The salt for PBKDF2 is: UTF8(alg) || 0x00 || p2s
	algBytes := []byte(header.Alg)
	salt := make([]byte, len(algBytes)+1+len(p2sSalt))
	copy(salt, algBytes)
	salt[len(algBytes)] = 0x00
	copy(salt[len(algBytes)+1:], p2sSalt)

	kekSize := 16 // AES-128 for A128KW
	kek := pbkdf2.Key([]byte(password), salt, header.P2c, kekSize, sha256.New)

	// Step 2: AES Key Unwrap (RFC 3394) to get the Content Encryption Key (CEK)
	cek, err := aesKeyUnwrap(kek, encryptedKey)
	if err != nil {
		return nil, "", fmt.Errorf("AES key unwrap failed (wrong password?): %w", err)
	}

	// Step 3: AES-GCM decrypt the payload
	// AAD = ASCII(BASE64URL(protected header))
	aad := []byte(jwe.Protected)

	block, err := aes.NewCipher(cek)
	if err != nil {
		return nil, "", fmt.Errorf("failed to create AES cipher: %w", err)
	}
	gcm, err := cipher.NewGCM(block)
	if err != nil {
		return nil, "", fmt.Errorf("failed to create GCM: %w", err)
	}

	// GCM expects ciphertext+tag concatenated
	sealed := append(ciphertext, tag...)
	plaintext, err := gcm.Open(nil, iv, sealed, aad)
	if err != nil {
		return nil, "", fmt.Errorf("GCM decryption failed: %w", err)
	}

	// Step 4: Parse the decrypted JWK
	var jwk jwkEC
	if err := json.Unmarshal(plaintext, &jwk); err != nil {
		return nil, "", fmt.Errorf("failed to parse decrypted JWK: %w", err)
	}

	if jwk.Kty != "EC" {
		return nil, "", fmt.Errorf("unsupported JWK key type: %s (expected EC)", jwk.Kty)
	}

	key, err := jwkToECDSA(&jwk)
	if err != nil {
		return nil, "", err
	}

	return key, jwk.Kid, nil
}

// jwkToECDSA converts a JWK EC key to an *ecdsa.PrivateKey.
//
// Supported curves are P-256, P-384 and P-521. The x, y and d members are
// decoded as unpadded base64url big-endian integers. Before returning, the key
// is validated: the public point must lie on the curve, d must be a valid
// scalar for the curve, and d must correspond to the (x, y) public point.
// A key that fails any of these checks is rejected with an error rather than
// being handed to the signer.
func jwkToECDSA(jwk *jwkEC) (*ecdsa.PrivateKey, error) {
	var curve elliptic.Curve
	switch jwk.Crv {
	case "P-256":
		curve = elliptic.P256()
	case "P-384":
		curve = elliptic.P384()
	case "P-521":
		curve = elliptic.P521()
	default:
		return nil, fmt.Errorf("unsupported curve: %s", jwk.Crv)
	}

	xBytes, err := base64.RawURLEncoding.DecodeString(jwk.X)
	if err != nil {
		return nil, fmt.Errorf("failed to decode JWK x: %w", err)
	}
	yBytes, err := base64.RawURLEncoding.DecodeString(jwk.Y)
	if err != nil {
		return nil, fmt.Errorf("failed to decode JWK y: %w", err)
	}
	dBytes, err := base64.RawURLEncoding.DecodeString(jwk.D)
	if err != nil {
		return nil, fmt.Errorf("failed to decode JWK d: %w", err)
	}

	key := &ecdsa.PrivateKey{
		PublicKey: ecdsa.PublicKey{
			Curve: curve,
			X:     new(big.Int).SetBytes(xBytes),
			Y:     new(big.Int).SetBytes(yBytes),
		},
		D: new(big.Int).SetBytes(dBytes),
	}

	// The ECDH conversions perform the standard-library validity checks:
	// the public one verifies the point is on the curve, the private one
	// verifies that d is non-zero and below the curve order.
	pub, err := key.PublicKey.ECDH()
	if err != nil {
		return nil, fmt.Errorf("invalid JWK public key: %w", err)
	}
	priv, err := key.ECDH()
	if err != nil {
		return nil, fmt.Errorf("invalid JWK private key: %w", err)
	}
	// Recompute the public key from d and make sure it matches x/y.
	if !priv.PublicKey().Equal(pub) {
		return nil, fmt.Errorf("JWK private key does not match its public key")
	}

	return key, nil
}
// ecdsaPublicKeyToJWK renders an ECDSA public key as the string members of an
// EC JWK ("kty", "crv", "x", "y") for embedding in a JWT header.
//
// The coordinates are written as fixed-width big-endian values (32, 48 or 66
// bytes for P-256, P-384 and P-521) and then base64url-encoded without
// padding. Any other curve is labelled "unknown" and uses a 32-byte width.
func ecdsaPublicKeyToJWK(key *ecdsa.PublicKey) map[string]string {
	name, width := "unknown", 32
	switch key.Curve {
	case elliptic.P256():
		name, width = "P-256", 32
	case elliptic.P384():
		name, width = "P-384", 48
	case elliptic.P521():
		name, width = "P-521", 66
	}

	// FillBytes left-pads each coordinate with zeros up to the field width.
	xField := key.X.FillBytes(make([]byte, width))
	yField := key.Y.FillBytes(make([]byte, width))

	out := make(map[string]string, 4)
	out["kty"] = "EC"
	out["crv"] = name
	out["x"] = base64.RawURLEncoding.EncodeToString(xField)
	out["y"] = base64.RawURLEncoding.EncodeToString(yField)
	return out
}
// aesKeyUnwrap reverses AES Key Wrap (RFC 3394) and returns the unwrapped key
// data.
//
// kek is the AES key-encryption key; ciphertext is the wrapped key, which must
// be a multiple of 8 bytes and at least 24 bytes long. An error is returned for
// a bad length, for a KEK that AES rejects, or when the recovered integrity
// value is not 0xA6A6A6A6A6A6A6A6 (which is what a wrong KEK produces).
func aesKeyUnwrap(kek, ciphertext []byte) ([]byte, error) {
	const half = 8 // size of one 64-bit semiblock

	if len(ciphertext) < 3*half || len(ciphertext)%half != 0 {
		return nil, fmt.Errorf("invalid ciphertext length for AES Key Unwrap: %d", len(ciphertext))
	}

	cipherBlock, err := aes.NewCipher(kek)
	if err != nil {
		return nil, fmt.Errorf("failed to create AES cipher: %w", err)
	}

	// The first semiblock is the integrity register A; the rest is R[1..n],
	// kept contiguously so it can be returned as-is at the end.
	count := len(ciphertext)/half - 1
	reg := binary.BigEndian.Uint64(ciphertext[:half])
	keyData := make([]byte, count*half)
	copy(keyData, ciphertext[half:])

	// Six rounds, walking the semiblocks from last to first in each round.
	var scratch [2 * half]byte
	for round := 5; round >= 0; round-- {
		for idx := count; idx >= 1; idx-- {
			semiblock := keyData[(idx-1)*half : idx*half]

			// B = AES^-1(KEK, (A xor t) || R[idx]) with t = n*round + idx.
			binary.BigEndian.PutUint64(scratch[:half], reg^uint64(count*round+idx))
			copy(scratch[half:], semiblock)
			cipherBlock.Decrypt(scratch[:], scratch[:])

			reg = binary.BigEndian.Uint64(scratch[:half])
			copy(semiblock, scratch[half:])
		}
	}

	// A must end up equal to the default initial value.
	if reg != 0xA6A6A6A6A6A6A6A6 {
		return nil, fmt.Errorf("AES Key Unwrap integrity check failed")
	}
	return keyData, nil
}
+114 -34
View File
@@ -27,6 +27,7 @@ import (
"crypto/elliptic"
"crypto/rand"
"crypto/sha256"
"crypto/tls"
"crypto/x509"
"encoding/base64"
"encoding/json"
@@ -74,17 +75,37 @@ type Connector struct {
}
// New creates a new step-ca connector with the given configuration and logger.
// If RootCertPath is set, the HTTP client will trust that CA certificate for TLS connections.
// Otherwise, the system trust store is used (which works if setup-trust.sh has run).
func New(config *Config, logger *slog.Logger) *Connector {
if config != nil && config.ValidityDays == 0 {
config.ValidityDays = 90
// Don't default ValidityDays — let step-ca use its own default duration.
// Operators can explicitly set ValidityDays if their step-ca is configured
// with longer max durations. A zero value means "omit from sign request."
httpClient := &http.Client{Timeout: 30 * time.Second}
// Load custom root CA cert if provided
if config != nil && config.RootCertPath != "" {
rootPEM, err := os.ReadFile(config.RootCertPath)
if err == nil {
pool := x509.NewCertPool()
if pool.AppendCertsFromPEM(rootPEM) {
httpClient.Transport = &http.Transport{
TLSClientConfig: &tls.Config{
RootCAs: pool,
},
}
logger.Info("step-ca custom root CA loaded", "path", config.RootCertPath)
}
} else {
logger.Warn("failed to read step-ca root cert, using system trust store", "path", config.RootCertPath, "error", err)
}
}
return &Connector{
config: config,
logger: logger,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
config: config,
logger: logger,
httpClient: httpClient,
}
}
@@ -103,9 +124,7 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag
return fmt.Errorf("step-ca provisioner_name is required")
}
if cfg.ValidityDays == 0 {
cfg.ValidityDays = 90
}
// Don't default ValidityDays — 0 means "let step-ca use its own default duration"
// Check CA health
healthURL := cfg.CAURL + "/health"
@@ -174,15 +193,18 @@ func (c *Connector) IssueCertificate(ctx context.Context, request issuer.Issuanc
return nil, fmt.Errorf("failed to generate provisioner token: %w", err)
}
// Build the sign request
now := time.Now()
notAfter := now.AddDate(0, 0, c.config.ValidityDays)
// Build the sign request.
// When ValidityDays is 0 (default), omit NotBefore/NotAfter so step-ca uses its
// own default duration (typically 24h). The signRequest struct has omitempty on
// both time fields, so zero-value time.Time{} gets stripped from the JSON.
signReq := signRequest{
CsrPEM: request.CSRPEM,
OTT: ott,
NotBefore: now,
NotAfter: notAfter,
CsrPEM: request.CSRPEM,
OTT: ott,
}
if c.config.ValidityDays > 0 {
now := time.Now()
signReq.NotBefore = now
signReq.NotAfter = now.AddDate(0, 0, c.config.ValidityDays)
}
body, err := json.Marshal(signReq)
@@ -318,39 +340,80 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer
}
// generateProvisionerToken creates a short-lived JWT (One-Time Token) for step-ca API calls.
// This is a minimal JWT signed with the provisioner's key.
// The JWT is signed with the provisioner's private key (loaded from the encrypted JWE file
// at ProvisionerKeyPath and decrypted with ProvisionerPassword).
func (c *Connector) generateProvisionerToken(subject string, sans []string) (string, error) {
// For the initial implementation, we generate a simple self-signed JWT.
// In production, the provisioner key would be loaded from the configured path.
// step-ca expects a JWT with: sub=<CN>, iss=<provisioner>, aud=<ca-url>/sign
var key *ecdsa.PrivateKey
var kid string
if c.config.ProvisionerKeyPath != "" {
// Production: load and decrypt the real provisioner key from disk
var err error
key, kid, err = c.loadProvisionerKey()
if err != nil {
return "", fmt.Errorf("failed to load provisioner key: %w", err)
}
} else {
// Fallback: generate an ephemeral key (for testing or when key path not configured).
// This won't authenticate with a real step-ca server, but allows the connector
// to function against mock servers in tests.
c.logger.Warn("no provisioner key path configured, using ephemeral key (will not work with real step-ca)")
var err error
key, err = ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
return "", fmt.Errorf("failed to generate ephemeral key: %w", err)
}
kid = "ephemeral"
}
now := time.Now()
// step-ca expects: aud = <ca-url>/1.0/sign (the sign endpoint audience)
claims := map[string]interface{}{
"sub": subject,
"iss": c.config.ProvisionerName,
"aud": c.config.CAURL + "/sign",
"aud": c.config.CAURL + "/1.0/sign",
"nbf": now.Unix(),
"iat": now.Unix(),
"exp": now.Add(5 * time.Minute).Unix(),
"jti": generateJTI(),
"sha": c.config.ProvisionerName, // step-ca uses this for key lookup
"sha": kid, // step-ca uses this to look up the provisioner by key fingerprint
}
if len(sans) > 0 {
claims["sans"] = sans
}
// Generate an ephemeral signing key for the token.
// In a full implementation, this would use the provisioner key from disk.
// For now, we use an ephemeral key — step-ca administrators should configure
// the provisioner to accept tokens from this key.
key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
return "", fmt.Errorf("failed to generate token signing key: %w", err)
return signJWTWithKID(claims, key, kid)
}
// loadProvisionerKey loads and decrypts the step-ca provisioner key from disk.
// Returns the ECDSA private key and the key ID (JWK thumbprint).
func (c *Connector) loadProvisionerKey() (*ecdsa.PrivateKey, string, error) {
if c.config.ProvisionerKeyPath == "" {
return nil, "", fmt.Errorf("provisioner_key_path is required for step-ca JWK authentication")
}
return signJWT(claims, key)
jweData, err := os.ReadFile(c.config.ProvisionerKeyPath)
if err != nil {
return nil, "", fmt.Errorf("failed to read provisioner key file %s: %w", c.config.ProvisionerKeyPath, err)
}
password := c.config.ProvisionerPassword
if password == "" {
return nil, "", fmt.Errorf("provisioner_password is required to decrypt the provisioner key")
}
key, kid, err := decryptProvisionerKey(jweData, password)
if err != nil {
return nil, "", fmt.Errorf("failed to decrypt provisioner key: %w", err)
}
c.logger.Info("provisioner key loaded and decrypted",
"key_path", c.config.ProvisionerKeyPath,
"kid", kid)
return key, kid, nil
}
// generateJTI creates a unique JWT ID.
@@ -360,14 +423,31 @@ func generateJTI() string {
return base64.RawURLEncoding.EncodeToString(b)
}
// signJWT creates a minimal ES256 JWT from the given claims.
// signJWTWithKID signs the claims as an ES256 JWT whose header carries the
// given key ID in its "kid" member, alongside "alg" and "typ". The signing
// itself is delegated to signJWTRaw.
func signJWTWithKID(claims map[string]interface{}, key *ecdsa.PrivateKey, kid string) (string, error) {
	return signJWTRaw(claims, key, map[string]string{
		"alg": "ES256",
		"typ": "JWT",
		"kid": kid,
	})
}
// signJWT signs the claims as an ES256 JWT with a minimal header containing
// only "alg" and "typ" (no "kid"). The signing itself is delegated to
// signJWTRaw.
func signJWT(claims map[string]interface{}, key *ecdsa.PrivateKey) (string, error) {
	hdr := make(map[string]string, 2)
	hdr["alg"] = "ES256"
	hdr["typ"] = "JWT"

	token, err := signJWTRaw(claims, key, hdr)
	return token, err
}
// signJWTRaw creates an ES256 JWT from the given claims and header.
func signJWTRaw(claims map[string]interface{}, key *ecdsa.PrivateKey, header map[string]string) (string, error) {
headerJSON, err := json.Marshal(header)
if err != nil {
return "", err
+32 -14
View File
@@ -7,6 +7,7 @@ import (
"log/slog"
"os"
"os/exec"
"path/filepath"
"time"
"github.com/shankar0123/certctl/internal/connector/target"
@@ -67,13 +68,13 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag
"chain_path", cfg.ChainPath)
// Verify directory exists and is writable
certDir := cfg.CertPath[:len(cfg.CertPath)-len("/cert.pem")] // Simple path extraction
certDir := filepath.Dir(cfg.CertPath)
if _, err := os.Stat(certDir); os.IsNotExist(err) {
return fmt.Errorf("NGINX cert directory does not exist: %s", certDir)
}
// Verify validate command works
cmd := exec.CommandContext(ctx, cfg.ValidateCommand)
cmd := exec.CommandContext(ctx, "sh", "-c", cfg.ValidateCommand)
if err := cmd.Run(); err != nil {
c.logger.Warn("NGINX config validation failed during config check",
"error", err,
@@ -115,20 +116,37 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy
}
// Write chain with same permissions
if err := os.WriteFile(c.config.ChainPath, []byte(request.ChainPEM), 0644); err != nil {
errMsg := fmt.Sprintf("failed to write chain: %v", err)
c.logger.Error("chain deployment failed", "error", err)
return &target.DeploymentResult{
Success: false,
TargetAddress: c.config.ChainPath,
Message: errMsg,
DeployedAt: time.Now(),
}, fmt.Errorf("%s", errMsg)
if c.config.ChainPath != "" {
if err := os.WriteFile(c.config.ChainPath, []byte(request.ChainPEM), 0644); err != nil {
errMsg := fmt.Sprintf("failed to write chain: %v", err)
c.logger.Error("chain deployment failed", "error", err)
return &target.DeploymentResult{
Success: false,
TargetAddress: c.config.ChainPath,
Message: errMsg,
DeployedAt: time.Now(),
}, fmt.Errorf("%s", errMsg)
}
}
// Write private key if provided and key_path is configured
if c.config.KeyPath != "" && request.KeyPEM != "" {
if err := os.WriteFile(c.config.KeyPath, []byte(request.KeyPEM), 0600); err != nil {
errMsg := fmt.Sprintf("failed to write private key: %v", err)
c.logger.Error("key deployment failed", "error", err)
return &target.DeploymentResult{
Success: false,
TargetAddress: c.config.KeyPath,
Message: errMsg,
DeployedAt: time.Now(),
}, fmt.Errorf("%s", errMsg)
}
c.logger.Info("private key written", "key_path", c.config.KeyPath)
}
// Validate NGINX configuration before reload
c.logger.Debug("validating NGINX configuration", "validate_command", c.config.ValidateCommand)
validateCmd := exec.CommandContext(ctx, c.config.ValidateCommand)
validateCmd := exec.CommandContext(ctx, "sh", "-c", c.config.ValidateCommand)
if output, err := validateCmd.CombinedOutput(); err != nil {
errMsg := fmt.Sprintf("NGINX config validation failed: %v (output: %s)", err, string(output))
c.logger.Error("NGINX validation failed", "error", err, "output", string(output))
@@ -142,7 +160,7 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy
// Reload NGINX
c.logger.Debug("reloading NGINX", "reload_command", c.config.ReloadCommand)
reloadCmd := exec.CommandContext(ctx, c.config.ReloadCommand)
reloadCmd := exec.CommandContext(ctx, "sh", "-c", c.config.ReloadCommand)
if output, err := reloadCmd.CombinedOutput(); err != nil {
errMsg := fmt.Sprintf("NGINX reload failed: %v (output: %s)", err, string(output))
c.logger.Error("NGINX reload failed", "error", err, "output", string(output))
@@ -187,7 +205,7 @@ func (c *Connector) ValidateDeployment(ctx context.Context, request target.Valid
startTime := time.Now()
// Validate NGINX configuration
validateCmd := exec.CommandContext(ctx, c.config.ValidateCommand)
validateCmd := exec.CommandContext(ctx, "sh", "-c", c.config.ValidateCommand)
if err := validateCmd.Run(); err != nil {
errMsg := fmt.Sprintf("NGINX config validation failed: %v", err)
c.logger.Error("validation failed", "error", err)
+9 -8
View File
@@ -178,14 +178,15 @@ func (s *AgentService) SubmitCSR(ctx context.Context, agentID string, certID str
}
version := &domain.CertificateVersion{
ID: generateID("certver"),
CertificateID: certID,
SerialNumber: result.Serial,
NotBefore: result.NotBefore,
NotAfter: result.NotAfter,
PEMChain: result.CertPEM + "\n" + result.ChainPEM,
CSRPEM: string(csrPEM),
CreatedAt: time.Now(),
ID: generateID("certver"),
CertificateID: certID,
SerialNumber: result.Serial,
NotBefore: result.NotBefore,
NotAfter: result.NotAfter,
FingerprintSHA256: computeCertFingerprint(result.CertPEM),
PEMChain: result.CertPEM + "\n" + result.ChainPEM,
CSRPEM: string(csrPEM),
CreatedAt: time.Now(),
}
if err := s.certRepo.CreateVersion(ctx, version); err != nil {
+53
View File
@@ -14,10 +14,12 @@ import (
type CertificateService struct {
certRepo repository.CertificateRepository
targetRepo repository.TargetRepository
jobRepo repository.JobRepository
policyService *PolicyService
auditService *AuditService
revSvc *RevocationSvc
caSvc *CAOperationsSvc
keygenMode string
}
// NewCertificateService creates a new certificate service.
@@ -48,6 +50,16 @@ func (s *CertificateService) SetTargetRepo(repo repository.TargetRepository) {
s.targetRepo = repo
}
// SetJobRepo sets the job repository for creating renewal/issuance jobs.
// TriggerRenewalWithActor only creates a job record when this repository is
// non-nil; without it, the job-creation step is skipped.
func (s *CertificateService) SetJobRepo(repo repository.JobRepository) {
s.jobRepo = repo
}
// SetKeygenMode sets the key generation mode (agent or server).
// TriggerRenewalWithActor starts new jobs as AwaitingCSR when the mode is
// exactly "agent" and as Pending for any other value, including the empty
// string. The value is stored as given, without validation.
func (s *CertificateService) SetKeygenMode(mode string) {
s.keygenMode = mode
}
// List returns a paginated list of certificates matching the filter.
func (s *CertificateService) List(ctx context.Context, filter *repository.CertificateFilter) ([]*domain.ManagedCertificate, int, error) {
certs, total, err := s.certRepo.List(ctx, filter)
@@ -195,6 +207,8 @@ func (s *CertificateService) GetVersions(ctx context.Context, certID string) ([]
}
// TriggerRenewalWithActor initiates a renewal job if the certificate is eligible.
// Creates a Renewal job (or Issuance for new certs) so the scheduler's job processor
// can pick it up and route it through the issuer connector.
func (s *CertificateService) TriggerRenewalWithActor(ctx context.Context, certID string, actor string) error {
cert, err := s.certRepo.Get(ctx, certID)
if err != nil {
@@ -220,6 +234,45 @@ func (s *CertificateService) TriggerRenewalWithActor(ctx context.Context, certID
return fmt.Errorf("failed to update certificate status: %w", err)
}
// Create a renewal job so the job processor can pick it up.
// In agent keygen mode, the job starts as AwaitingCSR so the agent
// generates the key pair and submits a CSR. In server mode, it starts as Pending.
if s.jobRepo != nil {
jobStatus := domain.JobStatusPending
if s.keygenMode == "agent" {
jobStatus = domain.JobStatusAwaitingCSR
}
// Determine job type: Issuance for certs that have never been issued,
// Renewal for certs that already have a version.
jobType := domain.JobTypeRenewal
if cert.ExpiresAt.IsZero() || cert.ExpiresAt.Year() < 2000 {
jobType = domain.JobTypeIssuance
}
job := &domain.Job{
ID: generateID("job"),
CertificateID: cert.ID,
Type: jobType,
Status: jobStatus,
MaxAttempts: 3,
ScheduledAt: time.Now(),
CreatedAt: time.Now(),
}
if err := s.jobRepo.Create(ctx, job); err != nil {
slog.Error("failed to create renewal job", "cert_id", cert.ID, "error", err)
return fmt.Errorf("failed to create renewal job: %w", err)
}
slog.Info("created renewal job via API trigger",
"job_id", job.ID,
"cert_id", cert.ID,
"job_type", string(jobType),
"job_status", string(jobStatus),
"keygen_mode", s.keygenMode)
}
// Record audit event
if err := s.auditService.RecordEvent(ctx, actor, domain.ActorTypeUser,
"renewal_triggered", "certificate", certID,
+10
View File
@@ -54,6 +54,16 @@ func (s *JobService) ProcessPendingJobs(ctx context.Context) error {
// Process each job
for _, job := range pendingJobs {
// Skip deployment jobs that have an agent_id — those are meant for agent
// pickup via GetPendingWork(), not server-side processing. The server should
// only process deployment jobs without an agent (legacy/serverless targets).
if job.Type == domain.JobTypeDeployment && job.AgentID != nil && *job.AgentID != "" {
s.logger.Debug("skipping agent-routed deployment job",
"job_id", job.ID,
"agent_id", *job.AgentID)
continue
}
if err := s.processJob(ctx, job); err != nil {
s.logger.Error("failed to process job",
"job_id", job.ID,
+42 -13
View File
@@ -636,23 +636,50 @@ func (s *RenewalService) CompleteAgentCSRRenewal(ctx context.Context, job *domai
}
// createDeploymentJobs creates pending deployment jobs for each target associated with a cert.
// If cert.TargetIDs is empty (common — the repository doesn't populate this field),
// falls back to querying certificate_target_mappings via targetRepo.ListByCertificate.
func (s *RenewalService) createDeploymentJobs(ctx context.Context, cert *domain.ManagedCertificate) {
if len(cert.TargetIDs) == 0 {
// Resolve targets: prefer in-memory TargetIDs, fall back to DB query
type targetInfo struct {
id string
agentID string
}
var targets []targetInfo
if len(cert.TargetIDs) > 0 {
// TargetIDs populated (e.g. from test or manual wiring)
for _, tid := range cert.TargetIDs {
ti := targetInfo{id: tid}
if s.targetRepo != nil {
if target, err := s.targetRepo.Get(ctx, tid); err == nil && target.AgentID != "" {
ti.agentID = target.AgentID
}
}
targets = append(targets, ti)
}
} else if s.targetRepo != nil {
// TargetIDs empty — query certificate_target_mappings via repository
dbTargets, err := s.targetRepo.ListByCertificate(ctx, cert.ID)
if err != nil {
slog.Error("failed to query targets for certificate", "cert_id", cert.ID, "error", err)
return
}
for _, t := range dbTargets {
targets = append(targets, targetInfo{id: t.ID, agentID: t.AgentID})
}
}
if len(targets) == 0 {
slog.Debug("no targets found for certificate, skipping deployment", "cert_id", cert.ID)
return
}
for _, targetID := range cert.TargetIDs {
tid := targetID
// Resolve agent_id from target for job routing
for _, t := range targets {
tid := t.id
var agentIDPtr *string
if s.targetRepo != nil {
target, err := s.targetRepo.Get(ctx, tid)
if err != nil {
slog.Warn("failed to resolve agent for deployment job", "target_id", tid, "error", err)
} else if target.AgentID != "" {
agentID := target.AgentID
agentIDPtr = &agentID
}
if t.agentID != "" {
aid := t.agentID
agentIDPtr = &aid
}
deployJob := &domain.Job{
@@ -667,7 +694,9 @@ func (s *RenewalService) createDeploymentJobs(ctx context.Context, cert *domain.
CreatedAt: time.Now(),
}
if err := s.jobRepo.Create(ctx, deployJob); err != nil {
slog.Error("failed to create deployment job for target", "target_id", targetID, "error", err)
slog.Error("failed to create deployment job for target", "target_id", tid, "cert_id", cert.ID, "error", err)
} else {
slog.Info("created deployment job", "job_id", deployJob.ID, "cert_id", cert.ID, "target_id", tid, "agent_id", t.agentID)
}
}
}