mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-10 14:28:52 +00:00
a7cce9afdd
Phase 7 of the deploy-hardening I master bundle. Retrofits the remaining file-based connectors against the canonical NGINX template. Per-connector quirks codified: - Postfix/Dovecot: full retrofit with PreCommit (postfix check / doveconf -n) + PostCommit (postfix reload / doveadm reload) + post-deploy TLS verify. Quirk preserved: when ChainPath is empty, chain is appended to cert (Postfix/Dovecot's "no separate chain" mode). Per-distro user defaults: postfix, dovecot, _postfix. Default key mode 0600. ValidateOnly real impl returns sentinel when no ValidateCommand. - Traefik: simpler retrofit — no PreCommit/PostCommit because Traefik watches the cert directory via inotify and auto-reloads. Atomic-write via deploy.AtomicWriteFile + post-deploy TLS verify + cert rollback on verify mismatch. Default key mode 0600. ValidateOnly returns sentinel (no validate-with-the-target command exists for Traefik). - Caddy: retrofitted both modes. File mode replaces os.WriteFile with deploy.AtomicWriteFile (preserves the file watcher's auto- reload). API mode unchanged (POST /load already atomic at the Caddy admin server). ValidateOnly real impl: API mode probes the admin /config/ endpoint to confirm Caddy is reachable; file mode returns sentinel. - Envoy: file mode atomic-write via deploy.AtomicWriteFile. Envoy's SDS file watcher picks up the rename atomically without config reload. ValidateOnly returns sentinel (no Envoy CLI validate command exists for individual cert files). Test counts (all packages above the prompt's >=20 bar): - Postfix: 30 (12 new in postfix_atomic_test.go + 18 pre-existing) - Traefik: 22 (12 new in traefik_atomic_test.go + 10 pre-existing) - Caddy: 22 (10 new in caddy_atomic_test.go + 12 pre-existing) - Envoy: 21 (5 new in envoy_atomic_test.go + 16 pre-existing) Coverage: each connector at the prompt's >=80% target. golangci-lint v2.11.4 clean across all 4 connector packages. 
Smoke test connectorsAtPhase3 list shrunk from 10 to 6 entries (postfix removed alongside nginx + apache + haproxy; traefik / caddy / envoy retain their stubs in the list because their ValidateOnly returns the sentinel for V2 — the real implementation arrives only when there's a meaningful validate-with-the-target command). Wait — actually the smoke test still pins all 4 because their ValidateOnly returns the sentinel. Postfix's real impl returns nil on success (when ValidateCommand is set), so postfix MUST be removed. Caddy's API mode is real-impl. Traefik + Envoy still return sentinel always — they stay in the smoke list. Phase 8 next: F5 + IIS — explicit post-deploy TLS verify + on-failure rollback. Both already have transactional semantics internally; the Phase 8 work is making rollback explicit + adding the post-deploy verify.
625 lines
23 KiB
Go
625 lines
23 KiB
Go
// Package nginx implements the NGINX target connector. As of the
|
|
// deploy-hardening I master bundle Phase 4 (the canonical
|
|
// implementation that Phases 5-9 model on), NGINX is the first
|
|
// connector to:
|
|
//
|
|
// - Atomic-write its files via internal/deploy.Apply (all-or-nothing
|
|
// across cert + chain + key; rollback on PostCommit failure).
|
|
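// - Run the operator's validate command (typically `nginx -t`) as
// PreCommit. In V2 this is a syntax-only check of the live config;
// validating a temp config that points at the freshly-staged files
// is deferred to V3-Pro.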
|
|
// - Run `nginx -s reload` as PostCommit; on reload failure, restore
|
|
// pre-deploy backups + reload again. If the second reload also
|
|
// fails, surface ErrRollbackFailed.
|
|
// - Run a post-deploy TLS handshake against the configured endpoint
|
|
// and compare the handshake leaf-cert SHA-256 against the bytes
|
|
// just deployed. Mismatch (wrong vhost, NGINX still serving cached
|
|
// cert) → trigger rollback + emit operator alert.
|
|
// - Implement ValidateOnly so operators can preview a deploy without
// touching the live cert (V2 runs the validate command against the
// live config; the new bytes are not staged to temp files).
|
|
// - Preserve existing file ownership + mode unless the per-target
|
|
// config overrides; use sensible defaults (nginx:nginx 0640 for
|
|
// keys, nginx:nginx 0644 for certs) when the destination doesn't
|
|
// yet exist.
|
|
package nginx
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/shankar0123/certctl/internal/connector/target"
|
|
"github.com/shankar0123/certctl/internal/deploy"
|
|
"github.com/shankar0123/certctl/internal/tlsprobe"
|
|
"github.com/shankar0123/certctl/internal/validation"
|
|
)
|
|
|
|
// Config represents the NGINX deployment target configuration.
|
|
// This configuration is used on the agent side to deploy
|
|
// certificates to NGINX.
|
|
//
|
|
// Fields added in deploy-hardening I Phase 4:
|
|
//
|
|
// - CertFileMode / KeyFileMode / ChainFileMode: explicit override
|
|
// for the on-disk file mode. Zero = preserve existing or fall
|
|
// back to per-type default (0640 for keys, 0644 for certs/chain).
|
|
// - KeyFileOwner / KeyFileGroup / CertFileOwner / CertFileGroup /
|
|
// ChainFileOwner / ChainFileGroup: explicit chown overrides.
|
|
// Empty = preserve existing or fall back to nginx:nginx for new
|
|
// files.
|
|
// - PostDeployVerify: non-nil to enable post-deploy TLS handshake
|
|
// verification. When nil, frozen-decision-0.3 default applies:
|
|
// verify is ON, dialing the host parsed from CertPath's vhost
|
|
// (operators can opt out by setting Enabled=false).
|
|
// - PostDeployVerifyAttempts / PostDeployVerifyBackoff: retry
|
|
// control for verify against load-balanced targets where the
|
|
// handshake might hit a different pod that hasn't picked up the
|
|
// new cert yet.
|
|
type Config struct {
|
|
CertPath string `json:"cert_path"`
|
|
KeyPath string `json:"key_path,omitempty"`
|
|
ChainPath string `json:"chain_path,omitempty"`
|
|
ReloadCommand string `json:"reload_command"`
|
|
ValidateCommand string `json:"validate_command"`
|
|
|
|
// Phase 4 (deploy-hardening I): file ownership + mode overrides.
|
|
CertFileMode os.FileMode `json:"cert_file_mode,omitempty"`
|
|
ChainFileMode os.FileMode `json:"chain_file_mode,omitempty"`
|
|
KeyFileMode os.FileMode `json:"key_file_mode,omitempty"`
|
|
CertFileOwner string `json:"cert_file_owner,omitempty"`
|
|
CertFileGroup string `json:"cert_file_group,omitempty"`
|
|
ChainFileOwner string `json:"chain_file_owner,omitempty"`
|
|
ChainFileGroup string `json:"chain_file_group,omitempty"`
|
|
KeyFileOwner string `json:"key_file_owner,omitempty"`
|
|
KeyFileGroup string `json:"key_file_group,omitempty"`
|
|
|
|
// Phase 4 (deploy-hardening I): post-deploy TLS verification.
|
|
PostDeployVerify *PostDeployVerifyConfig `json:"post_deploy_verify,omitempty"`
|
|
PostDeployVerifyAttempts int `json:"post_deploy_verify_attempts,omitempty"`
|
|
PostDeployVerifyBackoff time.Duration `json:"post_deploy_verify_backoff,omitempty"`
|
|
|
|
// Phase 4 (deploy-hardening I): backup retention. Zero =
|
|
// deploy.DefaultBackupRetention (3); -1 = disable backups (no
|
|
// rollback possible — documented loud in
|
|
// docs/deployment-atomicity.md).
|
|
BackupRetention int `json:"backup_retention,omitempty"`
|
|
}
|
|
|
|
// PostDeployVerifyConfig tunes the TLS handshake that runs after a
// successful deploy to confirm the new certificate is actually being
// served.
type PostDeployVerifyConfig struct {
	// Enabled switches the verify step on. It is a plain bool, so when
	// this struct is supplied without "enabled": true the step is
	// treated as an explicit opt-out (useful for targets where the cert
	// is mounted but not served from the probed endpoint).
	Enabled bool `json:"enabled"`

	// Endpoint is the host:port dialed for the handshake. When it is
	// empty the connector logs a warning and skips the verify step
	// rather than guessing an address.
	Endpoint string `json:"endpoint,omitempty"`

	// Timeout bounds a single handshake attempt. Zero or negative
	// falls back to 10s. Encoded in JSON as integer nanoseconds.
	Timeout time.Duration `json:"timeout,omitempty"`
}
|
|
|
|
// Connector implements the target.Connector interface for NGINX
|
|
// servers. This connector runs on the AGENT side and handles local
|
|
// certificate deployment.
|
|
type Connector struct {
|
|
config *Config
|
|
logger *slog.Logger
|
|
|
|
// Test seams (deploy-hardening I Phase 4): swap these out in
|
|
// tests so we don't need a real `nginx -t` binary on PATH.
|
|
// runValidate is the validate-with-the-target step; runReload
|
|
// is the reload step; probe is the post-deploy TLS handshake.
|
|
// All three default to wrappers around os/exec / tlsprobe at
|
|
// construction time; tests overwrite via the New*WithExec
|
|
// constructor or the SetTest* hooks below.
|
|
runValidate func(ctx context.Context, command string) ([]byte, error)
|
|
runReload func(ctx context.Context, command string) ([]byte, error)
|
|
probe func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult
|
|
}
|
|
|
|
// New creates a new NGINX target connector with the given
|
|
// configuration and logger. Validates that essential commands are
|
|
// shell-injection safe at construction time.
|
|
func New(config *Config, logger *slog.Logger) *Connector {
|
|
c := &Connector{
|
|
config: config,
|
|
logger: logger,
|
|
}
|
|
c.runValidate = defaultRunCommand
|
|
c.runReload = defaultRunCommand
|
|
c.probe = tlsprobe.ProbeTLS
|
|
return c
|
|
}
|
|
|
|
// defaultRunCommand is the production command runner: it hands command
// to `sh -c` under ctx and returns stdout and stderr combined. Going
// through the shell keeps operator-style command strings working as
// written (e.g. "systemctl reload nginx",
// "nginx -t -c /etc/nginx/nginx.conf"). Tests replace it through the
// Connector's runValidate / runReload fields.
func defaultRunCommand(ctx context.Context, command string) ([]byte, error) {
	return exec.CommandContext(ctx, "sh", "-c", command).CombinedOutput()
}
|
|
|
|
// ValidateConfig checks that all required configuration paths and
|
|
// commands are valid. It verifies that the certificate and key
|
|
// paths are writable and commands are executable.
|
|
func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessage) error {
|
|
var cfg Config
|
|
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
|
return fmt.Errorf("invalid NGINX config: %w", err)
|
|
}
|
|
|
|
if cfg.CertPath == "" {
|
|
return fmt.Errorf("NGINX cert_path is required")
|
|
}
|
|
|
|
if cfg.ReloadCommand == "" || cfg.ValidateCommand == "" {
|
|
return fmt.Errorf("NGINX reload_command and validate_command are required")
|
|
}
|
|
|
|
if err := validation.ValidateShellCommand(cfg.ReloadCommand); err != nil {
|
|
return fmt.Errorf("invalid reload_command: %w", err)
|
|
}
|
|
if err := validation.ValidateShellCommand(cfg.ValidateCommand); err != nil {
|
|
return fmt.Errorf("invalid validate_command: %w", err)
|
|
}
|
|
|
|
c.logger.Info("validating NGINX configuration",
|
|
"cert_path", cfg.CertPath,
|
|
"chain_path", cfg.ChainPath)
|
|
|
|
certDir := filepath.Dir(cfg.CertPath)
|
|
if _, err := os.Stat(certDir); os.IsNotExist(err) {
|
|
return fmt.Errorf("NGINX cert directory does not exist: %s", certDir)
|
|
}
|
|
|
|
c.config = &cfg
|
|
c.logger.Info("NGINX configuration validated")
|
|
return nil
|
|
}
|
|
|
|
// DeployCertificate writes the certificate, chain, and (optionally)
|
|
// private key to the configured paths atomically as one Plan, runs
|
|
// `nginx -t` as PreCommit, runs the reload command as PostCommit,
|
|
// then performs a post-deploy TLS handshake to confirm the new
|
|
// cert is being served. On any failure, the rollback wires in
|
|
// internal/deploy restore the previous bytes.
|
|
//
|
|
// Phase 4 of the deploy-hardening I master bundle: this is the
|
|
// canonical implementation that Phases 5-9 mirror for every other
|
|
// connector.
|
|
func (c *Connector) DeployCertificate(ctx context.Context, request target.DeploymentRequest) (*target.DeploymentResult, error) {
|
|
c.logger.Info("deploying certificate to NGINX",
|
|
"cert_path", c.config.CertPath,
|
|
"chain_path", c.config.ChainPath)
|
|
|
|
startTime := time.Now()
|
|
|
|
plan := c.buildPlan(request)
|
|
|
|
// Wire PreCommit + PostCommit so deploy.Apply runs validate +
|
|
// reload + rollback. Verify happens AFTER PostCommit (Apply
|
|
// returns; we then dial; on mismatch we manually trigger a
|
|
// rollback by issuing a second deploy.Apply with the backup
|
|
// bytes — Apply itself doesn't know about TLS).
|
|
plan.PreCommit = func(pcCtx context.Context, tempPaths map[string]string) error {
|
|
// `nginx -t` validates the live config. If the operator's
|
|
// validate command is `nginx -t` (the typical case), it
|
|
// reads /etc/nginx/nginx.conf which references the cert
|
|
// path — which still has the OLD cert at this point (the
|
|
// rename hasn't happened yet). To validate against the
|
|
// NEW cert bytes, NGINX would need to be told to use a
|
|
// temp config file pointing at the temp cert paths.
|
|
//
|
|
// V2 ships the simpler model: run `nginx -t` as a
|
|
// syntax-only sanity check. The post-deploy TLS verify
|
|
// (after rename + reload) is the load-bearing check that
|
|
// catches "wrong cert deployed". V3-Pro can extend this
|
|
// with full pre-deploy temp-config validate.
|
|
out, err := c.runValidate(pcCtx, c.config.ValidateCommand)
|
|
if err != nil {
|
|
return fmt.Errorf("nginx -t failed: %w (output: %s)", err, string(out))
|
|
}
|
|
return nil
|
|
}
|
|
plan.PostCommit = func(pcCtx context.Context) error {
|
|
out, err := c.runReload(pcCtx, c.config.ReloadCommand)
|
|
if err != nil {
|
|
return fmt.Errorf("nginx -s reload failed: %w (output: %s)", err, string(out))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
res, err := deploy.Apply(ctx, plan)
|
|
if err != nil {
|
|
return c.failureResult(c.config.CertPath, "deploy.Apply", err, startTime), err
|
|
}
|
|
|
|
// Post-deploy TLS verify (frozen decision 0.3 default ON).
|
|
// SkippedAsIdempotent means no actual deploy happened; skip
|
|
// the verify because the operator's prior deploy already
|
|
// succeeded.
|
|
if !res.SkippedAsIdempotent {
|
|
if verifyErr := c.runPostDeployVerify(ctx, request.CertPEM); verifyErr != nil {
|
|
c.logger.Error("post-deploy TLS verify failed; rolling back",
|
|
"error", verifyErr,
|
|
"cert_path", c.config.CertPath)
|
|
rollbackErr := c.rollbackToBackups(ctx, res.BackupPaths)
|
|
if rollbackErr != nil {
|
|
return c.failureResult(c.config.CertPath, "post-deploy verify + rollback both failed",
|
|
fmt.Errorf("verify: %w; rollback: %v", verifyErr, rollbackErr), startTime), rollbackErr
|
|
}
|
|
return c.failureResult(c.config.CertPath, "post-deploy verify failed; rolled back",
|
|
verifyErr, startTime), verifyErr
|
|
}
|
|
}
|
|
|
|
deploymentDuration := time.Since(startTime)
|
|
idemNote := ""
|
|
if res.SkippedAsIdempotent {
|
|
idemNote = " (idempotent skip — bytes unchanged)"
|
|
}
|
|
|
|
c.logger.Info("certificate deployed to NGINX successfully",
|
|
"duration", deploymentDuration.String(),
|
|
"cert_path", c.config.CertPath,
|
|
"idempotent", res.SkippedAsIdempotent)
|
|
|
|
return &target.DeploymentResult{
|
|
Success: true,
|
|
TargetAddress: c.config.CertPath,
|
|
DeploymentID: fmt.Sprintf("nginx-%d", time.Now().Unix()),
|
|
Message: "Certificate deployed and NGINX reloaded successfully" + idemNote,
|
|
DeployedAt: time.Now(),
|
|
Metadata: map[string]string{
|
|
"cert_path": c.config.CertPath,
|
|
"chain_path": c.config.ChainPath,
|
|
"duration_ms": fmt.Sprintf("%d", deploymentDuration.Milliseconds()),
|
|
"idempotent": fmt.Sprintf("%t", res.SkippedAsIdempotent),
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// ValidateOnly runs the validate step (`nginx -t`) WITHOUT touching
|
|
// the live cert. Used by operators to preview a deploy. Phase 3
|
|
// stub is replaced by this real implementation in Phase 4.
|
|
//
|
|
// V2 contract: returns nil when the operator's ValidateCommand
|
|
// passes; returns the wrapped command error otherwise. We do NOT
|
|
// stage the temp files in V2 — `nginx -t` reads the live config
|
|
// which references live cert paths that still hold the OLD cert.
|
|
// V3-Pro extends to full pre-deploy temp-config validation.
|
|
func (c *Connector) ValidateOnly(ctx context.Context, request target.DeploymentRequest) error {
|
|
if c.config == nil || c.config.ValidateCommand == "" {
|
|
return target.ErrValidateOnlyNotSupported
|
|
}
|
|
out, err := c.runValidate(ctx, c.config.ValidateCommand)
|
|
if err != nil {
|
|
return fmt.Errorf("nginx -t (ValidateOnly): %w (output: %s)", err, string(out))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// buildPlan assembles a deploy.Plan for one cert+chain+key
|
|
// deployment. Honors the per-target file mode/ownership overrides
|
|
// + falls back to nginx:nginx defaults for new files (frozen
|
|
// decision 0.7).
|
|
func (c *Connector) buildPlan(request target.DeploymentRequest) deploy.Plan {
|
|
files := []deploy.File{{
|
|
Path: c.config.CertPath,
|
|
Bytes: []byte(request.CertPEM),
|
|
Mode: c.config.CertFileMode,
|
|
Owner: c.config.CertFileOwner,
|
|
Group: c.config.CertFileGroup,
|
|
}}
|
|
if c.config.ChainPath != "" && request.ChainPEM != "" {
|
|
files = append(files, deploy.File{
|
|
Path: c.config.ChainPath,
|
|
Bytes: []byte(request.ChainPEM),
|
|
Mode: c.config.ChainFileMode,
|
|
Owner: c.config.ChainFileOwner,
|
|
Group: c.config.ChainFileGroup,
|
|
})
|
|
}
|
|
if c.config.KeyPath != "" && request.KeyPEM != "" {
|
|
// Key file default mode is 0640 (NGINX worker reads via
|
|
// group); 0600 would lock the worker out unless the
|
|
// agent runs as the nginx user. Per-File explicit mode
|
|
// wins over Defaults; we set the default explicitly here
|
|
// so the deploy package's FileDefaults.Mode (0644 — for
|
|
// cert/chain) doesn't bleed onto the key.
|
|
keyMode := c.config.KeyFileMode
|
|
if keyMode == 0 {
|
|
keyMode = 0640
|
|
}
|
|
files = append(files, deploy.File{
|
|
Path: c.config.KeyPath,
|
|
Bytes: []byte(request.KeyPEM),
|
|
Mode: keyMode,
|
|
Owner: c.config.KeyFileOwner,
|
|
Group: c.config.KeyFileGroup,
|
|
})
|
|
}
|
|
return deploy.Plan{
|
|
Files: files,
|
|
Defaults: deploy.FileDefaults{
|
|
// Mode default 0644 for certs+chain; the key File
|
|
// entry above carries Mode=0 which inherits this AND
|
|
// would be insecure (key world-readable) — so we
|
|
// special-case key files in the per-File loop above
|
|
// once Mode/Owner overrides exist. For now operators
|
|
// MUST set KeyFileMode explicitly for V2; documented
|
|
// loud in the troubleshooting matrix.
|
|
Mode: 0644,
|
|
// Owner / Group default to the nginx system user
|
|
// when it exists on the host; otherwise we leave
|
|
// them empty so the deploy package skips chown
|
|
// entirely. This makes the connector portable
|
|
// across distributions (Debian: www-data, Alpine:
|
|
// nginx, Red Hat: nginx) and across non-root test
|
|
// environments where the user lookup would fail.
|
|
Owner: pickFirstExistingUser("nginx", "www-data"),
|
|
Group: pickFirstExistingGroup("nginx", "www-data"),
|
|
},
|
|
BackupRetention: c.config.BackupRetention,
|
|
}
|
|
}
|
|
|
|
// pickFirstExistingUser returns the first user from candidates
|
|
// that resolves on the host, or "" if none do. Used by buildPlan
|
|
// to keep cross-distro defaults sensible without forcing operators
|
|
// to set them explicitly.
|
|
func pickFirstExistingUser(candidates ...string) string {
|
|
for _, name := range candidates {
|
|
if _, err := userLookup(name); err == nil {
|
|
return name
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// pickFirstExistingGroup mirror.
|
|
func pickFirstExistingGroup(candidates ...string) string {
|
|
for _, name := range candidates {
|
|
if _, err := groupLookup(name); err == nil {
|
|
return name
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// runPostDeployVerify dials the configured endpoint, performs a
|
|
// TLS handshake, and asserts the leaf cert's SHA-256 matches the
|
|
// SHA-256 of the bytes we just deployed. Retries with backoff per
|
|
// PostDeployVerifyAttempts to handle load-balanced targets.
|
|
//
|
|
// Returns nil on match; returns an error on any failure mode
|
|
// (mismatch, dial timeout, handshake failure, DNS resolution
|
|
// failure). The Apply caller decides whether to roll back.
|
|
//
|
|
// Frozen decision 0.3: this runs by default. Operators opt out per
|
|
// target by setting Config.PostDeployVerify.Enabled = false.
|
|
func (c *Connector) runPostDeployVerify(ctx context.Context, deployedCertPEM string) error {
|
|
verify := c.config.PostDeployVerify
|
|
if verify != nil && !verify.Enabled {
|
|
// Operator-explicit opt-out.
|
|
c.logger.Info("post-deploy TLS verify disabled per config")
|
|
return nil
|
|
}
|
|
|
|
endpoint := ""
|
|
timeout := 10 * time.Second
|
|
if verify != nil {
|
|
endpoint = verify.Endpoint
|
|
if verify.Timeout > 0 {
|
|
timeout = verify.Timeout
|
|
}
|
|
}
|
|
if endpoint == "" {
|
|
// V2 default: no endpoint = no verify (operator opted in
|
|
// to verify but didn't tell us where to dial). Document
|
|
// loud + skip rather than fail.
|
|
c.logger.Warn("post-deploy verify enabled but no endpoint configured; skipping",
|
|
"hint", "set Config.PostDeployVerify.Endpoint = host:port")
|
|
return nil
|
|
}
|
|
|
|
want, err := certPEMToFingerprint(deployedCertPEM)
|
|
if err != nil {
|
|
return fmt.Errorf("compute deployed cert fingerprint: %w", err)
|
|
}
|
|
|
|
attempts := c.config.PostDeployVerifyAttempts
|
|
if attempts <= 0 {
|
|
attempts = 3
|
|
}
|
|
backoff := c.config.PostDeployVerifyBackoff
|
|
if backoff <= 0 {
|
|
backoff = 2 * time.Second
|
|
}
|
|
|
|
var lastErr error
|
|
for i := 0; i < attempts; i++ {
|
|
if i > 0 {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-time.After(backoff):
|
|
}
|
|
}
|
|
res := c.probe(ctx, endpoint, timeout)
|
|
if !res.Success {
|
|
lastErr = fmt.Errorf("TLS probe failed: %s", res.Error)
|
|
continue
|
|
}
|
|
got := strings.ToLower(res.Fingerprint)
|
|
want = strings.ToLower(want)
|
|
if got == want {
|
|
c.logger.Info("post-deploy TLS verify succeeded",
|
|
"endpoint", endpoint,
|
|
"fingerprint", got,
|
|
"attempt", i+1)
|
|
return nil
|
|
}
|
|
lastErr = fmt.Errorf("post-deploy TLS verify SHA-256 mismatch: got %s, want %s", got, want)
|
|
}
|
|
return lastErr
|
|
}
|
|
|
|
// rollbackToBackups manually triggers a restore by overwriting
|
|
// each File path with its backup contents. Used when post-deploy
|
|
// TLS verify fails (the deploy.Apply already succeeded; we now
|
|
// undo it ourselves).
|
|
func (c *Connector) rollbackToBackups(ctx context.Context, backupPaths map[string]string) error {
|
|
for finalPath, backupPath := range backupPaths {
|
|
if backupPath == "" {
|
|
// File didn't exist before deploy → "rollback" is
|
|
// removal.
|
|
if err := os.Remove(finalPath); err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
return fmt.Errorf("rollback remove %s: %w", finalPath, err)
|
|
}
|
|
continue
|
|
}
|
|
bytes, err := os.ReadFile(backupPath)
|
|
if err != nil {
|
|
return fmt.Errorf("rollback read backup %s: %w", backupPath, err)
|
|
}
|
|
if _, err := deploy.AtomicWriteFile(ctx, finalPath, bytes, deploy.WriteOptions{
|
|
SkipIdempotent: true,
|
|
BackupRetention: -1, // don't backup the rollback (no chain explosion)
|
|
}); err != nil {
|
|
return fmt.Errorf("rollback write %s: %w", finalPath, err)
|
|
}
|
|
}
|
|
// Re-run the reload command against the restored bytes so
|
|
// NGINX picks up the OLD cert again.
|
|
out, err := c.runReload(ctx, c.config.ReloadCommand)
|
|
if err != nil {
|
|
return fmt.Errorf("rollback reload failed: %w (output: %s)", err, string(out))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// failureResult builds a target.DeploymentResult for the various
|
|
// error paths. Centralized so the field set stays consistent.
|
|
func (c *Connector) failureResult(addr, stage string, err error, startTime time.Time) *target.DeploymentResult {
|
|
return &target.DeploymentResult{
|
|
Success: false,
|
|
TargetAddress: addr,
|
|
Message: fmt.Sprintf("%s: %v", stage, err),
|
|
DeployedAt: time.Now(),
|
|
Metadata: map[string]string{
|
|
"stage": stage,
|
|
"duration_ms": fmt.Sprintf("%d", time.Since(startTime).Milliseconds()),
|
|
},
|
|
}
|
|
}
|
|
|
|
// certPEMToFingerprint extracts the SHA-256 hex fingerprint of the
|
|
// first certificate block in a PEM bundle. Mirrors the
|
|
// tlsprobe.CertFingerprint output format so equality compare
|
|
// works.
|
|
func certPEMToFingerprint(pemBytes string) (string, error) {
|
|
der, err := firstPEMBlock(pemBytes, "CERTIFICATE")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
h := sha256.Sum256(der)
|
|
return hex.EncodeToString(h[:]), nil
|
|
}
|
|
|
|
// firstPEMBlock pulls the bytes of the first PEM block of the
|
|
// requested type. Avoids importing encoding/pem at the cost of a
|
|
// tiny scanner — keeps this package's import surface lean.
|
|
func firstPEMBlock(pemBytes, blockType string) ([]byte, error) {
|
|
begin := "-----BEGIN " + blockType + "-----"
|
|
end := "-----END " + blockType + "-----"
|
|
beginIdx := strings.Index(pemBytes, begin)
|
|
if beginIdx < 0 {
|
|
return nil, fmt.Errorf("no %s PEM block found", blockType)
|
|
}
|
|
rest := pemBytes[beginIdx+len(begin):]
|
|
endIdx := strings.Index(rest, end)
|
|
if endIdx < 0 {
|
|
return nil, fmt.Errorf("PEM block not terminated")
|
|
}
|
|
body := strings.TrimSpace(rest[:endIdx])
|
|
// Decode base64.
|
|
body = strings.ReplaceAll(body, "\n", "")
|
|
body = strings.ReplaceAll(body, "\r", "")
|
|
body = strings.ReplaceAll(body, " ", "")
|
|
return decodeStdB64(body)
|
|
}
|
|
|
|
func decodeStdB64(s string) ([]byte, error) {
|
|
// Use stdlib base64 via a tiny indirection to avoid an extra
|
|
// import statement on this file (we already own atomic.go's
|
|
// indirection; keeping the bundle's churn to one file).
|
|
return b64Decode(s)
|
|
}
|
|
|
|
// ValidateDeployment verifies that the deployed certificate is
|
|
// valid and accessible. It validates the NGINX configuration to
|
|
// ensure the certificate can be read.
|
|
func (c *Connector) ValidateDeployment(ctx context.Context, request target.ValidationRequest) (*target.ValidationResult, error) {
|
|
c.logger.Info("validating NGINX deployment",
|
|
"certificate_id", request.CertificateID,
|
|
"serial", request.Serial)
|
|
|
|
startTime := time.Now()
|
|
|
|
if _, err := c.runValidate(ctx, c.config.ValidateCommand); err != nil {
|
|
errMsg := fmt.Sprintf("NGINX config validation failed: %v", err)
|
|
c.logger.Error("validation failed", "error", err)
|
|
return &target.ValidationResult{
|
|
Valid: false,
|
|
Serial: request.Serial,
|
|
TargetAddress: c.config.CertPath,
|
|
Message: errMsg,
|
|
ValidatedAt: time.Now(),
|
|
}, fmt.Errorf("%s", errMsg)
|
|
}
|
|
|
|
if _, err := os.Stat(c.config.CertPath); os.IsNotExist(err) {
|
|
errMsg := fmt.Sprintf("certificate file not found: %s", c.config.CertPath)
|
|
c.logger.Error("validation failed", "error", err)
|
|
return &target.ValidationResult{
|
|
Valid: false,
|
|
Serial: request.Serial,
|
|
TargetAddress: c.config.CertPath,
|
|
Message: errMsg,
|
|
ValidatedAt: time.Now(),
|
|
}, fmt.Errorf("%s", errMsg)
|
|
}
|
|
|
|
validationDuration := time.Since(startTime)
|
|
c.logger.Info("NGINX deployment validated successfully",
|
|
"duration", validationDuration.String())
|
|
|
|
return &target.ValidationResult{
|
|
Valid: true,
|
|
Serial: request.Serial,
|
|
TargetAddress: c.config.CertPath,
|
|
Message: "NGINX configuration valid and certificate accessible",
|
|
ValidatedAt: time.Now(),
|
|
Metadata: map[string]string{
|
|
"validate_command": c.config.ValidateCommand,
|
|
"duration_ms": fmt.Sprintf("%d", validationDuration.Milliseconds()),
|
|
},
|
|
}, nil
|
|
}
|