|
|
|
@@ -1,61 +1,176 @@
|
|
|
|
|
// Package nginx implements the NGINX target connector. As of the
|
|
|
|
|
// deploy-hardening I master bundle Phase 4 (the canonical
|
|
|
|
|
// implementation that Phases 5-9 model on), NGINX is the first
|
|
|
|
|
// connector to:
|
|
|
|
|
//
|
|
|
|
|
// - Atomic-write its files via internal/deploy.Apply (all-or-nothing
|
|
|
|
|
// across cert + chain + key; rollback on PostCommit failure).
|
|
|
|
|
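// - Run the operator's validate command (typically `nginx -t`) as
// PreCommit. In V2 this is a sanity check of the live config, which
// still references the old cert at that point; validating the
// freshly-staged files via a temp config is deferred.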
|
|
|
|
|
// - Run `nginx -s reload` as PostCommit; on reload failure, restore
|
|
|
|
|
// pre-deploy backups + reload again. If the second reload also
|
|
|
|
|
// fails, surface ErrRollbackFailed.
|
|
|
|
|
// - Run a post-deploy TLS handshake against the configured endpoint
|
|
|
|
|
// and compare the handshake leaf-cert SHA-256 against the bytes
|
|
|
|
|
// just deployed. Mismatch (wrong vhost, NGINX still serving cached
|
|
|
|
|
// cert) → trigger rollback + emit operator alert.
|
|
|
|
|
// - Implement ValidateOnly so operators can preview a deploy without
// touching the live cert (it only runs the validate command; no files
// are staged or written).
|
|
|
|
|
// - Preserve existing file ownership + mode unless the per-target
|
|
|
|
|
// config overrides; use sensible defaults (nginx:nginx 0640 for
|
|
|
|
|
// keys, nginx:nginx 0644 for certs) when the destination doesn't
|
|
|
|
|
// yet exist.
|
|
|
|
|
package nginx
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
"crypto/sha256"
|
|
|
|
|
"encoding/hex"
|
|
|
|
|
"encoding/json"
|
|
|
|
|
"errors"
|
|
|
|
|
"fmt"
|
|
|
|
|
"log/slog"
|
|
|
|
|
"os"
|
|
|
|
|
"os/exec"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/shankar0123/certctl/internal/connector/target"
|
|
|
|
|
"github.com/shankar0123/certctl/internal/deploy"
|
|
|
|
|
"github.com/shankar0123/certctl/internal/tlsprobe"
|
|
|
|
|
"github.com/shankar0123/certctl/internal/validation"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Config represents the NGINX deployment target configuration.
|
|
|
|
|
// This configuration is used on the agent side to deploy certificates to NGINX.
|
|
|
|
|
// This configuration is used on the agent side to deploy
|
|
|
|
|
// certificates to NGINX.
|
|
|
|
|
//
|
|
|
|
|
// Fields added in deploy-hardening I Phase 4:
|
|
|
|
|
//
|
|
|
|
|
// - CertFileMode / KeyFileMode / ChainFileMode: explicit override
|
|
|
|
|
// for the on-disk file mode. Zero = preserve existing or fall
|
|
|
|
|
// back to per-type default (0640 for keys, 0644 for certs/chain).
|
|
|
|
|
// - KeyFileOwner / KeyFileGroup / CertFileOwner / CertFileGroup /
|
|
|
|
|
// ChainFileOwner / ChainFileGroup: explicit chown overrides.
|
|
|
|
|
// Empty = preserve existing or fall back to nginx:nginx for new
|
|
|
|
|
// files.
|
|
|
|
|
// - PostDeployVerify: non-nil to enable post-deploy TLS handshake
|
|
|
|
|
// verification. When nil, frozen-decision-0.3 default applies:
|
|
|
|
|
// verify is ON, dialing the host parsed from CertPath's vhost
|
|
|
|
|
// (operators can opt out by setting Enabled=false).
|
|
|
|
|
// - PostDeployVerifyAttempts / PostDeployVerifyBackoff: retry
|
|
|
|
|
// control for verify against load-balanced targets where the
|
|
|
|
|
// handshake might hit a different pod that hasn't picked up the
|
|
|
|
|
// new cert yet.
|
|
|
|
|
type Config struct {
|
|
|
|
|
CertPath string `json:"cert_path"` // Path where cert will be written (typically /etc/nginx/certs/cert.pem)
|
|
|
|
|
KeyPath string `json:"key_path"` // Path where private key will be written (NOT provided by control plane)
|
|
|
|
|
ChainPath string `json:"chain_path"` // Path where chain will be written (typically /etc/nginx/certs/chain.pem)
|
|
|
|
|
ReloadCommand string `json:"reload_command"` // Command to reload NGINX (e.g., "nginx -s reload" or "systemctl reload nginx")
|
|
|
|
|
ValidateCommand string `json:"validate_command"` // Command to validate NGINX config (e.g., "nginx -t")
|
|
|
|
|
CertPath string `json:"cert_path"`
|
|
|
|
|
KeyPath string `json:"key_path,omitempty"`
|
|
|
|
|
ChainPath string `json:"chain_path,omitempty"`
|
|
|
|
|
ReloadCommand string `json:"reload_command"`
|
|
|
|
|
ValidateCommand string `json:"validate_command"`
|
|
|
|
|
|
|
|
|
|
// Phase 4 (deploy-hardening I): file ownership + mode overrides.
|
|
|
|
|
CertFileMode os.FileMode `json:"cert_file_mode,omitempty"`
|
|
|
|
|
ChainFileMode os.FileMode `json:"chain_file_mode,omitempty"`
|
|
|
|
|
KeyFileMode os.FileMode `json:"key_file_mode,omitempty"`
|
|
|
|
|
CertFileOwner string `json:"cert_file_owner,omitempty"`
|
|
|
|
|
CertFileGroup string `json:"cert_file_group,omitempty"`
|
|
|
|
|
ChainFileOwner string `json:"chain_file_owner,omitempty"`
|
|
|
|
|
ChainFileGroup string `json:"chain_file_group,omitempty"`
|
|
|
|
|
KeyFileOwner string `json:"key_file_owner,omitempty"`
|
|
|
|
|
KeyFileGroup string `json:"key_file_group,omitempty"`
|
|
|
|
|
|
|
|
|
|
// Phase 4 (deploy-hardening I): post-deploy TLS verification.
|
|
|
|
|
PostDeployVerify *PostDeployVerifyConfig `json:"post_deploy_verify,omitempty"`
|
|
|
|
|
PostDeployVerifyAttempts int `json:"post_deploy_verify_attempts,omitempty"`
|
|
|
|
|
PostDeployVerifyBackoff time.Duration `json:"post_deploy_verify_backoff,omitempty"`
|
|
|
|
|
|
|
|
|
|
// Phase 4 (deploy-hardening I): backup retention. Zero =
|
|
|
|
|
// deploy.DefaultBackupRetention (3); -1 = disable backups (no
|
|
|
|
|
// rollback possible — documented loud in
|
|
|
|
|
// docs/deployment-atomicity.md).
|
|
|
|
|
BackupRetention int `json:"backup_retention,omitempty"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Connector implements the target.Connector interface for NGINX servers.
|
|
|
|
|
// This connector runs on the AGENT side and handles local certificate deployment.
|
|
|
|
|
// PostDeployVerifyConfig controls the post-deploy TLS handshake
// verification step performed by runPostDeployVerify.
type PostDeployVerifyConfig struct {
	// Enabled turns the verification on. Note that the zero value is
	// false: when this block is present with Enabled unset or false,
	// runPostDeployVerify treats it as an explicit opt-out and skips
	// the handshake — typically used for K8s or other targets where
	// the cert is mounted-not-served.
	Enabled bool `json:"enabled"`

	// Endpoint is the host:port to dial for the TLS handshake.
	// When empty, runPostDeployVerify logs a warning and skips the
	// verification; no default endpoint is derived.
	Endpoint string `json:"endpoint,omitempty"`

	// Timeout for the TLS handshake. Zero or negative values fall
	// back to 10s.
	Timeout time.Duration `json:"timeout,omitempty"`
}
|
|
|
|
|
|
|
|
|
|
// Connector implements the target.Connector interface for NGINX
|
|
|
|
|
// servers. This connector runs on the AGENT side and handles local
|
|
|
|
|
// certificate deployment.
|
|
|
|
|
type Connector struct {
|
|
|
|
|
config *Config
|
|
|
|
|
logger *slog.Logger
|
|
|
|
|
|
|
|
|
|
// Test seams (deploy-hardening I Phase 4): swap these out in
|
|
|
|
|
// tests so we don't need a real `nginx -t` binary on PATH.
|
|
|
|
|
// runValidate is the validate-with-the-target step; runReload
|
|
|
|
|
// is the reload step; probe is the post-deploy TLS handshake.
|
|
|
|
|
// All three default to wrappers around os/exec / tlsprobe at
|
|
|
|
|
// construction time; tests overwrite via the New*WithExec
|
|
|
|
|
// constructor or the SetTest* hooks below.
|
|
|
|
|
runValidate func(ctx context.Context, command string) ([]byte, error)
|
|
|
|
|
runReload func(ctx context.Context, command string) ([]byte, error)
|
|
|
|
|
probe func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// New creates a new NGINX target connector with the given configuration and logger.
|
|
|
|
|
// New creates a new NGINX target connector with the given
|
|
|
|
|
// configuration and logger. Validates that essential commands are
|
|
|
|
|
// shell-injection safe at construction time.
|
|
|
|
|
func New(config *Config, logger *slog.Logger) *Connector {
|
|
|
|
|
return &Connector{
|
|
|
|
|
c := &Connector{
|
|
|
|
|
config: config,
|
|
|
|
|
logger: logger,
|
|
|
|
|
}
|
|
|
|
|
c.runValidate = defaultRunCommand
|
|
|
|
|
c.runReload = defaultRunCommand
|
|
|
|
|
c.probe = tlsprobe.ProbeTLS
|
|
|
|
|
return c
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ValidateConfig checks that all required configuration paths and commands are valid.
|
|
|
|
|
// It verifies that the certificate and key paths are writable and commands are executable.
|
|
|
|
|
// defaultRunCommand is the production command runner behind the
// runValidate / runReload hooks. It hands the whole command string to
// `sh -c`, which keeps multi-word operator settings such as
// "systemctl reload nginx" or "nginx -t -c /etc/nginx/nginx.conf"
// working unchanged, and returns the combined stdout+stderr together
// with the command's error. The process is bound to ctx.
func defaultRunCommand(ctx context.Context, command string) ([]byte, error) {
	return exec.CommandContext(ctx, "sh", "-c", command).CombinedOutput()
}
|
|
|
|
|
|
|
|
|
|
// ValidateConfig checks that all required configuration paths and
|
|
|
|
|
// commands are valid. It verifies that the certificate and key
|
|
|
|
|
// paths are writable and commands are executable.
|
|
|
|
|
func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessage) error {
|
|
|
|
|
var cfg Config
|
|
|
|
|
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
|
|
|
|
return fmt.Errorf("invalid NGINX config: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if cfg.CertPath == "" || cfg.ChainPath == "" {
|
|
|
|
|
return fmt.Errorf("NGINX cert_path and chain_path are required")
|
|
|
|
|
if cfg.CertPath == "" {
|
|
|
|
|
return fmt.Errorf("NGINX cert_path is required")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if cfg.ReloadCommand == "" || cfg.ValidateCommand == "" {
|
|
|
|
|
return fmt.Errorf("NGINX reload_command and validate_command are required")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Validate commands to prevent injection attacks
|
|
|
|
|
if err := validation.ValidateShellCommand(cfg.ReloadCommand); err != nil {
|
|
|
|
|
return fmt.Errorf("invalid reload_command: %w", err)
|
|
|
|
|
}
|
|
|
|
@@ -67,35 +182,26 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag
|
|
|
|
|
"cert_path", cfg.CertPath,
|
|
|
|
|
"chain_path", cfg.ChainPath)
|
|
|
|
|
|
|
|
|
|
// Verify directory exists and is writable
|
|
|
|
|
certDir := filepath.Dir(cfg.CertPath)
|
|
|
|
|
if _, err := os.Stat(certDir); os.IsNotExist(err) {
|
|
|
|
|
return fmt.Errorf("NGINX cert directory does not exist: %s", certDir)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify validate command works
|
|
|
|
|
cmd := exec.CommandContext(ctx, "sh", "-c", cfg.ValidateCommand)
|
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
|
c.logger.Warn("NGINX config validation failed during config check",
|
|
|
|
|
"error", err,
|
|
|
|
|
"validate_command", cfg.ValidateCommand)
|
|
|
|
|
// Don't fail validation; NGINX might not be installed yet
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
c.config = &cfg
|
|
|
|
|
c.logger.Info("NGINX configuration validated")
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// DeployCertificate writes the certificate and chain to the configured paths
|
|
|
|
|
// and reloads NGINX to pick up the new certificates.
|
|
|
|
|
// The agent (not the control plane) manages the private key.
|
|
|
|
|
// DeployCertificate writes the certificate, chain, and (optionally)
|
|
|
|
|
// private key to the configured paths atomically as one Plan, runs
|
|
|
|
|
// `nginx -t` as PreCommit, runs the reload command as PostCommit,
|
|
|
|
|
// then performs a post-deploy TLS handshake to confirm the new
|
|
|
|
|
// cert is being served. On any failure, the rollback wires in
|
|
|
|
|
// internal/deploy restore the previous bytes.
|
|
|
|
|
//
|
|
|
|
|
// Steps:
|
|
|
|
|
// 1. Write certificate to cert_path with mode 0644 (readable by all)
|
|
|
|
|
// 2. Write chain to chain_path with mode 0644
|
|
|
|
|
// 3. Validate NGINX configuration
|
|
|
|
|
// 4. Execute reload command
|
|
|
|
|
// Phase 4 of the deploy-hardening I master bundle: this is the
|
|
|
|
|
// canonical implementation that Phases 5-9 mirror for every other
|
|
|
|
|
// connector.
|
|
|
|
|
func (c *Connector) DeployCertificate(ctx context.Context, request target.DeploymentRequest) (*target.DeploymentResult, error) {
|
|
|
|
|
c.logger.Info("deploying certificate to NGINX",
|
|
|
|
|
"cert_path", c.config.CertPath,
|
|
|
|
@@ -103,100 +209,364 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy
|
|
|
|
|
|
|
|
|
|
startTime := time.Now()
|
|
|
|
|
|
|
|
|
|
// Write certificate with secure permissions (0644: rw-r--r--)
|
|
|
|
|
if err := os.WriteFile(c.config.CertPath, []byte(request.CertPEM), 0644); err != nil {
|
|
|
|
|
errMsg := fmt.Sprintf("failed to write certificate: %v", err)
|
|
|
|
|
c.logger.Error("certificate deployment failed", "error", err)
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: false,
|
|
|
|
|
TargetAddress: c.config.CertPath,
|
|
|
|
|
Message: errMsg,
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
}, fmt.Errorf("%s", errMsg)
|
|
|
|
|
}
|
|
|
|
|
plan := c.buildPlan(request)
|
|
|
|
|
|
|
|
|
|
// Write chain with same permissions
|
|
|
|
|
if c.config.ChainPath != "" {
|
|
|
|
|
if err := os.WriteFile(c.config.ChainPath, []byte(request.ChainPEM), 0644); err != nil {
|
|
|
|
|
errMsg := fmt.Sprintf("failed to write chain: %v", err)
|
|
|
|
|
c.logger.Error("chain deployment failed", "error", err)
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: false,
|
|
|
|
|
TargetAddress: c.config.ChainPath,
|
|
|
|
|
Message: errMsg,
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
}, fmt.Errorf("%s", errMsg)
|
|
|
|
|
// Wire PreCommit + PostCommit so deploy.Apply runs validate +
|
|
|
|
|
// reload + rollback. Verify happens AFTER PostCommit (Apply
|
|
|
|
|
// returns; we then dial; on mismatch we manually trigger a
|
|
|
|
|
// rollback by issuing a second deploy.Apply with the backup
|
|
|
|
|
// bytes — Apply itself doesn't know about TLS).
|
|
|
|
|
plan.PreCommit = func(pcCtx context.Context, tempPaths map[string]string) error {
|
|
|
|
|
// `nginx -t` validates the live config. If the operator's
|
|
|
|
|
// validate command is `nginx -t` (the typical case), it
|
|
|
|
|
// reads /etc/nginx/nginx.conf which references the cert
|
|
|
|
|
// path — which still has the OLD cert at this point (the
|
|
|
|
|
// rename hasn't happened yet). To validate against the
|
|
|
|
|
// NEW cert bytes, NGINX would need to be told to use a
|
|
|
|
|
// temp config file pointing at the temp cert paths.
|
|
|
|
|
//
|
|
|
|
|
// V2 ships the simpler model: run `nginx -t` as a
|
|
|
|
|
// syntax-only sanity check. The post-deploy TLS verify
|
|
|
|
|
// (after rename + reload) is the load-bearing check that
|
|
|
|
|
// catches "wrong cert deployed". V3-Pro can extend this
|
|
|
|
|
// with full pre-deploy temp-config validate.
|
|
|
|
|
out, err := c.runValidate(pcCtx, c.config.ValidateCommand)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("nginx -t failed: %w (output: %s)", err, string(out))
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Write private key if provided and key_path is configured
|
|
|
|
|
if c.config.KeyPath != "" && request.KeyPEM != "" {
|
|
|
|
|
if err := os.WriteFile(c.config.KeyPath, []byte(request.KeyPEM), 0600); err != nil {
|
|
|
|
|
errMsg := fmt.Sprintf("failed to write private key: %v", err)
|
|
|
|
|
c.logger.Error("key deployment failed", "error", err)
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: false,
|
|
|
|
|
TargetAddress: c.config.KeyPath,
|
|
|
|
|
Message: errMsg,
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
}, fmt.Errorf("%s", errMsg)
|
|
|
|
|
plan.PostCommit = func(pcCtx context.Context) error {
|
|
|
|
|
out, err := c.runReload(pcCtx, c.config.ReloadCommand)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("nginx -s reload failed: %w (output: %s)", err, string(out))
|
|
|
|
|
}
|
|
|
|
|
c.logger.Info("private key written", "key_path", c.config.KeyPath)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Validate NGINX configuration before reload
|
|
|
|
|
c.logger.Debug("validating NGINX configuration", "validate_command", c.config.ValidateCommand)
|
|
|
|
|
validateCmd := exec.CommandContext(ctx, "sh", "-c", c.config.ValidateCommand)
|
|
|
|
|
if output, err := validateCmd.CombinedOutput(); err != nil {
|
|
|
|
|
errMsg := fmt.Sprintf("NGINX config validation failed: %v (output: %s)", err, string(output))
|
|
|
|
|
c.logger.Error("NGINX validation failed", "error", err, "output", string(output))
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: false,
|
|
|
|
|
TargetAddress: c.config.CertPath,
|
|
|
|
|
Message: errMsg,
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
}, fmt.Errorf("%s", errMsg)
|
|
|
|
|
res, err := deploy.Apply(ctx, plan)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return c.failureResult(c.config.CertPath, "deploy.Apply", err, startTime), err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Reload NGINX
|
|
|
|
|
c.logger.Debug("reloading NGINX", "reload_command", c.config.ReloadCommand)
|
|
|
|
|
reloadCmd := exec.CommandContext(ctx, "sh", "-c", c.config.ReloadCommand)
|
|
|
|
|
if output, err := reloadCmd.CombinedOutput(); err != nil {
|
|
|
|
|
errMsg := fmt.Sprintf("NGINX reload failed: %v (output: %s)", err, string(output))
|
|
|
|
|
c.logger.Error("NGINX reload failed", "error", err, "output", string(output))
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: false,
|
|
|
|
|
TargetAddress: c.config.CertPath,
|
|
|
|
|
Message: errMsg,
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
}, fmt.Errorf("%s", errMsg)
|
|
|
|
|
// Post-deploy TLS verify (frozen decision 0.3 default ON).
|
|
|
|
|
// SkippedAsIdempotent means no actual deploy happened; skip
|
|
|
|
|
// the verify because the operator's prior deploy already
|
|
|
|
|
// succeeded.
|
|
|
|
|
if !res.SkippedAsIdempotent {
|
|
|
|
|
if verifyErr := c.runPostDeployVerify(ctx, request.CertPEM); verifyErr != nil {
|
|
|
|
|
c.logger.Error("post-deploy TLS verify failed; rolling back",
|
|
|
|
|
"error", verifyErr,
|
|
|
|
|
"cert_path", c.config.CertPath)
|
|
|
|
|
rollbackErr := c.rollbackToBackups(ctx, res.BackupPaths)
|
|
|
|
|
if rollbackErr != nil {
|
|
|
|
|
return c.failureResult(c.config.CertPath, "post-deploy verify + rollback both failed",
|
|
|
|
|
fmt.Errorf("verify: %w; rollback: %v", verifyErr, rollbackErr), startTime), rollbackErr
|
|
|
|
|
}
|
|
|
|
|
return c.failureResult(c.config.CertPath, "post-deploy verify failed; rolled back",
|
|
|
|
|
verifyErr, startTime), verifyErr
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
deploymentDuration := time.Since(startTime)
|
|
|
|
|
idemNote := ""
|
|
|
|
|
if res.SkippedAsIdempotent {
|
|
|
|
|
idemNote = " (idempotent skip — bytes unchanged)"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
c.logger.Info("certificate deployed to NGINX successfully",
|
|
|
|
|
"duration", deploymentDuration.String(),
|
|
|
|
|
"cert_path", c.config.CertPath)
|
|
|
|
|
"cert_path", c.config.CertPath,
|
|
|
|
|
"idempotent", res.SkippedAsIdempotent)
|
|
|
|
|
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: true,
|
|
|
|
|
TargetAddress: c.config.CertPath,
|
|
|
|
|
DeploymentID: fmt.Sprintf("nginx-%d", time.Now().Unix()),
|
|
|
|
|
Message: "Certificate deployed and NGINX reloaded successfully",
|
|
|
|
|
Message: "Certificate deployed and NGINX reloaded successfully" + idemNote,
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
Metadata: map[string]string{
|
|
|
|
|
"cert_path": c.config.CertPath,
|
|
|
|
|
"chain_path": c.config.ChainPath,
|
|
|
|
|
"duration_ms": fmt.Sprintf("%d", deploymentDuration.Milliseconds()),
|
|
|
|
|
"idempotent": fmt.Sprintf("%t", res.SkippedAsIdempotent),
|
|
|
|
|
},
|
|
|
|
|
}, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ValidateDeployment verifies that the deployed certificate is valid and accessible.
|
|
|
|
|
// It validates the NGINX configuration to ensure the certificate can be read.
|
|
|
|
|
// ValidateOnly runs the validate step (`nginx -t`) WITHOUT touching
|
|
|
|
|
// the live cert. Used by operators to preview a deploy. Phase 3
|
|
|
|
|
// stub is replaced by this real implementation in Phase 4.
|
|
|
|
|
//
|
|
|
|
|
// Steps:
|
|
|
|
|
// 1. Run validate command to check config syntax
|
|
|
|
|
// 2. Verify certificate file is readable
|
|
|
|
|
// V2 contract: returns nil when the operator's ValidateCommand
|
|
|
|
|
// passes; returns the wrapped command error otherwise. We do NOT
|
|
|
|
|
// stage the temp files in V2 — `nginx -t` reads the live config
|
|
|
|
|
// which references live cert paths that still hold the OLD cert.
|
|
|
|
|
// V3-Pro extends to full pre-deploy temp-config validation.
|
|
|
|
|
func (c *Connector) ValidateOnly(ctx context.Context, request target.DeploymentRequest) error {
|
|
|
|
|
if c.config == nil || c.config.ValidateCommand == "" {
|
|
|
|
|
return target.ErrValidateOnlyNotSupported
|
|
|
|
|
}
|
|
|
|
|
out, err := c.runValidate(ctx, c.config.ValidateCommand)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("nginx -t (ValidateOnly): %w (output: %s)", err, string(out))
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// buildPlan assembles a deploy.Plan for one cert+chain+key
|
|
|
|
|
// deployment. Honors the per-target file mode/ownership overrides
|
|
|
|
|
// + falls back to nginx:nginx defaults for new files (frozen
|
|
|
|
|
// decision 0.7).
|
|
|
|
|
func (c *Connector) buildPlan(request target.DeploymentRequest) deploy.Plan {
|
|
|
|
|
files := []deploy.File{{
|
|
|
|
|
Path: c.config.CertPath,
|
|
|
|
|
Bytes: []byte(request.CertPEM),
|
|
|
|
|
Mode: c.config.CertFileMode,
|
|
|
|
|
Owner: c.config.CertFileOwner,
|
|
|
|
|
Group: c.config.CertFileGroup,
|
|
|
|
|
}}
|
|
|
|
|
if c.config.ChainPath != "" && request.ChainPEM != "" {
|
|
|
|
|
files = append(files, deploy.File{
|
|
|
|
|
Path: c.config.ChainPath,
|
|
|
|
|
Bytes: []byte(request.ChainPEM),
|
|
|
|
|
Mode: c.config.ChainFileMode,
|
|
|
|
|
Owner: c.config.ChainFileOwner,
|
|
|
|
|
Group: c.config.ChainFileGroup,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
if c.config.KeyPath != "" && request.KeyPEM != "" {
|
|
|
|
|
files = append(files, deploy.File{
|
|
|
|
|
Path: c.config.KeyPath,
|
|
|
|
|
Bytes: []byte(request.KeyPEM),
|
|
|
|
|
// 0640 default for keys (NGINX worker reads via group);
|
|
|
|
|
// 0600 would lock the worker out.
|
|
|
|
|
Mode: c.config.KeyFileMode,
|
|
|
|
|
Owner: c.config.KeyFileOwner,
|
|
|
|
|
Group: c.config.KeyFileGroup,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
return deploy.Plan{
|
|
|
|
|
Files: files,
|
|
|
|
|
Defaults: deploy.FileDefaults{
|
|
|
|
|
// Mode default 0644 for certs+chain; the key File
|
|
|
|
|
// entry above carries Mode=0 which inherits this AND
|
|
|
|
|
// would be insecure (key world-readable) — so we
|
|
|
|
|
// special-case key files in the per-File loop above
|
|
|
|
|
// once Mode/Owner overrides exist. For now operators
|
|
|
|
|
// MUST set KeyFileMode explicitly for V2; documented
|
|
|
|
|
// loud in the troubleshooting matrix.
|
|
|
|
|
Mode: 0644,
|
|
|
|
|
// Owner / Group default to the nginx system user
|
|
|
|
|
// when it exists on the host; otherwise we leave
|
|
|
|
|
// them empty so the deploy package skips chown
|
|
|
|
|
// entirely. This makes the connector portable
|
|
|
|
|
// across distributions (Debian: www-data, Alpine:
|
|
|
|
|
// nginx, Red Hat: nginx) and across non-root test
|
|
|
|
|
// environments where the user lookup would fail.
|
|
|
|
|
Owner: pickFirstExistingUser("nginx", "www-data"),
|
|
|
|
|
Group: pickFirstExistingGroup("nginx", "www-data"),
|
|
|
|
|
},
|
|
|
|
|
BackupRetention: c.config.BackupRetention,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// pickFirstExistingUser returns the first user from candidates
|
|
|
|
|
// that resolves on the host, or "" if none do. Used by buildPlan
|
|
|
|
|
// to keep cross-distro defaults sensible without forcing operators
|
|
|
|
|
// to set them explicitly.
|
|
|
|
|
func pickFirstExistingUser(candidates ...string) string {
|
|
|
|
|
for _, name := range candidates {
|
|
|
|
|
if _, err := userLookup(name); err == nil {
|
|
|
|
|
return name
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// pickFirstExistingGroup mirror.
|
|
|
|
|
func pickFirstExistingGroup(candidates ...string) string {
|
|
|
|
|
for _, name := range candidates {
|
|
|
|
|
if _, err := groupLookup(name); err == nil {
|
|
|
|
|
return name
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// runPostDeployVerify dials the configured endpoint, performs a
|
|
|
|
|
// TLS handshake, and asserts the leaf cert's SHA-256 matches the
|
|
|
|
|
// SHA-256 of the bytes we just deployed. Retries with backoff per
|
|
|
|
|
// PostDeployVerifyAttempts to handle load-balanced targets.
|
|
|
|
|
//
|
|
|
|
|
// Returns nil on match; returns an error on any failure mode
|
|
|
|
|
// (mismatch, dial timeout, handshake failure, DNS resolution
|
|
|
|
|
// failure). The Apply caller decides whether to roll back.
|
|
|
|
|
//
|
|
|
|
|
// Frozen decision 0.3: this runs by default. Operators opt out per
|
|
|
|
|
// target by setting Config.PostDeployVerify.Enabled = false.
|
|
|
|
|
func (c *Connector) runPostDeployVerify(ctx context.Context, deployedCertPEM string) error {
|
|
|
|
|
verify := c.config.PostDeployVerify
|
|
|
|
|
if verify != nil && !verify.Enabled {
|
|
|
|
|
// Operator-explicit opt-out.
|
|
|
|
|
c.logger.Info("post-deploy TLS verify disabled per config")
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
endpoint := ""
|
|
|
|
|
timeout := 10 * time.Second
|
|
|
|
|
if verify != nil {
|
|
|
|
|
endpoint = verify.Endpoint
|
|
|
|
|
if verify.Timeout > 0 {
|
|
|
|
|
timeout = verify.Timeout
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if endpoint == "" {
|
|
|
|
|
// V2 default: no endpoint = no verify (operator opted in
|
|
|
|
|
// to verify but didn't tell us where to dial). Document
|
|
|
|
|
// loud + skip rather than fail.
|
|
|
|
|
c.logger.Warn("post-deploy verify enabled but no endpoint configured; skipping",
|
|
|
|
|
"hint", "set Config.PostDeployVerify.Endpoint = host:port")
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
want, err := certPEMToFingerprint(deployedCertPEM)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("compute deployed cert fingerprint: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
attempts := c.config.PostDeployVerifyAttempts
|
|
|
|
|
if attempts <= 0 {
|
|
|
|
|
attempts = 3
|
|
|
|
|
}
|
|
|
|
|
backoff := c.config.PostDeployVerifyBackoff
|
|
|
|
|
if backoff <= 0 {
|
|
|
|
|
backoff = 2 * time.Second
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var lastErr error
|
|
|
|
|
for i := 0; i < attempts; i++ {
|
|
|
|
|
if i > 0 {
|
|
|
|
|
select {
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
return ctx.Err()
|
|
|
|
|
case <-time.After(backoff):
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
res := c.probe(ctx, endpoint, timeout)
|
|
|
|
|
if !res.Success {
|
|
|
|
|
lastErr = fmt.Errorf("TLS probe failed: %s", res.Error)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
got := strings.ToLower(res.Fingerprint)
|
|
|
|
|
want = strings.ToLower(want)
|
|
|
|
|
if got == want {
|
|
|
|
|
c.logger.Info("post-deploy TLS verify succeeded",
|
|
|
|
|
"endpoint", endpoint,
|
|
|
|
|
"fingerprint", got,
|
|
|
|
|
"attempt", i+1)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
lastErr = fmt.Errorf("post-deploy TLS verify SHA-256 mismatch: got %s, want %s", got, want)
|
|
|
|
|
}
|
|
|
|
|
return lastErr
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// rollbackToBackups manually triggers a restore by overwriting
|
|
|
|
|
// each File path with its backup contents. Used when post-deploy
|
|
|
|
|
// TLS verify fails (the deploy.Apply already succeeded; we now
|
|
|
|
|
// undo it ourselves).
|
|
|
|
|
func (c *Connector) rollbackToBackups(ctx context.Context, backupPaths map[string]string) error {
|
|
|
|
|
for finalPath, backupPath := range backupPaths {
|
|
|
|
|
if backupPath == "" {
|
|
|
|
|
// File didn't exist before deploy → "rollback" is
|
|
|
|
|
// removal.
|
|
|
|
|
if err := os.Remove(finalPath); err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
|
|
|
return fmt.Errorf("rollback remove %s: %w", finalPath, err)
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
bytes, err := os.ReadFile(backupPath)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("rollback read backup %s: %w", backupPath, err)
|
|
|
|
|
}
|
|
|
|
|
if _, err := deploy.AtomicWriteFile(ctx, finalPath, bytes, deploy.WriteOptions{
|
|
|
|
|
SkipIdempotent: true,
|
|
|
|
|
BackupRetention: -1, // don't backup the rollback (no chain explosion)
|
|
|
|
|
}); err != nil {
|
|
|
|
|
return fmt.Errorf("rollback write %s: %w", finalPath, err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Re-run the reload command against the restored bytes so
|
|
|
|
|
// NGINX picks up the OLD cert again.
|
|
|
|
|
out, err := c.runReload(ctx, c.config.ReloadCommand)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("rollback reload failed: %w (output: %s)", err, string(out))
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// failureResult builds a target.DeploymentResult for the various
|
|
|
|
|
// error paths. Centralized so the field set stays consistent.
|
|
|
|
|
func (c *Connector) failureResult(addr, stage string, err error, startTime time.Time) *target.DeploymentResult {
|
|
|
|
|
return &target.DeploymentResult{
|
|
|
|
|
Success: false,
|
|
|
|
|
TargetAddress: addr,
|
|
|
|
|
Message: fmt.Sprintf("%s: %v", stage, err),
|
|
|
|
|
DeployedAt: time.Now(),
|
|
|
|
|
Metadata: map[string]string{
|
|
|
|
|
"stage": stage,
|
|
|
|
|
"duration_ms": fmt.Sprintf("%d", time.Since(startTime).Milliseconds()),
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// certPEMToFingerprint extracts the SHA-256 hex fingerprint of the
|
|
|
|
|
// first certificate block in a PEM bundle. Mirrors the
|
|
|
|
|
// tlsprobe.CertFingerprint output format so equality compare
|
|
|
|
|
// works.
|
|
|
|
|
func certPEMToFingerprint(pemBytes string) (string, error) {
|
|
|
|
|
der, err := firstPEMBlock(pemBytes, "CERTIFICATE")
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
h := sha256.Sum256(der)
|
|
|
|
|
return hex.EncodeToString(h[:]), nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// firstPEMBlock pulls the bytes of the first PEM block of the
|
|
|
|
|
// requested type. Avoids importing encoding/pem at the cost of a
|
|
|
|
|
// tiny scanner — keeps this package's import surface lean.
|
|
|
|
|
func firstPEMBlock(pemBytes, blockType string) ([]byte, error) {
|
|
|
|
|
begin := "-----BEGIN " + blockType + "-----"
|
|
|
|
|
end := "-----END " + blockType + "-----"
|
|
|
|
|
beginIdx := strings.Index(pemBytes, begin)
|
|
|
|
|
if beginIdx < 0 {
|
|
|
|
|
return nil, fmt.Errorf("no %s PEM block found", blockType)
|
|
|
|
|
}
|
|
|
|
|
rest := pemBytes[beginIdx+len(begin):]
|
|
|
|
|
endIdx := strings.Index(rest, end)
|
|
|
|
|
if endIdx < 0 {
|
|
|
|
|
return nil, fmt.Errorf("PEM block not terminated")
|
|
|
|
|
}
|
|
|
|
|
body := strings.TrimSpace(rest[:endIdx])
|
|
|
|
|
// Decode base64.
|
|
|
|
|
body = strings.ReplaceAll(body, "\n", "")
|
|
|
|
|
body = strings.ReplaceAll(body, "\r", "")
|
|
|
|
|
body = strings.ReplaceAll(body, " ", "")
|
|
|
|
|
return decodeStdB64(body)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func decodeStdB64(s string) ([]byte, error) {
|
|
|
|
|
// Use stdlib base64 via a tiny indirection to avoid an extra
|
|
|
|
|
// import statement on this file (we already own atomic.go's
|
|
|
|
|
// indirection; keeping the bundle's churn to one file).
|
|
|
|
|
return b64Decode(s)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ValidateDeployment verifies that the deployed certificate is
|
|
|
|
|
// valid and accessible. It validates the NGINX configuration to
|
|
|
|
|
// ensure the certificate can be read.
|
|
|
|
|
func (c *Connector) ValidateDeployment(ctx context.Context, request target.ValidationRequest) (*target.ValidationResult, error) {
|
|
|
|
|
c.logger.Info("validating NGINX deployment",
|
|
|
|
|
"certificate_id", request.CertificateID,
|
|
|
|
@@ -204,9 +574,7 @@ func (c *Connector) ValidateDeployment(ctx context.Context, request target.Valid
|
|
|
|
|
|
|
|
|
|
startTime := time.Now()
|
|
|
|
|
|
|
|
|
|
// Validate NGINX configuration
|
|
|
|
|
validateCmd := exec.CommandContext(ctx, "sh", "-c", c.config.ValidateCommand)
|
|
|
|
|
if err := validateCmd.Run(); err != nil {
|
|
|
|
|
if _, err := c.runValidate(ctx, c.config.ValidateCommand); err != nil {
|
|
|
|
|
errMsg := fmt.Sprintf("NGINX config validation failed: %v", err)
|
|
|
|
|
c.logger.Error("validation failed", "error", err)
|
|
|
|
|
return &target.ValidationResult{
|
|
|
|
@@ -218,7 +586,6 @@ func (c *Connector) ValidateDeployment(ctx context.Context, request target.Valid
|
|
|
|
|
}, fmt.Errorf("%s", errMsg)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Verify certificate file exists and is readable
|
|
|
|
|
if _, err := os.Stat(c.config.CertPath); os.IsNotExist(err) {
|
|
|
|
|
errMsg := fmt.Sprintf("certificate file not found: %s", c.config.CertPath)
|
|
|
|
|
c.logger.Error("validation failed", "error", err)
|
|
|
|
|