diff --git a/internal/connector/target/envoy/envoy.go b/internal/connector/target/envoy/envoy.go index ebcd24f..8468caa 100644 --- a/internal/connector/target/envoy/envoy.go +++ b/internal/connector/target/envoy/envoy.go @@ -2,7 +2,11 @@ package envoy import ( "context" + "crypto/sha256" + "encoding/base64" + "encoding/hex" "encoding/json" + "errors" "fmt" "log/slog" "os" @@ -12,6 +16,7 @@ import ( "github.com/shankar0123/certctl/internal/connector/target" "github.com/shankar0123/certctl/internal/deploy" + "github.com/shankar0123/certctl/internal/tlsprobe" ) // Config represents the Envoy deployment target configuration. @@ -24,6 +29,45 @@ type Config struct { KeyFilename string `json:"key_filename"` // Filename for private key (default: key.pem) ChainFilename string `json:"chain_filename"` // Optional filename for chain (if set, chain written separately) SDSConfig bool `json:"sds_config"` // If true, write an SDS discovery JSON file for file-based SDS + + // Bundle 3 (deployment-target audit 2026-05-02): post-deploy TLS + // verification. Defends against Envoy's SDS file watcher's natural + // pickup latency — without this, DeployCertificate returned the + // moment file writes completed and a caller running post-deploy + // verify could see Envoy still serving the old cert (watcher + // hadn't reloaded yet, load-balanced replica hit one that hadn't + // reloaded yet, etc.). Same shape as nginx.go::PostDeployVerify. + // Default behavior is opt-in: nil PostDeployVerify or + // PostDeployVerify.Enabled=false skips the verify step entirely. + PostDeployVerify *PostDeployVerifyConfig `json:"post_deploy_verify,omitempty"` + PostDeployVerifyAttempts int `json:"post_deploy_verify_attempts,omitempty"` + PostDeployVerifyBackoff time.Duration `json:"post_deploy_verify_backoff,omitempty"` + + // Bundle 3: backup retention. Zero = + // deploy.DefaultBackupRetention (3); -1 = disable backups. 
Mirrors + // the per-Plan setting on file-write connectors that already use + // deploy.Apply (nginx/apache/haproxy/postfix). Envoy uses + // AtomicWriteFile per file so this gets passed via WriteOptions. + BackupRetention int `json:"backup_retention,omitempty"` +} + +// PostDeployVerifyConfig controls the post-deploy TLS handshake verification +// step. Mirrors nginx.PostDeployVerifyConfig so the Envoy + NGINX shapes are +// interchangeable for operators reading docs. +type PostDeployVerifyConfig struct { + // Enabled toggles the verify; false = skip even when the struct + // is non-nil. + Enabled bool `json:"enabled"` + + // Endpoint is the host:port to dial for the TLS handshake. When + // empty, the connector logs a warning and skips verify (V2: + // operator-explicit configuration required; no defaulting to + // localhost which would be wrong for sidecar deployments). + Endpoint string `json:"endpoint,omitempty"` + + // Timeout caps each individual probe attempt. Zero defaults to + // 10s (matches nginx default). + Timeout time.Duration `json:"timeout,omitempty"` } // SDSResource represents an Envoy SDS tls_certificate resource for file-based SDS. @@ -57,6 +101,11 @@ type DataSource struct { type Connector struct { config *Config logger *slog.Logger + + // Bundle 3: probe seam for post-deploy TLS verify. Same shape NGINX + // uses (nginx.go:130) — tlsprobe.ProbeTLS in production; tests + // inject a stub via SetTestProbe. + probe func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult } // New creates a new Envoy target connector with the given configuration and logger. @@ -64,9 +113,18 @@ func New(config *Config, logger *slog.Logger) *Connector { return &Connector{ config: config, logger: logger, + probe: tlsprobe.ProbeTLS, } } +// SetTestProbe overrides the post-deploy TLS probe for tests. 
Production code +// gets tlsprobe.ProbeTLS via New; tests inject a stub that returns canned +// ProbeResults to exercise watcher-pickup retry/backoff paths without standing +// up a real TLS server. +func (c *Connector) SetTestProbe(fn func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult) { + c.probe = fn +} + // ValidateConfig checks that the certificate directory is configured and valid. func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessage) error { var cfg Config @@ -126,10 +184,20 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag // and automatically picks up changes without requiring a reload command. // // Steps: -// 1. Write certificate (+ chain if chain_filename not set) to cert_filename with mode 0644 -// 2. Write private key to key_filename with mode 0600 -// 3. If chain_filename set and chain provided, write chain separately with mode 0644 -// 4. If sds_config is true, write SDS JSON file pointing to cert/key paths +// 1. Atomic-write certificate (+ chain if chain_filename not set) to +// cert_filename with mode 0644. +// 2. Atomic-write private key to key_filename with mode 0600. +// 3. If chain_filename set and chain provided, atomic-write chain +// separately with mode 0644. +// 4. If sds_config is true, atomic-write SDS JSON file pointing to +// cert/key paths (Bundle 3: previously os.WriteFile, now +// deploy.AtomicWriteFile so the JSON itself is atomic — torn JSON +// mid-write would make Envoy refuse to load any cert). +// 5. If PostDeployVerify enabled, dial the configured TLS endpoint and +// poll until the served leaf-cert SHA-256 matches the deployed +// fingerprint, with retry/backoff to absorb watcher latency. On +// mismatch after all attempts, restore from the WriteResults' +// BackupPaths and return a wrapped error (Bundle 3). 
func (c *Connector) DeployCertificate(ctx context.Context, request target.DeploymentRequest) (*target.DeploymentResult, error) { c.logger.Info("deploying certificate to Envoy", "cert_dir", c.config.CertDir, @@ -148,11 +216,19 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy certData += request.ChainPEM + "\n" } - // Phase 7 (deploy-hardening I): atomic-write via - // deploy.AtomicWriteFile so cert/key/chain swap atomically and - // have backup files for rollback. Envoy's SDS file watcher - // picks up the rename atomically — no torn config. - if _, err := deploy.AtomicWriteFile(ctx, certPath, []byte(certData), deploy.WriteOptions{Mode: 0644}); err != nil { + // Bundle 3 contract: track WriteResults for every atomic write so + // the post-deploy-verify rollback path can restore from backups + // across all four files (cert, key, chain, SDS JSON) — not just + // the cert. + results := make([]*deploy.WriteResult, 0, 4) + + writeOpts := func(mode os.FileMode) deploy.WriteOptions { + return deploy.WriteOptions{Mode: mode, BackupRetention: c.config.BackupRetention} + } + + // 1. Cert (+ inline chain if no separate chain filename). + certRes, err := deploy.AtomicWriteFile(ctx, certPath, []byte(certData), writeOpts(0644)) + if err != nil { errMsg := fmt.Sprintf("failed to write certificate: %v", err) c.logger.Error("certificate deployment failed", "error", err) return &target.DeploymentResult{ @@ -162,10 +238,12 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy DeployedAt: time.Now(), }, fmt.Errorf("%s", errMsg) } + results = append(results, certRes) - // Write private key with secure permissions (0600: rw-------) + // 2. Key (mode 0600 — private material). 
if request.KeyPEM != "" { - if _, err := deploy.AtomicWriteFile(ctx, keyPath, []byte(request.KeyPEM), deploy.WriteOptions{Mode: 0600}); err != nil { + keyRes, err := deploy.AtomicWriteFile(ctx, keyPath, []byte(request.KeyPEM), writeOpts(0600)) + if err != nil { errMsg := fmt.Sprintf("failed to write private key: %v", err) c.logger.Error("key deployment failed", "error", err) return &target.DeploymentResult{ @@ -175,12 +253,14 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy DeployedAt: time.Now(), }, fmt.Errorf("%s", errMsg) } + results = append(results, keyRes) } - // Write chain separately if chain_filename is configured + // 3. Optional separate chain file. if c.config.ChainFilename != "" && request.ChainPEM != "" { chainPath := filepath.Join(c.config.CertDir, c.config.ChainFilename) - if _, err := deploy.AtomicWriteFile(ctx, chainPath, []byte(request.ChainPEM+"\n"), deploy.WriteOptions{Mode: 0644}); err != nil { + chainRes, err := deploy.AtomicWriteFile(ctx, chainPath, []byte(request.ChainPEM+"\n"), writeOpts(0644)) + if err != nil { errMsg := fmt.Sprintf("failed to write chain: %v", err) c.logger.Error("chain deployment failed", "error", err) return &target.DeploymentResult{ @@ -190,11 +270,13 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy DeployedAt: time.Now(), }, fmt.Errorf("%s", errMsg) } + results = append(results, chainRes) } - // Write SDS JSON file if configured + // 4. SDS JSON (Bundle 3: was os.WriteFile, now atomic). 
if c.config.SDSConfig { - if err := c.writeSDSConfig(); err != nil { + sdsRes, err := c.writeSDSConfig(ctx) + if err != nil { errMsg := fmt.Sprintf("failed to write SDS config: %v", err) c.logger.Error("SDS config deployment failed", "error", err) return &target.DeploymentResult{ @@ -204,19 +286,50 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy DeployedAt: time.Now(), }, fmt.Errorf("%s", errMsg) } + results = append(results, sdsRes) + } + + // 5. Post-deploy TLS verify (Bundle 3). Skip when all four files + // were idempotent (no actual change to verify) — same gate NGINX + // uses on res.SkippedAsIdempotent. + if c.shouldRunVerify(results) { + if vErr := c.runPostDeployVerify(ctx, request.CertPEM); vErr != nil { + c.logger.Error("post-deploy TLS verify failed; rolling back", "error", vErr) + rbErr := c.restoreFromBackups(ctx, results) + if rbErr != nil { + return c.failureResult(certPath, "post-deploy verify + rollback both failed", + fmt.Errorf("verify: %w; rollback: %v", vErr, rbErr), startTime), rbErr + } + return c.failureResult(certPath, "post-deploy verify failed; rolled back", + vErr, startTime), vErr + } } deploymentDuration := time.Since(startTime) + allIdempotent := true + for _, r := range results { + if !r.Idempotent { + allIdempotent = false + break + } + } + idemNote := "" + if allIdempotent { + idemNote = " (idempotent skip — all bytes unchanged)" + } + c.logger.Info("certificate deployed to Envoy successfully", "duration", deploymentDuration.String(), "cert_path", certPath, "key_path", keyPath, - "sds_config", c.config.SDSConfig) + "sds_config", c.config.SDSConfig, + "idempotent", allIdempotent) metadata := map[string]string{ "cert_path": certPath, "key_path": keyPath, "duration_ms": fmt.Sprintf("%d", deploymentDuration.Milliseconds()), + "idempotent": fmt.Sprintf("%t", allIdempotent), } if c.config.SDSConfig { metadata["sds_config_path"] = filepath.Join(c.config.CertDir, "sds.json") @@ -226,15 +339,30 @@ func 
(c *Connector) DeployCertificate(ctx context.Context, request target.Deploy Success: true, TargetAddress: certPath, DeploymentID: fmt.Sprintf("envoy-%d", time.Now().Unix()), - Message: "Certificate deployed to Envoy (file-based SDS will auto-reload)", + Message: "Certificate deployed to Envoy (file-based SDS will auto-reload)" + idemNote, DeployedAt: time.Now(), Metadata: metadata, }, nil } -// writeSDSConfig writes an Envoy SDS JSON file that references the cert/key file paths. -// This file is consumed by Envoy's file-based SDS provider (path_config_source). -func (c *Connector) writeSDSConfig() error { +// shouldRunVerify reports whether the post-deploy verify step should fire. +// Returns false when every WriteResult was idempotent (nothing actually +// changed; the operator's prior deploy already succeeded), mirroring +// NGINX's res.SkippedAsIdempotent gate. +func (c *Connector) shouldRunVerify(results []*deploy.WriteResult) bool { + for _, r := range results { + if !r.Idempotent { + return true + } + } + return false +} + +// writeSDSConfig writes an Envoy SDS JSON file that references the cert/key +// file paths. The write goes through deploy.AtomicWriteFile (Bundle 3) so +// power loss / OOM mid-write cannot leave a torn JSON file — Envoy's SDS +// watcher refuses to load any cert against a malformed JSON. 
+func (c *Connector) writeSDSConfig(ctx context.Context) (*deploy.WriteResult, error) { certPath := filepath.Join(c.config.CertDir, c.config.CertFilename) keyPath := filepath.Join(c.config.CertDir, c.config.KeyFilename) @@ -253,18 +381,184 @@ func (c *Connector) writeSDSConfig() error { sdsJSON, err := json.MarshalIndent(sdsResource, "", " ") if err != nil { - return fmt.Errorf("failed to marshal SDS config: %w", err) + return nil, fmt.Errorf("failed to marshal SDS config: %w", err) } sdsPath := filepath.Join(c.config.CertDir, "sds.json") - if err := os.WriteFile(sdsPath, sdsJSON, 0644); err != nil { - return fmt.Errorf("failed to write SDS config file: %w", err) + res, err := deploy.AtomicWriteFile(ctx, sdsPath, sdsJSON, deploy.WriteOptions{ + Mode: 0644, + BackupRetention: c.config.BackupRetention, + }) + if err != nil { + return nil, fmt.Errorf("failed to write SDS config file: %w", err) } c.logger.Info("SDS config file written", "path", sdsPath) + return res, nil +} + +// runPostDeployVerify dials the configured endpoint, performs a TLS handshake, +// and asserts the leaf cert's SHA-256 matches the SHA-256 of the bytes we just +// deployed. Retries with backoff per PostDeployVerifyAttempts to absorb the +// natural latency between SDS file write and Envoy's watcher picking up the +// change. +// +// Returns nil on match; returns a wrapped error on any failure mode (mismatch +// after all attempts, dial timeout, handshake failure, DNS resolution failure). +// The caller decides whether to roll back. Same shape as nginx.go:416. +// +// Bundle 3 of the 2026-05-02 deployment-target audit. 
+func (c *Connector) runPostDeployVerify(ctx context.Context, deployedCertPEM string) error { + verify := c.config.PostDeployVerify + if verify == nil || !verify.Enabled { + return nil + } + + endpoint := verify.Endpoint + timeout := verify.Timeout + if timeout <= 0 { + timeout = 10 * time.Second + } + if endpoint == "" { + c.logger.Warn("post-deploy verify enabled but no endpoint configured; skipping", + "hint", "set Config.PostDeployVerify.Endpoint = host:port") + return nil + } + + want, err := certPEMToFingerprint(deployedCertPEM) + if err != nil { + return fmt.Errorf("compute deployed cert fingerprint: %w", err) + } + want = strings.ToLower(want) + + attempts := c.config.PostDeployVerifyAttempts + if attempts <= 0 { + attempts = 3 + } + backoff := c.config.PostDeployVerifyBackoff + if backoff <= 0 { + backoff = 2 * time.Second + } + + var lastErr error + for i := 0; i < attempts; i++ { + if i > 0 { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): + } + } + res := c.probe(ctx, endpoint, timeout) + if !res.Success { + lastErr = fmt.Errorf("TLS probe failed: %s", res.Error) + continue + } + got := strings.ToLower(res.Fingerprint) + if got == want { + c.logger.Info("post-deploy TLS verify succeeded", + "endpoint", endpoint, + "fingerprint", got, + "attempt", i+1) + return nil + } + lastErr = fmt.Errorf("post-deploy TLS verify SHA-256 mismatch: got %s, want %s", got, want) + } + return lastErr +} + +// restoreFromBackups iterates the WriteResults from a successful per-file +// AtomicWriteFile pass and rewrites each destination from its BackupPath. Used +// when post-deploy TLS verify fails — the writes already succeeded, so we undo +// them by rewriting the backup bytes via AtomicWriteFile{SkipIdempotent:true, +// BackupRetention:-1}. +// +// Traefik has no PostCommit reload to retry — Envoy's SDS file watcher will +// pick up the restored bytes naturally on its next tick. 
The verify retry/ +// backoff in this same DeployCertificate call would have absorbed that watcher +// cycle; on rollback we trust the watcher and return. +// +// Mirrors nginx.go::rollbackToBackups (L487-515) with the reload step elided. +// +// Bundle 3 of the 2026-05-02 deployment-target audit. +func (c *Connector) restoreFromBackups(ctx context.Context, results []*deploy.WriteResult) error { + for _, r := range results { + if r == nil || r.Idempotent { + // Idempotent writes did not modify the destination, so + // there is nothing to restore. + continue + } + if r.BackupPath == "" { + // File did not exist before this deploy — restore = remove. + if err := os.Remove(r.Path); err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("rollback remove %s: %w", r.Path, err) + } + continue + } + bytes, err := os.ReadFile(r.BackupPath) + if err != nil { + return fmt.Errorf("rollback read backup %s: %w", r.BackupPath, err) + } + if _, err := deploy.AtomicWriteFile(ctx, r.Path, bytes, deploy.WriteOptions{ + SkipIdempotent: true, + BackupRetention: -1, // don't backup the rollback (no chain explosion) + }); err != nil { + return fmt.Errorf("rollback write %s: %w", r.Path, err) + } + } return nil } +// failureResult builds a target.DeploymentResult for the various error paths. +// Centralized so the field set stays consistent. Same shape as nginx.go:519. +func (c *Connector) failureResult(addr, stage string, err error, startTime time.Time) *target.DeploymentResult { + return &target.DeploymentResult{ + Success: false, + TargetAddress: addr, + Message: fmt.Sprintf("%s: %v", stage, err), + DeployedAt: time.Now(), + Metadata: map[string]string{ + "stage": stage, + "duration_ms": fmt.Sprintf("%d", time.Since(startTime).Milliseconds()), + }, + } +} + +// certPEMToFingerprint extracts the SHA-256 hex fingerprint of the first +// certificate block in a PEM bundle. 
// Mirrors nginx.go's helper of the same name (and tlsprobe.CertFingerprint's
// output format) so equality compare works against the probe's served
// fingerprint. Returns an error when no decodable CERTIFICATE block is
// present.
func certPEMToFingerprint(pemBytes string) (string, error) {
	der, err := firstPEMBlock(pemBytes, "CERTIFICATE")
	if err != nil {
		return "", err
	}
	sum := sha256.Sum256(der)
	return hex.EncodeToString(sum[:]), nil
}

// firstPEMBlock pulls the bytes of the first PEM block of the requested type.
// Mirrors nginx.go:548 (kept inline rather than a shared helper because the
// nginx version is package-private; cross-package import would force exposure).
//
// The base64 body is stripped of all whitespace (newlines, carriage returns,
// spaces and tabs) before decoding, so indented or CRLF-terminated PEM text
// is accepted. An error is returned when the BEGIN or END marker is missing,
// when the block body is empty, or when the body is not valid base64.
func firstPEMBlock(pemBytes, blockType string) ([]byte, error) {
	begin := "-----BEGIN " + blockType + "-----"
	end := "-----END " + blockType + "-----"

	_, rest, found := strings.Cut(pemBytes, begin)
	if !found {
		return nil, fmt.Errorf("no %s PEM block found", blockType)
	}
	body, _, found := strings.Cut(rest, end)
	if !found {
		return nil, fmt.Errorf("PEM block not terminated")
	}

	// strings.Fields splits on any whitespace, which also covers tabs that a
	// chain of ReplaceAll calls for "\n", "\r" and " " would leave behind.
	encoded := strings.Join(strings.Fields(body), "")
	if encoded == "" {
		// Without this, an empty block would hash as zero bytes and only
		// surface later as a fingerprint mismatch.
		return nil, fmt.Errorf("%s PEM block is empty", blockType)
	}
	der, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return nil, fmt.Errorf("decode %s PEM block: %w", blockType, err)
	}
	return der, nil
}

// ValidateDeployment verifies that the deployed certificate files are readable.
// It checks that both the certificate and key files exist and are accessible.
func (c *Connector) ValidateDeployment(ctx context.Context, request target.ValidationRequest) (*target.ValidationResult, error) { diff --git a/internal/connector/target/envoy/envoy_atomic_test.go b/internal/connector/target/envoy/envoy_atomic_test.go index 57a8c5c..706b785 100644 --- a/internal/connector/target/envoy/envoy_atomic_test.go +++ b/internal/connector/target/envoy/envoy_atomic_test.go @@ -2,16 +2,22 @@ package envoy_test import ( "context" + "crypto/sha256" + "encoding/base64" + "encoding/hex" "errors" "log/slog" "os" "path/filepath" "strings" + "sync/atomic" "testing" + "time" "github.com/shankar0123/certctl/internal/connector/target" "github.com/shankar0123/certctl/internal/connector/target/envoy" "github.com/shankar0123/certctl/internal/deploy" + "github.com/shankar0123/certctl/internal/tlsprobe" ) // Phase 7 of the deploy-hardening I master bundle: atomic-write @@ -93,3 +99,235 @@ func TestEnvoy_ValidateOnly_Sentinel(t *testing.T) { t.Errorf("got %v", err) } } + +// --------------------------------------------------------------------------- +// Bundle 3 (deployment-target audit 2026-05-02): SDS atomicity + post-deploy +// watcher pickup confirmation. +// --------------------------------------------------------------------------- + +// certPEMFingerprint mirrors envoy.certPEMToFingerprint (which is package- +// private). Computes SHA-256 of the first PEM block's DER bytes; matches what +// tlsprobe.CertFingerprint emits for a served leaf cert. 
+func certPEMFingerprint(t *testing.T, pemBytes string) string { + t.Helper() + const begin = "-----BEGIN CERTIFICATE-----" + const end = "-----END CERTIFICATE-----" + bi := strings.Index(pemBytes, begin) + if bi < 0 { + t.Fatalf("no CERTIFICATE block in PEM") + } + rest := pemBytes[bi+len(begin):] + ei := strings.Index(rest, end) + if ei < 0 { + t.Fatalf("no END CERTIFICATE in PEM") + } + body := strings.TrimSpace(rest[:ei]) + body = strings.ReplaceAll(body, "\n", "") + body = strings.ReplaceAll(body, "\r", "") + der, err := base64.StdEncoding.DecodeString(body) + if err != nil { + t.Fatalf("base64: %v", err) + } + h := sha256.Sum256(der) + return hex.EncodeToString(h[:]) +} + +// TestEnvoy_Atomic_SDSConfigWriteIsAtomic pins the wiring change at envoy.go's +// writeSDSConfig — pre-Bundle-3 the SDS JSON went through os.WriteFile (no +// backup, torn-write hazard). Post-fix it goes through deploy.AtomicWriteFile, +// which produces a sibling backup with deploy.BackupSuffix when an existing +// SDS JSON is replaced. +func TestEnvoy_Atomic_SDSConfigWriteIsAtomic(t *testing.T) { + dir := t.TempDir() + sdsPath := filepath.Join(dir, "sds.json") + // Pre-write a sentinel SDS JSON so the connector's write produces + // a backup we can assert on. + if err := os.WriteFile(sdsPath, []byte(`{"resources":[{"name":"old"}]}`), 0644); err != nil { + t.Fatalf("seed sds: %v", err) + } + cfg := envoy.Config{ + CertDir: dir, + CertFilename: "cert.pem", + KeyFilename: "key.pem", + SDSConfig: true, + } + c := envoy.New(&cfg, newTestLogger()) + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err != nil || !res.Success { + t.Fatalf("deploy: err=%v success=%v", err, res != nil && res.Success) + } + + // SDS JSON should be the new bytes (i.e. NOT match the sentinel). 
+ got, err := os.ReadFile(sdsPath) + if err != nil { + t.Fatalf("read sds: %v", err) + } + if strings.Contains(string(got), `"old"`) { + t.Errorf("SDS JSON not replaced; still contains sentinel") + } + if !strings.Contains(string(got), "server_cert") { + t.Errorf("SDS JSON missing expected resource name; got %s", string(got)) + } + + // AtomicWriteFile produces a backup file with deploy.BackupSuffix + // when replacing an existing destination. Pre-Bundle-3 (os.WriteFile + // path) no backup would exist for sds.json. + entries, _ := os.ReadDir(dir) + foundBak := false + for _, e := range entries { + if strings.HasPrefix(e.Name(), "sds.json"+deploy.BackupSuffix) { + foundBak = true + } + } + if !foundBak { + t.Errorf("no SDS JSON backup created — atomic-write wiring missing? entries=%v", entryNames(entries)) + } +} + +// TestEnvoy_Atomic_WatcherPickupRetries pins the retry/backoff loop in the +// post-deploy verify path. Stub the probe so attempts 1+2 return the wrong +// fingerprint and attempt 3 returns the correct one — DeployCertificate must +// succeed and the probe must have been called exactly 3 times. 
+func TestEnvoy_Atomic_WatcherPickupRetries(t *testing.T) { + dir := t.TempDir() + cfg := envoy.Config{ + CertDir: dir, + CertFilename: "cert.pem", + KeyFilename: "key.pem", + PostDeployVerify: &envoy.PostDeployVerifyConfig{ + Enabled: true, + Endpoint: "envoy.test.invalid:443", + Timeout: 100 * time.Millisecond, + }, + PostDeployVerifyAttempts: 3, + PostDeployVerifyBackoff: time.Millisecond, // tight loop for tests + } + c := envoy.New(&cfg, newTestLogger()) + + want := certPEMFingerprint(t, certA) + var calls atomic.Int64 + c.SetTestProbe(func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult { + n := calls.Add(1) + if n < 3 { + return tlsprobe.ProbeResult{Success: true, Fingerprint: "deadbeef"} + } + return tlsprobe.ProbeResult{Success: true, Fingerprint: want} + }) + + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err != nil { + t.Fatalf("deploy returned error after retries should have succeeded: %v", err) + } + if !res.Success { + t.Fatalf("deploy.Success=false; message=%s", res.Message) + } + if got := calls.Load(); got != 3 { + t.Errorf("probe called %d times, want 3", got) + } +} + +// TestEnvoy_Atomic_WatcherPickupAllAttemptsFail_RollsBack pins the verify- +// failure rollback path. Pre-write sentinel cert + key; stub probe to always +// return the wrong fingerprint; assert DeployCertificate returns a wrapped +// error AND the destination files contain the sentinel bytes (restored from +// backups). 
+func TestEnvoy_Atomic_WatcherPickupAllAttemptsFail_RollsBack(t *testing.T) { + dir := t.TempDir() + certPath := filepath.Join(dir, "cert.pem") + keyPath := filepath.Join(dir, "key.pem") + sentCert := []byte("SENTINEL-CERT-BYTES") + sentKey := []byte("SENTINEL-KEY-BYTES") + if err := os.WriteFile(certPath, sentCert, 0644); err != nil { + t.Fatalf("seed cert: %v", err) + } + if err := os.WriteFile(keyPath, sentKey, 0600); err != nil { + t.Fatalf("seed key: %v", err) + } + + cfg := envoy.Config{ + CertDir: dir, + CertFilename: "cert.pem", + KeyFilename: "key.pem", + PostDeployVerify: &envoy.PostDeployVerifyConfig{ + Enabled: true, + Endpoint: "envoy.test.invalid:443", + Timeout: 100 * time.Millisecond, + }, + PostDeployVerifyAttempts: 2, + PostDeployVerifyBackoff: time.Millisecond, + } + c := envoy.New(&cfg, newTestLogger()) + c.SetTestProbe(func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult { + return tlsprobe.ProbeResult{Success: true, Fingerprint: "deadbeef"} + }) + + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err == nil { + t.Fatalf("expected verify-mismatch error, got nil; res=%+v", res) + } + if res.Success { + t.Errorf("expected Success=false on verify failure") + } + if !strings.Contains(strings.ToLower(res.Message), "verify") { + t.Errorf("expected message to mention verify; got %q", res.Message) + } + + // Both files must be restored to sentinel bytes. + gotCert, _ := os.ReadFile(certPath) + if string(gotCert) != string(sentCert) { + t.Errorf("cert not restored on rollback; got %q want %q", string(gotCert), string(sentCert)) + } + gotKey, _ := os.ReadFile(keyPath) + if string(gotKey) != string(sentKey) { + t.Errorf("key not restored on rollback; got %q want %q", string(gotKey), string(sentKey)) + } +} + +// TestEnvoy_Atomic_PostDeployVerifyDisabledByDefault pins the opt-in default. 
+// A Config with no PostDeployVerify set must NOT call the probe — preserving +// pre-Bundle-3 behaviour for callers that don't opt in. +func TestEnvoy_Atomic_PostDeployVerifyDisabledByDefault(t *testing.T) { + dir := t.TempDir() + cfg := envoy.Config{ + CertDir: dir, + CertFilename: "cert.pem", + KeyFilename: "key.pem", + // PostDeployVerify intentionally nil. + } + c := envoy.New(&cfg, newTestLogger()) + var calls atomic.Int64 + c.SetTestProbe(func(ctx context.Context, address string, timeout time.Duration) tlsprobe.ProbeResult { + calls.Add(1) + return tlsprobe.ProbeResult{Success: false, Error: "probe should not be called"} + }) + + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err != nil || !res.Success { + t.Fatalf("deploy: err=%v success=%v", err, res != nil && res.Success) + } + if got := calls.Load(); got != 0 { + t.Errorf("probe called %d times when PostDeployVerify is nil; want 0", got) + } +} + +// entryNames is a tiny helper for log-friendly directory listings in test +// failure messages. +func entryNames(entries []os.DirEntry) []string { + names := make([]string, 0, len(entries)) + for _, e := range entries { + names = append(names, e.Name()) + } + return names +}