diff --git a/internal/connector/target/traefik/traefik.go b/internal/connector/target/traefik/traefik.go index 8c721d7..4626184 100644 --- a/internal/connector/target/traefik/traefik.go +++ b/internal/connector/target/traefik/traefik.go @@ -1,8 +1,12 @@ // Package traefik implements the Traefik file-provider target -// connector. As of deploy-hardening I Phase 7: atomic-write via -// internal/deploy.AtomicWriteFile + optional post-deploy TLS -// verify. No PreCommit/PostCommit because Traefik watches the -// directory via inotify and auto-reloads on file change. +// connector. Bundle 4 of the 2026-05-02 deployment-target audit: +// upgraded from two separate deploy.AtomicWriteFile calls (cert, +// key) to a single deploy.Apply Plan with all-files atomicity. +// Traefik has no PreCommit (no `nginx -t` equivalent) and no +// PostCommit (file watcher auto-reloads on rename). Post-deploy +// TLS verify (optional) confirms the watcher picked up the new +// cert; on verify failure, restoreFromBackups rewrites every File +// path from its backup so the watcher reloads to the prior state. package traefik import ( @@ -92,10 +96,22 @@ func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessag return nil } -// DeployCertificate writes cert + chain (combined) and key as -// separate files via deploy.AtomicWriteFile. Traefik's inotify -// watcher picks up the changes and auto-reloads. Post-deploy -// verify (if enabled) handshakes against the configured endpoint. +// DeployCertificate writes cert + chain (combined) and key via a +// single deploy.Apply Plan. Bundle 4 of the 2026-05-02 deployment- +// target audit replaced the prior two-AtomicWriteFile approach, +// which broke all-files atomicity (a key-write failure after a +// successful cert write left an orphaned cert and the dedicated +// rollback helper only restored the cert). +// +// Traefik has no PreCommit (no validate-with-target command) and +// no PostCommit (inotify watcher auto-reloads on rename). Apply +// gives us SHA-256 idempotency over both files and rollback +// semantics if the rename loop ever fails mid-stream. +// +// Post-deploy TLS verify (optional via PostDeployVerify) confirms +// the watcher picked up the new bytes; on mismatch, restoreFromBackups +// rewrites both files from their backups and the watcher will +// auto-reload to the prior state on its next tick. func (c *Connector) DeployCertificate(ctx context.Context, request target.DeploymentRequest) (*target.DeploymentResult, error) { c.logger.Info("deploying certificate to Traefik", "cert_dir", c.config.CertDir, "cert_file", c.config.CertFile, "key_file", c.config.KeyFile) @@ -104,9 +120,54 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy certPath := filepath.Join(c.config.CertDir, c.config.CertFile) keyPath := filepath.Join(c.config.CertDir, c.config.KeyFile) - // Preserve the pre-Phase-7 trailing-newline convention so - // existing operator deploys + tests don't break on byte-equal - // comparisons. + plan := c.buildPlan(request, certPath, keyPath) + + res, err := deploy.Apply(ctx, plan) + if err != nil { + return c.failureResult(certPath, "deploy.Apply", err, startTime), err + } + + // Post-deploy TLS verify (skip when nothing changed). + if !res.SkippedAsIdempotent { + if vErr := c.runPostDeployVerify(ctx, request.CertPEM); vErr != nil { + c.logger.Error("post-deploy TLS verify failed; rolling back", "error", vErr) + rbErr := c.restoreFromBackups(ctx, res.BackupPaths) + if rbErr != nil { + return c.failureResult(certPath, "verify+rollback both failed", + fmt.Errorf("verify: %w; rollback: %v", vErr, rbErr), startTime), rbErr + } + return c.failureResult(certPath, "post-deploy verify failed; rolled back", vErr, startTime), vErr + } + } + + dur := time.Since(startTime) + idemNote := "" + if res.SkippedAsIdempotent { + idemNote = " (idempotent skip — bytes unchanged)" + } + c.logger.Info("certificate deployed to Traefik successfully", + "duration", dur.String(), "cert_path", certPath, "idempotent", res.SkippedAsIdempotent) + return &target.DeploymentResult{ + Success: true, + TargetAddress: certPath, + DeploymentID: fmt.Sprintf("traefik-%d", time.Now().Unix()), + Message: "Certificate deployed to Traefik (file watcher will auto-reload)" + idemNote, + DeployedAt: time.Now(), + Metadata: map[string]string{ + "cert_path": certPath, "key_path": keyPath, + "duration_ms": fmt.Sprintf("%d", dur.Milliseconds()), + "idempotent": fmt.Sprintf("%t", res.SkippedAsIdempotent), + }, + }, nil +} + +// buildPlan assembles the deploy.Plan for one cert+(optional)key +// deployment. Cert and key are separate Files in the same Plan so +// Apply runs SHA-256 idempotency all-files and produces all-or- +// nothing atomicity. The combined-PEM (cert + "\n" + chain + "\n") +// shape is preserved for byte-equal compatibility with pre-Bundle-4 +// deploys. +func (c *Connector) buildPlan(request target.DeploymentRequest, certPath, keyPath string) deploy.Plan { combined := request.CertPEM + "\n" if request.ChainPEM != "" { combined = combined + request.ChainPEM + "\n" @@ -120,61 +181,26 @@ func (c *Connector) DeployCertificate(ctx context.Context, request target.Deploy keyMode = 0600 } - certRes, err := deploy.AtomicWriteFile(ctx, certPath, []byte(combined), deploy.WriteOptions{ - Mode: certMode, Owner: c.config.CertFileOwner, Group: c.config.CertFileGroup, - BackupRetention: c.config.BackupRetention, - }) - if err != nil { - return c.failureResult(certPath, "write cert", err, startTime), err - } + files := []deploy.File{{ + Path: certPath, + Bytes: []byte(combined), + Mode: certMode, + Owner: c.config.CertFileOwner, + Group: c.config.CertFileGroup, + }} if request.KeyPEM != "" { - _, err := deploy.AtomicWriteFile(ctx, keyPath, []byte(request.KeyPEM), deploy.WriteOptions{ - Mode: keyMode, Owner: c.config.KeyFileOwner, Group: c.config.KeyFileGroup, - BackupRetention: c.config.BackupRetention, + files = append(files, deploy.File{ + Path: keyPath, + Bytes: []byte(request.KeyPEM), + Mode: keyMode, + Owner: c.config.KeyFileOwner, + Group: c.config.KeyFileGroup, }) - if err != nil { - // Cert already written; try to roll back the cert too. - if certRes.BackupPath != "" { - if bytes, rErr := os.ReadFile(certRes.BackupPath); rErr == nil { - _, _ = deploy.AtomicWriteFile(ctx, certPath, bytes, deploy.WriteOptions{SkipIdempotent: true, BackupRetention: -1}) - } - } - return c.failureResult(keyPath, "write key", err, startTime), err - } } - - // Post-deploy TLS verify. - if !certRes.Idempotent { - if vErr := c.runPostDeployVerify(ctx, request.CertPEM); vErr != nil { - c.logger.Error("post-deploy TLS verify failed; rolling back", "error", vErr) - rbErr := c.rollbackCertAndKey(ctx, certPath, certRes.BackupPath, keyPath) - if rbErr != nil { - return c.failureResult(certPath, "verify+rollback both failed", - fmt.Errorf("verify: %w; rollback: %v", vErr, rbErr), startTime), rbErr - } - return c.failureResult(certPath, "post-deploy verify failed; rolled back", vErr, startTime), vErr - } + return deploy.Plan{ + Files: files, + BackupRetention: c.config.BackupRetention, } - - dur := time.Since(startTime) - idemNote := "" - if certRes.Idempotent { - idemNote = " (idempotent skip — bytes unchanged)" - } - c.logger.Info("certificate deployed to Traefik successfully", - "duration", dur.String(), "cert_path", certPath, "idempotent", certRes.Idempotent) - return &target.DeploymentResult{ - Success: true, - TargetAddress: certPath, - DeploymentID: fmt.Sprintf("traefik-%d", time.Now().Unix()), - Message: "Certificate deployed to Traefik (file watcher will auto-reload)" + idemNote, - DeployedAt: time.Now(), - Metadata: map[string]string{ - "cert_path": certPath, "key_path": keyPath, - "duration_ms": fmt.Sprintf("%d", dur.Milliseconds()), - "idempotent": fmt.Sprintf("%t", certRes.Idempotent), - }, - }, nil } // ValidateOnly returns ErrValidateOnlyNotSupported. Traefik has no @@ -228,18 +254,37 @@ func (c *Connector) runPostDeployVerify(ctx context.Context, deployedCertPEM str return lastErr } -func (c *Connector) rollbackCertAndKey(ctx context.Context, certPath, certBackup, keyPath string) error { - if certBackup == "" { - if err := os.Remove(certPath); err != nil && !errors.Is(err, os.ErrNotExist) { - return err +// restoreFromBackups iterates the BackupPaths returned by deploy.Apply +// and rewrites every destination from its backup via AtomicWriteFile +// {SkipIdempotent:true, BackupRetention:-1}. The -1 prevents +// backup-of-the-backup pollution when a rollback fires. +// +// For files that did not exist before the deploy (BackupPath == ""), +// restore = remove. Mirrors nginx.go::rollbackToBackups (L487-515) +// with the reload step elided — Traefik's inotify watcher will +// pick up the restored bytes on its next tick. +// +// Bundle 4 of the 2026-05-02 deployment-target audit. Replaces the +// pre-fix rollbackCertAndKey helper which only restored the cert +// (key was orphaned on verify failure). +func (c *Connector) restoreFromBackups(ctx context.Context, backupPaths map[string]string) error { + for finalPath, backupPath := range backupPaths { + if backupPath == "" { + // File didn't exist pre-deploy → restore = remove. + if err := os.Remove(finalPath); err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("rollback remove %s: %w", finalPath, err) + } + continue } - } else { - bytes, err := os.ReadFile(certBackup) + bytes, err := os.ReadFile(backupPath) if err != nil { - return fmt.Errorf("read cert backup: %w", err) + return fmt.Errorf("rollback read backup %s: %w", backupPath, err) } - if _, err := deploy.AtomicWriteFile(ctx, certPath, bytes, deploy.WriteOptions{SkipIdempotent: true, BackupRetention: -1}); err != nil { - return err + if _, err := deploy.AtomicWriteFile(ctx, finalPath, bytes, deploy.WriteOptions{ + SkipIdempotent: true, + BackupRetention: -1, // don't backup the rollback + }); err != nil { + return fmt.Errorf("rollback write %s: %w", finalPath, err) } } return nil diff --git a/internal/connector/target/traefik/traefik_atomic_test.go b/internal/connector/target/traefik/traefik_atomic_test.go index b70156d..99f4b0c 100644 --- a/internal/connector/target/traefik/traefik_atomic_test.go +++ b/internal/connector/target/traefik/traefik_atomic_test.go @@ -212,3 +212,190 @@ func TestTraefik_Atomic_VerifyDisabled(t *testing.T) { t.Errorf("probe called %d times despite Enabled=false", n) } } + +// --------------------------------------------------------------------------- +// Bundle 4 (deployment-target audit 2026-05-02): single-Plan deploy.Apply +// refactor regression guards. +// --------------------------------------------------------------------------- + +// TestTraefik_Atomic_KeyWriteFails_CertRollsBack is the regression guard for +// the original two-AtomicWriteFile bug. Pre-Bundle-4, a key-write failure +// after a successful cert write left the cert orphaned (the inline best- +// effort cert rollback was incomplete; the dedicated rollbackCertAndKey +// only restored the cert). Post-Bundle-4, deploy.Apply makes both writes +// all-or-nothing — if the key path is unwritable, Apply rejects the plan +// before any disk mutation OR rolls back the cert mid-rename. +// +// We trigger the failure via a key path inside a read-only subdirectory. +// The cert path is in the writable root — pre-fix the cert would land, +// post-fix Apply backs out atomically. +func TestTraefik_Atomic_KeyWriteFails_CertRollsBack(t *testing.T) { + dir := t.TempDir() + + // Pre-write sentinel cert bytes. After a failed deploy these + // must remain unchanged. + certPath := filepath.Join(dir, "cert.pem") + const sentinel = "SENTINEL-CERT\n" + if err := os.WriteFile(certPath, []byte(sentinel), 0644); err != nil { + t.Fatalf("seed cert: %v", err) + } + + // Make the key destination unwritable: a read-only subdir. + keyDir := filepath.Join(dir, "ro-keys") + if err := os.Mkdir(keyDir, 0500); err != nil { + t.Fatalf("mkdir ro: %v", err) + } + defer os.Chmod(keyDir, 0700) // restore so t.TempDir cleanup can rm + + c := traefik.New(&traefik.Config{ + CertDir: dir, + CertFile: "cert.pem", + KeyFile: "ro-keys/key.pem", // unwritable target + }, quietLogger()) + + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err == nil { + t.Fatalf("expected error from unwritable key path; res=%+v", res) + } + + // Cert on disk must still be the sentinel — Apply's all-files + // atomicity guarantee. Pre-Bundle-4 this assertion would have + // failed because cert was already written before the key error. + got, _ := os.ReadFile(certPath) + if string(got) != sentinel { + t.Errorf("cert clobbered despite key-write failure: got %q want %q", string(got), sentinel) + } +} + +// TestTraefik_Atomic_AllFilesIdempotent pins the all-files SHA-256 short- +// circuit. Pre-Bundle-4 idempotency was per-file (certRes.Idempotent only) +// — a cert that matched but a key that was new would get reported as +// idempotent skip even though the key actually changed. Post-fix +// res.SkippedAsIdempotent is true only when EVERY File matched; the +// negative case (cert match, key new) flips it to false and still runs +// the verify path. +func TestTraefik_Atomic_AllFilesIdempotent(t *testing.T) { + t.Run("both_match_skips", func(t *testing.T) { + dir := t.TempDir() + certPath := filepath.Join(dir, "cert.pem") + keyPath := filepath.Join(dir, "key.pem") + // Pre-write bytes that match exactly what Traefik would write + // (cert + "\n"). + if err := os.WriteFile(certPath, []byte(certA+"\n"), 0644); err != nil { + t.Fatalf("seed cert: %v", err) + } + if err := os.WriteFile(keyPath, []byte(keyA), 0600); err != nil { + t.Fatalf("seed key: %v", err) + } + + var probeCalls int32 + c := traefik.New(&traefik.Config{ + CertDir: dir, CertFile: "cert.pem", KeyFile: "key.pem", + PostDeployVerify: &traefik.PostDeployVerifyConfig{Enabled: true, Endpoint: "h:443"}, + }, quietLogger()) + c.SetTestProbe(func(_ context.Context, _ string, _ time.Duration) tlsprobe.ProbeResult { + atomic.AddInt32(&probeCalls, 1) + return tlsprobe.ProbeResult{Success: true, Fingerprint: "x"} + }) + + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err != nil || !res.Success { + t.Fatalf("deploy: err=%v success=%v", err, res != nil && res.Success) + } + if res.Metadata["idempotent"] != "true" { + t.Errorf("expected idempotent=true when both files match; got %q", res.Metadata["idempotent"]) + } + if probeCalls != 0 { + t.Errorf("probe called %d times on idempotent skip; want 0", probeCalls) + } + }) + + t.Run("cert_match_key_new_runs_verify", func(t *testing.T) { + dir := t.TempDir() + certPath := filepath.Join(dir, "cert.pem") + // Pre-write cert matching what Traefik would write; do NOT + // pre-write key — it'll be new. + if err := os.WriteFile(certPath, []byte(certA+"\n"), 0644); err != nil { + t.Fatalf("seed cert: %v", err) + } + + var probeCalls int32 + c := traefik.New(&traefik.Config{ + CertDir: dir, CertFile: "cert.pem", KeyFile: "key.pem", + PostDeployVerify: &traefik.PostDeployVerifyConfig{Enabled: true, Endpoint: "h:443"}, + PostDeployVerifyAttempts: 1, + }, quietLogger()) + c.SetTestProbe(func(_ context.Context, _ string, _ time.Duration) tlsprobe.ProbeResult { + atomic.AddInt32(&probeCalls, 1) + return tlsprobe.ProbeResult{Success: true, Fingerprint: fingerprintOfPEM(certA)} + }) + + res, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err != nil || !res.Success { + t.Fatalf("deploy: err=%v success=%v", err, res != nil && res.Success) + } + if res.Metadata["idempotent"] == "true" { + t.Errorf("expected idempotent=false when key is new; got true (per-file idempotency leaked through?)") + } + if probeCalls != 1 { + t.Errorf("probe should fire when key is new; called %d times want 1", probeCalls) + } + }) +} + +// TestTraefik_Atomic_VerifyMismatch_BothFilesRollBack pins all-files rollback +// on verify failure. Pre-Bundle-4 rollbackCertAndKey only restored the cert; +// the key was left in whatever state the deploy reached. Post-fix +// restoreFromBackups iterates res.BackupPaths and rewrites EVERY destination +// from its backup. +func TestTraefik_Atomic_VerifyMismatch_BothFilesRollBack(t *testing.T) { + dir := t.TempDir() + certPath := filepath.Join(dir, "cert.pem") + keyPath := filepath.Join(dir, "key.pem") + const sentinelCert = "SENTINEL-CERT\n" + const sentinelKey = "SENTINEL-KEY\n" + if err := os.WriteFile(certPath, []byte(sentinelCert), 0644); err != nil { + t.Fatalf("seed cert: %v", err) + } + if err := os.WriteFile(keyPath, []byte(sentinelKey), 0600); err != nil { + t.Fatalf("seed key: %v", err) + } + + c := traefik.New(&traefik.Config{ + CertDir: dir, CertFile: "cert.pem", KeyFile: "key.pem", + PostDeployVerifyAttempts: 1, + PostDeployVerify: &traefik.PostDeployVerifyConfig{Enabled: true, Endpoint: "h:443"}, + }, quietLogger()) + c.SetTestProbe(func(_ context.Context, _ string, _ time.Duration) tlsprobe.ProbeResult { + return tlsprobe.ProbeResult{Success: true, Fingerprint: "deadbeef"} + }) + + _, err := c.DeployCertificate(context.Background(), target.DeploymentRequest{ + CertPEM: certA, + KeyPEM: keyA, + }) + if err == nil { + t.Fatal("expected verify-mismatch error") + } + + // Both files must be restored to sentinel bytes — pre-Bundle-4 + // rollbackCertAndKey only restored the cert; the key would still + // be the new bytes. + gotCert, _ := os.ReadFile(certPath) + if string(gotCert) != sentinelCert { + t.Errorf("cert not restored on rollback: got %q want %q", string(gotCert), sentinelCert) + } + gotKey, _ := os.ReadFile(keyPath) + if string(gotKey) != sentinelKey { + t.Errorf("key not restored on rollback (Bundle 4 regression: pre-fix this would fail because rollbackCertAndKey ignored the key): got %q want %q", string(gotKey), sentinelKey) + } +}