// Copyright 2026 certctl LLC. All rights reserved. // SPDX-License-Identifier: BUSL-1.1 package deploy import ( "errors" "fmt" "os" "os/user" "strconv" ) // runningAsRoot reports whether the current process has uid 0. // Used by applyOwnership to decide whether chown EPERM is fatal // (we're root and SHOULD have been allowed; bug) vs ignorable // (we're a regular user; chown to a different uid will always // fail; not actionable). Operators run agents as root in // production, so this fork only hides EPERM in dev/CI. func runningAsRoot() bool { return os.Geteuid() == 0 } // resolvedOwnership describes the final (mode, uid, gid) to apply // to a destination file. Resolution honors the precedence: // // 1. Explicit File.Mode/Owner/Group → use as given // 2. Existing destination file → preserve that file's mode/uid/gid // 3. Plan.Defaults / WriteOptions.Default* → use as fallback // 4. Nothing set → leave as os.WriteFile default (file mode = 0644 // for new files; uid/gid = process-effective) // // uid / gid are -1 when no chown should occur (no override AND no // existing file AND no default → leave as-is). type resolvedOwnership struct { Mode os.FileMode UID int // -1 = do not chown GID int // -1 = do not chgrp (must come together with UID) ModeSet bool OwnerLabel string // best-effort string for diagnostics ("" if unknown) GroupLabel string } // resolveOwnership computes the final mode/uid/gid for a file. // existingStat is nil when the destination does not exist. func resolveOwnership(file File, defaults FileDefaults, existingStat os.FileInfo) (resolvedOwnership, error) { res := resolvedOwnership{UID: -1, GID: -1} // Mode resolution. switch { case file.Mode != 0: res.Mode = file.Mode res.ModeSet = true case existingStat != nil: res.Mode = existingStat.Mode().Perm() res.ModeSet = true case defaults.Mode != 0: res.Mode = defaults.Mode res.ModeSet = true default: // Nothing to apply; AtomicWriteFile uses os.WriteFile's // default 0644-ish for new files, preserves for existing. res.Mode = 0 res.ModeSet = false } // Owner / group resolution. owner, group := file.Owner, file.Group switch { case owner != "" && group != "": // explicit override case existingStat != nil: // preserve existing — extract from sys-stat uid, gid, ok := unixOwnerFromStat(existingStat) if ok { res.UID, res.GID = uid, gid // Best-effort labels for logs (don't fail if user/group // has been deleted from /etc/passwd between deploys). if u, err := user.LookupId(strconv.Itoa(uid)); err == nil { res.OwnerLabel = u.Username } if g, err := user.LookupGroupId(strconv.Itoa(gid)); err == nil { res.GroupLabel = g.Name } } return res, nil case defaults.Owner != "" && defaults.Group != "": owner, group = defaults.Owner, defaults.Group default: // No override, no existing file, no defaults — leave UID/GID // at -1 so AtomicWriteFile skips the chown entirely. return res, nil } uid, err := lookupUID(owner) if err != nil { return res, fmt.Errorf("resolve owner %q: %w", owner, err) } gid, err := lookupGID(group) if err != nil { return res, fmt.Errorf("resolve group %q: %w", group, err) } res.UID, res.GID = uid, gid res.OwnerLabel, res.GroupLabel = owner, group return res, nil } // applyOwnership applies the resolved (mode, uid, gid) to path. // Both chown and chmod are best-effort: we attempt them, log // warnings on failure, but do NOT fail the deploy. The agent runs // as root in production; running as a regular user (CI / developer // workstation) means chown to a different user fails with EPERM, // which is expected and not actionable. The deploy semantically // succeeded — only ownership lift was skipped. // // The "is this acceptable to silently swallow chown failure?" // question is answered yes for two reasons: // - In production (root agent), failures are real OS-level // issues that show up in the audit log + Prometheus // deploy_validate_failures_total counter. // - In dev (non-root), failures are expected behavior; tests // would otherwise need to be skipped or run with sudo. // // Connectors that NEED hard ownership enforcement (e.g. compliance // audits) can wrap a stat-after-write check in their PostCommit. func applyOwnership(path string, res resolvedOwnership) error { if res.ModeSet { if err := os.Chmod(path, res.Mode); err != nil { return fmt.Errorf("chmod %s to %#o: %w", path, res.Mode, err) } } if res.UID >= 0 && res.GID >= 0 { if err := os.Chown(path, res.UID, res.GID); err != nil { // In non-root contexts (dev, CI), chown to a // different uid will fail with one of EPERM (most // filesystems) or EINVAL (some tmpfs configs). The // agent runs as root in production where chown // will succeed; the dev-time failure is not an // actionable signal and would otherwise force every // test to run as root. We swallow the chown error // when we're not root. Production agents (uid 0) // still hard-fail on chown errors so genuine // issues surface. if runningAsRoot() { return fmt.Errorf("chown %s to %d:%d: %w", path, res.UID, res.GID, err) } // Non-root chown failure: silently skip. The // caller's audit log + Prometheus deploy-counter // surface the "ownership lift requested but not // granted" condition for production where it // matters. } } return nil } // lookupUID resolves a username to a numeric uid. Accepts numeric // strings ("1000") as a passthrough so the agent can accept either // "nginx" or "1000" in operator config. func lookupUID(username string) (int, error) { if username == "" { return -1, errors.New("empty username") } if uid, err := strconv.Atoi(username); err == nil { return uid, nil } u, err := user.Lookup(username) if err != nil { return -1, err } uid, err := strconv.Atoi(u.Uid) if err != nil { return -1, fmt.Errorf("user %q has non-numeric uid %q: %w", username, u.Uid, err) } return uid, nil } // lookupGID resolves a group name to a numeric gid. func lookupGID(groupname string) (int, error) { if groupname == "" { return -1, errors.New("empty groupname") } if gid, err := strconv.Atoi(groupname); err == nil { return gid, nil } g, err := user.LookupGroup(groupname) if err != nil { return -1, err } gid, err := strconv.Atoi(g.Gid) if err != nil { return -1, fmt.Errorf("group %q has non-numeric gid %q: %w", groupname, g.Gid, err) } return gid, nil } // unixOwnerFromStat extracts (uid, gid) from a Unix-style FileInfo. // On non-Unix platforms or when the underlying stat doesn't expose // uid/gid, returns ok=false. // // Platform-specific implementations live in: // - ownership_unix.go (//go:build unix — uses *syscall.Stat_t) // - ownership_windows.go (//go:build windows — stub returns false) // // The split exists because syscall.Stat_t is Unix-only — Windows // has no equivalent shape, so any production tsx that names it // fails to compile on GOOS=windows. The cross-platform-build CI // matrix caught this at Hotfix #16; the function was originally // in this file pre-split.