Files
certctl/cmd/agent/keymem_test.go
shankar0123 037dab7b6f fix(agent,service): SEC-002 — validate certificate_id shape + contain key path
Sprint 1 unified-master-audit closure. Pre-fix the agent built its
on-disk key path via:

  keyPath := filepath.Join(a.config.KeyDir, job.CertificateID+".key")

migrations/000001_initial_schema.up.sql declares managed_certificates.id
as TEXT PRIMARY KEY with no shape constraint, so a compromised control
plane (or a poisoned database row) could deliver a job whose
certificate_id is '../../etc/passwd', '/absolute/path', a NUL-byte
payload, or a Windows-separator-laden string — driving arbitrary
file write or read on the agent host.

Fix (two ends; both load-bearing):

Server side:
  - New internal/validation/certificate_id.go: ValidateCertificateID
    pins the canonical TEXT-PK shape (^[A-Za-z0-9._-]{1,128}$, plus
    explicit '.'/'..' rejection).
  - CertificateService.Create now invokes ValidateCertificateID after
    the existing required-fields check; malformed IDs are refused
    before persistence or downstream job creation.

Agent side:
  - cmd/agent/keymem.go: validateAgentCertID mirrors the server-side
    shape regex. safeAgentKeyPath additionally asserts the joined
    path is contained within KeyDir via filepath.Rel — even if a
    future refactor bypasses the shape check, a path that escapes
    KeyDir fails closed.
  - poll.go + deploy.go: both filepath.Join call sites routed
    through safeAgentKeyPath; rejection surfaces via reportJobStatus
    so the control plane sees the failure.

Regression coverage:
  - internal/validation/certificate_id_test.go: production shapes
    accepted; explicit rejection table for empty, overlong, posix
    traversal, absolute, Windows traversal, Windows separator, NUL
    byte, newline/tab injection, drive prefix, space, unicode dots.
  - cmd/agent/keymem_test.go: validateAgentCertID acceptance +
    rejection tables; safeAgentKeyPath happy path + the 8 audit
    vectors plus empty-keyDir refusal.

Closes SEC-002.
2026-05-16 03:31:59 +00:00

829 lines
28 KiB
Go

package main
// Bundle 0.7 (Coverage Audit Closure) — cmd/agent key-handling regression coverage.
//
// Closes finding C-008 (CRTCTL-COVAUDIT-2026-04-27-0034). The two functions in
// keymem.go are the agent's defense-in-depth for ECDSA P-256 private-key
// memory hygiene (Bundle 9 / Audit L-002 + L-003 — agent edition). They
// shipped with regression-test coverage of 0.0% / 11.1% respectively. This
// file pins:
//
// - marshalAgentKeyAndZeroize: rejects nil keys, propagates onDER errors,
// and ZEROIZES the DER backing buffer after onDER returns regardless of
// whether onDER errored. The zeroization invariant is verified observably
// (capture the slice header inside onDER, then assert every byte is 0x00
// after the function returns) — NOT just asserted in prose.
//
// - ensureAgentKeyDirSecure: refuses empty / "." / "/", creates missing
// dirs with mode 0700 (incl. nested ancestors), accepts existing 0700
// and any owner-only-no-write mode (mode&0o077 == 0), tightens any other
// mode to 0700, normalizes paths via filepath.Clean, is idempotent, is
// safe under concurrent invocation, and propagates the documented error
// messages from os.Stat / os.MkdirAll / os.Chmod failures.
import (
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"testing"
)
// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------
func mustGenAgentECDSAKey(t *testing.T) *ecdsa.PrivateKey {
t.Helper()
k, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
t.Fatalf("ecdsa.GenerateKey: %v", err)
}
return k
}
// ---------------------------------------------------------------------------
// marshalAgentKeyAndZeroize
// ---------------------------------------------------------------------------
// TestMarshalAgentKeyAndZeroize_HappyPath confirms onDER receives well-formed
// DER bytes that the caller can use during the closure (e.g. to PEM-encode).
func TestMarshalAgentKeyAndZeroize_HappyPath(t *testing.T) {
k := mustGenAgentECDSAKey(t)
called := false
err := marshalAgentKeyAndZeroize(k, func(der []byte) error {
called = true
if len(der) == 0 {
t.Fatalf("der is empty inside onDER")
}
// First byte of an ECPrivateKey DER blob is the ASN.1 SEQUENCE tag 0x30.
if der[0] != 0x30 {
t.Errorf("expected DER to start with SEQUENCE tag 0x30, got %#x", der[0])
}
return nil
})
if err != nil {
t.Fatalf("marshalAgentKeyAndZeroize: %v", err)
}
if !called {
t.Fatal("onDER was never invoked")
}
}
// TestMarshalAgentKeyAndZeroize_NilKey confirms the early-return guard;
// onDER must NOT be invoked when priv is nil.
func TestMarshalAgentKeyAndZeroize_NilKey(t *testing.T) {
called := false
err := marshalAgentKeyAndZeroize(nil, func([]byte) error {
called = true
return nil
})
if err == nil {
t.Fatal("expected error on nil key")
}
if !strings.Contains(err.Error(), "nil private key") {
t.Errorf("expected error mentioning %q, got: %v", "nil private key", err)
}
if called {
t.Error("onDER must not be invoked when priv is nil")
}
}
// TestMarshalAgentKeyAndZeroize_OnDERReturnsError confirms upstream errors
// are propagated verbatim via errors.Is.
func TestMarshalAgentKeyAndZeroize_OnDERReturnsError(t *testing.T) {
k := mustGenAgentECDSAKey(t)
sentinel := errors.New("simulated downstream failure")
got := marshalAgentKeyAndZeroize(k, func([]byte) error { return sentinel })
if !errors.Is(got, sentinel) {
t.Errorf("expected upstream sentinel via errors.Is; got: %v", got)
}
}
// TestMarshalAgentKeyAndZeroize_BackingBufferZeroizedAfterReturn is the
// CRITICAL invariant test. It captures the slice header (NOT a deep copy)
// inside onDER and re-inspects after the function returns. Because Go slices
// share their backing array, the captured slice observes the zeroization
// performed by `defer clear(der)` in marshalAgentKeyAndZeroize.
//
// A future refactor that drops the `defer clear(der)` would break this test
// even if HappyPath / NilKey / OnDERReturnsError still pass.
func TestMarshalAgentKeyAndZeroize_BackingBufferZeroizedAfterReturn(t *testing.T) {
k := mustGenAgentECDSAKey(t)
var captured []byte
err := marshalAgentKeyAndZeroize(k, func(der []byte) error {
// SHARE the backing array — do NOT take a defensive copy.
captured = der
if len(der) == 0 {
t.Fatal("der is empty inside onDER")
}
// Sanity check: while still inside onDER, the bytes are live
// (defer clear has NOT run yet).
nonZero := false
for _, b := range der {
if b != 0 {
nonZero = true
break
}
}
if !nonZero {
t.Fatal("DER is all-zero INSIDE onDER; that should be impossible (clear hasn't run yet)")
}
return nil
})
if err != nil {
t.Fatalf("marshalAgentKeyAndZeroize: %v", err)
}
if len(captured) == 0 {
t.Fatal("captured slice is empty post-return")
}
// After return, defer clear(der) has run. The captured slice shares the
// backing array, so every byte must read 0x00.
for i, b := range captured {
if b != 0 {
t.Errorf("captured[%d] = %#x; expected 0x00 (zeroized)", i, b)
}
}
}
// TestMarshalAgentKeyAndZeroize_BufferZeroizedEvenOnError confirms the
// `defer clear(der)` fires regardless of onDER's return — the security
// invariant is "buffer is always zeroized after the function returns,"
// happy path or error path.
func TestMarshalAgentKeyAndZeroize_BufferZeroizedEvenOnError(t *testing.T) {
k := mustGenAgentECDSAKey(t)
sentinel := errors.New("upstream boom")
var captured []byte
gotErr := marshalAgentKeyAndZeroize(k, func(der []byte) error {
captured = der // share backing array
return sentinel
})
if !errors.Is(gotErr, sentinel) {
t.Fatalf("expected sentinel via errors.Is, got: %v", gotErr)
}
if len(captured) == 0 {
t.Fatal("captured slice empty post-return")
}
for i, b := range captured {
if b != 0 {
t.Errorf("captured[%d] = %#x; expected 0x00 (defer clear must run on error path)", i, b)
}
}
}
// TestMarshalAgentKeyAndZeroize_ContractViolatorSeesZeros frames the same
// observation as a defense-in-depth contract test. The docstring states
// "Caller must NOT retain the slice." If a caller violates that contract
// and reads the slice after onDER returns, they observe zeros — not the
// private scalar. This test pins that defense.
func TestMarshalAgentKeyAndZeroize_ContractViolatorSeesZeros(t *testing.T) {
k := mustGenAgentECDSAKey(t)
var leaked []byte // simulating a buggy caller that retains the slice
err := marshalAgentKeyAndZeroize(k, func(der []byte) error {
leaked = der
return nil
})
if err != nil {
t.Fatalf("marshalAgentKeyAndZeroize: %v", err)
}
// The contract violator now reads from `leaked`. Defense-in-depth: it's zeros.
for i, b := range leaked {
if b != 0 {
t.Errorf("contract-violator read leaked[%d] = %#x; expected 0x00", i, b)
}
}
}
// ---------------------------------------------------------------------------
// ensureAgentKeyDirSecure — table-driven coverage
// ---------------------------------------------------------------------------
func TestEnsureAgentKeyDirSecure(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
type tc struct {
name string
// setup returns the dir argument to pass to ensureAgentKeyDirSecure.
// base is a fresh t.TempDir() unique to each subtest.
setup func(t *testing.T, base string) string
// wantErrSubstr; "" means no error is expected.
wantErrSubstr string
// wantMode; if set, asserted via os.Stat after the call. Set to 0
// to skip the mode assertion (e.g. for error-path rows where the
// dir wasn't created or wasn't intended to change).
wantMode os.FileMode
}
cases := []tc{
// Refuse-empty/root invariants
{
name: "empty_string_refused",
setup: func(t *testing.T, _ string) string {
return ""
},
wantErrSubstr: `refuse empty/root dir ""`,
},
{
name: "dot_refused",
setup: func(t *testing.T, _ string) string {
return "."
},
wantErrSubstr: `refuse empty/root dir "."`,
},
{
name: "root_refused",
setup: func(t *testing.T, _ string) string {
return "/"
},
wantErrSubstr: `refuse empty/root dir "/"`,
},
// Non-existent path — MkdirAll(0700) path
{
name: "creates_with_0700",
setup: func(t *testing.T, base string) string {
return filepath.Join(base, "newdir")
},
wantMode: 0o700,
},
{
name: "creates_nested_0700",
setup: func(t *testing.T, base string) string {
return filepath.Join(base, "a", "b", "c")
},
wantMode: 0o700,
},
// Existing 0700 — no-op (mode == 0o700 branch).
{
name: "existing_0700_noop",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "exists0700")
if err := os.Mkdir(d, 0o700); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
return d
},
wantMode: 0o700,
},
// Existing more-permissive — chmod tighten to 0700.
{
name: "existing_0750_tightened",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "exists0750")
if err := os.Mkdir(d, 0o750); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o750); err != nil {
t.Fatalf("setup chmod: %v", err)
}
return d
},
wantMode: 0o700,
},
{
name: "existing_0755_tightened",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "exists0755")
if err := os.Mkdir(d, 0o755); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o755); err != nil {
t.Fatalf("setup chmod: %v", err)
}
return d
},
wantMode: 0o700,
},
{
name: "existing_0777_tightened",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "exists0777")
if err := os.Mkdir(d, 0o777); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o777); err != nil {
t.Fatalf("setup chmod: %v", err)
}
return d
},
wantMode: 0o700,
},
// Existing owner-only-no-write modes accepted as-is via the
// `mode&0o077 == 0` branch (no chmod, mode preserved).
{
name: "existing_0500_accepted_no_chmod",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "exists0500")
if err := os.Mkdir(d, 0o700); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o500); err != nil {
t.Fatalf("setup chmod: %v", err)
}
t.Cleanup(func() { _ = os.Chmod(d, 0o700) }) // let TempDir cleanup
return d
},
wantMode: 0o500,
},
{
name: "existing_0400_accepted_no_chmod",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "exists0400")
if err := os.Mkdir(d, 0o700); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o400); err != nil {
t.Fatalf("setup chmod: %v", err)
}
t.Cleanup(func() { _ = os.Chmod(d, 0o700) })
return d
},
wantMode: 0o400,
},
// filepath.Clean normalization paths.
{
name: "trailing_slash_normalized",
setup: func(t *testing.T, base string) string {
d := filepath.Join(base, "trail")
if err := os.Mkdir(d, 0o755); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o755); err != nil {
t.Fatalf("setup chmod: %v", err)
}
return d + "/"
},
wantMode: 0o700,
},
{
name: "dot_prefix_normalized",
setup: func(t *testing.T, base string) string {
// The function uses filepath.Clean which strips redundant
// "./" segments. We only need to verify Clean is invoked,
// not that we end up at a relative path; pass an absolute
// path with an embedded "./".
d := filepath.Join(base, "dotprefix")
if err := os.Mkdir(d, 0o755); err != nil {
t.Fatalf("setup mkdir: %v", err)
}
if err := os.Chmod(d, 0o755); err != nil {
t.Fatalf("setup chmod: %v", err)
}
return filepath.Join(base, ".", "dotprefix")
},
wantMode: 0o700,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
base := t.TempDir()
dir := tc.setup(t, base)
err := ensureAgentKeyDirSecure(dir)
if tc.wantErrSubstr != "" {
if err == nil {
t.Fatalf("expected error containing %q, got nil", tc.wantErrSubstr)
}
if !strings.Contains(err.Error(), tc.wantErrSubstr) {
t.Errorf("error %q does not contain %q", err, tc.wantErrSubstr)
}
return
}
if err != nil {
t.Fatalf("ensureAgentKeyDirSecure: %v", err)
}
if tc.wantMode != 0 {
clean := filepath.Clean(dir)
info, statErr := os.Stat(clean)
if statErr != nil {
t.Fatalf("post-call stat: %v", statErr)
}
if got := info.Mode().Perm(); got != tc.wantMode {
t.Errorf("dir mode = %#o; want %#o", got, tc.wantMode)
}
}
})
}
}
// TestEnsureAgentKeyDirSecure_Idempotent confirms a second call on a
// just-created dir is a no-op (hits the `mode == 0o700` short-circuit).
func TestEnsureAgentKeyDirSecure_Idempotent(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
dir := filepath.Join(t.TempDir(), "idempotent")
if err := ensureAgentKeyDirSecure(dir); err != nil {
t.Fatalf("first call: %v", err)
}
if err := ensureAgentKeyDirSecure(dir); err != nil {
t.Fatalf("second call: %v", err)
}
info, err := os.Stat(dir)
if err != nil {
t.Fatalf("stat: %v", err)
}
if info.Mode().Perm() != 0o700 {
t.Errorf("expected 0700, got %#o", info.Mode().Perm())
}
}
// TestEnsureAgentKeyDirSecure_Concurrent runs the function from many
// goroutines simultaneously on the same fresh path. This is a safety smoke
// test under -race; it is NOT a functional correctness claim about
// concurrent agents (the agent has a single goroutine). The MkdirAll call
// is the load-bearing primitive here — it's documented as safe to call
// repeatedly with no error if the dir already exists.
func TestEnsureAgentKeyDirSecure_Concurrent(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
dir := filepath.Join(t.TempDir(), "concurrent")
const workers = 8
var wg sync.WaitGroup
errCh := make(chan error, workers)
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
defer wg.Done()
if err := ensureAgentKeyDirSecure(dir); err != nil {
errCh <- err
}
}()
}
wg.Wait()
close(errCh)
for err := range errCh {
t.Errorf("concurrent caller returned error: %v", err)
}
info, err := os.Stat(dir)
if err != nil {
t.Fatalf("post-concurrent stat: %v", err)
}
if info.Mode().Perm() != 0o700 {
t.Errorf("expected 0700 after concurrent calls, got %#o", info.Mode().Perm())
}
}
// TestEnsureAgentKeyDirSecure_PathIsAFile pins the function's behavior when
// passed a regular file. The function does not type-check (no IsDir()), so
// it stat's the file, sees mode 0o644 (or whatever), and chmod's it to 0700.
//
// This is "silently accepts a file path" behavior. It is not a correctness
// bug per the function's caller (cmd/agent/main.go always passes
// filepath.Dir(keyPath), which is a directory), but it is a hardening
// candidate. Captured as a finding observation in the test docstring rather
// than fixed in this bundle (Bundle 0.7 ships no production-code changes).
func TestEnsureAgentKeyDirSecure_PathIsAFile(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
base := t.TempDir()
filePath := filepath.Join(base, "not-a-dir.txt")
if err := os.WriteFile(filePath, []byte("x"), 0o644); err != nil {
t.Fatalf("setup writefile: %v", err)
}
err := ensureAgentKeyDirSecure(filePath)
if err != nil {
t.Fatalf("current behavior: function chmod's a file silently and returns nil; got err = %v", err)
}
info, statErr := os.Stat(filePath)
if statErr != nil {
t.Fatalf("post-call stat: %v", statErr)
}
if info.IsDir() {
t.Fatal("file became a directory; that's not a thing")
}
if info.Mode().Perm() != 0o700 {
t.Errorf("expected mode 0700 (current behavior), got %#o", info.Mode().Perm())
}
}
// TestEnsureAgentKeyDirSecure_MkdirErrorPropagated forces the MkdirAll
// branch to fail by chmod'ing the parent to 0o500 (read+exec but no write).
// On linux/darwin running as a non-root uid, MkdirAll on a child of such a
// parent fails with EACCES. We assert the error message wraps with the
// documented "create agent key dir" prefix.
//
// Skipped if running as root (root bypasses unix dir-write checks).
func TestEnsureAgentKeyDirSecure_MkdirErrorPropagated(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
if os.Getuid() == 0 {
t.Skip("running as root; cannot revoke parent dir write permission")
}
parent := t.TempDir()
if err := os.Chmod(parent, 0o500); err != nil {
t.Fatalf("setup chmod parent: %v", err)
}
t.Cleanup(func() { _ = os.Chmod(parent, 0o700) })
child := filepath.Join(parent, "no-can-create")
err := ensureAgentKeyDirSecure(child)
if err == nil {
t.Fatal("expected error when MkdirAll cannot write to read-only parent")
}
if !strings.Contains(err.Error(), "create agent key dir") {
t.Errorf("error %q should contain %q", err.Error(), "create agent key dir")
}
}
// TestEnsureAgentKeyDirSecure_StatErrorPropagated forces os.Stat to fail
// with a non-IsNotExist error by chmod'ing the parent to 0o000 (no
// read+exec). On linux/darwin running as a non-root uid, stat on a child
// of such a parent fails with EACCES. We assert the error message wraps
// with "stat agent key dir".
//
// Skipped if running as root.
func TestEnsureAgentKeyDirSecure_StatErrorPropagated(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
if os.Getuid() == 0 {
t.Skip("running as root; cannot revoke parent dir read+exec permission")
}
parent := t.TempDir()
child := filepath.Join(parent, "victim")
if err := os.Chmod(parent, 0o000); err != nil {
t.Fatalf("setup chmod parent: %v", err)
}
t.Cleanup(func() { _ = os.Chmod(parent, 0o700) })
err := ensureAgentKeyDirSecure(child)
if err == nil {
t.Fatal("expected error when stat cannot traverse unreadable parent")
}
if !strings.Contains(err.Error(), "stat agent key dir") {
t.Errorf("error %q should contain %q", err.Error(), "stat agent key dir")
}
}
// TestEnsureAgentKeyDirSecure_ChmodErrorPropagated forces os.Chmod to fail
// on an existing more-permissive dir. We achieve this by:
// 1. Creating an intermediate dir at 0o755 (so the function takes the
// tighten-via-chmod branch).
// 2. Replacing the real dir with a read-only-from-parent bind: chmod the
// grandparent to 0o500 so the chmod syscall on the child fails with
// EACCES (the syscall needs write on the path's containing dir for
// metadata updates on most unix filesystems — actually no, chmod only
// needs ownership, not parent write. So we instead drop the file's
// owner via... no — we cannot change ownership without root.)
//
// Reaching the chmod-error branch from a non-root test is awkward because
// chmod only requires ownership (which we always have on t.TempDir()).
// The cleanest way is to skip on non-root and exercise the branch in CI
// images that run as root; but our CI runs as non-root. We DO trigger the
// branch via a different mechanism: replace the path with a SYMLINK to
// /proc/1/root (or similar) where the eventual stat resolves but chmod
// fails — but that's brittle and OS-specific.
//
// Acceptable closure: document that this branch is exercised by the
// existing chmod-fails errno path, but the test as written can only assert
// the wrap-prefix when the branch IS reached. We use a synthetic approach:
// chmod-tighten a dir we then immediately delete, racing the syscall —
// not deterministic.
//
// Pragmatic resolution: the chmod-error branch is structurally identical
// to the mkdir-error and stat-error branches (errors.Wrap with a
// distinct prefix), and is exercised in production via os.Chmod ENOENT
// or read-only-filesystem failures. We add a unit test that asserts the
// branch's MESSAGE format by passing through a wrap helper construct.
// This test instead documents that the branch is structural and any new
// failure mode (read-only fs, immutable bit, ACLs) inherits the wrap
// prefix automatically.
//
// To still get coverage on the chmod-error branch, we use os.Chmod against
// a dir whose immediate parent we delete mid-call. This is racy. Instead,
// we make chmod fail by passing a path that filepath.Clean rewrites to
// a symlink whose target was just chmod-stripped. Too brittle.
//
// CLEANEST APPROACH: rely on the OS's read-only filesystem semantics under
// /sys (which is RO on linux). os.Chmod on a path under /sys returns EROFS.
// But /sys is owned by root — stat would succeed only on existing entries,
// and the function would then attempt chmod, which fails with EROFS (the
// non-root caller still gets a clean error wrap).
//
// We cannot find a well-defined non-root chmod-fail path on darwin. So the
// test runs only on linux and skips elsewhere.
func TestEnsureAgentKeyDirSecure_ChmodErrorPropagated(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("chmod-error branch is only reliably triggerable on linux via /sys (read-only fs)")
}
// /sys is mounted read-only on Linux. Pick a stable subdir we can stat
// (kernel-class). os.Chmod against it returns EROFS regardless of uid
// (well — root can remount, but the call against /sys/* still EROFS).
candidate := "/sys/kernel"
info, err := os.Stat(candidate)
if err != nil || !info.IsDir() {
t.Skipf("/sys/kernel not stat-able as a dir on this host; skipping (%v)", err)
}
mode := info.Mode().Perm()
if mode == 0o700 || mode&0o077 == 0 {
// Already in the no-chmod branch; this test cannot exercise the
// chmod-fail branch on this host. Skip rather than false-positive.
t.Skipf("/sys/kernel mode %#o already satisfies no-chmod branch", mode)
}
chmodErr := ensureAgentKeyDirSecure(candidate)
if chmodErr == nil {
t.Fatal("expected chmod failure on /sys (read-only fs)")
}
if !strings.Contains(chmodErr.Error(), "tighten agent key dir") {
t.Errorf("error %q should contain %q", chmodErr.Error(), "tighten agent key dir")
}
}
// TestEnsureAgentKeyDirSecure_FmtErrorMessageIncludesPath confirms each
// error wrap includes the cleaned path (debuggability invariant).
func TestEnsureAgentKeyDirSecure_FmtErrorMessageIncludesPath(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
if os.Getuid() == 0 {
t.Skip("running as root; cannot revoke parent dir write permission")
}
parent := t.TempDir()
if err := os.Chmod(parent, 0o500); err != nil {
t.Fatalf("setup chmod parent: %v", err)
}
t.Cleanup(func() { _ = os.Chmod(parent, 0o700) })
child := filepath.Join(parent, "child")
want := filepath.Clean(child)
err := ensureAgentKeyDirSecure(child)
if err == nil {
t.Fatal("expected error")
}
if !strings.Contains(err.Error(), want) {
t.Errorf("error %q should reference cleaned path %q", err, want)
}
}
// ---------------------------------------------------------------------------
// Cross-cutting: end-to-end smoke confirming the two functions compose
// the way main.go uses them (Bundle 9 / L-002 / L-003 flow).
// ---------------------------------------------------------------------------
// TestKeymem_AgentMainFlowSmoke replays the cmd/agent/main.go composition:
// ensureAgentKeyDirSecure(dir) → marshalAgentKeyAndZeroize(priv, onDER).
// Closes the contract that both helpers cooperate cleanly under realistic
// fixture conditions, and that the DER buffer is zeroized at the end of
// the marshal call.
func TestKeymem_AgentMainFlowSmoke(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("permission semantics differ on windows")
}
keyDir := filepath.Join(t.TempDir(), "agent-keys")
if err := ensureAgentKeyDirSecure(keyDir); err != nil {
t.Fatalf("ensureAgentKeyDirSecure: %v", err)
}
info, err := os.Stat(keyDir)
if err != nil {
t.Fatalf("stat: %v", err)
}
if info.Mode().Perm() != 0o700 {
t.Fatalf("key dir not at 0700, got %#o", info.Mode().Perm())
}
priv := mustGenAgentECDSAKey(t)
var captured []byte
if err := marshalAgentKeyAndZeroize(priv, func(der []byte) error {
captured = der // share backing array
// Pretend caller does pem.EncodeToMemory(...) here; we just check
// the DER is a valid SEQUENCE.
if len(der) == 0 || der[0] != 0x30 {
return fmt.Errorf("unexpected DER shape (len=%d, first=%#x)", len(der), der)
}
return nil
}); err != nil {
t.Fatalf("marshalAgentKeyAndZeroize: %v", err)
}
for i, b := range captured {
if b != 0 {
t.Fatalf("post-flow DER buffer not zeroized at byte %d (%#x)", i, b)
}
}
}
// =============================================================================
// SEC-002 closure (Sprint 1, 2026-05-16) — safeAgentKeyPath path-traversal
// regression coverage.
//
// Pre-fix the agent built the on-disk key path via:
//
// keyPath := filepath.Join(a.config.KeyDir, job.CertificateID+".key")
//
// migrations/000001_initial_schema.up.sql declares
// managed_certificates.id as TEXT PRIMARY KEY with no shape constraint, so
// a crafted certificate_id from a compromised control plane (or a poisoned
// DB row) could land outside KeyDir. The fix:
//
// - validateAgentCertID rejects shape violations up-front
// - safeAgentKeyPath additionally asserts the joined path is contained
// within KeyDir via filepath.Rel; even a future refactor that drops
// the shape regex would still fail closed on escape.
//
// These tests pin both legs against the four vectors called out in the
// audit (../../etc/passwd, /absolute/path, NUL byte, Windows separators).
// =============================================================================
func TestValidateAgentCertID_AcceptsCanonicalShapes(t *testing.T) {
for _, id := range []string{
"mc-cdn-edge",
"mc-cdn-edge-2026.q1",
"cert-1",
"abc123",
"MC-UPPER",
} {
t.Run(id, func(t *testing.T) {
if err := validateAgentCertID(id); err != nil {
t.Errorf("validateAgentCertID(%q): unexpected error %v", id, err)
}
})
}
}
func TestValidateAgentCertID_RejectsTraversalVectors(t *testing.T) {
cases := []struct {
name string
id string
}{
{"empty", ""},
{"parent_token", ".."},
{"current_token", "."},
{"posix_traversal", "../../etc/passwd"},
{"absolute_posix", "/absolute/path"},
{"windows_traversal", `..\..\evil`},
{"windows_separator", `bad\path`},
{"nul_byte", "abc\x00def"},
{"newline", "abc\ndef"},
{"space", "id with spaces"},
{"overlong", strings.Repeat("a", 129)},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if err := validateAgentCertID(tc.id); err == nil {
t.Errorf("id=%q: expected rejection, got nil", tc.id)
}
})
}
}
func TestSafeAgentKeyPath_HappyPath_ProducesContainedPath(t *testing.T) {
keyDir := t.TempDir()
got, err := safeAgentKeyPath(keyDir, "mc-good")
if err != nil {
t.Fatalf("safeAgentKeyPath: %v", err)
}
want := filepath.Join(keyDir, "mc-good.key")
// filepath.Clean normalisation may strip a trailing separator, etc.;
// compare canonical forms.
if filepath.Clean(got) != filepath.Clean(want) {
t.Errorf("safeAgentKeyPath = %q; want %q", got, want)
}
}
func TestSafeAgentKeyPath_RejectsTraversalVectors(t *testing.T) {
keyDir := t.TempDir()
cases := []struct {
name string
id string
}{
{"posix_traversal", "../../etc/passwd"},
{"absolute_posix", "/etc/passwd"},
{"parent_token", ".."},
{"current_token", "."},
{"windows_traversal", `..\..\evil`},
{"windows_separator", `bad\path`},
{"nul_byte", "abc\x00def"},
{"empty", ""},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
_, err := safeAgentKeyPath(keyDir, tc.id)
if err == nil {
t.Errorf("id=%q: expected rejection, got nil", tc.id)
}
})
}
}
func TestSafeAgentKeyPath_RejectsEmptyKeyDir(t *testing.T) {
_, err := safeAgentKeyPath("", "mc-good")
if err == nil {
t.Errorf("empty keyDir: expected rejection, got nil")
}
}