mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-10 02:58:52 +00:00
v2.0.47: HTTPS Everywhere — TLS-only control plane, agents/CLI/MCP
Breaking change release. Plaintext HTTP listener removed. The certctl control plane now terminates TLS 1.3 on :8443 via http.Server.ListenAndServeTLS. No CERTCTL_TLS_ENABLED=false escape hatch. No dual-listener mode. One-step cutover per docs/upgrade-to-tls.md. Server - cmd/server/tls.go: certHolder with SIGHUP hot-reload + atomic cert swap, buildServerTLSConfig (TLS 1.3 min, GetCertificate callback), preflightServerTLS validation - cmd/server/main.go: ListenAndServeTLS in place of ListenAndServe, watchSIGHUP wiring, cert/key path config threading - tls_test.go: 418-line regression coverage of reload, preflight, callback behavior, SAN validation Config - CERTCTL_TLS_CERT_PATH / CERTCTL_TLS_KEY_PATH (required) - Plaintext rejection: agents/CLI/MCP pre-flight-fail on http:// URLs with a pointer to docs/upgrade-to-tls.md Agents, CLI, MCP - All three pre-flight-reject http:// URLs with fail-loud diagnostic - CERTCTL_SERVER_CA_BUNDLE_PATH for private-CA trust - CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY for dev-only bypass (loud warning on startup) - install-agent.sh emits both vars as commented template lines docker-compose - certctl-tls-init sidecar generates SAN-valid self-signed cert into deploy/test/certs/ on first boot - All demo-stack curls pin against ca.crt with --cacert Helm chart - Three TLS provisioning modes, exactly one required: - server.tls.existingSecret (operator-supplied) - server.tls.certManager.enabled (cert-manager integration) - server.tls.selfSigned.enabled (eval only — not for production) - server-certificate.yaml template for cert-manager mode - helm install without a TLS source fails at template render with a pointer to docs/tls.md CI - .github/workflows/ci.yml Helm Chart Validation step renders the chart in both existingSecret and cert-manager modes, plus an inverse guard-regression test that asserts helm template MUST refuse to render when no TLS source is configured. 
Previously the single `helm template` invocation hit the certctl.tls.required fail-loud guard and exit-1'd CI. Four invocations now: lint (existingSecret), template (existingSecret), template (cert-manager), template (no args — must fail). Integration tests - deploy/test/integration_test.go stands up the Compose stack over HTTPS, extracts the CA bundle, and exercises every certctl API over https://localhost:8443 - All 34 integration subtests green (per Phase 8 local CI-parity) Documentation - New: docs/tls.md (provisioning patterns, rotation, SIGHUP reload) - New: docs/upgrade-to-tls.md (one-step cutover, no-downgrade warnings, fleet-roll sequencing) - CHANGELOG.md: v2.2.0 "HTTPS Everywhere — The Irony" entry (file heading unchanged; release tag is v2.0.47) - All curls in docs/, examples/, deploy/helm/ guides use https://localhost:8443 --cacert Verification - grep -rn "ListenAndServe[^T]" cmd/ internal/ → 0 hits - grep -rn "\"http://" cmd/ internal/ → 2 benign hits (Caddy admin API default, SSRF doc comment) — zero certctl endpoints - Tasks #197–#206 (Phases 0–8) all closed in the tracker Files: 65 changed, 3489 insertions, 372 deletions (pre-CI-fix).
This commit is contained in:
+273
-31
@@ -7,6 +7,7 @@ import (
|
||||
"crypto/elliptic"
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/json"
|
||||
@@ -72,7 +73,7 @@ func TestAgent_Heartbeat_Success(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Should not panic
|
||||
agent.sendHeartbeat(context.Background())
|
||||
@@ -93,7 +94,7 @@ func TestAgent_Heartbeat_ServerError(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Should increment consecutive failures
|
||||
failureBefore := agent.consecutiveFailures
|
||||
@@ -115,7 +116,7 @@ func TestAgent_Heartbeat_ConnectionError(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Should fail due to connection error
|
||||
agent.sendHeartbeat(context.Background())
|
||||
@@ -150,7 +151,7 @@ func TestAgent_PollWork_NoWork(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Should not panic
|
||||
agent.pollForWork(context.Background())
|
||||
@@ -195,7 +196,7 @@ func TestAgent_PollWork_Success(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Should not panic; work items are processed in separate goroutines in real usage
|
||||
agent.pollForWork(context.Background())
|
||||
@@ -285,7 +286,7 @@ func TestParsePEMFile(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Parse the file
|
||||
entries := agent.parsePEMFile(certPath)
|
||||
@@ -336,7 +337,7 @@ func TestParsePEMFile_MultipleCerts(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
entries := agent.parsePEMFile(certPath)
|
||||
|
||||
@@ -362,7 +363,7 @@ func TestParseDERFile(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
entry, err := agent.parseDERFile(derPath)
|
||||
if err != nil {
|
||||
@@ -397,7 +398,7 @@ func TestParseDERFile_Invalid(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.parseDERFile(derPath)
|
||||
if err == nil {
|
||||
@@ -439,7 +440,7 @@ func TestScanDirectory(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpdir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Simulate directory walk manually (as runDiscoveryScan does)
|
||||
var certs []discoveredCertEntry
|
||||
@@ -474,7 +475,7 @@ func TestCreateTargetConnector_NGINX(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
configJSON := json.RawMessage(`{"cert_path":"/etc/nginx/cert.pem"}`)
|
||||
connector, err := agent.createTargetConnector("NGINX", configJSON)
|
||||
@@ -496,7 +497,7 @@ func TestCreateTargetConnector_Unsupported(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.createTargetConnector("UnsupportedType", nil)
|
||||
|
||||
@@ -530,7 +531,7 @@ func TestFetchCertificate_Success(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
certPEM, err := agent.fetchCertificate(context.Background(), "mc-001")
|
||||
if err != nil {
|
||||
@@ -556,7 +557,7 @@ func TestFetchCertificate_NotFound(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.fetchCertificate(context.Background(), "mc-nonexistent")
|
||||
if err == nil {
|
||||
@@ -592,7 +593,7 @@ func TestReportJobStatus_Success(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
err := agent.reportJobStatus(context.Background(), "j-001", "Completed", "")
|
||||
if err != nil {
|
||||
@@ -624,7 +625,7 @@ func TestReportJobStatus_WithError(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
err := agent.reportJobStatus(context.Background(), "j-001", "Failed", "deployment failed")
|
||||
if err != nil {
|
||||
@@ -658,7 +659,7 @@ func TestMakeRequest_Success(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
resp, err := agent.makeRequest(context.Background(), http.MethodPost, "/test", map[string]string{"key": "value"})
|
||||
if err != nil {
|
||||
@@ -680,7 +681,7 @@ func TestMakeRequest_InvalidURL(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.makeRequest(context.Background(), http.MethodGet, "/test", nil)
|
||||
if err == nil {
|
||||
@@ -765,7 +766,7 @@ func TestNewAgent(t *testing.T) {
|
||||
}
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
if agent.config != cfg {
|
||||
t.Error("config not set correctly")
|
||||
@@ -791,7 +792,7 @@ func TestNewAgent_WithLogger(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
if agent.logger != logger {
|
||||
t.Error("logger not set correctly")
|
||||
@@ -954,7 +955,7 @@ func TestCreateTargetConnector_AllSupportedTypes(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
@@ -1007,7 +1008,7 @@ func TestCreateTargetConnector_InvalidJSON(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
invalidJSON := json.RawMessage("{invalid json}")
|
||||
|
||||
@@ -1031,7 +1032,7 @@ func TestCreateTargetConnector_UnknownType(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
_, err := agent.createTargetConnector("MagicBox", nil)
|
||||
|
||||
@@ -1061,7 +1062,7 @@ func TestCreateTargetConnector_EmptyConfig(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
for _, typeName := range tests {
|
||||
t.Run(typeName, func(t *testing.T) {
|
||||
@@ -1137,7 +1138,7 @@ func TestRunDiscoveryScan_ValidCerts(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpDir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Run discovery scan
|
||||
agent.runDiscoveryScan(context.Background())
|
||||
@@ -1165,7 +1166,7 @@ func TestRunDiscoveryScan_NoCertificates(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpDir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Run discovery scan - should complete without error even with empty directory
|
||||
agent.runDiscoveryScan(context.Background())
|
||||
@@ -1222,7 +1223,7 @@ func TestRunDiscoveryScan_MultipleCerts(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpDir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Run discovery scan
|
||||
agent.runDiscoveryScan(context.Background())
|
||||
@@ -1273,7 +1274,7 @@ func TestRunDiscoveryScan_DERCertificate(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpDir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Run discovery scan
|
||||
agent.runDiscoveryScan(context.Background())
|
||||
@@ -1331,7 +1332,7 @@ func TestRunDiscoveryScan_Subdirectories(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpDir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Run discovery scan - should recursively find certs in subdirs
|
||||
agent.runDiscoveryScan(context.Background())
|
||||
@@ -1369,7 +1370,7 @@ func TestRunDiscoveryScan_ServerError(t *testing.T) {
|
||||
DiscoveryDirs: []string{tmpDir},
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
// Should handle server error gracefully without panicking
|
||||
agent.runDiscoveryScan(context.Background())
|
||||
@@ -1396,7 +1397,7 @@ func TestDiscoveredCertEntry_ValidFields(t *testing.T) {
|
||||
Hostname: "test-host",
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent := NewAgent(cfg, logger)
|
||||
agent, _ := NewAgent(cfg, logger)
|
||||
|
||||
entries := agent.parsePEMFile(certPath)
|
||||
|
||||
@@ -1447,3 +1448,244 @@ func TestDiscoveredCertEntry_ValidFields(t *testing.T) {
|
||||
t.Error("PEMData should not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HTTPS-Everywhere milestone (v2.2, §3.2 / §7) — Phase 5 client-side tests.
|
||||
//
|
||||
// These tests pin the agent's pre-flight HTTPS-scheme guard and the TLS
|
||||
// configuration surface (CA bundle loading + TLS 1.3 round-trip) so that
|
||||
// regressions surface at unit-test time, not at the first heartbeat of a
|
||||
// production rollout. Matches the same contract asserted by the sibling
|
||||
// binaries cmd/cli/main_test.go and cmd/mcp-server/main_test.go — the three
|
||||
// must stay in lock-step because all three are HTTPS-only clients of the
|
||||
// same control plane.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestValidateHTTPSScheme pins the pre-flight URL-scheme guard that the
|
||||
// HTTPS-Everywhere milestone requires on the agent binary startup path. The
|
||||
// agent's diagnostic is distinct from the CLI/MCP variants because it names
|
||||
// CERTCTL_SERVER_URL (the only input channel — no --server flag on the
|
||||
// agent). Every case here mirrors the dispatch arms in cmd/agent/main.go:
|
||||
// validateHTTPSScheme; drifting the error-message substrings is what this
|
||||
// test is here to catch.
|
||||
func TestValidateHTTPSScheme(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
serverURL string
|
||||
wantErr bool
|
||||
wantErrSub string
|
||||
}{
|
||||
{
|
||||
name: "https URL passes",
|
||||
serverURL: "https://certctl-server:8443",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "https URL with path passes",
|
||||
serverURL: "https://certctl.example.com/api/v1",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "uppercase HTTPS scheme passes (url.Parse lowercases)",
|
||||
serverURL: "HTTPS://certctl-server:8443",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty URL rejected names CERTCTL_SERVER_URL",
|
||||
serverURL: "",
|
||||
wantErr: true,
|
||||
wantErrSub: "CERTCTL_SERVER_URL is empty",
|
||||
},
|
||||
{
|
||||
name: "plaintext http rejected",
|
||||
serverURL: "http://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "plaintext http://",
|
||||
},
|
||||
{
|
||||
name: "bare host missing scheme falls through to unsupported",
|
||||
serverURL: "localhost:8443",
|
||||
wantErr: true,
|
||||
// url.Parse treats "localhost:8443" as scheme=localhost,
|
||||
// opaque=8443 — exercises the default arm (unsupported scheme)
|
||||
// rather than the empty-scheme arm. Both are fail-closed, which
|
||||
// is what we care about.
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
{
|
||||
name: "path-only URL rejected",
|
||||
serverURL: "//certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "missing a scheme",
|
||||
},
|
||||
{
|
||||
name: "unsupported scheme rejected",
|
||||
serverURL: "ftp://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
{
|
||||
name: "ws scheme rejected",
|
||||
serverURL: "ws://certctl-server:8443",
|
||||
wantErr: true,
|
||||
wantErrSub: "unsupported scheme",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := validateHTTPSScheme(tt.serverURL)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("validateHTTPSScheme(%q) err=%v wantErr=%v", tt.serverURL, err, tt.wantErr)
|
||||
}
|
||||
if tt.wantErr && tt.wantErrSub != "" && !strings.Contains(err.Error(), tt.wantErrSub) {
|
||||
t.Errorf("validateHTTPSScheme(%q) err=%q must contain %q so operators see the right diagnostic",
|
||||
tt.serverURL, err.Error(), tt.wantErrSub)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// writeTestCABundle PEM-encodes a cert's DER bytes as a single "CERTIFICATE"
// block and writes the result to dir/filename. Used by the CA-bundle tests so
// each case owns a distinct file path (matters for the "missing file" case,
// which must point at a path that provably does not exist).
//
// The test is failed immediately via t.Fatalf if the file cannot be written.
// Returns the full path of the written bundle.
func writeTestCABundle(t *testing.T, dir string, certDER []byte, filename string) string {
	t.Helper()

	path := filepath.Join(dir, filename)
	pemBytes := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certDER})
	if err := os.WriteFile(path, pemBytes, 0644); err != nil {
		t.Fatalf("writing CA bundle %q: %v", path, err)
	}
	return path
}
|
||||
|
||||
// TestNewAgent_CABundle_Success confirms that a well-formed PEM bundle gets
|
||||
// parsed into an x509.CertPool and wired onto the agent's HTTP client
|
||||
// transport. This is the happy path the docs/tls.md "Private CA signed
|
||||
// server cert" section depends on.
|
||||
func TestNewAgent_CABundle_Success(t *testing.T) {
|
||||
cert, err := generateTestCertWithCN("test.certctl.local")
|
||||
if err != nil {
|
||||
t.Fatalf("generateTestCertWithCN: %v", err)
|
||||
}
|
||||
bundlePath := writeTestCABundle(t, t.TempDir(), cert.Raw, "ca-bundle.pem")
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent, err := NewAgent(&AgentConfig{
|
||||
ServerURL: "https://certctl-server:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
CABundlePath: bundlePath,
|
||||
}, logger)
|
||||
if err != nil {
|
||||
t.Fatalf("NewAgent with valid CA bundle err=%v want nil", err)
|
||||
}
|
||||
|
||||
transport, ok := agent.client.Transport.(*http.Transport)
|
||||
if !ok {
|
||||
t.Fatalf("agent.client.Transport is %T; want *http.Transport", agent.client.Transport)
|
||||
}
|
||||
if transport.TLSClientConfig == nil {
|
||||
t.Fatal("TLSClientConfig is nil; HTTPS-everywhere milestone requires a non-nil TLS config")
|
||||
}
|
||||
if transport.TLSClientConfig.MinVersion != tls.VersionTLS13 {
|
||||
t.Errorf("MinVersion=%x want TLS 1.3 (%x) per §2.3 of the milestone spec",
|
||||
transport.TLSClientConfig.MinVersion, tls.VersionTLS13)
|
||||
}
|
||||
if transport.TLSClientConfig.RootCAs == nil {
|
||||
t.Error("RootCAs is nil; the configured CA bundle was silently dropped")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewAgent_CABundle_MissingFile pins the fail-loud behavior when the
|
||||
// operator points CERTCTL_SERVER_CA_BUNDLE_PATH at a path that does not
|
||||
// exist. Falling back to system roots here would mask a misconfiguration as
|
||||
// a much harder-to-debug TLS handshake failure downstream.
|
||||
func TestNewAgent_CABundle_MissingFile(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
missingPath := filepath.Join(t.TempDir(), "does-not-exist.pem")
|
||||
_, err := NewAgent(&AgentConfig{
|
||||
ServerURL: "https://certctl-server:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
CABundlePath: missingPath,
|
||||
}, logger)
|
||||
if err == nil {
|
||||
t.Fatal("NewAgent err=nil for missing CA bundle path; must fail loud at startup")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "reading CA bundle") {
|
||||
t.Errorf("err=%q must contain \"reading CA bundle\" so operators can trace the cause", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewAgent_CABundle_EmptyPEM covers the "file exists but contains no
|
||||
// valid certs" case (garbage, wrong-format, stripped PEM). AppendCertsFromPEM
|
||||
// returns false in this case; NewAgent must translate that into a fail-loud
|
||||
// startup error rather than quietly carry on with an empty pool.
|
||||
func TestNewAgent_CABundle_EmptyPEM(t *testing.T) {
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
bundlePath := filepath.Join(t.TempDir(), "empty.pem")
|
||||
if err := os.WriteFile(bundlePath, []byte("not a pem-encoded certificate, just garbage\n"), 0644); err != nil {
|
||||
t.Fatalf("writing garbage bundle: %v", err)
|
||||
}
|
||||
_, err := NewAgent(&AgentConfig{
|
||||
ServerURL: "https://certctl-server:8443",
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-test",
|
||||
Hostname: "test-host",
|
||||
CABundlePath: bundlePath,
|
||||
}, logger)
|
||||
if err == nil {
|
||||
t.Fatal("NewAgent err=nil for empty-PEM CA bundle; must fail loud at startup")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "no valid PEM-encoded certificates") {
|
||||
t.Errorf("err=%q must contain \"no valid PEM-encoded certificates\" so operators see why the bundle was rejected", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewAgent_TLSRoundTrip is the end-to-end integration-style check: spin
|
||||
// up an httptest.NewTLSServer (which presents a self-signed cert over TLS
|
||||
// 1.3), feed that cert into the agent as a CA bundle, and confirm the agent
|
||||
// successfully completes a heartbeat round-trip over HTTPS. This proves that
|
||||
// (a) the CA pool is actually being consulted during verification and (b)
|
||||
// the TLS 1.3 MinVersion doesn't break against httptest's default
|
||||
// negotiation. Equivalent to the "TLS handshake succeeds against a
|
||||
// self-signed control plane" integration gate, but runs in-process with no
|
||||
// Docker dependency.
|
||||
func TestNewAgent_TLSRoundTrip(t *testing.T) {
|
||||
var heartbeatHit int
|
||||
server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/api/v1/agents/a-tls-test/heartbeat" && r.Method == http.MethodPost {
|
||||
heartbeatHit++
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// server.Certificate() returns the *x509.Certificate httptest presents;
|
||||
// PEM-encode its DER bytes so NewAgent's AppendCertsFromPEM can ingest it.
|
||||
bundlePath := writeTestCABundle(t, t.TempDir(), server.Certificate().Raw, "httptest-ca.pem")
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
agent, err := NewAgent(&AgentConfig{
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-key",
|
||||
AgentID: "a-tls-test",
|
||||
Hostname: "tls-test-host",
|
||||
CABundlePath: bundlePath,
|
||||
}, logger)
|
||||
if err != nil {
|
||||
t.Fatalf("NewAgent with httptest CA bundle err=%v want nil", err)
|
||||
}
|
||||
|
||||
agent.sendHeartbeat(context.Background())
|
||||
|
||||
if heartbeatHit != 1 {
|
||||
t.Fatalf("heartbeat handler hit %d times; want 1 — the TLS round-trip must actually complete", heartbeatHit)
|
||||
}
|
||||
}
|
||||
|
||||
+134
-19
@@ -8,6 +8,7 @@ import (
|
||||
"crypto/rand"
|
||||
"crypto/rsa"
|
||||
"crypto/sha256"
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"crypto/x509/pkix"
|
||||
"encoding/json"
|
||||
@@ -19,6 +20,7 @@ import (
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -46,13 +48,15 @@ import (
|
||||
|
||||
// AgentConfig represents the agent-side configuration.
//
// ServerURL must use the https:// scheme. CABundlePath and InsecureSkipVerify
// control how the server's TLS certificate is verified.
type AgentConfig struct {
	// ServerURL is the control plane server URL (e.g., https://localhost:8443) — must be https:// scheme.
	ServerURL string
	// APIKey is the agent API key for authentication.
	APIKey string
	// AgentName is the agent name for identification.
	AgentName string
	// AgentID is the agent ID for API calls (set after registration or from env).
	AgentID string
	// Hostname is the server hostname.
	Hostname string
	// KeyDir is the directory for storing private keys (default: /var/lib/certctl/keys).
	KeyDir string
	// DiscoveryDirs lists directories to scan for certificates (comma-separated via env).
	DiscoveryDirs []string
	// CABundlePath is an optional path to a PEM-encoded CA bundle that signed
	// the server's cert (empty = system roots).
	CABundlePath string
	// InsecureSkipVerify is dev-only: skip TLS certificate verification.
	// Never enable in production. See docs/tls.md.
	InsecureSkipVerify bool
}
|
||||
|
||||
// ErrAgentRetired is the sentinel returned by [Agent.Run] when the control
|
||||
@@ -113,16 +117,57 @@ type JobItem struct {
|
||||
}
|
||||
|
||||
// NewAgent creates a new agent instance.
|
||||
func NewAgent(cfg *AgentConfig, logger *slog.Logger) *Agent {
|
||||
//
|
||||
// The returned HTTP client enforces HTTPS-only control-plane access per the
|
||||
// HTTPS-Everywhere milestone (see docs/tls.md). TLS 1.3 is required; the
|
||||
// optional CABundlePath loads a PEM bundle into RootCAs so the agent can
|
||||
// trust internal / self-signed server certs without touching system trust
|
||||
// stores. InsecureSkipVerify is a dev-only escape hatch — callers must log a
|
||||
// loud warning when it's set; never enable in production (see §2.4 of the
|
||||
// milestone spec and docs/upgrade-to-tls.md).
|
||||
//
|
||||
// Returns an error if CABundlePath is set but unreadable or malformed — fail
|
||||
// loud at startup rather than silently fall back to system roots, which would
|
||||
// turn a misconfigured bundle path into a cryptic "x509: certificate signed
|
||||
// by unknown authority" on the first heartbeat.
|
||||
func NewAgent(cfg *AgentConfig, logger *slog.Logger) (*Agent, error) {
|
||||
tlsConfig := &tls.Config{
|
||||
MinVersion: tls.VersionTLS13,
|
||||
InsecureSkipVerify: cfg.InsecureSkipVerify, //nolint:gosec // opt-in dev escape hatch, documented in docs/tls.md
|
||||
}
|
||||
if cfg.CABundlePath != "" {
|
||||
pemBytes, err := os.ReadFile(cfg.CABundlePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading CA bundle at %q: %w", cfg.CABundlePath, err)
|
||||
}
|
||||
pool := x509.NewCertPool()
|
||||
if !pool.AppendCertsFromPEM(pemBytes) {
|
||||
return nil, fmt.Errorf("CA bundle at %q contains no valid PEM-encoded certificates", cfg.CABundlePath)
|
||||
}
|
||||
tlsConfig.RootCAs = pool
|
||||
}
|
||||
|
||||
httpClient := &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: tlsConfig,
|
||||
ForceAttemptHTTP2: true,
|
||||
MaxIdleConns: 10,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
return &Agent{
|
||||
config: cfg,
|
||||
logger: logger,
|
||||
client: &http.Client{Timeout: 30 * time.Second},
|
||||
client: httpClient,
|
||||
heartbeatInterval: 60 * time.Second,
|
||||
pollInterval: 30 * time.Second,
|
||||
discoveryInterval: 6 * time.Hour, // scan for certs every 6 hours
|
||||
retiredSignal: make(chan struct{}),
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
// markRetired records that the control plane has declared this agent retired
|
||||
@@ -1118,12 +1163,14 @@ func certKeyInfo(cert *x509.Certificate) (string, int) {
|
||||
|
||||
func main() {
|
||||
// Parse command-line flags (with env var fallbacks for Docker deployment)
|
||||
serverURL := flag.String("server", getEnvDefault("CERTCTL_SERVER_URL", "http://localhost:8443"), "Control plane server URL")
|
||||
serverURL := flag.String("server", getEnvDefault("CERTCTL_SERVER_URL", "https://localhost:8443"), "Control plane server URL (must be https://)")
|
||||
apiKey := flag.String("api-key", getEnvDefault("CERTCTL_API_KEY", ""), "Agent API key")
|
||||
agentName := flag.String("name", getEnvDefault("CERTCTL_AGENT_NAME", "certctl-agent"), "Agent name")
|
||||
agentID := flag.String("agent-id", getEnvDefault("CERTCTL_AGENT_ID", ""), "Agent ID (from registration)")
|
||||
keyDir := flag.String("key-dir", getEnvDefault("CERTCTL_KEY_DIR", "/var/lib/certctl/keys"), "Directory for storing private keys")
|
||||
discoveryDirsStr := flag.String("discovery-dirs", getEnvDefault("CERTCTL_DISCOVERY_DIRS", ""), "Comma-separated directories to scan for certificates")
|
||||
caBundlePath := flag.String("ca-bundle", getEnvDefault("CERTCTL_SERVER_CA_BUNDLE_PATH", ""), "Path to a PEM-encoded CA bundle that signed the server's TLS cert (optional; falls back to system roots)")
|
||||
insecureSkipVerify := flag.Bool("insecure-skip-verify", getEnvBoolDefault("CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY", false), "Dev-only: skip TLS certificate verification. Never enable in production. See docs/tls.md.")
|
||||
flag.Parse()
|
||||
|
||||
if *apiKey == "" {
|
||||
@@ -1137,6 +1184,18 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Pre-flight URL-scheme validation — reject plaintext http:// before any
|
||||
// network call. The HTTPS-Everywhere milestone (§2.4, §7) mandates that
|
||||
// mis-configured agents fail loudly at startup with a diagnostic pointing
|
||||
// at the upgrade guide, rather than producing a TCP-refused or
|
||||
// TLS-handshake-error that obscures the actual cause.
|
||||
if err := validateHTTPSScheme(*serverURL); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
fmt.Fprintf(os.Stderr, "\nThe certctl control plane is HTTPS-only as of v2.2.\n")
|
||||
fmt.Fprintf(os.Stderr, "See docs/upgrade-to-tls.md for the cutover walkthrough.\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Set up structured logging
|
||||
logLevel := slog.LevelInfo
|
||||
if getEnvDefault("CERTCTL_LOG_LEVEL", "info") == "debug" {
|
||||
@@ -1165,17 +1224,27 @@ func main() {
|
||||
|
||||
// Create agent configuration
|
||||
agentCfg := &AgentConfig{
|
||||
ServerURL: *serverURL,
|
||||
APIKey: *apiKey,
|
||||
AgentName: *agentName,
|
||||
AgentID: *agentID,
|
||||
Hostname: hostname,
|
||||
KeyDir: *keyDir,
|
||||
DiscoveryDirs: discoveryDirs,
|
||||
ServerURL: *serverURL,
|
||||
APIKey: *apiKey,
|
||||
AgentName: *agentName,
|
||||
AgentID: *agentID,
|
||||
Hostname: hostname,
|
||||
KeyDir: *keyDir,
|
||||
DiscoveryDirs: discoveryDirs,
|
||||
CABundlePath: *caBundlePath,
|
||||
InsecureSkipVerify: *insecureSkipVerify,
|
||||
}
|
||||
|
||||
if agentCfg.InsecureSkipVerify {
|
||||
logger.Warn("TLS certificate verification is disabled (CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY=true) — never enable this in production")
|
||||
}
|
||||
|
||||
// Create and start agent
|
||||
agent := NewAgent(agentCfg, logger)
|
||||
agent, err := NewAgent(agentCfg, logger)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: failed to initialize agent: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Create context with cancellation for graceful shutdown
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -1233,3 +1302,49 @@ func getEnvDefault(key, defaultValue string) string {
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
// getEnvBoolDefault parses the environment variable named key as a boolean.
//
// Matching is case-insensitive and ignores surrounding whitespace:
//   - "1", "t", "true", "yes", "on"  -> true
//   - "0", "f", "false", "no", "off" -> false
//
// An unset, empty, whitespace-only, or unrecognized value returns
// defaultValue. Note that a recognized falsy spelling returns false even when
// defaultValue is true. Kept permissive on purpose so operators can flip the
// dev-only TLS skip-verify toggle with any common spelling without having to
// remember exactly what we parse.
func getEnvBoolDefault(key string, defaultValue bool) bool {
	// Normalize once so the switch below only has to list lowercase forms.
	// An unset variable yields "", which falls through to the default branch.
	normalized := strings.ToLower(strings.TrimSpace(os.Getenv(key)))

	switch normalized {
	case "1", "t", "true", "yes", "on":
		return true
	case "0", "f", "false", "no", "off":
		return false
	default:
		// Empty or unrecognized: defer to the caller-supplied default rather
		// than guessing.
		return defaultValue
	}
}
|
||||
|
||||
// validateHTTPSScheme enforces the HTTPS-Everywhere milestone's §7 acceptance
// criterion: "Agent with CERTCTL_SERVER_URL=http://... fails at startup with
// a fail-loud diagnostic pointing at docs/upgrade-to-tls.md. Not TCP-refused,
// not TLS-handshake-error — a pre-flight config validation failure before any
// network call."
//
// It returns nil only when serverURL parses, its scheme is https (compared
// case-insensitively), and it names a host. An https URL without a host
// (e.g. "https://", "https://:8443", or "https:host:8443" with the "//"
// missing) is rejected here so that it does not surface later as an opaque
// dial error. For every other input it returns a descriptive error; the
// caller prints the upgrade guide pointer and exits non-zero.
func validateHTTPSScheme(serverURL string) error {
	if serverURL == "" {
		return fmt.Errorf("CERTCTL_SERVER_URL is empty — set it to an https:// URL (e.g., https://certctl-server:8443)")
	}

	u, err := url.Parse(serverURL)
	if err != nil {
		return fmt.Errorf("CERTCTL_SERVER_URL %q is not a valid URL: %w", serverURL, err)
	}

	switch strings.ToLower(u.Scheme) {
	case "https":
		// Hostname strips any port and IPv6 brackets, so ":8443" alone and
		// opaque forms such as "https:host:8443" are both caught here.
		if u.Hostname() == "" {
			return fmt.Errorf("CERTCTL_SERVER_URL %q is missing a host — expected https://host[:port]", serverURL)
		}
		return nil
	case "http":
		return fmt.Errorf("CERTCTL_SERVER_URL %q uses plaintext http:// — the certctl control plane is HTTPS-only", serverURL)
	case "":
		return fmt.Errorf("CERTCTL_SERVER_URL %q is missing a scheme — expected https://", serverURL)
	default:
		return fmt.Errorf("CERTCTL_SERVER_URL %q uses unsupported scheme %q — expected https://", serverURL, u.Scheme)
	}
}
|
||||
|
||||
@@ -228,7 +228,7 @@ func TestReportVerificationResult_Success(t *testing.T) {
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-api-key",
|
||||
}
|
||||
agent := NewAgent(cfg, nil)
|
||||
agent, _ := NewAgent(cfg, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
@@ -244,7 +244,7 @@ func TestReportVerificationResult_Success(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestReportVerificationResult_MissingFields(t *testing.T) {
|
||||
agent := NewAgent(&AgentConfig{}, nil)
|
||||
agent, _ := NewAgent(&AgentConfig{}, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
Verified: true,
|
||||
@@ -343,7 +343,7 @@ func TestReportVerificationResult_ServerError(t *testing.T) {
|
||||
ServerURL: server.URL,
|
||||
APIKey: "test-api-key",
|
||||
}
|
||||
agent := NewAgent(cfg, nil)
|
||||
agent, _ := NewAgent(cfg, nil)
|
||||
|
||||
result := &VerificationResult{
|
||||
ExpectedFingerprint: "abc123",
|
||||
|
||||
Reference in New Issue
Block a user