mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 18:21:32 +00:00
52248be717
Breaking change release. Plaintext HTTP listener removed. The certctl control plane now terminates TLS 1.3 on :8443 via http.Server.ListenAndServeTLS. No CERTCTL_TLS_ENABLED=false escape hatch. No dual-listener mode. One-step cutover per docs/upgrade-to-tls.md. Server - cmd/server/tls.go: certHolder with SIGHUP hot-reload + atomic cert swap, buildServerTLSConfig (TLS 1.3 min, GetCertificate callback), preflightServerTLS validation - cmd/server/main.go: ListenAndServeTLS in place of ListenAndServe, watchSIGHUP wiring, cert/key path config threading - tls_test.go: 418-line regression coverage of reload, preflight, callback behavior, SAN validation Config - CERTCTL_TLS_CERT_PATH / CERTCTL_TLS_KEY_PATH (required) - Plaintext rejection: agents/CLI/MCP pre-flight-fail on http:// URLs with a pointer to docs/upgrade-to-tls.md Agents, CLI, MCP - All three pre-flight-reject http:// URLs with fail-loud diagnostic - CERTCTL_SERVER_CA_BUNDLE_PATH for private-CA trust - CERTCTL_SERVER_TLS_INSECURE_SKIP_VERIFY for dev-only bypass (loud warning on startup) - install-agent.sh emits both vars as commented template lines docker-compose - certctl-tls-init sidecar generates SAN-valid self-signed cert into deploy/test/certs/ on first boot - All demo-stack curls pin against ca.crt with --cacert Helm chart - Three TLS provisioning modes, exactly one required: - server.tls.existingSecret (operator-supplied) - server.tls.certManager.enabled (cert-manager integration) - server.tls.selfSigned.enabled (eval only — not for production) - server-certificate.yaml template for cert-manager mode - helm install without a TLS source fails at template render with a pointer to docs/tls.md CI - .github/workflows/ci.yml Helm Chart Validation step renders the chart in both existingSecret and cert-manager modes, plus an inverse guard-regression test that asserts helm template MUST refuse to render when no TLS source is configured. 
Previously the single `helm template` invocation hit the certctl.tls.required fail-loud guard and exit-1'd CI. Four invocations now: lint (existingSecret), template (existingSecret), template (cert-manager), template (no args — must fail). Integration tests - deploy/test/integration_test.go stands up the Compose stack over HTTPS, extracts the CA bundle, and exercises every certctl API over https://localhost:8443 - All 34 integration subtests green (per Phase 8 local CI-parity) Documentation - New: docs/tls.md (provisioning patterns, rotation, SIGHUP reload) - New: docs/upgrade-to-tls.md (one-step cutover, no-downgrade warnings, fleet-roll sequencing) - CHANGELOG.md: v2.2.0 "HTTPS Everywhere — The Irony" entry (file heading unchanged; release tag is v2.0.47) - All curls in docs/, examples/, deploy/helm/ guides use https://localhost:8443 --cacert Verification - grep -rn "ListenAndServe[^T]" cmd/ internal/ → 0 hits - grep -rn "\"http://" cmd/ internal/ → 2 benign hits (Caddy admin API default, SSRF doc comment) — zero certctl endpoints - Tasks #197–#206 (Phases 0–8) all closed in the tracker Files: 65 changed, 3489 insertions, 372 deletions (pre-CI-fix).
165 lines
6.6 KiB
Go
165 lines
6.6 KiB
Go
package main
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"os/signal"
|
|
"sync"
|
|
"syscall"
|
|
)
|
|
|
|
// certHolder owns the TLS certificate presented by the HTTPS server and
// remembers where that certificate was loaded from, so a SIGHUP handler can
// replace it while the server keeps running. A *tls.Config whose
// GetCertificate hook points at (*certHolder).GetCertificate consults the
// holder on every ClientHello; a successful reload is therefore visible to
// the next new connection, and connections that are already established are
// not dropped.
//
// Concurrency: crypto/tls calls GetCertificate from handshake goroutines,
// while Reload runs on the SIGHUP watcher goroutine. A plain sync.Mutex is
// used because the critical section is a single pointer read or write.
type certHolder struct {
	// mu guards cert.
	mu sync.Mutex
	// cert is the certificate currently handed out to handshakes.
	cert *tls.Certificate

	// certPath and keyPath are the on-disk locations of the PEM pair; they
	// are set at construction and re-read by Reload.
	certPath, keyPath string
}
|
|
|
|
// newCertHolder loads the initial cert+key pair from disk and returns a
|
|
// holder ready to serve handshakes. Returns a non-nil error if either file
|
|
// is missing, unreadable, or the pair does not round-trip through
|
|
// tls.LoadX509KeyPair (for example the key does not sign the cert). The
|
|
// caller is expected to treat a non-nil error as a fail-loud startup gate
|
|
// and os.Exit(1) — the HTTPS-everywhere milestone (§3 locked decisions)
|
|
// prohibits plaintext HTTP fallback.
|
|
func newCertHolder(certPath, keyPath string) (*certHolder, error) {
|
|
cert, err := tls.LoadX509KeyPair(certPath, keyPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("load TLS cert/key (cert=%q key=%q): %w", certPath, keyPath, err)
|
|
}
|
|
return &certHolder{
|
|
cert: &cert,
|
|
certPath: certPath,
|
|
keyPath: keyPath,
|
|
}, nil
|
|
}
|
|
|
|
// GetCertificate is the tls.Config.GetCertificate hook. Returns the current
|
|
// cert under the holder's mutex. ClientHelloInfo is ignored — the control
|
|
// plane does not multiplex by SNI.
|
|
func (h *certHolder) GetCertificate(_ *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
|
h.mu.Lock()
|
|
defer h.mu.Unlock()
|
|
return h.cert, nil
|
|
}
|
|
|
|
// Reload re-reads the cert+key pair from disk and swaps the holder
|
|
// atomically on success. On failure the holder retains its previous cert
|
|
// and the error is propagated to the caller — the SIGHUP watcher logs and
|
|
// keeps serving the previous cert rather than crashing on a bad reload.
|
|
// This is deliberately "fail-safe on reload, fail-loud on startup": an
|
|
// operator rotating certs wants a recoverable error, not a restart loop.
|
|
func (h *certHolder) Reload() error {
|
|
cert, err := tls.LoadX509KeyPair(h.certPath, h.keyPath)
|
|
if err != nil {
|
|
return fmt.Errorf("reload TLS cert/key (cert=%q key=%q): %w", h.certPath, h.keyPath, err)
|
|
}
|
|
h.mu.Lock()
|
|
h.cert = &cert
|
|
h.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
// watchSIGHUP installs a signal handler that calls Reload() on each SIGHUP.
|
|
// The returned stop function closes the internal done channel and stops
|
|
// signal delivery so the goroutine can exit cleanly during shutdown. Errors
|
|
// from Reload are logged but do not terminate the watcher — the operator
|
|
// can fix the files and send another SIGHUP.
|
|
//
|
|
// Defensive design note: this deliberately does NOT panic on Reload error
|
|
// even though HTTPS is mission-critical. A rotation that writes half-files
|
|
// (operator overwrites cert.pem then key.pem as two separate copies) would
|
|
// otherwise crash the server mid-rotation. Logging + retaining the old
|
|
// cert gives the operator a bounded window to fix and re-SIGHUP.
|
|
func (h *certHolder) watchSIGHUP(logger *slog.Logger) (stop func()) {
|
|
ch := make(chan os.Signal, 1)
|
|
signal.Notify(ch, syscall.SIGHUP)
|
|
done := make(chan struct{})
|
|
go func() {
|
|
for {
|
|
select {
|
|
case <-ch:
|
|
if err := h.Reload(); err != nil {
|
|
logger.Error("TLS cert reload failed; continuing with previous cert",
|
|
"error", err,
|
|
"cert_path", h.certPath,
|
|
"key_path", h.keyPath)
|
|
continue
|
|
}
|
|
logger.Info("TLS cert reloaded via SIGHUP",
|
|
"cert_path", h.certPath,
|
|
"key_path", h.keyPath)
|
|
case <-done:
|
|
signal.Stop(ch)
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
return func() { close(done) }
|
|
}
|
|
|
|
// buildServerTLSConfig returns the TLS 1.3-only *tls.Config for the HTTPS
|
|
// server. Pinned per HTTPS-everywhere milestone §2.1 + §3 locked decisions:
|
|
//
|
|
// - MinVersion: TLS 1.3 (no TLS 1.2 escape hatch). Go 1.25's crypto/tls
|
|
// automatically rejects older versions.
|
|
// - CurvePreferences: explicit [X25519, P-256]. Explicit ordering keeps
|
|
// the handshake deterministic and documents the accepted curves.
|
|
// - No CipherSuites field: TLS 1.3 cipher suites are not negotiable in
|
|
// the handshake (all three mandatory suites — AES-128-GCM-SHA256,
|
|
// AES-256-GCM-SHA384, CHACHA20-POLY1305-SHA256 — are always offered).
|
|
// Go's crypto/tls ignores CipherSuites for TLS 1.3.
|
|
// - GetCertificate: reads through the holder so SIGHUP rotations take
|
|
// effect on the next new connection without a restart. Setting
|
|
// tls.Config.Certificates directly would pin the first-loaded cert
|
|
// and defeat SIGHUP reload.
|
|
func buildServerTLSConfig(holder *certHolder) *tls.Config {
|
|
return &tls.Config{
|
|
MinVersion: tls.VersionTLS13,
|
|
CurvePreferences: []tls.CurveID{tls.X25519, tls.CurveP256},
|
|
GetCertificate: holder.GetCertificate,
|
|
}
|
|
}
|
|
|
|
// preflightServerTLS is the startup gate for HTTPS. It returns nil only
// when both paths are non-empty, both can be stat'ed, and the pair loads
// through tls.LoadX509KeyPair; otherwise it returns an error describing the
// first check that failed, so the caller can refuse to start the control
// plane (HTTPS-everywhere §3 locked decisions: no plaintext HTTP fallback).
//
// Checks run in this order: cert path empty, key path empty, cert file
// stat, key file stat, pair parse. Errors from os.Stat and
// tls.LoadX509KeyPair are wrapped with %w.
//
// Per the original design note, these checks intentionally duplicate the
// ones in config.Validate() as defense in depth (the same pattern as
// preflightSCEPChallengePassword), and live in their own function so the
// gate can be unit-tested without booting the full server.
func preflightServerTLS(certPath, keyPath string) error {
	// Configuration presence comes first, before touching the filesystem.
	switch {
	case certPath == "":
		return fmt.Errorf("CERTCTL_SERVER_TLS_CERT_PATH is empty: HTTPS-only control plane refuses to start (see docs/tls.md)")
	case keyPath == "":
		return fmt.Errorf("CERTCTL_SERVER_TLS_KEY_PATH is empty: HTTPS-only control plane refuses to start (see docs/tls.md)")
	}

	// Both files must exist before the pair is parsed.
	_, certStatErr := os.Stat(certPath)
	if certStatErr != nil {
		return fmt.Errorf("TLS cert file %q unreadable: %w (see docs/tls.md)", certPath, certStatErr)
	}
	_, keyStatErr := os.Stat(keyPath)
	if keyStatErr != nil {
		return fmt.Errorf("TLS key file %q unreadable: %w (see docs/tls.md)", keyPath, keyStatErr)
	}

	// Finally make sure the two files form a usable certificate/key pair.
	_, pairErr := tls.LoadX509KeyPair(certPath, keyPath)
	if pairErr != nil {
		return fmt.Errorf("TLS cert/key pair invalid (cert=%q key=%q): %w (see docs/tls.md)", certPath, keyPath, pairErr)
	}
	return nil
}
|