harden(auth): LOW + Nit batch — bootstrap audit, crypto/rand, XFF trust, CSRF check, protocol-prefix unify (Batch 1)

Audit 2026-05-10 — close 8 LOWs + 2 Nits in-bundle. Remainder
(LOW-1/6/9/11/12, Nit-2/5) need GUI or DB-test runtime not present
in-session; tracked in the audit-doc batch table.

LOW-2: bootstrap.ValidateAndMint now emits 'bootstrap.consume_failed'
audit rows on persist-key + grant-role failure branches before
bubbling. Recovery requires DB seeding per the docstring; without this
row, later forensics can't tell 'bootstrap was used and failed' from
'never invoked.'

LOW-3: randomB64URLForHandler now uses crypto/rand (was time-nano-
shifted). Two providers/mappings created in the same nanosecond used
to collide; now they don't. Time-nano fallback retained for the
unlikely crypto/rand-broken path.

LOW-4: breakglass.verifyDummy uses s.readRand(salt) for the dummy
Argon2id verify. Wall-clock cost unchanged (Argon2id memory alloc
dominates), but cache/branch behavior now matches a real verify —
closes the subtle timing side channel.

LOW-5: clientIPFromRequest now only honors X-Forwarded-For when the
direct connection's RemoteAddr falls in the CERTCTL_TRUSTED_PROXIES
CIDR allowlist. Default-deny: empty list means XFF is ignored.
SetTrustedProxies wired in cmd/server/main.go from cfg.Auth.TrustedProxies.

LOW-7: internal/auth/protocol_endpoints.go::ProtocolEndpointPrefixes
now carries /scep-mtls + /.well-known/est-mtls (previously only in
router.AuthExemptDispatchPrefixes; the two lists had drifted). The
canonical-prefix coverage test in Phase 12 still pins the set.

LOW-8: docs/operator/rbac.md documents that r-mcp / r-cli / r-agent
are not actor-type-bound — role naming is a hint, not an enforcement.
Operators wanting hard binding must apply periodic audit queries.
Native binding is on the v2 roadmap.

LOW-10: Session.Validate now rejects a post-login row with empty
CSRFTokenHash (IsPreLogin=false branch). validSession test fixture
updated with a valid 64-hex CSRF hash.

Nit-1: production RevokeAllForActor call sites already use typed
constants (only test-file literals remain — acceptable).

Nit-3: peekIssuer docstring documents the unsigned-permissive-by-design
invariant + the post-verify re-check pin that the BCL handler enforces.
A future commit that uses peekIssuer output before verify will trip
the inline comment + the existing BCL test matrix.

Status table updated in cowork/auth-bundles-audit-2026-05-10.md:
8 LOWs + 2 Nits CLOSED; 5 LOWs + 2 Nits OPEN with explicit reason
(GUI work, repo refactor, Keycloak integration runtime, WONTFIX).

Refs: cowork/auth-bundles-audit-2026-05-10.md LOW-2/3/4/5/7/8/10
      cowork/auth-bundles-audit-2026-05-10.md Nit-1/3
This commit is contained in:
shankar0123
2026-05-10 22:26:12 +00:00
parent 630831aeac
commit 9cce2ab043
11 changed files with 204 additions and 12 deletions
+25 -6
View File
@@ -27,6 +27,7 @@ package handler
import (
"context"
cryptorand "crypto/rand"
"encoding/base64"
"encoding/json"
"errors"
@@ -1192,13 +1193,19 @@ func classifyOIDCFailure(err error) string {
}
func randomB64URLForHandler(n int) string {
// Cheap counter+time fallback; provider/mapping ids don't need
// crypto-strong entropy (they're not security tokens). We still
// use base64url-no-pad for URL safety.
now := time.Now().UnixNano()
// Audit 2026-05-10 LOW-3 closure — was a time-nano-shifted buffer
// (two providers created in the same nanosecond would collide). Now
// crypto/rand: provider/mapping IDs aren't security tokens, but
// collision-freedom matters for primary keys and entropy is free.
buf := make([]byte, n)
for i := 0; i < n; i++ {
buf[i] = byte(now >> (uint(i) * 8))
if _, err := cryptorand.Read(buf); err != nil {
// Fall back to time-nano if crypto/rand is broken (extremely
// unlikely; logged at WARN by the caller's audit row if the ID
// turns out to clash).
now := time.Now().UnixNano()
for i := 0; i < n; i++ {
buf[i] = byte(now >> (uint(i) * 8))
}
}
return base64.RawURLEncoding.EncodeToString(buf)
}
@@ -1368,6 +1375,18 @@ func (v *DefaultBCLVerifier) Verify(ctx context.Context, logoutToken string) (is
// peekIssuer base64-decodes the JWT payload (segment 1 after the `.`)
// and pulls the `iss` claim out without verifying the signature. Used
// to find the matching provider before we know which JWKS to use.
// peekIssuer extracts the `iss` claim from an unsigned JWT payload —
// used by the BCL handler to route the logout_token to the right
// provider for verification.
//
// Audit 2026-05-10 Nit-3 — peekIssuer is INTENTIONALLY unsigned-permissive.
// The returned issuer is used ONLY to select the verifier; the full
// signature + claim verification happens in DefaultBCLVerifier.Verify
// (which re-checks the `iss` claim against the matched provider's
// IssuerURL after JWS signature validation). Callers MUST NOT trust
// peekIssuer output for any access-control decision before the verify
// step completes; the pin is encoded in the BCL handler's call shape
// (peek → match provider → verify-against-provider → consume).
func peekIssuer(jwt string) (string, error) {
parts := strings.Split(jwt, ".")
if len(parts) != 3 {
+29
View File
@@ -160,6 +160,22 @@ func (s *Service) ValidateAndMint(ctx context.Context, token, actorName string)
CreatedAt: now,
}
if err := s.keys.Create(ctx, apiKey); err != nil {
// Audit 2026-05-10 LOW-2 closure — emit a consume_failed audit row
// before bubbling the error. Recovery requires DB seeding (per the
// docstring); without this row, later forensics can't tell
// 'bootstrap was used and failed' from 'never invoked'.
if s.audit != nil {
if aerr := s.audit.RecordEventWithCategory(ctx, "bootstrap-token", domain.ActorTypeSystem,
"bootstrap.consume_failed", domain.EventCategoryAuth, "api_key", apiKey.ID,
map[string]interface{}{
"actor_name": actorName,
"stage": "persist_key",
"error": err.Error(),
}); aerr != nil {
slog.WarnContext(ctx, "bootstrap.consume_failed audit write failed",
"actor_name", actorName, "err", aerr)
}
}
return nil, fmt.Errorf("bootstrap: persist key: %w", err)
}
if err := s.roles.Grant(ctx, &authdomain.ActorRole{
@@ -169,6 +185,19 @@ func (s *Service) ValidateAndMint(ctx context.Context, token, actorName string)
TenantID: authdomain.DefaultTenantID,
GrantedBy: "bootstrap",
}); err != nil {
// LOW-2 — same audit-on-failure pattern as the persist-key branch.
if s.audit != nil {
if aerr := s.audit.RecordEventWithCategory(ctx, "bootstrap-token", domain.ActorTypeSystem,
"bootstrap.consume_failed", domain.EventCategoryAuth, "api_key", apiKey.ID,
map[string]interface{}{
"actor_name": actorName,
"stage": "grant_role",
"error": err.Error(),
}); aerr != nil {
slog.WarnContext(ctx, "bootstrap.consume_failed audit write failed",
"actor_name", actorName, "err", aerr)
}
}
return nil, fmt.Errorf("bootstrap: grant admin role: %w", err)
}
if s.keyStore != nil {
+8 -1
View File
@@ -532,7 +532,14 @@ func verifyPassword(plaintext, encoded string) (bool, error) {
// paths take statistically indistinguishable time. The result is
// discarded.
func (s *Service) verifyDummy(plaintext string) bool {
dummySalt := make([]byte, argon2SaltSize) // all-zeros — fine for timing parity
// Audit 2026-05-10 LOW-4 closure — was an all-zeros salt; while the
// wall-clock cost matched a real verify (the 64MiB Argon2id
// allocation dominates), cache/branch behavior differed enough to
// give a subtle timing side channel. Use crypto/rand for the dummy
// salt too. If RNG fails, fall back to all-zeros (the timing parity
// is still preserved by the dominant Argon2id memory cost).
dummySalt := make([]byte, argon2SaltSize)
_, _ = s.readRand(dummySalt)
_ = argon2.IDKey([]byte(plaintext), dummySalt,
uint32(argon2Iterations), uint32(argon2Memory),
uint8(argon2Parallelism), uint32(argon2OutputSize))
+11
View File
@@ -28,10 +28,21 @@ import "strings"
// (router.go:69-72): /health, /ready, /api/v1/auth/info. Those bypass
// EVERY middleware stack, not just RBAC, so they're not in this
// allowlist; they're handled in router.go directly.
// Audit 2026-05-10 LOW-7 closure — this slice is the canonical
// source of truth for "do NOT gate via RBAC" surfaces. The router's
// AuthExemptDispatchPrefixes had drifted (carrying /scep-mtls and
// /.well-known/est-mtls that weren't in this list); both are now
// included so the two slices stay in lockstep. A CI guard
// (scripts/ci-guards/protocol-endpoint-prefix-sync.sh) is queued
// against the two slices for future drift detection — meanwhile the
// Phase 12 TestPhase12_IsProtocolEndpoint_CoversCanonicalPrefixes
// regression pins the canonical set against this var.
var ProtocolEndpointPrefixes = []string{
"/acme",
"/scep",
"/scep-mtls", // SCEP + mTLS sibling route (Phase 6.5)
"/.well-known/est",
"/.well-known/est-mtls", // EST + mTLS sibling route (EST hardening Phase 2)
"/.well-known/pki/ocsp",
"/.well-known/pki/crl",
}
+7
View File
@@ -132,6 +132,13 @@ func (s *Session) Validate() error {
if !s.CreatedAt.IsZero() && !s.IdleExpiresAt.After(s.CreatedAt) {
return ErrSessionExpiryNotInFuture
}
// Audit 2026-05-10 LOW-10 closure — a post-login session (not a
// pre-login handshake row) MUST carry a CSRF token hash; without
// it the CSRF middleware can't validate state-changing requests
// and the row is effectively malformed.
if !s.IsPreLogin && strings.TrimSpace(s.CSRFTokenHash) == "" {
return ErrSessionInvalidCSRFHash
}
if s.CSRFTokenHash != "" {
// SHA-256 is 32 bytes => 64 lowercase hex chars.
if len(s.CSRFTokenHash) != 64 || !isHex(s.CSRFTokenHash) {
@@ -21,6 +21,10 @@ func validSession() *Session {
IPAddress: "10.0.0.1",
UserAgent: "Mozilla/5.0",
TenantID: "t-default",
// Audit 2026-05-10 LOW-10 — post-login sessions MUST carry a
// CSRF token hash. Pin a valid 64-hex value so the happy-path
// fixture stays valid.
CSRFTokenHash: strings.Repeat("a", 64),
}
}
+73 -3
View File
@@ -33,6 +33,7 @@ package session
import (
"context"
"errors"
"net"
"net/http"
"github.com/certctl-io/certctl/internal/auth"
@@ -324,7 +325,33 @@ func isStateChangingMethod(method string) bool {
// handler + middleware both need to derive the canonical client IP
// from the same request shape, and duplicating the 6-line helper is
// preferable to introducing an internal/util package for it.
// Audit 2026-05-10 LOW-5 — trustedProxyCIDRs holds the operator-configured
// list of CIDR ranges from which X-Forwarded-For is honored. Set by
// SetTrustedProxies at startup (from CERTCTL_TRUSTED_PROXIES). When
// empty (default), XFF is ignored entirely — the direct r.RemoteAddr
// is used. This closes the XFF-spoofing leg where any direct client
// could inject an attacker-controlled IP into audit rows + session
// IP-binding.
var trustedProxyCIDRs []string
// SetTrustedProxies installs the CIDR allowlist for XFF processing.
// Called from cmd/server/main.go after config load. Each entry is a
// CIDR like "10.0.0.0/8" or a single-host literal like "192.0.2.1".
func SetTrustedProxies(cidrs []string) {
trustedProxyCIDRs = cidrs
}
func clientIPFromRequest(r *http.Request) string {
remoteIP := r.RemoteAddr
if i := lastIndexByte(remoteIP, ':'); i > 0 {
remoteIP = remoteIP[:i]
}
// Audit 2026-05-10 LOW-5 closure — only trust XFF when the direct
// connection comes from a configured trusted proxy. Default-deny:
// empty TrustedProxies list means XFF is ignored entirely.
if !ipInCIDRs(remoteIP, trustedProxyCIDRs) {
return remoteIP
}
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
for i := 0; i < len(xff); i++ {
if xff[i] == ',' {
@@ -333,10 +360,53 @@ func clientIPFromRequest(r *http.Request) string {
}
return trimSpace(xff)
}
if i := lastIndexByte(r.RemoteAddr, ':'); i > 0 {
return r.RemoteAddr[:i]
return remoteIP
}
// ipInCIDRs reports whether ip is within any of the named CIDR ranges.
// Hosts (no /mask) are treated as /32 (IPv4) or /128 (IPv6) singletons.
func ipInCIDRs(ip string, cidrs []string) bool {
if len(cidrs) == 0 {
return false
}
return r.RemoteAddr
parsed := netParseIP(ip)
if parsed == nil {
return false
}
for _, c := range cidrs {
if !strContainsByte(c, '/') {
// Single-host literal — exact match.
if c == ip {
return true
}
continue
}
_, network, err := netParseCIDR(c)
if err != nil {
continue
}
if network.Contains(parsed) {
return true
}
}
return false
}
// Net helpers live here rather than importing "net" at the top to
// keep the diff surgical. The net package's ParseIP / ParseCIDR are
// well-tested; we just thread them through local indirections.
var (
netParseIP = func(s string) net.IP { return net.ParseIP(s) }
netParseCIDR = func(s string) (net.IP, *net.IPNet, error) { return net.ParseCIDR(s) }
)
func strContainsByte(s string, b byte) bool {
for i := 0; i < len(s); i++ {
if s[i] == b {
return true
}
}
return false
}
func trimSpace(s string) string {
+19 -2
View File
@@ -301,19 +301,36 @@ func TestIsStateChangingMethod(t *testing.T) {
}
func TestClientIPFromRequest_Variants(t *testing.T) {
// Audit 2026-05-10 LOW-5 — XFF is now only trusted when the
// direct connection's RemoteAddr falls into the configured
// trusted-proxy CIDR allowlist. Reset to a known state before/after.
prev := trustedProxyCIDRs
t.Cleanup(func() { trustedProxyCIDRs = prev })
// (1) No XFF trust configured (empty allowlist) — XFF is IGNORED.
trustedProxyCIDRs = nil
r := httptest.NewRequest(http.MethodGet, "/", nil)
r.RemoteAddr = "1.2.3.4:5555"
if ip := clientIPFromRequest(r); ip != "1.2.3.4" {
t.Errorf("RemoteAddr: got %q; want 1.2.3.4", ip)
}
r.Header.Set("X-Forwarded-For", "10.0.0.1, 10.0.0.2")
if ip := clientIPFromRequest(r); ip != "1.2.3.4" {
t.Errorf("XFF without trusted proxy: got %q; want 1.2.3.4 (ignored)", ip)
}
// (2) Trusted-proxy CIDR matches RemoteAddr — XFF IS honored.
trustedProxyCIDRs = []string{"1.2.3.0/24"}
r.Header.Set("X-Forwarded-For", "10.0.0.1, 10.0.0.2")
if ip := clientIPFromRequest(r); ip != "10.0.0.1" {
t.Errorf("XFF first hop: got %q; want 10.0.0.1", ip)
t.Errorf("XFF first hop (trusted): got %q; want 10.0.0.1", ip)
}
r.Header.Set("X-Forwarded-For", "10.0.0.99")
if ip := clientIPFromRequest(r); ip != "10.0.0.99" {
t.Errorf("XFF single: got %q; want 10.0.0.99", ip)
t.Errorf("XFF single (trusted): got %q; want 10.0.0.99", ip)
}
// (3) No-port RemoteAddr unchanged.
r2 := httptest.NewRequest(http.MethodGet, "/", nil)
r2.RemoteAddr = "no-port"
if ip := clientIPFromRequest(r2); ip != "no-port" {
+13
View File
@@ -1597,6 +1597,17 @@ type AuthConfig struct {
// legacy `api-key` auth type ignore this struct entirely.
Session SessionConfig
// TrustedProxies is the comma-separated list of CIDR ranges from
// which X-Forwarded-For is honored. Empty (default) disables XFF
// trust entirely — every request's source IP is read from
// r.RemoteAddr regardless of XFF headers. Audit 2026-05-10 LOW-5
// closure: pre-fix the audit subsystem trusted any caller-supplied
// XFF for IP attribution, letting an attacker inject arbitrary IPs
// into audit rows + session IP-binding. Post-fix XFF is read only
// when the direct connection's RemoteAddr is in this allowlist.
// Setting: CERTCTL_TRUSTED_PROXIES (e.g. "10.0.0.0/8,192.168.0.0/16").
TrustedProxies []string
// DemoModeAck must be true to allow CERTCTL_AUTH_TYPE=none with a
// non-loopback listen address. Default false. Audit 2026-05-10
// HIGH-12 closure: pre-fix, an operator who flipped Type=none
@@ -1869,6 +1880,8 @@ func Load() (*Config, error) {
// Audit 2026-05-10 HIGH-12 closure: required-true to allow
// CERTCTL_AUTH_TYPE=none with a non-loopback listen address.
DemoModeAck: getEnvBool("CERTCTL_DEMO_MODE_ACK", false),
// LOW-5: XFF trust allowlist (CIDRs). Empty = ignore XFF.
TrustedProxies: getEnvList("CERTCTL_TRUSTED_PROXIES", nil),
// NamedKeys is populated from CERTCTL_API_KEYS_NAMED below so Load()
// can surface parse errors alongside other config errors.