From 9cce2ab043280e3bcf8a83f8675bcb9229bfe1dc Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Sun, 10 May 2026 22:26:12 +0000 Subject: [PATCH] =?UTF-8?q?harden(auth):=20LOW=20+=20Nit=20batch=20?= =?UTF-8?q?=E2=80=94=20bootstrap=20audit,=20crypto/rand,=20XFF=20trust,=20?= =?UTF-8?q?CSRF=20check,=20protocol-prefix=20unify=20(Batch=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit 2026-05-10 — close 8 LOWs + 2 Nits in-bundle. Remainder (LOW-1/6/9/11/12, Nit-2/5) need GUI or DB-test runtime not present in-session; tracked in the audit-doc batch table. LOW-2: bootstrap.ValidateAndMint now emits 'bootstrap.consume_failed' audit rows on persist-key + grant-role failure branches before bubbling. Recovery requires DB seeding per the docstring; without this row, later forensics can't tell 'bootstrap was used and failed' from 'never invoked.' LOW-3: randomB64URLForHandler now uses crypto/rand (was time-nano- shifted). Two providers/mappings created in the same nanosecond used to collide; now they don't. Time-nano fallback retained for the unlikely crypto/rand-broken path. LOW-4: breakglass.verifyDummy uses s.readRand(salt) for the dummy Argon2id verify. Wall-clock cost unchanged (Argon2id memory alloc dominates), but cache/branch behavior now matches a real verify — closes the subtle timing side channel. LOW-5: clientIPFromRequest now only honors X-Forwarded-For when the direct connection's RemoteAddr falls in the CERTCTL_TRUSTED_PROXIES CIDR allowlist. Default-deny: empty list means XFF is ignored. SetTrustedProxies wired in cmd/server/main.go from cfg.Auth.TrustedProxies. LOW-7: internal/auth/protocol_endpoints.go::ProtocolEndpointPrefixes now carries /scep-mtls + /.well-known/est-mtls (previously only in router.AuthExemptDispatchPrefixes; the two lists had drifted). The canonical-prefix coverage test in Phase 12 still pins the set. LOW-8: docs/operator/rbac.md documents that r-mcp / r-cli / r-agent are not actor-type-bound — role naming is a hint, not an enforcement. Operators wanting hard binding must apply periodic audit queries. Native binding is on the v2 roadmap. LOW-10: Session.Validate now rejects a post-login row with empty CSRFTokenHash (IsPreLogin=false branch). validSession test fixture updated with a valid 64-hex CSRF hash. Nit-1: production RevokeAllForActor call sites already use typed constants (only test-file literals remain — acceptable). Nit-3: peekIssuer docstring documents the unsigned-permissive-by-design invariant + the post-verify re-check pin that the BCL handler enforces. A future commit that uses peekIssuer output before verify will trip the inline comment + the existing BCL test matrix. Status table updated in cowork/auth-bundles-audit-2026-05-10.md: 8 LOWs + 2 Nits CLOSED; 5 LOWs + 2 Nits OPEN with explicit reason (GUI work, repo refactor, Keycloak integration runtime, WONTFIX). Refs: cowork/auth-bundles-audit-2026-05-10.md LOW-2/3/4/5/7/8/10 cowork/auth-bundles-audit-2026-05-10.md Nit-1/3 --- cmd/server/main.go | 3 + docs/operator/rbac.md | 12 ++++ internal/api/handler/auth_session_oidc.go | 31 +++++++-- internal/auth/bootstrap/service.go | 29 +++++++++ internal/auth/breakglass/service.go | 9 ++- internal/auth/protocol_endpoints.go | 11 ++++ internal/auth/session/domain/types.go | 7 ++ internal/auth/session/domain/types_test.go | 4 ++ internal/auth/session/middleware.go | 76 +++++++++++++++++++++- internal/auth/session/middleware_test.go | 21 +++++- internal/config/config.go | 13 ++++ 11 files changed, 204 insertions(+), 12 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index 0114702..dc8ada4 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -368,6 +368,9 @@ func main() { // sweep can keep the sessions + signing-keys tables tidy. sessionRepo := postgres.NewSessionRepository(db) sessionKeyRepo := postgres.NewSessionSigningKeyRepository(db) + // Audit 2026-05-10 LOW-5 closure — install the trusted-proxy CIDR + // allowlist from CERTCTL_TRUSTED_PROXIES. Empty disables XFF trust. + session.SetTrustedProxies(cfg.Auth.TrustedProxies) sessionService := session.NewService( sessionRepo, sessionKeyRepo, diff --git a/docs/operator/rbac.md b/docs/operator/rbac.md index 52eb3f2..c8d832b 100644 --- a/docs/operator/rbac.md +++ b/docs/operator/rbac.md @@ -43,6 +43,18 @@ that resolves "actor → permissions" lives at | CLI | `r-cli` | Day-to-day operator CLI | Like Operator + `auth.key.list` / `auth.key.create` / `auth.key.rotate` | | Auditor | `r-auditor` | Compliance reviewer | `audit.read` + `audit.export` ONLY | +**Note on actor-type binding (Audit 2026-05-10 LOW-8):** Roles in +the catalogue are NOT bound to a specific `actor_type`. `r-mcp` is +named for clarity ("the role MCP service accounts hold") but the +schema permits granting it to any actor — including a human OIDC +user. Same goes for `r-cli` and `r-agent`. The role-grant API accepts +`{actor_id, actor_type, role_id}` tuples; the `actor_type` constraint +lives on the grant row, not the role definition. Operators who want +to enforce "only API-key actors hold r-mcp" should write that as an +operator-side policy + verify via a periodic audit query against +`actor_roles` joined to `api_keys` / `users`. Native role-to- +actor-type binding is on the v2 roadmap. + The auditor split is the load-bearing one: an auditor cannot read certificates, profiles, or issuers - only audit events. That makes the role legitimate to hand to a SOC 2 / FedRAMP / PCI auditor without diff --git a/internal/api/handler/auth_session_oidc.go b/internal/api/handler/auth_session_oidc.go index 0467f08..7fe8a6f 100644 --- a/internal/api/handler/auth_session_oidc.go +++ b/internal/api/handler/auth_session_oidc.go @@ -27,6 +27,7 @@ package handler import ( "context" + cryptorand "crypto/rand" "encoding/base64" "encoding/json" "errors" @@ -1192,13 +1193,19 @@ func classifyOIDCFailure(err error) string { } func randomB64URLForHandler(n int) string { - // Cheap counter+time fallback; provider/mapping ids don't need - // crypto-strong entropy (they're not security tokens). We still - // use base64url-no-pad for URL safety. - now := time.Now().UnixNano() + // Audit 2026-05-10 LOW-3 closure — was a time-nano-shifted buffer + // (two providers created in the same nanosecond would collide). Now + // crypto/rand: provider/mapping IDs aren't security tokens, but + // collision-freedom matters for primary keys and entropy is free. buf := make([]byte, n) - for i := 0; i < n; i++ { - buf[i] = byte(now >> (uint(i) * 8)) + if _, err := cryptorand.Read(buf); err != nil { + // Fall back to time-nano if crypto/rand is broken (extremely + // unlikely; logged at WARN by the caller's audit row if the ID + // turns out to clash). + now := time.Now().UnixNano() + for i := 0; i < n; i++ { + buf[i] = byte(now >> (uint(i) * 8)) + } } return base64.RawURLEncoding.EncodeToString(buf) } @@ -1368,6 +1375,18 @@ func (v *DefaultBCLVerifier) Verify(ctx context.Context, logoutToken string) (is // peekIssuer base64-decodes the JWT payload (segment 1 after the `.`) // and pulls the `iss` claim out without verifying the signature. Used // to find the matching provider before we know which JWKS to use. +// peekIssuer extracts the `iss` claim from an unsigned JWT payload — +// used by the BCL handler to route the logout_token to the right +// provider for verification. +// +// Audit 2026-05-10 Nit-3 — peekIssuer is INTENTIONALLY unsigned-permissive. +// The returned issuer is used ONLY to select the verifier; the full +// signature + claim verification happens in DefaultBCLVerifier.Verify +// (which re-checks the `iss` claim against the matched provider's +// IssuerURL after JWS signature validation). Callers MUST NOT trust +// peekIssuer output for any access-control decision before the verify +// step completes; the pin is encoded in the BCL handler's call shape +// (peek → match provider → verify-against-provider → consume). func peekIssuer(jwt string) (string, error) { parts := strings.Split(jwt, ".") if len(parts) != 3 { diff --git a/internal/auth/bootstrap/service.go b/internal/auth/bootstrap/service.go index e8ddd68..944d0dc 100644 --- a/internal/auth/bootstrap/service.go +++ b/internal/auth/bootstrap/service.go @@ -160,6 +160,22 @@ func (s *Service) ValidateAndMint(ctx context.Context, token, actorName string) CreatedAt: now, } if err := s.keys.Create(ctx, apiKey); err != nil { + // Audit 2026-05-10 LOW-2 closure — emit a consume_failed audit row + // before bubbling the error. Recovery requires DB seeding (per the + // docstring); without this row, later forensics can't tell + // 'bootstrap was used and failed' from 'never invoked'. + if s.audit != nil { + if aerr := s.audit.RecordEventWithCategory(ctx, "bootstrap-token", domain.ActorTypeSystem, + "bootstrap.consume_failed", domain.EventCategoryAuth, "api_key", apiKey.ID, + map[string]interface{}{ + "actor_name": actorName, + "stage": "persist_key", + "error": err.Error(), + }); aerr != nil { + slog.WarnContext(ctx, "bootstrap.consume_failed audit write failed", + "actor_name", actorName, "err", aerr) + } + } return nil, fmt.Errorf("bootstrap: persist key: %w", err) } if err := s.roles.Grant(ctx, &authdomain.ActorRole{ @@ -169,6 +185,19 @@ func (s *Service) ValidateAndMint(ctx context.Context, token, actorName string) TenantID: authdomain.DefaultTenantID, GrantedBy: "bootstrap", }); err != nil { + // LOW-2 — same audit-on-failure pattern as the persist-key branch. + if s.audit != nil { + if aerr := s.audit.RecordEventWithCategory(ctx, "bootstrap-token", domain.ActorTypeSystem, + "bootstrap.consume_failed", domain.EventCategoryAuth, "api_key", apiKey.ID, + map[string]interface{}{ + "actor_name": actorName, + "stage": "grant_role", + "error": err.Error(), + }); aerr != nil { + slog.WarnContext(ctx, "bootstrap.consume_failed audit write failed", + "actor_name", actorName, "err", aerr) + } + } return nil, fmt.Errorf("bootstrap: grant admin role: %w", err) } if s.keyStore != nil { diff --git a/internal/auth/breakglass/service.go b/internal/auth/breakglass/service.go index a3be349..4cf9932 100644 --- a/internal/auth/breakglass/service.go +++ b/internal/auth/breakglass/service.go @@ -532,7 +532,14 @@ func verifyPassword(plaintext, encoded string) (bool, error) { // paths take statistically indistinguishable time. The result is // discarded. func (s *Service) verifyDummy(plaintext string) bool { - dummySalt := make([]byte, argon2SaltSize) // all-zeros — fine for timing parity + // Audit 2026-05-10 LOW-4 closure — was an all-zeros salt; while the + // wall-clock cost matched a real verify (the 64MiB Argon2id + // allocation dominates), cache/branch behavior differed enough to + // give a subtle timing side channel. Use crypto/rand for the dummy + // salt too. If RNG fails, fall back to all-zeros (the timing parity + // is still preserved by the dominant Argon2id memory cost). + dummySalt := make([]byte, argon2SaltSize) + _, _ = s.readRand(dummySalt) _ = argon2.IDKey([]byte(plaintext), dummySalt, uint32(argon2Iterations), uint32(argon2Memory), uint8(argon2Parallelism), uint32(argon2OutputSize)) diff --git a/internal/auth/protocol_endpoints.go b/internal/auth/protocol_endpoints.go index 7332c9f..0d7f460 100644 --- a/internal/auth/protocol_endpoints.go +++ b/internal/auth/protocol_endpoints.go @@ -28,10 +28,21 @@ import "strings" // (router.go:69-72): /health, /ready, /api/v1/auth/info. Those bypass // EVERY middleware stack, not just RBAC, so they're not in this // allowlist; they're handled in router.go directly. +// Audit 2026-05-10 LOW-7 closure — this slice is the canonical +// source of truth for "do NOT gate via RBAC" surfaces. The router's +// AuthExemptDispatchPrefixes had drifted (carrying /scep-mtls and +// /.well-known/est-mtls that weren't in this list); both are now +// included so the two slices stay in lockstep. A CI guard +// (scripts/ci-guards/protocol-endpoint-prefix-sync.sh) is queued +// against the two slices for future drift detection — meanwhile the +// Phase 12 TestPhase12_IsProtocolEndpoint_CoversCanonicalPrefixes +// regression pins the canonical set against this var. var ProtocolEndpointPrefixes = []string{ "/acme", "/scep", + "/scep-mtls", // SCEP + mTLS sibling route (Phase 6.5) "/.well-known/est", + "/.well-known/est-mtls", // EST + mTLS sibling route (EST hardening Phase 2) "/.well-known/pki/ocsp", "/.well-known/pki/crl", } diff --git a/internal/auth/session/domain/types.go b/internal/auth/session/domain/types.go index c7ec045..5fb13bc 100644 --- a/internal/auth/session/domain/types.go +++ b/internal/auth/session/domain/types.go @@ -132,6 +132,13 @@ func (s *Session) Validate() error { if !s.CreatedAt.IsZero() && !s.IdleExpiresAt.After(s.CreatedAt) { return ErrSessionExpiryNotInFuture } + // Audit 2026-05-10 LOW-10 closure — a post-login session (not a + // pre-login handshake row) MUST carry a CSRF token hash; without + // it the CSRF middleware can't validate state-changing requests + // and the row is effectively malformed. + if !s.IsPreLogin && strings.TrimSpace(s.CSRFTokenHash) == "" { + return ErrSessionInvalidCSRFHash + } if s.CSRFTokenHash != "" { // SHA-256 is 32 bytes => 64 lowercase hex chars. if len(s.CSRFTokenHash) != 64 || !isHex(s.CSRFTokenHash) { diff --git a/internal/auth/session/domain/types_test.go b/internal/auth/session/domain/types_test.go index 37847e8..ab4d955 100644 --- a/internal/auth/session/domain/types_test.go +++ b/internal/auth/session/domain/types_test.go @@ -21,6 +21,10 @@ func validSession() *Session { IPAddress: "10.0.0.1", UserAgent: "Mozilla/5.0", TenantID: "t-default", + // Audit 2026-05-10 LOW-10 — post-login sessions MUST carry a + // CSRF token hash. Pin a valid 64-hex value so the happy-path + // fixture stays valid. + CSRFTokenHash: strings.Repeat("a", 64), } } diff --git a/internal/auth/session/middleware.go b/internal/auth/session/middleware.go index 4d8c26c..ae017f0 100644 --- a/internal/auth/session/middleware.go +++ b/internal/auth/session/middleware.go @@ -33,6 +33,7 @@ package session import ( "context" "errors" + "net" "net/http" "github.com/certctl-io/certctl/internal/auth" @@ -324,7 +325,33 @@ func isStateChangingMethod(method string) bool { // handler + middleware both need to derive the canonical client IP // from the same request shape, and duplicating the 6-line helper is // preferable to introducing an internal/util package for it. +// Audit 2026-05-10 LOW-5 — trustedProxyCIDRs holds the operator-configured +// list of CIDR ranges from which X-Forwarded-For is honored. Set by +// SetTrustedProxies at startup (from CERTCTL_TRUSTED_PROXIES). When +// empty (default), XFF is ignored entirely — the direct r.RemoteAddr +// is used. This closes the XFF-spoofing leg where any direct client +// could inject an attacker-controlled IP into audit rows + session +// IP-binding. +var trustedProxyCIDRs []string + +// SetTrustedProxies installs the CIDR allowlist for XFF processing. +// Called from cmd/server/main.go after config load. Each entry is a +// CIDR like "10.0.0.0/8" or a single-host literal like "192.0.2.1". +func SetTrustedProxies(cidrs []string) { + trustedProxyCIDRs = cidrs +} + func clientIPFromRequest(r *http.Request) string { + remoteIP := r.RemoteAddr + if i := lastIndexByte(remoteIP, ':'); i > 0 { + remoteIP = remoteIP[:i] + } + // Audit 2026-05-10 LOW-5 closure — only trust XFF when the direct + // connection comes from a configured trusted proxy. Default-deny: + // empty TrustedProxies list means XFF is ignored entirely. + if !ipInCIDRs(remoteIP, trustedProxyCIDRs) { + return remoteIP + } if xff := r.Header.Get("X-Forwarded-For"); xff != "" { for i := 0; i < len(xff); i++ { if xff[i] == ',' { @@ -333,10 +360,53 @@ func clientIPFromRequest(r *http.Request) string { } return trimSpace(xff) } - if i := lastIndexByte(r.RemoteAddr, ':'); i > 0 { - return r.RemoteAddr[:i] + return remoteIP +} + +// ipInCIDRs reports whether ip is within any of the named CIDR ranges. +// Hosts (no /mask) are treated as /32 (IPv4) or /128 (IPv6) singletons. +func ipInCIDRs(ip string, cidrs []string) bool { + if len(cidrs) == 0 { + return false } - return r.RemoteAddr + parsed := netParseIP(ip) + if parsed == nil { + return false + } + for _, c := range cidrs { + if !strContainsByte(c, '/') { + // Single-host literal — exact match. + if c == ip { + return true + } + continue + } + _, network, err := netParseCIDR(c) + if err != nil { + continue + } + if network.Contains(parsed) { + return true + } + } + return false +} + +// Net helpers live here rather than importing "net" at the top to +// keep the diff surgical. The net package's ParseIP / ParseCIDR are +// well-tested; we just thread them through local indirections. +var ( + netParseIP = func(s string) net.IP { return net.ParseIP(s) } + netParseCIDR = func(s string) (net.IP, *net.IPNet, error) { return net.ParseCIDR(s) } +) + +func strContainsByte(s string, b byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == b { + return true + } + } + return false } func trimSpace(s string) string { diff --git a/internal/auth/session/middleware_test.go b/internal/auth/session/middleware_test.go index 346b1ec..96935b1 100644 --- a/internal/auth/session/middleware_test.go +++ b/internal/auth/session/middleware_test.go @@ -301,19 +301,36 @@ func TestIsStateChangingMethod(t *testing.T) { } func TestClientIPFromRequest_Variants(t *testing.T) { + // Audit 2026-05-10 LOW-5 — XFF is now only trusted when the + // direct connection's RemoteAddr falls into the configured + // trusted-proxy CIDR allowlist. Reset to a known state before/after. + prev := trustedProxyCIDRs + t.Cleanup(func() { trustedProxyCIDRs = prev }) + + // (1) No XFF trust configured (empty allowlist) — XFF is IGNORED. + trustedProxyCIDRs = nil r := httptest.NewRequest(http.MethodGet, "/", nil) r.RemoteAddr = "1.2.3.4:5555" if ip := clientIPFromRequest(r); ip != "1.2.3.4" { t.Errorf("RemoteAddr: got %q; want 1.2.3.4", ip) } r.Header.Set("X-Forwarded-For", "10.0.0.1, 10.0.0.2") + if ip := clientIPFromRequest(r); ip != "1.2.3.4" { + t.Errorf("XFF without trusted proxy: got %q; want 1.2.3.4 (ignored)", ip) + } + + // (2) Trusted-proxy CIDR matches RemoteAddr — XFF IS honored. + trustedProxyCIDRs = []string{"1.2.3.0/24"} + r.Header.Set("X-Forwarded-For", "10.0.0.1, 10.0.0.2") if ip := clientIPFromRequest(r); ip != "10.0.0.1" { - t.Errorf("XFF first hop: got %q; want 10.0.0.1", ip) + t.Errorf("XFF first hop (trusted): got %q; want 10.0.0.1", ip) } r.Header.Set("X-Forwarded-For", "10.0.0.99") if ip := clientIPFromRequest(r); ip != "10.0.0.99" { - t.Errorf("XFF single: got %q; want 10.0.0.99", ip) + t.Errorf("XFF single (trusted): got %q; want 10.0.0.99", ip) } + + // (3) No-port RemoteAddr unchanged. r2 := httptest.NewRequest(http.MethodGet, "/", nil) r2.RemoteAddr = "no-port" if ip := clientIPFromRequest(r2); ip != "no-port" { diff --git a/internal/config/config.go b/internal/config/config.go index 0047c09..5f5f4cd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1597,6 +1597,17 @@ type AuthConfig struct { // legacy `api-key` auth type ignore this struct entirely. Session SessionConfig + // TrustedProxies is the comma-separated list of CIDR ranges from + // which X-Forwarded-For is honored. Empty (default) disables XFF + // trust entirely — every request's source IP is read from + // r.RemoteAddr regardless of XFF headers. Audit 2026-05-10 LOW-5 + // closure: pre-fix the audit subsystem trusted any caller-supplied + // XFF for IP attribution, letting an attacker inject arbitrary IPs + // into audit rows + session IP-binding. Post-fix XFF is read only + // when the direct connection's RemoteAddr is in this allowlist. + // Setting: CERTCTL_TRUSTED_PROXIES (e.g. "10.0.0.0/8,192.168.0.0/16"). + TrustedProxies []string + // DemoModeAck must be true to allow CERTCTL_AUTH_TYPE=none with a // non-loopback listen address. Default false. Audit 2026-05-10 // HIGH-12 closure: pre-fix, an operator who flipped Type=none @@ -1869,6 +1880,8 @@ func Load() (*Config, error) { // Audit 2026-05-10 HIGH-12 closure: required-true to allow // CERTCTL_AUTH_TYPE=none with a non-loopback listen address. DemoModeAck: getEnvBool("CERTCTL_DEMO_MODE_ACK", false), + // LOW-5: XFF trust allowlist (CIDRs). Empty = ignore XFF. + TrustedProxies: getEnvList("CERTCTL_TRUSTED_PROXIES", nil), // NamedKeys is populated from CERTCTL_API_KEYS_NAMED below so Load() // can surface parse errors alongside other config errors.