mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 17:22:07 +00:00
harden(auth/session+oidc): 503/401 split + go-oidc string pin (LOW-6 + Nit-2)
Audit 2026-05-10 — close LOW-6 + Nit-2 from the HANDOFF.md backend
batch (items 8 + 9).
LOW-6: introduce ErrSessionTransient sentinel in session.Service.
session.Validate now distinguishes:
- errors.Is(err, repository.ErrSessionNotFound) → ErrSessionInvalidCookie (401)
- All other repo errors → ErrSessionTransient (503)
The session middleware maps ErrSessionTransient to HTTP 503 with
Retry-After: 1. Pre-fix, every DB hiccup looked like a forged-cookie
401 and forced the user to re-authenticate on a transient outage.
Two new regression tests pin the wire shape:
- TestService_Validate_TransientSessionGetError (service layer)
- TestService_Validate_SessionNotFoundMapsToInvalidCookie (negative
leg: not-found stays 401)
- TestSessionMiddleware_TransientErrorMappedTo503 (middleware-level
503 + Retry-After header)
Nit-2: isJWKSFetchError documentation now pins go-oidc/v3 v3.18.0 as
the source-of-truth string set. v3.18.0 exposes only
*oidc.TokenExpiredError as a typed error; JWKS-fetch failures bubble
up as fmt.Errorf-wrapped strings. New regression test
TestIsJWKSFetchError_GoOIDCV318Strings pins the canonical substrings
emitted by go-oidc's jwks.go — a future upstream bump that changes
the wording trips the test and forces the matcher to be re-derived.
The test caught a real gap: 'oidc: failed to decode keys' (emitted
when the IdP returns non-JSON at the jwks_uri — broken proxy, gateway
HTML error page, etc.) was previously misclassified as a generic 500
instead of 503 ErrJWKSUnreachable. Added 'decode keys' substring to
the matcher.
Status: LOW-6 + Nit-2 marked CLOSED in audit-doc table.
Refs: cowork/auth-bundles-fixes-2026-05-10/HANDOFF.md items 8, 9
cowork/auth-bundles-audit-2026-05-10.md LOW-6, Nit-2
This commit is contained in:
@@ -908,6 +908,19 @@ func atHashMatches(rawIDToken, accessToken, claimAtHash string) bool {
|
||||
// error talking to the IdP's jwks_uri during a key rotation event).
|
||||
// Maps to ErrJWKSUnreachable so the handler returns 503 to the
|
||||
// in-flight login attempt without auto-revoking existing sessions.
|
||||
//
|
||||
// Audit 2026-05-10 Nit-2 — pinned against go-oidc/v3 v3.18.0. As of
|
||||
// that release, the only typed error exposed by the oidc package is
|
||||
// `*oidc.TokenExpiredError`; JWKS-fetch failures bubble up as
|
||||
// fmt.Errorf-wrapped strings from internal/keyset.go's `verify` path
|
||||
// (`failed to verify signature: fetching keys: ...`,
|
||||
// `oidc: fetching keys ...`, `oidc: failed to get keys for kid ...`).
|
||||
// The regression test in service_test.go::TestIsJWKSFetchError_GoOIDCV318Strings
|
||||
// pins the canonical substrings; a future go-oidc bump that changes
|
||||
// the wording trips the test and forces this function to be re-derived.
|
||||
// When go-oidc exposes a typed error (track at
|
||||
// https://github.com/coreos/go-oidc/issues for the upstream RFE),
|
||||
// switch to errors.As.
|
||||
func isJWKSFetchError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
@@ -915,7 +928,13 @@ func isJWKSFetchError(err error) bool {
|
||||
msg := err.Error()
|
||||
return strings.Contains(msg, "fetching keys") ||
|
||||
strings.Contains(msg, "jwks_uri") ||
|
||||
strings.Contains(msg, "key set")
|
||||
strings.Contains(msg, "key set") ||
|
||||
// go-oidc/v3 v3.18.0 jwks.go:260: `oidc: failed to decode keys`
|
||||
// — emitted when the IdP returns non-JSON at the jwks_uri
|
||||
// (broken proxy, gateway HTML error page, etc.). Audit
|
||||
// 2026-05-10 Nit-2 closure — was previously misclassified as
|
||||
// a generic 500 instead of 503 ErrJWKSUnreachable.
|
||||
strings.Contains(msg, "decode keys")
|
||||
}
|
||||
|
||||
// decryptClientSecret runs the client_secret_encrypted blob through
|
||||
|
||||
@@ -1071,6 +1071,41 @@ func TestService_IsJWKSFetchError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsJWKSFetchError_GoOIDCV318Strings pins the canonical go-oidc/v3
|
||||
// v3.18.0 error wordings against isJWKSFetchError. Audit 2026-05-10
|
||||
// Nit-2: go-oidc's only typed error as of v3.18.0 is
|
||||
// *oidc.TokenExpiredError; JWKS-fetch failures bubble up as
|
||||
// fmt.Errorf-wrapped strings. A future go-oidc bump that changes
|
||||
// these strings will trip this test and force isJWKSFetchError to be
|
||||
// re-derived (or, ideally, switched to errors.As against a newly-
|
||||
// exposed typed error). Without this pin, a silent upstream string
|
||||
// change would make every JWKS-rotation login surface as 500 instead
|
||||
// of 503 — the operator-distinguishable wire shape promised by
|
||||
// ErrJWKSUnreachable.
|
||||
func TestIsJWKSFetchError_GoOIDCV318Strings(t *testing.T) {
|
||||
// Canonical substrings observed in go-oidc/v3 v3.18.0 verify path.
|
||||
// Sources (all under github.com/coreos/go-oidc/v3@v3.18.0/oidc/):
|
||||
// - jwks.go:175 → fmt.Errorf("fetching keys %w", err)
|
||||
// - jwks.go:260 → fmt.Errorf("oidc: failed to decode keys: %v %s", ...)
|
||||
// Also stably matched by isJWKSFetchError's "jwks_uri" + "key set"
|
||||
// fallbacks (substrings inside go-oidc-emitted strings and our
|
||||
// own /api/v1/auth/oidc/.../refresh wrap errors).
|
||||
canonical := []string{
|
||||
// Direct go-oidc v3.18.0 fmt.Errorf outputs.
|
||||
"fetching keys: dial tcp: lookup idp.example.com: no such host",
|
||||
"oidc: failed to decode keys: invalid character 'h' looking for beginning of value",
|
||||
// Wrap from our own RefreshKeys / verify retry path.
|
||||
"failed to refresh remote key set: timeout",
|
||||
"unable to load key set: cancelled",
|
||||
}
|
||||
for _, msg := range canonical {
|
||||
if !isJWKSFetchError(errors.New(msg)) {
|
||||
t.Errorf("canonical go-oidc v3.18.0 string %q not detected as JWKS-fetch error; "+
|
||||
"update isJWKSFetchError or pin the new substring", msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestService_DecryptClientSecret_NoKeyReturnsBytesAsIs covers the
|
||||
// empty-key short-circuit (used by tests with plaintext blobs).
|
||||
func TestService_DecryptClientSecret_NoKeyReturnsBytesAsIs(t *testing.T) {
|
||||
|
||||
@@ -90,6 +90,20 @@ func NewSessionMiddleware(svc SessionValidator) func(http.Handler) http.Handler
|
||||
UserAgent: r.UserAgent(),
|
||||
})
|
||||
if verr != nil {
|
||||
// Audit 2026-05-10 LOW-6 closure — ErrSessionTransient
|
||||
// means the backend hit a retryable error (DB hiccup,
|
||||
// connection reset, etc.) rather than the cookie being
|
||||
// malformed. Surface 503 + Retry-After so well-behaved
|
||||
// clients (curl --retry, browser fetch automatic retry,
|
||||
// MCP clients) retry instead of forcing the user to
|
||||
// re-auth on a transient issue. Pre-fix, every DB error
|
||||
// looked like a forged-cookie 401.
|
||||
if errors.Is(verr, ErrSessionTransient) {
|
||||
w.Header().Set("Retry-After", "1")
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
http.Error(w, `{"error":"transient backend error; retry"}`, http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
// Cookie present but invalid (expired / tampered /
|
||||
// retired-key / IP-bind / UA-bind / revoked). Defer to
|
||||
// the next middleware so a valid Bearer can still
|
||||
|
||||
@@ -338,6 +338,26 @@ func TestClientIPFromRequest_Variants(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSessionMiddleware_TransientErrorMappedTo503 pins the LOW-6
|
||||
// closure (audit 2026-05-10): when Validate returns
|
||||
// ErrSessionTransient, the middleware MUST emit 503 with Retry-After
|
||||
// instead of falling through to the Bearer/401 path. Pre-fix, a DB
|
||||
// hiccup looked like a forged-cookie 401 + forced re-auth.
|
||||
func TestSessionMiddleware_TransientErrorMappedTo503(t *testing.T) {
|
||||
stub := &stubSessionValidator{validateErr: ErrSessionTransient}
|
||||
chain := ChainAuthSessionThenBearer(NewSessionMiddleware(stub), nil)(markAuthenticated())
|
||||
req := httptest.NewRequest(http.MethodGet, "/x", nil)
|
||||
req.AddCookie(&http.Cookie{Name: sessiondomain.PostLoginCookieName, Value: "v1.ses.sk.bad"})
|
||||
w := httptest.NewRecorder()
|
||||
chain.ServeHTTP(w, req)
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("status = %d; want 503", w.Code)
|
||||
}
|
||||
if w.Header().Get("Retry-After") != "1" {
|
||||
t.Errorf("Retry-After = %q; want 1", w.Header().Get("Retry-After"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestChainAuthSessionThenBearer_NilBearer_Session401Path(t *testing.T) {
|
||||
stub := &stubSessionValidator{validateErr: ErrSessionInvalidCookie}
|
||||
chain := ChainAuthSessionThenBearer(NewSessionMiddleware(stub), nil)(markAuthenticated())
|
||||
|
||||
@@ -153,6 +153,18 @@ var (
|
||||
// auto-revoked (user may have legitimate IP change).
|
||||
ErrSessionIPMismatch = errors.New("session: client IP does not match session-bound IP")
|
||||
|
||||
// ErrSessionTransient: a non-deterministic, retryable failure (DB
|
||||
// connection reset, network blip on the audit-row write inside
|
||||
// the validate path, etc.). Distinct from ErrSessionInvalidCookie:
|
||||
// the cookie itself isn't malformed/forged, the backend just
|
||||
// failed to look it up cleanly. The middleware maps this to HTTP
|
||||
// 503 with `Retry-After: 1` so well-behaved clients retry instead
|
||||
// of forcing the user to re-authenticate. Audit 2026-05-10 LOW-6
|
||||
// closure — pre-fix, transient DB failures collapsed into
|
||||
// ErrSessionInvalidCookie + 401, falsely framing a database outage
|
||||
// as "your cookie is bad."
|
||||
ErrSessionTransient = errors.New("session: transient backend error")
|
||||
|
||||
// ErrSessionUAMismatch: same shape as ErrSessionIPMismatch for the
|
||||
// optional CERTCTL_SESSION_BIND_USER_AGENT gate.
|
||||
ErrSessionUAMismatch = errors.New("session: User-Agent does not match session-bound User-Agent")
|
||||
@@ -453,7 +465,16 @@ func (s *Service) Validate(ctx context.Context, in ValidateInput) (*sessiondomai
|
||||
|
||||
row, err := s.sessions.Get(ctx, sessionID)
|
||||
if err != nil {
|
||||
return nil, ErrSessionInvalidCookie
|
||||
// Audit 2026-05-10 LOW-6 closure — distinguish "this cookie's
|
||||
// session row doesn't exist" (invalid: 401) from "the DB call
|
||||
// failed transiently" (retryable: 503). Pre-fix, both
|
||||
// collapsed into ErrSessionInvalidCookie, so a DB hiccup
|
||||
// looked like a forged cookie in the audit log + forced the
|
||||
// user to re-auth.
|
||||
if errors.Is(err, repository.ErrSessionNotFound) {
|
||||
return nil, ErrSessionInvalidCookie
|
||||
}
|
||||
return nil, fmt.Errorf("%w: %v", ErrSessionTransient, err)
|
||||
}
|
||||
|
||||
if row.RevokedAt != nil {
|
||||
|
||||
@@ -921,14 +921,40 @@ func TestService_RotateSigningKey_RetireError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestService_Validate_SessionGetErrorMappedToInvalidCookie(t *testing.T) {
|
||||
// TestService_Validate_TransientSessionGetError pins the LOW-6
|
||||
// closure (audit 2026-05-10): a non-deterministic DB error from
|
||||
// session.Get bubbles up as ErrSessionTransient (→ 503), NOT
|
||||
// ErrSessionInvalidCookie (→ 401). The middleware test pins the
|
||||
// 503-with-Retry-After wire shape; this one pins the service-layer
|
||||
// sentinel.
|
||||
func TestService_Validate_TransientSessionGetError(t *testing.T) {
|
||||
svc, sessions, _, _, _ := newTestService(t, defaultCfg())
|
||||
res, _ := svc.Create(context.Background(), "u-y", "User", "", "")
|
||||
sessions.getErr = fmt.Errorf("simulated session.Get failure")
|
||||
_, err := svc.Validate(context.Background(), ValidateInput{CookieValue: res.CookieValue})
|
||||
if !errors.Is(err, ErrSessionTransient) {
|
||||
t.Errorf("err = %v; want ErrSessionTransient", err)
|
||||
}
|
||||
if errors.Is(err, ErrSessionInvalidCookie) {
|
||||
t.Errorf("err also matched ErrSessionInvalidCookie; want only ErrSessionTransient")
|
||||
}
|
||||
}
|
||||
|
||||
// TestService_Validate_SessionNotFoundMapsToInvalidCookie pins the
|
||||
// other half of the LOW-6 split: repository.ErrSessionNotFound (a
|
||||
// real, deterministic "the row doesn't exist" answer from the DB)
|
||||
// stays mapped to ErrSessionInvalidCookie (→ 401), NOT 503.
|
||||
func TestService_Validate_SessionNotFoundMapsToInvalidCookie(t *testing.T) {
|
||||
svc, sessions, _, _, _ := newTestService(t, defaultCfg())
|
||||
res, _ := svc.Create(context.Background(), "u-y2", "User", "", "")
|
||||
sessions.getErr = repository.ErrSessionNotFound
|
||||
_, err := svc.Validate(context.Background(), ValidateInput{CookieValue: res.CookieValue})
|
||||
if !errors.Is(err, ErrSessionInvalidCookie) {
|
||||
t.Errorf("err = %v; want ErrSessionInvalidCookie", err)
|
||||
}
|
||||
if errors.Is(err, ErrSessionTransient) {
|
||||
t.Errorf("err also matched ErrSessionTransient; want only ErrSessionInvalidCookie")
|
||||
}
|
||||
}
|
||||
|
||||
func TestService_UpdateLastSeen_RepoErrorWraps(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user