fix(auth/ux): cause-aware OIDC + session error surfacing (HIGH-7 + HIGH-8 closure)

Server (HIGH-7): the OIDC callback failure path now 302-redirects to
/login?error=oidc_failed&reason=<category> instead of emitting a blank
400. `category` is the existing audit `failure_category` value;
classifyOIDCFailure was extended with three new sentinel paths
(email_domain_not_allowed, email_missing_but_required, pkce_invalid)
so CRIT-5 + PKCE failures get distinguishable GUI rendering.
Audit-log observability is unchanged — the same failure_category is
written to the auth.oidc_login_failed audit row; the 302 is purely a
UX leg layered on top.

Server (HIGH-8): SessionMiddleware now stashes a cause classification
on the request context when Validate returns an error, mapping the
sentinels via classifySessionError (errors.Is-based, so wrapped
sentinels still classify) to the stable wire-strings idle_timeout /
absolute_timeout / back_channel_revoked / invalid_token. The 401
emit point in bearerSkipIfAuthenticated reads the stashed cause and
emits WWW-Authenticate: Bearer realm="certctl", error="invalid_token",
error_description=<cause> per RFC 6750 §3.

GUI (HIGH-7): LoginPage reads ?error= + ?reason= from the URL via
react-router useSearchParams and renders an operator-friendly
amber-bordered banner above the form; OIDC_FAILURE_REASON_TEXT maps
all 16 known categories with a defensive 'unspecified' fallback for
forward-compat with future server-side categories.

GUI (HIGH-8): api/client fetchJSON parses the WWW-Authenticate cause
via parseWWWAuthenticateCause and attaches it to the
'certctl:auth-required' CustomEvent detail; AuthProvider redirects
to /login?session_expired=<cause> on cause-aware 401s; LoginPage
renders a blue-bordered session-cause banner. invalid_token stays
on the current page (no hard redirect for opaque failures).

Misc cleanup: ErrorState now accepts the title/message/data-testid
form added by CRIT-4 BreakglassPage (previously a tsc error on master).

Regression matrix:
- internal/api/handler/oidc_redirect_categories_test.go pins all 16
  failure categories to the 302 + reason= location + audit-row leg
- internal/auth/session/www_authenticate_test.go pins the 4 stable
  cause categories on classifySessionError (incl. errors.Is wrapped
  sentinels) + the WWW-Authenticate emission across all 4 categories
  + the no-session-context fallback case
- internal/api/handler/auth_session_oidc_test.go: 4 pre-existing
  TestLoginCallback_*Returns400 tests updated to assert the 302 (and,
  in most of them, the /login?error=oidc_failed Location). The wire
  shape changed from 400 to 302; audit observability and the failure
  classification itself are unchanged
- web/src/pages/LoginPage.test.tsx: 6 new cases pinning the failure
  banner, session-cause banner, unknown-reason fallback, and
  forward-compat 'unspecified' category

Spec: cowork/auth-bundles-fixes-2026-05-10/08-high-7-8-error-surfacing.md
Closes: HIGH-7, HIGH-8 of cowork/auth-bundles-audit-2026-05-10.md
This commit is contained in:
shankar0123
2026-05-10 21:12:11 +00:00
parent 15435ca02b
commit 0f340beb14
10 changed files with 633 additions and 30 deletions
+20 -2
View File
@@ -258,7 +258,11 @@ func (h *AuthSessionOIDCHandler) LoginCallback(w http.ResponseWriter, r *http.Re
res, err := h.oidcSvc.HandleCallback(r.Context(), preLoginCookie.Value, code, state, clientIP, userAgent)
if err != nil {
// Uniform 400 to the wire; specific failure category in audit.
// Audit 2026-05-10 HIGH-7 — instead of a blank 400, redirect
// to /login?error=oidc_failed&reason=<category>. The LoginPage
// reads the query params and renders an operator-friendly
// alert. The audit row still carries the specific
// failure_category so server-side observability is unchanged.
category := classifyOIDCFailure(err)
h.recordAudit(r.Context(), "auth.oidc_login_failed", "anonymous", domain.ActorTypeSystem, "",
map[string]interface{}{"failure_category": category})
@@ -270,7 +274,10 @@ func (h *AuthSessionOIDCHandler) LoginCallback(w http.ResponseWriter, r *http.Re
}
// Always clear the pre-login cookie on failure.
h.clearPreLoginCookie(w)
Error(w, http.StatusBadRequest, "OIDC login failed")
// 302 to the login page; the reason categorizes the failure for
// the GUI to render. Keep the redirect target relative — the
// SPA serves /login.
http.Redirect(w, r, "/login?error=oidc_failed&reason="+category, http.StatusFound)
return
}
@@ -1073,6 +1080,17 @@ func classifyOIDCFailure(err error) string {
return "groups_missing"
case strings.Contains(msg, "jwks"):
return "jwks_unreachable"
// Audit 2026-05-10 HIGH-7 — surface CRIT-5 email-domain rejection
// + PKCE invalidation distinctly so the LoginPage can render an
// operator-friendly reason. The sentinel errors live in
// internal/auth/oidc/service.go (ErrEmailDomainNotAllowed,
// ErrEmailMissingButRequired, ErrPKCEPlainRejected).
case strings.Contains(msg, "email domain not in allowlist"):
return "email_domain_not_allowed"
case strings.Contains(msg, "requires email but token has none"):
return "email_missing_but_required"
case strings.Contains(msg, "pkce"):
return "pkce_invalid"
default:
return "unspecified"
}
+27 -15
View File
@@ -362,9 +362,11 @@ func TestLoginCallback_HappyPath(t *testing.T) {
}
}
// Phase 5 spec mandate #4: Callback with replayed state -> 400.
// Phase 5 spec mandate #4: Callback with replayed state -> 302 to /login.
// (The OIDC service's PreLoginStore.LookupAndConsume returns
// ErrPreLoginNotFound on the second call; the handler maps to 400.)
// ErrPreLoginNotFound on the second call; Audit 2026-05-10 HIGH-7
// flipped this from a blank 400 to a 302 to /login?error=oidc_failed
// &reason=<category>. The audit row still records failure_category.)
func TestLoginCallback_ReplayedState_Returns400(t *testing.T) {
o := &stubOIDCSvc{callbackErr: oidcsvc.ErrPreLoginNotFound}
h, _, _, _, audit, _ := newPhase5Handler(t, o, &stubSession{}, &stubBCLVerifier{})
@@ -373,17 +375,20 @@ func TestLoginCallback_ReplayedState_Returns400(t *testing.T) {
req.AddCookie(&http.Cookie{Name: sessiondomain.PreLoginCookieName, Value: "v1.pl-abc.sk-xyz.mac"})
w := httptest.NewRecorder()
h.LoginCallback(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("status = %d; want 400", w.Code)
if w.Code != http.StatusFound {
t.Errorf("status = %d; want 302 (post-HIGH-7 redirect)", w.Code)
}
if loc := w.Header().Get("Location"); !strings.HasPrefix(loc, "/login?error=oidc_failed&reason=") {
t.Errorf("Location = %q; want /login?error=oidc_failed&reason=...", loc)
}
if !contains(audit.events, "auth.oidc_login_failed") {
t.Errorf("expected auth.oidc_login_failed audit event; got %v", audit.events)
}
}
// Phase 5 spec mandate #5: Callback with PKCE verifier mismatch -> 400.
// Phase 5 spec mandate #5: Callback with PKCE verifier mismatch -> 302.
// The OIDC service's code-exchange step fails when the verifier doesn't
// match the challenge; the handler surfaces it as 400.
// match the challenge; HIGH-7 redirects to /login with reason.
func TestLoginCallback_PKCEVerifierMismatch_Returns400(t *testing.T) {
o := &stubOIDCSvc{callbackErr: errors.New("oidc: code exchange failed: invalid_grant")}
h, _, _, _, _, _ := newPhase5Handler(t, o, &stubSession{}, &stubBCLVerifier{})
@@ -391,23 +396,27 @@ func TestLoginCallback_PKCEVerifierMismatch_Returns400(t *testing.T) {
req.AddCookie(&http.Cookie{Name: sessiondomain.PreLoginCookieName, Value: "v1.pl-abc.sk-xyz.mac"})
w := httptest.NewRecorder()
h.LoginCallback(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("status = %d; want 400", w.Code)
if w.Code != http.StatusFound {
t.Errorf("status = %d; want 302 (post-HIGH-7 redirect)", w.Code)
}
if loc := w.Header().Get("Location"); !strings.HasPrefix(loc, "/login?error=oidc_failed") {
t.Errorf("Location = %q; want /login?error=oidc_failed&reason=...", loc)
}
}
// Phase 5 spec mandate #6: Callback with expired pre-login row -> 400.
// Phase 5 spec mandate #6: Callback with expired pre-login row -> 302.
func TestLoginCallback_ExpiredPreLoginRow_Returns400(t *testing.T) {
// Adapter maps ErrPreLoginExpired -> ErrPreLoginNotFound (uniform
// 400 per spec; specific reason in audit row).
// Adapter maps ErrPreLoginExpired -> ErrPreLoginNotFound; HIGH-7
// flipped the wire shape from 400 to a 302 redirect (specific
// reason still in audit row).
o := &stubOIDCSvc{callbackErr: oidcsvc.ErrPreLoginNotFound}
h, _, _, _, _, _ := newPhase5Handler(t, o, &stubSession{}, &stubBCLVerifier{})
req := httptest.NewRequest(http.MethodGet, "/auth/oidc/callback?code=abc&state=xyz", nil)
req.AddCookie(&http.Cookie{Name: sessiondomain.PreLoginCookieName, Value: "v1.pl-abc.sk-xyz.mac"})
w := httptest.NewRecorder()
h.LoginCallback(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("status = %d; want 400", w.Code)
if w.Code != http.StatusFound {
t.Errorf("status = %d; want 302 (post-HIGH-7 redirect)", w.Code)
}
}
@@ -431,8 +440,11 @@ func TestLoginCallback_UnmappedGroups_AuditRowDistinguished(t *testing.T) {
req.AddCookie(&http.Cookie{Name: sessiondomain.PreLoginCookieName, Value: "v1.pl-abc.sk-xyz.mac"})
w := httptest.NewRecorder()
h.LoginCallback(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("status = %d; want 400", w.Code)
if w.Code != http.StatusFound {
t.Errorf("status = %d; want 302 (post-HIGH-7 redirect)", w.Code)
}
if loc := w.Header().Get("Location"); !strings.Contains(loc, "reason=unmapped_groups") {
t.Errorf("Location = %q; want reason=unmapped_groups", loc)
}
if !contains(audit.events, "auth.oidc_login_unmapped_groups") {
t.Errorf("expected auth.oidc_login_unmapped_groups; got %v", audit.events)
@@ -0,0 +1,140 @@
package handler
import (
"errors"
"net/http"
"net/http/httptest"
"strings"
"testing"
oidcsvc "github.com/certctl-io/certctl/internal/auth/oidc"
sessiondomain "github.com/certctl-io/certctl/internal/auth/session/domain"
)
// TestLoginCallback_RedirectsWithReason_AllCategories is the Audit
// 2026-05-10 HIGH-7 regression matrix. Each row injects one callback
// error into the stub OIDC service, drives LoginCallback, and checks:
//
//   - the response status is 302 Found;
//   - the Location header starts with
//     /login?error=oidc_failed&reason=<expected category>;
//   - an auth.oidc_login_failed audit event was recorded.
//
// Before HIGH-7 every failure was answered with a 400 "OIDC login
// failed" that carried no machine-readable reason for the GUI.
func TestLoginCallback_RedirectsWithReason_AllCategories(t *testing.T) {
	const locationPrefix = "/login?error=oidc_failed&reason="

	// Columns: subtest name, error returned by the stubbed callback,
	// category expected in the reason= query parameter.
	table := []struct {
		name        string
		callbackErr error
		reason      string
	}{
		{"pre_login_consume_failed", oidcsvc.ErrPreLoginNotFound, "pre_login_consume_failed"},
		{"state_mismatch", errors.New("state mismatch"), "state_mismatch"},
		{"nonce_mismatch", errors.New("nonce mismatch"), "nonce_mismatch"},
		{"audience_mismatch", errors.New("audience mismatch"), "audience_mismatch"},
		{"token_expired", errors.New("token expired"), "token_expired"},
		{"azp_mismatch", errors.New("azp does not match"), "azp_mismatch"},
		{"at_hash_mismatch", errors.New("at_hash mismatch"), "at_hash_mismatch"},
		{"iat_window", errors.New("iat outside window"), "iat_window"},
		{"alg_rejected", errors.New("alg not in allowlist"), "alg_rejected"},
		{"unmapped_groups", oidcsvc.ErrGroupsUnmapped, "unmapped_groups"},
		{"groups_missing", errors.New("groups missing"), "groups_missing"},
		{"jwks_unreachable", errors.New("jwks fetch failed"), "jwks_unreachable"},

		// The three categories introduced by HIGH-7 so that CRIT-5
		// (email domain) and PKCE failures render distinctly in the GUI.
		{"email_domain_not_allowed", errors.New("email domain not in allowlist"), "email_domain_not_allowed"},
		{"email_missing_but_required", errors.New("provider requires email but token has none"), "email_missing_but_required"},
		{"pkce_invalid", errors.New("pkce verifier mismatch"), "pkce_invalid"},

		// Anything the classifier does not recognise.
		{"unspecified_fallback", errors.New("totally unrecognized error"), "unspecified"},
	}

	for _, row := range table {
		t.Run(row.name, func(t *testing.T) {
			svc := &stubOIDCSvc{callbackErr: row.callbackErr}
			authHandler, _, _, _, audit, _ := newPhase5Handler(t, svc, &stubSession{}, &stubBCLVerifier{})

			preLogin := &http.Cookie{
				Name:  sessiondomain.PreLoginCookieName,
				Value: "v1.pl-abc.sk-xyz.mac",
			}
			req := httptest.NewRequest(http.MethodGet, "/auth/oidc/callback?code=abc&state=xyz", nil)
			req.AddCookie(preLogin)

			rec := httptest.NewRecorder()
			authHandler.LoginCallback(rec, req)

			// A non-302 makes the Location check meaningless, so stop here.
			if got := rec.Code; got != http.StatusFound {
				t.Fatalf("status = %d; want 302", got)
			}

			want := locationPrefix + row.reason
			if got := rec.Header().Get("Location"); !strings.HasPrefix(got, want) {
				t.Errorf("Location = %q; want prefix %q", got, want)
			}

			// The redirect is layered on top of the audit trail, not a
			// replacement for it: the failure event must still be emitted.
			if !contains(audit.events, "auth.oidc_login_failed") {
				t.Errorf("expected auth.oidc_login_failed audit event; got %v", audit.events)
			}
		})
	}
}