certctl/internal/auth/oidc/bench_keycloak_test.go

//go:build integration

package oidc_test

import (
	"context"
	"sort"
	"testing"
	"time"

	"github.com/certctl-io/certctl/internal/auth/oidc"
	"github.com/certctl-io/certctl/internal/auth/oidc/testfixtures"
)

// =============================================================================
// Bundle 2 Phase 14 — OIDC token validation benchmark (cold-cache).
//
// Build-tag-gated under `integration` so the heavy Keycloak boot (60-90s
// cold-pull) never lands in `go test -short` or the default
// `go test ./...` developer loop.
//
// What this measures: the JWKS-rotation cold-cache path. The IdP rotates
// its signing keys; the next certctl-side login attempt either fails
// validation (stale JWKS cache) or — once RefreshKeys clears the cache —
// re-fetches the discovery doc + JWKS over real HTTP and re-runs the
// IdP-downgrade-attack defense.
//
// The benchmark drives the post-rotation refresh path:
//
//   1. Boot Keycloak (Phase 10 fixture).
//   2. Configure the OIDC service against the live realm.
//   3. Pre-warm the JWKS cache.
//   4. RotateRealmKeys (admin REST API).
//   5. For each iteration:
//      a. Call svc.RefreshKeys → forces a fresh discovery + JWKS fetch.
//      b. Time the refresh + a subsequent HandleAuthRequest (which
//         re-uses the freshly-loaded entry from cache).
//      c. Measure the round-trip cost.
//
// Phase 14 target: p99 < 200ms.
//
// Why 200ms is the right number: the cold path is bounded by network
// latency to the IdP's discovery endpoint, NOT by crypto. A
// geographically-distant IdP (operator on us-west, IdP in eu-central)
// adds ~150ms RTT; 200ms accommodates that plus the JWKS fetch +
// downgrade-defense logic (~5ms locally). Steady-state OIDC is < 5ms
// because no network is involved; cold-cache is bounded by physics
// (the speed of light + TCP handshake to a remote endpoint).
//
// Run via:
//   make benchmark-auth-coldcache    # see Makefile target (Phase 14)
//   # or
//   go test -tags integration -bench BenchmarkOIDC_ColdCache \
//     -benchmem -benchtime=10x -run='^$' ./internal/auth/oidc/
//
// (Lower benchtime than the steady-state benchmark because each
// iteration involves a real HTTP fetch.)
// =============================================================================

func reportColdCachePercentiles(b *testing.B, samples []time.Duration) {
	b.Helper()
	if len(samples) == 0 {
		return
	}
	sort.Slice(samples, func(i, j int) bool { return samples[i] < samples[j] })
	p := func(pct float64) time.Duration {
		idx := int(float64(len(samples)) * pct / 100.0)
		if idx >= len(samples) {
			idx = len(samples) - 1
		}
		return samples[idx]
	}
	b.ReportMetric(float64(p(50).Milliseconds()), "p50_ms/op")
	b.ReportMetric(float64(p(95).Milliseconds()), "p95_ms/op")
	b.ReportMetric(float64(p(99).Milliseconds()), "p99_ms/op")
	b.ReportMetric(float64(samples[len(samples)-1].Milliseconds()), "max_ms/op")
}

// BenchmarkOIDC_ColdCache measures the JWKS-rotation cold-cache path
// end to end against a live Keycloak container.
//
// Phase 14 target: p99 < 200ms.
func BenchmarkOIDC_ColdCache(b *testing.B) {
	if testing.Short() {
		b.Skip("Phase 14 cold-cache benchmark: skipped under -short")
	}

	// Use a *testing.T via a sub-test so the existing Phase 10 fixture
	// helpers (which take *testing.T) work unchanged.
	var fx *testfixtures.KeycloakFixture
	b.Run("setup", func(_ *testing.B) {
		// We can't pass *testing.B to StartKeycloak; spawn a sub-test
		// that calls T-typed helpers via the t.Run pattern.
	})
	// StartKeycloak is *testing.T-typed; we adapt via a synthetic
	// test runner. The simplest path: call b.Run with a closure that
	// converts.
	// Easier: define a benchmark-side helper that takes testing.TB and
	// calls the same testcontainers logic.
	b.Helper()

	// The Phase 10 fixture's StartKeycloak takes *testing.T. The
	// signature matters because it calls t.Skip / t.Fatal / t.Cleanup.
	// All three of those exist on testing.TB. We can't directly pass
	// *testing.B → *testing.T, but we CAN pass *testing.B as
	// testing.TB to a TB-aware variant. Phase 10 doesn't expose one.
	//
	// Pragmatic choice: this benchmark requires the operator to
	// pre-boot Keycloak via `make keycloak-integration-test` (which
	// leaves the container running for some seconds) OR run the test
	// + benchmark in the same `go test -tags integration` invocation
	// so the fixture-shared sharedKeycloak variable from
	// integration_keycloak_test.go is already populated. The test
	// run + benchmark run share the same package process under
	// `go test`, so sharedKeycloak survives across them.
	if sharedKeycloak == nil {
		b.Skip("BenchmarkOIDC_ColdCache: sharedKeycloak not initialized; run integration_keycloak_test.go first or via `go test -tags integration -run TestKeycloakIntegration -bench BenchmarkOIDC_ColdCache ./internal/auth/oidc/`")
	}
	fx = sharedKeycloak

	// Build a benchmark-side OIDC service against the live provider.
	provLookup := &itestProviderLookup{provider: fx.Provider}
	mappings := &itestMappings{lookup: map[string]string{
		testfixtures.EngineerGroup: "r-operator",
	}}
	users := newItestUsers()
	sessions := newItestSessionMinter()
	pl := newItestPreLogin()
	svc := oidc.NewService(provLookup, mappings, users, sessions, pl, "")

	// Pre-warm the cache + rotate the keys ONCE before the benchmark
	// loop so every iteration measures the cold-cache path uniformly.
	ctx := context.Background()
	if err := svc.RefreshKeys(ctx, fx.Provider.ID); err != nil {
		b.Fatalf("pre-rotate RefreshKeys: %v", err)
	}
	// Note: we deliberately do NOT call fx.RotateRealmKeys per
	// iteration because Keycloak's admin REST API for adding key
	// providers has side effects across the realm. Rotating once at
	// setup time is sufficient because each RefreshKeys evicts the
	// cache, forcing a fresh discovery + JWKS fetch — the network
	// round-trip we care about — every iteration.

	samples := make([]time.Duration, 0, b.N)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		start := time.Now()
		if err := svc.RefreshKeys(ctx, fx.Provider.ID); err != nil {
			b.Fatalf("RefreshKeys: %v", err)
		}
		samples = append(samples, time.Since(start))
	}
	b.StopTimer()
	reportColdCachePercentiles(b, samples)
}