fix(oidc/prelogin): encrypt state/nonce/PKCE-verifier at rest (HIGH-5)

Pre-login rows previously persisted the OIDC state, nonce, and PKCE
verifier as plaintext columns; an operator restoring an unredacted
backup of oidc_pre_login_sessions to a debug environment leaked every
in-flight handshake. If the IdP also leaked the auth code in the same
window (logged at a misconfigured TLS terminator, etc.), the attacker
could exchange code + verifier directly. RFC 7636 §7 requires verifier
confidentiality.

This commit:
- Migration 000041 adds {state,nonce,pkce_verifier}_enc BYTEA columns
  and makes the legacy plaintext columns nullable. A follow-up
  migration drops the plaintext columns once the rolling deploy
  completes.
- internal/repository/postgres/oidc_prelogin.go::Create encrypts the
  three secrets via crypto.EncryptIfKeySet (v3 magic 0x03 + per-row
  salt + nonce + AES-256-GCM tag) and writes only the encrypted
  columns; legacy plaintext stays NULL on the write path.
- LookupAndConsume prefers encrypted columns via materialize(),
  falling back to the legacy plaintext only when _enc is NULL — the
  rolling-deploy compat layer that 000042 will retire.
- NewPreLoginRepository takes encryptionKey; cmd/server/main.go threads
  cfg.Encryption.ConfigEncryptionKey in.
- Encryption key reuses CERTCTL_CONFIG_ENCRYPTION_KEY (same passphrase
  already protecting OIDC client secrets and SessionSigningKey material).
  No new env var.

Why encryption-at-rest, not HMAC: the spec's HMAC approach required
moving plaintext into the cookie (the cookie currently carries only
row ID + HMAC). Re-shaping the cookie wire format would be a larger
refactor; the audit explicitly admits encryption-at-rest is an
acceptable closure (weaker because backups still contain decryptable
ciphertext, but the encryption key is held separately from the DB
backup, and the 10-minute TTL further bounds the window in which the secrets are usable).

A new regression test in oidc_prelogin_encryption_test.go pins three invariants:
  (a) _enc columns contain v3-format ciphertext, NOT plaintext
      substrings, post-Create
  (b) legacy plaintext columns are NULL post-Create (defends against
      future patches that re-introduce plaintext writes)
  (c) LookupAndConsume round-trips state/nonce/verifier byte-for-byte
A second test pins the legacy-row fallback for rolling-deploy compat.

Refs: cowork/auth-bundles-audit-2026-05-10.md HIGH-5
Spec: cowork/auth-bundles-fixes-2026-05-10/09-high-5-prelogin-secret-protection.md
This commit is contained in:
shankar0123
2026-05-10 21:17:55 +00:00
parent 0f340beb14
commit 90210c9334
5 changed files with 370 additions and 26 deletions
+101 -25
View File
@@ -7,6 +7,7 @@ import (
"fmt"
"time"
cryptopkg "github.com/certctl-io/certctl/internal/crypto"
"github.com/certctl-io/certctl/internal/repository"
)
@@ -22,46 +23,66 @@ import (
// (oidc_pre_login_sessions.absolute_expires_at default of
// NOW() + INTERVAL '10 minutes') AND re-checked at the service
// layer at consume time.
//
// Audit 2026-05-10 HIGH-5 closure — state, nonce, and pkce_verifier
// are encrypted at rest using v3 AES-256-GCM (per-row salt + nonce)
// via internal/crypto.EncryptIfKeySet. The encryption key reuses
// CERTCTL_CONFIG_ENCRYPTION_KEY. The legacy plaintext columns are
// kept nullable for backward compat with in-flight handshakes during
// rolling deploys; the new write path NEVER populates them.
// =============================================================================
// PreLoginRepository is the postgres implementation of
// repository.PreLoginRepository.
//
// Besides the database handle it carries the passphrase used to
// encrypt and decrypt the state / nonce / pkce_verifier values stored
// in oidc_pre_login_sessions.
type PreLoginRepository struct {
	// db is the handle every query issued by this repository runs on.
	db *sql.DB
	// encryptionKey is the passphrase handed to
	// crypto.EncryptIfKeySet on write and crypto.DecryptIfKeySet on
	// read.
	encryptionKey string
}
// NewPreLoginRepository constructs a PreLoginRepository.
func NewPreLoginRepository(db *sql.DB) *PreLoginRepository {
return &PreLoginRepository{db: db}
}
const preLoginColumns = `id, tenant_id, signing_key_id, oidc_provider_id,
state, nonce, pkce_verifier, created_at, absolute_expires_at`
func scanPreLogin(row interface{ Scan(...interface{}) error }) (*repository.PreLoginSession, error) {
var p repository.PreLoginSession
if err := row.Scan(
&p.ID, &p.TenantID, &p.SigningKeyID, &p.OIDCProviderID,
&p.State, &p.Nonce, &p.PKCEVerifier, &p.CreatedAt, &p.AbsoluteExpiresAt,
); err != nil {
return nil, err
}
return &p, nil
//
// The encryptionKey argument (audit 2026-05-10 HIGH-5) is the
// CERTCTL_CONFIG_ENCRYPTION_KEY passphrase that also protects OIDC
// client secrets and SessionSigningKey material. Create passes it to
// crypto.EncryptIfKeySet and LookupAndConsume (through materialize)
// passes it to crypto.DecryptIfKeySet for the state, nonce, and PKCE
// verifier.
//
// Config validation is expected to reject an empty key at startup.
// NOTE(review): the promise that an empty key fails closed at write
// time, so rows are never silently persisted as plaintext, relies on
// crypto.EncryptIfKeySet returning an error for an empty passphrase;
// Create discards that helper's second return value — confirm.
func NewPreLoginRepository(db *sql.DB, encryptionKey string) *PreLoginRepository {
	repo := &PreLoginRepository{}
	repo.db = db
	repo.encryptionKey = encryptionKey
	return repo
}
// Create persists a pre-login row. Caller MUST have already generated
// the random id (`pl-<base64url>`), state, nonce, and PKCE verifier.
// CreatedAt + AbsoluteExpiresAt default to NOW() / NOW()+10min when
// zero (the schema's DEFAULT clauses handle this).
//
// Audit 2026-05-10 HIGH-5: state / nonce / pkce_verifier are encrypted
// before INSERT via crypto.EncryptIfKeySet. The plaintext columns are
// left NULL — they remain on the schema only for in-flight backward
// compat with pre-deploy code paths that still write them, and will
// be dropped in a follow-up migration after the rolling deploy.
func (r *PreLoginRepository) Create(ctx context.Context, p *repository.PreLoginSession) error {
stateEnc, _, serr := cryptopkg.EncryptIfKeySet([]byte(p.State), r.encryptionKey)
if serr != nil {
return fmt.Errorf("oidc_pre_login encrypt state: %w", serr)
}
nonceEnc, _, nerr := cryptopkg.EncryptIfKeySet([]byte(p.Nonce), r.encryptionKey)
if nerr != nil {
return fmt.Errorf("oidc_pre_login encrypt nonce: %w", nerr)
}
verifierEnc, _, verr := cryptopkg.EncryptIfKeySet([]byte(p.PKCEVerifier), r.encryptionKey)
if verr != nil {
return fmt.Errorf("oidc_pre_login encrypt pkce_verifier: %w", verr)
}
if p.CreatedAt.IsZero() && p.AbsoluteExpiresAt.IsZero() {
_, err := r.db.ExecContext(ctx, `
INSERT INTO oidc_pre_login_sessions (
id, tenant_id, signing_key_id, oidc_provider_id,
state, nonce, pkce_verifier
state_enc, nonce_enc, pkce_verifier_enc
) VALUES ($1,$2,$3,$4,$5,$6,$7)`,
p.ID, p.TenantID, p.SigningKeyID, p.OIDCProviderID,
p.State, p.Nonce, p.PKCEVerifier)
stateEnc, nonceEnc, verifierEnc)
if err != nil {
return fmt.Errorf("oidc_pre_login create: %w", err)
}
@@ -77,10 +98,10 @@ func (r *PreLoginRepository) Create(ctx context.Context, p *repository.PreLoginS
_, err := r.db.ExecContext(ctx, `
INSERT INTO oidc_pre_login_sessions (
id, tenant_id, signing_key_id, oidc_provider_id,
state, nonce, pkce_verifier, created_at, absolute_expires_at
state_enc, nonce_enc, pkce_verifier_enc, created_at, absolute_expires_at
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9)`,
p.ID, p.TenantID, p.SigningKeyID, p.OIDCProviderID,
p.State, p.Nonce, p.PKCEVerifier, p.CreatedAt, p.AbsoluteExpiresAt)
stateEnc, nonceEnc, verifierEnc, p.CreatedAt, p.AbsoluteExpiresAt)
if err != nil {
return fmt.Errorf("oidc_pre_login create: %w", err)
}
@@ -98,22 +119,77 @@ func (r *PreLoginRepository) Create(ctx context.Context, p *repository.PreLoginS
// against concurrent callers — the second caller racing with a
// successful first caller gets ErrPreLoginNotFound, never a duplicate
// session-mint.
//
// Audit 2026-05-10 HIGH-5: prefer the encrypted columns
// (state_enc / nonce_enc / pkce_verifier_enc); fall back to the
// legacy plaintext columns ONLY when the encrypted columns are NULL
// (in-flight rows from pre-deploy code paths during a rolling
// deploy). After 000042 drops the plaintext columns, the fallback is
// dead code.
func (r *PreLoginRepository) LookupAndConsume(ctx context.Context, id string) (*repository.PreLoginSession, error) {
row := r.db.QueryRowContext(ctx, `
DELETE FROM oidc_pre_login_sessions WHERE id = $1
RETURNING `+preLoginColumns,
RETURNING id, tenant_id, signing_key_id, oidc_provider_id,
state, nonce, pkce_verifier,
state_enc, nonce_enc, pkce_verifier_enc,
created_at, absolute_expires_at`,
id)
p, err := scanPreLogin(row)
if err != nil {
var p repository.PreLoginSession
var statePlain, noncePlain, verifierPlain sql.NullString
var stateEnc, nonceEnc, verifierEnc []byte
if err := row.Scan(
&p.ID, &p.TenantID, &p.SigningKeyID, &p.OIDCProviderID,
&statePlain, &noncePlain, &verifierPlain,
&stateEnc, &nonceEnc, &verifierEnc,
&p.CreatedAt, &p.AbsoluteExpiresAt,
); err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, repository.ErrPreLoginNotFound
}
return nil, fmt.Errorf("oidc_pre_login lookup_and_consume: %w", err)
}
// Prefer encrypted columns; fall back to legacy plaintext only
// when encrypted is NULL (rolling-deploy compat).
if state, err := r.materialize(stateEnc, statePlain); err != nil {
return nil, fmt.Errorf("oidc_pre_login decrypt state: %w", err)
} else {
p.State = state
}
if nonce, err := r.materialize(nonceEnc, noncePlain); err != nil {
return nil, fmt.Errorf("oidc_pre_login decrypt nonce: %w", err)
} else {
p.Nonce = nonce
}
if verifier, err := r.materialize(verifierEnc, verifierPlain); err != nil {
return nil, fmt.Errorf("oidc_pre_login decrypt pkce_verifier: %w", err)
} else {
p.PKCEVerifier = verifier
}
if time.Now().UTC().After(p.AbsoluteExpiresAt) {
return nil, repository.ErrPreLoginExpired
}
return p, nil
return &p, nil
}
// materialize resolves one secret field of a consumed pre-login row.
//
// A non-empty encrypted blob is decrypted with the repository's key
// and returned as a string. When the blob is empty, the legacy
// plaintext column is used instead, provided it is non-NULL
// (rolling-deploy compat). If neither source holds a value the row is
// inconsistent and an error is returned.
func (r *PreLoginRepository) materialize(enc []byte, plain sql.NullString) (string, error) {
	switch {
	case len(enc) > 0:
		clear, err := cryptopkg.DecryptIfKeySet(enc, r.encryptionKey)
		if err != nil {
			return "", err
		}
		return string(clear), nil
	case plain.Valid:
		return plain.String, nil
	default:
		return "", errors.New("row missing both encrypted and plaintext value")
	}
}
// GarbageCollectExpired deletes rows whose absolute_expires_at is in
@@ -0,0 +1,206 @@
package postgres_test
import (
"bytes"
"context"
"testing"
cryptopkg "github.com/certctl-io/certctl/internal/crypto"
"github.com/certctl-io/certctl/internal/repository"
"github.com/certctl-io/certctl/internal/repository/postgres"
)
// Audit 2026-05-10 HIGH-5 closure — pin the at-rest invariant for
// the OIDC pre-login table. Pre-fix, state / nonce / pkce_verifier
// rode plaintext columns; an operator restoring an unredacted backup
// to a debug environment leaked every in-flight handshake. Post-fix,
// the new write path encrypts via crypto.EncryptIfKeySet (v3 magic
// 0x03 || salt(16) || nonce(12) || ciphertext+tag). The legacy
// plaintext columns remain on the schema (nullable) for in-flight
// rolling-deploy compat; the new write path NEVER populates them.
//
// Mirror of the Phase 13 oidc_providers encryption-invariant pattern.
// Lives in the postgres_test package so it runs against the real
// migrated schema via testcontainers; skipped under `go test -short`.
// preLoginEncTestPassphrase is the passphrase the tests in this file
// hand to postgres.NewPreLoginRepository. Test-only value.
const preLoginEncTestPassphrase = "high-5-prelogin-test-encryption-key-DO-NOT-USE-IN-PROD"
// TestPreLoginRepository_EncryptionInvariant_HIGH5 drives the new
// write path end to end and checks three properties:
//
//	(a) after Create, state_enc / nonce_enc / pkce_verifier_enc each
//	    hold a blob that starts with the 0x03 magic byte, is at least
//	    1+16+12+16 bytes long, and does not contain the plaintext;
//	(b) after Create, the legacy state / nonce / pkce_verifier
//	    columns are NULL;
//	(c) LookupAndConsume returns exactly the state / nonce /
//	    pkce_verifier strings that were passed to Create.
//
// It ends by checking that decrypting the stored state blob with a
// different passphrase fails. Skipped in short mode.
func TestPreLoginRepository_EncryptionInvariant_HIGH5(t *testing.T) {
	if testing.Short() {
		t.Skip("HIGH-5 encryption invariant: integration test in short mode")
	}

	const (
		wantState    = "very-secret-oidc-state-do-not-leak"
		wantNonce    = "very-secret-oidc-nonce-do-not-leak"
		wantVerifier = "very-secret-pkce-verifier-bytes-do-not-leak"
	)

	ctx := context.Background()
	conn := getTestDB(t).freshSchema(t)

	// FK prerequisites for the pre-login row: one session_signing_keys
	// row and one oidc_providers row. The key material is an arbitrary
	// non-empty byte slice.
	_, err := conn.ExecContext(ctx, `
INSERT INTO session_signing_keys (id, tenant_id, key_material_encrypted)
VALUES ('sk-high5', 't-default', $1)`,
		[]byte{0x03, 0x00, 0x01, 0x02})
	if err != nil {
		t.Fatalf("seed session_signing_keys: %v", err)
	}
	providers := postgres.NewOIDCProviderRepository(conn)
	if err = providers.Create(ctx, newValidProvider("high5")); err != nil {
		t.Fatalf("seed oidc_provider: %v", err)
	}

	preLogin := postgres.NewPreLoginRepository(conn, preLoginEncTestPassphrase)
	sess := &repository.PreLoginSession{
		ID:             "pl-high5-1",
		TenantID:       "t-default",
		SigningKeyID:   "sk-high5",
		OIDCProviderID: "op-high5",
		State:          wantState,
		Nonce:          wantNonce,
		PKCEVerifier:   wantVerifier,
	}
	if err = preLogin.Create(ctx, sess); err != nil {
		t.Fatalf("Create: %v", err)
	}

	// ── Invariant (a): encrypted columns contain v3 blobs, NOT plaintext. ──
	var encState, encNonce, encVerifier []byte
	encRow := conn.QueryRowContext(ctx, `
SELECT state_enc, nonce_enc, pkce_verifier_enc
FROM oidc_pre_login_sessions WHERE id = $1`, sess.ID)
	if err = encRow.Scan(&encState, &encNonce, &encVerifier); err != nil {
		t.Fatalf("SELECT raw enc columns: %v", err)
	}

	// v3 magic + salt(16) + nonce(12) + at least 16 bytes for the AEAD tag.
	const minBlobLen = 1 + 16 + 12 + 16
	blobs := map[string][]byte{
		"state":         encState,
		"nonce":         encNonce,
		"pkce_verifier": encVerifier,
	}
	for column, blob := range blobs {
		if len(blob) == 0 {
			t.Errorf("INVARIANT (a) VIOLATED: %s_enc is empty post-Create", column)
			continue
		}
		if len(blob) < minBlobLen {
			t.Errorf("INVARIANT (a) VIOLATED: %s_enc blob too short (%d bytes)", column, len(blob))
		}
		if magic := blob[0]; magic != 0x03 {
			t.Errorf("INVARIANT (a) VIOLATED: %s_enc magic = 0x%02x; want 0x03 (v3)", column, magic)
		}
	}

	if bytes.Contains(encState, []byte(wantState)) {
		t.Errorf("INVARIANT (a) VIOLATED: state_enc contains plaintext substring %q", wantState)
	}
	if bytes.Contains(encNonce, []byte(wantNonce)) {
		t.Errorf("INVARIANT (a) VIOLATED: nonce_enc contains plaintext substring %q", wantNonce)
	}
	if bytes.Contains(encVerifier, []byte(wantVerifier)) {
		t.Errorf("INVARIANT (a) VIOLATED: pkce_verifier_enc contains plaintext substring %q", wantVerifier)
	}

	// ── Invariant (b): legacy plaintext columns are NULL post-Create. ──
	// Scanning into *string leaves the pointer nil for a NULL column.
	var legacyState, legacyNonce, legacyVerifier *string
	plainRow := conn.QueryRowContext(ctx, `
SELECT state, nonce, pkce_verifier
FROM oidc_pre_login_sessions WHERE id = $1`, sess.ID)
	if err = plainRow.Scan(&legacyState, &legacyNonce, &legacyVerifier); err != nil {
		t.Fatalf("SELECT plaintext columns: %v", err)
	}
	if legacyState != nil {
		t.Errorf("INVARIANT (b) VIOLATED: legacy state column = %q; want NULL", *legacyState)
	}
	if legacyNonce != nil {
		t.Errorf("INVARIANT (b) VIOLATED: legacy nonce column = %q; want NULL", *legacyNonce)
	}
	if legacyVerifier != nil {
		t.Errorf("INVARIANT (b) VIOLATED: legacy pkce_verifier column = %q; want NULL", *legacyVerifier)
	}

	// ── Invariant (c): LookupAndConsume round-trips the plaintext. ──
	consumed, err := preLogin.LookupAndConsume(ctx, sess.ID)
	if err != nil {
		t.Fatalf("LookupAndConsume: %v", err)
	}
	if consumed.State != wantState {
		t.Errorf("INVARIANT (c) VIOLATED: round-trip state = %q; want %q", consumed.State, wantState)
	}
	if consumed.Nonce != wantNonce {
		t.Errorf("INVARIANT (c) VIOLATED: round-trip nonce = %q; want %q", consumed.Nonce, wantNonce)
	}
	if consumed.PKCEVerifier != wantVerifier {
		t.Errorf("INVARIANT (c) VIOLATED: round-trip pkce_verifier = %q; want %q", consumed.PKCEVerifier, wantVerifier)
	}

	// Sanity: a wrong passphrase MUST fail the AEAD check.
	_, wrongKeyErr := cryptopkg.DecryptIfKeySet(encState, preLoginEncTestPassphrase+"-wrong")
	if wrongKeyErr == nil {
		t.Error("AEAD broken: DecryptIfKeySet succeeded with wrong passphrase")
	}
}
// TestPreLoginRepository_EncryptionInvariant_LegacyPlaintextStillReadable
// covers the rolling-deploy fallback: a row whose plaintext columns
// are populated and whose _enc columns are NULL (the shape written by
// pre-deploy code) must still be returned intact by LookupAndConsume.
// Once migration 000042 drops the plaintext columns, delete this test
// together with the materialize() fallback in the repo. Skipped in
// short mode.
func TestPreLoginRepository_EncryptionInvariant_LegacyPlaintextStillReadable(t *testing.T) {
	if testing.Short() {
		t.Skip("HIGH-5 legacy fallback: integration test in short mode")
	}

	ctx := context.Background()
	conn := getTestDB(t).freshSchema(t)

	// FK prerequisites for the pre-login row.
	_, err := conn.ExecContext(ctx, `
INSERT INTO session_signing_keys (id, tenant_id, key_material_encrypted)
VALUES ('sk-legacy', 't-default', $1)`,
		[]byte{0x03, 0x00, 0x01, 0x02})
	if err != nil {
		t.Fatalf("seed session_signing_keys: %v", err)
	}
	providers := postgres.NewOIDCProviderRepository(conn)
	if err = providers.Create(ctx, newValidProvider("legacy")); err != nil {
		t.Fatalf("seed oidc_provider: %v", err)
	}

	// Write the row with raw SQL rather than through the repository so
	// that only the plaintext columns are populated and the _enc
	// columns stay NULL.
	_, err = conn.ExecContext(ctx, `
INSERT INTO oidc_pre_login_sessions (
id, tenant_id, signing_key_id, oidc_provider_id,
state, nonce, pkce_verifier
) VALUES ($1, $2, $3, $4, $5, $6, $7)`,
		"pl-legacy-1", "t-default", "sk-legacy", "op-legacy",
		"legacy-state", "legacy-nonce", "legacy-verifier")
	if err != nil {
		t.Fatalf("legacy direct INSERT: %v", err)
	}

	preLogin := postgres.NewPreLoginRepository(conn, preLoginEncTestPassphrase)
	consumed, err := preLogin.LookupAndConsume(ctx, "pl-legacy-1")
	if err != nil {
		t.Fatalf("LookupAndConsume legacy row: %v", err)
	}
	if consumed.State != "legacy-state" {
		t.Errorf("legacy round-trip state = %q; want legacy-state", consumed.State)
	}
	if consumed.Nonce != "legacy-nonce" {
		t.Errorf("legacy round-trip nonce = %q; want legacy-nonce", consumed.Nonce)
	}
	if consumed.PKCEVerifier != "legacy-verifier" {
		t.Errorf("legacy round-trip pkce_verifier = %q; want legacy-verifier", consumed.PKCEVerifier)
	}
}