Files
certctl/internal/repository/postgres/user.go
T
shankar0123 663b14bfd8 feat(retention): COMP-002-RETENTION — federated-user PII purge pipeline
Sprint 6 closure of the audit's MED-severity COMP-002-RETENTION
finding.

Pre-fix posture: the federated-user admin surface
(auth_users.go::Deactivate) sets users.deactivated_at on soft-delete,
but the PII columns (email, display_name, oidc_subject) stay
populated forever. No in-code primitive for GDPR right-to-be-
forgotten; no scheduled retention purge.

This commit ships the audit's recommended two-phase fix:

  Phase 1 — operator-callable scrub primitive
    internal/service/user_retention.go
      UserRetentionService.DeleteUserPII(ctx, userID):
        - revoke all active sessions (defense-in-depth)
        - email := 'purged@redacted.local'
        - display_name := '[purged]'
        - oidc_subject := 'sha256:' || hex(sha256(original))
        - audit_events row with action=user.purge_pii,
          category=auth, actor=system

      Why hash oidc_subject instead of NULL:
        1. (oidc_provider_id, oidc_subject) UNIQUE constraint would
           trip on multiple purged users converging to NULL
        2. The hash is one-way; the original IdP-side identifier is
           unrecoverable. Re-login under the same subject mints a
           fresh u-id (right-to-be-forgotten semantics)
        3. Forensic continuity: an operator can recompute
           sha256(<known-subject>) and confirm "this user was
           deactivated then purged"

      users.id itself is preserved so historical
      audit_events.actor = u-X rows still resolve. The forensic-
      attribution chain stays intact even after the PII is gone.

  Phase 2 — scheduled batch purge
    internal/scheduler/scheduler.go
      UserRetentionPurger interface + userRetentionLoop:
        - PurgeDeactivatedUsers enumerates every user with
          deactivated_at < NOW() - retention_window
        - DeleteUserPII per row
        - per-tick batch cap (default 200) keeps blast radius
          predictable; large backlogs spread across multiple ticks
        - atomic.Bool guard + 5-min per-tick context.WithTimeout

    Repository contract grew a single new method:
      internal/repository/user.go::ListDeactivatedBefore(ctx, t)
      internal/repository/postgres/user.go: SQL-side filter
      (deactivated_at IS NOT NULL AND deactivated_at < $1)
      ORDER BY deactivated_at ASC, cross-tenant.

  Configuration
    CERTCTL_USER_RETENTION_INTERVAL   default 24h
    CERTCTL_USER_RETENTION_WINDOW     default 30 days
    CERTCTL_USER_RETENTION_BATCH_CAP  default 200

  Test stub additions for repository.UserRepository.ListDeactivatedBefore:
    internal/auth/oidc/service_test.go::stubUsers
    internal/api/handler/auth_users_test.go::stubFullUserRepo
    internal/api/handler/auth_session_oidc_test.go::stubUserRepo

  Documentation
    docs/operator/privacy-and-retention.md
      - retention pipeline diagram (day-0 deactivate → day-N purge)
      - operator config table
      - verification runbook (4 steps with SQL)
      - what's NOT covered (deferred: DSAR export, api_keys cascade,
        retroactive audit_events.details redaction)

  Tests
    internal/service/user_retention_test.go (NEW, 4 tests):
      TestDeleteUserPII_ScrubsAndRevokes
      TestDeleteUserPII_IsIdempotent
      TestPurgeDeactivatedUsers_RespectsWindow
      TestPurgeDeactivatedUsers_BatchCap

Verified locally:
  go vet ./...                                   (clean)
  gofmt -l internal/ cmd/                        (clean)
  go test -short -count=1 \
    ./internal/service/... ./internal/scheduler/... ./internal/config/...
    (all green)

Cross-sprint interaction: pairs with COMP-001-HASH (prior commit).
The user.purge_pii audit row this service emits flows through the
new hash chain, so the scrub event is itself tamper-evident.

Closes COMP-002-RETENTION. Sprint 6 is complete (2/2 findings).
2026-05-16 06:18:39 +00:00

211 lines
6.9 KiB
Go

// Copyright 2026 certctl LLC. All rights reserved.
// SPDX-License-Identifier: BUSL-1.1
package postgres
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/lib/pq"
userdomain "github.com/certctl-io/certctl/internal/auth/user/domain"
"github.com/certctl-io/certctl/internal/repository"
)
// UserRepository is the postgres implementation of
// repository.UserRepository (Auth Bundle 2 Phase 2).
type UserRepository struct {
db *sql.DB
}
// NewUserRepository wraps db in a UserRepository. The handle is stored
// as given; no query is issued at construction time.
func NewUserRepository(db *sql.DB) *UserRepository {
	repo := new(UserRepository)
	repo.db = db
	return repo
}
// userColumns is the column list spliced into every SELECT against the
// users table in this file. Its order must stay in lock-step with the
// Scan destinations in scanUser.
//
// Audit 2026-05-11 A-2 — the deactivated_at column was added in
// migration 000045 (MED-11 foundation) but, before this fix, was never
// read here. The federated-user soft-delete flow at
// internal/api/handler/auth_users.go::Deactivate set the column on
// Update, yet Get / GetByOIDCSubject / ListAll all returned a User with
// a zero-value DeactivatedAt regardless. The OIDC login path trusts the
// returned struct, so a deactivated user's next login re-elevated them.
// Listing the column here and scanning it in scanUser closes the read
// leg; service.go's upsertUser closes the enforce leg.
const userColumns = "id, tenant_id, email, display_name, oidc_subject,\n" +
	"oidc_provider_id, last_login_at, webauthn_credentials,\n" +
	"created_at, updated_at, deactivated_at"
// scanUser reads one users row — in userColumns order — into a freshly
// allocated userdomain.User. row may be anything exposing Scan; in this
// file it is called with both single-row query results and multi-row
// cursors.
//
// deactivated_at is read through sql.NullTime: a NULL column leaves
// User.DeactivatedAt nil, a non-NULL value is copied and stored by
// pointer. Scan errors are returned unwrapped so callers can still
// match sql.ErrNoRows.
func scanUser(row interface{ Scan(...interface{}) error }) (*userdomain.User, error) {
	var (
		user        userdomain.User
		deactivated sql.NullTime
	)

	err := row.Scan(
		&user.ID, &user.TenantID, &user.Email, &user.DisplayName, &user.OIDCSubject,
		&user.OIDCProviderID, &user.LastLoginAt, &user.WebAuthnCredentials,
		&user.CreatedAt, &user.UpdatedAt, &deactivated,
	)
	if err != nil {
		return nil, err
	}

	// NULL deactivated_at: the user is active, leave the pointer nil.
	if !deactivated.Valid {
		return &user, nil
	}

	when := deactivated.Time
	user.DeactivatedAt = &when
	return &user, nil
}
// Get looks up a single user by id. A missing row is reported as
// repository.ErrUserNotFound; any other failure is wrapped with the
// "users get" prefix.
func (r *UserRepository) Get(ctx context.Context, id string) (*userdomain.User, error) {
	user, err := scanUser(
		r.db.QueryRowContext(ctx, `SELECT `+userColumns+` FROM users WHERE id = $1`, id),
	)
	switch {
	case err == nil:
		return user, nil
	case errors.Is(err, sql.ErrNoRows):
		return nil, repository.ErrUserNotFound
	default:
		return nil, fmt.Errorf("users get: %w", err)
	}
}
// GetByOIDCSubject resolves a user from the (provider, subject) pair;
// it is the Phase 3 hot-path lookup performed at login time.
//
// When no row matches the tuple it returns repository.ErrUserNotFound —
// Phase 3's HandleCallback then creates the row via Create. Any other
// failure is wrapped with the "users get_by_oidc_subject" prefix.
func (r *UserRepository) GetByOIDCSubject(ctx context.Context, providerID, subject string) (*userdomain.User, error) {
	query := `
SELECT ` + userColumns + `
FROM users
WHERE oidc_provider_id = $1 AND oidc_subject = $2`

	found, err := scanUser(r.db.QueryRowContext(ctx, query, providerID, subject))
	if err == nil {
		return found, nil
	}
	if errors.Is(err, sql.ErrNoRows) {
		return nil, repository.ErrUserNotFound
	}
	return nil, fmt.Errorf("users get_by_oidc_subject: %w", err)
}
// Create inserts a new users row built from u. A PostgreSQL unique
// violation (SQLSTATE 23505) is translated into
// repository.ErrUserDuplicateOIDCSubject, the error for the unique
// constraint on (oidc_provider_id, oidc_subject); every other failure is
// wrapped with the "users create" prefix.
//
// NOTE(review): the translation keys on the SQLSTATE alone, so a unique
// violation on any other index of this table would surface as the same
// error — confirm that is acceptable.
//
// Audit 2026-05-11 A-2 — deactivated_at is written explicitly. Before
// the fix new rows got deactivated_at NULL from the schema default; the
// explicit write keeps the insert forward-compatible with future
// seed-data paths that pre-populate the column (e.g. migrating an
// external user roster in which some entries arrive deactivated). A nil
// DeactivatedAt is sent as NULL via sql.NullTime.
func (r *UserRepository) Create(ctx context.Context, u *userdomain.User) error {
	// SQLSTATE reported by PostgreSQL for unique_violation.
	const uniqueViolation = "23505"

	deactivatedAt := sql.NullTime{}
	if at := u.DeactivatedAt; at != nil {
		deactivatedAt.Time, deactivatedAt.Valid = *at, true
	}

	_, err := r.db.ExecContext(ctx, `
INSERT INTO users (
id, tenant_id, email, display_name, oidc_subject,
oidc_provider_id, last_login_at, webauthn_credentials,
deactivated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
		u.ID, u.TenantID, u.Email, u.DisplayName, u.OIDCSubject,
		u.OIDCProviderID, u.LastLoginAt, u.WebAuthnCredentials,
		deactivatedAt)
	if err == nil {
		return nil
	}

	var pqErr *pq.Error
	if errors.As(err, &pqErr) && pqErr.Code == uniqueViolation {
		return repository.ErrUserDuplicateOIDCSubject
	}
	return fmt.Errorf("users create: %w", err)
}
// Update writes the mutable fields (email, display_name, last_login_at,
// webauthn_credentials, deactivated_at) of u back to the row whose id is
// u.ID, and sets updated_at = NOW(). Not written by this statement: id,
// tenant_id, oidc_subject, oidc_provider_id, created_at.
//
// Returns repository.ErrUserNotFound when no row matched u.ID. A failed
// statement is wrapped with the "users update" prefix; a failure to
// read the affected-row count is wrapped with "users update
// rows_affected" rather than being mistaken for a missing row.
//
// Audit 2026-05-11 A-2 — deactivated_at is in the mutable set so the
// federated-user soft-delete flow at
// internal/api/handler/auth_users.go::Deactivate persists. Before the
// fix the Update SQL omitted it: the handler set u.DeactivatedAt = now
// on the in-memory struct, called Update, the SQL ignored the field, and
// the row was unchanged. A nil DeactivatedAt is written as NULL, which
// supports reactivation.
func (r *UserRepository) Update(ctx context.Context, u *userdomain.User) error {
	var deactivatedAt sql.NullTime
	if u.DeactivatedAt != nil {
		deactivatedAt = sql.NullTime{Time: *u.DeactivatedAt, Valid: true}
	}

	res, err := r.db.ExecContext(ctx, `
UPDATE users SET
email = $2,
display_name = $3,
last_login_at = $4,
webauthn_credentials = $5,
deactivated_at = $6,
updated_at = NOW()
WHERE id = $1`,
		u.ID, u.Email, u.DisplayName, u.LastLoginAt, u.WebAuthnCredentials, deactivatedAt)
	if err != nil {
		return fmt.Errorf("users update: %w", err)
	}

	// Do not discard this error: with n left at 0 a successful update
	// would be reported to the caller as ErrUserNotFound.
	n, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("users update rows_affected: %w", err)
	}
	if n == 0 {
		return repository.ErrUserNotFound
	}
	return nil
}
// ListAll returns every user whose tenant_id equals tenantID, ordered by
// created_at ASC. The result is a nil slice when the tenant has no
// users.
//
// On any failure — issuing the query, scanning a row, or an iteration
// error reported by rows.Err — the returned slice is nil and the error
// is wrapped, so callers never receive a partially filled list next to
// a non-nil error.
func (r *UserRepository) ListAll(ctx context.Context, tenantID string) ([]*userdomain.User, error) {
	rows, err := r.db.QueryContext(ctx,
		`SELECT `+userColumns+` FROM users WHERE tenant_id = $1 ORDER BY created_at ASC`,
		tenantID)
	if err != nil {
		return nil, fmt.Errorf("users list_all: %w", err)
	}
	defer rows.Close()

	var out []*userdomain.User
	for rows.Next() {
		u, err := scanUser(rows)
		if err != nil {
			return nil, fmt.Errorf("users scan: %w", err)
		}
		out = append(out, u)
	}
	// rows.Next returning false can mean "no more rows" or "iteration
	// failed"; rows.Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("users list_all: %w", err)
	}
	return out, nil
}
// ListDeactivatedBefore returns every user (across all tenants) whose
// deactivated_at is not NULL AND strictly before threshold, ordered by
// deactivated_at ASC. Sprint 6 COMP-002-RETENTION — the
// userRetentionLoop in the scheduler walks this list per tick and calls
// UserRetentionService.DeleteUserPII on each. Cross-tenant on purpose: a
// single retention policy spans the whole control plane.
//
// The query carries no LIMIT, so the full match set is loaded into
// memory; any batch cap has to be applied by the caller.
//
// On any failure — issuing the query, scanning a row, or an iteration
// error reported by rows.Err — the returned slice is nil and the error
// is wrapped, so callers never receive a partially filled list next to
// a non-nil error.
func (r *UserRepository) ListDeactivatedBefore(ctx context.Context, threshold time.Time) ([]*userdomain.User, error) {
	rows, err := r.db.QueryContext(ctx,
		`SELECT `+userColumns+`
FROM users
WHERE deactivated_at IS NOT NULL
AND deactivated_at < $1
ORDER BY deactivated_at ASC`,
		threshold)
	if err != nil {
		return nil, fmt.Errorf("users list_deactivated_before: %w", err)
	}
	defer rows.Close()

	var out []*userdomain.User
	for rows.Next() {
		u, err := scanUser(rows)
		if err != nil {
			return nil, fmt.Errorf("users scan: %w", err)
		}
		out = append(out, u)
	}
	// rows.Next returning false can mean "no more rows" or "iteration
	// failed"; rows.Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("users list_deactivated_before: %w", err)
	}
	return out, nil
}