Files
certctl/internal/api/middleware/audit.go
T
shankar0123 7cb453a336 chore(fmt): repo-wide gofmt -w sweep — close drift surfaced by ci-pipeline-cleanup Phase 4
Mechanical reformat. The new 'gofmt drift' CI step (added in
ci-pipeline-cleanup Phase 4, commit 0f205a8) surfaced 111 files
with accumulated gofmt drift across cmd/, internal/, and deploy/test/.

Each file's diff is gofmt-standard: whitespace adjustments, intra-
group import sorting (alphabetical by import path within blank-line-
separated groups), and struct-tag column alignment. No semantic
changes — verified via 'git diff --ignore-all-space' which shows only
the line-position deltas from import reordering.

The gate stays in place after this commit. Going forward it catches
gofmt drift at PR time.
2026-04-30 22:33:57 +00:00

234 lines
8.8 KiB
Go

package middleware
import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"strings"
"sync"
"time"
)
// AuditRecorder is the interface that the audit middleware uses to record API calls.
// This avoids importing the service package directly, maintaining dependency inversion.
//
// Implementations may perform I/O (e.g., database writes). The middleware invokes
// RecordAPICall from a tracked goroutine so that callers can drain in-flight
// recordings during graceful shutdown via AuditMiddleware.Flush.
type AuditRecorder interface {
RecordAPICall(ctx context.Context, method, path, actor string, bodyHash string, status int, latencyMs int64) error
}
// AuditConfig holds configuration for the API audit logging middleware.
type AuditConfig struct {
// ExcludePaths are path prefixes to skip audit logging (e.g., "/health", "/ready").
ExcludePaths []string
// Logger for audit middleware errors (audit recording failures shouldn't break requests).
Logger *slog.Logger
}
// ErrAuditFlushTimeout is returned by AuditMiddleware.Flush when in-flight audit
// recordings do not complete before the provided context is cancelled or its
// deadline elapses. It mirrors scheduler.ErrSchedulerShutdownTimeout so callers
// can branch on graceful-shutdown timeouts consistently across subsystems.
var ErrAuditFlushTimeout = errors.New("audit middleware flush timeout")
// AuditMiddleware is the handle returned by NewAuditLog. It wraps the audit
// logging HTTP middleware and tracks the goroutines spawned to record each API
// call, so that callers can drain them during graceful shutdown (M-1, CWE-662
// / CWE-400). The goroutines themselves still run detached from the request
// context — the shutdown-drain signal flows through this struct's WaitGroup
// instead of the per-request context.
type AuditMiddleware struct {
recorder AuditRecorder
logger *slog.Logger
excludeSet map[string]bool
// wg tracks every audit-recording goroutine spawned by Middleware so Flush
// can block until they complete before the DB pool is torn down.
wg sync.WaitGroup
}
// NewAuditLog constructs the API audit logging middleware. The returned
// *AuditMiddleware exposes the HTTP middleware via the Middleware method value
// (same func(http.Handler) http.Handler shape) and a Flush method that the
// process shutdown path must call after the HTTP server has stopped accepting
// new requests but before the audit recorder's backing store (e.g., the
// database connection pool) is closed.
//
// The middleware records method, path, authenticated actor, request body hash,
// response status, and latency. Recording is best-effort — individual failures
// are logged and do not affect the HTTP response. Shutdown is NOT best-effort:
// Flush must succeed (or time out, returning ErrAuditFlushTimeout) so that
// in-flight events are not lost when the audit recorder's connection pool is
// closed out from under the goroutines.
func NewAuditLog(recorder AuditRecorder, cfg AuditConfig) *AuditMiddleware {
excludeSet := make(map[string]bool, len(cfg.ExcludePaths))
for _, p := range cfg.ExcludePaths {
excludeSet[p] = true
}
logger := cfg.Logger
if logger == nil {
logger = slog.Default()
}
return &AuditMiddleware{
recorder: recorder,
logger: logger,
excludeSet: excludeSet,
}
}
// Middleware is the http.Handler wrapper. It has the standard
// func(http.Handler) http.Handler middleware signature so it can be composed
// into an existing middleware chain via a method value (auditMiddleware.Middleware).
func (a *AuditMiddleware) Middleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Skip excluded paths (health, readiness probes)
for prefix := range a.excludeSet {
if strings.HasPrefix(r.URL.Path, prefix) {
next.ServeHTTP(w, r)
return
}
}
start := time.Now()
// Hash request body for audit (don't store raw bodies — security + size concerns)
bodyHash := ""
if r.Body != nil && r.Body != http.NoBody {
hasher := sha256.New()
body, err := io.ReadAll(r.Body)
if err == nil && len(body) > 0 {
hasher.Write(body)
bodyHash = hex.EncodeToString(hasher.Sum(nil))[:16] // truncated hash
// Restore the body for downstream handlers
r.Body = io.NopCloser(strings.NewReader(string(body)))
}
}
// Extract actor from auth context
actor := "anonymous"
if user := GetUser(r.Context()); user != "" {
actor = user
}
// Wrap response writer to capture status code
wrapped := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
next.ServeHTTP(wrapped, r)
latency := time.Since(start).Milliseconds()
// Snapshot request-derived inputs so the goroutine does not race with
// the http.Server reusing r after this handler returns.
method := r.Method
path := r.URL.Path
status := wrapped.statusCode
// Derive a detached context that preserves request-scoped values
// (trace IDs, auth info carried via context keys) but is not cancelled
// when the HTTP server finalizes the request. Using r.Context()
// directly would cause the async audit write to observe ctx.Done()
// as soon as the response completes; using context.Background() would
// discard useful observability metadata. WithoutCancel gives us both
// (M-2 / D-3).
auditCtx := context.WithoutCancel(r.Context())
// Record audit event asynchronously (best-effort, don't block response).
// SECURITY: We intentionally use r.URL.Path (not r.URL.String() or r.RequestURI)
// to prevent query parameters from being recorded in the immutable audit trail.
// Query strings may contain cursor tokens, API keys passed as params, or other
// sensitive filter values. Since the audit trail is append-only with no deletion
// capability, any sensitive data recorded would persist permanently.
//
// The goroutine is tracked in a.wg so AuditMiddleware.Flush can drain
// in-flight recordings during graceful shutdown. Without this (M-1,
// CWE-662 / CWE-400), SIGTERM would close the DB pool while recordings
// were still mid-flight, silently dropping audit events.
a.wg.Add(1)
go func() {
defer a.wg.Done()
if err := a.recorder.RecordAPICall(
auditCtx,
method,
path,
actor,
bodyHash,
status,
latency,
); err != nil {
a.logger.Error("failed to record API audit event",
"error", err,
"method", method,
"path", path,
)
}
}()
})
}
// Flush blocks until every audit-recording goroutine spawned by Middleware has
// completed, or until ctx is cancelled / its deadline elapses. It must be
// called from the process shutdown path after http.Server.Shutdown has
// returned (so no new requests are being accepted) but before the backing
// audit recorder's resources (DB pool, etc.) are torn down.
//
// On timeout or cancellation Flush returns ErrAuditFlushTimeout wrapped with
// any context error; in-flight goroutines continue to run and may still write
// to the recorder once they unblock — the caller is responsible for deciding
// whether to proceed with teardown anyway or surface the error.
//
// Flush mirrors the idiom used by scheduler.Scheduler.WaitForCompletion so
// that the two subsystems drain identically at shutdown.
func (a *AuditMiddleware) Flush(ctx context.Context) error {
done := make(chan struct{})
go func() {
a.wg.Wait()
close(done)
}()
select {
case <-done:
a.logger.Info("audit middleware flush complete")
return nil
case <-ctx.Done():
a.logger.Warn("audit middleware flush did not complete before context cancellation",
"error", ctx.Err(),
)
return fmt.Errorf("%w: %w", ErrAuditFlushTimeout, ctx.Err())
}
}
// AuditServiceAdapter adapts the AuditService to the AuditRecorder interface.
// This keeps the middleware decoupled from the service package.
type AuditServiceAdapter struct {
recordFn func(ctx context.Context, actor string, actorType string, action string, resourceType string, resourceID string, details map[string]interface{}) error
}
// NewAuditServiceAdapter creates an adapter that bridges the middleware AuditRecorder
// interface to the service layer's RecordEvent method.
func NewAuditServiceAdapter(recordFn func(ctx context.Context, actor string, actorType string, action string, resourceType string, resourceID string, details map[string]interface{}) error) *AuditServiceAdapter {
return &AuditServiceAdapter{recordFn: recordFn}
}
// RecordAPICall implements AuditRecorder by translating API call data into an audit event.
func (a *AuditServiceAdapter) RecordAPICall(ctx context.Context, method, path, actor string, bodyHash string, status int, latencyMs int64) error {
details := map[string]interface{}{
"method": method,
"path": path,
"body_hash": bodyHash,
"status": status,
"latency_ms": latencyMs,
}
action := fmt.Sprintf("api_%s", strings.ToLower(method))
return a.recordFn(ctx, actor, "User", action, "api", path, details)
}