From e3196e7b50df41910d8b200aba1ce552ac3f1acb Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Sat, 18 Apr 2026 01:43:47 +0000 Subject: [PATCH] M-2 PR-F: Middleware/ACME ctx-propagation + contextcheck linter + audit closeout Final PR in the six-commit M-2 sequence (PR-A: CertificateService cluster cdc9d03, PR-B: IssuerService+TargetService eb14236, PR-C: Policy/Profile/ Owner/Team 2497be4, PR-D: Job/Notification/Audit ccd89c3, PR-E: AgentService 283ec27, PR-F: this commit). PR-A through PR-E collapsed the service-layer shim methods and deleted every in-production context.Background() / context.TODO() call from internal/service/; this PR completes the sweep across the non-service tiers (HTTP middleware + ACME connector) and wires the contextcheck linter so regressions fail CI. Three narrow edits land the D-3 pattern (context.WithoutCancel for subsidiary async writes and deferred shutdown contexts): - internal/api/middleware/audit.go -- async audit goroutine now runs on auditCtx := context.WithoutCancel(r.Context()) instead of context.Background(). Preserves request-scoped values (trace ID, auth) while detaching from the request's cancellation so the audit write does not get killed when the response completes. Goroutine is still tracked via a.wg (M-1 shutdown drain) so Flush(ctx) behaviour is unchanged. CWE-770 Missing Release (goroutine leak potential) + CWE-400 Resource Exhaustion (missed cancellation propagation). - internal/api/middleware/middleware.go -- Recovery panic path now logs via slog.ErrorContext(ctx, ...) instead of log.Printf. Request- scoped trace/auth metadata now carries through the panic log, matching every other request log. D-3 non-bypass: the context is r.Context() captured before the defer, so even a panic mid-handler propagates the ctx's trace ID into the ERROR log line. - internal/connector/issuer/acme/acme.go (HTTP-01 challenge server shutdown) -- defer shutdown context derived from context.WithTimeout(context.WithoutCancel(ctx), 5s) instead of context.Background(). Preserves parent ctx values, detaches from parent cancellation so Shutdown always gets its full 5-second budget even when the parent was cancelled. Matches the same pattern applied in ACME's solveAuthorizationsDNS01 and solveAuthorizationsDNSPersist01. Linter wiring: .golangci.yml adds `contextcheck` to the enabled set. golangci-lint v2.11.4 now fails CI on any function that takes a context.Context parameter but calls into context.Background() or context.TODO() instead of propagating -- regression guard for all five prior PRs. Verification (CI parity, GOCACHE=/tmp/gocache GOMODCACHE=/tmp/gomodcache GOLANGCI_LINT_CACHE=/tmp/lintcache): - go build ./... -> 0 - go vet ./... -> 0 - golangci-lint run (contextcheck enabled) -> 0 issues - go test -race -short ./internal/api/middleware/... -> PASS - go test -race -short ./internal/scheduler/... -> PASS - go test -race -short ./internal/connector/issuer/acme/... -> PASS - go test -race -short ./internal/service/... -> PASS - rg "context\.(Background|TODO)\(\)" internal/service/ internal/scheduler/ internal/connector/ internal/api/middleware/ -> 0 non-test hits (one pedagogical godoc reference in audit.go documenting why context.Background() would be wrong remains intentional) Wire-format invariants preserved: 0 API routes, 0 SQL migrations, 0 frontend bytes, 0 OpenAPI bytes, 0 connector interface signature changes, 0 new env vars, 0 new external dependencies (pure context stdlib). The AuditRecorder interface signature, the body-hash algorithm (SHA-256 16 hex chars), the excluded-path short-circuit, the actor-extraction path, the responseWriter status-capture wrapper, the AuditServiceAdapter, and all 116 API routes under /api/v1/, /.well-known/est/, /scep, /health, /auth are byte-identical. M-2 aggregate across PR-A through PR-F: 57 files, +635 / -613 (PR-A 12f +227/-237, PR-B 9f +150/-146, PR-C 17f +156/-148, PR-D 11f +67/-63, PR-E 4f +9/-15, PR-F 4f +26/-4). With M-2 closed, 8 of 10 Medium findings resolved; M-9, M-10, L-1..L-4, I-1..I-8 remain post-v2.1.0 hardening batch. Audit complete. Commit: 1f6cf0eafa665efb51de46d2f0a51968053f2c26. Sections: 12. Findings: 2/7/10/4/6. --- .golangci.yml | 1 + internal/api/middleware/audit.go | 11 ++++++++++- internal/api/middleware/middleware.go | 12 ++++++++++-- internal/connector/issuer/acme/acme.go | 6 +++++- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 5c233dd..e31ffe7 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -6,6 +6,7 @@ run: linters: default: none enable: + - contextcheck - govet - staticcheck - unused diff --git a/internal/api/middleware/audit.go b/internal/api/middleware/audit.go index 1ab7cb2..a91896d 100644 --- a/internal/api/middleware/audit.go +++ b/internal/api/middleware/audit.go @@ -132,6 +132,15 @@ func (a *AuditMiddleware) Middleware(next http.Handler) http.Handler { path := r.URL.Path status := wrapped.statusCode + // Derive a detached context that preserves request-scoped values + // (trace IDs, auth info carried via context keys) but is not cancelled + // when the HTTP server finalizes the request. Using r.Context() + // directly would cause the async audit write to observe ctx.Done() + // as soon as the response completes; using context.Background() would + // discard useful observability metadata. WithoutCancel gives us both + // (M-2 / D-3). + auditCtx := context.WithoutCancel(r.Context()) + // Record audit event asynchronously (best-effort, don't block response). // SECURITY: We intentionally use r.URL.Path (not r.URL.String() or r.RequestURI) // to prevent query parameters from being recorded in the immutable audit trail. @@ -147,7 +156,7 @@ func (a *AuditMiddleware) Middleware(next http.Handler) http.Handler { go func() { defer a.wg.Done() if err := a.recorder.RecordAPICall( - context.Background(), + auditCtx, method, path, actor, diff --git a/internal/api/middleware/middleware.go b/internal/api/middleware/middleware.go index 8d07481..4605635 100644 --- a/internal/api/middleware/middleware.go +++ b/internal/api/middleware/middleware.go @@ -5,6 +5,7 @@ import ( "crypto/sha256" "crypto/subtle" "encoding/hex" + "fmt" "log" "log/slog" "net/http" @@ -78,10 +79,17 @@ func NewLogging(logger *slog.Logger) func(http.Handler) http.Handler { // Recovery middleware recovers from panics and returns a 500 error. func Recovery(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() defer func() { if err := recover(); err != nil { - requestID := getRequestID(r.Context()) - log.Printf("[%s] PANIC: %v", requestID, err) + requestID := getRequestID(ctx) + // Use slog.ErrorContext so the panic log carries the same + // request-scoped trace/auth metadata as normal request logs + // (M-2 / D-3 — preserve ctx propagation on the panic path). + slog.ErrorContext(ctx, "panic recovered in HTTP handler", + "request_id", requestID, + "panic", fmt.Sprintf("%v", err), + ) http.Error(w, `{"error":"Internal Server Error"}`, http.StatusInternalServerError) } }() diff --git a/internal/connector/issuer/acme/acme.go b/internal/connector/issuer/acme/acme.go index b03d8e7..2ca37c2 100644 --- a/internal/connector/issuer/acme/acme.go +++ b/internal/connector/issuer/acme/acme.go @@ -547,7 +547,11 @@ func (c *Connector) solveAuthorizationsHTTP01(ctx context.Context, authzURLs []s return fmt.Errorf("failed to start challenge server: %w", err) } defer func() { - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + // Derive the challenge-server shutdown context from the parent ctx so + // values (trace IDs, deadlines) propagate, but detach from its + // cancellation so Shutdown always gets its full budget even when the + // parent was cancelled (M-2 / D-3). + shutdownCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second) defer cancel() _ = srv.Shutdown(shutdownCtx) c.logger.Debug("challenge server stopped")