diff --git a/internal/api/handler/agent_groups.go b/internal/api/handler/agent_groups.go index e97cda9..a85c636 100644 --- a/internal/api/handler/agent_groups.go +++ b/internal/api/handler/agent_groups.go @@ -3,12 +3,14 @@ package handler import ( "context" "encoding/json" + "errors" "net/http" "strconv" "strings" "github.com/shankar0123/certctl/internal/api/middleware" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/service" ) // AgentGroupService defines the service interface for agent group operations. @@ -22,6 +24,19 @@ type AgentGroupService interface { } // AgentGroupHandler handles HTTP requests for agent group operations. +// +// Error dispatch (post-M-1): every service error routes through the [errToStatus] +// choke point via `errors.Is` walking the wrap chain, with one explicit +// [service.ErrValidation] arm on the write paths (Create, Update) so the +// composed "validation: " message the service layer +// attaches via `fmt.Errorf("%w: ...", ErrValidation)` can be passed through to +// the 400 response body. Before M-1, the Create handler branched on +// `strings.Contains(err.Error(), "invalid"|"required")` — fragile because a +// single reword in [service.validateAgentGroup] would demote the 400 to 500 +// with no compile-time signal — and the Update/Delete handlers branched on +// `strings.Contains(err.Error(), "not found")`, coupling HTTP classification +// to repository human-readable strings. Both now ride the typed +// [repository.ErrNotFound] wrap through errToStatus. Mirrors ProfileHandler. type AgentGroupHandler struct { svc AgentGroupService } @@ -89,7 +104,18 @@ func (h AgentGroupHandler) GetAgentGroup(w http.ResponseWriter, r *http.Request) group, err := h.svc.GetAgentGroup(r.Context(), id) if err != nil { - ErrorWithRequestID(w, http.StatusNotFound, "Agent group not found", requestID) + // M-1: route through errToStatus so a repo-level `sql.ErrNoRows` + // (wrapped as repository.ErrNotFound) becomes 404, but a transient DB + // failure no longer masquerades as 404 — it correctly surfaces 500. The + // pre-M-1 "any error → 404" shortcut was plausible when Get's only + // expected failure was "not found", but the choke point now gives us + // correct dispatch for free. Mirrors GetProfile. + status := errToStatus(err) + msg := "Failed to get agent group" + if status == http.StatusNotFound { + msg = "Agent group not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -123,7 +149,15 @@ func (h AgentGroupHandler) CreateAgentGroup(w http.ResponseWriter, r *http.Reque created, err := h.svc.CreateAgentGroup(r.Context(), group) if err != nil { - if strings.Contains(err.Error(), "invalid") || strings.Contains(err.Error(), "required") { + // M-1: replace the 2-term substring net (`"invalid"|"required"`) with a + // single `errors.Is(err, service.ErrValidation)` arm. validateAgentGroup + // wraps every field-specific failure via `fmt.Errorf("%w: ", + // ErrValidation)`, so `err.Error()` still contains the human-readable + // reason (e.g., "agent group name is required") and can be safely passed + // to the 400 body — but the status decision no longer depends on the + // exact wording. Other errors redact to a generic 500. Mirrors + // CreateProfile. + if errors.Is(err, service.ErrValidation) { ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID) return } @@ -160,11 +194,22 @@ func (h AgentGroupHandler) UpdateAgentGroup(w http.ResponseWriter, r *http.Reque updated, err := h.svc.UpdateAgentGroup(r.Context(), id, group) if err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Agent group not found", requestID) + // M-1: explicit ErrValidation arm preserves the user-facing reason in + // the 400 body (validateAgentGroup wraps every failure via + // `fmt.Errorf("%w: ...", ErrValidation)`); every other error — including + // repo-layer ErrNotFound on a missing row — routes through errToStatus + // so the 404/500 decision no longer depends on substring matching. + // Mirrors UpdateProfile. + if errors.Is(err, service.ErrValidation) { + ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID) return } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update agent group", requestID) + status := errToStatus(err) + msg := "Failed to update agent group" + if status == http.StatusNotFound { + msg = "Agent group not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -188,11 +233,15 @@ func (h AgentGroupHandler) DeleteAgentGroup(w http.ResponseWriter, r *http.Reque } if err := h.svc.DeleteAgentGroup(r.Context(), id); err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Agent group not found", requestID) - return + // M-1: sentinel dispatch replaces the substring 404 check — see the + // parallel comment block in UpdateAgentGroup for the rationale. Mirrors + // DeleteProfile. + status := errToStatus(err) + msg := "Failed to delete agent group" + if status == http.StatusNotFound { + msg = "Agent group not found" } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete agent group", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/agents.go b/internal/api/handler/agents.go index b015420..21297b2 100644 --- a/internal/api/handler/agents.go +++ b/internal/api/handler/agents.go @@ -108,7 +108,19 @@ func (h AgentHandler) GetAgent(w http.ResponseWriter, r *http.Request) { agent, err := h.svc.GetAgent(r.Context(), id) if err != nil { - ErrorWithRequestID(w, http.StatusNotFound, "Agent not found", requestID) + // M-1 (P2): route through errToStatus so a repo-level + // sql.ErrNoRows (wrapped as repository.ErrNotFound) becomes 404, + // but a transient DB failure no longer masquerades as 404 — it + // correctly surfaces 500. The pre-M-1 "any error → 404" shortcut + // was plausible when Get's only expected failure was "not found", + // but the choke point now gives us correct dispatch for free. + // Mirrors GetAgentGroup / GetProfile. + status := errToStatus(err) + msg := "Failed to get agent" + if status == http.StatusNotFound { + msg = "Agent not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -147,11 +159,20 @@ func (h AgentHandler) RegisterAgent(w http.ResponseWriter, r *http.Request) { created, err := h.svc.RegisterAgent(r.Context(), agent) if err != nil { - errMsg := err.Error() - if strings.Contains(errMsg, "unique") || strings.Contains(errMsg, "duplicate") || strings.Contains(errMsg, "already exists") { + // M-1 (P2): replace the 3-term substring net + // (`"unique"|"duplicate"|"already exists"`) with a typed + // errors.Is(err, service.ErrConflict) arm. The service layer now + // wraps pg SQLSTATE 23505 duplicate-name violations via + // fmt.Errorf("%w: agent name already exists", ErrConflict), so + // classification no longer depends on the exact driver wording. + // Other errors redact to a generic 500 with slog.Error server- + // side diagnostic capture (F-002). Mirrors CreateProfile's + // ErrValidation arm pattern, adapted for the conflict case. + if errors.Is(err, service.ErrConflict) { ErrorWithRequestID(w, http.StatusConflict, "Agent with this name already exists", requestID) return } + slog.Error("RegisterAgent failed", "name", agent.Name, "error", err.Error()) ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to register agent", requestID) return } @@ -211,7 +232,15 @@ func (h AgentHandler) Heartbeat(w http.ResponseWriter, r *http.Request) { ErrorWithRequestID(w, http.StatusGone, "Agent has been retired", requestID) return } - if strings.Contains(err.Error(), "not found") { + // M-1 (P2): the pre-M-1 `strings.Contains(err.Error(), "not + // found")` branch now rides the errToStatus choke point, which + // recognizes repository.ErrNotFound via errors.Is. The retired- + // agent sentinel is still checked FIRST above so the 410 Gone + // short-circuit is never masked by the 404 arm. Any other error + // redacts to a generic 500 with slog.Error server-side diagnostic + // capture (F-002). Mirrors GetAgentGroup. + status := errToStatus(err) + if status == http.StatusNotFound { ErrorWithRequestID(w, http.StatusNotFound, "Agent not found", requestID) return } @@ -308,7 +337,16 @@ func (h AgentHandler) AgentCertificatePickup(w http.ResponseWriter, r *http.Requ certPEM, err := h.svc.CertificatePickup(r.Context(), agentID, certID) if err != nil { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found or not ready", requestID) + // M-1 (P2): route through errToStatus so a repo-level + // sql.ErrNoRows (wrapped as repository.ErrNotFound) becomes 404, + // but a transient DB failure no longer masquerades as 404 — it + // correctly surfaces 500. Mirrors GetAgent. + status := errToStatus(err) + msg := "Failed to retrieve certificate" + if status == http.StatusNotFound { + msg = "Certificate not found or not ready" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -491,7 +529,16 @@ func (h AgentHandler) RetireAgent(w http.ResponseWriter, r *http.Request) { JSON(w, http.StatusConflict, body) return } - if strings.Contains(err.Error(), "not found") { + // M-1 (P2): the pre-M-1 `strings.Contains(err.Error(), "not + // found")` branch now rides the errToStatus choke point, which + // recognizes repository.ErrNotFound via errors.Is. The sentinel + // (ErrAgentIsSentinel, ErrForceReasonRequired) and typed + // (*BlockedByDependenciesError) checks above still run FIRST so + // the 403/400/409 structural refusals are never masked by the + // 404 arm. Any other error redacts to a generic 500 with + // slog.Error server-side diagnostic capture (F-002). + status := errToStatus(err) + if status == http.StatusNotFound { ErrorWithRequestID(w, http.StatusNotFound, "Agent not found", requestID) return } diff --git a/internal/api/handler/audit.go b/internal/api/handler/audit.go index 63ef4dd..68d4702 100644 --- a/internal/api/handler/audit.go +++ b/internal/api/handler/audit.go @@ -2,6 +2,7 @@ package handler import ( "context" + "log/slog" "net/http" "strconv" "strings" @@ -86,7 +87,24 @@ func (h AuditHandler) GetAuditEvent(w http.ResponseWriter, r *http.Request) { event, err := h.svc.GetAuditEvent(r.Context(), id) if err != nil { - ErrorWithRequestID(w, http.StatusNotFound, "Audit event not found", requestID) + // M-1 (P2): dispatch routes through errToStatus. Pre-M-1 this branch was + // a blanket `any error → 404 Audit event not found` shortcut — which + // silently demoted transient DB failures from the service's auditRepo.List + // wrap to 404 Not Found with no observable external signal. Post-M-1: + // service/audit.go GetAuditEvent only wraps the genuine zero-events path + // with service.ErrNotFound via %w, and the repo.List wrap surfaces without + // that sentinel — so errors.Is(err, service.ErrNotFound) picks up the real + // 404s and everything else correctly surfaces as 500 with server-side + // slog.Error capture (F-002 redacted-500 pattern preserved). + status := errToStatus(err) + if status == http.StatusInternalServerError { + slog.Error("GetAuditEvent failed", "audit_event_id", id, "error", err.Error()) + } + msg := "Failed to get audit event" + if status == http.StatusNotFound { + msg = "Audit event not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/certificates.go b/internal/api/handler/certificates.go index 36beefd..58c227d 100644 --- a/internal/api/handler/certificates.go +++ b/internal/api/handler/certificates.go @@ -298,12 +298,13 @@ func (h CertificateHandler) UpdateCertificate(w http.ResponseWriter, r *http.Req updated, err := h.svc.UpdateCertificate(r.Context(), id, cert) if err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("UpdateCertificate failed", "cert_id", id, "error", err) + msg = "internal error" } - slog.Error("UpdateCertificate failed", "cert_id", id, "error", err.Error()) - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update certificate", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -327,11 +328,13 @@ func (h CertificateHandler) ArchiveCertificate(w http.ResponseWriter, r *http.Re } if err := h.svc.ArchiveCertificate(r.Context(), id); err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("ArchiveCertificate failed", "cert_id", id, "error", err) + msg = "internal error" } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to archive certificate", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -373,12 +376,13 @@ func (h CertificateHandler) GetCertificateVersions(w http.ResponseWriter, r *htt versions, total, err := h.svc.GetCertificateVersions(r.Context(), certID, page, perPage) if err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("GetCertificateVersions failed", "cert_id", certID, "error", err) + msg = "internal error" } - slog.Error("GetCertificateVersions failed", "cert_id", certID, "error", err.Error()) - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get certificate versions", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -414,20 +418,13 @@ func (h CertificateHandler) TriggerRenewal(w http.ResponseWriter, r *http.Reques actor := resolveActor(r.Context()) if err := h.svc.TriggerRenewal(r.Context(), certID, actor); err != nil { - errMsg := err.Error() - if strings.Contains(errMsg, "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("TriggerRenewal failed", "cert_id", certID, "error", err) + msg = "internal error" } - if strings.Contains(errMsg, "cannot renew") { - ErrorWithRequestID(w, http.StatusBadRequest, errMsg, requestID) - return - } - if strings.Contains(errMsg, "already in progress") { - ErrorWithRequestID(w, http.StatusConflict, errMsg, requestID) - return - } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to trigger renewal", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -516,19 +513,13 @@ func (h CertificateHandler) RevokeCertificate(w http.ResponseWriter, r *http.Req actor := resolveActor(r.Context()) if err := h.svc.RevokeCertificate(r.Context(), certID, req.Reason, actor); err != nil { - // Distinguish between client errors and server errors - errMsg := err.Error() - if strings.Contains(errMsg, "already revoked") || - strings.Contains(errMsg, "cannot revoke") || - strings.Contains(errMsg, "invalid revocation reason") { - ErrorWithRequestID(w, http.StatusBadRequest, errMsg, requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("RevokeCertificate failed", "cert_id", certID, "error", err) + msg = "internal error" } - if strings.Contains(errMsg, "not found") || strings.Contains(errMsg, "failed to fetch") || strings.Contains(errMsg, "failed to get") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return - } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to revoke certificate", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -557,16 +548,13 @@ func (h CertificateHandler) GetDERCRL(w http.ResponseWriter, r *http.Request) { derBytes, err := h.svc.GenerateDERCRL(r.Context(), issuerID) if err != nil { - errMsg := err.Error() - if strings.Contains(errMsg, "not found") { - ErrorWithRequestID(w, http.StatusNotFound, errMsg, requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("GenerateDERCRL failed", "issuer_id", issuerID, "error", err) + msg = "internal error" } - if strings.Contains(errMsg, "do not support") || strings.Contains(errMsg, "does not support") { - ErrorWithRequestID(w, http.StatusNotImplemented, errMsg, requestID) - return - } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to generate CRL", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -602,16 +590,13 @@ func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) { derBytes, err := h.svc.GetOCSPResponse(r.Context(), issuerID, serialHex) if err != nil { - errMsg := err.Error() - if strings.Contains(errMsg, "not found") { - ErrorWithRequestID(w, http.StatusNotFound, errMsg, requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("GetOCSPResponse failed", "issuer_id", issuerID, "serial", serialHex, "error", err) + msg = "internal error" } - if strings.Contains(errMsg, "do not support") || strings.Contains(errMsg, "does not support") { - ErrorWithRequestID(w, http.StatusNotImplemented, errMsg, requestID) - return - } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to generate OCSP response", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -642,12 +627,13 @@ func (h CertificateHandler) GetCertificateDeployments(w http.ResponseWriter, r * deployments, err := h.svc.GetCertificateDeployments(r.Context(), certID) if err != nil { - errMsg := err.Error() - if strings.Contains(errMsg, "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("GetCertificateDeployments failed", "cert_id", certID, "error", err) + msg = "internal error" } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to get deployments", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/errors.go b/internal/api/handler/errors.go new file mode 100644 index 0000000..153379a --- /dev/null +++ b/internal/api/handler/errors.go @@ -0,0 +1,117 @@ +package handler + +import ( + "errors" + "net/http" + + "github.com/lib/pq" + + "github.com/shankar0123/certctl/internal/repository" + "github.com/shankar0123/certctl/internal/service" +) + +// errToStatus is the single choke point that maps a service-layer or +// repository-layer error to its HTTP status code. Before M-1 (P2), 42 switch +// branches across 11 handler files classified errors via +// `strings.Contains(err.Error(), ...)` substring matching — a pattern that +// made every HTTP status mapping one sentinel-message reword away from silent +// regression (see M-003 self-approval privilege boundary: a reword of +// ErrSelfApproval.Error() would have demoted 403 Forbidden to 500 Internal +// Server Error with no compile-time error, no test failure, and no observable +// external signal). +// +// All handler branches now route through this function via errors.Is and +// errors.As, which walks the wrap chain built by fmt.Errorf("%w: ...", ...). +// The generic sentinels live in internal/service/errors.go; domain-specific +// sentinels (ErrSelfApproval, ErrAgentIsSentinel, ErrBlockedByDependencies, +// ErrForceReasonRequired, ErrAgentNotFound) wrap those generics via %w so both +// errors.Is(err, ErrSelfApproval) and errors.Is(err, ErrForbidden) succeed on +// the same wrapped error. +// +// # Dispatch order +// +// 1. ErrAgentRetired → 410 Gone. Tested FIRST. It is deliberately NOT wrapped +// under any generic sentinel — 410 Gone is semantically distinct from +// 403/404/409 (permanently-terminated resource identity that drives +// deterministic agent-binary shutdown at cmd/agent/main.go:1291). Must +// short-circuit before any generic check so wrapping can never demote it. +// 2. ErrNotFound → 404 Not Found. Both service.ErrNotFound and +// repository.ErrNotFound route here — repositories wrap sql.ErrNoRows with +// repository.ErrNotFound so a "row not found" escapes the repo layer as a +// typed sentinel rather than an untyped fmt.Errorf string. Tested BEFORE +// ErrForbidden so RFC 7235's preference for hiding resource existence from +// unauthorized callers is preserved (a caller who cannot see a resource +// should get 404, not 403). +// 3. ErrUnauthenticated → 401 Unauthorized. SCEP challenge-password mismatch +// and similar credential failures. +// 4. ErrForbidden → 403 Forbidden. M-003 gate. Tested BEFORE ErrValidation so +// double-wrapping (e.g., a future fmt.Errorf("%w: ctx", ErrSelfApproval) +// in a wrapping call site) cannot demote 403 to 400. +// 5. ErrConflict / repository.ErrRenewalPolicyDuplicateName / +// repository.ErrRenewalPolicyInUse → 409 Conflict. The repo-layer sentinels +// are routed here explicitly so handlers do not need their own dispatch +// tree for G-1's renewal-policy FK + unique-name violations. +// 6. ErrValidation → 400 Bad Request. Generic input validation / malformed +// request bodies / invalid state transitions that the caller could correct +// by changing their request. +// 7. ErrUnprocessable → 422 Unprocessable Entity. Distinct from +// ErrValidation: ErrValidation is "caller sent bad input" (400), while +// ErrUnprocessable is "caller's input was fine but our stored data can't +// satisfy the operation" — e.g., an X.509 PEM in the inventory that fails +// to decode. The pre-M-1 ExportPKCS12 handler pinned 422 on +// strings.Contains(err.Error(), "cannot be parsed"); the sentinel makes +// that dispatch survive message rewording. +// 8. ErrNotImplemented → 501 Not Implemented. Reserved for feature-flag-gated +// code paths. +// 9. *pq.Error fallback on SQLSTATE 23503 (FK violation) / 23505 (unique +// violation) → 409 Conflict. Final branch before the default 500. Anything +// that reaches here is technically a code smell (the repository layer +// should normally wrap driver errors into a typed sentinel) but the status +// mapping is still correct. +// +// # Why a function, not a middleware +// +// Handlers must continue to call [Error] / [ErrorWithRequestID] with a +// caller-chosen human-readable message (sometimes the wrapped err.Error(), +// sometimes a redacted "internal error" for 500s per F-002). This function +// gives handlers the status code; the handler keeps control of the body. +func errToStatus(err error) int { + if err == nil { + return http.StatusOK + } + + switch { + case errors.Is(err, service.ErrAgentRetired): + return http.StatusGone // 410 — must short-circuit before generic dispatch + case errors.Is(err, service.ErrNotFound), + errors.Is(err, repository.ErrNotFound): + return http.StatusNotFound // 404 — before ErrForbidden (RFC 7235 existence hiding) + case errors.Is(err, service.ErrUnauthenticated): + return http.StatusUnauthorized // 401 + case errors.Is(err, service.ErrForbidden): + return http.StatusForbidden // 403 — before ErrValidation (preserves M-003 gate under double-wrap) + case errors.Is(err, service.ErrConflict), + errors.Is(err, repository.ErrRenewalPolicyDuplicateName), + errors.Is(err, repository.ErrRenewalPolicyInUse): + return http.StatusConflict // 409 + case errors.Is(err, service.ErrValidation): + return http.StatusBadRequest // 400 + case errors.Is(err, service.ErrUnprocessable): + return http.StatusUnprocessableEntity // 422 — stored-data-unparseable, not caller-input-bad + case errors.Is(err, service.ErrNotImplemented): + return http.StatusNotImplemented // 501 + } + + // Driver-level fallback. Raw *pq.Error escaping the repository layer is a + // code smell but a real escape hatch today — we still want a correct 409 + // instead of a generic 500 for FK/unique violations. + var pgErr *pq.Error + if errors.As(err, &pgErr) { + switch pgErr.Code { + case "23503", "23505": + return http.StatusConflict + } + } + + return http.StatusInternalServerError +} diff --git a/internal/api/handler/errors_test.go b/internal/api/handler/errors_test.go new file mode 100644 index 0000000..d33f1a7 --- /dev/null +++ b/internal/api/handler/errors_test.go @@ -0,0 +1,120 @@ +package handler + +import ( + "errors" + "fmt" + "net/http" + "testing" + + "github.com/lib/pq" + + "github.com/shankar0123/certctl/internal/repository" + "github.com/shankar0123/certctl/internal/service" +) + +// TestErrToStatus_DispatchMatrix pins the handler's single error → HTTP +// status choke point. Each row covers one branch of the dispatch switch and +// the dispatch order invariants documented in errors.go: +// +// - ErrAgentRetired FIRST (410 short-circuits before generic checks) +// - ErrNotFound before ErrForbidden (RFC 7235 existence hiding) +// - ErrForbidden before ErrValidation (preserves M-003 gate under double-wrap) +// - Repo sentinels route to 409 alongside ErrConflict +// - *pq.Error on 23503 / 23505 routes to 409 as the driver-level fallback +// - Default path is 500 +func TestErrToStatus_DispatchMatrix(t *testing.T) { + cases := []struct { + name string + err error + want int + }{ + {"nil → 200", nil, http.StatusOK}, + + // Each generic sentinel resolves to its documented status code. + {"ErrNotFound → 404", service.ErrNotFound, http.StatusNotFound}, + {"ErrValidation → 400", service.ErrValidation, http.StatusBadRequest}, + {"ErrConflict → 409", service.ErrConflict, http.StatusConflict}, + {"ErrForbidden → 403", service.ErrForbidden, http.StatusForbidden}, + {"ErrUnauthenticated → 401", service.ErrUnauthenticated, http.StatusUnauthorized}, + {"ErrNotImplemented → 501", service.ErrNotImplemented, http.StatusNotImplemented}, + + // Wrapped domain sentinels route through their generic wrap. + {"ErrSelfApproval → 403 (via ErrForbidden)", service.ErrSelfApproval, http.StatusForbidden}, + {"ErrAgentIsSentinel → 403 (via ErrForbidden)", service.ErrAgentIsSentinel, http.StatusForbidden}, + {"ErrBlockedByDependencies → 409 (via ErrConflict)", service.ErrBlockedByDependencies, http.StatusConflict}, + {"ErrForceReasonRequired → 400 (via ErrValidation)", service.ErrForceReasonRequired, http.StatusBadRequest}, + {"ErrAgentNotFound → 400 (via ErrValidation)", service.ErrAgentNotFound, http.StatusBadRequest}, + + // ErrAgentRetired is standalone — 410 Gone must fire before any + // generic dispatch. This locks in the semantic-distinct short-circuit. + {"ErrAgentRetired → 410", service.ErrAgentRetired, http.StatusGone}, + + // Repository-layer sentinels (G-1 + M-1). + {"repo.ErrNotFound → 404", repository.ErrNotFound, http.StatusNotFound}, + {"wrapped repo.ErrNotFound → 404", + fmt.Errorf("%w: renewal policy rp-foo", repository.ErrNotFound), + http.StatusNotFound}, + {"repo.ErrRenewalPolicyDuplicateName → 409", repository.ErrRenewalPolicyDuplicateName, http.StatusConflict}, + {"repo.ErrRenewalPolicyInUse → 409", repository.ErrRenewalPolicyInUse, http.StatusConflict}, + + // Wrapped errors with additional context survive the dispatch. + {"wrapped ErrNotFound with context → 404", + fmt.Errorf("lookup failed: %w", service.ErrNotFound), + http.StatusNotFound}, + {"wrapped ErrSelfApproval with context → 403", + fmt.Errorf("approval gate: %w", service.ErrSelfApproval), + http.StatusForbidden}, + + // Driver-level fallback: raw *pq.Error escaping repo layer. + {"*pq.Error 23503 → 409", &pq.Error{Code: "23503"}, http.StatusConflict}, + {"*pq.Error 23505 → 409", &pq.Error{Code: "23505"}, http.StatusConflict}, + {"*pq.Error 08006 → 500", &pq.Error{Code: "08006"}, http.StatusInternalServerError}, + + // Default path. + {"unknown error → 500", errors.New("something arbitrary"), http.StatusInternalServerError}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := errToStatus(c.err) + if got != c.want { + t.Errorf("errToStatus(%v) = %d, want %d", c.err, got, c.want) + } + }) + } +} + +// TestErrToStatus_AgentRetiredShortCircuit is a dedicated regression guard +// for the most fragile dispatch invariant: ErrAgentRetired's 410 Gone must +// fire FIRST. If a future commit wraps it under ErrForbidden (e.g., to +// include it in a generic "agent operations forbidden" bucket), this test +// goes red and the agent-binary shutdown at cmd/agent/main.go:1291 would +// silently stop triggering. +func TestErrToStatus_AgentRetiredShortCircuit(t *testing.T) { + if got := errToStatus(service.ErrAgentRetired); got != http.StatusGone { + t.Fatalf("ErrAgentRetired → %d, want 410 Gone (short-circuit must fire before any generic dispatch)", got) + } +} + +// TestErrToStatus_NotFoundBeforeForbidden locks the RFC 7235 existence- +// hiding dispatch order. If someone were to reorder the switch arms to put +// ErrForbidden first, an authorization failure on a nonexistent resource +// would leak existence via a 403 instead of masking it with a 404. +func TestErrToStatus_NotFoundBeforeForbidden(t *testing.T) { + // A hypothetical wrapping where both would match — contrived but the + // ordering guarantee is what we're testing. + both := fmt.Errorf("%w: layered with %w", service.ErrNotFound, service.ErrForbidden) + if got := errToStatus(both); got != http.StatusNotFound { + t.Errorf("dual-wrapped err → %d, want 404 (ErrNotFound must dispatch before ErrForbidden)", got) + } +} + +// TestErrToStatus_ForbiddenBeforeValidation guards the M-003 self-approval +// gate against a future call site that double-wraps ErrSelfApproval under +// ErrValidation (intentionally or accidentally). The dispatch must pick +// 403, not 400. +func TestErrToStatus_ForbiddenBeforeValidation(t *testing.T) { + doubled := fmt.Errorf("%w: %w", service.ErrSelfApproval, service.ErrValidation) + if got := errToStatus(doubled); got != http.StatusForbidden { + t.Errorf("double-wrapped err → %d, want 403 (ErrForbidden must dispatch before ErrValidation — M-003 gate)", got) + } +} diff --git a/internal/api/handler/export.go b/internal/api/handler/export.go index 0e7eb27..854144a 100644 --- a/internal/api/handler/export.go +++ b/internal/api/handler/export.go @@ -46,12 +46,26 @@ func (h ExportHandler) ExportPEM(w http.ResponseWriter, r *http.Request) { result, err := h.svc.ExportPEM(r.Context(), id) if err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + // M-1 (P2): dispatch routes through errToStatus. Pre-M-1 this branch + // classified 404 via strings.Contains(err.Error(), "not found"), which + // gave false positives on any error whose rendered text happened to + // contain "not found" — notably a transient DB failure when the service + // layer wrapped every certRepo.Get error with "certificate not found". + // Post-M-1: service/export.go now wraps with "failed to get certificate" + // and only the genuine sql.ErrNoRows path surfaces repository.ErrNotFound + // through the wrap chain, so errors.Is(err, repository.ErrNotFound) picks + // up the real 404s and everything else — including transient DB errors — + // correctly surfaces as 500 with server-side slog.Error capture (F-002 + // redacted-500 pattern preserved). + status := errToStatus(err) + if status == http.StatusInternalServerError { + slog.Error("ExportPEM failed", "cert_id", id, "error", err.Error()) } - slog.Error("ExportPEM failed", "cert_id", id, "error", err.Error()) - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to export certificate", requestID) + msg := "Failed to export certificate" + if status == http.StatusNotFound { + msg = "Certificate not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -94,16 +108,32 @@ func (h ExportHandler) ExportPKCS12(w http.ResponseWriter, r *http.Request) { pfxData, err := h.svc.ExportPKCS12(r.Context(), id, req.Password) if err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Certificate not found", requestID) - return + // M-1 (P2): dispatch routes through errToStatus. The pre-M-1 3-term + // substring net (`"not found"|"cannot be parsed"|"no certificates + // found"`) is replaced with sentinel dispatch: + // - repository.ErrNotFound (from certificate.go Get/GetLatestVersion + // sql.ErrNoRows wrap) → 404 + // - service.ErrUnprocessable (from service/export.go ExportPKCS12's + // parsePEMCertificates-failure and empty-chain wraps) → 422 — + // semantically correct because the caller's request is fine; our + // stored PEM chain is what cannot be processed + // - everything else → 500 with slog.Error capture (F-002 redacted-500 + // pattern preserved) + // A transient DB failure that pre-M-1 would have been swept into the + // 404 substring branch (because the service wrapped every certRepo.Get + // error with "certificate not found") now correctly surfaces as 500. + status := errToStatus(err) + if status == http.StatusInternalServerError { + slog.Error("ExportPKCS12 failed", "cert_id", id, "error", err.Error()) } - if strings.Contains(err.Error(), "cannot be parsed") || strings.Contains(err.Error(), "no certificates found") { - ErrorWithRequestID(w, http.StatusUnprocessableEntity, "Certificate data cannot be parsed as X.509", requestID) - return + msg := "Failed to export PKCS#12" + switch status { + case http.StatusNotFound: + msg = "Certificate not found" + case http.StatusUnprocessableEntity: + msg = "Certificate data cannot be parsed as X.509" } - slog.Error("ExportPKCS12 failed", "cert_id", id, "error", err.Error()) - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to export PKCS#12", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/export_handler_test.go b/internal/api/handler/export_handler_test.go index 3c568c0..3322b38 100644 --- a/internal/api/handler/export_handler_test.go +++ b/internal/api/handler/export_handler_test.go @@ -108,9 +108,17 @@ func TestExportPEM_Download(t *testing.T) { } func TestExportPEM_NotFound(t *testing.T) { + // M-1 (P2): wrap with service.ErrNotFound via %w so the handler's + // errToStatus choke point dispatches to 404 via errors.Is. Pre-M-1 this + // test used a raw `fmt.Errorf("certificate not found")` string and relied + // on the handler's strings.Contains(err.Error(), "not found") classifier + // — which was the same mechanism that silently misclassified transient DB + // failures whose text happened to include "not found" (see docblock on + // ExportPEM handler). Pinning the sentinel contract makes this test + // regression-proof against wrap-text changes. mockSvc := &MockExportService{ ExportPEMFn: func(_ context.Context, _ string) (*service.ExportPEMResult, error) { - return nil, fmt.Errorf("certificate not found") + return nil, fmt.Errorf("%w: certificate", service.ErrNotFound) }, } h := NewExportHandler(mockSvc) @@ -214,9 +222,11 @@ func TestExportPKCS12_EmptyPassword(t *testing.T) { } func TestExportPKCS12_NotFound(t *testing.T) { + // M-1 (P2): same sentinel migration as TestExportPEM_NotFound — see + // rationale there. mockSvc := &MockExportService{ ExportPKCS12Fn: func(_ context.Context, _ string, _ string) ([]byte, error) { - return nil, fmt.Errorf("certificate not found") + return nil, fmt.Errorf("%w: certificate", service.ErrNotFound) }, } h := NewExportHandler(mockSvc) @@ -231,6 +241,31 @@ func TestExportPKCS12_NotFound(t *testing.T) { } } +// TestExportPKCS12_Unprocessable pins the M-1 (P2) 422 contract: when the +// service layer wraps a parse failure with service.ErrUnprocessable, the +// handler's errToStatus choke point must dispatch to 422 Unprocessable +// Entity. Pre-M-1 this was classified via a 2-term substring net +// (`"cannot be parsed"|"no certificates found"`) at export.go:101, which +// would have been silently broken by a message reword in service/export.go. +// The new sentinel makes the dispatch survive message rewording. +func TestExportPKCS12_Unprocessable(t *testing.T) { + mockSvc := &MockExportService{ + ExportPKCS12Fn: func(_ context.Context, _ string, _ string) ([]byte, error) { + return nil, fmt.Errorf("%w: certificate data cannot be parsed as X.509: asn1 decode error", service.ErrUnprocessable) + }, + } + h := NewExportHandler(mockSvc) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/certificates/mc-test-1/export/pkcs12", nil) + w := httptest.NewRecorder() + + h.ExportPKCS12(w, req) + + if w.Code != http.StatusUnprocessableEntity { + t.Fatalf("expected 422, got %d", w.Code) + } +} + func TestExportPKCS12_ServiceError(t *testing.T) { mockSvc := &MockExportService{ ExportPKCS12Fn: func(_ context.Context, _ string, _ string) ([]byte, error) { diff --git a/internal/api/handler/issuers.go b/internal/api/handler/issuers.go index 7ad9641..4a599c4 100644 --- a/internal/api/handler/issuers.go +++ b/internal/api/handler/issuers.go @@ -135,16 +135,13 @@ func (h IssuerHandler) CreateIssuer(w http.ResponseWriter, r *http.Request) { created, err := h.svc.CreateIssuer(r.Context(), issuer) if err != nil { - h.logger.Error("failed to create issuer", "error", err, "name", issuer.Name, "type", issuer.Type) - errMsg := err.Error() - switch { - case strings.Contains(errMsg, "unique") || strings.Contains(errMsg, "duplicate"): - ErrorWithRequestID(w, http.StatusConflict, "An issuer with this name already exists", requestID) - case strings.Contains(errMsg, "unsupported issuer type"): - ErrorWithRequestID(w, http.StatusBadRequest, errMsg, requestID) - default: - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to create issuer", requestID) + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + h.logger.Error("failed to create issuer", "error", err, "name", issuer.Name, "type", issuer.Type) + msg = "internal error" } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -177,16 +174,13 @@ func (h IssuerHandler) UpdateIssuer(w http.ResponseWriter, r *http.Request) { updated, err := h.svc.UpdateIssuer(r.Context(), id, issuer) if err != nil { - h.logger.Error("failed to update issuer", "error", err, "id", id) - errMsg := err.Error() - switch { - case strings.Contains(errMsg, "unique") || strings.Contains(errMsg, "duplicate"): - ErrorWithRequestID(w, http.StatusConflict, "An issuer with this name already exists", requestID) - case strings.Contains(errMsg, "not found"): - ErrorWithRequestID(w, http.StatusNotFound, "Issuer not found", requestID) - default: - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update issuer", requestID) + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + h.logger.Error("failed to update issuer", "error", err, "id", id) + msg = "internal error" } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -210,13 +204,13 @@ func (h IssuerHandler) DeleteIssuer(w http.ResponseWriter, r *http.Request) { } if err := h.svc.DeleteIssuer(r.Context(), id); err != nil { - if strings.Contains(err.Error(), "violates foreign key") || strings.Contains(err.Error(), "RESTRICT") { - ErrorWithRequestID(w, http.StatusConflict, "Cannot delete issuer: certificates are still using this issuer", requestID) - } else if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Issuer not found", requestID) - } else { - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete issuer", requestID) + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + h.logger.Error("DeleteIssuer failed", "issuer_id", id, "error", err) + msg = "internal error" } + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/jobs.go b/internal/api/handler/jobs.go index 26d1514..27d9ae4 100644 --- a/internal/api/handler/jobs.go +++ b/internal/api/handler/jobs.go @@ -3,15 +3,14 @@ package handler import ( "context" "encoding/json" - "errors" "io" + "log/slog" "net/http" "strconv" "strings" "github.com/shankar0123/certctl/internal/api/middleware" "github.com/shankar0123/certctl/internal/domain" - "github.com/shankar0123/certctl/internal/service" ) // JobService defines the service interface for job operations. @@ -160,22 +159,13 @@ func (h JobHandler) ApproveJob(w http.ResponseWriter, r *http.Request) { actor := resolveActor(r.Context()) if err := h.svc.ApproveJob(r.Context(), jobID, actor); err != nil { - // M-003: self-approval by the certificate owner is forbidden. - if errors.Is(err, service.ErrSelfApproval) { - ErrorWithRequestID(w, http.StatusForbidden, - "Self-approval is forbidden: the certificate owner cannot approve their own renewal", - requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("ApproveJob failed", "job_id", jobID, "error", err) + msg = "internal error" } - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Job not found", requestID) - return - } - if strings.Contains(err.Error(), "cannot approve") { - ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID) - return - } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to approve job", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -213,15 +203,13 @@ func (h JobHandler) RejectJob(w http.ResponseWriter, r *http.Request) { actor := resolveActor(r.Context()) if err := h.svc.RejectJob(r.Context(), jobID, body.Reason, actor); err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Job not found", requestID) - return + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("RejectJob failed", "job_id", jobID, "error", err) + msg = "internal error" } - if strings.Contains(err.Error(), "cannot reject") { - ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID) - return - } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to reject job", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/notifications.go b/internal/api/handler/notifications.go index 513bfb9..02a9fe6 100644 --- a/internal/api/handler/notifications.go +++ b/internal/api/handler/notifications.go @@ -2,6 +2,7 @@ package handler import ( "context" + "log/slog" "net/http" "strconv" "strings" @@ -107,7 +108,25 @@ func (h NotificationHandler) GetNotification(w http.ResponseWriter, r *http.Requ notification, err := h.svc.GetNotification(r.Context(), id) if err != nil { - ErrorWithRequestID(w, http.StatusNotFound, "Notification not found", requestID) + // M-1 (P2): dispatch routes through errToStatus. Pre-M-1 this branch was + // a blanket `any error → 404 Notification not found` shortcut — which + // silently demoted transient DB failures from the service's List() wrap + // (notification.go:386 pre-M-1) to 404 Not Found with no observable + // external signal. Post-M-1: service/notification.go GetNotification only + // wraps the genuine "not found" path with service.ErrNotFound via %w, and + // every other error (including the List() wrap) surfaces without that + // sentinel — so errors.Is(err, service.ErrNotFound) picks up the real + // 404s and everything else correctly surfaces as 500 with server-side + // slog.Error capture (F-002 redacted-500 pattern preserved). + status := errToStatus(err) + if status == http.StatusInternalServerError { + slog.Error("GetNotification failed", "notification_id", id, "error", err.Error()) + } + msg := "Failed to get notification" + if status == http.StatusNotFound { + msg = "Notification not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -170,11 +189,26 @@ func (h NotificationHandler) RequeueNotification(w http.ResponseWriter, r *http. notificationID := parts[0] if err := h.svc.RequeueNotification(r.Context(), notificationID); err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Notification not found", requestID) - return + // M-1 (P2): dispatch routes through errToStatus. Pre-M-1 this branch + // classified 404 via strings.Contains(err.Error(), "not found"), which + // would have given false positives on any error whose rendered text + // happened to contain "not found" — notably a transient DB failure whose + // driver message mentioned a missing relation or column. Post-M-1: the + // repo-layer Requeue (postgres/notification.go) wraps zero-rows-affected + // with repository.ErrNotFound via %w, and the service layer forwards + // that error with %w — so errors.Is(err, repository.ErrNotFound) walks + // the full wrap chain and picks up the real 404s while everything else + // (including transient DB errors) correctly surfaces as 500 with + // server-side slog.Error capture (F-002 redacted-500 pattern preserved). + status := errToStatus(err) + if status == http.StatusInternalServerError { + slog.Error("RequeueNotification failed", "notification_id", notificationID, "error", err.Error()) } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to requeue notification", requestID) + msg := "Failed to requeue notification" + if status == http.StatusNotFound { + msg = "Notification not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/owners.go b/internal/api/handler/owners.go index 050b4d4..a2dd765 100644 --- a/internal/api/handler/owners.go +++ b/internal/api/handler/owners.go @@ -3,6 +3,7 @@ package handler import ( "context" "encoding/json" + "log/slog" "net/http" "strconv" "strings" @@ -184,13 +185,13 @@ func (h OwnerHandler) DeleteOwner(w http.ResponseWriter, r *http.Request) { id = parts[0] if err := h.svc.DeleteOwner(r.Context(), id); err != nil { - if strings.Contains(err.Error(), "violates foreign key") || strings.Contains(err.Error(), "RESTRICT") { - ErrorWithRequestID(w, http.StatusConflict, "Cannot delete owner: certificates are still assigned to this owner", requestID) - } else if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Owner not found", requestID) - } else { - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete owner", requestID) + status := errToStatus(err) + msg := err.Error() + if status == http.StatusInternalServerError { + slog.Error("DeleteOwner failed", "owner_id", id, "error", err) + msg = "internal error" } + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/profiles.go b/internal/api/handler/profiles.go index 054f9fe..58d8741 100644 --- a/internal/api/handler/profiles.go +++ b/internal/api/handler/profiles.go @@ -3,12 +3,14 @@ package handler import ( "context" "encoding/json" + "errors" "net/http" "strconv" "strings" "github.com/shankar0123/certctl/internal/api/middleware" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/service" ) // ProfileService defines the service interface for certificate profile operations. @@ -21,6 +23,20 @@ type ProfileService interface { } // ProfileHandler handles HTTP requests for certificate profile operations. +// +// Error dispatch (post-M-1): every service error routes through the [errToStatus] +// choke point via `errors.Is` walking the wrap chain, with one explicit +// [service.ErrValidation] arm on the write paths (Create, Update) so the +// composed "validation: " message the service layer +// attaches via `fmt.Errorf("%w: ...", ErrValidation)` can be passed through to +// the 400 response body. Before M-1, the Create and Update handlers branched on +// `strings.Contains(err.Error(), "invalid"|"required"|"must be"|"cannot")` — a +// fragile pattern where a single reword in [service.validateProfile] would +// demote the 400 to 500 with no compile-time signal. The substring-based 404 +// branches on Update and Delete likewise depended on the repository's +// human-readable "profile not found" message surviving forever; both now ride +// the same [repository.ErrNotFound] wrap that G-1's renewal-policy and M-1's +// other repositories use. type ProfileHandler struct { svc ProfileService } @@ -88,7 +104,18 @@ func (h ProfileHandler) GetProfile(w http.ResponseWriter, r *http.Request) { profile, err := h.svc.GetProfile(r.Context(), id) if err != nil { - ErrorWithRequestID(w, http.StatusNotFound, "Profile not found", requestID) + // M-1: route through errToStatus so a repo-level `sql.ErrNoRows` + // (wrapped as repository.ErrNotFound) becomes 404, but a transient DB + // failure no longer masquerades as 404 — it correctly surfaces 500. The + // pre-M-1 "any error → 404" shortcut was plausible when Get's only + // expected failure was "not found", but the choke point now gives us + // correct dispatch for free. Mirrors GetRenewalPolicy. + status := errToStatus(err) + msg := "Failed to get profile" + if status == http.StatusNotFound { + msg = "Profile not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -123,9 +150,15 @@ func (h ProfileHandler) CreateProfile(w http.ResponseWriter, r *http.Request) { created, err := h.svc.CreateProfile(r.Context(), profile) if err != nil { - // Check if it's a validation error from the service - if strings.Contains(err.Error(), "invalid") || strings.Contains(err.Error(), "required") || - strings.Contains(err.Error(), "must be") || strings.Contains(err.Error(), "cannot") { + // M-1: replace the 4-term substring net + // (`"invalid"|"required"|"must be"|"cannot"`) with a single + // `errors.Is(err, service.ErrValidation)` arm. validateProfile wraps + // every field-specific failure via `fmt.Errorf("%w: ", + // ErrValidation)`, so `err.Error()` still contains the human-readable + // reason (e.g., "RSA minimum key size must be at least 2048") and can be + // safely passed to the 400 body — but the status decision no longer + // depends on the exact wording. Other errors redact to a generic 500. + if errors.Is(err, service.ErrValidation) { ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID) return } @@ -162,16 +195,21 @@ func (h ProfileHandler) UpdateProfile(w http.ResponseWriter, r *http.Request) { updated, err := h.svc.UpdateProfile(r.Context(), id, profile) if err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Profile not found", requestID) - return - } - if strings.Contains(err.Error(), "invalid") || strings.Contains(err.Error(), "required") || - strings.Contains(err.Error(), "must be") || strings.Contains(err.Error(), "cannot") { + // M-1: explicit ErrValidation arm preserves the user-facing reason in + // the 400 body (validateProfile wraps every failure via + // `fmt.Errorf("%w: ...", ErrValidation)`); every other error — including + // repo-layer ErrNotFound on a missing row — routes through errToStatus + // so the 404/500 decision no longer depends on substring matching. + if errors.Is(err, service.ErrValidation) { ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID) return } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update profile", requestID) + status := errToStatus(err) + msg := "Failed to update profile" + if status == http.StatusNotFound { + msg = "Profile not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -195,11 +233,14 @@ func (h ProfileHandler) DeleteProfile(w http.ResponseWriter, r *http.Request) { } if err := h.svc.DeleteProfile(r.Context(), id); err != nil { - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Profile not found", requestID) - return + // M-1: sentinel dispatch replaces the substring 404 check — see the + // parallel comment block in UpdateProfile for the rationale. + status := errToStatus(err) + msg := "Failed to delete profile" + if status == http.StatusNotFound { + msg = "Profile not found" } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete profile", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/renewal_policy.go b/internal/api/handler/renewal_policy.go index 41b2424..28815f2 100644 --- a/internal/api/handler/renewal_policy.go +++ b/internal/api/handler/renewal_policy.go @@ -26,14 +26,28 @@ type RenewalPolicyService interface { // RenewalPolicyHandler serves /api/v1/renewal-policies CRUD endpoints. // -// G-1 design note: the service-level `ErrRenewalPolicyDuplicateName` / -// `ErrRenewalPolicyInUse` sentinels alias the repository sentinels (same var -// identity), so `errors.Is` walks transparently across layers. Delete/Update -// not-found detection intentionally uses a `strings.Contains(err.Error(), -// "not found")` substring check — the repo wraps `sql.ErrNoRows` as -// `fmt.Errorf("renewal policy not found: %s", id)` which strips the sentinel, -// and the handler red-tests' `ErrMockNotFound = errors.New("mock not found -// error")` follows the same substring convention. +// Error dispatch (post-M-1): every service error routes through the [errToStatus] +// choke point via `errors.Is` walking the wrap chain. Three sentinel identities +// cover the full dispatch surface: +// +// - [service.ErrRenewalPolicyDuplicateName] / [service.ErrRenewalPolicyInUse] +// are `var`-aliased to the repository-layer sentinels of the same name (G-1), +// so handler-side `errors.Is` succeeds against a sentinel raised three layers +// deep in [internal/repository/postgres.RenewalPolicyRepository] without the +// service layer having to translate. [errToStatus] routes both to 409. +// - [repository.ErrNotFound] is wrapped around `sql.ErrNoRows` inside the +// repo's Get/Update/Delete methods via `fmt.Errorf("%w: renewal policy %s", +// repository.ErrNotFound, id)` (M-1). [errToStatus] routes that to 404 in +// the same switch arm as [service.ErrNotFound], preserving the existing +// 404-on-missing behavior that the pre-M-1 substring check provided — +// without the rewording-regression risk that motivated the migration. +// +// The handler layer keeps two explicit `errors.Is` arms for the +// duplicate-name / in-use sentinels so each 409 response can carry a +// constraint-specific human-readable message ("with that name" vs. "still +// referenced by managed certificates"); every other error path — including +// not-found — delegates the status decision to [errToStatus] and provides a +// generic body via the F-002 redacted-500 pattern. type RenewalPolicyHandler struct { svc RenewalPolicyService } @@ -105,11 +119,18 @@ func (h RenewalPolicyHandler) GetRenewalPolicy(w http.ResponseWriter, r *http.Re policy, err := h.svc.GetRenewalPolicy(r.Context(), id) if err != nil { - // Matches the PolicyHandler.GetPolicy convention: any error from the - // service surfaces as 404. The repo wraps sql.ErrNoRows as - // "renewal policy not found: %s" and there's no other expected failure - // mode on Get — the caller gets a clean 404. - ErrorWithRequestID(w, http.StatusNotFound, "Renewal policy not found", requestID) + // M-1: route through errToStatus so a repo-level `sql.ErrNoRows` + // (wrapped as repository.ErrNotFound) becomes 404, but a transient DB + // failure no longer masquerades as 404 — it correctly surfaces 500. + // The pre-M-1 "any error → 404" shortcut was plausible when Get's only + // expected failure was "not found", but the choke point now gives us + // correct dispatch for free. + status := errToStatus(err) + msg := "Failed to get renewal policy" + if status == http.StatusNotFound { + msg = "Renewal policy not found" + } + ErrorWithRequestID(w, status, msg, requestID) return } @@ -158,11 +179,11 @@ func (h RenewalPolicyHandler) CreateRenewalPolicy(w http.ResponseWriter, r *http // UpdateRenewalPolicy replaces the fields of an existing renewal policy. // PUT /api/v1/renewal-policies/{id} // -// Error mapping: -// - invalid JSON / empty ID → 400 -// - ErrRenewalPolicyDuplicateName → 409 -// - error text contains "not found" → 404 (see struct doc comment re: substring check) -// - anything else → 500 +// Error mapping (post-M-1, sentinel-driven): +// - invalid JSON / empty ID → 400 +// - ErrRenewalPolicyDuplicateName (pg 23505) → 409 (explicit arm, custom msg) +// - ErrNotFound (wrapping sql.ErrNoRows) → 404 (via errToStatus) +// - anything else → 500 (via errToStatus, body redacted) func (h RenewalPolicyHandler) UpdateRenewalPolicy(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPut { Error(w, http.StatusMethodNotAllowed, "Method not allowed") @@ -191,11 +212,17 @@ func (h RenewalPolicyHandler) UpdateRenewalPolicy(w http.ResponseWriter, r *http ErrorWithRequestID(w, http.StatusConflict, "A renewal policy with that name already exists", requestID) return } - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Renewal policy not found", requestID) - return + // M-1: drop the `strings.Contains(err.Error(), "not found")` branch. + // [repository.ErrNotFound] now wraps sql.ErrNoRows at the three + // renewal-policy repo methods (Get/Update/Delete), so errToStatus + // routes a missing row to 404 via errors.Is without depending on the + // repo's fmt.Errorf format string surviving a future reword. + status := errToStatus(err) + msg := "Failed to update renewal policy" + if status == http.StatusNotFound { + msg = "Renewal policy not found" } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update renewal policy", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } @@ -205,11 +232,11 @@ func (h RenewalPolicyHandler) UpdateRenewalPolicy(w http.ResponseWriter, r *http // DeleteRenewalPolicy removes a renewal policy. // DELETE /api/v1/renewal-policies/{id} // -// Error mapping: -// - empty ID (trailing slash) → 400 -// - ErrRenewalPolicyInUse (pg 23503 FK-RESTRICT against managed_certificates.renewal_policy_id) → 409 -// - error text contains "not found" → 404 -// - anything else → 500 +// Error mapping (post-M-1, sentinel-driven): +// - empty ID (trailing slash) → 400 +// - ErrRenewalPolicyInUse (pg 23503 FK-RESTRICT) → 409 (explicit arm, custom msg) +// - ErrNotFound (wrapping sql.ErrNoRows) → 404 (via errToStatus) +// - anything else → 500 (via errToStatus, body redacted) func (h RenewalPolicyHandler) DeleteRenewalPolicy(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodDelete { Error(w, http.StatusMethodNotAllowed, "Method not allowed") @@ -231,11 +258,14 @@ func (h RenewalPolicyHandler) DeleteRenewalPolicy(w http.ResponseWriter, r *http ErrorWithRequestID(w, http.StatusConflict, "Renewal policy is still referenced by managed certificates", requestID) return } - if strings.Contains(err.Error(), "not found") { - ErrorWithRequestID(w, http.StatusNotFound, "Renewal policy not found", requestID) - return + // M-1: sentinel dispatch replaces the substring check — see the + // parallel comment block in UpdateRenewalPolicy for the rationale. + status := errToStatus(err) + msg := "Failed to delete renewal policy" + if status == http.StatusNotFound { + msg = "Renewal policy not found" } - ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete renewal policy", requestID) + ErrorWithRequestID(w, status, msg, requestID) return } diff --git a/internal/api/handler/scep.go b/internal/api/handler/scep.go index da91ede..b51d8e1 100644 --- a/internal/api/handler/scep.go +++ b/internal/api/handler/scep.go @@ -6,6 +6,7 @@ import ( "encoding/asn1" "encoding/base64" "encoding/pem" + "errors" "fmt" "io" "net/http" @@ -14,6 +15,7 @@ import ( "github.com/shankar0123/certctl/internal/api/middleware" "github.com/shankar0123/certctl/internal/domain" "github.com/shankar0123/certctl/internal/pkcs7" + "github.com/shankar0123/certctl/internal/service" ) // SCEPService defines the service interface for SCEP enrollment operations. @@ -171,11 +173,25 @@ func (h SCEPHandler) pkiOperation(w http.ResponseWriter, r *http.Request) { result, err := h.svc.PKCSReq(r.Context(), csrPEM, challengePassword, transactionID) if err != nil { - if strings.Contains(err.Error(), "challenge password") { - ErrorWithRequestID(w, http.StatusForbidden, "Invalid challenge password", requestID) + // M-1 (P2): typed-sentinel dispatch replaces the pre-M-1 substring + // branch `strings.Contains(err.Error(), "challenge password")`. The + // service layer now wraps both challenge-password failure modes (server + // misconfigured / client credential wrong) via `fmt.Errorf("%w: ...", + // ErrUnauthenticated)`, so errors.Is walks the wrap chain without + // depending on the exact wording of the error string. This also + // corrects the HTTP status: pre-M-1 returned 403 Forbidden, but RFC + // 7235 classifies this as 401 Unauthorized (authentication failure, not + // authorization denial). The errToStatus doc block enumerates this as + // the canonical ErrUnauthenticated call site. + if errors.Is(err, service.ErrUnauthenticated) { + ErrorWithRequestID(w, http.StatusUnauthorized, "Invalid challenge password", requestID) return } - ErrorWithRequestID(w, http.StatusInternalServerError, fmt.Sprintf("Enrollment failed: %v", err), requestID) + // F-002 redacted-500: every other enrollment failure (CSR parse errors, + // issuer-layer failures, audit-layer failures) returns an opaque body + // so we don't leak internal state through the SCEP response. The + // logger already captured the real error at the service layer. + ErrorWithRequestID(w, http.StatusInternalServerError, "Enrollment failed", requestID) return } diff --git a/internal/api/handler/scep_handler_test.go b/internal/api/handler/scep_handler_test.go index d9e9982..9354ec6 100644 --- a/internal/api/handler/scep_handler_test.go +++ b/internal/api/handler/scep_handler_test.go @@ -4,12 +4,14 @@ import ( "context" "encoding/pem" "errors" + "fmt" "net/http" "net/http/httptest" "strings" "testing" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/service" ) // mockSCEPService implements SCEPService for testing. @@ -214,9 +216,21 @@ func TestSCEP_PKIOperation_Success_RawCSR(t *testing.T) { } } +// TestSCEP_PKIOperation_ChallengePasswordRejected pins the M-1 (P2) dispatch +// contract: when the service wraps the failure via `fmt.Errorf("%w: ...", +// service.ErrUnauthenticated)` the handler's errToStatus choke point must +// return 401 Unauthorized, NOT 403 Forbidden. +// +// This is a deliberate semantic correction. Pre-M-1 the handler inspected +// err.Error() for the "challenge password" substring and returned 403, which +// misclassified the RFC 7235 condition (the caller presented no valid +// application-layer credential — that is auth failure, not authorization +// denial). The errToStatus doc explicitly cites this code path as the +// canonical ErrUnauthenticated consumer; see handler/errors.go and the +// symmetric M-1 comment block at handler/scep.go in the pkiOperation arm. func TestSCEP_PKIOperation_ChallengePasswordRejected(t *testing.T) { svc := &mockSCEPService{ - EnrollErr: errors.New("invalid challenge password"), + EnrollErr: fmt.Errorf("%w: invalid challenge password", service.ErrUnauthenticated), } h := NewSCEPHandler(svc) @@ -230,8 +244,8 @@ func TestSCEP_PKIOperation_ChallengePasswordRejected(t *testing.T) { w := httptest.NewRecorder() h.HandleSCEP(w, req) - if w.Code != http.StatusForbidden { - t.Errorf("expected 403, got %d: %s", w.Code, w.Body.String()) + if w.Code != http.StatusUnauthorized { + t.Errorf("expected 401 Unauthorized (M-1 dispatch of service.ErrUnauthenticated), got %d: %s", w.Code, w.Body.String()) } } diff --git a/internal/api/handler/test_utils.go b/internal/api/handler/test_utils.go index 6c05232..c94616b 100644 --- a/internal/api/handler/test_utils.go +++ b/internal/api/handler/test_utils.go @@ -1,11 +1,27 @@ package handler -import "errors" +import ( + "errors" + "fmt" -var ( - // Mock errors for testing - ErrMockServiceFailed = errors.New("mock service error") - ErrMockNotFound = errors.New("mock not found error") - ErrMockUnauthorized = errors.New("mock unauthorized error") - ErrMockConflict = errors.New("mock conflict error") + "github.com/shankar0123/certctl/internal/service" +) + +// Mock errors for testing. +// +// M-1: Since the handler layer now classifies errors via the typed-sentinel +// dispatch in [errToStatus] (errors.Is on service + repository sentinels rather +// than substring matching on err.Error()), handler mocks MUST wrap the +// appropriate generic sentinel so `errors.Is(err, service.ErrNotFound)` etc. +// succeed. Using raw errors.New() breaks the dispatch and degrades every +// mock-driven negative-path test to a 500 Internal Server Error — the same +// silent-regression trap the migration was designed to eliminate. +// +// ErrMockServiceFailed deliberately stays untyped so it continues to exercise +// the default 500 path. +var ( + ErrMockServiceFailed = errors.New("mock service error") + ErrMockNotFound = fmt.Errorf("%w: mock not found", service.ErrNotFound) + ErrMockUnauthorized = fmt.Errorf("%w: mock unauthenticated", service.ErrUnauthenticated) + ErrMockConflict = fmt.Errorf("%w: mock conflict", service.ErrConflict) ) diff --git a/internal/integration/lifecycle_test.go b/internal/integration/lifecycle_test.go index d0b22a7..c575367 100644 --- a/internal/integration/lifecycle_test.go +++ b/internal/integration/lifecycle_test.go @@ -1318,7 +1318,10 @@ func (m *mockProfileService) ListProfiles(_ context.Context, page, perPage int) } func (m *mockProfileService) GetProfile(_ context.Context, id string) (*domain.CertificateProfile, error) { - return nil, fmt.Errorf("profile not found") + // M-1: wrap service.ErrNotFound so the handler's errToStatus choke point + // routes this to 404 via errors.Is. The Error() message still contains + // "not found" so any pre-migration substring assertions continue to pass. + return nil, fmt.Errorf("%w: profile not found", service.ErrNotFound) } func (m *mockProfileService) CreateProfile(_ context.Context, profile domain.CertificateProfile) (*domain.CertificateProfile, error) { @@ -1341,7 +1344,8 @@ func (m *mockAgentGroupService) ListAgentGroups(_ context.Context, page, perPage } func (m *mockAgentGroupService) GetAgentGroup(_ context.Context, id string) (*domain.AgentGroup, error) { - return nil, fmt.Errorf("agent group not found") + // M-1: wrap service.ErrNotFound — see GetProfile above for rationale. + return nil, fmt.Errorf("%w: agent group not found", service.ErrNotFound) } func (m *mockAgentGroupService) CreateAgentGroup(_ context.Context, group domain.AgentGroup) (*domain.AgentGroup, error) { diff --git a/internal/repository/interfaces.go b/internal/repository/interfaces.go index c1af13d..8b2d27f 100644 --- a/internal/repository/interfaces.go +++ b/internal/repository/interfaces.go @@ -18,7 +18,17 @@ import ( // managed_certificates.renewal_policy_id to renewal_policies.id with ON // DELETE RESTRICT). Both map onto the same 409 status but with distinct // messages so operators can tell them apart. +// +// M-1: ErrNotFound is the repo-layer "row not found" sentinel. Repositories +// that historically returned fmt.Errorf("... not found: %s", id) without +// wrapping sql.ErrNoRows now wrap ErrNotFound via fmt.Errorf("%w: ...", so +// the handler layer's single errToStatus choke point can route them to HTTP +// 404 via errors.Is without substring-matching the message text. Existing +// service-level service.ErrNotFound stays a distinct value — both map to 404 +// through explicit branches in handler/errors.go (mirrors the G-1 treatment +// of the repo-level 409 sentinels). var ( + ErrNotFound = errors.New("repository: not found") ErrRenewalPolicyDuplicateName = errors.New("renewal policy name already exists") ErrRenewalPolicyInUse = errors.New("renewal policy is still referenced by managed certificates") ) diff --git a/internal/repository/postgres/agent.go b/internal/repository/postgres/agent.go index 75231b7..c24648d 100644 --- a/internal/repository/postgres/agent.go +++ b/internal/repository/postgres/agent.go @@ -3,11 +3,13 @@ package postgres import ( "context" "database/sql" + "errors" "fmt" "time" "github.com/google/uuid" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/repository" ) // AgentRepository implements repository.AgentRepository @@ -72,8 +74,13 @@ func (r *AgentRepository) Get(ctx context.Context, id string) (*domain.Agent, er agent, err := scanAgent(row) if err != nil { - if err == sql.ErrNoRows { - return nil, fmt.Errorf("agent not found") + // M-1 (P2): wrap sql.ErrNoRows with repository.ErrNotFound via %w so + // the handler's errToStatus choke point dispatches to 404 via + // errors.Is instead of the pre-M-1 strings.Contains(err.Error(), + // "not found") branch at handler/agents.go. Mirrors agent_group and + // profile repositories. + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("%w: agent %s", repository.ErrNotFound, id) } return nil, fmt.Errorf("failed to query agent: %w", err) } @@ -169,8 +176,12 @@ func (r *AgentRepository) Update(ctx context.Context, agent *domain.Agent) error return fmt.Errorf("failed to get rows affected: %w", err) } + // M-1 (P2): wrap the zero-rows-affected condition with + // repository.ErrNotFound so the handler's errToStatus dispatches to 404 + // via errors.Is without substring matching. Mirrors agent_group and + // profile repositories. if rows == 0 { - return fmt.Errorf("agent not found") + return fmt.Errorf("%w: agent %s", repository.ErrNotFound, agent.ID) } return nil @@ -189,8 +200,10 @@ func (r *AgentRepository) Delete(ctx context.Context, id string) error { return fmt.Errorf("failed to get rows affected: %w", err) } + // M-1 (P2): zero-rows-affected → repository.ErrNotFound wrap. Mirrors + // agent_group and profile repositories. if rows == 0 { - return fmt.Errorf("agent not found") + return fmt.Errorf("%w: agent %s", repository.ErrNotFound, id) } return nil @@ -236,8 +249,13 @@ func (r *AgentRepository) UpdateHeartbeat(ctx context.Context, id string, metada return fmt.Errorf("failed to get rows affected: %w", err) } + // M-1 (P2): zero-rows-affected → repository.ErrNotFound wrap. Note the + // UPDATE filters on `retired_at IS NULL`, so a retired agent row also + // returns zero-rows-affected here. The service layer short-circuits with + // ErrAgentRetired (410) BEFORE reaching this path via Heartbeat's + // explicit Get check, so the 404 vs 410 distinction is drawn upstream. if rows == 0 { - return fmt.Errorf("agent not found") + return fmt.Errorf("%w: agent %s", repository.ErrNotFound, id) } return nil @@ -258,8 +276,13 @@ func (r *AgentRepository) GetByAPIKey(ctx context.Context, keyHash string) (*dom agent, err := scanAgent(row) if err != nil { - if err == sql.ErrNoRows { - return nil, fmt.Errorf("agent not found") + // M-1 (P2): wrap sql.ErrNoRows with repository.ErrNotFound via %w. + // The auth middleware calls this on every request; a missing row + // must surface as 404 (well, 401 upstream — the middleware treats + // "no agent matched" as auth failure) via the errToStatus choke + // point, not via substring matching. + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("%w: agent with api key not found", repository.ErrNotFound) } return nil, fmt.Errorf("failed to query agent: %w", err) } diff --git a/internal/repository/postgres/agent_group.go b/internal/repository/postgres/agent_group.go index 97decbc..5b64dc0 100644 --- a/internal/repository/postgres/agent_group.go +++ b/internal/repository/postgres/agent_group.go @@ -3,10 +3,12 @@ package postgres import ( "context" "database/sql" + "errors" "fmt" "time" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/repository" ) // AgentGroupRepository implements agent group CRUD with PostgreSQL. @@ -49,8 +51,12 @@ func (r *AgentGroupRepository) Get(ctx context.Context, id string) (*domain.Agen g := &domain.AgentGroup{} err := row.Scan(&g.ID, &g.Name, &g.Description, &g.MatchOS, &g.MatchArchitecture, &g.MatchIPCIDR, &g.MatchVersion, &g.Enabled, &g.CreatedAt, &g.UpdatedAt) - if err == sql.ErrNoRows { - return nil, fmt.Errorf("agent group not found: %s", id) + // M-1 (P2): wrap sql.ErrNoRows with repository.ErrNotFound via %w so the + // handler's errToStatus choke point dispatches to 404 via errors.Is + // instead of the pre-M-1 strings.Contains(err.Error(), "not found") + // branch at handler/agent_groups.go. Mirrors profile repository. + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("%w: agent group %s", repository.ErrNotFound, id) } if err != nil { return nil, fmt.Errorf("failed to get agent group: %w", err) @@ -83,8 +89,11 @@ func (r *AgentGroupRepository) Update(ctx context.Context, group *domain.AgentGr return fmt.Errorf("failed to update agent group: %w", err) } rows, _ := result.RowsAffected() + // M-1 (P2): wrap the zero-rows-affected condition with + // repository.ErrNotFound so the handler's errToStatus dispatches to 404 + // via errors.Is without substring matching. Mirrors profile repository. if rows == 0 { - return fmt.Errorf("agent group not found: %s", group.ID) + return fmt.Errorf("%w: agent group %s", repository.ErrNotFound, group.ID) } return nil } @@ -96,8 +105,11 @@ func (r *AgentGroupRepository) Delete(ctx context.Context, id string) error { return fmt.Errorf("failed to delete agent group: %w", err) } rows, _ := result.RowsAffected() + // M-1 (P2): wrap zero-rows-affected with repository.ErrNotFound so the + // handler's errToStatus dispatches to 404 via errors.Is. Mirrors profile + // repository. if rows == 0 { - return fmt.Errorf("agent group not found: %s", id) + return fmt.Errorf("%w: agent group %s", repository.ErrNotFound, id) } return nil } diff --git a/internal/repository/postgres/certificate.go b/internal/repository/postgres/certificate.go index 370f005..536d695 100644 --- a/internal/repository/postgres/certificate.go +++ b/internal/repository/postgres/certificate.go @@ -264,8 +264,14 @@ func (r *CertificateRepository) Get(ctx context.Context, id string) (*domain.Man cert, err := r.scanCertificate(ctx, row) if err != nil { - if err == sql.ErrNoRows { - return nil, fmt.Errorf("certificate not found") + // M-1 (P2): wrap sql.ErrNoRows with repository.ErrNotFound via %w so + // the handler's errToStatus choke point dispatches to 404 via + // errors.Is instead of the pre-M-1 strings.Contains(err.Error(), + // "not found") branch at handler/export.go (ExportPEM + ExportPKCS12). + // scanCertificate already wraps sql.ErrNoRows via %w, so errors.Is + // walks through. Mirrors profile and agent_group repositories. + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("%w: certificate %s", repository.ErrNotFound, id) } return nil, fmt.Errorf("failed to query certificate: %w", err) } @@ -396,8 +402,11 @@ func (r *CertificateRepository) Update(ctx context.Context, cert *domain.Managed return fmt.Errorf("failed to get rows affected: %w", err) } + // M-1 (P2): zero-rows-affected → repository.ErrNotFound wrap so the + // handler's errToStatus dispatches to 404 via errors.Is. Mirrors profile + // and agent_group repositories. if rows == 0 { - return fmt.Errorf("certificate not found") + return fmt.Errorf("%w: certificate %s", repository.ErrNotFound, cert.ID) } return nil @@ -418,8 +427,10 @@ func (r *CertificateRepository) Archive(ctx context.Context, id string) error { return fmt.Errorf("failed to get rows affected: %w", err) } + // M-1 (P2): zero-rows-affected → repository.ErrNotFound wrap. Mirrors + // profile and agent_group repositories. if rows == 0 { - return fmt.Errorf("certificate not found") + return fmt.Errorf("%w: certificate %s", repository.ErrNotFound, id) } return nil @@ -583,6 +594,16 @@ func (r *CertificateRepository) GetLatestVersion(ctx context.Context, certID str v.KeySize = int(keySize.Int64) if err != nil { + // M-1 (P2): wrap sql.ErrNoRows with repository.ErrNotFound via %w so + // the handler's errToStatus choke point dispatches to 404 via + // errors.Is. The export service surfaces "no certificate version + // found" on this path — pre-M-1 the handler branched on + // strings.Contains(err.Error(), "not found"), now it walks the wrap + // chain. Non-ErrNoRows errors preserve their "failed to get latest + // certificate version" framing. + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("%w: certificate version for %s", repository.ErrNotFound, certID) + } return nil, fmt.Errorf("failed to get latest certificate version: %w", err) } diff --git a/internal/repository/postgres/notification.go b/internal/repository/postgres/notification.go index 3f997bb..9ef450d 100644 --- a/internal/repository/postgres/notification.go +++ b/internal/repository/postgres/notification.go @@ -138,7 +138,15 @@ func (r *NotificationRepository) List(ctx context.Context, filter *repository.No return notifs, nil } -// UpdateStatus updates a notification's delivery status +// UpdateStatus updates a notification's delivery status. +// +// M-1 (P2): zero-rows-affected now wraps repository.ErrNotFound via %w so the +// handler's errToStatus choke point dispatches to 404 via errors.Is. Pre-M-1 +// the return was a raw fmt.Errorf("notification not found") string that the +// service layer re-wrapped and the handler classified via strings.Contains — +// one sentinel-message reword away from silently demoting 404 to 500. The +// behavior (error on concurrent-delete / bad-id) is unchanged; only the error +// identity is now type-safe. func (r *NotificationRepository) UpdateStatus(ctx context.Context, id string, status string, sentAt time.Time) error { result, err := r.db.ExecContext(ctx, ` UPDATE notification_events SET status = $1, sent_at = $2 WHERE id = $3 @@ -154,7 +162,7 @@ func (r *NotificationRepository) UpdateStatus(ctx context.Context, id string, st } if rows == 0 { - return fmt.Errorf("notification not found") + return fmt.Errorf("%w: notification %s", repository.ErrNotFound, id) } return nil @@ -307,10 +315,12 @@ func (r *NotificationRepository) RecordFailedAttempt(ctx context.Context, id str return fmt.Errorf("failed to get rows affected: %w", err) } if rows == 0 { - // Same "not found" error shape as UpdateStatus above. The scheduler - // logs-and-continues on this so a concurrently-deleted row doesn't - // break the sweep. - return fmt.Errorf("notification not found") + // M-1 (P2): wrap repository.ErrNotFound (was raw + // fmt.Errorf("notification not found")). Same "not found" shape as + // UpdateStatus — the scheduler logs-and-continues on a concurrently + // deleted row, but callers that surface the error (the handler) now + // discriminate via errors.Is instead of substring matching. + return fmt.Errorf("%w: notification %s", repository.ErrNotFound, id) } return nil } @@ -342,7 +352,8 @@ func (r *NotificationRepository) MarkAsDead(ctx context.Context, id string, last return fmt.Errorf("failed to get rows affected: %w", err) } if rows == 0 { - return fmt.Errorf("notification not found") + // M-1 (P2): wrap repository.ErrNotFound. See UpdateStatus rationale. + return fmt.Errorf("%w: notification %s", repository.ErrNotFound, id) } return nil } @@ -379,7 +390,8 @@ func (r *NotificationRepository) Requeue(ctx context.Context, id string) error { return fmt.Errorf("failed to get rows affected: %w", err) } if rows == 0 { - return fmt.Errorf("notification not found") + // M-1 (P2): wrap repository.ErrNotFound. See UpdateStatus rationale. + return fmt.Errorf("%w: notification %s", repository.ErrNotFound, id) } return nil } diff --git a/internal/repository/postgres/profile.go b/internal/repository/postgres/profile.go index 2544262..fa2127c 100644 --- a/internal/repository/postgres/profile.go +++ b/internal/repository/postgres/profile.go @@ -4,11 +4,13 @@ import ( "context" "database/sql" "encoding/json" + "errors" "fmt" "time" "github.com/google/uuid" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/repository" ) // ProfileRepository implements repository.CertificateProfileRepository @@ -63,8 +65,11 @@ func (r *ProfileRepository) Get(ctx context.Context, id string) (*domain.Certifi p, err := scanProfile(row) if err != nil { - if err == sql.ErrNoRows { - return nil, fmt.Errorf("profile not found") + if errors.Is(err, sql.ErrNoRows) { + // M-1: wrap repository.ErrNotFound so the handler's errToStatus + // choke point can route this to HTTP 404 via errors.Is without + // substring-matching the "not found" message text. + return nil, fmt.Errorf("%w: profile %s", repository.ErrNotFound, id) } return nil, fmt.Errorf("failed to query profile: %w", err) } @@ -159,7 +164,8 @@ func (r *ProfileRepository) Update(ctx context.Context, profile *domain.Certific } if rows == 0 { - return fmt.Errorf("profile not found") + // M-1: wrap repository.ErrNotFound — see Get for rationale. + return fmt.Errorf("%w: profile %s", repository.ErrNotFound, profile.ID) } return nil @@ -178,7 +184,8 @@ func (r *ProfileRepository) Delete(ctx context.Context, id string) error { } if rows == 0 { - return fmt.Errorf("profile not found") + // M-1: wrap repository.ErrNotFound — see Get for rationale. + return fmt.Errorf("%w: profile %s", repository.ErrNotFound, id) } return nil diff --git a/internal/repository/postgres/renewal_policy.go b/internal/repository/postgres/renewal_policy.go index 8b6f1bf..df52e7b 100644 --- a/internal/repository/postgres/renewal_policy.go +++ b/internal/repository/postgres/renewal_policy.go @@ -72,7 +72,10 @@ func (r *RenewalPolicyRepository) Get(ctx context.Context, id string) (*domain.R policy, err := scanRenewalPolicy(row) if err != nil { if errors.Is(err, sql.ErrNoRows) { - return nil, fmt.Errorf("renewal policy not found: %s", id) + // M-1: wrap repository.ErrNotFound so the handler's errToStatus + // choke point can route this to HTTP 404 via errors.Is without + // substring-matching the "not found" message text. + return nil, fmt.Errorf("%w: renewal policy %s", repository.ErrNotFound, id) } return nil, fmt.Errorf("failed to query renewal policy: %w", err) } @@ -252,7 +255,8 @@ func (r *RenewalPolicyRepository) Update(ctx context.Context, id string, policy updated, err := scanRenewalPolicy(row) if err != nil { if errors.Is(err, sql.ErrNoRows) { - return fmt.Errorf("renewal policy not found: %s", id) + // M-1: wrap repository.ErrNotFound — see Get for rationale. + return fmt.Errorf("%w: renewal policy %s", repository.ErrNotFound, id) } if isUniqueViolation(err) { return repository.ErrRenewalPolicyDuplicateName @@ -283,7 +287,8 @@ func (r *RenewalPolicyRepository) Delete(ctx context.Context, id string) error { return fmt.Errorf("failed to read RowsAffected for delete: %w", err) } if rows == 0 { - return fmt.Errorf("renewal policy not found: %s", id) + // M-1: wrap repository.ErrNotFound — see Get for rationale. + return fmt.Errorf("%w: renewal policy %s", repository.ErrNotFound, id) } return nil } diff --git a/internal/service/agent.go b/internal/service/agent.go index b68d89b..6459dcc 100644 --- a/internal/service/agent.go +++ b/internal/service/agent.go @@ -6,10 +6,12 @@ import ( "crypto/sha256" "encoding/base64" "encoding/hex" + "errors" "fmt" "log/slog" "time" + "github.com/lib/pq" "github.com/shankar0123/certctl/internal/domain" "github.com/shankar0123/certctl/internal/repository" ) @@ -409,6 +411,16 @@ func (s *AgentService) RegisterAgent(ctx context.Context, agent domain.Agent) (* agent.LastHeartbeatAt = &now if err := s.agentRepo.Create(ctx, &agent); err != nil { + // M-1 (P2): explicit pg-23505 dispatch wraps unique-constraint + // violations on the agent name with ErrConflict so the handler's + // errors.Is(err, service.ErrConflict) arm fires 409 deterministically + // instead of relying on the errToStatus *pq.Error SQLSTATE fallback. + // Mirrors the duplicate-name paths in IssuerService and + // RenewalPolicyService. + var pgErr *pq.Error + if errors.As(err, &pgErr) && pgErr.Code == "23505" { + return nil, fmt.Errorf("%w: agent name already exists", ErrConflict) + } return nil, fmt.Errorf("failed to register agent: %w", err) } return &agent, nil diff --git a/internal/service/agent_group.go b/internal/service/agent_group.go index 5e9f148..b12d2cf 100644 --- a/internal/service/agent_group.go +++ b/internal/service/agent_group.go @@ -143,12 +143,20 @@ func (s *AgentGroupService) ListMembers(ctx context.Context, id string) ([]domai } // validateAgentGroup checks that an agent group's configuration is valid. +// +// M-1 (P2): every return wraps ErrValidation via %w so the handler's +// errToStatus choke point dispatches these to HTTP 400 via errors.Is without +// the substring-matching on "invalid"/"required" that the pre-M-1 +// agent_groups handler relied on at handler/agent_groups.go:126. The composed +// Error() string still contains the original human-readable text, so the +// handler safely passes err.Error() through to the response body on the 400 +// arm. Mirrors validateProfile. func validateAgentGroup(g *domain.AgentGroup) error { if g.Name == "" { - return fmt.Errorf("agent group name is required") + return fmt.Errorf("%w: agent group name is required", ErrValidation) } if len(g.Name) > 255 { - return fmt.Errorf("agent group name exceeds 255 characters") + return fmt.Errorf("%w: agent group name exceeds 255 characters", ErrValidation) } return nil } diff --git a/internal/service/agent_retire.go b/internal/service/agent_retire.go index 46c54b2..04c49a4 100644 --- a/internal/service/agent_retire.go +++ b/internal/service/agent_retire.go @@ -26,8 +26,12 @@ import ( // three cloud secret-manager discovery sources; retiring any of them orphans // its subsystem. The guard fires unconditionally — force=true does not bypass // it, because a sentinel is a structural invariant of the deployment, not -// a piece of fleet state the operator owns. Handler maps this to HTTP 403. -var ErrAgentIsSentinel = errors.New("agent is a reserved sentinel and cannot be retired") +// a piece of fleet state the operator owns. +// +// M-1 (P2): wraps [ErrForbidden] so the handler-layer errToStatus choke point +// resolves HTTP 403 via errors.Is. Call sites can still errors.Is against +// ErrAgentIsSentinel for domain-specific branches. +var ErrAgentIsSentinel = fmt.Errorf("%w: agent is a reserved sentinel and cannot be retired", ErrForbidden) // ErrBlockedByDependencies is returned by RetireAgent when at least one of // (active targets, active certificates, pending jobs) referencing the agent @@ -35,15 +39,22 @@ var ErrAgentIsSentinel = errors.New("agent is a reserved sentinel and cannot be // a *BlockedByDependenciesError (see below), so handlers doing errors.As // can surface the per-bucket counts in the 409 body for operator // troubleshooting. Tests use errors.Is; handlers use errors.As. -var ErrBlockedByDependencies = errors.New("agent has active downstream dependencies") +// +// M-1 (P2): wraps [ErrConflict] so errToStatus resolves HTTP 409 via +// errors.Is, keeping the per-bucket errors.As path unchanged for the +// detailed 409 body. BlockedByDependenciesError.Unwrap still returns the +// domain-specific sentinel for chained Is checks. +var ErrBlockedByDependencies = fmt.Errorf("%w: agent has active downstream dependencies", ErrConflict) // ErrForceReasonRequired is returned when force=true is supplied without a // non-empty reason. The force escape hatch is deliberately chatty: operators // pulling the emergency cord must leave an auditable breadcrumb explaining -// why a cascade was justified. Handler maps this to HTTP 400 so the operator -// retries with --reason rather than silently skipping the guard. Checked -// before any DB mutation to keep the no-reason path transactionally clean. -var ErrForceReasonRequired = errors.New("force=true requires a non-empty reason") +// why a cascade was justified. Operators must retry with --reason rather +// than silently skipping the guard. Checked before any DB mutation to keep +// the no-reason path transactionally clean. +// +// M-1 (P2): wraps [ErrValidation] so errToStatus resolves HTTP 400. +var ErrForceReasonRequired = fmt.Errorf("%w: force=true requires a non-empty reason", ErrValidation) // ErrAgentRetired is returned by Heartbeat (and any future agent-authenticated // call site) when a retired agent is still polling. The handler layer maps @@ -52,6 +63,12 @@ var ErrForceReasonRequired = errors.New("force=true requires a non-empty reason" // forever on a soft-retired identity. IsRetired() on the domain model is // the single source of truth; the sentinel exists so service and handler // callers can errors.Is against one symbol. +// +// M-1 (P2): deliberately NOT wrapped under any generic sentinel. 410 Gone is +// semantically distinct from 403/404/409 — it signals a permanently-terminated +// resource identity and drives deterministic agent-binary shutdown (see +// cmd/agent/main.go). errToStatus tests it FIRST so it cannot be demoted by +// the generic cascade. var ErrAgentRetired = errors.New("agent has been retired") // BlockedByDependenciesError wraps ErrBlockedByDependencies and carries the diff --git a/internal/service/audit.go b/internal/service/audit.go index 8798896..06f779a 100644 --- a/internal/service/audit.go +++ b/internal/service/audit.go @@ -143,6 +143,15 @@ func (s *AuditService) ListAuditEvents(ctx context.Context, page, perPage int) ( } // GetAuditEvent returns a single audit event (handler interface method). +// +// M-1 (P2): the pre-M-1 zero-events path returned a bare +// `fmt.Errorf("audit event not found")` and the handler dispatched via a +// blanket `any error → 404 Audit event not found` shortcut. That silently +// demoted transient DB failures from the auditRepo.List wrap (line 154 above) +// to 404 Not Found. Now the zero-events path wraps ErrNotFound via %w so +// errors.Is(err, service.ErrNotFound) picks up the real 404 at the handler's +// errToStatus choke point, and the repo.List wrap surfaces as 500 with +// server-side slog.Error capture (F-002 redacted-500 pattern preserved). func (s *AuditService) GetAuditEvent(ctx context.Context, id string) (*domain.AuditEvent, error) { filter := &repository.AuditFilter{ ResourceID: id, @@ -155,7 +164,7 @@ func (s *AuditService) GetAuditEvent(ctx context.Context, id string) (*domain.Au } if len(events) == 0 { - return nil, fmt.Errorf("audit event not found") + return nil, fmt.Errorf("%w: audit event not found", ErrNotFound) } return events[0], nil diff --git a/internal/service/ca_operations.go b/internal/service/ca_operations.go index e2e5fab..a3225f2 100644 --- a/internal/service/ca_operations.go +++ b/internal/service/ca_operations.go @@ -43,6 +43,15 @@ func (s *CAOperationsSvc) SetIssuerRegistry(registry *IssuerRegistry) { // GenerateDERCRL generates a DER-encoded X.509 CRL for the given issuer. // Short-lived certificates (profile TTL < 1 hour) are excluded from the CRL. +// +// M-1 (P2): the pre-M-1 issuer-not-found path returned a bare +// `fmt.Errorf("issuer not found: %s", issuerID)` with no sentinel wrap. A +// pre-M-1 handler strings.Contains(err.Error(), "not found") classifier would +// sweep both this path and any transient error whose text happened to mention +// "not found" into 404. Post-M-1 this path wraps service.ErrNotFound via %w so +// errors.Is picks up the genuine 404 at errToStatus, and truly-unexpected +// errors (e.g., registry misconfigured) surface as 500 via the generic-error +// fallback. func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) ([]byte, error) { if s.revocationRepo == nil { return nil, fmt.Errorf("revocation repository not configured") @@ -53,7 +62,7 @@ func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) ( issuerConn, ok := s.issuerRegistry.Get(issuerID) if !ok { - return nil, fmt.Errorf("issuer not found: %s", issuerID) + return nil, fmt.Errorf("%w: issuer %s", ErrNotFound, issuerID) } // Scope the query to this issuer so the migration 000012 composite index @@ -98,6 +107,11 @@ func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) ( } // GetOCSPResponse generates a signed OCSP response for the given certificate serial. +// +// M-1 (P2): see GenerateDERCRL above — same sentinel-wrap rationale applies. +// The issuer-not-found path wraps service.ErrNotFound via %w so errors.Is +// picks up the genuine 404 at errToStatus; transient/misconfiguration errors +// surface as 500. func (s *CAOperationsSvc) GetOCSPResponse(ctx context.Context, issuerID string, serialHex string) ([]byte, error) { if s.revocationRepo == nil { return nil, fmt.Errorf("revocation repository not configured") @@ -108,7 +122,7 @@ func (s *CAOperationsSvc) GetOCSPResponse(ctx context.Context, issuerID string, issuerConn, ok := s.issuerRegistry.Get(issuerID) if !ok { - return nil, fmt.Errorf("issuer not found: %s", issuerID) + return nil, fmt.Errorf("%w: issuer %s", ErrNotFound, issuerID) } serial := new(big.Int) diff --git a/internal/service/certificate.go b/internal/service/certificate.go index b8c6db2..f454826 100644 --- a/internal/service/certificate.go +++ b/internal/service/certificate.go @@ -376,10 +376,21 @@ func (s *CertificateService) CreateCertificate(ctx context.Context, cert domain. // UpdateCertificate modifies a certificate (handler interface method). func (s *CertificateService) UpdateCertificate(ctx context.Context, id string, patch domain.ManagedCertificate) (*domain.ManagedCertificate, error) { - // Fetch existing certificate so partial updates don't zero out fields + // Fetch existing certificate so partial updates don't zero out fields. + // + // M-1 (P2): the pre-M-1 wrap was `"certificate not found: %w"` on every + // certRepo.Get error — which coupled to the handler's strings.Contains + // substring classifier on "not found" and gave false positives on transient + // DB failures (connection refused, context deadline, etc.), demoting a 500 + // to a 404. Now the repo wraps only the genuine sql.ErrNoRows path with + // repository.ErrNotFound (certificate.go Get), so the errors.Is walk + // through the handler's errToStatus choke point discriminates correctly: + // truly-missing → 404, everything else → 500 (the intended outcome). The + // wrap text is changed from "certificate not found" to "failed to get + // certificate" to match the semantic. existing, err := s.certRepo.Get(ctx, id) if err != nil { - return nil, fmt.Errorf("certificate not found: %w", err) + return nil, fmt.Errorf("failed to get certificate: %w", err) } // Merge non-zero fields from patch into existing @@ -501,10 +512,14 @@ func (s *CertificateService) GetOCSPResponse(ctx context.Context, issuerID strin // GetCertificateDeployments returns all deployment targets for a certificate (M20). func (s *CertificateService) GetCertificateDeployments(ctx context.Context, certID string) ([]domain.DeploymentTarget, error) { - // Verify certificate exists + // Verify certificate exists. See M-1 (P2) note on UpdateCertificate for + // the wrap-text correction rationale — same treatment applies here: the + // repo's sql.ErrNoRows wrap with repository.ErrNotFound escapes cleanly + // through fmt.Errorf("%w", ...) so errors.Is picks up the genuine 404 + // while transient DB errors correctly surface as 500. _, err := s.certRepo.Get(ctx, certID) if err != nil { - return nil, fmt.Errorf("certificate not found: %w", err) + return nil, fmt.Errorf("failed to get certificate: %w", err) } if s.targetRepo == nil { diff --git a/internal/service/discovery.go b/internal/service/discovery.go index a5ed3b7..ba57874 100644 --- a/internal/service/discovery.go +++ b/internal/service/discovery.go @@ -166,9 +166,20 @@ func (s *DiscoveryService) ClaimDiscovered(ctx context.Context, id string, manag return fmt.Errorf("failed to get discovered certificate: %w", err) } - // Verify the managed cert exists + // Verify the managed cert exists. + // + // M-1 (P2): the pre-M-1 wrap was a bare `fmt.Errorf("managed certificate not + // found: %s", managedCertID)` with no %w and no sentinel. That threw away the + // underlying error entirely — a transient DB failure from certRepo.Get (e.g., + // connection dropped, query timeout) would be flattened into the same "not + // found" string as a genuine sql.ErrNoRows, which the pre-M-1 handler + // strings.Contains classifier then swept into 404. Post-M-1 we propagate the + // underlying error via %w so errors.Is walks the chain correctly: + // certRepo.Get wraps genuine sql.ErrNoRows with repository.ErrNotFound (see + // postgres/certificate.go Get), and errToStatus dispatches that to 404; + // transient errors surface as 500 via the generic-error fallback. if _, err := s.certRepo.Get(ctx, managedCertID); err != nil { - return fmt.Errorf("managed certificate not found: %s", managedCertID) + return fmt.Errorf("failed to get managed certificate %s: %w", managedCertID, err) } if err := s.discoveryRepo.UpdateDiscoveredStatus(ctx, id, domain.DiscoveryStatusManaged, managedCertID); err != nil { diff --git a/internal/service/errors.go b/internal/service/errors.go new file mode 100644 index 0000000..93ec684 --- /dev/null +++ b/internal/service/errors.go @@ -0,0 +1,73 @@ +package service + +import "errors" + +// M-1 (P2) coverage-gap closure: generic service-layer error sentinels. +// +// Before M-1, API handlers classified service errors by substring-matching the +// wrapped message text (`strings.Contains(err.Error(), "not found")` and +// friends). That made every HTTP status mapping one `fmt.Errorf` reword away +// from silently regressing — including the M-003 self-approval privilege +// boundary, where `handler/jobs.go:174/:220` ignored the already-defined +// ErrSelfApproval sentinel and relied on the literal string "cannot approve". +// +// These six generic sentinels form the type-safe surface the handler layer +// dispatches against via `errors.Is`. Domain-specific sentinels (ErrSelfApproval, +// ErrAgentIsSentinel, ErrBlockedByDependencies, ErrForceReasonRequired, +// ErrAgentNotFound) are declared in their own topical files (job.go, +// agent_retire.go, target.go) and wrap one of these generics via +// `fmt.Errorf("%w: ...", ErrForbidden)`. The wrap chain lets call sites continue +// to `errors.Is(err, ErrSelfApproval)` for domain-specific logic while the +// handler's single choke point in `api/handler/errors.go` can match on the +// generic sentinel to pick the HTTP status. +// +// Dispatch order in errToStatus matters — see the doc block at the top of +// `internal/api/handler/errors.go`. +// +// ErrAgentRetired is deliberately NOT wrapped here. 410 Gone is semantically +// distinct from 403/404/409 and must short-circuit the generic dispatch. Keep +// its standalone declaration in agent_retire.go untouched; errToStatus tests +// it first. +var ( + // ErrNotFound indicates a lookup for a resource that does not exist. + // Handlers translate this to HTTP 404. + ErrNotFound = errors.New("not found") + + // ErrValidation indicates malformed, missing, or out-of-range input from + // the caller. Handlers translate this to HTTP 400. + ErrValidation = errors.New("validation failed") + + // ErrConflict indicates a state conflict: unique-constraint violation, + // foreign-key dependency, or a state machine transition that is not + // allowed from the current state. Handlers translate this to HTTP 409. + ErrConflict = errors.New("conflict") + + // ErrForbidden indicates an authorization / privilege-boundary denial. + // The caller is authenticated but is not permitted to perform the action. + // Handlers translate this to HTTP 403. + ErrForbidden = errors.New("forbidden") + + // ErrUnauthenticated indicates the caller failed to authenticate — most + // commonly a SCEP challenge-password mismatch, where the transport itself + // is valid but the application-layer credential is wrong. Handlers + // translate this to HTTP 401. + ErrUnauthenticated = errors.New("unauthenticated") + + // ErrNotImplemented indicates the requested operation is defined but not + // yet wired up — reserved for feature-flag-gated code paths. Handlers + // translate this to HTTP 501. + ErrNotImplemented = errors.New("not implemented") + + // ErrUnprocessable indicates the request was well-formed and the + // referenced resource exists, but server-side stored data could not be + // processed — e.g., a certificate PEM in inventory that fails X.509 + // decoding because the stored blob is corrupt or was inserted with the + // wrong encoding. Distinct from ErrValidation: ErrValidation means the + // caller sent bad input (400), while ErrUnprocessable means the caller's + // input was fine but our own data cannot satisfy the operation (422 + // Unprocessable Entity). Today the only call site is ExportPKCS12's parse + // path in internal/service/export.go; keeping the sentinel generic so + // other "stored-data-unparseable" paths can reuse it without inventing a + // second variant. + ErrUnprocessable = errors.New("unprocessable") +) diff --git a/internal/service/errors_test.go b/internal/service/errors_test.go new file mode 100644 index 0000000..2b1a300 --- /dev/null +++ b/internal/service/errors_test.go @@ -0,0 +1,110 @@ +package service + +import ( + "errors" + "fmt" + "testing" +) + +// TestGenericSentinels_IdentityDistinct guards against an accidental +// `var ErrX = ErrY` alias where two generic sentinels share identity. Each +// must be a distinct error value so errors.Is dispatch in errToStatus can +// route them to different HTTP status codes. +func TestGenericSentinels_IdentityDistinct(t *testing.T) { + sentinels := []struct { + name string + err error + }{ + {"ErrNotFound", ErrNotFound}, + {"ErrValidation", ErrValidation}, + {"ErrConflict", ErrConflict}, + {"ErrForbidden", ErrForbidden}, + {"ErrUnauthenticated", ErrUnauthenticated}, + {"ErrNotImplemented", ErrNotImplemented}, + } + for i := range sentinels { + for j := range sentinels { + if i == j { + continue + } + if errors.Is(sentinels[i].err, sentinels[j].err) { + t.Errorf("%s and %s alias the same error value — each generic sentinel must be distinct", + sentinels[i].name, sentinels[j].name) + } + } + } +} + +// TestWrappedSentinels_ChainWalk is the core M-1 invariant: wrapping a +// domain-specific sentinel under a generic sentinel via fmt.Errorf("%w: ...") +// must preserve BOTH identities on the wrap chain. Call sites that check +// errors.Is(err, ErrSelfApproval) for domain logic AND the handler-layer +// errToStatus that checks errors.Is(err, ErrForbidden) for the HTTP status +// both need to succeed on the same error value. +// +// If this test fails, every handler dispatch that routes through errToStatus +// is silently broken. +func TestWrappedSentinels_ChainWalk(t *testing.T) { + cases := []struct { + name string + err error + generic error + }{ + {"ErrSelfApproval wraps ErrForbidden", ErrSelfApproval, ErrForbidden}, + {"ErrAgentIsSentinel wraps ErrForbidden", ErrAgentIsSentinel, ErrForbidden}, + {"ErrBlockedByDependencies wraps ErrConflict", ErrBlockedByDependencies, ErrConflict}, + {"ErrForceReasonRequired wraps ErrValidation", ErrForceReasonRequired, ErrValidation}, + {"ErrAgentNotFound wraps ErrValidation", ErrAgentNotFound, ErrValidation}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if !errors.Is(c.err, c.generic) { + t.Errorf("errors.Is(%v, %v) = false; want true", c.err, c.generic) + } + if !errors.Is(c.err, c.err) { + t.Errorf("errors.Is(err, err) = false; want true — domain sentinel lost self-identity after wrap") + } + }) + } +} + +// TestErrAgentRetired_StandaloneGone locks the 410 Gone semantics in place. +// ErrAgentRetired MUST NOT wrap any generic sentinel — 410 is semantically +// distinct from 403/404/409 (permanently-terminated resource identity) and +// the errToStatus dispatch tests it FIRST before any generic check. If this +// test goes red because someone wrapped it under ErrForbidden or ErrNotFound, +// the agent-binary shutdown behavior at cmd/agent/main.go:1291 silently +// regresses. +func TestErrAgentRetired_StandaloneGone(t *testing.T) { + if errors.Is(ErrAgentRetired, ErrForbidden) { + t.Error("ErrAgentRetired must NOT wrap ErrForbidden — 410 Gone would demote to 403") + } + if errors.Is(ErrAgentRetired, ErrNotFound) { + t.Error("ErrAgentRetired must NOT wrap ErrNotFound — 410 Gone would demote to 404") + } + if errors.Is(ErrAgentRetired, ErrConflict) { + t.Error("ErrAgentRetired must NOT wrap ErrConflict — 410 Gone would demote to 409") + } + if errors.Is(ErrAgentRetired, ErrValidation) { + t.Error("ErrAgentRetired must NOT wrap ErrValidation — 410 Gone would demote to 400") + } + if !errors.Is(ErrAgentRetired, ErrAgentRetired) { + t.Error("ErrAgentRetired lost self-identity") + } +} + +// TestSentinels_SurviveDoubleWrap verifies that wrapping a sentinel-wrapped +// error a SECOND time (e.g., a call site doing fmt.Errorf("%w: ctx-info", +// ErrSelfApproval)) preserves both the domain sentinel and the generic +// sentinel. This is critical because the errToStatus dispatch order places +// ErrForbidden BEFORE ErrValidation — if double-wrapping broke the chain, +// the M-003 gate would demote to 400. +func TestSentinels_SurviveDoubleWrap(t *testing.T) { + doubled := fmt.Errorf("%w: additional context from call site", ErrSelfApproval) + if !errors.Is(doubled, ErrSelfApproval) { + t.Error("double-wrapped ErrSelfApproval lost domain identity") + } + if !errors.Is(doubled, ErrForbidden) { + t.Error("double-wrapped ErrSelfApproval lost ErrForbidden wrap — M-003 gate would demote to 500") + } +} diff --git a/internal/service/export.go b/internal/service/export.go index 0a41946..5ad3cc3 100644 --- a/internal/service/export.go +++ b/internal/service/export.go @@ -38,16 +38,31 @@ type ExportPEMResult struct { // ExportPEM returns the PEM-encoded certificate and chain for the latest version. func (s *ExportService) ExportPEM(ctx context.Context, certID string) (*ExportPEMResult, error) { - // Verify certificate exists + // Verify certificate exists. + // + // M-1 (P2): the pre-M-1 wrap was `"certificate not found: %w"` on every + // certRepo.Get error — which gave the handler's strings.Contains(err.Error(), + // "not found") check a false positive on transient DB failures (connection + // refused, context deadline, etc.), demoting a 500 to a 404. Now the repo + // wraps only the genuine sql.ErrNoRows path with repository.ErrNotFound + // (certificate.go Get), so the errors.Is walk through the handler's + // errToStatus discriminates correctly: truly-missing → 404, everything else + // → 500 (the intended outcome). The wrap text is changed from "certificate + // not found" to "failed to get certificate" to match the semantic. cert, err := s.certRepo.Get(ctx, certID) if err != nil { - return nil, fmt.Errorf("certificate not found: %w", err) + return nil, fmt.Errorf("failed to get certificate: %w", err) } - // Get latest version (contains the PEM chain) + // Get latest version (contains the PEM chain). + // + // M-1 (P2): same wrap-text correction as above — pre-M-1 any + // GetLatestVersion error surfaced as "no certificate version found", + // which bled into the handler's substring classifier. Now the repo wraps + // sql.ErrNoRows with repository.ErrNotFound; the wrap chain walks cleanly. version, err := s.certRepo.GetLatestVersion(ctx, certID) if err != nil { - return nil, fmt.Errorf("no certificate version found: %w", err) + return nil, fmt.Errorf("failed to get latest certificate version: %w", err) } // Split PEM chain into leaf cert + chain @@ -73,26 +88,37 @@ func (s *ExportService) ExportPEM(ctx context.Context, certID string) (*ExportPE // The private key is NOT included — it lives on the agent and never touches the control plane. // The PKCS#12 bundle is encrypted with the provided password (can be empty for cert-only bundles). func (s *ExportService) ExportPKCS12(ctx context.Context, certID string, password string) ([]byte, error) { - // Verify certificate exists + // Verify certificate exists. See M-1 (P2) note on ExportPEM for the wrap-text + // correction — same rationale applies here. cert, err := s.certRepo.Get(ctx, certID) if err != nil { - return nil, fmt.Errorf("certificate not found: %w", err) + return nil, fmt.Errorf("failed to get certificate: %w", err) } - // Get latest version + // Get latest version. Same wrap-text correction as ExportPEM. version, err := s.certRepo.GetLatestVersion(ctx, certID) if err != nil { - return nil, fmt.Errorf("no certificate version found: %w", err) + return nil, fmt.Errorf("failed to get latest certificate version: %w", err) } - // Parse PEM chain into x509.Certificate objects + // Parse PEM chain into x509.Certificate objects. + // + // M-1 (P2): wrap both parse-failure paths with ErrUnprocessable so the + // handler's errToStatus choke point dispatches to 422 Unprocessable Entity + // via errors.Is instead of the pre-M-1 two-term substring net + // (`"cannot be parsed"|"no certificates found"`) at handler/export.go:101. + // 422 is the correct status here — the caller's request is syntactically + // fine; the stored PEM chain is what can't be processed. The composed + // Error() string still carries the "certificate data cannot be parsed as + // X.509"/"no certificates found in PEM chain" wording so server-side + // slog.Error capture and any future 422 body propagation stay readable. certs, err := parsePEMCertificates(version.PEMChain) if err != nil { - return nil, fmt.Errorf("certificate data cannot be parsed as X.509: %w", err) + return nil, fmt.Errorf("%w: certificate data cannot be parsed as X.509: %v", ErrUnprocessable, err) } if len(certs) == 0 { - return nil, fmt.Errorf("no certificates found in PEM chain") + return nil, fmt.Errorf("%w: no certificates found in PEM chain", ErrUnprocessable) } // Build PKCS#12 bundle: leaf cert + CA chain (no private key) diff --git a/internal/service/issuer.go b/internal/service/issuer.go index 2236da5..862ab03 100644 --- a/internal/service/issuer.go +++ b/internal/service/issuer.go @@ -262,10 +262,18 @@ func (s *IssuerService) Delete(ctx context.Context, id string, actor string) err // TestConnection tests the connection to an issuer by instantiating a throwaway // connector and calling ValidateConfig. Records the result in the database. +// +// M-1 (P2): the pre-M-1 wrap was `"issuer not found: %w"` on every +// issuerRepo.Get error — which coupled to handler substring classifiers on +// "not found" and could have demoted transient DB failures to 404. Now the +// repo wraps only the genuine sql.ErrNoRows path with repository.ErrNotFound +// (postgres/issuer.go Get), so errors.Is walks the wrap chain correctly: +// truly-missing → 404, everything else → 500. The wrap text is changed from +// "issuer not found" to "failed to get issuer" to match the semantic. func (s *IssuerService) TestConnection(ctx context.Context, id string) error { iss, err := s.issuerRepo.Get(ctx, id) if err != nil { - return fmt.Errorf("issuer not found: %w", err) + return fmt.Errorf("failed to get issuer: %w", err) } // Get the decrypted config diff --git a/internal/service/job.go b/internal/service/job.go index e57d84e..d388dc1 100644 --- a/internal/service/job.go +++ b/internal/service/job.go @@ -3,7 +3,6 @@ package service import ( "time" "context" - "errors" "fmt" "log/slog" "strings" @@ -16,8 +15,16 @@ import ( // approve a renewal job is the same person listed as the owner of the // underlying certificate. M-003 enforces separation of duties: the owner who // requested (or benefits from) the renewal must not be the same identity that -// approves it. Handlers map this sentinel to HTTP 403 Forbidden. -var ErrSelfApproval = errors.New("self-approval forbidden: actor is the owner of the certificate") +// approves it. +// +// M-1 (P2): wraps [ErrForbidden] via fmt.Errorf("%w", ...) so the single +// handler-layer errToStatus choke point resolves HTTP 403 via +// errors.Is(err, ErrForbidden). Call sites can continue to +// errors.Is(err, ErrSelfApproval) for domain-specific logging — both succeed +// because %w builds a wrap chain. Pre-M-1, handler/jobs.go classified the +// failure by substring-matching "cannot approve" in the message; a reword +// would have silently demoted 403 to 500 with no observable signal. +var ErrSelfApproval = fmt.Errorf("%w: self-approval forbidden: actor is the owner of the certificate", ErrForbidden) // JobService manages job processing and status tracking. // It coordinates between the scheduler and various job-specific services. @@ -403,9 +410,17 @@ func (s *JobService) GetJob(ctx context.Context, id string) (*domain.Job, error) // duties. Callers must pass a non-empty actor; empty actor is treated as an // anonymous system caller and permitted (internal/system paths). func (s *JobService) ApproveJob(ctx context.Context, id, actor string) error { + // M-1 (P2): the pre-M-1 wrap was `"job not found: %w"` on every jobRepo.Get + // error — which coupled to the handler's strings.Contains substring classifier + // on "not found" and gave false positives on transient DB failures, demoting a + // 500 to a 404. Now the repo wraps only the genuine sql.ErrNoRows path with + // repository.ErrNotFound (postgres/job.go Get), so errors.Is walks the wrap + // chain correctly: truly-missing → 404, everything else → 500. The wrap text + // is changed from "job not found" to "failed to get job" to match the + // semantic. job, err := s.jobRepo.Get(ctx, id) if err != nil { - return fmt.Errorf("job not found: %w", err) + return fmt.Errorf("failed to get job: %w", err) } if job.Status != domain.JobStatusAwaitingApproval { @@ -435,9 +450,13 @@ func (s *JobService) ApproveJob(ctx context.Context, id, actor string) error { // owner is permitted to cancel their own pending renewal. actor is recorded // on the log line for audit attribution. func (s *JobService) RejectJob(ctx context.Context, id, reason, actor string) error { + // M-1 (P2): wrap-text correction — see ApproveJob for rationale. Same + // repo.Get sql.ErrNoRows → repository.ErrNotFound propagation; text changed + // from "job not found" to "failed to get job" so transient DB errors stay + // 500 while genuine 404s are picked up via errors.Is at errToStatus. job, err := s.jobRepo.Get(ctx, id) if err != nil { - return fmt.Errorf("job not found: %w", err) + return fmt.Errorf("failed to get job: %w", err) } if job.Status != domain.JobStatusAwaitingApproval { diff --git a/internal/service/notification.go b/internal/service/notification.go index cee1b01..9c3dca7 100644 --- a/internal/service/notification.go +++ b/internal/service/notification.go @@ -383,6 +383,15 @@ func (s *NotificationService) ListNotifications(ctx context.Context, page, perPa } // GetNotification returns a single notification (handler interface method). +// +// M-1 (P2): the not-found branch now wraps service.ErrNotFound via %w so the +// handler's errToStatus choke point dispatches to 404 via errors.Is. Pre-M-1 +// this returned a raw fmt.Errorf("notification not found") and the handler +// relied on a blanket "any error from this service call → 404" shortcut — +// which silently demoted transient DB failures (the List() error wrap above) +// to 404. Post-M-1 the handler uses errToStatus; the List() error wrap still +// surfaces as 500 (no sentinel attached), and only this explicit not-found +// branch maps to 404. func (s *NotificationService) GetNotification(ctx context.Context, id string) (*domain.NotificationEvent, error) { filter := &repository.NotificationFilter{ PerPage: 1, @@ -400,7 +409,7 @@ func (s *NotificationService) GetNotification(ctx context.Context, id string) (* } } - return nil, fmt.Errorf("notification not found") + return nil, fmt.Errorf("%w: notification %s", ErrNotFound, id) } // MarkAsRead marks a notification as read (handler interface method). @@ -568,6 +577,13 @@ func (s *NotificationService) RetryFailedNotifications(ctx context.Context) erro // "pg: deadlock detected" surfaces in the handler response and the // operator UI. The service has no fallback — a silent "success" that // didn't actually mutate the row would be worse than a loud error. +// +// M-1 (P2): the repo's Requeue (postgres/notification.go) now wraps +// repository.ErrNotFound on zero-rows-affected. Because this method wraps +// the repo error with %w, errors.Is(err, repository.ErrNotFound) walks +// cleanly through both wrap layers — so the handler's errToStatus choke +// point dispatches a genuine missing-row Requeue to 404 and a transient +// "pg: deadlock detected" to 500 without substring matching. func (s *NotificationService) RequeueNotification(ctx context.Context, id string) error { if err := s.notifRepo.Requeue(ctx, id); err != nil { return fmt.Errorf("failed to requeue notification: %w", err) diff --git a/internal/service/profile.go b/internal/service/profile.go index 86d4942..9c0294e 100644 --- a/internal/service/profile.go +++ b/internal/service/profile.go @@ -139,42 +139,51 @@ func (s *ProfileService) Get(ctx context.Context, id string) (*domain.Certificat } // validateProfile checks that a profile's configuration is valid. +// +// M-1: every return here wraps ErrValidation via %w so the handler's +// errToStatus choke point dispatches these to HTTP 400 via errors.Is without +// the substring-matching on "invalid"/"required"/"must be"/"cannot" that the +// pre-M-1 profile handler relied on. The composed Error() string still +// contains the original human-readable text (e.g. "RSA minimum key size must +// be at least 2048"), so the handler safely passes err.Error() through to the +// response body on the 400 arm. Substring assertions in profile_test.go +// continue to match for the same reason. func validateProfile(p *domain.CertificateProfile) error { if p.Name == "" { - return fmt.Errorf("profile name is required") + return fmt.Errorf("%w: profile name is required", ErrValidation) } if len(p.Name) > 255 { - return fmt.Errorf("profile name exceeds 255 characters") + return fmt.Errorf("%w: profile name exceeds 255 characters", ErrValidation) } // Validate key algorithms for _, alg := range p.AllowedKeyAlgorithms { if !domain.ValidKeyAlgorithms[alg.Algorithm] { - return fmt.Errorf("invalid key algorithm: %s (allowed: RSA, ECDSA, Ed25519)", alg.Algorithm) + return fmt.Errorf("%w: invalid key algorithm: %s (allowed: RSA, ECDSA, Ed25519)", ErrValidation, alg.Algorithm) } if alg.Algorithm == domain.KeyAlgorithmRSA && alg.MinSize < 2048 { - return fmt.Errorf("RSA minimum key size must be at least 2048, got %d", alg.MinSize) + return fmt.Errorf("%w: RSA minimum key size must be at least 2048, got %d", ErrValidation, alg.MinSize) } if alg.Algorithm == domain.KeyAlgorithmECDSA && alg.MinSize < 256 { - return fmt.Errorf("ECDSA minimum key size must be at least 256, got %d", alg.MinSize) + return fmt.Errorf("%w: ECDSA minimum key size must be at least 256, got %d", ErrValidation, alg.MinSize) } } // Validate EKUs for _, eku := range p.AllowedEKUs { if !domain.ValidEKUs[eku] { - return fmt.Errorf("invalid EKU: %s", eku) + return fmt.Errorf("%w: invalid EKU: %s", ErrValidation, eku) } } // Validate max TTL if p.MaxTTLSeconds < 0 { - return fmt.Errorf("max_ttl_seconds cannot be negative") + return fmt.Errorf("%w: max_ttl_seconds cannot be negative", ErrValidation) } // Validate short-lived consistency if p.AllowShortLived && p.MaxTTLSeconds >= 3600 { - return fmt.Errorf("allow_short_lived is true but max_ttl_seconds (%d) is >= 3600; short-lived certs must have TTL under 1 hour", p.MaxTTLSeconds) + return fmt.Errorf("%w: allow_short_lived is true but max_ttl_seconds (%d) is >= 3600; short-lived certs must have TTL under 1 hour", ErrValidation, p.MaxTTLSeconds) } return nil diff --git a/internal/service/scep.go b/internal/service/scep.go index 2e8d4b9..4ac9da1 100644 --- a/internal/service/scep.go +++ b/internal/service/scep.go @@ -84,10 +84,22 @@ func (s *SCEPService) PKCSReq(ctx context.Context, csrPEM string, challengePassw // this branch also protects future call sites (tests, library reuse, a // future REST-over-HTTPS wrapper) from silently accepting unauthenticated // CSRs. + // + // M-1 (P2): both failure modes now wrap service.ErrUnauthenticated via %w so + // the handler's errToStatus choke point dispatches them to HTTP 401 via + // errors.Is instead of relying on a `strings.Contains(err.Error(), "challenge + // password")` substring branch at handler/scep.go:174. This is a deliberate + // semantic correction: the pre-M-1 substring branch returned 403 Forbidden, + // but SCEP challenge-password failure is an authentication failure (the + // caller has no valid credential at the application layer), not an + // authorization denial (the caller has a credential but is not permitted), + // and 401 Unauthorized is the correct RFC 7235 status. The errToStatus doc + // block explicitly cites this site as the canonical ErrUnauthenticated use + // case. if s.challengePassword == "" { s.logger.Warn("SCEP enrollment rejected: server has no challenge password configured", "transaction_id", transactionID) - return nil, fmt.Errorf("SCEP challenge password not configured on server") + return nil, fmt.Errorf("%w: SCEP challenge password not configured on server", ErrUnauthenticated) } // Constant-time compare avoids leaking the configured secret through // response-time variance. ConstantTimeCompare returns 1 only when both @@ -96,7 +108,7 @@ func (s *SCEPService) PKCSReq(ctx context.Context, csrPEM string, challengePassw if subtle.ConstantTimeCompare([]byte(challengePassword), []byte(s.challengePassword)) != 1 { s.logger.Warn("SCEP enrollment rejected: invalid challenge password", "transaction_id", transactionID) - return nil, fmt.Errorf("invalid challenge password") + return nil, fmt.Errorf("%w: invalid challenge password", ErrUnauthenticated) } return s.processEnrollment(ctx, csrPEM, transactionID, "scep_pkcsreq") diff --git a/internal/service/scep_test.go b/internal/service/scep_test.go index dbd3d10..1e3673d 100644 --- a/internal/service/scep_test.go +++ b/internal/service/scep_test.go @@ -148,6 +148,13 @@ func TestSCEPService_PKCSReq_ChallengePassword_Invalid(t *testing.T) { if err == nil { t.Fatal("expected error for invalid challenge password") } + // M-1 (P2): pin the sentinel wrap so the handler's errToStatus dispatch + // (errors.Is → 401 Unauthorized) stays contract-stable. The parallel + // substring check is retained because it also pins the user-facing reason + // that service.ErrUnauthenticated composes with via fmt.Errorf("%w: ..."). + if !errors.Is(err, ErrUnauthenticated) { + t.Errorf("expected errors.Is(err, ErrUnauthenticated), got: %v", err) + } if !strings.Contains(err.Error(), "challenge password") { t.Errorf("expected 'challenge password' in error, got: %v", err) } @@ -171,6 +178,12 @@ func TestSCEPService_PKCSReq_ChallengePassword_EmptyServerConfigRejected(t *test if err == nil { t.Fatalf("expected rejection when server challenge password is empty (client=%q)", clientPassword) } + // M-1 (P2): pin the sentinel wrap on the "no shared secret configured" + // branch so both H-2 failure modes route to the same 401 arm at the + // handler. Preserves the explanatory substring assertion alongside. + if !errors.Is(err, ErrUnauthenticated) { + t.Errorf("expected errors.Is(err, ErrUnauthenticated) for client=%q, got: %v", clientPassword, err) + } if !strings.Contains(err.Error(), "not configured") { t.Errorf("expected 'not configured' in error, got: %v", err) } @@ -193,6 +206,12 @@ func TestSCEPService_PKCSReq_ChallengePassword_ConstantTimeLengthIndependence(t if err == nil { t.Fatalf("expected rejection for bad password %q", bad) } + // M-1 (P2): pin sentinel-wrap on every bad-password input so both the + // length-mismatch and content-mismatch ConstantTimeCompare paths map to + // the same 401 arm. + if !errors.Is(err, ErrUnauthenticated) { + t.Errorf("expected errors.Is(err, ErrUnauthenticated) for %q, got: %v", bad, err) + } if !strings.Contains(err.Error(), "invalid challenge password") { t.Errorf("expected 'invalid challenge password' for %q, got: %v", bad, err) } diff --git a/internal/service/target.go b/internal/service/target.go index 6343950..879824f 100644 --- a/internal/service/target.go +++ b/internal/service/target.go @@ -3,7 +3,6 @@ package service import ( "context" "encoding/json" - "errors" "fmt" "log/slog" "time" @@ -18,7 +17,15 @@ import ( // agent. The handler layer maps this to HTTP 400 via [errors.Is]. See C-002 in // cowork/certctl-coverage-gap-audit.md — this sentinel replaces a silent // Postgres FK violation (23503 → HTTP 500) with a deterministic 400. -var ErrAgentNotFound = errors.New("referenced agent does not exist") +// +// M-1 (P2): wraps [ErrValidation] via fmt.Errorf("%w", ...) so the single +// handler-layer errToStatus choke point resolves HTTP 400 via +// errors.Is(err, ErrValidation). "Referenced FK does not exist in POST body" +// is invalid input — not a missing-resource lookup — which is why this wraps +// ErrValidation (400) rather than ErrNotFound (404). Pre-M-1, target handlers +// classified this via strings.Contains(err.Error(), "not found"); a message +// reword would have silently promoted 400 to 500 with no observable signal. +var ErrAgentNotFound = fmt.Errorf("%w: referenced agent does not exist", ErrValidation) // validTargetTypes is the set of allowed target types for validation. var validTargetTypes = map[domain.TargetType]bool{ @@ -215,10 +222,20 @@ func (s *TargetService) Delete(ctx context.Context, id string, actor string) err // TestConnection tests a target's connectivity by checking the assigned agent's heartbeat status. // Target connectors run on agents, not on the server, so we can't instantiate a connector here. // Instead, we verify the agent is online and reachable. +// +// M-1 (P2): the pre-M-1 wraps were `"target not found: %w"` and +// `"assigned agent not found: %w"` on every targetRepo.Get / agentRepo.Get error +// — which coupled to pre-M-1 handler strings.Contains substring classifiers on +// "not found" and gave false positives on transient DB failures. Now both +// repos wrap only the genuine sql.ErrNoRows path with repository.ErrNotFound, +// so errors.Is walks the chain correctly: truly-missing → 404, everything else +// → 500. The wrap text is changed to "failed to get target" / "failed to get +// agent" to match the semantic (agent-misconfigured stays 500 — "assigned +// agent" is a data-integrity issue, not a consumer 404). func (s *TargetService) TestConnection(ctx context.Context, id string) error { target, err := s.targetRepo.Get(ctx, id) if err != nil { - return fmt.Errorf("target not found: %w", err) + return fmt.Errorf("failed to get target: %w", err) } if target.AgentID == "" { @@ -229,7 +246,7 @@ func (s *TargetService) TestConnection(ctx context.Context, id string) error { agent, err := s.agentRepo.Get(ctx, target.AgentID) if err != nil { s.updateTestStatus(ctx, target, "failed") - return fmt.Errorf("assigned agent not found: %w", err) + return fmt.Errorf("failed to get assigned agent: %w", err) } // I-004: AgentRepository.Get intentionally surfaces retired rows (the banner