mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-12 10:28:52 +00:00
WIP: M-1 handler sentinel error mapping (checkpoint before branch cleanup)
Uncommitted migration work at the time of branch cleanup. Tagged as checkpoint/m1-migration-wip so the commit survives git gc --prune=now. Session context: Phase 3 Part B+C of the M-1 sentinel error migration was in progress. 38 modified files, 4 new files (errors.go + errors_test.go in internal/service/ and internal/api/handler/). Resume from this commit via 'git checkout checkpoint/m1-migration-wip'.
This commit is contained in:
@@ -6,10 +6,12 @@ import (
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"github.com/lib/pq"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
)
|
||||
@@ -409,6 +411,16 @@ func (s *AgentService) RegisterAgent(ctx context.Context, agent domain.Agent) (*
|
||||
agent.LastHeartbeatAt = &now
|
||||
|
||||
if err := s.agentRepo.Create(ctx, &agent); err != nil {
|
||||
// M-1 (P2): explicit pg-23505 dispatch wraps unique-constraint
|
||||
// violations on the agent name with ErrConflict so the handler's
|
||||
// errors.Is(err, service.ErrConflict) arm fires 409 deterministically
|
||||
// instead of relying on the errToStatus *pq.Error SQLSTATE fallback.
|
||||
// Mirrors the duplicate-name paths in IssuerService and
|
||||
// RenewalPolicyService.
|
||||
var pgErr *pq.Error
|
||||
if errors.As(err, &pgErr) && pgErr.Code == "23505" {
|
||||
return nil, fmt.Errorf("%w: agent name already exists", ErrConflict)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to register agent: %w", err)
|
||||
}
|
||||
return &agent, nil
|
||||
|
||||
@@ -143,12 +143,20 @@ func (s *AgentGroupService) ListMembers(ctx context.Context, id string) ([]domai
|
||||
}
|
||||
|
||||
// validateAgentGroup checks that an agent group's configuration is valid.
|
||||
//
|
||||
// M-1 (P2): every return wraps ErrValidation via %w so the handler's
|
||||
// errToStatus choke point dispatches these to HTTP 400 via errors.Is without
|
||||
// the substring-matching on "invalid"/"required" that the pre-M-1
|
||||
// agent_groups handler relied on at handler/agent_groups.go:126. The composed
|
||||
// Error() string still contains the original human-readable text, so the
|
||||
// handler safely passes err.Error() through to the response body on the 400
|
||||
// arm. Mirrors validateProfile.
|
||||
func validateAgentGroup(g *domain.AgentGroup) error {
|
||||
if g.Name == "" {
|
||||
return fmt.Errorf("agent group name is required")
|
||||
return fmt.Errorf("%w: agent group name is required", ErrValidation)
|
||||
}
|
||||
if len(g.Name) > 255 {
|
||||
return fmt.Errorf("agent group name exceeds 255 characters")
|
||||
return fmt.Errorf("%w: agent group name exceeds 255 characters", ErrValidation)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -26,8 +26,12 @@ import (
|
||||
// three cloud secret-manager discovery sources; retiring any of them orphans
|
||||
// its subsystem. The guard fires unconditionally — force=true does not bypass
|
||||
// it, because a sentinel is a structural invariant of the deployment, not
|
||||
// a piece of fleet state the operator owns. Handler maps this to HTTP 403.
|
||||
var ErrAgentIsSentinel = errors.New("agent is a reserved sentinel and cannot be retired")
|
||||
// a piece of fleet state the operator owns.
|
||||
//
|
||||
// M-1 (P2): wraps [ErrForbidden] so the handler-layer errToStatus choke point
|
||||
// resolves HTTP 403 via errors.Is. Call sites can still errors.Is against
|
||||
// ErrAgentIsSentinel for domain-specific branches.
|
||||
var ErrAgentIsSentinel = fmt.Errorf("%w: agent is a reserved sentinel and cannot be retired", ErrForbidden)
|
||||
|
||||
// ErrBlockedByDependencies is returned by RetireAgent when at least one of
|
||||
// (active targets, active certificates, pending jobs) referencing the agent
|
||||
@@ -35,15 +39,22 @@ var ErrAgentIsSentinel = errors.New("agent is a reserved sentinel and cannot be
|
||||
// a *BlockedByDependenciesError (see below), so handlers doing errors.As
|
||||
// can surface the per-bucket counts in the 409 body for operator
|
||||
// troubleshooting. Tests use errors.Is; handlers use errors.As.
|
||||
var ErrBlockedByDependencies = errors.New("agent has active downstream dependencies")
|
||||
//
|
||||
// M-1 (P2): wraps [ErrConflict] so errToStatus resolves HTTP 409 via
|
||||
// errors.Is, keeping the per-bucket errors.As path unchanged for the
|
||||
// detailed 409 body. BlockedByDependenciesError.Unwrap still returns the
|
||||
// domain-specific sentinel for chained Is checks.
|
||||
var ErrBlockedByDependencies = fmt.Errorf("%w: agent has active downstream dependencies", ErrConflict)
|
||||
|
||||
// ErrForceReasonRequired is returned when force=true is supplied without a
|
||||
// non-empty reason. The force escape hatch is deliberately chatty: operators
|
||||
// pulling the emergency cord must leave an auditable breadcrumb explaining
|
||||
// why a cascade was justified. Handler maps this to HTTP 400 so the operator
|
||||
// retries with --reason rather than silently skipping the guard. Checked
|
||||
// before any DB mutation to keep the no-reason path transactionally clean.
|
||||
var ErrForceReasonRequired = errors.New("force=true requires a non-empty reason")
|
||||
// why a cascade was justified. Operators must retry with --reason rather
|
||||
// than silently skipping the guard. Checked before any DB mutation to keep
|
||||
// the no-reason path transactionally clean.
|
||||
//
|
||||
// M-1 (P2): wraps [ErrValidation] so errToStatus resolves HTTP 400.
|
||||
var ErrForceReasonRequired = fmt.Errorf("%w: force=true requires a non-empty reason", ErrValidation)
|
||||
|
||||
// ErrAgentRetired is returned by Heartbeat (and any future agent-authenticated
|
||||
// call site) when a retired agent is still polling. The handler layer maps
|
||||
@@ -52,6 +63,12 @@ var ErrForceReasonRequired = errors.New("force=true requires a non-empty reason"
|
||||
// forever on a soft-retired identity. IsRetired() on the domain model is
|
||||
// the single source of truth; the sentinel exists so service and handler
|
||||
// callers can errors.Is against one symbol.
|
||||
//
|
||||
// M-1 (P2): deliberately NOT wrapped under any generic sentinel. 410 Gone is
|
||||
// semantically distinct from 403/404/409 — it signals a permanently-terminated
|
||||
// resource identity and drives deterministic agent-binary shutdown (see
|
||||
// cmd/agent/main.go). errToStatus tests it FIRST so it cannot be demoted by
|
||||
// the generic cascade.
|
||||
var ErrAgentRetired = errors.New("agent has been retired")
|
||||
|
||||
// BlockedByDependenciesError wraps ErrBlockedByDependencies and carries the
|
||||
|
||||
@@ -143,6 +143,15 @@ func (s *AuditService) ListAuditEvents(ctx context.Context, page, perPage int) (
|
||||
}
|
||||
|
||||
// GetAuditEvent returns a single audit event (handler interface method).
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 zero-events path returned a bare
|
||||
// `fmt.Errorf("audit event not found")` and the handler dispatched via a
|
||||
// blanket `any error → 404 Audit event not found` shortcut. That silently
|
||||
// demoted transient DB failures from the auditRepo.List wrap (line 154 above)
|
||||
// to 404 Not Found. Now the zero-events path wraps ErrNotFound via %w so
|
||||
// errors.Is(err, service.ErrNotFound) picks up the real 404 at the handler's
|
||||
// errToStatus choke point, and the repo.List wrap surfaces as 500 with
|
||||
// server-side slog.Error capture (F-002 redacted-500 pattern preserved).
|
||||
func (s *AuditService) GetAuditEvent(ctx context.Context, id string) (*domain.AuditEvent, error) {
|
||||
filter := &repository.AuditFilter{
|
||||
ResourceID: id,
|
||||
@@ -155,7 +164,7 @@ func (s *AuditService) GetAuditEvent(ctx context.Context, id string) (*domain.Au
|
||||
}
|
||||
|
||||
if len(events) == 0 {
|
||||
return nil, fmt.Errorf("audit event not found")
|
||||
return nil, fmt.Errorf("%w: audit event not found", ErrNotFound)
|
||||
}
|
||||
|
||||
return events[0], nil
|
||||
|
||||
@@ -43,6 +43,15 @@ func (s *CAOperationsSvc) SetIssuerRegistry(registry *IssuerRegistry) {
|
||||
|
||||
// GenerateDERCRL generates a DER-encoded X.509 CRL for the given issuer.
|
||||
// Short-lived certificates (profile TTL < 1 hour) are excluded from the CRL.
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 issuer-not-found path returned a bare
|
||||
// `fmt.Errorf("issuer not found: %s", issuerID)` with no sentinel wrap. A
|
||||
// pre-M-1 handler strings.Contains(err.Error(), "not found") classifier would
|
||||
// sweep both this path and any transient error whose text happened to mention
|
||||
// "not found" into 404. Post-M-1 this path wraps service.ErrNotFound via %w so
|
||||
// errors.Is picks up the genuine 404 at errToStatus, and truly-unexpected
|
||||
// errors (e.g., registry misconfigured) surface as 500 via the generic-error
|
||||
// fallback.
|
||||
func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) ([]byte, error) {
|
||||
if s.revocationRepo == nil {
|
||||
return nil, fmt.Errorf("revocation repository not configured")
|
||||
@@ -53,7 +62,7 @@ func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) (
|
||||
|
||||
issuerConn, ok := s.issuerRegistry.Get(issuerID)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("issuer not found: %s", issuerID)
|
||||
return nil, fmt.Errorf("%w: issuer %s", ErrNotFound, issuerID)
|
||||
}
|
||||
|
||||
// Scope the query to this issuer so the migration 000012 composite index
|
||||
@@ -98,6 +107,11 @@ func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) (
|
||||
}
|
||||
|
||||
// GetOCSPResponse generates a signed OCSP response for the given certificate serial.
|
||||
//
|
||||
// M-1 (P2): see GenerateDERCRL above — same sentinel-wrap rationale applies.
|
||||
// The issuer-not-found path wraps service.ErrNotFound via %w so errors.Is
|
||||
// picks up the genuine 404 at errToStatus; transient/misconfiguration errors
|
||||
// surface as 500.
|
||||
func (s *CAOperationsSvc) GetOCSPResponse(ctx context.Context, issuerID string, serialHex string) ([]byte, error) {
|
||||
if s.revocationRepo == nil {
|
||||
return nil, fmt.Errorf("revocation repository not configured")
|
||||
@@ -108,7 +122,7 @@ func (s *CAOperationsSvc) GetOCSPResponse(ctx context.Context, issuerID string,
|
||||
|
||||
issuerConn, ok := s.issuerRegistry.Get(issuerID)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("issuer not found: %s", issuerID)
|
||||
return nil, fmt.Errorf("%w: issuer %s", ErrNotFound, issuerID)
|
||||
}
|
||||
|
||||
serial := new(big.Int)
|
||||
|
||||
@@ -376,10 +376,21 @@ func (s *CertificateService) CreateCertificate(ctx context.Context, cert domain.
|
||||
|
||||
// UpdateCertificate modifies a certificate (handler interface method).
|
||||
func (s *CertificateService) UpdateCertificate(ctx context.Context, id string, patch domain.ManagedCertificate) (*domain.ManagedCertificate, error) {
|
||||
// Fetch existing certificate so partial updates don't zero out fields
|
||||
// Fetch existing certificate so partial updates don't zero out fields.
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 wrap was `"certificate not found: %w"` on every
|
||||
// certRepo.Get error — which coupled to the handler's strings.Contains
|
||||
// substring classifier on "not found" and gave false positives on transient
|
||||
// DB failures (connection refused, context deadline, etc.), demoting a 500
|
||||
// to a 404. Now the repo wraps only the genuine sql.ErrNoRows path with
|
||||
// repository.ErrNotFound (certificate.go Get), so the errors.Is walk
|
||||
// through the handler's errToStatus choke point discriminates correctly:
|
||||
// truly-missing → 404, everything else → 500 (the intended outcome). The
|
||||
// wrap text is changed from "certificate not found" to "failed to get
|
||||
// certificate" to match the semantic.
|
||||
existing, err := s.certRepo.Get(ctx, id)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("certificate not found: %w", err)
|
||||
return nil, fmt.Errorf("failed to get certificate: %w", err)
|
||||
}
|
||||
|
||||
// Merge non-zero fields from patch into existing
|
||||
@@ -501,10 +512,14 @@ func (s *CertificateService) GetOCSPResponse(ctx context.Context, issuerID strin
|
||||
|
||||
// GetCertificateDeployments returns all deployment targets for a certificate (M20).
|
||||
func (s *CertificateService) GetCertificateDeployments(ctx context.Context, certID string) ([]domain.DeploymentTarget, error) {
|
||||
// Verify certificate exists
|
||||
// Verify certificate exists. See M-1 (P2) note on UpdateCertificate for
|
||||
// the wrap-text correction rationale — same treatment applies here: the
|
||||
// repo's sql.ErrNoRows wrap with repository.ErrNotFound escapes cleanly
|
||||
// through fmt.Errorf("%w", ...) so errors.Is picks up the genuine 404
|
||||
// while transient DB errors correctly surface as 500.
|
||||
_, err := s.certRepo.Get(ctx, certID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("certificate not found: %w", err)
|
||||
return nil, fmt.Errorf("failed to get certificate: %w", err)
|
||||
}
|
||||
|
||||
if s.targetRepo == nil {
|
||||
|
||||
@@ -166,9 +166,20 @@ func (s *DiscoveryService) ClaimDiscovered(ctx context.Context, id string, manag
|
||||
return fmt.Errorf("failed to get discovered certificate: %w", err)
|
||||
}
|
||||
|
||||
// Verify the managed cert exists
|
||||
// Verify the managed cert exists.
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 wrap was a bare `fmt.Errorf("managed certificate not
|
||||
// found: %s", managedCertID)` with no %w and no sentinel. That threw away the
|
||||
// underlying error entirely — a transient DB failure from certRepo.Get (e.g.,
|
||||
// connection dropped, query timeout) would be flattened into the same "not
|
||||
// found" string as a genuine sql.ErrNoRows, which the pre-M-1 handler
|
||||
// strings.Contains classifier then swept into 404. Post-M-1 we propagate the
|
||||
// underlying error via %w so errors.Is walks the chain correctly:
|
||||
// certRepo.Get wraps genuine sql.ErrNoRows with repository.ErrNotFound (see
|
||||
// postgres/certificate.go Get), and errToStatus dispatches that to 404;
|
||||
// transient errors surface as 500 via the generic-error fallback.
|
||||
if _, err := s.certRepo.Get(ctx, managedCertID); err != nil {
|
||||
return fmt.Errorf("managed certificate not found: %s", managedCertID)
|
||||
return fmt.Errorf("failed to get managed certificate %s: %w", managedCertID, err)
|
||||
}
|
||||
|
||||
if err := s.discoveryRepo.UpdateDiscoveredStatus(ctx, id, domain.DiscoveryStatusManaged, managedCertID); err != nil {
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
package service
|
||||
|
||||
import "errors"
|
||||
|
||||
// M-1 (P2) coverage-gap closure: generic service-layer error sentinels.
|
||||
//
|
||||
// Before M-1, API handlers classified service errors by substring-matching the
|
||||
// wrapped message text (`strings.Contains(err.Error(), "not found")` and
|
||||
// friends). That made every HTTP status mapping one `fmt.Errorf` reword away
|
||||
// from silently regressing — including the M-003 self-approval privilege
|
||||
// boundary, where `handler/jobs.go:174/:220` ignored the already-defined
|
||||
// ErrSelfApproval sentinel and relied on the literal string "cannot approve".
|
||||
//
|
||||
// These six generic sentinels form the type-safe surface the handler layer
|
||||
// dispatches against via `errors.Is`. Domain-specific sentinels (ErrSelfApproval,
|
||||
// ErrAgentIsSentinel, ErrBlockedByDependencies, ErrForceReasonRequired,
|
||||
// ErrAgentNotFound) are declared in their own topical files (job.go,
|
||||
// agent_retire.go, target.go) and wrap one of these generics via
|
||||
// `fmt.Errorf("%w: ...", ErrForbidden)`. The wrap chain lets call sites continue
|
||||
// to `errors.Is(err, ErrSelfApproval)` for domain-specific logic while the
|
||||
// handler's single choke point in `api/handler/errors.go` can match on the
|
||||
// generic sentinel to pick the HTTP status.
|
||||
//
|
||||
// Dispatch order in errToStatus matters — see the doc block at the top of
|
||||
// `internal/api/handler/errors.go`.
|
||||
//
|
||||
// ErrAgentRetired is deliberately NOT wrapped here. 410 Gone is semantically
|
||||
// distinct from 403/404/409 and must short-circuit the generic dispatch. Keep
|
||||
// its standalone declaration in agent_retire.go untouched; errToStatus tests
|
||||
// it first.
|
||||
var (
|
||||
// ErrNotFound indicates a lookup for a resource that does not exist.
|
||||
// Handlers translate this to HTTP 404.
|
||||
ErrNotFound = errors.New("not found")
|
||||
|
||||
// ErrValidation indicates malformed, missing, or out-of-range input from
|
||||
// the caller. Handlers translate this to HTTP 400.
|
||||
ErrValidation = errors.New("validation failed")
|
||||
|
||||
// ErrConflict indicates a state conflict: unique-constraint violation,
|
||||
// foreign-key dependency, or a state machine transition that is not
|
||||
// allowed from the current state. Handlers translate this to HTTP 409.
|
||||
ErrConflict = errors.New("conflict")
|
||||
|
||||
// ErrForbidden indicates an authorization / privilege-boundary denial.
|
||||
// The caller is authenticated but is not permitted to perform the action.
|
||||
// Handlers translate this to HTTP 403.
|
||||
ErrForbidden = errors.New("forbidden")
|
||||
|
||||
// ErrUnauthenticated indicates the caller failed to authenticate — most
|
||||
// commonly a SCEP challenge-password mismatch, where the transport itself
|
||||
// is valid but the application-layer credential is wrong. Handlers
|
||||
// translate this to HTTP 401.
|
||||
ErrUnauthenticated = errors.New("unauthenticated")
|
||||
|
||||
// ErrNotImplemented indicates the requested operation is defined but not
|
||||
// yet wired up — reserved for feature-flag-gated code paths. Handlers
|
||||
// translate this to HTTP 501.
|
||||
ErrNotImplemented = errors.New("not implemented")
|
||||
|
||||
// ErrUnprocessable indicates the request was well-formed and the
|
||||
// referenced resource exists, but server-side stored data could not be
|
||||
// processed — e.g., a certificate PEM in inventory that fails X.509
|
||||
// decoding because the stored blob is corrupt or was inserted with the
|
||||
// wrong encoding. Distinct from ErrValidation: ErrValidation means the
|
||||
// caller sent bad input (400), while ErrUnprocessable means the caller's
|
||||
// input was fine but our own data cannot satisfy the operation (422
|
||||
// Unprocessable Entity). Today the only call site is ExportPKCS12's parse
|
||||
// path in internal/service/export.go; keeping the sentinel generic so
|
||||
// other "stored-data-unparseable" paths can reuse it without inventing a
|
||||
// second variant.
|
||||
ErrUnprocessable = errors.New("unprocessable")
|
||||
)
|
||||
@@ -0,0 +1,110 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestGenericSentinels_IdentityDistinct guards against an accidental
|
||||
// `var ErrX = ErrY` alias where two generic sentinels share identity. Each
|
||||
// must be a distinct error value so errors.Is dispatch in errToStatus can
|
||||
// route them to different HTTP status codes.
|
||||
func TestGenericSentinels_IdentityDistinct(t *testing.T) {
|
||||
sentinels := []struct {
|
||||
name string
|
||||
err error
|
||||
}{
|
||||
{"ErrNotFound", ErrNotFound},
|
||||
{"ErrValidation", ErrValidation},
|
||||
{"ErrConflict", ErrConflict},
|
||||
{"ErrForbidden", ErrForbidden},
|
||||
{"ErrUnauthenticated", ErrUnauthenticated},
|
||||
{"ErrNotImplemented", ErrNotImplemented},
|
||||
}
|
||||
for i := range sentinels {
|
||||
for j := range sentinels {
|
||||
if i == j {
|
||||
continue
|
||||
}
|
||||
if errors.Is(sentinels[i].err, sentinels[j].err) {
|
||||
t.Errorf("%s and %s alias the same error value — each generic sentinel must be distinct",
|
||||
sentinels[i].name, sentinels[j].name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestWrappedSentinels_ChainWalk is the core M-1 invariant: wrapping a
|
||||
// domain-specific sentinel under a generic sentinel via fmt.Errorf("%w: ...")
|
||||
// must preserve BOTH identities on the wrap chain. Call sites that check
|
||||
// errors.Is(err, ErrSelfApproval) for domain logic AND the handler-layer
|
||||
// errToStatus that checks errors.Is(err, ErrForbidden) for the HTTP status
|
||||
// both need to succeed on the same error value.
|
||||
//
|
||||
// If this test fails, every handler dispatch that routes through errToStatus
|
||||
// is silently broken.
|
||||
func TestWrappedSentinels_ChainWalk(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
err error
|
||||
generic error
|
||||
}{
|
||||
{"ErrSelfApproval wraps ErrForbidden", ErrSelfApproval, ErrForbidden},
|
||||
{"ErrAgentIsSentinel wraps ErrForbidden", ErrAgentIsSentinel, ErrForbidden},
|
||||
{"ErrBlockedByDependencies wraps ErrConflict", ErrBlockedByDependencies, ErrConflict},
|
||||
{"ErrForceReasonRequired wraps ErrValidation", ErrForceReasonRequired, ErrValidation},
|
||||
{"ErrAgentNotFound wraps ErrValidation", ErrAgentNotFound, ErrValidation},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
if !errors.Is(c.err, c.generic) {
|
||||
t.Errorf("errors.Is(%v, %v) = false; want true", c.err, c.generic)
|
||||
}
|
||||
if !errors.Is(c.err, c.err) {
|
||||
t.Errorf("errors.Is(err, err) = false; want true — domain sentinel lost self-identity after wrap")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestErrAgentRetired_StandaloneGone locks the 410 Gone semantics in place.
|
||||
// ErrAgentRetired MUST NOT wrap any generic sentinel — 410 is semantically
|
||||
// distinct from 403/404/409 (permanently-terminated resource identity) and
|
||||
// the errToStatus dispatch tests it FIRST before any generic check. If this
|
||||
// test goes red because someone wrapped it under ErrForbidden or ErrNotFound,
|
||||
// the agent-binary shutdown behavior at cmd/agent/main.go:1291 silently
|
||||
// regresses.
|
||||
func TestErrAgentRetired_StandaloneGone(t *testing.T) {
|
||||
if errors.Is(ErrAgentRetired, ErrForbidden) {
|
||||
t.Error("ErrAgentRetired must NOT wrap ErrForbidden — 410 Gone would demote to 403")
|
||||
}
|
||||
if errors.Is(ErrAgentRetired, ErrNotFound) {
|
||||
t.Error("ErrAgentRetired must NOT wrap ErrNotFound — 410 Gone would demote to 404")
|
||||
}
|
||||
if errors.Is(ErrAgentRetired, ErrConflict) {
|
||||
t.Error("ErrAgentRetired must NOT wrap ErrConflict — 410 Gone would demote to 409")
|
||||
}
|
||||
if errors.Is(ErrAgentRetired, ErrValidation) {
|
||||
t.Error("ErrAgentRetired must NOT wrap ErrValidation — 410 Gone would demote to 400")
|
||||
}
|
||||
if !errors.Is(ErrAgentRetired, ErrAgentRetired) {
|
||||
t.Error("ErrAgentRetired lost self-identity")
|
||||
}
|
||||
}
|
||||
|
||||
// TestSentinels_SurviveDoubleWrap verifies that wrapping a sentinel-wrapped
|
||||
// error a SECOND time (e.g., a call site doing fmt.Errorf("%w: ctx-info",
|
||||
// ErrSelfApproval)) preserves both the domain sentinel and the generic
|
||||
// sentinel. This is critical because the errToStatus dispatch order places
|
||||
// ErrForbidden BEFORE ErrValidation — if double-wrapping broke the chain,
|
||||
// the M-003 gate would demote to 400.
|
||||
func TestSentinels_SurviveDoubleWrap(t *testing.T) {
|
||||
doubled := fmt.Errorf("%w: additional context from call site", ErrSelfApproval)
|
||||
if !errors.Is(doubled, ErrSelfApproval) {
|
||||
t.Error("double-wrapped ErrSelfApproval lost domain identity")
|
||||
}
|
||||
if !errors.Is(doubled, ErrForbidden) {
|
||||
t.Error("double-wrapped ErrSelfApproval lost ErrForbidden wrap — M-003 gate would demote to 500")
|
||||
}
|
||||
}
|
||||
+37
-11
@@ -38,16 +38,31 @@ type ExportPEMResult struct {
|
||||
|
||||
// ExportPEM returns the PEM-encoded certificate and chain for the latest version.
|
||||
func (s *ExportService) ExportPEM(ctx context.Context, certID string) (*ExportPEMResult, error) {
|
||||
// Verify certificate exists
|
||||
// Verify certificate exists.
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 wrap was `"certificate not found: %w"` on every
|
||||
// certRepo.Get error — which gave the handler's strings.Contains(err.Error(),
|
||||
// "not found") check a false positive on transient DB failures (connection
|
||||
// refused, context deadline, etc.), demoting a 500 to a 404. Now the repo
|
||||
// wraps only the genuine sql.ErrNoRows path with repository.ErrNotFound
|
||||
// (certificate.go Get), so the errors.Is walk through the handler's
|
||||
// errToStatus discriminates correctly: truly-missing → 404, everything else
|
||||
// → 500 (the intended outcome). The wrap text is changed from "certificate
|
||||
// not found" to "failed to get certificate" to match the semantic.
|
||||
cert, err := s.certRepo.Get(ctx, certID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("certificate not found: %w", err)
|
||||
return nil, fmt.Errorf("failed to get certificate: %w", err)
|
||||
}
|
||||
|
||||
// Get latest version (contains the PEM chain)
|
||||
// Get latest version (contains the PEM chain).
|
||||
//
|
||||
// M-1 (P2): same wrap-text correction as above — pre-M-1 any
|
||||
// GetLatestVersion error surfaced as "no certificate version found",
|
||||
// which bled into the handler's substring classifier. Now the repo wraps
|
||||
// sql.ErrNoRows with repository.ErrNotFound; the wrap chain walks cleanly.
|
||||
version, err := s.certRepo.GetLatestVersion(ctx, certID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("no certificate version found: %w", err)
|
||||
return nil, fmt.Errorf("failed to get latest certificate version: %w", err)
|
||||
}
|
||||
|
||||
// Split PEM chain into leaf cert + chain
|
||||
@@ -73,26 +88,37 @@ func (s *ExportService) ExportPEM(ctx context.Context, certID string) (*ExportPE
|
||||
// The private key is NOT included — it lives on the agent and never touches the control plane.
|
||||
// The PKCS#12 bundle is encrypted with the provided password (can be empty for cert-only bundles).
|
||||
func (s *ExportService) ExportPKCS12(ctx context.Context, certID string, password string) ([]byte, error) {
|
||||
// Verify certificate exists
|
||||
// Verify certificate exists. See M-1 (P2) note on ExportPEM for the wrap-text
|
||||
// correction — same rationale applies here.
|
||||
cert, err := s.certRepo.Get(ctx, certID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("certificate not found: %w", err)
|
||||
return nil, fmt.Errorf("failed to get certificate: %w", err)
|
||||
}
|
||||
|
||||
// Get latest version
|
||||
// Get latest version. Same wrap-text correction as ExportPEM.
|
||||
version, err := s.certRepo.GetLatestVersion(ctx, certID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("no certificate version found: %w", err)
|
||||
return nil, fmt.Errorf("failed to get latest certificate version: %w", err)
|
||||
}
|
||||
|
||||
// Parse PEM chain into x509.Certificate objects
|
||||
// Parse PEM chain into x509.Certificate objects.
|
||||
//
|
||||
// M-1 (P2): wrap both parse-failure paths with ErrUnprocessable so the
|
||||
// handler's errToStatus choke point dispatches to 422 Unprocessable Entity
|
||||
// via errors.Is instead of the pre-M-1 two-term substring net
|
||||
// (`"cannot be parsed"|"no certificates found"`) at handler/export.go:101.
|
||||
// 422 is the correct status here — the caller's request is syntactically
|
||||
// fine; the stored PEM chain is what can't be processed. The composed
|
||||
// Error() string still carries the "certificate data cannot be parsed as
|
||||
// X.509"/"no certificates found in PEM chain" wording so server-side
|
||||
// slog.Error capture and any future 422 body propagation stay readable.
|
||||
certs, err := parsePEMCertificates(version.PEMChain)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("certificate data cannot be parsed as X.509: %w", err)
|
||||
return nil, fmt.Errorf("%w: certificate data cannot be parsed as X.509: %v", ErrUnprocessable, err)
|
||||
}
|
||||
|
||||
if len(certs) == 0 {
|
||||
return nil, fmt.Errorf("no certificates found in PEM chain")
|
||||
return nil, fmt.Errorf("%w: no certificates found in PEM chain", ErrUnprocessable)
|
||||
}
|
||||
|
||||
// Build PKCS#12 bundle: leaf cert + CA chain (no private key)
|
||||
|
||||
@@ -262,10 +262,18 @@ func (s *IssuerService) Delete(ctx context.Context, id string, actor string) err
|
||||
|
||||
// TestConnection tests the connection to an issuer by instantiating a throwaway
|
||||
// connector and calling ValidateConfig. Records the result in the database.
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 wrap was `"issuer not found: %w"` on every
|
||||
// issuerRepo.Get error — which coupled to handler substring classifiers on
|
||||
// "not found" and could have demoted transient DB failures to 404. Now the
|
||||
// repo wraps only the genuine sql.ErrNoRows path with repository.ErrNotFound
|
||||
// (postgres/issuer.go Get), so errors.Is walks the wrap chain correctly:
|
||||
// truly-missing → 404, everything else → 500. The wrap text is changed from
|
||||
// "issuer not found" to "failed to get issuer" to match the semantic.
|
||||
func (s *IssuerService) TestConnection(ctx context.Context, id string) error {
|
||||
iss, err := s.issuerRepo.Get(ctx, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("issuer not found: %w", err)
|
||||
return fmt.Errorf("failed to get issuer: %w", err)
|
||||
}
|
||||
|
||||
// Get the decrypted config
|
||||
|
||||
+24
-5
@@ -3,7 +3,6 @@ package service
|
||||
import (
|
||||
"time"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
@@ -16,8 +15,16 @@ import (
|
||||
// approve a renewal job is the same person listed as the owner of the
|
||||
// underlying certificate. M-003 enforces separation of duties: the owner who
|
||||
// requested (or benefits from) the renewal must not be the same identity that
|
||||
// approves it. Handlers map this sentinel to HTTP 403 Forbidden.
|
||||
var ErrSelfApproval = errors.New("self-approval forbidden: actor is the owner of the certificate")
|
||||
// approves it.
|
||||
//
|
||||
// M-1 (P2): wraps [ErrForbidden] via fmt.Errorf("%w", ...) so the single
|
||||
// handler-layer errToStatus choke point resolves HTTP 403 via
|
||||
// errors.Is(err, ErrForbidden). Call sites can continue to
|
||||
// errors.Is(err, ErrSelfApproval) for domain-specific logging — both succeed
|
||||
// because %w builds a wrap chain. Pre-M-1, handler/jobs.go classified the
|
||||
// failure by substring-matching "cannot approve" in the message; a reword
|
||||
// would have silently demoted 403 to 500 with no observable signal.
|
||||
var ErrSelfApproval = fmt.Errorf("%w: self-approval forbidden: actor is the owner of the certificate", ErrForbidden)
|
||||
|
||||
// JobService manages job processing and status tracking.
|
||||
// It coordinates between the scheduler and various job-specific services.
|
||||
@@ -403,9 +410,17 @@ func (s *JobService) GetJob(ctx context.Context, id string) (*domain.Job, error)
|
||||
// duties. Callers must pass a non-empty actor; empty actor is treated as an
|
||||
// anonymous system caller and permitted (internal/system paths).
|
||||
func (s *JobService) ApproveJob(ctx context.Context, id, actor string) error {
|
||||
// M-1 (P2): the pre-M-1 wrap was `"job not found: %w"` on every jobRepo.Get
|
||||
// error — which coupled to the handler's strings.Contains substring classifier
|
||||
// on "not found" and gave false positives on transient DB failures, demoting a
|
||||
// 500 to a 404. Now the repo wraps only the genuine sql.ErrNoRows path with
|
||||
// repository.ErrNotFound (postgres/job.go Get), so errors.Is walks the wrap
|
||||
// chain correctly: truly-missing → 404, everything else → 500. The wrap text
|
||||
// is changed from "job not found" to "failed to get job" to match the
|
||||
// semantic.
|
||||
job, err := s.jobRepo.Get(ctx, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("job not found: %w", err)
|
||||
return fmt.Errorf("failed to get job: %w", err)
|
||||
}
|
||||
|
||||
if job.Status != domain.JobStatusAwaitingApproval {
|
||||
@@ -435,9 +450,13 @@ func (s *JobService) ApproveJob(ctx context.Context, id, actor string) error {
|
||||
// owner is permitted to cancel their own pending renewal. actor is recorded
|
||||
// on the log line for audit attribution.
|
||||
func (s *JobService) RejectJob(ctx context.Context, id, reason, actor string) error {
|
||||
// M-1 (P2): wrap-text correction — see ApproveJob for rationale. Same
|
||||
// repo.Get sql.ErrNoRows → repository.ErrNotFound propagation; text changed
|
||||
// from "job not found" to "failed to get job" so transient DB errors stay
|
||||
// 500 while genuine 404s are picked up via errors.Is at errToStatus.
|
||||
job, err := s.jobRepo.Get(ctx, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("job not found: %w", err)
|
||||
return fmt.Errorf("failed to get job: %w", err)
|
||||
}
|
||||
|
||||
if job.Status != domain.JobStatusAwaitingApproval {
|
||||
|
||||
@@ -383,6 +383,15 @@ func (s *NotificationService) ListNotifications(ctx context.Context, page, perPa
|
||||
}
|
||||
|
||||
// GetNotification returns a single notification (handler interface method).
|
||||
//
|
||||
// M-1 (P2): the not-found branch now wraps service.ErrNotFound via %w so the
|
||||
// handler's errToStatus choke point dispatches to 404 via errors.Is. Pre-M-1
|
||||
// this returned a raw fmt.Errorf("notification not found") and the handler
|
||||
// relied on a blanket "any error from this service call → 404" shortcut —
|
||||
// which silently demoted transient DB failures (the List() error wrap above)
|
||||
// to 404. Post-M-1 the handler uses errToStatus; the List() error wrap still
|
||||
// surfaces as 500 (no sentinel attached), and only this explicit not-found
|
||||
// branch maps to 404.
|
||||
func (s *NotificationService) GetNotification(ctx context.Context, id string) (*domain.NotificationEvent, error) {
|
||||
filter := &repository.NotificationFilter{
|
||||
PerPage: 1,
|
||||
@@ -400,7 +409,7 @@ func (s *NotificationService) GetNotification(ctx context.Context, id string) (*
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("notification not found")
|
||||
return nil, fmt.Errorf("%w: notification %s", ErrNotFound, id)
|
||||
}
|
||||
|
||||
// MarkAsRead marks a notification as read (handler interface method).
|
||||
@@ -568,6 +577,13 @@ func (s *NotificationService) RetryFailedNotifications(ctx context.Context) erro
|
||||
// "pg: deadlock detected" surfaces in the handler response and the
|
||||
// operator UI. The service has no fallback — a silent "success" that
|
||||
// didn't actually mutate the row would be worse than a loud error.
|
||||
//
|
||||
// M-1 (P2): the repo's Requeue (postgres/notification.go) now wraps
|
||||
// repository.ErrNotFound on zero-rows-affected. Because this method wraps
|
||||
// the repo error with %w, errors.Is(err, repository.ErrNotFound) walks
|
||||
// cleanly through both wrap layers — so the handler's errToStatus choke
|
||||
// point dispatches a genuine missing-row Requeue to 404 and a transient
|
||||
// "pg: deadlock detected" to 500 without substring matching.
|
||||
func (s *NotificationService) RequeueNotification(ctx context.Context, id string) error {
|
||||
if err := s.notifRepo.Requeue(ctx, id); err != nil {
|
||||
return fmt.Errorf("failed to requeue notification: %w", err)
|
||||
|
||||
@@ -139,42 +139,51 @@ func (s *ProfileService) Get(ctx context.Context, id string) (*domain.Certificat
|
||||
}
|
||||
|
||||
// validateProfile checks that a profile's configuration is valid.
|
||||
//
|
||||
// M-1: every return here wraps ErrValidation via %w so the handler's
|
||||
// errToStatus choke point dispatches these to HTTP 400 via errors.Is without
|
||||
// the substring-matching on "invalid"/"required"/"must be"/"cannot" that the
|
||||
// pre-M-1 profile handler relied on. The composed Error() string still
|
||||
// contains the original human-readable text (e.g. "RSA minimum key size must
|
||||
// be at least 2048"), so the handler safely passes err.Error() through to the
|
||||
// response body on the 400 arm. Substring assertions in profile_test.go
|
||||
// continue to match for the same reason.
|
||||
func validateProfile(p *domain.CertificateProfile) error {
|
||||
if p.Name == "" {
|
||||
return fmt.Errorf("profile name is required")
|
||||
return fmt.Errorf("%w: profile name is required", ErrValidation)
|
||||
}
|
||||
if len(p.Name) > 255 {
|
||||
return fmt.Errorf("profile name exceeds 255 characters")
|
||||
return fmt.Errorf("%w: profile name exceeds 255 characters", ErrValidation)
|
||||
}
|
||||
|
||||
// Validate key algorithms
|
||||
for _, alg := range p.AllowedKeyAlgorithms {
|
||||
if !domain.ValidKeyAlgorithms[alg.Algorithm] {
|
||||
return fmt.Errorf("invalid key algorithm: %s (allowed: RSA, ECDSA, Ed25519)", alg.Algorithm)
|
||||
return fmt.Errorf("%w: invalid key algorithm: %s (allowed: RSA, ECDSA, Ed25519)", ErrValidation, alg.Algorithm)
|
||||
}
|
||||
if alg.Algorithm == domain.KeyAlgorithmRSA && alg.MinSize < 2048 {
|
||||
return fmt.Errorf("RSA minimum key size must be at least 2048, got %d", alg.MinSize)
|
||||
return fmt.Errorf("%w: RSA minimum key size must be at least 2048, got %d", ErrValidation, alg.MinSize)
|
||||
}
|
||||
if alg.Algorithm == domain.KeyAlgorithmECDSA && alg.MinSize < 256 {
|
||||
return fmt.Errorf("ECDSA minimum key size must be at least 256, got %d", alg.MinSize)
|
||||
return fmt.Errorf("%w: ECDSA minimum key size must be at least 256, got %d", ErrValidation, alg.MinSize)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate EKUs
|
||||
for _, eku := range p.AllowedEKUs {
|
||||
if !domain.ValidEKUs[eku] {
|
||||
return fmt.Errorf("invalid EKU: %s", eku)
|
||||
return fmt.Errorf("%w: invalid EKU: %s", ErrValidation, eku)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate max TTL
|
||||
if p.MaxTTLSeconds < 0 {
|
||||
return fmt.Errorf("max_ttl_seconds cannot be negative")
|
||||
return fmt.Errorf("%w: max_ttl_seconds cannot be negative", ErrValidation)
|
||||
}
|
||||
|
||||
// Validate short-lived consistency
|
||||
if p.AllowShortLived && p.MaxTTLSeconds >= 3600 {
|
||||
return fmt.Errorf("allow_short_lived is true but max_ttl_seconds (%d) is >= 3600; short-lived certs must have TTL under 1 hour", p.MaxTTLSeconds)
|
||||
return fmt.Errorf("%w: allow_short_lived is true but max_ttl_seconds (%d) is >= 3600; short-lived certs must have TTL under 1 hour", ErrValidation, p.MaxTTLSeconds)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -84,10 +84,22 @@ func (s *SCEPService) PKCSReq(ctx context.Context, csrPEM string, challengePassw
|
||||
// this branch also protects future call sites (tests, library reuse, a
|
||||
// future REST-over-HTTPS wrapper) from silently accepting unauthenticated
|
||||
// CSRs.
|
||||
//
|
||||
// M-1 (P2): both failure modes now wrap service.ErrUnauthenticated via %w so
|
||||
// the handler's errToStatus choke point dispatches them to HTTP 401 via
|
||||
// errors.Is instead of relying on a `strings.Contains(err.Error(), "challenge
|
||||
// password")` substring branch at handler/scep.go:174. This is a deliberate
|
||||
// semantic correction: the pre-M-1 substring branch returned 403 Forbidden,
|
||||
// but SCEP challenge-password failure is an authentication failure (the
|
||||
// caller has no valid credential at the application layer), not an
|
||||
// authorization denial (the caller has a credential but is not permitted),
|
||||
// and 401 Unauthorized is the correct RFC 7235 status. The errToStatus doc
|
||||
// block explicitly cites this site as the canonical ErrUnauthenticated use
|
||||
// case.
|
||||
if s.challengePassword == "" {
|
||||
s.logger.Warn("SCEP enrollment rejected: server has no challenge password configured",
|
||||
"transaction_id", transactionID)
|
||||
return nil, fmt.Errorf("SCEP challenge password not configured on server")
|
||||
return nil, fmt.Errorf("%w: SCEP challenge password not configured on server", ErrUnauthenticated)
|
||||
}
|
||||
// Constant-time compare avoids leaking the configured secret through
|
||||
// response-time variance. ConstantTimeCompare returns 1 only when both
|
||||
@@ -96,7 +108,7 @@ func (s *SCEPService) PKCSReq(ctx context.Context, csrPEM string, challengePassw
|
||||
if subtle.ConstantTimeCompare([]byte(challengePassword), []byte(s.challengePassword)) != 1 {
|
||||
s.logger.Warn("SCEP enrollment rejected: invalid challenge password",
|
||||
"transaction_id", transactionID)
|
||||
return nil, fmt.Errorf("invalid challenge password")
|
||||
return nil, fmt.Errorf("%w: invalid challenge password", ErrUnauthenticated)
|
||||
}
|
||||
|
||||
return s.processEnrollment(ctx, csrPEM, transactionID, "scep_pkcsreq")
|
||||
|
||||
@@ -148,6 +148,13 @@ func TestSCEPService_PKCSReq_ChallengePassword_Invalid(t *testing.T) {
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid challenge password")
|
||||
}
|
||||
// M-1 (P2): pin the sentinel wrap so the handler's errToStatus dispatch
|
||||
// (errors.Is → 401 Unauthorized) stays contract-stable. The parallel
|
||||
// substring check is retained because it also pins the user-facing reason
|
||||
// that service.ErrUnauthenticated composes with via fmt.Errorf("%w: ...").
|
||||
if !errors.Is(err, ErrUnauthenticated) {
|
||||
t.Errorf("expected errors.Is(err, ErrUnauthenticated), got: %v", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "challenge password") {
|
||||
t.Errorf("expected 'challenge password' in error, got: %v", err)
|
||||
}
|
||||
@@ -171,6 +178,12 @@ func TestSCEPService_PKCSReq_ChallengePassword_EmptyServerConfigRejected(t *test
|
||||
if err == nil {
|
||||
t.Fatalf("expected rejection when server challenge password is empty (client=%q)", clientPassword)
|
||||
}
|
||||
// M-1 (P2): pin the sentinel wrap on the "no shared secret configured"
|
||||
// branch so both H-2 failure modes route to the same 401 arm at the
|
||||
// handler. Preserves the explanatory substring assertion alongside.
|
||||
if !errors.Is(err, ErrUnauthenticated) {
|
||||
t.Errorf("expected errors.Is(err, ErrUnauthenticated) for client=%q, got: %v", clientPassword, err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "not configured") {
|
||||
t.Errorf("expected 'not configured' in error, got: %v", err)
|
||||
}
|
||||
@@ -193,6 +206,12 @@ func TestSCEPService_PKCSReq_ChallengePassword_ConstantTimeLengthIndependence(t
|
||||
if err == nil {
|
||||
t.Fatalf("expected rejection for bad password %q", bad)
|
||||
}
|
||||
// M-1 (P2): pin sentinel-wrap on every bad-password input so both the
|
||||
// length-mismatch and content-mismatch ConstantTimeCompare paths map to
|
||||
// the same 401 arm.
|
||||
if !errors.Is(err, ErrUnauthenticated) {
|
||||
t.Errorf("expected errors.Is(err, ErrUnauthenticated) for %q, got: %v", bad, err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "invalid challenge password") {
|
||||
t.Errorf("expected 'invalid challenge password' for %q, got: %v", bad, err)
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"time"
|
||||
@@ -18,7 +17,15 @@ import (
|
||||
// agent. The handler layer maps this to HTTP 400 via [errors.Is]. See C-002 in
|
||||
// cowork/certctl-coverage-gap-audit.md — this sentinel replaces a silent
|
||||
// Postgres FK violation (23503 → HTTP 500) with a deterministic 400.
|
||||
var ErrAgentNotFound = errors.New("referenced agent does not exist")
|
||||
//
|
||||
// M-1 (P2): wraps [ErrValidation] via fmt.Errorf("%w", ...) so the single
|
||||
// handler-layer errToStatus choke point resolves HTTP 400 via
|
||||
// errors.Is(err, ErrValidation). "Referenced FK does not exist in POST body"
|
||||
// is invalid input — not a missing-resource lookup — which is why this wraps
|
||||
// ErrValidation (400) rather than ErrNotFound (404). Pre-M-1, target handlers
|
||||
// classified this via strings.Contains(err.Error(), "not found"); a message
|
||||
// reword would have silently promoted 400 to 500 with no observable signal.
|
||||
var ErrAgentNotFound = fmt.Errorf("%w: referenced agent does not exist", ErrValidation)
|
||||
|
||||
// validTargetTypes is the set of allowed target types for validation.
|
||||
var validTargetTypes = map[domain.TargetType]bool{
|
||||
@@ -215,10 +222,20 @@ func (s *TargetService) Delete(ctx context.Context, id string, actor string) err
|
||||
// TestConnection tests a target's connectivity by checking the assigned agent's heartbeat status.
|
||||
// Target connectors run on agents, not on the server, so we can't instantiate a connector here.
|
||||
// Instead, we verify the agent is online and reachable.
|
||||
//
|
||||
// M-1 (P2): the pre-M-1 wraps were `"target not found: %w"` and
|
||||
// `"assigned agent not found: %w"` on every targetRepo.Get / agentRepo.Get error
|
||||
// — which coupled to pre-M-1 handler strings.Contains substring classifiers on
|
||||
// "not found" and gave false positives on transient DB failures. Now both
|
||||
// repos wrap only the genuine sql.ErrNoRows path with repository.ErrNotFound,
|
||||
// so errors.Is walks the chain correctly: truly-missing → 404, everything else
|
||||
// → 500. The wrap text is changed to "failed to get target" / "failed to get
|
||||
// agent" to match the semantic (agent-misconfigured stays 500 — "assigned
|
||||
// agent" is a data-integrity issue, not a consumer 404).
|
||||
func (s *TargetService) TestConnection(ctx context.Context, id string) error {
|
||||
target, err := s.targetRepo.Get(ctx, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("target not found: %w", err)
|
||||
return fmt.Errorf("failed to get target: %w", err)
|
||||
}
|
||||
|
||||
if target.AgentID == "" {
|
||||
@@ -229,7 +246,7 @@ func (s *TargetService) TestConnection(ctx context.Context, id string) error {
|
||||
agent, err := s.agentRepo.Get(ctx, target.AgentID)
|
||||
if err != nil {
|
||||
s.updateTestStatus(ctx, target, "failed")
|
||||
return fmt.Errorf("assigned agent not found: %w", err)
|
||||
return fmt.Errorf("failed to get assigned agent: %w", err)
|
||||
}
|
||||
|
||||
// I-004: AgentRepository.Get intentionally surfaces retired rows (the banner
|
||||
|
||||
Reference in New Issue
Block a user