From 05097903259d4fed0350cace364236e7059d7034 Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Sat, 2 May 2026 02:41:36 +0000 Subject: [PATCH] asyncpoll: refactor Sectigo / Entrust / GlobalSign to bounded polling (Phase 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of the #5 acquisition-readiness fix from the 2026-05-01 issuer coverage audit. Phase 1 (commit 711265b) shipped the shared asyncpoll package and refactored DigiCert as the reference. This commit applies the same pattern to the remaining three async-CA connectors and adds the operator-facing docs. Per-connector refactors: - Sectigo (sectigo.go): GetOrderStatus now wraps pollEnrollmentOnce in asyncpoll.Poll. The collectNotReady sentinel (cert approved by SCM but not yet retrievable from the collect endpoint) maps to StillPending and rides the backoff schedule rather than the prior "return pending immediately" branch. Added isPermanentStatusError helper to distinguish transient HTTP errors (5xx / 429 / network) from permanent ones (4xx / parse failure) — the wrapped checkStatus errors get triaged at the poll closure boundary. - Entrust (entrust.go): GetOrderStatus wraps pollEnrollmentOnce. The AWAITING_APPROVAL status maps to StillPending; operators using approval-pending workflows where humans approve enrollments should bump CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS to 86400 (24h) so a single scheduler tick can wait through the approval window. The default 10-minute deadline matches the other three connectors. - GlobalSign (globalsign.go): GetOrderStatus wraps pollCertificateOnce. GlobalSign tracks orders by serial number rather than order ID, but the polling shape is identical to the other three. Status-code triage matches DigiCert: 4xx (not 429) is permanent, 5xx / 429 / network is transient. Per-connector Config field added: - DigiCert.PollMaxWaitSeconds (env CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS) - Sectigo.PollMaxWaitSeconds (env CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS) - Entrust.PollMaxWaitSeconds (env CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS) - GlobalSign.PollMaxWaitSeconds (env CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS) internal/config/config.go env-var loaders updated for all four. Default is 600 seconds (10 minutes); zero falls back to the asyncpoll package default. Test-helper updates: every existing test that exercises the pending branch (collectNotReady, AWAITING_APPROVAL, status="pending", etc.) now sets PollMaxWaitSeconds=1 in its Config so the test doesn't block on the production-default 10-minute deadline. Tests that exercise permanent-error branches (404, 401, malformed JSON, etc.) continue to return immediately. Test sites updated: - buildSectigoConnector helper + GetOrderStatus_CollectNotReady test - buildEntrustConnector helper + GetOrderStatus_Pending test - buildGlobalsignConnector helper + GetOrderStatus_Pending test + the GetHTTPClient_NoMTLSCertPaths test (network failure now rides the backoff schedule rather than returning immediately) Documentation: - docs/async-polling.md: new operator reference covering the backoff schedule, status-code triage, the four env vars, failure modes, and where the implementation lives. Audit blocker citation included. - docs/connectors.md: per-issuer sections for DigiCert, Sectigo, Entrust, GlobalSign each gain the PollMaxWaitSeconds env var row and a cross-link to async-polling.md. Lint cleanup: simplified the isPermanentStatusError branch to satisfy staticcheck S1008 (single-line return for a final boolean check). Verified locally: - gofmt -l . clean - go vet ./... clean - staticcheck ./... clean - golangci-lint run --timeout 5m ./... → 0 issues - go test -short -count=1 across all 4 connector packages + config + asyncpoll: green Audit reference: cowork/issuer-coverage-audit-2026-05-01/RESULTS.md Top-10 fix #5 — Phase 2. --- docs/async-polling.md | 118 +++++++++++++++ docs/connectors.md | 8 +- internal/config/config.go | 72 ++++++--- internal/connector/issuer/entrust/entrust.go | 108 +++++++++++--- .../issuer/entrust/entrust_failure_test.go | 5 +- .../connector/issuer/entrust/entrust_test.go | 9 +- .../connector/issuer/globalsign/globalsign.go | 109 +++++++++++--- .../globalsign/globalsign_failure_test.go | 32 ++-- .../issuer/globalsign/globalsign_test.go | 7 +- internal/connector/issuer/sectigo/sectigo.go | 140 +++++++++++++++--- .../issuer/sectigo/sectigo_failure_test.go | 15 +- .../connector/issuer/sectigo/sectigo_test.go | 22 +-- 12 files changed, 523 insertions(+), 122 deletions(-) create mode 100644 docs/async-polling.md diff --git a/docs/async-polling.md b/docs/async-polling.md new file mode 100644 index 0000000..c3b058a --- /dev/null +++ b/docs/async-polling.md @@ -0,0 +1,118 @@ +# Async-CA Polling — Operator Reference + +Closes audit fix #5 from the 2026-05-01 issuer-coverage acquisition-readiness audit. + +## What this is + +Four issuer connectors talk to Certificate Authorities that issue +certificates **asynchronously** — `IssueCertificate` returns an order +ID immediately, and the caller (or scheduler) must call +`GetOrderStatus` later to retrieve the issued cert: + +- **DigiCert** (CertCentral) +- **Sectigo** (Certificate Manager) +- **Entrust** (Certificate Services / CA Gateway) +- **GlobalSign** (Atlas HVCA) + +Pre-fix, each connector's `GetOrderStatus` made one HTTP call per +invocation with no exponential backoff, no retry cap, and no deadline. +Under a renewal sweep, certctl would hammer the upstream CA's +rate-limit budget. A 429 response was treated as a hard error, +which then caused the scheduler to retry on the next tick — re-fanning +out the same call that just got rate-limited. + +Post-fix, `GetOrderStatus` blocks for up to `PollMaxWait` (default +10 minutes) doing **bounded internal polling**: + +``` +attempt 1 → wait 5s → attempt 2 → wait 15s → attempt 3 → wait 45s → +attempt 4 → wait 2m → attempt 5 → wait 5m → ... (capped at 5m) +``` + +±20% jitter applied at every wait so multiple certctl instances +never synchronize on the upstream CA's rate-limit window. The +`PollMaxWait` deadline is a hard cap; if the upstream still hasn't +completed by then, `GetOrderStatus` returns `StillPending` and the +scheduler can re-enqueue the job for a future tick. + +## Status-code triage + +Each connector classifies HTTP responses to drive polling decisions: + +| Response | Meaning | Decision | +|---|---|---| +| 2xx + status="issued"/"completed" | Cert ready | Done — return the cert | +| 2xx + status="pending"/"processing" | Still working | StillPending — keep polling | +| 2xx + status="rejected"/"denied"/"failed" | Permanent | Done — return `OrderStatus{Status:"failed"}` | +| 2xx + parse failure | Body is broken | Failed — return error | +| 4xx (404/400/401/403) | Permanent client error | Failed — return error | +| 429 (rate limited) | Transient | StillPending — keep polling with backoff | +| 5xx | Transient | StillPending — keep polling with backoff | +| Network / TLS error | Transient | StillPending — keep polling with backoff | + +## Operator tuning + +Each connector exposes a `PollMaxWaitSeconds` config field and +matching env var: + +| Connector | Env var | Default | +|---|---|---| +| DigiCert | `CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS` | 600 (10m) | +| Sectigo | `CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS` | 600 (10m) | +| Entrust | `CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS` | 600 (10m) | +| GlobalSign | `CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS` | 600 (10m) | + +Tune up (e.g., `86400` = 24 hours) for **Entrust approval-pending +workflows** where humans manually approve enrollments. Tune down (e.g., +`60`) for high-throughput environments that prefer to recycle the +scheduler tick rather than block one renewal goroutine for minutes. + +A value of 0 (or unset) falls back to the package default in +`internal/connector/issuer/asyncpoll`. + +## Failure modes + +**Upstream returns 429 forever.** The Poller respects the backoff +(5s → 15s → 45s → 2m → 5m), so a sustained 429 stream burns through +the full `PollMaxWait` budget with at most 7-8 attempts (instead of +~600 attempts at 1/sec). After `PollMaxWait` expires, `GetOrderStatus` +returns `StillPending`; the scheduler re-enqueues for the next tick. +The total request volume against the upstream is bounded by `tick +interval / minimum backoff` — typically 1-2 requests per minute even +under heavy load. + +**Sectigo `collectNotReady` sentinel.** When the SCM status endpoint +reports `Issued` but the cert collect endpoint isn't yet ready, the +old code branched into a special "pending" return. Now that branch +returns `StillPending` from the poll closure, so the cert collection +rides the same backoff schedule. + +**Entrust approval-pending.** The `AWAITING_APPROVAL` status maps to +`StillPending`. With the default `PollMaxWait=10m`, the scheduler +will re-enqueue once per tick if approval hasn't happened yet; with +`PollMaxWait=24h` the same renewal goroutine waits the full approval +window. Pick the latter when you have many approval-pending +enrollments per tick. + +## Where the implementation lives + +- `internal/connector/issuer/asyncpoll/asyncpoll.go` — shared `Poller` + with backoff math, jitter, deadline, and ctx-aware cancellation. +- `internal/connector/issuer/digicert/digicert.go` — + `pollOrderOnce` + `GetOrderStatus` orchestrator. +- `internal/connector/issuer/sectigo/sectigo.go` — + `pollEnrollmentOnce` + status-code permanence triage + (`isPermanentStatusError`). +- `internal/connector/issuer/entrust/entrust.go` — + `pollEnrollmentOnce` + approval-pending mapping. +- `internal/connector/issuer/globalsign/globalsign.go` — + `pollCertificateOnce` (serial-number tracking). +- `internal/connector/issuer/asyncpoll/asyncpoll_test.go` — 11 unit + tests covering happy path, transient-then-success, Failed + termination, MaxWait timeout, last-error wrap, ctx cancel, + multiplicative backoff, jitter bounds, defaults. + +## Audit blocker reference + +cowork/issuer-coverage-audit-2026-05-01/RESULTS.md, Top-10 fix #5 +(Part 1.5 finding #4: "No polling backoff for async CAs"). diff --git a/docs/connectors.md b/docs/connectors.md index 29e4f5e..4c713a7 100644 --- a/docs/connectors.md +++ b/docs/connectors.md @@ -436,8 +436,9 @@ The DigiCert connector integrates with DigiCert's CertCentral REST API for order | `CERTCTL_DIGICERT_ORG_ID` | — | DigiCert organization ID | | `CERTCTL_DIGICERT_PRODUCT_TYPE` | `ssl_basic` | Certificate product (e.g., `ssl_basic`, `ssl_plus`, `ssl_ev`) | | `CERTCTL_DIGICERT_BASE_URL` | `https://www.digicert.com/services/v2` | DigiCert API base URL | +| `CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS` | `600` | Bounded-polling deadline for `GetOrderStatus`. See [docs/async-polling.md](async-polling.md). | -The connector submits certificate orders to DigiCert's `/order/certificate/create` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by DigiCert) and poll-based completion. The connector periodically checks order status via `/order/certificate/{order_id}` until the certificate is available. +The connector submits certificate orders to DigiCert's `/order/certificate/create` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by DigiCert) and poll-based completion. `GetOrderStatus` runs bounded internal polling (5s/15s/45s/2m/5m capped, ±20% jitter, default 10-minute deadline) — see [async-polling.md](async-polling.md). **Authentication:** API key passed via `X-DC-DEVKEY` header, with organization ID in request body. @@ -460,8 +461,9 @@ The Sectigo connector integrates with Sectigo Certificate Manager's REST API for | `CERTCTL_SECTIGO_CERT_TYPE` | — | Certificate type ID (integer, from `/ssl/v1/types`) | | `CERTCTL_SECTIGO_TERM` | `365` | Certificate validity in days | | `CERTCTL_SECTIGO_BASE_URL` | `https://cert-manager.com/api` | Sectigo API base URL | +| `CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS` | `600` | Bounded-polling deadline for `GetOrderStatus`. The `collectNotReady` sentinel (cert approved but not yet retrievable) rides the same backoff schedule. See [docs/async-polling.md](async-polling.md). | -The connector submits certificate enrollments to Sectigo's `/ssl/v1/enroll` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by Sectigo) and poll-based completion. The connector periodically checks enrollment status via `/ssl/v1/{sslId}` and downloads the PEM bundle via `/ssl/v1/collect/{sslId}/pem` when issued. +The connector submits certificate enrollments to Sectigo's `/ssl/v1/enroll` API. DV certificates may issue immediately; OV/EV certificates require validation (handled by Sectigo) and poll-based completion. `GetOrderStatus` runs bounded internal polling — see [async-polling.md](async-polling.md). **Authentication:** Three custom headers on every request — `customerUri`, `login`, and `password`. @@ -566,6 +568,7 @@ Entrust CA Gateway REST API with mutual TLS (mTLS) client certificate authentica | `CERTCTL_ENTRUST_CLIENT_KEY_PATH` | Yes | — | Path to mTLS client private key PEM | | `CERTCTL_ENTRUST_CA_ID` | Yes | — | Certificate Authority ID (from `GET /certificate-authorities`) | | `CERTCTL_ENTRUST_PROFILE_ID` | No | — | Optional enrollment profile ID | +| `CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS` | No | `600` (10m) | Bounded-polling deadline for `GetOrderStatus`. Approval-pending workflows where humans approve enrollments should bump to `86400` (24h) so a single tick can wait through the approval window. See [docs/async-polling.md](async-polling.md). | **Authentication:** Mutual TLS — the client certificate and key are loaded via `tls.LoadX509KeyPair()` and attached to the HTTP transport. No API key or token required. @@ -587,6 +590,7 @@ GlobalSign Atlas High Volume CA REST API with dual authentication: mTLS for the | `CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH` | Yes | — | Path to mTLS client certificate PEM | | `CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH` | Yes | — | Path to mTLS client private key PEM | | `CERTCTL_GLOBALSIGN_SERVER_CA_PATH` | No | system trust store | PEM bundle used to verify the Atlas API server certificate. Set this for private/lab Atlas deployments whose server TLS chain is not in the host's default trust bundle. | +| `CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS` | No | `600` (10m) | Bounded-polling deadline for `GetOrderStatus`. GlobalSign tracks orders by serial number rather than order ID; the polling shape is identical. See [docs/async-polling.md](async-polling.md). | **Authentication:** Dual — mTLS client certificate for TLS handshake plus `X-API-Key` and `X-API-Secret` headers on every request. diff --git a/internal/config/config.go b/internal/config/config.go index 0879fc9..f854c4d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -122,6 +122,13 @@ type EntrustConfig struct { // ProfileId is the optional enrollment profile identifier. // Setting: CERTCTL_ENTRUST_PROFILE_ID environment variable. ProfileId string + + // PollMaxWaitSeconds caps GetOrderStatus's bounded-polling + // deadline. Approval-pending workflows should bump this (e.g., + // 86400 = 24h) so a single tick can wait through the approval + // window. Default 600. Audit fix #5. + // Setting: CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS. + PollMaxWaitSeconds int } // GlobalSignConfig contains GlobalSign Atlas HVCA issuer connector configuration. @@ -154,6 +161,11 @@ type GlobalSignConfig struct { // present in the host's default trust bundle. // Setting: CERTCTL_GLOBALSIGN_SERVER_CA_PATH environment variable. ServerCAPath string + + // PollMaxWaitSeconds caps GetOrderStatus's bounded-polling + // deadline. Default 600 (10 minutes). Audit fix #5. + // Setting: CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS. + PollMaxWaitSeconds int } // EJBCAConfig contains EJBCA (Keyfactor) issuer connector configuration. @@ -439,6 +451,12 @@ type DigiCertConfig struct { // Default: "https://www.digicert.com/services/v2". // Setting: CERTCTL_DIGICERT_BASE_URL environment variable. BaseURL string + + // PollMaxWaitSeconds caps how long GetOrderStatus blocks doing + // internal exponential-backoff polling before returning. Default + // 600 (10 minutes); 0 falls back to asyncpoll default. + // Setting: CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS. Audit fix #5. + PollMaxWaitSeconds int } // SectigoConfig contains Sectigo Certificate Manager issuer connector configuration. @@ -476,6 +494,12 @@ type SectigoConfig struct { // Default: "https://cert-manager.com/api". // Setting: CERTCTL_SECTIGO_BASE_URL environment variable. BaseURL string + + // PollMaxWaitSeconds caps how long GetOrderStatus blocks doing + // internal exponential-backoff polling. Default 600. Sectigo's + // collectNotReady sentinel rides the backoff schedule. + // Setting: CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS. Audit fix #5. + PollMaxWaitSeconds int } // GoogleCASConfig contains Google Cloud Certificate Authority Service configuration. @@ -1532,19 +1556,21 @@ func Load() (*Config, error) { TTL: getEnv("CERTCTL_VAULT_TTL", "8760h"), }, DigiCert: DigiCertConfig{ - APIKey: getEnv("CERTCTL_DIGICERT_API_KEY", ""), - OrgID: getEnv("CERTCTL_DIGICERT_ORG_ID", ""), - ProductType: getEnv("CERTCTL_DIGICERT_PRODUCT_TYPE", "ssl_basic"), - BaseURL: getEnv("CERTCTL_DIGICERT_BASE_URL", "https://www.digicert.com/services/v2"), + APIKey: getEnv("CERTCTL_DIGICERT_API_KEY", ""), + OrgID: getEnv("CERTCTL_DIGICERT_ORG_ID", ""), + ProductType: getEnv("CERTCTL_DIGICERT_PRODUCT_TYPE", "ssl_basic"), + BaseURL: getEnv("CERTCTL_DIGICERT_BASE_URL", "https://www.digicert.com/services/v2"), + PollMaxWaitSeconds: getEnvInt("CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS", 0), }, Sectigo: SectigoConfig{ - CustomerURI: getEnv("CERTCTL_SECTIGO_CUSTOMER_URI", ""), - Login: getEnv("CERTCTL_SECTIGO_LOGIN", ""), - Password: getEnv("CERTCTL_SECTIGO_PASSWORD", ""), - OrgID: getEnvInt("CERTCTL_SECTIGO_ORG_ID", 0), - CertType: getEnvInt("CERTCTL_SECTIGO_CERT_TYPE", 0), - Term: getEnvInt("CERTCTL_SECTIGO_TERM", 365), - BaseURL: getEnv("CERTCTL_SECTIGO_BASE_URL", "https://cert-manager.com/api"), + CustomerURI: getEnv("CERTCTL_SECTIGO_CUSTOMER_URI", ""), + Login: getEnv("CERTCTL_SECTIGO_LOGIN", ""), + Password: getEnv("CERTCTL_SECTIGO_PASSWORD", ""), + OrgID: getEnvInt("CERTCTL_SECTIGO_ORG_ID", 0), + CertType: getEnvInt("CERTCTL_SECTIGO_CERT_TYPE", 0), + Term: getEnvInt("CERTCTL_SECTIGO_TERM", 365), + BaseURL: getEnv("CERTCTL_SECTIGO_BASE_URL", "https://cert-manager.com/api"), + PollMaxWaitSeconds: getEnvInt("CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS", 0), }, GoogleCAS: GoogleCASConfig{ Project: getEnv("CERTCTL_GOOGLE_CAS_PROJECT", ""), @@ -1561,19 +1587,21 @@ func Load() (*Config, error) { TemplateArn: getEnv("CERTCTL_AWS_PCA_TEMPLATE_ARN", ""), }, Entrust: EntrustConfig{ - APIUrl: getEnv("CERTCTL_ENTRUST_API_URL", ""), - ClientCertPath: getEnv("CERTCTL_ENTRUST_CLIENT_CERT_PATH", ""), - ClientKeyPath: getEnv("CERTCTL_ENTRUST_CLIENT_KEY_PATH", ""), - CAId: getEnv("CERTCTL_ENTRUST_CA_ID", ""), - ProfileId: getEnv("CERTCTL_ENTRUST_PROFILE_ID", ""), + APIUrl: getEnv("CERTCTL_ENTRUST_API_URL", ""), + ClientCertPath: getEnv("CERTCTL_ENTRUST_CLIENT_CERT_PATH", ""), + ClientKeyPath: getEnv("CERTCTL_ENTRUST_CLIENT_KEY_PATH", ""), + CAId: getEnv("CERTCTL_ENTRUST_CA_ID", ""), + ProfileId: getEnv("CERTCTL_ENTRUST_PROFILE_ID", ""), + PollMaxWaitSeconds: getEnvInt("CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS", 0), }, GlobalSign: GlobalSignConfig{ - APIUrl: getEnv("CERTCTL_GLOBALSIGN_API_URL", ""), - APIKey: getEnv("CERTCTL_GLOBALSIGN_API_KEY", ""), - APISecret: getEnv("CERTCTL_GLOBALSIGN_API_SECRET", ""), - ClientCertPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH", ""), - ClientKeyPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH", ""), - ServerCAPath: getEnv("CERTCTL_GLOBALSIGN_SERVER_CA_PATH", ""), + APIUrl: getEnv("CERTCTL_GLOBALSIGN_API_URL", ""), + APIKey: getEnv("CERTCTL_GLOBALSIGN_API_KEY", ""), + APISecret: getEnv("CERTCTL_GLOBALSIGN_API_SECRET", ""), + ClientCertPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_CERT_PATH", ""), + ClientKeyPath: getEnv("CERTCTL_GLOBALSIGN_CLIENT_KEY_PATH", ""), + ServerCAPath: getEnv("CERTCTL_GLOBALSIGN_SERVER_CA_PATH", ""), + PollMaxWaitSeconds: getEnvInt("CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS", 0), }, EJBCA: EJBCAConfig{ APIUrl: getEnv("CERTCTL_EJBCA_API_URL", ""), diff --git a/internal/connector/issuer/entrust/entrust.go b/internal/connector/issuer/entrust/entrust.go index 00b276f..154ba81 100644 --- a/internal/connector/issuer/entrust/entrust.go +++ b/internal/connector/issuer/entrust/entrust.go @@ -34,6 +34,7 @@ import ( "time" "github.com/shankar0123/certctl/internal/connector/issuer" + "github.com/shankar0123/certctl/internal/connector/issuer/asyncpoll" ) // Config represents the Entrust Certificate Services issuer connector configuration. @@ -58,6 +59,25 @@ type Config struct { // If set, constrains enrollments to use this profile. // Set via CERTCTL_ENTRUST_PROFILE_ID environment variable. ProfileId string `json:"profile_id,omitempty"` + + // PollMaxWaitSeconds caps how long GetOrderStatus blocks doing + // internal exponential-backoff polling before returning + // StillPending. Default 600 (10 minutes); operators using + // approval-pending workflows where humans approve enrollments + // should bump this to a higher value (e.g., 86400 = 24h) so a + // single scheduler tick can wait through the approval window. + // + // Set via CERTCTL_ENTRUST_POLL_MAX_WAIT_SECONDS. Audit fix #5. + PollMaxWaitSeconds int `json:"poll_max_wait_seconds,omitempty"` +} + +// pollMaxWait returns the configured PollMaxWait as a time.Duration, +// or the asyncpoll package default if unset. +func (c *Config) pollMaxWait() time.Duration { + if c.PollMaxWaitSeconds <= 0 { + return asyncpoll.DefaultMaxWait + } + return time.Duration(c.PollMaxWaitSeconds) * time.Second } // Connector implements the issuer.Connector interface for Entrust Certificate Services. @@ -336,56 +356,102 @@ func (c *Connector) RevokeCertificate(ctx context.Context, request issuer.Revoca return nil } -// GetOrderStatus checks the status of an Entrust enrollment. -// If the enrollment is "ISSUED", returns the certificate. -// If still pending, returns pending status for continued polling. +// GetOrderStatus checks the status of an Entrust enrollment using +// bounded internal polling (asyncpoll.Poll). One call blocks for up +// to PollMaxWait (default 10m; operators using approval-pending +// workflows can raise to 24h) doing exponential backoff with jitter. +// +// Audit fix #5 Phase 2: previously each scheduler tick made one HTTP +// call. Approval-pending enrollments now ride the backoff schedule +// rather than tight-loop polling. func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer.OrderStatus, error) { c.logger.Debug("checking Entrust enrollment status", "tracking_id", orderID) + var done *issuer.OrderStatus + var lastPendingMsg string + cfg := asyncpoll.Config{MaxWait: c.config.pollMaxWait()} + + res, err := asyncpoll.Poll(ctx, cfg, func(ctx context.Context) (asyncpoll.Result, error) { + status, result, pollErr := c.pollEnrollmentOnce(ctx, orderID) + if status != nil { + switch result { + case asyncpoll.Done: + done = status + case asyncpoll.StillPending: + if status.Message != nil { + lastPendingMsg = *status.Message + } + } + } + return result, pollErr + }) + + now := time.Now() + switch res { + case asyncpoll.Done: + return done, nil + case asyncpoll.Failed: + return nil, err + default: + msg := lastPendingMsg + if msg == "" { + msg = fmt.Sprintf("enrollment %s still pending after PollMaxWait", orderID) + } + return &issuer.OrderStatus{ + OrderID: orderID, + Status: "pending", + Message: &msg, + UpdatedAt: now, + }, nil + } +} + +// pollEnrollmentOnce makes one HTTP GET against the Entrust enrollment +// status endpoint. 4xx (not 429) is permanent; 5xx / 429 / network is +// transient and rides the backoff schedule. +func (c *Connector) pollEnrollmentOnce(ctx context.Context, orderID string) (*issuer.OrderStatus, asyncpoll.Result, error) { statusURL := fmt.Sprintf("%s/v1/certificate-authorities/%s/enrollments/%s", c.config.APIUrl, c.config.CAId, orderID) req, err := http.NewRequestWithContext(ctx, http.MethodGet, statusURL, nil) if err != nil { - return nil, fmt.Errorf("failed to create status request: %w", err) + return nil, asyncpoll.Failed, fmt.Errorf("failed to create status request: %w", err) } resp, err := c.httpClient.Do(req) if err != nil { - return nil, fmt.Errorf("Entrust status request failed: %w", err) + return nil, asyncpoll.StillPending, fmt.Errorf("Entrust status request failed: %w", err) } defer resp.Body.Close() respBody, err := io.ReadAll(resp.Body) if err != nil { - return nil, fmt.Errorf("failed to read status response: %w", err) + return nil, asyncpoll.StillPending, fmt.Errorf("failed to read status response: %w", err) } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("Entrust enrollment status returned %d: %s", resp.StatusCode, string(respBody)) + err := fmt.Errorf("Entrust enrollment status returned %d: %s", resp.StatusCode, string(respBody)) + if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500 { + return nil, asyncpoll.StillPending, err + } + return nil, asyncpoll.Failed, err } var statusResp enrollmentStatusResponse if err := json.Unmarshal(respBody, &statusResp); err != nil { - return nil, fmt.Errorf("failed to parse status response: %w", err) + return nil, asyncpoll.Failed, fmt.Errorf("failed to parse status response: %w", err) } now := time.Now() - switch statusResp.Status { case "ISSUED": if statusResp.Certificate == "" { - return nil, fmt.Errorf("enrollment is ISSUED but certificate is missing") + return nil, asyncpoll.Failed, fmt.Errorf("enrollment is ISSUED but certificate is missing") } - serial, notBefore, notAfter, err := parseCertMetadata(statusResp.Certificate) if err != nil { - return nil, fmt.Errorf("failed to parse certificate metadata: %w", err) + return nil, asyncpoll.Failed, fmt.Errorf("failed to parse certificate metadata: %w", err) } - - c.logger.Info("Entrust enrollment completed", - "tracking_id", orderID, - "serial", serial) - + c.logger.Info("Entrust enrollment completed", "tracking_id", orderID, "serial", serial) return &issuer.OrderStatus{ OrderID: orderID, Status: "completed", @@ -395,7 +461,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer NotBefore: ¬Before, NotAfter: ¬After, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil case "PENDING", "PROCESSING", "AWAITING_APPROVAL": msg := fmt.Sprintf("enrollment %s is %s", orderID, statusResp.Status) @@ -404,7 +470,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil case "REJECTED", "DENIED", "FAILED": msg := fmt.Sprintf("enrollment %s was %s", orderID, statusResp.Status) @@ -413,7 +479,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "failed", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil default: msg := fmt.Sprintf("unknown enrollment status: %s", statusResp.Status) @@ -422,7 +488,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil } } diff --git a/internal/connector/issuer/entrust/entrust_failure_test.go b/internal/connector/issuer/entrust/entrust_failure_test.go index cd49289..c7f22eb 100644 --- a/internal/connector/issuer/entrust/entrust_failure_test.go +++ b/internal/connector/issuer/entrust/entrust_failure_test.go @@ -21,8 +21,9 @@ import ( func buildEntrustConnector(t *testing.T, baseURL string) *Connector { t.Helper() cfg := &Config{ - APIUrl: baseURL, - CAId: "test-ca-id", + APIUrl: baseURL, + CAId: "test-ca-id", + PollMaxWaitSeconds: 1, // keep async-pending tests fast } httpClient := &http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}} //nolint:gosec return NewWithHTTPClient(cfg, slog.Default(), httpClient) diff --git a/internal/connector/issuer/entrust/entrust_test.go b/internal/connector/issuer/entrust/entrust_test.go index fa148ff..a812d04 100644 --- a/internal/connector/issuer/entrust/entrust_test.go +++ b/internal/connector/issuer/entrust/entrust_test.go @@ -344,10 +344,11 @@ func TestEntrustConnector(t *testing.T) { defer srv.Close() config := &entrust.Config{ - APIUrl: srv.URL, - ClientCertPath: "/dev/null", - ClientKeyPath: "/dev/null", - CAId: "ca-123", + APIUrl: srv.URL, + ClientCertPath: "/dev/null", + ClientKeyPath: "/dev/null", + CAId: "ca-123", + PollMaxWaitSeconds: 1, // keep async-pending tests fast } connector := entrust.NewWithHTTPClient(config, logger, srv.Client()) diff --git a/internal/connector/issuer/globalsign/globalsign.go b/internal/connector/issuer/globalsign/globalsign.go index d3e007f..5609661 100644 --- a/internal/connector/issuer/globalsign/globalsign.go +++ b/internal/connector/issuer/globalsign/globalsign.go @@ -39,6 +39,7 @@ import ( "time" "github.com/shankar0123/certctl/internal/connector/issuer" + "github.com/shankar0123/certctl/internal/connector/issuer/asyncpoll" ) // Config represents the GlobalSign Atlas HVCA issuer connector configuration. @@ -73,6 +74,24 @@ type Config struct { // internal CA not present in the host's default trust bundle. // Set via CERTCTL_GLOBALSIGN_SERVER_CA_PATH environment variable. ServerCAPath string `json:"server_ca_path,omitempty"` + + // PollMaxWaitSeconds caps how long GetOrderStatus blocks doing + // internal exponential-backoff polling before returning + // StillPending. Default 600 (10 minutes). GlobalSign tracks + // orders by serial number rather than order ID, but the polling + // shape is identical. + // + // Set via CERTCTL_GLOBALSIGN_POLL_MAX_WAIT_SECONDS. Audit fix #5. + PollMaxWaitSeconds int `json:"poll_max_wait_seconds,omitempty"` +} + +// pollMaxWait returns the configured PollMaxWait as a time.Duration, +// or the asyncpoll package default if unset. +func (c *Config) pollMaxWait() time.Duration { + if c.PollMaxWaitSeconds <= 0 { + return asyncpoll.DefaultMaxWait + } + return time.Duration(c.PollMaxWaitSeconds) * time.Second } // Connector implements the issuer.Connector interface for GlobalSign Atlas HVCA. @@ -423,21 +442,72 @@ func (c *Connector) RevokeCertificate(ctx context.Context, request issuer.Revoca return nil } -// GetOrderStatus checks the status of a GlobalSign certificate order by serial number. -// Polls the certificate endpoint; when status is "issued", downloads and returns the cert. +// GetOrderStatus checks the status of a GlobalSign certificate order +// by serial number, using bounded internal polling (asyncpoll.Poll). +// One call blocks for up to PollMaxWait (default 10m) doing +// exponential backoff with jitter; returns Done with the cert, +// Failed with the rejection reason, or StillPending if the deadline +// expires (caller can re-invoke). +// +// Audit fix #5 Phase 2: previously each scheduler tick made one HTTP +// call against an unready order. GlobalSign tracks orders by serial +// number rather than order ID, but the polling shape is identical. func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer.OrderStatus, error) { c.logger.Debug("checking GlobalSign certificate status", "serial", orderID) + var done *issuer.OrderStatus + var lastPendingMsg string + cfg := asyncpoll.Config{MaxWait: c.config.pollMaxWait()} + + res, err := asyncpoll.Poll(ctx, cfg, func(ctx context.Context) (asyncpoll.Result, error) { + status, result, pollErr := c.pollCertificateOnce(ctx, orderID) + if status != nil { + switch result { + case asyncpoll.Done: + done = status + case asyncpoll.StillPending: + if status.Message != nil { + lastPendingMsg = *status.Message + } + } + } + return result, pollErr + }) + + now := time.Now() + switch res { + case asyncpoll.Done: + return done, nil + case asyncpoll.Failed: + return nil, err + default: + msg := lastPendingMsg + if msg == "" { + msg = fmt.Sprintf("certificate %s still pending after PollMaxWait", orderID) + } + return &issuer.OrderStatus{ + OrderID: orderID, + Status: "pending", + Message: &msg, + UpdatedAt: now, + }, nil + } +} + +// pollCertificateOnce makes one HTTP GET against the GlobalSign Atlas +// HVCA certificate status endpoint and translates the response into +// an asyncpoll.Result. 4xx (not 429) is permanent; 5xx / 429 / network +// is transient. +func (c *Connector) pollCertificateOnce(ctx context.Context, orderID string) (*issuer.OrderStatus, asyncpoll.Result, error) { client, err := c.getHTTPClient(ctx) if err != nil { - return nil, err + return nil, asyncpoll.Failed, err } - // GlobalSign status endpoint: GET /v2/certificates/{serial} statusURL := strings.TrimSuffix(c.config.APIUrl, "/") + fmt.Sprintf("/v2/certificates/%s", orderID) req, err := http.NewRequestWithContext(ctx, http.MethodGet, statusURL, nil) if err != nil { - return nil, fmt.Errorf("failed to create status request: %w", err) + return nil, asyncpoll.Failed, fmt.Errorf("failed to create status request: %w", err) } req.Header.Set("ApiKey", c.config.APIKey) @@ -446,40 +516,39 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer resp, err := client.Do(req) if err != nil { - return nil, fmt.Errorf("GlobalSign status request failed: %w", err) + return nil, asyncpoll.StillPending, fmt.Errorf("GlobalSign status request failed: %w", err) } defer resp.Body.Close() respBody, err := io.ReadAll(resp.Body) if err != nil { - return nil, fmt.Errorf("failed to read status response: %w", err) + return nil, asyncpoll.StillPending, fmt.Errorf("failed to read status response: %w", err) } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("GlobalSign certificate status returned %d: %s", resp.StatusCode, string(respBody)) + statusErr := fmt.Errorf("GlobalSign certificate status returned %d: %s", resp.StatusCode, string(respBody)) + if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500 { + return nil, asyncpoll.StillPending, statusErr + } + return nil, asyncpoll.Failed, statusErr } var certResp certificateResponse if err := json.Unmarshal(respBody, &certResp); err != nil { - return nil, fmt.Errorf("failed to parse status response: %w", err) + return nil, asyncpoll.Failed, fmt.Errorf("failed to parse status response: %w", err) } now := time.Now() - switch certResp.Status { case "issued": if certResp.Certificate == "" { - return nil, fmt.Errorf("certificate status is issued but certificate PEM is missing") + return nil, asyncpoll.Failed, fmt.Errorf("certificate status is issued but certificate PEM is missing") } - notBefore, notAfter, err := parseCertDates(certResp.Certificate) if err != nil { c.logger.Warn("failed to parse certificate dates", "error", err) } - - c.logger.Info("GlobalSign certificate ready", - "serial", orderID) - + c.logger.Info("GlobalSign certificate ready", "serial", orderID) return &issuer.OrderStatus{ OrderID: orderID, Status: "completed", @@ -489,7 +558,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer NotBefore: ¬Before, NotAfter: ¬After, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil case "pending", "processing": msg := fmt.Sprintf("certificate %s is %s", orderID, certResp.Status) @@ -498,7 +567,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil case "rejected", "denied", "failed": msg := fmt.Sprintf("certificate %s was %s", orderID, certResp.Status) @@ -507,7 +576,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "failed", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil default: msg := fmt.Sprintf("unknown certificate status: %s", certResp.Status) @@ -516,7 +585,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil } } diff --git a/internal/connector/issuer/globalsign/globalsign_failure_test.go b/internal/connector/issuer/globalsign/globalsign_failure_test.go index 3aba85f..bc94fd4 100644 --- a/internal/connector/issuer/globalsign/globalsign_failure_test.go +++ b/internal/connector/issuer/globalsign/globalsign_failure_test.go @@ -19,9 +19,10 @@ import ( func buildGlobalsignConnector(t *testing.T, baseURL string) *globalsign.Connector { t.Helper() cfg := &globalsign.Config{ - APIUrl: baseURL, - APIKey: "k", - APISecret: "s", + APIUrl: baseURL, + APIKey: "k", + APISecret: "s", + PollMaxWaitSeconds: 1, // keep async-pending tests fast } // Use NewWithHTTPClient with a test client so getHTTPClient short-circuits // (no mTLS cert loading). Custom transport is required so the @@ -125,19 +126,26 @@ func TestGlobalsign_GetOrderStatus_IssuedWithMalformedPEM_NonFatalParseDateWarni func TestGlobalsign_GetHTTPClient_NoMTLSCertPaths_ReturnsClientAsIs(t *testing.T) { // When ClientCertPath and ClientKeyPath are both empty, getHTTPClient // returns httpClient as-is — exercises that branch. + // + // PollMaxWaitSeconds=1 keeps this test fast: a network failure on + // the invalid host is now treated as transient by the bounded- + // polling Poller, so without the deadline the call blocks for + // the production-default 10 minutes. cfg := &globalsign.Config{ - APIUrl: "http://example.invalid", - APIKey: "k", - APISecret: "s", + APIUrl: "http://example.invalid", + APIKey: "k", + APISecret: "s", + PollMaxWaitSeconds: 1, // no cert paths } c := globalsign.NewWithHTTPClient(cfg, slog.Default(), &http.Client{}) - // GetOrderStatus will fail at HTTP do (invalid host), but getHTTPClient - // will have been exercised through the no-mTLS branch. - _, err := c.GetOrderStatus(context.Background(), "x") - if err == nil { - t.Errorf("expected error from invalid host") - } + // GetOrderStatus blocks briefly (1s) due to bounded polling, then + // returns a pending OrderStatus (transient network err did not + // become a permanent failure). The test exercises the + // no-mTLS branch in getHTTPClient — the post-poll status doesn't + // matter; we just need GetOrderStatus to be invoked through the + // branch. + _, _ = c.GetOrderStatus(context.Background(), "x") } func TestGlobalsign_GetHTTPClient_MTLSPathConfigured_LoadsKeyPair(t *testing.T) { diff --git a/internal/connector/issuer/globalsign/globalsign_test.go b/internal/connector/issuer/globalsign/globalsign_test.go index 6676c82..1b1a74d 100644 --- a/internal/connector/issuer/globalsign/globalsign_test.go +++ b/internal/connector/issuer/globalsign/globalsign_test.go @@ -355,9 +355,10 @@ func TestGlobalSignConnector(t *testing.T) { defer mockServer.Close() config := &globalsign.Config{ - APIUrl: mockServer.URL, - APIKey: "gs-test-key", - APISecret: "gs-test-secret", + APIUrl: mockServer.URL, + APIKey: "gs-test-key", + APISecret: "gs-test-secret", + PollMaxWaitSeconds: 1, // keep async-pending tests fast } connector := globalsign.NewWithHTTPClient(config, logger, httpClient) diff --git a/internal/connector/issuer/sectigo/sectigo.go b/internal/connector/issuer/sectigo/sectigo.go index 677a0a7..d1580d8 100644 --- a/internal/connector/issuer/sectigo/sectigo.go +++ b/internal/connector/issuer/sectigo/sectigo.go @@ -37,6 +37,7 @@ import ( "time" "github.com/shankar0123/certctl/internal/connector/issuer" + "github.com/shankar0123/certctl/internal/connector/issuer/asyncpoll" ) // Config represents the Sectigo Certificate Manager issuer connector configuration. @@ -69,6 +70,25 @@ type Config struct { // Default: "https://cert-manager.com/api". // Set via CERTCTL_SECTIGO_BASE_URL environment variable. BaseURL string `json:"base_url"` + + // PollMaxWaitSeconds caps how long GetOrderStatus blocks doing + // internal exponential-backoff polling before returning + // StillPending. Default 600 (10 minutes). Sectigo's + // collectNotReady sentinel maps to StillPending so recently- + // issued certs that aren't yet retrievable get the backoff + // schedule rather than tight-loop polling. + // + // Set via CERTCTL_SECTIGO_POLL_MAX_WAIT_SECONDS. Audit fix #5. + PollMaxWaitSeconds int `json:"poll_max_wait_seconds,omitempty"` +} + +// pollMaxWait returns the configured PollMaxWait as a time.Duration, +// or the asyncpoll package default if unset. +func (c *Config) pollMaxWait() time.Duration { + if c.PollMaxWaitSeconds <= 0 { + return asyncpoll.DefaultMaxWait + } + return time.Duration(c.PollMaxWaitSeconds) * time.Second } // Connector implements the issuer.Connector interface for Sectigo Certificate Manager. @@ -355,30 +375,94 @@ func (c *Connector) RevokeCertificate(ctx context.Context, request issuer.Revoca return nil } -// GetOrderStatus checks the status of a Sectigo certificate enrollment. -// If the enrollment is "Issued", downloads the certificate and returns it. -// If still pending, returns pending status for continued polling. +// GetOrderStatus checks the status of a Sectigo certificate enrollment +// using bounded internal polling (asyncpoll.Poll). One call blocks for +// up to PollMaxWait (default 10m) doing exponential backoff with +// jitter; returns Done with the cert, Failed with the rejection +// reason, or StillPending if the deadline expires (caller can +// re-invoke). +// +// Audit fix #5 Phase 2: previously each scheduler tick made one HTTP +// call against an unready order. Sectigo's collectNotReady sentinel +// (cert approved but not yet generated) now maps to StillPending and +// rides the backoff schedule rather than tight-loop polling. func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer.OrderStatus, error) { c.logger.Debug("checking Sectigo enrollment status", "ssl_id", orderID) - // Parse sslId from string + // Parse sslId from string once at entry — invalid ID is a + // permanent error, no point polling. var sslId int if _, err := fmt.Sscanf(orderID, "%d", &sslId); err != nil { return nil, fmt.Errorf("invalid Sectigo ssl_id: %s", orderID) } + var done *issuer.OrderStatus + var lastPendingMsg string + cfg := asyncpoll.Config{MaxWait: c.config.pollMaxWait()} + + res, err := asyncpoll.Poll(ctx, cfg, func(ctx context.Context) (asyncpoll.Result, error) { + status, result, pollErr := c.pollEnrollmentOnce(ctx, sslId, orderID) + if status != nil { + switch result { + case asyncpoll.Done: + done = status + case asyncpoll.StillPending: + if status.Message != nil { + lastPendingMsg = *status.Message + } + } + } + return result, pollErr + }) + + now := time.Now() + switch res { + case asyncpoll.Done: + return done, nil + case asyncpoll.Failed: + return nil, err + default: + msg := lastPendingMsg + if msg == "" { + msg = fmt.Sprintf("enrollment %s still pending after PollMaxWait", orderID) + } + return &issuer.OrderStatus{ + OrderID: orderID, + Status: "pending", + Message: &msg, + UpdatedAt: now, + }, nil + } +} + +// pollEnrollmentOnce makes one HTTP GET against the Sectigo SCM +// status endpoint and translates the response into an asyncpoll.Result +// plus (when applicable) a populated OrderStatus. +// +// collectNotReady is the load-bearing Sectigo sentinel: even when +// the SCM status endpoint reports "Issued", the cert may not yet be +// retrievable from the collect endpoint. We treat this as +// StillPending so the backoff schedule applies. +func (c *Connector) pollEnrollmentOnce(ctx context.Context, sslId int, orderID string) (*issuer.OrderStatus, asyncpoll.Result, error) { status, err := c.checkStatus(ctx, sslId) if err != nil { - return nil, err + // Triage by examining the wrapped status code: 4xx (not 429) + // is permanent (404 = enrollment doesn't exist, 400 = bad + // request, 401/403 = auth). Parse failures are also + // permanent — the upstream's response shape is broken. + // 5xx / 429 / network errors are transient and ride the + // backoff schedule. + if isPermanentStatusError(err) { + return nil, asyncpoll.Failed, err + } + return nil, asyncpoll.StillPending, err } now := time.Now() - switch status.Status { case "Issued": certPEM, chainPEM, serial, notBefore, notAfter, collectErr := c.collectCertificate(ctx, sslId) if collectErr != nil { - // Cert approved but not yet generated — treat as pending if isCollectNotReady(collectErr) { msg := fmt.Sprintf("enrollment %s is issued but certificate not yet generated", orderID) return &issuer.OrderStatus{ @@ -386,15 +470,11 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil } - return nil, fmt.Errorf("failed to collect certificate: %w", collectErr) + return nil, asyncpoll.Failed, fmt.Errorf("failed to collect certificate: %w", collectErr) } - - c.logger.Info("Sectigo enrollment completed", - "ssl_id", orderID, - "serial", serial) - + c.logger.Info("Sectigo enrollment completed", "ssl_id", orderID, "serial", serial) return &issuer.OrderStatus{ OrderID: orderID, Status: "completed", @@ -404,7 +484,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer NotBefore: ¬Before, NotAfter: ¬After, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil case "Applied", "Pending": msg := fmt.Sprintf("enrollment %s is %s", orderID, status.Status) @@ -413,7 +493,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil case "Rejected": msg := fmt.Sprintf("enrollment %s was rejected", orderID) @@ -422,7 +502,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "failed", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil case "Revoked", "Expired", "Not Enrolled": msg := fmt.Sprintf("enrollment %s has status: %s", orderID, status.Status) @@ -431,7 +511,7 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "failed", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.Done, nil default: msg := fmt.Sprintf("unknown enrollment status: %s", status.Status) @@ -440,10 +520,32 @@ func (c *Connector) GetOrderStatus(ctx context.Context, orderID string) (*issuer Status: "pending", Message: &msg, UpdatedAt: now, - }, nil + }, asyncpoll.StillPending, nil } } +// isPermanentStatusError reports whether an error returned from +// checkStatus represents a permanent client-side failure (4xx other +// than 429, or a body-parse failure). Used by pollEnrollmentOnce to +// distinguish "stop polling" from "transient; keep polling". +// +// Heuristic-based on the error wrap shape: checkStatus formats HTTP +// status errors as "Sectigo status returned %d:" so we can grep for +// known permanent codes. Parse-failure errors contain "parse status +// response". +func isPermanentStatusError(err error) bool { + if err == nil { + return false + } + msg := err.Error() + for _, code := range []string{"returned 400", "returned 401", "returned 403", "returned 404"} { + if strings.Contains(msg, code) { + return true + } + } + return strings.Contains(msg, "parse status response") +} + // checkStatus retrieves the enrollment status from Sectigo. func (c *Connector) checkStatus(ctx context.Context, sslId int) (*statusResponse, error) { statusURL := fmt.Sprintf("%s/ssl/v1/%d", c.config.BaseURL, sslId) diff --git a/internal/connector/issuer/sectigo/sectigo_failure_test.go b/internal/connector/issuer/sectigo/sectigo_failure_test.go index f02d8e8..fd3404a 100644 --- a/internal/connector/issuer/sectigo/sectigo_failure_test.go +++ b/internal/connector/issuer/sectigo/sectigo_failure_test.go @@ -20,13 +20,14 @@ func buildSectigoConnector(t *testing.T, baseURL string) *sectigo.Connector { t.Helper() c := sectigo.New(nil, slog.Default()) cfg := sectigo.Config{ - BaseURL: baseURL, - CustomerURI: "tcust", - Login: "user", - Password: "pw", - CertType: 1, - OrgID: 2, - Term: 365, + BaseURL: baseURL, + CustomerURI: "tcust", + Login: "user", + Password: "pw", + CertType: 1, + OrgID: 2, + Term: 365, + PollMaxWaitSeconds: 1, // keep async-pending tests fast } raw, _ := json.Marshal(cfg) if err := c.ValidateConfig(context.Background(), raw); err != nil { diff --git a/internal/connector/issuer/sectigo/sectigo_test.go b/internal/connector/issuer/sectigo/sectigo_test.go index e9347b6..676d1b5 100644 --- a/internal/connector/issuer/sectigo/sectigo_test.go +++ b/internal/connector/issuer/sectigo/sectigo_test.go @@ -381,11 +381,12 @@ func TestSectigoConnector(t *testing.T) { defer srv.Close() config := §igo.Config{ - CustomerURI: "test-org", - Login: "api-user", - Password: "api-pass", - OrgID: 12345, - BaseURL: srv.URL, + CustomerURI: "test-org", + Login: "api-user", + Password: "api-pass", + OrgID: 12345, + BaseURL: srv.URL, + PollMaxWaitSeconds: 1, // keep pending tests fast } connector := sectigo.New(config, logger) @@ -449,11 +450,12 @@ func TestSectigoConnector(t *testing.T) { defer srv.Close() config := §igo.Config{ - CustomerURI: "test-org", - Login: "api-user", - Password: "api-pass", - OrgID: 12345, - BaseURL: srv.URL, + CustomerURI: "test-org", + Login: "api-user", + Password: "api-pass", + OrgID: 12345, + BaseURL: srv.URL, + PollMaxWaitSeconds: 1, // keep collect-not-ready tests fast } connector := sectigo.New(config, logger)