// Copyright 2026 certctl LLC. All rights reserved. // SPDX-License-Identifier: BUSL-1.1 // Package asyncpoll provides bounded polling for async-CA issuer // connectors (DigiCert, Sectigo, Entrust, GlobalSign). // // Closes the #5 acquisition-readiness blocker from the 2026-05-01 // issuer coverage audit. Pre-fix, each async-CA connector had its own // GetOrderStatus path that polled the upstream CA on every scheduler // tick with no exponential backoff, no max-retry cap, and no deadline. // The scheduler's tick rate (typically 30s) was the only throttle — // an unready order got hit every 30s indefinitely, and a 429 from a // rate-limited upstream produced "retry on the next tick" which // re-fanned-out the same call. // // This package consolidates the four implementations behind a single // Poller with: // // - Exponential backoff: 5s → 15s → 45s → 2m → 5m capped (default). // - ±20% jitter at every wait so multiple certctl instances don't // synchronize on the upstream CA's rate-limit window. // - MaxWait deadline (default 10m) — a hard cap on how long a // single Poll call blocks before returning StillPending. The // scheduler can re-enqueue the job for a future tick if the // operator's policy allows further attempts. // - ctx-aware cancellation — propagates the caller's deadline / // cancel through every wait. // // Issuer-specific HTTP request shapes live in the PollFunc closure // passed to Poll; the backoff math is shared. package asyncpoll import ( "context" "errors" "fmt" "math/rand/v2" "time" ) // Result is the outcome of one poll attempt. type Result int const ( // StillPending — the upstream is still working on the order; // keep polling. The Poller waits, then invokes the PollFunc // again (subject to MaxWait). StillPending Result = iota // Done — the order succeeded. The Poller returns immediately // with the (Done, nil) tuple to the caller. Done // Failed — permanent failure (rejected, denied, malformed // response). The Poller returns immediately with (Failed, err) // to the caller; no further polling. Failed ) // PollFunc runs ONE poll attempt and reports the outcome. // // Returning (StillPending, nil) signals "transient; keep polling" // and the Poller waits with the backoff schedule. // // Returning (StillPending, err) ALSO keeps polling — useful when // the upstream returned a transient HTTP error (5xx, network blip) // that the caller wants logged but not treated as fatal. // // Returning (Done, nil) signals success. // // Returning (Failed, err) signals permanent failure; err must be // non-nil so the caller can include it in the upstream-facing // status message. type PollFunc func(ctx context.Context) (Result, error) // Config holds the backoff knobs. All fields are optional; zero // values fall back to package defaults documented inline. type Config struct { // MaxWait — hard cap on total wall-clock time inside Poll. After // this expires, Poll returns (StillPending, ErrMaxWait). Default // 10 minutes; tune via per-issuer Config.PollMaxWait. MaxWait time.Duration // InitialWait — first backoff (after the first poll attempt). // Default 5 seconds. InitialWait time.Duration // MaxBackoff — cap on per-iteration wait. Default 5 minutes. // Backoff schedule: InitialWait → 3× → 3× → ... capped at // MaxBackoff (so 5s → 15s → 45s → 2m15s → 5m → 5m → ... by // default). MaxBackoff time.Duration // JitterPct — fractional jitter applied to every wait, ±value. // Default 0.2 (i.e., ±20%). Set to 0 for deterministic timing // in tests. JitterPct float64 } // ErrMaxWait is returned (alongside StillPending) when the total // wall-clock time inside Poll exceeded Config.MaxWait. Callers can // errors.Is against this sentinel to distinguish "deadline exhausted" // from "fn errored". var ErrMaxWait = errors.New("asyncpoll: MaxWait deadline exceeded") // Defaults. Exported so per-issuer tests can reference the same // schedule without duplicating constants. const ( DefaultMaxWait = 10 * time.Minute DefaultInitialWait = 5 * time.Second DefaultMaxBackoff = 5 * time.Minute DefaultJitterPct = 0.2 ) // Phase 6 SCALE-M3 closure (2026-05-14): operator-overridable global // default for the package-level MaxWait fallback. Priority chain for // every Poll() call: // // 1. cfg.MaxWait > 0 → per-call value (set by the caller, usually // from a per-connector env like CERTCTL_DIGICERT_POLL_MAX_WAIT_SECONDS) // 2. effectiveDefaultMaxWait != nil → process-wide override set via // SetDefaultMaxWait (from CERTCTL_ASYNC_POLL_MAX_WAIT_SECONDS at // server boot) // 3. DefaultMaxWait constant (10 minutes) // // Pre-Phase-6, paths (1) + (3) existed. Path (2) lets an operator tune // the global fallback in one place without setting four per-connector // envs (digicert, entrust, globalsign, sectigo). var effectiveDefaultMaxWait *time.Duration // SetDefaultMaxWait overrides the package-level DefaultMaxWait // fallback for the rest of the process lifetime. Intended to be // called exactly once at boot from cmd/server/main.go after reading // CERTCTL_ASYNC_POLL_MAX_WAIT_SECONDS. Subsequent calls overwrite the // previous override. A zero or negative duration clears the override // (restoring the constant default). // // Per-connector overrides (caller-provided cfg.MaxWait) take // precedence over this global default. func SetDefaultMaxWait(d time.Duration) { if d <= 0 { effectiveDefaultMaxWait = nil return } effectiveDefaultMaxWait = &d } // currentDefaultMaxWait returns the effective default — the // SetDefaultMaxWait override if one is in place, else the package's // DefaultMaxWait constant. func currentDefaultMaxWait() time.Duration { if effectiveDefaultMaxWait != nil { return *effectiveDefaultMaxWait } return DefaultMaxWait } // Poll runs fn with exponential backoff + jitter until Done, Failed, // MaxWait, or ctx cancellation. // // On Done — returns (Done, nil). The cert is ready; caller proceeds. // // On Failed — returns (Failed, fnErr). Permanent; no retry. // // On MaxWait timeout — returns (StillPending, ErrMaxWait). The // upstream isn't done yet but the deadline exhausted. Scheduler // can re-enqueue. // // On ctx cancel — returns (StillPending, ctx.Err()). Caller's // deadline / shutdown signal won. // // On fn returning (StillPending, transientErr) — the err is logged // by the closure (not by Poll), and Poll continues with the // backoff schedule. The transient err is preserved as the last // error in case MaxWait or ctx-cancel later fires. func Poll(ctx context.Context, cfg Config, fn PollFunc) (Result, error) { if cfg.MaxWait <= 0 { cfg.MaxWait = currentDefaultMaxWait() } if cfg.InitialWait <= 0 { cfg.InitialWait = DefaultInitialWait } if cfg.MaxBackoff <= 0 { cfg.MaxBackoff = DefaultMaxBackoff } if cfg.JitterPct < 0 { cfg.JitterPct = 0 } deadline := time.Now().Add(cfg.MaxWait) wait := cfg.InitialWait var lastErr error for { result, err := fn(ctx) switch result { case Done: return Done, nil case Failed: return Failed, err case StillPending: lastErr = err // may be nil (clean keep-polling) or a transient err default: return Failed, fmt.Errorf("asyncpoll: PollFunc returned unknown Result %d", result) } // Compute the next wait with jitter. wait is the cumulative // backoff base; jittered is what actually sleeps. jittered := jitterDuration(wait, cfg.JitterPct) // If the next wait would push us past the deadline, return // StillPending now rather than sleeping uselessly. now := time.Now() remaining := deadline.Sub(now) if remaining <= 0 { if lastErr != nil { return StillPending, fmt.Errorf("%w (last err: %v)", ErrMaxWait, lastErr) } return StillPending, ErrMaxWait } if jittered > remaining { jittered = remaining } // Sleep, but respect ctx cancellation. select { case <-ctx.Done(): if lastErr != nil { return StillPending, fmt.Errorf("%w (last err: %v)", ctx.Err(), lastErr) } return StillPending, ctx.Err() case <-time.After(jittered): } // Multiplicative backoff (3×) capped at MaxBackoff. wait *= 3 if wait > cfg.MaxBackoff { wait = cfg.MaxBackoff } } } // jitterDuration applies ±pct jitter to base. Returned duration is // always positive (a base of 0 returns 0 regardless of pct). // // Visible for testing — the test asserts the bounded envelope rather // than the exact value. func jitterDuration(base time.Duration, pct float64) time.Duration { if base <= 0 || pct <= 0 { return base } // rand/v2's Float64 returns [0, 1); we want [-pct, +pct]. delta := (rand.Float64()*2 - 1) * pct jittered := time.Duration(float64(base) * (1 + delta)) if jittered < 0 { jittered = 0 } return jittered }