Files
certctl/internal/service/issuance_metrics_test.go
shankar0123 74d6b462a4 metrics: gofmt issuance_metrics_test.go — fix CI
Trivial whitespace fix: gofmt collapsed three trailing-comment columns
that I'd hand-aligned in the test file. Local sandbox missed this
because the per-file gofmt run earlier in the commit cycle was scoped
to the changed-files list and didn't include the test file at the
final write moment; CI's project-wide `gofmt -l .` caught it.

Behavior unchanged.
2026-05-02 01:27:33 +00:00

195 lines
6.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) certctl
// SPDX-License-Identifier: BSL-1.1
package service
import (
"context"
"errors"
"net"
"sync"
"testing"
"time"
)
// TestIssuanceMetrics_RecordAndSnapshot exercises the happy-path
// counter + histogram + failure recording. Asserts:
//   - SnapshotCounters returns the expected (issuer_type, outcome, count) tuples
//   - SnapshotFailures returns the expected (issuer_type, error_class, count) tuples
//   - SnapshotDurations returns the expected number of entries (bucket
//     contents are pinned by TestIssuanceMetrics_HistogramCumulative)
//   - BucketBoundaries returns a copy that doesn't share backing storage
func TestIssuanceMetrics_RecordAndSnapshot(t *testing.T) {
	m := NewIssuanceMetrics(DefaultIssuanceBucketBoundaries)

	// Record three issuances: two success (one fast, one slow), one failure.
	// The trailing notes name the bucket each observation is expected to
	// land in under the default boundaries.
	m.RecordIssuance("local", "success", 50*time.Millisecond) // 0.05 bucket
	m.RecordIssuance("local", "success", 2*time.Second)       // 2.5 bucket
	m.RecordIssuance("digicert", "failure", 90*time.Second)   // 120 bucket
	m.RecordFailure("digicert", "rate_limited")

	// Two distinct (issuer_type, outcome) pairs were recorded, so exactly
	// two counter entries are expected; any other pair is a bug.
	counters := m.SnapshotCounters()
	if len(counters) != 2 {
		t.Fatalf("expected 2 counter entries, got %d", len(counters))
	}
	for _, c := range counters {
		switch {
		case c.IssuerType == "local" && c.Outcome == "success":
			if c.Count != 2 {
				t.Errorf("local/success: want 2, got %d", c.Count)
			}
		case c.IssuerType == "digicert" && c.Outcome == "failure":
			if c.Count != 1 {
				t.Errorf("digicert/failure: want 1, got %d", c.Count)
			}
		default:
			t.Errorf("unexpected counter entry: %+v", c)
		}
	}

	// Only one RecordFailure call was made, so a single failure entry
	// with count 1 is expected.
	failures := m.SnapshotFailures()
	if len(failures) != 1 {
		t.Fatalf("expected 1 failure entry, got %d", len(failures))
	}
	if failures[0].IssuerType != "digicert" || failures[0].ErrorClass != "rate_limited" || failures[0].Count != 1 {
		t.Errorf("unexpected failure entry: %+v", failures[0])
	}

	// Only the number of duration entries is checked here.
	durations := m.SnapshotDurations()
	if len(durations) != 2 {
		t.Fatalf("expected 2 duration entries, got %d", len(durations))
	}

	// BucketBoundaries: returned slice must be a copy. Guard the length
	// first so an empty result fails the test with a clear message
	// instead of panicking on the index expressions below.
	b1 := m.BucketBoundaries()
	b2 := m.BucketBoundaries()
	if len(b1) == 0 || len(b2) == 0 {
		t.Fatalf("expected non-empty bucket boundaries, got lengths %d and %d", len(b1), len(b2))
	}
	if &b1[0] == &b2[0] {
		t.Error("BucketBoundaries should return a copy, not shared storage")
	}
}
// TestIssuanceMetrics_HistogramCumulative pins the cumulative-buckets
// contract. Prometheus histograms require buckets to be cumulative —
// `le=0.5` includes everything <= 0.5, including <= 0.05 and <= 0.1.
// Off-by-one here corrupts every quantile query downstream.
//
// The test records two observations against boundaries [0.1, 0.5, 1.0]
// and checks the per-bucket counts, the running sum and the total count.
// Length guards precede every index expression so a malformed snapshot
// fails the test with a message rather than panicking.
func TestIssuanceMetrics_HistogramCumulative(t *testing.T) {
	m := NewIssuanceMetrics([]float64{0.1, 0.5, 1.0})

	// Observe 100ms (= 0.1s exactly). An observation equal to a boundary
	// must be counted in that boundary's bucket.
	m.RecordIssuance("local", "success", 100*time.Millisecond)
	durs := m.SnapshotDurations()
	if len(durs) != 1 {
		t.Fatalf("expected 1 duration entry, got %d", len(durs))
	}

	// Boundaries: [0.1, 0.5, 1.0]. 100ms falls into 0.1 bucket and
	// every larger bucket (cumulative). Sum = 0.1, count = 1.
	want := []uint64{1, 1, 1}
	if len(durs[0].Buckets) < len(want) {
		t.Fatalf("expected at least %d buckets, got %d", len(want), len(durs[0].Buckets))
	}
	for i, w := range want {
		if durs[0].Buckets[i] != w {
			t.Errorf("bucket[%d]: want %d, got %d", i, w, durs[0].Buckets[i])
		}
	}
	// Sum is a float; compare with a small tolerance around 0.1.
	if durs[0].Sum < 0.099 || durs[0].Sum > 0.101 {
		t.Errorf("sum: want ~0.1, got %v", durs[0].Sum)
	}
	if durs[0].Count != 1 {
		t.Errorf("count: want 1, got %d", durs[0].Count)
	}

	// Observe 750ms — falls into 1.0 bucket only (>0.1, >0.5).
	m.RecordIssuance("local", "success", 750*time.Millisecond)
	durs = m.SnapshotDurations()
	// Same (issuer_type, outcome) pair as before, so still one entry.
	if len(durs) != 1 {
		t.Fatalf("after 750ms — expected 1 duration entry, got %d", len(durs))
	}
	want = []uint64{1, 1, 2} // 100ms in all 3, 750ms in only the 1.0 bucket
	if len(durs[0].Buckets) < len(want) {
		t.Fatalf("after 750ms — expected at least %d buckets, got %d", len(want), len(durs[0].Buckets))
	}
	for i, w := range want {
		if durs[0].Buckets[i] != w {
			t.Errorf("after 750ms — bucket[%d]: want %d, got %d", i, w, durs[0].Buckets[i])
		}
	}
}
// TestIssuanceMetrics_Concurrency stresses RecordIssuance under 100
// goroutines × 1000 ops to assert atomic counter integrity. Race-
// detector clean is non-optional for this test (the whole point of
// IssuanceMetrics is concurrent recording from many service
// goroutines).
//
// Every goroutine records the same (issuer_type, outcome) pair, so the
// snapshots must each contain a single entry whose count equals
// goroutines × opsPerGoroutine.
func TestIssuanceMetrics_Concurrency(t *testing.T) {
	m := NewIssuanceMetrics(DefaultIssuanceBucketBoundaries)

	const goroutines = 100
	const opsPerGoroutine = 1000

	// Fan out the writers and wait for all of them before snapshotting,
	// so the assertions below observe the final totals.
	var wg sync.WaitGroup
	wg.Add(goroutines)
	for i := 0; i < goroutines; i++ {
		go func() {
			defer wg.Done()
			for j := 0; j < opsPerGoroutine; j++ {
				m.RecordIssuance("local", "success", 50*time.Millisecond)
			}
		}()
	}
	wg.Wait()

	wantTotal := uint64(goroutines * opsPerGoroutine)

	counters := m.SnapshotCounters()
	if len(counters) != 1 {
		t.Fatalf("expected 1 counter entry, got %d", len(counters))
	}
	if counters[0].Count != wantTotal {
		t.Errorf("counter under contention: want %d, got %d", wantTotal, counters[0].Count)
	}

	// Guard the length before indexing so a missing histogram entry is
	// reported as a test failure rather than an index-out-of-range panic.
	durs := m.SnapshotDurations()
	if len(durs) != 1 {
		t.Fatalf("expected 1 duration entry, got %d", len(durs))
	}
	if durs[0].Count != wantTotal {
		t.Errorf("histogram count under contention: want %d, got %d", wantTotal, durs[0].Count)
	}
}
// TestClassifyError walks a table of representative errors through
// ClassifyError and checks the returned class string for each one.
// The table holds at least one input per class that appears in it:
// timeout, network, auth, rate_limited, validation, upstream_5xx,
// upstream_4xx and other. A nil error is checked separately and must
// classify as the empty string.
func TestClassifyError(t *testing.T) {
	// classifyCase pairs one input error with the class it should map to.
	type classifyCase struct {
		label string
		input error
		class string
	}

	table := []classifyCase{
		{label: "context_canceled", input: context.Canceled, class: "timeout"},
		{label: "context_deadline", input: context.DeadlineExceeded, class: "timeout"},
		{label: "timeout_substring", input: errors.New("operation deadline exceeded"), class: "timeout"},
		{label: "i_o_timeout", input: errors.New("read tcp: i/o timeout"), class: "timeout"},
		{label: "net_op_error", input: &net.OpError{Op: "dial", Net: "tcp", Err: errors.New("connection refused")}, class: "network"},
		{label: "unauthorized_4xx", input: errors.New("DigiCert: 401 Unauthorized"), class: "auth"},
		{label: "access_denied_aws", input: errors.New("AccessDeniedException: not authorized"), class: "auth"},
		{label: "forbidden_403", input: errors.New("forbidden: insufficient permissions"), class: "auth"},
		{label: "rate_limited_429", input: errors.New("Sectigo: 429 too many requests"), class: "rate_limited"},
		{label: "throttled", input: errors.New("ThrottlingException: rate exceeded"), class: "rate_limited"},
		{label: "validation_csr", input: errors.New("malformed CSR: invalid PEM block"), class: "validation"},
		{label: "validation_invalid", input: errors.New("invalid signing algorithm"), class: "validation"},
		{label: "upstream_503", input: errors.New("ServiceUnavailable: 503"), class: "upstream_5xx"},
		{label: "upstream_500_internal", input: errors.New("Internal Server Error: 500"), class: "upstream_5xx"},
		{label: "upstream_404", input: errors.New("NotFound: 404 cert not found"), class: "upstream_4xx"},
		{label: "network_no_host", input: errors.New("dial tcp: no such host"), class: "network"},
		{label: "other_unmatched", input: errors.New("something completely unexpected happened"), class: "other"},
	}

	// One subtest per row, named after the row's label.
	for idx := range table {
		row := table[idx]
		t.Run(row.label, func(t *testing.T) {
			actual := ClassifyError(row.input)
			if actual == row.class {
				return
			}
			t.Errorf("ClassifyError(%q): want %q, got %q", row.input.Error(), row.class, actual)
		})
	}

	// Special case: nil → "" so callers that accidentally call us
	// with a nil err don't bump the failure counter.
	if nilClass := ClassifyError(nil); nilClass != "" {
		t.Errorf("ClassifyError(nil): want \"\", got %q", nilClass)
	}
}