mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 15:11:29 +00:00
74d6b462a4
Trivial whitespace fix: gofmt collapsed three trailing-comment columns that I'd hand-aligned in the test file. Local sandbox missed this because the per-file gofmt run earlier in the commit cycle was scoped to the changed-files list and didn't include the test file at the final write moment; CI's project-wide `gofmt -l .` caught it. Behavior unchanged.
195 lines
6.8 KiB
Go
195 lines
6.8 KiB
Go
// Copyright (c) certctl
|
||
// SPDX-License-Identifier: BSL-1.1
|
||
|
||
package service
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"net"
|
||
"sync"
|
||
"testing"
|
||
"time"
|
||
)
|
||
|
||
// TestIssuanceMetrics_RecordAndSnapshot exercises the happy-path
|
||
// counter + histogram + failure recording. Asserts:
|
||
// - SnapshotCounters returns the expected (issuer_type, outcome, count) tuples
|
||
// - SnapshotDurations returns cumulative bucket counts
|
||
// - SnapshotFailures returns the expected (issuer_type, error_class, count) tuples
|
||
// - BucketBoundaries returns a copy that doesn't share backing storage
|
||
func TestIssuanceMetrics_RecordAndSnapshot(t *testing.T) {
|
||
m := NewIssuanceMetrics(DefaultIssuanceBucketBoundaries)
|
||
|
||
// Record three issuances: two success (one fast, one slow), one failure.
|
||
m.RecordIssuance("local", "success", 50*time.Millisecond) // 0.05 bucket
|
||
m.RecordIssuance("local", "success", 2*time.Second) // 2.5 bucket
|
||
m.RecordIssuance("digicert", "failure", 90*time.Second) // 120 bucket
|
||
m.RecordFailure("digicert", "rate_limited")
|
||
|
||
counters := m.SnapshotCounters()
|
||
if len(counters) != 2 {
|
||
t.Fatalf("expected 2 counter entries, got %d", len(counters))
|
||
}
|
||
for _, c := range counters {
|
||
switch {
|
||
case c.IssuerType == "local" && c.Outcome == "success":
|
||
if c.Count != 2 {
|
||
t.Errorf("local/success: want 2, got %d", c.Count)
|
||
}
|
||
case c.IssuerType == "digicert" && c.Outcome == "failure":
|
||
if c.Count != 1 {
|
||
t.Errorf("digicert/failure: want 1, got %d", c.Count)
|
||
}
|
||
default:
|
||
t.Errorf("unexpected counter entry: %+v", c)
|
||
}
|
||
}
|
||
|
||
failures := m.SnapshotFailures()
|
||
if len(failures) != 1 {
|
||
t.Fatalf("expected 1 failure entry, got %d", len(failures))
|
||
}
|
||
if failures[0].IssuerType != "digicert" || failures[0].ErrorClass != "rate_limited" || failures[0].Count != 1 {
|
||
t.Errorf("unexpected failure entry: %+v", failures[0])
|
||
}
|
||
|
||
durations := m.SnapshotDurations()
|
||
if len(durations) != 2 {
|
||
t.Fatalf("expected 2 duration entries, got %d", len(durations))
|
||
}
|
||
|
||
// BucketBoundaries: returned slice must be a copy.
|
||
b1 := m.BucketBoundaries()
|
||
b2 := m.BucketBoundaries()
|
||
if &b1[0] == &b2[0] {
|
||
t.Error("BucketBoundaries should return a copy, not shared storage")
|
||
}
|
||
}
|
||
|
||
// TestIssuanceMetrics_HistogramCumulative pins the cumulative-buckets
|
||
// contract. Prometheus histograms require buckets to be cumulative —
|
||
// `le=0.5` includes everything <= 0.5, including <= 0.05 and <= 0.1.
|
||
// Off-by-one here corrupts every quantile query downstream.
|
||
func TestIssuanceMetrics_HistogramCumulative(t *testing.T) {
|
||
m := NewIssuanceMetrics([]float64{0.1, 0.5, 1.0})
|
||
|
||
// Observe 100ms (= 0.1s exactly).
|
||
m.RecordIssuance("local", "success", 100*time.Millisecond)
|
||
|
||
durs := m.SnapshotDurations()
|
||
if len(durs) != 1 {
|
||
t.Fatalf("expected 1 duration entry, got %d", len(durs))
|
||
}
|
||
|
||
// Boundaries: [0.1, 0.5, 1.0]. 100ms falls into 0.1 bucket and
|
||
// every larger bucket (cumulative). Sum = 0.1, count = 1.
|
||
want := []uint64{1, 1, 1}
|
||
for i, w := range want {
|
||
if durs[0].Buckets[i] != w {
|
||
t.Errorf("bucket[%d]: want %d, got %d", i, w, durs[0].Buckets[i])
|
||
}
|
||
}
|
||
if durs[0].Sum < 0.099 || durs[0].Sum > 0.101 {
|
||
t.Errorf("sum: want ~0.1, got %v", durs[0].Sum)
|
||
}
|
||
if durs[0].Count != 1 {
|
||
t.Errorf("count: want 1, got %d", durs[0].Count)
|
||
}
|
||
|
||
// Observe 750ms — falls into 1.0 bucket only (>0.1, >0.5).
|
||
m.RecordIssuance("local", "success", 750*time.Millisecond)
|
||
|
||
durs = m.SnapshotDurations()
|
||
want = []uint64{1, 1, 2} // 100ms in all 3, 750ms in only the 1.0 bucket
|
||
for i, w := range want {
|
||
if durs[0].Buckets[i] != w {
|
||
t.Errorf("after 750ms — bucket[%d]: want %d, got %d", i, w, durs[0].Buckets[i])
|
||
}
|
||
}
|
||
}
|
||
|
||
// TestIssuanceMetrics_Concurrency stresses RecordIssuance under 100
|
||
// goroutines × 1000 ops to assert atomic counter integrity. Race-
|
||
// detector clean is non-optional for this test (the whole point of
|
||
// IssuanceMetrics is concurrent recording from many service
|
||
// goroutines).
|
||
func TestIssuanceMetrics_Concurrency(t *testing.T) {
|
||
m := NewIssuanceMetrics(DefaultIssuanceBucketBoundaries)
|
||
|
||
const goroutines = 100
|
||
const opsPerGoroutine = 1000
|
||
|
||
var wg sync.WaitGroup
|
||
wg.Add(goroutines)
|
||
for i := 0; i < goroutines; i++ {
|
||
go func() {
|
||
defer wg.Done()
|
||
for j := 0; j < opsPerGoroutine; j++ {
|
||
m.RecordIssuance("local", "success", 50*time.Millisecond)
|
||
}
|
||
}()
|
||
}
|
||
wg.Wait()
|
||
|
||
counters := m.SnapshotCounters()
|
||
if len(counters) != 1 {
|
||
t.Fatalf("expected 1 counter entry, got %d", len(counters))
|
||
}
|
||
wantTotal := uint64(goroutines * opsPerGoroutine)
|
||
if counters[0].Count != wantTotal {
|
||
t.Errorf("counter under contention: want %d, got %d", wantTotal, counters[0].Count)
|
||
}
|
||
|
||
durs := m.SnapshotDurations()
|
||
if durs[0].Count != wantTotal {
|
||
t.Errorf("histogram count under contention: want %d, got %d", wantTotal, durs[0].Count)
|
||
}
|
||
}
|
||
|
||
// TestClassifyError exercises every branch of the closed-enum
|
||
// classifier. The classification logic is the load-bearing piece of
|
||
// the failure metric — misclassification doesn't break operators, but
|
||
// it makes their alerts noisier. Each enum value has at least one
|
||
// representative input.
|
||
func TestClassifyError(t *testing.T) {
|
||
cases := []struct {
|
||
name string
|
||
err error
|
||
want string
|
||
}{
|
||
{"context_canceled", context.Canceled, "timeout"},
|
||
{"context_deadline", context.DeadlineExceeded, "timeout"},
|
||
{"timeout_substring", errors.New("operation deadline exceeded"), "timeout"},
|
||
{"i_o_timeout", errors.New("read tcp: i/o timeout"), "timeout"},
|
||
{"net_op_error", &net.OpError{Op: "dial", Net: "tcp", Err: errors.New("connection refused")}, "network"},
|
||
{"unauthorized_4xx", errors.New("DigiCert: 401 Unauthorized"), "auth"},
|
||
{"access_denied_aws", errors.New("AccessDeniedException: not authorized"), "auth"},
|
||
{"forbidden_403", errors.New("forbidden: insufficient permissions"), "auth"},
|
||
{"rate_limited_429", errors.New("Sectigo: 429 too many requests"), "rate_limited"},
|
||
{"throttled", errors.New("ThrottlingException: rate exceeded"), "rate_limited"},
|
||
{"validation_csr", errors.New("malformed CSR: invalid PEM block"), "validation"},
|
||
{"validation_invalid", errors.New("invalid signing algorithm"), "validation"},
|
||
{"upstream_503", errors.New("ServiceUnavailable: 503"), "upstream_5xx"},
|
||
{"upstream_500_internal", errors.New("Internal Server Error: 500"), "upstream_5xx"},
|
||
{"upstream_404", errors.New("NotFound: 404 cert not found"), "upstream_4xx"},
|
||
{"network_no_host", errors.New("dial tcp: no such host"), "network"},
|
||
{"other_unmatched", errors.New("something completely unexpected happened"), "other"},
|
||
}
|
||
|
||
for _, tc := range cases {
|
||
t.Run(tc.name, func(t *testing.T) {
|
||
got := ClassifyError(tc.err)
|
||
if got != tc.want {
|
||
t.Errorf("ClassifyError(%q): want %q, got %q", tc.err.Error(), tc.want, got)
|
||
}
|
||
})
|
||
}
|
||
|
||
// Special case: nil → "" so callers that accidentally call us
|
||
// with a nil err don't bump the failure counter.
|
||
if got := ClassifyError(nil); got != "" {
|
||
t.Errorf("ClassifyError(nil): want \"\", got %q", got)
|
||
}
|
||
}
|