Files
certctl/internal/service/approval_metrics.go
T
shankar0123 2886b58daf chore: drop 'Infisical' label from internal references
Strategic naming cleanup. Earlier doc-comments + commit messages framed Rank
4 / Rank 5 / Rank 7 work as 'Rank N of the 2026-05-03 Infisical deep-research
deliverable' — the 'Infisical' qualifier was a holdover from the original
deep-research framing where Infisical (a competing secrets-management
platform) was the comparator. Keeping the comparator's name in our source
adds noise without value; an external reader sees 'Infisical' and assumes a
dependency or shared lineage rather than reading it as the competitive
context it was.

Mechanical sed across 34 files (32 source / docs + 2 follow-up Python passes
to collapse 'deep-research deep-research' duplicates that emerged where the
original phrase wrapped across lines):

  s|Infisical deep-research|deep-research|g
  s|infisical-deep-research-results|deep-research-results-2026-05-03|g
  s|infisical-deep-research-prompt|deep-research-prompt-2026-05-03|g
  s|infisical-deep-research|deep-research|g
  s|Infisical|deep-research|g
  s|deep-research deep-research|deep-research|g  # collapse-pass

Net diff: 63 insertions / 64 deletions across cmd/, docs/, internal/,
migrations/. Pure text substitution; zero behavior change. Code path
unchanged — go vet clean, tests for TestApproval pass on both
internal/service and internal/api/handler packages.

Workspace docs (cowork/) carry the same references and will be swept
separately — they're not under certctl/ git control. The two filename
references (cowork/infisical-deep-research-results.md +
cowork/infisical-deep-research-prompt.md) get renamed alongside that sweep
to deep-research-results-2026-05-03.md /
deep-research-prompt-2026-05-03.md so cross-references in the certctl
repo doc-comments resolve cleanly.
2026-05-04 01:15:01 +00:00

213 lines
6.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"math"
"sort"
"sync"
"sync/atomic"
)
// ApprovalMetrics is a thread-safe counter table for the issuance
// approval-workflow dispatch path. Rank 7 of the 2026-05-03 deep-research
// deliverable. Mirrors the ExpiryAlertMetrics +
// VaultRenewalMetrics shape: cmd/server/main.go constructs ONE instance,
// passes it to ApprovalService (recording side) AND metricsHandler
// (exposing side) so the snapshotter is the single source of truth.
//
// Dimensions:
//
// outcome — closed enum from internal/domain/approval.go:
// "approved" — Approve transitioned a pending request.
// "rejected" — Reject transitioned a pending request.
// "expired" — scheduler reaper transitioned a stale
// pending request via ExpireStale.
// "bypassed" — CERTCTL_APPROVAL_BYPASS=true short-
// circuited RequestApproval. Production
// deploys MUST have zero rows of this
// outcome.
// profile_id — CertificateProfile.ID that drove the gate. Bounded
// cardinality (operators have <100 profiles in production).
//
// Cardinality bound: 4 outcomes × N profiles. With N=100, that's 400
// series — well within Prometheus's per-target series budget for a
// well-bounded label.
//
// Pending-age histogram: ObservePendingAge records the seconds-since-
// creation of a pending approval at the moment of decision. Operators
// alert when the p99 hits hours/days (compliance has a deadline).
// Bucket boundaries: 60, 300, 1800, 3600, 21600, 86400, +Inf — 1
// minute, 5 minutes, 30 minutes, 1 hour, 6 hours, 24 hours, beyond.
type ApprovalMetrics struct {
	// mu guards the counters map itself (lookups and inserts). The
	// counter values are atomics, so increments and reads of an
	// existing counter do not need the write lock.
	mu sync.RWMutex
	// counters holds one monotonically increasing counter per
	// (outcome, profile_id) pair, created lazily on first use.
	counters map[approvalKey]*atomic.Uint64
	// pendingAgeHist accumulates the samples fed to ObservePendingAge.
	pendingAgeHist *approvalDurationHistogram
}

// approvalKey is the map key that identifies a single decision counter:
// one (outcome, profile_id) label pair.
type approvalKey struct {
	Outcome   string
	ProfileID string
}

// NewApprovalMetrics builds an empty ApprovalMetrics (no counters yet,
// fresh pending-age histogram) that is ready for concurrent use. The
// caller MUST hand the same instance to both the ApprovalService
// (recording side) and the MetricsHandler (exposing side).
func NewApprovalMetrics() *ApprovalMetrics {
	m := new(ApprovalMetrics)
	m.counters = make(map[approvalKey]*atomic.Uint64)
	m.pendingAgeHist = newApprovalDurationHistogram()
	return m
}
// RecordDecision increments the counter for the (outcome, profile_id)
// pair by one, creating the counter on first use. It is called from
// ApprovalService.Approve / Reject / ExpireStale and from the
// bypass-mode short-circuit inside RequestApproval. A nil receiver is a
// no-op.
func (m *ApprovalMetrics) RecordDecision(outcome, profileID string) {
	if m == nil {
		return
	}
	k := approvalKey{Outcome: outcome, ProfileID: profileID}

	// Fast path: the counter already exists, so a shared read lock is
	// enough to find it and the increment itself is atomic.
	m.mu.RLock()
	counter, found := m.counters[k]
	m.mu.RUnlock()
	if found {
		counter.Add(1)
		return
	}

	// Slow path: take the write lock and look again, because another
	// goroutine may have inserted the counter between the two locks.
	m.mu.Lock()
	if counter, found = m.counters[k]; !found {
		counter = new(atomic.Uint64)
		m.counters[k] = counter
	}
	m.mu.Unlock()

	counter.Add(1)
}
// ObservePendingAge feeds one sample into the pending-age histogram:
// the number of seconds a pending approval had existed at the moment it
// was decided (Approve / Reject / Expire). A nil receiver is a no-op.
func (m *ApprovalMetrics) ObservePendingAge(seconds float64) {
	if m != nil {
		m.pendingAgeHist.observe(seconds)
	}
}
// ApprovalDecisionEntry is one row of the decision counter table: the
// number of decisions recorded for a single (outcome, profile_id) pair.
type ApprovalDecisionEntry struct {
	Outcome   string // value of the outcome label
	ProfileID string // value of the profile_id label
	Count     uint64 // counter value read when the snapshot was taken
}

// SnapshotApprovalDecisions returns the current decision counter table
// as a slice sorted by (outcome, profile_id), so that Prometheus
// exposition is deterministic. A nil receiver yields a nil slice; a
// receiver with no recorded decisions yields an empty, non-nil slice.
//
// Each counter is read atomically, but the table is not frozen while it
// is copied: counters that already exist may be incremented concurrently.
func (m *ApprovalMetrics) SnapshotApprovalDecisions() []ApprovalDecisionEntry {
	if m == nil {
		return nil
	}

	// Hold the read lock only while copying the map; sorting happens on
	// the private copy so recorders are not blocked by it.
	m.mu.RLock()
	out := make([]ApprovalDecisionEntry, 0, len(m.counters))
	for k, c := range m.counters {
		out = append(out, ApprovalDecisionEntry{
			Outcome:   k.Outcome,
			ProfileID: k.ProfileID,
			Count:     c.Load(),
		})
	}
	m.mu.RUnlock()

	sort.Slice(out, func(i, j int) bool {
		if out[i].Outcome != out[j].Outcome {
			return out[i].Outcome < out[j].Outcome
		}
		return out[i].ProfileID < out[j].ProfileID
	})
	return out
}
// ApprovalPendingAgeSnapshot is a copy of the pending-age histogram
// state, shaped for Prometheus histogram exposition (le buckets + _sum
// + _count).
type ApprovalPendingAgeSnapshot struct {
	BucketBounds []float64 // [60, 300, 1800, 3600, 21600, 86400] — exclusive of +Inf
	BucketCounts []uint64  // cumulative counts per bucket; len = len(BucketBounds) + 1 (last is +Inf)
	Sum          float64   // sum of all observed values, in seconds
	Count        uint64    // total number of observations
}

// SnapshotApprovalPendingAgeHistogram returns the current bucket counts
// + sum + total count of the pending-age histogram. A nil receiver
// yields the zero-value snapshot.
func (m *ApprovalMetrics) SnapshotApprovalPendingAgeHistogram() ApprovalPendingAgeSnapshot {
	if m == nil {
		return ApprovalPendingAgeSnapshot{}
	}
	return m.pendingAgeHist.snapshot()
}
// approvalDurationHistogram is a small mutex-free histogram with fixed
// bucket boundaries, used for the approval pending-age metric. All
// state lives in atomics: one counter per bucket, a total count, and
// the running sum kept as the IEEE-754 bit pattern of a float64.
type approvalDurationHistogram struct {
	// bounds are the finite upper bucket bounds, in seconds.
	bounds []float64
	// buckets has len(bounds)+1 entries; the last one is the +Inf bucket.
	buckets []*atomic.Uint64
	// sumBits holds math.Float64bits of the running sum.
	sumBits *atomic.Uint64
	// count is the total number of observations.
	count *atomic.Uint64
}

// newApprovalDurationHistogram returns an empty histogram with the
// pending-age bucket bounds: 1 minute, 5 minutes, 30 minutes, 1 hour,
// 6 hours and 24 hours, plus the implicit +Inf bucket.
func newApprovalDurationHistogram() *approvalDurationHistogram {
	h := &approvalDurationHistogram{
		bounds:  []float64{60, 300, 1800, 3600, 21600, 86400},
		sumBits: new(atomic.Uint64),
		count:   new(atomic.Uint64),
	}
	// One counter per finite bound, plus one for +Inf.
	slots := len(h.bounds) + 1
	h.buckets = make([]*atomic.Uint64, 0, slots)
	for len(h.buckets) < slots {
		h.buckets = append(h.buckets, new(atomic.Uint64))
	}
	return h
}
// observe records one sample, in seconds: it increments the bucket the
// sample falls into and the total count, then adds the sample to the
// running sum. A nil receiver is a no-op.
//
// Samples are sanitised before they are recorded:
//   - NaN and ±Inf are dropped. Adding one to the running sum would
//     leave it NaN/Inf for the lifetime of the process.
//   - Negative values (e.g. from clock skew) are clamped to 0 so the
//     sum never decreases; an age cannot meaningfully be negative.
func (h *approvalDurationHistogram) observe(seconds float64) {
	if h == nil {
		return
	}
	if math.IsNaN(seconds) || math.IsInf(seconds, 0) {
		return
	}
	if seconds < 0 {
		seconds = 0
	}

	// Find the first bucket whose upper bound is >= seconds (bounds are
	// inclusive); anything above the last bound lands in +Inf.
	idx := len(h.bounds)
	for i, b := range h.bounds {
		if seconds <= b {
			idx = i
			break
		}
	}
	h.buckets[idx].Add(1)
	h.count.Add(1)

	// Atomic float64 add: retry the compare-and-swap until no other
	// writer has changed the sum between our load and our store.
	for {
		oldBits := h.sumBits.Load()
		newBits := math.Float64bits(math.Float64frombits(oldBits) + seconds)
		if h.sumBits.CompareAndSwap(oldBits, newBits) {
			return
		}
	}
}
// snapshot returns a copy of the histogram state in Prometheus shape:
// the finite bucket bounds, cumulative bucket counts (last entry is the
// +Inf bucket), the sum of observations and the total count. A nil
// receiver yields the zero-value snapshot.
//
// Count is taken from the cumulative bucket total rather than from
// h.count. observe bumps the bucket and h.count in separate atomic
// steps, so reading h.count after the buckets could report a _count
// that differs from the le="+Inf" bucket when an observe runs
// concurrently; Prometheus expects those two values to be equal.
func (h *approvalDurationHistogram) snapshot() ApprovalPendingAgeSnapshot {
	if h == nil {
		return ApprovalPendingAgeSnapshot{}
	}
	counts := make([]uint64, len(h.buckets))
	var total uint64
	for i, b := range h.buckets {
		total += b.Load()
		counts[i] = total
	}
	return ApprovalPendingAgeSnapshot{
		// Copy the bounds so callers cannot mutate the histogram's slice.
		BucketBounds: append([]float64(nil), h.bounds...),
		BucketCounts: counts,
		Sum:          math.Float64frombits(h.sumBits.Load()),
		Count:        total,
	}
}