mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 15:11:29 +00:00
31e50d987f
Two CI failures from the Rank 7 chain push (#438):

Go Build & Test — staticcheck ST1021:
  - internal/service/approval_metrics.go:97 comment for ApprovalDecisionEntry doesn't start with the type name
  - internal/service/approval_metrics.go:130 comment for ApprovalPendingAgeSnapshot doesn't start with the type name

Frontend Build — scripts/ci-guards/openapi-handler-parity.sh: 4 router routes have no OpenAPI operationId:
  - GET /api/v1/approvals
  - GET /api/v1/approvals/{id}
  - POST /api/v1/approvals/{id}/approve
  - POST /api/v1/approvals/{id}/reject

The Rank 7 commit-3 spec deferred OpenAPI extension to commit 4 with a 'batched alongside the integration changes' note; commit 4 didn't actually add them. This commit closes that gap.

Fixes:

approval_metrics.go — split the doc comment that was attached to SnapshotApprovalDecisions (the function) but visually preceded ApprovalDecisionEntry (the type), so the type appeared to staticcheck as having a comment that named the function instead of the type. Same fix on ApprovalPendingAgeSnapshot. Now each exported type has its own type-name-leading comment per Go convention.

api/openapi.yaml — added 4 new operationIds (listApprovalRequests, getApprovalRequest, approveApprovalRequest, rejectApprovalRequest) + new ApprovalRequest schema component under components/schemas. Inline 401 response (the Unauthorized component does not exist in this spec; the canonical pattern in the rest of the file is inline 'description: Authentication required'). The two-person integrity contract surface is documented in the description of the approve / reject endpoints so external readers see the RBAC contract from the spec alone.

Verified locally:
  - go vet ./internal/service/...: exit 0.
  - scripts/ci-guards/openapi-handler-parity.sh: clean (140 ops vs 174 routes, 36 documented exceptions).

Third CI failure (image-and-supply-chain) was a transient apt-fetch 'Connection reset by peer' from deb.debian.org while pulling libasan6_10.2.1-6_amd64.deb. Not a code issue; just re-run the workflow. No code change needed.
218 lines
6.6 KiB
Go
218 lines
6.6 KiB
Go
package service
|
||
|
||
import (
|
||
"math"
|
||
"sort"
|
||
"sync"
|
||
"sync/atomic"
|
||
)
|
||
|
||
// ApprovalMetrics is a thread-safe counter table for the issuance
|
||
// approval-workflow dispatch path. Rank 7 of the 2026-05-03 Infisical
|
||
// deep-research deliverable. Mirrors the ExpiryAlertMetrics +
|
||
// VaultRenewalMetrics shape: cmd/server/main.go constructs ONE instance,
|
||
// passes it to ApprovalService (recording side) AND metricsHandler
|
||
// (exposing side) so the snapshotter is the single source of truth.
|
||
//
|
||
// Dimensions:
|
||
//
|
||
// outcome — closed enum from internal/domain/approval.go:
|
||
// "approved" — Approve transitioned a pending request.
|
||
// "rejected" — Reject transitioned a pending request.
|
||
// "expired" — scheduler reaper transitioned a stale
|
||
// pending request via ExpireStale.
|
||
// "bypassed" — CERTCTL_APPROVAL_BYPASS=true short-
|
||
// circuited RequestApproval. Production
|
||
// deploys MUST have zero rows of this
|
||
// outcome.
|
||
// profile_id — CertificateProfile.ID that drove the gate. Bounded
|
||
// cardinality (operators have <100 profiles in production).
|
||
//
|
||
// Cardinality bound: 4 outcomes × N profiles. With N=100, that's 400
|
||
// series — well within Prometheus's per-target series budget for a
|
||
// well-bounded label.
|
||
//
|
||
// Pending-age histogram: ObservePendingAge records the seconds-since-
|
||
// creation of a pending approval at the moment of decision. Operators
|
||
// alert when the p99 hits hours/days (compliance has a deadline).
|
||
// Bucket boundaries: 60, 300, 1800, 3600, 21600, 86400, +Inf — 1
|
||
// minute, 5 minutes, 30 minutes, 1 hour, 6 hours, 24 hours, beyond.
|
||
type ApprovalMetrics struct {
|
||
mu sync.RWMutex
|
||
counters map[approvalKey]*atomic.Uint64
|
||
|
||
pendingAgeHist *approvalDurationHistogram
|
||
}
|
||
|
||
// approvalKey identifies one counter series inside ApprovalMetrics: the
// decision outcome paired with the profile ID. Both fields are strings,
// so the struct is comparable and can be used directly as a map key.
type approvalKey struct {
	Outcome, ProfileID string
}
|
||
|
||
// NewApprovalMetrics returns a zero-value ApprovalMetrics ready for
|
||
// concurrent use. The caller MUST register the same instance on both
|
||
// the ApprovalService (recording) and the MetricsHandler (exposing)
|
||
// sides.
|
||
func NewApprovalMetrics() *ApprovalMetrics {
|
||
return &ApprovalMetrics{
|
||
counters: make(map[approvalKey]*atomic.Uint64),
|
||
pendingAgeHist: newApprovalDurationHistogram(),
|
||
}
|
||
}
|
||
|
||
// RecordDecision bumps the (outcome, profile_id) counter by one. Called
|
||
// from ApprovalService.Approve / Reject / ExpireStale and from the
|
||
// bypass-mode short-circuit inside RequestApproval.
|
||
func (m *ApprovalMetrics) RecordDecision(outcome, profileID string) {
|
||
if m == nil {
|
||
return
|
||
}
|
||
key := approvalKey{Outcome: outcome, ProfileID: profileID}
|
||
|
||
m.mu.RLock()
|
||
c, ok := m.counters[key]
|
||
m.mu.RUnlock()
|
||
|
||
if !ok {
|
||
m.mu.Lock()
|
||
c, ok = m.counters[key]
|
||
if !ok {
|
||
c = &atomic.Uint64{}
|
||
m.counters[key] = c
|
||
}
|
||
m.mu.Unlock()
|
||
}
|
||
c.Add(1)
|
||
}
|
||
|
||
// ObservePendingAge records the seconds-since-creation of a pending
|
||
// approval at the moment of decision (Approve / Reject / Expire).
|
||
func (m *ApprovalMetrics) ObservePendingAge(seconds float64) {
|
||
if m == nil {
|
||
return
|
||
}
|
||
m.pendingAgeHist.observe(seconds)
|
||
}
|
||
|
||
// ApprovalDecisionEntry is one row of the SnapshotApprovalDecisions
// result: an (outcome, profile_id) pair together with its cumulative
// count. The Prometheus exposer turns each row into a
// certctl_approval_decisions_total{outcome,profile_id} sample.
type ApprovalDecisionEntry struct {
	// Outcome is the value of the outcome label.
	Outcome string
	// ProfileID is the value of the profile_id label.
	ProfileID string
	// Count is the cumulative number of decisions recorded for the pair.
	Count uint64
}
|
||
|
||
// SnapshotApprovalDecisions returns the current decision counter table
|
||
// as a sorted slice for deterministic Prometheus exposition. Sort key
|
||
// is (outcome, profile_id).
|
||
func (m *ApprovalMetrics) SnapshotApprovalDecisions() []ApprovalDecisionEntry {
|
||
if m == nil {
|
||
return nil
|
||
}
|
||
m.mu.RLock()
|
||
out := make([]ApprovalDecisionEntry, 0, len(m.counters))
|
||
for k, c := range m.counters {
|
||
out = append(out, ApprovalDecisionEntry{
|
||
Outcome: k.Outcome,
|
||
ProfileID: k.ProfileID,
|
||
Count: c.Load(),
|
||
})
|
||
}
|
||
m.mu.RUnlock()
|
||
|
||
sort.Slice(out, func(i, j int) bool {
|
||
if out[i].Outcome != out[j].Outcome {
|
||
return out[i].Outcome < out[j].Outcome
|
||
}
|
||
return out[i].ProfileID < out[j].ProfileID
|
||
})
|
||
return out
|
||
}
|
||
|
||
// ApprovalPendingAgeSnapshot is the value returned by
// SnapshotApprovalPendingAgeHistogram: the bucket upper bounds, the
// cumulative per-bucket counts, the sum of all observations and the
// total observation count. The layout matches Prometheus histogram
// exposition (le buckets + _sum + _count).
type ApprovalPendingAgeSnapshot struct {
	// BucketBounds holds the finite upper bounds
	// [60, 300, 1800, 3600, 21600, 86400]; +Inf is not included.
	BucketBounds []float64
	// BucketCounts holds cumulative counts per bucket. Its length is
	// len(BucketBounds) + 1; the final element is the +Inf bucket.
	BucketCounts []uint64
	// Sum is the total of all observed values.
	Sum float64
	// Count is the number of observations.
	Count uint64
}
|
||
|
||
func (m *ApprovalMetrics) SnapshotApprovalPendingAgeHistogram() ApprovalPendingAgeSnapshot {
|
||
if m == nil {
|
||
return ApprovalPendingAgeSnapshot{}
|
||
}
|
||
return m.pendingAgeHist.snapshot()
|
||
}
|
||
|
||
// approvalDurationHistogram is a small fixed-bucket histogram for
// approval pending age. It takes no locks: every bucket, the
// observation count and the running sum are individual atomics, with
// the sum kept as the bit pattern of a float64 inside a uint64.
type approvalDurationHistogram struct {
	// bounds are the finite bucket upper bounds.
	bounds []float64
	// buckets holds one counter per bound plus a trailing +Inf bucket,
	// so len(buckets) == len(bounds) + 1.
	buckets []*atomic.Uint64
	// sumBits is the float64 sum of observations, stored as raw bits so
	// it can be updated atomically.
	sumBits *atomic.Uint64
	// count is the total number of observations.
	count *atomic.Uint64
}
|
||
|
||
func newApprovalDurationHistogram() *approvalDurationHistogram {
|
||
bounds := []float64{60, 300, 1800, 3600, 21600, 86400}
|
||
buckets := make([]*atomic.Uint64, len(bounds)+1)
|
||
for i := range buckets {
|
||
buckets[i] = &atomic.Uint64{}
|
||
}
|
||
return &approvalDurationHistogram{
|
||
bounds: bounds,
|
||
buckets: buckets,
|
||
sumBits: &atomic.Uint64{},
|
||
count: &atomic.Uint64{},
|
||
}
|
||
}
|
||
|
||
func (h *approvalDurationHistogram) observe(seconds float64) {
|
||
if h == nil {
|
||
return
|
||
}
|
||
// Find the first bucket whose bound is >= seconds.
|
||
idx := len(h.bounds) // default to +Inf bucket
|
||
for i, b := range h.bounds {
|
||
if seconds <= b {
|
||
idx = i
|
||
break
|
||
}
|
||
}
|
||
h.buckets[idx].Add(1)
|
||
h.count.Add(1)
|
||
// Atomic float64 add via CAS loop.
|
||
for {
|
||
oldBits := h.sumBits.Load()
|
||
old := math.Float64frombits(oldBits)
|
||
newBits := math.Float64bits(old + seconds)
|
||
if h.sumBits.CompareAndSwap(oldBits, newBits) {
|
||
return
|
||
}
|
||
}
|
||
}
|
||
|
||
func (h *approvalDurationHistogram) snapshot() ApprovalPendingAgeSnapshot {
|
||
if h == nil {
|
||
return ApprovalPendingAgeSnapshot{}
|
||
}
|
||
counts := make([]uint64, len(h.buckets))
|
||
cumulative := uint64(0)
|
||
for i, b := range h.buckets {
|
||
cumulative += b.Load()
|
||
counts[i] = cumulative
|
||
}
|
||
return ApprovalPendingAgeSnapshot{
|
||
BucketBounds: append([]float64(nil), h.bounds...),
|
||
BucketCounts: counts,
|
||
Sum: math.Float64frombits(h.sumBits.Load()),
|
||
Count: h.count.Load(),
|
||
}
|
||
}
|