service: ApprovalService + ApprovalMetrics + 8 table-driven tests

Rank 7 of the 2026-05-03 Infisical deep-research deliverable, commit 2 of 4
(cowork/rank-7-approval-workflow-primitive-prompt.md). Builds on the
foundation in commit b4d1ad1 — wires the service layer that drives the
approval workflow. Still no handler / integration wiring; commits 3-4
land that.

Files added:
  internal/service/approval.go         - ApprovalService struct + 6
                                          methods: RequestApproval,
                                          Approve, Reject, ListPending,
                                          List, Get, ExpireStale.
                                          Same-actor RBAC check
                                          (ErrApproveBySameActor) at
                                          both Approve and Reject; the
                                          load-bearing two-person
                                          integrity gate. Bypass mode
                                          short-circuits via
                                          approveInternal(outcome=
                                          "bypassed", actorType=System).
                                          Audit + metric emission per
                                          decision via shared
                                          recordAudit helper. Tolerates
                                          nil AuditService for tests.
                                          Service depends on a narrow
                                          JobStatusUpdater interface
                                          (single-method) rather than
                                          the full repository.JobRepository
                                          — production wiring satisfies
                                          it implicitly via postgres'
                                          existing UpdateStatus.

  internal/service/approval_metrics.go - ApprovalMetrics: thread-safe
                                          counter table (decisions
                                          counter dimensioned by
                                          outcome × profile_id) + a
                                          custom durationHistogram for
                                          pending-age (le buckets:
                                          60, 300, 1800, 3600, 21600,
                                          86400, +Inf — 1m, 5m, 30m,
                                          1h, 6h, 24h, beyond).
                                          Snapshot* methods return the
                                          Prometheus exposer's input
                                          shapes. Mirrors the
                                          ExpiryAlertMetrics +
                                          VaultRenewalMetrics pattern
                                          from prior ranks.

  internal/service/approval_test.go    - 8 table-driven tests with
                                          tight in-package fakes
                                          (fakeApprovalRepo +
                                          fakeJobStateRepo):
                                            TestApproval_RequestCreatesPendingRow_BypassDisabled
                                            TestApproval_BypassMode_AutoApprovesWithSystemBypassActor
                                            TestApproval_Approve_TransitionsJobFromAwaitingApprovalToPending
                                            TestApproval_Reject_TransitionsJobFromAwaitingApprovalToCancelled
                                            TestApproval_Approve_RejectsSameActor
                                              ↑ THE LOAD-BEARING TWO-PERSON
                                                INTEGRITY TEST. PCI-DSS 6.4.5
                                                / NIST 800-53 SA-15 / SOC 2
                                                CC6.1 compliance auditors
                                                pattern-match against this.
                                                Pins same-actor rejection on
                                                both Approve and Reject paths;
                                                pins success when a different
                                                actor approves.
                                            TestApproval_Approve_RejectsAlreadyDecided
                                            TestApproval_ExpireStale_TransitionsPendingToExpired_AndCancelsJob
                                            TestApproval_MetricCounterIncrements

Verified:
  gofmt: clean.
  go vet ./internal/service/...: exit 0.
  go test -short -count=1 -run TestApproval ./internal/service/...:
    ok 0.005s — all 8 tests green.

Out of scope for this commit (lands in commits 3-4):
  - api/handler/approval.go (5 endpoints + handler-side RBAC).
  - api/openapi.yaml extensions.
  - Integration into CertificateService.TriggerRenewal +
    RenewalService.CheckExpiringCertificates + Scheduler.ReapTimedOutJobs.
  - cmd/server/main.go wiring of ApprovalService + ApprovalMetrics.
  - Config.Approval.BypassEnabled + CERTCTL_APPROVAL_BYPASS env var.
  - docs/connectors.md row + docs/approval-workflow.md runbook.

Reference: cowork/rank-7-approval-workflow-primitive-prompt.md.
This commit is contained in:
shankar0123
2026-05-04 01:01:53 +00:00
parent b4d1ad1c97
commit df23294476
3 changed files with 970 additions and 0 deletions
+372
View File
@@ -0,0 +1,372 @@
package service
import (
"context"
"errors"
"fmt"
"time"
"github.com/certctl-io/certctl/internal/domain"
"github.com/certctl-io/certctl/internal/repository"
)
// ApprovalService manages the issuance approval-workflow primitive.
// Rank 7 of the 2026-05-03 Infisical deep-research deliverable.
//
// Lifecycle: a profile with RequiresApproval=true causes the renewal
// entry points (TriggerRenewal + CheckExpiringCertificates) to call
// RequestApproval; the resulting Job is created at
// JobStatusAwaitingApproval; the scheduler does NOT dispatch until
// Approve transitions the job to Pending.
//
// RBAC contract: the requester cannot approve their own request.
// Approve checks decidedBy != request.RequestedBy and rejects with
// ErrApproveBySameActor otherwise. This is the load-bearing two-
// person integrity check; compliance auditors pattern-match against
// it.
//
// Bypass mode: if CERTCTL_APPROVAL_BYPASS=true at boot, every
// RequestApproval call immediately auto-approves with
// decidedBy="system-bypass". Used by dev / CI to keep renewal-
// scheduler tests fast without standing up an approver. Production
// deploys MUST leave this unset; the bypass emits an audit row with
// ActorType=System so a downstream auditor can grep for
// "system-bypass" approvals and confirm none happened in production.
type ApprovalService struct {
approvalRepo repository.ApprovalRepository
jobRepo JobStatusUpdater
auditService *AuditService
metrics *ApprovalMetrics
bypassEnabled bool
}
// JobStatusUpdater is the narrow interface ApprovalService depends on
// from JobRepository. Accepting the small interface (rather than the
// full repository.JobRepository) keeps the test mock surface tiny —
// real JobRepository implementations (postgres + any future) satisfy
// it implicitly because they implement UpdateStatus already.
type JobStatusUpdater interface {
UpdateStatus(ctx context.Context, id string, status domain.JobStatus, errMsg string) error
}
// NewApprovalService constructs an ApprovalService. metrics may be nil
// for tests that don't need Prometheus integration; auditService should
// not be nil in production but is tolerated for unit tests that don't
// care about audit-row emission.
func NewApprovalService(
approvalRepo repository.ApprovalRepository,
jobRepo JobStatusUpdater,
auditService *AuditService,
metrics *ApprovalMetrics,
bypassEnabled bool,
) *ApprovalService {
return &ApprovalService{
approvalRepo: approvalRepo,
jobRepo: jobRepo,
auditService: auditService,
metrics: metrics,
bypassEnabled: bypassEnabled,
}
}
// Sentinels for handler-side dispatch via errors.Is.
var (
// ErrApprovalNotFound is returned when the request ID does not exist.
// Handlers map to HTTP 404.
ErrApprovalNotFound = errors.New("approval request not found")
// ErrApprovalAlreadyDecided is returned when Approve / Reject is called
// on a request whose State is already terminal. Handlers map to HTTP 409.
ErrApprovalAlreadyDecided = errors.New("approval request already decided")
// ErrApproveBySameActor is the load-bearing two-person integrity check.
// Returned when the supplied decidedBy equals request.RequestedBy.
// Handlers map to HTTP 403.
ErrApproveBySameActor = errors.New("approver cannot be the same as requester (two-person integrity)")
)
// RequestApproval creates a pending ApprovalRequest row and is invoked
// from the renewal entry points after they have created the Job at
// Status=AwaitingApproval. Returns the request ID for handler /
// caller use.
//
// If bypassEnabled is true, this method synchronously calls Approve
// internally with decidedBy=ApprovalActorSystemBypass and returns the
// resulting (now-approved) request ID. The audit row records
// ActorType=System so a downstream auditor can confirm bypass-mode
// was off in production via a single SQL query.
func (s *ApprovalService) RequestApproval(
ctx context.Context,
cert *domain.ManagedCertificate,
jobID, profileID, requestedBy string,
metadata map[string]string,
) (string, error) {
if cert == nil {
return "", fmt.Errorf("approval: nil certificate")
}
if jobID == "" || profileID == "" || requestedBy == "" {
return "", fmt.Errorf("approval: jobID, profileID, requestedBy required")
}
now := time.Now().UTC()
req := &domain.ApprovalRequest{
CertificateID: cert.ID,
JobID: jobID,
ProfileID: profileID,
RequestedBy: requestedBy,
State: domain.ApprovalStatePending,
Metadata: metadata,
CreatedAt: now,
UpdatedAt: now,
}
if err := s.approvalRepo.Create(ctx, req); err != nil {
return "", fmt.Errorf("approval: create request: %w", err)
}
// Audit the request creation. Bypass-mode logs both the request and
// the auto-approval as separate rows so the timeline is honest.
s.recordAudit(ctx, requestedBy, domain.ActorTypeUser, "approval_requested", req, nil)
if s.bypassEnabled {
if err := s.approveInternal(ctx, req.ID, domain.ApprovalActorSystemBypass,
"auto-approved by CERTCTL_APPROVAL_BYPASS — dev/CI mode",
domain.ApprovalOutcomeBypassed, domain.ActorTypeSystem); err != nil {
return req.ID, fmt.Errorf("approval: bypass auto-approve: %w", err)
}
}
return req.ID, nil
}
// Approve transitions a pending request to approved AND the linked Job
// from AwaitingApproval to Pending so the job processor picks it up.
// RBAC: rejects if decidedBy == request.RequestedBy.
func (s *ApprovalService) Approve(ctx context.Context, requestID, decidedBy, note string) error {
req, err := s.approvalRepo.Get(ctx, requestID)
if err != nil {
if errors.Is(err, repository.ErrNotFound) {
return ErrApprovalNotFound
}
return fmt.Errorf("approval: get for approve: %w", err)
}
if req.State.IsTerminal() {
return ErrApprovalAlreadyDecided
}
if decidedBy == req.RequestedBy {
return ErrApproveBySameActor
}
return s.approveInternal(ctx, requestID, decidedBy, note,
domain.ApprovalOutcomeApproved, domain.ActorTypeUser)
}
// approveInternal is the shared transition path for both human-Approve
// and bypass-mode auto-approve. Same DB transition + audit + metric
// recording, but the outcome label + actorType differ.
func (s *ApprovalService) approveInternal(
ctx context.Context, requestID, decidedBy, note, outcome string,
actorType domain.ActorType,
) error {
now := time.Now().UTC()
// Re-fetch the request after the state-transition guards in Approve so
// we can stamp the metric's pending-age + transition the job. For the
// bypass path, this is the first read.
req, err := s.approvalRepo.Get(ctx, requestID)
if err != nil {
if errors.Is(err, repository.ErrNotFound) {
return ErrApprovalNotFound
}
return fmt.Errorf("approval: get for transition: %w", err)
}
if req.State.IsTerminal() {
return ErrApprovalAlreadyDecided
}
if err := s.approvalRepo.UpdateState(ctx, requestID,
domain.ApprovalStateApproved, decidedBy, now, note); err != nil {
if errors.Is(err, repository.ErrNotFound) {
return ErrApprovalNotFound
}
if errors.Is(err, repository.ErrAlreadyExists) {
return ErrApprovalAlreadyDecided
}
return fmt.Errorf("approval: update state to approved: %w", err)
}
// Transition the linked Job from AwaitingApproval to Pending so the
// scheduler picks it up. Best-effort — if the Job has already been
// cancelled or otherwise mutated externally, log via audit and move on.
if err := s.jobRepo.UpdateStatus(ctx, req.JobID, domain.JobStatusPending, ""); err != nil {
s.recordAudit(ctx, decidedBy, actorType, "approval_job_transition_failed", req,
map[string]interface{}{"target_status": string(domain.JobStatusPending), "error": err.Error()})
return fmt.Errorf("approval: transition job to Pending: %w", err)
}
s.recordAudit(ctx, decidedBy, actorType, "approval_"+outcome, req,
map[string]interface{}{"note": note, "outcome": outcome})
if s.metrics != nil {
s.metrics.RecordDecision(outcome, req.ProfileID)
s.metrics.ObservePendingAge(now.Sub(req.CreatedAt).Seconds())
}
return nil
}
// Reject transitions a pending request to rejected AND the linked Job
// from AwaitingApproval to Cancelled. RBAC: same-actor check applies.
func (s *ApprovalService) Reject(ctx context.Context, requestID, decidedBy, note string) error {
req, err := s.approvalRepo.Get(ctx, requestID)
if err != nil {
if errors.Is(err, repository.ErrNotFound) {
return ErrApprovalNotFound
}
return fmt.Errorf("approval: get for reject: %w", err)
}
if req.State.IsTerminal() {
return ErrApprovalAlreadyDecided
}
if decidedBy == req.RequestedBy {
return ErrApproveBySameActor
}
now := time.Now().UTC()
if err := s.approvalRepo.UpdateState(ctx, requestID,
domain.ApprovalStateRejected, decidedBy, now, note); err != nil {
if errors.Is(err, repository.ErrNotFound) {
return ErrApprovalNotFound
}
if errors.Is(err, repository.ErrAlreadyExists) {
return ErrApprovalAlreadyDecided
}
return fmt.Errorf("approval: update state to rejected: %w", err)
}
if err := s.jobRepo.UpdateStatus(ctx, req.JobID, domain.JobStatusCancelled,
"approval rejected: "+note); err != nil {
s.recordAudit(ctx, decidedBy, domain.ActorTypeUser, "approval_job_transition_failed", req,
map[string]interface{}{"target_status": string(domain.JobStatusCancelled), "error": err.Error()})
return fmt.Errorf("approval: transition job to Cancelled: %w", err)
}
s.recordAudit(ctx, decidedBy, domain.ActorTypeUser, "approval_rejected", req,
map[string]interface{}{"note": note, "outcome": domain.ApprovalOutcomeRejected})
if s.metrics != nil {
s.metrics.RecordDecision(domain.ApprovalOutcomeRejected, req.ProfileID)
s.metrics.ObservePendingAge(now.Sub(req.CreatedAt).Seconds())
}
return nil
}
// ListPending returns approval requests in state=pending, paginated.
// Operators reading the dashboard call this on every page load.
func (s *ApprovalService) ListPending(ctx context.Context, page, perPage int) ([]*domain.ApprovalRequest, error) {
return s.approvalRepo.List(ctx, &repository.ApprovalFilter{
State: string(domain.ApprovalStatePending),
Page: page,
PerPage: perPage,
})
}
// List returns approval requests filtered by the supplied filter. Used
// by handler GET /api/v1/approvals with arbitrary state.
func (s *ApprovalService) List(ctx context.Context, filter *repository.ApprovalFilter) ([]*domain.ApprovalRequest, error) {
return s.approvalRepo.List(ctx, filter)
}
// Get returns a single approval request by ID, or ErrApprovalNotFound.
func (s *ApprovalService) Get(ctx context.Context, id string) (*domain.ApprovalRequest, error) {
req, err := s.approvalRepo.Get(ctx, id)
if err != nil {
if errors.Is(err, repository.ErrNotFound) {
return nil, ErrApprovalNotFound
}
return nil, err
}
return req, nil
}
// ExpireStale runs from the scheduler's reaper loop. Calls the
// repository's ExpireStale (bulk pending→expired transition) +
// transitions matching jobs from AwaitingApproval to Cancelled.
// Records one audit row per expiry. Returns the count expired.
//
// Operators alert when this is non-zero — it means an approval
// request timed out without human review.
func (s *ApprovalService) ExpireStale(ctx context.Context, before time.Time) (int, error) {
// Find pending requests older than `before` so we can record the
// audit + metric per expiry. ExpireStale on the repo bulk-mutates
// the rows; we read first to capture the per-row metadata for
// auditing, then call the repo's bulk update.
pending, err := s.approvalRepo.List(ctx, &repository.ApprovalFilter{
State: string(domain.ApprovalStatePending),
PerPage: 500,
})
if err != nil {
return 0, fmt.Errorf("approval: list pending for expiry: %w", err)
}
var stale []*domain.ApprovalRequest
for _, req := range pending {
if req.CreatedAt.Before(before) || req.CreatedAt.Equal(before) {
stale = append(stale, req)
}
}
if len(stale) == 0 {
return 0, nil
}
count, err := s.approvalRepo.ExpireStale(ctx, before)
if err != nil {
return 0, fmt.Errorf("approval: bulk expire: %w", err)
}
now := time.Now().UTC()
for _, req := range stale {
// Cancel the linked job — best-effort. The scheduler's existing
// ReapTimedOutJobs already handles AwaitingApproval timeouts on
// the job side; this is a defensive double-cancel that's
// idempotent if the scheduler already ran.
if err := s.jobRepo.UpdateStatus(ctx, req.JobID, domain.JobStatusCancelled,
"approval expired: timed out without review"); err != nil {
// Log via audit and continue — don't fail the whole sweep on
// one bad job.
s.recordAudit(ctx, "system-reaper", domain.ActorTypeSystem, "approval_job_transition_failed", req,
map[string]interface{}{"target_status": string(domain.JobStatusCancelled), "error": err.Error()})
}
s.recordAudit(ctx, "system-reaper", domain.ActorTypeSystem, "approval_expired", req,
map[string]interface{}{"outcome": domain.ApprovalOutcomeExpired, "before_cutoff": before.Format(time.RFC3339)})
if s.metrics != nil {
s.metrics.RecordDecision(domain.ApprovalOutcomeExpired, req.ProfileID)
s.metrics.ObservePendingAge(now.Sub(req.CreatedAt).Seconds())
}
}
return count, nil
}
// recordAudit is the shared audit-emission helper. Tolerates a nil
// AuditService (unit tests that don't wire it) and discards errors —
// audit failures must not block the primary state transition.
func (s *ApprovalService) recordAudit(ctx context.Context, actor string, actorType domain.ActorType,
action string, req *domain.ApprovalRequest, extra map[string]interface{}) {
if s.auditService == nil || req == nil {
return
}
details := map[string]interface{}{
"approval_id": req.ID,
"certificate_id": req.CertificateID,
"job_id": req.JobID,
"profile_id": req.ProfileID,
"requested_by": req.RequestedBy,
"state": string(req.State),
}
for k, v := range req.Metadata {
details["metadata_"+k] = v
}
for k, v := range extra {
details[k] = v
}
_ = s.auditService.RecordEvent(ctx, actor, actorType, action,
"approval_request", req.ID, details)
}
+212
View File
@@ -0,0 +1,212 @@
package service
import (
"math"
"sort"
"sync"
"sync/atomic"
)
// ApprovalMetrics is a thread-safe counter table for the issuance
// approval-workflow dispatch path. Rank 7 of the 2026-05-03 Infisical
// deep-research deliverable. Mirrors the ExpiryAlertMetrics +
// VaultRenewalMetrics shape: cmd/server/main.go constructs ONE instance,
// passes it to ApprovalService (recording side) AND metricsHandler
// (exposing side) so the snapshotter is the single source of truth.
//
// Dimensions:
//
// outcome — closed enum from internal/domain/approval.go:
// "approved" — Approve transitioned a pending request.
// "rejected" — Reject transitioned a pending request.
// "expired" — scheduler reaper transitioned a stale
// pending request via ExpireStale.
// "bypassed" — CERTCTL_APPROVAL_BYPASS=true short-
// circuited RequestApproval. Production
// deploys MUST have zero rows of this
// outcome.
// profile_id — CertificateProfile.ID that drove the gate. Bounded
// cardinality (operators have <100 profiles in production).
//
// Cardinality bound: 4 outcomes × N profiles. With N=100, that's 400
// series — well within Prometheus's per-target series budget for a
// well-bounded label.
//
// Pending-age histogram: ObservePendingAge records the seconds-since-
// creation of a pending approval at the moment of decision. Operators
// alert when the p99 hits hours/days (compliance has a deadline).
// Bucket boundaries: 60, 300, 1800, 3600, 21600, 86400, +Inf — 1
// minute, 5 minutes, 30 minutes, 1 hour, 6 hours, 24 hours, beyond.
type ApprovalMetrics struct {
mu sync.RWMutex
counters map[approvalKey]*atomic.Uint64
pendingAgeHist *approvalDurationHistogram
}
type approvalKey struct {
Outcome string
ProfileID string
}
// NewApprovalMetrics returns a zero-value ApprovalMetrics ready for
// concurrent use. The caller MUST register the same instance on both
// the ApprovalService (recording) and the MetricsHandler (exposing)
// sides.
func NewApprovalMetrics() *ApprovalMetrics {
return &ApprovalMetrics{
counters: make(map[approvalKey]*atomic.Uint64),
pendingAgeHist: newApprovalDurationHistogram(),
}
}
// RecordDecision bumps the (outcome, profile_id) counter by one. Called
// from ApprovalService.Approve / Reject / ExpireStale and from the
// bypass-mode short-circuit inside RequestApproval.
func (m *ApprovalMetrics) RecordDecision(outcome, profileID string) {
if m == nil {
return
}
key := approvalKey{Outcome: outcome, ProfileID: profileID}
m.mu.RLock()
c, ok := m.counters[key]
m.mu.RUnlock()
if !ok {
m.mu.Lock()
c, ok = m.counters[key]
if !ok {
c = &atomic.Uint64{}
m.counters[key] = c
}
m.mu.Unlock()
}
c.Add(1)
}
// ObservePendingAge records the seconds-since-creation of a pending
// approval at the moment of decision (Approve / Reject / Expire).
func (m *ApprovalMetrics) ObservePendingAge(seconds float64) {
if m == nil {
return
}
m.pendingAgeHist.observe(seconds)
}
// SnapshotApprovalDecisions returns the current decision counter table
// as a sorted slice for deterministic Prometheus exposition. Sort key
// is (outcome, profile_id).
type ApprovalDecisionEntry struct {
Outcome string
ProfileID string
Count uint64
}
func (m *ApprovalMetrics) SnapshotApprovalDecisions() []ApprovalDecisionEntry {
if m == nil {
return nil
}
m.mu.RLock()
out := make([]ApprovalDecisionEntry, 0, len(m.counters))
for k, c := range m.counters {
out = append(out, ApprovalDecisionEntry{
Outcome: k.Outcome,
ProfileID: k.ProfileID,
Count: c.Load(),
})
}
m.mu.RUnlock()
sort.Slice(out, func(i, j int) bool {
if out[i].Outcome != out[j].Outcome {
return out[i].Outcome < out[j].Outcome
}
return out[i].ProfileID < out[j].ProfileID
})
return out
}
// SnapshotApprovalPendingAgeHistogram returns the current bucket counts
// + sum + total count for the pending-age histogram. Format suits the
// Prometheus histogram exposition (le buckets + _sum + _count).
type ApprovalPendingAgeSnapshot struct {
BucketBounds []float64 // [60, 300, 1800, 3600, 21600, 86400] — exclusive of +Inf
BucketCounts []uint64 // cumulative counts per bucket; len = len(BucketBounds) + 1 (last is +Inf)
Sum float64
Count uint64
}
func (m *ApprovalMetrics) SnapshotApprovalPendingAgeHistogram() ApprovalPendingAgeSnapshot {
if m == nil {
return ApprovalPendingAgeSnapshot{}
}
return m.pendingAgeHist.snapshot()
}
// approvalDurationHistogram is a tiny lock-free histogram with fixed
// bucket boundaries for approval-pending-age. Atomic counters per
// bucket + sum stored as uint64-bits-of-float64 atomic.
type approvalDurationHistogram struct {
bounds []float64
buckets []*atomic.Uint64 // len = len(bounds) + 1; last is +Inf
sumBits *atomic.Uint64 // float64 bits stored atomically
count *atomic.Uint64
}
func newApprovalDurationHistogram() *approvalDurationHistogram {
bounds := []float64{60, 300, 1800, 3600, 21600, 86400}
buckets := make([]*atomic.Uint64, len(bounds)+1)
for i := range buckets {
buckets[i] = &atomic.Uint64{}
}
return &approvalDurationHistogram{
bounds: bounds,
buckets: buckets,
sumBits: &atomic.Uint64{},
count: &atomic.Uint64{},
}
}
func (h *approvalDurationHistogram) observe(seconds float64) {
if h == nil {
return
}
// Find the first bucket whose bound is >= seconds.
idx := len(h.bounds) // default to +Inf bucket
for i, b := range h.bounds {
if seconds <= b {
idx = i
break
}
}
h.buckets[idx].Add(1)
h.count.Add(1)
// Atomic float64 add via CAS loop.
for {
oldBits := h.sumBits.Load()
old := math.Float64frombits(oldBits)
newBits := math.Float64bits(old + seconds)
if h.sumBits.CompareAndSwap(oldBits, newBits) {
return
}
}
}
func (h *approvalDurationHistogram) snapshot() ApprovalPendingAgeSnapshot {
if h == nil {
return ApprovalPendingAgeSnapshot{}
}
counts := make([]uint64, len(h.buckets))
cumulative := uint64(0)
for i, b := range h.buckets {
cumulative += b.Load()
counts[i] = cumulative
}
return ApprovalPendingAgeSnapshot{
BucketBounds: append([]float64(nil), h.bounds...),
BucketCounts: counts,
Sum: math.Float64frombits(h.sumBits.Load()),
Count: h.count.Load(),
}
}
+386
View File
@@ -0,0 +1,386 @@
package service
import (
"context"
"errors"
"sync"
"testing"
"time"
"github.com/certctl-io/certctl/internal/domain"
"github.com/certctl-io/certctl/internal/repository"
)
// fakeApprovalRepo is a minimal in-memory ApprovalRepository for unit
// testing the service-layer logic in isolation. Stores rows in a map
// keyed by ID; List returns rows matching a single state filter.
type fakeApprovalRepo struct {
mu sync.Mutex
rows map[string]*domain.ApprovalRequest
}
func newFakeApprovalRepo() *fakeApprovalRepo {
return &fakeApprovalRepo{rows: make(map[string]*domain.ApprovalRequest)}
}
func (f *fakeApprovalRepo) Create(ctx context.Context, req *domain.ApprovalRequest) error {
f.mu.Lock()
defer f.mu.Unlock()
if req.ID == "" {
req.ID = "ar-fake-" + time.Now().Format("150405.000000000")
}
// Enforce the partial-unique pending-per-job at the mock layer too.
for _, existing := range f.rows {
if existing.JobID == req.JobID && existing.State == domain.ApprovalStatePending {
return repository.ErrAlreadyExists
}
}
cp := *req
f.rows[req.ID] = &cp
return nil
}
func (f *fakeApprovalRepo) Get(ctx context.Context, id string) (*domain.ApprovalRequest, error) {
f.mu.Lock()
defer f.mu.Unlock()
if r, ok := f.rows[id]; ok {
cp := *r
return &cp, nil
}
return nil, repository.ErrNotFound
}
func (f *fakeApprovalRepo) GetByJobID(ctx context.Context, jobID string) (*domain.ApprovalRequest, error) {
f.mu.Lock()
defer f.mu.Unlock()
for _, r := range f.rows {
if r.JobID == jobID {
cp := *r
return &cp, nil
}
}
return nil, repository.ErrNotFound
}
func (f *fakeApprovalRepo) List(ctx context.Context, filter *repository.ApprovalFilter) ([]*domain.ApprovalRequest, error) {
f.mu.Lock()
defer f.mu.Unlock()
var out []*domain.ApprovalRequest
for _, r := range f.rows {
if filter != nil && filter.State != "" && string(r.State) != filter.State {
continue
}
if filter != nil && filter.CertificateID != "" && r.CertificateID != filter.CertificateID {
continue
}
if filter != nil && filter.RequestedBy != "" && r.RequestedBy != filter.RequestedBy {
continue
}
cp := *r
out = append(out, &cp)
}
return out, nil
}
func (f *fakeApprovalRepo) UpdateState(ctx context.Context, id string, state domain.ApprovalState,
decidedBy string, decidedAt time.Time, note string) error {
f.mu.Lock()
defer f.mu.Unlock()
r, ok := f.rows[id]
if !ok {
return repository.ErrNotFound
}
if r.State != domain.ApprovalStatePending {
return repository.ErrAlreadyExists // signals "already terminal"
}
r.State = state
r.DecidedBy = &decidedBy
r.DecidedAt = &decidedAt
if note != "" {
n := note
r.DecisionNote = &n
}
r.UpdatedAt = decidedAt
return nil
}
func (f *fakeApprovalRepo) ExpireStale(ctx context.Context, before time.Time) (int, error) {
f.mu.Lock()
defer f.mu.Unlock()
now := time.Now().UTC()
count := 0
for _, r := range f.rows {
if r.State == domain.ApprovalStatePending && (r.CreatedAt.Before(before) || r.CreatedAt.Equal(before)) {
r.State = domain.ApprovalStateExpired
s := "system-reaper"
r.DecidedBy = &s
r.DecidedAt = &now
r.UpdatedAt = now
count++
}
}
return count, nil
}
// fakeJobStateRepo implements service.JobStatusUpdater and tracks per-job
// status mutations so the tests can introspect them. It does NOT implement
// the full repository.JobRepository — ApprovalService only needs UpdateStatus.
type fakeJobStateRepo struct {
mu sync.Mutex
statuses map[string]domain.JobStatus
}
func newFakeJobStateRepo() *fakeJobStateRepo {
return &fakeJobStateRepo{statuses: make(map[string]domain.JobStatus)}
}
func (f *fakeJobStateRepo) seed(id string, status domain.JobStatus) {
f.mu.Lock()
defer f.mu.Unlock()
f.statuses[id] = status
}
func (f *fakeJobStateRepo) status(id string) domain.JobStatus {
f.mu.Lock()
defer f.mu.Unlock()
return f.statuses[id]
}
func (f *fakeJobStateRepo) UpdateStatus(ctx context.Context, id string, status domain.JobStatus, errMsg string) error {
f.mu.Lock()
defer f.mu.Unlock()
f.statuses[id] = status
return nil
}
// helper builders --------------------------------------------------------
func newApprovalSvcForTest(bypass bool) (*ApprovalService, *fakeApprovalRepo, *fakeJobStateRepo) {
ar := newFakeApprovalRepo()
jr := newFakeJobStateRepo()
metrics := NewApprovalMetrics()
svc := NewApprovalService(ar, jr, nil, metrics, bypass)
return svc, ar, jr
}
func sampleCert() *domain.ManagedCertificate {
return &domain.ManagedCertificate{ID: "mc-test-cert"}
}
// tests ------------------------------------------------------------------
func TestApproval_RequestCreatesPendingRow_BypassDisabled(t *testing.T) {
svc, ar, jr := newApprovalSvcForTest(false)
jr.seed("job-1", domain.JobStatusAwaitingApproval)
id, err := svc.RequestApproval(context.Background(), sampleCert(),
"job-1", "profile-prod-cdn", "user-alice", map[string]string{"common_name": "api.example.com"})
if err != nil {
t.Fatalf("RequestApproval err: %v", err)
}
got, err := ar.Get(context.Background(), id)
if err != nil {
t.Fatalf("Get err: %v", err)
}
if got.State != domain.ApprovalStatePending {
t.Fatalf("expected state=pending, got %s", got.State)
}
if got.RequestedBy != "user-alice" {
t.Fatalf("requested_by mismatch: %s", got.RequestedBy)
}
if jr.status("job-1") != domain.JobStatusAwaitingApproval {
t.Fatalf("job should remain AwaitingApproval; got %s", jr.status("job-1"))
}
}
func TestApproval_BypassMode_AutoApprovesWithSystemBypassActor(t *testing.T) {
svc, ar, jr := newApprovalSvcForTest(true)
jr.seed("job-2", domain.JobStatusAwaitingApproval)
id, err := svc.RequestApproval(context.Background(), sampleCert(),
"job-2", "profile-iot", "user-bob", nil)
if err != nil {
t.Fatalf("bypass RequestApproval err: %v", err)
}
got, err := ar.Get(context.Background(), id)
if err != nil {
t.Fatalf("Get err: %v", err)
}
if got.State != domain.ApprovalStateApproved {
t.Fatalf("bypass should auto-approve; got state=%s", got.State)
}
if got.DecidedBy == nil || *got.DecidedBy != domain.ApprovalActorSystemBypass {
t.Fatalf("bypass should stamp decided_by=%s; got %v",
domain.ApprovalActorSystemBypass, got.DecidedBy)
}
if jr.status("job-2") != domain.JobStatusPending {
t.Fatalf("bypass should transition job to Pending; got %s", jr.status("job-2"))
}
}
func TestApproval_Approve_TransitionsJobFromAwaitingApprovalToPending(t *testing.T) {
svc, ar, jr := newApprovalSvcForTest(false)
jr.seed("job-3", domain.JobStatusAwaitingApproval)
id, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-3", "p1", "user-alice", nil)
if err := svc.Approve(context.Background(), id, "user-bob", "approved per ticket SECOPS-123"); err != nil {
t.Fatalf("Approve err: %v", err)
}
got, _ := ar.Get(context.Background(), id)
if got.State != domain.ApprovalStateApproved {
t.Fatalf("expected state=approved; got %s", got.State)
}
if jr.status("job-3") != domain.JobStatusPending {
t.Fatalf("expected job=Pending; got %s", jr.status("job-3"))
}
}
func TestApproval_Reject_TransitionsJobFromAwaitingApprovalToCancelled(t *testing.T) {
svc, ar, jr := newApprovalSvcForTest(false)
jr.seed("job-4", domain.JobStatusAwaitingApproval)
id, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-4", "p1", "user-alice", nil)
if err := svc.Reject(context.Background(), id, "user-bob", "not on the approved-domains list"); err != nil {
t.Fatalf("Reject err: %v", err)
}
got, _ := ar.Get(context.Background(), id)
if got.State != domain.ApprovalStateRejected {
t.Fatalf("expected state=rejected; got %s", got.State)
}
if jr.status("job-4") != domain.JobStatusCancelled {
t.Fatalf("expected job=Cancelled; got %s", jr.status("job-4"))
}
}
func TestApproval_Approve_RejectsSameActor(t *testing.T) {
// LOAD-BEARING TWO-PERSON INTEGRITY TEST. PCI-DSS 6.4.5 / NIST 800-53
// SA-15 / SOC 2 CC6.1 compliance auditors pattern-match against this.
svc, _, jr := newApprovalSvcForTest(false)
jr.seed("job-5", domain.JobStatusAwaitingApproval)
id, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-5", "p1", "user-alice", nil)
err := svc.Approve(context.Background(), id, "user-alice", "trying to self-approve")
if !errors.Is(err, ErrApproveBySameActor) {
t.Fatalf("expected ErrApproveBySameActor; got %v", err)
}
if jr.status("job-5") != domain.JobStatusAwaitingApproval {
t.Fatalf("job should remain AwaitingApproval; got %s", jr.status("job-5"))
}
// Approval as a different actor succeeds.
if err := svc.Approve(context.Background(), id, "user-bob", "approved by separate actor"); err != nil {
t.Fatalf("Approve as different actor err: %v", err)
}
if jr.status("job-5") != domain.JobStatusPending {
t.Fatalf("expected job=Pending after bob approve; got %s", jr.status("job-5"))
}
// Same-actor rejection also fails.
jr.seed("job-5b", domain.JobStatusAwaitingApproval)
id2, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-5b", "p1", "user-charlie", nil)
err2 := svc.Reject(context.Background(), id2, "user-charlie", "self-reject")
if !errors.Is(err2, ErrApproveBySameActor) {
t.Fatalf("expected ErrApproveBySameActor on Reject; got %v", err2)
}
}
func TestApproval_Approve_RejectsAlreadyDecided(t *testing.T) {
svc, _, jr := newApprovalSvcForTest(false)
jr.seed("job-6", domain.JobStatusAwaitingApproval)
id, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-6", "p1", "user-alice", nil)
if err := svc.Approve(context.Background(), id, "user-bob", ""); err != nil {
t.Fatalf("first Approve err: %v", err)
}
err := svc.Approve(context.Background(), id, "user-charlie", "second approve")
if !errors.Is(err, ErrApprovalAlreadyDecided) {
t.Fatalf("expected ErrApprovalAlreadyDecided; got %v", err)
}
err2 := svc.Reject(context.Background(), id, "user-charlie", "late reject")
if !errors.Is(err2, ErrApprovalAlreadyDecided) {
t.Fatalf("expected ErrApprovalAlreadyDecided on Reject; got %v", err2)
}
}
func TestApproval_ExpireStale_TransitionsPendingToExpired_AndCancelsJob(t *testing.T) {
svc, ar, jr := newApprovalSvcForTest(false)
jr.seed("job-7", domain.JobStatusAwaitingApproval)
jr.seed("job-8", domain.JobStatusAwaitingApproval)
id7, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-7", "p1", "user-alice", nil)
id8, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-8", "p1", "user-alice", nil)
// Backdate one of the requests to before the cutoff.
old := time.Now().Add(-200 * time.Hour).UTC()
ar.mu.Lock()
ar.rows[id7].CreatedAt = old
ar.mu.Unlock()
cutoff := time.Now().Add(-168 * time.Hour).UTC()
count, err := svc.ExpireStale(context.Background(), cutoff)
if err != nil {
t.Fatalf("ExpireStale err: %v", err)
}
if count != 1 {
t.Fatalf("expected 1 row expired; got %d", count)
}
got7, _ := ar.Get(context.Background(), id7)
if got7.State != domain.ApprovalStateExpired {
t.Fatalf("expected job-7 expired; got %s", got7.State)
}
got8, _ := ar.Get(context.Background(), id8)
if got8.State != domain.ApprovalStatePending {
t.Fatalf("job-8 should still be pending; got %s", got8.State)
}
if jr.status("job-7") != domain.JobStatusCancelled {
t.Fatalf("expected job-7 cancelled; got %s", jr.status("job-7"))
}
if jr.status("job-8") != domain.JobStatusAwaitingApproval {
t.Fatalf("job-8 should remain AwaitingApproval; got %s", jr.status("job-8"))
}
}
func TestApproval_MetricCounterIncrements(t *testing.T) {
svc, _, jr := newApprovalSvcForTest(false)
metrics := svc.metrics
jr.seed("job-9", domain.JobStatusAwaitingApproval)
id9, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-9", "p-cdn", "user-alice", nil)
_ = svc.Approve(context.Background(), id9, "user-bob", "approved")
jr.seed("job-10", domain.JobStatusAwaitingApproval)
id10, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-10", "p-cdn", "user-alice", nil)
_ = svc.Reject(context.Background(), id10, "user-bob", "rejected")
jr.seed("job-11", domain.JobStatusAwaitingApproval)
id11, _ := svc.RequestApproval(context.Background(), sampleCert(), "job-11", "p-cdn", "user-alice", nil)
// Backdate + expire.
old := time.Now().Add(-200 * time.Hour).UTC()
repo := svc.approvalRepo.(*fakeApprovalRepo)
repo.mu.Lock()
repo.rows[id11].CreatedAt = old
repo.mu.Unlock()
if _, err := svc.ExpireStale(context.Background(), time.Now().Add(-168*time.Hour)); err != nil {
t.Fatalf("ExpireStale err: %v", err)
}
snap := metrics.SnapshotApprovalDecisions()
got := map[string]uint64{}
for _, e := range snap {
got[e.Outcome] = e.Count
}
if got[domain.ApprovalOutcomeApproved] != 1 {
t.Fatalf("expected 1 approved counter; got %d", got[domain.ApprovalOutcomeApproved])
}
if got[domain.ApprovalOutcomeRejected] != 1 {
t.Fatalf("expected 1 rejected counter; got %d", got[domain.ApprovalOutcomeRejected])
}
if got[domain.ApprovalOutcomeExpired] != 1 {
t.Fatalf("expected 1 expired counter; got %d", got[domain.ApprovalOutcomeExpired])
}
// Histogram observed at least 3 samples.
hist := metrics.SnapshotApprovalPendingAgeHistogram()
if hist.Count < 3 {
t.Fatalf("expected at least 3 histogram samples; got %d", hist.Count)
}
}