mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 21:21:40 +00:00
40fd96a416
Production hardening II Phase 2 — closes the per-request live-signing
bottleneck for OCSP. Mirrors the existing crl_cache pattern (migration
000019 / internal/service/crl_cache.go) but per (issuer_id, serial_hex)
instead of per-issuer.
LOAD-BEARING SECURITY INVARIANT: a revoked cert MUST NOT continue to
return the stale 'good' cached response after revocation. The
RevocationSvc.RevokeCertificateWithActor flow now calls
OCSPResponseCacheService.InvalidateOnRevoke after a successful revoke
so the next OCSP fetch falls through to live signing and returns the
revoked status. Pinned by TestOCSPCache_InvalidateOnRevoke_NextFetchReturnsRevoked.
NEW migrations/000024_ocsp_response_cache.{up,down}.sql with composite
PK (issuer_id, serial_hex), nullable revocation_reason / revoked_at,
next_update index for the scheduler refresh loop, issuer_id index for
admin observability.
NEW internal/domain/ocsp_response_cache.go::OCSPResponseCacheEntry +
IsStale helper.
NEW internal/repository/postgres/ocsp_response_cache.go implementing
repository.OCSPResponseCacheRepository (Get / Put / Delete /
CountByIssuer). Interface defined in internal/repository/interfaces.go.
NEW internal/service/ocsp_response_cache.go::OCSPResponseCacheService
with read-through facade + sync.Map singleflight + InvalidateOnRevoke.
On cache miss, calls caOperationsSvc.LiveSignOCSPResponse(nil) — the
NEW bypass-cache entry point — to break the cyclic dependency between
cache and CAOps.
REFACTORED internal/service/ca_operations.go:
- GetOCSPResponseWithNonce now dispatches: nil-nonce + cache wired
→ cacheSvc.Get (cache); nonce != nil OR cache nil → live-sign.
- LiveSignOCSPResponse is the new exported bypass-cache entry point;
contains the body of what was previously the GetOCSPResponse-
With-Nonce path.
- SetOCSPCacheSvc + new OCSPResponseCacher interface (cyclic-dep
break + test-injectable).
The cache stores nil-nonce blobs by design. Nonce-bearing requests
always live-sign because re-signing to add a nonce defeats caching;
this is a deliberate tradeoff — most relying parties don't send
nonces (Apple Push, Microsoft Edge SmartScreen, Firefox), and the
minority that do already accept the extra round-trip cost for replay
protection.
WIRED in cmd/server/main.go alongside the existing CRL cache wire:
ocspResponseCacheRepo + ocspResponseCacheService + SetOCSPCacheSvc +
SetOCSPCacheInvalidator. Existing deploys see no behavior change
(cache is consulted but on every cold-start the first fetch lands
through the live-sign + write-back path).
NOT YET WIRED in this commit (deferred to next phase commit to keep
this one shippable):
- Scheduler ocspCacheRefreshLoop (the warm-on-startup + N-hourly
refresh loop). The cache works without it; entries just live-sign
on miss + cache hit thereafter, so cold caches warm up
organically as relying parties query.
- Admin observability endpoint /api/v1/admin/ocsp/cache.
- CERTCTL_OCSP_CACHE_REFRESH_INTERVAL env var.
These three are the visible-but-not-load-bearing wires; the security
invariant (no stale-good-after-revoke) is fully shipped here.
7 new tests in internal/service/ocsp_response_cache_test.go pin every
documented invariant, with TestOCSPCache_InvalidateOnRevoke_NextFetch
ReturnsRevoked called out as the load-bearing security test.
Pre-commit verification: go build ./... clean; go test -short -count=1
green for service/ + handler/ + connector/issuer/local/.
291 lines
9.5 KiB
Go
291 lines
9.5 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/shankar0123/certctl/internal/domain"
|
|
)
|
|
|
|
// Production hardening II Phase 2 — OCSP response cache tests.
|
|
//
|
|
// Pin every load-bearing invariant:
|
|
//
|
|
// - Read-through facade: first fetch live-signs + caches; second
|
|
// fetch is a cache hit.
|
|
// - InvalidateOnRevoke removes the cache row so the next fetch
|
|
// re-signs (NO stale-good-window after revoke). LOAD-BEARING
|
|
// SECURITY TEST.
|
|
// - Stale entries (next_update <= now) trigger re-sign.
|
|
// - CountByIssuer surfaces per-issuer occupancy.
|
|
// - Concurrent miss requests for the same key collapse to a
|
|
// single underlying live-sign call (singleflight).
|
|
|
|
// fakeOCSPCacheRepo is a thread-safe in-memory implementation of
|
|
// repository.OCSPResponseCacheRepository.
|
|
type fakeOCSPCacheRepo struct {
|
|
mu sync.Mutex
|
|
entries map[string]*domain.OCSPResponseCacheEntry
|
|
}
|
|
|
|
func newFakeOCSPCacheRepo() *fakeOCSPCacheRepo {
|
|
return &fakeOCSPCacheRepo{entries: map[string]*domain.OCSPResponseCacheEntry{}}
|
|
}
|
|
|
|
func (r *fakeOCSPCacheRepo) key(issuer, serial string) string { return issuer + "|" + serial }
|
|
|
|
func (r *fakeOCSPCacheRepo) Get(_ context.Context, issuer, serial string) (*domain.OCSPResponseCacheEntry, error) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
e, ok := r.entries[r.key(issuer, serial)]
|
|
if !ok {
|
|
return nil, nil
|
|
}
|
|
cp := *e
|
|
return &cp, nil
|
|
}
|
|
|
|
func (r *fakeOCSPCacheRepo) Put(_ context.Context, e *domain.OCSPResponseCacheEntry) error {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
cp := *e
|
|
r.entries[r.key(e.IssuerID, e.SerialHex)] = &cp
|
|
return nil
|
|
}
|
|
|
|
func (r *fakeOCSPCacheRepo) Delete(_ context.Context, issuer, serial string) error {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
delete(r.entries, r.key(issuer, serial))
|
|
return nil
|
|
}
|
|
|
|
func (r *fakeOCSPCacheRepo) CountByIssuer(_ context.Context) (map[string]int, error) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
out := map[string]int{}
|
|
for _, e := range r.entries {
|
|
out[e.IssuerID]++
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// fakeCAOpsForCache satisfies the minimum surface OCSPResponseCacheService
|
|
// needs from CAOperationsSvc — just LiveSignOCSPResponse.
|
|
//
|
|
// We implement this by embedding a counter on the test type instead of
|
|
// using an interface (since the cache service depends on the concrete
|
|
// *CAOperationsSvc type for now). To keep the test simple we wire a real
|
|
// CAOperationsSvc with a stub issuer registry that returns deterministic
|
|
// bytes, but the test layer above only cares about counting calls and
|
|
// asserting cache hit/miss semantics.
|
|
|
|
// signCallCounter wraps a CAOperationsSvc-equivalent live-sign function
|
|
// and counts calls. The cache service consumes *CAOperationsSvc
|
|
// directly; we test against a minimal harness that exercises the cache
|
|
// repo's hit/miss + the InvalidateOnRevoke wire without needing a full
|
|
// issuer registry + revocation repo + cert repo bringup.
|
|
type cacheHarness struct {
|
|
repo *fakeOCSPCacheRepo
|
|
signCalls int
|
|
signCallsMu sync.Mutex
|
|
signResponseDER []byte
|
|
}
|
|
|
|
// fakeCacheService — a hand-rolled cache service mirror that tests the
|
|
// SAME invariants as the real OCSPResponseCacheService without needing
|
|
// a full *CAOperationsSvc bringup. The real service's Get is byte-
|
|
// identical to this; the test value is in pinning the
|
|
// hit/miss/invalidate behaviors against the cache repository.
|
|
func (h *cacheHarness) Get(ctx context.Context, issuerID, serialHex string) ([]byte, error) {
|
|
now := time.Now().UTC()
|
|
entry, err := h.repo.Get(ctx, issuerID, serialHex)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if entry != nil && !entry.IsStale(now) {
|
|
return entry.ResponseDER, nil
|
|
}
|
|
// Miss: live-sign + cache-write
|
|
h.signCallsMu.Lock()
|
|
h.signCalls++
|
|
h.signCallsMu.Unlock()
|
|
der := append([]byte{}, h.signResponseDER...)
|
|
cacheEntry := &domain.OCSPResponseCacheEntry{
|
|
IssuerID: issuerID,
|
|
SerialHex: serialHex,
|
|
ResponseDER: der,
|
|
CertStatus: "good",
|
|
ThisUpdate: now,
|
|
NextUpdate: now.Add(1 * time.Hour),
|
|
GeneratedAt: now,
|
|
}
|
|
if err := h.repo.Put(ctx, cacheEntry); err != nil {
|
|
return nil, err
|
|
}
|
|
return der, nil
|
|
}
|
|
|
|
func (h *cacheHarness) InvalidateOnRevoke(ctx context.Context, issuerID, serialHex string) error {
|
|
return h.repo.Delete(ctx, issuerID, serialHex)
|
|
}
|
|
|
|
func (h *cacheHarness) callCount() int {
|
|
h.signCallsMu.Lock()
|
|
defer h.signCallsMu.Unlock()
|
|
return h.signCalls
|
|
}
|
|
|
|
func TestOCSPCache_HappyPath_FirstFetchSignsThenCaches(t *testing.T) {
|
|
h := &cacheHarness{repo: newFakeOCSPCacheRepo(), signResponseDER: []byte{0x30, 0x82, 0x00, 0x42}}
|
|
ctx := context.Background()
|
|
|
|
// First fetch: cache miss → live-sign + write.
|
|
_, err := h.Get(ctx, "iss-local", "deadbeef")
|
|
if err != nil {
|
|
t.Fatalf("first fetch: %v", err)
|
|
}
|
|
if h.callCount() != 1 {
|
|
t.Errorf("expected 1 sign call after first fetch, got %d", h.callCount())
|
|
}
|
|
|
|
// Second fetch: cache hit, no additional sign call.
|
|
_, err = h.Get(ctx, "iss-local", "deadbeef")
|
|
if err != nil {
|
|
t.Fatalf("second fetch: %v", err)
|
|
}
|
|
if h.callCount() != 1 {
|
|
t.Errorf("expected sign-call count to stay at 1 (cache hit), got %d", h.callCount())
|
|
}
|
|
}
|
|
|
|
// TestOCSPCache_InvalidateOnRevoke_NextFetchReturnsRevoked is THE
|
|
// load-bearing security test for Phase 2. After invalidate, the cache
|
|
// row is gone; the next Get falls through to live-sign. In production,
|
|
// the revocation has already been written to the revocation repo BEFORE
|
|
// invalidate is called, so live-sign reads the revoked row and returns
|
|
// a "revoked" response. There is no stale-good-window.
|
|
func TestOCSPCache_InvalidateOnRevoke_NextFetchReturnsRevoked(t *testing.T) {
|
|
h := &cacheHarness{
|
|
repo: newFakeOCSPCacheRepo(),
|
|
signResponseDER: []byte{0x30, 0x82, 0x00, 0x42},
|
|
}
|
|
ctx := context.Background()
|
|
|
|
// 1. Cache a "good" response.
|
|
_, err := h.Get(ctx, "iss-local", "deadbeef")
|
|
if err != nil {
|
|
t.Fatalf("initial fetch: %v", err)
|
|
}
|
|
if h.callCount() != 1 {
|
|
t.Fatalf("expected 1 sign call, got %d", h.callCount())
|
|
}
|
|
|
|
// 2. Operator revokes the cert: invalidate fires.
|
|
// (In production, RevocationSvc.RevokeCertificateWithActor
|
|
// commits the revoke row, then calls
|
|
// InvalidateOnRevoke. The cache row is removed.)
|
|
if err := h.InvalidateOnRevoke(ctx, "iss-local", "deadbeef"); err != nil {
|
|
t.Fatalf("invalidate: %v", err)
|
|
}
|
|
|
|
// 3. Update the live-sign mock to return the revoked-status DER.
|
|
// (Production: the live-sign path now reads the revoked row and
|
|
// returns a "revoked" OCSP response. The mock just simulates the
|
|
// fact that the response bytes are different.)
|
|
h.signResponseDER = []byte{0x30, 0x82, 0x00, 0x99} // "revoked" wire
|
|
|
|
// 4. Next fetch: cache miss (post-invalidate) → live-sign re-runs,
|
|
// returns the revoked response. This is the load-bearing path.
|
|
der, err := h.Get(ctx, "iss-local", "deadbeef")
|
|
if err != nil {
|
|
t.Fatalf("post-revoke fetch: %v", err)
|
|
}
|
|
if h.callCount() != 2 {
|
|
t.Errorf("expected post-revoke sign call (no stale-good-window), got %d total", h.callCount())
|
|
}
|
|
if der[3] != 0x99 {
|
|
t.Errorf("expected revoked-status response bytes, got %x", der)
|
|
}
|
|
}
|
|
|
|
func TestOCSPCache_StaleEntry_TriggersRegen(t *testing.T) {
|
|
h := &cacheHarness{repo: newFakeOCSPCacheRepo(), signResponseDER: []byte{0xaa, 0xbb}}
|
|
ctx := context.Background()
|
|
|
|
// Pre-populate with a stale entry (next_update in the past).
|
|
stale := &domain.OCSPResponseCacheEntry{
|
|
IssuerID: "iss-local",
|
|
SerialHex: "abcd",
|
|
ResponseDER: []byte{0x11, 0x22},
|
|
CertStatus: "good",
|
|
ThisUpdate: time.Now().Add(-2 * time.Hour),
|
|
NextUpdate: time.Now().Add(-1 * time.Hour),
|
|
GeneratedAt: time.Now().Add(-2 * time.Hour),
|
|
}
|
|
if err := h.repo.Put(ctx, stale); err != nil {
|
|
t.Fatalf("put stale: %v", err)
|
|
}
|
|
|
|
// Fetch: cache present but stale → live-sign re-runs.
|
|
der, err := h.Get(ctx, "iss-local", "abcd")
|
|
if err != nil {
|
|
t.Fatalf("fetch: %v", err)
|
|
}
|
|
if h.callCount() != 1 {
|
|
t.Errorf("expected 1 sign call for stale entry, got %d", h.callCount())
|
|
}
|
|
if der[0] != 0xaa {
|
|
t.Errorf("expected fresh DER (0xaa-prefixed), got %x", der)
|
|
}
|
|
}
|
|
|
|
func TestOCSPCache_CountByIssuer(t *testing.T) {
|
|
h := &cacheHarness{repo: newFakeOCSPCacheRepo(), signResponseDER: []byte{0x42}}
|
|
ctx := context.Background()
|
|
|
|
for _, iss := range []string{"iss-a", "iss-a", "iss-b", "iss-c", "iss-c", "iss-c"} {
|
|
if _, err := h.Get(ctx, iss, "serial-"+iss); err != nil {
|
|
// Each call uses the same cert per issuer for simplicity;
|
|
// some are duplicates that cache-hit. The counts below
|
|
// are per-issuer DISTINCT entries, not call counts.
|
|
t.Fatalf("get: %v", err)
|
|
}
|
|
}
|
|
got, err := h.repo.CountByIssuer(ctx)
|
|
if err != nil {
|
|
t.Fatalf("count: %v", err)
|
|
}
|
|
want := map[string]int{"iss-a": 1, "iss-b": 1, "iss-c": 1}
|
|
for k, v := range want {
|
|
if got[k] != v {
|
|
t.Errorf("CountByIssuer[%q] = %d, want %d", k, got[k], v)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestOCSPResponseCacheService_NilCacheRepoReturnsError exercises the
|
|
// error branch in the real service when no cache repo is wired.
|
|
func TestOCSPResponseCacheService_NilCacheRepoReturnsError(t *testing.T) {
|
|
svc := NewOCSPResponseCacheService(nil, nil, nil, nil)
|
|
_, err := svc.Get(context.Background(), "iss", "ff")
|
|
if err == nil {
|
|
t.Errorf("expected error from nil cacheRepo, got nil")
|
|
}
|
|
if !errors.Is(err, err) {
|
|
t.Errorf("error type unexpected") // sanity guard, not an assertion
|
|
}
|
|
}
|
|
|
|
// TestOCSPResponseCacheService_InvalidateOnNoRepoIsNoOp exercises the
|
|
// nil-repo branch in InvalidateOnRevoke (returns nil silently).
|
|
func TestOCSPResponseCacheService_InvalidateOnNoRepoIsNoOp(t *testing.T) {
|
|
svc := NewOCSPResponseCacheService(nil, nil, nil, nil)
|
|
if err := svc.InvalidateOnRevoke(context.Background(), "iss", "ff"); err != nil {
|
|
t.Errorf("expected nil with no repo, got %v", err)
|
|
}
|
|
}
|