Files
certctl/internal/service/ocsp_response_cache.go
shankar0123 21aeed4f4e legal: addlicense headers + normalize legacy variants (Phase 0 RED-4)
Phase 0 closure (Path B2, post-rewrite):

addlicense sweep — adds the canonical certctl LLC copyright + BUSL-1.1
SPDX header to every production Go file. Template:

  // Copyright 2026 certctl LLC. All rights reserved.
  // SPDX-License-Identifier: BUSL-1.1

Coverage: 338 / 338 production Go files (cmd/ + internal/, excluding
*_test.go and **/testdata/**). Pre-sweep coverage was 22 / 338 (6.5%);
post-sweep is 338 / 338 (100%).

Normalized 22 pre-existing legacy headers (`// Copyright (c) certctl`
+ `// SPDX-License-Identifier: BSL-1.1`) and 1 file using a
`Certctl Contributors` attribution. The legacy SPDX ID `BSL-1.1`
is non-standard; the official SPDX identifier for Business Source
License 1.1 is `BUSL-1.1` (capital U). All 338 files now share the
canonical form.

Generated via:
  addlicense -c "certctl LLC" -y 2026 \
    -f cowork/legal/copyright-header.tpl \
    -ignore '**/testdata/**' -ignore '**/*_test.go' \
    cmd/ internal/

Verification:
  find cmd internal -name '*.go' -not -name '*_test.go' \
    -not -path '*/testdata/*' \
    -exec grep -L '^// Copyright 2026 certctl LLC' {} \; | wc -l

  Returns: 0

gofmt clean. Header additions are comments only, no compile impact.

Closes: cowork/certctl-architecture-diligence-audit.html#fix-RED-4
2026-05-13 21:23:35 +00:00

219 lines
8.1 KiB
Go

// Copyright 2026 certctl LLC. All rights reserved.
// SPDX-License-Identifier: BUSL-1.1
package service
import (
"context"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"github.com/certctl-io/certctl/internal/domain"
"github.com/certctl-io/certctl/internal/repository"
)
// OCSPResponseCacheService is the read-through + scheduler-driven
// cache layer for pre-signed OCSP responses. The OCSP handler at
// /.well-known/pki/ocsp/{issuer_id}/... reads via Get; the
// scheduler.ocspCacheRefreshLoop drives RefreshAll on a tick.
//
// Architectural template: internal/service/crl_cache.go::CRLCacheService
// (same read-through pattern, same singleflight invariant, same
// fail-safe-on-error semantics). The differences from CRL caching:
//
//   - Cache key is the (issuer, serial) composite, not just the issuer.
//   - The cached entry includes the cert_status so the cache layer
//     can short-circuit on revoke without consulting the revocation
//     repo (the InvalidateOnRevoke wire takes care of that).
//   - Nonce is NEVER cached: the cached blob is the BASE response
//     without a nonce extension; the handler appends the nonce at
//     response-write time. This keeps the cache key independent of
//     the request's per-call nonce.
//
// Production hardening II Phase 2.
//
// NOTE(review): regenerate records CertStatus as "good" for every entry
// it writes, so the stored status cannot currently be relied on for the
// short-circuit described above — confirm before depending on it.
// NOTE(review): RefreshAll is not defined in the part of this file that
// was reviewed — confirm where it lives.
type OCSPResponseCacheService struct {
// cacheRepo is the backing store for cached responses. The methods in
// this file call Get, Put, Delete and CountByIssuer on it and treat a
// nil repo as "cache not configured".
cacheRepo repository.OCSPResponseCacheRepository
// caSvc performs the live signing (LiveSignOCSPResponse) when a cached
// entry is missing or stale.
caSvc *CAOperationsSvc
// logger is optional; every use in this file is nil-guarded.
logger *slog.Logger
// counters is intended to tick on every Get / hit / miss /
// invalidation. NOTE(review): none of the methods visible in this file
// increments it yet — confirm whether the ticks are wired elsewhere.
counters *OCSPCounters
// flight implements a hand-rolled singleflight: it collapses concurrent
// live-sign requests for the same (issuer, serial) on cache miss into a
// single underlying signing call. Mirrors the CRL cache pattern.
flight sync.Map // key = issuerID + "|" + serialHex → *ocspFlightEntry
}
// ocspFlightEntry tracks one in-progress regeneration for a single
// (issuer, serial) key. The goroutine that owns the flight fills in
// result and err and then closes done; goroutines that joined the
// flight block on done and read the two fields only afterwards, so the
// channel close is what publishes them.
type ocspFlightEntry struct {
// done is closed by the owning goroutine once the outcome is final.
done chan struct{}
// result is the signed OCSP response DER produced by the flight.
result []byte
// err is the error outcome of the flight; nil on success.
err error
}
// NewOCSPResponseCacheService assembles an OCSPResponseCacheService
// from its collaborators.
//
// caSvc MUST already be wired with the issuer registry + revocation
// repo (the usual order in cmd/server/main.go). A nil counters argument
// is replaced with a fresh NewOCSPCounters() so the service always
// carries a non-nil counter set. cacheRepo and logger are stored as
// given; the service methods guard against either being nil.
func NewOCSPResponseCacheService(
	cacheRepo repository.OCSPResponseCacheRepository,
	caSvc *CAOperationsSvc,
	counters *OCSPCounters,
	logger *slog.Logger,
) *OCSPResponseCacheService {
	svc := &OCSPResponseCacheService{
		cacheRepo: cacheRepo,
		caSvc:     caSvc,
		logger:    logger,
		counters:  counters,
	}
	if svc.counters == nil {
		svc.counters = NewOCSPCounters()
	}
	return svc
}
// Get returns the OCSP response DER for (issuerID, serialHex).
//
// A fresh cache entry is served straight from the repository. When the
// entry is missing or stale, Get falls through to regenerate, which
// live-signs a nil-nonce response and writes it back to the cache; the
// cached blob is always the nil-nonce variant and any nonce echo is the
// handler's job after the cache read.
//
// Errors: returns an error when the cache repo is not configured, when
// the repository lookup fails, or when regeneration fails. The latter
// two are wrapped with the (issuer, serial) pair.
//
// LOAD-BEARING SECURITY INVARIANT: a cached response reflects the
// revocation state at the moment it was signed. If a cert is revoked
// AFTER its response was cached but BEFORE the entry is invalidated,
// the cache keeps asserting the old status. InvalidateOnRevoke closes
// that window — call it immediately after a successful revocation.
func (s *OCSPResponseCacheService) Get(ctx context.Context, issuerID, serialHex string) ([]byte, error) {
	if s.cacheRepo == nil {
		return nil, errors.New("ocsp_response_cache service: cache repo not configured")
	}

	// The staleness reference is taken before the repository lookup.
	asOf := time.Now().UTC()
	cached, lookupErr := s.cacheRepo.Get(ctx, issuerID, serialHex)
	switch {
	case lookupErr != nil:
		return nil, fmt.Errorf("ocsp_response_cache get %q/%q: %w", issuerID, serialHex, lookupErr)
	case cached != nil && !cached.IsStale(asOf):
		// Fresh hit: serve the stored DER as-is.
		return cached.ResponseDER, nil
	}

	// Miss or stale entry: regenerate collapses concurrent requests for
	// the same key onto one signing call.
	fresh, signErr := s.regenerate(ctx, issuerID, serialHex)
	if signErr != nil {
		return nil, fmt.Errorf("ocsp_response_cache regenerate %q/%q: %w", issuerID, serialHex, signErr)
	}
	return fresh, nil
}
// regenerate signs a fresh OCSP response for (issuerID, serialHex) and
// writes it back to the cache. It is singleflight-guarded: concurrent
// callers for the same key collapse onto one LiveSignOCSPResponse call.
// The first caller (the leader) does the work; the others wait for its
// outcome and receive the same result and error.
//
// The response is signed with a nil nonce: the handler adds the
// per-request nonce echo after reading from cache, so the cache key
// stays independent of per-call nonces.
//
// Waiters stop waiting as soon as their own ctx is done and return
// ctx.Err(); the leader's flight is unaffected. The leader always
// completes the flight (done is closed via defer), so a panic in the
// signing or cache-write path cannot strand waiters — they receive an
// "aborted" error instead of blocking forever.
//
// A failed cache write is logged and does not fail the call.
func (s *OCSPResponseCacheService) regenerate(ctx context.Context, issuerID, serialHex string) ([]byte, error) {
	key := issuerID + "|" + serialHex

	// Fast path: join an existing flight without allocating an entry.
	if loaded, ok := s.flight.Load(key); ok {
		return waitOCSPFlight(ctx, loaded.(*ocspFlightEntry))
	}

	// err is pre-loaded with an abort error so that, if the leader exits
	// abnormally before recording an outcome, waiters never observe a
	// (nil, nil) result. It is overwritten once signing returns.
	entry := &ocspFlightEntry{
		done: make(chan struct{}),
		err:  errors.New("ocsp_response_cache: regeneration aborted before completing"),
	}
	if actual, inFlight := s.flight.LoadOrStore(key, entry); inFlight {
		// Lost the race to become leader; wait on the winner's flight.
		return waitOCSPFlight(ctx, actual.(*ocspFlightEntry))
	}

	// Deferred calls run last-in-first-out: waiters are released first,
	// then the key is removed so the next miss starts a new flight.
	defer s.flight.Delete(key)
	defer close(entry.done)

	// Live-sign with nil nonce via the bypass-cache entry point. Per the
	// original author's note, going through GetOCSPResponseWithNonce
	// would recurse because it dispatches to the cache for nil-nonce
	// requests.
	der, err := s.caSvc.LiveSignOCSPResponse(ctx, issuerID, serialHex, nil)
	entry.result, entry.err = der, err
	if err != nil {
		return der, err
	}

	// Persist the fresh response. Failure to write the cache is logged
	// but does NOT fail the caller — the response is still valid; we just
	// lose the cache benefit on the next request.
	//
	// NOTE(review): the metadata below is not derived from the DER. The
	// 1h window is assumed to match what the signing path produces, and
	// CertStatus is recorded as "good" even when the signed response
	// encodes a different status. The DER itself remains the source of
	// truth for the relying party; confirm nothing trusts these fields.
	now := time.Now().UTC()
	cacheEntry := &domain.OCSPResponseCacheEntry{
		IssuerID:    issuerID,
		SerialHex:   serialHex,
		ResponseDER: der,
		CertStatus:  "good", // optimistic; the live-sign already encoded the actual status into the DER
		ThisUpdate:  now,
		NextUpdate:  now.Add(1 * time.Hour),
		GeneratedAt: now,
	}
	if perr := s.cacheRepo.Put(ctx, cacheEntry); perr != nil && s.logger != nil {
		s.logger.Warn("ocsp_response_cache: cache write failed (response still valid)",
			"issuer_id", issuerID, "serial", serialHex, "error", perr)
	}
	return der, nil
}

// waitOCSPFlight blocks until the given flight completes or ctx is
// done, whichever happens first. On completion it returns the flight's
// recorded result and error; on cancellation it returns ctx.Err().
func waitOCSPFlight(ctx context.Context, f *ocspFlightEntry) ([]byte, error) {
	select {
	case <-f.done:
		return f.result, f.err
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}
// InvalidateOnRevoke removes the cached entry for (issuerID, serialHex)
// after a successful revocation. THE LOAD-BEARING SECURITY WIRE.
// Without this, a revoked cert keeps returning the stale "good"
// cached response until the next ocspCacheRefreshLoop tick — a
// security incident. The revocation service (RevocationSvc) MUST
// call this after RevokeCertificate succeeds.
//
// Returns nil when the cache repo is not configured (nothing to
// invalidate) or when the delete succeeds. A delete failure is logged
// at Warn level and returned unchanged.
//
// On invalidate-failure the caller's revocation success is NOT
// rolled back: the revocation row is committed, the CRL will pick
// up the change on the next regen, and the operator sees the cache-
// failure breadcrumb in the warning log. Failing the revoke on cache
// failure would leave the operator's intent unachieved (cert appears
// not-revoked); failing-soft + logging is the right tradeoff.
func (s *OCSPResponseCacheService) InvalidateOnRevoke(ctx context.Context, issuerID, serialHex string) error {
	if s.cacheRepo == nil {
		return nil // nothing to invalidate; cache not configured
	}
	if err := s.cacheRepo.Delete(ctx, issuerID, serialHex); err != nil {
		if s.logger != nil {
			s.logger.Warn("ocsp_response_cache: invalidate failed (revocation still committed; CRL will catch on next regen)",
				"issuer_id", issuerID, "serial", serialHex, "error", err)
		}
		return err
	}
	// TODO(Phase 8): tick the "invalidated" counter on s.counters here.
	// The previous empty `if s.counters != nil {}` placeholder did
	// nothing and has been removed.
	if s.logger != nil {
		s.logger.Debug("ocsp_response_cache: invalidated on revoke",
			"issuer_id", issuerID, "serial", serialHex)
	}
	return nil
}
// CountByIssuer reports per-issuer cache occupancy for the admin
// observability endpoint, mirroring CRLCacheService's pattern. The
// count is delegated to the cache repository; when no repository is
// configured it returns an empty, non-nil map and a nil error.
func (s *OCSPResponseCacheService) CountByIssuer(ctx context.Context) (map[string]int, error) {
	if repo := s.cacheRepo; repo != nil {
		return repo.CountByIssuer(ctx)
	}
	return map[string]int{}, nil
}