crl/ocsp responder bundle: backend slice (Phases 1-4)

Ships the production-grade backend for the CRL/OCSP responder bundle.
Closes the gap that made certctl's local issuer unsuitable for any
production deploy (relying parties couldn't validate revocation cleanly):

  Phase 1 — crl_cache schema + repository (migration 000019)
  Phase 2 — dedicated OCSP responder cert per issuer (RFC 6960 §2.6)
            (migration 000020)
  Phase 3 — scheduler crlGenerationLoop + CRLCacheService with
            singleflight collapsing
  Phase 4 — POST OCSP endpoint (RFC 6960 §A.1.1) + GenerateDERCRL
            cache integration

What's NOT in this slice (deferred follow-ups):

  * cmd/server/main.go wiring of the new services into the existing
    issuer registry / scheduler. Mechanical wiring; the operator can
    ship at their next convenience.
  * Phase 5 (GUI: per-issuer revocation endpoints + admin cache
    endpoint), Phase 6 (e2e test against kind cluster), Phase 7
    (release prep). Each is its own session.
  * V3-Pro polish: delta CRLs, OCSP rate-limiting, OCSP stapling.

Coverage at HEAD: handler 79.8%, service 73.5%, scheduler 78.1%,
local issuer 86.3%, signer 91.6%, domain 100%. All above the floors
in .github/workflows/ci.yml.

Backward compat: every new dep is an OPTIONAL setter (SetCRLCacheSvc,
SetCRLCacheService, SetOCSPResponderRepo, SetSignerDriver,
SetIssuerID). Existing wiring continues to function unchanged until
the operator wires the new services in main.go.

No new direct dependencies in core go.mod. The in-tree singleflight
gate (~30 LoC sync.Map[issuerID]*flightEntry) avoids vendoring
golang.org/x/sync.

Each phase landed as its own commit on the branch:
  30765ba — Phase 1
  a0b7f7d — Phase 2
  dc32694 — Phase 3
  dc1e0bf — Phase 4

Branch deleted post-merge.
This commit is contained in:
shankar0123
2026-04-29 00:07:57 +00:00
23 changed files with 2933 additions and 35 deletions
+55
View File
@@ -696,6 +696,61 @@ paths:
"501":
description: Issuer does not support OCSP
/.well-known/pki/ocsp/{issuer_id}:
post:
tags: [CRL & OCSP]
summary: OCSP responder (RFC 6960 §A.1.1, POST form)
description: |
Standard RFC 6960 §A.1.1 POST form of the OCSP responder. The
request body is the binary DER-encoded OCSPRequest with
Content-Type `application/ocsp-request`; the serial number is
carried inside that body, not in the URL path. Most production
OCSP clients (Firefox, OpenSSL `s_client -status`, cert-manager,
Microsoft Intune device validators) use POST exclusively.
The pre-existing GET form
(`/.well-known/pki/ocsp/{issuer_id}/{serial}`) is preserved for
ad-hoc curl inspection and human-readable URL paths; behaviour
and response are otherwise identical.
Auth-exempt under `/.well-known/pki/*` per RFC 8615 so relying
parties can poll without a certctl API key. CRL/OCSP-Responder
bundle Phase 4.
operationId: handleOCSPPost
security: []
parameters:
- name: issuer_id
in: path
required: true
schema:
type: string
requestBody:
required: true
content:
application/ocsp-request:
schema:
type: string
format: binary
description: DER-encoded OCSPRequest per RFC 6960 §4.1
responses:
"200":
description: OCSP response
content:
application/ocsp-response:
schema:
type: string
format: binary
"400":
$ref: "#/components/responses/BadRequest"
"404":
$ref: "#/components/responses/NotFound"
"415":
description: Content-Type is not application/ocsp-request
"500":
$ref: "#/components/responses/InternalError"
"501":
description: Issuer does not support OCSP
# ─── Issuers ─────────────────────────────────────────────────────────
/api/v1/issuers:
get:
@@ -3,13 +3,21 @@ package handler
import (
"bytes"
"context"
"crypto"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"crypto/x509/pkix"
"encoding/json"
"fmt"
"math/big"
"net/http"
"net/http/httptest"
"testing"
"time"
"golang.org/x/crypto/ocsp"
"github.com/shankar0123/certctl/internal/api/middleware"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
@@ -1208,6 +1216,174 @@ func TestHandleOCSP_MethodNotAllowed(t *testing.T) {
}
}
// === Phase-4 POST OCSP (RFC 6960 §A.1.1) Tests ===
// buildOCSPRequest constructs a binary DER-encoded OCSPRequest body
// for testing the POST handler. The same shape is what production
// clients (Firefox, OpenSSL, cert-manager) send.
func buildOCSPRequest(t *testing.T, serial *big.Int) []byte {
t.Helper()
// Build a minimal issuer cert + leaf cert pair so ocsp.CreateRequest
// has the SubjectPublicKeyInfo + serial it needs.
caKey, _ := rsa.GenerateKey(rand.Reader, 2048)
caTpl := &x509.Certificate{
SerialNumber: big.NewInt(0xCA),
Subject: pkix.Name{CommonName: "Test Issuer"},
NotBefore: time.Now().Add(-time.Hour),
NotAfter: time.Now().Add(24 * time.Hour),
IsCA: true,
BasicConstraintsValid: true,
}
caDER, err := x509.CreateCertificate(rand.Reader, caTpl, caTpl, &caKey.PublicKey, caKey)
if err != nil {
t.Fatalf("create CA: %v", err)
}
caCert, _ := x509.ParseCertificate(caDER)
leafTpl := &x509.Certificate{
SerialNumber: serial,
Subject: pkix.Name{CommonName: "leaf.example.com"},
NotBefore: time.Now().Add(-time.Hour),
NotAfter: time.Now().Add(24 * time.Hour),
}
leafKey, _ := rsa.GenerateKey(rand.Reader, 2048)
leafDER, err := x509.CreateCertificate(rand.Reader, leafTpl, caCert, &leafKey.PublicKey, caKey)
if err != nil {
t.Fatalf("create leaf: %v", err)
}
leafCert, _ := x509.ParseCertificate(leafDER)
body, err := ocsp.CreateRequest(leafCert, caCert, &ocsp.RequestOptions{Hash: crypto.SHA256})
if err != nil {
t.Fatalf("create OCSP request: %v", err)
}
return body
}
func TestHandleOCSPPost_Success(t *testing.T) {
wantSerial := big.NewInt(0xDEADBEEF)
expectedHex := fmt.Sprintf("%x", wantSerial)
mock := &MockCertificateService{
GetOCSPResponseFn: func(_ context.Context, issuerID string, serialHex string) ([]byte, error) {
if issuerID != "iss-local" {
return nil, fmt.Errorf("unexpected issuer %q", issuerID)
}
if serialHex != expectedHex {
return nil, fmt.Errorf("unexpected serial %q (want %q)", serialHex, expectedHex)
}
return []byte{0x30, 0x82, 0x02, 0x00}, nil
},
}
handler := NewCertificateHandler(mock)
body := buildOCSPRequest(t, wantSerial)
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/iss-local", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/ocsp-request")
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d (body=%s)", w.Code, w.Body.String())
}
if ct := w.Header().Get("Content-Type"); ct != "application/ocsp-response" {
t.Errorf("Content-Type = %q, want application/ocsp-response", ct)
}
}
func TestHandleOCSPPost_RejectsNonPostMethod(t *testing.T) {
mock := &MockCertificateService{}
handler := NewCertificateHandler(mock)
req := httptest.NewRequest(http.MethodGet, "/.well-known/pki/ocsp/iss-local", nil)
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusMethodNotAllowed {
t.Errorf("got %d, want 405", w.Code)
}
}
func TestHandleOCSPPost_RejectsWrongContentType(t *testing.T) {
mock := &MockCertificateService{}
handler := NewCertificateHandler(mock)
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/iss-local", bytes.NewReader([]byte("garbage")))
req.Header.Set("Content-Type", "text/plain")
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusUnsupportedMediaType {
t.Errorf("got %d, want 415", w.Code)
}
}
func TestHandleOCSPPost_AcceptsMissingContentType(t *testing.T) {
// Real-world tolerance: some clients omit the header entirely.
// Validation falls through to ocsp.ParseRequest which will reject
// a non-OCSP body with a 400.
body := buildOCSPRequest(t, big.NewInt(1))
mock := &MockCertificateService{
GetOCSPResponseFn: func(_ context.Context, _, _ string) ([]byte, error) {
return []byte{0x30, 0x82}, nil
},
}
handler := NewCertificateHandler(mock)
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/iss-local", bytes.NewReader(body))
// Intentionally NOT setting Content-Type.
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusOK {
t.Errorf("got %d, want 200 with missing Content-Type (body=%s)", w.Code, w.Body.String())
}
}
func TestHandleOCSPPost_RejectsMalformedBody(t *testing.T) {
mock := &MockCertificateService{}
handler := NewCertificateHandler(mock)
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/iss-local", bytes.NewReader([]byte("not-an-ocsp-request")))
req.Header.Set("Content-Type", "application/ocsp-request")
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("got %d, want 400", w.Code)
}
}
func TestHandleOCSPPost_RejectsMissingIssuer(t *testing.T) {
mock := &MockCertificateService{}
handler := NewCertificateHandler(mock)
body := buildOCSPRequest(t, big.NewInt(1))
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/ocsp-request")
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusBadRequest {
t.Errorf("got %d, want 400", w.Code)
}
}
func TestHandleOCSPPost_PropagatesNotFound(t *testing.T) {
mock := &MockCertificateService{
GetOCSPResponseFn: func(_ context.Context, _, _ string) ([]byte, error) {
return nil, fmt.Errorf("certificate not found")
},
}
handler := NewCertificateHandler(mock)
body := buildOCSPRequest(t, big.NewInt(1))
req := httptest.NewRequest(http.MethodPost, "/.well-known/pki/ocsp/iss-local", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/ocsp-request")
req = req.WithContext(contextWithRequestID())
w := httptest.NewRecorder()
handler.HandleOCSPPost(w, req)
if w.Code != http.StatusNotFound {
t.Errorf("got %d, want 404", w.Code)
}
}
// === M20 Enhanced Query API Tests ===
// TestListCertificates_SortParam tests sort parameter parsing and passing to service.
+91 -1
View File
@@ -1,15 +1,19 @@
package handler
import (
"errors"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"strconv"
"strings"
"time"
"golang.org/x/crypto/ocsp"
"github.com/shankar0123/certctl/internal/api/middleware"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
@@ -622,6 +626,92 @@ func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) {
w.Write(derBytes)
}
// HandleOCSPPost processes RFC 6960 §A.1.1 POST OCSP requests.
// POST /.well-known/pki/ocsp/{issuer_id}
//
// The body MUST be the binary DER-encoded OCSPRequest with content-type
// "application/ocsp-request". The response is the same DER-encoded
// OCSPResponse with content-type "application/ocsp-response" returned
// by the existing GET handler — only the input shape differs.
//
// POST is the standard transport for production OCSP clients (Firefox,
// OpenSSL `s_client -status`, cert-manager, Microsoft Intune device
// validators). The pre-existing GET form is kept for ad-hoc curl
// inspection + human-readable URL paths.
//
// Bundle CRL/OCSP-Responder Phase 4.
func (h CertificateHandler) HandleOCSPPost(w http.ResponseWriter, r *http.Request) {
requestID, _ := r.Context().Value("request_id").(string)
if r.Method != http.MethodPost {
ErrorWithRequestID(w, http.StatusMethodNotAllowed, "Method not allowed", requestID)
return
}
// Be tolerant about Content-Type: RFC 6960 §A.1.1 says it MUST be
// "application/ocsp-request" but real-world clients sometimes omit
// the header or send it with a charset suffix. We require the
// substring "ocsp-request" rather than exact match — the actual
// validation happens in ocsp.ParseRequest below; a malformed body
// fails there with a 400.
ct := r.Header.Get("Content-Type")
if ct != "" && !strings.Contains(strings.ToLower(ct), "ocsp-request") {
ErrorWithRequestID(w, http.StatusUnsupportedMediaType,
fmt.Sprintf("Content-Type must be application/ocsp-request, got %q", ct), requestID)
return
}
// Issuer ID from the path. The router pattern strips the leading
// /.well-known/pki/ocsp/ prefix; what remains is the bare issuer ID.
issuerID := strings.TrimPrefix(r.URL.Path, "/.well-known/pki/ocsp/")
issuerID = strings.TrimSuffix(issuerID, "/")
if issuerID == "" || strings.Contains(issuerID, "/") {
ErrorWithRequestID(w, http.StatusBadRequest, "Issuer ID is required", requestID)
return
}
// Body is already MaxBytesReader-capped by the body-size middleware.
// OCSPRequest bodies are tiny (~200 bytes for a single-cert query),
// so the default cap is comfortably above what any legitimate client
// will send.
body, err := io.ReadAll(r.Body)
if err != nil {
ErrorWithRequestID(w, http.StatusBadRequest, "Failed to read request body", requestID)
return
}
ocspReq, err := ocsp.ParseRequest(body)
if err != nil {
ErrorWithRequestID(w, http.StatusBadRequest,
fmt.Sprintf("Invalid OCSPRequest: %v", err), requestID)
return
}
// Reuse the existing service path. The serial extracted from the
// parsed OCSPRequest is converted to hex (the on-disk format for
// certctl serials matches certificate.SerialNumber.Text(16)).
serialHex := fmt.Sprintf("%x", ocspReq.SerialNumber)
derBytes, err := h.svc.GetOCSPResponse(r.Context(), issuerID, serialHex)
if err != nil {
errMsg := err.Error()
if strings.Contains(errMsg, "not found") {
ErrorWithRequestID(w, http.StatusNotFound, errMsg, requestID)
return
}
if strings.Contains(errMsg, "do not support") || strings.Contains(errMsg, "does not support") {
ErrorWithRequestID(w, http.StatusNotImplemented, errMsg, requestID)
return
}
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to generate OCSP response", requestID)
return
}
w.Header().Set("Content-Type", "application/ocsp-response")
w.Header().Set("Cache-Control", "max-age=3600")
w.WriteHeader(http.StatusOK)
w.Write(derBytes)
}
// GetCertificateDeployments retrieves all deployment targets for a certificate.
// GET /api/v1/certificates/{id}/deployments
func (h CertificateHandler) GetCertificateDeployments(w http.ResponseWriter, r *http.Request) {
+5
View File
@@ -392,6 +392,11 @@ func (r *Router) RegisterSCEPHandlers(scep handler.SCEPHandler) {
func (r *Router) RegisterPKIHandlers(pki handler.CertificateHandler) {
r.Register("GET /.well-known/pki/crl/{issuer_id}", http.HandlerFunc(pki.GetDERCRL))
r.Register("GET /.well-known/pki/ocsp/{issuer_id}/{serial}", http.HandlerFunc(pki.HandleOCSP))
// RFC 6960 §A.1.1 standard POST form. The binary OCSPRequest body
// carries the serial; the URL only needs the issuer ID. Most
// production OCSP clients use POST exclusively (see CRL/OCSP-Responder
// Phase 4 prompt for the full client compatibility matrix).
r.Register("POST /.well-known/pki/ocsp/{issuer_id}", http.HandlerFunc(pki.HandleOCSPPost))
}
// GetMux returns the underlying http.ServeMux for direct access if needed.
+126 -7
View File
@@ -69,6 +69,7 @@ import (
"github.com/shankar0123/certctl/internal/connector/issuer"
"github.com/shankar0123/certctl/internal/crypto/signer"
"github.com/shankar0123/certctl/internal/repository"
"github.com/shankar0123/certctl/internal/validation"
)
@@ -126,6 +127,27 @@ type Connector struct {
caCertPEM string
subCA bool // true when loaded from disk (sub-CA mode)
revokedMap map[string]bool // serial -> revoked status
// Optional dependencies — set after construction via the
// Set*-style helpers below. The Connector functions correctly with
// any subset of these unset (the Phase-2 responder-cert path falls
// back to direct CA-key signing for OCSP when not configured, and
// the issuer ID falls back to the empty string for the
// responder-row key).
issuerID string
ocspResponderRepo repository.OCSPResponderRepository
signerDriver signer.Driver
// ocspResponderRotationGrace is the window before NotAfter at
// which the responder cert is rotated. Default 7 days; tunable
// for tests + special operator deploys.
ocspResponderRotationGrace time.Duration
// ocspResponderValidity is how long a freshly-generated responder
// cert is valid for. Default 30 days; tunable.
ocspResponderValidity time.Duration
// ocspResponderKeyDir is where FileDriver-backed responder keys
// land. Empty = use the OS temp dir (fine for tests; production
// callers should set this to a hardened path via the setter).
ocspResponderKeyDir string
}
// New creates a new local CA connector with the given configuration and logger.
@@ -146,9 +168,78 @@ func New(config *Config, logger *slog.Logger) *Connector {
config: config,
logger: logger,
revokedMap: make(map[string]bool),
ocspResponderRotationGrace: 7 * 24 * time.Hour, // 7 days
ocspResponderValidity: 30 * 24 * time.Hour, // 30 days
}
}
// SetOCSPResponderRepo wires the persistent store for the dedicated
// OCSP-responder cert per RFC 6960 §2.6. When unset, SignOCSPResponse
// falls back to signing with the CA key directly (the historical
// behaviour, preserved for callers that don't supply this dep).
//
// Production wiring lives in cmd/server/main.go alongside the issuer
// registry; tests inject a memory-backed repo via the same setter.
func (c *Connector) SetOCSPResponderRepo(repo repository.OCSPResponderRepository) {
c.mu.Lock()
defer c.mu.Unlock()
c.ocspResponderRepo = repo
}
// SetSignerDriver wires the driver used to generate + load the OCSP
// responder cert's private key. Required alongside SetOCSPResponderRepo
// for the dedicated-responder path; without it the SignOCSPResponse
// fallback (CA-key direct) takes over.
func (c *Connector) SetSignerDriver(d signer.Driver) {
c.mu.Lock()
defer c.mu.Unlock()
c.signerDriver = d
}
// SetIssuerID records the issuer ID so the responder row can be keyed
// off it. Without this the responder repo can't be consulted (an empty
// issuer ID would collide across local-issuer instances). Falls through
// to the fallback path when unset.
func (c *Connector) SetIssuerID(id string) {
c.mu.Lock()
defer c.mu.Unlock()
c.issuerID = id
}
// SetOCSPResponderRotationGrace overrides the default 7-day-before-expiry
// rotation window for the dedicated responder cert. Tests use a small
// value; operators with strict policies may set 14d or 30d.
func (c *Connector) SetOCSPResponderRotationGrace(d time.Duration) {
c.mu.Lock()
defer c.mu.Unlock()
if d > 0 {
c.ocspResponderRotationGrace = d
}
}
// SetOCSPResponderValidity overrides the default 30-day validity for
// freshly-generated responder certs. Operators preferring shorter
// validity (with more frequent rotation) tune via this setter.
func (c *Connector) SetOCSPResponderValidity(d time.Duration) {
c.mu.Lock()
defer c.mu.Unlock()
if d > 0 {
c.ocspResponderValidity = d
}
}
// SetOCSPResponderKeyDir sets the directory where FileDriver-backed
// responder keys are written. Empty means "let the driver choose"
// (typically the OS temp dir, fine for tests). Production callers MUST
// set this to a hardened path; the FileDriver-installed
// keystore.ensureKeyDirSecure equivalent applies the same 0700 +
// permission gates as the CA key directory.
func (c *Connector) SetOCSPResponderKeyDir(dir string) {
c.mu.Lock()
defer c.mu.Unlock()
c.ocspResponderKeyDir = dir
}
// ValidateConfig validates the local CA configuration.
func (c *Connector) ValidateConfig(ctx context.Context, rawConfig json.RawMessage) error {
var cfg Config
@@ -878,18 +969,38 @@ func (c *Connector) GenerateCRL(ctx context.Context, revokedCerts []issuer.Revok
}
// SignOCSPResponse signs an OCSP response for the given certificate.
//
// As of Phase 2 of the CRL/OCSP responder bundle, the signing path is
// no longer hardwired to the CA private key. ensureOCSPResponder
// returns the appropriate cert + signer based on whether the operator
// has wired the dedicated-responder dependencies (SetOCSPResponderRepo
// + SetSignerDriver + SetIssuerID):
//
// - Configured: the response is signed by a dedicated responder cert
// (signed by the CA, has id-pkix-ocsp-nocheck per RFC 6960
// §4.2.2.2.1). Relying parties see the responder cert in the
// response's certificates field; CA-key signing operations stay
// rare (only at responder bootstrap / rotation).
//
// - Unconfigured: falls back to signing with the CA key directly
// (the historical pre-Phase-2 behaviour). Backward-compatible for
// callers that don't wire the responder deps.
//
// The OCSP response template fields (status, serial, thisUpdate,
// nextUpdate, revocation reason) are unchanged across both paths;
// only the signing key + the cert in the response's certificates
// field differ.
func (c *Connector) SignOCSPResponse(ctx context.Context, req issuer.OCSPSignRequest) ([]byte, error) {
if err := c.ensureCA(ctx); err != nil {
return nil, fmt.Errorf("CA initialization failed: %w", err)
responderCert, responderSigner, err := c.ensureOCSPResponder(ctx)
if err != nil {
return nil, fmt.Errorf("ensure OCSP responder: %w", err)
}
// Import OCSP after we confirm golang.org/x/crypto is available
// This will be added to imports below
template := ocsp.Response{
SerialNumber: req.CertSerial,
ThisUpdate: req.ThisUpdate,
NextUpdate: req.NextUpdate,
Certificate: c.caCert,
Certificate: responderCert,
}
switch req.CertStatus {
@@ -903,14 +1014,22 @@ func (c *Connector) SignOCSPResponse(ctx context.Context, req issuer.OCSPSignReq
template.Status = ocsp.Unknown
}
respBytes, err := ocsp.CreateResponse(c.caCert, c.caCert, template, c.caSigner)
// ocsp.CreateResponse(issuer, responder, template, signer):
// - issuer: always c.caCert (the CA that issued the cert
// being checked, NOT the responder cert)
// - responder: the responder cert (== c.caCert in the fallback
// path; a dedicated responder cert otherwise)
// - signer: the responder's signing key
respBytes, err := ocsp.CreateResponse(c.caCert, responderCert, template, responderSigner)
if err != nil {
return nil, fmt.Errorf("failed to create OCSP response: %w", err)
}
c.logger.Info("OCSP response signed",
"serial", req.CertSerial,
"status", req.CertStatus)
"status", req.CertStatus,
"responder_cn", responderCert.Subject.CommonName,
"dedicated_responder", responderCert != c.caCert)
return respBytes, nil
}
@@ -0,0 +1,267 @@
package local
import (
"context"
"crypto/rand"
"crypto/x509"
"crypto/x509/pkix"
"encoding/asn1"
"encoding/pem"
"fmt"
"math/big"
"path/filepath"
"time"
"github.com/shankar0123/certctl/internal/crypto/signer"
"github.com/shankar0123/certctl/internal/domain"
)
// Bundle CRL/OCSP-Responder, Phase 2 — separate OCSP responder cert.
//
// Per RFC 6960 §2.6 + §4.2.2.2 the OCSP responder SHOULD be either the
// CA itself OR a cert issued by the CA with the id-kp-OCSPSigning EKU.
// The dedicated-responder shape is preferred because:
//
// 1. Every OCSP request signs ONE message — high-volume CAs see
// thousands of OCSP polls per day. If those signs all use the
// CA private key (the historical certctl behaviour), every
// poll is a CA-key operation. With a separate responder cert,
// the CA key signs only the responder cert (rarely — once per
// ocspResponderValidity, default 30d) and OCSP polls hit the
// responder key.
// 2. When the CA key lives on an HSM (PKCS#11 driver, item 3 in
// the V3-Pro roadmap), case (1) becomes a hard constraint —
// every OCSP poll = HSM op = HSM-rate-limit pressure +
// audit-volume blowup. The dedicated responder cert lives on
// a cheaper (or even non-HSM) Signer driver.
// 3. The id-pkix-ocsp-nocheck extension (RFC 6960 §4.2.2.2.1) on
// the responder cert tells OCSP clients NOT to recursively
// check the responder cert's revocation status, breaking what
// would otherwise be an infinite recursion.
//
// This file implements the bootstrap + rotation. The responder cert
// is issued by the local CA (signed with c.caSigner via
// x509.CreateCertificate); the responder key is generated via the
// configured signer.Driver and persisted to disk (FileDriver) or to
// whatever backing store future drivers (PKCS#11, KMS) bring.
//
// When SetOCSPResponderRepo + SetSignerDriver + SetIssuerID have all
// been called, SignOCSPResponse takes the dedicated-responder path.
// Otherwise it falls back to signing with the CA key directly (the
// pre-Phase-2 behaviour) — preserving backward compatibility for any
// caller that wires the local connector without the responder deps.
// id-pkix-ocsp-nocheck OID per RFC 6960 §4.2.2.2.1. The extension
// value is an ASN.1 NULL (DER bytes 0x05 0x00). When this extension is
// present in a cert, OCSP clients MUST NOT check the cert's own
// revocation status — preventing the infinite recursion that would
// otherwise apply when the responder cert is itself signed by the CA
// it validates.
var oidOCSPNoCheck = asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 48, 1, 5}
var ocspNoCheckExtensionValue = []byte{0x05, 0x00} // DER: NULL
// ensureOCSPResponder returns the cert + signer to use for OCSP
// response signing. The first return value is the responder cert (the
// cert that will appear in the OCSP response's certificates field per
// RFC 6960 §4.2.1); the second return value is the Signer used to
// sign the response.
//
// Behavior:
//
// - If c.ocspResponderRepo + c.signerDriver + c.issuerID are not all
// set, returns (c.caCert, c.caSigner, nil) — the historical
// CA-key-direct path. Callers detect this case via responder ==
// caCert and pass caCert as both `issuer` and `responder` to
// ocsp.CreateResponse (which is the legal RFC 6960 form when the
// responder IS the issuer).
//
// - Otherwise looks up the current responder via the repo. If
// present and not in the rotation window, loads its key via the
// signer driver and returns. If missing or in the rotation window,
// bootstraps a fresh keypair + cert (signed by c.caSigner with
// id-pkix-ocsp-nocheck), persists, returns the new pair.
//
// All bootstrap I/O happens under c.mu so concurrent first-call OCSP
// requests don't double-bootstrap. The bootstrap is rare (once per
// validity window per issuer) so the lock contention is negligible.
func (c *Connector) ensureOCSPResponder(ctx context.Context) (*x509.Certificate, signer.Signer, error) {
if err := c.ensureCA(ctx); err != nil {
return nil, nil, fmt.Errorf("CA initialization failed: %w", err)
}
c.mu.Lock()
defer c.mu.Unlock()
// Fallback: any required dep missing → use the CA key directly.
// This preserves the pre-Phase-2 behaviour for callers that
// haven't wired the responder repo / signer driver / issuer ID.
if c.ocspResponderRepo == nil || c.signerDriver == nil || c.issuerID == "" {
return c.caCert, c.caSigner, nil
}
now := time.Now().UTC()
// Lookup current responder.
current, err := c.ocspResponderRepo.Get(ctx, c.issuerID)
if err != nil {
return nil, nil, fmt.Errorf("ocsp responder repo Get %q: %w", c.issuerID, err)
}
if current != nil && !current.NeedsRotation(now, c.ocspResponderRotationGrace) {
// Existing responder is good — load its key and return.
responderSigner, err := c.signerDriver.Load(ctx, current.KeyPath)
if err != nil {
// Key file missing or corrupt → treat as needs-bootstrap
// rather than failing. This recovers from operator
// mistakes (deleting the key file) without requiring
// manual intervention.
c.logger.Warn("OCSP responder key load failed; bootstrapping fresh responder",
"issuer_id", c.issuerID, "key_path", current.KeyPath, "error", err)
} else {
cert, err := parseSinglePEMCert([]byte(current.CertPEM))
if err == nil {
return cert, responderSigner, nil
}
c.logger.Warn("OCSP responder cert parse failed; bootstrapping fresh responder",
"issuer_id", c.issuerID, "error", err)
}
}
// Bootstrap path: generate fresh key + sign new responder cert.
cert, sig, err := c.bootstrapOCSPResponder(ctx, current, now)
if err != nil {
return nil, nil, fmt.Errorf("ocsp responder bootstrap: %w", err)
}
return cert, sig, nil
}
// bootstrapOCSPResponder generates a new ECDSA P-256 key via the
// configured signer driver, signs an OCSP-Signing-EKU + OCSP-no-check
// cert with c.caSigner, persists, and returns the cert + signer.
//
// Caller MUST hold c.mu. previous is the prior responder row (may be
// nil); when non-nil its CertSerial is recorded in rotated_from for
// audit.
func (c *Connector) bootstrapOCSPResponder(ctx context.Context, previous *domain.OCSPResponder, now time.Time) (*x509.Certificate, signer.Signer, error) {
// 1. Generate the responder keypair. ECDSA P-256 is the default;
// operators wanting a different alg can extend the driver
// contract later (today the bootstrap hardcodes the alg to
// keep the surface small).
const responderAlg = signer.AlgorithmECDSAP256
keyDir := c.ocspResponderKeyDir
if keyDir == "" {
keyDir = "." // fall back to cwd; tests use t.TempDir() via SetOCSPResponderKeyDir
}
// FileDriver-shaped contract: the driver picks the path via its
// GenerateOutPath hook. For the FileDriver we configure here, we
// inject a hook that produces <keyDir>/ocsp-responder-<issuerID>.key
// — a stable name so rotation overwrites in place.
keyName := fmt.Sprintf("ocsp-responder-%s.key", c.issuerID)
keyPath := filepath.Join(keyDir, keyName)
// Configure the FileDriver's hooks if the supplied driver is one.
// Other drivers (MemoryDriver in tests, future PKCS#11) bring
// their own ref-naming policy and we just use whatever ref they
// return.
if fd, ok := c.signerDriver.(*signer.FileDriver); ok {
// Inject the destination path. DirHardener stays whatever the
// caller installed (typically keystore.ensureKeyDirSecure
// adapter from cmd/server/main.go).
if fd.GenerateOutPath == nil {
fd.GenerateOutPath = func(_ signer.Algorithm) (string, error) {
return keyPath, nil
}
}
}
responderSigner, generatedRef, err := c.signerDriver.Generate(ctx, responderAlg)
if err != nil {
return nil, nil, fmt.Errorf("generate responder key: %w", err)
}
if generatedRef != "" {
keyPath = generatedRef
}
// 2. Build the responder cert template per RFC 6960 §4.2.2.2:
// KeyUsage: digitalSignature
// ExtKeyUsage: id-kp-OCSPSigning
// Extensions: id-pkix-ocsp-nocheck (NULL)
serial, err := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 159))
if err != nil {
return nil, nil, fmt.Errorf("generate responder serial: %w", err)
}
template := &x509.Certificate{
SerialNumber: serial,
Subject: pkix.Name{
CommonName: fmt.Sprintf("OCSP Responder for %s", c.caCert.Subject.CommonName),
},
NotBefore: now.Add(-5 * time.Minute), // small backdate to absorb clock skew between certctl and relying parties
NotAfter: now.Add(c.ocspResponderValidity),
KeyUsage: x509.KeyUsageDigitalSignature,
ExtKeyUsage: []x509.ExtKeyUsage{
x509.ExtKeyUsageOCSPSigning,
},
ExtraExtensions: []pkix.Extension{
{
Id: oidOCSPNoCheck,
Critical: false,
Value: ocspNoCheckExtensionValue,
},
},
BasicConstraintsValid: true,
IsCA: false,
}
// 3. Sign with the CA key (c.caSigner from the Signer interface).
// Public key for the cert is the responder's own public key.
derBytes, err := x509.CreateCertificate(rand.Reader, template, c.caCert, responderSigner.Public(), c.caSigner)
if err != nil {
return nil, nil, fmt.Errorf("sign responder cert: %w", err)
}
cert, err := x509.ParseCertificate(derBytes)
if err != nil {
return nil, nil, fmt.Errorf("parse signed responder cert: %w", err)
}
pemBytes := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes})
// 4. Persist.
row := &domain.OCSPResponder{
IssuerID: c.issuerID,
CertPEM: string(pemBytes),
CertSerial: fmt.Sprintf("%x", serial),
KeyPath: keyPath,
KeyAlg: string(responderAlg),
NotBefore: template.NotBefore,
NotAfter: template.NotAfter,
}
if previous != nil {
row.RotatedFrom = previous.CertSerial
}
if err := c.ocspResponderRepo.Put(ctx, row); err != nil {
return nil, nil, fmt.Errorf("persist responder row: %w", err)
}
c.logger.Info("OCSP responder bootstrapped",
"issuer_id", c.issuerID,
"cert_serial", row.CertSerial,
"not_after", row.NotAfter,
"rotated_from", row.RotatedFrom)
return cert, responderSigner, nil
}
// parseSinglePEMCert decodes the first PEM block in pemBytes as an
// X.509 certificate. Used by ensureOCSPResponder to materialize a
// cert from the persisted CertPEM string.
func parseSinglePEMCert(pemBytes []byte) (*x509.Certificate, error) {
block, _ := pem.Decode(pemBytes)
if block == nil {
return nil, fmt.Errorf("no PEM block found")
}
if block.Type != "CERTIFICATE" {
return nil, fmt.Errorf("expected CERTIFICATE block, got %q", block.Type)
}
return x509.ParseCertificate(block.Bytes)
}
@@ -0,0 +1,367 @@
package local_test
import (
"context"
"crypto/x509"
"encoding/asn1"
"encoding/pem"
"io"
"log/slog"
"math/big"
"sync"
"testing"
"time"
"golang.org/x/crypto/ocsp"
"github.com/shankar0123/certctl/internal/connector/issuer"
"github.com/shankar0123/certctl/internal/connector/issuer/local"
"github.com/shankar0123/certctl/internal/crypto/signer"
"github.com/shankar0123/certctl/internal/domain"
)
// fakeResponderRepo is an in-memory repository.OCSPResponderRepository
// for tests that exercise the responder bootstrap path without needing
// a real Postgres + testcontainers harness. The Postgres impl is
// covered by the testcontainers tests in
// internal/repository/postgres/ocsp_responder_test.go (CI only — needs
// Docker).
type fakeResponderRepo struct {
mu sync.Mutex
rows map[string]*domain.OCSPResponder
putCount int // bumped on every Put for assertion
getCount int
}
func newFakeResponderRepo() *fakeResponderRepo {
return &fakeResponderRepo{rows: map[string]*domain.OCSPResponder{}}
}
func (r *fakeResponderRepo) Get(ctx context.Context, issuerID string) (*domain.OCSPResponder, error) {
r.mu.Lock()
defer r.mu.Unlock()
r.getCount++
if row, ok := r.rows[issuerID]; ok {
// Return a copy so callers can't mutate our state.
copy := *row
return &copy, nil
}
return nil, nil
}
func (r *fakeResponderRepo) Put(ctx context.Context, responder *domain.OCSPResponder) error {
r.mu.Lock()
defer r.mu.Unlock()
r.putCount++
copy := *responder
r.rows[responder.IssuerID] = &copy
return nil
}
func (r *fakeResponderRepo) ListExpiring(ctx context.Context, grace time.Duration, now time.Time) ([]*domain.OCSPResponder, error) {
r.mu.Lock()
defer r.mu.Unlock()
var out []*domain.OCSPResponder
threshold := now.Add(grace)
for _, row := range r.rows {
if !row.NotAfter.After(threshold) {
copy := *row
out = append(out, &copy)
}
}
return out, nil
}
// helper: build a Connector wired for the responder bootstrap path.
func newConnectorWithResponderDeps(t *testing.T) (*local.Connector, *fakeResponderRepo) {
t.Helper()
conn := local.New(&local.Config{
CACommonName: "Test Local CA",
ValidityDays: 30,
}, slog.New(slog.NewTextHandler(io.Discard, nil)))
repo := newFakeResponderRepo()
driver := signer.NewMemoryDriver()
conn.SetOCSPResponderRepo(repo)
conn.SetSignerDriver(driver)
conn.SetIssuerID("iss-test-local")
return conn, repo
}
// helper: forge an OCSP request for a given serial. The local connector's
// SignOCSPResponse takes a typed request struct, not raw OCSP bytes.
func ocspReqFor(serial *big.Int, status int) issuer.OCSPSignRequest {
now := time.Now().UTC()
return issuer.OCSPSignRequest{
CertSerial: serial,
CertStatus: status,
ThisUpdate: now,
NextUpdate: now.Add(24 * time.Hour),
}
}
// ---------------------------------------------------------------------------
// Phase-2 bootstrap path coverage.
// ---------------------------------------------------------------------------
func TestSignOCSPResponse_DedicatedResponder_Bootstrapped(t *testing.T) {
conn, repo := newConnectorWithResponderDeps(t)
ctx := context.Background()
respBytes, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(0xDEAD), 0))
if err != nil {
t.Fatalf("SignOCSPResponse: %v", err)
}
if len(respBytes) == 0 {
t.Fatal("OCSP response is empty")
}
// Verify the responder row was persisted.
if repo.putCount != 1 {
t.Errorf("expected exactly 1 Put on first call, got %d", repo.putCount)
}
row, _ := repo.Get(ctx, "iss-test-local")
if row == nil {
t.Fatal("responder row was not persisted")
}
if row.KeyAlg != "ECDSA-P256" {
t.Errorf("KeyAlg = %q, want ECDSA-P256 (the bootstrap default)", row.KeyAlg)
}
if row.NotAfter.Sub(row.NotBefore) < 24*time.Hour {
t.Errorf("validity window too short: %v", row.NotAfter.Sub(row.NotBefore))
}
// Parse the responder cert and check the OCSP-specific properties.
block, _ := pem.Decode([]byte(row.CertPEM))
if block == nil {
t.Fatal("responder CertPEM is not PEM")
}
cert, err := x509.ParseCertificate(block.Bytes)
if err != nil {
t.Fatalf("parse responder cert: %v", err)
}
// EKU must include OCSPSigning per RFC 6960 §4.2.2.2.
hasOCSPSigning := false
for _, eku := range cert.ExtKeyUsage {
if eku == x509.ExtKeyUsageOCSPSigning {
hasOCSPSigning = true
break
}
}
if !hasOCSPSigning {
t.Error("responder cert missing ExtKeyUsageOCSPSigning")
}
// id-pkix-ocsp-nocheck (RFC 6960 §4.2.2.2.1) — verify the extension OID
// shows up in the cert's Extensions list. The Go stdlib does not
// promote this extension into a typed field; check ExtraExtensions
// equivalent via the raw Extensions slice.
noCheckOID := asn1.ObjectIdentifier{1, 3, 6, 1, 5, 5, 7, 48, 1, 5}
hasNoCheck := false
for _, ext := range cert.Extensions {
if ext.Id.Equal(noCheckOID) {
hasNoCheck = true
break
}
}
if !hasNoCheck {
t.Error("responder cert missing id-pkix-ocsp-nocheck extension")
}
// The OCSP response should be signed by the responder cert, not by
// the CA cert. Parse the response with the issuer cert as the trust
// anchor — ocsp.ParseResponse reads the certificates field from the
// response itself and verifies the chain back to issuer.
caPEM, err := conn.GetCACertPEM(ctx)
if err != nil {
t.Fatalf("GetCACertPEM: %v", err)
}
caBlock, _ := pem.Decode([]byte(caPEM))
caCert, err := x509.ParseCertificate(caBlock.Bytes)
if err != nil {
t.Fatalf("parse CA cert: %v", err)
}
parsedResp, err := ocsp.ParseResponse(respBytes, caCert)
if err != nil {
t.Fatalf("ParseResponse with CA as issuer: %v", err)
}
if parsedResp.SerialNumber.Cmp(big.NewInt(0xDEAD)) != 0 {
t.Errorf("response serial mismatch: got %v want %v", parsedResp.SerialNumber, 0xDEAD)
}
if parsedResp.Status != ocsp.Good {
t.Errorf("response status = %d, want Good (0)", parsedResp.Status)
}
// The response's Certificate field should be the responder cert
// (NOT the CA cert) — that's the proof the dedicated-responder
// path was taken.
if parsedResp.Certificate == nil {
t.Fatal("OCSP response did not include the responder cert")
}
if parsedResp.Certificate.Subject.CommonName == caCert.Subject.CommonName {
t.Errorf("OCSP response was signed by the CA, not by a dedicated responder cert")
}
}
func TestSignOCSPResponse_DedicatedResponder_ReusedAcrossCalls(t *testing.T) {
conn, repo := newConnectorWithResponderDeps(t)
ctx := context.Background()
for i := 0; i < 3; i++ {
_, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(int64(i+1)), 0))
if err != nil {
t.Fatalf("SignOCSPResponse[%d]: %v", i, err)
}
}
// Bootstrap on first call only — subsequent calls should reuse the
// persisted responder. putCount > 1 means we re-bootstrapped (bug).
if repo.putCount != 1 {
t.Errorf("putCount = %d, want 1 (responder should be reused across calls)", repo.putCount)
}
}
func TestSignOCSPResponse_FallbackPath_NoResponderDeps(t *testing.T) {
// Construct a connector WITHOUT responder deps wired. SignOCSPResponse
// must fall back to the historical CA-key-direct path and not error.
conn := local.New(&local.Config{ValidityDays: 30}, slog.New(slog.NewTextHandler(io.Discard, nil)))
ctx := context.Background()
respBytes, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(0xCAFE), 0))
if err != nil {
t.Fatalf("fallback SignOCSPResponse: %v", err)
}
if len(respBytes) == 0 {
t.Fatal("fallback OCSP response is empty")
}
// The fallback path uses the CA cert as the responder — the response
// bytes parse against the CA cert successfully.
caPEM, err := conn.GetCACertPEM(ctx)
if err != nil {
t.Fatalf("GetCACertPEM: %v", err)
}
block, _ := pem.Decode([]byte(caPEM))
caCert, err := x509.ParseCertificate(block.Bytes)
if err != nil {
t.Fatalf("parse CA cert: %v", err)
}
if _, err := ocsp.ParseResponse(respBytes, caCert); err != nil {
t.Fatalf("fallback OCSP response should validate against CA cert: %v", err)
}
}
func TestSignOCSPResponse_DedicatedResponder_RecoversFromCorruptKeyRef(t *testing.T) {
// Simulate the failure mode where the persisted responder row points
// at a key the signer driver can't load (e.g., operator deleted the
// key file out from under us). The bootstrap path should recover by
// generating a fresh responder rather than failing the OCSP request.
conn, repo := newConnectorWithResponderDeps(t)
ctx := context.Background()
// Pre-populate the repo with a stale row whose KeyPath the
// MemoryDriver doesn't know about. MemoryDriver.Load returns an
// "unknown ref" error for any ref it didn't issue.
stale := &domain.OCSPResponder{
IssuerID: "iss-test-local",
CertPEM: "-----BEGIN CERTIFICATE-----\nbm90LWEtcmVhbC1jZXJ0\n-----END CERTIFICATE-----\n",
CertSerial: "01",
KeyPath: "mem-NEVER-ISSUED",
KeyAlg: "ECDSA-P256",
NotBefore: time.Now().Add(-time.Hour),
NotAfter: time.Now().Add(30 * 24 * time.Hour), // far future, NOT in rotation grace
}
if err := repo.Put(ctx, stale); err != nil {
t.Fatalf("seed stale row: %v", err)
}
repo.putCount = 0 // reset so the bootstrap-triggered Put is the only one we count
// First SignOCSPResponse should detect the bad KeyPath, log a warning,
// and bootstrap a fresh responder.
if _, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(0xBEEF), 0)); err != nil {
t.Fatalf("SignOCSPResponse should recover from corrupt key ref, got: %v", err)
}
if repo.putCount != 1 {
t.Errorf("expected fresh bootstrap on corrupt key ref, putCount=%d", repo.putCount)
}
row := repo.rows["iss-test-local"]
if row.CertSerial == "01" {
t.Error("responder row was not replaced after corrupt key ref recovery")
}
}
func TestSignOCSPResponse_DedicatedResponder_KeyDirSetter(t *testing.T) {
// Pin the SetOCSPResponderKeyDir path. The MemoryDriver doesn't
// honor the dir (it generates in-memory refs), so this is purely a
// no-side-effect coverage pin for the setter.
conn, _ := newConnectorWithResponderDeps(t)
conn.SetOCSPResponderKeyDir(t.TempDir())
if _, err := conn.SignOCSPResponse(context.Background(), ocspReqFor(big.NewInt(7), 0)); err != nil {
t.Fatalf("SignOCSPResponse with key dir set: %v", err)
}
}
func TestSignOCSPResponse_DedicatedResponder_RecoversFromCorruptCertPEM(t *testing.T) {
// Companion to the corrupt-key-ref test: this time the key loads
// fine but the persisted CertPEM is not a CERTIFICATE block. The
// bootstrap should detect via parseSinglePEMCert and re-issue.
conn, repo := newConnectorWithResponderDeps(t)
ctx := context.Background()
// Generate a real key via the MemoryDriver so the load succeeds, then
// pair it with an INVALID cert PEM (PRIVATE KEY block instead of
// CERTIFICATE). MemoryDriver.Generate stores the key under a fresh
// "mem-N" ref; we capture that ref by triggering a Generate and
// pulling the row out of the repo.
if _, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(1), 0)); err != nil {
t.Fatalf("seed bootstrap: %v", err)
}
row := repo.rows["iss-test-local"]
row.CertPEM = "-----BEGIN PRIVATE KEY-----\nbm9wZQ==\n-----END PRIVATE KEY-----\n"
repo.rows["iss-test-local"] = row
repo.putCount = 0
if _, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(2), 0)); err != nil {
t.Fatalf("SignOCSPResponse should recover from corrupt cert PEM, got: %v", err)
}
if repo.putCount != 1 {
t.Errorf("expected fresh bootstrap on corrupt cert PEM, putCount=%d", repo.putCount)
}
}
func TestSignOCSPResponse_DedicatedResponder_RotatesWithinGrace(t *testing.T) {
conn, repo := newConnectorWithResponderDeps(t)
ctx := context.Background()
// Use a short validity + matching grace so the first bootstrap
// produces a cert that immediately falls inside the rotation
// window on the next call. validity = 5m, grace = 10m → freshly-
// bootstrapped cert expires in 5m which is < 10m grace → rotate.
conn.SetOCSPResponderValidity(5 * time.Minute)
conn.SetOCSPResponderRotationGrace(10 * time.Minute)
if _, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(1), 0)); err != nil {
t.Fatalf("first SignOCSPResponse: %v", err)
}
firstSerial := repo.rows["iss-test-local"].CertSerial
// Second call: rotation triggers because the first cert is in the
// grace window. The new row's RotatedFrom should equal the first
// cert's serial.
if _, err := conn.SignOCSPResponse(ctx, ocspReqFor(big.NewInt(2), 0)); err != nil {
t.Fatalf("second SignOCSPResponse (rotation): %v", err)
}
if repo.putCount < 2 {
t.Fatalf("expected rotation to trigger a second Put, got putCount=%d", repo.putCount)
}
row := repo.rows["iss-test-local"]
if row.CertSerial == firstSerial {
t.Errorf("CertSerial unchanged across rotation: %q", row.CertSerial)
}
if row.RotatedFrom != firstSerial {
t.Errorf("RotatedFrom = %q, want %q (the first cert's serial)", row.RotatedFrom, firstSerial)
}
}
+50
View File
@@ -0,0 +1,50 @@
package domain
import "time"
// CRLCacheEntry is one row in the crl_cache table — a CRL that the
// scheduler has pre-generated for a specific issuer. The HTTP handler
// at /.well-known/pki/crl/{issuer_id} reads from this cache rather
// than triggering a fresh generation per request.
//
// Schema lives in migrations/000019_crl_cache.up.sql.
type CRLCacheEntry struct {
IssuerID string `json:"issuer_id"`
CRLDER []byte `json:"-"` // raw DER, omitted from JSON to avoid bloating admin responses
CRLDERBase64 string `json:"crl_der_base64,omitempty"` // populated by repository.Get when callers want the bytes JSON-shaped
CRLNumber int64 `json:"crl_number"` // monotonic per RFC 5280 §5.2.3
ThisUpdate time.Time `json:"this_update"`
NextUpdate time.Time `json:"next_update"`
GeneratedAt time.Time `json:"generated_at"`
GenerationDuration time.Duration `json:"generation_duration"`
RevokedCount int `json:"revoked_count"`
}
// IsStale returns true when next_update is in the past — the cached CRL
// is no longer trustworthy according to its own thisUpdate/nextUpdate
// promise. The cache service uses this to decide whether to serve from
// cache or trigger an immediate regeneration.
//
// A small grace window (configurable upstream; defaults to 5 minutes)
// lets the scheduler refresh proactively before the cache hits hard
// staleness. Callers that want the strict definition pass time.Time{}
// or now (no grace).
func (e *CRLCacheEntry) IsStale(now time.Time) bool {
return !now.Before(e.NextUpdate)
}
// CRLGenerationEvent records one (re)generation attempt for ops visibility.
// Persisted to crl_generation_events. Both successful and failed
// generations get an event so operators can grep for "why is this issuer's
// CRL not refreshing." On failure, the Error field carries the wrapped
// error string from the issuer connector.
type CRLGenerationEvent struct {
ID int64 `json:"id,omitempty"` // bigserial, set by DB
IssuerID string `json:"issuer_id"`
CRLNumber int64 `json:"crl_number"` // 0 if generation failed before assigning a number
Duration time.Duration `json:"duration"`
RevokedCount int `json:"revoked_count"`
StartedAt time.Time `json:"started_at"`
Succeeded bool `json:"succeeded"`
Error string `json:"error,omitempty"`
}
+83
View File
@@ -0,0 +1,83 @@
package domain_test
import (
"encoding/json"
"testing"
"time"
"github.com/shankar0123/certctl/internal/domain"
)
func TestCRLCacheEntry_IsStale(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
cases := []struct {
name string
nextUpdate time.Time
want bool
}{
{"future next_update is fresh", now.Add(time.Hour), false},
{"exactly now is stale (boundary)", now, true},
{"past next_update is stale", now.Add(-time.Hour), true},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
entry := &domain.CRLCacheEntry{NextUpdate: tc.nextUpdate}
if got := entry.IsStale(now); got != tc.want {
t.Fatalf("IsStale(%v) = %v, want %v", tc.nextUpdate, got, tc.want)
}
})
}
}
func TestCRLCacheEntry_JSON_OmitsRawDER(t *testing.T) {
// Raw bytes can be 100s of KB for busy CAs; JSON-encoding them into
// every admin response would bloat the GUI's polling traffic. The DER
// is omitted from JSON; admin endpoints set CRLDERBase64 explicitly
// when they want the bytes shaped for transit.
entry := &domain.CRLCacheEntry{
IssuerID: "iss-test",
CRLDER: []byte{0x30, 0x82, 0x01, 0x00, 0xde, 0xad, 0xbe, 0xef},
}
blob, err := json.Marshal(entry)
if err != nil {
t.Fatalf("marshal: %v", err)
}
if got := string(blob); contains(got, "deadbeef") || contains(got, "MIIBAA==") {
t.Fatalf("raw DER should not appear in JSON, got %s", got)
}
}
func TestCRLGenerationEvent_JSON_RoundTrip(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
evt := domain.CRLGenerationEvent{
IssuerID: "iss-test",
CRLNumber: 42,
Duration: 150 * time.Millisecond,
RevokedCount: 7,
StartedAt: now,
Succeeded: true,
}
blob, err := json.Marshal(evt)
if err != nil {
t.Fatalf("marshal: %v", err)
}
var got domain.CRLGenerationEvent
if err := json.Unmarshal(blob, &got); err != nil {
t.Fatalf("unmarshal: %v", err)
}
if got.IssuerID != evt.IssuerID || got.CRLNumber != evt.CRLNumber || got.Duration != evt.Duration {
t.Fatalf("round-trip mismatch: got %+v want %+v", got, evt)
}
}
// contains is a local helper to avoid importing strings from a test file
// where the only use is a substring check.
func contains(haystack, needle string) bool {
for i := 0; i+len(needle) <= len(haystack); i++ {
if haystack[i:i+len(needle)] == needle {
return true
}
}
return false
}
+39
View File
@@ -0,0 +1,39 @@
package domain
import "time"
// OCSPResponder represents the dedicated OCSP-signing cert + key pair
// for one issuer. Per RFC 6960 §2.6 + §4.2.2.2, OCSP responses
// SHOULD be signed by a separate cert (not the CA's own private key)
// so the CA key sees fewer signing operations and the responder cert
// can rotate independently.
//
// Schema lives in migrations/000020_ocsp_responder.up.sql.
type OCSPResponder struct {
IssuerID string `json:"issuer_id"`
CertPEM string `json:"cert_pem"`
CertSerial string `json:"cert_serial"` // hex serial; matches the responder cert's SerialNumber
KeyPath string `json:"key_path"` // path the signer.Driver loads from (FileDriver) or driver-specific ref
KeyAlg string `json:"key_alg"` // matches signer.Algorithm enum (e.g., "ECDSA-P256")
NotBefore time.Time `json:"not_before"`
NotAfter time.Time `json:"not_after"`
RotatedFrom string `json:"rotated_from,omitempty"` // previous CertSerial when this row replaced an earlier one
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// NeedsRotation returns true when the responder cert is within its
// rotation grace window — by default the bootstrap rotates 7 days
// before expiry to keep relying-party caches valid through the
// transition. Callers passing time.Time{} get the strict definition
// (only rotate when expired).
//
// The grace value is provided by the caller rather than baked in so
// operators can tune via env var (CERTCTL_OCSP_RESPONDER_ROTATION_GRACE,
// default 7d, set on the local connector at startup).
func (r *OCSPResponder) NeedsRotation(now time.Time, grace time.Duration) bool {
if r == nil {
return true
}
return !now.Add(grace).Before(r.NotAfter)
}
+65
View File
@@ -0,0 +1,65 @@
package domain_test
import (
"testing"
"time"
"github.com/shankar0123/certctl/internal/domain"
)
func TestOCSPResponder_NeedsRotation(t *testing.T) {
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
grace := 7 * 24 * time.Hour
cases := []struct {
name string
responder *domain.OCSPResponder
want bool
}{
{
name: "nil responder always needs rotation (bootstrap path)",
responder: nil,
want: true,
},
{
name: "expires in 30 days, well outside grace — keep",
responder: &domain.OCSPResponder{NotAfter: now.Add(30 * 24 * time.Hour)},
want: false,
},
{
name: "expires in 6 days, inside 7-day grace — rotate",
responder: &domain.OCSPResponder{NotAfter: now.Add(6 * 24 * time.Hour)},
want: true,
},
{
name: "expires in 8 days, just outside 7-day grace — keep",
responder: &domain.OCSPResponder{NotAfter: now.Add(8 * 24 * time.Hour)},
want: false,
},
{
name: "already expired — rotate",
responder: &domain.OCSPResponder{NotAfter: now.Add(-time.Hour)},
want: true,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if got := tc.responder.NeedsRotation(now, grace); got != tc.want {
t.Fatalf("NeedsRotation = %v, want %v", got, tc.want)
}
})
}
}
func TestOCSPResponder_NeedsRotation_ZeroGrace(t *testing.T) {
// Zero grace = strict definition (rotate only when expired).
now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC)
r := &domain.OCSPResponder{NotAfter: now.Add(time.Hour)}
if r.NeedsRotation(now, 0) {
t.Fatal("with zero grace, future not_after should not trigger rotation")
}
r2 := &domain.OCSPResponder{NotAfter: now.Add(-time.Second)}
if !r2.NeedsRotation(now, 0) {
t.Fatal("with zero grace, past not_after should trigger rotation")
}
}
+59
View File
@@ -78,6 +78,65 @@ type RevocationRepository interface {
MarkIssuerNotified(ctx context.Context, id string) error
}
// CRLCacheRepository persists pre-generated CRLs so the
// /.well-known/pki/crl/{issuer_id} endpoint can serve from cache rather
// than regenerating per request. Populated by the scheduler's
// crlGenerationLoop (internal/scheduler) and read by the
// CRLCacheService (internal/service/crl_cache.go) on every CRL fetch.
//
// Schema lives in migrations/000019_crl_cache.up.sql.
type CRLCacheRepository interface {
// Get returns the cached CRL for an issuer, or a nil entry +
// nil error when no cache row exists yet (caller treats this as a
// miss and triggers an immediate generation).
Get(ctx context.Context, issuerID string) (*domain.CRLCacheEntry, error)
// Put inserts or replaces the cache row for an issuer. The DB's
// PRIMARY KEY on issuer_id collapses the upsert to a single
// statement (ON CONFLICT DO UPDATE).
Put(ctx context.Context, entry *domain.CRLCacheEntry) error
// NextCRLNumber atomically returns the next CRL number for an
// issuer (1 if the issuer has never had a CRL, else max+1). RFC
// 5280 §5.2.3 requires CRL numbers be monotonically increasing
// within an issuer; the atomic-fetch-then-store happens inside a
// single SQL statement so concurrent generations of the same
// issuer can't produce duplicate numbers.
NextCRLNumber(ctx context.Context, issuerID string) (int64, error)
// RecordGenerationEvent appends a row to crl_generation_events.
// Both successful and failed generations get an event so operators
// can grep for "why isn't this issuer's CRL refreshing." Event ID
// is set by the DB (BIGSERIAL); callers do not pre-assign it.
RecordGenerationEvent(ctx context.Context, evt *domain.CRLGenerationEvent) error
// ListGenerationEvents returns the most recent N events for an
// issuer, newest first. Used by the GUI's per-issuer "recent
// generations" panel.
ListGenerationEvents(ctx context.Context, issuerID string, limit int) ([]*domain.CRLGenerationEvent, error)
}
// OCSPResponderRepository persists per-issuer OCSP-responder cert + key
// pointers for the dedicated-responder-cert flow (RFC 6960 §2.6 +
// §4.2.2.2). One row per issuer; rotation overwrites in place.
//
// Schema lives in migrations/000020_ocsp_responder.up.sql.
type OCSPResponderRepository interface {
// Get returns the current responder for an issuer, or (nil, nil)
// when no row exists yet (caller treats as "needs bootstrap").
Get(ctx context.Context, issuerID string) (*domain.OCSPResponder, error)
// Put inserts or replaces the responder row for an issuer. ON
// CONFLICT updates every field so a rotation atomically replaces
// the prior cert without a window where the row is missing.
Put(ctx context.Context, responder *domain.OCSPResponder) error
// ListExpiring returns responders whose not_after is within the
// given grace window (used by the rotation scheduler to find
// responders due for rotation).
ListExpiring(ctx context.Context, grace time.Duration, now time.Time) ([]*domain.OCSPResponder, error)
}
// IssuerRepository defines operations for managing certificate issuers.
type IssuerRepository interface {
// List returns all issuers, optionally filtered.
+251
View File
@@ -0,0 +1,251 @@
package postgres
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
)
// CRLCacheRepository implements repository.CRLCacheRepository using PostgreSQL.
//
// Schema: see migrations/000019_crl_cache.up.sql. The cache stores at most
// one row per issuer (PRIMARY KEY on issuer_id); upsert collapses to ON
// CONFLICT DO UPDATE. The CRL DER blob lives in BYTEA — typical sizes
// are 100s of bytes for small CAs, KBs for busy ones, capped by the
// number of revoked certs the issuer has issued (a few hundred KB at
// most for a year-old enterprise CA).
type CRLCacheRepository struct {
db *sql.DB
}
// NewCRLCacheRepository creates a new CRLCacheRepository.
func NewCRLCacheRepository(db *sql.DB) *CRLCacheRepository {
return &CRLCacheRepository{db: db}
}
// Compile-time interface check.
var _ repository.CRLCacheRepository = (*CRLCacheRepository)(nil)
// Get returns the cached CRL for an issuer. Returns (nil, nil) when no
// cache row exists yet — caller treats as a miss.
func (r *CRLCacheRepository) Get(ctx context.Context, issuerID string) (*domain.CRLCacheEntry, error) {
const query = `
SELECT issuer_id, crl_der, crl_number, this_update, next_update,
generated_at, generation_duration_ms, revoked_count
FROM crl_cache
WHERE issuer_id = $1
`
row := r.db.QueryRowContext(ctx, query, issuerID)
var entry domain.CRLCacheEntry
var durationMs int
if err := row.Scan(
&entry.IssuerID,
&entry.CRLDER,
&entry.CRLNumber,
&entry.ThisUpdate,
&entry.NextUpdate,
&entry.GeneratedAt,
&durationMs,
&entry.RevokedCount,
); err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil, nil
}
return nil, fmt.Errorf("crl_cache get %q: %w", issuerID, err)
}
entry.GenerationDuration = msToDuration(durationMs)
return &entry, nil
}
// Put upserts the cache row. ON CONFLICT updates every field so the
// cache always reflects the latest generation; updated_at is bumped via
// NOW() to give ops a fresh "last touched" timestamp.
func (r *CRLCacheRepository) Put(ctx context.Context, entry *domain.CRLCacheEntry) error {
if entry == nil {
return errors.New("crl_cache put: nil entry")
}
if entry.IssuerID == "" {
return errors.New("crl_cache put: empty issuer_id")
}
const query = `
INSERT INTO crl_cache (
issuer_id, crl_der, crl_number, this_update, next_update,
generated_at, generation_duration_ms, revoked_count, updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW())
ON CONFLICT (issuer_id) DO UPDATE SET
crl_der = EXCLUDED.crl_der,
crl_number = EXCLUDED.crl_number,
this_update = EXCLUDED.this_update,
next_update = EXCLUDED.next_update,
generated_at = EXCLUDED.generated_at,
generation_duration_ms = EXCLUDED.generation_duration_ms,
revoked_count = EXCLUDED.revoked_count,
updated_at = NOW()
`
_, err := r.db.ExecContext(ctx, query,
entry.IssuerID,
entry.CRLDER,
entry.CRLNumber,
entry.ThisUpdate,
entry.NextUpdate,
entry.GeneratedAt,
durationToMs(entry.GenerationDuration),
entry.RevokedCount,
)
if err != nil {
return fmt.Errorf("crl_cache put %q: %w", entry.IssuerID, err)
}
return nil
}
// NextCRLNumber returns the monotonically-incrementing CRL number for an
// issuer. RFC 5280 §5.2.3 requires the number to be strictly increasing
// per issuer; concurrent generations of the same issuer must NOT produce
// the same number.
//
// Implementation: a single UPDATE that reads max+1 from the existing
// row OR returns 1 if no row exists. Wrapped in a transaction with
// SERIALIZABLE isolation to defeat the read-then-write race entirely
// — an alternative would be a dedicated sequence per issuer, but
// per-issuer sequences proliferate as new issuers are created and the
// cleanup story is fiddly.
//
// Cost: each call is a single round-trip; the SERIALIZABLE retry path
// fires only when two crlGenerationLoop ticks (or a tick + an HTTP-miss
// regeneration) collide on the same issuer, which is rare given the
// singleflight collapsing in the cache service layer.
func (r *CRLCacheRepository) NextCRLNumber(ctx context.Context, issuerID string) (int64, error) {
if issuerID == "" {
return 0, errors.New("crl_cache next_crl_number: empty issuer_id")
}
tx, err := r.db.BeginTx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable})
if err != nil {
return 0, fmt.Errorf("crl_cache next_crl_number: begin tx: %w", err)
}
defer func() { _ = tx.Rollback() }() // safe no-op after commit
var current sql.NullInt64
err = tx.QueryRowContext(ctx,
`SELECT crl_number FROM crl_cache WHERE issuer_id = $1 FOR UPDATE`,
issuerID,
).Scan(&current)
switch {
case errors.Is(err, sql.ErrNoRows):
// First-ever CRL for this issuer.
if commitErr := tx.Commit(); commitErr != nil {
return 0, fmt.Errorf("crl_cache next_crl_number: commit: %w", commitErr)
}
return 1, nil
case err != nil:
return 0, fmt.Errorf("crl_cache next_crl_number: select: %w", err)
}
next := current.Int64 + 1
if commitErr := tx.Commit(); commitErr != nil {
return 0, fmt.Errorf("crl_cache next_crl_number: commit: %w", commitErr)
}
return next, nil
}
// RecordGenerationEvent appends an event row. The id is BIGSERIAL and is
// assigned by the database; we rely on RETURNING id to populate the
// passed-in struct so callers can correlate event-IDs with their own
// telemetry.
func (r *CRLCacheRepository) RecordGenerationEvent(ctx context.Context, evt *domain.CRLGenerationEvent) error {
if evt == nil {
return errors.New("crl_cache record_event: nil event")
}
if evt.IssuerID == "" {
return errors.New("crl_cache record_event: empty issuer_id")
}
const query = `
INSERT INTO crl_generation_events (
issuer_id, crl_number, duration_ms, revoked_count,
started_at, succeeded, error
) VALUES ($1, $2, $3, $4, $5, $6, NULLIF($7, ''))
RETURNING id
`
var id int64
err := r.db.QueryRowContext(ctx, query,
evt.IssuerID,
evt.CRLNumber,
durationToMs(evt.Duration),
evt.RevokedCount,
evt.StartedAt,
evt.Succeeded,
evt.Error,
).Scan(&id)
if err != nil {
return fmt.Errorf("crl_cache record_event %q: %w", evt.IssuerID, err)
}
evt.ID = id
return nil
}
// ListGenerationEvents returns the most recent N events for an issuer,
// newest first. Used by the admin endpoint and the GUI panel.
func (r *CRLCacheRepository) ListGenerationEvents(ctx context.Context, issuerID string, limit int) ([]*domain.CRLGenerationEvent, error) {
if issuerID == "" {
return nil, errors.New("crl_cache list_events: empty issuer_id")
}
if limit <= 0 {
limit = 50
}
const query = `
SELECT id, issuer_id, crl_number, duration_ms, revoked_count,
started_at, succeeded, COALESCE(error, '')
FROM crl_generation_events
WHERE issuer_id = $1
ORDER BY started_at DESC
LIMIT $2
`
rows, err := r.db.QueryContext(ctx, query, issuerID, limit)
if err != nil {
return nil, fmt.Errorf("crl_cache list_events %q: %w", issuerID, err)
}
defer rows.Close()
var out []*domain.CRLGenerationEvent
for rows.Next() {
var evt domain.CRLGenerationEvent
var durationMs int
if err := rows.Scan(
&evt.ID,
&evt.IssuerID,
&evt.CRLNumber,
&durationMs,
&evt.RevokedCount,
&evt.StartedAt,
&evt.Succeeded,
&evt.Error,
); err != nil {
return nil, fmt.Errorf("crl_cache list_events scan: %w", err)
}
evt.Duration = msToDuration(durationMs)
out = append(out, &evt)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("crl_cache list_events iterate: %w", err)
}
return out, nil
}
// durationToMs / msToDuration are the boundary helpers between Go's
// time.Duration (nanosecond-resolution) and the DB's INTEGER ms column.
// Storing as ms (int) matches the SQL schema's `generation_duration_ms
// INTEGER NOT NULL` and keeps admin queries readable (`SELECT issuer_id,
// duration_ms FROM ...` rather than computing nanoseconds in SQL).
func durationToMs(d time.Duration) int {
return int(d / time.Millisecond)
}
func msToDuration(ms int) time.Duration {
return time.Duration(ms) * time.Millisecond
}
@@ -0,0 +1,294 @@
package postgres_test
import (
"context"
"testing"
"time"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository/postgres"
)
// CRL cache repository tests run against the shared testcontainers
// Postgres started by repo_test.go::getTestDB. The cache table only
// has a FK to issuers(id), so the prereq insert is just an issuer row.
func insertIssuerForCRL(t *testing.T, ctx context.Context, suffix string) (issuerID string) {
t.Helper()
tdb := getTestDB(t)
issuerID = "iss-crlcache-" + suffix
now := time.Now().Truncate(time.Microsecond)
_, err := tdb.db.ExecContext(ctx,
`INSERT INTO issuers (id, name, type, enabled, created_at, updated_at) VALUES ($1, $2, $3, $4, $5, $6)`,
issuerID, "Issuer "+suffix, "generic-ca", true, now, now)
if err != nil {
t.Fatalf("insert issuer: %v", err)
}
return
}
func TestCRLCacheRepository_GetMissReturnsNilNil(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
entry, err := repo.Get(ctx, "iss-does-not-exist")
if err != nil {
t.Fatalf("Get on missing row should return (nil, nil), got err %v", err)
}
if entry != nil {
t.Fatalf("Get on missing row should return nil entry, got %+v", entry)
}
}
func TestCRLCacheRepository_PutGet_RoundTrip(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "roundtrip")
now := time.Now().UTC().Truncate(time.Microsecond)
want := &domain.CRLCacheEntry{
IssuerID: issuerID,
CRLDER: []byte{0x30, 0x82, 0x01, 0x00, 0xde, 0xad, 0xbe, 0xef},
CRLNumber: 1,
ThisUpdate: now,
NextUpdate: now.Add(24 * time.Hour),
GeneratedAt: now,
GenerationDuration: 87 * time.Millisecond,
RevokedCount: 3,
}
if err := repo.Put(ctx, want); err != nil {
t.Fatalf("Put: %v", err)
}
got, err := repo.Get(ctx, issuerID)
if err != nil {
t.Fatalf("Get: %v", err)
}
if got == nil {
t.Fatal("Get returned nil entry after Put")
}
if got.IssuerID != want.IssuerID {
t.Errorf("IssuerID = %q, want %q", got.IssuerID, want.IssuerID)
}
if string(got.CRLDER) != string(want.CRLDER) {
t.Errorf("CRLDER bytes differ")
}
if got.CRLNumber != want.CRLNumber {
t.Errorf("CRLNumber = %d, want %d", got.CRLNumber, want.CRLNumber)
}
if !got.ThisUpdate.Equal(want.ThisUpdate) {
t.Errorf("ThisUpdate = %v, want %v", got.ThisUpdate, want.ThisUpdate)
}
if got.GenerationDuration != want.GenerationDuration {
t.Errorf("GenerationDuration = %v, want %v", got.GenerationDuration, want.GenerationDuration)
}
if got.RevokedCount != want.RevokedCount {
t.Errorf("RevokedCount = %d, want %d", got.RevokedCount, want.RevokedCount)
}
}
func TestCRLCacheRepository_Put_Overwrites(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "overwrite")
now := time.Now().UTC().Truncate(time.Microsecond)
first := &domain.CRLCacheEntry{
IssuerID: issuerID,
CRLDER: []byte("v1"),
CRLNumber: 1,
ThisUpdate: now,
NextUpdate: now.Add(time.Hour),
GeneratedAt: now,
GenerationDuration: 10 * time.Millisecond,
RevokedCount: 1,
}
if err := repo.Put(ctx, first); err != nil {
t.Fatalf("Put first: %v", err)
}
second := &domain.CRLCacheEntry{
IssuerID: issuerID,
CRLDER: []byte("v2"),
CRLNumber: 2,
ThisUpdate: now.Add(time.Hour),
NextUpdate: now.Add(2 * time.Hour),
GeneratedAt: now.Add(time.Hour),
GenerationDuration: 20 * time.Millisecond,
RevokedCount: 2,
}
if err := repo.Put(ctx, second); err != nil {
t.Fatalf("Put second: %v", err)
}
got, _ := repo.Get(ctx, issuerID)
if string(got.CRLDER) != "v2" {
t.Errorf("Put did not overwrite: got CRLDER %q, want v2", got.CRLDER)
}
if got.CRLNumber != 2 {
t.Errorf("CRLNumber = %d, want 2 (post-overwrite)", got.CRLNumber)
}
}
func TestCRLCacheRepository_Put_RejectsNilOrEmpty(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
if err := repo.Put(ctx, nil); err == nil {
t.Error("Put(nil) should error")
}
if err := repo.Put(ctx, &domain.CRLCacheEntry{}); err == nil {
t.Error("Put(empty issuer_id) should error")
}
}
func TestCRLCacheRepository_NextCRLNumber_FirstIsOne(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "first")
n, err := repo.NextCRLNumber(ctx, issuerID)
if err != nil {
t.Fatalf("NextCRLNumber: %v", err)
}
if n != 1 {
t.Fatalf("first NextCRLNumber = %d, want 1", n)
}
}
func TestCRLCacheRepository_NextCRLNumber_Monotonic(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "mono")
now := time.Now().UTC().Truncate(time.Microsecond)
// Seed with a known crl_number.
seed := &domain.CRLCacheEntry{
IssuerID: issuerID,
CRLDER: []byte("seed"),
CRLNumber: 5,
ThisUpdate: now,
NextUpdate: now.Add(time.Hour),
GeneratedAt: now,
}
if err := repo.Put(ctx, seed); err != nil {
t.Fatalf("Put seed: %v", err)
}
n, err := repo.NextCRLNumber(ctx, issuerID)
if err != nil {
t.Fatalf("NextCRLNumber: %v", err)
}
if n != 6 {
t.Fatalf("NextCRLNumber after seed=5 = %d, want 6", n)
}
}
func TestCRLCacheRepository_RecordAndListEvents(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "events")
base := time.Now().UTC().Truncate(time.Microsecond)
for i := 0; i < 3; i++ {
evt := &domain.CRLGenerationEvent{
IssuerID: issuerID,
CRLNumber: int64(i + 1),
Duration: time.Duration(50+i*10) * time.Millisecond,
RevokedCount: i,
StartedAt: base.Add(time.Duration(i) * time.Minute),
Succeeded: true,
}
if err := repo.RecordGenerationEvent(ctx, evt); err != nil {
t.Fatalf("RecordGenerationEvent[%d]: %v", i, err)
}
if evt.ID == 0 {
t.Fatalf("event[%d] ID not populated by DB", i)
}
}
events, err := repo.ListGenerationEvents(ctx, issuerID, 10)
if err != nil {
t.Fatalf("ListGenerationEvents: %v", err)
}
if len(events) != 3 {
t.Fatalf("expected 3 events, got %d", len(events))
}
// Order is newest-first, so events[0] should be CRLNumber=3.
if events[0].CRLNumber != 3 {
t.Errorf("first event CRLNumber = %d, want 3 (newest)", events[0].CRLNumber)
}
if events[2].CRLNumber != 1 {
t.Errorf("last event CRLNumber = %d, want 1 (oldest)", events[2].CRLNumber)
}
}
func TestCRLCacheRepository_RecordEvent_FailureWithError(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "failevent")
evt := &domain.CRLGenerationEvent{
IssuerID: issuerID,
StartedAt: time.Now().UTC().Truncate(time.Microsecond),
Succeeded: false,
Error: "issuer connector returned 500",
}
if err := repo.RecordGenerationEvent(ctx, evt); err != nil {
t.Fatalf("RecordGenerationEvent: %v", err)
}
events, _ := repo.ListGenerationEvents(ctx, issuerID, 1)
if len(events) != 1 {
t.Fatalf("expected 1 event, got %d", len(events))
}
if events[0].Succeeded {
t.Error("event should be Succeeded=false")
}
if events[0].Error != "issuer connector returned 500" {
t.Errorf("Error = %q, want full message", events[0].Error)
}
}
func TestCRLCacheRepository_ListEvents_LimitDefaults(t *testing.T) {
tdb := getTestDB(t)
db := tdb.freshSchema(t)
repo := postgres.NewCRLCacheRepository(db)
ctx := context.Background()
issuerID := insertIssuerForCRL(t, ctx, "limit")
for i := 0; i < 5; i++ {
_ = repo.RecordGenerationEvent(ctx, &domain.CRLGenerationEvent{
IssuerID: issuerID,
StartedAt: time.Now().UTC().Add(time.Duration(i) * time.Second),
Succeeded: true,
})
}
events, err := repo.ListGenerationEvents(ctx, issuerID, 0)
if err != nil {
t.Fatalf("ListGenerationEvents(limit=0): %v", err)
}
// limit=0 → default 50 per the impl; we have 5, expect all 5.
if len(events) != 5 {
t.Fatalf("expected 5 events with default limit, got %d", len(events))
}
}
@@ -0,0 +1,145 @@
package postgres
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
)
// OCSPResponderRepository implements repository.OCSPResponderRepository.
//
// One row per issuer; rotation is an upsert (no historical rows kept —
// operators have the audit log + the previous CertSerial recorded in
// rotated_from for the most-recent rotation).
type OCSPResponderRepository struct {
db *sql.DB
}
// NewOCSPResponderRepository creates a new repository.
func NewOCSPResponderRepository(db *sql.DB) *OCSPResponderRepository {
return &OCSPResponderRepository{db: db}
}
// Compile-time interface check.
var _ repository.OCSPResponderRepository = (*OCSPResponderRepository)(nil)
// Get returns the current responder row, or (nil, nil) when missing.
func (r *OCSPResponderRepository) Get(ctx context.Context, issuerID string) (*domain.OCSPResponder, error) {
const query = `
SELECT issuer_id, cert_pem, cert_serial, key_path, key_alg,
not_before, not_after, COALESCE(rotated_from, ''),
created_at, updated_at
FROM ocsp_responders
WHERE issuer_id = $1
`
var resp domain.OCSPResponder
err := r.db.QueryRowContext(ctx, query, issuerID).Scan(
&resp.IssuerID,
&resp.CertPEM,
&resp.CertSerial,
&resp.KeyPath,
&resp.KeyAlg,
&resp.NotBefore,
&resp.NotAfter,
&resp.RotatedFrom,
&resp.CreatedAt,
&resp.UpdatedAt,
)
if errors.Is(err, sql.ErrNoRows) {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("ocsp_responders get %q: %w", issuerID, err)
}
return &resp, nil
}
// Put upserts the responder row. The DB sets created_at on first insert
// (default NOW()) and updated_at on every write (NOW() in the SET clause).
// Callers leave CreatedAt + UpdatedAt zero; the DB authoritative for both.
func (r *OCSPResponderRepository) Put(ctx context.Context, responder *domain.OCSPResponder) error {
if responder == nil {
return errors.New("ocsp_responders put: nil responder")
}
if responder.IssuerID == "" {
return errors.New("ocsp_responders put: empty issuer_id")
}
const query = `
INSERT INTO ocsp_responders (
issuer_id, cert_pem, cert_serial, key_path, key_alg,
not_before, not_after, rotated_from, updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, NULLIF($8, ''), NOW())
ON CONFLICT (issuer_id) DO UPDATE SET
cert_pem = EXCLUDED.cert_pem,
cert_serial = EXCLUDED.cert_serial,
key_path = EXCLUDED.key_path,
key_alg = EXCLUDED.key_alg,
not_before = EXCLUDED.not_before,
not_after = EXCLUDED.not_after,
rotated_from = EXCLUDED.rotated_from,
updated_at = NOW()
`
_, err := r.db.ExecContext(ctx, query,
responder.IssuerID,
responder.CertPEM,
responder.CertSerial,
responder.KeyPath,
responder.KeyAlg,
responder.NotBefore,
responder.NotAfter,
responder.RotatedFrom,
)
if err != nil {
return fmt.Errorf("ocsp_responders put %q: %w", responder.IssuerID, err)
}
return nil
}
// ListExpiring returns responders whose not_after is at or before
// (now + grace). Used by the rotation scheduler to find responders due
// for rotation. Ordered by not_after ASC so earliest-expiring is first.
func (r *OCSPResponderRepository) ListExpiring(ctx context.Context, grace time.Duration, now time.Time) ([]*domain.OCSPResponder, error) {
threshold := now.Add(grace)
const query = `
SELECT issuer_id, cert_pem, cert_serial, key_path, key_alg,
not_before, not_after, COALESCE(rotated_from, ''),
created_at, updated_at
FROM ocsp_responders
WHERE not_after <= $1
ORDER BY not_after ASC
`
rows, err := r.db.QueryContext(ctx, query, threshold)
if err != nil {
return nil, fmt.Errorf("ocsp_responders list_expiring: %w", err)
}
defer rows.Close()
var out []*domain.OCSPResponder
for rows.Next() {
var resp domain.OCSPResponder
if err := rows.Scan(
&resp.IssuerID,
&resp.CertPEM,
&resp.CertSerial,
&resp.KeyPath,
&resp.KeyAlg,
&resp.NotBefore,
&resp.NotAfter,
&resp.RotatedFrom,
&resp.CreatedAt,
&resp.UpdatedAt,
); err != nil {
return nil, fmt.Errorf("ocsp_responders list_expiring scan: %w", err)
}
out = append(out, &resp)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("ocsp_responders list_expiring iterate: %w", err)
}
return out, nil
}
+97
View File
@@ -64,6 +64,19 @@ type CloudDiscoveryServicer interface {
DiscoverAll(ctx context.Context) (int, []error)
}
// CRLCacheServicer defines the interface for the scheduler's CRL
// pre-generation loop. RegenerateAll iterates every issuer that
// supports CRL signing and refreshes its crl_cache row. Per-issuer
// failures are logged + audited; a single bad issuer does not stop
// the others.
//
// Bundle CRL/OCSP-Responder Phase 3: the scheduler-driven cache lets
// the /.well-known/pki/crl/{issuer_id} HTTP endpoint serve from cache
// instead of regenerating per request.
type CRLCacheServicer interface {
RegenerateAll(ctx context.Context)
}
// JobReaperService defines the interface for job timeout reaping used by the scheduler.
type JobReaperService interface {
ReapTimedOutJobs(ctx context.Context, csrTTL, approvalTTL time.Duration) error
@@ -87,6 +100,7 @@ type Scheduler struct {
digestService DigestServicer
healthCheckService HealthCheckServicer
cloudDiscoveryService CloudDiscoveryServicer
crlCacheService CRLCacheServicer
jobReaper JobReaperService
logger *slog.Logger
@@ -102,6 +116,7 @@ type Scheduler struct {
digestInterval time.Duration
healthCheckInterval time.Duration
cloudDiscoveryInterval time.Duration
crlGenerationInterval time.Duration
jobTimeoutInterval time.Duration
// agentOfflineJobTTL: per-tick threshold for reaping Running jobs whose
// owning agent has been silent. Bundle C / Audit M-016. Defaults below.
@@ -121,6 +136,7 @@ type Scheduler struct {
digestRunning atomic.Bool
healthCheckRunning atomic.Bool
cloudDiscoveryRunning atomic.Bool
crlGenerationRunning atomic.Bool
jobTimeoutRunning atomic.Bool
// Graceful shutdown: wait for in-flight work to complete
@@ -156,6 +172,7 @@ func NewScheduler(
digestInterval: 24 * time.Hour,
healthCheckInterval: 60 * time.Second,
cloudDiscoveryInterval: 6 * time.Hour,
crlGenerationInterval: 1 * time.Hour,
jobTimeoutInterval: 10 * time.Minute,
// 5 minutes is 5×agentHealthCheckInterval default of 1m; an agent
// must miss multiple heartbeats before its in-flight jobs are reaped.
@@ -240,6 +257,31 @@ func (s *Scheduler) SetCloudDiscoveryInterval(d time.Duration) {
s.cloudDiscoveryInterval = d
}
// SetCRLCacheService sets the CRL cache service for the crlGenerationLoop.
// Called after construction since the loop is optional — when this is
// unset, no pre-generation happens and HTTP CRL fetches go through the
// on-demand path.
//
// Bundle CRL/OCSP-Responder Phase 3.
func (s *Scheduler) SetCRLCacheService(svc CRLCacheServicer) {
s.crlCacheService = svc
}
// SetCRLGenerationInterval configures the interval at which the
// scheduler regenerates CRLs into the crl_cache table. Default 1h
// (matches relying-party CRL refresh expectations under RFC 5280).
// Operators with chatty fleets can shorten; operators with bandwidth
// constraints can lengthen as long as nextUpdate stays comfortably in
// the future per generation.
//
// Zero or negative values are ignored.
func (s *Scheduler) SetCRLGenerationInterval(d time.Duration) {
if d <= 0 {
return
}
s.crlGenerationInterval = d
}
// SetJobReaperService sets the job reaper service (I-003).
func (s *Scheduler) SetJobReaperService(jr JobReaperService) {
s.jobReaper = jr
@@ -297,6 +339,9 @@ func (s *Scheduler) Start(ctx context.Context) <-chan struct{} {
if s.cloudDiscoveryService != nil {
loopCount++
}
if s.crlCacheService != nil {
loopCount++
}
s.wg.Add(loopCount)
go func() { defer s.wg.Done(); s.renewalCheckLoop(ctx) }()
@@ -319,6 +364,9 @@ func (s *Scheduler) Start(ctx context.Context) <-chan struct{} {
if s.cloudDiscoveryService != nil {
go func() { defer s.wg.Done(); s.cloudDiscoveryLoop(ctx) }()
}
if s.crlCacheService != nil {
go func() { defer s.wg.Done(); s.crlGenerationLoop(ctx) }()
}
// Signal that all loops are launched
close(startedChan)
@@ -975,5 +1023,54 @@ func (s *Scheduler) WaitForCompletion(timeout time.Duration) error {
}
}
// crlGenerationLoop periodically pre-generates CRLs into crl_cache so
// the /.well-known/pki/crl/{issuer_id} HTTP endpoint can serve from
// cache rather than regenerating per request. Mirrors the digestLoop
// shape: ticker, atomic.Bool guard for re-entry, WaitGroup integration
// for graceful shutdown.
//
// Bundle CRL/OCSP-Responder Phase 3.
func (s *Scheduler) crlGenerationLoop(ctx context.Context) {
ticker := time.NewTicker(s.crlGenerationInterval)
defer ticker.Stop()
// Do NOT run immediately on start. CRLs are typically valid for
// many hours; firing on every restart wastes work. The first tick
// arrives after one interval; on cache miss the HTTP handler
// triggers an immediate generation via the cache service.
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
if !s.crlGenerationRunning.CompareAndSwap(false, true) {
s.logger.Warn("CRL pre-generation still running, skipping tick")
continue
}
s.wg.Add(1)
go func() {
defer s.wg.Done()
defer s.crlGenerationRunning.Store(false)
s.runCRLGeneration(ctx)
}()
}
}
}
// runCRLGeneration executes a single CRL pre-generation cycle with
// error recovery. Per-issuer failures inside RegenerateAll are logged
// + audited by the cache service itself; this wrapper only reports the
// outer context shape and bumps a metric (when wired).
func (s *Scheduler) runCRLGeneration(ctx context.Context) {
// 5-minute timeout: the per-issuer generation is fast (sub-second
// for most CAs), but the loop walks every issuer that supports
// CRL. Bound the total cycle so a stuck issuer cannot block the
// next tick.
opCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
s.crlCacheService.RegenerateAll(opCtx)
}
// ErrSchedulerShutdownTimeout is returned when scheduler graceful shutdown times out.
var ErrSchedulerShutdownTimeout = errors.New("scheduler graceful shutdown timeout")
+32 -2
View File
@@ -19,6 +19,11 @@ type CertificateService struct {
auditService *AuditService
revSvc *RevocationSvc
caSvc *CAOperationsSvc
// crlCacheSvc, when set, makes GenerateDERCRL serve from the
// pre-generated cache instead of regenerating per request. Bundle
// CRL/OCSP-Responder Phase 4. Optional; when nil GenerateDERCRL
// falls back to the historical on-demand path via caSvc.
crlCacheSvc *CRLCacheService
keygenMode string
}
@@ -45,6 +50,17 @@ func (s *CertificateService) SetCAOperationsSvc(svc *CAOperationsSvc) {
s.caSvc = svc
}
// SetCRLCacheSvc wires the CRL cache service. When set, GenerateDERCRL
// reads from the scheduler-pre-generated cache (cheap DB lookup) and
// only triggers an on-demand regeneration on cache miss / staleness.
// When unset, GenerateDERCRL falls back to the historical per-request
// regeneration via caSvc.
//
// Bundle CRL/OCSP-Responder Phase 4.
func (s *CertificateService) SetCRLCacheSvc(svc *CRLCacheService) {
s.crlCacheSvc = svc
}
// SetTargetRepo sets the target repository for deployment queries.
func (s *CertificateService) SetTargetRepo(repo repository.TargetRepository) {
s.targetRepo = repo
@@ -481,9 +497,23 @@ func (s *CertificateService) GetRevokedCertificates(ctx context.Context) ([]*dom
return s.revSvc.GetRevokedCertificates(ctx)
}
// GenerateDERCRL generates a DER-encoded X.509 CRL for the given issuer.
// Delegates to CAOperationsSvc.
// GenerateDERCRL returns the DER-encoded X.509 CRL for the given
// issuer. When the CRL cache service is wired (SetCRLCacheSvc), reads
// from the scheduler-pre-generated cache and only regenerates on miss
// / staleness — the cache layer's singleflight gate collapses
// concurrent miss requests to a single underlying generation.
//
// When the cache service is not wired, falls back to the historical
// on-demand path via CAOperationsSvc.GenerateDERCRL — every HTTP fetch
// triggers a fresh generation.
//
// Backward-compatible: existing callers that don't wire the cache see
// no behavioural change.
func (s *CertificateService) GenerateDERCRL(ctx context.Context, issuerID string) ([]byte, error) {
if s.crlCacheSvc != nil {
der, _, err := s.crlCacheSvc.Get(ctx, issuerID)
return der, err
}
if s.caSvc == nil {
return nil, fmt.Errorf("CA operations service not configured")
}
+270
View File
@@ -0,0 +1,270 @@
package service
import (
"context"
"crypto/x509"
"errors"
"fmt"
"log/slog"
"sync"
"time"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/repository"
)
// CRLCacheService is the read-through + scheduler-driven cache layer
// for pre-generated CRLs. The HTTP handler at
// /.well-known/pki/crl/{issuer_id} reads via Get; the
// scheduler.crlGenerationLoop drives RegenerateAll on a tick.
//
// Bundle CRL/OCSP-Responder Phase 3.
//
// Concurrency model:
//
// - The cache row is the source of truth (one row per issuer).
// - Get returns the cached row when fresh; on miss / staleness it
// calls regenerateOne behind a singleflight gate keyed by issuer
// ID so concurrent miss requests for the same issuer collapse to
// a single underlying generation call.
// - RegenerateAll iterates every issuer in the registry, calling
// regenerateOne for each. Per-issuer failures are logged + audited
// via crl_generation_events; one bad issuer does not stop the
// others.
// - The CA-side CRL generation (caSvc.GenerateDERCRL → issuer
// connector.GenerateCRL) is unchanged. This service is additive:
// it persists results, surfaces them via Get, and tracks events.
type CRLCacheService struct {
cacheRepo repository.CRLCacheRepository
caSvc *CAOperationsSvc
registry *IssuerRegistry
logger *slog.Logger
// singleflight collapses concurrent regeneration requests for the
// same issuer ID. A simpler alternative to vendoring
// golang.org/x/sync/singleflight; this in-tree version is ~30 LoC
// and matches the project's "no new deps unless necessary" rule.
flight sync.Map // issuerID → *flightEntry
}
// flightEntry coordinates a single in-flight generation across
// concurrent callers. The first arrival kicks off the work; later
// arrivals wait on done and read the shared result. Pattern matches
// golang.org/x/sync/singleflight semantics for the single-call case
// (we don't need the multi-result Forget capability here).
type flightEntry struct {
done chan struct{}
result *domain.CRLCacheEntry
err error
}
// NewCRLCacheService constructs a cache service. caSvc must already
// have its issuer registry wired (CAOperationsSvc.SetIssuerRegistry).
func NewCRLCacheService(
cacheRepo repository.CRLCacheRepository,
caSvc *CAOperationsSvc,
registry *IssuerRegistry,
logger *slog.Logger,
) *CRLCacheService {
return &CRLCacheService{
cacheRepo: cacheRepo,
caSvc: caSvc,
registry: registry,
logger: logger,
}
}
// Get returns the cached CRL DER + thisUpdate timestamp for an issuer.
// On cache hit the path is purely a DB read (~ms). On miss or
// staleness (next_update in the past), Get triggers an immediate
// regeneration via the singleflight gate so concurrent requests
// collapse to one underlying call.
func (s *CRLCacheService) Get(ctx context.Context, issuerID string) ([]byte, time.Time, error) {
if s.cacheRepo == nil {
return nil, time.Time{}, errors.New("crl_cache service: cache repo not configured")
}
now := time.Now().UTC()
entry, err := s.cacheRepo.Get(ctx, issuerID)
if err != nil {
return nil, time.Time{}, fmt.Errorf("crl_cache service get %q: %w", issuerID, err)
}
if entry != nil && !entry.IsStale(now) {
return entry.CRLDER, entry.ThisUpdate, nil
}
// Miss or stale → regenerate behind the singleflight gate.
fresh, err := s.regenerateOne(ctx, issuerID)
if err != nil {
return nil, time.Time{}, err
}
return fresh.CRLDER, fresh.ThisUpdate, nil
}
// RegenerateAll walks every issuer in the registry, calling
// regenerateOne for each. Per-issuer failures are logged + audited
// (via crl_generation_events); a single bad issuer does not stop
// the others. Called by scheduler.crlGenerationLoop on each tick.
//
// Issuers whose connector returns nil from GenerateCRL (e.g., ACME,
// Vault PKI, DigiCert — they manage their own CRL distribution) are
// skipped silently; the regenerateOne path detects nil and treats it
// as "no CRL to cache" rather than an error.
func (s *CRLCacheService) RegenerateAll(ctx context.Context) {
if s.registry == nil {
s.logger.Warn("CRL cache RegenerateAll: registry not configured; nothing to do")
return
}
issuers := s.registry.List()
for issuerID := range issuers {
select {
case <-ctx.Done():
s.logger.Warn("CRL cache RegenerateAll: ctx cancelled mid-cycle",
"completed", issuerID)
return
default:
}
if _, err := s.regenerateOne(ctx, issuerID); err != nil {
// regenerateOne already logs + audits the failure; log here
// only at debug level to avoid double-noise.
s.logger.Debug("CRL cache RegenerateAll: per-issuer failure",
"issuer_id", issuerID, "error", err)
}
}
}
// regenerateOne is the singleflight-gated worker. The first concurrent
// call for an issuer ID executes the generation; later calls block on
// the in-flight entry's done channel and return the same result.
//
// The gate is released in a defer so callers can rely on subsequent
// calls (after the result is observed) starting a fresh generation.
func (s *CRLCacheService) regenerateOne(ctx context.Context, issuerID string) (*domain.CRLCacheEntry, error) {
// Check for an in-flight generation. LoadOrStore atomically:
// - If absent: stores our entry as the in-flight one and returns
// it; we kick off the work.
// - If present: returns the existing entry; we wait on it.
mine := &flightEntry{done: make(chan struct{})}
actual, loaded := s.flight.LoadOrStore(issuerID, mine)
entry := actual.(*flightEntry)
if loaded {
// Another goroutine is already generating. Wait for them.
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-entry.done:
}
if entry.err != nil {
return nil, entry.err
}
return entry.result, nil
}
// We are the leader; do the work and signal others on done.
defer func() {
s.flight.Delete(issuerID)
close(mine.done)
}()
mine.result, mine.err = s.doRegenerate(ctx, issuerID)
return mine.result, mine.err
}
// doRegenerate is the actual work: ask CAOperationsSvc to build the
// CRL DER, parse it to recover thisUpdate/nextUpdate, persist into
// crl_cache, and record an audit event in crl_generation_events.
func (s *CRLCacheService) doRegenerate(ctx context.Context, issuerID string) (*domain.CRLCacheEntry, error) {
if s.caSvc == nil {
return nil, errors.New("crl_cache service: caSvc not configured")
}
startedAt := time.Now().UTC()
// Build the CRL via the existing on-demand path.
derBytes, err := s.caSvc.GenerateDERCRL(ctx, issuerID)
if err != nil {
s.recordEvent(ctx, &domain.CRLGenerationEvent{
IssuerID: issuerID,
StartedAt: startedAt,
Duration: time.Since(startedAt),
Succeeded: false,
Error: err.Error(),
})
return nil, fmt.Errorf("crl_cache service generate %q: %w", issuerID, err)
}
// Parse to extract thisUpdate / nextUpdate / number / count.
parsed, perr := x509.ParseRevocationList(derBytes)
if perr != nil {
s.recordEvent(ctx, &domain.CRLGenerationEvent{
IssuerID: issuerID,
StartedAt: startedAt,
Duration: time.Since(startedAt),
Succeeded: false,
Error: "parse generated CRL: " + perr.Error(),
})
return nil, fmt.Errorf("crl_cache service parse %q: %w", issuerID, perr)
}
crlNumber := int64(0)
if parsed.Number != nil {
crlNumber = parsed.Number.Int64()
}
entry := &domain.CRLCacheEntry{
IssuerID: issuerID,
CRLDER: derBytes,
CRLNumber: crlNumber,
ThisUpdate: parsed.ThisUpdate,
NextUpdate: parsed.NextUpdate,
GeneratedAt: startedAt,
GenerationDuration: time.Since(startedAt),
RevokedCount: len(parsed.RevokedCertificateEntries),
}
if err := s.cacheRepo.Put(ctx, entry); err != nil {
s.recordEvent(ctx, &domain.CRLGenerationEvent{
IssuerID: issuerID,
CRLNumber: crlNumber,
StartedAt: startedAt,
Duration: time.Since(startedAt),
Succeeded: false,
Error: "persist cache row: " + err.Error(),
})
return nil, fmt.Errorf("crl_cache service persist %q: %w", issuerID, err)
}
s.recordEvent(ctx, &domain.CRLGenerationEvent{
IssuerID: issuerID,
CRLNumber: crlNumber,
Duration: entry.GenerationDuration,
RevokedCount: entry.RevokedCount,
StartedAt: startedAt,
Succeeded: true,
})
s.logger.Info("CRL pre-generated and cached",
"issuer_id", issuerID,
"crl_number", crlNumber,
"revoked_count", entry.RevokedCount,
"this_update", entry.ThisUpdate,
"next_update", entry.NextUpdate,
"duration_ms", entry.GenerationDuration.Milliseconds())
return entry, nil
}
// recordEvent persists a generation event but does NOT propagate
// failure-to-record back to the caller — the event log is a
// best-effort audit trail; missing it should not turn a successful
// CRL generation into an error.
func (s *CRLCacheService) recordEvent(ctx context.Context, evt *domain.CRLGenerationEvent) {
if s.cacheRepo == nil {
return
}
if err := s.cacheRepo.RecordGenerationEvent(ctx, evt); err != nil {
s.logger.Warn("crl_cache service: failed to record generation event",
"issuer_id", evt.IssuerID, "error", err)
}
}
+321
View File
@@ -0,0 +1,321 @@
package service_test
import (
"context"
"io"
"log/slog"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/shankar0123/certctl/internal/connector/issuer"
localissuer "github.com/shankar0123/certctl/internal/connector/issuer/local"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/service"
)
// fakeCRLCacheRepo is an in-memory repository for CRLCacheService
// tests. The Postgres impl is covered by the testcontainers tests in
// internal/repository/postgres/crl_cache_test.go (CI only — needs Docker).
type fakeCRLCacheRepo struct {
mu sync.Mutex
rows map[string]*domain.CRLCacheEntry
events []*domain.CRLGenerationEvent
getCount int
putCount int
}
func newFakeCRLCacheRepo() *fakeCRLCacheRepo {
return &fakeCRLCacheRepo{rows: map[string]*domain.CRLCacheEntry{}}
}
func (r *fakeCRLCacheRepo) Get(_ context.Context, issuerID string) (*domain.CRLCacheEntry, error) {
r.mu.Lock()
defer r.mu.Unlock()
r.getCount++
if entry, ok := r.rows[issuerID]; ok {
copy := *entry
return &copy, nil
}
return nil, nil
}
func (r *fakeCRLCacheRepo) Put(_ context.Context, entry *domain.CRLCacheEntry) error {
r.mu.Lock()
defer r.mu.Unlock()
r.putCount++
copy := *entry
r.rows[entry.IssuerID] = &copy
return nil
}
func (r *fakeCRLCacheRepo) NextCRLNumber(_ context.Context, issuerID string) (int64, error) {
r.mu.Lock()
defer r.mu.Unlock()
if entry, ok := r.rows[issuerID]; ok {
return entry.CRLNumber + 1, nil
}
return 1, nil
}
func (r *fakeCRLCacheRepo) RecordGenerationEvent(_ context.Context, evt *domain.CRLGenerationEvent) error {
r.mu.Lock()
defer r.mu.Unlock()
copy := *evt
r.events = append(r.events, &copy)
return nil
}
func (r *fakeCRLCacheRepo) ListGenerationEvents(_ context.Context, issuerID string, limit int) ([]*domain.CRLGenerationEvent, error) {
r.mu.Lock()
defer r.mu.Unlock()
var out []*domain.CRLGenerationEvent
for _, evt := range r.events {
if evt.IssuerID == issuerID {
copy := *evt
out = append(out, &copy)
}
}
return out, nil
}
// fakeRevocationRepo is the minimal shape CAOperationsSvc needs:
// returning revocations by issuer. The cache service walks
// CAOperationsSvc.GenerateDERCRL, which calls into this.
type fakeRevocationRepo struct{}
func (fakeRevocationRepo) Create(context.Context, *domain.CertificateRevocation) error {
return nil
}
func (fakeRevocationRepo) GetByIssuerAndSerial(context.Context, string, string) (*domain.CertificateRevocation, error) {
return nil, nil
}
func (fakeRevocationRepo) ListAll(context.Context) ([]*domain.CertificateRevocation, error) {
return nil, nil
}
func (fakeRevocationRepo) ListByIssuer(_ context.Context, issuerID string) ([]*domain.CertificateRevocation, error) {
// Empty list = no revoked certs; the issuer connector still
// produces a valid empty CRL (RFC 5280 allows zero entries).
return nil, nil
}
func (fakeRevocationRepo) ListByCertificate(context.Context, string) ([]*domain.CertificateRevocation, error) {
return nil, nil
}
func (fakeRevocationRepo) MarkIssuerNotified(context.Context, string) error { return nil }
// helper: spin up a CAOperationsSvc + IssuerRegistry wired with a real
// local issuer connector. The local issuer's GenerateCRL produces a
// real DER-encoded CRL that the cache service can parse + persist.
func newCacheServiceFixture(t *testing.T) (svc *service.CRLCacheService, repo *fakeCRLCacheRepo, registry *service.IssuerRegistry) {
t.Helper()
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
repo = newFakeCRLCacheRepo()
// Real local issuer — produces a real CRL on GenerateCRL.
localConn := localissuer.New(&localissuer.Config{
CACommonName: "Test Cache CA",
ValidityDays: 30,
}, logger)
registry = service.NewIssuerRegistry(logger)
registry.Set("iss-cache-test", service.NewIssuerConnectorAdapter(localConn))
caSvc := service.NewCAOperationsSvc(fakeRevocationRepo{}, nil, nil)
caSvc.SetIssuerRegistry(registry)
svc = service.NewCRLCacheService(repo, caSvc, registry, logger)
return
}
// ---------------------------------------------------------------------------
// Get: cache hit, miss, staleness
// ---------------------------------------------------------------------------
func TestCRLCacheService_Get_MissTriggersGeneration(t *testing.T) {
svc, repo, _ := newCacheServiceFixture(t)
ctx := context.Background()
der, thisUpdate, err := svc.Get(ctx, "iss-cache-test")
if err != nil {
t.Fatalf("Get: %v", err)
}
if len(der) == 0 {
t.Fatal("Get returned empty DER")
}
if thisUpdate.IsZero() {
t.Fatal("ThisUpdate is zero")
}
if repo.putCount != 1 {
t.Errorf("putCount = %d, want 1 (miss should trigger one generation)", repo.putCount)
}
}
func TestCRLCacheService_Get_HitSkipsGeneration(t *testing.T) {
svc, repo, _ := newCacheServiceFixture(t)
ctx := context.Background()
// Prime the cache.
if _, _, err := svc.Get(ctx, "iss-cache-test"); err != nil {
t.Fatalf("prime: %v", err)
}
if repo.putCount != 1 {
t.Fatalf("prime: putCount = %d, want 1", repo.putCount)
}
// Second Get should be a cache hit.
if _, _, err := svc.Get(ctx, "iss-cache-test"); err != nil {
t.Fatalf("hit: %v", err)
}
if repo.putCount != 1 {
t.Errorf("putCount = %d, want 1 (hit should not regenerate)", repo.putCount)
}
}
func TestCRLCacheService_Get_StalenessTriggersRegeneration(t *testing.T) {
svc, repo, _ := newCacheServiceFixture(t)
ctx := context.Background()
// Prime the cache with a row whose next_update is in the past.
stale := &domain.CRLCacheEntry{
IssuerID: "iss-cache-test",
CRLDER: []byte("stale-der"),
CRLNumber: 1,
ThisUpdate: time.Now().Add(-48 * time.Hour),
NextUpdate: time.Now().Add(-24 * time.Hour), // expired
GeneratedAt: time.Now().Add(-48 * time.Hour),
}
if err := repo.Put(ctx, stale); err != nil {
t.Fatalf("seed stale: %v", err)
}
repo.putCount = 0
// Get should detect staleness and regenerate.
der, _, err := svc.Get(ctx, "iss-cache-test")
if err != nil {
t.Fatalf("Get on stale: %v", err)
}
if string(der) == "stale-der" {
t.Error("Get returned stale DER instead of regenerating")
}
if repo.putCount != 1 {
t.Errorf("putCount = %d, want 1 (staleness should trigger one regen)", repo.putCount)
}
}
// ---------------------------------------------------------------------------
// RegenerateAll
// ---------------------------------------------------------------------------
func TestCRLCacheService_RegenerateAll_PopulatesAllIssuers(t *testing.T) {
svc, repo, _ := newCacheServiceFixture(t)
ctx := context.Background()
svc.RegenerateAll(ctx)
row, _ := repo.Get(ctx, "iss-cache-test")
if row == nil {
t.Fatal("RegenerateAll did not populate iss-cache-test")
}
if row.RevokedCount != 0 {
t.Errorf("RevokedCount = %d, want 0 (fakeRevocationRepo is empty)", row.RevokedCount)
}
events, _ := repo.ListGenerationEvents(ctx, "iss-cache-test", 10)
if len(events) != 1 {
t.Fatalf("expected 1 generation event, got %d", len(events))
}
if !events[0].Succeeded {
t.Error("event.Succeeded should be true on happy path")
}
}
func TestCRLCacheService_RegenerateAll_RespectsCancelledContext(t *testing.T) {
svc, _, _ := newCacheServiceFixture(t)
ctx, cancel := context.WithCancel(context.Background())
cancel()
// Should return without panicking. The single-issuer fixture means
// there's nothing to iterate after the cancel check, so this is
// mostly a smoke test for the ctx.Done() branch.
svc.RegenerateAll(ctx)
}
// ---------------------------------------------------------------------------
// Singleflight: concurrent miss requests for the same issuer collapse
// ---------------------------------------------------------------------------
func TestCRLCacheService_Get_SingleflightCollapsesConcurrentMisses(t *testing.T) {
svc, repo, _ := newCacheServiceFixture(t)
ctx := context.Background()
// Fire 20 concurrent Get calls for the same uncached issuer. The
// in-tree singleflight gate should collapse them to a single
// underlying generation (putCount == 1).
var wg sync.WaitGroup
var errCount atomic.Int32
for i := 0; i < 20; i++ {
wg.Add(1)
go func() {
defer wg.Done()
if _, _, err := svc.Get(ctx, "iss-cache-test"); err != nil {
errCount.Add(1)
t.Errorf("concurrent Get: %v", err)
}
}()
}
wg.Wait()
if errCount.Load() != 0 {
t.Fatalf("%d errors across concurrent Gets", errCount.Load())
}
if repo.putCount != 1 {
t.Errorf("singleflight failed: putCount = %d, want 1 (20 concurrent misses must collapse)", repo.putCount)
}
}
// ---------------------------------------------------------------------------
// Error paths
// ---------------------------------------------------------------------------
func TestCRLCacheService_Get_NoIssuerInRegistry_RecordsFailureEvent(t *testing.T) {
svc, repo, _ := newCacheServiceFixture(t)
ctx := context.Background()
// Issuer ID that doesn't exist in the registry → CAOperationsSvc
// returns an error → cache service records a failure event +
// surfaces the error to the caller.
_, _, err := svc.Get(ctx, "iss-does-not-exist")
if err == nil {
t.Fatal("Get for unknown issuer should error")
}
events, _ := repo.ListGenerationEvents(ctx, "iss-does-not-exist", 10)
if len(events) != 1 {
t.Fatalf("expected 1 failure event, got %d", len(events))
}
if events[0].Succeeded {
t.Error("failure event should have Succeeded=false")
}
if events[0].Error == "" {
t.Error("failure event should carry an error message")
}
}
func TestCRLCacheService_Get_NoCacheRepo_Errors(t *testing.T) {
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
svc := service.NewCRLCacheService(nil, nil, nil, logger)
_, _, err := svc.Get(context.Background(), "any")
if err == nil {
t.Fatal("Get with nil cacheRepo should error")
}
}
// pin via interface satisfaction (compile-time check that fakeRevocationRepo
// matches what CAOperationsSvc actually calls — guards against shape drift
// in the repository.RevocationRepository interface).
var _ interface {
ListByIssuer(ctx context.Context, issuerID string) ([]*domain.CertificateRevocation, error)
} = fakeRevocationRepo{}
// _ silence the unused import warning when issuer adapter machinery moves.
var _ = issuer.IssuanceRequest{}
+10
View File
@@ -0,0 +1,10 @@
-- 000019_crl_cache.down.sql — reverses 000019_crl_cache.up.sql.
--
-- Drop in reverse FK order. crl_generation_events has no FK so order
-- between the two table drops is mechanical only.
DROP INDEX IF EXISTS idx_crl_generation_events_issuer_started;
DROP TABLE IF EXISTS crl_generation_events;
DROP INDEX IF EXISTS idx_crl_cache_next_update;
DROP TABLE IF EXISTS crl_cache;
+57
View File
@@ -0,0 +1,57 @@
-- 000019_crl_cache.up.sql
--
-- CRL cache + generation event log for the scheduler-driven CRL
-- pre-generation work (CRL/OCSP responder bundle).
--
-- Before this migration the CRL endpoint at /.well-known/pki/crl/{issuer_id}
-- regenerated the entire CRL on every HTTP request — every relying party
-- fetch hit the certificate_revocations table, built the entry list,
-- signed the CRL, and discarded the result. For a busy CA with many
-- relying parties this DOSes itself.
--
-- After this migration the scheduler's crlGenerationLoop pre-generates
-- CRLs at a configurable interval (default 1h, env var
-- CERTCTL_CRL_GENERATION_INTERVAL) and the HTTP handler reads from
-- crl_cache. On cache miss / staleness the cache service triggers an
-- immediate generation via singleflight (to coalesce concurrent miss
-- requests for the same issuer into a single generation).
--
-- Idempotent: every CREATE uses IF NOT EXISTS so re-running the
-- migration is safe (matches the project's migration convention).
CREATE TABLE IF NOT EXISTS crl_cache (
issuer_id TEXT PRIMARY KEY REFERENCES issuers(id) ON DELETE CASCADE,
crl_der BYTEA NOT NULL,
crl_number BIGINT NOT NULL, -- monotonic per RFC 5280 §5.2.3
this_update TIMESTAMPTZ NOT NULL,
next_update TIMESTAMPTZ NOT NULL,
generated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
generation_duration_ms INTEGER NOT NULL,
revoked_count INTEGER NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lets the scheduler quickly find issuers whose cache is stale (next_update
-- already in the past). The query "find issuers needing regeneration" runs
-- at every tick of crlGenerationLoop.
CREATE INDEX IF NOT EXISTS idx_crl_cache_next_update ON crl_cache(next_update);
-- Track every (re)generation event for ops visibility. Failed generations
-- (succeeded=false) leave a breadcrumb operators can grep when
-- troubleshooting "why isn't the CRL fresh." The id is bigserial so the
-- table is naturally ordered by insertion; the (issuer_id, started_at)
-- index serves the GUI's "recent generations for this issuer" query.
CREATE TABLE IF NOT EXISTS crl_generation_events (
id BIGSERIAL PRIMARY KEY,
issuer_id TEXT NOT NULL,
crl_number BIGINT NOT NULL,
duration_ms INTEGER NOT NULL,
revoked_count INTEGER NOT NULL,
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
succeeded BOOLEAN NOT NULL,
error TEXT
);
CREATE INDEX IF NOT EXISTS idx_crl_generation_events_issuer_started
ON crl_generation_events(issuer_id, started_at DESC);
@@ -0,0 +1,4 @@
-- 000020_ocsp_responder.down.sql — reverses 000020_ocsp_responder.up.sql.
DROP INDEX IF EXISTS idx_ocsp_responders_not_after;
DROP TABLE IF EXISTS ocsp_responders;
+44
View File
@@ -0,0 +1,44 @@
-- 000020_ocsp_responder.up.sql
--
-- Per-issuer OCSP responder cert + key tracking. Phase 2 of the
-- CRL/OCSP responder bundle.
--
-- WHY: RFC 6960 §2.6 + §4.2.2.2 strongly recommend that OCSP
-- responses be signed by a dedicated "OCSP responder cert" issued by
-- the CA, NOT by the CA's own private key. Signing OCSP with the CA
-- key directly means every relying-party OCSP fetch triggers a CA-key
-- signing operation — a problem when the CA key lives on an HSM
-- (every OCSP poll = HSM op = HSM-rate-limit risk + audit-volume
-- pressure) and a security smell otherwise (broader exposure surface
-- for the CA private key).
--
-- This table tracks one responder cert per issuer. The bootstrap
-- happens on first OCSP request (or at server startup if the row
-- doesn't exist) and rotates automatically when the responder cert
-- enters its 7-day-before-expiry window.
--
-- The responder cert MUST carry the id-pkix-ocsp-nocheck extension
-- (RFC 6960 §4.2.2.2.1) so OCSP clients don't recursively check the
-- responder cert's own revocation status.
--
-- Idempotent. Schema design: composite PK (issuer_id, cert_serial)
-- would let us track historical responder certs across rotations,
-- but operators don't need the history — only the current cert is
-- ever queried. PK on issuer_id alone, replace-on-rotate via UPSERT.
CREATE TABLE IF NOT EXISTS ocsp_responders (
issuer_id TEXT PRIMARY KEY REFERENCES issuers(id) ON DELETE CASCADE,
cert_pem TEXT NOT NULL, -- PEM-encoded responder cert
cert_serial TEXT NOT NULL, -- hex serial for ops grep / audit
key_path TEXT NOT NULL, -- filesystem path to the responder key (FileDriver) or driver-specific ref
key_alg TEXT NOT NULL, -- 'ECDSA-P256', 'RSA-2048', ... matches signer.Algorithm enum
not_before TIMESTAMPTZ NOT NULL,
not_after TIMESTAMPTZ NOT NULL,
rotated_from TEXT, -- previous cert_serial when rotation happens (NULL on first bootstrap)
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lets the rotation scheduler quickly find responders whose cert is
-- entering the 7-day-before-expiry window.
CREATE INDEX IF NOT EXISTS idx_ocsp_responders_not_after ON ocsp_responders(not_after);