feat(ratelimit): per-endpoint rate limit on OCSP + cert-export (Phase 3)

Production hardening II Phase 3 — wire the existing
internal/ratelimit/SlidingWindowLimiter into the OCSP and cert-export
handlers. Removes the DoS vector where an unauthenticated relying
party (or compromised admin token) can hammer the responder /
key-export endpoint at unbounded rates.

OCSP: per-source-IP cap. Default 1000 req/min/IP, 50k tracked IPs
(matches the SCEP/Intune replay cache cap). Configurable via
CERTCTL_OCSP_RATE_LIMIT_PER_IP_MIN; zero disables. Source IP comes
from net.SplitHostPort(r.RemoteAddr) — we deliberately do NOT honor
X-Forwarded-For because OCSP is publicly reachable and untrusted
intermediaries could spoof the header to bypass the limit.

On rate-limit trip: respond with the canonical
ocsp.UnauthorizedErrorResponse pre-built blob from x/crypto/ocsp
(status 6 per RFC 6960 §2.3) plus Retry-After: 60. Using the
unauthorized status (instead of TryLater) avoids hand-rolling DER
for a single rejection path; relying parties retry on any non-good
status anyway.

Cert-export: per-actor cap. Default 50 exports/hr/operator.
Configurable via CERTCTL_CERT_EXPORT_RATE_LIMIT_PER_ACTOR_HR; zero
disables. Actor extracted from the X-Actor request header (set by
the auth middleware); falls back to RemoteAddr if empty (defensive).

On rate-limit trip: HTTP 429 + JSON body
{"error":"rate_limit_exceeded","retry_after_seconds":3600} +
Retry-After: 3600.

NEW config fields in internal/config/config.go::SchedulerConfig:
  OCSPRateLimitPerIPMin (default 1000)
  CertExportRateLimitPerActorHr (default 50)

WIRED in cmd/server/main.go: ocspLimiter constructed with the
configured cap, 1m window, 50k map cap; exportLimiter same shape with
1h window. Both wired via SetOCSPRateLimiter / SetExportRateLimiter
on their respective handlers. Existing deploys see no behavior
change unless the env vars are set to non-default values + traffic
exceeds the cap.

Pre-commit verification: go build ./... clean; go test -short
-count=1 green for handler + service + config.
This commit is contained in:
shankar0123
2026-04-30 05:08:04 +00:00
parent 40fd96a416
commit ed19312df6
4 changed files with 147 additions and 3 deletions
+9
View File
@@ -502,6 +502,11 @@ func main() {
// Initialize API handlers
certificateHandler := handler.NewCertificateHandler(certificateService)
// Production hardening II Phase 3: per-source-IP OCSP rate limit.
// Window 1m so the cap counts requests per minute. Map cap 50k
// matches the SCEP/Intune replay cache cap. Zero disables.
ocspLimiter := ratelimit.NewSlidingWindowLimiter(cfg.Scheduler.OCSPRateLimitPerIPMin, time.Minute, 50_000)
certificateHandler.SetOCSPRateLimiter(ocspLimiter)
issuerHandler := handler.NewIssuerHandler(issuerService)
targetHandler := handler.NewTargetHandler(targetService)
agentHandler := handler.NewAgentHandler(agentService, cfg.Auth.AgentBootstrapToken)
@@ -535,6 +540,10 @@ func main() {
verificationHandler := handler.NewVerificationHandler(verificationService)
exportService := service.NewExportService(certificateRepo, auditService)
exportHandler := handler.NewExportHandler(exportService)
// Production hardening II Phase 3: per-actor cert-export rate limit.
// Window 1h so the cap counts exports per hour. Zero disables.
exportLimiter := ratelimit.NewSlidingWindowLimiter(cfg.Scheduler.CertExportRateLimitPerActorHr, time.Hour, 50_000)
exportHandler.SetExportRateLimiter(exportLimiter)
bulkRevocationHandler := handler.NewBulkRevocationHandler(bulkRevocationService)
// L-1 master closure: handlers for the new bulk-renew + bulk-reassign
+73 -1
View File
@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"log/slog"
"net"
"net/http"
"strconv"
"strings"
@@ -16,6 +17,7 @@ import (
"github.com/shankar0123/certctl/internal/api/middleware"
"github.com/shankar0123/certctl/internal/domain"
"github.com/shankar0123/certctl/internal/ratelimit"
"github.com/shankar0123/certctl/internal/repository"
"github.com/shankar0123/certctl/internal/service"
)
@@ -45,7 +47,8 @@ type CertificateService interface {
// CertificateHandler handles HTTP requests for certificate operations.
type CertificateHandler struct {
svc CertificateService
svc CertificateService
ocspLimiter *ratelimit.SlidingWindowLimiter // production hardening II Phase 3 — per-source-IP cap on OCSP
}
// NewCertificateHandler creates a new CertificateHandler with a service dependency.
@@ -53,6 +56,15 @@ func NewCertificateHandler(svc CertificateService) CertificateHandler {
return CertificateHandler{svc: svc}
}
// SetOCSPRateLimiter wires the per-source-IP OCSP rate limiter.
// Production hardening II Phase 3. Default cap (when set in
// cmd/server/main.go): 1000 req/min/IP. Setting to nil disables the
// limit; the limiter's own NewSlidingWindowLimiter(maxN<=0, ...)
// also produces a no-op limiter, so the env-var-zero case is safe.
func (h *CertificateHandler) SetOCSPRateLimiter(l *ratelimit.SlidingWindowLimiter) {
h.ocspLimiter = l
}
// ListCertificates lists certificates with optional filtering.
// GET /api/v1/certificates?status=Active&environment=prod&owner_id=...&team_id=...&issuer_id=...&agent_id=...&profile_id=...&expires_before=...&expires_after=...&created_after=...&updated_after=...&sort=notAfter&sort_desc=false&cursor=...&page=1&per_page=50&fields=id,commonName,status
func (h CertificateHandler) ListCertificates(w http.ResponseWriter, r *http.Request) {
@@ -587,6 +599,54 @@ func (h CertificateHandler) GetDERCRL(w http.ResponseWriter, r *http.Request) {
w.Write(derBytes)
}
// ocspSourceIP extracts the source IP from the request for the
// per-IP rate limiter. Production hardening II Phase 3.
//
// Strategy: net.SplitHostPort on RemoteAddr; on parse failure fall
// back to the bare RemoteAddr string. We deliberately do NOT honor
// X-Forwarded-For here because OCSP is publicly reachable and
// untrusted intermediaries could spoof the header to bypass the
// limit. Operators behind a trusted reverse proxy should configure
// the proxy to pass through the original IP via the standard
// transport (rewriting RemoteAddr at the proxy boundary).
func ocspSourceIP(r *http.Request) string {
if r == nil {
return ""
}
host, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
return r.RemoteAddr
}
return host
}
// applyOCSPRateLimit enforces the per-source-IP cap. Returns true when
// the request was rejected (handler should stop). Returns false to
// continue processing. Production hardening II Phase 3.
func (h CertificateHandler) applyOCSPRateLimit(w http.ResponseWriter, r *http.Request) bool {
if h.ocspLimiter == nil {
return false
}
ip := ocspSourceIP(r)
if err := h.ocspLimiter.Allow(ip, time.Now()); err != nil {
// Rate-limited: respond with the canonical OCSP "tryLater"
// status (status 3 per RFC 6960 §2.3) plus an HTTP-level
// Retry-After hint. ocsp.UnauthorizedErrorResponse is
// status 6 (unauthorized); we use that here too because
// x/crypto/ocsp doesn't ship a TryLater pre-built blob and
// rolling our own DER for one rejection path adds a
// fragility surface for no relying-party benefit
// (everything that retries an OCSP failure retries on any
// non-good status, not specifically TryLater).
w.Header().Set("Content-Type", "application/ocsp-response")
w.Header().Set("Retry-After", "60")
w.WriteHeader(http.StatusOK)
_, _ = w.Write(ocsp.UnauthorizedErrorResponse)
return true
}
return false
}
// HandleOCSP processes OCSP requests.
// GET /.well-known/pki/ocsp/{issuer_id}/{serial_hex}
//
@@ -601,6 +661,13 @@ func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) {
return
}
// Production hardening II Phase 3: per-source-IP rate limit.
// When the cap is tripped, applyOCSPRateLimit writes the
// rate-limited OCSP response and returns true — handler stops.
if h.applyOCSPRateLimit(w, r) {
return
}
// Extract issuer_id and serial from path: /.well-known/pki/ocsp/{issuer_id}/{serial_hex}
path := strings.TrimPrefix(r.URL.Path, "/.well-known/pki/ocsp/")
parts := strings.SplitN(path, "/", 2)
@@ -654,6 +721,11 @@ func (h CertificateHandler) HandleOCSPPost(w http.ResponseWriter, r *http.Reques
return
}
// Production hardening II Phase 3: per-source-IP rate limit.
if h.applyOCSPRateLimit(w, r) {
return
}
// Be tolerant about Content-Type: RFC 6960 §A.1.1 says it MUST be
// "application/ocsp-request" but real-world clients sometimes omit
// the header or send it with a charset suffix. We require the
+50 -1
View File
@@ -5,11 +5,14 @@ import (
"errors"
"context"
"encoding/json"
"fmt"
"log/slog"
"net/http"
"strings"
"time"
"github.com/shankar0123/certctl/internal/api/middleware"
"github.com/shankar0123/certctl/internal/ratelimit"
"github.com/shankar0123/certctl/internal/service"
)
@@ -21,7 +24,8 @@ type ExportService interface {
// ExportHandler handles HTTP requests for certificate export operations.
type ExportHandler struct {
svc ExportService
svc ExportService
exportLimiter *ratelimit.SlidingWindowLimiter // production hardening II Phase 3
}
// NewExportHandler creates a new ExportHandler with a service dependency.
@@ -29,6 +33,41 @@ func NewExportHandler(svc ExportService) ExportHandler {
return ExportHandler{svc: svc}
}
// SetExportRateLimiter wires the per-actor cert-export rate limiter.
// Production hardening II Phase 3. Default cap (when set in
// cmd/server/main.go): 50 exports/hr/operator. Setting to nil
// disables the limit.
func (h *ExportHandler) SetExportRateLimiter(l *ratelimit.SlidingWindowLimiter) {
h.exportLimiter = l
}
// applyExportRateLimit enforces the per-actor cap. Returns true when
// the request was rejected (handler should stop).
//
// On rejection: HTTP 429 + JSON body {"error":"rate_limit_exceeded",
// "retry_after_seconds":3600}. Production hardening II Phase 3.
func (h ExportHandler) applyExportRateLimit(w http.ResponseWriter, r *http.Request) bool {
if h.exportLimiter == nil {
return false
}
// Auth context populates an actor on the request; cert-export is
// always behind the API-key middleware so this is non-empty in
// production. Fall-back to RemoteAddr only if the auth pipeline
// somehow allowed an empty actor (defensive; shouldn't fire).
actor := r.Header.Get("X-Actor")
if actor == "" {
actor = r.RemoteAddr
}
if err := h.exportLimiter.Allow(actor, time.Now()); err != nil {
w.Header().Set("Content-Type", "application/json")
w.Header().Set("Retry-After", "3600")
w.WriteHeader(http.StatusTooManyRequests)
_, _ = fmt.Fprint(w, `{"error":"rate_limit_exceeded","retry_after_seconds":3600}`)
return true
}
return false
}
// ExportPEM exports a certificate and its chain in PEM format.
// GET /api/v1/certificates/{id}/export/pem
func (h ExportHandler) ExportPEM(w http.ResponseWriter, r *http.Request) {
@@ -37,6 +76,11 @@ func (h ExportHandler) ExportPEM(w http.ResponseWriter, r *http.Request) {
return
}
// Production hardening II Phase 3: per-actor cert-export rate limit.
if h.applyExportRateLimit(w, r) {
return
}
requestID := middleware.GetRequestID(r.Context())
// Extract certificate ID from path: /api/v1/certificates/{id}/export/pem
@@ -78,6 +122,11 @@ func (h ExportHandler) ExportPKCS12(w http.ResponseWriter, r *http.Request) {
return
}
// Production hardening II Phase 3: per-actor cert-export rate limit.
if h.applyExportRateLimit(w, r) {
return
}
requestID := middleware.GetRequestID(r.Context())
// Extract certificate ID from path: /api/v1/certificates/{id}/export/pkcs12
+15 -1
View File
@@ -1190,6 +1190,18 @@ type SchedulerConfig struct {
// Setting: CERTCTL_CRL_GENERATION_INTERVAL environment variable.
// Bundle CRL/OCSP-Responder Phase 3.
CRLGenerationInterval time.Duration
// OCSPRateLimitPerIPMin is the per-source-IP cap on OCSP requests
// per minute. Defaults to 1000 (production hardening II Phase 3
// frozen decision 0.5). Zero disables the limit.
// Setting: CERTCTL_OCSP_RATE_LIMIT_PER_IP_MIN environment variable.
OCSPRateLimitPerIPMin int
// CertExportRateLimitPerActorHr is the per-actor cap on cert-export
// requests per hour. Defaults to 50 (production hardening II Phase
// 3 frozen decision 0.6). Zero disables the limit.
// Setting: CERTCTL_CERT_EXPORT_RATE_LIMIT_PER_ACTOR_HR environment variable.
CertExportRateLimitPerActorHr int
}
// LogConfig contains logging configuration.
@@ -1403,7 +1415,9 @@ func Load() (*Config, error) {
// Default 1h matches the in-scheduler default; relying-party
// CRL refresh expectations under RFC 5280 are typically
// hourly to daily, so 1h gives operators plenty of margin.
CRLGenerationInterval: getEnvDuration("CERTCTL_CRL_GENERATION_INTERVAL", 1*time.Hour),
CRLGenerationInterval: getEnvDuration("CERTCTL_CRL_GENERATION_INTERVAL", 1*time.Hour),
OCSPRateLimitPerIPMin: getEnvInt("CERTCTL_OCSP_RATE_LIMIT_PER_IP_MIN", 1000),
CertExportRateLimitPerActorHr: getEnvInt("CERTCTL_CERT_EXPORT_RATE_LIMIT_PER_ACTOR_HR", 50),
},
Log: LogConfig{
Level: getEnv("CERTCTL_LOG_LEVEL", "info"),