From ed19312df6bdeb645c79e713461be13b7c0b0056 Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Thu, 30 Apr 2026 05:08:04 +0000 Subject: [PATCH] feat(ratelimit): per-endpoint rate limit on OCSP + cert-export (Phase 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Production hardening II Phase 3 — wire the existing internal/ratelimit/SlidingWindowLimiter into the OCSP and cert-export handlers. Removes the DoS vector where an unauthenticated relying party (or compromised admin token) can hammer the responder / key-export endpoint at unbounded rates. OCSP: per-source-IP cap. Default 1000 req/min/IP, 50k tracked IPs (matches the SCEP/Intune replay cache cap). Configurable via CERTCTL_OCSP_RATE_LIMIT_PER_IP_MIN; zero disables. Source IP comes from net.SplitHostPort(r.RemoteAddr) — we deliberately do NOT honor X-Forwarded-For because OCSP is publicly reachable and untrusted intermediaries could spoof the header to bypass the limit. On rate-limit trip: respond with the canonical ocsp.UnauthorizedErrorResponse pre-built blob from x/crypto/ocsp (status 6 per RFC 6960 §2.3) plus Retry-After: 60. Using the unauthorized status (instead of TryLater) avoids hand-rolling DER for a single rejection path; relying parties retry on any non-good status anyway. Cert-export: per-actor cap. Default 50 exports/hr/operator. Configurable via CERTCTL_CERT_EXPORT_RATE_LIMIT_PER_ACTOR_HR; zero disables. Actor extracted from the X-Actor request header (set by the auth middleware); falls back to RemoteAddr if empty (defensive). On rate-limit trip: HTTP 429 + JSON body {"error":"rate_limit_exceeded","retry_after_seconds":3600} + Retry-After: 3600. NEW config fields in internal/config/config.go::SchedulerConfig: OCSPRateLimitPerIPMin (default 1000) CertExportRateLimitPerActorHr (default 50) WIRED in cmd/server/main.go: ocspLimiter constructed with the configured cap, 1m window, 50k map cap; exportLimiter same shape with 1h window. 
Both wired via SetOCSPRateLimiter / SetExportRateLimiter on their respective handlers. Existing deploys see no behavior change unless traffic exceeds the cap: the defaults (1000 OCSP req/min/IP, 50 exports/hr/actor) apply when the env vars are unset, and setting either env var to zero disables that limit. Pre-commit verification: go build ./... clean; go test -short -count=1 green for handler + service + config. --- cmd/server/main.go | 9 ++++ internal/api/handler/certificates.go | 74 +++++++++++++++++++++++++++- internal/api/handler/export.go | 51 ++++++++++++++++++- internal/config/config.go | 16 +++++- 4 files changed, 147 insertions(+), 3 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index fd35d16..568583f 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -502,6 +502,11 @@ func main() { // Initialize API handlers certificateHandler := handler.NewCertificateHandler(certificateService) + // Production hardening II Phase 3: per-source-IP OCSP rate limit. + // Window 1m so the cap counts requests per minute. Map cap 50k + // matches the SCEP/Intune replay cache cap. Zero disables. + ocspLimiter := ratelimit.NewSlidingWindowLimiter(cfg.Scheduler.OCSPRateLimitPerIPMin, time.Minute, 50_000) + certificateHandler.SetOCSPRateLimiter(ocspLimiter) issuerHandler := handler.NewIssuerHandler(issuerService) targetHandler := handler.NewTargetHandler(targetService) agentHandler := handler.NewAgentHandler(agentService, cfg.Auth.AgentBootstrapToken) @@ -535,6 +540,10 @@ func main() { verificationHandler := handler.NewVerificationHandler(verificationService) exportService := service.NewExportService(certificateRepo, auditService) exportHandler := handler.NewExportHandler(exportService) + // Production hardening II Phase 3: per-actor cert-export rate limit. + // Window 1h so the cap counts exports per hour. Zero disables. 
+ exportLimiter := ratelimit.NewSlidingWindowLimiter(cfg.Scheduler.CertExportRateLimitPerActorHr, time.Hour, 50_000) + exportHandler.SetExportRateLimiter(exportLimiter) bulkRevocationHandler := handler.NewBulkRevocationHandler(bulkRevocationService) // L-1 master closure: handlers for the new bulk-renew + bulk-reassign diff --git a/internal/api/handler/certificates.go b/internal/api/handler/certificates.go index cda1487..4deb501 100644 --- a/internal/api/handler/certificates.go +++ b/internal/api/handler/certificates.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "log/slog" + "net" "net/http" "strconv" "strings" @@ -16,6 +17,7 @@ import ( "github.com/shankar0123/certctl/internal/api/middleware" "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/ratelimit" "github.com/shankar0123/certctl/internal/repository" "github.com/shankar0123/certctl/internal/service" ) @@ -45,7 +47,8 @@ type CertificateService interface { // CertificateHandler handles HTTP requests for certificate operations. type CertificateHandler struct { - svc CertificateService + svc CertificateService + ocspLimiter *ratelimit.SlidingWindowLimiter // production hardening II Phase 3 — per-source-IP cap on OCSP } // NewCertificateHandler creates a new CertificateHandler with a service dependency. @@ -53,6 +56,15 @@ func NewCertificateHandler(svc CertificateService) CertificateHandler { return CertificateHandler{svc: svc} } +// SetOCSPRateLimiter wires the per-source-IP OCSP rate limiter. +// Production hardening II Phase 3. Default cap (when set in +// cmd/server/main.go): 1000 req/min/IP. Setting to nil disables the +// limit; the limiter's own NewSlidingWindowLimiter(maxN<=0, ...) +// also produces a no-op limiter, so the env-var-zero case is safe. +func (h *CertificateHandler) SetOCSPRateLimiter(l *ratelimit.SlidingWindowLimiter) { + h.ocspLimiter = l +} + // ListCertificates lists certificates with optional filtering. 
// GET /api/v1/certificates?status=Active&environment=prod&owner_id=...&team_id=...&issuer_id=...&agent_id=...&profile_id=...&expires_before=...&expires_after=...&created_after=...&updated_after=...&sort=notAfter&sort_desc=false&cursor=...&page=1&per_page=50&fields=id,commonName,status func (h CertificateHandler) ListCertificates(w http.ResponseWriter, r *http.Request) { @@ -587,6 +599,54 @@ func (h CertificateHandler) GetDERCRL(w http.ResponseWriter, r *http.Request) { w.Write(derBytes) } +// ocspSourceIP extracts the source IP from the request for the +// per-IP rate limiter. Production hardening II Phase 3. +// +// Strategy: net.SplitHostPort on RemoteAddr; on parse failure fall +// back to the bare RemoteAddr string. We deliberately do NOT honor +// X-Forwarded-For here because OCSP is publicly reachable and +// untrusted intermediaries could spoof the header to bypass the +// limit. Operators behind a trusted reverse proxy should configure +// the proxy to pass through the original IP via the standard +// transport (rewriting RemoteAddr at the proxy boundary). +func ocspSourceIP(r *http.Request) string { + if r == nil { + return "" + } + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err != nil { + return r.RemoteAddr + } + return host +} + +// applyOCSPRateLimit enforces the per-source-IP cap. Returns true when +// the request was rejected (handler should stop). Returns false to +// continue processing. Production hardening II Phase 3. +func (h CertificateHandler) applyOCSPRateLimit(w http.ResponseWriter, r *http.Request) bool { + if h.ocspLimiter == nil { + return false + } + ip := ocspSourceIP(r) + if err := h.ocspLimiter.Allow(ip, time.Now()); err != nil { + // Rate-limited: respond with ocsp.UnauthorizedErrorResponse + // (status 6, "unauthorized", per RFC 6960 §2.3) plus an + // HTTP-level Retry-After hint. 
"tryLater" (status 3) is + // the semantically closer status, but we use 6 because + // x/crypto/ocsp doesn't ship a TryLater pre-built blob and + // rolling our own DER for one rejection path adds a + // fragility surface for no relying-party benefit + // (everything that retries an OCSP failure retries on any + // non-good status, not specifically TryLater). + w.Header().Set("Content-Type", "application/ocsp-response") + w.Header().Set("Retry-After", "60") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(ocsp.UnauthorizedErrorResponse) + return true + } + return false +} + // HandleOCSP processes OCSP requests. // GET /.well-known/pki/ocsp/{issuer_id}/{serial_hex} // @@ -601,6 +661,13 @@ func (h CertificateHandler) HandleOCSP(w http.ResponseWriter, r *http.Request) { return } + // Production hardening II Phase 3: per-source-IP rate limit. + // When the cap is tripped, applyOCSPRateLimit writes the + // rate-limited OCSP response and returns true — handler stops. + if h.applyOCSPRateLimit(w, r) { + return + } + // Extract issuer_id and serial from path: /.well-known/pki/ocsp/{issuer_id}/{serial_hex} path := strings.TrimPrefix(r.URL.Path, "/.well-known/pki/ocsp/") parts := strings.SplitN(path, "/", 2) @@ -654,6 +721,11 @@ func (h CertificateHandler) HandleOCSPPost(w http.ResponseWriter, r *http.Reques return } + // Production hardening II Phase 3: per-source-IP rate limit. + if h.applyOCSPRateLimit(w, r) { + return + } + // Be tolerant about Content-Type: RFC 6960 §A.1.1 says it MUST be // "application/ocsp-request" but real-world clients sometimes omit // the header or send it with a charset suffix. 
We require the diff --git a/internal/api/handler/export.go b/internal/api/handler/export.go index 5e31f72..0e3cfc2 100644 --- a/internal/api/handler/export.go +++ b/internal/api/handler/export.go @@ -5,11 +5,14 @@ import ( "errors" "context" "encoding/json" + "fmt" "log/slog" "net/http" "strings" + "time" "github.com/shankar0123/certctl/internal/api/middleware" + "github.com/shankar0123/certctl/internal/ratelimit" "github.com/shankar0123/certctl/internal/service" ) @@ -21,7 +24,8 @@ type ExportService interface { // ExportHandler handles HTTP requests for certificate export operations. type ExportHandler struct { - svc ExportService + svc ExportService + exportLimiter *ratelimit.SlidingWindowLimiter // production hardening II Phase 3 } // NewExportHandler creates a new ExportHandler with a service dependency. @@ -29,6 +33,41 @@ func NewExportHandler(svc ExportService) ExportHandler { return ExportHandler{svc: svc} } +// SetExportRateLimiter wires the per-actor cert-export rate limiter. +// Production hardening II Phase 3. Default cap (when set in +// cmd/server/main.go): 50 exports/hr/operator. Setting to nil +// disables the limit. +func (h *ExportHandler) SetExportRateLimiter(l *ratelimit.SlidingWindowLimiter) { + h.exportLimiter = l +} + +// applyExportRateLimit enforces the per-actor cap. Returns true when +// the request was rejected (handler should stop). +// +// On rejection: HTTP 429 + JSON body {"error":"rate_limit_exceeded", +// "retry_after_seconds":3600}. Production hardening II Phase 3. +func (h ExportHandler) applyExportRateLimit(w http.ResponseWriter, r *http.Request) bool { + if h.exportLimiter == nil { + return false + } + // Auth context populates an actor on the request; cert-export is + // always behind the API-key middleware so this is non-empty in + // production. Fall-back to RemoteAddr only if the auth pipeline + // somehow allowed an empty actor (defensive; shouldn't fire). 
+ actor := r.Header.Get("X-Actor") + if actor == "" { + actor = r.RemoteAddr + } + if err := h.exportLimiter.Allow(actor, time.Now()); err != nil { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Retry-After", "3600") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = fmt.Fprint(w, `{"error":"rate_limit_exceeded","retry_after_seconds":3600}`) + return true + } + return false +} + // ExportPEM exports a certificate and its chain in PEM format. // GET /api/v1/certificates/{id}/export/pem func (h ExportHandler) ExportPEM(w http.ResponseWriter, r *http.Request) { @@ -37,6 +76,11 @@ func (h ExportHandler) ExportPEM(w http.ResponseWriter, r *http.Request) { return } + // Production hardening II Phase 3: per-actor cert-export rate limit. + if h.applyExportRateLimit(w, r) { + return + } + requestID := middleware.GetRequestID(r.Context()) // Extract certificate ID from path: /api/v1/certificates/{id}/export/pem @@ -78,6 +122,11 @@ func (h ExportHandler) ExportPKCS12(w http.ResponseWriter, r *http.Request) { return } + // Production hardening II Phase 3: per-actor cert-export rate limit. + if h.applyExportRateLimit(w, r) { + return + } + requestID := middleware.GetRequestID(r.Context()) // Extract certificate ID from path: /api/v1/certificates/{id}/export/pkcs12 diff --git a/internal/config/config.go b/internal/config/config.go index 69e39fd..4c00e33 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1190,6 +1190,18 @@ type SchedulerConfig struct { // Setting: CERTCTL_CRL_GENERATION_INTERVAL environment variable. // Bundle CRL/OCSP-Responder Phase 3. CRLGenerationInterval time.Duration + + // OCSPRateLimitPerIPMin is the per-source-IP cap on OCSP requests + // per minute. Defaults to 1000 (production hardening II Phase 3 + // frozen decision 0.5). Zero disables the limit. + // Setting: CERTCTL_OCSP_RATE_LIMIT_PER_IP_MIN environment variable. 
+ OCSPRateLimitPerIPMin int + + // CertExportRateLimitPerActorHr is the per-actor cap on cert-export + // requests per hour. Defaults to 50 (production hardening II Phase + // 3 frozen decision 0.6). Zero disables the limit. + // Setting: CERTCTL_CERT_EXPORT_RATE_LIMIT_PER_ACTOR_HR environment variable. + CertExportRateLimitPerActorHr int } // LogConfig contains logging configuration. @@ -1403,7 +1415,9 @@ func Load() (*Config, error) { // Default 1h matches the in-scheduler default; relying-party // CRL refresh expectations under RFC 5280 are typically // hourly to daily, so 1h gives operators plenty of margin. - CRLGenerationInterval: getEnvDuration("CERTCTL_CRL_GENERATION_INTERVAL", 1*time.Hour), + CRLGenerationInterval: getEnvDuration("CERTCTL_CRL_GENERATION_INTERVAL", 1*time.Hour), + OCSPRateLimitPerIPMin: getEnvInt("CERTCTL_OCSP_RATE_LIMIT_PER_IP_MIN", 1000), + CertExportRateLimitPerActorHr: getEnvInt("CERTCTL_CERT_EXPORT_RATE_LIMIT_PER_ACTOR_HR", 50), }, Log: LogConfig{ Level: getEnv("CERTCTL_LOG_LEVEL", "info"),