mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-11 08:18:54 +00:00
fix(middleware): SEC-006 — TTL-evict idle token-bucket rate-limiter entries
Sprint 2 unified-master-audit closure. Before this fix, the keyed rate
limiter's bucket map had no eviction. The package-level comment
explicitly noted the leak: high-cardinality unauthenticated traffic
(CGNAT churn, Tor exit lists, botnets, infinite-cardinality scanners)
grew process memory unboundedly. Production deploys with millions of
unique IPs would eventually OOM.
Fix:
- RateLimitConfig.BucketTTL (env CERTCTL_RATE_LIMIT_BUCKET_TTL,
default 1h, clamp-floor 1m). 1h chosen to be well above realistic
operator IP churn windows (returning clients keep their bucket)
and well below the unbounded-leak window the pre-fix code
allowed.
- tokenBucket gains a lastAccess field that is updated on every allow()
call via touch() and read via lastAccessTime() under the bucket's
own mutex.
- keyedRateLimiter.sweepLoop runs in a single goroutine per
limiter (production wires 2: default + no-auth fallback), waking
every BucketTTL/4. sweep() removes any bucket whose lastAccess
is older than the cutoff and bumps evictedTotal atomically.
- Both NewRateLimiter call sites in cmd/server/main.go (default
stack and no-auth fallback) now thread cfg.RateLimit.BucketTTL.
Regression coverage:
- TestKeyedRateLimiter_SweepEvictsIdleBuckets: 1000 synthetic IP
keys populate the map, advance past TTL, call sweep() directly,
assert map drained to 0 + evictedTotal=1000 + fresh key creates
new bucket (map not poisoned).
- TestKeyedRateLimiter_SweepKeepsActiveBuckets: inverse — a bucket
touched within the TTL window survives the sweep. Catches a
future regression that inverts the cutoff comparison.
Closes SEC-006.
This commit is contained in:
@@ -2,9 +2,11 @@ package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/certctl-io/certctl/internal/auth"
|
||||
)
|
||||
@@ -188,3 +190,94 @@ func TestRateLimiter_M025_EmptyUserKeyTreatedAsAnonymous(t *testing.T) {
|
||||
t.Errorf("second anonymous request from different IP should still pass (independent IP buckets); got %d", rr.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// SEC-006 closure (Sprint 2, 2026-05-16). The token-bucket map now has
|
||||
// a background sweeper that evicts buckets whose last allow() call is
|
||||
// older than the configured BucketTTL. This test pins the eviction
|
||||
// path against a synthetic 1000-key load and asserts:
|
||||
//
|
||||
// 1. Buckets created by N distinct keys land in the map.
|
||||
// 2. After the simulated TTL elapses and the sweeper runs, the map
|
||||
// is reclaimed and evictedTotal reflects the count.
|
||||
// 3. A subsequent request from a fresh key creates a new bucket
|
||||
// (i.e. the map isn't poisoned by the eviction).
|
||||
//
|
||||
// The test calls sweep() directly rather than relying on the goroutine
|
||||
// + time.Ticker so it stays deterministic and fast. The sweeper
|
||||
// goroutine itself is exercised in production; this test pins the
|
||||
// eviction predicate.
|
||||
// =============================================================================
|
||||
|
||||
func TestKeyedRateLimiter_SweepEvictsIdleBuckets(t *testing.T) {
|
||||
limiter := &keyedRateLimiter{
|
||||
ipRate: 1000,
|
||||
ipBurst: 1000,
|
||||
userRate: 1000,
|
||||
userBurst: 1000,
|
||||
buckets: make(map[string]*tokenBucket),
|
||||
bucketTTL: 100 * time.Millisecond,
|
||||
}
|
||||
|
||||
// Populate 1000 buckets from a synthetic IP-key churn.
|
||||
for i := 0; i < 1000; i++ {
|
||||
key := "ip:198.51.100." + fmt.Sprintf("%d", i%256) + "/" + fmt.Sprintf("%d", i)
|
||||
if !limiter.allow(key, false) {
|
||||
t.Fatalf("synthetic IP-key %d: allow returned false on first call", i)
|
||||
}
|
||||
}
|
||||
limiter.mu.RLock()
|
||||
if got := len(limiter.buckets); got != 1000 {
|
||||
limiter.mu.RUnlock()
|
||||
t.Fatalf("post-populate bucket count = %d; want 1000", got)
|
||||
}
|
||||
limiter.mu.RUnlock()
|
||||
|
||||
// Advance past the TTL boundary, then sweep.
|
||||
time.Sleep(110 * time.Millisecond)
|
||||
limiter.sweep()
|
||||
|
||||
limiter.mu.RLock()
|
||||
remaining := len(limiter.buckets)
|
||||
limiter.mu.RUnlock()
|
||||
if remaining != 0 {
|
||||
t.Errorf("post-sweep bucket count = %d; want 0 (all should have been evicted)", remaining)
|
||||
}
|
||||
if got := limiter.evictedTotal.Load(); got != 1000 {
|
||||
t.Errorf("evictedTotal = %d; want 1000", got)
|
||||
}
|
||||
|
||||
// A fresh request creates a new bucket — map isn't poisoned.
|
||||
if !limiter.allow("ip:203.0.113.7", false) {
|
||||
t.Errorf("fresh key: allow returned false on first call after sweep")
|
||||
}
|
||||
limiter.mu.RLock()
|
||||
defer limiter.mu.RUnlock()
|
||||
if got := len(limiter.buckets); got != 1 {
|
||||
t.Errorf("post-sweep-plus-one bucket count = %d; want 1", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKeyedRateLimiter_SweepKeepsActiveBuckets pins the inverse — a
|
||||
// bucket touched within the TTL window survives the sweep. Catches a
|
||||
// future regression that inverts the cutoff comparison.
|
||||
func TestKeyedRateLimiter_SweepKeepsActiveBuckets(t *testing.T) {
|
||||
limiter := &keyedRateLimiter{
|
||||
ipRate: 1000,
|
||||
ipBurst: 1000,
|
||||
userRate: 1000,
|
||||
userBurst: 1000,
|
||||
buckets: make(map[string]*tokenBucket),
|
||||
bucketTTL: 1 * time.Hour, // generous so test timing doesn't flake
|
||||
}
|
||||
limiter.allow("ip:198.51.100.42", false)
|
||||
limiter.sweep()
|
||||
limiter.mu.RLock()
|
||||
defer limiter.mu.RUnlock()
|
||||
if got := len(limiter.buckets); got != 1 {
|
||||
t.Errorf("active-bucket count = %d; want 1 (sweep should not evict within TTL)", got)
|
||||
}
|
||||
if got := limiter.evictedTotal.Load(); got != 0 {
|
||||
t.Errorf("evictedTotal = %d; want 0 (no evictions expected)", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user