fix(middleware): SEC-006 — TTL-evict idle token-bucket rate-limiter entries

Sprint 2 unified-master-audit closure. Pre-fix the keyed rate
limiter's bucket map had no eviction. The package-level comment
explicitly noted the leak: high-cardinality unauthenticated traffic
(CGNAT churn, Tor exit lists, botnets, infinite-cardinality scanners)
grew process memory unboundedly. Production deploys with millions of
unique IPs would eventually OOM.

Fix:
  - RateLimitConfig.BucketTTL (env CERTCTL_RATE_LIMIT_BUCKET_TTL,
    default 1h, clamp-floor 1m). 1h chosen to be well above realistic
    operator IP churn windows (returning clients keep their bucket)
    and well below the unbounded-leak window the pre-fix code
    allowed.
  - tokenBucket gains a lastAccess field updated on every allow()
    call via touch(); reading via lastAccessTime() under the bucket's
    own mutex.
  - keyedRateLimiter.sweepLoop runs in a single goroutine per
    limiter (production wires 2: default + no-auth fallback), waking
    every BucketTTL/4. sweep() removes any bucket whose lastAccess
    is older than the cutoff and bumps evictedTotal atomically.
  - Both NewRateLimiter call sites in cmd/server/main.go (default
    stack and no-auth fallback) now thread cfg.RateLimit.BucketTTL.

Regression coverage:
  - TestKeyedRateLimiter_SweepEvictsIdleBuckets: 1000 synthetic IP
    keys populate the map, advance past TTL, call sweep() directly,
    assert map drained to 0 + evictedTotal=1000 + fresh key creates
    new bucket (map not poisoned).
  - TestKeyedRateLimiter_SweepKeepsActiveBuckets: inverse — a bucket
    touched within the TTL window survives the sweep. Catches a
    future regression that inverts the cutoff comparison.

Closes SEC-006.
This commit is contained in:
shankar0123
2026-05-16 04:01:18 +00:00
parent 037876fa0f
commit 8f2e5771db
5 changed files with 246 additions and 11 deletions
@@ -2,9 +2,11 @@ package middleware
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/certctl-io/certctl/internal/auth"
)
@@ -188,3 +190,94 @@ func TestRateLimiter_M025_EmptyUserKeyTreatedAsAnonymous(t *testing.T) {
t.Errorf("second anonymous request from different IP should still pass (independent IP buckets); got %d", rr.Code)
}
}
// =============================================================================
// SEC-006 closure (Sprint 2, 2026-05-16). The token-bucket map now has
// a background sweeper that evicts buckets whose last allow() call is
// older than the configured BucketTTL. This test pins the eviction
// path against a synthetic 1000-key load and asserts:
//
// 1. Buckets created by N distinct keys land in the map.
// 2. After the simulated TTL elapses and the sweeper runs, the map
// is reclaimed and evictedTotal reflects the count.
// 3. A subsequent request from a fresh key creates a new bucket
// (i.e. the map isn't poisoned by the eviction).
//
// The test calls sweep() directly rather than relying on the goroutine
// + time.Ticker so it stays deterministic and fast. The sweeper
// goroutine itself is exercised in production; this test pins the
// eviction predicate.
// =============================================================================
func TestKeyedRateLimiter_SweepEvictsIdleBuckets(t *testing.T) {
limiter := &keyedRateLimiter{
ipRate: 1000,
ipBurst: 1000,
userRate: 1000,
userBurst: 1000,
buckets: make(map[string]*tokenBucket),
bucketTTL: 100 * time.Millisecond,
}
// Populate 1000 buckets from a synthetic IP-key churn.
for i := 0; i < 1000; i++ {
key := "ip:198.51.100." + fmt.Sprintf("%d", i%256) + "/" + fmt.Sprintf("%d", i)
if !limiter.allow(key, false) {
t.Fatalf("synthetic IP-key %d: allow returned false on first call", i)
}
}
limiter.mu.RLock()
if got := len(limiter.buckets); got != 1000 {
limiter.mu.RUnlock()
t.Fatalf("post-populate bucket count = %d; want 1000", got)
}
limiter.mu.RUnlock()
// Advance past the TTL boundary, then sweep.
time.Sleep(110 * time.Millisecond)
limiter.sweep()
limiter.mu.RLock()
remaining := len(limiter.buckets)
limiter.mu.RUnlock()
if remaining != 0 {
t.Errorf("post-sweep bucket count = %d; want 0 (all should have been evicted)", remaining)
}
if got := limiter.evictedTotal.Load(); got != 1000 {
t.Errorf("evictedTotal = %d; want 1000", got)
}
// A fresh request creates a new bucket — map isn't poisoned.
if !limiter.allow("ip:203.0.113.7", false) {
t.Errorf("fresh key: allow returned false on first call after sweep")
}
limiter.mu.RLock()
defer limiter.mu.RUnlock()
if got := len(limiter.buckets); got != 1 {
t.Errorf("post-sweep-plus-one bucket count = %d; want 1", got)
}
}
// TestKeyedRateLimiter_SweepKeepsActiveBuckets pins the inverse — a
// bucket touched within the TTL window survives the sweep. Catches a
// future regression that inverts the cutoff comparison.
func TestKeyedRateLimiter_SweepKeepsActiveBuckets(t *testing.T) {
limiter := &keyedRateLimiter{
ipRate: 1000,
ipBurst: 1000,
userRate: 1000,
userBurst: 1000,
buckets: make(map[string]*tokenBucket),
bucketTTL: 1 * time.Hour, // generous so test timing doesn't flake
}
limiter.allow("ip:198.51.100.42", false)
limiter.sweep()
limiter.mu.RLock()
defer limiter.mu.RUnlock()
if got := len(limiter.buckets); got != 1 {
t.Errorf("active-bucket count = %d; want 1 (sweep should not evict within TTL)", got)
}
if got := limiter.evictedTotal.Load(); got != 0 {
t.Errorf("evictedTotal = %d; want 0 (no evictions expected)", got)
}
}