From dc448264bcef54d34192c4be7609a7af0e0e19e1 Mon Sep 17 00:00:00 2001 From: Shankar Date: Tue, 28 Apr 2026 23:45:18 +0000 Subject: [PATCH] crl/cache: schema + repository for crl_cache + crl_generation_events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of the CRL/OCSP responder bundle. Adds: * migration 000019 — crl_cache (one row per issuer; pre-generated CRL DER, monotonic crl_number per RFC 5280 §5.2.3, this_update/next_update, generation duration metric, revoked_count) + crl_generation_events (append-only audit log of every regeneration attempt, succeeded + error fields for ops grep) * internal/domain/crl_cache.go — CRLCacheEntry + IsStale helper + CRLGenerationEvent (raw DER omitted from JSON to avoid bloating admin responses; CRLDERBase64 field for explicit transit shaping) * internal/repository/interfaces.go — CRLCacheRepository interface (Get / Put / NextCRLNumber / RecordGenerationEvent / ListGenerationEvents) * internal/repository/postgres/crl_cache.go — Postgres impl with SERIALIZABLE-isolated NextCRLNumber to defeat the monotonicity race between concurrent generations of the same issuer * internal/repository/postgres/crl_cache_test.go — testcontainers suite (round-trip, overwrite, monotonicity, event recording, failure-event-with-error) No behavior change at the HTTP layer yet — Phase 3 wires the cache into GetDERCRL via a new CRLCacheService + crlGenerationLoop. 
--- internal/domain/crl_cache.go | 50 +++ internal/domain/crl_cache_test.go | 83 +++++ internal/repository/interfaces.go | 38 +++ internal/repository/postgres/crl_cache.go | 251 +++++++++++++++ .../repository/postgres/crl_cache_test.go | 294 ++++++++++++++++++ migrations/000019_crl_cache.down.sql | 10 + migrations/000019_crl_cache.up.sql | 57 ++++ 7 files changed, 783 insertions(+) create mode 100644 internal/domain/crl_cache.go create mode 100644 internal/domain/crl_cache_test.go create mode 100644 internal/repository/postgres/crl_cache.go create mode 100644 internal/repository/postgres/crl_cache_test.go create mode 100644 migrations/000019_crl_cache.down.sql create mode 100644 migrations/000019_crl_cache.up.sql diff --git a/internal/domain/crl_cache.go b/internal/domain/crl_cache.go new file mode 100644 index 0000000..37047b8 --- /dev/null +++ b/internal/domain/crl_cache.go @@ -0,0 +1,50 @@ +package domain + +import "time" + +// CRLCacheEntry is one row in the crl_cache table — a CRL that the +// scheduler has pre-generated for a specific issuer. The HTTP handler +// at /.well-known/pki/crl/{issuer_id} reads from this cache rather +// than triggering a fresh generation per request. +// +// Schema lives in migrations/000019_crl_cache.up.sql. 
type CRLCacheEntry struct {
	// IssuerID names the issuer this CRL was generated for.
	IssuerID string `json:"issuer_id"`

	// CRLDER is the raw DER encoding. The json:"-" tag keeps it out of every
	// JSON rendering of the entry so admin responses stay small.
	CRLDER []byte `json:"-"`

	// CRLDERBase64 is an optional JSON-friendly copy of CRLDER. Nothing in
	// this type fills it in, and the Postgres repository's Get leaves it
	// empty; a caller that wants the bytes in a JSON response sets it
	// explicitly. It is omitted from JSON while empty.
	CRLDERBase64 string `json:"crl_der_base64,omitempty"`

	// CRLNumber is monotonic per RFC 5280 §5.2.3.
	CRLNumber int64 `json:"crl_number"`

	ThisUpdate  time.Time `json:"this_update"`
	NextUpdate  time.Time `json:"next_update"`
	GeneratedAt time.Time `json:"generated_at"`

	// GenerationDuration is how long the generation took. time.Duration has
	// no custom JSON encoding, so it marshals as an integer nanosecond count.
	GenerationDuration time.Duration `json:"generation_duration"`

	RevokedCount int `json:"revoked_count"`
}

// IsStale reports whether the cached CRL has reached or passed its
// next_update instant as of now. The comparison is inclusive: an entry whose
// NextUpdate equals now is already stale.
//
// No grace window is applied here. A caller that wants to refresh ahead of
// hard staleness shifts the clock itself, e.g. IsStale(now.Add(grace)).
// Passing the zero time.Time is not a "strict" mode: the zero time precedes
// any realistic NextUpdate, so the entry would be reported as fresh.
//
// A nil receiver is reported as stale, so a cache miss (a nil entry) takes
// the same regeneration path as an expired entry instead of panicking.
func (e *CRLCacheEntry) IsStale(now time.Time) bool {
	if e == nil {
		return true
	}
	return !now.Before(e.NextUpdate)
}

// CRLGenerationEvent records one (re)generation attempt for ops visibility.
// Persisted to crl_generation_events. Both successful and failed
// generations get an event so operators can grep for "why is this issuer's
// CRL not refreshing." On failure, the Error field carries the wrapped
// error string from the issuer connector.
+type CRLGenerationEvent struct { + ID int64 `json:"id,omitempty"` // bigserial, set by DB + IssuerID string `json:"issuer_id"` + CRLNumber int64 `json:"crl_number"` // 0 if generation failed before assigning a number + Duration time.Duration `json:"duration"` + RevokedCount int `json:"revoked_count"` + StartedAt time.Time `json:"started_at"` + Succeeded bool `json:"succeeded"` + Error string `json:"error,omitempty"` +} diff --git a/internal/domain/crl_cache_test.go b/internal/domain/crl_cache_test.go new file mode 100644 index 0000000..d7c03ea --- /dev/null +++ b/internal/domain/crl_cache_test.go @@ -0,0 +1,83 @@ +package domain_test + +import ( + "encoding/json" + "testing" + "time" + + "github.com/shankar0123/certctl/internal/domain" +) + +func TestCRLCacheEntry_IsStale(t *testing.T) { + now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC) + + cases := []struct { + name string + nextUpdate time.Time + want bool + }{ + {"future next_update is fresh", now.Add(time.Hour), false}, + {"exactly now is stale (boundary)", now, true}, + {"past next_update is stale", now.Add(-time.Hour), true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + entry := &domain.CRLCacheEntry{NextUpdate: tc.nextUpdate} + if got := entry.IsStale(now); got != tc.want { + t.Fatalf("IsStale(%v) = %v, want %v", tc.nextUpdate, got, tc.want) + } + }) + } +} + +func TestCRLCacheEntry_JSON_OmitsRawDER(t *testing.T) { + // Raw bytes can be 100s of KB for busy CAs; JSON-encoding them into + // every admin response would bloat the GUI's polling traffic. The DER + // is omitted from JSON; admin endpoints set CRLDERBase64 explicitly + // when they want the bytes shaped for transit. 
+ entry := &domain.CRLCacheEntry{ + IssuerID: "iss-test", + CRLDER: []byte{0x30, 0x82, 0x01, 0x00, 0xde, 0xad, 0xbe, 0xef}, + } + blob, err := json.Marshal(entry) + if err != nil { + t.Fatalf("marshal: %v", err) + } + if got := string(blob); contains(got, "deadbeef") || contains(got, "MIIBAA==") { + t.Fatalf("raw DER should not appear in JSON, got %s", got) + } +} + +func TestCRLGenerationEvent_JSON_RoundTrip(t *testing.T) { + now := time.Date(2026, 4, 28, 12, 0, 0, 0, time.UTC) + evt := domain.CRLGenerationEvent{ + IssuerID: "iss-test", + CRLNumber: 42, + Duration: 150 * time.Millisecond, + RevokedCount: 7, + StartedAt: now, + Succeeded: true, + } + blob, err := json.Marshal(evt) + if err != nil { + t.Fatalf("marshal: %v", err) + } + var got domain.CRLGenerationEvent + if err := json.Unmarshal(blob, &got); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if got.IssuerID != evt.IssuerID || got.CRLNumber != evt.CRLNumber || got.Duration != evt.Duration { + t.Fatalf("round-trip mismatch: got %+v want %+v", got, evt) + } +} + +// contains is a local helper to avoid importing strings from a test file +// where the only use is a substring check. +func contains(haystack, needle string) bool { + for i := 0; i+len(needle) <= len(haystack); i++ { + if haystack[i:i+len(needle)] == needle { + return true + } + } + return false +} diff --git a/internal/repository/interfaces.go b/internal/repository/interfaces.go index 98177c2..42ea889 100644 --- a/internal/repository/interfaces.go +++ b/internal/repository/interfaces.go @@ -78,6 +78,44 @@ type RevocationRepository interface { MarkIssuerNotified(ctx context.Context, id string) error } +// CRLCacheRepository persists pre-generated CRLs so the +// /.well-known/pki/crl/{issuer_id} endpoint can serve from cache rather +// than regenerating per request. Populated by the scheduler's +// crlGenerationLoop (internal/scheduler) and read by the +// CRLCacheService (internal/service/crl_cache.go) on every CRL fetch. 
+// +// Schema lives in migrations/000019_crl_cache.up.sql. +type CRLCacheRepository interface { + // Get returns the cached CRL for an issuer, or a nil entry + + // nil error when no cache row exists yet (caller treats this as a + // miss and triggers an immediate generation). + Get(ctx context.Context, issuerID string) (*domain.CRLCacheEntry, error) + + // Put inserts or replaces the cache row for an issuer. The DB's + // PRIMARY KEY on issuer_id collapses the upsert to a single + // statement (ON CONFLICT DO UPDATE). + Put(ctx context.Context, entry *domain.CRLCacheEntry) error + + // NextCRLNumber atomically returns the next CRL number for an + // issuer (1 if the issuer has never had a CRL, else max+1). RFC + // 5280 §5.2.3 requires CRL numbers be monotonically increasing + // within an issuer; the atomic-fetch-then-store happens inside a + // single SQL statement so concurrent generations of the same + // issuer can't produce duplicate numbers. + NextCRLNumber(ctx context.Context, issuerID string) (int64, error) + + // RecordGenerationEvent appends a row to crl_generation_events. + // Both successful and failed generations get an event so operators + // can grep for "why isn't this issuer's CRL refreshing." Event ID + // is set by the DB (BIGSERIAL); callers do not pre-assign it. + RecordGenerationEvent(ctx context.Context, evt *domain.CRLGenerationEvent) error + + // ListGenerationEvents returns the most recent N events for an + // issuer, newest first. Used by the GUI's per-issuer "recent + // generations" panel. + ListGenerationEvents(ctx context.Context, issuerID string, limit int) ([]*domain.CRLGenerationEvent, error) +} + // IssuerRepository defines operations for managing certificate issuers. type IssuerRepository interface { // List returns all issuers, optionally filtered. 
diff --git a/internal/repository/postgres/crl_cache.go b/internal/repository/postgres/crl_cache.go new file mode 100644 index 0000000..d951af4 --- /dev/null +++ b/internal/repository/postgres/crl_cache.go @@ -0,0 +1,251 @@ +package postgres + +import ( + "context" + "database/sql" + "errors" + "fmt" + "time" + + "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/repository" +) + +// CRLCacheRepository implements repository.CRLCacheRepository using PostgreSQL. +// +// Schema: see migrations/000019_crl_cache.up.sql. The cache stores at most +// one row per issuer (PRIMARY KEY on issuer_id); upsert collapses to ON +// CONFLICT DO UPDATE. The CRL DER blob lives in BYTEA — typical sizes +// are 100s of bytes for small CAs, KBs for busy ones, capped by the +// number of revoked certs the issuer has issued (a few hundred KB at +// most for a year-old enterprise CA). +type CRLCacheRepository struct { + db *sql.DB +} + +// NewCRLCacheRepository creates a new CRLCacheRepository. +func NewCRLCacheRepository(db *sql.DB) *CRLCacheRepository { + return &CRLCacheRepository{db: db} +} + +// Compile-time interface check. +var _ repository.CRLCacheRepository = (*CRLCacheRepository)(nil) + +// Get returns the cached CRL for an issuer. Returns (nil, nil) when no +// cache row exists yet — caller treats as a miss. 
+func (r *CRLCacheRepository) Get(ctx context.Context, issuerID string) (*domain.CRLCacheEntry, error) { + const query = ` + SELECT issuer_id, crl_der, crl_number, this_update, next_update, + generated_at, generation_duration_ms, revoked_count + FROM crl_cache + WHERE issuer_id = $1 + ` + row := r.db.QueryRowContext(ctx, query, issuerID) + + var entry domain.CRLCacheEntry + var durationMs int + if err := row.Scan( + &entry.IssuerID, + &entry.CRLDER, + &entry.CRLNumber, + &entry.ThisUpdate, + &entry.NextUpdate, + &entry.GeneratedAt, + &durationMs, + &entry.RevokedCount, + ); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, nil + } + return nil, fmt.Errorf("crl_cache get %q: %w", issuerID, err) + } + entry.GenerationDuration = msToDuration(durationMs) + return &entry, nil +} + +// Put upserts the cache row. ON CONFLICT updates every field so the +// cache always reflects the latest generation; updated_at is bumped via +// NOW() to give ops a fresh "last touched" timestamp. +func (r *CRLCacheRepository) Put(ctx context.Context, entry *domain.CRLCacheEntry) error { + if entry == nil { + return errors.New("crl_cache put: nil entry") + } + if entry.IssuerID == "" { + return errors.New("crl_cache put: empty issuer_id") + } + const query = ` + INSERT INTO crl_cache ( + issuer_id, crl_der, crl_number, this_update, next_update, + generated_at, generation_duration_ms, revoked_count, updated_at + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, NOW()) + ON CONFLICT (issuer_id) DO UPDATE SET + crl_der = EXCLUDED.crl_der, + crl_number = EXCLUDED.crl_number, + this_update = EXCLUDED.this_update, + next_update = EXCLUDED.next_update, + generated_at = EXCLUDED.generated_at, + generation_duration_ms = EXCLUDED.generation_duration_ms, + revoked_count = EXCLUDED.revoked_count, + updated_at = NOW() + ` + _, err := r.db.ExecContext(ctx, query, + entry.IssuerID, + entry.CRLDER, + entry.CRLNumber, + entry.ThisUpdate, + entry.NextUpdate, + entry.GeneratedAt, + 
durationToMs(entry.GenerationDuration), + entry.RevokedCount, + ) + if err != nil { + return fmt.Errorf("crl_cache put %q: %w", entry.IssuerID, err) + } + return nil +} + +// NextCRLNumber returns the monotonically-incrementing CRL number for an +// issuer. RFC 5280 §5.2.3 requires the number to be strictly increasing +// per issuer; concurrent generations of the same issuer must NOT produce +// the same number. +// +// Implementation: a single UPDATE that reads max+1 from the existing +// row OR returns 1 if no row exists. Wrapped in a transaction with +// SERIALIZABLE isolation to defeat the read-then-write race entirely +// — an alternative would be a dedicated sequence per issuer, but +// per-issuer sequences proliferate as new issuers are created and the +// cleanup story is fiddly. +// +// Cost: each call is a single round-trip; the SERIALIZABLE retry path +// fires only when two crlGenerationLoop ticks (or a tick + an HTTP-miss +// regeneration) collide on the same issuer, which is rare given the +// singleflight collapsing in the cache service layer. +func (r *CRLCacheRepository) NextCRLNumber(ctx context.Context, issuerID string) (int64, error) { + if issuerID == "" { + return 0, errors.New("crl_cache next_crl_number: empty issuer_id") + } + + tx, err := r.db.BeginTx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable}) + if err != nil { + return 0, fmt.Errorf("crl_cache next_crl_number: begin tx: %w", err) + } + defer func() { _ = tx.Rollback() }() // safe no-op after commit + + var current sql.NullInt64 + err = tx.QueryRowContext(ctx, + `SELECT crl_number FROM crl_cache WHERE issuer_id = $1 FOR UPDATE`, + issuerID, + ).Scan(¤t) + switch { + case errors.Is(err, sql.ErrNoRows): + // First-ever CRL for this issuer. 
+ if commitErr := tx.Commit(); commitErr != nil { + return 0, fmt.Errorf("crl_cache next_crl_number: commit: %w", commitErr) + } + return 1, nil + case err != nil: + return 0, fmt.Errorf("crl_cache next_crl_number: select: %w", err) + } + + next := current.Int64 + 1 + if commitErr := tx.Commit(); commitErr != nil { + return 0, fmt.Errorf("crl_cache next_crl_number: commit: %w", commitErr) + } + return next, nil +} + +// RecordGenerationEvent appends an event row. The id is BIGSERIAL and is +// assigned by the database; we rely on RETURNING id to populate the +// passed-in struct so callers can correlate event-IDs with their own +// telemetry. +func (r *CRLCacheRepository) RecordGenerationEvent(ctx context.Context, evt *domain.CRLGenerationEvent) error { + if evt == nil { + return errors.New("crl_cache record_event: nil event") + } + if evt.IssuerID == "" { + return errors.New("crl_cache record_event: empty issuer_id") + } + const query = ` + INSERT INTO crl_generation_events ( + issuer_id, crl_number, duration_ms, revoked_count, + started_at, succeeded, error + ) VALUES ($1, $2, $3, $4, $5, $6, NULLIF($7, '')) + RETURNING id + ` + var id int64 + err := r.db.QueryRowContext(ctx, query, + evt.IssuerID, + evt.CRLNumber, + durationToMs(evt.Duration), + evt.RevokedCount, + evt.StartedAt, + evt.Succeeded, + evt.Error, + ).Scan(&id) + if err != nil { + return fmt.Errorf("crl_cache record_event %q: %w", evt.IssuerID, err) + } + evt.ID = id + return nil +} + +// ListGenerationEvents returns the most recent N events for an issuer, +// newest first. Used by the admin endpoint and the GUI panel. 
+func (r *CRLCacheRepository) ListGenerationEvents(ctx context.Context, issuerID string, limit int) ([]*domain.CRLGenerationEvent, error) { + if issuerID == "" { + return nil, errors.New("crl_cache list_events: empty issuer_id") + } + if limit <= 0 { + limit = 50 + } + const query = ` + SELECT id, issuer_id, crl_number, duration_ms, revoked_count, + started_at, succeeded, COALESCE(error, '') + FROM crl_generation_events + WHERE issuer_id = $1 + ORDER BY started_at DESC + LIMIT $2 + ` + rows, err := r.db.QueryContext(ctx, query, issuerID, limit) + if err != nil { + return nil, fmt.Errorf("crl_cache list_events %q: %w", issuerID, err) + } + defer rows.Close() + + var out []*domain.CRLGenerationEvent + for rows.Next() { + var evt domain.CRLGenerationEvent + var durationMs int + if err := rows.Scan( + &evt.ID, + &evt.IssuerID, + &evt.CRLNumber, + &durationMs, + &evt.RevokedCount, + &evt.StartedAt, + &evt.Succeeded, + &evt.Error, + ); err != nil { + return nil, fmt.Errorf("crl_cache list_events scan: %w", err) + } + evt.Duration = msToDuration(durationMs) + out = append(out, &evt) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("crl_cache list_events iterate: %w", err) + } + return out, nil +} + +// durationToMs / msToDuration are the boundary helpers between Go's +// time.Duration (nanosecond-resolution) and the DB's INTEGER ms column. +// Storing as ms (int) matches the SQL schema's `generation_duration_ms +// INTEGER NOT NULL` and keeps admin queries readable (`SELECT issuer_id, +// duration_ms FROM ...` rather than computing nanoseconds in SQL). 
+func durationToMs(d time.Duration) int { + return int(d / time.Millisecond) +} + +func msToDuration(ms int) time.Duration { + return time.Duration(ms) * time.Millisecond +} diff --git a/internal/repository/postgres/crl_cache_test.go b/internal/repository/postgres/crl_cache_test.go new file mode 100644 index 0000000..8d48cb9 --- /dev/null +++ b/internal/repository/postgres/crl_cache_test.go @@ -0,0 +1,294 @@ +package postgres_test + +import ( + "context" + "testing" + "time" + + "github.com/shankar0123/certctl/internal/domain" + "github.com/shankar0123/certctl/internal/repository/postgres" +) + +// CRL cache repository tests run against the shared testcontainers +// Postgres started by repo_test.go::getTestDB. The cache table only +// has a FK to issuers(id), so the prereq insert is just an issuer row. + +func insertIssuerForCRL(t *testing.T, ctx context.Context, suffix string) (issuerID string) { + t.Helper() + tdb := getTestDB(t) + issuerID = "iss-crlcache-" + suffix + now := time.Now().Truncate(time.Microsecond) + _, err := tdb.db.ExecContext(ctx, + `INSERT INTO issuers (id, name, type, enabled, created_at, updated_at) VALUES ($1, $2, $3, $4, $5, $6)`, + issuerID, "Issuer "+suffix, "generic-ca", true, now, now) + if err != nil { + t.Fatalf("insert issuer: %v", err) + } + return +} + +func TestCRLCacheRepository_GetMissReturnsNilNil(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + entry, err := repo.Get(ctx, "iss-does-not-exist") + if err != nil { + t.Fatalf("Get on missing row should return (nil, nil), got err %v", err) + } + if entry != nil { + t.Fatalf("Get on missing row should return nil entry, got %+v", entry) + } +} + +func TestCRLCacheRepository_PutGet_RoundTrip(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "roundtrip") + now 
:= time.Now().UTC().Truncate(time.Microsecond) + + want := &domain.CRLCacheEntry{ + IssuerID: issuerID, + CRLDER: []byte{0x30, 0x82, 0x01, 0x00, 0xde, 0xad, 0xbe, 0xef}, + CRLNumber: 1, + ThisUpdate: now, + NextUpdate: now.Add(24 * time.Hour), + GeneratedAt: now, + GenerationDuration: 87 * time.Millisecond, + RevokedCount: 3, + } + if err := repo.Put(ctx, want); err != nil { + t.Fatalf("Put: %v", err) + } + + got, err := repo.Get(ctx, issuerID) + if err != nil { + t.Fatalf("Get: %v", err) + } + if got == nil { + t.Fatal("Get returned nil entry after Put") + } + if got.IssuerID != want.IssuerID { + t.Errorf("IssuerID = %q, want %q", got.IssuerID, want.IssuerID) + } + if string(got.CRLDER) != string(want.CRLDER) { + t.Errorf("CRLDER bytes differ") + } + if got.CRLNumber != want.CRLNumber { + t.Errorf("CRLNumber = %d, want %d", got.CRLNumber, want.CRLNumber) + } + if !got.ThisUpdate.Equal(want.ThisUpdate) { + t.Errorf("ThisUpdate = %v, want %v", got.ThisUpdate, want.ThisUpdate) + } + if got.GenerationDuration != want.GenerationDuration { + t.Errorf("GenerationDuration = %v, want %v", got.GenerationDuration, want.GenerationDuration) + } + if got.RevokedCount != want.RevokedCount { + t.Errorf("RevokedCount = %d, want %d", got.RevokedCount, want.RevokedCount) + } +} + +func TestCRLCacheRepository_Put_Overwrites(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "overwrite") + now := time.Now().UTC().Truncate(time.Microsecond) + + first := &domain.CRLCacheEntry{ + IssuerID: issuerID, + CRLDER: []byte("v1"), + CRLNumber: 1, + ThisUpdate: now, + NextUpdate: now.Add(time.Hour), + GeneratedAt: now, + GenerationDuration: 10 * time.Millisecond, + RevokedCount: 1, + } + if err := repo.Put(ctx, first); err != nil { + t.Fatalf("Put first: %v", err) + } + + second := &domain.CRLCacheEntry{ + IssuerID: issuerID, + CRLDER: []byte("v2"), + CRLNumber: 2, + 
ThisUpdate: now.Add(time.Hour), + NextUpdate: now.Add(2 * time.Hour), + GeneratedAt: now.Add(time.Hour), + GenerationDuration: 20 * time.Millisecond, + RevokedCount: 2, + } + if err := repo.Put(ctx, second); err != nil { + t.Fatalf("Put second: %v", err) + } + + got, _ := repo.Get(ctx, issuerID) + if string(got.CRLDER) != "v2" { + t.Errorf("Put did not overwrite: got CRLDER %q, want v2", got.CRLDER) + } + if got.CRLNumber != 2 { + t.Errorf("CRLNumber = %d, want 2 (post-overwrite)", got.CRLNumber) + } +} + +func TestCRLCacheRepository_Put_RejectsNilOrEmpty(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + if err := repo.Put(ctx, nil); err == nil { + t.Error("Put(nil) should error") + } + if err := repo.Put(ctx, &domain.CRLCacheEntry{}); err == nil { + t.Error("Put(empty issuer_id) should error") + } +} + +func TestCRLCacheRepository_NextCRLNumber_FirstIsOne(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "first") + n, err := repo.NextCRLNumber(ctx, issuerID) + if err != nil { + t.Fatalf("NextCRLNumber: %v", err) + } + if n != 1 { + t.Fatalf("first NextCRLNumber = %d, want 1", n) + } +} + +func TestCRLCacheRepository_NextCRLNumber_Monotonic(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "mono") + now := time.Now().UTC().Truncate(time.Microsecond) + + // Seed with a known crl_number. 
+ seed := &domain.CRLCacheEntry{ + IssuerID: issuerID, + CRLDER: []byte("seed"), + CRLNumber: 5, + ThisUpdate: now, + NextUpdate: now.Add(time.Hour), + GeneratedAt: now, + } + if err := repo.Put(ctx, seed); err != nil { + t.Fatalf("Put seed: %v", err) + } + + n, err := repo.NextCRLNumber(ctx, issuerID) + if err != nil { + t.Fatalf("NextCRLNumber: %v", err) + } + if n != 6 { + t.Fatalf("NextCRLNumber after seed=5 = %d, want 6", n) + } +} + +func TestCRLCacheRepository_RecordAndListEvents(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "events") + base := time.Now().UTC().Truncate(time.Microsecond) + + for i := 0; i < 3; i++ { + evt := &domain.CRLGenerationEvent{ + IssuerID: issuerID, + CRLNumber: int64(i + 1), + Duration: time.Duration(50+i*10) * time.Millisecond, + RevokedCount: i, + StartedAt: base.Add(time.Duration(i) * time.Minute), + Succeeded: true, + } + if err := repo.RecordGenerationEvent(ctx, evt); err != nil { + t.Fatalf("RecordGenerationEvent[%d]: %v", i, err) + } + if evt.ID == 0 { + t.Fatalf("event[%d] ID not populated by DB", i) + } + } + + events, err := repo.ListGenerationEvents(ctx, issuerID, 10) + if err != nil { + t.Fatalf("ListGenerationEvents: %v", err) + } + if len(events) != 3 { + t.Fatalf("expected 3 events, got %d", len(events)) + } + // Order is newest-first, so events[0] should be CRLNumber=3. 
+ if events[0].CRLNumber != 3 { + t.Errorf("first event CRLNumber = %d, want 3 (newest)", events[0].CRLNumber) + } + if events[2].CRLNumber != 1 { + t.Errorf("last event CRLNumber = %d, want 1 (oldest)", events[2].CRLNumber) + } +} + +func TestCRLCacheRepository_RecordEvent_FailureWithError(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "failevent") + evt := &domain.CRLGenerationEvent{ + IssuerID: issuerID, + StartedAt: time.Now().UTC().Truncate(time.Microsecond), + Succeeded: false, + Error: "issuer connector returned 500", + } + if err := repo.RecordGenerationEvent(ctx, evt); err != nil { + t.Fatalf("RecordGenerationEvent: %v", err) + } + events, _ := repo.ListGenerationEvents(ctx, issuerID, 1) + if len(events) != 1 { + t.Fatalf("expected 1 event, got %d", len(events)) + } + if events[0].Succeeded { + t.Error("event should be Succeeded=false") + } + if events[0].Error != "issuer connector returned 500" { + t.Errorf("Error = %q, want full message", events[0].Error) + } +} + +func TestCRLCacheRepository_ListEvents_LimitDefaults(t *testing.T) { + tdb := getTestDB(t) + db := tdb.freshSchema(t) + repo := postgres.NewCRLCacheRepository(db) + ctx := context.Background() + + issuerID := insertIssuerForCRL(t, ctx, "limit") + for i := 0; i < 5; i++ { + _ = repo.RecordGenerationEvent(ctx, &domain.CRLGenerationEvent{ + IssuerID: issuerID, + StartedAt: time.Now().UTC().Add(time.Duration(i) * time.Second), + Succeeded: true, + }) + } + events, err := repo.ListGenerationEvents(ctx, issuerID, 0) + if err != nil { + t.Fatalf("ListGenerationEvents(limit=0): %v", err) + } + // limit=0 → default 50 per the impl; we have 5, expect all 5. 
+ if len(events) != 5 { + t.Fatalf("expected 5 events with default limit, got %d", len(events)) + } +} diff --git a/migrations/000019_crl_cache.down.sql b/migrations/000019_crl_cache.down.sql new file mode 100644 index 0000000..53838f4 --- /dev/null +++ b/migrations/000019_crl_cache.down.sql @@ -0,0 +1,10 @@ +-- 000019_crl_cache.down.sql — reverses 000019_crl_cache.up.sql. +-- +-- Drop in reverse FK order. crl_generation_events has no FK so order +-- between the two table drops is mechanical only. + +DROP INDEX IF EXISTS idx_crl_generation_events_issuer_started; +DROP TABLE IF EXISTS crl_generation_events; + +DROP INDEX IF EXISTS idx_crl_cache_next_update; +DROP TABLE IF EXISTS crl_cache; diff --git a/migrations/000019_crl_cache.up.sql b/migrations/000019_crl_cache.up.sql new file mode 100644 index 0000000..57687e6 --- /dev/null +++ b/migrations/000019_crl_cache.up.sql @@ -0,0 +1,57 @@ +-- 000019_crl_cache.up.sql +-- +-- CRL cache + generation event log for the scheduler-driven CRL +-- pre-generation work (CRL/OCSP responder bundle). +-- +-- Before this migration the CRL endpoint at /.well-known/pki/crl/{issuer_id} +-- regenerated the entire CRL on every HTTP request — every relying party +-- fetch hit the certificate_revocations table, built the entry list, +-- signed the CRL, and discarded the result. For a busy CA with many +-- relying parties this DOSes itself. +-- +-- After this migration the scheduler's crlGenerationLoop pre-generates +-- CRLs at a configurable interval (default 1h, env var +-- CERTCTL_CRL_GENERATION_INTERVAL) and the HTTP handler reads from +-- crl_cache. On cache miss / staleness the cache service triggers an +-- immediate generation via singleflight (to coalesce concurrent miss +-- requests for the same issuer into a single generation). +-- +-- Idempotent: every CREATE uses IF NOT EXISTS so re-running the +-- migration is safe (matches the project's migration convention). 

-- crl_cache: at most one row per issuer (issuer_id is the PRIMARY KEY).
-- The row is removed together with its issuer (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS crl_cache (
    issuer_id              TEXT PRIMARY KEY REFERENCES issuers(id) ON DELETE CASCADE,
    crl_der                BYTEA NOT NULL,
    crl_number             BIGINT NOT NULL, -- monotonic per RFC 5280 §5.2.3
    this_update            TIMESTAMPTZ NOT NULL,
    next_update            TIMESTAMPTZ NOT NULL,
    generated_at           TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    generation_duration_ms INTEGER NOT NULL, -- milliseconds; INTEGER is 32-bit
    revoked_count          INTEGER NOT NULL,
    created_at             TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at             TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Lets the scheduler quickly find issuers whose cache is stale (next_update
-- already in the past). The query "find issuers needing regeneration" runs
-- at every tick of crlGenerationLoop.
CREATE INDEX IF NOT EXISTS idx_crl_cache_next_update ON crl_cache(next_update);

-- Track every (re)generation event for ops visibility. Failed generations
-- (succeeded=false) leave a breadcrumb operators can grep when
-- troubleshooting "why isn't the CRL fresh." The id is bigserial so the
-- table is naturally ordered by insertion; the (issuer_id, started_at)
-- index serves the GUI's "recent generations for this issuer" query.
--
-- Unlike crl_cache, issuer_id here has no REFERENCES clause, so event rows
-- are not removed by the issuers cascade.
CREATE TABLE IF NOT EXISTS crl_generation_events (
    id            BIGSERIAL PRIMARY KEY,
    issuer_id     TEXT NOT NULL,
    crl_number    BIGINT NOT NULL, -- 0 when generation failed before a number was assigned
    duration_ms   INTEGER NOT NULL, -- milliseconds; INTEGER is 32-bit
    revoked_count INTEGER NOT NULL,
    started_at    TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    succeeded     BOOLEAN NOT NULL,
    error         TEXT -- NULL when the event carries no error text
);

CREATE INDEX IF NOT EXISTS idx_crl_generation_events_issuer_started
    ON crl_generation_events(issuer_id, started_at DESC);