mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 21:31:34 +00:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 677524d9ec | |||
| 9dc0742e77 | |||
| 1440a30d28 | |||
| a3d8b9c607 | |||
| aa6fafdee9 | |||
| 86fffa305a | |||
| e17788355b | |||
| 87213128cc | |||
| 697fa792ea | |||
| 9c1d446e40 | |||
| 3192cd15c5 | |||
| af47d19ae2 | |||
| cfc234ec42 | |||
| a91197014f | |||
| d6959a75c1 | |||
| 97b23e98d9 | |||
| 4cf5fcdb4f | |||
| 1ee67b7792 | |||
| 128d0eeaa8 | |||
| 9834b4e4a4 | |||
| cab579368b | |||
| 4e5522a999 |
+25
-4
@@ -13,22 +13,43 @@ POSTGRES_PASSWORD=change-me-in-production
|
||||
# Certctl Server
|
||||
# All server vars use the CERTCTL_ prefix (see internal/config/config.go)
|
||||
# ==============================================================================
|
||||
CERTCTL_DATABASE_URL=postgres://certctl:certctl@postgres:5432/certctl?sslmode=disable
|
||||
# IMPORTANT: keep the password segment of CERTCTL_DATABASE_URL in sync with
|
||||
# POSTGRES_PASSWORD above. If you deploy via `deploy/docker-compose.yml`,
|
||||
# this value is *overridden* by the compose file's
|
||||
# `postgres://certctl:${POSTGRES_PASSWORD:-certctl}@postgres:5432/...`
|
||||
# interpolation — but if you run the binary directly with this .env loaded
|
||||
# (e.g. `set -a; source .env; ./certctl-server`), update *both* lines.
|
||||
# Background: editing POSTGRES_PASSWORD after the postgres data directory
|
||||
# has been initialized once does NOT rotate the password — initdb only
|
||||
# seeds pg_authid on first boot of an empty volume. See docs/quickstart.md
|
||||
# "Warning" callout and `internal/repository/postgres/db.go::wrapPingError`
|
||||
# for the SQLSTATE 28P01 diagnostic that fires when the two drift.
|
||||
CERTCTL_DATABASE_URL=postgres://certctl:change-me-in-production@postgres:5432/certctl?sslmode=disable
|
||||
CERTCTL_SERVER_HOST=0.0.0.0
|
||||
CERTCTL_SERVER_PORT=8443
|
||||
CERTCTL_LOG_LEVEL=info
|
||||
CERTCTL_LOG_FORMAT=json
|
||||
|
||||
# Auth type: "api-key", "jwt", or "none" (for demo/development)
|
||||
# Auth type: "api-key" (production) or "none" (demo/development).
|
||||
# For JWT/OIDC, run an authenticating gateway in front of certctl
|
||||
# (oauth2-proxy / Envoy ext_authz / Traefik ForwardAuth / Pomerium) and
|
||||
# set CERTCTL_AUTH_TYPE=none on the upstream — see
|
||||
# docs/architecture.md "Authenticating-gateway pattern". G-1 removed
|
||||
# the in-process "jwt" option (no JWT middleware shipped — silent auth
|
||||
# downgrade); see docs/upgrade-to-v2-jwt-removal.md if you previously
|
||||
# set CERTCTL_AUTH_TYPE=jwt.
|
||||
CERTCTL_AUTH_TYPE=none
|
||||
# Required when CERTCTL_AUTH_TYPE is "api-key" or "jwt"
|
||||
# Required when CERTCTL_AUTH_TYPE is "api-key".
|
||||
# Generate with: openssl rand -base64 32
|
||||
# CERTCTL_AUTH_SECRET=change-me-in-production
|
||||
|
||||
# ==============================================================================
|
||||
# Certctl Agent
|
||||
# ==============================================================================
|
||||
CERTCTL_SERVER_URL=http://localhost:8443
|
||||
# HTTPS-only as of v2.2 (TLS 1.3 pinned). Agents reject http:// URLs at
|
||||
# startup. Use the docker-compose self-signed bootstrap CA bundle from
|
||||
# `deploy/test/certs/ca.crt` or supply your own via CERTCTL_SERVER_CA_BUNDLE_PATH.
|
||||
CERTCTL_SERVER_URL=https://localhost:8443
|
||||
CERTCTL_API_KEY=change-me-in-production
|
||||
CERTCTL_AGENT_NAME=local-agent
|
||||
|
||||
|
||||
@@ -44,6 +44,306 @@ jobs:
|
||||
- name: Run govulncheck
|
||||
run: govulncheck ./...
|
||||
|
||||
- name: Forbidden auth-type literal regression guard (G-1)
|
||||
# G-1 closed the JWT silent auth downgrade by removing "jwt" from the
|
||||
# accepted CERTCTL_AUTH_TYPE values. This step grep-fails the build
|
||||
# if "jwt" reappears in any of the *additive* auth-type surfaces:
|
||||
# the validAuthTypes / ValidAuthTypes() set, the OpenAPI enum, the
|
||||
# helm chart's allowed-types list, or the .env.example default.
|
||||
# Comment lines and the dedicated rejection branch in config.go
|
||||
# (`c.Auth.Type == "jwt"`) are intentionally exempt — those are the
|
||||
# G-1 fix itself, not a regression.
|
||||
#
|
||||
# Connector packages (internal/connector/) are exempt because the
|
||||
# Google OAuth2 service-account JWT and step-ca provisioner one-
|
||||
# time-token JWT are external-protocol uses, unrelated to certctl's
|
||||
# own auth shape. Test files (_test.go) are exempt so negative
|
||||
# tests can pass the literal.
|
||||
#
|
||||
# See docs/upgrade-to-v2-jwt-removal.md for the closure rationale,
|
||||
# or internal/config/config.go::ValidAuthTypes for the allowed set.
|
||||
run: |
|
||||
set -e
|
||||
|
||||
# Scoped patterns that indicate "jwt" being added back to an
|
||||
# allowed-set surface. Each catches a regression shape we've
|
||||
# actually seen in pre-G-1 code:
|
||||
# - Go map/slice literal: "jwt": true or "jwt",
|
||||
# - Go switch case: case "jwt"
|
||||
# - YAML enum: enum: [..., jwt, ...] or - jwt
|
||||
# - .env conditional: AUTH_TYPE.*"jwt"|=jwt$
|
||||
BAD=$(grep -rnEH \
|
||||
-e '"jwt"\s*:\s*true' \
|
||||
-e '"jwt"\s*,' \
|
||||
-e 'case\s+"jwt"' \
|
||||
-e 'enum:.*\bjwt\b' \
|
||||
-e '^\s*-\s*jwt\s*$' \
|
||||
-e 'AUTH_TYPE\s*=\s*jwt\s*$' \
|
||||
-e 'AUTH_TYPE\s*=\s*jwt\s*#' \
|
||||
-e 'auth\.type\s*=\s*jwt\s*$' \
|
||||
-e 'AuthType\("jwt"\)' \
|
||||
internal/config/ \
|
||||
internal/api/ \
|
||||
cmd/ \
|
||||
api/openapi.yaml \
|
||||
.env.example \
|
||||
deploy/.env.example \
|
||||
deploy/helm/certctl/values.yaml \
|
||||
deploy/helm/certctl/templates/ \
|
||||
2>/dev/null \
|
||||
| grep -v '_test.go' \
|
||||
| grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \
|
||||
| grep -v 'is no longer accepted' \
|
||||
|| true)
|
||||
if [ -n "$BAD" ]; then
|
||||
echo "G-1 regression: \"jwt\" reappeared in an allowed-set surface:"
|
||||
echo "$BAD"
|
||||
echo ""
|
||||
echo "Allowed surface for 'jwt' literals: comment lines, the"
|
||||
echo "dedicated rejection branch in internal/config/config.go,"
|
||||
echo "and connector packages (Google OAuth2, step-ca)."
|
||||
echo "See docs/upgrade-to-v2-jwt-removal.md and"
|
||||
echo "internal/config/config.go::ValidAuthTypes()."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Forbidden api_key_hash JSON-shape regression guard (G-2)
|
||||
# G-2 closed cat-s5-apikey_leak by tagging Agent.APIKeyHash
|
||||
# `json:"-"` and adding a defense-in-depth Agent.MarshalJSON that
|
||||
# zeroes the field on the marshal-time copy. This step grep-fails
|
||||
# the build if `api_key_hash` reappears in any of the *additive*
|
||||
# JSON-emitting surfaces: a Go struct json tag in internal/domain/,
|
||||
# an OpenAPI Agent schema property, a TypeScript field declaration
|
||||
# in web/src/, or an enum-list / discriminator in handler
|
||||
# production code.
|
||||
#
|
||||
# Repository, migration, seed, service, integration-test, and
|
||||
# unit-test files are exempt — those are server-internal use
|
||||
# sites (the DB column stays, the in-memory struct field stays,
|
||||
# the auth-lookup path stays). Comment lines are exempt so the
|
||||
# G-2 closure rationale can stay in the source.
|
||||
#
|
||||
# See coverage-gap-audit-2026-04-24-v5/unified-audit.md
|
||||
# cat-s5-apikey_leak for the closure rationale, or
|
||||
# internal/domain/connector.go::Agent::MarshalJSON for the
|
||||
# redaction enforcement.
|
||||
run: |
|
||||
set -e
|
||||
|
||||
# Scoped patterns that indicate api_key_hash being added back
|
||||
# to a JSON-emitting surface. Each catches a regression shape
|
||||
# that pre-G-2 actually shipped or that a future refactor
|
||||
# could plausibly introduce:
|
||||
# - Go struct tag: `json:"api_key_hash"`
|
||||
# - Frontend interface: api_key_hash[?]: string
|
||||
# - OpenAPI schema property: api_key_hash: (column-aligned)
|
||||
# - YAML enum / array: - api_key_hash
|
||||
BAD=$(grep -rnEH \
|
||||
-e 'json:"api_key_hash[",]' \
|
||||
-e '^\s*api_key_hash\??\s*:' \
|
||||
-e '^\s*-\s*api_key_hash\s*$' \
|
||||
internal/domain/ \
|
||||
internal/api/ \
|
||||
cmd/ \
|
||||
api/openapi.yaml \
|
||||
web/src/ \
|
||||
2>/dev/null \
|
||||
| grep -v '_test.go' \
|
||||
| grep -vE '^\s*[^:]+:[0-9]+:\s*(//|#)' \
|
||||
|| true)
|
||||
if [ -n "$BAD" ]; then
|
||||
echo "G-2 regression: api_key_hash reappeared in a JSON-emitting surface:"
|
||||
echo "$BAD"
|
||||
echo ""
|
||||
echo "Allowed surface for api_key_hash literals: comment lines,"
|
||||
echo "the database column (migrations/), the in-memory struct"
|
||||
echo "field tagged \`json:\"-\"\`, and the repository / service"
|
||||
echo "use sites. See internal/domain/connector.go::Agent and"
|
||||
echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
|
||||
echo "cat-s5-apikey_leak for the closure rationale."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Forbidden plaintext HEALTHCHECK regression guard (U-2)
|
||||
# U-2 closed cat-u-healthcheck_protocol_mismatch by switching the
|
||||
# published image's HEALTHCHECK from `curl -f http://localhost:
|
||||
# 8443/health` (always failed against the HTTPS-only listener) to
|
||||
# `curl -fsk https://localhost:8443/health`. This step grep-fails
|
||||
# the build if any Dockerfile in the repo carries the pre-U-2
|
||||
# plaintext shape — either explicitly (`http://localhost:8443/
|
||||
# health` in a HEALTHCHECK) or via the looser pattern of any
|
||||
# HEALTHCHECK that targets `http://` against the certctl server
|
||||
# port.
|
||||
#
|
||||
# Comment lines and the docs/upgrade-to-tls.md:182 expected-to-
|
||||
# fail invariant ("plaintext is gone, expect Connection refused")
|
||||
# are intentionally exempt — we DO want the upgrade-doc string
|
||||
# `http://localhost:8443/health` to remain there, since it
|
||||
# documents what operators should test for to confirm plaintext
|
||||
# is dead. The guardrail is scoped to Dockerfile* only, so docs
|
||||
# are out of its reach.
|
||||
#
|
||||
# See coverage-gap-audit-2026-04-24-v5/unified-audit.md
|
||||
# cat-u-healthcheck_protocol_mismatch for the closure rationale,
|
||||
# or deploy/test/healthcheck_test.go for the binary-image
|
||||
# contract the runtime test pins.
|
||||
run: |
|
||||
set -e
|
||||
|
||||
# Patterns that catch the actual regression shapes:
|
||||
# - HEALTHCHECK directive carrying any http:// (even if the
|
||||
# port differs, no plaintext probe should ship).
|
||||
# - The exact pre-U-2 string for grep-friendliness.
|
||||
BAD=$(grep -rnEH \
|
||||
-e 'HEALTHCHECK.*http://' \
|
||||
-e 'curl[^|&;]*-f[^|&;]*http://localhost:8443/health' \
|
||||
Dockerfile Dockerfile.agent Dockerfile.* 2>/dev/null \
|
||||
| grep -vE '^\s*[^:]+:[0-9]+:\s*#' \
|
||||
|| true)
|
||||
if [ -n "$BAD" ]; then
|
||||
echo "U-2 regression: plaintext HEALTHCHECK reappeared in a Dockerfile:"
|
||||
echo "$BAD"
|
||||
echo ""
|
||||
echo "Allowed: HTTPS HEALTHCHECK with -k (acceptable for"
|
||||
echo "localhost-to-localhost), or non-HTTP probe shapes"
|
||||
echo "(pgrep, /proc check). See Dockerfile / Dockerfile.agent"
|
||||
echo "for the post-U-2 reference shape and"
|
||||
echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
|
||||
echo "cat-u-healthcheck_protocol_mismatch for rationale."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Forbidden migration mount in compose initdb (U-3)
|
||||
# U-3 closed cat-u-seed_initdb_schema_drift (GitHub #10) by
|
||||
# eliminating the dual-source-of-truth between
|
||||
# `migrations/*.up.sql` mounted into postgres
|
||||
# `/docker-entrypoint-initdb.d/` and the same files re-applied at
|
||||
# runtime by `RunMigrations`. Pre-U-3 every new migration that
|
||||
# the seed depended on (000013 added `policy_rules.severity`,
|
||||
# 000017 renames `retry_interval_seconds`, etc.) had to be added
|
||||
# by hand to the compose mount list; missing the update crashed
|
||||
# initdb on first boot, postgres flagged unhealthy, and the
|
||||
# whole stack failed to start from a fresh clone. Post-U-3 the
|
||||
# server is the single source of truth — `RunMigrations` +
|
||||
# `RunSeed` apply everything at boot.
|
||||
#
|
||||
# This step grep-fails the build if any compose file under
|
||||
# `deploy/` re-introduces a `migrations/.*\.sql` mount into
|
||||
# `/docker-entrypoint-initdb.d`. Comments are exempt so the
|
||||
# post-fix rationale block in the compose files (which
|
||||
# documents WHY the mounts were removed) doesn't trip the guard.
|
||||
# The demo overlay's `seed_demo.sql` is the explicit exception:
|
||||
# it is tolerated only when it lives behind the
|
||||
# CERTCTL_DEMO_SEED env var (post-U-3 demo path) — bare initdb
|
||||
# mounts are NOT tolerated. The grep matches all compose
|
||||
# mount-list shapes (`-` indented, `volumes:` indented, both),
|
||||
# so any future drift surfaces here before the operator hits it
|
||||
# on a fresh clone.
|
||||
#
|
||||
# See coverage-gap-audit-2026-04-24-v5/unified-audit.md
|
||||
# cat-u-seed_initdb_schema_drift for the closure rationale, or
|
||||
# internal/repository/postgres/db.go::RunSeed for the runtime
|
||||
# contract.
|
||||
run: |
|
||||
set -e
|
||||
|
||||
BAD=$(grep -rnEH \
|
||||
-e 'migrations/.*\.sql:.*docker-entrypoint-initdb' \
|
||||
-e 'seed.*\.sql:.*docker-entrypoint-initdb' \
|
||||
deploy/docker-compose.yml \
|
||||
deploy/docker-compose.test.yml \
|
||||
deploy/docker-compose.demo.yml \
|
||||
2>/dev/null \
|
||||
| grep -vE '^\s*[^:]+:[0-9]+:\s*#' \
|
||||
|| true)
|
||||
if [ -n "$BAD" ]; then
|
||||
echo "U-3 regression: migration/seed mount into postgres initdb reappeared:"
|
||||
echo "$BAD"
|
||||
echo ""
|
||||
echo "The post-U-3 contract is: postgres comes up with an empty"
|
||||
echo "schema and the server applies migrations + seed at boot via"
|
||||
echo "internal/repository/postgres.RunMigrations + RunSeed. Demo"
|
||||
echo "data lives behind CERTCTL_DEMO_SEED=true (RunDemoSeed),"
|
||||
echo "not an initdb mount. See"
|
||||
echo "coverage-gap-audit-2026-04-24-v5/unified-audit.md"
|
||||
echo "cat-u-seed_initdb_schema_drift for the closure rationale."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Forbidden StatusBadge dead-key + Certificate phantom-field regression guard (D-1)
|
||||
# D-1 master closed cat-d-359e92c20cbf (Agent: 'Stale' dead key,
|
||||
# 'Degraded' missing), cat-d-9f4c8e4a91f1 (Notification: 'dead'
|
||||
# missing), cat-d-1447e04732e7 (Cert: 'PendingIssuance' dead
|
||||
# key), cat-f-cert_detail_page_key_render_fallback (render-site
|
||||
# uses cert.X directly), and cat-f-ae0d06b6588f (Certificate
|
||||
# TS phantom fields). This step grep-fails the build if either
|
||||
# half of the closure is reverted:
|
||||
#
|
||||
# 1. The dead StatusBadge keys ('Stale' for Agent, 'PendingIssuance'
|
||||
# for Cert) reappearing as map literals, OR
|
||||
# 2. The five phantom Certificate TS fields (serial_number,
|
||||
# fingerprint_sha256, key_algorithm, key_size, issued_at)
|
||||
# reappearing on the `Certificate` interface in types.ts
|
||||
# (CertificateVersion legitimately carries them and is
|
||||
# explicitly excluded by the awk pre-filter below).
|
||||
#
|
||||
# Comments are exempt so the closure prose in StatusBadge.tsx +
|
||||
# types.ts can stay. Test files are exempt so negative tests
|
||||
# asserting the dead keys fall through to neutral keep working.
|
||||
#
|
||||
# See coverage-gap-audit-2026-04-24-v5/unified-audit.md
|
||||
# cat-d-* / cat-f-* for the closure rationale, or
|
||||
# web/src/components/StatusBadge.test.tsx for the live
|
||||
# enum-coverage contract.
|
||||
run: |
|
||||
set -e
|
||||
|
||||
BAD_BADGE=$(grep -nE "^\s*(Stale|PendingIssuance)\s*:\s*'badge-" \
|
||||
web/src/components/StatusBadge.tsx 2>/dev/null \
|
||||
| grep -v '\.test\.' \
|
||||
| grep -vE '^\s*[^:]+:[0-9]+:\s*//' \
|
||||
|| true)
|
||||
if [ -n "$BAD_BADGE" ]; then
|
||||
echo "D-1 regression: dead StatusBadge key reappeared:"
|
||||
echo "$BAD_BADGE"
|
||||
echo ""
|
||||
echo "Allowed surface: comment lines naming the removed key in"
|
||||
echo "the file's preamble. The Go-side AgentStatus values are"
|
||||
echo "Online/Offline/Degraded (no Stale); CertificateStatus values"
|
||||
echo "are Pending/Active/... (no PendingIssuance). See"
|
||||
echo "web/src/components/StatusBadge.test.tsx for the contract."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Certificate TS phantom-field check. Scoped to the
|
||||
# `export interface Certificate {` block in web/src/api/types.ts
|
||||
# — CertificateVersion legitimately declares these fields and
|
||||
# must NOT trip the guardrail. The awk window opens on the
|
||||
# exact `Certificate {` header (not `CertificateVersion {`,
|
||||
# not `CertificateProfile {`) and closes at the first `}`,
|
||||
# then the grep matches a phantom-field declaration anywhere
|
||||
# in that window.
|
||||
BAD_TS=$(awk '
|
||||
/^export interface Certificate \{/ { flag=1; next }
|
||||
flag && /^\}/ { flag=0 }
|
||||
flag { print FILENAME":"NR":"$0 }
|
||||
' web/src/api/types.ts \
|
||||
| grep -E '\b(serial_number|fingerprint_sha256|key_algorithm|key_size|issued_at)\??\s*:' \
|
||||
|| true)
|
||||
if [ -n "$BAD_TS" ]; then
|
||||
echo "D-1 regression: Certificate TS interface re-added a phantom field:"
|
||||
echo "$BAD_TS"
|
||||
echo ""
|
||||
echo "These fields live on CertificateVersion, not ManagedCertificate."
|
||||
echo "The Go-side ManagedCertificate has never carried them; the"
|
||||
echo "TS optional declarations were silently undefined on every"
|
||||
echo "list response. Render-site consumers (e.g. CertificateDetailPage)"
|
||||
echo "use latestVersion?.field as the canonical access path."
|
||||
echo "See coverage-gap-audit-2026-04-24-v5/unified-audit.md"
|
||||
echo "cat-f-ae0d06b6588f for the closure rationale."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Race Detection
|
||||
run: go test -race ./internal/service/... ./internal/api/handler/... ./internal/api/middleware/... ./internal/scheduler/... ./internal/connector/... ./internal/crypto/... ./internal/domain/... ./internal/validation/... ./internal/tlsprobe/... -count=1 -timeout 300s
|
||||
|
||||
|
||||
+127
@@ -2,6 +2,133 @@
|
||||
|
||||
All notable changes to certctl are documented in this file. Dates use ISO 8601. Versions follow [Semantic Versioning](https://semver.org/).
|
||||
|
||||
## [unreleased] — 2026-04-25
|
||||
|
||||
### D-1: StatusBadge enum drift + Certificate phantom fields — closed end-to-end
|
||||
|
||||
> The dashboard silently lied in five places. Agents in the `Degraded` state (the only Go-side AgentStatus that means "needs operator attention") rendered as default neutral grey because StatusBadge mapped `Stale` (a key Go has never emitted) to yellow and let the real `Degraded` value fall through to the dictionary default. Dead-letter notifications (`status: 'dead'`, retries exhausted) rendered as default neutral, visually equated with `read` (operator-acknowledged). The Certificate badge map carried a `PendingIssuance` key that no Go enum value ever emits — dead key, latent confusion vector. CertificateDetailPage's Key Algorithm and Key Size rows always rendered `—` even when the data was a single fetch away, because the lookup went through `cert.key_algorithm` directly — and the underlying `Certificate` TypeScript interface declared five optional fields (`serial_number`, `fingerprint_sha256`, `key_algorithm`, `key_size`, `issued_at`) that Go's `ManagedCertificate` has never carried (those values live on `CertificateVersion`). Five findings, two files, one frontend rebuild. Pre-D-1 the only reason this didn't trip a regression suite was that the regression suite never asserted "every Go-emitted enum value gets a non-default StatusBadge class" — D-1 fixes the visual lies and adds a 38-case Vitest property test that walks every Go enum and pins the contract.
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- **`Certificate` TypeScript interface no longer declares `serial_number?`, `fingerprint_sha256?`, `key_algorithm?`, `key_size?`, or `issued_at?`.** The Go `ManagedCertificate` (`internal/domain/certificate.go`) has never emitted these fields on list responses; they live on `CertificateVersion` and are reachable via `getCertificateVersions(id)`. Pre-D-5 (the cat-f phantom-fields finding) the optional declarations made `cert.X` always-undefined on lists, and downstream consumers silently rendered `—` for every cert. Post-D-5 a `cert.X` access for any of the five fields is a TypeScript compile error, forcing every consumer to acknowledge the version-fallback pattern. The OpenAPI `ManagedCertificate` schema was already correct — only the TS type was drifted.
|
||||
- **StatusBadge no longer maps `Stale` (Agent) or `PendingIssuance` (Certificate).** Both were dead keys — no Go enum value emits them. Operators with custom CSS hooked off `.badge-warning` for `Stale` will see the same color come back via the new `Degraded` mapping (same class), but JS/TS code that switches on the literal `'Stale'` will need to switch on `'Degraded'` instead. The `PendingIssuance` deletion has no documented downstream consumer.
|
||||
|
||||
### Added
|
||||
|
||||
- **`web/src/components/StatusBadge.tsx`: `Degraded` (Agent) → `badge-warning` and `dead` (Notification) → `badge-danger`.** First mappings restore the color contract for the two real Go-side values that previously fell through to the dictionary default. The `Degraded` mapping cross-references `internal/domain/connector.go::AgentStatusDegraded`; the `dead` mapping cross-references `internal/domain/notification.go::NotificationStatusDead`.
|
||||
- **`web/src/components/StatusBadge.test.tsx`: 38-case Vitest property test.** Iterates every Go-side enum value (`AgentStatus`, `CertificateStatus`, `JobStatus`, `NotificationStatus`, `DiscoveryStatus`, `HealthStatus`) plus the two frontend-synthesized `Enabled`/`Disabled` labels, asserts every value gets a non-default class (or, for the five intentionally-neutral terminal values like `Archived`/`Cancelled`/`read`, an explicit `badge badge-neutral`). Includes negative assertions on the deleted `Stale` and `PendingIssuance` keys (must fall through to neutral) and specific UX-correctness assertions on the operator-attention semantics (`dead` → danger, `Degraded` → warning).
|
||||
- **`web/src/api/types.test.ts`: D-5 Certificate phantom-fields trim regression.** A `Certificate` literal construction pinned post-trim, plus a sibling `CertificateVersion` literal pinning that the trimmed fields still live on the version envelope. The `tsc --noEmit` gate in CI is the primary enforcement; the test is the documentation of intent.
|
||||
- **CI regression guardrail in `.github/workflows/ci.yml` (`Forbidden StatusBadge dead-key + Certificate phantom-field regression guard (D-1)`).** Two grep blocks: (1) catches `Stale: 'badge-...'` or `PendingIssuance: 'badge-...'` in `web/src/components/StatusBadge.tsx`; (2) uses an awk-scoped window over the `export interface Certificate {` block in `web/src/api/types.ts` to catch any of the five phantom fields reappearing — explicitly excludes the `CertificateVersion` block which legitimately carries them. Verified locally on the post-fix tree (passes) and against synthetic regressions (each fires the guardrail).
|
||||
|
||||
### Changed
|
||||
|
||||
- **`web/src/pages/CertificateDetailPage.tsx`: Key Algorithm and Key Size rows now read from `latestVersion?.key_algorithm` / `latestVersion?.key_size`.** Mirrors the existing `latestVersion` fallback used for `serial_number` and `fingerprint_sha256` earlier in the same file. Pre-D-4 these rows accessed `cert.key_algorithm` and `cert.key_size` directly — both phantom fields per D-5 — so the rows always rendered `—`. The same file's `serial_number` / `fingerprint_sha256` / `issued_at` derivations were also simplified to drop the now-impossible `cert.X || latestVersion?.X` cert-side leg.
|
||||
- **`web/src/components/StatusBadge.tsx` adds a leading docblock** naming the Go-side source-of-truth file for every status family it maps (`AgentStatus`, `CertificateStatus`, `JobStatus`, `NotificationStatus`, `DiscoveryStatus`, `HealthStatus`) and pointing at the property test as the regression vector for future enum changes.
|
||||
- **`api/openapi.yaml::ManagedCertificate`** gets a leading comment cross-referencing the D-5 closure and explaining why per-issuance fields legitimately don't appear here (they live on `CertificateVersion`). Schema property list unchanged — the OpenAPI spec was already correct.
|
||||
|
||||
### Closed audit findings
|
||||
|
||||
- `cat-d-359e92c20cbf` (P1 primary) — Agent: `Stale` dead key + `Degraded` neutral fallthrough
|
||||
- `cat-d-9f4c8e4a91f1` (P2) — Notification: `dead` missing
|
||||
- `cat-d-1447e04732e7` (P3) — Certificate: `PendingIssuance` dead key
|
||||
- `cat-f-cert_detail_page_key_render_fallback` (P2) — render-site uses `cert.key_algorithm` directly
|
||||
- `cat-f-ae0d06b6588f` (P2) — Certificate TS phantom fields (root cause)
|
||||
|
||||
### Known follow-ups (deferred from D-1 scope)
|
||||
|
||||
The audit's broader type-drift cluster (`diff-05x06-7cdf4e78ae24` Agent TS, `diff-05x06-2044a46f4dd0` DeploymentTarget TS, `diff-05x06-caba9eb3620e` Notification TS, `diff-05x06-85ab6b98a2f7` DiscoveredCertificate TS, `diff-05x06-97fab8783a5c` Issuer TS) is out of D-1 scope. Recon for those is per-type field-by-field diff Go ↔ TS — codegen-shaped, not edit-shaped — and warrants its own D-2 master prompt.
|
||||
|
||||
### U-3: GitHub #10 reopened — fresh-clone first-up postgres init failure (P1) — closed end-to-end
|
||||
|
||||
> Operator `mikeakasully` cloned v2.0.50 fresh, ran the canonical quickstart `docker compose -f deploy/docker-compose.yml up -d --build`, and postgres reported `unhealthy` indefinitely; dependent containers (certctl-server, certctl-agent) never started. Root cause: the deploy compose stack mounted both a hand-curated subset of `migrations/*.up.sql` and `seed.sql` into postgres `/docker-entrypoint-initdb.d/`. Postgres applied them at initdb time. Once `seed.sql` referenced columns added by migrations *after* the mounted cutoff (e.g., `policy_rules.severity` from migration 000013, which the mount list never included), initdb crashed mid-seed and the container loop wedged. Two sources of truth — the mount list and the in-tree migration ladder — diverged the moment a seed-touching migration shipped, and the only thing that fixed it was hand-editing the compose file every release. The U-3 closure removes the dual source: postgres now boots empty and the server applies the entire migration ladder + seed at startup via `RunMigrations` + `RunSeed`. Same pattern Helm has used since day one. Bundled with four ride-along audit findings whose fixes are in adjacent code (column rename, missing column, dropped orphan columns, new build-identity endpoint) so operators take the schema-change pain only once.
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- **`deploy/docker-compose.yml` postgres no longer initdb-mounts the migration files or `seed.sql`.** Operators running on a populated `postgres_data` volume from a pre-U-3 release see no behavioral change (the schema is already in place; `RunMigrations` is `IF NOT EXISTS` and `RunSeed` is `ON CONFLICT DO NOTHING`). Operators running on a *fresh* clone now rely on the server to apply both — which is the bug fix. There is no rollback path other than re-introducing the dual-source-of-truth hazard. See `internal/repository/postgres/db.go::RunSeed` for the runtime contract.
|
||||
- **`migrations/000017_db_coupling_cleanup.up.sql` renames `renewal_policies.retry_interval_minutes` → `retry_interval_seconds`.** The column always held seconds; the column name lied (`cat-o-retry_interval_unit_mismatch`). Operators running raw SQL against the old name need to update their queries. The Go layer (`internal/repository/postgres/renewal_policy.go`) is updated in lockstep so the in-tree code path is unaffected.
|
||||
- **`migrations/000017_db_coupling_cleanup.up.sql` drops `network_scan_targets.health_check_enabled` and `network_scan_targets.health_check_interval_seconds`.** These columns were declared by a long-ago migration but never wired into Go code (`cat-o-health_check_column_orphans`) — schema noise that confused operators reading raw SQL. Anyone with custom dashboards selecting those columns will break.
|
||||
- **The compose demo overlay (`deploy/docker-compose.demo.yml`) no longer initdb-mounts `seed_demo.sql`.** It now sets `CERTCTL_DEMO_SEED=true` and the server applies the demo seed at boot via `RunDemoSeed` after baseline migrations + seed.sql are in place. Same single-source-of-truth pattern as the production path.
|
||||
|
||||
### Added
|
||||
|
||||
- **Migration `000017_db_coupling_cleanup`** (up + down). Bundles three schema changes in idempotent SQL: (1) rename `renewal_policies.retry_interval_minutes` → `retry_interval_seconds` (DO $$ guard so re-application is safe), (2) add `notification_events.created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()`, (3) drop the orphan `network_scan_targets.health_check_*` columns. Reduces operator-visible "schema-change releases" from four to one.
|
||||
- **`internal/repository/postgres.RunSeed`** — runtime equivalent of the deleted initdb mount for `seed.sql`. Called from `cmd/server/main.go` immediately after `RunMigrations`. Idempotent (every INSERT in the shipped seed uses `ON CONFLICT (id) DO NOTHING`); missing-file is a no-op so operators with custom packaging that strips the seed don't break.
|
||||
- **`internal/repository/postgres.RunDemoSeed`** + **`config.DatabaseConfig.DemoSeed`** + **`CERTCTL_DEMO_SEED` env var.** Replaces the deleted `seed_demo.sql` initdb mount. The compose demo overlay sets `CERTCTL_DEMO_SEED=true` and the server applies the demo seed after baseline. Same idempotency contract as the baseline path. Default-off so a vanilla deploy never lands fake-history rows.
|
||||
- **`GET /api/v1/version` endpoint** + **`internal/api/handler.VersionHandler`**. Returns `{version, commit, modified, build_time, go_version}` from `runtime/debug.ReadBuildInfo()` with ldflags-supplied `Version` taking priority. Wired through the no-auth dispatch in `cmd/server/main.go` so probes and rollout systems can read build identity without Bearer credentials. Audit middleware excludes the path so rollout polls don't dominate the audit trail. Closes `cat-u-no_version_endpoint`.
|
||||
- **`notification_events.created_at` column** is now populated by `NotificationRepository.Create` (with a `time.Now()` fallback when the caller leaves it zero) and read back by `scanNotification`. Pre-U-3 the JSON API serialised `0001-01-01T00:00:00Z` — closes `cat-o-notification_created_at_dead_field`.
|
||||
- **Five regression tests** for the U-3 contract: `TestRunSeed_AppliesIdempotently`, `TestRunSeed_MissingFileIsNoOp`, `TestRunDemoSeed_AppliesIdempotently`, `TestMigration000017_RetryIntervalRename`, `TestMigration000017_NotificationCreatedAt`, `TestMigration000017_HealthCheckOrphansDropped`, plus `TestNotificationRepository_CreatedAt_IsPersisted` / `TestNotificationRepository_CreatedAt_DefaultsToNow` for the round-trip. All testcontainers-gated (skipped under `-short`). Three handler-layer unit tests pin `/api/v1/version` (`TestVersion_ReturnsBuildInfo`, `TestVersion_RejectsNonGet`, `TestVersion_LdflagsOverride`).
|
||||
- **CI regression guardrail** in `.github/workflows/ci.yml` (`Forbidden migration mount in compose initdb (U-3)`) — grep-fails the build if any `migrations/.*\.sql` or `seed.*\.sql` file is re-mounted into `/docker-entrypoint-initdb.d` in any compose file. Catches future drift before a fresh-clone operator hits it.
|
||||
|
||||
### Changed
|
||||
|
||||
- **`deploy/docker-compose.yml`** + **`deploy/docker-compose.test.yml`** — postgres `volumes:` no longer mount migrations or seed files; postgres healthcheck gains `start_period: 30s`; certctl-server healthcheck gains `start_period: 30s` to absorb the runtime migration + seed application window on first boot.
|
||||
- **`deploy/docker-compose.demo.yml`** — replaces the `seed_demo.sql` initdb mount with the `CERTCTL_DEMO_SEED=true` env var on `certctl-server`.
|
||||
- **`migrations/seed.sql`** — `INSERT INTO renewal_policies` updated to use the new `retry_interval_seconds` column name (lockstep with migration 000017).
|
||||
- **`internal/repository/postgres/renewal_policy.go`** — column references updated to `retry_interval_seconds` across SELECT, INSERT, and UPDATE sites (lockstep with migration 000017).
|
||||
|
||||
### Closed audit findings
|
||||
|
||||
- `cat-u-seed_initdb_schema_drift` (P1, primary U-3 finding)
|
||||
- `cat-o-retry_interval_unit_mismatch` (P1)
|
||||
- `cat-o-notification_created_at_dead_field` (P2)
|
||||
- `cat-o-health_check_column_orphans` (P1)
|
||||
- `cat-u-no_version_endpoint` (P2)
|
||||
|
||||
### G-1: JWT silent auth downgrade — closed end-to-end
|
||||
|
||||
> Pre-G-1 the config validator accepted `CERTCTL_AUTH_TYPE=jwt` and the startup log faithfully echoed `"authentication enabled" "type"="jwt"`. Reasonable people read that and concluded JWT was on. It wasn't. The auth-middleware wiring at `cmd/server/main.go` unconditionally routed every request through the api-key bearer middleware regardless of `cfg.Auth.Type`. So `CERTCTL_AUTH_TYPE=jwt` quietly compared incoming `Authorization: Bearer <something>` against whatever string the operator put in `CERTCTL_AUTH_SECRET` — real JWT clients got 401, and operators who treated `CERTCTL_AUTH_SECRET` as a *signing* secret (because they thought they were configuring JWT) had effectively handed an attacker an api-key. A security finding masquerading as a config option. We chose to remove the option rather than ship JWT middleware — the audit-recommended structural fix that closes the hazard. Operators who actually need JWT/OIDC front certctl with an authenticating gateway (oauth2-proxy / Envoy `ext_authz` / Traefik `ForwardAuth` / Pomerium / Authelia) and run the upstream certctl with `CERTCTL_AUTH_TYPE=none`. The same pattern works on docker-compose and Helm.
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- **`CERTCTL_AUTH_TYPE=jwt` is no longer accepted.** Pre-G-1 the value was silently downgraded to api-key middleware. Post-G-1 the server fails at startup with a dedicated diagnostic naming the authenticating-gateway pattern. Operators with this in their env block must either switch to `api-key` (if they were de facto using api-key auth all along — same Bearer token continues to work) or switch to `none` and front certctl with an oauth2-proxy / Envoy / Traefik / Pomerium gateway. See [`docs/upgrade-to-v2-jwt-removal.md`](docs/upgrade-to-v2-jwt-removal.md).
|
||||
- **Helm chart `server.auth.type=jwt` now fails at `helm install` / `helm upgrade` template time.** New `certctl.validateAuthType` template helper runs on every template that depends on `.Values.server.auth.type` (`server-deployment.yaml`, `server-configmap.yaml`, `server-secret.yaml`) and fails the render with a pointer at the gateway-fronting pattern.
|
||||
- **OpenAPI spec `auth_type` enum no longer includes `jwt`.** API consumers checking `/api/v1/auth/info` against the spec will see a smaller enum.
|
||||
|
||||
### Removed
|
||||
|
||||
- Documented references to JWT in the certctl auth surface (config docblocks, middleware/health-handler comments, `.env.example`, `docs/architecture.md` middleware-stack bullet). Connector-level JWT references (Google OAuth2 service-account JWT in `internal/connector/discovery/gcpsm/`, `internal/connector/issuer/googlecas/`; step-ca's provisioner one-time-token JWT in `internal/connector/issuer/stepca/`) are unrelated and untouched — those are external-protocol uses, not certctl's own auth shape.
|
||||
|
||||
### Added
|
||||
|
||||
- **`config.AuthType` typed alias** with `AuthTypeAPIKey` / `AuthTypeNone` exported constants. Single source of truth for the allowed set across the validator, the runtime defense-in-depth switch in `main.go`, and the helm chart's `validateAuthType` helper.
|
||||
- **`config.ValidAuthTypes()`** helper returning the complete allowed set; pinned by a property test (`TestValidAuthTypesDoesNotContainJWT`) that fails the build if `"jwt"` is ever re-added to the slice.
|
||||
- **Defense-in-depth runtime guard** in `cmd/server/main.go` immediately after `config.Load()` — a `switch config.AuthType(cfg.Auth.Type)` that exits 1 if the validator was bypassed (test harness, alt config loader, env-var rebinding).
|
||||
- **`certctl.validateAuthType` Helm template helper** mirroring the existing `certctl.tls.required` pattern. Fails template render on any `server.auth.type` outside `{api-key, none}`.
|
||||
- **`docs/architecture.md` "Authenticating-gateway pattern (JWT, OIDC, mTLS)"** section explaining the design rationale for the narrow in-process auth surface and listing oauth2-proxy / Envoy `ext_authz` / Traefik `ForwardAuth` / Pomerium / Authelia / Caddy `forward_auth` / Apache `mod_auth_openidc` / nginx `auth_request` as the standard fronting options.
|
||||
- **`docs/upgrade-to-v2-jwt-removal.md`** migration guide. Same shape as `docs/upgrade-to-tls.md`. Walks through the dedicated startup error, both recovery paths (`api-key` vs gateway-fronting), a complete docker-compose oauth2-proxy walkthrough, Traefik ForwardAuth and Envoy `ext_authz` patterns, and rollback posture.
|
||||
- **`deploy/helm/certctl/README.md`** "JWT / OIDC via authenticating gateway" section with a Kubernetes-flavored oauth2-proxy + certctl walkthrough.
|
||||
- **CI regression guardrail** in `.github/workflows/ci.yml` (`Forbidden auth-type literal regression guard (G-1)`) — grep-fails the build if `"jwt"` appears as an auth-type literal in production code or spec. Connector packages exempt (legitimate external-protocol uses).
|
||||
- **Negative test coverage** in `internal/config/config_test.go`: `TestValidate_JWTAuth_RejectedDedicated` (two table rows pinning that the dedicated G-1 error fires regardless of whether `Secret` is set), `TestValidAuthTypesDoesNotContainJWT` (property-level guard), `TestValidAuthTypesIsExactly_APIKey_None` (allowed-set contract), `TestValidate_GenericInvalidAuthType` (pins that other invalid values still surface the generic invalid-auth-type error, so the dedicated G-1 path doesn't accidentally swallow non-jwt typos).
|
||||
|
||||
### Changed
|
||||
|
||||
- `internal/api/middleware/middleware.go::AuthConfig.Type` field comment now references the typed `config.AuthType` constants instead of an inline string enumeration.
|
||||
- `internal/api/handler/health.go::HealthHandler.AuthType` field comment same treatment.
|
||||
- `internal/api/handler/health_test.go` — the prior `TestAuthInfo_ReturnsAuthType_JWT` (which asserted the handler echoed `"jwt"`, baking the silent-downgrade lie into the regression suite) is removed; the pre-existing `TestAuthInfo_ReturnsAuthType_APIKey` continues to cover the api-key happy path.
|
||||
- Auth-disabled startup log in `main.go` now points operators at the authenticating-gateway pattern explicitly.
|
||||
|
||||
### U-2: Dockerfile HEALTHCHECK protocol mismatch — closed end-to-end
|
||||
|
||||
> Pre-U-2 the published `ghcr.io/shankar0123/certctl-server` image shipped with `HEALTHCHECK CMD curl -f http://localhost:8443/health`. The server has been HTTPS-only since the v2.2 HTTPS-Everywhere milestone (`cmd/server/main.go::ListenAndServeTLS`, no plaintext fallback, TLS 1.3 pinned), so the probe failed every interval and Docker marked the container `unhealthy` indefinitely. Operators inside docker-compose / Helm / the example stacks were unaffected — compose overrides the HEALTHCHECK with `--cacert + https://`, Helm uses explicit `httpGet` probes that ignore Docker's HEALTHCHECK, and every example compose file overrides with `curl -sfk https://localhost:8443/health`. But anyone running bare `docker run` / Docker Swarm / Nomad / ECS — exactly the "I just pulled the published image" path — saw permanent `unhealthy` status and (depending on orchestrator policy) a restart-loop. Recon for U-2 also surfaced two adjacent bugs from the same v2.2 milestone gap: the Helm chart's `readinessProbe.httpGet.path` pointed at `/readyz`, a route the server doesn't register (only `/health` and `/ready` are wired and bypass the auth middleware), so K8s readiness probes were getting 404/auth-rejection and pods stayed `NotReady`; and the agent image had no HEALTHCHECK at all (the compose override called `pgrep -f certctl-agent` against an image that didn't ship `procps` — latent always-fail). All three are closed in this commit.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **`Dockerfile` HEALTHCHECK now speaks HTTPS.** Bare `docker run` / Swarm / Nomad / ECS users no longer see `unhealthy` forever. The probe uses `curl -fsk https://localhost:8443/health` — `-k` (insecure) is acceptable because the probe is localhost-to-localhost: the same process serving the cert is being probed; the probe never traverses a network. Compose / Helm / examples already perform full cert-chain validation and are unaffected.
|
||||
- **Helm `server.readinessProbe.httpGet.path` corrected from `/readyz` to `/ready`.** The `/readyz` path was never registered as a no-auth route (see `internal/api/router/router.go:81` and `cmd/server/main.go:920`), so K8s readiness probes received 401 (api-key auth rejection) or 404 (when auth was disabled). Pods previously failed to report Ready under most realistic Helm deployments. Liveness probe path (`/health`) was already correct and is unchanged.
|
||||
- **`docs/connectors.md` curl examples** (15 sites) updated from `http://localhost:8443/...` to `https://localhost:8443/...` with a one-time `--cacert "$CA"` extraction note matching the existing pattern in `docs/quickstart.md`. Pre-U-2 these examples silently failed against the HTTPS listener.
|
||||
|
||||
### Added
|
||||
|
||||
- **`Dockerfile.agent` HEALTHCHECK** — `pgrep -f certctl-agent` process-presence check (the agent has no HTTP listener; presence is the right primitive). Bare-`docker run` agents now report health-status the same way compose-managed ones do. Also adds `procps` to the runtime image so `pgrep` is actually available — pre-U-2 the docker-compose override at `deploy/docker-compose.yml:173` called `pgrep -f certctl-agent` against an image that lacked it (latent always-fail; container was reported unhealthy in compose too, just rarely noticed because nothing acted on the signal).
|
||||
- **`deploy/test/healthcheck_test.go`** (`//go:build integration`) — image-level integration tests. `TestPublishedServerImage_HealthcheckSpecUsesHTTPS` builds the server image, inspects `Config.Healthcheck.Test` via `docker inspect`, and asserts the array contains `https://localhost:8443/health` and `-k`, and does NOT contain `http://localhost:8443/health` (negative regression contract). `TestPublishedAgentImage_HealthcheckSpecExists` builds the agent image and asserts the HEALTHCHECK uses `pgrep` against `certctl-agent`. Both tests `t.Skip` cleanly when docker isn't available (sandbox / CI without docker-in-docker). A third runtime test (`TestPublishedServerImage_HealthcheckTransitionsToHealthy`) is a `t.Skip` placeholder until the harness wires a sidecar postgres for image-level smoke — documented honestly so the next refactor adopts it instead of rediscovering the gap.
|
||||
- **CI regression guardrail** in `.github/workflows/ci.yml` (`Forbidden plaintext HEALTHCHECK regression guard (U-2)`) — grep-fails the build if any `Dockerfile*` carries `HEALTHCHECK.*http://` or `curl -f http://localhost:8443/health`. Comments exempt; the `docs/upgrade-to-tls.md:182` post-cutover invariant string (which deliberately documents the expected-failure shape) is out of the guardrail's scope because the guardrail only scans Dockerfiles.
|
||||
|
||||
### Changed
|
||||
|
||||
- `Dockerfile` final-stage HEALTHCHECK lines now carry a long-form docblock explaining the `-k` design choice, the published-image vs compose vs Helm vs examples coverage matrix, and cross-references to the audit closure + the integration test.
|
||||
- `Dockerfile.agent` runtime stage adds `procps` to the apk install so the new HEALTHCHECK and the existing compose override both have a working `pgrep`.
|
||||
- `deploy/helm/certctl/values.yaml` server probes block now carries an explanatory comment naming the registered probe routes (`/health`, `/ready`) and the U-2 closure rationale for the `/readyz` → `/ready` correction.
|
||||
|
||||
## [2.2.0] — 2026-04-19
|
||||
|
||||
### HTTPS Everywhere — The Irony
|
||||
|
||||
+28
-1
@@ -76,7 +76,34 @@ USER certctl
|
||||
|
||||
EXPOSE 8443
|
||||
|
||||
# Image-level HEALTHCHECK for bare `docker run` / Docker Swarm / Nomad / ECS.
|
||||
#
|
||||
# U-2 (P1, cat-u-healthcheck_protocol_mismatch): pre-U-2 this probe used
|
||||
# `curl -f http://localhost:8443/health`, which always failed against the
|
||||
# HTTPS-only listener (HTTPS-Everywhere milestone, v2.2 / tag v2.0.47 —
|
||||
# `cmd/server/main.go::ListenAndServeTLS`, no plaintext fallback, TLS 1.3
|
||||
# pinned). Operators outside docker-compose / Helm saw permanent
|
||||
# `unhealthy` status and a restart-loop the first time they pulled the
|
||||
# image. The compose stack overrides this HEALTHCHECK with `--cacert` to
|
||||
# the bootstrap CA bundle (deploy/docker-compose.yml:126); the Helm chart
|
||||
# uses explicit `httpGet` probes with `scheme: HTTPS` and ignores Docker's
|
||||
# HEALTHCHECK; every example compose file in `examples/*/docker-compose.yml`
|
||||
# overrides with `curl -sfk https://localhost:8443/health`. This image-
|
||||
# level probe is for the bare-`docker run` consumer ONLY.
|
||||
#
|
||||
# `-k` (insecure) is acceptable here because the probe is localhost-to-
|
||||
# localhost: the same process serving the cert is being probed; the probe
|
||||
# never traverses a network. Pinning a `--cacert` is not viable for the
|
||||
# published image because the bootstrap cert is per-deploy (generated into
|
||||
# the `certs` named volume on first up; operator-supplied via Helm's
|
||||
# `existingSecret` or cert-manager). Compose / Helm / examples already
|
||||
# perform full cert-chain validation and are unaffected.
|
||||
#
|
||||
# CI grep guardrail at .github/workflows/ci.yml ("Forbidden plaintext
|
||||
# HEALTHCHECK regression guard (U-2)") blocks reintroduction of the
|
||||
# `http://` shape. Image-level integration test in
|
||||
# deploy/test/healthcheck_test.go pins the contract end-to-end.
|
||||
HEALTHCHECK --interval=10s --timeout=5s --start-period=5s --retries=5 \
|
||||
CMD curl -f http://localhost:8443/health || exit 1
|
||||
CMD curl -fsk https://localhost:8443/health || exit 1
|
||||
|
||||
ENTRYPOINT ["/app/server"]
|
||||
|
||||
+23
-1
@@ -36,7 +36,14 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build \
|
||||
# Stage 2: Runtime
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates curl
|
||||
# U-2: `procps` ships pgrep, which the HEALTHCHECK below uses to verify the
|
||||
# agent process is alive. Pre-U-2 the deploy/docker-compose.yml agent
|
||||
# HEALTHCHECK called `pgrep -f certctl-agent` against this image but
|
||||
# pgrep wasn't installed — the compose probe was a latent always-fail.
|
||||
# Adding procps here fixes both the new image-level HEALTHCHECK and the
|
||||
# pre-existing compose override. Adds ~250KB to the image; acceptable for
|
||||
# observability parity with the server image.
|
||||
RUN apk add --no-cache ca-certificates curl procps
|
||||
|
||||
RUN addgroup -g 1000 certctl && \
|
||||
adduser -D -u 1000 -G certctl certctl
|
||||
@@ -51,4 +58,19 @@ RUN mkdir -p /var/lib/certctl/keys && \
|
||||
|
||||
USER certctl
|
||||
|
||||
# Image-level HEALTHCHECK for bare `docker run` / Docker Swarm / Nomad / ECS.
|
||||
#
|
||||
# U-2 (P1, cat-u-healthcheck_protocol_mismatch — adjacent fix): the agent
|
||||
# has no HTTP listener (it polls the server via outbound HTTPS), so a
|
||||
# process-presence check is the correct primitive. Pre-U-2 the agent image
|
||||
# shipped with no HEALTHCHECK at all, so bare-`docker run` operators got
|
||||
# zero health signal and orchestrators that key off Docker's HEALTHCHECK
|
||||
# (Swarm, Nomad, ECS) saw the container reported as `none`. The compose
|
||||
# override at deploy/docker-compose.yml:173 used the same `pgrep -f
|
||||
# certctl-agent` shape; we mirror it here so the published image has
|
||||
# parity with the compose stack and the override on docker-compose.yml
|
||||
# becomes redundant-but-correct rather than load-bearing.
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||
CMD pgrep -f certctl-agent > /dev/null || exit 1
|
||||
|
||||
ENTRYPOINT ["/app/agent"]
|
||||
|
||||
+332
-3
@@ -42,6 +42,8 @@ tags:
|
||||
description: Job queue — issuance, renewal, deployment, validation
|
||||
- name: Policies
|
||||
description: Policy rules and violation tracking
|
||||
- name: RenewalPolicies
|
||||
description: Lifecycle renewal policies (distinct from compliance policy rules above)
|
||||
- name: Profiles
|
||||
description: Certificate enrollment profiles with crypto constraints
|
||||
- name: Teams
|
||||
@@ -130,7 +132,14 @@ paths:
|
||||
properties:
|
||||
auth_type:
|
||||
type: string
|
||||
enum: [api-key, jwt, none]
|
||||
# G-1 (P1): "jwt" removed from this enum after the silent
|
||||
# auth downgrade was identified — no JWT middleware ships
|
||||
# with certctl. Operators who need JWT/OIDC front certctl
|
||||
# with an authenticating gateway (oauth2-proxy / Envoy /
|
||||
# Traefik / Pomerium) and set CERTCTL_AUTH_TYPE=none
|
||||
# upstream. See docs/architecture.md "Authenticating-
|
||||
# gateway pattern".
|
||||
enum: [api-key, none]
|
||||
required:
|
||||
type: boolean
|
||||
|
||||
@@ -154,6 +163,50 @@ paths:
|
||||
"401":
|
||||
description: Unauthorized
|
||||
|
||||
/api/v1/version:
|
||||
get:
|
||||
tags: [Health]
|
||||
summary: Build identity (version, commit, Go runtime)
|
||||
description: |
|
||||
Returns the running server's build identity. Served without
|
||||
auth so rollout systems and blackbox probes can read it without
|
||||
Bearer credentials. U-3 ride-along (cat-u-no_version_endpoint).
|
||||
Excluded from audit logging because rollout polling would
|
||||
otherwise dominate the audit trail.
|
||||
|
||||
The Version field follows a fallback ladder: ldflags-supplied
|
||||
value > VCS commit SHA > "dev". Commit / Modified / BuildTime
|
||||
come from runtime/debug.BuildInfo (Go 1.18+ stamps these on
|
||||
every module-tracked build). GoVersion is runtime.Version().
|
||||
security: []
|
||||
operationId: getVersion
|
||||
responses:
|
||||
"200":
|
||||
description: Build identity
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required: [version, commit, modified, build_time, go_version]
|
||||
properties:
|
||||
version:
|
||||
type: string
|
||||
description: Release tag (ldflags-supplied) or VCS SHA fallback or "dev"
|
||||
example: v2.0.51
|
||||
commit:
|
||||
type: string
|
||||
description: Git SHA from runtime/debug.BuildInfo (vcs.revision); empty when not VCS-tracked
|
||||
modified:
|
||||
type: boolean
|
||||
description: True when build had uncommitted changes (vcs.modified)
|
||||
build_time:
|
||||
type: string
|
||||
description: RFC 3339 build timestamp (vcs.time); empty when not VCS-tracked
|
||||
go_version:
|
||||
type: string
|
||||
description: Go toolchain version that compiled the binary (runtime.Version())
|
||||
example: go1.25.9
|
||||
|
||||
# ─── Certificates ────────────────────────────────────────────────────
|
||||
/api/v1/certificates:
|
||||
get:
|
||||
@@ -1528,6 +1581,137 @@ paths:
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
|
||||
# ─── Renewal Policies ────────────────────────────────────────────────
|
||||
# G-1: lifecycle policies (rp-* ids, table renewal_policies). DISTINCT from
|
||||
# /api/v1/policies above, which returns compliance rules (pol-* ids, table
|
||||
# policy_rules). `managed_certificates.renewal_policy_id` FK points at
|
||||
# renewal_policies(id) — populating that dropdown from /api/v1/policies
|
||||
# caused 23503 FK violations; hence this endpoint.
|
||||
/api/v1/renewal-policies:
|
||||
get:
|
||||
tags: [RenewalPolicies]
|
||||
summary: List renewal policies
|
||||
operationId: listRenewalPolicies
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/page"
|
||||
- $ref: "#/components/parameters/per_page"
|
||||
responses:
|
||||
"200":
|
||||
description: Paginated list of renewal policies
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
allOf:
|
||||
- $ref: "#/components/schemas/PaginationEnvelope"
|
||||
- type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/RenewalPolicy"
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
post:
|
||||
tags: [RenewalPolicies]
|
||||
summary: Create renewal policy
|
||||
operationId: createRenewalPolicy
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/RenewalPolicyCreateRequest"
|
||||
responses:
|
||||
"201":
|
||||
description: Renewal policy created
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/RenewalPolicy"
|
||||
"400":
|
||||
$ref: "#/components/responses/BadRequest"
|
||||
"409":
|
||||
description: Duplicate policy name
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
|
||||
/api/v1/renewal-policies/{id}:
|
||||
get:
|
||||
tags: [RenewalPolicies]
|
||||
summary: Get renewal policy
|
||||
operationId: getRenewalPolicy
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/resourceId"
|
||||
responses:
|
||||
"200":
|
||||
description: Renewal policy details
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/RenewalPolicy"
|
||||
"400":
|
||||
$ref: "#/components/responses/BadRequest"
|
||||
"404":
|
||||
$ref: "#/components/responses/NotFound"
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
put:
|
||||
tags: [RenewalPolicies]
|
||||
summary: Update renewal policy
|
||||
operationId: updateRenewalPolicy
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/resourceId"
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/RenewalPolicyUpdateRequest"
|
||||
responses:
|
||||
"200":
|
||||
description: Renewal policy updated
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/RenewalPolicy"
|
||||
"400":
|
||||
$ref: "#/components/responses/BadRequest"
|
||||
"404":
|
||||
$ref: "#/components/responses/NotFound"
|
||||
"409":
|
||||
description: Duplicate policy name
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
delete:
|
||||
tags: [RenewalPolicies]
|
||||
summary: Delete renewal policy
|
||||
operationId: deleteRenewalPolicy
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/resourceId"
|
||||
responses:
|
||||
"204":
|
||||
description: Renewal policy deleted
|
||||
"400":
|
||||
$ref: "#/components/responses/BadRequest"
|
||||
"404":
|
||||
$ref: "#/components/responses/NotFound"
|
||||
"409":
|
||||
description: Policy in use by one or more certificates (FK restrict)
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"500":
|
||||
$ref: "#/components/responses/InternalError"
|
||||
|
||||
# ─── Profiles ────────────────────────────────────────────────────────
|
||||
/api/v1/profiles:
|
||||
get:
|
||||
@@ -3308,6 +3492,15 @@ components:
|
||||
- Archived
|
||||
|
||||
ManagedCertificate:
|
||||
# D-5 (cat-f-ae0d06b6588f, master): per-issuance fields
|
||||
# (serial_number, fingerprint_sha256, key_algorithm, key_size,
|
||||
# issued_at) are intentionally NOT declared here. They live on
|
||||
# CertificateVersion (per-issuance evidence) and are fetched via
|
||||
# /api/v1/certificates/{id}/versions. ManagedCertificate is the
|
||||
# management envelope; CertificateVersion is the issuance record.
|
||||
# Pre-D-5 the TS Certificate interface had them as optional and
|
||||
# the dashboard's Key Algorithm / Key Size rows always rendered
|
||||
# '—' as a result. The TS trim restores parity with this schema.
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
@@ -3547,8 +3740,18 @@ components:
|
||||
registered_at:
|
||||
type: string
|
||||
format: date-time
|
||||
api_key_hash:
|
||||
type: string
|
||||
# G-2 (P1): the `api_key_hash` field was REMOVED from this
|
||||
# schema after cat-s5-apikey_leak audit closure. The DB column
|
||||
# still exists (migrations/000001_initial_schema.up.sql) and
|
||||
# the server still populates the in-memory struct for the
|
||||
# auth-lookup path (repository.AgentRepository::GetByAPIKey),
|
||||
# but the JSON wire shape no longer carries it — see
|
||||
# internal/domain/connector.go::Agent::APIKeyHash + MarshalJSON
|
||||
# for the redaction enforcement and docs/architecture.md ER
|
||||
# diagram for the database-vs-API distinction. Do NOT re-add
|
||||
# the field here without first removing the JSON-shape redaction
|
||||
# in the domain package; the CI guardrail at
|
||||
# .github/workflows/ci.yml will block re-introduction either way.
|
||||
os:
|
||||
type: string
|
||||
architecture:
|
||||
@@ -3765,6 +3968,132 @@ components:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
# ─── Renewal Policies ─────────────────────────────────────────────
|
||||
# G-1: renewal_policies table — lifecycle policies, referenced by
|
||||
# managed_certificates.renewal_policy_id ON DELETE RESTRICT. Distinct
|
||||
# from PolicyRule above (compliance rules, table policy_rules).
|
||||
RenewalPolicy:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
- renewal_window_days
|
||||
- auto_renew
|
||||
- max_retries
|
||||
- retry_interval_seconds
|
||||
- alert_thresholds_days
|
||||
- created_at
|
||||
- updated_at
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: Human-readable ID, prefixed `rp-` (e.g., `rp-default`).
|
||||
name:
|
||||
type: string
|
||||
description: Unique display name (UNIQUE in DB).
|
||||
renewal_window_days:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 365
|
||||
description: Days before expiry to trigger renewal.
|
||||
auto_renew:
|
||||
type: boolean
|
||||
description: Whether renewal is triggered automatically by the scheduler.
|
||||
max_retries:
|
||||
type: integer
|
||||
minimum: 0
|
||||
maximum: 10
|
||||
description: Maximum renewal retry attempts on failure.
|
||||
retry_interval_seconds:
|
||||
type: integer
|
||||
minimum: 60
|
||||
maximum: 86400
|
||||
description: Seconds to wait between retry attempts.
|
||||
alert_thresholds_days:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
minimum: 0
|
||||
maximum: 365
|
||||
description: Days-before-expiry thresholds at which to emit alerts.
|
||||
certificate_profile_id:
|
||||
type: string
|
||||
nullable: true
|
||||
description: Optional certificate profile binding. Read-only at this endpoint; UI does not currently edit this field.
|
||||
created_at:
|
||||
type: string
|
||||
format: date-time
|
||||
updated_at:
|
||||
type: string
|
||||
format: date-time
|
||||
|
||||
RenewalPolicyCreateRequest:
|
||||
type: object
|
||||
required:
|
||||
- name
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: Optional human-readable ID. Auto-generated from name when omitted.
|
||||
name:
|
||||
type: string
|
||||
minLength: 1
|
||||
maxLength: 255
|
||||
renewal_window_days:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 365
|
||||
default: 30
|
||||
auto_renew:
|
||||
type: boolean
|
||||
default: true
|
||||
max_retries:
|
||||
type: integer
|
||||
minimum: 0
|
||||
maximum: 10
|
||||
description: Required. Not defaulted — 0 is a valid operator choice.
|
||||
retry_interval_seconds:
|
||||
type: integer
|
||||
minimum: 60
|
||||
maximum: 86400
|
||||
default: 3600
|
||||
alert_thresholds_days:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
minimum: 0
|
||||
maximum: 365
|
||||
default: [30, 14, 7, 0]
|
||||
|
||||
RenewalPolicyUpdateRequest:
|
||||
type: object
|
||||
description: Partial update. Omitted fields are left unchanged.
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
minLength: 1
|
||||
maxLength: 255
|
||||
renewal_window_days:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 365
|
||||
auto_renew:
|
||||
type: boolean
|
||||
max_retries:
|
||||
type: integer
|
||||
minimum: 0
|
||||
maximum: 10
|
||||
retry_interval_seconds:
|
||||
type: integer
|
||||
minimum: 60
|
||||
maximum: 86400
|
||||
alert_thresholds_days:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
minimum: 0
|
||||
maximum: 365
|
||||
|
||||
# ─── Profiles ────────────────────────────────────────────────────
|
||||
CertificateProfile:
|
||||
type: object
|
||||
|
||||
+8
-1
@@ -269,7 +269,14 @@ func (a *Agent) Run(ctx context.Context) error {
|
||||
a.logger.Warn("backing off due to consecutive failures",
|
||||
"failures", a.consecutiveFailures,
|
||||
"backoff", backoff.String())
|
||||
time.Sleep(backoff)
|
||||
// F-003: ctx-aware wait so graceful shutdown does not stall on
|
||||
// a long backoff. If ctx cancels mid-backoff, return to the
|
||||
// outer loop so the <-ctx.Done() case can trigger clean exit.
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
continue
|
||||
case <-time.After(backoff):
|
||||
}
|
||||
}
|
||||
a.pollForWork(ctx)
|
||||
|
||||
|
||||
+88
-7
@@ -39,6 +39,26 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Defense-in-depth runtime guard for the auth-type discriminator.
|
||||
//
|
||||
// G-1 (P1): config.Load() already runs Validate() which rejects "jwt"
|
||||
// and any value outside config.ValidAuthTypes() with a dedicated
|
||||
// diagnostic. This switch is belt-and-braces — if a future refactor
|
||||
// bypasses the validator (test harness, alt config loader, env-var
|
||||
// rebinding after Load) the server must not silently boot with an
|
||||
// unsupported auth shape. The error path uses fmt.Fprintf because
|
||||
// the slog logger is constructed from cfg below this point; we want
|
||||
// the failure to be visible regardless of log-level configuration.
|
||||
switch config.AuthType(cfg.Auth.Type) {
|
||||
case config.AuthTypeAPIKey, config.AuthTypeNone:
|
||||
// ok — fall through
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr,
|
||||
"unsupported auth type at runtime: %q (valid: %v) — config validation should have caught this; refusing to start\n",
|
||||
cfg.Auth.Type, config.ValidAuthTypes())
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Set up structured logging
|
||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
||||
Level: cfg.GetLogLevel(),
|
||||
@@ -66,6 +86,41 @@ func main() {
|
||||
}
|
||||
logger.Info("migrations completed")
|
||||
|
||||
// Apply baseline seed data.
|
||||
//
|
||||
// U-3 (P1, cat-u-seed_initdb_schema_drift): pre-U-3 seed.sql was mounted
|
||||
// into postgres `/docker-entrypoint-initdb.d/` alongside a hand-curated
|
||||
// subset of migrations. Adding a migration that introduced a new column
|
||||
// referenced by seed.sql (cat-o-retry_interval_unit_mismatch /
|
||||
// policy_rules.severity / etc.) without also updating the compose volume
|
||||
// mounts caused initdb to crash on first up. Post-U-3 the compose stack
|
||||
// drops all initdb mounts; postgres comes up with empty schema, the
|
||||
// server runs RunMigrations above, then this RunSeed call lands the
|
||||
// baseline data — all from a single source of truth (this binary).
|
||||
// See internal/repository/postgres/db.go::RunSeed for the contract.
|
||||
logger.Info("applying baseline seed", "path", cfg.Database.MigrationsPath)
|
||||
if err := postgres.RunSeed(db, cfg.Database.MigrationsPath); err != nil {
|
||||
logger.Error("failed to apply seed data", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
logger.Info("seed completed")
|
||||
|
||||
// Apply demo overlay seed when CERTCTL_DEMO_SEED=true. Pre-U-3 the demo
|
||||
// overlay (deploy/docker-compose.demo.yml) mounted seed_demo.sql into
|
||||
// postgres `/docker-entrypoint-initdb.d/`; that broke once U-3 dropped
|
||||
// the initdb migration mounts (the demo seed references tables that
|
||||
// wouldn't exist at initdb time). The runtime path here is the
|
||||
// post-U-3 replacement. Default-off so a vanilla deploy never lands
|
||||
// fake-history rows. See postgres.RunDemoSeed for the contract.
|
||||
if cfg.Database.DemoSeed {
|
||||
logger.Info("applying demo seed (CERTCTL_DEMO_SEED=true)", "path", cfg.Database.MigrationsPath)
|
||||
if err := postgres.RunDemoSeed(db, cfg.Database.MigrationsPath); err != nil {
|
||||
logger.Error("failed to apply demo seed data", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
logger.Info("demo seed completed")
|
||||
}
|
||||
|
||||
// Initialize repositories with real PostgreSQL connection
|
||||
auditRepo := postgres.NewAuditRepository(db)
|
||||
certificateRepo := postgres.NewCertificateRepository(db)
|
||||
@@ -147,6 +202,11 @@ func main() {
|
||||
auditService := service.NewAuditService(auditRepo)
|
||||
policyService := service.NewPolicyService(policyRepo, auditService)
|
||||
policyService.SetCertRepo(certificateRepo) // D-008: CertificateLifetime arm needs CertificateVersion.NotBefore/NotAfter
|
||||
// G-1: RenewalPolicyService — distinct from PolicyService (compliance rules).
|
||||
// Drives /api/v1/renewal-policies CRUD; the service layer owns slugify + validation,
|
||||
// the repo layer owns sentinel translation for 23505 (name UNIQUE) and 23503
|
||||
// (FK-RESTRICT against managed_certificates.renewal_policy_id).
|
||||
renewalPolicyService := service.NewRenewalPolicyService(renewalPolicyRepo)
|
||||
certificateService := service.NewCertificateService(certificateRepo, policyService, auditService)
|
||||
notifierRegistry := make(map[string]service.Notifier)
|
||||
|
||||
@@ -368,6 +428,10 @@ func main() {
|
||||
agentHandler := handler.NewAgentHandler(agentService)
|
||||
jobHandler := handler.NewJobHandler(jobService)
|
||||
policyHandler := handler.NewPolicyHandler(policyService)
|
||||
// G-1: RenewalPolicyHandler — /api/v1/renewal-policies CRUD. Value-returning
|
||||
// constructor matches the house pattern (PolicyHandler, IssuerHandler etc.);
|
||||
// the registry stores it by value in HandlerRegistry.RenewalPolicies.
|
||||
renewalPolicyHandler := handler.NewRenewalPolicyHandler(renewalPolicyService)
|
||||
profileHandler := handler.NewProfileHandler(profileService)
|
||||
teamHandler := handler.NewTeamHandler(teamService)
|
||||
ownerHandler := handler.NewOwnerHandler(ownerService)
|
||||
@@ -377,6 +441,13 @@ func main() {
|
||||
statsHandler := handler.NewStatsHandler(statsService)
|
||||
metricsHandler := handler.NewMetricsHandler(statsService, time.Now())
|
||||
healthHandler := handler.NewHealthHandler(cfg.Auth.Type)
|
||||
// U-3 ride-along (cat-u-no_version_endpoint, P2): the version handler
|
||||
// answers GET /api/v1/version with build identity (ldflags Version,
|
||||
// VCS commit/dirty/timestamp, Go runtime version). Wired through the
|
||||
// no-auth dispatch + audit ExcludePaths below so probes and rollout
|
||||
// systems can read it without Bearer credentials and without flooding
|
||||
// the audit trail.
|
||||
versionHandler := handler.NewVersionHandler()
|
||||
discoveryHandler := handler.NewDiscoveryHandler(discoveryService)
|
||||
networkScanHandler := handler.NewNetworkScanHandler(networkScanService)
|
||||
verificationService := service.NewVerificationService(jobRepo, auditService, logger)
|
||||
@@ -508,6 +579,7 @@ func main() {
|
||||
Agents: agentHandler,
|
||||
Jobs: jobHandler,
|
||||
Policies: policyHandler,
|
||||
RenewalPolicies: renewalPolicyHandler,
|
||||
Profiles: profileHandler,
|
||||
Teams: teamHandler,
|
||||
Owners: ownerHandler,
|
||||
@@ -524,6 +596,7 @@ func main() {
|
||||
Digest: *digestHandler,
|
||||
HealthChecks: healthCheckHandler,
|
||||
BulkRevocation: bulkRevocationHandler,
|
||||
Version: versionHandler,
|
||||
})
|
||||
// Register EST (RFC 7030) handlers if enabled
|
||||
if cfg.EST.Enabled {
|
||||
@@ -605,7 +678,7 @@ func main() {
|
||||
// compatibility CERTCTL_AUTH_SECRET is synthesized into legacy-key-N
|
||||
// entries with Admin=false.
|
||||
var namedKeys []middleware.NamedAPIKey
|
||||
if cfg.Auth.Type != "none" {
|
||||
if config.AuthType(cfg.Auth.Type) != config.AuthTypeNone {
|
||||
// Translate typed config.NamedAPIKey -> middleware.NamedAPIKey. The
|
||||
// two structs are field-compatible but live in different packages to
|
||||
// preserve the config→middleware dependency direction.
|
||||
@@ -660,10 +733,15 @@ func main() {
|
||||
},
|
||||
)
|
||||
auditMiddleware := middleware.NewAuditLog(auditAdapter, middleware.AuditConfig{
|
||||
ExcludePaths: []string{"/health", "/ready"},
|
||||
// /api/v1/version is excluded for the same reason /health and /ready
|
||||
// are: rollout systems and blackbox probes hammer it on a tight
|
||||
// interval, and the audit trail's value comes from rare,
|
||||
// operator-authored mutations — not from sub-second readonly polls.
|
||||
// U-3 ride-along (cat-u-no_version_endpoint, P2).
|
||||
ExcludePaths: []string{"/health", "/ready", "/api/v1/version"},
|
||||
Logger: logger,
|
||||
})
|
||||
logger.Info("API audit logging enabled (excluding /health, /ready)")
|
||||
logger.Info("API audit logging enabled (excluding /health, /ready, /api/v1/version)")
|
||||
|
||||
middlewareStack := []func(http.Handler) http.Handler{
|
||||
middleware.RequestID,
|
||||
@@ -694,8 +772,8 @@ func main() {
|
||||
logger.Info("rate limiting enabled", "rps", cfg.RateLimit.RPS, "burst", cfg.RateLimit.BurstSize)
|
||||
}
|
||||
|
||||
if cfg.Auth.Type == "none" {
|
||||
logger.Warn("authentication disabled (CERTCTL_AUTH_TYPE=none) — not suitable for production")
|
||||
if config.AuthType(cfg.Auth.Type) == config.AuthTypeNone {
|
||||
logger.Warn("authentication disabled (CERTCTL_AUTH_TYPE=none) — not suitable for production except behind an authenticating gateway (oauth2-proxy / Envoy ext_authz / Traefik ForwardAuth / Pomerium)")
|
||||
} else {
|
||||
logger.Info("authentication enabled", "type", cfg.Auth.Type)
|
||||
}
|
||||
@@ -859,6 +937,7 @@ func preflightSCEPChallengePassword(enabled bool, challengePassword string) erro
|
||||
// Dispatch rules (M-001, audit 2026-04-19, option D):
|
||||
//
|
||||
// - /health, /ready, /api/v1/auth/info → no-auth (probes + login detection)
|
||||
// - /api/v1/version → no-auth (U-3 ride-along: build identity for rollout/probes)
|
||||
// - /.well-known/pki/* → no-auth (RFC 5280 CRL, RFC 6960 OCSP)
|
||||
// - /.well-known/est/* → no-auth (RFC 7030 §3.2.3)
|
||||
// - /scep, /scep/* → no-auth (RFC 8894 §3.2, CSR challengePassword)
|
||||
@@ -884,10 +963,12 @@ func buildFinalHandler(apiHandler, noAuthHandler http.Handler, webDir string, da
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
path := r.URL.Path
|
||||
|
||||
// Health/ready and auth/info bypass auth middleware.
|
||||
// Health/ready, auth/info, and version bypass auth middleware.
|
||||
// Health/ready: Docker/K8s health probes don't carry Bearer tokens.
|
||||
// auth/info: React app calls this before login to detect auth mode.
|
||||
if path == "/health" || path == "/ready" || path == "/api/v1/auth/info" {
|
||||
// version: U-3 ride-along (cat-u-no_version_endpoint) — rollout
|
||||
// systems and blackbox probes need build identity without a key.
|
||||
if path == "/health" || path == "/ready" || path == "/api/v1/auth/info" || path == "/api/v1/version" {
|
||||
noAuthHandler.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -122,6 +122,8 @@ The `volumes` section mounts 10 migration files into PostgreSQL's init directory
|
||||
|
||||
**Expert note:** The numbered prefix pattern (`001_`, `002_`, ..., `020_`) ensures deterministic execution order. All migrations use `IF NOT EXISTS` and `ON CONFLICT DO NOTHING` for idempotency, so re-running them against an existing database is safe.
|
||||
|
||||
**Stateful volume — first-boot password binding (U-1).** The same "first boot only" semantics that govern migration scripts also govern `POSTGRES_PASSWORD`. The official `postgres` image runs `initdb` exactly once — when `/var/lib/postgresql/data` is empty — and that pass is the only time `POSTGRES_PASSWORD` is written into `pg_authid`. On every subsequent boot, the postgres container ignores the env var and authenticates against whatever password was baked into the data directory on the original `up`. Editing `POSTGRES_PASSWORD` in `.env` after a successful first boot therefore only updates the **certctl-server** container's `CERTCTL_DATABASE_URL` — postgres still expects the previous password, and the server fails to ping with `pq: password authentication failed for user "certctl"` (SQLSTATE 28P01). The certctl-server container surfaces this case explicitly: when SQLSTATE 28P01 fires at startup, the wrap text in `internal/repository/postgres/db.go::wrapPingError` points operators at the two remediation paths — destructive volume teardown via `docker compose -f deploy/docker-compose.yml down -v && up -d --build`, or non-destructive in-place rotation via `docker compose -f deploy/docker-compose.yml exec postgres psql -U certctl -c "ALTER ROLE certctl PASSWORD '<new>';"` followed by a server restart with the matching `POSTGRES_PASSWORD`. Use the destructive path on the demo / first-time setup; use the non-destructive path on any environment that holds data you want to keep.
|
||||
|
||||
#### certctl Server
|
||||
|
||||
```yaml
|
||||
|
||||
@@ -7,8 +7,20 @@
|
||||
# To start fresh (wipe previous data):
|
||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml down -v
|
||||
# docker compose -f docker-compose.yml -f docker-compose.demo.yml up --build
|
||||
|
||||
#
|
||||
# U-3 (P1, cat-u-seed_initdb_schema_drift): pre-U-3 this overlay mounted
|
||||
# `seed_demo.sql` into postgres `/docker-entrypoint-initdb.d/`. That worked
|
||||
# only because the production stack also mounted the migrations there, so
|
||||
# the schema existed at initdb time. Once U-3 dropped the production
|
||||
# initdb mounts (single source of truth: server runs RunMigrations + RunSeed
|
||||
# at boot), the demo seed could no longer be applied at initdb time — the
|
||||
# tables it references wouldn't exist yet.
|
||||
#
|
||||
# Post-U-3 the demo overlay just sets CERTCTL_DEMO_SEED=true; the server
|
||||
# applies seed_demo.sql at boot via postgres.RunDemoSeed AFTER baseline
|
||||
# migrations + seed.sql are in place. Same single source of truth, no
|
||||
# initdb mounts, no schema-vs-seed drift.
|
||||
services:
|
||||
postgres:
|
||||
volumes:
|
||||
- ../migrations/seed_demo.sql:/docker-entrypoint-initdb.d/030_seed_demo.sql
|
||||
certctl-server:
|
||||
environment:
|
||||
CERTCTL_DEMO_SEED: "true"
|
||||
|
||||
@@ -93,6 +93,17 @@ services:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# U-3 (P1, cat-u-seed_initdb_schema_drift, GitHub #10): the test stack used
|
||||
# to mount a hand-curated subset of migrations + seed.sql + a never-checked-in
|
||||
# seed_test.sql into postgres `/docker-entrypoint-initdb.d/`. Same hazard as
|
||||
# the production compose — initdb crashed any time a new migration shipped
|
||||
# that the seed depended on without the mount list being updated. Post-U-3
|
||||
# the schema is built EXCLUSIVELY by the server at startup via
|
||||
# internal/repository/postgres.RunMigrations + RunSeed. Postgres comes up
|
||||
# empty and the server lands the full ladder + baseline seed in one shot.
|
||||
# `start_period: 30s` matches the production compose and shields slow CI
|
||||
# runners from healthcheck flap during initdb.
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: certctl-test-postgres
|
||||
@@ -102,19 +113,6 @@ services:
|
||||
POSTGRES_PASSWORD: testpass
|
||||
volumes:
|
||||
- test_postgres_data:/var/lib/postgresql/data
|
||||
- ../migrations/000001_initial_schema.up.sql:/docker-entrypoint-initdb.d/001_schema.sql
|
||||
- ../migrations/000002_agent_metadata.up.sql:/docker-entrypoint-initdb.d/002_agent_metadata.sql
|
||||
- ../migrations/000003_certificate_profiles.up.sql:/docker-entrypoint-initdb.d/003_certificate_profiles.sql
|
||||
- ../migrations/000004_agent_groups.up.sql:/docker-entrypoint-initdb.d/004_agent_groups.sql
|
||||
- ../migrations/000005_revocation.up.sql:/docker-entrypoint-initdb.d/005_revocation.sql
|
||||
- ../migrations/000006_discovery.up.sql:/docker-entrypoint-initdb.d/006_discovery.sql
|
||||
- ../migrations/000007_network_discovery.up.sql:/docker-entrypoint-initdb.d/007_network_discovery.sql
|
||||
- ../migrations/000008_verification.up.sql:/docker-entrypoint-initdb.d/008_verification.sql
|
||||
- ../migrations/000009_issuer_config.up.sql:/docker-entrypoint-initdb.d/009_issuer_config.sql
|
||||
- ../migrations/000010_target_config.up.sql:/docker-entrypoint-initdb.d/010_target_config.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/020_seed.sql
|
||||
- ../migrations/seed_test.sql:/docker-entrypoint-initdb.d/025_seed_test.sql
|
||||
# No seed_demo.sql — start with a clean database for real testing
|
||||
networks:
|
||||
certctl-test:
|
||||
ipv4_address: 10.30.50.2
|
||||
@@ -125,6 +123,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: unless-stopped
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+29
-11
@@ -53,6 +53,29 @@ services:
|
||||
- certctl-network
|
||||
|
||||
# PostgreSQL database
|
||||
#
|
||||
# U-3 (P1, cat-u-seed_initdb_schema_drift, GitHub #10):
|
||||
# Pre-U-3 this stack mounted a hand-curated subset of `migrations/*.up.sql`
|
||||
# plus `seed.sql` into `/docker-entrypoint-initdb.d/`, and postgres
|
||||
# initdb-applied them on first boot. The mount list rotted every time a
|
||||
# new migration shipped that the seed depended on (000013 added
|
||||
# policy_rules.severity, 000017 renames retry_interval_minutes, etc.) —
|
||||
# initdb crashed, the container reported `unhealthy` indefinitely, and
|
||||
# `docker compose -f deploy/docker-compose.yml up -d --build` from a
|
||||
# fresh clone of v2.0.50 hit it on the first try.
|
||||
#
|
||||
# Post-U-3 the schema is built EXCLUSIVELY by the server at startup via
|
||||
# internal/repository/postgres.RunMigrations + RunSeed. Single source of
|
||||
# truth, no list to keep in sync. Postgres comes up empty; the server
|
||||
# waits for it healthy, then applies the full migration ladder + seed in
|
||||
# one shot. Helm + the dev examples were already runtime-only (Path B)
|
||||
# and worked through the same window.
|
||||
#
|
||||
# `start_period: 30s` gives postgres room to bootstrap on slow runners
|
||||
# (CI macOS, low-spec laptops) before the healthcheck failure counter
|
||||
# starts ticking. Pre-U-3 a slow first-init combined with the
|
||||
# `unhealthy` flap to cascade into certctl-server's `service_healthy`
|
||||
# depends_on, blocking the whole stack.
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: certctl-postgres
|
||||
@@ -64,17 +87,6 @@ services:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
- ../migrations/000001_initial_schema.up.sql:/docker-entrypoint-initdb.d/001_schema.sql
|
||||
- ../migrations/000002_agent_metadata.up.sql:/docker-entrypoint-initdb.d/002_agent_metadata.sql
|
||||
- ../migrations/000003_certificate_profiles.up.sql:/docker-entrypoint-initdb.d/003_certificate_profiles.sql
|
||||
- ../migrations/000004_agent_groups.up.sql:/docker-entrypoint-initdb.d/004_agent_groups.sql
|
||||
- ../migrations/000005_revocation.up.sql:/docker-entrypoint-initdb.d/005_revocation.sql
|
||||
- ../migrations/000006_discovery.up.sql:/docker-entrypoint-initdb.d/006_discovery.sql
|
||||
- ../migrations/000007_network_discovery.up.sql:/docker-entrypoint-initdb.d/007_network_discovery.sql
|
||||
- ../migrations/000008_verification.up.sql:/docker-entrypoint-initdb.d/008_verification.sql
|
||||
- ../migrations/000009_issuer_config.up.sql:/docker-entrypoint-initdb.d/009_issuer_config.sql
|
||||
- ../migrations/000010_target_config.up.sql:/docker-entrypoint-initdb.d/010_target_config.sql
|
||||
- ../migrations/seed.sql:/docker-entrypoint-initdb.d/020_seed.sql
|
||||
networks:
|
||||
- certctl-network
|
||||
healthcheck:
|
||||
@@ -82,6 +94,7 @@ services:
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: unless-stopped
|
||||
|
||||
# Certctl Server (API + scheduler)
|
||||
@@ -127,6 +140,11 @@ services:
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
# U-3: server boot now does RunMigrations + RunSeed before listening on
|
||||
# 8443. On a fresh clone the full migration ladder + seed application
|
||||
# can take ~10s on a small VM; start_period prevents the first few
|
||||
# healthcheck attempts from counting as failures while that work runs.
|
||||
start_period: 30s
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: "json-file"
|
||||
|
||||
@@ -246,8 +246,8 @@ helm install certctl certctl/ \
|
||||
|--------|---------|-------------|
|
||||
| `server.replicas` | 1 | Number of server replicas |
|
||||
| `server.port` | 8443 | Server port |
|
||||
| `server.auth.type` | api-key | Authentication type |
|
||||
| `server.auth.apiKey` | "" | API key (REQUIRED) |
|
||||
| `server.auth.type` | api-key | Authentication type — `api-key` or `none` (G-1: `jwt` removed; for JWT/OIDC use a fronting authenticating gateway, see `docs/architecture.md` and `docs/upgrade-to-v2-jwt-removal.md`) |
|
||||
| `server.auth.apiKey` | "" | API key (REQUIRED when `auth.type=api-key`) |
|
||||
| `server.logging.level` | info | Log level |
|
||||
| `server.logging.format` | json | Log format |
|
||||
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
# certctl Helm Chart
|
||||
|
||||
Production-ready Helm chart for deploying [certctl](https://github.com/shankar0123/certctl) on Kubernetes. Wires up the certctl server (Deployment), PostgreSQL (StatefulSet with PVC), and the agent (DaemonSet — one per node) on a private cluster, with health probes, security contexts, and optional Ingress.
|
||||
|
||||
## Quick install
|
||||
|
||||
```bash
|
||||
helm install certctl deploy/helm/certctl/ \
|
||||
--create-namespace --namespace certctl \
|
||||
--set server.auth.apiKey="$(openssl rand -base64 32)" \
|
||||
--set postgresql.auth.password="$(openssl rand -base64 24)"
|
||||
```
|
||||
|
||||
This brings up:
|
||||
|
||||
- `<release>-server` Deployment (HTTPS-only on port 8443; TLS 1.3)
|
||||
- `<release>-postgres` StatefulSet (PostgreSQL 16-alpine, 1 replica, 10Gi PVC by default)
|
||||
- `<release>-agent` DaemonSet (polls server, generates ECDSA P-256 keys locally)
|
||||
- Service objects, optional Ingress, and ServiceAccount with RBAC
|
||||
|
||||
See [`values.yaml`](values.yaml) for the full configuration surface — issuer settings, target connectors, scheduler intervals, notifier credentials, and resource requests/limits all live there.
|
||||
|
||||
## Operational notes
|
||||
|
||||
### Postgres password rotation — read this before changing `postgresql.auth.password`
|
||||
|
||||
**The trap.** `postgresql.auth.password` is bound to `pg_authid` exactly once — when the StatefulSet's PVC is provisioned and `initdb` runs. The official `postgres:16-alpine` image only runs `initdb` when `/var/lib/postgresql/data` is empty, so on every subsequent rollout the `POSTGRES_PASSWORD` env var is read into the container but **ignored** by postgres itself. The certctl-server container also picks up the new value (via the database URL helper template), so the two halves diverge: server presents the new password, postgres still expects the old one.
|
||||
|
||||
**Symptom.** The certctl-server pod's startup log shows:
|
||||
|
||||
```
|
||||
failed to ping database: postgres rejected the configured credentials
|
||||
(SQLSTATE 28P01 — invalid_password). If you recently rotated POSTGRES_PASSWORD ...
|
||||
```
|
||||
|
||||
That diagnostic is emitted by `internal/repository/postgres/db.go::wrapPingError` — it points operators at the two remediation paths below.
|
||||
|
||||
**Remediation, non-destructive (preferred for any environment with real data):**
|
||||
|
||||
```bash
|
||||
# 1. Rotate the password in postgres directly
|
||||
kubectl -n certctl exec -it <release>-postgres-0 -- \
|
||||
psql -U certctl -c "ALTER ROLE certctl PASSWORD '<new-password>';"
|
||||
|
||||
# 2. Update the secret / Helm values to the same value
|
||||
helm upgrade <release> deploy/helm/certctl/ \
|
||||
--reuse-values \
|
||||
--set postgresql.auth.password='<new-password>'
|
||||
|
||||
# 3. Bounce the certctl-server pod so it re-reads the secret
|
||||
kubectl -n certctl rollout restart deployment/<release>-server
|
||||
```
|
||||
|
||||
**Remediation, destructive (DESTROYS ALL CERTCTL DATA — only acceptable on dev/demo clusters):**
|
||||
|
||||
```bash
|
||||
helm uninstall <release> -n certctl
|
||||
kubectl -n certctl delete pvc -l \
|
||||
app.kubernetes.io/name=certctl,app.kubernetes.io/component=postgres
|
||||
helm install <release> deploy/helm/certctl/ \
|
||||
--namespace certctl \
|
||||
--set postgresql.auth.password='<new-password>'
|
||||
```
|
||||
|
||||
The PVC re-creates empty, `initdb` runs on first boot of the new postgres pod, and `pg_authid` is seeded with the new password.
|
||||
|
||||
**Why we don't fix this in the chart.** The env-vs-`pg_authid` divergence is intrinsic to how the upstream `postgres` image bootstraps — `initdb` is run-once-per-empty-data-dir, and there is no upstream-supported way to make subsequent boots re-seed `pg_authid` from `POSTGRES_PASSWORD`. The ergonomic answer is the runtime diagnostic plus this operational note.
|
||||
|
||||
**Cross-references.** Same root cause is documented for the docker-compose path in [`docs/quickstart.md`](../../../docs/quickstart.md) (Warning callout after the `cp .env.example .env` block) and in [`deploy/ENVIRONMENTS.md`](../../ENVIRONMENTS.md) (Stateful volume — first-boot password binding section). The runtime diagnostic itself lives in `internal/repository/postgres/db.go::wrapPingError` with regression coverage in `internal/repository/postgres/db_test.go`.
|
||||
|
||||
### Server API key rotation
|
||||
|
||||
Unlike the postgres password, `server.auth.apiKey` accepts a comma-separated list, so zero-downtime rotation is straightforward:
|
||||
|
||||
```bash
|
||||
# 1. Add the new key alongside the old
|
||||
helm upgrade <release> deploy/helm/certctl/ \
|
||||
--reuse-values \
|
||||
--set server.auth.apiKey='new-key,old-key'
|
||||
|
||||
# 2. Roll your agents / clients over to the new key
|
||||
|
||||
# 3. Remove the old key
|
||||
helm upgrade <release> deploy/helm/certctl/ \
|
||||
--reuse-values \
|
||||
--set server.auth.apiKey='new-key'
|
||||
```
|
||||
|
||||
### JWT / OIDC via authenticating gateway
|
||||
|
||||
certctl's in-process auth surface is intentionally narrow: `server.auth.type=api-key` for production deployments and `server.auth.type=none` for development. There is no in-process JWT, OIDC, mTLS, or SAML middleware. (`server.auth.type=jwt` was accepted pre-G-1 but silently routed every request through the api-key bearer middleware — silent auth downgrade. The chart now fails at `helm install`/`helm upgrade` template time via the `certctl.validateAuthType` helper if you set it. See [`../../../docs/upgrade-to-v2-jwt-removal.md`](../../../docs/upgrade-to-v2-jwt-removal.md) if you previously had this in your values.)
|
||||
|
||||
For deployments that need JWT/OIDC, the canonical Kubernetes-flavored shape is to put oauth2-proxy in front of the certctl Service, attach an authenticating Ingress middleware, and run certctl with `server.auth.type=none`:
|
||||
|
||||
```bash
|
||||
# 1. Install oauth2-proxy (or any OIDC-terminating sidecar) in the same namespace
|
||||
helm install oauth2-proxy oauth2-proxy/oauth2-proxy \
|
||||
--namespace certctl \
|
||||
--set config.clientID="$OIDC_CLIENT_ID" \
|
||||
--set config.clientSecret="$OIDC_CLIENT_SECRET" \
|
||||
--set config.cookieSecret="$(openssl rand -base64 32)" \
|
||||
--set config.configFile='|
|
||||
provider = "oidc"
|
||||
oidc_issuer_url = "https://your-issuer/"
|
||||
upstreams = ["http://<release>-server.certctl.svc.cluster.local:8443"]
|
||||
pass_authorization_header = true
|
||||
set_authorization_header = true
|
||||
email_domains = ["*"]
|
||||
'
|
||||
|
||||
# 2. Install certctl with type=none (gateway terminates auth)
|
||||
helm install certctl deploy/helm/certctl/ \
|
||||
--namespace certctl \
|
||||
--set server.auth.type=none \
|
||||
--set postgresql.auth.password="$(openssl rand -base64 24)"
|
||||
|
||||
# 3. Attach an Ingress that routes through oauth2-proxy
|
||||
# (Traefik ForwardAuth, nginx auth_request, Envoy ext_authz, etc.)
|
||||
```
|
||||
|
||||
Same root pattern works with Pomerium, Authelia, Caddy `forward_auth`, Apache `mod_auth_openidc`, or any service-mesh `ext_authz`. See [`../../../docs/architecture.md`](../../../docs/architecture.md) "Authenticating-gateway pattern" for the full design rationale and [`../../../docs/upgrade-to-v2-jwt-removal.md`](../../../docs/upgrade-to-v2-jwt-removal.md) for the migration walkthrough.
|
||||
|
||||
### TLS certificate sourcing
|
||||
|
||||
By default the chart provisions a self-signed cert via the same init-container pattern as the docker-compose deploy. For production, supply an operator-managed Secret (cert-manager, internal CA, etc.) — see [`docs/tls.md`](../../../docs/tls.md) for the full provisioning matrix and [`docs/upgrade-to-tls.md`](../../../docs/upgrade-to-tls.md) for upgrade-from-HTTP procedures.
|
||||
|
||||
## Disabling embedded postgres
|
||||
|
||||
If you have an existing PostgreSQL cluster, disable the embedded one and point at it directly:
|
||||
|
||||
```bash
|
||||
helm install certctl deploy/helm/certctl/ \
|
||||
--set postgresql.enabled=false \
|
||||
--set server.databaseUrl='postgres://certctl:<pw>@my-pg-host:5432/certctl?sslmode=require'
|
||||
```
|
||||
|
||||
The volume-trap section above does **not** apply to this configuration — your postgres operator (or cloud DB) handles password rotation, and you control `pg_authid` directly.
|
||||
|
||||
## Uninstall
|
||||
|
||||
```bash
|
||||
helm uninstall <release> -n certctl
|
||||
# Optional — also delete the postgres PVC (DESTROYS DATA):
|
||||
kubectl -n certctl delete pvc -l \
|
||||
app.kubernetes.io/name=certctl,app.kubernetes.io/component=postgres
|
||||
```
|
||||
|
||||
By default `helm uninstall` retains the StatefulSet's PVCs, so reinstalling with the same release name preserves the database. If you've changed `postgresql.auth.password` in your values between uninstall and reinstall, you'll hit the trap on the reinstall — apply the non-destructive remediation above, or also delete the PVC.
|
||||
@@ -169,3 +169,26 @@ per affected resource. No-op when configured correctly.
|
||||
{{- fail "\n\nserver.tls.certManager.enabled=true but server.tls.certManager.issuerRef.name is empty.\n\nSet:\n --set server.tls.certManager.issuerRef.name=<your-issuer-or-clusterissuer>\n\nSee docs/tls.md.\n" -}}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Auth-type validation gate.
|
||||
|
||||
G-1 (P1): pre-G-1 the chart accepted server.auth.type=jwt and the
|
||||
certctl-server container silently routed every request through the
|
||||
api-key bearer middleware (no JWT impl ships with certctl). Post-G-1
|
||||
the chart fails at template-time with a pointer at the authenticating-
|
||||
gateway pattern. The valid set must stay in sync with
|
||||
internal/config.ValidAuthTypes() in the Go binary; if you add a value
|
||||
there you must add it here too (and update the property test in
|
||||
internal/config/config_test.go that pins both surfaces).
|
||||
|
||||
Any template that consumes .Values.server.auth.type should call
|
||||
`{{ include "certctl.validateAuthType" . }}` at the top so this guard
|
||||
runs once per affected resource. No-op when configured correctly.
|
||||
*/}}
|
||||
{{- define "certctl.validateAuthType" -}}
|
||||
{{- $valid := list "api-key" "none" -}}
|
||||
{{- if not (has .Values.server.auth.type $valid) -}}
|
||||
{{- fail (printf "\n\nserver.auth.type=%q is not supported (valid: %v).\n\nFor JWT/OIDC, run an authenticating gateway in front of certctl\n(oauth2-proxy / Envoy ext_authz / Traefik ForwardAuth / Pomerium) and\nset server.auth.type=none here so the gateway terminates federated\nidentity. See docs/architecture.md \"Authenticating-gateway pattern\"\nand docs/upgrade-to-v2-jwt-removal.md for the migration walkthrough.\n\nG-1 audit closure: pre-G-1 the chart accepted type=jwt and the binary\nsilently downgraded to api-key middleware. The chart now fails at\ntemplate time so misconfigured deployments cannot ship.\n" .Values.server.auth.type $valid) -}}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
{{- include "certctl.validateAuthType" . }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
{{- include "certctl.tls.required" . }}
|
||||
{{- include "certctl.validateAuthType" . }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
{{- include "certctl.validateAuthType" . }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
|
||||
@@ -48,7 +48,14 @@ server:
|
||||
drop:
|
||||
- ALL
|
||||
|
||||
# Liveness and readiness probes (HTTPS-only as of v2.2)
|
||||
# Liveness and readiness probes (HTTPS-only as of v2.2).
|
||||
#
|
||||
# The two paths exposed for probes are `/health` and `/ready` —
|
||||
# registered in internal/api/router/router.go:76-85 and bypassing the
|
||||
# auth middleware via the no-auth list at cmd/server/main.go:920.
|
||||
# Both serve the same JSON shape today (`{"status":"healthy"}` /
|
||||
# `{"status":"ready"}`) but exist as separate routes so liveness and
|
||||
# readiness can diverge in the future without renaming.
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
@@ -59,9 +66,18 @@ server:
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
|
||||
# U-2 (P1, cat-u-healthcheck_protocol_mismatch — adjacent fix): pre-U-2
|
||||
# the readiness probe pointed at `/readyz`, the conventional kube-flavor
|
||||
# name. The certctl server doesn't register `/readyz` (only `/health`
|
||||
# and `/ready`) — see cmd/server/main.go:920 and
|
||||
# internal/api/router/router.go:81. K8s readiness probes therefore
|
||||
# received a 404 (or, with auth enabled, a 401 from the api-key middleware
|
||||
# because `/readyz` was NOT in the no-auth bypass set), pods stayed
|
||||
# `NotReady` indefinitely, and Helm rollouts stalled. Post-U-2 the path
|
||||
# matches a registered route.
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
path: /ready
|
||||
port: https
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 5
|
||||
@@ -112,10 +128,23 @@ server:
|
||||
port: 8443
|
||||
annotations: {}
|
||||
|
||||
# Authentication configuration
|
||||
# Authentication configuration.
|
||||
# Valid types: "api-key" (production) or "none" (demo only — disables
|
||||
# authentication on the API and logs a loud Warn at server startup).
|
||||
# For JWT/OIDC, run an authenticating gateway in front of certctl
|
||||
# (oauth2-proxy / Envoy ext_authz / Traefik ForwardAuth / Pomerium)
|
||||
# and set type=none here so the gateway terminates federated identity.
|
||||
# See docs/architecture.md "Authenticating-gateway pattern".
|
||||
#
|
||||
# G-1 (P1): pre-G-1 the chart accepted server.auth.type=jwt and the
|
||||
# certctl-server container silently routed every request through the
|
||||
# api-key bearer middleware — silent auth downgrade. Post-G-1 the
|
||||
# chart's `certctl.validateAuthType` template helper rejects any value
|
||||
# outside {api-key, none} at template time. See
|
||||
# docs/upgrade-to-v2-jwt-removal.md if you previously set type=jwt.
|
||||
auth:
|
||||
type: api-key # Options: api-key, none (for demo only)
|
||||
apiKey: "" # REQUIRED in production - set via --set or values override
|
||||
type: api-key
|
||||
apiKey: "" # REQUIRED when type=api-key (set via --set or values override).
|
||||
|
||||
# Logging configuration
|
||||
logging:
|
||||
@@ -260,7 +289,30 @@ postgresql:
|
||||
auth:
|
||||
database: certctl
|
||||
username: certctl
|
||||
password: "" # REQUIRED - set via --set or values override
|
||||
# REQUIRED — set via `--set postgresql.auth.password=<value>` or values override.
|
||||
#
|
||||
# WARNING (U-1): rotating this value after first deploy does NOT change the
|
||||
# database password. The `postgres:16-alpine` image runs `initdb` only when
|
||||
# /var/lib/postgresql/data is empty, so POSTGRES_PASSWORD is written into
|
||||
# pg_authid exactly once — on the first boot of the StatefulSet's PVC.
|
||||
# Subsequent rollouts pick up the new env value in the postgres container
|
||||
# but the certctl-server container's CERTCTL_DATABASE_URL also picks up
|
||||
# the new value, while pg_authid still expects the old one — leading to
|
||||
# `pq: password authentication failed for user "certctl"` (SQLSTATE 28P01).
|
||||
#
|
||||
# The certctl-server emits guidance via internal/repository/postgres/db.go::
|
||||
# wrapPingError when it sees SQLSTATE 28P01 at startup. To resolve in a
|
||||
# Helm deployment:
|
||||
# - Non-destructive (preferred for environments with data):
|
||||
# kubectl exec -it <release>-postgres-0 -- \
|
||||
# psql -U certctl -c "ALTER ROLE certctl PASSWORD '<new>';"
|
||||
# then update the secret/values to match and let the certctl-server
|
||||
# pod restart against the matching credential.
|
||||
# - Destructive (DESTROYS DATA — only acceptable on dev/demo PVCs):
|
||||
# helm uninstall <release> && \
|
||||
# kubectl delete pvc -l app.kubernetes.io/name=certctl,app.kubernetes.io/component=postgres && \
|
||||
# helm install <release> ... # PVC re-creates empty, initdb seeds new password
|
||||
password: ""
|
||||
|
||||
# Storage configuration
|
||||
storage:
|
||||
|
||||
@@ -0,0 +1,216 @@
|
||||
//go:build integration
|
||||
|
||||
// Package integration_test — image-level HEALTHCHECK contract.
|
||||
//
|
||||
// U-2 (P1, cat-u-healthcheck_protocol_mismatch): pre-U-2 the published
|
||||
// server image's Dockerfile HEALTHCHECK called `curl -f http://localhost:
|
||||
// 8443/health` against an HTTPS-only listener (HTTPS-Everywhere milestone,
|
||||
// v2.2 / tag v2.0.47). Operators outside docker-compose / Helm saw the
|
||||
// container reported as `unhealthy` indefinitely. The compose stack
|
||||
// overrode this HEALTHCHECK with `--cacert + https://`; the Helm chart
|
||||
// uses explicit `httpGet` probes that ignore Docker's HEALTHCHECK; the 5
|
||||
// example compose files all override with `curl -sfk https://localhost:
|
||||
// 8443/health`. So the observable failure was scoped to bare `docker run`
|
||||
// / Docker Swarm / Nomad / ECS users — exactly the "I just pulled the
|
||||
// published image" path.
|
||||
//
|
||||
// This file's tests pin the contract at the binary-image level. The
|
||||
// matching CI grep guardrail in .github/workflows/ci.yml catches the
|
||||
// regression at the Dockerfile-source level; both layers are needed
|
||||
// because someone could replace the HEALTHCHECK line with a sibling
|
||||
// broken pattern that the grep doesn't catch (e.g., a TCP-only check
|
||||
// against the HTTPS port).
|
||||
//
|
||||
// Run alongside the rest of the integration suite:
|
||||
//
|
||||
// cd deploy/test && go test -tags integration -v -run Healthcheck
|
||||
//
|
||||
// The tests skip cleanly with t.Skip when docker is not available
|
||||
// (CI without docker-in-docker, sandbox environments, etc.) so they
|
||||
// don't block local development on machines without docker.
|
||||
package integration_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// dockerAvailable returns true when `docker version` returns 0.
|
||||
// We cache it across tests in this file so the skip message prints once.
|
||||
func dockerAvailable(t *testing.T) bool {
|
||||
t.Helper()
|
||||
cmd := exec.Command("docker", "version", "--format", "{{.Server.Version}}")
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Logf("docker not available: %v\noutput: %s", err, string(out))
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// dockerCmd runs `docker <args...>` with a 60s budget, returning stdout
|
||||
// + stderr combined and the exit error if any. Used for short-lived
|
||||
// probes (inspect, build, run -d).
|
||||
func dockerCmd(t *testing.T, timeout time.Duration, args ...string) (string, error) {
|
||||
t.Helper()
|
||||
cmd := exec.Command("docker", args...)
|
||||
done := make(chan struct{})
|
||||
var out []byte
|
||||
var err error
|
||||
go func() {
|
||||
out, err = cmd.CombinedOutput()
|
||||
close(done)
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
return string(out), err
|
||||
case <-time.After(timeout):
|
||||
_ = cmd.Process.Kill()
|
||||
t.Fatalf("docker %v timed out after %v", args, timeout)
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
// TestPublishedServerImage_HealthcheckSpecUsesHTTPS performs the Dockerfile-
|
||||
// source-level shipped-shape pin: the inspected image's Healthcheck.Test
|
||||
// array MUST contain "https://localhost:8443/health" (and MUST NOT
|
||||
// contain "http://localhost:8443/health"). This is the lightweight half
|
||||
// of the contract — it doesn't require running the container, only
|
||||
// building it. It catches the audit-flagged bug directly.
|
||||
func TestPublishedServerImage_HealthcheckSpecUsesHTTPS(t *testing.T) {
|
||||
if !dockerAvailable(t) {
|
||||
t.Skip("docker not available — skipping image-level HEALTHCHECK test")
|
||||
}
|
||||
|
||||
const imgTag = "certctl-u2-healthcheck-spec-test"
|
||||
t.Cleanup(func() {
|
||||
_, _ = dockerCmd(t, 30*time.Second, "rmi", "-f", imgTag)
|
||||
})
|
||||
|
||||
// Build the server image. Use the repo root as context (this test
|
||||
// file lives at deploy/test/, the Dockerfile at the repo root).
|
||||
buildOut, err := dockerCmd(t, 5*time.Minute,
|
||||
"build", "-f", "../../Dockerfile", "-t", imgTag, "../..")
|
||||
if err != nil {
|
||||
t.Fatalf("docker build failed: %v\noutput:\n%s", err, buildOut)
|
||||
}
|
||||
|
||||
// Inspect the shipped HEALTHCHECK metadata.
|
||||
inspectOut, err := dockerCmd(t, 30*time.Second,
|
||||
"inspect", "--format", "{{json .Config.Healthcheck}}", imgTag)
|
||||
if err != nil {
|
||||
t.Fatalf("docker inspect failed: %v\noutput:\n%s", err, inspectOut)
|
||||
}
|
||||
|
||||
var hc struct {
|
||||
Test []string
|
||||
Interval int64
|
||||
Timeout int64
|
||||
}
|
||||
if err := json.Unmarshal([]byte(strings.TrimSpace(inspectOut)), &hc); err != nil {
|
||||
t.Fatalf("could not parse Healthcheck JSON %q: %v", inspectOut, err)
|
||||
}
|
||||
|
||||
joined := strings.Join(hc.Test, " ")
|
||||
|
||||
// Positive contract.
|
||||
if !strings.Contains(joined, "https://localhost:8443/health") {
|
||||
t.Errorf("Healthcheck.Test does not target https://localhost:8443/health\nfull: %v", hc.Test)
|
||||
}
|
||||
|
||||
// Negative contract — pre-U-2 regression shape MUST be absent.
|
||||
if strings.Contains(joined, "http://localhost:8443/health") {
|
||||
t.Errorf("Healthcheck.Test still contains the pre-U-2 plaintext shape: %v", hc.Test)
|
||||
}
|
||||
|
||||
// `-k` (or `--insecure`) must be present because the bootstrap cert
|
||||
// is per-deploy and the published image can't pin a CA bundle —
|
||||
// see the U-2 closure docblock on Dockerfile and the audit doc.
|
||||
if !strings.Contains(joined, "-k") && !strings.Contains(joined, "--insecure") {
|
||||
t.Errorf("Healthcheck.Test omits -k / --insecure flag (required for self-signed bootstrap probe): %v", hc.Test)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPublishedAgentImage_HealthcheckSpecExists pins the U-2 adjacent
|
||||
// fix that added a HEALTHCHECK to the agent image. Pre-U-2 the agent
|
||||
// image had no HEALTHCHECK declaration, so bare-`docker run` agents got
|
||||
// `none` health status from Docker. Post-U-2 the agent uses pgrep to
|
||||
// verify the process is alive (mirroring the docker-compose pattern at
|
||||
// deploy/docker-compose.yml:173, which also became reliable post-U-2
|
||||
// because procps is now installed in the runtime image).
|
||||
func TestPublishedAgentImage_HealthcheckSpecExists(t *testing.T) {
|
||||
if !dockerAvailable(t) {
|
||||
t.Skip("docker not available — skipping image-level HEALTHCHECK test")
|
||||
}
|
||||
|
||||
const imgTag = "certctl-u2-agent-healthcheck-spec-test"
|
||||
t.Cleanup(func() {
|
||||
_, _ = dockerCmd(t, 30*time.Second, "rmi", "-f", imgTag)
|
||||
})
|
||||
|
||||
buildOut, err := dockerCmd(t, 5*time.Minute,
|
||||
"build", "-f", "../../Dockerfile.agent", "-t", imgTag, "../..")
|
||||
if err != nil {
|
||||
t.Fatalf("docker build failed: %v\noutput:\n%s", err, buildOut)
|
||||
}
|
||||
|
||||
inspectOut, err := dockerCmd(t, 30*time.Second,
|
||||
"inspect", "--format", "{{json .Config.Healthcheck}}", imgTag)
|
||||
if err != nil {
|
||||
t.Fatalf("docker inspect failed: %v\noutput:\n%s", err, inspectOut)
|
||||
}
|
||||
|
||||
trimmed := strings.TrimSpace(inspectOut)
|
||||
if trimmed == "null" || trimmed == "" {
|
||||
t.Fatalf("agent image has no HEALTHCHECK (got %q) — U-2 adjacent fix regressed", inspectOut)
|
||||
}
|
||||
|
||||
var hc struct {
|
||||
Test []string
|
||||
}
|
||||
if err := json.Unmarshal([]byte(trimmed), &hc); err != nil {
|
||||
t.Fatalf("could not parse Healthcheck JSON %q: %v", inspectOut, err)
|
||||
}
|
||||
|
||||
joined := strings.Join(hc.Test, " ")
|
||||
if !strings.Contains(joined, "pgrep") {
|
||||
t.Errorf("agent Healthcheck.Test does not use pgrep (lost the process-presence shape): %v", hc.Test)
|
||||
}
|
||||
if !strings.Contains(joined, "certctl-agent") {
|
||||
t.Errorf("agent Healthcheck.Test does not target the certctl-agent process name: %v", hc.Test)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPublishedServerImage_HealthcheckTransitionsToHealthy is the
|
||||
// runtime-level contract: the built image, when started, must transition
|
||||
// to `healthy` within the start-period + 30s observability budget. This
|
||||
// is the heavy test — it requires the server to actually start, which
|
||||
// in turn requires either a reachable database OR a startup that fails
|
||||
// gracefully enough to keep the HEALTHCHECK probe target alive.
|
||||
//
|
||||
// The container is started with CERTCTL_DATABASE_URL pointing at an
|
||||
// unreachable host so the server fails its postgres bring-up — but
|
||||
// importantly, fails AFTER the TLS listener has come up, because the
|
||||
// HEALTHCHECK probe target is the TLS listener. We don't actually need
|
||||
// the database to validate the HEALTHCHECK shape.
|
||||
//
|
||||
// IMPORTANT: this test is the runtime contract. If you're working on the
|
||||
// server's startup ordering and the listener now comes up AFTER the
|
||||
// database, this test must adapt — start a sidecar postgres via
|
||||
// testcontainers-go (see internal/integration/lifecycle_test.go for the
|
||||
// pattern) and connect the certctl-server container to it.
|
||||
func TestPublishedServerImage_HealthcheckTransitionsToHealthy(t *testing.T) {
|
||||
if !dockerAvailable(t) {
|
||||
t.Skip("docker not available — skipping runtime HEALTHCHECK test")
|
||||
}
|
||||
if testing.Short() {
|
||||
t.Skip("runtime HEALTHCHECK test takes ~45s; skipping under -short")
|
||||
}
|
||||
t.Skip("runtime probe contract not yet wired to a sidecar postgres; " +
|
||||
"image-spec contract above (TestPublishedServerImage_HealthcheckSpecUsesHTTPS) " +
|
||||
"covers the audit-flagged regression. Re-enable once the integration " +
|
||||
"harness provisions postgres for image-level smoke.")
|
||||
}
|
||||
+13
-2
@@ -348,7 +348,12 @@ erDiagram
|
||||
}
|
||||
```
|
||||
|
||||
Migrations are idempotent (`IF NOT EXISTS` on all CREATE statements, `ON CONFLICT (id) DO NOTHING` on all seed data) so they're safe to run multiple times — important for Docker Compose where both initdb and the server may run the same SQL.
|
||||
The ER diagram above documents **database shape**, not REST-API wire shape. Several columns are intentionally server-internal and never serialized to clients:
|
||||
|
||||
- `agents.api_key_hash` — SHA-256 of the agent's plaintext API key, populated by `service.RegisterAgent` (`hashAPIKey(apiKey)` at `internal/service/agent.go`) and consumed by `repository.AgentRepository::GetByAPIKey` for the auth-lookup. **Not** exposed via the REST API, **not** echoed via CLI / MCP / agent registration response, **never** logged. Enforced by `internal/domain/connector.go::Agent.MarshalJSON` (G-2 audit closure, `cat-s5-apikey_leak`); the OpenAPI Agent schema explicitly excludes the field, the frontend `Agent` interface omits it, and a CI grep guardrail at `.github/workflows/ci.yml` blocks reintroduction.
|
||||
- `issuers.config` / `deployment_targets.config` — plaintext jsonb shadow of the AES-GCM-encrypted on-disk blob; the encrypted form lives on `EncryptedConfig []byte` (Go-only field tagged `json:"-"`).
|
||||
|
||||
Migrations are idempotent (`IF NOT EXISTS` on all CREATE statements, `ON CONFLICT (id) DO NOTHING` on all seed data) so they're safe to run multiple times. Pre-U-3 (`cat-u-seed_initdb_schema_drift`, GitHub #10) the deploy compose stack mounted both a hand-curated subset of `migrations/*.up.sql` and `seed.sql` into postgres `/docker-entrypoint-initdb.d/` so initdb applied them on first boot, *and* the server re-applied the same files via `RunMigrations` on every start. The dual source of truth was the bug: every time a migration shipped that the seed depended on (e.g., 000013 added `policy_rules.severity`), the mount list had to be updated by hand, and missing the update crashed initdb on first boot. Post-U-3 the server is the single source of truth: postgres comes up with an empty schema, `RunMigrations` applies the entire ladder, then `RunSeed` lands the baseline seed (and `RunDemoSeed` lands the demo overlay when `CERTCTL_DEMO_SEED=true`). Helm has used this pattern since day one (postgres-init `emptyDir`); the docker-compose deploy now matches.
|
||||
|
||||
## Data Flow: Certificate Lifecycle
|
||||
|
||||
@@ -891,9 +896,15 @@ The HTTP middleware stack processes requests in the following order (see `cmd/se
|
||||
4. **BodyLimit** - request body size cap via `http.MaxBytesReader`
|
||||
5. **RateLimiter** - token bucket rate limiting (optional, when enabled)
|
||||
6. **CORS** - cross-origin request handling (deny-by-default)
|
||||
7. **Auth** - API key or JWT validation
|
||||
7. **Auth** - API key validation (or none in development; JWT/OIDC via authenticating gateway, see below — not in-process)
|
||||
8. **AuditLog** - records every API call to the audit trail (requires auth context for actor)
|
||||
|
||||
### Authenticating-gateway pattern (JWT, OIDC, mTLS)
|
||||
|
||||
certctl's in-process authentication surface is intentionally narrow: `api-key` for production deployments and `none` for development. There is no in-process JWT, OIDC, mTLS, or SAML middleware. (`CERTCTL_AUTH_TYPE=jwt` was accepted pre-G-1 but silently routed through the api-key bearer middleware — a security finding masquerading as a config option, removed at the v2.x boundary; see [`upgrade-to-v2-jwt-removal.md`](upgrade-to-v2-jwt-removal.md) if you previously set it.)
|
||||
|
||||
For deployments that need JWT/OIDC/mTLS, the standard pattern is to put an authenticating gateway in front of certctl and configure `CERTCTL_AUTH_TYPE=none` on the upstream certctl process. The gateway terminates the federated identity protocol, validates tokens / certificates / SAML assertions, and proxies the authenticated request to certctl as a same-origin call on a private network. This separation gives operators the full breadth of the modern identity ecosystem (oauth2-proxy, Envoy `ext_authz`, Traefik `ForwardAuth`, Pomerium, Authelia, Caddy `forward_auth`, Apache `mod_auth_openidc`, nginx `auth_request`) without certctl itself having to track signing-key rotation, claim mapping, audience validation, and the rest of the JWT/OIDC surface area. Operators wanting per-request actor attribution past the gateway boundary forward the gateway-resolved identity (e.g., `X-Auth-Request-User` from oauth2-proxy) and run a small authorization layer at the gateway that enforces the bearer-key contract certctl actually uses.
|
||||
|
||||
### Concurrency Safety
|
||||
|
||||
The background scheduler uses `sync/atomic.Bool` idempotency guards on every loop (8 always-on plus up to 4 optional) — if a tick fires while the previous iteration is still running, it skips. A `sync.WaitGroup` tracks all in-flight goroutines. `WaitForCompletion(timeout)` blocks during shutdown until all work finishes or the timeout expires, preventing state corruption from mid-flight database operations during process exit.
|
||||
|
||||
+32
-15
@@ -1141,13 +1141,30 @@ API Endpoints:
|
||||
- **`GET /api/v1/digest/preview`** — Render digest HTML for preview (no email sent)
|
||||
- **`POST /api/v1/digest/send`** — Trigger digest send immediately (outside of schedule)
|
||||
|
||||
> **Note (HTTPS-only as of v2.2):** The `curl` examples in this section
|
||||
> and below all target the HTTPS-only control plane. Extract the
|
||||
> docker-compose self-signed bootstrap CA bundle once and reuse it on
|
||||
> every call:
|
||||
>
|
||||
> ```bash
|
||||
> export CA=/tmp/certctl-ca.crt
|
||||
> docker compose -f deploy/docker-compose.yml exec -T certctl-server \
|
||||
> cat /etc/certctl/tls/ca.crt > "$CA"
|
||||
> ```
|
||||
>
|
||||
> Then pass `--cacert "$CA"` (or `-k` for one-off smoke tests, never in
|
||||
> production). The same pattern is documented in
|
||||
> [`quickstart.md`](quickstart.md). Pre-U-2 these examples used `http://`
|
||||
> and silently failed against the HTTPS listener; post-U-2 they speak
|
||||
> HTTPS with the operator-managed CA bundle.
|
||||
|
||||
Example:
|
||||
```bash
|
||||
# Preview digest
|
||||
curl http://localhost:8443/api/v1/digest/preview | jq '.html'
|
||||
curl --cacert "$CA" https://localhost:8443/api/v1/digest/preview | jq '.html'
|
||||
|
||||
# Send digest immediately
|
||||
curl -X POST http://localhost:8443/api/v1/digest/send
|
||||
curl --cacert "$CA" -X POST https://localhost:8443/api/v1/digest/send
|
||||
```
|
||||
|
||||
Each notifier is enabled by its configuration env var:
|
||||
@@ -1294,24 +1311,24 @@ The agent scans these directories on startup and every 6 hours, looking for cert
|
||||
|
||||
```bash
|
||||
# List discovered certificates (filter by agent, status)
|
||||
curl -s "http://localhost:8443/api/v1/discovered-certificates?agent_id=agent-nginx-01&status=new" | jq .
|
||||
curl --cacert "$CA" -s "https://localhost:8443/api/v1/discovered-certificates?agent_id=agent-nginx-01&status=new" | jq .
|
||||
|
||||
# Get discovery detail
|
||||
curl -s http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID | jq .
|
||||
|
||||
# Claim a discovered cert (link to managed certificate)
|
||||
curl -s -X POST http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/claim \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/claim \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"managed_certificate_id": "mc-api-prod"}' | jq .
|
||||
|
||||
# Dismiss a discovery
|
||||
curl -s -X POST http://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/dismiss | jq .
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/discovered-certificates/DISCOVERY_ID/dismiss | jq .
|
||||
|
||||
# View discovery scan history
|
||||
curl -s http://localhost:8443/api/v1/discovery-scans | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/discovery-scans | jq .
|
||||
|
||||
# Summary counts (new, claimed, dismissed)
|
||||
curl -s http://localhost:8443/api/v1/discovery-summary | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/discovery-summary | jq .
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
@@ -1340,7 +1357,7 @@ Network scan targets can be managed from the **Network Scans** dashboard page (c
|
||||
|
||||
```bash
|
||||
# Create a scan target for your internal network (or use the dashboard's "+ New Target" button)
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "Production Web Servers",
|
||||
@@ -1365,26 +1382,26 @@ curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
|
||||
```bash
|
||||
# List all scan targets
|
||||
curl -s http://localhost:8443/api/v1/network-scan-targets | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/network-scan-targets | jq .
|
||||
|
||||
# Create a scan target
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets \
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/network-scan-targets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name": "DMZ", "cidrs": ["172.16.0.0/24"], "ports": [443]}' | jq .
|
||||
|
||||
# Get a specific target (includes last_scan_at, last_scan_certs_found)
|
||||
curl -s http://localhost:8443/api/v1/network-scan-targets/nst-dmz | jq .
|
||||
curl --cacert "$CA" -s https://localhost:8443/api/v1/network-scan-targets/nst-dmz | jq .
|
||||
|
||||
# Trigger an immediate scan (doesn't wait for scheduler)
|
||||
curl -s -X POST http://localhost:8443/api/v1/network-scan-targets/nst-dmz/scan | jq .
|
||||
curl --cacert "$CA" -s -X POST https://localhost:8443/api/v1/network-scan-targets/nst-dmz/scan | jq .
|
||||
|
||||
# Update scan configuration
|
||||
curl -s -X PUT http://localhost:8443/api/v1/network-scan-targets/nst-dmz \
|
||||
curl --cacert "$CA" -s -X PUT https://localhost:8443/api/v1/network-scan-targets/nst-dmz \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"ports": [443, 8443, 9443], "timeout_ms": 3000}' | jq .
|
||||
|
||||
# Delete a scan target
|
||||
curl -s -X DELETE http://localhost:8443/api/v1/network-scan-targets/nst-dmz
|
||||
curl --cacert "$CA" -s -X DELETE https://localhost:8443/api/v1/network-scan-targets/nst-dmz
|
||||
```
|
||||
|
||||
### Scheduler Integration
|
||||
|
||||
@@ -60,6 +60,8 @@ cp deploy/.env.example deploy/.env
|
||||
docker compose -f deploy/docker-compose.yml up -d --build
|
||||
```
|
||||
|
||||
> **Warning:** Edit `POSTGRES_PASSWORD` *before* the very first `docker compose up`. Postgres seeds the password into its data directory only on first boot of an empty volume — after that, the password is baked into `pg_authid` and the env var is ignored. If you boot once with the default and later change `POSTGRES_PASSWORD` in `.env`, the certctl-server container picks up the new value but postgres still authenticates against the old one, and the server logs `pq: password authentication failed for user "certctl"` (SQLSTATE 28P01). Two ways out: tear down the volume with `docker compose -f deploy/docker-compose.yml down -v` (this **deletes all data**) and bring up fresh, or rotate non-destructively with `docker compose -f deploy/docker-compose.yml exec postgres psql -U certctl -c "ALTER ROLE certctl PASSWORD '<new>';"` and then restart certctl-server with the matching `POSTGRES_PASSWORD`.
|
||||
|
||||
### Docker Compose Environments
|
||||
|
||||
The `deploy/` directory contains four compose files for different use cases:
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
# Upgrading past G-1 — `CERTCTL_AUTH_TYPE=jwt` removal
|
||||
|
||||
If your certctl deployment currently sets `CERTCTL_AUTH_TYPE=jwt` (or `server.auth.type=jwt` in Helm), the next certctl upgrade will fail-fast at startup with a dedicated diagnostic. This guide explains why, what to switch to, and how to keep JWT/OIDC at your edge.
|
||||
|
||||
For everyone else — operators running `api-key` or `none` — this upgrade is a no-op. Skip to [`upgrade-to-tls.md`](upgrade-to-tls.md) for the v2.2 HTTPS-everywhere migration if you haven't done that one yet.
|
||||
|
||||
## Why we removed it
|
||||
|
||||
Pre-G-1, the config validator at `internal/config/config.go` accepted three values for `CERTCTL_AUTH_TYPE`: `api-key`, `jwt`, and `none`. The startup log line at `cmd/server/main.go` faithfully echoed `"authentication enabled" "type"="jwt"` when an operator picked `jwt`. Reasonable people read that and concluded JWT auth was on.
|
||||
|
||||
It wasn't. Grep `internal/ cmd/` for `NewJWT`, `JWTMiddleware`, or `jwt.Parse` — pre-G-1, there were zero matches in production code. The auth-middleware wiring at `cmd/server/main.go:653` unconditionally called `middleware.NewAuthWithNamedKeys(namedKeys)` regardless of `cfg.Auth.Type`. So `CERTCTL_AUTH_TYPE=jwt` just routed every request through the api-key bearer middleware, comparing the incoming `Authorization: Bearer <something>` against whatever string the operator put in `CERTCTL_AUTH_SECRET`. Real JWT clients got 401 (the api-key middleware saw the JWT string as a literal token and compared bytes). Operators who treated `CERTCTL_AUTH_SECRET` as a JWT signing secret (and therefore handled it less carefully than an api-key) handed an attacker an api-key. Silent auth downgrade — a security finding masquerading as a config option.
|
||||
|
||||
We chose to remove the option rather than implement JWT middleware. Implementing real JWT/OIDC requires jwks vs static-secret rotation, claim mapping (which claim is the actor / the admin flag?), expiry enforcement, audience and issuer validation, key rollover semantics, and regression coverage at the same depth as the existing api-key path. That's a feature, not a fix. The audit-recommended structural fix — and the one that actually closes the hazard — is to fail loudly instead of silently downgrading.
|
||||
|
||||
## What changes at startup
|
||||
|
||||
Post-G-1, a binary started with `CERTCTL_AUTH_TYPE=jwt` exits non-zero before opening the listener:
|
||||
|
||||
```
|
||||
Failed to load configuration: CERTCTL_AUTH_TYPE=jwt is no longer accepted
|
||||
(G-1 silent auth downgrade): no JWT middleware ships with certctl. To use
|
||||
JWT/OIDC, run an authenticating gateway (oauth2-proxy / Envoy ext_authz /
|
||||
Traefik ForwardAuth / Pomerium) in front of certctl and set
|
||||
CERTCTL_AUTH_TYPE=none on the upstream. See docs/architecture.md
|
||||
"Authenticating-gateway pattern" and docs/upgrade-to-v2-jwt-removal.md
|
||||
for the migration walkthrough
|
||||
```
|
||||
|
||||
Helm operators get the same shape at `helm install` / `helm upgrade` template time: `server.auth.type=jwt` is rejected by the chart's `certctl.validateAuthType` template helper before any Kubernetes object is rendered.
|
||||
|
||||
The CI-side regression guard at `.github/workflows/ci.yml` blocks any future PR that re-introduces `"jwt"` as an auth-type literal in production code or spec.
|
||||
|
||||
## Recovery — pick one
|
||||
|
||||
### Option A — switch to `api-key` (you weren't actually using JWT)
|
||||
|
||||
If your `CERTCTL_AUTH_SECRET` was a single high-entropy token and your clients sent it as `Authorization: Bearer <token>`, you were already using api-key auth — you just had `CERTCTL_AUTH_TYPE` set to the wrong string. Flip it:
|
||||
|
||||
```
|
||||
# .env (docker-compose)
|
||||
CERTCTL_AUTH_TYPE=api-key
|
||||
CERTCTL_AUTH_SECRET=<your-existing-token>
|
||||
```
|
||||
|
||||
```
|
||||
# Helm
|
||||
helm upgrade <release> deploy/helm/certctl/ \
|
||||
--reuse-values \
|
||||
--set server.auth.type=api-key \
|
||||
--set server.auth.apiKey=<your-existing-token>
|
||||
```
|
||||
|
||||
No client changes needed — the same Bearer token continues to work. The startup log will now read `"authentication enabled" "type"="api-key"`, which matches what was actually happening pre-G-1.
|
||||
|
||||
### Option B — front certctl with an authenticating gateway
|
||||
|
||||
If you genuinely need JWT, OIDC, mTLS, or SAML, run an authenticating gateway in front of certctl and let the gateway terminate the federated identity protocol. Configure certctl for `CERTCTL_AUTH_TYPE=none`:
|
||||
|
||||
```
|
||||
CERTCTL_AUTH_TYPE=none
|
||||
```
|
||||
|
||||
Then put an oauth2-proxy / Envoy `ext_authz` / Traefik `ForwardAuth` / Pomerium / Authelia (etc.) in the network path between operators and certctl. The gateway validates the identity and proxies the authenticated request to certctl as a same-origin call on a private network.
|
||||
|
||||
### Concrete walkthrough — oauth2-proxy + certctl on docker-compose
|
||||
|
||||
This is the simplest production-grade JWT/OIDC shape. It assumes you have an OIDC provider (Okta, Auth0, Google Workspace, Keycloak, Dex) and a registered client_id / client_secret.
|
||||
|
||||
```yaml
|
||||
# deploy/docker-compose.gateway.yml — overlay on the base compose file
|
||||
services:
|
||||
oauth2-proxy:
|
||||
image: quay.io/oauth2-proxy/oauth2-proxy:latest
|
||||
command:
|
||||
- --provider=oidc
|
||||
- --oidc-issuer-url=https://<your-issuer>/
|
||||
- --client-id=${OIDC_CLIENT_ID}
|
||||
- --client-secret=${OIDC_CLIENT_SECRET}
|
||||
- --cookie-secret=${OAUTH2_PROXY_COOKIE_SECRET} # openssl rand -base64 32
|
||||
- --upstream=http://certctl-server:8443 # internal-network only; certctl listens on 8443
|
||||
- --http-address=0.0.0.0:4180
|
||||
- --email-domain=*
|
||||
- --pass-access-token=true
|
||||
- --pass-authorization-header=true
|
||||
- --set-authorization-header=true # forwards a bearer token upstream
|
||||
- --skip-provider-button=true
|
||||
- --reverse-proxy=true
|
||||
ports:
|
||||
- "443:4180"
|
||||
depends_on:
|
||||
- certctl-server
|
||||
networks:
|
||||
- certctl-network
|
||||
|
||||
certctl-server:
|
||||
environment:
|
||||
CERTCTL_AUTH_TYPE: none # gateway terminates auth — see docs/upgrade-to-v2-jwt-removal.md
|
||||
# ... rest of the certctl env block unchanged
|
||||
```
|
||||
|
||||
Operators hit `https://<your-host>/`, get redirected through the OIDC provider, land back at oauth2-proxy with a session cookie, and oauth2-proxy proxies their request to certctl on the internal Docker network. certctl itself is HTTPS-only on `:8443` (TLS 1.3, see [`tls.md`](tls.md)) but operator browsers never see that hop directly. Bind certctl-server's `:8443` to the internal Docker network only — do NOT publish it to the host. The audit trail will record the actor as the gateway-forwarded identity if you also configure a small bearer-token-mapping shim at the gateway (most production deployments do this with a per-user api-key issued by the gateway after OIDC validation).
|
||||
|
||||
### Traefik ForwardAuth pattern (Kubernetes)
|
||||
|
||||
Same shape, kubernetes-flavored:
|
||||
|
||||
```yaml
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: Middleware
|
||||
metadata:
|
||||
name: oidc-forward-auth
|
||||
spec:
|
||||
forwardAuth:
|
||||
address: http://oauth2-proxy.auth.svc.cluster.local:4180
|
||||
trustForwardHeader: true
|
||||
authResponseHeaders:
|
||||
- X-Auth-Request-User
|
||||
- X-Auth-Request-Email
|
||||
- Authorization
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: certctl
|
||||
spec:
|
||||
routes:
|
||||
- match: Host(`certctl.example.com`)
|
||||
kind: Rule
|
||||
middlewares:
|
||||
- name: oidc-forward-auth
|
||||
services:
|
||||
- name: certctl-server
|
||||
port: 8443
|
||||
```
|
||||
|
||||
The certctl Helm release runs with `server.auth.type=none`. The Traefik IngressRoute attaches `oidc-forward-auth` as a middleware so every request is OIDC-validated by oauth2-proxy before reaching certctl.
|
||||
|
||||
### Envoy `ext_authz` pattern
|
||||
|
||||
For service-mesh deployments (Istio, Consul, plain Envoy), the `ext_authz` filter calls out to an external authorization service per-request. Same outcome: certctl runs `CERTCTL_AUTH_TYPE=none` and Envoy + your authz service handle JWT/OIDC/mTLS at the mesh edge. See the [Envoy ext_authz docs](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_authz_filter) for the configuration surface.
|
||||
|
||||
## Rollback
|
||||
|
||||
Pre-G-1 binaries silently accepted `CERTCTL_AUTH_TYPE=jwt` and routed through the api-key middleware. Downgrading the binary is the only mechanical rollback path, and it puts you back into the silent-downgrade state — which is exactly what the G-1 audit finding is about. We don't recommend it. If something is forcing your hand, capture the operational issue you're hitting and open a GitHub issue against the certctl repo with the SHAs involved; the Authenticating-gateway pattern was specifically designed to cover the use cases that historically led operators to set `CERTCTL_AUTH_TYPE=jwt`.
|
||||
|
||||
There is no on-disk state that changes with this upgrade — no migrations to roll back, no encrypted config to re-encode, no certificates to re-issue. The change is entirely in the config-validation surface and the helm-chart template guard.
|
||||
|
||||
## Cross-references
|
||||
|
||||
- [`architecture.md`](architecture.md) — "Authenticating-gateway pattern (JWT, OIDC, mTLS)" section.
|
||||
- [`tls.md`](tls.md) — TLS provisioning patterns. The gateway proxying to certctl-server still needs to trust certctl's TLS cert; same patterns apply.
|
||||
- [`../deploy/helm/certctl/README.md`](../deploy/helm/certctl/README.md) — Helm-chart-flavored guidance.
|
||||
- `internal/config/config.go::ValidAuthTypes` — the single source of truth for what's accepted post-G-1.
|
||||
- `internal/repository/postgres/db.go::wrapPingError` — unrelated; pattern for runtime diagnostic of operator misconfiguration.
|
||||
- `coverage-gap-audit-2026-04-24-v5/unified-audit.md` — the audit finding (`cat-g-jwt_silent_auth_downgrade`).
|
||||
@@ -0,0 +1,55 @@
|
||||
# Deployment Examples
|
||||
|
||||
Five turnkey docker-compose scenarios that show certctl deployed against real CA backends and target shapes. Each subdirectory is self-contained — pick the one closest to your stack and have it running in minutes.
|
||||
|
||||
| Example | Stack | What it shows |
|
||||
|---------|-------|---------------|
|
||||
| [`acme-nginx/`](acme-nginx/acme-nginx.md) | Let's Encrypt + NGINX (HTTP-01) | The default public-CA path: ACME-issued certs deployed to NGINX. |
|
||||
| [`acme-wildcard-dns01/`](acme-wildcard-dns01/acme-wildcard-dns01.md) | Let's Encrypt wildcard (DNS-01) | Wildcard certificates via DNS-01 with pluggable DNS hooks. |
|
||||
| [`private-ca-traefik/`](private-ca-traefik/private-ca-traefik.md) | Local CA + Traefik | Internal-only certs from a private CA, deployed to Traefik. |
|
||||
| [`step-ca-haproxy/`](step-ca-haproxy/step-ca-haproxy.md) | Smallstep step-ca + HAProxy | Self-hosted CA with HAProxy as the deployment target. |
|
||||
| [`multi-issuer/`](multi-issuer/multi-issuer.md) | Let's Encrypt + Local CA | Public + private certs side-by-side from a single dashboard. |
|
||||
|
||||
## Common operational notes
|
||||
|
||||
These notes apply to **every** example. They're called out here so the per-example walkthroughs stay focused on the issuer/target wiring instead of repeating ops boilerplate.
|
||||
|
||||
### Postgres password rotation — first-boot binding trap (U-1)
|
||||
|
||||
Every example file uses `${DB_PASSWORD:-certctl-dev-password}` as the postgres password env var, with the data directory persisted via a named volume. The `postgres:16-alpine` image runs `initdb` exactly once — when `/var/lib/postgresql/data` is empty — and that's the only time `POSTGRES_PASSWORD` is written into `pg_authid`. If you boot once with the default and then change `DB_PASSWORD` (in your shell, in a `.env` file, or in a wrapper script), the certctl-server container picks up the new value but the postgres container continues to authenticate against the old one. The server fails its startup `db.Ping()` with `pq: password authentication failed for user "certctl"` (SQLSTATE 28P01).
|
||||
|
||||
The certctl-server emits guidance pointing at the fix when this fires (see `internal/repository/postgres/db.go::wrapPingError`). The two remediation paths:
|
||||
|
||||
- **Destructive — wipes all certctl data, only acceptable on demo/test setups:**
|
||||
```bash
|
||||
docker compose -f examples/<example>/docker-compose.yml down -v
|
||||
docker compose -f examples/<example>/docker-compose.yml up -d --build
|
||||
```
|
||||
- **Non-destructive — preserves data, rotates `pg_authid` in place:**
|
||||
```bash
|
||||
docker compose -f examples/<example>/docker-compose.yml exec postgres \
|
||||
psql -U certctl -c "ALTER ROLE certctl PASSWORD '<new>';"
|
||||
# Then redeploy with DB_PASSWORD set to <new> in your shell or .env
|
||||
```
|
||||
|
||||
The cleanest practice for a fresh demo: set `DB_PASSWORD` once in your shell **before** the very first `docker compose up`, and don't change it during the demo's lifetime. If you must rotate, use the non-destructive path.
|
||||
|
||||
Same root cause and remediation pattern is documented for the canonical quickstart in [`../docs/quickstart.md`](../docs/quickstart.md), the production compose surface in [`../deploy/ENVIRONMENTS.md`](../deploy/ENVIRONMENTS.md), and the Helm chart in [`../deploy/helm/certctl/README.md`](../deploy/helm/certctl/README.md).
|
||||
|
||||
### TLS for the certctl control plane
|
||||
|
||||
Every example boots certctl with HTTPS-only on port 8443 (TLS 1.3 pinned, no plaintext listener as of v2.2). The shipped `certctl-tls-init` init container generates a self-signed ECDSA-P256 cert on first boot — fine for the example demos, **never** acceptable for a public deployment. For production, swap the init container for cert-manager, an operator-supplied Secret, or your internal CA — see [`../docs/tls.md`](../docs/tls.md) for the full pattern matrix.
|
||||
|
||||
### Tearing down
|
||||
|
||||
To stop services but **keep** the postgres volume (so you can pick up where you left off):
|
||||
```bash
|
||||
docker compose -f examples/<example>/docker-compose.yml down
|
||||
```
|
||||
|
||||
To stop services **and** wipe all data (clean slate for the next run):
|
||||
```bash
|
||||
docker compose -f examples/<example>/docker-compose.yml down -v
|
||||
```
|
||||
|
||||
Note that `down -v` is the only canonical way to recover from the postgres-password trap when the non-destructive `ALTER ROLE` route is unavailable (e.g., you've forgotten the original password).
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
This example demonstrates certctl's core use case: **automatically manage TLS certificates for NGINX using Let's Encrypt (ACME HTTP-01 challenges).**
|
||||
|
||||
> **Operational notes** shared by every example (postgres password rotation trap, TLS provisioning, teardown semantics) live in [`../README.md`](../README.md). Read it first if you plan to change `DB_PASSWORD` after the initial `docker compose up` — the postgres volume binds the password on first boot only.
|
||||
|
||||
## What This Does
|
||||
|
||||
- Deploys certctl server (control plane) with PostgreSQL
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
**What this does:** Issues wildcard certificates (e.g., `*.example.com`) from Let's Encrypt using DNS-01 challenge validation.
|
||||
|
||||
> **Operational notes** shared by every example (postgres password rotation trap, TLS provisioning, teardown semantics) live in [`../README.md`](../README.md). Read it first if you plan to change `DB_PASSWORD` after the initial `docker compose up` — the postgres volume binds the password on first boot only.
|
||||
|
||||
This example is ideal for:
|
||||
- Issuing wildcard certificates (`*.example.com`)
|
||||
- Services behind NAT, firewalls, or non-public networks
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
This example demonstrates certctl managing **both public and internal certificates from a single dashboard**. Public-facing services use Let's Encrypt (ACME), while internal services use a private Local CA — all visible and managed in one place.
|
||||
|
||||
> **Operational notes** shared by every example (postgres password rotation trap, TLS provisioning, teardown semantics) live in [`../README.md`](../README.md). Read it first if you plan to change `DB_PASSWORD` after the initial `docker compose up` — the postgres volume binds the password on first boot only.
|
||||
|
||||
## The Use Case
|
||||
|
||||
You have:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# Private CA + Traefik Example
|
||||
|
||||
> **Operational notes** shared by every example (postgres password rotation trap, TLS provisioning, teardown semantics) live in [`../README.md`](../README.md). Read it first if you plan to change `DB_PASSWORD` after the initial `docker compose up` — the postgres volume binds the password on first boot only.
|
||||
|
||||
This example demonstrates certctl managing certificates for **internal services without public CA dependency**. Ideal for enterprise environments where:
|
||||
|
||||
- All services are internal (VPN, private networks)
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
This example demonstrates certctl managing certificates issued by **Smallstep step-ca** and deploying them to **HAProxy**.
|
||||
|
||||
> **Operational notes** shared by every example (postgres password rotation trap, TLS provisioning, teardown semantics) live in [`../README.md`](../README.md). Read it first if you plan to change `DB_PASSWORD` after the initial `docker compose up` — the postgres volume binds the password on first boot only.
|
||||
|
||||
## Scenario
|
||||
|
||||
You're a Smallstep user running step-ca as your internal PKI. You have HAProxy load balancers that need certificates. This setup:
|
||||
|
||||
@@ -893,3 +893,161 @@ func TestAgentReportJobStatus_ServiceError(t *testing.T) {
|
||||
func stringPtr(s string) *string {
|
||||
return &s
|
||||
}
|
||||
|
||||
// G-2 (P1): cat-s5-apikey_leak audit closure tests. Pre-G-2,
|
||||
// Agent.APIKeyHash was tagged `json:"api_key_hash"` and shipped on
|
||||
// every wire surface that returned domain.Agent. Post-G-2 the tag is
|
||||
// "-" and Agent.MarshalJSON enforces redaction via a marshal-time copy
|
||||
// (see internal/domain/connector_test.go for the type-level pin). These
|
||||
// four tests are the wire-shape contract — they capture the actual HTTP
|
||||
// response body via httptest and assert the credential-derivative hash
|
||||
// is absent.
|
||||
//
|
||||
// One sentinel value (g2HandlerLeakSentinel) flows through every fixture
|
||||
// so a single grep over a failing test's output identifies the leak
|
||||
// surface immediately.
|
||||
const g2HandlerLeakSentinel = "sha256:LEAKED-CREDENTIAL-DERIVATIVE-HANDLER-SENTINEL"
|
||||
|
||||
func TestListAgents_DoesNotLeakAPIKeyHash(t *testing.T) {
|
||||
now := time.Now()
|
||||
mock := &MockAgentService{
|
||||
ListAgentsFn: func(page, perPage int) ([]domain.Agent, int64, error) {
|
||||
return []domain.Agent{
|
||||
{ID: "a-1", Name: "agent-one", Hostname: "host-1",
|
||||
Status: domain.AgentStatusOnline, RegisteredAt: now,
|
||||
APIKeyHash: g2HandlerLeakSentinel + "-1"},
|
||||
{ID: "a-2", Name: "agent-two", Hostname: "host-2",
|
||||
Status: domain.AgentStatusOnline, RegisteredAt: now,
|
||||
APIKeyHash: g2HandlerLeakSentinel + "-2"},
|
||||
}, 2, nil
|
||||
},
|
||||
}
|
||||
h := NewAgentHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/agents?page=1&per_page=50", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
h.ListAgents(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("ListAgents status = %d, want 200", w.Code)
|
||||
}
|
||||
body := w.Body.String()
|
||||
if bytes.Contains([]byte(body), []byte("api_key_hash")) {
|
||||
t.Errorf("ListAgents response leaked \"api_key_hash\" key (G-2 regressed):\n%s", body)
|
||||
}
|
||||
if bytes.Contains([]byte(body), []byte(g2HandlerLeakSentinel)) {
|
||||
t.Errorf("ListAgents response leaked sentinel %q:\n%s", g2HandlerLeakSentinel, body)
|
||||
}
|
||||
// Sanity: the non-leaked fields ARE present (handler did serve real data).
|
||||
for _, want := range []string{"a-1", "a-2", "agent-one", "agent-two"} {
|
||||
if !bytes.Contains([]byte(body), []byte(want)) {
|
||||
t.Errorf("ListAgents response missing expected field %q (handler may not be serving data):\n%s", want, body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAgent_DoesNotLeakAPIKeyHash(t *testing.T) {
|
||||
now := time.Now()
|
||||
mock := &MockAgentService{
|
||||
GetAgentFn: func(id string) (*domain.Agent, error) {
|
||||
return &domain.Agent{
|
||||
ID: id, Name: "single-agent", Hostname: "single.host",
|
||||
Status: domain.AgentStatusOnline, RegisteredAt: now,
|
||||
APIKeyHash: g2HandlerLeakSentinel,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
h := NewAgentHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/agents/a-prod-001", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
h.GetAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("GetAgent status = %d, want 200, body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
body := w.Body.String()
|
||||
if bytes.Contains([]byte(body), []byte("api_key_hash")) {
|
||||
t.Errorf("GetAgent response leaked \"api_key_hash\" key:\n%s", body)
|
||||
}
|
||||
if bytes.Contains([]byte(body), []byte(g2HandlerLeakSentinel)) {
|
||||
t.Errorf("GetAgent response leaked sentinel:\n%s", body)
|
||||
}
|
||||
if !bytes.Contains([]byte(body), []byte("single-agent")) {
|
||||
t.Errorf("GetAgent response missing the agent name (handler may not be serving data):\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterAgent_DoesNotLeakAPIKeyHash(t *testing.T) {
|
||||
// Registration is the most likely path for a freshly-hashed key to
|
||||
// leak: the service mints a new APIKeyHash inside RegisterAgent
|
||||
// (service/agent.go:405) and the handler returns the agent struct
|
||||
// verbatim. Pin that the redaction holds even on a "freshly created"
|
||||
// agent payload.
|
||||
now := time.Now()
|
||||
mock := &MockAgentService{
|
||||
RegisterAgentFn: func(in domain.Agent) (*domain.Agent, error) {
|
||||
return &domain.Agent{
|
||||
ID: "agent-new", Name: in.Name, Hostname: in.Hostname,
|
||||
Status: domain.AgentStatusOnline, RegisteredAt: now,
|
||||
APIKeyHash: g2HandlerLeakSentinel,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
h := NewAgentHandler(mock)
|
||||
body := bytes.NewBufferString(`{"name":"freshly-registered","hostname":"new.host"}`)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/agents", body)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
h.RegisterAgent(w, req)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("RegisterAgent status = %d, want 201, body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
respBody := w.Body.String()
|
||||
if bytes.Contains([]byte(respBody), []byte("api_key_hash")) {
|
||||
t.Errorf("RegisterAgent response leaked \"api_key_hash\" key:\n%s", respBody)
|
||||
}
|
||||
if bytes.Contains([]byte(respBody), []byte(g2HandlerLeakSentinel)) {
|
||||
t.Errorf("RegisterAgent response leaked sentinel:\n%s", respBody)
|
||||
}
|
||||
if !bytes.Contains([]byte(respBody), []byte("agent-new")) {
|
||||
t.Errorf("RegisterAgent response missing the new agent ID (handler may not be serving data):\n%s", respBody)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListRetiredAgents_DoesNotLeakAPIKeyHash(t *testing.T) {
|
||||
// I-004 surface — separate handler from ListAgents; same leak risk.
|
||||
now := time.Now()
|
||||
retiredAt := now.Add(-1 * time.Hour)
|
||||
reason := "test cascade"
|
||||
mock := &MockAgentService{
|
||||
ListRetiredAgentsFn: func(page, perPage int) ([]domain.Agent, int64, error) {
|
||||
return []domain.Agent{
|
||||
{ID: "ret-1", Name: "retired-one", Hostname: "host-r1",
|
||||
Status: domain.AgentStatusOffline, RegisteredAt: now,
|
||||
RetiredAt: &retiredAt, RetiredReason: &reason,
|
||||
APIKeyHash: g2HandlerLeakSentinel},
|
||||
}, 1, nil
|
||||
},
|
||||
}
|
||||
h := NewAgentHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/agents/retired?page=1&per_page=50", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
h.ListRetiredAgents(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("ListRetiredAgents status = %d, want 200, body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
body := w.Body.String()
|
||||
if bytes.Contains([]byte(body), []byte("api_key_hash")) {
|
||||
t.Errorf("ListRetiredAgents response leaked \"api_key_hash\" key:\n%s", body)
|
||||
}
|
||||
if bytes.Contains([]byte(body), []byte(g2HandlerLeakSentinel)) {
|
||||
t.Errorf("ListRetiredAgents response leaked sentinel:\n%s", body)
|
||||
}
|
||||
if !bytes.Contains([]byte(body), []byte("ret-1")) {
|
||||
t.Errorf("ListRetiredAgents response missing the retired agent ID:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package handler
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
@@ -39,7 +40,8 @@ func (h *DigestHandler) PreviewDigest(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
html, err := h.service.PreviewDigest(r.Context())
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
slog.Error("digest preview failed", "error", err.Error())
|
||||
http.Error(w, "internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -64,9 +66,10 @@ func (h *DigestHandler) SendDigest(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
if err := h.service.SendDigest(r.Context()); err != nil {
|
||||
slog.Error("digest send failed", "error", err.Error())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
|
||||
json.NewEncoder(w).Encode(map[string]string{"error": "internal error"})
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -7,8 +7,14 @@ import (
|
||||
)
|
||||
|
||||
// HealthHandler handles health and readiness check endpoints.
|
||||
//
|
||||
// G-1 (P1): AuthType is one of "api-key" or "none" — see
|
||||
// internal/config.AuthType / config.ValidAuthTypes() for the typed
|
||||
// constants and the rationale for dropping "jwt" (no JWT middleware
|
||||
// ships with certctl; operators who need JWT/OIDC front certctl with
|
||||
// an authenticating gateway and set AuthType="none" on the upstream).
|
||||
type HealthHandler struct {
|
||||
AuthType string // "api-key", "jwt", "none"
|
||||
AuthType string // "api-key" or "none" (see config.AuthType constants)
|
||||
}
|
||||
|
||||
// NewHealthHandler creates a new HealthHandler.
|
||||
|
||||
@@ -162,30 +162,14 @@ func TestAuthInfo_ReturnsAuthType_None(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAuthInfo_ReturnsAuthType_JWT(t *testing.T) {
|
||||
handler := NewHealthHandler("jwt")
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, "/api/v1/auth/info", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest failed: %v", err)
|
||||
}
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
handler.AuthInfo(w, req)
|
||||
|
||||
var result map[string]interface{}
|
||||
if err := json.NewDecoder(w.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
|
||||
if result["auth_type"] != "jwt" {
|
||||
t.Errorf("auth_type = %q, want jwt", result["auth_type"])
|
||||
}
|
||||
|
||||
if required, ok := result["required"].(bool); !ok || !required {
|
||||
t.Errorf("required = %v, want true", result["required"])
|
||||
}
|
||||
}
|
||||
// G-1 (P1): the prior `TestAuthInfo_ReturnsAuthType_JWT` asserted the
|
||||
// handler echoed "jwt" — using the silent-auth-downgrade value as a
|
||||
// test fixture, which baked the lie into the regression suite. The
|
||||
// test is removed because "jwt" is now rejected at config-load time
|
||||
// (see internal/config/config_test.go::TestValidate_JWTAuth_RejectedDedicated)
|
||||
// and never reaches this handler. The pre-existing
|
||||
// `TestAuthInfo_ReturnsAuthType_APIKey` above (line ~107) covers the
|
||||
// api-key happy path; nothing else needs replacing here.
|
||||
|
||||
func TestAuthCheck_ReturnsOK(t *testing.T) {
|
||||
handler := NewHealthHandler("api-key")
|
||||
|
||||
@@ -0,0 +1,243 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/api/middleware"
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// RenewalPolicyService defines the service interface for renewal policy
|
||||
// operations. G-1: all methods take ctx so the handler can propagate
|
||||
// request-scoped cancellation/deadlines through the full stack.
|
||||
type RenewalPolicyService interface {
|
||||
ListRenewalPolicies(ctx context.Context, page, perPage int) ([]domain.RenewalPolicy, int64, error)
|
||||
GetRenewalPolicy(ctx context.Context, id string) (*domain.RenewalPolicy, error)
|
||||
CreateRenewalPolicy(ctx context.Context, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error)
|
||||
UpdateRenewalPolicy(ctx context.Context, id string, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error)
|
||||
DeleteRenewalPolicy(ctx context.Context, id string) error
|
||||
}
|
||||
|
||||
// RenewalPolicyHandler serves /api/v1/renewal-policies CRUD endpoints.
|
||||
//
|
||||
// G-1 design note: the service-level `ErrRenewalPolicyDuplicateName` /
|
||||
// `ErrRenewalPolicyInUse` sentinels alias the repository sentinels (same var
|
||||
// identity), so `errors.Is` walks transparently across layers. Delete/Update
|
||||
// not-found detection intentionally uses a `strings.Contains(err.Error(),
|
||||
// "not found")` substring check — the repo wraps `sql.ErrNoRows` as
|
||||
// `fmt.Errorf("renewal policy not found: %s", id)` which strips the sentinel,
|
||||
// and the handler red-tests' `ErrMockNotFound = errors.New("mock not found
|
||||
// error")` follows the same substring convention.
|
||||
type RenewalPolicyHandler struct {
|
||||
svc RenewalPolicyService
|
||||
}
|
||||
|
||||
// NewRenewalPolicyHandler constructs the handler with its service dependency.
|
||||
// Returned by value to match the house pattern (PolicyHandler, IssuerHandler
|
||||
// etc.) — the registry stores handlers by value in router.HandlerRegistry.
|
||||
func NewRenewalPolicyHandler(svc RenewalPolicyService) RenewalPolicyHandler {
|
||||
return RenewalPolicyHandler{svc: svc}
|
||||
}
|
||||
|
||||
// ListRenewalPolicies lists all renewal policies (paginated).
|
||||
// GET /api/v1/renewal-policies?page=1&per_page=50
|
||||
func (h RenewalPolicyHandler) ListRenewalPolicies(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
page := 1
|
||||
perPage := 50
|
||||
query := r.URL.Query()
|
||||
if p := query.Get("page"); p != "" {
|
||||
if parsed, err := strconv.Atoi(p); err == nil && parsed > 0 {
|
||||
page = parsed
|
||||
}
|
||||
}
|
||||
if pp := query.Get("per_page"); pp != "" {
|
||||
if parsed, err := strconv.Atoi(pp); err == nil && parsed > 0 && parsed <= 500 {
|
||||
perPage = parsed
|
||||
}
|
||||
}
|
||||
|
||||
policies, total, err := h.svc.ListRenewalPolicies(r.Context(), page, perPage)
|
||||
if err != nil {
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to list renewal policies", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
response := PagedResponse{
|
||||
Data: policies,
|
||||
Total: total,
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, response)
|
||||
}
|
||||
|
||||
// GetRenewalPolicy retrieves a single renewal policy by ID.
|
||||
// GET /api/v1/renewal-policies/{id}
|
||||
func (h RenewalPolicyHandler) GetRenewalPolicy(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
id := strings.TrimPrefix(r.URL.Path, "/api/v1/renewal-policies/")
|
||||
parts := strings.Split(id, "/")
|
||||
if len(parts) == 0 || parts[0] == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Renewal policy ID is required", requestID)
|
||||
return
|
||||
}
|
||||
id = parts[0]
|
||||
|
||||
policy, err := h.svc.GetRenewalPolicy(r.Context(), id)
|
||||
if err != nil {
|
||||
// Matches the PolicyHandler.GetPolicy convention: any error from the
|
||||
// service surfaces as 404. The repo wraps sql.ErrNoRows as
|
||||
// "renewal policy not found: %s" and there's no other expected failure
|
||||
// mode on Get — the caller gets a clean 404.
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Renewal policy not found", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, policy)
|
||||
}
|
||||
|
||||
// CreateRenewalPolicy inserts a new renewal policy.
|
||||
// POST /api/v1/renewal-policies
|
||||
//
|
||||
// Error mapping:
|
||||
// - invalid JSON / missing name → 400
|
||||
// - ErrRenewalPolicyDuplicateName (pg 23505 on name UNIQUE) → 409
|
||||
// - anything else → 500
|
||||
func (h RenewalPolicyHandler) CreateRenewalPolicy(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
var rp domain.RenewalPolicy
|
||||
if err := json.NewDecoder(r.Body).Decode(&rp); err != nil {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Invalid request body", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
if err := ValidateRequired("name", rp.Name); err != nil {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, err.Error(), requestID)
|
||||
return
|
||||
}
|
||||
|
||||
created, err := h.svc.CreateRenewalPolicy(r.Context(), rp)
|
||||
if err != nil {
|
||||
if errors.Is(err, service.ErrRenewalPolicyDuplicateName) {
|
||||
ErrorWithRequestID(w, http.StatusConflict, "A renewal policy with that name already exists", requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to create renewal policy", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusCreated, created)
|
||||
}
|
||||
|
||||
// UpdateRenewalPolicy replaces the fields of an existing renewal policy.
|
||||
// PUT /api/v1/renewal-policies/{id}
|
||||
//
|
||||
// Error mapping:
|
||||
// - invalid JSON / empty ID → 400
|
||||
// - ErrRenewalPolicyDuplicateName → 409
|
||||
// - error text contains "not found" → 404 (see struct doc comment re: substring check)
|
||||
// - anything else → 500
|
||||
func (h RenewalPolicyHandler) UpdateRenewalPolicy(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPut {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
id := strings.TrimPrefix(r.URL.Path, "/api/v1/renewal-policies/")
|
||||
parts := strings.Split(id, "/")
|
||||
if len(parts) == 0 || parts[0] == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Renewal policy ID is required", requestID)
|
||||
return
|
||||
}
|
||||
id = parts[0]
|
||||
|
||||
var rp domain.RenewalPolicy
|
||||
if err := json.NewDecoder(r.Body).Decode(&rp); err != nil {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Invalid request body", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
updated, err := h.svc.UpdateRenewalPolicy(r.Context(), id, rp)
|
||||
if err != nil {
|
||||
if errors.Is(err, service.ErrRenewalPolicyDuplicateName) {
|
||||
ErrorWithRequestID(w, http.StatusConflict, "A renewal policy with that name already exists", requestID)
|
||||
return
|
||||
}
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Renewal policy not found", requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to update renewal policy", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
JSON(w, http.StatusOK, updated)
|
||||
}
|
||||
|
||||
// DeleteRenewalPolicy removes a renewal policy.
|
||||
// DELETE /api/v1/renewal-policies/{id}
|
||||
//
|
||||
// Error mapping:
|
||||
// - empty ID (trailing slash) → 400
|
||||
// - ErrRenewalPolicyInUse (pg 23503 FK-RESTRICT against managed_certificates.renewal_policy_id) → 409
|
||||
// - error text contains "not found" → 404
|
||||
// - anything else → 500
|
||||
func (h RenewalPolicyHandler) DeleteRenewalPolicy(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodDelete {
|
||||
Error(w, http.StatusMethodNotAllowed, "Method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
requestID := middleware.GetRequestID(r.Context())
|
||||
|
||||
id := strings.TrimPrefix(r.URL.Path, "/api/v1/renewal-policies/")
|
||||
parts := strings.Split(id, "/")
|
||||
if len(parts) == 0 || parts[0] == "" {
|
||||
ErrorWithRequestID(w, http.StatusBadRequest, "Renewal policy ID is required", requestID)
|
||||
return
|
||||
}
|
||||
id = parts[0]
|
||||
|
||||
if err := h.svc.DeleteRenewalPolicy(r.Context(), id); err != nil {
|
||||
if errors.Is(err, service.ErrRenewalPolicyInUse) {
|
||||
ErrorWithRequestID(w, http.StatusConflict, "Renewal policy is still referenced by managed certificates", requestID)
|
||||
return
|
||||
}
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
ErrorWithRequestID(w, http.StatusNotFound, "Renewal policy not found", requestID)
|
||||
return
|
||||
}
|
||||
ErrorWithRequestID(w, http.StatusInternalServerError, "Failed to delete renewal policy", requestID)
|
||||
return
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
@@ -0,0 +1,434 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/service"
|
||||
)
|
||||
|
||||
// G-1 red tests: lock in the HTTP surface of /api/v1/renewal-policies before
|
||||
// the production code exists. Every subtest here references a symbol that
|
||||
// Phase 2b must introduce:
|
||||
//
|
||||
// - NewRenewalPolicyHandler(svc) (constructor)
|
||||
// - RenewalPolicyService (service-layer interface, in this package)
|
||||
// - handler.ListRenewalPolicies / GetRenewalPolicy / CreateRenewalPolicy /
|
||||
// UpdateRenewalPolicy / DeleteRenewalPolicy
|
||||
// - service.ErrRenewalPolicyDuplicateName (pg 23505 → 409 mapping)
|
||||
// - service.ErrRenewalPolicyInUse (pg 23503 → 409 mapping)
|
||||
|
||||
// MockRenewalPolicyService is a mock implementation of RenewalPolicyService.
|
||||
type MockRenewalPolicyService struct {
|
||||
ListRenewalPoliciesFn func(page, perPage int) ([]domain.RenewalPolicy, int64, error)
|
||||
GetRenewalPolicyFn func(id string) (*domain.RenewalPolicy, error)
|
||||
CreateRenewalPolicyFn func(rp domain.RenewalPolicy) (*domain.RenewalPolicy, error)
|
||||
UpdateRenewalPolicyFn func(id string, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error)
|
||||
DeleteRenewalPolicyFn func(id string) error
|
||||
}
|
||||
|
||||
func (m *MockRenewalPolicyService) ListRenewalPolicies(_ context.Context, page, perPage int) ([]domain.RenewalPolicy, int64, error) {
|
||||
if m.ListRenewalPoliciesFn != nil {
|
||||
return m.ListRenewalPoliciesFn(page, perPage)
|
||||
}
|
||||
return nil, 0, nil
|
||||
}
|
||||
|
||||
func (m *MockRenewalPolicyService) GetRenewalPolicy(_ context.Context, id string) (*domain.RenewalPolicy, error) {
|
||||
if m.GetRenewalPolicyFn != nil {
|
||||
return m.GetRenewalPolicyFn(id)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockRenewalPolicyService) CreateRenewalPolicy(_ context.Context, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
if m.CreateRenewalPolicyFn != nil {
|
||||
return m.CreateRenewalPolicyFn(rp)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockRenewalPolicyService) UpdateRenewalPolicy(_ context.Context, id string, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
if m.UpdateRenewalPolicyFn != nil {
|
||||
return m.UpdateRenewalPolicyFn(id, rp)
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockRenewalPolicyService) DeleteRenewalPolicy(_ context.Context, id string) error {
|
||||
if m.DeleteRenewalPolicyFn != nil {
|
||||
return m.DeleteRenewalPolicyFn(id)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ----- List -----
|
||||
|
||||
func TestListRenewalPolicies_Success(t *testing.T) {
|
||||
now := time.Now()
|
||||
rp1 := domain.RenewalPolicy{
|
||||
ID: "rp-default", Name: "Default", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
rp2 := domain.RenewalPolicy{
|
||||
ID: "rp-urgent", Name: "Urgent", RenewalWindowDays: 7,
|
||||
MaxRetries: 5, RetryInterval: 600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
|
||||
mock := &MockRenewalPolicyService{
|
||||
ListRenewalPoliciesFn: func(page, perPage int) ([]domain.RenewalPolicy, int64, error) {
|
||||
return []domain.RenewalPolicy{rp1, rp2}, 2, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/renewal-policies", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.ListRenewalPolicies(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var resp PagedResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("failed to decode response: %v", err)
|
||||
}
|
||||
if resp.Total != 2 {
|
||||
t.Errorf("expected total 2, got %d", resp.Total)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListRenewalPolicies_ServiceError(t *testing.T) {
|
||||
mock := &MockRenewalPolicyService{
|
||||
ListRenewalPoliciesFn: func(page, perPage int) ([]domain.RenewalPolicy, int64, error) {
|
||||
return nil, 0, ErrMockServiceFailed
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/renewal-policies", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.ListRenewalPolicies(w, req)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Fatalf("expected status 500, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListRenewalPolicies_MethodNotAllowed(t *testing.T) {
|
||||
handler := NewRenewalPolicyHandler(&MockRenewalPolicyService{})
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/renewal-policies", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.ListRenewalPolicies(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Fatalf("expected status 405, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ----- Get -----
|
||||
|
||||
func TestGetRenewalPolicy_Success(t *testing.T) {
|
||||
now := time.Now()
|
||||
mock := &MockRenewalPolicyService{
|
||||
GetRenewalPolicyFn: func(id string) (*domain.RenewalPolicy, error) {
|
||||
return &domain.RenewalPolicy{
|
||||
ID: id, Name: "Default", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/renewal-policies/rp-default", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status 200, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRenewalPolicy_NotFound(t *testing.T) {
|
||||
mock := &MockRenewalPolicyService{
|
||||
GetRenewalPolicyFn: func(id string) (*domain.RenewalPolicy, error) {
|
||||
return nil, ErrMockNotFound
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/renewal-policies/nonexistent", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.GetRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected status 404, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ----- Create -----
|
||||
|
||||
func TestCreateRenewalPolicy_Success(t *testing.T) {
|
||||
now := time.Now()
|
||||
mock := &MockRenewalPolicyService{
|
||||
CreateRenewalPolicyFn: func(rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
rp.ID = "rp-new"
|
||||
rp.CreatedAt = now
|
||||
rp.UpdatedAt = now
|
||||
return &rp, nil
|
||||
},
|
||||
}
|
||||
|
||||
body := map[string]interface{}{
|
||||
"name": "New Policy",
|
||||
"renewal_window_days": 30,
|
||||
"max_retries": 3,
|
||||
"retry_interval_seconds": 3600,
|
||||
"auto_renew": true,
|
||||
}
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/renewal-policies", bytes.NewReader(bodyBytes))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("expected status 201, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateRenewalPolicy_MissingName(t *testing.T) {
|
||||
body := map[string]interface{}{
|
||||
"renewal_window_days": 30,
|
||||
"max_retries": 3,
|
||||
"retry_interval_seconds": 3600,
|
||||
}
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
handler := NewRenewalPolicyHandler(&MockRenewalPolicyService{})
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/renewal-policies", bytes.NewReader(bodyBytes))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateRenewalPolicy_InvalidJSON(t *testing.T) {
|
||||
handler := NewRenewalPolicyHandler(&MockRenewalPolicyService{})
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/renewal-policies", bytes.NewReader([]byte("not json")))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateRenewalPolicy_DuplicateName(t *testing.T) {
|
||||
// Service bubbles up ErrRenewalPolicyDuplicateName (pg 23505) → handler maps to 409.
|
||||
mock := &MockRenewalPolicyService{
|
||||
CreateRenewalPolicyFn: func(rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
return nil, service.ErrRenewalPolicyDuplicateName
|
||||
},
|
||||
}
|
||||
|
||||
body := map[string]interface{}{
|
||||
"name": "Duplicate",
|
||||
"renewal_window_days": 30,
|
||||
"max_retries": 3,
|
||||
"retry_interval_seconds": 3600,
|
||||
}
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/v1/renewal-policies", bytes.NewReader(bodyBytes))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusConflict {
|
||||
t.Fatalf("expected status 409 on duplicate name, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateRenewalPolicy_MethodNotAllowed(t *testing.T) {
|
||||
handler := NewRenewalPolicyHandler(&MockRenewalPolicyService{})
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/renewal-policies", nil)
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.CreateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusMethodNotAllowed {
|
||||
t.Fatalf("expected status 405, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ----- Update -----
|
||||
|
||||
func TestUpdateRenewalPolicy_Success(t *testing.T) {
|
||||
now := time.Now()
|
||||
mock := &MockRenewalPolicyService{
|
||||
UpdateRenewalPolicyFn: func(id string, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
return &domain.RenewalPolicy{
|
||||
ID: id, Name: rp.Name, RenewalWindowDays: rp.RenewalWindowDays,
|
||||
MaxRetries: rp.MaxRetries, RetryInterval: rp.RetryInterval,
|
||||
AutoRenew: rp.AutoRenew,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
body := map[string]interface{}{
|
||||
"name": "Updated Policy",
|
||||
"renewal_window_days": 45,
|
||||
"max_retries": 5,
|
||||
"retry_interval_seconds": 1800,
|
||||
"auto_renew": true,
|
||||
}
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPut, "/api/v1/renewal-policies/rp-default", bytes.NewReader(bodyBytes))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.UpdateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status 200, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateRenewalPolicy_NotFound(t *testing.T) {
|
||||
mock := &MockRenewalPolicyService{
|
||||
UpdateRenewalPolicyFn: func(id string, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
return nil, ErrMockNotFound
|
||||
},
|
||||
}
|
||||
|
||||
body := map[string]interface{}{
|
||||
"name": "Updated",
|
||||
"renewal_window_days": 30,
|
||||
"max_retries": 3,
|
||||
"retry_interval_seconds": 3600,
|
||||
}
|
||||
bodyBytes, _ := json.Marshal(body)
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodPut, "/api/v1/renewal-policies/rp-missing", bytes.NewReader(bodyBytes))
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.UpdateRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected status 404, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// ----- Delete -----
|
||||
|
||||
func TestDeleteRenewalPolicy_Success(t *testing.T) {
|
||||
var deletedID string
|
||||
mock := &MockRenewalPolicyService{
|
||||
DeleteRenewalPolicyFn: func(id string) error {
|
||||
deletedID = id
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/renewal-policies/rp-default", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.DeleteRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusNoContent {
|
||||
t.Fatalf("expected status 204, got %d", w.Code)
|
||||
}
|
||||
if deletedID != "rp-default" {
|
||||
t.Errorf("expected deleted ID 'rp-default', got '%s'", deletedID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteRenewalPolicy_NotFound(t *testing.T) {
|
||||
mock := &MockRenewalPolicyService{
|
||||
DeleteRenewalPolicyFn: func(id string) error {
|
||||
return ErrMockNotFound
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/renewal-policies/rp-missing", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.DeleteRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected status 404, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteRenewalPolicy_InUseConflict(t *testing.T) {
|
||||
// Service bubbles up ErrRenewalPolicyInUse (pg 23503 FK-RESTRICT) → handler maps to 409.
|
||||
mock := &MockRenewalPolicyService{
|
||||
DeleteRenewalPolicyFn: func(id string) error {
|
||||
return service.ErrRenewalPolicyInUse
|
||||
},
|
||||
}
|
||||
|
||||
handler := NewRenewalPolicyHandler(mock)
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/renewal-policies/rp-active", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.DeleteRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusConflict {
|
||||
t.Fatalf("expected status 409 on in-use conflict, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteRenewalPolicy_EmptyID(t *testing.T) {
|
||||
handler := NewRenewalPolicyHandler(&MockRenewalPolicyService{})
|
||||
req := httptest.NewRequest(http.MethodDelete, "/api/v1/renewal-policies/", nil)
|
||||
req = req.WithContext(contextWithRequestID())
|
||||
w := httptest.NewRecorder()
|
||||
|
||||
handler.DeleteRenewalPolicy(w, req)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("expected status 400, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,158 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
)
|
||||
|
||||
// VersionHandler exposes the running server's build identity at
|
||||
// /api/v1/version. U-3 ride-along (cat-u-no_version_endpoint, P2): pre-U-3
|
||||
// there was no in-band way for an operator (or an automated rollout system)
|
||||
// to ask "what version of certctl is this binary?" — they had to either read
|
||||
// the container image tag externally or trust whatever the README said. The
|
||||
// gap matters for the same operability story U-3 closes: when fresh-clone
|
||||
// quickstarts fail, the very first question is "what code did I actually
|
||||
// build", and the only honest answer needs to come from the binary itself.
|
||||
//
|
||||
// VersionInfo is populated from three sources, in priority order:
|
||||
//
|
||||
// 1. The Version field — typically supplied at build time via
|
||||
// `-ldflags='-X github.com/shankar0123/certctl/internal/api/handler.Version=v2.0.50'`.
|
||||
// Production releases set this from the git tag (see release.yml).
|
||||
//
|
||||
// 2. runtime/debug.ReadBuildInfo() — populated by Go 1.18+ for any binary
|
||||
// built from a module. Provides the VCS commit SHA, dirty flag, and
|
||||
// build timestamp. We read these fields directly so a `go build` from a
|
||||
// working tree (no -ldflags incantation) still produces a useful
|
||||
// /api/v1/version payload — the failure mode pre-U-3 was that everything
|
||||
// looked like "dev" everywhere, which made "is the bug fixed in this
|
||||
// binary" unanswerable.
|
||||
//
|
||||
// 3. Static fallbacks ("dev" / "unknown") — only reached when neither
|
||||
// ldflags nor build-info are populated, which in practice means
|
||||
// `go run` from a non-VCS-tracked workspace.
|
||||
//
|
||||
// The handler runs through the no-auth bypass dispatch in cmd/server/main.go
|
||||
// so probes and rollout systems can query it without presenting Bearer
|
||||
// credentials, mirroring how /health and /ready are reachable. Audit logging
|
||||
// excludes /api/v1/version for the same reason — the path is hot under
|
||||
// rollout polling and would otherwise dominate the audit trail.
|
||||
type VersionHandler struct{}
|
||||
|
||||
// Version is overridden at build time via:
|
||||
//
|
||||
// -ldflags='-X github.com/shankar0123/certctl/internal/api/handler.Version=<tag>'
|
||||
//
|
||||
// release.yml does this for the server container and CLI/agent binaries.
|
||||
// The empty default (rather than "dev") lets the Handler fall back to the
|
||||
// runtime/debug VCS revision when ldflags wasn't supplied — preferable to
|
||||
// returning a literal "dev" that masks the actual git SHA the binary was
|
||||
// built from.
|
||||
var Version = ""
|
||||
|
||||
// NewVersionHandler returns a value (not a pointer) to match the
|
||||
// HealthHandler convention — the handler holds no mutable state and is
|
||||
// safe to copy.
|
||||
func NewVersionHandler() VersionHandler {
|
||||
return VersionHandler{}
|
||||
}
|
||||
|
||||
// VersionInfo is the JSON shape returned by GET /api/v1/version.
|
||||
//
|
||||
// Field ordering and tag names are part of the contract — operator tooling
|
||||
// (k8s rollout checks, CI smoke tests, /api/v1/version Prometheus blackbox
|
||||
// probes) parses this payload and must continue to work across releases.
|
||||
// Don't rename a field without an OpenAPI bump and a deprecation cycle.
|
||||
type VersionInfo struct {
|
||||
// Version is the human-readable release identifier (e.g. "v2.0.50").
|
||||
// Falls back to the VCS revision when ldflags wasn't set, and to "dev"
|
||||
// when the build wasn't VCS-tracked at all.
|
||||
Version string `json:"version"`
|
||||
|
||||
// Commit is the git SHA of HEAD at build time, sourced from
|
||||
// runtime/debug.BuildInfo.Settings["vcs.revision"]. Empty string when
|
||||
// the binary was built outside a VCS-tracked workspace (rare —
|
||||
// `go build` from a tarball does this).
|
||||
Commit string `json:"commit"`
|
||||
|
||||
// Modified reports whether the build had uncommitted changes
|
||||
// (debug.BuildInfo.Settings["vcs.modified"]). True for developer
|
||||
// builds, false for release builds out of CI.
|
||||
Modified bool `json:"modified"`
|
||||
|
||||
// BuildTime is the RFC 3339 timestamp captured at build time
|
||||
// (debug.BuildInfo.Settings["vcs.time"]). Empty when not VCS-tracked.
|
||||
BuildTime string `json:"build_time"`
|
||||
|
||||
// GoVersion is the Go toolchain version that compiled the binary
|
||||
// (runtime.Version, e.g. "go1.25.9"). Useful when triaging stdlib
|
||||
// behavior differences ("the deploy that broke was on 1.24, this one
|
||||
// is on 1.25").
|
||||
GoVersion string `json:"go_version"`
|
||||
}
|
||||
|
||||
// readBuildInfo extracts the VCS settings from debug.BuildInfo and pairs
|
||||
// them with the ldflags-supplied Version. Split out from ServeHTTP so the
|
||||
// handler can be unit-tested by injecting synthetic BuildInfo (see
|
||||
// version_handler_test.go) without depending on the test binary's actual
|
||||
// debug info.
|
||||
//
|
||||
// debug.ReadBuildInfo returns ok=false when the binary was built without
|
||||
// module info — extremely rare for a Go 1.18+ build, but we guard it so
|
||||
// the handler degrades to "dev / unknown / runtime.Version()" instead of
|
||||
// nil-deref panicking.
|
||||
func readBuildInfo() VersionInfo {
|
||||
info := VersionInfo{
|
||||
Version: Version,
|
||||
GoVersion: runtime.Version(),
|
||||
}
|
||||
|
||||
bi, ok := debug.ReadBuildInfo()
|
||||
if !ok {
|
||||
// Pre-Go 1.18 binary or a stripped build with no buildinfo segment.
|
||||
// Both are pathological in 2026 but worth the two-line guard.
|
||||
if info.Version == "" {
|
||||
info.Version = "dev"
|
||||
}
|
||||
return info
|
||||
}
|
||||
|
||||
for _, s := range bi.Settings {
|
||||
switch s.Key {
|
||||
case "vcs.revision":
|
||||
info.Commit = s.Value
|
||||
case "vcs.modified":
|
||||
// debug.BuildInfo encodes this as the literal string "true" or
|
||||
// "false"; comparing to "true" is the canonical pattern (mirrors
|
||||
// how the standard library's own version sub-command parses it).
|
||||
info.Modified = s.Value == "true"
|
||||
case "vcs.time":
|
||||
info.BuildTime = s.Value
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback ladder for Version: ldflags > VCS commit > "dev". The git
|
||||
// SHA is more useful than "dev" because it's at least groundable — an
|
||||
// operator can `git show <sha>` to see what code is actually running.
|
||||
if info.Version == "" {
|
||||
if info.Commit != "" {
|
||||
info.Version = info.Commit
|
||||
} else {
|
||||
info.Version = "dev"
|
||||
}
|
||||
}
|
||||
|
||||
return info
|
||||
}
|
||||
|
||||
// ServeHTTP implements http.Handler. Returns the VersionInfo payload as
|
||||
// JSON with a 200 status. GET-only — any other method returns 405, matching
|
||||
// the HealthHandler convention.
|
||||
func (h VersionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
JSON(w, http.StatusOK, readBuildInfo())
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestVersion_ReturnsBuildInfo is the regression for the U-3 ride-along
|
||||
// cat-u-no_version_endpoint (P2). Three behaviors must hold for the
|
||||
// endpoint to be useful in operator tooling:
|
||||
//
|
||||
// 1. GET /api/v1/version returns 200 with a JSON body that decodes into
|
||||
// the documented VersionInfo shape — the wire contract that rollout
|
||||
// systems and Prometheus blackbox probes parse.
|
||||
// 2. The Go runtime version always populates (runtime.Version() can never
|
||||
// return empty), so consumers can always answer "which Go did this
|
||||
// binary compile with" even when ldflags / VCS info are missing.
|
||||
// 3. The Version field is never empty — the fallback ladder
|
||||
// (ldflags > VCS commit > "dev") guarantees a non-empty string so
|
||||
// consumers don't have to special-case absent values.
|
||||
//
|
||||
// We don't pin the exact Version value because it depends on whether the
|
||||
// test binary was built with -ldflags or under `go test`, both of which
|
||||
// the handler must tolerate. The "no empty string" check is the
|
||||
// behavioral contract.
|
||||
func TestVersion_ReturnsBuildInfo(t *testing.T) {
|
||||
h := NewVersionHandler()
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/version", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
h.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, want 200", rec.Code)
|
||||
}
|
||||
|
||||
contentType := rec.Header().Get("Content-Type")
|
||||
if !strings.HasPrefix(contentType, "application/json") {
|
||||
t.Errorf("Content-Type = %q, want application/json prefix (operator tooling parses JSON)", contentType)
|
||||
}
|
||||
|
||||
var got VersionInfo
|
||||
if err := json.NewDecoder(rec.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("response body did not decode into VersionInfo: %v\nbody: %s", err, rec.Body.String())
|
||||
}
|
||||
|
||||
// Version must never be empty — the fallback ladder in readBuildInfo
|
||||
// guarantees this. An empty Version would force every downstream
|
||||
// consumer (k8s rollouts, Prometheus blackbox, the support tooling)
|
||||
// to special-case the missing value, which defeats the point of
|
||||
// /api/v1/version existing.
|
||||
if got.Version == "" {
|
||||
t.Error("Version is empty — the fallback ladder (ldflags > VCS commit > 'dev') must guarantee a non-empty value")
|
||||
}
|
||||
|
||||
// GoVersion must equal runtime.Version() — the handler reads it
|
||||
// directly and cannot be subverted by ldflags or BuildInfo. This is
|
||||
// the one field that should always be ground-truth.
|
||||
if got.GoVersion != runtime.Version() {
|
||||
t.Errorf("GoVersion = %q, want %q (must come straight from runtime.Version())",
|
||||
got.GoVersion, runtime.Version())
|
||||
}
|
||||
}
|
||||
|
||||
// TestVersion_RejectsNonGet pins the GET-only contract. /api/v1/version
|
||||
// is read-only build identity; POST/PUT/DELETE etc. are nonsensical and
|
||||
// should return 405 like the HealthHandler does. Operator tooling that
|
||||
// fat-fingers the verb gets a clear error rather than a confusing 200
|
||||
// from the wrong code path.
|
||||
func TestVersion_RejectsNonGet(t *testing.T) {
|
||||
h := NewVersionHandler()
|
||||
|
||||
for _, method := range []string{
|
||||
http.MethodPost, http.MethodPut, http.MethodDelete, http.MethodPatch,
|
||||
} {
|
||||
req := httptest.NewRequest(method, "/api/v1/version", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
h.ServeHTTP(rec, req)
|
||||
if rec.Code != http.StatusMethodNotAllowed {
|
||||
t.Errorf("%s /api/v1/version → status %d, want 405", method, rec.Code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestVersion_LdflagsOverride locks in the priority order: when the
|
||||
// build-time Version variable is non-empty (e.g. "v2.0.50" injected by
|
||||
// release.yml), readBuildInfo MUST surface that value verbatim and not
|
||||
// silently substitute the VCS commit. The release-pipeline contract
|
||||
// depends on this — a release tagged v2.0.50 should report "v2.0.50",
|
||||
// not the underlying SHA.
|
||||
//
|
||||
// We achieve test isolation by save/restore on the package-level Version
|
||||
// variable; t.Cleanup ensures parallel/subsequent tests see the original.
|
||||
func TestVersion_LdflagsOverride(t *testing.T) {
|
||||
original := Version
|
||||
t.Cleanup(func() { Version = original })
|
||||
|
||||
Version = "v2.0.50-test"
|
||||
got := readBuildInfo()
|
||||
if got.Version != "v2.0.50-test" {
|
||||
t.Errorf("Version = %q, want %q (ldflags-supplied Version must take priority over VCS fallback)",
|
||||
got.Version, "v2.0.50-test")
|
||||
}
|
||||
}
|
||||
@@ -117,8 +117,14 @@ func HashAPIKey(key string) string {
|
||||
}
|
||||
|
||||
// AuthConfig holds configuration for the Auth middleware.
|
||||
//
|
||||
// G-1 (P1): valid Type values are "api-key" or "none" only. "jwt" was
|
||||
// removed because no JWT middleware ships with certctl (silent auth
|
||||
// downgrade pre-G-1). The single source of truth for the allowed set
|
||||
// lives at internal/config.AuthType / config.ValidAuthTypes() — prefer
|
||||
// those constants over string literals when comparing.
|
||||
type AuthConfig struct {
|
||||
Type string // "api-key", "jwt", "none"
|
||||
Type string // "api-key" or "none" (see config.AuthType constants)
|
||||
Secret string // The raw API key or comma-separated list of valid API keys
|
||||
}
|
||||
|
||||
|
||||
@@ -65,8 +65,14 @@ type HandlerRegistry struct {
|
||||
Verification handler.VerificationHandler
|
||||
Export handler.ExportHandler
|
||||
Digest handler.DigestHandler
|
||||
HealthChecks *handler.HealthCheckHandler
|
||||
BulkRevocation handler.BulkRevocationHandler
|
||||
HealthChecks *handler.HealthCheckHandler
|
||||
BulkRevocation handler.BulkRevocationHandler
|
||||
RenewalPolicies handler.RenewalPolicyHandler
|
||||
// Version handles GET /api/v1/version (U-3 ride-along,
|
||||
// cat-u-no_version_endpoint). Wired through the no-auth dispatch in
|
||||
// cmd/server/main.go so probes and rollout systems can read build
|
||||
// identity without Bearer credentials. See handler/version.go.
|
||||
Version handler.VersionHandler
|
||||
}
|
||||
|
||||
// RegisterHandlers sets up all API routes with their handlers.
|
||||
@@ -88,6 +94,17 @@ func (r *Router) RegisterHandlers(reg HandlerRegistry) {
|
||||
middleware.CORS,
|
||||
middleware.ContentType,
|
||||
))
|
||||
// Version endpoint (no auth middleware — used by rollout probes that
|
||||
// don't carry Bearer tokens; the dispatch layer in cmd/server/main.go
|
||||
// also routes /api/v1/version through the no-auth chain). U-3 ride-along
|
||||
// (cat-u-no_version_endpoint, P2). The handler reads
|
||||
// runtime/debug.BuildInfo for VCS attribution; ldflags-supplied Version
|
||||
// is preferred when present.
|
||||
r.mux.Handle("GET /api/v1/version", middleware.Chain(
|
||||
reg.Version,
|
||||
middleware.CORS,
|
||||
middleware.ContentType,
|
||||
))
|
||||
// Auth check endpoint (uses full middleware chain via r.Register)
|
||||
r.Register("GET /api/v1/auth/check", http.HandlerFunc(reg.Health.AuthCheck))
|
||||
|
||||
@@ -167,6 +184,18 @@ func (r *Router) RegisterHandlers(reg HandlerRegistry) {
|
||||
r.Register("DELETE /api/v1/policies/{id}", http.HandlerFunc(reg.Policies.DeletePolicy))
|
||||
r.Register("GET /api/v1/policies/{id}/violations", http.HandlerFunc(reg.Policies.ListViolations))
|
||||
|
||||
// Renewal Policies routes: /api/v1/renewal-policies
|
||||
// G-1: fixes frontend FK drift — OnboardingWizard + CertificatesPage dropdowns
|
||||
// were previously populating renewal_policy_id from /api/v1/policies (compliance
|
||||
// rules, pol-* IDs), violating FK managed_certificates.renewal_policy_id →
|
||||
// renewal_policies(id) ON DELETE RESTRICT. This block is the backend half; the
|
||||
// frontend half swaps getPolicies → getRenewalPolicies at 3 call sites.
|
||||
r.Register("GET /api/v1/renewal-policies", http.HandlerFunc(reg.RenewalPolicies.ListRenewalPolicies))
|
||||
r.Register("POST /api/v1/renewal-policies", http.HandlerFunc(reg.RenewalPolicies.CreateRenewalPolicy))
|
||||
r.Register("GET /api/v1/renewal-policies/{id}", http.HandlerFunc(reg.RenewalPolicies.GetRenewalPolicy))
|
||||
r.Register("PUT /api/v1/renewal-policies/{id}", http.HandlerFunc(reg.RenewalPolicies.UpdateRenewalPolicy))
|
||||
r.Register("DELETE /api/v1/renewal-policies/{id}", http.HandlerFunc(reg.RenewalPolicies.DeleteRenewalPolicy))
|
||||
|
||||
// Profiles routes: /api/v1/profiles
|
||||
r.Register("GET /api/v1/profiles", http.HandlerFunc(reg.Profiles.ListProfiles))
|
||||
r.Register("POST /api/v1/profiles", http.HandlerFunc(reg.Profiles.CreateProfile))
|
||||
|
||||
@@ -612,7 +612,7 @@ func (c *Client) CancelJob(id string) error {
|
||||
|
||||
// GetStatus retrieves server health and summary stats.
|
||||
func (c *Client) GetStatus() error {
|
||||
resp, err := c.do("GET", "/api/v1/health", nil, nil)
|
||||
resp, err := c.do("GET", "/health", nil, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -293,7 +293,7 @@ func TestClient_GetStatus(t *testing.T) {
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
|
||||
if r.URL.Path == "/api/v1/health" {
|
||||
if r.URL.Path == "/health" {
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"status": "healthy",
|
||||
"timestamp": time.Now().Format(time.RFC3339),
|
||||
|
||||
+91
-12
@@ -709,6 +709,16 @@ type DatabaseConfig struct {
|
||||
URL string
|
||||
MaxConnections int
|
||||
MigrationsPath string
|
||||
|
||||
// DemoSeed, when true, makes the server apply
|
||||
// `<MigrationsPath>/seed_demo.sql` after the baseline `seed.sql`. Set
|
||||
// via CERTCTL_DEMO_SEED. The compose demo overlay
|
||||
// (deploy/docker-compose.demo.yml) sets this to keep the demo path
|
||||
// alive after U-3 dropped initdb-mounted seed files. The seed file
|
||||
// uses ON CONFLICT (id) DO NOTHING so re-running on a populated
|
||||
// database is safe; missing-file is a no-op (returns nil) so a
|
||||
// minimal-image deploy that strips seed_demo.sql still boots cleanly.
|
||||
DemoSeed bool
|
||||
}
|
||||
|
||||
// SchedulerConfig contains scheduler timing configuration.
|
||||
@@ -802,13 +812,59 @@ type NamedAPIKey struct {
|
||||
Admin bool
|
||||
}
|
||||
|
||||
// AuthType is the discriminator for the API auth middleware shape. The
|
||||
// string alias preserves env-var roundtrip (the value flows through getEnv
|
||||
// as a plain string) while giving us a typed surface for switches and
|
||||
// validation. Use the named constants below rather than string literals
|
||||
// so future enum additions/removals are caught at compile time.
|
||||
//
|
||||
// G-1 (P1): the pre-G-1 validAuthTypes map literal accepted "jwt" with no
|
||||
// JWT middleware behind it (silent auth downgrade — the configured type
|
||||
// was logged as "jwt" but every request routed through the api-key bearer
|
||||
// middleware regardless). Operators who set CERTCTL_AUTH_TYPE=jwt thought
|
||||
// they had JWT auth; they didn't. The typed alias + ValidAuthTypes()
|
||||
// helper make the allowed set the single source of truth across config
|
||||
// validation, the runtime defense-in-depth switch in main.go, and the
|
||||
// helm-chart template guard (`certctl.validateAuthType`).
|
||||
type AuthType string
|
||||
|
||||
const (
|
||||
// AuthTypeAPIKey routes requests through the api-key bearer middleware.
|
||||
// CERTCTL_AUTH_SECRET (or CERTCTL_API_KEYS_NAMED) is required.
|
||||
AuthTypeAPIKey AuthType = "api-key"
|
||||
|
||||
// AuthTypeNone disables authentication entirely. Development only —
|
||||
// the server logs a loud Warn at startup. Operators who need
|
||||
// JWT/OIDC/mTLS run an authenticating gateway (oauth2-proxy / Envoy
|
||||
// ext_authz / Traefik ForwardAuth / Pomerium) in front of certctl
|
||||
// and set this value on the upstream certctl process. See
|
||||
// docs/architecture.md "Authenticating-gateway pattern".
|
||||
AuthTypeNone AuthType = "none"
|
||||
)
|
||||
|
||||
// ValidAuthTypes returns the allowed CERTCTL_AUTH_TYPE values. The set is
|
||||
// intentionally narrow — JWT was accepted pre-G-1 with no middleware
|
||||
// implementation behind it. Single source of truth referenced by the
|
||||
// validator below, the runtime guard in cmd/server/main.go, the helm
|
||||
// chart template (`certctl.validateAuthType`), and the property test in
|
||||
// config_test.go that pins "jwt" out of the slice forever.
|
||||
func ValidAuthTypes() []AuthType {
|
||||
return []AuthType{AuthTypeAPIKey, AuthTypeNone}
|
||||
}
|
||||
|
||||
// AuthConfig contains authentication configuration.
|
||||
type AuthConfig struct {
|
||||
// Type sets the authentication mechanism for the REST API.
|
||||
// Valid values: "api-key" (default, production), "jwt", "none" (development only).
|
||||
// When "api-key", clients must provide Authorization: Bearer <key> header.
|
||||
// "none" requires explicit opt-in via CERTCTL_AUTH_TYPE env var with warning logged.
|
||||
// Valid values: "api-key" (default, production) and "none" (development
|
||||
// only — disables authentication on the API and logs a loud Warn at
|
||||
// startup). For JWT/OIDC, run an authenticating gateway (oauth2-proxy /
|
||||
// Envoy / Traefik ForwardAuth / Pomerium) in front of certctl and set
|
||||
// CERTCTL_AUTH_TYPE=none on the upstream — see docs/architecture.md
|
||||
// "Authenticating-gateway pattern" and docs/upgrade-to-v2-jwt-removal.md.
|
||||
// Setting: CERTCTL_AUTH_TYPE environment variable. Default: "api-key".
|
||||
// Use the AuthType constants (AuthTypeAPIKey / AuthTypeNone) for typed
|
||||
// comparisons; the field stays `string` to preserve env-var roundtrip
|
||||
// shape used by getEnv() and downstream Helm/compose interpolation.
|
||||
Type string
|
||||
|
||||
// Secret is the legacy authentication secret (comma-separated API keys).
|
||||
@@ -875,6 +931,7 @@ func Load() (*Config, error) {
|
||||
URL: getEnv("CERTCTL_DATABASE_URL", "postgres://localhost/certctl"),
|
||||
MaxConnections: getEnvInt("CERTCTL_DATABASE_MAX_CONNS", 25),
|
||||
MigrationsPath: getEnv("CERTCTL_DATABASE_MIGRATIONS_PATH", "./migrations"),
|
||||
DemoSeed: getEnvBool("CERTCTL_DEMO_SEED", false),
|
||||
},
|
||||
Scheduler: SchedulerConfig{
|
||||
RenewalCheckInterval: getEnvDuration("CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL", 1*time.Hour),
|
||||
@@ -1148,18 +1205,40 @@ func (c *Config) Validate() error {
|
||||
return fmt.Errorf("invalid log format: %s", c.Log.Format)
|
||||
}
|
||||
|
||||
// Validate auth type
|
||||
validAuthTypes := map[string]bool{
|
||||
"api-key": true,
|
||||
"jwt": true,
|
||||
"none": true,
|
||||
// Validate auth type.
|
||||
//
|
||||
// G-1 (P1): the pre-G-1 set was {"api-key", "jwt", "none"} with "jwt"
|
||||
// accepted but no JWT middleware shipped — silent auth downgrade.
|
||||
// Post-G-1 we route a literal "jwt" value through a dedicated
|
||||
// rejection that gives operators actionable guidance (the
|
||||
// authenticating-gateway pattern) instead of the generic
|
||||
// "invalid auth type". Then we cross-check against ValidAuthTypes()
|
||||
// so any value outside {api-key, none} surfaces uniformly.
|
||||
if c.Auth.Type == "jwt" {
|
||||
return fmt.Errorf(
|
||||
"CERTCTL_AUTH_TYPE=jwt is no longer accepted (G-1 silent auth " +
|
||||
"downgrade): no JWT middleware ships with certctl. To use " +
|
||||
"JWT/OIDC, run an authenticating gateway (oauth2-proxy / " +
|
||||
"Envoy ext_authz / Traefik ForwardAuth / Pomerium) in " +
|
||||
"front of certctl and set CERTCTL_AUTH_TYPE=none on the " +
|
||||
"upstream. See docs/architecture.md \"Authenticating-" +
|
||||
"gateway pattern\" and docs/upgrade-to-v2-jwt-removal.md " +
|
||||
"for the migration walkthrough")
|
||||
}
|
||||
if !validAuthTypes[c.Auth.Type] {
|
||||
return fmt.Errorf("invalid auth type: %s", c.Auth.Type)
|
||||
authTypeValid := false
|
||||
for _, t := range ValidAuthTypes() {
|
||||
if AuthType(c.Auth.Type) == t {
|
||||
authTypeValid = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !authTypeValid {
|
||||
return fmt.Errorf("invalid auth type: %s (valid: %v)", c.Auth.Type, ValidAuthTypes())
|
||||
}
|
||||
|
||||
// If using JWT or API-key, secret is required
|
||||
if (c.Auth.Type == "jwt" || c.Auth.Type == "api-key") && c.Auth.Secret == "" {
|
||||
// If using API-key, secret is required. (Secret was previously also
|
||||
// required for "jwt"; removed with the jwt rejection above.)
|
||||
if c.Auth.Type == string(AuthTypeAPIKey) && c.Auth.Secret == "" {
|
||||
return fmt.Errorf("auth secret is required for auth type %s", c.Auth.Type)
|
||||
}
|
||||
|
||||
|
||||
+149
-15
@@ -458,6 +458,8 @@ func TestValidate_InvalidAuthType(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -477,6 +479,8 @@ func TestValidate_APIKeyAuth_MissingSecret(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -484,25 +488,133 @@ func TestValidate_APIKeyAuth_MissingSecret(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_JWTAuth_MissingSecret(t *testing.T) {
|
||||
cfg := &Config{
|
||||
Server: validServerConfig(t),
|
||||
Database: DatabaseConfig{URL: "postgres://localhost/certctl", MaxConnections: 25},
|
||||
Log: LogConfig{Level: "info", Format: "json"},
|
||||
Auth: AuthConfig{Type: "jwt", Secret: ""},
|
||||
Keygen: KeygenConfig{Mode: "agent"},
|
||||
Scheduler: SchedulerConfig{
|
||||
RenewalCheckInterval: 1 * time.Hour,
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
},
|
||||
// TestValidate_JWTAuth_RejectedDedicated locks down the G-1 fix: pre-G-1
|
||||
// `CERTCTL_AUTH_TYPE=jwt` was accepted by the validator (the bare error
|
||||
// path was the empty-secret one previously). Post-G-1 the literal "jwt"
|
||||
// value is rejected with a dedicated diagnostic regardless of whether
|
||||
// Secret is set, because there is no JWT middleware in the binary —
|
||||
// operators who need JWT/OIDC must front certctl with an authenticating
|
||||
// gateway.
|
||||
//
|
||||
// Two table rows pin the contract: missing-secret cannot paper over the
|
||||
// rejection (the dedicated error fires first, before the secret check),
|
||||
// and a populated secret also cannot paper over it. Both paths must
|
||||
// hit the dedicated G-1 diagnostic, not the generic "invalid auth
|
||||
// type" or "auth secret is required".
|
||||
func TestValidate_JWTAuth_RejectedDedicated(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
secret string
|
||||
}{
|
||||
{"jwt rejected (no secret)", ""},
|
||||
{"jwt rejected (with secret — operator can't paper over)", "anything"},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
t.Error("Validate() should return error when jwt auth has empty secret")
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
cfg := &Config{
|
||||
Server: validServerConfig(t),
|
||||
Database: DatabaseConfig{URL: "postgres://localhost/certctl", MaxConnections: 25},
|
||||
Log: LogConfig{Level: "info", Format: "json"},
|
||||
Auth: AuthConfig{Type: "jwt", Secret: tc.secret},
|
||||
Keygen: KeygenConfig{Mode: "agent"},
|
||||
Scheduler: SchedulerConfig{
|
||||
RenewalCheckInterval: 1 * time.Hour,
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
if err == nil {
|
||||
t.Fatal("Validate() returned nil; expected dedicated G-1 rejection")
|
||||
}
|
||||
const wantSubstr = "CERTCTL_AUTH_TYPE=jwt is no longer accepted"
|
||||
if !strings.Contains(err.Error(), wantSubstr) {
|
||||
t.Errorf("Validate() = %v\nwant substring %q (the dedicated G-1 diagnostic)", err, wantSubstr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidAuthTypesDoesNotContainJWT is a property-level guard against
|
||||
// a future PR silently re-introducing "jwt" into the allowed set. If
|
||||
// someone adds JWT back to ValidAuthTypes(), this test fails immediately
|
||||
// with a pointer at the audit finding. The matching CI grep guardrail
|
||||
// in .github/workflows/ci.yml provides a secondary check at build time.
|
||||
func TestValidAuthTypesDoesNotContainJWT(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, at := range ValidAuthTypes() {
|
||||
if at == "jwt" {
|
||||
t.Fatalf("jwt is in ValidAuthTypes — silent auth downgrade regressed (G-1)")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidAuthTypesIsExactly_APIKey_None pins the current allowed set.
|
||||
// If a future change adds a new auth type, this test must be updated
|
||||
// alongside the validator and the helm-chart `validateAuthType` helper —
|
||||
// keeping all three surfaces in sync.
|
||||
func TestValidAuthTypesIsExactly_APIKey_None(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := ValidAuthTypes()
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("ValidAuthTypes() returned %d entries, want 2: %v", len(got), got)
|
||||
}
|
||||
want := map[AuthType]bool{AuthTypeAPIKey: true, AuthTypeNone: true}
|
||||
for _, at := range got {
|
||||
if !want[at] {
|
||||
t.Errorf("unexpected auth type in ValidAuthTypes: %q", at)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestValidate_GenericInvalidAuthType ensures that values outside the
|
||||
// allowed set (other than the special-cased "jwt") still surface the
|
||||
// generic "invalid auth type" error. Pins that the dedicated G-1
|
||||
// rejection didn't accidentally swallow non-jwt typos.
|
||||
func TestValidate_GenericInvalidAuthType(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, badType := range []string{"", "garbage", "oidc", "mtls", "API-KEY"} {
|
||||
t.Run("type="+badType, func(t *testing.T) {
|
||||
cfg := &Config{
|
||||
Server: validServerConfig(t),
|
||||
Database: DatabaseConfig{URL: "postgres://localhost/certctl", MaxConnections: 25},
|
||||
Log: LogConfig{Level: "info", Format: "json"},
|
||||
Auth: AuthConfig{Type: badType, Secret: "x"},
|
||||
Keygen: KeygenConfig{Mode: "agent"},
|
||||
Scheduler: SchedulerConfig{
|
||||
RenewalCheckInterval: 1 * time.Hour,
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
if err == nil {
|
||||
t.Fatalf("Validate(type=%q) returned nil; expected invalid-auth-type rejection", badType)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "invalid auth type") {
|
||||
t.Errorf("Validate(type=%q) = %v; want \"invalid auth type\" error", badType, err)
|
||||
}
|
||||
if strings.Contains(err.Error(), "G-1 silent auth") {
|
||||
t.Errorf("Validate(type=%q) = %v; should not hit the dedicated G-1 path for non-jwt values", badType, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// G-1 (P1): no need to add `TestValidate_NoneAuth_AcceptsEmptySecret` or
|
||||
// `TestValidate_APIKeyAuth_RequiresSecret` here — the pre-existing tests
|
||||
// `TestValidate_AuthTypeNone` (above) and `TestValidate_APIKeyAuth_MissingSecret`
|
||||
// (above) already cover those paths. Documented for the next reader: the
|
||||
// G-1 fix flipped jwt off but did not disturb either the
|
||||
// none-bypasses-secret or the api-key-requires-secret behavior.
|
||||
|
||||
func TestValidate_InvalidKeygenMode(t *testing.T) {
|
||||
cfg := &Config{
|
||||
Server: validServerConfig(t),
|
||||
@@ -515,6 +627,8 @@ func TestValidate_InvalidKeygenMode(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -544,6 +658,8 @@ func TestValidate_InvalidPort(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -574,6 +690,8 @@ func TestValidate_TLSCertPathEmpty(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
@@ -605,6 +723,8 @@ func TestValidate_TLSKeyPathEmpty(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
@@ -637,6 +757,8 @@ func TestValidate_TLSCertFileMissing(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
@@ -668,6 +790,8 @@ func TestValidate_TLSKeyFileMissing(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
@@ -701,6 +825,8 @@ func TestValidate_TLSMismatchedPair(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
err := cfg.Validate()
|
||||
@@ -724,6 +850,8 @@ func TestValidate_EmptyDatabaseURL(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -743,6 +871,8 @@ func TestValidate_InvalidLogLevel(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -762,6 +892,8 @@ func TestValidate_InvalidLogFormat(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
@@ -840,6 +972,8 @@ func TestValidate_DatabaseMaxConnectionsZero(t *testing.T) {
|
||||
JobProcessorInterval: 30 * time.Second,
|
||||
AgentHealthCheckInterval: 2 * time.Minute,
|
||||
NotificationProcessInterval: 1 * time.Minute,
|
||||
NotificationRetryInterval: 2 * time.Minute,
|
||||
RetryInterval: 5 * time.Minute,
|
||||
},
|
||||
}
|
||||
if err := cfg.Validate(); err == nil {
|
||||
|
||||
@@ -664,12 +664,17 @@ func (c *Connector) solveAuthorizationsDNS01(ctx context.Context, authzURLs []st
|
||||
return fmt.Errorf("failed to present DNS record for %s: %w", domain, err)
|
||||
}
|
||||
|
||||
// Wait for DNS propagation
|
||||
// Wait for DNS propagation (ctx-aware so graceful shutdown can interrupt — F-003)
|
||||
propagationWait := time.Duration(c.config.DNSPropagationWait) * time.Second
|
||||
c.logger.Info("waiting for DNS propagation",
|
||||
"domain", domain,
|
||||
"wait_seconds", c.config.DNSPropagationWait)
|
||||
time.Sleep(propagationWait)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
_ = c.dnsSolver.CleanUp(ctx, domain, dnsChallenge.Token, keyAuth)
|
||||
return ctx.Err()
|
||||
case <-time.After(propagationWait):
|
||||
}
|
||||
|
||||
// Tell the CA we're ready
|
||||
if _, err := c.client.Accept(ctx, dnsChallenge); err != nil {
|
||||
@@ -773,12 +778,16 @@ func (c *Connector) solveAuthorizationsDNSPersist01(ctx context.Context, authzUR
|
||||
return fmt.Errorf("failed to create persistent DNS record for %s: %w", domain, err)
|
||||
}
|
||||
|
||||
// Wait for DNS propagation
|
||||
// Wait for DNS propagation (ctx-aware so graceful shutdown can interrupt — F-003)
|
||||
propagationWait := time.Duration(c.config.DNSPropagationWait) * time.Second
|
||||
c.logger.Info("waiting for DNS propagation",
|
||||
"domain", domain,
|
||||
"wait_seconds", c.config.DNSPropagationWait)
|
||||
time.Sleep(propagationWait)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(propagationWait):
|
||||
}
|
||||
|
||||
// Tell the CA we're ready
|
||||
if _, err := c.client.Accept(ctx, persistChallenge); err != nil {
|
||||
|
||||
@@ -46,7 +46,26 @@ type Agent struct {
|
||||
Status AgentStatus `json:"status"`
|
||||
LastHeartbeatAt *time.Time `json:"last_heartbeat_at,omitempty"`
|
||||
RegisteredAt time.Time `json:"registered_at"`
|
||||
APIKeyHash string `json:"api_key_hash"`
|
||||
// APIKeyHash is the SHA-256 of the agent's plaintext API key,
|
||||
// populated by service.RegisterAgent (`hashAPIKey(apiKey)`) and
|
||||
// consumed by repository.AgentRepository::GetByAPIKey at auth time.
|
||||
// It is server-internal: never serialized to clients, never echoed
|
||||
// via CLI / MCP / agent registration response, never logged.
|
||||
//
|
||||
// G-2 (P1): pre-G-2 the field was tagged `json:"api_key_hash"` and
|
||||
// shipped on every /api/v1/agents response (cat-s5-apikey_leak). Even
|
||||
// SHA-256 should not be shipped to clients — it gives an offline
|
||||
// brute-force target if API-key entropy is low (certctl doesn't enforce
|
||||
// a minimum on operator-supplied keys), and there is no business reason
|
||||
// for any client to ever receive it. Post-G-2 the JSON tag is "-" and
|
||||
// Agent.MarshalJSON below zeroes the field on a copy before delegating
|
||||
// to the default marshal — defense in depth so a future tag-revert by
|
||||
// refactor cannot reopen the leak. The DB column, repo SELECT/INSERT/
|
||||
// UPDATE paths, and service-side hashing are unchanged. See
|
||||
// docs/architecture.md ER diagram (which documents DB shape, not API
|
||||
// shape) and coverage-gap-audit-2026-04-24-v5/unified-audit.md
|
||||
// cat-s5-apikey_leak for the full closure rationale.
|
||||
APIKeyHash string `json:"-"`
|
||||
OS string `json:"os"`
|
||||
Architecture string `json:"architecture"`
|
||||
IPAddress string `json:"ip_address"`
|
||||
@@ -60,6 +79,31 @@ type Agent struct {
|
||||
RetiredReason *string `json:"retired_reason,omitempty"`
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler. It explicitly zeros APIKeyHash
|
||||
// before serialization to defense-in-depth the `json:"-"` tag above.
|
||||
//
|
||||
// G-2 (P1): pre-G-2 the field was tagged `json:"api_key_hash"` and
|
||||
// shipped on every /api/v1/agents response (cat-s5-apikey_leak). Post-G-2
|
||||
// the tag is "-" and this method enforces redaction even if the tag is
|
||||
// reverted by a future refactor — the receiver is by-value so the
|
||||
// APIKeyHash = "" assignment mutates only the marshal-time copy, never
|
||||
// the caller's original. The type-alias trick (`type alias Agent`)
|
||||
// breaks the recursive MarshalJSON call that would otherwise stack-
|
||||
// overflow. Both *Agent and Agent receivers route through here because
|
||||
// the json package looks the method up via reflect.Value, and a value
|
||||
// receiver satisfies both kinds of pointer.
|
||||
//
|
||||
// Auditor's note for the next reader: do NOT remove this method even if
|
||||
// the json:"-" tag stays. The CI guardrail at .github/workflows/ci.yml
|
||||
// also blocks reintroduction at the tag site, but this method is the
|
||||
// last line of defense for serialization paths that bypass struct tags
|
||||
// (e.g., a future MarshalJSON on a parent struct that embeds Agent).
|
||||
func (a Agent) MarshalJSON() ([]byte, error) {
|
||||
type alias Agent // breaks recursion: alias has no MarshalJSON method
|
||||
a.APIKeyHash = ""
|
||||
return json.Marshal(alias(a))
|
||||
}
|
||||
|
||||
// IsRetired returns true when this agent has been soft-retired.
|
||||
// I-004: callers that iterate active agents (stats dashboard, stale-offline
|
||||
// sweeper, handler-facing list) must skip retired rows by default.
|
||||
|
||||
@@ -1,10 +1,22 @@
|
||||
package domain
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// jsonMarshalDirect / jsonUnmarshalDirect are thin aliases for the
|
||||
// stdlib encoding/json calls. They exist only to make the G-2 redaction
|
||||
// tests below grep-friendly: any future test author searching for "how
|
||||
// is APIKeyHash redaction tested" will land on these and the call sites,
|
||||
// rather than having to grep through dozens of unrelated json.Marshal
|
||||
// usages.
|
||||
func jsonMarshalDirect(v interface{}) ([]byte, error) { return json.Marshal(v) }
|
||||
func jsonUnmarshalDirect(data []byte, v interface{}) error { return json.Unmarshal(data, v) }
|
||||
func containsSubstr(haystack, needle string) bool { return strings.Contains(haystack, needle) }
|
||||
|
||||
// TestAgent_IsRetired covers the I-004 soft-retirement predicate that gates
|
||||
// which callers hide an agent row from active listings.
|
||||
func TestAgent_IsRetired(t *testing.T) {
|
||||
@@ -53,3 +65,172 @@ func TestAgentDependencyCounts_HasDependencies(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// G-2 (P1): the cat-s5-apikey_leak audit closure tests. Pre-G-2,
|
||||
// Agent.APIKeyHash was tagged `json:"api_key_hash"` and shipped on
|
||||
// every /api/v1/agents response — credential-derivative leak that gave
|
||||
// offline brute-force targets to every authenticated client. Post-G-2
|
||||
// the tag is "-" AND Agent.MarshalJSON zeroes the field on a marshal-
|
||||
// time copy. These tests pin both layers of the defense:
|
||||
//
|
||||
// 1. A populated APIKeyHash is never present in the marshaled JSON.
|
||||
// 2. The redaction holds on *Agent, on slice elements, and on a
|
||||
// sentinel literal-value check (so even a future field that
|
||||
// happens to contain the same hash string would not appear).
|
||||
// 3. The marshal-time copy does not mutate the caller's original —
|
||||
// receiver is by-value, but pin it explicitly so a future refactor
|
||||
// that switches to pointer-receiver gets caught.
|
||||
// 4. Round-trip preserves every other field (hash dropped on encode,
|
||||
// cannot reappear on decode because the wire never carries it).
|
||||
|
||||
const g2LeakSentinel = "sha256:LEAKED-CREDENTIAL-DERIVATIVE-SENTINEL"
|
||||
|
||||
// TestAgent_MarshalJSON_RedactsAPIKeyHash is the marshal-boundary
|
||||
// contract test: a single Agent value with a populated APIKeyHash must
|
||||
// not emit the field name nor the sentinel value.
|
||||
func TestAgent_MarshalJSON_RedactsAPIKeyHash(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := Agent{
|
||||
ID: "agent-test",
|
||||
Name: "test-agent",
|
||||
Hostname: "host.example",
|
||||
Status: AgentStatusOnline,
|
||||
RegisteredAt: time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
APIKeyHash: g2LeakSentinel,
|
||||
OS: "linux",
|
||||
Architecture: "amd64",
|
||||
IPAddress: "10.0.0.1",
|
||||
Version: "v2.0.49",
|
||||
}
|
||||
out, err := jsonMarshalDirect(a)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal returned error: %v", err)
|
||||
}
|
||||
body := string(out)
|
||||
if containsSubstr(body, "api_key_hash") {
|
||||
t.Errorf("marshaled body contains \"api_key_hash\" key — G-2 leak regressed:\n%s", body)
|
||||
}
|
||||
if containsSubstr(body, "APIKeyHash") {
|
||||
t.Errorf("marshaled body contains \"APIKeyHash\" — type-alias redaction broke:\n%s", body)
|
||||
}
|
||||
if containsSubstr(body, g2LeakSentinel) {
|
||||
t.Errorf("marshaled body contains the leak sentinel %q — value redaction broke:\n%s", g2LeakSentinel, body)
|
||||
}
|
||||
// Sanity: every OTHER non-zero field IS present (this guards against
|
||||
// the type-alias trick accidentally dropping siblings).
|
||||
for _, want := range []string{"agent-test", "test-agent", "host.example", "Online", "linux", "amd64", "10.0.0.1", "v2.0.49"} {
|
||||
if !containsSubstr(body, want) {
|
||||
t.Errorf("marshaled body missing expected field value %q:\n%s", want, body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_MarshalJSON_RedactsViaPointer covers the *Agent path that
|
||||
// handlers hit when calling JSON(w, http.StatusOK, agent) with a *Agent
|
||||
// from svc.GetAgent. A value-receiver MarshalJSON is reachable from
|
||||
// pointer values via reflect; this test pins that contract.
|
||||
func TestAgent_MarshalJSON_RedactsViaPointer(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := &Agent{ID: "agent-x", APIKeyHash: g2LeakSentinel}
|
||||
out, err := jsonMarshalDirect(a)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal *Agent returned error: %v", err)
|
||||
}
|
||||
if containsSubstr(string(out), g2LeakSentinel) {
|
||||
t.Errorf("*Agent marshal leaked sentinel:\n%s", string(out))
|
||||
}
|
||||
if containsSubstr(string(out), "api_key_hash") {
|
||||
t.Errorf("*Agent marshal contains \"api_key_hash\" key:\n%s", string(out))
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_MarshalJSON_RedactsInSlice covers the []domain.Agent path
|
||||
// the ListAgents handler emits via PagedResponse{Data: agents}. Each
|
||||
// element must be redacted independently.
|
||||
func TestAgent_MarshalJSON_RedactsInSlice(t *testing.T) {
|
||||
t.Parallel()
|
||||
agents := []Agent{
|
||||
{ID: "agent-1", APIKeyHash: g2LeakSentinel + "-1"},
|
||||
{ID: "agent-2", APIKeyHash: g2LeakSentinel + "-2"},
|
||||
{ID: "agent-3", APIKeyHash: g2LeakSentinel + "-3"},
|
||||
}
|
||||
out, err := jsonMarshalDirect(agents)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal []Agent returned error: %v", err)
|
||||
}
|
||||
body := string(out)
|
||||
if containsSubstr(body, "api_key_hash") {
|
||||
t.Errorf("[]Agent marshal contains \"api_key_hash\" key:\n%s", body)
|
||||
}
|
||||
for i := 1; i <= 3; i++ {
|
||||
sentinel := g2LeakSentinel + "-" + string(rune('0'+i))
|
||||
if containsSubstr(body, sentinel) {
|
||||
t.Errorf("[]Agent marshal leaked sentinel %q:\n%s", sentinel, body)
|
||||
}
|
||||
}
|
||||
// Every agent ID is present — the redaction didn't accidentally
|
||||
// strip the entire element.
|
||||
for _, id := range []string{"agent-1", "agent-2", "agent-3"} {
|
||||
if !containsSubstr(body, id) {
|
||||
t.Errorf("[]Agent marshal missing element ID %q:\n%s", id, body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_MarshalJSON_DoesNotMutateReceiver pins the by-value-receiver
|
||||
// contract: marshaling must not zero APIKeyHash on the caller's struct,
|
||||
// only on the marshal-time copy. This guards against a future refactor
|
||||
// that switches to pointer receiver and breaks every code path that
|
||||
// marshals an Agent and then re-uses it (e.g., audit-event payload
|
||||
// construction immediately after returning the agent in a handler).
|
||||
func TestAgent_MarshalJSON_DoesNotMutateReceiver(t *testing.T) {
|
||||
t.Parallel()
|
||||
a := Agent{ID: "agent-keep", APIKeyHash: g2LeakSentinel}
|
||||
if _, err := jsonMarshalDirect(a); err != nil {
|
||||
t.Fatalf("Marshal returned error: %v", err)
|
||||
}
|
||||
if a.APIKeyHash != g2LeakSentinel {
|
||||
t.Errorf("MarshalJSON mutated caller's APIKeyHash: got %q want %q", a.APIKeyHash, g2LeakSentinel)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgent_MarshalJSON_RoundTrip pins the wire-shape contract: an
|
||||
// Agent marshaled to JSON and unmarshaled back into a fresh Agent has
|
||||
// every field preserved EXCEPT APIKeyHash, which the wire never carries.
|
||||
// This double-confirms the redaction is a one-way guarantee at the
|
||||
// serialization boundary, not an accidental on-decode behavior.
|
||||
func TestAgent_MarshalJSON_RoundTrip(t *testing.T) {
|
||||
t.Parallel()
|
||||
now := time.Date(2026, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||
hb := now.Add(-5 * time.Minute)
|
||||
original := Agent{
|
||||
ID: "agent-rt",
|
||||
Name: "rt",
|
||||
Hostname: "rt.host",
|
||||
Status: AgentStatusOnline,
|
||||
LastHeartbeatAt: &hb,
|
||||
RegisteredAt: now,
|
||||
APIKeyHash: g2LeakSentinel,
|
||||
OS: "linux",
|
||||
Architecture: "arm64",
|
||||
IPAddress: "10.0.0.99",
|
||||
Version: "v2.0.49",
|
||||
}
|
||||
out, err := jsonMarshalDirect(original)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal returned error: %v", err)
|
||||
}
|
||||
var got Agent
|
||||
if err := jsonUnmarshalDirect(out, &got); err != nil {
|
||||
t.Fatalf("Unmarshal returned error: %v", err)
|
||||
}
|
||||
if got.APIKeyHash != "" {
|
||||
t.Errorf("APIKeyHash survived round-trip: got %q want empty (the wire must not carry it)", got.APIKeyHash)
|
||||
}
|
||||
if got.ID != original.ID || got.Name != original.Name || got.Hostname != original.Hostname {
|
||||
t.Errorf("identity fields lost in round-trip: got %+v want %+v", got, original)
|
||||
}
|
||||
if got.OS != original.OS || got.Architecture != original.Architecture || got.IPAddress != original.IPAddress {
|
||||
t.Errorf("metadata fields lost in round-trip: got %+v want %+v", got, original)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1151,6 +1151,25 @@ func (m *mockRenewalPolicyRepository) List(ctx context.Context) ([]*domain.Renew
|
||||
return policies, nil
|
||||
}
|
||||
|
||||
// Create/Update/Delete satisfy the G-1 interface extension. The integration
|
||||
// harness never drives the CRUD endpoints directly — these methods exist
|
||||
// purely for interface compliance so the binary still builds.
|
||||
func (m *mockRenewalPolicyRepository) Create(ctx context.Context, policy *domain.RenewalPolicy) error {
|
||||
m.policies[policy.ID] = policy
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepository) Update(ctx context.Context, id string, policy *domain.RenewalPolicy) error {
|
||||
policy.ID = id
|
||||
m.policies[id] = policy
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepository) Delete(ctx context.Context, id string) error {
|
||||
delete(m.policies, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
type mockIssuerRepository struct {
|
||||
issuers map[string]*domain.Issuer
|
||||
}
|
||||
@@ -1371,6 +1390,16 @@ func (m *mockRevocationRepository) ListAll(ctx context.Context) ([]*domain.Certi
|
||||
return m.revocations, nil
|
||||
}
|
||||
|
||||
func (m *mockRevocationRepository) ListByIssuer(ctx context.Context, issuerID string) ([]*domain.CertificateRevocation, error) {
|
||||
var result []*domain.CertificateRevocation
|
||||
for _, r := range m.revocations {
|
||||
if r.IssuerID == issuerID {
|
||||
result = append(result, r)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (m *mockRevocationRepository) ListByCertificate(ctx context.Context, certID string) ([]*domain.CertificateRevocation, error) {
|
||||
var result []*domain.CertificateRevocation
|
||||
for _, r := range m.revocations {
|
||||
|
||||
@@ -2,11 +2,27 @@ package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// Repository-level sentinel errors. Repositories (primarily the postgres
|
||||
// implementation) translate RDBMS-specific errors into these typed envelopes
|
||||
// so the service/handler layers can branch with errors.Is without importing
|
||||
// lib/pq or care about SQLSTATE codes.
|
||||
//
|
||||
// G-1: renewal-policy sentinels — DuplicateName → HTTP 409 (pg 23505 on
|
||||
// renewal_policies.name UNIQUE), InUse → HTTP 409 (pg 23503 on the FK from
|
||||
// managed_certificates.renewal_policy_id to renewal_policies.id with ON
|
||||
// DELETE RESTRICT). Both map onto the same 409 status but with distinct
|
||||
// messages so operators can tell them apart.
|
||||
var (
|
||||
ErrRenewalPolicyDuplicateName = errors.New("renewal policy name already exists")
|
||||
ErrRenewalPolicyInUse = errors.New("renewal policy is still referenced by managed certificates")
|
||||
)
|
||||
|
||||
// CertificateRepository defines operations for managing certificates.
|
||||
type CertificateRepository interface {
|
||||
// List returns a paginated list of certificates matching the filter criteria.
|
||||
@@ -47,8 +63,15 @@ type RevocationRepository interface {
|
||||
// protocol endpoints carry it in the request path; RFC 5280 §5.2.3 guarantees
|
||||
// uniqueness only within a single issuer.
|
||||
GetByIssuerAndSerial(ctx context.Context, issuerID, serial string) (*domain.CertificateRevocation, error)
|
||||
// ListAll returns all revocations, ordered by revocation time (for CRL generation).
|
||||
// ListAll returns all revocations, ordered by revocation time (for global
|
||||
// revocation admin views). CRL generation should prefer ListByIssuer to
|
||||
// avoid loading and discarding rows that belong to other issuers.
|
||||
ListAll(ctx context.Context) ([]*domain.CertificateRevocation, error)
|
||||
// ListByIssuer returns revocations for a single issuer, ordered by revocation
|
||||
// time (for CRL generation). Pushing the issuer filter into the query lets
|
||||
// the migration 000012 composite index (issuer_id, serial_number) drive a
|
||||
// prefix scan instead of a full table read + in-memory filter.
|
||||
ListByIssuer(ctx context.Context, issuerID string) ([]*domain.CertificateRevocation, error)
|
||||
// ListByCertificate returns all revocations for a certificate.
|
||||
ListByCertificate(ctx context.Context, certID string) ([]*domain.CertificateRevocation, error)
|
||||
// MarkIssuerNotified updates the issuer_notified flag for a revocation.
|
||||
@@ -251,11 +274,35 @@ type JobRepository interface {
|
||||
}
|
||||
|
||||
// RenewalPolicyRepository defines operations for managing renewal policies.
|
||||
//
|
||||
// G-1: extended with Create/Update/Delete so the new /api/v1/renewal-policies
|
||||
// CRUD surface has a repo contract to lean on. Delete must map the PostgreSQL
|
||||
// 23503 (foreign_key_violation on managed_certificates.renewal_policy_id
|
||||
// REFERENCES renewal_policies(id) ON DELETE RESTRICT) onto the typed
|
||||
// ErrRenewalPolicyInUse sentinel so the handler can emit a 409 Conflict
|
||||
// instead of an opaque 500. Create/Update map PostgreSQL 23505
|
||||
// (unique_violation on renewal_policies.name) onto ErrRenewalPolicyDuplicateName
|
||||
// for the same 409 Conflict reason.
|
||||
//
|
||||
// List stays single-shot (no pagination params) because the production row
|
||||
// count is in the single digits — the service layer paginates/sorts in Go.
|
||||
// Changing the signature would churn every mock without functional benefit.
|
||||
type RenewalPolicyRepository interface {
|
||||
// Get retrieves a renewal policy by ID.
|
||||
Get(ctx context.Context, id string) (*domain.RenewalPolicy, error)
|
||||
// List returns all renewal policies.
|
||||
// List returns all renewal policies, ordered by name.
|
||||
List(ctx context.Context) ([]*domain.RenewalPolicy, error)
|
||||
// Create inserts a new renewal policy. The caller is responsible for
|
||||
// populating Name; Create auto-generates ID (as rp-<slug(name)>) if empty.
|
||||
// Returns ErrRenewalPolicyDuplicateName on pg 23505.
|
||||
Create(ctx context.Context, policy *domain.RenewalPolicy) error
|
||||
// Update modifies an existing renewal policy in-place. Returns
|
||||
// sql.ErrNoRows-wrapped error when id is unknown, or
|
||||
// ErrRenewalPolicyDuplicateName on pg 23505 (name collision with another row).
|
||||
Update(ctx context.Context, id string, policy *domain.RenewalPolicy) error
|
||||
// Delete removes a renewal policy. Returns ErrRenewalPolicyInUse when the
|
||||
// policy is still referenced by rows in managed_certificates (pg 23503).
|
||||
Delete(ctx context.Context, id string) error
|
||||
}
|
||||
|
||||
// PolicyRepository defines operations for managing compliance policies and violations.
|
||||
|
||||
@@ -2,14 +2,26 @@ package postgres
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
_ "github.com/lib/pq"
|
||||
"github.com/lib/pq"
|
||||
)
|
||||
|
||||
// pgErrInvalidPassword is the SQLSTATE for class 28 / code 28P01 —
|
||||
// invalid_password — emitted by PostgreSQL when the client presents
|
||||
// credentials that don't match pg_authid. Defined locally because the
|
||||
// lib/pq package does not export named constants for SQLSTATE codes (it
|
||||
// only exposes the typed string alias pq.ErrorCode and a name-lookup map
|
||||
// at runtime). Pinned as a string constant rather than a pq.ErrorCode
|
||||
// literal so the contract is grep-able from operator-facing log lines.
|
||||
//
|
||||
// Reference: https://www.postgresql.org/docs/16/errcodes-appendix.html
|
||||
const pgErrInvalidPassword = "28P01"
|
||||
|
||||
// NewDB opens a PostgreSQL database connection and sets up connection pooling.
|
||||
func NewDB(connStr string) (*sql.DB, error) {
|
||||
db, err := sql.Open("postgres", connStr)
|
||||
@@ -23,12 +35,65 @@ func NewDB(connStr string) (*sql.DB, error) {
|
||||
|
||||
// Ping to verify connection
|
||||
if err := db.Ping(); err != nil {
|
||||
return nil, fmt.Errorf("failed to ping database: %w", err)
|
||||
return nil, wrapPingError(err)
|
||||
}
|
||||
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// wrapPingError converts a db.Ping() failure into an operator-friendly
|
||||
// diagnostic. The default wrap is the original opaque
|
||||
// `"failed to ping database: <inner>"` shape. The exception is SQLSTATE 28P01
|
||||
// (invalid_password): when postgres rejects the server's credentials we emit
|
||||
// extended guidance that names the most common operator misstep — editing
|
||||
// POSTGRES_PASSWORD in `.env` after the postgres named volume has already
|
||||
// been initialized — and lists both the destructive (`docker compose down -v`)
|
||||
// and non-destructive (`ALTER ROLE`) remediations.
|
||||
//
|
||||
// U-1 (P1, GitHub #10): closes the audit-flagged
|
||||
// cat-u-quickstart_postgres_password_volume_trap finding. The postgres
|
||||
// docker-entrypoint runs initdb only when /var/lib/postgresql/data is empty;
|
||||
// on subsequent boots the password baked into pg_authid on first boot wins
|
||||
// over whatever the env var carries, so the env-vs-pg_authid divergence is
|
||||
// intrinsic to how the postgres image bootstraps and cannot be fixed by us
|
||||
// upstream of pg_authid. The ergonomic answer is to surface a clear
|
||||
// diagnostic at the failure site so operators don't waste an hour on
|
||||
// "is my password right" before discovering the volume needs to be torn
|
||||
// down (or the role's password rotated in-place).
|
||||
//
|
||||
// The wrap chain is preserved via fmt.Errorf("%w", err) so callers using
|
||||
// errors.As(err, &*pq.Error) on the returned value continue to work; this
|
||||
// matches the audit's "no substring matching on err.Error()" requirement
|
||||
// from the M-1 sentinel-error migration.
|
||||
//
|
||||
// Returns nil when err is nil so callers can defensively pipe through this
|
||||
// helper without an extra branch.
|
||||
func wrapPingError(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var pqErr *pq.Error
|
||||
if errors.As(err, &pqErr) && string(pqErr.Code) == pgErrInvalidPassword {
|
||||
return fmt.Errorf(
|
||||
"failed to ping database: postgres rejected the configured credentials "+
|
||||
"(SQLSTATE %s — invalid_password). If you recently rotated POSTGRES_PASSWORD "+
|
||||
"on a docker-compose deploy, the postgres container's data volume still "+
|
||||
"holds the previous password: initdb seeds POSTGRES_PASSWORD into pg_authid "+
|
||||
"only on first boot of a fresh data dir, so editing the env var after that "+
|
||||
"point updates only the certctl-server container. Reset destructively with "+
|
||||
"`docker compose -f deploy/docker-compose.yml down -v && "+
|
||||
"docker compose -f deploy/docker-compose.yml up -d --build` (this DESTROYS "+
|
||||
"all data in the postgres volume), or non-destructively with "+
|
||||
"`docker compose -f deploy/docker-compose.yml exec postgres "+
|
||||
"psql -U certctl -c \"ALTER ROLE certctl PASSWORD '<new-password>';\"` "+
|
||||
"and then redeploy with the matching POSTGRES_PASSWORD. Underlying error: %w",
|
||||
pgErrInvalidPassword, err)
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to ping database: %w", err)
|
||||
}
|
||||
|
||||
// RunMigrations reads and executes SQL migration files from a directory.
|
||||
func RunMigrations(db *sql.DB, migrationsPath string) error {
|
||||
// Check if migrations directory exists
|
||||
@@ -66,3 +131,111 @@ func RunMigrations(db *sql.DB, migrationsPath string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunSeed reads and executes the baseline seed SQL file from the migrations
|
||||
// directory. Designed to run AFTER RunMigrations so every column referenced by
|
||||
// the seed is already in place.
|
||||
//
|
||||
// U-3 (P1, cat-u-seed_initdb_schema_drift): pre-U-3 the deploy compose stack
|
||||
// mounted both a hand-curated subset of `migrations/*.up.sql` and `seed.sql`
|
||||
// into postgres `/docker-entrypoint-initdb.d/`. Postgres applied them at
|
||||
// initdb time. When `seed.sql` was updated to reference columns added by
|
||||
// migrations *after* the mounted cutoff (e.g., `policy_rules.severity` from
|
||||
// `000013_policy_rule_severity.up.sql`), initdb crashed during the seed step
|
||||
// and the container was reported `unhealthy` indefinitely — bare
|
||||
// `docker compose -f deploy/docker-compose.yml up -d --build` from a fresh
|
||||
// clone of v2.0.50 hit this on the first try (GitHub #10 reopened by
|
||||
// mikeakasully). Helm and the example compose files were already runtime-
|
||||
// only (Path B) and worked through the same window.
|
||||
//
|
||||
// Post-U-3 the compose stack drops all initdb mounts; postgres comes up with
|
||||
// an empty schema; the server applies all migrations via RunMigrations and
|
||||
// then this function applies the seed. Single source of truth, removes the
|
||||
// drift hazard architecturally.
|
||||
//
|
||||
// The seed file is expected at `<migrationsPath>/seed.sql`. Missing-file is
|
||||
// treated as a no-op (returns nil) so deployments that explicitly remove the
|
||||
// seed (custom packaging, cert-manager managed schemas) don't break.
|
||||
//
|
||||
// Idempotency: every INSERT in the shipped seed.sql uses
|
||||
// `ON CONFLICT (id) DO NOTHING`, so re-running on a populated DB is safe.
|
||||
// This function is invoked on every server start, so the contract MUST hold.
|
||||
//
|
||||
// Demo seed: `seed_demo.sql` is applied separately by RunDemoSeed below
|
||||
// when CERTCTL_DEMO_SEED=true (see internal/config/config.go::DemoSeed).
|
||||
// Splitting demo from baseline keeps a default deploy from accidentally
|
||||
// landing 90-days-of-fake-history into a real customer database, while
|
||||
// still giving the demo overlay a single source of truth (no more initdb
|
||||
// mounts). The demo seed itself uses ON CONFLICT (id) DO NOTHING so it's
|
||||
// idempotent; missing-file is also tolerated (custom packaging may strip
|
||||
// seed_demo.sql to shrink the image).
|
||||
func RunSeed(db *sql.DB, migrationsPath string) error {
|
||||
if _, err := os.Stat(migrationsPath); os.IsNotExist(err) {
|
||||
return fmt.Errorf("migrations directory not found: %s", migrationsPath)
|
||||
}
|
||||
|
||||
seedPath := filepath.Join(migrationsPath, "seed.sql")
|
||||
content, err := os.ReadFile(seedPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// Missing seed.sql is acceptable — operators may have removed it
|
||||
// for custom-packaging reasons. Return nil rather than fail-loud.
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("failed to read seed file %s: %w", seedPath, err)
|
||||
}
|
||||
|
||||
if _, err := db.Exec(string(content)); err != nil {
|
||||
return fmt.Errorf("failed to execute seed file %s: %w", seedPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunDemoSeed applies the demo overlay seed file
|
||||
// (`<migrationsPath>/seed_demo.sql`) on top of the baseline seed.
|
||||
//
|
||||
// U-3 follow-on: pre-U-3 the demo overlay mounted `seed_demo.sql` into
|
||||
// postgres `/docker-entrypoint-initdb.d/` and relied on initdb to apply it
|
||||
// alongside the schema. Once U-3 dropped the initdb migration mounts, that
|
||||
// path stopped working — postgres comes up empty, and the demo seed
|
||||
// references tables (issuers, certificates, etc.) that wouldn't exist yet
|
||||
// at initdb time. RunDemoSeed restores the demo capability through the
|
||||
// same runtime path RunSeed uses, gated by CERTCTL_DEMO_SEED so production
|
||||
// deploys never accidentally land the fake-history rows.
|
||||
//
|
||||
// Order contract: must run AFTER RunSeed so foreign-key references from
|
||||
// demo rows to baseline rows (e.g., demo certificates referencing
|
||||
// `rp-default` from baseline) resolve cleanly. The caller in
|
||||
// cmd/server/main.go enforces this order.
|
||||
//
|
||||
// Missing-file is acceptable (returns nil) — operators packaging a
|
||||
// production-only image often strip seed_demo.sql to shrink the artifact,
|
||||
// and that should not break boot when CERTCTL_DEMO_SEED happens to be set.
|
||||
//
|
||||
// Idempotency: every INSERT in seed_demo.sql uses
|
||||
// `ON CONFLICT (id) DO NOTHING`, so re-running on a populated DB is safe.
|
||||
// Server restarts in demo mode therefore re-apply the file harmlessly.
|
||||
func RunDemoSeed(db *sql.DB, migrationsPath string) error {
|
||||
if _, err := os.Stat(migrationsPath); os.IsNotExist(err) {
|
||||
return fmt.Errorf("migrations directory not found: %s", migrationsPath)
|
||||
}
|
||||
|
||||
seedPath := filepath.Join(migrationsPath, "seed_demo.sql")
|
||||
content, err := os.ReadFile(seedPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
// Custom production packaging frequently strips this file.
|
||||
// Fail-soft to preserve the U-3 contract: a missing seed file
|
||||
// must not gate server boot.
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("failed to read demo seed file %s: %w", seedPath, err)
|
||||
}
|
||||
|
||||
if _, err := db.Exec(string(content)); err != nil {
|
||||
return fmt.Errorf("failed to execute demo seed file %s: %w", seedPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
// Internal-package tests for db.go — covers the diagnostic dispatch in
|
||||
// wrapPingError. Lives in `package postgres` (not `postgres_test`) so it can
|
||||
// call the unexported helper directly without exposing it on the API surface.
|
||||
//
|
||||
// Sibling integration tests in this directory live in `package postgres_test`
|
||||
// (testcontainers-driven, schema-per-test). They exercise the live-DB
|
||||
// happy path; this file owns the unit-level diagnostic dispatch and runs in
|
||||
// `-short` mode without spinning up postgres.
|
||||
//
|
||||
// U-1 (P1, GitHub #10): closes the audit-flagged
|
||||
// cat-u-quickstart_postgres_password_volume_trap finding by pinning the
|
||||
// post-fix wrap-text contract for `db.Ping()` failures. Pre-U-1 every Ping
|
||||
// error was wrapped with the same opaque `"failed to ping database: %w"`,
|
||||
// so an operator who edited POSTGRES_PASSWORD after first-boot saw only
|
||||
// `pq: password authentication failed for user "certctl"` in the server
|
||||
// log with no pointer to the actual cause (postgres data dir retains the
|
||||
// initial password from first-boot initdb; subsequent boots ignore the env
|
||||
// var). Post-U-1 the SQLSTATE-28P01 path emits a multi-line diagnostic
|
||||
// pointing at the down -v / ALTER ROLE remediation; non-auth failures
|
||||
// retain the original wrap shape so verbose noise does not bleed into
|
||||
// transient connection-refused / timeout paths.
|
||||
package postgres
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/lib/pq"
|
||||
)
|
||||
|
||||
// TestWrapPingError_AuthFailureGuidance asserts the diagnostic wrap fires on
|
||||
// SQLSTATE 28P01 (invalid_password) and contains all three contract elements:
|
||||
// the SQLSTATE code (so operators can grep), the down-v destructive
|
||||
// remediation, and the ALTER ROLE non-destructive remediation. Also asserts
|
||||
// the wrap chain still satisfies errors.As(err, &*pq.Error) so callers that
|
||||
// programmatically inspect the underlying postgres error code keep working.
|
||||
func TestWrapPingError_AuthFailureGuidance(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
original := &pq.Error{
|
||||
Code: pq.ErrorCode("28P01"),
|
||||
Message: `password authentication failed for user "certctl"`,
|
||||
}
|
||||
|
||||
wrapped := wrapPingError(original)
|
||||
if wrapped == nil {
|
||||
t.Fatal("wrapPingError returned nil for a non-nil input")
|
||||
}
|
||||
|
||||
got := wrapped.Error()
|
||||
|
||||
// Contract elements — the operator-facing string is what we ship.
|
||||
wantSubstrings := []string{
|
||||
"SQLSTATE 28P01", // operators grep on this
|
||||
"POSTGRES_PASSWORD", // names the variable that traps
|
||||
"first boot", // the mechanism in plain language
|
||||
"down -v", // destructive remediation
|
||||
"ALTER ROLE", // non-destructive remediation
|
||||
}
|
||||
for _, s := range wantSubstrings {
|
||||
if !strings.Contains(got, s) {
|
||||
t.Errorf("wrap text missing %q\ngot: %s", s, got)
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap chain must still expose the underlying *pq.Error for callers
|
||||
// that want to inspect Code / Detail / Constraint fields. Pre-fix
|
||||
// callers used errors.As(err, &pqErr) on the unwrapped Ping result;
|
||||
// the new wrap is fmt.Errorf("...%w", err) so errors.As must walk it.
|
||||
var pqErr *pq.Error
|
||||
if !errors.As(wrapped, &pqErr) {
|
||||
t.Fatalf("errors.As did not extract *pq.Error from wrapped chain: %v", wrapped)
|
||||
}
|
||||
if pqErr.Code != "28P01" {
|
||||
t.Errorf("extracted pq.Error.Code = %q, want %q", pqErr.Code, "28P01")
|
||||
}
|
||||
}
|
||||
|
||||
// TestWrapPingError_NonAuthErrorPreservesOriginalWrap guards against the
|
||||
// guidance text bleeding into unrelated failure modes. SQLSTATE 08006
|
||||
// (connection_failure) is the canonical non-auth case — server unreachable,
|
||||
// TLS handshake failure, network drop. The wrap should be the original
|
||||
// shape so transient-error log noise does not include the (now lengthy)
|
||||
// volume-state remediation paragraph.
|
||||
func TestWrapPingError_NonAuthErrorPreservesOriginalWrap(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
original := &pq.Error{
|
||||
Code: pq.ErrorCode("08006"),
|
||||
Message: "connection refused",
|
||||
}
|
||||
|
||||
wrapped := wrapPingError(original)
|
||||
if wrapped == nil {
|
||||
t.Fatal("wrapPingError returned nil for a non-nil input")
|
||||
}
|
||||
|
||||
got := wrapped.Error()
|
||||
|
||||
// Original-wrap shape: prefix only, no guidance text.
|
||||
const wantPrefix = "failed to ping database: "
|
||||
if !strings.HasPrefix(got, wantPrefix) {
|
||||
t.Errorf("expected prefix %q, got: %s", wantPrefix, got)
|
||||
}
|
||||
|
||||
// Negative assertions: guidance text MUST NOT appear on non-auth paths.
|
||||
mustNotContain := []string{
|
||||
"SQLSTATE 08006", // we only call out 28P01 specifically
|
||||
"POSTGRES_PASSWORD",
|
||||
"down -v",
|
||||
"ALTER ROLE",
|
||||
}
|
||||
for _, s := range mustNotContain {
|
||||
if strings.Contains(got, s) {
|
||||
t.Errorf("non-auth wrap leaked guidance substring %q\ngot: %s", s, got)
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap chain still walks for errors.As — same contract as auth path.
|
||||
var pqErr *pq.Error
|
||||
if !errors.As(wrapped, &pqErr) {
|
||||
t.Fatalf("errors.As did not extract *pq.Error from non-auth wrapped chain: %v", wrapped)
|
||||
}
|
||||
if pqErr.Code != "08006" {
|
||||
t.Errorf("extracted pq.Error.Code = %q, want %q", pqErr.Code, "08006")
|
||||
}
|
||||
}
|
||||
|
||||
// TestWrapPingError_NonPqErrorPreservesOriginalWrap guards the network-level
|
||||
// case: a pre-handshake failure (TCP refused, DNS, TLS) returns a
|
||||
// non-*pq.Error from db.Ping(). errors.As must return false, the helper
|
||||
// must fall through to the generic wrap, and the chain must remain walkable.
|
||||
func TestWrapPingError_NonPqErrorPreservesOriginalWrap(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
original := errors.New("dial tcp 127.0.0.1:5432: connect: connection refused")
|
||||
|
||||
wrapped := wrapPingError(original)
|
||||
if wrapped == nil {
|
||||
t.Fatal("wrapPingError returned nil for a non-nil input")
|
||||
}
|
||||
|
||||
got := wrapped.Error()
|
||||
|
||||
const wantPrefix = "failed to ping database: "
|
||||
if !strings.HasPrefix(got, wantPrefix) {
|
||||
t.Errorf("expected prefix %q, got: %s", wantPrefix, got)
|
||||
}
|
||||
if strings.Contains(got, "SQLSTATE") || strings.Contains(got, "POSTGRES_PASSWORD") {
|
||||
t.Errorf("network-level wrap leaked SQLSTATE/postgres guidance\ngot: %s", got)
|
||||
}
|
||||
if !errors.Is(wrapped, original) {
|
||||
t.Errorf("errors.Is did not walk to original sentinel: %v", wrapped)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWrapPingError_NilReturnsNil — defensive contract: if Ping returned nil
|
||||
// (no failure), the helper must not synthesize a fake error. This isn't on
|
||||
// the documented call path (NewDB only invokes wrapPingError inside the
|
||||
// `if err != nil` branch), but pinning it prevents a future refactor from
|
||||
// regressing the contract silently.
|
||||
func TestWrapPingError_NilReturnsNil(t *testing.T) {
|
||||
t.Parallel()
|
||||
if got := wrapPingError(nil); got != nil {
|
||||
t.Errorf("wrapPingError(nil) = %v, want nil", got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,453 @@
|
||||
package postgres_test
|
||||
|
||||
// Integration tests for HealthCheckRepository (M48). Closes the L-1
|
||||
// coverage gap flagged in coverage-gap-audit.md: the 453-line repository
|
||||
// shipped in M48 had zero live-DB tests, leaving 11 methods — including
|
||||
// the time-sensitive ListDueForCheck, PurgeHistory, and GetSummary —
|
||||
// without migration-pinned regression protection. These tests exercise
|
||||
// every method against a real Postgres 16 container through the same
|
||||
// schema-per-test harness used by repo_test.go.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
"github.com/shankar0123/certctl/internal/repository/postgres"
|
||||
)
|
||||
|
||||
// newHealthCheck builds a minimal EndpointHealthCheck the repository will
|
||||
// accept. All time-pointer fields are left nil so callers can override
|
||||
// exactly the bits each subtest cares about — Create stores nil pointers
|
||||
// as NULL, which is what ListDueForCheck's `last_checked_at IS NULL`
|
||||
// branch relies on.
|
||||
func newHealthCheck(id, endpoint string, status domain.HealthStatus, enabled bool) *domain.EndpointHealthCheck {
|
||||
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||
return &domain.EndpointHealthCheck{
|
||||
ID: id,
|
||||
Endpoint: endpoint,
|
||||
Status: status,
|
||||
DegradedThreshold: 2,
|
||||
DownThreshold: 5,
|
||||
CheckIntervalSecs: 300,
|
||||
Enabled: enabled,
|
||||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_CRUD covers Create → Get → Update → Delete on
|
||||
// the nominal path. Also verifies the sql.NullTime round-trip: a check
|
||||
// created without timestamps comes back with nil pointers (not
|
||||
// zero-valued time.Time) so downstream Go code can distinguish "never
|
||||
// probed" from "probed at epoch".
|
||||
func TestHealthCheckRepository_CRUD(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
check := newHealthCheck("hc-crud", "example.com:443", domain.HealthStatusHealthy, true)
|
||||
check.ExpectedFingerprint = "sha256:expected"
|
||||
check.ResponseTimeMs = 42
|
||||
|
||||
if err := repo.Create(ctx, check); err != nil {
|
||||
t.Fatalf("Create failed: %v", err)
|
||||
}
|
||||
|
||||
got, err := repo.Get(ctx, "hc-crud")
|
||||
if err != nil {
|
||||
t.Fatalf("Get failed: %v", err)
|
||||
}
|
||||
if got.Endpoint != "example.com:443" {
|
||||
t.Errorf("Endpoint = %q, want example.com:443", got.Endpoint)
|
||||
}
|
||||
if got.Status != domain.HealthStatusHealthy {
|
||||
t.Errorf("Status = %q, want %q", got.Status, domain.HealthStatusHealthy)
|
||||
}
|
||||
if got.ExpectedFingerprint != "sha256:expected" {
|
||||
t.Errorf("ExpectedFingerprint = %q, want sha256:expected", got.ExpectedFingerprint)
|
||||
}
|
||||
if got.CheckIntervalSecs != 300 {
|
||||
t.Errorf("CheckIntervalSecs = %d, want 300", got.CheckIntervalSecs)
|
||||
}
|
||||
if got.LastCheckedAt != nil {
|
||||
t.Errorf("LastCheckedAt = %v, want nil (never probed)", got.LastCheckedAt)
|
||||
}
|
||||
|
||||
// Update: status transition + observed fingerprint assignment.
|
||||
// Update() rewrites UpdatedAt to time.Now() regardless of what we
|
||||
// send, so record the pre-call timestamp to assert monotonic advance.
|
||||
preUpdate := got.UpdatedAt
|
||||
time.Sleep(2 * time.Millisecond) // ensure a measurable delta
|
||||
got.Status = domain.HealthStatusDegraded
|
||||
got.ObservedFingerprint = "sha256:observed"
|
||||
got.ConsecutiveFailures = 2
|
||||
if err := repo.Update(ctx, got); err != nil {
|
||||
t.Fatalf("Update failed: %v", err)
|
||||
}
|
||||
|
||||
got2, err := repo.Get(ctx, "hc-crud")
|
||||
if err != nil {
|
||||
t.Fatalf("Get after Update failed: %v", err)
|
||||
}
|
||||
if got2.Status != domain.HealthStatusDegraded {
|
||||
t.Errorf("Status after Update = %q, want %q", got2.Status, domain.HealthStatusDegraded)
|
||||
}
|
||||
if got2.ObservedFingerprint != "sha256:observed" {
|
||||
t.Errorf("ObservedFingerprint after Update = %q, want sha256:observed", got2.ObservedFingerprint)
|
||||
}
|
||||
if got2.ConsecutiveFailures != 2 {
|
||||
t.Errorf("ConsecutiveFailures after Update = %d, want 2", got2.ConsecutiveFailures)
|
||||
}
|
||||
if !got2.UpdatedAt.After(preUpdate) {
|
||||
t.Errorf("UpdatedAt did not advance: pre=%v post=%v", preUpdate, got2.UpdatedAt)
|
||||
}
|
||||
|
||||
if err := repo.Delete(ctx, "hc-crud"); err != nil {
|
||||
t.Fatalf("Delete failed: %v", err)
|
||||
}
|
||||
if _, err := repo.Get(ctx, "hc-crud"); err == nil {
|
||||
t.Errorf("Get after Delete returned nil error, want not-found")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_GetByEndpoint verifies the secondary lookup
|
||||
// path used by AutoCreateFromDeployment to decide whether to INSERT or
|
||||
// UPDATE. Missing endpoints return an error (not a nil cert) so the
|
||||
// service layer can branch safely.
|
||||
func TestHealthCheckRepository_GetByEndpoint(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
check := newHealthCheck("hc-byep", "svc.internal:443", domain.HealthStatusHealthy, true)
|
||||
if err := repo.Create(ctx, check); err != nil {
|
||||
t.Fatalf("Create failed: %v", err)
|
||||
}
|
||||
|
||||
got, err := repo.GetByEndpoint(ctx, "svc.internal:443")
|
||||
if err != nil {
|
||||
t.Fatalf("GetByEndpoint failed: %v", err)
|
||||
}
|
||||
if got.ID != "hc-byep" {
|
||||
t.Errorf("ID = %q, want hc-byep", got.ID)
|
||||
}
|
||||
|
||||
if _, err := repo.GetByEndpoint(ctx, "never-seen.example.com:443"); err == nil {
|
||||
t.Errorf("GetByEndpoint on unknown endpoint returned nil error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_List_Filters seeds rows across the filter
|
||||
// axes (status, certificate_id, enabled) and asserts each branch of the
|
||||
// WHERE builder, plus the Page/PerPage pagination shim.
|
||||
func TestHealthCheckRepository_List_Filters(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
// Create prereq managed certificate so certificate_id FK can be
|
||||
// populated on one row — proves the filter path joins on a real ID.
|
||||
ownerID, teamID, issuerID, policyID := insertCertPrereqsRaw(t, db, ctx, "hclist")
|
||||
certID := "mc-hc-list"
|
||||
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||
if _, err := db.ExecContext(ctx, `
|
||||
INSERT INTO managed_certificates (id, name, common_name, sans, environment, owner_id, team_id, issuer_id, renewal_policy_id, status, expires_at, tags, created_at, updated_at)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`,
|
||||
certID, "hc-list-cert", "hc.example.com", "{}", "production",
|
||||
ownerID, teamID, issuerID, policyID,
|
||||
string(domain.CertificateStatusActive), now.Add(90*24*time.Hour), "{}",
|
||||
now, now); err != nil {
|
||||
t.Fatalf("seed managed_certificate: %v", err)
|
||||
}
|
||||
|
||||
// 4 rows: healthy+enabled+cert, degraded+enabled, down+disabled, unknown+enabled.
|
||||
rows := []*domain.EndpointHealthCheck{
|
||||
newHealthCheck("hc-list-1", "a.example.com:443", domain.HealthStatusHealthy, true),
|
||||
newHealthCheck("hc-list-2", "b.example.com:443", domain.HealthStatusDegraded, true),
|
||||
newHealthCheck("hc-list-3", "c.example.com:443", domain.HealthStatusDown, false),
|
||||
newHealthCheck("hc-list-4", "d.example.com:443", domain.HealthStatusUnknown, true),
|
||||
}
|
||||
rows[0].CertificateID = &certID
|
||||
for _, r := range rows {
|
||||
if err := repo.Create(ctx, r); err != nil {
|
||||
t.Fatalf("Create %s: %v", r.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Filter: status=healthy → 1 result.
|
||||
got, total, err := repo.List(ctx, &repository.HealthCheckFilter{Status: string(domain.HealthStatusHealthy)})
|
||||
if err != nil {
|
||||
t.Fatalf("List status=healthy: %v", err)
|
||||
}
|
||||
if total != 1 || len(got) != 1 || got[0].ID != "hc-list-1" {
|
||||
t.Errorf("status=healthy: total=%d rows=%d want 1/1 with hc-list-1", total, len(got))
|
||||
}
|
||||
|
||||
// Filter: certificate_id → 1 result.
|
||||
got, total, err = repo.List(ctx, &repository.HealthCheckFilter{CertificateID: certID})
|
||||
if err != nil {
|
||||
t.Fatalf("List certificate_id: %v", err)
|
||||
}
|
||||
if total != 1 || len(got) != 1 || got[0].ID != "hc-list-1" {
|
||||
t.Errorf("certificate_id filter: total=%d rows=%d want 1/1", total, len(got))
|
||||
}
|
||||
|
||||
// Filter: enabled=false → 1 result.
|
||||
disabled := false
|
||||
got, total, err = repo.List(ctx, &repository.HealthCheckFilter{Enabled: &disabled})
|
||||
if err != nil {
|
||||
t.Fatalf("List enabled=false: %v", err)
|
||||
}
|
||||
if total != 1 || len(got) != 1 || got[0].ID != "hc-list-3" {
|
||||
t.Errorf("enabled=false: total=%d rows=%d want 1/1 with hc-list-3", total, len(got))
|
||||
}
|
||||
|
||||
// Pagination: per_page=2 → first page has 2, total reflects all 4.
|
||||
got, total, err = repo.List(ctx, &repository.HealthCheckFilter{Page: 1, PerPage: 2})
|
||||
if err != nil {
|
||||
t.Fatalf("List paginated: %v", err)
|
||||
}
|
||||
if total != 4 {
|
||||
t.Errorf("paginated total = %d, want 4", total)
|
||||
}
|
||||
if len(got) != 2 {
|
||||
t.Errorf("paginated rows = %d, want 2", len(got))
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_ListDueForCheck seeds all four branches of
|
||||
// the WHERE clause — (a) enabled+null → due, (b) enabled+past-due → due,
|
||||
// (c) enabled+recent → not due, (d) disabled+null → excluded — and
|
||||
// asserts the ORDER BY last_checked_at NULLS FIRST, ASC ordering.
|
||||
//
|
||||
// This is the hot path the scheduler's 8th loop hits every 60 seconds;
|
||||
// a correctness regression here silently fails every probe.
|
||||
func TestHealthCheckRepository_ListDueForCheck(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||
pastDue := now.Add(-10 * time.Minute) // > 300s ago, enabled → due
|
||||
recent := now.Add(-30 * time.Second) // < 300s ago, enabled → not due
|
||||
|
||||
// (a) enabled + null last_checked_at — NULLS FIRST puts this first
|
||||
a := newHealthCheck("hc-due-a", "a.example.com:443", domain.HealthStatusUnknown, true)
|
||||
// (b) enabled + past-due last_checked_at
|
||||
b := newHealthCheck("hc-due-b", "b.example.com:443", domain.HealthStatusHealthy, true)
|
||||
b.LastCheckedAt = &pastDue
|
||||
// (c) enabled + recent last_checked_at — must NOT appear
|
||||
c := newHealthCheck("hc-due-c", "c.example.com:443", domain.HealthStatusHealthy, true)
|
||||
c.LastCheckedAt = &recent
|
||||
// (d) disabled + null last_checked_at — must NOT appear
|
||||
d := newHealthCheck("hc-due-d", "d.example.com:443", domain.HealthStatusUnknown, false)
|
||||
|
||||
for _, r := range []*domain.EndpointHealthCheck{a, b, c, d} {
|
||||
if err := repo.Create(ctx, r); err != nil {
|
||||
t.Fatalf("Create %s: %v", r.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
due, err := repo.ListDueForCheck(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("ListDueForCheck: %v", err)
|
||||
}
|
||||
if len(due) != 2 {
|
||||
ids := make([]string, 0, len(due))
|
||||
for _, r := range due {
|
||||
ids = append(ids, r.ID)
|
||||
}
|
||||
t.Fatalf("due rows = %d (%v), want exactly 2 (hc-due-a, hc-due-b)", len(due), ids)
|
||||
}
|
||||
// NULLS FIRST: variant (a) should precede variant (b).
|
||||
if due[0].ID != "hc-due-a" {
|
||||
t.Errorf("due[0].ID = %q, want hc-due-a (NULLS FIRST)", due[0].ID)
|
||||
}
|
||||
if due[1].ID != "hc-due-b" {
|
||||
t.Errorf("due[1].ID = %q, want hc-due-b", due[1].ID)
|
||||
}
|
||||
// Sanity: neither excluded row leaked through.
|
||||
for _, r := range due {
|
||||
if r.ID == "hc-due-c" {
|
||||
t.Errorf("recent-probed row hc-due-c leaked into due set")
|
||||
}
|
||||
if r.ID == "hc-due-d" {
|
||||
t.Errorf("disabled row hc-due-d leaked into due set")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_RecordHistory_GetHistory asserts FIFO
|
||||
// insertion with DESC retrieval (most-recent-first) and the explicit
|
||||
// limit clamp.
|
||||
func TestHealthCheckRepository_RecordHistory_GetHistory(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
parent := newHealthCheck("hc-hist", "hist.example.com:443", domain.HealthStatusHealthy, true)
|
||||
if err := repo.Create(ctx, parent); err != nil {
|
||||
t.Fatalf("Create parent: %v", err)
|
||||
}
|
||||
|
||||
base := time.Now().UTC().Truncate(time.Microsecond)
|
||||
entries := []*domain.HealthHistoryEntry{
|
||||
{ID: "hh-1", HealthCheckID: "hc-hist", Status: string(domain.HealthStatusHealthy), ResponseTimeMs: 10, CheckedAt: base.Add(-3 * time.Minute)},
|
||||
{ID: "hh-2", HealthCheckID: "hc-hist", Status: string(domain.HealthStatusDegraded), ResponseTimeMs: 20, CheckedAt: base.Add(-2 * time.Minute)},
|
||||
{ID: "hh-3", HealthCheckID: "hc-hist", Status: string(domain.HealthStatusHealthy), ResponseTimeMs: 30, CheckedAt: base.Add(-1 * time.Minute)},
|
||||
}
|
||||
for _, e := range entries {
|
||||
if err := repo.RecordHistory(ctx, e); err != nil {
|
||||
t.Fatalf("RecordHistory %s: %v", e.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// limit=2 → newest 2 in DESC order: hh-3, hh-2.
|
||||
got, err := repo.GetHistory(ctx, "hc-hist", 2)
|
||||
if err != nil {
|
||||
t.Fatalf("GetHistory: %v", err)
|
||||
}
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("rows = %d, want 2", len(got))
|
||||
}
|
||||
if got[0].ID != "hh-3" || got[1].ID != "hh-2" {
|
||||
t.Errorf("order = [%s, %s], want [hh-3, hh-2]", got[0].ID, got[1].ID)
|
||||
}
|
||||
|
||||
// limit=0 → default 100 → returns all 3.
|
||||
got, err = repo.GetHistory(ctx, "hc-hist", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("GetHistory limit=0: %v", err)
|
||||
}
|
||||
if len(got) != 3 {
|
||||
t.Errorf("limit=0 rows = %d, want 3", len(got))
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_PurgeHistory exercises the retention-sweep
|
||||
// hot path (scheduler calls this once/day). 5 past + 5 future straddling
|
||||
// the cutoff exposes both sides of the < comparator — an off-by-one
|
||||
// regression here would either nuke live data or skip rows the retention
|
||||
// policy was meant to remove.
|
||||
func TestHealthCheckRepository_PurgeHistory(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
parent := newHealthCheck("hc-purge", "purge.example.com:443", domain.HealthStatusHealthy, true)
|
||||
if err := repo.Create(ctx, parent); err != nil {
|
||||
t.Fatalf("Create parent: %v", err)
|
||||
}
|
||||
|
||||
cutoff := time.Now().UTC().Truncate(time.Microsecond)
|
||||
|
||||
// 5 rows BEFORE cutoff (should be purged).
|
||||
for i := 0; i < 5; i++ {
|
||||
e := &domain.HealthHistoryEntry{
|
||||
ID: "hh-past-" + string(rune('0'+i)),
|
||||
HealthCheckID: "hc-purge",
|
||||
Status: string(domain.HealthStatusHealthy),
|
||||
CheckedAt: cutoff.Add(time.Duration(-10-i) * time.Minute),
|
||||
}
|
||||
if err := repo.RecordHistory(ctx, e); err != nil {
|
||||
t.Fatalf("RecordHistory past %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
// 5 rows AFTER cutoff (should remain).
|
||||
for i := 0; i < 5; i++ {
|
||||
e := &domain.HealthHistoryEntry{
|
||||
ID: "hh-future-" + string(rune('0'+i)),
|
||||
HealthCheckID: "hc-purge",
|
||||
Status: string(domain.HealthStatusHealthy),
|
||||
CheckedAt: cutoff.Add(time.Duration(1+i) * time.Minute),
|
||||
}
|
||||
if err := repo.RecordHistory(ctx, e); err != nil {
|
||||
t.Fatalf("RecordHistory future %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
deleted, err := repo.PurgeHistory(ctx, cutoff)
|
||||
if err != nil {
|
||||
t.Fatalf("PurgeHistory: %v", err)
|
||||
}
|
||||
if deleted != 5 {
|
||||
t.Errorf("deleted = %d, want 5", deleted)
|
||||
}
|
||||
|
||||
remaining, err := repo.GetHistory(ctx, "hc-purge", 100)
|
||||
if err != nil {
|
||||
t.Fatalf("GetHistory after purge: %v", err)
|
||||
}
|
||||
if len(remaining) != 5 {
|
||||
t.Errorf("remaining = %d, want 5", len(remaining))
|
||||
}
|
||||
}
|
||||
|
||||
// TestHealthCheckRepository_GetSummary seeds all 5 HealthStatus values
|
||||
// in a non-uniform distribution so the GROUP BY status branch-table gets
|
||||
// exercised on each arm. The Total field is computed inside the
|
||||
// aggregator — its drift would not surface unless we assert it too.
|
||||
func TestHealthCheckRepository_GetSummary(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewHealthCheckRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
seed := map[domain.HealthStatus]int{
|
||||
domain.HealthStatusHealthy: 3,
|
||||
domain.HealthStatusDegraded: 2,
|
||||
domain.HealthStatusDown: 2,
|
||||
domain.HealthStatusCertMismatch: 1,
|
||||
domain.HealthStatusUnknown: 1,
|
||||
}
|
||||
idx := 0
|
||||
for status, count := range seed {
|
||||
for i := 0; i < count; i++ {
|
||||
check := newHealthCheck(
|
||||
"hc-sum-"+string(rune('a'+idx)),
|
||||
"sum.example.com:443",
|
||||
status,
|
||||
true,
|
||||
)
|
||||
// Endpoint uniqueness isn't enforced by the schema but making
|
||||
// it unique documents intent and rules out false-positives.
|
||||
check.Endpoint = check.ID + "-" + check.Endpoint
|
||||
if err := repo.Create(ctx, check); err != nil {
|
||||
t.Fatalf("Create %s: %v", check.ID, err)
|
||||
}
|
||||
idx++
|
||||
}
|
||||
}
|
||||
|
||||
summary, err := repo.GetSummary(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("GetSummary: %v", err)
|
||||
}
|
||||
if summary.Healthy != 3 {
|
||||
t.Errorf("Healthy = %d, want 3", summary.Healthy)
|
||||
}
|
||||
if summary.Degraded != 2 {
|
||||
t.Errorf("Degraded = %d, want 2", summary.Degraded)
|
||||
}
|
||||
if summary.Down != 2 {
|
||||
t.Errorf("Down = %d, want 2", summary.Down)
|
||||
}
|
||||
if summary.CertMismatch != 1 {
|
||||
t.Errorf("CertMismatch = %d, want 1", summary.CertMismatch)
|
||||
}
|
||||
if summary.Unknown != 1 {
|
||||
t.Errorf("Unknown = %d, want 1", summary.Unknown)
|
||||
}
|
||||
if summary.Total != 9 {
|
||||
t.Errorf("Total = %d, want 9 (3+2+2+1+1)", summary.Total)
|
||||
}
|
||||
}
|
||||
@@ -22,19 +22,37 @@ func NewNotificationRepository(db *sql.DB) *NotificationRepository {
|
||||
return &NotificationRepository{db: db}
|
||||
}
|
||||
|
||||
// Create stores a new notification
|
||||
// Create stores a new notification.
|
||||
//
|
||||
// U-3 ride-along (cat-o-notification_created_at_dead_field, P2): the
|
||||
// `created_at` column is added to notification_events by migration 000017.
|
||||
// Pre-U-3 the Go domain.NotificationEvent had a CreatedAt field but the
|
||||
// INSERT path never set it AND no DB column existed — the JSON API
|
||||
// serialised the field as `0001-01-01T00:00:00Z`, breaking timestamp
|
||||
// ordering on operator dashboards and any consumer that filtered by age.
|
||||
// Post-U-3 the column exists with a NOT NULL DEFAULT NOW() backstop, and
|
||||
// this INSERT explicitly sets it from the domain field. If the caller
|
||||
// hasn't populated CreatedAt (zero-value time.Time) we substitute
|
||||
// time.Now() so the row never carries the placeholder zero-time forward
|
||||
// — the DEFAULT would handle this too, but emitting the value explicitly
|
||||
// keeps the wire-level JSON consistent with what the row will hold once
|
||||
// scanNotification reads it back, and prevents a clock-skew gap between
|
||||
// "Go computed CreatedAt" and "DB applied DEFAULT NOW()" on the read path.
|
||||
func (r *NotificationRepository) Create(ctx context.Context, notif *domain.NotificationEvent) error {
|
||||
if notif.ID == "" {
|
||||
notif.ID = uuid.New().String()
|
||||
}
|
||||
if notif.CreatedAt.IsZero() {
|
||||
notif.CreatedAt = time.Now()
|
||||
}
|
||||
|
||||
err := r.db.QueryRowContext(ctx, `
|
||||
INSERT INTO notification_events (
|
||||
id, type, certificate_id, channel, recipient, message, sent_at, status, error
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
id, type, certificate_id, channel, recipient, message, sent_at, status, error, created_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
|
||||
RETURNING id
|
||||
`, notif.ID, notif.Type, notif.CertificateID, notif.Channel, notif.Recipient,
|
||||
notif.Message, notif.SentAt, notif.Status, notif.Error).Scan(¬if.ID)
|
||||
notif.Message, notif.SentAt, notif.Status, notif.Error, notif.CreatedAt).Scan(¬if.ID)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create notification: %w", err)
|
||||
@@ -102,12 +120,14 @@ func (r *NotificationRepository) List(ctx context.Context, filter *repository.No
|
||||
|
||||
// Get paginated results. I-005 extends the SELECT with the three retry
|
||||
// columns (retry_count / next_retry_at / last_error) so scanNotification
|
||||
// can populate the new fields on domain.NotificationEvent. The column
|
||||
// order here MUST stay in lockstep with scanNotification below.
|
||||
// can populate the new fields on domain.NotificationEvent. U-3 extends
|
||||
// it once more with `created_at` (column added by migration 000017) so
|
||||
// the field is no longer serialized as 0001-01-01. The column order
|
||||
// here MUST stay in lockstep with scanNotification below.
|
||||
offset := (filter.Page - 1) * filter.PerPage
|
||||
query := fmt.Sprintf(`
|
||||
SELECT id, type, certificate_id, channel, recipient, message, sent_at, status, error,
|
||||
retry_count, next_retry_at, last_error
|
||||
retry_count, next_retry_at, last_error, created_at
|
||||
FROM notification_events
|
||||
%s
|
||||
ORDER BY sent_at DESC NULLS LAST
|
||||
@@ -162,8 +182,14 @@ func (r *NotificationRepository) UpdateStatus(ctx context.Context, id string, st
|
||||
|
||||
// scanNotification scans a notification from a row or rows.
|
||||
//
|
||||
// I-005 extends the scan list from 9 → 12 columns (adds retry_count,
|
||||
// next_retry_at, last_error). Every caller — List and the four new retry
|
||||
// I-005 extended the scan list from 9 → 12 columns (adds retry_count,
|
||||
// next_retry_at, last_error). U-3 extends it once more to 13 columns by
|
||||
// appending `created_at` (column added by migration 000017,
|
||||
// cat-o-notification_created_at_dead_field). CreatedAt scans into a
|
||||
// non-pointer time.Time because the migration declares the column
|
||||
// NOT NULL with DEFAULT NOW().
|
||||
//
|
||||
// Every caller — List, ListRetryEligible, and the four other I-005 retry
|
||||
// methods below — funnels rows through this helper, so the SELECT column
|
||||
// order in every query must match the Scan order here exactly. RetryCount
|
||||
// scans into an `int` (migration 000016 declares the column NOT NULL with
|
||||
@@ -176,7 +202,7 @@ func scanNotification(scanner interface {
|
||||
var notif domain.NotificationEvent
|
||||
err := scanner.Scan(¬if.ID, ¬if.Type, ¬if.CertificateID, ¬if.Channel,
|
||||
¬if.Recipient, ¬if.Message, ¬if.SentAt, ¬if.Status, ¬if.Error,
|
||||
¬if.RetryCount, ¬if.NextRetryAt, ¬if.LastError)
|
||||
¬if.RetryCount, ¬if.NextRetryAt, ¬if.LastError, ¬if.CreatedAt)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan notification: %w", err)
|
||||
@@ -248,7 +274,7 @@ func (r *NotificationRepository) ListRetryEligible(ctx context.Context, now time
|
||||
|
||||
rows, err := r.db.QueryContext(ctx, `
|
||||
SELECT id, type, certificate_id, channel, recipient, message, sent_at, status, error,
|
||||
retry_count, next_retry_at, last_error
|
||||
retry_count, next_retry_at, last_error, created_at
|
||||
FROM notification_events
|
||||
WHERE status = 'failed'
|
||||
AND next_retry_at IS NOT NULL
|
||||
|
||||
@@ -339,6 +339,95 @@ func TestNotificationRepository_Requeue(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestNotificationRepository_CreatedAt_IsPersisted is the U-3 ride-along
|
||||
// regression for cat-o-notification_created_at_dead_field. Pre-U-3 the
|
||||
// Go domain.NotificationEvent had a CreatedAt field but the DB had no
|
||||
// column — JSON serialisation produced 0001-01-01T00:00:00Z, breaking
|
||||
// timestamp ordering on operator dashboards. Post-U-3 migration 000017
|
||||
// adds the column NOT NULL DEFAULT NOW(), Create populates it, and
|
||||
// scanNotification reads it back.
|
||||
//
|
||||
// The contract under test is round-trip equivalence: the timestamp the
|
||||
// caller sets goes into the DB and comes back out unchanged (modulo
|
||||
// PostgreSQL's microsecond precision). Truncate to microseconds before
|
||||
// comparing because TIMESTAMPTZ rounds nanoseconds away.
|
||||
func TestNotificationRepository_CreatedAt_IsPersisted(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewNotificationRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
// A specific, recognisable timestamp. Truncated to microseconds so
|
||||
// the post-roundtrip equality assertion isn't tripped up by Postgres
|
||||
// dropping the nanosecond tail.
|
||||
want := time.Now().UTC().Add(-2 * time.Hour).Truncate(time.Microsecond)
|
||||
|
||||
notif := &domain.NotificationEvent{
|
||||
Type: domain.NotificationTypeExpirationWarning,
|
||||
Channel: domain.NotificationChannelWebhook,
|
||||
Recipient: "https://hooks.example.com/u3",
|
||||
Message: "U-3 round-trip witness",
|
||||
Status: string(domain.NotificationStatusPending),
|
||||
CreatedAt: want,
|
||||
}
|
||||
if err := repo.Create(ctx, notif); err != nil {
|
||||
t.Fatalf("Create failed: %v", err)
|
||||
}
|
||||
|
||||
// Re-read via List (which goes through scanNotification) so we're
|
||||
// testing both the INSERT and SELECT halves of the U-3 plumbing.
|
||||
got, err := repo.List(ctx, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("List failed: %v", err)
|
||||
}
|
||||
if len(got) != 1 {
|
||||
t.Fatalf("List returned %d rows, want 1", len(got))
|
||||
}
|
||||
if !got[0].CreatedAt.Equal(want) {
|
||||
t.Errorf("CreatedAt round-trip mismatch:\n set: %v\n got: %v\n"+
|
||||
"Pre-U-3 this would have come back as 0001-01-01 because the column didn't exist.",
|
||||
want, got[0].CreatedAt)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNotificationRepository_CreatedAt_DefaultsToNow verifies the helper
|
||||
// behavior in Create: when the caller hands over an event with the
|
||||
// zero-value CreatedAt, Create substitutes time.Now() rather than
|
||||
// trusting the DB DEFAULT. This keeps wire-level JSON consistent with
|
||||
// what the row will hold once it's read back, and avoids a clock-skew
|
||||
// gap between "Go computed the timestamp" and "DB applied DEFAULT NOW()".
|
||||
func TestNotificationRepository_CreatedAt_DefaultsToNow(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewNotificationRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
before := time.Now().UTC().Add(-time.Second)
|
||||
|
||||
notif := &domain.NotificationEvent{
|
||||
Type: domain.NotificationTypeExpirationWarning,
|
||||
Channel: domain.NotificationChannelWebhook,
|
||||
Recipient: "https://hooks.example.com/zerotime",
|
||||
Message: "U-3 zero-time fallback",
|
||||
Status: string(domain.NotificationStatusPending),
|
||||
// CreatedAt left zero on purpose — the contract is that Create
|
||||
// fills it in from time.Now() when it's unset.
|
||||
}
|
||||
if err := repo.Create(ctx, notif); err != nil {
|
||||
t.Fatalf("Create failed: %v", err)
|
||||
}
|
||||
|
||||
after := time.Now().UTC().Add(time.Second)
|
||||
|
||||
if notif.CreatedAt.IsZero() {
|
||||
t.Fatalf("CreatedAt is still zero after Create — the fallback in NotificationRepository.Create did not fire")
|
||||
}
|
||||
if notif.CreatedAt.Before(before) || notif.CreatedAt.After(after) {
|
||||
t.Errorf("CreatedAt = %v is outside the [%v, %v] window — the substituted time.Now() should fall inside the test's wall-clock bracket",
|
||||
notif.CreatedAt, before, after)
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
// past returns a stable "5 minutes ago" time for fixture seeding. Truncated
|
||||
|
||||
@@ -4,46 +4,61 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/lib/pq"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
)
|
||||
|
||||
// RenewalPolicyRepository implements repository.RenewalPolicyRepository
|
||||
// RenewalPolicyRepository implements repository.RenewalPolicyRepository.
|
||||
type RenewalPolicyRepository struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
// NewRenewalPolicyRepository creates a new RenewalPolicyRepository
|
||||
// NewRenewalPolicyRepository creates a new RenewalPolicyRepository.
|
||||
func NewRenewalPolicyRepository(db *sql.DB) *RenewalPolicyRepository {
|
||||
return &RenewalPolicyRepository{db: db}
|
||||
}
|
||||
|
||||
// Get retrieves a renewal policy by ID
|
||||
func (r *RenewalPolicyRepository) Get(ctx context.Context, id string) (*domain.RenewalPolicy, error) {
|
||||
// SELECT column order is the shared contract between scanRenewalPolicy and
|
||||
// every SELECT/RETURNING in this file. Keep them in lockstep; if you add a
|
||||
// new column, add it to all SELECTs, all scan calls, and scanRenewalPolicy.
|
||||
//
|
||||
// Note: certificate_profile_id and agent_group_id live on renewal_policies
|
||||
// (migrations 000003 and 000004) but are deliberately NOT read here — that
|
||||
// pre-existing drift is out of G-1's minimum-viable-delta and is tracked in
|
||||
// the design doc §8. Introducing them would change struct shapes / JSON tags
|
||||
// and require domain-layer churn we're not taking on in this change.
|
||||
const renewalPolicyColumns = `
|
||||
id, name, renewal_window_days, auto_renew, max_retries,
|
||||
retry_interval_seconds, alert_thresholds_days, created_at, updated_at
|
||||
`
|
||||
|
||||
// scanRenewalPolicy decodes one renewal_policies row from a Row or Rows
|
||||
// scanner, unmarshaling alert_thresholds_days JSONB into the domain slice.
|
||||
// Malformed JSONB silently falls back to DefaultAlertThresholds() — same
|
||||
// behavior as the pre-G-1 code so we don't change observable semantics.
|
||||
func scanRenewalPolicy(scanner interface {
|
||||
Scan(dest ...any) error
|
||||
}) (*domain.RenewalPolicy, error) {
|
||||
var policy domain.RenewalPolicy
|
||||
var thresholdsJSON []byte
|
||||
|
||||
err := r.db.QueryRowContext(ctx, `
|
||||
SELECT id, name, renewal_window_days, auto_renew, max_retries,
|
||||
retry_interval_minutes, alert_thresholds_days, created_at, updated_at
|
||||
FROM renewal_policies
|
||||
WHERE id = $1
|
||||
`, id).Scan(&policy.ID, &policy.Name, &policy.RenewalWindowDays, &policy.AutoRenew,
|
||||
if err := scanner.Scan(
|
||||
&policy.ID, &policy.Name, &policy.RenewalWindowDays, &policy.AutoRenew,
|
||||
&policy.MaxRetries, &policy.RetryInterval, &thresholdsJSON,
|
||||
&policy.CreatedAt, &policy.UpdatedAt)
|
||||
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, fmt.Errorf("renewal policy not found: %s", id)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to query renewal policy: %w", err)
|
||||
&policy.CreatedAt, &policy.UpdatedAt,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse alert thresholds from JSONB
|
||||
if len(thresholdsJSON) > 0 {
|
||||
if err := json.Unmarshal(thresholdsJSON, &policy.AlertThresholdsDays); err != nil {
|
||||
// Fall back to defaults if JSON is malformed
|
||||
policy.AlertThresholdsDays = domain.DefaultAlertThresholds()
|
||||
}
|
||||
}
|
||||
@@ -51,14 +66,23 @@ func (r *RenewalPolicyRepository) Get(ctx context.Context, id string) (*domain.R
|
||||
return &policy, nil
|
||||
}
|
||||
|
||||
// List returns all renewal policies
|
||||
// Get retrieves a renewal policy by ID.
|
||||
func (r *RenewalPolicyRepository) Get(ctx context.Context, id string) (*domain.RenewalPolicy, error) {
|
||||
row := r.db.QueryRowContext(ctx, `SELECT `+renewalPolicyColumns+` FROM renewal_policies WHERE id = $1`, id)
|
||||
policy, err := scanRenewalPolicy(row)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, fmt.Errorf("renewal policy not found: %s", id)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to query renewal policy: %w", err)
|
||||
}
|
||||
return policy, nil
|
||||
}
|
||||
|
||||
// List returns all renewal policies, ordered by name (matches the index on
|
||||
// renewal_policies.name from migration 000001 so ORDER BY is index-served).
|
||||
func (r *RenewalPolicyRepository) List(ctx context.Context) ([]*domain.RenewalPolicy, error) {
|
||||
rows, err := r.db.QueryContext(ctx, `
|
||||
SELECT id, name, renewal_window_days, auto_renew, max_retries,
|
||||
retry_interval_minutes, alert_thresholds_days, created_at, updated_at
|
||||
FROM renewal_policies
|
||||
ORDER BY name
|
||||
`)
|
||||
rows, err := r.db.QueryContext(ctx, `SELECT `+renewalPolicyColumns+` FROM renewal_policies ORDER BY name`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query renewal policies: %w", err)
|
||||
}
|
||||
@@ -66,22 +90,11 @@ func (r *RenewalPolicyRepository) List(ctx context.Context) ([]*domain.RenewalPo
|
||||
|
||||
var policies []*domain.RenewalPolicy
|
||||
for rows.Next() {
|
||||
var policy domain.RenewalPolicy
|
||||
var thresholdsJSON []byte
|
||||
|
||||
if err := rows.Scan(&policy.ID, &policy.Name, &policy.RenewalWindowDays, &policy.AutoRenew,
|
||||
&policy.MaxRetries, &policy.RetryInterval, &thresholdsJSON,
|
||||
&policy.CreatedAt, &policy.UpdatedAt); err != nil {
|
||||
policy, err := scanRenewalPolicy(rows)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan renewal policy: %w", err)
|
||||
}
|
||||
|
||||
if len(thresholdsJSON) > 0 {
|
||||
if err := json.Unmarshal(thresholdsJSON, &policy.AlertThresholdsDays); err != nil {
|
||||
policy.AlertThresholdsDays = domain.DefaultAlertThresholds()
|
||||
}
|
||||
}
|
||||
|
||||
policies = append(policies, &policy)
|
||||
policies = append(policies, policy)
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
@@ -90,3 +103,187 @@ func (r *RenewalPolicyRepository) List(ctx context.Context) ([]*domain.RenewalPo
|
||||
|
||||
return policies, nil
|
||||
}
|
||||
|
||||
// slugRegex matches non-alphanumeric characters that slugifyPolicyName strips.
|
||||
var slugRegex = regexp.MustCompile(`[^a-z0-9-]+`)
|
||||
|
||||
// slugifyPolicyName produces `rp-<slug>` for an auto-generated policy ID.
|
||||
// Slug: lowercase, spaces→hyphens, non-alphanumeric stripped, trimmed to 64
|
||||
// chars. Matches the existing seed convention (rp-default, rp-standard,
|
||||
// rp-urgent). Collision resolution is handled by Create's retry loop.
|
||||
func slugifyPolicyName(name string) string {
|
||||
slug := strings.ToLower(strings.TrimSpace(name))
|
||||
slug = strings.ReplaceAll(slug, " ", "-")
|
||||
slug = slugRegex.ReplaceAllString(slug, "")
|
||||
slug = strings.Trim(slug, "-")
|
||||
if slug == "" {
|
||||
slug = "policy"
|
||||
}
|
||||
if len(slug) > 64 {
|
||||
slug = slug[:64]
|
||||
}
|
||||
return "rp-" + slug
|
||||
}
|
||||
|
||||
// isUniqueViolation reports whether err is a PostgreSQL 23505 unique_violation.
|
||||
// Used by Create/Update to translate name-collision errors onto the typed
|
||||
// ErrRenewalPolicyDuplicateName sentinel.
|
||||
func isUniqueViolation(err error) bool {
|
||||
var pqErr *pq.Error
|
||||
return errors.As(err, &pqErr) && pqErr.Code == "23505"
|
||||
}
|
||||
|
||||
// isForeignKeyViolation reports whether err is a PostgreSQL 23503
|
||||
// foreign_key_violation. Used by Delete to translate ON DELETE RESTRICT
|
||||
// failures onto the typed ErrRenewalPolicyInUse sentinel.
|
||||
func isForeignKeyViolation(err error) bool {
|
||||
var pqErr *pq.Error
|
||||
return errors.As(err, &pqErr) && pqErr.Code == "23503"
|
||||
}
|
||||
|
||||
// Create inserts a new renewal policy. If policy.ID is empty, auto-generates
|
||||
// `rp-<slug(name)>` with -2/-3/... suffixes on collision (up to 10 attempts).
|
||||
// Returns ErrRenewalPolicyDuplicateName on pg 23505 (name collision).
|
||||
//
|
||||
// alert_thresholds_days is marshaled to JSONB here rather than relying on the
|
||||
// DB default because the service layer already applies DefaultAlertThresholds
|
||||
// for empty input — the DB default is a safety net, not the primary path.
|
||||
func (r *RenewalPolicyRepository) Create(ctx context.Context, policy *domain.RenewalPolicy) error {
|
||||
if policy == nil {
|
||||
return errors.New("renewal policy is nil")
|
||||
}
|
||||
|
||||
thresholdsJSON, err := json.Marshal(policy.AlertThresholdsDays)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal alert thresholds: %w", err)
|
||||
}
|
||||
|
||||
// ID auto-generation with collision retry. We attempt up to 10 suffix
|
||||
// variants (rp-foo, rp-foo-2, ..., rp-foo-10) before giving up — the
|
||||
// 23505 error the caller gets back past that point is on Name (their
|
||||
// job to fix) rather than on a slug-collision we swallowed.
|
||||
baseID := policy.ID
|
||||
if baseID == "" {
|
||||
baseID = slugifyPolicyName(policy.Name)
|
||||
}
|
||||
|
||||
insertSQL := `
|
||||
INSERT INTO renewal_policies (
|
||||
id, name, renewal_window_days, auto_renew, max_retries,
|
||||
retry_interval_seconds, alert_thresholds_days, created_at, updated_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW())
|
||||
RETURNING ` + renewalPolicyColumns
|
||||
|
||||
maxAttempts := 10
|
||||
if policy.ID != "" {
|
||||
// Caller supplied a specific ID — no collision-retry, just one shot.
|
||||
maxAttempts = 1
|
||||
}
|
||||
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
candidateID := baseID
|
||||
if attempt > 1 {
|
||||
candidateID = fmt.Sprintf("%s-%d", baseID, attempt)
|
||||
}
|
||||
|
||||
row := r.db.QueryRowContext(ctx, insertSQL,
|
||||
candidateID, policy.Name, policy.RenewalWindowDays, policy.AutoRenew,
|
||||
policy.MaxRetries, policy.RetryInterval, thresholdsJSON,
|
||||
)
|
||||
|
||||
inserted, scanErr := scanRenewalPolicy(row)
|
||||
if scanErr == nil {
|
||||
*policy = *inserted
|
||||
return nil
|
||||
}
|
||||
|
||||
if isUniqueViolation(scanErr) {
|
||||
// Determine which unique constraint — if it's the name UNIQUE
|
||||
// we can't recover (caller has to pick a new name); if it's the
|
||||
// primary-key slug collision we loop to the next suffix.
|
||||
var pqErr *pq.Error
|
||||
errors.As(scanErr, &pqErr)
|
||||
// Postgres reports the constraint name in pqErr.Constraint;
|
||||
// renewal_policies_name_key is the name UNIQUE, renewal_policies_pkey
|
||||
// is the PK. Name collision is terminal, PK collision is retryable.
|
||||
if pqErr.Constraint != "" && !strings.Contains(pqErr.Constraint, "pkey") {
|
||||
return repository.ErrRenewalPolicyDuplicateName
|
||||
}
|
||||
// PK collision — try next suffix.
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to insert renewal policy: %w", scanErr)
|
||||
}
|
||||
|
||||
// Exhausted retry budget on PK collisions — surface as duplicate so the
|
||||
// caller at least gets a 409 rather than a mysterious 500.
|
||||
return repository.ErrRenewalPolicyDuplicateName
|
||||
}
|
||||
|
||||
// Update modifies an existing renewal policy by ID. Returns an error wrapping
|
||||
// sql.ErrNoRows when id is unknown (detected by RETURNING returning zero rows),
|
||||
// or ErrRenewalPolicyDuplicateName on pg 23505 (name collision with another row).
|
||||
func (r *RenewalPolicyRepository) Update(ctx context.Context, id string, policy *domain.RenewalPolicy) error {
|
||||
if policy == nil {
|
||||
return errors.New("renewal policy is nil")
|
||||
}
|
||||
|
||||
thresholdsJSON, err := json.Marshal(policy.AlertThresholdsDays)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal alert thresholds: %w", err)
|
||||
}
|
||||
|
||||
row := r.db.QueryRowContext(ctx, `
|
||||
UPDATE renewal_policies SET
|
||||
name = $2,
|
||||
renewal_window_days = $3,
|
||||
auto_renew = $4,
|
||||
max_retries = $5,
|
||||
retry_interval_seconds = $6,
|
||||
alert_thresholds_days = $7,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
RETURNING `+renewalPolicyColumns,
|
||||
id, policy.Name, policy.RenewalWindowDays, policy.AutoRenew,
|
||||
policy.MaxRetries, policy.RetryInterval, thresholdsJSON,
|
||||
)
|
||||
|
||||
updated, err := scanRenewalPolicy(row)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return fmt.Errorf("renewal policy not found: %s", id)
|
||||
}
|
||||
if isUniqueViolation(err) {
|
||||
return repository.ErrRenewalPolicyDuplicateName
|
||||
}
|
||||
return fmt.Errorf("failed to update renewal policy: %w", err)
|
||||
}
|
||||
|
||||
*policy = *updated
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes a renewal policy by ID. Returns ErrRenewalPolicyInUse when
|
||||
// the policy is still referenced by rows in managed_certificates (pg 23503
|
||||
// foreign_key_violation against the ON DELETE RESTRICT FK from
|
||||
// managed_certificates.renewal_policy_id). Returns an error wrapping
|
||||
// sql.ErrNoRows when id is unknown.
|
||||
func (r *RenewalPolicyRepository) Delete(ctx context.Context, id string) error {
|
||||
result, err := r.db.ExecContext(ctx, `DELETE FROM renewal_policies WHERE id = $1`, id)
|
||||
if err != nil {
|
||||
if isForeignKeyViolation(err) {
|
||||
return repository.ErrRenewalPolicyInUse
|
||||
}
|
||||
return fmt.Errorf("failed to delete renewal policy: %w", err)
|
||||
}
|
||||
|
||||
rows, err := result.RowsAffected()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read RowsAffected for delete: %w", err)
|
||||
}
|
||||
if rows == 0 {
|
||||
return fmt.Errorf("renewal policy not found: %s", id)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,317 @@
|
||||
package postgres_test
|
||||
|
||||
// Integration tests for RenewalPolicyRepository (post-G-1, 289 lines, 5
|
||||
// methods). Closes the L-1 coverage gap flagged in coverage-gap-audit.md:
|
||||
// the repository's auto-generated-ID collision retry loop and its two
|
||||
// typed error sentinels (ErrRenewalPolicyDuplicateName on pg 23505,
|
||||
// ErrRenewalPolicyInUse on pg 23503) shipped with zero live-DB regression
|
||||
// coverage — a mock-only test surface cannot exercise the PostgreSQL
|
||||
// constraint semantics these paths depend on.
|
||||
//
|
||||
// The audit listed the file as "92 lines, 2 methods"; that was stale
|
||||
// pre-G-1. Current state is 5 methods (Get/List/Create/Update/Delete),
|
||||
// all covered below.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
"github.com/shankar0123/certctl/internal/repository/postgres"
|
||||
)
|
||||
|
||||
// TestRenewalPolicyRepository_CRUD exercises the happy path for all five
|
||||
// interface methods. In particular it drives the slug-based ID
|
||||
// auto-generation branch (policy.ID left empty → Create emits
|
||||
// rp-<slug(name)>) so any regression to slugifyPolicyName or the retry
|
||||
// loop surfaces immediately. The AlertThresholdsDays JSONB round-trip is
|
||||
// asserted end-to-end: marshal on Create → store as JSONB → scan back on
|
||||
// Get preserves the slice ordering and values.
|
||||
func TestRenewalPolicyRepository_CRUD(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewRenewalPolicyRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
// Create: leave ID empty so the repository generates rp-<slug(name)>.
|
||||
// "Prod TLS 90d" → rp-prod-tls-90d per slugifyPolicyName's rules
|
||||
// (lowercase, spaces→hyphens, non-alphanumeric stripped).
|
||||
policy := &domain.RenewalPolicy{
|
||||
Name: "Prod TLS 90d",
|
||||
RenewalWindowDays: 30,
|
||||
AutoRenew: true,
|
||||
MaxRetries: 5,
|
||||
RetryInterval: 3600, // stored as seconds in retry_interval_seconds column (renamed in 000017_db_coupling_cleanup, U-3)
|
||||
AlertThresholdsDays: []int{30, 14, 7, 0},
|
||||
}
|
||||
|
||||
if err := repo.Create(ctx, policy); err != nil {
|
||||
t.Fatalf("Create failed: %v", err)
|
||||
}
|
||||
if policy.ID != "rp-prod-tls-90d" {
|
||||
t.Errorf("auto-generated ID = %q, want %q", policy.ID, "rp-prod-tls-90d")
|
||||
}
|
||||
if policy.CreatedAt.IsZero() {
|
||||
t.Error("Create did not populate CreatedAt (RETURNING clause regression?)")
|
||||
}
|
||||
if policy.UpdatedAt.IsZero() {
|
||||
t.Error("Create did not populate UpdatedAt (RETURNING clause regression?)")
|
||||
}
|
||||
|
||||
// Get: pull the just-created row back and confirm every stored field
|
||||
// survives the scanRenewalPolicy path, including the JSONB unmarshal.
|
||||
got, err := repo.Get(ctx, policy.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get failed: %v", err)
|
||||
}
|
||||
if got.Name != "Prod TLS 90d" {
|
||||
t.Errorf("Get: Name = %q, want %q", got.Name, "Prod TLS 90d")
|
||||
}
|
||||
if got.RenewalWindowDays != 30 {
|
||||
t.Errorf("Get: RenewalWindowDays = %d, want 30", got.RenewalWindowDays)
|
||||
}
|
||||
if !got.AutoRenew {
|
||||
t.Error("Get: AutoRenew = false, want true")
|
||||
}
|
||||
if got.MaxRetries != 5 {
|
||||
t.Errorf("Get: MaxRetries = %d, want 5", got.MaxRetries)
|
||||
}
|
||||
if len(got.AlertThresholdsDays) != 4 {
|
||||
t.Fatalf("Get: AlertThresholdsDays length = %d, want 4 (JSONB round-trip regression)", len(got.AlertThresholdsDays))
|
||||
}
|
||||
for i, want := range []int{30, 14, 7, 0} {
|
||||
if got.AlertThresholdsDays[i] != want {
|
||||
t.Errorf("Get: AlertThresholdsDays[%d] = %d, want %d", i, got.AlertThresholdsDays[i], want)
|
||||
}
|
||||
}
|
||||
|
||||
// Update: 3-arg signature is a house invariant — don't let it slip to
|
||||
// 2-arg without the test catching the breakage. Tweak scalar + JSONB
|
||||
// simultaneously so both SET branches exercise.
|
||||
updated := *got
|
||||
updated.Name = "Prod TLS 90d (tightened)"
|
||||
updated.RenewalWindowDays = 45
|
||||
updated.AlertThresholdsDays = []int{45, 30, 14, 7, 0}
|
||||
|
||||
// Sleep long enough that NOW() ticks past the Create timestamp so we
|
||||
// can assert UpdatedAt monotonicity without a flaky equality check.
|
||||
time.Sleep(2 * time.Millisecond)
|
||||
|
||||
if err := repo.Update(ctx, policy.ID, &updated); err != nil {
|
||||
t.Fatalf("Update failed: %v", err)
|
||||
}
|
||||
if !updated.UpdatedAt.After(got.UpdatedAt) {
|
||||
t.Errorf("Update: UpdatedAt %v not after Create's %v (RETURNING NOW() regression?)", updated.UpdatedAt, got.UpdatedAt)
|
||||
}
|
||||
|
||||
refetched, err := repo.Get(ctx, policy.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get after Update failed: %v", err)
|
||||
}
|
||||
if refetched.Name != "Prod TLS 90d (tightened)" {
|
||||
t.Errorf("Get after Update: Name = %q, want %q", refetched.Name, "Prod TLS 90d (tightened)")
|
||||
}
|
||||
if refetched.RenewalWindowDays != 45 {
|
||||
t.Errorf("Get after Update: RenewalWindowDays = %d, want 45", refetched.RenewalWindowDays)
|
||||
}
|
||||
if len(refetched.AlertThresholdsDays) != 5 {
|
||||
t.Errorf("Get after Update: AlertThresholdsDays length = %d, want 5", len(refetched.AlertThresholdsDays))
|
||||
}
|
||||
|
||||
// List: add a second policy so the ORDER BY name contract is non-vacuous.
|
||||
second := &domain.RenewalPolicy{
|
||||
Name: "Aa Earliest",
|
||||
RenewalWindowDays: 14,
|
||||
AutoRenew: false,
|
||||
MaxRetries: 1,
|
||||
RetryInterval: 60,
|
||||
AlertThresholdsDays: []int{7, 0},
|
||||
}
|
||||
if err := repo.Create(ctx, second); err != nil {
|
||||
t.Fatalf("Create second failed: %v", err)
|
||||
}
|
||||
|
||||
all, err := repo.List(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("List failed: %v", err)
|
||||
}
|
||||
if len(all) != 2 {
|
||||
t.Fatalf("List: len = %d, want 2", len(all))
|
||||
}
|
||||
// "Aa Earliest" sorts before "Prod TLS 90d (tightened)" under ORDER BY name ASC.
|
||||
if all[0].Name != "Aa Earliest" {
|
||||
t.Errorf("List[0].Name = %q, want %q (ORDER BY name regression?)", all[0].Name, "Aa Earliest")
|
||||
}
|
||||
|
||||
// Delete: removes the policy and a follow-up Get surfaces "not found".
|
||||
if err := repo.Delete(ctx, policy.ID); err != nil {
|
||||
t.Fatalf("Delete failed: %v", err)
|
||||
}
|
||||
if _, err := repo.Get(ctx, policy.ID); err == nil {
|
||||
t.Error("Get after Delete: err = nil, want not-found")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRenewalPolicyRepository_DuplicateName verifies the pg 23505
|
||||
// unique_violation translation. The name UNIQUE constraint is enforced
|
||||
// on the renewal_policies.name column; Create's inner scanRenewalPolicy
|
||||
// must see the pq.Error, call isUniqueViolation, check the constraint
|
||||
// name, and return ErrRenewalPolicyDuplicateName. A non-sentinel error
|
||||
// here would cause the handler to emit 500 instead of 409.
|
||||
func TestRenewalPolicyRepository_DuplicateName(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewRenewalPolicyRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
first := &domain.RenewalPolicy{
|
||||
ID: "rp-first",
|
||||
Name: "Shared Name",
|
||||
RenewalWindowDays: 30,
|
||||
AutoRenew: true,
|
||||
MaxRetries: 3,
|
||||
RetryInterval: 300,
|
||||
AlertThresholdsDays: domain.DefaultAlertThresholds(),
|
||||
}
|
||||
if err := repo.Create(ctx, first); err != nil {
|
||||
t.Fatalf("Create first failed: %v", err)
|
||||
}
|
||||
|
||||
// Second policy with a distinct ID but the same Name — the name UNIQUE
|
||||
// constraint fires, Create's collision branch inspects pqErr.Constraint,
|
||||
// and because it's NOT *_pkey, it returns ErrRenewalPolicyDuplicateName
|
||||
// without retrying.
|
||||
second := &domain.RenewalPolicy{
|
||||
ID: "rp-second",
|
||||
Name: "Shared Name",
|
||||
RenewalWindowDays: 60,
|
||||
AutoRenew: false,
|
||||
MaxRetries: 1,
|
||||
RetryInterval: 600,
|
||||
AlertThresholdsDays: domain.DefaultAlertThresholds(),
|
||||
}
|
||||
err := repo.Create(ctx, second)
|
||||
if err == nil {
|
||||
t.Fatal("Create second: err = nil, want ErrRenewalPolicyDuplicateName")
|
||||
}
|
||||
if !errors.Is(err, repository.ErrRenewalPolicyDuplicateName) {
|
||||
t.Errorf("Create second: err = %v, want ErrRenewalPolicyDuplicateName (via errors.Is)", err)
|
||||
}
|
||||
|
||||
// Also verify Update surfaces the same sentinel when an existing row's
|
||||
// name is changed to collide with another policy's name.
|
||||
third := &domain.RenewalPolicy{
|
||||
ID: "rp-third",
|
||||
Name: "Third Name",
|
||||
RenewalWindowDays: 90,
|
||||
AutoRenew: true,
|
||||
MaxRetries: 2,
|
||||
RetryInterval: 1200,
|
||||
AlertThresholdsDays: domain.DefaultAlertThresholds(),
|
||||
}
|
||||
if err := repo.Create(ctx, third); err != nil {
|
||||
t.Fatalf("Create third failed: %v", err)
|
||||
}
|
||||
third.Name = "Shared Name" // collide with first
|
||||
err = repo.Update(ctx, third.ID, third)
|
||||
if err == nil {
|
||||
t.Fatal("Update: err = nil, want ErrRenewalPolicyDuplicateName")
|
||||
}
|
||||
if !errors.Is(err, repository.ErrRenewalPolicyDuplicateName) {
|
||||
t.Errorf("Update: err = %v, want ErrRenewalPolicyDuplicateName (via errors.Is)", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRenewalPolicyRepository_DeleteInUse verifies the pg 23503
|
||||
// foreign_key_violation translation. managed_certificates.renewal_policy_id
|
||||
// REFERENCES renewal_policies(id) ON DELETE RESTRICT; attempting to Delete
|
||||
// a policy while a certificate still references it must surface as
|
||||
// ErrRenewalPolicyInUse so the handler can emit 409 Conflict. Any change
|
||||
// to either the FK definition or the isForeignKeyViolation mapping breaks
|
||||
// this.
|
||||
func TestRenewalPolicyRepository_DeleteInUse(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
repo := postgres.NewRenewalPolicyRepository(db)
|
||||
ctx := context.Background()
|
||||
|
||||
// The policy under test — create via repo so ID auto-generation is
|
||||
// also exercised end-to-end in this path.
|
||||
policy := &domain.RenewalPolicy{
|
||||
Name: "InUse Policy",
|
||||
RenewalWindowDays: 30,
|
||||
AutoRenew: true,
|
||||
MaxRetries: 3,
|
||||
RetryInterval: 300,
|
||||
AlertThresholdsDays: domain.DefaultAlertThresholds(),
|
||||
}
|
||||
if err := repo.Create(ctx, policy); err != nil {
|
||||
t.Fatalf("Create policy failed: %v", err)
|
||||
}
|
||||
|
||||
// Create owner/team/issuer prerequisites, then raw-INSERT a
|
||||
// managed_certificate row referencing the policy. Using raw SQL here
|
||||
// (matching insertCertPrereqsRaw's idiom) keeps the test independent
|
||||
// of the service layer.
|
||||
ownerID, teamID, issuerID, _ := insertCertPrereqsRaw(t, db, ctx, "inuse")
|
||||
|
||||
now := time.Now().UTC().Truncate(time.Microsecond)
|
||||
_, err := db.ExecContext(ctx, `
|
||||
INSERT INTO managed_certificates (
|
||||
id, name, common_name, sans, environment,
|
||||
owner_id, team_id, issuer_id, renewal_policy_id,
|
||||
status, expires_at, tags, created_at, updated_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
|
||||
`,
|
||||
"mc-inuse", "inuse-cert", "inuse.example.com", []string{}, "production",
|
||||
ownerID, teamID, issuerID, policy.ID,
|
||||
string(domain.CertificateStatusActive), now.Add(90*24*time.Hour), "{}", now, now,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("INSERT managed_certificates failed: %v", err)
|
||||
}
|
||||
|
||||
// Delete: the ON DELETE RESTRICT FK fires, the pg driver returns a
|
||||
// *pq.Error with Code 23503, isForeignKeyViolation detects it, and
|
||||
// the repository returns ErrRenewalPolicyInUse.
|
||||
err = repo.Delete(ctx, policy.ID)
|
||||
if err == nil {
|
||||
t.Fatal("Delete: err = nil, want ErrRenewalPolicyInUse (ON DELETE RESTRICT should have fired)")
|
||||
}
|
||||
if !errors.Is(err, repository.ErrRenewalPolicyInUse) {
|
||||
t.Errorf("Delete: err = %v, want ErrRenewalPolicyInUse (via errors.Is)", err)
|
||||
}
|
||||
|
||||
// And the policy is still there — RESTRICT aborted the delete.
|
||||
if _, err := repo.Get(ctx, policy.ID); err != nil {
|
||||
t.Errorf("Get after failed Delete: err = %v, want nil (policy should still exist)", err)
|
||||
}
|
||||
|
||||
// After removing the referencing cert, Delete succeeds — proves the
|
||||
// RESTRICT was the only thing blocking the earlier Delete and rules
|
||||
// out any unrelated failure mode.
|
||||
if _, err := db.ExecContext(ctx, `DELETE FROM managed_certificates WHERE id = $1`, "mc-inuse"); err != nil {
|
||||
t.Fatalf("cleanup DELETE managed_certificates failed: %v", err)
|
||||
}
|
||||
if err := repo.Delete(ctx, policy.ID); err != nil {
|
||||
t.Errorf("Delete after cleanup: err = %v, want nil", err)
|
||||
}
|
||||
|
||||
// Also verify Delete on a non-existent ID returns a not-found error
|
||||
// (not nil, not the InUse sentinel) — guards against a silent no-op
|
||||
// regression in the RowsAffected check.
|
||||
err = repo.Delete(ctx, "rp-does-not-exist")
|
||||
if err == nil {
|
||||
t.Fatal("Delete(non-existent): err = nil, want not-found")
|
||||
}
|
||||
if errors.Is(err, repository.ErrRenewalPolicyInUse) {
|
||||
t.Errorf("Delete(non-existent): err = %v, should not be ErrRenewalPolicyInUse", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "not found") {
|
||||
t.Errorf("Delete(non-existent): err = %v, want substring %q", err, "not found")
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
// Package postgres_test provides repository integration tests covering 15 of 17
|
||||
// Package postgres_test provides repository integration tests covering 17 of 17
|
||||
// PostgreSQL repository files. Each test function exercises CRUD operations,
|
||||
// edge cases, and deduplication logic against a real database.
|
||||
// edge cases, and deduplication logic against a real database. HealthCheck
|
||||
// and RenewalPolicy integration tests live in sibling *_test.go files in this
|
||||
// package (see health_check_test.go and renewal_policy_test.go).
|
||||
package postgres_test
|
||||
|
||||
import (
|
||||
@@ -76,7 +78,7 @@ func insertCertPrereqsRaw(t *testing.T, db *sql.DB, ctx context.Context, suffix
|
||||
}
|
||||
|
||||
// Create renewal policy
|
||||
_, err = db.ExecContext(ctx, `INSERT INTO renewal_policies (id, name, renewal_window_days, auto_renew, max_retries, retry_interval_minutes, created_at, updated_at) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
_, err = db.ExecContext(ctx, `INSERT INTO renewal_policies (id, name, renewal_window_days, auto_renew, max_retries, retry_interval_seconds, created_at, updated_at) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
policyID, "Policy "+suffix, 30, true, 3, 60, now, now)
|
||||
if err != nil {
|
||||
t.Fatalf("insertCertPrereqs: create renewal_policy failed: %v", err)
|
||||
|
||||
@@ -81,6 +81,29 @@ func (r *RevocationRepository) ListAll(ctx context.Context) ([]*domain.Certifica
|
||||
return scanRevocations(rows)
|
||||
}
|
||||
|
||||
// ListByIssuer returns all revocations for a single issuer, ordered by revocation time.
|
||||
//
|
||||
// This is the hot path for CRL generation. Pushing the issuer filter into the
|
||||
// SQL query lets the composite index `idx_certificate_revocations_issuer_serial`
|
||||
// (migration 000012) drive a prefix scan on issuer_id rather than forcing
|
||||
// callers to load every row in the table and discard the ones belonging to
|
||||
// other issuers.
|
||||
func (r *RevocationRepository) ListByIssuer(ctx context.Context, issuerID string) ([]*domain.CertificateRevocation, error) {
|
||||
rows, err := r.db.QueryContext(ctx, `
|
||||
SELECT id, certificate_id, serial_number, reason, revoked_by, revoked_at,
|
||||
issuer_id, issuer_notified, created_at
|
||||
FROM certificate_revocations
|
||||
WHERE issuer_id = $1
|
||||
ORDER BY revoked_at ASC
|
||||
`, issuerID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list revocations by issuer: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
return scanRevocations(rows)
|
||||
}
|
||||
|
||||
// ListByCertificate returns all revocations for a certificate.
|
||||
func (r *RevocationRepository) ListByCertificate(ctx context.Context, certID string) ([]*domain.CertificateRevocation, error) {
|
||||
rows, err := r.db.QueryContext(ctx, `
|
||||
|
||||
@@ -0,0 +1,246 @@
|
||||
// Integration tests for the U-3 schema-vs-seed coupling fix.
|
||||
//
|
||||
// Pre-U-3 the deploy compose stack mounted both a hand-curated subset of
|
||||
// `migrations/*.up.sql` and `seed.sql` into postgres
|
||||
// `/docker-entrypoint-initdb.d/`. Postgres applied them at initdb time.
|
||||
// When `seed.sql` was updated to reference columns added by migrations
|
||||
// *after* the mounted cutoff (e.g., `policy_rules.severity` from
|
||||
// `000013_policy_rule_severity.up.sql`), initdb crashed during the seed
|
||||
// step and the container was reported `unhealthy` indefinitely.
|
||||
//
|
||||
// Post-U-3 the schema is built EXCLUSIVELY by the server at startup via
|
||||
// internal/repository/postgres.RunMigrations + RunSeed. These tests pin
|
||||
// that contract: RunSeed must complete without error against a freshly
|
||||
// migrated database, and re-application must be idempotent so server
|
||||
// restarts don't double-insert.
|
||||
//
|
||||
// Skipped under -short to keep CI fast lanes green; the integration lane
|
||||
// runs them via the testcontainers harness.
|
||||
package postgres_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"testing"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/repository/postgres"
|
||||
)
|
||||
|
||||
// TestRunSeed_AppliesIdempotently verifies the U-3 contract that RunSeed
|
||||
// can be called repeatedly against a populated database without error and
|
||||
// without producing duplicate rows. The server invokes RunSeed on EVERY
|
||||
// boot (it has no migration-state table to skip from), so any non-
|
||||
// idempotent INSERT in seed.sql would crash the container loop on the
|
||||
// second start.
|
||||
//
|
||||
// The assertion uses renewal_policies.id='rp-default' as a witness — that
|
||||
// row is the most-referenced FK target in the seed (it's the default
|
||||
// renewal policy attached to every certificate that doesn't override).
|
||||
// If the seed double-inserted, we'd see SQLSTATE 23505 from the second
|
||||
// RunSeed call. If the seed silently ON CONFLICT-DO-NOTHING'd as
|
||||
// designed, the row count stays at exactly 1.
|
||||
func TestRunSeed_AppliesIdempotently(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
ctx := context.Background()
|
||||
|
||||
migrationsPath := findMigrationsDir()
|
||||
|
||||
// Apply the seed twice — second call simulates a server restart on a
|
||||
// populated database. Both must succeed; pre-U-3 the second call
|
||||
// would fail with 23505 if any INSERT lacked ON CONFLICT.
|
||||
if err := postgres.RunSeed(db, migrationsPath); err != nil {
|
||||
t.Fatalf("RunSeed (first call) returned error: %v", err)
|
||||
}
|
||||
if err := postgres.RunSeed(db, migrationsPath); err != nil {
|
||||
t.Fatalf("RunSeed (second call — idempotency check) returned error: %v\n"+
|
||||
"This means the seed produced a duplicate row; every INSERT in seed.sql "+
|
||||
"must use ON CONFLICT (id) DO NOTHING because the server applies the "+
|
||||
"seed on EVERY start.", err)
|
||||
}
|
||||
|
||||
// Witness check: rp-default is the renewal policy every cert defaults
|
||||
// to. Exactly one row must exist after two seed applications.
|
||||
var count int
|
||||
err := db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM renewal_policies WHERE id = 'rp-default'`,
|
||||
).Scan(&count)
|
||||
if err != nil {
|
||||
t.Fatalf("witness query failed: %v", err)
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("renewal_policies WHERE id='rp-default' returned %d rows after two RunSeed calls; want exactly 1 (ON CONFLICT idempotency contract)", count)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunSeed_MissingFileIsNoOp verifies the fail-soft contract documented
|
||||
// on RunSeed: an operator who deletes seed.sql for custom packaging (CI
|
||||
// pipelines that bake their own seeds, cert-manager managed deployments)
|
||||
// must still get a healthy server boot. RunSeed returning nil for a
|
||||
// missing file is the only way to hold this contract — returning an error
|
||||
// would force every minimal-image deployment to ship the seed file just
|
||||
// to satisfy a no-op load.
|
||||
//
|
||||
// We point at a directory that exists (empty temp dir) but contains no
|
||||
// seed.sql. RunSeed must return nil silently.
|
||||
func TestRunSeed_MissingFileIsNoOp(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping integration test in short mode")
|
||||
}
|
||||
|
||||
// Use a brand-new empty directory so seed.sql is unambiguously absent.
|
||||
emptyDir := t.TempDir()
|
||||
|
||||
// Pass a nil *sql.DB on purpose — RunSeed must short-circuit on the
|
||||
// missing file BEFORE touching the DB. If the implementation ever
|
||||
// regresses and tries to db.Exec(string(content)) with nil content,
|
||||
// this will surface as a nil-deref instead of a silent corruption.
|
||||
var db *sql.DB
|
||||
if err := postgres.RunSeed(db, emptyDir); err != nil {
|
||||
t.Fatalf("RunSeed against an empty directory should return nil; got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunDemoSeed_AppliesIdempotently mirrors the RunSeed idempotency
|
||||
// contract for the demo overlay. The compose demo stack
|
||||
// (deploy/docker-compose.demo.yml) sets CERTCTL_DEMO_SEED=true; the
|
||||
// server applies seed_demo.sql at every boot. Same constraint as the
|
||||
// baseline seed: if any INSERT lacks ON CONFLICT, the server will
|
||||
// crash-loop on restart.
|
||||
//
|
||||
// Witness: seed_demo.sql inserts t-platform into the teams table at line
|
||||
// 11. That row is referenced by every demo-team-owned certificate, so
|
||||
// duplicate-insertion would block the entire demo on restart.
|
||||
func TestRunDemoSeed_AppliesIdempotently(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
ctx := context.Background()
|
||||
|
||||
migrationsPath := findMigrationsDir()
|
||||
|
||||
// Order matters — RunSeed must run first so the FK targets the demo
|
||||
// seed depends on (rp-* renewal policies, etc.) exist before the
|
||||
// demo INSERTs run. This mirrors the order in cmd/server/main.go.
|
||||
if err := postgres.RunSeed(db, migrationsPath); err != nil {
|
||||
t.Fatalf("RunSeed prerequisite failed: %v", err)
|
||||
}
|
||||
|
||||
if err := postgres.RunDemoSeed(db, migrationsPath); err != nil {
|
||||
t.Fatalf("RunDemoSeed (first call) returned error: %v", err)
|
||||
}
|
||||
if err := postgres.RunDemoSeed(db, migrationsPath); err != nil {
|
||||
t.Fatalf("RunDemoSeed (second call — idempotency check) returned error: %v", err)
|
||||
}
|
||||
|
||||
var count int
|
||||
err := db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM teams WHERE id = 't-platform'`,
|
||||
).Scan(&count)
|
||||
if err != nil {
|
||||
t.Fatalf("witness query failed: %v", err)
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("teams WHERE id='t-platform' returned %d rows after two RunDemoSeed calls; want exactly 1", count)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMigration000017_RetryIntervalRename verifies the U-3 ride-along
|
||||
// column rename: renewal_policies.retry_interval_minutes →
|
||||
// retry_interval_seconds (cat-o-retry_interval_unit_mismatch). The unit
|
||||
// was always seconds in practice — the column name lied. Migration 000017
|
||||
// renames the column with a DO $$ guard so re-application is safe.
|
||||
//
|
||||
// After all migrations have been applied (which the test harness does in
|
||||
// freshSchema), the new column must exist and the old column must NOT.
|
||||
// information_schema.columns is the source of truth for both checks.
|
||||
func TestMigration000017_RetryIntervalRename(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
ctx := context.Background()
|
||||
|
||||
// Helper — true iff the named column exists on renewal_policies.
|
||||
hasColumn := func(name string) bool {
|
||||
t.Helper()
|
||||
var n int
|
||||
err := db.QueryRowContext(ctx, `
|
||||
SELECT COUNT(*) FROM information_schema.columns
|
||||
WHERE table_name = 'renewal_policies' AND column_name = $1
|
||||
`, name).Scan(&n)
|
||||
if err != nil {
|
||||
t.Fatalf("information_schema query for column %q failed: %v", name, err)
|
||||
}
|
||||
return n > 0
|
||||
}
|
||||
|
||||
if !hasColumn("retry_interval_seconds") {
|
||||
t.Error("renewal_policies.retry_interval_seconds is missing — migration 000017 did not apply, or it was applied before the rename block")
|
||||
}
|
||||
if hasColumn("retry_interval_minutes") {
|
||||
t.Error("renewal_policies.retry_interval_minutes still exists — the rename in migration 000017 must drop the old name (cat-o-retry_interval_unit_mismatch)")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMigration000017_NotificationCreatedAt verifies the U-3 ride-along
|
||||
// column add: notification_events.created_at NOT NULL DEFAULT NOW()
|
||||
// (cat-o-notification_created_at_dead_field). Pre-U-3 the Go domain had
|
||||
// the field but the DB lacked the column, so the JSON API serialised
|
||||
// 0001-01-01.
|
||||
func TestMigration000017_NotificationCreatedAt(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
ctx := context.Background()
|
||||
|
||||
var dataType, isNullable, columnDefault sql.NullString
|
||||
err := db.QueryRowContext(ctx, `
|
||||
SELECT data_type, is_nullable, column_default
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'notification_events' AND column_name = 'created_at'
|
||||
`).Scan(&dataType, &isNullable, &columnDefault)
|
||||
if err != nil {
|
||||
t.Fatalf("information_schema query for created_at failed: %v\n"+
|
||||
"Migration 000017 should have added notification_events.created_at TIMESTAMPTZ NOT NULL DEFAULT NOW().", err)
|
||||
}
|
||||
|
||||
if dataType.String != "timestamp with time zone" {
|
||||
t.Errorf("notification_events.created_at data_type = %q, want %q",
|
||||
dataType.String, "timestamp with time zone")
|
||||
}
|
||||
if isNullable.String != "NO" {
|
||||
t.Errorf("notification_events.created_at is_nullable = %q, want NO (the column must be NOT NULL so legacy rows get the DEFAULT)",
|
||||
isNullable.String)
|
||||
}
|
||||
if columnDefault.String == "" {
|
||||
t.Error("notification_events.created_at has no DEFAULT — legacy rows added before migration 000017 would fail the NOT NULL gate without one")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMigration000017_HealthCheckOrphansDropped verifies the U-3
|
||||
// ride-along column drop: network_scan_targets lost the orphan
|
||||
// health_check_enabled / health_check_interval_seconds columns
|
||||
// (cat-o-health_check_column_orphans). These were declared by an early
|
||||
// migration but never wired into Go code — schema noise that confused
|
||||
// operators reading raw SQL. Migration 000017 drops them.
|
||||
func TestMigration000017_HealthCheckOrphansDropped(t *testing.T) {
|
||||
tdb := getTestDB(t)
|
||||
db := tdb.freshSchema(t)
|
||||
ctx := context.Background()
|
||||
|
||||
hasColumn := func(name string) bool {
|
||||
t.Helper()
|
||||
var n int
|
||||
err := db.QueryRowContext(ctx, `
|
||||
SELECT COUNT(*) FROM information_schema.columns
|
||||
WHERE table_name = 'network_scan_targets' AND column_name = $1
|
||||
`, name).Scan(&n)
|
||||
if err != nil {
|
||||
t.Fatalf("information_schema query for column %q failed: %v", name, err)
|
||||
}
|
||||
return n > 0
|
||||
}
|
||||
|
||||
for _, col := range []string{"health_check_enabled", "health_check_interval_seconds"} {
|
||||
if hasColumn(col) {
|
||||
t.Errorf("network_scan_targets.%s still exists — migration 000017 must drop it (cat-o-health_check_column_orphans)", col)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -56,19 +56,19 @@ func (s *CAOperationsSvc) GenerateDERCRL(ctx context.Context, issuerID string) (
|
||||
return nil, fmt.Errorf("issuer not found: %s", issuerID)
|
||||
}
|
||||
|
||||
revocations, err := s.revocationRepo.ListAll(ctx)
|
||||
// Scope the query to this issuer so the migration 000012 composite index
|
||||
// drives a prefix scan; previously this path read every revocation in the
|
||||
// table and filtered in Go, which did not scale as the revocation table
|
||||
// grew across many issuers (F-001).
|
||||
revocations, err := s.revocationRepo.ListByIssuer(ctx, issuerID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list revocations: %w", err)
|
||||
return nil, fmt.Errorf("failed to list revocations for issuer %s: %w", issuerID, err)
|
||||
}
|
||||
|
||||
// Filter to this issuer and convert to CRL entries.
|
||||
// Short-lived certificates (profile TTL < 1 hour) are excluded — expiry is sufficient revocation.
|
||||
// Convert revocations to CRL entries. Short-lived certificates (profile
|
||||
// TTL < 1 hour) are excluded — expiry is sufficient revocation.
|
||||
var entries []CRLEntry
|
||||
for _, rev := range revocations {
|
||||
if rev.IssuerID != issuerID {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check short-lived exemption: look up the cert's profile
|
||||
if s.profileRepo != nil && s.certRepo != nil {
|
||||
cert, err := s.certRepo.Get(ctx, rev.CertificateID)
|
||||
|
||||
@@ -75,6 +75,73 @@ func TestCAOperationsSvc_GenerateDERCRL_Success(t *testing.T) {
|
||||
t.Logf("DER CRL generated successfully: %d bytes", len(crl))
|
||||
}
|
||||
|
||||
// TestCAOperationsSvc_GenerateDERCRL_UsesListByIssuer_NotListAll guards F-001.
|
||||
// Before the fix, GenerateDERCRL called revocationRepo.ListAll(ctx) and filtered
|
||||
// results in Go (if rev.IssuerID != issuerID { continue }). That was O(N) in the
|
||||
// size of the entire revocation table and did not scale as revocations piled up
|
||||
// across many issuers. Migration 000012 added the composite index
|
||||
// idx_certificate_revocations_issuer_serial(issuer_id, serial_number), which is
|
||||
// a prefix scan target — so the hot path must now call ListByIssuer(ctx, id) to
|
||||
// drive an indexed query. This regression test asserts the hot path invokes
|
||||
// ListByIssuer exactly once and never falls back to the full-table ListAll scan,
|
||||
// and also double-checks that cross-issuer revocations are correctly excluded
|
||||
// from the generated CRL (no in-Go filter left to catch them).
|
||||
func TestCAOperationsSvc_GenerateDERCRL_UsesListByIssuer_NotListAll(t *testing.T) {
|
||||
caSvc, revocationRepo, _ := newCAOperationsSvcTest()
|
||||
|
||||
// Pre-populate with revocations from TWO issuers. If the hot path regresses
|
||||
// and calls ListAll instead of ListByIssuer, the generated CRL would either
|
||||
// include the wrong rows or — with the in-Go filter gone — pull in both
|
||||
// issuers' revocations. ListByIssuer scopes at the query level so only
|
||||
// iss-local rows come back.
|
||||
now := time.Now()
|
||||
revocationRepo.Revocations = []*domain.CertificateRevocation{
|
||||
{
|
||||
SerialNumber: "LOCAL-001",
|
||||
CertificateID: "cert-local-1",
|
||||
IssuerID: "iss-local",
|
||||
Reason: "keyCompromise",
|
||||
RevokedAt: now.Add(-24 * time.Hour),
|
||||
RevokedBy: "admin",
|
||||
},
|
||||
{
|
||||
SerialNumber: "LOCAL-002",
|
||||
CertificateID: "cert-local-2",
|
||||
IssuerID: "iss-local",
|
||||
Reason: "superseded",
|
||||
RevokedAt: now.Add(-12 * time.Hour),
|
||||
RevokedBy: "admin",
|
||||
},
|
||||
{
|
||||
SerialNumber: "OTHER-001",
|
||||
CertificateID: "cert-other-1",
|
||||
IssuerID: "iss-other",
|
||||
Reason: "keyCompromise",
|
||||
RevokedAt: now.Add(-6 * time.Hour),
|
||||
RevokedBy: "admin",
|
||||
},
|
||||
}
|
||||
|
||||
crl, err := caSvc.GenerateDERCRL(context.Background(), "iss-local")
|
||||
if err != nil {
|
||||
t.Fatalf("expected no error, got: %v", err)
|
||||
}
|
||||
if len(crl) == 0 {
|
||||
t.Fatal("expected non-empty CRL")
|
||||
}
|
||||
|
||||
// The contractual assertion: the CRL hot path MUST use the scoped query.
|
||||
if got, want := revocationRepo.ListByIssuerCalls, 1; got != want {
|
||||
t.Errorf("ListByIssuerCalls = %d, want %d — CRL hot path must call the scoped query driven by migration 000012 index", got, want)
|
||||
}
|
||||
if got := revocationRepo.ListAllCalls; got != 0 {
|
||||
t.Errorf("ListAllCalls = %d, want 0 — CRL hot path must NOT fall back to the full-table scan after F-001", got)
|
||||
}
|
||||
if got, want := revocationRepo.LastListIssuerID, "iss-local"; got != want {
|
||||
t.Errorf("LastListIssuerID = %q, want %q — issuer scoping argument lost", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCAOperationsSvc_GenerateDERCRL_EmptyCRL(t *testing.T) {
|
||||
caSvc, revocationRepo, _ := newCAOperationsSvcTest()
|
||||
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
"github.com/shankar0123/certctl/internal/repository"
|
||||
)
|
||||
|
||||
// G-1: service-level sentinels alias the repository sentinels so errors.Is
|
||||
// walks transparently across layers. Do NOT errors.New a fresh copy — the
|
||||
// handler's `errors.Is(err, repository.ErrRenewalPolicyInUse)` branch and
|
||||
// the service-layer tests' `errors.Is(err, service.ErrRenewalPolicyInUse)`
|
||||
// branch need to match the same sentinel var identity.
|
||||
var (
|
||||
ErrRenewalPolicyDuplicateName = repository.ErrRenewalPolicyDuplicateName
|
||||
ErrRenewalPolicyInUse = repository.ErrRenewalPolicyInUse
|
||||
)
|
||||
|
||||
// RenewalPolicyService implements the /api/v1/renewal-policies CRUD surface.
|
||||
//
|
||||
// G-1 scope note: the red-test contract pins NewRenewalPolicyService to a
|
||||
// repo-only signature (no auditService). Renewal-policy CRUD does not emit
|
||||
// audit events in this change — if audit coverage is needed later, add a
|
||||
// SetAuditService setter rather than churning the constructor signature.
|
||||
type RenewalPolicyService struct {
|
||||
repo repository.RenewalPolicyRepository
|
||||
}
|
||||
|
||||
// NewRenewalPolicyService constructs the service bound to its repository.
|
||||
func NewRenewalPolicyService(repo repository.RenewalPolicyRepository) *RenewalPolicyService {
|
||||
return &RenewalPolicyService{repo: repo}
|
||||
}
|
||||
|
||||
// rpSlugRegex matches non-alphanumeric characters that slugifyRenewalPolicyName strips.
|
||||
// Mirrors the identical regex in internal/repository/postgres/renewal_policy.go —
|
||||
// the service owns the rp-<slug> convention so the repo's retry loop is a
|
||||
// pure PK-collision safety net, not the primary ID generator.
|
||||
var rpSlugRegex = regexp.MustCompile(`[^a-z0-9-]+`)
|
||||
|
||||
// slugifyRenewalPolicyName produces `rp-<slug>` for an auto-generated policy
|
||||
// ID. Slug: lowercase, spaces→hyphens, non-alphanumeric stripped, trimmed to
|
||||
// 64 chars. Matches the seed convention (rp-default, rp-standard, rp-urgent)
|
||||
// and the repo's slugifyPolicyName byte-for-byte.
|
||||
func slugifyRenewalPolicyName(name string) string {
|
||||
slug := strings.ToLower(strings.TrimSpace(name))
|
||||
slug = strings.ReplaceAll(slug, " ", "-")
|
||||
slug = rpSlugRegex.ReplaceAllString(slug, "")
|
||||
slug = strings.Trim(slug, "-")
|
||||
if slug == "" {
|
||||
slug = "policy"
|
||||
}
|
||||
if len(slug) > 64 {
|
||||
slug = slug[:64]
|
||||
}
|
||||
return "rp-" + slug
|
||||
}
|
||||
|
||||
// ListRenewalPolicies returns a single page of renewal policies sorted by
|
||||
// name (the repo's ORDER BY name is index-served via idx_renewal_policies_name).
|
||||
// Pagination is done in Go rather than SQL — the expected row count is in the
|
||||
// single digits so LIMIT/OFFSET would be premature optimization and would
|
||||
// churn the repo contract for no measurable benefit (design doc §Known
|
||||
// Caller Audit).
|
||||
//
|
||||
// Bounds: page defaults to 1, per_page defaults to 50, caps at 500 to match
|
||||
// the /api/v1/policies handler's behavior. Past-end slices return an empty
|
||||
// slice with no error — callers use `total` to detect end of pagination.
|
||||
func (s *RenewalPolicyService) ListRenewalPolicies(ctx context.Context, page, perPage int) ([]domain.RenewalPolicy, int64, error) {
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
if perPage < 1 {
|
||||
perPage = 50
|
||||
}
|
||||
if perPage > 500 {
|
||||
perPage = 500
|
||||
}
|
||||
|
||||
items, err := s.repo.List(ctx)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("failed to list renewal policies: %w", err)
|
||||
}
|
||||
|
||||
total := int64(len(items))
|
||||
start := (page - 1) * perPage
|
||||
if start >= int(total) {
|
||||
return nil, total, nil
|
||||
}
|
||||
end := start + perPage
|
||||
if end > int(total) {
|
||||
end = int(total)
|
||||
}
|
||||
|
||||
out := make([]domain.RenewalPolicy, 0, end-start)
|
||||
for _, p := range items[start:end] {
|
||||
if p != nil {
|
||||
out = append(out, *p)
|
||||
}
|
||||
}
|
||||
return out, total, nil
|
||||
}
|
||||
|
||||
// GetRenewalPolicy retrieves one renewal policy by ID. Not-found errors
|
||||
// surface from the repo verbatim; the handler translates them to 404.
|
||||
func (s *RenewalPolicyService) GetRenewalPolicy(ctx context.Context, id string) (*domain.RenewalPolicy, error) {
|
||||
return s.repo.Get(ctx, id)
|
||||
}
|
||||
|
||||
// validateBounds enforces the design doc §Validation Bounds invariants:
|
||||
// - name required, ≤ 255 chars
|
||||
// - renewal_window_days in [1, 365]
|
||||
// - max_retries in [0, 10]
|
||||
// - retry_interval_seconds in [60, 86400]
|
||||
// - alert_thresholds_days each in [0, 365]
|
||||
//
|
||||
// Called after applyCreateDefaults so zero-value fields that the caller
|
||||
// expects to be defaulted don't trip the range checks.
|
||||
func (s *RenewalPolicyService) validateBounds(rp *domain.RenewalPolicy) error {
|
||||
if strings.TrimSpace(rp.Name) == "" {
|
||||
return fmt.Errorf("name is required")
|
||||
}
|
||||
if len(rp.Name) > 255 {
|
||||
return fmt.Errorf("name must be 255 characters or fewer, got %d", len(rp.Name))
|
||||
}
|
||||
if rp.RenewalWindowDays < 1 || rp.RenewalWindowDays > 365 {
|
||||
return fmt.Errorf("renewal_window_days must be between 1 and 365, got %d", rp.RenewalWindowDays)
|
||||
}
|
||||
if rp.MaxRetries < 0 || rp.MaxRetries > 10 {
|
||||
return fmt.Errorf("max_retries must be between 0 and 10, got %d", rp.MaxRetries)
|
||||
}
|
||||
if rp.RetryInterval < 60 || rp.RetryInterval > 86400 {
|
||||
return fmt.Errorf("retry_interval_seconds must be between 60 and 86400, got %d", rp.RetryInterval)
|
||||
}
|
||||
for i, t := range rp.AlertThresholdsDays {
|
||||
if t < 0 || t > 365 {
|
||||
return fmt.Errorf("alert_thresholds_days[%d]=%d must be between 0 and 365", i, t)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// applyCreateDefaults fills in zero-valued optional fields with the design
|
||||
// doc defaults. Name is never defaulted — missing name fails validation.
|
||||
// MaxRetries=0 is a legal explicit value (no retries), so it is NOT
|
||||
// defaulted; the DB default column handles that path if needed.
|
||||
func (s *RenewalPolicyService) applyCreateDefaults(rp *domain.RenewalPolicy) {
|
||||
if rp.RenewalWindowDays == 0 {
|
||||
rp.RenewalWindowDays = 30
|
||||
}
|
||||
if rp.RetryInterval == 0 {
|
||||
rp.RetryInterval = 3600
|
||||
}
|
||||
if len(rp.AlertThresholdsDays) == 0 {
|
||||
rp.AlertThresholdsDays = domain.DefaultAlertThresholds()
|
||||
}
|
||||
}
|
||||
|
||||
// CreateRenewalPolicy inserts a new renewal policy. Auto-generates
|
||||
// `rp-<slug(name)>` for ID if empty. Defaults are applied before bounds
|
||||
// validation so a caller can omit RenewalWindowDays / RetryInterval and
|
||||
// still pass bounds. Returns ErrRenewalPolicyDuplicateName unwrapped from
|
||||
// the repo when a name collision occurs (pg 23505 on the UNIQUE constraint);
|
||||
// the handler surfaces that as 409 Conflict.
|
||||
func (s *RenewalPolicyService) CreateRenewalPolicy(ctx context.Context, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
s.applyCreateDefaults(&rp)
|
||||
if err := s.validateBounds(&rp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if rp.ID == "" {
|
||||
rp.ID = slugifyRenewalPolicyName(rp.Name)
|
||||
}
|
||||
if rp.CreatedAt.IsZero() {
|
||||
rp.CreatedAt = time.Now()
|
||||
}
|
||||
if err := s.repo.Create(ctx, &rp); err != nil {
|
||||
// Propagate repository sentinels verbatim — service-level sentinels
|
||||
// alias repo sentinels (same var identity), so errors.Is walks
|
||||
// through without any translation.
|
||||
return nil, err
|
||||
}
|
||||
return &rp, nil
|
||||
}
|
||||
|
||||
// UpdateRenewalPolicy replaces the fields of an existing renewal policy.
|
||||
// Applies the same defaults+bounds as Create so partial updates do not slip
|
||||
// an invalid row past validation via zero-value fields. id in the path wins
|
||||
// over any id the caller supplied in the body.
|
||||
func (s *RenewalPolicyService) UpdateRenewalPolicy(ctx context.Context, id string, rp domain.RenewalPolicy) (*domain.RenewalPolicy, error) {
|
||||
s.applyCreateDefaults(&rp)
|
||||
if err := s.validateBounds(&rp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rp.ID = id
|
||||
if err := s.repo.Update(ctx, id, &rp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rp, nil
|
||||
}
|
||||
|
||||
// DeleteRenewalPolicy removes a renewal policy. Returns ErrRenewalPolicyInUse
|
||||
// when the policy is still referenced by rows in managed_certificates (the
|
||||
// repo translates pg 23503 FK_RESTRICT violations onto that sentinel). The
|
||||
// handler surfaces that as 409 Conflict.
|
||||
func (s *RenewalPolicyService) DeleteRenewalPolicy(ctx context.Context, id string) error {
|
||||
return s.repo.Delete(ctx, id)
|
||||
}
|
||||
@@ -0,0 +1,341 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/shankar0123/certctl/internal/domain"
|
||||
)
|
||||
|
||||
// G-1 red tests: lock in the behavior of RenewalPolicyService before
|
||||
// the production code exists. Every subtest here references a type or
|
||||
// method that Phase 2b must introduce:
|
||||
//
|
||||
// - NewRenewalPolicyService(repo) (constructor)
|
||||
// - svc.ListRenewalPolicies(ctx, page, pp) ([]RenewalPolicy, int64, error)
|
||||
// - svc.GetRenewalPolicy(ctx, id) (*RenewalPolicy, error)
|
||||
// - svc.CreateRenewalPolicy(ctx, rp) (*RenewalPolicy, error)
|
||||
// - svc.UpdateRenewalPolicy(ctx, id, rp) (*RenewalPolicy, error)
|
||||
// - svc.DeleteRenewalPolicy(ctx, id) error
|
||||
// - ErrRenewalPolicyDuplicateName sentinel (pg 23505 → 409)
|
||||
// - ErrRenewalPolicyInUse sentinel (pg 23503 → 409)
|
||||
//
|
||||
// Once Phase 2b lands, these should all turn green without modification.
|
||||
|
||||
func TestRenewalPolicyService_List_Success(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
now := time.Now()
|
||||
repo := &mockRenewalPolicyRepo{
|
||||
Policies: map[string]*domain.RenewalPolicy{
|
||||
"rp-default": {
|
||||
ID: "rp-default", Name: "Default", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
},
|
||||
"rp-urgent": {
|
||||
ID: "rp-urgent", Name: "Urgent", RenewalWindowDays: 7,
|
||||
MaxRetries: 5, RetryInterval: 600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
},
|
||||
},
|
||||
}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
items, total, err := svc.ListRenewalPolicies(ctx, 1, 50)
|
||||
if err != nil {
|
||||
t.Fatalf("ListRenewalPolicies failed: %v", err)
|
||||
}
|
||||
if total != 2 {
|
||||
t.Errorf("expected total 2, got %d", total)
|
||||
}
|
||||
if len(items) != 2 {
|
||||
t.Errorf("expected 2 items, got %d", len(items))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_List_Empty(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
items, total, err := svc.ListRenewalPolicies(ctx, 1, 50)
|
||||
if err != nil {
|
||||
t.Fatalf("ListRenewalPolicies failed: %v", err)
|
||||
}
|
||||
if total != 0 {
|
||||
t.Errorf("expected total 0, got %d", total)
|
||||
}
|
||||
if len(items) != 0 {
|
||||
t.Errorf("expected 0 items, got %d", len(items))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_List_Pagination(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
now := time.Now()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
// Seed 5 policies, names A..E so the mock's sort.Slice yields a deterministic
|
||||
// ordering that pagination boundaries can assert against.
|
||||
for _, name := range []string{"A", "B", "C", "D", "E"} {
|
||||
p := &domain.RenewalPolicy{
|
||||
ID: "rp-" + strings.ToLower(name), Name: name,
|
||||
RenewalWindowDays: 30, MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
repo.Policies[p.ID] = p
|
||||
}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
// Page 1, size 2 → [A, B]
|
||||
page1, total, err := svc.ListRenewalPolicies(ctx, 1, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("page 1 failed: %v", err)
|
||||
}
|
||||
if total != 5 {
|
||||
t.Errorf("expected total 5, got %d", total)
|
||||
}
|
||||
if len(page1) != 2 || page1[0].Name != "A" || page1[1].Name != "B" {
|
||||
t.Errorf("unexpected page 1 slice: %+v", page1)
|
||||
}
|
||||
|
||||
// Page 3, size 2 → [E] (single-item last page)
|
||||
page3, _, err := svc.ListRenewalPolicies(ctx, 3, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("page 3 failed: %v", err)
|
||||
}
|
||||
if len(page3) != 1 || page3[0].Name != "E" {
|
||||
t.Errorf("unexpected page 3 slice: %+v", page3)
|
||||
}
|
||||
|
||||
// Page 4, size 2 → [] (past the end, no error)
|
||||
page4, _, err := svc.ListRenewalPolicies(ctx, 4, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("page 4 failed: %v", err)
|
||||
}
|
||||
if len(page4) != 0 {
|
||||
t.Errorf("expected empty past-end slice, got %+v", page4)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_List_RepoError(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{
|
||||
Policies: map[string]*domain.RenewalPolicy{},
|
||||
ListErr: errors.New("boom"),
|
||||
}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
_, _, err := svc.ListRenewalPolicies(ctx, 1, 50)
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Get_Success(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
now := time.Now()
|
||||
rp := &domain.RenewalPolicy{
|
||||
ID: "rp-default", Name: "Default", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{"rp-default": rp}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
got, err := svc.GetRenewalPolicy(ctx, "rp-default")
|
||||
if err != nil {
|
||||
t.Fatalf("GetRenewalPolicy failed: %v", err)
|
||||
}
|
||||
if got.Name != "Default" {
|
||||
t.Errorf("expected name Default, got %s", got.Name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Get_NotFound(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
_, err := svc.GetRenewalPolicy(ctx, "rp-missing")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing policy, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Create_Success(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
rp := domain.RenewalPolicy{
|
||||
Name: "Weekly Renewal",
|
||||
RenewalWindowDays: 7,
|
||||
MaxRetries: 3,
|
||||
RetryInterval: 3600,
|
||||
AutoRenew: true,
|
||||
}
|
||||
created, err := svc.CreateRenewalPolicy(ctx, rp)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateRenewalPolicy failed: %v", err)
|
||||
}
|
||||
if created.ID == "" {
|
||||
t.Fatal("expected auto-generated ID, got empty")
|
||||
}
|
||||
// ID convention: rp-<slug(name)> matches seed rows rp-default/rp-standard/rp-urgent.
|
||||
if !strings.HasPrefix(created.ID, "rp-") {
|
||||
t.Errorf("expected ID prefix rp-, got %s", created.ID)
|
||||
}
|
||||
if created.CreatedAt.IsZero() {
|
||||
t.Error("expected CreatedAt to be populated")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Create_MissingName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
_, err := svc.CreateRenewalPolicy(ctx, domain.RenewalPolicy{
|
||||
RenewalWindowDays: 30, MaxRetries: 3, RetryInterval: 3600,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected validation error for missing name, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Create_BoundsViolation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
// RenewalWindowDays out of range [1, 365]
|
||||
_, err := svc.CreateRenewalPolicy(ctx, domain.RenewalPolicy{
|
||||
Name: "Bad Window",
|
||||
RenewalWindowDays: 999,
|
||||
MaxRetries: 3,
|
||||
RetryInterval: 3600,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected bounds violation on RenewalWindowDays, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Create_DuplicateName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{
|
||||
Policies: map[string]*domain.RenewalPolicy{},
|
||||
CreateErr: ErrRenewalPolicyDuplicateName,
|
||||
}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
_, err := svc.CreateRenewalPolicy(ctx, domain.RenewalPolicy{
|
||||
Name: "Duplicate",
|
||||
RenewalWindowDays: 30,
|
||||
MaxRetries: 3,
|
||||
RetryInterval: 3600,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected duplicate-name error, got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrRenewalPolicyDuplicateName) {
|
||||
t.Errorf("expected ErrRenewalPolicyDuplicateName, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Update_Success(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
now := time.Now()
|
||||
rp := &domain.RenewalPolicy{
|
||||
ID: "rp-default", Name: "Default", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{"rp-default": rp}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
updated, err := svc.UpdateRenewalPolicy(ctx, "rp-default", domain.RenewalPolicy{
|
||||
Name: "Default Renamed",
|
||||
RenewalWindowDays: 45,
|
||||
MaxRetries: 5,
|
||||
RetryInterval: 1800,
|
||||
AutoRenew: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("UpdateRenewalPolicy failed: %v", err)
|
||||
}
|
||||
if updated.Name != "Default Renamed" {
|
||||
t.Errorf("expected updated name, got %s", updated.Name)
|
||||
}
|
||||
if updated.RenewalWindowDays != 45 {
|
||||
t.Errorf("expected window 45, got %d", updated.RenewalWindowDays)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Update_NotFound(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
_, err := svc.UpdateRenewalPolicy(ctx, "rp-missing", domain.RenewalPolicy{
|
||||
Name: "X", RenewalWindowDays: 30, MaxRetries: 3, RetryInterval: 3600,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing policy, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Delete_Success(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
now := time.Now()
|
||||
rp := &domain.RenewalPolicy{
|
||||
ID: "rp-default", Name: "Default", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{"rp-default": rp}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
if err := svc.DeleteRenewalPolicy(ctx, "rp-default"); err != nil {
|
||||
t.Fatalf("DeleteRenewalPolicy failed: %v", err)
|
||||
}
|
||||
if _, exists := repo.Policies["rp-default"]; exists {
|
||||
t.Error("expected policy to be removed from repo")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Delete_NotFound(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
repo := &mockRenewalPolicyRepo{Policies: map[string]*domain.RenewalPolicy{}}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
err := svc.DeleteRenewalPolicy(ctx, "rp-missing")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing policy, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenewalPolicyService_Delete_InUseConflict(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
now := time.Now()
|
||||
rp := &domain.RenewalPolicy{
|
||||
ID: "rp-active", Name: "Active", RenewalWindowDays: 30,
|
||||
MaxRetries: 3, RetryInterval: 3600, AutoRenew: true,
|
||||
CreatedAt: now, UpdatedAt: now,
|
||||
}
|
||||
repo := &mockRenewalPolicyRepo{
|
||||
Policies: map[string]*domain.RenewalPolicy{"rp-active": rp},
|
||||
DeleteErr: ErrRenewalPolicyInUse,
|
||||
}
|
||||
svc := NewRenewalPolicyService(repo)
|
||||
|
||||
err := svc.DeleteRenewalPolicy(ctx, "rp-active")
|
||||
if err == nil {
|
||||
t.Fatal("expected in-use conflict error, got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrRenewalPolicyInUse) {
|
||||
t.Errorf("expected ErrRenewalPolicyInUse, got %v", err)
|
||||
}
|
||||
}
|
||||
@@ -749,11 +749,21 @@ func (m *mockPolicyRepo) AddRule(rule *domain.PolicyRule) {
|
||||
m.Rules[rule.ID] = rule
|
||||
}
|
||||
|
||||
// mockRenewalPolicyRepo is a test implementation of RenewalPolicyRepository
|
||||
// mockRenewalPolicyRepo is a test implementation of RenewalPolicyRepository.
|
||||
//
|
||||
// G-1: repo contract extended with Create/Update/Delete to support the
|
||||
// /api/v1/renewal-policies CRUD endpoints. Per-method *Err fields let tests
|
||||
// force specific repo failures (duplicate name → 23505, FK RESTRICT on Delete
|
||||
// → 23503) without standing up a real Postgres connection. The sentinel
|
||||
// errors `ErrRenewalPolicyDuplicateName` and `ErrRenewalPolicyInUse` are the
|
||||
// typed envelopes the service / handler layers translate into 409 Conflict.
|
||||
type mockRenewalPolicyRepo struct {
|
||||
Policies map[string]*domain.RenewalPolicy
|
||||
GetErr error
|
||||
ListErr error
|
||||
Policies map[string]*domain.RenewalPolicy
|
||||
GetErr error
|
||||
ListErr error
|
||||
CreateErr error
|
||||
UpdateErr error
|
||||
DeleteErr error
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepo) Get(ctx context.Context, id string) (*domain.RenewalPolicy, error) {
|
||||
@@ -775,9 +785,49 @@ func (m *mockRenewalPolicyRepo) List(ctx context.Context) ([]*domain.RenewalPoli
|
||||
for _, p := range m.Policies {
|
||||
policies = append(policies, p)
|
||||
}
|
||||
// Deterministic ordering mirrors the production repo's ORDER BY name,
|
||||
// so pagination-boundary assertions don't become flaky under map
|
||||
// iteration randomness.
|
||||
sort.Slice(policies, func(i, j int) bool {
|
||||
return policies[i].Name < policies[j].Name
|
||||
})
|
||||
return policies, nil
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepo) Create(ctx context.Context, policy *domain.RenewalPolicy) error {
|
||||
if m.CreateErr != nil {
|
||||
return m.CreateErr
|
||||
}
|
||||
if _, exists := m.Policies[policy.ID]; exists {
|
||||
return m.CreateErr
|
||||
}
|
||||
m.Policies[policy.ID] = policy
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepo) Update(ctx context.Context, id string, policy *domain.RenewalPolicy) error {
|
||||
if m.UpdateErr != nil {
|
||||
return m.UpdateErr
|
||||
}
|
||||
if _, exists := m.Policies[id]; !exists {
|
||||
return errNotFound
|
||||
}
|
||||
policy.ID = id
|
||||
m.Policies[id] = policy
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepo) Delete(ctx context.Context, id string) error {
|
||||
if m.DeleteErr != nil {
|
||||
return m.DeleteErr
|
||||
}
|
||||
if _, exists := m.Policies[id]; !exists {
|
||||
return errNotFound
|
||||
}
|
||||
delete(m.Policies, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockRenewalPolicyRepo) AddPolicy(policy *domain.RenewalPolicy) {
|
||||
m.Policies[policy.ID] = policy
|
||||
}
|
||||
@@ -1385,6 +1435,13 @@ type mockRevocationRepo struct {
|
||||
Revocations []*domain.CertificateRevocation
|
||||
CreateErr error
|
||||
ListErr error
|
||||
// F-001 regression instrumentation: track which list method was invoked
|
||||
// so tests can assert that the CRL generation hot path uses the scoped
|
||||
// ListByIssuer query (migration 000012 composite index) rather than
|
||||
// ListAll followed by in-Go filtering.
|
||||
ListAllCalls int
|
||||
ListByIssuerCalls int
|
||||
LastListIssuerID string
|
||||
}
|
||||
|
||||
func (m *mockRevocationRepo) Create(ctx context.Context, revocation *domain.CertificateRevocation) error {
|
||||
@@ -1405,12 +1462,28 @@ func (m *mockRevocationRepo) GetByIssuerAndSerial(ctx context.Context, issuerID,
|
||||
}
|
||||
|
||||
func (m *mockRevocationRepo) ListAll(ctx context.Context) ([]*domain.CertificateRevocation, error) {
|
||||
m.ListAllCalls++
|
||||
if m.ListErr != nil {
|
||||
return nil, m.ListErr
|
||||
}
|
||||
return m.Revocations, nil
|
||||
}
|
||||
|
||||
func (m *mockRevocationRepo) ListByIssuer(ctx context.Context, issuerID string) ([]*domain.CertificateRevocation, error) {
|
||||
m.ListByIssuerCalls++
|
||||
m.LastListIssuerID = issuerID
|
||||
if m.ListErr != nil {
|
||||
return nil, m.ListErr
|
||||
}
|
||||
var result []*domain.CertificateRevocation
|
||||
for _, r := range m.Revocations {
|
||||
if r.IssuerID == issuerID {
|
||||
result = append(result, r)
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (m *mockRevocationRepo) ListByCertificate(ctx context.Context, certID string) ([]*domain.CertificateRevocation, error) {
|
||||
var result []*domain.CertificateRevocation
|
||||
for _, r := range m.Revocations {
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
-- Migration 000017 (down): reverse the U-3 bundle.
|
||||
--
|
||||
-- Operators almost certainly never need this — each block in the up
|
||||
-- migration was a strict improvement (column-name truth, dead-schema
|
||||
-- removal, missing-column add). The down migration exists for
|
||||
-- documentation and disaster-recovery completeness only.
|
||||
--
|
||||
-- Idempotent: each block uses the standard IF EXISTS / IF NOT EXISTS
|
||||
-- guards plus a DO $$ guard on the rename to handle re-application.
|
||||
-- Reverses the up migration's blocks in reverse order.
|
||||
|
||||
-- (3) Re-add the orphan health_check columns at their original defaults.
|
||||
--
|
||||
-- Note: re-adding does NOT restore the auto-health-check feature —
|
||||
-- that code was never written. The column values revert to the
|
||||
-- DEFAULT FALSE / 300 baseline that operators saw pre-U-3.
|
||||
ALTER TABLE network_scan_targets
|
||||
ADD COLUMN IF NOT EXISTS health_check_enabled BOOLEAN DEFAULT FALSE,
|
||||
ADD COLUMN IF NOT EXISTS health_check_interval_seconds INTEGER DEFAULT 300;
|
||||
|
||||
-- (2) Drop the notification_events.created_at column.
|
||||
--
|
||||
-- This re-introduces the cat-o-notification_created_at_dead_field bug
|
||||
-- (Go field with no DB column → API serializes 0001-01-01). Only roll
|
||||
-- back if you've also rolled back the Go-side INSERT path that sets
|
||||
-- created_at, otherwise INSERTs will fail with "column created_at does
|
||||
-- not exist".
|
||||
ALTER TABLE notification_events
|
||||
DROP COLUMN IF EXISTS created_at;
|
||||
|
||||
-- (1) Rename the renewal_policies column back to the misleading name.
|
||||
--
|
||||
-- Re-introduces cat-o-retry_interval_unit_mismatch. Operators running
|
||||
-- raw SQL revert to the 60x confusion. No data conversion (values are
|
||||
-- still seconds; the column label lies again).
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'renewal_policies'
|
||||
AND column_name = 'retry_interval_seconds'
|
||||
) THEN
|
||||
ALTER TABLE renewal_policies
|
||||
RENAME COLUMN retry_interval_seconds TO retry_interval_minutes;
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -0,0 +1,81 @@
|
||||
-- Migration 000017: DB coupling cleanup (U-3 bundle).
|
||||
--
|
||||
-- Closes three audit findings that share the migrations/ surface and the
|
||||
-- "schema vs Go vs label drifts in different directions" pattern:
|
||||
--
|
||||
-- * cat-o-retry_interval_unit_mismatch (P1):
|
||||
-- renewal_policies.retry_interval_minutes column stored seconds, named
|
||||
-- minutes. Operators running raw SQL got 60x confusion.
|
||||
--
|
||||
-- * cat-o-notification_created_at_dead_field (P2):
|
||||
-- internal/domain/notification.go::NotificationEvent.CreatedAt was
|
||||
-- tagged json:"created_at" with no DB column behind it. Every API
|
||||
-- response serialized 0001-01-01T00:00:00Z. Visible zero-value
|
||||
-- timestamp on every notification row in the dashboard.
|
||||
--
|
||||
-- * cat-o-health_check_column_orphans (P1):
|
||||
-- migration 000011 added network_scan_targets.health_check_enabled +
|
||||
-- .health_check_interval_seconds. No Go field decoded either column;
|
||||
-- no handler exposed them; OpenAPI schema didn't carry them. The
|
||||
-- auto-health-check feature was never wired through. Removing dead
|
||||
-- schema is cheaper than completing dead code; if the feature gets
|
||||
-- revived, a future migration can re-add the columns alongside the
|
||||
-- Go-side wiring.
|
||||
--
|
||||
-- Idempotency: RunMigrations at internal/repository/postgres/db.go has
|
||||
-- no applied-tracking table — every server restart re-applies every
|
||||
-- migration in sequence. Each block in this file MUST be safe to re-run
|
||||
-- on a database that has already had it applied. The RENAME COLUMN in
|
||||
-- block (1) is wrapped in a DO $$ guard that checks information_schema
|
||||
-- before renaming; the ADD COLUMN in (2) and the DROP COLUMNs in (3)
|
||||
-- use the standard IF NOT EXISTS / IF EXISTS clauses.
|
||||
--
|
||||
-- See the U-3 closure entry in
|
||||
-- coverage-gap-audit-2026-04-24-v5/unified-audit.md and CHANGELOG.md
|
||||
-- for the full rationale, the bundled-fix list, and the architectural
|
||||
-- shift to runtime-only migration application.
|
||||
|
||||
-- (1) cat-o-retry_interval_unit_mismatch — rename column to match unit.
|
||||
--
|
||||
-- The values stored in this column have always been seconds (validator
|
||||
-- at internal/service/renewal_policy.go enforces a [60, 86400] range
|
||||
-- inclusive — 60 seconds to 24 hours, unambiguously seconds). The
|
||||
-- column name was the bug; data conversion is a no-op. The Go field
|
||||
-- has always been tagged json:"retry_interval_seconds", so the API
|
||||
-- shape is unchanged.
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'renewal_policies'
|
||||
AND column_name = 'retry_interval_minutes'
|
||||
) THEN
|
||||
ALTER TABLE renewal_policies
|
||||
RENAME COLUMN retry_interval_minutes TO retry_interval_seconds;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- (2) cat-o-notification_created_at_dead_field — add the missing column.
|
||||
--
|
||||
-- DEFAULT NOW() back-fills existing rows with the migration apply
|
||||
-- timestamp. Acceptable trade-off: those rows had no real CreatedAt
|
||||
-- info anyway (the field was a Go-only zero-value), and approximating
|
||||
-- them with the migration time gives the dashboard a usable rendering
|
||||
-- instead of '0001-01-01'. NOT NULL is enforced because the repo
|
||||
-- INSERT path will set CreatedAt on every new row post-fix.
|
||||
ALTER TABLE notification_events
|
||||
ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ NOT NULL DEFAULT NOW();
|
||||
|
||||
-- (3) cat-o-health_check_column_orphans — drop unwired columns.
|
||||
--
|
||||
-- migrations/000011_health_checks.up.sql added these two columns with
|
||||
-- the intent of wiring auto-health-checks for network-scan-discovered
|
||||
-- endpoints. The Go side was never written; no handler reads or writes
|
||||
-- them; the OpenAPI NetworkScanTarget schema doesn't expose them. The
|
||||
-- columns have been carrying their default values (false / 300) on
|
||||
-- every row since shipping. Dropping them removes dead schema; the
|
||||
-- network_scan_targets row size shrinks marginally and operators stop
|
||||
-- seeing flag/interval columns that don't actually do anything.
|
||||
ALTER TABLE network_scan_targets
|
||||
DROP COLUMN IF EXISTS health_check_enabled,
|
||||
DROP COLUMN IF EXISTS health_check_interval_seconds;
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
-- Seed data for certificate control plane
|
||||
|
||||
-- Default renewal policy
|
||||
INSERT INTO renewal_policies (id, name, renewal_window_days, auto_renew, max_retries, retry_interval_minutes, alert_thresholds_days)
|
||||
INSERT INTO renewal_policies (id, name, renewal_window_days, auto_renew, max_retries, retry_interval_seconds, alert_thresholds_days)
|
||||
VALUES (
|
||||
'rp-default',
|
||||
'default',
|
||||
|
||||
@@ -29,7 +29,7 @@ ON CONFLICT (id) DO NOTHING;
|
||||
-- ============================================================
|
||||
-- 2. Policies
|
||||
-- ============================================================
|
||||
INSERT INTO renewal_policies (id, name, renewal_window_days, auto_renew, max_retries, retry_interval_minutes, alert_thresholds_days, created_at, updated_at) VALUES
|
||||
INSERT INTO renewal_policies (id, name, renewal_window_days, auto_renew, max_retries, retry_interval_seconds, alert_thresholds_days, created_at, updated_at) VALUES
|
||||
('rp-standard', 'Standard 30-day', 30, true, 3, 60, '[30, 14, 7, 0]'::jsonb, NOW() - INTERVAL '180 days', NOW() - INTERVAL '180 days'),
|
||||
('rp-urgent', 'Urgent 14-day', 14, true, 5, 30, '[14, 7, 3, 0]'::jsonb, NOW() - INTERVAL '180 days', NOW() - INTERVAL '180 days'),
|
||||
('rp-manual', 'Manual Only', 30, false, 0, 0, '[30, 14, 7, 0]'::jsonb, NOW() - INTERVAL '180 days', NOW() - INTERVAL '180 days')
|
||||
|
||||
@@ -33,6 +33,10 @@ import {
|
||||
updatePolicy,
|
||||
deletePolicy,
|
||||
getPolicyViolations,
|
||||
getRenewalPolicies,
|
||||
createRenewalPolicy,
|
||||
updateRenewalPolicy,
|
||||
deleteRenewalPolicy,
|
||||
getIssuers,
|
||||
createIssuer,
|
||||
testIssuerConnection,
|
||||
@@ -575,6 +579,60 @@ describe('API Client', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Renewal Policies (G-1) ─────────────────────────
|
||||
// Distinct from compliance Policies above. Populates the
|
||||
// `renewal_policy_id` dropdown on OnboardingWizard + CertificatesPage +
|
||||
// CertificateDetailPage.InlinePolicyEditor. Hits `/api/v1/renewal-policies`.
|
||||
|
||||
describe('RenewalPolicies', () => {
|
||||
it('getRenewalPolicies sends GET', async () => {
|
||||
mockFetch.mockReturnValueOnce(mockJsonResponse({ data: [], total: 0, page: 1, per_page: 50 }));
|
||||
await getRenewalPolicies();
|
||||
expect(mockFetch.mock.calls[0][0]).toContain('/api/v1/renewal-policies');
|
||||
});
|
||||
|
||||
it('createRenewalPolicy sends POST with body', async () => {
|
||||
mockFetch.mockReturnValueOnce(
|
||||
mockJsonResponse({
|
||||
id: 'rp-new',
|
||||
name: 'New Policy',
|
||||
renewal_window_days: 30,
|
||||
max_retries: 3,
|
||||
retry_interval_seconds: 3600,
|
||||
auto_renew: true,
|
||||
}),
|
||||
);
|
||||
await createRenewalPolicy({
|
||||
name: 'New Policy',
|
||||
renewal_window_days: 30,
|
||||
max_retries: 3,
|
||||
retry_interval_seconds: 3600,
|
||||
auto_renew: true,
|
||||
});
|
||||
const [url, init] = mockFetch.mock.calls[0];
|
||||
expect(url).toBe('/api/v1/renewal-policies');
|
||||
expect(init.method).toBe('POST');
|
||||
expect(JSON.parse(init.body).name).toBe('New Policy');
|
||||
});
|
||||
|
||||
it('updateRenewalPolicy sends PUT with partial data', async () => {
|
||||
mockFetch.mockReturnValueOnce(mockJsonResponse({ id: 'rp-default', name: 'Renamed' }));
|
||||
await updateRenewalPolicy('rp-default', { name: 'Renamed' });
|
||||
const [url, init] = mockFetch.mock.calls[0];
|
||||
expect(url).toBe('/api/v1/renewal-policies/rp-default');
|
||||
expect(init.method).toBe('PUT');
|
||||
expect(JSON.parse(init.body)).toEqual({ name: 'Renamed' });
|
||||
});
|
||||
|
||||
it('deleteRenewalPolicy sends DELETE', async () => {
|
||||
mockFetch.mockReturnValueOnce(mockJsonResponse({ message: 'deleted' }));
|
||||
await deleteRenewalPolicy('rp-default');
|
||||
const [url, init] = mockFetch.mock.calls[0];
|
||||
expect(url).toBe('/api/v1/renewal-policies/rp-default');
|
||||
expect(init.method).toBe('DELETE');
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Issuers ────────────────────────────────────────
|
||||
|
||||
describe('Issuers', () => {
|
||||
|
||||
+24
-1
@@ -1,4 +1,4 @@
|
||||
import type { Certificate, CertificateVersion, Agent, Job, Notification, AuditEvent, PolicyRule, PolicyViolation, Issuer, Target, CertificateProfile, Owner, Team, AgentGroup, PaginatedResponse, DashboardSummary, CertificateStatusCount, ExpirationBucket, JobTrendDataPoint, IssuanceRateDataPoint, MetricsResponse, DiscoveredCertificate, DiscoveryScan, DiscoverySummary, NetworkScanTarget, EndpointHealthCheck, HealthHistoryEntry, HealthCheckSummary, AgentDependencyCounts, RetireAgentResponse, BlockedByDependenciesResponse } from './types';
|
||||
import type { Certificate, CertificateVersion, Agent, Job, Notification, AuditEvent, PolicyRule, PolicyViolation, RenewalPolicy, Issuer, Target, CertificateProfile, Owner, Team, AgentGroup, PaginatedResponse, DashboardSummary, CertificateStatusCount, ExpirationBucket, JobTrendDataPoint, IssuanceRateDataPoint, MetricsResponse, DiscoveredCertificate, DiscoveryScan, DiscoverySummary, NetworkScanTarget, EndpointHealthCheck, HealthHistoryEntry, HealthCheckSummary, AgentDependencyCounts, RetireAgentResponse, BlockedByDependenciesResponse } from './types';
|
||||
|
||||
const BASE = '/api/v1';
|
||||
|
||||
@@ -344,6 +344,29 @@ export const deletePolicy = (id: string) =>
|
||||
export const getPolicyViolations = (id: string) =>
|
||||
fetchJSON<PaginatedResponse<PolicyViolation>>(`${BASE}/policies/${id}/violations`);
|
||||
|
||||
// G-1: Renewal Policies (/api/v1/renewal-policies) — lifecycle policies with
|
||||
// rp-* IDs in the renewal_policies table. Distinct from getPolicies() above
|
||||
// which hits /api/v1/policies and returns PolicyRule (compliance, pol-* IDs).
|
||||
// OnboardingWizard, CertificatesPage, and CertificateDetailPage populate the
|
||||
// `renewal_policy_id` dropdown from this endpoint; populating it from
|
||||
// getPolicies() produced FK violations on certificate insert/update.
|
||||
export const getRenewalPolicies = (page = 1, perPage = 50) => {
|
||||
const qs = new URLSearchParams({ page: String(page), per_page: String(perPage) }).toString();
|
||||
return fetchJSON<PaginatedResponse<RenewalPolicy>>(`${BASE}/renewal-policies?${qs}`);
|
||||
};
|
||||
|
||||
export const getRenewalPolicy = (id: string) =>
|
||||
fetchJSON<RenewalPolicy>(`${BASE}/renewal-policies/${id}`);
|
||||
|
||||
export const createRenewalPolicy = (data: Partial<RenewalPolicy>) =>
|
||||
fetchJSON<RenewalPolicy>(`${BASE}/renewal-policies`, { method: 'POST', body: JSON.stringify(data) });
|
||||
|
||||
export const updateRenewalPolicy = (id: string, data: Partial<RenewalPolicy>) =>
|
||||
fetchJSON<RenewalPolicy>(`${BASE}/renewal-policies/${id}`, { method: 'PUT', body: JSON.stringify(data) });
|
||||
|
||||
export const deleteRenewalPolicy = (id: string) =>
|
||||
fetchJSON<void>(`${BASE}/renewal-policies/${id}`, { method: 'DELETE' });
|
||||
|
||||
// Issuers
|
||||
export const getIssuers = (params: Record<string, string> = {}) => {
|
||||
const qs = new URLSearchParams({ page: '1', per_page: '50', ...params }).toString();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { POLICY_TYPES, POLICY_SEVERITIES } from './types';
|
||||
import type { Agent } from './types';
|
||||
import type { Agent, Certificate, CertificateVersion } from './types';
|
||||
|
||||
/**
|
||||
* Regression tests for the policy enum tuples.
|
||||
@@ -132,3 +132,91 @@ describe('Agent interface (I-004 retirement)', () => {
|
||||
expect(active.retired_reason).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* D-5 (cat-f-ae0d06b6588f, master): Certificate TS phantom-fields trim.
|
||||
*
|
||||
* Pre-D-5 the Certificate interface declared `serial_number`,
|
||||
* `fingerprint_sha256`, `key_algorithm`, `key_size`, and `issued_at` as
|
||||
* optional. These fields were never emitted by Go's `ManagedCertificate`
|
||||
* (internal/domain/certificate.go) — they live on `CertificateVersion`,
|
||||
* which is the per-issuance record fetched from
|
||||
* /api/v1/certificates/{id}/versions. The optional declarations made
|
||||
* `cert.serial_number` always-undefined on list responses, and downstream
|
||||
* consumers (CertificateDetailPage's Key Algorithm / Key Size rows in
|
||||
* particular) silently rendered '—' for every cert despite the data
|
||||
* being available a single fetch away.
|
||||
*
|
||||
* Post-D-5 the TS type makes the missing-data case explicit: a
|
||||
* `cert.serial_number` access becomes a TS compile error, forcing every
|
||||
* consumer to acknowledge the version-fallback pattern. This regression
|
||||
* test pins the trim — if a future PR re-adds any of the five phantom
|
||||
* fields to Certificate (e.g. via merge conflict, copy-paste, or a
|
||||
* codegen run that regenerates from a stale OpenAPI spec), the
|
||||
* compile-fail block here will surface it.
|
||||
*/
|
||||
describe('Certificate interface (D-5 phantom-fields trim)', () => {
|
||||
it('does NOT declare per-issuance fields — those live on CertificateVersion', () => {
|
||||
// Construct a fully-populated Certificate. If a future PR re-adds
|
||||
// any of the five phantom fields (serial_number, fingerprint_sha256,
|
||||
// key_algorithm, key_size, issued_at) to the interface, every
|
||||
// omission in this literal becomes "missing required field" and
|
||||
// the test fails to compile. Conversely, attempting to set any of
|
||||
// the five fields on the literal is a TS error today (excess
|
||||
// property), so the negative-assertion block below also fails to
|
||||
// compile if someone re-adds them as optional.
|
||||
const cert: Certificate = {
|
||||
id: 'mc-test',
|
||||
name: 'test',
|
||||
common_name: 'test.example.com',
|
||||
sans: [],
|
||||
status: 'Active',
|
||||
environment: 'production',
|
||||
issuer_id: 'iss-test',
|
||||
owner_id: 'o-test',
|
||||
team_id: 't-test',
|
||||
renewal_policy_id: 'rp-default',
|
||||
certificate_profile_id: 'cp-default',
|
||||
expires_at: '2027-01-01T00:00:00Z',
|
||||
tags: {},
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
updated_at: '2026-01-01T00:00:00Z',
|
||||
};
|
||||
expect(cert.id).toBe('mc-test');
|
||||
|
||||
// Excess-property check: each of these MUST be a TS error if
|
||||
// uncommented. Keep them in the test as documentation of what's
|
||||
// intentionally absent. (We can't directly assert "type does not
|
||||
// have property X" without a type-level helper, but the literal
|
||||
// construction above plus tsc --noEmit in CI is the binding check.)
|
||||
//
|
||||
// const broken: Certificate = { ...cert, serial_number: '01:02' }; // ❌ TS2353
|
||||
// const broken2: Certificate = { ...cert, key_algorithm: 'EC' }; // ❌ TS2353
|
||||
// const broken3: Certificate = { ...cert, key_size: 256 }; // ❌ TS2353
|
||||
// const broken4: Certificate = { ...cert, fingerprint_sha256: '' };// ❌ TS2353
|
||||
// const broken5: Certificate = { ...cert, issued_at: '...' }; // ❌ TS2353
|
||||
});
|
||||
|
||||
it('CertificateVersion still carries the per-issuance fields', () => {
|
||||
// The other half of the contract: the trimmed fields didn't go to
|
||||
// /dev/null — they live (and have always lived) on CertificateVersion.
|
||||
// If a refactor removes them from CertificateVersion too, the
|
||||
// CertificateDetailPage fallback path breaks. Pin both halves.
|
||||
const v: CertificateVersion = {
|
||||
id: 'mcv-test',
|
||||
certificate_id: 'mc-test',
|
||||
serial_number: '01:02:03',
|
||||
fingerprint_sha256: 'a'.repeat(64),
|
||||
pem_chain: '-----BEGIN CERTIFICATE-----\n...',
|
||||
csr_pem: '-----BEGIN CERTIFICATE REQUEST-----\n...',
|
||||
not_before: '2026-01-01T00:00:00Z',
|
||||
not_after: '2027-01-01T00:00:00Z',
|
||||
key_algorithm: 'ECDSA',
|
||||
key_size: 256,
|
||||
created_at: '2026-01-01T00:00:00Z',
|
||||
};
|
||||
expect(v.serial_number).toBe('01:02:03');
|
||||
expect(v.key_algorithm).toBe('ECDSA');
|
||||
expect(v.key_size).toBe(256);
|
||||
});
|
||||
});
|
||||
|
||||
+46
-5
@@ -1,3 +1,15 @@
|
||||
// D-5 (cat-f-ae0d06b6588f, master): the five per-issuance fields
|
||||
// (serial_number, fingerprint_sha256, key_algorithm, key_size,
|
||||
// issued_at) USED to live here as optional. They were never emitted
|
||||
// by Go's `ManagedCertificate` (internal/domain/certificate.go) — they
|
||||
// live on `CertificateVersion` (per-issuance evidence) and are fetched
|
||||
// via getCertificateVersions(id). Render-site consumers (notably
|
||||
// CertificateDetailPage) use `latestVersion?.field` as the canonical
|
||||
// access path. Pre-D-5 the optional declaration silently returned
|
||||
// `undefined` on every list response, so consumers who didn't know
|
||||
// about the version-fallback pattern rendered '—' for every cert; now
|
||||
// the missing-data case is explicit at the type level (a `cert.X`
|
||||
// access for one of these fields is a TS compile error).
|
||||
export interface Certificate {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -10,11 +22,6 @@ export interface Certificate {
|
||||
team_id: string;
|
||||
renewal_policy_id: string;
|
||||
certificate_profile_id: string;
|
||||
serial_number?: string;
|
||||
fingerprint_sha256?: string;
|
||||
key_algorithm?: string;
|
||||
key_size?: number;
|
||||
issued_at?: string;
|
||||
expires_at: string;
|
||||
revoked_at?: string;
|
||||
revocation_reason?: string;
|
||||
@@ -51,6 +58,15 @@ export interface CertificateVersion {
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
// G-2 (P1): `api_key_hash` is intentionally absent from this interface.
|
||||
// The server-side struct (internal/domain/connector.go::Agent) carries
|
||||
// the field for the auth-lookup path but redacts it via a custom
|
||||
// MarshalJSON so it never reaches the JSON wire. Adding `api_key_hash`
|
||||
// here would not magically populate it on the wire — and would mislead
|
||||
// future contributors into thinking the field is part of the public
|
||||
// API contract. See docs/architecture.md ER-diagram note and
|
||||
// coverage-gap-audit-2026-04-24-v5/unified-audit.md cat-s5-apikey_leak
|
||||
// for the closure rationale.
|
||||
export interface Agent {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -228,6 +244,31 @@ export interface PolicyViolation {
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* G-1: RenewalPolicy is the lifecycle policy attached to managed certificates
|
||||
* via `managed_certificates.renewal_policy_id` (FK ON DELETE RESTRICT → `rp-*`
|
||||
* IDs in the `renewal_policies` table). Distinct from `PolicyRule` above, which
|
||||
* models compliance rules in the `policy_rules` table with `pol-*` IDs. The
|
||||
* OnboardingWizard + CertificatesPage + CertificateDetailPage dropdowns populate
|
||||
* `renewal_policy_id` from this interface — previously they mis-populated it
|
||||
* from `getPolicies()` which returned `pol-*` IDs and produced FK violations on
|
||||
* certificate insert/update.
|
||||
*
|
||||
* JSON tags mirror internal/domain/renewal_policy.go.
|
||||
*/
|
||||
export interface RenewalPolicy {
|
||||
id: string;
|
||||
name: string;
|
||||
renewal_window_days: number;
|
||||
auto_renew: boolean;
|
||||
max_retries: number;
|
||||
retry_interval_seconds: number;
|
||||
alert_thresholds_days: number[];
|
||||
certificate_profile_id?: string | null;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
|
||||
export interface Issuer {
|
||||
id: string;
|
||||
name: string;
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { render } from '@testing-library/react';
|
||||
import StatusBadge from './StatusBadge';
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// D-1 master — StatusBadge enum-coverage contract
|
||||
//
|
||||
// The single source of truth for what Go actually emits on the wire.
|
||||
// Update this if the Go enums change (and the StatusBadge will go red
|
||||
// here BEFORE any user sees a wrong color in production).
|
||||
//
|
||||
// Sources (mirror the Go const blocks verbatim — wire VALUES, not Go
|
||||
// identifier names):
|
||||
// AgentStatus — internal/domain/connector.go:174-176
|
||||
// CertificateStatus — internal/domain/certificate.go:50-57
|
||||
// JobStatus — internal/domain/job.go:43-49
|
||||
// NotificationStatus— internal/domain/notification.go:51-55
|
||||
// DiscoveryStatus — internal/domain/discovery.go:13-17
|
||||
// HealthStatus — internal/domain/health_check.go:9-13
|
||||
//
|
||||
// Issuer 'Enabled' / 'Disabled' are NOT a Go enum — they're frontend-
|
||||
// synthesized labels mapped from `Issuer.enabled bool` at the call
|
||||
// site (TargetsPage.tsx similarly). Pinned in a separate group below.
|
||||
//
|
||||
// Pre-D-1 drift this test would have caught:
|
||||
// - Agent: StatusBadge had 'Stale' (never emitted), missing 'Degraded'
|
||||
// (real). Degraded agents rendered as default neutral grey, hiding
|
||||
// attention-needed state from operators.
|
||||
// - Notification: StatusBadge missing 'dead' (retries exhausted).
|
||||
// Dead-letter notifications rendered as default neutral, visually
|
||||
// equated with 'read' (operator-acknowledged).
|
||||
// - Certificate: StatusBadge had 'PendingIssuance' (never emitted).
|
||||
// Dead key, latent confusion vector if anyone copies it as
|
||||
// canonical.
|
||||
// -----------------------------------------------------------------------------
|
||||
const ENUMS_FROM_GO = {
|
||||
AgentStatus: ['Online', 'Offline', 'Degraded'] as const,
|
||||
CertificateStatus: ['Pending', 'Active', 'Expiring', 'Expired',
|
||||
'RenewalInProgress', 'Failed', 'Revoked', 'Archived'] as const,
|
||||
JobStatus: ['Pending', 'AwaitingCSR', 'AwaitingApproval', 'Running',
|
||||
'Completed', 'Failed', 'Cancelled'] as const,
|
||||
NotificationStatus: ['pending', 'sent', 'failed', 'dead', 'read'] as const,
|
||||
DiscoveryStatus: ['Unmanaged', 'Managed', 'Dismissed'] as const,
|
||||
HealthStatus: ['healthy', 'degraded', 'down', 'cert_mismatch', 'unknown'] as const,
|
||||
};
|
||||
|
||||
// Frontend-synthesized labels — not in any Go enum, but surfaced via
|
||||
// StatusBadge from real call sites (TargetsPage, AgentGroupsPage etc.)
|
||||
// and therefore part of the visual contract this component owns.
|
||||
const FRONTEND_SYNTHESIZED = ['Enabled', 'Disabled'] as const;
|
||||
|
||||
describe('StatusBadge — enum-coverage contract (D-1 master)', () => {
|
||||
// Iterate every Go-emitted value across every enum and assert the
|
||||
// rendered <span> carries a class OTHER than the default 'badge-neutral'.
|
||||
// EXCEPT for legitimately-neutral statuses (Archived, Cancelled,
|
||||
// Dismissed, read, unknown) which are intentionally neutral by UX
|
||||
// design — those are pinned by a separate sub-test below.
|
||||
const INTENTIONALLY_NEUTRAL = new Set(['Archived', 'Cancelled', 'Dismissed', 'read', 'unknown']);
|
||||
|
||||
for (const [enumName, values] of Object.entries(ENUMS_FROM_GO)) {
|
||||
for (const v of values) {
|
||||
it(`${enumName}: '${v}' renders a recognised class (no fallthrough)`, () => {
|
||||
const { container } = render(<StatusBadge status={v} />);
|
||||
const span = container.querySelector('span');
|
||||
expect(span).not.toBeNull();
|
||||
const cls = span!.className;
|
||||
if (INTENTIONALLY_NEUTRAL.has(v)) {
|
||||
// Neutral is the right semantic answer for terminal-acknowledged
|
||||
// states — but it must come from an EXPLICIT mapping, not the
|
||||
// dictionary-default fallthrough. Asserting a 'badge-neutral'
|
||||
// class here pins that the explicit entry exists; if someone
|
||||
// deletes it, this still passes (because the default is also
|
||||
// 'badge-neutral'). The negative assertion in the dead-keys
|
||||
// sub-test below catches the deletion case.
|
||||
expect(cls).toBe('badge badge-neutral');
|
||||
} else {
|
||||
expect(cls).toMatch(/badge-(success|warning|danger|info)/);
|
||||
expect(cls).not.toBe('badge badge-neutral');
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for (const v of FRONTEND_SYNTHESIZED) {
|
||||
it(`Frontend-synthesized '${v}' has an explicit StatusBadge mapping`, () => {
|
||||
const { container } = render(<StatusBadge status={v} />);
|
||||
const cls = container.querySelector('span')!.className;
|
||||
// 'Disabled' is intentionally neutral; 'Enabled' is success.
|
||||
expect(cls).toMatch(/badge-(success|warning|danger|info|neutral)/);
|
||||
});
|
||||
}
|
||||
|
||||
// Negative contract: the dead keys we deleted MUST fall through to the
|
||||
// default. If a future PR re-adds 'Stale' or 'PendingIssuance' to
|
||||
// statusStyles, this test will surface it because the rendered class
|
||||
// will no longer be 'badge badge-neutral' (it'd be the explicit value
|
||||
// someone re-added, e.g. 'badge-warning').
|
||||
it.each(['Stale', 'PendingIssuance'])(
|
||||
"dead key '%s' falls through to neutral default (no explicit mapping)",
|
||||
(deadKey) => {
|
||||
const { container } = render(<StatusBadge status={deadKey} />);
|
||||
expect(container.querySelector('span')!.className).toBe('badge badge-neutral');
|
||||
},
|
||||
);
|
||||
|
||||
// Specific danger-class contracts (UX correctness, not just non-default).
|
||||
// These pin the operator-attention semantics. If anyone changes 'dead'
|
||||
// or 'Degraded' away from these classes, the operator's perception of
|
||||
// "this needs my attention" changes — these are the highest-stakes
|
||||
// visual semantics in the dashboard.
|
||||
it("Notification 'dead' renders as danger (operator attention required)", () => {
|
||||
const { container } = render(<StatusBadge status="dead" />);
|
||||
expect(container.querySelector('span')!.className).toContain('badge-danger');
|
||||
});
|
||||
|
||||
it("Agent 'Degraded' renders as warning (degradation, not failure)", () => {
|
||||
const { container } = render(<StatusBadge status="Degraded" />);
|
||||
expect(container.querySelector('span')!.className).toContain('badge-warning');
|
||||
});
|
||||
|
||||
// Unknown statuses fall through to neutral. The string is still
|
||||
// displayed verbatim so an operator can see "what is this?" rather
|
||||
// than nothing at all.
|
||||
it('unknown status string renders as neutral but preserves the label text', () => {
|
||||
const { container } = render(<StatusBadge status="SomeFutureStatus" />);
|
||||
const span = container.querySelector('span');
|
||||
expect(span!.className).toBe('badge badge-neutral');
|
||||
expect(span!.textContent).toBe('SomeFutureStatus');
|
||||
});
|
||||
});
|
||||
@@ -1,13 +1,41 @@
|
||||
// StatusBadge — single source of truth for the certctl dashboard's
|
||||
// per-status color mapping. Keys are the EXACT wire values Go emits
|
||||
// (case-sensitive). Update this file when a new status value lands on
|
||||
// the Go side; StatusBadge.test.tsx walks every value and will go red
|
||||
// before users see a default-grey "what is happening?" badge.
|
||||
//
|
||||
// D-1 master closure (cat-d-359e92c20cbf, cat-d-9f4c8e4a91f1,
|
||||
// cat-d-1447e04732e7, cat-f-cert_detail_page_key_render_fallback,
|
||||
// cat-f-ae0d06b6588f) fixed the pre-master drift:
|
||||
// - Agent: 'Stale' (never emitted) → 'Degraded' (real value);
|
||||
// `internal/domain/connector.go::AgentStatusDegraded = "Degraded"`.
|
||||
// - Notification: added 'dead' (was falling through to neutral);
|
||||
// `internal/domain/notification.go::NotificationStatusDead = "dead"`.
|
||||
// - Certificate: dropped dead 'PendingIssuance' key — the real
|
||||
// `CertificateStatusPending = "Pending"` is mapped under Job
|
||||
// statuses below.
|
||||
//
|
||||
// Source-of-truth references (re-verify if the Go enum changes):
|
||||
// - internal/domain/connector.go::AgentStatus*
|
||||
// - internal/domain/certificate.go::CertificateStatus*
|
||||
// - internal/domain/job.go::JobStatus*
|
||||
// - internal/domain/notification.go::NotificationStatus*
|
||||
// - internal/domain/discovery.go::DiscoveryStatus*
|
||||
// - internal/domain/health_check.go::HealthStatus*
|
||||
//
|
||||
// Issuer 'Enabled'/'Disabled' are frontend-synthesized labels (mapped
|
||||
// from the `enabled bool` field on the Issuer struct), not Go-emitted
|
||||
// enum values, but they're surfaced via StatusBadge for consistency.
|
||||
const statusStyles: Record<string, string> = {
|
||||
// Certificate statuses
|
||||
// Certificate statuses (internal/domain/certificate.go::CertificateStatus*)
|
||||
Active: 'badge-success',
|
||||
Expiring: 'badge-warning',
|
||||
Expired: 'badge-danger',
|
||||
RenewalInProgress: 'badge-info',
|
||||
PendingIssuance: 'badge-info',
|
||||
Archived: 'badge-neutral',
|
||||
Revoked: 'badge-danger',
|
||||
// Job statuses
|
||||
// Job statuses (internal/domain/job.go::JobStatus*) — note: 'Pending' is
|
||||
// shared between CertificateStatusPending and JobStatusPending.
|
||||
Pending: 'badge-info',
|
||||
AwaitingCSR: 'badge-info',
|
||||
AwaitingApproval: 'badge-info',
|
||||
@@ -15,23 +43,30 @@ const statusStyles: Record<string, string> = {
|
||||
Completed: 'badge-success',
|
||||
Failed: 'badge-danger',
|
||||
Cancelled: 'badge-neutral',
|
||||
// Agent statuses
|
||||
// Agent statuses (internal/domain/connector.go::AgentStatus*) — D-1:
|
||||
// 'Degraded' replaces the never-emitted 'Stale' from pre-D-1 (the Go
|
||||
// domain has only Online / Offline / Degraded; mapping 'Stale' yellow
|
||||
// and letting 'Degraded' fall through to neutral hid degraded agents).
|
||||
Online: 'badge-success',
|
||||
Offline: 'badge-danger',
|
||||
Stale: 'badge-warning',
|
||||
// Discovery statuses
|
||||
Degraded: 'badge-warning',
|
||||
// Discovery statuses (internal/domain/discovery.go::DiscoveryStatus*)
|
||||
Unmanaged: 'badge-warning',
|
||||
Managed: 'badge-success',
|
||||
Dismissed: 'badge-neutral',
|
||||
// Issuer statuses
|
||||
// Issuer statuses (frontend-synthesized from Issuer.enabled bool)
|
||||
Enabled: 'badge-success',
|
||||
Disabled: 'badge-neutral',
|
||||
// Notification statuses
|
||||
// Notification statuses (internal/domain/notification.go::NotificationStatus*)
|
||||
// — D-2: added 'dead' (retries exhausted, dead-letter queue). Pre-D-2 it
|
||||
// fell through to neutral, visually equating "needs operator attention"
|
||||
// with "operator already acknowledged" (read).
|
||||
sent: 'badge-success',
|
||||
pending: 'badge-warning',
|
||||
failed: 'badge-danger',
|
||||
dead: 'badge-danger',
|
||||
read: 'badge-neutral',
|
||||
// Health check statuses
|
||||
// Health check statuses (internal/domain/health_check.go::HealthStatus*)
|
||||
healthy: 'badge-success',
|
||||
degraded: 'badge-warning',
|
||||
down: 'badge-danger',
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useState } from 'react';
|
||||
import { useParams, useNavigate } from 'react-router-dom';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { getCertificate, getCertificateVersions, triggerRenewal, triggerDeployment, archiveCertificate, revokeCertificate, updateCertificate, getTargets, getJobs, getPolicies, getProfiles, getProfile, downloadCertificatePEM, exportCertificatePKCS12 } from '../api/client';
|
||||
import { getCertificate, getCertificateVersions, triggerRenewal, triggerDeployment, archiveCertificate, revokeCertificate, updateCertificate, getTargets, getJobs, getRenewalPolicies, getProfiles, getProfile, downloadCertificatePEM, exportCertificatePKCS12 } from '../api/client';
|
||||
import { REVOCATION_REASONS } from '../api/types';
|
||||
import PageHeader from '../components/PageHeader';
|
||||
import StatusBadge from '../components/StatusBadge';
|
||||
@@ -164,9 +164,14 @@ function InlinePolicyEditor({ certId, currentPolicyId, currentProfileId }: { cer
|
||||
const [policyId, setPolicyId] = useState(currentPolicyId);
|
||||
const [profileId, setProfileId] = useState(currentProfileId);
|
||||
|
||||
// G-1: swap from getPolicies (compliance rules, pol-*) to getRenewalPolicies
|
||||
// (lifecycle policies, rp-*). managed_certificates.renewal_policy_id FK
|
||||
// points at renewal_policies(id); the previous getPolicies call populated
|
||||
// the dropdown with pol-* IDs that would 400/23503 at the server. See also
|
||||
// OnboardingWizard.tsx:603 and CertificatesPage.tsx:53 for the sibling fixes.
|
||||
const { data: policies } = useQuery({
|
||||
queryKey: ['policies'],
|
||||
queryFn: () => getPolicies(),
|
||||
queryKey: ['renewal-policies'],
|
||||
queryFn: () => getRenewalPolicies(1, 500),
|
||||
enabled: editing,
|
||||
});
|
||||
|
||||
@@ -227,7 +232,10 @@ function InlinePolicyEditor({ certId, currentPolicyId, currentProfileId }: { cer
|
||||
className="w-full bg-white border border-surface-border rounded px-3 py-2 text-sm text-ink">
|
||||
<option value="">None</option>
|
||||
{policies?.data?.map(p => (
|
||||
<option key={p.id} value={p.id}>{p.name} ({p.type})</option>
|
||||
// G-1: RenewalPolicy has no `type` field (that was PolicyRule).
|
||||
// Show the human-readable name + renewal window so operators can
|
||||
// pick the correct lifecycle policy at a glance.
|
||||
<option key={p.id} value={p.id}>{p.name} ({p.renewal_window_days}d window)</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
@@ -372,11 +380,20 @@ export default function CertificateDetailPage() {
|
||||
);
|
||||
}
|
||||
|
||||
// Derive certificate metadata from latest version (backend doesn't include these on the cert object)
|
||||
// Derive certificate metadata from latest version. Per-issuance fields
|
||||
// (serial_number, fingerprint_sha256, key_algorithm, key_size, issued_at)
|
||||
// live on `CertificateVersion`, NOT on `ManagedCertificate` — the Go
|
||||
// domain has always been this way; the TS interface used to lie about
|
||||
// it via optional `cert.X?` declarations that always returned undefined
|
||||
// on list responses (D-5 / cat-f-ae0d06b6588f). Post-D-5 the TS type
|
||||
// makes the missing-data case explicit, and every read goes through
|
||||
// `latestVersion?.field` here.
|
||||
const latestVersion = versions?.data?.[0];
|
||||
const serialNumber = cert.serial_number || latestVersion?.serial_number;
|
||||
const fingerprintSha256 = cert.fingerprint_sha256 || latestVersion?.fingerprint_sha256;
|
||||
const issuedAt = cert.issued_at || latestVersion?.not_before;
|
||||
const serialNumber = latestVersion?.serial_number;
|
||||
const fingerprintSha256 = latestVersion?.fingerprint_sha256;
|
||||
const issuedAt = latestVersion?.not_before;
|
||||
const keyAlgorithm = latestVersion?.key_algorithm;
|
||||
const keySize = latestVersion?.key_size;
|
||||
|
||||
const days = daysUntil(cert.expires_at);
|
||||
const isRevoked = cert.status === 'Revoked';
|
||||
@@ -528,8 +545,13 @@ export default function CertificateDetailPage() {
|
||||
<InfoRow label="Fingerprint" value={
|
||||
fingerprintSha256 ? <span className="font-mono text-xs">{fingerprintSha256.slice(0, 24)}...</span> : '—'
|
||||
} />
|
||||
<InfoRow label="Key Algorithm" value={cert.key_algorithm || '—'} />
|
||||
<InfoRow label="Key Size" value={cert.key_size ? `${cert.key_size} bits` : '—'} />
|
||||
{/* D-4 (cat-f-cert_detail_page_key_render_fallback): mirror the
|
||||
latestVersion fallback used for serialNumber / fingerprintSha256
|
||||
above. Pre-D-4 these rows accessed `cert.key_algorithm` /
|
||||
`cert.key_size` directly — both phantom Certificate fields per
|
||||
D-5 (cat-f-ae0d06b6588f), so the rows always rendered '—'. */}
|
||||
<InfoRow label="Key Algorithm" value={keyAlgorithm || '—'} />
|
||||
<InfoRow label="Key Size" value={keySize != null ? `${keySize} bits` : '—'} />
|
||||
{profile?.allowed_ekus && profile.allowed_ekus.length > 0 && (
|
||||
<InfoRow label="Extended Key Usage" value={
|
||||
<div className="flex flex-wrap gap-1">
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useState } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { getCertificates, createCertificate, triggerRenewal, revokeCertificate, updateCertificate, getOwners, getTeams, getPolicies, getProfiles, getIssuers, bulkRevokeCertificates } from '../api/client';
|
||||
import { getCertificates, createCertificate, triggerRenewal, revokeCertificate, updateCertificate, getOwners, getTeams, getRenewalPolicies, getProfiles, getIssuers, bulkRevokeCertificates } from '../api/client';
|
||||
import { useAuth } from '../components/AuthProvider';
|
||||
import { REVOCATION_REASONS } from '../api/types';
|
||||
import PageHeader from '../components/PageHeader';
|
||||
@@ -48,9 +48,15 @@ function CreateCertificateModal({ onClose, onSuccess }: { onClose: () => void; o
|
||||
queryKey: ['teams', 'form'],
|
||||
queryFn: () => getTeams({ per_page: '500' }),
|
||||
});
|
||||
// G-1: swap from getPolicies (compliance rules, pol-*) to getRenewalPolicies
|
||||
// (lifecycle policies, rp-*). managed_certificates.renewal_policy_id FK
|
||||
// points at renewal_policies(id), so the dropdown must pull from that table
|
||||
// — the previous getPolicies call populated the dropdown with pol-* IDs that
|
||||
// would 400/23503 at the server. See also OnboardingWizard.tsx:603 and
|
||||
// CertificateDetailPage.tsx:169 for the sibling fixes.
|
||||
const { data: policiesResp } = useQuery({
|
||||
queryKey: ['renewal-policies', 'form'],
|
||||
queryFn: () => getPolicies({ per_page: '500' }),
|
||||
queryFn: () => getRenewalPolicies(1, 500),
|
||||
});
|
||||
const profiles = profilesResp?.data || [];
|
||||
const issuers = issuersResp?.data || [];
|
||||
|
||||
@@ -39,7 +39,10 @@ vi.mock('../api/client', () => ({
|
||||
getProfiles: vi.fn(),
|
||||
getOwners: vi.fn(),
|
||||
getTeams: vi.fn(),
|
||||
getPolicies: vi.fn(),
|
||||
// G-1: wizard populates the renewal_policy_id dropdown from
|
||||
// getRenewalPolicies (rp-* ids), not getPolicies (which returns compliance
|
||||
// rules with pol-* ids and violates the FK).
|
||||
getRenewalPolicies: vi.fn(),
|
||||
createIssuer: vi.fn(),
|
||||
testIssuerConnection: vi.fn(),
|
||||
createCertificate: vi.fn(),
|
||||
@@ -85,7 +88,9 @@ function stubAllQueriesEmpty() {
|
||||
vi.mocked(client.getTeams).mockResolvedValue({
|
||||
data: [], total: 0, page: 1, per_page: 500,
|
||||
} as never);
|
||||
vi.mocked(client.getPolicies).mockResolvedValue({
|
||||
// G-1: wizard populates renewal_policy_id from getRenewalPolicies, not
|
||||
// getPolicies. See comment on the mock factory above.
|
||||
vi.mocked(client.getRenewalPolicies).mockResolvedValue({
|
||||
data: [], total: 0, page: 1, per_page: 500,
|
||||
} as never);
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ import { useState } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { useNavigate, Link } from 'react-router-dom';
|
||||
import {
|
||||
getIssuers, getAgents, getProfiles, getOwners, getTeams, getPolicies,
|
||||
getIssuers, getAgents, getProfiles, getOwners, getTeams, getRenewalPolicies,
|
||||
createIssuer, testIssuerConnection,
|
||||
createCertificate, triggerRenewal,
|
||||
createTeam, createOwner,
|
||||
@@ -600,7 +600,11 @@ function CertificateStep({ onNext, onSkip, createdIssuerId }: {
|
||||
const { data: agents } = useQuery({ queryKey: ['agents'], queryFn: () => getAgents() });
|
||||
const { data: owners } = useQuery({ queryKey: ['owners'], queryFn: () => getOwners({ per_page: '500' }) });
|
||||
const { data: teams } = useQuery({ queryKey: ['teams'], queryFn: () => getTeams({ per_page: '500' }) });
|
||||
const { data: policies } = useQuery({ queryKey: ['renewal-policies'], queryFn: () => getPolicies({ per_page: '500' }) });
|
||||
// G-1: bind renewal_policy_id dropdown to /api/v1/renewal-policies (rp-* IDs
|
||||
// from the renewal_policies table). Previously populated from getPolicies()
|
||||
// which returned compliance rules (pol-* IDs) and violated the FK
|
||||
// managed_certificates.renewal_policy_id → renewal_policies(id) on submit.
|
||||
const { data: policies } = useQuery({ queryKey: ['renewal-policies'], queryFn: () => getRenewalPolicies(1, 500) });
|
||||
|
||||
const hasAgents = (agents?.data?.length ?? 0) > 0;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user