mirror of
https://github.com/shankar0123/certctl.git
synced 2026-06-07 21:41:39 +00:00
efea4d0e03
The bundled `docker-compose.yml` started the `certctl-agent` service
without setting `CERTCTL_AGENT_ID`. `cmd/agent/main.go:1297-1300`
fails fast on missing AGENT_ID with "Error: -agent-id flag or
CERTCTL_AGENT_ID env var is required", which sends the container
into a silent restart loop on every fresh `docker compose up`.
Latent since commit d395776 (2026-03-14), which added the env-var
contract on the agent side but never wired a pre-seeded matching
row + env injection on the compose side. The integration test
compose (`docker-compose.test.yml`) does set CERTCTL_AGENT_ID +
seed agent-test-01 via seed_test.sql, which is why CI didn't
surface the bug. Caught when an external operator first cloned
dev/auth-bundle-1 to test Bundle 1.
Closure mirrors the integration-test pattern:

  * migrations/seed_demo.sql pre-seeds an `agent-demo-1` row
    alongside the existing server-scanner sentinel. ON CONFLICT
    (id) DO NOTHING preserves idempotency. api_key_hash is a
    no-auth placeholder since demo runs with CERTCTL_AUTH_TYPE=none
    (synthetic actor-demo-anon covers every request).
  * deploy/docker-compose.yml certctl-server: add
    CERTCTL_DEMO_SEED=true so the demo seed (which holds the
    agent-demo-1 row + the rest of the demo fixtures) actually
    runs in the bundled compose. The compose is already a demo
    posture (CERTCTL_AUTH_TYPE=none + CERTCTL_KEYGEN_MODE=server),
    so this is consistent. docker-compose.demo.yml still works
    (it sets the same flag) and stays for backward compat.
  * deploy/docker-compose.yml certctl-agent: set
    CERTCTL_AGENT_ID=agent-demo-1 (overridable via env) so the
    agent finds its row on first heartbeat.
  * Makefile qa-stats: agents-table count bumped 12 -> 13.

Production deploys are unaffected: they override CERTCTL_AUTH_TYPE,
CERTCTL_KEYGEN_MODE, CERTCTL_DEMO_SEED, and CERTCTL_AGENT_ID with
their own compose. The agent is registered via
POST /api/v1/agents and the returned ID is plugged into
CERTCTL_AGENT_ID per docs/operator/installation.md.
Verified path: `docker compose -f deploy/docker-compose.yml up
--build` boots green; certctl-agent reaches Online state on the
first heartbeat; `curl --cacert ... https://localhost:8443/api/v1/agents`
returns agent-demo-1 with status Online instead of an empty list.
242 lines
9.7 KiB
YAML
242 lines
9.7 KiB
YAML
services:
  # HTTPS-Everywhere Phase 3 — self-signed TLS bootstrap (init container).
  # Generates a CN=certctl-server ECDSA-P256 (SHA-256 signature) cert with
  # the SAN list locked by milestone §3.6 on first boot; subsequent boots
  # see the cert already present in the `certs` named volume and no-op out.
  # Server + agent mount the volume read-only. Destroy via `docker compose
  # down -v` to force regeneration. This bootstrap is for docker-compose
  # demos and local dev only; Helm operators supply a Secret / cert-manager
  # Certificate per docs/tls.md.
  #
  # Rationale for ECDSA-P256 (was ed25519 pre-v2.0.48): Apple's TLS stack
  # — Safari Network Framework and the macOS-bundled LibreSSL 3.3.6
  # /usr/bin/curl — does not advertise ed25519 in the ClientHello
  # signature_algorithms extension for server certs, yielding "tls: peer
  # doesn't support any of the certificate's signature algorithms" at
  # handshake. ECDSA-P256 with SHA-256 is universally supported. See
  # docs/tls.md Pattern 1.
  certctl-tls-init:
    image: alpine/openssl:latest
    container_name: certctl-tls-init
    # One-shot job: must never be restarted after it exits. Quoted so the
    # value stays the string "no" rather than a YAML 1.1 boolean.
    restart: "no"
    entrypoint: /bin/sh
    # `$$` is Compose's escape for a literal `$`, so the shell (not Compose
    # interpolation) expands CERT / KEY / CA at run time.
    command:
      - "-c"
      - |
        set -eu
        CERT=/etc/certctl/tls/server.crt
        KEY=/etc/certctl/tls/server.key
        CA=/etc/certctl/tls/ca.crt
        if [ -f "$$CERT" ] && [ -f "$$KEY" ] && [ -f "$$CA" ]; then
          echo "TLS cert already present at $$CERT — skipping generation"
        else
          mkdir -p /etc/certctl/tls
          openssl req -x509 -newkey ec \
            -pkeyopt ec_paramgen_curve:P-256 \
            -nodes \
            -keyout "$$KEY" \
            -out "$$CERT" \
            -days 3650 \
            -subj "/CN=certctl-server" \
            -addext "subjectAltName=DNS:certctl-server,DNS:localhost,IP:127.0.0.1,IP:::1"
          cp "$$CERT" "$$CA"
          echo "Generated self-signed TLS cert for certctl-server (ECDSA-P256/SHA-256, 3650d, CN=certctl-server)"
        fi
        # certctl binary runs as UID 1000 inside the server container per
        # Dockerfile:64-65; the cert + key must be readable by that UID.
        chown 1000:1000 "$$CERT" "$$KEY" "$$CA"
        chmod 0644 "$$CERT" "$$CA"
        chmod 0600 "$$KEY"
    volumes:
      # Mounted read-write here only; the server and agent mount it `:ro`.
      - certs:/etc/certctl/tls
    networks:
      - certctl-network

  # PostgreSQL database
  #
  # U-3 (P1, cat-u-seed_initdb_schema_drift, GitHub #10):
  # Pre-U-3 this stack mounted a hand-curated subset of `migrations/*.up.sql`
  # plus `seed.sql` into `/docker-entrypoint-initdb.d/`, and postgres
  # initdb-applied them on first boot. The mount list rotted every time a
  # new migration shipped that the seed depended on (000013 added
  # policy_rules.severity, 000017 renames retry_interval_minutes, etc.) —
  # initdb crashed, the container reported `unhealthy` indefinitely, and
  # `docker compose -f deploy/docker-compose.yml up -d --build` from a
  # fresh clone of v2.0.50 hit it on the first try.
  #
  # Post-U-3 the schema is built EXCLUSIVELY by the server at startup via
  # internal/repository/postgres.RunMigrations + RunSeed. Single source of
  # truth, no list to keep in sync. Postgres comes up empty; the server
  # waits for it healthy, then applies the full migration ladder + seed in
  # one shot. Helm + the dev examples were already runtime-only (Path B)
  # and worked through the same window.
  #
  # `start_period: 30s` gives postgres room to bootstrap on slow runners
  # (CI macOS, low-spec laptops) before the healthcheck failure counter
  # starts ticking. Pre-U-3 a slow first-init combined with the
  # `unhealthy` flap to cascade into certctl-server's `service_healthy`
  # depends_on, blocking the whole stack.
  postgres:
    image: postgres:16-alpine
    container_name: certctl-postgres
    environment:
      POSTGRES_DB: certctl
      POSTGRES_USER: certctl
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-certctl}
    ports:
      # NOTE(review): a bare "host:container" mapping publishes Postgres on
      # every host interface, with the default password above unless
      # POSTGRES_PASSWORD is set. Prefer "127.0.0.1:5432:5432" unless remote
      # access to the demo database is intended.
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - certctl-network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U certctl -d certctl"]
      interval: 5s
      timeout: 5s
      retries: 5
      start_period: 30s
    restart: unless-stopped

  # Certctl Server (API + scheduler)
  certctl-server:
    build:
      context: ..
      dockerfile: Dockerfile
      # Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
      # vars into the Docker build so the Node frontend stage and Go module
      # download can reach the public registries behind corporate proxies.
      # Defaults to empty; omit the variables from the host environment for
      # un-proxied builds and the behaviour is byte-identical to the pre-fix
      # tree.
      args:
        HTTP_PROXY: ${HTTP_PROXY:-}
        HTTPS_PROXY: ${HTTPS_PROXY:-}
        NO_PROXY: ${NO_PROXY:-}
    container_name: certctl-server
    # Start only once the database answers pg_isready and the TLS init job
    # has exited successfully, so the cert files exist before the server
    # reads them.
    depends_on:
      postgres:
        condition: service_healthy
      certctl-tls-init:
        condition: service_completed_successfully
    environment:
      # Bundle B / Audit M-018 (PCI-DSS Req 4 / CWE-319): in-cluster Postgres
      # on the docker bridge network keeps sslmode=disable acceptable; for
      # external/managed Postgres operators MUST override CERTCTL_DATABASE_URL
      # with sslmode=verify-full and provide the CA bundle. See docs/database-tls.md.
      CERTCTL_DATABASE_URL: ${CERTCTL_DATABASE_URL:-postgres://certctl:${POSTGRES_PASSWORD:-certctl}@postgres:5432/certctl?sslmode=disable}
      # Number-like values are quoted so they reach the container as the
      # exact strings written here.
      CERTCTL_SERVER_HOST: "0.0.0.0"
      CERTCTL_SERVER_PORT: "8443"
      CERTCTL_SERVER_TLS_CERT_PATH: /etc/certctl/tls/server.crt
      CERTCTL_SERVER_TLS_KEY_PATH: /etc/certctl/tls/server.key
      CERTCTL_LOG_LEVEL: info
      CERTCTL_AUTH_TYPE: none
      CERTCTL_KEYGEN_MODE: server  # Demo uses server-side keygen; production should use "agent"
      CERTCTL_NETWORK_SCAN_ENABLED: "true"  # Enable network scan GUI with seeded demo targets
      CERTCTL_CONFIG_ENCRYPTION_KEY: ${CERTCTL_CONFIG_ENCRYPTION_KEY:-change-me-32-char-encryption-key}  # AES-256-GCM for dynamic issuer/target config
      # Bundle 1 follow-on: this compose IS the bundled demo path
      # (CERTCTL_AUTH_TYPE=none + KEYGEN_MODE=server above), so the
      # demo seed runs by default. seed_demo.sql pre-seeds the
      # agent-demo-1 row that the bundled certctl-agent below needs
      # to authenticate. The docker-compose.demo.yml overlay still
      # works (it sets the same flag) and remains for backward
      # compat. Production deploys override CERTCTL_AUTH_TYPE +
      # KEYGEN_MODE + DEMO_SEED via their own compose.
      CERTCTL_DEMO_SEED: "true"
    ports:
      - "8443:8443"
    volumes:
      - certs:/etc/certctl/tls:ro
    networks:
      - certctl-network
    healthcheck:
      # Validates the server cert against the bootstrap CA (ca.crt is a copy
      # of the self-signed server cert written by certctl-tls-init).
      test: ["CMD", "curl", "--cacert", "/etc/certctl/tls/ca.crt", "-f", "https://localhost:8443/health"]
      interval: 10s
      timeout: 5s
      retries: 5
      # U-3: server boot now does RunMigrations + RunSeed before listening on
      # 8443. On a fresh clone the full migration ladder + seed application
      # can take ~10s on a small VM; start_period prevents the first few
      # healthcheck attempts from counting as failures while that work runs.
      start_period: 30s
    restart: unless-stopped
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 512M

  # Certctl Agent
  certctl-agent:
    build:
      context: ..
      dockerfile: Dockerfile.agent
      # Proxy propagation (M-4, Issue #9) — forwards host shell's proxy env
      # vars into the Docker build so the Go module download stage can reach
      # the public Go module proxy behind corporate proxies. Defaults to
      # empty; omit the variables from the host environment for un-proxied
      # builds and the behaviour is byte-identical to the pre-fix tree.
      args:
        HTTP_PROXY: ${HTTP_PROXY:-}
        HTTPS_PROXY: ${HTTPS_PROXY:-}
        NO_PROXY: ${NO_PROXY:-}
    container_name: certctl-agent
    # The agent starts only after the server's /health probe passes.
    depends_on:
      certctl-server:
        condition: service_healthy
    environment:
      CERTCTL_SERVER_URL: https://certctl-server:8443
      CERTCTL_SERVER_CA_BUNDLE_PATH: /etc/certctl/tls/ca.crt
      CERTCTL_API_KEY: ${CERTCTL_API_KEY:-change-me-in-production}
      # Bundle 1 follow-on: pre-Bundle-1 the bundled agent had no
      # CERTCTL_AGENT_ID set, hit cmd/agent/main.go's fail-fast guard
      # ("agent-id flag or CERTCTL_AGENT_ID env var is required"), and
      # restart-looped silently on every fresh `docker compose up`.
      # Latent since 2026-03-14 (commit d395776). seed_demo.sql now
      # pre-seeds the matching agents row; the demo runs with
      # CERTCTL_AUTH_TYPE=none on the server so the api_key Bearer
      # token is irrelevant here. Production deploys override
      # CERTCTL_AGENT_ID with the value returned from
      # POST /api/v1/agents during registration.
      CERTCTL_AGENT_ID: ${CERTCTL_AGENT_ID:-agent-demo-1}
      CERTCTL_AGENT_NAME: docker-agent
      CERTCTL_LOG_LEVEL: info
      CERTCTL_DISCOVERY_DIRS: /var/lib/certctl/keys  # Agent scans this directory for existing certificates
    volumes:
      - agent_keys:/var/lib/certctl/keys
      - certs:/etc/certctl/tls:ro
    networks:
      - certctl-network
    healthcheck:
      # NOTE(review): CMD-SHELL runs this through `sh -c`, and `pgrep -f`
      # matches full command lines, so the wrapper shell's own command line
      # (which contains "certctl-agent") can satisfy the probe even when the
      # agent process is gone. Confirm against the image, then consider
      # `pgrep -f '[c]ertctl-agent'` or an exact-name match.
      test: ["CMD-SHELL", "pgrep -f certctl-agent || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    restart: unless-stopped
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M

# Single user-defined bridge network shared by every service in this file;
# services reach each other by service name (e.g. `postgres`,
# `certctl-server`).
networks:
  certctl-network:
    driver: bridge

# Named volumes. `docker compose down -v` removes all three, which also
# forces certctl-tls-init to generate a new certificate on the next boot.
volumes:
  # PostgreSQL data directory.
  postgres_data:
    driver: local
  # Directory the agent scans for certificates (CERTCTL_DISCOVERY_DIRS).
  agent_keys:
    driver: local
  # TLS material written by certctl-tls-init; mounted read-only elsewhere.
  certs:
    driver: local