From c015cab2f4755b47547722e62b46b0c3066849d2 Mon Sep 17 00:00:00 2001 From: shankar0123 Date: Wed, 15 Apr 2026 00:22:57 -0400 Subject: [PATCH] docs: rewrite features.md, audit README + architecture against repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrote docs/features.md from scratch as authoritative feature inventory (1255 lines, every claim verified against source files). Audited README.md and architecture.md against repo — fixed 19 stale references: K8s Secrets status, issuer counts, dashboard page counts, CI thresholds, missing connectors in Mermaid diagrams, OpenAPI operation count, GetCACertPEM behavior, and V2/V4 roadmap accuracy. Also includes related fixes discovered during audit: - Scheduler skips expired/failed/revoked certs from auto-renewal - Seed demo expiry dates moved outside 31-day scheduler query window - Agent pages use correct last_heartbeat_at field name Co-Authored-By: Claude Opus 4.6 --- README.md | 8 +- deploy/docker-compose.demo.yml | 2 +- docs/architecture.md | 34 +- docs/features.md | 2315 +++++++++++++---------------- internal/service/renewal.go | 13 +- internal/service/renewal_test.go | 71 + migrations/seed_demo.sql | 24 +- web/src/pages/AgentDetailPage.tsx | 8 +- web/src/pages/AgentsPage.tsx | 4 +- 9 files changed, 1165 insertions(+), 1314 deletions(-) diff --git a/README.md b/README.md index 5dff5aa..e6daa70 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ For the full capability breakdown — revocation infrastructure (CRL + OCSP), po | SSH (Agentless) | Beta | `SSH` | | Windows Cert Store | Implemented | `WinCertStore` | | Java Keystore | Implemented | `JavaKeystore` | -| Kubernetes Secrets | Coming in 2.1 | `KubernetesSecrets` | +| Kubernetes Secrets | Implemented | `KubernetesSecrets` | ### Notifiers | Notifier | Status | Type | @@ -166,7 +166,7 @@ docker compose -f deploy/docker-compose.yml up -d --build Wait ~30 seconds, then open **http://localhost:8443** in your 
browser. The onboarding wizard walks you through connecting a CA, deploying an agent, and issuing your first certificate. -**Want a pre-populated demo instead?** Add the demo override to see 32 certificates across 7 issuers, 8 agents, and 180 days of realistic history: +**Want a pre-populated demo instead?** Add the demo override to see 32 certificates across 10 issuers, 8 agents, and 180 days of realistic history: ```bash docker compose -f deploy/docker-compose.yml -f deploy/docker-compose.demo.yml up -d --build @@ -313,13 +313,13 @@ Core lifecycle management — Local CA + ACME v2 issuers, NGINX target connector ### V2: Operational Maturity — Shipped 30+ milestones, extensively tested with CI-enforced coverage gates. Sub-CA mode, ACME DNS-01/DNS-PERSIST-01, step-ca, Vault PKI, DigiCert CertCentral, OpenSSL/Custom CA issuers. NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS targets. RFC 5280 revocation with CRL + OCSP. Certificate profiles, ownership tracking, approval workflows. Filesystem and network certificate discovery. Prometheus metrics, dashboard charts, agent fleet overview. EST server (RFC 7030), ACME ARI (RFC 9773), certificate export, S/MIME support, Helm chart, MCP server, CLI, scheduled digest emails. Slack, Teams, PagerDuty, OpsGenie, SMTP notifications. Compliance mapping (SOC 2, PCI-DSS 4.0, NIST SP 800-57). See the [Feature Inventory](docs/features.md) for details. -**Coming in v2.1.0:** Dynamic issuer and target configuration via GUI (no env var restarts), first-run onboarding wizard. +**Shipped in v2.1.0:** Dynamic issuer and target configuration via GUI (no env var restarts); first-run onboarding wizard; Sectigo SCM, Google CAS, and AWS ACM Private CA issuers; and IIS (WinRM), F5 BIG-IP, SSH, Windows Certificate Store, Java Keystore, and Kubernetes Secrets target connectors. ### V3: certctl Pro Team access controls and identity provider integration (OIDC/SSO). Role-based access control with profile-gating. 
Event-driven architecture (NATS) with real-time operational views. Advanced search DSL, compliance and risk scoring, bulk fleet operations. ### V4+: Cloud, Scale & Passive Discovery -Passive network discovery (TLS listener), Kubernetes integration (cert-manager external issuer, Secrets target), cloud infrastructure targets (AWS ALB/ACM, Azure Key Vault), extended CA support (Entrust, GlobalSign, EJBCA), and platform-scale features (Terraform provider, multi-tenancy, HSM support). +Passive network discovery (TLS listener), Kubernetes cert-manager external issuer, cloud infrastructure targets (AWS ALB/CloudFront, Azure Key Vault/App Service), extended CA support (Entrust, GlobalSign, EJBCA), cloud secret manager discovery (AWS Secrets Manager, Azure Key Vault, GCP Secret Manager), and platform-scale features (Terraform provider, multi-tenancy, HSM support). ## License diff --git a/deploy/docker-compose.demo.yml b/deploy/docker-compose.demo.yml index 3e80c10..22d0aa4 100644 --- a/deploy/docker-compose.demo.yml +++ b/deploy/docker-compose.demo.yml @@ -1,4 +1,4 @@ -# Demo mode: pre-populated dashboard with 15 certificates, 5 agents, issuers, etc. +# Demo mode: pre-populated dashboard with 32 certificates, 8 agents, 10 issuers, etc. # Use this to showcase certctl's dashboard with realistic data. 
# # Usage: diff --git a/docs/architecture.md b/docs/architecture.md index ebc729a..2be4ce8 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -82,6 +82,9 @@ flowchart TB CA4["OpenSSL / Custom CA\n(script-based)"] CA6["Vault PKI\n(token auth, /sign API)"] CA7["DigiCert CertCentral\n(async order model)"] + CA8["Sectigo SCM\n(async order model)"] + CA9["Google CAS\n(OAuth2, sync)"] + CA10["AWS ACM PCA\n(sync issuance)"] end subgraph "Target Systems" @@ -95,6 +98,9 @@ flowchart TB T2["F5 BIG-IP\n(proxy agent + iControl REST)"] T3["IIS\n(WinRM + local)"] T10["SSH\n(SFTP + reload)"] + T11["WinCertStore\n(PowerShell import)"] + T12["Java Keystore\n(keytool pipeline)"] + T13["Kubernetes Secrets\n(K8s API)"] end DASH --> API @@ -102,7 +108,7 @@ flowchart TB SVC --> REPO REPO --> PG SCHED --> SVC - SVC -->|"Issue/Renew"| CA1 & CA2 & CA3 & CA4 & CA6 & CA7 + SVC -->|"Issue/Renew"| CA1 & CA2 & CA3 & CA4 & CA6 & CA7 & CA8 & CA9 & CA10 A1 & A2 & A3 -->|"CSR + Heartbeat"| API API -->|"Cert + Chain\n(NO private key)"| A1 & A2 & A3 @@ -122,7 +128,7 @@ The server exposes a REST API under `/api/v1/` and optionally serves the web das ### Agents -Lightweight Go processes that run on or near your infrastructure. Agents generate ECDSA P-256 private keys locally, create CSRs, and submit them to the control plane for signing — private keys never leave agent infrastructure. Agents also handle certificate deployment to target systems (NGINX, Apache httpd, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS, F5 BIG-IP, SSH, Windows Certificate Store, Java Keystore) and report job status. They communicate with the control plane via HTTP and authenticate with API keys. +Lightweight Go processes that run on or near your infrastructure. Agents generate ECDSA P-256 private keys locally, create CSRs, and submit them to the control plane for signing — private keys never leave agent infrastructure. 
Agents also handle certificate deployment to target systems (NGINX, Apache httpd, HAProxy, Traefik, Caddy, Envoy, Postfix, Dovecot, IIS, F5 BIG-IP, SSH, Windows Certificate Store, Java Keystore, Kubernetes Secrets) and report job status. They communicate with the control plane via HTTP and authenticate with API keys. The agent runs two background loops: a heartbeat (every 60 seconds) to signal it's alive, and a work poll (every 30 seconds) to check for actionable jobs via `GET /api/v1/agents/{id}/work`. Jobs may be `AwaitingCSR` (agent needs to generate key + submit CSR) or `Deployment` (agent needs to deploy a certificate). Private keys are stored in `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`) with 0600 permissions. @@ -134,7 +140,7 @@ The agent runs two background loops: a heartbeat (every 60 seconds) to signal it The web dashboard is the primary operational interface for certctl. It is built with Vite + React + TypeScript and uses TanStack Query for server state management (caching, background refetching, optimistic updates). 
-**Current views** (21 pages): certificate inventory (list with multi-select bulk operations + "New Certificate" creation modal + detail with deployment status timeline, inline policy/profile editor, version history, deploy, revoke, archive, and trigger renewal actions), agent fleet (list + detail with system info + OS/architecture grouping with charts), job queue (status, retry, cancel, approve/reject for AwaitingApproval jobs), notification inbox (threshold alert grouping, mark-as-read), audit trail (time range, actor, action filters + CSV/JSON export), policy management (rules with enable/disable toggle + delete + violations), issuers (list with test connection + delete), targets (list with 3-step configuration wizard + delete), owners (list with team resolution + delete), teams (list with delete), agent groups (list with dynamic match criteria badges + enable/disable + delete), certificate profiles (list with crypto constraints), short-lived credentials dashboard (TTL countdown, profile filtering, auto-refresh), discovered certificates triage (claim/dismiss unmanaged certs discovered by agents or network scans), network scan targets management (CRUD for network scan targets + Scan Now button), summary dashboard with charts (expiration heatmap, renewal success rate, status distribution, issuance rate), and login page. 
+**Current views** (24 pages): certificate inventory (list with multi-select bulk operations + "New Certificate" creation modal + detail with deployment status timeline, inline policy/profile editor, version history, deploy, revoke, archive, and trigger renewal actions), agent fleet (list + detail with system info + OS/architecture grouping with charts), job queue (list + detail with verification section, timeline, audit events; approve/reject for AwaitingApproval jobs), notification inbox (threshold alert grouping, mark-as-read), audit trail (time range, actor, action filters + CSV/JSON export), policy management (rules with enable/disable toggle + delete + violations), issuers (catalog with 10 type cards + 3-step create wizard + detail with test connection), targets (list with 3-step configuration wizard + detail with deployment history), owners (list with team resolution + delete), teams (list with delete), agent groups (list with dynamic match criteria badges + enable/disable + delete), certificate profiles (list with crypto constraints), short-lived credentials dashboard (TTL countdown, profile filtering, auto-refresh), discovered certificates triage (claim/dismiss unmanaged certs discovered by agents or network scans), network scan targets management (CRUD + Scan Now button), summary dashboard with charts (expiration heatmap, renewal success rate, status distribution, issuance rate), digest preview and send, observability (health, metrics, Prometheus config), and login page. The dashboard includes an **ErrorBoundary component** for graceful error recovery — if a view crashes, the boundary catches the error and displays a user-friendly message instead of breaking the entire dashboard. It also includes a **demo mode** that activates when the API is unreachable — it renders realistic mock data for screenshots and offline presentations. 
@@ -510,12 +516,13 @@ flowchart TB II["IssuerConnector Interface\nIssueCertificate() | RenewCertificate()\nRevokeCertificate() | GetOrderStatus()"] II --> LC["Local CA"] II --> ACME["ACME v2"] - II --> SC["step-ca"] + II --> SCA["step-ca"] II --> OC["OpenSSL / Custom CA"] II --> VP["Vault PKI"] II --> DC["DigiCert CertCentral"] II --> SG["Sectigo SCM"] II --> GC["Google CAS"] + II --> AP2["AWS ACM PCA"] end subgraph "Target Connectors" @@ -530,7 +537,10 @@ flowchart TB TI --> PO["Postfix/Dovecot"] TI --> IIS["IIS"] TI --> F5["F5 BIG-IP"] - TI --> SC["SSH"] + TI --> SSH["SSH"] + TI --> WCS["WinCertStore"] + TI --> JKS["Java Keystore"] + TI --> K8S["K8s Secrets"] end subgraph "Notifier Connectors" @@ -582,7 +592,7 @@ type Connector interface { } ``` -Built-in issuers: **Local CA** (self-signed or sub-CA mode using `crypto/x509`), **ACME v2** (HTTP-01, DNS-01, and DNS-PERSIST-01 challenges, compatible with Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, and any ACME-compliant CA), **step-ca** (Smallstep private CA via native /sign API with JWK provisioner auth), **OpenSSL/Custom CA** (script-based signing delegating to user-provided shell scripts), **Vault PKI** (HashiCorp Vault's PKI secrets engine via /sign API with token auth), and **DigiCert** (commercial CA via CertCentral REST API with async order processing). The ACME connector uses `golang.org/x/crypto/acme`, generates an ECDSA P-256 account key, handles account registration with ToS acceptance and optional External Account Binding (EAB) for CAs that require it (ZeroSSL, Google Trust Services, SSL.com), order creation, challenge solving (HTTP-01 via built-in server, DNS-01 via script-based hooks, DNS-PERSIST-01 via standing TXT records with auto-fallback to DNS-01), order finalization, and DER-to-PEM chain conversion. 
For ZeroSSL, EAB credentials are auto-fetched from ZeroSSL's public API when the directory URL is detected as ZeroSSL and no EAB credentials are provided — zero-friction onboarding with no dashboard visit required. +Built-in issuers (9 connectors): **Local CA** (self-signed or sub-CA mode using `crypto/x509`), **ACME v2** (HTTP-01, DNS-01, and DNS-PERSIST-01 challenges, compatible with Let's Encrypt, ZeroSSL, Sectigo, Google Trust Services, and any ACME-compliant CA), **step-ca** (Smallstep private CA via native /sign API with JWK provisioner auth), **OpenSSL/Custom CA** (script-based signing delegating to user-provided shell scripts), **Vault PKI** (HashiCorp Vault's PKI secrets engine via /sign API with token auth), **DigiCert** (commercial CA via CertCentral REST API with async order processing), **Sectigo SCM** (async order model with 3-header auth), **Google CAS** (Cloud Certificate Authority Service with OAuth2 service account auth), and **AWS ACM Private CA** (synchronous issuance via ACM PCA API). The ACME connector uses `golang.org/x/crypto/acme`, generates an ECDSA P-256 account key, handles account registration with ToS acceptance and optional External Account Binding (EAB) for CAs that require it (ZeroSSL, Google Trust Services, SSL.com), order creation, challenge solving (HTTP-01 via built-in server, DNS-01 via script-based hooks, DNS-PERSIST-01 via standing TXT records with auto-fallback to DNS-01), order finalization, and DER-to-PEM chain conversion. For ZeroSSL, EAB credentials are auto-fetched from ZeroSSL's public API when the directory URL is detected as ZeroSSL and no EAB credentials are provided — zero-friction onboarding with no dashboard visit required. **ACME Renewal Information (ARI, RFC 9773):** The ACME connector supports CA-directed renewal timing via the `GetRenewalInfo()` method. 
Instead of using fixed thresholds (e.g., renew 30 days before expiry), the CA tells certctl when to renew by providing a `suggestedWindow` with start and end times. This is useful for distributing renewal load during maintenance windows and coordinating mass-revocation scenarios. Enable with `CERTCTL_ACME_ARI_ENABLED=true`. Cert ID is computed as `base64url(SHA-256(DER cert))` per RFC 9773. If the CA doesn't support ARI (404 from the ARI endpoint), certctl automatically falls back to threshold-based renewal — no operator intervention required. Errors from the CA are logged as warnings. @@ -602,11 +612,11 @@ type Connector interface { The `DeploymentRequest` struct carries the full material needed by the target system: the signed certificate, the CA chain, the agent-generated private key, target-specific configuration, and arbitrary metadata. The key field is populated by the agent from its local key store (`CERTCTL_KEY_DIR`) — it never originates from the control plane. -Built-in targets: **NGINX** (writes cert/chain/key files, validates with `nginx -t`, reloads), **Apache httpd** (writes cert/chain/key files, validates with `apachectl configtest`, graceful reload), **HAProxy** (combined PEM file with cert+chain+key, validates config, reloads via systemctl/signal), **Traefik** (file provider — writes cert/key to watched directory, Traefik auto-reloads), **Caddy** (dual-mode: admin API hot-reload or file-based), **Envoy** (file-based with optional SDS JSON config), **F5 BIG-IP** (proxy agent + iControl REST, transaction-based atomic SSL profile updates), **IIS** (dual-mode: agent-local PowerShell + proxy agent WinRM for agentless targets), **Postfix/Dovecot** (file write + service reload), **SSH** (agentless deployment via SSH/SFTP), **Windows Certificate Store** (PowerShell-based cert import, dual-mode local/WinRM), **Java Keystore** (PEM → PKCS#12 → keytool pipeline, JKS and PKCS12 formats). 
+Built-in targets (14 connector types): **NGINX** (writes cert/chain/key files, validates with `nginx -t`, reloads), **Apache httpd** (writes cert/chain/key files, validates with `apachectl configtest`, graceful reload), **HAProxy** (combined PEM file with cert+chain+key, validates config, reloads via systemctl/signal), **Traefik** (file provider — writes cert/key to watched directory, Traefik auto-reloads), **Caddy** (dual-mode: admin API hot-reload or file-based), **Envoy** (file-based with optional SDS JSON config), **F5 BIG-IP** (proxy agent + iControl REST, transaction-based atomic SSL profile updates), **IIS** (dual-mode: agent-local PowerShell + proxy agent WinRM for agentless targets), **Postfix/Dovecot** (file write + service reload), **SSH** (agentless deployment via SSH/SFTP), **Windows Certificate Store** (PowerShell-based cert import, dual-mode local/WinRM), **Java Keystore** (PEM → PKCS#12 → keytool pipeline, JKS and PKCS12 formats), **Kubernetes Secrets** (deploys as `kubernetes.io/tls` Secrets via injectable K8sClient interface, in-cluster or kubeconfig auth). After deployment, agents can perform **post-deployment TLS verification**: the agent probes the live TLS endpoint using `crypto/tls.DialWithDialer` and compares the SHA-256 fingerprint of the served certificate against what was deployed. Results are reported via `POST /api/v1/jobs/{id}/verify` and stored on the job record. Verification is best-effort — failures don't block or rollback deployments. -The SSH connector enables agentless deployment to any Linux/Unix server via SSH/SFTP, using the proxy agent pattern. Additional cloud, network, and Kubernetes target connectors are planned for future releases. +The SSH connector enables agentless deployment to any Linux/Unix server via SSH/SFTP, using the proxy agent pattern. The Kubernetes Secrets connector deploys certificates as `kubernetes.io/tls` Secrets via an injectable K8sClient interface supporting both in-cluster and out-of-cluster auth. 
### Notifier Connector @@ -659,7 +669,7 @@ type ESTService interface { } ``` -**Issuer connector extension:** EST required adding `GetCACertPEM(ctx) (string, error)` to the issuer connector interface so the `/cacerts` endpoint can serve the CA chain. The Local CA connector returns its CA certificate PEM; ACME, step-ca, OpenSSL, Vault, and DigiCert connectors return errors (they don't expose a static CA chain — their chains are per-issuance). +**Issuer connector extension:** EST required adding `GetCACertPEM(ctx) (string, error)` to the issuer connector interface so the `/cacerts` endpoint can serve the CA chain. The Local CA returns its CA certificate PEM; Vault PKI fetches via `GET /v1/{mount}/ca/pem`; Google CAS fetches via API; AWS ACM PCA retrieves via `GetCertificateAuthorityCertificate`. ACME, step-ca, OpenSSL, DigiCert, and Sectigo connectors return errors (they don't expose a static CA chain — their chains are per-issuance). **Audit:** Every EST enrollment is recorded in the audit trail with `protocol: "EST"`, the CN, SANs, issuer ID, serial number, and optional profile ID. @@ -782,7 +792,7 @@ All endpoints are under `/api/v1/` and follow consistent patterns: Resources: certificates, issuers, targets, agents, jobs, policies, profiles, teams, owners, agent-groups, audit, notifications, discovered-certificates, discovery-scans, network-scan-targets, stats, metrics. -The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 99 endpoints across 23 resource domains (97 under `/api/v1/` + `/.well-known/est/` plus `/health` and `/ready`; includes auth, 7 discovery endpoints from M18b, 6 network scan endpoints from M21, Prometheus metrics from M22, 4 EST enrollment endpoints from M23, 2 digest endpoints from M29), all request/response schemas, and pagination conventions. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation. 
+The full API is documented in an OpenAPI 3.1 specification at `api/openapi.yaml` with 97 operations across `/api/v1/` and `/.well-known/est/` (includes auth, 7 discovery endpoints, 6 network scan endpoints, Prometheus metrics, 4 EST enrollment endpoints, 2 digest endpoints, 2 verification endpoints, 2 export endpoints), all request/response schemas, and pagination conventions. The server registers 107 routes in total; some of them, such as `/health` and `/ready`, are registered outside the OpenAPI spec. See the [OpenAPI Guide](openapi.md) for usage with Swagger UI and SDK generation. Jobs support additional action endpoints: `POST /api/v1/jobs/{id}/cancel`, `POST /api/v1/jobs/{id}/approve`, `POST /api/v1/jobs/{id}/reject`. @@ -978,13 +988,13 @@ certctl is extensively tested across eight layers with CI-enforced coverage gate **Frontend tests** (`web/src/api/`) — Vitest tests covering the full API client (all endpoint functions with fetch mocking), stats/metrics endpoints, utility functions, and auth flows. Test environment uses jsdom with `@testing-library/jest-dom` matchers. -**Connector tests** (`internal/connector/`) — Issuer connectors (Local CA self-signed/sub-CA modes, ACME DNS-01/DNS-PERSIST-01, step-ca, OpenSSL, Vault PKI, DigiCert, Sectigo, Google CAS — all with httptest mock servers). Target connectors (NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS with mock PowerShell executor, F5 BIG-IP with mock iControl client, Postfix/Dovecot, SSH with mock SSH client). Notifier connectors (Slack, Teams, PagerDuty, OpsGenie). +**Connector tests** (`internal/connector/`) — Issuer connectors (Local CA self-signed/sub-CA modes, ACME DNS-01/DNS-PERSIST-01, step-ca, OpenSSL, Vault PKI, DigiCert, Sectigo, Google CAS, AWS ACM PCA — all with httptest mock servers or injectable interface mocks). 
Target connectors (NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS with mock PowerShell executor, F5 BIG-IP with mock iControl client, Postfix/Dovecot, SSH with mock SSH client, Windows Certificate Store with mock PowerShell executor, Java Keystore with mock command executor, Kubernetes Secrets with mock K8s client, shared certutil package). Notifier connectors (Slack, Teams, PagerDuty, OpsGenie). **Scheduler tests** (`internal/scheduler/scheduler_test.go`) — Idempotency guards (`sync/atomic.Bool`), `WaitForCompletion` success and timeout paths, and multi-loop concurrency safety. **Fuzz tests** (`internal/validation/`, `internal/domain/`) — Go native fuzz tests for command validation (`ValidateShellCommand`, `ValidateDomainName`, `ValidateACMEToken`) and revocation domain parsing. -**CI pipeline** (`.github/workflows/ci.yml`) — Two parallel jobs. Go: build, vet, `go test -race`, `golangci-lint` (11 linters), `govulncheck`, test with coverage, per-layer coverage threshold enforcement (service 60%, handler 60%, domain 40%, middleware 50%). Frontend: TypeScript type check, Vitest, Vite production build. +**CI pipeline** (`.github/workflows/ci.yml`) — Two parallel jobs. Go: build, vet, `go test -race`, `golangci-lint` (11 linters), `govulncheck`, test with coverage, per-layer coverage threshold enforcement (service 55%, handler 60%, domain 40%, middleware 30%). Frontend: TypeScript type check, Vitest, Vite production build. For detailed test procedures, smoke tests, and the release sign-off checklist, see the [Testing Guide](testing-guide.md). For setting up the Docker Compose test environment with real CA backends, see [Test Environment](test-env.md). diff --git a/docs/features.md b/docs/features.md index dfd4f51..9aab2ee 100644 --- a/docs/features.md +++ b/docs/features.md @@ -1,1500 +1,1255 @@ -# certctl V2 Feature Inventory +# certctl Feature Inventory -Complete reference of all features shipped in the V2 release (as of March 2026). 
+Complete reference of every feature shipped in certctl through v2.1.0 (April 2026). Every claim in this document is verified against source code. If a number, default, or behavior isn't here, check the source file listed in the margin. + +--- + +## At a Glance + +| Metric | Count | +|---|---| +| HTTP routes | 107 (103 under `/api/v1/` + 4 EST) | +| OpenAPI 3.1 operations | 97 | +| MCP tools | 80 | +| CLI commands | 12 | +| Issuer connectors | 9 (+ EST server) | +| Target connectors | 14 | +| Notifier connectors | 6 channels | +| Database tables | 21 (across 10 migrations) | +| Background scheduler loops | 7 | +| Web dashboard pages | 24 | +| Test functions | 1850+ | +| Supported platforms | linux/amd64, linux/arm64, darwin/amd64, darwin/arm64 | --- ## API Surface -### Overview -- REST API across 23 resource domains under `/api/v1/` + `/.well-known/est/` -- REST API with HTTP semantics (GET, POST, PUT, DELETE) -- All endpoints require authentication by default (configurable) -- OpenAPI 3.1 spec with full schema documentation + -### Authentication & Security +### Authentication -Every API call requires authentication by default — this ensures that only authorized operators and agents can issue, renew, or revoke certificates. Without this, anyone with network access to the control plane could compromise your entire certificate infrastructure. +Every API call requires authentication by default. Configurable via `CERTCTL_AUTH_TYPE`. 
-- **API Key Authentication** — SHA-256 hashed keys with constant-time comparison -- **Bearer Token Flow** — `Authorization: Bearer {api_key}` header -- **Auth Configuration** — Configurable via `CERTCTL_AUTH_TYPE` (api-key, jwt, none) -- **Auth Info Endpoint** — `GET /api/v1/auth/info` (no auth required for GUI pre-login detection) -- **Auth Check Endpoint** — `GET /api/v1/auth/check` (validate credentials) +| Setting | Behavior | +|---|---| +| `api-key` (default) | SHA-256 hashed keys, constant-time comparison, `Authorization: Bearer {key}` | +| `none` | Disables auth with a log warning at startup | -```bash -# Authenticate with API key -curl -H "Authorization: Bearer your-api-key" http://localhost:8443/api/v1/certificates +Two endpoints are served without auth so the GUI can detect auth mode before login: -# Check auth mode (no auth required — used by GUI login page) -curl http://localhost:8443/api/v1/auth/info -# {"auth_type":"api-key"} -``` +- `GET /api/v1/auth/info` — returns `{"auth_type":"api-key"}` +- `GET /api/v1/auth/check` — validates credentials + + ### Rate Limiting -Protects the control plane from being overwhelmed by a single client — whether a misconfigured monitoring script polling every millisecond or a bug in an agent's retry logic. Without rate limiting, one misbehaving client can DoS the server for everyone. +Token bucket algorithm protecting the control plane from misbehaving clients. 
-- **Token Bucket Algorithm** — Configurable requests-per-second (RPS) and burst size -- **429 Responses** — Rate limit exceeded with `Retry-After` header telling clients when to retry -- **Configuration** — `CERTCTL_RATE_LIMIT_ENABLED`, `CERTCTL_RATE_LIMIT_RPS` (default 50), `CERTCTL_RATE_LIMIT_BURST` (default 100) +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_RATE_LIMIT_ENABLED` | `true` | Enable/disable | +| `CERTCTL_RATE_LIMIT_RPS` | `50` | Requests per second | +| `CERTCTL_RATE_LIMIT_BURST` | `100` | Burst capacity | + +Exceeded requests receive `429 Too Many Requests` with a `Retry-After` header. ### CORS -Required for the web dashboard to communicate with the API when served from a different origin (e.g., during development on `localhost:3000` while the API runs on `localhost:8443`). Without CORS headers, browsers block the requests silently. +Deny-by-default. Empty `CERTCTL_CORS_ORIGINS` blocks all cross-origin requests. -- **Deny-by-Default** — Empty `CERTCTL_CORS_ORIGINS` blocks all cross-origin requests (secure default) -- **Configurable Per-Origin Allowlist** — `CERTCTL_CORS_ORIGINS` (comma-separated or `*` for wildcard) -- **Preflight Caching** — Standard CORS headers with `Access-Control-Max-Age` +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_CORS_ORIGINS` | `""` (deny all) | Comma-separated origins or `*` | -### Query Features (M20) +Preflight responses include `Access-Control-Max-Age` for caching. -These features reduce API response sizes and enable efficient pagination at scale. When you have 10,000+ certificates, fetching the full object for each one on every list call wastes bandwidth and slows down dashboards. Sparse fields, cursor pagination, and sorting let clients request exactly what they need. 
+### Request Body Size Limits -```bash -# Sparse fields — only return id, name, and status (smaller payload) -curl -H "$AUTH" "$SERVER/api/v1/certificates?fields=id,common_name,status" + -# Sort by expiration date descending (most urgent first) -curl -H "$AUTH" "$SERVER/api/v1/certificates?sort=-notAfter" +`http.MaxBytesReader` middleware positioned before auth in the middleware chain. -# Cursor pagination — efficient for large datasets -curl -H "$AUTH" "$SERVER/api/v1/certificates?cursor=eyJpZCI6Im1jLWFwaS1wcm9kIn0&page_size=100" +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_MAX_BODY_SIZE` | `1048576` (1 MB) | Maximum request body in bytes | -# Time-range filter — certs expiring in next 30 days -curl -H "$AUTH" "$SERVER/api/v1/certificates?expires_before=2026-04-24T00:00:00Z&expires_after=2026-03-24T00:00:00Z" -``` +### Query Features -| Feature | Details | -|---------|---------| -| **Sorting** | `?sort=-notAfter` (8 fields: notAfter, expiresAt, createdAt, updatedAt, commonName, name, status, environment) | -| **Pagination (Page-Based)** | `?page=1&per_page=50` (max 500, default 50) | -| **Pagination (Cursor)** | `?cursor=base64_token&page_size=100` (keyset pagination with `next_cursor` in response) | -| **Time-Range Filters** | `?expires_before=2026-12-31T23:59:59Z&expires_after=2026-01-01T00:00:00Z&created_after=...&updated_after=...` (RFC3339 format) | -| **Sparse Fields** | `?fields=id,common_name,status` (reduce response size) | -| **Additional Filters** | `?status=active&agent_id=a-xxx&profile_id=p-xxx&issuer_id=...&owner_id=...&team_id=...` | +All list endpoints support: -### Endpoint Breakdown by Domain +- **Pagination** — page-based (`?page=2&per_page=50`) and cursor-based (`?cursor={token}&page_size=100`) +- **Sparse fields** — `?fields=id,common_name,status` returns only requested fields +- **Sorting** — `?sort=-notAfter` (prefix `-` for descending). 
Whitelist: `notAfter`, `expiresAt`, `createdAt`, `updatedAt`, `commonName`, `name`, `status`, `environment` +- **Time-range filters** — `?expires_before=`, `?expires_after=`, `?created_after=`, `?updated_after=` (RFC 3339) +- **Resource filters** — `?agent_id=`, `?profile_id=`, `?owner_id=`, `?team_id=`, `?issuer_id=`, `?status=` -| Domain | Endpoints | Key Operations | -|--------|-----------|-----------------| -| **Certificates** | 13 | List, create, get, update (archive), versions, deployments, trigger renewal, trigger deployment, revoke, export (PEM/PKCS#12) | -| **CRL & OCSP** | 3 | JSON CRL, DER CRL per issuer, OCSP responder | -| **Issuers** | 6 | List, create, get, update, delete, test connection | -| **Targets** | 5 | List, create, get, update, delete | -| **Agents** | 7 | List, register, get, heartbeat, CSR submit, certificate pickup, get work, report job status | -| **Jobs** | 5 | List, get, cancel, approve, reject | -| **Policies** | 6 | List, create, get, update, delete, list violations | -| **Profiles** | 5 | List, create, get, update, delete | -| **Teams** | 5 | List, create, get, update, delete | -| **Owners** | 5 | List, create, get, update, delete | -| **Agent Groups** | 6 | List, create, get, update, delete, list agents in group | -| **Discovery** | 7 | Submit scan results, list discovered certs, get detail, claim, dismiss, list scans, summary stats | -| **Network Scan** | 6 | List targets, create, get, update, delete, trigger scan | -| **Audit** | 3 | List events, list by resource, export (CSV/JSON) | -| **Notifications** | 3 | List, get, mark as read | -| **Stats** | 5 | Dashboard summary, certificates by status, expiration timeline, job trends, issuance rate | -| **Metrics** | 2 | JSON metrics (gauges, counters, uptime), Prometheus exposition format | -| **Verification** | 2 | Submit verification result, get verification status | -| **Digest** | 2 | Preview HTML digest, send digest immediately | -| **EST (RFC 7030)** | 4 | CA certs (PKCS#7), 
simple enrollment, re-enrollment, CSR attributes | -| **Health** | 4 | Health check, readiness check, auth info, auth check | + + +### API Audit Log + + + +Every API call is recorded to the immutable audit trail. Best-effort (non-blocking) via goroutine. Fields: method, path, actor (from auth context, falls back to "anonymous"), SHA-256 request body hash (truncated 16 chars), response status, latency. Health/readiness endpoints excluded via `ExcludePaths`. --- ## Certificate Lifecycle -### Certificate States (8 total) -- **Pending** — Created, awaiting issuance -- **Active** — Valid and deployed -- **Expiring** — Within configured threshold (default 30 days) -- **Expired** — Past NotAfter date -- **RenewalInProgress** — Renewal job submitted -- **Failed** — Issuance or renewal failed -- **Revoked** — Revoked via POST /api/v1/certificates/{id}/revoke -- **Archived** — Manually archived via DELETE endpoint + + +### Certificate Statuses + +| Status | Description | +|---|---| +| `Pending` | Created, awaiting issuance | +| `Active` | Issued and valid | +| `Expiring` | Within configured alert threshold | +| `Expired` | Past notAfter | +| `RenewalInProgress` | Renewal job in flight | +| `Failed` | Issuance or renewal failed | +| `Revoked` | Explicitly revoked | +| `Archived` | Superseded by newer version | ### Key Generation Modes -| Mode | Details | -|------|---------| -| **Agent-Side (Default)** | ECDSA P-256 key generation on agent; private keys never touch control plane | -| **Server-Side (Demo Only)** | RSA-2048 key generation on server; requires explicit `CERTCTL_KEYGEN_MODE=server` with log warning | -### Certificate Versions -- Multiple versions per certificate (issuance, renewal) -- Each version includes: serial number, fingerprint, PEM-encoded chain -- CSR preserved for audit trail -- Version history with rollback capability in GUI + -### AwaitingCSR Job State -- Renewal and issuance jobs pause when `CERTCTL_KEYGEN_MODE=agent` -- Agent generates ECDSA P-256 key 
locally, creates CSR, submits via `POST /api/v1/agents/{id}/csr` -- Server signs and stores certificate version -- Work endpoint enriched with `common_name` and `sans` for agent CSR generation +| Mode | Env Var Value | Behavior | +|---|---|---| +| Agent-side (default) | `CERTCTL_KEYGEN_MODE=agent` | Agent generates ECDSA P-256 key pair locally, submits CSR only. Private keys never leave agent infrastructure. Keys stored at `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`) with `0600` permissions. | +| Server-side (demo only) | `CERTCTL_KEYGEN_MODE=server` | Server generates RSA key + CSR. Logs a warning at startup. Used in Docker Compose demo for convenience. | -### Deployment Trigger -Push certificates to targets on demand, outside of the normal scheduler-driven flow: +### Issuance Flow -```bash -# Deploy to all mapped targets -curl -X POST -H "$AUTH" $SERVER/api/v1/certificates/mc-api-prod/deploy +1. Certificate created (status: Pending) +2. Renewal/issuance job created (status: Pending or AwaitingCSR in agent keygen mode) +3. Agent polls `GET /agents/{id}/work`, receives job with `common_name` and `sans` +4. Agent generates ECDSA P-256 key pair, creates CSR, submits via `POST /agents/{id}/csr` +5. Server forwards CSR to issuer connector, stores signed certificate +6. Deployment jobs created for each target (scoped to assigned agent via `agent_id`) +7. Agent polls for deployment work, deploys to target connector +8. Optional: post-deployment TLS verification -# Deploy to a specific target -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/certificates/mc-api-prod/deploy \ - -d '{"target_id": "tgt-nginx-prod"}' +### Renewal -# Check deployment job status -curl -H "$AUTH" "$SERVER/api/v1/certificates/mc-api-prod/deployments" | jq '.data[] | {id, name, type}' -``` + -### Post-Deployment TLS Verification (M25) +The renewal scheduler runs every hour (configurable via `CERTCTL_RENEWAL_CHECK_INTERVAL`). 
For each certificate approaching expiration: -After deploying a certificate, the agent connects back to the target's live TLS endpoint and verifies the served certificate matches what was deployed — using SHA-256 fingerprint comparison. This catches failures that deployment commands can't: wrong virtual host, stale cache, config that validates but doesn't apply. +1. Checks ACME ARI (RFC 9773) if available — CA-directed renewal timing takes priority +2. Falls back to threshold-based logic using per-policy `alert_thresholds_days` (default `[30, 14, 7, 0]`) +3. Creates renewal job if thresholds are met and no duplicate job exists -```bash -# Agent submits verification result after probing the live endpoint -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/jobs/j-deploy-123/verify -d '{ - "target_id": "tgt-nginx-prod", - "expected_fingerprint": "sha256:a1b2c3...", - "actual_fingerprint": "sha256:a1b2c3...", - "verified": true -}' +### Interactive Approval -# Check verification status for a job -curl -H "$AUTH" $SERVER/api/v1/jobs/j-deploy-123/verification | jq . -``` + -| Feature | Details | -|---------|---------| -| **Verification Method** | `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` to handle self-signed and internal CA certs | -| **Fingerprint Comparison** | SHA-256 of raw certificate DER bytes | -| **Best-Effort** | Verification failures are recorded but don't block or rollback deployments | -| **Job Fields** | `verification_status` (pending/success/failed/skipped), `verified_at`, `verification_fingerprint`, `verification_error` | -| **Audit Trail** | `job_verification_success` and `job_verification_failed` events recorded | -| **Configuration** | `CERTCTL_VERIFY_DEPLOYMENT` (enable/disable), `CERTCTL_VERIFY_TIMEOUT` (TLS dial timeout), `CERTCTL_VERIFY_DELAY` (wait after deploy before probing) | +Jobs can require manual approval before execution. The `AwaitingApproval` state pauses the job until an operator acts. 
+ +- `POST /api/v1/jobs/{id}/approve` — approve with optional reason +- `POST /api/v1/jobs/{id}/reject` — reject with reason tracking + +### Expiration Alerting + +Configurable per-policy thresholds stored as `alert_thresholds_days` JSONB (default `[30, 14, 7, 0]`). The scheduler: + +- Sends deduplicated notifications at each threshold crossing +- Transitions certificate status: Active → Expiring → Expired +- Short-lived certs (profile TTL < 1 hour) get a dedicated scheduler loop running every 30 seconds --- ## Revocation Infrastructure -When a private key is compromised or a certificate is no longer needed, revocation tells clients to stop trusting it immediately. Without revocation, a stolen certificate remains valid until it expires — which could be months. - -```bash -# Revoke a certificate (key compromise — most urgent reason) -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/certificates/mc-api-prod/revoke \ - -d '{"reason": "keyCompromise"}' - -# Check the CRL for an issuer -curl -H "$AUTH" $SERVER/api/v1/crl/iss-local | jq '.entries' - -# Query OCSP status for a specific cert -curl $SERVER/api/v1/ocsp/iss-local/ABC123DEF456 -``` + ### Revocation API -- **Endpoint** — `POST /api/v1/certificates/{id}/revoke` (RFC 5280 reason codes) -- **8 Reason Codes** — unspecified, keyCompromise, caCompromise, affiliationChanged, superseded, cessationOfOperation, certificateHold, privilegeWithdrawn -- **Best-Effort Issuer Notification** — Issuer connector failure doesn't block revocation -- **Immutable Recording** — `certificate_revocations` table with idempotent ON CONFLICT logic -### CRL (Certificate Revocation List) -- **JSON CRL** — `GET /api/v1/crl` returns entries array with serial numbers, reasons, revoked timestamps -- **DER X.509 CRL** — `GET /api/v1/crl/{issuer_id}` returns proper DER-encoded CRL signed by issuing CA -- **24-Hour Validity** — CRL refreshed every 24 hours -- **CA Key Required** — Sub-CA or issuing CA key must be available for signing +`POST 
/api/v1/certificates/{id}/revoke` with RFC 5280 reason codes: + +| Reason | CRL Code | +|---|---| +| `unspecified` | 0 | +| `keyCompromise` | 1 | +| `caCompromise` | 2 | +| `affiliationChanged` | 3 | +| `superseded` | 4 | +| `cessationOfOperation` | 5 | +| `certificateHold` | 6 | +| `privilegeWithdrawn` | 9 | + +Revocation is a 7-step process: validate eligibility → get serial → update status → record in `certificate_revocations` table → notify issuer (best-effort) → audit → send notification. + +### CRL Endpoints + +- `GET /api/v1/crl` — JSON-formatted CRL (version, entries array, total count, timestamp) +- `GET /api/v1/crl/{issuer_id}` — DER-encoded X.509 CRL signed by issuing CA, 24-hour validity ### OCSP Responder -- **Endpoint** — `GET /api/v1/ocsp/{issuer_id}/{serial}` -- **Responses** — good (certificate valid), revoked (in CRL), unknown (not issued by this CA) -- **Signed** — OCSP responses signed by issuing CA + +`GET /api/v1/ocsp/{issuer_id}/{serial}` — signed OCSP responses (good/revoked/unknown). Signs with issuing CA key. Requires CA key access (Local CA, step-CA connectors). ### Short-Lived Certificate Exemption -- **Policy** — Certificates with TTL < 1 hour (from profile) skip CRL/OCSP -- **Rationale** — Expiry is sufficient revocation signal for short-lived certs -- **Exemption Applied** — During CRL generation and OCSP response construction -### Revocation Notifications -- Webhook + email notifications on revocation events -- Routed by certificate owner email via existing notifier system +Certificates with profile TTL < 1 hour skip CRL/OCSP. Expiry is sufficient revocation for short-lived credentials. --- ## Certificate Export -Operators need to export certificates for use in third-party systems or for compliance audits. certctl provides two export formats: PEM (cert + chain, JSON or file download) and PKCS#12 (cert + chain in a passwordless bundle for compatibility with systems like Java keystores and Windows certificate stores). 
+ -**Important:** Private keys are never exported — they remain on agents where they were generated. This is a core security property. Exports only bundle the public certificate material (cert + chain). +Two export formats. Private keys are never included — they live on agents only. -```bash -# Export as PEM (returns JSON with base64-encoded data + chain) -curl -H "$AUTH" "$SERVER/api/v1/certificates/mc-api-prod/export/pem" -# {"certificate_pem":"-----BEGIN CERTIFICATE-----\n...", "chain_pem":"-----BEGIN CERTIFICATE-----\n..."} +| Endpoint | Format | Notes | +|---|---|---| +| `GET /api/v1/certificates/{id}/export/pem` | PEM JSON or file download (`?download=true`) | Splits leaf from chain | +| `POST /api/v1/certificates/{id}/export/pkcs12` | Binary .p12 with `Content-Disposition` | Cert-only bundle via `go-pkcs12` `EncodeTrustStore` | -# Export as PKCS#12 file (binary download, no password) -curl -H "$AUTH" "$SERVER/api/v1/certificates/mc-api-prod/export/pkcs12" > cert.p12 - -# Via CLI -certctl-cli certs export mc-api-prod --format pem --out cert.pem -certctl-cli certs export mc-api-prod --format pkcs12 --out cert.p12 -``` - -| Field | Details | -|-------|---------| -| **Formats** | PEM (text, cert + chain), PKCS#12 (binary, cert + chain, passwordless) | -| **Private Key Inclusion** | Never — private keys remain on agents | -| **Audit Trail** | All exports recorded with actor, timestamp, export format | -| **API Endpoints** | `GET /api/v1/certificates/{id}/export/pem`, `POST /api/v1/certificates/{id}/export/pkcs12` | -| **GUI** | Export PEM and Export PKCS#12 buttons on certificate detail page | +All exports generate audit events (`export_pem`, `export_pkcs12`) with serial number tracking. --- ## Certificate Profiles -### Profile Model -Named enrollment profiles defining certificate issuance constraints. Profiles prevent drift — without them, different teams might issue certs with inconsistent key sizes, TTLs, or key algorithms. 
A profile says "all certs in this category must use ECDSA P-256, max 90-day TTL, serverAuth and clientAuth EKUs only." + -Profiles also support **Extended Key Usage (EKU)** constraints, enabling S/MIME and device certificates. Common EKUs: -- `serverAuth` — TLS server certificates (HTTPS, mail servers) -- `clientAuth` — TLS client certificates (mutual TLS, device auth) -- `emailProtection` — S/MIME signing and encryption -- `codeSigning` — Code signing and software updates -- `timeStamping` — Trusted timestamps +Named enrollment profiles defining crypto constraints and certificate properties. Stored in PostgreSQL with full CRUD API and GUI page. -```bash -# Create a TLS profile -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/profiles -d '{ - "name": "Standard TLS", - "allowed_key_algorithms": ["ECDSA"], - "max_ttl_hours": 2160, - "allowed_ekus": ["serverAuth"] -}' +### Profile Fields -# Create an S/MIME profile -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/profiles -d '{ - "name": "S/MIME Email", - "allowed_key_algorithms": ["RSA", "ECDSA"], - "max_ttl_hours": 8760, - "allowed_ekus": ["emailProtection"] -}' +- Allowed key types (RSA 2048/4096, ECDSA P-256/P-384) +- Maximum TTL +- Required SANs +- Permitted Extended Key Usages (EKUs) -# Create a multi-purpose profile -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/profiles -d '{ - "name": "Multi-Purpose", - "allowed_key_algorithms": ["ECDSA"], - "max_ttl_hours": 2160, - "allowed_ekus": ["serverAuth", "clientAuth"] -}' +### Supported EKUs -# Assign profile to a certificate -curl -X PUT -H "$AUTH" -H "$CT" $SERVER/api/v1/certificates/mc-api-prod -d '{ - "profile_id": "prof-standard-tls" -}' + -# List all profiles -curl -H "$AUTH" "$SERVER/api/v1/profiles" | jq '.data[] | {id, name, max_ttl_hours, allowed_key_algorithms, allowed_ekus}' +| EKU Name | x509 Constant | Typical Use | +|---|---|---| +| `serverAuth` | `ExtKeyUsageServerAuth` | TLS servers | +| `clientAuth` | `ExtKeyUsageClientAuth` | Mutual TLS | +| 
`codeSigning` | `ExtKeyUsageCodeSigning` | Code signing | +| `emailProtection` | `ExtKeyUsageEmailProtection` | S/MIME | +| `timeStamping` | `ExtKeyUsageTimeStamping` | Timestamping | -# Get profile details -curl -H "$AUTH" "$SERVER/api/v1/profiles/prof-standard-tls" | jq . +### Adaptive KeyUsage -# Update profile constraints -curl -X PUT -H "$AUTH" -H "$CT" $SERVER/api/v1/profiles/prof-standard-tls -d '{ - "name": "Standard TLS", "max_ttl_hours": 2160, "allowed_key_algorithms": ["RSA", "ECDSA"], "allowed_ekus": ["serverAuth"] -}' -``` +The Local CA adjusts `KeyUsage` flags based on EKU: -| Field | Details | -|-------|---------| -| **ID** | Prefixed text PK (p-xxx) | -| **Name** | Human-readable profile name | -| **Allowed Key Algorithms** | RSA, ECDSA, Ed25519 with minimum key sizes (e.g., RSA 2048+, ECDSA P-256+) | -| **Max TTL** | Maximum certificate lifetime (days or duration) | -| **Allowed EKUs** | Extended key usage OIDs (serverAuth, clientAuth, emailProtection, codeSigning, timeStamping) | -| **Required SANs** | Mandatory Subject Alternative Names (patterns or fixed values) | -| **Short-Lived Support** | TTL < 1 hour triggers CRL/OCSP exemption | - -### GUI Management -- Full CRUD page with profile details -- EKU constraint badges visible in list view (serverAuth, clientAuth, emailProtection, etc.) 
-- Profile assignment dropdown on certificate detail -- S/MIME profile creation wizard with email SAN configuration +- TLS profiles: `DigitalSignature | KeyEncipherment` +- S/MIME profiles: `DigitalSignature | ContentCommitment` ### S/MIME Support -When a profile specifies `emailProtection` EKU, certctl adapts the issuance flow for email certificates: -- **SAN handling** — email addresses in SANs are formatted as `rfc822Name` (not DNS names) -- **Key usage** — S/MIME certs use `DigitalSignature | ContentCommitment` instead of the TLS default `DigitalSignature | KeyEncipherment` -- **Agent CSR generation** — agents correctly distinguish DNS SANs from email SANs based on profile EKU -- **Issuer constraints** — Local CA and other issuers thread EKUs through the signing pipeline + +EKU threading from profile through the entire issuance flow. Agent CSR generation splits SANs by type — `strings.Contains(san, "@")` routes to `EmailAddresses` instead of `DNSNames`. Demo seed includes `prof-smime` profile with `emailProtection` EKU. --- ## Policy Engine -Policies catch misconfigurations before they reach production. For example, a policy can prevent staging certificates from being issued by your production CA, or flag certificates missing an owner (which means nobody gets alerted when they expire). 
+ -```bash -# Create a policy requiring all certs to have an owner -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/policies -d '{ - "name": "Require Ownership", - "type": "RequiredMetadata", - "severity": "Error", - "config": {"required_fields": ["owner_id", "team_id"]} -}' +5 rule types with violation tracking and severity levels: -# Check violations for a policy -curl -H "$AUTH" "$SERVER/api/v1/policies/rp-standard/violations" -``` +- Key algorithm requirements +- Minimum key size +- Maximum certificate lifetime +- Required SAN patterns +- Issuer restrictions -### Policy Rules (5 types) -| Rule Type | Purpose | Example | -|-----------|---------|---------| -| **AllowedIssuers** | Restrict which CAs can issue | Only LetsEncrypt or Internal CA | -| **AllowedDomains** | Domain whitelist/blacklist | Allow *.example.com, deny *.staging.example.com | -| **RequiredMetadata** | Enforce ownership, team | Require owner_id and team_id populated | -| **AllowedEnvironments** | Environment constraints | Restrict to production or staging | -| **RenewalLeadTime** | Minimum renewal window | Renew 60 days before expiry (minimum) | - -### Violation Tracking -- **Severity Levels** — Warning, Error, Critical -- **Per-Policy Violations** — `GET /api/v1/policies/{id}/violations` with timestamp and violated certificate ID -- **Real-Time Evaluation** — Violations checked during issuance, renewal, and deployment -- **Audit Trail** — All violations logged to audit events table - -### Policy Application Scope -- Applied at renewal policy level -- Scoped to agent groups via `agent_group_id` foreign key -- Rule set can be enabled/disabled per policy +Policies can be scoped to agent groups via `agent_group_id` foreign key. Violations are tracked and surfaced in the dashboard. --- -## Issuer Connectors (4 Implemented) +## Issuer Connectors + + + +9 issuer connectors implementing the `issuer.Connector` interface. 
All support `ValidateConfig`, `IssueCertificate`, `RenewCertificate`, `RevokeCertificate`, `GetOrderStatus`, `GenerateCRL`, `SignOCSPResponse`, `GetCACertPEM`, `GetRenewalInfo`. ### Local CA -- **Mode** — Self-signed (default) or sub-CA (production) -- **Sub-CA Configuration** — Load CA cert+key from disk (`CERTCTL_CA_CERT_PATH`, `CERTCTL_CA_KEY_PATH`) -- **Key Formats Supported** — RSA, ECDSA, PKCS#8 -- **CRL Generation** — Signed by CA, 24h validity -- **OCSP Signing** — Delegates to CA's private key -- **Use Case** — Internal PKI, enterprise trust chains -### ACME v2 -- **Challenge Types** — HTTP-01 (default), DNS-01 (wildcard support), and DNS-PERSIST-01 (standing record, no per-renewal DNS updates) -- **DNS-01 Script Hooks** — Pluggable DNS solver for any provider (Cloudflare, Route53, Azure DNS, etc.) -- **DNS-PERSIST-01** — Standing `_validation-persist` TXT record set once, reused forever. Auto-fallback to DNS-01 if CA doesn't support it yet. -- **Configuration** — `CERTCTL_ACME_DIRECTORY_URL`, `CERTCTL_ACME_EMAIL`, `CERTCTL_ACME_CHALLENGE_TYPE`, `CERTCTL_ACME_DNS_PRESENT_SCRIPT`, `CERTCTL_ACME_DNS_CLEANUP_SCRIPT`, `CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN` -- **DNS Propagation Wait** — Configurable timeout before validation -- **Use Case** — Public CAs (LetsEncrypt), wildcard certs + + +Self-signed or sub-CA mode using `crypto/x509`. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_CA_CERT_PATH` | (none) | Path to CA certificate PEM. When set, enables sub-CA mode. | +| `CERTCTL_CA_KEY_PATH` | (none) | Path to CA private key PEM (RSA, ECDSA, PKCS#8). | + +Sub-CA mode validates `IsCA=true` and `KeyUsageCertSign` on the loaded certificate. Falls back to self-signed when paths are not set. Supports CRL generation (`GenerateCRL`) and OCSP response signing (`SignOCSPResponse`). + +### ACME + + + +Full ACME v2 protocol via `golang.org/x/crypto/acme`. 
+ +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_ACME_DIRECTORY_URL` | `https://acme-v02.api.letsencrypt.org/directory` | ACME directory | +| `CERTCTL_ACME_EMAIL` | (required) | Account email | +| `CERTCTL_ACME_CHALLENGE_TYPE` | `http-01` | Challenge type: `http-01`, `dns-01`, `dns-persist-01` | +| `CERTCTL_ACME_DNS_PRESENT_SCRIPT` | (none) | Script to create DNS-01 TXT record | +| `CERTCTL_ACME_DNS_CLEANUP_SCRIPT` | (none) | Script to remove DNS-01 TXT record | +| `CERTCTL_ACME_DNS_PROPAGATION_WAIT` | `10s` | Wait after DNS record creation | +| `CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN` | (none) | Issuer domain for DNS-PERSIST-01 | +| `CERTCTL_ACME_EAB_KID` | (none) | External Account Binding key ID | +| `CERTCTL_ACME_EAB_HMAC` | (none) | EAB HMAC key (base64url) | +| `CERTCTL_ACME_ARI_ENABLED` | `false` | Enable ACME Renewal Information (RFC 9773) | +| `CERTCTL_ACME_PROFILE` | (none) | Certificate profile for newOrder (e.g., `tlsserver`, `shortlived`) | + +**Challenge types:** + +- **HTTP-01** — Standard HTTP challenge via `/.well-known/acme-challenge/` token +- **DNS-01** — Pluggable DNS solver with script-based hooks. User-provided scripts create/cleanup `_acme-challenge` TXT records. Compatible with any DNS provider. +- **DNS-PERSIST-01** — Standing `_validation-persist` TXT record per IETF draft. Record value: `<issuer-domain>; accounturi=<account-uri>`. Set once, reused on every renewal. Auto-fallback to DNS-01 if CA doesn't support it. + +**External Account Binding (EAB):** Required by ZeroSSL, Google Trust Services, SSL.com. For ZeroSSL, credentials are auto-fetched from `api.zerossl.com/acme/eab-credentials-email` when no EAB credentials are provided — zero-friction onboarding. + +**Certificate Profile Selection:** Custom JWS-signed `newOrder` POST when profile is set (the `golang.org/x/crypto/acme` library lacks profile support). ES256 JWS signing with kid mode, nonce management, directory discovery. Empty profile delegates to the standard library path. 
### step-ca -- **Protocol** — Native `/sign` and `/revoke` API (not ACME) -- **Authentication** — JWK provisioner with key file + password -- **Configuration** — `CERTCTL_STEPCA_URL`, `CERTCTL_STEPCA_PROVISIONER`, `CERTCTL_STEPCA_KEY_PATH`, `CERTCTL_STEPCA_PASSWORD` -- **Operations** — Issue, renew, revoke -- **Use Case** — Smallstep private CA, internal PKI with strong auth + + + +Smallstep private CA via native `/sign` API with JWK provisioner authentication. Synchronous issuance. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_STEPCA_URL` | (required) | step-ca server URL | +| `CERTCTL_STEPCA_ROOT_CA` | (required) | Path to step-ca root CA PEM | +| `CERTCTL_STEPCA_PROVISIONER_NAME` | (required) | JWK provisioner name | +| `CERTCTL_STEPCA_PROVISIONER_KEY` | (required) | Path to provisioner private key | +| `CERTCTL_STEPCA_PROVISIONER_PASSWORD` | (none) | Provisioner key password | ### OpenSSL / Custom CA -- **Mechanism** — Delegate signing to user-provided shell scripts -- **Scripts** — Sign script (CSR→cert), revoke script (serial+reason), CRL script (full CRL) -- **Timeout** — Configurable timeout (default 30s) with process interruption -- **Configuration** — `CERTCTL_OPENSSL_SIGN_SCRIPT`, `CERTCTL_OPENSSL_REVOKE_SCRIPT`, `CERTCTL_OPENSSL_CRL_SCRIPT`, `CERTCTL_OPENSSL_TIMEOUT_SECONDS` -- **Use Case** — PKIX-compliant external CAs, PowerShell issuers, custom workflows + + + +Script-based signing delegating to user-provided shell scripts. Configurable timeout. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_OPENSSL_SIGN_SCRIPT` | (required) | Script that signs a CSR (receives CSR on stdin, outputs PEM on stdout) | +| `CERTCTL_OPENSSL_REVOKE_SCRIPT` | (none) | Script for revocation | +| `CERTCTL_OPENSSL_CRL_SCRIPT` | (none) | Script for CRL generation | +| `CERTCTL_OPENSSL_TIMEOUT_SECONDS` | `30` | Script execution timeout | + +### Vault PKI + + + +HashiCorp Vault `/v1/{mount}/sign/{role}` API. Token auth, synchronous issuance. 
+ +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_VAULT_ADDR` | (required) | Vault server URL | +| `CERTCTL_VAULT_TOKEN` | (required) | Vault token | +| `CERTCTL_VAULT_MOUNT` | `pki` | PKI secrets engine mount path | +| `CERTCTL_VAULT_ROLE` | (required) | PKI role name | +| `CERTCTL_VAULT_TTL` | `8760h` | Certificate TTL | + +CRL/OCSP delegated to Vault. Revocation via `POST /v1/{mount}/revoke` with serial number normalization. + +### DigiCert CertCentral + + + +Async order model: submit → poll → download. OV/EV support. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_DIGICERT_API_KEY` | (required) | `X-DC-DEVKEY` auth header | +| `CERTCTL_DIGICERT_ORG_ID` | (required) | Organization ID | +| `CERTCTL_DIGICERT_PRODUCT_TYPE` | `ssl_basic` | Product type | +| `CERTCTL_DIGICERT_BASE_URL` | `https://www.digicert.com/services/v2` | API base URL | + +Issuance returns `OrderID` when pending. `GetOrderStatus` polls via `GET /order/certificate/{order_id}`, downloads PEM bundle when issued. + +### Sectigo SCM + + + +Async order model: enroll → poll → collect PEM. 3-header auth. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_SECTIGO_CUSTOMER_URI` | (required) | Customer URI header | +| `CERTCTL_SECTIGO_LOGIN` | (required) | Login header | +| `CERTCTL_SECTIGO_PASSWORD` | (required) | Password header | +| `CERTCTL_SECTIGO_ORG_ID` | (required) | Organization ID | +| `CERTCTL_SECTIGO_CERT_TYPE` | (required) | Certificate type ID | +| `CERTCTL_SECTIGO_TERM` | `365` | Certificate term in days | +| `CERTCTL_SECTIGO_BASE_URL` | `https://cert-manager.com/api` | API base URL | + +Handles `collect-not-ready` (HTTP 400 / error code -183) gracefully — cert approved but not yet generated. + +### Google CAS + + + +Google Cloud Certificate Authority Service. OAuth2 service account auth (JWT → access token), synchronous issuance. No Google SDK dependency — all stdlib. 
+ +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_GOOGLE_CAS_PROJECT` | (required) | GCP project ID | +| `CERTCTL_GOOGLE_CAS_LOCATION` | (required) | GCP region | +| `CERTCTL_GOOGLE_CAS_CA_POOL` | (required) | CA pool name | +| `CERTCTL_GOOGLE_CAS_CREDENTIALS` | (required) | Path to service account JSON | +| `CERTCTL_GOOGLE_CAS_TTL` | `8760h` | Certificate TTL | + +Token caching with `sync.Mutex` and 5-minute refresh buffer. RS256 JWT signing. + +### AWS ACM Private CA + + + +Synchronous issuance via `IssueCertificate` + `GetCertificate` AWS APIs. Injectable `ACMPCAClient` interface. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_AWS_PCA_REGION` | (required) | AWS region | +| `CERTCTL_AWS_PCA_CA_ARN` | (required) | CA ARN | +| `CERTCTL_AWS_PCA_SIGNING_ALGORITHM` | `SHA256WITHRSA` | Signing algorithm | +| `CERTCTL_AWS_PCA_VALIDITY_DAYS` | `365` | Certificate validity | +| `CERTCTL_AWS_PCA_TEMPLATE_ARN` | (none) | Optional template ARN | + +Revocation with RFC 5280 reason mapping. CRL/OCSP delegated to AWS. + +### EST Server (RFC 7030) + + + +Enrollment over Secure Transport for device/WiFi/IoT certificate enrollment. 4 endpoints under `/.well-known/est/`: + +| Endpoint | Method | Description | +|---|---|---| +| `/cacerts` | GET | CA certificate chain (PKCS#7 certs-only, base64-encoded) | +| `/simpleenroll` | POST | New certificate enrollment | +| `/simplereenroll` | POST | Certificate re-enrollment | +| `/csrattrs` | GET | CSR attributes | + +Accepts both base64-encoded DER (EST standard) and PEM-encoded PKCS#10 CSR input. PKCS#7 output built with hand-rolled ASN.1 (no external PKCS#7 dependency). Configurable issuer and profile binding. 
+ +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_EST_ENABLED` | `false` | Enable EST endpoints | +| `CERTCTL_EST_ISSUER_ID` | `iss-local` | Issuer for EST enrollments | +| `CERTCTL_EST_PROFILE_ID` | (none) | Optional profile constraint | --- -## Target Connectors (5 Implemented + 2 Stubs) +## ACME Renewal Information (RFC 9773) -### NGINX -- **Deployment** — Separate cert, chain, and key files -- **Validation** — `nginx -t` configuration test -- **Reload** — Graceful reload via SIGHUP (or nginx -s reload) -- **Target Config** — Certificate path, chain path, key path -- **Status** — Fully implemented (M10) + -### Apache httpd -- **Deployment** — Separate cert, chain, and key files -- **Validation** — `apachectl configtest` or `apache2ctl configtest` -- **Reload** — Graceful reload via `apachectl graceful` or `apache2ctl graceful` -- **Target Config** — Certificate path, chain path, key path -- **Status** — Fully implemented (M10) +CA-directed renewal timing. Instead of hardcoded expiration thresholds, the CA tells certctl when to renew. 
-### HAProxy -- **Deployment** — Combined PEM file (cert + chain + key concatenated) -- **Validation** — Optional `haproxy -c -f config` test -- **Reload** — Process signal or socket-based reload (configurable) -- **Target Config** — Combined PEM path, optional reload command -- **Status** — Fully implemented (M10) +### How It Works -### Traefik -- **Deployment** — File provider: writes cert and key to Traefik's watched certificate directory -- **Auto-Reload** — Traefik's file provider watches the directory for changes; no explicit reload needed -- **Target Config** — Certificate directory, cert filename, key filename -- **Status** — Fully implemented (M26) - -### Caddy -- **Dual-Mode Deployment** — Admin API (hot-reload via `POST /load`) or file-based (write cert+key, Caddy watches) -- **API Mode** — Posts certificate to Caddy's admin API endpoint for zero-downtime reload -- **File Mode** — Writes cert and key files to configured directory (fallback when admin API is unavailable) -- **Target Config** — Admin API URL, certificate directory, cert filename, key filename, mode (api/file) -- **Status** — Fully implemented (M26) - -### F5 BIG-IP (Stub) -- **Protocol** — iControl REST API via proxy agent -- **Status** — Interface only in V2; implementation in V3 (paid) -- **Deployment Model** — Proxy agent + BIG-IP API client in same network zone -- **Authentication** — iControl credentials stored in target config - -### IIS (Stub) -- **Dual-Mode Architecture** — Agent-local PowerShell (primary) or proxy agent WinRM (agentless) -- **Status** — Interface only in V2; implementation in V3 (paid) -- **Deployment Model** — Agent runs PowerShell cmdlets locally or proxy agent invokes WinRM -- **Binding** — Bind certificate to IIS site by hostname - ---- - -## Notifier Connectors (6 Channels) - -Notifications route certificate events to the people and systems that need to know. Each channel is enabled by setting its env var — no code changes needed. 
- -```bash -# Enable Slack notifications (just set the webhook URL) -export CERTCTL_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/T.../B.../xxx" - -# Enable PagerDuty escalation for critical events -export CERTCTL_PAGERDUTY_ROUTING_KEY="your-routing-key" -export CERTCTL_PAGERDUTY_SEVERITY="critical" -``` - -### Email -- **SMTP** — Standard SMTP or TLS endpoint -- **Configuration** — Server, port, auth credentials (env vars) -- **Use Case** — Owner notifications, compliance distribution lists - -### Webhook -- **HTTP POST** — Custom JSON payload to any endpoint -- **Headers** — Content-Type, custom auth headers (configurable) -- **Use Case** — Slack (via custom webhook), Microsoft Power Automate, custom platforms - -### Slack -- **Protocol** — Incoming Webhook -- **Message Format** — Markdown with bold subject, formatted body -- **Overrides** — Channel (`CERTCTL_SLACK_CHANNEL`), username (`CERTCTL_SLACK_USERNAME`), emoji -- **Configuration** — `CERTCTL_SLACK_WEBHOOK_URL` -- **Use Case** — Team notifications, ops channels - -### Microsoft Teams -- **Protocol** — Incoming Webhook -- **Message Format** — MessageCard with ThemeColor, Summary, Sections -- **Markdown Support** — Formatted text within sections -- **Configuration** — `CERTCTL_TEAMS_WEBHOOK_URL` -- **Use Case** — Team-wide alerts, cross-team visibility - -### PagerDuty -- **Protocol** — Events API v2 -- **Trigger Events** — Alert on expiration, failure, revocation -- **Severity** — Configurable default (default "warning") -- **Custom Details** — Certificate ID, days remaining, owner, etc. -- **Configuration** — `CERTCTL_PAGERDUTY_ROUTING_KEY`, `CERTCTL_PAGERDUTY_SEVERITY` -- **Use Case** — Incident response, on-call escalations - -### OpsGenie -- **Protocol** — Alert API v2 -- **Priority** — Configurable default (default "P3") -- **Tags** — Category tags (cert expiration, deployment failure, etc.) 
-- **Responders** — Optional team routing -- **Configuration** — `CERTCTL_OPSGENIE_API_KEY`, `CERTCTL_OPSGENIE_PRIORITY` -- **Use Case** — Multi-team alerting, escalation policies - -### Notification Types -- **Expiration Alert** — Certificate approaching threshold (30/14/7/0 days) -- **Renewal Started** — Renewal job initiated -- **Renewal Completed** — Certificate successfully renewed -- **Deployment Completed** — Certificate deployed to target -- **Deployment Failed** — Target deployment error -- **Revocation** — Certificate revoked with reason -- **Policy Violation** — Certificate violates renewal policy - ---- - -## ACME Renewal Information (ARI, RFC 9773) - -Instead of using fixed renewal thresholds (renew 30 days before expiry), ACME ARI lets the CA tell certctl exactly when to renew. This is useful for distributing renewal load across maintenance windows and coordinating mass-revocation scenarios. - -**How it works:** - -```bash -# Enable ARI on your ACME issuer -export CERTCTL_ACME_ARI_ENABLED=true - -# Certificates now query the ARI endpoint for suggested renewal windows -# If the CA doesn't support ARI (404), certctl falls back to threshold-based renewal -``` - -| Field | Details | -|-------|---------| -| **Protocol** | ACME Renewal Information (RFC 9773) | -| **Cert ID Computation** | base64url(SHA-256(DER cert)) | -| **Suggested Window** | Start and end times provided by CA | -| **Renewal Timing** — If current time is after window start, renew immediately. Otherwise, wait until start time. 
| -| **Fallback** | 404 from ARI endpoint triggers automatic fallback to threshold-based renewal | -| **Configuration** | `CERTCTL_ACME_ARI_ENABLED=true` on ACME issuer config | -| **Supported CAs** | Let's Encrypt (v2.1.0+), Sectigo, others gradually adopting | - -**Benefits:** - -- **Load Distribution** — CA specifies renewal window to avoid thundering herd spikes -- **Coordination** — Support for mass revocation scenarios where CA controls timing -- **No Over-Renewal** — Avoid unnecessary early renewals that waste your CA's capacity - ---- - -## Scheduled Certificate Digest Emails - -Scheduled HTML digest emails with certificate stats, expiration timeline, job health, and agent fleet overview. Useful for daily ops briefings and compliance reporting. - -```bash -# Configure SMTP -export CERTCTL_SMTP_HOST=smtp.example.com -export CERTCTL_SMTP_PORT=587 -export CERTCTL_SMTP_USERNAME=admin@example.com -export CERTCTL_SMTP_PASSWORD=your-app-password -export CERTCTL_SMTP_FROM_ADDRESS=certctl@example.com - -# Enable digest -export CERTCTL_DIGEST_ENABLED=true -export CERTCTL_DIGEST_INTERVAL=24h -export CERTCTL_DIGEST_RECIPIENTS=ops@example.com,security@example.com -``` - -| Feature | Details | -|---------|---------| -| **Scheduler Loop** | 7th background loop, default 24-hour interval (configurable: 12h, 7d, etc.) | -| **Startup Behavior** | Does NOT run on startup; waits for first scheduled tick | -| **Operation Timeout** | 5 minutes per digest generation + send | -| **Idempotency** — `sync/atomic.Bool` guard prevents concurrent digest executions | -| **HTML Template** | Responsive email with stats grid (total, expiring, expired, agents), jobs summary (30-day), expiring certs table with color-coded urgency (7/14/30 days) | -| **Recipients** | Comma-separated email addresses. Falls back to certificate owner emails if none configured. 
| -| **API Endpoints** — `GET /api/v1/digest/preview` (HTML preview), `POST /api/v1/digest/send` (trigger immediately) | -| **Configuration** — `CERTCTL_DIGEST_ENABLED`, `CERTCTL_DIGEST_INTERVAL` (default 24h), `CERTCTL_DIGEST_RECIPIENTS` | - -**Digest Contents:** - -- **Certificate Stats** — Total, active, expiring soon, expired, revoked -- **Job Health** — Completed, failed (last 30 days) -- **Agent Fleet** — Total agents online, offline, version distribution -- **Expiring Certificates** — Table with CN, SANs, days remaining, owner, status badges - -**Use Cases:** - -- Daily ops briefing for certificate inventory health -- Compliance reporting (audit trail + digest archive) -- Stakeholder visibility (automated newsletter) - ---- - -## Helm Chart for Kubernetes - -Production-ready Helm chart for Kubernetes deployments with secure defaults and comprehensive configurability. - -### Chart Components - -| Component | Details | -|-----------|---------| -| **Server Deployment** | Configurable replicas (default 2), liveness/readiness probes, security context (non-root, read-only rootfs), resource limits, graceful shutdown | -| **PostgreSQL StatefulSet** | Primary + replica, persistent volumes with configurable storage class/size (default 10Gi), automatic backup (via init container or sidecarsynchronous | -| **Agent DaemonSet** | One agent per infrastructure node, key storage volume (agent_keys), server discovery via internal DNS | -| **ConfigMap** | Issuer, target, and scheduler configuration; all certctl env vars exposed | -| **Secret** — API key, database password, SMTP credentials (base64-encoded) | -| **Ingress** — Optional with TLS, configurable hostname and certificate (via cert-manager or manual) | -| **ServiceAccount** — RBAC with configurable annotations for Kubernetes audit logging | - -### Installation - -```bash -# Install with custom values -helm install certctl deploy/helm/certctl/ \ - --namespace certctl --create-namespace \ - --set 
server.auth.apiKey="your-secure-key" \ - --set postgresql.auth.password="your-db-password" \ - --set ingress.enabled=true \ - --set ingress.hosts[0].host="certctl.example.com" \ - --set ingress.annotations."cert-manager\.io/cluster-issuer"="letsencrypt-prod" -``` - -### Key Values - -| Value | Default | Description | -|-------|---------|-------------| -| `server.replicaCount` | 2 | Number of server replicas | -| `server.auth.apiKey` | — | (required) API key for authentication | -| `postgresql.auth.password` | — | (required) PostgreSQL password | -| `postgresql.storage.size` | 10Gi | Database volume size | -| `ingress.enabled` | false | Enable Ingress for public access | -| `ingress.hosts[0].host` | certctl.example.com | Primary hostname | -| `ingress.tls.enabled` | true | TLS on Ingress (requires cert-manager) | -| `agent.enabled` | true | Deploy agent DaemonSet | -| `smtp.enabled` | false | Enable SMTP for digest emails | -| `smtp.host` | — | SMTP server hostname | - -### Security Defaults - -- **Non-root containers** — Server and agent run as unprivileged user -- **Read-only filesystem** — Root filesystem mounted read-only (except /tmp) -- **Network policies** — Optional KubernetesNetworkPolicy to restrict traffic -- **Secrets** — API keys and passwords stored in K8s Secrets, never in ConfigMaps or environment defaults -- **RBAC** — ServiceAccount with minimal required permissions - -### Upgrade Path - -```bash -# Upgrade to a new certctl release -helm upgrade certctl deploy/helm/certctl/ \ - --namespace certctl \ - -f my-values.yaml - -# Rollback if needed -helm rollback certctl [REVISION] -``` - ---- - -## Agent Fleet - -Agents are lightweight Go binaries deployed on your servers that handle the last mile — generating private keys locally, submitting CSRs, and deploying signed certificates to web servers. The control plane never touches private keys or initiates outbound connections, keeping your security perimeter intact. 
- -```bash -# Start an agent (it auto-registers and begins polling for work) -export CERTCTL_SERVER_URL=http://certctl.internal:8443 -export CERTCTL_API_KEY=agent-api-key -export CERTCTL_AGENT_ID=ag-nginx-prod-1 -./certctl-agent --key-dir /var/lib/certctl/keys --discovery-dirs /etc/ssl/certs - -# Check agent status from the control plane -curl -H "$AUTH" $SERVER/api/v1/agents/ag-nginx-prod-1 | jq '{status, last_heartbeat, os, architecture}' -``` - -### Agent Registration & Heartbeat -- **Registration** — `POST /api/v1/agents` with agent name and API key -- **Heartbeat** — `POST /api/v1/agents/{id}/heartbeat` every 60 seconds -- **Auto-Offline** — Agents marked offline after 3 missed heartbeats (configurable) -- **Last Heartbeat Timestamp** — Tracked in `agents` table - -### Agent Metadata (M10) -Collected via runtime introspection and network utilities. - -| Field | Source | Example | -|-------|--------|---------| -| **OS** | `runtime.GOOS` | linux, darwin, windows | -| **Architecture** | `runtime.GOARCH` | amd64, arm64 | -| **Hostname** | `os.Hostname()` | nginx-prod-1 | -| **IP Address** | `net.Interface` + `net.IP` | 10.0.1.5 | -| **Version** | Agent binary version (from build flags) | v2.1.0 | - -### Agent Groups (M11b) -Dynamic grouping and filtering for policy assignment and deployment targeting. Agent groups let you apply renewal policies to subsets of your fleet — for example, "all Linux amd64 agents in the 10.0.0.0/8 network" — without manually listing every agent. 
- -```bash -# Create a group matching all Linux agents in a specific subnet -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/agent-groups -d '{ - "id": "ag-linux-dc1", "name": "Linux DC1", - "os_match": "linux", "ip_cidr_match": "10.0.1.0/24" -}' - -# List groups and their criteria -curl -H "$AUTH" "$SERVER/api/v1/agent-groups" | jq '.items[] | {id, name, os_match, ip_cidr_match}' - -# View members of a group (dynamically matched + manual includes) -curl -H "$AUTH" "$SERVER/api/v1/agent-groups/ag-linux-dc1/members" | jq '.items[].agent_id' -``` - -| Criterion | Details | Example | -|-----------|---------|---------| -| **OS Match** | Exact string match | linux, darwin, windows | -| **Architecture Match** | Exact string match | amd64, arm64, 386 | -| **IP CIDR Match** | IPv4 or IPv6 CIDR block | 10.0.0.0/8, 192.168.1.0/24 | -| **Version Match** | Semantic version range (optional) | >=2.0.0, <3.0.0 | -| **Manual Membership** | Explicit include/exclude | Include a-xxx, exclude a-yyy | -| **MatchesAgent()** | Dynamic evaluation at job time | Criteria match→agent included | - -### Agent Group GUI -- List with dynamic match criteria badges (color-coded) -- Enable/disable toggle per group -- Manual membership editor (include/exclude lists) -- Agent count per group (dynamic) -- Scoped to renewal policies via `agent_group_id` FK - -### Agent Capabilities -Agents report to `/api/v1/agents/{id}/work` with supported target types and issuers. 
- -- **Target Deployment** — NGINX, Apache httpd, HAProxy, Traefik, Caddy, F5 BIG-IP (proxy), IIS (proxy) -- **Key Management** — ECDSA P-256 keygen, key storage at `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`), 0600 file permissions -- **CSR Submission** — `POST /api/v1/agents/{id}/csr` for AwaitingCSR jobs - -### Fleet Overview Page -- **OS/Architecture Grouping** — Agents grouped by GOOS + GOARCH -- **Charts** — Status distribution (pie), version breakdown (bar) -- **Per-Platform Listing** — Expandable agent list under each OS/Arch combo -- **Health Indicators** — Online/offline status, last heartbeat, uptime - ---- - -## Certificate Discovery (M18b) - -### Overview -Agents automatically discover existing certificates in the infrastructure — on filesystem, in key stores, or elsewhere — report findings to the control plane, and operators triage them for enrollment. - -### Agent-Side Discovery -- **Configuration** — `CERTCTL_DISCOVERY_DIRS` env var (comma-separated list) or `--discovery-dirs` CLI flag -- **Scan Execution** — Runs on agent startup and every 6 hours in background -- **Supported Formats** — PEM (.pem, .crt, .cer, .cert) and DER (.der) files -- **Recursive Walk** — Scans directory trees to find all certificates -- **File Filtering** — Skips files > 1MB and obvious key files - -### Certificate Extraction -Each discovered certificate is parsed and its metadata extracted: - -| Field | Source | Example | -|-------|--------|---------| -| **Common Name** | X.509 Subject CN | api.example.com | -| **SANs** | X.509 SubjectAltNames | api.example.com, *.api.example.com | -| **Serial** | Certificate serial number | 0x123abc... 
| -| **Issuer DN** | X.509 Issuer | CN=Internal CA, O=Acme Inc | -| **Subject DN** | X.509 Subject | CN=api.example.com, O=Acme Inc | -| **Not Before** | Validity start | 2024-01-15T00:00:00Z | -| **Not After** | Validity end | 2026-01-15T00:00:00Z | -| **Key Algorithm** | Key type | RSA, ECDSA, Ed25519 | -| **Key Size** | Bits | 2048, 256, 4096 | -| **Is CA** | CA flag in extensions | true/false | -| **Fingerprint** | SHA-256 hash (dedup key) | a1b2c3d4e5f6... | - -### Server-Side Processing -- **Deduplication** — Uses fingerprint + agent ID + path as unique key; prevents duplicates -- **Status Tracking** — Three statuses: **Unmanaged** (discovered, not yet claimed), **Managed** (linked to control plane cert), **Dismissed** (operator decided not to manage) -- **Audit Trail** — `discovery_scan_completed`, `discovery_cert_claimed`, `discovery_cert_dismissed` events logged with actor and reason -- **Storage** — `discovered_certificates` and `discovery_scans` tables in PostgreSQL - -### Triage Workflow -1. Agent submits scan results via `POST /api/v1/agents/{id}/discoveries` -2. Server deduplicates and stores discovery records -3. Operator views `GET /api/v1/discovered-certificates?status=Unmanaged` -4. For each unmanaged cert: - - **Claim it** — `POST /api/v1/discovered-certificates/{id}/claim` links to managed cert or creates new enrollment - - **Dismiss it** — `POST /api/v1/discovered-certificates/{id}/dismiss` removes from triage queue -5. Tracking enables visibility into what's deployed vs. 
what's managed - -### Discovery API Endpoints (M18b) -| Endpoint | Method | Purpose | -|----------|--------|---------| -| `/api/v1/agents/{id}/discoveries` | POST | Agent submits scan results | -| `/api/v1/discovered-certificates` | GET | List discovered certs (with ?agent_id, ?status filters) | -| `/api/v1/discovered-certificates/{id}` | GET | Get single discovered cert detail | -| `/api/v1/discovered-certificates/{id}/claim` | POST | Link to managed cert or create enrollment | -| `/api/v1/discovered-certificates/{id}/dismiss` | POST | Dismiss from triage | -| `/api/v1/discovery-scans` | GET | List scan history with timestamps | -| `/api/v1/discovery-summary` | GET | Aggregate status counts (Unmanaged, Managed, Dismissed) | - -```bash -# Check triage status at a glance -curl -H "$AUTH" "$SERVER/api/v1/discovery-summary" | jq . -# → {"Unmanaged": 12, "Managed": 45, "Dismissed": 3} - -# Review scan execution history -curl -H "$AUTH" "$SERVER/api/v1/discovery-scans" | jq '.data[] | {agent_id, certificates_found, certificates_new, started_at}' -``` - -### Use Cases -- **Inventory Baseline** — Scan production servers at deployment time to establish baseline of existing certificates -- **Compliance Discovery** — Find all TLS certs before renewing certificate policies -- **Migration Planning** — Discover unmanaged certs to plan migration from other CA/platforms -- **Audit Preparation** — Triage discovered certs into managed and dismissed for compliance reports -- **Multi-CA Migration** — Find all certs currently issued by old CA, claim them for renewal under new issuer - ---- - -## Network Certificate Discovery (M21) - -### Overview -Server-side active TLS scanning probes network endpoints across CIDR ranges, extracts certificate metadata from TLS handshakes, and feeds results into the existing filesystem discovery pipeline. No agent deployment required — the control plane scans directly. 
- -### Configuration -- **Enable** — `CERTCTL_NETWORK_SCAN_ENABLED=true` (disabled by default) -- **Scan Interval** — `CERTCTL_NETWORK_SCAN_INTERVAL=6h` (default 6 hours, configurable) - -### Network Scan Targets -Scan targets define what CIDR ranges and ports to probe. - -| Field | Details | Example | -|-------|---------|---------| -| **ID** | Prefixed text PK (nst-xxx) | nst-datacenter-east | -| **Name** | Human-readable target name | Datacenter East Production | -| **CIDRs** | Array of CIDR ranges | ["10.0.1.0/24", "10.0.2.0/24"] | -| **Ports** | Array of TCP ports | [443, 8443, 6443] | -| **Enabled** | Toggle scanning on/off | true | -| **Scan Interval Hours** | Per-target scan frequency | 6 | -| **Timeout Ms** | Per-connection timeout | 5000 | - -### Scanning Behavior -- **CIDR Expansion** — Ranges expanded to individual IPs; safety cap at /20 (4096 IPs) prevents accidental large scans -- **Concurrent Probing** — 50 goroutines (semaphore-based), configurable timeout per TLS connection -- **TLS Extraction** — `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` discovers all certs including self-signed, expired, and internal CA certs -- **Sentinel Agent Pattern** — Uses `server-scanner` as virtual agent ID, reusing the existing `discovered_certificates` dedup constraint without schema changes -- **Discovery Pipeline** — Scan results feed into `DiscoveryService.ProcessDiscoveryReport()` for fingerprint dedup, audit trail, and triage workflow - -### Network Scan API Endpoints (M21) - -| Endpoint | Method | Purpose | -|----------|--------|---------| -| `/api/v1/network-scan-targets` | GET | List all scan targets with metrics | -| `/api/v1/network-scan-targets` | POST | Create a new scan target | -| `/api/v1/network-scan-targets/{id}` | GET | Get scan target details | -| `/api/v1/network-scan-targets/{id}` | PUT | Update scan target configuration | -| `/api/v1/network-scan-targets/{id}` | DELETE | Delete a scan target | -| 
`/api/v1/network-scan-targets/{id}/scan` | POST | Trigger an immediate scan | +1. `GetRenewalInfo` computes an RFC 9773 cert ID (base64url-encoded SHA-256 of DER cert) +2. Queries the CA's Renewal Information endpoint (discovered from ACME directory or constructed via fallback URL) +3. Returns a `SuggestedWindow` (start/end), optional `RetryAfter`, and `ExplanationURL` +4. `ShouldRenewNow()` returns true if the current time is past `SuggestedWindowStart` +5. `OptimalRenewalTime()` picks a random time within the window for load distribution ### Scheduler Integration -- **6th scheduler loop** — runs at configured interval (default 6h) alongside renewal (1h), jobs (30s), health (2m), notifications (1m), short-lived expiry (30s) -- **Conditional** — only starts if `CERTCTL_NETWORK_SCAN_ENABLED=true` and network scan service is initialized -- **Scan Metrics** — each target tracks `last_scan_at`, `last_scan_duration_ms`, `last_scan_certs_found` -### Use Cases -- **Network Inventory** — "What TLS certs are deployed across my network?" 
without deploying agents -- **Shadow Certificate Detection** — Find certificates on services you didn't know were running TLS -- **Compliance Scanning** — Prove to auditors that all TLS endpoints are inventoried -- **Migration Assessment** — Scan a network range before onboarding to certctl management -- **Expiration Monitoring** — Discover soon-to-expire certs on network endpoints before they cause outages +The renewal scheduler (`CheckExpiringCertificates`) queries ARI before creating renewal jobs: + +- If ARI says "not yet" → skip renewal +- If ARI says "renew now" → create renewal job with `renewal_trigger: ari` audit event +- If ARI errors → log warning, fall back to threshold-based logic +- Non-ARI issuers return nil (Local CA, step-ca, OpenSSL, Vault, DigiCert, Sectigo, Google CAS, AWS ACM PCA) + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_ACME_ARI_ENABLED` | `false` | Enable ARI queries | + +### Shorter Certificate Validity Readiness + +certctl's default thresholds `[30, 14, 7, 0]` work correctly at all CA/Browser Forum SC-081v3 validity reduction phases: + +- 200-day certs (Phase 1, March 2026) +- 100-day certs (Phase 2, March 2027) +- 47-day certs (Phase 3, March 2029) + +For Let's Encrypt 6-day `shortlived` certificates, ARI is the expected renewal path — threshold-based logic alone is insufficient at that lifetime. --- -## Ownership & Accountability +## Target Connectors -Without ownership, expiring certificates become "someone else's problem." Ownership tracking ensures every certificate has a named person and team who receive alerts and are accountable for renewal. When an auditor asks "who owns this cert?", the answer is one API call away. + -```bash -# Create a team -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/teams -d '{"name": "Platform Engineering", "email": "platform@example.com"}' +14 target connector types implementing the `target.Connector` interface. 
All support `ValidateConfig`, `DeployCertificate`, `ValidateDeployment`. -# Create an owner -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/owners -d '{"name": "Alice Chen", "email": "alice@example.com", "team_id": "t-platform"}' +### Deployment Model -# Assign owner to certificate — Alice now receives all alerts for this cert -curl -X PUT -H "$AUTH" -H "$CT" $SERVER/api/v1/certificates/mc-api-prod -d '{"owner_id": "o-alice"}' -``` +Pull-only. The server never initiates outbound connections to agents or targets. Agents poll for work. For network appliances and agentless servers, a "proxy agent" in the same network zone executes deployment via the target's API. + +### NGINX + + + +File write → `nginx -t` validation → `nginx -s reload`. Config: `cert_path`, `key_path`, `chain_path`, `reload_command`, `validate_command`. + +### Apache httpd + + + +Separate cert/chain/key files → `apachectl configtest` → `apachectl graceful`. Config: `cert_path`, `key_path`, `chain_path`, `reload_command`, `validate_command`. + +### HAProxy + + + +Combined PEM file (cert + chain + key) → optional validation → reload via socket/signal. Config: `pem_path`, `reload_command`, `validate_command`. + +### Traefik + + + +File provider deployment: writes cert/key to Traefik's watched directory. Traefik auto-reloads via filesystem watch. Config: `cert_dir`, `cert_filename`, `key_filename`. + +### Caddy + + + +Dual-mode: `api` (POST to Caddy admin endpoint for hot-reload) or `file` (file-based with configurable paths). Config: `mode` (`api`/`file`), `admin_url`, `cert_path`, `key_path`. + +### Envoy + + + +File-based deployment with optional SDS JSON config. Envoy auto-reloads via filesystem watch. Path traversal prevention on all file paths. Optional SDS JSON bootstrap (`type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.Secret`). Config: `cert_dir`, `cert_filename`, `key_filename`, `chain_filename`, `sds_config`. + +### F5 BIG-IP + + + +iControl REST API via proxy agent. 
Token auth (`POST /mgmt/shared/authn/login`, `X-F5-Auth-Token`), 401 auto-retry. Transaction-based atomic SSL profile updates with auto-rollback on failure. Injectable `F5Client` interface. + +Config: `host`, `port` (443), `username`, `password`, `partition` (Common), `ssl_profile`, `insecure` (true), `timeout` (30). Minimum BIG-IP v12.0+. + +Deployment: file upload with `Content-Range` → crypto object install (cert/key/chain) → transaction create → SSL profile PATCH → transaction commit. `cleanupCryptoObjects()` removes installed objects on failure. + +### IIS + + + +Dual-mode: agent-local PowerShell or WinRM proxy agent. PEM → PFX conversion via `go-pkcs12`, `Import-PfxCertificate`, IIS binding management (`New-WebBinding` + `AddSslCertificate`), SHA-1 thumbprint computation, SNI support. + +**Local mode** config: `site_name`, `cert_store` (My), `port` (443), `sni` (false), `ip_address` (*). + +**WinRM mode** config: adds `mode` (winrm), `winrm_host`, `winrm_port` (5985/5986), `winrm_username`, `winrm_password`, `winrm_https`, `winrm_insecure`, `winrm_timeout` (60s). Base64 PFX transfer via PowerShell with `try/finally` cleanup. Uses `masterzen/winrm`. + +Injectable `PowerShellExecutor` interface for cross-platform testing. Regex-validated config fields prevent PowerShell injection. + +### SSH (Agentless) + + + +Agentless deployment via SSH/SFTP to any Linux/Unix server. Uses `golang.org/x/crypto/ssh` + `github.com/pkg/sftp`. + +Config: `host`, `port` (22), `user`, `auth_method` (key/password), `private_key_path`, `password`, `cert_path`, `key_path`, `chain_path`, `reload_command`, `timeout` (30s). Optional octal permission strings (e.g., `"0644"`, `"0600"`). + +Shell injection prevention via `validation.ValidateShellCommand()` on reload commands. Injectable `SSHClient` interface. + +### Postfix / Dovecot + + + +Dual-mode mail server TLS connector. File write → validation → reload. 
+ +- **Postfix mode**: `postfix check` → `postfix reload` +- **Dovecot mode**: `doveconf -n` → `doveadm reload` + +Config: `mode` (postfix/dovecot), `cert_path`, `key_path`, `chain_path`, `reload_command`, `validate_command`. Shell injection prevention. + +### Windows Certificate Store + + + +PowerShell-based cert import via `Import-PfxCertificate`. PEM → PFX → base64 → PowerShell script with `try/finally` cleanup. + +Config: `store` (My/Root/CA/WebHosting), `store_location` (LocalMachine/CurrentUser), `friendly_name`, `cleanup_expired` (bool). Dual-mode: local or WinRM (same pattern as IIS). Reuses shared `certutil` package. + +### Java Keystore + + + +PEM → PKCS#12 (via `certutil.CreatePFX`) → temp file → `keytool -importkeystore` pipeline. JKS and PKCS12 format support. + +Config: `keystore_path`, `keystore_password`, `keystore_type` (JKS/PKCS12), `alias` (server), `reload_command`. Path traversal prevention, existing alias deletion before import. Reuses shared `certutil` package. + +### Kubernetes Secrets + + + +Deploys certificates as `kubernetes.io/tls` Secrets. Injectable `K8sClient` interface (proxy agent pattern). In-cluster auth by default, out-of-cluster via kubeconfig. + +Config: `namespace`, `secret_name`, `labels` (map), `kubeconfig_path` (optional). Fingerprint-based validation in `ValidateDeployment`. + +### Shared certutil Package + + + +Extracted from IIS connector. 
Reused by IIS, WinCertStore, and JavaKeystore: + +- `CreatePFX` — PEM → PKCS#12 via `go-pkcs12` +- `ParsePrivateKey` — PKCS#1, PKCS#8, EC key formats +- `ComputeThumbprint` — SHA-1 of DER cert (matches Windows `certutil`) +- `GenerateRandomPassword` — 32-char crypto/rand password +- `ParseCertificatePEM` — PEM → `*x509.Certificate` + +--- + +## Notifier Connectors + + + +### Notification Types + +| Type | Description | +|---|---| +| `ExpirationWarning` | Certificate approaching threshold | +| `RenewalSuccess` | Renewal completed | +| `RenewalFailure` | Renewal failed | +| `DeploymentSuccess` | Deployment completed | +| `DeploymentFailure` | Deployment failed | +| `PolicyViolation` | Policy rule violated | +| `Revocation` | Certificate revoked | + +### Notification Channels + +| Channel | Auth | Config Env Vars | +|---|---|---| +| **Email** | SMTP | `CERTCTL_SMTP_HOST`, `CERTCTL_SMTP_PORT` (587), `CERTCTL_SMTP_USERNAME`, `CERTCTL_SMTP_PASSWORD`, `CERTCTL_SMTP_FROM_ADDRESS`, `CERTCTL_SMTP_USE_TLS` (true) | +| **Webhook** | URL-based | `CERTCTL_WEBHOOK_URL` | +| **Slack** | Incoming webhook | `CERTCTL_SLACK_WEBHOOK_URL`, `CERTCTL_SLACK_CHANNEL`, `CERTCTL_SLACK_USERNAME` | +| **Microsoft Teams** | Incoming webhook (MessageCard) | `CERTCTL_TEAMS_WEBHOOK_URL` | +| **PagerDuty** | Events API v2 | `CERTCTL_PAGERDUTY_ROUTING_KEY`, `CERTCTL_PAGERDUTY_SEVERITY` (warning) | +| **OpsGenie** | Alert API v2, GenieKey | `CERTCTL_OPSGENIE_API_KEY`, `CERTCTL_OPSGENIE_PRIORITY` (P3) | + +All notifier connectors have 10-second HTTP client timeouts. + +--- + +## Certificate Digest + + + +Scheduled HTML email digest with aggregated certificate status. 
+ +### Content + +- Stats grid: total certs, expiring, expired, active agents +- Jobs summary +- Expiring certificates table with color-coded badges +- Responsive CSS for email clients + +### Configuration + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_DIGEST_ENABLED` | `false` | Enable digest | +| `CERTCTL_DIGEST_INTERVAL` | `24h` | Send interval | +| `CERTCTL_DIGEST_RECIPIENTS` | (none) | Comma-separated emails. Falls back to certificate owner emails when empty. | + +### API + +- `GET /api/v1/digest/preview` — HTML preview of current digest +- `POST /api/v1/digest/send` — trigger immediate send + +Both endpoints return 503 when digest is not configured (nil-safe handler). + +--- + +## Post-Deployment TLS Verification + + + +After deploying a certificate, the agent probes the live TLS endpoint and compares SHA-256 fingerprints. + +### Verification Statuses + +| Status | Description | +|---|---| +| `pending` | Verification not yet attempted | +| `success` | Deployed cert matches live endpoint | +| `failed` | Fingerprint mismatch or connection error | +| `skipped` | Verification disabled or not applicable | + +### Flow + +1. Agent completes deployment +2. Agent waits `CERTCTL_VERIFY_DELAY` (configurable) +3. Agent connects via `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` +4. Compares SHA-256 fingerprint of served cert against deployed cert +5. Submits result via `POST /api/v1/jobs/{id}/verify` + +Best-effort — failures are recorded but don't block or roll back deployments. + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_VERIFY_DEPLOYMENT` | `false` | Enable verification | +| `CERTCTL_VERIFY_TIMEOUT` | `5s` | TLS connection timeout | +| `CERTCTL_VERIFY_DELAY` | `2s` | Wait after deployment before probing | + +--- + +## Discovery + +### Filesystem Discovery + + + +Agents scan configured directories for existing certificates. 
+ +- Runs on agent startup and every 6 hours +- Walks directories recursively, parses PEM (`.pem`, `.crt`, `.cer`, `.cert`) and DER (`.der`) files +- Extracts: common name, SANs, serial, issuer DN, subject DN, validity, key algorithm, key size, is_ca, SHA-256 fingerprint +- Reports to server via `POST /api/v1/agents/{id}/discoveries` +- Server deduplicates by `(fingerprint_sha256, agent_id, source_path)` unique constraint + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_DISCOVERY_DIRS` | (none) | Comma-separated directories for agent to scan | + +### Discovery Statuses + +| Status | Description | +|---|---| +| `Unmanaged` | Discovered, not yet triaged | +| `Managed` | Claimed and linked to a managed certificate | +| `Dismissed` | Explicitly dismissed from triage queue | + +### Discovery API + +| Endpoint | Method | Description | +|---|---|---| +| `/api/v1/agents/{id}/discoveries` | POST | Agent submits scan results | +| `/api/v1/discovered-certificates` | GET | List with `?agent_id`, `?status` filters | +| `/api/v1/discovered-certificates/{id}` | GET | Detail | +| `/api/v1/discovered-certificates/{id}/claim` | POST | Link to managed certificate | +| `/api/v1/discovered-certificates/{id}/dismiss` | POST | Dismiss from triage | +| `/api/v1/discovery-scans` | GET | Scan history | +| `/api/v1/discovery-summary` | GET | Aggregate status counts | + +### Network Certificate Discovery + + + +Server-side active TLS scanning of CIDR ranges. Concurrent probing with semaphore (50 goroutines). Feeds into the existing discovery pipeline via `server-scanner` sentinel agent. 
+ +- CIDR expansion with `/20` safety cap (4,096 IPs max per scan) +- `crypto/tls.DialWithDialer` with `InsecureSkipVerify=true` to discover all certs (including self-signed, expired, internal CA) +- SSRF protection: reserved IP ranges filtered (loopback, link-local, multicast, broadcast) + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_NETWORK_SCAN_ENABLED` | `false` | Enable network scanning | +| `CERTCTL_NETWORK_SCAN_INTERVAL` | `6h` | Scan interval | + +### Network Scan Target API + +| Endpoint | Method | Description | +|---|---|---| +| `/api/v1/network-scan-targets` | GET | List targets | +| `/api/v1/network-scan-targets/{id}` | GET | Detail | +| `/api/v1/network-scan-targets` | POST | Create target (name, CIDRs, ports, interval, timeout) | +| `/api/v1/network-scan-targets/{id}` | PUT | Update | +| `/api/v1/network-scan-targets/{id}` | DELETE | Delete | +| `/api/v1/network-scan-targets/{id}/scan` | POST | Trigger immediate scan | + +--- + +## Ownership and Teams + + + +### Certificate Ownership + +Certificates have an `owner` field linking to an owner record with email and team assignment. Notification routing uses owner email when no explicit recipients are configured. ### Teams -- **Model** — Team grouping for organizational structure -- **Team Assignment** — Certificates and policies assigned to teams -- **Email Distribution** — Optional team email for notifications -- **Resolver Logic** — Team name → member lookup via API (external resolution) -- **GUI** — CRUD page with member management -### Owners -- **Model** — Individual person responsible for certificates -- **Email Routing** — Owner email used for notification delivery -- **Team Association** — Owners belong to teams -- **Certificate Assignment** — Certificates assigned to owner (1:1 or group) -- **Notification Routing** — Expiration/renewal/revocation alerts sent to owner email -- **GUI** — CRUD page with team picker, email validation +Organizational grouping for owners. 
Full CRUD API and GUI page. -### Interactive Renewal Approval (M11b) -- **AwaitingApproval Job State** — Renewal jobs pause for human approval -- **Approval Flow** — `POST /api/v1/jobs/{id}/approve` (proceed with renewal) -- **Rejection Flow** — `POST /api/v1/jobs/{id}/reject` with reason text (cancel job) -- **Reason Tracking** — Approval/rejection reason logged to job history and audit -- **Use Case** — Change control, compliance gates, sensitive certificate renewal +### Agent Groups + + + +Dynamic device grouping by matching criteria: + +- OS (e.g., `linux`, `darwin`, `windows`) +- Architecture (e.g., `amd64`, `arm64`) +- IP CIDR range +- Agent version + +Plus manual include/exclude membership lists. Agent groups can be referenced by renewal policies via `agent_group_id` FK. + +`MatchesAgent()` method on the domain model evaluates all criteria against an agent's metadata. --- ## Observability -Observability answers "is certctl healthy and are my certificates safe?" without opening the dashboard. Metrics integrate with your existing monitoring stack (Prometheus, Grafana, Datadog), stats power the dashboard charts, structured logs feed your SIEM, and the audit trail proves to auditors what happened and when. +### Metrics -```bash -# Quick health check -curl $SERVER/health -# {"status":"healthy"} + -# Dashboard summary — how many certs, what's expiring, agent health -curl -H "$AUTH" $SERVER/api/v1/stats/summary | jq . +**JSON metrics:** `GET /api/v1/metrics` — gauges (cert totals by status, agent counts, pending jobs), counters (completed/failed jobs), uptime. -# Prometheus metrics — scrape this from your monitoring stack -curl -H "$AUTH" $SERVER/api/v1/metrics/prometheus -# certctl_certificate_total 15 -# certctl_certificate_expiring 3 -# certctl_agent_active 4 -# ... +**Prometheus metrics:** `GET /api/v1/metrics/prometheus` — `text/plain; version=0.0.4` exposition format. 
11 metrics with `certctl_` prefix: -# JSON metrics — for custom dashboards -curl -H "$AUTH" $SERVER/api/v1/metrics | jq . -``` +| Metric | Type | +|---|---| +| `certctl_certificate_total` | gauge | +| `certctl_certificate_active` | gauge | +| `certctl_certificate_expiring_soon` | gauge | +| `certctl_certificate_expired` | gauge | +| `certctl_certificate_revoked` | gauge | +| `certctl_agent_total` | gauge | +| `certctl_agent_online` | gauge | +| `certctl_job_pending` | gauge | +| `certctl_job_completed_total` | counter | +| `certctl_job_failed_total` | counter | +| `certctl_uptime_seconds` | gauge | -### Observability Layers +Compatible with Prometheus, Grafana Agent, Datadog Agent, Victoria Metrics. -#### Dashboard Charts (M14) -Live aggregated views of certificate and job metrics. +### Stats API -| Chart | Type | Details | -|-------|------|---------| -| **Expiration Heatmap** | Stacked bar | 90-day weekly buckets; per-status color bands | -| **Renewal Success Rate** | Line (30-day) | Success % trending over time | -| **Certificate Status Distribution** | Donut | Pie breakdown: Active, Expiring, Expired, Failed, Revoked, etc. 
| -| **Issuance Rate** | Bar (30-day) | Certs issued per day; trend line | +| Endpoint | Description | +|---|---| +| `GET /api/v1/stats/summary` | Dashboard summary (total, active, expiring, expired) | +| `GET /api/v1/stats/certificates-by-status` | Status distribution | +| `GET /api/v1/stats/expiration-timeline?days=N` | Expiration buckets | +| `GET /api/v1/stats/job-trends?days=N` | Job completion trends | +| `GET /api/v1/stats/issuance-rate?days=N` | Issuance rate | -#### Metrics Endpoints +### Structured Logging -**JSON Format** -- **URL** — `GET /api/v1/metrics` -- **Format** — JSON with timestamp -- **Gauges** — Certificate counts by status, agent count (online/offline), pending job count -- **Counters** — Total jobs completed, total jobs failed, total renewals, total issuances -- **Uptime** — Server uptime in seconds +`slog`-based middleware with request ID propagation. No `fmt.Printf` in production code paths. -**Prometheus Exposition Format (M22)** -- **URL** — `GET /api/v1/metrics/prometheus` -- **Content-Type** — `text/plain; version=0.0.4; charset=utf-8` -- **Compatible with** — Prometheus, Grafana Agent, Datadog Agent, Victoria Metrics, OpenMetrics scrapers -- **Naming** — `certctl_` prefix, snake_case (e.g., `certctl_certificate_total`, `certctl_agent_online`) -- **11 Metrics** — 8 gauges (cert total/active/expiring/expired/revoked, agent total/online, job pending), 2 counters (job completed/failed totals), 1 gauge (uptime seconds) -- **Scrape Config** — Add to `prometheus.yml`: `scrape_configs: [{job_name: certctl, static_configs: [{targets: ['localhost:8443']}], metrics_path: /api/v1/metrics/prometheus}]` +### Immutable Audit Trail -#### Stats API (M14) -Five parameterized endpoints for dashboard data. +Append-only `audit_events` table. No UPDATE or DELETE permitted. 
Records: -| Endpoint | Parameters | Response | -|----------|------------|----------| -| **GET /api/v1/stats/summary** | None | Total certs, expiring soon, renewals in progress, failed jobs, agents online | -| **GET /api/v1/stats/certificates-by-status** | None | Count per status (Active, Expiring, Expired, etc.) | -| **GET /api/v1/stats/expiration-timeline** | days (default 90) | Weekly buckets with cert counts; 90-day default | -| **GET /api/v1/stats/job-trends** | days (default 30) | Daily completed/failed job counts; line chart ready | -| **GET /api/v1/stats/issuance-rate** | days (default 30) | Certs issued per day; 30-day default | - -#### Structured Logging (M14) -- **Library** — Go's `log/slog` (structured, context-aware) -- **Request ID Propagation** — Per-request UUID in context; logged on all operations -- **Middleware** — `NewLogging(logger *slog.Logger)` middleware wrapping all API calls -- **Log Format** — JSON (default) or text; configurable via `CERTCTL_LOG_FORMAT` -- **Log Level** — debug, info, warn, error; configurable via `CERTCTL_LOG_LEVEL` - -#### API Audit Middleware (M19) -Every API call recorded to immutable `audit_events` table. 
- -| Logged Field | Details | -|--------------|---------| -| **Method** | HTTP verb (GET, POST, PUT, DELETE) | -| **Path** | Request path (e.g., /api/v1/certificates) | -| **Actor** | Authenticated user/API key (or "anonymous") | -| **Body Hash** | SHA-256 of request body (truncated first 16 chars for brevity) | -| **Response Status** | HTTP status code | -| **Latency** | Request processing time in ms | -| **Timestamp** | RFC3339 format | - -#### Immutable Audit Trail -- **Table** — `audit_events` append-only (no UPDATE/DELETE) -- **Events** — Issuance, renewal, deployment, revocation, policy violations, approval/rejection -- **Retention** — Indefinite (no expiration) -- **GUI Export** — CSV/JSON export with applied time-range, actor, action filters -- **Query API** — `GET /api/v1/audit?actor=...&resource=...&action=...&before=...&after=...` - -#### Deployment Rollback Support (M14) -- **Version History** — Sorted by deployment timestamp -- **Current Badge** — Visual indicator on latest deployed version -- **Rollback Button** — Click to re-deploy previous version -- **Versioning** — Each cert version tracked (serial, fingerprint, PEM) +- All API calls (via audit middleware) +- Certificate lifecycle events (issuance, renewal, deployment, revocation, export) +- Discovery events (scan completed, cert claimed, cert dismissed) +- Job lifecycle events (created, completed, failed, cancelled, verified) +- Approval events (approved, rejected with reason) --- ## Job System -Jobs are the work units that drive the certificate lifecycle. Every issuance, renewal, and deployment is tracked as a job with a clear state machine, so operators always know exactly where each operation stands and can troubleshoot failures. 
+ -```bash -# List pending jobs -curl -H "$AUTH" "$SERVER/api/v1/jobs?status=Pending" | jq '.items[] | {id, type, status, certificate_id}' +### Job Types -# Cancel a stuck job -curl -X POST -H "$AUTH" $SERVER/api/v1/jobs/j-abc123/cancel +| Type | Description | +|---|---| +| `Issuance` | New certificate issuance | +| `Renewal` | Certificate renewal | +| `Deployment` | Deploy cert to target | +| `Validation` | Validate deployment | -# Approve a renewal waiting for human sign-off -curl -X POST -H "$AUTH" -H "$CT" $SERVER/api/v1/jobs/j-abc123/approve -d '{"reason": "Approved per change ticket #1234"}' -``` +### Job Statuses -### Job Types (4 total) -| Type | Trigger | States | Output | -|------|---------|--------|--------| -| **Issuance** | New certificate creation | Pending → AwaitingCSR/Running → Completed/Failed | Certificate version with serial | -| **Renewal** | Auto-renewal or manual trigger | Pending → AwaitingCSR/AwaitingApproval/Running → Completed/Failed | New certificate version | -| **Deployment** | Automatic or manual post-renewal | Pending → AwaitingCSR/Running → Completed/Failed | Target-specific status | -| **Validation** | Scheduled or manual | Pending → Running → Completed/Failed | Validation report (TBD V3) | +| Status | Description | +|---|---| +| `Pending` | Queued for processing | +| `AwaitingCSR` | Waiting for agent to submit CSR (agent keygen mode) | +| `AwaitingApproval` | Paused for manual approval | +| `Running` | In progress | +| `Completed` | Successfully finished | +| `Failed` | Failed with error | +| `Cancelled` | Cancelled by operator | -### Job States (7 total) -| State | Meaning | Transition | -|-------|---------|-----------| -| **Pending** | Created, awaiting processing | → AwaitingCSR or Running | -| **AwaitingCSR** | Agent needs to generate key + submit CSR | → Running (after CSR received) | -| **AwaitingApproval** | Human approval required (renewal only) | → Running (approve) or Cancelled (reject) | -| **Running** | Active 
processing (issuance, deployment, etc.) | → Completed or Failed | -| **Completed** | Successfully finished | (terminal) | -| **Failed** | Error during processing; no retry auto-scheduled | (terminal; manual retry available) | -| **Cancelled** | Explicitly cancelled by user or system | (terminal) | +### Agent Work Routing -### Job Lifecycle Example (Agent Keygen) -1. **Renewal triggered** → Job created in `Pending` state -2. **Scheduler polls** → Job transitioned to `AwaitingCSR` -3. **Work endpoint** → Agent receives job with common_name and SANs -4. **Agent keygen** → ECDSA P-256 key created locally; CSR submitted -5. **CSR received** → Server signs; Job transitioned to `Running` -6. **Deployment scheduled** → New Deployment job created in `Pending` -7. **Agent deploys** → Deployment job → `Running` → `Completed` -8. **Post-deployment verification** → Agent probes live TLS endpoint, compares SHA-256 fingerprint -9. **Status reported** → `POST /api/v1/agents/{id}/jobs/{job_id}/status` + -### Approval Flow (Interactive) -1. **Renewal job created** in `AwaitingApproval` state (if policy requires) -2. **Human reviews** on GUI -3. **Approve** → `POST /api/v1/jobs/{id}/approve` → Job → `Running` -4. **Reject** → `POST /api/v1/jobs/{id}/reject` + reason → Job → `Cancelled` +`GetPendingWork()` returns only jobs scoped to the requesting agent: -### Background Scheduler (7 loops) -| Loop | Interval | Task | -|------|----------|------| -| **Renewal Checker** | 1 hour | Scan policies; trigger renewals if cert expires soon | -| **Job Processor** | 30 seconds | Process Pending → AwaitingCSR/Running; poll agent status | -| **Health Checker** | 2 minutes | Check agent heartbeat; mark offline if >3 missed | -| **Notification Processor** | 1 minute | Send queued notifications (email, Slack, webhook, etc.) 
| -| **Short-Lived Cleanup** | 30 seconds | Audit short-lived credential expirations | -| **Network Scanner** | 6 hours | Scan enabled network targets; discover TLS certificates | -| **Digest Emailer** | 24 hours | Send HTML certificate digest email to configured recipients | +- Deployment jobs: matched by `jobs.agent_id` (set at creation from target → agent relationship) +- AwaitingCSR jobs: matched via certificate → target → agent chain +- Legacy fallback: target JOIN for jobs with NULL `agent_id` -All loops have configurable intervals via environment variables (`CERTCTL_SCHEDULER_*_INTERVAL`). +Single SQL `UNION` query replaces the previous "fetch all, filter in Go" approach. + +--- + +## Background Scheduler + + + +7 background loops, each with an `atomic.Bool` idempotency guard preventing concurrent tick execution. `sync.WaitGroup` + `WaitForCompletion()` for graceful shutdown. + +| Loop | Default Interval | Description | +|---|---|---| +| Renewal check | 1 hour | Check expiring certs, query ARI, create renewal jobs | +| Job processor | 30 seconds | Process pending jobs | +| Agent health check | 2 minutes | Check agent heartbeat staleness | +| Notification processor | 1 minute | Send queued notifications | +| Short-lived expiry check | 30 seconds | Mark short-lived certs expired | +| Network scan | 6 hours | Run network discovery scans | +| Digest | 24 hours | Send certificate digest email (does not run on startup) | + +--- + +## Dynamic Configuration (GUI) + +### Issuer Configuration + + + +GUI-driven issuer CRUD with AES-256-GCM encrypted config storage in PostgreSQL. 
+ +- Per-type config schema validation for all 9 issuer types +- Test connection flow (instantiates throwaway connector, calls `ValidateConfig`) +- Dynamic `sync.RWMutex`-guarded `IssuerRegistry` — rebuilds without server restart +- Env var backward compatibility: seeds DB on first boot if no DB config exists +- Source tracking: `env` (seeded from env vars) or `database` (created via GUI) + +| Env Var | Default | Description | +|---|---|---| +| `CERTCTL_CONFIG_ENCRYPTION_KEY` | (none) | AES-256-GCM encryption key for stored configs | + + + +Encryption: AES-256-GCM with PBKDF2-SHA256 key derivation, 12-byte random nonce. Exported functions: `EncryptAESGCM`, `DecryptAESGCM`, `DeriveKey`, `EncryptIfKeySet`, `DecryptIfEncrypted`. + +### Target Configuration + + + +Same pattern as issuer configuration: + +- Per-type config validation for all 14 target types +- AES-256-GCM encrypted config storage +- Test connection via agent heartbeat status (online within 5 minutes) +- Source badge (database vs env), enabled/disabled toggle --- ## Web Dashboard -### Overview -The web dashboard is the primary operational interface for certctl. Built with **Vite + React 18 + TypeScript + TanStack Query v5 + Tailwind CSS 3 + Recharts**. 
+ -| Page | Route | Purpose | -|------|-------|---------| -| **Dashboard** | `/` | Overview: summary cards, 4 charts (expiration, renewal rate, status, issuance), quick actions | -| **Certificates** | `/certificates` | List with multi-select, bulk operations (renew/revoke/reassign), new cert modal, sorting/filtering | -| **Certificate Detail** | `/certificates/:id` | Full cert view: deployment timeline, inline policy editor, version history, rollback, revoke, archive, renew actions | -| **Agents** | `/agents` | List with metadata (OS, architecture, IP, version), online status, uptime | -| **Agent Detail** | `/agents/:id` | Full system information, recent jobs, heartbeat graph, capabilities, metrics | -| **Agent Fleet Overview** | `/fleet` | OS/architecture grouping with pie charts (status, version), per-platform agent listing | -| **Jobs** | `/jobs` | Queue view with type filter, status filter, inline cancel/approve/reject, retry button | -| **Notifications** | `/notifications` | Grouped by certificate, mark-as-read toggle, filter by type (expiration, deployment, revocation) | -| **Policies** | `/policies` | CRUD with rule builder, enable/disable toggle, violations summary bar, violation list | -| **Profiles** | `/profiles` | List with crypto constraints (key algorithms, TTL, EKUs), create/edit/delete | -| **Issuers** | `/issuers` | List, create new issuer, test connection button, delete | -| **Targets** | `/targets` | List, 3-step configuration wizard (Select Type → Configure → Review), type-specific fields | -| **Owners** | `/owners` | List, create/edit with team picker, email field, delete | -| **Teams** | `/teams` | List, create/edit with member resolver, delete | -| **Agent Groups** | `/agent-groups` | List with dynamic match criteria badges (OS, arch, IP CIDR, version), manual membership editor | -| **Audit Trail** | `/audit` | Filtered view (time range, actor, action), CSV/JSON export buttons, event detail modal | -| **Short-Lived Credentials** | 
`/short-lived` | Filtered by profile with TTL < 1 hour, live countdown timer, auto-refresh every 10s, stats bar | -| **Login** | `/login` | API key entry, auth mode detection, redirect after successful auth | -| **ErrorBoundary** | (all pages) | Graceful crash recovery; displays user-friendly error message instead of white screen | +24 pages wired to real API endpoints. -### Dashboard Features +### Pages -#### Bulk Operations -- **Multi-Select** — Checkbox column in certificate list; "Select All" toggle -- **Bulk Renew** — Trigger renewal on selected certs; progress bar -- **Bulk Revoke** — Select reason codes per cert; sequential revocation; progress -- **Bulk Reassign** — Owner picker modal; assign to multiple certs at once +| Page | Route | Description | +|---|---|---| +| Dashboard | `/` | Summary stats, 4 charts (status donut, expiration heatmap, renewal trends, issuance rate) | +| Certificates | `/certificates` | List with bulk ops (renew, revoke, reassign owner), multi-select | +| Certificate Detail | `/certificates/:id` | Versions, deployment timeline, inline policy editor, export buttons | +| Agents | `/agents` | List with OS/arch metadata | +| Agent Detail | `/agents/:id` | System info, heartbeat status, capabilities, recent jobs | +| Fleet Overview | `/fleet` | OS/arch grouping, status/version distribution charts | +| Jobs | `/jobs` | List with status filter, approval buttons, verification badges | +| Job Detail | `/jobs/:id` | Full details, verification section (deployment jobs), timeline, audit events | +| Notifications | `/notifications` | Grouped by cert, read/unread state, mark-read | +| Policies | `/policies` | CRUD, severity summary bar, config preview | +| Profiles | `/profiles` | CRUD, EKU configuration | +| Issuers | `/issuers` | Catalog (10 cards), 3-step create wizard, config detail modal | +| Issuer Detail | `/issuers/:id` | Config (sensitive redacted), test connection, issued certs list | +| Targets | `/targets` | List with create wizard 
(3-step), per-type config fields for all 14 types | +| Target Detail | `/targets/:id` | Config, agent link, deployment history with verification badges | +| Owners | `/owners` | Team resolution, notification routing | +| Teams | `/teams` | CRUD | +| Agent Groups | `/agent-groups` | Dynamic criteria badges, manual membership | +| Audit | `/audit` | Time range/actor/resource/action filters, CSV/JSON export | +| Short-Lived | `/short-lived` | Filtered by profile TTL < 1 hour, live TTL countdown, auto-refresh 10s | +| Discovery | `/discovery` | Triage GUI with summary stats, claim/dismiss, scan history | +| Network Scans | `/network-scans` | CRUD for scan targets, Scan Now button | +| Digest | `/digest` | Preview iframe + send button | +| Observability | `/observability` | Health, metrics, Prometheus config, live output | -#### Deployment Timeline -- **Visual 4-Step Timeline** — Requested → Issued → Deploying → Active -- **Per-Certificate Job Queries** — Query jobs to get current phase -- **Status Indicators** — Checkmarks for completed phases; spinner for running; X for failed +### Onboarding Wizard -#### Inline Policy Editor -- **Edit Mode** — Click edit button on cert detail -- **Policy Dropdown** — Select from list of policies -- **Renewal Threshold Config** — Inline sliders/inputs for 30/14/7/0 day thresholds -- **Save/Cancel** — API mutations with optimistic updates via TanStack Query + -#### Target Configuration Wizard -- **Step 1: Select Type** — Radio or dropdown (NGINX, Apache, HAProxy, Traefik, Caddy, F5, IIS) -- **Step 2: Configure** — Type-specific fields (cert path, chain path, key path, etc.) 
-- **Step 3: Review** — Summary of config; confirm create -- **Validation** — Real-time field validation; show errors; disable Create if invalid +4-step first-run wizard shown when no user-configured issuers or certificates exist: -#### Auth & Session -- **Auth Context** — React context with API key, auth mode, session state -- **Auto-Redirect** — 401 response → redirect to /login -- **Logout** — Button in sidebar; clears context; redirects to /login -- **Remember API Key** — Persisted in localStorage (production should clear on logout) +1. **Connect a CA** — issuer catalog with 6+ types, config form, create + test connection +2. **Deploy Agent** — OS tabs (Linux/macOS/Docker) with install commands, agent polling every 5s +3. **Add Certificate** — CN, SANs, issuer/profile dropdowns, trigger issuance +4. **Done** — summary, doc links -#### Demo Mode -- Activates when API is unreachable -- Renders realistic mock data for screenshots -- Useful for offline presentations +Latching state prevents refetch-driven dismissal. `localStorage` dismissal key: `certctl:onboarding-dismissed`. --- -## Integration Interfaces +## CLI -### MCP Server (M18a) -**Separate binary** (`cmd/mcp-server/`) providing AI-native access to certctl via Claude, Cursor, OpenClaw. Instead of memorizing API endpoints, ask your AI assistant "what certificates are expiring this week?" or "renew the API prod cert" and it translates to the right API calls. + -- **Transport** — stdio (stdin/stdout) -- **Protocol** — Model Context Protocol v1 -- **SDK** — Official `modelcontextprotocol/go-sdk` v1.4.1 -- **Tools** — MCP tools covering all API endpoints -- **Organization** — 16 resource domains (Certificates, Issuers, Targets, Agents, Jobs, etc.) 
-- **Authentication** — Bearer token via `CERTCTL_API_KEY` env var -- **Configuration** — `CERTCTL_SERVER_URL` (e.g., http://localhost:8080) + `CERTCTL_API_KEY` -- **Input Types** — 33 typed structs with `jsonschema` tags for auto-generated LLM-friendly schemas -- **Stateless Design** — HTTP proxy (no state held in MCP server; all logic in REST API) +`certctl-cli` — stdlib-only (`flag` + `text/tabwriter`), no Cobra dependency. -### CLI Tool (certctl-cli, M16b) -**Lightweight command-line wrapper** around REST API. +### Commands -| Subcommand | Usage | Output Format | -|------------|-------|----------------| -| **certs list** | `certctl-cli certs list` | Table or JSON (--format=json) | -| **certs get** | `certctl-cli certs get ` | JSON cert details | -| **certs renew** | `certctl-cli certs renew ` | Job ID confirmation | -| **certs revoke** | `certctl-cli certs revoke [--reason]` | Revocation confirmation | -| **agents list** | `certctl-cli agents list` | Table or JSON | -| **agents get** | `certctl-cli agents get ` | Agent details | -| **jobs list** | `certctl-cli jobs list` | Table or JSON | -| **jobs get** | `certctl-cli jobs get ` | Job details | -| **jobs cancel** | `certctl-cli jobs cancel ` | Cancellation confirmation | -| **status** | `certctl-cli status` | Health + summary stats | -| **import** | `certctl-cli import ` | Bulk import cert count | -| **version** | `certctl-cli version` | Version string | +| Command | Description | +|---|---| +| `certs list` | List certificates | +| `certs get ID` | Certificate details | +| `certs renew ID` | Trigger renewal | +| `certs revoke ID` | Revoke (with `--reason`) | +| `agents list` | List agents | +| `agents get ID` | Agent details | +| `jobs list` | List jobs | +| `jobs get ID` | Job details | +| `jobs cancel ID` | Cancel pending job | +| `import FILE` | Bulk import from PEM file(s) | +| `status` | Server health + summary | +| `version` | CLI version | -**Implementation Details:** -- Stdlib-only (flag + 
text/tabwriter); no Cobra dependency -- JSON + table output formatters -- PEM parser for bulk import (multi-cert PEM files) -- Environment variables: `CERTCTL_SERVER_URL`, `CERTCTL_API_KEY` -- CLI flags: `--server`, `--api-key`, `--format` (json/table) -- Tested with httptest mock server; all commands covered +### Global Flags -### EST Server (RFC 7030, M23) -**Enrollment over Secure Transport** — industry-standard protocol for device certificate enrollment. Enables WiFi/802.1X, MDM, IoT, and BYOD use cases where devices need certificates without direct API access. - -**Endpoints** (under `/.well-known/est/` per RFC 7030): - -| Endpoint | Method | Description | Wire Format | -|----------|--------|-------------|-------------| -| `/cacerts` | GET | CA certificate chain distribution | Base64 PKCS#7 certs-only (application/pkcs7-mime) | -| `/simpleenroll` | POST | Initial certificate enrollment | Request: PEM or base64-DER PKCS#10; Response: PKCS#7 | -| `/simplereenroll` | POST | Certificate re-enrollment (renewal) | Same as simpleenroll | -| `/csrattrs` | GET | CSR attributes the server requires | ASN.1 DER (application/csrattrs) | - -**Architecture:** -- **ESTService** bridges handler to existing `IssuerConnector` — no new issuance logic, reuses existing CA connectors -- **CSR input handling** — accepts both base64-encoded DER (EST wire standard) and PEM-encoded PKCS#10 (convenience) -- **PKCS#7 output** — hand-rolled ASN.1 degenerate SignedData builder (no external PKCS#7 dependency) -- **CSR validation** — signature verification, Common Name extraction, SAN extraction (DNS, IP, email, URI) -- **Configurable issuer binding** — `CERTCTL_EST_ISSUER_ID` selects which issuer connector processes enrollment -- **Optional profile binding** — `CERTCTL_EST_PROFILE_ID` constrains enrollments to a specific certificate profile -- **Audit trail** — all EST enrollments recorded with protocol=EST, CN, SANs, issuer ID, serial, profile ID - -**Configuration:** -| Variable | Default 
| Description | -|----------|---------|-------------| -| `CERTCTL_EST_ENABLED` | `false` | Enable EST enrollment endpoints | -| `CERTCTL_EST_ISSUER_ID` | `iss-local` | Issuer connector for EST enrollments | -| `CERTCTL_EST_PROFILE_ID` | — | Optional profile ID to constrain enrollments | - -**Note:** EST endpoints currently use the same middleware stack as the REST API (API key auth). TLS client certificate authentication for EST is planned for V3. - -### OpenAPI 3.1 Specification -- **File** — `api/openapi.yaml` -- **Scope** — 99 operations (97 API + /health + /ready), all request/response schemas, enums, pagination -- **Schemas** — Complete domain models with examples -- **Enums** — Job types, states, policy rule types, notification types -- **Pagination** — Standard envelope (data, total, page, per_page) -- **Security** — Bearer token security scheme -- **SDK Generation** — Supports go-swagger, openapi-generator, etc. +| Flag | Env Var | Default | Description | +|---|---|---|---| +| `--server` | `CERTCTL_SERVER_URL` | `http://localhost:8443` | Server URL | +| `--api-key` | `CERTCTL_API_KEY` | (none) | API key | +| `--format` | (none) | `table` | Output: `table` or `json` | --- -## Security Architecture +## MCP Server -### Private Key Isolation -- **Agent-Side Keygen (Default)** — ECDSA P-256 keys generated on agents via Go's `crypto/ecdsa` -- **Local Key Storage** — Keys written to agent's `CERTCTL_KEY_DIR` (default `/var/lib/certctl/keys`) with 0600 permissions (user-readable only) -- **Server-Side Keygen (Demo Only)** — RSA-2048 keygen available via `CERTCTL_KEYGEN_MODE=server` with explicit log warning; never used in production -- **CSR Submission Only** — Agents submit CSRs (public) to control plane; private keys never leave agent infrastructure -- **Key Rotation** — Agents can re-key without control plane involvement (local only) + -### Pull-Only Deployment Model -- **No Outbound Initiations** — Server never initiates connections to agents or targets -- 
**Agent Polling** — Agents poll `GET /api/v1/agents/{id}/work` every 30 seconds -- **Proxy Agent Pattern** — For network appliances (F5, Palo Alto) or agentless targets (Windows servers), a "proxy agent" in the same network zone executes deployments via the target's API -- **Credential Scope** — Proxy agent credentials limited to its zone; control plane never stores target credentials directly -- **Firewall-Friendly** — Control plane can be completely locked down; no inbound rules needed for agents +Separate standalone binary (`cmd/mcp-server/`) using the official MCP Go SDK (`modelcontextprotocol/go-sdk`). Stdio transport for Claude, Cursor, and similar AI tool integrations. -### Sub-CA Capability -- **Enterprise Integration** — Local CA can operate as subordinate CA under enterprise root (e.g., ADCS) -- **Disk-Based Cert+Key** — `CERTCTL_CA_CERT_PATH` + `CERTCTL_CA_KEY_PATH` load pre-signed CA cert and key -- **Chain Validation** — Issued certs chain to enterprise root; full trust hierarchy -- **Self-Signed Fallback** — Default mode generates self-signed root if paths not set (development/demo) -- **Key Formats** — RSA, ECDSA, PKCS#8 support with auto-detection +- 80 MCP tools covering all API endpoints +- Stateless HTTP proxy — translates MCP tool calls to REST API calls +- Typed input structs with `jsonschema` struct tags for automatic schema generation +- Binary response support (DER CRL, OCSP) -### API Authentication -- **SHA-256 Hashing** — API keys hashed with SHA-256 before storage -- **Constant-Time Comparison** — Prevents timing attacks during key validation -- **Bearer Token** — `Authorization: Bearer {api_key}` header on all authenticated endpoints -- **Configurable** — `CERTCTL_AUTH_TYPE=api-key` (default) enforced; "none" requires explicit opt-in with log warning - -### Rate Limiting -- **Token Bucket** — Smooth rate limiting with burst capacity -- **RPS + Burst** — Configurable `CERTCTL_RATE_LIMIT_RPS` (default 50) and `CERTCTL_RATE_LIMIT_BURST` 
(default 100) -- **429 Responses** — Rate limit exceeded responses include `Retry-After` header -- **Per-Client** — Implemented per IP (future: per API key) - -### Audit & Compliance -- **Immutable Audit Trail** — Append-only table; no UPDATE/DELETE operations -- **API Audit Middleware** — Every call logged with method, path, actor, body hash, status, latency -- **Event Timestamps** — RFC3339 format with second precision -- **Actor Tracking** — API key ID or username extracted from auth context -- **Compliance Export** — CSV/JSON export of audit events with filtering +| Env Var | Description | +|---|---| +| `CERTCTL_SERVER_URL` | certctl server URL | +| `CERTCTL_API_KEY` | API key for authentication | --- -## Infrastructure +## Agent -### Deployment Architecture -- **Server** — Go HTTP server (net/http stdlib) on `:8080` (default) or `:8443` (Docker) -- **Database** — PostgreSQL 16 with 21 tables, TEXT primary keys (human-readable prefixed IDs) -- **Agent** — Lightweight Go binary on target infrastructure -- **Dashboard** — React SPA served from `/web/dist/` (Vite build) + -### Docker Compose Deployment -- **Services** — PostgreSQL 16, certctl server, agent -- **Health Checks** — On all services (server health check, database readiness) -- **Seed Data** — Demo dataset with 35 certs across 5 issuers, 8 agents, 8 targets, 90 days of job history, discovery data, network scans, policies, audit events -- **Credentials** — Environment variables in `.env` file; app.key for API key +Standalone binary that runs on managed infrastructure. Communicates with the control plane via HTTP polling. 
-### PostgreSQL Schema -- **21 Tables** — Certificates, certificate versions, agents, deployment targets, certificate-target mappings, renewal policies, jobs, audit events, notifications, issuers, policy rules, policy violations, certificate profiles, teams, owners, agent groups, agent group members, certificate revocations, discovered certificates, discovery scans, network scan targets -- **TEXT Primary Keys** — Human-readable prefixed IDs: mc-*, t-*, a-*, j-*, p-*, etc. -- **Indexes** — 5+ performance indexes on foreign keys, timestamps, status fields -- **Migrations** — Idempotent migrations with `IF NOT EXISTS`, `ON CONFLICT`, numbered sequentially -- **Max Connections** — Configurable via `CERTCTL_DATABASE_MAX_CONNS` (default 25) +### Capabilities -### CI/CD Pipeline -- **GitHub Actions** — `.github/workflows/ci.yml` -- **Parallel Jobs** — Go (build, vet, test+coverage, gates) and Frontend (tsc, vitest, vite build) -- **Coverage Gates** — Service layer ≥30%, handler layer ≥50% -- **Release Workflow** — Tag push → build → publish Docker images to GitHub Container Registry -- **Docker Tags** — `:latest`, `:v{version}` (`shankar0123.docker.scarf.sh/certctl-server`, `shankar0123.docker.scarf.sh/certctl-agent`) +- Heartbeat reporting (OS, architecture, IP address, version via `runtime.GOOS`/`runtime.GOARCH`/`net` stdlib) +- Work polling (`GET /api/v1/agents/{id}/work`) +- ECDSA P-256 key generation + CSR submission +- Target connector deployment (instantiates local connector based on job config) +- Post-deployment TLS verification +- Filesystem certificate discovery +- Exponential backoff on errors -### Test Suite -- **Unit Tests** — Extensive coverage across service, handler, middleware, domain, and connector layers -- **Integration Tests** — End-to-end workflows (issuance→renewal→deployment) against live Docker Compose environment -- **Negative Tests** — Malformed input, nonexistent resources, error conditions -- **Frontend Tests** — Vitest suite covering API client, 
utilities, stats/metrics, and full endpoint coverage -- **CI Gates** — Per-layer coverage thresholds (service 60%, handler 60%, domain 40%, middleware 50%), race detection, static analysis, vulnerability scanning +### Agent Metadata -### Licensing -- **License** — Business Source License 1.1 (BSL 1.1) -- **Conversion** — Automatic conversion to Apache 2.0 on March 23, 2033 (7-year term) -- **Source-Available** — Code available for inspection; copying/modification restricted until conversion +Reported via heartbeat, stored in `agents` table: OS, platform, architecture, IP address, hostname, version. + +### Configuration + +| Flag / Env Var | Default | Description | +|---|---|---| +| `--server-url` / `CERTCTL_SERVER_URL` | `http://localhost:8443` | Control plane URL | +| `--agent-id` / `CERTCTL_AGENT_ID` | (required) | Agent identifier | +| `--api-key` / `CERTCTL_API_KEY` | (none) | Auth key | +| `--key-dir` / `CERTCTL_KEY_DIR` | `/var/lib/certctl/keys` | Local key storage | +| `--discovery-dirs` / `CERTCTL_DISCOVERY_DIRS` | (none) | Comma-separated scan directories | --- -## Configuration Reference +## Deployment -### Environment Variables (All `CERTCTL_` Prefixed) +### Docker Compose -#### Server -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_SERVER_HOST` | string | 127.0.0.1 | Bind address | -| `CERTCTL_SERVER_PORT` | int | 8080 | Listen port | +- `deploy/docker-compose.yml` — clean default (server + postgres + agent), wizard-compatible +- `deploy/docker-compose.demo.yml` — override adding `seed_demo.sql` for demo mode +- `deploy/docker-compose.test.yml` — 7-container test environment (PostgreSQL, certctl-server, certctl-agent, step-ca, Pebble ACME, pebble-challtestsrv, NGINX) on static IP subnet `10.30.50.0/24` -#### Database -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_DATABASE_URL` | string | postgres://localhost/certctl | PostgreSQL connection string | -| 
`CERTCTL_DATABASE_MAX_CONNS` | int | 25 | Max connection pool size | -| `CERTCTL_DATABASE_MIGRATIONS_PATH` | string | ./migrations | Migration file directory | +### Helm Chart -#### Scheduler -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_SCHEDULER_RENEWAL_CHECK_INTERVAL` | duration | 1h | Renewal checker loop interval | -| `CERTCTL_SCHEDULER_JOB_PROCESSOR_INTERVAL` | duration | 30s | Job processor loop interval | -| `CERTCTL_SCHEDULER_AGENT_HEALTH_CHECK_INTERVAL` | duration | 2m | Agent health checker loop interval | -| `CERTCTL_SCHEDULER_NOTIFICATION_PROCESS_INTERVAL` | duration | 1m | Notification processor loop interval | + -#### Logging -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_LOG_LEVEL` | string | info | debug, info, warn, error | -| `CERTCTL_LOG_FORMAT` | string | json | json or text | +Production-ready Kubernetes deployment. -#### Authentication -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_AUTH_TYPE` | string | api-key | api-key, jwt, or none | -| `CERTCTL_AUTH_SECRET` | string | (required) | API key or JWT secret | +| Component | Kind | Notes | +|---|---|---| +| Server | Deployment | Configurable replicas (default 1), health probes, non-root, read-only rootfs | +| PostgreSQL | StatefulSet | Single replica, PVC (`10Gi` default, configurable storage class) | +| Agent | DaemonSet | One per node, key storage volume, server URL auto-discovery | +| Ingress | Ingress | Optional, configurable `className`, annotations, TLS | +| ServiceAccount | ServiceAccount | Optional with configurable annotations | -#### Rate Limiting -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_RATE_LIMIT_ENABLED` | bool | true | Enable/disable rate limiting | -| `CERTCTL_RATE_LIMIT_RPS` | float | 50 | Requests per second | -| `CERTCTL_RATE_LIMIT_BURST` | int | 100 | Max burst size | +Config via 
`values.yaml`. Secrets for API key, database password, SMTP password. -#### CORS -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_CORS_ORIGINS` | string | (empty) | Comma-separated origins or * for all | +### Install Script -#### Key Generation -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_KEYGEN_MODE` | string | agent | agent or server | +`install-agent.sh` — detects OS/arch via `uname`, downloads binary from GitHub Releases, installs to `/usr/local/bin/certctl-agent`, creates systemd unit (Linux) or launchd plist (macOS), prompts for server URL + API key. -#### Local CA Sub-CA Mode -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_CA_CERT_PATH` | string | (empty) | Path to PEM-encoded CA cert (sub-CA mode) | -| `CERTCTL_CA_KEY_PATH` | string | (empty) | Path to PEM-encoded CA key (sub-CA mode) | +### Release Workflow -#### ACME Issuer -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_ACME_DIRECTORY_URL` | string | (empty) | ACME server directory URL | -| `CERTCTL_ACME_EMAIL` | string | (empty) | Account email for ACME registration | -| `CERTCTL_ACME_CHALLENGE_TYPE` | string | http-01 | http-01, dns-01, or dns-persist-01 | -| `CERTCTL_ACME_DNS_PRESENT_SCRIPT` | string | (empty) | Script path for DNS present hook (dns-01 and dns-persist-01) | -| `CERTCTL_ACME_DNS_CLEANUP_SCRIPT` | string | (empty) | Script path for DNS cleanup hook (dns-01 only) | -| `CERTCTL_ACME_DNS_PERSIST_ISSUER_DOMAIN` | string | (empty) | CA issuer domain for dns-persist-01 (e.g., letsencrypt.org) | - -#### step-ca Issuer -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_STEPCA_URL` | string | (empty) | step-ca server URL | -| `CERTCTL_STEPCA_PROVISIONER` | string | (empty) | JWK provisioner name | -| `CERTCTL_STEPCA_KEY_PATH` | string | (empty) | Path to provisioner JWK 
private key | -| `CERTCTL_STEPCA_PASSWORD` | string | (empty) | Provisioner key password (if encrypted) | - -#### OpenSSL/Custom CA Issuer -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_OPENSSL_SIGN_SCRIPT` | string | (empty) | Path to sign script (CSR → cert) | -| `CERTCTL_OPENSSL_REVOKE_SCRIPT` | string | (empty) | Path to revoke script (serial+reason) | -| `CERTCTL_OPENSSL_CRL_SCRIPT` | string | (empty) | Path to CRL generation script | -| `CERTCTL_OPENSSL_TIMEOUT_SECONDS` | int | 30 | Script timeout in seconds | - -#### Network Discovery -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_NETWORK_SCAN_ENABLED` | bool | false | Enable server-side network certificate discovery | -| `CERTCTL_NETWORK_SCAN_INTERVAL` | duration | 6h | How often the scheduler runs network scans | - -#### Notifiers -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_SLACK_WEBHOOK_URL` | string | (empty) | Slack incoming webhook URL | -| `CERTCTL_SLACK_CHANNEL` | string | (empty) | Slack channel override | -| `CERTCTL_SLACK_USERNAME` | string | certctl | Slack username override | -| `CERTCTL_TEAMS_WEBHOOK_URL` | string | (empty) | Microsoft Teams webhook URL | -| `CERTCTL_PAGERDUTY_ROUTING_KEY` | string | (empty) | PagerDuty Events API routing key | -| `CERTCTL_PAGERDUTY_SEVERITY` | string | warning | PagerDuty event severity | -| `CERTCTL_OPSGENIE_API_KEY` | string | (empty) | OpsGenie API key | -| `CERTCTL_OPSGENIE_PRIORITY` | string | P3 | OpsGenie alert priority | - -#### Agent -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_AGENT_NAME` | string | (generated) | Agent display name | -| `CERTCTL_KEY_DIR` | string | /var/lib/certctl/keys | Local private key storage directory | -| `CERTCTL_AGENT_ID` | string | (env or generated) | Agent unique ID (mc-xxx prefix) | -| `CERTCTL_DISCOVERY_DIRS` | string | (empty) | 
Comma-separated directories for cert discovery | - -#### MCP Server -| Variable | Type | Default | Purpose | -|----------|------|---------|---------| -| `CERTCTL_SERVER_URL` | string | http://localhost:8080 | Base URL of certctl server | -| `CERTCTL_API_KEY` | string | (required) | API key for authentication | +`.github/workflows/release.yml` — on tag push: cross-compiles server + agent for 4 targets, attaches as GitHub Release assets, pushes Docker images to `ghcr.io`. --- -## Compliance Mapping Documentation +## Database Schema -Mapping guides that document how certctl's features align with compliance frameworks. These are not certifications — they help auditors and evaluators assess how certctl supports their organization's compliance posture. + -| Guide | Framework | Key Sections | -|-------|-----------|-------------| -| [SOC 2 Type II](compliance-soc2.md) | AICPA Trust Service Criteria | CC6 (logical access), CC7 (system operations), CC8 (change management), A1 (availability) | -| [PCI-DSS 4.0](compliance-pci-dss.md) | Payment Card Industry DSS | Req 3 (key management), Req 4 (data in transit), Req 8 (auth), Req 10 (audit logging) | -| [NIST SP 800-57](compliance-nist.md) | Key Management Guidelines | Key generation, storage, cryptoperiods, key states, algorithms, revocation | -| [Overview](compliance.md) | All three frameworks | Framework comparison, quick reference, V3 enhancement notes | +21 tables across 10 numbered migrations. PostgreSQL 16. `database/sql` + `lib/pq` (no ORM). TEXT primary keys with human-readable prefixed IDs. -Each guide includes an evidence summary table mapping specific criteria to certctl API endpoints, configuration, and database evidence. 
+### Migrations + +| Migration | Tables Added | +|---|---| +| `000001_initial_schema` | `managed_certificates`, `certificate_versions`, `agents`, `targets`, `issuers`, `renewal_policies`, `jobs`, `audit_events`, `notifications`, `owners`, `teams` | +| `000002_agent_metadata` | Columns on `agents` (os, platform, architecture, ip_address, hostname, version) | +| `000003_certificate_profiles` | `certificate_profiles` | +| `000004_agent_groups` | `agent_groups`, `agent_group_members` | +| `000005_revocation` | `certificate_revocations` + columns on `managed_certificates` | +| `000006_discovery` | `discovered_certificates`, `discovery_scans` | +| `000007_network_discovery` | `network_scan_targets` | +| `000008_verification` | Columns on `jobs` (verification fields) | +| `000009_issuer_config` | Columns on `issuers` (encrypted_config, source, test_status) | +| `000010_target_config` | Columns on `targets` (encrypted_config, source, test_status) | + +All migrations are idempotent (`IF NOT EXISTS`, `ON CONFLICT`). --- -## Feature Matrix: V2 Free vs. 
V3 Paid (Roadmap) +## Security -| Feature | V2 | V3 (Paid) | Status | -|---------|----|-----------|-| -| Certificate lifecycle (create/renew/revoke) | ✓ | ✓ | Shipped v1.0+ | -| 9 issuer connectors (Local CA, ACME, step-ca, OpenSSL, Vault PKI, DigiCert, Sectigo, Google CAS, EST) | ✓ | ✓ | Shipped | -| 13 target connectors (NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS, F5, Postfix, Dovecot, SSH, WinCertStore, JavaKeystore) | ✓ | ✓ | Shipped | -| 6 notifier channels (Email, Webhook, Slack, Teams, PagerDuty, OpsGenie) | ✓ | ✓ | Shipped | -| Agent fleet + metadata | ✓ | ✓ | Shipped | -| Agent groups (dynamic + manual) | ✓ | ✓ | Shipped | -| Policies + violations | ✓ | ✓ | Shipped | -| Profiles + crypto constraints | ✓ | ✓ | Shipped | -| Revocation (RFC 5280, CRL, OCSP) | ✓ | ✓ | Shipped | -| Full web dashboard | ✓ | ✓ | Shipped | -| Observability (charts, metrics, stats) | ✓ | ✓ | Shipped | -| REST API | ✓ | ✓ | Shipped | -| MCP server (REST API exposed via MCP) | ✓ | ✓ | Shipped v2.1 | -| CLI tool | ✓ | ✓ | Shipped | -| Compliance mapping docs (SOC 2, PCI-DSS, NIST) | ✓ | ✓ | Shipped | -| Filesystem cert discovery | ✓ | ✓ | Shipped | -| Network cert discovery | ✓ | ✓ | Shipped | -| Prometheus metrics | ✓ | ✓ | Shipped | -| Enhanced query API (sort, filter, cursor, fields) | ✓ | ✓ | Shipped | -| Immutable API audit log | ✓ | ✓ | Shipped | -| Bulk operations | ✓ | ✓ | Shipped | -| EST server (RFC 7030) | ✓ | ✓ | Shipped | -| Post-deployment TLS verification | ✓ | ✓ | Shipped | -| Certificate export (PEM + PKCS#12) | ✓ | ✓ | Shipped | -| S/MIME support (EKU-aware issuance) | ✓ | ✓ | Shipped | -| ACME ARI (RFC 9773) | ✓ | ✓ | Shipped | -| Scheduled certificate digest emails | ✓ | ✓ | Shipped | -| Helm chart (Kubernetes) | ✓ | ✓ | Shipped | -| Dynamic issuer/target configuration (GUI) | ✓ | ✓ | Shipped | -| Onboarding wizard | ✓ | ✓ | Shipped | -| **OIDC/SSO auth** | ✗ | ✓ | Planned V3 | -| **RBAC (role-based access control)** | ✗ | ✓ | Planned V3 | -| **NATS event 
bus** | ✗ | ✓ | Planned V3 | -| **Real-time updates (SSE/WebSocket)** | ✗ | ✓ | Planned V3 | -| **Advanced search DSL** | ✗ | ✓ | Planned V3 | -| **Bulk revocation (by profile/owner/agent)** | ✗ | ✓ | Planned V3 | -| **Certificate health scores** | ✗ | ✓ | Planned V3 | -| **Compliance scoring** | ✗ | ✓ | Planned V3 | +### Input Validation + + + +Centralized `validation` package with shell injection prevention. 80+ adversarial test cases. Used by all target connectors that execute shell commands (NGINX, Apache, HAProxy, Traefik, Caddy, Postfix/Dovecot, SSH, Java Keystore). + +### SSRF Protection + +Network scanner filters reserved IP ranges before CIDR expansion: loopback, link-local, multicast, broadcast. + +### Encryption at Rest + +AES-256-GCM with PBKDF2-SHA256 key derivation for issuer and target configs stored in PostgreSQL. + +### Agent Key Security + +- Agent-side key generation (ECDSA P-256) — private keys never leave agent infrastructure +- Keys stored with `0600` file permissions +- Docker volumes persist keys across container restarts --- -## Summary Statistics +## CI/CD -| Category | Count | -|----------|-------| -| **Dashboard** | Full web GUI with operational views wired to real API data | -| **Issuer Connectors** | 8 (Local CA, ACME, step-ca, OpenSSL, Vault PKI, DigiCert, Sectigo, Google CAS) + EST server | -| **Target Connectors** | 13 (NGINX, Apache, HAProxy, Traefik, Caddy, Envoy, IIS, F5, Postfix, Dovecot, SSH, WinCertStore, JavaKeystore) | -| **Notifier Channels** | 6 (Email, Webhook, Slack, Teams, PagerDuty, OpsGenie) | -| **Job Types** | 4 (Issuance, Renewal, Deployment, Validation) | -| **Job States** | 7 (Pending, AwaitingCSR, AwaitingApproval, Running, Completed, Failed, Cancelled) | -| **Policy Rule Types** | 5 (AllowedIssuers, AllowedDomains, RequiredMetadata, AllowedEnvironments, RenewalLeadTime) | -| **Certificate States** | 8 (Pending, Active, Expiring, Expired, RenewalInProgress, Failed, Revoked, Archived) | -| **Revocation Reason 
Codes** | 8 (RFC 5280 compliant) | -| **Discovery Statuses** | 3 (Unmanaged, Managed, Dismissed) | -| **MCP Server** | REST API exposed via MCP (16 resource domains) | -| **CLI Subcommands** | 10 | -| **Test Suite** | Extensively tested with CI-enforced coverage gates | -| **Environment Variables** | 41+ configuration options | + +GitHub Actions with parallel Go and Frontend jobs. + +### Go Pipeline + +- `go build` (server, agent, CLI, MCP server) +- `go vet` +- `go test -race` (race detection) +- `golangci-lint` (11 linters) +- `govulncheck` (vulnerability scanning) +- Test coverage with per-layer thresholds: + +| Layer | Threshold | +|---|---| +| Service | 55% | +| Handler | 60% | +| Domain | 40% | +| Middleware | 30% | + +### Frontend Pipeline + +- `tsc` (TypeScript compilation) +- `vitest` (213 tests) +- `vite build` + +--- + +## Test Suite + +1850+ tests across multiple layers: + +| Layer | Approximate Count | Description | +|---|---|---| +| Service | ~400 | Unit tests for all service methods | +| Handler | ~200 | HTTP handler tests with mocked services | +| Domain | ~80 | Domain model validation and logic | +| Connector (issuer) | ~130 | Per-connector tests with httptest mocks | +| Connector (target) | ~200 | Per-connector tests with injectable interfaces | +| Middleware | ~30 | Auth, CORS, audit, rate limiting, body limit | +| Integration | ~50 | Multi-layer integration tests | +| Go integration | 34 subtests | Live Docker Compose environment (12 phases) | +| Repository | ~50 | testcontainers-go PostgreSQL tests | +| CLI | ~14 | Command tests with httptest mock server | +| Fuzz | ~5 | Validation and domain parsing | +| Frontend | 213 | Vitest (API client, components, utilities) | + +### Go Integration Tests + +`deploy/test/integration_test.go` — `//go:build integration` tag, runs against live `docker-compose.test.yml`. 
12 phases, 34 subtests: health, agent heartbeat, Local CA issuance, ACME issuance, renewal, step-ca issuance, revocation + CRL + OCSP, EST enrollment, S/MIME (EKU/KeyUsage/email SAN), discovery, network scan, deployment verification. Uses `crypto/x509` for cert parsing, `crypto/tls` for NGINX verification, `database/sql` + `lib/pq` for PostgreSQL direct access. + +--- + +## Examples + +5 turnkey Docker Compose scenarios in `examples/`: + +| Directory | Scenario | +|---|---| +| `acme-nginx/` | Let's Encrypt + NGINX | +| `acme-wildcard-dns01/` | Wildcard with DNS-01 via Cloudflare hooks | +| `private-ca-traefik/` | Local CA sub-CA mode + Traefik file provider | +| `step-ca-haproxy/` | step-ca + HAProxy | +| `multi-issuer/` | ACME (public) + Local CA (internal) from one dashboard | + +--- + +## Compliance Mapping + +Pre-mapped to three compliance frameworks in `docs/`: + +- **SOC 2 Type II** — CC6 (logical access), CC7 (system operations), CC8 (change management), A1 (availability) +- **PCI-DSS 4.0** — Req 3 (key management), Req 4 (TLS inventory), Req 7 (access control), Req 8 (authentication), Req 10 (audit logging) +- **NIST SP 800-57** — Key generation, storage, cryptoperiods, key states, algorithms, revocation + +--- + +## Architecture Decisions + +| Decision | Choice | Rationale | +|---|---|---| +| Language | Go 1.25 | stdlib routing, `net/http`, `slog`, `crypto/x509` | +| Database | PostgreSQL 16 + `database/sql` + `lib/pq` | No ORM, raw SQL | +| Primary keys | TEXT | Human-readable prefixed IDs (`mc-api-prod`) | +| Layering | Handler → Service → Repository | Dependency inversion (handlers define interfaces) | +| Frontend | Vite + React 18 + TypeScript + TanStack Query | Served from `web/dist/` with SPA fallback | +| Deployment model | Pull-only | Server never initiates outbound to agents/targets | +| Service decomposition | Facade/delegation | `CertificateService` delegates to `RevocationSvc` + `CAOperationsSvc` | +| Handler wiring | `HandlerRegistry` struct 
(20 fields) | Replaced 18-positional-parameter function | +| License | BSL 1.1 | Source-available, converts to Apache 2.0 in March 2033 | diff --git a/internal/service/renewal.go b/internal/service/renewal.go index 91934f8..64ef46b 100644 --- a/internal/service/renewal.go +++ b/internal/service/renewal.go @@ -136,8 +136,17 @@ func (s *RenewalService) CheckExpiringCertificates(ctx context.Context) error { policyCache := make(map[string]*domain.RenewalPolicy) for _, cert := range expiring { - // Skip if already renewing or archived - if cert.Status == domain.CertificateStatusRenewalInProgress || cert.Status == domain.CertificateStatusArchived { + // Skip certs in terminal or non-renewable states: + // - RenewalInProgress: already being renewed + // - Archived: no longer managed + // - Revoked: intentionally revoked, should not be auto-renewed + // - Failed: requires manual intervention (the failure cause hasn't been resolved) + // - Expired: requires manual review (why did it expire without renewal?) 
+ if cert.Status == domain.CertificateStatusRenewalInProgress || + cert.Status == domain.CertificateStatusArchived || + cert.Status == domain.CertificateStatusRevoked || + cert.Status == domain.CertificateStatusFailed || + cert.Status == domain.CertificateStatusExpired { continue } diff --git a/internal/service/renewal_test.go b/internal/service/renewal_test.go index 31f2d3f..cf0044f 100644 --- a/internal/service/renewal_test.go +++ b/internal/service/renewal_test.go @@ -239,6 +239,77 @@ func TestCheckExpiringCertificates_SkipsRenewalInProgress(t *testing.T) { } } +func TestCheckExpiringCertificates_SkipsExpiredFailedRevoked(t *testing.T) { + ctx := context.Background() + + // Test that certs in Expired, Failed, and Revoked states do not get renewal jobs + for _, tc := range []struct { + name string + status domain.CertificateStatus + }{ + {"Expired", domain.CertificateStatusExpired}, + {"Failed", domain.CertificateStatusFailed}, + {"Revoked", domain.CertificateStatusRevoked}, + } { + t.Run(tc.name, func(t *testing.T) { + certRepo := newMockCertificateRepository() + jobRepo := newMockJobRepository() + policyRepo := newMockRenewalPolicyRepository() + auditRepo := newMockAuditRepository() + notifRepo := newMockNotificationRepository() + + auditSvc := NewAuditService(auditRepo) + notifSvc := NewNotificationService(notifRepo, map[string]Notifier{}) + + issuerRegistry := NewIssuerRegistry(slog.Default()) + issuerRegistry.Set("iss-test", &mockIssuerConnector{}) + + svc := NewRenewalService(certRepo, jobRepo, policyRepo, nil, auditSvc, notifSvc, issuerRegistry, "server") + + cert := &domain.ManagedCertificate{ + ID: "mc-" + strings.ToLower(string(tc.status)), + Name: "Test " + string(tc.status), + CommonName: "test.example.com", + SANs: []string{}, + OwnerID: "owner-1", + TeamID: "team-1", + IssuerID: "iss-test", + RenewalPolicyID: "rp-standard", + Status: tc.status, + ExpiresAt: time.Now().AddDate(0, 0, 10), + Tags: make(map[string]string), + CreatedAt: time.Now(), + 
UpdatedAt: time.Now(), + } + certRepo.AddCert(cert) + + policy := &domain.RenewalPolicy{ + ID: "rp-standard", + Name: "Standard", + RenewalWindowDays: 30, + AutoRenew: true, + MaxRetries: 3, + RetryInterval: 300, + AlertThresholdsDays: []int{30, 14, 7, 0}, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } + policyRepo.AddPolicy(policy) + + err := svc.CheckExpiringCertificates(ctx) + if err != nil { + t.Fatalf("CheckExpiringCertificates failed: %v", err) + } + + for _, job := range jobRepo.Jobs { + if job.Type == domain.JobTypeRenewal { + t.Errorf("should not create renewal job for cert with %s status", tc.status) + } + } + }) + } +} + func TestCheckExpiringCertificates_UpdatesStatusToExpiring(t *testing.T) { t.Helper() ctx := context.Background() diff --git a/migrations/seed_demo.sql b/migrations/seed_demo.sql index 5cd11b4..877d19d 100644 --- a/migrations/seed_demo.sql +++ b/migrations/seed_demo.sql @@ -150,17 +150,21 @@ INSERT INTO managed_certificates (id, name, common_name, sans, environment, owne -- ---- Active certs via step-ca (internal services) ---- ('mc-grpc-prod', 'grpc-internal', 'grpc.internal.example.com', ARRAY['grpc.internal.example.com'], 'production', 'o-alice', 't-platform', 'iss-stepca', 'rp-standard', 'Active', NOW() + INTERVAL '58 days', '{"service": "grpc-gateway", "tier": "high"}', NOW() - INTERVAL '32 days', NOW() - INTERVAL '32 days', NOW() - INTERVAL '100 days', NOW()), - ('mc-vault-prod', 'vault-internal', 'vault.internal.example.com', ARRAY['vault.internal.example.com'], 'production', 'o-bob', 't-security', 'iss-stepca', 'rp-urgent', 'Active', NOW() + INTERVAL '25 days', '{"service": "vault", "tier": "critical"}', NOW() - INTERVAL '65 days', NOW() - INTERVAL '65 days', NOW() - INTERVAL '120 days', NOW()), + ('mc-vault-prod', 'vault-internal', 'vault.internal.example.com', ARRAY['vault.internal.example.com'], 'production', 'o-bob', 't-security', 'iss-stepca', 'rp-urgent', 'Active', NOW() + INTERVAL '35 days', '{"service": "vault", 
"tier": "critical"}', NOW() - INTERVAL '65 days', NOW() - INTERVAL '65 days', NOW() - INTERVAL '120 days', NOW()), ('mc-consul-prod', 'consul-internal', 'consul.internal.example.com', ARRAY['consul.internal.example.com'], 'production', 'o-alice', 't-platform', 'iss-stepca', 'rp-standard', 'Active', NOW() + INTERVAL '63 days', '{"service": "consul", "tier": "high"}', NOW() - INTERVAL '27 days', NOW() - INTERVAL '27 days', NOW() - INTERVAL '90 days', NOW()), -- ---- Active certs via ZeroSSL ---- ('mc-shop-prod', 'shop-production', 'shop.example.com', ARRAY['shop.example.com', 'store.example.com'], 'production', 'o-carol', 't-payments', 'iss-acme-zs', 'rp-urgent', 'Active', NOW() + INTERVAL '44 days', '{"service": "shop", "tier": "critical", "pci": "true"}', NOW() - INTERVAL '46 days', NOW() - INTERVAL '46 days', NOW() - INTERVAL '60 days', NOW()), - -- ---- Expiring soon (< 30 days) ---- - ('mc-auth-prod', 'auth-production', 'auth.example.com', ARRAY['auth.example.com', 'login.example.com', 'sso.example.com'], 'production', 'o-bob', 't-security', 'iss-local', 'rp-urgent', 'Expiring', NOW() + INTERVAL '12 days', '{"service": "auth", "tier": "critical"}', NOW() - INTERVAL '78 days', NOW() - INTERVAL '78 days', NOW() - INTERVAL '300 days', NOW()), - ('mc-cdn-prod', 'cdn-production', 'cdn.example.com', ARRAY['cdn.example.com', 'static.example.com'], 'production', 'o-alice', 't-platform', 'iss-local', 'rp-standard', 'Expiring', NOW() + INTERVAL '8 days', '{"service": "cdn", "tier": "high"}', NOW() - INTERVAL '82 days', NOW() - INTERVAL '82 days', NOW() - INTERVAL '250 days', NOW()), - ('mc-mail-prod', 'mail-production', 'mail.example.com', ARRAY['mail.example.com', 'smtp.example.com'], 'production', 'o-bob', 't-security', 'iss-local', 'rp-standard', 'Expiring', NOW() + INTERVAL '5 days', '{"service": "email", "tier": "medium"}', NOW() - INTERVAL '85 days', NOW() - INTERVAL '85 days', NOW() - INTERVAL '400 days', NOW()), - ('mc-ci-prod', 'ci-production', 'ci.example.com', 
ARRAY['ci.example.com', 'jenkins.example.com'], 'production', 'o-frank', 't-devops', 'iss-acme-le', 'rp-standard', 'Expiring', NOW() + INTERVAL '18 days', '{"service": "ci", "tier": "high"}', NOW() - INTERVAL '72 days', NOW() - INTERVAL '72 days', NOW() - INTERVAL '100 days', NOW()), + -- ---- Expiring soon ---- + -- NOTE: expires_at is set > 31 days to stay outside the scheduler's 31-day renewal query window. + -- The scheduler runs CheckExpiringCertificates on boot with a 31-day lookahead; certs inside that + -- window get renewal jobs created automatically. By placing these at 32-38 days, the status stays + -- frozen as seeded. These dates fall just outside the 30-day alert threshold, so the 'Expiring' label on the dashboard comes from the seeded status value. + ('mc-auth-prod', 'auth-production', 'auth.example.com', ARRAY['auth.example.com', 'login.example.com', 'sso.example.com'], 'production', 'o-bob', 't-security', 'iss-local', 'rp-urgent', 'Expiring', NOW() + INTERVAL '32 days', '{"service": "auth", "tier": "critical"}', NOW() - INTERVAL '78 days', NOW() - INTERVAL '78 days', NOW() - INTERVAL '300 days', NOW()), + ('mc-cdn-prod', 'cdn-production', 'cdn.example.com', ARRAY['cdn.example.com', 'static.example.com'], 'production', 'o-alice', 't-platform', 'iss-local', 'rp-standard', 'Expiring', NOW() + INTERVAL '34 days', '{"service": "cdn", "tier": "high"}', NOW() - INTERVAL '82 days', NOW() - INTERVAL '82 days', NOW() - INTERVAL '250 days', NOW()), + ('mc-mail-prod', 'mail-production', 'mail.example.com', ARRAY['mail.example.com', 'smtp.example.com'], 'production', 'o-bob', 't-security', 'iss-local', 'rp-standard', 'Expiring', NOW() + INTERVAL '33 days', '{"service": "email", "tier": "medium"}', NOW() - INTERVAL '85 days', NOW() - INTERVAL '85 days', NOW() - INTERVAL '400 days', NOW()), + ('mc-ci-prod', 'ci-production', 'ci.example.com', ARRAY['ci.example.com', 'jenkins.example.com'], 'production', 'o-frank', 't-devops', 'iss-acme-le', 'rp-standard', 'Expiring', NOW() + INTERVAL '38 days', '{"service": "ci", 
"tier": "high"}', NOW() - INTERVAL '72 days', NOW() - INTERVAL '72 days', NOW() - INTERVAL '100 days', NOW()), -- ---- Expired ---- ('mc-legacy-prod', 'legacy-app', 'legacy.example.com', ARRAY['legacy.example.com'], 'production', 'o-alice', 't-platform', 'iss-local', 'rp-manual', 'Expired', NOW() - INTERVAL '3 days', '{"service": "legacy", "tier": "low", "decom": "planned"}', NOW() - INTERVAL '93 days', NOW() - INTERVAL '93 days', NOW() - INTERVAL '500 days', NOW()), @@ -176,16 +180,18 @@ INSERT INTO managed_certificates (id, name, common_name, sans, environment, owne ('mc-api-dev', 'api-development', 'api.dev.example.com', ARRAY['api.dev.example.com'], 'development', 'o-alice', 't-platform', 'iss-local', 'rp-standard', 'Active', NOW() + INTERVAL '85 days', '{"service": "api-gateway", "tier": "low"}', NOW() - INTERVAL '5 days', NOW() - INTERVAL '5 days', NOW() - INTERVAL '45 days', NOW()), -- ---- Renewal in progress ---- - ('mc-grafana-prod', 'grafana-production', 'grafana.example.com', ARRAY['grafana.example.com', 'metrics.example.com'], 'production', 'o-eve', 't-data', 'iss-local', 'rp-standard', 'RenewalInProgress', NOW() + INTERVAL '3 days', '{"service": "monitoring", "tier": "high"}', NOW() - INTERVAL '87 days', NOW() - INTERVAL '87 days', NOW() - INTERVAL '180 days', NOW()), + -- NOTE: expires_at set > 31 days to keep outside scheduler's renewal query window + ('mc-grafana-prod', 'grafana-production', 'grafana.example.com', ARRAY['grafana.example.com', 'metrics.example.com'], 'production', 'o-eve', 't-data', 'iss-local', 'rp-standard', 'RenewalInProgress', NOW() + INTERVAL '33 days', '{"service": "monitoring", "tier": "high"}', NOW() - INTERVAL '87 days', NOW() - INTERVAL '87 days', NOW() - INTERVAL '180 days', NOW()), -- ---- Failed ---- - ('mc-vpn-prod', 'vpn-production', 'vpn.example.com', ARRAY['vpn.example.com'], 'production', 'o-bob', 't-security', 'iss-acme-le', 'rp-urgent', 'Failed', NOW() + INTERVAL '1 day', '{"service": "vpn", "tier": "critical"}', 
NULL, NULL, NOW() - INTERVAL '90 days', NOW()), + -- NOTE: expires_at set > 31 days; scheduler code fix also skips Failed certs from auto-renewal + ('mc-vpn-prod', 'vpn-production', 'vpn.example.com', ARRAY['vpn.example.com'], 'production', 'o-bob', 't-security', 'iss-acme-le', 'rp-urgent', 'Failed', NOW() + INTERVAL '32 days', '{"service": "vpn", "tier": "critical"}', NULL, NULL, NOW() - INTERVAL '90 days', NOW()), -- ---- Wildcard ---- ('mc-wildcard-prod', 'wildcard-production', '*.example.com', ARRAY['*.example.com', 'example.com'], 'production', 'o-alice', 't-platform', 'iss-acme-le', 'rp-standard', 'Active', NOW() + INTERVAL '50 days', '{"service": "wildcard", "tier": "critical"}', NOW() - INTERVAL '40 days', NOW() - INTERVAL '40 days', NOW() - INTERVAL '365 days', NOW()), -- ---- Revoked ---- - ('mc-compromised', 'compromised-cert', 'old-service.example.com', ARRAY['old-service.example.com'], 'production', 'o-bob', 't-security', 'iss-local', 'rp-standard', 'Revoked', NOW() + INTERVAL '30 days', '{"service": "decommissioned", "tier": "low"}', NOW() - INTERVAL '60 days', NOW() - INTERVAL '60 days', NOW() - INTERVAL '120 days', NOW()), + ('mc-compromised', 'compromised-cert', 'old-service.example.com', ARRAY['old-service.example.com'], 'production', 'o-bob', 't-security', 'iss-local', 'rp-standard', 'Revoked', NOW() + INTERVAL '45 days', '{"service": "decommissioned", "tier": "low"}', NOW() - INTERVAL '60 days', NOW() - INTERVAL '60 days', NOW() - INTERVAL '120 days', NOW()), -- ---- Edge/CDN certs (Traefik + Caddy targets) ---- ('mc-edge-eu', 'edge-eu-production', 'eu.cdn.example.com', ARRAY['eu.cdn.example.com', 'eu-assets.example.com'], 'production', 'o-alice', 't-platform', 'iss-acme-le', 'rp-standard', 'Active', NOW() + INTERVAL '61 days', '{"service": "cdn-eu", "tier": "high", "region": "eu-west-1"}', NOW() - INTERVAL '29 days', NOW() - INTERVAL '29 days', NOW() - INTERVAL '45 days', NOW()), diff --git a/web/src/pages/AgentDetailPage.tsx 
b/web/src/pages/AgentDetailPage.tsx index 06bb460..c7ca45c 100644 --- a/web/src/pages/AgentDetailPage.tsx +++ b/web/src/pages/AgentDetailPage.tsx @@ -61,7 +61,7 @@ export default function AgentDetailPage() { ); } - const health = agent.status || heartbeatStatus(agent.last_heartbeat); + const health = agent.status || heartbeatStatus(agent.last_heartbeat_at); return ( <> @@ -82,10 +82,10 @@ export default function AgentDetailPage() { {agent.ip_address || '—'}} /> - {timeAgo(agent.last_heartbeat)} - {formatDateTime(agent.last_heartbeat)} + {timeAgo(agent.last_heartbeat_at)} + {formatDateTime(agent.last_heartbeat_at)} ) : '—' } /> diff --git a/web/src/pages/AgentsPage.tsx b/web/src/pages/AgentsPage.tsx index 1ddeaad..4123d39 100644 --- a/web/src/pages/AgentsPage.tsx +++ b/web/src/pages/AgentsPage.tsx @@ -39,7 +39,7 @@ export default function AgentsPage() { { key: 'status', label: 'Health', - render: (a) => , + render: (a) => , }, { key: 'hostname', label: 'Hostname', render: (a) => {a.hostname || '—'} }, { key: 'os', label: 'OS / Arch', render: (a) => {a.os && a.architecture ? `${a.os}/${a.architecture}` : a.os || '—'} }, @@ -48,7 +48,7 @@ export default function AgentsPage() { { key: 'heartbeat', label: 'Last Heartbeat', - render: (a) => {timeAgo(a.last_heartbeat)}, + render: (a) => {timeAgo(a.last_heartbeat_at)}, }, ];